{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.999878308487983, "eval_steps": 500, "global_step": 16434, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012169151201703681, "grad_norm": 103.34007263183594, "learning_rate": 6.079027355623101e-08, "loss": 2.0025, "step": 1 }, { "epoch": 0.00024338302403407362, "grad_norm": 103.11289978027344, "learning_rate": 1.2158054711246203e-07, "loss": 2.0521, "step": 2 }, { "epoch": 0.0003650745360511104, "grad_norm": 95.9359130859375, "learning_rate": 1.8237082066869301e-07, "loss": 1.8462, "step": 3 }, { "epoch": 0.00048676604806814725, "grad_norm": 75.22039031982422, "learning_rate": 2.4316109422492405e-07, "loss": 1.7121, "step": 4 }, { "epoch": 0.000608457560085184, "grad_norm": 69.626708984375, "learning_rate": 3.0395136778115507e-07, "loss": 1.4793, "step": 5 }, { "epoch": 0.0007301490721022208, "grad_norm": 34.15979766845703, "learning_rate": 3.6474164133738603e-07, "loss": 1.3097, "step": 6 }, { "epoch": 0.0008518405841192577, "grad_norm": 19.58428192138672, "learning_rate": 4.2553191489361704e-07, "loss": 1.0655, "step": 7 }, { "epoch": 0.0009735320961362945, "grad_norm": 16.660463333129883, "learning_rate": 4.863221884498481e-07, "loss": 1.0143, "step": 8 }, { "epoch": 0.0010952236081533313, "grad_norm": 28.945844650268555, "learning_rate": 5.471124620060791e-07, "loss": 0.99, "step": 9 }, { "epoch": 0.001216915120170368, "grad_norm": 28.549259185791016, "learning_rate": 6.079027355623101e-07, "loss": 1.0168, "step": 10 }, { "epoch": 0.0013386066321874049, "grad_norm": 32.18401336669922, "learning_rate": 6.686930091185411e-07, "loss": 0.9256, "step": 11 }, { "epoch": 0.0014602981442044416, "grad_norm": 31.586956024169922, "learning_rate": 7.294832826747721e-07, "loss": 0.9215, "step": 12 }, { "epoch": 0.0015819896562214786, "grad_norm": 28.776288986206055, "learning_rate": 7.902735562310031e-07, "loss": 0.9203, "step": 13 }, { "epoch": 0.0017036811682385154, "grad_norm": 16.66475486755371, "learning_rate": 8.510638297872341e-07, "loss": 0.7229, "step": 14 }, { "epoch": 0.0018253726802555522, "grad_norm": 12.193924903869629, "learning_rate": 9.118541033434651e-07, "loss": 0.7789, "step": 15 }, { "epoch": 0.001947064192272589, "grad_norm": 25.486886978149414, "learning_rate": 9.726443768996962e-07, "loss": 0.8058, "step": 16 }, { "epoch": 0.002068755704289626, "grad_norm": 47.33616256713867, "learning_rate": 1.0334346504559272e-06, "loss": 0.8348, "step": 17 }, { "epoch": 0.0021904472163066626, "grad_norm": 51.361446380615234, "learning_rate": 1.0942249240121581e-06, "loss": 0.8964, "step": 18 }, { "epoch": 0.0023121387283236996, "grad_norm": 50.1767578125, "learning_rate": 1.155015197568389e-06, "loss": 0.8765, "step": 19 }, { "epoch": 0.002433830240340736, "grad_norm": 28.88109016418457, "learning_rate": 1.2158054711246203e-06, "loss": 0.6874, "step": 20 }, { "epoch": 0.002555521752357773, "grad_norm": 10.250425338745117, "learning_rate": 1.276595744680851e-06, "loss": 0.7434, "step": 21 }, { "epoch": 0.0026772132643748097, "grad_norm": 12.3607759475708, "learning_rate": 1.3373860182370822e-06, "loss": 0.7158, "step": 22 }, { "epoch": 0.0027989047763918467, "grad_norm": 14.11135196685791, "learning_rate": 1.3981762917933131e-06, "loss": 0.7237, "step": 23 }, { "epoch": 0.0029205962884088833, "grad_norm": 11.794168472290039, "learning_rate": 1.4589665653495441e-06, "loss": 0.7236, "step": 24 }, { "epoch": 0.0030422878004259203, "grad_norm": 5.893219470977783, "learning_rate": 1.519756838905775e-06, "loss": 0.5845, "step": 25 }, { "epoch": 0.0031639793124429573, "grad_norm": 5.568309783935547, "learning_rate": 1.5805471124620062e-06, "loss": 0.5906, "step": 26 }, { "epoch": 0.003285670824459994, "grad_norm": 14.479249954223633, "learning_rate": 1.6413373860182372e-06, "loss": 0.5553, "step": 27 }, { "epoch": 0.003407362336477031, "grad_norm": 5.768650531768799, "learning_rate": 1.7021276595744682e-06, "loss": 0.561, "step": 28 }, { "epoch": 0.0035290538484940674, "grad_norm": 12.296796798706055, "learning_rate": 1.7629179331306991e-06, "loss": 0.6391, "step": 29 }, { "epoch": 0.0036507453605111044, "grad_norm": 6.892004013061523, "learning_rate": 1.8237082066869303e-06, "loss": 0.5507, "step": 30 }, { "epoch": 0.003772436872528141, "grad_norm": 4.593900680541992, "learning_rate": 1.8844984802431613e-06, "loss": 0.5387, "step": 31 }, { "epoch": 0.003894128384545178, "grad_norm": 11.900116920471191, "learning_rate": 1.9452887537993924e-06, "loss": 0.6643, "step": 32 }, { "epoch": 0.0040158198965622146, "grad_norm": 8.636366844177246, "learning_rate": 2.0060790273556234e-06, "loss": 0.6494, "step": 33 }, { "epoch": 0.004137511408579252, "grad_norm": 11.49934196472168, "learning_rate": 2.0668693009118543e-06, "loss": 0.5801, "step": 34 }, { "epoch": 0.0042592029205962886, "grad_norm": 12.536081314086914, "learning_rate": 2.1276595744680853e-06, "loss": 0.5851, "step": 35 }, { "epoch": 0.004380894432613325, "grad_norm": 3.741614818572998, "learning_rate": 2.1884498480243163e-06, "loss": 0.5611, "step": 36 }, { "epoch": 0.004502585944630362, "grad_norm": 4.851962089538574, "learning_rate": 2.2492401215805472e-06, "loss": 0.5617, "step": 37 }, { "epoch": 0.004624277456647399, "grad_norm": 3.1746225357055664, "learning_rate": 2.310030395136778e-06, "loss": 0.4807, "step": 38 }, { "epoch": 0.004745968968664436, "grad_norm": 3.161885976791382, "learning_rate": 2.3708206686930096e-06, "loss": 0.5301, "step": 39 }, { "epoch": 0.004867660480681472, "grad_norm": 4.953426837921143, "learning_rate": 2.4316109422492405e-06, "loss": 0.5889, "step": 40 }, { "epoch": 0.00498935199269851, "grad_norm": 5.341402530670166, "learning_rate": 2.4924012158054715e-06, "loss": 0.6252, "step": 41 }, { "epoch": 0.005111043504715546, "grad_norm": 9.844038963317871, "learning_rate": 2.553191489361702e-06, "loss": 0.6178, "step": 42 }, { "epoch": 0.005232735016732583, "grad_norm": 8.744702339172363, "learning_rate": 2.613981762917933e-06, "loss": 0.5627, "step": 43 }, { "epoch": 0.005354426528749619, "grad_norm": 2.649850606918335, "learning_rate": 2.6747720364741644e-06, "loss": 0.4677, "step": 44 }, { "epoch": 0.005476118040766657, "grad_norm": 13.687397956848145, "learning_rate": 2.7355623100303953e-06, "loss": 0.6101, "step": 45 }, { "epoch": 0.005597809552783693, "grad_norm": 13.606250762939453, "learning_rate": 2.7963525835866263e-06, "loss": 0.6129, "step": 46 }, { "epoch": 0.00571950106480073, "grad_norm": 10.961346626281738, "learning_rate": 2.8571428571428573e-06, "loss": 0.6148, "step": 47 }, { "epoch": 0.0058411925768177666, "grad_norm": 3.9125142097473145, "learning_rate": 2.9179331306990882e-06, "loss": 0.5812, "step": 48 }, { "epoch": 0.005962884088834804, "grad_norm": 6.282345294952393, "learning_rate": 2.978723404255319e-06, "loss": 0.4988, "step": 49 }, { "epoch": 0.0060845756008518406, "grad_norm": 6.259521484375, "learning_rate": 3.03951367781155e-06, "loss": 0.5785, "step": 50 }, { "epoch": 0.006206267112868877, "grad_norm": 3.5301406383514404, "learning_rate": 3.100303951367781e-06, "loss": 0.5786, "step": 51 }, { "epoch": 0.0063279586248859146, "grad_norm": 7.533758640289307, "learning_rate": 3.1610942249240125e-06, "loss": 0.5956, "step": 52 }, { "epoch": 0.006449650136902951, "grad_norm": 4.951503753662109, "learning_rate": 3.2218844984802434e-06, "loss": 0.5074, "step": 53 }, { "epoch": 0.006571341648919988, "grad_norm": 5.272761821746826, "learning_rate": 3.2826747720364744e-06, "loss": 0.5353, "step": 54 }, { "epoch": 0.006693033160937024, "grad_norm": 9.48100471496582, "learning_rate": 3.3434650455927054e-06, "loss": 0.5857, "step": 55 }, { "epoch": 0.006814724672954062, "grad_norm": 2.2276763916015625, "learning_rate": 3.4042553191489363e-06, "loss": 0.4564, "step": 56 }, { "epoch": 0.006936416184971098, "grad_norm": 2.5077621936798096, "learning_rate": 3.4650455927051673e-06, "loss": 0.5416, "step": 57 }, { "epoch": 0.007058107696988135, "grad_norm": 2.5162456035614014, "learning_rate": 3.5258358662613982e-06, "loss": 0.4984, "step": 58 }, { "epoch": 0.007179799209005172, "grad_norm": 3.7858893871307373, "learning_rate": 3.586626139817629e-06, "loss": 0.444, "step": 59 }, { "epoch": 0.007301490721022209, "grad_norm": 7.784074306488037, "learning_rate": 3.6474164133738606e-06, "loss": 0.5784, "step": 60 }, { "epoch": 0.007423182233039245, "grad_norm": 6.502420902252197, "learning_rate": 3.7082066869300915e-06, "loss": 0.5378, "step": 61 }, { "epoch": 0.007544873745056282, "grad_norm": 3.627375602722168, "learning_rate": 3.7689969604863225e-06, "loss": 0.531, "step": 62 }, { "epoch": 0.007666565257073319, "grad_norm": 6.836421012878418, "learning_rate": 3.8297872340425535e-06, "loss": 0.5663, "step": 63 }, { "epoch": 0.007788256769090356, "grad_norm": 7.055088520050049, "learning_rate": 3.890577507598785e-06, "loss": 0.5491, "step": 64 }, { "epoch": 0.007909948281107393, "grad_norm": 8.892171859741211, "learning_rate": 3.951367781155015e-06, "loss": 0.4521, "step": 65 }, { "epoch": 0.008031639793124429, "grad_norm": 3.930820941925049, "learning_rate": 4.012158054711247e-06, "loss": 0.52, "step": 66 }, { "epoch": 0.008153331305141466, "grad_norm": 12.123941421508789, "learning_rate": 4.072948328267477e-06, "loss": 0.6482, "step": 67 }, { "epoch": 0.008275022817158504, "grad_norm": 2.9747817516326904, "learning_rate": 4.133738601823709e-06, "loss": 0.5113, "step": 68 }, { "epoch": 0.00839671432917554, "grad_norm": 3.337928533554077, "learning_rate": 4.194528875379939e-06, "loss": 0.5408, "step": 69 }, { "epoch": 0.008518405841192577, "grad_norm": 9.0592622756958, "learning_rate": 4.255319148936171e-06, "loss": 0.5251, "step": 70 }, { "epoch": 0.008640097353209614, "grad_norm": 3.707595109939575, "learning_rate": 4.316109422492402e-06, "loss": 0.5173, "step": 71 }, { "epoch": 0.00876178886522665, "grad_norm": 1.4675965309143066, "learning_rate": 4.3768996960486325e-06, "loss": 0.4422, "step": 72 }, { "epoch": 0.008883480377243687, "grad_norm": 10.405287742614746, "learning_rate": 4.437689969604864e-06, "loss": 0.6371, "step": 73 }, { "epoch": 0.009005171889260723, "grad_norm": 7.697737693786621, "learning_rate": 4.4984802431610945e-06, "loss": 0.5918, "step": 74 }, { "epoch": 0.009126863401277762, "grad_norm": 1.7287604808807373, "learning_rate": 4.559270516717326e-06, "loss": 0.4604, "step": 75 }, { "epoch": 0.009248554913294798, "grad_norm": 2.8088109493255615, "learning_rate": 4.620060790273556e-06, "loss": 0.4477, "step": 76 }, { "epoch": 0.009370246425311835, "grad_norm": 1.390653133392334, "learning_rate": 4.680851063829788e-06, "loss": 0.5009, "step": 77 }, { "epoch": 0.009491937937328871, "grad_norm": 8.15897274017334, "learning_rate": 4.741641337386019e-06, "loss": 0.6593, "step": 78 }, { "epoch": 0.009613629449345908, "grad_norm": 6.279831409454346, "learning_rate": 4.80243161094225e-06, "loss": 0.4464, "step": 79 }, { "epoch": 0.009735320961362945, "grad_norm": 4.1383514404296875, "learning_rate": 4.863221884498481e-06, "loss": 0.6184, "step": 80 }, { "epoch": 0.009857012473379981, "grad_norm": 8.388766288757324, "learning_rate": 4.924012158054712e-06, "loss": 0.4524, "step": 81 }, { "epoch": 0.00997870398539702, "grad_norm": 1.4439729452133179, "learning_rate": 4.984802431610943e-06, "loss": 0.5034, "step": 82 }, { "epoch": 0.010100395497414056, "grad_norm": 2.35052227973938, "learning_rate": 5.0455927051671735e-06, "loss": 0.5005, "step": 83 }, { "epoch": 0.010222087009431093, "grad_norm": 1.400492548942566, "learning_rate": 5.106382978723404e-06, "loss": 0.4697, "step": 84 }, { "epoch": 0.010343778521448129, "grad_norm": 2.054168939590454, "learning_rate": 5.1671732522796354e-06, "loss": 0.5697, "step": 85 }, { "epoch": 0.010465470033465166, "grad_norm": 2.0980570316314697, "learning_rate": 5.227963525835866e-06, "loss": 0.5479, "step": 86 }, { "epoch": 0.010587161545482202, "grad_norm": 10.143542289733887, "learning_rate": 5.288753799392098e-06, "loss": 0.5854, "step": 87 }, { "epoch": 0.010708853057499239, "grad_norm": 10.95671558380127, "learning_rate": 5.349544072948329e-06, "loss": 0.5479, "step": 88 }, { "epoch": 0.010830544569516275, "grad_norm": 4.231632709503174, "learning_rate": 5.41033434650456e-06, "loss": 0.514, "step": 89 }, { "epoch": 0.010952236081533314, "grad_norm": 6.10847282409668, "learning_rate": 5.471124620060791e-06, "loss": 0.5568, "step": 90 }, { "epoch": 0.01107392759355035, "grad_norm": 7.639449596405029, "learning_rate": 5.531914893617022e-06, "loss": 0.5347, "step": 91 }, { "epoch": 0.011195619105567387, "grad_norm": 8.659672737121582, "learning_rate": 5.592705167173253e-06, "loss": 0.6023, "step": 92 }, { "epoch": 0.011317310617584423, "grad_norm": 2.5167593955993652, "learning_rate": 5.653495440729484e-06, "loss": 0.5298, "step": 93 }, { "epoch": 0.01143900212960146, "grad_norm": 9.971253395080566, "learning_rate": 5.7142857142857145e-06, "loss": 0.5734, "step": 94 }, { "epoch": 0.011560693641618497, "grad_norm": 16.309722900390625, "learning_rate": 5.775075987841946e-06, "loss": 0.6351, "step": 95 }, { "epoch": 0.011682385153635533, "grad_norm": 12.970685005187988, "learning_rate": 5.8358662613981764e-06, "loss": 0.5801, "step": 96 }, { "epoch": 0.011804076665652571, "grad_norm": 2.6465160846710205, "learning_rate": 5.896656534954408e-06, "loss": 0.5355, "step": 97 }, { "epoch": 0.011925768177669608, "grad_norm": 5.874882698059082, "learning_rate": 5.957446808510638e-06, "loss": 0.5155, "step": 98 }, { "epoch": 0.012047459689686645, "grad_norm": 6.874263763427734, "learning_rate": 6.01823708206687e-06, "loss": 0.5755, "step": 99 }, { "epoch": 0.012169151201703681, "grad_norm": 8.646947860717773, "learning_rate": 6.0790273556231e-06, "loss": 0.5908, "step": 100 }, { "epoch": 0.012290842713720718, "grad_norm": 1.429110050201416, "learning_rate": 6.1398176291793325e-06, "loss": 0.4829, "step": 101 }, { "epoch": 0.012412534225737754, "grad_norm": 3.2724087238311768, "learning_rate": 6.200607902735562e-06, "loss": 0.5257, "step": 102 }, { "epoch": 0.01253422573775479, "grad_norm": 6.1953511238098145, "learning_rate": 6.2613981762917944e-06, "loss": 0.5356, "step": 103 }, { "epoch": 0.012655917249771829, "grad_norm": 1.6497021913528442, "learning_rate": 6.322188449848025e-06, "loss": 0.5142, "step": 104 }, { "epoch": 0.012777608761788866, "grad_norm": 1.1163548231124878, "learning_rate": 6.382978723404256e-06, "loss": 0.4192, "step": 105 }, { "epoch": 0.012899300273805902, "grad_norm": 5.324417591094971, "learning_rate": 6.443768996960487e-06, "loss": 0.4452, "step": 106 }, { "epoch": 0.013020991785822939, "grad_norm": 9.527470588684082, "learning_rate": 6.504559270516718e-06, "loss": 0.5937, "step": 107 }, { "epoch": 0.013142683297839975, "grad_norm": 4.845725059509277, "learning_rate": 6.565349544072949e-06, "loss": 0.4486, "step": 108 }, { "epoch": 0.013264374809857012, "grad_norm": 5.2774271965026855, "learning_rate": 6.62613981762918e-06, "loss": 0.5525, "step": 109 }, { "epoch": 0.013386066321874049, "grad_norm": 5.132938861846924, "learning_rate": 6.686930091185411e-06, "loss": 0.527, "step": 110 }, { "epoch": 0.013507757833891087, "grad_norm": 11.405508995056152, "learning_rate": 6.747720364741642e-06, "loss": 0.6392, "step": 111 }, { "epoch": 0.013629449345908123, "grad_norm": 9.988105773925781, "learning_rate": 6.808510638297873e-06, "loss": 0.6323, "step": 112 }, { "epoch": 0.01375114085792516, "grad_norm": 6.9306769371032715, "learning_rate": 6.869300911854104e-06, "loss": 0.5208, "step": 113 }, { "epoch": 0.013872832369942197, "grad_norm": 5.900634765625, "learning_rate": 6.9300911854103346e-06, "loss": 0.6097, "step": 114 }, { "epoch": 0.013994523881959233, "grad_norm": 2.1774234771728516, "learning_rate": 6.990881458966566e-06, "loss": 0.4525, "step": 115 }, { "epoch": 0.01411621539397627, "grad_norm": 2.4822614192962646, "learning_rate": 7.0516717325227965e-06, "loss": 0.4894, "step": 116 }, { "epoch": 0.014237906905993306, "grad_norm": 2.224492311477661, "learning_rate": 7.112462006079029e-06, "loss": 0.5191, "step": 117 }, { "epoch": 0.014359598418010345, "grad_norm": 4.142158031463623, "learning_rate": 7.173252279635258e-06, "loss": 0.4941, "step": 118 }, { "epoch": 0.014481289930027381, "grad_norm": 1.3689216375350952, "learning_rate": 7.234042553191491e-06, "loss": 0.5031, "step": 119 }, { "epoch": 0.014602981442044418, "grad_norm": 1.7017595767974854, "learning_rate": 7.294832826747721e-06, "loss": 0.4759, "step": 120 }, { "epoch": 0.014724672954061454, "grad_norm": 2.922427177429199, "learning_rate": 7.3556231003039526e-06, "loss": 0.5302, "step": 121 }, { "epoch": 0.01484636446607849, "grad_norm": 2.68149471282959, "learning_rate": 7.416413373860183e-06, "loss": 0.5008, "step": 122 }, { "epoch": 0.014968055978095527, "grad_norm": 2.105281114578247, "learning_rate": 7.4772036474164145e-06, "loss": 0.5114, "step": 123 }, { "epoch": 0.015089747490112564, "grad_norm": 1.4559125900268555, "learning_rate": 7.537993920972645e-06, "loss": 0.4699, "step": 124 }, { "epoch": 0.015211439002129602, "grad_norm": 3.2693777084350586, "learning_rate": 7.598784194528876e-06, "loss": 0.5062, "step": 125 }, { "epoch": 0.015333130514146639, "grad_norm": 2.183715343475342, "learning_rate": 7.659574468085107e-06, "loss": 0.5046, "step": 126 }, { "epoch": 0.015454822026163675, "grad_norm": 1.0585291385650635, "learning_rate": 7.720364741641338e-06, "loss": 0.5224, "step": 127 }, { "epoch": 0.015576513538180712, "grad_norm": 2.833522081375122, "learning_rate": 7.78115501519757e-06, "loss": 0.5289, "step": 128 }, { "epoch": 0.01569820505019775, "grad_norm": 1.0448874235153198, "learning_rate": 7.841945288753801e-06, "loss": 0.499, "step": 129 }, { "epoch": 0.015819896562214785, "grad_norm": 0.856838047504425, "learning_rate": 7.90273556231003e-06, "loss": 0.4805, "step": 130 }, { "epoch": 0.01594158807423182, "grad_norm": 5.1833391189575195, "learning_rate": 7.963525835866262e-06, "loss": 0.5597, "step": 131 }, { "epoch": 0.016063279586248858, "grad_norm": 6.778291702270508, "learning_rate": 8.024316109422494e-06, "loss": 0.5685, "step": 132 }, { "epoch": 0.016184971098265895, "grad_norm": 3.2497646808624268, "learning_rate": 8.085106382978723e-06, "loss": 0.4972, "step": 133 }, { "epoch": 0.01630666261028293, "grad_norm": 1.978299617767334, "learning_rate": 8.145896656534955e-06, "loss": 0.5389, "step": 134 }, { "epoch": 0.016428354122299968, "grad_norm": 8.466510772705078, "learning_rate": 8.206686930091186e-06, "loss": 0.5174, "step": 135 }, { "epoch": 0.016550045634317008, "grad_norm": 6.085693836212158, "learning_rate": 8.267477203647417e-06, "loss": 0.5267, "step": 136 }, { "epoch": 0.016671737146334045, "grad_norm": 1.6802982091903687, "learning_rate": 8.328267477203647e-06, "loss": 0.5324, "step": 137 }, { "epoch": 0.01679342865835108, "grad_norm": 3.9072301387786865, "learning_rate": 8.389057750759878e-06, "loss": 0.4342, "step": 138 }, { "epoch": 0.016915120170368118, "grad_norm": 1.671865701675415, "learning_rate": 8.44984802431611e-06, "loss": 0.4282, "step": 139 }, { "epoch": 0.017036811682385154, "grad_norm": 3.5130422115325928, "learning_rate": 8.510638297872341e-06, "loss": 0.4797, "step": 140 }, { "epoch": 0.01715850319440219, "grad_norm": 1.251298189163208, "learning_rate": 8.571428571428571e-06, "loss": 0.4266, "step": 141 }, { "epoch": 0.017280194706419227, "grad_norm": 1.784014105796814, "learning_rate": 8.632218844984804e-06, "loss": 0.5515, "step": 142 }, { "epoch": 0.017401886218436264, "grad_norm": 8.78653621673584, "learning_rate": 8.693009118541034e-06, "loss": 0.6032, "step": 143 }, { "epoch": 0.0175235777304533, "grad_norm": 11.122417449951172, "learning_rate": 8.753799392097265e-06, "loss": 0.6042, "step": 144 }, { "epoch": 0.017645269242470337, "grad_norm": 13.862433433532715, "learning_rate": 8.814589665653496e-06, "loss": 0.6152, "step": 145 }, { "epoch": 0.017766960754487374, "grad_norm": 9.457635879516602, "learning_rate": 8.875379939209728e-06, "loss": 0.5355, "step": 146 }, { "epoch": 0.01788865226650441, "grad_norm": 1.336855173110962, "learning_rate": 8.936170212765958e-06, "loss": 0.5491, "step": 147 }, { "epoch": 0.018010343778521447, "grad_norm": 2.6724729537963867, "learning_rate": 8.996960486322189e-06, "loss": 0.5289, "step": 148 }, { "epoch": 0.018132035290538483, "grad_norm": 3.028740167617798, "learning_rate": 9.05775075987842e-06, "loss": 0.5178, "step": 149 }, { "epoch": 0.018253726802555523, "grad_norm": 2.821035861968994, "learning_rate": 9.118541033434652e-06, "loss": 0.52, "step": 150 }, { "epoch": 0.01837541831457256, "grad_norm": 2.4448201656341553, "learning_rate": 9.179331306990881e-06, "loss": 0.5336, "step": 151 }, { "epoch": 0.018497109826589597, "grad_norm": 10.468660354614258, "learning_rate": 9.240121580547113e-06, "loss": 0.5113, "step": 152 }, { "epoch": 0.018618801338606633, "grad_norm": 4.343624114990234, "learning_rate": 9.300911854103344e-06, "loss": 0.5518, "step": 153 }, { "epoch": 0.01874049285062367, "grad_norm": 0.9401276707649231, "learning_rate": 9.361702127659576e-06, "loss": 0.4987, "step": 154 }, { "epoch": 0.018862184362640706, "grad_norm": 2.693747043609619, "learning_rate": 9.422492401215805e-06, "loss": 0.4839, "step": 155 }, { "epoch": 0.018983875874657743, "grad_norm": 3.352670669555664, "learning_rate": 9.483282674772038e-06, "loss": 0.5325, "step": 156 }, { "epoch": 0.01910556738667478, "grad_norm": 3.489257574081421, "learning_rate": 9.544072948328268e-06, "loss": 0.4427, "step": 157 }, { "epoch": 0.019227258898691816, "grad_norm": 1.562726378440857, "learning_rate": 9.6048632218845e-06, "loss": 0.4979, "step": 158 }, { "epoch": 0.019348950410708853, "grad_norm": 1.2442615032196045, "learning_rate": 9.66565349544073e-06, "loss": 0.503, "step": 159 }, { "epoch": 0.01947064192272589, "grad_norm": 2.0917882919311523, "learning_rate": 9.726443768996962e-06, "loss": 0.5225, "step": 160 }, { "epoch": 0.019592333434742926, "grad_norm": 3.4695098400115967, "learning_rate": 9.787234042553192e-06, "loss": 0.4602, "step": 161 }, { "epoch": 0.019714024946759962, "grad_norm": 4.359494686126709, "learning_rate": 9.848024316109423e-06, "loss": 0.4752, "step": 162 }, { "epoch": 0.019835716458777, "grad_norm": 4.106424808502197, "learning_rate": 9.908814589665655e-06, "loss": 0.5438, "step": 163 }, { "epoch": 0.01995740797079404, "grad_norm": 2.0894875526428223, "learning_rate": 9.969604863221886e-06, "loss": 0.4366, "step": 164 }, { "epoch": 0.020079099482811075, "grad_norm": 0.964451253414154, "learning_rate": 1.0030395136778117e-05, "loss": 0.4383, "step": 165 }, { "epoch": 0.020200790994828112, "grad_norm": 4.342470169067383, "learning_rate": 1.0091185410334347e-05, "loss": 0.5336, "step": 166 }, { "epoch": 0.02032248250684515, "grad_norm": 1.3713736534118652, "learning_rate": 1.0151975683890578e-05, "loss": 0.4795, "step": 167 }, { "epoch": 0.020444174018862185, "grad_norm": 4.365769863128662, "learning_rate": 1.0212765957446808e-05, "loss": 0.4266, "step": 168 }, { "epoch": 0.02056586553087922, "grad_norm": 1.003467321395874, "learning_rate": 1.0273556231003041e-05, "loss": 0.5158, "step": 169 }, { "epoch": 0.020687557042896258, "grad_norm": 3.7893731594085693, "learning_rate": 1.0334346504559271e-05, "loss": 0.4936, "step": 170 }, { "epoch": 0.020809248554913295, "grad_norm": 0.8778900504112244, "learning_rate": 1.0395136778115502e-05, "loss": 0.5322, "step": 171 }, { "epoch": 0.02093094006693033, "grad_norm": 3.3810131549835205, "learning_rate": 1.0455927051671732e-05, "loss": 0.5526, "step": 172 }, { "epoch": 0.021052631578947368, "grad_norm": 1.4848077297210693, "learning_rate": 1.0516717325227965e-05, "loss": 0.4736, "step": 173 }, { "epoch": 0.021174323090964405, "grad_norm": 0.8066727519035339, "learning_rate": 1.0577507598784196e-05, "loss": 0.4718, "step": 174 }, { "epoch": 0.02129601460298144, "grad_norm": 0.8787925839424133, "learning_rate": 1.0638297872340426e-05, "loss": 0.4784, "step": 175 }, { "epoch": 0.021417706114998478, "grad_norm": 4.162363529205322, "learning_rate": 1.0699088145896657e-05, "loss": 0.549, "step": 176 }, { "epoch": 0.021539397627015514, "grad_norm": 0.9096184372901917, "learning_rate": 1.0759878419452889e-05, "loss": 0.4436, "step": 177 }, { "epoch": 0.02166108913903255, "grad_norm": 2.608381748199463, "learning_rate": 1.082066869300912e-05, "loss": 0.5248, "step": 178 }, { "epoch": 0.02178278065104959, "grad_norm": 2.3067309856414795, "learning_rate": 1.088145896656535e-05, "loss": 0.4617, "step": 179 }, { "epoch": 0.021904472163066627, "grad_norm": 1.1305943727493286, "learning_rate": 1.0942249240121581e-05, "loss": 0.4767, "step": 180 }, { "epoch": 0.022026163675083664, "grad_norm": 2.6685497760772705, "learning_rate": 1.1003039513677813e-05, "loss": 0.5104, "step": 181 }, { "epoch": 0.0221478551871007, "grad_norm": 1.002787709236145, "learning_rate": 1.1063829787234044e-05, "loss": 0.4866, "step": 182 }, { "epoch": 0.022269546699117737, "grad_norm": 2.944110870361328, "learning_rate": 1.1124620060790274e-05, "loss": 0.5557, "step": 183 }, { "epoch": 0.022391238211134774, "grad_norm": 3.287485122680664, "learning_rate": 1.1185410334346505e-05, "loss": 0.4684, "step": 184 }, { "epoch": 0.02251292972315181, "grad_norm": 2.4028401374816895, "learning_rate": 1.1246200607902738e-05, "loss": 0.5386, "step": 185 }, { "epoch": 0.022634621235168847, "grad_norm": 2.2685742378234863, "learning_rate": 1.1306990881458968e-05, "loss": 0.4718, "step": 186 }, { "epoch": 0.022756312747185883, "grad_norm": 0.9333683252334595, "learning_rate": 1.1367781155015198e-05, "loss": 0.4368, "step": 187 }, { "epoch": 0.02287800425920292, "grad_norm": 5.021225929260254, "learning_rate": 1.1428571428571429e-05, "loss": 0.5321, "step": 188 }, { "epoch": 0.022999695771219957, "grad_norm": 4.723958969116211, "learning_rate": 1.1489361702127662e-05, "loss": 0.5198, "step": 189 }, { "epoch": 0.023121387283236993, "grad_norm": 2.935609817504883, "learning_rate": 1.1550151975683892e-05, "loss": 0.5107, "step": 190 }, { "epoch": 0.02324307879525403, "grad_norm": 3.1390504837036133, "learning_rate": 1.1610942249240123e-05, "loss": 0.49, "step": 191 }, { "epoch": 0.023364770307271066, "grad_norm": 4.994821071624756, "learning_rate": 1.1671732522796353e-05, "loss": 0.5153, "step": 192 }, { "epoch": 0.023486461819288106, "grad_norm": 5.175562381744385, "learning_rate": 1.1732522796352586e-05, "loss": 0.501, "step": 193 }, { "epoch": 0.023608153331305143, "grad_norm": 1.231567621231079, "learning_rate": 1.1793313069908816e-05, "loss": 0.5349, "step": 194 }, { "epoch": 0.02372984484332218, "grad_norm": 2.930424928665161, "learning_rate": 1.1854103343465047e-05, "loss": 0.4459, "step": 195 }, { "epoch": 0.023851536355339216, "grad_norm": 4.068364143371582, "learning_rate": 1.1914893617021277e-05, "loss": 0.5316, "step": 196 }, { "epoch": 0.023973227867356253, "grad_norm": 6.025232791900635, "learning_rate": 1.197568389057751e-05, "loss": 0.5814, "step": 197 }, { "epoch": 0.02409491937937329, "grad_norm": 3.0085935592651367, "learning_rate": 1.203647416413374e-05, "loss": 0.5008, "step": 198 }, { "epoch": 0.024216610891390326, "grad_norm": 0.9371998906135559, "learning_rate": 1.2097264437689971e-05, "loss": 0.4974, "step": 199 }, { "epoch": 0.024338302403407362, "grad_norm": 6.098117351531982, "learning_rate": 1.21580547112462e-05, "loss": 0.4823, "step": 200 }, { "epoch": 0.0244599939154244, "grad_norm": 3.9042601585388184, "learning_rate": 1.2218844984802432e-05, "loss": 0.5012, "step": 201 }, { "epoch": 0.024581685427441435, "grad_norm": 1.7924104928970337, "learning_rate": 1.2279635258358665e-05, "loss": 0.4639, "step": 202 }, { "epoch": 0.024703376939458472, "grad_norm": 1.657804250717163, "learning_rate": 1.2340425531914895e-05, "loss": 0.4892, "step": 203 }, { "epoch": 0.02482506845147551, "grad_norm": 1.11095130443573, "learning_rate": 1.2401215805471124e-05, "loss": 0.4308, "step": 204 }, { "epoch": 0.024946759963492545, "grad_norm": 4.05391788482666, "learning_rate": 1.2462006079027356e-05, "loss": 0.5384, "step": 205 }, { "epoch": 0.02506845147550958, "grad_norm": 1.0696470737457275, "learning_rate": 1.2522796352583589e-05, "loss": 0.4884, "step": 206 }, { "epoch": 0.02519014298752662, "grad_norm": 1.3560253381729126, "learning_rate": 1.2583586626139819e-05, "loss": 0.5223, "step": 207 }, { "epoch": 0.025311834499543658, "grad_norm": 1.0009101629257202, "learning_rate": 1.264437689969605e-05, "loss": 0.5241, "step": 208 }, { "epoch": 0.025433526011560695, "grad_norm": 4.8241071701049805, "learning_rate": 1.270516717325228e-05, "loss": 0.4817, "step": 209 }, { "epoch": 0.02555521752357773, "grad_norm": 2.7588982582092285, "learning_rate": 1.2765957446808513e-05, "loss": 0.4969, "step": 210 }, { "epoch": 0.025676909035594768, "grad_norm": 1.2233370542526245, "learning_rate": 1.2826747720364742e-05, "loss": 0.4564, "step": 211 }, { "epoch": 0.025798600547611805, "grad_norm": 3.9769198894500732, "learning_rate": 1.2887537993920974e-05, "loss": 0.525, "step": 212 }, { "epoch": 0.02592029205962884, "grad_norm": 1.5460400581359863, "learning_rate": 1.2948328267477203e-05, "loss": 0.4579, "step": 213 }, { "epoch": 0.026041983571645878, "grad_norm": 1.1946918964385986, "learning_rate": 1.3009118541033437e-05, "loss": 0.4664, "step": 214 }, { "epoch": 0.026163675083662914, "grad_norm": 2.718722105026245, "learning_rate": 1.3069908814589666e-05, "loss": 0.5612, "step": 215 }, { "epoch": 0.02628536659567995, "grad_norm": 4.307705402374268, "learning_rate": 1.3130699088145898e-05, "loss": 0.5013, "step": 216 }, { "epoch": 0.026407058107696987, "grad_norm": 3.6850602626800537, "learning_rate": 1.3191489361702127e-05, "loss": 0.5158, "step": 217 }, { "epoch": 0.026528749619714024, "grad_norm": 3.4409823417663574, "learning_rate": 1.325227963525836e-05, "loss": 0.4689, "step": 218 }, { "epoch": 0.02665044113173106, "grad_norm": 1.849242091178894, "learning_rate": 1.3313069908814592e-05, "loss": 0.5321, "step": 219 }, { "epoch": 0.026772132643748097, "grad_norm": 1.3650225400924683, "learning_rate": 1.3373860182370821e-05, "loss": 0.4366, "step": 220 }, { "epoch": 0.026893824155765137, "grad_norm": 1.60374116897583, "learning_rate": 1.3434650455927051e-05, "loss": 0.4558, "step": 221 }, { "epoch": 0.027015515667782174, "grad_norm": 1.3378283977508545, "learning_rate": 1.3495440729483284e-05, "loss": 0.5394, "step": 222 }, { "epoch": 0.02713720717979921, "grad_norm": 4.942467212677002, "learning_rate": 1.3556231003039516e-05, "loss": 0.4617, "step": 223 }, { "epoch": 0.027258898691816247, "grad_norm": 4.84968900680542, "learning_rate": 1.3617021276595745e-05, "loss": 0.4769, "step": 224 }, { "epoch": 0.027380590203833283, "grad_norm": 4.440089702606201, "learning_rate": 1.3677811550151977e-05, "loss": 0.4477, "step": 225 }, { "epoch": 0.02750228171585032, "grad_norm": 3.5566582679748535, "learning_rate": 1.3738601823708208e-05, "loss": 0.5177, "step": 226 }, { "epoch": 0.027623973227867357, "grad_norm": 3.283655881881714, "learning_rate": 1.379939209726444e-05, "loss": 0.5039, "step": 227 }, { "epoch": 0.027745664739884393, "grad_norm": 3.0711073875427246, "learning_rate": 1.3860182370820669e-05, "loss": 0.4983, "step": 228 }, { "epoch": 0.02786735625190143, "grad_norm": 3.589137077331543, "learning_rate": 1.39209726443769e-05, "loss": 0.4378, "step": 229 }, { "epoch": 0.027989047763918466, "grad_norm": 2.9477665424346924, "learning_rate": 1.3981762917933132e-05, "loss": 0.4478, "step": 230 }, { "epoch": 0.028110739275935503, "grad_norm": 1.009657382965088, "learning_rate": 1.4042553191489363e-05, "loss": 0.4792, "step": 231 }, { "epoch": 0.02823243078795254, "grad_norm": 0.7988582253456116, "learning_rate": 1.4103343465045593e-05, "loss": 0.4839, "step": 232 }, { "epoch": 0.028354122299969576, "grad_norm": 1.4024368524551392, "learning_rate": 1.4164133738601824e-05, "loss": 0.5062, "step": 233 }, { "epoch": 0.028475813811986612, "grad_norm": 1.2348082065582275, "learning_rate": 1.4224924012158057e-05, "loss": 0.5137, "step": 234 }, { "epoch": 0.02859750532400365, "grad_norm": 5.30483865737915, "learning_rate": 1.4285714285714287e-05, "loss": 0.4835, "step": 235 }, { "epoch": 0.02871919683602069, "grad_norm": 6.46610689163208, "learning_rate": 1.4346504559270517e-05, "loss": 0.4385, "step": 236 }, { "epoch": 0.028840888348037726, "grad_norm": 3.176692485809326, "learning_rate": 1.4407294832826748e-05, "loss": 0.4695, "step": 237 }, { "epoch": 0.028962579860054762, "grad_norm": 1.7878280878067017, "learning_rate": 1.4468085106382981e-05, "loss": 0.4521, "step": 238 }, { "epoch": 0.0290842713720718, "grad_norm": 3.6849300861358643, "learning_rate": 1.4528875379939211e-05, "loss": 0.4959, "step": 239 }, { "epoch": 0.029205962884088835, "grad_norm": 2.631035089492798, "learning_rate": 1.4589665653495442e-05, "loss": 0.4523, "step": 240 }, { "epoch": 0.029327654396105872, "grad_norm": 2.448408842086792, "learning_rate": 1.4650455927051672e-05, "loss": 0.5762, "step": 241 }, { "epoch": 0.02944934590812291, "grad_norm": 6.383382320404053, "learning_rate": 1.4711246200607905e-05, "loss": 0.534, "step": 242 }, { "epoch": 0.029571037420139945, "grad_norm": 6.500555992126465, "learning_rate": 1.4772036474164135e-05, "loss": 0.522, "step": 243 }, { "epoch": 0.02969272893215698, "grad_norm": 5.931544303894043, "learning_rate": 1.4832826747720366e-05, "loss": 0.5868, "step": 244 }, { "epoch": 0.029814420444174018, "grad_norm": 2.1462721824645996, "learning_rate": 1.4893617021276596e-05, "loss": 0.5073, "step": 245 }, { "epoch": 0.029936111956191055, "grad_norm": 1.5611602067947388, "learning_rate": 1.4954407294832829e-05, "loss": 0.5167, "step": 246 }, { "epoch": 0.03005780346820809, "grad_norm": 1.691525936126709, "learning_rate": 1.5015197568389059e-05, "loss": 0.4363, "step": 247 }, { "epoch": 0.030179494980225128, "grad_norm": 3.175032138824463, "learning_rate": 1.507598784194529e-05, "loss": 0.5074, "step": 248 }, { "epoch": 0.030301186492242164, "grad_norm": 3.3713743686676025, "learning_rate": 1.513677811550152e-05, "loss": 0.582, "step": 249 }, { "epoch": 0.030422878004259205, "grad_norm": 3.4759769439697266, "learning_rate": 1.5197568389057753e-05, "loss": 0.5001, "step": 250 }, { "epoch": 0.03054456951627624, "grad_norm": 5.467329978942871, "learning_rate": 1.5258358662613984e-05, "loss": 0.5104, "step": 251 }, { "epoch": 0.030666261028293278, "grad_norm": 4.782961845397949, "learning_rate": 1.5319148936170214e-05, "loss": 0.5096, "step": 252 }, { "epoch": 0.030787952540310314, "grad_norm": 4.240315914154053, "learning_rate": 1.5379939209726444e-05, "loss": 0.4531, "step": 253 }, { "epoch": 0.03090964405232735, "grad_norm": 2.355752944946289, "learning_rate": 1.5440729483282677e-05, "loss": 0.518, "step": 254 }, { "epoch": 0.031031335564344387, "grad_norm": 3.694533109664917, "learning_rate": 1.5501519756838906e-05, "loss": 0.4638, "step": 255 }, { "epoch": 0.031153027076361424, "grad_norm": 4.192401885986328, "learning_rate": 1.556231003039514e-05, "loss": 0.5219, "step": 256 }, { "epoch": 0.031274718588378464, "grad_norm": 0.9859777688980103, "learning_rate": 1.562310030395137e-05, "loss": 0.432, "step": 257 }, { "epoch": 0.0313964101003955, "grad_norm": 2.2214391231536865, "learning_rate": 1.5683890577507602e-05, "loss": 0.4391, "step": 258 }, { "epoch": 0.03151810161241254, "grad_norm": 2.3708407878875732, "learning_rate": 1.5744680851063832e-05, "loss": 0.4693, "step": 259 }, { "epoch": 0.03163979312442957, "grad_norm": 3.287144660949707, "learning_rate": 1.580547112462006e-05, "loss": 0.4559, "step": 260 }, { "epoch": 0.03176148463644661, "grad_norm": 3.5540168285369873, "learning_rate": 1.586626139817629e-05, "loss": 0.5076, "step": 261 }, { "epoch": 0.03188317614846364, "grad_norm": 2.0204150676727295, "learning_rate": 1.5927051671732524e-05, "loss": 0.4701, "step": 262 }, { "epoch": 0.03200486766048068, "grad_norm": 1.7141085863113403, "learning_rate": 1.5987841945288754e-05, "loss": 0.4284, "step": 263 }, { "epoch": 0.032126559172497716, "grad_norm": 1.6268690824508667, "learning_rate": 1.6048632218844987e-05, "loss": 0.4692, "step": 264 }, { "epoch": 0.032248250684514757, "grad_norm": 0.8782216310501099, "learning_rate": 1.6109422492401217e-05, "loss": 0.4461, "step": 265 }, { "epoch": 0.03236994219653179, "grad_norm": 2.026752233505249, "learning_rate": 1.6170212765957446e-05, "loss": 0.5242, "step": 266 }, { "epoch": 0.03249163370854883, "grad_norm": 6.13303279876709, "learning_rate": 1.623100303951368e-05, "loss": 0.454, "step": 267 }, { "epoch": 0.03261332522056586, "grad_norm": 4.696516990661621, "learning_rate": 1.629179331306991e-05, "loss": 0.5818, "step": 268 }, { "epoch": 0.0327350167325829, "grad_norm": 6.200691223144531, "learning_rate": 1.6352583586626142e-05, "loss": 0.518, "step": 269 }, { "epoch": 0.032856708244599936, "grad_norm": 0.9025941491127014, "learning_rate": 1.6413373860182372e-05, "loss": 0.558, "step": 270 }, { "epoch": 0.032978399756616976, "grad_norm": 1.4122920036315918, "learning_rate": 1.6474164133738605e-05, "loss": 0.4987, "step": 271 }, { "epoch": 0.033100091268634016, "grad_norm": 0.9242690205574036, "learning_rate": 1.6534954407294835e-05, "loss": 0.4018, "step": 272 }, { "epoch": 0.03322178278065105, "grad_norm": 1.8618580102920532, "learning_rate": 1.6595744680851064e-05, "loss": 0.4336, "step": 273 }, { "epoch": 0.03334347429266809, "grad_norm": 3.876899003982544, "learning_rate": 1.6656534954407294e-05, "loss": 0.4944, "step": 274 }, { "epoch": 0.03346516580468512, "grad_norm": 2.4337871074676514, "learning_rate": 1.6717325227963527e-05, "loss": 0.4392, "step": 275 }, { "epoch": 0.03358685731670216, "grad_norm": 1.483424425125122, "learning_rate": 1.6778115501519757e-05, "loss": 0.5294, "step": 276 }, { "epoch": 0.033708548828719195, "grad_norm": 3.8621246814727783, "learning_rate": 1.683890577507599e-05, "loss": 0.5008, "step": 277 }, { "epoch": 0.033830240340736235, "grad_norm": 7.214571475982666, "learning_rate": 1.689969604863222e-05, "loss": 0.5577, "step": 278 }, { "epoch": 0.03395193185275327, "grad_norm": 8.26302433013916, "learning_rate": 1.6960486322188453e-05, "loss": 0.5369, "step": 279 }, { "epoch": 0.03407362336477031, "grad_norm": 3.8138177394866943, "learning_rate": 1.7021276595744682e-05, "loss": 0.531, "step": 280 }, { "epoch": 0.03419531487678734, "grad_norm": 2.2180545330047607, "learning_rate": 1.7082066869300912e-05, "loss": 0.4654, "step": 281 }, { "epoch": 0.03431700638880438, "grad_norm": 2.205357789993286, "learning_rate": 1.7142857142857142e-05, "loss": 0.511, "step": 282 }, { "epoch": 0.034438697900821415, "grad_norm": 3.5590744018554688, "learning_rate": 1.7203647416413375e-05, "loss": 0.4326, "step": 283 }, { "epoch": 0.034560389412838455, "grad_norm": 5.377081394195557, "learning_rate": 1.7264437689969608e-05, "loss": 0.5293, "step": 284 }, { "epoch": 0.03468208092485549, "grad_norm": 4.490129470825195, "learning_rate": 1.7325227963525838e-05, "loss": 0.4954, "step": 285 }, { "epoch": 0.03480377243687253, "grad_norm": 2.799752950668335, "learning_rate": 1.7386018237082067e-05, "loss": 0.5176, "step": 286 }, { "epoch": 0.03492546394888957, "grad_norm": 3.117070198059082, "learning_rate": 1.74468085106383e-05, "loss": 0.4903, "step": 287 }, { "epoch": 0.0350471554609066, "grad_norm": 7.865053653717041, "learning_rate": 1.750759878419453e-05, "loss": 0.5106, "step": 288 }, { "epoch": 0.03516884697292364, "grad_norm": 6.971349716186523, "learning_rate": 1.756838905775076e-05, "loss": 0.5398, "step": 289 }, { "epoch": 0.035290538484940674, "grad_norm": 3.857039213180542, "learning_rate": 1.7629179331306993e-05, "loss": 0.5261, "step": 290 }, { "epoch": 0.035412229996957714, "grad_norm": 2.1724472045898438, "learning_rate": 1.7689969604863223e-05, "loss": 0.4705, "step": 291 }, { "epoch": 0.03553392150897475, "grad_norm": 2.0216357707977295, "learning_rate": 1.7750759878419456e-05, "loss": 0.4393, "step": 292 }, { "epoch": 0.03565561302099179, "grad_norm": 3.1432712078094482, "learning_rate": 1.7811550151975685e-05, "loss": 0.4266, "step": 293 }, { "epoch": 0.03577730453300882, "grad_norm": 2.9277265071868896, "learning_rate": 1.7872340425531915e-05, "loss": 0.4499, "step": 294 }, { "epoch": 0.03589899604502586, "grad_norm": 1.1710922718048096, "learning_rate": 1.7933130699088148e-05, "loss": 0.4201, "step": 295 }, { "epoch": 0.036020687557042894, "grad_norm": 0.8354945182800293, "learning_rate": 1.7993920972644378e-05, "loss": 0.448, "step": 296 }, { "epoch": 0.036142379069059934, "grad_norm": 1.3958899974822998, "learning_rate": 1.8054711246200608e-05, "loss": 0.4885, "step": 297 }, { "epoch": 0.03626407058107697, "grad_norm": 3.9022505283355713, "learning_rate": 1.811550151975684e-05, "loss": 0.3886, "step": 298 }, { "epoch": 0.03638576209309401, "grad_norm": 0.839918851852417, "learning_rate": 1.8176291793313074e-05, "loss": 0.4849, "step": 299 }, { "epoch": 0.03650745360511105, "grad_norm": 3.41076397895813, "learning_rate": 1.8237082066869303e-05, "loss": 0.5644, "step": 300 }, { "epoch": 0.03662914511712808, "grad_norm": 0.78065425157547, "learning_rate": 1.8297872340425533e-05, "loss": 0.4809, "step": 301 }, { "epoch": 0.03675083662914512, "grad_norm": 0.8052918314933777, "learning_rate": 1.8358662613981763e-05, "loss": 0.4642, "step": 302 }, { "epoch": 0.03687252814116215, "grad_norm": 0.6569188237190247, "learning_rate": 1.8419452887537996e-05, "loss": 0.4968, "step": 303 }, { "epoch": 0.03699421965317919, "grad_norm": 0.7363128662109375, "learning_rate": 1.8480243161094226e-05, "loss": 0.4562, "step": 304 }, { "epoch": 0.037115911165196226, "grad_norm": 1.6311641931533813, "learning_rate": 1.854103343465046e-05, "loss": 0.4832, "step": 305 }, { "epoch": 0.037237602677213266, "grad_norm": 2.1871397495269775, "learning_rate": 1.8601823708206688e-05, "loss": 0.5213, "step": 306 }, { "epoch": 0.0373592941892303, "grad_norm": 2.021156072616577, "learning_rate": 1.866261398176292e-05, "loss": 0.4394, "step": 307 }, { "epoch": 0.03748098570124734, "grad_norm": 0.7310835123062134, "learning_rate": 1.872340425531915e-05, "loss": 0.4983, "step": 308 }, { "epoch": 0.03760267721326437, "grad_norm": 1.3981071710586548, "learning_rate": 1.878419452887538e-05, "loss": 0.4777, "step": 309 }, { "epoch": 0.03772436872528141, "grad_norm": 1.1701666116714478, "learning_rate": 1.884498480243161e-05, "loss": 0.4418, "step": 310 }, { "epoch": 0.037846060237298446, "grad_norm": 2.2293436527252197, "learning_rate": 1.8905775075987844e-05, "loss": 0.4731, "step": 311 }, { "epoch": 0.037967751749315486, "grad_norm": 3.6277523040771484, "learning_rate": 1.8966565349544077e-05, "loss": 0.4534, "step": 312 }, { "epoch": 0.03808944326133252, "grad_norm": 1.6841593980789185, "learning_rate": 1.9027355623100306e-05, "loss": 0.4092, "step": 313 }, { "epoch": 0.03821113477334956, "grad_norm": 1.074906826019287, "learning_rate": 1.9088145896656536e-05, "loss": 0.4645, "step": 314 }, { "epoch": 0.0383328262853666, "grad_norm": 2.8434669971466064, "learning_rate": 1.914893617021277e-05, "loss": 0.4917, "step": 315 }, { "epoch": 0.03845451779738363, "grad_norm": 2.05016827583313, "learning_rate": 1.9209726443769e-05, "loss": 0.503, "step": 316 }, { "epoch": 0.03857620930940067, "grad_norm": 1.4294145107269287, "learning_rate": 1.927051671732523e-05, "loss": 0.5082, "step": 317 }, { "epoch": 0.038697900821417705, "grad_norm": 1.0127038955688477, "learning_rate": 1.933130699088146e-05, "loss": 0.4814, "step": 318 }, { "epoch": 0.038819592333434745, "grad_norm": 1.9038892984390259, "learning_rate": 1.939209726443769e-05, "loss": 0.4856, "step": 319 }, { "epoch": 0.03894128384545178, "grad_norm": 1.7029699087142944, "learning_rate": 1.9452887537993924e-05, "loss": 0.5233, "step": 320 }, { "epoch": 0.03906297535746882, "grad_norm": 2.3036530017852783, "learning_rate": 1.9513677811550154e-05, "loss": 0.4595, "step": 321 }, { "epoch": 0.03918466686948585, "grad_norm": 3.268527030944824, "learning_rate": 1.9574468085106384e-05, "loss": 0.439, "step": 322 }, { "epoch": 0.03930635838150289, "grad_norm": 1.4382330179214478, "learning_rate": 1.9635258358662617e-05, "loss": 0.5159, "step": 323 }, { "epoch": 0.039428049893519924, "grad_norm": 1.197644591331482, "learning_rate": 1.9696048632218846e-05, "loss": 0.4827, "step": 324 }, { "epoch": 0.039549741405536964, "grad_norm": 0.81956946849823, "learning_rate": 1.9756838905775076e-05, "loss": 0.4557, "step": 325 }, { "epoch": 0.039671432917554, "grad_norm": 0.9710156321525574, "learning_rate": 1.981762917933131e-05, "loss": 0.4497, "step": 326 }, { "epoch": 0.03979312442957104, "grad_norm": 1.3333054780960083, "learning_rate": 1.9878419452887542e-05, "loss": 0.4709, "step": 327 }, { "epoch": 0.03991481594158808, "grad_norm": 2.5868492126464844, "learning_rate": 1.9939209726443772e-05, "loss": 0.5669, "step": 328 }, { "epoch": 0.04003650745360511, "grad_norm": 1.8039932250976562, "learning_rate": 2e-05, "loss": 0.5401, "step": 329 }, { "epoch": 0.04015819896562215, "grad_norm": 2.1921143531799316, "learning_rate": 1.999999980973965e-05, "loss": 0.5032, "step": 330 }, { "epoch": 0.040279890477639184, "grad_norm": 2.1752829551696777, "learning_rate": 1.999999923895861e-05, "loss": 0.5376, "step": 331 }, { "epoch": 0.040401581989656224, "grad_norm": 0.7578356862068176, "learning_rate": 1.9999998287656898e-05, "loss": 0.4951, "step": 332 }, { "epoch": 0.04052327350167326, "grad_norm": 2.8342578411102295, "learning_rate": 1.9999996955834554e-05, "loss": 0.4677, "step": 333 }, { "epoch": 0.0406449650136903, "grad_norm": 0.7389137744903564, "learning_rate": 1.9999995243491626e-05, "loss": 0.4923, "step": 334 }, { "epoch": 0.04076665652570733, "grad_norm": 1.022040843963623, "learning_rate": 1.999999315062818e-05, "loss": 0.4689, "step": 335 }, { "epoch": 0.04088834803772437, "grad_norm": 0.8664236664772034, "learning_rate": 1.99999906772443e-05, "loss": 0.483, "step": 336 }, { "epoch": 0.0410100395497414, "grad_norm": 3.2425484657287598, "learning_rate": 1.999998782334007e-05, "loss": 0.5375, "step": 337 }, { "epoch": 0.04113173106175844, "grad_norm": 0.7734496593475342, "learning_rate": 1.9999984588915606e-05, "loss": 0.5087, "step": 338 }, { "epoch": 0.041253422573775476, "grad_norm": 1.4330683946609497, "learning_rate": 1.9999980973971033e-05, "loss": 0.5776, "step": 339 }, { "epoch": 0.041375114085792516, "grad_norm": 5.349335193634033, "learning_rate": 1.999997697850648e-05, "loss": 0.5142, "step": 340 }, { "epoch": 0.04149680559780955, "grad_norm": 5.650288105010986, "learning_rate": 1.9999972602522106e-05, "loss": 0.5403, "step": 341 }, { "epoch": 0.04161849710982659, "grad_norm": 6.786245346069336, "learning_rate": 1.9999967846018074e-05, "loss": 0.5502, "step": 342 }, { "epoch": 0.04174018862184363, "grad_norm": 2.413320302963257, "learning_rate": 1.999996270899457e-05, "loss": 0.5304, "step": 343 }, { "epoch": 0.04186188013386066, "grad_norm": 1.1327245235443115, "learning_rate": 1.9999957191451788e-05, "loss": 0.4999, "step": 344 }, { "epoch": 0.0419835716458777, "grad_norm": 2.179643154144287, "learning_rate": 1.9999951293389933e-05, "loss": 0.4844, "step": 345 }, { "epoch": 0.042105263157894736, "grad_norm": 2.8902554512023926, "learning_rate": 1.9999945014809234e-05, "loss": 0.4757, "step": 346 }, { "epoch": 0.042226954669911776, "grad_norm": 2.484220027923584, "learning_rate": 1.9999938355709926e-05, "loss": 0.5243, "step": 347 }, { "epoch": 0.04234864618192881, "grad_norm": 1.1086695194244385, "learning_rate": 1.999993131609226e-05, "loss": 0.49, "step": 348 }, { "epoch": 0.04247033769394585, "grad_norm": 2.491640329360962, "learning_rate": 1.9999923895956518e-05, "loss": 0.4581, "step": 349 }, { "epoch": 0.04259202920596288, "grad_norm": 0.9489032626152039, "learning_rate": 1.9999916095302972e-05, "loss": 0.4852, "step": 350 }, { "epoch": 0.04271372071797992, "grad_norm": 0.7250332236289978, "learning_rate": 1.9999907914131916e-05, "loss": 0.4906, "step": 351 }, { "epoch": 0.042835412229996955, "grad_norm": 3.0588483810424805, "learning_rate": 1.9999899352443666e-05, "loss": 0.5091, "step": 352 }, { "epoch": 0.042957103742013995, "grad_norm": 0.782995343208313, "learning_rate": 1.999989041023855e-05, "loss": 0.4513, "step": 353 }, { "epoch": 0.04307879525403103, "grad_norm": 1.0188297033309937, "learning_rate": 1.99998810875169e-05, "loss": 0.497, "step": 354 }, { "epoch": 0.04320048676604807, "grad_norm": 1.5506155490875244, "learning_rate": 1.999987138427908e-05, "loss": 0.4425, "step": 355 }, { "epoch": 0.0433221782780651, "grad_norm": 2.8500266075134277, "learning_rate": 1.999986130052545e-05, "loss": 0.5447, "step": 356 }, { "epoch": 0.04344386979008214, "grad_norm": 2.310084581375122, "learning_rate": 1.9999850836256406e-05, "loss": 0.4612, "step": 357 }, { "epoch": 0.04356556130209918, "grad_norm": 3.462616443634033, "learning_rate": 1.9999839991472337e-05, "loss": 0.5422, "step": 358 }, { "epoch": 0.043687252814116215, "grad_norm": 1.3153492212295532, "learning_rate": 1.9999828766173655e-05, "loss": 0.5231, "step": 359 }, { "epoch": 0.043808944326133255, "grad_norm": 1.3355218172073364, "learning_rate": 1.9999817160360794e-05, "loss": 0.5127, "step": 360 }, { "epoch": 0.04393063583815029, "grad_norm": 1.3839240074157715, "learning_rate": 1.999980517403419e-05, "loss": 0.5186, "step": 361 }, { "epoch": 0.04405232735016733, "grad_norm": 2.0387721061706543, "learning_rate": 1.9999792807194297e-05, "loss": 0.4131, "step": 362 }, { "epoch": 0.04417401886218436, "grad_norm": 3.998417377471924, "learning_rate": 1.9999780059841593e-05, "loss": 0.5405, "step": 363 }, { "epoch": 0.0442957103742014, "grad_norm": 1.6171361207962036, "learning_rate": 1.999976693197656e-05, "loss": 0.5098, "step": 364 }, { "epoch": 0.044417401886218434, "grad_norm": 0.6380708813667297, "learning_rate": 1.9999753423599696e-05, "loss": 0.4769, "step": 365 }, { "epoch": 0.044539093398235474, "grad_norm": 1.0627521276474, "learning_rate": 1.9999739534711514e-05, "loss": 0.5338, "step": 366 }, { "epoch": 0.04466078491025251, "grad_norm": 2.228125810623169, "learning_rate": 1.9999725265312545e-05, "loss": 0.524, "step": 367 }, { "epoch": 0.04478247642226955, "grad_norm": 4.595504283905029, "learning_rate": 1.9999710615403333e-05, "loss": 0.5322, "step": 368 }, { "epoch": 0.04490416793428658, "grad_norm": 3.422703504562378, "learning_rate": 1.9999695584984434e-05, "loss": 0.5009, "step": 369 }, { "epoch": 0.04502585944630362, "grad_norm": 0.6527664065361023, "learning_rate": 1.999968017405642e-05, "loss": 0.498, "step": 370 }, { "epoch": 0.04514755095832066, "grad_norm": 1.485638976097107, "learning_rate": 1.9999664382619876e-05, "loss": 0.4686, "step": 371 }, { "epoch": 0.045269242470337694, "grad_norm": 2.8725483417510986, "learning_rate": 1.9999648210675402e-05, "loss": 0.4766, "step": 372 }, { "epoch": 0.045390933982354734, "grad_norm": 2.0454182624816895, "learning_rate": 1.9999631658223617e-05, "loss": 0.4705, "step": 373 }, { "epoch": 0.04551262549437177, "grad_norm": 1.9818562269210815, "learning_rate": 1.999961472526515e-05, "loss": 0.4322, "step": 374 }, { "epoch": 0.04563431700638881, "grad_norm": 1.107570767402649, "learning_rate": 1.9999597411800645e-05, "loss": 0.5237, "step": 375 }, { "epoch": 0.04575600851840584, "grad_norm": 2.3792004585266113, "learning_rate": 1.999957971783076e-05, "loss": 0.4749, "step": 376 }, { "epoch": 0.04587770003042288, "grad_norm": 6.037851810455322, "learning_rate": 1.9999561643356168e-05, "loss": 0.4582, "step": 377 }, { "epoch": 0.04599939154243991, "grad_norm": 1.0004757642745972, "learning_rate": 1.9999543188377557e-05, "loss": 0.5358, "step": 378 }, { "epoch": 0.04612108305445695, "grad_norm": 2.3799378871917725, "learning_rate": 1.9999524352895633e-05, "loss": 0.4951, "step": 379 }, { "epoch": 0.046242774566473986, "grad_norm": 1.0956833362579346, "learning_rate": 1.9999505136911106e-05, "loss": 0.498, "step": 380 }, { "epoch": 0.046364466078491026, "grad_norm": 1.9067175388336182, "learning_rate": 1.999948554042471e-05, "loss": 0.4516, "step": 381 }, { "epoch": 0.04648615759050806, "grad_norm": 2.6854097843170166, "learning_rate": 1.9999465563437194e-05, "loss": 0.5042, "step": 382 }, { "epoch": 0.0466078491025251, "grad_norm": 2.899186849594116, "learning_rate": 1.9999445205949315e-05, "loss": 0.536, "step": 383 }, { "epoch": 0.04672954061454213, "grad_norm": 0.5587601661682129, "learning_rate": 1.999942446796185e-05, "loss": 0.4765, "step": 384 }, { "epoch": 0.04685123212655917, "grad_norm": 2.5319371223449707, "learning_rate": 1.9999403349475584e-05, "loss": 0.4526, "step": 385 }, { "epoch": 0.04697292363857621, "grad_norm": 2.785966396331787, "learning_rate": 1.9999381850491323e-05, "loss": 0.5165, "step": 386 }, { "epoch": 0.047094615150593246, "grad_norm": 1.0049335956573486, "learning_rate": 1.9999359971009885e-05, "loss": 0.5142, "step": 387 }, { "epoch": 0.047216306662610286, "grad_norm": 2.116328477859497, "learning_rate": 1.9999337711032102e-05, "loss": 0.513, "step": 388 }, { "epoch": 0.04733799817462732, "grad_norm": 0.9692975282669067, "learning_rate": 1.9999315070558825e-05, "loss": 0.4365, "step": 389 }, { "epoch": 0.04745968968664436, "grad_norm": 0.9520614147186279, "learning_rate": 1.9999292049590908e-05, "loss": 0.4283, "step": 390 }, { "epoch": 0.04758138119866139, "grad_norm": 1.2361197471618652, "learning_rate": 1.9999268648129234e-05, "loss": 0.479, "step": 391 }, { "epoch": 0.04770307271067843, "grad_norm": 0.915768563747406, "learning_rate": 1.9999244866174686e-05, "loss": 0.4832, "step": 392 }, { "epoch": 0.047824764222695465, "grad_norm": 0.9080315232276917, "learning_rate": 1.999922070372818e-05, "loss": 0.4433, "step": 393 }, { "epoch": 0.047946455734712505, "grad_norm": 1.0250380039215088, "learning_rate": 1.9999196160790627e-05, "loss": 0.4607, "step": 394 }, { "epoch": 0.04806814724672954, "grad_norm": 1.125348687171936, "learning_rate": 1.999917123736296e-05, "loss": 0.5341, "step": 395 }, { "epoch": 0.04818983875874658, "grad_norm": 1.0356920957565308, "learning_rate": 1.999914593344613e-05, "loss": 0.5035, "step": 396 }, { "epoch": 0.04831153027076361, "grad_norm": 1.220845341682434, "learning_rate": 1.9999120249041108e-05, "loss": 0.4952, "step": 397 }, { "epoch": 0.04843322178278065, "grad_norm": 0.7832775712013245, "learning_rate": 1.9999094184148852e-05, "loss": 0.4883, "step": 398 }, { "epoch": 0.048554913294797684, "grad_norm": 2.6819679737091064, "learning_rate": 1.9999067738770376e-05, "loss": 0.5663, "step": 399 }, { "epoch": 0.048676604806814724, "grad_norm": 3.7089293003082275, "learning_rate": 1.9999040912906668e-05, "loss": 0.4004, "step": 400 }, { "epoch": 0.048798296318831764, "grad_norm": 2.1037650108337402, "learning_rate": 1.999901370655876e-05, "loss": 0.5194, "step": 401 }, { "epoch": 0.0489199878308488, "grad_norm": 0.7117876410484314, "learning_rate": 1.999898611972768e-05, "loss": 0.5096, "step": 402 }, { "epoch": 0.04904167934286584, "grad_norm": 1.2061089277267456, "learning_rate": 1.9998958152414486e-05, "loss": 0.5562, "step": 403 }, { "epoch": 0.04916337085488287, "grad_norm": 1.141405701637268, "learning_rate": 1.9998929804620234e-05, "loss": 0.558, "step": 404 }, { "epoch": 0.04928506236689991, "grad_norm": 4.762540817260742, "learning_rate": 1.999890107634601e-05, "loss": 0.5011, "step": 405 }, { "epoch": 0.049406753878916944, "grad_norm": 2.3913159370422363, "learning_rate": 1.99988719675929e-05, "loss": 0.5089, "step": 406 }, { "epoch": 0.049528445390933984, "grad_norm": 0.647175133228302, "learning_rate": 1.9998842478362017e-05, "loss": 0.5114, "step": 407 }, { "epoch": 0.04965013690295102, "grad_norm": 0.9456661343574524, "learning_rate": 1.999881260865448e-05, "loss": 0.4777, "step": 408 }, { "epoch": 0.04977182841496806, "grad_norm": 2.7678298950195312, "learning_rate": 1.9998782358471428e-05, "loss": 0.5321, "step": 409 }, { "epoch": 0.04989351992698509, "grad_norm": 3.9966847896575928, "learning_rate": 1.999875172781401e-05, "loss": 0.5802, "step": 410 }, { "epoch": 0.05001521143900213, "grad_norm": 2.654068946838379, "learning_rate": 1.9998720716683393e-05, "loss": 0.5437, "step": 411 }, { "epoch": 0.05013690295101916, "grad_norm": 2.712520122528076, "learning_rate": 1.9998689325080754e-05, "loss": 0.4692, "step": 412 }, { "epoch": 0.0502585944630362, "grad_norm": 5.590470314025879, "learning_rate": 1.9998657553007294e-05, "loss": 0.4677, "step": 413 }, { "epoch": 0.05038028597505324, "grad_norm": 3.3917086124420166, "learning_rate": 1.9998625400464218e-05, "loss": 0.5052, "step": 414 }, { "epoch": 0.050501977487070276, "grad_norm": 0.8707383275032043, "learning_rate": 1.9998592867452747e-05, "loss": 0.5402, "step": 415 }, { "epoch": 0.050623668999087316, "grad_norm": 1.3056360483169556, "learning_rate": 1.9998559953974123e-05, "loss": 0.4541, "step": 416 }, { "epoch": 0.05074536051110435, "grad_norm": 2.2689220905303955, "learning_rate": 1.9998526660029597e-05, "loss": 0.4737, "step": 417 }, { "epoch": 0.05086705202312139, "grad_norm": 5.5445427894592285, "learning_rate": 1.9998492985620436e-05, "loss": 0.5552, "step": 418 }, { "epoch": 0.05098874353513842, "grad_norm": 5.148685455322266, "learning_rate": 1.9998458930747917e-05, "loss": 0.5547, "step": 419 }, { "epoch": 0.05111043504715546, "grad_norm": 2.8303542137145996, "learning_rate": 1.9998424495413346e-05, "loss": 0.4769, "step": 420 }, { "epoch": 0.051232126559172496, "grad_norm": 1.7225395441055298, "learning_rate": 1.9998389679618025e-05, "loss": 0.4278, "step": 421 }, { "epoch": 0.051353818071189536, "grad_norm": 1.1106629371643066, "learning_rate": 1.9998354483363277e-05, "loss": 0.5342, "step": 422 }, { "epoch": 0.05147550958320657, "grad_norm": 1.0146124362945557, "learning_rate": 1.999831890665045e-05, "loss": 0.4857, "step": 423 }, { "epoch": 0.05159720109522361, "grad_norm": 1.1379413604736328, "learning_rate": 1.9998282949480893e-05, "loss": 0.4965, "step": 424 }, { "epoch": 0.05171889260724064, "grad_norm": 2.018253803253174, "learning_rate": 1.9998246611855974e-05, "loss": 0.4512, "step": 425 }, { "epoch": 0.05184058411925768, "grad_norm": 0.7502725124359131, "learning_rate": 1.9998209893777076e-05, "loss": 0.4766, "step": 426 }, { "epoch": 0.051962275631274715, "grad_norm": 0.723865807056427, "learning_rate": 1.9998172795245598e-05, "loss": 0.4393, "step": 427 }, { "epoch": 0.052083967143291755, "grad_norm": 3.620739698410034, "learning_rate": 1.999813531626295e-05, "loss": 0.5174, "step": 428 }, { "epoch": 0.052205658655308795, "grad_norm": 4.749644756317139, "learning_rate": 1.9998097456830553e-05, "loss": 0.5757, "step": 429 }, { "epoch": 0.05232735016732583, "grad_norm": 0.6498982906341553, "learning_rate": 1.9998059216949856e-05, "loss": 0.4976, "step": 430 }, { "epoch": 0.05244904167934287, "grad_norm": 1.9424355030059814, "learning_rate": 1.9998020596622312e-05, "loss": 0.4655, "step": 431 }, { "epoch": 0.0525707331913599, "grad_norm": 1.9933793544769287, "learning_rate": 1.999798159584939e-05, "loss": 0.4856, "step": 432 }, { "epoch": 0.05269242470337694, "grad_norm": 3.7161357402801514, "learning_rate": 1.9997942214632574e-05, "loss": 0.4689, "step": 433 }, { "epoch": 0.052814116215393975, "grad_norm": 1.2382320165634155, "learning_rate": 1.9997902452973358e-05, "loss": 0.476, "step": 434 }, { "epoch": 0.052935807727411015, "grad_norm": 0.6176339387893677, "learning_rate": 1.999786231087326e-05, "loss": 0.3707, "step": 435 }, { "epoch": 0.05305749923942805, "grad_norm": 6.290051460266113, "learning_rate": 1.9997821788333812e-05, "loss": 0.5724, "step": 436 }, { "epoch": 0.05317919075144509, "grad_norm": 5.688040733337402, "learning_rate": 1.9997780885356545e-05, "loss": 0.5273, "step": 437 }, { "epoch": 0.05330088226346212, "grad_norm": 5.579360008239746, "learning_rate": 1.9997739601943025e-05, "loss": 0.5466, "step": 438 }, { "epoch": 0.05342257377547916, "grad_norm": 6.472201347351074, "learning_rate": 1.9997697938094815e-05, "loss": 0.6465, "step": 439 }, { "epoch": 0.053544265287496194, "grad_norm": 0.6766566038131714, "learning_rate": 1.999765589381351e-05, "loss": 0.4383, "step": 440 }, { "epoch": 0.053665956799513234, "grad_norm": 1.6292884349822998, "learning_rate": 1.9997613469100702e-05, "loss": 0.4656, "step": 441 }, { "epoch": 0.053787648311530274, "grad_norm": 3.057184934616089, "learning_rate": 1.9997570663958005e-05, "loss": 0.4397, "step": 442 }, { "epoch": 0.05390933982354731, "grad_norm": 0.6015315651893616, "learning_rate": 1.9997527478387048e-05, "loss": 0.4972, "step": 443 }, { "epoch": 0.05403103133556435, "grad_norm": 0.6792243123054504, "learning_rate": 1.9997483912389484e-05, "loss": 0.4833, "step": 444 }, { "epoch": 0.05415272284758138, "grad_norm": 1.7965141534805298, "learning_rate": 1.9997439965966957e-05, "loss": 0.5224, "step": 445 }, { "epoch": 0.05427441435959842, "grad_norm": 2.2070422172546387, "learning_rate": 1.999739563912115e-05, "loss": 0.5126, "step": 446 }, { "epoch": 0.054396105871615454, "grad_norm": 2.6806740760803223, "learning_rate": 1.9997350931853745e-05, "loss": 0.4878, "step": 447 }, { "epoch": 0.054517797383632494, "grad_norm": 1.3019781112670898, "learning_rate": 1.999730584416644e-05, "loss": 0.5178, "step": 448 }, { "epoch": 0.05463948889564953, "grad_norm": 1.6776936054229736, "learning_rate": 1.9997260376060958e-05, "loss": 0.5151, "step": 449 }, { "epoch": 0.05476118040766657, "grad_norm": 0.9040634036064148, "learning_rate": 1.9997214527539025e-05, "loss": 0.5035, "step": 450 }, { "epoch": 0.0548828719196836, "grad_norm": 0.9000717997550964, "learning_rate": 1.999716829860239e-05, "loss": 0.4203, "step": 451 }, { "epoch": 0.05500456343170064, "grad_norm": 4.662450790405273, "learning_rate": 1.99971216892528e-05, "loss": 0.5618, "step": 452 }, { "epoch": 0.05512625494371767, "grad_norm": 5.027231693267822, "learning_rate": 1.999707469949204e-05, "loss": 0.5614, "step": 453 }, { "epoch": 0.05524794645573471, "grad_norm": 1.7503198385238647, "learning_rate": 1.9997027329321896e-05, "loss": 0.4443, "step": 454 }, { "epoch": 0.055369637967751746, "grad_norm": 1.9756243228912354, "learning_rate": 1.999697957874417e-05, "loss": 0.5215, "step": 455 }, { "epoch": 0.055491329479768786, "grad_norm": 2.088327407836914, "learning_rate": 1.9996931447760677e-05, "loss": 0.4871, "step": 456 }, { "epoch": 0.055613020991785826, "grad_norm": 4.555768966674805, "learning_rate": 1.999688293637325e-05, "loss": 0.5058, "step": 457 }, { "epoch": 0.05573471250380286, "grad_norm": 6.4992570877075195, "learning_rate": 1.9996834044583736e-05, "loss": 0.5105, "step": 458 }, { "epoch": 0.0558564040158199, "grad_norm": 3.1522233486175537, "learning_rate": 1.999678477239399e-05, "loss": 0.5332, "step": 459 }, { "epoch": 0.05597809552783693, "grad_norm": 1.4650074243545532, "learning_rate": 1.9996735119805895e-05, "loss": 0.5653, "step": 460 }, { "epoch": 0.05609978703985397, "grad_norm": 0.9694099426269531, "learning_rate": 1.9996685086821338e-05, "loss": 0.4524, "step": 461 }, { "epoch": 0.056221478551871006, "grad_norm": 1.3509925603866577, "learning_rate": 1.9996634673442222e-05, "loss": 0.4799, "step": 462 }, { "epoch": 0.056343170063888046, "grad_norm": 1.8713821172714233, "learning_rate": 1.9996583879670463e-05, "loss": 0.3705, "step": 463 }, { "epoch": 0.05646486157590508, "grad_norm": 3.057955503463745, "learning_rate": 1.999653270550799e-05, "loss": 0.4877, "step": 464 }, { "epoch": 0.05658655308792212, "grad_norm": 3.9810750484466553, "learning_rate": 1.9996481150956764e-05, "loss": 0.4692, "step": 465 }, { "epoch": 0.05670824459993915, "grad_norm": 3.1951699256896973, "learning_rate": 1.9996429216018734e-05, "loss": 0.49, "step": 466 }, { "epoch": 0.05682993611195619, "grad_norm": 0.9979059100151062, "learning_rate": 1.9996376900695884e-05, "loss": 0.4406, "step": 467 }, { "epoch": 0.056951627623973225, "grad_norm": 1.417478084564209, "learning_rate": 1.99963242049902e-05, "loss": 0.465, "step": 468 }, { "epoch": 0.057073319135990265, "grad_norm": 1.4806361198425293, "learning_rate": 1.999627112890369e-05, "loss": 0.5039, "step": 469 }, { "epoch": 0.0571950106480073, "grad_norm": 3.3733386993408203, "learning_rate": 1.999621767243837e-05, "loss": 0.4892, "step": 470 }, { "epoch": 0.05731670216002434, "grad_norm": 0.9595805406570435, "learning_rate": 1.9996163835596277e-05, "loss": 0.5136, "step": 471 }, { "epoch": 0.05743839367204138, "grad_norm": 0.77589350938797, "learning_rate": 1.999610961837946e-05, "loss": 0.4569, "step": 472 }, { "epoch": 0.05756008518405841, "grad_norm": 4.537379264831543, "learning_rate": 1.9996055020789983e-05, "loss": 0.5497, "step": 473 }, { "epoch": 0.05768177669607545, "grad_norm": 3.5111641883850098, "learning_rate": 1.999600004282992e-05, "loss": 0.4577, "step": 474 }, { "epoch": 0.057803468208092484, "grad_norm": 3.577052116394043, "learning_rate": 1.999594468450136e-05, "loss": 0.5132, "step": 475 }, { "epoch": 0.057925159720109524, "grad_norm": 1.4253040552139282, "learning_rate": 1.9995888945806425e-05, "loss": 0.4952, "step": 476 }, { "epoch": 0.05804685123212656, "grad_norm": 1.8965922594070435, "learning_rate": 1.9995832826747213e-05, "loss": 0.4613, "step": 477 }, { "epoch": 0.0581685427441436, "grad_norm": 1.0503103733062744, "learning_rate": 1.999577632732588e-05, "loss": 0.5271, "step": 478 }, { "epoch": 0.05829023425616063, "grad_norm": 1.200190782546997, "learning_rate": 1.9995719447544567e-05, "loss": 0.535, "step": 479 }, { "epoch": 0.05841192576817767, "grad_norm": 1.953628420829773, "learning_rate": 1.9995662187405438e-05, "loss": 0.4963, "step": 480 }, { "epoch": 0.058533617280194704, "grad_norm": 1.271244764328003, "learning_rate": 1.999560454691067e-05, "loss": 0.4463, "step": 481 }, { "epoch": 0.058655308792211744, "grad_norm": 1.5221885442733765, "learning_rate": 1.9995546526062464e-05, "loss": 0.4485, "step": 482 }, { "epoch": 0.05877700030422878, "grad_norm": 4.1907572746276855, "learning_rate": 1.999548812486302e-05, "loss": 0.5146, "step": 483 }, { "epoch": 0.05889869181624582, "grad_norm": 1.2595373392105103, "learning_rate": 1.9995429343314564e-05, "loss": 0.4258, "step": 484 }, { "epoch": 0.05902038332826286, "grad_norm": 2.8437564373016357, "learning_rate": 1.9995370181419332e-05, "loss": 0.4847, "step": 485 }, { "epoch": 0.05914207484027989, "grad_norm": 2.1599769592285156, "learning_rate": 1.9995310639179575e-05, "loss": 0.523, "step": 486 }, { "epoch": 0.05926376635229693, "grad_norm": 1.2120065689086914, "learning_rate": 1.999525071659756e-05, "loss": 0.5032, "step": 487 }, { "epoch": 0.05938545786431396, "grad_norm": 2.0114965438842773, "learning_rate": 1.9995190413675564e-05, "loss": 0.5021, "step": 488 }, { "epoch": 0.059507149376331, "grad_norm": 2.2819175720214844, "learning_rate": 1.9995129730415884e-05, "loss": 0.4898, "step": 489 }, { "epoch": 0.059628840888348036, "grad_norm": 2.7193124294281006, "learning_rate": 1.9995068666820833e-05, "loss": 0.5523, "step": 490 }, { "epoch": 0.059750532400365076, "grad_norm": 1.8687043190002441, "learning_rate": 1.999500722289273e-05, "loss": 0.4748, "step": 491 }, { "epoch": 0.05987222391238211, "grad_norm": 0.7520543336868286, "learning_rate": 1.999494539863391e-05, "loss": 0.4602, "step": 492 }, { "epoch": 0.05999391542439915, "grad_norm": 4.535391807556152, "learning_rate": 1.999488319404673e-05, "loss": 0.5382, "step": 493 }, { "epoch": 0.06011560693641618, "grad_norm": 4.206397533416748, "learning_rate": 1.9994820609133558e-05, "loss": 0.5677, "step": 494 }, { "epoch": 0.06023729844843322, "grad_norm": 1.6907819509506226, "learning_rate": 1.999475764389677e-05, "loss": 0.4298, "step": 495 }, { "epoch": 0.060358989960450256, "grad_norm": 4.499179363250732, "learning_rate": 1.999469429833877e-05, "loss": 0.5314, "step": 496 }, { "epoch": 0.060480681472467296, "grad_norm": 1.034327745437622, "learning_rate": 1.999463057246196e-05, "loss": 0.505, "step": 497 }, { "epoch": 0.06060237298448433, "grad_norm": 2.9028069972991943, "learning_rate": 1.999456646626877e-05, "loss": 0.4879, "step": 498 }, { "epoch": 0.06072406449650137, "grad_norm": 2.4188835620880127, "learning_rate": 1.9994501979761644e-05, "loss": 0.5334, "step": 499 }, { "epoch": 0.06084575600851841, "grad_norm": 6.573634624481201, "learning_rate": 1.9994437112943025e-05, "loss": 0.5274, "step": 500 }, { "epoch": 0.06096744752053544, "grad_norm": 3.9176599979400635, "learning_rate": 1.9994371865815388e-05, "loss": 0.5444, "step": 501 }, { "epoch": 0.06108913903255248, "grad_norm": 3.5634405612945557, "learning_rate": 1.999430623838121e-05, "loss": 0.4507, "step": 502 }, { "epoch": 0.061210830544569515, "grad_norm": 2.5009710788726807, "learning_rate": 1.9994240230642997e-05, "loss": 0.3973, "step": 503 }, { "epoch": 0.061332522056586555, "grad_norm": 5.271429538726807, "learning_rate": 1.9994173842603258e-05, "loss": 0.5523, "step": 504 }, { "epoch": 0.06145421356860359, "grad_norm": 3.050410747528076, "learning_rate": 1.9994107074264516e-05, "loss": 0.4536, "step": 505 }, { "epoch": 0.06157590508062063, "grad_norm": 5.5924177169799805, "learning_rate": 1.9994039925629313e-05, "loss": 0.5737, "step": 506 }, { "epoch": 0.06169759659263766, "grad_norm": 7.040491104125977, "learning_rate": 1.9993972396700202e-05, "loss": 0.6121, "step": 507 }, { "epoch": 0.0618192881046547, "grad_norm": 2.6737732887268066, "learning_rate": 1.9993904487479753e-05, "loss": 0.4417, "step": 508 }, { "epoch": 0.061940979616671735, "grad_norm": 1.7591124773025513, "learning_rate": 1.9993836197970556e-05, "loss": 0.4893, "step": 509 }, { "epoch": 0.062062671128688775, "grad_norm": 0.7876875400543213, "learning_rate": 1.999376752817521e-05, "loss": 0.4941, "step": 510 }, { "epoch": 0.06218436264070581, "grad_norm": 2.649890422821045, "learning_rate": 1.9993698478096315e-05, "loss": 0.5487, "step": 511 }, { "epoch": 0.06230605415272285, "grad_norm": 3.669792413711548, "learning_rate": 1.999362904773651e-05, "loss": 0.5138, "step": 512 }, { "epoch": 0.06242774566473988, "grad_norm": 4.561140060424805, "learning_rate": 1.9993559237098436e-05, "loss": 0.5344, "step": 513 }, { "epoch": 0.06254943717675693, "grad_norm": 4.805019855499268, "learning_rate": 1.9993489046184743e-05, "loss": 0.4836, "step": 514 }, { "epoch": 0.06267112868877396, "grad_norm": 1.5226490497589111, "learning_rate": 1.9993418474998113e-05, "loss": 0.5059, "step": 515 }, { "epoch": 0.062792820200791, "grad_norm": 0.5332703590393066, "learning_rate": 1.999334752354122e-05, "loss": 0.4763, "step": 516 }, { "epoch": 0.06291451171280803, "grad_norm": 2.0380637645721436, "learning_rate": 1.999327619181677e-05, "loss": 0.48, "step": 517 }, { "epoch": 0.06303620322482507, "grad_norm": 3.6975255012512207, "learning_rate": 1.9993204479827476e-05, "loss": 0.4883, "step": 518 }, { "epoch": 0.06315789473684211, "grad_norm": 5.069300174713135, "learning_rate": 1.9993132387576067e-05, "loss": 0.5322, "step": 519 }, { "epoch": 0.06327958624885914, "grad_norm": 5.06345272064209, "learning_rate": 1.9993059915065286e-05, "loss": 0.5796, "step": 520 }, { "epoch": 0.06340127776087617, "grad_norm": 0.6309605240821838, "learning_rate": 1.9992987062297892e-05, "loss": 0.4647, "step": 521 }, { "epoch": 0.06352296927289322, "grad_norm": 1.317283272743225, "learning_rate": 1.999291382927665e-05, "loss": 0.4892, "step": 522 }, { "epoch": 0.06364466078491025, "grad_norm": 2.677121639251709, "learning_rate": 1.9992840216004358e-05, "loss": 0.5097, "step": 523 }, { "epoch": 0.06376635229692729, "grad_norm": 5.121943950653076, "learning_rate": 1.999276622248381e-05, "loss": 0.4887, "step": 524 }, { "epoch": 0.06388804380894432, "grad_norm": 5.404939651489258, "learning_rate": 1.9992691848717826e-05, "loss": 0.4744, "step": 525 }, { "epoch": 0.06400973532096137, "grad_norm": 2.4270565509796143, "learning_rate": 1.9992617094709233e-05, "loss": 0.5326, "step": 526 }, { "epoch": 0.0641314268329784, "grad_norm": 2.0383589267730713, "learning_rate": 1.999254196046087e-05, "loss": 0.4906, "step": 527 }, { "epoch": 0.06425311834499543, "grad_norm": 2.442333459854126, "learning_rate": 1.999246644597561e-05, "loss": 0.5293, "step": 528 }, { "epoch": 0.06437480985701248, "grad_norm": 3.750847816467285, "learning_rate": 1.9992390551256314e-05, "loss": 0.5635, "step": 529 }, { "epoch": 0.06449650136902951, "grad_norm": 1.1939170360565186, "learning_rate": 1.9992314276305874e-05, "loss": 0.4542, "step": 530 }, { "epoch": 0.06461819288104655, "grad_norm": 0.9305385947227478, "learning_rate": 1.9992237621127196e-05, "loss": 0.4437, "step": 531 }, { "epoch": 0.06473988439306358, "grad_norm": 1.619693398475647, "learning_rate": 1.999216058572319e-05, "loss": 0.4907, "step": 532 }, { "epoch": 0.06486157590508063, "grad_norm": 1.302485704421997, "learning_rate": 1.9992083170096794e-05, "loss": 0.4165, "step": 533 }, { "epoch": 0.06498326741709766, "grad_norm": 1.965978741645813, "learning_rate": 1.999200537425095e-05, "loss": 0.5254, "step": 534 }, { "epoch": 0.06510495892911469, "grad_norm": 0.8912264704704285, "learning_rate": 1.9991927198188618e-05, "loss": 0.514, "step": 535 }, { "epoch": 0.06522665044113173, "grad_norm": 1.8724647760391235, "learning_rate": 1.9991848641912774e-05, "loss": 0.4674, "step": 536 }, { "epoch": 0.06534834195314877, "grad_norm": 2.7350761890411377, "learning_rate": 1.999176970542641e-05, "loss": 0.4574, "step": 537 }, { "epoch": 0.0654700334651658, "grad_norm": 0.8851805925369263, "learning_rate": 1.9991690388732527e-05, "loss": 0.5187, "step": 538 }, { "epoch": 0.06559172497718284, "grad_norm": 0.7318903803825378, "learning_rate": 1.9991610691834137e-05, "loss": 0.493, "step": 539 }, { "epoch": 0.06571341648919987, "grad_norm": 3.4889729022979736, "learning_rate": 1.9991530614734285e-05, "loss": 0.4216, "step": 540 }, { "epoch": 0.06583510800121692, "grad_norm": 2.57153582572937, "learning_rate": 1.9991450157436008e-05, "loss": 0.4032, "step": 541 }, { "epoch": 0.06595679951323395, "grad_norm": 1.5060590505599976, "learning_rate": 1.9991369319942374e-05, "loss": 0.4741, "step": 542 }, { "epoch": 0.06607849102525098, "grad_norm": 2.0602190494537354, "learning_rate": 1.9991288102256453e-05, "loss": 0.4927, "step": 543 }, { "epoch": 0.06620018253726803, "grad_norm": 2.9015443325042725, "learning_rate": 1.9991206504381343e-05, "loss": 0.5037, "step": 544 }, { "epoch": 0.06632187404928507, "grad_norm": 0.7738814353942871, "learning_rate": 1.9991124526320142e-05, "loss": 0.4696, "step": 545 }, { "epoch": 0.0664435655613021, "grad_norm": 3.829948902130127, "learning_rate": 1.9991042168075972e-05, "loss": 0.3952, "step": 546 }, { "epoch": 0.06656525707331913, "grad_norm": 0.9256926774978638, "learning_rate": 1.999095942965197e-05, "loss": 0.5364, "step": 547 }, { "epoch": 0.06668694858533618, "grad_norm": 2.401303768157959, "learning_rate": 1.999087631105128e-05, "loss": 0.433, "step": 548 }, { "epoch": 0.06680864009735321, "grad_norm": 1.4442120790481567, "learning_rate": 1.9990792812277068e-05, "loss": 0.4271, "step": 549 }, { "epoch": 0.06693033160937024, "grad_norm": 3.9047372341156006, "learning_rate": 1.9990708933332506e-05, "loss": 0.542, "step": 550 }, { "epoch": 0.06705202312138728, "grad_norm": 4.522757530212402, "learning_rate": 1.9990624674220794e-05, "loss": 0.5684, "step": 551 }, { "epoch": 0.06717371463340432, "grad_norm": 5.236936092376709, "learning_rate": 1.999054003494513e-05, "loss": 0.5813, "step": 552 }, { "epoch": 0.06729540614542136, "grad_norm": 3.983177423477173, "learning_rate": 1.9990455015508738e-05, "loss": 0.5217, "step": 553 }, { "epoch": 0.06741709765743839, "grad_norm": 0.9311882257461548, "learning_rate": 1.9990369615914854e-05, "loss": 0.5054, "step": 554 }, { "epoch": 0.06753878916945542, "grad_norm": 2.2126944065093994, "learning_rate": 1.9990283836166732e-05, "loss": 0.4941, "step": 555 }, { "epoch": 0.06766048068147247, "grad_norm": 3.741231679916382, "learning_rate": 1.9990197676267624e-05, "loss": 0.5134, "step": 556 }, { "epoch": 0.0677821721934895, "grad_norm": 4.3026628494262695, "learning_rate": 1.999011113622082e-05, "loss": 0.498, "step": 557 }, { "epoch": 0.06790386370550654, "grad_norm": 1.644929051399231, "learning_rate": 1.999002421602961e-05, "loss": 0.5626, "step": 558 }, { "epoch": 0.06802555521752358, "grad_norm": 0.7731117010116577, "learning_rate": 1.9989936915697295e-05, "loss": 0.5289, "step": 559 }, { "epoch": 0.06814724672954062, "grad_norm": 1.6183112859725952, "learning_rate": 1.998984923522721e-05, "loss": 0.4713, "step": 560 }, { "epoch": 0.06826893824155765, "grad_norm": 1.2894877195358276, "learning_rate": 1.998976117462268e-05, "loss": 0.428, "step": 561 }, { "epoch": 0.06839062975357468, "grad_norm": 1.8582295179367065, "learning_rate": 1.9989672733887058e-05, "loss": 0.4538, "step": 562 }, { "epoch": 0.06851232126559173, "grad_norm": 3.921905517578125, "learning_rate": 1.998958391302371e-05, "loss": 0.4928, "step": 563 }, { "epoch": 0.06863401277760876, "grad_norm": 3.756831169128418, "learning_rate": 1.998949471203602e-05, "loss": 0.4769, "step": 564 }, { "epoch": 0.0687557042896258, "grad_norm": 1.5508170127868652, "learning_rate": 1.998940513092738e-05, "loss": 0.4209, "step": 565 }, { "epoch": 0.06887739580164283, "grad_norm": 2.5746004581451416, "learning_rate": 1.9989315169701197e-05, "loss": 0.4748, "step": 566 }, { "epoch": 0.06899908731365988, "grad_norm": 3.5863168239593506, "learning_rate": 1.9989224828360893e-05, "loss": 0.4375, "step": 567 }, { "epoch": 0.06912077882567691, "grad_norm": 4.102794647216797, "learning_rate": 1.9989134106909908e-05, "loss": 0.4298, "step": 568 }, { "epoch": 0.06924247033769394, "grad_norm": 4.051459789276123, "learning_rate": 1.9989043005351695e-05, "loss": 0.4173, "step": 569 }, { "epoch": 0.06936416184971098, "grad_norm": 2.05891489982605, "learning_rate": 1.9988951523689718e-05, "loss": 0.4662, "step": 570 }, { "epoch": 0.06948585336172802, "grad_norm": 1.4780614376068115, "learning_rate": 1.9988859661927465e-05, "loss": 0.4957, "step": 571 }, { "epoch": 0.06960754487374506, "grad_norm": 2.591366767883301, "learning_rate": 1.998876742006842e-05, "loss": 0.5035, "step": 572 }, { "epoch": 0.06972923638576209, "grad_norm": 0.8730366826057434, "learning_rate": 1.99886747981161e-05, "loss": 0.3829, "step": 573 }, { "epoch": 0.06985092789777914, "grad_norm": 2.2657363414764404, "learning_rate": 1.998858179607403e-05, "loss": 0.4586, "step": 574 }, { "epoch": 0.06997261940979617, "grad_norm": 3.80833101272583, "learning_rate": 1.9988488413945747e-05, "loss": 0.4622, "step": 575 }, { "epoch": 0.0700943109218132, "grad_norm": 1.078434944152832, "learning_rate": 1.9988394651734804e-05, "loss": 0.4044, "step": 576 }, { "epoch": 0.07021600243383024, "grad_norm": 2.571518659591675, "learning_rate": 1.998830050944477e-05, "loss": 0.5679, "step": 577 }, { "epoch": 0.07033769394584728, "grad_norm": 3.9330830574035645, "learning_rate": 1.9988205987079227e-05, "loss": 0.4345, "step": 578 }, { "epoch": 0.07045938545786432, "grad_norm": 5.81560754776001, "learning_rate": 1.9988111084641772e-05, "loss": 0.4732, "step": 579 }, { "epoch": 0.07058107696988135, "grad_norm": 6.2755351066589355, "learning_rate": 1.9988015802136017e-05, "loss": 0.4984, "step": 580 }, { "epoch": 0.07070276848189838, "grad_norm": 2.3427951335906982, "learning_rate": 1.9987920139565585e-05, "loss": 0.5199, "step": 581 }, { "epoch": 0.07082445999391543, "grad_norm": 1.0140855312347412, "learning_rate": 1.998782409693412e-05, "loss": 0.5637, "step": 582 }, { "epoch": 0.07094615150593246, "grad_norm": 0.869575560092926, "learning_rate": 1.998772767424527e-05, "loss": 0.5076, "step": 583 }, { "epoch": 0.0710678430179495, "grad_norm": 3.894951105117798, "learning_rate": 1.9987630871502713e-05, "loss": 0.5494, "step": 584 }, { "epoch": 0.07118953452996654, "grad_norm": 0.6733858585357666, "learning_rate": 1.9987533688710124e-05, "loss": 0.506, "step": 585 }, { "epoch": 0.07131122604198357, "grad_norm": 3.8077239990234375, "learning_rate": 1.998743612587121e-05, "loss": 0.5401, "step": 586 }, { "epoch": 0.07143291755400061, "grad_norm": 1.1248000860214233, "learning_rate": 1.9987338182989676e-05, "loss": 0.5047, "step": 587 }, { "epoch": 0.07155460906601764, "grad_norm": 2.5931150913238525, "learning_rate": 1.9987239860069253e-05, "loss": 0.4814, "step": 588 }, { "epoch": 0.07167630057803469, "grad_norm": 2.2741830348968506, "learning_rate": 1.9987141157113677e-05, "loss": 0.4806, "step": 589 }, { "epoch": 0.07179799209005172, "grad_norm": 1.1695295572280884, "learning_rate": 1.998704207412671e-05, "loss": 0.4837, "step": 590 }, { "epoch": 0.07191968360206875, "grad_norm": 0.9373350739479065, "learning_rate": 1.998694261111212e-05, "loss": 0.5446, "step": 591 }, { "epoch": 0.07204137511408579, "grad_norm": 1.958163857460022, "learning_rate": 1.9986842768073693e-05, "loss": 0.426, "step": 592 }, { "epoch": 0.07216306662610283, "grad_norm": 2.700521945953369, "learning_rate": 1.9986742545015226e-05, "loss": 0.4952, "step": 593 }, { "epoch": 0.07228475813811987, "grad_norm": 1.9372899532318115, "learning_rate": 1.9986641941940534e-05, "loss": 0.4693, "step": 594 }, { "epoch": 0.0724064496501369, "grad_norm": 2.725580930709839, "learning_rate": 1.9986540958853445e-05, "loss": 0.5572, "step": 595 }, { "epoch": 0.07252814116215393, "grad_norm": 1.3791035413742065, "learning_rate": 1.9986439595757803e-05, "loss": 0.5472, "step": 596 }, { "epoch": 0.07264983267417098, "grad_norm": 5.115930557250977, "learning_rate": 1.998633785265746e-05, "loss": 0.4827, "step": 597 }, { "epoch": 0.07277152418618801, "grad_norm": 1.7975986003875732, "learning_rate": 1.9986235729556292e-05, "loss": 0.5776, "step": 598 }, { "epoch": 0.07289321569820505, "grad_norm": 4.306365013122559, "learning_rate": 1.9986133226458187e-05, "loss": 0.5265, "step": 599 }, { "epoch": 0.0730149072102221, "grad_norm": 4.6414713859558105, "learning_rate": 1.998603034336704e-05, "loss": 0.4969, "step": 600 }, { "epoch": 0.07313659872223913, "grad_norm": 2.895811080932617, "learning_rate": 1.998592708028677e-05, "loss": 0.4529, "step": 601 }, { "epoch": 0.07325829023425616, "grad_norm": 0.918133020401001, "learning_rate": 1.9985823437221305e-05, "loss": 0.4501, "step": 602 }, { "epoch": 0.07337998174627319, "grad_norm": 4.656303405761719, "learning_rate": 1.9985719414174584e-05, "loss": 0.5325, "step": 603 }, { "epoch": 0.07350167325829024, "grad_norm": 5.414013862609863, "learning_rate": 1.9985615011150577e-05, "loss": 0.5515, "step": 604 }, { "epoch": 0.07362336477030727, "grad_norm": 3.049126148223877, "learning_rate": 1.9985510228153245e-05, "loss": 0.4249, "step": 605 }, { "epoch": 0.0737450562823243, "grad_norm": 2.161055564880371, "learning_rate": 1.9985405065186582e-05, "loss": 0.4104, "step": 606 }, { "epoch": 0.07386674779434134, "grad_norm": 5.049049377441406, "learning_rate": 1.9985299522254586e-05, "loss": 0.5592, "step": 607 }, { "epoch": 0.07398843930635839, "grad_norm": 2.2185564041137695, "learning_rate": 1.9985193599361276e-05, "loss": 0.5075, "step": 608 }, { "epoch": 0.07411013081837542, "grad_norm": 0.7173896431922913, "learning_rate": 1.998508729651068e-05, "loss": 0.5072, "step": 609 }, { "epoch": 0.07423182233039245, "grad_norm": 5.2295403480529785, "learning_rate": 1.9984980613706847e-05, "loss": 0.467, "step": 610 }, { "epoch": 0.07435351384240949, "grad_norm": 4.912859916687012, "learning_rate": 1.9984873550953833e-05, "loss": 0.5044, "step": 611 }, { "epoch": 0.07447520535442653, "grad_norm": 6.189967155456543, "learning_rate": 1.9984766108255712e-05, "loss": 0.5105, "step": 612 }, { "epoch": 0.07459689686644357, "grad_norm": 6.655056476593018, "learning_rate": 1.9984658285616573e-05, "loss": 0.5368, "step": 613 }, { "epoch": 0.0747185883784606, "grad_norm": 2.5998427867889404, "learning_rate": 1.9984550083040516e-05, "loss": 0.4911, "step": 614 }, { "epoch": 0.07484027989047765, "grad_norm": 3.477908134460449, "learning_rate": 1.9984441500531667e-05, "loss": 0.438, "step": 615 }, { "epoch": 0.07496197140249468, "grad_norm": 0.9623250961303711, "learning_rate": 1.9984332538094148e-05, "loss": 0.4336, "step": 616 }, { "epoch": 0.07508366291451171, "grad_norm": 3.307692050933838, "learning_rate": 1.9984223195732113e-05, "loss": 0.4819, "step": 617 }, { "epoch": 0.07520535442652874, "grad_norm": 3.5812041759490967, "learning_rate": 1.998411347344972e-05, "loss": 0.4711, "step": 618 }, { "epoch": 0.07532704593854579, "grad_norm": 4.81799840927124, "learning_rate": 1.998400337125114e-05, "loss": 0.5442, "step": 619 }, { "epoch": 0.07544873745056282, "grad_norm": 4.450498104095459, "learning_rate": 1.998389288914057e-05, "loss": 0.5491, "step": 620 }, { "epoch": 0.07557042896257986, "grad_norm": 0.8603129982948303, "learning_rate": 1.9983782027122206e-05, "loss": 0.509, "step": 621 }, { "epoch": 0.07569212047459689, "grad_norm": 2.767587900161743, "learning_rate": 1.998367078520027e-05, "loss": 0.484, "step": 622 }, { "epoch": 0.07581381198661394, "grad_norm": 1.7696137428283691, "learning_rate": 1.9983559163379e-05, "loss": 0.5173, "step": 623 }, { "epoch": 0.07593550349863097, "grad_norm": 2.1982898712158203, "learning_rate": 1.9983447161662636e-05, "loss": 0.5144, "step": 624 }, { "epoch": 0.076057195010648, "grad_norm": 0.9459839463233948, "learning_rate": 1.9983334780055442e-05, "loss": 0.4982, "step": 625 }, { "epoch": 0.07617888652266504, "grad_norm": 1.8358972072601318, "learning_rate": 1.9983222018561696e-05, "loss": 0.4644, "step": 626 }, { "epoch": 0.07630057803468208, "grad_norm": 2.825239896774292, "learning_rate": 1.9983108877185687e-05, "loss": 0.4953, "step": 627 }, { "epoch": 0.07642226954669912, "grad_norm": 2.4302449226379395, "learning_rate": 1.9982995355931726e-05, "loss": 0.4941, "step": 628 }, { "epoch": 0.07654396105871615, "grad_norm": 2.003037691116333, "learning_rate": 1.9982881454804125e-05, "loss": 0.4765, "step": 629 }, { "epoch": 0.0766656525707332, "grad_norm": 1.2302556037902832, "learning_rate": 1.9982767173807218e-05, "loss": 0.3842, "step": 630 }, { "epoch": 0.07678734408275023, "grad_norm": 1.5501840114593506, "learning_rate": 1.998265251294536e-05, "loss": 0.4991, "step": 631 }, { "epoch": 0.07690903559476726, "grad_norm": 1.1216791868209839, "learning_rate": 1.9982537472222913e-05, "loss": 0.4981, "step": 632 }, { "epoch": 0.0770307271067843, "grad_norm": 0.7789998650550842, "learning_rate": 1.998242205164425e-05, "loss": 0.5166, "step": 633 }, { "epoch": 0.07715241861880134, "grad_norm": 1.4345245361328125, "learning_rate": 1.9982306251213767e-05, "loss": 0.5435, "step": 634 }, { "epoch": 0.07727411013081838, "grad_norm": 4.756383419036865, "learning_rate": 1.9982190070935864e-05, "loss": 0.461, "step": 635 }, { "epoch": 0.07739580164283541, "grad_norm": 2.975379228591919, "learning_rate": 1.998207351081497e-05, "loss": 0.4201, "step": 636 }, { "epoch": 0.07751749315485244, "grad_norm": 1.7346969842910767, "learning_rate": 1.998195657085552e-05, "loss": 0.436, "step": 637 }, { "epoch": 0.07763918466686949, "grad_norm": 2.7414207458496094, "learning_rate": 1.9981839251061957e-05, "loss": 0.5508, "step": 638 }, { "epoch": 0.07776087617888652, "grad_norm": 2.9115066528320312, "learning_rate": 1.9981721551438757e-05, "loss": 0.4698, "step": 639 }, { "epoch": 0.07788256769090356, "grad_norm": 0.6065772175788879, "learning_rate": 1.998160347199038e-05, "loss": 0.4355, "step": 640 }, { "epoch": 0.07800425920292059, "grad_norm": 3.2246828079223633, "learning_rate": 1.9981485012721338e-05, "loss": 0.5056, "step": 641 }, { "epoch": 0.07812595071493764, "grad_norm": 1.8916140794754028, "learning_rate": 1.9981366173636125e-05, "loss": 0.4789, "step": 642 }, { "epoch": 0.07824764222695467, "grad_norm": 1.258357048034668, "learning_rate": 1.9981246954739272e-05, "loss": 0.5108, "step": 643 }, { "epoch": 0.0783693337389717, "grad_norm": 5.186991214752197, "learning_rate": 1.998112735603531e-05, "loss": 0.4405, "step": 644 }, { "epoch": 0.07849102525098875, "grad_norm": 4.149471282958984, "learning_rate": 1.9981007377528795e-05, "loss": 0.4755, "step": 645 }, { "epoch": 0.07861271676300578, "grad_norm": 1.185378074645996, "learning_rate": 1.998088701922429e-05, "loss": 0.5405, "step": 646 }, { "epoch": 0.07873440827502282, "grad_norm": 4.87866735458374, "learning_rate": 1.9980766281126373e-05, "loss": 0.4716, "step": 647 }, { "epoch": 0.07885609978703985, "grad_norm": 2.766418933868408, "learning_rate": 1.998064516323964e-05, "loss": 0.4536, "step": 648 }, { "epoch": 0.0789777912990569, "grad_norm": 3.7572224140167236, "learning_rate": 1.9980523665568704e-05, "loss": 0.5514, "step": 649 }, { "epoch": 0.07909948281107393, "grad_norm": 3.2041800022125244, "learning_rate": 1.998040178811818e-05, "loss": 0.5101, "step": 650 }, { "epoch": 0.07922117432309096, "grad_norm": 0.7075926661491394, "learning_rate": 1.9980279530892708e-05, "loss": 0.4541, "step": 651 }, { "epoch": 0.079342865835108, "grad_norm": 2.305121898651123, "learning_rate": 1.998015689389694e-05, "loss": 0.4913, "step": 652 }, { "epoch": 0.07946455734712504, "grad_norm": 3.071054697036743, "learning_rate": 1.9980033877135548e-05, "loss": 0.5009, "step": 653 }, { "epoch": 0.07958624885914208, "grad_norm": 1.4548903703689575, "learning_rate": 1.9979910480613212e-05, "loss": 0.4932, "step": 654 }, { "epoch": 0.07970794037115911, "grad_norm": 1.6935406923294067, "learning_rate": 1.9979786704334624e-05, "loss": 0.4554, "step": 655 }, { "epoch": 0.07982963188317616, "grad_norm": 0.7648898363113403, "learning_rate": 1.9979662548304492e-05, "loss": 0.5108, "step": 656 }, { "epoch": 0.07995132339519319, "grad_norm": 4.780690670013428, "learning_rate": 1.9979538012527544e-05, "loss": 0.4358, "step": 657 }, { "epoch": 0.08007301490721022, "grad_norm": 2.169375419616699, "learning_rate": 1.9979413097008518e-05, "loss": 0.4821, "step": 658 }, { "epoch": 0.08019470641922725, "grad_norm": 3.0978429317474365, "learning_rate": 1.997928780175217e-05, "loss": 0.487, "step": 659 }, { "epoch": 0.0803163979312443, "grad_norm": 1.2815518379211426, "learning_rate": 1.997916212676326e-05, "loss": 0.5157, "step": 660 }, { "epoch": 0.08043808944326133, "grad_norm": 0.915164053440094, "learning_rate": 1.997903607204658e-05, "loss": 0.472, "step": 661 }, { "epoch": 0.08055978095527837, "grad_norm": 2.605832099914551, "learning_rate": 1.997890963760692e-05, "loss": 0.521, "step": 662 }, { "epoch": 0.0806814724672954, "grad_norm": 1.9893995523452759, "learning_rate": 1.9978782823449094e-05, "loss": 0.481, "step": 663 }, { "epoch": 0.08080316397931245, "grad_norm": 2.0475409030914307, "learning_rate": 1.997865562957793e-05, "loss": 0.4323, "step": 664 }, { "epoch": 0.08092485549132948, "grad_norm": 1.4584271907806396, "learning_rate": 1.9978528055998258e-05, "loss": 0.4443, "step": 665 }, { "epoch": 0.08104654700334651, "grad_norm": 1.6210416555404663, "learning_rate": 1.997840010271494e-05, "loss": 0.4888, "step": 666 }, { "epoch": 0.08116823851536355, "grad_norm": 1.6450836658477783, "learning_rate": 1.9978271769732848e-05, "loss": 0.4643, "step": 667 }, { "epoch": 0.0812899300273806, "grad_norm": 1.25798499584198, "learning_rate": 1.997814305705686e-05, "loss": 0.4872, "step": 668 }, { "epoch": 0.08141162153939763, "grad_norm": 1.6350147724151611, "learning_rate": 1.9978013964691875e-05, "loss": 0.4669, "step": 669 }, { "epoch": 0.08153331305141466, "grad_norm": 1.0698541402816772, "learning_rate": 1.9977884492642802e-05, "loss": 0.4882, "step": 670 }, { "epoch": 0.08165500456343171, "grad_norm": 3.3107619285583496, "learning_rate": 1.9977754640914572e-05, "loss": 0.4375, "step": 671 }, { "epoch": 0.08177669607544874, "grad_norm": 4.443649768829346, "learning_rate": 1.9977624409512128e-05, "loss": 0.4511, "step": 672 }, { "epoch": 0.08189838758746577, "grad_norm": 4.450760364532471, "learning_rate": 1.9977493798440423e-05, "loss": 0.4579, "step": 673 }, { "epoch": 0.0820200790994828, "grad_norm": 0.8068259358406067, "learning_rate": 1.9977362807704424e-05, "loss": 0.4998, "step": 674 }, { "epoch": 0.08214177061149985, "grad_norm": 1.5836198329925537, "learning_rate": 1.9977231437309118e-05, "loss": 0.526, "step": 675 }, { "epoch": 0.08226346212351689, "grad_norm": 3.4682936668395996, "learning_rate": 1.9977099687259507e-05, "loss": 0.5666, "step": 676 }, { "epoch": 0.08238515363553392, "grad_norm": 4.291449069976807, "learning_rate": 1.9976967557560598e-05, "loss": 0.5683, "step": 677 }, { "epoch": 0.08250684514755095, "grad_norm": 1.4395569562911987, "learning_rate": 1.9976835048217422e-05, "loss": 0.4415, "step": 678 }, { "epoch": 0.082628536659568, "grad_norm": 0.5459081530570984, "learning_rate": 1.9976702159235025e-05, "loss": 0.4887, "step": 679 }, { "epoch": 0.08275022817158503, "grad_norm": 0.7991795539855957, "learning_rate": 1.9976568890618458e-05, "loss": 0.5233, "step": 680 }, { "epoch": 0.08287191968360207, "grad_norm": 2.7139601707458496, "learning_rate": 1.9976435242372796e-05, "loss": 0.4678, "step": 681 }, { "epoch": 0.0829936111956191, "grad_norm": 1.2524338960647583, "learning_rate": 1.997630121450312e-05, "loss": 0.4884, "step": 682 }, { "epoch": 0.08311530270763615, "grad_norm": 0.6839807033538818, "learning_rate": 1.9976166807014534e-05, "loss": 0.4555, "step": 683 }, { "epoch": 0.08323699421965318, "grad_norm": 2.2753326892852783, "learning_rate": 1.9976032019912153e-05, "loss": 0.5509, "step": 684 }, { "epoch": 0.08335868573167021, "grad_norm": 0.8514805436134338, "learning_rate": 1.9975896853201104e-05, "loss": 0.4656, "step": 685 }, { "epoch": 0.08348037724368726, "grad_norm": 0.7678331732749939, "learning_rate": 1.997576130688653e-05, "loss": 0.4715, "step": 686 }, { "epoch": 0.08360206875570429, "grad_norm": 0.6587056517601013, "learning_rate": 1.997562538097359e-05, "loss": 0.4708, "step": 687 }, { "epoch": 0.08372376026772133, "grad_norm": 1.8988431692123413, "learning_rate": 1.9975489075467456e-05, "loss": 0.5027, "step": 688 }, { "epoch": 0.08384545177973836, "grad_norm": 1.1294777393341064, "learning_rate": 1.9975352390373315e-05, "loss": 0.4647, "step": 689 }, { "epoch": 0.0839671432917554, "grad_norm": 2.2918758392333984, "learning_rate": 1.9975215325696368e-05, "loss": 0.4432, "step": 690 }, { "epoch": 0.08408883480377244, "grad_norm": 0.8133344650268555, "learning_rate": 1.997507788144183e-05, "loss": 0.4883, "step": 691 }, { "epoch": 0.08421052631578947, "grad_norm": 0.8164210319519043, "learning_rate": 1.9974940057614927e-05, "loss": 0.4511, "step": 692 }, { "epoch": 0.0843322178278065, "grad_norm": 2.617898464202881, "learning_rate": 1.9974801854220913e-05, "loss": 0.56, "step": 693 }, { "epoch": 0.08445390933982355, "grad_norm": 0.9751055240631104, "learning_rate": 1.997466327126504e-05, "loss": 0.4568, "step": 694 }, { "epoch": 0.08457560085184058, "grad_norm": 0.8269926905632019, "learning_rate": 1.997452430875258e-05, "loss": 0.4883, "step": 695 }, { "epoch": 0.08469729236385762, "grad_norm": 2.613527774810791, "learning_rate": 1.9974384966688832e-05, "loss": 0.5167, "step": 696 }, { "epoch": 0.08481898387587465, "grad_norm": 1.7332885265350342, "learning_rate": 1.9974245245079086e-05, "loss": 0.5151, "step": 697 }, { "epoch": 0.0849406753878917, "grad_norm": 0.8025236129760742, "learning_rate": 1.997410514392866e-05, "loss": 0.4848, "step": 698 }, { "epoch": 0.08506236689990873, "grad_norm": 2.6296651363372803, "learning_rate": 1.9973964663242892e-05, "loss": 0.4481, "step": 699 }, { "epoch": 0.08518405841192576, "grad_norm": 2.230381727218628, "learning_rate": 1.997382380302712e-05, "loss": 0.4668, "step": 700 }, { "epoch": 0.08530574992394281, "grad_norm": 2.0779402256011963, "learning_rate": 1.997368256328671e-05, "loss": 0.5133, "step": 701 }, { "epoch": 0.08542744143595984, "grad_norm": 2.086312770843506, "learning_rate": 1.9973540944027033e-05, "loss": 0.4751, "step": 702 }, { "epoch": 0.08554913294797688, "grad_norm": 2.4139175415039062, "learning_rate": 1.997339894525348e-05, "loss": 0.5354, "step": 703 }, { "epoch": 0.08567082445999391, "grad_norm": 1.2465251684188843, "learning_rate": 1.9973256566971455e-05, "loss": 0.4577, "step": 704 }, { "epoch": 0.08579251597201096, "grad_norm": 1.7190598249435425, "learning_rate": 1.997311380918637e-05, "loss": 0.4723, "step": 705 }, { "epoch": 0.08591420748402799, "grad_norm": 3.6874632835388184, "learning_rate": 1.9972970671903666e-05, "loss": 0.5263, "step": 706 }, { "epoch": 0.08603589899604502, "grad_norm": 3.3435325622558594, "learning_rate": 1.9972827155128782e-05, "loss": 0.5053, "step": 707 }, { "epoch": 0.08615759050806206, "grad_norm": 2.0431723594665527, "learning_rate": 1.9972683258867183e-05, "loss": 0.4962, "step": 708 }, { "epoch": 0.0862792820200791, "grad_norm": 0.8176119923591614, "learning_rate": 1.997253898312434e-05, "loss": 0.5145, "step": 709 }, { "epoch": 0.08640097353209614, "grad_norm": 2.744959831237793, "learning_rate": 1.997239432790575e-05, "loss": 0.4921, "step": 710 }, { "epoch": 0.08652266504411317, "grad_norm": 1.4425395727157593, "learning_rate": 1.9972249293216913e-05, "loss": 0.5073, "step": 711 }, { "epoch": 0.0866443565561302, "grad_norm": 1.0155556201934814, "learning_rate": 1.9972103879063352e-05, "loss": 0.5211, "step": 712 }, { "epoch": 0.08676604806814725, "grad_norm": 1.4708799123764038, "learning_rate": 1.9971958085450594e-05, "loss": 0.5522, "step": 713 }, { "epoch": 0.08688773958016428, "grad_norm": 2.139232873916626, "learning_rate": 1.9971811912384193e-05, "loss": 0.4502, "step": 714 }, { "epoch": 0.08700943109218132, "grad_norm": 2.236619710922241, "learning_rate": 1.9971665359869705e-05, "loss": 0.4659, "step": 715 }, { "epoch": 0.08713112260419836, "grad_norm": 0.6716448068618774, "learning_rate": 1.9971518427912713e-05, "loss": 0.4488, "step": 716 }, { "epoch": 0.0872528141162154, "grad_norm": 5.232616424560547, "learning_rate": 1.99713711165188e-05, "loss": 0.6131, "step": 717 }, { "epoch": 0.08737450562823243, "grad_norm": 4.66196346282959, "learning_rate": 1.997122342569358e-05, "loss": 0.591, "step": 718 }, { "epoch": 0.08749619714024946, "grad_norm": 2.9173707962036133, "learning_rate": 1.997107535544267e-05, "loss": 0.5139, "step": 719 }, { "epoch": 0.08761788865226651, "grad_norm": 2.5711612701416016, "learning_rate": 1.9970926905771704e-05, "loss": 0.502, "step": 720 }, { "epoch": 0.08773958016428354, "grad_norm": 3.4926297664642334, "learning_rate": 1.997077807668633e-05, "loss": 0.4308, "step": 721 }, { "epoch": 0.08786127167630058, "grad_norm": 2.6729512214660645, "learning_rate": 1.997062886819221e-05, "loss": 0.4532, "step": 722 }, { "epoch": 0.08798296318831761, "grad_norm": 0.8609975576400757, "learning_rate": 1.9970479280295024e-05, "loss": 0.5278, "step": 723 }, { "epoch": 0.08810465470033466, "grad_norm": 4.873531341552734, "learning_rate": 1.9970329313000463e-05, "loss": 0.4265, "step": 724 }, { "epoch": 0.08822634621235169, "grad_norm": 2.0352935791015625, "learning_rate": 1.9970178966314238e-05, "loss": 0.5317, "step": 725 }, { "epoch": 0.08834803772436872, "grad_norm": 0.8737489581108093, "learning_rate": 1.9970028240242064e-05, "loss": 0.5306, "step": 726 }, { "epoch": 0.08846972923638577, "grad_norm": 1.6529440879821777, "learning_rate": 1.9969877134789678e-05, "loss": 0.4342, "step": 727 }, { "epoch": 0.0885914207484028, "grad_norm": 1.2789009809494019, "learning_rate": 1.996972564996283e-05, "loss": 0.5364, "step": 728 }, { "epoch": 0.08871311226041984, "grad_norm": 1.3129600286483765, "learning_rate": 1.9969573785767283e-05, "loss": 0.4785, "step": 729 }, { "epoch": 0.08883480377243687, "grad_norm": 0.9026879072189331, "learning_rate": 1.9969421542208822e-05, "loss": 0.467, "step": 730 }, { "epoch": 0.08895649528445392, "grad_norm": 1.4935601949691772, "learning_rate": 1.9969268919293234e-05, "loss": 0.4896, "step": 731 }, { "epoch": 0.08907818679647095, "grad_norm": 0.9021433591842651, "learning_rate": 1.996911591702633e-05, "loss": 0.4782, "step": 732 }, { "epoch": 0.08919987830848798, "grad_norm": 0.6788372993469238, "learning_rate": 1.9968962535413928e-05, "loss": 0.4376, "step": 733 }, { "epoch": 0.08932156982050501, "grad_norm": 0.7806774377822876, "learning_rate": 1.9968808774461864e-05, "loss": 0.4691, "step": 734 }, { "epoch": 0.08944326133252206, "grad_norm": 1.081540822982788, "learning_rate": 1.9968654634175995e-05, "loss": 0.5036, "step": 735 }, { "epoch": 0.0895649528445391, "grad_norm": 2.1174347400665283, "learning_rate": 1.9968500114562186e-05, "loss": 0.4296, "step": 736 }, { "epoch": 0.08968664435655613, "grad_norm": 0.7901777625083923, "learning_rate": 1.9968345215626314e-05, "loss": 0.5028, "step": 737 }, { "epoch": 0.08980833586857316, "grad_norm": 1.5048437118530273, "learning_rate": 1.996818993737427e-05, "loss": 0.5052, "step": 738 }, { "epoch": 0.08993002738059021, "grad_norm": 3.451389789581299, "learning_rate": 1.9968034279811966e-05, "loss": 0.4719, "step": 739 }, { "epoch": 0.09005171889260724, "grad_norm": 0.8119122385978699, "learning_rate": 1.9967878242945328e-05, "loss": 0.5408, "step": 740 }, { "epoch": 0.09017341040462427, "grad_norm": 1.7198354005813599, "learning_rate": 1.9967721826780284e-05, "loss": 0.4764, "step": 741 }, { "epoch": 0.09029510191664132, "grad_norm": 0.8779088258743286, "learning_rate": 1.9967565031322797e-05, "loss": 0.5024, "step": 742 }, { "epoch": 0.09041679342865835, "grad_norm": 3.365600109100342, "learning_rate": 1.996740785657883e-05, "loss": 0.5576, "step": 743 }, { "epoch": 0.09053848494067539, "grad_norm": 0.9623547196388245, "learning_rate": 1.996725030255436e-05, "loss": 0.4835, "step": 744 }, { "epoch": 0.09066017645269242, "grad_norm": 0.7516545653343201, "learning_rate": 1.9967092369255386e-05, "loss": 0.4944, "step": 745 }, { "epoch": 0.09078186796470947, "grad_norm": 1.2452806234359741, "learning_rate": 1.9966934056687917e-05, "loss": 0.5219, "step": 746 }, { "epoch": 0.0909035594767265, "grad_norm": 3.2794156074523926, "learning_rate": 1.9966775364857977e-05, "loss": 0.413, "step": 747 }, { "epoch": 0.09102525098874353, "grad_norm": 1.883211612701416, "learning_rate": 1.9966616293771602e-05, "loss": 0.523, "step": 748 }, { "epoch": 0.09114694250076057, "grad_norm": 1.2692965269088745, "learning_rate": 1.996645684343485e-05, "loss": 0.4636, "step": 749 }, { "epoch": 0.09126863401277761, "grad_norm": 2.2186050415039062, "learning_rate": 1.9966297013853784e-05, "loss": 0.4492, "step": 750 }, { "epoch": 0.09139032552479465, "grad_norm": 0.8209418058395386, "learning_rate": 1.996613680503449e-05, "loss": 0.4841, "step": 751 }, { "epoch": 0.09151201703681168, "grad_norm": 0.8032373785972595, "learning_rate": 1.9965976216983057e-05, "loss": 0.4351, "step": 752 }, { "epoch": 0.09163370854882871, "grad_norm": 1.9146921634674072, "learning_rate": 1.9965815249705603e-05, "loss": 0.4913, "step": 753 }, { "epoch": 0.09175540006084576, "grad_norm": 2.451493978500366, "learning_rate": 1.996565390320825e-05, "loss": 0.393, "step": 754 }, { "epoch": 0.09187709157286279, "grad_norm": 1.9551799297332764, "learning_rate": 1.996549217749714e-05, "loss": 0.5184, "step": 755 }, { "epoch": 0.09199878308487983, "grad_norm": 2.2005109786987305, "learning_rate": 1.9965330072578423e-05, "loss": 0.5057, "step": 756 }, { "epoch": 0.09212047459689687, "grad_norm": 1.9353986978530884, "learning_rate": 1.996516758845827e-05, "loss": 0.4057, "step": 757 }, { "epoch": 0.0922421661089139, "grad_norm": 0.7365932464599609, "learning_rate": 1.9965004725142867e-05, "loss": 0.492, "step": 758 }, { "epoch": 0.09236385762093094, "grad_norm": 2.247995376586914, "learning_rate": 1.9964841482638406e-05, "loss": 0.4588, "step": 759 }, { "epoch": 0.09248554913294797, "grad_norm": 0.7459166049957275, "learning_rate": 1.9964677860951097e-05, "loss": 0.43, "step": 760 }, { "epoch": 0.09260724064496502, "grad_norm": 1.3299825191497803, "learning_rate": 1.9964513860087172e-05, "loss": 0.4365, "step": 761 }, { "epoch": 0.09272893215698205, "grad_norm": 1.968151569366455, "learning_rate": 1.9964349480052872e-05, "loss": 0.5189, "step": 762 }, { "epoch": 0.09285062366899909, "grad_norm": 1.3241199254989624, "learning_rate": 1.9964184720854444e-05, "loss": 0.4101, "step": 763 }, { "epoch": 0.09297231518101612, "grad_norm": 1.8080377578735352, "learning_rate": 1.9964019582498167e-05, "loss": 0.5639, "step": 764 }, { "epoch": 0.09309400669303317, "grad_norm": 0.9407429099082947, "learning_rate": 1.996385406499032e-05, "loss": 0.4957, "step": 765 }, { "epoch": 0.0932156982050502, "grad_norm": 2.313314914703369, "learning_rate": 1.99636881683372e-05, "loss": 0.4636, "step": 766 }, { "epoch": 0.09333738971706723, "grad_norm": 2.7045557498931885, "learning_rate": 1.9963521892545122e-05, "loss": 0.5549, "step": 767 }, { "epoch": 0.09345908122908426, "grad_norm": 2.3331668376922607, "learning_rate": 1.9963355237620412e-05, "loss": 0.4176, "step": 768 }, { "epoch": 0.09358077274110131, "grad_norm": 1.3269882202148438, "learning_rate": 1.9963188203569413e-05, "loss": 0.4579, "step": 769 }, { "epoch": 0.09370246425311834, "grad_norm": 2.986143112182617, "learning_rate": 1.996302079039848e-05, "loss": 0.5186, "step": 770 }, { "epoch": 0.09382415576513538, "grad_norm": 1.0925177335739136, "learning_rate": 1.9962852998113983e-05, "loss": 0.4653, "step": 771 }, { "epoch": 0.09394584727715243, "grad_norm": 0.6733190417289734, "learning_rate": 1.996268482672231e-05, "loss": 0.4671, "step": 772 }, { "epoch": 0.09406753878916946, "grad_norm": 4.965397834777832, "learning_rate": 1.9962516276229856e-05, "loss": 0.5807, "step": 773 }, { "epoch": 0.09418923030118649, "grad_norm": 1.6570329666137695, "learning_rate": 1.9962347346643035e-05, "loss": 0.4375, "step": 774 }, { "epoch": 0.09431092181320352, "grad_norm": 0.9413464069366455, "learning_rate": 1.9962178037968282e-05, "loss": 0.458, "step": 775 }, { "epoch": 0.09443261332522057, "grad_norm": 1.3105307817459106, "learning_rate": 1.9962008350212028e-05, "loss": 0.5169, "step": 776 }, { "epoch": 0.0945543048372376, "grad_norm": 1.5985509157180786, "learning_rate": 1.9961838283380737e-05, "loss": 0.4437, "step": 777 }, { "epoch": 0.09467599634925464, "grad_norm": 1.004827618598938, "learning_rate": 1.996166783748088e-05, "loss": 0.4798, "step": 778 }, { "epoch": 0.09479768786127167, "grad_norm": 2.7168939113616943, "learning_rate": 1.9961497012518944e-05, "loss": 0.4753, "step": 779 }, { "epoch": 0.09491937937328872, "grad_norm": 1.3701131343841553, "learning_rate": 1.9961325808501425e-05, "loss": 0.5003, "step": 780 }, { "epoch": 0.09504107088530575, "grad_norm": 2.065967559814453, "learning_rate": 1.9961154225434843e-05, "loss": 0.5236, "step": 781 }, { "epoch": 0.09516276239732278, "grad_norm": 0.8278501629829407, "learning_rate": 1.9960982263325723e-05, "loss": 0.507, "step": 782 }, { "epoch": 0.09528445390933982, "grad_norm": 1.4648507833480835, "learning_rate": 1.9960809922180607e-05, "loss": 0.4769, "step": 783 }, { "epoch": 0.09540614542135686, "grad_norm": 4.1555399894714355, "learning_rate": 1.996063720200606e-05, "loss": 0.4459, "step": 784 }, { "epoch": 0.0955278369333739, "grad_norm": 1.7836990356445312, "learning_rate": 1.996046410280865e-05, "loss": 0.5033, "step": 785 }, { "epoch": 0.09564952844539093, "grad_norm": 1.0879727602005005, "learning_rate": 1.996029062459496e-05, "loss": 0.4188, "step": 786 }, { "epoch": 0.09577121995740798, "grad_norm": 3.2963836193084717, "learning_rate": 1.99601167673716e-05, "loss": 0.5207, "step": 787 }, { "epoch": 0.09589291146942501, "grad_norm": 1.8777716159820557, "learning_rate": 1.9959942531145176e-05, "loss": 0.4218, "step": 788 }, { "epoch": 0.09601460298144204, "grad_norm": 2.411846160888672, "learning_rate": 1.9959767915922325e-05, "loss": 0.4521, "step": 789 }, { "epoch": 0.09613629449345908, "grad_norm": 4.264321804046631, "learning_rate": 1.995959292170969e-05, "loss": 0.5327, "step": 790 }, { "epoch": 0.09625798600547612, "grad_norm": 1.3433444499969482, "learning_rate": 1.9959417548513926e-05, "loss": 0.4393, "step": 791 }, { "epoch": 0.09637967751749316, "grad_norm": 0.7187265157699585, "learning_rate": 1.9959241796341714e-05, "loss": 0.4836, "step": 792 }, { "epoch": 0.09650136902951019, "grad_norm": 1.6666771173477173, "learning_rate": 1.995906566519973e-05, "loss": 0.4286, "step": 793 }, { "epoch": 0.09662306054152722, "grad_norm": 1.0978599786758423, "learning_rate": 1.9958889155094693e-05, "loss": 0.4652, "step": 794 }, { "epoch": 0.09674475205354427, "grad_norm": 2.4084653854370117, "learning_rate": 1.99587122660333e-05, "loss": 0.4832, "step": 795 }, { "epoch": 0.0968664435655613, "grad_norm": 1.3513977527618408, "learning_rate": 1.9958534998022298e-05, "loss": 0.5276, "step": 796 }, { "epoch": 0.09698813507757834, "grad_norm": 0.6939101219177246, "learning_rate": 1.9958357351068425e-05, "loss": 0.4665, "step": 797 }, { "epoch": 0.09710982658959537, "grad_norm": 2.0230472087860107, "learning_rate": 1.9958179325178443e-05, "loss": 0.4881, "step": 798 }, { "epoch": 0.09723151810161242, "grad_norm": 1.1054073572158813, "learning_rate": 1.9958000920359123e-05, "loss": 0.4991, "step": 799 }, { "epoch": 0.09735320961362945, "grad_norm": 2.6655113697052, "learning_rate": 1.9957822136617257e-05, "loss": 0.5364, "step": 800 }, { "epoch": 0.09747490112564648, "grad_norm": 1.5029679536819458, "learning_rate": 1.9957642973959647e-05, "loss": 0.4314, "step": 801 }, { "epoch": 0.09759659263766353, "grad_norm": 0.5799332857131958, "learning_rate": 1.9957463432393113e-05, "loss": 0.4507, "step": 802 }, { "epoch": 0.09771828414968056, "grad_norm": 3.1802175045013428, "learning_rate": 1.9957283511924483e-05, "loss": 0.5046, "step": 803 }, { "epoch": 0.0978399756616976, "grad_norm": 1.7593673467636108, "learning_rate": 1.9957103212560605e-05, "loss": 0.4751, "step": 804 }, { "epoch": 0.09796166717371463, "grad_norm": 1.0430735349655151, "learning_rate": 1.9956922534308338e-05, "loss": 0.518, "step": 805 }, { "epoch": 0.09808335868573168, "grad_norm": 3.0028467178344727, "learning_rate": 1.995674147717456e-05, "loss": 0.4686, "step": 806 }, { "epoch": 0.09820505019774871, "grad_norm": 0.6665245294570923, "learning_rate": 1.995656004116616e-05, "loss": 0.5079, "step": 807 }, { "epoch": 0.09832674170976574, "grad_norm": 3.278702974319458, "learning_rate": 1.995637822629004e-05, "loss": 0.4364, "step": 808 }, { "epoch": 0.09844843322178277, "grad_norm": 2.833733081817627, "learning_rate": 1.9956196032553122e-05, "loss": 0.4419, "step": 809 }, { "epoch": 0.09857012473379982, "grad_norm": 0.650507390499115, "learning_rate": 1.995601345996234e-05, "loss": 0.4976, "step": 810 }, { "epoch": 0.09869181624581685, "grad_norm": 1.0216940641403198, "learning_rate": 1.995583050852463e-05, "loss": 0.4909, "step": 811 }, { "epoch": 0.09881350775783389, "grad_norm": 1.4588136672973633, "learning_rate": 1.9955647178246965e-05, "loss": 0.4329, "step": 812 }, { "epoch": 0.09893519926985093, "grad_norm": 1.6294156312942505, "learning_rate": 1.995546346913632e-05, "loss": 0.4915, "step": 813 }, { "epoch": 0.09905689078186797, "grad_norm": 2.5995635986328125, "learning_rate": 1.995527938119968e-05, "loss": 0.4922, "step": 814 }, { "epoch": 0.099178582293885, "grad_norm": 2.078435182571411, "learning_rate": 1.9955094914444056e-05, "loss": 0.432, "step": 815 }, { "epoch": 0.09930027380590203, "grad_norm": 2.96297550201416, "learning_rate": 1.9954910068876465e-05, "loss": 0.5354, "step": 816 }, { "epoch": 0.09942196531791908, "grad_norm": 2.1711313724517822, "learning_rate": 1.995472484450394e-05, "loss": 0.435, "step": 817 }, { "epoch": 0.09954365682993611, "grad_norm": 3.4678258895874023, "learning_rate": 1.9954539241333527e-05, "loss": 0.3969, "step": 818 }, { "epoch": 0.09966534834195315, "grad_norm": 0.750209391117096, "learning_rate": 1.9954353259372295e-05, "loss": 0.4628, "step": 819 }, { "epoch": 0.09978703985397018, "grad_norm": 2.4872379302978516, "learning_rate": 1.9954166898627313e-05, "loss": 0.505, "step": 820 }, { "epoch": 0.09990873136598723, "grad_norm": 0.9021669626235962, "learning_rate": 1.995398015910568e-05, "loss": 0.487, "step": 821 }, { "epoch": 0.10003042287800426, "grad_norm": 1.3522422313690186, "learning_rate": 1.9953793040814492e-05, "loss": 0.4986, "step": 822 }, { "epoch": 0.1001521143900213, "grad_norm": 2.327824592590332, "learning_rate": 1.995360554376088e-05, "loss": 0.5162, "step": 823 }, { "epoch": 0.10027380590203833, "grad_norm": 2.900378942489624, "learning_rate": 1.9953417667951975e-05, "loss": 0.4366, "step": 824 }, { "epoch": 0.10039549741405537, "grad_norm": 1.3112826347351074, "learning_rate": 1.9953229413394925e-05, "loss": 0.4836, "step": 825 }, { "epoch": 0.1005171889260724, "grad_norm": 1.839365005493164, "learning_rate": 1.9953040780096897e-05, "loss": 0.4911, "step": 826 }, { "epoch": 0.10063888043808944, "grad_norm": 2.9116950035095215, "learning_rate": 1.995285176806506e-05, "loss": 0.4629, "step": 827 }, { "epoch": 0.10076057195010649, "grad_norm": 0.6931069493293762, "learning_rate": 1.9952662377306614e-05, "loss": 0.4987, "step": 828 }, { "epoch": 0.10088226346212352, "grad_norm": 1.930259108543396, "learning_rate": 1.9952472607828762e-05, "loss": 0.4959, "step": 829 }, { "epoch": 0.10100395497414055, "grad_norm": 2.0763046741485596, "learning_rate": 1.995228245963873e-05, "loss": 0.4727, "step": 830 }, { "epoch": 0.10112564648615759, "grad_norm": 0.8251340389251709, "learning_rate": 1.9952091932743747e-05, "loss": 0.4737, "step": 831 }, { "epoch": 0.10124733799817463, "grad_norm": 1.5569857358932495, "learning_rate": 1.995190102715107e-05, "loss": 0.4774, "step": 832 }, { "epoch": 0.10136902951019167, "grad_norm": 0.6953150033950806, "learning_rate": 1.9951709742867958e-05, "loss": 0.474, "step": 833 }, { "epoch": 0.1014907210222087, "grad_norm": 2.0638668537139893, "learning_rate": 1.995151807990169e-05, "loss": 0.4485, "step": 834 }, { "epoch": 0.10161241253422573, "grad_norm": 0.864733874797821, "learning_rate": 1.995132603825956e-05, "loss": 0.5076, "step": 835 }, { "epoch": 0.10173410404624278, "grad_norm": 1.438027024269104, "learning_rate": 1.9951133617948878e-05, "loss": 0.4838, "step": 836 }, { "epoch": 0.10185579555825981, "grad_norm": 1.6404670476913452, "learning_rate": 1.9950940818976968e-05, "loss": 0.4596, "step": 837 }, { "epoch": 0.10197748707027685, "grad_norm": 1.4606159925460815, "learning_rate": 1.9950747641351156e-05, "loss": 0.4691, "step": 838 }, { "epoch": 0.10209917858229388, "grad_norm": 2.0456371307373047, "learning_rate": 1.99505540850788e-05, "loss": 0.4973, "step": 839 }, { "epoch": 0.10222087009431093, "grad_norm": 1.6921428442001343, "learning_rate": 1.9950360150167268e-05, "loss": 0.4706, "step": 840 }, { "epoch": 0.10234256160632796, "grad_norm": 1.1583380699157715, "learning_rate": 1.9950165836623934e-05, "loss": 0.4314, "step": 841 }, { "epoch": 0.10246425311834499, "grad_norm": 0.7111881375312805, "learning_rate": 1.9949971144456195e-05, "loss": 0.4348, "step": 842 }, { "epoch": 0.10258594463036204, "grad_norm": 1.1225740909576416, "learning_rate": 1.9949776073671457e-05, "loss": 0.4831, "step": 843 }, { "epoch": 0.10270763614237907, "grad_norm": 0.5911033749580383, "learning_rate": 1.9949580624277148e-05, "loss": 0.4614, "step": 844 }, { "epoch": 0.1028293276543961, "grad_norm": 4.8914642333984375, "learning_rate": 1.9949384796280697e-05, "loss": 0.5864, "step": 845 }, { "epoch": 0.10295101916641314, "grad_norm": 0.8192464113235474, "learning_rate": 1.9949188589689565e-05, "loss": 0.4838, "step": 846 }, { "epoch": 0.10307271067843018, "grad_norm": 0.6399006843566895, "learning_rate": 1.994899200451121e-05, "loss": 0.4895, "step": 847 }, { "epoch": 0.10319440219044722, "grad_norm": 2.1333060264587402, "learning_rate": 1.994879504075312e-05, "loss": 0.4945, "step": 848 }, { "epoch": 0.10331609370246425, "grad_norm": 2.7365994453430176, "learning_rate": 1.994859769842278e-05, "loss": 0.4895, "step": 849 }, { "epoch": 0.10343778521448128, "grad_norm": 5.075249671936035, "learning_rate": 1.994839997752771e-05, "loss": 0.4565, "step": 850 }, { "epoch": 0.10355947672649833, "grad_norm": 2.683525562286377, "learning_rate": 1.9948201878075427e-05, "loss": 0.4617, "step": 851 }, { "epoch": 0.10368116823851536, "grad_norm": 1.4198042154312134, "learning_rate": 1.994800340007347e-05, "loss": 0.427, "step": 852 }, { "epoch": 0.1038028597505324, "grad_norm": 1.2674226760864258, "learning_rate": 1.9947804543529394e-05, "loss": 0.4575, "step": 853 }, { "epoch": 0.10392455126254943, "grad_norm": 3.470630168914795, "learning_rate": 1.9947605308450763e-05, "loss": 0.5183, "step": 854 }, { "epoch": 0.10404624277456648, "grad_norm": 6.524876594543457, "learning_rate": 1.994740569484516e-05, "loss": 0.6254, "step": 855 }, { "epoch": 0.10416793428658351, "grad_norm": 6.869317054748535, "learning_rate": 1.994720570272018e-05, "loss": 0.6211, "step": 856 }, { "epoch": 0.10428962579860054, "grad_norm": 1.907633900642395, "learning_rate": 1.9947005332083435e-05, "loss": 0.4342, "step": 857 }, { "epoch": 0.10441131731061759, "grad_norm": 4.02207088470459, "learning_rate": 1.9946804582942546e-05, "loss": 0.5247, "step": 858 }, { "epoch": 0.10453300882263462, "grad_norm": 0.6411753296852112, "learning_rate": 1.9946603455305157e-05, "loss": 0.4765, "step": 859 }, { "epoch": 0.10465470033465166, "grad_norm": 2.5639162063598633, "learning_rate": 1.9946401949178916e-05, "loss": 0.4834, "step": 860 }, { "epoch": 0.10477639184666869, "grad_norm": 3.0237371921539307, "learning_rate": 1.994620006457149e-05, "loss": 0.4756, "step": 861 }, { "epoch": 0.10489808335868574, "grad_norm": 5.799304485321045, "learning_rate": 1.9945997801490568e-05, "loss": 0.4627, "step": 862 }, { "epoch": 0.10501977487070277, "grad_norm": 3.4371719360351562, "learning_rate": 1.994579515994384e-05, "loss": 0.528, "step": 863 }, { "epoch": 0.1051414663827198, "grad_norm": 1.3571069240570068, "learning_rate": 1.9945592139939024e-05, "loss": 0.4961, "step": 864 }, { "epoch": 0.10526315789473684, "grad_norm": 1.2791073322296143, "learning_rate": 1.9945388741483837e-05, "loss": 0.4875, "step": 865 }, { "epoch": 0.10538484940675388, "grad_norm": 2.836144208908081, "learning_rate": 1.9945184964586022e-05, "loss": 0.3998, "step": 866 }, { "epoch": 0.10550654091877092, "grad_norm": 3.1967105865478516, "learning_rate": 1.9944980809253338e-05, "loss": 0.4914, "step": 867 }, { "epoch": 0.10562823243078795, "grad_norm": 5.369935989379883, "learning_rate": 1.9944776275493546e-05, "loss": 0.5375, "step": 868 }, { "epoch": 0.10574992394280498, "grad_norm": 0.90535569190979, "learning_rate": 1.994457136331443e-05, "loss": 0.3825, "step": 869 }, { "epoch": 0.10587161545482203, "grad_norm": 5.089012622833252, "learning_rate": 1.9944366072723794e-05, "loss": 0.5606, "step": 870 }, { "epoch": 0.10599330696683906, "grad_norm": 3.612671136856079, "learning_rate": 1.9944160403729444e-05, "loss": 0.5026, "step": 871 }, { "epoch": 0.1061149984788561, "grad_norm": 1.5098363161087036, "learning_rate": 1.9943954356339207e-05, "loss": 0.3872, "step": 872 }, { "epoch": 0.10623668999087314, "grad_norm": 3.0710408687591553, "learning_rate": 1.994374793056092e-05, "loss": 0.5371, "step": 873 }, { "epoch": 0.10635838150289018, "grad_norm": 0.7500321269035339, "learning_rate": 1.9943541126402447e-05, "loss": 0.4474, "step": 874 }, { "epoch": 0.10648007301490721, "grad_norm": 0.9233241677284241, "learning_rate": 1.994333394387165e-05, "loss": 0.4565, "step": 875 }, { "epoch": 0.10660176452692424, "grad_norm": 3.2471554279327393, "learning_rate": 1.9943126382976413e-05, "loss": 0.583, "step": 876 }, { "epoch": 0.10672345603894129, "grad_norm": 1.9059737920761108, "learning_rate": 1.994291844372464e-05, "loss": 0.5059, "step": 877 }, { "epoch": 0.10684514755095832, "grad_norm": 4.65517520904541, "learning_rate": 1.9942710126124238e-05, "loss": 0.4765, "step": 878 }, { "epoch": 0.10696683906297536, "grad_norm": 4.789467811584473, "learning_rate": 1.9942501430183135e-05, "loss": 0.4697, "step": 879 }, { "epoch": 0.10708853057499239, "grad_norm": 3.8766236305236816, "learning_rate": 1.994229235590927e-05, "loss": 0.47, "step": 880 }, { "epoch": 0.10721022208700944, "grad_norm": 2.6545610427856445, "learning_rate": 1.9942082903310607e-05, "loss": 0.5592, "step": 881 }, { "epoch": 0.10733191359902647, "grad_norm": 0.9934804439544678, "learning_rate": 1.994187307239511e-05, "loss": 0.4918, "step": 882 }, { "epoch": 0.1074536051110435, "grad_norm": 0.7720689177513123, "learning_rate": 1.994166286317076e-05, "loss": 0.4565, "step": 883 }, { "epoch": 0.10757529662306055, "grad_norm": 0.6046221852302551, "learning_rate": 1.9941452275645562e-05, "loss": 0.4436, "step": 884 }, { "epoch": 0.10769698813507758, "grad_norm": 2.1446096897125244, "learning_rate": 1.9941241309827525e-05, "loss": 0.5119, "step": 885 }, { "epoch": 0.10781867964709461, "grad_norm": 1.3840595483779907, "learning_rate": 1.9941029965724683e-05, "loss": 0.4771, "step": 886 }, { "epoch": 0.10794037115911165, "grad_norm": 0.8104763031005859, "learning_rate": 1.994081824334507e-05, "loss": 0.4435, "step": 887 }, { "epoch": 0.1080620626711287, "grad_norm": 0.6394281387329102, "learning_rate": 1.994060614269675e-05, "loss": 0.4484, "step": 888 }, { "epoch": 0.10818375418314573, "grad_norm": 0.690391480922699, "learning_rate": 1.9940393663787788e-05, "loss": 0.4747, "step": 889 }, { "epoch": 0.10830544569516276, "grad_norm": 1.1946213245391846, "learning_rate": 1.9940180806626275e-05, "loss": 0.4622, "step": 890 }, { "epoch": 0.1084271372071798, "grad_norm": 1.191467046737671, "learning_rate": 1.9939967571220306e-05, "loss": 0.467, "step": 891 }, { "epoch": 0.10854882871919684, "grad_norm": 1.7758926153182983, "learning_rate": 1.9939753957577994e-05, "loss": 0.4391, "step": 892 }, { "epoch": 0.10867052023121387, "grad_norm": 3.4437081813812256, "learning_rate": 1.993953996570747e-05, "loss": 0.5467, "step": 893 }, { "epoch": 0.10879221174323091, "grad_norm": 0.9177939891815186, "learning_rate": 1.9939325595616878e-05, "loss": 0.4543, "step": 894 }, { "epoch": 0.10891390325524794, "grad_norm": 1.0351811647415161, "learning_rate": 1.9939110847314373e-05, "loss": 0.5189, "step": 895 }, { "epoch": 0.10903559476726499, "grad_norm": 3.4975383281707764, "learning_rate": 1.9938895720808127e-05, "loss": 0.5733, "step": 896 }, { "epoch": 0.10915728627928202, "grad_norm": 1.1447761058807373, "learning_rate": 1.9938680216106326e-05, "loss": 0.4417, "step": 897 }, { "epoch": 0.10927897779129905, "grad_norm": 0.6356577277183533, "learning_rate": 1.9938464333217175e-05, "loss": 0.4615, "step": 898 }, { "epoch": 0.1094006693033161, "grad_norm": 4.211939334869385, "learning_rate": 1.993824807214888e-05, "loss": 0.5406, "step": 899 }, { "epoch": 0.10952236081533313, "grad_norm": 0.8971146941184998, "learning_rate": 1.9938031432909675e-05, "loss": 0.4292, "step": 900 }, { "epoch": 0.10964405232735017, "grad_norm": 2.098039150238037, "learning_rate": 1.9937814415507807e-05, "loss": 0.5111, "step": 901 }, { "epoch": 0.1097657438393672, "grad_norm": 0.7762995958328247, "learning_rate": 1.9937597019951528e-05, "loss": 0.5062, "step": 902 }, { "epoch": 0.10988743535138425, "grad_norm": 0.6407161355018616, "learning_rate": 1.9937379246249113e-05, "loss": 0.5024, "step": 903 }, { "epoch": 0.11000912686340128, "grad_norm": 0.8079186081886292, "learning_rate": 1.9937161094408845e-05, "loss": 0.5342, "step": 904 }, { "epoch": 0.11013081837541831, "grad_norm": 5.116917133331299, "learning_rate": 1.9936942564439033e-05, "loss": 0.4633, "step": 905 }, { "epoch": 0.11025250988743535, "grad_norm": 2.475346326828003, "learning_rate": 1.9936723656347987e-05, "loss": 0.4502, "step": 906 }, { "epoch": 0.11037420139945239, "grad_norm": 1.9243186712265015, "learning_rate": 1.9936504370144035e-05, "loss": 0.4578, "step": 907 }, { "epoch": 0.11049589291146943, "grad_norm": 1.1755101680755615, "learning_rate": 1.993628470583553e-05, "loss": 0.5217, "step": 908 }, { "epoch": 0.11061758442348646, "grad_norm": 1.224277138710022, "learning_rate": 1.9936064663430823e-05, "loss": 0.4773, "step": 909 }, { "epoch": 0.11073927593550349, "grad_norm": 1.4294017553329468, "learning_rate": 1.9935844242938285e-05, "loss": 0.4639, "step": 910 }, { "epoch": 0.11086096744752054, "grad_norm": 2.0240399837493896, "learning_rate": 1.993562344436631e-05, "loss": 0.5299, "step": 911 }, { "epoch": 0.11098265895953757, "grad_norm": 0.9139840602874756, "learning_rate": 1.99354022677233e-05, "loss": 0.4666, "step": 912 }, { "epoch": 0.1111043504715546, "grad_norm": 3.7568726539611816, "learning_rate": 1.9935180713017668e-05, "loss": 0.5334, "step": 913 }, { "epoch": 0.11122604198357165, "grad_norm": 0.7634105682373047, "learning_rate": 1.9934958780257843e-05, "loss": 0.4715, "step": 914 }, { "epoch": 0.11134773349558869, "grad_norm": 2.5053353309631348, "learning_rate": 1.9934736469452272e-05, "loss": 0.4529, "step": 915 }, { "epoch": 0.11146942500760572, "grad_norm": 0.9232064485549927, "learning_rate": 1.9934513780609416e-05, "loss": 0.5065, "step": 916 }, { "epoch": 0.11159111651962275, "grad_norm": 1.6837064027786255, "learning_rate": 1.9934290713737747e-05, "loss": 0.4873, "step": 917 }, { "epoch": 0.1117128080316398, "grad_norm": 4.035303592681885, "learning_rate": 1.9934067268845753e-05, "loss": 0.4216, "step": 918 }, { "epoch": 0.11183449954365683, "grad_norm": 0.6953611373901367, "learning_rate": 1.9933843445941935e-05, "loss": 0.5105, "step": 919 }, { "epoch": 0.11195619105567386, "grad_norm": 0.8081039190292358, "learning_rate": 1.9933619245034818e-05, "loss": 0.4463, "step": 920 }, { "epoch": 0.1120778825676909, "grad_norm": 3.260673761367798, "learning_rate": 1.9933394666132922e-05, "loss": 0.5449, "step": 921 }, { "epoch": 0.11219957407970794, "grad_norm": 0.915120542049408, "learning_rate": 1.99331697092448e-05, "loss": 0.4654, "step": 922 }, { "epoch": 0.11232126559172498, "grad_norm": 1.1050307750701904, "learning_rate": 1.993294437437901e-05, "loss": 0.4771, "step": 923 }, { "epoch": 0.11244295710374201, "grad_norm": 1.436227798461914, "learning_rate": 1.9932718661544125e-05, "loss": 0.454, "step": 924 }, { "epoch": 0.11256464861575904, "grad_norm": 0.7246893048286438, "learning_rate": 1.9932492570748737e-05, "loss": 0.5215, "step": 925 }, { "epoch": 0.11268634012777609, "grad_norm": 2.818067789077759, "learning_rate": 1.9932266102001445e-05, "loss": 0.4894, "step": 926 }, { "epoch": 0.11280803163979312, "grad_norm": 1.345335841178894, "learning_rate": 1.9932039255310873e-05, "loss": 0.5307, "step": 927 }, { "epoch": 0.11292972315181016, "grad_norm": 0.6055800914764404, "learning_rate": 1.9931812030685646e-05, "loss": 0.4788, "step": 928 }, { "epoch": 0.1130514146638272, "grad_norm": 0.9500460028648376, "learning_rate": 1.9931584428134413e-05, "loss": 0.4571, "step": 929 }, { "epoch": 0.11317310617584424, "grad_norm": 1.5998355150222778, "learning_rate": 1.993135644766584e-05, "loss": 0.4345, "step": 930 }, { "epoch": 0.11329479768786127, "grad_norm": 3.776104211807251, "learning_rate": 1.9931128089288592e-05, "loss": 0.5406, "step": 931 }, { "epoch": 0.1134164891998783, "grad_norm": 0.6229236721992493, "learning_rate": 1.9930899353011365e-05, "loss": 0.4151, "step": 932 }, { "epoch": 0.11353818071189535, "grad_norm": 5.357823848724365, "learning_rate": 1.9930670238842864e-05, "loss": 0.5458, "step": 933 }, { "epoch": 0.11365987222391238, "grad_norm": 2.321004629135132, "learning_rate": 1.9930440746791806e-05, "loss": 0.4422, "step": 934 }, { "epoch": 0.11378156373592942, "grad_norm": 1.4499346017837524, "learning_rate": 1.993021087686692e-05, "loss": 0.4569, "step": 935 }, { "epoch": 0.11390325524794645, "grad_norm": 0.6692584156990051, "learning_rate": 1.9929980629076956e-05, "loss": 0.468, "step": 936 }, { "epoch": 0.1140249467599635, "grad_norm": 0.6575024127960205, "learning_rate": 1.9929750003430673e-05, "loss": 0.4698, "step": 937 }, { "epoch": 0.11414663827198053, "grad_norm": 1.0745880603790283, "learning_rate": 1.9929518999936854e-05, "loss": 0.4704, "step": 938 }, { "epoch": 0.11426832978399756, "grad_norm": 1.6050153970718384, "learning_rate": 1.9929287618604282e-05, "loss": 0.4374, "step": 939 }, { "epoch": 0.1143900212960146, "grad_norm": 1.3916279077529907, "learning_rate": 1.9929055859441765e-05, "loss": 0.5018, "step": 940 }, { "epoch": 0.11451171280803164, "grad_norm": 1.0368601083755493, "learning_rate": 1.9928823722458115e-05, "loss": 0.4426, "step": 941 }, { "epoch": 0.11463340432004868, "grad_norm": 0.6375675797462463, "learning_rate": 1.9928591207662175e-05, "loss": 0.4695, "step": 942 }, { "epoch": 0.11475509583206571, "grad_norm": 4.086197853088379, "learning_rate": 1.992835831506279e-05, "loss": 0.5444, "step": 943 }, { "epoch": 0.11487678734408276, "grad_norm": 0.8371261358261108, "learning_rate": 1.9928125044668816e-05, "loss": 0.4071, "step": 944 }, { "epoch": 0.11499847885609979, "grad_norm": 3.4294397830963135, "learning_rate": 1.9927891396489138e-05, "loss": 0.4922, "step": 945 }, { "epoch": 0.11512017036811682, "grad_norm": 4.158844470977783, "learning_rate": 1.9927657370532643e-05, "loss": 0.5664, "step": 946 }, { "epoch": 0.11524186188013386, "grad_norm": 1.3181960582733154, "learning_rate": 1.9927422966808233e-05, "loss": 0.4846, "step": 947 }, { "epoch": 0.1153635533921509, "grad_norm": 3.0161585807800293, "learning_rate": 1.992718818532483e-05, "loss": 0.443, "step": 948 }, { "epoch": 0.11548524490416794, "grad_norm": 4.553255081176758, "learning_rate": 1.992695302609137e-05, "loss": 0.4652, "step": 949 }, { "epoch": 0.11560693641618497, "grad_norm": 3.0985097885131836, "learning_rate": 1.99267174891168e-05, "loss": 0.4747, "step": 950 }, { "epoch": 0.115728627928202, "grad_norm": 0.7888035774230957, "learning_rate": 1.9926481574410085e-05, "loss": 0.4975, "step": 951 }, { "epoch": 0.11585031944021905, "grad_norm": 1.0028328895568848, "learning_rate": 1.9926245281980194e-05, "loss": 0.4851, "step": 952 }, { "epoch": 0.11597201095223608, "grad_norm": 0.7690591812133789, "learning_rate": 1.9926008611836127e-05, "loss": 0.5013, "step": 953 }, { "epoch": 0.11609370246425312, "grad_norm": 4.568863868713379, "learning_rate": 1.9925771563986885e-05, "loss": 0.4469, "step": 954 }, { "epoch": 0.11621539397627015, "grad_norm": 1.4368757009506226, "learning_rate": 1.992553413844149e-05, "loss": 0.4179, "step": 955 }, { "epoch": 0.1163370854882872, "grad_norm": 0.8411977291107178, "learning_rate": 1.992529633520898e-05, "loss": 0.4744, "step": 956 }, { "epoch": 0.11645877700030423, "grad_norm": 3.5928070545196533, "learning_rate": 1.9925058154298397e-05, "loss": 0.5184, "step": 957 }, { "epoch": 0.11658046851232126, "grad_norm": 1.5290552377700806, "learning_rate": 1.9924819595718805e-05, "loss": 0.4537, "step": 958 }, { "epoch": 0.11670216002433831, "grad_norm": 0.76931232213974, "learning_rate": 1.9924580659479288e-05, "loss": 0.4143, "step": 959 }, { "epoch": 0.11682385153635534, "grad_norm": 2.371206760406494, "learning_rate": 1.9924341345588935e-05, "loss": 0.4811, "step": 960 }, { "epoch": 0.11694554304837237, "grad_norm": 1.6431336402893066, "learning_rate": 1.992410165405685e-05, "loss": 0.4544, "step": 961 }, { "epoch": 0.11706723456038941, "grad_norm": 1.5598409175872803, "learning_rate": 1.9923861584892155e-05, "loss": 0.458, "step": 962 }, { "epoch": 0.11718892607240645, "grad_norm": 0.637040913105011, "learning_rate": 1.9923621138103983e-05, "loss": 0.4577, "step": 963 }, { "epoch": 0.11731061758442349, "grad_norm": 2.3656256198883057, "learning_rate": 1.992338031370149e-05, "loss": 0.4685, "step": 964 }, { "epoch": 0.11743230909644052, "grad_norm": 0.9912247061729431, "learning_rate": 1.9923139111693833e-05, "loss": 0.4945, "step": 965 }, { "epoch": 0.11755400060845755, "grad_norm": 1.3829625844955444, "learning_rate": 1.9922897532090194e-05, "loss": 0.4901, "step": 966 }, { "epoch": 0.1176756921204746, "grad_norm": 2.4134111404418945, "learning_rate": 1.9922655574899762e-05, "loss": 0.4622, "step": 967 }, { "epoch": 0.11779738363249163, "grad_norm": 1.6261649131774902, "learning_rate": 1.992241324013175e-05, "loss": 0.4443, "step": 968 }, { "epoch": 0.11791907514450867, "grad_norm": 0.6375196576118469, "learning_rate": 1.9922170527795376e-05, "loss": 0.4885, "step": 969 }, { "epoch": 0.11804076665652571, "grad_norm": 3.2338030338287354, "learning_rate": 1.9921927437899876e-05, "loss": 0.5274, "step": 970 }, { "epoch": 0.11816245816854275, "grad_norm": 0.527633786201477, "learning_rate": 1.99216839704545e-05, "loss": 0.4384, "step": 971 }, { "epoch": 0.11828414968055978, "grad_norm": 2.055549383163452, "learning_rate": 1.992144012546851e-05, "loss": 0.4834, "step": 972 }, { "epoch": 0.11840584119257681, "grad_norm": 2.95906925201416, "learning_rate": 1.9921195902951187e-05, "loss": 0.4957, "step": 973 }, { "epoch": 0.11852753270459386, "grad_norm": 0.7035129070281982, "learning_rate": 1.9920951302911823e-05, "loss": 0.4753, "step": 974 }, { "epoch": 0.1186492242166109, "grad_norm": 0.6045013070106506, "learning_rate": 1.992070632535973e-05, "loss": 0.4678, "step": 975 }, { "epoch": 0.11877091572862793, "grad_norm": 1.5679330825805664, "learning_rate": 1.9920460970304224e-05, "loss": 0.4778, "step": 976 }, { "epoch": 0.11889260724064496, "grad_norm": 1.5576215982437134, "learning_rate": 1.9920215237754647e-05, "loss": 0.4611, "step": 977 }, { "epoch": 0.119014298752662, "grad_norm": 1.4160807132720947, "learning_rate": 1.9919969127720345e-05, "loss": 0.471, "step": 978 }, { "epoch": 0.11913599026467904, "grad_norm": 0.7447370290756226, "learning_rate": 1.9919722640210685e-05, "loss": 0.4402, "step": 979 }, { "epoch": 0.11925768177669607, "grad_norm": 1.8573156595230103, "learning_rate": 1.9919475775235043e-05, "loss": 0.5008, "step": 980 }, { "epoch": 0.1193793732887131, "grad_norm": 4.865150451660156, "learning_rate": 1.9919228532802817e-05, "loss": 0.5459, "step": 981 }, { "epoch": 0.11950106480073015, "grad_norm": 2.2894115447998047, "learning_rate": 1.9918980912923412e-05, "loss": 0.4876, "step": 982 }, { "epoch": 0.11962275631274719, "grad_norm": 0.7747580409049988, "learning_rate": 1.9918732915606255e-05, "loss": 0.4849, "step": 983 }, { "epoch": 0.11974444782476422, "grad_norm": 0.8401018977165222, "learning_rate": 1.991848454086078e-05, "loss": 0.5099, "step": 984 }, { "epoch": 0.11986613933678127, "grad_norm": 1.1207845211029053, "learning_rate": 1.9918235788696437e-05, "loss": 0.5499, "step": 985 }, { "epoch": 0.1199878308487983, "grad_norm": 1.5522438287734985, "learning_rate": 1.9917986659122692e-05, "loss": 0.5234, "step": 986 }, { "epoch": 0.12010952236081533, "grad_norm": 4.20838737487793, "learning_rate": 1.9917737152149027e-05, "loss": 0.4454, "step": 987 }, { "epoch": 0.12023121387283237, "grad_norm": 1.867698073387146, "learning_rate": 1.9917487267784934e-05, "loss": 0.4816, "step": 988 }, { "epoch": 0.12035290538484941, "grad_norm": 4.351875305175781, "learning_rate": 1.9917237006039922e-05, "loss": 0.4132, "step": 989 }, { "epoch": 0.12047459689686645, "grad_norm": 2.0159127712249756, "learning_rate": 1.9916986366923517e-05, "loss": 0.5015, "step": 990 }, { "epoch": 0.12059628840888348, "grad_norm": 2.2653989791870117, "learning_rate": 1.991673535044525e-05, "loss": 0.497, "step": 991 }, { "epoch": 0.12071797992090051, "grad_norm": 1.2745273113250732, "learning_rate": 1.991648395661468e-05, "loss": 0.4094, "step": 992 }, { "epoch": 0.12083967143291756, "grad_norm": 2.639756441116333, "learning_rate": 1.9916232185441365e-05, "loss": 0.4572, "step": 993 }, { "epoch": 0.12096136294493459, "grad_norm": 5.562560081481934, "learning_rate": 1.9915980036934894e-05, "loss": 0.613, "step": 994 }, { "epoch": 0.12108305445695162, "grad_norm": 0.9916262626647949, "learning_rate": 1.9915727511104854e-05, "loss": 0.5221, "step": 995 }, { "epoch": 0.12120474596896866, "grad_norm": 1.339257836341858, "learning_rate": 1.991547460796086e-05, "loss": 0.5748, "step": 996 }, { "epoch": 0.1213264374809857, "grad_norm": 5.516851425170898, "learning_rate": 1.9915221327512532e-05, "loss": 0.5339, "step": 997 }, { "epoch": 0.12144812899300274, "grad_norm": 4.493785381317139, "learning_rate": 1.9914967669769512e-05, "loss": 0.5209, "step": 998 }, { "epoch": 0.12156982050501977, "grad_norm": 7.7040019035339355, "learning_rate": 1.991471363474145e-05, "loss": 0.5222, "step": 999 }, { "epoch": 0.12169151201703682, "grad_norm": 6.866464138031006, "learning_rate": 1.9914459222438006e-05, "loss": 0.5507, "step": 1000 }, { "epoch": 0.12181320352905385, "grad_norm": 6.365875720977783, "learning_rate": 1.991420443286887e-05, "loss": 0.5163, "step": 1001 }, { "epoch": 0.12193489504107088, "grad_norm": 4.724626541137695, "learning_rate": 1.9913949266043735e-05, "loss": 0.5124, "step": 1002 }, { "epoch": 0.12205658655308792, "grad_norm": 4.0745344161987305, "learning_rate": 1.9913693721972307e-05, "loss": 0.5122, "step": 1003 }, { "epoch": 0.12217827806510496, "grad_norm": 1.1672906875610352, "learning_rate": 1.9913437800664313e-05, "loss": 0.4765, "step": 1004 }, { "epoch": 0.122299969577122, "grad_norm": 2.6373729705810547, "learning_rate": 1.9913181502129495e-05, "loss": 0.4792, "step": 1005 }, { "epoch": 0.12242166108913903, "grad_norm": 3.2573599815368652, "learning_rate": 1.9912924826377598e-05, "loss": 0.4979, "step": 1006 }, { "epoch": 0.12254335260115606, "grad_norm": 5.804367542266846, "learning_rate": 1.9912667773418394e-05, "loss": 0.5343, "step": 1007 }, { "epoch": 0.12266504411317311, "grad_norm": 6.964966297149658, "learning_rate": 1.9912410343261664e-05, "loss": 0.5789, "step": 1008 }, { "epoch": 0.12278673562519014, "grad_norm": 4.244586944580078, "learning_rate": 1.99121525359172e-05, "loss": 0.496, "step": 1009 }, { "epoch": 0.12290842713720718, "grad_norm": 1.811240315437317, "learning_rate": 1.991189435139482e-05, "loss": 0.455, "step": 1010 }, { "epoch": 0.12303011864922421, "grad_norm": 1.173520803451538, "learning_rate": 1.9911635789704338e-05, "loss": 0.4285, "step": 1011 }, { "epoch": 0.12315181016124126, "grad_norm": 5.163769721984863, "learning_rate": 1.99113768508556e-05, "loss": 0.5986, "step": 1012 }, { "epoch": 0.12327350167325829, "grad_norm": 2.043832540512085, "learning_rate": 1.991111753485846e-05, "loss": 0.4694, "step": 1013 }, { "epoch": 0.12339519318527532, "grad_norm": 3.5095207691192627, "learning_rate": 1.991085784172278e-05, "loss": 0.4978, "step": 1014 }, { "epoch": 0.12351688469729237, "grad_norm": 4.421924591064453, "learning_rate": 1.9910597771458446e-05, "loss": 0.4866, "step": 1015 }, { "epoch": 0.1236385762093094, "grad_norm": 5.892740726470947, "learning_rate": 1.991033732407535e-05, "loss": 0.4918, "step": 1016 }, { "epoch": 0.12376026772132644, "grad_norm": 6.371843338012695, "learning_rate": 1.991007649958341e-05, "loss": 0.4837, "step": 1017 }, { "epoch": 0.12388195923334347, "grad_norm": 1.5353717803955078, "learning_rate": 1.990981529799254e-05, "loss": 0.4839, "step": 1018 }, { "epoch": 0.12400365074536052, "grad_norm": 2.3877511024475098, "learning_rate": 1.9909553719312693e-05, "loss": 0.52, "step": 1019 }, { "epoch": 0.12412534225737755, "grad_norm": 1.7043259143829346, "learning_rate": 1.9909291763553813e-05, "loss": 0.4516, "step": 1020 }, { "epoch": 0.12424703376939458, "grad_norm": 1.2159875631332397, "learning_rate": 1.990902943072587e-05, "loss": 0.4664, "step": 1021 }, { "epoch": 0.12436872528141162, "grad_norm": 1.8231126070022583, "learning_rate": 1.9908766720838847e-05, "loss": 0.4709, "step": 1022 }, { "epoch": 0.12449041679342866, "grad_norm": 0.6514496803283691, "learning_rate": 1.990850363390274e-05, "loss": 0.5004, "step": 1023 }, { "epoch": 0.1246121083054457, "grad_norm": 0.9176408648490906, "learning_rate": 1.990824016992756e-05, "loss": 0.5215, "step": 1024 }, { "epoch": 0.12473379981746273, "grad_norm": 3.3087456226348877, "learning_rate": 1.990797632892333e-05, "loss": 0.403, "step": 1025 }, { "epoch": 0.12485549132947976, "grad_norm": 3.45039701461792, "learning_rate": 1.9907712110900098e-05, "loss": 0.4367, "step": 1026 }, { "epoch": 0.12497718284149681, "grad_norm": 2.794262409210205, "learning_rate": 1.9907447515867907e-05, "loss": 0.4585, "step": 1027 }, { "epoch": 0.12509887435351386, "grad_norm": 0.5666921138763428, "learning_rate": 1.9907182543836835e-05, "loss": 0.5058, "step": 1028 }, { "epoch": 0.12522056586553088, "grad_norm": 0.8674224019050598, "learning_rate": 1.990691719481696e-05, "loss": 0.4928, "step": 1029 }, { "epoch": 0.12534225737754792, "grad_norm": 1.3358700275421143, "learning_rate": 1.9906651468818374e-05, "loss": 0.4874, "step": 1030 }, { "epoch": 0.12546394888956494, "grad_norm": 0.5441078543663025, "learning_rate": 1.9906385365851198e-05, "loss": 0.4483, "step": 1031 }, { "epoch": 0.125585640401582, "grad_norm": 0.9660111665725708, "learning_rate": 1.9906118885925558e-05, "loss": 0.5085, "step": 1032 }, { "epoch": 0.12570733191359904, "grad_norm": 0.888455331325531, "learning_rate": 1.990585202905158e-05, "loss": 0.4702, "step": 1033 }, { "epoch": 0.12582902342561605, "grad_norm": 1.2749918699264526, "learning_rate": 1.9905584795239435e-05, "loss": 0.4653, "step": 1034 }, { "epoch": 0.1259507149376331, "grad_norm": 1.0314130783081055, "learning_rate": 1.9905317184499284e-05, "loss": 0.4667, "step": 1035 }, { "epoch": 0.12607240644965015, "grad_norm": 0.6198263764381409, "learning_rate": 1.9905049196841313e-05, "loss": 0.4537, "step": 1036 }, { "epoch": 0.12619409796166717, "grad_norm": 0.7804372906684875, "learning_rate": 1.9904780832275717e-05, "loss": 0.4572, "step": 1037 }, { "epoch": 0.12631578947368421, "grad_norm": 1.3657641410827637, "learning_rate": 1.9904512090812705e-05, "loss": 0.4797, "step": 1038 }, { "epoch": 0.12643748098570123, "grad_norm": 3.615712881088257, "learning_rate": 1.9904242972462507e-05, "loss": 0.5061, "step": 1039 }, { "epoch": 0.12655917249771828, "grad_norm": 0.7965696454048157, "learning_rate": 1.9903973477235368e-05, "loss": 0.4296, "step": 1040 }, { "epoch": 0.12668086400973533, "grad_norm": 0.87240070104599, "learning_rate": 1.9903703605141536e-05, "loss": 0.4423, "step": 1041 }, { "epoch": 0.12680255552175235, "grad_norm": 0.8806689381599426, "learning_rate": 1.990343335619128e-05, "loss": 0.4445, "step": 1042 }, { "epoch": 0.1269242470337694, "grad_norm": 0.944068431854248, "learning_rate": 1.990316273039489e-05, "loss": 0.495, "step": 1043 }, { "epoch": 0.12704593854578644, "grad_norm": 1.1978839635849, "learning_rate": 1.9902891727762656e-05, "loss": 0.5269, "step": 1044 }, { "epoch": 0.12716763005780346, "grad_norm": 2.517319679260254, "learning_rate": 1.9902620348304894e-05, "loss": 0.4923, "step": 1045 }, { "epoch": 0.1272893215698205, "grad_norm": 3.820152759552002, "learning_rate": 1.9902348592031932e-05, "loss": 0.4883, "step": 1046 }, { "epoch": 0.12741101308183755, "grad_norm": 2.059230089187622, "learning_rate": 1.990207645895411e-05, "loss": 0.4895, "step": 1047 }, { "epoch": 0.12753270459385457, "grad_norm": 4.584321022033691, "learning_rate": 1.990180394908178e-05, "loss": 0.4741, "step": 1048 }, { "epoch": 0.12765439610587162, "grad_norm": 1.132431149482727, "learning_rate": 1.9901531062425316e-05, "loss": 0.4337, "step": 1049 }, { "epoch": 0.12777608761788864, "grad_norm": 1.0330190658569336, "learning_rate": 1.99012577989951e-05, "loss": 0.481, "step": 1050 }, { "epoch": 0.1278977791299057, "grad_norm": 5.490146160125732, "learning_rate": 1.990098415880153e-05, "loss": 0.6271, "step": 1051 }, { "epoch": 0.12801947064192273, "grad_norm": 2.893369436264038, "learning_rate": 1.9900710141855018e-05, "loss": 0.5039, "step": 1052 }, { "epoch": 0.12814116215393975, "grad_norm": 5.065304756164551, "learning_rate": 1.9900435748165994e-05, "loss": 0.5771, "step": 1053 }, { "epoch": 0.1282628536659568, "grad_norm": 1.762570858001709, "learning_rate": 1.9900160977744897e-05, "loss": 0.5018, "step": 1054 }, { "epoch": 0.12838454517797385, "grad_norm": 1.611161708831787, "learning_rate": 1.989988583060218e-05, "loss": 0.5268, "step": 1055 }, { "epoch": 0.12850623668999087, "grad_norm": 3.819828510284424, "learning_rate": 1.989961030674832e-05, "loss": 0.4531, "step": 1056 }, { "epoch": 0.1286279282020079, "grad_norm": 2.2731497287750244, "learning_rate": 1.989933440619379e-05, "loss": 0.5055, "step": 1057 }, { "epoch": 0.12874961971402496, "grad_norm": 2.4282963275909424, "learning_rate": 1.98990581289491e-05, "loss": 0.5095, "step": 1058 }, { "epoch": 0.12887131122604198, "grad_norm": 1.9960323572158813, "learning_rate": 1.9898781475024755e-05, "loss": 0.5085, "step": 1059 }, { "epoch": 0.12899300273805903, "grad_norm": 1.3955953121185303, "learning_rate": 1.989850444443129e-05, "loss": 0.5128, "step": 1060 }, { "epoch": 0.12911469425007605, "grad_norm": 3.0201621055603027, "learning_rate": 1.989822703717924e-05, "loss": 0.5401, "step": 1061 }, { "epoch": 0.1292363857620931, "grad_norm": 1.393862247467041, "learning_rate": 1.989794925327916e-05, "loss": 0.4261, "step": 1062 }, { "epoch": 0.12935807727411014, "grad_norm": 6.086862087249756, "learning_rate": 1.989767109274163e-05, "loss": 0.5931, "step": 1063 }, { "epoch": 0.12947976878612716, "grad_norm": 3.9537577629089355, "learning_rate": 1.989739255557722e-05, "loss": 0.5358, "step": 1064 }, { "epoch": 0.1296014602981442, "grad_norm": 1.3847154378890991, "learning_rate": 1.989711364179654e-05, "loss": 0.4831, "step": 1065 }, { "epoch": 0.12972315181016125, "grad_norm": 0.8064104914665222, "learning_rate": 1.98968343514102e-05, "loss": 0.4341, "step": 1066 }, { "epoch": 0.12984484332217827, "grad_norm": 1.4337934255599976, "learning_rate": 1.989655468442883e-05, "loss": 0.4775, "step": 1067 }, { "epoch": 0.12996653483419532, "grad_norm": 2.249725103378296, "learning_rate": 1.9896274640863068e-05, "loss": 0.4609, "step": 1068 }, { "epoch": 0.13008822634621234, "grad_norm": 4.285830974578857, "learning_rate": 1.989599422072357e-05, "loss": 0.4345, "step": 1069 }, { "epoch": 0.13020991785822938, "grad_norm": 1.1885123252868652, "learning_rate": 1.9895713424021013e-05, "loss": 0.4692, "step": 1070 }, { "epoch": 0.13033160937024643, "grad_norm": 0.7677265405654907, "learning_rate": 1.9895432250766073e-05, "loss": 0.5207, "step": 1071 }, { "epoch": 0.13045330088226345, "grad_norm": 0.6993517279624939, "learning_rate": 1.9895150700969453e-05, "loss": 0.4858, "step": 1072 }, { "epoch": 0.1305749923942805, "grad_norm": 3.629011392593384, "learning_rate": 1.989486877464187e-05, "loss": 0.4192, "step": 1073 }, { "epoch": 0.13069668390629754, "grad_norm": 4.147429943084717, "learning_rate": 1.9894586471794047e-05, "loss": 0.5904, "step": 1074 }, { "epoch": 0.13081837541831456, "grad_norm": 1.766664743423462, "learning_rate": 1.989430379243673e-05, "loss": 0.526, "step": 1075 }, { "epoch": 0.1309400669303316, "grad_norm": 1.9937471151351929, "learning_rate": 1.9894020736580672e-05, "loss": 0.5269, "step": 1076 }, { "epoch": 0.13106175844234866, "grad_norm": 1.8291488885879517, "learning_rate": 1.9893737304236644e-05, "loss": 0.5168, "step": 1077 }, { "epoch": 0.13118344995436568, "grad_norm": 3.317469835281372, "learning_rate": 1.9893453495415436e-05, "loss": 0.4334, "step": 1078 }, { "epoch": 0.13130514146638272, "grad_norm": 1.517987608909607, "learning_rate": 1.989316931012784e-05, "loss": 0.5321, "step": 1079 }, { "epoch": 0.13142683297839974, "grad_norm": 3.121569871902466, "learning_rate": 1.9892884748384678e-05, "loss": 0.4948, "step": 1080 }, { "epoch": 0.1315485244904168, "grad_norm": 4.9344024658203125, "learning_rate": 1.9892599810196772e-05, "loss": 0.4189, "step": 1081 }, { "epoch": 0.13167021600243384, "grad_norm": 2.4887192249298096, "learning_rate": 1.9892314495574967e-05, "loss": 0.4245, "step": 1082 }, { "epoch": 0.13179190751445086, "grad_norm": 1.7641483545303345, "learning_rate": 1.989202880453012e-05, "loss": 0.5314, "step": 1083 }, { "epoch": 0.1319135990264679, "grad_norm": 5.092477321624756, "learning_rate": 1.98917427370731e-05, "loss": 0.5421, "step": 1084 }, { "epoch": 0.13203529053848495, "grad_norm": 3.9195423126220703, "learning_rate": 1.9891456293214797e-05, "loss": 0.5188, "step": 1085 }, { "epoch": 0.13215698205050197, "grad_norm": 3.7562026977539062, "learning_rate": 1.9891169472966107e-05, "loss": 0.5396, "step": 1086 }, { "epoch": 0.13227867356251902, "grad_norm": 1.3222163915634155, "learning_rate": 1.9890882276337943e-05, "loss": 0.5105, "step": 1087 }, { "epoch": 0.13240036507453606, "grad_norm": 0.5431426167488098, "learning_rate": 1.9890594703341234e-05, "loss": 0.4952, "step": 1088 }, { "epoch": 0.13252205658655308, "grad_norm": 4.460387706756592, "learning_rate": 1.9890306753986928e-05, "loss": 0.4645, "step": 1089 }, { "epoch": 0.13264374809857013, "grad_norm": 2.880514144897461, "learning_rate": 1.9890018428285977e-05, "loss": 0.4797, "step": 1090 }, { "epoch": 0.13276543961058715, "grad_norm": 2.051021099090576, "learning_rate": 1.9889729726249355e-05, "loss": 0.4882, "step": 1091 }, { "epoch": 0.1328871311226042, "grad_norm": 2.871445894241333, "learning_rate": 1.9889440647888043e-05, "loss": 0.5146, "step": 1092 }, { "epoch": 0.13300882263462124, "grad_norm": 1.0191707611083984, "learning_rate": 1.988915119321305e-05, "loss": 0.4754, "step": 1093 }, { "epoch": 0.13313051414663826, "grad_norm": 3.2318060398101807, "learning_rate": 1.988886136223538e-05, "loss": 0.5293, "step": 1094 }, { "epoch": 0.1332522056586553, "grad_norm": 0.6224351525306702, "learning_rate": 1.9888571154966065e-05, "loss": 0.4171, "step": 1095 }, { "epoch": 0.13337389717067236, "grad_norm": 2.7519643306732178, "learning_rate": 1.988828057141615e-05, "loss": 0.4937, "step": 1096 }, { "epoch": 0.13349558868268938, "grad_norm": 1.3889471292495728, "learning_rate": 1.9887989611596694e-05, "loss": 0.4393, "step": 1097 }, { "epoch": 0.13361728019470642, "grad_norm": 2.1229891777038574, "learning_rate": 1.9887698275518764e-05, "loss": 0.495, "step": 1098 }, { "epoch": 0.13373897170672347, "grad_norm": 0.706048309803009, "learning_rate": 1.9887406563193452e-05, "loss": 0.5292, "step": 1099 }, { "epoch": 0.1338606632187405, "grad_norm": 0.8292319178581238, "learning_rate": 1.9887114474631852e-05, "loss": 0.4669, "step": 1100 }, { "epoch": 0.13398235473075754, "grad_norm": 2.162430763244629, "learning_rate": 1.9886822009845082e-05, "loss": 0.5202, "step": 1101 }, { "epoch": 0.13410404624277455, "grad_norm": 3.347226142883301, "learning_rate": 1.9886529168844267e-05, "loss": 0.4971, "step": 1102 }, { "epoch": 0.1342257377547916, "grad_norm": 5.355313777923584, "learning_rate": 1.9886235951640556e-05, "loss": 0.4803, "step": 1103 }, { "epoch": 0.13434742926680865, "grad_norm": 2.244405746459961, "learning_rate": 1.9885942358245104e-05, "loss": 0.4865, "step": 1104 }, { "epoch": 0.13446912077882567, "grad_norm": 4.953440189361572, "learning_rate": 1.988564838866908e-05, "loss": 0.428, "step": 1105 }, { "epoch": 0.13459081229084272, "grad_norm": 0.7820557951927185, "learning_rate": 1.9885354042923674e-05, "loss": 0.4954, "step": 1106 }, { "epoch": 0.13471250380285976, "grad_norm": 1.209432601928711, "learning_rate": 1.9885059321020085e-05, "loss": 0.4696, "step": 1107 }, { "epoch": 0.13483419531487678, "grad_norm": 0.7294307351112366, "learning_rate": 1.988476422296953e-05, "loss": 0.487, "step": 1108 }, { "epoch": 0.13495588682689383, "grad_norm": 0.777212381362915, "learning_rate": 1.9884468748783236e-05, "loss": 0.4852, "step": 1109 }, { "epoch": 0.13507757833891085, "grad_norm": 0.8023810982704163, "learning_rate": 1.9884172898472444e-05, "loss": 0.5026, "step": 1110 }, { "epoch": 0.1351992698509279, "grad_norm": 0.6087104678153992, "learning_rate": 1.988387667204841e-05, "loss": 0.4899, "step": 1111 }, { "epoch": 0.13532096136294494, "grad_norm": 0.657290518283844, "learning_rate": 1.9883580069522417e-05, "loss": 0.5048, "step": 1112 }, { "epoch": 0.13544265287496196, "grad_norm": 1.1142759323120117, "learning_rate": 1.9883283090905744e-05, "loss": 0.4823, "step": 1113 }, { "epoch": 0.135564344386979, "grad_norm": 2.6991608142852783, "learning_rate": 1.988298573620969e-05, "loss": 0.4653, "step": 1114 }, { "epoch": 0.13568603589899605, "grad_norm": 0.6051679253578186, "learning_rate": 1.988268800544557e-05, "loss": 0.475, "step": 1115 }, { "epoch": 0.13580772741101307, "grad_norm": 1.133419394493103, "learning_rate": 1.9882389898624716e-05, "loss": 0.4446, "step": 1116 }, { "epoch": 0.13592941892303012, "grad_norm": 3.801710367202759, "learning_rate": 1.9882091415758474e-05, "loss": 0.5287, "step": 1117 }, { "epoch": 0.13605111043504717, "grad_norm": 2.743229866027832, "learning_rate": 1.9881792556858197e-05, "loss": 0.5043, "step": 1118 }, { "epoch": 0.1361728019470642, "grad_norm": 0.6475219130516052, "learning_rate": 1.9881493321935258e-05, "loss": 0.4895, "step": 1119 }, { "epoch": 0.13629449345908123, "grad_norm": 1.1221957206726074, "learning_rate": 1.9881193711001048e-05, "loss": 0.5212, "step": 1120 }, { "epoch": 0.13641618497109825, "grad_norm": 1.2978185415267944, "learning_rate": 1.9880893724066964e-05, "loss": 0.4827, "step": 1121 }, { "epoch": 0.1365378764831153, "grad_norm": 2.9070370197296143, "learning_rate": 1.9880593361144415e-05, "loss": 0.4286, "step": 1122 }, { "epoch": 0.13665956799513235, "grad_norm": 0.9966745972633362, "learning_rate": 1.9880292622244845e-05, "loss": 0.4341, "step": 1123 }, { "epoch": 0.13678125950714937, "grad_norm": 0.8091104030609131, "learning_rate": 1.9879991507379686e-05, "loss": 0.4283, "step": 1124 }, { "epoch": 0.1369029510191664, "grad_norm": 2.4667422771453857, "learning_rate": 1.98796900165604e-05, "loss": 0.4976, "step": 1125 }, { "epoch": 0.13702464253118346, "grad_norm": 4.526529788970947, "learning_rate": 1.9879388149798456e-05, "loss": 0.5393, "step": 1126 }, { "epoch": 0.13714633404320048, "grad_norm": 4.00773286819458, "learning_rate": 1.987908590710535e-05, "loss": 0.5343, "step": 1127 }, { "epoch": 0.13726802555521753, "grad_norm": 0.6866424679756165, "learning_rate": 1.987878328849257e-05, "loss": 0.4278, "step": 1128 }, { "epoch": 0.13738971706723457, "grad_norm": 1.0679837465286255, "learning_rate": 1.9878480293971646e-05, "loss": 0.44, "step": 1129 }, { "epoch": 0.1375114085792516, "grad_norm": 0.7804145812988281, "learning_rate": 1.9878176923554093e-05, "loss": 0.5078, "step": 1130 }, { "epoch": 0.13763310009126864, "grad_norm": 1.4702852964401245, "learning_rate": 1.9877873177251464e-05, "loss": 0.4924, "step": 1131 }, { "epoch": 0.13775479160328566, "grad_norm": 1.2784676551818848, "learning_rate": 1.987756905507531e-05, "loss": 0.5162, "step": 1132 }, { "epoch": 0.1378764831153027, "grad_norm": 3.1514322757720947, "learning_rate": 1.9877264557037213e-05, "loss": 0.4654, "step": 1133 }, { "epoch": 0.13799817462731975, "grad_norm": 0.7366341352462769, "learning_rate": 1.9876959683148753e-05, "loss": 0.4548, "step": 1134 }, { "epoch": 0.13811986613933677, "grad_norm": 0.8866164088249207, "learning_rate": 1.9876654433421534e-05, "loss": 0.4808, "step": 1135 }, { "epoch": 0.13824155765135382, "grad_norm": 2.6878104209899902, "learning_rate": 1.987634880786717e-05, "loss": 0.495, "step": 1136 }, { "epoch": 0.13836324916337087, "grad_norm": 0.9557205438613892, "learning_rate": 1.987604280649729e-05, "loss": 0.4519, "step": 1137 }, { "epoch": 0.13848494067538789, "grad_norm": 3.3331611156463623, "learning_rate": 1.987573642932354e-05, "loss": 0.4966, "step": 1138 }, { "epoch": 0.13860663218740493, "grad_norm": 1.2201111316680908, "learning_rate": 1.9875429676357576e-05, "loss": 0.4481, "step": 1139 }, { "epoch": 0.13872832369942195, "grad_norm": 1.5304169654846191, "learning_rate": 1.9875122547611072e-05, "loss": 0.4668, "step": 1140 }, { "epoch": 0.138850015211439, "grad_norm": 0.8971834182739258, "learning_rate": 1.9874815043095714e-05, "loss": 0.5049, "step": 1141 }, { "epoch": 0.13897170672345605, "grad_norm": 2.9369068145751953, "learning_rate": 1.9874507162823205e-05, "loss": 0.4672, "step": 1142 }, { "epoch": 0.13909339823547306, "grad_norm": 1.979405403137207, "learning_rate": 1.987419890680526e-05, "loss": 0.5469, "step": 1143 }, { "epoch": 0.1392150897474901, "grad_norm": 3.0535216331481934, "learning_rate": 1.9873890275053606e-05, "loss": 0.4846, "step": 1144 }, { "epoch": 0.13933678125950716, "grad_norm": 7.240109443664551, "learning_rate": 1.9873581267579992e-05, "loss": 0.4554, "step": 1145 }, { "epoch": 0.13945847277152418, "grad_norm": 6.796575546264648, "learning_rate": 1.9873271884396173e-05, "loss": 0.4877, "step": 1146 }, { "epoch": 0.13958016428354122, "grad_norm": 4.516753673553467, "learning_rate": 1.9872962125513922e-05, "loss": 0.4755, "step": 1147 }, { "epoch": 0.13970185579555827, "grad_norm": 2.8543522357940674, "learning_rate": 1.9872651990945024e-05, "loss": 0.4566, "step": 1148 }, { "epoch": 0.1398235473075753, "grad_norm": 0.7286142706871033, "learning_rate": 1.9872341480701286e-05, "loss": 0.4856, "step": 1149 }, { "epoch": 0.13994523881959234, "grad_norm": 2.64182448387146, "learning_rate": 1.987203059479452e-05, "loss": 0.4704, "step": 1150 }, { "epoch": 0.14006693033160936, "grad_norm": 2.409075975418091, "learning_rate": 1.987171933323655e-05, "loss": 0.5042, "step": 1151 }, { "epoch": 0.1401886218436264, "grad_norm": 5.348703861236572, "learning_rate": 1.987140769603923e-05, "loss": 0.5052, "step": 1152 }, { "epoch": 0.14031031335564345, "grad_norm": 3.3560080528259277, "learning_rate": 1.9871095683214414e-05, "loss": 0.4862, "step": 1153 }, { "epoch": 0.14043200486766047, "grad_norm": 1.841772437095642, "learning_rate": 1.9870783294773977e-05, "loss": 0.511, "step": 1154 }, { "epoch": 0.14055369637967752, "grad_norm": 0.870384156703949, "learning_rate": 1.9870470530729805e-05, "loss": 0.4354, "step": 1155 }, { "epoch": 0.14067538789169456, "grad_norm": 0.7190083861351013, "learning_rate": 1.9870157391093795e-05, "loss": 0.5154, "step": 1156 }, { "epoch": 0.14079707940371158, "grad_norm": 0.9531543850898743, "learning_rate": 1.9869843875877867e-05, "loss": 0.5232, "step": 1157 }, { "epoch": 0.14091877091572863, "grad_norm": 2.533087968826294, "learning_rate": 1.986952998509395e-05, "loss": 0.5027, "step": 1158 }, { "epoch": 0.14104046242774568, "grad_norm": 4.98061466217041, "learning_rate": 1.986921571875399e-05, "loss": 0.4683, "step": 1159 }, { "epoch": 0.1411621539397627, "grad_norm": 1.3997783660888672, "learning_rate": 1.9868901076869946e-05, "loss": 0.4841, "step": 1160 }, { "epoch": 0.14128384545177974, "grad_norm": 4.7056474685668945, "learning_rate": 1.986858605945378e-05, "loss": 0.4607, "step": 1161 }, { "epoch": 0.14140553696379676, "grad_norm": 3.663830041885376, "learning_rate": 1.9868270666517496e-05, "loss": 0.4455, "step": 1162 }, { "epoch": 0.1415272284758138, "grad_norm": 1.792288899421692, "learning_rate": 1.9867954898073084e-05, "loss": 0.4995, "step": 1163 }, { "epoch": 0.14164891998783086, "grad_norm": 2.9815714359283447, "learning_rate": 1.9867638754132562e-05, "loss": 0.5496, "step": 1164 }, { "epoch": 0.14177061149984788, "grad_norm": 2.771726369857788, "learning_rate": 1.9867322234707963e-05, "loss": 0.5031, "step": 1165 }, { "epoch": 0.14189230301186492, "grad_norm": 2.5088088512420654, "learning_rate": 1.9867005339811324e-05, "loss": 0.4973, "step": 1166 }, { "epoch": 0.14201399452388197, "grad_norm": 1.4814538955688477, "learning_rate": 1.986668806945471e-05, "loss": 0.4504, "step": 1167 }, { "epoch": 0.142135686035899, "grad_norm": 1.8390085697174072, "learning_rate": 1.9866370423650194e-05, "loss": 0.4916, "step": 1168 }, { "epoch": 0.14225737754791604, "grad_norm": 2.3560585975646973, "learning_rate": 1.986605240240986e-05, "loss": 0.4252, "step": 1169 }, { "epoch": 0.14237906905993308, "grad_norm": 0.6675092577934265, "learning_rate": 1.9865734005745812e-05, "loss": 0.4759, "step": 1170 }, { "epoch": 0.1425007605719501, "grad_norm": 1.4101814031600952, "learning_rate": 1.986541523367016e-05, "loss": 0.494, "step": 1171 }, { "epoch": 0.14262245208396715, "grad_norm": 0.9319317936897278, "learning_rate": 1.986509608619504e-05, "loss": 0.4938, "step": 1172 }, { "epoch": 0.14274414359598417, "grad_norm": 1.551396369934082, "learning_rate": 1.9864776563332598e-05, "loss": 0.4423, "step": 1173 }, { "epoch": 0.14286583510800122, "grad_norm": 0.7158803939819336, "learning_rate": 1.9864456665094985e-05, "loss": 0.4823, "step": 1174 }, { "epoch": 0.14298752662001826, "grad_norm": 1.0481125116348267, "learning_rate": 1.9864136391494376e-05, "loss": 0.4491, "step": 1175 }, { "epoch": 0.14310921813203528, "grad_norm": 0.7452911734580994, "learning_rate": 1.9863815742542965e-05, "loss": 0.4208, "step": 1176 }, { "epoch": 0.14323090964405233, "grad_norm": 2.0615103244781494, "learning_rate": 1.9863494718252945e-05, "loss": 0.5336, "step": 1177 }, { "epoch": 0.14335260115606938, "grad_norm": 1.0810911655426025, "learning_rate": 1.9863173318636535e-05, "loss": 0.473, "step": 1178 }, { "epoch": 0.1434742926680864, "grad_norm": 0.989427387714386, "learning_rate": 1.9862851543705965e-05, "loss": 0.4505, "step": 1179 }, { "epoch": 0.14359598418010344, "grad_norm": 1.1737773418426514, "learning_rate": 1.9862529393473476e-05, "loss": 0.5425, "step": 1180 }, { "epoch": 0.14371767569212046, "grad_norm": 5.367320537567139, "learning_rate": 1.986220686795133e-05, "loss": 0.4171, "step": 1181 }, { "epoch": 0.1438393672041375, "grad_norm": 2.964874744415283, "learning_rate": 1.98618839671518e-05, "loss": 0.5029, "step": 1182 }, { "epoch": 0.14396105871615456, "grad_norm": 3.477079153060913, "learning_rate": 1.9861560691087175e-05, "loss": 0.4708, "step": 1183 }, { "epoch": 0.14408275022817157, "grad_norm": 1.2514410018920898, "learning_rate": 1.9861237039769752e-05, "loss": 0.4466, "step": 1184 }, { "epoch": 0.14420444174018862, "grad_norm": 2.359375238418579, "learning_rate": 1.9860913013211848e-05, "loss": 0.5482, "step": 1185 }, { "epoch": 0.14432613325220567, "grad_norm": 2.930695056915283, "learning_rate": 1.986058861142579e-05, "loss": 0.4813, "step": 1186 }, { "epoch": 0.1444478247642227, "grad_norm": 3.335024356842041, "learning_rate": 1.9860263834423926e-05, "loss": 0.5209, "step": 1187 }, { "epoch": 0.14456951627623973, "grad_norm": 1.3997037410736084, "learning_rate": 1.9859938682218615e-05, "loss": 0.4741, "step": 1188 }, { "epoch": 0.14469120778825678, "grad_norm": 0.7062254548072815, "learning_rate": 1.9859613154822228e-05, "loss": 0.4747, "step": 1189 }, { "epoch": 0.1448128993002738, "grad_norm": 1.2905937433242798, "learning_rate": 1.985928725224715e-05, "loss": 0.4199, "step": 1190 }, { "epoch": 0.14493459081229085, "grad_norm": 0.667495846748352, "learning_rate": 1.9858960974505786e-05, "loss": 0.4732, "step": 1191 }, { "epoch": 0.14505628232430787, "grad_norm": 2.2584023475646973, "learning_rate": 1.9858634321610553e-05, "loss": 0.4406, "step": 1192 }, { "epoch": 0.14517797383632491, "grad_norm": 2.2858972549438477, "learning_rate": 1.9858307293573873e-05, "loss": 0.5062, "step": 1193 }, { "epoch": 0.14529966534834196, "grad_norm": 2.145503282546997, "learning_rate": 1.9857979890408198e-05, "loss": 0.4937, "step": 1194 }, { "epoch": 0.14542135686035898, "grad_norm": 1.4335459470748901, "learning_rate": 1.985765211212598e-05, "loss": 0.5567, "step": 1195 }, { "epoch": 0.14554304837237603, "grad_norm": 1.0593979358673096, "learning_rate": 1.98573239587397e-05, "loss": 0.5149, "step": 1196 }, { "epoch": 0.14566473988439307, "grad_norm": 2.3031575679779053, "learning_rate": 1.9856995430261837e-05, "loss": 0.4734, "step": 1197 }, { "epoch": 0.1457864313964101, "grad_norm": 3.046948194503784, "learning_rate": 1.9856666526704893e-05, "loss": 0.4595, "step": 1198 }, { "epoch": 0.14590812290842714, "grad_norm": 0.8235745429992676, "learning_rate": 1.9856337248081388e-05, "loss": 0.523, "step": 1199 }, { "epoch": 0.1460298144204442, "grad_norm": 0.8751189112663269, "learning_rate": 1.9856007594403846e-05, "loss": 0.51, "step": 1200 }, { "epoch": 0.1461515059324612, "grad_norm": 1.4345073699951172, "learning_rate": 1.985567756568482e-05, "loss": 0.5157, "step": 1201 }, { "epoch": 0.14627319744447825, "grad_norm": 0.7432553172111511, "learning_rate": 1.9855347161936858e-05, "loss": 0.4679, "step": 1202 }, { "epoch": 0.14639488895649527, "grad_norm": 3.2682695388793945, "learning_rate": 1.9855016383172538e-05, "loss": 0.5176, "step": 1203 }, { "epoch": 0.14651658046851232, "grad_norm": 1.7866331338882446, "learning_rate": 1.9854685229404444e-05, "loss": 0.5018, "step": 1204 }, { "epoch": 0.14663827198052937, "grad_norm": 0.6467131972312927, "learning_rate": 1.9854353700645185e-05, "loss": 0.4415, "step": 1205 }, { "epoch": 0.14675996349254639, "grad_norm": 0.9730408787727356, "learning_rate": 1.9854021796907364e-05, "loss": 0.4247, "step": 1206 }, { "epoch": 0.14688165500456343, "grad_norm": 0.8538654446601868, "learning_rate": 1.985368951820362e-05, "loss": 0.4538, "step": 1207 }, { "epoch": 0.14700334651658048, "grad_norm": 0.6595228314399719, "learning_rate": 1.9853356864546595e-05, "loss": 0.4631, "step": 1208 }, { "epoch": 0.1471250380285975, "grad_norm": 1.1981980800628662, "learning_rate": 1.9853023835948943e-05, "loss": 0.4812, "step": 1209 }, { "epoch": 0.14724672954061455, "grad_norm": 1.0465261936187744, "learning_rate": 1.985269043242334e-05, "loss": 0.4832, "step": 1210 }, { "epoch": 0.14736842105263157, "grad_norm": 0.7678244709968567, "learning_rate": 1.9852356653982473e-05, "loss": 0.473, "step": 1211 }, { "epoch": 0.1474901125646486, "grad_norm": 0.7757473587989807, "learning_rate": 1.985202250063904e-05, "loss": 0.5061, "step": 1212 }, { "epoch": 0.14761180407666566, "grad_norm": 1.121665120124817, "learning_rate": 1.9851687972405763e-05, "loss": 0.4582, "step": 1213 }, { "epoch": 0.14773349558868268, "grad_norm": 1.907209038734436, "learning_rate": 1.985135306929536e-05, "loss": 0.44, "step": 1214 }, { "epoch": 0.14785518710069973, "grad_norm": 0.8149450421333313, "learning_rate": 1.985101779132059e-05, "loss": 0.432, "step": 1215 }, { "epoch": 0.14797687861271677, "grad_norm": 3.072385549545288, "learning_rate": 1.9850682138494193e-05, "loss": 0.4966, "step": 1216 }, { "epoch": 0.1480985701247338, "grad_norm": 2.164401054382324, "learning_rate": 1.9850346110828957e-05, "loss": 0.4656, "step": 1217 }, { "epoch": 0.14822026163675084, "grad_norm": 0.923026978969574, "learning_rate": 1.985000970833766e-05, "loss": 0.4549, "step": 1218 }, { "epoch": 0.14834195314876789, "grad_norm": 0.5282090902328491, "learning_rate": 1.9849672931033107e-05, "loss": 0.4801, "step": 1219 }, { "epoch": 0.1484636446607849, "grad_norm": 1.6033319234848022, "learning_rate": 1.984933577892811e-05, "loss": 0.463, "step": 1220 }, { "epoch": 0.14858533617280195, "grad_norm": 1.5123202800750732, "learning_rate": 1.98489982520355e-05, "loss": 0.4453, "step": 1221 }, { "epoch": 0.14870702768481897, "grad_norm": 1.361024022102356, "learning_rate": 1.984866035036812e-05, "loss": 0.4721, "step": 1222 }, { "epoch": 0.14882871919683602, "grad_norm": 3.041339159011841, "learning_rate": 1.9848322073938832e-05, "loss": 0.5307, "step": 1223 }, { "epoch": 0.14895041070885306, "grad_norm": 0.6843611598014832, "learning_rate": 1.98479834227605e-05, "loss": 0.4554, "step": 1224 }, { "epoch": 0.14907210222087008, "grad_norm": 1.0955889225006104, "learning_rate": 1.984764439684601e-05, "loss": 0.4861, "step": 1225 }, { "epoch": 0.14919379373288713, "grad_norm": 1.1836271286010742, "learning_rate": 1.9847304996208273e-05, "loss": 0.5122, "step": 1226 }, { "epoch": 0.14931548524490418, "grad_norm": 1.690466046333313, "learning_rate": 1.9846965220860198e-05, "loss": 0.5115, "step": 1227 }, { "epoch": 0.1494371767569212, "grad_norm": 2.871076822280884, "learning_rate": 1.9846625070814715e-05, "loss": 0.4335, "step": 1228 }, { "epoch": 0.14955886826893824, "grad_norm": 2.2395992279052734, "learning_rate": 1.9846284546084766e-05, "loss": 0.447, "step": 1229 }, { "epoch": 0.1496805597809553, "grad_norm": 2.40677547454834, "learning_rate": 1.984594364668331e-05, "loss": 0.4331, "step": 1230 }, { "epoch": 0.1498022512929723, "grad_norm": 1.767208456993103, "learning_rate": 1.9845602372623313e-05, "loss": 0.4661, "step": 1231 }, { "epoch": 0.14992394280498936, "grad_norm": 2.165086030960083, "learning_rate": 1.984526072391777e-05, "loss": 0.4881, "step": 1232 }, { "epoch": 0.15004563431700638, "grad_norm": 2.5997352600097656, "learning_rate": 1.984491870057968e-05, "loss": 0.5009, "step": 1233 }, { "epoch": 0.15016732582902342, "grad_norm": 3.7121734619140625, "learning_rate": 1.9844576302622054e-05, "loss": 0.544, "step": 1234 }, { "epoch": 0.15028901734104047, "grad_norm": 0.8754316568374634, "learning_rate": 1.9844233530057923e-05, "loss": 0.4954, "step": 1235 }, { "epoch": 0.1504107088530575, "grad_norm": 3.065603017807007, "learning_rate": 1.984389038290033e-05, "loss": 0.4756, "step": 1236 }, { "epoch": 0.15053240036507454, "grad_norm": 1.9599072933197021, "learning_rate": 1.984354686116233e-05, "loss": 0.4949, "step": 1237 }, { "epoch": 0.15065409187709158, "grad_norm": 5.242061138153076, "learning_rate": 1.9843202964856998e-05, "loss": 0.4772, "step": 1238 }, { "epoch": 0.1507757833891086, "grad_norm": 1.7279301881790161, "learning_rate": 1.984285869399742e-05, "loss": 0.5142, "step": 1239 }, { "epoch": 0.15089747490112565, "grad_norm": 3.280184268951416, "learning_rate": 1.9842514048596696e-05, "loss": 0.4491, "step": 1240 }, { "epoch": 0.1510191664131427, "grad_norm": 3.316697835922241, "learning_rate": 1.9842169028667935e-05, "loss": 0.5483, "step": 1241 }, { "epoch": 0.15114085792515972, "grad_norm": 3.42592453956604, "learning_rate": 1.9841823634224274e-05, "loss": 0.5365, "step": 1242 }, { "epoch": 0.15126254943717676, "grad_norm": 1.5819356441497803, "learning_rate": 1.984147786527885e-05, "loss": 0.4699, "step": 1243 }, { "epoch": 0.15138424094919378, "grad_norm": 2.019106388092041, "learning_rate": 1.9841131721844825e-05, "loss": 0.4797, "step": 1244 }, { "epoch": 0.15150593246121083, "grad_norm": 0.5561219453811646, "learning_rate": 1.9840785203935366e-05, "loss": 0.4535, "step": 1245 }, { "epoch": 0.15162762397322788, "grad_norm": 1.6924302577972412, "learning_rate": 1.9840438311563663e-05, "loss": 0.4818, "step": 1246 }, { "epoch": 0.1517493154852449, "grad_norm": 1.3197678327560425, "learning_rate": 1.984009104474291e-05, "loss": 0.4978, "step": 1247 }, { "epoch": 0.15187100699726194, "grad_norm": 1.0036183595657349, "learning_rate": 1.9839743403486326e-05, "loss": 0.4692, "step": 1248 }, { "epoch": 0.151992698509279, "grad_norm": 1.493024230003357, "learning_rate": 1.9839395387807138e-05, "loss": 0.4888, "step": 1249 }, { "epoch": 0.152114390021296, "grad_norm": 1.1445404291152954, "learning_rate": 1.983904699771859e-05, "loss": 0.469, "step": 1250 }, { "epoch": 0.15223608153331306, "grad_norm": 1.5749421119689941, "learning_rate": 1.983869823323394e-05, "loss": 0.4765, "step": 1251 }, { "epoch": 0.15235777304533007, "grad_norm": 1.6942704916000366, "learning_rate": 1.9838349094366457e-05, "loss": 0.4437, "step": 1252 }, { "epoch": 0.15247946455734712, "grad_norm": 0.5991498827934265, "learning_rate": 1.9837999581129423e-05, "loss": 0.466, "step": 1253 }, { "epoch": 0.15260115606936417, "grad_norm": 1.3291352987289429, "learning_rate": 1.9837649693536146e-05, "loss": 0.4655, "step": 1254 }, { "epoch": 0.1527228475813812, "grad_norm": 1.4382669925689697, "learning_rate": 1.9837299431599934e-05, "loss": 0.476, "step": 1255 }, { "epoch": 0.15284453909339824, "grad_norm": 1.5753225088119507, "learning_rate": 1.9836948795334113e-05, "loss": 0.4147, "step": 1256 }, { "epoch": 0.15296623060541528, "grad_norm": 3.0628366470336914, "learning_rate": 1.9836597784752032e-05, "loss": 0.5525, "step": 1257 }, { "epoch": 0.1530879221174323, "grad_norm": 0.7171027064323425, "learning_rate": 1.9836246399867043e-05, "loss": 0.4853, "step": 1258 }, { "epoch": 0.15320961362944935, "grad_norm": 0.6188491582870483, "learning_rate": 1.983589464069252e-05, "loss": 0.4688, "step": 1259 }, { "epoch": 0.1533313051414664, "grad_norm": 2.889864444732666, "learning_rate": 1.9835542507241847e-05, "loss": 0.468, "step": 1260 }, { "epoch": 0.15345299665348341, "grad_norm": 0.6995101571083069, "learning_rate": 1.9835189999528425e-05, "loss": 0.5083, "step": 1261 }, { "epoch": 0.15357468816550046, "grad_norm": 1.239412784576416, "learning_rate": 1.9834837117565662e-05, "loss": 0.4339, "step": 1262 }, { "epoch": 0.15369637967751748, "grad_norm": 1.104770541191101, "learning_rate": 1.9834483861366992e-05, "loss": 0.4905, "step": 1263 }, { "epoch": 0.15381807118953453, "grad_norm": 1.2953804731369019, "learning_rate": 1.9834130230945853e-05, "loss": 0.4552, "step": 1264 }, { "epoch": 0.15393976270155157, "grad_norm": 3.1955671310424805, "learning_rate": 1.9833776226315705e-05, "loss": 0.5419, "step": 1265 }, { "epoch": 0.1540614542135686, "grad_norm": 0.6878771781921387, "learning_rate": 1.9833421847490016e-05, "loss": 0.4383, "step": 1266 }, { "epoch": 0.15418314572558564, "grad_norm": 1.1765056848526, "learning_rate": 1.983306709448227e-05, "loss": 0.4682, "step": 1267 }, { "epoch": 0.1543048372376027, "grad_norm": 3.464308023452759, "learning_rate": 1.9832711967305972e-05, "loss": 0.5212, "step": 1268 }, { "epoch": 0.1544265287496197, "grad_norm": 0.6605225205421448, "learning_rate": 1.9832356465974623e-05, "loss": 0.4457, "step": 1269 }, { "epoch": 0.15454822026163675, "grad_norm": 1.1634217500686646, "learning_rate": 1.9832000590501764e-05, "loss": 0.4741, "step": 1270 }, { "epoch": 0.1546699117736538, "grad_norm": 0.6154793500900269, "learning_rate": 1.9831644340900934e-05, "loss": 0.4729, "step": 1271 }, { "epoch": 0.15479160328567082, "grad_norm": 1.2960669994354248, "learning_rate": 1.9831287717185685e-05, "loss": 0.4494, "step": 1272 }, { "epoch": 0.15491329479768787, "grad_norm": 1.1229777336120605, "learning_rate": 1.9830930719369587e-05, "loss": 0.4363, "step": 1273 }, { "epoch": 0.1550349863097049, "grad_norm": 1.195603847503662, "learning_rate": 1.9830573347466226e-05, "loss": 0.4574, "step": 1274 }, { "epoch": 0.15515667782172193, "grad_norm": 2.0545639991760254, "learning_rate": 1.9830215601489202e-05, "loss": 0.485, "step": 1275 }, { "epoch": 0.15527836933373898, "grad_norm": 2.203857660293579, "learning_rate": 1.9829857481452125e-05, "loss": 0.5288, "step": 1276 }, { "epoch": 0.155400060845756, "grad_norm": 1.737296462059021, "learning_rate": 1.9829498987368626e-05, "loss": 0.4403, "step": 1277 }, { "epoch": 0.15552175235777305, "grad_norm": 2.9370386600494385, "learning_rate": 1.9829140119252345e-05, "loss": 0.4675, "step": 1278 }, { "epoch": 0.1556434438697901, "grad_norm": 0.869575560092926, "learning_rate": 1.9828780877116936e-05, "loss": 0.5189, "step": 1279 }, { "epoch": 0.1557651353818071, "grad_norm": 1.3392378091812134, "learning_rate": 1.982842126097607e-05, "loss": 0.4932, "step": 1280 }, { "epoch": 0.15588682689382416, "grad_norm": 0.5321990847587585, "learning_rate": 1.9828061270843434e-05, "loss": 0.4607, "step": 1281 }, { "epoch": 0.15600851840584118, "grad_norm": 2.7002933025360107, "learning_rate": 1.982770090673272e-05, "loss": 0.4551, "step": 1282 }, { "epoch": 0.15613020991785823, "grad_norm": 1.1558703184127808, "learning_rate": 1.9827340168657648e-05, "loss": 0.4484, "step": 1283 }, { "epoch": 0.15625190142987527, "grad_norm": 1.924221396446228, "learning_rate": 1.982697905663194e-05, "loss": 0.5133, "step": 1284 }, { "epoch": 0.1563735929418923, "grad_norm": 0.8073462247848511, "learning_rate": 1.9826617570669336e-05, "loss": 0.4456, "step": 1285 }, { "epoch": 0.15649528445390934, "grad_norm": 0.9620110988616943, "learning_rate": 1.9826255710783595e-05, "loss": 0.482, "step": 1286 }, { "epoch": 0.15661697596592639, "grad_norm": 1.0379416942596436, "learning_rate": 1.9825893476988485e-05, "loss": 0.4754, "step": 1287 }, { "epoch": 0.1567386674779434, "grad_norm": 1.1025772094726562, "learning_rate": 1.9825530869297788e-05, "loss": 0.4721, "step": 1288 }, { "epoch": 0.15686035898996045, "grad_norm": 0.9665660858154297, "learning_rate": 1.9825167887725305e-05, "loss": 0.4766, "step": 1289 }, { "epoch": 0.1569820505019775, "grad_norm": 1.3248168230056763, "learning_rate": 1.9824804532284846e-05, "loss": 0.4727, "step": 1290 }, { "epoch": 0.15710374201399452, "grad_norm": 0.65620356798172, "learning_rate": 1.982444080299024e-05, "loss": 0.4748, "step": 1291 }, { "epoch": 0.15722543352601157, "grad_norm": 0.9081881046295166, "learning_rate": 1.9824076699855324e-05, "loss": 0.5071, "step": 1292 }, { "epoch": 0.15734712503802858, "grad_norm": 1.0297307968139648, "learning_rate": 1.982371222289396e-05, "loss": 0.5102, "step": 1293 }, { "epoch": 0.15746881655004563, "grad_norm": 1.0757161378860474, "learning_rate": 1.9823347372120008e-05, "loss": 0.5321, "step": 1294 }, { "epoch": 0.15759050806206268, "grad_norm": 1.443177580833435, "learning_rate": 1.9822982147547353e-05, "loss": 0.4689, "step": 1295 }, { "epoch": 0.1577121995740797, "grad_norm": 2.8265788555145264, "learning_rate": 1.9822616549189898e-05, "loss": 0.4059, "step": 1296 }, { "epoch": 0.15783389108609674, "grad_norm": 1.7425169944763184, "learning_rate": 1.982225057706155e-05, "loss": 0.494, "step": 1297 }, { "epoch": 0.1579555825981138, "grad_norm": 0.5316634774208069, "learning_rate": 1.9821884231176237e-05, "loss": 0.4543, "step": 1298 }, { "epoch": 0.1580772741101308, "grad_norm": 4.981151580810547, "learning_rate": 1.98215175115479e-05, "loss": 0.5478, "step": 1299 }, { "epoch": 0.15819896562214786, "grad_norm": 1.6411571502685547, "learning_rate": 1.9821150418190492e-05, "loss": 0.3831, "step": 1300 }, { "epoch": 0.1583206571341649, "grad_norm": 1.6752369403839111, "learning_rate": 1.982078295111798e-05, "loss": 0.4786, "step": 1301 }, { "epoch": 0.15844234864618192, "grad_norm": 3.5548789501190186, "learning_rate": 1.982041511034435e-05, "loss": 0.552, "step": 1302 }, { "epoch": 0.15856404015819897, "grad_norm": 0.6623461842536926, "learning_rate": 1.9820046895883596e-05, "loss": 0.4651, "step": 1303 }, { "epoch": 0.158685731670216, "grad_norm": 0.5890607833862305, "learning_rate": 1.9819678307749735e-05, "loss": 0.4894, "step": 1304 }, { "epoch": 0.15880742318223304, "grad_norm": 1.6179745197296143, "learning_rate": 1.981930934595679e-05, "loss": 0.4875, "step": 1305 }, { "epoch": 0.15892911469425008, "grad_norm": 3.996370792388916, "learning_rate": 1.9818940010518798e-05, "loss": 0.4228, "step": 1306 }, { "epoch": 0.1590508062062671, "grad_norm": 1.6369858980178833, "learning_rate": 1.9818570301449812e-05, "loss": 0.4584, "step": 1307 }, { "epoch": 0.15917249771828415, "grad_norm": 2.4803812503814697, "learning_rate": 1.98182002187639e-05, "loss": 0.5223, "step": 1308 }, { "epoch": 0.1592941892303012, "grad_norm": 1.377143383026123, "learning_rate": 1.9817829762475154e-05, "loss": 0.4676, "step": 1309 }, { "epoch": 0.15941588074231822, "grad_norm": 1.393758773803711, "learning_rate": 1.981745893259766e-05, "loss": 0.4693, "step": 1310 }, { "epoch": 0.15953757225433526, "grad_norm": 1.0268025398254395, "learning_rate": 1.9817087729145532e-05, "loss": 0.4291, "step": 1311 }, { "epoch": 0.1596592637663523, "grad_norm": 4.2349348068237305, "learning_rate": 1.9816716152132897e-05, "loss": 0.552, "step": 1312 }, { "epoch": 0.15978095527836933, "grad_norm": 0.6407625675201416, "learning_rate": 1.9816344201573895e-05, "loss": 0.4616, "step": 1313 }, { "epoch": 0.15990264679038638, "grad_norm": 0.6401371359825134, "learning_rate": 1.9815971877482676e-05, "loss": 0.4627, "step": 1314 }, { "epoch": 0.1600243383024034, "grad_norm": 1.0625510215759277, "learning_rate": 1.9815599179873407e-05, "loss": 0.4909, "step": 1315 }, { "epoch": 0.16014602981442044, "grad_norm": 1.3133800029754639, "learning_rate": 1.981522610876027e-05, "loss": 0.4868, "step": 1316 }, { "epoch": 0.1602677213264375, "grad_norm": 0.6777065396308899, "learning_rate": 1.981485266415747e-05, "loss": 0.5052, "step": 1317 }, { "epoch": 0.1603894128384545, "grad_norm": 1.8892720937728882, "learning_rate": 1.9814478846079206e-05, "loss": 0.4298, "step": 1318 }, { "epoch": 0.16051110435047156, "grad_norm": 2.186417818069458, "learning_rate": 1.981410465453971e-05, "loss": 0.4795, "step": 1319 }, { "epoch": 0.1606327958624886, "grad_norm": 2.658979892730713, "learning_rate": 1.9813730089553217e-05, "loss": 0.5155, "step": 1320 }, { "epoch": 0.16075448737450562, "grad_norm": 1.0589911937713623, "learning_rate": 1.9813355151133977e-05, "loss": 0.4451, "step": 1321 }, { "epoch": 0.16087617888652267, "grad_norm": 1.619744062423706, "learning_rate": 1.9812979839296264e-05, "loss": 0.4959, "step": 1322 }, { "epoch": 0.1609978703985397, "grad_norm": 0.577512800693512, "learning_rate": 1.981260415405436e-05, "loss": 0.4367, "step": 1323 }, { "epoch": 0.16111956191055674, "grad_norm": 3.114175319671631, "learning_rate": 1.981222809542255e-05, "loss": 0.5297, "step": 1324 }, { "epoch": 0.16124125342257378, "grad_norm": 0.6847550868988037, "learning_rate": 1.9811851663415153e-05, "loss": 0.5072, "step": 1325 }, { "epoch": 0.1613629449345908, "grad_norm": 0.6069439649581909, "learning_rate": 1.981147485804649e-05, "loss": 0.5412, "step": 1326 }, { "epoch": 0.16148463644660785, "grad_norm": 2.925081729888916, "learning_rate": 1.9811097679330902e-05, "loss": 0.48, "step": 1327 }, { "epoch": 0.1616063279586249, "grad_norm": 4.44187068939209, "learning_rate": 1.981072012728274e-05, "loss": 0.4733, "step": 1328 }, { "epoch": 0.16172801947064191, "grad_norm": 3.3925576210021973, "learning_rate": 1.981034220191637e-05, "loss": 0.4853, "step": 1329 }, { "epoch": 0.16184971098265896, "grad_norm": 3.8604345321655273, "learning_rate": 1.980996390324617e-05, "loss": 0.439, "step": 1330 }, { "epoch": 0.161971402494676, "grad_norm": 0.7895316481590271, "learning_rate": 1.980958523128654e-05, "loss": 0.4518, "step": 1331 }, { "epoch": 0.16209309400669303, "grad_norm": 2.4462637901306152, "learning_rate": 1.9809206186051888e-05, "loss": 0.3835, "step": 1332 }, { "epoch": 0.16221478551871008, "grad_norm": 5.019735336303711, "learning_rate": 1.9808826767556636e-05, "loss": 0.5215, "step": 1333 }, { "epoch": 0.1623364770307271, "grad_norm": 1.9361854791641235, "learning_rate": 1.9808446975815224e-05, "loss": 0.4501, "step": 1334 }, { "epoch": 0.16245816854274414, "grad_norm": 2.8780288696289062, "learning_rate": 1.98080668108421e-05, "loss": 0.4343, "step": 1335 }, { "epoch": 0.1625798600547612, "grad_norm": 3.8520593643188477, "learning_rate": 1.9807686272651732e-05, "loss": 0.5221, "step": 1336 }, { "epoch": 0.1627015515667782, "grad_norm": 3.5907115936279297, "learning_rate": 1.9807305361258603e-05, "loss": 0.4857, "step": 1337 }, { "epoch": 0.16282324307879525, "grad_norm": 0.6669024229049683, "learning_rate": 1.98069240766772e-05, "loss": 0.4889, "step": 1338 }, { "epoch": 0.1629449345908123, "grad_norm": 1.2091970443725586, "learning_rate": 1.980654241892204e-05, "loss": 0.5279, "step": 1339 }, { "epoch": 0.16306662610282932, "grad_norm": 2.0911097526550293, "learning_rate": 1.9806160388007644e-05, "loss": 0.5004, "step": 1340 }, { "epoch": 0.16318831761484637, "grad_norm": 2.878972053527832, "learning_rate": 1.9805777983948545e-05, "loss": 0.5446, "step": 1341 }, { "epoch": 0.16331000912686341, "grad_norm": 4.621723175048828, "learning_rate": 1.98053952067593e-05, "loss": 0.5321, "step": 1342 }, { "epoch": 0.16343170063888043, "grad_norm": 6.323509216308594, "learning_rate": 1.980501205645447e-05, "loss": 0.4841, "step": 1343 }, { "epoch": 0.16355339215089748, "grad_norm": 4.261968612670898, "learning_rate": 1.9804628533048635e-05, "loss": 0.4904, "step": 1344 }, { "epoch": 0.1636750836629145, "grad_norm": 1.5866800546646118, "learning_rate": 1.980424463655639e-05, "loss": 0.516, "step": 1345 }, { "epoch": 0.16379677517493155, "grad_norm": 1.8813953399658203, "learning_rate": 1.980386036699234e-05, "loss": 0.4551, "step": 1346 }, { "epoch": 0.1639184666869486, "grad_norm": 1.9495863914489746, "learning_rate": 1.9803475724371114e-05, "loss": 0.4733, "step": 1347 }, { "epoch": 0.1640401581989656, "grad_norm": 5.486594200134277, "learning_rate": 1.9803090708707344e-05, "loss": 0.5705, "step": 1348 }, { "epoch": 0.16416184971098266, "grad_norm": 4.218322277069092, "learning_rate": 1.980270532001568e-05, "loss": 0.5275, "step": 1349 }, { "epoch": 0.1642835412229997, "grad_norm": 2.9367752075195312, "learning_rate": 1.9802319558310793e-05, "loss": 0.4456, "step": 1350 }, { "epoch": 0.16440523273501673, "grad_norm": 3.1779539585113525, "learning_rate": 1.980193342360735e-05, "loss": 0.4785, "step": 1351 }, { "epoch": 0.16452692424703377, "grad_norm": 3.3056774139404297, "learning_rate": 1.9801546915920056e-05, "loss": 0.5167, "step": 1352 }, { "epoch": 0.1646486157590508, "grad_norm": 1.3719897270202637, "learning_rate": 1.9801160035263615e-05, "loss": 0.43, "step": 1353 }, { "epoch": 0.16477030727106784, "grad_norm": 2.068129539489746, "learning_rate": 1.9800772781652743e-05, "loss": 0.4732, "step": 1354 }, { "epoch": 0.1648919987830849, "grad_norm": 2.7401654720306396, "learning_rate": 1.980038515510218e-05, "loss": 0.4272, "step": 1355 }, { "epoch": 0.1650136902951019, "grad_norm": 2.1367151737213135, "learning_rate": 1.9799997155626677e-05, "loss": 0.4377, "step": 1356 }, { "epoch": 0.16513538180711895, "grad_norm": 2.0781209468841553, "learning_rate": 1.9799608783241e-05, "loss": 0.5002, "step": 1357 }, { "epoch": 0.165257073319136, "grad_norm": 2.80288028717041, "learning_rate": 1.9799220037959924e-05, "loss": 0.4984, "step": 1358 }, { "epoch": 0.16537876483115302, "grad_norm": 2.189610719680786, "learning_rate": 1.979883091979824e-05, "loss": 0.5228, "step": 1359 }, { "epoch": 0.16550045634317007, "grad_norm": 3.734894037246704, "learning_rate": 1.979844142877076e-05, "loss": 0.4173, "step": 1360 }, { "epoch": 0.1656221478551871, "grad_norm": 1.6671816110610962, "learning_rate": 1.97980515648923e-05, "loss": 0.4727, "step": 1361 }, { "epoch": 0.16574383936720413, "grad_norm": 0.8954133987426758, "learning_rate": 1.9797661328177696e-05, "loss": 0.4993, "step": 1362 }, { "epoch": 0.16586553087922118, "grad_norm": 0.8533859848976135, "learning_rate": 1.9797270718641803e-05, "loss": 0.4749, "step": 1363 }, { "epoch": 0.1659872223912382, "grad_norm": 2.5973927974700928, "learning_rate": 1.9796879736299476e-05, "loss": 0.549, "step": 1364 }, { "epoch": 0.16610891390325525, "grad_norm": 3.560835838317871, "learning_rate": 1.9796488381165595e-05, "loss": 0.3948, "step": 1365 }, { "epoch": 0.1662306054152723, "grad_norm": 1.5512914657592773, "learning_rate": 1.9796096653255056e-05, "loss": 0.4799, "step": 1366 }, { "epoch": 0.1663522969272893, "grad_norm": 0.5944809913635254, "learning_rate": 1.9795704552582765e-05, "loss": 0.4685, "step": 1367 }, { "epoch": 0.16647398843930636, "grad_norm": 0.7071087956428528, "learning_rate": 1.9795312079163634e-05, "loss": 0.4407, "step": 1368 }, { "epoch": 0.1665956799513234, "grad_norm": 1.7477627992630005, "learning_rate": 1.979491923301261e-05, "loss": 0.4873, "step": 1369 }, { "epoch": 0.16671737146334042, "grad_norm": 2.589707136154175, "learning_rate": 1.979452601414463e-05, "loss": 0.5397, "step": 1370 }, { "epoch": 0.16683906297535747, "grad_norm": 1.9456111192703247, "learning_rate": 1.9794132422574663e-05, "loss": 0.4219, "step": 1371 }, { "epoch": 0.16696075448737452, "grad_norm": 0.8413524627685547, "learning_rate": 1.9793738458317683e-05, "loss": 0.4416, "step": 1372 }, { "epoch": 0.16708244599939154, "grad_norm": 1.5911568403244019, "learning_rate": 1.9793344121388687e-05, "loss": 0.4726, "step": 1373 }, { "epoch": 0.16720413751140858, "grad_norm": 1.8373585939407349, "learning_rate": 1.979294941180267e-05, "loss": 0.5415, "step": 1374 }, { "epoch": 0.1673258290234256, "grad_norm": 0.7813757658004761, "learning_rate": 1.9792554329574663e-05, "loss": 0.4752, "step": 1375 }, { "epoch": 0.16744752053544265, "grad_norm": 3.577542781829834, "learning_rate": 1.979215887471969e-05, "loss": 0.4165, "step": 1376 }, { "epoch": 0.1675692120474597, "grad_norm": 0.7695497870445251, "learning_rate": 1.9791763047252805e-05, "loss": 0.475, "step": 1377 }, { "epoch": 0.16769090355947672, "grad_norm": 0.779041588306427, "learning_rate": 1.979136684718907e-05, "loss": 0.4779, "step": 1378 }, { "epoch": 0.16781259507149376, "grad_norm": 0.7756707072257996, "learning_rate": 1.9790970274543557e-05, "loss": 0.4676, "step": 1379 }, { "epoch": 0.1679342865835108, "grad_norm": 0.7839758992195129, "learning_rate": 1.979057332933136e-05, "loss": 0.4524, "step": 1380 }, { "epoch": 0.16805597809552783, "grad_norm": 0.6166628003120422, "learning_rate": 1.9790176011567583e-05, "loss": 0.4534, "step": 1381 }, { "epoch": 0.16817766960754488, "grad_norm": 0.7479401230812073, "learning_rate": 1.9789778321267343e-05, "loss": 0.4626, "step": 1382 }, { "epoch": 0.16829936111956192, "grad_norm": 0.7742567658424377, "learning_rate": 1.9789380258445777e-05, "loss": 0.4857, "step": 1383 }, { "epoch": 0.16842105263157894, "grad_norm": 0.9864305853843689, "learning_rate": 1.9788981823118027e-05, "loss": 0.5222, "step": 1384 }, { "epoch": 0.168542744143596, "grad_norm": 0.8407061100006104, "learning_rate": 1.9788583015299254e-05, "loss": 0.4125, "step": 1385 }, { "epoch": 0.168664435655613, "grad_norm": 4.476330280303955, "learning_rate": 1.978818383500464e-05, "loss": 0.5687, "step": 1386 }, { "epoch": 0.16878612716763006, "grad_norm": 0.6254599094390869, "learning_rate": 1.9787784282249366e-05, "loss": 0.5055, "step": 1387 }, { "epoch": 0.1689078186796471, "grad_norm": 4.242245674133301, "learning_rate": 1.9787384357048645e-05, "loss": 0.408, "step": 1388 }, { "epoch": 0.16902951019166412, "grad_norm": 0.7404464483261108, "learning_rate": 1.978698405941769e-05, "loss": 0.5097, "step": 1389 }, { "epoch": 0.16915120170368117, "grad_norm": 3.682858467102051, "learning_rate": 1.9786583389371733e-05, "loss": 0.4462, "step": 1390 }, { "epoch": 0.16927289321569822, "grad_norm": 0.7396299839019775, "learning_rate": 1.978618234692602e-05, "loss": 0.461, "step": 1391 }, { "epoch": 0.16939458472771524, "grad_norm": 1.486547827720642, "learning_rate": 1.9785780932095812e-05, "loss": 0.4913, "step": 1392 }, { "epoch": 0.16951627623973228, "grad_norm": 2.3887839317321777, "learning_rate": 1.9785379144896385e-05, "loss": 0.4833, "step": 1393 }, { "epoch": 0.1696379677517493, "grad_norm": 3.684333324432373, "learning_rate": 1.9784976985343028e-05, "loss": 0.5747, "step": 1394 }, { "epoch": 0.16975965926376635, "grad_norm": 1.318081259727478, "learning_rate": 1.978457445345104e-05, "loss": 0.5194, "step": 1395 }, { "epoch": 0.1698813507757834, "grad_norm": 1.0038377046585083, "learning_rate": 1.9784171549235743e-05, "loss": 0.49, "step": 1396 }, { "epoch": 0.17000304228780042, "grad_norm": 1.3077162504196167, "learning_rate": 1.9783768272712467e-05, "loss": 0.4569, "step": 1397 }, { "epoch": 0.17012473379981746, "grad_norm": 4.728083610534668, "learning_rate": 1.9783364623896557e-05, "loss": 0.4363, "step": 1398 }, { "epoch": 0.1702464253118345, "grad_norm": 0.90293288230896, "learning_rate": 1.978296060280337e-05, "loss": 0.5272, "step": 1399 }, { "epoch": 0.17036811682385153, "grad_norm": 0.9383813738822937, "learning_rate": 1.9782556209448285e-05, "loss": 0.484, "step": 1400 }, { "epoch": 0.17048980833586858, "grad_norm": 1.6882658004760742, "learning_rate": 1.978215144384669e-05, "loss": 0.4673, "step": 1401 }, { "epoch": 0.17061149984788562, "grad_norm": 1.3856250047683716, "learning_rate": 1.978174630601398e-05, "loss": 0.4766, "step": 1402 }, { "epoch": 0.17073319135990264, "grad_norm": 1.3125702142715454, "learning_rate": 1.9781340795965577e-05, "loss": 0.4922, "step": 1403 }, { "epoch": 0.1708548828719197, "grad_norm": 2.7786688804626465, "learning_rate": 1.978093491371691e-05, "loss": 0.4305, "step": 1404 }, { "epoch": 0.1709765743839367, "grad_norm": 1.0468382835388184, "learning_rate": 1.9780528659283424e-05, "loss": 0.494, "step": 1405 }, { "epoch": 0.17109826589595376, "grad_norm": 0.6543203592300415, "learning_rate": 1.9780122032680578e-05, "loss": 0.4446, "step": 1406 }, { "epoch": 0.1712199574079708, "grad_norm": 2.7950873374938965, "learning_rate": 1.9779715033923846e-05, "loss": 0.5344, "step": 1407 }, { "epoch": 0.17134164891998782, "grad_norm": 1.3265191316604614, "learning_rate": 1.977930766302872e-05, "loss": 0.5046, "step": 1408 }, { "epoch": 0.17146334043200487, "grad_norm": 2.336111068725586, "learning_rate": 1.9778899920010682e-05, "loss": 0.5123, "step": 1409 }, { "epoch": 0.17158503194402192, "grad_norm": 2.7772269248962402, "learning_rate": 1.977849180488527e-05, "loss": 0.4722, "step": 1410 }, { "epoch": 0.17170672345603893, "grad_norm": 0.832023561000824, "learning_rate": 1.9778083317668004e-05, "loss": 0.5053, "step": 1411 }, { "epoch": 0.17182841496805598, "grad_norm": 2.150932550430298, "learning_rate": 1.9777674458374428e-05, "loss": 0.4809, "step": 1412 }, { "epoch": 0.17195010648007303, "grad_norm": 3.3440325260162354, "learning_rate": 1.9777265227020096e-05, "loss": 0.4613, "step": 1413 }, { "epoch": 0.17207179799209005, "grad_norm": 3.8201725482940674, "learning_rate": 1.9776855623620588e-05, "loss": 0.4687, "step": 1414 }, { "epoch": 0.1721934895041071, "grad_norm": 0.9091955423355103, "learning_rate": 1.9776445648191488e-05, "loss": 0.5033, "step": 1415 }, { "epoch": 0.1723151810161241, "grad_norm": 0.576927125453949, "learning_rate": 1.977603530074839e-05, "loss": 0.4989, "step": 1416 }, { "epoch": 0.17243687252814116, "grad_norm": 0.6666160821914673, "learning_rate": 1.9775624581306917e-05, "loss": 0.4638, "step": 1417 }, { "epoch": 0.1725585640401582, "grad_norm": 0.6030458211898804, "learning_rate": 1.9775213489882693e-05, "loss": 0.4479, "step": 1418 }, { "epoch": 0.17268025555217523, "grad_norm": 2.0674116611480713, "learning_rate": 1.9774802026491363e-05, "loss": 0.4862, "step": 1419 }, { "epoch": 0.17280194706419227, "grad_norm": 0.6450443863868713, "learning_rate": 1.977439019114858e-05, "loss": 0.4664, "step": 1420 }, { "epoch": 0.17292363857620932, "grad_norm": 0.700458824634552, "learning_rate": 1.9773977983870023e-05, "loss": 0.452, "step": 1421 }, { "epoch": 0.17304533008822634, "grad_norm": 1.8581122159957886, "learning_rate": 1.9773565404671374e-05, "loss": 0.4482, "step": 1422 }, { "epoch": 0.1731670216002434, "grad_norm": 1.016422986984253, "learning_rate": 1.9773152453568326e-05, "loss": 0.3997, "step": 1423 }, { "epoch": 0.1732887131122604, "grad_norm": 1.2067203521728516, "learning_rate": 1.9772739130576598e-05, "loss": 0.4319, "step": 1424 }, { "epoch": 0.17341040462427745, "grad_norm": 2.8970839977264404, "learning_rate": 1.977232543571192e-05, "loss": 0.4704, "step": 1425 }, { "epoch": 0.1735320961362945, "grad_norm": 2.0348024368286133, "learning_rate": 1.9771911368990032e-05, "loss": 0.4478, "step": 1426 }, { "epoch": 0.17365378764831152, "grad_norm": 1.62767493724823, "learning_rate": 1.977149693042669e-05, "loss": 0.4721, "step": 1427 }, { "epoch": 0.17377547916032857, "grad_norm": 2.4103634357452393, "learning_rate": 1.9771082120037662e-05, "loss": 0.4777, "step": 1428 }, { "epoch": 0.1738971706723456, "grad_norm": 1.2694464921951294, "learning_rate": 1.9770666937838736e-05, "loss": 0.4837, "step": 1429 }, { "epoch": 0.17401886218436263, "grad_norm": 0.7212273478507996, "learning_rate": 1.9770251383845705e-05, "loss": 0.4851, "step": 1430 }, { "epoch": 0.17414055369637968, "grad_norm": 1.1466623544692993, "learning_rate": 1.976983545807439e-05, "loss": 0.5138, "step": 1431 }, { "epoch": 0.17426224520839673, "grad_norm": 4.840686321258545, "learning_rate": 1.976941916054061e-05, "loss": 0.4672, "step": 1432 }, { "epoch": 0.17438393672041375, "grad_norm": 2.6939663887023926, "learning_rate": 1.9769002491260213e-05, "loss": 0.4899, "step": 1433 }, { "epoch": 0.1745056282324308, "grad_norm": 1.769325613975525, "learning_rate": 1.976858545024905e-05, "loss": 0.5061, "step": 1434 }, { "epoch": 0.1746273197444478, "grad_norm": 2.664933681488037, "learning_rate": 1.9768168037522984e-05, "loss": 0.4518, "step": 1435 }, { "epoch": 0.17474901125646486, "grad_norm": 1.2224141359329224, "learning_rate": 1.9767750253097914e-05, "loss": 0.4823, "step": 1436 }, { "epoch": 0.1748707027684819, "grad_norm": 1.0934165716171265, "learning_rate": 1.9767332096989725e-05, "loss": 0.4899, "step": 1437 }, { "epoch": 0.17499239428049893, "grad_norm": 4.517724990844727, "learning_rate": 1.976691356921433e-05, "loss": 0.5438, "step": 1438 }, { "epoch": 0.17511408579251597, "grad_norm": 1.3791165351867676, "learning_rate": 1.9766494669787662e-05, "loss": 0.4578, "step": 1439 }, { "epoch": 0.17523577730453302, "grad_norm": 1.3613044023513794, "learning_rate": 1.9766075398725654e-05, "loss": 0.5075, "step": 1440 }, { "epoch": 0.17535746881655004, "grad_norm": 1.1534603834152222, "learning_rate": 1.976565575604426e-05, "loss": 0.465, "step": 1441 }, { "epoch": 0.17547916032856709, "grad_norm": 2.8964574337005615, "learning_rate": 1.9765235741759455e-05, "loss": 0.4515, "step": 1442 }, { "epoch": 0.17560085184058413, "grad_norm": 1.6747967004776, "learning_rate": 1.9764815355887217e-05, "loss": 0.499, "step": 1443 }, { "epoch": 0.17572254335260115, "grad_norm": 2.8970320224761963, "learning_rate": 1.976439459844354e-05, "loss": 0.4272, "step": 1444 }, { "epoch": 0.1758442348646182, "grad_norm": 2.2143208980560303, "learning_rate": 1.976397346944444e-05, "loss": 0.4318, "step": 1445 }, { "epoch": 0.17596592637663522, "grad_norm": 2.0721049308776855, "learning_rate": 1.9763551968905936e-05, "loss": 0.4664, "step": 1446 }, { "epoch": 0.17608761788865226, "grad_norm": 3.9072515964508057, "learning_rate": 1.976313009684407e-05, "loss": 0.5073, "step": 1447 }, { "epoch": 0.1762093094006693, "grad_norm": 1.4120219945907593, "learning_rate": 1.9762707853274898e-05, "loss": 0.4753, "step": 1448 }, { "epoch": 0.17633100091268633, "grad_norm": 1.265520691871643, "learning_rate": 1.9762285238214485e-05, "loss": 0.4801, "step": 1449 }, { "epoch": 0.17645269242470338, "grad_norm": 0.5826008915901184, "learning_rate": 1.976186225167891e-05, "loss": 0.4543, "step": 1450 }, { "epoch": 0.17657438393672042, "grad_norm": 0.7945842146873474, "learning_rate": 1.976143889368427e-05, "loss": 0.4836, "step": 1451 }, { "epoch": 0.17669607544873744, "grad_norm": 1.0744837522506714, "learning_rate": 1.9761015164246675e-05, "loss": 0.5164, "step": 1452 }, { "epoch": 0.1768177669607545, "grad_norm": 0.738902747631073, "learning_rate": 1.976059106338225e-05, "loss": 0.5643, "step": 1453 }, { "epoch": 0.17693945847277154, "grad_norm": 3.688669204711914, "learning_rate": 1.976016659110713e-05, "loss": 0.5167, "step": 1454 }, { "epoch": 0.17706114998478856, "grad_norm": 8.816498756408691, "learning_rate": 1.975974174743747e-05, "loss": 0.4466, "step": 1455 }, { "epoch": 0.1771828414968056, "grad_norm": 6.204555034637451, "learning_rate": 1.9759316532389436e-05, "loss": 0.4853, "step": 1456 }, { "epoch": 0.17730453300882262, "grad_norm": 3.5571959018707275, "learning_rate": 1.9758890945979207e-05, "loss": 0.521, "step": 1457 }, { "epoch": 0.17742622452083967, "grad_norm": 0.8225950002670288, "learning_rate": 1.9758464988222977e-05, "loss": 0.5098, "step": 1458 }, { "epoch": 0.17754791603285672, "grad_norm": 1.2101472616195679, "learning_rate": 1.9758038659136953e-05, "loss": 0.5098, "step": 1459 }, { "epoch": 0.17766960754487374, "grad_norm": 0.677570641040802, "learning_rate": 1.9757611958737364e-05, "loss": 0.4861, "step": 1460 }, { "epoch": 0.17779129905689078, "grad_norm": 0.9192119836807251, "learning_rate": 1.975718488704044e-05, "loss": 0.4325, "step": 1461 }, { "epoch": 0.17791299056890783, "grad_norm": 5.186525344848633, "learning_rate": 1.9756757444062436e-05, "loss": 0.5557, "step": 1462 }, { "epoch": 0.17803468208092485, "grad_norm": 5.008727073669434, "learning_rate": 1.9756329629819613e-05, "loss": 0.5383, "step": 1463 }, { "epoch": 0.1781563735929419, "grad_norm": 4.002062797546387, "learning_rate": 1.9755901444328257e-05, "loss": 0.4975, "step": 1464 }, { "epoch": 0.17827806510495892, "grad_norm": 2.736166477203369, "learning_rate": 1.9755472887604657e-05, "loss": 0.4873, "step": 1465 }, { "epoch": 0.17839975661697596, "grad_norm": 1.2713899612426758, "learning_rate": 1.975504395966512e-05, "loss": 0.4485, "step": 1466 }, { "epoch": 0.178521448128993, "grad_norm": 0.8007040023803711, "learning_rate": 1.975461466052597e-05, "loss": 0.4454, "step": 1467 }, { "epoch": 0.17864313964101003, "grad_norm": 1.5794107913970947, "learning_rate": 1.975418499020354e-05, "loss": 0.4887, "step": 1468 }, { "epoch": 0.17876483115302708, "grad_norm": 1.300399899482727, "learning_rate": 1.975375494871418e-05, "loss": 0.4707, "step": 1469 }, { "epoch": 0.17888652266504412, "grad_norm": 0.7974724769592285, "learning_rate": 1.9753324536074255e-05, "loss": 0.5093, "step": 1470 }, { "epoch": 0.17900821417706114, "grad_norm": 3.367178201675415, "learning_rate": 1.9752893752300145e-05, "loss": 0.441, "step": 1471 }, { "epoch": 0.1791299056890782, "grad_norm": 0.9021749496459961, "learning_rate": 1.975246259740824e-05, "loss": 0.4766, "step": 1472 }, { "epoch": 0.17925159720109524, "grad_norm": 0.5949093103408813, "learning_rate": 1.9752031071414945e-05, "loss": 0.4384, "step": 1473 }, { "epoch": 0.17937328871311226, "grad_norm": 1.8567700386047363, "learning_rate": 1.9751599174336683e-05, "loss": 0.4416, "step": 1474 }, { "epoch": 0.1794949802251293, "grad_norm": 3.348370313644409, "learning_rate": 1.9751166906189886e-05, "loss": 0.5317, "step": 1475 }, { "epoch": 0.17961667173714632, "grad_norm": 4.627408504486084, "learning_rate": 1.9750734266991006e-05, "loss": 0.5239, "step": 1476 }, { "epoch": 0.17973836324916337, "grad_norm": 4.465948104858398, "learning_rate": 1.975030125675651e-05, "loss": 0.5472, "step": 1477 }, { "epoch": 0.17986005476118042, "grad_norm": 1.0233813524246216, "learning_rate": 1.9749867875502858e-05, "loss": 0.4415, "step": 1478 }, { "epoch": 0.17998174627319743, "grad_norm": 5.391047954559326, "learning_rate": 1.974943412324656e-05, "loss": 0.4094, "step": 1479 }, { "epoch": 0.18010343778521448, "grad_norm": 1.2071317434310913, "learning_rate": 1.9749000000004113e-05, "loss": 0.5348, "step": 1480 }, { "epoch": 0.18022512929723153, "grad_norm": 1.624406337738037, "learning_rate": 1.9748565505792033e-05, "loss": 0.5067, "step": 1481 }, { "epoch": 0.18034682080924855, "grad_norm": 1.272099494934082, "learning_rate": 1.9748130640626858e-05, "loss": 0.5282, "step": 1482 }, { "epoch": 0.1804685123212656, "grad_norm": 3.5889365673065186, "learning_rate": 1.9747695404525138e-05, "loss": 0.4702, "step": 1483 }, { "epoch": 0.18059020383328264, "grad_norm": 4.6624956130981445, "learning_rate": 1.9747259797503425e-05, "loss": 0.4418, "step": 1484 }, { "epoch": 0.18071189534529966, "grad_norm": 1.1972137689590454, "learning_rate": 1.974682381957831e-05, "loss": 0.5065, "step": 1485 }, { "epoch": 0.1808335868573167, "grad_norm": 0.8423951268196106, "learning_rate": 1.9746387470766366e-05, "loss": 0.4769, "step": 1486 }, { "epoch": 0.18095527836933373, "grad_norm": 1.6587998867034912, "learning_rate": 1.974595075108421e-05, "loss": 0.4039, "step": 1487 }, { "epoch": 0.18107696988135077, "grad_norm": 2.4894673824310303, "learning_rate": 1.974551366054845e-05, "loss": 0.5075, "step": 1488 }, { "epoch": 0.18119866139336782, "grad_norm": 1.9014842510223389, "learning_rate": 1.9745076199175724e-05, "loss": 0.3931, "step": 1489 }, { "epoch": 0.18132035290538484, "grad_norm": 2.464437246322632, "learning_rate": 1.9744638366982684e-05, "loss": 0.4796, "step": 1490 }, { "epoch": 0.1814420444174019, "grad_norm": 6.043072700500488, "learning_rate": 1.9744200163985975e-05, "loss": 0.5681, "step": 1491 }, { "epoch": 0.18156373592941893, "grad_norm": 1.4362983703613281, "learning_rate": 1.9743761590202287e-05, "loss": 0.3742, "step": 1492 }, { "epoch": 0.18168542744143595, "grad_norm": 1.9414125680923462, "learning_rate": 1.9743322645648297e-05, "loss": 0.4798, "step": 1493 }, { "epoch": 0.181807118953453, "grad_norm": 1.6063414812088013, "learning_rate": 1.9742883330340715e-05, "loss": 0.3964, "step": 1494 }, { "epoch": 0.18192881046547002, "grad_norm": 1.3805633783340454, "learning_rate": 1.9742443644296255e-05, "loss": 0.4396, "step": 1495 }, { "epoch": 0.18205050197748707, "grad_norm": 2.264557361602783, "learning_rate": 1.974200358753165e-05, "loss": 0.4916, "step": 1496 }, { "epoch": 0.18217219348950411, "grad_norm": 2.068603038787842, "learning_rate": 1.9741563160063643e-05, "loss": 0.4703, "step": 1497 }, { "epoch": 0.18229388500152113, "grad_norm": 1.1746764183044434, "learning_rate": 1.9741122361908992e-05, "loss": 0.5142, "step": 1498 }, { "epoch": 0.18241557651353818, "grad_norm": 1.3434480428695679, "learning_rate": 1.9740681193084478e-05, "loss": 0.536, "step": 1499 }, { "epoch": 0.18253726802555523, "grad_norm": 1.5712302923202515, "learning_rate": 1.9740239653606876e-05, "loss": 0.546, "step": 1500 }, { "epoch": 0.18265895953757225, "grad_norm": 5.305239200592041, "learning_rate": 1.9739797743492997e-05, "loss": 0.4613, "step": 1501 }, { "epoch": 0.1827806510495893, "grad_norm": 1.1593258380889893, "learning_rate": 1.9739355462759652e-05, "loss": 0.5387, "step": 1502 }, { "epoch": 0.18290234256160634, "grad_norm": 0.6386822462081909, "learning_rate": 1.9738912811423673e-05, "loss": 0.5373, "step": 1503 }, { "epoch": 0.18302403407362336, "grad_norm": 2.554844617843628, "learning_rate": 1.9738469789501903e-05, "loss": 0.4694, "step": 1504 }, { "epoch": 0.1831457255856404, "grad_norm": 0.9370599389076233, "learning_rate": 1.97380263970112e-05, "loss": 0.5007, "step": 1505 }, { "epoch": 0.18326741709765743, "grad_norm": 0.9100454449653625, "learning_rate": 1.9737582633968436e-05, "loss": 0.4997, "step": 1506 }, { "epoch": 0.18338910860967447, "grad_norm": 1.140694499015808, "learning_rate": 1.97371385003905e-05, "loss": 0.4223, "step": 1507 }, { "epoch": 0.18351080012169152, "grad_norm": 1.4786837100982666, "learning_rate": 1.9736693996294286e-05, "loss": 0.4445, "step": 1508 }, { "epoch": 0.18363249163370854, "grad_norm": 3.574382781982422, "learning_rate": 1.973624912169671e-05, "loss": 0.4964, "step": 1509 }, { "epoch": 0.18375418314572559, "grad_norm": 2.0820257663726807, "learning_rate": 1.9735803876614705e-05, "loss": 0.4301, "step": 1510 }, { "epoch": 0.18387587465774263, "grad_norm": 2.8352251052856445, "learning_rate": 1.973535826106521e-05, "loss": 0.4673, "step": 1511 }, { "epoch": 0.18399756616975965, "grad_norm": 1.9511176347732544, "learning_rate": 1.9734912275065184e-05, "loss": 0.473, "step": 1512 }, { "epoch": 0.1841192576817767, "grad_norm": 1.2392737865447998, "learning_rate": 1.9734465918631592e-05, "loss": 0.4755, "step": 1513 }, { "epoch": 0.18424094919379375, "grad_norm": 1.0681768655776978, "learning_rate": 1.9734019191781425e-05, "loss": 0.4941, "step": 1514 }, { "epoch": 0.18436264070581077, "grad_norm": 4.178776741027832, "learning_rate": 1.9733572094531676e-05, "loss": 0.4713, "step": 1515 }, { "epoch": 0.1844843322178278, "grad_norm": 4.6374311447143555, "learning_rate": 1.9733124626899365e-05, "loss": 0.4569, "step": 1516 }, { "epoch": 0.18460602372984483, "grad_norm": 4.767979621887207, "learning_rate": 1.9732676788901512e-05, "loss": 0.516, "step": 1517 }, { "epoch": 0.18472771524186188, "grad_norm": 4.664592266082764, "learning_rate": 1.9732228580555166e-05, "loss": 0.5147, "step": 1518 }, { "epoch": 0.18484940675387893, "grad_norm": 2.1037893295288086, "learning_rate": 1.9731780001877376e-05, "loss": 0.5286, "step": 1519 }, { "epoch": 0.18497109826589594, "grad_norm": 3.0732429027557373, "learning_rate": 1.9731331052885214e-05, "loss": 0.4729, "step": 1520 }, { "epoch": 0.185092789777913, "grad_norm": 0.7659862041473389, "learning_rate": 1.973088173359576e-05, "loss": 0.4441, "step": 1521 }, { "epoch": 0.18521448128993004, "grad_norm": 3.8449151515960693, "learning_rate": 1.9730432044026117e-05, "loss": 0.5271, "step": 1522 }, { "epoch": 0.18533617280194706, "grad_norm": 2.932002544403076, "learning_rate": 1.9729981984193393e-05, "loss": 0.4535, "step": 1523 }, { "epoch": 0.1854578643139641, "grad_norm": 4.661848068237305, "learning_rate": 1.9729531554114713e-05, "loss": 0.5529, "step": 1524 }, { "epoch": 0.18557955582598112, "grad_norm": 3.4412319660186768, "learning_rate": 1.9729080753807223e-05, "loss": 0.5123, "step": 1525 }, { "epoch": 0.18570124733799817, "grad_norm": 2.6023199558258057, "learning_rate": 1.972862958328807e-05, "loss": 0.4922, "step": 1526 }, { "epoch": 0.18582293885001522, "grad_norm": 1.1642584800720215, "learning_rate": 1.9728178042574422e-05, "loss": 0.4206, "step": 1527 }, { "epoch": 0.18594463036203224, "grad_norm": 1.6560192108154297, "learning_rate": 1.9727726131683464e-05, "loss": 0.4345, "step": 1528 }, { "epoch": 0.18606632187404928, "grad_norm": 1.3707681894302368, "learning_rate": 1.9727273850632393e-05, "loss": 0.5126, "step": 1529 }, { "epoch": 0.18618801338606633, "grad_norm": 1.8400931358337402, "learning_rate": 1.9726821199438417e-05, "loss": 0.5129, "step": 1530 }, { "epoch": 0.18630970489808335, "grad_norm": 4.784855365753174, "learning_rate": 1.9726368178118758e-05, "loss": 0.4349, "step": 1531 }, { "epoch": 0.1864313964101004, "grad_norm": 1.6330609321594238, "learning_rate": 1.972591478669066e-05, "loss": 0.5038, "step": 1532 }, { "epoch": 0.18655308792211744, "grad_norm": 2.845757246017456, "learning_rate": 1.9725461025171372e-05, "loss": 0.4627, "step": 1533 }, { "epoch": 0.18667477943413446, "grad_norm": 2.9922804832458496, "learning_rate": 1.9725006893578165e-05, "loss": 0.4335, "step": 1534 }, { "epoch": 0.1867964709461515, "grad_norm": 1.7592154741287231, "learning_rate": 1.972455239192831e-05, "loss": 0.4252, "step": 1535 }, { "epoch": 0.18691816245816853, "grad_norm": 2.723053216934204, "learning_rate": 1.9724097520239113e-05, "loss": 0.4712, "step": 1536 }, { "epoch": 0.18703985397018558, "grad_norm": 2.3298447132110596, "learning_rate": 1.9723642278527876e-05, "loss": 0.4578, "step": 1537 }, { "epoch": 0.18716154548220262, "grad_norm": 5.630123138427734, "learning_rate": 1.9723186666811922e-05, "loss": 0.5456, "step": 1538 }, { "epoch": 0.18728323699421964, "grad_norm": 4.125349998474121, "learning_rate": 1.9722730685108588e-05, "loss": 0.4988, "step": 1539 }, { "epoch": 0.1874049285062367, "grad_norm": 1.7700765132904053, "learning_rate": 1.9722274333435233e-05, "loss": 0.4519, "step": 1540 }, { "epoch": 0.18752662001825374, "grad_norm": 1.5369956493377686, "learning_rate": 1.9721817611809212e-05, "loss": 0.49, "step": 1541 }, { "epoch": 0.18764831153027076, "grad_norm": 0.5952844619750977, "learning_rate": 1.97213605202479e-05, "loss": 0.4692, "step": 1542 }, { "epoch": 0.1877700030422878, "grad_norm": 2.1867940425872803, "learning_rate": 1.972090305876871e-05, "loss": 0.491, "step": 1543 }, { "epoch": 0.18789169455430485, "grad_norm": 2.9622294902801514, "learning_rate": 1.9720445227389032e-05, "loss": 0.4972, "step": 1544 }, { "epoch": 0.18801338606632187, "grad_norm": 3.5816261768341064, "learning_rate": 1.9719987026126296e-05, "loss": 0.5116, "step": 1545 }, { "epoch": 0.18813507757833892, "grad_norm": 2.4237194061279297, "learning_rate": 1.971952845499793e-05, "loss": 0.4703, "step": 1546 }, { "epoch": 0.18825676909035594, "grad_norm": 2.7580742835998535, "learning_rate": 1.9719069514021393e-05, "loss": 0.5255, "step": 1547 }, { "epoch": 0.18837846060237298, "grad_norm": 2.2957465648651123, "learning_rate": 1.9718610203214136e-05, "loss": 0.4359, "step": 1548 }, { "epoch": 0.18850015211439003, "grad_norm": 2.8581619262695312, "learning_rate": 1.971815052259365e-05, "loss": 0.5241, "step": 1549 }, { "epoch": 0.18862184362640705, "grad_norm": 2.8525230884552, "learning_rate": 1.971769047217742e-05, "loss": 0.5052, "step": 1550 }, { "epoch": 0.1887435351384241, "grad_norm": 2.728126049041748, "learning_rate": 1.9717230051982958e-05, "loss": 0.459, "step": 1551 }, { "epoch": 0.18886522665044114, "grad_norm": 2.577486276626587, "learning_rate": 1.971676926202777e-05, "loss": 0.3763, "step": 1552 }, { "epoch": 0.18898691816245816, "grad_norm": 0.9827896952629089, "learning_rate": 1.97163081023294e-05, "loss": 0.4509, "step": 1553 }, { "epoch": 0.1891086096744752, "grad_norm": 5.716380596160889, "learning_rate": 1.9715846572905402e-05, "loss": 0.5873, "step": 1554 }, { "epoch": 0.18923030118649226, "grad_norm": 0.8581305146217346, "learning_rate": 1.9715384673773327e-05, "loss": 0.453, "step": 1555 }, { "epoch": 0.18935199269850927, "grad_norm": 1.1005945205688477, "learning_rate": 1.9714922404950755e-05, "loss": 0.4697, "step": 1556 }, { "epoch": 0.18947368421052632, "grad_norm": 0.757716953754425, "learning_rate": 1.971445976645528e-05, "loss": 0.4548, "step": 1557 }, { "epoch": 0.18959537572254334, "grad_norm": 1.0333950519561768, "learning_rate": 1.9713996758304498e-05, "loss": 0.4893, "step": 1558 }, { "epoch": 0.1897170672345604, "grad_norm": 1.0344300270080566, "learning_rate": 1.9713533380516037e-05, "loss": 0.4795, "step": 1559 }, { "epoch": 0.18983875874657744, "grad_norm": 0.6981343626976013, "learning_rate": 1.9713069633107524e-05, "loss": 0.4903, "step": 1560 }, { "epoch": 0.18996045025859445, "grad_norm": 0.9314073920249939, "learning_rate": 1.9712605516096604e-05, "loss": 0.4881, "step": 1561 }, { "epoch": 0.1900821417706115, "grad_norm": 1.9468327760696411, "learning_rate": 1.971214102950094e-05, "loss": 0.4537, "step": 1562 }, { "epoch": 0.19020383328262855, "grad_norm": 0.5633378028869629, "learning_rate": 1.971167617333821e-05, "loss": 0.4775, "step": 1563 }, { "epoch": 0.19032552479464557, "grad_norm": 0.9177316427230835, "learning_rate": 1.9711210947626098e-05, "loss": 0.5227, "step": 1564 }, { "epoch": 0.19044721630666261, "grad_norm": 1.4895813465118408, "learning_rate": 1.971074535238231e-05, "loss": 0.4706, "step": 1565 }, { "epoch": 0.19056890781867963, "grad_norm": 0.8878472447395325, "learning_rate": 1.971027938762456e-05, "loss": 0.4882, "step": 1566 }, { "epoch": 0.19069059933069668, "grad_norm": 0.852823793888092, "learning_rate": 1.9709813053370578e-05, "loss": 0.4971, "step": 1567 }, { "epoch": 0.19081229084271373, "grad_norm": 0.7474746108055115, "learning_rate": 1.970934634963811e-05, "loss": 0.517, "step": 1568 }, { "epoch": 0.19093398235473075, "grad_norm": 1.4665369987487793, "learning_rate": 1.970887927644492e-05, "loss": 0.4324, "step": 1569 }, { "epoch": 0.1910556738667478, "grad_norm": 1.3971643447875977, "learning_rate": 1.9708411833808777e-05, "loss": 0.4399, "step": 1570 }, { "epoch": 0.19117736537876484, "grad_norm": 3.2567062377929688, "learning_rate": 1.9707944021747468e-05, "loss": 0.5218, "step": 1571 }, { "epoch": 0.19129905689078186, "grad_norm": 1.3860619068145752, "learning_rate": 1.9707475840278794e-05, "loss": 0.5077, "step": 1572 }, { "epoch": 0.1914207484027989, "grad_norm": 1.0201047658920288, "learning_rate": 1.970700728942057e-05, "loss": 0.467, "step": 1573 }, { "epoch": 0.19154243991481595, "grad_norm": 2.747863531112671, "learning_rate": 1.970653836919063e-05, "loss": 0.5031, "step": 1574 }, { "epoch": 0.19166413142683297, "grad_norm": 0.9433050751686096, "learning_rate": 1.9706069079606812e-05, "loss": 0.449, "step": 1575 }, { "epoch": 0.19178582293885002, "grad_norm": 1.9547761678695679, "learning_rate": 1.970559942068697e-05, "loss": 0.4538, "step": 1576 }, { "epoch": 0.19190751445086704, "grad_norm": 0.9808923602104187, "learning_rate": 1.9705129392448987e-05, "loss": 0.5191, "step": 1577 }, { "epoch": 0.1920292059628841, "grad_norm": 2.032186508178711, "learning_rate": 1.970465899491074e-05, "loss": 0.4711, "step": 1578 }, { "epoch": 0.19215089747490113, "grad_norm": 3.9353396892547607, "learning_rate": 1.9704188228090127e-05, "loss": 0.4053, "step": 1579 }, { "epoch": 0.19227258898691815, "grad_norm": 1.908689022064209, "learning_rate": 1.970371709200507e-05, "loss": 0.546, "step": 1580 }, { "epoch": 0.1923942804989352, "grad_norm": 0.7221613526344299, "learning_rate": 1.9703245586673492e-05, "loss": 0.4312, "step": 1581 }, { "epoch": 0.19251597201095225, "grad_norm": 0.6049261093139648, "learning_rate": 1.970277371211333e-05, "loss": 0.4789, "step": 1582 }, { "epoch": 0.19263766352296927, "grad_norm": 1.7574793100357056, "learning_rate": 1.970230146834255e-05, "loss": 0.4951, "step": 1583 }, { "epoch": 0.1927593550349863, "grad_norm": 0.6227784156799316, "learning_rate": 1.9701828855379113e-05, "loss": 0.4605, "step": 1584 }, { "epoch": 0.19288104654700336, "grad_norm": 2.2516350746154785, "learning_rate": 1.970135587324101e-05, "loss": 0.4836, "step": 1585 }, { "epoch": 0.19300273805902038, "grad_norm": 1.1869683265686035, "learning_rate": 1.9700882521946236e-05, "loss": 0.4411, "step": 1586 }, { "epoch": 0.19312442957103743, "grad_norm": 3.2054316997528076, "learning_rate": 1.9700408801512797e-05, "loss": 0.5176, "step": 1587 }, { "epoch": 0.19324612108305445, "grad_norm": 1.0156232118606567, "learning_rate": 1.9699934711958728e-05, "loss": 0.4538, "step": 1588 }, { "epoch": 0.1933678125950715, "grad_norm": 1.167963981628418, "learning_rate": 1.9699460253302066e-05, "loss": 0.5323, "step": 1589 }, { "epoch": 0.19348950410708854, "grad_norm": 0.937066376209259, "learning_rate": 1.969898542556086e-05, "loss": 0.467, "step": 1590 }, { "epoch": 0.19361119561910556, "grad_norm": 1.9532171487808228, "learning_rate": 1.969851022875319e-05, "loss": 0.5079, "step": 1591 }, { "epoch": 0.1937328871311226, "grad_norm": 0.8318353891372681, "learning_rate": 1.9698034662897125e-05, "loss": 0.4786, "step": 1592 }, { "epoch": 0.19385457864313965, "grad_norm": 2.816213846206665, "learning_rate": 1.969755872801077e-05, "loss": 0.4129, "step": 1593 }, { "epoch": 0.19397627015515667, "grad_norm": 1.4102116823196411, "learning_rate": 1.969708242411223e-05, "loss": 0.4369, "step": 1594 }, { "epoch": 0.19409796166717372, "grad_norm": 2.5704290866851807, "learning_rate": 1.9696605751219634e-05, "loss": 0.4941, "step": 1595 }, { "epoch": 0.19421965317919074, "grad_norm": 0.6522196531295776, "learning_rate": 1.969612870935112e-05, "loss": 0.4207, "step": 1596 }, { "epoch": 0.19434134469120778, "grad_norm": 1.5597845315933228, "learning_rate": 1.9695651298524834e-05, "loss": 0.4537, "step": 1597 }, { "epoch": 0.19446303620322483, "grad_norm": 4.25393009185791, "learning_rate": 1.969517351875895e-05, "loss": 0.5641, "step": 1598 }, { "epoch": 0.19458472771524185, "grad_norm": 1.259340524673462, "learning_rate": 1.9694695370071645e-05, "loss": 0.5148, "step": 1599 }, { "epoch": 0.1947064192272589, "grad_norm": 1.2821242809295654, "learning_rate": 1.9694216852481113e-05, "loss": 0.4794, "step": 1600 }, { "epoch": 0.19482811073927594, "grad_norm": 1.6673787832260132, "learning_rate": 1.9693737966005567e-05, "loss": 0.4423, "step": 1601 }, { "epoch": 0.19494980225129296, "grad_norm": 5.536383152008057, "learning_rate": 1.9693258710663226e-05, "loss": 0.4358, "step": 1602 }, { "epoch": 0.19507149376331, "grad_norm": 2.4902002811431885, "learning_rate": 1.9692779086472328e-05, "loss": 0.4815, "step": 1603 }, { "epoch": 0.19519318527532706, "grad_norm": 3.3068079948425293, "learning_rate": 1.969229909345112e-05, "loss": 0.4255, "step": 1604 }, { "epoch": 0.19531487678734408, "grad_norm": 2.6858489513397217, "learning_rate": 1.969181873161787e-05, "loss": 0.5322, "step": 1605 }, { "epoch": 0.19543656829936112, "grad_norm": 3.3064382076263428, "learning_rate": 1.9691338000990855e-05, "loss": 0.5252, "step": 1606 }, { "epoch": 0.19555825981137814, "grad_norm": 1.214639663696289, "learning_rate": 1.9690856901588372e-05, "loss": 0.4623, "step": 1607 }, { "epoch": 0.1956799513233952, "grad_norm": 1.6819217205047607, "learning_rate": 1.9690375433428723e-05, "loss": 0.4662, "step": 1608 }, { "epoch": 0.19580164283541224, "grad_norm": 0.8032550811767578, "learning_rate": 1.968989359653023e-05, "loss": 0.4565, "step": 1609 }, { "epoch": 0.19592333434742926, "grad_norm": 1.0182809829711914, "learning_rate": 1.9689411390911233e-05, "loss": 0.4792, "step": 1610 }, { "epoch": 0.1960450258594463, "grad_norm": 1.019756555557251, "learning_rate": 1.9688928816590075e-05, "loss": 0.468, "step": 1611 }, { "epoch": 0.19616671737146335, "grad_norm": 3.679610252380371, "learning_rate": 1.968844587358512e-05, "loss": 0.4385, "step": 1612 }, { "epoch": 0.19628840888348037, "grad_norm": 2.3145744800567627, "learning_rate": 1.9687962561914743e-05, "loss": 0.4934, "step": 1613 }, { "epoch": 0.19641010039549742, "grad_norm": 1.0022114515304565, "learning_rate": 1.968747888159734e-05, "loss": 0.4672, "step": 1614 }, { "epoch": 0.19653179190751446, "grad_norm": 0.7019039988517761, "learning_rate": 1.968699483265131e-05, "loss": 0.4291, "step": 1615 }, { "epoch": 0.19665348341953148, "grad_norm": 3.1726160049438477, "learning_rate": 1.9686510415095078e-05, "loss": 0.5213, "step": 1616 }, { "epoch": 0.19677517493154853, "grad_norm": 3.2987570762634277, "learning_rate": 1.968602562894708e-05, "loss": 0.4844, "step": 1617 }, { "epoch": 0.19689686644356555, "grad_norm": 0.9264179468154907, "learning_rate": 1.968554047422575e-05, "loss": 0.4127, "step": 1618 }, { "epoch": 0.1970185579555826, "grad_norm": 0.6382076740264893, "learning_rate": 1.968505495094956e-05, "loss": 0.4601, "step": 1619 }, { "epoch": 0.19714024946759964, "grad_norm": 2.624558448791504, "learning_rate": 1.9684569059136984e-05, "loss": 0.5142, "step": 1620 }, { "epoch": 0.19726194097961666, "grad_norm": 1.2857919931411743, "learning_rate": 1.9684082798806507e-05, "loss": 0.494, "step": 1621 }, { "epoch": 0.1973836324916337, "grad_norm": 2.6425671577453613, "learning_rate": 1.9683596169976637e-05, "loss": 0.5788, "step": 1622 }, { "epoch": 0.19750532400365076, "grad_norm": 2.8701817989349365, "learning_rate": 1.9683109172665887e-05, "loss": 0.4749, "step": 1623 }, { "epoch": 0.19762701551566778, "grad_norm": 4.955239295959473, "learning_rate": 1.968262180689279e-05, "loss": 0.4661, "step": 1624 }, { "epoch": 0.19774870702768482, "grad_norm": 1.945864200592041, "learning_rate": 1.968213407267589e-05, "loss": 0.5084, "step": 1625 }, { "epoch": 0.19787039853970187, "grad_norm": 3.634922981262207, "learning_rate": 1.968164597003375e-05, "loss": 0.4742, "step": 1626 }, { "epoch": 0.1979920900517189, "grad_norm": 3.1284406185150146, "learning_rate": 1.968115749898494e-05, "loss": 0.4812, "step": 1627 }, { "epoch": 0.19811378156373594, "grad_norm": 0.6137954592704773, "learning_rate": 1.9680668659548047e-05, "loss": 0.5057, "step": 1628 }, { "epoch": 0.19823547307575295, "grad_norm": 1.2982310056686401, "learning_rate": 1.9680179451741676e-05, "loss": 0.4971, "step": 1629 }, { "epoch": 0.19835716458777, "grad_norm": 0.7524140477180481, "learning_rate": 1.967968987558444e-05, "loss": 0.4631, "step": 1630 }, { "epoch": 0.19847885609978705, "grad_norm": 2.648993730545044, "learning_rate": 1.9679199931094967e-05, "loss": 0.4921, "step": 1631 }, { "epoch": 0.19860054761180407, "grad_norm": 4.057670593261719, "learning_rate": 1.9678709618291903e-05, "loss": 0.5464, "step": 1632 }, { "epoch": 0.19872223912382111, "grad_norm": 3.1056995391845703, "learning_rate": 1.96782189371939e-05, "loss": 0.5447, "step": 1633 }, { "epoch": 0.19884393063583816, "grad_norm": 0.6378831267356873, "learning_rate": 1.9677727887819637e-05, "loss": 0.526, "step": 1634 }, { "epoch": 0.19896562214785518, "grad_norm": 5.221714019775391, "learning_rate": 1.9677236470187796e-05, "loss": 0.4275, "step": 1635 }, { "epoch": 0.19908731365987223, "grad_norm": 2.322024345397949, "learning_rate": 1.9676744684317076e-05, "loss": 0.5101, "step": 1636 }, { "epoch": 0.19920900517188925, "grad_norm": 2.1963043212890625, "learning_rate": 1.9676252530226195e-05, "loss": 0.4707, "step": 1637 }, { "epoch": 0.1993306966839063, "grad_norm": 1.522512674331665, "learning_rate": 1.967576000793387e-05, "loss": 0.4794, "step": 1638 }, { "epoch": 0.19945238819592334, "grad_norm": 0.6875200867652893, "learning_rate": 1.9675267117458853e-05, "loss": 0.5085, "step": 1639 }, { "epoch": 0.19957407970794036, "grad_norm": 1.8022245168685913, "learning_rate": 1.9674773858819895e-05, "loss": 0.5139, "step": 1640 }, { "epoch": 0.1996957712199574, "grad_norm": 0.6288191676139832, "learning_rate": 1.9674280232035766e-05, "loss": 0.4801, "step": 1641 }, { "epoch": 0.19981746273197445, "grad_norm": 0.6057789325714111, "learning_rate": 1.9673786237125247e-05, "loss": 0.4749, "step": 1642 }, { "epoch": 0.19993915424399147, "grad_norm": 2.8260412216186523, "learning_rate": 1.967329187410714e-05, "loss": 0.5089, "step": 1643 }, { "epoch": 0.20006084575600852, "grad_norm": 0.9718367457389832, "learning_rate": 1.9672797143000254e-05, "loss": 0.4772, "step": 1644 }, { "epoch": 0.20018253726802557, "grad_norm": 2.362091302871704, "learning_rate": 1.9672302043823418e-05, "loss": 0.5211, "step": 1645 }, { "epoch": 0.2003042287800426, "grad_norm": 2.3104591369628906, "learning_rate": 1.9671806576595466e-05, "loss": 0.5554, "step": 1646 }, { "epoch": 0.20042592029205963, "grad_norm": 2.9208545684814453, "learning_rate": 1.9671310741335253e-05, "loss": 0.4374, "step": 1647 }, { "epoch": 0.20054761180407665, "grad_norm": 1.604841709136963, "learning_rate": 1.967081453806165e-05, "loss": 0.4621, "step": 1648 }, { "epoch": 0.2006693033160937, "grad_norm": 2.000429630279541, "learning_rate": 1.9670317966793537e-05, "loss": 0.4759, "step": 1649 }, { "epoch": 0.20079099482811075, "grad_norm": 0.6824323534965515, "learning_rate": 1.9669821027549804e-05, "loss": 0.4973, "step": 1650 }, { "epoch": 0.20091268634012777, "grad_norm": 2.5940921306610107, "learning_rate": 1.9669323720349373e-05, "loss": 0.5476, "step": 1651 }, { "epoch": 0.2010343778521448, "grad_norm": 0.9436287879943848, "learning_rate": 1.9668826045211154e-05, "loss": 0.44, "step": 1652 }, { "epoch": 0.20115606936416186, "grad_norm": 0.8677988648414612, "learning_rate": 1.9668328002154092e-05, "loss": 0.4688, "step": 1653 }, { "epoch": 0.20127776087617888, "grad_norm": 1.9527193307876587, "learning_rate": 1.966782959119714e-05, "loss": 0.5002, "step": 1654 }, { "epoch": 0.20139945238819593, "grad_norm": 1.1784852743148804, "learning_rate": 1.9667330812359256e-05, "loss": 0.4618, "step": 1655 }, { "epoch": 0.20152114390021297, "grad_norm": 0.9972782731056213, "learning_rate": 1.9666831665659426e-05, "loss": 0.4955, "step": 1656 }, { "epoch": 0.20164283541223, "grad_norm": 2.0112106800079346, "learning_rate": 1.9666332151116644e-05, "loss": 0.4423, "step": 1657 }, { "epoch": 0.20176452692424704, "grad_norm": 3.010773181915283, "learning_rate": 1.9665832268749915e-05, "loss": 0.4715, "step": 1658 }, { "epoch": 0.20188621843626406, "grad_norm": 2.0638082027435303, "learning_rate": 1.966533201857826e-05, "loss": 0.4301, "step": 1659 }, { "epoch": 0.2020079099482811, "grad_norm": 1.6525065898895264, "learning_rate": 1.9664831400620716e-05, "loss": 0.492, "step": 1660 }, { "epoch": 0.20212960146029815, "grad_norm": 1.4906165599822998, "learning_rate": 1.966433041489633e-05, "loss": 0.4813, "step": 1661 }, { "epoch": 0.20225129297231517, "grad_norm": 0.6232516169548035, "learning_rate": 1.9663829061424172e-05, "loss": 0.4963, "step": 1662 }, { "epoch": 0.20237298448433222, "grad_norm": 1.5922417640686035, "learning_rate": 1.966332734022331e-05, "loss": 0.4668, "step": 1663 }, { "epoch": 0.20249467599634927, "grad_norm": 1.6148895025253296, "learning_rate": 1.9662825251312845e-05, "loss": 0.4853, "step": 1664 }, { "epoch": 0.20261636750836629, "grad_norm": 0.6693671941757202, "learning_rate": 1.9662322794711875e-05, "loss": 0.4473, "step": 1665 }, { "epoch": 0.20273805902038333, "grad_norm": 2.002835512161255, "learning_rate": 1.9661819970439526e-05, "loss": 0.4778, "step": 1666 }, { "epoch": 0.20285975053240035, "grad_norm": 0.8164340853691101, "learning_rate": 1.966131677851493e-05, "loss": 0.4557, "step": 1667 }, { "epoch": 0.2029814420444174, "grad_norm": 0.6746049523353577, "learning_rate": 1.9660813218957226e-05, "loss": 0.4741, "step": 1668 }, { "epoch": 0.20310313355643445, "grad_norm": 0.8485199213027954, "learning_rate": 1.9660309291785588e-05, "loss": 0.47, "step": 1669 }, { "epoch": 0.20322482506845146, "grad_norm": 1.3783713579177856, "learning_rate": 1.965980499701918e-05, "loss": 0.5104, "step": 1670 }, { "epoch": 0.2033465165804685, "grad_norm": 2.0943398475646973, "learning_rate": 1.96593003346772e-05, "loss": 0.4353, "step": 1671 }, { "epoch": 0.20346820809248556, "grad_norm": 1.8847817182540894, "learning_rate": 1.965879530477885e-05, "loss": 0.5095, "step": 1672 }, { "epoch": 0.20358989960450258, "grad_norm": 1.30797278881073, "learning_rate": 1.9658289907343344e-05, "loss": 0.5466, "step": 1673 }, { "epoch": 0.20371159111651962, "grad_norm": 1.2622805833816528, "learning_rate": 1.9657784142389918e-05, "loss": 0.4799, "step": 1674 }, { "epoch": 0.20383328262853667, "grad_norm": 1.3247990608215332, "learning_rate": 1.9657278009937813e-05, "loss": 0.4787, "step": 1675 }, { "epoch": 0.2039549741405537, "grad_norm": 3.931656837463379, "learning_rate": 1.9656771510006288e-05, "loss": 0.3798, "step": 1676 }, { "epoch": 0.20407666565257074, "grad_norm": 1.6144198179244995, "learning_rate": 1.9656264642614625e-05, "loss": 0.3948, "step": 1677 }, { "epoch": 0.20419835716458776, "grad_norm": 2.988980770111084, "learning_rate": 1.96557574077821e-05, "loss": 0.5197, "step": 1678 }, { "epoch": 0.2043200486766048, "grad_norm": 4.086143970489502, "learning_rate": 1.965524980552802e-05, "loss": 0.506, "step": 1679 }, { "epoch": 0.20444174018862185, "grad_norm": 3.4909090995788574, "learning_rate": 1.96547418358717e-05, "loss": 0.4732, "step": 1680 }, { "epoch": 0.20456343170063887, "grad_norm": 5.454492568969727, "learning_rate": 1.9654233498832467e-05, "loss": 0.5418, "step": 1681 }, { "epoch": 0.20468512321265592, "grad_norm": 3.660712242126465, "learning_rate": 1.965372479442967e-05, "loss": 0.4767, "step": 1682 }, { "epoch": 0.20480681472467296, "grad_norm": 0.8374196887016296, "learning_rate": 1.965321572268266e-05, "loss": 0.4485, "step": 1683 }, { "epoch": 0.20492850623668998, "grad_norm": 0.5816757082939148, "learning_rate": 1.9652706283610814e-05, "loss": 0.4607, "step": 1684 }, { "epoch": 0.20505019774870703, "grad_norm": 0.9996181726455688, "learning_rate": 1.965219647723351e-05, "loss": 0.4796, "step": 1685 }, { "epoch": 0.20517188926072408, "grad_norm": 1.0449223518371582, "learning_rate": 1.9651686303570156e-05, "loss": 0.509, "step": 1686 }, { "epoch": 0.2052935807727411, "grad_norm": 4.236747741699219, "learning_rate": 1.9651175762640155e-05, "loss": 0.4712, "step": 1687 }, { "epoch": 0.20541527228475814, "grad_norm": 3.6349127292633057, "learning_rate": 1.965066485446294e-05, "loss": 0.4773, "step": 1688 }, { "epoch": 0.20553696379677516, "grad_norm": 4.4095869064331055, "learning_rate": 1.9650153579057955e-05, "loss": 0.4812, "step": 1689 }, { "epoch": 0.2056586553087922, "grad_norm": 2.3108582496643066, "learning_rate": 1.964964193644465e-05, "loss": 0.4961, "step": 1690 }, { "epoch": 0.20578034682080926, "grad_norm": 1.692794680595398, "learning_rate": 1.964912992664249e-05, "loss": 0.4767, "step": 1691 }, { "epoch": 0.20590203833282628, "grad_norm": 0.6628739237785339, "learning_rate": 1.9648617549670972e-05, "loss": 0.4661, "step": 1692 }, { "epoch": 0.20602372984484332, "grad_norm": 3.363628387451172, "learning_rate": 1.964810480554958e-05, "loss": 0.5465, "step": 1693 }, { "epoch": 0.20614542135686037, "grad_norm": 0.7035858035087585, "learning_rate": 1.9647591694297833e-05, "loss": 0.4204, "step": 1694 }, { "epoch": 0.2062671128688774, "grad_norm": 1.1090821027755737, "learning_rate": 1.964707821593525e-05, "loss": 0.4426, "step": 1695 }, { "epoch": 0.20638880438089444, "grad_norm": 3.3618927001953125, "learning_rate": 1.9646564370481373e-05, "loss": 0.4973, "step": 1696 }, { "epoch": 0.20651049589291148, "grad_norm": 3.866363763809204, "learning_rate": 1.9646050157955752e-05, "loss": 0.5566, "step": 1697 }, { "epoch": 0.2066321874049285, "grad_norm": 2.323979616165161, "learning_rate": 1.964553557837796e-05, "loss": 0.5455, "step": 1698 }, { "epoch": 0.20675387891694555, "grad_norm": 1.5137630701065063, "learning_rate": 1.9645020631767574e-05, "loss": 0.4519, "step": 1699 }, { "epoch": 0.20687557042896257, "grad_norm": 1.2780119180679321, "learning_rate": 1.9644505318144188e-05, "loss": 0.4665, "step": 1700 }, { "epoch": 0.20699726194097962, "grad_norm": 0.6163020730018616, "learning_rate": 1.9643989637527412e-05, "loss": 0.4992, "step": 1701 }, { "epoch": 0.20711895345299666, "grad_norm": 1.6576118469238281, "learning_rate": 1.9643473589936867e-05, "loss": 0.4643, "step": 1702 }, { "epoch": 0.20724064496501368, "grad_norm": 0.7050173878669739, "learning_rate": 1.9642957175392192e-05, "loss": 0.495, "step": 1703 }, { "epoch": 0.20736233647703073, "grad_norm": 1.7582765817642212, "learning_rate": 1.9642440393913034e-05, "loss": 0.4285, "step": 1704 }, { "epoch": 0.20748402798904778, "grad_norm": 1.6956439018249512, "learning_rate": 1.9641923245519065e-05, "loss": 0.4437, "step": 1705 }, { "epoch": 0.2076057195010648, "grad_norm": 0.760586142539978, "learning_rate": 1.9641405730229955e-05, "loss": 0.4931, "step": 1706 }, { "epoch": 0.20772741101308184, "grad_norm": 2.3825411796569824, "learning_rate": 1.96408878480654e-05, "loss": 0.452, "step": 1707 }, { "epoch": 0.20784910252509886, "grad_norm": 0.6912671327590942, "learning_rate": 1.9640369599045113e-05, "loss": 0.4434, "step": 1708 }, { "epoch": 0.2079707940371159, "grad_norm": 0.9357388615608215, "learning_rate": 1.9639850983188802e-05, "loss": 0.419, "step": 1709 }, { "epoch": 0.20809248554913296, "grad_norm": 2.109111785888672, "learning_rate": 1.963933200051621e-05, "loss": 0.4641, "step": 1710 }, { "epoch": 0.20821417706114997, "grad_norm": 1.1385763883590698, "learning_rate": 1.9638812651047082e-05, "loss": 0.4858, "step": 1711 }, { "epoch": 0.20833586857316702, "grad_norm": 1.7680236101150513, "learning_rate": 1.9638292934801183e-05, "loss": 0.489, "step": 1712 }, { "epoch": 0.20845756008518407, "grad_norm": 0.8580129742622375, "learning_rate": 1.9637772851798287e-05, "loss": 0.5272, "step": 1713 }, { "epoch": 0.2085792515972011, "grad_norm": 3.5318949222564697, "learning_rate": 1.9637252402058186e-05, "loss": 0.48, "step": 1714 }, { "epoch": 0.20870094310921813, "grad_norm": 4.347428321838379, "learning_rate": 1.963673158560068e-05, "loss": 0.4515, "step": 1715 }, { "epoch": 0.20882263462123518, "grad_norm": 3.0599441528320312, "learning_rate": 1.963621040244559e-05, "loss": 0.524, "step": 1716 }, { "epoch": 0.2089443261332522, "grad_norm": 4.8122053146362305, "learning_rate": 1.963568885261275e-05, "loss": 0.4874, "step": 1717 }, { "epoch": 0.20906601764526925, "grad_norm": 2.8686513900756836, "learning_rate": 1.9635166936122007e-05, "loss": 0.4431, "step": 1718 }, { "epoch": 0.20918770915728627, "grad_norm": 1.3506346940994263, "learning_rate": 1.9634644652993217e-05, "loss": 0.5089, "step": 1719 }, { "epoch": 0.2093094006693033, "grad_norm": 0.822596549987793, "learning_rate": 1.9634122003246253e-05, "loss": 0.4589, "step": 1720 }, { "epoch": 0.20943109218132036, "grad_norm": 1.0705163478851318, "learning_rate": 1.9633598986901007e-05, "loss": 0.45, "step": 1721 }, { "epoch": 0.20955278369333738, "grad_norm": 2.329230308532715, "learning_rate": 1.963307560397738e-05, "loss": 0.4612, "step": 1722 }, { "epoch": 0.20967447520535443, "grad_norm": 1.2663108110427856, "learning_rate": 1.9632551854495285e-05, "loss": 0.4053, "step": 1723 }, { "epoch": 0.20979616671737147, "grad_norm": 2.3283092975616455, "learning_rate": 1.9632027738474652e-05, "loss": 0.4691, "step": 1724 }, { "epoch": 0.2099178582293885, "grad_norm": 2.6995961666107178, "learning_rate": 1.963150325593543e-05, "loss": 0.4785, "step": 1725 }, { "epoch": 0.21003954974140554, "grad_norm": 3.0928895473480225, "learning_rate": 1.963097840689757e-05, "loss": 0.5069, "step": 1726 }, { "epoch": 0.2101612412534226, "grad_norm": 0.7816913723945618, "learning_rate": 1.9630453191381053e-05, "loss": 0.4427, "step": 1727 }, { "epoch": 0.2102829327654396, "grad_norm": 1.0641828775405884, "learning_rate": 1.962992760940585e-05, "loss": 0.5262, "step": 1728 }, { "epoch": 0.21040462427745665, "grad_norm": 1.9659558534622192, "learning_rate": 1.9629401660991974e-05, "loss": 0.4812, "step": 1729 }, { "epoch": 0.21052631578947367, "grad_norm": 2.0269155502319336, "learning_rate": 1.962887534615943e-05, "loss": 0.4901, "step": 1730 }, { "epoch": 0.21064800730149072, "grad_norm": 3.1222195625305176, "learning_rate": 1.9628348664928247e-05, "loss": 0.4477, "step": 1731 }, { "epoch": 0.21076969881350777, "grad_norm": 2.626588821411133, "learning_rate": 1.962782161731847e-05, "loss": 0.4712, "step": 1732 }, { "epoch": 0.21089139032552479, "grad_norm": 1.2941805124282837, "learning_rate": 1.9627294203350152e-05, "loss": 0.5096, "step": 1733 }, { "epoch": 0.21101308183754183, "grad_norm": 0.8076706528663635, "learning_rate": 1.9626766423043365e-05, "loss": 0.4525, "step": 1734 }, { "epoch": 0.21113477334955888, "grad_norm": 0.6155074834823608, "learning_rate": 1.9626238276418187e-05, "loss": 0.4853, "step": 1735 }, { "epoch": 0.2112564648615759, "grad_norm": 0.9919223189353943, "learning_rate": 1.9625709763494715e-05, "loss": 0.4641, "step": 1736 }, { "epoch": 0.21137815637359295, "grad_norm": 3.096661329269409, "learning_rate": 1.9625180884293064e-05, "loss": 0.5071, "step": 1737 }, { "epoch": 0.21149984788560997, "grad_norm": 4.443472862243652, "learning_rate": 1.9624651638833358e-05, "loss": 0.5887, "step": 1738 }, { "epoch": 0.211621539397627, "grad_norm": 0.8604888916015625, "learning_rate": 1.9624122027135735e-05, "loss": 0.4802, "step": 1739 }, { "epoch": 0.21174323090964406, "grad_norm": 3.7411863803863525, "learning_rate": 1.9623592049220347e-05, "loss": 0.3979, "step": 1740 }, { "epoch": 0.21186492242166108, "grad_norm": 3.657951593399048, "learning_rate": 1.962306170510736e-05, "loss": 0.45, "step": 1741 }, { "epoch": 0.21198661393367813, "grad_norm": 2.9228360652923584, "learning_rate": 1.9622530994816963e-05, "loss": 0.4605, "step": 1742 }, { "epoch": 0.21210830544569517, "grad_norm": 1.526663899421692, "learning_rate": 1.962199991836934e-05, "loss": 0.4615, "step": 1743 }, { "epoch": 0.2122299969577122, "grad_norm": 1.1082109212875366, "learning_rate": 1.9621468475784703e-05, "loss": 0.4833, "step": 1744 }, { "epoch": 0.21235168846972924, "grad_norm": 2.901003122329712, "learning_rate": 1.9620936667083277e-05, "loss": 0.5169, "step": 1745 }, { "epoch": 0.21247337998174629, "grad_norm": 5.073161602020264, "learning_rate": 1.9620404492285293e-05, "loss": 0.566, "step": 1746 }, { "epoch": 0.2125950714937633, "grad_norm": 1.83866286277771, "learning_rate": 1.961987195141101e-05, "loss": 0.4691, "step": 1747 }, { "epoch": 0.21271676300578035, "grad_norm": 2.80662202835083, "learning_rate": 1.9619339044480682e-05, "loss": 0.5147, "step": 1748 }, { "epoch": 0.21283845451779737, "grad_norm": 2.4437170028686523, "learning_rate": 1.9618805771514596e-05, "loss": 0.4068, "step": 1749 }, { "epoch": 0.21296014602981442, "grad_norm": 1.8178179264068604, "learning_rate": 1.961827213253304e-05, "loss": 0.4783, "step": 1750 }, { "epoch": 0.21308183754183146, "grad_norm": 2.1436269283294678, "learning_rate": 1.961773812755632e-05, "loss": 0.5199, "step": 1751 }, { "epoch": 0.21320352905384848, "grad_norm": 1.8073639869689941, "learning_rate": 1.9617203756604757e-05, "loss": 0.4897, "step": 1752 }, { "epoch": 0.21332522056586553, "grad_norm": 0.6793341040611267, "learning_rate": 1.9616669019698683e-05, "loss": 0.5321, "step": 1753 }, { "epoch": 0.21344691207788258, "grad_norm": 2.9085850715637207, "learning_rate": 1.961613391685845e-05, "loss": 0.5059, "step": 1754 }, { "epoch": 0.2135686035898996, "grad_norm": 3.8958041667938232, "learning_rate": 1.9615598448104417e-05, "loss": 0.415, "step": 1755 }, { "epoch": 0.21369029510191664, "grad_norm": 3.782226324081421, "learning_rate": 1.961506261345696e-05, "loss": 0.409, "step": 1756 }, { "epoch": 0.2138119866139337, "grad_norm": 1.3364371061325073, "learning_rate": 1.9614526412936464e-05, "loss": 0.5208, "step": 1757 }, { "epoch": 0.2139336781259507, "grad_norm": 1.1603829860687256, "learning_rate": 1.961398984656334e-05, "loss": 0.4394, "step": 1758 }, { "epoch": 0.21405536963796776, "grad_norm": 1.2047865390777588, "learning_rate": 1.9613452914358002e-05, "loss": 0.4675, "step": 1759 }, { "epoch": 0.21417706114998478, "grad_norm": 1.6339871883392334, "learning_rate": 1.9612915616340886e-05, "loss": 0.4626, "step": 1760 }, { "epoch": 0.21429875266200182, "grad_norm": 0.5711697340011597, "learning_rate": 1.9612377952532428e-05, "loss": 0.4651, "step": 1761 }, { "epoch": 0.21442044417401887, "grad_norm": 0.6917303800582886, "learning_rate": 1.9611839922953094e-05, "loss": 0.4676, "step": 1762 }, { "epoch": 0.2145421356860359, "grad_norm": 1.1803677082061768, "learning_rate": 1.9611301527623358e-05, "loss": 0.4282, "step": 1763 }, { "epoch": 0.21466382719805294, "grad_norm": 1.2051255702972412, "learning_rate": 1.96107627665637e-05, "loss": 0.4997, "step": 1764 }, { "epoch": 0.21478551871006998, "grad_norm": 3.2209393978118896, "learning_rate": 1.9610223639794628e-05, "loss": 0.4172, "step": 1765 }, { "epoch": 0.214907210222087, "grad_norm": 0.9959712028503418, "learning_rate": 1.9609684147336654e-05, "loss": 0.4617, "step": 1766 }, { "epoch": 0.21502890173410405, "grad_norm": 0.6463029384613037, "learning_rate": 1.9609144289210308e-05, "loss": 0.4563, "step": 1767 }, { "epoch": 0.2151505932461211, "grad_norm": 1.8162944316864014, "learning_rate": 1.960860406543613e-05, "loss": 0.5229, "step": 1768 }, { "epoch": 0.21527228475813812, "grad_norm": 2.373663902282715, "learning_rate": 1.9608063476034683e-05, "loss": 0.4438, "step": 1769 }, { "epoch": 0.21539397627015516, "grad_norm": 0.7366934418678284, "learning_rate": 1.960752252102653e-05, "loss": 0.5095, "step": 1770 }, { "epoch": 0.21551566778217218, "grad_norm": 1.5187993049621582, "learning_rate": 1.9606981200432256e-05, "loss": 0.4881, "step": 1771 }, { "epoch": 0.21563735929418923, "grad_norm": 1.8819949626922607, "learning_rate": 1.9606439514272465e-05, "loss": 0.5457, "step": 1772 }, { "epoch": 0.21575905080620628, "grad_norm": 1.7443825006484985, "learning_rate": 1.9605897462567765e-05, "loss": 0.4856, "step": 1773 }, { "epoch": 0.2158807423182233, "grad_norm": 1.0818630456924438, "learning_rate": 1.9605355045338784e-05, "loss": 0.4808, "step": 1774 }, { "epoch": 0.21600243383024034, "grad_norm": 3.5668601989746094, "learning_rate": 1.9604812262606162e-05, "loss": 0.4699, "step": 1775 }, { "epoch": 0.2161241253422574, "grad_norm": 1.3467004299163818, "learning_rate": 1.9604269114390556e-05, "loss": 0.4751, "step": 1776 }, { "epoch": 0.2162458168542744, "grad_norm": 0.5569159388542175, "learning_rate": 1.9603725600712624e-05, "loss": 0.4459, "step": 1777 }, { "epoch": 0.21636750836629146, "grad_norm": 1.232100486755371, "learning_rate": 1.9603181721593057e-05, "loss": 0.4397, "step": 1778 }, { "epoch": 0.21648919987830847, "grad_norm": 4.7086944580078125, "learning_rate": 1.960263747705255e-05, "loss": 0.5435, "step": 1779 }, { "epoch": 0.21661089139032552, "grad_norm": 1.6949118375778198, "learning_rate": 1.9602092867111807e-05, "loss": 0.4974, "step": 1780 }, { "epoch": 0.21673258290234257, "grad_norm": 2.6949243545532227, "learning_rate": 1.9601547891791558e-05, "loss": 0.5137, "step": 1781 }, { "epoch": 0.2168542744143596, "grad_norm": 0.7370144724845886, "learning_rate": 1.9601002551112537e-05, "loss": 0.4492, "step": 1782 }, { "epoch": 0.21697596592637663, "grad_norm": 1.397643804550171, "learning_rate": 1.9600456845095498e-05, "loss": 0.4988, "step": 1783 }, { "epoch": 0.21709765743839368, "grad_norm": 4.239346981048584, "learning_rate": 1.9599910773761198e-05, "loss": 0.4688, "step": 1784 }, { "epoch": 0.2172193489504107, "grad_norm": 4.348527431488037, "learning_rate": 1.9599364337130425e-05, "loss": 0.4913, "step": 1785 }, { "epoch": 0.21734104046242775, "grad_norm": 2.584226131439209, "learning_rate": 1.9598817535223974e-05, "loss": 0.5575, "step": 1786 }, { "epoch": 0.2174627319744448, "grad_norm": 1.4294018745422363, "learning_rate": 1.9598270368062642e-05, "loss": 0.5263, "step": 1787 }, { "epoch": 0.21758442348646181, "grad_norm": 6.325389385223389, "learning_rate": 1.9597722835667256e-05, "loss": 0.4818, "step": 1788 }, { "epoch": 0.21770611499847886, "grad_norm": 2.446532726287842, "learning_rate": 1.9597174938058653e-05, "loss": 0.4782, "step": 1789 }, { "epoch": 0.21782780651049588, "grad_norm": 3.8551485538482666, "learning_rate": 1.9596626675257673e-05, "loss": 0.5752, "step": 1790 }, { "epoch": 0.21794949802251293, "grad_norm": 0.8540073037147522, "learning_rate": 1.959607804728519e-05, "loss": 0.453, "step": 1791 }, { "epoch": 0.21807118953452997, "grad_norm": 0.7740235328674316, "learning_rate": 1.959552905416207e-05, "loss": 0.3969, "step": 1792 }, { "epoch": 0.218192881046547, "grad_norm": 2.930610179901123, "learning_rate": 1.9594979695909212e-05, "loss": 0.509, "step": 1793 }, { "epoch": 0.21831457255856404, "grad_norm": 2.813776731491089, "learning_rate": 1.9594429972547512e-05, "loss": 0.4784, "step": 1794 }, { "epoch": 0.2184362640705811, "grad_norm": 1.513473629951477, "learning_rate": 1.9593879884097894e-05, "loss": 0.4409, "step": 1795 }, { "epoch": 0.2185579555825981, "grad_norm": 0.9543504118919373, "learning_rate": 1.9593329430581288e-05, "loss": 0.385, "step": 1796 }, { "epoch": 0.21867964709461515, "grad_norm": 0.7908185720443726, "learning_rate": 1.959277861201864e-05, "loss": 0.4647, "step": 1797 }, { "epoch": 0.2188013386066322, "grad_norm": 2.3689825534820557, "learning_rate": 1.9592227428430914e-05, "loss": 0.5096, "step": 1798 }, { "epoch": 0.21892303011864922, "grad_norm": 1.5015161037445068, "learning_rate": 1.9591675879839074e-05, "loss": 0.4844, "step": 1799 }, { "epoch": 0.21904472163066627, "grad_norm": 3.2143096923828125, "learning_rate": 1.9591123966264113e-05, "loss": 0.4031, "step": 1800 }, { "epoch": 0.2191664131426833, "grad_norm": 3.81089448928833, "learning_rate": 1.9590571687727035e-05, "loss": 0.469, "step": 1801 }, { "epoch": 0.21928810465470033, "grad_norm": 4.690742492675781, "learning_rate": 1.959001904424885e-05, "loss": 0.45, "step": 1802 }, { "epoch": 0.21940979616671738, "grad_norm": 0.7875168919563293, "learning_rate": 1.9589466035850595e-05, "loss": 0.4531, "step": 1803 }, { "epoch": 0.2195314876787344, "grad_norm": 2.462885856628418, "learning_rate": 1.9588912662553302e-05, "loss": 0.5211, "step": 1804 }, { "epoch": 0.21965317919075145, "grad_norm": 1.3978126049041748, "learning_rate": 1.958835892437804e-05, "loss": 0.463, "step": 1805 }, { "epoch": 0.2197748707027685, "grad_norm": 1.172132968902588, "learning_rate": 1.9587804821345874e-05, "loss": 0.4236, "step": 1806 }, { "epoch": 0.2198965622147855, "grad_norm": 3.6447465419769287, "learning_rate": 1.9587250353477885e-05, "loss": 0.5574, "step": 1807 }, { "epoch": 0.22001825372680256, "grad_norm": 0.9601985812187195, "learning_rate": 1.9586695520795178e-05, "loss": 0.4515, "step": 1808 }, { "epoch": 0.22013994523881958, "grad_norm": 1.2912932634353638, "learning_rate": 1.958614032331886e-05, "loss": 0.5161, "step": 1809 }, { "epoch": 0.22026163675083663, "grad_norm": 0.9475353956222534, "learning_rate": 1.9585584761070064e-05, "loss": 0.4697, "step": 1810 }, { "epoch": 0.22038332826285367, "grad_norm": 0.9893306493759155, "learning_rate": 1.9585028834069924e-05, "loss": 0.5137, "step": 1811 }, { "epoch": 0.2205050197748707, "grad_norm": 1.252672553062439, "learning_rate": 1.9584472542339603e-05, "loss": 0.4504, "step": 1812 }, { "epoch": 0.22062671128688774, "grad_norm": 1.7876701354980469, "learning_rate": 1.9583915885900255e-05, "loss": 0.4579, "step": 1813 }, { "epoch": 0.22074840279890479, "grad_norm": 2.5286545753479004, "learning_rate": 1.9583358864773075e-05, "loss": 0.5526, "step": 1814 }, { "epoch": 0.2208700943109218, "grad_norm": 1.6176166534423828, "learning_rate": 1.9582801478979254e-05, "loss": 0.4994, "step": 1815 }, { "epoch": 0.22099178582293885, "grad_norm": 3.4232685565948486, "learning_rate": 1.958224372854e-05, "loss": 0.5623, "step": 1816 }, { "epoch": 0.2211134773349559, "grad_norm": 1.9408061504364014, "learning_rate": 1.9581685613476536e-05, "loss": 0.4838, "step": 1817 }, { "epoch": 0.22123516884697292, "grad_norm": 1.1546335220336914, "learning_rate": 1.9581127133810103e-05, "loss": 0.4906, "step": 1818 }, { "epoch": 0.22135686035898997, "grad_norm": 0.6846708059310913, "learning_rate": 1.958056828956195e-05, "loss": 0.5004, "step": 1819 }, { "epoch": 0.22147855187100698, "grad_norm": 3.5709493160247803, "learning_rate": 1.9580009080753343e-05, "loss": 0.4759, "step": 1820 }, { "epoch": 0.22160024338302403, "grad_norm": 0.5858487486839294, "learning_rate": 1.957944950740556e-05, "loss": 0.4857, "step": 1821 }, { "epoch": 0.22172193489504108, "grad_norm": 0.6543298363685608, "learning_rate": 1.9578889569539895e-05, "loss": 0.4621, "step": 1822 }, { "epoch": 0.2218436264070581, "grad_norm": 1.3765203952789307, "learning_rate": 1.957832926717766e-05, "loss": 0.4849, "step": 1823 }, { "epoch": 0.22196531791907514, "grad_norm": 1.184235692024231, "learning_rate": 1.9577768600340165e-05, "loss": 0.4716, "step": 1824 }, { "epoch": 0.2220870094310922, "grad_norm": 2.138306140899658, "learning_rate": 1.957720756904875e-05, "loss": 0.4938, "step": 1825 }, { "epoch": 0.2222087009431092, "grad_norm": 2.1889560222625732, "learning_rate": 1.9576646173324768e-05, "loss": 0.4491, "step": 1826 }, { "epoch": 0.22233039245512626, "grad_norm": 3.8328733444213867, "learning_rate": 1.957608441318957e-05, "loss": 0.6034, "step": 1827 }, { "epoch": 0.2224520839671433, "grad_norm": 1.3347678184509277, "learning_rate": 1.9575522288664542e-05, "loss": 0.5146, "step": 1828 }, { "epoch": 0.22257377547916032, "grad_norm": 0.8727834820747375, "learning_rate": 1.957495979977107e-05, "loss": 0.532, "step": 1829 }, { "epoch": 0.22269546699117737, "grad_norm": 4.480216979980469, "learning_rate": 1.957439694653056e-05, "loss": 0.4885, "step": 1830 }, { "epoch": 0.2228171585031944, "grad_norm": 3.6056365966796875, "learning_rate": 1.9573833728964428e-05, "loss": 0.5132, "step": 1831 }, { "epoch": 0.22293885001521144, "grad_norm": 2.7152721881866455, "learning_rate": 1.9573270147094104e-05, "loss": 0.4854, "step": 1832 }, { "epoch": 0.22306054152722848, "grad_norm": 1.096818447113037, "learning_rate": 1.9572706200941036e-05, "loss": 0.472, "step": 1833 }, { "epoch": 0.2231822330392455, "grad_norm": 1.5718345642089844, "learning_rate": 1.9572141890526684e-05, "loss": 0.4515, "step": 1834 }, { "epoch": 0.22330392455126255, "grad_norm": 1.770283818244934, "learning_rate": 1.9571577215872518e-05, "loss": 0.4712, "step": 1835 }, { "epoch": 0.2234256160632796, "grad_norm": 1.5038691759109497, "learning_rate": 1.957101217700003e-05, "loss": 0.4915, "step": 1836 }, { "epoch": 0.22354730757529662, "grad_norm": 0.8830704689025879, "learning_rate": 1.9570446773930715e-05, "loss": 0.4567, "step": 1837 }, { "epoch": 0.22366899908731366, "grad_norm": 0.9205377697944641, "learning_rate": 1.956988100668609e-05, "loss": 0.4732, "step": 1838 }, { "epoch": 0.2237906905993307, "grad_norm": 3.763309955596924, "learning_rate": 1.9569314875287687e-05, "loss": 0.4171, "step": 1839 }, { "epoch": 0.22391238211134773, "grad_norm": 1.2489964962005615, "learning_rate": 1.956874837975704e-05, "loss": 0.4366, "step": 1840 }, { "epoch": 0.22403407362336478, "grad_norm": 0.7394869327545166, "learning_rate": 1.9568181520115717e-05, "loss": 0.4668, "step": 1841 }, { "epoch": 0.2241557651353818, "grad_norm": 0.8081273436546326, "learning_rate": 1.956761429638528e-05, "loss": 0.5224, "step": 1842 }, { "epoch": 0.22427745664739884, "grad_norm": 1.2300883531570435, "learning_rate": 1.9567046708587313e-05, "loss": 0.4521, "step": 1843 }, { "epoch": 0.2243991481594159, "grad_norm": 1.7750821113586426, "learning_rate": 1.956647875674342e-05, "loss": 0.3908, "step": 1844 }, { "epoch": 0.2245208396714329, "grad_norm": 1.9537758827209473, "learning_rate": 1.9565910440875203e-05, "loss": 0.4675, "step": 1845 }, { "epoch": 0.22464253118344996, "grad_norm": 1.7771425247192383, "learning_rate": 1.95653417610043e-05, "loss": 0.4941, "step": 1846 }, { "epoch": 0.224764222695467, "grad_norm": 4.661746025085449, "learning_rate": 1.956477271715234e-05, "loss": 0.5875, "step": 1847 }, { "epoch": 0.22488591420748402, "grad_norm": 0.7554677724838257, "learning_rate": 1.9564203309340982e-05, "loss": 0.4472, "step": 1848 }, { "epoch": 0.22500760571950107, "grad_norm": 0.7332402467727661, "learning_rate": 1.956363353759189e-05, "loss": 0.5146, "step": 1849 }, { "epoch": 0.2251292972315181, "grad_norm": 0.6872220635414124, "learning_rate": 1.9563063401926747e-05, "loss": 0.4928, "step": 1850 }, { "epoch": 0.22525098874353514, "grad_norm": 2.8405821323394775, "learning_rate": 1.9562492902367247e-05, "loss": 0.4289, "step": 1851 }, { "epoch": 0.22537268025555218, "grad_norm": 0.5601333975791931, "learning_rate": 1.9561922038935096e-05, "loss": 0.4823, "step": 1852 }, { "epoch": 0.2254943717675692, "grad_norm": 1.0897819995880127, "learning_rate": 1.9561350811652024e-05, "loss": 0.4858, "step": 1853 }, { "epoch": 0.22561606327958625, "grad_norm": 0.6712708473205566, "learning_rate": 1.956077922053976e-05, "loss": 0.4965, "step": 1854 }, { "epoch": 0.2257377547916033, "grad_norm": 2.067476987838745, "learning_rate": 1.9560207265620058e-05, "loss": 0.4242, "step": 1855 }, { "epoch": 0.22585944630362031, "grad_norm": 0.992720901966095, "learning_rate": 1.955963494691468e-05, "loss": 0.4779, "step": 1856 }, { "epoch": 0.22598113781563736, "grad_norm": 2.4270431995391846, "learning_rate": 1.9559062264445404e-05, "loss": 0.5383, "step": 1857 }, { "epoch": 0.2261028293276544, "grad_norm": 0.9442155957221985, "learning_rate": 1.9558489218234023e-05, "loss": 0.4629, "step": 1858 }, { "epoch": 0.22622452083967143, "grad_norm": 1.953385829925537, "learning_rate": 1.9557915808302344e-05, "loss": 0.4717, "step": 1859 }, { "epoch": 0.22634621235168847, "grad_norm": 3.446536064147949, "learning_rate": 1.9557342034672184e-05, "loss": 0.5155, "step": 1860 }, { "epoch": 0.2264679038637055, "grad_norm": 2.581662654876709, "learning_rate": 1.9556767897365375e-05, "loss": 0.3887, "step": 1861 }, { "epoch": 0.22658959537572254, "grad_norm": 0.994106113910675, "learning_rate": 1.955619339640377e-05, "loss": 0.4577, "step": 1862 }, { "epoch": 0.2267112868877396, "grad_norm": 1.1811777353286743, "learning_rate": 1.9555618531809225e-05, "loss": 0.4742, "step": 1863 }, { "epoch": 0.2268329783997566, "grad_norm": 0.8801113963127136, "learning_rate": 1.9555043303603614e-05, "loss": 0.4302, "step": 1864 }, { "epoch": 0.22695466991177365, "grad_norm": 1.5900946855545044, "learning_rate": 1.955446771180883e-05, "loss": 0.4826, "step": 1865 }, { "epoch": 0.2270763614237907, "grad_norm": 4.646539688110352, "learning_rate": 1.955389175644677e-05, "loss": 0.5922, "step": 1866 }, { "epoch": 0.22719805293580772, "grad_norm": 0.7682361602783203, "learning_rate": 1.9553315437539354e-05, "loss": 0.4721, "step": 1867 }, { "epoch": 0.22731974444782477, "grad_norm": 0.7815783619880676, "learning_rate": 1.955273875510851e-05, "loss": 0.4761, "step": 1868 }, { "epoch": 0.22744143595984181, "grad_norm": 2.1399126052856445, "learning_rate": 1.9552161709176186e-05, "loss": 0.4418, "step": 1869 }, { "epoch": 0.22756312747185883, "grad_norm": 1.1615606546401978, "learning_rate": 1.9551584299764337e-05, "loss": 0.4836, "step": 1870 }, { "epoch": 0.22768481898387588, "grad_norm": 1.3012791872024536, "learning_rate": 1.9551006526894937e-05, "loss": 0.4472, "step": 1871 }, { "epoch": 0.2278065104958929, "grad_norm": 1.6008267402648926, "learning_rate": 1.9550428390589963e-05, "loss": 0.5028, "step": 1872 }, { "epoch": 0.22792820200790995, "grad_norm": 1.3280154466629028, "learning_rate": 1.9549849890871423e-05, "loss": 0.5007, "step": 1873 }, { "epoch": 0.228049893519927, "grad_norm": 2.204503059387207, "learning_rate": 1.954927102776133e-05, "loss": 0.4493, "step": 1874 }, { "epoch": 0.228171585031944, "grad_norm": 0.6567058563232422, "learning_rate": 1.9548691801281706e-05, "loss": 0.5283, "step": 1875 }, { "epoch": 0.22829327654396106, "grad_norm": 0.6983780264854431, "learning_rate": 1.9548112211454592e-05, "loss": 0.4797, "step": 1876 }, { "epoch": 0.2284149680559781, "grad_norm": 1.5089805126190186, "learning_rate": 1.954753225830205e-05, "loss": 0.5219, "step": 1877 }, { "epoch": 0.22853665956799513, "grad_norm": 3.4219298362731934, "learning_rate": 1.954695194184614e-05, "loss": 0.4544, "step": 1878 }, { "epoch": 0.22865835108001217, "grad_norm": 2.7527565956115723, "learning_rate": 1.954637126210895e-05, "loss": 0.4123, "step": 1879 }, { "epoch": 0.2287800425920292, "grad_norm": 0.8203837275505066, "learning_rate": 1.954579021911257e-05, "loss": 0.459, "step": 1880 }, { "epoch": 0.22890173410404624, "grad_norm": 0.6884888410568237, "learning_rate": 1.9545208812879114e-05, "loss": 0.4456, "step": 1881 }, { "epoch": 0.2290234256160633, "grad_norm": 0.9354062676429749, "learning_rate": 1.9544627043430706e-05, "loss": 0.4658, "step": 1882 }, { "epoch": 0.2291451171280803, "grad_norm": 2.0113630294799805, "learning_rate": 1.9544044910789485e-05, "loss": 0.5228, "step": 1883 }, { "epoch": 0.22926680864009735, "grad_norm": 1.7827974557876587, "learning_rate": 1.95434624149776e-05, "loss": 0.5069, "step": 1884 }, { "epoch": 0.2293885001521144, "grad_norm": 1.4230190515518188, "learning_rate": 1.9542879556017212e-05, "loss": 0.4563, "step": 1885 }, { "epoch": 0.22951019166413142, "grad_norm": 2.9803624153137207, "learning_rate": 1.9542296333930508e-05, "loss": 0.5347, "step": 1886 }, { "epoch": 0.22963188317614847, "grad_norm": 1.2775452136993408, "learning_rate": 1.9541712748739675e-05, "loss": 0.4578, "step": 1887 }, { "epoch": 0.2297535746881655, "grad_norm": 2.397963285446167, "learning_rate": 1.9541128800466923e-05, "loss": 0.4792, "step": 1888 }, { "epoch": 0.22987526620018253, "grad_norm": 2.783512830734253, "learning_rate": 1.954054448913447e-05, "loss": 0.4446, "step": 1889 }, { "epoch": 0.22999695771219958, "grad_norm": 0.6921996474266052, "learning_rate": 1.9539959814764553e-05, "loss": 0.4802, "step": 1890 }, { "epoch": 0.2301186492242166, "grad_norm": 2.020805835723877, "learning_rate": 1.9539374777379416e-05, "loss": 0.5665, "step": 1891 }, { "epoch": 0.23024034073623365, "grad_norm": 1.0037791728973389, "learning_rate": 1.9538789377001324e-05, "loss": 0.4838, "step": 1892 }, { "epoch": 0.2303620322482507, "grad_norm": 1.7108644247055054, "learning_rate": 1.9538203613652555e-05, "loss": 0.48, "step": 1893 }, { "epoch": 0.2304837237602677, "grad_norm": 2.821938991546631, "learning_rate": 1.9537617487355393e-05, "loss": 0.4187, "step": 1894 }, { "epoch": 0.23060541527228476, "grad_norm": 0.7069106101989746, "learning_rate": 1.9537030998132144e-05, "loss": 0.453, "step": 1895 }, { "epoch": 0.2307271067843018, "grad_norm": 1.8053488731384277, "learning_rate": 1.953644414600512e-05, "loss": 0.4792, "step": 1896 }, { "epoch": 0.23084879829631882, "grad_norm": 2.3494744300842285, "learning_rate": 1.9535856930996664e-05, "loss": 0.3955, "step": 1897 }, { "epoch": 0.23097048980833587, "grad_norm": 3.206209421157837, "learning_rate": 1.953526935312911e-05, "loss": 0.4974, "step": 1898 }, { "epoch": 0.23109218132035292, "grad_norm": 3.2301957607269287, "learning_rate": 1.9534681412424824e-05, "loss": 0.5038, "step": 1899 }, { "epoch": 0.23121387283236994, "grad_norm": 1.8758232593536377, "learning_rate": 1.953409310890617e-05, "loss": 0.5013, "step": 1900 }, { "epoch": 0.23133556434438698, "grad_norm": 0.5013694167137146, "learning_rate": 1.953350444259554e-05, "loss": 0.4573, "step": 1901 }, { "epoch": 0.231457255856404, "grad_norm": 2.4453580379486084, "learning_rate": 1.9532915413515337e-05, "loss": 0.5048, "step": 1902 }, { "epoch": 0.23157894736842105, "grad_norm": 1.0904576778411865, "learning_rate": 1.9532326021687967e-05, "loss": 0.5322, "step": 1903 }, { "epoch": 0.2317006388804381, "grad_norm": 3.3153820037841797, "learning_rate": 1.9531736267135858e-05, "loss": 0.4781, "step": 1904 }, { "epoch": 0.23182233039245512, "grad_norm": 5.554010391235352, "learning_rate": 1.9531146149881456e-05, "loss": 0.503, "step": 1905 }, { "epoch": 0.23194402190447216, "grad_norm": 7.145920276641846, "learning_rate": 1.953055566994722e-05, "loss": 0.5097, "step": 1906 }, { "epoch": 0.2320657134164892, "grad_norm": 5.55267333984375, "learning_rate": 1.952996482735561e-05, "loss": 0.5018, "step": 1907 }, { "epoch": 0.23218740492850623, "grad_norm": 5.191817760467529, "learning_rate": 1.9529373622129107e-05, "loss": 0.5197, "step": 1908 }, { "epoch": 0.23230909644052328, "grad_norm": 1.8097574710845947, "learning_rate": 1.952878205429022e-05, "loss": 0.4605, "step": 1909 }, { "epoch": 0.2324307879525403, "grad_norm": 1.145939588546753, "learning_rate": 1.952819012386145e-05, "loss": 0.4898, "step": 1910 }, { "epoch": 0.23255247946455734, "grad_norm": 1.1284998655319214, "learning_rate": 1.9527597830865325e-05, "loss": 0.4932, "step": 1911 }, { "epoch": 0.2326741709765744, "grad_norm": 1.0728673934936523, "learning_rate": 1.952700517532438e-05, "loss": 0.4367, "step": 1912 }, { "epoch": 0.2327958624885914, "grad_norm": 1.984288215637207, "learning_rate": 1.952641215726117e-05, "loss": 0.4561, "step": 1913 }, { "epoch": 0.23291755400060846, "grad_norm": 2.4089512825012207, "learning_rate": 1.9525818776698257e-05, "loss": 0.4925, "step": 1914 }, { "epoch": 0.2330392455126255, "grad_norm": 2.184720993041992, "learning_rate": 1.9525225033658222e-05, "loss": 0.4942, "step": 1915 }, { "epoch": 0.23316093702464252, "grad_norm": 2.7158260345458984, "learning_rate": 1.952463092816366e-05, "loss": 0.502, "step": 1916 }, { "epoch": 0.23328262853665957, "grad_norm": 0.6456950306892395, "learning_rate": 1.952403646023717e-05, "loss": 0.4662, "step": 1917 }, { "epoch": 0.23340432004867662, "grad_norm": 1.2357690334320068, "learning_rate": 1.952344162990139e-05, "loss": 0.4933, "step": 1918 }, { "epoch": 0.23352601156069364, "grad_norm": 2.7533791065216064, "learning_rate": 1.9522846437178933e-05, "loss": 0.4568, "step": 1919 }, { "epoch": 0.23364770307271068, "grad_norm": 0.5824018120765686, "learning_rate": 1.9522250882092465e-05, "loss": 0.4831, "step": 1920 }, { "epoch": 0.2337693945847277, "grad_norm": 2.744816780090332, "learning_rate": 1.952165496466464e-05, "loss": 0.4405, "step": 1921 }, { "epoch": 0.23389108609674475, "grad_norm": 1.5789531469345093, "learning_rate": 1.9521058684918133e-05, "loss": 0.4395, "step": 1922 }, { "epoch": 0.2340127776087618, "grad_norm": 3.622791051864624, "learning_rate": 1.9520462042875635e-05, "loss": 0.5167, "step": 1923 }, { "epoch": 0.23413446912077882, "grad_norm": 0.9125657081604004, "learning_rate": 1.951986503855985e-05, "loss": 0.4775, "step": 1924 }, { "epoch": 0.23425616063279586, "grad_norm": 1.3564062118530273, "learning_rate": 1.9519267671993498e-05, "loss": 0.4726, "step": 1925 }, { "epoch": 0.2343778521448129, "grad_norm": 1.0720938444137573, "learning_rate": 1.9518669943199303e-05, "loss": 0.4711, "step": 1926 }, { "epoch": 0.23449954365682993, "grad_norm": 2.363069772720337, "learning_rate": 1.9518071852200017e-05, "loss": 0.4189, "step": 1927 }, { "epoch": 0.23462123516884698, "grad_norm": 0.6921089887619019, "learning_rate": 1.9517473399018397e-05, "loss": 0.4886, "step": 1928 }, { "epoch": 0.23474292668086402, "grad_norm": 0.6319681406021118, "learning_rate": 1.951687458367721e-05, "loss": 0.5, "step": 1929 }, { "epoch": 0.23486461819288104, "grad_norm": 2.0676770210266113, "learning_rate": 1.951627540619925e-05, "loss": 0.4935, "step": 1930 }, { "epoch": 0.2349863097048981, "grad_norm": 0.8923307061195374, "learning_rate": 1.951567586660731e-05, "loss": 0.4506, "step": 1931 }, { "epoch": 0.2351080012169151, "grad_norm": 1.6264599561691284, "learning_rate": 1.951507596492421e-05, "loss": 0.5304, "step": 1932 }, { "epoch": 0.23522969272893215, "grad_norm": 1.3363028764724731, "learning_rate": 1.9514475701172775e-05, "loss": 0.5183, "step": 1933 }, { "epoch": 0.2353513842409492, "grad_norm": 1.9410772323608398, "learning_rate": 1.951387507537584e-05, "loss": 0.4618, "step": 1934 }, { "epoch": 0.23547307575296622, "grad_norm": 0.6874043941497803, "learning_rate": 1.951327408755627e-05, "loss": 0.4997, "step": 1935 }, { "epoch": 0.23559476726498327, "grad_norm": 0.8465669751167297, "learning_rate": 1.9512672737736932e-05, "loss": 0.4785, "step": 1936 }, { "epoch": 0.23571645877700032, "grad_norm": 0.6066112518310547, "learning_rate": 1.9512071025940702e-05, "loss": 0.4777, "step": 1937 }, { "epoch": 0.23583815028901733, "grad_norm": 1.6517739295959473, "learning_rate": 1.9511468952190482e-05, "loss": 0.4206, "step": 1938 }, { "epoch": 0.23595984180103438, "grad_norm": 2.3720359802246094, "learning_rate": 1.951086651650918e-05, "loss": 0.3877, "step": 1939 }, { "epoch": 0.23608153331305143, "grad_norm": 3.838566780090332, "learning_rate": 1.9510263718919723e-05, "loss": 0.5187, "step": 1940 }, { "epoch": 0.23620322482506845, "grad_norm": 0.7764478921890259, "learning_rate": 1.9509660559445042e-05, "loss": 0.4338, "step": 1941 }, { "epoch": 0.2363249163370855, "grad_norm": 0.8405648469924927, "learning_rate": 1.9509057038108095e-05, "loss": 0.4133, "step": 1942 }, { "epoch": 0.2364466078491025, "grad_norm": 2.0843472480773926, "learning_rate": 1.950845315493185e-05, "loss": 0.4736, "step": 1943 }, { "epoch": 0.23656829936111956, "grad_norm": 1.8424642086029053, "learning_rate": 1.9507848909939273e-05, "loss": 0.4793, "step": 1944 }, { "epoch": 0.2366899908731366, "grad_norm": 0.6707680821418762, "learning_rate": 1.950724430315337e-05, "loss": 0.4889, "step": 1945 }, { "epoch": 0.23681168238515363, "grad_norm": 2.9982569217681885, "learning_rate": 1.9506639334597137e-05, "loss": 0.4055, "step": 1946 }, { "epoch": 0.23693337389717067, "grad_norm": 1.0548427104949951, "learning_rate": 1.9506034004293606e-05, "loss": 0.486, "step": 1947 }, { "epoch": 0.23705506540918772, "grad_norm": 0.7568971514701843, "learning_rate": 1.9505428312265802e-05, "loss": 0.4748, "step": 1948 }, { "epoch": 0.23717675692120474, "grad_norm": 1.1806089878082275, "learning_rate": 1.9504822258536773e-05, "loss": 0.4741, "step": 1949 }, { "epoch": 0.2372984484332218, "grad_norm": 1.6513252258300781, "learning_rate": 1.9504215843129585e-05, "loss": 0.4595, "step": 1950 }, { "epoch": 0.2374201399452388, "grad_norm": 1.5319706201553345, "learning_rate": 1.9503609066067315e-05, "loss": 0.4736, "step": 1951 }, { "epoch": 0.23754183145725585, "grad_norm": 1.2497912645339966, "learning_rate": 1.9503001927373045e-05, "loss": 0.4246, "step": 1952 }, { "epoch": 0.2376635229692729, "grad_norm": 1.8263548612594604, "learning_rate": 1.950239442706988e-05, "loss": 0.4915, "step": 1953 }, { "epoch": 0.23778521448128992, "grad_norm": 4.213039398193359, "learning_rate": 1.9501786565180944e-05, "loss": 0.5669, "step": 1954 }, { "epoch": 0.23790690599330697, "grad_norm": 1.3360931873321533, "learning_rate": 1.9501178341729356e-05, "loss": 0.4602, "step": 1955 }, { "epoch": 0.238028597505324, "grad_norm": 2.0655064582824707, "learning_rate": 1.950056975673827e-05, "loss": 0.4722, "step": 1956 }, { "epoch": 0.23815028901734103, "grad_norm": 0.5991892218589783, "learning_rate": 1.9499960810230836e-05, "loss": 0.4838, "step": 1957 }, { "epoch": 0.23827198052935808, "grad_norm": 1.3843307495117188, "learning_rate": 1.949935150223023e-05, "loss": 0.4921, "step": 1958 }, { "epoch": 0.23839367204137513, "grad_norm": 2.5422253608703613, "learning_rate": 1.9498741832759638e-05, "loss": 0.4749, "step": 1959 }, { "epoch": 0.23851536355339215, "grad_norm": 5.866820335388184, "learning_rate": 1.9498131801842256e-05, "loss": 0.4401, "step": 1960 }, { "epoch": 0.2386370550654092, "grad_norm": 1.6477247476577759, "learning_rate": 1.9497521409501302e-05, "loss": 0.4534, "step": 1961 }, { "epoch": 0.2387587465774262, "grad_norm": 1.0300894975662231, "learning_rate": 1.9496910655759996e-05, "loss": 0.4297, "step": 1962 }, { "epoch": 0.23888043808944326, "grad_norm": 3.3744678497314453, "learning_rate": 1.9496299540641586e-05, "loss": 0.5299, "step": 1963 }, { "epoch": 0.2390021296014603, "grad_norm": 1.0261774063110352, "learning_rate": 1.949568806416932e-05, "loss": 0.4944, "step": 1964 }, { "epoch": 0.23912382111347733, "grad_norm": 0.6185161471366882, "learning_rate": 1.949507622636647e-05, "loss": 0.4847, "step": 1965 }, { "epoch": 0.23924551262549437, "grad_norm": 1.6548773050308228, "learning_rate": 1.9494464027256313e-05, "loss": 0.4379, "step": 1966 }, { "epoch": 0.23936720413751142, "grad_norm": 2.1984941959381104, "learning_rate": 1.9493851466862147e-05, "loss": 0.4849, "step": 1967 }, { "epoch": 0.23948889564952844, "grad_norm": 1.665209174156189, "learning_rate": 1.9493238545207284e-05, "loss": 0.4208, "step": 1968 }, { "epoch": 0.23961058716154549, "grad_norm": 1.1189358234405518, "learning_rate": 1.9492625262315044e-05, "loss": 0.506, "step": 1969 }, { "epoch": 0.23973227867356253, "grad_norm": 2.0177454948425293, "learning_rate": 1.9492011618208764e-05, "loss": 0.4045, "step": 1970 }, { "epoch": 0.23985397018557955, "grad_norm": 2.4533140659332275, "learning_rate": 1.9491397612911793e-05, "loss": 0.5428, "step": 1971 }, { "epoch": 0.2399756616975966, "grad_norm": 2.858595371246338, "learning_rate": 1.94907832464475e-05, "loss": 0.5619, "step": 1972 }, { "epoch": 0.24009735320961362, "grad_norm": 0.5678976774215698, "learning_rate": 1.9490168518839255e-05, "loss": 0.473, "step": 1973 }, { "epoch": 0.24021904472163066, "grad_norm": 1.6049307584762573, "learning_rate": 1.9489553430110458e-05, "loss": 0.4734, "step": 1974 }, { "epoch": 0.2403407362336477, "grad_norm": 2.4758260250091553, "learning_rate": 1.9488937980284508e-05, "loss": 0.4305, "step": 1975 }, { "epoch": 0.24046242774566473, "grad_norm": 1.9499974250793457, "learning_rate": 1.948832216938483e-05, "loss": 0.4245, "step": 1976 }, { "epoch": 0.24058411925768178, "grad_norm": 1.9068377017974854, "learning_rate": 1.948770599743485e-05, "loss": 0.5296, "step": 1977 }, { "epoch": 0.24070581076969882, "grad_norm": 0.6975396275520325, "learning_rate": 1.948708946445802e-05, "loss": 0.4464, "step": 1978 }, { "epoch": 0.24082750228171584, "grad_norm": 4.860745429992676, "learning_rate": 1.94864725704778e-05, "loss": 0.5933, "step": 1979 }, { "epoch": 0.2409491937937329, "grad_norm": 0.5379089117050171, "learning_rate": 1.948585531551766e-05, "loss": 0.4426, "step": 1980 }, { "epoch": 0.2410708853057499, "grad_norm": 1.878548502922058, "learning_rate": 1.9485237699601095e-05, "loss": 0.4766, "step": 1981 }, { "epoch": 0.24119257681776696, "grad_norm": 1.4302805662155151, "learning_rate": 1.9484619722751596e-05, "loss": 0.4927, "step": 1982 }, { "epoch": 0.241314268329784, "grad_norm": 1.1551105976104736, "learning_rate": 1.948400138499269e-05, "loss": 0.5121, "step": 1983 }, { "epoch": 0.24143595984180102, "grad_norm": 2.576261043548584, "learning_rate": 1.9483382686347898e-05, "loss": 0.4855, "step": 1984 }, { "epoch": 0.24155765135381807, "grad_norm": 1.2556712627410889, "learning_rate": 1.9482763626840767e-05, "loss": 0.5142, "step": 1985 }, { "epoch": 0.24167934286583512, "grad_norm": 1.447229027748108, "learning_rate": 1.948214420649485e-05, "loss": 0.5148, "step": 1986 }, { "epoch": 0.24180103437785214, "grad_norm": 5.459035396575928, "learning_rate": 1.9481524425333717e-05, "loss": 0.4225, "step": 1987 }, { "epoch": 0.24192272588986918, "grad_norm": 2.6113619804382324, "learning_rate": 1.948090428338096e-05, "loss": 0.5247, "step": 1988 }, { "epoch": 0.24204441740188623, "grad_norm": 2.7069947719573975, "learning_rate": 1.9480283780660164e-05, "loss": 0.4035, "step": 1989 }, { "epoch": 0.24216610891390325, "grad_norm": 1.63068687915802, "learning_rate": 1.947966291719495e-05, "loss": 0.467, "step": 1990 }, { "epoch": 0.2422878004259203, "grad_norm": 3.6869382858276367, "learning_rate": 1.9479041693008944e-05, "loss": 0.5476, "step": 1991 }, { "epoch": 0.24240949193793732, "grad_norm": 3.250044584274292, "learning_rate": 1.9478420108125773e-05, "loss": 0.5629, "step": 1992 }, { "epoch": 0.24253118344995436, "grad_norm": 3.5663516521453857, "learning_rate": 1.9477798162569105e-05, "loss": 0.4127, "step": 1993 }, { "epoch": 0.2426528749619714, "grad_norm": 2.670294761657715, "learning_rate": 1.9477175856362597e-05, "loss": 0.4251, "step": 1994 }, { "epoch": 0.24277456647398843, "grad_norm": 4.858532905578613, "learning_rate": 1.9476553189529928e-05, "loss": 0.4047, "step": 1995 }, { "epoch": 0.24289625798600548, "grad_norm": 0.7859447002410889, "learning_rate": 1.9475930162094797e-05, "loss": 0.4906, "step": 1996 }, { "epoch": 0.24301794949802252, "grad_norm": 0.641616702079773, "learning_rate": 1.9475306774080906e-05, "loss": 0.4803, "step": 1997 }, { "epoch": 0.24313964101003954, "grad_norm": 2.0165393352508545, "learning_rate": 1.9474683025511984e-05, "loss": 0.4235, "step": 1998 }, { "epoch": 0.2432613325220566, "grad_norm": 0.6107766032218933, "learning_rate": 1.947405891641176e-05, "loss": 0.4807, "step": 1999 }, { "epoch": 0.24338302403407364, "grad_norm": 1.1288394927978516, "learning_rate": 1.9473434446803983e-05, "loss": 0.4626, "step": 2000 }, { "epoch": 0.24350471554609066, "grad_norm": 0.6809804439544678, "learning_rate": 1.9472809616712416e-05, "loss": 0.4509, "step": 2001 }, { "epoch": 0.2436264070581077, "grad_norm": 3.5753886699676514, "learning_rate": 1.947218442616084e-05, "loss": 0.5296, "step": 2002 }, { "epoch": 0.24374809857012472, "grad_norm": 0.5915960073471069, "learning_rate": 1.9471558875173032e-05, "loss": 0.4311, "step": 2003 }, { "epoch": 0.24386979008214177, "grad_norm": 3.084946870803833, "learning_rate": 1.947093296377281e-05, "loss": 0.5147, "step": 2004 }, { "epoch": 0.24399148159415882, "grad_norm": 0.6248292922973633, "learning_rate": 1.9470306691983985e-05, "loss": 0.4607, "step": 2005 }, { "epoch": 0.24411317310617583, "grad_norm": 2.3376266956329346, "learning_rate": 1.9469680059830385e-05, "loss": 0.5279, "step": 2006 }, { "epoch": 0.24423486461819288, "grad_norm": 1.9937899112701416, "learning_rate": 1.9469053067335858e-05, "loss": 0.4314, "step": 2007 }, { "epoch": 0.24435655613020993, "grad_norm": 1.7855887413024902, "learning_rate": 1.9468425714524265e-05, "loss": 0.5054, "step": 2008 }, { "epoch": 0.24447824764222695, "grad_norm": 2.464897871017456, "learning_rate": 1.9467798001419473e-05, "loss": 0.4798, "step": 2009 }, { "epoch": 0.244599939154244, "grad_norm": 1.9896260499954224, "learning_rate": 1.9467169928045368e-05, "loss": 0.4576, "step": 2010 }, { "epoch": 0.24472163066626104, "grad_norm": 2.3477017879486084, "learning_rate": 1.9466541494425853e-05, "loss": 0.4582, "step": 2011 }, { "epoch": 0.24484332217827806, "grad_norm": 1.1433109045028687, "learning_rate": 1.946591270058484e-05, "loss": 0.4259, "step": 2012 }, { "epoch": 0.2449650136902951, "grad_norm": 3.811953544616699, "learning_rate": 1.9465283546546256e-05, "loss": 0.5063, "step": 2013 }, { "epoch": 0.24508670520231213, "grad_norm": 3.000900983810425, "learning_rate": 1.9464654032334037e-05, "loss": 0.4729, "step": 2014 }, { "epoch": 0.24520839671432917, "grad_norm": 4.58302640914917, "learning_rate": 1.9464024157972147e-05, "loss": 0.5203, "step": 2015 }, { "epoch": 0.24533008822634622, "grad_norm": 3.0696282386779785, "learning_rate": 1.9463393923484543e-05, "loss": 0.4356, "step": 2016 }, { "epoch": 0.24545177973836324, "grad_norm": 3.5197534561157227, "learning_rate": 1.9462763328895214e-05, "loss": 0.489, "step": 2017 }, { "epoch": 0.2455734712503803, "grad_norm": 2.1193952560424805, "learning_rate": 1.9462132374228154e-05, "loss": 0.4487, "step": 2018 }, { "epoch": 0.24569516276239733, "grad_norm": 0.969070315361023, "learning_rate": 1.946150105950737e-05, "loss": 0.4758, "step": 2019 }, { "epoch": 0.24581685427441435, "grad_norm": 1.8888543844223022, "learning_rate": 1.946086938475689e-05, "loss": 0.4576, "step": 2020 }, { "epoch": 0.2459385457864314, "grad_norm": 5.966174602508545, "learning_rate": 1.9460237350000744e-05, "loss": 0.4188, "step": 2021 }, { "epoch": 0.24606023729844842, "grad_norm": 4.978442668914795, "learning_rate": 1.945960495526299e-05, "loss": 0.4571, "step": 2022 }, { "epoch": 0.24618192881046547, "grad_norm": 4.704474449157715, "learning_rate": 1.945897220056768e-05, "loss": 0.4063, "step": 2023 }, { "epoch": 0.2463036203224825, "grad_norm": 0.8123753070831299, "learning_rate": 1.9458339085938902e-05, "loss": 0.5244, "step": 2024 }, { "epoch": 0.24642531183449953, "grad_norm": 0.9004320502281189, "learning_rate": 1.9457705611400747e-05, "loss": 0.4622, "step": 2025 }, { "epoch": 0.24654700334651658, "grad_norm": 2.759958028793335, "learning_rate": 1.945707177697731e-05, "loss": 0.4771, "step": 2026 }, { "epoch": 0.24666869485853363, "grad_norm": 2.455812692642212, "learning_rate": 1.945643758269272e-05, "loss": 0.5234, "step": 2027 }, { "epoch": 0.24679038637055065, "grad_norm": 0.7070157527923584, "learning_rate": 1.9455803028571108e-05, "loss": 0.4688, "step": 2028 }, { "epoch": 0.2469120778825677, "grad_norm": 1.804274320602417, "learning_rate": 1.945516811463662e-05, "loss": 0.4572, "step": 2029 }, { "epoch": 0.24703376939458474, "grad_norm": 1.191839575767517, "learning_rate": 1.945453284091341e-05, "loss": 0.5136, "step": 2030 }, { "epoch": 0.24715546090660176, "grad_norm": 3.599769115447998, "learning_rate": 1.945389720742566e-05, "loss": 0.5154, "step": 2031 }, { "epoch": 0.2472771524186188, "grad_norm": 1.8192546367645264, "learning_rate": 1.9453261214197548e-05, "loss": 0.4857, "step": 2032 }, { "epoch": 0.24739884393063583, "grad_norm": 2.1078298091888428, "learning_rate": 1.945262486125328e-05, "loss": 0.4766, "step": 2033 }, { "epoch": 0.24752053544265287, "grad_norm": 1.5302866697311401, "learning_rate": 1.9451988148617077e-05, "loss": 0.4294, "step": 2034 }, { "epoch": 0.24764222695466992, "grad_norm": 0.7828501462936401, "learning_rate": 1.9451351076313153e-05, "loss": 0.4909, "step": 2035 }, { "epoch": 0.24776391846668694, "grad_norm": 1.9352378845214844, "learning_rate": 1.9450713644365758e-05, "loss": 0.5069, "step": 2036 }, { "epoch": 0.24788560997870399, "grad_norm": 2.0285608768463135, "learning_rate": 1.9450075852799148e-05, "loss": 0.4491, "step": 2037 }, { "epoch": 0.24800730149072103, "grad_norm": 1.0679059028625488, "learning_rate": 1.9449437701637596e-05, "loss": 0.4988, "step": 2038 }, { "epoch": 0.24812899300273805, "grad_norm": 2.314944267272949, "learning_rate": 1.9448799190905373e-05, "loss": 0.4612, "step": 2039 }, { "epoch": 0.2482506845147551, "grad_norm": 1.9832383394241333, "learning_rate": 1.9448160320626787e-05, "loss": 0.4437, "step": 2040 }, { "epoch": 0.24837237602677215, "grad_norm": 2.0206549167633057, "learning_rate": 1.9447521090826144e-05, "loss": 0.5113, "step": 2041 }, { "epoch": 0.24849406753878917, "grad_norm": 1.4016667604446411, "learning_rate": 1.9446881501527764e-05, "loss": 0.5089, "step": 2042 }, { "epoch": 0.2486157590508062, "grad_norm": 2.2301433086395264, "learning_rate": 1.9446241552755994e-05, "loss": 0.4546, "step": 2043 }, { "epoch": 0.24873745056282323, "grad_norm": 1.0734565258026123, "learning_rate": 1.9445601244535178e-05, "loss": 0.4469, "step": 2044 }, { "epoch": 0.24885914207484028, "grad_norm": 0.7326973676681519, "learning_rate": 1.9444960576889683e-05, "loss": 0.4469, "step": 2045 }, { "epoch": 0.24898083358685733, "grad_norm": 0.9987289905548096, "learning_rate": 1.9444319549843887e-05, "loss": 0.4473, "step": 2046 }, { "epoch": 0.24910252509887434, "grad_norm": 2.1177191734313965, "learning_rate": 1.9443678163422188e-05, "loss": 0.5367, "step": 2047 }, { "epoch": 0.2492242166108914, "grad_norm": 0.7155449986457825, "learning_rate": 1.9443036417648983e-05, "loss": 0.4839, "step": 2048 }, { "epoch": 0.24934590812290844, "grad_norm": 0.7096008658409119, "learning_rate": 1.94423943125487e-05, "loss": 0.4979, "step": 2049 }, { "epoch": 0.24946759963492546, "grad_norm": 1.0529060363769531, "learning_rate": 1.9441751848145766e-05, "loss": 0.4686, "step": 2050 }, { "epoch": 0.2495892911469425, "grad_norm": 0.7185301184654236, "learning_rate": 1.9441109024464633e-05, "loss": 0.4914, "step": 2051 }, { "epoch": 0.24971098265895952, "grad_norm": 0.927126944065094, "learning_rate": 1.9440465841529756e-05, "loss": 0.5258, "step": 2052 }, { "epoch": 0.24983267417097657, "grad_norm": 0.8160197734832764, "learning_rate": 1.9439822299365614e-05, "loss": 0.5177, "step": 2053 }, { "epoch": 0.24995436568299362, "grad_norm": 1.8633160591125488, "learning_rate": 1.9439178397996697e-05, "loss": 0.5176, "step": 2054 }, { "epoch": 0.25007605719501064, "grad_norm": 0.8045934438705444, "learning_rate": 1.9438534137447504e-05, "loss": 0.4727, "step": 2055 }, { "epoch": 0.2501977487070277, "grad_norm": 0.9101488590240479, "learning_rate": 1.943788951774255e-05, "loss": 0.4853, "step": 2056 }, { "epoch": 0.25031944021904473, "grad_norm": 1.4784225225448608, "learning_rate": 1.943724453890636e-05, "loss": 0.4963, "step": 2057 }, { "epoch": 0.25044113173106175, "grad_norm": 0.8246391415596008, "learning_rate": 1.9436599200963483e-05, "loss": 0.5007, "step": 2058 }, { "epoch": 0.25056282324307877, "grad_norm": 3.6727147102355957, "learning_rate": 1.9435953503938477e-05, "loss": 0.4572, "step": 2059 }, { "epoch": 0.25068451475509584, "grad_norm": 0.6610450148582458, "learning_rate": 1.9435307447855907e-05, "loss": 0.5188, "step": 2060 }, { "epoch": 0.25080620626711286, "grad_norm": 3.301255464553833, "learning_rate": 1.943466103274036e-05, "loss": 0.4586, "step": 2061 }, { "epoch": 0.2509278977791299, "grad_norm": 0.9013835191726685, "learning_rate": 1.943401425861643e-05, "loss": 0.5056, "step": 2062 }, { "epoch": 0.25104958929114696, "grad_norm": 1.0015555620193481, "learning_rate": 1.943336712550873e-05, "loss": 0.5238, "step": 2063 }, { "epoch": 0.251171280803164, "grad_norm": 1.1146483421325684, "learning_rate": 1.9432719633441887e-05, "loss": 0.4374, "step": 2064 }, { "epoch": 0.251292972315181, "grad_norm": 0.6692948937416077, "learning_rate": 1.9432071782440536e-05, "loss": 0.4511, "step": 2065 }, { "epoch": 0.25141466382719807, "grad_norm": 2.12496018409729, "learning_rate": 1.943142357252933e-05, "loss": 0.3818, "step": 2066 }, { "epoch": 0.2515363553392151, "grad_norm": 3.132035732269287, "learning_rate": 1.9430775003732934e-05, "loss": 0.4566, "step": 2067 }, { "epoch": 0.2516580468512321, "grad_norm": 0.588349461555481, "learning_rate": 1.943012607607603e-05, "loss": 0.4216, "step": 2068 }, { "epoch": 0.2517797383632492, "grad_norm": 3.744241714477539, "learning_rate": 1.942947678958331e-05, "loss": 0.5176, "step": 2069 }, { "epoch": 0.2519014298752662, "grad_norm": 0.6484661102294922, "learning_rate": 1.9428827144279482e-05, "loss": 0.4388, "step": 2070 }, { "epoch": 0.2520231213872832, "grad_norm": 4.514074325561523, "learning_rate": 1.942817714018926e-05, "loss": 0.5645, "step": 2071 }, { "epoch": 0.2521448128993003, "grad_norm": 0.5719945430755615, "learning_rate": 1.9427526777337383e-05, "loss": 0.45, "step": 2072 }, { "epoch": 0.2522665044113173, "grad_norm": 3.7293598651885986, "learning_rate": 1.94268760557486e-05, "loss": 0.4308, "step": 2073 }, { "epoch": 0.25238819592333434, "grad_norm": 0.8145673274993896, "learning_rate": 1.942622497544767e-05, "loss": 0.5112, "step": 2074 }, { "epoch": 0.2525098874353514, "grad_norm": 0.6826562881469727, "learning_rate": 1.942557353645937e-05, "loss": 0.5207, "step": 2075 }, { "epoch": 0.25263157894736843, "grad_norm": 0.5564038157463074, "learning_rate": 1.9424921738808488e-05, "loss": 0.4955, "step": 2076 }, { "epoch": 0.25275327045938545, "grad_norm": 1.6042362451553345, "learning_rate": 1.9424269582519823e-05, "loss": 0.4865, "step": 2077 }, { "epoch": 0.25287496197140247, "grad_norm": 2.475109100341797, "learning_rate": 1.942361706761819e-05, "loss": 0.4577, "step": 2078 }, { "epoch": 0.25299665348341954, "grad_norm": 1.4047729969024658, "learning_rate": 1.9422964194128427e-05, "loss": 0.5013, "step": 2079 }, { "epoch": 0.25311834499543656, "grad_norm": 0.605525016784668, "learning_rate": 1.9422310962075372e-05, "loss": 0.4243, "step": 2080 }, { "epoch": 0.2532400365074536, "grad_norm": 2.4479782581329346, "learning_rate": 1.942165737148388e-05, "loss": 0.495, "step": 2081 }, { "epoch": 0.25336172801947066, "grad_norm": 3.241241931915283, "learning_rate": 1.942100342237882e-05, "loss": 0.5247, "step": 2082 }, { "epoch": 0.2534834195314877, "grad_norm": 1.4796130657196045, "learning_rate": 1.9420349114785085e-05, "loss": 0.4205, "step": 2083 }, { "epoch": 0.2536051110435047, "grad_norm": 0.9417670369148254, "learning_rate": 1.9419694448727566e-05, "loss": 0.4824, "step": 2084 }, { "epoch": 0.25372680255552177, "grad_norm": 1.0193896293640137, "learning_rate": 1.9419039424231175e-05, "loss": 0.474, "step": 2085 }, { "epoch": 0.2538484940675388, "grad_norm": 0.62428218126297, "learning_rate": 1.9418384041320835e-05, "loss": 0.4622, "step": 2086 }, { "epoch": 0.2539701855795558, "grad_norm": 0.6079922914505005, "learning_rate": 1.9417728300021488e-05, "loss": 0.4668, "step": 2087 }, { "epoch": 0.2540918770915729, "grad_norm": 3.7671585083007812, "learning_rate": 1.9417072200358086e-05, "loss": 0.4308, "step": 2088 }, { "epoch": 0.2542135686035899, "grad_norm": 0.777911365032196, "learning_rate": 1.9416415742355596e-05, "loss": 0.5113, "step": 2089 }, { "epoch": 0.2543352601156069, "grad_norm": 3.967838764190674, "learning_rate": 1.9415758926038997e-05, "loss": 0.4617, "step": 2090 }, { "epoch": 0.254456951627624, "grad_norm": 2.4600133895874023, "learning_rate": 1.9415101751433278e-05, "loss": 0.4774, "step": 2091 }, { "epoch": 0.254578643139641, "grad_norm": 0.7972972989082336, "learning_rate": 1.941444421856345e-05, "loss": 0.4907, "step": 2092 }, { "epoch": 0.25470033465165803, "grad_norm": 0.8862624764442444, "learning_rate": 1.9413786327454534e-05, "loss": 0.5256, "step": 2093 }, { "epoch": 0.2548220261636751, "grad_norm": 3.9216880798339844, "learning_rate": 1.941312807813156e-05, "loss": 0.4142, "step": 2094 }, { "epoch": 0.2549437176756921, "grad_norm": 0.7092095017433167, "learning_rate": 1.9412469470619582e-05, "loss": 0.4778, "step": 2095 }, { "epoch": 0.25506540918770915, "grad_norm": 1.5533114671707153, "learning_rate": 1.941181050494366e-05, "loss": 0.4856, "step": 2096 }, { "epoch": 0.2551871006997262, "grad_norm": 0.6967470049858093, "learning_rate": 1.9411151181128862e-05, "loss": 0.475, "step": 2097 }, { "epoch": 0.25530879221174324, "grad_norm": 2.781846523284912, "learning_rate": 1.9410491499200282e-05, "loss": 0.4963, "step": 2098 }, { "epoch": 0.25543048372376026, "grad_norm": 0.8223859071731567, "learning_rate": 1.9409831459183022e-05, "loss": 0.4714, "step": 2099 }, { "epoch": 0.2555521752357773, "grad_norm": 0.7446115612983704, "learning_rate": 1.94091710611022e-05, "loss": 0.457, "step": 2100 }, { "epoch": 0.25567386674779435, "grad_norm": 3.1892948150634766, "learning_rate": 1.940851030498294e-05, "loss": 0.4483, "step": 2101 }, { "epoch": 0.2557955582598114, "grad_norm": 1.5362653732299805, "learning_rate": 1.9407849190850392e-05, "loss": 0.526, "step": 2102 }, { "epoch": 0.2559172497718284, "grad_norm": 2.39308500289917, "learning_rate": 1.9407187718729706e-05, "loss": 0.4301, "step": 2103 }, { "epoch": 0.25603894128384547, "grad_norm": 0.7289983034133911, "learning_rate": 1.9406525888646056e-05, "loss": 0.4903, "step": 2104 }, { "epoch": 0.2561606327958625, "grad_norm": 1.737002968788147, "learning_rate": 1.940586370062463e-05, "loss": 0.502, "step": 2105 }, { "epoch": 0.2562823243078795, "grad_norm": 1.964486002922058, "learning_rate": 1.9405201154690613e-05, "loss": 0.4537, "step": 2106 }, { "epoch": 0.2564040158198966, "grad_norm": 1.0727633237838745, "learning_rate": 1.9404538250869232e-05, "loss": 0.4334, "step": 2107 }, { "epoch": 0.2565257073319136, "grad_norm": 0.9376628398895264, "learning_rate": 1.94038749891857e-05, "loss": 0.4774, "step": 2108 }, { "epoch": 0.2566473988439306, "grad_norm": 2.0631730556488037, "learning_rate": 1.940321136966526e-05, "loss": 0.4905, "step": 2109 }, { "epoch": 0.2567690903559477, "grad_norm": 0.629659116268158, "learning_rate": 1.9402547392333164e-05, "loss": 0.4625, "step": 2110 }, { "epoch": 0.2568907818679647, "grad_norm": 0.5920600891113281, "learning_rate": 1.940188305721468e-05, "loss": 0.4606, "step": 2111 }, { "epoch": 0.25701247337998173, "grad_norm": 2.8812389373779297, "learning_rate": 1.940121836433508e-05, "loss": 0.5282, "step": 2112 }, { "epoch": 0.2571341648919988, "grad_norm": 0.6083371043205261, "learning_rate": 1.9400553313719665e-05, "loss": 0.4579, "step": 2113 }, { "epoch": 0.2572558564040158, "grad_norm": 1.8386752605438232, "learning_rate": 1.9399887905393736e-05, "loss": 0.4796, "step": 2114 }, { "epoch": 0.25737754791603284, "grad_norm": 0.7643177509307861, "learning_rate": 1.939922213938262e-05, "loss": 0.5253, "step": 2115 }, { "epoch": 0.2574992394280499, "grad_norm": 6.415184020996094, "learning_rate": 1.9398556015711642e-05, "loss": 0.4542, "step": 2116 }, { "epoch": 0.25762093094006694, "grad_norm": 1.6722726821899414, "learning_rate": 1.9397889534406157e-05, "loss": 0.5623, "step": 2117 }, { "epoch": 0.25774262245208396, "grad_norm": 6.019209861755371, "learning_rate": 1.939722269549152e-05, "loss": 0.4724, "step": 2118 }, { "epoch": 0.257864313964101, "grad_norm": 5.110898494720459, "learning_rate": 1.939655549899311e-05, "loss": 0.4221, "step": 2119 }, { "epoch": 0.25798600547611805, "grad_norm": 1.2796639204025269, "learning_rate": 1.939588794493631e-05, "loss": 0.4717, "step": 2120 }, { "epoch": 0.25810769698813507, "grad_norm": 0.8888446688652039, "learning_rate": 1.939522003334653e-05, "loss": 0.4764, "step": 2121 }, { "epoch": 0.2582293885001521, "grad_norm": 0.6068385243415833, "learning_rate": 1.939455176424918e-05, "loss": 0.4427, "step": 2122 }, { "epoch": 0.25835108001216917, "grad_norm": 4.162399768829346, "learning_rate": 1.9393883137669685e-05, "loss": 0.5389, "step": 2123 }, { "epoch": 0.2584727715241862, "grad_norm": 2.305088996887207, "learning_rate": 1.93932141536335e-05, "loss": 0.4304, "step": 2124 }, { "epoch": 0.2585944630362032, "grad_norm": 0.608076810836792, "learning_rate": 1.9392544812166067e-05, "loss": 0.4071, "step": 2125 }, { "epoch": 0.2587161545482203, "grad_norm": 2.529494047164917, "learning_rate": 1.9391875113292867e-05, "loss": 0.4576, "step": 2126 }, { "epoch": 0.2588378460602373, "grad_norm": 1.8165841102600098, "learning_rate": 1.9391205057039373e-05, "loss": 0.4516, "step": 2127 }, { "epoch": 0.2589595375722543, "grad_norm": 2.3291776180267334, "learning_rate": 1.9390534643431095e-05, "loss": 0.5445, "step": 2128 }, { "epoch": 0.2590812290842714, "grad_norm": 0.573996901512146, "learning_rate": 1.9389863872493532e-05, "loss": 0.4904, "step": 2129 }, { "epoch": 0.2592029205962884, "grad_norm": 1.5546201467514038, "learning_rate": 1.9389192744252213e-05, "loss": 0.5248, "step": 2130 }, { "epoch": 0.25932461210830543, "grad_norm": 1.4292373657226562, "learning_rate": 1.9388521258732675e-05, "loss": 0.5139, "step": 2131 }, { "epoch": 0.2594463036203225, "grad_norm": 4.89036750793457, "learning_rate": 1.938784941596047e-05, "loss": 0.448, "step": 2132 }, { "epoch": 0.2595679951323395, "grad_norm": 4.473513126373291, "learning_rate": 1.9387177215961167e-05, "loss": 0.5163, "step": 2133 }, { "epoch": 0.25968968664435654, "grad_norm": 4.129972457885742, "learning_rate": 1.9386504658760334e-05, "loss": 0.5059, "step": 2134 }, { "epoch": 0.2598113781563736, "grad_norm": 3.488123655319214, "learning_rate": 1.9385831744383577e-05, "loss": 0.5243, "step": 2135 }, { "epoch": 0.25993306966839064, "grad_norm": 1.467781662940979, "learning_rate": 1.938515847285649e-05, "loss": 0.5173, "step": 2136 }, { "epoch": 0.26005476118040766, "grad_norm": 1.2999566793441772, "learning_rate": 1.9384484844204698e-05, "loss": 0.4551, "step": 2137 }, { "epoch": 0.2601764526924247, "grad_norm": 1.2947548627853394, "learning_rate": 1.938381085845383e-05, "loss": 0.4794, "step": 2138 }, { "epoch": 0.26029814420444175, "grad_norm": 3.792018175125122, "learning_rate": 1.938313651562954e-05, "loss": 0.5355, "step": 2139 }, { "epoch": 0.26041983571645877, "grad_norm": 1.3959805965423584, "learning_rate": 1.938246181575748e-05, "loss": 0.438, "step": 2140 }, { "epoch": 0.2605415272284758, "grad_norm": 1.5099070072174072, "learning_rate": 1.938178675886333e-05, "loss": 0.4443, "step": 2141 }, { "epoch": 0.26066321874049286, "grad_norm": 0.9759753346443176, "learning_rate": 1.9381111344972772e-05, "loss": 0.4437, "step": 2142 }, { "epoch": 0.2607849102525099, "grad_norm": 1.4681051969528198, "learning_rate": 1.9380435574111512e-05, "loss": 0.4046, "step": 2143 }, { "epoch": 0.2609066017645269, "grad_norm": 1.7579940557479858, "learning_rate": 1.9379759446305263e-05, "loss": 0.4154, "step": 2144 }, { "epoch": 0.261028293276544, "grad_norm": 2.2013461589813232, "learning_rate": 1.9379082961579747e-05, "loss": 0.4129, "step": 2145 }, { "epoch": 0.261149984788561, "grad_norm": 1.7656598091125488, "learning_rate": 1.9378406119960713e-05, "loss": 0.4611, "step": 2146 }, { "epoch": 0.261271676300578, "grad_norm": 1.525275707244873, "learning_rate": 1.9377728921473915e-05, "loss": 0.4184, "step": 2147 }, { "epoch": 0.2613933678125951, "grad_norm": 2.439988613128662, "learning_rate": 1.937705136614512e-05, "loss": 0.4759, "step": 2148 }, { "epoch": 0.2615150593246121, "grad_norm": 1.2350704669952393, "learning_rate": 1.937637345400011e-05, "loss": 0.4316, "step": 2149 }, { "epoch": 0.26163675083662913, "grad_norm": 2.9700818061828613, "learning_rate": 1.9375695185064686e-05, "loss": 0.5293, "step": 2150 }, { "epoch": 0.2617584423486462, "grad_norm": 2.5232958793640137, "learning_rate": 1.937501655936465e-05, "loss": 0.3677, "step": 2151 }, { "epoch": 0.2618801338606632, "grad_norm": 2.983248472213745, "learning_rate": 1.937433757692583e-05, "loss": 0.5393, "step": 2152 }, { "epoch": 0.26200182537268024, "grad_norm": 1.8255480527877808, "learning_rate": 1.937365823777406e-05, "loss": 0.5169, "step": 2153 }, { "epoch": 0.2621235168846973, "grad_norm": 2.7404890060424805, "learning_rate": 1.9372978541935192e-05, "loss": 0.5093, "step": 2154 }, { "epoch": 0.26224520839671434, "grad_norm": 0.8094709515571594, "learning_rate": 1.937229848943509e-05, "loss": 0.572, "step": 2155 }, { "epoch": 0.26236689990873135, "grad_norm": 3.907942771911621, "learning_rate": 1.9371618080299633e-05, "loss": 0.4653, "step": 2156 }, { "epoch": 0.26248859142074843, "grad_norm": 5.443722248077393, "learning_rate": 1.9370937314554707e-05, "loss": 0.4763, "step": 2157 }, { "epoch": 0.26261028293276545, "grad_norm": 1.1331325769424438, "learning_rate": 1.937025619222622e-05, "loss": 0.5443, "step": 2158 }, { "epoch": 0.26273197444478247, "grad_norm": 3.942476511001587, "learning_rate": 1.936957471334009e-05, "loss": 0.4644, "step": 2159 }, { "epoch": 0.2628536659567995, "grad_norm": 4.187047481536865, "learning_rate": 1.9368892877922248e-05, "loss": 0.4084, "step": 2160 }, { "epoch": 0.26297535746881656, "grad_norm": 0.6882132887840271, "learning_rate": 1.9368210685998637e-05, "loss": 0.4785, "step": 2161 }, { "epoch": 0.2630970489808336, "grad_norm": 3.2719621658325195, "learning_rate": 1.936752813759522e-05, "loss": 0.5122, "step": 2162 }, { "epoch": 0.2632187404928506, "grad_norm": 1.8161216974258423, "learning_rate": 1.936684523273797e-05, "loss": 0.4466, "step": 2163 }, { "epoch": 0.2633404320048677, "grad_norm": 2.6128652095794678, "learning_rate": 1.936616197145287e-05, "loss": 0.4741, "step": 2164 }, { "epoch": 0.2634621235168847, "grad_norm": 3.289337396621704, "learning_rate": 1.936547835376592e-05, "loss": 0.5019, "step": 2165 }, { "epoch": 0.2635838150289017, "grad_norm": 0.7658073902130127, "learning_rate": 1.936479437970313e-05, "loss": 0.4821, "step": 2166 }, { "epoch": 0.2637055065409188, "grad_norm": 0.5153932571411133, "learning_rate": 1.936411004929053e-05, "loss": 0.4461, "step": 2167 }, { "epoch": 0.2638271980529358, "grad_norm": 1.6056749820709229, "learning_rate": 1.9363425362554164e-05, "loss": 0.4823, "step": 2168 }, { "epoch": 0.2639488895649528, "grad_norm": 2.5974862575531006, "learning_rate": 1.936274031952008e-05, "loss": 0.4662, "step": 2169 }, { "epoch": 0.2640705810769699, "grad_norm": 0.7344169616699219, "learning_rate": 1.9362054920214347e-05, "loss": 0.4949, "step": 2170 }, { "epoch": 0.2641922725889869, "grad_norm": 0.9193450808525085, "learning_rate": 1.9361369164663045e-05, "loss": 0.5033, "step": 2171 }, { "epoch": 0.26431396410100394, "grad_norm": 1.0487114191055298, "learning_rate": 1.936068305289227e-05, "loss": 0.5028, "step": 2172 }, { "epoch": 0.264435655613021, "grad_norm": 3.5050742626190186, "learning_rate": 1.935999658492813e-05, "loss": 0.4179, "step": 2173 }, { "epoch": 0.26455734712503803, "grad_norm": 2.2077624797821045, "learning_rate": 1.9359309760796744e-05, "loss": 0.4884, "step": 2174 }, { "epoch": 0.26467903863705505, "grad_norm": 1.7855932712554932, "learning_rate": 1.935862258052425e-05, "loss": 0.4103, "step": 2175 }, { "epoch": 0.2648007301490721, "grad_norm": 1.12953782081604, "learning_rate": 1.9357935044136795e-05, "loss": 0.4326, "step": 2176 }, { "epoch": 0.26492242166108915, "grad_norm": 2.887451410293579, "learning_rate": 1.935724715166054e-05, "loss": 0.5096, "step": 2177 }, { "epoch": 0.26504411317310617, "grad_norm": 3.2784857749938965, "learning_rate": 1.935655890312167e-05, "loss": 0.4511, "step": 2178 }, { "epoch": 0.2651658046851232, "grad_norm": 4.850772380828857, "learning_rate": 1.9355870298546358e-05, "loss": 0.5255, "step": 2179 }, { "epoch": 0.26528749619714026, "grad_norm": 3.1773927211761475, "learning_rate": 1.935518133796082e-05, "loss": 0.492, "step": 2180 }, { "epoch": 0.2654091877091573, "grad_norm": 1.1647827625274658, "learning_rate": 1.9354492021391265e-05, "loss": 0.4258, "step": 2181 }, { "epoch": 0.2655308792211743, "grad_norm": 4.982237339019775, "learning_rate": 1.935380234886393e-05, "loss": 0.5518, "step": 2182 }, { "epoch": 0.2656525707331914, "grad_norm": 1.5905909538269043, "learning_rate": 1.935311232040505e-05, "loss": 0.3749, "step": 2183 }, { "epoch": 0.2657742622452084, "grad_norm": 0.5936698317527771, "learning_rate": 1.935242193604089e-05, "loss": 0.4552, "step": 2184 }, { "epoch": 0.2658959537572254, "grad_norm": 1.5796947479248047, "learning_rate": 1.9351731195797715e-05, "loss": 0.4618, "step": 2185 }, { "epoch": 0.2660176452692425, "grad_norm": 1.928078532218933, "learning_rate": 1.935104009970181e-05, "loss": 0.5464, "step": 2186 }, { "epoch": 0.2661393367812595, "grad_norm": 1.3768632411956787, "learning_rate": 1.9350348647779476e-05, "loss": 0.4793, "step": 2187 }, { "epoch": 0.2662610282932765, "grad_norm": 4.635210037231445, "learning_rate": 1.934965684005702e-05, "loss": 0.4367, "step": 2188 }, { "epoch": 0.2663827198052936, "grad_norm": 0.9606807231903076, "learning_rate": 1.934896467656077e-05, "loss": 0.4888, "step": 2189 }, { "epoch": 0.2665044113173106, "grad_norm": 2.4115006923675537, "learning_rate": 1.934827215731706e-05, "loss": 0.4899, "step": 2190 }, { "epoch": 0.26662610282932764, "grad_norm": 2.0559732913970947, "learning_rate": 1.9347579282352246e-05, "loss": 0.4114, "step": 2191 }, { "epoch": 0.2667477943413447, "grad_norm": 1.51226007938385, "learning_rate": 1.9346886051692694e-05, "loss": 0.4894, "step": 2192 }, { "epoch": 0.26686948585336173, "grad_norm": 0.567767858505249, "learning_rate": 1.9346192465364776e-05, "loss": 0.4321, "step": 2193 }, { "epoch": 0.26699117736537875, "grad_norm": 0.7620263695716858, "learning_rate": 1.9345498523394893e-05, "loss": 0.4424, "step": 2194 }, { "epoch": 0.2671128688773958, "grad_norm": 0.5864660143852234, "learning_rate": 1.9344804225809445e-05, "loss": 0.4139, "step": 2195 }, { "epoch": 0.26723456038941285, "grad_norm": 1.7521392107009888, "learning_rate": 1.9344109572634857e-05, "loss": 0.4849, "step": 2196 }, { "epoch": 0.26735625190142986, "grad_norm": 1.7217597961425781, "learning_rate": 1.9343414563897555e-05, "loss": 0.5002, "step": 2197 }, { "epoch": 0.26747794341344694, "grad_norm": 0.6881518363952637, "learning_rate": 1.934271919962399e-05, "loss": 0.4608, "step": 2198 }, { "epoch": 0.26759963492546396, "grad_norm": 2.678766965866089, "learning_rate": 1.9342023479840622e-05, "loss": 0.5312, "step": 2199 }, { "epoch": 0.267721326437481, "grad_norm": 1.6380953788757324, "learning_rate": 1.9341327404573925e-05, "loss": 0.4994, "step": 2200 }, { "epoch": 0.267843017949498, "grad_norm": 0.6163226962089539, "learning_rate": 1.934063097385038e-05, "loss": 0.5, "step": 2201 }, { "epoch": 0.26796470946151507, "grad_norm": 4.325922012329102, "learning_rate": 1.9339934187696498e-05, "loss": 0.4136, "step": 2202 }, { "epoch": 0.2680864009735321, "grad_norm": 1.6639729738235474, "learning_rate": 1.933923704613878e-05, "loss": 0.5488, "step": 2203 }, { "epoch": 0.2682080924855491, "grad_norm": 1.3243522644042969, "learning_rate": 1.9338539549203767e-05, "loss": 0.5001, "step": 2204 }, { "epoch": 0.2683297839975662, "grad_norm": 2.7837817668914795, "learning_rate": 1.9337841696917996e-05, "loss": 0.4861, "step": 2205 }, { "epoch": 0.2684514755095832, "grad_norm": 3.3673646450042725, "learning_rate": 1.9337143489308015e-05, "loss": 0.441, "step": 2206 }, { "epoch": 0.2685731670216002, "grad_norm": 1.975538969039917, "learning_rate": 1.9336444926400402e-05, "loss": 0.4832, "step": 2207 }, { "epoch": 0.2686948585336173, "grad_norm": 0.7136867046356201, "learning_rate": 1.9335746008221733e-05, "loss": 0.4625, "step": 2208 }, { "epoch": 0.2688165500456343, "grad_norm": 0.8701152205467224, "learning_rate": 1.93350467347986e-05, "loss": 0.4747, "step": 2209 }, { "epoch": 0.26893824155765134, "grad_norm": 1.676513910293579, "learning_rate": 1.933434710615762e-05, "loss": 0.4889, "step": 2210 }, { "epoch": 0.2690599330696684, "grad_norm": 1.9879202842712402, "learning_rate": 1.933364712232541e-05, "loss": 0.4739, "step": 2211 }, { "epoch": 0.26918162458168543, "grad_norm": 1.4625931978225708, "learning_rate": 1.933294678332861e-05, "loss": 0.4926, "step": 2212 }, { "epoch": 0.26930331609370245, "grad_norm": 1.0162174701690674, "learning_rate": 1.9332246089193867e-05, "loss": 0.4096, "step": 2213 }, { "epoch": 0.2694250076057195, "grad_norm": 1.7706711292266846, "learning_rate": 1.933154503994784e-05, "loss": 0.5084, "step": 2214 }, { "epoch": 0.26954669911773654, "grad_norm": 1.3316534757614136, "learning_rate": 1.9330843635617212e-05, "loss": 0.481, "step": 2215 }, { "epoch": 0.26966839062975356, "grad_norm": 0.7049447894096375, "learning_rate": 1.933014187622867e-05, "loss": 0.4486, "step": 2216 }, { "epoch": 0.26979008214177064, "grad_norm": 2.222529888153076, "learning_rate": 1.9329439761808915e-05, "loss": 0.4474, "step": 2217 }, { "epoch": 0.26991177365378766, "grad_norm": 0.7434383630752563, "learning_rate": 1.932873729238467e-05, "loss": 0.4741, "step": 2218 }, { "epoch": 0.2700334651658047, "grad_norm": 1.619347095489502, "learning_rate": 1.932803446798266e-05, "loss": 0.4741, "step": 2219 }, { "epoch": 0.2701551566778217, "grad_norm": 1.62326180934906, "learning_rate": 1.932733128862963e-05, "loss": 0.4739, "step": 2220 }, { "epoch": 0.27027684818983877, "grad_norm": 1.5984550714492798, "learning_rate": 1.9326627754352336e-05, "loss": 0.4963, "step": 2221 }, { "epoch": 0.2703985397018558, "grad_norm": 0.9873847961425781, "learning_rate": 1.9325923865177555e-05, "loss": 0.447, "step": 2222 }, { "epoch": 0.2705202312138728, "grad_norm": 2.9002130031585693, "learning_rate": 1.9325219621132063e-05, "loss": 0.4574, "step": 2223 }, { "epoch": 0.2706419227258899, "grad_norm": 1.3462127447128296, "learning_rate": 1.9324515022242664e-05, "loss": 0.4172, "step": 2224 }, { "epoch": 0.2707636142379069, "grad_norm": 2.764958620071411, "learning_rate": 1.9323810068536167e-05, "loss": 0.5009, "step": 2225 }, { "epoch": 0.2708853057499239, "grad_norm": 0.8994090557098389, "learning_rate": 1.93231047600394e-05, "loss": 0.4367, "step": 2226 }, { "epoch": 0.271006997261941, "grad_norm": 1.600385069847107, "learning_rate": 1.9322399096779197e-05, "loss": 0.4662, "step": 2227 }, { "epoch": 0.271128688773958, "grad_norm": 1.9499895572662354, "learning_rate": 1.932169307878241e-05, "loss": 0.4469, "step": 2228 }, { "epoch": 0.27125038028597503, "grad_norm": 2.8471550941467285, "learning_rate": 1.9320986706075913e-05, "loss": 0.4985, "step": 2229 }, { "epoch": 0.2713720717979921, "grad_norm": 1.845637559890747, "learning_rate": 1.9320279978686575e-05, "loss": 0.5318, "step": 2230 }, { "epoch": 0.27149376331000913, "grad_norm": 0.7418341636657715, "learning_rate": 1.931957289664129e-05, "loss": 0.4912, "step": 2231 }, { "epoch": 0.27161545482202615, "grad_norm": 2.186032772064209, "learning_rate": 1.9318865459966968e-05, "loss": 0.4461, "step": 2232 }, { "epoch": 0.2717371463340432, "grad_norm": 3.187054395675659, "learning_rate": 1.9318157668690526e-05, "loss": 0.4776, "step": 2233 }, { "epoch": 0.27185883784606024, "grad_norm": 2.6151225566864014, "learning_rate": 1.9317449522838896e-05, "loss": 0.4735, "step": 2234 }, { "epoch": 0.27198052935807726, "grad_norm": 1.8302274942398071, "learning_rate": 1.9316741022439024e-05, "loss": 0.5444, "step": 2235 }, { "epoch": 0.27210222087009434, "grad_norm": 0.6191455125808716, "learning_rate": 1.9316032167517876e-05, "loss": 0.5168, "step": 2236 }, { "epoch": 0.27222391238211135, "grad_norm": 1.1184285879135132, "learning_rate": 1.9315322958102417e-05, "loss": 0.4514, "step": 2237 }, { "epoch": 0.2723456038941284, "grad_norm": 2.1851346492767334, "learning_rate": 1.9314613394219642e-05, "loss": 0.54, "step": 2238 }, { "epoch": 0.27246729540614545, "grad_norm": 2.121798515319824, "learning_rate": 1.9313903475896544e-05, "loss": 0.4459, "step": 2239 }, { "epoch": 0.27258898691816247, "grad_norm": 1.4100135564804077, "learning_rate": 1.9313193203160142e-05, "loss": 0.4631, "step": 2240 }, { "epoch": 0.2727106784301795, "grad_norm": 1.1367652416229248, "learning_rate": 1.9312482576037457e-05, "loss": 0.4941, "step": 2241 }, { "epoch": 0.2728323699421965, "grad_norm": 0.8598164319992065, "learning_rate": 1.9311771594555537e-05, "loss": 0.4661, "step": 2242 }, { "epoch": 0.2729540614542136, "grad_norm": 0.7251045107841492, "learning_rate": 1.9311060258741436e-05, "loss": 0.4794, "step": 2243 }, { "epoch": 0.2730757529662306, "grad_norm": 0.6071829199790955, "learning_rate": 1.9310348568622218e-05, "loss": 0.4597, "step": 2244 }, { "epoch": 0.2731974444782476, "grad_norm": 0.7943944334983826, "learning_rate": 1.9309636524224965e-05, "loss": 0.461, "step": 2245 }, { "epoch": 0.2733191359902647, "grad_norm": 0.5888009667396545, "learning_rate": 1.930892412557677e-05, "loss": 0.4669, "step": 2246 }, { "epoch": 0.2734408275022817, "grad_norm": 4.058525085449219, "learning_rate": 1.9308211372704745e-05, "loss": 0.4134, "step": 2247 }, { "epoch": 0.27356251901429873, "grad_norm": 0.9731010794639587, "learning_rate": 1.9307498265636013e-05, "loss": 0.4935, "step": 2248 }, { "epoch": 0.2736842105263158, "grad_norm": 2.326507568359375, "learning_rate": 1.9306784804397702e-05, "loss": 0.4454, "step": 2249 }, { "epoch": 0.2738059020383328, "grad_norm": 0.8813508749008179, "learning_rate": 1.930607098901697e-05, "loss": 0.5151, "step": 2250 }, { "epoch": 0.27392759355034985, "grad_norm": 2.809396505355835, "learning_rate": 1.930535681952097e-05, "loss": 0.4301, "step": 2251 }, { "epoch": 0.2740492850623669, "grad_norm": 1.8027896881103516, "learning_rate": 1.9304642295936882e-05, "loss": 0.5244, "step": 2252 }, { "epoch": 0.27417097657438394, "grad_norm": 1.5659570693969727, "learning_rate": 1.93039274182919e-05, "loss": 0.4208, "step": 2253 }, { "epoch": 0.27429266808640096, "grad_norm": 1.3783743381500244, "learning_rate": 1.9303212186613218e-05, "loss": 0.4895, "step": 2254 }, { "epoch": 0.27441435959841803, "grad_norm": 4.133609294891357, "learning_rate": 1.930249660092806e-05, "loss": 0.5578, "step": 2255 }, { "epoch": 0.27453605111043505, "grad_norm": 0.8889201283454895, "learning_rate": 1.9301780661263647e-05, "loss": 0.4732, "step": 2256 }, { "epoch": 0.2746577426224521, "grad_norm": 2.5607032775878906, "learning_rate": 1.9301064367647226e-05, "loss": 0.5005, "step": 2257 }, { "epoch": 0.27477943413446915, "grad_norm": 0.8294042348861694, "learning_rate": 1.9300347720106054e-05, "loss": 0.4443, "step": 2258 }, { "epoch": 0.27490112564648617, "grad_norm": 2.740612030029297, "learning_rate": 1.92996307186674e-05, "loss": 0.413, "step": 2259 }, { "epoch": 0.2750228171585032, "grad_norm": 2.467393636703491, "learning_rate": 1.9298913363358552e-05, "loss": 0.4236, "step": 2260 }, { "epoch": 0.2751445086705202, "grad_norm": 0.8414928913116455, "learning_rate": 1.92981956542068e-05, "loss": 0.4725, "step": 2261 }, { "epoch": 0.2752662001825373, "grad_norm": 0.662197470664978, "learning_rate": 1.9297477591239456e-05, "loss": 0.4877, "step": 2262 }, { "epoch": 0.2753878916945543, "grad_norm": 2.279961109161377, "learning_rate": 1.9296759174483846e-05, "loss": 0.4785, "step": 2263 }, { "epoch": 0.2755095832065713, "grad_norm": 0.7570948600769043, "learning_rate": 1.9296040403967308e-05, "loss": 0.4634, "step": 2264 }, { "epoch": 0.2756312747185884, "grad_norm": 1.0017637014389038, "learning_rate": 1.9295321279717187e-05, "loss": 0.4686, "step": 2265 }, { "epoch": 0.2757529662306054, "grad_norm": 0.9803701043128967, "learning_rate": 1.9294601801760855e-05, "loss": 0.4628, "step": 2266 }, { "epoch": 0.27587465774262243, "grad_norm": 2.301069974899292, "learning_rate": 1.9293881970125682e-05, "loss": 0.4529, "step": 2267 }, { "epoch": 0.2759963492546395, "grad_norm": 1.323573350906372, "learning_rate": 1.9293161784839065e-05, "loss": 0.448, "step": 2268 }, { "epoch": 0.2761180407666565, "grad_norm": 1.5175703763961792, "learning_rate": 1.9292441245928407e-05, "loss": 0.4807, "step": 2269 }, { "epoch": 0.27623973227867354, "grad_norm": 2.870685577392578, "learning_rate": 1.9291720353421124e-05, "loss": 0.4407, "step": 2270 }, { "epoch": 0.2763614237906906, "grad_norm": 1.8498321771621704, "learning_rate": 1.9290999107344647e-05, "loss": 0.4528, "step": 2271 }, { "epoch": 0.27648311530270764, "grad_norm": 2.5913422107696533, "learning_rate": 1.9290277507726424e-05, "loss": 0.4353, "step": 2272 }, { "epoch": 0.27660480681472466, "grad_norm": 3.1198387145996094, "learning_rate": 1.9289555554593914e-05, "loss": 0.5134, "step": 2273 }, { "epoch": 0.27672649832674173, "grad_norm": 1.7768486738204956, "learning_rate": 1.928883324797459e-05, "loss": 0.4734, "step": 2274 }, { "epoch": 0.27684818983875875, "grad_norm": 1.2226992845535278, "learning_rate": 1.9288110587895926e-05, "loss": 0.4916, "step": 2275 }, { "epoch": 0.27696988135077577, "grad_norm": 0.8427600264549255, "learning_rate": 1.928738757438543e-05, "loss": 0.4526, "step": 2276 }, { "epoch": 0.27709157286279285, "grad_norm": 1.2037626504898071, "learning_rate": 1.9286664207470618e-05, "loss": 0.4697, "step": 2277 }, { "epoch": 0.27721326437480986, "grad_norm": 4.869029521942139, "learning_rate": 1.928594048717901e-05, "loss": 0.5728, "step": 2278 }, { "epoch": 0.2773349558868269, "grad_norm": 2.456289529800415, "learning_rate": 1.928521641353814e-05, "loss": 0.5, "step": 2279 }, { "epoch": 0.2774566473988439, "grad_norm": 0.8228026032447815, "learning_rate": 1.928449198657557e-05, "loss": 0.4886, "step": 2280 }, { "epoch": 0.277578338910861, "grad_norm": 0.7905141711235046, "learning_rate": 1.9283767206318865e-05, "loss": 0.4451, "step": 2281 }, { "epoch": 0.277700030422878, "grad_norm": 0.5709277987480164, "learning_rate": 1.9283042072795597e-05, "loss": 0.4851, "step": 2282 }, { "epoch": 0.277821721934895, "grad_norm": 0.796428382396698, "learning_rate": 1.9282316586033362e-05, "loss": 0.4954, "step": 2283 }, { "epoch": 0.2779434134469121, "grad_norm": 0.6578648686408997, "learning_rate": 1.928159074605977e-05, "loss": 0.4905, "step": 2284 }, { "epoch": 0.2780651049589291, "grad_norm": 1.1714102029800415, "learning_rate": 1.928086455290244e-05, "loss": 0.4908, "step": 2285 }, { "epoch": 0.27818679647094613, "grad_norm": 0.7354956865310669, "learning_rate": 1.9280138006589e-05, "loss": 0.5065, "step": 2286 }, { "epoch": 0.2783084879829632, "grad_norm": 0.6846218109130859, "learning_rate": 1.9279411107147104e-05, "loss": 0.5125, "step": 2287 }, { "epoch": 0.2784301794949802, "grad_norm": 1.20828378200531, "learning_rate": 1.9278683854604404e-05, "loss": 0.5039, "step": 2288 }, { "epoch": 0.27855187100699724, "grad_norm": 1.1589032411575317, "learning_rate": 1.9277956248988578e-05, "loss": 0.5256, "step": 2289 }, { "epoch": 0.2786735625190143, "grad_norm": 2.371429443359375, "learning_rate": 1.9277228290327318e-05, "loss": 0.4737, "step": 2290 }, { "epoch": 0.27879525403103134, "grad_norm": 2.052790880203247, "learning_rate": 1.927649997864831e-05, "loss": 0.4986, "step": 2291 }, { "epoch": 0.27891694554304836, "grad_norm": 2.4020862579345703, "learning_rate": 1.9275771313979284e-05, "loss": 0.4932, "step": 2292 }, { "epoch": 0.27903863705506543, "grad_norm": 1.0700608491897583, "learning_rate": 1.9275042296347957e-05, "loss": 0.5298, "step": 2293 }, { "epoch": 0.27916032856708245, "grad_norm": 3.5983588695526123, "learning_rate": 1.927431292578207e-05, "loss": 0.4424, "step": 2294 }, { "epoch": 0.27928202007909947, "grad_norm": 0.9342120885848999, "learning_rate": 1.9273583202309382e-05, "loss": 0.4334, "step": 2295 }, { "epoch": 0.27940371159111654, "grad_norm": 2.4847373962402344, "learning_rate": 1.927285312595766e-05, "loss": 0.4921, "step": 2296 }, { "epoch": 0.27952540310313356, "grad_norm": 1.8025132417678833, "learning_rate": 1.9272122696754677e-05, "loss": 0.4823, "step": 2297 }, { "epoch": 0.2796470946151506, "grad_norm": 0.6341730356216431, "learning_rate": 1.9271391914728236e-05, "loss": 0.4618, "step": 2298 }, { "epoch": 0.27976878612716766, "grad_norm": 0.7882016897201538, "learning_rate": 1.9270660779906144e-05, "loss": 0.4464, "step": 2299 }, { "epoch": 0.2798904776391847, "grad_norm": 1.488471269607544, "learning_rate": 1.926992929231622e-05, "loss": 0.5159, "step": 2300 }, { "epoch": 0.2800121691512017, "grad_norm": 2.271127700805664, "learning_rate": 1.9269197451986295e-05, "loss": 0.4994, "step": 2301 }, { "epoch": 0.2801338606632187, "grad_norm": 2.7641971111297607, "learning_rate": 1.926846525894422e-05, "loss": 0.5357, "step": 2302 }, { "epoch": 0.2802555521752358, "grad_norm": 1.1550077199935913, "learning_rate": 1.9267732713217865e-05, "loss": 0.5515, "step": 2303 }, { "epoch": 0.2803772436872528, "grad_norm": 4.89072847366333, "learning_rate": 1.926699981483509e-05, "loss": 0.4561, "step": 2304 }, { "epoch": 0.2804989351992698, "grad_norm": 5.637208938598633, "learning_rate": 1.926626656382379e-05, "loss": 0.4555, "step": 2305 }, { "epoch": 0.2806206267112869, "grad_norm": 3.1426360607147217, "learning_rate": 1.926553296021187e-05, "loss": 0.4986, "step": 2306 }, { "epoch": 0.2807423182233039, "grad_norm": 2.4551095962524414, "learning_rate": 1.9264799004027245e-05, "loss": 0.5442, "step": 2307 }, { "epoch": 0.28086400973532094, "grad_norm": 5.031198501586914, "learning_rate": 1.9264064695297836e-05, "loss": 0.4493, "step": 2308 }, { "epoch": 0.280985701247338, "grad_norm": 0.5657782554626465, "learning_rate": 1.926333003405159e-05, "loss": 0.491, "step": 2309 }, { "epoch": 0.28110739275935503, "grad_norm": 0.5203744769096375, "learning_rate": 1.9262595020316465e-05, "loss": 0.5181, "step": 2310 }, { "epoch": 0.28122908427137205, "grad_norm": 0.8510515093803406, "learning_rate": 1.926185965412043e-05, "loss": 0.4504, "step": 2311 }, { "epoch": 0.28135077578338913, "grad_norm": 4.522860527038574, "learning_rate": 1.9261123935491458e-05, "loss": 0.5591, "step": 2312 }, { "epoch": 0.28147246729540615, "grad_norm": 3.9935905933380127, "learning_rate": 1.926038786445755e-05, "loss": 0.5442, "step": 2313 }, { "epoch": 0.28159415880742317, "grad_norm": 2.9965057373046875, "learning_rate": 1.925965144104672e-05, "loss": 0.5105, "step": 2314 }, { "epoch": 0.28171585031944024, "grad_norm": 0.4628284275531769, "learning_rate": 1.9258914665286983e-05, "loss": 0.41, "step": 2315 }, { "epoch": 0.28183754183145726, "grad_norm": 1.1634234189987183, "learning_rate": 1.925817753720638e-05, "loss": 0.4551, "step": 2316 }, { "epoch": 0.2819592333434743, "grad_norm": 1.2035220861434937, "learning_rate": 1.9257440056832955e-05, "loss": 0.469, "step": 2317 }, { "epoch": 0.28208092485549136, "grad_norm": 1.8352148532867432, "learning_rate": 1.9256702224194778e-05, "loss": 0.4468, "step": 2318 }, { "epoch": 0.2822026163675084, "grad_norm": 0.6279745697975159, "learning_rate": 1.925596403931992e-05, "loss": 0.5322, "step": 2319 }, { "epoch": 0.2823243078795254, "grad_norm": 4.472016334533691, "learning_rate": 1.9255225502236473e-05, "loss": 0.4378, "step": 2320 }, { "epoch": 0.2824459993915424, "grad_norm": 4.093414783477783, "learning_rate": 1.9254486612972538e-05, "loss": 0.4489, "step": 2321 }, { "epoch": 0.2825676909035595, "grad_norm": 2.149359703063965, "learning_rate": 1.925374737155623e-05, "loss": 0.4886, "step": 2322 }, { "epoch": 0.2826893824155765, "grad_norm": 0.7144204378128052, "learning_rate": 1.9253007778015682e-05, "loss": 0.4277, "step": 2323 }, { "epoch": 0.2828110739275935, "grad_norm": 1.666211485862732, "learning_rate": 1.9252267832379035e-05, "loss": 0.5334, "step": 2324 }, { "epoch": 0.2829327654396106, "grad_norm": 2.787982940673828, "learning_rate": 1.9251527534674444e-05, "loss": 0.524, "step": 2325 }, { "epoch": 0.2830544569516276, "grad_norm": 0.6031523942947388, "learning_rate": 1.9250786884930083e-05, "loss": 0.4115, "step": 2326 }, { "epoch": 0.28317614846364464, "grad_norm": 1.570724606513977, "learning_rate": 1.9250045883174132e-05, "loss": 0.478, "step": 2327 }, { "epoch": 0.2832978399756617, "grad_norm": 0.8946213126182556, "learning_rate": 1.9249304529434786e-05, "loss": 0.4306, "step": 2328 }, { "epoch": 0.28341953148767873, "grad_norm": 0.9296962022781372, "learning_rate": 1.9248562823740263e-05, "loss": 0.4789, "step": 2329 }, { "epoch": 0.28354122299969575, "grad_norm": 1.0657775402069092, "learning_rate": 1.9247820766118773e-05, "loss": 0.4362, "step": 2330 }, { "epoch": 0.2836629145117128, "grad_norm": 2.9592397212982178, "learning_rate": 1.9247078356598567e-05, "loss": 0.4978, "step": 2331 }, { "epoch": 0.28378460602372985, "grad_norm": 0.6225043535232544, "learning_rate": 1.924633559520789e-05, "loss": 0.446, "step": 2332 }, { "epoch": 0.28390629753574687, "grad_norm": 1.293463945388794, "learning_rate": 1.9245592481975004e-05, "loss": 0.451, "step": 2333 }, { "epoch": 0.28402798904776394, "grad_norm": 0.6287753582000732, "learning_rate": 1.9244849016928184e-05, "loss": 0.4997, "step": 2334 }, { "epoch": 0.28414968055978096, "grad_norm": 1.572800636291504, "learning_rate": 1.9244105200095723e-05, "loss": 0.5002, "step": 2335 }, { "epoch": 0.284271372071798, "grad_norm": 0.7218564748764038, "learning_rate": 1.924336103150593e-05, "loss": 0.4669, "step": 2336 }, { "epoch": 0.28439306358381505, "grad_norm": 1.4338526725769043, "learning_rate": 1.9242616511187113e-05, "loss": 0.4811, "step": 2337 }, { "epoch": 0.2845147550958321, "grad_norm": 1.0821044445037842, "learning_rate": 1.9241871639167606e-05, "loss": 0.4953, "step": 2338 }, { "epoch": 0.2846364466078491, "grad_norm": 0.9856778979301453, "learning_rate": 1.9241126415475755e-05, "loss": 0.4809, "step": 2339 }, { "epoch": 0.28475813811986617, "grad_norm": 0.9627021551132202, "learning_rate": 1.9240380840139917e-05, "loss": 0.5173, "step": 2340 }, { "epoch": 0.2848798296318832, "grad_norm": 3.340620994567871, "learning_rate": 1.9239634913188458e-05, "loss": 0.488, "step": 2341 }, { "epoch": 0.2850015211439002, "grad_norm": 4.829874515533447, "learning_rate": 1.923888863464977e-05, "loss": 0.4835, "step": 2342 }, { "epoch": 0.2851232126559172, "grad_norm": 0.6045045852661133, "learning_rate": 1.9238142004552243e-05, "loss": 0.5201, "step": 2343 }, { "epoch": 0.2852449041679343, "grad_norm": 0.9127470254898071, "learning_rate": 1.923739502292429e-05, "loss": 0.4976, "step": 2344 }, { "epoch": 0.2853665956799513, "grad_norm": 3.985020875930786, "learning_rate": 1.923664768979434e-05, "loss": 0.4391, "step": 2345 }, { "epoch": 0.28548828719196834, "grad_norm": 2.159661293029785, "learning_rate": 1.923590000519082e-05, "loss": 0.4728, "step": 2346 }, { "epoch": 0.2856099787039854, "grad_norm": 0.770510733127594, "learning_rate": 1.9235151969142194e-05, "loss": 0.4209, "step": 2347 }, { "epoch": 0.28573167021600243, "grad_norm": 3.129359483718872, "learning_rate": 1.9234403581676917e-05, "loss": 0.5098, "step": 2348 }, { "epoch": 0.28585336172801945, "grad_norm": 3.219273805618286, "learning_rate": 1.923365484282347e-05, "loss": 0.5177, "step": 2349 }, { "epoch": 0.2859750532400365, "grad_norm": 3.299837112426758, "learning_rate": 1.9232905752610343e-05, "loss": 0.5014, "step": 2350 }, { "epoch": 0.28609674475205354, "grad_norm": 1.3421010971069336, "learning_rate": 1.9232156311066045e-05, "loss": 0.4848, "step": 2351 }, { "epoch": 0.28621843626407056, "grad_norm": 1.5841246843338013, "learning_rate": 1.9231406518219084e-05, "loss": 0.5247, "step": 2352 }, { "epoch": 0.28634012777608764, "grad_norm": 0.7412301301956177, "learning_rate": 1.9230656374098e-05, "loss": 0.4661, "step": 2353 }, { "epoch": 0.28646181928810466, "grad_norm": 2.3621151447296143, "learning_rate": 1.9229905878731333e-05, "loss": 0.4927, "step": 2354 }, { "epoch": 0.2865835108001217, "grad_norm": 3.8191757202148438, "learning_rate": 1.9229155032147644e-05, "loss": 0.4524, "step": 2355 }, { "epoch": 0.28670520231213875, "grad_norm": 1.4366422891616821, "learning_rate": 1.9228403834375502e-05, "loss": 0.5542, "step": 2356 }, { "epoch": 0.28682689382415577, "grad_norm": 1.9640687704086304, "learning_rate": 1.9227652285443494e-05, "loss": 0.485, "step": 2357 }, { "epoch": 0.2869485853361728, "grad_norm": 1.7183088064193726, "learning_rate": 1.9226900385380215e-05, "loss": 0.5474, "step": 2358 }, { "epoch": 0.28707027684818986, "grad_norm": 3.0479187965393066, "learning_rate": 1.9226148134214275e-05, "loss": 0.4701, "step": 2359 }, { "epoch": 0.2871919683602069, "grad_norm": 1.2891998291015625, "learning_rate": 1.9225395531974306e-05, "loss": 0.4179, "step": 2360 }, { "epoch": 0.2873136598722239, "grad_norm": 1.107069492340088, "learning_rate": 1.9224642578688943e-05, "loss": 0.4554, "step": 2361 }, { "epoch": 0.2874353513842409, "grad_norm": 2.142453908920288, "learning_rate": 1.922388927438683e-05, "loss": 0.4842, "step": 2362 }, { "epoch": 0.287557042896258, "grad_norm": 3.440673589706421, "learning_rate": 1.9223135619096637e-05, "loss": 0.4964, "step": 2363 }, { "epoch": 0.287678734408275, "grad_norm": 3.190993070602417, "learning_rate": 1.922238161284705e-05, "loss": 0.4949, "step": 2364 }, { "epoch": 0.28780042592029204, "grad_norm": 4.557056903839111, "learning_rate": 1.9221627255666747e-05, "loss": 0.4768, "step": 2365 }, { "epoch": 0.2879221174323091, "grad_norm": 3.298712968826294, "learning_rate": 1.9220872547584443e-05, "loss": 0.4852, "step": 2366 }, { "epoch": 0.28804380894432613, "grad_norm": 1.117448091506958, "learning_rate": 1.922011748862885e-05, "loss": 0.3987, "step": 2367 }, { "epoch": 0.28816550045634315, "grad_norm": 2.4735140800476074, "learning_rate": 1.92193620788287e-05, "loss": 0.4845, "step": 2368 }, { "epoch": 0.2882871919683602, "grad_norm": 2.5398001670837402, "learning_rate": 1.9218606318212747e-05, "loss": 0.4312, "step": 2369 }, { "epoch": 0.28840888348037724, "grad_norm": 0.6697260141372681, "learning_rate": 1.921785020680974e-05, "loss": 0.4837, "step": 2370 }, { "epoch": 0.28853057499239426, "grad_norm": 1.876666784286499, "learning_rate": 1.9217093744648446e-05, "loss": 0.4604, "step": 2371 }, { "epoch": 0.28865226650441134, "grad_norm": 2.5812888145446777, "learning_rate": 1.921633693175766e-05, "loss": 0.4395, "step": 2372 }, { "epoch": 0.28877395801642836, "grad_norm": 1.9372605085372925, "learning_rate": 1.9215579768166183e-05, "loss": 0.4555, "step": 2373 }, { "epoch": 0.2888956495284454, "grad_norm": 0.7640296220779419, "learning_rate": 1.9214822253902813e-05, "loss": 0.4639, "step": 2374 }, { "epoch": 0.28901734104046245, "grad_norm": 2.6883747577667236, "learning_rate": 1.921406438899639e-05, "loss": 0.4993, "step": 2375 }, { "epoch": 0.28913903255247947, "grad_norm": 4.16229248046875, "learning_rate": 1.921330617347574e-05, "loss": 0.5439, "step": 2376 }, { "epoch": 0.2892607240644965, "grad_norm": 0.6326306462287903, "learning_rate": 1.921254760736972e-05, "loss": 0.4194, "step": 2377 }, { "epoch": 0.28938241557651356, "grad_norm": 1.1483418941497803, "learning_rate": 1.9211788690707194e-05, "loss": 0.5079, "step": 2378 }, { "epoch": 0.2895041070885306, "grad_norm": 3.2198092937469482, "learning_rate": 1.921102942351704e-05, "loss": 0.5132, "step": 2379 }, { "epoch": 0.2896257986005476, "grad_norm": 1.8041861057281494, "learning_rate": 1.9210269805828152e-05, "loss": 0.4677, "step": 2380 }, { "epoch": 0.2897474901125647, "grad_norm": 1.8039244413375854, "learning_rate": 1.9209509837669436e-05, "loss": 0.4692, "step": 2381 }, { "epoch": 0.2898691816245817, "grad_norm": 1.2893513441085815, "learning_rate": 1.9208749519069805e-05, "loss": 0.4675, "step": 2382 }, { "epoch": 0.2899908731365987, "grad_norm": 2.565755605697632, "learning_rate": 1.9207988850058193e-05, "loss": 0.4988, "step": 2383 }, { "epoch": 0.29011256464861573, "grad_norm": 0.6244593858718872, "learning_rate": 1.920722783066355e-05, "loss": 0.4613, "step": 2384 }, { "epoch": 0.2902342561606328, "grad_norm": 3.351763963699341, "learning_rate": 1.920646646091483e-05, "loss": 0.5336, "step": 2385 }, { "epoch": 0.29035594767264983, "grad_norm": 1.7007615566253662, "learning_rate": 1.9205704740840998e-05, "loss": 0.4341, "step": 2386 }, { "epoch": 0.29047763918466685, "grad_norm": 1.734458088874817, "learning_rate": 1.920494267047105e-05, "loss": 0.5159, "step": 2387 }, { "epoch": 0.2905993306966839, "grad_norm": 1.9868311882019043, "learning_rate": 1.9204180249833984e-05, "loss": 0.4485, "step": 2388 }, { "epoch": 0.29072102220870094, "grad_norm": 1.497117519378662, "learning_rate": 1.92034174789588e-05, "loss": 0.4978, "step": 2389 }, { "epoch": 0.29084271372071796, "grad_norm": 3.925015449523926, "learning_rate": 1.920265435787454e-05, "loss": 0.3847, "step": 2390 }, { "epoch": 0.29096440523273503, "grad_norm": 1.5976030826568604, "learning_rate": 1.9201890886610225e-05, "loss": 0.4759, "step": 2391 }, { "epoch": 0.29108609674475205, "grad_norm": 1.1353329420089722, "learning_rate": 1.920112706519492e-05, "loss": 0.4495, "step": 2392 }, { "epoch": 0.2912077882567691, "grad_norm": 0.681307315826416, "learning_rate": 1.9200362893657685e-05, "loss": 0.4632, "step": 2393 }, { "epoch": 0.29132947976878615, "grad_norm": 1.3313989639282227, "learning_rate": 1.9199598372027593e-05, "loss": 0.4714, "step": 2394 }, { "epoch": 0.29145117128080317, "grad_norm": 1.5667229890823364, "learning_rate": 1.9198833500333746e-05, "loss": 0.4323, "step": 2395 }, { "epoch": 0.2915728627928202, "grad_norm": 0.9350096583366394, "learning_rate": 1.919806827860524e-05, "loss": 0.4355, "step": 2396 }, { "epoch": 0.29169455430483726, "grad_norm": 0.5832048654556274, "learning_rate": 1.9197302706871202e-05, "loss": 0.4359, "step": 2397 }, { "epoch": 0.2918162458168543, "grad_norm": 3.143641471862793, "learning_rate": 1.9196536785160755e-05, "loss": 0.5044, "step": 2398 }, { "epoch": 0.2919379373288713, "grad_norm": 3.0947747230529785, "learning_rate": 1.919577051350305e-05, "loss": 0.4606, "step": 2399 }, { "epoch": 0.2920596288408884, "grad_norm": 1.3649258613586426, "learning_rate": 1.919500389192724e-05, "loss": 0.4519, "step": 2400 }, { "epoch": 0.2921813203529054, "grad_norm": 3.5724737644195557, "learning_rate": 1.91942369204625e-05, "loss": 0.4938, "step": 2401 }, { "epoch": 0.2923030118649224, "grad_norm": 0.7385405898094177, "learning_rate": 1.9193469599138015e-05, "loss": 0.4786, "step": 2402 }, { "epoch": 0.29242470337693943, "grad_norm": 1.4629532098770142, "learning_rate": 1.919270192798298e-05, "loss": 0.4887, "step": 2403 }, { "epoch": 0.2925463948889565, "grad_norm": 3.9168739318847656, "learning_rate": 1.9191933907026612e-05, "loss": 0.4213, "step": 2404 }, { "epoch": 0.2926680864009735, "grad_norm": 2.0356836318969727, "learning_rate": 1.9191165536298132e-05, "loss": 0.5293, "step": 2405 }, { "epoch": 0.29278977791299055, "grad_norm": 2.9639980792999268, "learning_rate": 1.9190396815826777e-05, "loss": 0.4895, "step": 2406 }, { "epoch": 0.2929114694250076, "grad_norm": 2.15425968170166, "learning_rate": 1.91896277456418e-05, "loss": 0.4493, "step": 2407 }, { "epoch": 0.29303316093702464, "grad_norm": 1.167723536491394, "learning_rate": 1.9188858325772467e-05, "loss": 0.5197, "step": 2408 }, { "epoch": 0.29315485244904166, "grad_norm": 0.8349345922470093, "learning_rate": 1.918808855624806e-05, "loss": 0.4763, "step": 2409 }, { "epoch": 0.29327654396105873, "grad_norm": 0.5412417650222778, "learning_rate": 1.9187318437097855e-05, "loss": 0.4501, "step": 2410 }, { "epoch": 0.29339823547307575, "grad_norm": 2.312762975692749, "learning_rate": 1.918654796835117e-05, "loss": 0.4769, "step": 2411 }, { "epoch": 0.29351992698509277, "grad_norm": 3.0606560707092285, "learning_rate": 1.9185777150037323e-05, "loss": 0.4928, "step": 2412 }, { "epoch": 0.29364161849710985, "grad_norm": 0.9675942063331604, "learning_rate": 1.918500598218564e-05, "loss": 0.4834, "step": 2413 }, { "epoch": 0.29376331000912687, "grad_norm": 0.9042759537696838, "learning_rate": 1.918423446482547e-05, "loss": 0.5189, "step": 2414 }, { "epoch": 0.2938850015211439, "grad_norm": 1.7681832313537598, "learning_rate": 1.9183462597986165e-05, "loss": 0.4502, "step": 2415 }, { "epoch": 0.29400669303316096, "grad_norm": 1.1707684993743896, "learning_rate": 1.91826903816971e-05, "loss": 0.5075, "step": 2416 }, { "epoch": 0.294128384545178, "grad_norm": 5.285305023193359, "learning_rate": 1.9181917815987658e-05, "loss": 0.4428, "step": 2417 }, { "epoch": 0.294250076057195, "grad_norm": 2.9431893825531006, "learning_rate": 1.918114490088724e-05, "loss": 0.5061, "step": 2418 }, { "epoch": 0.2943717675692121, "grad_norm": 2.6012656688690186, "learning_rate": 1.9180371636425253e-05, "loss": 0.4953, "step": 2419 }, { "epoch": 0.2944934590812291, "grad_norm": 1.6666074991226196, "learning_rate": 1.9179598022631124e-05, "loss": 0.4996, "step": 2420 }, { "epoch": 0.2946151505932461, "grad_norm": 1.650451421737671, "learning_rate": 1.917882405953429e-05, "loss": 0.5031, "step": 2421 }, { "epoch": 0.29473684210526313, "grad_norm": 1.5714176893234253, "learning_rate": 1.9178049747164198e-05, "loss": 0.5226, "step": 2422 }, { "epoch": 0.2948585336172802, "grad_norm": 0.761840283870697, "learning_rate": 1.9177275085550316e-05, "loss": 0.4144, "step": 2423 }, { "epoch": 0.2949802251292972, "grad_norm": 0.5682148933410645, "learning_rate": 1.9176500074722125e-05, "loss": 0.4423, "step": 2424 }, { "epoch": 0.29510191664131424, "grad_norm": 0.9621097445487976, "learning_rate": 1.917572471470911e-05, "loss": 0.4437, "step": 2425 }, { "epoch": 0.2952236081533313, "grad_norm": 1.0111867189407349, "learning_rate": 1.9174949005540777e-05, "loss": 0.4262, "step": 2426 }, { "epoch": 0.29534529966534834, "grad_norm": 2.042614221572876, "learning_rate": 1.9174172947246642e-05, "loss": 0.4754, "step": 2427 }, { "epoch": 0.29546699117736536, "grad_norm": 4.59519100189209, "learning_rate": 1.9173396539856236e-05, "loss": 0.553, "step": 2428 }, { "epoch": 0.29558868268938243, "grad_norm": 1.9203051328659058, "learning_rate": 1.9172619783399108e-05, "loss": 0.4725, "step": 2429 }, { "epoch": 0.29571037420139945, "grad_norm": 1.053370714187622, "learning_rate": 1.9171842677904808e-05, "loss": 0.4826, "step": 2430 }, { "epoch": 0.29583206571341647, "grad_norm": 4.839434623718262, "learning_rate": 1.9171065223402908e-05, "loss": 0.4299, "step": 2431 }, { "epoch": 0.29595375722543354, "grad_norm": 3.3164596557617188, "learning_rate": 1.9170287419922993e-05, "loss": 0.4689, "step": 2432 }, { "epoch": 0.29607544873745056, "grad_norm": 3.1286797523498535, "learning_rate": 1.9169509267494666e-05, "loss": 0.481, "step": 2433 }, { "epoch": 0.2961971402494676, "grad_norm": 2.202488660812378, "learning_rate": 1.916873076614753e-05, "loss": 0.5111, "step": 2434 }, { "epoch": 0.29631883176148466, "grad_norm": 1.047048568725586, "learning_rate": 1.9167951915911206e-05, "loss": 0.5096, "step": 2435 }, { "epoch": 0.2964405232735017, "grad_norm": 2.2479233741760254, "learning_rate": 1.9167172716815335e-05, "loss": 0.528, "step": 2436 }, { "epoch": 0.2965622147855187, "grad_norm": 0.7420548796653748, "learning_rate": 1.916639316888957e-05, "loss": 0.4368, "step": 2437 }, { "epoch": 0.29668390629753577, "grad_norm": 5.904504776000977, "learning_rate": 1.916561327216357e-05, "loss": 0.5687, "step": 2438 }, { "epoch": 0.2968055978095528, "grad_norm": 2.3244457244873047, "learning_rate": 1.9164833026667016e-05, "loss": 0.4947, "step": 2439 }, { "epoch": 0.2969272893215698, "grad_norm": 1.5631097555160522, "learning_rate": 1.9164052432429588e-05, "loss": 0.4252, "step": 2440 }, { "epoch": 0.2970489808335869, "grad_norm": 2.7869882583618164, "learning_rate": 1.9163271489481005e-05, "loss": 0.5109, "step": 2441 }, { "epoch": 0.2971706723456039, "grad_norm": 0.6710759997367859, "learning_rate": 1.916249019785097e-05, "loss": 0.4478, "step": 2442 }, { "epoch": 0.2972923638576209, "grad_norm": 0.7273445725440979, "learning_rate": 1.916170855756922e-05, "loss": 0.434, "step": 2443 }, { "epoch": 0.29741405536963794, "grad_norm": 1.0478951930999756, "learning_rate": 1.9160926568665496e-05, "loss": 0.4523, "step": 2444 }, { "epoch": 0.297535746881655, "grad_norm": 1.6161104440689087, "learning_rate": 1.9160144231169552e-05, "loss": 0.4872, "step": 2445 }, { "epoch": 0.29765743839367204, "grad_norm": 0.8364020586013794, "learning_rate": 1.9159361545111158e-05, "loss": 0.4355, "step": 2446 }, { "epoch": 0.29777912990568906, "grad_norm": 0.6326636075973511, "learning_rate": 1.9158578510520103e-05, "loss": 0.4926, "step": 2447 }, { "epoch": 0.29790082141770613, "grad_norm": 0.7602832913398743, "learning_rate": 1.9157795127426174e-05, "loss": 0.4765, "step": 2448 }, { "epoch": 0.29802251292972315, "grad_norm": 2.0874526500701904, "learning_rate": 1.9157011395859188e-05, "loss": 0.4091, "step": 2449 }, { "epoch": 0.29814420444174017, "grad_norm": 4.617832183837891, "learning_rate": 1.9156227315848962e-05, "loss": 0.6028, "step": 2450 }, { "epoch": 0.29826589595375724, "grad_norm": 3.986806869506836, "learning_rate": 1.9155442887425337e-05, "loss": 0.557, "step": 2451 }, { "epoch": 0.29838758746577426, "grad_norm": 1.9443796873092651, "learning_rate": 1.9154658110618156e-05, "loss": 0.4479, "step": 2452 }, { "epoch": 0.2985092789777913, "grad_norm": 0.6482906341552734, "learning_rate": 1.9153872985457287e-05, "loss": 0.4684, "step": 2453 }, { "epoch": 0.29863097048980836, "grad_norm": 0.6355880498886108, "learning_rate": 1.9153087511972604e-05, "loss": 0.4498, "step": 2454 }, { "epoch": 0.2987526620018254, "grad_norm": 2.498286008834839, "learning_rate": 1.915230169019399e-05, "loss": 0.5195, "step": 2455 }, { "epoch": 0.2988743535138424, "grad_norm": 1.5511423349380493, "learning_rate": 1.915151552015136e-05, "loss": 0.4857, "step": 2456 }, { "epoch": 0.29899604502585947, "grad_norm": 0.9520419836044312, "learning_rate": 1.9150729001874618e-05, "loss": 0.4751, "step": 2457 }, { "epoch": 0.2991177365378765, "grad_norm": 1.1225643157958984, "learning_rate": 1.9149942135393696e-05, "loss": 0.4656, "step": 2458 }, { "epoch": 0.2992394280498935, "grad_norm": 1.2036497592926025, "learning_rate": 1.914915492073854e-05, "loss": 0.4623, "step": 2459 }, { "epoch": 0.2993611195619106, "grad_norm": 2.4605398178100586, "learning_rate": 1.91483673579391e-05, "loss": 0.4863, "step": 2460 }, { "epoch": 0.2994828110739276, "grad_norm": 0.9072971343994141, "learning_rate": 1.9147579447025343e-05, "loss": 0.4194, "step": 2461 }, { "epoch": 0.2996045025859446, "grad_norm": 1.0341025590896606, "learning_rate": 1.9146791188027257e-05, "loss": 0.5028, "step": 2462 }, { "epoch": 0.29972619409796164, "grad_norm": 2.1331140995025635, "learning_rate": 1.914600258097483e-05, "loss": 0.3947, "step": 2463 }, { "epoch": 0.2998478856099787, "grad_norm": 1.425396203994751, "learning_rate": 1.914521362589808e-05, "loss": 0.4031, "step": 2464 }, { "epoch": 0.29996957712199573, "grad_norm": 1.7659382820129395, "learning_rate": 1.9144424322827016e-05, "loss": 0.4646, "step": 2465 }, { "epoch": 0.30009126863401275, "grad_norm": 0.8104053735733032, "learning_rate": 1.914363467179168e-05, "loss": 0.4547, "step": 2466 }, { "epoch": 0.30021296014602983, "grad_norm": 1.4789149761199951, "learning_rate": 1.914284467282212e-05, "loss": 0.4649, "step": 2467 }, { "epoch": 0.30033465165804685, "grad_norm": 0.697638988494873, "learning_rate": 1.9142054325948393e-05, "loss": 0.4429, "step": 2468 }, { "epoch": 0.30045634317006387, "grad_norm": 1.612952709197998, "learning_rate": 1.9141263631200578e-05, "loss": 0.5509, "step": 2469 }, { "epoch": 0.30057803468208094, "grad_norm": 1.3188133239746094, "learning_rate": 1.914047258860876e-05, "loss": 0.476, "step": 2470 }, { "epoch": 0.30069972619409796, "grad_norm": 0.8661088347434998, "learning_rate": 1.913968119820304e-05, "loss": 0.5148, "step": 2471 }, { "epoch": 0.300821417706115, "grad_norm": 1.7356548309326172, "learning_rate": 1.913888946001353e-05, "loss": 0.4534, "step": 2472 }, { "epoch": 0.30094310921813205, "grad_norm": 2.5952534675598145, "learning_rate": 1.9138097374070364e-05, "loss": 0.4611, "step": 2473 }, { "epoch": 0.3010648007301491, "grad_norm": 1.9293267726898193, "learning_rate": 1.9137304940403678e-05, "loss": 0.4856, "step": 2474 }, { "epoch": 0.3011864922421661, "grad_norm": 0.8100680708885193, "learning_rate": 1.913651215904362e-05, "loss": 0.4947, "step": 2475 }, { "epoch": 0.30130818375418317, "grad_norm": 1.1908873319625854, "learning_rate": 1.9135719030020368e-05, "loss": 0.4405, "step": 2476 }, { "epoch": 0.3014298752662002, "grad_norm": 2.3379483222961426, "learning_rate": 1.9134925553364097e-05, "loss": 0.4851, "step": 2477 }, { "epoch": 0.3015515667782172, "grad_norm": 2.4926347732543945, "learning_rate": 1.9134131729104997e-05, "loss": 0.507, "step": 2478 }, { "epoch": 0.3016732582902343, "grad_norm": 1.1349623203277588, "learning_rate": 1.9133337557273282e-05, "loss": 0.4548, "step": 2479 }, { "epoch": 0.3017949498022513, "grad_norm": 1.875536322593689, "learning_rate": 1.9132543037899166e-05, "loss": 0.4822, "step": 2480 }, { "epoch": 0.3019166413142683, "grad_norm": 2.2871627807617188, "learning_rate": 1.9131748171012882e-05, "loss": 0.5135, "step": 2481 }, { "epoch": 0.3020383328262854, "grad_norm": 1.4449636936187744, "learning_rate": 1.913095295664468e-05, "loss": 0.4997, "step": 2482 }, { "epoch": 0.3021600243383024, "grad_norm": 2.2744860649108887, "learning_rate": 1.913015739482482e-05, "loss": 0.4831, "step": 2483 }, { "epoch": 0.30228171585031943, "grad_norm": 0.9401661157608032, "learning_rate": 1.9129361485583572e-05, "loss": 0.4872, "step": 2484 }, { "epoch": 0.30240340736233645, "grad_norm": 2.0501365661621094, "learning_rate": 1.9128565228951223e-05, "loss": 0.4214, "step": 2485 }, { "epoch": 0.3025250988743535, "grad_norm": 3.1846559047698975, "learning_rate": 1.912776862495807e-05, "loss": 0.4215, "step": 2486 }, { "epoch": 0.30264679038637055, "grad_norm": 2.1140296459198, "learning_rate": 1.912697167363443e-05, "loss": 0.5448, "step": 2487 }, { "epoch": 0.30276848189838756, "grad_norm": 1.8529914617538452, "learning_rate": 1.9126174375010622e-05, "loss": 0.4128, "step": 2488 }, { "epoch": 0.30289017341040464, "grad_norm": 3.9982242584228516, "learning_rate": 1.9125376729116992e-05, "loss": 0.4783, "step": 2489 }, { "epoch": 0.30301186492242166, "grad_norm": 6.011218547821045, "learning_rate": 1.912457873598389e-05, "loss": 0.5774, "step": 2490 }, { "epoch": 0.3031335564344387, "grad_norm": 1.1237566471099854, "learning_rate": 1.9123780395641676e-05, "loss": 0.4281, "step": 2491 }, { "epoch": 0.30325524794645575, "grad_norm": 1.6968867778778076, "learning_rate": 1.9122981708120734e-05, "loss": 0.4728, "step": 2492 }, { "epoch": 0.30337693945847277, "grad_norm": 0.6782594323158264, "learning_rate": 1.9122182673451456e-05, "loss": 0.407, "step": 2493 }, { "epoch": 0.3034986309704898, "grad_norm": 1.179334044456482, "learning_rate": 1.9121383291664245e-05, "loss": 0.4575, "step": 2494 }, { "epoch": 0.30362032248250687, "grad_norm": 0.8710147738456726, "learning_rate": 1.9120583562789522e-05, "loss": 0.4924, "step": 2495 }, { "epoch": 0.3037420139945239, "grad_norm": 2.6657702922821045, "learning_rate": 1.9119783486857712e-05, "loss": 0.446, "step": 2496 }, { "epoch": 0.3038637055065409, "grad_norm": 1.1198617219924927, "learning_rate": 1.9118983063899266e-05, "loss": 0.4392, "step": 2497 }, { "epoch": 0.303985397018558, "grad_norm": 0.7784521579742432, "learning_rate": 1.9118182293944638e-05, "loss": 0.4623, "step": 2498 }, { "epoch": 0.304107088530575, "grad_norm": 1.823186993598938, "learning_rate": 1.9117381177024298e-05, "loss": 0.4761, "step": 2499 }, { "epoch": 0.304228780042592, "grad_norm": 1.5546553134918213, "learning_rate": 1.9116579713168736e-05, "loss": 0.4741, "step": 2500 }, { "epoch": 0.3043504715546091, "grad_norm": 1.006614327430725, "learning_rate": 1.9115777902408442e-05, "loss": 0.4693, "step": 2501 }, { "epoch": 0.3044721630666261, "grad_norm": 0.8246433734893799, "learning_rate": 1.9114975744773937e-05, "loss": 0.4552, "step": 2502 }, { "epoch": 0.30459385457864313, "grad_norm": 0.7124652862548828, "learning_rate": 1.9114173240295732e-05, "loss": 0.4905, "step": 2503 }, { "epoch": 0.30471554609066015, "grad_norm": 2.100367546081543, "learning_rate": 1.9113370389004367e-05, "loss": 0.5163, "step": 2504 }, { "epoch": 0.3048372376026772, "grad_norm": 2.617018938064575, "learning_rate": 1.9112567190930402e-05, "loss": 0.428, "step": 2505 }, { "epoch": 0.30495892911469424, "grad_norm": 2.650043249130249, "learning_rate": 1.911176364610439e-05, "loss": 0.4706, "step": 2506 }, { "epoch": 0.30508062062671126, "grad_norm": 5.9851274490356445, "learning_rate": 1.9110959754556914e-05, "loss": 0.4172, "step": 2507 }, { "epoch": 0.30520231213872834, "grad_norm": 0.9120343923568726, "learning_rate": 1.9110155516318558e-05, "loss": 0.4849, "step": 2508 }, { "epoch": 0.30532400365074536, "grad_norm": 1.3405015468597412, "learning_rate": 1.9109350931419926e-05, "loss": 0.5099, "step": 2509 }, { "epoch": 0.3054456951627624, "grad_norm": 0.6575984954833984, "learning_rate": 1.910854599989164e-05, "loss": 0.4623, "step": 2510 }, { "epoch": 0.30556738667477945, "grad_norm": 0.6552987098693848, "learning_rate": 1.910774072176432e-05, "loss": 0.5027, "step": 2511 }, { "epoch": 0.30568907818679647, "grad_norm": 0.8985508680343628, "learning_rate": 1.9106935097068618e-05, "loss": 0.4469, "step": 2512 }, { "epoch": 0.3058107696988135, "grad_norm": 0.9726141095161438, "learning_rate": 1.9106129125835183e-05, "loss": 0.4518, "step": 2513 }, { "epoch": 0.30593246121083056, "grad_norm": 0.8692079186439514, "learning_rate": 1.9105322808094685e-05, "loss": 0.433, "step": 2514 }, { "epoch": 0.3060541527228476, "grad_norm": 2.1015286445617676, "learning_rate": 1.910451614387781e-05, "loss": 0.4818, "step": 2515 }, { "epoch": 0.3061758442348646, "grad_norm": 0.8361689448356628, "learning_rate": 1.910370913321525e-05, "loss": 0.5331, "step": 2516 }, { "epoch": 0.3062975357468817, "grad_norm": 1.3579707145690918, "learning_rate": 1.910290177613771e-05, "loss": 0.4276, "step": 2517 }, { "epoch": 0.3064192272588987, "grad_norm": 0.7217264175415039, "learning_rate": 1.910209407267592e-05, "loss": 0.4899, "step": 2518 }, { "epoch": 0.3065409187709157, "grad_norm": 1.9349385499954224, "learning_rate": 1.910128602286061e-05, "loss": 0.4995, "step": 2519 }, { "epoch": 0.3066626102829328, "grad_norm": 0.823741614818573, "learning_rate": 1.9100477626722525e-05, "loss": 0.5206, "step": 2520 }, { "epoch": 0.3067843017949498, "grad_norm": 3.0831916332244873, "learning_rate": 1.909966888429243e-05, "loss": 0.4678, "step": 2521 }, { "epoch": 0.30690599330696683, "grad_norm": 4.734895706176758, "learning_rate": 1.9098859795601102e-05, "loss": 0.4239, "step": 2522 }, { "epoch": 0.3070276848189839, "grad_norm": 2.184396982192993, "learning_rate": 1.9098050360679323e-05, "loss": 0.4342, "step": 2523 }, { "epoch": 0.3071493763310009, "grad_norm": 1.0338777303695679, "learning_rate": 1.9097240579557893e-05, "loss": 0.4687, "step": 2524 }, { "epoch": 0.30727106784301794, "grad_norm": 0.7237124443054199, "learning_rate": 1.9096430452267633e-05, "loss": 0.4232, "step": 2525 }, { "epoch": 0.30739275935503496, "grad_norm": 0.7365623712539673, "learning_rate": 1.9095619978839367e-05, "loss": 0.4227, "step": 2526 }, { "epoch": 0.30751445086705204, "grad_norm": 1.2416023015975952, "learning_rate": 1.909480915930393e-05, "loss": 0.4526, "step": 2527 }, { "epoch": 0.30763614237906906, "grad_norm": 2.0551960468292236, "learning_rate": 1.909399799369218e-05, "loss": 0.5196, "step": 2528 }, { "epoch": 0.3077578338910861, "grad_norm": 1.16765558719635, "learning_rate": 1.9093186482034985e-05, "loss": 0.4921, "step": 2529 }, { "epoch": 0.30787952540310315, "grad_norm": 1.5175971984863281, "learning_rate": 1.9092374624363218e-05, "loss": 0.4539, "step": 2530 }, { "epoch": 0.30800121691512017, "grad_norm": 1.5309423208236694, "learning_rate": 1.909156242070778e-05, "loss": 0.496, "step": 2531 }, { "epoch": 0.3081229084271372, "grad_norm": 0.7406741976737976, "learning_rate": 1.9090749871099573e-05, "loss": 0.4751, "step": 2532 }, { "epoch": 0.30824459993915426, "grad_norm": 0.831911027431488, "learning_rate": 1.9089936975569516e-05, "loss": 0.4986, "step": 2533 }, { "epoch": 0.3083662914511713, "grad_norm": 0.7211702466011047, "learning_rate": 1.9089123734148544e-05, "loss": 0.489, "step": 2534 }, { "epoch": 0.3084879829631883, "grad_norm": 1.2845163345336914, "learning_rate": 1.9088310146867597e-05, "loss": 0.4846, "step": 2535 }, { "epoch": 0.3086096744752054, "grad_norm": 2.483842611312866, "learning_rate": 1.9087496213757637e-05, "loss": 0.4313, "step": 2536 }, { "epoch": 0.3087313659872224, "grad_norm": 0.9204269051551819, "learning_rate": 1.9086681934849636e-05, "loss": 0.4669, "step": 2537 }, { "epoch": 0.3088530574992394, "grad_norm": 1.0120172500610352, "learning_rate": 1.9085867310174584e-05, "loss": 0.497, "step": 2538 }, { "epoch": 0.3089747490112565, "grad_norm": 1.2834872007369995, "learning_rate": 1.908505233976347e-05, "loss": 0.5095, "step": 2539 }, { "epoch": 0.3090964405232735, "grad_norm": 1.1929011344909668, "learning_rate": 1.908423702364731e-05, "loss": 0.5056, "step": 2540 }, { "epoch": 0.3092181320352905, "grad_norm": 0.6175735592842102, "learning_rate": 1.908342136185713e-05, "loss": 0.4391, "step": 2541 }, { "epoch": 0.3093398235473076, "grad_norm": 2.8288943767547607, "learning_rate": 1.9082605354423962e-05, "loss": 0.3945, "step": 2542 }, { "epoch": 0.3094615150593246, "grad_norm": 0.8661826848983765, "learning_rate": 1.9081789001378862e-05, "loss": 0.4429, "step": 2543 }, { "epoch": 0.30958320657134164, "grad_norm": 0.6647040247917175, "learning_rate": 1.9080972302752895e-05, "loss": 0.4475, "step": 2544 }, { "epoch": 0.30970489808335866, "grad_norm": 2.2310469150543213, "learning_rate": 1.908015525857713e-05, "loss": 0.504, "step": 2545 }, { "epoch": 0.30982658959537573, "grad_norm": 2.417956590652466, "learning_rate": 1.9079337868882667e-05, "loss": 0.3962, "step": 2546 }, { "epoch": 0.30994828110739275, "grad_norm": 1.634032130241394, "learning_rate": 1.90785201337006e-05, "loss": 0.447, "step": 2547 }, { "epoch": 0.3100699726194098, "grad_norm": 0.724041223526001, "learning_rate": 1.907770205306205e-05, "loss": 0.4854, "step": 2548 }, { "epoch": 0.31019166413142685, "grad_norm": 3.027937889099121, "learning_rate": 1.9076883626998155e-05, "loss": 0.5682, "step": 2549 }, { "epoch": 0.31031335564344387, "grad_norm": 1.0180504322052002, "learning_rate": 1.9076064855540045e-05, "loss": 0.481, "step": 2550 }, { "epoch": 0.3104350471554609, "grad_norm": 0.6276563405990601, "learning_rate": 1.907524573871888e-05, "loss": 0.498, "step": 2551 }, { "epoch": 0.31055673866747796, "grad_norm": 1.1533331871032715, "learning_rate": 1.907442627656583e-05, "loss": 0.482, "step": 2552 }, { "epoch": 0.310678430179495, "grad_norm": 0.6698744893074036, "learning_rate": 1.907360646911208e-05, "loss": 0.4765, "step": 2553 }, { "epoch": 0.310800121691512, "grad_norm": 3.309514045715332, "learning_rate": 1.907278631638882e-05, "loss": 0.4083, "step": 2554 }, { "epoch": 0.3109218132035291, "grad_norm": 0.6981541514396667, "learning_rate": 1.9071965818427263e-05, "loss": 0.4487, "step": 2555 }, { "epoch": 0.3110435047155461, "grad_norm": 3.5589592456817627, "learning_rate": 1.9071144975258626e-05, "loss": 0.5174, "step": 2556 }, { "epoch": 0.3111651962275631, "grad_norm": 3.27559232711792, "learning_rate": 1.907032378691415e-05, "loss": 0.4969, "step": 2557 }, { "epoch": 0.3112868877395802, "grad_norm": 2.0862462520599365, "learning_rate": 1.9069502253425075e-05, "loss": 0.4246, "step": 2558 }, { "epoch": 0.3114085792515972, "grad_norm": 0.8357444405555725, "learning_rate": 1.906868037482267e-05, "loss": 0.4468, "step": 2559 }, { "epoch": 0.3115302707636142, "grad_norm": 4.043797492980957, "learning_rate": 1.9067858151138202e-05, "loss": 0.533, "step": 2560 }, { "epoch": 0.3116519622756313, "grad_norm": 3.5708625316619873, "learning_rate": 1.9067035582402966e-05, "loss": 0.5395, "step": 2561 }, { "epoch": 0.3117736537876483, "grad_norm": 1.6642167568206787, "learning_rate": 1.9066212668648252e-05, "loss": 0.4719, "step": 2562 }, { "epoch": 0.31189534529966534, "grad_norm": 1.824846863746643, "learning_rate": 1.9065389409905388e-05, "loss": 0.504, "step": 2563 }, { "epoch": 0.31201703681168236, "grad_norm": 1.5471199750900269, "learning_rate": 1.906456580620569e-05, "loss": 0.4801, "step": 2564 }, { "epoch": 0.31213872832369943, "grad_norm": 3.82975697517395, "learning_rate": 1.90637418575805e-05, "loss": 0.4872, "step": 2565 }, { "epoch": 0.31226041983571645, "grad_norm": 4.280481815338135, "learning_rate": 1.906291756406117e-05, "loss": 0.4568, "step": 2566 }, { "epoch": 0.31238211134773347, "grad_norm": 3.213447332382202, "learning_rate": 1.906209292567907e-05, "loss": 0.5027, "step": 2567 }, { "epoch": 0.31250380285975055, "grad_norm": 5.017328262329102, "learning_rate": 1.906126794246557e-05, "loss": 0.4715, "step": 2568 }, { "epoch": 0.31262549437176756, "grad_norm": 2.812809467315674, "learning_rate": 1.906044261445208e-05, "loss": 0.4877, "step": 2569 }, { "epoch": 0.3127471858837846, "grad_norm": 3.3021914958953857, "learning_rate": 1.9059616941669984e-05, "loss": 0.4812, "step": 2570 }, { "epoch": 0.31286887739580166, "grad_norm": 0.8027843236923218, "learning_rate": 1.9058790924150717e-05, "loss": 0.4703, "step": 2571 }, { "epoch": 0.3129905689078187, "grad_norm": 1.1845877170562744, "learning_rate": 1.90579645619257e-05, "loss": 0.4588, "step": 2572 }, { "epoch": 0.3131122604198357, "grad_norm": 2.4159934520721436, "learning_rate": 1.905713785502639e-05, "loss": 0.4828, "step": 2573 }, { "epoch": 0.31323395193185277, "grad_norm": 0.750566840171814, "learning_rate": 1.9056310803484232e-05, "loss": 0.4151, "step": 2574 }, { "epoch": 0.3133556434438698, "grad_norm": 1.8776639699935913, "learning_rate": 1.9055483407330705e-05, "loss": 0.4723, "step": 2575 }, { "epoch": 0.3134773349558868, "grad_norm": 5.200119972229004, "learning_rate": 1.9054655666597287e-05, "loss": 0.581, "step": 2576 }, { "epoch": 0.3135990264679039, "grad_norm": 0.7368274331092834, "learning_rate": 1.905382758131548e-05, "loss": 0.463, "step": 2577 }, { "epoch": 0.3137207179799209, "grad_norm": 2.442462921142578, "learning_rate": 1.9052999151516792e-05, "loss": 0.5122, "step": 2578 }, { "epoch": 0.3138424094919379, "grad_norm": 0.9392907619476318, "learning_rate": 1.9052170377232752e-05, "loss": 0.4879, "step": 2579 }, { "epoch": 0.313964101003955, "grad_norm": 2.1264443397521973, "learning_rate": 1.905134125849489e-05, "loss": 0.478, "step": 2580 }, { "epoch": 0.314085792515972, "grad_norm": 1.442296028137207, "learning_rate": 1.9050511795334756e-05, "loss": 0.4788, "step": 2581 }, { "epoch": 0.31420748402798904, "grad_norm": 4.7049946784973145, "learning_rate": 1.9049681987783914e-05, "loss": 0.4229, "step": 2582 }, { "epoch": 0.3143291755400061, "grad_norm": 0.6403811573982239, "learning_rate": 1.9048851835873945e-05, "loss": 0.4954, "step": 2583 }, { "epoch": 0.31445086705202313, "grad_norm": 0.6243477463722229, "learning_rate": 1.9048021339636433e-05, "loss": 0.48, "step": 2584 }, { "epoch": 0.31457255856404015, "grad_norm": 1.9575669765472412, "learning_rate": 1.9047190499102975e-05, "loss": 0.4271, "step": 2585 }, { "epoch": 0.31469425007605717, "grad_norm": 1.1199640035629272, "learning_rate": 1.9046359314305198e-05, "loss": 0.4391, "step": 2586 }, { "epoch": 0.31481594158807424, "grad_norm": 3.7026619911193848, "learning_rate": 1.904552778527472e-05, "loss": 0.5125, "step": 2587 }, { "epoch": 0.31493763310009126, "grad_norm": 3.1253371238708496, "learning_rate": 1.904469591204319e-05, "loss": 0.5123, "step": 2588 }, { "epoch": 0.3150593246121083, "grad_norm": 2.278315782546997, "learning_rate": 1.9043863694642258e-05, "loss": 0.3863, "step": 2589 }, { "epoch": 0.31518101612412536, "grad_norm": 2.6691672801971436, "learning_rate": 1.9043031133103593e-05, "loss": 0.4662, "step": 2590 }, { "epoch": 0.3153027076361424, "grad_norm": 1.2145169973373413, "learning_rate": 1.9042198227458873e-05, "loss": 0.4847, "step": 2591 }, { "epoch": 0.3154243991481594, "grad_norm": 2.419055223464966, "learning_rate": 1.9041364977739792e-05, "loss": 0.4087, "step": 2592 }, { "epoch": 0.31554609066017647, "grad_norm": 1.7537059783935547, "learning_rate": 1.9040531383978063e-05, "loss": 0.5198, "step": 2593 }, { "epoch": 0.3156677821721935, "grad_norm": 2.4825210571289062, "learning_rate": 1.9039697446205405e-05, "loss": 0.4276, "step": 2594 }, { "epoch": 0.3157894736842105, "grad_norm": 2.5851564407348633, "learning_rate": 1.9038863164453544e-05, "loss": 0.4007, "step": 2595 }, { "epoch": 0.3159111651962276, "grad_norm": 2.3171944618225098, "learning_rate": 1.903802853875423e-05, "loss": 0.4155, "step": 2596 }, { "epoch": 0.3160328567082446, "grad_norm": 0.9399721622467041, "learning_rate": 1.9037193569139224e-05, "loss": 0.5118, "step": 2597 }, { "epoch": 0.3161545482202616, "grad_norm": 3.951594114303589, "learning_rate": 1.9036358255640298e-05, "loss": 0.3415, "step": 2598 }, { "epoch": 0.3162762397322787, "grad_norm": 0.9210025072097778, "learning_rate": 1.903552259828923e-05, "loss": 0.4397, "step": 2599 }, { "epoch": 0.3163979312442957, "grad_norm": 3.0612332820892334, "learning_rate": 1.9034686597117832e-05, "loss": 0.496, "step": 2600 }, { "epoch": 0.31651962275631274, "grad_norm": 1.8251466751098633, "learning_rate": 1.90338502521579e-05, "loss": 0.4521, "step": 2601 }, { "epoch": 0.3166413142683298, "grad_norm": 0.791189432144165, "learning_rate": 1.9033013563441276e-05, "loss": 0.4273, "step": 2602 }, { "epoch": 0.31676300578034683, "grad_norm": 1.7600021362304688, "learning_rate": 1.9032176530999782e-05, "loss": 0.4843, "step": 2603 }, { "epoch": 0.31688469729236385, "grad_norm": 2.6301116943359375, "learning_rate": 1.903133915486528e-05, "loss": 0.4079, "step": 2604 }, { "epoch": 0.31700638880438087, "grad_norm": 2.031949758529663, "learning_rate": 1.9030501435069626e-05, "loss": 0.4813, "step": 2605 }, { "epoch": 0.31712808031639794, "grad_norm": 3.4081430435180664, "learning_rate": 1.9029663371644698e-05, "loss": 0.4335, "step": 2606 }, { "epoch": 0.31724977182841496, "grad_norm": 0.8446583151817322, "learning_rate": 1.9028824964622393e-05, "loss": 0.4635, "step": 2607 }, { "epoch": 0.317371463340432, "grad_norm": 2.818190574645996, "learning_rate": 1.902798621403461e-05, "loss": 0.4097, "step": 2608 }, { "epoch": 0.31749315485244906, "grad_norm": 0.8132585287094116, "learning_rate": 1.9027147119913263e-05, "loss": 0.472, "step": 2609 }, { "epoch": 0.3176148463644661, "grad_norm": 1.3215408325195312, "learning_rate": 1.9026307682290284e-05, "loss": 0.4712, "step": 2610 }, { "epoch": 0.3177365378764831, "grad_norm": 1.2061406373977661, "learning_rate": 1.902546790119761e-05, "loss": 0.4666, "step": 2611 }, { "epoch": 0.31785822938850017, "grad_norm": 1.0162789821624756, "learning_rate": 1.9024627776667204e-05, "loss": 0.4524, "step": 2612 }, { "epoch": 0.3179799209005172, "grad_norm": 0.5247098803520203, "learning_rate": 1.902378730873103e-05, "loss": 0.4736, "step": 2613 }, { "epoch": 0.3181016124125342, "grad_norm": 2.5346930027008057, "learning_rate": 1.9022946497421074e-05, "loss": 0.5081, "step": 2614 }, { "epoch": 0.3182233039245513, "grad_norm": 1.1095057725906372, "learning_rate": 1.9022105342769323e-05, "loss": 0.4631, "step": 2615 }, { "epoch": 0.3183449954365683, "grad_norm": 3.6375324726104736, "learning_rate": 1.902126384480779e-05, "loss": 0.413, "step": 2616 }, { "epoch": 0.3184666869485853, "grad_norm": 0.7454293966293335, "learning_rate": 1.9020422003568498e-05, "loss": 0.4436, "step": 2617 }, { "epoch": 0.3185883784606024, "grad_norm": 0.5880517363548279, "learning_rate": 1.9019579819083475e-05, "loss": 0.4692, "step": 2618 }, { "epoch": 0.3187100699726194, "grad_norm": 1.3258553743362427, "learning_rate": 1.901873729138477e-05, "loss": 0.4984, "step": 2619 }, { "epoch": 0.31883176148463643, "grad_norm": 0.8498595356941223, "learning_rate": 1.9017894420504445e-05, "loss": 0.4221, "step": 2620 }, { "epoch": 0.3189534529966535, "grad_norm": 1.8798936605453491, "learning_rate": 1.9017051206474568e-05, "loss": 0.512, "step": 2621 }, { "epoch": 0.3190751445086705, "grad_norm": 2.344435214996338, "learning_rate": 1.901620764932723e-05, "loss": 0.4559, "step": 2622 }, { "epoch": 0.31919683602068755, "grad_norm": 0.6877545118331909, "learning_rate": 1.901536374909453e-05, "loss": 0.5253, "step": 2623 }, { "epoch": 0.3193185275327046, "grad_norm": 0.9242064356803894, "learning_rate": 1.9014519505808575e-05, "loss": 0.504, "step": 2624 }, { "epoch": 0.31944021904472164, "grad_norm": 1.3465564250946045, "learning_rate": 1.9013674919501498e-05, "loss": 0.4877, "step": 2625 }, { "epoch": 0.31956191055673866, "grad_norm": 2.5336482524871826, "learning_rate": 1.901282999020543e-05, "loss": 0.4596, "step": 2626 }, { "epoch": 0.3196836020687557, "grad_norm": 3.471236228942871, "learning_rate": 1.901198471795253e-05, "loss": 0.4903, "step": 2627 }, { "epoch": 0.31980529358077275, "grad_norm": 3.1795148849487305, "learning_rate": 1.9011139102774948e-05, "loss": 0.4349, "step": 2628 }, { "epoch": 0.3199269850927898, "grad_norm": 3.48786997795105, "learning_rate": 1.901029314470488e-05, "loss": 0.4243, "step": 2629 }, { "epoch": 0.3200486766048068, "grad_norm": 1.9860074520111084, "learning_rate": 1.9009446843774503e-05, "loss": 0.5187, "step": 2630 }, { "epoch": 0.32017036811682387, "grad_norm": 1.605178713798523, "learning_rate": 1.900860020001603e-05, "loss": 0.4712, "step": 2631 }, { "epoch": 0.3202920596288409, "grad_norm": 1.0413576364517212, "learning_rate": 1.9007753213461667e-05, "loss": 0.5031, "step": 2632 }, { "epoch": 0.3204137511408579, "grad_norm": 2.5105197429656982, "learning_rate": 1.9006905884143652e-05, "loss": 0.5191, "step": 2633 }, { "epoch": 0.320535442652875, "grad_norm": 2.3670494556427, "learning_rate": 1.9006058212094224e-05, "loss": 0.456, "step": 2634 }, { "epoch": 0.320657134164892, "grad_norm": 3.461168050765991, "learning_rate": 1.900521019734564e-05, "loss": 0.4252, "step": 2635 }, { "epoch": 0.320778825676909, "grad_norm": 1.7053115367889404, "learning_rate": 1.9004361839930167e-05, "loss": 0.4593, "step": 2636 }, { "epoch": 0.3209005171889261, "grad_norm": 1.234446406364441, "learning_rate": 1.900351313988009e-05, "loss": 0.4533, "step": 2637 }, { "epoch": 0.3210222087009431, "grad_norm": 0.5966386795043945, "learning_rate": 1.9002664097227702e-05, "loss": 0.4492, "step": 2638 }, { "epoch": 0.32114390021296013, "grad_norm": 1.6105024814605713, "learning_rate": 1.9001814712005307e-05, "loss": 0.409, "step": 2639 }, { "epoch": 0.3212655917249772, "grad_norm": 2.2848470211029053, "learning_rate": 1.9000964984245235e-05, "loss": 0.4825, "step": 2640 }, { "epoch": 0.3213872832369942, "grad_norm": 3.3710803985595703, "learning_rate": 1.9000114913979812e-05, "loss": 0.5075, "step": 2641 }, { "epoch": 0.32150897474901124, "grad_norm": 3.251838207244873, "learning_rate": 1.8999264501241383e-05, "loss": 0.4821, "step": 2642 }, { "epoch": 0.3216306662610283, "grad_norm": 1.7791250944137573, "learning_rate": 1.899841374606232e-05, "loss": 0.4193, "step": 2643 }, { "epoch": 0.32175235777304534, "grad_norm": 3.4943878650665283, "learning_rate": 1.8997562648474982e-05, "loss": 0.4736, "step": 2644 }, { "epoch": 0.32187404928506236, "grad_norm": 1.01828932762146, "learning_rate": 1.8996711208511763e-05, "loss": 0.4264, "step": 2645 }, { "epoch": 0.3219957407970794, "grad_norm": 0.7106894850730896, "learning_rate": 1.899585942620506e-05, "loss": 0.4713, "step": 2646 }, { "epoch": 0.32211743230909645, "grad_norm": 0.8335497379302979, "learning_rate": 1.8995007301587286e-05, "loss": 0.4871, "step": 2647 }, { "epoch": 0.32223912382111347, "grad_norm": 1.4388506412506104, "learning_rate": 1.8994154834690867e-05, "loss": 0.4841, "step": 2648 }, { "epoch": 0.3223608153331305, "grad_norm": 3.9006192684173584, "learning_rate": 1.899330202554824e-05, "loss": 0.442, "step": 2649 }, { "epoch": 0.32248250684514757, "grad_norm": 4.2960100173950195, "learning_rate": 1.8992448874191852e-05, "loss": 0.4606, "step": 2650 }, { "epoch": 0.3226041983571646, "grad_norm": 3.8702080249786377, "learning_rate": 1.8991595380654173e-05, "loss": 0.4501, "step": 2651 }, { "epoch": 0.3227258898691816, "grad_norm": 0.8250051736831665, "learning_rate": 1.8990741544967683e-05, "loss": 0.5075, "step": 2652 }, { "epoch": 0.3228475813811987, "grad_norm": 1.2080780267715454, "learning_rate": 1.898988736716486e-05, "loss": 0.4404, "step": 2653 }, { "epoch": 0.3229692728932157, "grad_norm": 0.9255439043045044, "learning_rate": 1.8989032847278216e-05, "loss": 0.3903, "step": 2654 }, { "epoch": 0.3230909644052327, "grad_norm": 1.1625902652740479, "learning_rate": 1.8988177985340267e-05, "loss": 0.4181, "step": 2655 }, { "epoch": 0.3232126559172498, "grad_norm": 1.6690421104431152, "learning_rate": 1.898732278138354e-05, "loss": 0.4357, "step": 2656 }, { "epoch": 0.3233343474292668, "grad_norm": 4.66319465637207, "learning_rate": 1.8986467235440585e-05, "loss": 0.5556, "step": 2657 }, { "epoch": 0.32345603894128383, "grad_norm": 3.319766044616699, "learning_rate": 1.8985611347543944e-05, "loss": 0.4759, "step": 2658 }, { "epoch": 0.3235777304533009, "grad_norm": 4.456297874450684, "learning_rate": 1.8984755117726194e-05, "loss": 0.5651, "step": 2659 }, { "epoch": 0.3236994219653179, "grad_norm": 1.5082676410675049, "learning_rate": 1.8983898546019916e-05, "loss": 0.4919, "step": 2660 }, { "epoch": 0.32382111347733494, "grad_norm": 1.5878376960754395, "learning_rate": 1.89830416324577e-05, "loss": 0.5402, "step": 2661 }, { "epoch": 0.323942804989352, "grad_norm": 1.2165762186050415, "learning_rate": 1.8982184377072155e-05, "loss": 0.5297, "step": 2662 }, { "epoch": 0.32406449650136904, "grad_norm": 3.985576629638672, "learning_rate": 1.8981326779895905e-05, "loss": 0.5141, "step": 2663 }, { "epoch": 0.32418618801338606, "grad_norm": 6.296304702758789, "learning_rate": 1.898046884096158e-05, "loss": 0.5078, "step": 2664 }, { "epoch": 0.3243078795254031, "grad_norm": 4.510111331939697, "learning_rate": 1.8979610560301826e-05, "loss": 0.4696, "step": 2665 }, { "epoch": 0.32442957103742015, "grad_norm": 6.386861324310303, "learning_rate": 1.8978751937949304e-05, "loss": 0.4972, "step": 2666 }, { "epoch": 0.32455126254943717, "grad_norm": 4.193247318267822, "learning_rate": 1.8977892973936685e-05, "loss": 0.4859, "step": 2667 }, { "epoch": 0.3246729540614542, "grad_norm": 1.83122718334198, "learning_rate": 1.8977033668296658e-05, "loss": 0.5128, "step": 2668 }, { "epoch": 0.32479464557347126, "grad_norm": 1.1672346591949463, "learning_rate": 1.8976174021061917e-05, "loss": 0.463, "step": 2669 }, { "epoch": 0.3249163370854883, "grad_norm": 0.722151517868042, "learning_rate": 1.8975314032265175e-05, "loss": 0.4263, "step": 2670 }, { "epoch": 0.3250380285975053, "grad_norm": 0.6169853210449219, "learning_rate": 1.8974453701939155e-05, "loss": 0.4185, "step": 2671 }, { "epoch": 0.3251597201095224, "grad_norm": 7.150989055633545, "learning_rate": 1.8973593030116597e-05, "loss": 0.5815, "step": 2672 }, { "epoch": 0.3252814116215394, "grad_norm": 1.5084476470947266, "learning_rate": 1.897273201683025e-05, "loss": 0.4425, "step": 2673 }, { "epoch": 0.3254031031335564, "grad_norm": 2.0572752952575684, "learning_rate": 1.8971870662112873e-05, "loss": 0.4273, "step": 2674 }, { "epoch": 0.3255247946455735, "grad_norm": 3.8607330322265625, "learning_rate": 1.897100896599725e-05, "loss": 0.5148, "step": 2675 }, { "epoch": 0.3256464861575905, "grad_norm": 4.759674072265625, "learning_rate": 1.8970146928516167e-05, "loss": 0.5774, "step": 2676 }, { "epoch": 0.32576817766960753, "grad_norm": 3.9468443393707275, "learning_rate": 1.8969284549702425e-05, "loss": 0.5379, "step": 2677 }, { "epoch": 0.3258898691816246, "grad_norm": 3.6373515129089355, "learning_rate": 1.8968421829588843e-05, "loss": 0.5789, "step": 2678 }, { "epoch": 0.3260115606936416, "grad_norm": 1.2568577527999878, "learning_rate": 1.8967558768208244e-05, "loss": 0.4792, "step": 2679 }, { "epoch": 0.32613325220565864, "grad_norm": 3.928135633468628, "learning_rate": 1.8966695365593476e-05, "loss": 0.4718, "step": 2680 }, { "epoch": 0.3262549437176757, "grad_norm": 2.526728868484497, "learning_rate": 1.8965831621777385e-05, "loss": 0.534, "step": 2681 }, { "epoch": 0.32637663522969274, "grad_norm": 4.106503009796143, "learning_rate": 1.8964967536792845e-05, "loss": 0.5144, "step": 2682 }, { "epoch": 0.32649832674170975, "grad_norm": 1.0128921270370483, "learning_rate": 1.8964103110672734e-05, "loss": 0.4953, "step": 2683 }, { "epoch": 0.32662001825372683, "grad_norm": 0.894976794719696, "learning_rate": 1.8963238343449945e-05, "loss": 0.5, "step": 2684 }, { "epoch": 0.32674170976574385, "grad_norm": 2.012657642364502, "learning_rate": 1.8962373235157382e-05, "loss": 0.4423, "step": 2685 }, { "epoch": 0.32686340127776087, "grad_norm": 1.444948673248291, "learning_rate": 1.8961507785827966e-05, "loss": 0.4272, "step": 2686 }, { "epoch": 0.3269850927897779, "grad_norm": 3.2959814071655273, "learning_rate": 1.896064199549463e-05, "loss": 0.5427, "step": 2687 }, { "epoch": 0.32710678430179496, "grad_norm": 4.652558326721191, "learning_rate": 1.895977586419032e-05, "loss": 0.5288, "step": 2688 }, { "epoch": 0.327228475813812, "grad_norm": 0.7047215104103088, "learning_rate": 1.895890939194799e-05, "loss": 0.3965, "step": 2689 }, { "epoch": 0.327350167325829, "grad_norm": 3.409531354904175, "learning_rate": 1.8958042578800614e-05, "loss": 0.4796, "step": 2690 }, { "epoch": 0.3274718588378461, "grad_norm": 1.6422313451766968, "learning_rate": 1.895717542478118e-05, "loss": 0.4466, "step": 2691 }, { "epoch": 0.3275935503498631, "grad_norm": 5.620854377746582, "learning_rate": 1.8956307929922676e-05, "loss": 0.5593, "step": 2692 }, { "epoch": 0.3277152418618801, "grad_norm": 1.3282305002212524, "learning_rate": 1.895544009425812e-05, "loss": 0.46, "step": 2693 }, { "epoch": 0.3278369333738972, "grad_norm": 1.2437347173690796, "learning_rate": 1.895457191782053e-05, "loss": 0.4019, "step": 2694 }, { "epoch": 0.3279586248859142, "grad_norm": 1.669395923614502, "learning_rate": 1.8953703400642945e-05, "loss": 0.485, "step": 2695 }, { "epoch": 0.3280803163979312, "grad_norm": 1.0959354639053345, "learning_rate": 1.8952834542758413e-05, "loss": 0.452, "step": 2696 }, { "epoch": 0.3282020079099483, "grad_norm": 2.905395984649658, "learning_rate": 1.8951965344199995e-05, "loss": 0.463, "step": 2697 }, { "epoch": 0.3283236994219653, "grad_norm": 2.0541398525238037, "learning_rate": 1.8951095805000762e-05, "loss": 0.4591, "step": 2698 }, { "epoch": 0.32844539093398234, "grad_norm": 0.7949023246765137, "learning_rate": 1.895022592519381e-05, "loss": 0.5016, "step": 2699 }, { "epoch": 0.3285670824459994, "grad_norm": 0.7792330980300903, "learning_rate": 1.8949355704812235e-05, "loss": 0.4759, "step": 2700 }, { "epoch": 0.32868877395801643, "grad_norm": 1.6458280086517334, "learning_rate": 1.894848514388915e-05, "loss": 0.4482, "step": 2701 }, { "epoch": 0.32881046547003345, "grad_norm": 2.4719221591949463, "learning_rate": 1.8947614242457685e-05, "loss": 0.5209, "step": 2702 }, { "epoch": 0.3289321569820505, "grad_norm": 3.014310359954834, "learning_rate": 1.8946743000550975e-05, "loss": 0.4965, "step": 2703 }, { "epoch": 0.32905384849406755, "grad_norm": 0.6068018674850464, "learning_rate": 1.8945871418202174e-05, "loss": 0.4677, "step": 2704 }, { "epoch": 0.32917554000608457, "grad_norm": 1.910712480545044, "learning_rate": 1.894499949544445e-05, "loss": 0.5107, "step": 2705 }, { "epoch": 0.3292972315181016, "grad_norm": 1.9104607105255127, "learning_rate": 1.8944127232310984e-05, "loss": 0.4692, "step": 2706 }, { "epoch": 0.32941892303011866, "grad_norm": 2.0082781314849854, "learning_rate": 1.894325462883496e-05, "loss": 0.4773, "step": 2707 }, { "epoch": 0.3295406145421357, "grad_norm": 0.5868907570838928, "learning_rate": 1.8942381685049587e-05, "loss": 0.4841, "step": 2708 }, { "epoch": 0.3296623060541527, "grad_norm": 0.6337372660636902, "learning_rate": 1.894150840098808e-05, "loss": 0.4778, "step": 2709 }, { "epoch": 0.3297839975661698, "grad_norm": 2.131854772567749, "learning_rate": 1.8940634776683672e-05, "loss": 0.435, "step": 2710 }, { "epoch": 0.3299056890781868, "grad_norm": 3.417280673980713, "learning_rate": 1.89397608121696e-05, "loss": 0.5719, "step": 2711 }, { "epoch": 0.3300273805902038, "grad_norm": 0.8599411845207214, "learning_rate": 1.893888650747913e-05, "loss": 0.4387, "step": 2712 }, { "epoch": 0.3301490721022209, "grad_norm": 2.0451738834381104, "learning_rate": 1.8938011862645527e-05, "loss": 0.4744, "step": 2713 }, { "epoch": 0.3302707636142379, "grad_norm": 1.1521553993225098, "learning_rate": 1.8937136877702066e-05, "loss": 0.4606, "step": 2714 }, { "epoch": 0.3303924551262549, "grad_norm": 0.6685805916786194, "learning_rate": 1.8936261552682053e-05, "loss": 0.4721, "step": 2715 }, { "epoch": 0.330514146638272, "grad_norm": 0.9258437752723694, "learning_rate": 1.893538588761879e-05, "loss": 0.4562, "step": 2716 }, { "epoch": 0.330635838150289, "grad_norm": 0.9971997141838074, "learning_rate": 1.8934509882545597e-05, "loss": 0.4747, "step": 2717 }, { "epoch": 0.33075752966230604, "grad_norm": 2.043272018432617, "learning_rate": 1.893363353749581e-05, "loss": 0.5159, "step": 2718 }, { "epoch": 0.3308792211743231, "grad_norm": 0.5416118502616882, "learning_rate": 1.8932756852502777e-05, "loss": 0.5034, "step": 2719 }, { "epoch": 0.33100091268634013, "grad_norm": 1.4942349195480347, "learning_rate": 1.8931879827599853e-05, "loss": 0.5068, "step": 2720 }, { "epoch": 0.33112260419835715, "grad_norm": 1.0185729265213013, "learning_rate": 1.8931002462820416e-05, "loss": 0.4905, "step": 2721 }, { "epoch": 0.3312442957103742, "grad_norm": 1.0013389587402344, "learning_rate": 1.893012475819785e-05, "loss": 0.5051, "step": 2722 }, { "epoch": 0.33136598722239125, "grad_norm": 0.8700234889984131, "learning_rate": 1.8929246713765548e-05, "loss": 0.4942, "step": 2723 }, { "epoch": 0.33148767873440826, "grad_norm": 1.5754849910736084, "learning_rate": 1.892836832955693e-05, "loss": 0.4665, "step": 2724 }, { "epoch": 0.33160937024642534, "grad_norm": 2.8131306171417236, "learning_rate": 1.8927489605605414e-05, "loss": 0.458, "step": 2725 }, { "epoch": 0.33173106175844236, "grad_norm": 1.2453330755233765, "learning_rate": 1.8926610541944443e-05, "loss": 0.4675, "step": 2726 }, { "epoch": 0.3318527532704594, "grad_norm": 0.7671390771865845, "learning_rate": 1.892573113860746e-05, "loss": 0.4825, "step": 2727 }, { "epoch": 0.3319744447824764, "grad_norm": 1.2482337951660156, "learning_rate": 1.8924851395627932e-05, "loss": 0.4569, "step": 2728 }, { "epoch": 0.33209613629449347, "grad_norm": 0.6935709714889526, "learning_rate": 1.8923971313039336e-05, "loss": 0.4748, "step": 2729 }, { "epoch": 0.3322178278065105, "grad_norm": 0.9674332141876221, "learning_rate": 1.8923090890875164e-05, "loss": 0.506, "step": 2730 }, { "epoch": 0.3323395193185275, "grad_norm": 1.2981398105621338, "learning_rate": 1.892221012916891e-05, "loss": 0.4117, "step": 2731 }, { "epoch": 0.3324612108305446, "grad_norm": 0.8491005897521973, "learning_rate": 1.892132902795409e-05, "loss": 0.474, "step": 2732 }, { "epoch": 0.3325829023425616, "grad_norm": 0.6901066899299622, "learning_rate": 1.892044758726424e-05, "loss": 0.4595, "step": 2733 }, { "epoch": 0.3327045938545786, "grad_norm": 0.8076638579368591, "learning_rate": 1.8919565807132893e-05, "loss": 0.4713, "step": 2734 }, { "epoch": 0.3328262853665957, "grad_norm": 1.3747620582580566, "learning_rate": 1.89186836875936e-05, "loss": 0.5038, "step": 2735 }, { "epoch": 0.3329479768786127, "grad_norm": 0.7989272475242615, "learning_rate": 1.891780122867994e-05, "loss": 0.4633, "step": 2736 }, { "epoch": 0.33306966839062974, "grad_norm": 0.6159539222717285, "learning_rate": 1.891691843042548e-05, "loss": 0.5099, "step": 2737 }, { "epoch": 0.3331913599026468, "grad_norm": 4.381326198577881, "learning_rate": 1.8916035292863816e-05, "loss": 0.4413, "step": 2738 }, { "epoch": 0.33331305141466383, "grad_norm": 1.493550181388855, "learning_rate": 1.8915151816028554e-05, "loss": 0.4773, "step": 2739 }, { "epoch": 0.33343474292668085, "grad_norm": 2.293708086013794, "learning_rate": 1.8914267999953314e-05, "loss": 0.4657, "step": 2740 }, { "epoch": 0.3335564344386979, "grad_norm": 1.616192102432251, "learning_rate": 1.8913383844671722e-05, "loss": 0.4863, "step": 2741 }, { "epoch": 0.33367812595071494, "grad_norm": 1.026611566543579, "learning_rate": 1.8912499350217427e-05, "loss": 0.4974, "step": 2742 }, { "epoch": 0.33379981746273196, "grad_norm": 1.9575178623199463, "learning_rate": 1.8911614516624085e-05, "loss": 0.5133, "step": 2743 }, { "epoch": 0.33392150897474904, "grad_norm": 2.692417860031128, "learning_rate": 1.8910729343925366e-05, "loss": 0.5383, "step": 2744 }, { "epoch": 0.33404320048676606, "grad_norm": 0.7036192417144775, "learning_rate": 1.8909843832154946e-05, "loss": 0.4751, "step": 2745 }, { "epoch": 0.3341648919987831, "grad_norm": 1.0749365091323853, "learning_rate": 1.890895798134653e-05, "loss": 0.4912, "step": 2746 }, { "epoch": 0.3342865835108001, "grad_norm": 2.491318702697754, "learning_rate": 1.890807179153382e-05, "loss": 0.4296, "step": 2747 }, { "epoch": 0.33440827502281717, "grad_norm": 2.35545015335083, "learning_rate": 1.890718526275054e-05, "loss": 0.4132, "step": 2748 }, { "epoch": 0.3345299665348342, "grad_norm": 0.6383217573165894, "learning_rate": 1.8906298395030424e-05, "loss": 0.4687, "step": 2749 }, { "epoch": 0.3346516580468512, "grad_norm": 2.1951515674591064, "learning_rate": 1.8905411188407223e-05, "loss": 0.4882, "step": 2750 }, { "epoch": 0.3347733495588683, "grad_norm": 1.6575039625167847, "learning_rate": 1.8904523642914688e-05, "loss": 0.493, "step": 2751 }, { "epoch": 0.3348950410708853, "grad_norm": 1.6144286394119263, "learning_rate": 1.89036357585866e-05, "loss": 0.4798, "step": 2752 }, { "epoch": 0.3350167325829023, "grad_norm": 2.8362350463867188, "learning_rate": 1.890274753545674e-05, "loss": 0.5066, "step": 2753 }, { "epoch": 0.3351384240949194, "grad_norm": 4.57258939743042, "learning_rate": 1.8901858973558908e-05, "loss": 0.3973, "step": 2754 }, { "epoch": 0.3352601156069364, "grad_norm": 0.7866657376289368, "learning_rate": 1.890097007292692e-05, "loss": 0.5257, "step": 2755 }, { "epoch": 0.33538180711895343, "grad_norm": 0.8888288736343384, "learning_rate": 1.890008083359459e-05, "loss": 0.5212, "step": 2756 }, { "epoch": 0.3355034986309705, "grad_norm": 2.7362277507781982, "learning_rate": 1.8899191255595765e-05, "loss": 0.4664, "step": 2757 }, { "epoch": 0.33562519014298753, "grad_norm": 2.118499994277954, "learning_rate": 1.8898301338964294e-05, "loss": 0.5035, "step": 2758 }, { "epoch": 0.33574688165500455, "grad_norm": 0.7798293232917786, "learning_rate": 1.889741108373404e-05, "loss": 0.4654, "step": 2759 }, { "epoch": 0.3358685731670216, "grad_norm": 1.1053814888000488, "learning_rate": 1.8896520489938875e-05, "loss": 0.4615, "step": 2760 }, { "epoch": 0.33599026467903864, "grad_norm": 1.2729030847549438, "learning_rate": 1.889562955761269e-05, "loss": 0.4872, "step": 2761 }, { "epoch": 0.33611195619105566, "grad_norm": 0.782295286655426, "learning_rate": 1.8894738286789392e-05, "loss": 0.4995, "step": 2762 }, { "epoch": 0.33623364770307274, "grad_norm": 0.6598064303398132, "learning_rate": 1.889384667750289e-05, "loss": 0.418, "step": 2763 }, { "epoch": 0.33635533921508975, "grad_norm": 3.087890863418579, "learning_rate": 1.889295472978711e-05, "loss": 0.5344, "step": 2764 }, { "epoch": 0.3364770307271068, "grad_norm": 0.7532784938812256, "learning_rate": 1.8892062443675994e-05, "loss": 0.4529, "step": 2765 }, { "epoch": 0.33659872223912385, "grad_norm": 2.8095479011535645, "learning_rate": 1.8891169819203495e-05, "loss": 0.5191, "step": 2766 }, { "epoch": 0.33672041375114087, "grad_norm": 1.919663429260254, "learning_rate": 1.8890276856403587e-05, "loss": 0.5213, "step": 2767 }, { "epoch": 0.3368421052631579, "grad_norm": 4.026738166809082, "learning_rate": 1.888938355531024e-05, "loss": 0.445, "step": 2768 }, { "epoch": 0.3369637967751749, "grad_norm": 2.9974539279937744, "learning_rate": 1.8888489915957445e-05, "loss": 0.4558, "step": 2769 }, { "epoch": 0.337085488287192, "grad_norm": 4.151406288146973, "learning_rate": 1.8887595938379215e-05, "loss": 0.4215, "step": 2770 }, { "epoch": 0.337207179799209, "grad_norm": 1.079087495803833, "learning_rate": 1.888670162260956e-05, "loss": 0.4717, "step": 2771 }, { "epoch": 0.337328871311226, "grad_norm": 1.5083805322647095, "learning_rate": 1.888580696868252e-05, "loss": 0.461, "step": 2772 }, { "epoch": 0.3374505628232431, "grad_norm": 1.3275517225265503, "learning_rate": 1.888491197663213e-05, "loss": 0.4833, "step": 2773 }, { "epoch": 0.3375722543352601, "grad_norm": 4.457827091217041, "learning_rate": 1.8884016646492443e-05, "loss": 0.5247, "step": 2774 }, { "epoch": 0.33769394584727713, "grad_norm": 1.0470054149627686, "learning_rate": 1.8883120978297538e-05, "loss": 0.4699, "step": 2775 }, { "epoch": 0.3378156373592942, "grad_norm": 3.0794103145599365, "learning_rate": 1.888222497208149e-05, "loss": 0.5117, "step": 2776 }, { "epoch": 0.3379373288713112, "grad_norm": 1.5797334909439087, "learning_rate": 1.88813286278784e-05, "loss": 0.5184, "step": 2777 }, { "epoch": 0.33805902038332825, "grad_norm": 0.5934585332870483, "learning_rate": 1.888043194572237e-05, "loss": 0.4778, "step": 2778 }, { "epoch": 0.3381807118953453, "grad_norm": 0.6023896336555481, "learning_rate": 1.8879534925647526e-05, "loss": 0.5243, "step": 2779 }, { "epoch": 0.33830240340736234, "grad_norm": 3.0522515773773193, "learning_rate": 1.8878637567687997e-05, "loss": 0.4728, "step": 2780 }, { "epoch": 0.33842409491937936, "grad_norm": 2.841778516769409, "learning_rate": 1.887773987187793e-05, "loss": 0.4514, "step": 2781 }, { "epoch": 0.33854578643139643, "grad_norm": 1.7480906248092651, "learning_rate": 1.8876841838251483e-05, "loss": 0.4862, "step": 2782 }, { "epoch": 0.33866747794341345, "grad_norm": 1.4940919876098633, "learning_rate": 1.8875943466842833e-05, "loss": 0.5177, "step": 2783 }, { "epoch": 0.33878916945543047, "grad_norm": 2.3407397270202637, "learning_rate": 1.8875044757686162e-05, "loss": 0.5175, "step": 2784 }, { "epoch": 0.33891086096744755, "grad_norm": 1.7706379890441895, "learning_rate": 1.8874145710815666e-05, "loss": 0.4893, "step": 2785 }, { "epoch": 0.33903255247946457, "grad_norm": 1.0196385383605957, "learning_rate": 1.8873246326265558e-05, "loss": 0.4712, "step": 2786 }, { "epoch": 0.3391542439914816, "grad_norm": 0.5567321181297302, "learning_rate": 1.887234660407006e-05, "loss": 0.4105, "step": 2787 }, { "epoch": 0.3392759355034986, "grad_norm": 3.6515402793884277, "learning_rate": 1.887144654426341e-05, "loss": 0.5096, "step": 2788 }, { "epoch": 0.3393976270155157, "grad_norm": 2.8964643478393555, "learning_rate": 1.8870546146879854e-05, "loss": 0.5019, "step": 2789 }, { "epoch": 0.3395193185275327, "grad_norm": 4.397404193878174, "learning_rate": 1.886964541195366e-05, "loss": 0.5012, "step": 2790 }, { "epoch": 0.3396410100395497, "grad_norm": 3.022035837173462, "learning_rate": 1.8868744339519094e-05, "loss": 0.5045, "step": 2791 }, { "epoch": 0.3397627015515668, "grad_norm": 0.6503461003303528, "learning_rate": 1.8867842929610454e-05, "loss": 0.4702, "step": 2792 }, { "epoch": 0.3398843930635838, "grad_norm": 1.7951877117156982, "learning_rate": 1.886694118226203e-05, "loss": 0.5322, "step": 2793 }, { "epoch": 0.34000608457560083, "grad_norm": 1.2949520349502563, "learning_rate": 1.8866039097508145e-05, "loss": 0.4826, "step": 2794 }, { "epoch": 0.3401277760876179, "grad_norm": 4.235204219818115, "learning_rate": 1.8865136675383118e-05, "loss": 0.4421, "step": 2795 }, { "epoch": 0.3402494675996349, "grad_norm": 2.6653316020965576, "learning_rate": 1.886423391592129e-05, "loss": 0.4239, "step": 2796 }, { "epoch": 0.34037115911165194, "grad_norm": 5.5826263427734375, "learning_rate": 1.8863330819157017e-05, "loss": 0.4413, "step": 2797 }, { "epoch": 0.340492850623669, "grad_norm": 2.973708152770996, "learning_rate": 1.8862427385124656e-05, "loss": 0.4527, "step": 2798 }, { "epoch": 0.34061454213568604, "grad_norm": 3.3249263763427734, "learning_rate": 1.8861523613858592e-05, "loss": 0.4075, "step": 2799 }, { "epoch": 0.34073623364770306, "grad_norm": 0.6413004398345947, "learning_rate": 1.8860619505393208e-05, "loss": 0.4784, "step": 2800 }, { "epoch": 0.34085792515972013, "grad_norm": 3.2820169925689697, "learning_rate": 1.8859715059762914e-05, "loss": 0.5052, "step": 2801 }, { "epoch": 0.34097961667173715, "grad_norm": 3.1755149364471436, "learning_rate": 1.8858810277002126e-05, "loss": 0.5167, "step": 2802 }, { "epoch": 0.34110130818375417, "grad_norm": 2.0215020179748535, "learning_rate": 1.8857905157145267e-05, "loss": 0.5109, "step": 2803 }, { "epoch": 0.34122299969577125, "grad_norm": 3.8230559825897217, "learning_rate": 1.8856999700226782e-05, "loss": 0.5325, "step": 2804 }, { "epoch": 0.34134469120778826, "grad_norm": 0.9028104543685913, "learning_rate": 1.8856093906281128e-05, "loss": 0.4747, "step": 2805 }, { "epoch": 0.3414663827198053, "grad_norm": 1.2290637493133545, "learning_rate": 1.8855187775342765e-05, "loss": 0.4252, "step": 2806 }, { "epoch": 0.3415880742318223, "grad_norm": 1.887423038482666, "learning_rate": 1.885428130744618e-05, "loss": 0.54, "step": 2807 }, { "epoch": 0.3417097657438394, "grad_norm": 0.7684519290924072, "learning_rate": 1.8853374502625868e-05, "loss": 0.5034, "step": 2808 }, { "epoch": 0.3418314572558564, "grad_norm": 3.2190868854522705, "learning_rate": 1.8852467360916327e-05, "loss": 0.4629, "step": 2809 }, { "epoch": 0.3419531487678734, "grad_norm": 0.6683231592178345, "learning_rate": 1.885155988235208e-05, "loss": 0.5333, "step": 2810 }, { "epoch": 0.3420748402798905, "grad_norm": 1.9942041635513306, "learning_rate": 1.8850652066967656e-05, "loss": 0.5002, "step": 2811 }, { "epoch": 0.3421965317919075, "grad_norm": 2.207380533218384, "learning_rate": 1.8849743914797603e-05, "loss": 0.5059, "step": 2812 }, { "epoch": 0.34231822330392453, "grad_norm": 1.8366376161575317, "learning_rate": 1.8848835425876474e-05, "loss": 0.5013, "step": 2813 }, { "epoch": 0.3424399148159416, "grad_norm": 2.434509515762329, "learning_rate": 1.8847926600238843e-05, "loss": 0.4347, "step": 2814 }, { "epoch": 0.3425616063279586, "grad_norm": 1.484734296798706, "learning_rate": 1.884701743791929e-05, "loss": 0.4256, "step": 2815 }, { "epoch": 0.34268329783997564, "grad_norm": 1.5371334552764893, "learning_rate": 1.8846107938952412e-05, "loss": 0.4507, "step": 2816 }, { "epoch": 0.3428049893519927, "grad_norm": 4.77773904800415, "learning_rate": 1.8845198103372814e-05, "loss": 0.5031, "step": 2817 }, { "epoch": 0.34292668086400974, "grad_norm": 4.3006157875061035, "learning_rate": 1.8844287931215123e-05, "loss": 0.5207, "step": 2818 }, { "epoch": 0.34304837237602676, "grad_norm": 0.9419856667518616, "learning_rate": 1.884337742251397e-05, "loss": 0.3967, "step": 2819 }, { "epoch": 0.34317006388804383, "grad_norm": 4.042980670928955, "learning_rate": 1.8842466577303997e-05, "loss": 0.5164, "step": 2820 }, { "epoch": 0.34329175540006085, "grad_norm": 1.0231975317001343, "learning_rate": 1.884155539561987e-05, "loss": 0.4028, "step": 2821 }, { "epoch": 0.34341344691207787, "grad_norm": 3.537090301513672, "learning_rate": 1.8840643877496258e-05, "loss": 0.4964, "step": 2822 }, { "epoch": 0.34353513842409494, "grad_norm": 2.2879130840301514, "learning_rate": 1.883973202296785e-05, "loss": 0.5221, "step": 2823 }, { "epoch": 0.34365682993611196, "grad_norm": 1.6462324857711792, "learning_rate": 1.883881983206934e-05, "loss": 0.482, "step": 2824 }, { "epoch": 0.343778521448129, "grad_norm": 2.8886613845825195, "learning_rate": 1.883790730483544e-05, "loss": 0.482, "step": 2825 }, { "epoch": 0.34390021296014606, "grad_norm": 1.7604490518569946, "learning_rate": 1.8836994441300875e-05, "loss": 0.5086, "step": 2826 }, { "epoch": 0.3440219044721631, "grad_norm": 3.2546560764312744, "learning_rate": 1.8836081241500378e-05, "loss": 0.5067, "step": 2827 }, { "epoch": 0.3441435959841801, "grad_norm": 3.459789991378784, "learning_rate": 1.88351677054687e-05, "loss": 0.4771, "step": 2828 }, { "epoch": 0.3442652874961971, "grad_norm": 2.373236894607544, "learning_rate": 1.8834253833240603e-05, "loss": 0.4621, "step": 2829 }, { "epoch": 0.3443869790082142, "grad_norm": 1.322063684463501, "learning_rate": 1.8833339624850864e-05, "loss": 0.5272, "step": 2830 }, { "epoch": 0.3445086705202312, "grad_norm": 1.5012385845184326, "learning_rate": 1.8832425080334266e-05, "loss": 0.411, "step": 2831 }, { "epoch": 0.3446303620322482, "grad_norm": 1.3234150409698486, "learning_rate": 1.883151019972561e-05, "loss": 0.4698, "step": 2832 }, { "epoch": 0.3447520535442653, "grad_norm": 2.324329137802124, "learning_rate": 1.8830594983059712e-05, "loss": 0.4496, "step": 2833 }, { "epoch": 0.3448737450562823, "grad_norm": 1.3122886419296265, "learning_rate": 1.88296794303714e-05, "loss": 0.4643, "step": 2834 }, { "epoch": 0.34499543656829934, "grad_norm": 1.234619140625, "learning_rate": 1.8828763541695507e-05, "loss": 0.4623, "step": 2835 }, { "epoch": 0.3451171280803164, "grad_norm": 3.562305450439453, "learning_rate": 1.8827847317066887e-05, "loss": 0.5686, "step": 2836 }, { "epoch": 0.34523881959233343, "grad_norm": 2.6911988258361816, "learning_rate": 1.8826930756520402e-05, "loss": 0.5208, "step": 2837 }, { "epoch": 0.34536051110435045, "grad_norm": 1.0738780498504639, "learning_rate": 1.882601386009093e-05, "loss": 0.5003, "step": 2838 }, { "epoch": 0.34548220261636753, "grad_norm": 2.8758702278137207, "learning_rate": 1.8825096627813366e-05, "loss": 0.5024, "step": 2839 }, { "epoch": 0.34560389412838455, "grad_norm": 2.8085744380950928, "learning_rate": 1.8824179059722607e-05, "loss": 0.4672, "step": 2840 }, { "epoch": 0.34572558564040157, "grad_norm": 1.3944097757339478, "learning_rate": 1.882326115585357e-05, "loss": 0.4967, "step": 2841 }, { "epoch": 0.34584727715241864, "grad_norm": 1.1971148252487183, "learning_rate": 1.882234291624118e-05, "loss": 0.4531, "step": 2842 }, { "epoch": 0.34596896866443566, "grad_norm": 0.6625984907150269, "learning_rate": 1.8821424340920385e-05, "loss": 0.4953, "step": 2843 }, { "epoch": 0.3460906601764527, "grad_norm": 3.236550807952881, "learning_rate": 1.882050542992613e-05, "loss": 0.3911, "step": 2844 }, { "epoch": 0.34621235168846975, "grad_norm": 3.334770441055298, "learning_rate": 1.881958618329339e-05, "loss": 0.4725, "step": 2845 }, { "epoch": 0.3463340432004868, "grad_norm": 1.0479373931884766, "learning_rate": 1.8818666601057138e-05, "loss": 0.4473, "step": 2846 }, { "epoch": 0.3464557347125038, "grad_norm": 4.242272853851318, "learning_rate": 1.8817746683252368e-05, "loss": 0.4854, "step": 2847 }, { "epoch": 0.3465774262245208, "grad_norm": 2.097931385040283, "learning_rate": 1.8816826429914087e-05, "loss": 0.4628, "step": 2848 }, { "epoch": 0.3466991177365379, "grad_norm": 0.7699806094169617, "learning_rate": 1.8815905841077307e-05, "loss": 0.4293, "step": 2849 }, { "epoch": 0.3468208092485549, "grad_norm": 1.3720494508743286, "learning_rate": 1.8814984916777062e-05, "loss": 0.4073, "step": 2850 }, { "epoch": 0.3469425007605719, "grad_norm": 0.8056511282920837, "learning_rate": 1.88140636570484e-05, "loss": 0.4344, "step": 2851 }, { "epoch": 0.347064192272589, "grad_norm": 0.6837261915206909, "learning_rate": 1.8813142061926365e-05, "loss": 0.4409, "step": 2852 }, { "epoch": 0.347185883784606, "grad_norm": 1.1518371105194092, "learning_rate": 1.8812220131446036e-05, "loss": 0.4991, "step": 2853 }, { "epoch": 0.34730757529662304, "grad_norm": 1.1454092264175415, "learning_rate": 1.8811297865642494e-05, "loss": 0.4919, "step": 2854 }, { "epoch": 0.3474292668086401, "grad_norm": 2.137289047241211, "learning_rate": 1.8810375264550823e-05, "loss": 0.4316, "step": 2855 }, { "epoch": 0.34755095832065713, "grad_norm": 1.0075408220291138, "learning_rate": 1.880945232820614e-05, "loss": 0.5358, "step": 2856 }, { "epoch": 0.34767264983267415, "grad_norm": 0.9391731023788452, "learning_rate": 1.8808529056643564e-05, "loss": 0.5343, "step": 2857 }, { "epoch": 0.3477943413446912, "grad_norm": 1.0174161195755005, "learning_rate": 1.8807605449898222e-05, "loss": 0.527, "step": 2858 }, { "epoch": 0.34791603285670825, "grad_norm": 1.3289825916290283, "learning_rate": 1.880668150800526e-05, "loss": 0.4528, "step": 2859 }, { "epoch": 0.34803772436872527, "grad_norm": 0.6193122863769531, "learning_rate": 1.880575723099984e-05, "loss": 0.4765, "step": 2860 }, { "epoch": 0.34815941588074234, "grad_norm": 0.7730928659439087, "learning_rate": 1.8804832618917133e-05, "loss": 0.4744, "step": 2861 }, { "epoch": 0.34828110739275936, "grad_norm": 2.7276222705841064, "learning_rate": 1.8803907671792317e-05, "loss": 0.541, "step": 2862 }, { "epoch": 0.3484027989047764, "grad_norm": 0.9313828945159912, "learning_rate": 1.880298238966059e-05, "loss": 0.4434, "step": 2863 }, { "epoch": 0.34852449041679345, "grad_norm": 0.7989535331726074, "learning_rate": 1.8802056772557162e-05, "loss": 0.4701, "step": 2864 }, { "epoch": 0.3486461819288105, "grad_norm": 2.94881010055542, "learning_rate": 1.8801130820517256e-05, "loss": 0.5601, "step": 2865 }, { "epoch": 0.3487678734408275, "grad_norm": 0.5548211336135864, "learning_rate": 1.8800204533576104e-05, "loss": 0.4637, "step": 2866 }, { "epoch": 0.34888956495284457, "grad_norm": 2.4702563285827637, "learning_rate": 1.8799277911768957e-05, "loss": 0.4482, "step": 2867 }, { "epoch": 0.3490112564648616, "grad_norm": 0.616993248462677, "learning_rate": 1.8798350955131068e-05, "loss": 0.4901, "step": 2868 }, { "epoch": 0.3491329479768786, "grad_norm": 0.8367584347724915, "learning_rate": 1.8797423663697714e-05, "loss": 0.4935, "step": 2869 }, { "epoch": 0.3492546394888956, "grad_norm": 3.479548454284668, "learning_rate": 1.8796496037504184e-05, "loss": 0.4365, "step": 2870 }, { "epoch": 0.3493763310009127, "grad_norm": 1.181849479675293, "learning_rate": 1.8795568076585767e-05, "loss": 0.4605, "step": 2871 }, { "epoch": 0.3494980225129297, "grad_norm": 2.44211745262146, "learning_rate": 1.8794639780977782e-05, "loss": 0.441, "step": 2872 }, { "epoch": 0.34961971402494674, "grad_norm": 2.3819055557250977, "learning_rate": 1.8793711150715546e-05, "loss": 0.5386, "step": 2873 }, { "epoch": 0.3497414055369638, "grad_norm": 0.6934733986854553, "learning_rate": 1.8792782185834402e-05, "loss": 0.4636, "step": 2874 }, { "epoch": 0.34986309704898083, "grad_norm": 0.6664032936096191, "learning_rate": 1.8791852886369697e-05, "loss": 0.4538, "step": 2875 }, { "epoch": 0.34998478856099785, "grad_norm": 2.2818241119384766, "learning_rate": 1.879092325235679e-05, "loss": 0.4985, "step": 2876 }, { "epoch": 0.3501064800730149, "grad_norm": 2.093757152557373, "learning_rate": 1.8789993283831056e-05, "loss": 0.4957, "step": 2877 }, { "epoch": 0.35022817158503194, "grad_norm": 2.0471956729888916, "learning_rate": 1.8789062980827882e-05, "loss": 0.4531, "step": 2878 }, { "epoch": 0.35034986309704896, "grad_norm": 0.890379011631012, "learning_rate": 1.8788132343382672e-05, "loss": 0.4521, "step": 2879 }, { "epoch": 0.35047155460906604, "grad_norm": 0.76273113489151, "learning_rate": 1.8787201371530834e-05, "loss": 0.4405, "step": 2880 }, { "epoch": 0.35059324612108306, "grad_norm": 0.5440983176231384, "learning_rate": 1.8786270065307797e-05, "loss": 0.4935, "step": 2881 }, { "epoch": 0.3507149376331001, "grad_norm": 0.6818732023239136, "learning_rate": 1.8785338424748997e-05, "loss": 0.5389, "step": 2882 }, { "epoch": 0.35083662914511715, "grad_norm": 1.2945020198822021, "learning_rate": 1.8784406449889884e-05, "loss": 0.4481, "step": 2883 }, { "epoch": 0.35095832065713417, "grad_norm": 1.3245103359222412, "learning_rate": 1.8783474140765925e-05, "loss": 0.4908, "step": 2884 }, { "epoch": 0.3510800121691512, "grad_norm": 1.9959620237350464, "learning_rate": 1.878254149741259e-05, "loss": 0.4373, "step": 2885 }, { "epoch": 0.35120170368116826, "grad_norm": 1.1753344535827637, "learning_rate": 1.878160851986538e-05, "loss": 0.4317, "step": 2886 }, { "epoch": 0.3513233951931853, "grad_norm": 0.8848109841346741, "learning_rate": 1.878067520815978e-05, "loss": 0.4722, "step": 2887 }, { "epoch": 0.3514450867052023, "grad_norm": 1.23782479763031, "learning_rate": 1.8779741562331318e-05, "loss": 0.47, "step": 2888 }, { "epoch": 0.3515667782172193, "grad_norm": 0.7877939343452454, "learning_rate": 1.8778807582415517e-05, "loss": 0.4515, "step": 2889 }, { "epoch": 0.3516884697292364, "grad_norm": 1.2659249305725098, "learning_rate": 1.8777873268447914e-05, "loss": 0.4937, "step": 2890 }, { "epoch": 0.3518101612412534, "grad_norm": 0.7893572449684143, "learning_rate": 1.8776938620464065e-05, "loss": 0.5063, "step": 2891 }, { "epoch": 0.35193185275327044, "grad_norm": 2.6077706813812256, "learning_rate": 1.8776003638499534e-05, "loss": 0.4431, "step": 2892 }, { "epoch": 0.3520535442652875, "grad_norm": 1.8502311706542969, "learning_rate": 1.8775068322589898e-05, "loss": 0.5301, "step": 2893 }, { "epoch": 0.35217523577730453, "grad_norm": 0.7246078252792358, "learning_rate": 1.877413267277075e-05, "loss": 0.4604, "step": 2894 }, { "epoch": 0.35229692728932155, "grad_norm": 0.6087958216667175, "learning_rate": 1.8773196689077692e-05, "loss": 0.4996, "step": 2895 }, { "epoch": 0.3524186188013386, "grad_norm": 1.547107458114624, "learning_rate": 1.8772260371546344e-05, "loss": 0.4747, "step": 2896 }, { "epoch": 0.35254031031335564, "grad_norm": 0.6520598530769348, "learning_rate": 1.8771323720212326e-05, "loss": 0.5005, "step": 2897 }, { "epoch": 0.35266200182537266, "grad_norm": 3.044903039932251, "learning_rate": 1.877038673511129e-05, "loss": 0.5054, "step": 2898 }, { "epoch": 0.35278369333738974, "grad_norm": 1.189260482788086, "learning_rate": 1.8769449416278883e-05, "loss": 0.481, "step": 2899 }, { "epoch": 0.35290538484940676, "grad_norm": 0.6019036173820496, "learning_rate": 1.8768511763750772e-05, "loss": 0.4801, "step": 2900 }, { "epoch": 0.3530270763614238, "grad_norm": 0.663816511631012, "learning_rate": 1.876757377756264e-05, "loss": 0.4882, "step": 2901 }, { "epoch": 0.35314876787344085, "grad_norm": 3.2873332500457764, "learning_rate": 1.8766635457750177e-05, "loss": 0.5305, "step": 2902 }, { "epoch": 0.35327045938545787, "grad_norm": 3.3045499324798584, "learning_rate": 1.8765696804349088e-05, "loss": 0.5321, "step": 2903 }, { "epoch": 0.3533921508974749, "grad_norm": 3.791252374649048, "learning_rate": 1.8764757817395094e-05, "loss": 0.5519, "step": 2904 }, { "epoch": 0.35351384240949196, "grad_norm": 0.915611207485199, "learning_rate": 1.8763818496923924e-05, "loss": 0.4589, "step": 2905 }, { "epoch": 0.353635533921509, "grad_norm": 0.6651073098182678, "learning_rate": 1.876287884297132e-05, "loss": 0.4832, "step": 2906 }, { "epoch": 0.353757225433526, "grad_norm": 1.3482612371444702, "learning_rate": 1.8761938855573033e-05, "loss": 0.482, "step": 2907 }, { "epoch": 0.3538789169455431, "grad_norm": 2.7918756008148193, "learning_rate": 1.876099853476484e-05, "loss": 0.4702, "step": 2908 }, { "epoch": 0.3540006084575601, "grad_norm": 1.6856144666671753, "learning_rate": 1.876005788058252e-05, "loss": 0.5149, "step": 2909 }, { "epoch": 0.3541222999695771, "grad_norm": 1.2886600494384766, "learning_rate": 1.875911689306186e-05, "loss": 0.4733, "step": 2910 }, { "epoch": 0.35424399148159413, "grad_norm": 0.736177384853363, "learning_rate": 1.875817557223868e-05, "loss": 0.4227, "step": 2911 }, { "epoch": 0.3543656829936112, "grad_norm": 1.5236917734146118, "learning_rate": 1.8757233918148784e-05, "loss": 0.4576, "step": 2912 }, { "epoch": 0.3544873745056282, "grad_norm": 2.465754270553589, "learning_rate": 1.8756291930828013e-05, "loss": 0.4282, "step": 2913 }, { "epoch": 0.35460906601764525, "grad_norm": 3.590388536453247, "learning_rate": 1.875534961031221e-05, "loss": 0.4917, "step": 2914 }, { "epoch": 0.3547307575296623, "grad_norm": 3.2868614196777344, "learning_rate": 1.875440695663723e-05, "loss": 0.4756, "step": 2915 }, { "epoch": 0.35485244904167934, "grad_norm": 3.7432565689086914, "learning_rate": 1.8753463969838946e-05, "loss": 0.4873, "step": 2916 }, { "epoch": 0.35497414055369636, "grad_norm": 1.5701254606246948, "learning_rate": 1.875252064995324e-05, "loss": 0.4789, "step": 2917 }, { "epoch": 0.35509583206571343, "grad_norm": 2.1755101680755615, "learning_rate": 1.8751576997016007e-05, "loss": 0.4393, "step": 2918 }, { "epoch": 0.35521752357773045, "grad_norm": 4.6662821769714355, "learning_rate": 1.875063301106315e-05, "loss": 0.424, "step": 2919 }, { "epoch": 0.3553392150897475, "grad_norm": 1.7847932577133179, "learning_rate": 1.87496886921306e-05, "loss": 0.4688, "step": 2920 }, { "epoch": 0.35546090660176455, "grad_norm": 4.292427062988281, "learning_rate": 1.874874404025428e-05, "loss": 0.442, "step": 2921 }, { "epoch": 0.35558259811378157, "grad_norm": 0.6120730638504028, "learning_rate": 1.8747799055470142e-05, "loss": 0.4823, "step": 2922 }, { "epoch": 0.3557042896257986, "grad_norm": 1.2348554134368896, "learning_rate": 1.8746853737814144e-05, "loss": 0.4693, "step": 2923 }, { "epoch": 0.35582598113781566, "grad_norm": 1.578283667564392, "learning_rate": 1.8745908087322254e-05, "loss": 0.3873, "step": 2924 }, { "epoch": 0.3559476726498327, "grad_norm": 1.4291319847106934, "learning_rate": 1.874496210403046e-05, "loss": 0.4529, "step": 2925 }, { "epoch": 0.3560693641618497, "grad_norm": 4.548673629760742, "learning_rate": 1.8744015787974757e-05, "loss": 0.5328, "step": 2926 }, { "epoch": 0.3561910556738668, "grad_norm": 3.222435712814331, "learning_rate": 1.8743069139191154e-05, "loss": 0.4922, "step": 2927 }, { "epoch": 0.3563127471858838, "grad_norm": 2.703038454055786, "learning_rate": 1.8742122157715673e-05, "loss": 0.4921, "step": 2928 }, { "epoch": 0.3564344386979008, "grad_norm": 1.7311506271362305, "learning_rate": 1.874117484358435e-05, "loss": 0.5038, "step": 2929 }, { "epoch": 0.35655613020991783, "grad_norm": 0.9427840709686279, "learning_rate": 1.874022719683323e-05, "loss": 0.4464, "step": 2930 }, { "epoch": 0.3566778217219349, "grad_norm": 0.9701077342033386, "learning_rate": 1.873927921749837e-05, "loss": 0.4763, "step": 2931 }, { "epoch": 0.3567995132339519, "grad_norm": 1.4765790700912476, "learning_rate": 1.8738330905615852e-05, "loss": 0.4468, "step": 2932 }, { "epoch": 0.35692120474596895, "grad_norm": 2.8618738651275635, "learning_rate": 1.873738226122175e-05, "loss": 0.4578, "step": 2933 }, { "epoch": 0.357042896257986, "grad_norm": 2.3875904083251953, "learning_rate": 1.8736433284352175e-05, "loss": 0.4877, "step": 2934 }, { "epoch": 0.35716458777000304, "grad_norm": 1.8850828409194946, "learning_rate": 1.8735483975043225e-05, "loss": 0.4546, "step": 2935 }, { "epoch": 0.35728627928202006, "grad_norm": 0.5865916013717651, "learning_rate": 1.873453433333103e-05, "loss": 0.4806, "step": 2936 }, { "epoch": 0.35740797079403713, "grad_norm": 3.0282232761383057, "learning_rate": 1.8733584359251724e-05, "loss": 0.4123, "step": 2937 }, { "epoch": 0.35752966230605415, "grad_norm": 2.4615886211395264, "learning_rate": 1.8732634052841456e-05, "loss": 0.524, "step": 2938 }, { "epoch": 0.35765135381807117, "grad_norm": 1.4478707313537598, "learning_rate": 1.8731683414136386e-05, "loss": 0.431, "step": 2939 }, { "epoch": 0.35777304533008825, "grad_norm": 4.296384811401367, "learning_rate": 1.8730732443172687e-05, "loss": 0.4883, "step": 2940 }, { "epoch": 0.35789473684210527, "grad_norm": 1.4367122650146484, "learning_rate": 1.872978113998655e-05, "loss": 0.4463, "step": 2941 }, { "epoch": 0.3580164283541223, "grad_norm": 4.117356300354004, "learning_rate": 1.8728829504614168e-05, "loss": 0.5055, "step": 2942 }, { "epoch": 0.35813811986613936, "grad_norm": 3.7410740852355957, "learning_rate": 1.8727877537091758e-05, "loss": 0.557, "step": 2943 }, { "epoch": 0.3582598113781564, "grad_norm": 1.436547040939331, "learning_rate": 1.872692523745554e-05, "loss": 0.4756, "step": 2944 }, { "epoch": 0.3583815028901734, "grad_norm": 2.0740349292755127, "learning_rate": 1.8725972605741755e-05, "loss": 0.4979, "step": 2945 }, { "epoch": 0.3585031944021905, "grad_norm": 2.4010510444641113, "learning_rate": 1.8725019641986647e-05, "loss": 0.4986, "step": 2946 }, { "epoch": 0.3586248859142075, "grad_norm": 1.252356767654419, "learning_rate": 1.8724066346226485e-05, "loss": 0.5293, "step": 2947 }, { "epoch": 0.3587465774262245, "grad_norm": 3.0641136169433594, "learning_rate": 1.8723112718497544e-05, "loss": 0.4434, "step": 2948 }, { "epoch": 0.35886826893824153, "grad_norm": 1.4979795217514038, "learning_rate": 1.8722158758836103e-05, "loss": 0.4689, "step": 2949 }, { "epoch": 0.3589899604502586, "grad_norm": 1.061935544013977, "learning_rate": 1.8721204467278465e-05, "loss": 0.4475, "step": 2950 }, { "epoch": 0.3591116519622756, "grad_norm": 2.4618287086486816, "learning_rate": 1.8720249843860953e-05, "loss": 0.4959, "step": 2951 }, { "epoch": 0.35923334347429264, "grad_norm": 1.5725812911987305, "learning_rate": 1.871929488861988e-05, "loss": 0.4504, "step": 2952 }, { "epoch": 0.3593550349863097, "grad_norm": 1.2420568466186523, "learning_rate": 1.8718339601591585e-05, "loss": 0.4554, "step": 2953 }, { "epoch": 0.35947672649832674, "grad_norm": 0.7908331751823425, "learning_rate": 1.871738398281243e-05, "loss": 0.4604, "step": 2954 }, { "epoch": 0.35959841801034376, "grad_norm": 0.5810034275054932, "learning_rate": 1.871642803231876e-05, "loss": 0.4401, "step": 2955 }, { "epoch": 0.35972010952236083, "grad_norm": 2.186800718307495, "learning_rate": 1.8715471750146972e-05, "loss": 0.5402, "step": 2956 }, { "epoch": 0.35984180103437785, "grad_norm": 2.936331272125244, "learning_rate": 1.871451513633344e-05, "loss": 0.4414, "step": 2957 }, { "epoch": 0.35996349254639487, "grad_norm": 1.2168655395507812, "learning_rate": 1.871355819091457e-05, "loss": 0.5076, "step": 2958 }, { "epoch": 0.36008518405841194, "grad_norm": 3.5353779792785645, "learning_rate": 1.8712600913926772e-05, "loss": 0.476, "step": 2959 }, { "epoch": 0.36020687557042896, "grad_norm": 1.4204754829406738, "learning_rate": 1.8711643305406477e-05, "loss": 0.5084, "step": 2960 }, { "epoch": 0.360328567082446, "grad_norm": 1.3160148859024048, "learning_rate": 1.871068536539012e-05, "loss": 0.4482, "step": 2961 }, { "epoch": 0.36045025859446306, "grad_norm": 0.6225706934928894, "learning_rate": 1.8709727093914157e-05, "loss": 0.4804, "step": 2962 }, { "epoch": 0.3605719501064801, "grad_norm": 1.5934213399887085, "learning_rate": 1.870876849101505e-05, "loss": 0.4316, "step": 2963 }, { "epoch": 0.3606936416184971, "grad_norm": 3.3230910301208496, "learning_rate": 1.8707809556729276e-05, "loss": 0.4535, "step": 2964 }, { "epoch": 0.36081533313051417, "grad_norm": 2.180410861968994, "learning_rate": 1.8706850291093323e-05, "loss": 0.4707, "step": 2965 }, { "epoch": 0.3609370246425312, "grad_norm": 1.1301518678665161, "learning_rate": 1.8705890694143693e-05, "loss": 0.4177, "step": 2966 }, { "epoch": 0.3610587161545482, "grad_norm": 4.07941198348999, "learning_rate": 1.8704930765916905e-05, "loss": 0.538, "step": 2967 }, { "epoch": 0.3611804076665653, "grad_norm": 1.671410322189331, "learning_rate": 1.870397050644948e-05, "loss": 0.4319, "step": 2968 }, { "epoch": 0.3613020991785823, "grad_norm": 1.7199392318725586, "learning_rate": 1.8703009915777963e-05, "loss": 0.4621, "step": 2969 }, { "epoch": 0.3614237906905993, "grad_norm": 0.6137834787368774, "learning_rate": 1.8702048993938902e-05, "loss": 0.4629, "step": 2970 }, { "epoch": 0.36154548220261634, "grad_norm": 3.8859007358551025, "learning_rate": 1.8701087740968868e-05, "loss": 0.4386, "step": 2971 }, { "epoch": 0.3616671737146334, "grad_norm": 3.885434865951538, "learning_rate": 1.8700126156904436e-05, "loss": 0.4457, "step": 2972 }, { "epoch": 0.36178886522665044, "grad_norm": 2.96514630317688, "learning_rate": 1.8699164241782194e-05, "loss": 0.4704, "step": 2973 }, { "epoch": 0.36191055673866745, "grad_norm": 1.3474972248077393, "learning_rate": 1.869820199563874e-05, "loss": 0.5081, "step": 2974 }, { "epoch": 0.36203224825068453, "grad_norm": 0.6787735223770142, "learning_rate": 1.8697239418510707e-05, "loss": 0.5376, "step": 2975 }, { "epoch": 0.36215393976270155, "grad_norm": 2.3206374645233154, "learning_rate": 1.8696276510434702e-05, "loss": 0.4883, "step": 2976 }, { "epoch": 0.36227563127471857, "grad_norm": 0.8279562592506409, "learning_rate": 1.869531327144738e-05, "loss": 0.5084, "step": 2977 }, { "epoch": 0.36239732278673564, "grad_norm": 1.6186060905456543, "learning_rate": 1.8694349701585392e-05, "loss": 0.4235, "step": 2978 }, { "epoch": 0.36251901429875266, "grad_norm": 2.840852975845337, "learning_rate": 1.8693385800885398e-05, "loss": 0.5106, "step": 2979 }, { "epoch": 0.3626407058107697, "grad_norm": 2.2010650634765625, "learning_rate": 1.869242156938408e-05, "loss": 0.4212, "step": 2980 }, { "epoch": 0.36276239732278676, "grad_norm": 1.0991575717926025, "learning_rate": 1.869145700711813e-05, "loss": 0.4654, "step": 2981 }, { "epoch": 0.3628840888348038, "grad_norm": 1.6484620571136475, "learning_rate": 1.869049211412425e-05, "loss": 0.5005, "step": 2982 }, { "epoch": 0.3630057803468208, "grad_norm": 1.2549841403961182, "learning_rate": 1.8689526890439156e-05, "loss": 0.462, "step": 2983 }, { "epoch": 0.36312747185883787, "grad_norm": 1.1052154302597046, "learning_rate": 1.868856133609958e-05, "loss": 0.4376, "step": 2984 }, { "epoch": 0.3632491633708549, "grad_norm": 2.9149084091186523, "learning_rate": 1.8687595451142257e-05, "loss": 0.5047, "step": 2985 }, { "epoch": 0.3633708548828719, "grad_norm": 0.6402516961097717, "learning_rate": 1.868662923560395e-05, "loss": 0.4594, "step": 2986 }, { "epoch": 0.363492546394889, "grad_norm": 1.3251070976257324, "learning_rate": 1.8685662689521417e-05, "loss": 0.4426, "step": 2987 }, { "epoch": 0.363614237906906, "grad_norm": 0.6694589257240295, "learning_rate": 1.8684695812931442e-05, "loss": 0.4383, "step": 2988 }, { "epoch": 0.363735929418923, "grad_norm": 1.0918630361557007, "learning_rate": 1.868372860587081e-05, "loss": 0.4706, "step": 2989 }, { "epoch": 0.36385762093094004, "grad_norm": 0.621202826499939, "learning_rate": 1.8682761068376335e-05, "loss": 0.4735, "step": 2990 }, { "epoch": 0.3639793124429571, "grad_norm": 2.1684494018554688, "learning_rate": 1.8681793200484827e-05, "loss": 0.5035, "step": 2991 }, { "epoch": 0.36410100395497413, "grad_norm": 0.8980317711830139, "learning_rate": 1.8680825002233122e-05, "loss": 0.4568, "step": 2992 }, { "epoch": 0.36422269546699115, "grad_norm": 0.6235323548316956, "learning_rate": 1.8679856473658053e-05, "loss": 0.524, "step": 2993 }, { "epoch": 0.36434438697900823, "grad_norm": 1.199827790260315, "learning_rate": 1.8678887614796475e-05, "loss": 0.4607, "step": 2994 }, { "epoch": 0.36446607849102525, "grad_norm": 1.9739487171173096, "learning_rate": 1.8677918425685266e-05, "loss": 0.5419, "step": 2995 }, { "epoch": 0.36458777000304227, "grad_norm": 3.7945010662078857, "learning_rate": 1.8676948906361295e-05, "loss": 0.4466, "step": 2996 }, { "epoch": 0.36470946151505934, "grad_norm": 0.9805242419242859, "learning_rate": 1.8675979056861462e-05, "loss": 0.5259, "step": 2997 }, { "epoch": 0.36483115302707636, "grad_norm": 1.7036242485046387, "learning_rate": 1.8675008877222664e-05, "loss": 0.4341, "step": 2998 }, { "epoch": 0.3649528445390934, "grad_norm": 1.3992851972579956, "learning_rate": 1.867403836748182e-05, "loss": 0.5244, "step": 2999 }, { "epoch": 0.36507453605111045, "grad_norm": 1.4511429071426392, "learning_rate": 1.8673067527675867e-05, "loss": 0.4555, "step": 3000 }, { "epoch": 0.3651962275631275, "grad_norm": 1.183764934539795, "learning_rate": 1.867209635784174e-05, "loss": 0.5047, "step": 3001 }, { "epoch": 0.3653179190751445, "grad_norm": 0.9412627220153809, "learning_rate": 1.8671124858016393e-05, "loss": 0.4418, "step": 3002 }, { "epoch": 0.36543961058716157, "grad_norm": 0.5016685724258423, "learning_rate": 1.86701530282368e-05, "loss": 0.426, "step": 3003 }, { "epoch": 0.3655613020991786, "grad_norm": 1.155144453048706, "learning_rate": 1.866918086853994e-05, "loss": 0.4714, "step": 3004 }, { "epoch": 0.3656829936111956, "grad_norm": 2.9690589904785156, "learning_rate": 1.86682083789628e-05, "loss": 0.5204, "step": 3005 }, { "epoch": 0.3658046851232127, "grad_norm": 1.997794508934021, "learning_rate": 1.866723555954239e-05, "loss": 0.4133, "step": 3006 }, { "epoch": 0.3659263766352297, "grad_norm": 2.890911102294922, "learning_rate": 1.8666262410315727e-05, "loss": 0.4246, "step": 3007 }, { "epoch": 0.3660480681472467, "grad_norm": 0.6775578856468201, "learning_rate": 1.8665288931319843e-05, "loss": 0.4426, "step": 3008 }, { "epoch": 0.3661697596592638, "grad_norm": 1.965939998626709, "learning_rate": 1.8664315122591778e-05, "loss": 0.4939, "step": 3009 }, { "epoch": 0.3662914511712808, "grad_norm": 0.7638002038002014, "learning_rate": 1.8663340984168585e-05, "loss": 0.4658, "step": 3010 }, { "epoch": 0.36641314268329783, "grad_norm": 0.7034749388694763, "learning_rate": 1.866236651608734e-05, "loss": 0.4643, "step": 3011 }, { "epoch": 0.36653483419531485, "grad_norm": 1.5485363006591797, "learning_rate": 1.8661391718385115e-05, "loss": 0.4317, "step": 3012 }, { "epoch": 0.3666565257073319, "grad_norm": 1.4653748273849487, "learning_rate": 1.866041659109901e-05, "loss": 0.4267, "step": 3013 }, { "epoch": 0.36677821721934895, "grad_norm": 1.0314141511917114, "learning_rate": 1.865944113426613e-05, "loss": 0.445, "step": 3014 }, { "epoch": 0.36689990873136596, "grad_norm": 1.2275699377059937, "learning_rate": 1.8658465347923586e-05, "loss": 0.4702, "step": 3015 }, { "epoch": 0.36702160024338304, "grad_norm": 0.7662177085876465, "learning_rate": 1.8657489232108514e-05, "loss": 0.4413, "step": 3016 }, { "epoch": 0.36714329175540006, "grad_norm": 3.074997901916504, "learning_rate": 1.865651278685806e-05, "loss": 0.498, "step": 3017 }, { "epoch": 0.3672649832674171, "grad_norm": 4.269886493682861, "learning_rate": 1.8655536012209373e-05, "loss": 0.5312, "step": 3018 }, { "epoch": 0.36738667477943415, "grad_norm": 1.078355312347412, "learning_rate": 1.8654558908199627e-05, "loss": 0.4139, "step": 3019 }, { "epoch": 0.36750836629145117, "grad_norm": 0.8460218906402588, "learning_rate": 1.8653581474865998e-05, "loss": 0.4355, "step": 3020 }, { "epoch": 0.3676300578034682, "grad_norm": 2.2422807216644287, "learning_rate": 1.8652603712245685e-05, "loss": 0.477, "step": 3021 }, { "epoch": 0.36775174931548527, "grad_norm": 1.8369975090026855, "learning_rate": 1.865162562037589e-05, "loss": 0.4722, "step": 3022 }, { "epoch": 0.3678734408275023, "grad_norm": 1.912951946258545, "learning_rate": 1.865064719929383e-05, "loss": 0.4495, "step": 3023 }, { "epoch": 0.3679951323395193, "grad_norm": 0.7084397077560425, "learning_rate": 1.864966844903674e-05, "loss": 0.4629, "step": 3024 }, { "epoch": 0.3681168238515364, "grad_norm": 1.1747092008590698, "learning_rate": 1.864868936964186e-05, "loss": 0.5198, "step": 3025 }, { "epoch": 0.3682385153635534, "grad_norm": 0.6192000508308411, "learning_rate": 1.864770996114645e-05, "loss": 0.5062, "step": 3026 }, { "epoch": 0.3683602068755704, "grad_norm": 0.6726124882698059, "learning_rate": 1.8646730223587778e-05, "loss": 0.4915, "step": 3027 }, { "epoch": 0.3684818983875875, "grad_norm": 1.4156765937805176, "learning_rate": 1.864575015700312e-05, "loss": 0.4549, "step": 3028 }, { "epoch": 0.3686035898996045, "grad_norm": 0.8093295097351074, "learning_rate": 1.864476976142977e-05, "loss": 0.5323, "step": 3029 }, { "epoch": 0.36872528141162153, "grad_norm": 1.5512853860855103, "learning_rate": 1.8643789036905042e-05, "loss": 0.5057, "step": 3030 }, { "epoch": 0.36884697292363855, "grad_norm": 1.6699373722076416, "learning_rate": 1.8642807983466248e-05, "loss": 0.4696, "step": 3031 }, { "epoch": 0.3689686644356556, "grad_norm": 3.567721128463745, "learning_rate": 1.864182660115072e-05, "loss": 0.4849, "step": 3032 }, { "epoch": 0.36909035594767264, "grad_norm": 1.2548764944076538, "learning_rate": 1.8640844889995803e-05, "loss": 0.4827, "step": 3033 }, { "epoch": 0.36921204745968966, "grad_norm": 3.7645862102508545, "learning_rate": 1.8639862850038854e-05, "loss": 0.44, "step": 3034 }, { "epoch": 0.36933373897170674, "grad_norm": 1.1269211769104004, "learning_rate": 1.8638880481317237e-05, "loss": 0.4405, "step": 3035 }, { "epoch": 0.36945543048372376, "grad_norm": 2.2970995903015137, "learning_rate": 1.8637897783868337e-05, "loss": 0.5253, "step": 3036 }, { "epoch": 0.3695771219957408, "grad_norm": 0.6540803909301758, "learning_rate": 1.8636914757729548e-05, "loss": 0.4067, "step": 3037 }, { "epoch": 0.36969881350775785, "grad_norm": 4.421058177947998, "learning_rate": 1.8635931402938273e-05, "loss": 0.5236, "step": 3038 }, { "epoch": 0.36982050501977487, "grad_norm": 3.468085765838623, "learning_rate": 1.8634947719531936e-05, "loss": 0.5116, "step": 3039 }, { "epoch": 0.3699421965317919, "grad_norm": 2.381770372390747, "learning_rate": 1.8633963707547963e-05, "loss": 0.4558, "step": 3040 }, { "epoch": 0.37006388804380896, "grad_norm": 0.6035611629486084, "learning_rate": 1.86329793670238e-05, "loss": 0.4219, "step": 3041 }, { "epoch": 0.370185579555826, "grad_norm": 3.956756114959717, "learning_rate": 1.86319946979969e-05, "loss": 0.4186, "step": 3042 }, { "epoch": 0.370307271067843, "grad_norm": 1.367324948310852, "learning_rate": 1.8631009700504738e-05, "loss": 0.4244, "step": 3043 }, { "epoch": 0.3704289625798601, "grad_norm": 2.888293743133545, "learning_rate": 1.8630024374584788e-05, "loss": 0.5086, "step": 3044 }, { "epoch": 0.3705506540918771, "grad_norm": 1.178636908531189, "learning_rate": 1.862903872027455e-05, "loss": 0.4545, "step": 3045 }, { "epoch": 0.3706723456038941, "grad_norm": 0.98091721534729, "learning_rate": 1.8628052737611528e-05, "loss": 0.4393, "step": 3046 }, { "epoch": 0.3707940371159112, "grad_norm": 2.4809136390686035, "learning_rate": 1.862706642663324e-05, "loss": 0.4855, "step": 3047 }, { "epoch": 0.3709157286279282, "grad_norm": 2.2146472930908203, "learning_rate": 1.8626079787377217e-05, "loss": 0.5508, "step": 3048 }, { "epoch": 0.37103742013994523, "grad_norm": 1.3630300760269165, "learning_rate": 1.8625092819881e-05, "loss": 0.4659, "step": 3049 }, { "epoch": 0.37115911165196225, "grad_norm": 2.8149638175964355, "learning_rate": 1.8624105524182156e-05, "loss": 0.4705, "step": 3050 }, { "epoch": 0.3712808031639793, "grad_norm": 3.239107370376587, "learning_rate": 1.862311790031824e-05, "loss": 0.4514, "step": 3051 }, { "epoch": 0.37140249467599634, "grad_norm": 2.323507785797119, "learning_rate": 1.8622129948326838e-05, "loss": 0.4717, "step": 3052 }, { "epoch": 0.37152418618801336, "grad_norm": 1.4264402389526367, "learning_rate": 1.862114166824555e-05, "loss": 0.4246, "step": 3053 }, { "epoch": 0.37164587770003044, "grad_norm": 1.1614569425582886, "learning_rate": 1.8620153060111973e-05, "loss": 0.4682, "step": 3054 }, { "epoch": 0.37176756921204746, "grad_norm": 0.7365112900733948, "learning_rate": 1.861916412396373e-05, "loss": 0.4603, "step": 3055 }, { "epoch": 0.3718892607240645, "grad_norm": 2.646296262741089, "learning_rate": 1.8618174859838452e-05, "loss": 0.4112, "step": 3056 }, { "epoch": 0.37201095223608155, "grad_norm": 2.904318332672119, "learning_rate": 1.861718526777378e-05, "loss": 0.4823, "step": 3057 }, { "epoch": 0.37213264374809857, "grad_norm": 0.769153892993927, "learning_rate": 1.8616195347807374e-05, "loss": 0.409, "step": 3058 }, { "epoch": 0.3722543352601156, "grad_norm": 2.342787981033325, "learning_rate": 1.86152050999769e-05, "loss": 0.4912, "step": 3059 }, { "epoch": 0.37237602677213266, "grad_norm": 2.2397356033325195, "learning_rate": 1.861421452432004e-05, "loss": 0.4621, "step": 3060 }, { "epoch": 0.3724977182841497, "grad_norm": 3.1565186977386475, "learning_rate": 1.8613223620874486e-05, "loss": 0.4904, "step": 3061 }, { "epoch": 0.3726194097961667, "grad_norm": 2.013030767440796, "learning_rate": 1.8612232389677943e-05, "loss": 0.4241, "step": 3062 }, { "epoch": 0.3727411013081838, "grad_norm": 4.367907524108887, "learning_rate": 1.8611240830768134e-05, "loss": 0.4096, "step": 3063 }, { "epoch": 0.3728627928202008, "grad_norm": 0.6541080474853516, "learning_rate": 1.8610248944182786e-05, "loss": 0.5421, "step": 3064 }, { "epoch": 0.3729844843322178, "grad_norm": 3.0682034492492676, "learning_rate": 1.8609256729959642e-05, "loss": 0.4723, "step": 3065 }, { "epoch": 0.3731061758442349, "grad_norm": 3.624817132949829, "learning_rate": 1.860826418813646e-05, "loss": 0.4369, "step": 3066 }, { "epoch": 0.3732278673562519, "grad_norm": 1.2501392364501953, "learning_rate": 1.860727131875101e-05, "loss": 0.5066, "step": 3067 }, { "epoch": 0.3733495588682689, "grad_norm": 1.8625503778457642, "learning_rate": 1.860627812184107e-05, "loss": 0.4618, "step": 3068 }, { "epoch": 0.373471250380286, "grad_norm": 1.5235109329223633, "learning_rate": 1.860528459744443e-05, "loss": 0.4613, "step": 3069 }, { "epoch": 0.373592941892303, "grad_norm": 1.551566243171692, "learning_rate": 1.8604290745598902e-05, "loss": 0.4596, "step": 3070 }, { "epoch": 0.37371463340432004, "grad_norm": 2.195451259613037, "learning_rate": 1.8603296566342303e-05, "loss": 0.4602, "step": 3071 }, { "epoch": 0.37383632491633706, "grad_norm": 2.439582586288452, "learning_rate": 1.8602302059712457e-05, "loss": 0.4571, "step": 3072 }, { "epoch": 0.37395801642835413, "grad_norm": 3.3360729217529297, "learning_rate": 1.8601307225747213e-05, "loss": 0.4833, "step": 3073 }, { "epoch": 0.37407970794037115, "grad_norm": 1.1381869316101074, "learning_rate": 1.8600312064484427e-05, "loss": 0.4559, "step": 3074 }, { "epoch": 0.3742013994523882, "grad_norm": 1.5621495246887207, "learning_rate": 1.8599316575961964e-05, "loss": 0.4815, "step": 3075 }, { "epoch": 0.37432309096440525, "grad_norm": 0.6870940327644348, "learning_rate": 1.8598320760217707e-05, "loss": 0.477, "step": 3076 }, { "epoch": 0.37444478247642227, "grad_norm": 1.9823907613754272, "learning_rate": 1.859732461728955e-05, "loss": 0.4602, "step": 3077 }, { "epoch": 0.3745664739884393, "grad_norm": 1.0109331607818604, "learning_rate": 1.8596328147215394e-05, "loss": 0.4893, "step": 3078 }, { "epoch": 0.37468816550045636, "grad_norm": 2.337618827819824, "learning_rate": 1.8595331350033153e-05, "loss": 0.409, "step": 3079 }, { "epoch": 0.3748098570124734, "grad_norm": 0.6965295076370239, "learning_rate": 1.8594334225780768e-05, "loss": 0.5056, "step": 3080 }, { "epoch": 0.3749315485244904, "grad_norm": 3.9322896003723145, "learning_rate": 1.8593336774496175e-05, "loss": 0.3649, "step": 3081 }, { "epoch": 0.3750532400365075, "grad_norm": 1.7873562574386597, "learning_rate": 1.8592338996217332e-05, "loss": 0.4541, "step": 3082 }, { "epoch": 0.3751749315485245, "grad_norm": 1.682086706161499, "learning_rate": 1.8591340890982203e-05, "loss": 0.4129, "step": 3083 }, { "epoch": 0.3752966230605415, "grad_norm": 2.881730794906616, "learning_rate": 1.859034245882877e-05, "loss": 0.4704, "step": 3084 }, { "epoch": 0.3754183145725586, "grad_norm": 0.9620259404182434, "learning_rate": 1.858934369979503e-05, "loss": 0.3721, "step": 3085 }, { "epoch": 0.3755400060845756, "grad_norm": 3.6075077056884766, "learning_rate": 1.8588344613918977e-05, "loss": 0.4792, "step": 3086 }, { "epoch": 0.3756616975965926, "grad_norm": 7.657585620880127, "learning_rate": 1.8587345201238637e-05, "loss": 0.5958, "step": 3087 }, { "epoch": 0.3757833891086097, "grad_norm": 3.526669979095459, "learning_rate": 1.8586345461792038e-05, "loss": 0.448, "step": 3088 }, { "epoch": 0.3759050806206267, "grad_norm": 3.5936107635498047, "learning_rate": 1.8585345395617224e-05, "loss": 0.4398, "step": 3089 }, { "epoch": 0.37602677213264374, "grad_norm": 3.408573627471924, "learning_rate": 1.858434500275224e-05, "loss": 0.471, "step": 3090 }, { "epoch": 0.37614846364466076, "grad_norm": 4.0012311935424805, "learning_rate": 1.8583344283235165e-05, "loss": 0.5293, "step": 3091 }, { "epoch": 0.37627015515667783, "grad_norm": 0.6235780715942383, "learning_rate": 1.8582343237104072e-05, "loss": 0.4129, "step": 3092 }, { "epoch": 0.37639184666869485, "grad_norm": 0.6681236624717712, "learning_rate": 1.8581341864397055e-05, "loss": 0.4596, "step": 3093 }, { "epoch": 0.37651353818071187, "grad_norm": 0.9401674866676331, "learning_rate": 1.858034016515222e-05, "loss": 0.4958, "step": 3094 }, { "epoch": 0.37663522969272895, "grad_norm": 2.4536235332489014, "learning_rate": 1.8579338139407675e-05, "loss": 0.4365, "step": 3095 }, { "epoch": 0.37675692120474596, "grad_norm": 5.458724021911621, "learning_rate": 1.8578335787201562e-05, "loss": 0.4391, "step": 3096 }, { "epoch": 0.376878612716763, "grad_norm": 1.9668610095977783, "learning_rate": 1.8577333108572012e-05, "loss": 0.4531, "step": 3097 }, { "epoch": 0.37700030422878006, "grad_norm": 0.6157615780830383, "learning_rate": 1.8576330103557187e-05, "loss": 0.4675, "step": 3098 }, { "epoch": 0.3771219957407971, "grad_norm": 0.9017341732978821, "learning_rate": 1.8575326772195244e-05, "loss": 0.4424, "step": 3099 }, { "epoch": 0.3772436872528141, "grad_norm": 1.8384252786636353, "learning_rate": 1.857432311452437e-05, "loss": 0.4114, "step": 3100 }, { "epoch": 0.37736537876483117, "grad_norm": 3.7494544982910156, "learning_rate": 1.8573319130582756e-05, "loss": 0.5206, "step": 3101 }, { "epoch": 0.3774870702768482, "grad_norm": 1.9978009462356567, "learning_rate": 1.85723148204086e-05, "loss": 0.4427, "step": 3102 }, { "epoch": 0.3776087617888652, "grad_norm": 1.0292961597442627, "learning_rate": 1.8571310184040124e-05, "loss": 0.4257, "step": 3103 }, { "epoch": 0.3777304533008823, "grad_norm": 2.8446903228759766, "learning_rate": 1.8570305221515556e-05, "loss": 0.4582, "step": 3104 }, { "epoch": 0.3778521448128993, "grad_norm": 4.5071587562561035, "learning_rate": 1.856929993287313e-05, "loss": 0.5599, "step": 3105 }, { "epoch": 0.3779738363249163, "grad_norm": 2.5502004623413086, "learning_rate": 1.8568294318151104e-05, "loss": 0.4947, "step": 3106 }, { "epoch": 0.3780955278369334, "grad_norm": 2.0243778228759766, "learning_rate": 1.8567288377387745e-05, "loss": 0.4753, "step": 3107 }, { "epoch": 0.3782172193489504, "grad_norm": 2.132016181945801, "learning_rate": 1.856628211062133e-05, "loss": 0.4193, "step": 3108 }, { "epoch": 0.37833891086096744, "grad_norm": 1.9406275749206543, "learning_rate": 1.856527551789015e-05, "loss": 0.551, "step": 3109 }, { "epoch": 0.3784606023729845, "grad_norm": 2.985602617263794, "learning_rate": 1.8564268599232507e-05, "loss": 0.4908, "step": 3110 }, { "epoch": 0.37858229388500153, "grad_norm": 3.1786415576934814, "learning_rate": 1.8563261354686718e-05, "loss": 0.4849, "step": 3111 }, { "epoch": 0.37870398539701855, "grad_norm": 1.6061556339263916, "learning_rate": 1.8562253784291108e-05, "loss": 0.5219, "step": 3112 }, { "epoch": 0.37882567690903557, "grad_norm": 2.637531042098999, "learning_rate": 1.8561245888084017e-05, "loss": 0.4505, "step": 3113 }, { "epoch": 0.37894736842105264, "grad_norm": 3.5307555198669434, "learning_rate": 1.8560237666103803e-05, "loss": 0.4024, "step": 3114 }, { "epoch": 0.37906905993306966, "grad_norm": 0.8938859701156616, "learning_rate": 1.8559229118388825e-05, "loss": 0.4621, "step": 3115 }, { "epoch": 0.3791907514450867, "grad_norm": 0.8579207062721252, "learning_rate": 1.8558220244977462e-05, "loss": 0.4147, "step": 3116 }, { "epoch": 0.37931244295710376, "grad_norm": 3.38228440284729, "learning_rate": 1.8557211045908104e-05, "loss": 0.4969, "step": 3117 }, { "epoch": 0.3794341344691208, "grad_norm": 4.783172607421875, "learning_rate": 1.8556201521219152e-05, "loss": 0.5167, "step": 3118 }, { "epoch": 0.3795558259811378, "grad_norm": 3.9394025802612305, "learning_rate": 1.8555191670949028e-05, "loss": 0.491, "step": 3119 }, { "epoch": 0.37967751749315487, "grad_norm": 3.0613887310028076, "learning_rate": 1.8554181495136146e-05, "loss": 0.438, "step": 3120 }, { "epoch": 0.3797992090051719, "grad_norm": 0.802926242351532, "learning_rate": 1.855317099381895e-05, "loss": 0.3874, "step": 3121 }, { "epoch": 0.3799209005171889, "grad_norm": 2.5228464603424072, "learning_rate": 1.8552160167035902e-05, "loss": 0.489, "step": 3122 }, { "epoch": 0.380042592029206, "grad_norm": 0.8206809163093567, "learning_rate": 1.855114901482545e-05, "loss": 0.4929, "step": 3123 }, { "epoch": 0.380164283541223, "grad_norm": 1.3448454141616821, "learning_rate": 1.8550137537226085e-05, "loss": 0.4734, "step": 3124 }, { "epoch": 0.38028597505324, "grad_norm": 1.8659371137619019, "learning_rate": 1.8549125734276284e-05, "loss": 0.4574, "step": 3125 }, { "epoch": 0.3804076665652571, "grad_norm": 1.289597749710083, "learning_rate": 1.854811360601456e-05, "loss": 0.4602, "step": 3126 }, { "epoch": 0.3805293580772741, "grad_norm": 1.759281039237976, "learning_rate": 1.854710115247941e-05, "loss": 0.5542, "step": 3127 }, { "epoch": 0.38065104958929113, "grad_norm": 0.6956965327262878, "learning_rate": 1.8546088373709373e-05, "loss": 0.4512, "step": 3128 }, { "epoch": 0.3807727411013082, "grad_norm": 2.488795757293701, "learning_rate": 1.8545075269742986e-05, "loss": 0.5226, "step": 3129 }, { "epoch": 0.38089443261332523, "grad_norm": 1.9304710626602173, "learning_rate": 1.8544061840618798e-05, "loss": 0.5039, "step": 3130 }, { "epoch": 0.38101612412534225, "grad_norm": 2.1361005306243896, "learning_rate": 1.8543048086375368e-05, "loss": 0.466, "step": 3131 }, { "epoch": 0.38113781563735927, "grad_norm": 0.9239948987960815, "learning_rate": 1.8542034007051278e-05, "loss": 0.5134, "step": 3132 }, { "epoch": 0.38125950714937634, "grad_norm": 1.5530760288238525, "learning_rate": 1.854101960268511e-05, "loss": 0.4617, "step": 3133 }, { "epoch": 0.38138119866139336, "grad_norm": 2.8756372928619385, "learning_rate": 1.854000487331547e-05, "loss": 0.5835, "step": 3134 }, { "epoch": 0.3815028901734104, "grad_norm": 1.1003234386444092, "learning_rate": 1.8538989818980967e-05, "loss": 0.4987, "step": 3135 }, { "epoch": 0.38162458168542746, "grad_norm": 0.6150041222572327, "learning_rate": 1.853797443972023e-05, "loss": 0.5037, "step": 3136 }, { "epoch": 0.3817462731974445, "grad_norm": 1.2901042699813843, "learning_rate": 1.853695873557189e-05, "loss": 0.473, "step": 3137 }, { "epoch": 0.3818679647094615, "grad_norm": 2.51076078414917, "learning_rate": 1.8535942706574598e-05, "loss": 0.4695, "step": 3138 }, { "epoch": 0.38198965622147857, "grad_norm": 1.9291123151779175, "learning_rate": 1.853492635276702e-05, "loss": 0.4401, "step": 3139 }, { "epoch": 0.3821113477334956, "grad_norm": 2.271430730819702, "learning_rate": 1.8533909674187828e-05, "loss": 0.4886, "step": 3140 }, { "epoch": 0.3822330392455126, "grad_norm": 3.22769832611084, "learning_rate": 1.8532892670875707e-05, "loss": 0.5278, "step": 3141 }, { "epoch": 0.3823547307575297, "grad_norm": 1.6389480829238892, "learning_rate": 1.8531875342869355e-05, "loss": 0.4236, "step": 3142 }, { "epoch": 0.3824764222695467, "grad_norm": 0.8063991665840149, "learning_rate": 1.8530857690207492e-05, "loss": 0.4253, "step": 3143 }, { "epoch": 0.3825981137815637, "grad_norm": 0.7704494595527649, "learning_rate": 1.852983971292883e-05, "loss": 0.4495, "step": 3144 }, { "epoch": 0.3827198052935808, "grad_norm": 1.41098952293396, "learning_rate": 1.8528821411072118e-05, "loss": 0.4881, "step": 3145 }, { "epoch": 0.3828414968055978, "grad_norm": 0.8357442021369934, "learning_rate": 1.852780278467609e-05, "loss": 0.4332, "step": 3146 }, { "epoch": 0.38296318831761483, "grad_norm": 2.3424081802368164, "learning_rate": 1.852678383377952e-05, "loss": 0.4608, "step": 3147 }, { "epoch": 0.3830848798296319, "grad_norm": 1.8472098112106323, "learning_rate": 1.8525764558421175e-05, "loss": 0.4991, "step": 3148 }, { "epoch": 0.3832065713416489, "grad_norm": 0.8137487173080444, "learning_rate": 1.852474495863984e-05, "loss": 0.494, "step": 3149 }, { "epoch": 0.38332826285366595, "grad_norm": 1.8833178281784058, "learning_rate": 1.8523725034474317e-05, "loss": 0.4695, "step": 3150 }, { "epoch": 0.383449954365683, "grad_norm": 1.8519325256347656, "learning_rate": 1.8522704785963412e-05, "loss": 0.4628, "step": 3151 }, { "epoch": 0.38357164587770004, "grad_norm": 0.9221046566963196, "learning_rate": 1.8521684213145947e-05, "loss": 0.464, "step": 3152 }, { "epoch": 0.38369333738971706, "grad_norm": 0.7965523600578308, "learning_rate": 1.852066331606076e-05, "loss": 0.4283, "step": 3153 }, { "epoch": 0.3838150289017341, "grad_norm": 2.841887950897217, "learning_rate": 1.8519642094746696e-05, "loss": 0.4569, "step": 3154 }, { "epoch": 0.38393672041375115, "grad_norm": 5.291511058807373, "learning_rate": 1.8518620549242615e-05, "loss": 0.517, "step": 3155 }, { "epoch": 0.3840584119257682, "grad_norm": 3.747833490371704, "learning_rate": 1.8517598679587393e-05, "loss": 0.509, "step": 3156 }, { "epoch": 0.3841801034377852, "grad_norm": 0.9247905611991882, "learning_rate": 1.8516576485819907e-05, "loss": 0.4003, "step": 3157 }, { "epoch": 0.38430179494980227, "grad_norm": 2.4241535663604736, "learning_rate": 1.8515553967979058e-05, "loss": 0.4917, "step": 3158 }, { "epoch": 0.3844234864618193, "grad_norm": 0.6749876737594604, "learning_rate": 1.8514531126103756e-05, "loss": 0.4372, "step": 3159 }, { "epoch": 0.3845451779738363, "grad_norm": 2.327443838119507, "learning_rate": 1.8513507960232918e-05, "loss": 0.505, "step": 3160 }, { "epoch": 0.3846668694858534, "grad_norm": 1.2845443487167358, "learning_rate": 1.8512484470405482e-05, "loss": 0.4301, "step": 3161 }, { "epoch": 0.3847885609978704, "grad_norm": 0.9656535387039185, "learning_rate": 1.8511460656660393e-05, "loss": 0.5199, "step": 3162 }, { "epoch": 0.3849102525098874, "grad_norm": 1.7029277086257935, "learning_rate": 1.8510436519036606e-05, "loss": 0.4509, "step": 3163 }, { "epoch": 0.3850319440219045, "grad_norm": 1.0286000967025757, "learning_rate": 1.8509412057573096e-05, "loss": 0.4566, "step": 3164 }, { "epoch": 0.3851536355339215, "grad_norm": 2.8110828399658203, "learning_rate": 1.8508387272308843e-05, "loss": 0.4685, "step": 3165 }, { "epoch": 0.38527532704593853, "grad_norm": 0.7943376898765564, "learning_rate": 1.8507362163282844e-05, "loss": 0.4981, "step": 3166 }, { "epoch": 0.3853970185579556, "grad_norm": 1.1829615831375122, "learning_rate": 1.8506336730534104e-05, "loss": 0.4821, "step": 3167 }, { "epoch": 0.3855187100699726, "grad_norm": 2.0051746368408203, "learning_rate": 1.8505310974101643e-05, "loss": 0.4552, "step": 3168 }, { "epoch": 0.38564040158198964, "grad_norm": 1.1509095430374146, "learning_rate": 1.8504284894024497e-05, "loss": 0.4286, "step": 3169 }, { "epoch": 0.3857620930940067, "grad_norm": 2.34548020362854, "learning_rate": 1.8503258490341706e-05, "loss": 0.4734, "step": 3170 }, { "epoch": 0.38588378460602374, "grad_norm": 0.5796806216239929, "learning_rate": 1.850223176309233e-05, "loss": 0.417, "step": 3171 }, { "epoch": 0.38600547611804076, "grad_norm": 1.1869587898254395, "learning_rate": 1.8501204712315433e-05, "loss": 0.433, "step": 3172 }, { "epoch": 0.3861271676300578, "grad_norm": 1.4356694221496582, "learning_rate": 1.8500177338050104e-05, "loss": 0.4626, "step": 3173 }, { "epoch": 0.38624885914207485, "grad_norm": 3.639671802520752, "learning_rate": 1.8499149640335432e-05, "loss": 0.5392, "step": 3174 }, { "epoch": 0.38637055065409187, "grad_norm": 1.6376925706863403, "learning_rate": 1.8498121619210523e-05, "loss": 0.4046, "step": 3175 }, { "epoch": 0.3864922421661089, "grad_norm": 1.9681793451309204, "learning_rate": 1.84970932747145e-05, "loss": 0.4775, "step": 3176 }, { "epoch": 0.38661393367812596, "grad_norm": 2.046285390853882, "learning_rate": 1.8496064606886485e-05, "loss": 0.3908, "step": 3177 }, { "epoch": 0.386735625190143, "grad_norm": 1.4376453161239624, "learning_rate": 1.8495035615765625e-05, "loss": 0.5125, "step": 3178 }, { "epoch": 0.38685731670216, "grad_norm": 0.7068982124328613, "learning_rate": 1.8494006301391083e-05, "loss": 0.5078, "step": 3179 }, { "epoch": 0.3869790082141771, "grad_norm": 1.790786623954773, "learning_rate": 1.8492976663802013e-05, "loss": 0.5096, "step": 3180 }, { "epoch": 0.3871006997261941, "grad_norm": 1.2692198753356934, "learning_rate": 1.8491946703037604e-05, "loss": 0.5065, "step": 3181 }, { "epoch": 0.3872223912382111, "grad_norm": 1.8550636768341064, "learning_rate": 1.8490916419137046e-05, "loss": 0.4894, "step": 3182 }, { "epoch": 0.3873440827502282, "grad_norm": 2.1490261554718018, "learning_rate": 1.8489885812139543e-05, "loss": 0.4483, "step": 3183 }, { "epoch": 0.3874657742622452, "grad_norm": 1.4833805561065674, "learning_rate": 1.8488854882084312e-05, "loss": 0.4461, "step": 3184 }, { "epoch": 0.38758746577426223, "grad_norm": 2.0343050956726074, "learning_rate": 1.8487823629010582e-05, "loss": 0.4807, "step": 3185 }, { "epoch": 0.3877091572862793, "grad_norm": 1.3174079656600952, "learning_rate": 1.8486792052957593e-05, "loss": 0.4724, "step": 3186 }, { "epoch": 0.3878308487982963, "grad_norm": 2.131606101989746, "learning_rate": 1.84857601539646e-05, "loss": 0.489, "step": 3187 }, { "epoch": 0.38795254031031334, "grad_norm": 2.056992769241333, "learning_rate": 1.8484727932070864e-05, "loss": 0.4577, "step": 3188 }, { "epoch": 0.3880742318223304, "grad_norm": 0.6132264137268066, "learning_rate": 1.8483695387315675e-05, "loss": 0.4851, "step": 3189 }, { "epoch": 0.38819592333434744, "grad_norm": 2.0401928424835205, "learning_rate": 1.8482662519738313e-05, "loss": 0.4652, "step": 3190 }, { "epoch": 0.38831761484636446, "grad_norm": 1.4590213298797607, "learning_rate": 1.8481629329378084e-05, "loss": 0.4427, "step": 3191 }, { "epoch": 0.3884393063583815, "grad_norm": 1.439366102218628, "learning_rate": 1.8480595816274305e-05, "loss": 0.4303, "step": 3192 }, { "epoch": 0.38856099787039855, "grad_norm": 0.7999074459075928, "learning_rate": 1.84795619804663e-05, "loss": 0.4477, "step": 3193 }, { "epoch": 0.38868268938241557, "grad_norm": 1.023721694946289, "learning_rate": 1.847852782199341e-05, "loss": 0.4605, "step": 3194 }, { "epoch": 0.3888043808944326, "grad_norm": 1.1404272317886353, "learning_rate": 1.8477493340894984e-05, "loss": 0.4423, "step": 3195 }, { "epoch": 0.38892607240644966, "grad_norm": 1.0734004974365234, "learning_rate": 1.8476458537210393e-05, "loss": 0.4737, "step": 3196 }, { "epoch": 0.3890477639184667, "grad_norm": 2.0375170707702637, "learning_rate": 1.8475423410979006e-05, "loss": 0.5029, "step": 3197 }, { "epoch": 0.3891694554304837, "grad_norm": 2.0767176151275635, "learning_rate": 1.8474387962240218e-05, "loss": 0.4329, "step": 3198 }, { "epoch": 0.3892911469425008, "grad_norm": 1.063238263130188, "learning_rate": 1.8473352191033425e-05, "loss": 0.4815, "step": 3199 }, { "epoch": 0.3894128384545178, "grad_norm": 0.7509887218475342, "learning_rate": 1.8472316097398045e-05, "loss": 0.5098, "step": 3200 }, { "epoch": 0.3895345299665348, "grad_norm": 0.647834062576294, "learning_rate": 1.8471279681373496e-05, "loss": 0.4813, "step": 3201 }, { "epoch": 0.3896562214785519, "grad_norm": 1.1288594007492065, "learning_rate": 1.8470242942999225e-05, "loss": 0.4776, "step": 3202 }, { "epoch": 0.3897779129905689, "grad_norm": 2.3126728534698486, "learning_rate": 1.8469205882314675e-05, "loss": 0.4457, "step": 3203 }, { "epoch": 0.38989960450258593, "grad_norm": 1.7314441204071045, "learning_rate": 1.8468168499359312e-05, "loss": 0.5302, "step": 3204 }, { "epoch": 0.390021296014603, "grad_norm": 0.7410517334938049, "learning_rate": 1.8467130794172606e-05, "loss": 0.4846, "step": 3205 }, { "epoch": 0.39014298752662, "grad_norm": 1.3053652048110962, "learning_rate": 1.8466092766794052e-05, "loss": 0.4721, "step": 3206 }, { "epoch": 0.39026467903863704, "grad_norm": 0.9628663659095764, "learning_rate": 1.8465054417263142e-05, "loss": 0.4574, "step": 3207 }, { "epoch": 0.3903863705506541, "grad_norm": 0.7171255350112915, "learning_rate": 1.8464015745619392e-05, "loss": 0.497, "step": 3208 }, { "epoch": 0.39050806206267114, "grad_norm": 1.5145084857940674, "learning_rate": 1.846297675190232e-05, "loss": 0.4119, "step": 3209 }, { "epoch": 0.39062975357468815, "grad_norm": 1.139266848564148, "learning_rate": 1.846193743615147e-05, "loss": 0.439, "step": 3210 }, { "epoch": 0.39075144508670523, "grad_norm": 1.806175947189331, "learning_rate": 1.846089779840638e-05, "loss": 0.4214, "step": 3211 }, { "epoch": 0.39087313659872225, "grad_norm": 1.168665885925293, "learning_rate": 1.8459857838706622e-05, "loss": 0.4281, "step": 3212 }, { "epoch": 0.39099482811073927, "grad_norm": 0.6928753852844238, "learning_rate": 1.8458817557091757e-05, "loss": 0.42, "step": 3213 }, { "epoch": 0.3911165196227563, "grad_norm": 2.061511278152466, "learning_rate": 1.8457776953601377e-05, "loss": 0.4984, "step": 3214 }, { "epoch": 0.39123821113477336, "grad_norm": 2.9306259155273438, "learning_rate": 1.8456736028275075e-05, "loss": 0.4753, "step": 3215 }, { "epoch": 0.3913599026467904, "grad_norm": 1.4316598176956177, "learning_rate": 1.8455694781152463e-05, "loss": 0.4614, "step": 3216 }, { "epoch": 0.3914815941588074, "grad_norm": 1.952510118484497, "learning_rate": 1.8454653212273165e-05, "loss": 0.5211, "step": 3217 }, { "epoch": 0.3916032856708245, "grad_norm": 1.8742213249206543, "learning_rate": 1.845361132167681e-05, "loss": 0.5503, "step": 3218 }, { "epoch": 0.3917249771828415, "grad_norm": 3.4823076725006104, "learning_rate": 1.8452569109403045e-05, "loss": 0.4356, "step": 3219 }, { "epoch": 0.3918466686948585, "grad_norm": 2.8115499019622803, "learning_rate": 1.8451526575491537e-05, "loss": 0.4885, "step": 3220 }, { "epoch": 0.3919683602068756, "grad_norm": 3.0544533729553223, "learning_rate": 1.845048371998194e-05, "loss": 0.5334, "step": 3221 }, { "epoch": 0.3920900517188926, "grad_norm": 2.2687594890594482, "learning_rate": 1.8449440542913953e-05, "loss": 0.5203, "step": 3222 }, { "epoch": 0.3922117432309096, "grad_norm": 6.608068466186523, "learning_rate": 1.844839704432726e-05, "loss": 0.4333, "step": 3223 }, { "epoch": 0.3923334347429267, "grad_norm": 2.947314739227295, "learning_rate": 1.8447353224261572e-05, "loss": 0.4973, "step": 3224 }, { "epoch": 0.3924551262549437, "grad_norm": 1.3329120874404907, "learning_rate": 1.844630908275661e-05, "loss": 0.4909, "step": 3225 }, { "epoch": 0.39257681776696074, "grad_norm": 0.5378501415252686, "learning_rate": 1.8445264619852103e-05, "loss": 0.4375, "step": 3226 }, { "epoch": 0.3926985092789778, "grad_norm": 2.282072067260742, "learning_rate": 1.8444219835587798e-05, "loss": 0.44, "step": 3227 }, { "epoch": 0.39282020079099483, "grad_norm": 1.0764062404632568, "learning_rate": 1.844317473000345e-05, "loss": 0.3869, "step": 3228 }, { "epoch": 0.39294189230301185, "grad_norm": 3.2027347087860107, "learning_rate": 1.8442129303138825e-05, "loss": 0.4877, "step": 3229 }, { "epoch": 0.3930635838150289, "grad_norm": 2.674499988555908, "learning_rate": 1.8441083555033706e-05, "loss": 0.4407, "step": 3230 }, { "epoch": 0.39318527532704595, "grad_norm": 3.659951686859131, "learning_rate": 1.8440037485727887e-05, "loss": 0.4509, "step": 3231 }, { "epoch": 0.39330696683906297, "grad_norm": 6.1977105140686035, "learning_rate": 1.843899109526117e-05, "loss": 0.5455, "step": 3232 }, { "epoch": 0.39342865835108, "grad_norm": 1.3173803091049194, "learning_rate": 1.8437944383673377e-05, "loss": 0.4109, "step": 3233 }, { "epoch": 0.39355034986309706, "grad_norm": 2.1625349521636963, "learning_rate": 1.8436897351004336e-05, "loss": 0.4817, "step": 3234 }, { "epoch": 0.3936720413751141, "grad_norm": 0.7124523520469666, "learning_rate": 1.8435849997293883e-05, "loss": 0.4062, "step": 3235 }, { "epoch": 0.3937937328871311, "grad_norm": 2.6680171489715576, "learning_rate": 1.8434802322581877e-05, "loss": 0.5378, "step": 3236 }, { "epoch": 0.3939154243991482, "grad_norm": 1.0353717803955078, "learning_rate": 1.8433754326908185e-05, "loss": 0.5727, "step": 3237 }, { "epoch": 0.3940371159111652, "grad_norm": 1.7438045740127563, "learning_rate": 1.8432706010312684e-05, "loss": 0.5204, "step": 3238 }, { "epoch": 0.3941588074231822, "grad_norm": 6.921764850616455, "learning_rate": 1.8431657372835264e-05, "loss": 0.4544, "step": 3239 }, { "epoch": 0.3942804989351993, "grad_norm": 4.179312705993652, "learning_rate": 1.8430608414515828e-05, "loss": 0.5309, "step": 3240 }, { "epoch": 0.3944021904472163, "grad_norm": 5.485282897949219, "learning_rate": 1.842955913539429e-05, "loss": 0.4554, "step": 3241 }, { "epoch": 0.3945238819592333, "grad_norm": 7.443978309631348, "learning_rate": 1.842850953551058e-05, "loss": 0.4626, "step": 3242 }, { "epoch": 0.3946455734712504, "grad_norm": 2.6822259426116943, "learning_rate": 1.8427459614904636e-05, "loss": 0.5191, "step": 3243 }, { "epoch": 0.3947672649832674, "grad_norm": 4.467551231384277, "learning_rate": 1.8426409373616412e-05, "loss": 0.4317, "step": 3244 }, { "epoch": 0.39488895649528444, "grad_norm": 2.4827613830566406, "learning_rate": 1.842535881168587e-05, "loss": 0.4193, "step": 3245 }, { "epoch": 0.3950106480073015, "grad_norm": 1.2630842924118042, "learning_rate": 1.8424307929152983e-05, "loss": 0.4623, "step": 3246 }, { "epoch": 0.39513233951931853, "grad_norm": 2.2650156021118164, "learning_rate": 1.842325672605774e-05, "loss": 0.4433, "step": 3247 }, { "epoch": 0.39525403103133555, "grad_norm": 3.270244836807251, "learning_rate": 1.8422205202440148e-05, "loss": 0.4915, "step": 3248 }, { "epoch": 0.3953757225433526, "grad_norm": 3.1242475509643555, "learning_rate": 1.8421153358340213e-05, "loss": 0.4862, "step": 3249 }, { "epoch": 0.39549741405536964, "grad_norm": 3.5277795791625977, "learning_rate": 1.842010119379796e-05, "loss": 0.4808, "step": 3250 }, { "epoch": 0.39561910556738666, "grad_norm": 2.312628746032715, "learning_rate": 1.8419048708853433e-05, "loss": 0.4654, "step": 3251 }, { "epoch": 0.39574079707940374, "grad_norm": 0.6908182501792908, "learning_rate": 1.841799590354667e-05, "loss": 0.4268, "step": 3252 }, { "epoch": 0.39586248859142076, "grad_norm": 4.046888828277588, "learning_rate": 1.841694277791774e-05, "loss": 0.555, "step": 3253 }, { "epoch": 0.3959841801034378, "grad_norm": 1.4682831764221191, "learning_rate": 1.8415889332006718e-05, "loss": 0.5175, "step": 3254 }, { "epoch": 0.3961058716154548, "grad_norm": 1.267814040184021, "learning_rate": 1.8414835565853687e-05, "loss": 0.497, "step": 3255 }, { "epoch": 0.39622756312747187, "grad_norm": 3.2317216396331787, "learning_rate": 1.8413781479498746e-05, "loss": 0.4678, "step": 3256 }, { "epoch": 0.3963492546394889, "grad_norm": 4.2852888107299805, "learning_rate": 1.8412727072982e-05, "loss": 0.4886, "step": 3257 }, { "epoch": 0.3964709461515059, "grad_norm": 6.081776142120361, "learning_rate": 1.8411672346343575e-05, "loss": 0.4546, "step": 3258 }, { "epoch": 0.396592637663523, "grad_norm": 3.301145315170288, "learning_rate": 1.8410617299623607e-05, "loss": 0.4797, "step": 3259 }, { "epoch": 0.39671432917554, "grad_norm": 2.7169342041015625, "learning_rate": 1.8409561932862244e-05, "loss": 0.4636, "step": 3260 }, { "epoch": 0.396836020687557, "grad_norm": 0.7840808033943176, "learning_rate": 1.8408506246099644e-05, "loss": 0.4668, "step": 3261 }, { "epoch": 0.3969577121995741, "grad_norm": 0.6465523838996887, "learning_rate": 1.8407450239375976e-05, "loss": 0.4728, "step": 3262 }, { "epoch": 0.3970794037115911, "grad_norm": 2.52044677734375, "learning_rate": 1.840639391273142e-05, "loss": 0.3897, "step": 3263 }, { "epoch": 0.39720109522360814, "grad_norm": 4.680634021759033, "learning_rate": 1.8405337266206178e-05, "loss": 0.543, "step": 3264 }, { "epoch": 0.3973227867356252, "grad_norm": 3.9424571990966797, "learning_rate": 1.8404280299840452e-05, "loss": 0.4894, "step": 3265 }, { "epoch": 0.39744447824764223, "grad_norm": 2.4979941844940186, "learning_rate": 1.840322301367447e-05, "loss": 0.4603, "step": 3266 }, { "epoch": 0.39756616975965925, "grad_norm": 3.376971483230591, "learning_rate": 1.8402165407748453e-05, "loss": 0.5014, "step": 3267 }, { "epoch": 0.3976878612716763, "grad_norm": 1.4296678304672241, "learning_rate": 1.8401107482102654e-05, "loss": 0.3858, "step": 3268 }, { "epoch": 0.39780955278369334, "grad_norm": 3.5760421752929688, "learning_rate": 1.8400049236777328e-05, "loss": 0.5213, "step": 3269 }, { "epoch": 0.39793124429571036, "grad_norm": 3.6825640201568604, "learning_rate": 1.8398990671812738e-05, "loss": 0.4906, "step": 3270 }, { "epoch": 0.39805293580772744, "grad_norm": 1.4299966096878052, "learning_rate": 1.839793178724917e-05, "loss": 0.484, "step": 3271 }, { "epoch": 0.39817462731974446, "grad_norm": 2.198282241821289, "learning_rate": 1.839687258312691e-05, "loss": 0.4378, "step": 3272 }, { "epoch": 0.3982963188317615, "grad_norm": 0.8088342547416687, "learning_rate": 1.8395813059486273e-05, "loss": 0.5123, "step": 3273 }, { "epoch": 0.3984180103437785, "grad_norm": 2.553713083267212, "learning_rate": 1.839475321636757e-05, "loss": 0.501, "step": 3274 }, { "epoch": 0.39853970185579557, "grad_norm": 2.351316213607788, "learning_rate": 1.839369305381113e-05, "loss": 0.4584, "step": 3275 }, { "epoch": 0.3986613933678126, "grad_norm": 3.3632233142852783, "learning_rate": 1.8392632571857294e-05, "loss": 0.4518, "step": 3276 }, { "epoch": 0.3987830848798296, "grad_norm": 1.2141530513763428, "learning_rate": 1.839157177054642e-05, "loss": 0.4583, "step": 3277 }, { "epoch": 0.3989047763918467, "grad_norm": 1.5478171110153198, "learning_rate": 1.8390510649918867e-05, "loss": 0.4387, "step": 3278 }, { "epoch": 0.3990264679038637, "grad_norm": 3.9410643577575684, "learning_rate": 1.8389449210015017e-05, "loss": 0.5018, "step": 3279 }, { "epoch": 0.3991481594158807, "grad_norm": 1.6830229759216309, "learning_rate": 1.838838745087526e-05, "loss": 0.4476, "step": 3280 }, { "epoch": 0.3992698509278978, "grad_norm": 3.652818441390991, "learning_rate": 1.838732537254e-05, "loss": 0.5367, "step": 3281 }, { "epoch": 0.3993915424399148, "grad_norm": 3.036189556121826, "learning_rate": 1.8386262975049644e-05, "loss": 0.475, "step": 3282 }, { "epoch": 0.39951323395193183, "grad_norm": 0.6534156203269958, "learning_rate": 1.8385200258444628e-05, "loss": 0.4087, "step": 3283 }, { "epoch": 0.3996349254639489, "grad_norm": 2.4086899757385254, "learning_rate": 1.8384137222765383e-05, "loss": 0.4772, "step": 3284 }, { "epoch": 0.39975661697596593, "grad_norm": 3.8731942176818848, "learning_rate": 1.838307386805236e-05, "loss": 0.5396, "step": 3285 }, { "epoch": 0.39987830848798295, "grad_norm": 4.17984676361084, "learning_rate": 1.838201019434603e-05, "loss": 0.4055, "step": 3286 }, { "epoch": 0.4, "grad_norm": 3.8723251819610596, "learning_rate": 1.8380946201686857e-05, "loss": 0.5522, "step": 3287 }, { "epoch": 0.40012169151201704, "grad_norm": 1.8191925287246704, "learning_rate": 1.8379881890115338e-05, "loss": 0.4723, "step": 3288 }, { "epoch": 0.40024338302403406, "grad_norm": 5.876353740692139, "learning_rate": 1.8378817259671967e-05, "loss": 0.5102, "step": 3289 }, { "epoch": 0.40036507453605114, "grad_norm": 3.0011301040649414, "learning_rate": 1.8377752310397254e-05, "loss": 0.4784, "step": 3290 }, { "epoch": 0.40048676604806815, "grad_norm": 3.0897092819213867, "learning_rate": 1.8376687042331723e-05, "loss": 0.4851, "step": 3291 }, { "epoch": 0.4006084575600852, "grad_norm": 6.221704006195068, "learning_rate": 1.8375621455515916e-05, "loss": 0.4841, "step": 3292 }, { "epoch": 0.40073014907210225, "grad_norm": 2.357998847961426, "learning_rate": 1.837455554999037e-05, "loss": 0.4625, "step": 3293 }, { "epoch": 0.40085184058411927, "grad_norm": 1.0451438426971436, "learning_rate": 1.8373489325795657e-05, "loss": 0.4475, "step": 3294 }, { "epoch": 0.4009735320961363, "grad_norm": 1.7552294731140137, "learning_rate": 1.837242278297234e-05, "loss": 0.4717, "step": 3295 }, { "epoch": 0.4010952236081533, "grad_norm": 1.0604931116104126, "learning_rate": 1.8371355921561007e-05, "loss": 0.446, "step": 3296 }, { "epoch": 0.4012169151201704, "grad_norm": 3.4094369411468506, "learning_rate": 1.8370288741602255e-05, "loss": 0.4922, "step": 3297 }, { "epoch": 0.4013386066321874, "grad_norm": 3.9399912357330322, "learning_rate": 1.836922124313669e-05, "loss": 0.5141, "step": 3298 }, { "epoch": 0.4014602981442044, "grad_norm": 3.1885385513305664, "learning_rate": 1.8368153426204932e-05, "loss": 0.4728, "step": 3299 }, { "epoch": 0.4015819896562215, "grad_norm": 6.2420244216918945, "learning_rate": 1.8367085290847612e-05, "loss": 0.5498, "step": 3300 }, { "epoch": 0.4017036811682385, "grad_norm": 0.5824435949325562, "learning_rate": 1.836601683710538e-05, "loss": 0.4129, "step": 3301 }, { "epoch": 0.40182537268025553, "grad_norm": 1.4892432689666748, "learning_rate": 1.836494806501889e-05, "loss": 0.4185, "step": 3302 }, { "epoch": 0.4019470641922726, "grad_norm": 0.5604004859924316, "learning_rate": 1.8363878974628817e-05, "loss": 0.4461, "step": 3303 }, { "epoch": 0.4020687557042896, "grad_norm": 1.9960089921951294, "learning_rate": 1.8362809565975832e-05, "loss": 0.4965, "step": 3304 }, { "epoch": 0.40219044721630665, "grad_norm": 0.629467248916626, "learning_rate": 1.8361739839100634e-05, "loss": 0.501, "step": 3305 }, { "epoch": 0.4023121387283237, "grad_norm": 0.8893454074859619, "learning_rate": 1.836066979404393e-05, "loss": 0.4848, "step": 3306 }, { "epoch": 0.40243383024034074, "grad_norm": 5.554185390472412, "learning_rate": 1.835959943084643e-05, "loss": 0.4314, "step": 3307 }, { "epoch": 0.40255552175235776, "grad_norm": 3.0835459232330322, "learning_rate": 1.835852874954887e-05, "loss": 0.4508, "step": 3308 }, { "epoch": 0.40267721326437483, "grad_norm": 3.098881721496582, "learning_rate": 1.835745775019199e-05, "loss": 0.4798, "step": 3309 }, { "epoch": 0.40279890477639185, "grad_norm": 1.095411777496338, "learning_rate": 1.835638643281654e-05, "loss": 0.4785, "step": 3310 }, { "epoch": 0.40292059628840887, "grad_norm": 4.372706413269043, "learning_rate": 1.8355314797463297e-05, "loss": 0.4251, "step": 3311 }, { "epoch": 0.40304228780042595, "grad_norm": 2.5591952800750732, "learning_rate": 1.8354242844173028e-05, "loss": 0.4365, "step": 3312 }, { "epoch": 0.40316397931244297, "grad_norm": 0.6773574948310852, "learning_rate": 1.8353170572986523e-05, "loss": 0.414, "step": 3313 }, { "epoch": 0.40328567082446, "grad_norm": 1.0209612846374512, "learning_rate": 1.8352097983944593e-05, "loss": 0.3957, "step": 3314 }, { "epoch": 0.403407362336477, "grad_norm": 3.8572816848754883, "learning_rate": 1.8351025077088044e-05, "loss": 0.5391, "step": 3315 }, { "epoch": 0.4035290538484941, "grad_norm": 3.0534300804138184, "learning_rate": 1.8349951852457707e-05, "loss": 0.4318, "step": 3316 }, { "epoch": 0.4036507453605111, "grad_norm": 5.404112339019775, "learning_rate": 1.8348878310094415e-05, "loss": 0.5699, "step": 3317 }, { "epoch": 0.4037724368725281, "grad_norm": 6.7610602378845215, "learning_rate": 1.8347804450039025e-05, "loss": 0.5638, "step": 3318 }, { "epoch": 0.4038941283845452, "grad_norm": 4.170486927032471, "learning_rate": 1.8346730272332396e-05, "loss": 0.4879, "step": 3319 }, { "epoch": 0.4040158198965622, "grad_norm": 4.948755264282227, "learning_rate": 1.8345655777015405e-05, "loss": 0.5558, "step": 3320 }, { "epoch": 0.40413751140857923, "grad_norm": 1.204403042793274, "learning_rate": 1.8344580964128936e-05, "loss": 0.4401, "step": 3321 }, { "epoch": 0.4042592029205963, "grad_norm": 0.9451858401298523, "learning_rate": 1.8343505833713885e-05, "loss": 0.4616, "step": 3322 }, { "epoch": 0.4043808944326133, "grad_norm": 1.7080293893814087, "learning_rate": 1.8342430385811172e-05, "loss": 0.53, "step": 3323 }, { "epoch": 0.40450258594463034, "grad_norm": 0.9750133156776428, "learning_rate": 1.8341354620461715e-05, "loss": 0.5347, "step": 3324 }, { "epoch": 0.4046242774566474, "grad_norm": 6.347476482391357, "learning_rate": 1.8340278537706443e-05, "loss": 0.4849, "step": 3325 }, { "epoch": 0.40474596896866444, "grad_norm": 7.887727737426758, "learning_rate": 1.8339202137586317e-05, "loss": 0.4894, "step": 3326 }, { "epoch": 0.40486766048068146, "grad_norm": 5.168460369110107, "learning_rate": 1.8338125420142282e-05, "loss": 0.471, "step": 3327 }, { "epoch": 0.40498935199269853, "grad_norm": 3.7578842639923096, "learning_rate": 1.8337048385415318e-05, "loss": 0.5391, "step": 3328 }, { "epoch": 0.40511104350471555, "grad_norm": 4.743853569030762, "learning_rate": 1.8335971033446406e-05, "loss": 0.481, "step": 3329 }, { "epoch": 0.40523273501673257, "grad_norm": 2.081660509109497, "learning_rate": 1.8334893364276545e-05, "loss": 0.4572, "step": 3330 }, { "epoch": 0.40535442652874965, "grad_norm": 0.922373354434967, "learning_rate": 1.8333815377946736e-05, "loss": 0.54, "step": 3331 }, { "epoch": 0.40547611804076666, "grad_norm": 1.1141186952590942, "learning_rate": 1.8332737074498004e-05, "loss": 0.4836, "step": 3332 }, { "epoch": 0.4055978095527837, "grad_norm": 2.6496200561523438, "learning_rate": 1.833165845397138e-05, "loss": 0.4913, "step": 3333 }, { "epoch": 0.4057195010648007, "grad_norm": 2.3231499195098877, "learning_rate": 1.83305795164079e-05, "loss": 0.4828, "step": 3334 }, { "epoch": 0.4058411925768178, "grad_norm": 1.9056240320205688, "learning_rate": 1.832950026184863e-05, "loss": 0.4781, "step": 3335 }, { "epoch": 0.4059628840888348, "grad_norm": 0.7461123466491699, "learning_rate": 1.832842069033463e-05, "loss": 0.4762, "step": 3336 }, { "epoch": 0.4060845756008518, "grad_norm": 1.2821344137191772, "learning_rate": 1.832734080190699e-05, "loss": 0.3855, "step": 3337 }, { "epoch": 0.4062062671128689, "grad_norm": 0.6767551898956299, "learning_rate": 1.8326260596606794e-05, "loss": 0.4364, "step": 3338 }, { "epoch": 0.4063279586248859, "grad_norm": 0.6140134930610657, "learning_rate": 1.8325180074475146e-05, "loss": 0.4084, "step": 3339 }, { "epoch": 0.40644965013690293, "grad_norm": 1.4850640296936035, "learning_rate": 1.8324099235553165e-05, "loss": 0.4795, "step": 3340 }, { "epoch": 0.40657134164892, "grad_norm": 2.3355071544647217, "learning_rate": 1.8323018079881976e-05, "loss": 0.5079, "step": 3341 }, { "epoch": 0.406693033160937, "grad_norm": 1.6171776056289673, "learning_rate": 1.8321936607502723e-05, "loss": 0.5031, "step": 3342 }, { "epoch": 0.40681472467295404, "grad_norm": 1.5494000911712646, "learning_rate": 1.832085481845656e-05, "loss": 0.4083, "step": 3343 }, { "epoch": 0.4069364161849711, "grad_norm": 3.4000465869903564, "learning_rate": 1.8319772712784646e-05, "loss": 0.4416, "step": 3344 }, { "epoch": 0.40705810769698814, "grad_norm": 1.4358357191085815, "learning_rate": 1.8318690290528154e-05, "loss": 0.5197, "step": 3345 }, { "epoch": 0.40717979920900516, "grad_norm": 0.8626274466514587, "learning_rate": 1.8317607551728285e-05, "loss": 0.5032, "step": 3346 }, { "epoch": 0.40730149072102223, "grad_norm": 0.861656129360199, "learning_rate": 1.831652449642623e-05, "loss": 0.4972, "step": 3347 }, { "epoch": 0.40742318223303925, "grad_norm": 2.628462076187134, "learning_rate": 1.8315441124663202e-05, "loss": 0.4517, "step": 3348 }, { "epoch": 0.40754487374505627, "grad_norm": 1.4902620315551758, "learning_rate": 1.831435743648043e-05, "loss": 0.4858, "step": 3349 }, { "epoch": 0.40766656525707334, "grad_norm": 1.521276593208313, "learning_rate": 1.831327343191914e-05, "loss": 0.5016, "step": 3350 }, { "epoch": 0.40778825676909036, "grad_norm": 3.085007667541504, "learning_rate": 1.8312189111020595e-05, "loss": 0.4451, "step": 3351 }, { "epoch": 0.4079099482811074, "grad_norm": 1.2297166585922241, "learning_rate": 1.831110447382605e-05, "loss": 0.5072, "step": 3352 }, { "epoch": 0.40803163979312446, "grad_norm": 0.931147575378418, "learning_rate": 1.8310019520376773e-05, "loss": 0.4918, "step": 3353 }, { "epoch": 0.4081533313051415, "grad_norm": 1.8890422582626343, "learning_rate": 1.8308934250714054e-05, "loss": 0.4916, "step": 3354 }, { "epoch": 0.4082750228171585, "grad_norm": 1.5281296968460083, "learning_rate": 1.830784866487919e-05, "loss": 0.4513, "step": 3355 }, { "epoch": 0.4083967143291755, "grad_norm": 3.4499669075012207, "learning_rate": 1.8306762762913487e-05, "loss": 0.4268, "step": 3356 }, { "epoch": 0.4085184058411926, "grad_norm": 0.7381409406661987, "learning_rate": 1.8305676544858264e-05, "loss": 0.4737, "step": 3357 }, { "epoch": 0.4086400973532096, "grad_norm": 2.1498446464538574, "learning_rate": 1.830459001075486e-05, "loss": 0.4298, "step": 3358 }, { "epoch": 0.4087617888652266, "grad_norm": 0.7460692524909973, "learning_rate": 1.8303503160644616e-05, "loss": 0.4171, "step": 3359 }, { "epoch": 0.4088834803772437, "grad_norm": 0.7128798961639404, "learning_rate": 1.8302415994568886e-05, "loss": 0.4263, "step": 3360 }, { "epoch": 0.4090051718892607, "grad_norm": 0.6934868693351746, "learning_rate": 1.830132851256905e-05, "loss": 0.4192, "step": 3361 }, { "epoch": 0.40912686340127774, "grad_norm": 1.396048665046692, "learning_rate": 1.8300240714686476e-05, "loss": 0.4333, "step": 3362 }, { "epoch": 0.4092485549132948, "grad_norm": 3.5061545372009277, "learning_rate": 1.829915260096256e-05, "loss": 0.5092, "step": 3363 }, { "epoch": 0.40937024642531183, "grad_norm": 5.316830635070801, "learning_rate": 1.829806417143871e-05, "loss": 0.5483, "step": 3364 }, { "epoch": 0.40949193793732885, "grad_norm": 3.1464223861694336, "learning_rate": 1.8296975426156346e-05, "loss": 0.4639, "step": 3365 }, { "epoch": 0.40961362944934593, "grad_norm": 6.090936183929443, "learning_rate": 1.8295886365156893e-05, "loss": 0.5401, "step": 3366 }, { "epoch": 0.40973532096136295, "grad_norm": 0.7507444620132446, "learning_rate": 1.829479698848179e-05, "loss": 0.4469, "step": 3367 }, { "epoch": 0.40985701247337997, "grad_norm": 3.268221378326416, "learning_rate": 1.8293707296172493e-05, "loss": 0.4241, "step": 3368 }, { "epoch": 0.40997870398539704, "grad_norm": 0.9399189949035645, "learning_rate": 1.8292617288270467e-05, "loss": 0.5179, "step": 3369 }, { "epoch": 0.41010039549741406, "grad_norm": 1.1699978113174438, "learning_rate": 1.829152696481719e-05, "loss": 0.4816, "step": 3370 }, { "epoch": 0.4102220870094311, "grad_norm": 0.703235387802124, "learning_rate": 1.8290436325854143e-05, "loss": 0.527, "step": 3371 }, { "epoch": 0.41034377852144815, "grad_norm": 1.9470754861831665, "learning_rate": 1.828934537142284e-05, "loss": 0.4971, "step": 3372 }, { "epoch": 0.4104654700334652, "grad_norm": 2.758213520050049, "learning_rate": 1.8288254101564783e-05, "loss": 0.445, "step": 3373 }, { "epoch": 0.4105871615454822, "grad_norm": 2.5770461559295654, "learning_rate": 1.8287162516321506e-05, "loss": 0.4301, "step": 3374 }, { "epoch": 0.4107088530574992, "grad_norm": 0.9836858510971069, "learning_rate": 1.828607061573454e-05, "loss": 0.4585, "step": 3375 }, { "epoch": 0.4108305445695163, "grad_norm": 0.8131800293922424, "learning_rate": 1.8284978399845435e-05, "loss": 0.4285, "step": 3376 }, { "epoch": 0.4109522360815333, "grad_norm": 1.0802713632583618, "learning_rate": 1.8283885868695756e-05, "loss": 0.4344, "step": 3377 }, { "epoch": 0.4110739275935503, "grad_norm": 4.088990688323975, "learning_rate": 1.8282793022327068e-05, "loss": 0.5048, "step": 3378 }, { "epoch": 0.4111956191055674, "grad_norm": 4.4374823570251465, "learning_rate": 1.8281699860780965e-05, "loss": 0.5403, "step": 3379 }, { "epoch": 0.4113173106175844, "grad_norm": 2.3837692737579346, "learning_rate": 1.828060638409904e-05, "loss": 0.4597, "step": 3380 }, { "epoch": 0.41143900212960144, "grad_norm": 3.1547815799713135, "learning_rate": 1.8279512592322903e-05, "loss": 0.486, "step": 3381 }, { "epoch": 0.4115606936416185, "grad_norm": 2.082961082458496, "learning_rate": 1.827841848549417e-05, "loss": 0.4724, "step": 3382 }, { "epoch": 0.41168238515363553, "grad_norm": 1.4442596435546875, "learning_rate": 1.827732406365448e-05, "loss": 0.4824, "step": 3383 }, { "epoch": 0.41180407666565255, "grad_norm": 1.8321324586868286, "learning_rate": 1.827622932684548e-05, "loss": 0.5221, "step": 3384 }, { "epoch": 0.4119257681776696, "grad_norm": 3.4394984245300293, "learning_rate": 1.8275134275108817e-05, "loss": 0.5057, "step": 3385 }, { "epoch": 0.41204745968968665, "grad_norm": 2.7843782901763916, "learning_rate": 1.827403890848617e-05, "loss": 0.464, "step": 3386 }, { "epoch": 0.41216915120170367, "grad_norm": 4.026331901550293, "learning_rate": 1.8272943227019216e-05, "loss": 0.4785, "step": 3387 }, { "epoch": 0.41229084271372074, "grad_norm": 2.7709524631500244, "learning_rate": 1.8271847230749643e-05, "loss": 0.4911, "step": 3388 }, { "epoch": 0.41241253422573776, "grad_norm": 1.947475552558899, "learning_rate": 1.8270750919719167e-05, "loss": 0.4404, "step": 3389 }, { "epoch": 0.4125342257377548, "grad_norm": 1.105980396270752, "learning_rate": 1.82696542939695e-05, "loss": 0.5274, "step": 3390 }, { "epoch": 0.41265591724977185, "grad_norm": 2.854992389678955, "learning_rate": 1.8268557353542366e-05, "loss": 0.4452, "step": 3391 }, { "epoch": 0.4127776087617889, "grad_norm": 3.014014482498169, "learning_rate": 1.826746009847951e-05, "loss": 0.4757, "step": 3392 }, { "epoch": 0.4128993002738059, "grad_norm": 0.8796677589416504, "learning_rate": 1.8266362528822687e-05, "loss": 0.4567, "step": 3393 }, { "epoch": 0.41302099178582297, "grad_norm": 1.8404645919799805, "learning_rate": 1.826526464461366e-05, "loss": 0.4512, "step": 3394 }, { "epoch": 0.41314268329784, "grad_norm": 1.5910371541976929, "learning_rate": 1.8264166445894204e-05, "loss": 0.4683, "step": 3395 }, { "epoch": 0.413264374809857, "grad_norm": 0.6318998336791992, "learning_rate": 1.8263067932706106e-05, "loss": 0.4666, "step": 3396 }, { "epoch": 0.413386066321874, "grad_norm": 2.4946372509002686, "learning_rate": 1.8261969105091172e-05, "loss": 0.507, "step": 3397 }, { "epoch": 0.4135077578338911, "grad_norm": 0.6217384338378906, "learning_rate": 1.826086996309121e-05, "loss": 0.4719, "step": 3398 }, { "epoch": 0.4136294493459081, "grad_norm": 1.6143803596496582, "learning_rate": 1.825977050674805e-05, "loss": 0.4742, "step": 3399 }, { "epoch": 0.41375114085792514, "grad_norm": 1.202484369277954, "learning_rate": 1.8258670736103523e-05, "loss": 0.5338, "step": 3400 }, { "epoch": 0.4138728323699422, "grad_norm": 0.6325135231018066, "learning_rate": 1.8257570651199483e-05, "loss": 0.4858, "step": 3401 }, { "epoch": 0.41399452388195923, "grad_norm": 0.6948682069778442, "learning_rate": 1.8256470252077786e-05, "loss": 0.5216, "step": 3402 }, { "epoch": 0.41411621539397625, "grad_norm": 4.379599094390869, "learning_rate": 1.8255369538780307e-05, "loss": 0.424, "step": 3403 }, { "epoch": 0.4142379069059933, "grad_norm": 3.0073533058166504, "learning_rate": 1.8254268511348926e-05, "loss": 0.4858, "step": 3404 }, { "epoch": 0.41435959841801034, "grad_norm": 3.610830783843994, "learning_rate": 1.8253167169825545e-05, "loss": 0.422, "step": 3405 }, { "epoch": 0.41448128993002736, "grad_norm": 3.9430594444274902, "learning_rate": 1.8252065514252068e-05, "loss": 0.4568, "step": 3406 }, { "epoch": 0.41460298144204444, "grad_norm": 2.9395554065704346, "learning_rate": 1.8250963544670422e-05, "loss": 0.3995, "step": 3407 }, { "epoch": 0.41472467295406146, "grad_norm": 1.8223469257354736, "learning_rate": 1.824986126112253e-05, "loss": 0.4552, "step": 3408 }, { "epoch": 0.4148463644660785, "grad_norm": 2.2542219161987305, "learning_rate": 1.824875866365034e-05, "loss": 0.4619, "step": 3409 }, { "epoch": 0.41496805597809555, "grad_norm": 2.5607094764709473, "learning_rate": 1.8247655752295814e-05, "loss": 0.475, "step": 3410 }, { "epoch": 0.41508974749011257, "grad_norm": 1.6788568496704102, "learning_rate": 1.824655252710091e-05, "loss": 0.4749, "step": 3411 }, { "epoch": 0.4152114390021296, "grad_norm": 5.006715774536133, "learning_rate": 1.824544898810762e-05, "loss": 0.5662, "step": 3412 }, { "epoch": 0.41533313051414666, "grad_norm": 2.8922886848449707, "learning_rate": 1.8244345135357922e-05, "loss": 0.5175, "step": 3413 }, { "epoch": 0.4154548220261637, "grad_norm": 1.1472426652908325, "learning_rate": 1.8243240968893827e-05, "loss": 0.463, "step": 3414 }, { "epoch": 0.4155765135381807, "grad_norm": 2.0325188636779785, "learning_rate": 1.8242136488757355e-05, "loss": 0.4689, "step": 3415 }, { "epoch": 0.4156982050501977, "grad_norm": 1.1346851587295532, "learning_rate": 1.8241031694990525e-05, "loss": 0.4601, "step": 3416 }, { "epoch": 0.4158198965622148, "grad_norm": 1.8192977905273438, "learning_rate": 1.823992658763538e-05, "loss": 0.501, "step": 3417 }, { "epoch": 0.4159415880742318, "grad_norm": 2.941066026687622, "learning_rate": 1.823882116673398e-05, "loss": 0.4911, "step": 3418 }, { "epoch": 0.41606327958624884, "grad_norm": 1.0002509355545044, "learning_rate": 1.8237715432328372e-05, "loss": 0.5257, "step": 3419 }, { "epoch": 0.4161849710982659, "grad_norm": 0.8495650887489319, "learning_rate": 1.8236609384460647e-05, "loss": 0.4786, "step": 3420 }, { "epoch": 0.41630666261028293, "grad_norm": 0.671356201171875, "learning_rate": 1.8235503023172883e-05, "loss": 0.474, "step": 3421 }, { "epoch": 0.41642835412229995, "grad_norm": 0.6988985538482666, "learning_rate": 1.8234396348507185e-05, "loss": 0.4906, "step": 3422 }, { "epoch": 0.416550045634317, "grad_norm": 0.5956538915634155, "learning_rate": 1.8233289360505663e-05, "loss": 0.4345, "step": 3423 }, { "epoch": 0.41667173714633404, "grad_norm": 0.7774266600608826, "learning_rate": 1.8232182059210434e-05, "loss": 0.4751, "step": 3424 }, { "epoch": 0.41679342865835106, "grad_norm": 0.6722429990768433, "learning_rate": 1.823107444466364e-05, "loss": 0.4674, "step": 3425 }, { "epoch": 0.41691512017036814, "grad_norm": 1.5568604469299316, "learning_rate": 1.8229966516907426e-05, "loss": 0.4183, "step": 3426 }, { "epoch": 0.41703681168238516, "grad_norm": 0.8228222727775574, "learning_rate": 1.822885827598395e-05, "loss": 0.4377, "step": 3427 }, { "epoch": 0.4171585031944022, "grad_norm": 2.1870813369750977, "learning_rate": 1.8227749721935386e-05, "loss": 0.4758, "step": 3428 }, { "epoch": 0.41728019470641925, "grad_norm": 0.8316046595573425, "learning_rate": 1.8226640854803914e-05, "loss": 0.4713, "step": 3429 }, { "epoch": 0.41740188621843627, "grad_norm": 2.2942614555358887, "learning_rate": 1.822553167463173e-05, "loss": 0.4328, "step": 3430 }, { "epoch": 0.4175235777304533, "grad_norm": 3.5903139114379883, "learning_rate": 1.8224422181461035e-05, "loss": 0.4831, "step": 3431 }, { "epoch": 0.41764526924247036, "grad_norm": 1.6217827796936035, "learning_rate": 1.8223312375334056e-05, "loss": 0.4861, "step": 3432 }, { "epoch": 0.4177669607544874, "grad_norm": 1.5356171131134033, "learning_rate": 1.8222202256293022e-05, "loss": 0.4808, "step": 3433 }, { "epoch": 0.4178886522665044, "grad_norm": 3.0851364135742188, "learning_rate": 1.8221091824380172e-05, "loss": 0.5581, "step": 3434 }, { "epoch": 0.4180103437785214, "grad_norm": 4.13778829574585, "learning_rate": 1.8219981079637756e-05, "loss": 0.4668, "step": 3435 }, { "epoch": 0.4181320352905385, "grad_norm": 1.4807171821594238, "learning_rate": 1.8218870022108052e-05, "loss": 0.4669, "step": 3436 }, { "epoch": 0.4182537268025555, "grad_norm": 1.7364503145217896, "learning_rate": 1.821775865183333e-05, "loss": 0.4634, "step": 3437 }, { "epoch": 0.41837541831457253, "grad_norm": 2.777949571609497, "learning_rate": 1.821664696885588e-05, "loss": 0.4795, "step": 3438 }, { "epoch": 0.4184971098265896, "grad_norm": 1.0836976766586304, "learning_rate": 1.8215534973218005e-05, "loss": 0.512, "step": 3439 }, { "epoch": 0.4186188013386066, "grad_norm": 1.1404494047164917, "learning_rate": 1.821442266496202e-05, "loss": 0.424, "step": 3440 }, { "epoch": 0.41874049285062365, "grad_norm": 0.692095160484314, "learning_rate": 1.8213310044130252e-05, "loss": 0.4325, "step": 3441 }, { "epoch": 0.4188621843626407, "grad_norm": 0.6862213611602783, "learning_rate": 1.821219711076503e-05, "loss": 0.4487, "step": 3442 }, { "epoch": 0.41898387587465774, "grad_norm": 3.447136640548706, "learning_rate": 1.8211083864908716e-05, "loss": 0.5145, "step": 3443 }, { "epoch": 0.41910556738667476, "grad_norm": 1.4854779243469238, "learning_rate": 1.820997030660366e-05, "loss": 0.446, "step": 3444 }, { "epoch": 0.41922725889869183, "grad_norm": 0.635477602481842, "learning_rate": 1.8208856435892242e-05, "loss": 0.4159, "step": 3445 }, { "epoch": 0.41934895041070885, "grad_norm": 0.9970229268074036, "learning_rate": 1.8207742252816848e-05, "loss": 0.4346, "step": 3446 }, { "epoch": 0.4194706419227259, "grad_norm": 2.287344217300415, "learning_rate": 1.820662775741987e-05, "loss": 0.3874, "step": 3447 }, { "epoch": 0.41959233343474295, "grad_norm": 4.3752288818359375, "learning_rate": 1.820551294974372e-05, "loss": 0.5562, "step": 3448 }, { "epoch": 0.41971402494675997, "grad_norm": 1.8130929470062256, "learning_rate": 1.8204397829830816e-05, "loss": 0.5413, "step": 3449 }, { "epoch": 0.419835716458777, "grad_norm": 2.3292407989501953, "learning_rate": 1.8203282397723595e-05, "loss": 0.4668, "step": 3450 }, { "epoch": 0.41995740797079406, "grad_norm": 2.0253348350524902, "learning_rate": 1.8202166653464496e-05, "loss": 0.4466, "step": 3451 }, { "epoch": 0.4200790994828111, "grad_norm": 1.1529165506362915, "learning_rate": 1.820105059709598e-05, "loss": 0.4927, "step": 3452 }, { "epoch": 0.4202007909948281, "grad_norm": 1.3928618431091309, "learning_rate": 1.819993422866051e-05, "loss": 0.4947, "step": 3453 }, { "epoch": 0.4203224825068452, "grad_norm": 4.294394016265869, "learning_rate": 1.8198817548200573e-05, "loss": 0.4146, "step": 3454 }, { "epoch": 0.4204441740188622, "grad_norm": 1.6274051666259766, "learning_rate": 1.8197700555758656e-05, "loss": 0.4835, "step": 3455 }, { "epoch": 0.4205658655308792, "grad_norm": 2.516552448272705, "learning_rate": 1.8196583251377266e-05, "loss": 0.4625, "step": 3456 }, { "epoch": 0.42068755704289623, "grad_norm": 1.104288101196289, "learning_rate": 1.819546563509892e-05, "loss": 0.4258, "step": 3457 }, { "epoch": 0.4208092485549133, "grad_norm": 5.030282020568848, "learning_rate": 1.819434770696614e-05, "loss": 0.5489, "step": 3458 }, { "epoch": 0.4209309400669303, "grad_norm": 3.4844894409179688, "learning_rate": 1.8193229467021468e-05, "loss": 0.5059, "step": 3459 }, { "epoch": 0.42105263157894735, "grad_norm": 3.8286397457122803, "learning_rate": 1.8192110915307454e-05, "loss": 0.4693, "step": 3460 }, { "epoch": 0.4211743230909644, "grad_norm": 4.959057807922363, "learning_rate": 1.8190992051866664e-05, "loss": 0.5689, "step": 3461 }, { "epoch": 0.42129601460298144, "grad_norm": 2.2343626022338867, "learning_rate": 1.8189872876741676e-05, "loss": 0.4614, "step": 3462 }, { "epoch": 0.42141770611499846, "grad_norm": 2.78779935836792, "learning_rate": 1.818875338997507e-05, "loss": 0.5115, "step": 3463 }, { "epoch": 0.42153939762701553, "grad_norm": 0.9812670946121216, "learning_rate": 1.818763359160945e-05, "loss": 0.4569, "step": 3464 }, { "epoch": 0.42166108913903255, "grad_norm": 2.293518304824829, "learning_rate": 1.818651348168742e-05, "loss": 0.4576, "step": 3465 }, { "epoch": 0.42178278065104957, "grad_norm": 2.0170249938964844, "learning_rate": 1.818539306025161e-05, "loss": 0.457, "step": 3466 }, { "epoch": 0.42190447216306665, "grad_norm": 2.2560160160064697, "learning_rate": 1.818427232734465e-05, "loss": 0.481, "step": 3467 }, { "epoch": 0.42202616367508367, "grad_norm": 3.352612018585205, "learning_rate": 1.8183151283009186e-05, "loss": 0.4166, "step": 3468 }, { "epoch": 0.4221478551871007, "grad_norm": 0.5273807644844055, "learning_rate": 1.8182029927287878e-05, "loss": 0.4574, "step": 3469 }, { "epoch": 0.42226954669911776, "grad_norm": 0.9066219925880432, "learning_rate": 1.8180908260223398e-05, "loss": 0.4589, "step": 3470 }, { "epoch": 0.4223912382111348, "grad_norm": 1.6761623620986938, "learning_rate": 1.8179786281858423e-05, "loss": 0.468, "step": 3471 }, { "epoch": 0.4225129297231518, "grad_norm": 2.475247859954834, "learning_rate": 1.817866399223565e-05, "loss": 0.55, "step": 3472 }, { "epoch": 0.4226346212351689, "grad_norm": 1.0085084438323975, "learning_rate": 1.8177541391397783e-05, "loss": 0.4677, "step": 3473 }, { "epoch": 0.4227563127471859, "grad_norm": 1.6037474870681763, "learning_rate": 1.8176418479387536e-05, "loss": 0.4184, "step": 3474 }, { "epoch": 0.4228780042592029, "grad_norm": 4.496516227722168, "learning_rate": 1.8175295256247648e-05, "loss": 0.3825, "step": 3475 }, { "epoch": 0.42299969577121993, "grad_norm": 0.6811562180519104, "learning_rate": 1.817417172202085e-05, "loss": 0.4389, "step": 3476 }, { "epoch": 0.423121387283237, "grad_norm": 1.438529372215271, "learning_rate": 1.8173047876749898e-05, "loss": 0.4591, "step": 3477 }, { "epoch": 0.423243078795254, "grad_norm": 1.5971885919570923, "learning_rate": 1.817192372047756e-05, "loss": 0.4761, "step": 3478 }, { "epoch": 0.42336477030727104, "grad_norm": 5.686800003051758, "learning_rate": 1.81707992532466e-05, "loss": 0.5873, "step": 3479 }, { "epoch": 0.4234864618192881, "grad_norm": 0.879494845867157, "learning_rate": 1.8169674475099827e-05, "loss": 0.4553, "step": 3480 }, { "epoch": 0.42360815333130514, "grad_norm": 2.3507444858551025, "learning_rate": 1.8168549386080024e-05, "loss": 0.4739, "step": 3481 }, { "epoch": 0.42372984484332216, "grad_norm": 1.277342438697815, "learning_rate": 1.8167423986230013e-05, "loss": 0.4308, "step": 3482 }, { "epoch": 0.42385153635533923, "grad_norm": 1.3545634746551514, "learning_rate": 1.8166298275592612e-05, "loss": 0.4413, "step": 3483 }, { "epoch": 0.42397322786735625, "grad_norm": 1.9282746315002441, "learning_rate": 1.8165172254210658e-05, "loss": 0.4137, "step": 3484 }, { "epoch": 0.42409491937937327, "grad_norm": 3.0479226112365723, "learning_rate": 1.8164045922127e-05, "loss": 0.4864, "step": 3485 }, { "epoch": 0.42421661089139034, "grad_norm": 1.064579963684082, "learning_rate": 1.816291927938449e-05, "loss": 0.469, "step": 3486 }, { "epoch": 0.42433830240340736, "grad_norm": 2.6730575561523438, "learning_rate": 1.816179232602601e-05, "loss": 0.4854, "step": 3487 }, { "epoch": 0.4244599939154244, "grad_norm": 0.7642972469329834, "learning_rate": 1.816066506209444e-05, "loss": 0.4122, "step": 3488 }, { "epoch": 0.42458168542744146, "grad_norm": 2.3168811798095703, "learning_rate": 1.815953748763267e-05, "loss": 0.4776, "step": 3489 }, { "epoch": 0.4247033769394585, "grad_norm": 1.2830462455749512, "learning_rate": 1.815840960268361e-05, "loss": 0.5178, "step": 3490 }, { "epoch": 0.4248250684514755, "grad_norm": 2.7923495769500732, "learning_rate": 1.8157281407290176e-05, "loss": 0.4382, "step": 3491 }, { "epoch": 0.42494675996349257, "grad_norm": 1.8422168493270874, "learning_rate": 1.8156152901495302e-05, "loss": 0.5148, "step": 3492 }, { "epoch": 0.4250684514755096, "grad_norm": 3.1832783222198486, "learning_rate": 1.8155024085341924e-05, "loss": 0.4743, "step": 3493 }, { "epoch": 0.4251901429875266, "grad_norm": 4.278620719909668, "learning_rate": 1.8153894958873005e-05, "loss": 0.4392, "step": 3494 }, { "epoch": 0.4253118344995437, "grad_norm": 5.984546661376953, "learning_rate": 1.8152765522131503e-05, "loss": 0.4783, "step": 3495 }, { "epoch": 0.4254335260115607, "grad_norm": 2.6696994304656982, "learning_rate": 1.8151635775160396e-05, "loss": 0.5193, "step": 3496 }, { "epoch": 0.4255552175235777, "grad_norm": 1.6294728517532349, "learning_rate": 1.8150505718002676e-05, "loss": 0.4854, "step": 3497 }, { "epoch": 0.42567690903559474, "grad_norm": 1.8985587358474731, "learning_rate": 1.814937535070134e-05, "loss": 0.5055, "step": 3498 }, { "epoch": 0.4257986005476118, "grad_norm": 1.1241909265518188, "learning_rate": 1.814824467329941e-05, "loss": 0.5013, "step": 3499 }, { "epoch": 0.42592029205962884, "grad_norm": 0.8562899827957153, "learning_rate": 1.8147113685839897e-05, "loss": 0.4742, "step": 3500 }, { "epoch": 0.42604198357164585, "grad_norm": 0.9550133347511292, "learning_rate": 1.814598238836585e-05, "loss": 0.4552, "step": 3501 }, { "epoch": 0.42616367508366293, "grad_norm": 0.7729173898696899, "learning_rate": 1.814485078092031e-05, "loss": 0.4681, "step": 3502 }, { "epoch": 0.42628536659567995, "grad_norm": 0.6184672713279724, "learning_rate": 1.814371886354634e-05, "loss": 0.4711, "step": 3503 }, { "epoch": 0.42640705810769697, "grad_norm": 1.3956339359283447, "learning_rate": 1.8142586636287006e-05, "loss": 0.4401, "step": 3504 }, { "epoch": 0.42652874961971404, "grad_norm": 1.0720890760421753, "learning_rate": 1.8141454099185403e-05, "loss": 0.4321, "step": 3505 }, { "epoch": 0.42665044113173106, "grad_norm": 1.84479558467865, "learning_rate": 1.8140321252284617e-05, "loss": 0.4683, "step": 3506 }, { "epoch": 0.4267721326437481, "grad_norm": 2.46502685546875, "learning_rate": 1.8139188095627758e-05, "loss": 0.4028, "step": 3507 }, { "epoch": 0.42689382415576516, "grad_norm": 0.7737486958503723, "learning_rate": 1.8138054629257943e-05, "loss": 0.5058, "step": 3508 }, { "epoch": 0.4270155156677822, "grad_norm": 2.7997729778289795, "learning_rate": 1.8136920853218308e-05, "loss": 0.4604, "step": 3509 }, { "epoch": 0.4271372071797992, "grad_norm": 1.1051172018051147, "learning_rate": 1.813578676755199e-05, "loss": 0.523, "step": 3510 }, { "epoch": 0.42725889869181627, "grad_norm": 4.384588718414307, "learning_rate": 1.813465237230214e-05, "loss": 0.4176, "step": 3511 }, { "epoch": 0.4273805902038333, "grad_norm": 1.8338876962661743, "learning_rate": 1.8133517667511936e-05, "loss": 0.4727, "step": 3512 }, { "epoch": 0.4275022817158503, "grad_norm": 0.812783420085907, "learning_rate": 1.813238265322455e-05, "loss": 0.4329, "step": 3513 }, { "epoch": 0.4276239732278674, "grad_norm": 1.7692828178405762, "learning_rate": 1.8131247329483167e-05, "loss": 0.4753, "step": 3514 }, { "epoch": 0.4277456647398844, "grad_norm": 4.737448692321777, "learning_rate": 1.8130111696330995e-05, "loss": 0.5285, "step": 3515 }, { "epoch": 0.4278673562519014, "grad_norm": 4.863689422607422, "learning_rate": 1.8128975753811242e-05, "loss": 0.5593, "step": 3516 }, { "epoch": 0.42798904776391844, "grad_norm": 2.1955201625823975, "learning_rate": 1.812783950196714e-05, "loss": 0.4665, "step": 3517 }, { "epoch": 0.4281107392759355, "grad_norm": 2.6062629222869873, "learning_rate": 1.8126702940841917e-05, "loss": 0.4715, "step": 3518 }, { "epoch": 0.42823243078795253, "grad_norm": 2.4371426105499268, "learning_rate": 1.8125566070478825e-05, "loss": 0.3948, "step": 3519 }, { "epoch": 0.42835412229996955, "grad_norm": 0.6621604561805725, "learning_rate": 1.8124428890921128e-05, "loss": 0.4342, "step": 3520 }, { "epoch": 0.4284758138119866, "grad_norm": 0.9522155523300171, "learning_rate": 1.8123291402212096e-05, "loss": 0.486, "step": 3521 }, { "epoch": 0.42859750532400365, "grad_norm": 0.8200867176055908, "learning_rate": 1.812215360439501e-05, "loss": 0.4563, "step": 3522 }, { "epoch": 0.42871919683602067, "grad_norm": 0.7225762605667114, "learning_rate": 1.8121015497513166e-05, "loss": 0.4863, "step": 3523 }, { "epoch": 0.42884088834803774, "grad_norm": 0.9208165407180786, "learning_rate": 1.8119877081609876e-05, "loss": 0.4696, "step": 3524 }, { "epoch": 0.42896257986005476, "grad_norm": 2.1651906967163086, "learning_rate": 1.811873835672845e-05, "loss": 0.4358, "step": 3525 }, { "epoch": 0.4290842713720718, "grad_norm": 1.9516147375106812, "learning_rate": 1.811759932291223e-05, "loss": 0.5196, "step": 3526 }, { "epoch": 0.42920596288408885, "grad_norm": 2.269613742828369, "learning_rate": 1.811645998020455e-05, "loss": 0.3981, "step": 3527 }, { "epoch": 0.4293276543961059, "grad_norm": 1.2206636667251587, "learning_rate": 1.8115320328648774e-05, "loss": 0.453, "step": 3528 }, { "epoch": 0.4294493459081229, "grad_norm": 0.9070826172828674, "learning_rate": 1.8114180368288257e-05, "loss": 0.4432, "step": 3529 }, { "epoch": 0.42957103742013997, "grad_norm": 0.7746086120605469, "learning_rate": 1.8113040099166383e-05, "loss": 0.4246, "step": 3530 }, { "epoch": 0.429692728932157, "grad_norm": 3.6724777221679688, "learning_rate": 1.8111899521326535e-05, "loss": 0.5331, "step": 3531 }, { "epoch": 0.429814420444174, "grad_norm": 2.6812140941619873, "learning_rate": 1.8110758634812123e-05, "loss": 0.4455, "step": 3532 }, { "epoch": 0.4299361119561911, "grad_norm": 1.895676851272583, "learning_rate": 1.810961743966656e-05, "loss": 0.4869, "step": 3533 }, { "epoch": 0.4300578034682081, "grad_norm": 4.820573806762695, "learning_rate": 1.8108475935933263e-05, "loss": 0.5522, "step": 3534 }, { "epoch": 0.4301794949802251, "grad_norm": 0.795985221862793, "learning_rate": 1.8107334123655675e-05, "loss": 0.4574, "step": 3535 }, { "epoch": 0.4303011864922422, "grad_norm": 2.154789686203003, "learning_rate": 1.8106192002877243e-05, "loss": 0.5156, "step": 3536 }, { "epoch": 0.4304228780042592, "grad_norm": 0.7947393655776978, "learning_rate": 1.8105049573641423e-05, "loss": 0.4721, "step": 3537 }, { "epoch": 0.43054456951627623, "grad_norm": 2.2035183906555176, "learning_rate": 1.8103906835991694e-05, "loss": 0.4401, "step": 3538 }, { "epoch": 0.43066626102829325, "grad_norm": 1.8686167001724243, "learning_rate": 1.8102763789971534e-05, "loss": 0.4722, "step": 3539 }, { "epoch": 0.4307879525403103, "grad_norm": 4.780008316040039, "learning_rate": 1.810162043562444e-05, "loss": 0.3956, "step": 3540 }, { "epoch": 0.43090964405232735, "grad_norm": 2.7494750022888184, "learning_rate": 1.8100476772993918e-05, "loss": 0.4507, "step": 3541 }, { "epoch": 0.43103133556434436, "grad_norm": 0.79237961769104, "learning_rate": 1.809933280212349e-05, "loss": 0.4862, "step": 3542 }, { "epoch": 0.43115302707636144, "grad_norm": 2.263197660446167, "learning_rate": 1.8098188523056685e-05, "loss": 0.4596, "step": 3543 }, { "epoch": 0.43127471858837846, "grad_norm": 2.3083460330963135, "learning_rate": 1.809704393583704e-05, "loss": 0.3967, "step": 3544 }, { "epoch": 0.4313964101003955, "grad_norm": 0.7695382833480835, "learning_rate": 1.8095899040508115e-05, "loss": 0.4085, "step": 3545 }, { "epoch": 0.43151810161241255, "grad_norm": 1.6725908517837524, "learning_rate": 1.8094753837113473e-05, "loss": 0.4756, "step": 3546 }, { "epoch": 0.43163979312442957, "grad_norm": 3.4811530113220215, "learning_rate": 1.8093608325696695e-05, "loss": 0.5039, "step": 3547 }, { "epoch": 0.4317614846364466, "grad_norm": 1.961618185043335, "learning_rate": 1.8092462506301367e-05, "loss": 0.4096, "step": 3548 }, { "epoch": 0.43188317614846367, "grad_norm": 3.5613763332366943, "learning_rate": 1.809131637897109e-05, "loss": 0.4797, "step": 3549 }, { "epoch": 0.4320048676604807, "grad_norm": 0.792500376701355, "learning_rate": 1.8090169943749477e-05, "loss": 0.428, "step": 3550 }, { "epoch": 0.4321265591724977, "grad_norm": 0.8221563100814819, "learning_rate": 1.8089023200680152e-05, "loss": 0.4576, "step": 3551 }, { "epoch": 0.4322482506845148, "grad_norm": 1.3412209749221802, "learning_rate": 1.8087876149806746e-05, "loss": 0.4646, "step": 3552 }, { "epoch": 0.4323699421965318, "grad_norm": 1.2393990755081177, "learning_rate": 1.8086728791172917e-05, "loss": 0.5035, "step": 3553 }, { "epoch": 0.4324916337085488, "grad_norm": 1.7637604475021362, "learning_rate": 1.808558112482232e-05, "loss": 0.423, "step": 3554 }, { "epoch": 0.4326133252205659, "grad_norm": 1.3344647884368896, "learning_rate": 1.808443315079862e-05, "loss": 0.5133, "step": 3555 }, { "epoch": 0.4327350167325829, "grad_norm": 0.5926569700241089, "learning_rate": 1.8083284869145504e-05, "loss": 0.48, "step": 3556 }, { "epoch": 0.43285670824459993, "grad_norm": 1.8394889831542969, "learning_rate": 1.8082136279906672e-05, "loss": 0.4413, "step": 3557 }, { "epoch": 0.43297839975661695, "grad_norm": 0.5895689129829407, "learning_rate": 1.8080987383125826e-05, "loss": 0.4586, "step": 3558 }, { "epoch": 0.433100091268634, "grad_norm": 3.8162670135498047, "learning_rate": 1.8079838178846677e-05, "loss": 0.4662, "step": 3559 }, { "epoch": 0.43322178278065104, "grad_norm": 0.640853762626648, "learning_rate": 1.8078688667112965e-05, "loss": 0.5245, "step": 3560 }, { "epoch": 0.43334347429266806, "grad_norm": 1.9288088083267212, "learning_rate": 1.807753884796843e-05, "loss": 0.4369, "step": 3561 }, { "epoch": 0.43346516580468514, "grad_norm": 0.737969696521759, "learning_rate": 1.807638872145682e-05, "loss": 0.4464, "step": 3562 }, { "epoch": 0.43358685731670216, "grad_norm": 4.160451412200928, "learning_rate": 1.80752382876219e-05, "loss": 0.5351, "step": 3563 }, { "epoch": 0.4337085488287192, "grad_norm": 1.9113166332244873, "learning_rate": 1.8074087546507448e-05, "loss": 0.4893, "step": 3564 }, { "epoch": 0.43383024034073625, "grad_norm": 2.4951300621032715, "learning_rate": 1.8072936498157254e-05, "loss": 0.4872, "step": 3565 }, { "epoch": 0.43395193185275327, "grad_norm": 3.5019259452819824, "learning_rate": 1.8071785142615118e-05, "loss": 0.5351, "step": 3566 }, { "epoch": 0.4340736233647703, "grad_norm": 1.509994387626648, "learning_rate": 1.807063347992485e-05, "loss": 0.5239, "step": 3567 }, { "epoch": 0.43419531487678736, "grad_norm": 4.691476821899414, "learning_rate": 1.806948151013027e-05, "loss": 0.4274, "step": 3568 }, { "epoch": 0.4343170063888044, "grad_norm": 2.923957586288452, "learning_rate": 1.8068329233275215e-05, "loss": 0.4908, "step": 3569 }, { "epoch": 0.4344386979008214, "grad_norm": 4.793639659881592, "learning_rate": 1.8067176649403535e-05, "loss": 0.4861, "step": 3570 }, { "epoch": 0.4345603894128385, "grad_norm": 3.9228663444519043, "learning_rate": 1.806602375855908e-05, "loss": 0.4801, "step": 3571 }, { "epoch": 0.4346820809248555, "grad_norm": 3.6681878566741943, "learning_rate": 1.8064870560785734e-05, "loss": 0.4598, "step": 3572 }, { "epoch": 0.4348037724368725, "grad_norm": 1.9317867755889893, "learning_rate": 1.8063717056127362e-05, "loss": 0.4952, "step": 3573 }, { "epoch": 0.4349254639488896, "grad_norm": 2.479525327682495, "learning_rate": 1.806256324462787e-05, "loss": 0.4097, "step": 3574 }, { "epoch": 0.4350471554609066, "grad_norm": 0.7312712073326111, "learning_rate": 1.8061409126331157e-05, "loss": 0.4804, "step": 3575 }, { "epoch": 0.43516884697292363, "grad_norm": 1.7317736148834229, "learning_rate": 1.8060254701281137e-05, "loss": 0.4731, "step": 3576 }, { "epoch": 0.43529053848494065, "grad_norm": 2.882720470428467, "learning_rate": 1.8059099969521747e-05, "loss": 0.5266, "step": 3577 }, { "epoch": 0.4354122299969577, "grad_norm": 0.724238932132721, "learning_rate": 1.8057944931096914e-05, "loss": 0.3851, "step": 3578 }, { "epoch": 0.43553392150897474, "grad_norm": 3.183248281478882, "learning_rate": 1.8056789586050603e-05, "loss": 0.5077, "step": 3579 }, { "epoch": 0.43565561302099176, "grad_norm": 1.8994688987731934, "learning_rate": 1.805563393442677e-05, "loss": 0.4547, "step": 3580 }, { "epoch": 0.43577730453300884, "grad_norm": 3.0137557983398438, "learning_rate": 1.8054477976269394e-05, "loss": 0.4719, "step": 3581 }, { "epoch": 0.43589899604502585, "grad_norm": 2.221926212310791, "learning_rate": 1.805332171162246e-05, "loss": 0.5107, "step": 3582 }, { "epoch": 0.4360206875570429, "grad_norm": 2.2234017848968506, "learning_rate": 1.805216514052996e-05, "loss": 0.4966, "step": 3583 }, { "epoch": 0.43614237906905995, "grad_norm": 4.759458065032959, "learning_rate": 1.8051008263035913e-05, "loss": 0.5852, "step": 3584 }, { "epoch": 0.43626407058107697, "grad_norm": 3.5544915199279785, "learning_rate": 1.8049851079184337e-05, "loss": 0.4546, "step": 3585 }, { "epoch": 0.436385762093094, "grad_norm": 3.342607259750366, "learning_rate": 1.8048693589019267e-05, "loss": 0.5096, "step": 3586 }, { "epoch": 0.43650745360511106, "grad_norm": 6.241463661193848, "learning_rate": 1.804753579258474e-05, "loss": 0.4798, "step": 3587 }, { "epoch": 0.4366291451171281, "grad_norm": 5.943205833435059, "learning_rate": 1.8046377689924823e-05, "loss": 0.5163, "step": 3588 }, { "epoch": 0.4367508366291451, "grad_norm": 4.17158842086792, "learning_rate": 1.804521928108358e-05, "loss": 0.5011, "step": 3589 }, { "epoch": 0.4368725281411622, "grad_norm": 5.012595176696777, "learning_rate": 1.8044060566105088e-05, "loss": 0.4918, "step": 3590 }, { "epoch": 0.4369942196531792, "grad_norm": 2.1947054862976074, "learning_rate": 1.8042901545033446e-05, "loss": 0.5261, "step": 3591 }, { "epoch": 0.4371159111651962, "grad_norm": 3.3039352893829346, "learning_rate": 1.8041742217912746e-05, "loss": 0.4359, "step": 3592 }, { "epoch": 0.4372376026772133, "grad_norm": 0.761996865272522, "learning_rate": 1.804058258478711e-05, "loss": 0.4397, "step": 3593 }, { "epoch": 0.4373592941892303, "grad_norm": 1.6251555681228638, "learning_rate": 1.8039422645700672e-05, "loss": 0.4399, "step": 3594 }, { "epoch": 0.4374809857012473, "grad_norm": 3.0419235229492188, "learning_rate": 1.8038262400697554e-05, "loss": 0.4908, "step": 3595 }, { "epoch": 0.4376026772132644, "grad_norm": 2.6626012325286865, "learning_rate": 1.803710184982191e-05, "loss": 0.4483, "step": 3596 }, { "epoch": 0.4377243687252814, "grad_norm": 5.830317497253418, "learning_rate": 1.803594099311791e-05, "loss": 0.5787, "step": 3597 }, { "epoch": 0.43784606023729844, "grad_norm": 0.9365830421447754, "learning_rate": 1.8034779830629725e-05, "loss": 0.4021, "step": 3598 }, { "epoch": 0.43796775174931546, "grad_norm": 1.5541876554489136, "learning_rate": 1.803361836240153e-05, "loss": 0.4361, "step": 3599 }, { "epoch": 0.43808944326133253, "grad_norm": 1.2261126041412354, "learning_rate": 1.8032456588477533e-05, "loss": 0.4349, "step": 3600 }, { "epoch": 0.43821113477334955, "grad_norm": 1.5325047969818115, "learning_rate": 1.803129450890193e-05, "loss": 0.5074, "step": 3601 }, { "epoch": 0.4383328262853666, "grad_norm": 0.6190427541732788, "learning_rate": 1.8030132123718953e-05, "loss": 0.4453, "step": 3602 }, { "epoch": 0.43845451779738365, "grad_norm": 3.680974245071411, "learning_rate": 1.8028969432972826e-05, "loss": 0.4604, "step": 3603 }, { "epoch": 0.43857620930940067, "grad_norm": 2.0287678241729736, "learning_rate": 1.8027806436707792e-05, "loss": 0.4754, "step": 3604 }, { "epoch": 0.4386979008214177, "grad_norm": 2.9809305667877197, "learning_rate": 1.8026643134968106e-05, "loss": 0.4819, "step": 3605 }, { "epoch": 0.43881959233343476, "grad_norm": 6.6440863609313965, "learning_rate": 1.802547952779803e-05, "loss": 0.4978, "step": 3606 }, { "epoch": 0.4389412838454518, "grad_norm": 2.136735200881958, "learning_rate": 1.8024315615241853e-05, "loss": 0.4557, "step": 3607 }, { "epoch": 0.4390629753574688, "grad_norm": 3.9290289878845215, "learning_rate": 1.8023151397343857e-05, "loss": 0.4094, "step": 3608 }, { "epoch": 0.4391846668694859, "grad_norm": 1.2914810180664062, "learning_rate": 1.8021986874148342e-05, "loss": 0.4297, "step": 3609 }, { "epoch": 0.4393063583815029, "grad_norm": 2.9472551345825195, "learning_rate": 1.8020822045699622e-05, "loss": 0.4975, "step": 3610 }, { "epoch": 0.4394280498935199, "grad_norm": 0.6643158793449402, "learning_rate": 1.801965691204202e-05, "loss": 0.3759, "step": 3611 }, { "epoch": 0.439549741405537, "grad_norm": 2.8930108547210693, "learning_rate": 1.8018491473219875e-05, "loss": 0.5077, "step": 3612 }, { "epoch": 0.439671432917554, "grad_norm": 1.9661660194396973, "learning_rate": 1.8017325729277532e-05, "loss": 0.4441, "step": 3613 }, { "epoch": 0.439793124429571, "grad_norm": 4.3794941902160645, "learning_rate": 1.8016159680259346e-05, "loss": 0.4921, "step": 3614 }, { "epoch": 0.4399148159415881, "grad_norm": 3.2524712085723877, "learning_rate": 1.8014993326209695e-05, "loss": 0.4702, "step": 3615 }, { "epoch": 0.4400365074536051, "grad_norm": 1.2992031574249268, "learning_rate": 1.8013826667172956e-05, "loss": 0.4833, "step": 3616 }, { "epoch": 0.44015819896562214, "grad_norm": 1.4849412441253662, "learning_rate": 1.8012659703193528e-05, "loss": 0.4682, "step": 3617 }, { "epoch": 0.44027989047763916, "grad_norm": 1.713822364807129, "learning_rate": 1.801149243431581e-05, "loss": 0.4883, "step": 3618 }, { "epoch": 0.44040158198965623, "grad_norm": 1.120497465133667, "learning_rate": 1.801032486058422e-05, "loss": 0.4824, "step": 3619 }, { "epoch": 0.44052327350167325, "grad_norm": 3.9051904678344727, "learning_rate": 1.8009156982043192e-05, "loss": 0.4432, "step": 3620 }, { "epoch": 0.44064496501369027, "grad_norm": 4.045989990234375, "learning_rate": 1.8007988798737163e-05, "loss": 0.4757, "step": 3621 }, { "epoch": 0.44076665652570735, "grad_norm": 4.53957986831665, "learning_rate": 1.8006820310710584e-05, "loss": 0.4469, "step": 3622 }, { "epoch": 0.44088834803772436, "grad_norm": 1.0481483936309814, "learning_rate": 1.8005651518007917e-05, "loss": 0.5475, "step": 3623 }, { "epoch": 0.4410100395497414, "grad_norm": 2.4604034423828125, "learning_rate": 1.8004482420673642e-05, "loss": 0.521, "step": 3624 }, { "epoch": 0.44113173106175846, "grad_norm": 3.3843867778778076, "learning_rate": 1.800331301875224e-05, "loss": 0.4371, "step": 3625 }, { "epoch": 0.4412534225737755, "grad_norm": 3.9853744506835938, "learning_rate": 1.8002143312288213e-05, "loss": 0.4224, "step": 3626 }, { "epoch": 0.4413751140857925, "grad_norm": 1.23611581325531, "learning_rate": 1.8000973301326068e-05, "loss": 0.4731, "step": 3627 }, { "epoch": 0.44149680559780957, "grad_norm": 1.6350594758987427, "learning_rate": 1.799980298591033e-05, "loss": 0.495, "step": 3628 }, { "epoch": 0.4416184971098266, "grad_norm": 3.6791670322418213, "learning_rate": 1.7998632366085527e-05, "loss": 0.4994, "step": 3629 }, { "epoch": 0.4417401886218436, "grad_norm": 3.62467098236084, "learning_rate": 1.799746144189621e-05, "loss": 0.5047, "step": 3630 }, { "epoch": 0.4418618801338607, "grad_norm": 2.546487331390381, "learning_rate": 1.7996290213386927e-05, "loss": 0.4929, "step": 3631 }, { "epoch": 0.4419835716458777, "grad_norm": 1.99821138381958, "learning_rate": 1.799511868060225e-05, "loss": 0.3992, "step": 3632 }, { "epoch": 0.4421052631578947, "grad_norm": 2.085075855255127, "learning_rate": 1.7993946843586766e-05, "loss": 0.4839, "step": 3633 }, { "epoch": 0.4422269546699118, "grad_norm": 0.5979752540588379, "learning_rate": 1.799277470238505e-05, "loss": 0.4611, "step": 3634 }, { "epoch": 0.4423486461819288, "grad_norm": 1.6199902296066284, "learning_rate": 1.7991602257041717e-05, "loss": 0.4078, "step": 3635 }, { "epoch": 0.44247033769394584, "grad_norm": 0.978445291519165, "learning_rate": 1.799042950760137e-05, "loss": 0.4959, "step": 3636 }, { "epoch": 0.4425920292059629, "grad_norm": 0.7537236213684082, "learning_rate": 1.7989256454108644e-05, "loss": 0.4696, "step": 3637 }, { "epoch": 0.44271372071797993, "grad_norm": 2.1018757820129395, "learning_rate": 1.798808309660818e-05, "loss": 0.4545, "step": 3638 }, { "epoch": 0.44283541222999695, "grad_norm": 0.9911224246025085, "learning_rate": 1.798690943514461e-05, "loss": 0.4297, "step": 3639 }, { "epoch": 0.44295710374201397, "grad_norm": 1.6557685136795044, "learning_rate": 1.7985735469762608e-05, "loss": 0.4334, "step": 3640 }, { "epoch": 0.44307879525403104, "grad_norm": 1.01318359375, "learning_rate": 1.798456120050684e-05, "loss": 0.4271, "step": 3641 }, { "epoch": 0.44320048676604806, "grad_norm": 1.455711841583252, "learning_rate": 1.7983386627421996e-05, "loss": 0.4834, "step": 3642 }, { "epoch": 0.4433221782780651, "grad_norm": 1.481998324394226, "learning_rate": 1.7982211750552763e-05, "loss": 0.4819, "step": 3643 }, { "epoch": 0.44344386979008216, "grad_norm": 1.1394925117492676, "learning_rate": 1.798103656994385e-05, "loss": 0.4671, "step": 3644 }, { "epoch": 0.4435655613020992, "grad_norm": 3.476978302001953, "learning_rate": 1.7979861085639975e-05, "loss": 0.5468, "step": 3645 }, { "epoch": 0.4436872528141162, "grad_norm": 0.9046595692634583, "learning_rate": 1.797868529768587e-05, "loss": 0.4951, "step": 3646 }, { "epoch": 0.44380894432613327, "grad_norm": 0.703719437122345, "learning_rate": 1.7977509206126274e-05, "loss": 0.4727, "step": 3647 }, { "epoch": 0.4439306358381503, "grad_norm": 1.4390543699264526, "learning_rate": 1.7976332811005943e-05, "loss": 0.5164, "step": 3648 }, { "epoch": 0.4440523273501673, "grad_norm": 5.06640100479126, "learning_rate": 1.7975156112369634e-05, "loss": 0.4624, "step": 3649 }, { "epoch": 0.4441740188621844, "grad_norm": 2.183990955352783, "learning_rate": 1.797397911026213e-05, "loss": 0.4968, "step": 3650 }, { "epoch": 0.4442957103742014, "grad_norm": 3.0395090579986572, "learning_rate": 1.7972801804728213e-05, "loss": 0.4543, "step": 3651 }, { "epoch": 0.4444174018862184, "grad_norm": 0.6118097305297852, "learning_rate": 1.7971624195812687e-05, "loss": 0.4827, "step": 3652 }, { "epoch": 0.4445390933982355, "grad_norm": 1.3973321914672852, "learning_rate": 1.7970446283560358e-05, "loss": 0.5065, "step": 3653 }, { "epoch": 0.4446607849102525, "grad_norm": 0.7725859880447388, "learning_rate": 1.796926806801605e-05, "loss": 0.509, "step": 3654 }, { "epoch": 0.44478247642226953, "grad_norm": 1.0097979307174683, "learning_rate": 1.7968089549224598e-05, "loss": 0.4639, "step": 3655 }, { "epoch": 0.4449041679342866, "grad_norm": 2.128896713256836, "learning_rate": 1.7966910727230846e-05, "loss": 0.4611, "step": 3656 }, { "epoch": 0.44502585944630363, "grad_norm": 1.722802758216858, "learning_rate": 1.796573160207965e-05, "loss": 0.5142, "step": 3657 }, { "epoch": 0.44514755095832065, "grad_norm": 2.6929211616516113, "learning_rate": 1.796455217381588e-05, "loss": 0.5164, "step": 3658 }, { "epoch": 0.44526924247033767, "grad_norm": 1.578313946723938, "learning_rate": 1.796337244248441e-05, "loss": 0.4304, "step": 3659 }, { "epoch": 0.44539093398235474, "grad_norm": 2.301638126373291, "learning_rate": 1.7962192408130137e-05, "loss": 0.4424, "step": 3660 }, { "epoch": 0.44551262549437176, "grad_norm": 1.1318200826644897, "learning_rate": 1.7961012070797964e-05, "loss": 0.46, "step": 3661 }, { "epoch": 0.4456343170063888, "grad_norm": 0.882349967956543, "learning_rate": 1.79598314305328e-05, "loss": 0.4947, "step": 3662 }, { "epoch": 0.44575600851840586, "grad_norm": 1.035900354385376, "learning_rate": 1.7958650487379574e-05, "loss": 0.4493, "step": 3663 }, { "epoch": 0.4458777000304229, "grad_norm": 4.451942443847656, "learning_rate": 1.795746924138323e-05, "loss": 0.5493, "step": 3664 }, { "epoch": 0.4459993915424399, "grad_norm": 1.1115249395370483, "learning_rate": 1.7956287692588704e-05, "loss": 0.4852, "step": 3665 }, { "epoch": 0.44612108305445697, "grad_norm": 2.1183042526245117, "learning_rate": 1.7955105841040962e-05, "loss": 0.4061, "step": 3666 }, { "epoch": 0.446242774566474, "grad_norm": 1.7583872079849243, "learning_rate": 1.7953923686784977e-05, "loss": 0.4558, "step": 3667 }, { "epoch": 0.446364466078491, "grad_norm": 0.9607902765274048, "learning_rate": 1.7952741229865735e-05, "loss": 0.4705, "step": 3668 }, { "epoch": 0.4464861575905081, "grad_norm": 1.133246898651123, "learning_rate": 1.795155847032823e-05, "loss": 0.462, "step": 3669 }, { "epoch": 0.4466078491025251, "grad_norm": 1.0391075611114502, "learning_rate": 1.795037540821746e-05, "loss": 0.5168, "step": 3670 }, { "epoch": 0.4467295406145421, "grad_norm": 1.3781741857528687, "learning_rate": 1.794919204357845e-05, "loss": 0.4272, "step": 3671 }, { "epoch": 0.4468512321265592, "grad_norm": 0.6506280303001404, "learning_rate": 1.7948008376456235e-05, "loss": 0.48, "step": 3672 }, { "epoch": 0.4469729236385762, "grad_norm": 0.892476499080658, "learning_rate": 1.7946824406895845e-05, "loss": 0.4561, "step": 3673 }, { "epoch": 0.44709461515059323, "grad_norm": 1.248900294303894, "learning_rate": 1.7945640134942336e-05, "loss": 0.5008, "step": 3674 }, { "epoch": 0.4472163066626103, "grad_norm": 1.0760215520858765, "learning_rate": 1.7944455560640775e-05, "loss": 0.4878, "step": 3675 }, { "epoch": 0.4473379981746273, "grad_norm": 3.092639684677124, "learning_rate": 1.794327068403624e-05, "loss": 0.3874, "step": 3676 }, { "epoch": 0.44745968968664435, "grad_norm": 1.7849409580230713, "learning_rate": 1.794208550517381e-05, "loss": 0.4354, "step": 3677 }, { "epoch": 0.4475813811986614, "grad_norm": 1.5701935291290283, "learning_rate": 1.7940900024098585e-05, "loss": 0.4115, "step": 3678 }, { "epoch": 0.44770307271067844, "grad_norm": 1.1873188018798828, "learning_rate": 1.793971424085568e-05, "loss": 0.4755, "step": 3679 }, { "epoch": 0.44782476422269546, "grad_norm": 1.8533138036727905, "learning_rate": 1.7938528155490215e-05, "loss": 0.4706, "step": 3680 }, { "epoch": 0.4479464557347125, "grad_norm": 3.065023422241211, "learning_rate": 1.7937341768047322e-05, "loss": 0.504, "step": 3681 }, { "epoch": 0.44806814724672955, "grad_norm": 1.467176914215088, "learning_rate": 1.793615507857214e-05, "loss": 0.4483, "step": 3682 }, { "epoch": 0.4481898387587466, "grad_norm": 4.730821132659912, "learning_rate": 1.7934968087109837e-05, "loss": 0.5012, "step": 3683 }, { "epoch": 0.4483115302707636, "grad_norm": 0.5738435387611389, "learning_rate": 1.7933780793705572e-05, "loss": 0.4191, "step": 3684 }, { "epoch": 0.44843322178278067, "grad_norm": 0.4980515241622925, "learning_rate": 1.7932593198404524e-05, "loss": 0.4294, "step": 3685 }, { "epoch": 0.4485549132947977, "grad_norm": 2.7868967056274414, "learning_rate": 1.7931405301251885e-05, "loss": 0.4866, "step": 3686 }, { "epoch": 0.4486766048068147, "grad_norm": 1.3674237728118896, "learning_rate": 1.793021710229286e-05, "loss": 0.4485, "step": 3687 }, { "epoch": 0.4487982963188318, "grad_norm": 1.4269791841506958, "learning_rate": 1.7929028601572656e-05, "loss": 0.5182, "step": 3688 }, { "epoch": 0.4489199878308488, "grad_norm": 2.5022637844085693, "learning_rate": 1.7927839799136505e-05, "loss": 0.4469, "step": 3689 }, { "epoch": 0.4490416793428658, "grad_norm": 1.9184142351150513, "learning_rate": 1.7926650695029635e-05, "loss": 0.4679, "step": 3690 }, { "epoch": 0.4491633708548829, "grad_norm": 0.9711014032363892, "learning_rate": 1.7925461289297303e-05, "loss": 0.4837, "step": 3691 }, { "epoch": 0.4492850623668999, "grad_norm": 0.6276342868804932, "learning_rate": 1.7924271581984762e-05, "loss": 0.4912, "step": 3692 }, { "epoch": 0.44940675387891693, "grad_norm": 0.5797700881958008, "learning_rate": 1.7923081573137287e-05, "loss": 0.5034, "step": 3693 }, { "epoch": 0.449528445390934, "grad_norm": 2.8033759593963623, "learning_rate": 1.7921891262800158e-05, "loss": 0.445, "step": 3694 }, { "epoch": 0.449650136902951, "grad_norm": 1.6636983156204224, "learning_rate": 1.7920700651018667e-05, "loss": 0.4711, "step": 3695 }, { "epoch": 0.44977182841496804, "grad_norm": 3.3158512115478516, "learning_rate": 1.7919509737838123e-05, "loss": 0.4434, "step": 3696 }, { "epoch": 0.4498935199269851, "grad_norm": 1.4207450151443481, "learning_rate": 1.7918318523303843e-05, "loss": 0.4615, "step": 3697 }, { "epoch": 0.45001521143900214, "grad_norm": 2.2780520915985107, "learning_rate": 1.791712700746115e-05, "loss": 0.5064, "step": 3698 }, { "epoch": 0.45013690295101916, "grad_norm": 2.2248661518096924, "learning_rate": 1.7915935190355386e-05, "loss": 0.4462, "step": 3699 }, { "epoch": 0.4502585944630362, "grad_norm": 0.8618022203445435, "learning_rate": 1.7914743072031906e-05, "loss": 0.3969, "step": 3700 }, { "epoch": 0.45038028597505325, "grad_norm": 1.1378401517868042, "learning_rate": 1.791355065253607e-05, "loss": 0.3672, "step": 3701 }, { "epoch": 0.45050197748707027, "grad_norm": 4.653499126434326, "learning_rate": 1.7912357931913245e-05, "loss": 0.494, "step": 3702 }, { "epoch": 0.4506236689990873, "grad_norm": 2.465895652770996, "learning_rate": 1.791116491020883e-05, "loss": 0.4396, "step": 3703 }, { "epoch": 0.45074536051110436, "grad_norm": 2.1950838565826416, "learning_rate": 1.7909971587468212e-05, "loss": 0.4512, "step": 3704 }, { "epoch": 0.4508670520231214, "grad_norm": 3.510770320892334, "learning_rate": 1.7908777963736802e-05, "loss": 0.4879, "step": 3705 }, { "epoch": 0.4509887435351384, "grad_norm": 1.9464043378829956, "learning_rate": 1.790758403906002e-05, "loss": 0.4959, "step": 3706 }, { "epoch": 0.4511104350471555, "grad_norm": 1.292353630065918, "learning_rate": 1.79063898134833e-05, "loss": 0.4514, "step": 3707 }, { "epoch": 0.4512321265591725, "grad_norm": 0.5941998362541199, "learning_rate": 1.790519528705208e-05, "loss": 0.4442, "step": 3708 }, { "epoch": 0.4513538180711895, "grad_norm": 1.889320969581604, "learning_rate": 1.790400045981182e-05, "loss": 0.449, "step": 3709 }, { "epoch": 0.4514755095832066, "grad_norm": 1.416853666305542, "learning_rate": 1.7902805331807977e-05, "loss": 0.5016, "step": 3710 }, { "epoch": 0.4515972010952236, "grad_norm": 0.5840568542480469, "learning_rate": 1.7901609903086036e-05, "loss": 0.4585, "step": 3711 }, { "epoch": 0.45171889260724063, "grad_norm": 2.6656131744384766, "learning_rate": 1.7900414173691482e-05, "loss": 0.4889, "step": 3712 }, { "epoch": 0.4518405841192577, "grad_norm": 1.2066540718078613, "learning_rate": 1.7899218143669817e-05, "loss": 0.4516, "step": 3713 }, { "epoch": 0.4519622756312747, "grad_norm": 0.5673147439956665, "learning_rate": 1.789802181306655e-05, "loss": 0.4816, "step": 3714 }, { "epoch": 0.45208396714329174, "grad_norm": 2.687893867492676, "learning_rate": 1.7896825181927203e-05, "loss": 0.504, "step": 3715 }, { "epoch": 0.4522056586553088, "grad_norm": 2.188190460205078, "learning_rate": 1.789562825029732e-05, "loss": 0.5118, "step": 3716 }, { "epoch": 0.45232735016732584, "grad_norm": 2.8252346515655518, "learning_rate": 1.789443101822243e-05, "loss": 0.5364, "step": 3717 }, { "epoch": 0.45244904167934286, "grad_norm": 0.5264298915863037, "learning_rate": 1.7893233485748107e-05, "loss": 0.4892, "step": 3718 }, { "epoch": 0.4525707331913599, "grad_norm": 1.020469069480896, "learning_rate": 1.7892035652919904e-05, "loss": 0.4331, "step": 3719 }, { "epoch": 0.45269242470337695, "grad_norm": 0.6001567840576172, "learning_rate": 1.7890837519783414e-05, "loss": 0.509, "step": 3720 }, { "epoch": 0.45281411621539397, "grad_norm": 1.42613685131073, "learning_rate": 1.788963908638422e-05, "loss": 0.4818, "step": 3721 }, { "epoch": 0.452935807727411, "grad_norm": 4.9061784744262695, "learning_rate": 1.7888440352767927e-05, "loss": 0.4356, "step": 3722 }, { "epoch": 0.45305749923942806, "grad_norm": 4.788461208343506, "learning_rate": 1.7887241318980156e-05, "loss": 0.4378, "step": 3723 }, { "epoch": 0.4531791907514451, "grad_norm": 1.0287023782730103, "learning_rate": 1.7886041985066524e-05, "loss": 0.4683, "step": 3724 }, { "epoch": 0.4533008822634621, "grad_norm": 0.6777575612068176, "learning_rate": 1.788484235107267e-05, "loss": 0.4313, "step": 3725 }, { "epoch": 0.4534225737754792, "grad_norm": 3.7466187477111816, "learning_rate": 1.788364241704425e-05, "loss": 0.5249, "step": 3726 }, { "epoch": 0.4535442652874962, "grad_norm": 0.6062596440315247, "learning_rate": 1.7882442183026908e-05, "loss": 0.4085, "step": 3727 }, { "epoch": 0.4536659567995132, "grad_norm": 4.0420684814453125, "learning_rate": 1.788124164906633e-05, "loss": 0.4956, "step": 3728 }, { "epoch": 0.4537876483115303, "grad_norm": 5.275223255157471, "learning_rate": 1.7880040815208196e-05, "loss": 0.5215, "step": 3729 }, { "epoch": 0.4539093398235473, "grad_norm": 2.709576368331909, "learning_rate": 1.7878839681498195e-05, "loss": 0.4773, "step": 3730 }, { "epoch": 0.45403103133556433, "grad_norm": 1.9356625080108643, "learning_rate": 1.787763824798204e-05, "loss": 0.4709, "step": 3731 }, { "epoch": 0.4541527228475814, "grad_norm": 2.830550193786621, "learning_rate": 1.787643651470544e-05, "loss": 0.4965, "step": 3732 }, { "epoch": 0.4542744143595984, "grad_norm": 2.397204637527466, "learning_rate": 1.7875234481714126e-05, "loss": 0.4983, "step": 3733 }, { "epoch": 0.45439610587161544, "grad_norm": 0.8092364072799683, "learning_rate": 1.7874032149053844e-05, "loss": 0.4383, "step": 3734 }, { "epoch": 0.4545177973836325, "grad_norm": 2.5677106380462646, "learning_rate": 1.787282951677034e-05, "loss": 0.4482, "step": 3735 }, { "epoch": 0.45463948889564954, "grad_norm": 2.810300827026367, "learning_rate": 1.7871626584909374e-05, "loss": 0.4965, "step": 3736 }, { "epoch": 0.45476118040766655, "grad_norm": 2.3000433444976807, "learning_rate": 1.7870423353516723e-05, "loss": 0.4841, "step": 3737 }, { "epoch": 0.45488287191968363, "grad_norm": 4.659969806671143, "learning_rate": 1.7869219822638175e-05, "loss": 0.4329, "step": 3738 }, { "epoch": 0.45500456343170065, "grad_norm": 2.9188547134399414, "learning_rate": 1.786801599231952e-05, "loss": 0.4563, "step": 3739 }, { "epoch": 0.45512625494371767, "grad_norm": 1.448610782623291, "learning_rate": 1.7866811862606576e-05, "loss": 0.5119, "step": 3740 }, { "epoch": 0.4552479464557347, "grad_norm": 1.2721482515335083, "learning_rate": 1.7865607433545154e-05, "loss": 0.5271, "step": 3741 }, { "epoch": 0.45536963796775176, "grad_norm": 4.151176452636719, "learning_rate": 1.786440270518109e-05, "loss": 0.557, "step": 3742 }, { "epoch": 0.4554913294797688, "grad_norm": 0.549390435218811, "learning_rate": 1.7863197677560222e-05, "loss": 0.4078, "step": 3743 }, { "epoch": 0.4556130209917858, "grad_norm": 1.7603040933609009, "learning_rate": 1.786199235072841e-05, "loss": 0.4618, "step": 3744 }, { "epoch": 0.4557347125038029, "grad_norm": 2.47747540473938, "learning_rate": 1.7860786724731512e-05, "loss": 0.4762, "step": 3745 }, { "epoch": 0.4558564040158199, "grad_norm": 1.6292123794555664, "learning_rate": 1.785958079961541e-05, "loss": 0.4939, "step": 3746 }, { "epoch": 0.4559780955278369, "grad_norm": 1.9071540832519531, "learning_rate": 1.785837457542599e-05, "loss": 0.441, "step": 3747 }, { "epoch": 0.456099787039854, "grad_norm": 0.6643903851509094, "learning_rate": 1.785716805220915e-05, "loss": 0.4735, "step": 3748 }, { "epoch": 0.456221478551871, "grad_norm": 1.6004359722137451, "learning_rate": 1.7855961230010804e-05, "loss": 0.5101, "step": 3749 }, { "epoch": 0.456343170063888, "grad_norm": 0.7148923873901367, "learning_rate": 1.7854754108876874e-05, "loss": 0.4871, "step": 3750 }, { "epoch": 0.4564648615759051, "grad_norm": 3.2015488147735596, "learning_rate": 1.785354668885329e-05, "loss": 0.4596, "step": 3751 }, { "epoch": 0.4565865530879221, "grad_norm": 3.9936769008636475, "learning_rate": 1.7852338969985996e-05, "loss": 0.4663, "step": 3752 }, { "epoch": 0.45670824459993914, "grad_norm": 3.0331928730010986, "learning_rate": 1.7851130952320953e-05, "loss": 0.4671, "step": 3753 }, { "epoch": 0.4568299361119562, "grad_norm": 0.7422975897789001, "learning_rate": 1.7849922635904127e-05, "loss": 0.5073, "step": 3754 }, { "epoch": 0.45695162762397323, "grad_norm": 3.285104513168335, "learning_rate": 1.7848714020781495e-05, "loss": 0.4424, "step": 3755 }, { "epoch": 0.45707331913599025, "grad_norm": 1.2517046928405762, "learning_rate": 1.7847505106999047e-05, "loss": 0.453, "step": 3756 }, { "epoch": 0.4571950106480073, "grad_norm": 1.3074380159378052, "learning_rate": 1.7846295894602787e-05, "loss": 0.4556, "step": 3757 }, { "epoch": 0.45731670216002435, "grad_norm": 0.7568677663803101, "learning_rate": 1.7845086383638733e-05, "loss": 0.463, "step": 3758 }, { "epoch": 0.45743839367204137, "grad_norm": 0.7038098573684692, "learning_rate": 1.7843876574152896e-05, "loss": 0.4344, "step": 3759 }, { "epoch": 0.4575600851840584, "grad_norm": 0.8059700727462769, "learning_rate": 1.7842666466191323e-05, "loss": 0.422, "step": 3760 }, { "epoch": 0.45768177669607546, "grad_norm": 1.4205422401428223, "learning_rate": 1.7841456059800057e-05, "loss": 0.4254, "step": 3761 }, { "epoch": 0.4578034682080925, "grad_norm": 4.686393737792969, "learning_rate": 1.7840245355025158e-05, "loss": 0.5376, "step": 3762 }, { "epoch": 0.4579251597201095, "grad_norm": 4.416186332702637, "learning_rate": 1.7839034351912695e-05, "loss": 0.5207, "step": 3763 }, { "epoch": 0.4580468512321266, "grad_norm": 4.030603885650635, "learning_rate": 1.7837823050508748e-05, "loss": 0.5198, "step": 3764 }, { "epoch": 0.4581685427441436, "grad_norm": 0.7521973848342896, "learning_rate": 1.783661145085941e-05, "loss": 0.4701, "step": 3765 }, { "epoch": 0.4582902342561606, "grad_norm": 2.4491348266601562, "learning_rate": 1.7835399553010785e-05, "loss": 0.4408, "step": 3766 }, { "epoch": 0.4584119257681777, "grad_norm": 1.0065889358520508, "learning_rate": 1.783418735700899e-05, "loss": 0.4873, "step": 3767 }, { "epoch": 0.4585336172801947, "grad_norm": 5.402151107788086, "learning_rate": 1.7832974862900147e-05, "loss": 0.4324, "step": 3768 }, { "epoch": 0.4586553087922117, "grad_norm": 3.2790324687957764, "learning_rate": 1.7831762070730402e-05, "loss": 0.5019, "step": 3769 }, { "epoch": 0.4587770003042288, "grad_norm": 4.150466442108154, "learning_rate": 1.7830548980545895e-05, "loss": 0.4097, "step": 3770 }, { "epoch": 0.4588986918162458, "grad_norm": 0.6237979531288147, "learning_rate": 1.7829335592392795e-05, "loss": 0.5079, "step": 3771 }, { "epoch": 0.45902038332826284, "grad_norm": 1.5224637985229492, "learning_rate": 1.7828121906317268e-05, "loss": 0.431, "step": 3772 }, { "epoch": 0.4591420748402799, "grad_norm": 0.721524715423584, "learning_rate": 1.78269079223655e-05, "loss": 0.478, "step": 3773 }, { "epoch": 0.45926376635229693, "grad_norm": 0.9786434769630432, "learning_rate": 1.782569364058368e-05, "loss": 0.4422, "step": 3774 }, { "epoch": 0.45938545786431395, "grad_norm": 0.7823892831802368, "learning_rate": 1.7824479061018025e-05, "loss": 0.4519, "step": 3775 }, { "epoch": 0.459507149376331, "grad_norm": 0.7161319851875305, "learning_rate": 1.7823264183714742e-05, "loss": 0.4428, "step": 3776 }, { "epoch": 0.45962884088834804, "grad_norm": 2.006716728210449, "learning_rate": 1.7822049008720066e-05, "loss": 0.3923, "step": 3777 }, { "epoch": 0.45975053240036506, "grad_norm": 0.8360805511474609, "learning_rate": 1.7820833536080235e-05, "loss": 0.4275, "step": 3778 }, { "epoch": 0.45987222391238214, "grad_norm": 3.4726150035858154, "learning_rate": 1.7819617765841496e-05, "loss": 0.5008, "step": 3779 }, { "epoch": 0.45999391542439916, "grad_norm": 6.386075019836426, "learning_rate": 1.781840169805012e-05, "loss": 0.6021, "step": 3780 }, { "epoch": 0.4601156069364162, "grad_norm": 1.8586344718933105, "learning_rate": 1.7817185332752372e-05, "loss": 0.4739, "step": 3781 }, { "epoch": 0.4602372984484332, "grad_norm": 0.9821096062660217, "learning_rate": 1.7815968669994543e-05, "loss": 0.4423, "step": 3782 }, { "epoch": 0.46035898996045027, "grad_norm": 0.6338329315185547, "learning_rate": 1.7814751709822927e-05, "loss": 0.4833, "step": 3783 }, { "epoch": 0.4604806814724673, "grad_norm": 1.1122868061065674, "learning_rate": 1.7813534452283835e-05, "loss": 0.4164, "step": 3784 }, { "epoch": 0.4606023729844843, "grad_norm": 3.8239831924438477, "learning_rate": 1.7812316897423584e-05, "loss": 0.375, "step": 3785 }, { "epoch": 0.4607240644965014, "grad_norm": 1.563788890838623, "learning_rate": 1.78110990452885e-05, "loss": 0.4274, "step": 3786 }, { "epoch": 0.4608457560085184, "grad_norm": 1.5114758014678955, "learning_rate": 1.7809880895924935e-05, "loss": 0.5033, "step": 3787 }, { "epoch": 0.4609674475205354, "grad_norm": 1.6998744010925293, "learning_rate": 1.7808662449379233e-05, "loss": 0.4505, "step": 3788 }, { "epoch": 0.4610891390325525, "grad_norm": 1.6544452905654907, "learning_rate": 1.780744370569776e-05, "loss": 0.5071, "step": 3789 }, { "epoch": 0.4612108305445695, "grad_norm": 3.371568202972412, "learning_rate": 1.78062246649269e-05, "loss": 0.5407, "step": 3790 }, { "epoch": 0.46133252205658654, "grad_norm": 1.258341670036316, "learning_rate": 1.7805005327113028e-05, "loss": 0.4406, "step": 3791 }, { "epoch": 0.4614542135686036, "grad_norm": 0.6823922395706177, "learning_rate": 1.7803785692302548e-05, "loss": 0.484, "step": 3792 }, { "epoch": 0.46157590508062063, "grad_norm": 0.9683195352554321, "learning_rate": 1.7802565760541873e-05, "loss": 0.4713, "step": 3793 }, { "epoch": 0.46169759659263765, "grad_norm": 4.262025833129883, "learning_rate": 1.7801345531877417e-05, "loss": 0.4367, "step": 3794 }, { "epoch": 0.4618192881046547, "grad_norm": 3.687232255935669, "learning_rate": 1.7800125006355618e-05, "loss": 0.4934, "step": 3795 }, { "epoch": 0.46194097961667174, "grad_norm": 3.1868460178375244, "learning_rate": 1.7798904184022916e-05, "loss": 0.4662, "step": 3796 }, { "epoch": 0.46206267112868876, "grad_norm": 1.6585912704467773, "learning_rate": 1.7797683064925767e-05, "loss": 0.4582, "step": 3797 }, { "epoch": 0.46218436264070584, "grad_norm": 1.2644832134246826, "learning_rate": 1.7796461649110636e-05, "loss": 0.4843, "step": 3798 }, { "epoch": 0.46230605415272286, "grad_norm": 0.7805526852607727, "learning_rate": 1.7795239936624004e-05, "loss": 0.4628, "step": 3799 }, { "epoch": 0.4624277456647399, "grad_norm": 1.4566998481750488, "learning_rate": 1.7794017927512356e-05, "loss": 0.5042, "step": 3800 }, { "epoch": 0.4625494371767569, "grad_norm": 0.6934186816215515, "learning_rate": 1.779279562182219e-05, "loss": 0.4564, "step": 3801 }, { "epoch": 0.46267112868877397, "grad_norm": 0.7121795415878296, "learning_rate": 1.7791573019600024e-05, "loss": 0.4521, "step": 3802 }, { "epoch": 0.462792820200791, "grad_norm": 0.9816343784332275, "learning_rate": 1.7790350120892376e-05, "loss": 0.4665, "step": 3803 }, { "epoch": 0.462914511712808, "grad_norm": 1.8246629238128662, "learning_rate": 1.7789126925745778e-05, "loss": 0.4029, "step": 3804 }, { "epoch": 0.4630362032248251, "grad_norm": 3.1889076232910156, "learning_rate": 1.7787903434206783e-05, "loss": 0.4012, "step": 3805 }, { "epoch": 0.4631578947368421, "grad_norm": 1.9276530742645264, "learning_rate": 1.7786679646321937e-05, "loss": 0.4858, "step": 3806 }, { "epoch": 0.4632795862488591, "grad_norm": 2.8115413188934326, "learning_rate": 1.7785455562137818e-05, "loss": 0.5007, "step": 3807 }, { "epoch": 0.4634012777608762, "grad_norm": 1.1064033508300781, "learning_rate": 1.7784231181700998e-05, "loss": 0.401, "step": 3808 }, { "epoch": 0.4635229692728932, "grad_norm": 1.599065899848938, "learning_rate": 1.7783006505058068e-05, "loss": 0.4867, "step": 3809 }, { "epoch": 0.46364466078491023, "grad_norm": 2.7338004112243652, "learning_rate": 1.778178153225563e-05, "loss": 0.4869, "step": 3810 }, { "epoch": 0.4637663522969273, "grad_norm": 0.6401821374893188, "learning_rate": 1.77805562633403e-05, "loss": 0.4681, "step": 3811 }, { "epoch": 0.46388804380894433, "grad_norm": 1.9539035558700562, "learning_rate": 1.77793306983587e-05, "loss": 0.4517, "step": 3812 }, { "epoch": 0.46400973532096135, "grad_norm": 0.6471558809280396, "learning_rate": 1.7778104837357462e-05, "loss": 0.4657, "step": 3813 }, { "epoch": 0.4641314268329784, "grad_norm": 0.9634807109832764, "learning_rate": 1.7776878680383232e-05, "loss": 0.471, "step": 3814 }, { "epoch": 0.46425311834499544, "grad_norm": 2.7370593547821045, "learning_rate": 1.7775652227482677e-05, "loss": 0.429, "step": 3815 }, { "epoch": 0.46437480985701246, "grad_norm": 2.71479868888855, "learning_rate": 1.7774425478702458e-05, "loss": 0.4274, "step": 3816 }, { "epoch": 0.46449650136902954, "grad_norm": 1.3262461423873901, "learning_rate": 1.7773198434089256e-05, "loss": 0.4955, "step": 3817 }, { "epoch": 0.46461819288104655, "grad_norm": 4.287867546081543, "learning_rate": 1.7771971093689765e-05, "loss": 0.5026, "step": 3818 }, { "epoch": 0.4647398843930636, "grad_norm": 2.4017977714538574, "learning_rate": 1.7770743457550688e-05, "loss": 0.5088, "step": 3819 }, { "epoch": 0.4648615759050806, "grad_norm": 0.6364077925682068, "learning_rate": 1.7769515525718738e-05, "loss": 0.4219, "step": 3820 }, { "epoch": 0.46498326741709767, "grad_norm": 0.9554967284202576, "learning_rate": 1.7768287298240638e-05, "loss": 0.4376, "step": 3821 }, { "epoch": 0.4651049589291147, "grad_norm": 1.2945671081542969, "learning_rate": 1.7767058775163126e-05, "loss": 0.4526, "step": 3822 }, { "epoch": 0.4652266504411317, "grad_norm": 1.7694463729858398, "learning_rate": 1.7765829956532953e-05, "loss": 0.4589, "step": 3823 }, { "epoch": 0.4653483419531488, "grad_norm": 1.8475998640060425, "learning_rate": 1.7764600842396874e-05, "loss": 0.4849, "step": 3824 }, { "epoch": 0.4654700334651658, "grad_norm": 0.6160339117050171, "learning_rate": 1.7763371432801663e-05, "loss": 0.4598, "step": 3825 }, { "epoch": 0.4655917249771828, "grad_norm": 0.7143591642379761, "learning_rate": 1.77621417277941e-05, "loss": 0.5174, "step": 3826 }, { "epoch": 0.4657134164891999, "grad_norm": 1.369228482246399, "learning_rate": 1.7760911727420977e-05, "loss": 0.4692, "step": 3827 }, { "epoch": 0.4658351080012169, "grad_norm": 2.0281574726104736, "learning_rate": 1.7759681431729095e-05, "loss": 0.4916, "step": 3828 }, { "epoch": 0.46595679951323393, "grad_norm": 1.601747989654541, "learning_rate": 1.7758450840765275e-05, "loss": 0.5092, "step": 3829 }, { "epoch": 0.466078491025251, "grad_norm": 1.9887176752090454, "learning_rate": 1.7757219954576345e-05, "loss": 0.4802, "step": 3830 }, { "epoch": 0.466200182537268, "grad_norm": 1.197410225868225, "learning_rate": 1.7755988773209135e-05, "loss": 0.4655, "step": 3831 }, { "epoch": 0.46632187404928505, "grad_norm": 1.164315938949585, "learning_rate": 1.7754757296710502e-05, "loss": 0.5029, "step": 3832 }, { "epoch": 0.4664435655613021, "grad_norm": 2.4914932250976562, "learning_rate": 1.77535255251273e-05, "loss": 0.5014, "step": 3833 }, { "epoch": 0.46656525707331914, "grad_norm": 1.3653773069381714, "learning_rate": 1.77522934585064e-05, "loss": 0.4461, "step": 3834 }, { "epoch": 0.46668694858533616, "grad_norm": 1.459444284439087, "learning_rate": 1.775106109689469e-05, "loss": 0.4332, "step": 3835 }, { "epoch": 0.46680864009735323, "grad_norm": 1.3079185485839844, "learning_rate": 1.774982844033906e-05, "loss": 0.4074, "step": 3836 }, { "epoch": 0.46693033160937025, "grad_norm": 1.0709340572357178, "learning_rate": 1.774859548888642e-05, "loss": 0.4394, "step": 3837 }, { "epoch": 0.46705202312138727, "grad_norm": 5.704166889190674, "learning_rate": 1.774736224258368e-05, "loss": 0.5509, "step": 3838 }, { "epoch": 0.46717371463340435, "grad_norm": 1.1317018270492554, "learning_rate": 1.7746128701477775e-05, "loss": 0.4485, "step": 3839 }, { "epoch": 0.46729540614542137, "grad_norm": 1.9302055835723877, "learning_rate": 1.7744894865615638e-05, "loss": 0.4492, "step": 3840 }, { "epoch": 0.4674170976574384, "grad_norm": 3.718958616256714, "learning_rate": 1.7743660735044216e-05, "loss": 0.4925, "step": 3841 }, { "epoch": 0.4675387891694554, "grad_norm": 1.6200578212738037, "learning_rate": 1.7742426309810475e-05, "loss": 0.4323, "step": 3842 }, { "epoch": 0.4676604806814725, "grad_norm": 0.9008049964904785, "learning_rate": 1.774119158996139e-05, "loss": 0.4503, "step": 3843 }, { "epoch": 0.4677821721934895, "grad_norm": 0.8503767251968384, "learning_rate": 1.773995657554394e-05, "loss": 0.4789, "step": 3844 }, { "epoch": 0.4679038637055065, "grad_norm": 3.2512094974517822, "learning_rate": 1.7738721266605122e-05, "loss": 0.4485, "step": 3845 }, { "epoch": 0.4680255552175236, "grad_norm": 0.7887652516365051, "learning_rate": 1.7737485663191944e-05, "loss": 0.5324, "step": 3846 }, { "epoch": 0.4681472467295406, "grad_norm": 1.9994163513183594, "learning_rate": 1.7736249765351418e-05, "loss": 0.4199, "step": 3847 }, { "epoch": 0.46826893824155763, "grad_norm": 0.9749098420143127, "learning_rate": 1.7735013573130574e-05, "loss": 0.4692, "step": 3848 }, { "epoch": 0.4683906297535747, "grad_norm": 0.7642173171043396, "learning_rate": 1.7733777086576457e-05, "loss": 0.4222, "step": 3849 }, { "epoch": 0.4685123212655917, "grad_norm": 3.0395326614379883, "learning_rate": 1.773254030573611e-05, "loss": 0.4655, "step": 3850 }, { "epoch": 0.46863401277760874, "grad_norm": 4.106037139892578, "learning_rate": 1.7731303230656598e-05, "loss": 0.5463, "step": 3851 }, { "epoch": 0.4687557042896258, "grad_norm": 3.6523020267486572, "learning_rate": 1.7730065861384998e-05, "loss": 0.5447, "step": 3852 }, { "epoch": 0.46887739580164284, "grad_norm": 2.7071120738983154, "learning_rate": 1.772882819796839e-05, "loss": 0.5235, "step": 3853 }, { "epoch": 0.46899908731365986, "grad_norm": 0.5655571222305298, "learning_rate": 1.7727590240453872e-05, "loss": 0.4853, "step": 3854 }, { "epoch": 0.46912077882567693, "grad_norm": 3.7611331939697266, "learning_rate": 1.772635198888855e-05, "loss": 0.4106, "step": 3855 }, { "epoch": 0.46924247033769395, "grad_norm": 0.7470240592956543, "learning_rate": 1.7725113443319545e-05, "loss": 0.4921, "step": 3856 }, { "epoch": 0.46936416184971097, "grad_norm": 0.6359212398529053, "learning_rate": 1.772387460379398e-05, "loss": 0.4929, "step": 3857 }, { "epoch": 0.46948585336172804, "grad_norm": 2.6233646869659424, "learning_rate": 1.7722635470359e-05, "loss": 0.4745, "step": 3858 }, { "epoch": 0.46960754487374506, "grad_norm": 3.774162530899048, "learning_rate": 1.7721396043061758e-05, "loss": 0.4428, "step": 3859 }, { "epoch": 0.4697292363857621, "grad_norm": 0.6341200470924377, "learning_rate": 1.772015632194941e-05, "loss": 0.5379, "step": 3860 }, { "epoch": 0.4698509278977791, "grad_norm": 3.63291597366333, "learning_rate": 1.7718916307069133e-05, "loss": 0.4503, "step": 3861 }, { "epoch": 0.4699726194097962, "grad_norm": 1.582391619682312, "learning_rate": 1.7717675998468116e-05, "loss": 0.448, "step": 3862 }, { "epoch": 0.4700943109218132, "grad_norm": 1.2197394371032715, "learning_rate": 1.7716435396193553e-05, "loss": 0.459, "step": 3863 }, { "epoch": 0.4702160024338302, "grad_norm": 0.724090039730072, "learning_rate": 1.771519450029265e-05, "loss": 0.4733, "step": 3864 }, { "epoch": 0.4703376939458473, "grad_norm": 0.7258306741714478, "learning_rate": 1.7713953310812626e-05, "loss": 0.4367, "step": 3865 }, { "epoch": 0.4704593854578643, "grad_norm": 1.6493923664093018, "learning_rate": 1.7712711827800713e-05, "loss": 0.3821, "step": 3866 }, { "epoch": 0.47058107696988133, "grad_norm": 3.9862005710601807, "learning_rate": 1.7711470051304148e-05, "loss": 0.5409, "step": 3867 }, { "epoch": 0.4707027684818984, "grad_norm": 4.961276531219482, "learning_rate": 1.771022798137019e-05, "loss": 0.5585, "step": 3868 }, { "epoch": 0.4708244599939154, "grad_norm": 2.241069793701172, "learning_rate": 1.7708985618046096e-05, "loss": 0.4895, "step": 3869 }, { "epoch": 0.47094615150593244, "grad_norm": 0.7463817596435547, "learning_rate": 1.770774296137914e-05, "loss": 0.488, "step": 3870 }, { "epoch": 0.4710678430179495, "grad_norm": 0.6749276518821716, "learning_rate": 1.7706500011416616e-05, "loss": 0.4543, "step": 3871 }, { "epoch": 0.47118953452996654, "grad_norm": 2.60927414894104, "learning_rate": 1.7705256768205806e-05, "loss": 0.5627, "step": 3872 }, { "epoch": 0.47131122604198356, "grad_norm": 3.590068817138672, "learning_rate": 1.7704013231794036e-05, "loss": 0.4644, "step": 3873 }, { "epoch": 0.47143291755400063, "grad_norm": 2.517420530319214, "learning_rate": 1.770276940222861e-05, "loss": 0.4595, "step": 3874 }, { "epoch": 0.47155460906601765, "grad_norm": 2.272606372833252, "learning_rate": 1.770152527955687e-05, "loss": 0.4536, "step": 3875 }, { "epoch": 0.47167630057803467, "grad_norm": 2.187446355819702, "learning_rate": 1.7700280863826144e-05, "loss": 0.444, "step": 3876 }, { "epoch": 0.47179799209005174, "grad_norm": 1.4742701053619385, "learning_rate": 1.7699036155083797e-05, "loss": 0.5108, "step": 3877 }, { "epoch": 0.47191968360206876, "grad_norm": 1.1884597539901733, "learning_rate": 1.769779115337719e-05, "loss": 0.4347, "step": 3878 }, { "epoch": 0.4720413751140858, "grad_norm": 1.8193438053131104, "learning_rate": 1.7696545858753693e-05, "loss": 0.4224, "step": 3879 }, { "epoch": 0.47216306662610286, "grad_norm": 2.2266461849212646, "learning_rate": 1.76953002712607e-05, "loss": 0.5117, "step": 3880 }, { "epoch": 0.4722847581381199, "grad_norm": 0.5971178412437439, "learning_rate": 1.7694054390945595e-05, "loss": 0.4135, "step": 3881 }, { "epoch": 0.4724064496501369, "grad_norm": 2.7110724449157715, "learning_rate": 1.7692808217855803e-05, "loss": 0.5097, "step": 3882 }, { "epoch": 0.4725281411621539, "grad_norm": 2.0419669151306152, "learning_rate": 1.769156175203873e-05, "loss": 0.4734, "step": 3883 }, { "epoch": 0.472649832674171, "grad_norm": 1.217822790145874, "learning_rate": 1.7690314993541814e-05, "loss": 0.4206, "step": 3884 }, { "epoch": 0.472771524186188, "grad_norm": 2.3650529384613037, "learning_rate": 1.7689067942412492e-05, "loss": 0.4842, "step": 3885 }, { "epoch": 0.472893215698205, "grad_norm": 0.6739344596862793, "learning_rate": 1.768782059869822e-05, "loss": 0.4683, "step": 3886 }, { "epoch": 0.4730149072102221, "grad_norm": 1.8087377548217773, "learning_rate": 1.768657296244646e-05, "loss": 0.4023, "step": 3887 }, { "epoch": 0.4731365987222391, "grad_norm": 1.481730341911316, "learning_rate": 1.768532503370469e-05, "loss": 0.524, "step": 3888 }, { "epoch": 0.47325829023425614, "grad_norm": 1.256410837173462, "learning_rate": 1.7684076812520397e-05, "loss": 0.4728, "step": 3889 }, { "epoch": 0.4733799817462732, "grad_norm": 2.8640084266662598, "learning_rate": 1.7682828298941074e-05, "loss": 0.465, "step": 3890 }, { "epoch": 0.47350167325829023, "grad_norm": 1.838978886604309, "learning_rate": 1.7681579493014232e-05, "loss": 0.4309, "step": 3891 }, { "epoch": 0.47362336477030725, "grad_norm": 0.6003907918930054, "learning_rate": 1.768033039478739e-05, "loss": 0.4523, "step": 3892 }, { "epoch": 0.47374505628232433, "grad_norm": 0.8352644443511963, "learning_rate": 1.767908100430808e-05, "loss": 0.5105, "step": 3893 }, { "epoch": 0.47386674779434135, "grad_norm": 0.6823041439056396, "learning_rate": 1.767783132162384e-05, "loss": 0.5165, "step": 3894 }, { "epoch": 0.47398843930635837, "grad_norm": 0.9185401797294617, "learning_rate": 1.767658134678223e-05, "loss": 0.4721, "step": 3895 }, { "epoch": 0.47411013081837544, "grad_norm": 1.2255065441131592, "learning_rate": 1.767533107983081e-05, "loss": 0.506, "step": 3896 }, { "epoch": 0.47423182233039246, "grad_norm": 1.4775419235229492, "learning_rate": 1.7674080520817153e-05, "loss": 0.5556, "step": 3897 }, { "epoch": 0.4743535138424095, "grad_norm": 0.6126468777656555, "learning_rate": 1.7672829669788847e-05, "loss": 0.4671, "step": 3898 }, { "epoch": 0.47447520535442655, "grad_norm": 0.8431262373924255, "learning_rate": 1.7671578526793492e-05, "loss": 0.4915, "step": 3899 }, { "epoch": 0.4745968968664436, "grad_norm": 4.0832438468933105, "learning_rate": 1.7670327091878694e-05, "loss": 0.4709, "step": 3900 }, { "epoch": 0.4747185883784606, "grad_norm": 2.341308832168579, "learning_rate": 1.7669075365092073e-05, "loss": 0.482, "step": 3901 }, { "epoch": 0.4748402798904776, "grad_norm": 0.706177830696106, "learning_rate": 1.7667823346481263e-05, "loss": 0.4873, "step": 3902 }, { "epoch": 0.4749619714024947, "grad_norm": 0.6799188256263733, "learning_rate": 1.76665710360939e-05, "loss": 0.4703, "step": 3903 }, { "epoch": 0.4750836629145117, "grad_norm": 1.3929799795150757, "learning_rate": 1.766531843397764e-05, "loss": 0.5084, "step": 3904 }, { "epoch": 0.4752053544265287, "grad_norm": 0.5733524560928345, "learning_rate": 1.766406554018015e-05, "loss": 0.471, "step": 3905 }, { "epoch": 0.4753270459385458, "grad_norm": 1.8817859888076782, "learning_rate": 1.76628123547491e-05, "loss": 0.4149, "step": 3906 }, { "epoch": 0.4754487374505628, "grad_norm": 0.9669296145439148, "learning_rate": 1.766155887773218e-05, "loss": 0.4681, "step": 3907 }, { "epoch": 0.47557042896257984, "grad_norm": 2.8782145977020264, "learning_rate": 1.7660305109177087e-05, "loss": 0.394, "step": 3908 }, { "epoch": 0.4756921204745969, "grad_norm": 0.8162304162979126, "learning_rate": 1.7659051049131527e-05, "loss": 0.4689, "step": 3909 }, { "epoch": 0.47581381198661393, "grad_norm": 1.816863775253296, "learning_rate": 1.765779669764322e-05, "loss": 0.5006, "step": 3910 }, { "epoch": 0.47593550349863095, "grad_norm": 2.4650182723999023, "learning_rate": 1.7656542054759902e-05, "loss": 0.3802, "step": 3911 }, { "epoch": 0.476057195010648, "grad_norm": 2.4364404678344727, "learning_rate": 1.7655287120529307e-05, "loss": 0.4814, "step": 3912 }, { "epoch": 0.47617888652266505, "grad_norm": 1.3512928485870361, "learning_rate": 1.7654031894999192e-05, "loss": 0.4554, "step": 3913 }, { "epoch": 0.47630057803468207, "grad_norm": 2.0477256774902344, "learning_rate": 1.7652776378217324e-05, "loss": 0.3982, "step": 3914 }, { "epoch": 0.47642226954669914, "grad_norm": 2.214003324508667, "learning_rate": 1.7651520570231467e-05, "loss": 0.3799, "step": 3915 }, { "epoch": 0.47654396105871616, "grad_norm": 0.8521255254745483, "learning_rate": 1.7650264471089423e-05, "loss": 0.4711, "step": 3916 }, { "epoch": 0.4766656525707332, "grad_norm": 1.2974274158477783, "learning_rate": 1.7649008080838976e-05, "loss": 0.4625, "step": 3917 }, { "epoch": 0.47678734408275025, "grad_norm": 2.397916078567505, "learning_rate": 1.7647751399527938e-05, "loss": 0.5298, "step": 3918 }, { "epoch": 0.47690903559476727, "grad_norm": 2.0906403064727783, "learning_rate": 1.764649442720413e-05, "loss": 0.4034, "step": 3919 }, { "epoch": 0.4770307271067843, "grad_norm": 1.348837971687317, "learning_rate": 1.7645237163915383e-05, "loss": 0.4621, "step": 3920 }, { "epoch": 0.47715241861880137, "grad_norm": 2.266852855682373, "learning_rate": 1.764397960970954e-05, "loss": 0.4936, "step": 3921 }, { "epoch": 0.4772741101308184, "grad_norm": 1.897757887840271, "learning_rate": 1.7642721764634447e-05, "loss": 0.5249, "step": 3922 }, { "epoch": 0.4773958016428354, "grad_norm": 1.560788631439209, "learning_rate": 1.764146362873797e-05, "loss": 0.4601, "step": 3923 }, { "epoch": 0.4775174931548524, "grad_norm": 2.0800445079803467, "learning_rate": 1.7640205202067985e-05, "loss": 0.5232, "step": 3924 }, { "epoch": 0.4776391846668695, "grad_norm": 2.9814136028289795, "learning_rate": 1.7638946484672382e-05, "loss": 0.5021, "step": 3925 }, { "epoch": 0.4777608761788865, "grad_norm": 6.038138389587402, "learning_rate": 1.763768747659905e-05, "loss": 0.4347, "step": 3926 }, { "epoch": 0.47788256769090354, "grad_norm": 2.1282153129577637, "learning_rate": 1.7636428177895902e-05, "loss": 0.4785, "step": 3927 }, { "epoch": 0.4780042592029206, "grad_norm": 3.8376569747924805, "learning_rate": 1.7635168588610855e-05, "loss": 0.5067, "step": 3928 }, { "epoch": 0.47812595071493763, "grad_norm": 0.8172527551651001, "learning_rate": 1.7633908708791837e-05, "loss": 0.503, "step": 3929 }, { "epoch": 0.47824764222695465, "grad_norm": 2.8463196754455566, "learning_rate": 1.7632648538486792e-05, "loss": 0.4964, "step": 3930 }, { "epoch": 0.4783693337389717, "grad_norm": 0.7011560201644897, "learning_rate": 1.7631388077743675e-05, "loss": 0.4432, "step": 3931 }, { "epoch": 0.47849102525098874, "grad_norm": 1.3020755052566528, "learning_rate": 1.7630127326610442e-05, "loss": 0.467, "step": 3932 }, { "epoch": 0.47861271676300576, "grad_norm": 1.6074851751327515, "learning_rate": 1.762886628513507e-05, "loss": 0.4168, "step": 3933 }, { "epoch": 0.47873440827502284, "grad_norm": 0.9652578234672546, "learning_rate": 1.7627604953365548e-05, "loss": 0.4363, "step": 3934 }, { "epoch": 0.47885609978703986, "grad_norm": 1.9899330139160156, "learning_rate": 1.7626343331349872e-05, "loss": 0.4976, "step": 3935 }, { "epoch": 0.4789777912990569, "grad_norm": 1.2797359228134155, "learning_rate": 1.7625081419136042e-05, "loss": 0.4216, "step": 3936 }, { "epoch": 0.47909948281107395, "grad_norm": 3.8270087242126465, "learning_rate": 1.762381921677208e-05, "loss": 0.523, "step": 3937 }, { "epoch": 0.47922117432309097, "grad_norm": 4.82899808883667, "learning_rate": 1.7622556724306018e-05, "loss": 0.5312, "step": 3938 }, { "epoch": 0.479342865835108, "grad_norm": 1.2736430168151855, "learning_rate": 1.7621293941785893e-05, "loss": 0.442, "step": 3939 }, { "epoch": 0.47946455734712506, "grad_norm": 0.8976635932922363, "learning_rate": 1.7620030869259762e-05, "loss": 0.4657, "step": 3940 }, { "epoch": 0.4795862488591421, "grad_norm": 0.7164414525032043, "learning_rate": 1.7618767506775683e-05, "loss": 0.4773, "step": 3941 }, { "epoch": 0.4797079403711591, "grad_norm": 1.1275383234024048, "learning_rate": 1.7617503854381727e-05, "loss": 0.4578, "step": 3942 }, { "epoch": 0.4798296318831761, "grad_norm": 2.4435224533081055, "learning_rate": 1.7616239912125985e-05, "loss": 0.4429, "step": 3943 }, { "epoch": 0.4799513233951932, "grad_norm": 1.6950342655181885, "learning_rate": 1.761497568005655e-05, "loss": 0.4977, "step": 3944 }, { "epoch": 0.4800730149072102, "grad_norm": 3.6762256622314453, "learning_rate": 1.7613711158221523e-05, "loss": 0.3738, "step": 3945 }, { "epoch": 0.48019470641922724, "grad_norm": 1.2391748428344727, "learning_rate": 1.7612446346669034e-05, "loss": 0.4362, "step": 3946 }, { "epoch": 0.4803163979312443, "grad_norm": 1.591848611831665, "learning_rate": 1.7611181245447203e-05, "loss": 0.4849, "step": 3947 }, { "epoch": 0.48043808944326133, "grad_norm": 2.195936441421509, "learning_rate": 1.7609915854604168e-05, "loss": 0.5437, "step": 3948 }, { "epoch": 0.48055978095527835, "grad_norm": 4.923917293548584, "learning_rate": 1.7608650174188087e-05, "loss": 0.5803, "step": 3949 }, { "epoch": 0.4806814724672954, "grad_norm": 1.3654608726501465, "learning_rate": 1.7607384204247114e-05, "loss": 0.4934, "step": 3950 }, { "epoch": 0.48080316397931244, "grad_norm": 1.8725523948669434, "learning_rate": 1.7606117944829427e-05, "loss": 0.4706, "step": 3951 }, { "epoch": 0.48092485549132946, "grad_norm": 1.3736755847930908, "learning_rate": 1.7604851395983208e-05, "loss": 0.4654, "step": 3952 }, { "epoch": 0.48104654700334654, "grad_norm": 1.4079389572143555, "learning_rate": 1.7603584557756655e-05, "loss": 0.445, "step": 3953 }, { "epoch": 0.48116823851536356, "grad_norm": 1.7103395462036133, "learning_rate": 1.760231743019797e-05, "loss": 0.5362, "step": 3954 }, { "epoch": 0.4812899300273806, "grad_norm": 2.03585147857666, "learning_rate": 1.7601050013355368e-05, "loss": 0.4644, "step": 3955 }, { "epoch": 0.48141162153939765, "grad_norm": 1.4892867803573608, "learning_rate": 1.759978230727708e-05, "loss": 0.5017, "step": 3956 }, { "epoch": 0.48153331305141467, "grad_norm": 2.6341655254364014, "learning_rate": 1.7598514312011348e-05, "loss": 0.4664, "step": 3957 }, { "epoch": 0.4816550045634317, "grad_norm": 0.9715067148208618, "learning_rate": 1.7597246027606417e-05, "loss": 0.5133, "step": 3958 }, { "epoch": 0.48177669607544876, "grad_norm": 2.2763943672180176, "learning_rate": 1.7595977454110547e-05, "loss": 0.4355, "step": 3959 }, { "epoch": 0.4818983875874658, "grad_norm": 3.025862216949463, "learning_rate": 1.7594708591572013e-05, "loss": 0.4154, "step": 3960 }, { "epoch": 0.4820200790994828, "grad_norm": 1.073451042175293, "learning_rate": 1.7593439440039098e-05, "loss": 0.4503, "step": 3961 }, { "epoch": 0.4821417706114998, "grad_norm": 2.2360925674438477, "learning_rate": 1.7592169999560097e-05, "loss": 0.5033, "step": 3962 }, { "epoch": 0.4822634621235169, "grad_norm": 1.2230095863342285, "learning_rate": 1.7590900270183308e-05, "loss": 0.4539, "step": 3963 }, { "epoch": 0.4823851536355339, "grad_norm": 2.842133045196533, "learning_rate": 1.7589630251957052e-05, "loss": 0.4501, "step": 3964 }, { "epoch": 0.48250684514755093, "grad_norm": 1.4633009433746338, "learning_rate": 1.7588359944929658e-05, "loss": 0.4063, "step": 3965 }, { "epoch": 0.482628536659568, "grad_norm": 1.738244891166687, "learning_rate": 1.758708934914946e-05, "loss": 0.4938, "step": 3966 }, { "epoch": 0.482750228171585, "grad_norm": 0.9651433825492859, "learning_rate": 1.758581846466481e-05, "loss": 0.4369, "step": 3967 }, { "epoch": 0.48287191968360205, "grad_norm": 0.5670275092124939, "learning_rate": 1.758454729152406e-05, "loss": 0.4598, "step": 3968 }, { "epoch": 0.4829936111956191, "grad_norm": 2.052802562713623, "learning_rate": 1.7583275829775593e-05, "loss": 0.4577, "step": 3969 }, { "epoch": 0.48311530270763614, "grad_norm": 1.7143601179122925, "learning_rate": 1.7582004079467777e-05, "loss": 0.4412, "step": 3970 }, { "epoch": 0.48323699421965316, "grad_norm": 1.1367214918136597, "learning_rate": 1.7580732040649016e-05, "loss": 0.4603, "step": 3971 }, { "epoch": 0.48335868573167023, "grad_norm": 0.6317379474639893, "learning_rate": 1.757945971336771e-05, "loss": 0.4584, "step": 3972 }, { "epoch": 0.48348037724368725, "grad_norm": 1.0157256126403809, "learning_rate": 1.7578187097672272e-05, "loss": 0.4668, "step": 3973 }, { "epoch": 0.4836020687557043, "grad_norm": 2.4871716499328613, "learning_rate": 1.757691419361113e-05, "loss": 0.5098, "step": 3974 }, { "epoch": 0.48372376026772135, "grad_norm": 0.5689814686775208, "learning_rate": 1.7575641001232718e-05, "loss": 0.4674, "step": 3975 }, { "epoch": 0.48384545177973837, "grad_norm": 1.0546921491622925, "learning_rate": 1.7574367520585486e-05, "loss": 0.4654, "step": 3976 }, { "epoch": 0.4839671432917554, "grad_norm": 1.9708023071289062, "learning_rate": 1.7573093751717895e-05, "loss": 0.4678, "step": 3977 }, { "epoch": 0.48408883480377246, "grad_norm": 2.288806676864624, "learning_rate": 1.757181969467841e-05, "loss": 0.4426, "step": 3978 }, { "epoch": 0.4842105263157895, "grad_norm": 1.6643061637878418, "learning_rate": 1.7570545349515508e-05, "loss": 0.4832, "step": 3979 }, { "epoch": 0.4843322178278065, "grad_norm": 3.4978158473968506, "learning_rate": 1.756927071627769e-05, "loss": 0.5471, "step": 3980 }, { "epoch": 0.4844539093398236, "grad_norm": 0.7838190793991089, "learning_rate": 1.7567995795013454e-05, "loss": 0.4458, "step": 3981 }, { "epoch": 0.4845756008518406, "grad_norm": 3.7718207836151123, "learning_rate": 1.756672058577131e-05, "loss": 0.516, "step": 3982 }, { "epoch": 0.4846972923638576, "grad_norm": 3.249464273452759, "learning_rate": 1.7565445088599788e-05, "loss": 0.5103, "step": 3983 }, { "epoch": 0.48481898387587463, "grad_norm": 0.5579743385314941, "learning_rate": 1.7564169303547415e-05, "loss": 0.4202, "step": 3984 }, { "epoch": 0.4849406753878917, "grad_norm": 0.9039687514305115, "learning_rate": 1.756289323066275e-05, "loss": 0.4614, "step": 3985 }, { "epoch": 0.4850623668999087, "grad_norm": 1.2483718395233154, "learning_rate": 1.756161686999434e-05, "loss": 0.4882, "step": 3986 }, { "epoch": 0.48518405841192574, "grad_norm": 1.0861611366271973, "learning_rate": 1.7560340221590756e-05, "loss": 0.4555, "step": 3987 }, { "epoch": 0.4853057499239428, "grad_norm": 1.099472999572754, "learning_rate": 1.7559063285500578e-05, "loss": 0.4417, "step": 3988 }, { "epoch": 0.48542744143595984, "grad_norm": 3.4981207847595215, "learning_rate": 1.75577860617724e-05, "loss": 0.5534, "step": 3989 }, { "epoch": 0.48554913294797686, "grad_norm": 2.388129949569702, "learning_rate": 1.7556508550454815e-05, "loss": 0.491, "step": 3990 }, { "epoch": 0.48567082445999393, "grad_norm": 1.3636597394943237, "learning_rate": 1.7555230751596437e-05, "loss": 0.4993, "step": 3991 }, { "epoch": 0.48579251597201095, "grad_norm": 0.9246495962142944, "learning_rate": 1.755395266524589e-05, "loss": 0.4778, "step": 3992 }, { "epoch": 0.48591420748402797, "grad_norm": 2.5662167072296143, "learning_rate": 1.7552674291451814e-05, "loss": 0.4185, "step": 3993 }, { "epoch": 0.48603589899604505, "grad_norm": 1.6272306442260742, "learning_rate": 1.7551395630262843e-05, "loss": 0.4665, "step": 3994 }, { "epoch": 0.48615759050806207, "grad_norm": 1.5110605955123901, "learning_rate": 1.7550116681727637e-05, "loss": 0.4882, "step": 3995 }, { "epoch": 0.4862792820200791, "grad_norm": 0.7350053191184998, "learning_rate": 1.7548837445894866e-05, "loss": 0.4889, "step": 3996 }, { "epoch": 0.48640097353209616, "grad_norm": 2.356365442276001, "learning_rate": 1.7547557922813204e-05, "loss": 0.4135, "step": 3997 }, { "epoch": 0.4865226650441132, "grad_norm": 3.077037811279297, "learning_rate": 1.754627811253134e-05, "loss": 0.4469, "step": 3998 }, { "epoch": 0.4866443565561302, "grad_norm": 2.208920478820801, "learning_rate": 1.7544998015097973e-05, "loss": 0.4359, "step": 3999 }, { "epoch": 0.4867660480681473, "grad_norm": 0.7632595300674438, "learning_rate": 1.754371763056182e-05, "loss": 0.4808, "step": 4000 }, { "epoch": 0.4868877395801643, "grad_norm": 1.7443029880523682, "learning_rate": 1.754243695897159e-05, "loss": 0.4512, "step": 4001 }, { "epoch": 0.4870094310921813, "grad_norm": 3.6838951110839844, "learning_rate": 1.7541156000376025e-05, "loss": 0.4925, "step": 4002 }, { "epoch": 0.48713112260419833, "grad_norm": 2.964545726776123, "learning_rate": 1.7539874754823863e-05, "loss": 0.531, "step": 4003 }, { "epoch": 0.4872528141162154, "grad_norm": 0.8194064497947693, "learning_rate": 1.753859322236386e-05, "loss": 0.4405, "step": 4004 }, { "epoch": 0.4873745056282324, "grad_norm": 0.8101553916931152, "learning_rate": 1.753731140304478e-05, "loss": 0.4474, "step": 4005 }, { "epoch": 0.48749619714024944, "grad_norm": 1.820739984512329, "learning_rate": 1.75360292969154e-05, "loss": 0.4061, "step": 4006 }, { "epoch": 0.4876178886522665, "grad_norm": 1.4210189580917358, "learning_rate": 1.753474690402451e-05, "loss": 0.4641, "step": 4007 }, { "epoch": 0.48773958016428354, "grad_norm": 3.2839109897613525, "learning_rate": 1.7533464224420904e-05, "loss": 0.5322, "step": 4008 }, { "epoch": 0.48786127167630056, "grad_norm": 1.1567356586456299, "learning_rate": 1.753218125815339e-05, "loss": 0.4478, "step": 4009 }, { "epoch": 0.48798296318831763, "grad_norm": 1.8849678039550781, "learning_rate": 1.7530898005270788e-05, "loss": 0.4845, "step": 4010 }, { "epoch": 0.48810465470033465, "grad_norm": 1.187034249305725, "learning_rate": 1.7529614465821928e-05, "loss": 0.4692, "step": 4011 }, { "epoch": 0.48822634621235167, "grad_norm": 2.560075044631958, "learning_rate": 1.752833063985565e-05, "loss": 0.4299, "step": 4012 }, { "epoch": 0.48834803772436874, "grad_norm": 2.4375059604644775, "learning_rate": 1.752704652742081e-05, "loss": 0.4855, "step": 4013 }, { "epoch": 0.48846972923638576, "grad_norm": 3.6712076663970947, "learning_rate": 1.752576212856627e-05, "loss": 0.4107, "step": 4014 }, { "epoch": 0.4885914207484028, "grad_norm": 1.1576403379440308, "learning_rate": 1.7524477443340905e-05, "loss": 0.5098, "step": 4015 }, { "epoch": 0.48871311226041986, "grad_norm": 2.328127145767212, "learning_rate": 1.7523192471793598e-05, "loss": 0.4331, "step": 4016 }, { "epoch": 0.4888348037724369, "grad_norm": 2.9909980297088623, "learning_rate": 1.7521907213973243e-05, "loss": 0.3904, "step": 4017 }, { "epoch": 0.4889564952844539, "grad_norm": 3.2087669372558594, "learning_rate": 1.752062166992875e-05, "loss": 0.5608, "step": 4018 }, { "epoch": 0.48907818679647097, "grad_norm": 1.2506738901138306, "learning_rate": 1.7519335839709035e-05, "loss": 0.4497, "step": 4019 }, { "epoch": 0.489199878308488, "grad_norm": 0.967567503452301, "learning_rate": 1.751804972336303e-05, "loss": 0.4856, "step": 4020 }, { "epoch": 0.489321569820505, "grad_norm": 0.9390907287597656, "learning_rate": 1.751676332093967e-05, "loss": 0.4632, "step": 4021 }, { "epoch": 0.4894432613325221, "grad_norm": 0.8339318037033081, "learning_rate": 1.7515476632487907e-05, "loss": 0.4618, "step": 4022 }, { "epoch": 0.4895649528445391, "grad_norm": 1.5600872039794922, "learning_rate": 1.75141896580567e-05, "loss": 0.448, "step": 4023 }, { "epoch": 0.4896866443565561, "grad_norm": 0.7890185117721558, "learning_rate": 1.7512902397695025e-05, "loss": 0.4597, "step": 4024 }, { "epoch": 0.48980833586857314, "grad_norm": 2.2592806816101074, "learning_rate": 1.7511614851451862e-05, "loss": 0.4719, "step": 4025 }, { "epoch": 0.4899300273805902, "grad_norm": 2.5629689693450928, "learning_rate": 1.7510327019376205e-05, "loss": 0.442, "step": 4026 }, { "epoch": 0.49005171889260724, "grad_norm": 4.361266136169434, "learning_rate": 1.7509038901517063e-05, "loss": 0.4591, "step": 4027 }, { "epoch": 0.49017341040462425, "grad_norm": 3.8432695865631104, "learning_rate": 1.7507750497923444e-05, "loss": 0.562, "step": 4028 }, { "epoch": 0.49029510191664133, "grad_norm": 1.3878878355026245, "learning_rate": 1.750646180864438e-05, "loss": 0.4691, "step": 4029 }, { "epoch": 0.49041679342865835, "grad_norm": 2.3850536346435547, "learning_rate": 1.7505172833728905e-05, "loss": 0.4728, "step": 4030 }, { "epoch": 0.49053848494067537, "grad_norm": 3.909846067428589, "learning_rate": 1.750388357322607e-05, "loss": 0.3717, "step": 4031 }, { "epoch": 0.49066017645269244, "grad_norm": 0.9928010702133179, "learning_rate": 1.7502594027184937e-05, "loss": 0.4621, "step": 4032 }, { "epoch": 0.49078186796470946, "grad_norm": 2.579378128051758, "learning_rate": 1.7501304195654564e-05, "loss": 0.4829, "step": 4033 }, { "epoch": 0.4909035594767265, "grad_norm": 4.5460686683654785, "learning_rate": 1.7500014078684045e-05, "loss": 0.5258, "step": 4034 }, { "epoch": 0.49102525098874356, "grad_norm": 0.8646695613861084, "learning_rate": 1.7498723676322464e-05, "loss": 0.4579, "step": 4035 }, { "epoch": 0.4911469425007606, "grad_norm": 2.715090751647949, "learning_rate": 1.7497432988618926e-05, "loss": 0.5009, "step": 4036 }, { "epoch": 0.4912686340127776, "grad_norm": 1.2365626096725464, "learning_rate": 1.7496142015622545e-05, "loss": 0.4652, "step": 4037 }, { "epoch": 0.49139032552479467, "grad_norm": 3.33669376373291, "learning_rate": 1.7494850757382442e-05, "loss": 0.4254, "step": 4038 }, { "epoch": 0.4915120170368117, "grad_norm": 1.378563642501831, "learning_rate": 1.7493559213947755e-05, "loss": 0.5104, "step": 4039 }, { "epoch": 0.4916337085488287, "grad_norm": 1.6664854288101196, "learning_rate": 1.749226738536763e-05, "loss": 0.4294, "step": 4040 }, { "epoch": 0.4917554000608458, "grad_norm": 0.942799985408783, "learning_rate": 1.7490975271691223e-05, "loss": 0.5178, "step": 4041 }, { "epoch": 0.4918770915728628, "grad_norm": 3.033684730529785, "learning_rate": 1.74896828729677e-05, "loss": 0.4936, "step": 4042 }, { "epoch": 0.4919987830848798, "grad_norm": 2.8540375232696533, "learning_rate": 1.7488390189246242e-05, "loss": 0.5073, "step": 4043 }, { "epoch": 0.49212047459689684, "grad_norm": 2.979496717453003, "learning_rate": 1.7487097220576035e-05, "loss": 0.5174, "step": 4044 }, { "epoch": 0.4922421661089139, "grad_norm": 2.3568081855773926, "learning_rate": 1.7485803967006287e-05, "loss": 0.5338, "step": 4045 }, { "epoch": 0.49236385762093093, "grad_norm": 1.4154558181762695, "learning_rate": 1.7484510428586195e-05, "loss": 0.4959, "step": 4046 }, { "epoch": 0.49248554913294795, "grad_norm": 2.261134624481201, "learning_rate": 1.7483216605364997e-05, "loss": 0.4787, "step": 4047 }, { "epoch": 0.492607240644965, "grad_norm": 1.465767741203308, "learning_rate": 1.7481922497391912e-05, "loss": 0.4968, "step": 4048 }, { "epoch": 0.49272893215698205, "grad_norm": 2.4370086193084717, "learning_rate": 1.7480628104716193e-05, "loss": 0.4608, "step": 4049 }, { "epoch": 0.49285062366899907, "grad_norm": 2.0654404163360596, "learning_rate": 1.747933342738709e-05, "loss": 0.5361, "step": 4050 }, { "epoch": 0.49297231518101614, "grad_norm": 2.6077675819396973, "learning_rate": 1.7478038465453866e-05, "loss": 0.4227, "step": 4051 }, { "epoch": 0.49309400669303316, "grad_norm": 0.9016644954681396, "learning_rate": 1.7476743218965802e-05, "loss": 0.4744, "step": 4052 }, { "epoch": 0.4932156982050502, "grad_norm": 2.60406756401062, "learning_rate": 1.747544768797218e-05, "loss": 0.5289, "step": 4053 }, { "epoch": 0.49333738971706725, "grad_norm": 2.225156545639038, "learning_rate": 1.7474151872522305e-05, "loss": 0.5364, "step": 4054 }, { "epoch": 0.4934590812290843, "grad_norm": 1.802567958831787, "learning_rate": 1.7472855772665477e-05, "loss": 0.4567, "step": 4055 }, { "epoch": 0.4935807727411013, "grad_norm": 0.8455933928489685, "learning_rate": 1.747155938845102e-05, "loss": 0.4737, "step": 4056 }, { "epoch": 0.49370246425311837, "grad_norm": 2.3552300930023193, "learning_rate": 1.747026271992826e-05, "loss": 0.4232, "step": 4057 }, { "epoch": 0.4938241557651354, "grad_norm": 4.302911281585693, "learning_rate": 1.7468965767146545e-05, "loss": 0.5367, "step": 4058 }, { "epoch": 0.4939458472771524, "grad_norm": 1.4347680807113647, "learning_rate": 1.7467668530155223e-05, "loss": 0.4538, "step": 4059 }, { "epoch": 0.4940675387891695, "grad_norm": 2.1260783672332764, "learning_rate": 1.7466371009003652e-05, "loss": 0.5289, "step": 4060 }, { "epoch": 0.4941892303011865, "grad_norm": 1.401283860206604, "learning_rate": 1.7465073203741215e-05, "loss": 0.5127, "step": 4061 }, { "epoch": 0.4943109218132035, "grad_norm": 2.4197702407836914, "learning_rate": 1.7463775114417284e-05, "loss": 0.4605, "step": 4062 }, { "epoch": 0.4944326133252206, "grad_norm": 4.5010666847229, "learning_rate": 1.7462476741081267e-05, "loss": 0.4089, "step": 4063 }, { "epoch": 0.4945543048372376, "grad_norm": 1.8668774366378784, "learning_rate": 1.746117808378256e-05, "loss": 0.5301, "step": 4064 }, { "epoch": 0.49467599634925463, "grad_norm": 3.9548213481903076, "learning_rate": 1.7459879142570585e-05, "loss": 0.4425, "step": 4065 }, { "epoch": 0.49479768786127165, "grad_norm": 3.71551251411438, "learning_rate": 1.7458579917494767e-05, "loss": 0.4331, "step": 4066 }, { "epoch": 0.4949193793732887, "grad_norm": 0.6080107688903809, "learning_rate": 1.7457280408604545e-05, "loss": 0.4848, "step": 4067 }, { "epoch": 0.49504107088530575, "grad_norm": 0.9849717020988464, "learning_rate": 1.7455980615949365e-05, "loss": 0.5123, "step": 4068 }, { "epoch": 0.49516276239732276, "grad_norm": 1.4397056102752686, "learning_rate": 1.7454680539578693e-05, "loss": 0.4668, "step": 4069 }, { "epoch": 0.49528445390933984, "grad_norm": 2.6759631633758545, "learning_rate": 1.7453380179541996e-05, "loss": 0.45, "step": 4070 }, { "epoch": 0.49540614542135686, "grad_norm": 2.5754477977752686, "learning_rate": 1.7452079535888755e-05, "loss": 0.5208, "step": 4071 }, { "epoch": 0.4955278369333739, "grad_norm": 1.1573103666305542, "learning_rate": 1.745077860866846e-05, "loss": 0.4589, "step": 4072 }, { "epoch": 0.49564952844539095, "grad_norm": 1.0905152559280396, "learning_rate": 1.744947739793062e-05, "loss": 0.4502, "step": 4073 }, { "epoch": 0.49577121995740797, "grad_norm": 0.6032271981239319, "learning_rate": 1.744817590372474e-05, "loss": 0.4309, "step": 4074 }, { "epoch": 0.495892911469425, "grad_norm": 2.3655929565429688, "learning_rate": 1.7446874126100356e-05, "loss": 0.4804, "step": 4075 }, { "epoch": 0.49601460298144207, "grad_norm": 2.5027287006378174, "learning_rate": 1.7445572065106996e-05, "loss": 0.4181, "step": 4076 }, { "epoch": 0.4961362944934591, "grad_norm": 0.9157415628433228, "learning_rate": 1.7444269720794206e-05, "loss": 0.5058, "step": 4077 }, { "epoch": 0.4962579860054761, "grad_norm": 1.4561556577682495, "learning_rate": 1.7442967093211546e-05, "loss": 0.5247, "step": 4078 }, { "epoch": 0.4963796775174932, "grad_norm": 2.8743176460266113, "learning_rate": 1.744166418240858e-05, "loss": 0.4662, "step": 4079 }, { "epoch": 0.4965013690295102, "grad_norm": 0.6691282391548157, "learning_rate": 1.744036098843489e-05, "loss": 0.4882, "step": 4080 }, { "epoch": 0.4966230605415272, "grad_norm": 0.9167976379394531, "learning_rate": 1.7439057511340064e-05, "loss": 0.5171, "step": 4081 }, { "epoch": 0.4967447520535443, "grad_norm": 2.1515262126922607, "learning_rate": 1.74377537511737e-05, "loss": 0.5415, "step": 4082 }, { "epoch": 0.4968664435655613, "grad_norm": 2.418586015701294, "learning_rate": 1.743644970798541e-05, "loss": 0.4787, "step": 4083 }, { "epoch": 0.49698813507757833, "grad_norm": 1.9286082983016968, "learning_rate": 1.743514538182482e-05, "loss": 0.4931, "step": 4084 }, { "epoch": 0.49710982658959535, "grad_norm": 2.3758864402770996, "learning_rate": 1.7433840772741556e-05, "loss": 0.4512, "step": 4085 }, { "epoch": 0.4972315181016124, "grad_norm": 3.0013015270233154, "learning_rate": 1.743253588078526e-05, "loss": 0.445, "step": 4086 }, { "epoch": 0.49735320961362944, "grad_norm": 1.7188196182250977, "learning_rate": 1.7431230706005596e-05, "loss": 0.4143, "step": 4087 }, { "epoch": 0.49747490112564646, "grad_norm": 1.4521846771240234, "learning_rate": 1.7429925248452218e-05, "loss": 0.4227, "step": 4088 }, { "epoch": 0.49759659263766354, "grad_norm": 3.806412696838379, "learning_rate": 1.7428619508174804e-05, "loss": 0.4834, "step": 4089 }, { "epoch": 0.49771828414968056, "grad_norm": 6.8284525871276855, "learning_rate": 1.7427313485223045e-05, "loss": 0.5896, "step": 4090 }, { "epoch": 0.4978399756616976, "grad_norm": 4.187354564666748, "learning_rate": 1.742600717964663e-05, "loss": 0.4897, "step": 4091 }, { "epoch": 0.49796166717371465, "grad_norm": 4.489995956420898, "learning_rate": 1.7424700591495273e-05, "loss": 0.4805, "step": 4092 }, { "epoch": 0.49808335868573167, "grad_norm": 3.3616721630096436, "learning_rate": 1.742339372081869e-05, "loss": 0.4352, "step": 4093 }, { "epoch": 0.4982050501977487, "grad_norm": 1.5986047983169556, "learning_rate": 1.742208656766661e-05, "loss": 0.4272, "step": 4094 }, { "epoch": 0.49832674170976576, "grad_norm": 1.3044337034225464, "learning_rate": 1.742077913208877e-05, "loss": 0.4099, "step": 4095 }, { "epoch": 0.4984484332217828, "grad_norm": 2.6791794300079346, "learning_rate": 1.7419471414134932e-05, "loss": 0.5036, "step": 4096 }, { "epoch": 0.4985701247337998, "grad_norm": 2.7280631065368652, "learning_rate": 1.7418163413854844e-05, "loss": 0.4131, "step": 4097 }, { "epoch": 0.4986918162458169, "grad_norm": 1.076400637626648, "learning_rate": 1.7416855131298285e-05, "loss": 0.4781, "step": 4098 }, { "epoch": 0.4988135077578339, "grad_norm": 4.6368727684021, "learning_rate": 1.7415546566515034e-05, "loss": 0.4339, "step": 4099 }, { "epoch": 0.4989351992698509, "grad_norm": 3.553502082824707, "learning_rate": 1.7414237719554886e-05, "loss": 0.4217, "step": 4100 }, { "epoch": 0.499056890781868, "grad_norm": 2.262726068496704, "learning_rate": 1.741292859046765e-05, "loss": 0.4472, "step": 4101 }, { "epoch": 0.499178582293885, "grad_norm": 1.033531904220581, "learning_rate": 1.7411619179303136e-05, "loss": 0.5153, "step": 4102 }, { "epoch": 0.49930027380590203, "grad_norm": 4.11911153793335, "learning_rate": 1.741030948611117e-05, "loss": 0.377, "step": 4103 }, { "epoch": 0.49942196531791905, "grad_norm": 1.7390393018722534, "learning_rate": 1.740899951094159e-05, "loss": 0.4826, "step": 4104 }, { "epoch": 0.4995436568299361, "grad_norm": 2.917625665664673, "learning_rate": 1.7407689253844244e-05, "loss": 0.5216, "step": 4105 }, { "epoch": 0.49966534834195314, "grad_norm": 1.3471200466156006, "learning_rate": 1.7406378714868987e-05, "loss": 0.4533, "step": 4106 }, { "epoch": 0.49978703985397016, "grad_norm": 0.6451941132545471, "learning_rate": 1.7405067894065692e-05, "loss": 0.4281, "step": 4107 }, { "epoch": 0.49990873136598724, "grad_norm": 2.406010389328003, "learning_rate": 1.7403756791484232e-05, "loss": 0.4903, "step": 4108 }, { "epoch": 0.5000304228780043, "grad_norm": 2.0882816314697266, "learning_rate": 1.7402445407174502e-05, "loss": 0.4176, "step": 4109 }, { "epoch": 0.5001521143900213, "grad_norm": 1.0812381505966187, "learning_rate": 1.74011337411864e-05, "loss": 0.4613, "step": 4110 }, { "epoch": 0.5002738059020383, "grad_norm": 0.6587196588516235, "learning_rate": 1.7399821793569847e-05, "loss": 0.4721, "step": 4111 }, { "epoch": 0.5003954974140554, "grad_norm": 1.8877878189086914, "learning_rate": 1.7398509564374754e-05, "loss": 0.53, "step": 4112 }, { "epoch": 0.5005171889260724, "grad_norm": 1.9657156467437744, "learning_rate": 1.7397197053651055e-05, "loss": 0.4418, "step": 4113 }, { "epoch": 0.5006388804380895, "grad_norm": 3.436377763748169, "learning_rate": 1.73958842614487e-05, "loss": 0.4596, "step": 4114 }, { "epoch": 0.5007605719501065, "grad_norm": 0.9162855744361877, "learning_rate": 1.7394571187817642e-05, "loss": 0.5119, "step": 4115 }, { "epoch": 0.5008822634621235, "grad_norm": 1.646669626235962, "learning_rate": 1.7393257832807843e-05, "loss": 0.4449, "step": 4116 }, { "epoch": 0.5010039549741405, "grad_norm": 2.6974854469299316, "learning_rate": 1.7391944196469278e-05, "loss": 0.4252, "step": 4117 }, { "epoch": 0.5011256464861575, "grad_norm": 0.7376638650894165, "learning_rate": 1.7390630278851938e-05, "loss": 0.4669, "step": 4118 }, { "epoch": 0.5012473379981747, "grad_norm": 0.874047040939331, "learning_rate": 1.7389316080005816e-05, "loss": 0.4451, "step": 4119 }, { "epoch": 0.5013690295101917, "grad_norm": 1.197197675704956, "learning_rate": 1.7388001599980927e-05, "loss": 0.4617, "step": 4120 }, { "epoch": 0.5014907210222087, "grad_norm": 2.542201519012451, "learning_rate": 1.738668683882728e-05, "loss": 0.4511, "step": 4121 }, { "epoch": 0.5016124125342257, "grad_norm": 1.7542531490325928, "learning_rate": 1.738537179659491e-05, "loss": 0.4955, "step": 4122 }, { "epoch": 0.5017341040462427, "grad_norm": 0.7148008346557617, "learning_rate": 1.7384056473333854e-05, "loss": 0.4621, "step": 4123 }, { "epoch": 0.5018557955582598, "grad_norm": 1.160097599029541, "learning_rate": 1.738274086909417e-05, "loss": 0.4712, "step": 4124 }, { "epoch": 0.5019774870702769, "grad_norm": 3.369720697402954, "learning_rate": 1.738142498392591e-05, "loss": 0.4247, "step": 4125 }, { "epoch": 0.5020991785822939, "grad_norm": 1.2303825616836548, "learning_rate": 1.7380108817879156e-05, "loss": 0.5338, "step": 4126 }, { "epoch": 0.5022208700943109, "grad_norm": 0.8658193349838257, "learning_rate": 1.737879237100398e-05, "loss": 0.4616, "step": 4127 }, { "epoch": 0.502342561606328, "grad_norm": 1.5830113887786865, "learning_rate": 1.7377475643350484e-05, "loss": 0.5055, "step": 4128 }, { "epoch": 0.502464253118345, "grad_norm": 1.2154951095581055, "learning_rate": 1.737615863496877e-05, "loss": 0.4631, "step": 4129 }, { "epoch": 0.502585944630362, "grad_norm": 0.8518238067626953, "learning_rate": 1.737484134590895e-05, "loss": 0.4541, "step": 4130 }, { "epoch": 0.5027076361423791, "grad_norm": 3.4788103103637695, "learning_rate": 1.7373523776221154e-05, "loss": 0.5365, "step": 4131 }, { "epoch": 0.5028293276543961, "grad_norm": 2.623056411743164, "learning_rate": 1.7372205925955513e-05, "loss": 0.4676, "step": 4132 }, { "epoch": 0.5029510191664132, "grad_norm": 0.6838477253913879, "learning_rate": 1.737088779516218e-05, "loss": 0.4693, "step": 4133 }, { "epoch": 0.5030727106784302, "grad_norm": 2.127175807952881, "learning_rate": 1.7369569383891306e-05, "loss": 0.4386, "step": 4134 }, { "epoch": 0.5031944021904472, "grad_norm": 2.199289321899414, "learning_rate": 1.7368250692193066e-05, "loss": 0.4609, "step": 4135 }, { "epoch": 0.5033160937024642, "grad_norm": 3.731417417526245, "learning_rate": 1.7366931720117633e-05, "loss": 0.4096, "step": 4136 }, { "epoch": 0.5034377852144812, "grad_norm": 0.7778638005256653, "learning_rate": 1.7365612467715203e-05, "loss": 0.4376, "step": 4137 }, { "epoch": 0.5035594767264984, "grad_norm": 1.3162226676940918, "learning_rate": 1.7364292935035968e-05, "loss": 0.4524, "step": 4138 }, { "epoch": 0.5036811682385154, "grad_norm": 3.344547748565674, "learning_rate": 1.7362973122130146e-05, "loss": 0.4787, "step": 4139 }, { "epoch": 0.5038028597505324, "grad_norm": 3.077211618423462, "learning_rate": 1.7361653029047956e-05, "loss": 0.4852, "step": 4140 }, { "epoch": 0.5039245512625494, "grad_norm": 2.960585117340088, "learning_rate": 1.7360332655839635e-05, "loss": 0.4577, "step": 4141 }, { "epoch": 0.5040462427745664, "grad_norm": 4.019593715667725, "learning_rate": 1.7359012002555414e-05, "loss": 0.526, "step": 4142 }, { "epoch": 0.5041679342865835, "grad_norm": 0.6087198853492737, "learning_rate": 1.7357691069245554e-05, "loss": 0.4062, "step": 4143 }, { "epoch": 0.5042896257986006, "grad_norm": 1.8577182292938232, "learning_rate": 1.7356369855960323e-05, "loss": 0.4851, "step": 4144 }, { "epoch": 0.5044113173106176, "grad_norm": 1.0888867378234863, "learning_rate": 1.7355048362749995e-05, "loss": 0.423, "step": 4145 }, { "epoch": 0.5045330088226346, "grad_norm": 1.065039873123169, "learning_rate": 1.7353726589664847e-05, "loss": 0.4904, "step": 4146 }, { "epoch": 0.5046547003346517, "grad_norm": 2.658245086669922, "learning_rate": 1.7352404536755185e-05, "loss": 0.4414, "step": 4147 }, { "epoch": 0.5047763918466687, "grad_norm": 0.7043006420135498, "learning_rate": 1.735108220407131e-05, "loss": 0.5126, "step": 4148 }, { "epoch": 0.5048980833586857, "grad_norm": 2.154768943786621, "learning_rate": 1.7349759591663538e-05, "loss": 0.5068, "step": 4149 }, { "epoch": 0.5050197748707028, "grad_norm": 2.659702777862549, "learning_rate": 1.7348436699582204e-05, "loss": 0.453, "step": 4150 }, { "epoch": 0.5051414663827198, "grad_norm": 2.186054229736328, "learning_rate": 1.7347113527877642e-05, "loss": 0.5176, "step": 4151 }, { "epoch": 0.5052631578947369, "grad_norm": 2.088869571685791, "learning_rate": 1.7345790076600203e-05, "loss": 0.542, "step": 4152 }, { "epoch": 0.5053848494067539, "grad_norm": 3.6443979740142822, "learning_rate": 1.7344466345800245e-05, "loss": 0.457, "step": 4153 }, { "epoch": 0.5055065409187709, "grad_norm": 2.7742044925689697, "learning_rate": 1.7343142335528146e-05, "loss": 0.4461, "step": 4154 }, { "epoch": 0.5056282324307879, "grad_norm": 0.9349391460418701, "learning_rate": 1.7341818045834274e-05, "loss": 0.5243, "step": 4155 }, { "epoch": 0.5057499239428049, "grad_norm": 1.9604241847991943, "learning_rate": 1.7340493476769034e-05, "loss": 0.4679, "step": 4156 }, { "epoch": 0.5058716154548221, "grad_norm": 3.2405283451080322, "learning_rate": 1.733916862838282e-05, "loss": 0.4052, "step": 4157 }, { "epoch": 0.5059933069668391, "grad_norm": 0.9567452073097229, "learning_rate": 1.733784350072605e-05, "loss": 0.4537, "step": 4158 }, { "epoch": 0.5061149984788561, "grad_norm": 1.1237132549285889, "learning_rate": 1.7336518093849145e-05, "loss": 0.4919, "step": 4159 }, { "epoch": 0.5062366899908731, "grad_norm": 1.4495265483856201, "learning_rate": 1.7335192407802543e-05, "loss": 0.472, "step": 4160 }, { "epoch": 0.5063583815028901, "grad_norm": 0.9601306319236755, "learning_rate": 1.7333866442636688e-05, "loss": 0.4649, "step": 4161 }, { "epoch": 0.5064800730149072, "grad_norm": 0.9483425617218018, "learning_rate": 1.733254019840203e-05, "loss": 0.4897, "step": 4162 }, { "epoch": 0.5066017645269243, "grad_norm": 4.501932621002197, "learning_rate": 1.733121367514904e-05, "loss": 0.401, "step": 4163 }, { "epoch": 0.5067234560389413, "grad_norm": 0.9702973365783691, "learning_rate": 1.7329886872928198e-05, "loss": 0.4985, "step": 4164 }, { "epoch": 0.5068451475509583, "grad_norm": 0.978947639465332, "learning_rate": 1.732855979178999e-05, "loss": 0.4877, "step": 4165 }, { "epoch": 0.5069668390629753, "grad_norm": 2.289695978164673, "learning_rate": 1.7327232431784908e-05, "loss": 0.4432, "step": 4166 }, { "epoch": 0.5070885305749924, "grad_norm": 1.8299050331115723, "learning_rate": 1.732590479296347e-05, "loss": 0.4472, "step": 4167 }, { "epoch": 0.5072102220870094, "grad_norm": 0.9049208760261536, "learning_rate": 1.7324576875376185e-05, "loss": 0.4436, "step": 4168 }, { "epoch": 0.5073319135990265, "grad_norm": 0.6118660569190979, "learning_rate": 1.7323248679073597e-05, "loss": 0.4401, "step": 4169 }, { "epoch": 0.5074536051110435, "grad_norm": 1.1906383037567139, "learning_rate": 1.732192020410623e-05, "loss": 0.4581, "step": 4170 }, { "epoch": 0.5075752966230606, "grad_norm": 1.0905030965805054, "learning_rate": 1.7320591450524648e-05, "loss": 0.4124, "step": 4171 }, { "epoch": 0.5076969881350776, "grad_norm": 2.1233954429626465, "learning_rate": 1.731926241837941e-05, "loss": 0.4598, "step": 4172 }, { "epoch": 0.5078186796470946, "grad_norm": 1.2013047933578491, "learning_rate": 1.7317933107721086e-05, "loss": 0.4087, "step": 4173 }, { "epoch": 0.5079403711591116, "grad_norm": 2.3826663494110107, "learning_rate": 1.731660351860026e-05, "loss": 0.4887, "step": 4174 }, { "epoch": 0.5080620626711286, "grad_norm": 1.9369261264801025, "learning_rate": 1.7315273651067524e-05, "loss": 0.4968, "step": 4175 }, { "epoch": 0.5081837541831458, "grad_norm": 0.9575729370117188, "learning_rate": 1.7313943505173483e-05, "loss": 0.4478, "step": 4176 }, { "epoch": 0.5083054456951628, "grad_norm": 0.9614267349243164, "learning_rate": 1.7312613080968753e-05, "loss": 0.4365, "step": 4177 }, { "epoch": 0.5084271372071798, "grad_norm": 3.17458438873291, "learning_rate": 1.731128237850396e-05, "loss": 0.4386, "step": 4178 }, { "epoch": 0.5085488287191968, "grad_norm": 5.5925211906433105, "learning_rate": 1.730995139782974e-05, "loss": 0.4063, "step": 4179 }, { "epoch": 0.5086705202312138, "grad_norm": 1.0321651697158813, "learning_rate": 1.7308620138996738e-05, "loss": 0.4571, "step": 4180 }, { "epoch": 0.5087922117432309, "grad_norm": 2.9922969341278076, "learning_rate": 1.730728860205561e-05, "loss": 0.4258, "step": 4181 }, { "epoch": 0.508913903255248, "grad_norm": 2.339219093322754, "learning_rate": 1.7305956787057024e-05, "loss": 0.5205, "step": 4182 }, { "epoch": 0.509035594767265, "grad_norm": 0.655060887336731, "learning_rate": 1.7304624694051668e-05, "loss": 0.4319, "step": 4183 }, { "epoch": 0.509157286279282, "grad_norm": 0.9896650910377502, "learning_rate": 1.7303292323090213e-05, "loss": 0.4283, "step": 4184 }, { "epoch": 0.509278977791299, "grad_norm": 4.292147159576416, "learning_rate": 1.730195967422337e-05, "loss": 0.5668, "step": 4185 }, { "epoch": 0.5094006693033161, "grad_norm": 2.3417439460754395, "learning_rate": 1.7300626747501852e-05, "loss": 0.4874, "step": 4186 }, { "epoch": 0.5095223608153331, "grad_norm": 0.5930525660514832, "learning_rate": 1.7299293542976373e-05, "loss": 0.464, "step": 4187 }, { "epoch": 0.5096440523273502, "grad_norm": 2.3383800983428955, "learning_rate": 1.7297960060697666e-05, "loss": 0.5812, "step": 4188 }, { "epoch": 0.5097657438393672, "grad_norm": 1.579831838607788, "learning_rate": 1.7296626300716467e-05, "loss": 0.5273, "step": 4189 }, { "epoch": 0.5098874353513843, "grad_norm": 1.0200680494308472, "learning_rate": 1.7295292263083543e-05, "loss": 0.5626, "step": 4190 }, { "epoch": 0.5100091268634013, "grad_norm": 1.3273138999938965, "learning_rate": 1.729395794784964e-05, "loss": 0.5056, "step": 4191 }, { "epoch": 0.5101308183754183, "grad_norm": 6.612588882446289, "learning_rate": 1.7292623355065546e-05, "loss": 0.4488, "step": 4192 }, { "epoch": 0.5102525098874353, "grad_norm": 5.209959506988525, "learning_rate": 1.7291288484782037e-05, "loss": 0.4682, "step": 4193 }, { "epoch": 0.5103742013994524, "grad_norm": 2.4689810276031494, "learning_rate": 1.7289953337049907e-05, "loss": 0.4987, "step": 4194 }, { "epoch": 0.5104958929114695, "grad_norm": 3.125850200653076, "learning_rate": 1.7288617911919963e-05, "loss": 0.4687, "step": 4195 }, { "epoch": 0.5106175844234865, "grad_norm": 0.8129388093948364, "learning_rate": 1.7287282209443022e-05, "loss": 0.4901, "step": 4196 }, { "epoch": 0.5107392759355035, "grad_norm": 2.6493606567382812, "learning_rate": 1.7285946229669906e-05, "loss": 0.4648, "step": 4197 }, { "epoch": 0.5108609674475205, "grad_norm": 1.5875645875930786, "learning_rate": 1.728460997265146e-05, "loss": 0.4635, "step": 4198 }, { "epoch": 0.5109826589595375, "grad_norm": 1.5532900094985962, "learning_rate": 1.7283273438438525e-05, "loss": 0.4109, "step": 4199 }, { "epoch": 0.5111043504715546, "grad_norm": 1.8107355833053589, "learning_rate": 1.7281936627081957e-05, "loss": 0.4249, "step": 4200 }, { "epoch": 0.5112260419835717, "grad_norm": 4.1558942794799805, "learning_rate": 1.728059953863263e-05, "loss": 0.5209, "step": 4201 }, { "epoch": 0.5113477334955887, "grad_norm": 0.5414964556694031, "learning_rate": 1.727926217314142e-05, "loss": 0.3922, "step": 4202 }, { "epoch": 0.5114694250076057, "grad_norm": 0.9969562292098999, "learning_rate": 1.7277924530659218e-05, "loss": 0.4593, "step": 4203 }, { "epoch": 0.5115911165196227, "grad_norm": 3.9075279235839844, "learning_rate": 1.7276586611236923e-05, "loss": 0.5432, "step": 4204 }, { "epoch": 0.5117128080316398, "grad_norm": 2.7518815994262695, "learning_rate": 1.7275248414925443e-05, "loss": 0.5094, "step": 4205 }, { "epoch": 0.5118344995436568, "grad_norm": 1.7149337530136108, "learning_rate": 1.7273909941775705e-05, "loss": 0.5003, "step": 4206 }, { "epoch": 0.5119561910556739, "grad_norm": 1.7216975688934326, "learning_rate": 1.7272571191838636e-05, "loss": 0.4639, "step": 4207 }, { "epoch": 0.5120778825676909, "grad_norm": 1.2411049604415894, "learning_rate": 1.727123216516518e-05, "loss": 0.4718, "step": 4208 }, { "epoch": 0.512199574079708, "grad_norm": 1.085710883140564, "learning_rate": 1.726989286180629e-05, "loss": 0.4424, "step": 4209 }, { "epoch": 0.512321265591725, "grad_norm": 3.1756680011749268, "learning_rate": 1.726855328181293e-05, "loss": 0.4665, "step": 4210 }, { "epoch": 0.512442957103742, "grad_norm": 3.1350553035736084, "learning_rate": 1.7267213425236068e-05, "loss": 0.4327, "step": 4211 }, { "epoch": 0.512564648615759, "grad_norm": 0.7822600603103638, "learning_rate": 1.7265873292126698e-05, "loss": 0.4245, "step": 4212 }, { "epoch": 0.5126863401277761, "grad_norm": 1.2810771465301514, "learning_rate": 1.7264532882535806e-05, "loss": 0.4163, "step": 4213 }, { "epoch": 0.5128080316397932, "grad_norm": 1.6901302337646484, "learning_rate": 1.7263192196514404e-05, "loss": 0.474, "step": 4214 }, { "epoch": 0.5129297231518102, "grad_norm": 2.7114171981811523, "learning_rate": 1.7261851234113504e-05, "loss": 0.4763, "step": 4215 }, { "epoch": 0.5130514146638272, "grad_norm": 4.1810622215271, "learning_rate": 1.7260509995384134e-05, "loss": 0.4644, "step": 4216 }, { "epoch": 0.5131731061758442, "grad_norm": 2.705242156982422, "learning_rate": 1.725916848037733e-05, "loss": 0.4948, "step": 4217 }, { "epoch": 0.5132947976878612, "grad_norm": 1.6102042198181152, "learning_rate": 1.7257826689144136e-05, "loss": 0.4279, "step": 4218 }, { "epoch": 0.5134164891998783, "grad_norm": 3.9557557106018066, "learning_rate": 1.7256484621735617e-05, "loss": 0.5429, "step": 4219 }, { "epoch": 0.5135381807118954, "grad_norm": 3.7163450717926025, "learning_rate": 1.7255142278202837e-05, "loss": 0.6007, "step": 4220 }, { "epoch": 0.5136598722239124, "grad_norm": 1.8740004301071167, "learning_rate": 1.7253799658596876e-05, "loss": 0.4645, "step": 4221 }, { "epoch": 0.5137815637359294, "grad_norm": 1.6189954280853271, "learning_rate": 1.7252456762968824e-05, "loss": 0.4754, "step": 4222 }, { "epoch": 0.5139032552479464, "grad_norm": 2.928853988647461, "learning_rate": 1.725111359136978e-05, "loss": 0.4769, "step": 4223 }, { "epoch": 0.5140249467599635, "grad_norm": 4.390937805175781, "learning_rate": 1.7249770143850857e-05, "loss": 0.4878, "step": 4224 }, { "epoch": 0.5141466382719805, "grad_norm": 1.4352803230285645, "learning_rate": 1.724842642046317e-05, "loss": 0.4702, "step": 4225 }, { "epoch": 0.5142683297839976, "grad_norm": 0.9098310470581055, "learning_rate": 1.7247082421257857e-05, "loss": 0.5597, "step": 4226 }, { "epoch": 0.5143900212960146, "grad_norm": 1.9175128936767578, "learning_rate": 1.7245738146286057e-05, "loss": 0.5001, "step": 4227 }, { "epoch": 0.5145117128080317, "grad_norm": 0.9920680522918701, "learning_rate": 1.724439359559892e-05, "loss": 0.4591, "step": 4228 }, { "epoch": 0.5146334043200487, "grad_norm": 2.749835968017578, "learning_rate": 1.7243048769247613e-05, "loss": 0.5375, "step": 4229 }, { "epoch": 0.5147550958320657, "grad_norm": 1.11312997341156, "learning_rate": 1.7241703667283312e-05, "loss": 0.4813, "step": 4230 }, { "epoch": 0.5148767873440827, "grad_norm": 1.1330859661102295, "learning_rate": 1.7240358289757195e-05, "loss": 0.4896, "step": 4231 }, { "epoch": 0.5149984788560998, "grad_norm": 1.01093327999115, "learning_rate": 1.7239012636720455e-05, "loss": 0.4682, "step": 4232 }, { "epoch": 0.5151201703681169, "grad_norm": 2.669506311416626, "learning_rate": 1.7237666708224305e-05, "loss": 0.5285, "step": 4233 }, { "epoch": 0.5152418618801339, "grad_norm": 1.8913952112197876, "learning_rate": 1.723632050431995e-05, "loss": 0.4625, "step": 4234 }, { "epoch": 0.5153635533921509, "grad_norm": 3.2770209312438965, "learning_rate": 1.7234974025058625e-05, "loss": 0.4378, "step": 4235 }, { "epoch": 0.5154852449041679, "grad_norm": 2.3642210960388184, "learning_rate": 1.7233627270491563e-05, "loss": 0.4806, "step": 4236 }, { "epoch": 0.5156069364161849, "grad_norm": 0.9507365226745605, "learning_rate": 1.723228024067001e-05, "loss": 0.4633, "step": 4237 }, { "epoch": 0.515728627928202, "grad_norm": 0.725978672504425, "learning_rate": 1.723093293564522e-05, "loss": 0.5193, "step": 4238 }, { "epoch": 0.5158503194402191, "grad_norm": 1.2293250560760498, "learning_rate": 1.7229585355468473e-05, "loss": 0.4787, "step": 4239 }, { "epoch": 0.5159720109522361, "grad_norm": 1.822168231010437, "learning_rate": 1.722823750019103e-05, "loss": 0.5467, "step": 4240 }, { "epoch": 0.5160937024642531, "grad_norm": 1.5222446918487549, "learning_rate": 1.7226889369864196e-05, "loss": 0.5023, "step": 4241 }, { "epoch": 0.5162153939762701, "grad_norm": 1.0482224225997925, "learning_rate": 1.722554096453926e-05, "loss": 0.4847, "step": 4242 }, { "epoch": 0.5163370854882872, "grad_norm": 1.706272840499878, "learning_rate": 1.7224192284267532e-05, "loss": 0.5126, "step": 4243 }, { "epoch": 0.5164587770003042, "grad_norm": 2.338200330734253, "learning_rate": 1.7222843329100335e-05, "loss": 0.512, "step": 4244 }, { "epoch": 0.5165804685123213, "grad_norm": 1.325298547744751, "learning_rate": 1.7221494099089e-05, "loss": 0.4755, "step": 4245 }, { "epoch": 0.5167021600243383, "grad_norm": 1.9629168510437012, "learning_rate": 1.7220144594284867e-05, "loss": 0.443, "step": 4246 }, { "epoch": 0.5168238515363553, "grad_norm": 1.20656156539917, "learning_rate": 1.7218794814739286e-05, "loss": 0.508, "step": 4247 }, { "epoch": 0.5169455430483724, "grad_norm": 2.1021134853363037, "learning_rate": 1.721744476050362e-05, "loss": 0.4782, "step": 4248 }, { "epoch": 0.5170672345603894, "grad_norm": 1.4776021242141724, "learning_rate": 1.7216094431629243e-05, "loss": 0.4677, "step": 4249 }, { "epoch": 0.5171889260724064, "grad_norm": 3.4578335285186768, "learning_rate": 1.721474382816754e-05, "loss": 0.4005, "step": 4250 }, { "epoch": 0.5173106175844235, "grad_norm": 0.9584029912948608, "learning_rate": 1.7213392950169893e-05, "loss": 0.4322, "step": 4251 }, { "epoch": 0.5174323090964406, "grad_norm": 3.120654821395874, "learning_rate": 1.7212041797687718e-05, "loss": 0.5056, "step": 4252 }, { "epoch": 0.5175540006084576, "grad_norm": 1.416257381439209, "learning_rate": 1.7210690370772424e-05, "loss": 0.4517, "step": 4253 }, { "epoch": 0.5176756921204746, "grad_norm": 2.2621262073516846, "learning_rate": 1.7209338669475436e-05, "loss": 0.4912, "step": 4254 }, { "epoch": 0.5177973836324916, "grad_norm": 0.9535350799560547, "learning_rate": 1.7207986693848188e-05, "loss": 0.3897, "step": 4255 }, { "epoch": 0.5179190751445086, "grad_norm": 2.8512415885925293, "learning_rate": 1.7206634443942126e-05, "loss": 0.4744, "step": 4256 }, { "epoch": 0.5180407666565257, "grad_norm": 0.6961869597434998, "learning_rate": 1.7205281919808708e-05, "loss": 0.4297, "step": 4257 }, { "epoch": 0.5181624581685428, "grad_norm": 0.9827209115028381, "learning_rate": 1.7203929121499398e-05, "loss": 0.4442, "step": 4258 }, { "epoch": 0.5182841496805598, "grad_norm": 2.678150177001953, "learning_rate": 1.7202576049065672e-05, "loss": 0.5111, "step": 4259 }, { "epoch": 0.5184058411925768, "grad_norm": 2.8638734817504883, "learning_rate": 1.7201222702559024e-05, "loss": 0.4201, "step": 4260 }, { "epoch": 0.5185275327045938, "grad_norm": 2.1369612216949463, "learning_rate": 1.7199869082030943e-05, "loss": 0.5096, "step": 4261 }, { "epoch": 0.5186492242166109, "grad_norm": 1.1570193767547607, "learning_rate": 1.719851518753294e-05, "loss": 0.4641, "step": 4262 }, { "epoch": 0.5187709157286279, "grad_norm": 1.9469950199127197, "learning_rate": 1.7197161019116536e-05, "loss": 0.482, "step": 4263 }, { "epoch": 0.518892607240645, "grad_norm": 0.6886633038520813, "learning_rate": 1.7195806576833258e-05, "loss": 0.4471, "step": 4264 }, { "epoch": 0.519014298752662, "grad_norm": 0.8435122966766357, "learning_rate": 1.7194451860734642e-05, "loss": 0.4484, "step": 4265 }, { "epoch": 0.519135990264679, "grad_norm": 2.783785820007324, "learning_rate": 1.7193096870872245e-05, "loss": 0.5064, "step": 4266 }, { "epoch": 0.5192576817766961, "grad_norm": 3.0867910385131836, "learning_rate": 1.7191741607297618e-05, "loss": 0.551, "step": 4267 }, { "epoch": 0.5193793732887131, "grad_norm": 0.8378987312316895, "learning_rate": 1.7190386070062343e-05, "loss": 0.4494, "step": 4268 }, { "epoch": 0.5195010648007301, "grad_norm": 1.8553073406219482, "learning_rate": 1.718903025921799e-05, "loss": 0.5132, "step": 4269 }, { "epoch": 0.5196227563127472, "grad_norm": 2.2718989849090576, "learning_rate": 1.718767417481616e-05, "loss": 0.5392, "step": 4270 }, { "epoch": 0.5197444478247643, "grad_norm": 0.8725932240486145, "learning_rate": 1.7186317816908446e-05, "loss": 0.5039, "step": 4271 }, { "epoch": 0.5198661393367813, "grad_norm": 1.375636339187622, "learning_rate": 1.7184961185546462e-05, "loss": 0.4961, "step": 4272 }, { "epoch": 0.5199878308487983, "grad_norm": 0.8475624918937683, "learning_rate": 1.7183604280781837e-05, "loss": 0.4866, "step": 4273 }, { "epoch": 0.5201095223608153, "grad_norm": 3.4090487957000732, "learning_rate": 1.7182247102666202e-05, "loss": 0.4834, "step": 4274 }, { "epoch": 0.5202312138728323, "grad_norm": 4.4174323081970215, "learning_rate": 1.7180889651251195e-05, "loss": 0.4856, "step": 4275 }, { "epoch": 0.5203529053848494, "grad_norm": 3.009592294692993, "learning_rate": 1.7179531926588472e-05, "loss": 0.5154, "step": 4276 }, { "epoch": 0.5204745968968665, "grad_norm": 3.412261486053467, "learning_rate": 1.71781739287297e-05, "loss": 0.4374, "step": 4277 }, { "epoch": 0.5205962884088835, "grad_norm": 1.509874701499939, "learning_rate": 1.717681565772655e-05, "loss": 0.4908, "step": 4278 }, { "epoch": 0.5207179799209005, "grad_norm": 4.0937299728393555, "learning_rate": 1.717545711363071e-05, "loss": 0.4304, "step": 4279 }, { "epoch": 0.5208396714329175, "grad_norm": 1.2221148014068604, "learning_rate": 1.7174098296493875e-05, "loss": 0.4548, "step": 4280 }, { "epoch": 0.5209613629449346, "grad_norm": 3.9335718154907227, "learning_rate": 1.717273920636775e-05, "loss": 0.5566, "step": 4281 }, { "epoch": 0.5210830544569516, "grad_norm": 1.5230729579925537, "learning_rate": 1.7171379843304053e-05, "loss": 0.3334, "step": 4282 }, { "epoch": 0.5212047459689687, "grad_norm": 5.134873390197754, "learning_rate": 1.7170020207354507e-05, "loss": 0.5461, "step": 4283 }, { "epoch": 0.5213264374809857, "grad_norm": 2.6143572330474854, "learning_rate": 1.7168660298570855e-05, "loss": 0.4184, "step": 4284 }, { "epoch": 0.5214481289930027, "grad_norm": 4.081435203552246, "learning_rate": 1.7167300117004836e-05, "loss": 0.4742, "step": 4285 }, { "epoch": 0.5215698205050198, "grad_norm": 4.400676250457764, "learning_rate": 1.7165939662708215e-05, "loss": 0.4768, "step": 4286 }, { "epoch": 0.5216915120170368, "grad_norm": 5.75072717666626, "learning_rate": 1.716457893573275e-05, "loss": 0.5651, "step": 4287 }, { "epoch": 0.5218132035290538, "grad_norm": 2.7458395957946777, "learning_rate": 1.7163217936130237e-05, "loss": 0.496, "step": 4288 }, { "epoch": 0.5219348950410709, "grad_norm": 1.4024596214294434, "learning_rate": 1.7161856663952446e-05, "loss": 0.4286, "step": 4289 }, { "epoch": 0.522056586553088, "grad_norm": 1.2655744552612305, "learning_rate": 1.716049511925119e-05, "loss": 0.4469, "step": 4290 }, { "epoch": 0.522178278065105, "grad_norm": 1.254462480545044, "learning_rate": 1.715913330207827e-05, "loss": 0.3975, "step": 4291 }, { "epoch": 0.522299969577122, "grad_norm": 5.87849235534668, "learning_rate": 1.7157771212485514e-05, "loss": 0.4475, "step": 4292 }, { "epoch": 0.522421661089139, "grad_norm": 1.213152289390564, "learning_rate": 1.715640885052474e-05, "loss": 0.5433, "step": 4293 }, { "epoch": 0.522543352601156, "grad_norm": 1.4798842668533325, "learning_rate": 1.7155046216247803e-05, "loss": 0.4764, "step": 4294 }, { "epoch": 0.5226650441131732, "grad_norm": 2.092900276184082, "learning_rate": 1.7153683309706547e-05, "loss": 0.5102, "step": 4295 }, { "epoch": 0.5227867356251902, "grad_norm": 2.112396001815796, "learning_rate": 1.715232013095283e-05, "loss": 0.4268, "step": 4296 }, { "epoch": 0.5229084271372072, "grad_norm": 0.8308590650558472, "learning_rate": 1.715095668003853e-05, "loss": 0.446, "step": 4297 }, { "epoch": 0.5230301186492242, "grad_norm": 1.8258624076843262, "learning_rate": 1.7149592957015528e-05, "loss": 0.4633, "step": 4298 }, { "epoch": 0.5231518101612412, "grad_norm": 2.094789505004883, "learning_rate": 1.714822896193571e-05, "loss": 0.4796, "step": 4299 }, { "epoch": 0.5232735016732583, "grad_norm": 2.63779878616333, "learning_rate": 1.7146864694850988e-05, "loss": 0.5265, "step": 4300 }, { "epoch": 0.5233951931852753, "grad_norm": 0.6422003507614136, "learning_rate": 1.714550015581327e-05, "loss": 0.4783, "step": 4301 }, { "epoch": 0.5235168846972924, "grad_norm": 2.620772123336792, "learning_rate": 1.7144135344874482e-05, "loss": 0.4379, "step": 4302 }, { "epoch": 0.5236385762093094, "grad_norm": 0.6759090423583984, "learning_rate": 1.7142770262086555e-05, "loss": 0.4859, "step": 4303 }, { "epoch": 0.5237602677213264, "grad_norm": 0.8766072988510132, "learning_rate": 1.7141404907501433e-05, "loss": 0.445, "step": 4304 }, { "epoch": 0.5238819592333435, "grad_norm": 3.3468663692474365, "learning_rate": 1.7140039281171078e-05, "loss": 0.469, "step": 4305 }, { "epoch": 0.5240036507453605, "grad_norm": 1.918367862701416, "learning_rate": 1.7138673383147444e-05, "loss": 0.4344, "step": 4306 }, { "epoch": 0.5241253422573775, "grad_norm": 0.7115817070007324, "learning_rate": 1.7137307213482513e-05, "loss": 0.4826, "step": 4307 }, { "epoch": 0.5242470337693946, "grad_norm": 0.6736893653869629, "learning_rate": 1.713594077222827e-05, "loss": 0.4772, "step": 4308 }, { "epoch": 0.5243687252814117, "grad_norm": 1.109004259109497, "learning_rate": 1.7134574059436708e-05, "loss": 0.485, "step": 4309 }, { "epoch": 0.5244904167934287, "grad_norm": 0.5608221888542175, "learning_rate": 1.713320707515984e-05, "loss": 0.4639, "step": 4310 }, { "epoch": 0.5246121083054457, "grad_norm": 3.7170917987823486, "learning_rate": 1.7131839819449673e-05, "loss": 0.5261, "step": 4311 }, { "epoch": 0.5247337998174627, "grad_norm": 0.5669408440589905, "learning_rate": 1.713047229235824e-05, "loss": 0.4629, "step": 4312 }, { "epoch": 0.5248554913294797, "grad_norm": 1.5024335384368896, "learning_rate": 1.712910449393758e-05, "loss": 0.3937, "step": 4313 }, { "epoch": 0.5249771828414969, "grad_norm": 1.9744325876235962, "learning_rate": 1.7127736424239735e-05, "loss": 0.5048, "step": 4314 }, { "epoch": 0.5250988743535139, "grad_norm": 0.6904813647270203, "learning_rate": 1.712636808331676e-05, "loss": 0.4498, "step": 4315 }, { "epoch": 0.5252205658655309, "grad_norm": 2.075857162475586, "learning_rate": 1.712499947122074e-05, "loss": 0.5032, "step": 4316 }, { "epoch": 0.5253422573775479, "grad_norm": 1.9914791584014893, "learning_rate": 1.7123630588003735e-05, "loss": 0.5115, "step": 4317 }, { "epoch": 0.5254639488895649, "grad_norm": 2.0719830989837646, "learning_rate": 1.712226143371784e-05, "loss": 0.4369, "step": 4318 }, { "epoch": 0.525585640401582, "grad_norm": 2.386539936065674, "learning_rate": 1.7120892008415156e-05, "loss": 0.4711, "step": 4319 }, { "epoch": 0.525707331913599, "grad_norm": 0.9945567846298218, "learning_rate": 1.7119522312147797e-05, "loss": 0.4932, "step": 4320 }, { "epoch": 0.5258290234256161, "grad_norm": 0.8807507157325745, "learning_rate": 1.7118152344967873e-05, "loss": 0.4962, "step": 4321 }, { "epoch": 0.5259507149376331, "grad_norm": 1.061413288116455, "learning_rate": 1.7116782106927517e-05, "loss": 0.4946, "step": 4322 }, { "epoch": 0.5260724064496501, "grad_norm": 3.4664292335510254, "learning_rate": 1.7115411598078874e-05, "loss": 0.4208, "step": 4323 }, { "epoch": 0.5261940979616672, "grad_norm": 2.399772882461548, "learning_rate": 1.7114040818474095e-05, "loss": 0.4239, "step": 4324 }, { "epoch": 0.5263157894736842, "grad_norm": 1.0668370723724365, "learning_rate": 1.711266976816533e-05, "loss": 0.5125, "step": 4325 }, { "epoch": 0.5264374809857012, "grad_norm": 0.8144875764846802, "learning_rate": 1.711129844720476e-05, "loss": 0.4769, "step": 4326 }, { "epoch": 0.5265591724977183, "grad_norm": 0.6472660899162292, "learning_rate": 1.7109926855644567e-05, "loss": 0.4858, "step": 4327 }, { "epoch": 0.5266808640097353, "grad_norm": 2.5389180183410645, "learning_rate": 1.710855499353694e-05, "loss": 0.4952, "step": 4328 }, { "epoch": 0.5268025555217524, "grad_norm": 0.6194647550582886, "learning_rate": 1.710718286093408e-05, "loss": 0.4715, "step": 4329 }, { "epoch": 0.5269242470337694, "grad_norm": 1.4323644638061523, "learning_rate": 1.7105810457888202e-05, "loss": 0.4727, "step": 4330 }, { "epoch": 0.5270459385457864, "grad_norm": 1.7734172344207764, "learning_rate": 1.7104437784451527e-05, "loss": 0.4946, "step": 4331 }, { "epoch": 0.5271676300578034, "grad_norm": 1.1468697786331177, "learning_rate": 1.7103064840676287e-05, "loss": 0.4526, "step": 4332 }, { "epoch": 0.5272893215698206, "grad_norm": 2.776639461517334, "learning_rate": 1.7101691626614735e-05, "loss": 0.4451, "step": 4333 }, { "epoch": 0.5274110130818376, "grad_norm": 1.2381566762924194, "learning_rate": 1.710031814231911e-05, "loss": 0.4351, "step": 4334 }, { "epoch": 0.5275327045938546, "grad_norm": 1.198959231376648, "learning_rate": 1.7098944387841686e-05, "loss": 0.4433, "step": 4335 }, { "epoch": 0.5276543961058716, "grad_norm": 1.1127218008041382, "learning_rate": 1.709757036323473e-05, "loss": 0.4534, "step": 4336 }, { "epoch": 0.5277760876178886, "grad_norm": 2.3528897762298584, "learning_rate": 1.7096196068550535e-05, "loss": 0.5155, "step": 4337 }, { "epoch": 0.5278977791299057, "grad_norm": 1.0404629707336426, "learning_rate": 1.709482150384139e-05, "loss": 0.49, "step": 4338 }, { "epoch": 0.5280194706419227, "grad_norm": 0.5362629294395447, "learning_rate": 1.70934466691596e-05, "loss": 0.4199, "step": 4339 }, { "epoch": 0.5281411621539398, "grad_norm": 1.6750553846359253, "learning_rate": 1.709207156455748e-05, "loss": 0.4848, "step": 4340 }, { "epoch": 0.5282628536659568, "grad_norm": 3.2698941230773926, "learning_rate": 1.7090696190087358e-05, "loss": 0.5596, "step": 4341 }, { "epoch": 0.5283845451779738, "grad_norm": 1.4459127187728882, "learning_rate": 1.7089320545801574e-05, "loss": 0.4529, "step": 4342 }, { "epoch": 0.5285062366899909, "grad_norm": 1.2845115661621094, "learning_rate": 1.7087944631752466e-05, "loss": 0.4779, "step": 4343 }, { "epoch": 0.5286279282020079, "grad_norm": 0.7503172755241394, "learning_rate": 1.708656844799239e-05, "loss": 0.5371, "step": 4344 }, { "epoch": 0.5287496197140249, "grad_norm": 2.6030914783477783, "learning_rate": 1.708519199457372e-05, "loss": 0.4416, "step": 4345 }, { "epoch": 0.528871311226042, "grad_norm": 2.975177526473999, "learning_rate": 1.708381527154883e-05, "loss": 0.5184, "step": 4346 }, { "epoch": 0.528993002738059, "grad_norm": 1.4393500089645386, "learning_rate": 1.7082438278970103e-05, "loss": 0.4357, "step": 4347 }, { "epoch": 0.5291146942500761, "grad_norm": 1.8966056108474731, "learning_rate": 1.708106101688994e-05, "loss": 0.4358, "step": 4348 }, { "epoch": 0.5292363857620931, "grad_norm": 2.0543057918548584, "learning_rate": 1.7079683485360753e-05, "loss": 0.5132, "step": 4349 }, { "epoch": 0.5293580772741101, "grad_norm": 3.377936363220215, "learning_rate": 1.707830568443495e-05, "loss": 0.5183, "step": 4350 }, { "epoch": 0.5294797687861271, "grad_norm": 0.7073395848274231, "learning_rate": 1.707692761416497e-05, "loss": 0.4372, "step": 4351 }, { "epoch": 0.5296014602981443, "grad_norm": 0.6043311953544617, "learning_rate": 1.707554927460324e-05, "loss": 0.4346, "step": 4352 }, { "epoch": 0.5297231518101613, "grad_norm": 4.110519886016846, "learning_rate": 1.7074170665802213e-05, "loss": 0.5369, "step": 4353 }, { "epoch": 0.5298448433221783, "grad_norm": 0.6491762399673462, "learning_rate": 1.707279178781436e-05, "loss": 0.4788, "step": 4354 }, { "epoch": 0.5299665348341953, "grad_norm": 0.9395544528961182, "learning_rate": 1.707141264069213e-05, "loss": 0.4782, "step": 4355 }, { "epoch": 0.5300882263462123, "grad_norm": 2.036043882369995, "learning_rate": 1.7070033224488015e-05, "loss": 0.4958, "step": 4356 }, { "epoch": 0.5302099178582294, "grad_norm": 0.676498293876648, "learning_rate": 1.70686535392545e-05, "loss": 0.5, "step": 4357 }, { "epoch": 0.5303316093702464, "grad_norm": 3.6512396335601807, "learning_rate": 1.706727358504409e-05, "loss": 0.427, "step": 4358 }, { "epoch": 0.5304533008822635, "grad_norm": 2.562626600265503, "learning_rate": 1.7065893361909287e-05, "loss": 0.4477, "step": 4359 }, { "epoch": 0.5305749923942805, "grad_norm": 0.6620128154754639, "learning_rate": 1.706451286990262e-05, "loss": 0.5259, "step": 4360 }, { "epoch": 0.5306966839062975, "grad_norm": 1.16592538356781, "learning_rate": 1.7063132109076614e-05, "loss": 0.4656, "step": 4361 }, { "epoch": 0.5308183754183146, "grad_norm": 2.391920566558838, "learning_rate": 1.706175107948381e-05, "loss": 0.4522, "step": 4362 }, { "epoch": 0.5309400669303316, "grad_norm": 2.8118526935577393, "learning_rate": 1.706036978117676e-05, "loss": 0.3875, "step": 4363 }, { "epoch": 0.5310617584423486, "grad_norm": 4.067058563232422, "learning_rate": 1.705898821420803e-05, "loss": 0.5152, "step": 4364 }, { "epoch": 0.5311834499543657, "grad_norm": 0.6446700096130371, "learning_rate": 1.7057606378630186e-05, "loss": 0.4365, "step": 4365 }, { "epoch": 0.5313051414663827, "grad_norm": 3.115743637084961, "learning_rate": 1.7056224274495806e-05, "loss": 0.5132, "step": 4366 }, { "epoch": 0.5314268329783998, "grad_norm": 1.6996607780456543, "learning_rate": 1.7054841901857494e-05, "loss": 0.4611, "step": 4367 }, { "epoch": 0.5315485244904168, "grad_norm": 4.770429611206055, "learning_rate": 1.7053459260767837e-05, "loss": 0.5108, "step": 4368 }, { "epoch": 0.5316702160024338, "grad_norm": 1.3477798700332642, "learning_rate": 1.705207635127946e-05, "loss": 0.464, "step": 4369 }, { "epoch": 0.5317919075144508, "grad_norm": 1.077989101409912, "learning_rate": 1.7050693173444982e-05, "loss": 0.4584, "step": 4370 }, { "epoch": 0.531913599026468, "grad_norm": 3.331965446472168, "learning_rate": 1.7049309727317035e-05, "loss": 0.5296, "step": 4371 }, { "epoch": 0.532035290538485, "grad_norm": 1.5123640298843384, "learning_rate": 1.7047926012948255e-05, "loss": 0.4677, "step": 4372 }, { "epoch": 0.532156982050502, "grad_norm": 3.858304262161255, "learning_rate": 1.7046542030391307e-05, "loss": 0.4444, "step": 4373 }, { "epoch": 0.532278673562519, "grad_norm": 2.681751012802124, "learning_rate": 1.704515777969885e-05, "loss": 0.5066, "step": 4374 }, { "epoch": 0.532400365074536, "grad_norm": 4.16647481918335, "learning_rate": 1.7043773260923557e-05, "loss": 0.4516, "step": 4375 }, { "epoch": 0.532522056586553, "grad_norm": 2.4314897060394287, "learning_rate": 1.704238847411811e-05, "loss": 0.4653, "step": 4376 }, { "epoch": 0.5326437480985702, "grad_norm": 0.959051251411438, "learning_rate": 1.7041003419335204e-05, "loss": 0.4537, "step": 4377 }, { "epoch": 0.5327654396105872, "grad_norm": 2.0506279468536377, "learning_rate": 1.7039618096627545e-05, "loss": 0.4241, "step": 4378 }, { "epoch": 0.5328871311226042, "grad_norm": 0.9842852354049683, "learning_rate": 1.7038232506047844e-05, "loss": 0.4856, "step": 4379 }, { "epoch": 0.5330088226346212, "grad_norm": 1.5740323066711426, "learning_rate": 1.7036846647648827e-05, "loss": 0.4695, "step": 4380 }, { "epoch": 0.5331305141466383, "grad_norm": 3.1457581520080566, "learning_rate": 1.703546052148323e-05, "loss": 0.5303, "step": 4381 }, { "epoch": 0.5332522056586553, "grad_norm": 4.844799518585205, "learning_rate": 1.7034074127603805e-05, "loss": 0.5308, "step": 4382 }, { "epoch": 0.5333738971706723, "grad_norm": 1.412689208984375, "learning_rate": 1.7032687466063292e-05, "loss": 0.4884, "step": 4383 }, { "epoch": 0.5334955886826894, "grad_norm": 0.8175567984580994, "learning_rate": 1.7031300536914467e-05, "loss": 0.4566, "step": 4384 }, { "epoch": 0.5336172801947064, "grad_norm": 2.5051369667053223, "learning_rate": 1.70299133402101e-05, "loss": 0.5196, "step": 4385 }, { "epoch": 0.5337389717067235, "grad_norm": 4.426215648651123, "learning_rate": 1.702852587600298e-05, "loss": 0.4202, "step": 4386 }, { "epoch": 0.5338606632187405, "grad_norm": 0.9319576025009155, "learning_rate": 1.7027138144345906e-05, "loss": 0.486, "step": 4387 }, { "epoch": 0.5339823547307575, "grad_norm": 3.2823240756988525, "learning_rate": 1.702575014529168e-05, "loss": 0.4387, "step": 4388 }, { "epoch": 0.5341040462427745, "grad_norm": 2.318687915802002, "learning_rate": 1.702436187889311e-05, "loss": 0.4616, "step": 4389 }, { "epoch": 0.5342257377547917, "grad_norm": 1.5169910192489624, "learning_rate": 1.702297334520304e-05, "loss": 0.4657, "step": 4390 }, { "epoch": 0.5343474292668087, "grad_norm": 0.652877926826477, "learning_rate": 1.7021584544274294e-05, "loss": 0.4724, "step": 4391 }, { "epoch": 0.5344691207788257, "grad_norm": 0.5499361157417297, "learning_rate": 1.7020195476159724e-05, "loss": 0.4621, "step": 4392 }, { "epoch": 0.5345908122908427, "grad_norm": 3.1362850666046143, "learning_rate": 1.701880614091218e-05, "loss": 0.4163, "step": 4393 }, { "epoch": 0.5347125038028597, "grad_norm": 0.5517951846122742, "learning_rate": 1.701741653858454e-05, "loss": 0.4089, "step": 4394 }, { "epoch": 0.5348341953148767, "grad_norm": 4.176994323730469, "learning_rate": 1.701602666922967e-05, "loss": 0.5421, "step": 4395 }, { "epoch": 0.5349558868268939, "grad_norm": 2.6072075366973877, "learning_rate": 1.701463653290047e-05, "loss": 0.4728, "step": 4396 }, { "epoch": 0.5350775783389109, "grad_norm": 1.0330872535705566, "learning_rate": 1.7013246129649825e-05, "loss": 0.4258, "step": 4397 }, { "epoch": 0.5351992698509279, "grad_norm": 3.934319496154785, "learning_rate": 1.701185545953065e-05, "loss": 0.5134, "step": 4398 }, { "epoch": 0.5353209613629449, "grad_norm": 1.0257481336593628, "learning_rate": 1.7010464522595863e-05, "loss": 0.4314, "step": 4399 }, { "epoch": 0.535442652874962, "grad_norm": 3.8877146244049072, "learning_rate": 1.7009073318898386e-05, "loss": 0.5703, "step": 4400 }, { "epoch": 0.535564344386979, "grad_norm": 2.250124454498291, "learning_rate": 1.7007681848491163e-05, "loss": 0.3921, "step": 4401 }, { "epoch": 0.535686035898996, "grad_norm": 1.0167269706726074, "learning_rate": 1.700629011142714e-05, "loss": 0.4644, "step": 4402 }, { "epoch": 0.5358077274110131, "grad_norm": 0.91866135597229, "learning_rate": 1.7004898107759276e-05, "loss": 0.4841, "step": 4403 }, { "epoch": 0.5359294189230301, "grad_norm": 1.4587697982788086, "learning_rate": 1.7003505837540538e-05, "loss": 0.4953, "step": 4404 }, { "epoch": 0.5360511104350472, "grad_norm": 0.7243636250495911, "learning_rate": 1.7002113300823914e-05, "loss": 0.4664, "step": 4405 }, { "epoch": 0.5361728019470642, "grad_norm": 1.1874521970748901, "learning_rate": 1.7000720497662376e-05, "loss": 0.4971, "step": 4406 }, { "epoch": 0.5362944934590812, "grad_norm": 0.9897105097770691, "learning_rate": 1.699932742810894e-05, "loss": 0.4577, "step": 4407 }, { "epoch": 0.5364161849710982, "grad_norm": 2.2329607009887695, "learning_rate": 1.69979340922166e-05, "loss": 0.4491, "step": 4408 }, { "epoch": 0.5365378764831154, "grad_norm": 3.0158960819244385, "learning_rate": 1.6996540490038387e-05, "loss": 0.4425, "step": 4409 }, { "epoch": 0.5366595679951324, "grad_norm": 1.7176218032836914, "learning_rate": 1.6995146621627328e-05, "loss": 0.4632, "step": 4410 }, { "epoch": 0.5367812595071494, "grad_norm": 0.8636429905891418, "learning_rate": 1.699375248703646e-05, "loss": 0.474, "step": 4411 }, { "epoch": 0.5369029510191664, "grad_norm": 1.9928045272827148, "learning_rate": 1.6992358086318832e-05, "loss": 0.4679, "step": 4412 }, { "epoch": 0.5370246425311834, "grad_norm": 2.9881694316864014, "learning_rate": 1.6990963419527507e-05, "loss": 0.4901, "step": 4413 }, { "epoch": 0.5371463340432004, "grad_norm": 1.7402344942092896, "learning_rate": 1.698956848671555e-05, "loss": 0.4655, "step": 4414 }, { "epoch": 0.5372680255552176, "grad_norm": 1.499241590499878, "learning_rate": 1.6988173287936044e-05, "loss": 0.4655, "step": 4415 }, { "epoch": 0.5373897170672346, "grad_norm": 4.834335803985596, "learning_rate": 1.6986777823242087e-05, "loss": 0.5442, "step": 4416 }, { "epoch": 0.5375114085792516, "grad_norm": 0.8546738624572754, "learning_rate": 1.6985382092686766e-05, "loss": 0.434, "step": 4417 }, { "epoch": 0.5376331000912686, "grad_norm": 3.5857698917388916, "learning_rate": 1.6983986096323198e-05, "loss": 0.5334, "step": 4418 }, { "epoch": 0.5377547916032857, "grad_norm": 4.435749053955078, "learning_rate": 1.6982589834204507e-05, "loss": 0.3882, "step": 4419 }, { "epoch": 0.5378764831153027, "grad_norm": 4.089552402496338, "learning_rate": 1.6981193306383815e-05, "loss": 0.4474, "step": 4420 }, { "epoch": 0.5379981746273197, "grad_norm": 1.451317310333252, "learning_rate": 1.6979796512914268e-05, "loss": 0.4887, "step": 4421 }, { "epoch": 0.5381198661393368, "grad_norm": 3.0299057960510254, "learning_rate": 1.697839945384902e-05, "loss": 0.4518, "step": 4422 }, { "epoch": 0.5382415576513538, "grad_norm": 2.9058687686920166, "learning_rate": 1.697700212924122e-05, "loss": 0.4367, "step": 4423 }, { "epoch": 0.5383632491633709, "grad_norm": 0.6400445699691772, "learning_rate": 1.697560453914406e-05, "loss": 0.5043, "step": 4424 }, { "epoch": 0.5384849406753879, "grad_norm": 1.751715064048767, "learning_rate": 1.69742066836107e-05, "loss": 0.4979, "step": 4425 }, { "epoch": 0.5386066321874049, "grad_norm": 2.2712137699127197, "learning_rate": 1.6972808562694337e-05, "loss": 0.5098, "step": 4426 }, { "epoch": 0.5387283236994219, "grad_norm": 0.9613345861434937, "learning_rate": 1.6971410176448183e-05, "loss": 0.4499, "step": 4427 }, { "epoch": 0.538850015211439, "grad_norm": 0.5806231498718262, "learning_rate": 1.697001152492544e-05, "loss": 0.4994, "step": 4428 }, { "epoch": 0.5389717067234561, "grad_norm": 1.7357196807861328, "learning_rate": 1.6968612608179328e-05, "loss": 0.5158, "step": 4429 }, { "epoch": 0.5390933982354731, "grad_norm": 1.5228915214538574, "learning_rate": 1.6967213426263084e-05, "loss": 0.4658, "step": 4430 }, { "epoch": 0.5392150897474901, "grad_norm": 2.5115373134613037, "learning_rate": 1.6965813979229947e-05, "loss": 0.4553, "step": 4431 }, { "epoch": 0.5393367812595071, "grad_norm": 0.6168383359909058, "learning_rate": 1.696441426713317e-05, "loss": 0.4806, "step": 4432 }, { "epoch": 0.5394584727715241, "grad_norm": 1.9714595079421997, "learning_rate": 1.6963014290026014e-05, "loss": 0.4487, "step": 4433 }, { "epoch": 0.5395801642835413, "grad_norm": 0.7653484344482422, "learning_rate": 1.6961614047961755e-05, "loss": 0.4851, "step": 4434 }, { "epoch": 0.5397018557955583, "grad_norm": 1.018984317779541, "learning_rate": 1.6960213540993668e-05, "loss": 0.445, "step": 4435 }, { "epoch": 0.5398235473075753, "grad_norm": 0.766427218914032, "learning_rate": 1.695881276917505e-05, "loss": 0.4643, "step": 4436 }, { "epoch": 0.5399452388195923, "grad_norm": 2.314392566680908, "learning_rate": 1.69574117325592e-05, "loss": 0.507, "step": 4437 }, { "epoch": 0.5400669303316094, "grad_norm": 1.6304572820663452, "learning_rate": 1.6956010431199437e-05, "loss": 0.488, "step": 4438 }, { "epoch": 0.5401886218436264, "grad_norm": 1.6652350425720215, "learning_rate": 1.6954608865149075e-05, "loss": 0.4362, "step": 4439 }, { "epoch": 0.5403103133556434, "grad_norm": 0.621074914932251, "learning_rate": 1.6953207034461456e-05, "loss": 0.4909, "step": 4440 }, { "epoch": 0.5404320048676605, "grad_norm": 4.307516098022461, "learning_rate": 1.6951804939189912e-05, "loss": 0.3552, "step": 4441 }, { "epoch": 0.5405536963796775, "grad_norm": 0.6626152992248535, "learning_rate": 1.69504025793878e-05, "loss": 0.4665, "step": 4442 }, { "epoch": 0.5406753878916946, "grad_norm": 1.8344053030014038, "learning_rate": 1.6948999955108484e-05, "loss": 0.4216, "step": 4443 }, { "epoch": 0.5407970794037116, "grad_norm": 2.0432820320129395, "learning_rate": 1.6947597066405335e-05, "loss": 0.5277, "step": 4444 }, { "epoch": 0.5409187709157286, "grad_norm": 1.7651121616363525, "learning_rate": 1.6946193913331742e-05, "loss": 0.4775, "step": 4445 }, { "epoch": 0.5410404624277456, "grad_norm": 0.6606485247612, "learning_rate": 1.6944790495941094e-05, "loss": 0.451, "step": 4446 }, { "epoch": 0.5411621539397627, "grad_norm": 0.6312865018844604, "learning_rate": 1.6943386814286785e-05, "loss": 0.4736, "step": 4447 }, { "epoch": 0.5412838454517798, "grad_norm": 2.1066336631774902, "learning_rate": 1.694198286842224e-05, "loss": 0.5037, "step": 4448 }, { "epoch": 0.5414055369637968, "grad_norm": 2.693444013595581, "learning_rate": 1.6940578658400876e-05, "loss": 0.4198, "step": 4449 }, { "epoch": 0.5415272284758138, "grad_norm": 1.0548605918884277, "learning_rate": 1.693917418427613e-05, "loss": 0.4843, "step": 4450 }, { "epoch": 0.5416489199878308, "grad_norm": 1.1741557121276855, "learning_rate": 1.6937769446101443e-05, "loss": 0.4229, "step": 4451 }, { "epoch": 0.5417706114998478, "grad_norm": 1.3165100812911987, "learning_rate": 1.6936364443930267e-05, "loss": 0.4929, "step": 4452 }, { "epoch": 0.541892303011865, "grad_norm": 1.962891936302185, "learning_rate": 1.6934959177816062e-05, "loss": 0.4519, "step": 4453 }, { "epoch": 0.542013994523882, "grad_norm": 1.664731502532959, "learning_rate": 1.693355364781231e-05, "loss": 0.4852, "step": 4454 }, { "epoch": 0.542135686035899, "grad_norm": 1.6235625743865967, "learning_rate": 1.693214785397249e-05, "loss": 0.4579, "step": 4455 }, { "epoch": 0.542257377547916, "grad_norm": 3.519458532333374, "learning_rate": 1.6930741796350096e-05, "loss": 0.5604, "step": 4456 }, { "epoch": 0.542379069059933, "grad_norm": 1.6399915218353271, "learning_rate": 1.692933547499863e-05, "loss": 0.4062, "step": 4457 }, { "epoch": 0.5425007605719501, "grad_norm": 5.24549674987793, "learning_rate": 1.6927928889971606e-05, "loss": 0.5516, "step": 4458 }, { "epoch": 0.5426224520839671, "grad_norm": 0.7926618456840515, "learning_rate": 1.692652204132255e-05, "loss": 0.4773, "step": 4459 }, { "epoch": 0.5427441435959842, "grad_norm": 0.8715618848800659, "learning_rate": 1.692511492910499e-05, "loss": 0.4178, "step": 4460 }, { "epoch": 0.5428658351080012, "grad_norm": 2.7368240356445312, "learning_rate": 1.6923707553372473e-05, "loss": 0.5298, "step": 4461 }, { "epoch": 0.5429875266200183, "grad_norm": 0.9556664228439331, "learning_rate": 1.6922299914178556e-05, "loss": 0.4706, "step": 4462 }, { "epoch": 0.5431092181320353, "grad_norm": 1.2215858697891235, "learning_rate": 1.6920892011576796e-05, "loss": 0.4695, "step": 4463 }, { "epoch": 0.5432309096440523, "grad_norm": 1.4884281158447266, "learning_rate": 1.691948384562077e-05, "loss": 0.4724, "step": 4464 }, { "epoch": 0.5433526011560693, "grad_norm": 1.1030713319778442, "learning_rate": 1.6918075416364065e-05, "loss": 0.4794, "step": 4465 }, { "epoch": 0.5434742926680864, "grad_norm": 1.4520702362060547, "learning_rate": 1.6916666723860263e-05, "loss": 0.5309, "step": 4466 }, { "epoch": 0.5435959841801035, "grad_norm": 2.1015758514404297, "learning_rate": 1.691525776816298e-05, "loss": 0.4605, "step": 4467 }, { "epoch": 0.5437176756921205, "grad_norm": 1.9199395179748535, "learning_rate": 1.6913848549325825e-05, "loss": 0.4908, "step": 4468 }, { "epoch": 0.5438393672041375, "grad_norm": 0.8052958250045776, "learning_rate": 1.691243906740242e-05, "loss": 0.4665, "step": 4469 }, { "epoch": 0.5439610587161545, "grad_norm": 0.5576810836791992, "learning_rate": 1.6911029322446402e-05, "loss": 0.4764, "step": 4470 }, { "epoch": 0.5440827502281715, "grad_norm": 2.050319194793701, "learning_rate": 1.6909619314511414e-05, "loss": 0.4844, "step": 4471 }, { "epoch": 0.5442044417401887, "grad_norm": 1.3106358051300049, "learning_rate": 1.6908209043651107e-05, "loss": 0.4114, "step": 4472 }, { "epoch": 0.5443261332522057, "grad_norm": 1.462104320526123, "learning_rate": 1.6906798509919147e-05, "loss": 0.4581, "step": 4473 }, { "epoch": 0.5444478247642227, "grad_norm": 0.8785805106163025, "learning_rate": 1.6905387713369207e-05, "loss": 0.4826, "step": 4474 }, { "epoch": 0.5445695162762397, "grad_norm": 4.6345415115356445, "learning_rate": 1.6903976654054973e-05, "loss": 0.5868, "step": 4475 }, { "epoch": 0.5446912077882567, "grad_norm": 1.8743029832839966, "learning_rate": 1.6902565332030136e-05, "loss": 0.4257, "step": 4476 }, { "epoch": 0.5448128993002738, "grad_norm": 0.8281827569007874, "learning_rate": 1.6901153747348403e-05, "loss": 0.4734, "step": 4477 }, { "epoch": 0.5449345908122909, "grad_norm": 3.947049856185913, "learning_rate": 1.689974190006348e-05, "loss": 0.4194, "step": 4478 }, { "epoch": 0.5450562823243079, "grad_norm": 2.000213384628296, "learning_rate": 1.68983297902291e-05, "loss": 0.4394, "step": 4479 }, { "epoch": 0.5451779738363249, "grad_norm": 1.2415028810501099, "learning_rate": 1.6896917417898992e-05, "loss": 0.458, "step": 4480 }, { "epoch": 0.545299665348342, "grad_norm": 1.306781530380249, "learning_rate": 1.6895504783126902e-05, "loss": 0.4265, "step": 4481 }, { "epoch": 0.545421356860359, "grad_norm": 1.994301676750183, "learning_rate": 1.689409188596658e-05, "loss": 0.467, "step": 4482 }, { "epoch": 0.545543048372376, "grad_norm": 2.5383036136627197, "learning_rate": 1.6892678726471788e-05, "loss": 0.4743, "step": 4483 }, { "epoch": 0.545664739884393, "grad_norm": 2.524864435195923, "learning_rate": 1.689126530469631e-05, "loss": 0.5043, "step": 4484 }, { "epoch": 0.5457864313964101, "grad_norm": 1.3676741123199463, "learning_rate": 1.688985162069392e-05, "loss": 0.5021, "step": 4485 }, { "epoch": 0.5459081229084272, "grad_norm": 1.1412726640701294, "learning_rate": 1.6888437674518418e-05, "loss": 0.4829, "step": 4486 }, { "epoch": 0.5460298144204442, "grad_norm": 0.9708731770515442, "learning_rate": 1.6887023466223596e-05, "loss": 0.5063, "step": 4487 }, { "epoch": 0.5461515059324612, "grad_norm": 0.9692050814628601, "learning_rate": 1.6885608995863282e-05, "loss": 0.4804, "step": 4488 }, { "epoch": 0.5462731974444782, "grad_norm": 6.402462482452393, "learning_rate": 1.6884194263491293e-05, "loss": 0.4229, "step": 4489 }, { "epoch": 0.5463948889564952, "grad_norm": 2.0523343086242676, "learning_rate": 1.688277926916146e-05, "loss": 0.4944, "step": 4490 }, { "epoch": 0.5465165804685124, "grad_norm": 3.7021658420562744, "learning_rate": 1.6881364012927633e-05, "loss": 0.5076, "step": 4491 }, { "epoch": 0.5466382719805294, "grad_norm": 2.4698987007141113, "learning_rate": 1.687994849484366e-05, "loss": 0.5152, "step": 4492 }, { "epoch": 0.5467599634925464, "grad_norm": 3.081721544265747, "learning_rate": 1.6878532714963406e-05, "loss": 0.4497, "step": 4493 }, { "epoch": 0.5468816550045634, "grad_norm": 1.5112048387527466, "learning_rate": 1.6877116673340746e-05, "loss": 0.4707, "step": 4494 }, { "epoch": 0.5470033465165804, "grad_norm": 1.6402385234832764, "learning_rate": 1.687570037002956e-05, "loss": 0.4772, "step": 4495 }, { "epoch": 0.5471250380285975, "grad_norm": 1.9205235242843628, "learning_rate": 1.687428380508374e-05, "loss": 0.4888, "step": 4496 }, { "epoch": 0.5472467295406146, "grad_norm": 2.1676223278045654, "learning_rate": 1.6872866978557197e-05, "loss": 0.4948, "step": 4497 }, { "epoch": 0.5473684210526316, "grad_norm": 3.0421810150146484, "learning_rate": 1.687144989050384e-05, "loss": 0.4957, "step": 4498 }, { "epoch": 0.5474901125646486, "grad_norm": 1.840588927268982, "learning_rate": 1.687003254097759e-05, "loss": 0.4732, "step": 4499 }, { "epoch": 0.5476118040766657, "grad_norm": 2.079371690750122, "learning_rate": 1.686861493003238e-05, "loss": 0.3964, "step": 4500 }, { "epoch": 0.5477334955886827, "grad_norm": 0.7197351455688477, "learning_rate": 1.6867197057722153e-05, "loss": 0.4992, "step": 4501 }, { "epoch": 0.5478551871006997, "grad_norm": 1.077666997909546, "learning_rate": 1.6865778924100867e-05, "loss": 0.4579, "step": 4502 }, { "epoch": 0.5479768786127167, "grad_norm": 2.053227424621582, "learning_rate": 1.686436052922248e-05, "loss": 0.4269, "step": 4503 }, { "epoch": 0.5480985701247338, "grad_norm": 0.6475696563720703, "learning_rate": 1.686294187314097e-05, "loss": 0.4638, "step": 4504 }, { "epoch": 0.5482202616367509, "grad_norm": 1.3565797805786133, "learning_rate": 1.6861522955910313e-05, "loss": 0.4694, "step": 4505 }, { "epoch": 0.5483419531487679, "grad_norm": 0.9397381544113159, "learning_rate": 1.6860103777584502e-05, "loss": 0.4249, "step": 4506 }, { "epoch": 0.5484636446607849, "grad_norm": 0.6636385917663574, "learning_rate": 1.6858684338217547e-05, "loss": 0.4291, "step": 4507 }, { "epoch": 0.5485853361728019, "grad_norm": 5.156121253967285, "learning_rate": 1.6857264637863457e-05, "loss": 0.5727, "step": 4508 }, { "epoch": 0.5487070276848189, "grad_norm": 1.4113715887069702, "learning_rate": 1.6855844676576253e-05, "loss": 0.4652, "step": 4509 }, { "epoch": 0.5488287191968361, "grad_norm": 1.1701862812042236, "learning_rate": 1.6854424454409962e-05, "loss": 0.4241, "step": 4510 }, { "epoch": 0.5489504107088531, "grad_norm": 3.4081249237060547, "learning_rate": 1.685300397141864e-05, "loss": 0.5306, "step": 4511 }, { "epoch": 0.5490721022208701, "grad_norm": 1.1083649396896362, "learning_rate": 1.685158322765633e-05, "loss": 0.4515, "step": 4512 }, { "epoch": 0.5491937937328871, "grad_norm": 3.107776641845703, "learning_rate": 1.6850162223177094e-05, "loss": 0.5052, "step": 4513 }, { "epoch": 0.5493154852449041, "grad_norm": 0.8299750685691833, "learning_rate": 1.6848740958035012e-05, "loss": 0.4737, "step": 4514 }, { "epoch": 0.5494371767569212, "grad_norm": 5.90954065322876, "learning_rate": 1.6847319432284155e-05, "loss": 0.4333, "step": 4515 }, { "epoch": 0.5495588682689383, "grad_norm": 4.6237382888793945, "learning_rate": 1.6845897645978624e-05, "loss": 0.4629, "step": 4516 }, { "epoch": 0.5496805597809553, "grad_norm": 3.8987646102905273, "learning_rate": 1.6844475599172516e-05, "loss": 0.443, "step": 4517 }, { "epoch": 0.5498022512929723, "grad_norm": 2.2025468349456787, "learning_rate": 1.684305329191994e-05, "loss": 0.4827, "step": 4518 }, { "epoch": 0.5499239428049894, "grad_norm": 0.7520539164543152, "learning_rate": 1.6841630724275027e-05, "loss": 0.462, "step": 4519 }, { "epoch": 0.5500456343170064, "grad_norm": 1.3843849897384644, "learning_rate": 1.6840207896291905e-05, "loss": 0.4426, "step": 4520 }, { "epoch": 0.5501673258290234, "grad_norm": 2.7557339668273926, "learning_rate": 1.683878480802471e-05, "loss": 0.4157, "step": 4521 }, { "epoch": 0.5502890173410404, "grad_norm": 1.0031745433807373, "learning_rate": 1.6837361459527597e-05, "loss": 0.4636, "step": 4522 }, { "epoch": 0.5504107088530575, "grad_norm": 1.2858787775039673, "learning_rate": 1.683593785085473e-05, "loss": 0.429, "step": 4523 }, { "epoch": 0.5505324003650746, "grad_norm": 0.6925680637359619, "learning_rate": 1.6834513982060277e-05, "loss": 0.4322, "step": 4524 }, { "epoch": 0.5506540918770916, "grad_norm": 1.1897066831588745, "learning_rate": 1.6833089853198422e-05, "loss": 0.4261, "step": 4525 }, { "epoch": 0.5507757833891086, "grad_norm": 1.5610861778259277, "learning_rate": 1.6831665464323354e-05, "loss": 0.4535, "step": 4526 }, { "epoch": 0.5508974749011256, "grad_norm": 2.1373817920684814, "learning_rate": 1.6830240815489274e-05, "loss": 0.4717, "step": 4527 }, { "epoch": 0.5510191664131426, "grad_norm": 1.7551811933517456, "learning_rate": 1.6828815906750395e-05, "loss": 0.4387, "step": 4528 }, { "epoch": 0.5511408579251598, "grad_norm": 2.961994171142578, "learning_rate": 1.6827390738160933e-05, "loss": 0.5014, "step": 4529 }, { "epoch": 0.5512625494371768, "grad_norm": 0.7889112830162048, "learning_rate": 1.682596530977512e-05, "loss": 0.4805, "step": 4530 }, { "epoch": 0.5513842409491938, "grad_norm": 3.972547769546509, "learning_rate": 1.6824539621647202e-05, "loss": 0.4624, "step": 4531 }, { "epoch": 0.5515059324612108, "grad_norm": 0.8660321235656738, "learning_rate": 1.682311367383142e-05, "loss": 0.5218, "step": 4532 }, { "epoch": 0.5516276239732278, "grad_norm": 0.7364094853401184, "learning_rate": 1.6821687466382046e-05, "loss": 0.5325, "step": 4533 }, { "epoch": 0.5517493154852449, "grad_norm": 3.64518404006958, "learning_rate": 1.682026099935334e-05, "loss": 0.4127, "step": 4534 }, { "epoch": 0.551871006997262, "grad_norm": 1.7240809202194214, "learning_rate": 1.6818834272799587e-05, "loss": 0.4547, "step": 4535 }, { "epoch": 0.551992698509279, "grad_norm": 1.6545079946517944, "learning_rate": 1.6817407286775075e-05, "loss": 0.4767, "step": 4536 }, { "epoch": 0.552114390021296, "grad_norm": 2.3876535892486572, "learning_rate": 1.6815980041334108e-05, "loss": 0.5022, "step": 4537 }, { "epoch": 0.552236081533313, "grad_norm": 1.983043909072876, "learning_rate": 1.681455253653099e-05, "loss": 0.451, "step": 4538 }, { "epoch": 0.5523577730453301, "grad_norm": 0.5386200547218323, "learning_rate": 1.681312477242004e-05, "loss": 0.4298, "step": 4539 }, { "epoch": 0.5524794645573471, "grad_norm": 1.1130664348602295, "learning_rate": 1.6811696749055592e-05, "loss": 0.3978, "step": 4540 }, { "epoch": 0.5526011560693641, "grad_norm": 3.314008951187134, "learning_rate": 1.681026846649198e-05, "loss": 0.4727, "step": 4541 }, { "epoch": 0.5527228475813812, "grad_norm": 1.0250221490859985, "learning_rate": 1.6808839924783563e-05, "loss": 0.4253, "step": 4542 }, { "epoch": 0.5528445390933983, "grad_norm": 2.0319528579711914, "learning_rate": 1.680741112398469e-05, "loss": 0.4398, "step": 4543 }, { "epoch": 0.5529662306054153, "grad_norm": 2.3150103092193604, "learning_rate": 1.680598206414973e-05, "loss": 0.4706, "step": 4544 }, { "epoch": 0.5530879221174323, "grad_norm": 1.0327575206756592, "learning_rate": 1.6804552745333073e-05, "loss": 0.3994, "step": 4545 }, { "epoch": 0.5532096136294493, "grad_norm": 1.6495288610458374, "learning_rate": 1.6803123167589092e-05, "loss": 0.4733, "step": 4546 }, { "epoch": 0.5533313051414663, "grad_norm": 1.3810745477676392, "learning_rate": 1.6801693330972195e-05, "loss": 0.5053, "step": 4547 }, { "epoch": 0.5534529966534835, "grad_norm": 1.2458540201187134, "learning_rate": 1.6800263235536787e-05, "loss": 0.4658, "step": 4548 }, { "epoch": 0.5535746881655005, "grad_norm": 3.3685176372528076, "learning_rate": 1.6798832881337287e-05, "loss": 0.4695, "step": 4549 }, { "epoch": 0.5536963796775175, "grad_norm": 3.4742231369018555, "learning_rate": 1.6797402268428127e-05, "loss": 0.5015, "step": 4550 }, { "epoch": 0.5538180711895345, "grad_norm": 4.773903846740723, "learning_rate": 1.679597139686374e-05, "loss": 0.4695, "step": 4551 }, { "epoch": 0.5539397627015515, "grad_norm": 1.949647068977356, "learning_rate": 1.679454026669857e-05, "loss": 0.5018, "step": 4552 }, { "epoch": 0.5540614542135686, "grad_norm": 2.3221402168273926, "learning_rate": 1.679310887798708e-05, "loss": 0.4421, "step": 4553 }, { "epoch": 0.5541831457255857, "grad_norm": 2.228231191635132, "learning_rate": 1.679167723078374e-05, "loss": 0.4202, "step": 4554 }, { "epoch": 0.5543048372376027, "grad_norm": 0.7083761692047119, "learning_rate": 1.679024532514302e-05, "loss": 0.4665, "step": 4555 }, { "epoch": 0.5544265287496197, "grad_norm": 2.922244071960449, "learning_rate": 1.6788813161119412e-05, "loss": 0.5286, "step": 4556 }, { "epoch": 0.5545482202616367, "grad_norm": 2.4359800815582275, "learning_rate": 1.6787380738767408e-05, "loss": 0.5093, "step": 4557 }, { "epoch": 0.5546699117736538, "grad_norm": 4.310858249664307, "learning_rate": 1.6785948058141523e-05, "loss": 0.5383, "step": 4558 }, { "epoch": 0.5547916032856708, "grad_norm": 1.0454226732254028, "learning_rate": 1.6784515119296262e-05, "loss": 0.4027, "step": 4559 }, { "epoch": 0.5549132947976878, "grad_norm": 1.8177512884140015, "learning_rate": 1.6783081922286166e-05, "loss": 0.4619, "step": 4560 }, { "epoch": 0.5550349863097049, "grad_norm": 1.7751100063323975, "learning_rate": 1.6781648467165755e-05, "loss": 0.4635, "step": 4561 }, { "epoch": 0.555156677821722, "grad_norm": 1.5312920808792114, "learning_rate": 1.6780214753989587e-05, "loss": 0.398, "step": 4562 }, { "epoch": 0.555278369333739, "grad_norm": 0.9218560457229614, "learning_rate": 1.6778780782812208e-05, "loss": 0.4978, "step": 4563 }, { "epoch": 0.555400060845756, "grad_norm": 0.9840953350067139, "learning_rate": 1.6777346553688194e-05, "loss": 0.4493, "step": 4564 }, { "epoch": 0.555521752357773, "grad_norm": 0.9022337198257446, "learning_rate": 1.6775912066672114e-05, "loss": 0.4574, "step": 4565 }, { "epoch": 0.55564344386979, "grad_norm": 1.912704348564148, "learning_rate": 1.6774477321818558e-05, "loss": 0.4326, "step": 4566 }, { "epoch": 0.5557651353818072, "grad_norm": 1.1413514614105225, "learning_rate": 1.6773042319182115e-05, "loss": 0.4808, "step": 4567 }, { "epoch": 0.5558868268938242, "grad_norm": 0.8798618316650391, "learning_rate": 1.6771607058817388e-05, "loss": 0.4272, "step": 4568 }, { "epoch": 0.5560085184058412, "grad_norm": 1.481116771697998, "learning_rate": 1.6770171540778998e-05, "loss": 0.4009, "step": 4569 }, { "epoch": 0.5561302099178582, "grad_norm": 1.1077401638031006, "learning_rate": 1.6768735765121568e-05, "loss": 0.509, "step": 4570 }, { "epoch": 0.5562519014298752, "grad_norm": 2.603334426879883, "learning_rate": 1.6767299731899735e-05, "loss": 0.5018, "step": 4571 }, { "epoch": 0.5563735929418923, "grad_norm": 0.6485960483551025, "learning_rate": 1.6765863441168136e-05, "loss": 0.4543, "step": 4572 }, { "epoch": 0.5564952844539094, "grad_norm": 0.6065893769264221, "learning_rate": 1.6764426892981428e-05, "loss": 0.4407, "step": 4573 }, { "epoch": 0.5566169759659264, "grad_norm": 1.0884718894958496, "learning_rate": 1.676299008739428e-05, "loss": 0.449, "step": 4574 }, { "epoch": 0.5567386674779434, "grad_norm": 1.9185982942581177, "learning_rate": 1.6761553024461353e-05, "loss": 0.5235, "step": 4575 }, { "epoch": 0.5568603589899604, "grad_norm": 1.969686508178711, "learning_rate": 1.6760115704237345e-05, "loss": 0.4874, "step": 4576 }, { "epoch": 0.5569820505019775, "grad_norm": 4.09069299697876, "learning_rate": 1.6758678126776935e-05, "loss": 0.4007, "step": 4577 }, { "epoch": 0.5571037420139945, "grad_norm": 1.188254714012146, "learning_rate": 1.675724029213484e-05, "loss": 0.4849, "step": 4578 }, { "epoch": 0.5572254335260116, "grad_norm": 0.7444627285003662, "learning_rate": 1.6755802200365757e-05, "loss": 0.4778, "step": 4579 }, { "epoch": 0.5573471250380286, "grad_norm": 0.8509902954101562, "learning_rate": 1.6754363851524423e-05, "loss": 0.4862, "step": 4580 }, { "epoch": 0.5574688165500457, "grad_norm": 1.926151156425476, "learning_rate": 1.675292524566556e-05, "loss": 0.3914, "step": 4581 }, { "epoch": 0.5575905080620627, "grad_norm": 1.415952205657959, "learning_rate": 1.675148638284392e-05, "loss": 0.4093, "step": 4582 }, { "epoch": 0.5577121995740797, "grad_norm": 1.0945243835449219, "learning_rate": 1.675004726311424e-05, "loss": 0.4246, "step": 4583 }, { "epoch": 0.5578338910860967, "grad_norm": 0.7974362373352051, "learning_rate": 1.6748607886531293e-05, "loss": 0.4402, "step": 4584 }, { "epoch": 0.5579555825981137, "grad_norm": 3.542086124420166, "learning_rate": 1.6747168253149847e-05, "loss": 0.4661, "step": 4585 }, { "epoch": 0.5580772741101309, "grad_norm": 4.370833873748779, "learning_rate": 1.6745728363024685e-05, "loss": 0.5445, "step": 4586 }, { "epoch": 0.5581989656221479, "grad_norm": 3.091870069503784, "learning_rate": 1.6744288216210593e-05, "loss": 0.4522, "step": 4587 }, { "epoch": 0.5583206571341649, "grad_norm": 3.192312002182007, "learning_rate": 1.6742847812762377e-05, "loss": 0.4739, "step": 4588 }, { "epoch": 0.5584423486461819, "grad_norm": 3.6456682682037354, "learning_rate": 1.6741407152734844e-05, "loss": 0.524, "step": 4589 }, { "epoch": 0.5585640401581989, "grad_norm": 1.131582498550415, "learning_rate": 1.6739966236182814e-05, "loss": 0.42, "step": 4590 }, { "epoch": 0.558685731670216, "grad_norm": 2.22727370262146, "learning_rate": 1.6738525063161117e-05, "loss": 0.4722, "step": 4591 }, { "epoch": 0.5588074231822331, "grad_norm": 0.9118644595146179, "learning_rate": 1.6737083633724596e-05, "loss": 0.4698, "step": 4592 }, { "epoch": 0.5589291146942501, "grad_norm": 2.725691795349121, "learning_rate": 1.6735641947928095e-05, "loss": 0.4214, "step": 4593 }, { "epoch": 0.5590508062062671, "grad_norm": 4.871251583099365, "learning_rate": 1.673420000582648e-05, "loss": 0.4596, "step": 4594 }, { "epoch": 0.5591724977182841, "grad_norm": 3.398688316345215, "learning_rate": 1.673275780747461e-05, "loss": 0.4721, "step": 4595 }, { "epoch": 0.5592941892303012, "grad_norm": 2.8676795959472656, "learning_rate": 1.6731315352927375e-05, "loss": 0.4868, "step": 4596 }, { "epoch": 0.5594158807423182, "grad_norm": 3.3091068267822266, "learning_rate": 1.6729872642239656e-05, "loss": 0.4924, "step": 4597 }, { "epoch": 0.5595375722543353, "grad_norm": 2.1699841022491455, "learning_rate": 1.672842967546635e-05, "loss": 0.5075, "step": 4598 }, { "epoch": 0.5596592637663523, "grad_norm": 2.9286632537841797, "learning_rate": 1.6726986452662372e-05, "loss": 0.4725, "step": 4599 }, { "epoch": 0.5597809552783694, "grad_norm": 1.9711849689483643, "learning_rate": 1.672554297388263e-05, "loss": 0.4611, "step": 4600 }, { "epoch": 0.5599026467903864, "grad_norm": 2.653597831726074, "learning_rate": 1.6724099239182062e-05, "loss": 0.5205, "step": 4601 }, { "epoch": 0.5600243383024034, "grad_norm": 1.3509598970413208, "learning_rate": 1.6722655248615597e-05, "loss": 0.4686, "step": 4602 }, { "epoch": 0.5601460298144204, "grad_norm": 0.5680484771728516, "learning_rate": 1.6721211002238185e-05, "loss": 0.3957, "step": 4603 }, { "epoch": 0.5602677213264374, "grad_norm": 2.985861301422119, "learning_rate": 1.6719766500104782e-05, "loss": 0.4969, "step": 4604 }, { "epoch": 0.5603894128384546, "grad_norm": 4.2559895515441895, "learning_rate": 1.6718321742270353e-05, "loss": 0.5123, "step": 4605 }, { "epoch": 0.5605111043504716, "grad_norm": 1.1454652547836304, "learning_rate": 1.6716876728789882e-05, "loss": 0.4393, "step": 4606 }, { "epoch": 0.5606327958624886, "grad_norm": 2.8724396228790283, "learning_rate": 1.671543145971834e-05, "loss": 0.5274, "step": 4607 }, { "epoch": 0.5607544873745056, "grad_norm": 0.716998815536499, "learning_rate": 1.671398593511074e-05, "loss": 0.4497, "step": 4608 }, { "epoch": 0.5608761788865226, "grad_norm": 2.7783238887786865, "learning_rate": 1.6712540155022072e-05, "loss": 0.5338, "step": 4609 }, { "epoch": 0.5609978703985397, "grad_norm": 1.3436685800552368, "learning_rate": 1.6711094119507354e-05, "loss": 0.4717, "step": 4610 }, { "epoch": 0.5611195619105568, "grad_norm": 2.0009007453918457, "learning_rate": 1.6709647828621614e-05, "loss": 0.5594, "step": 4611 }, { "epoch": 0.5612412534225738, "grad_norm": 3.040457248687744, "learning_rate": 1.6708201282419893e-05, "loss": 0.4543, "step": 4612 }, { "epoch": 0.5613629449345908, "grad_norm": 2.340853691101074, "learning_rate": 1.6706754480957225e-05, "loss": 0.5073, "step": 4613 }, { "epoch": 0.5614846364466078, "grad_norm": 2.7375338077545166, "learning_rate": 1.6705307424288666e-05, "loss": 0.4791, "step": 4614 }, { "epoch": 0.5616063279586249, "grad_norm": 5.7058024406433105, "learning_rate": 1.670386011246928e-05, "loss": 0.4387, "step": 4615 }, { "epoch": 0.5617280194706419, "grad_norm": 2.7753055095672607, "learning_rate": 1.6702412545554142e-05, "loss": 0.4808, "step": 4616 }, { "epoch": 0.561849710982659, "grad_norm": 1.7488722801208496, "learning_rate": 1.670096472359833e-05, "loss": 0.4558, "step": 4617 }, { "epoch": 0.561971402494676, "grad_norm": 0.9848224520683289, "learning_rate": 1.6699516646656945e-05, "loss": 0.4455, "step": 4618 }, { "epoch": 0.562093094006693, "grad_norm": 1.3750373125076294, "learning_rate": 1.6698068314785078e-05, "loss": 0.3986, "step": 4619 }, { "epoch": 0.5622147855187101, "grad_norm": 1.9118283987045288, "learning_rate": 1.6696619728037853e-05, "loss": 0.4601, "step": 4620 }, { "epoch": 0.5623364770307271, "grad_norm": 2.759049892425537, "learning_rate": 1.669517088647038e-05, "loss": 0.4695, "step": 4621 }, { "epoch": 0.5624581685427441, "grad_norm": 4.957734107971191, "learning_rate": 1.66937217901378e-05, "loss": 0.5114, "step": 4622 }, { "epoch": 0.5625798600547611, "grad_norm": 3.4825074672698975, "learning_rate": 1.6692272439095256e-05, "loss": 0.4419, "step": 4623 }, { "epoch": 0.5627015515667783, "grad_norm": 2.347583055496216, "learning_rate": 1.6690822833397885e-05, "loss": 0.4439, "step": 4624 }, { "epoch": 0.5628232430787953, "grad_norm": 2.6734941005706787, "learning_rate": 1.6689372973100862e-05, "loss": 0.4732, "step": 4625 }, { "epoch": 0.5629449345908123, "grad_norm": 0.9740279316902161, "learning_rate": 1.668792285825935e-05, "loss": 0.4302, "step": 4626 }, { "epoch": 0.5630666261028293, "grad_norm": 1.0920979976654053, "learning_rate": 1.6686472488928526e-05, "loss": 0.4567, "step": 4627 }, { "epoch": 0.5631883176148463, "grad_norm": 0.9627112150192261, "learning_rate": 1.6685021865163587e-05, "loss": 0.4918, "step": 4628 }, { "epoch": 0.5633100091268634, "grad_norm": 1.3993078470230103, "learning_rate": 1.668357098701973e-05, "loss": 0.5383, "step": 4629 }, { "epoch": 0.5634317006388805, "grad_norm": 1.691230297088623, "learning_rate": 1.6682119854552156e-05, "loss": 0.5084, "step": 4630 }, { "epoch": 0.5635533921508975, "grad_norm": 3.0796499252319336, "learning_rate": 1.6680668467816097e-05, "loss": 0.5028, "step": 4631 }, { "epoch": 0.5636750836629145, "grad_norm": 6.316005229949951, "learning_rate": 1.667921682686677e-05, "loss": 0.5222, "step": 4632 }, { "epoch": 0.5637967751749315, "grad_norm": 5.254156589508057, "learning_rate": 1.6677764931759423e-05, "loss": 0.4946, "step": 4633 }, { "epoch": 0.5639184666869486, "grad_norm": 5.059786796569824, "learning_rate": 1.6676312782549292e-05, "loss": 0.5197, "step": 4634 }, { "epoch": 0.5640401581989656, "grad_norm": 6.235612869262695, "learning_rate": 1.6674860379291646e-05, "loss": 0.4901, "step": 4635 }, { "epoch": 0.5641618497109827, "grad_norm": 5.896116733551025, "learning_rate": 1.6673407722041744e-05, "loss": 0.5305, "step": 4636 }, { "epoch": 0.5642835412229997, "grad_norm": 4.427737236022949, "learning_rate": 1.6671954810854865e-05, "loss": 0.4569, "step": 4637 }, { "epoch": 0.5644052327350167, "grad_norm": 3.9069371223449707, "learning_rate": 1.6670501645786293e-05, "loss": 0.4572, "step": 4638 }, { "epoch": 0.5645269242470338, "grad_norm": 2.219663143157959, "learning_rate": 1.666904822689133e-05, "loss": 0.4588, "step": 4639 }, { "epoch": 0.5646486157590508, "grad_norm": 2.792325019836426, "learning_rate": 1.666759455422528e-05, "loss": 0.399, "step": 4640 }, { "epoch": 0.5647703072710678, "grad_norm": 3.0173792839050293, "learning_rate": 1.666614062784345e-05, "loss": 0.4876, "step": 4641 }, { "epoch": 0.5648919987830848, "grad_norm": 3.6950814723968506, "learning_rate": 1.6664686447801174e-05, "loss": 0.5053, "step": 4642 }, { "epoch": 0.565013690295102, "grad_norm": 4.100681781768799, "learning_rate": 1.666323201415378e-05, "loss": 0.4865, "step": 4643 }, { "epoch": 0.565135381807119, "grad_norm": 5.905385971069336, "learning_rate": 1.666177732695662e-05, "loss": 0.5211, "step": 4644 }, { "epoch": 0.565257073319136, "grad_norm": 7.304001808166504, "learning_rate": 1.666032238626504e-05, "loss": 0.6014, "step": 4645 }, { "epoch": 0.565378764831153, "grad_norm": 0.6334952116012573, "learning_rate": 1.665886719213441e-05, "loss": 0.4107, "step": 4646 }, { "epoch": 0.56550045634317, "grad_norm": 2.1371588706970215, "learning_rate": 1.66574117446201e-05, "loss": 0.466, "step": 4647 }, { "epoch": 0.565622147855187, "grad_norm": 3.159528970718384, "learning_rate": 1.6655956043777496e-05, "loss": 0.4985, "step": 4648 }, { "epoch": 0.5657438393672042, "grad_norm": 0.7443142533302307, "learning_rate": 1.665450008966198e-05, "loss": 0.4486, "step": 4649 }, { "epoch": 0.5658655308792212, "grad_norm": 0.5971969962120056, "learning_rate": 1.6653043882328965e-05, "loss": 0.4701, "step": 4650 }, { "epoch": 0.5659872223912382, "grad_norm": 2.6483771800994873, "learning_rate": 1.665158742183386e-05, "loss": 0.4667, "step": 4651 }, { "epoch": 0.5661089139032552, "grad_norm": 2.697734832763672, "learning_rate": 1.6650130708232088e-05, "loss": 0.4719, "step": 4652 }, { "epoch": 0.5662306054152723, "grad_norm": 3.1195905208587646, "learning_rate": 1.6648673741579075e-05, "loss": 0.4742, "step": 4653 }, { "epoch": 0.5663522969272893, "grad_norm": 1.0706874132156372, "learning_rate": 1.664721652193026e-05, "loss": 0.5073, "step": 4654 }, { "epoch": 0.5664739884393064, "grad_norm": 2.4989240169525146, "learning_rate": 1.6645759049341103e-05, "loss": 0.4466, "step": 4655 }, { "epoch": 0.5665956799513234, "grad_norm": 2.144212007522583, "learning_rate": 1.6644301323867058e-05, "loss": 0.4208, "step": 4656 }, { "epoch": 0.5667173714633404, "grad_norm": 1.0743597745895386, "learning_rate": 1.664284334556359e-05, "loss": 0.4878, "step": 4657 }, { "epoch": 0.5668390629753575, "grad_norm": 2.698432445526123, "learning_rate": 1.6641385114486188e-05, "loss": 0.5191, "step": 4658 }, { "epoch": 0.5669607544873745, "grad_norm": 0.7531874179840088, "learning_rate": 1.6639926630690332e-05, "loss": 0.4123, "step": 4659 }, { "epoch": 0.5670824459993915, "grad_norm": 1.6510300636291504, "learning_rate": 1.6638467894231527e-05, "loss": 0.4555, "step": 4660 }, { "epoch": 0.5672041375114085, "grad_norm": 2.069141387939453, "learning_rate": 1.6637008905165274e-05, "loss": 0.5063, "step": 4661 }, { "epoch": 0.5673258290234257, "grad_norm": 0.9788318276405334, "learning_rate": 1.6635549663547096e-05, "loss": 0.4381, "step": 4662 }, { "epoch": 0.5674475205354427, "grad_norm": 0.9708678126335144, "learning_rate": 1.663409016943252e-05, "loss": 0.4658, "step": 4663 }, { "epoch": 0.5675692120474597, "grad_norm": 2.4511301517486572, "learning_rate": 1.663263042287708e-05, "loss": 0.5153, "step": 4664 }, { "epoch": 0.5676909035594767, "grad_norm": 3.0183563232421875, "learning_rate": 1.6631170423936326e-05, "loss": 0.483, "step": 4665 }, { "epoch": 0.5678125950714937, "grad_norm": 3.073075532913208, "learning_rate": 1.6629710172665805e-05, "loss": 0.5383, "step": 4666 }, { "epoch": 0.5679342865835108, "grad_norm": 0.844896674156189, "learning_rate": 1.6628249669121095e-05, "loss": 0.4744, "step": 4667 }, { "epoch": 0.5680559780955279, "grad_norm": 4.082496166229248, "learning_rate": 1.6626788913357764e-05, "loss": 0.4501, "step": 4668 }, { "epoch": 0.5681776696075449, "grad_norm": 3.9005239009857178, "learning_rate": 1.6625327905431395e-05, "loss": 0.5026, "step": 4669 }, { "epoch": 0.5682993611195619, "grad_norm": 3.5627479553222656, "learning_rate": 1.662386664539759e-05, "loss": 0.4466, "step": 4670 }, { "epoch": 0.5684210526315789, "grad_norm": 2.6441195011138916, "learning_rate": 1.6622405133311946e-05, "loss": 0.4449, "step": 4671 }, { "epoch": 0.568542744143596, "grad_norm": 1.890883207321167, "learning_rate": 1.662094336923008e-05, "loss": 0.5002, "step": 4672 }, { "epoch": 0.568664435655613, "grad_norm": 3.0529839992523193, "learning_rate": 1.6619481353207617e-05, "loss": 0.4042, "step": 4673 }, { "epoch": 0.5687861271676301, "grad_norm": 1.8808848857879639, "learning_rate": 1.6618019085300183e-05, "loss": 0.5237, "step": 4674 }, { "epoch": 0.5689078186796471, "grad_norm": 0.9577593207359314, "learning_rate": 1.6616556565563427e-05, "loss": 0.4571, "step": 4675 }, { "epoch": 0.5690295101916641, "grad_norm": 0.7071251273155212, "learning_rate": 1.6615093794052995e-05, "loss": 0.4333, "step": 4676 }, { "epoch": 0.5691512017036812, "grad_norm": 0.5237108469009399, "learning_rate": 1.6613630770824554e-05, "loss": 0.4135, "step": 4677 }, { "epoch": 0.5692728932156982, "grad_norm": 2.173288345336914, "learning_rate": 1.6612167495933772e-05, "loss": 0.4607, "step": 4678 }, { "epoch": 0.5693945847277152, "grad_norm": 3.9257712364196777, "learning_rate": 1.6610703969436328e-05, "loss": 0.515, "step": 4679 }, { "epoch": 0.5695162762397323, "grad_norm": 0.795545220375061, "learning_rate": 1.6609240191387917e-05, "loss": 0.3928, "step": 4680 }, { "epoch": 0.5696379677517494, "grad_norm": 2.5526885986328125, "learning_rate": 1.6607776161844237e-05, "loss": 0.5013, "step": 4681 }, { "epoch": 0.5697596592637664, "grad_norm": 1.6104010343551636, "learning_rate": 1.6606311880860998e-05, "loss": 0.3992, "step": 4682 }, { "epoch": 0.5698813507757834, "grad_norm": 0.6600081324577332, "learning_rate": 1.6604847348493914e-05, "loss": 0.4367, "step": 4683 }, { "epoch": 0.5700030422878004, "grad_norm": 0.7592805624008179, "learning_rate": 1.6603382564798718e-05, "loss": 0.4157, "step": 4684 }, { "epoch": 0.5701247337998174, "grad_norm": 0.8882337808609009, "learning_rate": 1.6601917529831147e-05, "loss": 0.4152, "step": 4685 }, { "epoch": 0.5702464253118344, "grad_norm": 1.7887595891952515, "learning_rate": 1.6600452243646947e-05, "loss": 0.4681, "step": 4686 }, { "epoch": 0.5703681168238516, "grad_norm": 1.3326091766357422, "learning_rate": 1.6598986706301882e-05, "loss": 0.3958, "step": 4687 }, { "epoch": 0.5704898083358686, "grad_norm": 0.5888066291809082, "learning_rate": 1.659752091785171e-05, "loss": 0.4339, "step": 4688 }, { "epoch": 0.5706114998478856, "grad_norm": 1.2763609886169434, "learning_rate": 1.6596054878352213e-05, "loss": 0.4798, "step": 4689 }, { "epoch": 0.5707331913599026, "grad_norm": 1.2049351930618286, "learning_rate": 1.6594588587859174e-05, "loss": 0.4304, "step": 4690 }, { "epoch": 0.5708548828719197, "grad_norm": 1.304108738899231, "learning_rate": 1.659312204642839e-05, "loss": 0.4148, "step": 4691 }, { "epoch": 0.5709765743839367, "grad_norm": 0.7838099598884583, "learning_rate": 1.659165525411566e-05, "loss": 0.4536, "step": 4692 }, { "epoch": 0.5710982658959538, "grad_norm": 3.4736058712005615, "learning_rate": 1.659018821097681e-05, "loss": 0.4795, "step": 4693 }, { "epoch": 0.5712199574079708, "grad_norm": 1.1521637439727783, "learning_rate": 1.6588720917067655e-05, "loss": 0.5262, "step": 4694 }, { "epoch": 0.5713416489199878, "grad_norm": 0.8665807843208313, "learning_rate": 1.6587253372444034e-05, "loss": 0.4808, "step": 4695 }, { "epoch": 0.5714633404320049, "grad_norm": 1.7835919857025146, "learning_rate": 1.658578557716178e-05, "loss": 0.4328, "step": 4696 }, { "epoch": 0.5715850319440219, "grad_norm": 2.4548680782318115, "learning_rate": 1.658431753127676e-05, "loss": 0.4276, "step": 4697 }, { "epoch": 0.5717067234560389, "grad_norm": 3.152401924133301, "learning_rate": 1.658284923484483e-05, "loss": 0.4058, "step": 4698 }, { "epoch": 0.571828414968056, "grad_norm": 1.9626328945159912, "learning_rate": 1.6581380687921856e-05, "loss": 0.5017, "step": 4699 }, { "epoch": 0.571950106480073, "grad_norm": 1.310911774635315, "learning_rate": 1.6579911890563726e-05, "loss": 0.4867, "step": 4700 }, { "epoch": 0.5720717979920901, "grad_norm": 1.5087254047393799, "learning_rate": 1.657844284282633e-05, "loss": 0.4185, "step": 4701 }, { "epoch": 0.5721934895041071, "grad_norm": 0.9733480215072632, "learning_rate": 1.6576973544765564e-05, "loss": 0.4241, "step": 4702 }, { "epoch": 0.5723151810161241, "grad_norm": 3.9932644367218018, "learning_rate": 1.6575503996437344e-05, "loss": 0.5327, "step": 4703 }, { "epoch": 0.5724368725281411, "grad_norm": 3.181094169616699, "learning_rate": 1.6574034197897587e-05, "loss": 0.4977, "step": 4704 }, { "epoch": 0.5725585640401581, "grad_norm": 0.5844399929046631, "learning_rate": 1.6572564149202217e-05, "loss": 0.4598, "step": 4705 }, { "epoch": 0.5726802555521753, "grad_norm": 2.7010364532470703, "learning_rate": 1.657109385040718e-05, "loss": 0.4997, "step": 4706 }, { "epoch": 0.5728019470641923, "grad_norm": 1.0860276222229004, "learning_rate": 1.656962330156842e-05, "loss": 0.4276, "step": 4707 }, { "epoch": 0.5729236385762093, "grad_norm": 2.9141764640808105, "learning_rate": 1.6568152502741895e-05, "loss": 0.5186, "step": 4708 }, { "epoch": 0.5730453300882263, "grad_norm": 1.4544330835342407, "learning_rate": 1.6566681453983574e-05, "loss": 0.4707, "step": 4709 }, { "epoch": 0.5731670216002434, "grad_norm": 1.860729694366455, "learning_rate": 1.656521015534943e-05, "loss": 0.4024, "step": 4710 }, { "epoch": 0.5732887131122604, "grad_norm": 0.5835658311843872, "learning_rate": 1.6563738606895447e-05, "loss": 0.4861, "step": 4711 }, { "epoch": 0.5734104046242775, "grad_norm": 2.6423275470733643, "learning_rate": 1.6562266808677628e-05, "loss": 0.5109, "step": 4712 }, { "epoch": 0.5735320961362945, "grad_norm": 1.2133762836456299, "learning_rate": 1.656079476075197e-05, "loss": 0.4732, "step": 4713 }, { "epoch": 0.5736537876483115, "grad_norm": 3.589836597442627, "learning_rate": 1.6559322463174495e-05, "loss": 0.4235, "step": 4714 }, { "epoch": 0.5737754791603286, "grad_norm": 4.277742385864258, "learning_rate": 1.655784991600122e-05, "loss": 0.4232, "step": 4715 }, { "epoch": 0.5738971706723456, "grad_norm": 4.2156662940979, "learning_rate": 1.6556377119288185e-05, "loss": 0.4285, "step": 4716 }, { "epoch": 0.5740188621843626, "grad_norm": 2.3883912563323975, "learning_rate": 1.655490407309143e-05, "loss": 0.4379, "step": 4717 }, { "epoch": 0.5741405536963797, "grad_norm": 2.569422721862793, "learning_rate": 1.6553430777467004e-05, "loss": 0.4769, "step": 4718 }, { "epoch": 0.5742622452083967, "grad_norm": 2.017634391784668, "learning_rate": 1.6551957232470973e-05, "loss": 0.4085, "step": 4719 }, { "epoch": 0.5743839367204138, "grad_norm": 2.7294881343841553, "learning_rate": 1.6550483438159407e-05, "loss": 0.4689, "step": 4720 }, { "epoch": 0.5745056282324308, "grad_norm": 3.2133383750915527, "learning_rate": 1.654900939458839e-05, "loss": 0.4674, "step": 4721 }, { "epoch": 0.5746273197444478, "grad_norm": 5.8939290046691895, "learning_rate": 1.6547535101814007e-05, "loss": 0.565, "step": 4722 }, { "epoch": 0.5747490112564648, "grad_norm": 2.4256250858306885, "learning_rate": 1.654606055989236e-05, "loss": 0.4194, "step": 4723 }, { "epoch": 0.5748707027684818, "grad_norm": 1.6022299528121948, "learning_rate": 1.654458576887956e-05, "loss": 0.4738, "step": 4724 }, { "epoch": 0.574992394280499, "grad_norm": 0.6131394505500793, "learning_rate": 1.6543110728831727e-05, "loss": 0.4412, "step": 4725 }, { "epoch": 0.575114085792516, "grad_norm": 0.6355972290039062, "learning_rate": 1.6541635439804985e-05, "loss": 0.4802, "step": 4726 }, { "epoch": 0.575235777304533, "grad_norm": 0.8190802931785583, "learning_rate": 1.6540159901855477e-05, "loss": 0.4735, "step": 4727 }, { "epoch": 0.57535746881655, "grad_norm": 1.7251677513122559, "learning_rate": 1.6538684115039344e-05, "loss": 0.4333, "step": 4728 }, { "epoch": 0.575479160328567, "grad_norm": 0.7691818475723267, "learning_rate": 1.6537208079412747e-05, "loss": 0.5113, "step": 4729 }, { "epoch": 0.5756008518405841, "grad_norm": 1.1802380084991455, "learning_rate": 1.6535731795031853e-05, "loss": 0.4724, "step": 4730 }, { "epoch": 0.5757225433526012, "grad_norm": 2.2256858348846436, "learning_rate": 1.6534255261952835e-05, "loss": 0.4635, "step": 4731 }, { "epoch": 0.5758442348646182, "grad_norm": 1.4814857244491577, "learning_rate": 1.653277848023188e-05, "loss": 0.4927, "step": 4732 }, { "epoch": 0.5759659263766352, "grad_norm": 1.2086328268051147, "learning_rate": 1.653130144992518e-05, "loss": 0.4895, "step": 4733 }, { "epoch": 0.5760876178886523, "grad_norm": 0.6060865521430969, "learning_rate": 1.652982417108894e-05, "loss": 0.4568, "step": 4734 }, { "epoch": 0.5762093094006693, "grad_norm": 1.071037769317627, "learning_rate": 1.6528346643779377e-05, "loss": 0.4623, "step": 4735 }, { "epoch": 0.5763310009126863, "grad_norm": 0.8550727963447571, "learning_rate": 1.652686886805271e-05, "loss": 0.4454, "step": 4736 }, { "epoch": 0.5764526924247034, "grad_norm": 1.1249865293502808, "learning_rate": 1.6525390843965172e-05, "loss": 0.4224, "step": 4737 }, { "epoch": 0.5765743839367204, "grad_norm": 1.7408998012542725, "learning_rate": 1.6523912571573007e-05, "loss": 0.4851, "step": 4738 }, { "epoch": 0.5766960754487375, "grad_norm": 1.7351619005203247, "learning_rate": 1.6522434050932466e-05, "loss": 0.4468, "step": 4739 }, { "epoch": 0.5768177669607545, "grad_norm": 0.5777702927589417, "learning_rate": 1.6520955282099803e-05, "loss": 0.45, "step": 4740 }, { "epoch": 0.5769394584727715, "grad_norm": 1.0006364583969116, "learning_rate": 1.6519476265131302e-05, "loss": 0.4369, "step": 4741 }, { "epoch": 0.5770611499847885, "grad_norm": 1.83672034740448, "learning_rate": 1.6517997000083228e-05, "loss": 0.4153, "step": 4742 }, { "epoch": 0.5771828414968055, "grad_norm": 0.8941745758056641, "learning_rate": 1.651651748701188e-05, "loss": 0.4614, "step": 4743 }, { "epoch": 0.5773045330088227, "grad_norm": 0.72663813829422, "learning_rate": 1.651503772597355e-05, "loss": 0.4476, "step": 4744 }, { "epoch": 0.5774262245208397, "grad_norm": 1.300182819366455, "learning_rate": 1.651355771702455e-05, "loss": 0.429, "step": 4745 }, { "epoch": 0.5775479160328567, "grad_norm": 2.005625009536743, "learning_rate": 1.65120774602212e-05, "loss": 0.4519, "step": 4746 }, { "epoch": 0.5776696075448737, "grad_norm": 0.5779674649238586, "learning_rate": 1.651059695561982e-05, "loss": 0.4911, "step": 4747 }, { "epoch": 0.5777912990568908, "grad_norm": 4.288958549499512, "learning_rate": 1.650911620327675e-05, "loss": 0.3728, "step": 4748 }, { "epoch": 0.5779129905689078, "grad_norm": 0.7756676077842712, "learning_rate": 1.6507635203248334e-05, "loss": 0.4317, "step": 4749 }, { "epoch": 0.5780346820809249, "grad_norm": 0.5822075009346008, "learning_rate": 1.6506153955590932e-05, "loss": 0.4457, "step": 4750 }, { "epoch": 0.5781563735929419, "grad_norm": 1.170448660850525, "learning_rate": 1.6504672460360907e-05, "loss": 0.4572, "step": 4751 }, { "epoch": 0.5782780651049589, "grad_norm": 1.5626784563064575, "learning_rate": 1.6503190717614628e-05, "loss": 0.4258, "step": 4752 }, { "epoch": 0.578399756616976, "grad_norm": 1.3395341634750366, "learning_rate": 1.650170872740848e-05, "loss": 0.4338, "step": 4753 }, { "epoch": 0.578521448128993, "grad_norm": 3.614774227142334, "learning_rate": 1.6500226489798858e-05, "loss": 0.5023, "step": 4754 }, { "epoch": 0.57864313964101, "grad_norm": 3.1597650051116943, "learning_rate": 1.649874400484216e-05, "loss": 0.4769, "step": 4755 }, { "epoch": 0.5787648311530271, "grad_norm": 0.8550236225128174, "learning_rate": 1.6497261272594808e-05, "loss": 0.4298, "step": 4756 }, { "epoch": 0.5788865226650441, "grad_norm": 1.9913235902786255, "learning_rate": 1.649577829311321e-05, "loss": 0.5268, "step": 4757 }, { "epoch": 0.5790082141770612, "grad_norm": 0.6691299080848694, "learning_rate": 1.6494295066453806e-05, "loss": 0.481, "step": 4758 }, { "epoch": 0.5791299056890782, "grad_norm": 4.586913108825684, "learning_rate": 1.649281159267303e-05, "loss": 0.4638, "step": 4759 }, { "epoch": 0.5792515972010952, "grad_norm": 3.3829920291900635, "learning_rate": 1.6491327871827333e-05, "loss": 0.4657, "step": 4760 }, { "epoch": 0.5793732887131122, "grad_norm": 3.0412375926971436, "learning_rate": 1.648984390397318e-05, "loss": 0.4837, "step": 4761 }, { "epoch": 0.5794949802251294, "grad_norm": 1.7043672800064087, "learning_rate": 1.6488359689167027e-05, "loss": 0.4748, "step": 4762 }, { "epoch": 0.5796166717371464, "grad_norm": 3.3852155208587646, "learning_rate": 1.6486875227465356e-05, "loss": 0.4383, "step": 4763 }, { "epoch": 0.5797383632491634, "grad_norm": 3.5761141777038574, "learning_rate": 1.648539051892466e-05, "loss": 0.4572, "step": 4764 }, { "epoch": 0.5798600547611804, "grad_norm": 1.1287291049957275, "learning_rate": 1.648390556360143e-05, "loss": 0.4512, "step": 4765 }, { "epoch": 0.5799817462731974, "grad_norm": 0.5648757815361023, "learning_rate": 1.6482420361552173e-05, "loss": 0.4424, "step": 4766 }, { "epoch": 0.5801034377852144, "grad_norm": 3.5339434146881104, "learning_rate": 1.64809349128334e-05, "loss": 0.5179, "step": 4767 }, { "epoch": 0.5802251292972315, "grad_norm": 2.2196714878082275, "learning_rate": 1.6479449217501643e-05, "loss": 0.4842, "step": 4768 }, { "epoch": 0.5803468208092486, "grad_norm": 3.4961092472076416, "learning_rate": 1.647796327561343e-05, "loss": 0.4792, "step": 4769 }, { "epoch": 0.5804685123212656, "grad_norm": 3.0751497745513916, "learning_rate": 1.6476477087225306e-05, "loss": 0.4604, "step": 4770 }, { "epoch": 0.5805902038332826, "grad_norm": 2.4273838996887207, "learning_rate": 1.6474990652393822e-05, "loss": 0.4588, "step": 4771 }, { "epoch": 0.5807118953452997, "grad_norm": 1.406319499015808, "learning_rate": 1.647350397117554e-05, "loss": 0.4464, "step": 4772 }, { "epoch": 0.5808335868573167, "grad_norm": 0.7434306144714355, "learning_rate": 1.647201704362704e-05, "loss": 0.4546, "step": 4773 }, { "epoch": 0.5809552783693337, "grad_norm": 0.7493419647216797, "learning_rate": 1.6470529869804886e-05, "loss": 0.4255, "step": 4774 }, { "epoch": 0.5810769698813508, "grad_norm": 0.9410748481750488, "learning_rate": 1.6469042449765682e-05, "loss": 0.5143, "step": 4775 }, { "epoch": 0.5811986613933678, "grad_norm": 1.2688018083572388, "learning_rate": 1.646755478356602e-05, "loss": 0.4925, "step": 4776 }, { "epoch": 0.5813203529053849, "grad_norm": 2.6639444828033447, "learning_rate": 1.6466066871262513e-05, "loss": 0.4878, "step": 4777 }, { "epoch": 0.5814420444174019, "grad_norm": 1.3994344472885132, "learning_rate": 1.6464578712911776e-05, "loss": 0.4807, "step": 4778 }, { "epoch": 0.5815637359294189, "grad_norm": 2.3861780166625977, "learning_rate": 1.646309030857044e-05, "loss": 0.4415, "step": 4779 }, { "epoch": 0.5816854274414359, "grad_norm": 1.6318391561508179, "learning_rate": 1.646160165829514e-05, "loss": 0.5171, "step": 4780 }, { "epoch": 0.581807118953453, "grad_norm": 0.8817933797836304, "learning_rate": 1.646011276214252e-05, "loss": 0.4937, "step": 4781 }, { "epoch": 0.5819288104654701, "grad_norm": 2.3189210891723633, "learning_rate": 1.6458623620169238e-05, "loss": 0.4736, "step": 4782 }, { "epoch": 0.5820505019774871, "grad_norm": 1.1711735725402832, "learning_rate": 1.6457134232431958e-05, "loss": 0.4882, "step": 4783 }, { "epoch": 0.5821721934895041, "grad_norm": 0.8435712456703186, "learning_rate": 1.645564459898736e-05, "loss": 0.4444, "step": 4784 }, { "epoch": 0.5822938850015211, "grad_norm": 0.9287135004997253, "learning_rate": 1.645415471989212e-05, "loss": 0.4782, "step": 4785 }, { "epoch": 0.5824155765135381, "grad_norm": 1.105912446975708, "learning_rate": 1.645266459520293e-05, "loss": 0.4619, "step": 4786 }, { "epoch": 0.5825372680255552, "grad_norm": 2.156761646270752, "learning_rate": 1.64511742249765e-05, "loss": 0.4537, "step": 4787 }, { "epoch": 0.5826589595375723, "grad_norm": 3.292728900909424, "learning_rate": 1.6449683609269535e-05, "loss": 0.4982, "step": 4788 }, { "epoch": 0.5827806510495893, "grad_norm": 1.987899661064148, "learning_rate": 1.644819274813876e-05, "loss": 0.4016, "step": 4789 }, { "epoch": 0.5829023425616063, "grad_norm": 1.496383786201477, "learning_rate": 1.6446701641640904e-05, "loss": 0.4981, "step": 4790 }, { "epoch": 0.5830240340736234, "grad_norm": 1.2411843538284302, "learning_rate": 1.6445210289832706e-05, "loss": 0.542, "step": 4791 }, { "epoch": 0.5831457255856404, "grad_norm": 0.8396580219268799, "learning_rate": 1.6443718692770916e-05, "loss": 0.4655, "step": 4792 }, { "epoch": 0.5832674170976574, "grad_norm": 1.2516398429870605, "learning_rate": 1.6442226850512292e-05, "loss": 0.4523, "step": 4793 }, { "epoch": 0.5833891086096745, "grad_norm": 2.404691696166992, "learning_rate": 1.6440734763113598e-05, "loss": 0.4642, "step": 4794 }, { "epoch": 0.5835108001216915, "grad_norm": 1.21297287940979, "learning_rate": 1.643924243063162e-05, "loss": 0.48, "step": 4795 }, { "epoch": 0.5836324916337086, "grad_norm": 2.2260003089904785, "learning_rate": 1.643774985312314e-05, "loss": 0.5069, "step": 4796 }, { "epoch": 0.5837541831457256, "grad_norm": 1.9143424034118652, "learning_rate": 1.643625703064495e-05, "loss": 0.4693, "step": 4797 }, { "epoch": 0.5838758746577426, "grad_norm": 1.9398530721664429, "learning_rate": 1.6434763963253856e-05, "loss": 0.4186, "step": 4798 }, { "epoch": 0.5839975661697596, "grad_norm": 2.0223612785339355, "learning_rate": 1.6433270651006676e-05, "loss": 0.4722, "step": 4799 }, { "epoch": 0.5841192576817767, "grad_norm": 0.7397261261940002, "learning_rate": 1.6431777093960228e-05, "loss": 0.4237, "step": 4800 }, { "epoch": 0.5842409491937938, "grad_norm": 2.930478572845459, "learning_rate": 1.643028329217135e-05, "loss": 0.5151, "step": 4801 }, { "epoch": 0.5843626407058108, "grad_norm": 1.4000325202941895, "learning_rate": 1.6428789245696886e-05, "loss": 0.4847, "step": 4802 }, { "epoch": 0.5844843322178278, "grad_norm": 0.560815155506134, "learning_rate": 1.6427294954593685e-05, "loss": 0.4272, "step": 4803 }, { "epoch": 0.5846060237298448, "grad_norm": 0.6566979885101318, "learning_rate": 1.6425800418918604e-05, "loss": 0.4515, "step": 4804 }, { "epoch": 0.5847277152418618, "grad_norm": 2.36249041557312, "learning_rate": 1.6424305638728516e-05, "loss": 0.3983, "step": 4805 }, { "epoch": 0.5848494067538789, "grad_norm": 0.9769846796989441, "learning_rate": 1.64228106140803e-05, "loss": 0.4773, "step": 4806 }, { "epoch": 0.584971098265896, "grad_norm": 0.8010731339454651, "learning_rate": 1.642131534503085e-05, "loss": 0.5231, "step": 4807 }, { "epoch": 0.585092789777913, "grad_norm": 2.609766960144043, "learning_rate": 1.641981983163706e-05, "loss": 0.4453, "step": 4808 }, { "epoch": 0.58521448128993, "grad_norm": 1.526746153831482, "learning_rate": 1.6418324073955833e-05, "loss": 0.5037, "step": 4809 }, { "epoch": 0.585336172801947, "grad_norm": 2.1547367572784424, "learning_rate": 1.641682807204409e-05, "loss": 0.452, "step": 4810 }, { "epoch": 0.5854578643139641, "grad_norm": 1.9977123737335205, "learning_rate": 1.6415331825958757e-05, "loss": 0.4689, "step": 4811 }, { "epoch": 0.5855795558259811, "grad_norm": 1.8976426124572754, "learning_rate": 1.641383533575677e-05, "loss": 0.4885, "step": 4812 }, { "epoch": 0.5857012473379982, "grad_norm": 1.007218360900879, "learning_rate": 1.6412338601495073e-05, "loss": 0.4817, "step": 4813 }, { "epoch": 0.5858229388500152, "grad_norm": 0.6481262445449829, "learning_rate": 1.641084162323062e-05, "loss": 0.4444, "step": 4814 }, { "epoch": 0.5859446303620323, "grad_norm": 2.1297457218170166, "learning_rate": 1.6409344401020372e-05, "loss": 0.5232, "step": 4815 }, { "epoch": 0.5860663218740493, "grad_norm": 2.920334815979004, "learning_rate": 1.6407846934921304e-05, "loss": 0.5061, "step": 4816 }, { "epoch": 0.5861880133860663, "grad_norm": 1.4199961423873901, "learning_rate": 1.6406349224990396e-05, "loss": 0.4438, "step": 4817 }, { "epoch": 0.5863097048980833, "grad_norm": 2.7833731174468994, "learning_rate": 1.6404851271284638e-05, "loss": 0.544, "step": 4818 }, { "epoch": 0.5864313964101004, "grad_norm": 0.976802408695221, "learning_rate": 1.6403353073861037e-05, "loss": 0.4999, "step": 4819 }, { "epoch": 0.5865530879221175, "grad_norm": 1.2913380861282349, "learning_rate": 1.6401854632776594e-05, "loss": 0.5261, "step": 4820 }, { "epoch": 0.5866747794341345, "grad_norm": 1.2182037830352783, "learning_rate": 1.6400355948088328e-05, "loss": 0.4645, "step": 4821 }, { "epoch": 0.5867964709461515, "grad_norm": 0.6297833323478699, "learning_rate": 1.6398857019853272e-05, "loss": 0.4733, "step": 4822 }, { "epoch": 0.5869181624581685, "grad_norm": 1.6896119117736816, "learning_rate": 1.6397357848128465e-05, "loss": 0.4369, "step": 4823 }, { "epoch": 0.5870398539701855, "grad_norm": 2.1172473430633545, "learning_rate": 1.639585843297095e-05, "loss": 0.5097, "step": 4824 }, { "epoch": 0.5871615454822026, "grad_norm": 0.5329152941703796, "learning_rate": 1.639435877443778e-05, "loss": 0.4364, "step": 4825 }, { "epoch": 0.5872832369942197, "grad_norm": 0.8318140506744385, "learning_rate": 1.6392858872586022e-05, "loss": 0.471, "step": 4826 }, { "epoch": 0.5874049285062367, "grad_norm": 1.0805152654647827, "learning_rate": 1.6391358727472755e-05, "loss": 0.4668, "step": 4827 }, { "epoch": 0.5875266200182537, "grad_norm": 1.5350522994995117, "learning_rate": 1.6389858339155057e-05, "loss": 0.4386, "step": 4828 }, { "epoch": 0.5876483115302708, "grad_norm": 2.1716132164001465, "learning_rate": 1.638835770769002e-05, "loss": 0.5089, "step": 4829 }, { "epoch": 0.5877700030422878, "grad_norm": 0.7079111337661743, "learning_rate": 1.6386856833134753e-05, "loss": 0.4696, "step": 4830 }, { "epoch": 0.5878916945543048, "grad_norm": 0.7532975673675537, "learning_rate": 1.6385355715546364e-05, "loss": 0.4632, "step": 4831 }, { "epoch": 0.5880133860663219, "grad_norm": 0.8625035881996155, "learning_rate": 1.6383854354981972e-05, "loss": 0.4494, "step": 4832 }, { "epoch": 0.5881350775783389, "grad_norm": 0.8266791701316833, "learning_rate": 1.6382352751498706e-05, "loss": 0.4651, "step": 4833 }, { "epoch": 0.588256769090356, "grad_norm": 1.7575665712356567, "learning_rate": 1.6380850905153706e-05, "loss": 0.4565, "step": 4834 }, { "epoch": 0.588378460602373, "grad_norm": 1.365768313407898, "learning_rate": 1.6379348816004127e-05, "loss": 0.4584, "step": 4835 }, { "epoch": 0.58850015211439, "grad_norm": 0.6995435357093811, "learning_rate": 1.6377846484107116e-05, "loss": 0.4273, "step": 4836 }, { "epoch": 0.588621843626407, "grad_norm": 1.1196446418762207, "learning_rate": 1.637634390951985e-05, "loss": 0.4104, "step": 4837 }, { "epoch": 0.5887435351384241, "grad_norm": 1.402665615081787, "learning_rate": 1.6374841092299493e-05, "loss": 0.4343, "step": 4838 }, { "epoch": 0.5888652266504412, "grad_norm": 2.1505250930786133, "learning_rate": 1.637333803250324e-05, "loss": 0.4644, "step": 4839 }, { "epoch": 0.5889869181624582, "grad_norm": 3.135927438735962, "learning_rate": 1.6371834730188284e-05, "loss": 0.5546, "step": 4840 }, { "epoch": 0.5891086096744752, "grad_norm": 0.7046291828155518, "learning_rate": 1.637033118541183e-05, "loss": 0.4591, "step": 4841 }, { "epoch": 0.5892303011864922, "grad_norm": 1.194037914276123, "learning_rate": 1.6368827398231085e-05, "loss": 0.5576, "step": 4842 }, { "epoch": 0.5893519926985092, "grad_norm": 1.2430156469345093, "learning_rate": 1.6367323368703275e-05, "loss": 0.5076, "step": 4843 }, { "epoch": 0.5894736842105263, "grad_norm": 0.9934839606285095, "learning_rate": 1.6365819096885635e-05, "loss": 0.4939, "step": 4844 }, { "epoch": 0.5895953757225434, "grad_norm": 3.0190505981445312, "learning_rate": 1.6364314582835395e-05, "loss": 0.4703, "step": 4845 }, { "epoch": 0.5897170672345604, "grad_norm": 1.2759708166122437, "learning_rate": 1.6362809826609817e-05, "loss": 0.4738, "step": 4846 }, { "epoch": 0.5898387587465774, "grad_norm": 3.3026254177093506, "learning_rate": 1.6361304828266153e-05, "loss": 0.469, "step": 4847 }, { "epoch": 0.5899604502585944, "grad_norm": 2.642491340637207, "learning_rate": 1.6359799587861675e-05, "loss": 0.4496, "step": 4848 }, { "epoch": 0.5900821417706115, "grad_norm": 0.5912262797355652, "learning_rate": 1.6358294105453656e-05, "loss": 0.4696, "step": 4849 }, { "epoch": 0.5902038332826285, "grad_norm": 1.8558549880981445, "learning_rate": 1.6356788381099384e-05, "loss": 0.4912, "step": 4850 }, { "epoch": 0.5903255247946456, "grad_norm": 2.138456344604492, "learning_rate": 1.6355282414856158e-05, "loss": 0.5067, "step": 4851 }, { "epoch": 0.5904472163066626, "grad_norm": 3.22438645362854, "learning_rate": 1.635377620678128e-05, "loss": 0.3721, "step": 4852 }, { "epoch": 0.5905689078186797, "grad_norm": 1.7777165174484253, "learning_rate": 1.6352269756932072e-05, "loss": 0.4624, "step": 4853 }, { "epoch": 0.5906905993306967, "grad_norm": 3.3200674057006836, "learning_rate": 1.6350763065365846e-05, "loss": 0.4839, "step": 4854 }, { "epoch": 0.5908122908427137, "grad_norm": 1.2257426977157593, "learning_rate": 1.634925613213994e-05, "loss": 0.4666, "step": 4855 }, { "epoch": 0.5909339823547307, "grad_norm": 1.2461884021759033, "learning_rate": 1.6347748957311698e-05, "loss": 0.4759, "step": 4856 }, { "epoch": 0.5910556738667478, "grad_norm": 3.076406240463257, "learning_rate": 1.6346241540938467e-05, "loss": 0.4267, "step": 4857 }, { "epoch": 0.5911773653787649, "grad_norm": 0.7298029065132141, "learning_rate": 1.634473388307761e-05, "loss": 0.4986, "step": 4858 }, { "epoch": 0.5912990568907819, "grad_norm": 4.680472373962402, "learning_rate": 1.6343225983786496e-05, "loss": 0.4754, "step": 4859 }, { "epoch": 0.5914207484027989, "grad_norm": 4.681982517242432, "learning_rate": 1.6341717843122507e-05, "loss": 0.4279, "step": 4860 }, { "epoch": 0.5915424399148159, "grad_norm": 1.4455347061157227, "learning_rate": 1.6340209461143023e-05, "loss": 0.4768, "step": 4861 }, { "epoch": 0.5916641314268329, "grad_norm": 2.154365301132202, "learning_rate": 1.6338700837905446e-05, "loss": 0.4205, "step": 4862 }, { "epoch": 0.5917858229388501, "grad_norm": 0.7109108567237854, "learning_rate": 1.6337191973467182e-05, "loss": 0.4549, "step": 4863 }, { "epoch": 0.5919075144508671, "grad_norm": 1.5381110906600952, "learning_rate": 1.6335682867885646e-05, "loss": 0.5015, "step": 4864 }, { "epoch": 0.5920292059628841, "grad_norm": 2.0715560913085938, "learning_rate": 1.633417352121826e-05, "loss": 0.3974, "step": 4865 }, { "epoch": 0.5921508974749011, "grad_norm": 1.9576348066329956, "learning_rate": 1.6332663933522468e-05, "loss": 0.4673, "step": 4866 }, { "epoch": 0.5922725889869181, "grad_norm": 0.580463171005249, "learning_rate": 1.63311541048557e-05, "loss": 0.3903, "step": 4867 }, { "epoch": 0.5923942804989352, "grad_norm": 4.284531116485596, "learning_rate": 1.6329644035275416e-05, "loss": 0.5127, "step": 4868 }, { "epoch": 0.5925159720109522, "grad_norm": 1.575125813484192, "learning_rate": 1.6328133724839074e-05, "loss": 0.4162, "step": 4869 }, { "epoch": 0.5926376635229693, "grad_norm": 0.6911008358001709, "learning_rate": 1.6326623173604143e-05, "loss": 0.4181, "step": 4870 }, { "epoch": 0.5927593550349863, "grad_norm": 2.712672233581543, "learning_rate": 1.6325112381628106e-05, "loss": 0.4471, "step": 4871 }, { "epoch": 0.5928810465470034, "grad_norm": 4.300261974334717, "learning_rate": 1.6323601348968454e-05, "loss": 0.5571, "step": 4872 }, { "epoch": 0.5930027380590204, "grad_norm": 1.1900861263275146, "learning_rate": 1.632209007568268e-05, "loss": 0.4998, "step": 4873 }, { "epoch": 0.5931244295710374, "grad_norm": 1.2653896808624268, "learning_rate": 1.6320578561828288e-05, "loss": 0.4469, "step": 4874 }, { "epoch": 0.5932461210830544, "grad_norm": 1.5442241430282593, "learning_rate": 1.63190668074628e-05, "loss": 0.443, "step": 4875 }, { "epoch": 0.5933678125950715, "grad_norm": 1.3200793266296387, "learning_rate": 1.6317554812643744e-05, "loss": 0.4788, "step": 4876 }, { "epoch": 0.5934895041070886, "grad_norm": 2.5573348999023438, "learning_rate": 1.631604257742865e-05, "loss": 0.4422, "step": 4877 }, { "epoch": 0.5936111956191056, "grad_norm": 2.103461503982544, "learning_rate": 1.631453010187506e-05, "loss": 0.4654, "step": 4878 }, { "epoch": 0.5937328871311226, "grad_norm": 0.6830573678016663, "learning_rate": 1.6313017386040532e-05, "loss": 0.4731, "step": 4879 }, { "epoch": 0.5938545786431396, "grad_norm": 1.3459645509719849, "learning_rate": 1.6311504429982624e-05, "loss": 0.4194, "step": 4880 }, { "epoch": 0.5939762701551566, "grad_norm": 1.379117727279663, "learning_rate": 1.6309991233758908e-05, "loss": 0.4661, "step": 4881 }, { "epoch": 0.5940979616671738, "grad_norm": 4.034727573394775, "learning_rate": 1.6308477797426966e-05, "loss": 0.5149, "step": 4882 }, { "epoch": 0.5942196531791908, "grad_norm": 1.3604557514190674, "learning_rate": 1.6306964121044386e-05, "loss": 0.4949, "step": 4883 }, { "epoch": 0.5943413446912078, "grad_norm": 0.6975970268249512, "learning_rate": 1.6305450204668766e-05, "loss": 0.3993, "step": 4884 }, { "epoch": 0.5944630362032248, "grad_norm": 0.853491485118866, "learning_rate": 1.630393604835771e-05, "loss": 0.4374, "step": 4885 }, { "epoch": 0.5945847277152418, "grad_norm": 1.17313551902771, "learning_rate": 1.6302421652168846e-05, "loss": 0.4897, "step": 4886 }, { "epoch": 0.5947064192272589, "grad_norm": 1.399959683418274, "learning_rate": 1.6300907016159787e-05, "loss": 0.5056, "step": 4887 }, { "epoch": 0.5948281107392759, "grad_norm": 1.9173386096954346, "learning_rate": 1.6299392140388176e-05, "loss": 0.5131, "step": 4888 }, { "epoch": 0.594949802251293, "grad_norm": 2.126542091369629, "learning_rate": 1.6297877024911658e-05, "loss": 0.4163, "step": 4889 }, { "epoch": 0.59507149376331, "grad_norm": 1.7467695474624634, "learning_rate": 1.629636166978788e-05, "loss": 0.5125, "step": 4890 }, { "epoch": 0.595193185275327, "grad_norm": 1.3654292821884155, "learning_rate": 1.629484607507451e-05, "loss": 0.4825, "step": 4891 }, { "epoch": 0.5953148767873441, "grad_norm": 0.8388196229934692, "learning_rate": 1.6293330240829215e-05, "loss": 0.4478, "step": 4892 }, { "epoch": 0.5954365682993611, "grad_norm": 4.521686553955078, "learning_rate": 1.6291814167109677e-05, "loss": 0.4196, "step": 4893 }, { "epoch": 0.5955582598113781, "grad_norm": 1.1228781938552856, "learning_rate": 1.629029785397359e-05, "loss": 0.4497, "step": 4894 }, { "epoch": 0.5956799513233952, "grad_norm": 1.361053466796875, "learning_rate": 1.6288781301478647e-05, "loss": 0.5548, "step": 4895 }, { "epoch": 0.5958016428354123, "grad_norm": 2.383488178253174, "learning_rate": 1.6287264509682558e-05, "loss": 0.4392, "step": 4896 }, { "epoch": 0.5959233343474293, "grad_norm": 0.8069707751274109, "learning_rate": 1.6285747478643042e-05, "loss": 0.4718, "step": 4897 }, { "epoch": 0.5960450258594463, "grad_norm": 0.800216794013977, "learning_rate": 1.628423020841782e-05, "loss": 0.47, "step": 4898 }, { "epoch": 0.5961667173714633, "grad_norm": 2.162963390350342, "learning_rate": 1.628271269906464e-05, "loss": 0.4975, "step": 4899 }, { "epoch": 0.5962884088834803, "grad_norm": 2.524778366088867, "learning_rate": 1.6281194950641227e-05, "loss": 0.4764, "step": 4900 }, { "epoch": 0.5964101003954975, "grad_norm": 0.7725741267204285, "learning_rate": 1.627967696320535e-05, "loss": 0.4834, "step": 4901 }, { "epoch": 0.5965317919075145, "grad_norm": 0.7840932011604309, "learning_rate": 1.6278158736814765e-05, "loss": 0.527, "step": 4902 }, { "epoch": 0.5966534834195315, "grad_norm": 1.531099796295166, "learning_rate": 1.6276640271527245e-05, "loss": 0.4972, "step": 4903 }, { "epoch": 0.5967751749315485, "grad_norm": 2.037529706954956, "learning_rate": 1.6275121567400575e-05, "loss": 0.4906, "step": 4904 }, { "epoch": 0.5968968664435655, "grad_norm": 1.8573511838912964, "learning_rate": 1.627360262449253e-05, "loss": 0.4649, "step": 4905 }, { "epoch": 0.5970185579555826, "grad_norm": 2.257289409637451, "learning_rate": 1.627208344286093e-05, "loss": 0.4452, "step": 4906 }, { "epoch": 0.5971402494675996, "grad_norm": 0.8152996897697449, "learning_rate": 1.627056402256357e-05, "loss": 0.4529, "step": 4907 }, { "epoch": 0.5972619409796167, "grad_norm": 0.8906998634338379, "learning_rate": 1.6269044363658268e-05, "loss": 0.4517, "step": 4908 }, { "epoch": 0.5973836324916337, "grad_norm": 1.3469293117523193, "learning_rate": 1.626752446620285e-05, "loss": 0.4754, "step": 4909 }, { "epoch": 0.5975053240036508, "grad_norm": 4.20992374420166, "learning_rate": 1.6266004330255155e-05, "loss": 0.5106, "step": 4910 }, { "epoch": 0.5976270155156678, "grad_norm": 4.386756896972656, "learning_rate": 1.6264483955873025e-05, "loss": 0.5171, "step": 4911 }, { "epoch": 0.5977487070276848, "grad_norm": 3.6281614303588867, "learning_rate": 1.6262963343114316e-05, "loss": 0.5142, "step": 4912 }, { "epoch": 0.5978703985397018, "grad_norm": 4.057873249053955, "learning_rate": 1.6261442492036887e-05, "loss": 0.5312, "step": 4913 }, { "epoch": 0.5979920900517189, "grad_norm": 1.9563565254211426, "learning_rate": 1.625992140269861e-05, "loss": 0.5086, "step": 4914 }, { "epoch": 0.598113781563736, "grad_norm": 1.8507587909698486, "learning_rate": 1.6258400075157362e-05, "loss": 0.4166, "step": 4915 }, { "epoch": 0.598235473075753, "grad_norm": 0.7084296345710754, "learning_rate": 1.6256878509471043e-05, "loss": 0.4427, "step": 4916 }, { "epoch": 0.59835716458777, "grad_norm": 0.6602417230606079, "learning_rate": 1.6255356705697548e-05, "loss": 0.4912, "step": 4917 }, { "epoch": 0.598478856099787, "grad_norm": 0.734244704246521, "learning_rate": 1.6253834663894778e-05, "loss": 0.4978, "step": 4918 }, { "epoch": 0.598600547611804, "grad_norm": 2.2112808227539062, "learning_rate": 1.6252312384120652e-05, "loss": 0.4609, "step": 4919 }, { "epoch": 0.5987222391238212, "grad_norm": 2.419268846511841, "learning_rate": 1.6250789866433102e-05, "loss": 0.48, "step": 4920 }, { "epoch": 0.5988439306358382, "grad_norm": 1.058548927307129, "learning_rate": 1.6249267110890057e-05, "loss": 0.4747, "step": 4921 }, { "epoch": 0.5989656221478552, "grad_norm": 0.5447883009910583, "learning_rate": 1.624774411754946e-05, "loss": 0.4808, "step": 4922 }, { "epoch": 0.5990873136598722, "grad_norm": 0.8803136944770813, "learning_rate": 1.624622088646928e-05, "loss": 0.4378, "step": 4923 }, { "epoch": 0.5992090051718892, "grad_norm": 0.6201842427253723, "learning_rate": 1.6244697417707453e-05, "loss": 0.4355, "step": 4924 }, { "epoch": 0.5993306966839063, "grad_norm": 0.7090044617652893, "learning_rate": 1.624317371132197e-05, "loss": 0.3923, "step": 4925 }, { "epoch": 0.5994523881959233, "grad_norm": 6.064487934112549, "learning_rate": 1.62416497673708e-05, "loss": 0.5719, "step": 4926 }, { "epoch": 0.5995740797079404, "grad_norm": 4.118100643157959, "learning_rate": 1.624012558591194e-05, "loss": 0.4988, "step": 4927 }, { "epoch": 0.5996957712199574, "grad_norm": 1.6848117113113403, "learning_rate": 1.6238601167003384e-05, "loss": 0.4782, "step": 4928 }, { "epoch": 0.5998174627319744, "grad_norm": 1.9276460409164429, "learning_rate": 1.6237076510703142e-05, "loss": 0.4817, "step": 4929 }, { "epoch": 0.5999391542439915, "grad_norm": 3.334984302520752, "learning_rate": 1.6235551617069228e-05, "loss": 0.5285, "step": 4930 }, { "epoch": 0.6000608457560085, "grad_norm": 1.0082061290740967, "learning_rate": 1.6234026486159668e-05, "loss": 0.4259, "step": 4931 }, { "epoch": 0.6001825372680255, "grad_norm": 2.297333002090454, "learning_rate": 1.6232501118032496e-05, "loss": 0.5576, "step": 4932 }, { "epoch": 0.6003042287800426, "grad_norm": 1.1869359016418457, "learning_rate": 1.6230975512745756e-05, "loss": 0.4634, "step": 4933 }, { "epoch": 0.6004259202920597, "grad_norm": 1.9856035709381104, "learning_rate": 1.6229449670357502e-05, "loss": 0.4876, "step": 4934 }, { "epoch": 0.6005476118040767, "grad_norm": 0.6499016880989075, "learning_rate": 1.6227923590925794e-05, "loss": 0.4823, "step": 4935 }, { "epoch": 0.6006693033160937, "grad_norm": 2.8664772510528564, "learning_rate": 1.6226397274508697e-05, "loss": 0.4258, "step": 4936 }, { "epoch": 0.6007909948281107, "grad_norm": 2.8016884326934814, "learning_rate": 1.6224870721164304e-05, "loss": 0.448, "step": 4937 }, { "epoch": 0.6009126863401277, "grad_norm": 0.6614217758178711, "learning_rate": 1.622334393095069e-05, "loss": 0.4562, "step": 4938 }, { "epoch": 0.6010343778521449, "grad_norm": 1.4142911434173584, "learning_rate": 1.6221816903925956e-05, "loss": 0.4369, "step": 4939 }, { "epoch": 0.6011560693641619, "grad_norm": 1.9742951393127441, "learning_rate": 1.6220289640148214e-05, "loss": 0.4935, "step": 4940 }, { "epoch": 0.6012777608761789, "grad_norm": 0.5329909324645996, "learning_rate": 1.6218762139675574e-05, "loss": 0.4132, "step": 4941 }, { "epoch": 0.6013994523881959, "grad_norm": 2.0596513748168945, "learning_rate": 1.6217234402566165e-05, "loss": 0.4943, "step": 4942 }, { "epoch": 0.6015211439002129, "grad_norm": 2.7426199913024902, "learning_rate": 1.6215706428878115e-05, "loss": 0.3465, "step": 4943 }, { "epoch": 0.60164283541223, "grad_norm": 4.940963268280029, "learning_rate": 1.621417821866957e-05, "loss": 0.5164, "step": 4944 }, { "epoch": 0.601764526924247, "grad_norm": 0.9825218319892883, "learning_rate": 1.6212649771998685e-05, "loss": 0.4044, "step": 4945 }, { "epoch": 0.6018862184362641, "grad_norm": 1.3012899160385132, "learning_rate": 1.621112108892361e-05, "loss": 0.4254, "step": 4946 }, { "epoch": 0.6020079099482811, "grad_norm": 0.6120168566703796, "learning_rate": 1.6209592169502527e-05, "loss": 0.374, "step": 4947 }, { "epoch": 0.6021296014602981, "grad_norm": 2.465064764022827, "learning_rate": 1.6208063013793607e-05, "loss": 0.4842, "step": 4948 }, { "epoch": 0.6022512929723152, "grad_norm": 2.29573917388916, "learning_rate": 1.6206533621855037e-05, "loss": 0.3824, "step": 4949 }, { "epoch": 0.6023729844843322, "grad_norm": 1.0123569965362549, "learning_rate": 1.620500399374502e-05, "loss": 0.4662, "step": 4950 }, { "epoch": 0.6024946759963492, "grad_norm": 0.9911242127418518, "learning_rate": 1.6203474129521753e-05, "loss": 0.4553, "step": 4951 }, { "epoch": 0.6026163675083663, "grad_norm": 1.0947352647781372, "learning_rate": 1.620194402924346e-05, "loss": 0.4781, "step": 4952 }, { "epoch": 0.6027380590203834, "grad_norm": 2.374330759048462, "learning_rate": 1.6200413692968354e-05, "loss": 0.423, "step": 4953 }, { "epoch": 0.6028597505324004, "grad_norm": 1.1348915100097656, "learning_rate": 1.619888312075468e-05, "loss": 0.5043, "step": 4954 }, { "epoch": 0.6029814420444174, "grad_norm": 3.420475721359253, "learning_rate": 1.6197352312660664e-05, "loss": 0.4196, "step": 4955 }, { "epoch": 0.6031031335564344, "grad_norm": 4.5161824226379395, "learning_rate": 1.6195821268744566e-05, "loss": 0.3938, "step": 4956 }, { "epoch": 0.6032248250684514, "grad_norm": 1.612881064414978, "learning_rate": 1.619428998906465e-05, "loss": 0.4163, "step": 4957 }, { "epoch": 0.6033465165804686, "grad_norm": 1.0352132320404053, "learning_rate": 1.6192758473679175e-05, "loss": 0.4616, "step": 4958 }, { "epoch": 0.6034682080924856, "grad_norm": 1.2749437093734741, "learning_rate": 1.619122672264642e-05, "loss": 0.4593, "step": 4959 }, { "epoch": 0.6035898996045026, "grad_norm": 4.1626691818237305, "learning_rate": 1.6189694736024674e-05, "loss": 0.5131, "step": 4960 }, { "epoch": 0.6037115911165196, "grad_norm": 1.3867450952529907, "learning_rate": 1.6188162513872234e-05, "loss": 0.4607, "step": 4961 }, { "epoch": 0.6038332826285366, "grad_norm": 1.3090866804122925, "learning_rate": 1.61866300562474e-05, "loss": 0.4699, "step": 4962 }, { "epoch": 0.6039549741405537, "grad_norm": 2.3500983715057373, "learning_rate": 1.6185097363208487e-05, "loss": 0.484, "step": 4963 }, { "epoch": 0.6040766656525708, "grad_norm": 2.8437275886535645, "learning_rate": 1.6183564434813813e-05, "loss": 0.5235, "step": 4964 }, { "epoch": 0.6041983571645878, "grad_norm": 2.4429731369018555, "learning_rate": 1.6182031271121718e-05, "loss": 0.3938, "step": 4965 }, { "epoch": 0.6043200486766048, "grad_norm": 1.2975447177886963, "learning_rate": 1.6180497872190534e-05, "loss": 0.468, "step": 4966 }, { "epoch": 0.6044417401886218, "grad_norm": 0.9424712657928467, "learning_rate": 1.6178964238078617e-05, "loss": 0.4977, "step": 4967 }, { "epoch": 0.6045634317006389, "grad_norm": 4.481785774230957, "learning_rate": 1.6177430368844316e-05, "loss": 0.441, "step": 4968 }, { "epoch": 0.6046851232126559, "grad_norm": 1.465193271636963, "learning_rate": 1.6175896264546005e-05, "loss": 0.4188, "step": 4969 }, { "epoch": 0.6048068147246729, "grad_norm": 0.6886785626411438, "learning_rate": 1.617436192524206e-05, "loss": 0.4769, "step": 4970 }, { "epoch": 0.60492850623669, "grad_norm": 0.572315514087677, "learning_rate": 1.617282735099086e-05, "loss": 0.4988, "step": 4971 }, { "epoch": 0.605050197748707, "grad_norm": 0.5896018147468567, "learning_rate": 1.6171292541850805e-05, "loss": 0.4711, "step": 4972 }, { "epoch": 0.6051718892607241, "grad_norm": 0.6635491251945496, "learning_rate": 1.6169757497880293e-05, "loss": 0.4336, "step": 4973 }, { "epoch": 0.6052935807727411, "grad_norm": 0.7088550925254822, "learning_rate": 1.6168222219137736e-05, "loss": 0.4356, "step": 4974 }, { "epoch": 0.6054152722847581, "grad_norm": 2.279660224914551, "learning_rate": 1.616668670568156e-05, "loss": 0.4754, "step": 4975 }, { "epoch": 0.6055369637967751, "grad_norm": 1.945552945137024, "learning_rate": 1.6165150957570187e-05, "loss": 0.4438, "step": 4976 }, { "epoch": 0.6056586553087923, "grad_norm": 1.1030707359313965, "learning_rate": 1.616361497486206e-05, "loss": 0.4702, "step": 4977 }, { "epoch": 0.6057803468208093, "grad_norm": 0.756426990032196, "learning_rate": 1.616207875761563e-05, "loss": 0.4668, "step": 4978 }, { "epoch": 0.6059020383328263, "grad_norm": 0.6813930869102478, "learning_rate": 1.616054230588934e-05, "loss": 0.4458, "step": 4979 }, { "epoch": 0.6060237298448433, "grad_norm": 0.7297816872596741, "learning_rate": 1.6159005619741667e-05, "loss": 0.4551, "step": 4980 }, { "epoch": 0.6061454213568603, "grad_norm": 1.605370044708252, "learning_rate": 1.6157468699231083e-05, "loss": 0.5052, "step": 4981 }, { "epoch": 0.6062671128688774, "grad_norm": 1.510328769683838, "learning_rate": 1.6155931544416072e-05, "loss": 0.4427, "step": 4982 }, { "epoch": 0.6063888043808945, "grad_norm": 0.8366087675094604, "learning_rate": 1.6154394155355122e-05, "loss": 0.4812, "step": 4983 }, { "epoch": 0.6065104958929115, "grad_norm": 2.1733524799346924, "learning_rate": 1.6152856532106733e-05, "loss": 0.442, "step": 4984 }, { "epoch": 0.6066321874049285, "grad_norm": 2.20855712890625, "learning_rate": 1.615131867472942e-05, "loss": 0.4537, "step": 4985 }, { "epoch": 0.6067538789169455, "grad_norm": 0.5362184643745422, "learning_rate": 1.6149780583281698e-05, "loss": 0.4579, "step": 4986 }, { "epoch": 0.6068755704289626, "grad_norm": 1.5650177001953125, "learning_rate": 1.6148242257822095e-05, "loss": 0.469, "step": 4987 }, { "epoch": 0.6069972619409796, "grad_norm": 4.210306167602539, "learning_rate": 1.614670369840915e-05, "loss": 0.5633, "step": 4988 }, { "epoch": 0.6071189534529966, "grad_norm": 1.1583614349365234, "learning_rate": 1.61451649051014e-05, "loss": 0.4377, "step": 4989 }, { "epoch": 0.6072406449650137, "grad_norm": 0.6843428015708923, "learning_rate": 1.614362587795741e-05, "loss": 0.4719, "step": 4990 }, { "epoch": 0.6073623364770308, "grad_norm": 1.592503309249878, "learning_rate": 1.614208661703574e-05, "loss": 0.4186, "step": 4991 }, { "epoch": 0.6074840279890478, "grad_norm": 0.6590865850448608, "learning_rate": 1.6140547122394957e-05, "loss": 0.4677, "step": 4992 }, { "epoch": 0.6076057195010648, "grad_norm": 1.2795705795288086, "learning_rate": 1.613900739409365e-05, "loss": 0.4689, "step": 4993 }, { "epoch": 0.6077274110130818, "grad_norm": 3.304614305496216, "learning_rate": 1.61374674321904e-05, "loss": 0.4189, "step": 4994 }, { "epoch": 0.6078491025250988, "grad_norm": 2.031473159790039, "learning_rate": 1.6135927236743814e-05, "loss": 0.496, "step": 4995 }, { "epoch": 0.607970794037116, "grad_norm": 2.451896905899048, "learning_rate": 1.6134386807812497e-05, "loss": 0.3828, "step": 4996 }, { "epoch": 0.608092485549133, "grad_norm": 0.5683725476264954, "learning_rate": 1.6132846145455064e-05, "loss": 0.4231, "step": 4997 }, { "epoch": 0.60821417706115, "grad_norm": 3.8416659832000732, "learning_rate": 1.6131305249730137e-05, "loss": 0.5397, "step": 4998 }, { "epoch": 0.608335868573167, "grad_norm": 0.7293667793273926, "learning_rate": 1.6129764120696358e-05, "loss": 0.4245, "step": 4999 }, { "epoch": 0.608457560085184, "grad_norm": 1.453599214553833, "learning_rate": 1.6128222758412365e-05, "loss": 0.4548, "step": 5000 }, { "epoch": 0.608579251597201, "grad_norm": 0.8216196298599243, "learning_rate": 1.612668116293681e-05, "loss": 0.4281, "step": 5001 }, { "epoch": 0.6087009431092182, "grad_norm": 1.4079035520553589, "learning_rate": 1.6125139334328355e-05, "loss": 0.4655, "step": 5002 }, { "epoch": 0.6088226346212352, "grad_norm": 0.7399680614471436, "learning_rate": 1.6123597272645673e-05, "loss": 0.4446, "step": 5003 }, { "epoch": 0.6089443261332522, "grad_norm": 3.4661307334899902, "learning_rate": 1.612205497794744e-05, "loss": 0.5774, "step": 5004 }, { "epoch": 0.6090660176452692, "grad_norm": 1.2089195251464844, "learning_rate": 1.612051245029234e-05, "loss": 0.4944, "step": 5005 }, { "epoch": 0.6091877091572863, "grad_norm": 1.1540513038635254, "learning_rate": 1.6118969689739072e-05, "loss": 0.4597, "step": 5006 }, { "epoch": 0.6093094006693033, "grad_norm": 0.5720807909965515, "learning_rate": 1.6117426696346345e-05, "loss": 0.5137, "step": 5007 }, { "epoch": 0.6094310921813203, "grad_norm": 4.510030746459961, "learning_rate": 1.6115883470172867e-05, "loss": 0.4496, "step": 5008 }, { "epoch": 0.6095527836933374, "grad_norm": 3.608569383621216, "learning_rate": 1.6114340011277365e-05, "loss": 0.4726, "step": 5009 }, { "epoch": 0.6096744752053544, "grad_norm": 5.447242259979248, "learning_rate": 1.6112796319718568e-05, "loss": 0.4202, "step": 5010 }, { "epoch": 0.6097961667173715, "grad_norm": 0.8850874304771423, "learning_rate": 1.6111252395555223e-05, "loss": 0.4787, "step": 5011 }, { "epoch": 0.6099178582293885, "grad_norm": 0.6327923536300659, "learning_rate": 1.610970823884607e-05, "loss": 0.4952, "step": 5012 }, { "epoch": 0.6100395497414055, "grad_norm": 0.665271520614624, "learning_rate": 1.6108163849649874e-05, "loss": 0.4684, "step": 5013 }, { "epoch": 0.6101612412534225, "grad_norm": 0.6051411032676697, "learning_rate": 1.61066192280254e-05, "loss": 0.4839, "step": 5014 }, { "epoch": 0.6102829327654397, "grad_norm": 1.5371066331863403, "learning_rate": 1.6105074374031425e-05, "loss": 0.4579, "step": 5015 }, { "epoch": 0.6104046242774567, "grad_norm": 2.597341299057007, "learning_rate": 1.6103529287726733e-05, "loss": 0.512, "step": 5016 }, { "epoch": 0.6105263157894737, "grad_norm": 1.2913267612457275, "learning_rate": 1.6101983969170117e-05, "loss": 0.4294, "step": 5017 }, { "epoch": 0.6106480073014907, "grad_norm": 3.880460500717163, "learning_rate": 1.610043841842038e-05, "loss": 0.5532, "step": 5018 }, { "epoch": 0.6107696988135077, "grad_norm": 1.3539235591888428, "learning_rate": 1.6098892635536336e-05, "loss": 0.4773, "step": 5019 }, { "epoch": 0.6108913903255248, "grad_norm": 0.8958723545074463, "learning_rate": 1.60973466205768e-05, "loss": 0.5105, "step": 5020 }, { "epoch": 0.6110130818375419, "grad_norm": 3.1031079292297363, "learning_rate": 1.609580037360061e-05, "loss": 0.5389, "step": 5021 }, { "epoch": 0.6111347733495589, "grad_norm": 2.0144450664520264, "learning_rate": 1.6094253894666595e-05, "loss": 0.4731, "step": 5022 }, { "epoch": 0.6112564648615759, "grad_norm": 3.7965126037597656, "learning_rate": 1.6092707183833605e-05, "loss": 0.4452, "step": 5023 }, { "epoch": 0.6113781563735929, "grad_norm": 2.4872331619262695, "learning_rate": 1.6091160241160492e-05, "loss": 0.4603, "step": 5024 }, { "epoch": 0.61149984788561, "grad_norm": 3.876128673553467, "learning_rate": 1.608961306670613e-05, "loss": 0.444, "step": 5025 }, { "epoch": 0.611621539397627, "grad_norm": 1.5480860471725464, "learning_rate": 1.608806566052938e-05, "loss": 0.4383, "step": 5026 }, { "epoch": 0.611743230909644, "grad_norm": 1.0099836587905884, "learning_rate": 1.6086518022689135e-05, "loss": 0.4676, "step": 5027 }, { "epoch": 0.6118649224216611, "grad_norm": 1.7212241888046265, "learning_rate": 1.608497015324428e-05, "loss": 0.4228, "step": 5028 }, { "epoch": 0.6119866139336781, "grad_norm": 0.7636945247650146, "learning_rate": 1.6083422052253713e-05, "loss": 0.448, "step": 5029 }, { "epoch": 0.6121083054456952, "grad_norm": 2.512002468109131, "learning_rate": 1.6081873719776346e-05, "loss": 0.4652, "step": 5030 }, { "epoch": 0.6122299969577122, "grad_norm": 3.997149705886841, "learning_rate": 1.6080325155871095e-05, "loss": 0.507, "step": 5031 }, { "epoch": 0.6123516884697292, "grad_norm": 4.454903602600098, "learning_rate": 1.6078776360596885e-05, "loss": 0.5244, "step": 5032 }, { "epoch": 0.6124733799817462, "grad_norm": 4.167040824890137, "learning_rate": 1.6077227334012657e-05, "loss": 0.5063, "step": 5033 }, { "epoch": 0.6125950714937634, "grad_norm": 3.052686929702759, "learning_rate": 1.6075678076177345e-05, "loss": 0.476, "step": 5034 }, { "epoch": 0.6127167630057804, "grad_norm": 1.222151279449463, "learning_rate": 1.607412858714991e-05, "loss": 0.4604, "step": 5035 }, { "epoch": 0.6128384545177974, "grad_norm": 1.5098296403884888, "learning_rate": 1.6072578866989303e-05, "loss": 0.4152, "step": 5036 }, { "epoch": 0.6129601460298144, "grad_norm": 1.3683090209960938, "learning_rate": 1.6071028915754505e-05, "loss": 0.4434, "step": 5037 }, { "epoch": 0.6130818375418314, "grad_norm": 1.541131854057312, "learning_rate": 1.6069478733504494e-05, "loss": 0.5116, "step": 5038 }, { "epoch": 0.6132035290538485, "grad_norm": 6.025382041931152, "learning_rate": 1.6067928320298247e-05, "loss": 0.4294, "step": 5039 }, { "epoch": 0.6133252205658656, "grad_norm": 1.479815125465393, "learning_rate": 1.606637767619477e-05, "loss": 0.4795, "step": 5040 }, { "epoch": 0.6134469120778826, "grad_norm": 1.080148696899414, "learning_rate": 1.6064826801253066e-05, "loss": 0.5043, "step": 5041 }, { "epoch": 0.6135686035898996, "grad_norm": 1.3082995414733887, "learning_rate": 1.606327569553215e-05, "loss": 0.431, "step": 5042 }, { "epoch": 0.6136902951019166, "grad_norm": 1.3065533638000488, "learning_rate": 1.606172435909104e-05, "loss": 0.4555, "step": 5043 }, { "epoch": 0.6138119866139337, "grad_norm": 1.8795653581619263, "learning_rate": 1.6060172791988778e-05, "loss": 0.4893, "step": 5044 }, { "epoch": 0.6139336781259507, "grad_norm": 1.1706182956695557, "learning_rate": 1.6058620994284394e-05, "loss": 0.4463, "step": 5045 }, { "epoch": 0.6140553696379678, "grad_norm": 0.9591489434242249, "learning_rate": 1.6057068966036938e-05, "loss": 0.504, "step": 5046 }, { "epoch": 0.6141770611499848, "grad_norm": 0.6447705626487732, "learning_rate": 1.6055516707305474e-05, "loss": 0.4692, "step": 5047 }, { "epoch": 0.6142987526620018, "grad_norm": 1.0074318647384644, "learning_rate": 1.605396421814906e-05, "loss": 0.491, "step": 5048 }, { "epoch": 0.6144204441740189, "grad_norm": 2.2753360271453857, "learning_rate": 1.6052411498626777e-05, "loss": 0.4424, "step": 5049 }, { "epoch": 0.6145421356860359, "grad_norm": 2.695660352706909, "learning_rate": 1.6050858548797713e-05, "loss": 0.4454, "step": 5050 }, { "epoch": 0.6146638271980529, "grad_norm": 1.296058177947998, "learning_rate": 1.6049305368720956e-05, "loss": 0.4783, "step": 5051 }, { "epoch": 0.6147855187100699, "grad_norm": 1.0239715576171875, "learning_rate": 1.604775195845561e-05, "loss": 0.4706, "step": 5052 }, { "epoch": 0.614907210222087, "grad_norm": 0.730590283870697, "learning_rate": 1.604619831806078e-05, "loss": 0.485, "step": 5053 }, { "epoch": 0.6150289017341041, "grad_norm": 0.6984491348266602, "learning_rate": 1.6044644447595587e-05, "loss": 0.4213, "step": 5054 }, { "epoch": 0.6151505932461211, "grad_norm": 2.7646350860595703, "learning_rate": 1.6043090347119165e-05, "loss": 0.5108, "step": 5055 }, { "epoch": 0.6152722847581381, "grad_norm": 1.0232855081558228, "learning_rate": 1.6041536016690646e-05, "loss": 0.4026, "step": 5056 }, { "epoch": 0.6153939762701551, "grad_norm": 4.111287593841553, "learning_rate": 1.6039981456369175e-05, "loss": 0.5338, "step": 5057 }, { "epoch": 0.6155156677821721, "grad_norm": 1.2919718027114868, "learning_rate": 1.603842666621391e-05, "loss": 0.46, "step": 5058 }, { "epoch": 0.6156373592941893, "grad_norm": 1.4163293838500977, "learning_rate": 1.603687164628401e-05, "loss": 0.4058, "step": 5059 }, { "epoch": 0.6157590508062063, "grad_norm": 3.231069564819336, "learning_rate": 1.603531639663865e-05, "loss": 0.5565, "step": 5060 }, { "epoch": 0.6158807423182233, "grad_norm": 1.8095322847366333, "learning_rate": 1.6033760917337007e-05, "loss": 0.4078, "step": 5061 }, { "epoch": 0.6160024338302403, "grad_norm": 0.8809127807617188, "learning_rate": 1.6032205208438273e-05, "loss": 0.4693, "step": 5062 }, { "epoch": 0.6161241253422574, "grad_norm": 2.7180685997009277, "learning_rate": 1.603064927000164e-05, "loss": 0.5109, "step": 5063 }, { "epoch": 0.6162458168542744, "grad_norm": 0.8069382905960083, "learning_rate": 1.6029093102086325e-05, "loss": 0.4567, "step": 5064 }, { "epoch": 0.6163675083662915, "grad_norm": 1.958562970161438, "learning_rate": 1.6027536704751535e-05, "loss": 0.5393, "step": 5065 }, { "epoch": 0.6164891998783085, "grad_norm": 1.3394745588302612, "learning_rate": 1.60259800780565e-05, "loss": 0.4854, "step": 5066 }, { "epoch": 0.6166108913903255, "grad_norm": 2.8461692333221436, "learning_rate": 1.602442322206045e-05, "loss": 0.4376, "step": 5067 }, { "epoch": 0.6167325829023426, "grad_norm": 2.2890515327453613, "learning_rate": 1.6022866136822623e-05, "loss": 0.4666, "step": 5068 }, { "epoch": 0.6168542744143596, "grad_norm": 3.1733345985412598, "learning_rate": 1.6021308822402277e-05, "loss": 0.4322, "step": 5069 }, { "epoch": 0.6169759659263766, "grad_norm": 2.5025064945220947, "learning_rate": 1.601975127885866e-05, "loss": 0.446, "step": 5070 }, { "epoch": 0.6170976574383936, "grad_norm": 0.9727060794830322, "learning_rate": 1.6018193506251053e-05, "loss": 0.4911, "step": 5071 }, { "epoch": 0.6172193489504108, "grad_norm": 2.2073404788970947, "learning_rate": 1.6016635504638724e-05, "loss": 0.4827, "step": 5072 }, { "epoch": 0.6173410404624278, "grad_norm": 0.7293530106544495, "learning_rate": 1.601507727408096e-05, "loss": 0.4454, "step": 5073 }, { "epoch": 0.6174627319744448, "grad_norm": 1.3858633041381836, "learning_rate": 1.6013518814637055e-05, "loss": 0.432, "step": 5074 }, { "epoch": 0.6175844234864618, "grad_norm": 1.1621677875518799, "learning_rate": 1.6011960126366314e-05, "loss": 0.4445, "step": 5075 }, { "epoch": 0.6177061149984788, "grad_norm": 0.6184937357902527, "learning_rate": 1.6010401209328046e-05, "loss": 0.4228, "step": 5076 }, { "epoch": 0.6178278065104958, "grad_norm": 0.5854331254959106, "learning_rate": 1.6008842063581566e-05, "loss": 0.4689, "step": 5077 }, { "epoch": 0.617949498022513, "grad_norm": 1.4424673318862915, "learning_rate": 1.6007282689186215e-05, "loss": 0.4507, "step": 5078 }, { "epoch": 0.61807118953453, "grad_norm": 1.6235584020614624, "learning_rate": 1.6005723086201318e-05, "loss": 0.4512, "step": 5079 }, { "epoch": 0.618192881046547, "grad_norm": 1.3193002939224243, "learning_rate": 1.600416325468623e-05, "loss": 0.471, "step": 5080 }, { "epoch": 0.618314572558564, "grad_norm": 2.2656495571136475, "learning_rate": 1.60026031947003e-05, "loss": 0.492, "step": 5081 }, { "epoch": 0.618436264070581, "grad_norm": 1.2294079065322876, "learning_rate": 1.600104290630289e-05, "loss": 0.4719, "step": 5082 }, { "epoch": 0.6185579555825981, "grad_norm": 3.1018309593200684, "learning_rate": 1.5999482389553382e-05, "loss": 0.4441, "step": 5083 }, { "epoch": 0.6186796470946152, "grad_norm": 0.8289585113525391, "learning_rate": 1.5997921644511153e-05, "loss": 0.4753, "step": 5084 }, { "epoch": 0.6188013386066322, "grad_norm": 0.6501938104629517, "learning_rate": 1.5996360671235588e-05, "loss": 0.4853, "step": 5085 }, { "epoch": 0.6189230301186492, "grad_norm": 1.0880118608474731, "learning_rate": 1.599479946978608e-05, "loss": 0.4962, "step": 5086 }, { "epoch": 0.6190447216306663, "grad_norm": 0.6680058836936951, "learning_rate": 1.5993238040222053e-05, "loss": 0.4557, "step": 5087 }, { "epoch": 0.6191664131426833, "grad_norm": 2.530580997467041, "learning_rate": 1.599167638260291e-05, "loss": 0.4418, "step": 5088 }, { "epoch": 0.6192881046547003, "grad_norm": 3.3691353797912598, "learning_rate": 1.5990114496988077e-05, "loss": 0.4235, "step": 5089 }, { "epoch": 0.6194097961667173, "grad_norm": 1.6397669315338135, "learning_rate": 1.5988552383436992e-05, "loss": 0.5388, "step": 5090 }, { "epoch": 0.6195314876787344, "grad_norm": 1.6287271976470947, "learning_rate": 1.598699004200909e-05, "loss": 0.5284, "step": 5091 }, { "epoch": 0.6196531791907515, "grad_norm": 1.6321117877960205, "learning_rate": 1.5985427472763828e-05, "loss": 0.457, "step": 5092 }, { "epoch": 0.6197748707027685, "grad_norm": 1.9909961223602295, "learning_rate": 1.598386467576066e-05, "loss": 0.5002, "step": 5093 }, { "epoch": 0.6198965622147855, "grad_norm": 1.2118253707885742, "learning_rate": 1.598230165105905e-05, "loss": 0.4897, "step": 5094 }, { "epoch": 0.6200182537268025, "grad_norm": 1.9891451597213745, "learning_rate": 1.5980738398718485e-05, "loss": 0.4212, "step": 5095 }, { "epoch": 0.6201399452388195, "grad_norm": 2.7410707473754883, "learning_rate": 1.5979174918798447e-05, "loss": 0.4292, "step": 5096 }, { "epoch": 0.6202616367508367, "grad_norm": 1.1813677549362183, "learning_rate": 1.597761121135842e-05, "loss": 0.458, "step": 5097 }, { "epoch": 0.6203833282628537, "grad_norm": 0.6601487398147583, "learning_rate": 1.5976047276457916e-05, "loss": 0.474, "step": 5098 }, { "epoch": 0.6205050197748707, "grad_norm": 0.5981538891792297, "learning_rate": 1.5974483114156447e-05, "loss": 0.4616, "step": 5099 }, { "epoch": 0.6206267112868877, "grad_norm": 1.1330775022506714, "learning_rate": 1.5972918724513524e-05, "loss": 0.4499, "step": 5100 }, { "epoch": 0.6207484027989048, "grad_norm": 2.344569444656372, "learning_rate": 1.5971354107588687e-05, "loss": 0.4656, "step": 5101 }, { "epoch": 0.6208700943109218, "grad_norm": 1.7827597856521606, "learning_rate": 1.596978926344146e-05, "loss": 0.5002, "step": 5102 }, { "epoch": 0.6209917858229389, "grad_norm": 0.9423059821128845, "learning_rate": 1.59682241921314e-05, "loss": 0.455, "step": 5103 }, { "epoch": 0.6211134773349559, "grad_norm": 0.8463561534881592, "learning_rate": 1.596665889371805e-05, "loss": 0.4902, "step": 5104 }, { "epoch": 0.6212351688469729, "grad_norm": 0.6051124930381775, "learning_rate": 1.596509336826098e-05, "loss": 0.4697, "step": 5105 }, { "epoch": 0.62135686035899, "grad_norm": 0.9681133031845093, "learning_rate": 1.5963527615819764e-05, "loss": 0.4791, "step": 5106 }, { "epoch": 0.621478551871007, "grad_norm": 1.2018495798110962, "learning_rate": 1.5961961636453974e-05, "loss": 0.4505, "step": 5107 }, { "epoch": 0.621600243383024, "grad_norm": 3.915792465209961, "learning_rate": 1.5960395430223206e-05, "loss": 0.4162, "step": 5108 }, { "epoch": 0.621721934895041, "grad_norm": 1.6480474472045898, "learning_rate": 1.5958828997187054e-05, "loss": 0.4328, "step": 5109 }, { "epoch": 0.6218436264070581, "grad_norm": 1.29371976852417, "learning_rate": 1.5957262337405125e-05, "loss": 0.4462, "step": 5110 }, { "epoch": 0.6219653179190752, "grad_norm": 2.5986709594726562, "learning_rate": 1.595569545093703e-05, "loss": 0.4659, "step": 5111 }, { "epoch": 0.6220870094310922, "grad_norm": 4.487428665161133, "learning_rate": 1.5954128337842398e-05, "loss": 0.5351, "step": 5112 }, { "epoch": 0.6222087009431092, "grad_norm": 1.3018454313278198, "learning_rate": 1.5952560998180858e-05, "loss": 0.4611, "step": 5113 }, { "epoch": 0.6223303924551262, "grad_norm": 1.7453690767288208, "learning_rate": 1.5950993432012053e-05, "loss": 0.4462, "step": 5114 }, { "epoch": 0.6224520839671432, "grad_norm": 0.8891845345497131, "learning_rate": 1.594942563939563e-05, "loss": 0.4487, "step": 5115 }, { "epoch": 0.6225737754791604, "grad_norm": 3.4997634887695312, "learning_rate": 1.5947857620391243e-05, "loss": 0.5302, "step": 5116 }, { "epoch": 0.6226954669911774, "grad_norm": 0.6400924921035767, "learning_rate": 1.5946289375058562e-05, "loss": 0.4907, "step": 5117 }, { "epoch": 0.6228171585031944, "grad_norm": 1.9319634437561035, "learning_rate": 1.5944720903457266e-05, "loss": 0.4963, "step": 5118 }, { "epoch": 0.6229388500152114, "grad_norm": 3.117453098297119, "learning_rate": 1.5943152205647035e-05, "loss": 0.4496, "step": 5119 }, { "epoch": 0.6230605415272285, "grad_norm": 1.8318192958831787, "learning_rate": 1.594158328168756e-05, "loss": 0.4525, "step": 5120 }, { "epoch": 0.6231822330392455, "grad_norm": 0.903439462184906, "learning_rate": 1.594001413163854e-05, "loss": 0.4767, "step": 5121 }, { "epoch": 0.6233039245512626, "grad_norm": 1.5297845602035522, "learning_rate": 1.5938444755559688e-05, "loss": 0.479, "step": 5122 }, { "epoch": 0.6234256160632796, "grad_norm": 0.9639740586280823, "learning_rate": 1.5936875153510723e-05, "loss": 0.4581, "step": 5123 }, { "epoch": 0.6235473075752966, "grad_norm": 2.8778951168060303, "learning_rate": 1.5935305325551367e-05, "loss": 0.4404, "step": 5124 }, { "epoch": 0.6236689990873137, "grad_norm": 0.8338596224784851, "learning_rate": 1.593373527174136e-05, "loss": 0.5022, "step": 5125 }, { "epoch": 0.6237906905993307, "grad_norm": 0.7757508754730225, "learning_rate": 1.5932164992140443e-05, "loss": 0.5042, "step": 5126 }, { "epoch": 0.6239123821113477, "grad_norm": 0.849368155002594, "learning_rate": 1.593059448680837e-05, "loss": 0.4919, "step": 5127 }, { "epoch": 0.6240340736233647, "grad_norm": 1.5418044328689575, "learning_rate": 1.59290237558049e-05, "loss": 0.4748, "step": 5128 }, { "epoch": 0.6241557651353818, "grad_norm": 0.7975398898124695, "learning_rate": 1.5927452799189804e-05, "loss": 0.5243, "step": 5129 }, { "epoch": 0.6242774566473989, "grad_norm": 3.799165964126587, "learning_rate": 1.5925881617022862e-05, "loss": 0.4201, "step": 5130 }, { "epoch": 0.6243991481594159, "grad_norm": 2.2506279945373535, "learning_rate": 1.5924310209363854e-05, "loss": 0.432, "step": 5131 }, { "epoch": 0.6245208396714329, "grad_norm": 0.8625126481056213, "learning_rate": 1.5922738576272584e-05, "loss": 0.424, "step": 5132 }, { "epoch": 0.6246425311834499, "grad_norm": 0.9615429639816284, "learning_rate": 1.592116671780885e-05, "loss": 0.4782, "step": 5133 }, { "epoch": 0.6247642226954669, "grad_norm": 0.7857763171195984, "learning_rate": 1.5919594634032468e-05, "loss": 0.4736, "step": 5134 }, { "epoch": 0.6248859142074841, "grad_norm": 3.0265188217163086, "learning_rate": 1.5918022325003258e-05, "loss": 0.5136, "step": 5135 }, { "epoch": 0.6250076057195011, "grad_norm": 0.7972736358642578, "learning_rate": 1.5916449790781045e-05, "loss": 0.4418, "step": 5136 }, { "epoch": 0.6251292972315181, "grad_norm": 2.0893096923828125, "learning_rate": 1.5914877031425674e-05, "loss": 0.4794, "step": 5137 }, { "epoch": 0.6252509887435351, "grad_norm": 1.120312213897705, "learning_rate": 1.5913304046996986e-05, "loss": 0.4673, "step": 5138 }, { "epoch": 0.6253726802555521, "grad_norm": 1.1246203184127808, "learning_rate": 1.5911730837554843e-05, "loss": 0.4067, "step": 5139 }, { "epoch": 0.6254943717675692, "grad_norm": 2.8888893127441406, "learning_rate": 1.5910157403159102e-05, "loss": 0.4083, "step": 5140 }, { "epoch": 0.6256160632795863, "grad_norm": 2.039015769958496, "learning_rate": 1.590858374386964e-05, "loss": 0.4315, "step": 5141 }, { "epoch": 0.6257377547916033, "grad_norm": 1.864123821258545, "learning_rate": 1.5907009859746336e-05, "loss": 0.4771, "step": 5142 }, { "epoch": 0.6258594463036203, "grad_norm": 2.2958595752716064, "learning_rate": 1.590543575084908e-05, "loss": 0.4834, "step": 5143 }, { "epoch": 0.6259811378156374, "grad_norm": 0.6923889517784119, "learning_rate": 1.590386141723777e-05, "loss": 0.4716, "step": 5144 }, { "epoch": 0.6261028293276544, "grad_norm": 0.6824405789375305, "learning_rate": 1.590228685897231e-05, "loss": 0.4586, "step": 5145 }, { "epoch": 0.6262245208396714, "grad_norm": 0.7675740122795105, "learning_rate": 1.590071207611262e-05, "loss": 0.4556, "step": 5146 }, { "epoch": 0.6263462123516885, "grad_norm": 0.6634732484817505, "learning_rate": 1.5899137068718624e-05, "loss": 0.4344, "step": 5147 }, { "epoch": 0.6264679038637055, "grad_norm": 0.9993885159492493, "learning_rate": 1.5897561836850254e-05, "loss": 0.4542, "step": 5148 }, { "epoch": 0.6265895953757226, "grad_norm": 1.7292815446853638, "learning_rate": 1.5895986380567444e-05, "loss": 0.431, "step": 5149 }, { "epoch": 0.6267112868877396, "grad_norm": 2.644493579864502, "learning_rate": 1.589441069993015e-05, "loss": 0.5238, "step": 5150 }, { "epoch": 0.6268329783997566, "grad_norm": 2.9488461017608643, "learning_rate": 1.5892834794998325e-05, "loss": 0.3738, "step": 5151 }, { "epoch": 0.6269546699117736, "grad_norm": 1.1818256378173828, "learning_rate": 1.5891258665831942e-05, "loss": 0.4326, "step": 5152 }, { "epoch": 0.6270763614237906, "grad_norm": 0.8681172132492065, "learning_rate": 1.588968231249097e-05, "loss": 0.427, "step": 5153 }, { "epoch": 0.6271980529358078, "grad_norm": 1.333761215209961, "learning_rate": 1.58881057350354e-05, "loss": 0.4909, "step": 5154 }, { "epoch": 0.6273197444478248, "grad_norm": 1.2859219312667847, "learning_rate": 1.5886528933525214e-05, "loss": 0.394, "step": 5155 }, { "epoch": 0.6274414359598418, "grad_norm": 0.5924080610275269, "learning_rate": 1.5884951908020418e-05, "loss": 0.4341, "step": 5156 }, { "epoch": 0.6275631274718588, "grad_norm": 4.823788166046143, "learning_rate": 1.5883374658581022e-05, "loss": 0.5689, "step": 5157 }, { "epoch": 0.6276848189838758, "grad_norm": 1.682992935180664, "learning_rate": 1.5881797185267044e-05, "loss": 0.4708, "step": 5158 }, { "epoch": 0.6278065104958929, "grad_norm": 0.7032126784324646, "learning_rate": 1.5880219488138507e-05, "loss": 0.4509, "step": 5159 }, { "epoch": 0.62792820200791, "grad_norm": 1.5909678936004639, "learning_rate": 1.5878641567255447e-05, "loss": 0.4401, "step": 5160 }, { "epoch": 0.628049893519927, "grad_norm": 1.1590338945388794, "learning_rate": 1.5877063422677904e-05, "loss": 0.4821, "step": 5161 }, { "epoch": 0.628171585031944, "grad_norm": 0.644655704498291, "learning_rate": 1.587548505446594e-05, "loss": 0.4964, "step": 5162 }, { "epoch": 0.628293276543961, "grad_norm": 0.7603266835212708, "learning_rate": 1.58739064626796e-05, "loss": 0.4482, "step": 5163 }, { "epoch": 0.6284149680559781, "grad_norm": 1.9220455884933472, "learning_rate": 1.5872327647378968e-05, "loss": 0.4739, "step": 5164 }, { "epoch": 0.6285366595679951, "grad_norm": 2.9807608127593994, "learning_rate": 1.587074860862411e-05, "loss": 0.4803, "step": 5165 }, { "epoch": 0.6286583510800122, "grad_norm": 1.0503041744232178, "learning_rate": 1.5869169346475116e-05, "loss": 0.4631, "step": 5166 }, { "epoch": 0.6287800425920292, "grad_norm": 1.2424944639205933, "learning_rate": 1.586758986099208e-05, "loss": 0.4956, "step": 5167 }, { "epoch": 0.6289017341040463, "grad_norm": 0.8208186626434326, "learning_rate": 1.5866010152235105e-05, "loss": 0.452, "step": 5168 }, { "epoch": 0.6290234256160633, "grad_norm": 0.6422441601753235, "learning_rate": 1.5864430220264303e-05, "loss": 0.4307, "step": 5169 }, { "epoch": 0.6291451171280803, "grad_norm": 1.340247631072998, "learning_rate": 1.5862850065139788e-05, "loss": 0.4408, "step": 5170 }, { "epoch": 0.6292668086400973, "grad_norm": 3.4430768489837646, "learning_rate": 1.58612696869217e-05, "loss": 0.5098, "step": 5171 }, { "epoch": 0.6293885001521143, "grad_norm": 1.6935397386550903, "learning_rate": 1.5859689085670162e-05, "loss": 0.49, "step": 5172 }, { "epoch": 0.6295101916641315, "grad_norm": 3.508847951889038, "learning_rate": 1.585810826144533e-05, "loss": 0.5803, "step": 5173 }, { "epoch": 0.6296318831761485, "grad_norm": 2.1378252506256104, "learning_rate": 1.585652721430735e-05, "loss": 0.4301, "step": 5174 }, { "epoch": 0.6297535746881655, "grad_norm": 0.6643288731575012, "learning_rate": 1.5854945944316385e-05, "loss": 0.4668, "step": 5175 }, { "epoch": 0.6298752662001825, "grad_norm": 0.8753386735916138, "learning_rate": 1.5853364451532608e-05, "loss": 0.5287, "step": 5176 }, { "epoch": 0.6299969577121995, "grad_norm": 1.686498999595642, "learning_rate": 1.5851782736016205e-05, "loss": 0.4932, "step": 5177 }, { "epoch": 0.6301186492242166, "grad_norm": 1.5822488069534302, "learning_rate": 1.585020079782735e-05, "loss": 0.5047, "step": 5178 }, { "epoch": 0.6302403407362337, "grad_norm": 1.9763538837432861, "learning_rate": 1.5848618637026248e-05, "loss": 0.5038, "step": 5179 }, { "epoch": 0.6303620322482507, "grad_norm": 3.5097556114196777, "learning_rate": 1.5847036253673097e-05, "loss": 0.4345, "step": 5180 }, { "epoch": 0.6304837237602677, "grad_norm": 1.1224864721298218, "learning_rate": 1.584545364782812e-05, "loss": 0.4911, "step": 5181 }, { "epoch": 0.6306054152722848, "grad_norm": 0.9945358633995056, "learning_rate": 1.5843870819551526e-05, "loss": 0.4711, "step": 5182 }, { "epoch": 0.6307271067843018, "grad_norm": 2.5181162357330322, "learning_rate": 1.5842287768903553e-05, "loss": 0.429, "step": 5183 }, { "epoch": 0.6308487982963188, "grad_norm": 0.7400104403495789, "learning_rate": 1.584070449594444e-05, "loss": 0.4913, "step": 5184 }, { "epoch": 0.6309704898083359, "grad_norm": 2.210681438446045, "learning_rate": 1.583912100073443e-05, "loss": 0.4597, "step": 5185 }, { "epoch": 0.6310921813203529, "grad_norm": 4.362020492553711, "learning_rate": 1.5837537283333778e-05, "loss": 0.5229, "step": 5186 }, { "epoch": 0.63121387283237, "grad_norm": 1.9521353244781494, "learning_rate": 1.5835953343802752e-05, "loss": 0.4793, "step": 5187 }, { "epoch": 0.631335564344387, "grad_norm": 1.97042977809906, "learning_rate": 1.5834369182201622e-05, "loss": 0.5017, "step": 5188 }, { "epoch": 0.631457255856404, "grad_norm": 1.4353485107421875, "learning_rate": 1.5832784798590667e-05, "loss": 0.4165, "step": 5189 }, { "epoch": 0.631578947368421, "grad_norm": 0.9788597822189331, "learning_rate": 1.5831200193030178e-05, "loss": 0.5053, "step": 5190 }, { "epoch": 0.631700638880438, "grad_norm": 2.2736315727233887, "learning_rate": 1.582961536558045e-05, "loss": 0.526, "step": 5191 }, { "epoch": 0.6318223303924552, "grad_norm": 2.653142213821411, "learning_rate": 1.5828030316301788e-05, "loss": 0.4812, "step": 5192 }, { "epoch": 0.6319440219044722, "grad_norm": 3.02508282661438, "learning_rate": 1.5826445045254512e-05, "loss": 0.4367, "step": 5193 }, { "epoch": 0.6320657134164892, "grad_norm": 2.291142463684082, "learning_rate": 1.5824859552498944e-05, "loss": 0.4363, "step": 5194 }, { "epoch": 0.6321874049285062, "grad_norm": 2.344771385192871, "learning_rate": 1.582327383809541e-05, "loss": 0.4709, "step": 5195 }, { "epoch": 0.6323090964405232, "grad_norm": 1.9913361072540283, "learning_rate": 1.5821687902104254e-05, "loss": 0.426, "step": 5196 }, { "epoch": 0.6324307879525403, "grad_norm": 1.2045221328735352, "learning_rate": 1.5820101744585824e-05, "loss": 0.4792, "step": 5197 }, { "epoch": 0.6325524794645574, "grad_norm": 0.5647122263908386, "learning_rate": 1.5818515365600476e-05, "loss": 0.4754, "step": 5198 }, { "epoch": 0.6326741709765744, "grad_norm": 0.9412499666213989, "learning_rate": 1.5816928765208573e-05, "loss": 0.4264, "step": 5199 }, { "epoch": 0.6327958624885914, "grad_norm": 0.7304482460021973, "learning_rate": 1.5815341943470494e-05, "loss": 0.4226, "step": 5200 }, { "epoch": 0.6329175540006085, "grad_norm": 4.112847328186035, "learning_rate": 1.5813754900446613e-05, "loss": 0.5466, "step": 5201 }, { "epoch": 0.6330392455126255, "grad_norm": 3.4612157344818115, "learning_rate": 1.5812167636197323e-05, "loss": 0.4729, "step": 5202 }, { "epoch": 0.6331609370246425, "grad_norm": 2.4185543060302734, "learning_rate": 1.5810580150783024e-05, "loss": 0.4932, "step": 5203 }, { "epoch": 0.6332826285366596, "grad_norm": 2.4247961044311523, "learning_rate": 1.5808992444264128e-05, "loss": 0.4842, "step": 5204 }, { "epoch": 0.6334043200486766, "grad_norm": 1.3789006471633911, "learning_rate": 1.5807404516701043e-05, "loss": 0.5269, "step": 5205 }, { "epoch": 0.6335260115606937, "grad_norm": 4.834405899047852, "learning_rate": 1.580581636815419e-05, "loss": 0.4216, "step": 5206 }, { "epoch": 0.6336477030727107, "grad_norm": 2.637833595275879, "learning_rate": 1.5804227998684013e-05, "loss": 0.4934, "step": 5207 }, { "epoch": 0.6337693945847277, "grad_norm": 5.018129348754883, "learning_rate": 1.580263940835095e-05, "loss": 0.4937, "step": 5208 }, { "epoch": 0.6338910860967447, "grad_norm": 2.8750083446502686, "learning_rate": 1.580105059721544e-05, "loss": 0.4857, "step": 5209 }, { "epoch": 0.6340127776087617, "grad_norm": 3.6261367797851562, "learning_rate": 1.579946156533795e-05, "loss": 0.4281, "step": 5210 }, { "epoch": 0.6341344691207789, "grad_norm": 3.6124765872955322, "learning_rate": 1.5797872312778944e-05, "loss": 0.4266, "step": 5211 }, { "epoch": 0.6342561606327959, "grad_norm": 2.5368010997772217, "learning_rate": 1.5796282839598892e-05, "loss": 0.4392, "step": 5212 }, { "epoch": 0.6343778521448129, "grad_norm": 1.7947090864181519, "learning_rate": 1.579469314585828e-05, "loss": 0.5077, "step": 5213 }, { "epoch": 0.6344995436568299, "grad_norm": 0.8332177996635437, "learning_rate": 1.5793103231617603e-05, "loss": 0.4674, "step": 5214 }, { "epoch": 0.6346212351688469, "grad_norm": 0.6878183484077454, "learning_rate": 1.5791513096937356e-05, "loss": 0.4312, "step": 5215 }, { "epoch": 0.634742926680864, "grad_norm": 1.2983524799346924, "learning_rate": 1.578992274187805e-05, "loss": 0.4759, "step": 5216 }, { "epoch": 0.6348646181928811, "grad_norm": 1.0577523708343506, "learning_rate": 1.5788332166500196e-05, "loss": 0.4247, "step": 5217 }, { "epoch": 0.6349863097048981, "grad_norm": 1.254083275794983, "learning_rate": 1.578674137086432e-05, "loss": 0.4704, "step": 5218 }, { "epoch": 0.6351080012169151, "grad_norm": 1.862012267112732, "learning_rate": 1.578515035503096e-05, "loss": 0.5051, "step": 5219 }, { "epoch": 0.6352296927289321, "grad_norm": 1.180809497833252, "learning_rate": 1.5783559119060656e-05, "loss": 0.5547, "step": 5220 }, { "epoch": 0.6353513842409492, "grad_norm": 1.7578306198120117, "learning_rate": 1.5781967663013954e-05, "loss": 0.4508, "step": 5221 }, { "epoch": 0.6354730757529662, "grad_norm": 1.8263388872146606, "learning_rate": 1.5780375986951417e-05, "loss": 0.4601, "step": 5222 }, { "epoch": 0.6355947672649833, "grad_norm": 4.632490158081055, "learning_rate": 1.5778784090933607e-05, "loss": 0.4253, "step": 5223 }, { "epoch": 0.6357164587770003, "grad_norm": 0.77925044298172, "learning_rate": 1.57771919750211e-05, "loss": 0.5082, "step": 5224 }, { "epoch": 0.6358381502890174, "grad_norm": 3.3072025775909424, "learning_rate": 1.577559963927448e-05, "loss": 0.4455, "step": 5225 }, { "epoch": 0.6359598418010344, "grad_norm": 0.5746170282363892, "learning_rate": 1.5774007083754338e-05, "loss": 0.5056, "step": 5226 }, { "epoch": 0.6360815333130514, "grad_norm": 1.3007668256759644, "learning_rate": 1.5772414308521278e-05, "loss": 0.503, "step": 5227 }, { "epoch": 0.6362032248250684, "grad_norm": 0.714690625667572, "learning_rate": 1.5770821313635906e-05, "loss": 0.5201, "step": 5228 }, { "epoch": 0.6363249163370854, "grad_norm": 1.153825283050537, "learning_rate": 1.5769228099158835e-05, "loss": 0.4634, "step": 5229 }, { "epoch": 0.6364466078491026, "grad_norm": 0.6355719566345215, "learning_rate": 1.5767634665150692e-05, "loss": 0.4479, "step": 5230 }, { "epoch": 0.6365682993611196, "grad_norm": 1.5121638774871826, "learning_rate": 1.5766041011672114e-05, "loss": 0.5044, "step": 5231 }, { "epoch": 0.6366899908731366, "grad_norm": 1.667244553565979, "learning_rate": 1.576444713878374e-05, "loss": 0.3769, "step": 5232 }, { "epoch": 0.6368116823851536, "grad_norm": 2.893745183944702, "learning_rate": 1.576285304654622e-05, "loss": 0.4857, "step": 5233 }, { "epoch": 0.6369333738971706, "grad_norm": 1.582780122756958, "learning_rate": 1.576125873502022e-05, "loss": 0.458, "step": 5234 }, { "epoch": 0.6370550654091877, "grad_norm": 2.3318421840667725, "learning_rate": 1.5759664204266392e-05, "loss": 0.5, "step": 5235 }, { "epoch": 0.6371767569212048, "grad_norm": 2.5021045207977295, "learning_rate": 1.575806945434542e-05, "loss": 0.5116, "step": 5236 }, { "epoch": 0.6372984484332218, "grad_norm": 0.8516119122505188, "learning_rate": 1.5756474485317986e-05, "loss": 0.4837, "step": 5237 }, { "epoch": 0.6374201399452388, "grad_norm": 3.058852434158325, "learning_rate": 1.5754879297244786e-05, "loss": 0.4431, "step": 5238 }, { "epoch": 0.6375418314572558, "grad_norm": 0.5639399290084839, "learning_rate": 1.5753283890186515e-05, "loss": 0.4668, "step": 5239 }, { "epoch": 0.6376635229692729, "grad_norm": 0.5681480169296265, "learning_rate": 1.5751688264203885e-05, "loss": 0.4879, "step": 5240 }, { "epoch": 0.6377852144812899, "grad_norm": 1.5465158224105835, "learning_rate": 1.575009241935761e-05, "loss": 0.4864, "step": 5241 }, { "epoch": 0.637906905993307, "grad_norm": 2.9557650089263916, "learning_rate": 1.5748496355708416e-05, "loss": 0.4475, "step": 5242 }, { "epoch": 0.638028597505324, "grad_norm": 2.6665782928466797, "learning_rate": 1.5746900073317037e-05, "loss": 0.4435, "step": 5243 }, { "epoch": 0.638150289017341, "grad_norm": 1.3420488834381104, "learning_rate": 1.5745303572244215e-05, "loss": 0.5205, "step": 5244 }, { "epoch": 0.6382719805293581, "grad_norm": 3.566497325897217, "learning_rate": 1.57437068525507e-05, "loss": 0.5772, "step": 5245 }, { "epoch": 0.6383936720413751, "grad_norm": 0.9798552393913269, "learning_rate": 1.574210991429725e-05, "loss": 0.4787, "step": 5246 }, { "epoch": 0.6385153635533921, "grad_norm": 2.5602974891662598, "learning_rate": 1.5740512757544634e-05, "loss": 0.4915, "step": 5247 }, { "epoch": 0.6386370550654092, "grad_norm": 3.6115434169769287, "learning_rate": 1.5738915382353624e-05, "loss": 0.4268, "step": 5248 }, { "epoch": 0.6387587465774263, "grad_norm": 1.4270596504211426, "learning_rate": 1.5737317788785e-05, "loss": 0.4992, "step": 5249 }, { "epoch": 0.6388804380894433, "grad_norm": 0.8352439999580383, "learning_rate": 1.5735719976899566e-05, "loss": 0.4929, "step": 5250 }, { "epoch": 0.6390021296014603, "grad_norm": 4.70586633682251, "learning_rate": 1.5734121946758114e-05, "loss": 0.4813, "step": 5251 }, { "epoch": 0.6391238211134773, "grad_norm": 1.007424235343933, "learning_rate": 1.5732523698421452e-05, "loss": 0.5212, "step": 5252 }, { "epoch": 0.6392455126254943, "grad_norm": 3.383769989013672, "learning_rate": 1.5730925231950395e-05, "loss": 0.4723, "step": 5253 }, { "epoch": 0.6393672041375114, "grad_norm": 2.987266778945923, "learning_rate": 1.5729326547405772e-05, "loss": 0.4829, "step": 5254 }, { "epoch": 0.6394888956495285, "grad_norm": 0.930807888507843, "learning_rate": 1.5727727644848415e-05, "loss": 0.5075, "step": 5255 }, { "epoch": 0.6396105871615455, "grad_norm": 1.6078609228134155, "learning_rate": 1.5726128524339163e-05, "loss": 0.5098, "step": 5256 }, { "epoch": 0.6397322786735625, "grad_norm": 0.6467624306678772, "learning_rate": 1.572452918593887e-05, "loss": 0.4819, "step": 5257 }, { "epoch": 0.6398539701855795, "grad_norm": 1.394906759262085, "learning_rate": 1.5722929629708397e-05, "loss": 0.496, "step": 5258 }, { "epoch": 0.6399756616975966, "grad_norm": 1.2083563804626465, "learning_rate": 1.57213298557086e-05, "loss": 0.4524, "step": 5259 }, { "epoch": 0.6400973532096136, "grad_norm": 1.187841534614563, "learning_rate": 1.5719729864000363e-05, "loss": 0.4273, "step": 5260 }, { "epoch": 0.6402190447216307, "grad_norm": 1.1763041019439697, "learning_rate": 1.5718129654644562e-05, "loss": 0.4677, "step": 5261 }, { "epoch": 0.6403407362336477, "grad_norm": 1.6368167400360107, "learning_rate": 1.571652922770209e-05, "loss": 0.4532, "step": 5262 }, { "epoch": 0.6404624277456648, "grad_norm": 1.949811577796936, "learning_rate": 1.5714928583233854e-05, "loss": 0.4179, "step": 5263 }, { "epoch": 0.6405841192576818, "grad_norm": 1.9312878847122192, "learning_rate": 1.5713327721300753e-05, "loss": 0.4226, "step": 5264 }, { "epoch": 0.6407058107696988, "grad_norm": 1.3562158346176147, "learning_rate": 1.5711726641963708e-05, "loss": 0.4908, "step": 5265 }, { "epoch": 0.6408275022817158, "grad_norm": 1.574460506439209, "learning_rate": 1.571012534528364e-05, "loss": 0.472, "step": 5266 }, { "epoch": 0.6409491937937329, "grad_norm": 2.8433024883270264, "learning_rate": 1.5708523831321482e-05, "loss": 0.5206, "step": 5267 }, { "epoch": 0.64107088530575, "grad_norm": 1.0959405899047852, "learning_rate": 1.570692210013818e-05, "loss": 0.4416, "step": 5268 }, { "epoch": 0.641192576817767, "grad_norm": 1.6597760915756226, "learning_rate": 1.570532015179468e-05, "loss": 0.4574, "step": 5269 }, { "epoch": 0.641314268329784, "grad_norm": 2.3682327270507812, "learning_rate": 1.5703717986351934e-05, "loss": 0.5196, "step": 5270 }, { "epoch": 0.641435959841801, "grad_norm": 1.7758485078811646, "learning_rate": 1.5702115603870914e-05, "loss": 0.502, "step": 5271 }, { "epoch": 0.641557651353818, "grad_norm": 1.2930625677108765, "learning_rate": 1.5700513004412593e-05, "loss": 0.4532, "step": 5272 }, { "epoch": 0.6416793428658351, "grad_norm": 1.7209218740463257, "learning_rate": 1.5698910188037954e-05, "loss": 0.4361, "step": 5273 }, { "epoch": 0.6418010343778522, "grad_norm": 0.6990587711334229, "learning_rate": 1.569730715480799e-05, "loss": 0.4443, "step": 5274 }, { "epoch": 0.6419227258898692, "grad_norm": 1.332281231880188, "learning_rate": 1.569570390478369e-05, "loss": 0.5104, "step": 5275 }, { "epoch": 0.6420444174018862, "grad_norm": 0.722692608833313, "learning_rate": 1.5694100438026066e-05, "loss": 0.439, "step": 5276 }, { "epoch": 0.6421661089139032, "grad_norm": 0.8775246739387512, "learning_rate": 1.569249675459614e-05, "loss": 0.5065, "step": 5277 }, { "epoch": 0.6422878004259203, "grad_norm": 0.7255299687385559, "learning_rate": 1.5690892854554926e-05, "loss": 0.4966, "step": 5278 }, { "epoch": 0.6424094919379373, "grad_norm": 0.5717247724533081, "learning_rate": 1.568928873796346e-05, "loss": 0.4658, "step": 5279 }, { "epoch": 0.6425311834499544, "grad_norm": 2.2494089603424072, "learning_rate": 1.568768440488278e-05, "loss": 0.4658, "step": 5280 }, { "epoch": 0.6426528749619714, "grad_norm": 1.175611138343811, "learning_rate": 1.5686079855373936e-05, "loss": 0.4917, "step": 5281 }, { "epoch": 0.6427745664739885, "grad_norm": 1.840100884437561, "learning_rate": 1.5684475089497983e-05, "loss": 0.4713, "step": 5282 }, { "epoch": 0.6428962579860055, "grad_norm": 0.8531798720359802, "learning_rate": 1.568287010731599e-05, "loss": 0.4593, "step": 5283 }, { "epoch": 0.6430179494980225, "grad_norm": 1.5386767387390137, "learning_rate": 1.5681264908889026e-05, "loss": 0.4133, "step": 5284 }, { "epoch": 0.6431396410100395, "grad_norm": 1.202364444732666, "learning_rate": 1.567965949427817e-05, "loss": 0.5303, "step": 5285 }, { "epoch": 0.6432613325220566, "grad_norm": 2.0715887546539307, "learning_rate": 1.5678053863544516e-05, "loss": 0.5159, "step": 5286 }, { "epoch": 0.6433830240340737, "grad_norm": 0.6678662300109863, "learning_rate": 1.567644801674916e-05, "loss": 0.4942, "step": 5287 }, { "epoch": 0.6435047155460907, "grad_norm": 3.980654239654541, "learning_rate": 1.5674841953953205e-05, "loss": 0.5529, "step": 5288 }, { "epoch": 0.6436264070581077, "grad_norm": 1.2205555438995361, "learning_rate": 1.567323567521777e-05, "loss": 0.4752, "step": 5289 }, { "epoch": 0.6437480985701247, "grad_norm": 2.3752331733703613, "learning_rate": 1.5671629180603972e-05, "loss": 0.4849, "step": 5290 }, { "epoch": 0.6438697900821417, "grad_norm": 3.106736660003662, "learning_rate": 1.5670022470172947e-05, "loss": 0.485, "step": 5291 }, { "epoch": 0.6439914815941588, "grad_norm": 1.166581153869629, "learning_rate": 1.5668415543985828e-05, "loss": 0.5282, "step": 5292 }, { "epoch": 0.6441131731061759, "grad_norm": 2.677962303161621, "learning_rate": 1.5666808402103764e-05, "loss": 0.4469, "step": 5293 }, { "epoch": 0.6442348646181929, "grad_norm": 4.46078634262085, "learning_rate": 1.5665201044587912e-05, "loss": 0.3993, "step": 5294 }, { "epoch": 0.6443565561302099, "grad_norm": 1.144884705543518, "learning_rate": 1.5663593471499434e-05, "loss": 0.4871, "step": 5295 }, { "epoch": 0.6444782476422269, "grad_norm": 0.7435542941093445, "learning_rate": 1.5661985682899503e-05, "loss": 0.4552, "step": 5296 }, { "epoch": 0.644599939154244, "grad_norm": 0.851441502571106, "learning_rate": 1.5660377678849298e-05, "loss": 0.4103, "step": 5297 }, { "epoch": 0.644721630666261, "grad_norm": 2.650285482406616, "learning_rate": 1.565876945941e-05, "loss": 0.4894, "step": 5298 }, { "epoch": 0.6448433221782781, "grad_norm": 1.9493920803070068, "learning_rate": 1.5657161024642817e-05, "loss": 0.4432, "step": 5299 }, { "epoch": 0.6449650136902951, "grad_norm": 2.567227840423584, "learning_rate": 1.5655552374608945e-05, "loss": 0.4976, "step": 5300 }, { "epoch": 0.6450867052023121, "grad_norm": 0.7542309761047363, "learning_rate": 1.56539435093696e-05, "loss": 0.4631, "step": 5301 }, { "epoch": 0.6452083967143292, "grad_norm": 0.6160406470298767, "learning_rate": 1.5652334428985998e-05, "loss": 0.443, "step": 5302 }, { "epoch": 0.6453300882263462, "grad_norm": 0.9996970891952515, "learning_rate": 1.5650725133519376e-05, "loss": 0.4547, "step": 5303 }, { "epoch": 0.6454517797383632, "grad_norm": 0.5419983863830566, "learning_rate": 1.564911562303096e-05, "loss": 0.4629, "step": 5304 }, { "epoch": 0.6455734712503803, "grad_norm": 0.7758529782295227, "learning_rate": 1.5647505897582006e-05, "loss": 0.4724, "step": 5305 }, { "epoch": 0.6456951627623974, "grad_norm": 3.16382098197937, "learning_rate": 1.5645895957233765e-05, "loss": 0.4316, "step": 5306 }, { "epoch": 0.6458168542744144, "grad_norm": 0.7268728613853455, "learning_rate": 1.5644285802047493e-05, "loss": 0.4823, "step": 5307 }, { "epoch": 0.6459385457864314, "grad_norm": 2.1866092681884766, "learning_rate": 1.5642675432084463e-05, "loss": 0.4537, "step": 5308 }, { "epoch": 0.6460602372984484, "grad_norm": 2.061885356903076, "learning_rate": 1.5641064847405957e-05, "loss": 0.4952, "step": 5309 }, { "epoch": 0.6461819288104654, "grad_norm": 1.0812735557556152, "learning_rate": 1.5639454048073256e-05, "loss": 0.4955, "step": 5310 }, { "epoch": 0.6463036203224825, "grad_norm": 2.22967529296875, "learning_rate": 1.563784303414765e-05, "loss": 0.4669, "step": 5311 }, { "epoch": 0.6464253118344996, "grad_norm": 0.7430720925331116, "learning_rate": 1.563623180569045e-05, "loss": 0.4569, "step": 5312 }, { "epoch": 0.6465470033465166, "grad_norm": 3.014439105987549, "learning_rate": 1.5634620362762963e-05, "loss": 0.4925, "step": 5313 }, { "epoch": 0.6466686948585336, "grad_norm": 2.8883817195892334, "learning_rate": 1.563300870542651e-05, "loss": 0.4759, "step": 5314 }, { "epoch": 0.6467903863705506, "grad_norm": 1.9737778902053833, "learning_rate": 1.5631396833742417e-05, "loss": 0.4736, "step": 5315 }, { "epoch": 0.6469120778825677, "grad_norm": 3.116694211959839, "learning_rate": 1.562978474777202e-05, "loss": 0.543, "step": 5316 }, { "epoch": 0.6470337693945847, "grad_norm": 0.757223904132843, "learning_rate": 1.5628172447576652e-05, "loss": 0.4693, "step": 5317 }, { "epoch": 0.6471554609066018, "grad_norm": 3.094352960586548, "learning_rate": 1.562655993321768e-05, "loss": 0.5112, "step": 5318 }, { "epoch": 0.6472771524186188, "grad_norm": 1.4840407371520996, "learning_rate": 1.5624947204756454e-05, "loss": 0.4901, "step": 5319 }, { "epoch": 0.6473988439306358, "grad_norm": 1.1860746145248413, "learning_rate": 1.5623334262254343e-05, "loss": 0.5034, "step": 5320 }, { "epoch": 0.6475205354426529, "grad_norm": 1.8971335887908936, "learning_rate": 1.562172110577272e-05, "loss": 0.4777, "step": 5321 }, { "epoch": 0.6476422269546699, "grad_norm": 2.2858195304870605, "learning_rate": 1.562010773537298e-05, "loss": 0.4795, "step": 5322 }, { "epoch": 0.6477639184666869, "grad_norm": 2.7201476097106934, "learning_rate": 1.5618494151116505e-05, "loss": 0.539, "step": 5323 }, { "epoch": 0.647885609978704, "grad_norm": 2.1227800846099854, "learning_rate": 1.5616880353064696e-05, "loss": 0.5149, "step": 5324 }, { "epoch": 0.648007301490721, "grad_norm": 2.547903299331665, "learning_rate": 1.5615266341278966e-05, "loss": 0.4574, "step": 5325 }, { "epoch": 0.6481289930027381, "grad_norm": 2.562516689300537, "learning_rate": 1.561365211582073e-05, "loss": 0.4475, "step": 5326 }, { "epoch": 0.6482506845147551, "grad_norm": 0.7874716520309448, "learning_rate": 1.5612037676751407e-05, "loss": 0.483, "step": 5327 }, { "epoch": 0.6483723760267721, "grad_norm": 1.3339600563049316, "learning_rate": 1.5610423024132435e-05, "loss": 0.4822, "step": 5328 }, { "epoch": 0.6484940675387891, "grad_norm": 0.5802419781684875, "learning_rate": 1.5608808158025256e-05, "loss": 0.4078, "step": 5329 }, { "epoch": 0.6486157590508062, "grad_norm": 1.7999463081359863, "learning_rate": 1.5607193078491314e-05, "loss": 0.4811, "step": 5330 }, { "epoch": 0.6487374505628233, "grad_norm": 1.8625408411026, "learning_rate": 1.5605577785592073e-05, "loss": 0.4349, "step": 5331 }, { "epoch": 0.6488591420748403, "grad_norm": 1.455593228340149, "learning_rate": 1.560396227938899e-05, "loss": 0.4569, "step": 5332 }, { "epoch": 0.6489808335868573, "grad_norm": 1.1102403402328491, "learning_rate": 1.5602346559943548e-05, "loss": 0.4034, "step": 5333 }, { "epoch": 0.6491025250988743, "grad_norm": 0.7270345091819763, "learning_rate": 1.560073062731722e-05, "loss": 0.4186, "step": 5334 }, { "epoch": 0.6492242166108914, "grad_norm": 1.4078460931777954, "learning_rate": 1.5599114481571498e-05, "loss": 0.4467, "step": 5335 }, { "epoch": 0.6493459081229084, "grad_norm": 1.071409821510315, "learning_rate": 1.5597498122767884e-05, "loss": 0.473, "step": 5336 }, { "epoch": 0.6494675996349255, "grad_norm": 1.9835041761398315, "learning_rate": 1.5595881550967873e-05, "loss": 0.4919, "step": 5337 }, { "epoch": 0.6495892911469425, "grad_norm": 2.0246047973632812, "learning_rate": 1.5594264766232993e-05, "loss": 0.5229, "step": 5338 }, { "epoch": 0.6497109826589595, "grad_norm": 0.6315078735351562, "learning_rate": 1.5592647768624756e-05, "loss": 0.4514, "step": 5339 }, { "epoch": 0.6498326741709766, "grad_norm": 2.390519142150879, "learning_rate": 1.5591030558204696e-05, "loss": 0.5189, "step": 5340 }, { "epoch": 0.6499543656829936, "grad_norm": 2.800323486328125, "learning_rate": 1.558941313503435e-05, "loss": 0.5062, "step": 5341 }, { "epoch": 0.6500760571950106, "grad_norm": 4.180983543395996, "learning_rate": 1.5587795499175265e-05, "loss": 0.4858, "step": 5342 }, { "epoch": 0.6501977487070277, "grad_norm": 3.3033080101013184, "learning_rate": 1.5586177650688996e-05, "loss": 0.4585, "step": 5343 }, { "epoch": 0.6503194402190448, "grad_norm": 2.4165728092193604, "learning_rate": 1.5584559589637108e-05, "loss": 0.471, "step": 5344 }, { "epoch": 0.6504411317310618, "grad_norm": 4.680166244506836, "learning_rate": 1.558294131608116e-05, "loss": 0.3864, "step": 5345 }, { "epoch": 0.6505628232430788, "grad_norm": 1.562940001487732, "learning_rate": 1.5581322830082747e-05, "loss": 0.481, "step": 5346 }, { "epoch": 0.6506845147550958, "grad_norm": 1.728887677192688, "learning_rate": 1.5579704131703442e-05, "loss": 0.4934, "step": 5347 }, { "epoch": 0.6508062062671128, "grad_norm": 2.84002423286438, "learning_rate": 1.557808522100485e-05, "loss": 0.4873, "step": 5348 }, { "epoch": 0.65092789777913, "grad_norm": 1.885184407234192, "learning_rate": 1.5576466098048566e-05, "loss": 0.3914, "step": 5349 }, { "epoch": 0.651049589291147, "grad_norm": 0.6747542023658752, "learning_rate": 1.5574846762896204e-05, "loss": 0.4383, "step": 5350 }, { "epoch": 0.651171280803164, "grad_norm": 2.4659926891326904, "learning_rate": 1.5573227215609383e-05, "loss": 0.4715, "step": 5351 }, { "epoch": 0.651292972315181, "grad_norm": 2.432208299636841, "learning_rate": 1.557160745624973e-05, "loss": 0.5088, "step": 5352 }, { "epoch": 0.651414663827198, "grad_norm": 2.133415937423706, "learning_rate": 1.5569987484878887e-05, "loss": 0.4315, "step": 5353 }, { "epoch": 0.6515363553392151, "grad_norm": 0.7251248359680176, "learning_rate": 1.5568367301558486e-05, "loss": 0.3825, "step": 5354 }, { "epoch": 0.6516580468512321, "grad_norm": 3.2056667804718018, "learning_rate": 1.5566746906350184e-05, "loss": 0.4797, "step": 5355 }, { "epoch": 0.6517797383632492, "grad_norm": 0.8430180549621582, "learning_rate": 1.556512629931564e-05, "loss": 0.4634, "step": 5356 }, { "epoch": 0.6519014298752662, "grad_norm": 1.3142163753509521, "learning_rate": 1.5563505480516517e-05, "loss": 0.4085, "step": 5357 }, { "epoch": 0.6520231213872832, "grad_norm": 1.23605477809906, "learning_rate": 1.55618844500145e-05, "loss": 0.4429, "step": 5358 }, { "epoch": 0.6521448128993003, "grad_norm": 1.4192166328430176, "learning_rate": 1.5560263207871263e-05, "loss": 0.5062, "step": 5359 }, { "epoch": 0.6522665044113173, "grad_norm": 0.722477376461029, "learning_rate": 1.5558641754148506e-05, "loss": 0.4979, "step": 5360 }, { "epoch": 0.6523881959233343, "grad_norm": 1.8833805322647095, "learning_rate": 1.5557020088907924e-05, "loss": 0.4776, "step": 5361 }, { "epoch": 0.6525098874353514, "grad_norm": 1.0407469272613525, "learning_rate": 1.5555398212211225e-05, "loss": 0.4656, "step": 5362 }, { "epoch": 0.6526315789473685, "grad_norm": 1.2440108060836792, "learning_rate": 1.5553776124120123e-05, "loss": 0.4512, "step": 5363 }, { "epoch": 0.6527532704593855, "grad_norm": 1.5020626783370972, "learning_rate": 1.5552153824696344e-05, "loss": 0.4823, "step": 5364 }, { "epoch": 0.6528749619714025, "grad_norm": 1.579473614692688, "learning_rate": 1.555053131400162e-05, "loss": 0.4724, "step": 5365 }, { "epoch": 0.6529966534834195, "grad_norm": 1.1490020751953125, "learning_rate": 1.5548908592097692e-05, "loss": 0.4654, "step": 5366 }, { "epoch": 0.6531183449954365, "grad_norm": 1.049917459487915, "learning_rate": 1.554728565904631e-05, "loss": 0.494, "step": 5367 }, { "epoch": 0.6532400365074537, "grad_norm": 1.5266389846801758, "learning_rate": 1.554566251490922e-05, "loss": 0.5075, "step": 5368 }, { "epoch": 0.6533617280194707, "grad_norm": 2.6105377674102783, "learning_rate": 1.5544039159748194e-05, "loss": 0.4033, "step": 5369 }, { "epoch": 0.6534834195314877, "grad_norm": 0.7732549905776978, "learning_rate": 1.5542415593625002e-05, "loss": 0.4629, "step": 5370 }, { "epoch": 0.6536051110435047, "grad_norm": 2.330594062805176, "learning_rate": 1.5540791816601423e-05, "loss": 0.5096, "step": 5371 }, { "epoch": 0.6537268025555217, "grad_norm": 2.2057223320007324, "learning_rate": 1.553916782873925e-05, "loss": 0.5194, "step": 5372 }, { "epoch": 0.6538484940675388, "grad_norm": 1.3206000328063965, "learning_rate": 1.5537543630100277e-05, "loss": 0.507, "step": 5373 }, { "epoch": 0.6539701855795558, "grad_norm": 0.6833221316337585, "learning_rate": 1.55359192207463e-05, "loss": 0.474, "step": 5374 }, { "epoch": 0.6540918770915729, "grad_norm": 0.6391467452049255, "learning_rate": 1.5534294600739143e-05, "loss": 0.5138, "step": 5375 }, { "epoch": 0.6542135686035899, "grad_norm": 4.589588642120361, "learning_rate": 1.553266977014062e-05, "loss": 0.4424, "step": 5376 }, { "epoch": 0.6543352601156069, "grad_norm": 2.5533909797668457, "learning_rate": 1.5531044729012558e-05, "loss": 0.4476, "step": 5377 }, { "epoch": 0.654456951627624, "grad_norm": 3.4447693824768066, "learning_rate": 1.55294194774168e-05, "loss": 0.4461, "step": 5378 }, { "epoch": 0.654578643139641, "grad_norm": 1.4094719886779785, "learning_rate": 1.5527794015415184e-05, "loss": 0.4644, "step": 5379 }, { "epoch": 0.654700334651658, "grad_norm": 1.311448335647583, "learning_rate": 1.5526168343069567e-05, "loss": 0.4886, "step": 5380 }, { "epoch": 0.6548220261636751, "grad_norm": 0.9678633809089661, "learning_rate": 1.55245424604418e-05, "loss": 0.4304, "step": 5381 }, { "epoch": 0.6549437176756921, "grad_norm": 0.8762246370315552, "learning_rate": 1.552291636759376e-05, "loss": 0.3999, "step": 5382 }, { "epoch": 0.6550654091877092, "grad_norm": 1.3736207485198975, "learning_rate": 1.552129006458732e-05, "loss": 0.393, "step": 5383 }, { "epoch": 0.6551871006997262, "grad_norm": 3.8743481636047363, "learning_rate": 1.5519663551484363e-05, "loss": 0.4919, "step": 5384 }, { "epoch": 0.6553087922117432, "grad_norm": 4.454881191253662, "learning_rate": 1.5518036828346785e-05, "loss": 0.479, "step": 5385 }, { "epoch": 0.6554304837237602, "grad_norm": 7.7504963874816895, "learning_rate": 1.5516409895236485e-05, "loss": 0.6064, "step": 5386 }, { "epoch": 0.6555521752357774, "grad_norm": 3.546290397644043, "learning_rate": 1.5514782752215368e-05, "loss": 0.4805, "step": 5387 }, { "epoch": 0.6556738667477944, "grad_norm": 0.7145805358886719, "learning_rate": 1.551315539934535e-05, "loss": 0.3471, "step": 5388 }, { "epoch": 0.6557955582598114, "grad_norm": 1.957080364227295, "learning_rate": 1.5511527836688364e-05, "loss": 0.4635, "step": 5389 }, { "epoch": 0.6559172497718284, "grad_norm": 4.414008617401123, "learning_rate": 1.550990006430633e-05, "loss": 0.5479, "step": 5390 }, { "epoch": 0.6560389412838454, "grad_norm": 2.238805055618286, "learning_rate": 1.5508272082261196e-05, "loss": 0.4913, "step": 5391 }, { "epoch": 0.6561606327958625, "grad_norm": 1.4231898784637451, "learning_rate": 1.5506643890614906e-05, "loss": 0.4437, "step": 5392 }, { "epoch": 0.6562823243078795, "grad_norm": 1.734437108039856, "learning_rate": 1.550501548942942e-05, "loss": 0.4563, "step": 5393 }, { "epoch": 0.6564040158198966, "grad_norm": 0.7681286931037903, "learning_rate": 1.5503386878766698e-05, "loss": 0.5023, "step": 5394 }, { "epoch": 0.6565257073319136, "grad_norm": 0.8565199375152588, "learning_rate": 1.5501758058688717e-05, "loss": 0.4895, "step": 5395 }, { "epoch": 0.6566473988439306, "grad_norm": 4.39603853225708, "learning_rate": 1.550012902925745e-05, "loss": 0.3932, "step": 5396 }, { "epoch": 0.6567690903559477, "grad_norm": 4.179236888885498, "learning_rate": 1.5498499790534892e-05, "loss": 0.4024, "step": 5397 }, { "epoch": 0.6568907818679647, "grad_norm": 3.2235324382781982, "learning_rate": 1.5496870342583033e-05, "loss": 0.434, "step": 5398 }, { "epoch": 0.6570124733799817, "grad_norm": 0.9747445583343506, "learning_rate": 1.5495240685463882e-05, "loss": 0.4964, "step": 5399 }, { "epoch": 0.6571341648919988, "grad_norm": 4.188055038452148, "learning_rate": 1.5493610819239446e-05, "loss": 0.4294, "step": 5400 }, { "epoch": 0.6572558564040158, "grad_norm": 2.397346258163452, "learning_rate": 1.5491980743971747e-05, "loss": 0.4005, "step": 5401 }, { "epoch": 0.6573775479160329, "grad_norm": 2.431401491165161, "learning_rate": 1.5490350459722815e-05, "loss": 0.4571, "step": 5402 }, { "epoch": 0.6574992394280499, "grad_norm": 0.6270154118537903, "learning_rate": 1.5488719966554685e-05, "loss": 0.3771, "step": 5403 }, { "epoch": 0.6576209309400669, "grad_norm": 2.432560682296753, "learning_rate": 1.5487089264529395e-05, "loss": 0.4197, "step": 5404 }, { "epoch": 0.6577426224520839, "grad_norm": 4.105227947235107, "learning_rate": 1.5485458353709003e-05, "loss": 0.5095, "step": 5405 }, { "epoch": 0.657864313964101, "grad_norm": 4.4798970222473145, "learning_rate": 1.5483827234155566e-05, "loss": 0.5282, "step": 5406 }, { "epoch": 0.6579860054761181, "grad_norm": 1.9804679155349731, "learning_rate": 1.5482195905931155e-05, "loss": 0.435, "step": 5407 }, { "epoch": 0.6581076969881351, "grad_norm": 2.009260416030884, "learning_rate": 1.548056436909784e-05, "loss": 0.4863, "step": 5408 }, { "epoch": 0.6582293885001521, "grad_norm": 1.7735986709594727, "learning_rate": 1.5478932623717704e-05, "loss": 0.458, "step": 5409 }, { "epoch": 0.6583510800121691, "grad_norm": 1.1705528497695923, "learning_rate": 1.547730066985284e-05, "loss": 0.4082, "step": 5410 }, { "epoch": 0.6584727715241862, "grad_norm": 1.4086755514144897, "learning_rate": 1.5475668507565355e-05, "loss": 0.5022, "step": 5411 }, { "epoch": 0.6585944630362032, "grad_norm": 3.3585305213928223, "learning_rate": 1.5474036136917343e-05, "loss": 0.4414, "step": 5412 }, { "epoch": 0.6587161545482203, "grad_norm": 1.8085182905197144, "learning_rate": 1.547240355797093e-05, "loss": 0.509, "step": 5413 }, { "epoch": 0.6588378460602373, "grad_norm": 3.6988284587860107, "learning_rate": 1.547077077078823e-05, "loss": 0.4333, "step": 5414 }, { "epoch": 0.6589595375722543, "grad_norm": 1.0893356800079346, "learning_rate": 1.546913777543138e-05, "loss": 0.5473, "step": 5415 }, { "epoch": 0.6590812290842714, "grad_norm": 2.670675277709961, "learning_rate": 1.546750457196252e-05, "loss": 0.4735, "step": 5416 }, { "epoch": 0.6592029205962884, "grad_norm": 2.467972993850708, "learning_rate": 1.546587116044379e-05, "loss": 0.4606, "step": 5417 }, { "epoch": 0.6593246121083054, "grad_norm": 2.314988136291504, "learning_rate": 1.546423754093735e-05, "loss": 0.4472, "step": 5418 }, { "epoch": 0.6594463036203225, "grad_norm": 1.3508634567260742, "learning_rate": 1.546260371350536e-05, "loss": 0.4122, "step": 5419 }, { "epoch": 0.6595679951323395, "grad_norm": 0.7820689678192139, "learning_rate": 1.5460969678209992e-05, "loss": 0.4673, "step": 5420 }, { "epoch": 0.6596896866443566, "grad_norm": 4.751261234283447, "learning_rate": 1.5459335435113427e-05, "loss": 0.5512, "step": 5421 }, { "epoch": 0.6598113781563736, "grad_norm": 5.589266300201416, "learning_rate": 1.5457700984277844e-05, "loss": 0.5782, "step": 5422 }, { "epoch": 0.6599330696683906, "grad_norm": 3.081406593322754, "learning_rate": 1.5456066325765446e-05, "loss": 0.4664, "step": 5423 }, { "epoch": 0.6600547611804076, "grad_norm": 0.7796817421913147, "learning_rate": 1.545443145963843e-05, "loss": 0.3979, "step": 5424 }, { "epoch": 0.6601764526924248, "grad_norm": 3.147052764892578, "learning_rate": 1.5452796385959007e-05, "loss": 0.5019, "step": 5425 }, { "epoch": 0.6602981442044418, "grad_norm": 3.250377893447876, "learning_rate": 1.5451161104789392e-05, "loss": 0.5038, "step": 5426 }, { "epoch": 0.6604198357164588, "grad_norm": 3.661663770675659, "learning_rate": 1.5449525616191817e-05, "loss": 0.5171, "step": 5427 }, { "epoch": 0.6605415272284758, "grad_norm": 3.851524591445923, "learning_rate": 1.544788992022851e-05, "loss": 0.5036, "step": 5428 }, { "epoch": 0.6606632187404928, "grad_norm": 1.04959237575531, "learning_rate": 1.544625401696172e-05, "loss": 0.4604, "step": 5429 }, { "epoch": 0.6607849102525098, "grad_norm": 0.8070778846740723, "learning_rate": 1.5444617906453685e-05, "loss": 0.4762, "step": 5430 }, { "epoch": 0.660906601764527, "grad_norm": 0.5956177711486816, "learning_rate": 1.544298158876667e-05, "loss": 0.4951, "step": 5431 }, { "epoch": 0.661028293276544, "grad_norm": 2.463252305984497, "learning_rate": 1.544134506396294e-05, "loss": 0.484, "step": 5432 }, { "epoch": 0.661149984788561, "grad_norm": 4.864258289337158, "learning_rate": 1.5439708332104772e-05, "loss": 0.48, "step": 5433 }, { "epoch": 0.661271676300578, "grad_norm": 1.0821592807769775, "learning_rate": 1.5438071393254437e-05, "loss": 0.4501, "step": 5434 }, { "epoch": 0.6613933678125951, "grad_norm": 0.5609249472618103, "learning_rate": 1.543643424747423e-05, "loss": 0.4688, "step": 5435 }, { "epoch": 0.6615150593246121, "grad_norm": 1.0449097156524658, "learning_rate": 1.543479689482645e-05, "loss": 0.4628, "step": 5436 }, { "epoch": 0.6616367508366291, "grad_norm": 2.5006985664367676, "learning_rate": 1.5433159335373396e-05, "loss": 0.4908, "step": 5437 }, { "epoch": 0.6617584423486462, "grad_norm": 1.795767903327942, "learning_rate": 1.5431521569177382e-05, "loss": 0.481, "step": 5438 }, { "epoch": 0.6618801338606632, "grad_norm": 2.1494393348693848, "learning_rate": 1.5429883596300732e-05, "loss": 0.4861, "step": 5439 }, { "epoch": 0.6620018253726803, "grad_norm": 0.6086944937705994, "learning_rate": 1.5428245416805773e-05, "loss": 0.4491, "step": 5440 }, { "epoch": 0.6621235168846973, "grad_norm": 1.006093144416809, "learning_rate": 1.5426607030754833e-05, "loss": 0.4501, "step": 5441 }, { "epoch": 0.6622452083967143, "grad_norm": 0.8190460801124573, "learning_rate": 1.5424968438210274e-05, "loss": 0.4526, "step": 5442 }, { "epoch": 0.6623668999087313, "grad_norm": 0.9641715288162231, "learning_rate": 1.542332963923443e-05, "loss": 0.4415, "step": 5443 }, { "epoch": 0.6624885914207485, "grad_norm": 2.7706751823425293, "learning_rate": 1.542169063388967e-05, "loss": 0.5111, "step": 5444 }, { "epoch": 0.6626102829327655, "grad_norm": 0.5637524724006653, "learning_rate": 1.5420051422238356e-05, "loss": 0.4413, "step": 5445 }, { "epoch": 0.6627319744447825, "grad_norm": 1.2848612070083618, "learning_rate": 1.5418412004342867e-05, "loss": 0.4529, "step": 5446 }, { "epoch": 0.6628536659567995, "grad_norm": 0.5230322480201721, "learning_rate": 1.541677238026559e-05, "loss": 0.4474, "step": 5447 }, { "epoch": 0.6629753574688165, "grad_norm": 2.7056941986083984, "learning_rate": 1.541513255006891e-05, "loss": 0.4824, "step": 5448 }, { "epoch": 0.6630970489808335, "grad_norm": 1.772239089012146, "learning_rate": 1.5413492513815226e-05, "loss": 0.4833, "step": 5449 }, { "epoch": 0.6632187404928507, "grad_norm": 2.0207180976867676, "learning_rate": 1.5411852271566945e-05, "loss": 0.4532, "step": 5450 }, { "epoch": 0.6633404320048677, "grad_norm": 0.6144729852676392, "learning_rate": 1.5410211823386488e-05, "loss": 0.5102, "step": 5451 }, { "epoch": 0.6634621235168847, "grad_norm": 3.442842960357666, "learning_rate": 1.540857116933627e-05, "loss": 0.4237, "step": 5452 }, { "epoch": 0.6635838150289017, "grad_norm": 0.664132833480835, "learning_rate": 1.5406930309478726e-05, "loss": 0.4604, "step": 5453 }, { "epoch": 0.6637055065409188, "grad_norm": 1.3695380687713623, "learning_rate": 1.5405289243876286e-05, "loss": 0.4292, "step": 5454 }, { "epoch": 0.6638271980529358, "grad_norm": 0.751721978187561, "learning_rate": 1.5403647972591407e-05, "loss": 0.4654, "step": 5455 }, { "epoch": 0.6639488895649528, "grad_norm": 2.613236904144287, "learning_rate": 1.5402006495686536e-05, "loss": 0.3987, "step": 5456 }, { "epoch": 0.6640705810769699, "grad_norm": 0.7441973090171814, "learning_rate": 1.540036481322414e-05, "loss": 0.4523, "step": 5457 }, { "epoch": 0.6641922725889869, "grad_norm": 3.3111002445220947, "learning_rate": 1.5398722925266678e-05, "loss": 0.5148, "step": 5458 }, { "epoch": 0.664313964101004, "grad_norm": 1.8782531023025513, "learning_rate": 1.5397080831876642e-05, "loss": 0.4594, "step": 5459 }, { "epoch": 0.664435655613021, "grad_norm": 0.7957891821861267, "learning_rate": 1.53954385331165e-05, "loss": 0.4332, "step": 5460 }, { "epoch": 0.664557347125038, "grad_norm": 1.368462324142456, "learning_rate": 1.539379602904876e-05, "loss": 0.4418, "step": 5461 }, { "epoch": 0.664679038637055, "grad_norm": 2.4055609703063965, "learning_rate": 1.5392153319735913e-05, "loss": 0.5075, "step": 5462 }, { "epoch": 0.6648007301490721, "grad_norm": 1.81770920753479, "learning_rate": 1.5390510405240476e-05, "loss": 0.4736, "step": 5463 }, { "epoch": 0.6649224216610892, "grad_norm": 0.7309744358062744, "learning_rate": 1.5388867285624955e-05, "loss": 0.4946, "step": 5464 }, { "epoch": 0.6650441131731062, "grad_norm": 2.999840497970581, "learning_rate": 1.5387223960951882e-05, "loss": 0.5051, "step": 5465 }, { "epoch": 0.6651658046851232, "grad_norm": 1.6216768026351929, "learning_rate": 1.538558043128379e-05, "loss": 0.4504, "step": 5466 }, { "epoch": 0.6652874961971402, "grad_norm": 2.0688908100128174, "learning_rate": 1.538393669668321e-05, "loss": 0.4362, "step": 5467 }, { "epoch": 0.6654091877091572, "grad_norm": 2.377845525741577, "learning_rate": 1.5382292757212694e-05, "loss": 0.5253, "step": 5468 }, { "epoch": 0.6655308792211744, "grad_norm": 4.468915939331055, "learning_rate": 1.53806486129348e-05, "loss": 0.409, "step": 5469 }, { "epoch": 0.6656525707331914, "grad_norm": 0.770176351070404, "learning_rate": 1.537900426391209e-05, "loss": 0.4928, "step": 5470 }, { "epoch": 0.6657742622452084, "grad_norm": 1.4929450750350952, "learning_rate": 1.5377359710207132e-05, "loss": 0.4306, "step": 5471 }, { "epoch": 0.6658959537572254, "grad_norm": 1.6689743995666504, "learning_rate": 1.5375714951882505e-05, "loss": 0.4661, "step": 5472 }, { "epoch": 0.6660176452692425, "grad_norm": 0.7525126934051514, "learning_rate": 1.53740699890008e-05, "loss": 0.4826, "step": 5473 }, { "epoch": 0.6661393367812595, "grad_norm": 2.3113670349121094, "learning_rate": 1.5372424821624606e-05, "loss": 0.5149, "step": 5474 }, { "epoch": 0.6662610282932765, "grad_norm": 1.2864298820495605, "learning_rate": 1.5370779449816526e-05, "loss": 0.4331, "step": 5475 }, { "epoch": 0.6663827198052936, "grad_norm": 4.866365432739258, "learning_rate": 1.5369133873639176e-05, "loss": 0.5607, "step": 5476 }, { "epoch": 0.6665044113173106, "grad_norm": 1.059059500694275, "learning_rate": 1.5367488093155168e-05, "loss": 0.4295, "step": 5477 }, { "epoch": 0.6666261028293277, "grad_norm": 1.578230619430542, "learning_rate": 1.536584210842712e-05, "loss": 0.5176, "step": 5478 }, { "epoch": 0.6667477943413447, "grad_norm": 2.8086841106414795, "learning_rate": 1.536419591951768e-05, "loss": 0.5332, "step": 5479 }, { "epoch": 0.6668694858533617, "grad_norm": 1.0661730766296387, "learning_rate": 1.5362549526489484e-05, "loss": 0.4423, "step": 5480 }, { "epoch": 0.6669911773653787, "grad_norm": 2.354785203933716, "learning_rate": 1.5360902929405176e-05, "loss": 0.5158, "step": 5481 }, { "epoch": 0.6671128688773958, "grad_norm": 1.889513373374939, "learning_rate": 1.5359256128327413e-05, "loss": 0.4993, "step": 5482 }, { "epoch": 0.6672345603894129, "grad_norm": 0.8707626461982727, "learning_rate": 1.5357609123318864e-05, "loss": 0.4848, "step": 5483 }, { "epoch": 0.6673562519014299, "grad_norm": 2.764751434326172, "learning_rate": 1.5355961914442198e-05, "loss": 0.4228, "step": 5484 }, { "epoch": 0.6674779434134469, "grad_norm": 3.466641664505005, "learning_rate": 1.5354314501760093e-05, "loss": 0.4407, "step": 5485 }, { "epoch": 0.6675996349254639, "grad_norm": 2.373563766479492, "learning_rate": 1.535266688533524e-05, "loss": 0.4894, "step": 5486 }, { "epoch": 0.6677213264374809, "grad_norm": 3.4814956188201904, "learning_rate": 1.535101906523033e-05, "loss": 0.4381, "step": 5487 }, { "epoch": 0.6678430179494981, "grad_norm": 2.9427573680877686, "learning_rate": 1.5349371041508073e-05, "loss": 0.4122, "step": 5488 }, { "epoch": 0.6679647094615151, "grad_norm": 0.8163983821868896, "learning_rate": 1.5347722814231172e-05, "loss": 0.4651, "step": 5489 }, { "epoch": 0.6680864009735321, "grad_norm": 0.5386254787445068, "learning_rate": 1.534607438346235e-05, "loss": 0.4104, "step": 5490 }, { "epoch": 0.6682080924855491, "grad_norm": 5.744544506072998, "learning_rate": 1.5344425749264332e-05, "loss": 0.5607, "step": 5491 }, { "epoch": 0.6683297839975662, "grad_norm": 6.783387184143066, "learning_rate": 1.5342776911699853e-05, "loss": 0.5923, "step": 5492 }, { "epoch": 0.6684514755095832, "grad_norm": 4.496654987335205, "learning_rate": 1.534112787083165e-05, "loss": 0.5294, "step": 5493 }, { "epoch": 0.6685731670216002, "grad_norm": 2.8708765506744385, "learning_rate": 1.533947862672248e-05, "loss": 0.4533, "step": 5494 }, { "epoch": 0.6686948585336173, "grad_norm": 4.694194793701172, "learning_rate": 1.5337829179435094e-05, "loss": 0.5418, "step": 5495 }, { "epoch": 0.6688165500456343, "grad_norm": 1.285262107849121, "learning_rate": 1.533617952903226e-05, "loss": 0.4647, "step": 5496 }, { "epoch": 0.6689382415576514, "grad_norm": 2.4946682453155518, "learning_rate": 1.5334529675576744e-05, "loss": 0.4507, "step": 5497 }, { "epoch": 0.6690599330696684, "grad_norm": 1.1124497652053833, "learning_rate": 1.533287961913134e-05, "loss": 0.416, "step": 5498 }, { "epoch": 0.6691816245816854, "grad_norm": 2.001948118209839, "learning_rate": 1.5331229359758822e-05, "loss": 0.4752, "step": 5499 }, { "epoch": 0.6693033160937024, "grad_norm": 2.354109764099121, "learning_rate": 1.5329578897521996e-05, "loss": 0.4701, "step": 5500 }, { "epoch": 0.6694250076057195, "grad_norm": 3.811180353164673, "learning_rate": 1.5327928232483662e-05, "loss": 0.3861, "step": 5501 }, { "epoch": 0.6695466991177366, "grad_norm": 1.0407211780548096, "learning_rate": 1.5326277364706627e-05, "loss": 0.4993, "step": 5502 }, { "epoch": 0.6696683906297536, "grad_norm": 1.975663423538208, "learning_rate": 1.5324626294253717e-05, "loss": 0.4661, "step": 5503 }, { "epoch": 0.6697900821417706, "grad_norm": 0.8591955900192261, "learning_rate": 1.532297502118775e-05, "loss": 0.4676, "step": 5504 }, { "epoch": 0.6699117736537876, "grad_norm": 1.231006145477295, "learning_rate": 1.532132354557157e-05, "loss": 0.5024, "step": 5505 }, { "epoch": 0.6700334651658046, "grad_norm": 1.9702006578445435, "learning_rate": 1.5319671867468013e-05, "loss": 0.442, "step": 5506 }, { "epoch": 0.6701551566778218, "grad_norm": 2.3092076778411865, "learning_rate": 1.531801998693993e-05, "loss": 0.4003, "step": 5507 }, { "epoch": 0.6702768481898388, "grad_norm": 1.0356674194335938, "learning_rate": 1.531636790405018e-05, "loss": 0.4754, "step": 5508 }, { "epoch": 0.6703985397018558, "grad_norm": 2.3935837745666504, "learning_rate": 1.5314715618861626e-05, "loss": 0.5382, "step": 5509 }, { "epoch": 0.6705202312138728, "grad_norm": 1.5197428464889526, "learning_rate": 1.531306313143714e-05, "loss": 0.4783, "step": 5510 }, { "epoch": 0.6706419227258898, "grad_norm": 1.051122784614563, "learning_rate": 1.531141044183961e-05, "loss": 0.4467, "step": 5511 }, { "epoch": 0.6707636142379069, "grad_norm": 0.6544637680053711, "learning_rate": 1.5309757550131913e-05, "loss": 0.4602, "step": 5512 }, { "epoch": 0.6708853057499239, "grad_norm": 1.5131142139434814, "learning_rate": 1.5308104456376955e-05, "loss": 0.4584, "step": 5513 }, { "epoch": 0.671006997261941, "grad_norm": 1.4282640218734741, "learning_rate": 1.5306451160637633e-05, "loss": 0.4791, "step": 5514 }, { "epoch": 0.671128688773958, "grad_norm": 1.8938522338867188, "learning_rate": 1.530479766297686e-05, "loss": 0.4683, "step": 5515 }, { "epoch": 0.6712503802859751, "grad_norm": 1.9751620292663574, "learning_rate": 1.5303143963457558e-05, "loss": 0.4577, "step": 5516 }, { "epoch": 0.6713720717979921, "grad_norm": 1.6509391069412231, "learning_rate": 1.530149006214265e-05, "loss": 0.4275, "step": 5517 }, { "epoch": 0.6714937633100091, "grad_norm": 4.49000358581543, "learning_rate": 1.529983595909507e-05, "loss": 0.3828, "step": 5518 }, { "epoch": 0.6716154548220261, "grad_norm": 2.0262069702148438, "learning_rate": 1.5298181654377763e-05, "loss": 0.4375, "step": 5519 }, { "epoch": 0.6717371463340432, "grad_norm": 0.6181219816207886, "learning_rate": 1.5296527148053677e-05, "loss": 0.438, "step": 5520 }, { "epoch": 0.6718588378460603, "grad_norm": 1.0754001140594482, "learning_rate": 1.5294872440185772e-05, "loss": 0.4789, "step": 5521 }, { "epoch": 0.6719805293580773, "grad_norm": 1.1276960372924805, "learning_rate": 1.529321753083701e-05, "loss": 0.4451, "step": 5522 }, { "epoch": 0.6721022208700943, "grad_norm": 1.2000776529312134, "learning_rate": 1.529156242007036e-05, "loss": 0.4504, "step": 5523 }, { "epoch": 0.6722239123821113, "grad_norm": 2.402139663696289, "learning_rate": 1.5289907107948813e-05, "loss": 0.4979, "step": 5524 }, { "epoch": 0.6723456038941283, "grad_norm": 1.8518776893615723, "learning_rate": 1.528825159453535e-05, "loss": 0.4142, "step": 5525 }, { "epoch": 0.6724672954061455, "grad_norm": 1.219595193862915, "learning_rate": 1.5286595879892964e-05, "loss": 0.494, "step": 5526 }, { "epoch": 0.6725889869181625, "grad_norm": 2.703617572784424, "learning_rate": 1.5284939964084664e-05, "loss": 0.5516, "step": 5527 }, { "epoch": 0.6727106784301795, "grad_norm": 0.9608203768730164, "learning_rate": 1.528328384717346e-05, "loss": 0.4381, "step": 5528 }, { "epoch": 0.6728323699421965, "grad_norm": 2.390717029571533, "learning_rate": 1.528162752922237e-05, "loss": 0.4369, "step": 5529 }, { "epoch": 0.6729540614542135, "grad_norm": 1.0250049829483032, "learning_rate": 1.527997101029442e-05, "loss": 0.4801, "step": 5530 }, { "epoch": 0.6730757529662306, "grad_norm": 1.4124159812927246, "learning_rate": 1.5278314290452643e-05, "loss": 0.4673, "step": 5531 }, { "epoch": 0.6731974444782477, "grad_norm": 1.3681262731552124, "learning_rate": 1.527665736976008e-05, "loss": 0.4686, "step": 5532 }, { "epoch": 0.6733191359902647, "grad_norm": 0.6639538407325745, "learning_rate": 1.527500024827979e-05, "loss": 0.4615, "step": 5533 }, { "epoch": 0.6734408275022817, "grad_norm": 1.2778096199035645, "learning_rate": 1.5273342926074816e-05, "loss": 0.4352, "step": 5534 }, { "epoch": 0.6735625190142988, "grad_norm": 3.330084800720215, "learning_rate": 1.5271685403208226e-05, "loss": 0.5171, "step": 5535 }, { "epoch": 0.6736842105263158, "grad_norm": 1.220238447189331, "learning_rate": 1.5270027679743095e-05, "loss": 0.4413, "step": 5536 }, { "epoch": 0.6738059020383328, "grad_norm": 1.125942349433899, "learning_rate": 1.5268369755742507e-05, "loss": 0.4512, "step": 5537 }, { "epoch": 0.6739275935503498, "grad_norm": 2.4602012634277344, "learning_rate": 1.5266711631269543e-05, "loss": 0.5589, "step": 5538 }, { "epoch": 0.6740492850623669, "grad_norm": 2.895455837249756, "learning_rate": 1.52650533063873e-05, "loss": 0.5227, "step": 5539 }, { "epoch": 0.674170976574384, "grad_norm": 1.3872512578964233, "learning_rate": 1.526339478115888e-05, "loss": 0.5064, "step": 5540 }, { "epoch": 0.674292668086401, "grad_norm": 3.270491600036621, "learning_rate": 1.5261736055647397e-05, "loss": 0.3966, "step": 5541 }, { "epoch": 0.674414359598418, "grad_norm": 0.9828242659568787, "learning_rate": 1.526007712991596e-05, "loss": 0.5008, "step": 5542 }, { "epoch": 0.674536051110435, "grad_norm": 0.8394197821617126, "learning_rate": 1.5258418004027705e-05, "loss": 0.4652, "step": 5543 }, { "epoch": 0.674657742622452, "grad_norm": 1.5907459259033203, "learning_rate": 1.5256758678045757e-05, "loss": 0.4611, "step": 5544 }, { "epoch": 0.6747794341344692, "grad_norm": 0.7604082226753235, "learning_rate": 1.5255099152033266e-05, "loss": 0.4791, "step": 5545 }, { "epoch": 0.6749011256464862, "grad_norm": 1.8490198850631714, "learning_rate": 1.525343942605337e-05, "loss": 0.4198, "step": 5546 }, { "epoch": 0.6750228171585032, "grad_norm": 0.7479878664016724, "learning_rate": 1.525177950016923e-05, "loss": 0.4598, "step": 5547 }, { "epoch": 0.6751445086705202, "grad_norm": 0.5962470173835754, "learning_rate": 1.525011937444401e-05, "loss": 0.4445, "step": 5548 }, { "epoch": 0.6752662001825372, "grad_norm": 2.5411951541900635, "learning_rate": 1.5248459048940882e-05, "loss": 0.4763, "step": 5549 }, { "epoch": 0.6753878916945543, "grad_norm": 2.578291893005371, "learning_rate": 1.5246798523723023e-05, "loss": 0.512, "step": 5550 }, { "epoch": 0.6755095832065714, "grad_norm": 1.4908937215805054, "learning_rate": 1.524513779885362e-05, "loss": 0.4611, "step": 5551 }, { "epoch": 0.6756312747185884, "grad_norm": 1.360190987586975, "learning_rate": 1.5243476874395866e-05, "loss": 0.4921, "step": 5552 }, { "epoch": 0.6757529662306054, "grad_norm": 1.984915018081665, "learning_rate": 1.5241815750412964e-05, "loss": 0.468, "step": 5553 }, { "epoch": 0.6758746577426225, "grad_norm": 0.7403093576431274, "learning_rate": 1.5240154426968124e-05, "loss": 0.4391, "step": 5554 }, { "epoch": 0.6759963492546395, "grad_norm": 0.7237421870231628, "learning_rate": 1.523849290412456e-05, "loss": 0.5147, "step": 5555 }, { "epoch": 0.6761180407666565, "grad_norm": 3.2059812545776367, "learning_rate": 1.5236831181945499e-05, "loss": 0.4522, "step": 5556 }, { "epoch": 0.6762397322786735, "grad_norm": 0.7338497638702393, "learning_rate": 1.5235169260494171e-05, "loss": 0.5182, "step": 5557 }, { "epoch": 0.6763614237906906, "grad_norm": 1.3542253971099854, "learning_rate": 1.5233507139833816e-05, "loss": 0.4887, "step": 5558 }, { "epoch": 0.6764831153027077, "grad_norm": 3.261929750442505, "learning_rate": 1.5231844820027681e-05, "loss": 0.4913, "step": 5559 }, { "epoch": 0.6766048068147247, "grad_norm": 4.380527973175049, "learning_rate": 1.5230182301139025e-05, "loss": 0.4261, "step": 5560 }, { "epoch": 0.6767264983267417, "grad_norm": 1.559615969657898, "learning_rate": 1.5228519583231102e-05, "loss": 0.4594, "step": 5561 }, { "epoch": 0.6768481898387587, "grad_norm": 1.7029471397399902, "learning_rate": 1.5226856666367189e-05, "loss": 0.4205, "step": 5562 }, { "epoch": 0.6769698813507757, "grad_norm": 2.043337345123291, "learning_rate": 1.5225193550610563e-05, "loss": 0.393, "step": 5563 }, { "epoch": 0.6770915728627929, "grad_norm": 0.7511071562767029, "learning_rate": 1.5223530236024502e-05, "loss": 0.4422, "step": 5564 }, { "epoch": 0.6772132643748099, "grad_norm": 3.5116820335388184, "learning_rate": 1.5221866722672305e-05, "loss": 0.4958, "step": 5565 }, { "epoch": 0.6773349558868269, "grad_norm": 5.950035572052002, "learning_rate": 1.5220203010617272e-05, "loss": 0.573, "step": 5566 }, { "epoch": 0.6774566473988439, "grad_norm": 3.390221118927002, "learning_rate": 1.5218539099922708e-05, "loss": 0.4837, "step": 5567 }, { "epoch": 0.6775783389108609, "grad_norm": 2.9605467319488525, "learning_rate": 1.521687499065193e-05, "loss": 0.4167, "step": 5568 }, { "epoch": 0.677700030422878, "grad_norm": 4.046869277954102, "learning_rate": 1.521521068286826e-05, "loss": 0.4792, "step": 5569 }, { "epoch": 0.6778217219348951, "grad_norm": 3.218365430831909, "learning_rate": 1.5213546176635028e-05, "loss": 0.4844, "step": 5570 }, { "epoch": 0.6779434134469121, "grad_norm": 2.4036426544189453, "learning_rate": 1.5211881472015575e-05, "loss": 0.4585, "step": 5571 }, { "epoch": 0.6780651049589291, "grad_norm": 1.279919981956482, "learning_rate": 1.521021656907324e-05, "loss": 0.4493, "step": 5572 }, { "epoch": 0.6781867964709462, "grad_norm": 0.6202378869056702, "learning_rate": 1.5208551467871382e-05, "loss": 0.4631, "step": 5573 }, { "epoch": 0.6783084879829632, "grad_norm": 0.7910367250442505, "learning_rate": 1.520688616847336e-05, "loss": 0.5026, "step": 5574 }, { "epoch": 0.6784301794949802, "grad_norm": 0.6538732647895813, "learning_rate": 1.5205220670942544e-05, "loss": 0.4999, "step": 5575 }, { "epoch": 0.6785518710069972, "grad_norm": 5.974656105041504, "learning_rate": 1.5203554975342303e-05, "loss": 0.4544, "step": 5576 }, { "epoch": 0.6786735625190143, "grad_norm": 4.890570163726807, "learning_rate": 1.5201889081736026e-05, "loss": 0.4672, "step": 5577 }, { "epoch": 0.6787952540310314, "grad_norm": 3.031325340270996, "learning_rate": 1.52002229901871e-05, "loss": 0.5157, "step": 5578 }, { "epoch": 0.6789169455430484, "grad_norm": 4.464944362640381, "learning_rate": 1.519855670075893e-05, "loss": 0.4632, "step": 5579 }, { "epoch": 0.6790386370550654, "grad_norm": 4.358697891235352, "learning_rate": 1.5196890213514915e-05, "loss": 0.4865, "step": 5580 }, { "epoch": 0.6791603285670824, "grad_norm": 1.2585469484329224, "learning_rate": 1.5195223528518468e-05, "loss": 0.5086, "step": 5581 }, { "epoch": 0.6792820200790994, "grad_norm": 0.8789167404174805, "learning_rate": 1.5193556645833014e-05, "loss": 0.5191, "step": 5582 }, { "epoch": 0.6794037115911166, "grad_norm": 4.2059149742126465, "learning_rate": 1.5191889565521982e-05, "loss": 0.4085, "step": 5583 }, { "epoch": 0.6795254031031336, "grad_norm": 0.7056273818016052, "learning_rate": 1.5190222287648802e-05, "loss": 0.4414, "step": 5584 }, { "epoch": 0.6796470946151506, "grad_norm": 2.5234735012054443, "learning_rate": 1.5188554812276918e-05, "loss": 0.4537, "step": 5585 }, { "epoch": 0.6797687861271676, "grad_norm": 0.6492642164230347, "learning_rate": 1.518688713946979e-05, "loss": 0.4398, "step": 5586 }, { "epoch": 0.6798904776391846, "grad_norm": 2.277618408203125, "learning_rate": 1.518521926929087e-05, "loss": 0.5147, "step": 5587 }, { "epoch": 0.6800121691512017, "grad_norm": 0.8896979093551636, "learning_rate": 1.5183551201803622e-05, "loss": 0.4478, "step": 5588 }, { "epoch": 0.6801338606632188, "grad_norm": 2.678936243057251, "learning_rate": 1.518188293707152e-05, "loss": 0.4671, "step": 5589 }, { "epoch": 0.6802555521752358, "grad_norm": 1.265942096710205, "learning_rate": 1.5180214475158045e-05, "loss": 0.4043, "step": 5590 }, { "epoch": 0.6803772436872528, "grad_norm": 2.5065581798553467, "learning_rate": 1.5178545816126692e-05, "loss": 0.489, "step": 5591 }, { "epoch": 0.6804989351992698, "grad_norm": 1.4736247062683105, "learning_rate": 1.5176876960040949e-05, "loss": 0.4395, "step": 5592 }, { "epoch": 0.6806206267112869, "grad_norm": 0.8289631009101868, "learning_rate": 1.5175207906964322e-05, "loss": 0.4188, "step": 5593 }, { "epoch": 0.6807423182233039, "grad_norm": 1.6755791902542114, "learning_rate": 1.5173538656960323e-05, "loss": 0.4852, "step": 5594 }, { "epoch": 0.6808640097353209, "grad_norm": 1.860309362411499, "learning_rate": 1.5171869210092467e-05, "loss": 0.5106, "step": 5595 }, { "epoch": 0.680985701247338, "grad_norm": 1.8324044942855835, "learning_rate": 1.5170199566424284e-05, "loss": 0.4293, "step": 5596 }, { "epoch": 0.6811073927593551, "grad_norm": 2.2882726192474365, "learning_rate": 1.5168529726019305e-05, "loss": 0.3954, "step": 5597 }, { "epoch": 0.6812290842713721, "grad_norm": 1.8817601203918457, "learning_rate": 1.5166859688941076e-05, "loss": 0.4848, "step": 5598 }, { "epoch": 0.6813507757833891, "grad_norm": 2.001265048980713, "learning_rate": 1.5165189455253133e-05, "loss": 0.501, "step": 5599 }, { "epoch": 0.6814724672954061, "grad_norm": 0.9268087148666382, "learning_rate": 1.5163519025019046e-05, "loss": 0.5207, "step": 5600 }, { "epoch": 0.6815941588074231, "grad_norm": 2.2409093379974365, "learning_rate": 1.516184839830237e-05, "loss": 0.4014, "step": 5601 }, { "epoch": 0.6817158503194403, "grad_norm": 0.9919574856758118, "learning_rate": 1.5160177575166679e-05, "loss": 0.5077, "step": 5602 }, { "epoch": 0.6818375418314573, "grad_norm": 1.652867078781128, "learning_rate": 1.515850655567555e-05, "loss": 0.4699, "step": 5603 }, { "epoch": 0.6819592333434743, "grad_norm": 1.7148882150650024, "learning_rate": 1.5156835339892568e-05, "loss": 0.458, "step": 5604 }, { "epoch": 0.6820809248554913, "grad_norm": 1.8744171857833862, "learning_rate": 1.515516392788133e-05, "loss": 0.4774, "step": 5605 }, { "epoch": 0.6822026163675083, "grad_norm": 1.464301586151123, "learning_rate": 1.515349231970543e-05, "loss": 0.4594, "step": 5606 }, { "epoch": 0.6823243078795254, "grad_norm": 3.034067392349243, "learning_rate": 1.5151820515428482e-05, "loss": 0.5391, "step": 5607 }, { "epoch": 0.6824459993915425, "grad_norm": 0.645681619644165, "learning_rate": 1.5150148515114099e-05, "loss": 0.4468, "step": 5608 }, { "epoch": 0.6825676909035595, "grad_norm": 0.6690253019332886, "learning_rate": 1.5148476318825909e-05, "loss": 0.4275, "step": 5609 }, { "epoch": 0.6826893824155765, "grad_norm": 2.369357109069824, "learning_rate": 1.5146803926627534e-05, "loss": 0.3906, "step": 5610 }, { "epoch": 0.6828110739275935, "grad_norm": 2.59771466255188, "learning_rate": 1.5145131338582615e-05, "loss": 0.4812, "step": 5611 }, { "epoch": 0.6829327654396106, "grad_norm": 2.3747684955596924, "learning_rate": 1.51434585547548e-05, "loss": 0.5041, "step": 5612 }, { "epoch": 0.6830544569516276, "grad_norm": 2.551231861114502, "learning_rate": 1.5141785575207744e-05, "loss": 0.4599, "step": 5613 }, { "epoch": 0.6831761484636446, "grad_norm": 0.8567521572113037, "learning_rate": 1.5140112400005103e-05, "loss": 0.4329, "step": 5614 }, { "epoch": 0.6832978399756617, "grad_norm": 1.7583280801773071, "learning_rate": 1.5138439029210542e-05, "loss": 0.4723, "step": 5615 }, { "epoch": 0.6834195314876788, "grad_norm": 0.6423031687736511, "learning_rate": 1.5136765462887742e-05, "loss": 0.4496, "step": 5616 }, { "epoch": 0.6835412229996958, "grad_norm": 2.487212657928467, "learning_rate": 1.5135091701100384e-05, "loss": 0.4933, "step": 5617 }, { "epoch": 0.6836629145117128, "grad_norm": 4.152840614318848, "learning_rate": 1.5133417743912157e-05, "loss": 0.4278, "step": 5618 }, { "epoch": 0.6837846060237298, "grad_norm": 2.1671485900878906, "learning_rate": 1.5131743591386758e-05, "loss": 0.445, "step": 5619 }, { "epoch": 0.6839062975357468, "grad_norm": 0.6360027194023132, "learning_rate": 1.5130069243587895e-05, "loss": 0.4911, "step": 5620 }, { "epoch": 0.684027989047764, "grad_norm": 0.8293319940567017, "learning_rate": 1.5128394700579281e-05, "loss": 0.5493, "step": 5621 }, { "epoch": 0.684149680559781, "grad_norm": 4.631020545959473, "learning_rate": 1.512671996242463e-05, "loss": 0.4577, "step": 5622 }, { "epoch": 0.684271372071798, "grad_norm": 2.609022855758667, "learning_rate": 1.5125045029187673e-05, "loss": 0.446, "step": 5623 }, { "epoch": 0.684393063583815, "grad_norm": 2.240832805633545, "learning_rate": 1.5123369900932146e-05, "loss": 0.4519, "step": 5624 }, { "epoch": 0.684514755095832, "grad_norm": 1.183343768119812, "learning_rate": 1.5121694577721786e-05, "loss": 0.4758, "step": 5625 }, { "epoch": 0.6846364466078491, "grad_norm": 1.699865460395813, "learning_rate": 1.512001905962035e-05, "loss": 0.4708, "step": 5626 }, { "epoch": 0.6847581381198662, "grad_norm": 0.5912694334983826, "learning_rate": 1.5118343346691591e-05, "loss": 0.4246, "step": 5627 }, { "epoch": 0.6848798296318832, "grad_norm": 0.5839297771453857, "learning_rate": 1.511666743899927e-05, "loss": 0.4443, "step": 5628 }, { "epoch": 0.6850015211439002, "grad_norm": 1.018214464187622, "learning_rate": 1.5114991336607165e-05, "loss": 0.4821, "step": 5629 }, { "epoch": 0.6851232126559172, "grad_norm": 1.6293895244598389, "learning_rate": 1.511331503957905e-05, "loss": 0.4425, "step": 5630 }, { "epoch": 0.6852449041679343, "grad_norm": 3.308887481689453, "learning_rate": 1.5111638547978716e-05, "loss": 0.5099, "step": 5631 }, { "epoch": 0.6853665956799513, "grad_norm": 1.809523344039917, "learning_rate": 1.5109961861869953e-05, "loss": 0.3913, "step": 5632 }, { "epoch": 0.6854882871919684, "grad_norm": 0.7841582298278809, "learning_rate": 1.5108284981316565e-05, "loss": 0.4681, "step": 5633 }, { "epoch": 0.6856099787039854, "grad_norm": 1.096095085144043, "learning_rate": 1.5106607906382358e-05, "loss": 0.486, "step": 5634 }, { "epoch": 0.6857316702160025, "grad_norm": 0.6582747101783752, "learning_rate": 1.5104930637131153e-05, "loss": 0.4538, "step": 5635 }, { "epoch": 0.6858533617280195, "grad_norm": 0.7895265221595764, "learning_rate": 1.5103253173626768e-05, "loss": 0.4495, "step": 5636 }, { "epoch": 0.6859750532400365, "grad_norm": 1.1880378723144531, "learning_rate": 1.510157551593304e-05, "loss": 0.5013, "step": 5637 }, { "epoch": 0.6860967447520535, "grad_norm": 0.7382751703262329, "learning_rate": 1.5099897664113798e-05, "loss": 0.4878, "step": 5638 }, { "epoch": 0.6862184362640705, "grad_norm": 2.165334463119507, "learning_rate": 1.5098219618232901e-05, "loss": 0.4351, "step": 5639 }, { "epoch": 0.6863401277760877, "grad_norm": 2.846501350402832, "learning_rate": 1.509654137835419e-05, "loss": 0.4026, "step": 5640 }, { "epoch": 0.6864618192881047, "grad_norm": 0.986497700214386, "learning_rate": 1.5094862944541531e-05, "loss": 0.4494, "step": 5641 }, { "epoch": 0.6865835108001217, "grad_norm": 0.9017858505249023, "learning_rate": 1.509318431685879e-05, "loss": 0.45, "step": 5642 }, { "epoch": 0.6867052023121387, "grad_norm": 0.8901139497756958, "learning_rate": 1.509150549536985e-05, "loss": 0.4423, "step": 5643 }, { "epoch": 0.6868268938241557, "grad_norm": 2.297985553741455, "learning_rate": 1.5089826480138578e-05, "loss": 0.4732, "step": 5644 }, { "epoch": 0.6869485853361728, "grad_norm": 1.1578972339630127, "learning_rate": 1.508814727122888e-05, "loss": 0.3979, "step": 5645 }, { "epoch": 0.6870702768481899, "grad_norm": 2.9678287506103516, "learning_rate": 1.5086467868704641e-05, "loss": 0.462, "step": 5646 }, { "epoch": 0.6871919683602069, "grad_norm": 1.9689537286758423, "learning_rate": 1.5084788272629776e-05, "loss": 0.4712, "step": 5647 }, { "epoch": 0.6873136598722239, "grad_norm": 3.804685354232788, "learning_rate": 1.5083108483068187e-05, "loss": 0.5089, "step": 5648 }, { "epoch": 0.687435351384241, "grad_norm": 3.8038132190704346, "learning_rate": 1.5081428500083803e-05, "loss": 0.5358, "step": 5649 }, { "epoch": 0.687557042896258, "grad_norm": 0.9257991909980774, "learning_rate": 1.5079748323740546e-05, "loss": 0.4731, "step": 5650 }, { "epoch": 0.687678734408275, "grad_norm": 1.4556457996368408, "learning_rate": 1.507806795410235e-05, "loss": 0.5403, "step": 5651 }, { "epoch": 0.6878004259202921, "grad_norm": 3.0026285648345947, "learning_rate": 1.5076387391233156e-05, "loss": 0.4319, "step": 5652 }, { "epoch": 0.6879221174323091, "grad_norm": 2.762108564376831, "learning_rate": 1.5074706635196917e-05, "loss": 0.4569, "step": 5653 }, { "epoch": 0.6880438089443262, "grad_norm": 3.7026870250701904, "learning_rate": 1.5073025686057584e-05, "loss": 0.4529, "step": 5654 }, { "epoch": 0.6881655004563432, "grad_norm": 0.7228449583053589, "learning_rate": 1.5071344543879127e-05, "loss": 0.5242, "step": 5655 }, { "epoch": 0.6882871919683602, "grad_norm": 5.228067398071289, "learning_rate": 1.506966320872551e-05, "loss": 0.4504, "step": 5656 }, { "epoch": 0.6884088834803772, "grad_norm": 1.1155802011489868, "learning_rate": 1.5067981680660713e-05, "loss": 0.5388, "step": 5657 }, { "epoch": 0.6885305749923942, "grad_norm": 3.316756248474121, "learning_rate": 1.5066299959748727e-05, "loss": 0.412, "step": 5658 }, { "epoch": 0.6886522665044114, "grad_norm": 3.818772077560425, "learning_rate": 1.5064618046053536e-05, "loss": 0.3747, "step": 5659 }, { "epoch": 0.6887739580164284, "grad_norm": 1.189836859703064, "learning_rate": 1.506293593963915e-05, "loss": 0.4608, "step": 5660 }, { "epoch": 0.6888956495284454, "grad_norm": 1.1319499015808105, "learning_rate": 1.5061253640569567e-05, "loss": 0.4224, "step": 5661 }, { "epoch": 0.6890173410404624, "grad_norm": 1.5180237293243408, "learning_rate": 1.5059571148908808e-05, "loss": 0.4526, "step": 5662 }, { "epoch": 0.6891390325524794, "grad_norm": 1.438735842704773, "learning_rate": 1.5057888464720899e-05, "loss": 0.4363, "step": 5663 }, { "epoch": 0.6892607240644965, "grad_norm": 0.6268392205238342, "learning_rate": 1.5056205588069856e-05, "loss": 0.4438, "step": 5664 }, { "epoch": 0.6893824155765136, "grad_norm": 1.0462889671325684, "learning_rate": 1.5054522519019731e-05, "loss": 0.4679, "step": 5665 }, { "epoch": 0.6895041070885306, "grad_norm": 4.216460704803467, "learning_rate": 1.5052839257634562e-05, "loss": 0.5472, "step": 5666 }, { "epoch": 0.6896257986005476, "grad_norm": 1.432166337966919, "learning_rate": 1.5051155803978397e-05, "loss": 0.4426, "step": 5667 }, { "epoch": 0.6897474901125646, "grad_norm": 1.6374738216400146, "learning_rate": 1.5049472158115302e-05, "loss": 0.4699, "step": 5668 }, { "epoch": 0.6898691816245817, "grad_norm": 4.411357402801514, "learning_rate": 1.5047788320109335e-05, "loss": 0.4548, "step": 5669 }, { "epoch": 0.6899908731365987, "grad_norm": 1.863133430480957, "learning_rate": 1.5046104290024577e-05, "loss": 0.4958, "step": 5670 }, { "epoch": 0.6901125646486158, "grad_norm": 1.1183873414993286, "learning_rate": 1.5044420067925104e-05, "loss": 0.4668, "step": 5671 }, { "epoch": 0.6902342561606328, "grad_norm": 0.8334675431251526, "learning_rate": 1.5042735653875008e-05, "loss": 0.4572, "step": 5672 }, { "epoch": 0.6903559476726499, "grad_norm": 1.472253441810608, "learning_rate": 1.5041051047938387e-05, "loss": 0.4418, "step": 5673 }, { "epoch": 0.6904776391846669, "grad_norm": 1.1135720014572144, "learning_rate": 1.503936625017933e-05, "loss": 0.4828, "step": 5674 }, { "epoch": 0.6905993306966839, "grad_norm": 1.8146601915359497, "learning_rate": 1.5037681260661963e-05, "loss": 0.4016, "step": 5675 }, { "epoch": 0.6907210222087009, "grad_norm": 2.1689348220825195, "learning_rate": 1.5035996079450392e-05, "loss": 0.4691, "step": 5676 }, { "epoch": 0.6908427137207179, "grad_norm": 2.723686456680298, "learning_rate": 1.503431070660875e-05, "loss": 0.4806, "step": 5677 }, { "epoch": 0.6909644052327351, "grad_norm": 5.7829437255859375, "learning_rate": 1.5032625142201163e-05, "loss": 0.5798, "step": 5678 }, { "epoch": 0.6910860967447521, "grad_norm": 0.8585506081581116, "learning_rate": 1.5030939386291775e-05, "loss": 0.455, "step": 5679 }, { "epoch": 0.6912077882567691, "grad_norm": 0.9236282110214233, "learning_rate": 1.5029253438944725e-05, "loss": 0.4338, "step": 5680 }, { "epoch": 0.6913294797687861, "grad_norm": 1.8230072259902954, "learning_rate": 1.5027567300224175e-05, "loss": 0.4613, "step": 5681 }, { "epoch": 0.6914511712808031, "grad_norm": 0.6471611261367798, "learning_rate": 1.5025880970194282e-05, "loss": 0.4345, "step": 5682 }, { "epoch": 0.6915728627928202, "grad_norm": 1.7413103580474854, "learning_rate": 1.5024194448919217e-05, "loss": 0.4108, "step": 5683 }, { "epoch": 0.6916945543048373, "grad_norm": 0.8616017699241638, "learning_rate": 1.502250773646315e-05, "loss": 0.4738, "step": 5684 }, { "epoch": 0.6918162458168543, "grad_norm": 0.7052682638168335, "learning_rate": 1.502082083289027e-05, "loss": 0.4773, "step": 5685 }, { "epoch": 0.6919379373288713, "grad_norm": 0.9195804595947266, "learning_rate": 1.5019133738264764e-05, "loss": 0.453, "step": 5686 }, { "epoch": 0.6920596288408883, "grad_norm": 3.081632614135742, "learning_rate": 1.501744645265083e-05, "loss": 0.492, "step": 5687 }, { "epoch": 0.6921813203529054, "grad_norm": 2.3086626529693604, "learning_rate": 1.5015758976112675e-05, "loss": 0.5479, "step": 5688 }, { "epoch": 0.6923030118649224, "grad_norm": 2.546926259994507, "learning_rate": 1.5014071308714508e-05, "loss": 0.4258, "step": 5689 }, { "epoch": 0.6924247033769395, "grad_norm": 1.9054832458496094, "learning_rate": 1.5012383450520549e-05, "loss": 0.4514, "step": 5690 }, { "epoch": 0.6925463948889565, "grad_norm": 1.3438888788223267, "learning_rate": 1.5010695401595024e-05, "loss": 0.4868, "step": 5691 }, { "epoch": 0.6926680864009735, "grad_norm": 0.9181016683578491, "learning_rate": 1.5009007162002167e-05, "loss": 0.4442, "step": 5692 }, { "epoch": 0.6927897779129906, "grad_norm": 1.5741522312164307, "learning_rate": 1.500731873180622e-05, "loss": 0.4839, "step": 5693 }, { "epoch": 0.6929114694250076, "grad_norm": 1.6062912940979004, "learning_rate": 1.500563011107143e-05, "loss": 0.473, "step": 5694 }, { "epoch": 0.6930331609370246, "grad_norm": 2.201292037963867, "learning_rate": 1.5003941299862055e-05, "loss": 0.4974, "step": 5695 }, { "epoch": 0.6931548524490416, "grad_norm": 0.7437626123428345, "learning_rate": 1.5002252298242356e-05, "loss": 0.4317, "step": 5696 }, { "epoch": 0.6932765439610588, "grad_norm": 0.65212082862854, "learning_rate": 1.50005631062766e-05, "loss": 0.4424, "step": 5697 }, { "epoch": 0.6933982354730758, "grad_norm": 2.1397335529327393, "learning_rate": 1.4998873724029068e-05, "loss": 0.4782, "step": 5698 }, { "epoch": 0.6935199269850928, "grad_norm": 0.9799501299858093, "learning_rate": 1.4997184151564046e-05, "loss": 0.4234, "step": 5699 }, { "epoch": 0.6936416184971098, "grad_norm": 1.6238120794296265, "learning_rate": 1.4995494388945821e-05, "loss": 0.4468, "step": 5700 }, { "epoch": 0.6937633100091268, "grad_norm": 1.225841999053955, "learning_rate": 1.4993804436238696e-05, "loss": 0.4318, "step": 5701 }, { "epoch": 0.6938850015211439, "grad_norm": 0.6529251337051392, "learning_rate": 1.4992114293506976e-05, "loss": 0.419, "step": 5702 }, { "epoch": 0.694006693033161, "grad_norm": 1.2169039249420166, "learning_rate": 1.4990423960814972e-05, "loss": 0.3727, "step": 5703 }, { "epoch": 0.694128384545178, "grad_norm": 0.755707859992981, "learning_rate": 1.4988733438227008e-05, "loss": 0.5071, "step": 5704 }, { "epoch": 0.694250076057195, "grad_norm": 1.2898119688034058, "learning_rate": 1.498704272580741e-05, "loss": 0.5125, "step": 5705 }, { "epoch": 0.694371767569212, "grad_norm": 1.5574294328689575, "learning_rate": 1.4985351823620516e-05, "loss": 0.5136, "step": 5706 }, { "epoch": 0.6944934590812291, "grad_norm": 1.4448221921920776, "learning_rate": 1.4983660731730663e-05, "loss": 0.4437, "step": 5707 }, { "epoch": 0.6946151505932461, "grad_norm": 2.195535182952881, "learning_rate": 1.4981969450202204e-05, "loss": 0.3932, "step": 5708 }, { "epoch": 0.6947368421052632, "grad_norm": 0.743669331073761, "learning_rate": 1.4980277979099499e-05, "loss": 0.4938, "step": 5709 }, { "epoch": 0.6948585336172802, "grad_norm": 2.3098220825195312, "learning_rate": 1.4978586318486903e-05, "loss": 0.5058, "step": 5710 }, { "epoch": 0.6949802251292972, "grad_norm": 1.493943452835083, "learning_rate": 1.4976894468428796e-05, "loss": 0.512, "step": 5711 }, { "epoch": 0.6951019166413143, "grad_norm": 2.427304744720459, "learning_rate": 1.497520242898955e-05, "loss": 0.5396, "step": 5712 }, { "epoch": 0.6952236081533313, "grad_norm": 0.9201792478561401, "learning_rate": 1.4973510200233556e-05, "loss": 0.487, "step": 5713 }, { "epoch": 0.6953452996653483, "grad_norm": 2.018176317214966, "learning_rate": 1.4971817782225202e-05, "loss": 0.4906, "step": 5714 }, { "epoch": 0.6954669911773653, "grad_norm": 2.691685199737549, "learning_rate": 1.4970125175028894e-05, "loss": 0.4541, "step": 5715 }, { "epoch": 0.6955886826893825, "grad_norm": 1.2946938276290894, "learning_rate": 1.4968432378709032e-05, "loss": 0.4812, "step": 5716 }, { "epoch": 0.6957103742013995, "grad_norm": 0.8203717470169067, "learning_rate": 1.4966739393330034e-05, "loss": 0.4848, "step": 5717 }, { "epoch": 0.6958320657134165, "grad_norm": 1.0984758138656616, "learning_rate": 1.4965046218956324e-05, "loss": 0.4805, "step": 5718 }, { "epoch": 0.6959537572254335, "grad_norm": 0.6645656228065491, "learning_rate": 1.4963352855652326e-05, "loss": 0.4598, "step": 5719 }, { "epoch": 0.6960754487374505, "grad_norm": 1.2504998445510864, "learning_rate": 1.4961659303482478e-05, "loss": 0.4238, "step": 5720 }, { "epoch": 0.6961971402494675, "grad_norm": 1.5913711786270142, "learning_rate": 1.4959965562511224e-05, "loss": 0.513, "step": 5721 }, { "epoch": 0.6963188317614847, "grad_norm": 1.1032487154006958, "learning_rate": 1.4958271632803017e-05, "loss": 0.4615, "step": 5722 }, { "epoch": 0.6964405232735017, "grad_norm": 1.172655701637268, "learning_rate": 1.4956577514422308e-05, "loss": 0.4485, "step": 5723 }, { "epoch": 0.6965622147855187, "grad_norm": 1.7108309268951416, "learning_rate": 1.4954883207433566e-05, "loss": 0.429, "step": 5724 }, { "epoch": 0.6966839062975357, "grad_norm": 1.714949369430542, "learning_rate": 1.4953188711901261e-05, "loss": 0.403, "step": 5725 }, { "epoch": 0.6968055978095528, "grad_norm": 1.5385502576828003, "learning_rate": 1.4951494027889872e-05, "loss": 0.4724, "step": 5726 }, { "epoch": 0.6969272893215698, "grad_norm": 0.7456535696983337, "learning_rate": 1.494979915546389e-05, "loss": 0.4401, "step": 5727 }, { "epoch": 0.6970489808335869, "grad_norm": 0.6776686906814575, "learning_rate": 1.4948104094687802e-05, "loss": 0.4696, "step": 5728 }, { "epoch": 0.6971706723456039, "grad_norm": 1.2693860530853271, "learning_rate": 1.494640884562611e-05, "loss": 0.426, "step": 5729 }, { "epoch": 0.697292363857621, "grad_norm": 0.7595444917678833, "learning_rate": 1.4944713408343325e-05, "loss": 0.395, "step": 5730 }, { "epoch": 0.697414055369638, "grad_norm": 1.4870376586914062, "learning_rate": 1.494301778290396e-05, "loss": 0.4203, "step": 5731 }, { "epoch": 0.697535746881655, "grad_norm": 2.3356668949127197, "learning_rate": 1.4941321969372536e-05, "loss": 0.4833, "step": 5732 }, { "epoch": 0.697657438393672, "grad_norm": 0.7620767951011658, "learning_rate": 1.4939625967813581e-05, "loss": 0.466, "step": 5733 }, { "epoch": 0.6977791299056891, "grad_norm": 3.1272616386413574, "learning_rate": 1.4937929778291638e-05, "loss": 0.5087, "step": 5734 }, { "epoch": 0.6979008214177062, "grad_norm": 3.444051742553711, "learning_rate": 1.4936233400871242e-05, "loss": 0.5168, "step": 5735 }, { "epoch": 0.6980225129297232, "grad_norm": 1.9624532461166382, "learning_rate": 1.4934536835616951e-05, "loss": 0.4502, "step": 5736 }, { "epoch": 0.6981442044417402, "grad_norm": 2.0625057220458984, "learning_rate": 1.4932840082593317e-05, "loss": 0.4729, "step": 5737 }, { "epoch": 0.6982658959537572, "grad_norm": 1.064643144607544, "learning_rate": 1.4931143141864908e-05, "loss": 0.4574, "step": 5738 }, { "epoch": 0.6983875874657742, "grad_norm": 1.589437484741211, "learning_rate": 1.4929446013496297e-05, "loss": 0.4285, "step": 5739 }, { "epoch": 0.6985092789777912, "grad_norm": 0.9425216913223267, "learning_rate": 1.4927748697552058e-05, "loss": 0.4235, "step": 5740 }, { "epoch": 0.6986309704898084, "grad_norm": 0.6291329860687256, "learning_rate": 1.4926051194096785e-05, "loss": 0.4441, "step": 5741 }, { "epoch": 0.6987526620018254, "grad_norm": 1.7625223398208618, "learning_rate": 1.4924353503195068e-05, "loss": 0.4293, "step": 5742 }, { "epoch": 0.6988743535138424, "grad_norm": 1.1574428081512451, "learning_rate": 1.4922655624911503e-05, "loss": 0.4465, "step": 5743 }, { "epoch": 0.6989960450258594, "grad_norm": 2.1988799571990967, "learning_rate": 1.4920957559310704e-05, "loss": 0.5103, "step": 5744 }, { "epoch": 0.6991177365378765, "grad_norm": 0.7254383563995361, "learning_rate": 1.4919259306457289e-05, "loss": 0.4544, "step": 5745 }, { "epoch": 0.6992394280498935, "grad_norm": 1.0539625883102417, "learning_rate": 1.4917560866415868e-05, "loss": 0.4843, "step": 5746 }, { "epoch": 0.6993611195619106, "grad_norm": 1.2837316989898682, "learning_rate": 1.4915862239251081e-05, "loss": 0.4656, "step": 5747 }, { "epoch": 0.6994828110739276, "grad_norm": 0.7310906052589417, "learning_rate": 1.4914163425027559e-05, "loss": 0.4958, "step": 5748 }, { "epoch": 0.6996045025859446, "grad_norm": 0.942044734954834, "learning_rate": 1.491246442380995e-05, "loss": 0.4321, "step": 5749 }, { "epoch": 0.6997261940979617, "grad_norm": 2.467073678970337, "learning_rate": 1.4910765235662898e-05, "loss": 0.4225, "step": 5750 }, { "epoch": 0.6998478856099787, "grad_norm": 2.887951374053955, "learning_rate": 1.4909065860651064e-05, "loss": 0.4274, "step": 5751 }, { "epoch": 0.6999695771219957, "grad_norm": 0.6570051908493042, "learning_rate": 1.4907366298839115e-05, "loss": 0.4795, "step": 5752 }, { "epoch": 0.7000912686340128, "grad_norm": 0.7301941514015198, "learning_rate": 1.4905666550291723e-05, "loss": 0.4611, "step": 5753 }, { "epoch": 0.7002129601460299, "grad_norm": 2.472259521484375, "learning_rate": 1.4903966615073558e-05, "loss": 0.4582, "step": 5754 }, { "epoch": 0.7003346516580469, "grad_norm": 2.261049270629883, "learning_rate": 1.4902266493249315e-05, "loss": 0.5226, "step": 5755 }, { "epoch": 0.7004563431700639, "grad_norm": 1.559216856956482, "learning_rate": 1.4900566184883687e-05, "loss": 0.4821, "step": 5756 }, { "epoch": 0.7005780346820809, "grad_norm": 3.4437015056610107, "learning_rate": 1.489886569004137e-05, "loss": 0.3834, "step": 5757 }, { "epoch": 0.7006997261940979, "grad_norm": 2.4797775745391846, "learning_rate": 1.4897165008787075e-05, "loss": 0.451, "step": 5758 }, { "epoch": 0.700821417706115, "grad_norm": 1.560303807258606, "learning_rate": 1.4895464141185513e-05, "loss": 0.4415, "step": 5759 }, { "epoch": 0.7009431092181321, "grad_norm": 0.6299927234649658, "learning_rate": 1.4893763087301409e-05, "loss": 0.4945, "step": 5760 }, { "epoch": 0.7010648007301491, "grad_norm": 0.8315796256065369, "learning_rate": 1.489206184719949e-05, "loss": 0.4082, "step": 5761 }, { "epoch": 0.7011864922421661, "grad_norm": 2.6863346099853516, "learning_rate": 1.489036042094449e-05, "loss": 0.5307, "step": 5762 }, { "epoch": 0.7013081837541831, "grad_norm": 1.8284863233566284, "learning_rate": 1.4888658808601154e-05, "loss": 0.3859, "step": 5763 }, { "epoch": 0.7014298752662002, "grad_norm": 0.75916588306427, "learning_rate": 1.4886957010234234e-05, "loss": 0.4393, "step": 5764 }, { "epoch": 0.7015515667782172, "grad_norm": 2.136780261993408, "learning_rate": 1.4885255025908483e-05, "loss": 0.4948, "step": 5765 }, { "epoch": 0.7016732582902343, "grad_norm": 2.5012426376342773, "learning_rate": 1.4883552855688663e-05, "loss": 0.4917, "step": 5766 }, { "epoch": 0.7017949498022513, "grad_norm": 2.0736212730407715, "learning_rate": 1.488185049963955e-05, "loss": 0.505, "step": 5767 }, { "epoch": 0.7019166413142683, "grad_norm": 1.3989472389221191, "learning_rate": 1.4880147957825923e-05, "loss": 0.4589, "step": 5768 }, { "epoch": 0.7020383328262854, "grad_norm": 2.407496452331543, "learning_rate": 1.4878445230312564e-05, "loss": 0.4992, "step": 5769 }, { "epoch": 0.7021600243383024, "grad_norm": 0.6087340116500854, "learning_rate": 1.4876742317164266e-05, "loss": 0.4994, "step": 5770 }, { "epoch": 0.7022817158503194, "grad_norm": 3.714498996734619, "learning_rate": 1.4875039218445829e-05, "loss": 0.4473, "step": 5771 }, { "epoch": 0.7024034073623365, "grad_norm": 1.873753309249878, "learning_rate": 1.4873335934222058e-05, "loss": 0.4541, "step": 5772 }, { "epoch": 0.7025250988743535, "grad_norm": 0.7393276691436768, "learning_rate": 1.4871632464557765e-05, "loss": 0.4965, "step": 5773 }, { "epoch": 0.7026467903863706, "grad_norm": 4.063852787017822, "learning_rate": 1.4869928809517775e-05, "loss": 0.4466, "step": 5774 }, { "epoch": 0.7027684818983876, "grad_norm": 1.7568515539169312, "learning_rate": 1.4868224969166916e-05, "loss": 0.4829, "step": 5775 }, { "epoch": 0.7028901734104046, "grad_norm": 0.6663513779640198, "learning_rate": 1.486652094357002e-05, "loss": 0.4925, "step": 5776 }, { "epoch": 0.7030118649224216, "grad_norm": 1.2031806707382202, "learning_rate": 1.4864816732791924e-05, "loss": 0.4852, "step": 5777 }, { "epoch": 0.7031335564344386, "grad_norm": 3.4352993965148926, "learning_rate": 1.4863112336897488e-05, "loss": 0.4169, "step": 5778 }, { "epoch": 0.7032552479464558, "grad_norm": 0.6917054057121277, "learning_rate": 1.4861407755951558e-05, "loss": 0.5017, "step": 5779 }, { "epoch": 0.7033769394584728, "grad_norm": 0.9249730110168457, "learning_rate": 1.4859702990019e-05, "loss": 0.4599, "step": 5780 }, { "epoch": 0.7034986309704898, "grad_norm": 1.2479867935180664, "learning_rate": 1.4857998039164688e-05, "loss": 0.5164, "step": 5781 }, { "epoch": 0.7036203224825068, "grad_norm": 2.1523680686950684, "learning_rate": 1.4856292903453488e-05, "loss": 0.4649, "step": 5782 }, { "epoch": 0.7037420139945239, "grad_norm": 1.4893996715545654, "learning_rate": 1.4854587582950297e-05, "loss": 0.4061, "step": 5783 }, { "epoch": 0.7038637055065409, "grad_norm": 1.368286371231079, "learning_rate": 1.4852882077719998e-05, "loss": 0.4246, "step": 5784 }, { "epoch": 0.703985397018558, "grad_norm": 0.6861351728439331, "learning_rate": 1.4851176387827491e-05, "loss": 0.452, "step": 5785 }, { "epoch": 0.704107088530575, "grad_norm": 2.3142786026000977, "learning_rate": 1.484947051333768e-05, "loss": 0.4771, "step": 5786 }, { "epoch": 0.704228780042592, "grad_norm": 1.445865273475647, "learning_rate": 1.4847764454315483e-05, "loss": 0.3717, "step": 5787 }, { "epoch": 0.7043504715546091, "grad_norm": 0.9301458597183228, "learning_rate": 1.484605821082581e-05, "loss": 0.4165, "step": 5788 }, { "epoch": 0.7044721630666261, "grad_norm": 1.196312427520752, "learning_rate": 1.484435178293359e-05, "loss": 0.4422, "step": 5789 }, { "epoch": 0.7045938545786431, "grad_norm": 0.9635326266288757, "learning_rate": 1.484264517070376e-05, "loss": 0.451, "step": 5790 }, { "epoch": 0.7047155460906602, "grad_norm": 2.4443612098693848, "learning_rate": 1.4840938374201258e-05, "loss": 0.4943, "step": 5791 }, { "epoch": 0.7048372376026772, "grad_norm": 2.8149466514587402, "learning_rate": 1.4839231393491028e-05, "loss": 0.444, "step": 5792 }, { "epoch": 0.7049589291146943, "grad_norm": 0.6951661109924316, "learning_rate": 1.483752422863803e-05, "loss": 0.4546, "step": 5793 }, { "epoch": 0.7050806206267113, "grad_norm": 1.6646931171417236, "learning_rate": 1.483581687970722e-05, "loss": 0.4362, "step": 5794 }, { "epoch": 0.7052023121387283, "grad_norm": 2.798440933227539, "learning_rate": 1.4834109346763568e-05, "loss": 0.4205, "step": 5795 }, { "epoch": 0.7053240036507453, "grad_norm": 0.9285602569580078, "learning_rate": 1.4832401629872048e-05, "loss": 0.4837, "step": 5796 }, { "epoch": 0.7054456951627623, "grad_norm": 3.0468952655792236, "learning_rate": 1.4830693729097646e-05, "loss": 0.4108, "step": 5797 }, { "epoch": 0.7055673866747795, "grad_norm": 1.4386199712753296, "learning_rate": 1.4828985644505349e-05, "loss": 0.4557, "step": 5798 }, { "epoch": 0.7056890781867965, "grad_norm": 1.4633842706680298, "learning_rate": 1.4827277376160152e-05, "loss": 0.4327, "step": 5799 }, { "epoch": 0.7058107696988135, "grad_norm": 4.524606227874756, "learning_rate": 1.4825568924127059e-05, "loss": 0.5267, "step": 5800 }, { "epoch": 0.7059324612108305, "grad_norm": 1.3131097555160522, "learning_rate": 1.4823860288471076e-05, "loss": 0.4354, "step": 5801 }, { "epoch": 0.7060541527228476, "grad_norm": 0.942453920841217, "learning_rate": 1.4822151469257228e-05, "loss": 0.4598, "step": 5802 }, { "epoch": 0.7061758442348646, "grad_norm": 1.812514066696167, "learning_rate": 1.4820442466550536e-05, "loss": 0.4776, "step": 5803 }, { "epoch": 0.7062975357468817, "grad_norm": 0.9279342889785767, "learning_rate": 1.4818733280416032e-05, "loss": 0.4733, "step": 5804 }, { "epoch": 0.7064192272588987, "grad_norm": 0.8237505555152893, "learning_rate": 1.4817023910918749e-05, "loss": 0.4654, "step": 5805 }, { "epoch": 0.7065409187709157, "grad_norm": 2.42615008354187, "learning_rate": 1.4815314358123736e-05, "loss": 0.4265, "step": 5806 }, { "epoch": 0.7066626102829328, "grad_norm": 1.8335754871368408, "learning_rate": 1.4813604622096044e-05, "loss": 0.4393, "step": 5807 }, { "epoch": 0.7067843017949498, "grad_norm": 3.235401153564453, "learning_rate": 1.4811894702900733e-05, "loss": 0.4272, "step": 5808 }, { "epoch": 0.7069059933069668, "grad_norm": 0.8789718747138977, "learning_rate": 1.481018460060287e-05, "loss": 0.4828, "step": 5809 }, { "epoch": 0.7070276848189839, "grad_norm": 1.8483392000198364, "learning_rate": 1.4808474315267528e-05, "loss": 0.4666, "step": 5810 }, { "epoch": 0.707149376331001, "grad_norm": 2.138598918914795, "learning_rate": 1.480676384695978e-05, "loss": 0.4116, "step": 5811 }, { "epoch": 0.707271067843018, "grad_norm": 1.3106712102890015, "learning_rate": 1.4805053195744721e-05, "loss": 0.4412, "step": 5812 }, { "epoch": 0.707392759355035, "grad_norm": 3.930408477783203, "learning_rate": 1.4803342361687444e-05, "loss": 0.5671, "step": 5813 }, { "epoch": 0.707514450867052, "grad_norm": 1.1056636571884155, "learning_rate": 1.4801631344853043e-05, "loss": 0.4313, "step": 5814 }, { "epoch": 0.707636142379069, "grad_norm": 1.9966340065002441, "learning_rate": 1.4799920145306632e-05, "loss": 0.4761, "step": 5815 }, { "epoch": 0.7077578338910862, "grad_norm": 0.7812060713768005, "learning_rate": 1.4798208763113326e-05, "loss": 0.4313, "step": 5816 }, { "epoch": 0.7078795254031032, "grad_norm": 0.795521080493927, "learning_rate": 1.4796497198338245e-05, "loss": 0.3767, "step": 5817 }, { "epoch": 0.7080012169151202, "grad_norm": 0.5660823583602905, "learning_rate": 1.4794785451046517e-05, "loss": 0.4524, "step": 5818 }, { "epoch": 0.7081229084271372, "grad_norm": 1.1923648118972778, "learning_rate": 1.4793073521303277e-05, "loss": 0.4397, "step": 5819 }, { "epoch": 0.7082445999391542, "grad_norm": 1.0181834697723389, "learning_rate": 1.4791361409173668e-05, "loss": 0.4499, "step": 5820 }, { "epoch": 0.7083662914511712, "grad_norm": 0.6459413766860962, "learning_rate": 1.478964911472284e-05, "loss": 0.4158, "step": 5821 }, { "epoch": 0.7084879829631883, "grad_norm": 0.7527841925621033, "learning_rate": 1.4787936638015951e-05, "loss": 0.4379, "step": 5822 }, { "epoch": 0.7086096744752054, "grad_norm": 1.0385246276855469, "learning_rate": 1.4786223979118161e-05, "loss": 0.4924, "step": 5823 }, { "epoch": 0.7087313659872224, "grad_norm": 2.1502439975738525, "learning_rate": 1.4784511138094642e-05, "loss": 0.4311, "step": 5824 }, { "epoch": 0.7088530574992394, "grad_norm": 3.1310477256774902, "learning_rate": 1.478279811501057e-05, "loss": 0.5512, "step": 5825 }, { "epoch": 0.7089747490112565, "grad_norm": 2.4541409015655518, "learning_rate": 1.478108490993113e-05, "loss": 0.5089, "step": 5826 }, { "epoch": 0.7090964405232735, "grad_norm": 2.9800031185150146, "learning_rate": 1.4779371522921513e-05, "loss": 0.4152, "step": 5827 }, { "epoch": 0.7092181320352905, "grad_norm": 3.3575425148010254, "learning_rate": 1.477765795404692e-05, "loss": 0.5093, "step": 5828 }, { "epoch": 0.7093398235473076, "grad_norm": 4.567439556121826, "learning_rate": 1.4775944203372547e-05, "loss": 0.4041, "step": 5829 }, { "epoch": 0.7094615150593246, "grad_norm": 1.471593976020813, "learning_rate": 1.4774230270963614e-05, "loss": 0.4673, "step": 5830 }, { "epoch": 0.7095832065713417, "grad_norm": 1.7912744283676147, "learning_rate": 1.4772516156885337e-05, "loss": 0.5279, "step": 5831 }, { "epoch": 0.7097048980833587, "grad_norm": 0.7484567165374756, "learning_rate": 1.4770801861202943e-05, "loss": 0.4824, "step": 5832 }, { "epoch": 0.7098265895953757, "grad_norm": 1.3792976140975952, "learning_rate": 1.4769087383981663e-05, "loss": 0.4234, "step": 5833 }, { "epoch": 0.7099482811073927, "grad_norm": 2.3992929458618164, "learning_rate": 1.4767372725286735e-05, "loss": 0.4106, "step": 5834 }, { "epoch": 0.7100699726194099, "grad_norm": 0.61894291639328, "learning_rate": 1.4765657885183407e-05, "loss": 0.3979, "step": 5835 }, { "epoch": 0.7101916641314269, "grad_norm": 2.3444623947143555, "learning_rate": 1.4763942863736933e-05, "loss": 0.4568, "step": 5836 }, { "epoch": 0.7103133556434439, "grad_norm": 3.8838231563568115, "learning_rate": 1.4762227661012572e-05, "loss": 0.5065, "step": 5837 }, { "epoch": 0.7104350471554609, "grad_norm": 4.756305694580078, "learning_rate": 1.476051227707559e-05, "loss": 0.5242, "step": 5838 }, { "epoch": 0.7105567386674779, "grad_norm": 5.430038928985596, "learning_rate": 1.4758796711991266e-05, "loss": 0.5217, "step": 5839 }, { "epoch": 0.710678430179495, "grad_norm": 3.640932321548462, "learning_rate": 1.4757080965824875e-05, "loss": 0.5106, "step": 5840 }, { "epoch": 0.710800121691512, "grad_norm": 0.9580857753753662, "learning_rate": 1.4755365038641704e-05, "loss": 0.4355, "step": 5841 }, { "epoch": 0.7109218132035291, "grad_norm": 2.5009093284606934, "learning_rate": 1.475364893050705e-05, "loss": 0.4956, "step": 5842 }, { "epoch": 0.7110435047155461, "grad_norm": 2.2408010959625244, "learning_rate": 1.4751932641486218e-05, "loss": 0.5174, "step": 5843 }, { "epoch": 0.7111651962275631, "grad_norm": 0.7956969141960144, "learning_rate": 1.4750216171644512e-05, "loss": 0.4878, "step": 5844 }, { "epoch": 0.7112868877395802, "grad_norm": 1.8937441110610962, "learning_rate": 1.4748499521047249e-05, "loss": 0.5596, "step": 5845 }, { "epoch": 0.7114085792515972, "grad_norm": 7.486874580383301, "learning_rate": 1.4746782689759749e-05, "loss": 0.4819, "step": 5846 }, { "epoch": 0.7115302707636142, "grad_norm": 3.9388303756713867, "learning_rate": 1.4745065677847343e-05, "loss": 0.5238, "step": 5847 }, { "epoch": 0.7116519622756313, "grad_norm": 8.442525863647461, "learning_rate": 1.4743348485375366e-05, "loss": 0.5231, "step": 5848 }, { "epoch": 0.7117736537876483, "grad_norm": 7.823458671569824, "learning_rate": 1.474163111240916e-05, "loss": 0.518, "step": 5849 }, { "epoch": 0.7118953452996654, "grad_norm": 6.702693462371826, "learning_rate": 1.4739913559014076e-05, "loss": 0.4794, "step": 5850 }, { "epoch": 0.7120170368116824, "grad_norm": 5.421233654022217, "learning_rate": 1.4738195825255473e-05, "loss": 0.465, "step": 5851 }, { "epoch": 0.7121387283236994, "grad_norm": 5.18204402923584, "learning_rate": 1.4736477911198707e-05, "loss": 0.5068, "step": 5852 }, { "epoch": 0.7122604198357164, "grad_norm": 4.880541801452637, "learning_rate": 1.4734759816909155e-05, "loss": 0.4628, "step": 5853 }, { "epoch": 0.7123821113477335, "grad_norm": 4.086813449859619, "learning_rate": 1.4733041542452191e-05, "loss": 0.4474, "step": 5854 }, { "epoch": 0.7125038028597506, "grad_norm": 2.828464984893799, "learning_rate": 1.4731323087893197e-05, "loss": 0.4365, "step": 5855 }, { "epoch": 0.7126254943717676, "grad_norm": 1.823262095451355, "learning_rate": 1.472960445329757e-05, "loss": 0.5002, "step": 5856 }, { "epoch": 0.7127471858837846, "grad_norm": 2.4425671100616455, "learning_rate": 1.47278856387307e-05, "loss": 0.4883, "step": 5857 }, { "epoch": 0.7128688773958016, "grad_norm": 2.2251341342926025, "learning_rate": 1.4726166644258e-05, "loss": 0.4824, "step": 5858 }, { "epoch": 0.7129905689078186, "grad_norm": 1.1528090238571167, "learning_rate": 1.4724447469944873e-05, "loss": 0.4208, "step": 5859 }, { "epoch": 0.7131122604198357, "grad_norm": 2.3191587924957275, "learning_rate": 1.4722728115856742e-05, "loss": 0.4846, "step": 5860 }, { "epoch": 0.7132339519318528, "grad_norm": 1.4297856092453003, "learning_rate": 1.4721008582059029e-05, "loss": 0.4338, "step": 5861 }, { "epoch": 0.7133556434438698, "grad_norm": 2.150423765182495, "learning_rate": 1.4719288868617172e-05, "loss": 0.4886, "step": 5862 }, { "epoch": 0.7134773349558868, "grad_norm": 3.3077571392059326, "learning_rate": 1.4717568975596601e-05, "loss": 0.5114, "step": 5863 }, { "epoch": 0.7135990264679039, "grad_norm": 0.746435821056366, "learning_rate": 1.4715848903062766e-05, "loss": 0.4851, "step": 5864 }, { "epoch": 0.7137207179799209, "grad_norm": 1.1853212118148804, "learning_rate": 1.4714128651081118e-05, "loss": 0.4575, "step": 5865 }, { "epoch": 0.7138424094919379, "grad_norm": 3.2545742988586426, "learning_rate": 1.471240821971712e-05, "loss": 0.4258, "step": 5866 }, { "epoch": 0.713964101003955, "grad_norm": 2.2967448234558105, "learning_rate": 1.4710687609036234e-05, "loss": 0.4682, "step": 5867 }, { "epoch": 0.714085792515972, "grad_norm": 2.816882371902466, "learning_rate": 1.4708966819103936e-05, "loss": 0.4652, "step": 5868 }, { "epoch": 0.7142074840279891, "grad_norm": 1.005064845085144, "learning_rate": 1.47072458499857e-05, "loss": 0.47, "step": 5869 }, { "epoch": 0.7143291755400061, "grad_norm": 2.8408045768737793, "learning_rate": 1.4705524701747018e-05, "loss": 0.4671, "step": 5870 }, { "epoch": 0.7144508670520231, "grad_norm": 0.9220614433288574, "learning_rate": 1.470380337445338e-05, "loss": 0.4954, "step": 5871 }, { "epoch": 0.7145725585640401, "grad_norm": 0.7577651739120483, "learning_rate": 1.4702081868170286e-05, "loss": 0.4406, "step": 5872 }, { "epoch": 0.7146942500760572, "grad_norm": 2.3003900051116943, "learning_rate": 1.4700360182963246e-05, "loss": 0.4832, "step": 5873 }, { "epoch": 0.7148159415880743, "grad_norm": 3.662625312805176, "learning_rate": 1.4698638318897773e-05, "loss": 0.5059, "step": 5874 }, { "epoch": 0.7149376331000913, "grad_norm": 3.140347480773926, "learning_rate": 1.4696916276039386e-05, "loss": 0.4814, "step": 5875 }, { "epoch": 0.7150593246121083, "grad_norm": 2.9149513244628906, "learning_rate": 1.469519405445361e-05, "loss": 0.5218, "step": 5876 }, { "epoch": 0.7151810161241253, "grad_norm": 0.9762936234474182, "learning_rate": 1.4693471654205986e-05, "loss": 0.4256, "step": 5877 }, { "epoch": 0.7153027076361423, "grad_norm": 0.8658326268196106, "learning_rate": 1.4691749075362047e-05, "loss": 0.475, "step": 5878 }, { "epoch": 0.7154243991481594, "grad_norm": 2.1648755073547363, "learning_rate": 1.4690026317987345e-05, "loss": 0.5038, "step": 5879 }, { "epoch": 0.7155460906601765, "grad_norm": 0.8065931797027588, "learning_rate": 1.4688303382147433e-05, "loss": 0.4604, "step": 5880 }, { "epoch": 0.7156677821721935, "grad_norm": 1.0664634704589844, "learning_rate": 1.4686580267907874e-05, "loss": 0.4805, "step": 5881 }, { "epoch": 0.7157894736842105, "grad_norm": 1.085752248764038, "learning_rate": 1.4684856975334236e-05, "loss": 0.4813, "step": 5882 }, { "epoch": 0.7159111651962276, "grad_norm": 3.1130802631378174, "learning_rate": 1.468313350449209e-05, "loss": 0.4718, "step": 5883 }, { "epoch": 0.7160328567082446, "grad_norm": 1.487403154373169, "learning_rate": 1.4681409855447025e-05, "loss": 0.4896, "step": 5884 }, { "epoch": 0.7161545482202616, "grad_norm": 3.086695432662964, "learning_rate": 1.4679686028264623e-05, "loss": 0.4607, "step": 5885 }, { "epoch": 0.7162762397322787, "grad_norm": 0.9925442337989807, "learning_rate": 1.4677962023010482e-05, "loss": 0.492, "step": 5886 }, { "epoch": 0.7163979312442957, "grad_norm": 2.014378309249878, "learning_rate": 1.46762378397502e-05, "loss": 0.4685, "step": 5887 }, { "epoch": 0.7165196227563128, "grad_norm": 3.0304832458496094, "learning_rate": 1.4674513478549392e-05, "loss": 0.5593, "step": 5888 }, { "epoch": 0.7166413142683298, "grad_norm": 0.8478283882141113, "learning_rate": 1.467278893947367e-05, "loss": 0.4695, "step": 5889 }, { "epoch": 0.7167630057803468, "grad_norm": 1.388184905052185, "learning_rate": 1.4671064222588655e-05, "loss": 0.4229, "step": 5890 }, { "epoch": 0.7168846972923638, "grad_norm": 3.3339555263519287, "learning_rate": 1.466933932795998e-05, "loss": 0.5191, "step": 5891 }, { "epoch": 0.717006388804381, "grad_norm": 2.435145854949951, "learning_rate": 1.466761425565328e-05, "loss": 0.3819, "step": 5892 }, { "epoch": 0.717128080316398, "grad_norm": 2.5282704830169678, "learning_rate": 1.466588900573419e-05, "loss": 0.3678, "step": 5893 }, { "epoch": 0.717249771828415, "grad_norm": 4.2987799644470215, "learning_rate": 1.466416357826837e-05, "loss": 0.5137, "step": 5894 }, { "epoch": 0.717371463340432, "grad_norm": 2.4013993740081787, "learning_rate": 1.466243797332147e-05, "loss": 0.4572, "step": 5895 }, { "epoch": 0.717493154852449, "grad_norm": 1.6005690097808838, "learning_rate": 1.4660712190959156e-05, "loss": 0.4596, "step": 5896 }, { "epoch": 0.717614846364466, "grad_norm": 1.3890860080718994, "learning_rate": 1.4658986231247096e-05, "loss": 0.4286, "step": 5897 }, { "epoch": 0.7177365378764831, "grad_norm": 2.1254982948303223, "learning_rate": 1.4657260094250965e-05, "loss": 0.5241, "step": 5898 }, { "epoch": 0.7178582293885002, "grad_norm": 0.6179935336112976, "learning_rate": 1.4655533780036447e-05, "loss": 0.4128, "step": 5899 }, { "epoch": 0.7179799209005172, "grad_norm": 1.0235605239868164, "learning_rate": 1.4653807288669234e-05, "loss": 0.459, "step": 5900 }, { "epoch": 0.7181016124125342, "grad_norm": 1.7978922128677368, "learning_rate": 1.4652080620215019e-05, "loss": 0.4812, "step": 5901 }, { "epoch": 0.7182233039245512, "grad_norm": 0.7480745315551758, "learning_rate": 1.465035377473951e-05, "loss": 0.5349, "step": 5902 }, { "epoch": 0.7183449954365683, "grad_norm": 3.734602928161621, "learning_rate": 1.4648626752308411e-05, "loss": 0.4396, "step": 5903 }, { "epoch": 0.7184666869485853, "grad_norm": 1.0031163692474365, "learning_rate": 1.4646899552987444e-05, "loss": 0.5399, "step": 5904 }, { "epoch": 0.7185883784606024, "grad_norm": 1.9062103033065796, "learning_rate": 1.4645172176842328e-05, "loss": 0.5394, "step": 5905 }, { "epoch": 0.7187100699726194, "grad_norm": 1.9268572330474854, "learning_rate": 1.4643444623938795e-05, "loss": 0.4902, "step": 5906 }, { "epoch": 0.7188317614846365, "grad_norm": 4.705201625823975, "learning_rate": 1.4641716894342588e-05, "loss": 0.4799, "step": 5907 }, { "epoch": 0.7189534529966535, "grad_norm": 3.5488810539245605, "learning_rate": 1.4639988988119442e-05, "loss": 0.4139, "step": 5908 }, { "epoch": 0.7190751445086705, "grad_norm": 1.1344313621520996, "learning_rate": 1.4638260905335112e-05, "loss": 0.4726, "step": 5909 }, { "epoch": 0.7191968360206875, "grad_norm": 1.1475894451141357, "learning_rate": 1.4636532646055353e-05, "loss": 0.459, "step": 5910 }, { "epoch": 0.7193185275327046, "grad_norm": 2.2852580547332764, "learning_rate": 1.463480421034593e-05, "loss": 0.5121, "step": 5911 }, { "epoch": 0.7194402190447217, "grad_norm": 1.8809605836868286, "learning_rate": 1.463307559827261e-05, "loss": 0.4656, "step": 5912 }, { "epoch": 0.7195619105567387, "grad_norm": 2.616227865219116, "learning_rate": 1.4631346809901177e-05, "loss": 0.4341, "step": 5913 }, { "epoch": 0.7196836020687557, "grad_norm": 3.135422468185425, "learning_rate": 1.4629617845297412e-05, "loss": 0.5123, "step": 5914 }, { "epoch": 0.7198052935807727, "grad_norm": 0.6078523993492126, "learning_rate": 1.4627888704527106e-05, "loss": 0.4089, "step": 5915 }, { "epoch": 0.7199269850927897, "grad_norm": 1.3834606409072876, "learning_rate": 1.4626159387656053e-05, "loss": 0.4314, "step": 5916 }, { "epoch": 0.7200486766048069, "grad_norm": 1.716572880744934, "learning_rate": 1.4624429894750058e-05, "loss": 0.4798, "step": 5917 }, { "epoch": 0.7201703681168239, "grad_norm": 1.5965250730514526, "learning_rate": 1.4622700225874937e-05, "loss": 0.4619, "step": 5918 }, { "epoch": 0.7202920596288409, "grad_norm": 2.600443124771118, "learning_rate": 1.4620970381096502e-05, "loss": 0.4933, "step": 5919 }, { "epoch": 0.7204137511408579, "grad_norm": 1.8338534832000732, "learning_rate": 1.4619240360480582e-05, "loss": 0.4453, "step": 5920 }, { "epoch": 0.720535442652875, "grad_norm": 0.6676712036132812, "learning_rate": 1.4617510164093004e-05, "loss": 0.4644, "step": 5921 }, { "epoch": 0.720657134164892, "grad_norm": 1.648997187614441, "learning_rate": 1.4615779791999605e-05, "loss": 0.4346, "step": 5922 }, { "epoch": 0.720778825676909, "grad_norm": 0.579127848148346, "learning_rate": 1.4614049244266231e-05, "loss": 0.4259, "step": 5923 }, { "epoch": 0.7209005171889261, "grad_norm": 2.6304335594177246, "learning_rate": 1.4612318520958732e-05, "loss": 0.503, "step": 5924 }, { "epoch": 0.7210222087009431, "grad_norm": 0.8772050738334656, "learning_rate": 1.4610587622142967e-05, "loss": 0.4569, "step": 5925 }, { "epoch": 0.7211439002129602, "grad_norm": 0.6525957584381104, "learning_rate": 1.4608856547884801e-05, "loss": 0.417, "step": 5926 }, { "epoch": 0.7212655917249772, "grad_norm": 0.6636439561843872, "learning_rate": 1.4607125298250102e-05, "loss": 0.4363, "step": 5927 }, { "epoch": 0.7213872832369942, "grad_norm": 1.632570505142212, "learning_rate": 1.460539387330475e-05, "loss": 0.4237, "step": 5928 }, { "epoch": 0.7215089747490112, "grad_norm": 1.3413593769073486, "learning_rate": 1.4603662273114627e-05, "loss": 0.5053, "step": 5929 }, { "epoch": 0.7216306662610283, "grad_norm": 1.0939706563949585, "learning_rate": 1.4601930497745627e-05, "loss": 0.4152, "step": 5930 }, { "epoch": 0.7217523577730454, "grad_norm": 3.562614917755127, "learning_rate": 1.4600198547263646e-05, "loss": 0.3813, "step": 5931 }, { "epoch": 0.7218740492850624, "grad_norm": 2.0604302883148193, "learning_rate": 1.4598466421734587e-05, "loss": 0.4275, "step": 5932 }, { "epoch": 0.7219957407970794, "grad_norm": 2.1646599769592285, "learning_rate": 1.4596734121224364e-05, "loss": 0.512, "step": 5933 }, { "epoch": 0.7221174323090964, "grad_norm": 0.6308103203773499, "learning_rate": 1.4595001645798893e-05, "loss": 0.4383, "step": 5934 }, { "epoch": 0.7222391238211134, "grad_norm": 0.6690347790718079, "learning_rate": 1.4593268995524097e-05, "loss": 0.4302, "step": 5935 }, { "epoch": 0.7223608153331306, "grad_norm": 1.080430030822754, "learning_rate": 1.4591536170465907e-05, "loss": 0.4319, "step": 5936 }, { "epoch": 0.7224825068451476, "grad_norm": 0.5825051069259644, "learning_rate": 1.4589803170690265e-05, "loss": 0.425, "step": 5937 }, { "epoch": 0.7226041983571646, "grad_norm": 0.9939306378364563, "learning_rate": 1.4588069996263112e-05, "loss": 0.4695, "step": 5938 }, { "epoch": 0.7227258898691816, "grad_norm": 1.0614670515060425, "learning_rate": 1.4586336647250396e-05, "loss": 0.4963, "step": 5939 }, { "epoch": 0.7228475813811986, "grad_norm": 0.8363815546035767, "learning_rate": 1.4584603123718077e-05, "loss": 0.4382, "step": 5940 }, { "epoch": 0.7229692728932157, "grad_norm": 1.1701054573059082, "learning_rate": 1.4582869425732123e-05, "loss": 0.4902, "step": 5941 }, { "epoch": 0.7230909644052327, "grad_norm": 3.5971455574035645, "learning_rate": 1.4581135553358499e-05, "loss": 0.4136, "step": 5942 }, { "epoch": 0.7232126559172498, "grad_norm": 0.7977434396743774, "learning_rate": 1.4579401506663187e-05, "loss": 0.4752, "step": 5943 }, { "epoch": 0.7233343474292668, "grad_norm": 3.086181640625, "learning_rate": 1.4577667285712164e-05, "loss": 0.4305, "step": 5944 }, { "epoch": 0.7234560389412839, "grad_norm": 1.7511723041534424, "learning_rate": 1.4575932890571431e-05, "loss": 0.4408, "step": 5945 }, { "epoch": 0.7235777304533009, "grad_norm": 2.2279856204986572, "learning_rate": 1.4574198321306976e-05, "loss": 0.5178, "step": 5946 }, { "epoch": 0.7236994219653179, "grad_norm": 1.210328221321106, "learning_rate": 1.4572463577984807e-05, "loss": 0.4651, "step": 5947 }, { "epoch": 0.7238211134773349, "grad_norm": 0.8268551230430603, "learning_rate": 1.457072866067094e-05, "loss": 0.4507, "step": 5948 }, { "epoch": 0.723942804989352, "grad_norm": 3.1050729751586914, "learning_rate": 1.4568993569431382e-05, "loss": 0.5243, "step": 5949 }, { "epoch": 0.7240644965013691, "grad_norm": 1.531017780303955, "learning_rate": 1.4567258304332162e-05, "loss": 0.4292, "step": 5950 }, { "epoch": 0.7241861880133861, "grad_norm": 2.9399163722991943, "learning_rate": 1.4565522865439309e-05, "loss": 0.5025, "step": 5951 }, { "epoch": 0.7243078795254031, "grad_norm": 1.609010100364685, "learning_rate": 1.4563787252818862e-05, "loss": 0.4655, "step": 5952 }, { "epoch": 0.7244295710374201, "grad_norm": 1.4937900304794312, "learning_rate": 1.456205146653686e-05, "loss": 0.4875, "step": 5953 }, { "epoch": 0.7245512625494371, "grad_norm": 0.7539805769920349, "learning_rate": 1.4560315506659362e-05, "loss": 0.4559, "step": 5954 }, { "epoch": 0.7246729540614543, "grad_norm": 2.937546491622925, "learning_rate": 1.4558579373252414e-05, "loss": 0.4344, "step": 5955 }, { "epoch": 0.7247946455734713, "grad_norm": 2.2965593338012695, "learning_rate": 1.4556843066382091e-05, "loss": 0.4613, "step": 5956 }, { "epoch": 0.7249163370854883, "grad_norm": 2.042698621749878, "learning_rate": 1.4555106586114453e-05, "loss": 0.4273, "step": 5957 }, { "epoch": 0.7250380285975053, "grad_norm": 1.3926624059677124, "learning_rate": 1.4553369932515582e-05, "loss": 0.5431, "step": 5958 }, { "epoch": 0.7251597201095223, "grad_norm": 2.5994324684143066, "learning_rate": 1.4551633105651561e-05, "loss": 0.5101, "step": 5959 }, { "epoch": 0.7252814116215394, "grad_norm": 0.6208077073097229, "learning_rate": 1.4549896105588481e-05, "loss": 0.4981, "step": 5960 }, { "epoch": 0.7254031031335564, "grad_norm": 0.6817079186439514, "learning_rate": 1.4548158932392432e-05, "loss": 0.487, "step": 5961 }, { "epoch": 0.7255247946455735, "grad_norm": 1.5651510953903198, "learning_rate": 1.4546421586129524e-05, "loss": 0.4199, "step": 5962 }, { "epoch": 0.7256464861575905, "grad_norm": 2.3398616313934326, "learning_rate": 1.4544684066865861e-05, "loss": 0.4183, "step": 5963 }, { "epoch": 0.7257681776696076, "grad_norm": 0.9032950401306152, "learning_rate": 1.4542946374667565e-05, "loss": 0.4485, "step": 5964 }, { "epoch": 0.7258898691816246, "grad_norm": 3.494946002960205, "learning_rate": 1.4541208509600756e-05, "loss": 0.5279, "step": 5965 }, { "epoch": 0.7260115606936416, "grad_norm": 1.360865592956543, "learning_rate": 1.4539470471731564e-05, "loss": 0.4893, "step": 5966 }, { "epoch": 0.7261332522056586, "grad_norm": 1.9501760005950928, "learning_rate": 1.4537732261126123e-05, "loss": 0.4504, "step": 5967 }, { "epoch": 0.7262549437176757, "grad_norm": 3.098060369491577, "learning_rate": 1.4535993877850578e-05, "loss": 0.357, "step": 5968 }, { "epoch": 0.7263766352296928, "grad_norm": 1.314513921737671, "learning_rate": 1.4534255321971077e-05, "loss": 0.4599, "step": 5969 }, { "epoch": 0.7264983267417098, "grad_norm": 1.2205268144607544, "learning_rate": 1.4532516593553773e-05, "loss": 0.5137, "step": 5970 }, { "epoch": 0.7266200182537268, "grad_norm": 0.7066939473152161, "learning_rate": 1.4530777692664835e-05, "loss": 0.4336, "step": 5971 }, { "epoch": 0.7267417097657438, "grad_norm": 1.753503680229187, "learning_rate": 1.4529038619370425e-05, "loss": 0.4184, "step": 5972 }, { "epoch": 0.7268634012777608, "grad_norm": 1.8298426866531372, "learning_rate": 1.4527299373736721e-05, "loss": 0.4486, "step": 5973 }, { "epoch": 0.726985092789778, "grad_norm": 3.290282964706421, "learning_rate": 1.4525559955829904e-05, "loss": 0.4086, "step": 5974 }, { "epoch": 0.727106784301795, "grad_norm": 1.6207035779953003, "learning_rate": 1.4523820365716166e-05, "loss": 0.4068, "step": 5975 }, { "epoch": 0.727228475813812, "grad_norm": 1.3039968013763428, "learning_rate": 1.4522080603461696e-05, "loss": 0.4258, "step": 5976 }, { "epoch": 0.727350167325829, "grad_norm": 2.7305996417999268, "learning_rate": 1.4520340669132704e-05, "loss": 0.5397, "step": 5977 }, { "epoch": 0.727471858837846, "grad_norm": 4.471368312835693, "learning_rate": 1.4518600562795389e-05, "loss": 0.514, "step": 5978 }, { "epoch": 0.7275935503498631, "grad_norm": 6.474067211151123, "learning_rate": 1.451686028451597e-05, "loss": 0.5442, "step": 5979 }, { "epoch": 0.7277152418618801, "grad_norm": 3.210071086883545, "learning_rate": 1.4515119834360667e-05, "loss": 0.4903, "step": 5980 }, { "epoch": 0.7278369333738972, "grad_norm": 0.7314683198928833, "learning_rate": 1.4513379212395709e-05, "loss": 0.4202, "step": 5981 }, { "epoch": 0.7279586248859142, "grad_norm": 1.355797290802002, "learning_rate": 1.4511638418687331e-05, "loss": 0.3614, "step": 5982 }, { "epoch": 0.7280803163979312, "grad_norm": 1.362479329109192, "learning_rate": 1.4509897453301774e-05, "loss": 0.381, "step": 5983 }, { "epoch": 0.7282020079099483, "grad_norm": 0.6450584530830383, "learning_rate": 1.450815631630528e-05, "loss": 0.3976, "step": 5984 }, { "epoch": 0.7283236994219653, "grad_norm": 2.72170090675354, "learning_rate": 1.4506415007764107e-05, "loss": 0.5003, "step": 5985 }, { "epoch": 0.7284453909339823, "grad_norm": 4.203404426574707, "learning_rate": 1.4504673527744518e-05, "loss": 0.5351, "step": 5986 }, { "epoch": 0.7285670824459994, "grad_norm": 4.757719993591309, "learning_rate": 1.4502931876312775e-05, "loss": 0.5548, "step": 5987 }, { "epoch": 0.7286887739580165, "grad_norm": 0.9673572778701782, "learning_rate": 1.4501190053535154e-05, "loss": 0.4309, "step": 5988 }, { "epoch": 0.7288104654700335, "grad_norm": 1.2794528007507324, "learning_rate": 1.4499448059477937e-05, "loss": 0.4278, "step": 5989 }, { "epoch": 0.7289321569820505, "grad_norm": 2.066556692123413, "learning_rate": 1.4497705894207406e-05, "loss": 0.4871, "step": 5990 }, { "epoch": 0.7290538484940675, "grad_norm": 1.3739701509475708, "learning_rate": 1.4495963557789854e-05, "loss": 0.4573, "step": 5991 }, { "epoch": 0.7291755400060845, "grad_norm": 0.831739604473114, "learning_rate": 1.4494221050291583e-05, "loss": 0.4695, "step": 5992 }, { "epoch": 0.7292972315181017, "grad_norm": 1.7267261743545532, "learning_rate": 1.44924783717789e-05, "loss": 0.4176, "step": 5993 }, { "epoch": 0.7294189230301187, "grad_norm": 3.431475877761841, "learning_rate": 1.4490735522318118e-05, "loss": 0.469, "step": 5994 }, { "epoch": 0.7295406145421357, "grad_norm": 2.1612870693206787, "learning_rate": 1.4488992501975551e-05, "loss": 0.4403, "step": 5995 }, { "epoch": 0.7296623060541527, "grad_norm": 1.2238311767578125, "learning_rate": 1.4487249310817529e-05, "loss": 0.4537, "step": 5996 }, { "epoch": 0.7297839975661697, "grad_norm": 2.493213176727295, "learning_rate": 1.4485505948910381e-05, "loss": 0.4746, "step": 5997 }, { "epoch": 0.7299056890781868, "grad_norm": 1.2177133560180664, "learning_rate": 1.448376241632045e-05, "loss": 0.4385, "step": 5998 }, { "epoch": 0.7300273805902038, "grad_norm": 1.7608141899108887, "learning_rate": 1.4482018713114076e-05, "loss": 0.5151, "step": 5999 }, { "epoch": 0.7301490721022209, "grad_norm": 1.5174391269683838, "learning_rate": 1.4480274839357614e-05, "loss": 0.4775, "step": 6000 }, { "epoch": 0.7302707636142379, "grad_norm": 1.2709519863128662, "learning_rate": 1.4478530795117418e-05, "loss": 0.4722, "step": 6001 }, { "epoch": 0.730392455126255, "grad_norm": 1.2479143142700195, "learning_rate": 1.4476786580459858e-05, "loss": 0.4332, "step": 6002 }, { "epoch": 0.730514146638272, "grad_norm": 0.7227367758750916, "learning_rate": 1.44750421954513e-05, "loss": 0.4708, "step": 6003 }, { "epoch": 0.730635838150289, "grad_norm": 0.7163572907447815, "learning_rate": 1.4473297640158126e-05, "loss": 0.4706, "step": 6004 }, { "epoch": 0.730757529662306, "grad_norm": 0.9213974475860596, "learning_rate": 1.4471552914646716e-05, "loss": 0.4079, "step": 6005 }, { "epoch": 0.7308792211743231, "grad_norm": 1.2564480304718018, "learning_rate": 1.4469808018983464e-05, "loss": 0.5273, "step": 6006 }, { "epoch": 0.7310009126863402, "grad_norm": 0.6403021812438965, "learning_rate": 1.4468062953234762e-05, "loss": 0.4486, "step": 6007 }, { "epoch": 0.7311226041983572, "grad_norm": 0.9182046055793762, "learning_rate": 1.4466317717467017e-05, "loss": 0.4827, "step": 6008 }, { "epoch": 0.7312442957103742, "grad_norm": 1.4530596733093262, "learning_rate": 1.4464572311746641e-05, "loss": 0.4827, "step": 6009 }, { "epoch": 0.7313659872223912, "grad_norm": 0.9574877619743347, "learning_rate": 1.4462826736140044e-05, "loss": 0.489, "step": 6010 }, { "epoch": 0.7314876787344082, "grad_norm": 1.3845919370651245, "learning_rate": 1.4461080990713652e-05, "loss": 0.4515, "step": 6011 }, { "epoch": 0.7316093702464254, "grad_norm": 0.7245004177093506, "learning_rate": 1.4459335075533898e-05, "loss": 0.4878, "step": 6012 }, { "epoch": 0.7317310617584424, "grad_norm": 0.7056178450584412, "learning_rate": 1.4457588990667213e-05, "loss": 0.5015, "step": 6013 }, { "epoch": 0.7318527532704594, "grad_norm": 1.8699898719787598, "learning_rate": 1.4455842736180037e-05, "loss": 0.497, "step": 6014 }, { "epoch": 0.7319744447824764, "grad_norm": 0.7331851720809937, "learning_rate": 1.4454096312138824e-05, "loss": 0.4927, "step": 6015 }, { "epoch": 0.7320961362944934, "grad_norm": 2.5568206310272217, "learning_rate": 1.4452349718610026e-05, "loss": 0.4548, "step": 6016 }, { "epoch": 0.7322178278065105, "grad_norm": 0.5771918892860413, "learning_rate": 1.4450602955660104e-05, "loss": 0.4428, "step": 6017 }, { "epoch": 0.7323395193185276, "grad_norm": 2.8147075176239014, "learning_rate": 1.4448856023355534e-05, "loss": 0.4689, "step": 6018 }, { "epoch": 0.7324612108305446, "grad_norm": 1.0193289518356323, "learning_rate": 1.4447108921762776e-05, "loss": 0.3991, "step": 6019 }, { "epoch": 0.7325829023425616, "grad_norm": 2.1485724449157715, "learning_rate": 1.4445361650948322e-05, "loss": 0.4746, "step": 6020 }, { "epoch": 0.7327045938545786, "grad_norm": 0.8030677437782288, "learning_rate": 1.4443614210978653e-05, "loss": 0.4164, "step": 6021 }, { "epoch": 0.7328262853665957, "grad_norm": 2.0524990558624268, "learning_rate": 1.4441866601920267e-05, "loss": 0.4891, "step": 6022 }, { "epoch": 0.7329479768786127, "grad_norm": 0.626239538192749, "learning_rate": 1.4440118823839666e-05, "loss": 0.4295, "step": 6023 }, { "epoch": 0.7330696683906297, "grad_norm": 3.944857597351074, "learning_rate": 1.4438370876803352e-05, "loss": 0.5429, "step": 6024 }, { "epoch": 0.7331913599026468, "grad_norm": 1.0516984462738037, "learning_rate": 1.4436622760877837e-05, "loss": 0.489, "step": 6025 }, { "epoch": 0.7333130514146639, "grad_norm": 1.464741587638855, "learning_rate": 1.443487447612964e-05, "loss": 0.4528, "step": 6026 }, { "epoch": 0.7334347429266809, "grad_norm": 1.0691699981689453, "learning_rate": 1.4433126022625294e-05, "loss": 0.4712, "step": 6027 }, { "epoch": 0.7335564344386979, "grad_norm": 3.160796880722046, "learning_rate": 1.4431377400431325e-05, "loss": 0.4193, "step": 6028 }, { "epoch": 0.7336781259507149, "grad_norm": 1.5413644313812256, "learning_rate": 1.4429628609614277e-05, "loss": 0.4308, "step": 6029 }, { "epoch": 0.7337998174627319, "grad_norm": 2.5708987712860107, "learning_rate": 1.4427879650240689e-05, "loss": 0.4258, "step": 6030 }, { "epoch": 0.7339215089747491, "grad_norm": 2.6990456581115723, "learning_rate": 1.4426130522377114e-05, "loss": 0.4086, "step": 6031 }, { "epoch": 0.7340432004867661, "grad_norm": 0.9847862124443054, "learning_rate": 1.4424381226090112e-05, "loss": 0.4118, "step": 6032 }, { "epoch": 0.7341648919987831, "grad_norm": 3.2449450492858887, "learning_rate": 1.4422631761446244e-05, "loss": 0.5218, "step": 6033 }, { "epoch": 0.7342865835108001, "grad_norm": 1.1541451215744019, "learning_rate": 1.4420882128512083e-05, "loss": 0.4187, "step": 6034 }, { "epoch": 0.7344082750228171, "grad_norm": 4.895941257476807, "learning_rate": 1.4419132327354212e-05, "loss": 0.5096, "step": 6035 }, { "epoch": 0.7345299665348342, "grad_norm": 1.1692439317703247, "learning_rate": 1.4417382358039207e-05, "loss": 0.4297, "step": 6036 }, { "epoch": 0.7346516580468513, "grad_norm": 0.8970011472702026, "learning_rate": 1.4415632220633655e-05, "loss": 0.4166, "step": 6037 }, { "epoch": 0.7347733495588683, "grad_norm": 4.252860069274902, "learning_rate": 1.441388191520416e-05, "loss": 0.5466, "step": 6038 }, { "epoch": 0.7348950410708853, "grad_norm": 3.2991886138916016, "learning_rate": 1.4412131441817325e-05, "loss": 0.5121, "step": 6039 }, { "epoch": 0.7350167325829023, "grad_norm": 1.2423970699310303, "learning_rate": 1.4410380800539751e-05, "loss": 0.4284, "step": 6040 }, { "epoch": 0.7351384240949194, "grad_norm": 1.6417961120605469, "learning_rate": 1.4408629991438064e-05, "loss": 0.5017, "step": 6041 }, { "epoch": 0.7352601156069364, "grad_norm": 0.8933529257774353, "learning_rate": 1.4406879014578875e-05, "loss": 0.4613, "step": 6042 }, { "epoch": 0.7353818071189534, "grad_norm": 1.0704344511032104, "learning_rate": 1.440512787002882e-05, "loss": 0.4568, "step": 6043 }, { "epoch": 0.7355034986309705, "grad_norm": 1.5222461223602295, "learning_rate": 1.4403376557854531e-05, "loss": 0.484, "step": 6044 }, { "epoch": 0.7356251901429876, "grad_norm": 1.502522349357605, "learning_rate": 1.440162507812265e-05, "loss": 0.4512, "step": 6045 }, { "epoch": 0.7357468816550046, "grad_norm": 2.4995596408843994, "learning_rate": 1.4399873430899826e-05, "loss": 0.4204, "step": 6046 }, { "epoch": 0.7358685731670216, "grad_norm": 0.9355295896530151, "learning_rate": 1.439812161625271e-05, "loss": 0.4922, "step": 6047 }, { "epoch": 0.7359902646790386, "grad_norm": 0.6649131178855896, "learning_rate": 1.439636963424796e-05, "loss": 0.4532, "step": 6048 }, { "epoch": 0.7361119561910556, "grad_norm": 0.8592821359634399, "learning_rate": 1.4394617484952247e-05, "loss": 0.4581, "step": 6049 }, { "epoch": 0.7362336477030728, "grad_norm": 2.093097448348999, "learning_rate": 1.4392865168432242e-05, "loss": 0.448, "step": 6050 }, { "epoch": 0.7363553392150898, "grad_norm": 2.3281893730163574, "learning_rate": 1.4391112684754624e-05, "loss": 0.5024, "step": 6051 }, { "epoch": 0.7364770307271068, "grad_norm": 1.9858554601669312, "learning_rate": 1.4389360033986081e-05, "loss": 0.4949, "step": 6052 }, { "epoch": 0.7365987222391238, "grad_norm": 1.0654797554016113, "learning_rate": 1.4387607216193301e-05, "loss": 0.4536, "step": 6053 }, { "epoch": 0.7367204137511408, "grad_norm": 0.8458957076072693, "learning_rate": 1.4385854231442988e-05, "loss": 0.5319, "step": 6054 }, { "epoch": 0.7368421052631579, "grad_norm": 1.7338603734970093, "learning_rate": 1.438410107980184e-05, "loss": 0.5203, "step": 6055 }, { "epoch": 0.736963796775175, "grad_norm": 0.9456865787506104, "learning_rate": 1.4382347761336572e-05, "loss": 0.4419, "step": 6056 }, { "epoch": 0.737085488287192, "grad_norm": 0.7714319229125977, "learning_rate": 1.4380594276113899e-05, "loss": 0.4369, "step": 6057 }, { "epoch": 0.737207179799209, "grad_norm": 0.9238236546516418, "learning_rate": 1.4378840624200554e-05, "loss": 0.4569, "step": 6058 }, { "epoch": 0.737328871311226, "grad_norm": 1.561295509338379, "learning_rate": 1.4377086805663253e-05, "loss": 0.4599, "step": 6059 }, { "epoch": 0.7374505628232431, "grad_norm": 1.9544538259506226, "learning_rate": 1.437533282056874e-05, "loss": 0.4566, "step": 6060 }, { "epoch": 0.7375722543352601, "grad_norm": 1.2616060972213745, "learning_rate": 1.4373578668983754e-05, "loss": 0.5024, "step": 6061 }, { "epoch": 0.7376939458472771, "grad_norm": 1.5177977085113525, "learning_rate": 1.4371824350975052e-05, "loss": 0.4671, "step": 6062 }, { "epoch": 0.7378156373592942, "grad_norm": 1.683990240097046, "learning_rate": 1.4370069866609381e-05, "loss": 0.4766, "step": 6063 }, { "epoch": 0.7379373288713112, "grad_norm": 3.2027511596679688, "learning_rate": 1.4368315215953508e-05, "loss": 0.5095, "step": 6064 }, { "epoch": 0.7380590203833283, "grad_norm": 1.4263639450073242, "learning_rate": 1.4366560399074199e-05, "loss": 0.4772, "step": 6065 }, { "epoch": 0.7381807118953453, "grad_norm": 0.7903024554252625, "learning_rate": 1.4364805416038229e-05, "loss": 0.4954, "step": 6066 }, { "epoch": 0.7383024034073623, "grad_norm": 4.501694202423096, "learning_rate": 1.4363050266912375e-05, "loss": 0.4043, "step": 6067 }, { "epoch": 0.7384240949193793, "grad_norm": 1.428065299987793, "learning_rate": 1.4361294951763429e-05, "loss": 0.448, "step": 6068 }, { "epoch": 0.7385457864313965, "grad_norm": 3.916456937789917, "learning_rate": 1.4359539470658184e-05, "loss": 0.4458, "step": 6069 }, { "epoch": 0.7386674779434135, "grad_norm": 5.319366931915283, "learning_rate": 1.4357783823663439e-05, "loss": 0.3954, "step": 6070 }, { "epoch": 0.7387891694554305, "grad_norm": 3.7769486904144287, "learning_rate": 1.4356028010845995e-05, "loss": 0.4294, "step": 6071 }, { "epoch": 0.7389108609674475, "grad_norm": 0.56814044713974, "learning_rate": 1.4354272032272671e-05, "loss": 0.479, "step": 6072 }, { "epoch": 0.7390325524794645, "grad_norm": 3.2404799461364746, "learning_rate": 1.4352515888010285e-05, "loss": 0.5203, "step": 6073 }, { "epoch": 0.7391542439914816, "grad_norm": 0.6911748051643372, "learning_rate": 1.435075957812566e-05, "loss": 0.4232, "step": 6074 }, { "epoch": 0.7392759355034987, "grad_norm": 0.900762677192688, "learning_rate": 1.4349003102685627e-05, "loss": 0.4974, "step": 6075 }, { "epoch": 0.7393976270155157, "grad_norm": 1.5303701162338257, "learning_rate": 1.4347246461757022e-05, "loss": 0.4523, "step": 6076 }, { "epoch": 0.7395193185275327, "grad_norm": 1.6663451194763184, "learning_rate": 1.4345489655406695e-05, "loss": 0.4831, "step": 6077 }, { "epoch": 0.7396410100395497, "grad_norm": 0.6217003464698792, "learning_rate": 1.4343732683701489e-05, "loss": 0.4503, "step": 6078 }, { "epoch": 0.7397627015515668, "grad_norm": 0.7839460372924805, "learning_rate": 1.434197554670826e-05, "loss": 0.4368, "step": 6079 }, { "epoch": 0.7398843930635838, "grad_norm": 0.7203729748725891, "learning_rate": 1.434021824449388e-05, "loss": 0.437, "step": 6080 }, { "epoch": 0.7400060845756008, "grad_norm": 0.592182457447052, "learning_rate": 1.4338460777125211e-05, "loss": 0.4376, "step": 6081 }, { "epoch": 0.7401277760876179, "grad_norm": 2.769014596939087, "learning_rate": 1.4336703144669129e-05, "loss": 0.4962, "step": 6082 }, { "epoch": 0.740249467599635, "grad_norm": 2.8925223350524902, "learning_rate": 1.4334945347192512e-05, "loss": 0.3603, "step": 6083 }, { "epoch": 0.740371159111652, "grad_norm": 1.656357765197754, "learning_rate": 1.4333187384762255e-05, "loss": 0.4227, "step": 6084 }, { "epoch": 0.740492850623669, "grad_norm": 0.7605194449424744, "learning_rate": 1.4331429257445248e-05, "loss": 0.4204, "step": 6085 }, { "epoch": 0.740614542135686, "grad_norm": 1.3129829168319702, "learning_rate": 1.4329670965308393e-05, "loss": 0.5163, "step": 6086 }, { "epoch": 0.740736233647703, "grad_norm": 1.6419928073883057, "learning_rate": 1.4327912508418596e-05, "loss": 0.4725, "step": 6087 }, { "epoch": 0.7408579251597202, "grad_norm": 1.90644109249115, "learning_rate": 1.432615388684277e-05, "loss": 0.457, "step": 6088 }, { "epoch": 0.7409796166717372, "grad_norm": 0.9284657835960388, "learning_rate": 1.4324395100647834e-05, "loss": 0.4033, "step": 6089 }, { "epoch": 0.7411013081837542, "grad_norm": 2.17691969871521, "learning_rate": 1.432263614990071e-05, "loss": 0.4851, "step": 6090 }, { "epoch": 0.7412229996957712, "grad_norm": 1.805864691734314, "learning_rate": 1.4320877034668334e-05, "loss": 0.4579, "step": 6091 }, { "epoch": 0.7413446912077882, "grad_norm": 0.7513481378555298, "learning_rate": 1.431911775501765e-05, "loss": 0.4591, "step": 6092 }, { "epoch": 0.7414663827198053, "grad_norm": 2.5264878273010254, "learning_rate": 1.431735831101559e-05, "loss": 0.4501, "step": 6093 }, { "epoch": 0.7415880742318224, "grad_norm": 1.6010634899139404, "learning_rate": 1.4315598702729108e-05, "loss": 0.4412, "step": 6094 }, { "epoch": 0.7417097657438394, "grad_norm": 0.9898052215576172, "learning_rate": 1.4313838930225163e-05, "loss": 0.4001, "step": 6095 }, { "epoch": 0.7418314572558564, "grad_norm": 0.9766753911972046, "learning_rate": 1.4312078993570722e-05, "loss": 0.515, "step": 6096 }, { "epoch": 0.7419531487678734, "grad_norm": 1.4401381015777588, "learning_rate": 1.4310318892832746e-05, "loss": 0.4613, "step": 6097 }, { "epoch": 0.7420748402798905, "grad_norm": 1.6686652898788452, "learning_rate": 1.4308558628078216e-05, "loss": 0.4764, "step": 6098 }, { "epoch": 0.7421965317919075, "grad_norm": 2.4315953254699707, "learning_rate": 1.430679819937411e-05, "loss": 0.3932, "step": 6099 }, { "epoch": 0.7423182233039245, "grad_norm": 0.7192448377609253, "learning_rate": 1.430503760678742e-05, "loss": 0.4288, "step": 6100 }, { "epoch": 0.7424399148159416, "grad_norm": 1.8684046268463135, "learning_rate": 1.4303276850385138e-05, "loss": 0.4748, "step": 6101 }, { "epoch": 0.7425616063279586, "grad_norm": 1.9394108057022095, "learning_rate": 1.430151593023426e-05, "loss": 0.473, "step": 6102 }, { "epoch": 0.7426832978399757, "grad_norm": 0.9935706257820129, "learning_rate": 1.4299754846401803e-05, "loss": 0.4586, "step": 6103 }, { "epoch": 0.7428049893519927, "grad_norm": 2.932865858078003, "learning_rate": 1.4297993598954773e-05, "loss": 0.507, "step": 6104 }, { "epoch": 0.7429266808640097, "grad_norm": 0.6139119863510132, "learning_rate": 1.4296232187960188e-05, "loss": 0.4553, "step": 6105 }, { "epoch": 0.7430483723760267, "grad_norm": 1.5251184701919556, "learning_rate": 1.4294470613485076e-05, "loss": 0.4521, "step": 6106 }, { "epoch": 0.7431700638880439, "grad_norm": 2.93698787689209, "learning_rate": 1.429270887559647e-05, "loss": 0.4311, "step": 6107 }, { "epoch": 0.7432917554000609, "grad_norm": 0.6989178657531738, "learning_rate": 1.4290946974361406e-05, "loss": 0.475, "step": 6108 }, { "epoch": 0.7434134469120779, "grad_norm": 1.882533311843872, "learning_rate": 1.4289184909846925e-05, "loss": 0.3862, "step": 6109 }, { "epoch": 0.7435351384240949, "grad_norm": 1.3065087795257568, "learning_rate": 1.4287422682120083e-05, "loss": 0.4972, "step": 6110 }, { "epoch": 0.7436568299361119, "grad_norm": 1.854225516319275, "learning_rate": 1.4285660291247934e-05, "loss": 0.5097, "step": 6111 }, { "epoch": 0.743778521448129, "grad_norm": 1.8840022087097168, "learning_rate": 1.4283897737297536e-05, "loss": 0.4979, "step": 6112 }, { "epoch": 0.7439002129601461, "grad_norm": 1.655224084854126, "learning_rate": 1.4282135020335962e-05, "loss": 0.4028, "step": 6113 }, { "epoch": 0.7440219044721631, "grad_norm": 1.3481104373931885, "learning_rate": 1.4280372140430292e-05, "loss": 0.4418, "step": 6114 }, { "epoch": 0.7441435959841801, "grad_norm": 1.0108870267868042, "learning_rate": 1.42786090976476e-05, "loss": 0.4448, "step": 6115 }, { "epoch": 0.7442652874961971, "grad_norm": 3.476997137069702, "learning_rate": 1.4276845892054973e-05, "loss": 0.3725, "step": 6116 }, { "epoch": 0.7443869790082142, "grad_norm": 0.9589830040931702, "learning_rate": 1.427508252371951e-05, "loss": 0.4116, "step": 6117 }, { "epoch": 0.7445086705202312, "grad_norm": 0.716291606426239, "learning_rate": 1.4273318992708306e-05, "loss": 0.4057, "step": 6118 }, { "epoch": 0.7446303620322483, "grad_norm": 0.9443511366844177, "learning_rate": 1.4271555299088471e-05, "loss": 0.4097, "step": 6119 }, { "epoch": 0.7447520535442653, "grad_norm": 2.6531548500061035, "learning_rate": 1.4269791442927112e-05, "loss": 0.4657, "step": 6120 }, { "epoch": 0.7448737450562823, "grad_norm": 2.101766586303711, "learning_rate": 1.4268027424291355e-05, "loss": 0.4665, "step": 6121 }, { "epoch": 0.7449954365682994, "grad_norm": 4.310474872589111, "learning_rate": 1.426626324324832e-05, "loss": 0.522, "step": 6122 }, { "epoch": 0.7451171280803164, "grad_norm": 1.9548985958099365, "learning_rate": 1.4264498899865133e-05, "loss": 0.4162, "step": 6123 }, { "epoch": 0.7452388195923334, "grad_norm": 2.3424088954925537, "learning_rate": 1.4262734394208938e-05, "loss": 0.5095, "step": 6124 }, { "epoch": 0.7453605111043504, "grad_norm": 0.6676538586616516, "learning_rate": 1.4260969726346878e-05, "loss": 0.4723, "step": 6125 }, { "epoch": 0.7454822026163676, "grad_norm": 3.512550115585327, "learning_rate": 1.4259204896346099e-05, "loss": 0.4389, "step": 6126 }, { "epoch": 0.7456038941283846, "grad_norm": 3.5912954807281494, "learning_rate": 1.425743990427376e-05, "loss": 0.5473, "step": 6127 }, { "epoch": 0.7457255856404016, "grad_norm": 1.7361263036727905, "learning_rate": 1.4255674750197017e-05, "loss": 0.4941, "step": 6128 }, { "epoch": 0.7458472771524186, "grad_norm": 3.1802353858947754, "learning_rate": 1.425390943418304e-05, "loss": 0.4235, "step": 6129 }, { "epoch": 0.7459689686644356, "grad_norm": 0.8718621730804443, "learning_rate": 1.4252143956299008e-05, "loss": 0.5157, "step": 6130 }, { "epoch": 0.7460906601764526, "grad_norm": 3.226101875305176, "learning_rate": 1.4250378316612094e-05, "loss": 0.4383, "step": 6131 }, { "epoch": 0.7462123516884698, "grad_norm": 2.2750840187072754, "learning_rate": 1.4248612515189486e-05, "loss": 0.4161, "step": 6132 }, { "epoch": 0.7463340432004868, "grad_norm": 1.2184933423995972, "learning_rate": 1.4246846552098382e-05, "loss": 0.5146, "step": 6133 }, { "epoch": 0.7464557347125038, "grad_norm": 0.802947998046875, "learning_rate": 1.4245080427405975e-05, "loss": 0.4813, "step": 6134 }, { "epoch": 0.7465774262245208, "grad_norm": 0.8590162992477417, "learning_rate": 1.4243314141179467e-05, "loss": 0.4744, "step": 6135 }, { "epoch": 0.7466991177365379, "grad_norm": 1.9463750123977661, "learning_rate": 1.4241547693486075e-05, "loss": 0.4756, "step": 6136 }, { "epoch": 0.7468208092485549, "grad_norm": 0.5914576649665833, "learning_rate": 1.4239781084393017e-05, "loss": 0.4592, "step": 6137 }, { "epoch": 0.746942500760572, "grad_norm": 0.985463559627533, "learning_rate": 1.423801431396751e-05, "loss": 0.4602, "step": 6138 }, { "epoch": 0.747064192272589, "grad_norm": 1.106080174446106, "learning_rate": 1.4236247382276787e-05, "loss": 0.4303, "step": 6139 }, { "epoch": 0.747185883784606, "grad_norm": 0.8888548612594604, "learning_rate": 1.4234480289388079e-05, "loss": 0.4443, "step": 6140 }, { "epoch": 0.7473075752966231, "grad_norm": 0.67289799451828, "learning_rate": 1.4232713035368637e-05, "loss": 0.4474, "step": 6141 }, { "epoch": 0.7474292668086401, "grad_norm": 1.9233694076538086, "learning_rate": 1.42309456202857e-05, "loss": 0.4564, "step": 6142 }, { "epoch": 0.7475509583206571, "grad_norm": 1.9132890701293945, "learning_rate": 1.4229178044206522e-05, "loss": 0.4799, "step": 6143 }, { "epoch": 0.7476726498326741, "grad_norm": 0.6200768947601318, "learning_rate": 1.4227410307198368e-05, "loss": 0.4231, "step": 6144 }, { "epoch": 0.7477943413446912, "grad_norm": 0.9088713526725769, "learning_rate": 1.4225642409328504e-05, "loss": 0.4461, "step": 6145 }, { "epoch": 0.7479160328567083, "grad_norm": 1.5293049812316895, "learning_rate": 1.4223874350664195e-05, "loss": 0.4041, "step": 6146 }, { "epoch": 0.7480377243687253, "grad_norm": 1.091378092765808, "learning_rate": 1.4222106131272726e-05, "loss": 0.4448, "step": 6147 }, { "epoch": 0.7481594158807423, "grad_norm": 1.5216516256332397, "learning_rate": 1.422033775122138e-05, "loss": 0.465, "step": 6148 }, { "epoch": 0.7482811073927593, "grad_norm": 0.6210158467292786, "learning_rate": 1.4218569210577446e-05, "loss": 0.4484, "step": 6149 }, { "epoch": 0.7484027989047763, "grad_norm": 1.097381830215454, "learning_rate": 1.4216800509408222e-05, "loss": 0.4377, "step": 6150 }, { "epoch": 0.7485244904167935, "grad_norm": 1.5110297203063965, "learning_rate": 1.421503164778101e-05, "loss": 0.4224, "step": 6151 }, { "epoch": 0.7486461819288105, "grad_norm": 2.089169979095459, "learning_rate": 1.421326262576312e-05, "loss": 0.4273, "step": 6152 }, { "epoch": 0.7487678734408275, "grad_norm": 0.6483123302459717, "learning_rate": 1.4211493443421867e-05, "loss": 0.4432, "step": 6153 }, { "epoch": 0.7488895649528445, "grad_norm": 0.6911384463310242, "learning_rate": 1.4209724100824569e-05, "loss": 0.4114, "step": 6154 }, { "epoch": 0.7490112564648616, "grad_norm": 2.1643879413604736, "learning_rate": 1.4207954598038554e-05, "loss": 0.4626, "step": 6155 }, { "epoch": 0.7491329479768786, "grad_norm": 1.4167627096176147, "learning_rate": 1.4206184935131163e-05, "loss": 0.4771, "step": 6156 }, { "epoch": 0.7492546394888957, "grad_norm": 5.108611583709717, "learning_rate": 1.4204415112169722e-05, "loss": 0.5653, "step": 6157 }, { "epoch": 0.7493763310009127, "grad_norm": 1.5637836456298828, "learning_rate": 1.4202645129221586e-05, "loss": 0.4755, "step": 6158 }, { "epoch": 0.7494980225129297, "grad_norm": 1.8591865301132202, "learning_rate": 1.4200874986354103e-05, "loss": 0.4654, "step": 6159 }, { "epoch": 0.7496197140249468, "grad_norm": 0.8010194301605225, "learning_rate": 1.4199104683634632e-05, "loss": 0.475, "step": 6160 }, { "epoch": 0.7497414055369638, "grad_norm": 3.587212085723877, "learning_rate": 1.4197334221130536e-05, "loss": 0.4319, "step": 6161 }, { "epoch": 0.7498630970489808, "grad_norm": 1.6658228635787964, "learning_rate": 1.4195563598909186e-05, "loss": 0.504, "step": 6162 }, { "epoch": 0.7499847885609978, "grad_norm": 4.480776786804199, "learning_rate": 1.4193792817037956e-05, "loss": 0.4505, "step": 6163 }, { "epoch": 0.750106480073015, "grad_norm": 2.69167160987854, "learning_rate": 1.419202187558423e-05, "loss": 0.506, "step": 6164 }, { "epoch": 0.750228171585032, "grad_norm": 1.3299251794815063, "learning_rate": 1.4190250774615391e-05, "loss": 0.4919, "step": 6165 }, { "epoch": 0.750349863097049, "grad_norm": 5.226564884185791, "learning_rate": 1.4188479514198839e-05, "loss": 0.3578, "step": 6166 }, { "epoch": 0.750471554609066, "grad_norm": 1.679444670677185, "learning_rate": 1.4186708094401974e-05, "loss": 0.458, "step": 6167 }, { "epoch": 0.750593246121083, "grad_norm": 2.688237190246582, "learning_rate": 1.4184936515292197e-05, "loss": 0.4935, "step": 6168 }, { "epoch": 0.7507149376331, "grad_norm": 1.224530577659607, "learning_rate": 1.4183164776936924e-05, "loss": 0.4572, "step": 6169 }, { "epoch": 0.7508366291451172, "grad_norm": 0.6217115521430969, "learning_rate": 1.4181392879403571e-05, "loss": 0.4263, "step": 6170 }, { "epoch": 0.7509583206571342, "grad_norm": 1.9961968660354614, "learning_rate": 1.4179620822759565e-05, "loss": 0.4795, "step": 6171 }, { "epoch": 0.7510800121691512, "grad_norm": 0.787127673625946, "learning_rate": 1.4177848607072338e-05, "loss": 0.4202, "step": 6172 }, { "epoch": 0.7512017036811682, "grad_norm": 3.706024408340454, "learning_rate": 1.4176076232409321e-05, "loss": 0.515, "step": 6173 }, { "epoch": 0.7513233951931853, "grad_norm": 1.176655888557434, "learning_rate": 1.4174303698837959e-05, "loss": 0.4429, "step": 6174 }, { "epoch": 0.7514450867052023, "grad_norm": 0.9679132699966431, "learning_rate": 1.4172531006425702e-05, "loss": 0.4408, "step": 6175 }, { "epoch": 0.7515667782172194, "grad_norm": 2.6041781902313232, "learning_rate": 1.4170758155240002e-05, "loss": 0.509, "step": 6176 }, { "epoch": 0.7516884697292364, "grad_norm": 4.564065456390381, "learning_rate": 1.4168985145348325e-05, "loss": 0.3952, "step": 6177 }, { "epoch": 0.7518101612412534, "grad_norm": 3.0847840309143066, "learning_rate": 1.4167211976818131e-05, "loss": 0.4491, "step": 6178 }, { "epoch": 0.7519318527532705, "grad_norm": 4.316932201385498, "learning_rate": 1.4165438649716899e-05, "loss": 0.3967, "step": 6179 }, { "epoch": 0.7520535442652875, "grad_norm": 2.947129726409912, "learning_rate": 1.4163665164112099e-05, "loss": 0.4491, "step": 6180 }, { "epoch": 0.7521752357773045, "grad_norm": 1.3479586839675903, "learning_rate": 1.4161891520071222e-05, "loss": 0.471, "step": 6181 }, { "epoch": 0.7522969272893215, "grad_norm": 0.9378282427787781, "learning_rate": 1.4160117717661761e-05, "loss": 0.4513, "step": 6182 }, { "epoch": 0.7524186188013386, "grad_norm": 1.7979302406311035, "learning_rate": 1.4158343756951207e-05, "loss": 0.502, "step": 6183 }, { "epoch": 0.7525403103133557, "grad_norm": 1.7357534170150757, "learning_rate": 1.4156569638007067e-05, "loss": 0.4453, "step": 6184 }, { "epoch": 0.7526620018253727, "grad_norm": 0.6115503311157227, "learning_rate": 1.4154795360896851e-05, "loss": 0.4293, "step": 6185 }, { "epoch": 0.7527836933373897, "grad_norm": 0.5923071503639221, "learning_rate": 1.4153020925688067e-05, "loss": 0.428, "step": 6186 }, { "epoch": 0.7529053848494067, "grad_norm": 1.1753947734832764, "learning_rate": 1.4151246332448245e-05, "loss": 0.4243, "step": 6187 }, { "epoch": 0.7530270763614237, "grad_norm": 1.2448301315307617, "learning_rate": 1.4149471581244904e-05, "loss": 0.4523, "step": 6188 }, { "epoch": 0.7531487678734409, "grad_norm": 2.440727949142456, "learning_rate": 1.4147696672145581e-05, "loss": 0.3989, "step": 6189 }, { "epoch": 0.7532704593854579, "grad_norm": 1.6212975978851318, "learning_rate": 1.4145921605217818e-05, "loss": 0.4734, "step": 6190 }, { "epoch": 0.7533921508974749, "grad_norm": 2.1013755798339844, "learning_rate": 1.4144146380529153e-05, "loss": 0.4352, "step": 6191 }, { "epoch": 0.7535138424094919, "grad_norm": 1.486703634262085, "learning_rate": 1.4142370998147142e-05, "loss": 0.3978, "step": 6192 }, { "epoch": 0.753635533921509, "grad_norm": 1.0408581495285034, "learning_rate": 1.4140595458139336e-05, "loss": 0.4439, "step": 6193 }, { "epoch": 0.753757225433526, "grad_norm": 3.242246389389038, "learning_rate": 1.4138819760573307e-05, "loss": 0.5018, "step": 6194 }, { "epoch": 0.7538789169455431, "grad_norm": 4.305569648742676, "learning_rate": 1.4137043905516618e-05, "loss": 0.4965, "step": 6195 }, { "epoch": 0.7540006084575601, "grad_norm": 2.4217042922973633, "learning_rate": 1.4135267893036846e-05, "loss": 0.4811, "step": 6196 }, { "epoch": 0.7541222999695771, "grad_norm": 1.1083225011825562, "learning_rate": 1.4133491723201568e-05, "loss": 0.4583, "step": 6197 }, { "epoch": 0.7542439914815942, "grad_norm": 1.0425829887390137, "learning_rate": 1.4131715396078378e-05, "loss": 0.3968, "step": 6198 }, { "epoch": 0.7543656829936112, "grad_norm": 1.5561612844467163, "learning_rate": 1.4129938911734862e-05, "loss": 0.4804, "step": 6199 }, { "epoch": 0.7544873745056282, "grad_norm": 1.563738226890564, "learning_rate": 1.4128162270238624e-05, "loss": 0.4653, "step": 6200 }, { "epoch": 0.7546090660176453, "grad_norm": 0.6354861259460449, "learning_rate": 1.4126385471657264e-05, "loss": 0.4298, "step": 6201 }, { "epoch": 0.7547307575296623, "grad_norm": 2.2225916385650635, "learning_rate": 1.41246085160584e-05, "loss": 0.5005, "step": 6202 }, { "epoch": 0.7548524490416794, "grad_norm": 1.5832833051681519, "learning_rate": 1.412283140350964e-05, "loss": 0.4206, "step": 6203 }, { "epoch": 0.7549741405536964, "grad_norm": 0.7486116886138916, "learning_rate": 1.4121054134078614e-05, "loss": 0.4735, "step": 6204 }, { "epoch": 0.7550958320657134, "grad_norm": 1.1001203060150146, "learning_rate": 1.4119276707832947e-05, "loss": 0.4766, "step": 6205 }, { "epoch": 0.7552175235777304, "grad_norm": 3.053910732269287, "learning_rate": 1.4117499124840275e-05, "loss": 0.4291, "step": 6206 }, { "epoch": 0.7553392150897474, "grad_norm": 1.651018500328064, "learning_rate": 1.4115721385168238e-05, "loss": 0.446, "step": 6207 }, { "epoch": 0.7554609066017646, "grad_norm": 2.9158413410186768, "learning_rate": 1.4113943488884484e-05, "loss": 0.4949, "step": 6208 }, { "epoch": 0.7555825981137816, "grad_norm": 3.128105401992798, "learning_rate": 1.4112165436056665e-05, "loss": 0.3843, "step": 6209 }, { "epoch": 0.7557042896257986, "grad_norm": 4.592965602874756, "learning_rate": 1.411038722675244e-05, "loss": 0.5558, "step": 6210 }, { "epoch": 0.7558259811378156, "grad_norm": 2.2755916118621826, "learning_rate": 1.4108608861039471e-05, "loss": 0.4778, "step": 6211 }, { "epoch": 0.7559476726498326, "grad_norm": 2.344993829727173, "learning_rate": 1.410683033898543e-05, "loss": 0.4826, "step": 6212 }, { "epoch": 0.7560693641618497, "grad_norm": 0.717507004737854, "learning_rate": 1.4105051660658e-05, "loss": 0.4191, "step": 6213 }, { "epoch": 0.7561910556738668, "grad_norm": 1.4596961736679077, "learning_rate": 1.4103272826124851e-05, "loss": 0.3941, "step": 6214 }, { "epoch": 0.7563127471858838, "grad_norm": 4.0464067459106445, "learning_rate": 1.410149383545368e-05, "loss": 0.5399, "step": 6215 }, { "epoch": 0.7564344386979008, "grad_norm": 1.7963706254959106, "learning_rate": 1.4099714688712176e-05, "loss": 0.4045, "step": 6216 }, { "epoch": 0.7565561302099179, "grad_norm": 0.8785014152526855, "learning_rate": 1.4097935385968047e-05, "loss": 0.4383, "step": 6217 }, { "epoch": 0.7566778217219349, "grad_norm": 0.9378146529197693, "learning_rate": 1.409615592728899e-05, "loss": 0.4987, "step": 6218 }, { "epoch": 0.7567995132339519, "grad_norm": 0.8495815396308899, "learning_rate": 1.4094376312742726e-05, "loss": 0.4569, "step": 6219 }, { "epoch": 0.756921204745969, "grad_norm": 3.7105400562286377, "learning_rate": 1.4092596542396964e-05, "loss": 0.3826, "step": 6220 }, { "epoch": 0.757042896257986, "grad_norm": 0.7114666104316711, "learning_rate": 1.4090816616319436e-05, "loss": 0.4599, "step": 6221 }, { "epoch": 0.7571645877700031, "grad_norm": 0.859508216381073, "learning_rate": 1.4089036534577865e-05, "loss": 0.442, "step": 6222 }, { "epoch": 0.7572862792820201, "grad_norm": 1.110184907913208, "learning_rate": 1.4087256297239992e-05, "loss": 0.4818, "step": 6223 }, { "epoch": 0.7574079707940371, "grad_norm": 0.813687801361084, "learning_rate": 1.4085475904373555e-05, "loss": 0.4465, "step": 6224 }, { "epoch": 0.7575296623060541, "grad_norm": 0.7737751007080078, "learning_rate": 1.4083695356046305e-05, "loss": 0.4602, "step": 6225 }, { "epoch": 0.7576513538180711, "grad_norm": 1.0113240480422974, "learning_rate": 1.4081914652325993e-05, "loss": 0.4702, "step": 6226 }, { "epoch": 0.7577730453300883, "grad_norm": 0.7339016795158386, "learning_rate": 1.408013379328038e-05, "loss": 0.4586, "step": 6227 }, { "epoch": 0.7578947368421053, "grad_norm": 3.585162878036499, "learning_rate": 1.407835277897723e-05, "loss": 0.5134, "step": 6228 }, { "epoch": 0.7580164283541223, "grad_norm": 2.021517515182495, "learning_rate": 1.4076571609484315e-05, "loss": 0.492, "step": 6229 }, { "epoch": 0.7581381198661393, "grad_norm": 0.9600722789764404, "learning_rate": 1.4074790284869413e-05, "loss": 0.4608, "step": 6230 }, { "epoch": 0.7582598113781563, "grad_norm": 0.7058702111244202, "learning_rate": 1.4073008805200305e-05, "loss": 0.5258, "step": 6231 }, { "epoch": 0.7583815028901734, "grad_norm": 1.4849412441253662, "learning_rate": 1.4071227170544785e-05, "loss": 0.4701, "step": 6232 }, { "epoch": 0.7585031944021905, "grad_norm": 2.2401154041290283, "learning_rate": 1.4069445380970642e-05, "loss": 0.4296, "step": 6233 }, { "epoch": 0.7586248859142075, "grad_norm": 2.012572765350342, "learning_rate": 1.4067663436545678e-05, "loss": 0.503, "step": 6234 }, { "epoch": 0.7587465774262245, "grad_norm": 2.148432493209839, "learning_rate": 1.4065881337337702e-05, "loss": 0.4142, "step": 6235 }, { "epoch": 0.7588682689382416, "grad_norm": 2.492414951324463, "learning_rate": 1.4064099083414524e-05, "loss": 0.4166, "step": 6236 }, { "epoch": 0.7589899604502586, "grad_norm": 0.5464257001876831, "learning_rate": 1.4062316674843963e-05, "loss": 0.4455, "step": 6237 }, { "epoch": 0.7591116519622756, "grad_norm": 0.5835963487625122, "learning_rate": 1.4060534111693844e-05, "loss": 0.4577, "step": 6238 }, { "epoch": 0.7592333434742927, "grad_norm": 0.5076159238815308, "learning_rate": 1.4058751394032e-05, "loss": 0.4497, "step": 6239 }, { "epoch": 0.7593550349863097, "grad_norm": 0.6692099571228027, "learning_rate": 1.4056968521926263e-05, "loss": 0.4483, "step": 6240 }, { "epoch": 0.7594767264983268, "grad_norm": 2.1873650550842285, "learning_rate": 1.4055185495444471e-05, "loss": 0.4726, "step": 6241 }, { "epoch": 0.7595984180103438, "grad_norm": 2.5355031490325928, "learning_rate": 1.4053402314654485e-05, "loss": 0.507, "step": 6242 }, { "epoch": 0.7597201095223608, "grad_norm": 0.678715169429779, "learning_rate": 1.4051618979624148e-05, "loss": 0.4156, "step": 6243 }, { "epoch": 0.7598418010343778, "grad_norm": 1.4492064714431763, "learning_rate": 1.4049835490421322e-05, "loss": 0.4665, "step": 6244 }, { "epoch": 0.7599634925463948, "grad_norm": 1.2751160860061646, "learning_rate": 1.4048051847113872e-05, "loss": 0.4664, "step": 6245 }, { "epoch": 0.760085184058412, "grad_norm": 1.7432068586349487, "learning_rate": 1.4046268049769671e-05, "loss": 0.5113, "step": 6246 }, { "epoch": 0.760206875570429, "grad_norm": 1.3685929775238037, "learning_rate": 1.4044484098456596e-05, "loss": 0.4781, "step": 6247 }, { "epoch": 0.760328567082446, "grad_norm": 0.6925094723701477, "learning_rate": 1.404269999324253e-05, "loss": 0.4705, "step": 6248 }, { "epoch": 0.760450258594463, "grad_norm": 2.2157018184661865, "learning_rate": 1.4040915734195362e-05, "loss": 0.5278, "step": 6249 }, { "epoch": 0.76057195010648, "grad_norm": 0.6025604009628296, "learning_rate": 1.4039131321382981e-05, "loss": 0.463, "step": 6250 }, { "epoch": 0.7606936416184971, "grad_norm": 2.4737188816070557, "learning_rate": 1.4037346754873297e-05, "loss": 0.4322, "step": 6251 }, { "epoch": 0.7608153331305142, "grad_norm": 0.7999370694160461, "learning_rate": 1.4035562034734212e-05, "loss": 0.4926, "step": 6252 }, { "epoch": 0.7609370246425312, "grad_norm": 1.7495086193084717, "learning_rate": 1.4033777161033636e-05, "loss": 0.4549, "step": 6253 }, { "epoch": 0.7610587161545482, "grad_norm": 3.3959546089172363, "learning_rate": 1.4031992133839493e-05, "loss": 0.4324, "step": 6254 }, { "epoch": 0.7611804076665653, "grad_norm": 1.3222390413284302, "learning_rate": 1.4030206953219703e-05, "loss": 0.3875, "step": 6255 }, { "epoch": 0.7613020991785823, "grad_norm": 3.336341619491577, "learning_rate": 1.4028421619242195e-05, "loss": 0.518, "step": 6256 }, { "epoch": 0.7614237906905993, "grad_norm": 1.691164493560791, "learning_rate": 1.4026636131974905e-05, "loss": 0.4753, "step": 6257 }, { "epoch": 0.7615454822026164, "grad_norm": 1.8130719661712646, "learning_rate": 1.4024850491485777e-05, "loss": 0.4808, "step": 6258 }, { "epoch": 0.7616671737146334, "grad_norm": 2.3917202949523926, "learning_rate": 1.4023064697842759e-05, "loss": 0.4737, "step": 6259 }, { "epoch": 0.7617888652266505, "grad_norm": 2.974717140197754, "learning_rate": 1.4021278751113798e-05, "loss": 0.5057, "step": 6260 }, { "epoch": 0.7619105567386675, "grad_norm": 0.7627979516983032, "learning_rate": 1.4019492651366857e-05, "loss": 0.452, "step": 6261 }, { "epoch": 0.7620322482506845, "grad_norm": 1.5494791269302368, "learning_rate": 1.4017706398669903e-05, "loss": 0.5055, "step": 6262 }, { "epoch": 0.7621539397627015, "grad_norm": 0.7332980036735535, "learning_rate": 1.4015919993090903e-05, "loss": 0.4968, "step": 6263 }, { "epoch": 0.7622756312747185, "grad_norm": 1.1335598230361938, "learning_rate": 1.4014133434697834e-05, "loss": 0.5001, "step": 6264 }, { "epoch": 0.7623973227867357, "grad_norm": 4.4941911697387695, "learning_rate": 1.4012346723558682e-05, "loss": 0.4083, "step": 6265 }, { "epoch": 0.7625190142987527, "grad_norm": 3.962347984313965, "learning_rate": 1.4010559859741432e-05, "loss": 0.4492, "step": 6266 }, { "epoch": 0.7626407058107697, "grad_norm": 0.9826950430870056, "learning_rate": 1.4008772843314073e-05, "loss": 0.5136, "step": 6267 }, { "epoch": 0.7627623973227867, "grad_norm": 0.7330333590507507, "learning_rate": 1.4006985674344614e-05, "loss": 0.5176, "step": 6268 }, { "epoch": 0.7628840888348037, "grad_norm": 2.1018736362457275, "learning_rate": 1.4005198352901057e-05, "loss": 0.4189, "step": 6269 }, { "epoch": 0.7630057803468208, "grad_norm": 0.92769855260849, "learning_rate": 1.4003410879051409e-05, "loss": 0.4839, "step": 6270 }, { "epoch": 0.7631274718588379, "grad_norm": 1.6458044052124023, "learning_rate": 1.4001623252863692e-05, "loss": 0.4103, "step": 6271 }, { "epoch": 0.7632491633708549, "grad_norm": 1.2086974382400513, "learning_rate": 1.3999835474405927e-05, "loss": 0.402, "step": 6272 }, { "epoch": 0.7633708548828719, "grad_norm": 1.6740870475769043, "learning_rate": 1.3998047543746144e-05, "loss": 0.4497, "step": 6273 }, { "epoch": 0.763492546394889, "grad_norm": 2.8389978408813477, "learning_rate": 1.3996259460952373e-05, "loss": 0.4674, "step": 6274 }, { "epoch": 0.763614237906906, "grad_norm": 3.967487096786499, "learning_rate": 1.399447122609266e-05, "loss": 0.506, "step": 6275 }, { "epoch": 0.763735929418923, "grad_norm": 3.609405755996704, "learning_rate": 1.3992682839235048e-05, "loss": 0.5023, "step": 6276 }, { "epoch": 0.7638576209309401, "grad_norm": 2.6832504272460938, "learning_rate": 1.3990894300447594e-05, "loss": 0.4316, "step": 6277 }, { "epoch": 0.7639793124429571, "grad_norm": 0.5120570659637451, "learning_rate": 1.3989105609798346e-05, "loss": 0.4213, "step": 6278 }, { "epoch": 0.7641010039549742, "grad_norm": 1.5480200052261353, "learning_rate": 1.3987316767355375e-05, "loss": 0.4727, "step": 6279 }, { "epoch": 0.7642226954669912, "grad_norm": 0.5076949596405029, "learning_rate": 1.3985527773186744e-05, "loss": 0.4282, "step": 6280 }, { "epoch": 0.7643443869790082, "grad_norm": 0.569638192653656, "learning_rate": 1.3983738627360536e-05, "loss": 0.4255, "step": 6281 }, { "epoch": 0.7644660784910252, "grad_norm": 2.1035823822021484, "learning_rate": 1.3981949329944828e-05, "loss": 0.4337, "step": 6282 }, { "epoch": 0.7645877700030422, "grad_norm": 1.754557490348816, "learning_rate": 1.3980159881007703e-05, "loss": 0.428, "step": 6283 }, { "epoch": 0.7647094615150594, "grad_norm": 2.9902520179748535, "learning_rate": 1.3978370280617255e-05, "loss": 0.541, "step": 6284 }, { "epoch": 0.7648311530270764, "grad_norm": 0.9860368371009827, "learning_rate": 1.3976580528841588e-05, "loss": 0.4641, "step": 6285 }, { "epoch": 0.7649528445390934, "grad_norm": 1.414223551750183, "learning_rate": 1.3974790625748795e-05, "loss": 0.4627, "step": 6286 }, { "epoch": 0.7650745360511104, "grad_norm": 0.855566680431366, "learning_rate": 1.3973000571406995e-05, "loss": 0.4921, "step": 6287 }, { "epoch": 0.7651962275631274, "grad_norm": 0.7208280563354492, "learning_rate": 1.39712103658843e-05, "loss": 0.4161, "step": 6288 }, { "epoch": 0.7653179190751445, "grad_norm": 1.3667845726013184, "learning_rate": 1.3969420009248829e-05, "loss": 0.4772, "step": 6289 }, { "epoch": 0.7654396105871616, "grad_norm": 1.8280746936798096, "learning_rate": 1.3967629501568709e-05, "loss": 0.4807, "step": 6290 }, { "epoch": 0.7655613020991786, "grad_norm": 0.6588313579559326, "learning_rate": 1.3965838842912076e-05, "loss": 0.5001, "step": 6291 }, { "epoch": 0.7656829936111956, "grad_norm": 1.9605486392974854, "learning_rate": 1.3964048033347066e-05, "loss": 0.4445, "step": 6292 }, { "epoch": 0.7658046851232126, "grad_norm": 2.34366512298584, "learning_rate": 1.3962257072941823e-05, "loss": 0.4059, "step": 6293 }, { "epoch": 0.7659263766352297, "grad_norm": 1.2740964889526367, "learning_rate": 1.3960465961764497e-05, "loss": 0.4971, "step": 6294 }, { "epoch": 0.7660480681472467, "grad_norm": 2.097116470336914, "learning_rate": 1.3958674699883244e-05, "loss": 0.4322, "step": 6295 }, { "epoch": 0.7661697596592638, "grad_norm": 2.9974052906036377, "learning_rate": 1.3956883287366223e-05, "loss": 0.3888, "step": 6296 }, { "epoch": 0.7662914511712808, "grad_norm": 1.7853320837020874, "learning_rate": 1.3955091724281603e-05, "loss": 0.3838, "step": 6297 }, { "epoch": 0.7664131426832979, "grad_norm": 2.2220449447631836, "learning_rate": 1.3953300010697558e-05, "loss": 0.4776, "step": 6298 }, { "epoch": 0.7665348341953149, "grad_norm": 4.222470760345459, "learning_rate": 1.3951508146682265e-05, "loss": 0.5196, "step": 6299 }, { "epoch": 0.7666565257073319, "grad_norm": 2.9106667041778564, "learning_rate": 1.3949716132303905e-05, "loss": 0.4976, "step": 6300 }, { "epoch": 0.7667782172193489, "grad_norm": 0.9559108018875122, "learning_rate": 1.3947923967630673e-05, "loss": 0.4347, "step": 6301 }, { "epoch": 0.766899908731366, "grad_norm": 1.8563309907913208, "learning_rate": 1.3946131652730758e-05, "loss": 0.4669, "step": 6302 }, { "epoch": 0.7670216002433831, "grad_norm": 0.6632311344146729, "learning_rate": 1.3944339187672373e-05, "loss": 0.4586, "step": 6303 }, { "epoch": 0.7671432917554001, "grad_norm": 1.7021671533584595, "learning_rate": 1.3942546572523713e-05, "loss": 0.4099, "step": 6304 }, { "epoch": 0.7672649832674171, "grad_norm": 1.3959519863128662, "learning_rate": 1.3940753807352993e-05, "loss": 0.4419, "step": 6305 }, { "epoch": 0.7673866747794341, "grad_norm": 1.1899783611297607, "learning_rate": 1.3938960892228439e-05, "loss": 0.4721, "step": 6306 }, { "epoch": 0.7675083662914511, "grad_norm": 1.665818452835083, "learning_rate": 1.3937167827218269e-05, "loss": 0.4059, "step": 6307 }, { "epoch": 0.7676300578034682, "grad_norm": 2.4116342067718506, "learning_rate": 1.393537461239071e-05, "loss": 0.4999, "step": 6308 }, { "epoch": 0.7677517493154853, "grad_norm": 1.0609209537506104, "learning_rate": 1.3933581247814003e-05, "loss": 0.471, "step": 6309 }, { "epoch": 0.7678734408275023, "grad_norm": 0.7659875154495239, "learning_rate": 1.3931787733556388e-05, "loss": 0.4423, "step": 6310 }, { "epoch": 0.7679951323395193, "grad_norm": 1.3102385997772217, "learning_rate": 1.3929994069686114e-05, "loss": 0.411, "step": 6311 }, { "epoch": 0.7681168238515363, "grad_norm": 0.6528482437133789, "learning_rate": 1.3928200256271427e-05, "loss": 0.4424, "step": 6312 }, { "epoch": 0.7682385153635534, "grad_norm": 1.4142247438430786, "learning_rate": 1.392640629338059e-05, "loss": 0.4585, "step": 6313 }, { "epoch": 0.7683602068755704, "grad_norm": 2.222587823867798, "learning_rate": 1.3924612181081865e-05, "loss": 0.496, "step": 6314 }, { "epoch": 0.7684818983875875, "grad_norm": 0.6123923063278198, "learning_rate": 1.3922817919443525e-05, "loss": 0.419, "step": 6315 }, { "epoch": 0.7686035898996045, "grad_norm": 1.7828913927078247, "learning_rate": 1.3921023508533844e-05, "loss": 0.4894, "step": 6316 }, { "epoch": 0.7687252814116216, "grad_norm": 1.040553331375122, "learning_rate": 1.3919228948421102e-05, "loss": 0.44, "step": 6317 }, { "epoch": 0.7688469729236386, "grad_norm": 0.8742536902427673, "learning_rate": 1.3917434239173586e-05, "loss": 0.449, "step": 6318 }, { "epoch": 0.7689686644356556, "grad_norm": 0.8310899138450623, "learning_rate": 1.3915639380859589e-05, "loss": 0.4681, "step": 6319 }, { "epoch": 0.7690903559476726, "grad_norm": 0.8623323440551758, "learning_rate": 1.3913844373547407e-05, "loss": 0.4616, "step": 6320 }, { "epoch": 0.7692120474596897, "grad_norm": 2.6715378761291504, "learning_rate": 1.3912049217305346e-05, "loss": 0.4024, "step": 6321 }, { "epoch": 0.7693337389717068, "grad_norm": 0.5722630023956299, "learning_rate": 1.3910253912201717e-05, "loss": 0.4581, "step": 6322 }, { "epoch": 0.7694554304837238, "grad_norm": 0.7350225448608398, "learning_rate": 1.3908458458304833e-05, "loss": 0.459, "step": 6323 }, { "epoch": 0.7695771219957408, "grad_norm": 2.838193655014038, "learning_rate": 1.3906662855683012e-05, "loss": 0.4299, "step": 6324 }, { "epoch": 0.7696988135077578, "grad_norm": 0.8740293383598328, "learning_rate": 1.390486710440458e-05, "loss": 0.4643, "step": 6325 }, { "epoch": 0.7698205050197748, "grad_norm": 1.383274793624878, "learning_rate": 1.3903071204537877e-05, "loss": 0.451, "step": 6326 }, { "epoch": 0.7699421965317919, "grad_norm": 1.5800137519836426, "learning_rate": 1.3901275156151232e-05, "loss": 0.4928, "step": 6327 }, { "epoch": 0.770063888043809, "grad_norm": 0.6011481285095215, "learning_rate": 1.3899478959312995e-05, "loss": 0.4498, "step": 6328 }, { "epoch": 0.770185579555826, "grad_norm": 1.9011449813842773, "learning_rate": 1.3897682614091514e-05, "loss": 0.469, "step": 6329 }, { "epoch": 0.770307271067843, "grad_norm": 1.7227898836135864, "learning_rate": 1.389588612055514e-05, "loss": 0.4258, "step": 6330 }, { "epoch": 0.77042896257986, "grad_norm": 2.1644248962402344, "learning_rate": 1.3894089478772236e-05, "loss": 0.4913, "step": 6331 }, { "epoch": 0.7705506540918771, "grad_norm": 1.188430666923523, "learning_rate": 1.3892292688811162e-05, "loss": 0.4784, "step": 6332 }, { "epoch": 0.7706723456038941, "grad_norm": 1.5333168506622314, "learning_rate": 1.3890495750740299e-05, "loss": 0.4415, "step": 6333 }, { "epoch": 0.7707940371159112, "grad_norm": 4.04938268661499, "learning_rate": 1.388869866462802e-05, "loss": 0.4251, "step": 6334 }, { "epoch": 0.7709157286279282, "grad_norm": 0.6987572312355042, "learning_rate": 1.3886901430542705e-05, "loss": 0.4885, "step": 6335 }, { "epoch": 0.7710374201399453, "grad_norm": 0.6160205602645874, "learning_rate": 1.3885104048552746e-05, "loss": 0.4983, "step": 6336 }, { "epoch": 0.7711591116519623, "grad_norm": 2.124755859375, "learning_rate": 1.3883306518726537e-05, "loss": 0.4489, "step": 6337 }, { "epoch": 0.7712808031639793, "grad_norm": 0.6458317637443542, "learning_rate": 1.3881508841132478e-05, "loss": 0.4841, "step": 6338 }, { "epoch": 0.7714024946759963, "grad_norm": 0.7657617926597595, "learning_rate": 1.3879711015838971e-05, "loss": 0.4808, "step": 6339 }, { "epoch": 0.7715241861880134, "grad_norm": 0.8904387354850769, "learning_rate": 1.3877913042914433e-05, "loss": 0.4857, "step": 6340 }, { "epoch": 0.7716458777000305, "grad_norm": 0.642325758934021, "learning_rate": 1.3876114922427273e-05, "loss": 0.4086, "step": 6341 }, { "epoch": 0.7717675692120475, "grad_norm": 1.4109954833984375, "learning_rate": 1.3874316654445918e-05, "loss": 0.4338, "step": 6342 }, { "epoch": 0.7718892607240645, "grad_norm": 0.6055235266685486, "learning_rate": 1.3872518239038795e-05, "loss": 0.4362, "step": 6343 }, { "epoch": 0.7720109522360815, "grad_norm": 2.2909557819366455, "learning_rate": 1.3870719676274335e-05, "loss": 0.4864, "step": 6344 }, { "epoch": 0.7721326437480985, "grad_norm": 3.124408006668091, "learning_rate": 1.3868920966220984e-05, "loss": 0.5032, "step": 6345 }, { "epoch": 0.7722543352601156, "grad_norm": 2.1755948066711426, "learning_rate": 1.3867122108947182e-05, "loss": 0.4677, "step": 6346 }, { "epoch": 0.7723760267721327, "grad_norm": 0.8197031617164612, "learning_rate": 1.3865323104521378e-05, "loss": 0.4477, "step": 6347 }, { "epoch": 0.7724977182841497, "grad_norm": 0.6858694553375244, "learning_rate": 1.3863523953012026e-05, "loss": 0.4034, "step": 6348 }, { "epoch": 0.7726194097961667, "grad_norm": 0.8114713430404663, "learning_rate": 1.3861724654487593e-05, "loss": 0.4808, "step": 6349 }, { "epoch": 0.7727411013081837, "grad_norm": 2.3523550033569336, "learning_rate": 1.3859925209016543e-05, "loss": 0.4682, "step": 6350 }, { "epoch": 0.7728627928202008, "grad_norm": 1.0293583869934082, "learning_rate": 1.385812561666735e-05, "loss": 0.4593, "step": 6351 }, { "epoch": 0.7729844843322178, "grad_norm": 2.6615138053894043, "learning_rate": 1.3856325877508491e-05, "loss": 0.4122, "step": 6352 }, { "epoch": 0.7731061758442349, "grad_norm": 0.6460965871810913, "learning_rate": 1.3854525991608452e-05, "loss": 0.4832, "step": 6353 }, { "epoch": 0.7732278673562519, "grad_norm": 0.6187189817428589, "learning_rate": 1.3852725959035718e-05, "loss": 0.4675, "step": 6354 }, { "epoch": 0.773349558868269, "grad_norm": 1.2971177101135254, "learning_rate": 1.3850925779858786e-05, "loss": 0.477, "step": 6355 }, { "epoch": 0.773471250380286, "grad_norm": 3.847198486328125, "learning_rate": 1.384912545414616e-05, "loss": 0.4911, "step": 6356 }, { "epoch": 0.773592941892303, "grad_norm": 0.6393918991088867, "learning_rate": 1.3847324981966344e-05, "loss": 0.4697, "step": 6357 }, { "epoch": 0.77371463340432, "grad_norm": 0.7775894999504089, "learning_rate": 1.3845524363387845e-05, "loss": 0.4614, "step": 6358 }, { "epoch": 0.7738363249163371, "grad_norm": 1.077245831489563, "learning_rate": 1.3843723598479185e-05, "loss": 0.4592, "step": 6359 }, { "epoch": 0.7739580164283542, "grad_norm": 0.734381914138794, "learning_rate": 1.3841922687308889e-05, "loss": 0.4915, "step": 6360 }, { "epoch": 0.7740797079403712, "grad_norm": 0.8254387378692627, "learning_rate": 1.3840121629945478e-05, "loss": 0.4889, "step": 6361 }, { "epoch": 0.7742013994523882, "grad_norm": 3.6123549938201904, "learning_rate": 1.3838320426457493e-05, "loss": 0.4165, "step": 6362 }, { "epoch": 0.7743230909644052, "grad_norm": 0.5983392000198364, "learning_rate": 1.383651907691347e-05, "loss": 0.435, "step": 6363 }, { "epoch": 0.7744447824764222, "grad_norm": 1.523672342300415, "learning_rate": 1.3834717581381956e-05, "loss": 0.4771, "step": 6364 }, { "epoch": 0.7745664739884393, "grad_norm": 0.6426738500595093, "learning_rate": 1.38329159399315e-05, "loss": 0.4386, "step": 6365 }, { "epoch": 0.7746881655004564, "grad_norm": 2.6999123096466064, "learning_rate": 1.3831114152630658e-05, "loss": 0.4972, "step": 6366 }, { "epoch": 0.7748098570124734, "grad_norm": 1.011676549911499, "learning_rate": 1.3829312219547995e-05, "loss": 0.455, "step": 6367 }, { "epoch": 0.7749315485244904, "grad_norm": 2.438202142715454, "learning_rate": 1.3827510140752071e-05, "loss": 0.4724, "step": 6368 }, { "epoch": 0.7750532400365074, "grad_norm": 0.7217767238616943, "learning_rate": 1.3825707916311468e-05, "loss": 0.4771, "step": 6369 }, { "epoch": 0.7751749315485245, "grad_norm": 1.510183572769165, "learning_rate": 1.3823905546294758e-05, "loss": 0.4212, "step": 6370 }, { "epoch": 0.7752966230605415, "grad_norm": 2.2356646060943604, "learning_rate": 1.3822103030770531e-05, "loss": 0.4242, "step": 6371 }, { "epoch": 0.7754183145725586, "grad_norm": 0.5703055262565613, "learning_rate": 1.3820300369807367e-05, "loss": 0.483, "step": 6372 }, { "epoch": 0.7755400060845756, "grad_norm": 2.6634860038757324, "learning_rate": 1.3818497563473867e-05, "loss": 0.4255, "step": 6373 }, { "epoch": 0.7756616975965926, "grad_norm": 0.7997645735740662, "learning_rate": 1.3816694611838633e-05, "loss": 0.5027, "step": 6374 }, { "epoch": 0.7757833891086097, "grad_norm": 2.9507253170013428, "learning_rate": 1.381489151497027e-05, "loss": 0.5292, "step": 6375 }, { "epoch": 0.7759050806206267, "grad_norm": 0.6053027510643005, "learning_rate": 1.3813088272937387e-05, "loss": 0.4847, "step": 6376 }, { "epoch": 0.7760267721326437, "grad_norm": 0.9724107980728149, "learning_rate": 1.38112848858086e-05, "loss": 0.4819, "step": 6377 }, { "epoch": 0.7761484636446608, "grad_norm": 0.6126247644424438, "learning_rate": 1.3809481353652536e-05, "loss": 0.4758, "step": 6378 }, { "epoch": 0.7762701551566779, "grad_norm": 2.095560312271118, "learning_rate": 1.3807677676537823e-05, "loss": 0.4395, "step": 6379 }, { "epoch": 0.7763918466686949, "grad_norm": 0.6928943991661072, "learning_rate": 1.3805873854533093e-05, "loss": 0.4961, "step": 6380 }, { "epoch": 0.7765135381807119, "grad_norm": 1.153184413909912, "learning_rate": 1.380406988770698e-05, "loss": 0.4788, "step": 6381 }, { "epoch": 0.7766352296927289, "grad_norm": 1.0534050464630127, "learning_rate": 1.3802265776128139e-05, "loss": 0.4809, "step": 6382 }, { "epoch": 0.7767569212047459, "grad_norm": 1.61732816696167, "learning_rate": 1.3800461519865214e-05, "loss": 0.469, "step": 6383 }, { "epoch": 0.776878612716763, "grad_norm": 0.7235574126243591, "learning_rate": 1.379865711898686e-05, "loss": 0.4702, "step": 6384 }, { "epoch": 0.7770003042287801, "grad_norm": 1.3811155557632446, "learning_rate": 1.379685257356174e-05, "loss": 0.4118, "step": 6385 }, { "epoch": 0.7771219957407971, "grad_norm": 0.6938037276268005, "learning_rate": 1.3795047883658523e-05, "loss": 0.448, "step": 6386 }, { "epoch": 0.7772436872528141, "grad_norm": 1.8938543796539307, "learning_rate": 1.379324304934588e-05, "loss": 0.4431, "step": 6387 }, { "epoch": 0.7773653787648311, "grad_norm": 4.036731243133545, "learning_rate": 1.3791438070692483e-05, "loss": 0.5117, "step": 6388 }, { "epoch": 0.7774870702768482, "grad_norm": 0.718258261680603, "learning_rate": 1.378963294776702e-05, "loss": 0.3954, "step": 6389 }, { "epoch": 0.7776087617888652, "grad_norm": 2.801138401031494, "learning_rate": 1.3787827680638181e-05, "loss": 0.5315, "step": 6390 }, { "epoch": 0.7777304533008823, "grad_norm": 3.210613965988159, "learning_rate": 1.3786022269374656e-05, "loss": 0.5369, "step": 6391 }, { "epoch": 0.7778521448128993, "grad_norm": 0.7968564629554749, "learning_rate": 1.378421671404515e-05, "loss": 0.4276, "step": 6392 }, { "epoch": 0.7779738363249163, "grad_norm": 1.0648247003555298, "learning_rate": 1.3782411014718363e-05, "loss": 0.4272, "step": 6393 }, { "epoch": 0.7780955278369334, "grad_norm": 2.704493284225464, "learning_rate": 1.3780605171463011e-05, "loss": 0.5124, "step": 6394 }, { "epoch": 0.7782172193489504, "grad_norm": 2.9222958087921143, "learning_rate": 1.3778799184347802e-05, "loss": 0.4211, "step": 6395 }, { "epoch": 0.7783389108609674, "grad_norm": 0.6378592848777771, "learning_rate": 1.3776993053441463e-05, "loss": 0.4479, "step": 6396 }, { "epoch": 0.7784606023729845, "grad_norm": 2.8318779468536377, "learning_rate": 1.3775186778812724e-05, "loss": 0.4044, "step": 6397 }, { "epoch": 0.7785822938850016, "grad_norm": 2.004495620727539, "learning_rate": 1.3773380360530312e-05, "loss": 0.5328, "step": 6398 }, { "epoch": 0.7787039853970186, "grad_norm": 1.049403190612793, "learning_rate": 1.3771573798662966e-05, "loss": 0.4416, "step": 6399 }, { "epoch": 0.7788256769090356, "grad_norm": 1.4881782531738281, "learning_rate": 1.3769767093279432e-05, "loss": 0.5014, "step": 6400 }, { "epoch": 0.7789473684210526, "grad_norm": 1.335480809211731, "learning_rate": 1.3767960244448457e-05, "loss": 0.4849, "step": 6401 }, { "epoch": 0.7790690599330696, "grad_norm": 0.7853442430496216, "learning_rate": 1.3766153252238794e-05, "loss": 0.5364, "step": 6402 }, { "epoch": 0.7791907514450868, "grad_norm": 2.952008008956909, "learning_rate": 1.3764346116719204e-05, "loss": 0.4707, "step": 6403 }, { "epoch": 0.7793124429571038, "grad_norm": 1.7441281080245972, "learning_rate": 1.3762538837958454e-05, "loss": 0.4456, "step": 6404 }, { "epoch": 0.7794341344691208, "grad_norm": 3.066791296005249, "learning_rate": 1.3760731416025314e-05, "loss": 0.4796, "step": 6405 }, { "epoch": 0.7795558259811378, "grad_norm": 1.7521189451217651, "learning_rate": 1.3758923850988556e-05, "loss": 0.4341, "step": 6406 }, { "epoch": 0.7796775174931548, "grad_norm": 1.0489593744277954, "learning_rate": 1.3757116142916967e-05, "loss": 0.489, "step": 6407 }, { "epoch": 0.7797992090051719, "grad_norm": 0.6687289476394653, "learning_rate": 1.375530829187933e-05, "loss": 0.4091, "step": 6408 }, { "epoch": 0.7799209005171889, "grad_norm": 4.361695289611816, "learning_rate": 1.3753500297944444e-05, "loss": 0.5757, "step": 6409 }, { "epoch": 0.780042592029206, "grad_norm": 1.25162672996521, "learning_rate": 1.3751692161181099e-05, "loss": 0.4427, "step": 6410 }, { "epoch": 0.780164283541223, "grad_norm": 3.646221160888672, "learning_rate": 1.3749883881658101e-05, "loss": 0.5428, "step": 6411 }, { "epoch": 0.78028597505324, "grad_norm": 2.618098735809326, "learning_rate": 1.3748075459444258e-05, "loss": 0.4906, "step": 6412 }, { "epoch": 0.7804076665652571, "grad_norm": 2.6059303283691406, "learning_rate": 1.374626689460839e-05, "loss": 0.4919, "step": 6413 }, { "epoch": 0.7805293580772741, "grad_norm": 0.7968857288360596, "learning_rate": 1.3744458187219312e-05, "loss": 0.4373, "step": 6414 }, { "epoch": 0.7806510495892911, "grad_norm": 1.2142266035079956, "learning_rate": 1.3742649337345849e-05, "loss": 0.4482, "step": 6415 }, { "epoch": 0.7807727411013082, "grad_norm": 0.8644128441810608, "learning_rate": 1.374084034505683e-05, "loss": 0.476, "step": 6416 }, { "epoch": 0.7808944326133253, "grad_norm": 1.1000736951828003, "learning_rate": 1.3739031210421094e-05, "loss": 0.5062, "step": 6417 }, { "epoch": 0.7810161241253423, "grad_norm": 3.9336023330688477, "learning_rate": 1.3737221933507481e-05, "loss": 0.4391, "step": 6418 }, { "epoch": 0.7811378156373593, "grad_norm": 1.4541451930999756, "learning_rate": 1.3735412514384837e-05, "loss": 0.4629, "step": 6419 }, { "epoch": 0.7812595071493763, "grad_norm": 2.3276968002319336, "learning_rate": 1.3733602953122014e-05, "loss": 0.4614, "step": 6420 }, { "epoch": 0.7813811986613933, "grad_norm": 0.907979428768158, "learning_rate": 1.3731793249787875e-05, "loss": 0.4683, "step": 6421 }, { "epoch": 0.7815028901734105, "grad_norm": 3.948151111602783, "learning_rate": 1.3729983404451272e-05, "loss": 0.4201, "step": 6422 }, { "epoch": 0.7816245816854275, "grad_norm": 0.6311213374137878, "learning_rate": 1.3728173417181085e-05, "loss": 0.4654, "step": 6423 }, { "epoch": 0.7817462731974445, "grad_norm": 1.8003267049789429, "learning_rate": 1.3726363288046181e-05, "loss": 0.4549, "step": 6424 }, { "epoch": 0.7818679647094615, "grad_norm": 1.109677791595459, "learning_rate": 1.3724553017115438e-05, "loss": 0.4337, "step": 6425 }, { "epoch": 0.7819896562214785, "grad_norm": 4.777871608734131, "learning_rate": 1.3722742604457747e-05, "loss": 0.5414, "step": 6426 }, { "epoch": 0.7821113477334956, "grad_norm": 1.626053810119629, "learning_rate": 1.3720932050141996e-05, "loss": 0.4728, "step": 6427 }, { "epoch": 0.7822330392455126, "grad_norm": 1.4872353076934814, "learning_rate": 1.3719121354237074e-05, "loss": 0.4589, "step": 6428 }, { "epoch": 0.7823547307575297, "grad_norm": 3.58205246925354, "learning_rate": 1.3717310516811888e-05, "loss": 0.5007, "step": 6429 }, { "epoch": 0.7824764222695467, "grad_norm": 0.6488206386566162, "learning_rate": 1.3715499537935341e-05, "loss": 0.4383, "step": 6430 }, { "epoch": 0.7825981137815637, "grad_norm": 0.6844986081123352, "learning_rate": 1.371368841767635e-05, "loss": 0.4375, "step": 6431 }, { "epoch": 0.7827198052935808, "grad_norm": 1.920110821723938, "learning_rate": 1.3711877156103826e-05, "loss": 0.4727, "step": 6432 }, { "epoch": 0.7828414968055978, "grad_norm": 0.9400794506072998, "learning_rate": 1.371006575328669e-05, "loss": 0.5225, "step": 6433 }, { "epoch": 0.7829631883176148, "grad_norm": 3.376373767852783, "learning_rate": 1.3708254209293874e-05, "loss": 0.4412, "step": 6434 }, { "epoch": 0.7830848798296319, "grad_norm": 0.8922308683395386, "learning_rate": 1.3706442524194313e-05, "loss": 0.4354, "step": 6435 }, { "epoch": 0.783206571341649, "grad_norm": 2.258458137512207, "learning_rate": 1.3704630698056938e-05, "loss": 0.4349, "step": 6436 }, { "epoch": 0.783328262853666, "grad_norm": 1.4515469074249268, "learning_rate": 1.3702818730950698e-05, "loss": 0.4934, "step": 6437 }, { "epoch": 0.783449954365683, "grad_norm": 1.3646801710128784, "learning_rate": 1.3701006622944541e-05, "loss": 0.4672, "step": 6438 }, { "epoch": 0.7835716458777, "grad_norm": 0.8932005763053894, "learning_rate": 1.3699194374107423e-05, "loss": 0.4127, "step": 6439 }, { "epoch": 0.783693337389717, "grad_norm": 0.6561334133148193, "learning_rate": 1.36973819845083e-05, "loss": 0.4465, "step": 6440 }, { "epoch": 0.7838150289017342, "grad_norm": 1.804722785949707, "learning_rate": 1.3695569454216138e-05, "loss": 0.3787, "step": 6441 }, { "epoch": 0.7839367204137512, "grad_norm": 5.419088840484619, "learning_rate": 1.3693756783299912e-05, "loss": 0.5702, "step": 6442 }, { "epoch": 0.7840584119257682, "grad_norm": 1.484358787536621, "learning_rate": 1.3691943971828594e-05, "loss": 0.4366, "step": 6443 }, { "epoch": 0.7841801034377852, "grad_norm": 1.8780895471572876, "learning_rate": 1.3690131019871167e-05, "loss": 0.4635, "step": 6444 }, { "epoch": 0.7843017949498022, "grad_norm": 0.8616920113563538, "learning_rate": 1.3688317927496612e-05, "loss": 0.398, "step": 6445 }, { "epoch": 0.7844234864618193, "grad_norm": 3.895115375518799, "learning_rate": 1.368650469477393e-05, "loss": 0.5957, "step": 6446 }, { "epoch": 0.7845451779738363, "grad_norm": 1.2453265190124512, "learning_rate": 1.3684691321772113e-05, "loss": 0.4492, "step": 6447 }, { "epoch": 0.7846668694858534, "grad_norm": 1.2877627611160278, "learning_rate": 1.3682877808560161e-05, "loss": 0.3868, "step": 6448 }, { "epoch": 0.7847885609978704, "grad_norm": 0.6936625242233276, "learning_rate": 1.3681064155207088e-05, "loss": 0.4509, "step": 6449 }, { "epoch": 0.7849102525098874, "grad_norm": 0.8250743746757507, "learning_rate": 1.3679250361781907e-05, "loss": 0.4356, "step": 6450 }, { "epoch": 0.7850319440219045, "grad_norm": 0.7703726291656494, "learning_rate": 1.3677436428353632e-05, "loss": 0.4986, "step": 6451 }, { "epoch": 0.7851536355339215, "grad_norm": 1.0868650674819946, "learning_rate": 1.3675622354991286e-05, "loss": 0.4504, "step": 6452 }, { "epoch": 0.7852753270459385, "grad_norm": 1.223739504814148, "learning_rate": 1.3673808141763904e-05, "loss": 0.4237, "step": 6453 }, { "epoch": 0.7853970185579556, "grad_norm": 2.5764353275299072, "learning_rate": 1.3671993788740519e-05, "loss": 0.4736, "step": 6454 }, { "epoch": 0.7855187100699726, "grad_norm": 0.646618664264679, "learning_rate": 1.3670179295990169e-05, "loss": 0.4239, "step": 6455 }, { "epoch": 0.7856404015819897, "grad_norm": 3.6138246059417725, "learning_rate": 1.3668364663581899e-05, "loss": 0.3765, "step": 6456 }, { "epoch": 0.7857620930940067, "grad_norm": 1.7653045654296875, "learning_rate": 1.3666549891584759e-05, "loss": 0.5194, "step": 6457 }, { "epoch": 0.7858837846060237, "grad_norm": 1.4856643676757812, "learning_rate": 1.366473498006781e-05, "loss": 0.4613, "step": 6458 }, { "epoch": 0.7860054761180407, "grad_norm": 2.887765407562256, "learning_rate": 1.3662919929100107e-05, "loss": 0.3844, "step": 6459 }, { "epoch": 0.7861271676300579, "grad_norm": 0.6231181621551514, "learning_rate": 1.3661104738750716e-05, "loss": 0.4294, "step": 6460 }, { "epoch": 0.7862488591420749, "grad_norm": 1.4189949035644531, "learning_rate": 1.3659289409088719e-05, "loss": 0.4769, "step": 6461 }, { "epoch": 0.7863705506540919, "grad_norm": 3.7589845657348633, "learning_rate": 1.3657473940183182e-05, "loss": 0.3839, "step": 6462 }, { "epoch": 0.7864922421661089, "grad_norm": 2.8911733627319336, "learning_rate": 1.365565833210319e-05, "loss": 0.5308, "step": 6463 }, { "epoch": 0.7866139336781259, "grad_norm": 1.43215811252594, "learning_rate": 1.3653842584917832e-05, "loss": 0.4179, "step": 6464 }, { "epoch": 0.786735625190143, "grad_norm": 2.66440749168396, "learning_rate": 1.3652026698696201e-05, "loss": 0.4608, "step": 6465 }, { "epoch": 0.78685731670216, "grad_norm": 1.143523931503296, "learning_rate": 1.3650210673507393e-05, "loss": 0.3864, "step": 6466 }, { "epoch": 0.7869790082141771, "grad_norm": 0.8716341257095337, "learning_rate": 1.3648394509420517e-05, "loss": 0.3965, "step": 6467 }, { "epoch": 0.7871006997261941, "grad_norm": 2.1278419494628906, "learning_rate": 1.3646578206504675e-05, "loss": 0.4714, "step": 6468 }, { "epoch": 0.7872223912382111, "grad_norm": 2.4792592525482178, "learning_rate": 1.3644761764828986e-05, "loss": 0.4715, "step": 6469 }, { "epoch": 0.7873440827502282, "grad_norm": 3.393587112426758, "learning_rate": 1.3642945184462569e-05, "loss": 0.4737, "step": 6470 }, { "epoch": 0.7874657742622452, "grad_norm": 1.6722385883331299, "learning_rate": 1.3641128465474543e-05, "loss": 0.4715, "step": 6471 }, { "epoch": 0.7875874657742622, "grad_norm": 0.9631937146186829, "learning_rate": 1.3639311607934042e-05, "loss": 0.4704, "step": 6472 }, { "epoch": 0.7877091572862793, "grad_norm": 2.8960633277893066, "learning_rate": 1.363749461191021e-05, "loss": 0.4971, "step": 6473 }, { "epoch": 0.7878308487982963, "grad_norm": 1.4005905389785767, "learning_rate": 1.3635677477472168e-05, "loss": 0.429, "step": 6474 }, { "epoch": 0.7879525403103134, "grad_norm": 3.2016022205352783, "learning_rate": 1.3633860204689078e-05, "loss": 0.4018, "step": 6475 }, { "epoch": 0.7880742318223304, "grad_norm": 0.6304191946983337, "learning_rate": 1.3632042793630086e-05, "loss": 0.4765, "step": 6476 }, { "epoch": 0.7881959233343474, "grad_norm": 3.028447151184082, "learning_rate": 1.3630225244364345e-05, "loss": 0.417, "step": 6477 }, { "epoch": 0.7883176148463644, "grad_norm": 1.7520768642425537, "learning_rate": 1.3628407556961023e-05, "loss": 0.4991, "step": 6478 }, { "epoch": 0.7884393063583816, "grad_norm": 5.242979049682617, "learning_rate": 1.3626589731489279e-05, "loss": 0.3984, "step": 6479 }, { "epoch": 0.7885609978703986, "grad_norm": 1.7049250602722168, "learning_rate": 1.362477176801829e-05, "loss": 0.4551, "step": 6480 }, { "epoch": 0.7886826893824156, "grad_norm": 1.8115218877792358, "learning_rate": 1.3622953666617236e-05, "loss": 0.5037, "step": 6481 }, { "epoch": 0.7888043808944326, "grad_norm": 0.8826643824577332, "learning_rate": 1.3621135427355293e-05, "loss": 0.5147, "step": 6482 }, { "epoch": 0.7889260724064496, "grad_norm": 1.7377963066101074, "learning_rate": 1.361931705030165e-05, "loss": 0.4976, "step": 6483 }, { "epoch": 0.7890477639184666, "grad_norm": 2.7146713733673096, "learning_rate": 1.3617498535525504e-05, "loss": 0.5104, "step": 6484 }, { "epoch": 0.7891694554304837, "grad_norm": 1.4817066192626953, "learning_rate": 1.3615679883096051e-05, "loss": 0.4647, "step": 6485 }, { "epoch": 0.7892911469425008, "grad_norm": 1.098913311958313, "learning_rate": 1.3613861093082494e-05, "loss": 0.4601, "step": 6486 }, { "epoch": 0.7894128384545178, "grad_norm": 2.1476383209228516, "learning_rate": 1.361204216555404e-05, "loss": 0.5228, "step": 6487 }, { "epoch": 0.7895345299665348, "grad_norm": 0.5649075508117676, "learning_rate": 1.361022310057991e-05, "loss": 0.4664, "step": 6488 }, { "epoch": 0.7896562214785519, "grad_norm": 2.738413095474243, "learning_rate": 1.3608403898229314e-05, "loss": 0.4699, "step": 6489 }, { "epoch": 0.7897779129905689, "grad_norm": 1.8481881618499756, "learning_rate": 1.3606584558571483e-05, "loss": 0.4628, "step": 6490 }, { "epoch": 0.7898996045025859, "grad_norm": 1.9711811542510986, "learning_rate": 1.3604765081675641e-05, "loss": 0.449, "step": 6491 }, { "epoch": 0.790021296014603, "grad_norm": 1.1668801307678223, "learning_rate": 1.3602945467611031e-05, "loss": 0.4673, "step": 6492 }, { "epoch": 0.79014298752662, "grad_norm": 2.104055166244507, "learning_rate": 1.3601125716446885e-05, "loss": 0.4204, "step": 6493 }, { "epoch": 0.7902646790386371, "grad_norm": 1.897060751914978, "learning_rate": 1.3599305828252452e-05, "loss": 0.4173, "step": 6494 }, { "epoch": 0.7903863705506541, "grad_norm": 0.6285289525985718, "learning_rate": 1.3597485803096983e-05, "loss": 0.4019, "step": 6495 }, { "epoch": 0.7905080620626711, "grad_norm": 5.3628339767456055, "learning_rate": 1.3595665641049734e-05, "loss": 0.5362, "step": 6496 }, { "epoch": 0.7906297535746881, "grad_norm": 3.2266054153442383, "learning_rate": 1.359384534217996e-05, "loss": 0.4644, "step": 6497 }, { "epoch": 0.7907514450867053, "grad_norm": 4.081913471221924, "learning_rate": 1.3592024906556936e-05, "loss": 0.4825, "step": 6498 }, { "epoch": 0.7908731365987223, "grad_norm": 2.8375980854034424, "learning_rate": 1.3590204334249928e-05, "loss": 0.5015, "step": 6499 }, { "epoch": 0.7909948281107393, "grad_norm": 1.9742200374603271, "learning_rate": 1.3588383625328215e-05, "loss": 0.4875, "step": 6500 }, { "epoch": 0.7911165196227563, "grad_norm": 0.7385501861572266, "learning_rate": 1.3586562779861077e-05, "loss": 0.4159, "step": 6501 }, { "epoch": 0.7912382111347733, "grad_norm": 1.6306407451629639, "learning_rate": 1.35847417979178e-05, "loss": 0.3884, "step": 6502 }, { "epoch": 0.7913599026467903, "grad_norm": 0.6484414935112, "learning_rate": 1.3582920679567679e-05, "loss": 0.3928, "step": 6503 }, { "epoch": 0.7914815941588075, "grad_norm": 1.0021167993545532, "learning_rate": 1.3581099424880009e-05, "loss": 0.3776, "step": 6504 }, { "epoch": 0.7916032856708245, "grad_norm": 1.3124027252197266, "learning_rate": 1.3579278033924093e-05, "loss": 0.4218, "step": 6505 }, { "epoch": 0.7917249771828415, "grad_norm": 1.2102559804916382, "learning_rate": 1.3577456506769238e-05, "loss": 0.3956, "step": 6506 }, { "epoch": 0.7918466686948585, "grad_norm": 0.8479447960853577, "learning_rate": 1.3575634843484764e-05, "loss": 0.4437, "step": 6507 }, { "epoch": 0.7919683602068756, "grad_norm": 1.2440087795257568, "learning_rate": 1.3573813044139975e-05, "loss": 0.4555, "step": 6508 }, { "epoch": 0.7920900517188926, "grad_norm": 2.2641000747680664, "learning_rate": 1.3571991108804208e-05, "loss": 0.4961, "step": 6509 }, { "epoch": 0.7922117432309096, "grad_norm": 2.6826422214508057, "learning_rate": 1.3570169037546781e-05, "loss": 0.4992, "step": 6510 }, { "epoch": 0.7923334347429267, "grad_norm": 1.5702519416809082, "learning_rate": 1.3568346830437039e-05, "loss": 0.4136, "step": 6511 }, { "epoch": 0.7924551262549437, "grad_norm": 1.8496413230895996, "learning_rate": 1.3566524487544308e-05, "loss": 0.4888, "step": 6512 }, { "epoch": 0.7925768177669608, "grad_norm": 1.3136520385742188, "learning_rate": 1.356470200893794e-05, "loss": 0.4155, "step": 6513 }, { "epoch": 0.7926985092789778, "grad_norm": 1.2406553030014038, "learning_rate": 1.3562879394687283e-05, "loss": 0.4158, "step": 6514 }, { "epoch": 0.7928202007909948, "grad_norm": 2.73810076713562, "learning_rate": 1.356105664486169e-05, "loss": 0.3995, "step": 6515 }, { "epoch": 0.7929418923030118, "grad_norm": 4.002604007720947, "learning_rate": 1.355923375953052e-05, "loss": 0.5193, "step": 6516 }, { "epoch": 0.793063583815029, "grad_norm": 2.522327184677124, "learning_rate": 1.3557410738763137e-05, "loss": 0.5068, "step": 6517 }, { "epoch": 0.793185275327046, "grad_norm": 3.4677863121032715, "learning_rate": 1.3555587582628913e-05, "loss": 0.5471, "step": 6518 }, { "epoch": 0.793306966839063, "grad_norm": 1.7428756952285767, "learning_rate": 1.3553764291197224e-05, "loss": 0.4736, "step": 6519 }, { "epoch": 0.79342865835108, "grad_norm": 0.9846490025520325, "learning_rate": 1.3551940864537445e-05, "loss": 0.4776, "step": 6520 }, { "epoch": 0.793550349863097, "grad_norm": 1.8506323099136353, "learning_rate": 1.3550117302718964e-05, "loss": 0.4826, "step": 6521 }, { "epoch": 0.793672041375114, "grad_norm": 4.4684739112854, "learning_rate": 1.3548293605811174e-05, "loss": 0.4113, "step": 6522 }, { "epoch": 0.7937937328871312, "grad_norm": 0.8352494835853577, "learning_rate": 1.3546469773883463e-05, "loss": 0.4626, "step": 6523 }, { "epoch": 0.7939154243991482, "grad_norm": 1.014554500579834, "learning_rate": 1.354464580700524e-05, "loss": 0.4993, "step": 6524 }, { "epoch": 0.7940371159111652, "grad_norm": 2.263448476791382, "learning_rate": 1.3542821705245908e-05, "loss": 0.4553, "step": 6525 }, { "epoch": 0.7941588074231822, "grad_norm": 5.160703659057617, "learning_rate": 1.3540997468674876e-05, "loss": 0.3738, "step": 6526 }, { "epoch": 0.7942804989351993, "grad_norm": 2.4914934635162354, "learning_rate": 1.353917309736156e-05, "loss": 0.4812, "step": 6527 }, { "epoch": 0.7944021904472163, "grad_norm": 3.265282392501831, "learning_rate": 1.353734859137538e-05, "loss": 0.3995, "step": 6528 }, { "epoch": 0.7945238819592333, "grad_norm": 0.5951906442642212, "learning_rate": 1.3535523950785768e-05, "loss": 0.4473, "step": 6529 }, { "epoch": 0.7946455734712504, "grad_norm": 3.6978344917297363, "learning_rate": 1.3533699175662149e-05, "loss": 0.481, "step": 6530 }, { "epoch": 0.7947672649832674, "grad_norm": 3.979156017303467, "learning_rate": 1.3531874266073963e-05, "loss": 0.5491, "step": 6531 }, { "epoch": 0.7948889564952845, "grad_norm": 0.5644336342811584, "learning_rate": 1.353004922209065e-05, "loss": 0.421, "step": 6532 }, { "epoch": 0.7950106480073015, "grad_norm": 1.8630427122116089, "learning_rate": 1.3528224043781659e-05, "loss": 0.4421, "step": 6533 }, { "epoch": 0.7951323395193185, "grad_norm": 1.3852897882461548, "learning_rate": 1.3526398731216438e-05, "loss": 0.4247, "step": 6534 }, { "epoch": 0.7952540310313355, "grad_norm": 0.5969976186752319, "learning_rate": 1.3524573284464444e-05, "loss": 0.4231, "step": 6535 }, { "epoch": 0.7953757225433526, "grad_norm": 1.3499457836151123, "learning_rate": 1.3522747703595145e-05, "loss": 0.3978, "step": 6536 }, { "epoch": 0.7954974140553697, "grad_norm": 1.8621306419372559, "learning_rate": 1.3520921988678003e-05, "loss": 0.4872, "step": 6537 }, { "epoch": 0.7956191055673867, "grad_norm": 1.6049827337265015, "learning_rate": 1.3519096139782493e-05, "loss": 0.4328, "step": 6538 }, { "epoch": 0.7957407970794037, "grad_norm": 0.6554694771766663, "learning_rate": 1.3517270156978088e-05, "loss": 0.4753, "step": 6539 }, { "epoch": 0.7958624885914207, "grad_norm": 0.6777941584587097, "learning_rate": 1.3515444040334274e-05, "loss": 0.4845, "step": 6540 }, { "epoch": 0.7959841801034377, "grad_norm": 0.7399910092353821, "learning_rate": 1.3513617789920538e-05, "loss": 0.4673, "step": 6541 }, { "epoch": 0.7961058716154549, "grad_norm": 2.4419808387756348, "learning_rate": 1.3511791405806374e-05, "loss": 0.4347, "step": 6542 }, { "epoch": 0.7962275631274719, "grad_norm": 0.7896532416343689, "learning_rate": 1.3509964888061276e-05, "loss": 0.4232, "step": 6543 }, { "epoch": 0.7963492546394889, "grad_norm": 0.5458998680114746, "learning_rate": 1.3508138236754746e-05, "loss": 0.4167, "step": 6544 }, { "epoch": 0.7964709461515059, "grad_norm": 1.0785274505615234, "learning_rate": 1.35063114519563e-05, "loss": 0.4212, "step": 6545 }, { "epoch": 0.796592637663523, "grad_norm": 1.268502950668335, "learning_rate": 1.3504484533735442e-05, "loss": 0.4492, "step": 6546 }, { "epoch": 0.79671432917554, "grad_norm": 2.303605079650879, "learning_rate": 1.3502657482161699e-05, "loss": 0.4725, "step": 6547 }, { "epoch": 0.796836020687557, "grad_norm": 5.79015588760376, "learning_rate": 1.3500830297304582e-05, "loss": 0.5871, "step": 6548 }, { "epoch": 0.7969577121995741, "grad_norm": 4.665104389190674, "learning_rate": 1.349900297923363e-05, "loss": 0.5182, "step": 6549 }, { "epoch": 0.7970794037115911, "grad_norm": 2.7406017780303955, "learning_rate": 1.3497175528018369e-05, "loss": 0.4544, "step": 6550 }, { "epoch": 0.7972010952236082, "grad_norm": 0.5910564064979553, "learning_rate": 1.3495347943728341e-05, "loss": 0.3936, "step": 6551 }, { "epoch": 0.7973227867356252, "grad_norm": 1.666337013244629, "learning_rate": 1.3493520226433093e-05, "loss": 0.4549, "step": 6552 }, { "epoch": 0.7974444782476422, "grad_norm": 1.374701738357544, "learning_rate": 1.3491692376202165e-05, "loss": 0.5081, "step": 6553 }, { "epoch": 0.7975661697596592, "grad_norm": 0.7142630815505981, "learning_rate": 1.3489864393105117e-05, "loss": 0.4929, "step": 6554 }, { "epoch": 0.7976878612716763, "grad_norm": 2.436876058578491, "learning_rate": 1.3488036277211502e-05, "loss": 0.4503, "step": 6555 }, { "epoch": 0.7978095527836934, "grad_norm": 2.3436686992645264, "learning_rate": 1.348620802859089e-05, "loss": 0.4649, "step": 6556 }, { "epoch": 0.7979312442957104, "grad_norm": 0.8214684724807739, "learning_rate": 1.3484379647312844e-05, "loss": 0.5103, "step": 6557 }, { "epoch": 0.7980529358077274, "grad_norm": 2.7678382396698, "learning_rate": 1.3482551133446942e-05, "loss": 0.4188, "step": 6558 }, { "epoch": 0.7981746273197444, "grad_norm": 2.1724841594696045, "learning_rate": 1.3480722487062759e-05, "loss": 0.4948, "step": 6559 }, { "epoch": 0.7982963188317614, "grad_norm": 1.8633304834365845, "learning_rate": 1.3478893708229883e-05, "loss": 0.4662, "step": 6560 }, { "epoch": 0.7984180103437786, "grad_norm": 0.9057595133781433, "learning_rate": 1.34770647970179e-05, "loss": 0.4964, "step": 6561 }, { "epoch": 0.7985397018557956, "grad_norm": 0.6968687176704407, "learning_rate": 1.3475235753496403e-05, "loss": 0.4332, "step": 6562 }, { "epoch": 0.7986613933678126, "grad_norm": 2.182694673538208, "learning_rate": 1.3473406577734993e-05, "loss": 0.5144, "step": 6563 }, { "epoch": 0.7987830848798296, "grad_norm": 0.9346823692321777, "learning_rate": 1.3471577269803274e-05, "loss": 0.4359, "step": 6564 }, { "epoch": 0.7989047763918466, "grad_norm": 2.3150224685668945, "learning_rate": 1.3469747829770854e-05, "loss": 0.482, "step": 6565 }, { "epoch": 0.7990264679038637, "grad_norm": 1.5315240621566772, "learning_rate": 1.3467918257707344e-05, "loss": 0.4033, "step": 6566 }, { "epoch": 0.7991481594158807, "grad_norm": 1.580141305923462, "learning_rate": 1.346608855368237e-05, "loss": 0.4982, "step": 6567 }, { "epoch": 0.7992698509278978, "grad_norm": 4.9104228019714355, "learning_rate": 1.3464258717765551e-05, "loss": 0.5695, "step": 6568 }, { "epoch": 0.7993915424399148, "grad_norm": 0.6425305604934692, "learning_rate": 1.3462428750026514e-05, "loss": 0.4191, "step": 6569 }, { "epoch": 0.7995132339519319, "grad_norm": 0.6832162737846375, "learning_rate": 1.3460598650534902e-05, "loss": 0.4127, "step": 6570 }, { "epoch": 0.7996349254639489, "grad_norm": 0.6767962574958801, "learning_rate": 1.3458768419360344e-05, "loss": 0.4449, "step": 6571 }, { "epoch": 0.7997566169759659, "grad_norm": 4.664670944213867, "learning_rate": 1.3456938056572489e-05, "loss": 0.5902, "step": 6572 }, { "epoch": 0.7998783084879829, "grad_norm": 0.5448784828186035, "learning_rate": 1.3455107562240985e-05, "loss": 0.408, "step": 6573 }, { "epoch": 0.8, "grad_norm": 2.1333236694335938, "learning_rate": 1.3453276936435486e-05, "loss": 0.4352, "step": 6574 }, { "epoch": 0.8001216915120171, "grad_norm": 2.676685333251953, "learning_rate": 1.3451446179225655e-05, "loss": 0.4436, "step": 6575 }, { "epoch": 0.8002433830240341, "grad_norm": 2.743525981903076, "learning_rate": 1.3449615290681154e-05, "loss": 0.4025, "step": 6576 }, { "epoch": 0.8003650745360511, "grad_norm": 1.2232776880264282, "learning_rate": 1.3447784270871646e-05, "loss": 0.4367, "step": 6577 }, { "epoch": 0.8004867660480681, "grad_norm": 0.6163656115531921, "learning_rate": 1.3445953119866813e-05, "loss": 0.4581, "step": 6578 }, { "epoch": 0.8006084575600851, "grad_norm": 0.7034110426902771, "learning_rate": 1.344412183773633e-05, "loss": 0.4409, "step": 6579 }, { "epoch": 0.8007301490721023, "grad_norm": 4.315017223358154, "learning_rate": 1.3442290424549882e-05, "loss": 0.5704, "step": 6580 }, { "epoch": 0.8008518405841193, "grad_norm": 1.6207174062728882, "learning_rate": 1.3440458880377156e-05, "loss": 0.4527, "step": 6581 }, { "epoch": 0.8009735320961363, "grad_norm": 0.8339226245880127, "learning_rate": 1.3438627205287853e-05, "loss": 0.3911, "step": 6582 }, { "epoch": 0.8010952236081533, "grad_norm": 0.7527160048484802, "learning_rate": 1.3436795399351665e-05, "loss": 0.4591, "step": 6583 }, { "epoch": 0.8012169151201703, "grad_norm": 0.7859740257263184, "learning_rate": 1.3434963462638298e-05, "loss": 0.4251, "step": 6584 }, { "epoch": 0.8013386066321874, "grad_norm": 1.3363676071166992, "learning_rate": 1.343313139521746e-05, "loss": 0.4363, "step": 6585 }, { "epoch": 0.8014602981442045, "grad_norm": 2.3438727855682373, "learning_rate": 1.3431299197158869e-05, "loss": 0.4807, "step": 6586 }, { "epoch": 0.8015819896562215, "grad_norm": 0.6119399666786194, "learning_rate": 1.3429466868532239e-05, "loss": 0.4138, "step": 6587 }, { "epoch": 0.8017036811682385, "grad_norm": 0.8425772190093994, "learning_rate": 1.3427634409407298e-05, "loss": 0.4803, "step": 6588 }, { "epoch": 0.8018253726802556, "grad_norm": 0.7016222476959229, "learning_rate": 1.342580181985377e-05, "loss": 0.4458, "step": 6589 }, { "epoch": 0.8019470641922726, "grad_norm": 2.7810826301574707, "learning_rate": 1.3423969099941396e-05, "loss": 0.4113, "step": 6590 }, { "epoch": 0.8020687557042896, "grad_norm": 1.088829517364502, "learning_rate": 1.3422136249739906e-05, "loss": 0.4257, "step": 6591 }, { "epoch": 0.8021904472163066, "grad_norm": 0.992914617061615, "learning_rate": 1.3420303269319051e-05, "loss": 0.4797, "step": 6592 }, { "epoch": 0.8023121387283237, "grad_norm": 1.0340906381607056, "learning_rate": 1.3418470158748575e-05, "loss": 0.4216, "step": 6593 }, { "epoch": 0.8024338302403408, "grad_norm": 1.5709484815597534, "learning_rate": 1.3416636918098239e-05, "loss": 0.4455, "step": 6594 }, { "epoch": 0.8025555217523578, "grad_norm": 3.0877702236175537, "learning_rate": 1.3414803547437789e-05, "loss": 0.3774, "step": 6595 }, { "epoch": 0.8026772132643748, "grad_norm": 1.7690620422363281, "learning_rate": 1.3412970046837e-05, "loss": 0.4586, "step": 6596 }, { "epoch": 0.8027989047763918, "grad_norm": 3.440791606903076, "learning_rate": 1.3411136416365635e-05, "loss": 0.5166, "step": 6597 }, { "epoch": 0.8029205962884088, "grad_norm": 0.8563041090965271, "learning_rate": 1.3409302656093468e-05, "loss": 0.4341, "step": 6598 }, { "epoch": 0.803042287800426, "grad_norm": 2.609707832336426, "learning_rate": 1.340746876609028e-05, "loss": 0.4871, "step": 6599 }, { "epoch": 0.803163979312443, "grad_norm": 0.8562548160552979, "learning_rate": 1.340563474642585e-05, "loss": 0.4687, "step": 6600 }, { "epoch": 0.80328567082446, "grad_norm": 0.8712829947471619, "learning_rate": 1.3403800597169971e-05, "loss": 0.4597, "step": 6601 }, { "epoch": 0.803407362336477, "grad_norm": 0.7331824898719788, "learning_rate": 1.3401966318392433e-05, "loss": 0.4464, "step": 6602 }, { "epoch": 0.803529053848494, "grad_norm": 1.2333920001983643, "learning_rate": 1.3400131910163032e-05, "loss": 0.4577, "step": 6603 }, { "epoch": 0.8036507453605111, "grad_norm": 0.9844228029251099, "learning_rate": 1.3398297372551577e-05, "loss": 0.4685, "step": 6604 }, { "epoch": 0.8037724368725282, "grad_norm": 1.145633339881897, "learning_rate": 1.3396462705627875e-05, "loss": 0.4617, "step": 6605 }, { "epoch": 0.8038941283845452, "grad_norm": 3.1598007678985596, "learning_rate": 1.3394627909461733e-05, "loss": 0.4209, "step": 6606 }, { "epoch": 0.8040158198965622, "grad_norm": 1.0236690044403076, "learning_rate": 1.3392792984122973e-05, "loss": 0.4654, "step": 6607 }, { "epoch": 0.8041375114085793, "grad_norm": 1.344252109527588, "learning_rate": 1.3390957929681416e-05, "loss": 0.495, "step": 6608 }, { "epoch": 0.8042592029205963, "grad_norm": 0.7431895732879639, "learning_rate": 1.3389122746206896e-05, "loss": 0.4865, "step": 6609 }, { "epoch": 0.8043808944326133, "grad_norm": 0.7696148753166199, "learning_rate": 1.3387287433769236e-05, "loss": 0.4916, "step": 6610 }, { "epoch": 0.8045025859446303, "grad_norm": 1.1978672742843628, "learning_rate": 1.338545199243828e-05, "loss": 0.463, "step": 6611 }, { "epoch": 0.8046242774566474, "grad_norm": 0.9850582480430603, "learning_rate": 1.3383616422283865e-05, "loss": 0.4874, "step": 6612 }, { "epoch": 0.8047459689686645, "grad_norm": 1.781611680984497, "learning_rate": 1.3381780723375845e-05, "loss": 0.4051, "step": 6613 }, { "epoch": 0.8048676604806815, "grad_norm": 0.579318106174469, "learning_rate": 1.3379944895784067e-05, "loss": 0.4391, "step": 6614 }, { "epoch": 0.8049893519926985, "grad_norm": 1.0190308094024658, "learning_rate": 1.337810893957839e-05, "loss": 0.4578, "step": 6615 }, { "epoch": 0.8051110435047155, "grad_norm": 1.111925482749939, "learning_rate": 1.3376272854828675e-05, "loss": 0.451, "step": 6616 }, { "epoch": 0.8052327350167325, "grad_norm": 1.1480294466018677, "learning_rate": 1.3374436641604791e-05, "loss": 0.4072, "step": 6617 }, { "epoch": 0.8053544265287497, "grad_norm": 0.5749523639678955, "learning_rate": 1.3372600299976606e-05, "loss": 0.4472, "step": 6618 }, { "epoch": 0.8054761180407667, "grad_norm": 1.753135323524475, "learning_rate": 1.3370763830014e-05, "loss": 0.4757, "step": 6619 }, { "epoch": 0.8055978095527837, "grad_norm": 1.2991291284561157, "learning_rate": 1.3368927231786853e-05, "loss": 0.4878, "step": 6620 }, { "epoch": 0.8057195010648007, "grad_norm": 0.6958457827568054, "learning_rate": 1.3367090505365051e-05, "loss": 0.4391, "step": 6621 }, { "epoch": 0.8058411925768177, "grad_norm": 0.7624719142913818, "learning_rate": 1.3365253650818485e-05, "loss": 0.4363, "step": 6622 }, { "epoch": 0.8059628840888348, "grad_norm": 0.706021785736084, "learning_rate": 1.3363416668217055e-05, "loss": 0.4787, "step": 6623 }, { "epoch": 0.8060845756008519, "grad_norm": 4.138987064361572, "learning_rate": 1.3361579557630658e-05, "loss": 0.4307, "step": 6624 }, { "epoch": 0.8062062671128689, "grad_norm": 1.60456383228302, "learning_rate": 1.3359742319129199e-05, "loss": 0.5063, "step": 6625 }, { "epoch": 0.8063279586248859, "grad_norm": 2.763915538787842, "learning_rate": 1.3357904952782587e-05, "loss": 0.4472, "step": 6626 }, { "epoch": 0.806449650136903, "grad_norm": 2.176271677017212, "learning_rate": 1.3356067458660749e-05, "loss": 0.4464, "step": 6627 }, { "epoch": 0.80657134164892, "grad_norm": 0.8584074974060059, "learning_rate": 1.3354229836833594e-05, "loss": 0.4601, "step": 6628 }, { "epoch": 0.806693033160937, "grad_norm": 2.678847074508667, "learning_rate": 1.335239208737105e-05, "loss": 0.384, "step": 6629 }, { "epoch": 0.806814724672954, "grad_norm": 2.9203453063964844, "learning_rate": 1.3350554210343048e-05, "loss": 0.4937, "step": 6630 }, { "epoch": 0.8069364161849711, "grad_norm": 1.1138005256652832, "learning_rate": 1.3348716205819523e-05, "loss": 0.5036, "step": 6631 }, { "epoch": 0.8070581076969882, "grad_norm": 0.6669288873672485, "learning_rate": 1.3346878073870415e-05, "loss": 0.5199, "step": 6632 }, { "epoch": 0.8071797992090052, "grad_norm": 3.3877501487731934, "learning_rate": 1.3345039814565668e-05, "loss": 0.3926, "step": 6633 }, { "epoch": 0.8073014907210222, "grad_norm": 1.7293299436569214, "learning_rate": 1.3343201427975234e-05, "loss": 0.4229, "step": 6634 }, { "epoch": 0.8074231822330392, "grad_norm": 1.7728866338729858, "learning_rate": 1.3341362914169067e-05, "loss": 0.4609, "step": 6635 }, { "epoch": 0.8075448737450562, "grad_norm": 2.9592413902282715, "learning_rate": 1.3339524273217122e-05, "loss": 0.4021, "step": 6636 }, { "epoch": 0.8076665652570734, "grad_norm": 1.5425691604614258, "learning_rate": 1.3337685505189364e-05, "loss": 0.373, "step": 6637 }, { "epoch": 0.8077882567690904, "grad_norm": 1.6649357080459595, "learning_rate": 1.3335846610155767e-05, "loss": 0.4277, "step": 6638 }, { "epoch": 0.8079099482811074, "grad_norm": 5.9295268058776855, "learning_rate": 1.3334007588186301e-05, "loss": 0.5674, "step": 6639 }, { "epoch": 0.8080316397931244, "grad_norm": 0.8094533681869507, "learning_rate": 1.3332168439350948e-05, "loss": 0.4052, "step": 6640 }, { "epoch": 0.8081533313051414, "grad_norm": 1.8929312229156494, "learning_rate": 1.3330329163719684e-05, "loss": 0.4286, "step": 6641 }, { "epoch": 0.8082750228171585, "grad_norm": 2.9139959812164307, "learning_rate": 1.3328489761362505e-05, "loss": 0.4411, "step": 6642 }, { "epoch": 0.8083967143291756, "grad_norm": 2.1933822631835938, "learning_rate": 1.33266502323494e-05, "loss": 0.4252, "step": 6643 }, { "epoch": 0.8085184058411926, "grad_norm": 1.8865286111831665, "learning_rate": 1.3324810576750369e-05, "loss": 0.4757, "step": 6644 }, { "epoch": 0.8086400973532096, "grad_norm": 2.809324026107788, "learning_rate": 1.3322970794635412e-05, "loss": 0.5048, "step": 6645 }, { "epoch": 0.8087617888652266, "grad_norm": 0.6368886828422546, "learning_rate": 1.3321130886074538e-05, "loss": 0.4258, "step": 6646 }, { "epoch": 0.8088834803772437, "grad_norm": 2.1594743728637695, "learning_rate": 1.3319290851137763e-05, "loss": 0.4035, "step": 6647 }, { "epoch": 0.8090051718892607, "grad_norm": 1.0266525745391846, "learning_rate": 1.3317450689895095e-05, "loss": 0.4867, "step": 6648 }, { "epoch": 0.8091268634012777, "grad_norm": 1.2973319292068481, "learning_rate": 1.3315610402416563e-05, "loss": 0.4697, "step": 6649 }, { "epoch": 0.8092485549132948, "grad_norm": 1.4728530645370483, "learning_rate": 1.3313769988772195e-05, "loss": 0.5034, "step": 6650 }, { "epoch": 0.8093702464253119, "grad_norm": 0.8336383700370789, "learning_rate": 1.331192944903202e-05, "loss": 0.4768, "step": 6651 }, { "epoch": 0.8094919379373289, "grad_norm": 1.8269641399383545, "learning_rate": 1.3310088783266071e-05, "loss": 0.4789, "step": 6652 }, { "epoch": 0.8096136294493459, "grad_norm": 3.5603976249694824, "learning_rate": 1.3308247991544392e-05, "loss": 0.4306, "step": 6653 }, { "epoch": 0.8097353209613629, "grad_norm": 0.7099655270576477, "learning_rate": 1.3306407073937031e-05, "loss": 0.4638, "step": 6654 }, { "epoch": 0.8098570124733799, "grad_norm": 1.9727869033813477, "learning_rate": 1.3304566030514037e-05, "loss": 0.4353, "step": 6655 }, { "epoch": 0.8099787039853971, "grad_norm": 2.759723663330078, "learning_rate": 1.3302724861345464e-05, "loss": 0.3676, "step": 6656 }, { "epoch": 0.8101003954974141, "grad_norm": 1.030905842781067, "learning_rate": 1.3300883566501376e-05, "loss": 0.4789, "step": 6657 }, { "epoch": 0.8102220870094311, "grad_norm": 0.8978413343429565, "learning_rate": 1.3299042146051837e-05, "loss": 0.4918, "step": 6658 }, { "epoch": 0.8103437785214481, "grad_norm": 1.9734023809432983, "learning_rate": 1.3297200600066912e-05, "loss": 0.4365, "step": 6659 }, { "epoch": 0.8104654700334651, "grad_norm": 6.101864337921143, "learning_rate": 1.3295358928616679e-05, "loss": 0.5943, "step": 6660 }, { "epoch": 0.8105871615454822, "grad_norm": 1.9487838745117188, "learning_rate": 1.3293517131771224e-05, "loss": 0.4412, "step": 6661 }, { "epoch": 0.8107088530574993, "grad_norm": 0.6250563263893127, "learning_rate": 1.3291675209600619e-05, "loss": 0.4365, "step": 6662 }, { "epoch": 0.8108305445695163, "grad_norm": 2.8699235916137695, "learning_rate": 1.328983316217496e-05, "loss": 0.3765, "step": 6663 }, { "epoch": 0.8109522360815333, "grad_norm": 2.4612600803375244, "learning_rate": 1.3287990989564338e-05, "loss": 0.3853, "step": 6664 }, { "epoch": 0.8110739275935503, "grad_norm": 2.3636205196380615, "learning_rate": 1.3286148691838859e-05, "loss": 0.4363, "step": 6665 }, { "epoch": 0.8111956191055674, "grad_norm": 4.065032482147217, "learning_rate": 1.3284306269068614e-05, "loss": 0.5549, "step": 6666 }, { "epoch": 0.8113173106175844, "grad_norm": 2.5878663063049316, "learning_rate": 1.328246372132372e-05, "loss": 0.4955, "step": 6667 }, { "epoch": 0.8114390021296014, "grad_norm": 0.8897340297698975, "learning_rate": 1.3280621048674288e-05, "loss": 0.4586, "step": 6668 }, { "epoch": 0.8115606936416185, "grad_norm": 1.4318222999572754, "learning_rate": 1.3278778251190431e-05, "loss": 0.4908, "step": 6669 }, { "epoch": 0.8116823851536356, "grad_norm": 0.6420795321464539, "learning_rate": 1.327693532894228e-05, "loss": 0.4176, "step": 6670 }, { "epoch": 0.8118040766656526, "grad_norm": 2.915782928466797, "learning_rate": 1.3275092281999952e-05, "loss": 0.3939, "step": 6671 }, { "epoch": 0.8119257681776696, "grad_norm": 1.9668383598327637, "learning_rate": 1.3273249110433587e-05, "loss": 0.4195, "step": 6672 }, { "epoch": 0.8120474596896866, "grad_norm": 0.6759459972381592, "learning_rate": 1.3271405814313316e-05, "loss": 0.4455, "step": 6673 }, { "epoch": 0.8121691512017036, "grad_norm": 0.6696354150772095, "learning_rate": 1.3269562393709286e-05, "loss": 0.4332, "step": 6674 }, { "epoch": 0.8122908427137208, "grad_norm": 1.029154658317566, "learning_rate": 1.3267718848691634e-05, "loss": 0.4161, "step": 6675 }, { "epoch": 0.8124125342257378, "grad_norm": 1.3693962097167969, "learning_rate": 1.3265875179330517e-05, "loss": 0.4497, "step": 6676 }, { "epoch": 0.8125342257377548, "grad_norm": 3.642892837524414, "learning_rate": 1.3264031385696092e-05, "loss": 0.5223, "step": 6677 }, { "epoch": 0.8126559172497718, "grad_norm": 0.8846643567085266, "learning_rate": 1.3262187467858514e-05, "loss": 0.4769, "step": 6678 }, { "epoch": 0.8127776087617888, "grad_norm": 2.3803341388702393, "learning_rate": 1.326034342588795e-05, "loss": 0.4949, "step": 6679 }, { "epoch": 0.8128993002738059, "grad_norm": 3.128622531890869, "learning_rate": 1.3258499259854576e-05, "loss": 0.5033, "step": 6680 }, { "epoch": 0.813020991785823, "grad_norm": 0.6702750325202942, "learning_rate": 1.3256654969828558e-05, "loss": 0.4666, "step": 6681 }, { "epoch": 0.81314268329784, "grad_norm": 2.27921724319458, "learning_rate": 1.3254810555880074e-05, "loss": 0.5003, "step": 6682 }, { "epoch": 0.813264374809857, "grad_norm": 0.7695069909095764, "learning_rate": 1.3252966018079312e-05, "loss": 0.5037, "step": 6683 }, { "epoch": 0.813386066321874, "grad_norm": 0.6124937534332275, "learning_rate": 1.3251121356496462e-05, "loss": 0.4561, "step": 6684 }, { "epoch": 0.8135077578338911, "grad_norm": 2.236314296722412, "learning_rate": 1.3249276571201714e-05, "loss": 0.4811, "step": 6685 }, { "epoch": 0.8136294493459081, "grad_norm": 3.784168243408203, "learning_rate": 1.3247431662265267e-05, "loss": 0.4334, "step": 6686 }, { "epoch": 0.8137511408579252, "grad_norm": 5.357327461242676, "learning_rate": 1.3245586629757323e-05, "loss": 0.4391, "step": 6687 }, { "epoch": 0.8138728323699422, "grad_norm": 2.820408821105957, "learning_rate": 1.3243741473748091e-05, "loss": 0.4778, "step": 6688 }, { "epoch": 0.8139945238819593, "grad_norm": 3.5945191383361816, "learning_rate": 1.324189619430778e-05, "loss": 0.4732, "step": 6689 }, { "epoch": 0.8141162153939763, "grad_norm": 2.078001022338867, "learning_rate": 1.3240050791506609e-05, "loss": 0.4883, "step": 6690 }, { "epoch": 0.8142379069059933, "grad_norm": 2.0544700622558594, "learning_rate": 1.32382052654148e-05, "loss": 0.5085, "step": 6691 }, { "epoch": 0.8143595984180103, "grad_norm": 3.0333774089813232, "learning_rate": 1.3236359616102576e-05, "loss": 0.3931, "step": 6692 }, { "epoch": 0.8144812899300273, "grad_norm": 0.9521600008010864, "learning_rate": 1.3234513843640171e-05, "loss": 0.4128, "step": 6693 }, { "epoch": 0.8146029814420445, "grad_norm": 0.8809683322906494, "learning_rate": 1.3232667948097818e-05, "loss": 0.4374, "step": 6694 }, { "epoch": 0.8147246729540615, "grad_norm": 5.278363227844238, "learning_rate": 1.3230821929545758e-05, "loss": 0.5436, "step": 6695 }, { "epoch": 0.8148463644660785, "grad_norm": 2.8285536766052246, "learning_rate": 1.322897578805424e-05, "loss": 0.4687, "step": 6696 }, { "epoch": 0.8149680559780955, "grad_norm": 0.9588330984115601, "learning_rate": 1.3227129523693507e-05, "loss": 0.4441, "step": 6697 }, { "epoch": 0.8150897474901125, "grad_norm": 4.537371635437012, "learning_rate": 1.3225283136533815e-05, "loss": 0.5379, "step": 6698 }, { "epoch": 0.8152114390021296, "grad_norm": 7.747735977172852, "learning_rate": 1.3223436626645423e-05, "loss": 0.609, "step": 6699 }, { "epoch": 0.8153331305141467, "grad_norm": 3.778442859649658, "learning_rate": 1.3221589994098598e-05, "loss": 0.4583, "step": 6700 }, { "epoch": 0.8154548220261637, "grad_norm": 0.7494981288909912, "learning_rate": 1.3219743238963603e-05, "loss": 0.422, "step": 6701 }, { "epoch": 0.8155765135381807, "grad_norm": 0.6733980774879456, "learning_rate": 1.3217896361310713e-05, "loss": 0.4108, "step": 6702 }, { "epoch": 0.8156982050501977, "grad_norm": 3.6487674713134766, "learning_rate": 1.3216049361210208e-05, "loss": 0.3315, "step": 6703 }, { "epoch": 0.8158198965622148, "grad_norm": 0.939558744430542, "learning_rate": 1.3214202238732367e-05, "loss": 0.4469, "step": 6704 }, { "epoch": 0.8159415880742318, "grad_norm": 0.7197710275650024, "learning_rate": 1.3212354993947478e-05, "loss": 0.4774, "step": 6705 }, { "epoch": 0.8160632795862489, "grad_norm": 2.546074390411377, "learning_rate": 1.3210507626925833e-05, "loss": 0.4595, "step": 6706 }, { "epoch": 0.8161849710982659, "grad_norm": 1.2112523317337036, "learning_rate": 1.3208660137737725e-05, "loss": 0.5203, "step": 6707 }, { "epoch": 0.816306662610283, "grad_norm": 3.5929346084594727, "learning_rate": 1.3206812526453458e-05, "loss": 0.4012, "step": 6708 }, { "epoch": 0.8164283541223, "grad_norm": 3.0846757888793945, "learning_rate": 1.320496479314334e-05, "loss": 0.4433, "step": 6709 }, { "epoch": 0.816550045634317, "grad_norm": 0.7732279300689697, "learning_rate": 1.3203116937877674e-05, "loss": 0.4835, "step": 6710 }, { "epoch": 0.816671737146334, "grad_norm": 2.4987215995788574, "learning_rate": 1.320126896072678e-05, "loss": 0.4186, "step": 6711 }, { "epoch": 0.816793428658351, "grad_norm": 1.4232234954833984, "learning_rate": 1.3199420861760974e-05, "loss": 0.5002, "step": 6712 }, { "epoch": 0.8169151201703682, "grad_norm": 2.0465641021728516, "learning_rate": 1.3197572641050583e-05, "loss": 0.4509, "step": 6713 }, { "epoch": 0.8170368116823852, "grad_norm": 1.7632215023040771, "learning_rate": 1.3195724298665935e-05, "loss": 0.4045, "step": 6714 }, { "epoch": 0.8171585031944022, "grad_norm": 0.7635778188705444, "learning_rate": 1.3193875834677363e-05, "loss": 0.4632, "step": 6715 }, { "epoch": 0.8172801947064192, "grad_norm": 0.5357716083526611, "learning_rate": 1.3192027249155205e-05, "loss": 0.4081, "step": 6716 }, { "epoch": 0.8174018862184362, "grad_norm": 2.5138120651245117, "learning_rate": 1.31901785421698e-05, "loss": 0.5036, "step": 6717 }, { "epoch": 0.8175235777304533, "grad_norm": 1.8262012004852295, "learning_rate": 1.3188329713791502e-05, "loss": 0.4514, "step": 6718 }, { "epoch": 0.8176452692424704, "grad_norm": 2.4968302249908447, "learning_rate": 1.3186480764090655e-05, "loss": 0.4719, "step": 6719 }, { "epoch": 0.8177669607544874, "grad_norm": 0.6859037280082703, "learning_rate": 1.3184631693137622e-05, "loss": 0.4342, "step": 6720 }, { "epoch": 0.8178886522665044, "grad_norm": 2.740236282348633, "learning_rate": 1.3182782501002757e-05, "loss": 0.4796, "step": 6721 }, { "epoch": 0.8180103437785214, "grad_norm": 0.7942762970924377, "learning_rate": 1.3180933187756435e-05, "loss": 0.43, "step": 6722 }, { "epoch": 0.8181320352905385, "grad_norm": 0.9182960391044617, "learning_rate": 1.3179083753469018e-05, "loss": 0.4821, "step": 6723 }, { "epoch": 0.8182537268025555, "grad_norm": 1.0311622619628906, "learning_rate": 1.3177234198210885e-05, "loss": 0.5229, "step": 6724 }, { "epoch": 0.8183754183145726, "grad_norm": 2.3608343601226807, "learning_rate": 1.3175384522052413e-05, "loss": 0.4715, "step": 6725 }, { "epoch": 0.8184971098265896, "grad_norm": 3.9457805156707764, "learning_rate": 1.3173534725063992e-05, "loss": 0.4546, "step": 6726 }, { "epoch": 0.8186188013386066, "grad_norm": 2.941842555999756, "learning_rate": 1.3171684807316e-05, "loss": 0.5047, "step": 6727 }, { "epoch": 0.8187404928506237, "grad_norm": 3.6807191371917725, "learning_rate": 1.316983476887884e-05, "loss": 0.5019, "step": 6728 }, { "epoch": 0.8188621843626407, "grad_norm": 5.569693565368652, "learning_rate": 1.3167984609822901e-05, "loss": 0.4589, "step": 6729 }, { "epoch": 0.8189838758746577, "grad_norm": 1.4934221506118774, "learning_rate": 1.3166134330218597e-05, "loss": 0.485, "step": 6730 }, { "epoch": 0.8191055673866747, "grad_norm": 2.0929012298583984, "learning_rate": 1.3164283930136321e-05, "loss": 0.4819, "step": 6731 }, { "epoch": 0.8192272588986919, "grad_norm": 1.0450574159622192, "learning_rate": 1.3162433409646499e-05, "loss": 0.4977, "step": 6732 }, { "epoch": 0.8193489504107089, "grad_norm": 0.6460596323013306, "learning_rate": 1.3160582768819534e-05, "loss": 0.4696, "step": 6733 }, { "epoch": 0.8194706419227259, "grad_norm": 1.1410826444625854, "learning_rate": 1.3158732007725856e-05, "loss": 0.4508, "step": 6734 }, { "epoch": 0.8195923334347429, "grad_norm": 0.6852778196334839, "learning_rate": 1.3156881126435885e-05, "loss": 0.4372, "step": 6735 }, { "epoch": 0.8197140249467599, "grad_norm": 4.219789028167725, "learning_rate": 1.3155030125020051e-05, "loss": 0.5393, "step": 6736 }, { "epoch": 0.819835716458777, "grad_norm": 3.642214775085449, "learning_rate": 1.3153179003548794e-05, "loss": 0.4818, "step": 6737 }, { "epoch": 0.8199574079707941, "grad_norm": 2.299353837966919, "learning_rate": 1.3151327762092549e-05, "loss": 0.4565, "step": 6738 }, { "epoch": 0.8200790994828111, "grad_norm": 0.5892159342765808, "learning_rate": 1.314947640072176e-05, "loss": 0.3876, "step": 6739 }, { "epoch": 0.8202007909948281, "grad_norm": 0.6633529663085938, "learning_rate": 1.3147624919506872e-05, "loss": 0.385, "step": 6740 }, { "epoch": 0.8203224825068451, "grad_norm": 5.231131076812744, "learning_rate": 1.3145773318518342e-05, "loss": 0.5464, "step": 6741 }, { "epoch": 0.8204441740188622, "grad_norm": 1.2580164670944214, "learning_rate": 1.3143921597826625e-05, "loss": 0.432, "step": 6742 }, { "epoch": 0.8205658655308792, "grad_norm": 0.8528087735176086, "learning_rate": 1.3142069757502187e-05, "loss": 0.432, "step": 6743 }, { "epoch": 0.8206875570428963, "grad_norm": 0.9162859916687012, "learning_rate": 1.3140217797615486e-05, "loss": 0.4255, "step": 6744 }, { "epoch": 0.8208092485549133, "grad_norm": 1.9959614276885986, "learning_rate": 1.3138365718237002e-05, "loss": 0.4994, "step": 6745 }, { "epoch": 0.8209309400669303, "grad_norm": 1.2443194389343262, "learning_rate": 1.3136513519437204e-05, "loss": 0.4776, "step": 6746 }, { "epoch": 0.8210526315789474, "grad_norm": 1.8186997175216675, "learning_rate": 1.3134661201286573e-05, "loss": 0.5325, "step": 6747 }, { "epoch": 0.8211743230909644, "grad_norm": 1.274896502494812, "learning_rate": 1.31328087638556e-05, "loss": 0.4833, "step": 6748 }, { "epoch": 0.8212960146029814, "grad_norm": 2.247789144515991, "learning_rate": 1.3130956207214767e-05, "loss": 0.4409, "step": 6749 }, { "epoch": 0.8214177061149984, "grad_norm": 4.778298377990723, "learning_rate": 1.3129103531434569e-05, "loss": 0.3931, "step": 6750 }, { "epoch": 0.8215393976270156, "grad_norm": 1.2628536224365234, "learning_rate": 1.3127250736585504e-05, "loss": 0.4313, "step": 6751 }, { "epoch": 0.8216610891390326, "grad_norm": 0.8084657788276672, "learning_rate": 1.3125397822738075e-05, "loss": 0.4557, "step": 6752 }, { "epoch": 0.8217827806510496, "grad_norm": 1.6310756206512451, "learning_rate": 1.312354478996279e-05, "loss": 0.4337, "step": 6753 }, { "epoch": 0.8219044721630666, "grad_norm": 0.6733612418174744, "learning_rate": 1.312169163833016e-05, "loss": 0.4298, "step": 6754 }, { "epoch": 0.8220261636750836, "grad_norm": 0.6573811173439026, "learning_rate": 1.3119838367910704e-05, "loss": 0.46, "step": 6755 }, { "epoch": 0.8221478551871007, "grad_norm": 0.8170048594474792, "learning_rate": 1.3117984978774941e-05, "loss": 0.49, "step": 6756 }, { "epoch": 0.8222695466991178, "grad_norm": 4.894285678863525, "learning_rate": 1.3116131470993391e-05, "loss": 0.5681, "step": 6757 }, { "epoch": 0.8223912382111348, "grad_norm": 0.9377179741859436, "learning_rate": 1.3114277844636592e-05, "loss": 0.4883, "step": 6758 }, { "epoch": 0.8225129297231518, "grad_norm": 2.1831438541412354, "learning_rate": 1.3112424099775071e-05, "loss": 0.4006, "step": 6759 }, { "epoch": 0.8226346212351688, "grad_norm": 2.9049322605133057, "learning_rate": 1.3110570236479374e-05, "loss": 0.5278, "step": 6760 }, { "epoch": 0.8227563127471859, "grad_norm": 1.5541597604751587, "learning_rate": 1.3108716254820043e-05, "loss": 0.429, "step": 6761 }, { "epoch": 0.8228780042592029, "grad_norm": 2.308603048324585, "learning_rate": 1.3106862154867619e-05, "loss": 0.4838, "step": 6762 }, { "epoch": 0.82299969577122, "grad_norm": 3.5870141983032227, "learning_rate": 1.3105007936692662e-05, "loss": 0.5245, "step": 6763 }, { "epoch": 0.823121387283237, "grad_norm": 0.8772350549697876, "learning_rate": 1.3103153600365726e-05, "loss": 0.398, "step": 6764 }, { "epoch": 0.823243078795254, "grad_norm": 1.807956337928772, "learning_rate": 1.3101299145957372e-05, "loss": 0.4522, "step": 6765 }, { "epoch": 0.8233647703072711, "grad_norm": 1.533187747001648, "learning_rate": 1.3099444573538164e-05, "loss": 0.5183, "step": 6766 }, { "epoch": 0.8234864618192881, "grad_norm": 0.8093053102493286, "learning_rate": 1.3097589883178677e-05, "loss": 0.4431, "step": 6767 }, { "epoch": 0.8236081533313051, "grad_norm": 0.859160840511322, "learning_rate": 1.3095735074949485e-05, "loss": 0.4724, "step": 6768 }, { "epoch": 0.8237298448433221, "grad_norm": 2.8490512371063232, "learning_rate": 1.3093880148921162e-05, "loss": 0.4371, "step": 6769 }, { "epoch": 0.8238515363553393, "grad_norm": 1.9302666187286377, "learning_rate": 1.3092025105164297e-05, "loss": 0.4655, "step": 6770 }, { "epoch": 0.8239732278673563, "grad_norm": 1.8279956579208374, "learning_rate": 1.3090169943749475e-05, "loss": 0.4513, "step": 6771 }, { "epoch": 0.8240949193793733, "grad_norm": 0.9794836044311523, "learning_rate": 1.3088314664747293e-05, "loss": 0.4507, "step": 6772 }, { "epoch": 0.8242166108913903, "grad_norm": 1.7553751468658447, "learning_rate": 1.3086459268228345e-05, "loss": 0.3991, "step": 6773 }, { "epoch": 0.8243383024034073, "grad_norm": 2.9451494216918945, "learning_rate": 1.308460375426323e-05, "loss": 0.5058, "step": 6774 }, { "epoch": 0.8244599939154243, "grad_norm": 1.7878193855285645, "learning_rate": 1.3082748122922562e-05, "loss": 0.4512, "step": 6775 }, { "epoch": 0.8245816854274415, "grad_norm": 2.5065412521362305, "learning_rate": 1.3080892374276943e-05, "loss": 0.4822, "step": 6776 }, { "epoch": 0.8247033769394585, "grad_norm": 0.6355451345443726, "learning_rate": 1.3079036508396991e-05, "loss": 0.4485, "step": 6777 }, { "epoch": 0.8248250684514755, "grad_norm": 2.876383066177368, "learning_rate": 1.3077180525353332e-05, "loss": 0.3951, "step": 6778 }, { "epoch": 0.8249467599634925, "grad_norm": 2.601952314376831, "learning_rate": 1.3075324425216583e-05, "loss": 0.5184, "step": 6779 }, { "epoch": 0.8250684514755096, "grad_norm": 1.2750346660614014, "learning_rate": 1.3073468208057372e-05, "loss": 0.4566, "step": 6780 }, { "epoch": 0.8251901429875266, "grad_norm": 2.2961859703063965, "learning_rate": 1.3071611873946335e-05, "loss": 0.4345, "step": 6781 }, { "epoch": 0.8253118344995437, "grad_norm": 0.566150426864624, "learning_rate": 1.306975542295411e-05, "loss": 0.4692, "step": 6782 }, { "epoch": 0.8254335260115607, "grad_norm": 2.2687556743621826, "learning_rate": 1.3067898855151333e-05, "loss": 0.4322, "step": 6783 }, { "epoch": 0.8255552175235777, "grad_norm": 0.7312899231910706, "learning_rate": 1.3066042170608658e-05, "loss": 0.4089, "step": 6784 }, { "epoch": 0.8256769090355948, "grad_norm": 0.7542760968208313, "learning_rate": 1.306418536939673e-05, "loss": 0.441, "step": 6785 }, { "epoch": 0.8257986005476118, "grad_norm": 1.011281967163086, "learning_rate": 1.3062328451586209e-05, "loss": 0.4073, "step": 6786 }, { "epoch": 0.8259202920596288, "grad_norm": 3.765763759613037, "learning_rate": 1.3060471417247746e-05, "loss": 0.5107, "step": 6787 }, { "epoch": 0.8260419835716459, "grad_norm": 0.9791185259819031, "learning_rate": 1.3058614266452014e-05, "loss": 0.4392, "step": 6788 }, { "epoch": 0.826163675083663, "grad_norm": 3.819981813430786, "learning_rate": 1.3056756999269679e-05, "loss": 0.4919, "step": 6789 }, { "epoch": 0.82628536659568, "grad_norm": 0.8946986794471741, "learning_rate": 1.3054899615771414e-05, "loss": 0.4398, "step": 6790 }, { "epoch": 0.826407058107697, "grad_norm": 0.6221190094947815, "learning_rate": 1.3053042116027895e-05, "loss": 0.4638, "step": 6791 }, { "epoch": 0.826528749619714, "grad_norm": 1.3485747575759888, "learning_rate": 1.3051184500109801e-05, "loss": 0.4613, "step": 6792 }, { "epoch": 0.826650441131731, "grad_norm": 1.647303581237793, "learning_rate": 1.3049326768087821e-05, "loss": 0.4878, "step": 6793 }, { "epoch": 0.826772132643748, "grad_norm": 1.5896073579788208, "learning_rate": 1.3047468920032651e-05, "loss": 0.4391, "step": 6794 }, { "epoch": 0.8268938241557652, "grad_norm": 1.961309552192688, "learning_rate": 1.3045610956014978e-05, "loss": 0.4564, "step": 6795 }, { "epoch": 0.8270155156677822, "grad_norm": 2.0452980995178223, "learning_rate": 1.3043752876105504e-05, "loss": 0.4475, "step": 6796 }, { "epoch": 0.8271372071797992, "grad_norm": 0.641858696937561, "learning_rate": 1.3041894680374932e-05, "loss": 0.474, "step": 6797 }, { "epoch": 0.8272588986918162, "grad_norm": 0.9289491176605225, "learning_rate": 1.304003636889397e-05, "loss": 0.4717, "step": 6798 }, { "epoch": 0.8273805902038333, "grad_norm": 1.0018569231033325, "learning_rate": 1.3038177941733333e-05, "loss": 0.4869, "step": 6799 }, { "epoch": 0.8275022817158503, "grad_norm": 0.9068577289581299, "learning_rate": 1.3036319398963737e-05, "loss": 0.4649, "step": 6800 }, { "epoch": 0.8276239732278674, "grad_norm": 1.6282252073287964, "learning_rate": 1.3034460740655903e-05, "loss": 0.481, "step": 6801 }, { "epoch": 0.8277456647398844, "grad_norm": 1.6111154556274414, "learning_rate": 1.3032601966880558e-05, "loss": 0.4279, "step": 6802 }, { "epoch": 0.8278673562519014, "grad_norm": 2.688147783279419, "learning_rate": 1.303074307770843e-05, "loss": 0.4764, "step": 6803 }, { "epoch": 0.8279890477639185, "grad_norm": 2.1374621391296387, "learning_rate": 1.3028884073210253e-05, "loss": 0.5043, "step": 6804 }, { "epoch": 0.8281107392759355, "grad_norm": 1.8053512573242188, "learning_rate": 1.302702495345677e-05, "loss": 0.4679, "step": 6805 }, { "epoch": 0.8282324307879525, "grad_norm": 0.7423006892204285, "learning_rate": 1.302516571851872e-05, "loss": 0.4426, "step": 6806 }, { "epoch": 0.8283541222999696, "grad_norm": 1.0323774814605713, "learning_rate": 1.3023306368466853e-05, "loss": 0.4747, "step": 6807 }, { "epoch": 0.8284758138119866, "grad_norm": 2.339226484298706, "learning_rate": 1.3021446903371922e-05, "loss": 0.4342, "step": 6808 }, { "epoch": 0.8285975053240037, "grad_norm": 1.1705610752105713, "learning_rate": 1.301958732330468e-05, "loss": 0.4637, "step": 6809 }, { "epoch": 0.8287191968360207, "grad_norm": 0.7676281929016113, "learning_rate": 1.3017727628335892e-05, "loss": 0.4896, "step": 6810 }, { "epoch": 0.8288408883480377, "grad_norm": 2.7097115516662598, "learning_rate": 1.3015867818536321e-05, "loss": 0.4419, "step": 6811 }, { "epoch": 0.8289625798600547, "grad_norm": 1.556746482849121, "learning_rate": 1.3014007893976737e-05, "loss": 0.5079, "step": 6812 }, { "epoch": 0.8290842713720717, "grad_norm": 0.8942267894744873, "learning_rate": 1.3012147854727917e-05, "loss": 0.433, "step": 6813 }, { "epoch": 0.8292059628840889, "grad_norm": 1.9167677164077759, "learning_rate": 1.3010287700860632e-05, "loss": 0.4215, "step": 6814 }, { "epoch": 0.8293276543961059, "grad_norm": 1.3114110231399536, "learning_rate": 1.3008427432445667e-05, "loss": 0.4681, "step": 6815 }, { "epoch": 0.8294493459081229, "grad_norm": 1.196704387664795, "learning_rate": 1.3006567049553818e-05, "loss": 0.456, "step": 6816 }, { "epoch": 0.8295710374201399, "grad_norm": 1.963807225227356, "learning_rate": 1.3004706552255863e-05, "loss": 0.4448, "step": 6817 }, { "epoch": 0.829692728932157, "grad_norm": 3.58132266998291, "learning_rate": 1.3002845940622609e-05, "loss": 0.5033, "step": 6818 }, { "epoch": 0.829814420444174, "grad_norm": 1.2175681591033936, "learning_rate": 1.3000985214724848e-05, "loss": 0.441, "step": 6819 }, { "epoch": 0.8299361119561911, "grad_norm": 0.669368326663971, "learning_rate": 1.2999124374633392e-05, "loss": 0.4174, "step": 6820 }, { "epoch": 0.8300578034682081, "grad_norm": 4.004969596862793, "learning_rate": 1.2997263420419042e-05, "loss": 0.503, "step": 6821 }, { "epoch": 0.8301794949802251, "grad_norm": 3.592200994491577, "learning_rate": 1.2995402352152614e-05, "loss": 0.5281, "step": 6822 }, { "epoch": 0.8303011864922422, "grad_norm": 1.0471320152282715, "learning_rate": 1.2993541169904928e-05, "loss": 0.4788, "step": 6823 }, { "epoch": 0.8304228780042592, "grad_norm": 4.47477912902832, "learning_rate": 1.299167987374681e-05, "loss": 0.5547, "step": 6824 }, { "epoch": 0.8305445695162762, "grad_norm": 1.6746526956558228, "learning_rate": 1.2989818463749073e-05, "loss": 0.4444, "step": 6825 }, { "epoch": 0.8306662610282933, "grad_norm": 0.6307390332221985, "learning_rate": 1.2987956939982556e-05, "loss": 0.487, "step": 6826 }, { "epoch": 0.8307879525403103, "grad_norm": 2.1108851432800293, "learning_rate": 1.2986095302518092e-05, "loss": 0.4754, "step": 6827 }, { "epoch": 0.8309096440523274, "grad_norm": 3.383765459060669, "learning_rate": 1.2984233551426525e-05, "loss": 0.4277, "step": 6828 }, { "epoch": 0.8310313355643444, "grad_norm": 3.48599910736084, "learning_rate": 1.298237168677869e-05, "loss": 0.4248, "step": 6829 }, { "epoch": 0.8311530270763614, "grad_norm": 1.4197397232055664, "learning_rate": 1.2980509708645441e-05, "loss": 0.4709, "step": 6830 }, { "epoch": 0.8312747185883784, "grad_norm": 0.9290311336517334, "learning_rate": 1.2978647617097629e-05, "loss": 0.4969, "step": 6831 }, { "epoch": 0.8313964101003954, "grad_norm": 1.0463742017745972, "learning_rate": 1.297678541220611e-05, "loss": 0.4636, "step": 6832 }, { "epoch": 0.8315181016124126, "grad_norm": 0.6120090484619141, "learning_rate": 1.2974923094041741e-05, "loss": 0.431, "step": 6833 }, { "epoch": 0.8316397931244296, "grad_norm": 1.3695932626724243, "learning_rate": 1.2973060662675391e-05, "loss": 0.4979, "step": 6834 }, { "epoch": 0.8317614846364466, "grad_norm": 1.5041674375534058, "learning_rate": 1.2971198118177932e-05, "loss": 0.4218, "step": 6835 }, { "epoch": 0.8318831761484636, "grad_norm": 1.0865904092788696, "learning_rate": 1.2969335460620236e-05, "loss": 0.4389, "step": 6836 }, { "epoch": 0.8320048676604807, "grad_norm": 1.5804051160812378, "learning_rate": 1.2967472690073176e-05, "loss": 0.4821, "step": 6837 }, { "epoch": 0.8321265591724977, "grad_norm": 0.8305590152740479, "learning_rate": 1.2965609806607637e-05, "loss": 0.4192, "step": 6838 }, { "epoch": 0.8322482506845148, "grad_norm": 0.9001047015190125, "learning_rate": 1.296374681029451e-05, "loss": 0.4589, "step": 6839 }, { "epoch": 0.8323699421965318, "grad_norm": 1.0995434522628784, "learning_rate": 1.296188370120468e-05, "loss": 0.4638, "step": 6840 }, { "epoch": 0.8324916337085488, "grad_norm": 2.307422161102295, "learning_rate": 1.2960020479409043e-05, "loss": 0.488, "step": 6841 }, { "epoch": 0.8326133252205659, "grad_norm": 1.6505639553070068, "learning_rate": 1.2958157144978503e-05, "loss": 0.46, "step": 6842 }, { "epoch": 0.8327350167325829, "grad_norm": 1.4252490997314453, "learning_rate": 1.2956293697983959e-05, "loss": 0.4589, "step": 6843 }, { "epoch": 0.8328567082445999, "grad_norm": 1.731046438217163, "learning_rate": 1.295443013849632e-05, "loss": 0.4576, "step": 6844 }, { "epoch": 0.832978399756617, "grad_norm": 3.6126105785369873, "learning_rate": 1.29525664665865e-05, "loss": 0.3881, "step": 6845 }, { "epoch": 0.833100091268634, "grad_norm": 0.7489259243011475, "learning_rate": 1.2950702682325415e-05, "loss": 0.471, "step": 6846 }, { "epoch": 0.8332217827806511, "grad_norm": 0.6991187334060669, "learning_rate": 1.2948838785783986e-05, "loss": 0.4702, "step": 6847 }, { "epoch": 0.8333434742926681, "grad_norm": 1.273205041885376, "learning_rate": 1.2946974777033135e-05, "loss": 0.484, "step": 6848 }, { "epoch": 0.8334651658046851, "grad_norm": 0.6542345285415649, "learning_rate": 1.2945110656143793e-05, "loss": 0.4822, "step": 6849 }, { "epoch": 0.8335868573167021, "grad_norm": 1.059162974357605, "learning_rate": 1.2943246423186897e-05, "loss": 0.478, "step": 6850 }, { "epoch": 0.8337085488287191, "grad_norm": 0.596342146396637, "learning_rate": 1.294138207823338e-05, "loss": 0.4319, "step": 6851 }, { "epoch": 0.8338302403407363, "grad_norm": 3.7490267753601074, "learning_rate": 1.2939517621354187e-05, "loss": 0.3969, "step": 6852 }, { "epoch": 0.8339519318527533, "grad_norm": 1.5371110439300537, "learning_rate": 1.2937653052620266e-05, "loss": 0.451, "step": 6853 }, { "epoch": 0.8340736233647703, "grad_norm": 0.8665494918823242, "learning_rate": 1.2935788372102566e-05, "loss": 0.4436, "step": 6854 }, { "epoch": 0.8341953148767873, "grad_norm": 1.6559678316116333, "learning_rate": 1.2933923579872042e-05, "loss": 0.42, "step": 6855 }, { "epoch": 0.8343170063888043, "grad_norm": 2.324580669403076, "learning_rate": 1.2932058675999651e-05, "loss": 0.5133, "step": 6856 }, { "epoch": 0.8344386979008214, "grad_norm": 1.1224908828735352, "learning_rate": 1.2930193660556356e-05, "loss": 0.4425, "step": 6857 }, { "epoch": 0.8345603894128385, "grad_norm": 0.6706041097640991, "learning_rate": 1.2928328533613135e-05, "loss": 0.472, "step": 6858 }, { "epoch": 0.8346820809248555, "grad_norm": 1.2882907390594482, "learning_rate": 1.2926463295240945e-05, "loss": 0.4124, "step": 6859 }, { "epoch": 0.8348037724368725, "grad_norm": 1.9551949501037598, "learning_rate": 1.2924597945510771e-05, "loss": 0.4866, "step": 6860 }, { "epoch": 0.8349254639488896, "grad_norm": 0.8849619626998901, "learning_rate": 1.292273248449359e-05, "loss": 0.4067, "step": 6861 }, { "epoch": 0.8350471554609066, "grad_norm": 0.9172243475914001, "learning_rate": 1.292086691226039e-05, "loss": 0.4562, "step": 6862 }, { "epoch": 0.8351688469729236, "grad_norm": 1.9458006620407104, "learning_rate": 1.2919001228882157e-05, "loss": 0.4622, "step": 6863 }, { "epoch": 0.8352905384849407, "grad_norm": 3.4780547618865967, "learning_rate": 1.2917135434429888e-05, "loss": 0.5445, "step": 6864 }, { "epoch": 0.8354122299969577, "grad_norm": 1.3642549514770508, "learning_rate": 1.2915269528974576e-05, "loss": 0.4394, "step": 6865 }, { "epoch": 0.8355339215089748, "grad_norm": 0.6495612263679504, "learning_rate": 1.2913403512587227e-05, "loss": 0.4251, "step": 6866 }, { "epoch": 0.8356556130209918, "grad_norm": 0.8254455327987671, "learning_rate": 1.2911537385338836e-05, "loss": 0.5089, "step": 6867 }, { "epoch": 0.8357773045330088, "grad_norm": 2.1402394771575928, "learning_rate": 1.2909671147300427e-05, "loss": 0.5193, "step": 6868 }, { "epoch": 0.8358989960450258, "grad_norm": 1.8830991983413696, "learning_rate": 1.2907804798543006e-05, "loss": 0.4631, "step": 6869 }, { "epoch": 0.8360206875570428, "grad_norm": 3.3242721557617188, "learning_rate": 1.2905938339137598e-05, "loss": 0.4568, "step": 6870 }, { "epoch": 0.83614237906906, "grad_norm": 1.459088921546936, "learning_rate": 1.2904071769155215e-05, "loss": 0.5031, "step": 6871 }, { "epoch": 0.836264070581077, "grad_norm": 2.3964123725891113, "learning_rate": 1.2902205088666894e-05, "loss": 0.4945, "step": 6872 }, { "epoch": 0.836385762093094, "grad_norm": 1.7578063011169434, "learning_rate": 1.2900338297743661e-05, "loss": 0.4726, "step": 6873 }, { "epoch": 0.836507453605111, "grad_norm": 1.5480945110321045, "learning_rate": 1.2898471396456554e-05, "loss": 0.4515, "step": 6874 }, { "epoch": 0.836629145117128, "grad_norm": 2.650372266769409, "learning_rate": 1.2896604384876608e-05, "loss": 0.4032, "step": 6875 }, { "epoch": 0.8367508366291451, "grad_norm": 2.133085012435913, "learning_rate": 1.2894737263074872e-05, "loss": 0.5347, "step": 6876 }, { "epoch": 0.8368725281411622, "grad_norm": 3.180300712585449, "learning_rate": 1.289287003112239e-05, "loss": 0.5483, "step": 6877 }, { "epoch": 0.8369942196531792, "grad_norm": 1.7303646802902222, "learning_rate": 1.2891002689090215e-05, "loss": 0.4755, "step": 6878 }, { "epoch": 0.8371159111651962, "grad_norm": 2.27557110786438, "learning_rate": 1.2889135237049405e-05, "loss": 0.4871, "step": 6879 }, { "epoch": 0.8372376026772133, "grad_norm": 0.836880087852478, "learning_rate": 1.2887267675071018e-05, "loss": 0.4039, "step": 6880 }, { "epoch": 0.8373592941892303, "grad_norm": 3.1311147212982178, "learning_rate": 1.2885400003226118e-05, "loss": 0.5287, "step": 6881 }, { "epoch": 0.8374809857012473, "grad_norm": 1.0516445636749268, "learning_rate": 1.288353222158578e-05, "loss": 0.4073, "step": 6882 }, { "epoch": 0.8376026772132644, "grad_norm": 0.9085931181907654, "learning_rate": 1.2881664330221069e-05, "loss": 0.4554, "step": 6883 }, { "epoch": 0.8377243687252814, "grad_norm": 0.946977972984314, "learning_rate": 1.2879796329203067e-05, "loss": 0.4458, "step": 6884 }, { "epoch": 0.8378460602372985, "grad_norm": 1.1708645820617676, "learning_rate": 1.2877928218602853e-05, "loss": 0.4093, "step": 6885 }, { "epoch": 0.8379677517493155, "grad_norm": 2.924179792404175, "learning_rate": 1.2876059998491513e-05, "loss": 0.4055, "step": 6886 }, { "epoch": 0.8380894432613325, "grad_norm": 0.7647840976715088, "learning_rate": 1.2874191668940136e-05, "loss": 0.4459, "step": 6887 }, { "epoch": 0.8382111347733495, "grad_norm": 2.0494229793548584, "learning_rate": 1.2872323230019822e-05, "loss": 0.4948, "step": 6888 }, { "epoch": 0.8383328262853667, "grad_norm": 0.9894136190414429, "learning_rate": 1.2870454681801658e-05, "loss": 0.4954, "step": 6889 }, { "epoch": 0.8384545177973837, "grad_norm": 1.4254064559936523, "learning_rate": 1.2868586024356753e-05, "loss": 0.4589, "step": 6890 }, { "epoch": 0.8385762093094007, "grad_norm": 2.4123616218566895, "learning_rate": 1.2866717257756212e-05, "loss": 0.4195, "step": 6891 }, { "epoch": 0.8386979008214177, "grad_norm": 1.3306949138641357, "learning_rate": 1.2864848382071147e-05, "loss": 0.4329, "step": 6892 }, { "epoch": 0.8388195923334347, "grad_norm": 0.8937929272651672, "learning_rate": 1.286297939737267e-05, "loss": 0.4117, "step": 6893 }, { "epoch": 0.8389412838454517, "grad_norm": 1.6041615009307861, "learning_rate": 1.2861110303731904e-05, "loss": 0.4808, "step": 6894 }, { "epoch": 0.8390629753574688, "grad_norm": 0.73006671667099, "learning_rate": 1.2859241101219964e-05, "loss": 0.4072, "step": 6895 }, { "epoch": 0.8391846668694859, "grad_norm": 2.1428239345550537, "learning_rate": 1.2857371789907984e-05, "loss": 0.5044, "step": 6896 }, { "epoch": 0.8393063583815029, "grad_norm": 1.0156277418136597, "learning_rate": 1.2855502369867092e-05, "loss": 0.4348, "step": 6897 }, { "epoch": 0.8394280498935199, "grad_norm": 0.6417730450630188, "learning_rate": 1.2853632841168424e-05, "loss": 0.4338, "step": 6898 }, { "epoch": 0.839549741405537, "grad_norm": 0.69439697265625, "learning_rate": 1.2851763203883122e-05, "loss": 0.4667, "step": 6899 }, { "epoch": 0.839671432917554, "grad_norm": 2.310706377029419, "learning_rate": 1.2849893458082328e-05, "loss": 0.4788, "step": 6900 }, { "epoch": 0.839793124429571, "grad_norm": 0.5879369378089905, "learning_rate": 1.2848023603837185e-05, "loss": 0.4435, "step": 6901 }, { "epoch": 0.8399148159415881, "grad_norm": 0.8880062699317932, "learning_rate": 1.2846153641218851e-05, "loss": 0.467, "step": 6902 }, { "epoch": 0.8400365074536051, "grad_norm": 2.1720924377441406, "learning_rate": 1.2844283570298481e-05, "loss": 0.4848, "step": 6903 }, { "epoch": 0.8401581989656222, "grad_norm": 2.6391782760620117, "learning_rate": 1.284241339114723e-05, "loss": 0.4548, "step": 6904 }, { "epoch": 0.8402798904776392, "grad_norm": 3.1797895431518555, "learning_rate": 1.2840543103836271e-05, "loss": 0.4227, "step": 6905 }, { "epoch": 0.8404015819896562, "grad_norm": 2.2403340339660645, "learning_rate": 1.2838672708436764e-05, "loss": 0.4866, "step": 6906 }, { "epoch": 0.8405232735016732, "grad_norm": 1.0137779712677002, "learning_rate": 1.2836802205019887e-05, "loss": 0.4771, "step": 6907 }, { "epoch": 0.8406449650136903, "grad_norm": 2.729987382888794, "learning_rate": 1.2834931593656812e-05, "loss": 0.459, "step": 6908 }, { "epoch": 0.8407666565257074, "grad_norm": 4.043386936187744, "learning_rate": 1.2833060874418722e-05, "loss": 0.4533, "step": 6909 }, { "epoch": 0.8408883480377244, "grad_norm": 1.1356867551803589, "learning_rate": 1.2831190047376802e-05, "loss": 0.4259, "step": 6910 }, { "epoch": 0.8410100395497414, "grad_norm": 0.9805501699447632, "learning_rate": 1.282931911260224e-05, "loss": 0.4577, "step": 6911 }, { "epoch": 0.8411317310617584, "grad_norm": 1.4176305532455444, "learning_rate": 1.282744807016623e-05, "loss": 0.4371, "step": 6912 }, { "epoch": 0.8412534225737754, "grad_norm": 0.6131776571273804, "learning_rate": 1.2825576920139969e-05, "loss": 0.4187, "step": 6913 }, { "epoch": 0.8413751140857925, "grad_norm": 2.1293554306030273, "learning_rate": 1.2823705662594658e-05, "loss": 0.4521, "step": 6914 }, { "epoch": 0.8414968055978096, "grad_norm": 4.38294792175293, "learning_rate": 1.2821834297601498e-05, "loss": 0.5177, "step": 6915 }, { "epoch": 0.8416184971098266, "grad_norm": 2.352923631668091, "learning_rate": 1.2819962825231707e-05, "loss": 0.4575, "step": 6916 }, { "epoch": 0.8417401886218436, "grad_norm": 0.7982110977172852, "learning_rate": 1.281809124555649e-05, "loss": 0.4086, "step": 6917 }, { "epoch": 0.8418618801338607, "grad_norm": 3.7458646297454834, "learning_rate": 1.2816219558647072e-05, "loss": 0.5379, "step": 6918 }, { "epoch": 0.8419835716458777, "grad_norm": 0.6902008056640625, "learning_rate": 1.2814347764574666e-05, "loss": 0.4685, "step": 6919 }, { "epoch": 0.8421052631578947, "grad_norm": 4.48379373550415, "learning_rate": 1.2812475863410505e-05, "loss": 0.5657, "step": 6920 }, { "epoch": 0.8422269546699118, "grad_norm": 2.713804006576538, "learning_rate": 1.2810603855225811e-05, "loss": 0.4333, "step": 6921 }, { "epoch": 0.8423486461819288, "grad_norm": 1.6324368715286255, "learning_rate": 1.2808731740091834e-05, "loss": 0.4431, "step": 6922 }, { "epoch": 0.8424703376939459, "grad_norm": 0.8463773131370544, "learning_rate": 1.280685951807979e-05, "loss": 0.4789, "step": 6923 }, { "epoch": 0.8425920292059629, "grad_norm": 1.5698280334472656, "learning_rate": 1.2804987189260933e-05, "loss": 0.4959, "step": 6924 }, { "epoch": 0.8427137207179799, "grad_norm": 4.177562713623047, "learning_rate": 1.280311475370651e-05, "loss": 0.3915, "step": 6925 }, { "epoch": 0.8428354122299969, "grad_norm": 2.055678606033325, "learning_rate": 1.2801242211487768e-05, "loss": 0.4247, "step": 6926 }, { "epoch": 0.842957103742014, "grad_norm": 2.990649938583374, "learning_rate": 1.279936956267596e-05, "loss": 0.4974, "step": 6927 }, { "epoch": 0.8430787952540311, "grad_norm": 1.3391287326812744, "learning_rate": 1.2797496807342349e-05, "loss": 0.4509, "step": 6928 }, { "epoch": 0.8432004867660481, "grad_norm": 1.2605738639831543, "learning_rate": 1.2795623945558191e-05, "loss": 0.4482, "step": 6929 }, { "epoch": 0.8433221782780651, "grad_norm": 0.6613712906837463, "learning_rate": 1.2793750977394756e-05, "loss": 0.4305, "step": 6930 }, { "epoch": 0.8434438697900821, "grad_norm": 1.0457223653793335, "learning_rate": 1.2791877902923312e-05, "loss": 0.4542, "step": 6931 }, { "epoch": 0.8435655613020991, "grad_norm": 1.8725061416625977, "learning_rate": 1.2790004722215137e-05, "loss": 0.4481, "step": 6932 }, { "epoch": 0.8436872528141162, "grad_norm": 3.1485283374786377, "learning_rate": 1.278813143534151e-05, "loss": 0.4629, "step": 6933 }, { "epoch": 0.8438089443261333, "grad_norm": 1.9086508750915527, "learning_rate": 1.2786258042373707e-05, "loss": 0.4748, "step": 6934 }, { "epoch": 0.8439306358381503, "grad_norm": 5.065165996551514, "learning_rate": 1.2784384543383017e-05, "loss": 0.5661, "step": 6935 }, { "epoch": 0.8440523273501673, "grad_norm": 0.6957314610481262, "learning_rate": 1.2782510938440735e-05, "loss": 0.4371, "step": 6936 }, { "epoch": 0.8441740188621843, "grad_norm": 1.8577358722686768, "learning_rate": 1.2780637227618152e-05, "loss": 0.5035, "step": 6937 }, { "epoch": 0.8442957103742014, "grad_norm": 0.8081720471382141, "learning_rate": 1.2778763410986565e-05, "loss": 0.4894, "step": 6938 }, { "epoch": 0.8444174018862184, "grad_norm": 2.4398961067199707, "learning_rate": 1.2776889488617279e-05, "loss": 0.4733, "step": 6939 }, { "epoch": 0.8445390933982355, "grad_norm": 3.0027964115142822, "learning_rate": 1.2775015460581602e-05, "loss": 0.4543, "step": 6940 }, { "epoch": 0.8446607849102525, "grad_norm": 0.6925974488258362, "learning_rate": 1.2773141326950842e-05, "loss": 0.543, "step": 6941 }, { "epoch": 0.8447824764222696, "grad_norm": 1.5968799591064453, "learning_rate": 1.2771267087796312e-05, "loss": 0.4453, "step": 6942 }, { "epoch": 0.8449041679342866, "grad_norm": 0.6327440142631531, "learning_rate": 1.2769392743189335e-05, "loss": 0.4714, "step": 6943 }, { "epoch": 0.8450258594463036, "grad_norm": 2.0380287170410156, "learning_rate": 1.2767518293201232e-05, "loss": 0.4506, "step": 6944 }, { "epoch": 0.8451475509583206, "grad_norm": 1.119584083557129, "learning_rate": 1.2765643737903331e-05, "loss": 0.4757, "step": 6945 }, { "epoch": 0.8452692424703377, "grad_norm": 0.9903263449668884, "learning_rate": 1.276376907736696e-05, "loss": 0.429, "step": 6946 }, { "epoch": 0.8453909339823548, "grad_norm": 1.4283775091171265, "learning_rate": 1.2761894311663451e-05, "loss": 0.4905, "step": 6947 }, { "epoch": 0.8455126254943718, "grad_norm": 1.6286977529525757, "learning_rate": 1.276001944086415e-05, "loss": 0.4565, "step": 6948 }, { "epoch": 0.8456343170063888, "grad_norm": 1.2994225025177002, "learning_rate": 1.2758144465040396e-05, "loss": 0.4266, "step": 6949 }, { "epoch": 0.8457560085184058, "grad_norm": 0.5361411571502686, "learning_rate": 1.2756269384263536e-05, "loss": 0.4064, "step": 6950 }, { "epoch": 0.8458777000304228, "grad_norm": 0.6261139512062073, "learning_rate": 1.2754394198604923e-05, "loss": 0.4544, "step": 6951 }, { "epoch": 0.8459993915424399, "grad_norm": 1.6742547750473022, "learning_rate": 1.275251890813591e-05, "loss": 0.5153, "step": 6952 }, { "epoch": 0.846121083054457, "grad_norm": 1.8770606517791748, "learning_rate": 1.2750643512927849e-05, "loss": 0.424, "step": 6953 }, { "epoch": 0.846242774566474, "grad_norm": 0.8181618452072144, "learning_rate": 1.2748768013052113e-05, "loss": 0.4654, "step": 6954 }, { "epoch": 0.846364466078491, "grad_norm": 1.6564304828643799, "learning_rate": 1.2746892408580062e-05, "loss": 0.4019, "step": 6955 }, { "epoch": 0.846486157590508, "grad_norm": 1.423676609992981, "learning_rate": 1.2745016699583074e-05, "loss": 0.4491, "step": 6956 }, { "epoch": 0.8466078491025251, "grad_norm": 0.8537080883979797, "learning_rate": 1.2743140886132514e-05, "loss": 0.4357, "step": 6957 }, { "epoch": 0.8467295406145421, "grad_norm": 2.9443321228027344, "learning_rate": 1.2741264968299767e-05, "loss": 0.4013, "step": 6958 }, { "epoch": 0.8468512321265592, "grad_norm": 1.2850115299224854, "learning_rate": 1.2739388946156215e-05, "loss": 0.4111, "step": 6959 }, { "epoch": 0.8469729236385762, "grad_norm": 0.7939808368682861, "learning_rate": 1.273751281977324e-05, "loss": 0.4055, "step": 6960 }, { "epoch": 0.8470946151505933, "grad_norm": 1.5699455738067627, "learning_rate": 1.273563658922224e-05, "loss": 0.4129, "step": 6961 }, { "epoch": 0.8472163066626103, "grad_norm": 1.7347068786621094, "learning_rate": 1.2733760254574606e-05, "loss": 0.458, "step": 6962 }, { "epoch": 0.8473379981746273, "grad_norm": 4.550609588623047, "learning_rate": 1.2731883815901731e-05, "loss": 0.5723, "step": 6963 }, { "epoch": 0.8474596896866443, "grad_norm": 2.323796272277832, "learning_rate": 1.2730007273275025e-05, "loss": 0.4742, "step": 6964 }, { "epoch": 0.8475813811986614, "grad_norm": 6.394164562225342, "learning_rate": 1.2728130626765892e-05, "loss": 0.554, "step": 6965 }, { "epoch": 0.8477030727106785, "grad_norm": 2.1003170013427734, "learning_rate": 1.2726253876445738e-05, "loss": 0.4306, "step": 6966 }, { "epoch": 0.8478247642226955, "grad_norm": 1.0300663709640503, "learning_rate": 1.2724377022385985e-05, "loss": 0.4196, "step": 6967 }, { "epoch": 0.8479464557347125, "grad_norm": 2.2606160640716553, "learning_rate": 1.2722500064658045e-05, "loss": 0.5082, "step": 6968 }, { "epoch": 0.8480681472467295, "grad_norm": 0.9641751646995544, "learning_rate": 1.2720623003333343e-05, "loss": 0.4564, "step": 6969 }, { "epoch": 0.8481898387587465, "grad_norm": 3.5402324199676514, "learning_rate": 1.2718745838483304e-05, "loss": 0.4244, "step": 6970 }, { "epoch": 0.8483115302707637, "grad_norm": 1.4322373867034912, "learning_rate": 1.2716868570179359e-05, "loss": 0.5406, "step": 6971 }, { "epoch": 0.8484332217827807, "grad_norm": 1.7457278966903687, "learning_rate": 1.271499119849294e-05, "loss": 0.4959, "step": 6972 }, { "epoch": 0.8485549132947977, "grad_norm": 1.1944416761398315, "learning_rate": 1.2713113723495485e-05, "loss": 0.514, "step": 6973 }, { "epoch": 0.8486766048068147, "grad_norm": 6.41412353515625, "learning_rate": 1.271123614525844e-05, "loss": 0.4585, "step": 6974 }, { "epoch": 0.8487982963188317, "grad_norm": 5.333705902099609, "learning_rate": 1.2709358463853249e-05, "loss": 0.4747, "step": 6975 }, { "epoch": 0.8489199878308488, "grad_norm": 4.668219566345215, "learning_rate": 1.2707480679351358e-05, "loss": 0.4706, "step": 6976 }, { "epoch": 0.8490416793428658, "grad_norm": 4.608364105224609, "learning_rate": 1.2705602791824224e-05, "loss": 0.4438, "step": 6977 }, { "epoch": 0.8491633708548829, "grad_norm": 1.8792756795883179, "learning_rate": 1.2703724801343303e-05, "loss": 0.4628, "step": 6978 }, { "epoch": 0.8492850623668999, "grad_norm": 2.620440721511841, "learning_rate": 1.2701846707980056e-05, "loss": 0.4182, "step": 6979 }, { "epoch": 0.849406753878917, "grad_norm": 2.442648410797119, "learning_rate": 1.269996851180595e-05, "loss": 0.475, "step": 6980 }, { "epoch": 0.849528445390934, "grad_norm": 1.1661570072174072, "learning_rate": 1.2698090212892452e-05, "loss": 0.4674, "step": 6981 }, { "epoch": 0.849650136902951, "grad_norm": 2.131560802459717, "learning_rate": 1.269621181131104e-05, "loss": 0.4829, "step": 6982 }, { "epoch": 0.849771828414968, "grad_norm": 1.2309439182281494, "learning_rate": 1.2694333307133184e-05, "loss": 0.4513, "step": 6983 }, { "epoch": 0.8498935199269851, "grad_norm": 5.524863243103027, "learning_rate": 1.2692454700430369e-05, "loss": 0.5625, "step": 6984 }, { "epoch": 0.8500152114390022, "grad_norm": 1.8256990909576416, "learning_rate": 1.2690575991274083e-05, "loss": 0.5141, "step": 6985 }, { "epoch": 0.8501369029510192, "grad_norm": 0.8253006935119629, "learning_rate": 1.268869717973581e-05, "loss": 0.4413, "step": 6986 }, { "epoch": 0.8502585944630362, "grad_norm": 0.9531517624855042, "learning_rate": 1.2686818265887042e-05, "loss": 0.4575, "step": 6987 }, { "epoch": 0.8503802859750532, "grad_norm": 3.4873125553131104, "learning_rate": 1.2684939249799277e-05, "loss": 0.4095, "step": 6988 }, { "epoch": 0.8505019774870702, "grad_norm": 1.0143766403198242, "learning_rate": 1.2683060131544018e-05, "loss": 0.468, "step": 6989 }, { "epoch": 0.8506236689990874, "grad_norm": 1.0920214653015137, "learning_rate": 1.2681180911192767e-05, "loss": 0.4626, "step": 6990 }, { "epoch": 0.8507453605111044, "grad_norm": 0.7049693465232849, "learning_rate": 1.2679301588817034e-05, "loss": 0.4518, "step": 6991 }, { "epoch": 0.8508670520231214, "grad_norm": 0.6504801511764526, "learning_rate": 1.2677422164488328e-05, "loss": 0.4709, "step": 6992 }, { "epoch": 0.8509887435351384, "grad_norm": 1.0879284143447876, "learning_rate": 1.2675542638278166e-05, "loss": 0.4197, "step": 6993 }, { "epoch": 0.8511104350471554, "grad_norm": 1.1372443437576294, "learning_rate": 1.2673663010258073e-05, "loss": 0.4018, "step": 6994 }, { "epoch": 0.8512321265591725, "grad_norm": 1.4385079145431519, "learning_rate": 1.2671783280499563e-05, "loss": 0.5101, "step": 6995 }, { "epoch": 0.8513538180711895, "grad_norm": 1.0067007541656494, "learning_rate": 1.266990344907417e-05, "loss": 0.434, "step": 6996 }, { "epoch": 0.8514755095832066, "grad_norm": 0.6502609848976135, "learning_rate": 1.2668023516053426e-05, "loss": 0.3992, "step": 6997 }, { "epoch": 0.8515972010952236, "grad_norm": 2.3904335498809814, "learning_rate": 1.2666143481508865e-05, "loss": 0.478, "step": 6998 }, { "epoch": 0.8517188926072407, "grad_norm": 0.9435008764266968, "learning_rate": 1.2664263345512024e-05, "loss": 0.4797, "step": 6999 }, { "epoch": 0.8518405841192577, "grad_norm": 1.326151728630066, "learning_rate": 1.2662383108134448e-05, "loss": 0.496, "step": 7000 }, { "epoch": 0.8519622756312747, "grad_norm": 1.5210660696029663, "learning_rate": 1.2660502769447686e-05, "loss": 0.4509, "step": 7001 }, { "epoch": 0.8520839671432917, "grad_norm": 0.9029418230056763, "learning_rate": 1.2658622329523287e-05, "loss": 0.5236, "step": 7002 }, { "epoch": 0.8522056586553088, "grad_norm": 2.1396021842956543, "learning_rate": 1.2656741788432805e-05, "loss": 0.3981, "step": 7003 }, { "epoch": 0.8523273501673259, "grad_norm": 0.6995888948440552, "learning_rate": 1.2654861146247794e-05, "loss": 0.4642, "step": 7004 }, { "epoch": 0.8524490416793429, "grad_norm": 4.560553550720215, "learning_rate": 1.2652980403039827e-05, "loss": 0.4239, "step": 7005 }, { "epoch": 0.8525707331913599, "grad_norm": 2.2662789821624756, "learning_rate": 1.2651099558880462e-05, "loss": 0.4247, "step": 7006 }, { "epoch": 0.8526924247033769, "grad_norm": 2.8812103271484375, "learning_rate": 1.264921861384127e-05, "loss": 0.4502, "step": 7007 }, { "epoch": 0.8528141162153939, "grad_norm": 0.6335647702217102, "learning_rate": 1.264733756799383e-05, "loss": 0.4236, "step": 7008 }, { "epoch": 0.8529358077274111, "grad_norm": 0.8655391931533813, "learning_rate": 1.2645456421409712e-05, "loss": 0.4455, "step": 7009 }, { "epoch": 0.8530574992394281, "grad_norm": 2.688206672668457, "learning_rate": 1.2643575174160503e-05, "loss": 0.4724, "step": 7010 }, { "epoch": 0.8531791907514451, "grad_norm": 0.983683705329895, "learning_rate": 1.2641693826317785e-05, "loss": 0.4607, "step": 7011 }, { "epoch": 0.8533008822634621, "grad_norm": 0.7513974905014038, "learning_rate": 1.2639812377953152e-05, "loss": 0.4052, "step": 7012 }, { "epoch": 0.8534225737754791, "grad_norm": 3.3914999961853027, "learning_rate": 1.2637930829138192e-05, "loss": 0.5243, "step": 7013 }, { "epoch": 0.8535442652874962, "grad_norm": 1.1194955110549927, "learning_rate": 1.2636049179944502e-05, "loss": 0.445, "step": 7014 }, { "epoch": 0.8536659567995132, "grad_norm": 1.1755385398864746, "learning_rate": 1.2634167430443687e-05, "loss": 0.4314, "step": 7015 }, { "epoch": 0.8537876483115303, "grad_norm": 3.3257882595062256, "learning_rate": 1.2632285580707349e-05, "loss": 0.514, "step": 7016 }, { "epoch": 0.8539093398235473, "grad_norm": 0.9307615756988525, "learning_rate": 1.2630403630807096e-05, "loss": 0.5101, "step": 7017 }, { "epoch": 0.8540310313355644, "grad_norm": 1.614334225654602, "learning_rate": 1.262852158081454e-05, "loss": 0.4671, "step": 7018 }, { "epoch": 0.8541527228475814, "grad_norm": 0.7288075089454651, "learning_rate": 1.2626639430801293e-05, "loss": 0.4697, "step": 7019 }, { "epoch": 0.8542744143595984, "grad_norm": 2.6823551654815674, "learning_rate": 1.2624757180838987e-05, "loss": 0.4346, "step": 7020 }, { "epoch": 0.8543961058716154, "grad_norm": 2.2687909603118896, "learning_rate": 1.262287483099923e-05, "loss": 0.4285, "step": 7021 }, { "epoch": 0.8545177973836325, "grad_norm": 2.0029501914978027, "learning_rate": 1.262099238135366e-05, "loss": 0.4403, "step": 7022 }, { "epoch": 0.8546394888956496, "grad_norm": 1.5380704402923584, "learning_rate": 1.2619109831973903e-05, "loss": 0.4675, "step": 7023 }, { "epoch": 0.8547611804076666, "grad_norm": 1.8958693742752075, "learning_rate": 1.2617227182931597e-05, "loss": 0.386, "step": 7024 }, { "epoch": 0.8548828719196836, "grad_norm": 2.458688259124756, "learning_rate": 1.261534443429838e-05, "loss": 0.5004, "step": 7025 }, { "epoch": 0.8550045634317006, "grad_norm": 0.9578983187675476, "learning_rate": 1.2613461586145892e-05, "loss": 0.4134, "step": 7026 }, { "epoch": 0.8551262549437176, "grad_norm": 1.2905571460723877, "learning_rate": 1.2611578638545783e-05, "loss": 0.4597, "step": 7027 }, { "epoch": 0.8552479464557348, "grad_norm": 9.036710739135742, "learning_rate": 1.26096955915697e-05, "loss": 0.6334, "step": 7028 }, { "epoch": 0.8553696379677518, "grad_norm": 2.6435916423797607, "learning_rate": 1.2607812445289297e-05, "loss": 0.4937, "step": 7029 }, { "epoch": 0.8554913294797688, "grad_norm": 3.5096120834350586, "learning_rate": 1.2605929199776234e-05, "loss": 0.5237, "step": 7030 }, { "epoch": 0.8556130209917858, "grad_norm": 1.8670804500579834, "learning_rate": 1.2604045855102172e-05, "loss": 0.4498, "step": 7031 }, { "epoch": 0.8557347125038028, "grad_norm": 0.5056993961334229, "learning_rate": 1.2602162411338775e-05, "loss": 0.4072, "step": 7032 }, { "epoch": 0.8558564040158199, "grad_norm": 1.6217615604400635, "learning_rate": 1.260027886855771e-05, "loss": 0.4865, "step": 7033 }, { "epoch": 0.8559780955278369, "grad_norm": 1.4946330785751343, "learning_rate": 1.259839522683065e-05, "loss": 0.4389, "step": 7034 }, { "epoch": 0.856099787039854, "grad_norm": 0.7999100089073181, "learning_rate": 1.2596511486229277e-05, "loss": 0.4596, "step": 7035 }, { "epoch": 0.856221478551871, "grad_norm": 1.6567870378494263, "learning_rate": 1.2594627646825265e-05, "loss": 0.4584, "step": 7036 }, { "epoch": 0.856343170063888, "grad_norm": 1.1645410060882568, "learning_rate": 1.2592743708690305e-05, "loss": 0.4197, "step": 7037 }, { "epoch": 0.8564648615759051, "grad_norm": 0.6656481027603149, "learning_rate": 1.2590859671896076e-05, "loss": 0.4582, "step": 7038 }, { "epoch": 0.8565865530879221, "grad_norm": 0.6635571718215942, "learning_rate": 1.2588975536514276e-05, "loss": 0.4571, "step": 7039 }, { "epoch": 0.8567082445999391, "grad_norm": 0.7041589617729187, "learning_rate": 1.2587091302616594e-05, "loss": 0.4551, "step": 7040 }, { "epoch": 0.8568299361119562, "grad_norm": 1.993066668510437, "learning_rate": 1.2585206970274734e-05, "loss": 0.4401, "step": 7041 }, { "epoch": 0.8569516276239733, "grad_norm": 2.9001550674438477, "learning_rate": 1.2583322539560401e-05, "loss": 0.3901, "step": 7042 }, { "epoch": 0.8570733191359903, "grad_norm": 0.9461142420768738, "learning_rate": 1.25814380105453e-05, "loss": 0.4657, "step": 7043 }, { "epoch": 0.8571950106480073, "grad_norm": 1.4206129312515259, "learning_rate": 1.2579553383301134e-05, "loss": 0.4596, "step": 7044 }, { "epoch": 0.8573167021600243, "grad_norm": 1.6657085418701172, "learning_rate": 1.2577668657899622e-05, "loss": 0.422, "step": 7045 }, { "epoch": 0.8574383936720413, "grad_norm": 2.3552663326263428, "learning_rate": 1.2575783834412488e-05, "loss": 0.5153, "step": 7046 }, { "epoch": 0.8575600851840585, "grad_norm": 0.9289820790290833, "learning_rate": 1.2573898912911442e-05, "loss": 0.4273, "step": 7047 }, { "epoch": 0.8576817766960755, "grad_norm": 2.8469924926757812, "learning_rate": 1.2572013893468216e-05, "loss": 0.4772, "step": 7048 }, { "epoch": 0.8578034682080925, "grad_norm": 2.5421817302703857, "learning_rate": 1.2570128776154538e-05, "loss": 0.5141, "step": 7049 }, { "epoch": 0.8579251597201095, "grad_norm": 1.3275059461593628, "learning_rate": 1.2568243561042141e-05, "loss": 0.4079, "step": 7050 }, { "epoch": 0.8580468512321265, "grad_norm": 0.9563422203063965, "learning_rate": 1.2566358248202758e-05, "loss": 0.4907, "step": 7051 }, { "epoch": 0.8581685427441436, "grad_norm": 0.7685717940330505, "learning_rate": 1.2564472837708134e-05, "loss": 0.4813, "step": 7052 }, { "epoch": 0.8582902342561606, "grad_norm": 1.4967350959777832, "learning_rate": 1.2562587329630009e-05, "loss": 0.4423, "step": 7053 }, { "epoch": 0.8584119257681777, "grad_norm": 1.104519009590149, "learning_rate": 1.2560701724040136e-05, "loss": 0.4902, "step": 7054 }, { "epoch": 0.8585336172801947, "grad_norm": 2.1416592597961426, "learning_rate": 1.2558816021010259e-05, "loss": 0.4408, "step": 7055 }, { "epoch": 0.8586553087922117, "grad_norm": 0.6394123435020447, "learning_rate": 1.2556930220612134e-05, "loss": 0.4507, "step": 7056 }, { "epoch": 0.8587770003042288, "grad_norm": 0.7312126159667969, "learning_rate": 1.2555044322917523e-05, "loss": 0.4624, "step": 7057 }, { "epoch": 0.8588986918162458, "grad_norm": 2.855180025100708, "learning_rate": 1.2553158327998186e-05, "loss": 0.5386, "step": 7058 }, { "epoch": 0.8590203833282628, "grad_norm": 2.4441428184509277, "learning_rate": 1.255127223592589e-05, "loss": 0.4996, "step": 7059 }, { "epoch": 0.8591420748402799, "grad_norm": 1.8379968404769897, "learning_rate": 1.2549386046772408e-05, "loss": 0.4044, "step": 7060 }, { "epoch": 0.859263766352297, "grad_norm": 1.8424160480499268, "learning_rate": 1.2547499760609507e-05, "loss": 0.417, "step": 7061 }, { "epoch": 0.859385457864314, "grad_norm": 0.595614492893219, "learning_rate": 1.2545613377508967e-05, "loss": 0.4212, "step": 7062 }, { "epoch": 0.859507149376331, "grad_norm": 1.5273895263671875, "learning_rate": 1.254372689754257e-05, "loss": 0.4303, "step": 7063 }, { "epoch": 0.859628840888348, "grad_norm": 0.9880451560020447, "learning_rate": 1.2541840320782097e-05, "loss": 0.4197, "step": 7064 }, { "epoch": 0.859750532400365, "grad_norm": 3.9398880004882812, "learning_rate": 1.2539953647299341e-05, "loss": 0.4562, "step": 7065 }, { "epoch": 0.8598722239123822, "grad_norm": 1.0115792751312256, "learning_rate": 1.2538066877166093e-05, "loss": 0.4358, "step": 7066 }, { "epoch": 0.8599939154243992, "grad_norm": 0.5628373026847839, "learning_rate": 1.2536180010454142e-05, "loss": 0.3998, "step": 7067 }, { "epoch": 0.8601156069364162, "grad_norm": 1.36702561378479, "learning_rate": 1.2534293047235292e-05, "loss": 0.4534, "step": 7068 }, { "epoch": 0.8602372984484332, "grad_norm": 1.2451943159103394, "learning_rate": 1.253240598758135e-05, "loss": 0.3906, "step": 7069 }, { "epoch": 0.8603589899604502, "grad_norm": 1.6277918815612793, "learning_rate": 1.2530518831564119e-05, "loss": 0.4692, "step": 7070 }, { "epoch": 0.8604806814724673, "grad_norm": 1.4523383378982544, "learning_rate": 1.2528631579255404e-05, "loss": 0.43, "step": 7071 }, { "epoch": 0.8606023729844844, "grad_norm": 2.8507893085479736, "learning_rate": 1.2526744230727027e-05, "loss": 0.5151, "step": 7072 }, { "epoch": 0.8607240644965014, "grad_norm": 2.7794971466064453, "learning_rate": 1.25248567860508e-05, "loss": 0.3745, "step": 7073 }, { "epoch": 0.8608457560085184, "grad_norm": 0.602327823638916, "learning_rate": 1.2522969245298546e-05, "loss": 0.4562, "step": 7074 }, { "epoch": 0.8609674475205354, "grad_norm": 2.184446334838867, "learning_rate": 1.2521081608542089e-05, "loss": 0.4258, "step": 7075 }, { "epoch": 0.8610891390325525, "grad_norm": 0.6675443053245544, "learning_rate": 1.2519193875853261e-05, "loss": 0.5, "step": 7076 }, { "epoch": 0.8612108305445695, "grad_norm": 0.8092020153999329, "learning_rate": 1.2517306047303893e-05, "loss": 0.4604, "step": 7077 }, { "epoch": 0.8613325220565865, "grad_norm": 2.3450775146484375, "learning_rate": 1.2515418122965817e-05, "loss": 0.4237, "step": 7078 }, { "epoch": 0.8614542135686036, "grad_norm": 2.036470890045166, "learning_rate": 1.2513530102910873e-05, "loss": 0.5295, "step": 7079 }, { "epoch": 0.8615759050806207, "grad_norm": 0.8510911464691162, "learning_rate": 1.251164198721091e-05, "loss": 0.5133, "step": 7080 }, { "epoch": 0.8616975965926377, "grad_norm": 3.924497365951538, "learning_rate": 1.2509753775937765e-05, "loss": 0.4127, "step": 7081 }, { "epoch": 0.8618192881046547, "grad_norm": 2.202623128890991, "learning_rate": 1.2507865469163298e-05, "loss": 0.531, "step": 7082 }, { "epoch": 0.8619409796166717, "grad_norm": 2.0649092197418213, "learning_rate": 1.2505977066959358e-05, "loss": 0.422, "step": 7083 }, { "epoch": 0.8620626711286887, "grad_norm": 1.8406898975372314, "learning_rate": 1.2504088569397805e-05, "loss": 0.4662, "step": 7084 }, { "epoch": 0.8621843626407059, "grad_norm": 2.1745588779449463, "learning_rate": 1.2502199976550498e-05, "loss": 0.5012, "step": 7085 }, { "epoch": 0.8623060541527229, "grad_norm": 2.647151470184326, "learning_rate": 1.25003112884893e-05, "loss": 0.4238, "step": 7086 }, { "epoch": 0.8624277456647399, "grad_norm": 0.8039729595184326, "learning_rate": 1.2498422505286082e-05, "loss": 0.4899, "step": 7087 }, { "epoch": 0.8625494371767569, "grad_norm": 1.5099488496780396, "learning_rate": 1.2496533627012719e-05, "loss": 0.4487, "step": 7088 }, { "epoch": 0.8626711286887739, "grad_norm": 0.8914634585380554, "learning_rate": 1.2494644653741087e-05, "loss": 0.4621, "step": 7089 }, { "epoch": 0.862792820200791, "grad_norm": 2.4344775676727295, "learning_rate": 1.2492755585543055e-05, "loss": 0.4127, "step": 7090 }, { "epoch": 0.8629145117128081, "grad_norm": 1.771337628364563, "learning_rate": 1.2490866422490515e-05, "loss": 0.4578, "step": 7091 }, { "epoch": 0.8630362032248251, "grad_norm": 0.6396241188049316, "learning_rate": 1.2488977164655355e-05, "loss": 0.4695, "step": 7092 }, { "epoch": 0.8631578947368421, "grad_norm": 1.6561543941497803, "learning_rate": 1.248708781210946e-05, "loss": 0.4, "step": 7093 }, { "epoch": 0.8632795862488591, "grad_norm": 0.6191179752349854, "learning_rate": 1.2485198364924726e-05, "loss": 0.4182, "step": 7094 }, { "epoch": 0.8634012777608762, "grad_norm": 0.6067349314689636, "learning_rate": 1.2483308823173052e-05, "loss": 0.4489, "step": 7095 }, { "epoch": 0.8635229692728932, "grad_norm": 1.1665351390838623, "learning_rate": 1.2481419186926335e-05, "loss": 0.4723, "step": 7096 }, { "epoch": 0.8636446607849102, "grad_norm": 1.1563318967819214, "learning_rate": 1.2479529456256483e-05, "loss": 0.4638, "step": 7097 }, { "epoch": 0.8637663522969273, "grad_norm": 2.5375454425811768, "learning_rate": 1.24776396312354e-05, "loss": 0.5132, "step": 7098 }, { "epoch": 0.8638880438089444, "grad_norm": 1.057485580444336, "learning_rate": 1.2475749711935006e-05, "loss": 0.4776, "step": 7099 }, { "epoch": 0.8640097353209614, "grad_norm": 0.8790457844734192, "learning_rate": 1.2473859698427209e-05, "loss": 0.4589, "step": 7100 }, { "epoch": 0.8641314268329784, "grad_norm": 3.862192153930664, "learning_rate": 1.2471969590783926e-05, "loss": 0.3849, "step": 7101 }, { "epoch": 0.8642531183449954, "grad_norm": 0.7336155772209167, "learning_rate": 1.2470079389077085e-05, "loss": 0.4815, "step": 7102 }, { "epoch": 0.8643748098570124, "grad_norm": 4.680565357208252, "learning_rate": 1.2468189093378613e-05, "loss": 0.4125, "step": 7103 }, { "epoch": 0.8644965013690296, "grad_norm": 1.2660815715789795, "learning_rate": 1.2466298703760434e-05, "loss": 0.484, "step": 7104 }, { "epoch": 0.8646181928810466, "grad_norm": 0.9602317810058594, "learning_rate": 1.2464408220294483e-05, "loss": 0.4982, "step": 7105 }, { "epoch": 0.8647398843930636, "grad_norm": 1.0130568742752075, "learning_rate": 1.2462517643052704e-05, "loss": 0.4701, "step": 7106 }, { "epoch": 0.8648615759050806, "grad_norm": 1.9429272413253784, "learning_rate": 1.246062697210703e-05, "loss": 0.4102, "step": 7107 }, { "epoch": 0.8649832674170976, "grad_norm": 0.5669442415237427, "learning_rate": 1.2458736207529403e-05, "loss": 0.4701, "step": 7108 }, { "epoch": 0.8651049589291147, "grad_norm": 2.781632423400879, "learning_rate": 1.2456845349391776e-05, "loss": 0.5548, "step": 7109 }, { "epoch": 0.8652266504411318, "grad_norm": 1.8443169593811035, "learning_rate": 1.2454954397766099e-05, "loss": 0.455, "step": 7110 }, { "epoch": 0.8653483419531488, "grad_norm": 0.7192366719245911, "learning_rate": 1.2453063352724324e-05, "loss": 0.4606, "step": 7111 }, { "epoch": 0.8654700334651658, "grad_norm": 3.1163535118103027, "learning_rate": 1.2451172214338411e-05, "loss": 0.5635, "step": 7112 }, { "epoch": 0.8655917249771828, "grad_norm": 0.7690176367759705, "learning_rate": 1.2449280982680324e-05, "loss": 0.4425, "step": 7113 }, { "epoch": 0.8657134164891999, "grad_norm": 1.1607067584991455, "learning_rate": 1.2447389657822025e-05, "loss": 0.3777, "step": 7114 }, { "epoch": 0.8658351080012169, "grad_norm": 0.8777136206626892, "learning_rate": 1.2445498239835481e-05, "loss": 0.4317, "step": 7115 }, { "epoch": 0.8659567995132339, "grad_norm": 1.2214540243148804, "learning_rate": 1.2443606728792667e-05, "loss": 0.4019, "step": 7116 }, { "epoch": 0.866078491025251, "grad_norm": 0.7902207374572754, "learning_rate": 1.244171512476556e-05, "loss": 0.3849, "step": 7117 }, { "epoch": 0.866200182537268, "grad_norm": 3.835721969604492, "learning_rate": 1.2439823427826144e-05, "loss": 0.4813, "step": 7118 }, { "epoch": 0.8663218740492851, "grad_norm": 2.578134298324585, "learning_rate": 1.243793163804639e-05, "loss": 0.4414, "step": 7119 }, { "epoch": 0.8664435655613021, "grad_norm": 0.7175315618515015, "learning_rate": 1.2436039755498292e-05, "loss": 0.4005, "step": 7120 }, { "epoch": 0.8665652570733191, "grad_norm": 2.298964262008667, "learning_rate": 1.2434147780253837e-05, "loss": 0.4534, "step": 7121 }, { "epoch": 0.8666869485853361, "grad_norm": 1.3722517490386963, "learning_rate": 1.2432255712385025e-05, "loss": 0.427, "step": 7122 }, { "epoch": 0.8668086400973533, "grad_norm": 4.137610912322998, "learning_rate": 1.2430363551963849e-05, "loss": 0.5157, "step": 7123 }, { "epoch": 0.8669303316093703, "grad_norm": 0.8999399542808533, "learning_rate": 1.2428471299062306e-05, "loss": 0.4493, "step": 7124 }, { "epoch": 0.8670520231213873, "grad_norm": 0.6382309794425964, "learning_rate": 1.2426578953752404e-05, "loss": 0.4157, "step": 7125 }, { "epoch": 0.8671737146334043, "grad_norm": 2.151242971420288, "learning_rate": 1.2424686516106153e-05, "loss": 0.4643, "step": 7126 }, { "epoch": 0.8672954061454213, "grad_norm": 1.0443936586380005, "learning_rate": 1.2422793986195556e-05, "loss": 0.5304, "step": 7127 }, { "epoch": 0.8674170976574384, "grad_norm": 1.3880633115768433, "learning_rate": 1.2420901364092634e-05, "loss": 0.4677, "step": 7128 }, { "epoch": 0.8675387891694555, "grad_norm": 0.8804852366447449, "learning_rate": 1.2419008649869408e-05, "loss": 0.444, "step": 7129 }, { "epoch": 0.8676604806814725, "grad_norm": 3.4794905185699463, "learning_rate": 1.2417115843597896e-05, "loss": 0.4505, "step": 7130 }, { "epoch": 0.8677821721934895, "grad_norm": 1.386530876159668, "learning_rate": 1.2415222945350118e-05, "loss": 0.4642, "step": 7131 }, { "epoch": 0.8679038637055065, "grad_norm": 4.581042766571045, "learning_rate": 1.241332995519811e-05, "loss": 0.564, "step": 7132 }, { "epoch": 0.8680255552175236, "grad_norm": 2.8398358821868896, "learning_rate": 1.2411436873213902e-05, "loss": 0.4133, "step": 7133 }, { "epoch": 0.8681472467295406, "grad_norm": 1.2776179313659668, "learning_rate": 1.2409543699469531e-05, "loss": 0.479, "step": 7134 }, { "epoch": 0.8682689382415576, "grad_norm": 0.623054027557373, "learning_rate": 1.2407650434037036e-05, "loss": 0.4777, "step": 7135 }, { "epoch": 0.8683906297535747, "grad_norm": 1.7206213474273682, "learning_rate": 1.2405757076988454e-05, "loss": 0.4599, "step": 7136 }, { "epoch": 0.8685123212655917, "grad_norm": 2.46237850189209, "learning_rate": 1.2403863628395838e-05, "loss": 0.3843, "step": 7137 }, { "epoch": 0.8686340127776088, "grad_norm": 2.648427724838257, "learning_rate": 1.2401970088331234e-05, "loss": 0.4233, "step": 7138 }, { "epoch": 0.8687557042896258, "grad_norm": 1.5909925699234009, "learning_rate": 1.2400076456866696e-05, "loss": 0.4339, "step": 7139 }, { "epoch": 0.8688773958016428, "grad_norm": 1.2027496099472046, "learning_rate": 1.2398182734074282e-05, "loss": 0.4169, "step": 7140 }, { "epoch": 0.8689990873136598, "grad_norm": 1.9359771013259888, "learning_rate": 1.2396288920026055e-05, "loss": 0.4506, "step": 7141 }, { "epoch": 0.869120778825677, "grad_norm": 0.8780108690261841, "learning_rate": 1.2394395014794066e-05, "loss": 0.3983, "step": 7142 }, { "epoch": 0.869242470337694, "grad_norm": 2.922348976135254, "learning_rate": 1.2392501018450395e-05, "loss": 0.5137, "step": 7143 }, { "epoch": 0.869364161849711, "grad_norm": 2.718005418777466, "learning_rate": 1.239060693106711e-05, "loss": 0.4996, "step": 7144 }, { "epoch": 0.869485853361728, "grad_norm": 1.3465734720230103, "learning_rate": 1.2388712752716281e-05, "loss": 0.4439, "step": 7145 }, { "epoch": 0.869607544873745, "grad_norm": 1.294167399406433, "learning_rate": 1.238681848346999e-05, "loss": 0.3801, "step": 7146 }, { "epoch": 0.869729236385762, "grad_norm": 2.0159707069396973, "learning_rate": 1.238492412340031e-05, "loss": 0.5163, "step": 7147 }, { "epoch": 0.8698509278977792, "grad_norm": 0.953387975692749, "learning_rate": 1.2383029672579335e-05, "loss": 0.4632, "step": 7148 }, { "epoch": 0.8699726194097962, "grad_norm": 2.860001802444458, "learning_rate": 1.2381135131079145e-05, "loss": 0.4683, "step": 7149 }, { "epoch": 0.8700943109218132, "grad_norm": 1.1687285900115967, "learning_rate": 1.2379240498971837e-05, "loss": 0.4774, "step": 7150 }, { "epoch": 0.8702160024338302, "grad_norm": 3.3176209926605225, "learning_rate": 1.2377345776329502e-05, "loss": 0.4032, "step": 7151 }, { "epoch": 0.8703376939458473, "grad_norm": 0.8278681635856628, "learning_rate": 1.237545096322424e-05, "loss": 0.4556, "step": 7152 }, { "epoch": 0.8704593854578643, "grad_norm": 2.099942684173584, "learning_rate": 1.2373556059728153e-05, "loss": 0.4425, "step": 7153 }, { "epoch": 0.8705810769698813, "grad_norm": 1.1819173097610474, "learning_rate": 1.2371661065913343e-05, "loss": 0.4999, "step": 7154 }, { "epoch": 0.8707027684818984, "grad_norm": 2.0561752319335938, "learning_rate": 1.236976598185192e-05, "loss": 0.4994, "step": 7155 }, { "epoch": 0.8708244599939154, "grad_norm": 1.27903413772583, "learning_rate": 1.2367870807615998e-05, "loss": 0.4209, "step": 7156 }, { "epoch": 0.8709461515059325, "grad_norm": 1.2673401832580566, "learning_rate": 1.2365975543277688e-05, "loss": 0.4317, "step": 7157 }, { "epoch": 0.8710678430179495, "grad_norm": 2.4399564266204834, "learning_rate": 1.2364080188909116e-05, "loss": 0.418, "step": 7158 }, { "epoch": 0.8711895345299665, "grad_norm": 5.944895267486572, "learning_rate": 1.2362184744582396e-05, "loss": 0.5677, "step": 7159 }, { "epoch": 0.8713112260419835, "grad_norm": 3.566612958908081, "learning_rate": 1.2360289210369658e-05, "loss": 0.5076, "step": 7160 }, { "epoch": 0.8714329175540007, "grad_norm": 0.9285900592803955, "learning_rate": 1.235839358634303e-05, "loss": 0.4202, "step": 7161 }, { "epoch": 0.8715546090660177, "grad_norm": 0.7251654267311096, "learning_rate": 1.2356497872574642e-05, "loss": 0.4246, "step": 7162 }, { "epoch": 0.8716763005780347, "grad_norm": 0.7016144394874573, "learning_rate": 1.2354602069136636e-05, "loss": 0.4016, "step": 7163 }, { "epoch": 0.8717979920900517, "grad_norm": 2.1173007488250732, "learning_rate": 1.2352706176101147e-05, "loss": 0.4487, "step": 7164 }, { "epoch": 0.8719196836020687, "grad_norm": 1.1782103776931763, "learning_rate": 1.2350810193540318e-05, "loss": 0.4655, "step": 7165 }, { "epoch": 0.8720413751140857, "grad_norm": 2.83894419670105, "learning_rate": 1.2348914121526292e-05, "loss": 0.4468, "step": 7166 }, { "epoch": 0.8721630666261029, "grad_norm": 4.003988742828369, "learning_rate": 1.2347017960131225e-05, "loss": 0.4113, "step": 7167 }, { "epoch": 0.8722847581381199, "grad_norm": 2.185760259628296, "learning_rate": 1.2345121709427265e-05, "loss": 0.4315, "step": 7168 }, { "epoch": 0.8724064496501369, "grad_norm": 0.6458480954170227, "learning_rate": 1.2343225369486569e-05, "loss": 0.4639, "step": 7169 }, { "epoch": 0.8725281411621539, "grad_norm": 0.8225185871124268, "learning_rate": 1.23413289403813e-05, "loss": 0.4299, "step": 7170 }, { "epoch": 0.872649832674171, "grad_norm": 2.206825017929077, "learning_rate": 1.233943242218362e-05, "loss": 0.5521, "step": 7171 }, { "epoch": 0.872771524186188, "grad_norm": 1.1052758693695068, "learning_rate": 1.2337535814965688e-05, "loss": 0.4625, "step": 7172 }, { "epoch": 0.8728932156982051, "grad_norm": 1.6416510343551636, "learning_rate": 1.2335639118799682e-05, "loss": 0.459, "step": 7173 }, { "epoch": 0.8730149072102221, "grad_norm": 0.891686737537384, "learning_rate": 1.2333742333757776e-05, "loss": 0.4849, "step": 7174 }, { "epoch": 0.8731365987222391, "grad_norm": 0.7240790128707886, "learning_rate": 1.2331845459912144e-05, "loss": 0.4103, "step": 7175 }, { "epoch": 0.8732582902342562, "grad_norm": 0.6536197066307068, "learning_rate": 1.2329948497334963e-05, "loss": 0.443, "step": 7176 }, { "epoch": 0.8733799817462732, "grad_norm": 1.9579108953475952, "learning_rate": 1.2328051446098418e-05, "loss": 0.4965, "step": 7177 }, { "epoch": 0.8735016732582902, "grad_norm": 2.1566762924194336, "learning_rate": 1.2326154306274698e-05, "loss": 0.5056, "step": 7178 }, { "epoch": 0.8736233647703072, "grad_norm": 1.443878173828125, "learning_rate": 1.2324257077935993e-05, "loss": 0.4966, "step": 7179 }, { "epoch": 0.8737450562823244, "grad_norm": 1.3102716207504272, "learning_rate": 1.2322359761154493e-05, "loss": 0.4265, "step": 7180 }, { "epoch": 0.8738667477943414, "grad_norm": 0.9915708899497986, "learning_rate": 1.2320462356002399e-05, "loss": 0.476, "step": 7181 }, { "epoch": 0.8739884393063584, "grad_norm": 3.007075786590576, "learning_rate": 1.2318564862551908e-05, "loss": 0.424, "step": 7182 }, { "epoch": 0.8741101308183754, "grad_norm": 0.8018471002578735, "learning_rate": 1.2316667280875226e-05, "loss": 0.4277, "step": 7183 }, { "epoch": 0.8742318223303924, "grad_norm": 2.0959227085113525, "learning_rate": 1.2314769611044557e-05, "loss": 0.4004, "step": 7184 }, { "epoch": 0.8743535138424094, "grad_norm": 0.8163623809814453, "learning_rate": 1.2312871853132114e-05, "loss": 0.4715, "step": 7185 }, { "epoch": 0.8744752053544266, "grad_norm": 1.8603123426437378, "learning_rate": 1.231097400721011e-05, "loss": 0.4353, "step": 7186 }, { "epoch": 0.8745968968664436, "grad_norm": 0.8322412967681885, "learning_rate": 1.2309076073350761e-05, "loss": 0.4262, "step": 7187 }, { "epoch": 0.8747185883784606, "grad_norm": 0.9720268249511719, "learning_rate": 1.2307178051626287e-05, "loss": 0.4559, "step": 7188 }, { "epoch": 0.8748402798904776, "grad_norm": 0.630632758140564, "learning_rate": 1.2305279942108914e-05, "loss": 0.4697, "step": 7189 }, { "epoch": 0.8749619714024947, "grad_norm": 2.5271854400634766, "learning_rate": 1.2303381744870868e-05, "loss": 0.4924, "step": 7190 }, { "epoch": 0.8750836629145117, "grad_norm": 1.0941441059112549, "learning_rate": 1.2301483459984375e-05, "loss": 0.4539, "step": 7191 }, { "epoch": 0.8752053544265288, "grad_norm": 0.6484375596046448, "learning_rate": 1.2299585087521675e-05, "loss": 0.4354, "step": 7192 }, { "epoch": 0.8753270459385458, "grad_norm": 3.5244596004486084, "learning_rate": 1.2297686627555006e-05, "loss": 0.532, "step": 7193 }, { "epoch": 0.8754487374505628, "grad_norm": 0.7241256833076477, "learning_rate": 1.2295788080156604e-05, "loss": 0.434, "step": 7194 }, { "epoch": 0.8755704289625799, "grad_norm": 1.1277343034744263, "learning_rate": 1.229388944539871e-05, "loss": 0.497, "step": 7195 }, { "epoch": 0.8756921204745969, "grad_norm": 1.9367152452468872, "learning_rate": 1.2291990723353573e-05, "loss": 0.4692, "step": 7196 }, { "epoch": 0.8758138119866139, "grad_norm": 1.7133387327194214, "learning_rate": 1.229009191409345e-05, "loss": 0.5249, "step": 7197 }, { "epoch": 0.8759355034986309, "grad_norm": 2.437053680419922, "learning_rate": 1.2288193017690588e-05, "loss": 0.4532, "step": 7198 }, { "epoch": 0.876057195010648, "grad_norm": 1.1704226732254028, "learning_rate": 1.2286294034217243e-05, "loss": 0.4483, "step": 7199 }, { "epoch": 0.8761788865226651, "grad_norm": 2.379946708679199, "learning_rate": 1.2284394963745679e-05, "loss": 0.4841, "step": 7200 }, { "epoch": 0.8763005780346821, "grad_norm": 1.1669596433639526, "learning_rate": 1.228249580634816e-05, "loss": 0.4965, "step": 7201 }, { "epoch": 0.8764222695466991, "grad_norm": 1.0141856670379639, "learning_rate": 1.2280596562096947e-05, "loss": 0.464, "step": 7202 }, { "epoch": 0.8765439610587161, "grad_norm": 2.4254069328308105, "learning_rate": 1.2278697231064317e-05, "loss": 0.4599, "step": 7203 }, { "epoch": 0.8766656525707331, "grad_norm": 2.0517711639404297, "learning_rate": 1.2276797813322541e-05, "loss": 0.3785, "step": 7204 }, { "epoch": 0.8767873440827503, "grad_norm": 1.5930449962615967, "learning_rate": 1.2274898308943896e-05, "loss": 0.4806, "step": 7205 }, { "epoch": 0.8769090355947673, "grad_norm": 2.6375033855438232, "learning_rate": 1.227299871800066e-05, "loss": 0.4879, "step": 7206 }, { "epoch": 0.8770307271067843, "grad_norm": 3.8798134326934814, "learning_rate": 1.2271099040565118e-05, "loss": 0.5059, "step": 7207 }, { "epoch": 0.8771524186188013, "grad_norm": 2.800853729248047, "learning_rate": 1.2269199276709555e-05, "loss": 0.5118, "step": 7208 }, { "epoch": 0.8772741101308184, "grad_norm": 0.7687027454376221, "learning_rate": 1.2267299426506267e-05, "loss": 0.413, "step": 7209 }, { "epoch": 0.8773958016428354, "grad_norm": 4.138387203216553, "learning_rate": 1.226539949002754e-05, "loss": 0.5305, "step": 7210 }, { "epoch": 0.8775174931548525, "grad_norm": 0.5947757363319397, "learning_rate": 1.2263499467345673e-05, "loss": 0.4685, "step": 7211 }, { "epoch": 0.8776391846668695, "grad_norm": 0.7174993753433228, "learning_rate": 1.2261599358532965e-05, "loss": 0.4538, "step": 7212 }, { "epoch": 0.8777608761788865, "grad_norm": 1.968227505683899, "learning_rate": 1.2259699163661725e-05, "loss": 0.4028, "step": 7213 }, { "epoch": 0.8778825676909036, "grad_norm": 1.0651625394821167, "learning_rate": 1.225779888280425e-05, "loss": 0.4257, "step": 7214 }, { "epoch": 0.8780042592029206, "grad_norm": 2.9992117881774902, "learning_rate": 1.2255898516032853e-05, "loss": 0.4499, "step": 7215 }, { "epoch": 0.8781259507149376, "grad_norm": 1.5156244039535522, "learning_rate": 1.2253998063419852e-05, "loss": 0.4771, "step": 7216 }, { "epoch": 0.8782476422269546, "grad_norm": 0.7558187246322632, "learning_rate": 1.2252097525037558e-05, "loss": 0.4056, "step": 7217 }, { "epoch": 0.8783693337389717, "grad_norm": 0.6978296041488647, "learning_rate": 1.2250196900958291e-05, "loss": 0.41, "step": 7218 }, { "epoch": 0.8784910252509888, "grad_norm": 0.7302688360214233, "learning_rate": 1.2248296191254375e-05, "loss": 0.3786, "step": 7219 }, { "epoch": 0.8786127167630058, "grad_norm": 1.1655205488204956, "learning_rate": 1.2246395395998135e-05, "loss": 0.4495, "step": 7220 }, { "epoch": 0.8787344082750228, "grad_norm": 0.8397741317749023, "learning_rate": 1.2244494515261902e-05, "loss": 0.467, "step": 7221 }, { "epoch": 0.8788560997870398, "grad_norm": 2.247478723526001, "learning_rate": 1.2242593549118003e-05, "loss": 0.4866, "step": 7222 }, { "epoch": 0.8789777912990568, "grad_norm": 1.0904483795166016, "learning_rate": 1.2240692497638778e-05, "loss": 0.4166, "step": 7223 }, { "epoch": 0.879099482811074, "grad_norm": 0.6437256932258606, "learning_rate": 1.2238791360896569e-05, "loss": 0.3745, "step": 7224 }, { "epoch": 0.879221174323091, "grad_norm": 0.959119439125061, "learning_rate": 1.2236890138963711e-05, "loss": 0.4285, "step": 7225 }, { "epoch": 0.879342865835108, "grad_norm": 1.366894245147705, "learning_rate": 1.2234988831912553e-05, "loss": 0.4823, "step": 7226 }, { "epoch": 0.879464557347125, "grad_norm": 2.7522079944610596, "learning_rate": 1.2233087439815447e-05, "loss": 0.4738, "step": 7227 }, { "epoch": 0.879586248859142, "grad_norm": 1.0359946489334106, "learning_rate": 1.2231185962744742e-05, "loss": 0.4299, "step": 7228 }, { "epoch": 0.8797079403711591, "grad_norm": 1.0898159742355347, "learning_rate": 1.222928440077279e-05, "loss": 0.4624, "step": 7229 }, { "epoch": 0.8798296318831762, "grad_norm": 1.9964925050735474, "learning_rate": 1.2227382753971953e-05, "loss": 0.5015, "step": 7230 }, { "epoch": 0.8799513233951932, "grad_norm": 0.7144758701324463, "learning_rate": 1.2225481022414592e-05, "loss": 0.4422, "step": 7231 }, { "epoch": 0.8800730149072102, "grad_norm": 1.533362865447998, "learning_rate": 1.2223579206173071e-05, "loss": 0.4291, "step": 7232 }, { "epoch": 0.8801947064192273, "grad_norm": 1.6278109550476074, "learning_rate": 1.2221677305319762e-05, "loss": 0.4924, "step": 7233 }, { "epoch": 0.8803163979312443, "grad_norm": 0.7324231863021851, "learning_rate": 1.2219775319927027e-05, "loss": 0.4821, "step": 7234 }, { "epoch": 0.8804380894432613, "grad_norm": 2.150149345397949, "learning_rate": 1.2217873250067252e-05, "loss": 0.4658, "step": 7235 }, { "epoch": 0.8805597809552783, "grad_norm": 4.297105312347412, "learning_rate": 1.2215971095812805e-05, "loss": 0.4161, "step": 7236 }, { "epoch": 0.8806814724672954, "grad_norm": 1.6495928764343262, "learning_rate": 1.2214068857236072e-05, "loss": 0.4556, "step": 7237 }, { "epoch": 0.8808031639793125, "grad_norm": 1.1916217803955078, "learning_rate": 1.2212166534409436e-05, "loss": 0.4998, "step": 7238 }, { "epoch": 0.8809248554913295, "grad_norm": 1.4659065008163452, "learning_rate": 1.2210264127405287e-05, "loss": 0.4226, "step": 7239 }, { "epoch": 0.8810465470033465, "grad_norm": 1.8044829368591309, "learning_rate": 1.2208361636296012e-05, "loss": 0.4885, "step": 7240 }, { "epoch": 0.8811682385153635, "grad_norm": 0.8489356637001038, "learning_rate": 1.2206459061154004e-05, "loss": 0.4815, "step": 7241 }, { "epoch": 0.8812899300273805, "grad_norm": 1.1724481582641602, "learning_rate": 1.2204556402051659e-05, "loss": 0.4341, "step": 7242 }, { "epoch": 0.8814116215393977, "grad_norm": 0.7249171137809753, "learning_rate": 1.2202653659061385e-05, "loss": 0.4524, "step": 7243 }, { "epoch": 0.8815333130514147, "grad_norm": 2.6195907592773438, "learning_rate": 1.2200750832255578e-05, "loss": 0.3956, "step": 7244 }, { "epoch": 0.8816550045634317, "grad_norm": 1.4745408296585083, "learning_rate": 1.2198847921706646e-05, "loss": 0.4319, "step": 7245 }, { "epoch": 0.8817766960754487, "grad_norm": 2.000275135040283, "learning_rate": 1.2196944927487e-05, "loss": 0.4587, "step": 7246 }, { "epoch": 0.8818983875874657, "grad_norm": 1.536466121673584, "learning_rate": 1.2195041849669054e-05, "loss": 0.385, "step": 7247 }, { "epoch": 0.8820200790994828, "grad_norm": 1.8848366737365723, "learning_rate": 1.2193138688325218e-05, "loss": 0.4976, "step": 7248 }, { "epoch": 0.8821417706114999, "grad_norm": 2.3491923809051514, "learning_rate": 1.2191235443527919e-05, "loss": 0.4517, "step": 7249 }, { "epoch": 0.8822634621235169, "grad_norm": 0.7377593517303467, "learning_rate": 1.2189332115349573e-05, "loss": 0.4509, "step": 7250 }, { "epoch": 0.8823851536355339, "grad_norm": 2.4110500812530518, "learning_rate": 1.2187428703862612e-05, "loss": 0.5268, "step": 7251 }, { "epoch": 0.882506845147551, "grad_norm": 1.6337910890579224, "learning_rate": 1.2185525209139457e-05, "loss": 0.4134, "step": 7252 }, { "epoch": 0.882628536659568, "grad_norm": 0.6018592119216919, "learning_rate": 1.2183621631252544e-05, "loss": 0.4462, "step": 7253 }, { "epoch": 0.882750228171585, "grad_norm": 0.7575495839118958, "learning_rate": 1.2181717970274312e-05, "loss": 0.4576, "step": 7254 }, { "epoch": 0.882871919683602, "grad_norm": 1.188772439956665, "learning_rate": 1.2179814226277191e-05, "loss": 0.4451, "step": 7255 }, { "epoch": 0.8829936111956191, "grad_norm": 2.6152424812316895, "learning_rate": 1.217791039933363e-05, "loss": 0.5052, "step": 7256 }, { "epoch": 0.8831153027076362, "grad_norm": 3.8781511783599854, "learning_rate": 1.2176006489516068e-05, "loss": 0.3836, "step": 7257 }, { "epoch": 0.8832369942196532, "grad_norm": 0.968696117401123, "learning_rate": 1.2174102496896959e-05, "loss": 0.5065, "step": 7258 }, { "epoch": 0.8833586857316702, "grad_norm": 0.9881165623664856, "learning_rate": 1.2172198421548745e-05, "loss": 0.463, "step": 7259 }, { "epoch": 0.8834803772436872, "grad_norm": 1.793229341506958, "learning_rate": 1.2170294263543884e-05, "loss": 0.4729, "step": 7260 }, { "epoch": 0.8836020687557042, "grad_norm": 0.7428906559944153, "learning_rate": 1.216839002295484e-05, "loss": 0.4582, "step": 7261 }, { "epoch": 0.8837237602677214, "grad_norm": 1.8898204565048218, "learning_rate": 1.2166485699854064e-05, "loss": 0.4011, "step": 7262 }, { "epoch": 0.8838454517797384, "grad_norm": 0.725135326385498, "learning_rate": 1.2164581294314022e-05, "loss": 0.4659, "step": 7263 }, { "epoch": 0.8839671432917554, "grad_norm": 1.1981191635131836, "learning_rate": 1.2162676806407181e-05, "loss": 0.47, "step": 7264 }, { "epoch": 0.8840888348037724, "grad_norm": 1.1754435300827026, "learning_rate": 1.2160772236206014e-05, "loss": 0.4856, "step": 7265 }, { "epoch": 0.8842105263157894, "grad_norm": 2.904205322265625, "learning_rate": 1.2158867583782988e-05, "loss": 0.5361, "step": 7266 }, { "epoch": 0.8843322178278065, "grad_norm": 0.8026193380355835, "learning_rate": 1.2156962849210584e-05, "loss": 0.4773, "step": 7267 }, { "epoch": 0.8844539093398236, "grad_norm": 0.7588707208633423, "learning_rate": 1.2155058032561278e-05, "loss": 0.4551, "step": 7268 }, { "epoch": 0.8845756008518406, "grad_norm": 1.2164289951324463, "learning_rate": 1.2153153133907554e-05, "loss": 0.4417, "step": 7269 }, { "epoch": 0.8846972923638576, "grad_norm": 0.9452533721923828, "learning_rate": 1.2151248153321892e-05, "loss": 0.5107, "step": 7270 }, { "epoch": 0.8848189838758747, "grad_norm": 0.8283289074897766, "learning_rate": 1.2149343090876788e-05, "loss": 0.5, "step": 7271 }, { "epoch": 0.8849406753878917, "grad_norm": 0.9408752918243408, "learning_rate": 1.2147437946644731e-05, "loss": 0.544, "step": 7272 }, { "epoch": 0.8850623668999087, "grad_norm": 1.163599967956543, "learning_rate": 1.2145532720698219e-05, "loss": 0.5075, "step": 7273 }, { "epoch": 0.8851840584119258, "grad_norm": 3.3686656951904297, "learning_rate": 1.214362741310974e-05, "loss": 0.4778, "step": 7274 }, { "epoch": 0.8853057499239428, "grad_norm": 2.4053075313568115, "learning_rate": 1.2141722023951801e-05, "loss": 0.4655, "step": 7275 }, { "epoch": 0.8854274414359599, "grad_norm": 5.931481838226318, "learning_rate": 1.2139816553296908e-05, "loss": 0.4043, "step": 7276 }, { "epoch": 0.8855491329479769, "grad_norm": 3.4912824630737305, "learning_rate": 1.2137911001217565e-05, "loss": 0.4501, "step": 7277 }, { "epoch": 0.8856708244599939, "grad_norm": 0.6624214053153992, "learning_rate": 1.2136005367786284e-05, "loss": 0.4562, "step": 7278 }, { "epoch": 0.8857925159720109, "grad_norm": 0.6605298519134521, "learning_rate": 1.2134099653075579e-05, "loss": 0.4765, "step": 7279 }, { "epoch": 0.8859142074840279, "grad_norm": 2.066791296005249, "learning_rate": 1.213219385715796e-05, "loss": 0.3924, "step": 7280 }, { "epoch": 0.8860358989960451, "grad_norm": 0.65671706199646, "learning_rate": 1.2130287980105958e-05, "loss": 0.4331, "step": 7281 }, { "epoch": 0.8861575905080621, "grad_norm": 0.5998497605323792, "learning_rate": 1.2128382021992084e-05, "loss": 0.421, "step": 7282 }, { "epoch": 0.8862792820200791, "grad_norm": 2.2659995555877686, "learning_rate": 1.2126475982888868e-05, "loss": 0.4705, "step": 7283 }, { "epoch": 0.8864009735320961, "grad_norm": 0.7178171873092651, "learning_rate": 1.2124569862868842e-05, "loss": 0.3816, "step": 7284 }, { "epoch": 0.8865226650441131, "grad_norm": 4.186813831329346, "learning_rate": 1.2122663662004536e-05, "loss": 0.5712, "step": 7285 }, { "epoch": 0.8866443565561302, "grad_norm": 1.2549664974212646, "learning_rate": 1.2120757380368481e-05, "loss": 0.417, "step": 7286 }, { "epoch": 0.8867660480681473, "grad_norm": 2.6965174674987793, "learning_rate": 1.2118851018033219e-05, "loss": 0.4671, "step": 7287 }, { "epoch": 0.8868877395801643, "grad_norm": 2.5200355052948, "learning_rate": 1.2116944575071293e-05, "loss": 0.3725, "step": 7288 }, { "epoch": 0.8870094310921813, "grad_norm": 0.7925547957420349, "learning_rate": 1.2115038051555239e-05, "loss": 0.4236, "step": 7289 }, { "epoch": 0.8871311226041984, "grad_norm": 2.165656566619873, "learning_rate": 1.2113131447557614e-05, "loss": 0.4924, "step": 7290 }, { "epoch": 0.8872528141162154, "grad_norm": 0.9340012669563293, "learning_rate": 1.211122476315096e-05, "loss": 0.4407, "step": 7291 }, { "epoch": 0.8873745056282324, "grad_norm": 1.1947416067123413, "learning_rate": 1.2109317998407834e-05, "loss": 0.513, "step": 7292 }, { "epoch": 0.8874961971402495, "grad_norm": 2.4484124183654785, "learning_rate": 1.2107411153400793e-05, "loss": 0.406, "step": 7293 }, { "epoch": 0.8876178886522665, "grad_norm": 0.7745649814605713, "learning_rate": 1.2105504228202394e-05, "loss": 0.4893, "step": 7294 }, { "epoch": 0.8877395801642836, "grad_norm": 2.3933732509613037, "learning_rate": 1.2103597222885204e-05, "loss": 0.4451, "step": 7295 }, { "epoch": 0.8878612716763006, "grad_norm": 3.0880136489868164, "learning_rate": 1.2101690137521785e-05, "loss": 0.4411, "step": 7296 }, { "epoch": 0.8879829631883176, "grad_norm": 2.0262298583984375, "learning_rate": 1.2099782972184704e-05, "loss": 0.4405, "step": 7297 }, { "epoch": 0.8881046547003346, "grad_norm": 2.956406593322754, "learning_rate": 1.2097875726946535e-05, "loss": 0.4667, "step": 7298 }, { "epoch": 0.8882263462123516, "grad_norm": 2.091343402862549, "learning_rate": 1.2095968401879854e-05, "loss": 0.3916, "step": 7299 }, { "epoch": 0.8883480377243688, "grad_norm": 1.1204673051834106, "learning_rate": 1.2094060997057233e-05, "loss": 0.439, "step": 7300 }, { "epoch": 0.8884697292363858, "grad_norm": 1.1490085124969482, "learning_rate": 1.2092153512551258e-05, "loss": 0.4843, "step": 7301 }, { "epoch": 0.8885914207484028, "grad_norm": 0.8195436596870422, "learning_rate": 1.2090245948434514e-05, "loss": 0.3827, "step": 7302 }, { "epoch": 0.8887131122604198, "grad_norm": 1.1624146699905396, "learning_rate": 1.2088338304779586e-05, "loss": 0.5068, "step": 7303 }, { "epoch": 0.8888348037724368, "grad_norm": 2.6067113876342773, "learning_rate": 1.2086430581659058e-05, "loss": 0.4732, "step": 7304 }, { "epoch": 0.8889564952844539, "grad_norm": 5.265781402587891, "learning_rate": 1.2084522779145531e-05, "loss": 0.5815, "step": 7305 }, { "epoch": 0.889078186796471, "grad_norm": 1.8413618803024292, "learning_rate": 1.2082614897311595e-05, "loss": 0.4129, "step": 7306 }, { "epoch": 0.889199878308488, "grad_norm": 2.391411542892456, "learning_rate": 1.2080706936229854e-05, "loss": 0.4605, "step": 7307 }, { "epoch": 0.889321569820505, "grad_norm": 0.9443608522415161, "learning_rate": 1.2078798895972908e-05, "loss": 0.4439, "step": 7308 }, { "epoch": 0.889443261332522, "grad_norm": 2.8269126415252686, "learning_rate": 1.207689077661336e-05, "loss": 0.4953, "step": 7309 }, { "epoch": 0.8895649528445391, "grad_norm": 0.7635732293128967, "learning_rate": 1.2074982578223815e-05, "loss": 0.4872, "step": 7310 }, { "epoch": 0.8896866443565561, "grad_norm": 2.8566296100616455, "learning_rate": 1.2073074300876893e-05, "loss": 0.45, "step": 7311 }, { "epoch": 0.8898083358685732, "grad_norm": 1.6522257328033447, "learning_rate": 1.20711659446452e-05, "loss": 0.4648, "step": 7312 }, { "epoch": 0.8899300273805902, "grad_norm": 4.044620513916016, "learning_rate": 1.2069257509601356e-05, "loss": 0.4064, "step": 7313 }, { "epoch": 0.8900517188926073, "grad_norm": 4.702152252197266, "learning_rate": 1.2067348995817983e-05, "loss": 0.3677, "step": 7314 }, { "epoch": 0.8901734104046243, "grad_norm": 1.3512136936187744, "learning_rate": 1.20654404033677e-05, "loss": 0.4851, "step": 7315 }, { "epoch": 0.8902951019166413, "grad_norm": 1.169438123703003, "learning_rate": 1.2063531732323135e-05, "loss": 0.4954, "step": 7316 }, { "epoch": 0.8904167934286583, "grad_norm": 1.094239592552185, "learning_rate": 1.2061622982756916e-05, "loss": 0.4881, "step": 7317 }, { "epoch": 0.8905384849406753, "grad_norm": 1.7024352550506592, "learning_rate": 1.2059714154741675e-05, "loss": 0.4796, "step": 7318 }, { "epoch": 0.8906601764526925, "grad_norm": 1.6726089715957642, "learning_rate": 1.2057805248350047e-05, "loss": 0.5149, "step": 7319 }, { "epoch": 0.8907818679647095, "grad_norm": 1.5365747213363647, "learning_rate": 1.205589626365467e-05, "loss": 0.4815, "step": 7320 }, { "epoch": 0.8909035594767265, "grad_norm": 1.5698299407958984, "learning_rate": 1.2053987200728183e-05, "loss": 0.4367, "step": 7321 }, { "epoch": 0.8910252509887435, "grad_norm": 1.539196252822876, "learning_rate": 1.2052078059643235e-05, "loss": 0.4727, "step": 7322 }, { "epoch": 0.8911469425007605, "grad_norm": 1.279540777206421, "learning_rate": 1.2050168840472466e-05, "loss": 0.4997, "step": 7323 }, { "epoch": 0.8912686340127776, "grad_norm": 5.007602214813232, "learning_rate": 1.2048259543288529e-05, "loss": 0.3912, "step": 7324 }, { "epoch": 0.8913903255247947, "grad_norm": 2.6805882453918457, "learning_rate": 1.204635016816408e-05, "loss": 0.4709, "step": 7325 }, { "epoch": 0.8915120170368117, "grad_norm": 0.5695067048072815, "learning_rate": 1.204444071517177e-05, "loss": 0.4758, "step": 7326 }, { "epoch": 0.8916337085488287, "grad_norm": 3.1602389812469482, "learning_rate": 1.2042531184384258e-05, "loss": 0.4345, "step": 7327 }, { "epoch": 0.8917554000608457, "grad_norm": 3.4763336181640625, "learning_rate": 1.2040621575874208e-05, "loss": 0.3752, "step": 7328 }, { "epoch": 0.8918770915728628, "grad_norm": 0.6043762564659119, "learning_rate": 1.2038711889714282e-05, "loss": 0.4491, "step": 7329 }, { "epoch": 0.8919987830848798, "grad_norm": 2.324209213256836, "learning_rate": 1.2036802125977149e-05, "loss": 0.4579, "step": 7330 }, { "epoch": 0.8921204745968969, "grad_norm": 1.1331285238265991, "learning_rate": 1.203489228473548e-05, "loss": 0.4105, "step": 7331 }, { "epoch": 0.8922421661089139, "grad_norm": 1.273290991783142, "learning_rate": 1.2032982366061945e-05, "loss": 0.3622, "step": 7332 }, { "epoch": 0.892363857620931, "grad_norm": 3.2276227474212646, "learning_rate": 1.2031072370029227e-05, "loss": 0.4421, "step": 7333 }, { "epoch": 0.892485549132948, "grad_norm": 4.9920148849487305, "learning_rate": 1.2029162296709998e-05, "loss": 0.5286, "step": 7334 }, { "epoch": 0.892607240644965, "grad_norm": 4.205452919006348, "learning_rate": 1.2027252146176943e-05, "loss": 0.5436, "step": 7335 }, { "epoch": 0.892728932156982, "grad_norm": 1.5160728693008423, "learning_rate": 1.2025341918502748e-05, "loss": 0.3205, "step": 7336 }, { "epoch": 0.892850623668999, "grad_norm": 3.0665640830993652, "learning_rate": 1.2023431613760106e-05, "loss": 0.4703, "step": 7337 }, { "epoch": 0.8929723151810162, "grad_norm": 0.7929785847663879, "learning_rate": 1.2021521232021698e-05, "loss": 0.4062, "step": 7338 }, { "epoch": 0.8930940066930332, "grad_norm": 1.8136169910430908, "learning_rate": 1.2019610773360222e-05, "loss": 0.4626, "step": 7339 }, { "epoch": 0.8932156982050502, "grad_norm": 0.8245934247970581, "learning_rate": 1.2017700237848375e-05, "loss": 0.4614, "step": 7340 }, { "epoch": 0.8933373897170672, "grad_norm": 1.2006621360778809, "learning_rate": 1.2015789625558862e-05, "loss": 0.4077, "step": 7341 }, { "epoch": 0.8934590812290842, "grad_norm": 0.699004054069519, "learning_rate": 1.201387893656438e-05, "loss": 0.4188, "step": 7342 }, { "epoch": 0.8935807727411013, "grad_norm": 1.2466466426849365, "learning_rate": 1.2011968170937634e-05, "loss": 0.4306, "step": 7343 }, { "epoch": 0.8937024642531184, "grad_norm": 1.9796028137207031, "learning_rate": 1.2010057328751335e-05, "loss": 0.4271, "step": 7344 }, { "epoch": 0.8938241557651354, "grad_norm": 2.442113161087036, "learning_rate": 1.2008146410078195e-05, "loss": 0.3871, "step": 7345 }, { "epoch": 0.8939458472771524, "grad_norm": 0.6641497611999512, "learning_rate": 1.2006235414990925e-05, "loss": 0.4854, "step": 7346 }, { "epoch": 0.8940675387891694, "grad_norm": 1.1614047288894653, "learning_rate": 1.2004324343562246e-05, "loss": 0.4383, "step": 7347 }, { "epoch": 0.8941892303011865, "grad_norm": 1.7455962896347046, "learning_rate": 1.200241319586488e-05, "loss": 0.4646, "step": 7348 }, { "epoch": 0.8943109218132035, "grad_norm": 0.7281454205513, "learning_rate": 1.2000501971971546e-05, "loss": 0.4736, "step": 7349 }, { "epoch": 0.8944326133252206, "grad_norm": 3.365340232849121, "learning_rate": 1.1998590671954969e-05, "loss": 0.4921, "step": 7350 }, { "epoch": 0.8945543048372376, "grad_norm": 0.8367118835449219, "learning_rate": 1.1996679295887881e-05, "loss": 0.411, "step": 7351 }, { "epoch": 0.8946759963492547, "grad_norm": 1.1888952255249023, "learning_rate": 1.1994767843843013e-05, "loss": 0.4386, "step": 7352 }, { "epoch": 0.8947976878612717, "grad_norm": 1.3032678365707397, "learning_rate": 1.19928563158931e-05, "loss": 0.4851, "step": 7353 }, { "epoch": 0.8949193793732887, "grad_norm": 2.7483127117156982, "learning_rate": 1.199094471211088e-05, "loss": 0.3948, "step": 7354 }, { "epoch": 0.8950410708853057, "grad_norm": 0.7429040670394897, "learning_rate": 1.1989033032569091e-05, "loss": 0.4517, "step": 7355 }, { "epoch": 0.8951627623973228, "grad_norm": 0.5595561265945435, "learning_rate": 1.198712127734048e-05, "loss": 0.4758, "step": 7356 }, { "epoch": 0.8952844539093399, "grad_norm": 1.5698339939117432, "learning_rate": 1.1985209446497788e-05, "loss": 0.5223, "step": 7357 }, { "epoch": 0.8954061454213569, "grad_norm": 3.6477086544036865, "learning_rate": 1.1983297540113767e-05, "loss": 0.3842, "step": 7358 }, { "epoch": 0.8955278369333739, "grad_norm": 1.0380009412765503, "learning_rate": 1.1981385558261173e-05, "loss": 0.4511, "step": 7359 }, { "epoch": 0.8956495284453909, "grad_norm": 1.0342826843261719, "learning_rate": 1.1979473501012757e-05, "loss": 0.4634, "step": 7360 }, { "epoch": 0.8957712199574079, "grad_norm": 1.539221167564392, "learning_rate": 1.1977561368441275e-05, "loss": 0.481, "step": 7361 }, { "epoch": 0.895892911469425, "grad_norm": 0.6270670890808105, "learning_rate": 1.1975649160619488e-05, "loss": 0.4722, "step": 7362 }, { "epoch": 0.8960146029814421, "grad_norm": 0.6505113840103149, "learning_rate": 1.1973736877620166e-05, "loss": 0.464, "step": 7363 }, { "epoch": 0.8961362944934591, "grad_norm": 2.1215133666992188, "learning_rate": 1.1971824519516066e-05, "loss": 0.4914, "step": 7364 }, { "epoch": 0.8962579860054761, "grad_norm": 0.8316892981529236, "learning_rate": 1.1969912086379968e-05, "loss": 0.4485, "step": 7365 }, { "epoch": 0.8963796775174931, "grad_norm": 0.6590808033943176, "learning_rate": 1.196799957828463e-05, "loss": 0.4684, "step": 7366 }, { "epoch": 0.8965013690295102, "grad_norm": 1.3061925172805786, "learning_rate": 1.196608699530284e-05, "loss": 0.4935, "step": 7367 }, { "epoch": 0.8966230605415272, "grad_norm": 2.2511823177337646, "learning_rate": 1.1964174337507366e-05, "loss": 0.4563, "step": 7368 }, { "epoch": 0.8967447520535443, "grad_norm": 1.2181216478347778, "learning_rate": 1.1962261604970994e-05, "loss": 0.4609, "step": 7369 }, { "epoch": 0.8968664435655613, "grad_norm": 0.5540208220481873, "learning_rate": 1.1960348797766505e-05, "loss": 0.4736, "step": 7370 }, { "epoch": 0.8969881350775784, "grad_norm": 0.9309449195861816, "learning_rate": 1.1958435915966692e-05, "loss": 0.4526, "step": 7371 }, { "epoch": 0.8971098265895954, "grad_norm": 1.24196195602417, "learning_rate": 1.1956522959644334e-05, "loss": 0.4407, "step": 7372 }, { "epoch": 0.8972315181016124, "grad_norm": 1.3524045944213867, "learning_rate": 1.1954609928872229e-05, "loss": 0.4742, "step": 7373 }, { "epoch": 0.8973532096136294, "grad_norm": 1.753662347793579, "learning_rate": 1.1952696823723169e-05, "loss": 0.4339, "step": 7374 }, { "epoch": 0.8974749011256465, "grad_norm": 2.8566055297851562, "learning_rate": 1.1950783644269956e-05, "loss": 0.4882, "step": 7375 }, { "epoch": 0.8975965926376636, "grad_norm": 2.9239728450775146, "learning_rate": 1.1948870390585386e-05, "loss": 0.5488, "step": 7376 }, { "epoch": 0.8977182841496806, "grad_norm": 1.1236060857772827, "learning_rate": 1.1946957062742263e-05, "loss": 0.4221, "step": 7377 }, { "epoch": 0.8978399756616976, "grad_norm": 0.7916250824928284, "learning_rate": 1.1945043660813393e-05, "loss": 0.4487, "step": 7378 }, { "epoch": 0.8979616671737146, "grad_norm": 0.6620454788208008, "learning_rate": 1.1943130184871588e-05, "loss": 0.4188, "step": 7379 }, { "epoch": 0.8980833586857316, "grad_norm": 2.3451781272888184, "learning_rate": 1.1941216634989656e-05, "loss": 0.4309, "step": 7380 }, { "epoch": 0.8982050501977487, "grad_norm": 0.7819168567657471, "learning_rate": 1.1939303011240413e-05, "loss": 0.4162, "step": 7381 }, { "epoch": 0.8983267417097658, "grad_norm": 1.7895784378051758, "learning_rate": 1.1937389313696677e-05, "loss": 0.5011, "step": 7382 }, { "epoch": 0.8984484332217828, "grad_norm": 2.3792643547058105, "learning_rate": 1.1935475542431268e-05, "loss": 0.5144, "step": 7383 }, { "epoch": 0.8985701247337998, "grad_norm": 2.1358370780944824, "learning_rate": 1.1933561697517006e-05, "loss": 0.4459, "step": 7384 }, { "epoch": 0.8986918162458168, "grad_norm": 0.6661283373832703, "learning_rate": 1.193164777902672e-05, "loss": 0.4378, "step": 7385 }, { "epoch": 0.8988135077578339, "grad_norm": 0.6997846364974976, "learning_rate": 1.192973378703324e-05, "loss": 0.4555, "step": 7386 }, { "epoch": 0.8989351992698509, "grad_norm": 4.277323246002197, "learning_rate": 1.192781972160939e-05, "loss": 0.5582, "step": 7387 }, { "epoch": 0.899056890781868, "grad_norm": 1.2798309326171875, "learning_rate": 1.1925905582828015e-05, "loss": 0.4706, "step": 7388 }, { "epoch": 0.899178582293885, "grad_norm": 2.09159517288208, "learning_rate": 1.1923991370761942e-05, "loss": 0.4634, "step": 7389 }, { "epoch": 0.899300273805902, "grad_norm": 1.8460427522659302, "learning_rate": 1.1922077085484018e-05, "loss": 0.5384, "step": 7390 }, { "epoch": 0.8994219653179191, "grad_norm": 3.0884695053100586, "learning_rate": 1.1920162727067082e-05, "loss": 0.4529, "step": 7391 }, { "epoch": 0.8995436568299361, "grad_norm": 1.5712168216705322, "learning_rate": 1.1918248295583977e-05, "loss": 0.5111, "step": 7392 }, { "epoch": 0.8996653483419531, "grad_norm": 2.340404510498047, "learning_rate": 1.1916333791107558e-05, "loss": 0.4486, "step": 7393 }, { "epoch": 0.8997870398539702, "grad_norm": 3.346893787384033, "learning_rate": 1.1914419213710669e-05, "loss": 0.4525, "step": 7394 }, { "epoch": 0.8999087313659873, "grad_norm": 2.9239792823791504, "learning_rate": 1.1912504563466165e-05, "loss": 0.4413, "step": 7395 }, { "epoch": 0.9000304228780043, "grad_norm": 1.453131914138794, "learning_rate": 1.1910589840446904e-05, "loss": 0.4343, "step": 7396 }, { "epoch": 0.9001521143900213, "grad_norm": 0.6888946890830994, "learning_rate": 1.1908675044725749e-05, "loss": 0.4343, "step": 7397 }, { "epoch": 0.9002738059020383, "grad_norm": 1.2008459568023682, "learning_rate": 1.1906760176375551e-05, "loss": 0.4219, "step": 7398 }, { "epoch": 0.9003954974140553, "grad_norm": 1.9007642269134521, "learning_rate": 1.1904845235469184e-05, "loss": 0.451, "step": 7399 }, { "epoch": 0.9005171889260724, "grad_norm": 3.8408656120300293, "learning_rate": 1.1902930222079516e-05, "loss": 0.5274, "step": 7400 }, { "epoch": 0.9006388804380895, "grad_norm": 1.6050820350646973, "learning_rate": 1.1901015136279415e-05, "loss": 0.4759, "step": 7401 }, { "epoch": 0.9007605719501065, "grad_norm": 1.1178021430969238, "learning_rate": 1.1899099978141748e-05, "loss": 0.4682, "step": 7402 }, { "epoch": 0.9008822634621235, "grad_norm": 0.864290714263916, "learning_rate": 1.1897184747739398e-05, "loss": 0.444, "step": 7403 }, { "epoch": 0.9010039549741405, "grad_norm": 1.1707119941711426, "learning_rate": 1.1895269445145241e-05, "loss": 0.4448, "step": 7404 }, { "epoch": 0.9011256464861576, "grad_norm": 2.937156915664673, "learning_rate": 1.1893354070432161e-05, "loss": 0.5074, "step": 7405 }, { "epoch": 0.9012473379981746, "grad_norm": 0.6468366384506226, "learning_rate": 1.1891438623673039e-05, "loss": 0.4692, "step": 7406 }, { "epoch": 0.9013690295101917, "grad_norm": 0.8036524057388306, "learning_rate": 1.1889523104940762e-05, "loss": 0.4641, "step": 7407 }, { "epoch": 0.9014907210222087, "grad_norm": 2.6297693252563477, "learning_rate": 1.1887607514308218e-05, "loss": 0.4292, "step": 7408 }, { "epoch": 0.9016124125342257, "grad_norm": 4.296541213989258, "learning_rate": 1.1885691851848302e-05, "loss": 0.4809, "step": 7409 }, { "epoch": 0.9017341040462428, "grad_norm": 3.5419514179229736, "learning_rate": 1.1883776117633907e-05, "loss": 0.4349, "step": 7410 }, { "epoch": 0.9018557955582598, "grad_norm": 2.7667031288146973, "learning_rate": 1.1881860311737934e-05, "loss": 0.4575, "step": 7411 }, { "epoch": 0.9019774870702768, "grad_norm": 3.723479986190796, "learning_rate": 1.187994443423328e-05, "loss": 0.4109, "step": 7412 }, { "epoch": 0.9020991785822939, "grad_norm": 2.6736536026000977, "learning_rate": 1.187802848519285e-05, "loss": 0.4412, "step": 7413 }, { "epoch": 0.902220870094311, "grad_norm": 0.7212310433387756, "learning_rate": 1.1876112464689547e-05, "loss": 0.4576, "step": 7414 }, { "epoch": 0.902342561606328, "grad_norm": 1.5392414331436157, "learning_rate": 1.1874196372796278e-05, "loss": 0.4342, "step": 7415 }, { "epoch": 0.902464253118345, "grad_norm": 1.6171127557754517, "learning_rate": 1.1872280209585965e-05, "loss": 0.4437, "step": 7416 }, { "epoch": 0.902585944630362, "grad_norm": 5.273656845092773, "learning_rate": 1.1870363975131511e-05, "loss": 0.5091, "step": 7417 }, { "epoch": 0.902707636142379, "grad_norm": 2.4460678100585938, "learning_rate": 1.1868447669505836e-05, "loss": 0.5266, "step": 7418 }, { "epoch": 0.902829327654396, "grad_norm": 2.832026958465576, "learning_rate": 1.1866531292781858e-05, "loss": 0.4937, "step": 7419 }, { "epoch": 0.9029510191664132, "grad_norm": 2.171494245529175, "learning_rate": 1.1864614845032504e-05, "loss": 0.4102, "step": 7420 }, { "epoch": 0.9030727106784302, "grad_norm": 1.6888043880462646, "learning_rate": 1.1862698326330694e-05, "loss": 0.449, "step": 7421 }, { "epoch": 0.9031944021904472, "grad_norm": 0.7823889255523682, "learning_rate": 1.1860781736749355e-05, "loss": 0.4129, "step": 7422 }, { "epoch": 0.9033160937024642, "grad_norm": 0.9548131227493286, "learning_rate": 1.1858865076361423e-05, "loss": 0.4522, "step": 7423 }, { "epoch": 0.9034377852144813, "grad_norm": 0.9475173354148865, "learning_rate": 1.1856948345239827e-05, "loss": 0.4368, "step": 7424 }, { "epoch": 0.9035594767264983, "grad_norm": 1.3620988130569458, "learning_rate": 1.1855031543457502e-05, "loss": 0.4828, "step": 7425 }, { "epoch": 0.9036811682385154, "grad_norm": 0.6987718343734741, "learning_rate": 1.1853114671087387e-05, "loss": 0.4597, "step": 7426 }, { "epoch": 0.9038028597505324, "grad_norm": 3.693128824234009, "learning_rate": 1.1851197728202423e-05, "loss": 0.4131, "step": 7427 }, { "epoch": 0.9039245512625494, "grad_norm": 2.660182237625122, "learning_rate": 1.1849280714875552e-05, "loss": 0.4346, "step": 7428 }, { "epoch": 0.9040462427745665, "grad_norm": 3.2342629432678223, "learning_rate": 1.1847363631179724e-05, "loss": 0.402, "step": 7429 }, { "epoch": 0.9041679342865835, "grad_norm": 1.0060064792633057, "learning_rate": 1.1845446477187886e-05, "loss": 0.4664, "step": 7430 }, { "epoch": 0.9042896257986005, "grad_norm": 0.7507879734039307, "learning_rate": 1.184352925297299e-05, "loss": 0.432, "step": 7431 }, { "epoch": 0.9044113173106176, "grad_norm": 2.0956578254699707, "learning_rate": 1.1841611958607988e-05, "loss": 0.5218, "step": 7432 }, { "epoch": 0.9045330088226347, "grad_norm": 3.350637674331665, "learning_rate": 1.183969459416584e-05, "loss": 0.5159, "step": 7433 }, { "epoch": 0.9046547003346517, "grad_norm": 2.215989351272583, "learning_rate": 1.1837777159719506e-05, "loss": 0.4057, "step": 7434 }, { "epoch": 0.9047763918466687, "grad_norm": 1.168872594833374, "learning_rate": 1.1835859655341948e-05, "loss": 0.3942, "step": 7435 }, { "epoch": 0.9048980833586857, "grad_norm": 0.8087822794914246, "learning_rate": 1.1833942081106127e-05, "loss": 0.4598, "step": 7436 }, { "epoch": 0.9050197748707027, "grad_norm": 0.8854578137397766, "learning_rate": 1.1832024437085015e-05, "loss": 0.4295, "step": 7437 }, { "epoch": 0.9051414663827198, "grad_norm": 1.3707044124603271, "learning_rate": 1.1830106723351578e-05, "loss": 0.433, "step": 7438 }, { "epoch": 0.9052631578947369, "grad_norm": 2.760690212249756, "learning_rate": 1.1828188939978798e-05, "loss": 0.5251, "step": 7439 }, { "epoch": 0.9053848494067539, "grad_norm": 0.8044907450675964, "learning_rate": 1.1826271087039643e-05, "loss": 0.3961, "step": 7440 }, { "epoch": 0.9055065409187709, "grad_norm": 2.011630058288574, "learning_rate": 1.182435316460709e-05, "loss": 0.504, "step": 7441 }, { "epoch": 0.9056282324307879, "grad_norm": 1.7443996667861938, "learning_rate": 1.1822435172754123e-05, "loss": 0.4619, "step": 7442 }, { "epoch": 0.905749923942805, "grad_norm": 1.1376187801361084, "learning_rate": 1.1820517111553729e-05, "loss": 0.4551, "step": 7443 }, { "epoch": 0.905871615454822, "grad_norm": 0.6837798953056335, "learning_rate": 1.1818598981078888e-05, "loss": 0.4405, "step": 7444 }, { "epoch": 0.9059933069668391, "grad_norm": 0.5845446586608887, "learning_rate": 1.1816680781402592e-05, "loss": 0.4486, "step": 7445 }, { "epoch": 0.9061149984788561, "grad_norm": 3.576133966445923, "learning_rate": 1.1814762512597833e-05, "loss": 0.5182, "step": 7446 }, { "epoch": 0.9062366899908731, "grad_norm": 2.3765969276428223, "learning_rate": 1.1812844174737603e-05, "loss": 0.5145, "step": 7447 }, { "epoch": 0.9063583815028902, "grad_norm": 1.5969698429107666, "learning_rate": 1.18109257678949e-05, "loss": 0.4703, "step": 7448 }, { "epoch": 0.9064800730149072, "grad_norm": 0.770078182220459, "learning_rate": 1.1809007292142723e-05, "loss": 0.533, "step": 7449 }, { "epoch": 0.9066017645269242, "grad_norm": 5.47883415222168, "learning_rate": 1.1807088747554074e-05, "loss": 0.4112, "step": 7450 }, { "epoch": 0.9067234560389413, "grad_norm": 0.868976891040802, "learning_rate": 1.1805170134201957e-05, "loss": 0.4785, "step": 7451 }, { "epoch": 0.9068451475509584, "grad_norm": 4.280722618103027, "learning_rate": 1.1803251452159383e-05, "loss": 0.4133, "step": 7452 }, { "epoch": 0.9069668390629754, "grad_norm": 2.8576579093933105, "learning_rate": 1.1801332701499355e-05, "loss": 0.4578, "step": 7453 }, { "epoch": 0.9070885305749924, "grad_norm": 1.9192330837249756, "learning_rate": 1.1799413882294893e-05, "loss": 0.4472, "step": 7454 }, { "epoch": 0.9072102220870094, "grad_norm": 1.2525372505187988, "learning_rate": 1.1797494994619003e-05, "loss": 0.4659, "step": 7455 }, { "epoch": 0.9073319135990264, "grad_norm": 0.9913723468780518, "learning_rate": 1.1795576038544709e-05, "loss": 0.4375, "step": 7456 }, { "epoch": 0.9074536051110436, "grad_norm": 1.3874493837356567, "learning_rate": 1.1793657014145034e-05, "loss": 0.4531, "step": 7457 }, { "epoch": 0.9075752966230606, "grad_norm": 2.199550151824951, "learning_rate": 1.1791737921492997e-05, "loss": 0.5044, "step": 7458 }, { "epoch": 0.9076969881350776, "grad_norm": 1.8949652910232544, "learning_rate": 1.178981876066162e-05, "loss": 0.5028, "step": 7459 }, { "epoch": 0.9078186796470946, "grad_norm": 1.4145840406417847, "learning_rate": 1.1787899531723935e-05, "loss": 0.3584, "step": 7460 }, { "epoch": 0.9079403711591116, "grad_norm": 0.9962152242660522, "learning_rate": 1.1785980234752975e-05, "loss": 0.3894, "step": 7461 }, { "epoch": 0.9080620626711287, "grad_norm": 4.3158721923828125, "learning_rate": 1.1784060869821766e-05, "loss": 0.5194, "step": 7462 }, { "epoch": 0.9081837541831457, "grad_norm": 2.5999600887298584, "learning_rate": 1.178214143700335e-05, "loss": 0.5032, "step": 7463 }, { "epoch": 0.9083054456951628, "grad_norm": 2.517180919647217, "learning_rate": 1.1780221936370761e-05, "loss": 0.4338, "step": 7464 }, { "epoch": 0.9084271372071798, "grad_norm": 2.1384212970733643, "learning_rate": 1.1778302367997048e-05, "loss": 0.4647, "step": 7465 }, { "epoch": 0.9085488287191968, "grad_norm": 2.1684670448303223, "learning_rate": 1.1776382731955245e-05, "loss": 0.4728, "step": 7466 }, { "epoch": 0.9086705202312139, "grad_norm": 3.1083970069885254, "learning_rate": 1.1774463028318401e-05, "loss": 0.3706, "step": 7467 }, { "epoch": 0.9087922117432309, "grad_norm": 1.0006368160247803, "learning_rate": 1.1772543257159568e-05, "loss": 0.4787, "step": 7468 }, { "epoch": 0.9089139032552479, "grad_norm": 0.7188969850540161, "learning_rate": 1.1770623418551798e-05, "loss": 0.4405, "step": 7469 }, { "epoch": 0.909035594767265, "grad_norm": 3.0520830154418945, "learning_rate": 1.1768703512568139e-05, "loss": 0.4465, "step": 7470 }, { "epoch": 0.909157286279282, "grad_norm": 2.5445644855499268, "learning_rate": 1.176678353928165e-05, "loss": 0.464, "step": 7471 }, { "epoch": 0.9092789777912991, "grad_norm": 1.9943443536758423, "learning_rate": 1.1764863498765388e-05, "loss": 0.4769, "step": 7472 }, { "epoch": 0.9094006693033161, "grad_norm": 1.370458960533142, "learning_rate": 1.1762943391092421e-05, "loss": 0.4377, "step": 7473 }, { "epoch": 0.9095223608153331, "grad_norm": 1.6611050367355347, "learning_rate": 1.1761023216335807e-05, "loss": 0.4448, "step": 7474 }, { "epoch": 0.9096440523273501, "grad_norm": 3.1339404582977295, "learning_rate": 1.1759102974568616e-05, "loss": 0.497, "step": 7475 }, { "epoch": 0.9097657438393673, "grad_norm": 1.0105726718902588, "learning_rate": 1.1757182665863913e-05, "loss": 0.445, "step": 7476 }, { "epoch": 0.9098874353513843, "grad_norm": 0.903846025466919, "learning_rate": 1.1755262290294776e-05, "loss": 0.456, "step": 7477 }, { "epoch": 0.9100091268634013, "grad_norm": 1.3151462078094482, "learning_rate": 1.1753341847934272e-05, "loss": 0.3545, "step": 7478 }, { "epoch": 0.9101308183754183, "grad_norm": 3.2192325592041016, "learning_rate": 1.1751421338855483e-05, "loss": 0.5171, "step": 7479 }, { "epoch": 0.9102525098874353, "grad_norm": 0.8245891332626343, "learning_rate": 1.1749500763131488e-05, "loss": 0.4194, "step": 7480 }, { "epoch": 0.9103742013994524, "grad_norm": 4.182288646697998, "learning_rate": 1.1747580120835367e-05, "loss": 0.4919, "step": 7481 }, { "epoch": 0.9104958929114694, "grad_norm": 1.492456316947937, "learning_rate": 1.1745659412040202e-05, "loss": 0.3937, "step": 7482 }, { "epoch": 0.9106175844234865, "grad_norm": 4.322916507720947, "learning_rate": 1.1743738636819087e-05, "loss": 0.5162, "step": 7483 }, { "epoch": 0.9107392759355035, "grad_norm": 5.514195442199707, "learning_rate": 1.1741817795245107e-05, "loss": 0.5864, "step": 7484 }, { "epoch": 0.9108609674475205, "grad_norm": 0.722213089466095, "learning_rate": 1.1739896887391353e-05, "loss": 0.4183, "step": 7485 }, { "epoch": 0.9109826589595376, "grad_norm": 1.453924298286438, "learning_rate": 1.173797591333092e-05, "loss": 0.4374, "step": 7486 }, { "epoch": 0.9111043504715546, "grad_norm": 1.0098438262939453, "learning_rate": 1.1736054873136909e-05, "loss": 0.444, "step": 7487 }, { "epoch": 0.9112260419835716, "grad_norm": 0.6750814318656921, "learning_rate": 1.1734133766882416e-05, "loss": 0.42, "step": 7488 }, { "epoch": 0.9113477334955887, "grad_norm": 0.855018138885498, "learning_rate": 1.1732212594640541e-05, "loss": 0.4508, "step": 7489 }, { "epoch": 0.9114694250076057, "grad_norm": 0.7046476602554321, "learning_rate": 1.1730291356484394e-05, "loss": 0.4117, "step": 7490 }, { "epoch": 0.9115911165196228, "grad_norm": 2.677027702331543, "learning_rate": 1.1728370052487082e-05, "loss": 0.4285, "step": 7491 }, { "epoch": 0.9117128080316398, "grad_norm": 1.3213434219360352, "learning_rate": 1.172644868272171e-05, "loss": 0.4502, "step": 7492 }, { "epoch": 0.9118344995436568, "grad_norm": 0.5881977677345276, "learning_rate": 1.1724527247261389e-05, "loss": 0.4433, "step": 7493 }, { "epoch": 0.9119561910556738, "grad_norm": 1.3601247072219849, "learning_rate": 1.1722605746179237e-05, "loss": 0.4671, "step": 7494 }, { "epoch": 0.912077882567691, "grad_norm": 1.056037187576294, "learning_rate": 1.1720684179548373e-05, "loss": 0.4797, "step": 7495 }, { "epoch": 0.912199574079708, "grad_norm": 1.241981863975525, "learning_rate": 1.1718762547441913e-05, "loss": 0.4758, "step": 7496 }, { "epoch": 0.912321265591725, "grad_norm": 1.5421791076660156, "learning_rate": 1.171684084993298e-05, "loss": 0.4242, "step": 7497 }, { "epoch": 0.912442957103742, "grad_norm": 3.0606703758239746, "learning_rate": 1.1714919087094703e-05, "loss": 0.369, "step": 7498 }, { "epoch": 0.912564648615759, "grad_norm": 0.6550858616828918, "learning_rate": 1.1712997259000203e-05, "loss": 0.4452, "step": 7499 }, { "epoch": 0.912686340127776, "grad_norm": 2.153909683227539, "learning_rate": 1.1711075365722608e-05, "loss": 0.4649, "step": 7500 }, { "epoch": 0.9128080316397931, "grad_norm": 2.1597583293914795, "learning_rate": 1.1709153407335057e-05, "loss": 0.3629, "step": 7501 }, { "epoch": 0.9129297231518102, "grad_norm": 1.0668424367904663, "learning_rate": 1.170723138391068e-05, "loss": 0.4301, "step": 7502 }, { "epoch": 0.9130514146638272, "grad_norm": 1.500740647315979, "learning_rate": 1.1705309295522615e-05, "loss": 0.4778, "step": 7503 }, { "epoch": 0.9131731061758442, "grad_norm": 2.7584002017974854, "learning_rate": 1.1703387142244005e-05, "loss": 0.4581, "step": 7504 }, { "epoch": 0.9132947976878613, "grad_norm": 1.1044843196868896, "learning_rate": 1.1701464924147983e-05, "loss": 0.4057, "step": 7505 }, { "epoch": 0.9134164891998783, "grad_norm": 2.9148340225219727, "learning_rate": 1.1699542641307701e-05, "loss": 0.4766, "step": 7506 }, { "epoch": 0.9135381807118953, "grad_norm": 2.5233004093170166, "learning_rate": 1.1697620293796306e-05, "loss": 0.4927, "step": 7507 }, { "epoch": 0.9136598722239124, "grad_norm": 3.836001396179199, "learning_rate": 1.1695697881686941e-05, "loss": 0.5249, "step": 7508 }, { "epoch": 0.9137815637359294, "grad_norm": 0.6965628266334534, "learning_rate": 1.1693775405052764e-05, "loss": 0.4303, "step": 7509 }, { "epoch": 0.9139032552479465, "grad_norm": 1.6198863983154297, "learning_rate": 1.1691852863966926e-05, "loss": 0.4665, "step": 7510 }, { "epoch": 0.9140249467599635, "grad_norm": 2.0401527881622314, "learning_rate": 1.1689930258502587e-05, "loss": 0.415, "step": 7511 }, { "epoch": 0.9141466382719805, "grad_norm": 0.9270548820495605, "learning_rate": 1.16880075887329e-05, "loss": 0.4653, "step": 7512 }, { "epoch": 0.9142683297839975, "grad_norm": 3.1184487342834473, "learning_rate": 1.168608485473103e-05, "loss": 0.3977, "step": 7513 }, { "epoch": 0.9143900212960147, "grad_norm": 1.2735332250595093, "learning_rate": 1.1684162056570146e-05, "loss": 0.4227, "step": 7514 }, { "epoch": 0.9145117128080317, "grad_norm": 5.183312892913818, "learning_rate": 1.1682239194323408e-05, "loss": 0.3778, "step": 7515 }, { "epoch": 0.9146334043200487, "grad_norm": 0.7662824988365173, "learning_rate": 1.1680316268063984e-05, "loss": 0.4914, "step": 7516 }, { "epoch": 0.9147550958320657, "grad_norm": 2.325381278991699, "learning_rate": 1.167839327786505e-05, "loss": 0.4475, "step": 7517 }, { "epoch": 0.9148767873440827, "grad_norm": 0.6889393925666809, "learning_rate": 1.1676470223799776e-05, "loss": 0.4448, "step": 7518 }, { "epoch": 0.9149984788560998, "grad_norm": 0.9693259596824646, "learning_rate": 1.1674547105941341e-05, "loss": 0.4316, "step": 7519 }, { "epoch": 0.9151201703681168, "grad_norm": 0.8678448796272278, "learning_rate": 1.1672623924362922e-05, "loss": 0.4413, "step": 7520 }, { "epoch": 0.9152418618801339, "grad_norm": 1.315632939338684, "learning_rate": 1.1670700679137703e-05, "loss": 0.4647, "step": 7521 }, { "epoch": 0.9153635533921509, "grad_norm": 2.9123716354370117, "learning_rate": 1.1668777370338864e-05, "loss": 0.484, "step": 7522 }, { "epoch": 0.9154852449041679, "grad_norm": 1.3041127920150757, "learning_rate": 1.166685399803959e-05, "loss": 0.4509, "step": 7523 }, { "epoch": 0.915606936416185, "grad_norm": 1.2023916244506836, "learning_rate": 1.166493056231307e-05, "loss": 0.4204, "step": 7524 }, { "epoch": 0.915728627928202, "grad_norm": 1.6076616048812866, "learning_rate": 1.1663007063232501e-05, "loss": 0.4671, "step": 7525 }, { "epoch": 0.915850319440219, "grad_norm": 1.4111597537994385, "learning_rate": 1.1661083500871066e-05, "loss": 0.5001, "step": 7526 }, { "epoch": 0.9159720109522361, "grad_norm": 0.858390748500824, "learning_rate": 1.1659159875301968e-05, "loss": 0.4709, "step": 7527 }, { "epoch": 0.9160937024642531, "grad_norm": 1.0628103017807007, "learning_rate": 1.1657236186598401e-05, "loss": 0.493, "step": 7528 }, { "epoch": 0.9162153939762702, "grad_norm": 2.3629496097564697, "learning_rate": 1.165531243483357e-05, "loss": 0.4638, "step": 7529 }, { "epoch": 0.9163370854882872, "grad_norm": 5.7145867347717285, "learning_rate": 1.1653388620080672e-05, "loss": 0.3881, "step": 7530 }, { "epoch": 0.9164587770003042, "grad_norm": 2.803727865219116, "learning_rate": 1.1651464742412915e-05, "loss": 0.4856, "step": 7531 }, { "epoch": 0.9165804685123212, "grad_norm": 1.3399631977081299, "learning_rate": 1.1649540801903506e-05, "loss": 0.51, "step": 7532 }, { "epoch": 0.9167021600243384, "grad_norm": 3.168067455291748, "learning_rate": 1.1647616798625659e-05, "loss": 0.4752, "step": 7533 }, { "epoch": 0.9168238515363554, "grad_norm": 1.5800386667251587, "learning_rate": 1.1645692732652577e-05, "loss": 0.4972, "step": 7534 }, { "epoch": 0.9169455430483724, "grad_norm": 2.391519069671631, "learning_rate": 1.1643768604057482e-05, "loss": 0.4715, "step": 7535 }, { "epoch": 0.9170672345603894, "grad_norm": 2.9794158935546875, "learning_rate": 1.1641844412913588e-05, "loss": 0.3831, "step": 7536 }, { "epoch": 0.9171889260724064, "grad_norm": 1.9917330741882324, "learning_rate": 1.1639920159294119e-05, "loss": 0.5072, "step": 7537 }, { "epoch": 0.9173106175844234, "grad_norm": 2.534503698348999, "learning_rate": 1.163799584327229e-05, "loss": 0.4568, "step": 7538 }, { "epoch": 0.9174323090964405, "grad_norm": 3.330085515975952, "learning_rate": 1.163607146492133e-05, "loss": 0.4921, "step": 7539 }, { "epoch": 0.9175540006084576, "grad_norm": 2.5798494815826416, "learning_rate": 1.1634147024314463e-05, "loss": 0.5148, "step": 7540 }, { "epoch": 0.9176756921204746, "grad_norm": 3.7334725856781006, "learning_rate": 1.1632222521524923e-05, "loss": 0.5113, "step": 7541 }, { "epoch": 0.9177973836324916, "grad_norm": 4.550693988800049, "learning_rate": 1.1630297956625934e-05, "loss": 0.5152, "step": 7542 }, { "epoch": 0.9179190751445087, "grad_norm": 0.8807277083396912, "learning_rate": 1.1628373329690732e-05, "loss": 0.4625, "step": 7543 }, { "epoch": 0.9180407666565257, "grad_norm": 2.635573625564575, "learning_rate": 1.1626448640792558e-05, "loss": 0.4093, "step": 7544 }, { "epoch": 0.9181624581685427, "grad_norm": 1.0053194761276245, "learning_rate": 1.1624523890004646e-05, "loss": 0.4681, "step": 7545 }, { "epoch": 0.9182841496805598, "grad_norm": 1.5999778509140015, "learning_rate": 1.1622599077400236e-05, "loss": 0.4843, "step": 7546 }, { "epoch": 0.9184058411925768, "grad_norm": 1.8159563541412354, "learning_rate": 1.1620674203052574e-05, "loss": 0.436, "step": 7547 }, { "epoch": 0.9185275327045939, "grad_norm": 1.4748505353927612, "learning_rate": 1.1618749267034904e-05, "loss": 0.4914, "step": 7548 }, { "epoch": 0.9186492242166109, "grad_norm": 4.531437397003174, "learning_rate": 1.161682426942047e-05, "loss": 0.4053, "step": 7549 }, { "epoch": 0.9187709157286279, "grad_norm": 2.7811291217803955, "learning_rate": 1.1614899210282531e-05, "loss": 0.4047, "step": 7550 }, { "epoch": 0.9188926072406449, "grad_norm": 1.8396155834197998, "learning_rate": 1.161297408969433e-05, "loss": 0.4214, "step": 7551 }, { "epoch": 0.919014298752662, "grad_norm": 0.8901538252830505, "learning_rate": 1.161104890772913e-05, "loss": 0.447, "step": 7552 }, { "epoch": 0.9191359902646791, "grad_norm": 1.542142629623413, "learning_rate": 1.1609123664460183e-05, "loss": 0.4491, "step": 7553 }, { "epoch": 0.9192576817766961, "grad_norm": 2.397416114807129, "learning_rate": 1.1607198359960748e-05, "loss": 0.4692, "step": 7554 }, { "epoch": 0.9193793732887131, "grad_norm": 0.9102994799613953, "learning_rate": 1.1605272994304091e-05, "loss": 0.426, "step": 7555 }, { "epoch": 0.9195010648007301, "grad_norm": 0.5880528092384338, "learning_rate": 1.1603347567563474e-05, "loss": 0.3992, "step": 7556 }, { "epoch": 0.9196227563127471, "grad_norm": 0.758091926574707, "learning_rate": 1.1601422079812163e-05, "loss": 0.4286, "step": 7557 }, { "epoch": 0.9197444478247643, "grad_norm": 3.115398406982422, "learning_rate": 1.1599496531123426e-05, "loss": 0.4922, "step": 7558 }, { "epoch": 0.9198661393367813, "grad_norm": 3.4865224361419678, "learning_rate": 1.1597570921570536e-05, "loss": 0.4673, "step": 7559 }, { "epoch": 0.9199878308487983, "grad_norm": 2.4543771743774414, "learning_rate": 1.1595645251226766e-05, "loss": 0.4802, "step": 7560 }, { "epoch": 0.9201095223608153, "grad_norm": 1.5124155282974243, "learning_rate": 1.1593719520165392e-05, "loss": 0.4534, "step": 7561 }, { "epoch": 0.9202312138728324, "grad_norm": 0.8431416153907776, "learning_rate": 1.1591793728459689e-05, "loss": 0.4483, "step": 7562 }, { "epoch": 0.9203529053848494, "grad_norm": 0.9718037843704224, "learning_rate": 1.1589867876182941e-05, "loss": 0.4385, "step": 7563 }, { "epoch": 0.9204745968968664, "grad_norm": 2.2607533931732178, "learning_rate": 1.1587941963408429e-05, "loss": 0.3986, "step": 7564 }, { "epoch": 0.9205962884088835, "grad_norm": 2.767716407775879, "learning_rate": 1.1586015990209439e-05, "loss": 0.4265, "step": 7565 }, { "epoch": 0.9207179799209005, "grad_norm": 1.0288968086242676, "learning_rate": 1.1584089956659254e-05, "loss": 0.4565, "step": 7566 }, { "epoch": 0.9208396714329176, "grad_norm": 0.7784706354141235, "learning_rate": 1.1582163862831175e-05, "loss": 0.5245, "step": 7567 }, { "epoch": 0.9209613629449346, "grad_norm": 0.5957329869270325, "learning_rate": 1.1580237708798482e-05, "loss": 0.4513, "step": 7568 }, { "epoch": 0.9210830544569516, "grad_norm": 2.8319318294525146, "learning_rate": 1.1578311494634474e-05, "loss": 0.5012, "step": 7569 }, { "epoch": 0.9212047459689686, "grad_norm": 1.967776894569397, "learning_rate": 1.1576385220412442e-05, "loss": 0.4083, "step": 7570 }, { "epoch": 0.9213264374809857, "grad_norm": 1.3437954187393188, "learning_rate": 1.1574458886205698e-05, "loss": 0.3761, "step": 7571 }, { "epoch": 0.9214481289930028, "grad_norm": 0.6504002809524536, "learning_rate": 1.1572532492087527e-05, "loss": 0.4464, "step": 7572 }, { "epoch": 0.9215698205050198, "grad_norm": 0.5987673401832581, "learning_rate": 1.1570606038131245e-05, "loss": 0.4289, "step": 7573 }, { "epoch": 0.9216915120170368, "grad_norm": 2.900508165359497, "learning_rate": 1.1568679524410147e-05, "loss": 0.4928, "step": 7574 }, { "epoch": 0.9218132035290538, "grad_norm": 0.6257543563842773, "learning_rate": 1.156675295099755e-05, "loss": 0.4191, "step": 7575 }, { "epoch": 0.9219348950410708, "grad_norm": 1.9884703159332275, "learning_rate": 1.156482631796676e-05, "loss": 0.458, "step": 7576 }, { "epoch": 0.922056586553088, "grad_norm": 1.192807912826538, "learning_rate": 1.1562899625391086e-05, "loss": 0.4594, "step": 7577 }, { "epoch": 0.922178278065105, "grad_norm": 1.2598100900650024, "learning_rate": 1.1560972873343852e-05, "loss": 0.4703, "step": 7578 }, { "epoch": 0.922299969577122, "grad_norm": 1.4153900146484375, "learning_rate": 1.1559046061898367e-05, "loss": 0.4706, "step": 7579 }, { "epoch": 0.922421661089139, "grad_norm": 0.853300929069519, "learning_rate": 1.155711919112795e-05, "loss": 0.4465, "step": 7580 }, { "epoch": 0.922543352601156, "grad_norm": 0.5372827053070068, "learning_rate": 1.1555192261105925e-05, "loss": 0.4326, "step": 7581 }, { "epoch": 0.9226650441131731, "grad_norm": 0.7183429598808289, "learning_rate": 1.1553265271905619e-05, "loss": 0.4755, "step": 7582 }, { "epoch": 0.9227867356251901, "grad_norm": 3.2192203998565674, "learning_rate": 1.155133822360035e-05, "loss": 0.4516, "step": 7583 }, { "epoch": 0.9229084271372072, "grad_norm": 4.470743656158447, "learning_rate": 1.1549411116263454e-05, "loss": 0.4699, "step": 7584 }, { "epoch": 0.9230301186492242, "grad_norm": 2.28182053565979, "learning_rate": 1.1547483949968254e-05, "loss": 0.4633, "step": 7585 }, { "epoch": 0.9231518101612413, "grad_norm": 1.8521881103515625, "learning_rate": 1.154555672478809e-05, "loss": 0.4858, "step": 7586 }, { "epoch": 0.9232735016732583, "grad_norm": 2.6726293563842773, "learning_rate": 1.1543629440796291e-05, "loss": 0.479, "step": 7587 }, { "epoch": 0.9233951931852753, "grad_norm": 4.213736534118652, "learning_rate": 1.1541702098066199e-05, "loss": 0.4363, "step": 7588 }, { "epoch": 0.9235168846972923, "grad_norm": 1.0938345193862915, "learning_rate": 1.1539774696671151e-05, "loss": 0.4592, "step": 7589 }, { "epoch": 0.9236385762093094, "grad_norm": 0.6823290586471558, "learning_rate": 1.1537847236684487e-05, "loss": 0.4207, "step": 7590 }, { "epoch": 0.9237602677213265, "grad_norm": 1.2085717916488647, "learning_rate": 1.1535919718179554e-05, "loss": 0.4407, "step": 7591 }, { "epoch": 0.9238819592333435, "grad_norm": 0.6506874561309814, "learning_rate": 1.1533992141229693e-05, "loss": 0.448, "step": 7592 }, { "epoch": 0.9240036507453605, "grad_norm": 3.0533831119537354, "learning_rate": 1.153206450590826e-05, "loss": 0.4598, "step": 7593 }, { "epoch": 0.9241253422573775, "grad_norm": 3.3513081073760986, "learning_rate": 1.15301368122886e-05, "loss": 0.5091, "step": 7594 }, { "epoch": 0.9242470337693945, "grad_norm": 0.7972692251205444, "learning_rate": 1.1528209060444065e-05, "loss": 0.4345, "step": 7595 }, { "epoch": 0.9243687252814117, "grad_norm": 1.1738011837005615, "learning_rate": 1.1526281250448015e-05, "loss": 0.3849, "step": 7596 }, { "epoch": 0.9244904167934287, "grad_norm": 0.7687472105026245, "learning_rate": 1.1524353382373806e-05, "loss": 0.4731, "step": 7597 }, { "epoch": 0.9246121083054457, "grad_norm": 1.6522984504699707, "learning_rate": 1.152242545629479e-05, "loss": 0.4614, "step": 7598 }, { "epoch": 0.9247337998174627, "grad_norm": 2.1395158767700195, "learning_rate": 1.1520497472284337e-05, "loss": 0.4891, "step": 7599 }, { "epoch": 0.9248554913294798, "grad_norm": 1.6859644651412964, "learning_rate": 1.1518569430415806e-05, "loss": 0.4822, "step": 7600 }, { "epoch": 0.9249771828414968, "grad_norm": 3.8348793983459473, "learning_rate": 1.1516641330762567e-05, "loss": 0.566, "step": 7601 }, { "epoch": 0.9250988743535138, "grad_norm": 1.5323084592819214, "learning_rate": 1.1514713173397989e-05, "loss": 0.4458, "step": 7602 }, { "epoch": 0.9252205658655309, "grad_norm": 0.7786366939544678, "learning_rate": 1.1512784958395436e-05, "loss": 0.4926, "step": 7603 }, { "epoch": 0.9253422573775479, "grad_norm": 2.2099411487579346, "learning_rate": 1.1510856685828283e-05, "loss": 0.4048, "step": 7604 }, { "epoch": 0.925463948889565, "grad_norm": 3.047236204147339, "learning_rate": 1.1508928355769907e-05, "loss": 0.4442, "step": 7605 }, { "epoch": 0.925585640401582, "grad_norm": 3.9911813735961914, "learning_rate": 1.1506999968293683e-05, "loss": 0.4097, "step": 7606 }, { "epoch": 0.925707331913599, "grad_norm": 4.025291442871094, "learning_rate": 1.1505071523472992e-05, "loss": 0.4271, "step": 7607 }, { "epoch": 0.925829023425616, "grad_norm": 0.8336036205291748, "learning_rate": 1.1503143021381213e-05, "loss": 0.4559, "step": 7608 }, { "epoch": 0.9259507149376331, "grad_norm": 0.9317759871482849, "learning_rate": 1.1501214462091734e-05, "loss": 0.4526, "step": 7609 }, { "epoch": 0.9260724064496502, "grad_norm": 2.1263556480407715, "learning_rate": 1.1499285845677934e-05, "loss": 0.3997, "step": 7610 }, { "epoch": 0.9261940979616672, "grad_norm": 1.8488959074020386, "learning_rate": 1.1497357172213204e-05, "loss": 0.5209, "step": 7611 }, { "epoch": 0.9263157894736842, "grad_norm": 0.9055743217468262, "learning_rate": 1.1495428441770937e-05, "loss": 0.4589, "step": 7612 }, { "epoch": 0.9264374809857012, "grad_norm": 0.6781041622161865, "learning_rate": 1.1493499654424523e-05, "loss": 0.4829, "step": 7613 }, { "epoch": 0.9265591724977182, "grad_norm": 2.101365804672241, "learning_rate": 1.1491570810247351e-05, "loss": 0.5071, "step": 7614 }, { "epoch": 0.9266808640097354, "grad_norm": 1.1475825309753418, "learning_rate": 1.1489641909312825e-05, "loss": 0.4539, "step": 7615 }, { "epoch": 0.9268025555217524, "grad_norm": 1.1658658981323242, "learning_rate": 1.148771295169434e-05, "loss": 0.4879, "step": 7616 }, { "epoch": 0.9269242470337694, "grad_norm": 2.036994695663452, "learning_rate": 1.14857839374653e-05, "loss": 0.4839, "step": 7617 }, { "epoch": 0.9270459385457864, "grad_norm": 0.8018247485160828, "learning_rate": 1.1483854866699102e-05, "loss": 0.5041, "step": 7618 }, { "epoch": 0.9271676300578034, "grad_norm": 2.9389092922210693, "learning_rate": 1.148192573946916e-05, "loss": 0.4927, "step": 7619 }, { "epoch": 0.9272893215698205, "grad_norm": 2.154796838760376, "learning_rate": 1.1479996555848874e-05, "loss": 0.4972, "step": 7620 }, { "epoch": 0.9274110130818375, "grad_norm": 2.108656406402588, "learning_rate": 1.1478067315911653e-05, "loss": 0.5127, "step": 7621 }, { "epoch": 0.9275327045938546, "grad_norm": 3.507293224334717, "learning_rate": 1.1476138019730913e-05, "loss": 0.4675, "step": 7622 }, { "epoch": 0.9276543961058716, "grad_norm": 2.8766088485717773, "learning_rate": 1.147420866738007e-05, "loss": 0.4614, "step": 7623 }, { "epoch": 0.9277760876178887, "grad_norm": 2.3016607761383057, "learning_rate": 1.147227925893253e-05, "loss": 0.5216, "step": 7624 }, { "epoch": 0.9278977791299057, "grad_norm": 1.2048115730285645, "learning_rate": 1.1470349794461719e-05, "loss": 0.5709, "step": 7625 }, { "epoch": 0.9280194706419227, "grad_norm": 0.7632753849029541, "learning_rate": 1.1468420274041054e-05, "loss": 0.477, "step": 7626 }, { "epoch": 0.9281411621539397, "grad_norm": 4.178420543670654, "learning_rate": 1.1466490697743962e-05, "loss": 0.4384, "step": 7627 }, { "epoch": 0.9282628536659568, "grad_norm": 1.5677729845046997, "learning_rate": 1.1464561065643858e-05, "loss": 0.453, "step": 7628 }, { "epoch": 0.9283845451779739, "grad_norm": 0.7407684326171875, "learning_rate": 1.1462631377814175e-05, "loss": 0.481, "step": 7629 }, { "epoch": 0.9285062366899909, "grad_norm": 2.8354055881500244, "learning_rate": 1.1460701634328344e-05, "loss": 0.5165, "step": 7630 }, { "epoch": 0.9286279282020079, "grad_norm": 1.4617078304290771, "learning_rate": 1.1458771835259787e-05, "loss": 0.4801, "step": 7631 }, { "epoch": 0.9287496197140249, "grad_norm": 2.3162219524383545, "learning_rate": 1.1456841980681948e-05, "loss": 0.4921, "step": 7632 }, { "epoch": 0.9288713112260419, "grad_norm": 1.2323050498962402, "learning_rate": 1.1454912070668254e-05, "loss": 0.4334, "step": 7633 }, { "epoch": 0.9289930027380591, "grad_norm": 2.2230920791625977, "learning_rate": 1.1452982105292141e-05, "loss": 0.4812, "step": 7634 }, { "epoch": 0.9291146942500761, "grad_norm": 0.9971070289611816, "learning_rate": 1.1451052084627055e-05, "loss": 0.4549, "step": 7635 }, { "epoch": 0.9292363857620931, "grad_norm": 2.170532703399658, "learning_rate": 1.1449122008746434e-05, "loss": 0.4398, "step": 7636 }, { "epoch": 0.9293580772741101, "grad_norm": 1.6843416690826416, "learning_rate": 1.144719187772372e-05, "loss": 0.4763, "step": 7637 }, { "epoch": 0.9294797687861271, "grad_norm": 0.6193882822990417, "learning_rate": 1.144526169163236e-05, "loss": 0.3935, "step": 7638 }, { "epoch": 0.9296014602981442, "grad_norm": 0.9456354379653931, "learning_rate": 1.14433314505458e-05, "loss": 0.4211, "step": 7639 }, { "epoch": 0.9297231518101612, "grad_norm": 2.8768813610076904, "learning_rate": 1.144140115453749e-05, "loss": 0.4857, "step": 7640 }, { "epoch": 0.9298448433221783, "grad_norm": 0.8512792587280273, "learning_rate": 1.1439470803680884e-05, "loss": 0.4374, "step": 7641 }, { "epoch": 0.9299665348341953, "grad_norm": 1.271427035331726, "learning_rate": 1.1437540398049433e-05, "loss": 0.4334, "step": 7642 }, { "epoch": 0.9300882263462124, "grad_norm": 1.7403056621551514, "learning_rate": 1.1435609937716599e-05, "loss": 0.399, "step": 7643 }, { "epoch": 0.9302099178582294, "grad_norm": 1.6461127996444702, "learning_rate": 1.143367942275583e-05, "loss": 0.4877, "step": 7644 }, { "epoch": 0.9303316093702464, "grad_norm": 1.8353902101516724, "learning_rate": 1.1431748853240591e-05, "loss": 0.4439, "step": 7645 }, { "epoch": 0.9304533008822634, "grad_norm": 2.511136293411255, "learning_rate": 1.1429818229244349e-05, "loss": 0.4193, "step": 7646 }, { "epoch": 0.9305749923942805, "grad_norm": 2.807276725769043, "learning_rate": 1.142788755084056e-05, "loss": 0.3767, "step": 7647 }, { "epoch": 0.9306966839062976, "grad_norm": 0.7150379419326782, "learning_rate": 1.1425956818102696e-05, "loss": 0.4879, "step": 7648 }, { "epoch": 0.9308183754183146, "grad_norm": 3.0271072387695312, "learning_rate": 1.1424026031104223e-05, "loss": 0.4003, "step": 7649 }, { "epoch": 0.9309400669303316, "grad_norm": 2.6720077991485596, "learning_rate": 1.1422095189918614e-05, "loss": 0.436, "step": 7650 }, { "epoch": 0.9310617584423486, "grad_norm": 0.6751669645309448, "learning_rate": 1.1420164294619336e-05, "loss": 0.4393, "step": 7651 }, { "epoch": 0.9311834499543656, "grad_norm": 2.4788801670074463, "learning_rate": 1.1418233345279868e-05, "loss": 0.3788, "step": 7652 }, { "epoch": 0.9313051414663828, "grad_norm": 1.3087700605392456, "learning_rate": 1.1416302341973689e-05, "loss": 0.4118, "step": 7653 }, { "epoch": 0.9314268329783998, "grad_norm": 2.1150686740875244, "learning_rate": 1.141437128477427e-05, "loss": 0.4446, "step": 7654 }, { "epoch": 0.9315485244904168, "grad_norm": 3.462644338607788, "learning_rate": 1.1412440173755098e-05, "loss": 0.4835, "step": 7655 }, { "epoch": 0.9316702160024338, "grad_norm": 2.551018714904785, "learning_rate": 1.1410509008989652e-05, "loss": 0.5146, "step": 7656 }, { "epoch": 0.9317919075144508, "grad_norm": 2.713292121887207, "learning_rate": 1.140857779055142e-05, "loss": 0.4853, "step": 7657 }, { "epoch": 0.9319135990264679, "grad_norm": 0.8535588979721069, "learning_rate": 1.1406646518513888e-05, "loss": 0.4075, "step": 7658 }, { "epoch": 0.932035290538485, "grad_norm": 1.7017369270324707, "learning_rate": 1.1404715192950543e-05, "loss": 0.4578, "step": 7659 }, { "epoch": 0.932156982050502, "grad_norm": 1.3032909631729126, "learning_rate": 1.140278381393488e-05, "loss": 0.4138, "step": 7660 }, { "epoch": 0.932278673562519, "grad_norm": 0.8046365976333618, "learning_rate": 1.1400852381540385e-05, "loss": 0.4335, "step": 7661 }, { "epoch": 0.932400365074536, "grad_norm": 1.8814215660095215, "learning_rate": 1.1398920895840561e-05, "loss": 0.4021, "step": 7662 }, { "epoch": 0.9325220565865531, "grad_norm": 3.4482665061950684, "learning_rate": 1.1396989356908899e-05, "loss": 0.4154, "step": 7663 }, { "epoch": 0.9326437480985701, "grad_norm": 1.482643723487854, "learning_rate": 1.13950577648189e-05, "loss": 0.4834, "step": 7664 }, { "epoch": 0.9327654396105871, "grad_norm": 0.714070737361908, "learning_rate": 1.1393126119644068e-05, "loss": 0.422, "step": 7665 }, { "epoch": 0.9328871311226042, "grad_norm": 2.219923496246338, "learning_rate": 1.1391194421457905e-05, "loss": 0.4054, "step": 7666 }, { "epoch": 0.9330088226346213, "grad_norm": 1.8329023122787476, "learning_rate": 1.138926267033391e-05, "loss": 0.4334, "step": 7667 }, { "epoch": 0.9331305141466383, "grad_norm": 2.2366943359375, "learning_rate": 1.1387330866345596e-05, "loss": 0.4228, "step": 7668 }, { "epoch": 0.9332522056586553, "grad_norm": 4.111137390136719, "learning_rate": 1.1385399009566473e-05, "loss": 0.5119, "step": 7669 }, { "epoch": 0.9333738971706723, "grad_norm": 2.5041019916534424, "learning_rate": 1.1383467100070046e-05, "loss": 0.4909, "step": 7670 }, { "epoch": 0.9334955886826893, "grad_norm": 2.5130231380462646, "learning_rate": 1.1381535137929837e-05, "loss": 0.4771, "step": 7671 }, { "epoch": 0.9336172801947065, "grad_norm": 1.2716031074523926, "learning_rate": 1.1379603123219353e-05, "loss": 0.4239, "step": 7672 }, { "epoch": 0.9337389717067235, "grad_norm": 1.0430330038070679, "learning_rate": 1.1377671056012119e-05, "loss": 0.4412, "step": 7673 }, { "epoch": 0.9338606632187405, "grad_norm": 2.0244851112365723, "learning_rate": 1.1375738936381644e-05, "loss": 0.4621, "step": 7674 }, { "epoch": 0.9339823547307575, "grad_norm": 1.8305261135101318, "learning_rate": 1.1373806764401459e-05, "loss": 0.4551, "step": 7675 }, { "epoch": 0.9341040462427745, "grad_norm": 2.0901358127593994, "learning_rate": 1.137187454014508e-05, "loss": 0.4825, "step": 7676 }, { "epoch": 0.9342257377547916, "grad_norm": 2.215219020843506, "learning_rate": 1.1369942263686038e-05, "loss": 0.5068, "step": 7677 }, { "epoch": 0.9343474292668087, "grad_norm": 1.669443964958191, "learning_rate": 1.1368009935097856e-05, "loss": 0.4864, "step": 7678 }, { "epoch": 0.9344691207788257, "grad_norm": 2.9809720516204834, "learning_rate": 1.1366077554454062e-05, "loss": 0.4143, "step": 7679 }, { "epoch": 0.9345908122908427, "grad_norm": 3.228161334991455, "learning_rate": 1.1364145121828195e-05, "loss": 0.4205, "step": 7680 }, { "epoch": 0.9347125038028598, "grad_norm": 3.5505542755126953, "learning_rate": 1.1362212637293777e-05, "loss": 0.4767, "step": 7681 }, { "epoch": 0.9348341953148768, "grad_norm": 2.12424373626709, "learning_rate": 1.136028010092435e-05, "loss": 0.4426, "step": 7682 }, { "epoch": 0.9349558868268938, "grad_norm": 0.9856480956077576, "learning_rate": 1.1358347512793451e-05, "loss": 0.4322, "step": 7683 }, { "epoch": 0.9350775783389108, "grad_norm": 1.7891829013824463, "learning_rate": 1.1356414872974617e-05, "loss": 0.4414, "step": 7684 }, { "epoch": 0.9351992698509279, "grad_norm": 0.9244073033332825, "learning_rate": 1.1354482181541389e-05, "loss": 0.4606, "step": 7685 }, { "epoch": 0.935320961362945, "grad_norm": 0.7673761248588562, "learning_rate": 1.1352549438567308e-05, "loss": 0.4425, "step": 7686 }, { "epoch": 0.935442652874962, "grad_norm": 0.7276907563209534, "learning_rate": 1.1350616644125925e-05, "loss": 0.4191, "step": 7687 }, { "epoch": 0.935564344386979, "grad_norm": 2.1830379962921143, "learning_rate": 1.1348683798290782e-05, "loss": 0.3948, "step": 7688 }, { "epoch": 0.935686035898996, "grad_norm": 1.2450603246688843, "learning_rate": 1.1346750901135431e-05, "loss": 0.468, "step": 7689 }, { "epoch": 0.935807727411013, "grad_norm": 1.1602596044540405, "learning_rate": 1.1344817952733416e-05, "loss": 0.4631, "step": 7690 }, { "epoch": 0.9359294189230302, "grad_norm": 1.6805158853530884, "learning_rate": 1.1342884953158295e-05, "loss": 0.4661, "step": 7691 }, { "epoch": 0.9360511104350472, "grad_norm": 3.5069258213043213, "learning_rate": 1.1340951902483623e-05, "loss": 0.4618, "step": 7692 }, { "epoch": 0.9361728019470642, "grad_norm": 1.3578343391418457, "learning_rate": 1.1339018800782956e-05, "loss": 0.4815, "step": 7693 }, { "epoch": 0.9362944934590812, "grad_norm": 2.591719388961792, "learning_rate": 1.1337085648129853e-05, "loss": 0.4983, "step": 7694 }, { "epoch": 0.9364161849710982, "grad_norm": 3.20554256439209, "learning_rate": 1.1335152444597872e-05, "loss": 0.4021, "step": 7695 }, { "epoch": 0.9365378764831153, "grad_norm": 1.2479726076126099, "learning_rate": 1.1333219190260578e-05, "loss": 0.4433, "step": 7696 }, { "epoch": 0.9366595679951324, "grad_norm": 3.1756629943847656, "learning_rate": 1.1331285885191533e-05, "loss": 0.436, "step": 7697 }, { "epoch": 0.9367812595071494, "grad_norm": 1.2032915353775024, "learning_rate": 1.1329352529464304e-05, "loss": 0.4449, "step": 7698 }, { "epoch": 0.9369029510191664, "grad_norm": 1.4280959367752075, "learning_rate": 1.1327419123152461e-05, "loss": 0.441, "step": 7699 }, { "epoch": 0.9370246425311834, "grad_norm": 0.6721690893173218, "learning_rate": 1.1325485666329573e-05, "loss": 0.4451, "step": 7700 }, { "epoch": 0.9371463340432005, "grad_norm": 1.0141255855560303, "learning_rate": 1.1323552159069211e-05, "loss": 0.4365, "step": 7701 }, { "epoch": 0.9372680255552175, "grad_norm": 1.9130144119262695, "learning_rate": 1.1321618601444947e-05, "loss": 0.4723, "step": 7702 }, { "epoch": 0.9373897170672345, "grad_norm": 1.519945502281189, "learning_rate": 1.1319684993530366e-05, "loss": 0.4234, "step": 7703 }, { "epoch": 0.9375114085792516, "grad_norm": 1.5344319343566895, "learning_rate": 1.1317751335399034e-05, "loss": 0.4235, "step": 7704 }, { "epoch": 0.9376331000912687, "grad_norm": 0.768884539604187, "learning_rate": 1.1315817627124538e-05, "loss": 0.3798, "step": 7705 }, { "epoch": 0.9377547916032857, "grad_norm": 2.5807077884674072, "learning_rate": 1.1313883868780458e-05, "loss": 0.4691, "step": 7706 }, { "epoch": 0.9378764831153027, "grad_norm": 2.1828975677490234, "learning_rate": 1.1311950060440377e-05, "loss": 0.4533, "step": 7707 }, { "epoch": 0.9379981746273197, "grad_norm": 2.83746337890625, "learning_rate": 1.131001620217788e-05, "loss": 0.4487, "step": 7708 }, { "epoch": 0.9381198661393367, "grad_norm": 2.4260363578796387, "learning_rate": 1.1308082294066556e-05, "loss": 0.4927, "step": 7709 }, { "epoch": 0.9382415576513539, "grad_norm": 0.9822359085083008, "learning_rate": 1.1306148336179992e-05, "loss": 0.46, "step": 7710 }, { "epoch": 0.9383632491633709, "grad_norm": 2.834970712661743, "learning_rate": 1.1304214328591783e-05, "loss": 0.5125, "step": 7711 }, { "epoch": 0.9384849406753879, "grad_norm": 1.2395174503326416, "learning_rate": 1.1302280271375516e-05, "loss": 0.5118, "step": 7712 }, { "epoch": 0.9386066321874049, "grad_norm": 0.6867635250091553, "learning_rate": 1.130034616460479e-05, "loss": 0.4497, "step": 7713 }, { "epoch": 0.9387283236994219, "grad_norm": 1.8402960300445557, "learning_rate": 1.1298412008353201e-05, "loss": 0.441, "step": 7714 }, { "epoch": 0.938850015211439, "grad_norm": 2.4694528579711914, "learning_rate": 1.1296477802694345e-05, "loss": 0.4654, "step": 7715 }, { "epoch": 0.9389717067234561, "grad_norm": 0.6076860427856445, "learning_rate": 1.1294543547701828e-05, "loss": 0.4502, "step": 7716 }, { "epoch": 0.9390933982354731, "grad_norm": 0.8330790996551514, "learning_rate": 1.129260924344925e-05, "loss": 0.4533, "step": 7717 }, { "epoch": 0.9392150897474901, "grad_norm": 1.718134880065918, "learning_rate": 1.1290674890010216e-05, "loss": 0.4553, "step": 7718 }, { "epoch": 0.9393367812595071, "grad_norm": 2.0217318534851074, "learning_rate": 1.1288740487458327e-05, "loss": 0.4683, "step": 7719 }, { "epoch": 0.9394584727715242, "grad_norm": 2.768575429916382, "learning_rate": 1.1286806035867195e-05, "loss": 0.4406, "step": 7720 }, { "epoch": 0.9395801642835412, "grad_norm": 0.8233996033668518, "learning_rate": 1.1284871535310432e-05, "loss": 0.5018, "step": 7721 }, { "epoch": 0.9397018557955582, "grad_norm": 2.154956579208374, "learning_rate": 1.1282936985861647e-05, "loss": 0.4169, "step": 7722 }, { "epoch": 0.9398235473075753, "grad_norm": 0.7472934126853943, "learning_rate": 1.1281002387594455e-05, "loss": 0.4237, "step": 7723 }, { "epoch": 0.9399452388195924, "grad_norm": 3.3204634189605713, "learning_rate": 1.1279067740582468e-05, "loss": 0.5002, "step": 7724 }, { "epoch": 0.9400669303316094, "grad_norm": 1.817721962928772, "learning_rate": 1.1277133044899307e-05, "loss": 0.4804, "step": 7725 }, { "epoch": 0.9401886218436264, "grad_norm": 1.491402506828308, "learning_rate": 1.1275198300618591e-05, "loss": 0.4862, "step": 7726 }, { "epoch": 0.9403103133556434, "grad_norm": 0.7314885258674622, "learning_rate": 1.127326350781394e-05, "loss": 0.4537, "step": 7727 }, { "epoch": 0.9404320048676604, "grad_norm": 1.3579291105270386, "learning_rate": 1.1271328666558978e-05, "loss": 0.4434, "step": 7728 }, { "epoch": 0.9405536963796776, "grad_norm": 1.0967844724655151, "learning_rate": 1.1269393776927327e-05, "loss": 0.4219, "step": 7729 }, { "epoch": 0.9406753878916946, "grad_norm": 0.7108358144760132, "learning_rate": 1.1267458838992616e-05, "loss": 0.4716, "step": 7730 }, { "epoch": 0.9407970794037116, "grad_norm": 0.7127029895782471, "learning_rate": 1.126552385282847e-05, "loss": 0.452, "step": 7731 }, { "epoch": 0.9409187709157286, "grad_norm": 0.7911932468414307, "learning_rate": 1.1263588818508524e-05, "loss": 0.4367, "step": 7732 }, { "epoch": 0.9410404624277456, "grad_norm": 0.7735646367073059, "learning_rate": 1.1261653736106411e-05, "loss": 0.452, "step": 7733 }, { "epoch": 0.9411621539397627, "grad_norm": 1.4993888139724731, "learning_rate": 1.125971860569576e-05, "loss": 0.4146, "step": 7734 }, { "epoch": 0.9412838454517798, "grad_norm": 1.8502323627471924, "learning_rate": 1.1257783427350207e-05, "loss": 0.4132, "step": 7735 }, { "epoch": 0.9414055369637968, "grad_norm": 1.0543010234832764, "learning_rate": 1.125584820114339e-05, "loss": 0.3663, "step": 7736 }, { "epoch": 0.9415272284758138, "grad_norm": 0.7789441347122192, "learning_rate": 1.1253912927148954e-05, "loss": 0.46, "step": 7737 }, { "epoch": 0.9416489199878308, "grad_norm": 3.0533335208892822, "learning_rate": 1.1251977605440532e-05, "loss": 0.4897, "step": 7738 }, { "epoch": 0.9417706114998479, "grad_norm": 2.086733818054199, "learning_rate": 1.125004223609177e-05, "loss": 0.4754, "step": 7739 }, { "epoch": 0.9418923030118649, "grad_norm": 2.711740732192993, "learning_rate": 1.1248106819176317e-05, "loss": 0.4434, "step": 7740 }, { "epoch": 0.942013994523882, "grad_norm": 1.6003062725067139, "learning_rate": 1.1246171354767813e-05, "loss": 0.4299, "step": 7741 }, { "epoch": 0.942135686035899, "grad_norm": 0.9266567230224609, "learning_rate": 1.1244235842939912e-05, "loss": 0.4865, "step": 7742 }, { "epoch": 0.942257377547916, "grad_norm": 1.6402803659439087, "learning_rate": 1.1242300283766258e-05, "loss": 0.4187, "step": 7743 }, { "epoch": 0.9423790690599331, "grad_norm": 1.8191728591918945, "learning_rate": 1.1240364677320513e-05, "loss": 0.4103, "step": 7744 }, { "epoch": 0.9425007605719501, "grad_norm": 1.5551636219024658, "learning_rate": 1.1238429023676317e-05, "loss": 0.4612, "step": 7745 }, { "epoch": 0.9426224520839671, "grad_norm": 2.4148149490356445, "learning_rate": 1.1236493322907341e-05, "loss": 0.5204, "step": 7746 }, { "epoch": 0.9427441435959841, "grad_norm": 1.983602523803711, "learning_rate": 1.123455757508723e-05, "loss": 0.454, "step": 7747 }, { "epoch": 0.9428658351080013, "grad_norm": 1.3249270915985107, "learning_rate": 1.123262178028965e-05, "loss": 0.422, "step": 7748 }, { "epoch": 0.9429875266200183, "grad_norm": 1.3510316610336304, "learning_rate": 1.1230685938588257e-05, "loss": 0.5, "step": 7749 }, { "epoch": 0.9431092181320353, "grad_norm": 1.6859837770462036, "learning_rate": 1.1228750050056718e-05, "loss": 0.4248, "step": 7750 }, { "epoch": 0.9432309096440523, "grad_norm": 0.818302571773529, "learning_rate": 1.1226814114768696e-05, "loss": 0.4821, "step": 7751 }, { "epoch": 0.9433526011560693, "grad_norm": 1.6927692890167236, "learning_rate": 1.122487813279786e-05, "loss": 0.4331, "step": 7752 }, { "epoch": 0.9434742926680864, "grad_norm": 0.570755124092102, "learning_rate": 1.1222942104217874e-05, "loss": 0.4724, "step": 7753 }, { "epoch": 0.9435959841801035, "grad_norm": 4.1477837562561035, "learning_rate": 1.1221006029102408e-05, "loss": 0.5048, "step": 7754 }, { "epoch": 0.9437176756921205, "grad_norm": 3.0145456790924072, "learning_rate": 1.1219069907525136e-05, "loss": 0.5098, "step": 7755 }, { "epoch": 0.9438393672041375, "grad_norm": 0.701015293598175, "learning_rate": 1.1217133739559731e-05, "loss": 0.4806, "step": 7756 }, { "epoch": 0.9439610587161545, "grad_norm": 0.7368866801261902, "learning_rate": 1.121519752527987e-05, "loss": 0.4193, "step": 7757 }, { "epoch": 0.9440827502281716, "grad_norm": 0.7859698534011841, "learning_rate": 1.1213261264759226e-05, "loss": 0.4399, "step": 7758 }, { "epoch": 0.9442044417401886, "grad_norm": 0.7844321727752686, "learning_rate": 1.1211324958071477e-05, "loss": 0.435, "step": 7759 }, { "epoch": 0.9443261332522057, "grad_norm": 1.2891095876693726, "learning_rate": 1.120938860529031e-05, "loss": 0.423, "step": 7760 }, { "epoch": 0.9444478247642227, "grad_norm": 1.3656929731369019, "learning_rate": 1.1207452206489402e-05, "loss": 0.454, "step": 7761 }, { "epoch": 0.9445695162762398, "grad_norm": 3.1759777069091797, "learning_rate": 1.1205515761742436e-05, "loss": 0.5318, "step": 7762 }, { "epoch": 0.9446912077882568, "grad_norm": 2.6389236450195312, "learning_rate": 1.1203579271123103e-05, "loss": 0.5193, "step": 7763 }, { "epoch": 0.9448128993002738, "grad_norm": 3.1276309490203857, "learning_rate": 1.1201642734705089e-05, "loss": 0.4167, "step": 7764 }, { "epoch": 0.9449345908122908, "grad_norm": 2.766252040863037, "learning_rate": 1.1199706152562077e-05, "loss": 0.4579, "step": 7765 }, { "epoch": 0.9450562823243078, "grad_norm": 2.338665246963501, "learning_rate": 1.1197769524767765e-05, "loss": 0.451, "step": 7766 }, { "epoch": 0.945177973836325, "grad_norm": 3.3780970573425293, "learning_rate": 1.1195832851395844e-05, "loss": 0.5034, "step": 7767 }, { "epoch": 0.945299665348342, "grad_norm": 2.698901891708374, "learning_rate": 1.1193896132520006e-05, "loss": 0.4493, "step": 7768 }, { "epoch": 0.945421356860359, "grad_norm": 1.6006723642349243, "learning_rate": 1.1191959368213952e-05, "loss": 0.4806, "step": 7769 }, { "epoch": 0.945543048372376, "grad_norm": 0.8120681047439575, "learning_rate": 1.1190022558551372e-05, "loss": 0.4707, "step": 7770 }, { "epoch": 0.945664739884393, "grad_norm": 3.7931699752807617, "learning_rate": 1.1188085703605976e-05, "loss": 0.3507, "step": 7771 }, { "epoch": 0.94578643139641, "grad_norm": 1.8634275197982788, "learning_rate": 1.1186148803451455e-05, "loss": 0.3815, "step": 7772 }, { "epoch": 0.9459081229084272, "grad_norm": 1.5907340049743652, "learning_rate": 1.1184211858161517e-05, "loss": 0.4136, "step": 7773 }, { "epoch": 0.9460298144204442, "grad_norm": 0.8339758515357971, "learning_rate": 1.118227486780987e-05, "loss": 0.3881, "step": 7774 }, { "epoch": 0.9461515059324612, "grad_norm": 5.796790599822998, "learning_rate": 1.1180337832470218e-05, "loss": 0.5456, "step": 7775 }, { "epoch": 0.9462731974444782, "grad_norm": 3.344573736190796, "learning_rate": 1.1178400752216265e-05, "loss": 0.4823, "step": 7776 }, { "epoch": 0.9463948889564953, "grad_norm": 6.225649356842041, "learning_rate": 1.1176463627121723e-05, "loss": 0.5337, "step": 7777 }, { "epoch": 0.9465165804685123, "grad_norm": 2.477018117904663, "learning_rate": 1.1174526457260309e-05, "loss": 0.4146, "step": 7778 }, { "epoch": 0.9466382719805294, "grad_norm": 3.7175071239471436, "learning_rate": 1.117258924270573e-05, "loss": 0.4607, "step": 7779 }, { "epoch": 0.9467599634925464, "grad_norm": 3.969531536102295, "learning_rate": 1.1170651983531704e-05, "loss": 0.5194, "step": 7780 }, { "epoch": 0.9468816550045634, "grad_norm": 1.6964995861053467, "learning_rate": 1.1168714679811945e-05, "loss": 0.4055, "step": 7781 }, { "epoch": 0.9470033465165805, "grad_norm": 1.2695993185043335, "learning_rate": 1.1166777331620175e-05, "loss": 0.4172, "step": 7782 }, { "epoch": 0.9471250380285975, "grad_norm": 3.6955606937408447, "learning_rate": 1.1164839939030112e-05, "loss": 0.5375, "step": 7783 }, { "epoch": 0.9472467295406145, "grad_norm": 2.6843903064727783, "learning_rate": 1.1162902502115476e-05, "loss": 0.5435, "step": 7784 }, { "epoch": 0.9473684210526315, "grad_norm": 1.131152868270874, "learning_rate": 1.1160965020949994e-05, "loss": 0.4948, "step": 7785 }, { "epoch": 0.9474901125646487, "grad_norm": 2.265782594680786, "learning_rate": 1.1159027495607395e-05, "loss": 0.4427, "step": 7786 }, { "epoch": 0.9476118040766657, "grad_norm": 4.863142967224121, "learning_rate": 1.1157089926161395e-05, "loss": 0.4785, "step": 7787 }, { "epoch": 0.9477334955886827, "grad_norm": 4.738919734954834, "learning_rate": 1.115515231268573e-05, "loss": 0.4153, "step": 7788 }, { "epoch": 0.9478551871006997, "grad_norm": 5.452841281890869, "learning_rate": 1.1153214655254126e-05, "loss": 0.4179, "step": 7789 }, { "epoch": 0.9479768786127167, "grad_norm": 2.430560827255249, "learning_rate": 1.115127695394032e-05, "loss": 0.4652, "step": 7790 }, { "epoch": 0.9480985701247338, "grad_norm": 1.904536247253418, "learning_rate": 1.1149339208818042e-05, "loss": 0.4662, "step": 7791 }, { "epoch": 0.9482202616367509, "grad_norm": 1.2570780515670776, "learning_rate": 1.114740141996103e-05, "loss": 0.4943, "step": 7792 }, { "epoch": 0.9483419531487679, "grad_norm": 2.6728243827819824, "learning_rate": 1.1145463587443016e-05, "loss": 0.4437, "step": 7793 }, { "epoch": 0.9484636446607849, "grad_norm": 0.8751927018165588, "learning_rate": 1.1143525711337743e-05, "loss": 0.4869, "step": 7794 }, { "epoch": 0.9485853361728019, "grad_norm": 3.1249399185180664, "learning_rate": 1.1141587791718951e-05, "loss": 0.4167, "step": 7795 }, { "epoch": 0.948707027684819, "grad_norm": 0.6778789162635803, "learning_rate": 1.1139649828660377e-05, "loss": 0.4662, "step": 7796 }, { "epoch": 0.948828719196836, "grad_norm": 1.434187650680542, "learning_rate": 1.1137711822235773e-05, "loss": 0.4959, "step": 7797 }, { "epoch": 0.9489504107088531, "grad_norm": 1.2248542308807373, "learning_rate": 1.1135773772518877e-05, "loss": 0.4752, "step": 7798 }, { "epoch": 0.9490721022208701, "grad_norm": 0.7918793559074402, "learning_rate": 1.1133835679583438e-05, "loss": 0.3909, "step": 7799 }, { "epoch": 0.9491937937328871, "grad_norm": 1.0940003395080566, "learning_rate": 1.1131897543503203e-05, "loss": 0.4337, "step": 7800 }, { "epoch": 0.9493154852449042, "grad_norm": 0.6187695264816284, "learning_rate": 1.1129959364351925e-05, "loss": 0.4122, "step": 7801 }, { "epoch": 0.9494371767569212, "grad_norm": 0.647481381893158, "learning_rate": 1.1128021142203354e-05, "loss": 0.4565, "step": 7802 }, { "epoch": 0.9495588682689382, "grad_norm": 1.5043638944625854, "learning_rate": 1.1126082877131244e-05, "loss": 0.4489, "step": 7803 }, { "epoch": 0.9496805597809552, "grad_norm": 3.3977267742156982, "learning_rate": 1.1124144569209347e-05, "loss": 0.4837, "step": 7804 }, { "epoch": 0.9498022512929724, "grad_norm": 0.715659499168396, "learning_rate": 1.1122206218511427e-05, "loss": 0.4442, "step": 7805 }, { "epoch": 0.9499239428049894, "grad_norm": 0.8718197345733643, "learning_rate": 1.1120267825111235e-05, "loss": 0.4297, "step": 7806 }, { "epoch": 0.9500456343170064, "grad_norm": 0.855361819267273, "learning_rate": 1.1118329389082532e-05, "loss": 0.4581, "step": 7807 }, { "epoch": 0.9501673258290234, "grad_norm": 1.6435692310333252, "learning_rate": 1.1116390910499085e-05, "loss": 0.4583, "step": 7808 }, { "epoch": 0.9502890173410404, "grad_norm": 0.9460768699645996, "learning_rate": 1.1114452389434652e-05, "loss": 0.4686, "step": 7809 }, { "epoch": 0.9504107088530575, "grad_norm": 2.108517646789551, "learning_rate": 1.1112513825962994e-05, "loss": 0.4201, "step": 7810 }, { "epoch": 0.9505324003650746, "grad_norm": 1.9522459506988525, "learning_rate": 1.1110575220157886e-05, "loss": 0.4228, "step": 7811 }, { "epoch": 0.9506540918770916, "grad_norm": 1.0424877405166626, "learning_rate": 1.1108636572093092e-05, "loss": 0.3555, "step": 7812 }, { "epoch": 0.9507757833891086, "grad_norm": 0.8412451148033142, "learning_rate": 1.1106697881842381e-05, "loss": 0.4947, "step": 7813 }, { "epoch": 0.9508974749011256, "grad_norm": 1.0469846725463867, "learning_rate": 1.1104759149479525e-05, "loss": 0.4358, "step": 7814 }, { "epoch": 0.9510191664131427, "grad_norm": 0.6246845722198486, "learning_rate": 1.1102820375078296e-05, "loss": 0.4381, "step": 7815 }, { "epoch": 0.9511408579251597, "grad_norm": 1.6863676309585571, "learning_rate": 1.1100881558712473e-05, "loss": 0.4764, "step": 7816 }, { "epoch": 0.9512625494371768, "grad_norm": 2.348029851913452, "learning_rate": 1.1098942700455823e-05, "loss": 0.487, "step": 7817 }, { "epoch": 0.9513842409491938, "grad_norm": 1.1672379970550537, "learning_rate": 1.1097003800382129e-05, "loss": 0.4594, "step": 7818 }, { "epoch": 0.9515059324612108, "grad_norm": 1.862343430519104, "learning_rate": 1.109506485856517e-05, "loss": 0.5072, "step": 7819 }, { "epoch": 0.9516276239732279, "grad_norm": 1.7099848985671997, "learning_rate": 1.1093125875078732e-05, "loss": 0.5179, "step": 7820 }, { "epoch": 0.9517493154852449, "grad_norm": 1.5098011493682861, "learning_rate": 1.1091186849996585e-05, "loss": 0.5294, "step": 7821 }, { "epoch": 0.9518710069972619, "grad_norm": 2.793037176132202, "learning_rate": 1.1089247783392523e-05, "loss": 0.4304, "step": 7822 }, { "epoch": 0.9519926985092789, "grad_norm": 0.8674801588058472, "learning_rate": 1.1087308675340324e-05, "loss": 0.5023, "step": 7823 }, { "epoch": 0.952114390021296, "grad_norm": 6.732608795166016, "learning_rate": 1.1085369525913784e-05, "loss": 0.4349, "step": 7824 }, { "epoch": 0.9522360815333131, "grad_norm": 4.1696014404296875, "learning_rate": 1.1083430335186687e-05, "loss": 0.4973, "step": 7825 }, { "epoch": 0.9523577730453301, "grad_norm": 5.230327129364014, "learning_rate": 1.1081491103232822e-05, "loss": 0.4882, "step": 7826 }, { "epoch": 0.9524794645573471, "grad_norm": 4.096656799316406, "learning_rate": 1.107955183012598e-05, "loss": 0.4937, "step": 7827 }, { "epoch": 0.9526011560693641, "grad_norm": 5.5169572830200195, "learning_rate": 1.1077612515939959e-05, "loss": 0.4352, "step": 7828 }, { "epoch": 0.9527228475813811, "grad_norm": 1.130476713180542, "learning_rate": 1.107567316074855e-05, "loss": 0.4749, "step": 7829 }, { "epoch": 0.9528445390933983, "grad_norm": 3.9312586784362793, "learning_rate": 1.107373376462555e-05, "loss": 0.4433, "step": 7830 }, { "epoch": 0.9529662306054153, "grad_norm": 1.3090933561325073, "learning_rate": 1.107179432764476e-05, "loss": 0.5189, "step": 7831 }, { "epoch": 0.9530879221174323, "grad_norm": 1.763823390007019, "learning_rate": 1.1069854849879977e-05, "loss": 0.384, "step": 7832 }, { "epoch": 0.9532096136294493, "grad_norm": 1.0364371538162231, "learning_rate": 1.1067915331405002e-05, "loss": 0.4412, "step": 7833 }, { "epoch": 0.9533313051414664, "grad_norm": 4.0301642417907715, "learning_rate": 1.1065975772293635e-05, "loss": 0.4838, "step": 7834 }, { "epoch": 0.9534529966534834, "grad_norm": 2.8961918354034424, "learning_rate": 1.1064036172619688e-05, "loss": 0.4697, "step": 7835 }, { "epoch": 0.9535746881655005, "grad_norm": 4.301506042480469, "learning_rate": 1.1062096532456958e-05, "loss": 0.4825, "step": 7836 }, { "epoch": 0.9536963796775175, "grad_norm": 4.657220363616943, "learning_rate": 1.1060156851879257e-05, "loss": 0.5186, "step": 7837 }, { "epoch": 0.9538180711895345, "grad_norm": 0.9875574707984924, "learning_rate": 1.1058217130960398e-05, "loss": 0.3917, "step": 7838 }, { "epoch": 0.9539397627015516, "grad_norm": 3.456489086151123, "learning_rate": 1.1056277369774186e-05, "loss": 0.4665, "step": 7839 }, { "epoch": 0.9540614542135686, "grad_norm": 2.8122589588165283, "learning_rate": 1.105433756839443e-05, "loss": 0.4774, "step": 7840 }, { "epoch": 0.9541831457255856, "grad_norm": 0.8617849946022034, "learning_rate": 1.1052397726894949e-05, "loss": 0.3969, "step": 7841 }, { "epoch": 0.9543048372376027, "grad_norm": 0.8713522553443909, "learning_rate": 1.1050457845349557e-05, "loss": 0.4523, "step": 7842 }, { "epoch": 0.9544265287496198, "grad_norm": 1.827952265739441, "learning_rate": 1.104851792383207e-05, "loss": 0.4539, "step": 7843 }, { "epoch": 0.9545482202616368, "grad_norm": 1.5133836269378662, "learning_rate": 1.1046577962416303e-05, "loss": 0.4833, "step": 7844 }, { "epoch": 0.9546699117736538, "grad_norm": 2.6935362815856934, "learning_rate": 1.1044637961176079e-05, "loss": 0.4413, "step": 7845 }, { "epoch": 0.9547916032856708, "grad_norm": 2.455949068069458, "learning_rate": 1.1042697920185218e-05, "loss": 0.466, "step": 7846 }, { "epoch": 0.9549132947976878, "grad_norm": 3.4989287853240967, "learning_rate": 1.1040757839517544e-05, "loss": 0.4032, "step": 7847 }, { "epoch": 0.9550349863097048, "grad_norm": 4.018179416656494, "learning_rate": 1.103881771924688e-05, "loss": 0.4783, "step": 7848 }, { "epoch": 0.955156677821722, "grad_norm": 2.9321541786193848, "learning_rate": 1.1036877559447052e-05, "loss": 0.437, "step": 7849 }, { "epoch": 0.955278369333739, "grad_norm": 1.1128114461898804, "learning_rate": 1.1034937360191887e-05, "loss": 0.489, "step": 7850 }, { "epoch": 0.955400060845756, "grad_norm": 1.6909722089767456, "learning_rate": 1.103299712155521e-05, "loss": 0.4762, "step": 7851 }, { "epoch": 0.955521752357773, "grad_norm": 1.436305284500122, "learning_rate": 1.1031056843610856e-05, "loss": 0.437, "step": 7852 }, { "epoch": 0.95564344386979, "grad_norm": 2.8903472423553467, "learning_rate": 1.1029116526432655e-05, "loss": 0.5151, "step": 7853 }, { "epoch": 0.9557651353818071, "grad_norm": 1.2533063888549805, "learning_rate": 1.1027176170094442e-05, "loss": 0.4662, "step": 7854 }, { "epoch": 0.9558868268938242, "grad_norm": 0.7719179391860962, "learning_rate": 1.1025235774670048e-05, "loss": 0.417, "step": 7855 }, { "epoch": 0.9560085184058412, "grad_norm": 2.024005174636841, "learning_rate": 1.102329534023331e-05, "loss": 0.4928, "step": 7856 }, { "epoch": 0.9561302099178582, "grad_norm": 2.4934709072113037, "learning_rate": 1.1021354866858067e-05, "loss": 0.4653, "step": 7857 }, { "epoch": 0.9562519014298753, "grad_norm": 1.243676781654358, "learning_rate": 1.1019414354618158e-05, "loss": 0.3537, "step": 7858 }, { "epoch": 0.9563735929418923, "grad_norm": 2.9375689029693604, "learning_rate": 1.101747380358742e-05, "loss": 0.5234, "step": 7859 }, { "epoch": 0.9564952844539093, "grad_norm": 0.78957599401474, "learning_rate": 1.10155332138397e-05, "loss": 0.4446, "step": 7860 }, { "epoch": 0.9566169759659264, "grad_norm": 0.7856225967407227, "learning_rate": 1.101359258544884e-05, "loss": 0.4417, "step": 7861 }, { "epoch": 0.9567386674779434, "grad_norm": 0.7760050296783447, "learning_rate": 1.1011651918488683e-05, "loss": 0.4326, "step": 7862 }, { "epoch": 0.9568603589899605, "grad_norm": 1.8795158863067627, "learning_rate": 1.1009711213033076e-05, "loss": 0.5253, "step": 7863 }, { "epoch": 0.9569820505019775, "grad_norm": 1.5947014093399048, "learning_rate": 1.1007770469155865e-05, "loss": 0.5076, "step": 7864 }, { "epoch": 0.9571037420139945, "grad_norm": 1.5609192848205566, "learning_rate": 1.1005829686930906e-05, "loss": 0.4749, "step": 7865 }, { "epoch": 0.9572254335260115, "grad_norm": 1.0820679664611816, "learning_rate": 1.1003888866432047e-05, "loss": 0.5074, "step": 7866 }, { "epoch": 0.9573471250380285, "grad_norm": 1.0442790985107422, "learning_rate": 1.1001948007733135e-05, "loss": 0.478, "step": 7867 }, { "epoch": 0.9574688165500457, "grad_norm": 1.3109970092773438, "learning_rate": 1.1000007110908025e-05, "loss": 0.4551, "step": 7868 }, { "epoch": 0.9575905080620627, "grad_norm": 1.043371558189392, "learning_rate": 1.099806617603058e-05, "loss": 0.461, "step": 7869 }, { "epoch": 0.9577121995740797, "grad_norm": 4.1665778160095215, "learning_rate": 1.0996125203174645e-05, "loss": 0.4379, "step": 7870 }, { "epoch": 0.9578338910860967, "grad_norm": 0.6852086186408997, "learning_rate": 1.0994184192414088e-05, "loss": 0.4537, "step": 7871 }, { "epoch": 0.9579555825981138, "grad_norm": 0.8128134608268738, "learning_rate": 1.0992243143822764e-05, "loss": 0.4858, "step": 7872 }, { "epoch": 0.9580772741101308, "grad_norm": 0.9471670389175415, "learning_rate": 1.0990302057474537e-05, "loss": 0.4835, "step": 7873 }, { "epoch": 0.9581989656221479, "grad_norm": 0.9546118974685669, "learning_rate": 1.0988360933443264e-05, "loss": 0.4539, "step": 7874 }, { "epoch": 0.9583206571341649, "grad_norm": 0.7892980575561523, "learning_rate": 1.0986419771802812e-05, "loss": 0.446, "step": 7875 }, { "epoch": 0.9584423486461819, "grad_norm": 2.0440328121185303, "learning_rate": 1.0984478572627049e-05, "loss": 0.4483, "step": 7876 }, { "epoch": 0.958564040158199, "grad_norm": 2.138730525970459, "learning_rate": 1.0982537335989833e-05, "loss": 0.4118, "step": 7877 }, { "epoch": 0.958685731670216, "grad_norm": 0.5984135270118713, "learning_rate": 1.0980596061965043e-05, "loss": 0.4379, "step": 7878 }, { "epoch": 0.958807423182233, "grad_norm": 2.018582820892334, "learning_rate": 1.0978654750626538e-05, "loss": 0.4464, "step": 7879 }, { "epoch": 0.9589291146942501, "grad_norm": 1.2306653261184692, "learning_rate": 1.09767134020482e-05, "loss": 0.4465, "step": 7880 }, { "epoch": 0.9590508062062671, "grad_norm": 1.4559270143508911, "learning_rate": 1.0974772016303889e-05, "loss": 0.4586, "step": 7881 }, { "epoch": 0.9591724977182842, "grad_norm": 2.112050771713257, "learning_rate": 1.097283059346749e-05, "loss": 0.4912, "step": 7882 }, { "epoch": 0.9592941892303012, "grad_norm": 2.625102996826172, "learning_rate": 1.097088913361287e-05, "loss": 0.5085, "step": 7883 }, { "epoch": 0.9594158807423182, "grad_norm": 0.7203896045684814, "learning_rate": 1.0968947636813913e-05, "loss": 0.4644, "step": 7884 }, { "epoch": 0.9595375722543352, "grad_norm": 0.7176701426506042, "learning_rate": 1.0967006103144488e-05, "loss": 0.4436, "step": 7885 }, { "epoch": 0.9596592637663522, "grad_norm": 1.31585693359375, "learning_rate": 1.0965064532678483e-05, "loss": 0.4951, "step": 7886 }, { "epoch": 0.9597809552783694, "grad_norm": 2.936957359313965, "learning_rate": 1.096312292548977e-05, "loss": 0.4465, "step": 7887 }, { "epoch": 0.9599026467903864, "grad_norm": 1.3865190744400024, "learning_rate": 1.096118128165224e-05, "loss": 0.5442, "step": 7888 }, { "epoch": 0.9600243383024034, "grad_norm": 2.1573688983917236, "learning_rate": 1.0959239601239773e-05, "loss": 0.4627, "step": 7889 }, { "epoch": 0.9601460298144204, "grad_norm": 2.1060612201690674, "learning_rate": 1.0957297884326252e-05, "loss": 0.5051, "step": 7890 }, { "epoch": 0.9602677213264375, "grad_norm": 3.0208218097686768, "learning_rate": 1.0955356130985566e-05, "loss": 0.4453, "step": 7891 }, { "epoch": 0.9603894128384545, "grad_norm": 3.036423444747925, "learning_rate": 1.0953414341291602e-05, "loss": 0.4282, "step": 7892 }, { "epoch": 0.9605111043504716, "grad_norm": 4.146114826202393, "learning_rate": 1.0951472515318249e-05, "loss": 0.3671, "step": 7893 }, { "epoch": 0.9606327958624886, "grad_norm": 1.8409923315048218, "learning_rate": 1.0949530653139395e-05, "loss": 0.4574, "step": 7894 }, { "epoch": 0.9607544873745056, "grad_norm": 0.992895781993866, "learning_rate": 1.0947588754828937e-05, "loss": 0.452, "step": 7895 }, { "epoch": 0.9608761788865227, "grad_norm": 3.341982364654541, "learning_rate": 1.0945646820460765e-05, "loss": 0.4647, "step": 7896 }, { "epoch": 0.9609978703985397, "grad_norm": 0.7378147840499878, "learning_rate": 1.0943704850108774e-05, "loss": 0.3954, "step": 7897 }, { "epoch": 0.9611195619105567, "grad_norm": 0.9263473749160767, "learning_rate": 1.0941762843846857e-05, "loss": 0.3905, "step": 7898 }, { "epoch": 0.9612412534225738, "grad_norm": 5.433279037475586, "learning_rate": 1.0939820801748919e-05, "loss": 0.5553, "step": 7899 }, { "epoch": 0.9613629449345908, "grad_norm": 2.567471981048584, "learning_rate": 1.093787872388885e-05, "loss": 0.4695, "step": 7900 }, { "epoch": 0.9614846364466079, "grad_norm": 4.649729251861572, "learning_rate": 1.0935936610340559e-05, "loss": 0.4914, "step": 7901 }, { "epoch": 0.9616063279586249, "grad_norm": 2.801370859146118, "learning_rate": 1.093399446117794e-05, "loss": 0.4613, "step": 7902 }, { "epoch": 0.9617280194706419, "grad_norm": 3.3774945735931396, "learning_rate": 1.0932052276474898e-05, "loss": 0.4858, "step": 7903 }, { "epoch": 0.9618497109826589, "grad_norm": 1.2636725902557373, "learning_rate": 1.0930110056305339e-05, "loss": 0.4887, "step": 7904 }, { "epoch": 0.9619714024946759, "grad_norm": 1.8090736865997314, "learning_rate": 1.0928167800743164e-05, "loss": 0.4384, "step": 7905 }, { "epoch": 0.9620930940066931, "grad_norm": 0.94919753074646, "learning_rate": 1.0926225509862288e-05, "loss": 0.4764, "step": 7906 }, { "epoch": 0.9622147855187101, "grad_norm": 2.3158748149871826, "learning_rate": 1.0924283183736613e-05, "loss": 0.4355, "step": 7907 }, { "epoch": 0.9623364770307271, "grad_norm": 3.156244993209839, "learning_rate": 1.0922340822440045e-05, "loss": 0.4394, "step": 7908 }, { "epoch": 0.9624581685427441, "grad_norm": 2.830498218536377, "learning_rate": 1.0920398426046503e-05, "loss": 0.4476, "step": 7909 }, { "epoch": 0.9625798600547611, "grad_norm": 3.9664883613586426, "learning_rate": 1.0918455994629898e-05, "loss": 0.3834, "step": 7910 }, { "epoch": 0.9627015515667782, "grad_norm": 3.5587096214294434, "learning_rate": 1.0916513528264136e-05, "loss": 0.4311, "step": 7911 }, { "epoch": 0.9628232430787953, "grad_norm": 0.6019009947776794, "learning_rate": 1.0914571027023139e-05, "loss": 0.4633, "step": 7912 }, { "epoch": 0.9629449345908123, "grad_norm": 1.592553973197937, "learning_rate": 1.0912628490980826e-05, "loss": 0.4046, "step": 7913 }, { "epoch": 0.9630666261028293, "grad_norm": 1.5847268104553223, "learning_rate": 1.0910685920211106e-05, "loss": 0.4608, "step": 7914 }, { "epoch": 0.9631883176148464, "grad_norm": 0.9771660566329956, "learning_rate": 1.0908743314787901e-05, "loss": 0.4617, "step": 7915 }, { "epoch": 0.9633100091268634, "grad_norm": 1.221008062362671, "learning_rate": 1.0906800674785132e-05, "loss": 0.4406, "step": 7916 }, { "epoch": 0.9634317006388804, "grad_norm": 3.2355761528015137, "learning_rate": 1.0904858000276719e-05, "loss": 0.4787, "step": 7917 }, { "epoch": 0.9635533921508975, "grad_norm": 2.323673963546753, "learning_rate": 1.0902915291336594e-05, "loss": 0.4961, "step": 7918 }, { "epoch": 0.9636750836629145, "grad_norm": 3.9719388484954834, "learning_rate": 1.0900972548038666e-05, "loss": 0.5399, "step": 7919 }, { "epoch": 0.9637967751749316, "grad_norm": 0.7563192844390869, "learning_rate": 1.0899029770456869e-05, "loss": 0.4525, "step": 7920 }, { "epoch": 0.9639184666869486, "grad_norm": 1.9598678350448608, "learning_rate": 1.0897086958665126e-05, "loss": 0.5004, "step": 7921 }, { "epoch": 0.9640401581989656, "grad_norm": 0.7722530364990234, "learning_rate": 1.0895144112737372e-05, "loss": 0.46, "step": 7922 }, { "epoch": 0.9641618497109826, "grad_norm": 0.8511223793029785, "learning_rate": 1.0893201232747527e-05, "loss": 0.4829, "step": 7923 }, { "epoch": 0.9642835412229996, "grad_norm": 0.705536425113678, "learning_rate": 1.089125831876953e-05, "loss": 0.4622, "step": 7924 }, { "epoch": 0.9644052327350168, "grad_norm": 3.8996047973632812, "learning_rate": 1.088931537087731e-05, "loss": 0.4224, "step": 7925 }, { "epoch": 0.9645269242470338, "grad_norm": 1.6934938430786133, "learning_rate": 1.0887372389144797e-05, "loss": 0.4959, "step": 7926 }, { "epoch": 0.9646486157590508, "grad_norm": 2.820828676223755, "learning_rate": 1.0885429373645928e-05, "loss": 0.4182, "step": 7927 }, { "epoch": 0.9647703072710678, "grad_norm": 6.752305507659912, "learning_rate": 1.0883486324454637e-05, "loss": 0.4019, "step": 7928 }, { "epoch": 0.9648919987830848, "grad_norm": 3.006600856781006, "learning_rate": 1.0881543241644864e-05, "loss": 0.4482, "step": 7929 }, { "epoch": 0.9650136902951019, "grad_norm": 2.8017425537109375, "learning_rate": 1.087960012529055e-05, "loss": 0.4382, "step": 7930 }, { "epoch": 0.965135381807119, "grad_norm": 4.913672924041748, "learning_rate": 1.0877656975465625e-05, "loss": 0.4139, "step": 7931 }, { "epoch": 0.965257073319136, "grad_norm": 2.5266127586364746, "learning_rate": 1.0875713792244038e-05, "loss": 0.4548, "step": 7932 }, { "epoch": 0.965378764831153, "grad_norm": 2.3445112705230713, "learning_rate": 1.087377057569973e-05, "loss": 0.3933, "step": 7933 }, { "epoch": 0.96550045634317, "grad_norm": 1.4738410711288452, "learning_rate": 1.0871827325906638e-05, "loss": 0.4935, "step": 7934 }, { "epoch": 0.9656221478551871, "grad_norm": 3.2260611057281494, "learning_rate": 1.0869884042938714e-05, "loss": 0.4784, "step": 7935 }, { "epoch": 0.9657438393672041, "grad_norm": 0.6361846327781677, "learning_rate": 1.0867940726869903e-05, "loss": 0.4354, "step": 7936 }, { "epoch": 0.9658655308792212, "grad_norm": 4.284882545471191, "learning_rate": 1.086599737777415e-05, "loss": 0.5149, "step": 7937 }, { "epoch": 0.9659872223912382, "grad_norm": 3.0723934173583984, "learning_rate": 1.0864053995725405e-05, "loss": 0.4787, "step": 7938 }, { "epoch": 0.9661089139032553, "grad_norm": 1.0320440530776978, "learning_rate": 1.0862110580797615e-05, "loss": 0.459, "step": 7939 }, { "epoch": 0.9662306054152723, "grad_norm": 2.403982639312744, "learning_rate": 1.0860167133064737e-05, "loss": 0.4482, "step": 7940 }, { "epoch": 0.9663522969272893, "grad_norm": 1.6134270429611206, "learning_rate": 1.0858223652600717e-05, "loss": 0.4297, "step": 7941 }, { "epoch": 0.9664739884393063, "grad_norm": 1.1011158227920532, "learning_rate": 1.085628013947951e-05, "loss": 0.4411, "step": 7942 }, { "epoch": 0.9665956799513234, "grad_norm": 2.1511731147766113, "learning_rate": 1.085433659377507e-05, "loss": 0.4684, "step": 7943 }, { "epoch": 0.9667173714633405, "grad_norm": 1.5668193101882935, "learning_rate": 1.0852393015561356e-05, "loss": 0.3859, "step": 7944 }, { "epoch": 0.9668390629753575, "grad_norm": 1.4748427867889404, "learning_rate": 1.0850449404912323e-05, "loss": 0.4024, "step": 7945 }, { "epoch": 0.9669607544873745, "grad_norm": 0.9931533336639404, "learning_rate": 1.0848505761901926e-05, "loss": 0.4947, "step": 7946 }, { "epoch": 0.9670824459993915, "grad_norm": 1.8887418508529663, "learning_rate": 1.0846562086604135e-05, "loss": 0.4781, "step": 7947 }, { "epoch": 0.9672041375114085, "grad_norm": 1.539760947227478, "learning_rate": 1.0844618379092901e-05, "loss": 0.4573, "step": 7948 }, { "epoch": 0.9673258290234256, "grad_norm": 0.7642394304275513, "learning_rate": 1.084267463944219e-05, "loss": 0.4918, "step": 7949 }, { "epoch": 0.9674475205354427, "grad_norm": 0.912656843662262, "learning_rate": 1.0840730867725964e-05, "loss": 0.4864, "step": 7950 }, { "epoch": 0.9675692120474597, "grad_norm": 1.3620154857635498, "learning_rate": 1.0838787064018187e-05, "loss": 0.3881, "step": 7951 }, { "epoch": 0.9676909035594767, "grad_norm": 1.8657649755477905, "learning_rate": 1.0836843228392831e-05, "loss": 0.5264, "step": 7952 }, { "epoch": 0.9678125950714938, "grad_norm": 2.46170711517334, "learning_rate": 1.0834899360923853e-05, "loss": 0.4703, "step": 7953 }, { "epoch": 0.9679342865835108, "grad_norm": 1.167879581451416, "learning_rate": 1.0832955461685228e-05, "loss": 0.3924, "step": 7954 }, { "epoch": 0.9680559780955278, "grad_norm": 0.729956328868866, "learning_rate": 1.0831011530750923e-05, "loss": 0.4273, "step": 7955 }, { "epoch": 0.9681776696075449, "grad_norm": 0.868808388710022, "learning_rate": 1.0829067568194911e-05, "loss": 0.4107, "step": 7956 }, { "epoch": 0.9682993611195619, "grad_norm": 0.6682521104812622, "learning_rate": 1.082712357409116e-05, "loss": 0.4158, "step": 7957 }, { "epoch": 0.968421052631579, "grad_norm": 1.4129011631011963, "learning_rate": 1.0825179548513644e-05, "loss": 0.4656, "step": 7958 }, { "epoch": 0.968542744143596, "grad_norm": 1.1172596216201782, "learning_rate": 1.082323549153634e-05, "loss": 0.4157, "step": 7959 }, { "epoch": 0.968664435655613, "grad_norm": 0.6368606686592102, "learning_rate": 1.0821291403233226e-05, "loss": 0.4312, "step": 7960 }, { "epoch": 0.96878612716763, "grad_norm": 3.5384790897369385, "learning_rate": 1.0819347283678268e-05, "loss": 0.5049, "step": 7961 }, { "epoch": 0.9689078186796471, "grad_norm": 2.799133539199829, "learning_rate": 1.081740313294545e-05, "loss": 0.4962, "step": 7962 }, { "epoch": 0.9690295101916642, "grad_norm": 1.3229289054870605, "learning_rate": 1.0815458951108753e-05, "loss": 0.4062, "step": 7963 }, { "epoch": 0.9691512017036812, "grad_norm": 0.7160334587097168, "learning_rate": 1.0813514738242154e-05, "loss": 0.4056, "step": 7964 }, { "epoch": 0.9692728932156982, "grad_norm": 1.8991798162460327, "learning_rate": 1.0811570494419636e-05, "loss": 0.3743, "step": 7965 }, { "epoch": 0.9693945847277152, "grad_norm": 0.9729048013687134, "learning_rate": 1.080962621971518e-05, "loss": 0.4048, "step": 7966 }, { "epoch": 0.9695162762397322, "grad_norm": 2.3653435707092285, "learning_rate": 1.0807681914202773e-05, "loss": 0.4834, "step": 7967 }, { "epoch": 0.9696379677517493, "grad_norm": 4.200676441192627, "learning_rate": 1.0805737577956393e-05, "loss": 0.5534, "step": 7968 }, { "epoch": 0.9697596592637664, "grad_norm": 0.9078891277313232, "learning_rate": 1.0803793211050032e-05, "loss": 0.4479, "step": 7969 }, { "epoch": 0.9698813507757834, "grad_norm": 1.4626671075820923, "learning_rate": 1.0801848813557677e-05, "loss": 0.4325, "step": 7970 }, { "epoch": 0.9700030422878004, "grad_norm": 1.7735768556594849, "learning_rate": 1.0799904385553315e-05, "loss": 0.4878, "step": 7971 }, { "epoch": 0.9701247337998175, "grad_norm": 2.8348875045776367, "learning_rate": 1.0797959927110934e-05, "loss": 0.423, "step": 7972 }, { "epoch": 0.9702464253118345, "grad_norm": 2.85650897026062, "learning_rate": 1.0796015438304526e-05, "loss": 0.4507, "step": 7973 }, { "epoch": 0.9703681168238515, "grad_norm": 3.913283109664917, "learning_rate": 1.0794070919208084e-05, "loss": 0.4315, "step": 7974 }, { "epoch": 0.9704898083358686, "grad_norm": 4.523223400115967, "learning_rate": 1.0792126369895599e-05, "loss": 0.3945, "step": 7975 }, { "epoch": 0.9706114998478856, "grad_norm": 2.4726908206939697, "learning_rate": 1.0790181790441069e-05, "loss": 0.4747, "step": 7976 }, { "epoch": 0.9707331913599027, "grad_norm": 1.4418996572494507, "learning_rate": 1.0788237180918481e-05, "loss": 0.4491, "step": 7977 }, { "epoch": 0.9708548828719197, "grad_norm": 2.8546974658966064, "learning_rate": 1.0786292541401842e-05, "loss": 0.4155, "step": 7978 }, { "epoch": 0.9709765743839367, "grad_norm": 0.6050379276275635, "learning_rate": 1.078434787196514e-05, "loss": 0.4322, "step": 7979 }, { "epoch": 0.9710982658959537, "grad_norm": 0.8127751350402832, "learning_rate": 1.0782403172682378e-05, "loss": 0.428, "step": 7980 }, { "epoch": 0.9712199574079708, "grad_norm": 3.4307353496551514, "learning_rate": 1.0780458443627558e-05, "loss": 0.4747, "step": 7981 }, { "epoch": 0.9713416489199879, "grad_norm": 1.7517369985580444, "learning_rate": 1.077851368487468e-05, "loss": 0.4546, "step": 7982 }, { "epoch": 0.9714633404320049, "grad_norm": 2.215606212615967, "learning_rate": 1.0776568896497744e-05, "loss": 0.4481, "step": 7983 }, { "epoch": 0.9715850319440219, "grad_norm": 3.4851198196411133, "learning_rate": 1.077462407857075e-05, "loss": 0.5047, "step": 7984 }, { "epoch": 0.9717067234560389, "grad_norm": 1.1918259859085083, "learning_rate": 1.0772679231167709e-05, "loss": 0.4192, "step": 7985 }, { "epoch": 0.9718284149680559, "grad_norm": 1.1821300983428955, "learning_rate": 1.0770734354362627e-05, "loss": 0.4501, "step": 7986 }, { "epoch": 0.971950106480073, "grad_norm": 4.396644115447998, "learning_rate": 1.0768789448229504e-05, "loss": 0.5528, "step": 7987 }, { "epoch": 0.9720717979920901, "grad_norm": 0.6211681962013245, "learning_rate": 1.0766844512842351e-05, "loss": 0.4304, "step": 7988 }, { "epoch": 0.9721934895041071, "grad_norm": 1.9274446964263916, "learning_rate": 1.0764899548275179e-05, "loss": 0.3943, "step": 7989 }, { "epoch": 0.9723151810161241, "grad_norm": 1.3689601421356201, "learning_rate": 1.0762954554601996e-05, "loss": 0.4719, "step": 7990 }, { "epoch": 0.9724368725281411, "grad_norm": 2.6538727283477783, "learning_rate": 1.0761009531896811e-05, "loss": 0.4165, "step": 7991 }, { "epoch": 0.9725585640401582, "grad_norm": 3.45505952835083, "learning_rate": 1.0759064480233639e-05, "loss": 0.3796, "step": 7992 }, { "epoch": 0.9726802555521752, "grad_norm": 0.7931990027427673, "learning_rate": 1.0757119399686494e-05, "loss": 0.4602, "step": 7993 }, { "epoch": 0.9728019470641923, "grad_norm": 2.7926878929138184, "learning_rate": 1.0755174290329386e-05, "loss": 0.4818, "step": 7994 }, { "epoch": 0.9729236385762093, "grad_norm": 2.470961332321167, "learning_rate": 1.0753229152236335e-05, "loss": 0.4783, "step": 7995 }, { "epoch": 0.9730453300882264, "grad_norm": 1.097302794456482, "learning_rate": 1.0751283985481353e-05, "loss": 0.4102, "step": 7996 }, { "epoch": 0.9731670216002434, "grad_norm": 1.4294880628585815, "learning_rate": 1.0749338790138464e-05, "loss": 0.4144, "step": 7997 }, { "epoch": 0.9732887131122604, "grad_norm": 1.4758702516555786, "learning_rate": 1.074739356628168e-05, "loss": 0.3761, "step": 7998 }, { "epoch": 0.9734104046242774, "grad_norm": 3.157487392425537, "learning_rate": 1.0745448313985026e-05, "loss": 0.5063, "step": 7999 }, { "epoch": 0.9735320961362945, "grad_norm": 2.099881410598755, "learning_rate": 1.074350303332252e-05, "loss": 0.4658, "step": 8000 }, { "epoch": 0.9736537876483116, "grad_norm": 2.018212080001831, "learning_rate": 1.0741557724368183e-05, "loss": 0.4919, "step": 8001 }, { "epoch": 0.9737754791603286, "grad_norm": 0.6164096593856812, "learning_rate": 1.073961238719604e-05, "loss": 0.4311, "step": 8002 }, { "epoch": 0.9738971706723456, "grad_norm": 0.6218458414077759, "learning_rate": 1.0737667021880117e-05, "loss": 0.4306, "step": 8003 }, { "epoch": 0.9740188621843626, "grad_norm": 2.3931751251220703, "learning_rate": 1.0735721628494436e-05, "loss": 0.3992, "step": 8004 }, { "epoch": 0.9741405536963796, "grad_norm": 2.135546922683716, "learning_rate": 1.0733776207113025e-05, "loss": 0.5176, "step": 8005 }, { "epoch": 0.9742622452083967, "grad_norm": 0.7601376175880432, "learning_rate": 1.0731830757809908e-05, "loss": 0.4715, "step": 8006 }, { "epoch": 0.9743839367204138, "grad_norm": 0.591479480266571, "learning_rate": 1.0729885280659116e-05, "loss": 0.4278, "step": 8007 }, { "epoch": 0.9745056282324308, "grad_norm": 1.0128854513168335, "learning_rate": 1.0727939775734682e-05, "loss": 0.455, "step": 8008 }, { "epoch": 0.9746273197444478, "grad_norm": 1.5185626745224, "learning_rate": 1.0725994243110628e-05, "loss": 0.4675, "step": 8009 }, { "epoch": 0.9747490112564648, "grad_norm": 0.6635894775390625, "learning_rate": 1.0724048682860995e-05, "loss": 0.4645, "step": 8010 }, { "epoch": 0.9748707027684819, "grad_norm": 2.5288872718811035, "learning_rate": 1.0722103095059806e-05, "loss": 0.4553, "step": 8011 }, { "epoch": 0.9749923942804989, "grad_norm": 1.3128995895385742, "learning_rate": 1.0720157479781103e-05, "loss": 0.464, "step": 8012 }, { "epoch": 0.975114085792516, "grad_norm": 0.8010328412055969, "learning_rate": 1.0718211837098915e-05, "loss": 0.4679, "step": 8013 }, { "epoch": 0.975235777304533, "grad_norm": 0.6297222971916199, "learning_rate": 1.071626616708728e-05, "loss": 0.4577, "step": 8014 }, { "epoch": 0.97535746881655, "grad_norm": 3.365506887435913, "learning_rate": 1.0714320469820236e-05, "loss": 0.3953, "step": 8015 }, { "epoch": 0.9754791603285671, "grad_norm": 1.4621787071228027, "learning_rate": 1.0712374745371822e-05, "loss": 0.4813, "step": 8016 }, { "epoch": 0.9756008518405841, "grad_norm": 0.8708003163337708, "learning_rate": 1.0710428993816073e-05, "loss": 0.4664, "step": 8017 }, { "epoch": 0.9757225433526011, "grad_norm": 1.6532293558120728, "learning_rate": 1.0708483215227028e-05, "loss": 0.4473, "step": 8018 }, { "epoch": 0.9758442348646182, "grad_norm": 2.949024200439453, "learning_rate": 1.070653740967873e-05, "loss": 0.3964, "step": 8019 }, { "epoch": 0.9759659263766353, "grad_norm": 0.9826018810272217, "learning_rate": 1.0704591577245225e-05, "loss": 0.43, "step": 8020 }, { "epoch": 0.9760876178886523, "grad_norm": 1.0905530452728271, "learning_rate": 1.0702645718000549e-05, "loss": 0.435, "step": 8021 }, { "epoch": 0.9762093094006693, "grad_norm": 0.9655706286430359, "learning_rate": 1.0700699832018751e-05, "loss": 0.418, "step": 8022 }, { "epoch": 0.9763310009126863, "grad_norm": 0.6622583866119385, "learning_rate": 1.069875391937387e-05, "loss": 0.4271, "step": 8023 }, { "epoch": 0.9764526924247033, "grad_norm": 2.9581708908081055, "learning_rate": 1.069680798013996e-05, "loss": 0.5099, "step": 8024 }, { "epoch": 0.9765743839367204, "grad_norm": 1.334496021270752, "learning_rate": 1.069486201439106e-05, "loss": 0.4278, "step": 8025 }, { "epoch": 0.9766960754487375, "grad_norm": 2.130648136138916, "learning_rate": 1.0692916022201226e-05, "loss": 0.4931, "step": 8026 }, { "epoch": 0.9768177669607545, "grad_norm": 1.12907075881958, "learning_rate": 1.0690970003644503e-05, "loss": 0.4844, "step": 8027 }, { "epoch": 0.9769394584727715, "grad_norm": 1.4944736957550049, "learning_rate": 1.0689023958794942e-05, "loss": 0.4369, "step": 8028 }, { "epoch": 0.9770611499847885, "grad_norm": 1.1342177391052246, "learning_rate": 1.0687077887726589e-05, "loss": 0.4793, "step": 8029 }, { "epoch": 0.9771828414968056, "grad_norm": 0.7727245688438416, "learning_rate": 1.0685131790513502e-05, "loss": 0.4604, "step": 8030 }, { "epoch": 0.9773045330088226, "grad_norm": 1.1200982332229614, "learning_rate": 1.0683185667229733e-05, "loss": 0.4478, "step": 8031 }, { "epoch": 0.9774262245208397, "grad_norm": 1.6211756467819214, "learning_rate": 1.0681239517949336e-05, "loss": 0.4423, "step": 8032 }, { "epoch": 0.9775479160328567, "grad_norm": 3.1147689819335938, "learning_rate": 1.0679293342746362e-05, "loss": 0.5705, "step": 8033 }, { "epoch": 0.9776696075448738, "grad_norm": 1.3835824728012085, "learning_rate": 1.0677347141694874e-05, "loss": 0.456, "step": 8034 }, { "epoch": 0.9777912990568908, "grad_norm": 1.6693867444992065, "learning_rate": 1.0675400914868924e-05, "loss": 0.4734, "step": 8035 }, { "epoch": 0.9779129905689078, "grad_norm": 2.5441606044769287, "learning_rate": 1.0673454662342571e-05, "loss": 0.4351, "step": 8036 }, { "epoch": 0.9780346820809248, "grad_norm": 3.679476499557495, "learning_rate": 1.0671508384189872e-05, "loss": 0.4211, "step": 8037 }, { "epoch": 0.9781563735929419, "grad_norm": 2.839444637298584, "learning_rate": 1.0669562080484892e-05, "loss": 0.4505, "step": 8038 }, { "epoch": 0.978278065104959, "grad_norm": 3.4824390411376953, "learning_rate": 1.066761575130169e-05, "loss": 0.4268, "step": 8039 }, { "epoch": 0.978399756616976, "grad_norm": 2.349884510040283, "learning_rate": 1.0665669396714322e-05, "loss": 0.4043, "step": 8040 }, { "epoch": 0.978521448128993, "grad_norm": 1.8292826414108276, "learning_rate": 1.0663723016796859e-05, "loss": 0.4386, "step": 8041 }, { "epoch": 0.97864313964101, "grad_norm": 0.918819785118103, "learning_rate": 1.066177661162336e-05, "loss": 0.4734, "step": 8042 }, { "epoch": 0.978764831153027, "grad_norm": 1.3980753421783447, "learning_rate": 1.0659830181267891e-05, "loss": 0.3627, "step": 8043 }, { "epoch": 0.9788865226650442, "grad_norm": 3.8002572059631348, "learning_rate": 1.0657883725804518e-05, "loss": 0.4821, "step": 8044 }, { "epoch": 0.9790082141770612, "grad_norm": 1.8831466436386108, "learning_rate": 1.0655937245307308e-05, "loss": 0.4471, "step": 8045 }, { "epoch": 0.9791299056890782, "grad_norm": 2.864485502243042, "learning_rate": 1.065399073985033e-05, "loss": 0.4561, "step": 8046 }, { "epoch": 0.9792515972010952, "grad_norm": 3.2202043533325195, "learning_rate": 1.0652044209507648e-05, "loss": 0.5084, "step": 8047 }, { "epoch": 0.9793732887131122, "grad_norm": 3.6493804454803467, "learning_rate": 1.0650097654353335e-05, "loss": 0.4981, "step": 8048 }, { "epoch": 0.9794949802251293, "grad_norm": 2.2826626300811768, "learning_rate": 1.0648151074461459e-05, "loss": 0.4728, "step": 8049 }, { "epoch": 0.9796166717371463, "grad_norm": 1.0844839811325073, "learning_rate": 1.0646204469906096e-05, "loss": 0.4585, "step": 8050 }, { "epoch": 0.9797383632491634, "grad_norm": 2.003732204437256, "learning_rate": 1.0644257840761317e-05, "loss": 0.4725, "step": 8051 }, { "epoch": 0.9798600547611804, "grad_norm": 1.4262032508850098, "learning_rate": 1.0642311187101189e-05, "loss": 0.4778, "step": 8052 }, { "epoch": 0.9799817462731975, "grad_norm": 1.154963493347168, "learning_rate": 1.064036450899979e-05, "loss": 0.457, "step": 8053 }, { "epoch": 0.9801034377852145, "grad_norm": 1.2797378301620483, "learning_rate": 1.06384178065312e-05, "loss": 0.4689, "step": 8054 }, { "epoch": 0.9802251292972315, "grad_norm": 1.3024284839630127, "learning_rate": 1.0636471079769488e-05, "loss": 0.4614, "step": 8055 }, { "epoch": 0.9803468208092485, "grad_norm": 3.3173351287841797, "learning_rate": 1.0634524328788736e-05, "loss": 0.4246, "step": 8056 }, { "epoch": 0.9804685123212656, "grad_norm": 3.05373477935791, "learning_rate": 1.0632577553663019e-05, "loss": 0.4104, "step": 8057 }, { "epoch": 0.9805902038332827, "grad_norm": 1.5117512941360474, "learning_rate": 1.0630630754466419e-05, "loss": 0.5033, "step": 8058 }, { "epoch": 0.9807118953452997, "grad_norm": 1.2367396354675293, "learning_rate": 1.0628683931273009e-05, "loss": 0.5227, "step": 8059 }, { "epoch": 0.9808335868573167, "grad_norm": 1.2027482986450195, "learning_rate": 1.0626737084156878e-05, "loss": 0.4124, "step": 8060 }, { "epoch": 0.9809552783693337, "grad_norm": 0.9053662419319153, "learning_rate": 1.0624790213192102e-05, "loss": 0.4552, "step": 8061 }, { "epoch": 0.9810769698813507, "grad_norm": 1.1773267984390259, "learning_rate": 1.0622843318452767e-05, "loss": 0.466, "step": 8062 }, { "epoch": 0.9811986613933679, "grad_norm": 2.4418399333953857, "learning_rate": 1.0620896400012952e-05, "loss": 0.4115, "step": 8063 }, { "epoch": 0.9813203529053849, "grad_norm": 1.4186867475509644, "learning_rate": 1.0618949457946743e-05, "loss": 0.4203, "step": 8064 }, { "epoch": 0.9814420444174019, "grad_norm": 0.6780378818511963, "learning_rate": 1.0617002492328228e-05, "loss": 0.4326, "step": 8065 }, { "epoch": 0.9815637359294189, "grad_norm": 0.7168206572532654, "learning_rate": 1.0615055503231491e-05, "loss": 0.4493, "step": 8066 }, { "epoch": 0.9816854274414359, "grad_norm": 0.6558755040168762, "learning_rate": 1.0613108490730617e-05, "loss": 0.3912, "step": 8067 }, { "epoch": 0.981807118953453, "grad_norm": 2.957249641418457, "learning_rate": 1.06111614548997e-05, "loss": 0.4703, "step": 8068 }, { "epoch": 0.98192881046547, "grad_norm": 0.7346828579902649, "learning_rate": 1.0609214395812821e-05, "loss": 0.4539, "step": 8069 }, { "epoch": 0.9820505019774871, "grad_norm": 0.9538126587867737, "learning_rate": 1.0607267313544074e-05, "loss": 0.4513, "step": 8070 }, { "epoch": 0.9821721934895041, "grad_norm": 2.4194936752319336, "learning_rate": 1.0605320208167549e-05, "loss": 0.3865, "step": 8071 }, { "epoch": 0.9822938850015211, "grad_norm": 1.0028941631317139, "learning_rate": 1.0603373079757338e-05, "loss": 0.4133, "step": 8072 }, { "epoch": 0.9824155765135382, "grad_norm": 2.4903273582458496, "learning_rate": 1.0601425928387533e-05, "loss": 0.3828, "step": 8073 }, { "epoch": 0.9825372680255552, "grad_norm": 1.5925891399383545, "learning_rate": 1.0599478754132224e-05, "loss": 0.501, "step": 8074 }, { "epoch": 0.9826589595375722, "grad_norm": 1.3841291666030884, "learning_rate": 1.0597531557065508e-05, "loss": 0.442, "step": 8075 }, { "epoch": 0.9827806510495893, "grad_norm": 1.2095823287963867, "learning_rate": 1.0595584337261483e-05, "loss": 0.494, "step": 8076 }, { "epoch": 0.9829023425616064, "grad_norm": 1.2328397035598755, "learning_rate": 1.0593637094794236e-05, "loss": 0.4506, "step": 8077 }, { "epoch": 0.9830240340736234, "grad_norm": 0.9837632179260254, "learning_rate": 1.059168982973787e-05, "loss": 0.4636, "step": 8078 }, { "epoch": 0.9831457255856404, "grad_norm": 1.1830470561981201, "learning_rate": 1.0589742542166482e-05, "loss": 0.4417, "step": 8079 }, { "epoch": 0.9832674170976574, "grad_norm": 1.3787841796875, "learning_rate": 1.0587795232154174e-05, "loss": 0.4026, "step": 8080 }, { "epoch": 0.9833891086096744, "grad_norm": 2.046830892562866, "learning_rate": 1.0585847899775034e-05, "loss": 0.5416, "step": 8081 }, { "epoch": 0.9835108001216916, "grad_norm": 1.3431073427200317, "learning_rate": 1.0583900545103171e-05, "loss": 0.4506, "step": 8082 }, { "epoch": 0.9836324916337086, "grad_norm": 3.117535352706909, "learning_rate": 1.0581953168212684e-05, "loss": 0.4619, "step": 8083 }, { "epoch": 0.9837541831457256, "grad_norm": 3.2632980346679688, "learning_rate": 1.0580005769177674e-05, "loss": 0.5134, "step": 8084 }, { "epoch": 0.9838758746577426, "grad_norm": 0.6580621004104614, "learning_rate": 1.0578058348072247e-05, "loss": 0.468, "step": 8085 }, { "epoch": 0.9839975661697596, "grad_norm": 3.4985034465789795, "learning_rate": 1.05761109049705e-05, "loss": 0.3818, "step": 8086 }, { "epoch": 0.9841192576817767, "grad_norm": 0.6641891598701477, "learning_rate": 1.0574163439946541e-05, "loss": 0.4628, "step": 8087 }, { "epoch": 0.9842409491937937, "grad_norm": 2.8652753829956055, "learning_rate": 1.0572215953074475e-05, "loss": 0.4703, "step": 8088 }, { "epoch": 0.9843626407058108, "grad_norm": 1.4694719314575195, "learning_rate": 1.0570268444428406e-05, "loss": 0.4086, "step": 8089 }, { "epoch": 0.9844843322178278, "grad_norm": 0.5935743451118469, "learning_rate": 1.0568320914082444e-05, "loss": 0.4627, "step": 8090 }, { "epoch": 0.9846060237298448, "grad_norm": 0.6337292194366455, "learning_rate": 1.0566373362110695e-05, "loss": 0.4272, "step": 8091 }, { "epoch": 0.9847277152418619, "grad_norm": 1.4187356233596802, "learning_rate": 1.0564425788587269e-05, "loss": 0.3871, "step": 8092 }, { "epoch": 0.9848494067538789, "grad_norm": 3.1834704875946045, "learning_rate": 1.0562478193586272e-05, "loss": 0.3792, "step": 8093 }, { "epoch": 0.9849710982658959, "grad_norm": 0.5928133726119995, "learning_rate": 1.0560530577181813e-05, "loss": 0.4406, "step": 8094 }, { "epoch": 0.985092789777913, "grad_norm": 1.171393871307373, "learning_rate": 1.0558582939448008e-05, "loss": 0.476, "step": 8095 }, { "epoch": 0.98521448128993, "grad_norm": 1.6947165727615356, "learning_rate": 1.0556635280458964e-05, "loss": 0.4708, "step": 8096 }, { "epoch": 0.9853361728019471, "grad_norm": 1.3873652219772339, "learning_rate": 1.0554687600288797e-05, "loss": 0.4371, "step": 8097 }, { "epoch": 0.9854578643139641, "grad_norm": 2.427231788635254, "learning_rate": 1.0552739899011619e-05, "loss": 0.5076, "step": 8098 }, { "epoch": 0.9855795558259811, "grad_norm": 1.2444276809692383, "learning_rate": 1.0550792176701543e-05, "loss": 0.43, "step": 8099 }, { "epoch": 0.9857012473379981, "grad_norm": 2.5057947635650635, "learning_rate": 1.0548844433432685e-05, "loss": 0.4122, "step": 8100 }, { "epoch": 0.9858229388500153, "grad_norm": 0.8455339074134827, "learning_rate": 1.0546896669279158e-05, "loss": 0.3948, "step": 8101 }, { "epoch": 0.9859446303620323, "grad_norm": 1.0947209596633911, "learning_rate": 1.0544948884315085e-05, "loss": 0.4276, "step": 8102 }, { "epoch": 0.9860663218740493, "grad_norm": 0.5759259462356567, "learning_rate": 1.0543001078614576e-05, "loss": 0.4281, "step": 8103 }, { "epoch": 0.9861880133860663, "grad_norm": 2.048048257827759, "learning_rate": 1.0541053252251751e-05, "loss": 0.5305, "step": 8104 }, { "epoch": 0.9863097048980833, "grad_norm": 0.7636784315109253, "learning_rate": 1.0539105405300731e-05, "loss": 0.4454, "step": 8105 }, { "epoch": 0.9864313964101004, "grad_norm": 2.2165699005126953, "learning_rate": 1.0537157537835635e-05, "loss": 0.515, "step": 8106 }, { "epoch": 0.9865530879221174, "grad_norm": 1.2681456804275513, "learning_rate": 1.0535209649930584e-05, "loss": 0.4387, "step": 8107 }, { "epoch": 0.9866747794341345, "grad_norm": 2.2847306728363037, "learning_rate": 1.0533261741659697e-05, "loss": 0.4726, "step": 8108 }, { "epoch": 0.9867964709461515, "grad_norm": 0.9131169319152832, "learning_rate": 1.0531313813097097e-05, "loss": 0.4478, "step": 8109 }, { "epoch": 0.9869181624581685, "grad_norm": 2.2885148525238037, "learning_rate": 1.0529365864316907e-05, "loss": 0.4037, "step": 8110 }, { "epoch": 0.9870398539701856, "grad_norm": 0.6963400840759277, "learning_rate": 1.0527417895393248e-05, "loss": 0.4684, "step": 8111 }, { "epoch": 0.9871615454822026, "grad_norm": 0.8338130712509155, "learning_rate": 1.0525469906400248e-05, "loss": 0.4678, "step": 8112 }, { "epoch": 0.9872832369942196, "grad_norm": 0.8437964916229248, "learning_rate": 1.0523521897412028e-05, "loss": 0.3977, "step": 8113 }, { "epoch": 0.9874049285062367, "grad_norm": 0.8176448941230774, "learning_rate": 1.0521573868502719e-05, "loss": 0.4377, "step": 8114 }, { "epoch": 0.9875266200182538, "grad_norm": 0.7634590268135071, "learning_rate": 1.0519625819746447e-05, "loss": 0.4158, "step": 8115 }, { "epoch": 0.9876483115302708, "grad_norm": 1.011165976524353, "learning_rate": 1.0517677751217332e-05, "loss": 0.4631, "step": 8116 }, { "epoch": 0.9877700030422878, "grad_norm": 1.6365612745285034, "learning_rate": 1.051572966298951e-05, "loss": 0.5118, "step": 8117 }, { "epoch": 0.9878916945543048, "grad_norm": 2.087566614151001, "learning_rate": 1.0513781555137105e-05, "loss": 0.4119, "step": 8118 }, { "epoch": 0.9880133860663218, "grad_norm": 1.306759238243103, "learning_rate": 1.0511833427734249e-05, "loss": 0.4218, "step": 8119 }, { "epoch": 0.988135077578339, "grad_norm": 0.6562801599502563, "learning_rate": 1.0509885280855073e-05, "loss": 0.441, "step": 8120 }, { "epoch": 0.988256769090356, "grad_norm": 0.8264156579971313, "learning_rate": 1.0507937114573703e-05, "loss": 0.375, "step": 8121 }, { "epoch": 0.988378460602373, "grad_norm": 1.1634833812713623, "learning_rate": 1.0505988928964279e-05, "loss": 0.4765, "step": 8122 }, { "epoch": 0.98850015211439, "grad_norm": 1.3989956378936768, "learning_rate": 1.0504040724100925e-05, "loss": 0.4342, "step": 8123 }, { "epoch": 0.988621843626407, "grad_norm": 0.9042601585388184, "learning_rate": 1.0502092500057781e-05, "loss": 0.4459, "step": 8124 }, { "epoch": 0.9887435351384241, "grad_norm": 1.1182591915130615, "learning_rate": 1.0500144256908977e-05, "loss": 0.4743, "step": 8125 }, { "epoch": 0.9888652266504412, "grad_norm": 1.599513053894043, "learning_rate": 1.0498195994728651e-05, "loss": 0.3931, "step": 8126 }, { "epoch": 0.9889869181624582, "grad_norm": 1.2687733173370361, "learning_rate": 1.0496247713590933e-05, "loss": 0.4575, "step": 8127 }, { "epoch": 0.9891086096744752, "grad_norm": 4.393535614013672, "learning_rate": 1.0494299413569962e-05, "loss": 0.5469, "step": 8128 }, { "epoch": 0.9892303011864922, "grad_norm": 3.1072423458099365, "learning_rate": 1.0492351094739879e-05, "loss": 0.4975, "step": 8129 }, { "epoch": 0.9893519926985093, "grad_norm": 1.6055103540420532, "learning_rate": 1.0490402757174814e-05, "loss": 0.4647, "step": 8130 }, { "epoch": 0.9894736842105263, "grad_norm": 0.6324020028114319, "learning_rate": 1.0488454400948911e-05, "loss": 0.4582, "step": 8131 }, { "epoch": 0.9895953757225433, "grad_norm": 0.8509452939033508, "learning_rate": 1.0486506026136304e-05, "loss": 0.5157, "step": 8132 }, { "epoch": 0.9897170672345604, "grad_norm": 3.5176970958709717, "learning_rate": 1.048455763281114e-05, "loss": 0.4282, "step": 8133 }, { "epoch": 0.9898387587465775, "grad_norm": 2.642094135284424, "learning_rate": 1.0482609221047552e-05, "loss": 0.4644, "step": 8134 }, { "epoch": 0.9899604502585945, "grad_norm": 3.5484108924865723, "learning_rate": 1.0480660790919686e-05, "loss": 0.4293, "step": 8135 }, { "epoch": 0.9900821417706115, "grad_norm": 1.7466529607772827, "learning_rate": 1.0478712342501682e-05, "loss": 0.4277, "step": 8136 }, { "epoch": 0.9902038332826285, "grad_norm": 1.8871071338653564, "learning_rate": 1.0476763875867682e-05, "loss": 0.4307, "step": 8137 }, { "epoch": 0.9903255247946455, "grad_norm": 0.8592079877853394, "learning_rate": 1.0474815391091828e-05, "loss": 0.4214, "step": 8138 }, { "epoch": 0.9904472163066627, "grad_norm": 0.5436782836914062, "learning_rate": 1.0472866888248267e-05, "loss": 0.4249, "step": 8139 }, { "epoch": 0.9905689078186797, "grad_norm": 2.1338112354278564, "learning_rate": 1.0470918367411143e-05, "loss": 0.4682, "step": 8140 }, { "epoch": 0.9906905993306967, "grad_norm": 2.0289323329925537, "learning_rate": 1.0468969828654598e-05, "loss": 0.4192, "step": 8141 }, { "epoch": 0.9908122908427137, "grad_norm": 2.854349374771118, "learning_rate": 1.0467021272052782e-05, "loss": 0.4502, "step": 8142 }, { "epoch": 0.9909339823547307, "grad_norm": 0.7675443291664124, "learning_rate": 1.0465072697679842e-05, "loss": 0.3835, "step": 8143 }, { "epoch": 0.9910556738667478, "grad_norm": 4.075288772583008, "learning_rate": 1.0463124105609918e-05, "loss": 0.4956, "step": 8144 }, { "epoch": 0.9911773653787649, "grad_norm": 0.7009711861610413, "learning_rate": 1.0461175495917168e-05, "loss": 0.4574, "step": 8145 }, { "epoch": 0.9912990568907819, "grad_norm": 2.847881317138672, "learning_rate": 1.0459226868675734e-05, "loss": 0.4974, "step": 8146 }, { "epoch": 0.9914207484027989, "grad_norm": 1.2025774717330933, "learning_rate": 1.0457278223959766e-05, "loss": 0.4279, "step": 8147 }, { "epoch": 0.9915424399148159, "grad_norm": 1.956728458404541, "learning_rate": 1.0455329561843417e-05, "loss": 0.5224, "step": 8148 }, { "epoch": 0.991664131426833, "grad_norm": 0.6114785671234131, "learning_rate": 1.0453380882400834e-05, "loss": 0.425, "step": 8149 }, { "epoch": 0.99178582293885, "grad_norm": 2.43511962890625, "learning_rate": 1.0451432185706172e-05, "loss": 0.4496, "step": 8150 }, { "epoch": 0.991907514450867, "grad_norm": 1.9524028301239014, "learning_rate": 1.0449483471833576e-05, "loss": 0.4857, "step": 8151 }, { "epoch": 0.9920292059628841, "grad_norm": 1.7438323497772217, "learning_rate": 1.044753474085721e-05, "loss": 0.4801, "step": 8152 }, { "epoch": 0.9921508974749012, "grad_norm": 2.3419482707977295, "learning_rate": 1.0445585992851217e-05, "loss": 0.4749, "step": 8153 }, { "epoch": 0.9922725889869182, "grad_norm": 2.623054027557373, "learning_rate": 1.0443637227889756e-05, "loss": 0.4209, "step": 8154 }, { "epoch": 0.9923942804989352, "grad_norm": 1.219497561454773, "learning_rate": 1.044168844604698e-05, "loss": 0.4762, "step": 8155 }, { "epoch": 0.9925159720109522, "grad_norm": 2.0181477069854736, "learning_rate": 1.0439739647397046e-05, "loss": 0.4959, "step": 8156 }, { "epoch": 0.9926376635229692, "grad_norm": 0.9117008447647095, "learning_rate": 1.0437790832014106e-05, "loss": 0.5067, "step": 8157 }, { "epoch": 0.9927593550349864, "grad_norm": 2.0771126747131348, "learning_rate": 1.0435841999972316e-05, "loss": 0.4743, "step": 8158 }, { "epoch": 0.9928810465470034, "grad_norm": 0.7010003924369812, "learning_rate": 1.0433893151345839e-05, "loss": 0.463, "step": 8159 }, { "epoch": 0.9930027380590204, "grad_norm": 1.738229513168335, "learning_rate": 1.0431944286208833e-05, "loss": 0.4638, "step": 8160 }, { "epoch": 0.9931244295710374, "grad_norm": 0.6289927363395691, "learning_rate": 1.0429995404635447e-05, "loss": 0.4477, "step": 8161 }, { "epoch": 0.9932461210830544, "grad_norm": 3.480825424194336, "learning_rate": 1.0428046506699847e-05, "loss": 0.5465, "step": 8162 }, { "epoch": 0.9933678125950715, "grad_norm": 0.818274199962616, "learning_rate": 1.0426097592476194e-05, "loss": 0.44, "step": 8163 }, { "epoch": 0.9934895041070886, "grad_norm": 3.45645809173584, "learning_rate": 1.0424148662038643e-05, "loss": 0.5326, "step": 8164 }, { "epoch": 0.9936111956191056, "grad_norm": 0.7355402708053589, "learning_rate": 1.0422199715461357e-05, "loss": 0.4693, "step": 8165 }, { "epoch": 0.9937328871311226, "grad_norm": 0.5822993516921997, "learning_rate": 1.0420250752818502e-05, "loss": 0.4469, "step": 8166 }, { "epoch": 0.9938545786431396, "grad_norm": 1.0866787433624268, "learning_rate": 1.0418301774184234e-05, "loss": 0.486, "step": 8167 }, { "epoch": 0.9939762701551567, "grad_norm": 4.596395969390869, "learning_rate": 1.0416352779632714e-05, "loss": 0.4246, "step": 8168 }, { "epoch": 0.9940979616671737, "grad_norm": 2.685981273651123, "learning_rate": 1.0414403769238112e-05, "loss": 0.4356, "step": 8169 }, { "epoch": 0.9942196531791907, "grad_norm": 3.646270513534546, "learning_rate": 1.0412454743074588e-05, "loss": 0.4195, "step": 8170 }, { "epoch": 0.9943413446912078, "grad_norm": 0.9976490139961243, "learning_rate": 1.0410505701216308e-05, "loss": 0.4838, "step": 8171 }, { "epoch": 0.9944630362032248, "grad_norm": 2.3518176078796387, "learning_rate": 1.0408556643737439e-05, "loss": 0.4278, "step": 8172 }, { "epoch": 0.9945847277152419, "grad_norm": 1.7078113555908203, "learning_rate": 1.040660757071214e-05, "loss": 0.5016, "step": 8173 }, { "epoch": 0.9947064192272589, "grad_norm": 1.3084639310836792, "learning_rate": 1.0404658482214582e-05, "loss": 0.4896, "step": 8174 }, { "epoch": 0.9948281107392759, "grad_norm": 0.8833538293838501, "learning_rate": 1.0402709378318934e-05, "loss": 0.4566, "step": 8175 }, { "epoch": 0.9949498022512929, "grad_norm": 0.837421178817749, "learning_rate": 1.0400760259099356e-05, "loss": 0.3917, "step": 8176 }, { "epoch": 0.99507149376331, "grad_norm": 0.7357523441314697, "learning_rate": 1.0398811124630024e-05, "loss": 0.4046, "step": 8177 }, { "epoch": 0.9951931852753271, "grad_norm": 1.0310944318771362, "learning_rate": 1.0396861974985103e-05, "loss": 0.4362, "step": 8178 }, { "epoch": 0.9953148767873441, "grad_norm": 0.57745361328125, "learning_rate": 1.0394912810238762e-05, "loss": 0.4271, "step": 8179 }, { "epoch": 0.9954365682993611, "grad_norm": 0.6754943132400513, "learning_rate": 1.039296363046517e-05, "loss": 0.4182, "step": 8180 }, { "epoch": 0.9955582598113781, "grad_norm": 2.6498584747314453, "learning_rate": 1.0391014435738498e-05, "loss": 0.5195, "step": 8181 }, { "epoch": 0.9956799513233952, "grad_norm": 0.6231868863105774, "learning_rate": 1.0389065226132922e-05, "loss": 0.3748, "step": 8182 }, { "epoch": 0.9958016428354123, "grad_norm": 1.6310969591140747, "learning_rate": 1.0387116001722605e-05, "loss": 0.4782, "step": 8183 }, { "epoch": 0.9959233343474293, "grad_norm": 1.122788429260254, "learning_rate": 1.0385166762581722e-05, "loss": 0.4788, "step": 8184 }, { "epoch": 0.9960450258594463, "grad_norm": 1.01002037525177, "learning_rate": 1.0383217508784447e-05, "loss": 0.4427, "step": 8185 }, { "epoch": 0.9961667173714633, "grad_norm": 0.8244919776916504, "learning_rate": 1.0381268240404956e-05, "loss": 0.4694, "step": 8186 }, { "epoch": 0.9962884088834804, "grad_norm": 1.242445945739746, "learning_rate": 1.0379318957517414e-05, "loss": 0.4883, "step": 8187 }, { "epoch": 0.9964101003954974, "grad_norm": 2.397307872772217, "learning_rate": 1.0377369660196004e-05, "loss": 0.4223, "step": 8188 }, { "epoch": 0.9965317919075144, "grad_norm": 1.7231874465942383, "learning_rate": 1.0375420348514898e-05, "loss": 0.43, "step": 8189 }, { "epoch": 0.9966534834195315, "grad_norm": 0.785193920135498, "learning_rate": 1.037347102254827e-05, "loss": 0.4362, "step": 8190 }, { "epoch": 0.9967751749315485, "grad_norm": 2.0725505352020264, "learning_rate": 1.0371521682370294e-05, "loss": 0.3877, "step": 8191 }, { "epoch": 0.9968968664435656, "grad_norm": 1.4224882125854492, "learning_rate": 1.0369572328055149e-05, "loss": 0.4718, "step": 8192 }, { "epoch": 0.9970185579555826, "grad_norm": 0.6750485301017761, "learning_rate": 1.0367622959677015e-05, "loss": 0.467, "step": 8193 }, { "epoch": 0.9971402494675996, "grad_norm": 0.8924117088317871, "learning_rate": 1.0365673577310065e-05, "loss": 0.4152, "step": 8194 }, { "epoch": 0.9972619409796166, "grad_norm": 1.3113890886306763, "learning_rate": 1.0363724181028479e-05, "loss": 0.4625, "step": 8195 }, { "epoch": 0.9973836324916338, "grad_norm": 2.259995460510254, "learning_rate": 1.0361774770906434e-05, "loss": 0.496, "step": 8196 }, { "epoch": 0.9975053240036508, "grad_norm": 2.002284049987793, "learning_rate": 1.0359825347018111e-05, "loss": 0.4816, "step": 8197 }, { "epoch": 0.9976270155156678, "grad_norm": 1.1135305166244507, "learning_rate": 1.035787590943769e-05, "loss": 0.4589, "step": 8198 }, { "epoch": 0.9977487070276848, "grad_norm": 1.1858857870101929, "learning_rate": 1.0355926458239346e-05, "loss": 0.4791, "step": 8199 }, { "epoch": 0.9978703985397018, "grad_norm": 0.6245574355125427, "learning_rate": 1.0353976993497265e-05, "loss": 0.4505, "step": 8200 }, { "epoch": 0.9979920900517188, "grad_norm": 1.240212321281433, "learning_rate": 1.035202751528563e-05, "loss": 0.5072, "step": 8201 }, { "epoch": 0.998113781563736, "grad_norm": 2.408939838409424, "learning_rate": 1.0350078023678616e-05, "loss": 0.4241, "step": 8202 }, { "epoch": 0.998235473075753, "grad_norm": 1.2402812242507935, "learning_rate": 1.0348128518750409e-05, "loss": 0.4347, "step": 8203 }, { "epoch": 0.99835716458777, "grad_norm": 1.7202292680740356, "learning_rate": 1.0346179000575191e-05, "loss": 0.425, "step": 8204 }, { "epoch": 0.998478856099787, "grad_norm": 4.543060302734375, "learning_rate": 1.0344229469227148e-05, "loss": 0.4079, "step": 8205 }, { "epoch": 0.9986005476118041, "grad_norm": 0.8375499248504639, "learning_rate": 1.034227992478046e-05, "loss": 0.5166, "step": 8206 }, { "epoch": 0.9987222391238211, "grad_norm": 0.60439532995224, "learning_rate": 1.034033036730931e-05, "loss": 0.444, "step": 8207 }, { "epoch": 0.9988439306358381, "grad_norm": 1.0107192993164062, "learning_rate": 1.0338380796887888e-05, "loss": 0.4738, "step": 8208 }, { "epoch": 0.9989656221478552, "grad_norm": 0.79546058177948, "learning_rate": 1.0336431213590377e-05, "loss": 0.4708, "step": 8209 }, { "epoch": 0.9990873136598722, "grad_norm": 0.7611692547798157, "learning_rate": 1.033448161749096e-05, "loss": 0.456, "step": 8210 }, { "epoch": 0.9992090051718893, "grad_norm": 0.8569793701171875, "learning_rate": 1.0332532008663823e-05, "loss": 0.4847, "step": 8211 }, { "epoch": 0.9993306966839063, "grad_norm": 1.7697551250457764, "learning_rate": 1.0330582387183156e-05, "loss": 0.4477, "step": 8212 }, { "epoch": 0.9994523881959233, "grad_norm": 2.360614776611328, "learning_rate": 1.0328632753123149e-05, "loss": 0.4372, "step": 8213 }, { "epoch": 0.9995740797079403, "grad_norm": 2.1063103675842285, "learning_rate": 1.0326683106557982e-05, "loss": 0.4407, "step": 8214 }, { "epoch": 0.9996957712199575, "grad_norm": 0.9967697858810425, "learning_rate": 1.0324733447561845e-05, "loss": 0.4375, "step": 8215 }, { "epoch": 0.9998174627319745, "grad_norm": 1.2738629579544067, "learning_rate": 1.0322783776208932e-05, "loss": 0.4834, "step": 8216 }, { "epoch": 0.9999391542439915, "grad_norm": 1.358147382736206, "learning_rate": 1.0320834092573426e-05, "loss": 0.4268, "step": 8217 }, { "epoch": 1.0000608457560085, "grad_norm": 2.3392598628997803, "learning_rate": 1.031888439672952e-05, "loss": 0.4757, "step": 8218 }, { "epoch": 1.0001825372680255, "grad_norm": 2.130840301513672, "learning_rate": 1.0316934688751401e-05, "loss": 0.4377, "step": 8219 }, { "epoch": 1.0003042287800425, "grad_norm": 2.0060911178588867, "learning_rate": 1.0314984968713262e-05, "loss": 0.4483, "step": 8220 }, { "epoch": 1.0004259202920596, "grad_norm": 0.7743332386016846, "learning_rate": 1.0313035236689293e-05, "loss": 0.4587, "step": 8221 }, { "epoch": 1.0005476118040766, "grad_norm": 0.8040422797203064, "learning_rate": 1.0311085492753683e-05, "loss": 0.4174, "step": 8222 }, { "epoch": 1.0006693033160936, "grad_norm": 1.2777271270751953, "learning_rate": 1.030913573698063e-05, "loss": 0.3941, "step": 8223 }, { "epoch": 1.0007909948281108, "grad_norm": 0.7108688354492188, "learning_rate": 1.0307185969444322e-05, "loss": 0.4358, "step": 8224 }, { "epoch": 1.0009126863401279, "grad_norm": 1.1994948387145996, "learning_rate": 1.0305236190218947e-05, "loss": 0.4465, "step": 8225 }, { "epoch": 1.0010343778521449, "grad_norm": 3.568629503250122, "learning_rate": 1.0303286399378706e-05, "loss": 0.3845, "step": 8226 }, { "epoch": 1.001156069364162, "grad_norm": 1.0252903699874878, "learning_rate": 1.0301336596997792e-05, "loss": 0.4039, "step": 8227 }, { "epoch": 1.001277760876179, "grad_norm": 0.7791546583175659, "learning_rate": 1.0299386783150395e-05, "loss": 0.4395, "step": 8228 }, { "epoch": 1.001399452388196, "grad_norm": 1.6991033554077148, "learning_rate": 1.0297436957910713e-05, "loss": 0.4063, "step": 8229 }, { "epoch": 1.001521143900213, "grad_norm": 0.9151224493980408, "learning_rate": 1.0295487121352936e-05, "loss": 0.3929, "step": 8230 }, { "epoch": 1.00164283541223, "grad_norm": 0.763907253742218, "learning_rate": 1.0293537273551266e-05, "loss": 0.3901, "step": 8231 }, { "epoch": 1.001764526924247, "grad_norm": 1.2544831037521362, "learning_rate": 1.029158741457989e-05, "loss": 0.3418, "step": 8232 }, { "epoch": 1.001886218436264, "grad_norm": 2.4731099605560303, "learning_rate": 1.0289637544513014e-05, "loss": 0.4695, "step": 8233 }, { "epoch": 1.002007909948281, "grad_norm": 2.8757359981536865, "learning_rate": 1.0287687663424826e-05, "loss": 0.4298, "step": 8234 }, { "epoch": 1.002129601460298, "grad_norm": 0.9083685874938965, "learning_rate": 1.0285737771389532e-05, "loss": 0.4306, "step": 8235 }, { "epoch": 1.002251292972315, "grad_norm": 1.6674168109893799, "learning_rate": 1.0283787868481322e-05, "loss": 0.4163, "step": 8236 }, { "epoch": 1.0023729844843323, "grad_norm": 0.9981405138969421, "learning_rate": 1.0281837954774395e-05, "loss": 0.38, "step": 8237 }, { "epoch": 1.0024946759963493, "grad_norm": 1.7152098417282104, "learning_rate": 1.027988803034295e-05, "loss": 0.4261, "step": 8238 }, { "epoch": 1.0026163675083664, "grad_norm": 1.7241359949111938, "learning_rate": 1.0277938095261189e-05, "loss": 0.3885, "step": 8239 }, { "epoch": 1.0027380590203834, "grad_norm": 1.241711974143982, "learning_rate": 1.0275988149603305e-05, "loss": 0.5193, "step": 8240 }, { "epoch": 1.0028597505324004, "grad_norm": 1.847245693206787, "learning_rate": 1.0274038193443503e-05, "loss": 0.409, "step": 8241 }, { "epoch": 1.0029814420444174, "grad_norm": 1.475324034690857, "learning_rate": 1.0272088226855979e-05, "loss": 0.3985, "step": 8242 }, { "epoch": 1.0031031335564344, "grad_norm": 3.2274577617645264, "learning_rate": 1.0270138249914936e-05, "loss": 0.3604, "step": 8243 }, { "epoch": 1.0032248250684515, "grad_norm": 0.855535626411438, "learning_rate": 1.0268188262694571e-05, "loss": 0.4186, "step": 8244 }, { "epoch": 1.0033465165804685, "grad_norm": 2.6282224655151367, "learning_rate": 1.0266238265269088e-05, "loss": 0.3621, "step": 8245 }, { "epoch": 1.0034682080924855, "grad_norm": 2.8923850059509277, "learning_rate": 1.0264288257712691e-05, "loss": 0.3931, "step": 8246 }, { "epoch": 1.0035898996045025, "grad_norm": 1.0964852571487427, "learning_rate": 1.0262338240099579e-05, "loss": 0.3694, "step": 8247 }, { "epoch": 1.0037115911165195, "grad_norm": 1.529205560684204, "learning_rate": 1.026038821250395e-05, "loss": 0.3994, "step": 8248 }, { "epoch": 1.0038332826285365, "grad_norm": 1.7007806301116943, "learning_rate": 1.0258438175000011e-05, "loss": 0.3804, "step": 8249 }, { "epoch": 1.0039549741405538, "grad_norm": 1.1398683786392212, "learning_rate": 1.025648812766197e-05, "loss": 0.3983, "step": 8250 }, { "epoch": 1.0040766656525708, "grad_norm": 1.3885143995285034, "learning_rate": 1.025453807056402e-05, "loss": 0.4128, "step": 8251 }, { "epoch": 1.0041983571645878, "grad_norm": 1.401789903640747, "learning_rate": 1.025258800378037e-05, "loss": 0.3435, "step": 8252 }, { "epoch": 1.0043200486766048, "grad_norm": 3.9362881183624268, "learning_rate": 1.0250637927385223e-05, "loss": 0.5015, "step": 8253 }, { "epoch": 1.0044417401886219, "grad_norm": 1.0945338010787964, "learning_rate": 1.0248687841452787e-05, "loss": 0.4033, "step": 8254 }, { "epoch": 1.0045634317006389, "grad_norm": 0.976088285446167, "learning_rate": 1.0246737746057261e-05, "loss": 0.3983, "step": 8255 }, { "epoch": 1.004685123212656, "grad_norm": 2.0697836875915527, "learning_rate": 1.0244787641272852e-05, "loss": 0.3914, "step": 8256 }, { "epoch": 1.004806814724673, "grad_norm": 1.0286303758621216, "learning_rate": 1.024283752717377e-05, "loss": 0.3357, "step": 8257 }, { "epoch": 1.00492850623669, "grad_norm": 3.1036605834960938, "learning_rate": 1.0240887403834218e-05, "loss": 0.4642, "step": 8258 }, { "epoch": 1.005050197748707, "grad_norm": 0.9778620004653931, "learning_rate": 1.0238937271328398e-05, "loss": 0.463, "step": 8259 }, { "epoch": 1.005171889260724, "grad_norm": 2.1090810298919678, "learning_rate": 1.0236987129730522e-05, "loss": 0.4064, "step": 8260 }, { "epoch": 1.005293580772741, "grad_norm": 2.055647611618042, "learning_rate": 1.0235036979114796e-05, "loss": 0.437, "step": 8261 }, { "epoch": 1.0054152722847582, "grad_norm": 1.9423773288726807, "learning_rate": 1.0233086819555424e-05, "loss": 0.3509, "step": 8262 }, { "epoch": 1.0055369637967753, "grad_norm": 1.6141581535339355, "learning_rate": 1.0231136651126616e-05, "loss": 0.4145, "step": 8263 }, { "epoch": 1.0056586553087923, "grad_norm": 1.7718088626861572, "learning_rate": 1.0229186473902583e-05, "loss": 0.3689, "step": 8264 }, { "epoch": 1.0057803468208093, "grad_norm": 1.3700051307678223, "learning_rate": 1.0227236287957532e-05, "loss": 0.4207, "step": 8265 }, { "epoch": 1.0059020383328263, "grad_norm": 1.1339055299758911, "learning_rate": 1.0225286093365665e-05, "loss": 0.4443, "step": 8266 }, { "epoch": 1.0060237298448433, "grad_norm": 3.1934893131256104, "learning_rate": 1.0223335890201195e-05, "loss": 0.3983, "step": 8267 }, { "epoch": 1.0061454213568604, "grad_norm": 2.673259735107422, "learning_rate": 1.0221385678538335e-05, "loss": 0.4212, "step": 8268 }, { "epoch": 1.0062671128688774, "grad_norm": 1.2998101711273193, "learning_rate": 1.0219435458451292e-05, "loss": 0.4295, "step": 8269 }, { "epoch": 1.0063888043808944, "grad_norm": 1.089137077331543, "learning_rate": 1.0217485230014278e-05, "loss": 0.419, "step": 8270 }, { "epoch": 1.0065104958929114, "grad_norm": 1.3858327865600586, "learning_rate": 1.0215534993301496e-05, "loss": 0.4549, "step": 8271 }, { "epoch": 1.0066321874049284, "grad_norm": 2.626344680786133, "learning_rate": 1.0213584748387163e-05, "loss": 0.3498, "step": 8272 }, { "epoch": 1.0067538789169455, "grad_norm": 1.2461892366409302, "learning_rate": 1.0211634495345488e-05, "loss": 0.4532, "step": 8273 }, { "epoch": 1.0068755704289625, "grad_norm": 1.191898226737976, "learning_rate": 1.0209684234250683e-05, "loss": 0.412, "step": 8274 }, { "epoch": 1.0069972619409797, "grad_norm": 1.1674786806106567, "learning_rate": 1.020773396517696e-05, "loss": 0.3746, "step": 8275 }, { "epoch": 1.0071189534529967, "grad_norm": 1.2406764030456543, "learning_rate": 1.0205783688198527e-05, "loss": 0.3944, "step": 8276 }, { "epoch": 1.0072406449650138, "grad_norm": 2.467060089111328, "learning_rate": 1.0203833403389601e-05, "loss": 0.491, "step": 8277 }, { "epoch": 1.0073623364770308, "grad_norm": 2.001375436782837, "learning_rate": 1.0201883110824391e-05, "loss": 0.3867, "step": 8278 }, { "epoch": 1.0074840279890478, "grad_norm": 2.6476292610168457, "learning_rate": 1.019993281057711e-05, "loss": 0.4524, "step": 8279 }, { "epoch": 1.0076057195010648, "grad_norm": 1.4431946277618408, "learning_rate": 1.0197982502721973e-05, "loss": 0.391, "step": 8280 }, { "epoch": 1.0077274110130818, "grad_norm": 0.9354568123817444, "learning_rate": 1.0196032187333194e-05, "loss": 0.3646, "step": 8281 }, { "epoch": 1.0078491025250988, "grad_norm": 1.1406186819076538, "learning_rate": 1.019408186448498e-05, "loss": 0.4219, "step": 8282 }, { "epoch": 1.0079707940371159, "grad_norm": 2.238574504852295, "learning_rate": 1.019213153425155e-05, "loss": 0.3707, "step": 8283 }, { "epoch": 1.0080924855491329, "grad_norm": 0.9136751294136047, "learning_rate": 1.0190181196707121e-05, "loss": 0.4287, "step": 8284 }, { "epoch": 1.00821417706115, "grad_norm": 0.9762387275695801, "learning_rate": 1.01882308519259e-05, "loss": 0.393, "step": 8285 }, { "epoch": 1.008335868573167, "grad_norm": 1.1271981000900269, "learning_rate": 1.0186280499982107e-05, "loss": 0.4125, "step": 8286 }, { "epoch": 1.0084575600851842, "grad_norm": 1.9838197231292725, "learning_rate": 1.0184330140949956e-05, "loss": 0.3889, "step": 8287 }, { "epoch": 1.0085792515972012, "grad_norm": 1.7504675388336182, "learning_rate": 1.0182379774903662e-05, "loss": 0.343, "step": 8288 }, { "epoch": 1.0087009431092182, "grad_norm": 1.7771848440170288, "learning_rate": 1.0180429401917438e-05, "loss": 0.3894, "step": 8289 }, { "epoch": 1.0088226346212352, "grad_norm": 1.4864696264266968, "learning_rate": 1.0178479022065501e-05, "loss": 0.4092, "step": 8290 }, { "epoch": 1.0089443261332522, "grad_norm": 0.9380364418029785, "learning_rate": 1.0176528635422072e-05, "loss": 0.3772, "step": 8291 }, { "epoch": 1.0090660176452693, "grad_norm": 2.18104887008667, "learning_rate": 1.017457824206136e-05, "loss": 0.4245, "step": 8292 }, { "epoch": 1.0091877091572863, "grad_norm": 0.9738626480102539, "learning_rate": 1.0172627842057588e-05, "loss": 0.3595, "step": 8293 }, { "epoch": 1.0093094006693033, "grad_norm": 0.7704262733459473, "learning_rate": 1.0170677435484964e-05, "loss": 0.3229, "step": 8294 }, { "epoch": 1.0094310921813203, "grad_norm": 2.369701623916626, "learning_rate": 1.0168727022417715e-05, "loss": 0.4364, "step": 8295 }, { "epoch": 1.0095527836933373, "grad_norm": 2.02177095413208, "learning_rate": 1.0166776602930051e-05, "loss": 0.4443, "step": 8296 }, { "epoch": 1.0096744752053544, "grad_norm": 1.763436198234558, "learning_rate": 1.0164826177096189e-05, "loss": 0.4182, "step": 8297 }, { "epoch": 1.0097961667173714, "grad_norm": 2.5117340087890625, "learning_rate": 1.0162875744990357e-05, "loss": 0.4028, "step": 8298 }, { "epoch": 1.0099178582293884, "grad_norm": 1.0737465620040894, "learning_rate": 1.0160925306686762e-05, "loss": 0.3655, "step": 8299 }, { "epoch": 1.0100395497414056, "grad_norm": 2.2438952922821045, "learning_rate": 1.0158974862259626e-05, "loss": 0.3568, "step": 8300 }, { "epoch": 1.0101612412534227, "grad_norm": 1.2577983140945435, "learning_rate": 1.0157024411783165e-05, "loss": 0.4057, "step": 8301 }, { "epoch": 1.0102829327654397, "grad_norm": 1.3593028783798218, "learning_rate": 1.0155073955331603e-05, "loss": 0.4129, "step": 8302 }, { "epoch": 1.0104046242774567, "grad_norm": 1.9193497896194458, "learning_rate": 1.0153123492979156e-05, "loss": 0.3816, "step": 8303 }, { "epoch": 1.0105263157894737, "grad_norm": 1.3715996742248535, "learning_rate": 1.0151173024800045e-05, "loss": 0.3462, "step": 8304 }, { "epoch": 1.0106480073014907, "grad_norm": 1.147500991821289, "learning_rate": 1.0149222550868483e-05, "loss": 0.3727, "step": 8305 }, { "epoch": 1.0107696988135078, "grad_norm": 2.520171642303467, "learning_rate": 1.0147272071258695e-05, "loss": 0.4317, "step": 8306 }, { "epoch": 1.0108913903255248, "grad_norm": 1.0026464462280273, "learning_rate": 1.0145321586044903e-05, "loss": 0.3443, "step": 8307 }, { "epoch": 1.0110130818375418, "grad_norm": 3.3628146648406982, "learning_rate": 1.0143371095301323e-05, "loss": 0.4607, "step": 8308 }, { "epoch": 1.0111347733495588, "grad_norm": 1.7303556203842163, "learning_rate": 1.0141420599102172e-05, "loss": 0.393, "step": 8309 }, { "epoch": 1.0112564648615758, "grad_norm": 2.9159469604492188, "learning_rate": 1.013947009752168e-05, "loss": 0.3767, "step": 8310 }, { "epoch": 1.0113781563735929, "grad_norm": 3.6203832626342773, "learning_rate": 1.013751959063406e-05, "loss": 0.4231, "step": 8311 }, { "epoch": 1.0114998478856099, "grad_norm": 1.3753291368484497, "learning_rate": 1.0135569078513534e-05, "loss": 0.3822, "step": 8312 }, { "epoch": 1.0116215393976271, "grad_norm": 2.3555948734283447, "learning_rate": 1.0133618561234322e-05, "loss": 0.3742, "step": 8313 }, { "epoch": 1.0117432309096441, "grad_norm": 2.5891268253326416, "learning_rate": 1.0131668038870653e-05, "loss": 0.4598, "step": 8314 }, { "epoch": 1.0118649224216612, "grad_norm": 4.6124162673950195, "learning_rate": 1.0129717511496738e-05, "loss": 0.3334, "step": 8315 }, { "epoch": 1.0119866139336782, "grad_norm": 0.944620668888092, "learning_rate": 1.0127766979186804e-05, "loss": 0.374, "step": 8316 }, { "epoch": 1.0121083054456952, "grad_norm": 1.506091594696045, "learning_rate": 1.0125816442015073e-05, "loss": 0.4436, "step": 8317 }, { "epoch": 1.0122299969577122, "grad_norm": 3.5881686210632324, "learning_rate": 1.0123865900055765e-05, "loss": 0.4124, "step": 8318 }, { "epoch": 1.0123516884697292, "grad_norm": 2.568885087966919, "learning_rate": 1.0121915353383103e-05, "loss": 0.4077, "step": 8319 }, { "epoch": 1.0124733799817462, "grad_norm": 1.9236934185028076, "learning_rate": 1.011996480207131e-05, "loss": 0.37, "step": 8320 }, { "epoch": 1.0125950714937633, "grad_norm": 2.2964608669281006, "learning_rate": 1.0118014246194607e-05, "loss": 0.438, "step": 8321 }, { "epoch": 1.0127167630057803, "grad_norm": 1.2992522716522217, "learning_rate": 1.0116063685827222e-05, "loss": 0.4205, "step": 8322 }, { "epoch": 1.0128384545177973, "grad_norm": 1.1891727447509766, "learning_rate": 1.0114113121043367e-05, "loss": 0.4003, "step": 8323 }, { "epoch": 1.0129601460298143, "grad_norm": 3.500699996948242, "learning_rate": 1.0112162551917275e-05, "loss": 0.2874, "step": 8324 }, { "epoch": 1.0130818375418316, "grad_norm": 1.4891784191131592, "learning_rate": 1.0110211978523166e-05, "loss": 0.4533, "step": 8325 }, { "epoch": 1.0132035290538486, "grad_norm": 1.2669380903244019, "learning_rate": 1.0108261400935262e-05, "loss": 0.4277, "step": 8326 }, { "epoch": 1.0133252205658656, "grad_norm": 1.8431144952774048, "learning_rate": 1.0106310819227789e-05, "loss": 0.385, "step": 8327 }, { "epoch": 1.0134469120778826, "grad_norm": 1.4549047946929932, "learning_rate": 1.0104360233474967e-05, "loss": 0.3893, "step": 8328 }, { "epoch": 1.0135686035898996, "grad_norm": 1.4614136219024658, "learning_rate": 1.0102409643751025e-05, "loss": 0.3559, "step": 8329 }, { "epoch": 1.0136902951019167, "grad_norm": 1.1574559211730957, "learning_rate": 1.0100459050130182e-05, "loss": 0.3239, "step": 8330 }, { "epoch": 1.0138119866139337, "grad_norm": 1.0747854709625244, "learning_rate": 1.0098508452686664e-05, "loss": 0.3846, "step": 8331 }, { "epoch": 1.0139336781259507, "grad_norm": 1.212923288345337, "learning_rate": 1.0096557851494695e-05, "loss": 0.315, "step": 8332 }, { "epoch": 1.0140553696379677, "grad_norm": 3.0859949588775635, "learning_rate": 1.0094607246628505e-05, "loss": 0.4746, "step": 8333 }, { "epoch": 1.0141770611499847, "grad_norm": 1.2591862678527832, "learning_rate": 1.0092656638162309e-05, "loss": 0.3338, "step": 8334 }, { "epoch": 1.0142987526620018, "grad_norm": 1.3367829322814941, "learning_rate": 1.0090706026170334e-05, "loss": 0.3447, "step": 8335 }, { "epoch": 1.0144204441740188, "grad_norm": 1.5000842809677124, "learning_rate": 1.0088755410726809e-05, "loss": 0.4379, "step": 8336 }, { "epoch": 1.0145421356860358, "grad_norm": 2.5861613750457764, "learning_rate": 1.008680479190596e-05, "loss": 0.3318, "step": 8337 }, { "epoch": 1.014663827198053, "grad_norm": 2.0502939224243164, "learning_rate": 1.0084854169782006e-05, "loss": 0.349, "step": 8338 }, { "epoch": 1.01478551871007, "grad_norm": 1.8105688095092773, "learning_rate": 1.0082903544429176e-05, "loss": 0.4004, "step": 8339 }, { "epoch": 1.014907210222087, "grad_norm": 1.7817609310150146, "learning_rate": 1.0080952915921694e-05, "loss": 0.3905, "step": 8340 }, { "epoch": 1.015028901734104, "grad_norm": 1.7019411325454712, "learning_rate": 1.0079002284333785e-05, "loss": 0.4047, "step": 8341 }, { "epoch": 1.0151505932461211, "grad_norm": 1.144789457321167, "learning_rate": 1.0077051649739678e-05, "loss": 0.3553, "step": 8342 }, { "epoch": 1.0152722847581381, "grad_norm": 3.05646014213562, "learning_rate": 1.0075101012213592e-05, "loss": 0.4102, "step": 8343 }, { "epoch": 1.0153939762701552, "grad_norm": 1.4944475889205933, "learning_rate": 1.0073150371829763e-05, "loss": 0.3906, "step": 8344 }, { "epoch": 1.0155156677821722, "grad_norm": 2.971116781234741, "learning_rate": 1.0071199728662409e-05, "loss": 0.3853, "step": 8345 }, { "epoch": 1.0156373592941892, "grad_norm": 1.6913899183273315, "learning_rate": 1.0069249082785755e-05, "loss": 0.3914, "step": 8346 }, { "epoch": 1.0157590508062062, "grad_norm": 4.988412857055664, "learning_rate": 1.0067298434274031e-05, "loss": 0.499, "step": 8347 }, { "epoch": 1.0158807423182232, "grad_norm": 3.384791851043701, "learning_rate": 1.0065347783201464e-05, "loss": 0.3158, "step": 8348 }, { "epoch": 1.0160024338302402, "grad_norm": 3.1956398487091064, "learning_rate": 1.0063397129642278e-05, "loss": 0.2915, "step": 8349 }, { "epoch": 1.0161241253422575, "grad_norm": 1.756445050239563, "learning_rate": 1.0061446473670701e-05, "loss": 0.3852, "step": 8350 }, { "epoch": 1.0162458168542745, "grad_norm": 1.7911667823791504, "learning_rate": 1.0059495815360956e-05, "loss": 0.4497, "step": 8351 }, { "epoch": 1.0163675083662915, "grad_norm": 1.727837324142456, "learning_rate": 1.0057545154787276e-05, "loss": 0.3689, "step": 8352 }, { "epoch": 1.0164891998783085, "grad_norm": 3.2925519943237305, "learning_rate": 1.005559449202388e-05, "loss": 0.4847, "step": 8353 }, { "epoch": 1.0166108913903256, "grad_norm": 1.6262190341949463, "learning_rate": 1.0053643827144999e-05, "loss": 0.4004, "step": 8354 }, { "epoch": 1.0167325829023426, "grad_norm": 1.9189738035202026, "learning_rate": 1.005169316022486e-05, "loss": 0.4345, "step": 8355 }, { "epoch": 1.0168542744143596, "grad_norm": 1.4556199312210083, "learning_rate": 1.0049742491337691e-05, "loss": 0.4061, "step": 8356 }, { "epoch": 1.0169759659263766, "grad_norm": 1.363373041152954, "learning_rate": 1.0047791820557715e-05, "loss": 0.4366, "step": 8357 }, { "epoch": 1.0170976574383936, "grad_norm": 2.4752964973449707, "learning_rate": 1.0045841147959161e-05, "loss": 0.3997, "step": 8358 }, { "epoch": 1.0172193489504107, "grad_norm": 2.4163966178894043, "learning_rate": 1.0043890473616258e-05, "loss": 0.3868, "step": 8359 }, { "epoch": 1.0173410404624277, "grad_norm": 1.5908942222595215, "learning_rate": 1.0041939797603231e-05, "loss": 0.3949, "step": 8360 }, { "epoch": 1.0174627319744447, "grad_norm": 2.44240140914917, "learning_rate": 1.0039989119994306e-05, "loss": 0.353, "step": 8361 }, { "epoch": 1.0175844234864617, "grad_norm": 2.568925619125366, "learning_rate": 1.0038038440863715e-05, "loss": 0.478, "step": 8362 }, { "epoch": 1.017706114998479, "grad_norm": 1.585746169090271, "learning_rate": 1.0036087760285682e-05, "loss": 0.3459, "step": 8363 }, { "epoch": 1.017827806510496, "grad_norm": 1.1615705490112305, "learning_rate": 1.0034137078334434e-05, "loss": 0.3797, "step": 8364 }, { "epoch": 1.017949498022513, "grad_norm": 1.4657227993011475, "learning_rate": 1.00321863950842e-05, "loss": 0.3588, "step": 8365 }, { "epoch": 1.01807118953453, "grad_norm": 1.4175293445587158, "learning_rate": 1.0030235710609206e-05, "loss": 0.3784, "step": 8366 }, { "epoch": 1.018192881046547, "grad_norm": 1.450131893157959, "learning_rate": 1.0028285024983681e-05, "loss": 0.4149, "step": 8367 }, { "epoch": 1.018314572558564, "grad_norm": 2.158311605453491, "learning_rate": 1.0026334338281856e-05, "loss": 0.3856, "step": 8368 }, { "epoch": 1.018436264070581, "grad_norm": 1.6758432388305664, "learning_rate": 1.0024383650577952e-05, "loss": 0.4044, "step": 8369 }, { "epoch": 1.018557955582598, "grad_norm": 1.2153000831604004, "learning_rate": 1.0022432961946197e-05, "loss": 0.3824, "step": 8370 }, { "epoch": 1.0186796470946151, "grad_norm": 1.2526644468307495, "learning_rate": 1.0020482272460825e-05, "loss": 0.3422, "step": 8371 }, { "epoch": 1.0188013386066321, "grad_norm": 3.0249688625335693, "learning_rate": 1.0018531582196059e-05, "loss": 0.2929, "step": 8372 }, { "epoch": 1.0189230301186492, "grad_norm": 1.8084049224853516, "learning_rate": 1.001658089122613e-05, "loss": 0.3341, "step": 8373 }, { "epoch": 1.0190447216306662, "grad_norm": 1.1040598154067993, "learning_rate": 1.001463019962526e-05, "loss": 0.3424, "step": 8374 }, { "epoch": 1.0191664131426832, "grad_norm": 1.4620301723480225, "learning_rate": 1.0012679507467683e-05, "loss": 0.3775, "step": 8375 }, { "epoch": 1.0192881046547004, "grad_norm": 1.6535565853118896, "learning_rate": 1.0010728814827623e-05, "loss": 0.4196, "step": 8376 }, { "epoch": 1.0194097961667175, "grad_norm": 1.3088042736053467, "learning_rate": 1.000877812177931e-05, "loss": 0.3615, "step": 8377 }, { "epoch": 1.0195314876787345, "grad_norm": 1.5654534101486206, "learning_rate": 1.0006827428396972e-05, "loss": 0.3696, "step": 8378 }, { "epoch": 1.0196531791907515, "grad_norm": 2.361877679824829, "learning_rate": 1.0004876734754838e-05, "loss": 0.4321, "step": 8379 }, { "epoch": 1.0197748707027685, "grad_norm": 1.1035239696502686, "learning_rate": 1.000292604092713e-05, "loss": 0.4019, "step": 8380 }, { "epoch": 1.0198965622147855, "grad_norm": 1.299080729484558, "learning_rate": 1.0000975346988081e-05, "loss": 0.3164, "step": 8381 }, { "epoch": 1.0200182537268025, "grad_norm": 1.5622678995132446, "learning_rate": 9.99902465301192e-06, "loss": 0.4089, "step": 8382 }, { "epoch": 1.0201399452388196, "grad_norm": 1.23862624168396, "learning_rate": 9.997073959072872e-06, "loss": 0.4181, "step": 8383 }, { "epoch": 1.0202616367508366, "grad_norm": 1.8458223342895508, "learning_rate": 9.995123265245165e-06, "loss": 0.4401, "step": 8384 }, { "epoch": 1.0203833282628536, "grad_norm": 1.8534002304077148, "learning_rate": 9.99317257160303e-06, "loss": 0.3928, "step": 8385 }, { "epoch": 1.0205050197748706, "grad_norm": 1.2381389141082764, "learning_rate": 9.991221878220691e-06, "loss": 0.434, "step": 8386 }, { "epoch": 1.0206267112868876, "grad_norm": 1.700703501701355, "learning_rate": 9.989271185172376e-06, "loss": 0.3741, "step": 8387 }, { "epoch": 1.0207484027989047, "grad_norm": 2.201749324798584, "learning_rate": 9.987320492532322e-06, "loss": 0.3951, "step": 8388 }, { "epoch": 1.020870094310922, "grad_norm": 1.0145998001098633, "learning_rate": 9.985369800374743e-06, "loss": 0.3689, "step": 8389 }, { "epoch": 1.020991785822939, "grad_norm": 1.4275449514389038, "learning_rate": 9.983419108773876e-06, "loss": 0.3933, "step": 8390 }, { "epoch": 1.021113477334956, "grad_norm": 2.5123236179351807, "learning_rate": 9.981468417803945e-06, "loss": 0.3489, "step": 8391 }, { "epoch": 1.021235168846973, "grad_norm": 1.210709571838379, "learning_rate": 9.979517727539177e-06, "loss": 0.4135, "step": 8392 }, { "epoch": 1.02135686035899, "grad_norm": 1.035210371017456, "learning_rate": 9.977567038053804e-06, "loss": 0.3461, "step": 8393 }, { "epoch": 1.021478551871007, "grad_norm": 2.321002244949341, "learning_rate": 9.975616349422052e-06, "loss": 0.416, "step": 8394 }, { "epoch": 1.021600243383024, "grad_norm": 1.3586382865905762, "learning_rate": 9.973665661718147e-06, "loss": 0.4224, "step": 8395 }, { "epoch": 1.021721934895041, "grad_norm": 1.3766831159591675, "learning_rate": 9.971714975016318e-06, "loss": 0.3822, "step": 8396 }, { "epoch": 1.021843626407058, "grad_norm": 1.6176156997680664, "learning_rate": 9.969764289390796e-06, "loss": 0.372, "step": 8397 }, { "epoch": 1.021965317919075, "grad_norm": 1.1981098651885986, "learning_rate": 9.967813604915801e-06, "loss": 0.4232, "step": 8398 }, { "epoch": 1.022087009431092, "grad_norm": 1.5008865594863892, "learning_rate": 9.96586292166557e-06, "loss": 0.3796, "step": 8399 }, { "epoch": 1.0222087009431091, "grad_norm": 1.383406639099121, "learning_rate": 9.963912239714323e-06, "loss": 0.3915, "step": 8400 }, { "epoch": 1.0223303924551264, "grad_norm": 0.9409666657447815, "learning_rate": 9.96196155913629e-06, "loss": 0.3455, "step": 8401 }, { "epoch": 1.0224520839671434, "grad_norm": 1.1549283266067505, "learning_rate": 9.960010880005697e-06, "loss": 0.3877, "step": 8402 }, { "epoch": 1.0225737754791604, "grad_norm": 2.7519052028656006, "learning_rate": 9.958060202396774e-06, "loss": 0.4259, "step": 8403 }, { "epoch": 1.0226954669911774, "grad_norm": 1.1812822818756104, "learning_rate": 9.956109526383745e-06, "loss": 0.4274, "step": 8404 }, { "epoch": 1.0228171585031944, "grad_norm": 2.096966505050659, "learning_rate": 9.954158852040842e-06, "loss": 0.4345, "step": 8405 }, { "epoch": 1.0229388500152115, "grad_norm": 0.9592916369438171, "learning_rate": 9.952208179442288e-06, "loss": 0.3592, "step": 8406 }, { "epoch": 1.0230605415272285, "grad_norm": 1.6376439332962036, "learning_rate": 9.950257508662312e-06, "loss": 0.3201, "step": 8407 }, { "epoch": 1.0231822330392455, "grad_norm": 1.5710434913635254, "learning_rate": 9.948306839775143e-06, "loss": 0.3452, "step": 8408 }, { "epoch": 1.0233039245512625, "grad_norm": 1.166159749031067, "learning_rate": 9.946356172855003e-06, "loss": 0.3465, "step": 8409 }, { "epoch": 1.0234256160632795, "grad_norm": 1.8354363441467285, "learning_rate": 9.944405507976122e-06, "loss": 0.4257, "step": 8410 }, { "epoch": 1.0235473075752965, "grad_norm": 2.438997745513916, "learning_rate": 9.94245484521273e-06, "loss": 0.3444, "step": 8411 }, { "epoch": 1.0236689990873136, "grad_norm": 1.3106298446655273, "learning_rate": 9.940504184639046e-06, "loss": 0.4019, "step": 8412 }, { "epoch": 1.0237906905993306, "grad_norm": 1.2060514688491821, "learning_rate": 9.938553526329304e-06, "loss": 0.377, "step": 8413 }, { "epoch": 1.0239123821113478, "grad_norm": 1.3195699453353882, "learning_rate": 9.936602870357725e-06, "loss": 0.387, "step": 8414 }, { "epoch": 1.0240340736233648, "grad_norm": 1.252862572669983, "learning_rate": 9.934652216798537e-06, "loss": 0.383, "step": 8415 }, { "epoch": 1.0241557651353819, "grad_norm": 2.6345582008361816, "learning_rate": 9.93270156572597e-06, "loss": 0.3047, "step": 8416 }, { "epoch": 1.0242774566473989, "grad_norm": 1.6867715120315552, "learning_rate": 9.930750917214246e-06, "loss": 0.3688, "step": 8417 }, { "epoch": 1.024399148159416, "grad_norm": 3.6022634506225586, "learning_rate": 9.928800271337595e-06, "loss": 0.4462, "step": 8418 }, { "epoch": 1.024520839671433, "grad_norm": 2.9298648834228516, "learning_rate": 9.92684962817024e-06, "loss": 0.4327, "step": 8419 }, { "epoch": 1.02464253118345, "grad_norm": 3.2493534088134766, "learning_rate": 9.924898987786408e-06, "loss": 0.413, "step": 8420 }, { "epoch": 1.024764222695467, "grad_norm": 2.7008402347564697, "learning_rate": 9.922948350260323e-06, "loss": 0.4117, "step": 8421 }, { "epoch": 1.024885914207484, "grad_norm": 1.7091697454452515, "learning_rate": 9.920997715666218e-06, "loss": 0.3822, "step": 8422 }, { "epoch": 1.025007605719501, "grad_norm": 1.5987539291381836, "learning_rate": 9.919047084078311e-06, "loss": 0.401, "step": 8423 }, { "epoch": 1.025129297231518, "grad_norm": 1.2505837678909302, "learning_rate": 9.917096455570829e-06, "loss": 0.3448, "step": 8424 }, { "epoch": 1.025250988743535, "grad_norm": 1.6886801719665527, "learning_rate": 9.915145830217999e-06, "loss": 0.3685, "step": 8425 }, { "epoch": 1.0253726802555523, "grad_norm": 2.0024356842041016, "learning_rate": 9.913195208094043e-06, "loss": 0.4339, "step": 8426 }, { "epoch": 1.0254943717675693, "grad_norm": 2.178471326828003, "learning_rate": 9.911244589273193e-06, "loss": 0.3396, "step": 8427 }, { "epoch": 1.0256160632795863, "grad_norm": 1.8820332288742065, "learning_rate": 9.909293973829667e-06, "loss": 0.4167, "step": 8428 }, { "epoch": 1.0257377547916033, "grad_norm": 1.6188408136367798, "learning_rate": 9.907343361837694e-06, "loss": 0.42, "step": 8429 }, { "epoch": 1.0258594463036204, "grad_norm": 1.5058780908584595, "learning_rate": 9.9053927533715e-06, "loss": 0.3765, "step": 8430 }, { "epoch": 1.0259811378156374, "grad_norm": 3.7936012744903564, "learning_rate": 9.903442148505305e-06, "loss": 0.3855, "step": 8431 }, { "epoch": 1.0261028293276544, "grad_norm": 1.5638360977172852, "learning_rate": 9.901491547313336e-06, "loss": 0.4067, "step": 8432 }, { "epoch": 1.0262245208396714, "grad_norm": 1.5825241804122925, "learning_rate": 9.89954094986982e-06, "loss": 0.3515, "step": 8433 }, { "epoch": 1.0263462123516884, "grad_norm": 2.1016364097595215, "learning_rate": 9.897590356248979e-06, "loss": 0.3441, "step": 8434 }, { "epoch": 1.0264679038637055, "grad_norm": 1.3313311338424683, "learning_rate": 9.895639766525038e-06, "loss": 0.3558, "step": 8435 }, { "epoch": 1.0265895953757225, "grad_norm": 2.491170644760132, "learning_rate": 9.893689180772216e-06, "loss": 0.4247, "step": 8436 }, { "epoch": 1.0267112868877395, "grad_norm": 3.083341121673584, "learning_rate": 9.891738599064741e-06, "loss": 0.4331, "step": 8437 }, { "epoch": 1.0268329783997565, "grad_norm": 4.732062339782715, "learning_rate": 9.889788021476836e-06, "loss": 0.5266, "step": 8438 }, { "epoch": 1.0269546699117738, "grad_norm": 2.2248575687408447, "learning_rate": 9.887837448082728e-06, "loss": 0.3503, "step": 8439 }, { "epoch": 1.0270763614237908, "grad_norm": 1.8852100372314453, "learning_rate": 9.885886878956634e-06, "loss": 0.3964, "step": 8440 }, { "epoch": 1.0271980529358078, "grad_norm": 2.4943511486053467, "learning_rate": 9.883936314172783e-06, "loss": 0.4099, "step": 8441 }, { "epoch": 1.0273197444478248, "grad_norm": 2.6485531330108643, "learning_rate": 9.881985753805394e-06, "loss": 0.3922, "step": 8442 }, { "epoch": 1.0274414359598418, "grad_norm": 1.6862001419067383, "learning_rate": 9.880035197928692e-06, "loss": 0.4046, "step": 8443 }, { "epoch": 1.0275631274718589, "grad_norm": 2.152690887451172, "learning_rate": 9.878084646616897e-06, "loss": 0.4025, "step": 8444 }, { "epoch": 1.0276848189838759, "grad_norm": 3.224316358566284, "learning_rate": 9.87613409994424e-06, "loss": 0.3862, "step": 8445 }, { "epoch": 1.0278065104958929, "grad_norm": 2.362570285797119, "learning_rate": 9.87418355798493e-06, "loss": 0.4232, "step": 8446 }, { "epoch": 1.02792820200791, "grad_norm": 4.146801948547363, "learning_rate": 9.8722330208132e-06, "loss": 0.3559, "step": 8447 }, { "epoch": 1.028049893519927, "grad_norm": 1.7981668710708618, "learning_rate": 9.870282488503266e-06, "loss": 0.4, "step": 8448 }, { "epoch": 1.028171585031944, "grad_norm": 2.4671928882598877, "learning_rate": 9.868331961129352e-06, "loss": 0.3622, "step": 8449 }, { "epoch": 1.028293276543961, "grad_norm": 2.83992600440979, "learning_rate": 9.86638143876568e-06, "loss": 0.3442, "step": 8450 }, { "epoch": 1.0284149680559782, "grad_norm": 1.4505071640014648, "learning_rate": 9.86443092148647e-06, "loss": 0.3732, "step": 8451 }, { "epoch": 1.0285366595679952, "grad_norm": 1.1865346431732178, "learning_rate": 9.862480409365942e-06, "loss": 0.3853, "step": 8452 }, { "epoch": 1.0286583510800122, "grad_norm": 1.0423961877822876, "learning_rate": 9.860529902478324e-06, "loss": 0.3462, "step": 8453 }, { "epoch": 1.0287800425920293, "grad_norm": 2.1626217365264893, "learning_rate": 9.85857940089783e-06, "loss": 0.3861, "step": 8454 }, { "epoch": 1.0289017341040463, "grad_norm": 1.8315163850784302, "learning_rate": 9.856628904698679e-06, "loss": 0.3745, "step": 8455 }, { "epoch": 1.0290234256160633, "grad_norm": 1.2824817895889282, "learning_rate": 9.854678413955098e-06, "loss": 0.3668, "step": 8456 }, { "epoch": 1.0291451171280803, "grad_norm": 1.1688225269317627, "learning_rate": 9.852727928741307e-06, "loss": 0.3328, "step": 8457 }, { "epoch": 1.0292668086400973, "grad_norm": 3.8883888721466064, "learning_rate": 9.850777449131522e-06, "loss": 0.4166, "step": 8458 }, { "epoch": 1.0293885001521144, "grad_norm": 1.1900955438613892, "learning_rate": 9.848826975199961e-06, "loss": 0.3376, "step": 8459 }, { "epoch": 1.0295101916641314, "grad_norm": 2.726933717727661, "learning_rate": 9.846876507020847e-06, "loss": 0.4204, "step": 8460 }, { "epoch": 1.0296318831761484, "grad_norm": 1.4068548679351807, "learning_rate": 9.8449260446684e-06, "loss": 0.3828, "step": 8461 }, { "epoch": 1.0297535746881654, "grad_norm": 1.3711700439453125, "learning_rate": 9.842975588216838e-06, "loss": 0.3966, "step": 8462 }, { "epoch": 1.0298752662001824, "grad_norm": 1.7826921939849854, "learning_rate": 9.841025137740377e-06, "loss": 0.3647, "step": 8463 }, { "epoch": 1.0299969577121997, "grad_norm": 1.4266414642333984, "learning_rate": 9.839074693313241e-06, "loss": 0.4476, "step": 8464 }, { "epoch": 1.0301186492242167, "grad_norm": 1.5153205394744873, "learning_rate": 9.837124255009647e-06, "loss": 0.3638, "step": 8465 }, { "epoch": 1.0302403407362337, "grad_norm": 1.2447034120559692, "learning_rate": 9.83517382290381e-06, "loss": 0.4118, "step": 8466 }, { "epoch": 1.0303620322482507, "grad_norm": 2.4332399368286133, "learning_rate": 9.83322339706995e-06, "loss": 0.3432, "step": 8467 }, { "epoch": 1.0304837237602678, "grad_norm": 1.9648536443710327, "learning_rate": 9.83127297758229e-06, "loss": 0.3456, "step": 8468 }, { "epoch": 1.0306054152722848, "grad_norm": 1.266471266746521, "learning_rate": 9.829322564515041e-06, "loss": 0.4106, "step": 8469 }, { "epoch": 1.0307271067843018, "grad_norm": 0.9075831174850464, "learning_rate": 9.827372157942419e-06, "loss": 0.345, "step": 8470 }, { "epoch": 1.0308487982963188, "grad_norm": 1.0461357831954956, "learning_rate": 9.825421757938642e-06, "loss": 0.4258, "step": 8471 }, { "epoch": 1.0309704898083358, "grad_norm": 1.6484135389328003, "learning_rate": 9.82347136457793e-06, "loss": 0.3812, "step": 8472 }, { "epoch": 1.0310921813203529, "grad_norm": 2.3146800994873047, "learning_rate": 9.8215209779345e-06, "loss": 0.3719, "step": 8473 }, { "epoch": 1.0312138728323699, "grad_norm": 1.8806352615356445, "learning_rate": 9.819570598082564e-06, "loss": 0.4018, "step": 8474 }, { "epoch": 1.031335564344387, "grad_norm": 1.2429287433624268, "learning_rate": 9.81762022509634e-06, "loss": 0.3971, "step": 8475 }, { "epoch": 1.031457255856404, "grad_norm": 1.5485801696777344, "learning_rate": 9.815669859050046e-06, "loss": 0.3715, "step": 8476 }, { "epoch": 1.0315789473684212, "grad_norm": 1.1920545101165771, "learning_rate": 9.813719500017895e-06, "loss": 0.3639, "step": 8477 }, { "epoch": 1.0317006388804382, "grad_norm": 1.3843580484390259, "learning_rate": 9.8117691480741e-06, "loss": 0.3614, "step": 8478 }, { "epoch": 1.0318223303924552, "grad_norm": 1.169682264328003, "learning_rate": 9.809818803292882e-06, "loss": 0.4095, "step": 8479 }, { "epoch": 1.0319440219044722, "grad_norm": 1.1997531652450562, "learning_rate": 9.807868465748453e-06, "loss": 0.3971, "step": 8480 }, { "epoch": 1.0320657134164892, "grad_norm": 1.1105883121490479, "learning_rate": 9.805918135515025e-06, "loss": 0.3703, "step": 8481 }, { "epoch": 1.0321874049285062, "grad_norm": 2.0782408714294434, "learning_rate": 9.803967812666812e-06, "loss": 0.3923, "step": 8482 }, { "epoch": 1.0323090964405233, "grad_norm": 4.343510627746582, "learning_rate": 9.80201749727803e-06, "loss": 0.4519, "step": 8483 }, { "epoch": 1.0324307879525403, "grad_norm": 1.1199190616607666, "learning_rate": 9.800067189422894e-06, "loss": 0.3378, "step": 8484 }, { "epoch": 1.0325524794645573, "grad_norm": 1.6541204452514648, "learning_rate": 9.798116889175614e-06, "loss": 0.4255, "step": 8485 }, { "epoch": 1.0326741709765743, "grad_norm": 1.1295616626739502, "learning_rate": 9.7961665966104e-06, "loss": 0.3888, "step": 8486 }, { "epoch": 1.0327958624885913, "grad_norm": 1.1412512063980103, "learning_rate": 9.794216311801477e-06, "loss": 0.368, "step": 8487 }, { "epoch": 1.0329175540006084, "grad_norm": 1.2467306852340698, "learning_rate": 9.792266034823044e-06, "loss": 0.3577, "step": 8488 }, { "epoch": 1.0330392455126254, "grad_norm": 1.0315723419189453, "learning_rate": 9.790315765749319e-06, "loss": 0.3634, "step": 8489 }, { "epoch": 1.0331609370246426, "grad_norm": 1.0767110586166382, "learning_rate": 9.788365504654512e-06, "loss": 0.3715, "step": 8490 }, { "epoch": 1.0332826285366596, "grad_norm": 1.5041097402572632, "learning_rate": 9.78641525161284e-06, "loss": 0.397, "step": 8491 }, { "epoch": 1.0334043200486767, "grad_norm": 1.2021644115447998, "learning_rate": 9.78446500669851e-06, "loss": 0.3752, "step": 8492 }, { "epoch": 1.0335260115606937, "grad_norm": 2.2690627574920654, "learning_rate": 9.782514769985727e-06, "loss": 0.3089, "step": 8493 }, { "epoch": 1.0336477030727107, "grad_norm": 3.597916603088379, "learning_rate": 9.78056454154871e-06, "loss": 0.4476, "step": 8494 }, { "epoch": 1.0337693945847277, "grad_norm": 1.4413632154464722, "learning_rate": 9.778614321461668e-06, "loss": 0.3987, "step": 8495 }, { "epoch": 1.0338910860967447, "grad_norm": 1.7488199472427368, "learning_rate": 9.776664109798806e-06, "loss": 0.3999, "step": 8496 }, { "epoch": 1.0340127776087618, "grad_norm": 3.842494010925293, "learning_rate": 9.774713906634338e-06, "loss": 0.3472, "step": 8497 }, { "epoch": 1.0341344691207788, "grad_norm": 2.5658071041107178, "learning_rate": 9.772763712042473e-06, "loss": 0.4204, "step": 8498 }, { "epoch": 1.0342561606327958, "grad_norm": 1.6479542255401611, "learning_rate": 9.770813526097419e-06, "loss": 0.4029, "step": 8499 }, { "epoch": 1.0343778521448128, "grad_norm": 1.7017662525177002, "learning_rate": 9.768863348873384e-06, "loss": 0.3774, "step": 8500 }, { "epoch": 1.0344995436568298, "grad_norm": 1.4452855587005615, "learning_rate": 9.766913180444576e-06, "loss": 0.4026, "step": 8501 }, { "epoch": 1.034621235168847, "grad_norm": 2.4560563564300537, "learning_rate": 9.764963020885208e-06, "loss": 0.4194, "step": 8502 }, { "epoch": 1.034742926680864, "grad_norm": 2.3867592811584473, "learning_rate": 9.763012870269484e-06, "loss": 0.3994, "step": 8503 }, { "epoch": 1.0348646181928811, "grad_norm": 1.6456576585769653, "learning_rate": 9.761062728671607e-06, "loss": 0.3625, "step": 8504 }, { "epoch": 1.0349863097048981, "grad_norm": 1.394225835800171, "learning_rate": 9.759112596165787e-06, "loss": 0.4008, "step": 8505 }, { "epoch": 1.0351080012169152, "grad_norm": 1.8049538135528564, "learning_rate": 9.757162472826232e-06, "loss": 0.395, "step": 8506 }, { "epoch": 1.0352296927289322, "grad_norm": 3.1767828464508057, "learning_rate": 9.755212358727151e-06, "loss": 0.368, "step": 8507 }, { "epoch": 1.0353513842409492, "grad_norm": 2.3020334243774414, "learning_rate": 9.753262253942742e-06, "loss": 0.4263, "step": 8508 }, { "epoch": 1.0354730757529662, "grad_norm": 1.5832927227020264, "learning_rate": 9.751312158547216e-06, "loss": 0.3424, "step": 8509 }, { "epoch": 1.0355947672649832, "grad_norm": 1.5203386545181274, "learning_rate": 9.749362072614779e-06, "loss": 0.3642, "step": 8510 }, { "epoch": 1.0357164587770002, "grad_norm": 3.6001834869384766, "learning_rate": 9.747411996219633e-06, "loss": 0.3466, "step": 8511 }, { "epoch": 1.0358381502890173, "grad_norm": 1.806621789932251, "learning_rate": 9.745461929435982e-06, "loss": 0.4198, "step": 8512 }, { "epoch": 1.0359598418010343, "grad_norm": 2.951817750930786, "learning_rate": 9.743511872338034e-06, "loss": 0.36, "step": 8513 }, { "epoch": 1.0360815333130513, "grad_norm": 2.2316105365753174, "learning_rate": 9.74156182499999e-06, "loss": 0.4014, "step": 8514 }, { "epoch": 1.0362032248250685, "grad_norm": 1.3834682703018188, "learning_rate": 9.739611787496054e-06, "loss": 0.3548, "step": 8515 }, { "epoch": 1.0363249163370856, "grad_norm": 3.6755449771881104, "learning_rate": 9.737661759900426e-06, "loss": 0.4702, "step": 8516 }, { "epoch": 1.0364466078491026, "grad_norm": 2.0684592723846436, "learning_rate": 9.73571174228731e-06, "loss": 0.4418, "step": 8517 }, { "epoch": 1.0365682993611196, "grad_norm": 2.0509941577911377, "learning_rate": 9.733761734730913e-06, "loss": 0.4023, "step": 8518 }, { "epoch": 1.0366899908731366, "grad_norm": 2.624793291091919, "learning_rate": 9.731811737305432e-06, "loss": 0.4104, "step": 8519 }, { "epoch": 1.0368116823851536, "grad_norm": 1.7117773294448853, "learning_rate": 9.729861750085066e-06, "loss": 0.3884, "step": 8520 }, { "epoch": 1.0369333738971707, "grad_norm": 2.3872177600860596, "learning_rate": 9.727911773144024e-06, "loss": 0.3559, "step": 8521 }, { "epoch": 1.0370550654091877, "grad_norm": 3.1806843280792236, "learning_rate": 9.7259618065565e-06, "loss": 0.3488, "step": 8522 }, { "epoch": 1.0371767569212047, "grad_norm": 2.596280097961426, "learning_rate": 9.724011850396697e-06, "loss": 0.3712, "step": 8523 }, { "epoch": 1.0372984484332217, "grad_norm": 1.2299621105194092, "learning_rate": 9.722061904738811e-06, "loss": 0.3875, "step": 8524 }, { "epoch": 1.0374201399452387, "grad_norm": 1.091686487197876, "learning_rate": 9.720111969657052e-06, "loss": 0.3545, "step": 8525 }, { "epoch": 1.0375418314572558, "grad_norm": 1.6109882593154907, "learning_rate": 9.71816204522561e-06, "loss": 0.3877, "step": 8526 }, { "epoch": 1.037663522969273, "grad_norm": 1.3331568241119385, "learning_rate": 9.716212131518683e-06, "loss": 0.3777, "step": 8527 }, { "epoch": 1.03778521448129, "grad_norm": 3.5424487590789795, "learning_rate": 9.714262228610471e-06, "loss": 0.4608, "step": 8528 }, { "epoch": 1.037906905993307, "grad_norm": 1.1005115509033203, "learning_rate": 9.712312336575175e-06, "loss": 0.3841, "step": 8529 }, { "epoch": 1.038028597505324, "grad_norm": 2.815463066101074, "learning_rate": 9.71036245548699e-06, "loss": 0.2905, "step": 8530 }, { "epoch": 1.038150289017341, "grad_norm": 1.3579227924346924, "learning_rate": 9.708412585420111e-06, "loss": 0.3057, "step": 8531 }, { "epoch": 1.038271980529358, "grad_norm": 2.1427407264709473, "learning_rate": 9.706462726448738e-06, "loss": 0.4161, "step": 8532 }, { "epoch": 1.0383936720413751, "grad_norm": 1.2537420988082886, "learning_rate": 9.704512878647066e-06, "loss": 0.358, "step": 8533 }, { "epoch": 1.0385153635533921, "grad_norm": 1.2238306999206543, "learning_rate": 9.70256304208929e-06, "loss": 0.3449, "step": 8534 }, { "epoch": 1.0386370550654092, "grad_norm": 1.53570556640625, "learning_rate": 9.700613216849605e-06, "loss": 0.3847, "step": 8535 }, { "epoch": 1.0387587465774262, "grad_norm": 2.3927245140075684, "learning_rate": 9.69866340300221e-06, "loss": 0.4218, "step": 8536 }, { "epoch": 1.0388804380894432, "grad_norm": 1.9626636505126953, "learning_rate": 9.696713600621292e-06, "loss": 0.437, "step": 8537 }, { "epoch": 1.0390021296014602, "grad_norm": 1.0661342144012451, "learning_rate": 9.694763809781056e-06, "loss": 0.3301, "step": 8538 }, { "epoch": 1.0391238211134772, "grad_norm": 1.2507702112197876, "learning_rate": 9.692814030555683e-06, "loss": 0.3564, "step": 8539 }, { "epoch": 1.0392455126254945, "grad_norm": 1.2951289415359497, "learning_rate": 9.690864263019372e-06, "loss": 0.3778, "step": 8540 }, { "epoch": 1.0393672041375115, "grad_norm": 1.6303167343139648, "learning_rate": 9.68891450724632e-06, "loss": 0.3563, "step": 8541 }, { "epoch": 1.0394888956495285, "grad_norm": 1.423923373222351, "learning_rate": 9.686964763310712e-06, "loss": 0.3839, "step": 8542 }, { "epoch": 1.0396105871615455, "grad_norm": 1.3353351354599, "learning_rate": 9.685015031286741e-06, "loss": 0.3779, "step": 8543 }, { "epoch": 1.0397322786735625, "grad_norm": 1.1837313175201416, "learning_rate": 9.683065311248602e-06, "loss": 0.3522, "step": 8544 }, { "epoch": 1.0398539701855796, "grad_norm": 1.2641229629516602, "learning_rate": 9.681115603270484e-06, "loss": 0.3511, "step": 8545 }, { "epoch": 1.0399756616975966, "grad_norm": 1.8671753406524658, "learning_rate": 9.679165907426576e-06, "loss": 0.3627, "step": 8546 }, { "epoch": 1.0400973532096136, "grad_norm": 2.247713088989258, "learning_rate": 9.67721622379107e-06, "loss": 0.3977, "step": 8547 }, { "epoch": 1.0402190447216306, "grad_norm": 3.400810956954956, "learning_rate": 9.675266552438155e-06, "loss": 0.4593, "step": 8548 }, { "epoch": 1.0403407362336476, "grad_norm": 1.6697648763656616, "learning_rate": 9.673316893442025e-06, "loss": 0.334, "step": 8549 }, { "epoch": 1.0404624277456647, "grad_norm": 1.9920457601547241, "learning_rate": 9.671367246876856e-06, "loss": 0.4137, "step": 8550 }, { "epoch": 1.0405841192576817, "grad_norm": 1.3066365718841553, "learning_rate": 9.669417612816846e-06, "loss": 0.344, "step": 8551 }, { "epoch": 1.040705810769699, "grad_norm": 1.6996352672576904, "learning_rate": 9.66746799133618e-06, "loss": 0.4173, "step": 8552 }, { "epoch": 1.040827502281716, "grad_norm": 1.5663464069366455, "learning_rate": 9.665518382509046e-06, "loss": 0.4363, "step": 8553 }, { "epoch": 1.040949193793733, "grad_norm": 1.4060596227645874, "learning_rate": 9.663568786409628e-06, "loss": 0.3948, "step": 8554 }, { "epoch": 1.04107088530575, "grad_norm": 2.2613673210144043, "learning_rate": 9.661619203112115e-06, "loss": 0.4088, "step": 8555 }, { "epoch": 1.041192576817767, "grad_norm": 1.3217846155166626, "learning_rate": 9.659669632690691e-06, "loss": 0.419, "step": 8556 }, { "epoch": 1.041314268329784, "grad_norm": 2.6818349361419678, "learning_rate": 9.657720075219542e-06, "loss": 0.3835, "step": 8557 }, { "epoch": 1.041435959841801, "grad_norm": 1.4916032552719116, "learning_rate": 9.655770530772854e-06, "loss": 0.3406, "step": 8558 }, { "epoch": 1.041557651353818, "grad_norm": 1.768543004989624, "learning_rate": 9.65382099942481e-06, "loss": 0.3617, "step": 8559 }, { "epoch": 1.041679342865835, "grad_norm": 1.6199798583984375, "learning_rate": 9.651871481249591e-06, "loss": 0.3767, "step": 8560 }, { "epoch": 1.041801034377852, "grad_norm": 1.3501124382019043, "learning_rate": 9.649921976321388e-06, "loss": 0.405, "step": 8561 }, { "epoch": 1.0419227258898691, "grad_norm": 1.4673627614974976, "learning_rate": 9.647972484714374e-06, "loss": 0.3952, "step": 8562 }, { "epoch": 1.0420444174018861, "grad_norm": 1.5570197105407715, "learning_rate": 9.646023006502738e-06, "loss": 0.3859, "step": 8563 }, { "epoch": 1.0421661089139032, "grad_norm": 1.3670626878738403, "learning_rate": 9.644073541760656e-06, "loss": 0.3899, "step": 8564 }, { "epoch": 1.0422878004259204, "grad_norm": 1.4149664640426636, "learning_rate": 9.642124090562314e-06, "loss": 0.3713, "step": 8565 }, { "epoch": 1.0424094919379374, "grad_norm": 2.66680908203125, "learning_rate": 9.64017465298189e-06, "loss": 0.4047, "step": 8566 }, { "epoch": 1.0425311834499544, "grad_norm": 1.4891482591629028, "learning_rate": 9.638225229093568e-06, "loss": 0.3903, "step": 8567 }, { "epoch": 1.0426528749619715, "grad_norm": 1.4337607622146606, "learning_rate": 9.636275818971523e-06, "loss": 0.3711, "step": 8568 }, { "epoch": 1.0427745664739885, "grad_norm": 2.509216785430908, "learning_rate": 9.634326422689935e-06, "loss": 0.4449, "step": 8569 }, { "epoch": 1.0428962579860055, "grad_norm": 2.597322702407837, "learning_rate": 9.632377040322988e-06, "loss": 0.4404, "step": 8570 }, { "epoch": 1.0430179494980225, "grad_norm": 1.3111467361450195, "learning_rate": 9.630427671944851e-06, "loss": 0.3985, "step": 8571 }, { "epoch": 1.0431396410100395, "grad_norm": 1.3656083345413208, "learning_rate": 9.62847831762971e-06, "loss": 0.373, "step": 8572 }, { "epoch": 1.0432613325220566, "grad_norm": 1.302311897277832, "learning_rate": 9.626528977451735e-06, "loss": 0.4065, "step": 8573 }, { "epoch": 1.0433830240340736, "grad_norm": 1.2328763008117676, "learning_rate": 9.624579651485107e-06, "loss": 0.3925, "step": 8574 }, { "epoch": 1.0435047155460906, "grad_norm": 1.8730429410934448, "learning_rate": 9.622630339804e-06, "loss": 0.4329, "step": 8575 }, { "epoch": 1.0436264070581076, "grad_norm": 1.287023901939392, "learning_rate": 9.620681042482587e-06, "loss": 0.4223, "step": 8576 }, { "epoch": 1.0437480985701246, "grad_norm": 1.6565619707107544, "learning_rate": 9.618731759595049e-06, "loss": 0.3709, "step": 8577 }, { "epoch": 1.0438697900821419, "grad_norm": 1.4961142539978027, "learning_rate": 9.616782491215555e-06, "loss": 0.3484, "step": 8578 }, { "epoch": 1.0439914815941589, "grad_norm": 1.7269107103347778, "learning_rate": 9.61483323741828e-06, "loss": 0.4001, "step": 8579 }, { "epoch": 1.044113173106176, "grad_norm": 2.211500883102417, "learning_rate": 9.612883998277398e-06, "loss": 0.3564, "step": 8580 }, { "epoch": 1.044234864618193, "grad_norm": 1.8098746538162231, "learning_rate": 9.610934773867083e-06, "loss": 0.3972, "step": 8581 }, { "epoch": 1.04435655613021, "grad_norm": 2.49975848197937, "learning_rate": 9.608985564261502e-06, "loss": 0.3494, "step": 8582 }, { "epoch": 1.044478247642227, "grad_norm": 1.2084003686904907, "learning_rate": 9.60703636953483e-06, "loss": 0.3629, "step": 8583 }, { "epoch": 1.044599939154244, "grad_norm": 1.2928369045257568, "learning_rate": 9.605087189761244e-06, "loss": 0.3882, "step": 8584 }, { "epoch": 1.044721630666261, "grad_norm": 1.0913563966751099, "learning_rate": 9.6031380250149e-06, "loss": 0.3421, "step": 8585 }, { "epoch": 1.044843322178278, "grad_norm": 1.943540334701538, "learning_rate": 9.60118887536998e-06, "loss": 0.3426, "step": 8586 }, { "epoch": 1.044965013690295, "grad_norm": 1.5081590414047241, "learning_rate": 9.599239740900647e-06, "loss": 0.3626, "step": 8587 }, { "epoch": 1.045086705202312, "grad_norm": 2.3020002841949463, "learning_rate": 9.59729062168107e-06, "loss": 0.338, "step": 8588 }, { "epoch": 1.045208396714329, "grad_norm": 2.5086286067962646, "learning_rate": 9.59534151778542e-06, "loss": 0.3969, "step": 8589 }, { "epoch": 1.045330088226346, "grad_norm": 2.89668345451355, "learning_rate": 9.593392429287861e-06, "loss": 0.4072, "step": 8590 }, { "epoch": 1.0454517797383633, "grad_norm": 1.1879040002822876, "learning_rate": 9.591443356262564e-06, "loss": 0.3883, "step": 8591 }, { "epoch": 1.0455734712503804, "grad_norm": 1.5011917352676392, "learning_rate": 9.589494298783692e-06, "loss": 0.3596, "step": 8592 }, { "epoch": 1.0456951627623974, "grad_norm": 3.433818817138672, "learning_rate": 9.587545256925412e-06, "loss": 0.4029, "step": 8593 }, { "epoch": 1.0458168542744144, "grad_norm": 2.515930652618408, "learning_rate": 9.58559623076189e-06, "loss": 0.4472, "step": 8594 }, { "epoch": 1.0459385457864314, "grad_norm": 1.3873485326766968, "learning_rate": 9.583647220367291e-06, "loss": 0.3614, "step": 8595 }, { "epoch": 1.0460602372984484, "grad_norm": 1.8964276313781738, "learning_rate": 9.581698225815771e-06, "loss": 0.4187, "step": 8596 }, { "epoch": 1.0461819288104655, "grad_norm": 1.5862632989883423, "learning_rate": 9.579749247181502e-06, "loss": 0.3632, "step": 8597 }, { "epoch": 1.0463036203224825, "grad_norm": 2.7335329055786133, "learning_rate": 9.577800284538645e-06, "loss": 0.4063, "step": 8598 }, { "epoch": 1.0464253118344995, "grad_norm": 3.465146064758301, "learning_rate": 9.57585133796136e-06, "loss": 0.3964, "step": 8599 }, { "epoch": 1.0465470033465165, "grad_norm": 3.3736541271209717, "learning_rate": 9.57390240752381e-06, "loss": 0.3591, "step": 8600 }, { "epoch": 1.0466686948585335, "grad_norm": 3.1835906505584717, "learning_rate": 9.571953493300156e-06, "loss": 0.3519, "step": 8601 }, { "epoch": 1.0467903863705506, "grad_norm": 1.5710586309432983, "learning_rate": 9.570004595364557e-06, "loss": 0.4274, "step": 8602 }, { "epoch": 1.0469120778825678, "grad_norm": 1.8897483348846436, "learning_rate": 9.56805571379117e-06, "loss": 0.4137, "step": 8603 }, { "epoch": 1.0470337693945848, "grad_norm": 1.7368671894073486, "learning_rate": 9.566106848654163e-06, "loss": 0.3819, "step": 8604 }, { "epoch": 1.0471554609066018, "grad_norm": 1.4896665811538696, "learning_rate": 9.564158000027685e-06, "loss": 0.3523, "step": 8605 }, { "epoch": 1.0472771524186189, "grad_norm": 1.4961308240890503, "learning_rate": 9.562209167985896e-06, "loss": 0.3786, "step": 8606 }, { "epoch": 1.0473988439306359, "grad_norm": 2.8934786319732666, "learning_rate": 9.56026035260296e-06, "loss": 0.3932, "step": 8607 }, { "epoch": 1.047520535442653, "grad_norm": 1.2765803337097168, "learning_rate": 9.558311553953023e-06, "loss": 0.3441, "step": 8608 }, { "epoch": 1.04764222695467, "grad_norm": 2.254314661026001, "learning_rate": 9.556362772110249e-06, "loss": 0.3895, "step": 8609 }, { "epoch": 1.047763918466687, "grad_norm": 2.7281601428985596, "learning_rate": 9.554414007148786e-06, "loss": 0.4032, "step": 8610 }, { "epoch": 1.047885609978704, "grad_norm": 1.9062083959579468, "learning_rate": 9.552465259142793e-06, "loss": 0.3693, "step": 8611 }, { "epoch": 1.048007301490721, "grad_norm": 2.7098045349121094, "learning_rate": 9.550516528166425e-06, "loss": 0.4419, "step": 8612 }, { "epoch": 1.048128993002738, "grad_norm": 4.986382007598877, "learning_rate": 9.548567814293831e-06, "loss": 0.5056, "step": 8613 }, { "epoch": 1.048250684514755, "grad_norm": 1.4649035930633545, "learning_rate": 9.546619117599167e-06, "loss": 0.3925, "step": 8614 }, { "epoch": 1.048372376026772, "grad_norm": 1.3123399019241333, "learning_rate": 9.544670438156588e-06, "loss": 0.3392, "step": 8615 }, { "epoch": 1.0484940675387893, "grad_norm": 1.9751774072647095, "learning_rate": 9.542721776040236e-06, "loss": 0.4658, "step": 8616 }, { "epoch": 1.0486157590508063, "grad_norm": 1.6269317865371704, "learning_rate": 9.540773131324267e-06, "loss": 0.3948, "step": 8617 }, { "epoch": 1.0487374505628233, "grad_norm": 2.909550189971924, "learning_rate": 9.538824504082837e-06, "loss": 0.389, "step": 8618 }, { "epoch": 1.0488591420748403, "grad_norm": 3.067458152770996, "learning_rate": 9.536875894390083e-06, "loss": 0.443, "step": 8619 }, { "epoch": 1.0489808335868573, "grad_norm": 2.9201252460479736, "learning_rate": 9.534927302320165e-06, "loss": 0.3534, "step": 8620 }, { "epoch": 1.0491025250988744, "grad_norm": 1.5402511358261108, "learning_rate": 9.532978727947221e-06, "loss": 0.3872, "step": 8621 }, { "epoch": 1.0492242166108914, "grad_norm": 1.5486410856246948, "learning_rate": 9.531030171345405e-06, "loss": 0.4344, "step": 8622 }, { "epoch": 1.0493459081229084, "grad_norm": 1.9518561363220215, "learning_rate": 9.529081632588858e-06, "loss": 0.3875, "step": 8623 }, { "epoch": 1.0494675996349254, "grad_norm": 1.8062916994094849, "learning_rate": 9.527133111751736e-06, "loss": 0.3393, "step": 8624 }, { "epoch": 1.0495892911469424, "grad_norm": 2.0456032752990723, "learning_rate": 9.525184608908175e-06, "loss": 0.3419, "step": 8625 }, { "epoch": 1.0497109826589595, "grad_norm": 1.0896750688552856, "learning_rate": 9.52323612413232e-06, "loss": 0.3771, "step": 8626 }, { "epoch": 1.0498326741709765, "grad_norm": 2.2507569789886475, "learning_rate": 9.521287657498322e-06, "loss": 0.4071, "step": 8627 }, { "epoch": 1.0499543656829937, "grad_norm": 1.2501325607299805, "learning_rate": 9.519339209080316e-06, "loss": 0.3853, "step": 8628 }, { "epoch": 1.0500760571950107, "grad_norm": 1.1588976383209229, "learning_rate": 9.517390778952448e-06, "loss": 0.3499, "step": 8629 }, { "epoch": 1.0501977487070278, "grad_norm": 1.8816875219345093, "learning_rate": 9.515442367188866e-06, "loss": 0.3683, "step": 8630 }, { "epoch": 1.0503194402190448, "grad_norm": 3.6141932010650635, "learning_rate": 9.513493973863697e-06, "loss": 0.4278, "step": 8631 }, { "epoch": 1.0504411317310618, "grad_norm": 1.2083204984664917, "learning_rate": 9.511545599051094e-06, "loss": 0.3318, "step": 8632 }, { "epoch": 1.0505628232430788, "grad_norm": 2.6953628063201904, "learning_rate": 9.50959724282519e-06, "loss": 0.4406, "step": 8633 }, { "epoch": 1.0506845147550958, "grad_norm": 3.108588695526123, "learning_rate": 9.507648905260125e-06, "loss": 0.3972, "step": 8634 }, { "epoch": 1.0508062062671129, "grad_norm": 1.7507376670837402, "learning_rate": 9.505700586430042e-06, "loss": 0.4256, "step": 8635 }, { "epoch": 1.0509278977791299, "grad_norm": 2.2934391498565674, "learning_rate": 9.50375228640907e-06, "loss": 0.3484, "step": 8636 }, { "epoch": 1.051049589291147, "grad_norm": 1.4860869646072388, "learning_rate": 9.501804005271352e-06, "loss": 0.4162, "step": 8637 }, { "epoch": 1.051171280803164, "grad_norm": 2.9421560764312744, "learning_rate": 9.499855743091026e-06, "loss": 0.4091, "step": 8638 }, { "epoch": 1.051292972315181, "grad_norm": 3.1615567207336426, "learning_rate": 9.49790749994222e-06, "loss": 0.383, "step": 8639 }, { "epoch": 1.051414663827198, "grad_norm": 1.2670717239379883, "learning_rate": 9.495959275899075e-06, "loss": 0.374, "step": 8640 }, { "epoch": 1.0515363553392152, "grad_norm": 1.1805905103683472, "learning_rate": 9.494011071035726e-06, "loss": 0.4205, "step": 8641 }, { "epoch": 1.0516580468512322, "grad_norm": 1.6930080652236938, "learning_rate": 9.492062885426299e-06, "loss": 0.4114, "step": 8642 }, { "epoch": 1.0517797383632492, "grad_norm": 1.0549845695495605, "learning_rate": 9.490114719144932e-06, "loss": 0.4335, "step": 8643 }, { "epoch": 1.0519014298752662, "grad_norm": 2.2710280418395996, "learning_rate": 9.488166572265754e-06, "loss": 0.4464, "step": 8644 }, { "epoch": 1.0520231213872833, "grad_norm": 1.156900405883789, "learning_rate": 9.486218444862898e-06, "loss": 0.4372, "step": 8645 }, { "epoch": 1.0521448128993003, "grad_norm": 1.38802170753479, "learning_rate": 9.484270337010494e-06, "loss": 0.3541, "step": 8646 }, { "epoch": 1.0522665044113173, "grad_norm": 3.0831542015075684, "learning_rate": 9.482322248782671e-06, "loss": 0.504, "step": 8647 }, { "epoch": 1.0523881959233343, "grad_norm": 1.4287234544754028, "learning_rate": 9.480374180253557e-06, "loss": 0.4476, "step": 8648 }, { "epoch": 1.0525098874353513, "grad_norm": 1.5159581899642944, "learning_rate": 9.478426131497284e-06, "loss": 0.3851, "step": 8649 }, { "epoch": 1.0526315789473684, "grad_norm": 1.4192575216293335, "learning_rate": 9.476478102587973e-06, "loss": 0.4047, "step": 8650 }, { "epoch": 1.0527532704593854, "grad_norm": 1.8735322952270508, "learning_rate": 9.474530093599754e-06, "loss": 0.393, "step": 8651 }, { "epoch": 1.0528749619714024, "grad_norm": 1.8461400270462036, "learning_rate": 9.472582104606752e-06, "loss": 0.3292, "step": 8652 }, { "epoch": 1.0529966534834196, "grad_norm": 1.4358570575714111, "learning_rate": 9.4706341356831e-06, "loss": 0.3882, "step": 8653 }, { "epoch": 1.0531183449954367, "grad_norm": 2.3466813564300537, "learning_rate": 9.468686186902908e-06, "loss": 0.3251, "step": 8654 }, { "epoch": 1.0532400365074537, "grad_norm": 1.7165895700454712, "learning_rate": 9.466738258340308e-06, "loss": 0.4862, "step": 8655 }, { "epoch": 1.0533617280194707, "grad_norm": 1.2065918445587158, "learning_rate": 9.464790350069419e-06, "loss": 0.3958, "step": 8656 }, { "epoch": 1.0534834195314877, "grad_norm": 1.5157067775726318, "learning_rate": 9.462842462164366e-06, "loss": 0.3655, "step": 8657 }, { "epoch": 1.0536051110435047, "grad_norm": 2.298922061920166, "learning_rate": 9.46089459469927e-06, "loss": 0.3877, "step": 8658 }, { "epoch": 1.0537268025555218, "grad_norm": 3.1769769191741943, "learning_rate": 9.45894674774825e-06, "loss": 0.4215, "step": 8659 }, { "epoch": 1.0538484940675388, "grad_norm": 1.8627218008041382, "learning_rate": 9.456998921385427e-06, "loss": 0.4379, "step": 8660 }, { "epoch": 1.0539701855795558, "grad_norm": 3.351285934448242, "learning_rate": 9.455051115684919e-06, "loss": 0.465, "step": 8661 }, { "epoch": 1.0540918770915728, "grad_norm": 1.3899731636047363, "learning_rate": 9.453103330720842e-06, "loss": 0.4141, "step": 8662 }, { "epoch": 1.0542135686035898, "grad_norm": 1.1285886764526367, "learning_rate": 9.451155566567317e-06, "loss": 0.3705, "step": 8663 }, { "epoch": 1.0543352601156069, "grad_norm": 1.893664002418518, "learning_rate": 9.44920782329846e-06, "loss": 0.3667, "step": 8664 }, { "epoch": 1.0544569516276239, "grad_norm": 2.149561882019043, "learning_rate": 9.447260100988385e-06, "loss": 0.3469, "step": 8665 }, { "epoch": 1.0545786431396411, "grad_norm": 1.609248161315918, "learning_rate": 9.445312399711206e-06, "loss": 0.3795, "step": 8666 }, { "epoch": 1.0547003346516581, "grad_norm": 1.1167349815368652, "learning_rate": 9.443364719541038e-06, "loss": 0.3944, "step": 8667 }, { "epoch": 1.0548220261636752, "grad_norm": 1.164547324180603, "learning_rate": 9.441417060551994e-06, "loss": 0.3642, "step": 8668 }, { "epoch": 1.0549437176756922, "grad_norm": 1.6111547946929932, "learning_rate": 9.43946942281819e-06, "loss": 0.3918, "step": 8669 }, { "epoch": 1.0550654091877092, "grad_norm": 2.4160683155059814, "learning_rate": 9.437521806413733e-06, "loss": 0.3723, "step": 8670 }, { "epoch": 1.0551871006997262, "grad_norm": 1.576104760169983, "learning_rate": 9.435574211412734e-06, "loss": 0.4009, "step": 8671 }, { "epoch": 1.0553087922117432, "grad_norm": 1.3091334104537964, "learning_rate": 9.433626637889306e-06, "loss": 0.3651, "step": 8672 }, { "epoch": 1.0554304837237602, "grad_norm": 2.005645275115967, "learning_rate": 9.431679085917556e-06, "loss": 0.3496, "step": 8673 }, { "epoch": 1.0555521752357773, "grad_norm": 1.513327956199646, "learning_rate": 9.429731555571592e-06, "loss": 0.4306, "step": 8674 }, { "epoch": 1.0556738667477943, "grad_norm": 3.060948133468628, "learning_rate": 9.427784046925526e-06, "loss": 0.4433, "step": 8675 }, { "epoch": 1.0557955582598113, "grad_norm": 1.9053220748901367, "learning_rate": 9.425836560053462e-06, "loss": 0.3622, "step": 8676 }, { "epoch": 1.0559172497718283, "grad_norm": 2.5148892402648926, "learning_rate": 9.423889095029505e-06, "loss": 0.3924, "step": 8677 }, { "epoch": 1.0560389412838453, "grad_norm": 3.0455322265625, "learning_rate": 9.421941651927757e-06, "loss": 0.4315, "step": 8678 }, { "epoch": 1.0561606327958626, "grad_norm": 3.107144594192505, "learning_rate": 9.419994230822328e-06, "loss": 0.3726, "step": 8679 }, { "epoch": 1.0562823243078796, "grad_norm": 1.6978682279586792, "learning_rate": 9.41804683178732e-06, "loss": 0.3486, "step": 8680 }, { "epoch": 1.0564040158198966, "grad_norm": 1.8656402826309204, "learning_rate": 9.41609945489683e-06, "loss": 0.3767, "step": 8681 }, { "epoch": 1.0565257073319136, "grad_norm": 1.498172640800476, "learning_rate": 9.414152100224968e-06, "loss": 0.3394, "step": 8682 }, { "epoch": 1.0566473988439307, "grad_norm": 2.0387983322143555, "learning_rate": 9.41220476784583e-06, "loss": 0.3797, "step": 8683 }, { "epoch": 1.0567690903559477, "grad_norm": 2.857862949371338, "learning_rate": 9.410257457833518e-06, "loss": 0.4023, "step": 8684 }, { "epoch": 1.0568907818679647, "grad_norm": 1.7712801694869995, "learning_rate": 9.40831017026213e-06, "loss": 0.384, "step": 8685 }, { "epoch": 1.0570124733799817, "grad_norm": 1.6085553169250488, "learning_rate": 9.406362905205765e-06, "loss": 0.4231, "step": 8686 }, { "epoch": 1.0571341648919987, "grad_norm": 1.550455927848816, "learning_rate": 9.404415662738522e-06, "loss": 0.4289, "step": 8687 }, { "epoch": 1.0572558564040158, "grad_norm": 2.210205078125, "learning_rate": 9.402468442934497e-06, "loss": 0.3495, "step": 8688 }, { "epoch": 1.0573775479160328, "grad_norm": 3.5440616607666016, "learning_rate": 9.400521245867781e-06, "loss": 0.4284, "step": 8689 }, { "epoch": 1.0574992394280498, "grad_norm": 2.110807180404663, "learning_rate": 9.398574071612472e-06, "loss": 0.4182, "step": 8690 }, { "epoch": 1.0576209309400668, "grad_norm": 3.383354902267456, "learning_rate": 9.396626920242664e-06, "loss": 0.3348, "step": 8691 }, { "epoch": 1.057742622452084, "grad_norm": 3.4444661140441895, "learning_rate": 9.394679791832455e-06, "loss": 0.371, "step": 8692 }, { "epoch": 1.057864313964101, "grad_norm": 1.4034574031829834, "learning_rate": 9.392732686455927e-06, "loss": 0.3931, "step": 8693 }, { "epoch": 1.057986005476118, "grad_norm": 1.4356985092163086, "learning_rate": 9.39078560418718e-06, "loss": 0.3658, "step": 8694 }, { "epoch": 1.0581076969881351, "grad_norm": 2.41737699508667, "learning_rate": 9.388838545100304e-06, "loss": 0.4302, "step": 8695 }, { "epoch": 1.0582293885001521, "grad_norm": 2.68820858001709, "learning_rate": 9.386891509269383e-06, "loss": 0.306, "step": 8696 }, { "epoch": 1.0583510800121692, "grad_norm": 1.6873308420181274, "learning_rate": 9.38494449676851e-06, "loss": 0.4345, "step": 8697 }, { "epoch": 1.0584727715241862, "grad_norm": 3.743823289871216, "learning_rate": 9.382997507671774e-06, "loss": 0.4291, "step": 8698 }, { "epoch": 1.0585944630362032, "grad_norm": 1.1851119995117188, "learning_rate": 9.38105054205326e-06, "loss": 0.3115, "step": 8699 }, { "epoch": 1.0587161545482202, "grad_norm": 1.9133899211883545, "learning_rate": 9.379103599987053e-06, "loss": 0.3641, "step": 8700 }, { "epoch": 1.0588378460602372, "grad_norm": 1.8852527141571045, "learning_rate": 9.37715668154724e-06, "loss": 0.4106, "step": 8701 }, { "epoch": 1.0589595375722543, "grad_norm": 2.589930534362793, "learning_rate": 9.375209786807901e-06, "loss": 0.4151, "step": 8702 }, { "epoch": 1.0590812290842713, "grad_norm": 4.11845064163208, "learning_rate": 9.373262915843127e-06, "loss": 0.3209, "step": 8703 }, { "epoch": 1.0592029205962885, "grad_norm": 1.5849689245224, "learning_rate": 9.371316068726993e-06, "loss": 0.4191, "step": 8704 }, { "epoch": 1.0593246121083055, "grad_norm": 1.9058828353881836, "learning_rate": 9.369369245533585e-06, "loss": 0.3362, "step": 8705 }, { "epoch": 1.0594463036203225, "grad_norm": 1.423583745956421, "learning_rate": 9.367422446336984e-06, "loss": 0.4166, "step": 8706 }, { "epoch": 1.0595679951323396, "grad_norm": 3.1402063369750977, "learning_rate": 9.365475671211266e-06, "loss": 0.4016, "step": 8707 }, { "epoch": 1.0596896866443566, "grad_norm": 3.0349225997924805, "learning_rate": 9.363528920230512e-06, "loss": 0.3238, "step": 8708 }, { "epoch": 1.0598113781563736, "grad_norm": 2.0186822414398193, "learning_rate": 9.361582193468801e-06, "loss": 0.3772, "step": 8709 }, { "epoch": 1.0599330696683906, "grad_norm": 2.000281810760498, "learning_rate": 9.359635491000214e-06, "loss": 0.3737, "step": 8710 }, { "epoch": 1.0600547611804076, "grad_norm": 1.2245726585388184, "learning_rate": 9.357688812898818e-06, "loss": 0.4214, "step": 8711 }, { "epoch": 1.0601764526924247, "grad_norm": 1.5693227052688599, "learning_rate": 9.35574215923869e-06, "loss": 0.3292, "step": 8712 }, { "epoch": 1.0602981442044417, "grad_norm": 1.3693928718566895, "learning_rate": 9.353795530093906e-06, "loss": 0.3625, "step": 8713 }, { "epoch": 1.0604198357164587, "grad_norm": 1.4481825828552246, "learning_rate": 9.351848925538543e-06, "loss": 0.39, "step": 8714 }, { "epoch": 1.0605415272284757, "grad_norm": 1.5233197212219238, "learning_rate": 9.349902345646669e-06, "loss": 0.3279, "step": 8715 }, { "epoch": 1.0606632187404927, "grad_norm": 2.397857189178467, "learning_rate": 9.347955790492354e-06, "loss": 0.3597, "step": 8716 }, { "epoch": 1.06078491025251, "grad_norm": 4.655124664306641, "learning_rate": 9.346009260149673e-06, "loss": 0.4989, "step": 8717 }, { "epoch": 1.060906601764527, "grad_norm": 1.253547191619873, "learning_rate": 9.344062754692694e-06, "loss": 0.3722, "step": 8718 }, { "epoch": 1.061028293276544, "grad_norm": 1.3941712379455566, "learning_rate": 9.342116274195484e-06, "loss": 0.3944, "step": 8719 }, { "epoch": 1.061149984788561, "grad_norm": 1.6175655126571655, "learning_rate": 9.340169818732109e-06, "loss": 0.4208, "step": 8720 }, { "epoch": 1.061271676300578, "grad_norm": 3.4816179275512695, "learning_rate": 9.338223388376642e-06, "loss": 0.4428, "step": 8721 }, { "epoch": 1.061393367812595, "grad_norm": 3.7616019248962402, "learning_rate": 9.336276983203148e-06, "loss": 0.3549, "step": 8722 }, { "epoch": 1.061515059324612, "grad_norm": 3.567310333251953, "learning_rate": 9.334330603285683e-06, "loss": 0.3903, "step": 8723 }, { "epoch": 1.0616367508366291, "grad_norm": 1.2563401460647583, "learning_rate": 9.332384248698316e-06, "loss": 0.4021, "step": 8724 }, { "epoch": 1.0617584423486461, "grad_norm": 1.5313869714736938, "learning_rate": 9.330437919515111e-06, "loss": 0.4242, "step": 8725 }, { "epoch": 1.0618801338606632, "grad_norm": 2.9950108528137207, "learning_rate": 9.328491615810131e-06, "loss": 0.3551, "step": 8726 }, { "epoch": 1.0620018253726802, "grad_norm": 2.3801920413970947, "learning_rate": 9.326545337657434e-06, "loss": 0.3934, "step": 8727 }, { "epoch": 1.0621235168846972, "grad_norm": 1.8461861610412598, "learning_rate": 9.324599085131078e-06, "loss": 0.4328, "step": 8728 }, { "epoch": 1.0622452083967144, "grad_norm": 2.450988292694092, "learning_rate": 9.32265285830513e-06, "loss": 0.4012, "step": 8729 }, { "epoch": 1.0623668999087315, "grad_norm": 1.270151972770691, "learning_rate": 9.320706657253638e-06, "loss": 0.4078, "step": 8730 }, { "epoch": 1.0624885914207485, "grad_norm": 1.7064636945724487, "learning_rate": 9.318760482050665e-06, "loss": 0.4137, "step": 8731 }, { "epoch": 1.0626102829327655, "grad_norm": 2.0447769165039062, "learning_rate": 9.31681433277027e-06, "loss": 0.3445, "step": 8732 }, { "epoch": 1.0627319744447825, "grad_norm": 1.4277113676071167, "learning_rate": 9.3148682094865e-06, "loss": 0.3636, "step": 8733 }, { "epoch": 1.0628536659567995, "grad_norm": 3.52839732170105, "learning_rate": 9.312922112273416e-06, "loss": 0.4182, "step": 8734 }, { "epoch": 1.0629753574688166, "grad_norm": 4.402937412261963, "learning_rate": 9.310976041205065e-06, "loss": 0.4483, "step": 8735 }, { "epoch": 1.0630970489808336, "grad_norm": 2.3507585525512695, "learning_rate": 9.309029996355499e-06, "loss": 0.3871, "step": 8736 }, { "epoch": 1.0632187404928506, "grad_norm": 1.6308351755142212, "learning_rate": 9.307083977798777e-06, "loss": 0.3349, "step": 8737 }, { "epoch": 1.0633404320048676, "grad_norm": 1.4599895477294922, "learning_rate": 9.305137985608941e-06, "loss": 0.3478, "step": 8738 }, { "epoch": 1.0634621235168846, "grad_norm": 1.2582248449325562, "learning_rate": 9.303192019860042e-06, "loss": 0.3317, "step": 8739 }, { "epoch": 1.0635838150289016, "grad_norm": 1.1287273168563843, "learning_rate": 9.301246080626133e-06, "loss": 0.3687, "step": 8740 }, { "epoch": 1.0637055065409187, "grad_norm": 1.214354395866394, "learning_rate": 9.299300167981254e-06, "loss": 0.4112, "step": 8741 }, { "epoch": 1.063827198052936, "grad_norm": 3.5570178031921387, "learning_rate": 9.297354281999453e-06, "loss": 0.4338, "step": 8742 }, { "epoch": 1.063948889564953, "grad_norm": 2.9221506118774414, "learning_rate": 9.295408422754777e-06, "loss": 0.4156, "step": 8743 }, { "epoch": 1.06407058107697, "grad_norm": 1.737151026725769, "learning_rate": 9.29346259032127e-06, "loss": 0.4072, "step": 8744 }, { "epoch": 1.064192272588987, "grad_norm": 1.730349063873291, "learning_rate": 9.291516784772977e-06, "loss": 0.4002, "step": 8745 }, { "epoch": 1.064313964101004, "grad_norm": 2.5676918029785156, "learning_rate": 9.289571006183932e-06, "loss": 0.3182, "step": 8746 }, { "epoch": 1.064435655613021, "grad_norm": 2.8133351802825928, "learning_rate": 9.287625254628183e-06, "loss": 0.3892, "step": 8747 }, { "epoch": 1.064557347125038, "grad_norm": 1.8877850770950317, "learning_rate": 9.285679530179766e-06, "loss": 0.3755, "step": 8748 }, { "epoch": 1.064679038637055, "grad_norm": 1.8910564184188843, "learning_rate": 9.28373383291272e-06, "loss": 0.4811, "step": 8749 }, { "epoch": 1.064800730149072, "grad_norm": 1.5362210273742676, "learning_rate": 9.281788162901087e-06, "loss": 0.4273, "step": 8750 }, { "epoch": 1.064922421661089, "grad_norm": 2.644871950149536, "learning_rate": 9.279842520218898e-06, "loss": 0.3503, "step": 8751 }, { "epoch": 1.065044113173106, "grad_norm": 2.3236091136932373, "learning_rate": 9.277896904940196e-06, "loss": 0.3814, "step": 8752 }, { "epoch": 1.0651658046851231, "grad_norm": 1.8920600414276123, "learning_rate": 9.275951317139008e-06, "loss": 0.4223, "step": 8753 }, { "epoch": 1.0652874961971404, "grad_norm": 1.8277065753936768, "learning_rate": 9.274005756889373e-06, "loss": 0.3635, "step": 8754 }, { "epoch": 1.0654091877091574, "grad_norm": 1.6762092113494873, "learning_rate": 9.272060224265321e-06, "loss": 0.3715, "step": 8755 }, { "epoch": 1.0655308792211744, "grad_norm": 3.025296211242676, "learning_rate": 9.270114719340889e-06, "loss": 0.3354, "step": 8756 }, { "epoch": 1.0656525707331914, "grad_norm": 1.6936650276184082, "learning_rate": 9.268169242190097e-06, "loss": 0.3951, "step": 8757 }, { "epoch": 1.0657742622452084, "grad_norm": 2.7245430946350098, "learning_rate": 9.26622379288698e-06, "loss": 0.4547, "step": 8758 }, { "epoch": 1.0658959537572255, "grad_norm": 2.2041938304901123, "learning_rate": 9.264278371505568e-06, "loss": 0.4219, "step": 8759 }, { "epoch": 1.0660176452692425, "grad_norm": 2.5318431854248047, "learning_rate": 9.262332978119887e-06, "loss": 0.3871, "step": 8760 }, { "epoch": 1.0661393367812595, "grad_norm": 1.6668375730514526, "learning_rate": 9.260387612803961e-06, "loss": 0.3858, "step": 8761 }, { "epoch": 1.0662610282932765, "grad_norm": 3.3045215606689453, "learning_rate": 9.258442275631818e-06, "loss": 0.416, "step": 8762 }, { "epoch": 1.0663827198052935, "grad_norm": 1.2915488481521606, "learning_rate": 9.256496966677484e-06, "loss": 0.379, "step": 8763 }, { "epoch": 1.0665044113173106, "grad_norm": 2.8317501544952393, "learning_rate": 9.254551686014976e-06, "loss": 0.3517, "step": 8764 }, { "epoch": 1.0666261028293276, "grad_norm": 2.2278428077697754, "learning_rate": 9.252606433718321e-06, "loss": 0.3576, "step": 8765 }, { "epoch": 1.0667477943413446, "grad_norm": 3.0616414546966553, "learning_rate": 9.25066120986154e-06, "loss": 0.3451, "step": 8766 }, { "epoch": 1.0668694858533618, "grad_norm": 1.6762421131134033, "learning_rate": 9.248716014518647e-06, "loss": 0.4452, "step": 8767 }, { "epoch": 1.0669911773653789, "grad_norm": 2.5014431476593018, "learning_rate": 9.246770847763671e-06, "loss": 0.3339, "step": 8768 }, { "epoch": 1.0671128688773959, "grad_norm": 1.3409093618392944, "learning_rate": 9.244825709670619e-06, "loss": 0.3892, "step": 8769 }, { "epoch": 1.067234560389413, "grad_norm": 2.126960277557373, "learning_rate": 9.242880600313511e-06, "loss": 0.4607, "step": 8770 }, { "epoch": 1.06735625190143, "grad_norm": 1.0942060947418213, "learning_rate": 9.240935519766365e-06, "loss": 0.3462, "step": 8771 }, { "epoch": 1.067477943413447, "grad_norm": 1.2500476837158203, "learning_rate": 9.238990468103192e-06, "loss": 0.364, "step": 8772 }, { "epoch": 1.067599634925464, "grad_norm": 1.6133836507797241, "learning_rate": 9.237045445398007e-06, "loss": 0.4237, "step": 8773 }, { "epoch": 1.067721326437481, "grad_norm": 1.4467661380767822, "learning_rate": 9.235100451724825e-06, "loss": 0.3572, "step": 8774 }, { "epoch": 1.067843017949498, "grad_norm": 1.4626907110214233, "learning_rate": 9.23315548715765e-06, "loss": 0.4046, "step": 8775 }, { "epoch": 1.067964709461515, "grad_norm": 1.370842456817627, "learning_rate": 9.231210551770497e-06, "loss": 0.326, "step": 8776 }, { "epoch": 1.068086400973532, "grad_norm": 1.4305375814437866, "learning_rate": 9.229265645637375e-06, "loss": 0.4208, "step": 8777 }, { "epoch": 1.068208092485549, "grad_norm": 1.6250733137130737, "learning_rate": 9.227320768832293e-06, "loss": 0.3933, "step": 8778 }, { "epoch": 1.0683297839975663, "grad_norm": 1.7090439796447754, "learning_rate": 9.225375921429255e-06, "loss": 0.4064, "step": 8779 }, { "epoch": 1.0684514755095833, "grad_norm": 3.8332064151763916, "learning_rate": 9.223431103502263e-06, "loss": 0.4336, "step": 8780 }, { "epoch": 1.0685731670216003, "grad_norm": 2.608372688293457, "learning_rate": 9.221486315125325e-06, "loss": 0.3625, "step": 8781 }, { "epoch": 1.0686948585336173, "grad_norm": 1.9305294752120972, "learning_rate": 9.219541556372447e-06, "loss": 0.3772, "step": 8782 }, { "epoch": 1.0688165500456344, "grad_norm": 1.221787929534912, "learning_rate": 9.217596827317624e-06, "loss": 0.4123, "step": 8783 }, { "epoch": 1.0689382415576514, "grad_norm": 1.9899630546569824, "learning_rate": 9.215652128034864e-06, "loss": 0.4315, "step": 8784 }, { "epoch": 1.0690599330696684, "grad_norm": 2.623753547668457, "learning_rate": 9.213707458598163e-06, "loss": 0.3293, "step": 8785 }, { "epoch": 1.0691816245816854, "grad_norm": 1.6596559286117554, "learning_rate": 9.211762819081522e-06, "loss": 0.3473, "step": 8786 }, { "epoch": 1.0693033160937024, "grad_norm": 1.4267139434814453, "learning_rate": 9.209818209558935e-06, "loss": 0.405, "step": 8787 }, { "epoch": 1.0694250076057195, "grad_norm": 1.655957818031311, "learning_rate": 9.207873630104401e-06, "loss": 0.4123, "step": 8788 }, { "epoch": 1.0695466991177365, "grad_norm": 2.438105821609497, "learning_rate": 9.205929080791919e-06, "loss": 0.4156, "step": 8789 }, { "epoch": 1.0696683906297535, "grad_norm": 1.2603689432144165, "learning_rate": 9.203984561695476e-06, "loss": 0.3776, "step": 8790 }, { "epoch": 1.0697900821417705, "grad_norm": 2.1065311431884766, "learning_rate": 9.202040072889071e-06, "loss": 0.3181, "step": 8791 }, { "epoch": 1.0699117736537875, "grad_norm": 2.2273366451263428, "learning_rate": 9.200095614446689e-06, "loss": 0.4216, "step": 8792 }, { "epoch": 1.0700334651658048, "grad_norm": 1.4721736907958984, "learning_rate": 9.198151186442325e-06, "loss": 0.3372, "step": 8793 }, { "epoch": 1.0701551566778218, "grad_norm": 1.687626838684082, "learning_rate": 9.19620678894997e-06, "loss": 0.409, "step": 8794 }, { "epoch": 1.0702768481898388, "grad_norm": 1.4892747402191162, "learning_rate": 9.194262422043609e-06, "loss": 0.3486, "step": 8795 }, { "epoch": 1.0703985397018558, "grad_norm": 2.552877426147461, "learning_rate": 9.192318085797232e-06, "loss": 0.4032, "step": 8796 }, { "epoch": 1.0705202312138729, "grad_norm": 1.3186944723129272, "learning_rate": 9.190373780284824e-06, "loss": 0.3962, "step": 8797 }, { "epoch": 1.0706419227258899, "grad_norm": 1.7931240797042847, "learning_rate": 9.188429505580366e-06, "loss": 0.4171, "step": 8798 }, { "epoch": 1.070763614237907, "grad_norm": 1.9625226259231567, "learning_rate": 9.186485261757848e-06, "loss": 0.3774, "step": 8799 }, { "epoch": 1.070885305749924, "grad_norm": 1.2802168130874634, "learning_rate": 9.18454104889125e-06, "loss": 0.3809, "step": 8800 }, { "epoch": 1.071006997261941, "grad_norm": 3.9507319927215576, "learning_rate": 9.182596867054551e-06, "loss": 0.3209, "step": 8801 }, { "epoch": 1.071128688773958, "grad_norm": 1.738481879234314, "learning_rate": 9.180652716321739e-06, "loss": 0.3867, "step": 8802 }, { "epoch": 1.071250380285975, "grad_norm": 1.4252978563308716, "learning_rate": 9.17870859676678e-06, "loss": 0.3912, "step": 8803 }, { "epoch": 1.071372071797992, "grad_norm": 1.5513497591018677, "learning_rate": 9.176764508463661e-06, "loss": 0.3632, "step": 8804 }, { "epoch": 1.0714937633100092, "grad_norm": 2.4214656352996826, "learning_rate": 9.174820451486357e-06, "loss": 0.3568, "step": 8805 }, { "epoch": 1.0716154548220262, "grad_norm": 1.6125140190124512, "learning_rate": 9.172876425908843e-06, "loss": 0.3933, "step": 8806 }, { "epoch": 1.0717371463340433, "grad_norm": 1.800565242767334, "learning_rate": 9.17093243180509e-06, "loss": 0.3946, "step": 8807 }, { "epoch": 1.0718588378460603, "grad_norm": 1.5824241638183594, "learning_rate": 9.168988469249079e-06, "loss": 0.3349, "step": 8808 }, { "epoch": 1.0719805293580773, "grad_norm": 1.8837624788284302, "learning_rate": 9.167044538314774e-06, "loss": 0.3671, "step": 8809 }, { "epoch": 1.0721022208700943, "grad_norm": 3.5118982791900635, "learning_rate": 9.165100639076148e-06, "loss": 0.3186, "step": 8810 }, { "epoch": 1.0722239123821113, "grad_norm": 1.37621009349823, "learning_rate": 9.16315677160717e-06, "loss": 0.367, "step": 8811 }, { "epoch": 1.0723456038941284, "grad_norm": 1.3543016910552979, "learning_rate": 9.161212935981812e-06, "loss": 0.379, "step": 8812 }, { "epoch": 1.0724672954061454, "grad_norm": 2.310431718826294, "learning_rate": 9.159269132274036e-06, "loss": 0.4218, "step": 8813 }, { "epoch": 1.0725889869181624, "grad_norm": 1.7307064533233643, "learning_rate": 9.157325360557815e-06, "loss": 0.3893, "step": 8814 }, { "epoch": 1.0727106784301794, "grad_norm": 1.887952446937561, "learning_rate": 9.155381620907102e-06, "loss": 0.4116, "step": 8815 }, { "epoch": 1.0728323699421964, "grad_norm": 1.7383230924606323, "learning_rate": 9.15343791339587e-06, "loss": 0.3697, "step": 8816 }, { "epoch": 1.0729540614542135, "grad_norm": 1.839990496635437, "learning_rate": 9.151494238098075e-06, "loss": 0.4193, "step": 8817 }, { "epoch": 1.0730757529662307, "grad_norm": 1.9042268991470337, "learning_rate": 9.149550595087682e-06, "loss": 0.4027, "step": 8818 }, { "epoch": 1.0731974444782477, "grad_norm": 3.3323802947998047, "learning_rate": 9.147606984438647e-06, "loss": 0.4564, "step": 8819 }, { "epoch": 1.0733191359902647, "grad_norm": 1.3060674667358398, "learning_rate": 9.145663406224935e-06, "loss": 0.3908, "step": 8820 }, { "epoch": 1.0734408275022818, "grad_norm": 1.3116837739944458, "learning_rate": 9.143719860520494e-06, "loss": 0.4063, "step": 8821 }, { "epoch": 1.0735625190142988, "grad_norm": 1.5498803853988647, "learning_rate": 9.141776347399286e-06, "loss": 0.4034, "step": 8822 }, { "epoch": 1.0736842105263158, "grad_norm": 2.3495378494262695, "learning_rate": 9.139832866935266e-06, "loss": 0.3526, "step": 8823 }, { "epoch": 1.0738059020383328, "grad_norm": 1.535097599029541, "learning_rate": 9.137889419202385e-06, "loss": 0.4213, "step": 8824 }, { "epoch": 1.0739275935503498, "grad_norm": 3.1412715911865234, "learning_rate": 9.135946004274595e-06, "loss": 0.3762, "step": 8825 }, { "epoch": 1.0740492850623669, "grad_norm": 1.5283244848251343, "learning_rate": 9.134002622225853e-06, "loss": 0.3751, "step": 8826 }, { "epoch": 1.0741709765743839, "grad_norm": 1.8251268863677979, "learning_rate": 9.1320592731301e-06, "loss": 0.395, "step": 8827 }, { "epoch": 1.074292668086401, "grad_norm": 1.467444658279419, "learning_rate": 9.130115957061289e-06, "loss": 0.4248, "step": 8828 }, { "epoch": 1.074414359598418, "grad_norm": 1.3606672286987305, "learning_rate": 9.128172674093363e-06, "loss": 0.343, "step": 8829 }, { "epoch": 1.0745360511104352, "grad_norm": 2.5812063217163086, "learning_rate": 9.126229424300274e-06, "loss": 0.3869, "step": 8830 }, { "epoch": 1.0746577426224522, "grad_norm": 3.732025384902954, "learning_rate": 9.124286207755966e-06, "loss": 0.4326, "step": 8831 }, { "epoch": 1.0747794341344692, "grad_norm": 2.6255147457122803, "learning_rate": 9.122343024534377e-06, "loss": 0.4407, "step": 8832 }, { "epoch": 1.0749011256464862, "grad_norm": 2.0973968505859375, "learning_rate": 9.120399874709453e-06, "loss": 0.3559, "step": 8833 }, { "epoch": 1.0750228171585032, "grad_norm": 3.68205189704895, "learning_rate": 9.118456758355137e-06, "loss": 0.4265, "step": 8834 }, { "epoch": 1.0751445086705202, "grad_norm": 2.149353265762329, "learning_rate": 9.116513675545363e-06, "loss": 0.3803, "step": 8835 }, { "epoch": 1.0752662001825373, "grad_norm": 1.8083761930465698, "learning_rate": 9.114570626354073e-06, "loss": 0.4038, "step": 8836 }, { "epoch": 1.0753878916945543, "grad_norm": 1.7582157850265503, "learning_rate": 9.112627610855208e-06, "loss": 0.4099, "step": 8837 }, { "epoch": 1.0755095832065713, "grad_norm": 1.6295803785324097, "learning_rate": 9.110684629122695e-06, "loss": 0.4194, "step": 8838 }, { "epoch": 1.0756312747185883, "grad_norm": 1.2005438804626465, "learning_rate": 9.108741681230475e-06, "loss": 0.394, "step": 8839 }, { "epoch": 1.0757529662306053, "grad_norm": 1.6948192119598389, "learning_rate": 9.106798767252474e-06, "loss": 0.3906, "step": 8840 }, { "epoch": 1.0758746577426224, "grad_norm": 2.4337706565856934, "learning_rate": 9.104855887262631e-06, "loss": 0.4182, "step": 8841 }, { "epoch": 1.0759963492546394, "grad_norm": 3.97274112701416, "learning_rate": 9.102913041334877e-06, "loss": 0.3682, "step": 8842 }, { "epoch": 1.0761180407666566, "grad_norm": 2.991827964782715, "learning_rate": 9.100970229543135e-06, "loss": 0.3702, "step": 8843 }, { "epoch": 1.0762397322786736, "grad_norm": 3.40278697013855, "learning_rate": 9.099027451961337e-06, "loss": 0.3512, "step": 8844 }, { "epoch": 1.0763614237906907, "grad_norm": 3.2497072219848633, "learning_rate": 9.09708470866341e-06, "loss": 0.4095, "step": 8845 }, { "epoch": 1.0764831153027077, "grad_norm": 1.726302981376648, "learning_rate": 9.09514199972328e-06, "loss": 0.4569, "step": 8846 }, { "epoch": 1.0766048068147247, "grad_norm": 1.5319948196411133, "learning_rate": 9.093199325214868e-06, "loss": 0.364, "step": 8847 }, { "epoch": 1.0767264983267417, "grad_norm": 3.0809860229492188, "learning_rate": 9.0912566852121e-06, "loss": 0.4536, "step": 8848 }, { "epoch": 1.0768481898387587, "grad_norm": 1.3485472202301025, "learning_rate": 9.089314079788899e-06, "loss": 0.3907, "step": 8849 }, { "epoch": 1.0769698813507758, "grad_norm": 1.5011563301086426, "learning_rate": 9.08737150901918e-06, "loss": 0.3879, "step": 8850 }, { "epoch": 1.0770915728627928, "grad_norm": 2.9348831176757812, "learning_rate": 9.085428972976863e-06, "loss": 0.3307, "step": 8851 }, { "epoch": 1.0772132643748098, "grad_norm": 1.3488210439682007, "learning_rate": 9.083486471735866e-06, "loss": 0.3311, "step": 8852 }, { "epoch": 1.0773349558868268, "grad_norm": 2.135638952255249, "learning_rate": 9.081544005370107e-06, "loss": 0.3848, "step": 8853 }, { "epoch": 1.0774566473988438, "grad_norm": 1.6729844808578491, "learning_rate": 9.0796015739535e-06, "loss": 0.4042, "step": 8854 }, { "epoch": 1.077578338910861, "grad_norm": 2.3693199157714844, "learning_rate": 9.077659177559957e-06, "loss": 0.4112, "step": 8855 }, { "epoch": 1.077700030422878, "grad_norm": 2.576648473739624, "learning_rate": 9.075716816263392e-06, "loss": 0.4163, "step": 8856 }, { "epoch": 1.0778217219348951, "grad_norm": 2.359023332595825, "learning_rate": 9.073774490137716e-06, "loss": 0.3977, "step": 8857 }, { "epoch": 1.0779434134469121, "grad_norm": 1.4538905620574951, "learning_rate": 9.071832199256836e-06, "loss": 0.3907, "step": 8858 }, { "epoch": 1.0780651049589292, "grad_norm": 2.0680110454559326, "learning_rate": 9.069889943694663e-06, "loss": 0.3331, "step": 8859 }, { "epoch": 1.0781867964709462, "grad_norm": 3.00321102142334, "learning_rate": 9.067947723525107e-06, "loss": 0.4598, "step": 8860 }, { "epoch": 1.0783084879829632, "grad_norm": 2.321582078933716, "learning_rate": 9.066005538822064e-06, "loss": 0.324, "step": 8861 }, { "epoch": 1.0784301794949802, "grad_norm": 1.5175471305847168, "learning_rate": 9.064063389659446e-06, "loss": 0.3803, "step": 8862 }, { "epoch": 1.0785518710069972, "grad_norm": 1.7753382921218872, "learning_rate": 9.062121276111152e-06, "loss": 0.451, "step": 8863 }, { "epoch": 1.0786735625190143, "grad_norm": 1.395018219947815, "learning_rate": 9.060179198251085e-06, "loss": 0.346, "step": 8864 }, { "epoch": 1.0787952540310313, "grad_norm": 1.4062145948410034, "learning_rate": 9.058237156153145e-06, "loss": 0.3591, "step": 8865 }, { "epoch": 1.0789169455430483, "grad_norm": 2.2314913272857666, "learning_rate": 9.05629514989123e-06, "loss": 0.3756, "step": 8866 }, { "epoch": 1.0790386370550653, "grad_norm": 2.2746989727020264, "learning_rate": 9.054353179539237e-06, "loss": 0.3447, "step": 8867 }, { "epoch": 1.0791603285670825, "grad_norm": 3.5027270317077637, "learning_rate": 9.052411245171066e-06, "loss": 0.3327, "step": 8868 }, { "epoch": 1.0792820200790996, "grad_norm": 2.264431953430176, "learning_rate": 9.050469346860606e-06, "loss": 0.3397, "step": 8869 }, { "epoch": 1.0794037115911166, "grad_norm": 1.7220628261566162, "learning_rate": 9.048527484681753e-06, "loss": 0.3998, "step": 8870 }, { "epoch": 1.0795254031031336, "grad_norm": 1.2057875394821167, "learning_rate": 9.046585658708398e-06, "loss": 0.4028, "step": 8871 }, { "epoch": 1.0796470946151506, "grad_norm": 1.6283847093582153, "learning_rate": 9.044643869014437e-06, "loss": 0.3383, "step": 8872 }, { "epoch": 1.0797687861271676, "grad_norm": 1.2450268268585205, "learning_rate": 9.042702115673751e-06, "loss": 0.3279, "step": 8873 }, { "epoch": 1.0798904776391847, "grad_norm": 1.7437647581100464, "learning_rate": 9.04076039876023e-06, "loss": 0.3888, "step": 8874 }, { "epoch": 1.0800121691512017, "grad_norm": 2.5526015758514404, "learning_rate": 9.038818718347761e-06, "loss": 0.4001, "step": 8875 }, { "epoch": 1.0801338606632187, "grad_norm": 2.628814458847046, "learning_rate": 9.036877074510233e-06, "loss": 0.4228, "step": 8876 }, { "epoch": 1.0802555521752357, "grad_norm": 1.5292574167251587, "learning_rate": 9.034935467321522e-06, "loss": 0.3849, "step": 8877 }, { "epoch": 1.0803772436872527, "grad_norm": 3.0046262741088867, "learning_rate": 9.032993896855514e-06, "loss": 0.4562, "step": 8878 }, { "epoch": 1.0804989351992698, "grad_norm": 1.7847601175308228, "learning_rate": 9.03105236318609e-06, "loss": 0.3832, "step": 8879 }, { "epoch": 1.080620626711287, "grad_norm": 2.2581450939178467, "learning_rate": 9.029110866387132e-06, "loss": 0.455, "step": 8880 }, { "epoch": 1.080742318223304, "grad_norm": 1.386810064315796, "learning_rate": 9.027169406532512e-06, "loss": 0.3573, "step": 8881 }, { "epoch": 1.080864009735321, "grad_norm": 1.7611544132232666, "learning_rate": 9.02522798369611e-06, "loss": 0.4436, "step": 8882 }, { "epoch": 1.080985701247338, "grad_norm": 1.5873936414718628, "learning_rate": 9.023286597951805e-06, "loss": 0.4315, "step": 8883 }, { "epoch": 1.081107392759355, "grad_norm": 2.20467209815979, "learning_rate": 9.021345249373465e-06, "loss": 0.4533, "step": 8884 }, { "epoch": 1.081229084271372, "grad_norm": 3.7632782459259033, "learning_rate": 9.019403938034962e-06, "loss": 0.4037, "step": 8885 }, { "epoch": 1.0813507757833891, "grad_norm": 4.266801357269287, "learning_rate": 9.01746266401017e-06, "loss": 0.3872, "step": 8886 }, { "epoch": 1.0814724672954061, "grad_norm": 5.662842273712158, "learning_rate": 9.015521427372955e-06, "loss": 0.3623, "step": 8887 }, { "epoch": 1.0815941588074232, "grad_norm": 4.647026538848877, "learning_rate": 9.013580228197191e-06, "loss": 0.367, "step": 8888 }, { "epoch": 1.0817158503194402, "grad_norm": 3.4710135459899902, "learning_rate": 9.011639066556737e-06, "loss": 0.3964, "step": 8889 }, { "epoch": 1.0818375418314572, "grad_norm": 3.1195731163024902, "learning_rate": 9.009697942525464e-06, "loss": 0.3439, "step": 8890 }, { "epoch": 1.0819592333434742, "grad_norm": 1.837060570716858, "learning_rate": 9.007756856177237e-06, "loss": 0.3648, "step": 8891 }, { "epoch": 1.0820809248554912, "grad_norm": 1.3897647857666016, "learning_rate": 9.005815807585912e-06, "loss": 0.4354, "step": 8892 }, { "epoch": 1.0822026163675083, "grad_norm": 1.2845149040222168, "learning_rate": 9.003874796825353e-06, "loss": 0.4081, "step": 8893 }, { "epoch": 1.0823243078795255, "grad_norm": 1.4223989248275757, "learning_rate": 9.001933823969424e-06, "loss": 0.3724, "step": 8894 }, { "epoch": 1.0824459993915425, "grad_norm": 3.233513116836548, "learning_rate": 8.999992889091977e-06, "loss": 0.4095, "step": 8895 }, { "epoch": 1.0825676909035595, "grad_norm": 1.3134852647781372, "learning_rate": 8.998051992266872e-06, "loss": 0.4355, "step": 8896 }, { "epoch": 1.0826893824155766, "grad_norm": 1.2544664144515991, "learning_rate": 8.99611113356796e-06, "loss": 0.3631, "step": 8897 }, { "epoch": 1.0828110739275936, "grad_norm": 2.711484432220459, "learning_rate": 8.994170313069095e-06, "loss": 0.4238, "step": 8898 }, { "epoch": 1.0829327654396106, "grad_norm": 1.5422803163528442, "learning_rate": 8.992229530844136e-06, "loss": 0.3907, "step": 8899 }, { "epoch": 1.0830544569516276, "grad_norm": 1.4914556741714478, "learning_rate": 8.990288786966927e-06, "loss": 0.3557, "step": 8900 }, { "epoch": 1.0831761484636446, "grad_norm": 2.2798070907592773, "learning_rate": 8.98834808151132e-06, "loss": 0.4193, "step": 8901 }, { "epoch": 1.0832978399756616, "grad_norm": 3.790067434310913, "learning_rate": 8.986407414551164e-06, "loss": 0.4607, "step": 8902 }, { "epoch": 1.0834195314876787, "grad_norm": 1.3658655881881714, "learning_rate": 8.984466786160302e-06, "loss": 0.4044, "step": 8903 }, { "epoch": 1.0835412229996957, "grad_norm": 1.631594181060791, "learning_rate": 8.98252619641258e-06, "loss": 0.4105, "step": 8904 }, { "epoch": 1.0836629145117127, "grad_norm": 2.061410427093506, "learning_rate": 8.980585645381844e-06, "loss": 0.3873, "step": 8905 }, { "epoch": 1.08378460602373, "grad_norm": 1.6519362926483154, "learning_rate": 8.978645133141936e-06, "loss": 0.4416, "step": 8906 }, { "epoch": 1.083906297535747, "grad_norm": 2.980961799621582, "learning_rate": 8.976704659766694e-06, "loss": 0.3371, "step": 8907 }, { "epoch": 1.084027989047764, "grad_norm": 2.6576199531555176, "learning_rate": 8.974764225329957e-06, "loss": 0.3535, "step": 8908 }, { "epoch": 1.084149680559781, "grad_norm": 1.69933021068573, "learning_rate": 8.972823829905561e-06, "loss": 0.4165, "step": 8909 }, { "epoch": 1.084271372071798, "grad_norm": 3.7051103115081787, "learning_rate": 8.970883473567348e-06, "loss": 0.3918, "step": 8910 }, { "epoch": 1.084393063583815, "grad_norm": 1.5553418397903442, "learning_rate": 8.968943156389146e-06, "loss": 0.4838, "step": 8911 }, { "epoch": 1.084514755095832, "grad_norm": 3.2772324085235596, "learning_rate": 8.967002878444791e-06, "loss": 0.3841, "step": 8912 }, { "epoch": 1.084636446607849, "grad_norm": 1.2527636289596558, "learning_rate": 8.965062639808116e-06, "loss": 0.3816, "step": 8913 }, { "epoch": 1.084758138119866, "grad_norm": 3.1844944953918457, "learning_rate": 8.963122440552951e-06, "loss": 0.3513, "step": 8914 }, { "epoch": 1.0848798296318831, "grad_norm": 2.1681220531463623, "learning_rate": 8.961182280753122e-06, "loss": 0.3885, "step": 8915 }, { "epoch": 1.0850015211439001, "grad_norm": 2.5879220962524414, "learning_rate": 8.959242160482456e-06, "loss": 0.4517, "step": 8916 }, { "epoch": 1.0851232126559172, "grad_norm": 1.6130682229995728, "learning_rate": 8.957302079814783e-06, "loss": 0.3424, "step": 8917 }, { "epoch": 1.0852449041679342, "grad_norm": 1.7708659172058105, "learning_rate": 8.955362038823926e-06, "loss": 0.3234, "step": 8918 }, { "epoch": 1.0853665956799514, "grad_norm": 5.493325710296631, "learning_rate": 8.953422037583702e-06, "loss": 0.4904, "step": 8919 }, { "epoch": 1.0854882871919684, "grad_norm": 4.483700752258301, "learning_rate": 8.951482076167935e-06, "loss": 0.4188, "step": 8920 }, { "epoch": 1.0856099787039855, "grad_norm": 2.5243887901306152, "learning_rate": 8.949542154650445e-06, "loss": 0.4062, "step": 8921 }, { "epoch": 1.0857316702160025, "grad_norm": 2.6309549808502197, "learning_rate": 8.947602273105055e-06, "loss": 0.4069, "step": 8922 }, { "epoch": 1.0858533617280195, "grad_norm": 4.536709785461426, "learning_rate": 8.945662431605573e-06, "loss": 0.4769, "step": 8923 }, { "epoch": 1.0859750532400365, "grad_norm": 5.025242328643799, "learning_rate": 8.943722630225817e-06, "loss": 0.4726, "step": 8924 }, { "epoch": 1.0860967447520535, "grad_norm": 2.740510940551758, "learning_rate": 8.941782869039604e-06, "loss": 0.3125, "step": 8925 }, { "epoch": 1.0862184362640706, "grad_norm": 1.3224332332611084, "learning_rate": 8.939843148120741e-06, "loss": 0.3817, "step": 8926 }, { "epoch": 1.0863401277760876, "grad_norm": 2.4443953037261963, "learning_rate": 8.937903467543042e-06, "loss": 0.4152, "step": 8927 }, { "epoch": 1.0864618192881046, "grad_norm": 1.2838761806488037, "learning_rate": 8.935963827380315e-06, "loss": 0.4078, "step": 8928 }, { "epoch": 1.0865835108001216, "grad_norm": 1.5518044233322144, "learning_rate": 8.934024227706366e-06, "loss": 0.3563, "step": 8929 }, { "epoch": 1.0867052023121386, "grad_norm": 1.638936996459961, "learning_rate": 8.932084668595005e-06, "loss": 0.3953, "step": 8930 }, { "epoch": 1.0868268938241559, "grad_norm": 4.243192672729492, "learning_rate": 8.930145150120028e-06, "loss": 0.3295, "step": 8931 }, { "epoch": 1.086948585336173, "grad_norm": 2.638798952102661, "learning_rate": 8.928205672355244e-06, "loss": 0.4957, "step": 8932 }, { "epoch": 1.08707027684819, "grad_norm": 3.9487528800964355, "learning_rate": 8.926266235374454e-06, "loss": 0.3609, "step": 8933 }, { "epoch": 1.087191968360207, "grad_norm": 3.4017183780670166, "learning_rate": 8.924326839251452e-06, "loss": 0.3467, "step": 8934 }, { "epoch": 1.087313659872224, "grad_norm": 1.3985438346862793, "learning_rate": 8.922387484060043e-06, "loss": 0.4121, "step": 8935 }, { "epoch": 1.087435351384241, "grad_norm": 1.3551188707351685, "learning_rate": 8.920448169874023e-06, "loss": 0.38, "step": 8936 }, { "epoch": 1.087557042896258, "grad_norm": 2.9014194011688232, "learning_rate": 8.918508896767181e-06, "loss": 0.3386, "step": 8937 }, { "epoch": 1.087678734408275, "grad_norm": 1.9644094705581665, "learning_rate": 8.916569664813315e-06, "loss": 0.3995, "step": 8938 }, { "epoch": 1.087800425920292, "grad_norm": 1.6576064825057983, "learning_rate": 8.914630474086216e-06, "loss": 0.3901, "step": 8939 }, { "epoch": 1.087922117432309, "grad_norm": 3.281888008117676, "learning_rate": 8.912691324659676e-06, "loss": 0.4632, "step": 8940 }, { "epoch": 1.088043808944326, "grad_norm": 1.4651509523391724, "learning_rate": 8.910752216607483e-06, "loss": 0.4087, "step": 8941 }, { "epoch": 1.088165500456343, "grad_norm": 1.5841832160949707, "learning_rate": 8.908813150003418e-06, "loss": 0.3013, "step": 8942 }, { "epoch": 1.08828719196836, "grad_norm": 1.4435245990753174, "learning_rate": 8.906874124921274e-06, "loss": 0.3077, "step": 8943 }, { "epoch": 1.0884088834803773, "grad_norm": 3.5181400775909424, "learning_rate": 8.904935141434833e-06, "loss": 0.4537, "step": 8944 }, { "epoch": 1.0885305749923944, "grad_norm": 2.530200242996216, "learning_rate": 8.902996199617875e-06, "loss": 0.4344, "step": 8945 }, { "epoch": 1.0886522665044114, "grad_norm": 1.2385213375091553, "learning_rate": 8.90105729954418e-06, "loss": 0.3527, "step": 8946 }, { "epoch": 1.0887739580164284, "grad_norm": 1.6580597162246704, "learning_rate": 8.899118441287532e-06, "loss": 0.3766, "step": 8947 }, { "epoch": 1.0888956495284454, "grad_norm": 1.090971827507019, "learning_rate": 8.897179624921706e-06, "loss": 0.356, "step": 8948 }, { "epoch": 1.0890173410404624, "grad_norm": 1.24204421043396, "learning_rate": 8.895240850520477e-06, "loss": 0.3309, "step": 8949 }, { "epoch": 1.0891390325524795, "grad_norm": 1.9583499431610107, "learning_rate": 8.893302118157619e-06, "loss": 0.429, "step": 8950 }, { "epoch": 1.0892607240644965, "grad_norm": 1.2250736951828003, "learning_rate": 8.891363427906911e-06, "loss": 0.3833, "step": 8951 }, { "epoch": 1.0893824155765135, "grad_norm": 1.339637279510498, "learning_rate": 8.88942477984212e-06, "loss": 0.3988, "step": 8952 }, { "epoch": 1.0895041070885305, "grad_norm": 2.841278314590454, "learning_rate": 8.88748617403701e-06, "loss": 0.2837, "step": 8953 }, { "epoch": 1.0896257986005475, "grad_norm": 1.1660181283950806, "learning_rate": 8.885547610565355e-06, "loss": 0.3354, "step": 8954 }, { "epoch": 1.0897474901125646, "grad_norm": 1.377590537071228, "learning_rate": 8.883609089500919e-06, "loss": 0.3999, "step": 8955 }, { "epoch": 1.0898691816245818, "grad_norm": 1.1790530681610107, "learning_rate": 8.881670610917471e-06, "loss": 0.3932, "step": 8956 }, { "epoch": 1.0899908731365988, "grad_norm": 1.4846265316009521, "learning_rate": 8.87973217488877e-06, "loss": 0.388, "step": 8957 }, { "epoch": 1.0901125646486158, "grad_norm": 1.8230562210083008, "learning_rate": 8.877793781488575e-06, "loss": 0.4082, "step": 8958 }, { "epoch": 1.0902342561606329, "grad_norm": 2.732302188873291, "learning_rate": 8.875855430790655e-06, "loss": 0.4456, "step": 8959 }, { "epoch": 1.0903559476726499, "grad_norm": 1.9497780799865723, "learning_rate": 8.87391712286876e-06, "loss": 0.3993, "step": 8960 }, { "epoch": 1.090477639184667, "grad_norm": 1.7974390983581543, "learning_rate": 8.871978857796648e-06, "loss": 0.3589, "step": 8961 }, { "epoch": 1.090599330696684, "grad_norm": 2.550435781478882, "learning_rate": 8.870040635648078e-06, "loss": 0.3376, "step": 8962 }, { "epoch": 1.090721022208701, "grad_norm": 1.7439709901809692, "learning_rate": 8.868102456496799e-06, "loss": 0.3484, "step": 8963 }, { "epoch": 1.090842713720718, "grad_norm": 1.7590044736862183, "learning_rate": 8.866164320416568e-06, "loss": 0.4406, "step": 8964 }, { "epoch": 1.090964405232735, "grad_norm": 1.5954684019088745, "learning_rate": 8.864226227481127e-06, "loss": 0.4353, "step": 8965 }, { "epoch": 1.091086096744752, "grad_norm": 1.9115084409713745, "learning_rate": 8.862288177764232e-06, "loss": 0.3942, "step": 8966 }, { "epoch": 1.091207788256769, "grad_norm": 1.9837453365325928, "learning_rate": 8.860350171339626e-06, "loss": 0.4004, "step": 8967 }, { "epoch": 1.091329479768786, "grad_norm": 1.2382985353469849, "learning_rate": 8.858412208281052e-06, "loss": 0.4126, "step": 8968 }, { "epoch": 1.0914511712808033, "grad_norm": 1.5390450954437256, "learning_rate": 8.856474288662258e-06, "loss": 0.3302, "step": 8969 }, { "epoch": 1.0915728627928203, "grad_norm": 1.4210050106048584, "learning_rate": 8.854536412556987e-06, "loss": 0.3664, "step": 8970 }, { "epoch": 1.0916945543048373, "grad_norm": 2.797977924346924, "learning_rate": 8.852598580038974e-06, "loss": 0.4233, "step": 8971 }, { "epoch": 1.0918162458168543, "grad_norm": 1.2185076475143433, "learning_rate": 8.850660791181958e-06, "loss": 0.329, "step": 8972 }, { "epoch": 1.0919379373288713, "grad_norm": 1.5773074626922607, "learning_rate": 8.84872304605968e-06, "loss": 0.3643, "step": 8973 }, { "epoch": 1.0920596288408884, "grad_norm": 1.7411830425262451, "learning_rate": 8.846785344745876e-06, "loss": 0.3819, "step": 8974 }, { "epoch": 1.0921813203529054, "grad_norm": 1.5986206531524658, "learning_rate": 8.844847687314276e-06, "loss": 0.3522, "step": 8975 }, { "epoch": 1.0923030118649224, "grad_norm": 2.6501529216766357, "learning_rate": 8.84291007383861e-06, "loss": 0.392, "step": 8976 }, { "epoch": 1.0924247033769394, "grad_norm": 1.5997583866119385, "learning_rate": 8.840972504392612e-06, "loss": 0.3987, "step": 8977 }, { "epoch": 1.0925463948889564, "grad_norm": 1.9058746099472046, "learning_rate": 8.83903497905001e-06, "loss": 0.4024, "step": 8978 }, { "epoch": 1.0926680864009735, "grad_norm": 2.0032706260681152, "learning_rate": 8.837097497884527e-06, "loss": 0.4052, "step": 8979 }, { "epoch": 1.0927897779129905, "grad_norm": 1.894484043121338, "learning_rate": 8.835160060969891e-06, "loss": 0.3937, "step": 8980 }, { "epoch": 1.0929114694250077, "grad_norm": 1.5778229236602783, "learning_rate": 8.833222668379828e-06, "loss": 0.4101, "step": 8981 }, { "epoch": 1.0930331609370247, "grad_norm": 4.399184703826904, "learning_rate": 8.83128532018806e-06, "loss": 0.3433, "step": 8982 }, { "epoch": 1.0931548524490418, "grad_norm": 1.858673095703125, "learning_rate": 8.829348016468299e-06, "loss": 0.3592, "step": 8983 }, { "epoch": 1.0932765439610588, "grad_norm": 1.6883916854858398, "learning_rate": 8.827410757294272e-06, "loss": 0.4269, "step": 8984 }, { "epoch": 1.0933982354730758, "grad_norm": 1.6884262561798096, "learning_rate": 8.825473542739695e-06, "loss": 0.3854, "step": 8985 }, { "epoch": 1.0935199269850928, "grad_norm": 1.9616135358810425, "learning_rate": 8.823536372878277e-06, "loss": 0.4414, "step": 8986 }, { "epoch": 1.0936416184971098, "grad_norm": 1.9091920852661133, "learning_rate": 8.821599247783742e-06, "loss": 0.4148, "step": 8987 }, { "epoch": 1.0937633100091269, "grad_norm": 1.4490082263946533, "learning_rate": 8.819662167529787e-06, "loss": 0.381, "step": 8988 }, { "epoch": 1.0938850015211439, "grad_norm": 1.411734938621521, "learning_rate": 8.817725132190132e-06, "loss": 0.4299, "step": 8989 }, { "epoch": 1.094006693033161, "grad_norm": 1.2066599130630493, "learning_rate": 8.815788141838484e-06, "loss": 0.3801, "step": 8990 }, { "epoch": 1.094128384545178, "grad_norm": 2.4151101112365723, "learning_rate": 8.813851196548547e-06, "loss": 0.3141, "step": 8991 }, { "epoch": 1.094250076057195, "grad_norm": 1.2600125074386597, "learning_rate": 8.811914296394027e-06, "loss": 0.3488, "step": 8992 }, { "epoch": 1.094371767569212, "grad_norm": 1.534321665763855, "learning_rate": 8.80997744144863e-06, "loss": 0.4236, "step": 8993 }, { "epoch": 1.094493459081229, "grad_norm": 2.4045729637145996, "learning_rate": 8.808040631786052e-06, "loss": 0.4665, "step": 8994 }, { "epoch": 1.0946151505932462, "grad_norm": 2.070873737335205, "learning_rate": 8.806103867479994e-06, "loss": 0.3362, "step": 8995 }, { "epoch": 1.0947368421052632, "grad_norm": 1.5666120052337646, "learning_rate": 8.804167148604159e-06, "loss": 0.4153, "step": 8996 }, { "epoch": 1.0948585336172802, "grad_norm": 1.8920365571975708, "learning_rate": 8.802230475232237e-06, "loss": 0.3489, "step": 8997 }, { "epoch": 1.0949802251292973, "grad_norm": 1.5304187536239624, "learning_rate": 8.800293847437928e-06, "loss": 0.3528, "step": 8998 }, { "epoch": 1.0951019166413143, "grad_norm": 1.4707058668136597, "learning_rate": 8.798357265294916e-06, "loss": 0.3852, "step": 8999 }, { "epoch": 1.0952236081533313, "grad_norm": 1.4516830444335938, "learning_rate": 8.796420728876898e-06, "loss": 0.3903, "step": 9000 }, { "epoch": 1.0953452996653483, "grad_norm": 2.015927314758301, "learning_rate": 8.794484238257568e-06, "loss": 0.3625, "step": 9001 }, { "epoch": 1.0954669911773653, "grad_norm": 1.454624056816101, "learning_rate": 8.792547793510602e-06, "loss": 0.3123, "step": 9002 }, { "epoch": 1.0955886826893824, "grad_norm": 1.3874398469924927, "learning_rate": 8.790611394709693e-06, "loss": 0.3987, "step": 9003 }, { "epoch": 1.0957103742013994, "grad_norm": 1.4493876695632935, "learning_rate": 8.788675041928525e-06, "loss": 0.3916, "step": 9004 }, { "epoch": 1.0958320657134164, "grad_norm": 1.4685176610946655, "learning_rate": 8.786738735240777e-06, "loss": 0.3743, "step": 9005 }, { "epoch": 1.0959537572254334, "grad_norm": 2.2720980644226074, "learning_rate": 8.784802474720133e-06, "loss": 0.4268, "step": 9006 }, { "epoch": 1.0960754487374507, "grad_norm": 1.6242934465408325, "learning_rate": 8.782866260440268e-06, "loss": 0.3399, "step": 9007 }, { "epoch": 1.0961971402494677, "grad_norm": 2.1982522010803223, "learning_rate": 8.780930092474866e-06, "loss": 0.4336, "step": 9008 }, { "epoch": 1.0963188317614847, "grad_norm": 1.506544589996338, "learning_rate": 8.778993970897593e-06, "loss": 0.3508, "step": 9009 }, { "epoch": 1.0964405232735017, "grad_norm": 2.265421152114868, "learning_rate": 8.777057895782131e-06, "loss": 0.4363, "step": 9010 }, { "epoch": 1.0965622147855187, "grad_norm": 5.215832233428955, "learning_rate": 8.775121867202144e-06, "loss": 0.3515, "step": 9011 }, { "epoch": 1.0966839062975358, "grad_norm": 2.833876609802246, "learning_rate": 8.773185885231307e-06, "loss": 0.3791, "step": 9012 }, { "epoch": 1.0968055978095528, "grad_norm": 1.7682383060455322, "learning_rate": 8.771249949943285e-06, "loss": 0.4235, "step": 9013 }, { "epoch": 1.0969272893215698, "grad_norm": 1.5311999320983887, "learning_rate": 8.769314061411746e-06, "loss": 0.4589, "step": 9014 }, { "epoch": 1.0970489808335868, "grad_norm": 3.27245831489563, "learning_rate": 8.767378219710353e-06, "loss": 0.3361, "step": 9015 }, { "epoch": 1.0971706723456038, "grad_norm": 1.8526626825332642, "learning_rate": 8.765442424912774e-06, "loss": 0.384, "step": 9016 }, { "epoch": 1.0972923638576209, "grad_norm": 1.8122971057891846, "learning_rate": 8.763506677092662e-06, "loss": 0.3677, "step": 9017 }, { "epoch": 1.0974140553696379, "grad_norm": 1.3476673364639282, "learning_rate": 8.76157097632368e-06, "loss": 0.419, "step": 9018 }, { "epoch": 1.097535746881655, "grad_norm": 1.5779483318328857, "learning_rate": 8.75963532267949e-06, "loss": 0.3504, "step": 9019 }, { "epoch": 1.0976574383936721, "grad_norm": 4.563393592834473, "learning_rate": 8.757699716233742e-06, "loss": 0.3593, "step": 9020 }, { "epoch": 1.0977791299056892, "grad_norm": 2.21390700340271, "learning_rate": 8.755764157060093e-06, "loss": 0.3094, "step": 9021 }, { "epoch": 1.0979008214177062, "grad_norm": 2.7866342067718506, "learning_rate": 8.75382864523219e-06, "loss": 0.3956, "step": 9022 }, { "epoch": 1.0980225129297232, "grad_norm": 1.9960756301879883, "learning_rate": 8.751893180823686e-06, "loss": 0.3557, "step": 9023 }, { "epoch": 1.0981442044417402, "grad_norm": 1.3319308757781982, "learning_rate": 8.749957763908232e-06, "loss": 0.3524, "step": 9024 }, { "epoch": 1.0982658959537572, "grad_norm": 1.9648208618164062, "learning_rate": 8.748022394559472e-06, "loss": 0.4304, "step": 9025 }, { "epoch": 1.0983875874657743, "grad_norm": 1.5563209056854248, "learning_rate": 8.74608707285105e-06, "loss": 0.3136, "step": 9026 }, { "epoch": 1.0985092789777913, "grad_norm": 2.40421724319458, "learning_rate": 8.744151798856613e-06, "loss": 0.4276, "step": 9027 }, { "epoch": 1.0986309704898083, "grad_norm": 2.3067102432250977, "learning_rate": 8.742216572649797e-06, "loss": 0.376, "step": 9028 }, { "epoch": 1.0987526620018253, "grad_norm": 3.315098762512207, "learning_rate": 8.740281394304243e-06, "loss": 0.4297, "step": 9029 }, { "epoch": 1.0988743535138423, "grad_norm": 1.6769059896469116, "learning_rate": 8.738346263893592e-06, "loss": 0.381, "step": 9030 }, { "epoch": 1.0989960450258593, "grad_norm": 2.220285177230835, "learning_rate": 8.736411181491476e-06, "loss": 0.4493, "step": 9031 }, { "epoch": 1.0991177365378766, "grad_norm": 2.3177056312561035, "learning_rate": 8.73447614717153e-06, "loss": 0.387, "step": 9032 }, { "epoch": 1.0992394280498936, "grad_norm": 2.4205918312072754, "learning_rate": 8.732541161007389e-06, "loss": 0.3581, "step": 9033 }, { "epoch": 1.0993611195619106, "grad_norm": 1.4935222864151, "learning_rate": 8.730606223072678e-06, "loss": 0.3938, "step": 9034 }, { "epoch": 1.0994828110739276, "grad_norm": 3.0986626148223877, "learning_rate": 8.728671333441027e-06, "loss": 0.4781, "step": 9035 }, { "epoch": 1.0996045025859447, "grad_norm": 2.061501979827881, "learning_rate": 8.726736492186062e-06, "loss": 0.4442, "step": 9036 }, { "epoch": 1.0997261940979617, "grad_norm": 1.3886741399765015, "learning_rate": 8.72480169938141e-06, "loss": 0.4234, "step": 9037 }, { "epoch": 1.0998478856099787, "grad_norm": 2.325425148010254, "learning_rate": 8.722866955100697e-06, "loss": 0.4441, "step": 9038 }, { "epoch": 1.0999695771219957, "grad_norm": 2.2290287017822266, "learning_rate": 8.720932259417536e-06, "loss": 0.3339, "step": 9039 }, { "epoch": 1.1000912686340127, "grad_norm": 3.4437472820281982, "learning_rate": 8.718997612405548e-06, "loss": 0.3829, "step": 9040 }, { "epoch": 1.1002129601460298, "grad_norm": 1.9450740814208984, "learning_rate": 8.717063014138354e-06, "loss": 0.4499, "step": 9041 }, { "epoch": 1.1003346516580468, "grad_norm": 2.27532696723938, "learning_rate": 8.71512846468957e-06, "loss": 0.4174, "step": 9042 }, { "epoch": 1.1004563431700638, "grad_norm": 1.454737663269043, "learning_rate": 8.713193964132805e-06, "loss": 0.4493, "step": 9043 }, { "epoch": 1.1005780346820808, "grad_norm": 1.8113757371902466, "learning_rate": 8.711259512541678e-06, "loss": 0.3786, "step": 9044 }, { "epoch": 1.100699726194098, "grad_norm": 3.0968167781829834, "learning_rate": 8.70932510998979e-06, "loss": 0.3427, "step": 9045 }, { "epoch": 1.100821417706115, "grad_norm": 1.5274698734283447, "learning_rate": 8.707390756550755e-06, "loss": 0.3505, "step": 9046 }, { "epoch": 1.100943109218132, "grad_norm": 1.8361154794692993, "learning_rate": 8.705456452298175e-06, "loss": 0.3611, "step": 9047 }, { "epoch": 1.1010648007301491, "grad_norm": 1.2664860486984253, "learning_rate": 8.703522197305657e-06, "loss": 0.3678, "step": 9048 }, { "epoch": 1.1011864922421661, "grad_norm": 1.4269071817398071, "learning_rate": 8.701587991646802e-06, "loss": 0.3288, "step": 9049 }, { "epoch": 1.1013081837541832, "grad_norm": 2.48342227935791, "learning_rate": 8.699653835395215e-06, "loss": 0.2848, "step": 9050 }, { "epoch": 1.1014298752662002, "grad_norm": 1.5357853174209595, "learning_rate": 8.697719728624487e-06, "loss": 0.316, "step": 9051 }, { "epoch": 1.1015515667782172, "grad_norm": 2.211686372756958, "learning_rate": 8.69578567140822e-06, "loss": 0.3841, "step": 9052 }, { "epoch": 1.1016732582902342, "grad_norm": 4.63115930557251, "learning_rate": 8.69385166382001e-06, "loss": 0.4341, "step": 9053 }, { "epoch": 1.1017949498022512, "grad_norm": 4.628605365753174, "learning_rate": 8.691917705933445e-06, "loss": 0.3662, "step": 9054 }, { "epoch": 1.1019166413142683, "grad_norm": 3.7070493698120117, "learning_rate": 8.68998379782212e-06, "loss": 0.3668, "step": 9055 }, { "epoch": 1.1020383328262853, "grad_norm": 5.416265964508057, "learning_rate": 8.688049939559626e-06, "loss": 0.4415, "step": 9056 }, { "epoch": 1.1021600243383025, "grad_norm": 2.968954086303711, "learning_rate": 8.686116131219544e-06, "loss": 0.3658, "step": 9057 }, { "epoch": 1.1022817158503195, "grad_norm": 4.6864519119262695, "learning_rate": 8.684182372875465e-06, "loss": 0.432, "step": 9058 }, { "epoch": 1.1024034073623366, "grad_norm": 4.840615749359131, "learning_rate": 8.682248664600969e-06, "loss": 0.5121, "step": 9059 }, { "epoch": 1.1025250988743536, "grad_norm": 2.544372797012329, "learning_rate": 8.680315006469638e-06, "loss": 0.3224, "step": 9060 }, { "epoch": 1.1026467903863706, "grad_norm": 1.6478246450424194, "learning_rate": 8.678381398555054e-06, "loss": 0.3644, "step": 9061 }, { "epoch": 1.1027684818983876, "grad_norm": 2.746987819671631, "learning_rate": 8.676447840930792e-06, "loss": 0.3744, "step": 9062 }, { "epoch": 1.1028901734104046, "grad_norm": 2.1918320655822754, "learning_rate": 8.674514333670428e-06, "loss": 0.3869, "step": 9063 }, { "epoch": 1.1030118649224216, "grad_norm": 1.4808180332183838, "learning_rate": 8.672580876847542e-06, "loss": 0.411, "step": 9064 }, { "epoch": 1.1031335564344387, "grad_norm": 1.4325323104858398, "learning_rate": 8.670647470535698e-06, "loss": 0.4233, "step": 9065 }, { "epoch": 1.1032552479464557, "grad_norm": 1.5158377885818481, "learning_rate": 8.668714114808467e-06, "loss": 0.4147, "step": 9066 }, { "epoch": 1.1033769394584727, "grad_norm": 1.4325822591781616, "learning_rate": 8.666780809739427e-06, "loss": 0.3805, "step": 9067 }, { "epoch": 1.1034986309704897, "grad_norm": 2.2317309379577637, "learning_rate": 8.66484755540213e-06, "loss": 0.4299, "step": 9068 }, { "epoch": 1.1036203224825067, "grad_norm": 2.6835567951202393, "learning_rate": 8.66291435187015e-06, "loss": 0.3028, "step": 9069 }, { "epoch": 1.103742013994524, "grad_norm": 3.496803045272827, "learning_rate": 8.660981199217047e-06, "loss": 0.3697, "step": 9070 }, { "epoch": 1.103863705506541, "grad_norm": 2.323664903640747, "learning_rate": 8.659048097516378e-06, "loss": 0.365, "step": 9071 }, { "epoch": 1.103985397018558, "grad_norm": 2.18013596534729, "learning_rate": 8.657115046841708e-06, "loss": 0.346, "step": 9072 }, { "epoch": 1.104107088530575, "grad_norm": 2.304213047027588, "learning_rate": 8.655182047266587e-06, "loss": 0.3382, "step": 9073 }, { "epoch": 1.104228780042592, "grad_norm": 1.591457486152649, "learning_rate": 8.653249098864574e-06, "loss": 0.385, "step": 9074 }, { "epoch": 1.104350471554609, "grad_norm": 1.8094230890274048, "learning_rate": 8.65131620170922e-06, "loss": 0.3944, "step": 9075 }, { "epoch": 1.104472163066626, "grad_norm": 1.4685986042022705, "learning_rate": 8.649383355874077e-06, "loss": 0.3502, "step": 9076 }, { "epoch": 1.1045938545786431, "grad_norm": 2.0377144813537598, "learning_rate": 8.647450561432692e-06, "loss": 0.4188, "step": 9077 }, { "epoch": 1.1047155460906601, "grad_norm": 1.2891614437103271, "learning_rate": 8.645517818458611e-06, "loss": 0.3349, "step": 9078 }, { "epoch": 1.1048372376026772, "grad_norm": 1.2488749027252197, "learning_rate": 8.643585127025388e-06, "loss": 0.3321, "step": 9079 }, { "epoch": 1.1049589291146942, "grad_norm": 4.072513103485107, "learning_rate": 8.641652487206552e-06, "loss": 0.4602, "step": 9080 }, { "epoch": 1.1050806206267112, "grad_norm": 2.200491189956665, "learning_rate": 8.639719899075654e-06, "loss": 0.393, "step": 9081 }, { "epoch": 1.1052023121387284, "grad_norm": 1.5780788660049438, "learning_rate": 8.637787362706227e-06, "loss": 0.3448, "step": 9082 }, { "epoch": 1.1053240036507455, "grad_norm": 2.6987144947052, "learning_rate": 8.63585487817181e-06, "loss": 0.4521, "step": 9083 }, { "epoch": 1.1054456951627625, "grad_norm": 1.9878219366073608, "learning_rate": 8.63392244554594e-06, "loss": 0.4167, "step": 9084 }, { "epoch": 1.1055673866747795, "grad_norm": 1.4324803352355957, "learning_rate": 8.631990064902147e-06, "loss": 0.3928, "step": 9085 }, { "epoch": 1.1056890781867965, "grad_norm": 1.9192440509796143, "learning_rate": 8.630057736313964e-06, "loss": 0.3828, "step": 9086 }, { "epoch": 1.1058107696988135, "grad_norm": 2.194176197052002, "learning_rate": 8.628125459854922e-06, "loss": 0.3858, "step": 9087 }, { "epoch": 1.1059324612108306, "grad_norm": 3.5617263317108154, "learning_rate": 8.626193235598543e-06, "loss": 0.3727, "step": 9088 }, { "epoch": 1.1060541527228476, "grad_norm": 4.5613112449646, "learning_rate": 8.624261063618356e-06, "loss": 0.3473, "step": 9089 }, { "epoch": 1.1061758442348646, "grad_norm": 2.2122268676757812, "learning_rate": 8.622328943987886e-06, "loss": 0.4251, "step": 9090 }, { "epoch": 1.1062975357468816, "grad_norm": 2.576096534729004, "learning_rate": 8.620396876780648e-06, "loss": 0.4217, "step": 9091 }, { "epoch": 1.1064192272588986, "grad_norm": 2.0854804515838623, "learning_rate": 8.618464862070168e-06, "loss": 0.3648, "step": 9092 }, { "epoch": 1.1065409187709156, "grad_norm": 1.4130666255950928, "learning_rate": 8.616532899929955e-06, "loss": 0.3559, "step": 9093 }, { "epoch": 1.1066626102829327, "grad_norm": 1.5977747440338135, "learning_rate": 8.61460099043353e-06, "loss": 0.3893, "step": 9094 }, { "epoch": 1.1067843017949497, "grad_norm": 2.612947463989258, "learning_rate": 8.612669133654406e-06, "loss": 0.4082, "step": 9095 }, { "epoch": 1.106905993306967, "grad_norm": 1.413270354270935, "learning_rate": 8.610737329666092e-06, "loss": 0.3817, "step": 9096 }, { "epoch": 1.107027684818984, "grad_norm": 1.6126654148101807, "learning_rate": 8.608805578542099e-06, "loss": 0.3163, "step": 9097 }, { "epoch": 1.107149376331001, "grad_norm": 2.7629494667053223, "learning_rate": 8.606873880355933e-06, "loss": 0.4096, "step": 9098 }, { "epoch": 1.107271067843018, "grad_norm": 2.252469778060913, "learning_rate": 8.6049422351811e-06, "loss": 0.3871, "step": 9099 }, { "epoch": 1.107392759355035, "grad_norm": 2.1956517696380615, "learning_rate": 8.603010643091101e-06, "loss": 0.3415, "step": 9100 }, { "epoch": 1.107514450867052, "grad_norm": 1.7556936740875244, "learning_rate": 8.60107910415944e-06, "loss": 0.3619, "step": 9101 }, { "epoch": 1.107636142379069, "grad_norm": 2.947998046875, "learning_rate": 8.599147618459618e-06, "loss": 0.4105, "step": 9102 }, { "epoch": 1.107757833891086, "grad_norm": 2.2991061210632324, "learning_rate": 8.597216186065125e-06, "loss": 0.3917, "step": 9103 }, { "epoch": 1.107879525403103, "grad_norm": 1.6912895441055298, "learning_rate": 8.59528480704946e-06, "loss": 0.3173, "step": 9104 }, { "epoch": 1.10800121691512, "grad_norm": 1.5079476833343506, "learning_rate": 8.593353481486115e-06, "loss": 0.3756, "step": 9105 }, { "epoch": 1.1081229084271371, "grad_norm": 2.070499897003174, "learning_rate": 8.591422209448582e-06, "loss": 0.3856, "step": 9106 }, { "epoch": 1.1082445999391541, "grad_norm": 1.5848976373672485, "learning_rate": 8.589490991010351e-06, "loss": 0.3934, "step": 9107 }, { "epoch": 1.1083662914511714, "grad_norm": 2.169907569885254, "learning_rate": 8.587559826244904e-06, "loss": 0.3406, "step": 9108 }, { "epoch": 1.1084879829631884, "grad_norm": 1.6242631673812866, "learning_rate": 8.585628715225731e-06, "loss": 0.4194, "step": 9109 }, { "epoch": 1.1086096744752054, "grad_norm": 1.4864424467086792, "learning_rate": 8.583697658026316e-06, "loss": 0.4087, "step": 9110 }, { "epoch": 1.1087313659872224, "grad_norm": 2.291318655014038, "learning_rate": 8.581766654720132e-06, "loss": 0.3521, "step": 9111 }, { "epoch": 1.1088530574992395, "grad_norm": 1.442834496498108, "learning_rate": 8.579835705380664e-06, "loss": 0.3855, "step": 9112 }, { "epoch": 1.1089747490112565, "grad_norm": 1.7348870038986206, "learning_rate": 8.57790481008139e-06, "loss": 0.405, "step": 9113 }, { "epoch": 1.1090964405232735, "grad_norm": 1.9099787473678589, "learning_rate": 8.57597396889578e-06, "loss": 0.4049, "step": 9114 }, { "epoch": 1.1092181320352905, "grad_norm": 1.8496531248092651, "learning_rate": 8.574043181897309e-06, "loss": 0.4312, "step": 9115 }, { "epoch": 1.1093398235473075, "grad_norm": 1.6800591945648193, "learning_rate": 8.572112449159442e-06, "loss": 0.3626, "step": 9116 }, { "epoch": 1.1094615150593246, "grad_norm": 1.8353216648101807, "learning_rate": 8.570181770755654e-06, "loss": 0.4022, "step": 9117 }, { "epoch": 1.1095832065713416, "grad_norm": 1.3841429948806763, "learning_rate": 8.56825114675941e-06, "loss": 0.3756, "step": 9118 }, { "epoch": 1.1097048980833586, "grad_norm": 1.9450618028640747, "learning_rate": 8.566320577244174e-06, "loss": 0.3856, "step": 9119 }, { "epoch": 1.1098265895953756, "grad_norm": 1.8709648847579956, "learning_rate": 8.564390062283405e-06, "loss": 0.3316, "step": 9120 }, { "epoch": 1.1099482811073929, "grad_norm": 2.1731176376342773, "learning_rate": 8.562459601950568e-06, "loss": 0.446, "step": 9121 }, { "epoch": 1.1100699726194099, "grad_norm": 1.196104645729065, "learning_rate": 8.560529196319118e-06, "loss": 0.3708, "step": 9122 }, { "epoch": 1.110191664131427, "grad_norm": 2.1624343395233154, "learning_rate": 8.558598845462512e-06, "loss": 0.3673, "step": 9123 }, { "epoch": 1.110313355643444, "grad_norm": 1.9917705059051514, "learning_rate": 8.556668549454203e-06, "loss": 0.3972, "step": 9124 }, { "epoch": 1.110435047155461, "grad_norm": 1.3727229833602905, "learning_rate": 8.554738308367645e-06, "loss": 0.3699, "step": 9125 }, { "epoch": 1.110556738667478, "grad_norm": 1.489043116569519, "learning_rate": 8.552808122276286e-06, "loss": 0.3719, "step": 9126 }, { "epoch": 1.110678430179495, "grad_norm": 1.4416900873184204, "learning_rate": 8.55087799125357e-06, "loss": 0.3998, "step": 9127 }, { "epoch": 1.110800121691512, "grad_norm": 1.2847437858581543, "learning_rate": 8.548947915372947e-06, "loss": 0.3837, "step": 9128 }, { "epoch": 1.110921813203529, "grad_norm": 2.7136335372924805, "learning_rate": 8.54701789470786e-06, "loss": 0.4881, "step": 9129 }, { "epoch": 1.111043504715546, "grad_norm": 1.9693403244018555, "learning_rate": 8.545087929331751e-06, "loss": 0.4192, "step": 9130 }, { "epoch": 1.111165196227563, "grad_norm": 2.21697735786438, "learning_rate": 8.543158019318053e-06, "loss": 0.3393, "step": 9131 }, { "epoch": 1.11128688773958, "grad_norm": 1.8898667097091675, "learning_rate": 8.541228164740214e-06, "loss": 0.4462, "step": 9132 }, { "epoch": 1.1114085792515973, "grad_norm": 2.183990955352783, "learning_rate": 8.53929836567166e-06, "loss": 0.4004, "step": 9133 }, { "epoch": 1.1115302707636143, "grad_norm": 1.8716946840286255, "learning_rate": 8.537368622185825e-06, "loss": 0.4424, "step": 9134 }, { "epoch": 1.1116519622756313, "grad_norm": 1.44870924949646, "learning_rate": 8.535438934356142e-06, "loss": 0.39, "step": 9135 }, { "epoch": 1.1117736537876484, "grad_norm": 1.6148954629898071, "learning_rate": 8.533509302256042e-06, "loss": 0.3959, "step": 9136 }, { "epoch": 1.1118953452996654, "grad_norm": 3.0691044330596924, "learning_rate": 8.531579725958951e-06, "loss": 0.479, "step": 9137 }, { "epoch": 1.1120170368116824, "grad_norm": 2.1254775524139404, "learning_rate": 8.529650205538285e-06, "loss": 0.47, "step": 9138 }, { "epoch": 1.1121387283236994, "grad_norm": 2.366456985473633, "learning_rate": 8.527720741067473e-06, "loss": 0.4024, "step": 9139 }, { "epoch": 1.1122604198357164, "grad_norm": 1.6609021425247192, "learning_rate": 8.525791332619934e-06, "loss": 0.4353, "step": 9140 }, { "epoch": 1.1123821113477335, "grad_norm": 2.807094097137451, "learning_rate": 8.523861980269088e-06, "loss": 0.2954, "step": 9141 }, { "epoch": 1.1125038028597505, "grad_norm": 2.602961540222168, "learning_rate": 8.521932684088348e-06, "loss": 0.3672, "step": 9142 }, { "epoch": 1.1126254943717675, "grad_norm": 1.7842473983764648, "learning_rate": 8.520003444151128e-06, "loss": 0.3832, "step": 9143 }, { "epoch": 1.1127471858837845, "grad_norm": 2.3027572631835938, "learning_rate": 8.518074260530842e-06, "loss": 0.4034, "step": 9144 }, { "epoch": 1.1128688773958015, "grad_norm": 2.4500296115875244, "learning_rate": 8.516145133300896e-06, "loss": 0.355, "step": 9145 }, { "epoch": 1.1129905689078188, "grad_norm": 2.3904201984405518, "learning_rate": 8.514216062534702e-06, "loss": 0.4425, "step": 9146 }, { "epoch": 1.1131122604198358, "grad_norm": 2.657768964767456, "learning_rate": 8.512287048305661e-06, "loss": 0.3289, "step": 9147 }, { "epoch": 1.1132339519318528, "grad_norm": 2.2551615238189697, "learning_rate": 8.510358090687178e-06, "loss": 0.4052, "step": 9148 }, { "epoch": 1.1133556434438698, "grad_norm": 2.1249330043792725, "learning_rate": 8.508429189752654e-06, "loss": 0.355, "step": 9149 }, { "epoch": 1.1134773349558869, "grad_norm": 4.088589191436768, "learning_rate": 8.506500345575484e-06, "loss": 0.4759, "step": 9150 }, { "epoch": 1.1135990264679039, "grad_norm": 2.492638349533081, "learning_rate": 8.504571558229067e-06, "loss": 0.3861, "step": 9151 }, { "epoch": 1.113720717979921, "grad_norm": 2.263943910598755, "learning_rate": 8.502642827786799e-06, "loss": 0.3854, "step": 9152 }, { "epoch": 1.113842409491938, "grad_norm": 2.477700710296631, "learning_rate": 8.500714154322069e-06, "loss": 0.4286, "step": 9153 }, { "epoch": 1.113964101003955, "grad_norm": 1.5313174724578857, "learning_rate": 8.49878553790827e-06, "loss": 0.3613, "step": 9154 }, { "epoch": 1.114085792515972, "grad_norm": 2.7664523124694824, "learning_rate": 8.496856978618788e-06, "loss": 0.3821, "step": 9155 }, { "epoch": 1.114207484027989, "grad_norm": 1.5243641138076782, "learning_rate": 8.49492847652701e-06, "loss": 0.3569, "step": 9156 }, { "epoch": 1.114329175540006, "grad_norm": 1.43416166305542, "learning_rate": 8.493000031706318e-06, "loss": 0.3354, "step": 9157 }, { "epoch": 1.1144508670520232, "grad_norm": 1.70289945602417, "learning_rate": 8.491071644230095e-06, "loss": 0.3995, "step": 9158 }, { "epoch": 1.1145725585640402, "grad_norm": 1.881446361541748, "learning_rate": 8.489143314171719e-06, "loss": 0.3974, "step": 9159 }, { "epoch": 1.1146942500760573, "grad_norm": 1.7493022680282593, "learning_rate": 8.487215041604571e-06, "loss": 0.4416, "step": 9160 }, { "epoch": 1.1148159415880743, "grad_norm": 1.911035180091858, "learning_rate": 8.485286826602018e-06, "loss": 0.4283, "step": 9161 }, { "epoch": 1.1149376331000913, "grad_norm": 3.0140066146850586, "learning_rate": 8.483358669237434e-06, "loss": 0.3294, "step": 9162 }, { "epoch": 1.1150593246121083, "grad_norm": 1.4261788129806519, "learning_rate": 8.481430569584197e-06, "loss": 0.4374, "step": 9163 }, { "epoch": 1.1151810161241253, "grad_norm": 2.5744519233703613, "learning_rate": 8.479502527715666e-06, "loss": 0.3847, "step": 9164 }, { "epoch": 1.1153027076361424, "grad_norm": 3.1720798015594482, "learning_rate": 8.477574543705211e-06, "loss": 0.4055, "step": 9165 }, { "epoch": 1.1154243991481594, "grad_norm": 3.349222421646118, "learning_rate": 8.475646617626198e-06, "loss": 0.3601, "step": 9166 }, { "epoch": 1.1155460906601764, "grad_norm": 2.2097604274749756, "learning_rate": 8.473718749551987e-06, "loss": 0.4389, "step": 9167 }, { "epoch": 1.1156677821721934, "grad_norm": 2.555494546890259, "learning_rate": 8.471790939555935e-06, "loss": 0.3649, "step": 9168 }, { "epoch": 1.1157894736842104, "grad_norm": 1.521283745765686, "learning_rate": 8.4698631877114e-06, "loss": 0.4093, "step": 9169 }, { "epoch": 1.1159111651962275, "grad_norm": 1.1920220851898193, "learning_rate": 8.467935494091743e-06, "loss": 0.3774, "step": 9170 }, { "epoch": 1.1160328567082447, "grad_norm": 2.5965051651000977, "learning_rate": 8.46600785877031e-06, "loss": 0.3704, "step": 9171 }, { "epoch": 1.1161545482202617, "grad_norm": 1.5529069900512695, "learning_rate": 8.464080281820453e-06, "loss": 0.394, "step": 9172 }, { "epoch": 1.1162762397322787, "grad_norm": 2.5242857933044434, "learning_rate": 8.462152763315516e-06, "loss": 0.3802, "step": 9173 }, { "epoch": 1.1163979312442958, "grad_norm": 1.7580677270889282, "learning_rate": 8.460225303328854e-06, "loss": 0.3439, "step": 9174 }, { "epoch": 1.1165196227563128, "grad_norm": 1.6584011316299438, "learning_rate": 8.458297901933805e-06, "loss": 0.3621, "step": 9175 }, { "epoch": 1.1166413142683298, "grad_norm": 2.3109233379364014, "learning_rate": 8.45637055920371e-06, "loss": 0.3843, "step": 9176 }, { "epoch": 1.1167630057803468, "grad_norm": 2.325183629989624, "learning_rate": 8.454443275211912e-06, "loss": 0.4008, "step": 9177 }, { "epoch": 1.1168846972923638, "grad_norm": 2.5963947772979736, "learning_rate": 8.452516050031749e-06, "loss": 0.4097, "step": 9178 }, { "epoch": 1.1170063888043809, "grad_norm": 3.48311185836792, "learning_rate": 8.45058888373655e-06, "loss": 0.4332, "step": 9179 }, { "epoch": 1.1171280803163979, "grad_norm": 3.0110666751861572, "learning_rate": 8.448661776399652e-06, "loss": 0.3904, "step": 9180 }, { "epoch": 1.117249771828415, "grad_norm": 4.495974063873291, "learning_rate": 8.446734728094386e-06, "loss": 0.4892, "step": 9181 }, { "epoch": 1.117371463340432, "grad_norm": 2.3228940963745117, "learning_rate": 8.444807738894077e-06, "loss": 0.3903, "step": 9182 }, { "epoch": 1.1174931548524492, "grad_norm": 1.5135903358459473, "learning_rate": 8.442880808872057e-06, "loss": 0.3918, "step": 9183 }, { "epoch": 1.1176148463644662, "grad_norm": 2.5475640296936035, "learning_rate": 8.44095393810164e-06, "loss": 0.4213, "step": 9184 }, { "epoch": 1.1177365378764832, "grad_norm": 3.3910868167877197, "learning_rate": 8.439027126656151e-06, "loss": 0.2829, "step": 9185 }, { "epoch": 1.1178582293885002, "grad_norm": 2.7618460655212402, "learning_rate": 8.437100374608916e-06, "loss": 0.3769, "step": 9186 }, { "epoch": 1.1179799209005172, "grad_norm": 1.492190957069397, "learning_rate": 8.435173682033245e-06, "loss": 0.426, "step": 9187 }, { "epoch": 1.1181016124125343, "grad_norm": 2.698499917984009, "learning_rate": 8.433247049002453e-06, "loss": 0.3676, "step": 9188 }, { "epoch": 1.1182233039245513, "grad_norm": 1.435552716255188, "learning_rate": 8.431320475589855e-06, "loss": 0.4303, "step": 9189 }, { "epoch": 1.1183449954365683, "grad_norm": 2.180490016937256, "learning_rate": 8.42939396186876e-06, "loss": 0.3763, "step": 9190 }, { "epoch": 1.1184666869485853, "grad_norm": 1.9120277166366577, "learning_rate": 8.427467507912473e-06, "loss": 0.3883, "step": 9191 }, { "epoch": 1.1185883784606023, "grad_norm": 3.7214653491973877, "learning_rate": 8.425541113794304e-06, "loss": 0.3713, "step": 9192 }, { "epoch": 1.1187100699726193, "grad_norm": 1.3134238719940186, "learning_rate": 8.423614779587556e-06, "loss": 0.401, "step": 9193 }, { "epoch": 1.1188317614846364, "grad_norm": 1.6264400482177734, "learning_rate": 8.421688505365533e-06, "loss": 0.3972, "step": 9194 }, { "epoch": 1.1189534529966534, "grad_norm": 2.182687759399414, "learning_rate": 8.419762291201523e-06, "loss": 0.3879, "step": 9195 }, { "epoch": 1.1190751445086704, "grad_norm": 2.0381920337677, "learning_rate": 8.417836137168828e-06, "loss": 0.3713, "step": 9196 }, { "epoch": 1.1191968360206876, "grad_norm": 2.056309700012207, "learning_rate": 8.415910043340747e-06, "loss": 0.3585, "step": 9197 }, { "epoch": 1.1193185275327047, "grad_norm": 1.435675859451294, "learning_rate": 8.413984009790565e-06, "loss": 0.3617, "step": 9198 }, { "epoch": 1.1194402190447217, "grad_norm": 1.9840588569641113, "learning_rate": 8.412058036591573e-06, "loss": 0.4369, "step": 9199 }, { "epoch": 1.1195619105567387, "grad_norm": 1.6251378059387207, "learning_rate": 8.41013212381706e-06, "loss": 0.4057, "step": 9200 }, { "epoch": 1.1196836020687557, "grad_norm": 2.0687034130096436, "learning_rate": 8.408206271540314e-06, "loss": 0.352, "step": 9201 }, { "epoch": 1.1198052935807727, "grad_norm": 4.334172248840332, "learning_rate": 8.406280479834612e-06, "loss": 0.4505, "step": 9202 }, { "epoch": 1.1199269850927898, "grad_norm": 2.7174582481384277, "learning_rate": 8.404354748773235e-06, "loss": 0.4329, "step": 9203 }, { "epoch": 1.1200486766048068, "grad_norm": 1.7748609781265259, "learning_rate": 8.402429078429466e-06, "loss": 0.3516, "step": 9204 }, { "epoch": 1.1201703681168238, "grad_norm": 1.4807707071304321, "learning_rate": 8.400503468876576e-06, "loss": 0.3765, "step": 9205 }, { "epoch": 1.1202920596288408, "grad_norm": 1.528741717338562, "learning_rate": 8.398577920187842e-06, "loss": 0.4212, "step": 9206 }, { "epoch": 1.1204137511408578, "grad_norm": 1.4302330017089844, "learning_rate": 8.39665243243653e-06, "loss": 0.3877, "step": 9207 }, { "epoch": 1.1205354426528749, "grad_norm": 3.2975523471832275, "learning_rate": 8.39472700569591e-06, "loss": 0.3145, "step": 9208 }, { "epoch": 1.120657134164892, "grad_norm": 1.322030782699585, "learning_rate": 8.392801640039254e-06, "loss": 0.3785, "step": 9209 }, { "epoch": 1.1207788256769091, "grad_norm": 1.6766749620437622, "learning_rate": 8.39087633553982e-06, "loss": 0.408, "step": 9210 }, { "epoch": 1.1209005171889261, "grad_norm": 1.4917322397232056, "learning_rate": 8.388951092270871e-06, "loss": 0.3892, "step": 9211 }, { "epoch": 1.1210222087009432, "grad_norm": 3.555649518966675, "learning_rate": 8.387025910305671e-06, "loss": 0.3748, "step": 9212 }, { "epoch": 1.1211439002129602, "grad_norm": 2.2035226821899414, "learning_rate": 8.385100789717472e-06, "loss": 0.4583, "step": 9213 }, { "epoch": 1.1212655917249772, "grad_norm": 1.5977227687835693, "learning_rate": 8.383175730579528e-06, "loss": 0.3636, "step": 9214 }, { "epoch": 1.1213872832369942, "grad_norm": 1.6033726930618286, "learning_rate": 8.3812507329651e-06, "loss": 0.3817, "step": 9215 }, { "epoch": 1.1215089747490112, "grad_norm": 1.445035457611084, "learning_rate": 8.379325796947428e-06, "loss": 0.3518, "step": 9216 }, { "epoch": 1.1216306662610283, "grad_norm": 2.4747064113616943, "learning_rate": 8.377400922599769e-06, "loss": 0.3803, "step": 9217 }, { "epoch": 1.1217523577730453, "grad_norm": 3.3628170490264893, "learning_rate": 8.375476109995359e-06, "loss": 0.4531, "step": 9218 }, { "epoch": 1.1218740492850623, "grad_norm": 2.5855064392089844, "learning_rate": 8.373551359207445e-06, "loss": 0.4042, "step": 9219 }, { "epoch": 1.1219957407970793, "grad_norm": 1.3485170602798462, "learning_rate": 8.37162667030927e-06, "loss": 0.3386, "step": 9220 }, { "epoch": 1.1221174323090963, "grad_norm": 2.2683331966400146, "learning_rate": 8.36970204337407e-06, "loss": 0.4098, "step": 9221 }, { "epoch": 1.1222391238211136, "grad_norm": 2.050955057144165, "learning_rate": 8.367777478475082e-06, "loss": 0.349, "step": 9222 }, { "epoch": 1.1223608153331306, "grad_norm": 2.4875850677490234, "learning_rate": 8.36585297568554e-06, "loss": 0.4101, "step": 9223 }, { "epoch": 1.1224825068451476, "grad_norm": 1.9098100662231445, "learning_rate": 8.363928535078674e-06, "loss": 0.3642, "step": 9224 }, { "epoch": 1.1226041983571646, "grad_norm": 1.338596224784851, "learning_rate": 8.362004156727712e-06, "loss": 0.3923, "step": 9225 }, { "epoch": 1.1227258898691816, "grad_norm": 1.468164324760437, "learning_rate": 8.360079840705883e-06, "loss": 0.4224, "step": 9226 }, { "epoch": 1.1228475813811987, "grad_norm": 1.3696421384811401, "learning_rate": 8.358155587086414e-06, "loss": 0.376, "step": 9227 }, { "epoch": 1.1229692728932157, "grad_norm": 1.6756013631820679, "learning_rate": 8.35623139594252e-06, "loss": 0.3851, "step": 9228 }, { "epoch": 1.1230909644052327, "grad_norm": 1.3554133176803589, "learning_rate": 8.354307267347428e-06, "loss": 0.3774, "step": 9229 }, { "epoch": 1.1232126559172497, "grad_norm": 1.647902011871338, "learning_rate": 8.352383201374348e-06, "loss": 0.4031, "step": 9230 }, { "epoch": 1.1233343474292667, "grad_norm": 1.9269342422485352, "learning_rate": 8.350459198096499e-06, "loss": 0.3639, "step": 9231 }, { "epoch": 1.1234560389412838, "grad_norm": 1.6590639352798462, "learning_rate": 8.348535257587089e-06, "loss": 0.4309, "step": 9232 }, { "epoch": 1.1235777304533008, "grad_norm": 1.882504940032959, "learning_rate": 8.346611379919331e-06, "loss": 0.392, "step": 9233 }, { "epoch": 1.123699421965318, "grad_norm": 1.2456754446029663, "learning_rate": 8.344687565166431e-06, "loss": 0.3953, "step": 9234 }, { "epoch": 1.123821113477335, "grad_norm": 1.6376365423202515, "learning_rate": 8.3427638134016e-06, "loss": 0.3902, "step": 9235 }, { "epoch": 1.123942804989352, "grad_norm": 1.4814348220825195, "learning_rate": 8.340840124698033e-06, "loss": 0.3828, "step": 9236 }, { "epoch": 1.124064496501369, "grad_norm": 1.7772419452667236, "learning_rate": 8.338916499128934e-06, "loss": 0.4027, "step": 9237 }, { "epoch": 1.124186188013386, "grad_norm": 1.310228943824768, "learning_rate": 8.336992936767502e-06, "loss": 0.3816, "step": 9238 }, { "epoch": 1.1243078795254031, "grad_norm": 1.4920202493667603, "learning_rate": 8.33506943768693e-06, "loss": 0.3356, "step": 9239 }, { "epoch": 1.1244295710374201, "grad_norm": 1.5532442331314087, "learning_rate": 8.333146001960414e-06, "loss": 0.3891, "step": 9240 }, { "epoch": 1.1245512625494372, "grad_norm": 1.3390220403671265, "learning_rate": 8.331222629661141e-06, "loss": 0.4108, "step": 9241 }, { "epoch": 1.1246729540614542, "grad_norm": 2.116316556930542, "learning_rate": 8.3292993208623e-06, "loss": 0.4052, "step": 9242 }, { "epoch": 1.1247946455734712, "grad_norm": 1.5120859146118164, "learning_rate": 8.32737607563708e-06, "loss": 0.3689, "step": 9243 }, { "epoch": 1.1249163370854882, "grad_norm": 3.3274855613708496, "learning_rate": 8.32545289405866e-06, "loss": 0.3176, "step": 9244 }, { "epoch": 1.1250380285975052, "grad_norm": 1.7269874811172485, "learning_rate": 8.323529776200225e-06, "loss": 0.3963, "step": 9245 }, { "epoch": 1.1251597201095223, "grad_norm": 1.530533790588379, "learning_rate": 8.321606722134954e-06, "loss": 0.4131, "step": 9246 }, { "epoch": 1.1252814116215393, "grad_norm": 1.2506675720214844, "learning_rate": 8.31968373193602e-06, "loss": 0.4242, "step": 9247 }, { "epoch": 1.1254031031335565, "grad_norm": 1.1500694751739502, "learning_rate": 8.317760805676595e-06, "loss": 0.3202, "step": 9248 }, { "epoch": 1.1255247946455735, "grad_norm": 1.965857744216919, "learning_rate": 8.315837943429858e-06, "loss": 0.3931, "step": 9249 }, { "epoch": 1.1256464861575906, "grad_norm": 1.6869757175445557, "learning_rate": 8.313915145268969e-06, "loss": 0.3322, "step": 9250 }, { "epoch": 1.1257681776696076, "grad_norm": 3.3146512508392334, "learning_rate": 8.3119924112671e-06, "loss": 0.3118, "step": 9251 }, { "epoch": 1.1258898691816246, "grad_norm": 1.2627558708190918, "learning_rate": 8.310069741497418e-06, "loss": 0.3802, "step": 9252 }, { "epoch": 1.1260115606936416, "grad_norm": 1.7003408670425415, "learning_rate": 8.308147136033077e-06, "loss": 0.3377, "step": 9253 }, { "epoch": 1.1261332522056586, "grad_norm": 4.627403259277344, "learning_rate": 8.30622459494724e-06, "loss": 0.5301, "step": 9254 }, { "epoch": 1.1262549437176756, "grad_norm": 2.9649951457977295, "learning_rate": 8.304302118313062e-06, "loss": 0.4097, "step": 9255 }, { "epoch": 1.1263766352296927, "grad_norm": 1.5506354570388794, "learning_rate": 8.302379706203699e-06, "loss": 0.4451, "step": 9256 }, { "epoch": 1.1264983267417097, "grad_norm": 1.629686951637268, "learning_rate": 8.300457358692302e-06, "loss": 0.4682, "step": 9257 }, { "epoch": 1.1266200182537267, "grad_norm": 1.4924015998840332, "learning_rate": 8.298535075852018e-06, "loss": 0.4257, "step": 9258 }, { "epoch": 1.126741709765744, "grad_norm": 1.261066198348999, "learning_rate": 8.296612857755999e-06, "loss": 0.3768, "step": 9259 }, { "epoch": 1.126863401277761, "grad_norm": 1.8587522506713867, "learning_rate": 8.294690704477385e-06, "loss": 0.3911, "step": 9260 }, { "epoch": 1.126985092789778, "grad_norm": 1.5861588716506958, "learning_rate": 8.292768616089321e-06, "loss": 0.4208, "step": 9261 }, { "epoch": 1.127106784301795, "grad_norm": 1.6087912321090698, "learning_rate": 8.290846592664944e-06, "loss": 0.4646, "step": 9262 }, { "epoch": 1.127228475813812, "grad_norm": 1.486458659172058, "learning_rate": 8.288924634277395e-06, "loss": 0.4132, "step": 9263 }, { "epoch": 1.127350167325829, "grad_norm": 2.022153854370117, "learning_rate": 8.287002740999804e-06, "loss": 0.4231, "step": 9264 }, { "epoch": 1.127471858837846, "grad_norm": 1.5830142498016357, "learning_rate": 8.285080912905304e-06, "loss": 0.3751, "step": 9265 }, { "epoch": 1.127593550349863, "grad_norm": 1.609625220298767, "learning_rate": 8.283159150067021e-06, "loss": 0.445, "step": 9266 }, { "epoch": 1.12771524186188, "grad_norm": 2.2812600135803223, "learning_rate": 8.28123745255809e-06, "loss": 0.4171, "step": 9267 }, { "epoch": 1.1278369333738971, "grad_norm": 1.4414499998092651, "learning_rate": 8.279315820451629e-06, "loss": 0.3523, "step": 9268 }, { "epoch": 1.1279586248859141, "grad_norm": 2.064664602279663, "learning_rate": 8.277394253820765e-06, "loss": 0.4462, "step": 9269 }, { "epoch": 1.1280803163979312, "grad_norm": 2.7489442825317383, "learning_rate": 8.275472752738613e-06, "loss": 0.3337, "step": 9270 }, { "epoch": 1.1282020079099482, "grad_norm": 1.8284212350845337, "learning_rate": 8.273551317278294e-06, "loss": 0.3644, "step": 9271 }, { "epoch": 1.1283236994219652, "grad_norm": 1.5729069709777832, "learning_rate": 8.271629947512922e-06, "loss": 0.4098, "step": 9272 }, { "epoch": 1.1284453909339824, "grad_norm": 1.5879161357879639, "learning_rate": 8.269708643515606e-06, "loss": 0.3688, "step": 9273 }, { "epoch": 1.1285670824459995, "grad_norm": 2.1110644340515137, "learning_rate": 8.267787405359457e-06, "loss": 0.3482, "step": 9274 }, { "epoch": 1.1286887739580165, "grad_norm": 1.3235408067703247, "learning_rate": 8.265866233117589e-06, "loss": 0.4, "step": 9275 }, { "epoch": 1.1288104654700335, "grad_norm": 1.99334716796875, "learning_rate": 8.263945126863093e-06, "loss": 0.4066, "step": 9276 }, { "epoch": 1.1289321569820505, "grad_norm": 1.5238032341003418, "learning_rate": 8.262024086669083e-06, "loss": 0.3942, "step": 9277 }, { "epoch": 1.1290538484940675, "grad_norm": 1.3232477903366089, "learning_rate": 8.26010311260865e-06, "loss": 0.3316, "step": 9278 }, { "epoch": 1.1291755400060846, "grad_norm": 1.5495525598526, "learning_rate": 8.258182204754896e-06, "loss": 0.3627, "step": 9279 }, { "epoch": 1.1292972315181016, "grad_norm": 3.8505191802978516, "learning_rate": 8.256261363180917e-06, "loss": 0.4347, "step": 9280 }, { "epoch": 1.1294189230301186, "grad_norm": 1.376433253288269, "learning_rate": 8.2543405879598e-06, "loss": 0.3677, "step": 9281 }, { "epoch": 1.1295406145421356, "grad_norm": 2.3608484268188477, "learning_rate": 8.252419879164637e-06, "loss": 0.3811, "step": 9282 }, { "epoch": 1.1296623060541526, "grad_norm": 1.8226901292800903, "learning_rate": 8.250499236868517e-06, "loss": 0.3393, "step": 9283 }, { "epoch": 1.1297839975661699, "grad_norm": 2.5892343521118164, "learning_rate": 8.248578661144519e-06, "loss": 0.3924, "step": 9284 }, { "epoch": 1.129905689078187, "grad_norm": 1.4924262762069702, "learning_rate": 8.24665815206573e-06, "loss": 0.3799, "step": 9285 }, { "epoch": 1.130027380590204, "grad_norm": 1.3813202381134033, "learning_rate": 8.24473770970523e-06, "loss": 0.3664, "step": 9286 }, { "epoch": 1.130149072102221, "grad_norm": 1.8140778541564941, "learning_rate": 8.24281733413609e-06, "loss": 0.3809, "step": 9287 }, { "epoch": 1.130270763614238, "grad_norm": 1.605759620666504, "learning_rate": 8.24089702543139e-06, "loss": 0.3871, "step": 9288 }, { "epoch": 1.130392455126255, "grad_norm": 1.528796911239624, "learning_rate": 8.238976783664196e-06, "loss": 0.3757, "step": 9289 }, { "epoch": 1.130514146638272, "grad_norm": 2.570866584777832, "learning_rate": 8.237056608907582e-06, "loss": 0.3257, "step": 9290 }, { "epoch": 1.130635838150289, "grad_norm": 1.2629880905151367, "learning_rate": 8.235136501234615e-06, "loss": 0.3844, "step": 9291 }, { "epoch": 1.130757529662306, "grad_norm": 1.410779356956482, "learning_rate": 8.233216460718354e-06, "loss": 0.4059, "step": 9292 }, { "epoch": 1.130879221174323, "grad_norm": 1.5605778694152832, "learning_rate": 8.231296487431865e-06, "loss": 0.3685, "step": 9293 }, { "epoch": 1.13100091268634, "grad_norm": 2.2018704414367676, "learning_rate": 8.229376581448203e-06, "loss": 0.342, "step": 9294 }, { "epoch": 1.131122604198357, "grad_norm": 2.390566825866699, "learning_rate": 8.227456742840432e-06, "loss": 0.4373, "step": 9295 }, { "epoch": 1.131244295710374, "grad_norm": 1.6698867082595825, "learning_rate": 8.225536971681599e-06, "loss": 0.3872, "step": 9296 }, { "epoch": 1.1313659872223911, "grad_norm": 1.9178253412246704, "learning_rate": 8.223617268044755e-06, "loss": 0.3579, "step": 9297 }, { "epoch": 1.1314876787344084, "grad_norm": 2.1450469493865967, "learning_rate": 8.221697632002957e-06, "loss": 0.3704, "step": 9298 }, { "epoch": 1.1316093702464254, "grad_norm": 1.6373666524887085, "learning_rate": 8.219778063629242e-06, "loss": 0.4104, "step": 9299 }, { "epoch": 1.1317310617584424, "grad_norm": 2.797213554382324, "learning_rate": 8.217858562996655e-06, "loss": 0.3473, "step": 9300 }, { "epoch": 1.1318527532704594, "grad_norm": 1.3088014125823975, "learning_rate": 8.215939130178238e-06, "loss": 0.3543, "step": 9301 }, { "epoch": 1.1319744447824764, "grad_norm": 2.1467719078063965, "learning_rate": 8.21401976524703e-06, "loss": 0.4568, "step": 9302 }, { "epoch": 1.1320961362944935, "grad_norm": 1.691737413406372, "learning_rate": 8.212100468276068e-06, "loss": 0.3544, "step": 9303 }, { "epoch": 1.1322178278065105, "grad_norm": 1.674431324005127, "learning_rate": 8.210181239338385e-06, "loss": 0.3666, "step": 9304 }, { "epoch": 1.1323395193185275, "grad_norm": 1.9545689821243286, "learning_rate": 8.208262078507007e-06, "loss": 0.3792, "step": 9305 }, { "epoch": 1.1324612108305445, "grad_norm": 1.3984578847885132, "learning_rate": 8.206342985854969e-06, "loss": 0.3438, "step": 9306 }, { "epoch": 1.1325829023425615, "grad_norm": 2.0488922595977783, "learning_rate": 8.20442396145529e-06, "loss": 0.4096, "step": 9307 }, { "epoch": 1.1327045938545786, "grad_norm": 2.844698905944824, "learning_rate": 8.202505005380997e-06, "loss": 0.3938, "step": 9308 }, { "epoch": 1.1328262853665958, "grad_norm": 2.3427329063415527, "learning_rate": 8.200586117705114e-06, "loss": 0.3957, "step": 9309 }, { "epoch": 1.1329479768786128, "grad_norm": 1.4236059188842773, "learning_rate": 8.198667298500648e-06, "loss": 0.3811, "step": 9310 }, { "epoch": 1.1330696683906298, "grad_norm": 1.2733254432678223, "learning_rate": 8.196748547840622e-06, "loss": 0.337, "step": 9311 }, { "epoch": 1.1331913599026469, "grad_norm": 1.3997050523757935, "learning_rate": 8.194829865798045e-06, "loss": 0.3535, "step": 9312 }, { "epoch": 1.1333130514146639, "grad_norm": 1.8210163116455078, "learning_rate": 8.19291125244593e-06, "loss": 0.3624, "step": 9313 }, { "epoch": 1.133434742926681, "grad_norm": 3.7033047676086426, "learning_rate": 8.190992707857282e-06, "loss": 0.4599, "step": 9314 }, { "epoch": 1.133556434438698, "grad_norm": 2.7419254779815674, "learning_rate": 8.189074232105103e-06, "loss": 0.3757, "step": 9315 }, { "epoch": 1.133678125950715, "grad_norm": 1.3748414516448975, "learning_rate": 8.187155825262398e-06, "loss": 0.3675, "step": 9316 }, { "epoch": 1.133799817462732, "grad_norm": 1.866584062576294, "learning_rate": 8.18523748740217e-06, "loss": 0.4044, "step": 9317 }, { "epoch": 1.133921508974749, "grad_norm": 1.968496322631836, "learning_rate": 8.18331921859741e-06, "loss": 0.3317, "step": 9318 }, { "epoch": 1.134043200486766, "grad_norm": 1.6435009241104126, "learning_rate": 8.181401018921114e-06, "loss": 0.4012, "step": 9319 }, { "epoch": 1.134164891998783, "grad_norm": 2.9385650157928467, "learning_rate": 8.179482888446271e-06, "loss": 0.3563, "step": 9320 }, { "epoch": 1.1342865835108, "grad_norm": 3.4491231441497803, "learning_rate": 8.177564827245879e-06, "loss": 0.3935, "step": 9321 }, { "epoch": 1.134408275022817, "grad_norm": 2.5235519409179688, "learning_rate": 8.175646835392915e-06, "loss": 0.4009, "step": 9322 }, { "epoch": 1.1345299665348343, "grad_norm": 1.8858593702316284, "learning_rate": 8.173728912960362e-06, "loss": 0.3625, "step": 9323 }, { "epoch": 1.1346516580468513, "grad_norm": 1.8929585218429565, "learning_rate": 8.171811060021206e-06, "loss": 0.3495, "step": 9324 }, { "epoch": 1.1347733495588683, "grad_norm": 2.0479977130889893, "learning_rate": 8.169893276648423e-06, "loss": 0.3461, "step": 9325 }, { "epoch": 1.1348950410708853, "grad_norm": 2.209444761276245, "learning_rate": 8.16797556291499e-06, "loss": 0.3812, "step": 9326 }, { "epoch": 1.1350167325829024, "grad_norm": 1.4777228832244873, "learning_rate": 8.166057918893876e-06, "loss": 0.3582, "step": 9327 }, { "epoch": 1.1351384240949194, "grad_norm": 1.1509705781936646, "learning_rate": 8.164140344658057e-06, "loss": 0.3489, "step": 9328 }, { "epoch": 1.1352601156069364, "grad_norm": 1.1321022510528564, "learning_rate": 8.162222840280497e-06, "loss": 0.3523, "step": 9329 }, { "epoch": 1.1353818071189534, "grad_norm": 1.3123786449432373, "learning_rate": 8.160305405834162e-06, "loss": 0.3658, "step": 9330 }, { "epoch": 1.1355034986309704, "grad_norm": 2.0186431407928467, "learning_rate": 8.158388041392012e-06, "loss": 0.4565, "step": 9331 }, { "epoch": 1.1356251901429875, "grad_norm": 3.378861904144287, "learning_rate": 8.156470747027015e-06, "loss": 0.4604, "step": 9332 }, { "epoch": 1.1357468816550045, "grad_norm": 3.521886110305786, "learning_rate": 8.154553522812118e-06, "loss": 0.2668, "step": 9333 }, { "epoch": 1.1358685731670217, "grad_norm": 1.5217844247817993, "learning_rate": 8.152636368820279e-06, "loss": 0.3835, "step": 9334 }, { "epoch": 1.1359902646790387, "grad_norm": 1.7410460710525513, "learning_rate": 8.150719285124451e-06, "loss": 0.3861, "step": 9335 }, { "epoch": 1.1361119561910558, "grad_norm": 1.5696029663085938, "learning_rate": 8.14880227179758e-06, "loss": 0.3902, "step": 9336 }, { "epoch": 1.1362336477030728, "grad_norm": 1.8855774402618408, "learning_rate": 8.146885328912618e-06, "loss": 0.3451, "step": 9337 }, { "epoch": 1.1363553392150898, "grad_norm": 1.956186056137085, "learning_rate": 8.144968456542502e-06, "loss": 0.4759, "step": 9338 }, { "epoch": 1.1364770307271068, "grad_norm": 1.9672017097473145, "learning_rate": 8.143051654760174e-06, "loss": 0.3978, "step": 9339 }, { "epoch": 1.1365987222391238, "grad_norm": 1.4946990013122559, "learning_rate": 8.141134923638579e-06, "loss": 0.3736, "step": 9340 }, { "epoch": 1.1367204137511409, "grad_norm": 1.4063612222671509, "learning_rate": 8.139218263250644e-06, "loss": 0.4137, "step": 9341 }, { "epoch": 1.1368421052631579, "grad_norm": 2.0300369262695312, "learning_rate": 8.137301673669306e-06, "loss": 0.3085, "step": 9342 }, { "epoch": 1.136963796775175, "grad_norm": 2.0914924144744873, "learning_rate": 8.135385154967499e-06, "loss": 0.4254, "step": 9343 }, { "epoch": 1.137085488287192, "grad_norm": 1.692579984664917, "learning_rate": 8.133468707218145e-06, "loss": 0.3931, "step": 9344 }, { "epoch": 1.137207179799209, "grad_norm": 1.779434084892273, "learning_rate": 8.13155233049417e-06, "loss": 0.3496, "step": 9345 }, { "epoch": 1.137328871311226, "grad_norm": 1.9572780132293701, "learning_rate": 8.129636024868495e-06, "loss": 0.3248, "step": 9346 }, { "epoch": 1.137450562823243, "grad_norm": 1.767472743988037, "learning_rate": 8.12771979041404e-06, "loss": 0.4445, "step": 9347 }, { "epoch": 1.13757225433526, "grad_norm": 1.6177034378051758, "learning_rate": 8.125803627203724e-06, "loss": 0.4019, "step": 9348 }, { "epoch": 1.1376939458472772, "grad_norm": 1.58851957321167, "learning_rate": 8.123887535310457e-06, "loss": 0.3887, "step": 9349 }, { "epoch": 1.1378156373592943, "grad_norm": 2.0800893306732178, "learning_rate": 8.121971514807154e-06, "loss": 0.4332, "step": 9350 }, { "epoch": 1.1379373288713113, "grad_norm": 1.859734296798706, "learning_rate": 8.120055565766723e-06, "loss": 0.3338, "step": 9351 }, { "epoch": 1.1380590203833283, "grad_norm": 1.957097053527832, "learning_rate": 8.118139688262067e-06, "loss": 0.4234, "step": 9352 }, { "epoch": 1.1381807118953453, "grad_norm": 1.6906638145446777, "learning_rate": 8.116223882366091e-06, "loss": 0.3642, "step": 9353 }, { "epoch": 1.1383024034073623, "grad_norm": 2.7752556800842285, "learning_rate": 8.114308148151697e-06, "loss": 0.3455, "step": 9354 }, { "epoch": 1.1384240949193793, "grad_norm": 1.729985237121582, "learning_rate": 8.112392485691784e-06, "loss": 0.4193, "step": 9355 }, { "epoch": 1.1385457864313964, "grad_norm": 2.2575976848602295, "learning_rate": 8.110476895059243e-06, "loss": 0.3086, "step": 9356 }, { "epoch": 1.1386674779434134, "grad_norm": 1.2856334447860718, "learning_rate": 8.108561376326966e-06, "loss": 0.3668, "step": 9357 }, { "epoch": 1.1387891694554304, "grad_norm": 2.5189595222473145, "learning_rate": 8.106645929567842e-06, "loss": 0.3139, "step": 9358 }, { "epoch": 1.1389108609674474, "grad_norm": 2.3296563625335693, "learning_rate": 8.10473055485476e-06, "loss": 0.3866, "step": 9359 }, { "epoch": 1.1390325524794647, "grad_norm": 2.5810060501098633, "learning_rate": 8.102815252260604e-06, "loss": 0.3913, "step": 9360 }, { "epoch": 1.1391542439914817, "grad_norm": 2.634850025177002, "learning_rate": 8.100900021858253e-06, "loss": 0.4001, "step": 9361 }, { "epoch": 1.1392759355034987, "grad_norm": 2.4573683738708496, "learning_rate": 8.098984863720588e-06, "loss": 0.4085, "step": 9362 }, { "epoch": 1.1393976270155157, "grad_norm": 1.4731765985488892, "learning_rate": 8.097069777920486e-06, "loss": 0.3693, "step": 9363 }, { "epoch": 1.1395193185275327, "grad_norm": 1.8492422103881836, "learning_rate": 8.095154764530814e-06, "loss": 0.3682, "step": 9364 }, { "epoch": 1.1396410100395498, "grad_norm": 1.3343756198883057, "learning_rate": 8.093239823624449e-06, "loss": 0.3421, "step": 9365 }, { "epoch": 1.1397627015515668, "grad_norm": 2.886051893234253, "learning_rate": 8.091324955274256e-06, "loss": 0.4413, "step": 9366 }, { "epoch": 1.1398843930635838, "grad_norm": 3.353578567504883, "learning_rate": 8.0894101595531e-06, "loss": 0.4363, "step": 9367 }, { "epoch": 1.1400060845756008, "grad_norm": 1.4267032146453857, "learning_rate": 8.08749543653384e-06, "loss": 0.4186, "step": 9368 }, { "epoch": 1.1401277760876178, "grad_norm": 2.3749492168426514, "learning_rate": 8.085580786289336e-06, "loss": 0.3865, "step": 9369 }, { "epoch": 1.1402494675996349, "grad_norm": 2.1741597652435303, "learning_rate": 8.083666208892447e-06, "loss": 0.4266, "step": 9370 }, { "epoch": 1.1403711591116519, "grad_norm": 1.214575171470642, "learning_rate": 8.081751704416027e-06, "loss": 0.3935, "step": 9371 }, { "epoch": 1.140492850623669, "grad_norm": 2.4452576637268066, "learning_rate": 8.079837272932923e-06, "loss": 0.3847, "step": 9372 }, { "epoch": 1.140614542135686, "grad_norm": 1.7560369968414307, "learning_rate": 8.077922914515984e-06, "loss": 0.4192, "step": 9373 }, { "epoch": 1.1407362336477032, "grad_norm": 1.2648472785949707, "learning_rate": 8.07600862923806e-06, "loss": 0.391, "step": 9374 }, { "epoch": 1.1408579251597202, "grad_norm": 1.4950051307678223, "learning_rate": 8.074094417171987e-06, "loss": 0.4275, "step": 9375 }, { "epoch": 1.1409796166717372, "grad_norm": 2.6746535301208496, "learning_rate": 8.07218027839061e-06, "loss": 0.3993, "step": 9376 }, { "epoch": 1.1411013081837542, "grad_norm": 1.4658801555633545, "learning_rate": 8.070266212966765e-06, "loss": 0.3736, "step": 9377 }, { "epoch": 1.1412229996957712, "grad_norm": 2.162775993347168, "learning_rate": 8.068352220973284e-06, "loss": 0.3678, "step": 9378 }, { "epoch": 1.1413446912077883, "grad_norm": 3.0236072540283203, "learning_rate": 8.066438302482999e-06, "loss": 0.4055, "step": 9379 }, { "epoch": 1.1414663827198053, "grad_norm": 3.2523727416992188, "learning_rate": 8.064524457568736e-06, "loss": 0.3519, "step": 9380 }, { "epoch": 1.1415880742318223, "grad_norm": 1.3547011613845825, "learning_rate": 8.062610686303326e-06, "loss": 0.3739, "step": 9381 }, { "epoch": 1.1417097657438393, "grad_norm": 1.3020365238189697, "learning_rate": 8.06069698875959e-06, "loss": 0.3894, "step": 9382 }, { "epoch": 1.1418314572558563, "grad_norm": 2.1218912601470947, "learning_rate": 8.058783365010348e-06, "loss": 0.4066, "step": 9383 }, { "epoch": 1.1419531487678733, "grad_norm": 2.3884449005126953, "learning_rate": 8.056869815128414e-06, "loss": 0.4146, "step": 9384 }, { "epoch": 1.1420748402798906, "grad_norm": 4.383427143096924, "learning_rate": 8.05495633918661e-06, "loss": 0.4325, "step": 9385 }, { "epoch": 1.1421965317919076, "grad_norm": 1.297877311706543, "learning_rate": 8.053042937257739e-06, "loss": 0.3786, "step": 9386 }, { "epoch": 1.1423182233039246, "grad_norm": 2.1148841381073, "learning_rate": 8.051129609414616e-06, "loss": 0.3984, "step": 9387 }, { "epoch": 1.1424399148159416, "grad_norm": 1.0986117124557495, "learning_rate": 8.049216355730046e-06, "loss": 0.3043, "step": 9388 }, { "epoch": 1.1425616063279587, "grad_norm": 2.2859504222869873, "learning_rate": 8.047303176276831e-06, "loss": 0.3026, "step": 9389 }, { "epoch": 1.1426832978399757, "grad_norm": 3.8585379123687744, "learning_rate": 8.045390071127776e-06, "loss": 0.4313, "step": 9390 }, { "epoch": 1.1428049893519927, "grad_norm": 1.3305115699768066, "learning_rate": 8.043477040355669e-06, "loss": 0.3836, "step": 9391 }, { "epoch": 1.1429266808640097, "grad_norm": 2.696345567703247, "learning_rate": 8.041564084033311e-06, "loss": 0.4444, "step": 9392 }, { "epoch": 1.1430483723760267, "grad_norm": 1.7355663776397705, "learning_rate": 8.039651202233497e-06, "loss": 0.4127, "step": 9393 }, { "epoch": 1.1431700638880438, "grad_norm": 2.0472328662872314, "learning_rate": 8.037738395029009e-06, "loss": 0.3923, "step": 9394 }, { "epoch": 1.1432917554000608, "grad_norm": 1.3737040758132935, "learning_rate": 8.035825662492637e-06, "loss": 0.3666, "step": 9395 }, { "epoch": 1.1434134469120778, "grad_norm": 2.1715660095214844, "learning_rate": 8.033913004697164e-06, "loss": 0.4093, "step": 9396 }, { "epoch": 1.1435351384240948, "grad_norm": 1.5705746412277222, "learning_rate": 8.032000421715372e-06, "loss": 0.4284, "step": 9397 }, { "epoch": 1.1436568299361118, "grad_norm": 1.605338454246521, "learning_rate": 8.030087913620036e-06, "loss": 0.4397, "step": 9398 }, { "epoch": 1.143778521448129, "grad_norm": 1.901050090789795, "learning_rate": 8.028175480483932e-06, "loss": 0.37, "step": 9399 }, { "epoch": 1.143900212960146, "grad_norm": 1.8695341348648071, "learning_rate": 8.026263122379835e-06, "loss": 0.3656, "step": 9400 }, { "epoch": 1.1440219044721631, "grad_norm": 1.4707010984420776, "learning_rate": 8.02435083938051e-06, "loss": 0.3743, "step": 9401 }, { "epoch": 1.1441435959841801, "grad_norm": 1.474139928817749, "learning_rate": 8.02243863155873e-06, "loss": 0.3596, "step": 9402 }, { "epoch": 1.1442652874961972, "grad_norm": 1.4010915756225586, "learning_rate": 8.020526498987248e-06, "loss": 0.3748, "step": 9403 }, { "epoch": 1.1443869790082142, "grad_norm": 1.3375674486160278, "learning_rate": 8.01861444173883e-06, "loss": 0.3559, "step": 9404 }, { "epoch": 1.1445086705202312, "grad_norm": 1.069501280784607, "learning_rate": 8.016702459886235e-06, "loss": 0.3508, "step": 9405 }, { "epoch": 1.1446303620322482, "grad_norm": 2.4618308544158936, "learning_rate": 8.014790553502215e-06, "loss": 0.3773, "step": 9406 }, { "epoch": 1.1447520535442652, "grad_norm": 2.4717705249786377, "learning_rate": 8.012878722659526e-06, "loss": 0.4141, "step": 9407 }, { "epoch": 1.1448737450562823, "grad_norm": 1.9223419427871704, "learning_rate": 8.010966967430914e-06, "loss": 0.3935, "step": 9408 }, { "epoch": 1.1449954365682993, "grad_norm": 1.3690690994262695, "learning_rate": 8.009055287889124e-06, "loss": 0.3107, "step": 9409 }, { "epoch": 1.1451171280803165, "grad_norm": 2.378538131713867, "learning_rate": 8.007143684106901e-06, "loss": 0.3318, "step": 9410 }, { "epoch": 1.1452388195923335, "grad_norm": 1.3570115566253662, "learning_rate": 8.00523215615699e-06, "loss": 0.3661, "step": 9411 }, { "epoch": 1.1453605111043506, "grad_norm": 1.7419265508651733, "learning_rate": 8.00332070411212e-06, "loss": 0.3471, "step": 9412 }, { "epoch": 1.1454822026163676, "grad_norm": 3.308589458465576, "learning_rate": 8.001409328045036e-06, "loss": 0.4324, "step": 9413 }, { "epoch": 1.1456038941283846, "grad_norm": 3.726086378097534, "learning_rate": 7.999498028028459e-06, "loss": 0.461, "step": 9414 }, { "epoch": 1.1457255856404016, "grad_norm": 1.404338002204895, "learning_rate": 7.997586804135124e-06, "loss": 0.3102, "step": 9415 }, { "epoch": 1.1458472771524186, "grad_norm": 1.4919569492340088, "learning_rate": 7.995675656437756e-06, "loss": 0.3622, "step": 9416 }, { "epoch": 1.1459689686644356, "grad_norm": 1.9902676343917847, "learning_rate": 7.993764585009078e-06, "loss": 0.3578, "step": 9417 }, { "epoch": 1.1460906601764527, "grad_norm": 1.3841243982315063, "learning_rate": 7.991853589921807e-06, "loss": 0.3436, "step": 9418 }, { "epoch": 1.1462123516884697, "grad_norm": 1.3774094581604004, "learning_rate": 7.98994267124867e-06, "loss": 0.3874, "step": 9419 }, { "epoch": 1.1463340432004867, "grad_norm": 3.0227530002593994, "learning_rate": 7.98803182906237e-06, "loss": 0.4731, "step": 9420 }, { "epoch": 1.1464557347125037, "grad_norm": 1.482176423072815, "learning_rate": 7.986121063435623e-06, "loss": 0.3768, "step": 9421 }, { "epoch": 1.1465774262245207, "grad_norm": 1.861307978630066, "learning_rate": 7.98421037444114e-06, "loss": 0.34, "step": 9422 }, { "epoch": 1.1466991177365378, "grad_norm": 1.1559419631958008, "learning_rate": 7.982299762151625e-06, "loss": 0.3539, "step": 9423 }, { "epoch": 1.146820809248555, "grad_norm": 1.223334550857544, "learning_rate": 7.98038922663978e-06, "loss": 0.3921, "step": 9424 }, { "epoch": 1.146942500760572, "grad_norm": 2.4468960762023926, "learning_rate": 7.978478767978308e-06, "loss": 0.3745, "step": 9425 }, { "epoch": 1.147064192272589, "grad_norm": 1.5288887023925781, "learning_rate": 7.9765683862399e-06, "loss": 0.3728, "step": 9426 }, { "epoch": 1.147185883784606, "grad_norm": 2.264828681945801, "learning_rate": 7.974658081497255e-06, "loss": 0.4029, "step": 9427 }, { "epoch": 1.147307575296623, "grad_norm": 1.2708088159561157, "learning_rate": 7.972747853823059e-06, "loss": 0.3864, "step": 9428 }, { "epoch": 1.14742926680864, "grad_norm": 2.664573907852173, "learning_rate": 7.970837703290006e-06, "loss": 0.4648, "step": 9429 }, { "epoch": 1.1475509583206571, "grad_norm": 1.6222294569015503, "learning_rate": 7.968927629970776e-06, "loss": 0.3818, "step": 9430 }, { "epoch": 1.1476726498326741, "grad_norm": 1.9193700551986694, "learning_rate": 7.967017633938057e-06, "loss": 0.3729, "step": 9431 }, { "epoch": 1.1477943413446912, "grad_norm": 2.1691250801086426, "learning_rate": 7.965107715264523e-06, "loss": 0.3274, "step": 9432 }, { "epoch": 1.1479160328567082, "grad_norm": 1.5296939611434937, "learning_rate": 7.963197874022853e-06, "loss": 0.4097, "step": 9433 }, { "epoch": 1.1480377243687252, "grad_norm": 1.1184592247009277, "learning_rate": 7.961288110285721e-06, "loss": 0.3531, "step": 9434 }, { "epoch": 1.1481594158807424, "grad_norm": 1.580876111984253, "learning_rate": 7.959378424125794e-06, "loss": 0.3329, "step": 9435 }, { "epoch": 1.1482811073927595, "grad_norm": 1.337518572807312, "learning_rate": 7.957468815615747e-06, "loss": 0.3967, "step": 9436 }, { "epoch": 1.1484027989047765, "grad_norm": 4.1903767585754395, "learning_rate": 7.955559284828234e-06, "loss": 0.4417, "step": 9437 }, { "epoch": 1.1485244904167935, "grad_norm": 1.5820752382278442, "learning_rate": 7.953649831835923e-06, "loss": 0.4218, "step": 9438 }, { "epoch": 1.1486461819288105, "grad_norm": 2.5573534965515137, "learning_rate": 7.951740456711473e-06, "loss": 0.3377, "step": 9439 }, { "epoch": 1.1487678734408275, "grad_norm": 1.11497962474823, "learning_rate": 7.949831159527537e-06, "loss": 0.3298, "step": 9440 }, { "epoch": 1.1488895649528446, "grad_norm": 1.5248496532440186, "learning_rate": 7.947921940356767e-06, "loss": 0.3813, "step": 9441 }, { "epoch": 1.1490112564648616, "grad_norm": 3.7922863960266113, "learning_rate": 7.946012799271818e-06, "loss": 0.4952, "step": 9442 }, { "epoch": 1.1491329479768786, "grad_norm": 2.071898937225342, "learning_rate": 7.944103736345332e-06, "loss": 0.3852, "step": 9443 }, { "epoch": 1.1492546394888956, "grad_norm": 1.9802888631820679, "learning_rate": 7.942194751649955e-06, "loss": 0.4081, "step": 9444 }, { "epoch": 1.1493763310009126, "grad_norm": 3.9047274589538574, "learning_rate": 7.940285845258328e-06, "loss": 0.4395, "step": 9445 }, { "epoch": 1.1494980225129297, "grad_norm": 1.3604471683502197, "learning_rate": 7.938377017243086e-06, "loss": 0.3886, "step": 9446 }, { "epoch": 1.1496197140249467, "grad_norm": 3.498446464538574, "learning_rate": 7.936468267676865e-06, "loss": 0.3412, "step": 9447 }, { "epoch": 1.1497414055369637, "grad_norm": 3.1781203746795654, "learning_rate": 7.934559596632303e-06, "loss": 0.3846, "step": 9448 }, { "epoch": 1.1498630970489807, "grad_norm": 1.4405567646026611, "learning_rate": 7.932651004182019e-06, "loss": 0.3895, "step": 9449 }, { "epoch": 1.149984788560998, "grad_norm": 1.8151578903198242, "learning_rate": 7.930742490398646e-06, "loss": 0.368, "step": 9450 }, { "epoch": 1.150106480073015, "grad_norm": 1.7208164930343628, "learning_rate": 7.928834055354803e-06, "loss": 0.431, "step": 9451 }, { "epoch": 1.150228171585032, "grad_norm": 2.3178372383117676, "learning_rate": 7.926925699123109e-06, "loss": 0.3344, "step": 9452 }, { "epoch": 1.150349863097049, "grad_norm": 1.9946759939193726, "learning_rate": 7.925017421776188e-06, "loss": 0.4022, "step": 9453 }, { "epoch": 1.150471554609066, "grad_norm": 2.473324775695801, "learning_rate": 7.923109223386644e-06, "loss": 0.3413, "step": 9454 }, { "epoch": 1.150593246121083, "grad_norm": 1.7858223915100098, "learning_rate": 7.921201104027095e-06, "loss": 0.3399, "step": 9455 }, { "epoch": 1.1507149376331, "grad_norm": 2.6613070964813232, "learning_rate": 7.919293063770147e-06, "loss": 0.422, "step": 9456 }, { "epoch": 1.150836629145117, "grad_norm": 1.9846540689468384, "learning_rate": 7.917385102688407e-06, "loss": 0.3686, "step": 9457 }, { "epoch": 1.150958320657134, "grad_norm": 1.9592974185943604, "learning_rate": 7.91547722085447e-06, "loss": 0.3084, "step": 9458 }, { "epoch": 1.1510800121691511, "grad_norm": 2.1782310009002686, "learning_rate": 7.913569418340947e-06, "loss": 0.3826, "step": 9459 }, { "epoch": 1.1512017036811681, "grad_norm": 1.5790261030197144, "learning_rate": 7.911661695220419e-06, "loss": 0.3544, "step": 9460 }, { "epoch": 1.1513233951931854, "grad_norm": 4.169583320617676, "learning_rate": 7.90975405156549e-06, "loss": 0.4341, "step": 9461 }, { "epoch": 1.1514450867052024, "grad_norm": 2.2710633277893066, "learning_rate": 7.907846487448743e-06, "loss": 0.3897, "step": 9462 }, { "epoch": 1.1515667782172194, "grad_norm": 1.6063332557678223, "learning_rate": 7.905939002942769e-06, "loss": 0.3778, "step": 9463 }, { "epoch": 1.1516884697292364, "grad_norm": 2.9622507095336914, "learning_rate": 7.90403159812015e-06, "loss": 0.424, "step": 9464 }, { "epoch": 1.1518101612412535, "grad_norm": 1.6363670825958252, "learning_rate": 7.902124273053469e-06, "loss": 0.3507, "step": 9465 }, { "epoch": 1.1519318527532705, "grad_norm": 1.6198620796203613, "learning_rate": 7.900217027815299e-06, "loss": 0.3514, "step": 9466 }, { "epoch": 1.1520535442652875, "grad_norm": 2.052090644836426, "learning_rate": 7.898309862478219e-06, "loss": 0.3794, "step": 9467 }, { "epoch": 1.1521752357773045, "grad_norm": 1.5545611381530762, "learning_rate": 7.896402777114799e-06, "loss": 0.388, "step": 9468 }, { "epoch": 1.1522969272893215, "grad_norm": 1.7451837062835693, "learning_rate": 7.894495771797607e-06, "loss": 0.3818, "step": 9469 }, { "epoch": 1.1524186188013386, "grad_norm": 4.016990661621094, "learning_rate": 7.892588846599207e-06, "loss": 0.367, "step": 9470 }, { "epoch": 1.1525403103133556, "grad_norm": 1.678607702255249, "learning_rate": 7.89068200159217e-06, "loss": 0.3721, "step": 9471 }, { "epoch": 1.1526620018253726, "grad_norm": 1.4158412218093872, "learning_rate": 7.888775236849045e-06, "loss": 0.3955, "step": 9472 }, { "epoch": 1.1527836933373896, "grad_norm": 1.546684980392456, "learning_rate": 7.886868552442393e-06, "loss": 0.3704, "step": 9473 }, { "epoch": 1.1529053848494066, "grad_norm": 1.6489508152008057, "learning_rate": 7.884961948444763e-06, "loss": 0.4321, "step": 9474 }, { "epoch": 1.1530270763614239, "grad_norm": 2.5284345149993896, "learning_rate": 7.883055424928712e-06, "loss": 0.4659, "step": 9475 }, { "epoch": 1.153148767873441, "grad_norm": 1.6549315452575684, "learning_rate": 7.881148981966784e-06, "loss": 0.379, "step": 9476 }, { "epoch": 1.153270459385458, "grad_norm": 2.288231611251831, "learning_rate": 7.87924261963152e-06, "loss": 0.3886, "step": 9477 }, { "epoch": 1.153392150897475, "grad_norm": 2.0967490673065186, "learning_rate": 7.877336337995465e-06, "loss": 0.3451, "step": 9478 }, { "epoch": 1.153513842409492, "grad_norm": 1.1757196187973022, "learning_rate": 7.87543013713116e-06, "loss": 0.3828, "step": 9479 }, { "epoch": 1.153635533921509, "grad_norm": 1.3049899339675903, "learning_rate": 7.873524017111132e-06, "loss": 0.4098, "step": 9480 }, { "epoch": 1.153757225433526, "grad_norm": 2.831350803375244, "learning_rate": 7.871617978007918e-06, "loss": 0.4293, "step": 9481 }, { "epoch": 1.153878916945543, "grad_norm": 2.961888551712036, "learning_rate": 7.869712019894047e-06, "loss": 0.4551, "step": 9482 }, { "epoch": 1.15400060845756, "grad_norm": 1.5543574094772339, "learning_rate": 7.867806142842041e-06, "loss": 0.4219, "step": 9483 }, { "epoch": 1.154122299969577, "grad_norm": 1.2422986030578613, "learning_rate": 7.865900346924426e-06, "loss": 0.3535, "step": 9484 }, { "epoch": 1.154243991481594, "grad_norm": 1.8995840549468994, "learning_rate": 7.863994632213718e-06, "loss": 0.415, "step": 9485 }, { "epoch": 1.1543656829936113, "grad_norm": 1.3236974477767944, "learning_rate": 7.862088998782436e-06, "loss": 0.3878, "step": 9486 }, { "epoch": 1.1544873745056283, "grad_norm": 1.3614071607589722, "learning_rate": 7.860183446703096e-06, "loss": 0.3808, "step": 9487 }, { "epoch": 1.1546090660176453, "grad_norm": 1.6428714990615845, "learning_rate": 7.8582779760482e-06, "loss": 0.2749, "step": 9488 }, { "epoch": 1.1547307575296624, "grad_norm": 1.4920694828033447, "learning_rate": 7.856372586890262e-06, "loss": 0.3991, "step": 9489 }, { "epoch": 1.1548524490416794, "grad_norm": 1.3494758605957031, "learning_rate": 7.854467279301785e-06, "loss": 0.3659, "step": 9490 }, { "epoch": 1.1549741405536964, "grad_norm": 2.7925567626953125, "learning_rate": 7.85256205335527e-06, "loss": 0.3223, "step": 9491 }, { "epoch": 1.1550958320657134, "grad_norm": 1.8954964876174927, "learning_rate": 7.850656909123212e-06, "loss": 0.4144, "step": 9492 }, { "epoch": 1.1552175235777304, "grad_norm": 2.248262643814087, "learning_rate": 7.848751846678106e-06, "loss": 0.3885, "step": 9493 }, { "epoch": 1.1553392150897475, "grad_norm": 1.478324055671692, "learning_rate": 7.846846866092452e-06, "loss": 0.3959, "step": 9494 }, { "epoch": 1.1554609066017645, "grad_norm": 1.411107063293457, "learning_rate": 7.844941967438729e-06, "loss": 0.3684, "step": 9495 }, { "epoch": 1.1555825981137815, "grad_norm": 4.012429237365723, "learning_rate": 7.84303715078942e-06, "loss": 0.4168, "step": 9496 }, { "epoch": 1.1557042896257985, "grad_norm": 1.3292492628097534, "learning_rate": 7.841132416217014e-06, "loss": 0.3785, "step": 9497 }, { "epoch": 1.1558259811378155, "grad_norm": 1.4239649772644043, "learning_rate": 7.839227763793988e-06, "loss": 0.3594, "step": 9498 }, { "epoch": 1.1559476726498326, "grad_norm": 2.2892231941223145, "learning_rate": 7.83732319359282e-06, "loss": 0.3809, "step": 9499 }, { "epoch": 1.1560693641618498, "grad_norm": 1.491167664527893, "learning_rate": 7.83541870568598e-06, "loss": 0.3873, "step": 9500 }, { "epoch": 1.1561910556738668, "grad_norm": 1.4967931509017944, "learning_rate": 7.833514300145937e-06, "loss": 0.3836, "step": 9501 }, { "epoch": 1.1563127471858838, "grad_norm": 1.2635350227355957, "learning_rate": 7.831609977045164e-06, "loss": 0.3119, "step": 9502 }, { "epoch": 1.1564344386979009, "grad_norm": 2.0637807846069336, "learning_rate": 7.829705736456114e-06, "loss": 0.3688, "step": 9503 }, { "epoch": 1.1565561302099179, "grad_norm": 1.5053224563598633, "learning_rate": 7.827801578451255e-06, "loss": 0.4093, "step": 9504 }, { "epoch": 1.156677821721935, "grad_norm": 1.4580307006835938, "learning_rate": 7.825897503103046e-06, "loss": 0.4102, "step": 9505 }, { "epoch": 1.156799513233952, "grad_norm": 1.7580782175064087, "learning_rate": 7.823993510483934e-06, "loss": 0.3734, "step": 9506 }, { "epoch": 1.156921204745969, "grad_norm": 2.484034299850464, "learning_rate": 7.822089600666373e-06, "loss": 0.4252, "step": 9507 }, { "epoch": 1.157042896257986, "grad_norm": 1.912593126296997, "learning_rate": 7.820185773722812e-06, "loss": 0.3722, "step": 9508 }, { "epoch": 1.157164587770003, "grad_norm": 1.8472379446029663, "learning_rate": 7.818282029725691e-06, "loss": 0.411, "step": 9509 }, { "epoch": 1.15728627928202, "grad_norm": 1.7662009000778198, "learning_rate": 7.816378368747459e-06, "loss": 0.356, "step": 9510 }, { "epoch": 1.1574079707940372, "grad_norm": 2.1545989513397217, "learning_rate": 7.814474790860546e-06, "loss": 0.3269, "step": 9511 }, { "epoch": 1.1575296623060543, "grad_norm": 1.389992117881775, "learning_rate": 7.812571296137392e-06, "loss": 0.3876, "step": 9512 }, { "epoch": 1.1576513538180713, "grad_norm": 1.614871859550476, "learning_rate": 7.810667884650429e-06, "loss": 0.3792, "step": 9513 }, { "epoch": 1.1577730453300883, "grad_norm": 1.670812964439392, "learning_rate": 7.808764556472083e-06, "loss": 0.3475, "step": 9514 }, { "epoch": 1.1578947368421053, "grad_norm": 1.552437663078308, "learning_rate": 7.806861311674783e-06, "loss": 0.3635, "step": 9515 }, { "epoch": 1.1580164283541223, "grad_norm": 1.466321587562561, "learning_rate": 7.804958150330947e-06, "loss": 0.3797, "step": 9516 }, { "epoch": 1.1581381198661393, "grad_norm": 1.5108704566955566, "learning_rate": 7.803055072513003e-06, "loss": 0.3225, "step": 9517 }, { "epoch": 1.1582598113781564, "grad_norm": 2.024376392364502, "learning_rate": 7.801152078293357e-06, "loss": 0.3914, "step": 9518 }, { "epoch": 1.1583815028901734, "grad_norm": 2.414236307144165, "learning_rate": 7.799249167744425e-06, "loss": 0.3444, "step": 9519 }, { "epoch": 1.1585031944021904, "grad_norm": 4.399750232696533, "learning_rate": 7.797346340938618e-06, "loss": 0.4668, "step": 9520 }, { "epoch": 1.1586248859142074, "grad_norm": 1.9857077598571777, "learning_rate": 7.795443597948343e-06, "loss": 0.3944, "step": 9521 }, { "epoch": 1.1587465774262244, "grad_norm": 1.38261079788208, "learning_rate": 7.793540938846e-06, "loss": 0.3501, "step": 9522 }, { "epoch": 1.1588682689382415, "grad_norm": 1.349539875984192, "learning_rate": 7.791638363703992e-06, "loss": 0.3397, "step": 9523 }, { "epoch": 1.1589899604502585, "grad_norm": 2.1774744987487793, "learning_rate": 7.789735872594714e-06, "loss": 0.3167, "step": 9524 }, { "epoch": 1.1591116519622757, "grad_norm": 1.8662327527999878, "learning_rate": 7.787833465590566e-06, "loss": 0.3149, "step": 9525 }, { "epoch": 1.1592333434742927, "grad_norm": 1.6271507740020752, "learning_rate": 7.78593114276393e-06, "loss": 0.4027, "step": 9526 }, { "epoch": 1.1593550349863098, "grad_norm": 2.30841064453125, "learning_rate": 7.784028904187195e-06, "loss": 0.4197, "step": 9527 }, { "epoch": 1.1594767264983268, "grad_norm": 1.694427728652954, "learning_rate": 7.782126749932753e-06, "loss": 0.3307, "step": 9528 }, { "epoch": 1.1595984180103438, "grad_norm": 2.769146203994751, "learning_rate": 7.780224680072978e-06, "loss": 0.4299, "step": 9529 }, { "epoch": 1.1597201095223608, "grad_norm": 1.845624327659607, "learning_rate": 7.778322694680245e-06, "loss": 0.4046, "step": 9530 }, { "epoch": 1.1598418010343778, "grad_norm": 2.73650860786438, "learning_rate": 7.776420793826932e-06, "loss": 0.4572, "step": 9531 }, { "epoch": 1.1599634925463949, "grad_norm": 1.5545141696929932, "learning_rate": 7.77451897758541e-06, "loss": 0.3782, "step": 9532 }, { "epoch": 1.1600851840584119, "grad_norm": 1.8445744514465332, "learning_rate": 7.772617246028052e-06, "loss": 0.3725, "step": 9533 }, { "epoch": 1.160206875570429, "grad_norm": 3.447798013687134, "learning_rate": 7.770715599227214e-06, "loss": 0.3537, "step": 9534 }, { "epoch": 1.160328567082446, "grad_norm": 1.9832454919815063, "learning_rate": 7.768814037255261e-06, "loss": 0.4088, "step": 9535 }, { "epoch": 1.1604502585944632, "grad_norm": 1.8081860542297363, "learning_rate": 7.766912560184555e-06, "loss": 0.3834, "step": 9536 }, { "epoch": 1.1605719501064802, "grad_norm": 2.8829517364501953, "learning_rate": 7.765011168087447e-06, "loss": 0.3133, "step": 9537 }, { "epoch": 1.1606936416184972, "grad_norm": 2.1916608810424805, "learning_rate": 7.76310986103629e-06, "loss": 0.3603, "step": 9538 }, { "epoch": 1.1608153331305142, "grad_norm": 1.6348036527633667, "learning_rate": 7.761208639103435e-06, "loss": 0.3911, "step": 9539 }, { "epoch": 1.1609370246425312, "grad_norm": 1.549636721611023, "learning_rate": 7.759307502361224e-06, "loss": 0.3496, "step": 9540 }, { "epoch": 1.1610587161545483, "grad_norm": 2.0951642990112305, "learning_rate": 7.757406450882002e-06, "loss": 0.3438, "step": 9541 }, { "epoch": 1.1611804076665653, "grad_norm": 2.578937530517578, "learning_rate": 7.755505484738103e-06, "loss": 0.3842, "step": 9542 }, { "epoch": 1.1613020991785823, "grad_norm": 1.7002606391906738, "learning_rate": 7.753604604001867e-06, "loss": 0.3123, "step": 9543 }, { "epoch": 1.1614237906905993, "grad_norm": 2.162013292312622, "learning_rate": 7.751703808745629e-06, "loss": 0.4071, "step": 9544 }, { "epoch": 1.1615454822026163, "grad_norm": 1.66376531124115, "learning_rate": 7.749803099041712e-06, "loss": 0.4031, "step": 9545 }, { "epoch": 1.1616671737146333, "grad_norm": 1.3813987970352173, "learning_rate": 7.747902474962444e-06, "loss": 0.3427, "step": 9546 }, { "epoch": 1.1617888652266504, "grad_norm": 2.2929234504699707, "learning_rate": 7.74600193658015e-06, "loss": 0.3727, "step": 9547 }, { "epoch": 1.1619105567386674, "grad_norm": 4.208395004272461, "learning_rate": 7.744101483967147e-06, "loss": 0.4426, "step": 9548 }, { "epoch": 1.1620322482506844, "grad_norm": 1.5069340467453003, "learning_rate": 7.742201117195751e-06, "loss": 0.3932, "step": 9549 }, { "epoch": 1.1621539397627014, "grad_norm": 1.8345972299575806, "learning_rate": 7.740300836338276e-06, "loss": 0.3631, "step": 9550 }, { "epoch": 1.1622756312747187, "grad_norm": 1.3778551816940308, "learning_rate": 7.738400641467037e-06, "loss": 0.3853, "step": 9551 }, { "epoch": 1.1623973227867357, "grad_norm": 2.1200578212738037, "learning_rate": 7.736500532654332e-06, "loss": 0.4271, "step": 9552 }, { "epoch": 1.1625190142987527, "grad_norm": 1.5334471464157104, "learning_rate": 7.734600509972465e-06, "loss": 0.4049, "step": 9553 }, { "epoch": 1.1626407058107697, "grad_norm": 3.031325340270996, "learning_rate": 7.732700573493736e-06, "loss": 0.2943, "step": 9554 }, { "epoch": 1.1627623973227867, "grad_norm": 1.5473443269729614, "learning_rate": 7.730800723290448e-06, "loss": 0.3508, "step": 9555 }, { "epoch": 1.1628840888348038, "grad_norm": 1.4994291067123413, "learning_rate": 7.728900959434885e-06, "loss": 0.3533, "step": 9556 }, { "epoch": 1.1630057803468208, "grad_norm": 2.1711630821228027, "learning_rate": 7.727001281999342e-06, "loss": 0.4169, "step": 9557 }, { "epoch": 1.1631274718588378, "grad_norm": 1.7278785705566406, "learning_rate": 7.725101691056107e-06, "loss": 0.4028, "step": 9558 }, { "epoch": 1.1632491633708548, "grad_norm": 1.880561113357544, "learning_rate": 7.72320218667746e-06, "loss": 0.3925, "step": 9559 }, { "epoch": 1.1633708548828718, "grad_norm": 1.623520851135254, "learning_rate": 7.721302768935683e-06, "loss": 0.3819, "step": 9560 }, { "epoch": 1.1634925463948889, "grad_norm": 1.408267617225647, "learning_rate": 7.719403437903053e-06, "loss": 0.4024, "step": 9561 }, { "epoch": 1.163614237906906, "grad_norm": 1.4975931644439697, "learning_rate": 7.717504193651843e-06, "loss": 0.3827, "step": 9562 }, { "epoch": 1.1637359294189231, "grad_norm": 1.5320889949798584, "learning_rate": 7.715605036254323e-06, "loss": 0.3858, "step": 9563 }, { "epoch": 1.1638576209309401, "grad_norm": 1.6493922472000122, "learning_rate": 7.71370596578276e-06, "loss": 0.3898, "step": 9564 }, { "epoch": 1.1639793124429572, "grad_norm": 2.1500160694122314, "learning_rate": 7.711806982309416e-06, "loss": 0.3732, "step": 9565 }, { "epoch": 1.1641010039549742, "grad_norm": 2.043473720550537, "learning_rate": 7.709908085906553e-06, "loss": 0.4098, "step": 9566 }, { "epoch": 1.1642226954669912, "grad_norm": 2.018477439880371, "learning_rate": 7.708009276646428e-06, "loss": 0.3647, "step": 9567 }, { "epoch": 1.1643443869790082, "grad_norm": 1.6066014766693115, "learning_rate": 7.706110554601293e-06, "loss": 0.3598, "step": 9568 }, { "epoch": 1.1644660784910252, "grad_norm": 1.5660622119903564, "learning_rate": 7.704211919843401e-06, "loss": 0.3656, "step": 9569 }, { "epoch": 1.1645877700030423, "grad_norm": 2.9562876224517822, "learning_rate": 7.702313372444998e-06, "loss": 0.4249, "step": 9570 }, { "epoch": 1.1647094615150593, "grad_norm": 1.541069507598877, "learning_rate": 7.700414912478324e-06, "loss": 0.3297, "step": 9571 }, { "epoch": 1.1648311530270763, "grad_norm": 3.7196342945098877, "learning_rate": 7.698516540015623e-06, "loss": 0.4506, "step": 9572 }, { "epoch": 1.1649528445390933, "grad_norm": 1.3341870307922363, "learning_rate": 7.696618255129135e-06, "loss": 0.3751, "step": 9573 }, { "epoch": 1.1650745360511103, "grad_norm": 1.5453259944915771, "learning_rate": 7.69472005789109e-06, "loss": 0.4101, "step": 9574 }, { "epoch": 1.1651962275631274, "grad_norm": 1.5480189323425293, "learning_rate": 7.692821948373718e-06, "loss": 0.3633, "step": 9575 }, { "epoch": 1.1653179190751446, "grad_norm": 2.057725191116333, "learning_rate": 7.690923926649242e-06, "loss": 0.3647, "step": 9576 }, { "epoch": 1.1654396105871616, "grad_norm": 1.6835273504257202, "learning_rate": 7.689025992789893e-06, "loss": 0.4442, "step": 9577 }, { "epoch": 1.1655613020991786, "grad_norm": 2.4741570949554443, "learning_rate": 7.687128146867891e-06, "loss": 0.3646, "step": 9578 }, { "epoch": 1.1656829936111957, "grad_norm": 2.169314384460449, "learning_rate": 7.685230388955447e-06, "loss": 0.4438, "step": 9579 }, { "epoch": 1.1658046851232127, "grad_norm": 2.268157958984375, "learning_rate": 7.683332719124778e-06, "loss": 0.313, "step": 9580 }, { "epoch": 1.1659263766352297, "grad_norm": 1.5645852088928223, "learning_rate": 7.681435137448095e-06, "loss": 0.3983, "step": 9581 }, { "epoch": 1.1660480681472467, "grad_norm": 1.926119089126587, "learning_rate": 7.679537643997604e-06, "loss": 0.4253, "step": 9582 }, { "epoch": 1.1661697596592637, "grad_norm": 2.4873692989349365, "learning_rate": 7.677640238845509e-06, "loss": 0.3784, "step": 9583 }, { "epoch": 1.1662914511712807, "grad_norm": 2.1079134941101074, "learning_rate": 7.675742922064008e-06, "loss": 0.3709, "step": 9584 }, { "epoch": 1.1664131426832978, "grad_norm": 1.2665895223617554, "learning_rate": 7.673845693725304e-06, "loss": 0.363, "step": 9585 }, { "epoch": 1.1665348341953148, "grad_norm": 2.4232044219970703, "learning_rate": 7.671948553901587e-06, "loss": 0.326, "step": 9586 }, { "epoch": 1.166656525707332, "grad_norm": 5.492870807647705, "learning_rate": 7.670051502665042e-06, "loss": 0.5059, "step": 9587 }, { "epoch": 1.166778217219349, "grad_norm": 1.7048954963684082, "learning_rate": 7.668154540087861e-06, "loss": 0.4105, "step": 9588 }, { "epoch": 1.166899908731366, "grad_norm": 1.9728702306747437, "learning_rate": 7.666257666242225e-06, "loss": 0.2991, "step": 9589 }, { "epoch": 1.167021600243383, "grad_norm": 1.2459090948104858, "learning_rate": 7.66436088120032e-06, "loss": 0.3501, "step": 9590 }, { "epoch": 1.1671432917554, "grad_norm": 1.744399905204773, "learning_rate": 7.662464185034313e-06, "loss": 0.3504, "step": 9591 }, { "epoch": 1.1672649832674171, "grad_norm": 1.4024866819381714, "learning_rate": 7.660567577816383e-06, "loss": 0.3744, "step": 9592 }, { "epoch": 1.1673866747794341, "grad_norm": 2.816185474395752, "learning_rate": 7.658671059618703e-06, "loss": 0.4085, "step": 9593 }, { "epoch": 1.1675083662914512, "grad_norm": 2.363020896911621, "learning_rate": 7.656774630513431e-06, "loss": 0.389, "step": 9594 }, { "epoch": 1.1676300578034682, "grad_norm": 2.158487558364868, "learning_rate": 7.654878290572737e-06, "loss": 0.3419, "step": 9595 }, { "epoch": 1.1677517493154852, "grad_norm": 1.329377293586731, "learning_rate": 7.652982039868777e-06, "loss": 0.3486, "step": 9596 }, { "epoch": 1.1678734408275022, "grad_norm": 1.7175960540771484, "learning_rate": 7.65108587847371e-06, "loss": 0.4068, "step": 9597 }, { "epoch": 1.1679951323395192, "grad_norm": 2.4583442211151123, "learning_rate": 7.649189806459687e-06, "loss": 0.4199, "step": 9598 }, { "epoch": 1.1681168238515363, "grad_norm": 2.073394775390625, "learning_rate": 7.647293823898858e-06, "loss": 0.411, "step": 9599 }, { "epoch": 1.1682385153635533, "grad_norm": 2.0896148681640625, "learning_rate": 7.645397930863366e-06, "loss": 0.4237, "step": 9600 }, { "epoch": 1.1683602068755705, "grad_norm": 1.3931843042373657, "learning_rate": 7.643502127425359e-06, "loss": 0.3768, "step": 9601 }, { "epoch": 1.1684818983875875, "grad_norm": 1.3291289806365967, "learning_rate": 7.641606413656974e-06, "loss": 0.3568, "step": 9602 }, { "epoch": 1.1686035898996046, "grad_norm": 2.253200054168701, "learning_rate": 7.639710789630344e-06, "loss": 0.2967, "step": 9603 }, { "epoch": 1.1687252814116216, "grad_norm": 2.661163091659546, "learning_rate": 7.637815255417606e-06, "loss": 0.4568, "step": 9604 }, { "epoch": 1.1688469729236386, "grad_norm": 1.7723489999771118, "learning_rate": 7.635919811090887e-06, "loss": 0.4396, "step": 9605 }, { "epoch": 1.1689686644356556, "grad_norm": 1.6135865449905396, "learning_rate": 7.63402445672231e-06, "loss": 0.3781, "step": 9606 }, { "epoch": 1.1690903559476726, "grad_norm": 1.3138494491577148, "learning_rate": 7.632129192384005e-06, "loss": 0.4024, "step": 9607 }, { "epoch": 1.1692120474596897, "grad_norm": 1.2561352252960205, "learning_rate": 7.63023401814808e-06, "loss": 0.3739, "step": 9608 }, { "epoch": 1.1693337389717067, "grad_norm": 2.543386459350586, "learning_rate": 7.628338934086662e-06, "loss": 0.3233, "step": 9609 }, { "epoch": 1.1694554304837237, "grad_norm": 4.182769775390625, "learning_rate": 7.626443940271853e-06, "loss": 0.3412, "step": 9610 }, { "epoch": 1.1695771219957407, "grad_norm": 3.469388008117676, "learning_rate": 7.624549036775764e-06, "loss": 0.3312, "step": 9611 }, { "epoch": 1.169698813507758, "grad_norm": 1.8135895729064941, "learning_rate": 7.622654223670502e-06, "loss": 0.3475, "step": 9612 }, { "epoch": 1.169820505019775, "grad_norm": 2.1234781742095947, "learning_rate": 7.6207595010281675e-06, "loss": 0.3414, "step": 9613 }, { "epoch": 1.169942196531792, "grad_norm": 1.6187366247177124, "learning_rate": 7.618864868920858e-06, "loss": 0.3791, "step": 9614 }, { "epoch": 1.170063888043809, "grad_norm": 2.2679715156555176, "learning_rate": 7.6169703274206685e-06, "loss": 0.3374, "step": 9615 }, { "epoch": 1.170185579555826, "grad_norm": 2.1995928287506104, "learning_rate": 7.615075876599692e-06, "loss": 0.3545, "step": 9616 }, { "epoch": 1.170307271067843, "grad_norm": 2.2592668533325195, "learning_rate": 7.613181516530015e-06, "loss": 0.4188, "step": 9617 }, { "epoch": 1.17042896257986, "grad_norm": 2.811978816986084, "learning_rate": 7.611287247283721e-06, "loss": 0.4238, "step": 9618 }, { "epoch": 1.170550654091877, "grad_norm": 3.0794565677642822, "learning_rate": 7.6093930689328935e-06, "loss": 0.3908, "step": 9619 }, { "epoch": 1.170672345603894, "grad_norm": 1.6768244504928589, "learning_rate": 7.607498981549609e-06, "loss": 0.2969, "step": 9620 }, { "epoch": 1.1707940371159111, "grad_norm": 2.241546869277954, "learning_rate": 7.605604985205937e-06, "loss": 0.3922, "step": 9621 }, { "epoch": 1.1709157286279281, "grad_norm": 3.904524326324463, "learning_rate": 7.603711079973952e-06, "loss": 0.4114, "step": 9622 }, { "epoch": 1.1710374201399452, "grad_norm": 3.861419200897217, "learning_rate": 7.60181726592572e-06, "loss": 0.4676, "step": 9623 }, { "epoch": 1.1711591116519622, "grad_norm": 2.285588502883911, "learning_rate": 7.599923543133307e-06, "loss": 0.4361, "step": 9624 }, { "epoch": 1.1712808031639792, "grad_norm": 1.8813375234603882, "learning_rate": 7.5980299116687695e-06, "loss": 0.4012, "step": 9625 }, { "epoch": 1.1714024946759964, "grad_norm": 1.4907963275909424, "learning_rate": 7.596136371604165e-06, "loss": 0.3759, "step": 9626 }, { "epoch": 1.1715241861880135, "grad_norm": 1.702423334121704, "learning_rate": 7.59424292301155e-06, "loss": 0.3906, "step": 9627 }, { "epoch": 1.1716458777000305, "grad_norm": 2.7096595764160156, "learning_rate": 7.592349565962968e-06, "loss": 0.3805, "step": 9628 }, { "epoch": 1.1717675692120475, "grad_norm": 1.9202237129211426, "learning_rate": 7.590456300530471e-06, "loss": 0.3681, "step": 9629 }, { "epoch": 1.1718892607240645, "grad_norm": 3.4236960411071777, "learning_rate": 7.588563126786099e-06, "loss": 0.3606, "step": 9630 }, { "epoch": 1.1720109522360815, "grad_norm": 2.2568109035491943, "learning_rate": 7.58667004480189e-06, "loss": 0.4256, "step": 9631 }, { "epoch": 1.1721326437480986, "grad_norm": 2.0422170162200928, "learning_rate": 7.584777054649886e-06, "loss": 0.3705, "step": 9632 }, { "epoch": 1.1722543352601156, "grad_norm": 1.7017064094543457, "learning_rate": 7.582884156402111e-06, "loss": 0.4099, "step": 9633 }, { "epoch": 1.1723760267721326, "grad_norm": 3.462963104248047, "learning_rate": 7.580991350130594e-06, "loss": 0.4323, "step": 9634 }, { "epoch": 1.1724977182841496, "grad_norm": 2.1411759853363037, "learning_rate": 7.579098635907367e-06, "loss": 0.4156, "step": 9635 }, { "epoch": 1.1726194097961666, "grad_norm": 1.6380159854888916, "learning_rate": 7.577206013804446e-06, "loss": 0.4219, "step": 9636 }, { "epoch": 1.1727411013081839, "grad_norm": 1.9403705596923828, "learning_rate": 7.575313483893851e-06, "loss": 0.4444, "step": 9637 }, { "epoch": 1.172862792820201, "grad_norm": 1.975189208984375, "learning_rate": 7.573421046247598e-06, "loss": 0.3766, "step": 9638 }, { "epoch": 1.172984484332218, "grad_norm": 1.2692707777023315, "learning_rate": 7.571528700937696e-06, "loss": 0.3673, "step": 9639 }, { "epoch": 1.173106175844235, "grad_norm": 1.503158688545227, "learning_rate": 7.569636448036154e-06, "loss": 0.4124, "step": 9640 }, { "epoch": 1.173227867356252, "grad_norm": 1.2525988817214966, "learning_rate": 7.567744287614976e-06, "loss": 0.3587, "step": 9641 }, { "epoch": 1.173349558868269, "grad_norm": 1.385740041732788, "learning_rate": 7.565852219746162e-06, "loss": 0.3972, "step": 9642 }, { "epoch": 1.173471250380286, "grad_norm": 1.5023621320724487, "learning_rate": 7.563960244501714e-06, "loss": 0.3456, "step": 9643 }, { "epoch": 1.173592941892303, "grad_norm": 1.666089415550232, "learning_rate": 7.562068361953614e-06, "loss": 0.3258, "step": 9644 }, { "epoch": 1.17371463340432, "grad_norm": 1.487396001815796, "learning_rate": 7.5601765721738605e-06, "loss": 0.3475, "step": 9645 }, { "epoch": 1.173836324916337, "grad_norm": 1.9156768321990967, "learning_rate": 7.558284875234441e-06, "loss": 0.3504, "step": 9646 }, { "epoch": 1.173958016428354, "grad_norm": 2.0139801502227783, "learning_rate": 7.556393271207334e-06, "loss": 0.3415, "step": 9647 }, { "epoch": 1.174079707940371, "grad_norm": 2.9057841300964355, "learning_rate": 7.554501760164521e-06, "loss": 0.3603, "step": 9648 }, { "epoch": 1.174201399452388, "grad_norm": 1.9443708658218384, "learning_rate": 7.5526103421779785e-06, "loss": 0.4088, "step": 9649 }, { "epoch": 1.1743230909644051, "grad_norm": 2.8054327964782715, "learning_rate": 7.55071901731968e-06, "loss": 0.4205, "step": 9650 }, { "epoch": 1.1744447824764224, "grad_norm": 1.5990451574325562, "learning_rate": 7.54882778566159e-06, "loss": 0.373, "step": 9651 }, { "epoch": 1.1745664739884394, "grad_norm": 2.325359582901001, "learning_rate": 7.546936647275676e-06, "loss": 0.3566, "step": 9652 }, { "epoch": 1.1746881655004564, "grad_norm": 1.5325226783752441, "learning_rate": 7.545045602233904e-06, "loss": 0.331, "step": 9653 }, { "epoch": 1.1748098570124734, "grad_norm": 1.6761375665664673, "learning_rate": 7.543154650608224e-06, "loss": 0.409, "step": 9654 }, { "epoch": 1.1749315485244904, "grad_norm": 3.7169172763824463, "learning_rate": 7.541263792470601e-06, "loss": 0.4699, "step": 9655 }, { "epoch": 1.1750532400365075, "grad_norm": 1.3243436813354492, "learning_rate": 7.539373027892976e-06, "loss": 0.3336, "step": 9656 }, { "epoch": 1.1751749315485245, "grad_norm": 1.3653485774993896, "learning_rate": 7.537482356947299e-06, "loss": 0.3249, "step": 9657 }, { "epoch": 1.1752966230605415, "grad_norm": 2.285682439804077, "learning_rate": 7.535591779705518e-06, "loss": 0.4148, "step": 9658 }, { "epoch": 1.1754183145725585, "grad_norm": 1.385016918182373, "learning_rate": 7.533701296239568e-06, "loss": 0.3052, "step": 9659 }, { "epoch": 1.1755400060845755, "grad_norm": 1.9938673973083496, "learning_rate": 7.531810906621391e-06, "loss": 0.374, "step": 9660 }, { "epoch": 1.1756616975965926, "grad_norm": 1.736778736114502, "learning_rate": 7.529920610922918e-06, "loss": 0.4145, "step": 9661 }, { "epoch": 1.1757833891086096, "grad_norm": 2.017746686935425, "learning_rate": 7.5280304092160775e-06, "loss": 0.4215, "step": 9662 }, { "epoch": 1.1759050806206268, "grad_norm": 1.3665560483932495, "learning_rate": 7.526140301572795e-06, "loss": 0.3773, "step": 9663 }, { "epoch": 1.1760267721326438, "grad_norm": 1.4421672821044922, "learning_rate": 7.524250288064998e-06, "loss": 0.372, "step": 9664 }, { "epoch": 1.1761484636446609, "grad_norm": 1.7199981212615967, "learning_rate": 7.522360368764599e-06, "loss": 0.3864, "step": 9665 }, { "epoch": 1.1762701551566779, "grad_norm": 1.7247111797332764, "learning_rate": 7.520470543743522e-06, "loss": 0.3953, "step": 9666 }, { "epoch": 1.176391846668695, "grad_norm": 1.8313848972320557, "learning_rate": 7.518580813073668e-06, "loss": 0.3981, "step": 9667 }, { "epoch": 1.176513538180712, "grad_norm": 1.5674959421157837, "learning_rate": 7.516691176826951e-06, "loss": 0.4093, "step": 9668 }, { "epoch": 1.176635229692729, "grad_norm": 2.8634324073791504, "learning_rate": 7.514801635075277e-06, "loss": 0.3387, "step": 9669 }, { "epoch": 1.176756921204746, "grad_norm": 1.6176297664642334, "learning_rate": 7.512912187890542e-06, "loss": 0.3732, "step": 9670 }, { "epoch": 1.176878612716763, "grad_norm": 1.6058251857757568, "learning_rate": 7.511022835344647e-06, "loss": 0.394, "step": 9671 }, { "epoch": 1.17700030422878, "grad_norm": 1.987532138824463, "learning_rate": 7.509133577509486e-06, "loss": 0.3916, "step": 9672 }, { "epoch": 1.177121995740797, "grad_norm": 2.52313494682312, "learning_rate": 7.507244414456947e-06, "loss": 0.301, "step": 9673 }, { "epoch": 1.177243687252814, "grad_norm": 1.3120063543319702, "learning_rate": 7.505355346258918e-06, "loss": 0.3575, "step": 9674 }, { "epoch": 1.177365378764831, "grad_norm": 2.4810423851013184, "learning_rate": 7.50346637298728e-06, "loss": 0.352, "step": 9675 }, { "epoch": 1.177487070276848, "grad_norm": 2.557352066040039, "learning_rate": 7.5015774947139185e-06, "loss": 0.4296, "step": 9676 }, { "epoch": 1.1776087617888653, "grad_norm": 1.4737499952316284, "learning_rate": 7.499688711510702e-06, "loss": 0.3795, "step": 9677 }, { "epoch": 1.1777304533008823, "grad_norm": 1.374341607093811, "learning_rate": 7.497800023449509e-06, "loss": 0.3717, "step": 9678 }, { "epoch": 1.1778521448128993, "grad_norm": 2.6653521060943604, "learning_rate": 7.4959114306022005e-06, "loss": 0.3967, "step": 9679 }, { "epoch": 1.1779738363249164, "grad_norm": 1.7362717390060425, "learning_rate": 7.494022933040646e-06, "loss": 0.3598, "step": 9680 }, { "epoch": 1.1780955278369334, "grad_norm": 3.3668129444122314, "learning_rate": 7.492134530836705e-06, "loss": 0.4189, "step": 9681 }, { "epoch": 1.1782172193489504, "grad_norm": 3.8593332767486572, "learning_rate": 7.4902462240622364e-06, "loss": 0.4334, "step": 9682 }, { "epoch": 1.1783389108609674, "grad_norm": 2.8784804344177246, "learning_rate": 7.488358012789094e-06, "loss": 0.4429, "step": 9683 }, { "epoch": 1.1784606023729844, "grad_norm": 1.2115579843521118, "learning_rate": 7.4864698970891305e-06, "loss": 0.3521, "step": 9684 }, { "epoch": 1.1785822938850015, "grad_norm": 2.027649402618408, "learning_rate": 7.484581877034187e-06, "loss": 0.3684, "step": 9685 }, { "epoch": 1.1787039853970185, "grad_norm": 2.467210292816162, "learning_rate": 7.48269395269611e-06, "loss": 0.3297, "step": 9686 }, { "epoch": 1.1788256769090355, "grad_norm": 1.1987730264663696, "learning_rate": 7.48080612414674e-06, "loss": 0.3585, "step": 9687 }, { "epoch": 1.1789473684210527, "grad_norm": 1.5257595777511597, "learning_rate": 7.47891839145791e-06, "loss": 0.4022, "step": 9688 }, { "epoch": 1.1790690599330698, "grad_norm": 2.9447364807128906, "learning_rate": 7.477030754701454e-06, "loss": 0.3705, "step": 9689 }, { "epoch": 1.1791907514450868, "grad_norm": 2.2697343826293945, "learning_rate": 7.475143213949204e-06, "loss": 0.3544, "step": 9690 }, { "epoch": 1.1793124429571038, "grad_norm": 2.7392823696136475, "learning_rate": 7.473255769272977e-06, "loss": 0.4308, "step": 9691 }, { "epoch": 1.1794341344691208, "grad_norm": 2.2082958221435547, "learning_rate": 7.4713684207446e-06, "loss": 0.3614, "step": 9692 }, { "epoch": 1.1795558259811378, "grad_norm": 1.5046038627624512, "learning_rate": 7.469481168435886e-06, "loss": 0.3863, "step": 9693 }, { "epoch": 1.1796775174931549, "grad_norm": 2.409872055053711, "learning_rate": 7.4675940124186504e-06, "loss": 0.438, "step": 9694 }, { "epoch": 1.1797992090051719, "grad_norm": 1.4781765937805176, "learning_rate": 7.465706952764708e-06, "loss": 0.3759, "step": 9695 }, { "epoch": 1.179920900517189, "grad_norm": 1.8360564708709717, "learning_rate": 7.463819989545859e-06, "loss": 0.3712, "step": 9696 }, { "epoch": 1.180042592029206, "grad_norm": 1.7445881366729736, "learning_rate": 7.461933122833911e-06, "loss": 0.4061, "step": 9697 }, { "epoch": 1.180164283541223, "grad_norm": 1.9159667491912842, "learning_rate": 7.460046352700661e-06, "loss": 0.3885, "step": 9698 }, { "epoch": 1.18028597505324, "grad_norm": 2.351688861846924, "learning_rate": 7.4581596792179034e-06, "loss": 0.4035, "step": 9699 }, { "epoch": 1.180407666565257, "grad_norm": 3.483651638031006, "learning_rate": 7.456273102457432e-06, "loss": 0.4115, "step": 9700 }, { "epoch": 1.180529358077274, "grad_norm": 1.9175450801849365, "learning_rate": 7.454386622491037e-06, "loss": 0.3779, "step": 9701 }, { "epoch": 1.1806510495892912, "grad_norm": 2.3120839595794678, "learning_rate": 7.4525002393904965e-06, "loss": 0.356, "step": 9702 }, { "epoch": 1.1807727411013083, "grad_norm": 2.721449851989746, "learning_rate": 7.4506139532275965e-06, "loss": 0.3882, "step": 9703 }, { "epoch": 1.1808944326133253, "grad_norm": 2.6561784744262695, "learning_rate": 7.448727764074112e-06, "loss": 0.3323, "step": 9704 }, { "epoch": 1.1810161241253423, "grad_norm": 1.5090900659561157, "learning_rate": 7.446841672001815e-06, "loss": 0.4024, "step": 9705 }, { "epoch": 1.1811378156373593, "grad_norm": 1.976524829864502, "learning_rate": 7.4449556770824814e-06, "loss": 0.3893, "step": 9706 }, { "epoch": 1.1812595071493763, "grad_norm": 1.6574209928512573, "learning_rate": 7.44306977938787e-06, "loss": 0.3967, "step": 9707 }, { "epoch": 1.1813811986613934, "grad_norm": 1.6262218952178955, "learning_rate": 7.441183978989745e-06, "loss": 0.3693, "step": 9708 }, { "epoch": 1.1815028901734104, "grad_norm": 2.3745126724243164, "learning_rate": 7.439298275959866e-06, "loss": 0.3243, "step": 9709 }, { "epoch": 1.1816245816854274, "grad_norm": 1.668454647064209, "learning_rate": 7.437412670369992e-06, "loss": 0.3735, "step": 9710 }, { "epoch": 1.1817462731974444, "grad_norm": 1.3087494373321533, "learning_rate": 7.435527162291867e-06, "loss": 0.3566, "step": 9711 }, { "epoch": 1.1818679647094614, "grad_norm": 2.0906765460968018, "learning_rate": 7.433641751797241e-06, "loss": 0.3043, "step": 9712 }, { "epoch": 1.1819896562214787, "grad_norm": 2.554093837738037, "learning_rate": 7.4317564389578624e-06, "loss": 0.4262, "step": 9713 }, { "epoch": 1.1821113477334957, "grad_norm": 1.4899507761001587, "learning_rate": 7.429871223845466e-06, "loss": 0.3661, "step": 9714 }, { "epoch": 1.1822330392455127, "grad_norm": 2.2557945251464844, "learning_rate": 7.4279861065317885e-06, "loss": 0.3676, "step": 9715 }, { "epoch": 1.1823547307575297, "grad_norm": 1.3241007328033447, "learning_rate": 7.426101087088562e-06, "loss": 0.3787, "step": 9716 }, { "epoch": 1.1824764222695467, "grad_norm": 2.7104976177215576, "learning_rate": 7.4242161655875165e-06, "loss": 0.4282, "step": 9717 }, { "epoch": 1.1825981137815638, "grad_norm": 2.966212034225464, "learning_rate": 7.42233134210038e-06, "loss": 0.4097, "step": 9718 }, { "epoch": 1.1827198052935808, "grad_norm": 1.4211074113845825, "learning_rate": 7.420446616698869e-06, "loss": 0.357, "step": 9719 }, { "epoch": 1.1828414968055978, "grad_norm": 1.311387538909912, "learning_rate": 7.418561989454705e-06, "loss": 0.3527, "step": 9720 }, { "epoch": 1.1829631883176148, "grad_norm": 1.228264570236206, "learning_rate": 7.4166774604396e-06, "loss": 0.3358, "step": 9721 }, { "epoch": 1.1830848798296318, "grad_norm": 1.6796833276748657, "learning_rate": 7.414793029725265e-06, "loss": 0.369, "step": 9722 }, { "epoch": 1.1832065713416489, "grad_norm": 1.2923469543457031, "learning_rate": 7.4129086973834055e-06, "loss": 0.3645, "step": 9723 }, { "epoch": 1.1833282628536659, "grad_norm": 1.2359907627105713, "learning_rate": 7.4110244634857295e-06, "loss": 0.3565, "step": 9724 }, { "epoch": 1.183449954365683, "grad_norm": 2.5839452743530273, "learning_rate": 7.409140328103927e-06, "loss": 0.396, "step": 9725 }, { "epoch": 1.1835716458777, "grad_norm": 2.000016450881958, "learning_rate": 7.407256291309702e-06, "loss": 0.3948, "step": 9726 }, { "epoch": 1.1836933373897172, "grad_norm": 1.8811098337173462, "learning_rate": 7.4053723531747355e-06, "loss": 0.4016, "step": 9727 }, { "epoch": 1.1838150289017342, "grad_norm": 1.4274972677230835, "learning_rate": 7.403488513770724e-06, "loss": 0.4323, "step": 9728 }, { "epoch": 1.1839367204137512, "grad_norm": 1.712700366973877, "learning_rate": 7.401604773169352e-06, "loss": 0.3595, "step": 9729 }, { "epoch": 1.1840584119257682, "grad_norm": 1.2554993629455566, "learning_rate": 7.399721131442294e-06, "loss": 0.3462, "step": 9730 }, { "epoch": 1.1841801034377852, "grad_norm": 2.3004250526428223, "learning_rate": 7.397837588661229e-06, "loss": 0.3885, "step": 9731 }, { "epoch": 1.1843017949498023, "grad_norm": 2.6135618686676025, "learning_rate": 7.395954144897831e-06, "loss": 0.3391, "step": 9732 }, { "epoch": 1.1844234864618193, "grad_norm": 1.9746272563934326, "learning_rate": 7.3940708002237675e-06, "loss": 0.4412, "step": 9733 }, { "epoch": 1.1845451779738363, "grad_norm": 3.0108489990234375, "learning_rate": 7.3921875547107035e-06, "loss": 0.3767, "step": 9734 }, { "epoch": 1.1846668694858533, "grad_norm": 1.8141714334487915, "learning_rate": 7.3903044084303e-06, "loss": 0.3114, "step": 9735 }, { "epoch": 1.1847885609978703, "grad_norm": 1.6276839971542358, "learning_rate": 7.388421361454221e-06, "loss": 0.3967, "step": 9736 }, { "epoch": 1.1849102525098874, "grad_norm": 1.4108457565307617, "learning_rate": 7.386538413854112e-06, "loss": 0.3832, "step": 9737 }, { "epoch": 1.1850319440219046, "grad_norm": 1.443261742591858, "learning_rate": 7.384655565701624e-06, "loss": 0.3648, "step": 9738 }, { "epoch": 1.1851536355339216, "grad_norm": 1.2201173305511475, "learning_rate": 7.3827728170684045e-06, "loss": 0.3359, "step": 9739 }, { "epoch": 1.1852753270459386, "grad_norm": 3.517454147338867, "learning_rate": 7.3808901680261e-06, "loss": 0.4007, "step": 9740 }, { "epoch": 1.1853970185579557, "grad_norm": 1.5070616006851196, "learning_rate": 7.379007618646343e-06, "loss": 0.3682, "step": 9741 }, { "epoch": 1.1855187100699727, "grad_norm": 1.5357564687728882, "learning_rate": 7.377125169000772e-06, "loss": 0.3386, "step": 9742 }, { "epoch": 1.1856404015819897, "grad_norm": 2.7693560123443604, "learning_rate": 7.375242819161017e-06, "loss": 0.4197, "step": 9743 }, { "epoch": 1.1857620930940067, "grad_norm": 1.6653414964675903, "learning_rate": 7.373360569198706e-06, "loss": 0.3906, "step": 9744 }, { "epoch": 1.1858837846060237, "grad_norm": 2.5514349937438965, "learning_rate": 7.371478419185462e-06, "loss": 0.3186, "step": 9745 }, { "epoch": 1.1860054761180407, "grad_norm": 1.5120397806167603, "learning_rate": 7.369596369192905e-06, "loss": 0.3917, "step": 9746 }, { "epoch": 1.1861271676300578, "grad_norm": 1.4133343696594238, "learning_rate": 7.3677144192926555e-06, "loss": 0.3929, "step": 9747 }, { "epoch": 1.1862488591420748, "grad_norm": 1.445682168006897, "learning_rate": 7.365832569556317e-06, "loss": 0.3389, "step": 9748 }, { "epoch": 1.1863705506540918, "grad_norm": 2.119556427001953, "learning_rate": 7.363950820055501e-06, "loss": 0.4039, "step": 9749 }, { "epoch": 1.1864922421661088, "grad_norm": 1.8560068607330322, "learning_rate": 7.362069170861812e-06, "loss": 0.3172, "step": 9750 }, { "epoch": 1.1866139336781258, "grad_norm": 1.8694087266921997, "learning_rate": 7.360187622046851e-06, "loss": 0.3566, "step": 9751 }, { "epoch": 1.186735625190143, "grad_norm": 1.401943564414978, "learning_rate": 7.358306173682217e-06, "loss": 0.3761, "step": 9752 }, { "epoch": 1.18685731670216, "grad_norm": 1.337256908416748, "learning_rate": 7.356424825839501e-06, "loss": 0.3357, "step": 9753 }, { "epoch": 1.1869790082141771, "grad_norm": 1.3931041955947876, "learning_rate": 7.35454357859029e-06, "loss": 0.3937, "step": 9754 }, { "epoch": 1.1871006997261941, "grad_norm": 4.364648818969727, "learning_rate": 7.352662432006175e-06, "loss": 0.4306, "step": 9755 }, { "epoch": 1.1872223912382112, "grad_norm": 1.8168553113937378, "learning_rate": 7.3507813861587315e-06, "loss": 0.3763, "step": 9756 }, { "epoch": 1.1873440827502282, "grad_norm": 1.891121745109558, "learning_rate": 7.3489004411195395e-06, "loss": 0.3256, "step": 9757 }, { "epoch": 1.1874657742622452, "grad_norm": 1.7567882537841797, "learning_rate": 7.347019596960177e-06, "loss": 0.3887, "step": 9758 }, { "epoch": 1.1875874657742622, "grad_norm": 1.993302822113037, "learning_rate": 7.345138853752207e-06, "loss": 0.3819, "step": 9759 }, { "epoch": 1.1877091572862792, "grad_norm": 1.6605867147445679, "learning_rate": 7.343258211567201e-06, "loss": 0.4151, "step": 9760 }, { "epoch": 1.1878308487982963, "grad_norm": 1.5491313934326172, "learning_rate": 7.341377670476717e-06, "loss": 0.4117, "step": 9761 }, { "epoch": 1.1879525403103133, "grad_norm": 3.200650691986084, "learning_rate": 7.339497230552316e-06, "loss": 0.4397, "step": 9762 }, { "epoch": 1.1880742318223303, "grad_norm": 2.271812915802002, "learning_rate": 7.3376168918655536e-06, "loss": 0.3352, "step": 9763 }, { "epoch": 1.1881959233343475, "grad_norm": 2.0693397521972656, "learning_rate": 7.335736654487978e-06, "loss": 0.3671, "step": 9764 }, { "epoch": 1.1883176148463646, "grad_norm": 2.6654064655303955, "learning_rate": 7.333856518491137e-06, "loss": 0.3299, "step": 9765 }, { "epoch": 1.1884393063583816, "grad_norm": 1.5677542686462402, "learning_rate": 7.331976483946577e-06, "loss": 0.4042, "step": 9766 }, { "epoch": 1.1885609978703986, "grad_norm": 1.3392091989517212, "learning_rate": 7.330096550925831e-06, "loss": 0.3886, "step": 9767 }, { "epoch": 1.1886826893824156, "grad_norm": 1.5454658269882202, "learning_rate": 7.328216719500437e-06, "loss": 0.3749, "step": 9768 }, { "epoch": 1.1888043808944326, "grad_norm": 1.7193052768707275, "learning_rate": 7.326336989741929e-06, "loss": 0.4039, "step": 9769 }, { "epoch": 1.1889260724064497, "grad_norm": 1.9421888589859009, "learning_rate": 7.324457361721835e-06, "loss": 0.305, "step": 9770 }, { "epoch": 1.1890477639184667, "grad_norm": 1.9311881065368652, "learning_rate": 7.322577835511676e-06, "loss": 0.3984, "step": 9771 }, { "epoch": 1.1891694554304837, "grad_norm": 1.4759806394577026, "learning_rate": 7.32069841118297e-06, "loss": 0.3929, "step": 9772 }, { "epoch": 1.1892911469425007, "grad_norm": 1.4620578289031982, "learning_rate": 7.318819088807234e-06, "loss": 0.381, "step": 9773 }, { "epoch": 1.1894128384545177, "grad_norm": 2.0390918254852295, "learning_rate": 7.316939868455985e-06, "loss": 0.4152, "step": 9774 }, { "epoch": 1.1895345299665347, "grad_norm": 2.3117787837982178, "learning_rate": 7.3150607502007246e-06, "loss": 0.3536, "step": 9775 }, { "epoch": 1.1896562214785518, "grad_norm": 2.2581448554992676, "learning_rate": 7.313181734112961e-06, "loss": 0.3817, "step": 9776 }, { "epoch": 1.1897779129905688, "grad_norm": 1.3782665729522705, "learning_rate": 7.311302820264193e-06, "loss": 0.3219, "step": 9777 }, { "epoch": 1.189899604502586, "grad_norm": 1.7863227128982544, "learning_rate": 7.3094240087259205e-06, "loss": 0.3513, "step": 9778 }, { "epoch": 1.190021296014603, "grad_norm": 1.720304250717163, "learning_rate": 7.307545299569631e-06, "loss": 0.4215, "step": 9779 }, { "epoch": 1.19014298752662, "grad_norm": 2.7664988040924072, "learning_rate": 7.305666692866817e-06, "loss": 0.4042, "step": 9780 }, { "epoch": 1.190264679038637, "grad_norm": 2.4821979999542236, "learning_rate": 7.303788188688964e-06, "loss": 0.3629, "step": 9781 }, { "epoch": 1.190386370550654, "grad_norm": 1.498619794845581, "learning_rate": 7.301909787107553e-06, "loss": 0.3873, "step": 9782 }, { "epoch": 1.1905080620626711, "grad_norm": 2.3452537059783936, "learning_rate": 7.300031488194055e-06, "loss": 0.3863, "step": 9783 }, { "epoch": 1.1906297535746881, "grad_norm": 1.4993228912353516, "learning_rate": 7.298153292019948e-06, "loss": 0.3699, "step": 9784 }, { "epoch": 1.1907514450867052, "grad_norm": 2.058849811553955, "learning_rate": 7.296275198656701e-06, "loss": 0.3875, "step": 9785 }, { "epoch": 1.1908731365987222, "grad_norm": 3.372253894805908, "learning_rate": 7.2943972081757805e-06, "loss": 0.3094, "step": 9786 }, { "epoch": 1.1909948281107392, "grad_norm": 3.1086041927337646, "learning_rate": 7.292519320648646e-06, "loss": 0.4501, "step": 9787 }, { "epoch": 1.1911165196227562, "grad_norm": 2.2981343269348145, "learning_rate": 7.290641536146753e-06, "loss": 0.3557, "step": 9788 }, { "epoch": 1.1912382111347735, "grad_norm": 1.3525437116622925, "learning_rate": 7.28876385474156e-06, "loss": 0.3874, "step": 9789 }, { "epoch": 1.1913599026467905, "grad_norm": 2.6758930683135986, "learning_rate": 7.286886276504514e-06, "loss": 0.3771, "step": 9790 }, { "epoch": 1.1914815941588075, "grad_norm": 1.3250406980514526, "learning_rate": 7.285008801507061e-06, "loss": 0.3683, "step": 9791 }, { "epoch": 1.1916032856708245, "grad_norm": 2.3617355823516846, "learning_rate": 7.283131429820644e-06, "loss": 0.4301, "step": 9792 }, { "epoch": 1.1917249771828415, "grad_norm": 1.3408892154693604, "learning_rate": 7.2812541615166995e-06, "loss": 0.4119, "step": 9793 }, { "epoch": 1.1918466686948586, "grad_norm": 1.8691617250442505, "learning_rate": 7.279376996666662e-06, "loss": 0.3411, "step": 9794 }, { "epoch": 1.1919683602068756, "grad_norm": 2.074599027633667, "learning_rate": 7.277499935341959e-06, "loss": 0.4041, "step": 9795 }, { "epoch": 1.1920900517188926, "grad_norm": 1.9167262315750122, "learning_rate": 7.2756229776140185e-06, "loss": 0.4124, "step": 9796 }, { "epoch": 1.1922117432309096, "grad_norm": 1.5892618894577026, "learning_rate": 7.273746123554265e-06, "loss": 0.3583, "step": 9797 }, { "epoch": 1.1923334347429266, "grad_norm": 1.6339491605758667, "learning_rate": 7.271869373234113e-06, "loss": 0.3575, "step": 9798 }, { "epoch": 1.1924551262549437, "grad_norm": 2.667067289352417, "learning_rate": 7.269992726724977e-06, "loss": 0.3877, "step": 9799 }, { "epoch": 1.1925768177669607, "grad_norm": 1.3575387001037598, "learning_rate": 7.268116184098273e-06, "loss": 0.4284, "step": 9800 }, { "epoch": 1.1926985092789777, "grad_norm": 2.481475830078125, "learning_rate": 7.266239745425398e-06, "loss": 0.3566, "step": 9801 }, { "epoch": 1.1928202007909947, "grad_norm": 2.239607810974121, "learning_rate": 7.264363410777761e-06, "loss": 0.4492, "step": 9802 }, { "epoch": 1.192941892303012, "grad_norm": 2.346715211868286, "learning_rate": 7.262487180226758e-06, "loss": 0.3635, "step": 9803 }, { "epoch": 1.193063583815029, "grad_norm": 1.6116138696670532, "learning_rate": 7.260611053843787e-06, "loss": 0.38, "step": 9804 }, { "epoch": 1.193185275327046, "grad_norm": 1.5168038606643677, "learning_rate": 7.258735031700237e-06, "loss": 0.4297, "step": 9805 }, { "epoch": 1.193306966839063, "grad_norm": 1.8599116802215576, "learning_rate": 7.256859113867489e-06, "loss": 0.3472, "step": 9806 }, { "epoch": 1.19342865835108, "grad_norm": 1.5954008102416992, "learning_rate": 7.25498330041693e-06, "loss": 0.3332, "step": 9807 }, { "epoch": 1.193550349863097, "grad_norm": 2.0614848136901855, "learning_rate": 7.25310759141994e-06, "loss": 0.4302, "step": 9808 }, { "epoch": 1.193672041375114, "grad_norm": 1.8109195232391357, "learning_rate": 7.251231986947891e-06, "loss": 0.4274, "step": 9809 }, { "epoch": 1.193793732887131, "grad_norm": 2.0193774700164795, "learning_rate": 7.249356487072153e-06, "loss": 0.3392, "step": 9810 }, { "epoch": 1.193915424399148, "grad_norm": 1.418201208114624, "learning_rate": 7.2474810918640945e-06, "loss": 0.349, "step": 9811 }, { "epoch": 1.1940371159111651, "grad_norm": 1.9252541065216064, "learning_rate": 7.245605801395081e-06, "loss": 0.3656, "step": 9812 }, { "epoch": 1.1941588074231821, "grad_norm": 1.3925299644470215, "learning_rate": 7.243730615736464e-06, "loss": 0.3959, "step": 9813 }, { "epoch": 1.1942804989351994, "grad_norm": 2.263493776321411, "learning_rate": 7.2418555349596035e-06, "loss": 0.3544, "step": 9814 }, { "epoch": 1.1944021904472164, "grad_norm": 1.6489735841751099, "learning_rate": 7.239980559135851e-06, "loss": 0.3908, "step": 9815 }, { "epoch": 1.1945238819592334, "grad_norm": 1.5460681915283203, "learning_rate": 7.238105688336554e-06, "loss": 0.3676, "step": 9816 }, { "epoch": 1.1946455734712504, "grad_norm": 1.6218175888061523, "learning_rate": 7.2362309226330474e-06, "loss": 0.4264, "step": 9817 }, { "epoch": 1.1947672649832675, "grad_norm": 2.4622278213500977, "learning_rate": 7.234356262096675e-06, "loss": 0.354, "step": 9818 }, { "epoch": 1.1948889564952845, "grad_norm": 2.2065672874450684, "learning_rate": 7.232481706798771e-06, "loss": 0.4001, "step": 9819 }, { "epoch": 1.1950106480073015, "grad_norm": 2.0798799991607666, "learning_rate": 7.230607256810669e-06, "loss": 0.425, "step": 9820 }, { "epoch": 1.1951323395193185, "grad_norm": 1.8250443935394287, "learning_rate": 7.228732912203691e-06, "loss": 0.3903, "step": 9821 }, { "epoch": 1.1952540310313355, "grad_norm": 2.283653497695923, "learning_rate": 7.2268586730491615e-06, "loss": 0.4084, "step": 9822 }, { "epoch": 1.1953757225433526, "grad_norm": 1.6580002307891846, "learning_rate": 7.224984539418402e-06, "loss": 0.4022, "step": 9823 }, { "epoch": 1.1954974140553696, "grad_norm": 1.3858612775802612, "learning_rate": 7.2231105113827226e-06, "loss": 0.3303, "step": 9824 }, { "epoch": 1.1956191055673866, "grad_norm": 1.97579824924469, "learning_rate": 7.2212365890134365e-06, "loss": 0.4339, "step": 9825 }, { "epoch": 1.1957407970794036, "grad_norm": 3.906874895095825, "learning_rate": 7.219362772381851e-06, "loss": 0.4547, "step": 9826 }, { "epoch": 1.1958624885914206, "grad_norm": 1.5980494022369385, "learning_rate": 7.217489061559266e-06, "loss": 0.3882, "step": 9827 }, { "epoch": 1.1959841801034379, "grad_norm": 1.6412523984909058, "learning_rate": 7.215615456616987e-06, "loss": 0.4059, "step": 9828 }, { "epoch": 1.196105871615455, "grad_norm": 1.7347638607025146, "learning_rate": 7.213741957626297e-06, "loss": 0.3622, "step": 9829 }, { "epoch": 1.196227563127472, "grad_norm": 2.732839345932007, "learning_rate": 7.2118685646584955e-06, "loss": 0.3609, "step": 9830 }, { "epoch": 1.196349254639489, "grad_norm": 2.1362144947052, "learning_rate": 7.209995277784866e-06, "loss": 0.3514, "step": 9831 }, { "epoch": 1.196470946151506, "grad_norm": 1.543515682220459, "learning_rate": 7.20812209707669e-06, "loss": 0.3733, "step": 9832 }, { "epoch": 1.196592637663523, "grad_norm": 1.354123592376709, "learning_rate": 7.2062490226052464e-06, "loss": 0.3699, "step": 9833 }, { "epoch": 1.19671432917554, "grad_norm": 1.7345744371414185, "learning_rate": 7.204376054441813e-06, "loss": 0.3448, "step": 9834 }, { "epoch": 1.196836020687557, "grad_norm": 3.250802993774414, "learning_rate": 7.202503192657655e-06, "loss": 0.4241, "step": 9835 }, { "epoch": 1.196957712199574, "grad_norm": 2.1031477451324463, "learning_rate": 7.200630437324041e-06, "loss": 0.391, "step": 9836 }, { "epoch": 1.197079403711591, "grad_norm": 1.759873628616333, "learning_rate": 7.198757788512233e-06, "loss": 0.36, "step": 9837 }, { "epoch": 1.197201095223608, "grad_norm": 2.229681968688965, "learning_rate": 7.196885246293492e-06, "loss": 0.4182, "step": 9838 }, { "epoch": 1.1973227867356253, "grad_norm": 2.7650949954986572, "learning_rate": 7.19501281073907e-06, "loss": 0.4363, "step": 9839 }, { "epoch": 1.1974444782476423, "grad_norm": 1.41323721408844, "learning_rate": 7.193140481920215e-06, "loss": 0.3438, "step": 9840 }, { "epoch": 1.1975661697596593, "grad_norm": 2.160066843032837, "learning_rate": 7.191268259908173e-06, "loss": 0.4054, "step": 9841 }, { "epoch": 1.1976878612716764, "grad_norm": 2.335728168487549, "learning_rate": 7.18939614477419e-06, "loss": 0.3609, "step": 9842 }, { "epoch": 1.1978095527836934, "grad_norm": 1.6418588161468506, "learning_rate": 7.187524136589499e-06, "loss": 0.4025, "step": 9843 }, { "epoch": 1.1979312442957104, "grad_norm": 3.662625312805176, "learning_rate": 7.185652235425335e-06, "loss": 0.3183, "step": 9844 }, { "epoch": 1.1980529358077274, "grad_norm": 1.84652841091156, "learning_rate": 7.183780441352931e-06, "loss": 0.4042, "step": 9845 }, { "epoch": 1.1981746273197444, "grad_norm": 1.9028189182281494, "learning_rate": 7.1819087544435115e-06, "loss": 0.3812, "step": 9846 }, { "epoch": 1.1982963188317615, "grad_norm": 1.8770074844360352, "learning_rate": 7.180037174768295e-06, "loss": 0.4045, "step": 9847 }, { "epoch": 1.1984180103437785, "grad_norm": 1.9270813465118408, "learning_rate": 7.178165702398501e-06, "loss": 0.4106, "step": 9848 }, { "epoch": 1.1985397018557955, "grad_norm": 2.323920726776123, "learning_rate": 7.176294337405345e-06, "loss": 0.4461, "step": 9849 }, { "epoch": 1.1986613933678125, "grad_norm": 2.5973894596099854, "learning_rate": 7.174423079860032e-06, "loss": 0.4, "step": 9850 }, { "epoch": 1.1987830848798295, "grad_norm": 1.7463017702102661, "learning_rate": 7.1725519298337745e-06, "loss": 0.4001, "step": 9851 }, { "epoch": 1.1989047763918466, "grad_norm": 2.995480537414551, "learning_rate": 7.170680887397763e-06, "loss": 0.3835, "step": 9852 }, { "epoch": 1.1990264679038638, "grad_norm": 2.71461820602417, "learning_rate": 7.1688099526232015e-06, "loss": 0.4077, "step": 9853 }, { "epoch": 1.1991481594158808, "grad_norm": 2.7108864784240723, "learning_rate": 7.166939125581283e-06, "loss": 0.3734, "step": 9854 }, { "epoch": 1.1992698509278978, "grad_norm": 1.7506965398788452, "learning_rate": 7.165068406343192e-06, "loss": 0.4198, "step": 9855 }, { "epoch": 1.1993915424399149, "grad_norm": 2.925708770751953, "learning_rate": 7.163197794980117e-06, "loss": 0.3223, "step": 9856 }, { "epoch": 1.1995132339519319, "grad_norm": 1.3340356349945068, "learning_rate": 7.161327291563239e-06, "loss": 0.3423, "step": 9857 }, { "epoch": 1.199634925463949, "grad_norm": 3.1486504077911377, "learning_rate": 7.1594568961637325e-06, "loss": 0.4445, "step": 9858 }, { "epoch": 1.199756616975966, "grad_norm": 1.4620342254638672, "learning_rate": 7.157586608852769e-06, "loss": 0.3572, "step": 9859 }, { "epoch": 1.199878308487983, "grad_norm": 1.8884685039520264, "learning_rate": 7.155716429701522e-06, "loss": 0.4246, "step": 9860 }, { "epoch": 1.2, "grad_norm": 1.7373028993606567, "learning_rate": 7.153846358781149e-06, "loss": 0.3394, "step": 9861 }, { "epoch": 1.200121691512017, "grad_norm": 1.6023287773132324, "learning_rate": 7.151976396162818e-06, "loss": 0.3618, "step": 9862 }, { "epoch": 1.200243383024034, "grad_norm": 3.0164480209350586, "learning_rate": 7.1501065419176775e-06, "loss": 0.3861, "step": 9863 }, { "epoch": 1.2003650745360512, "grad_norm": 1.3019382953643799, "learning_rate": 7.148236796116881e-06, "loss": 0.3772, "step": 9864 }, { "epoch": 1.2004867660480683, "grad_norm": 1.565251350402832, "learning_rate": 7.146367158831578e-06, "loss": 0.3929, "step": 9865 }, { "epoch": 1.2006084575600853, "grad_norm": 2.9135732650756836, "learning_rate": 7.1444976301329105e-06, "loss": 0.4156, "step": 9866 }, { "epoch": 1.2007301490721023, "grad_norm": 1.5117042064666748, "learning_rate": 7.142628210092019e-06, "loss": 0.3535, "step": 9867 }, { "epoch": 1.2008518405841193, "grad_norm": 1.5485661029815674, "learning_rate": 7.14075889878004e-06, "loss": 0.4021, "step": 9868 }, { "epoch": 1.2009735320961363, "grad_norm": 1.477531909942627, "learning_rate": 7.138889696268101e-06, "loss": 0.3909, "step": 9869 }, { "epoch": 1.2010952236081534, "grad_norm": 1.7225297689437866, "learning_rate": 7.137020602627332e-06, "loss": 0.3782, "step": 9870 }, { "epoch": 1.2012169151201704, "grad_norm": 1.5606352090835571, "learning_rate": 7.135151617928855e-06, "loss": 0.3743, "step": 9871 }, { "epoch": 1.2013386066321874, "grad_norm": 1.413008689880371, "learning_rate": 7.13328274224379e-06, "loss": 0.3959, "step": 9872 }, { "epoch": 1.2014602981442044, "grad_norm": 1.8778789043426514, "learning_rate": 7.131413975643249e-06, "loss": 0.3499, "step": 9873 }, { "epoch": 1.2015819896562214, "grad_norm": 1.7032098770141602, "learning_rate": 7.1295453181983475e-06, "loss": 0.3843, "step": 9874 }, { "epoch": 1.2017036811682384, "grad_norm": 1.5106642246246338, "learning_rate": 7.127676769980185e-06, "loss": 0.3485, "step": 9875 }, { "epoch": 1.2018253726802555, "grad_norm": 1.6289196014404297, "learning_rate": 7.125808331059868e-06, "loss": 0.3643, "step": 9876 }, { "epoch": 1.2019470641922725, "grad_norm": 3.55741286277771, "learning_rate": 7.123940001508491e-06, "loss": 0.3463, "step": 9877 }, { "epoch": 1.2020687557042895, "grad_norm": 1.4855420589447021, "learning_rate": 7.122071781397151e-06, "loss": 0.36, "step": 9878 }, { "epoch": 1.2021904472163067, "grad_norm": 1.5825939178466797, "learning_rate": 7.120203670796936e-06, "loss": 0.3507, "step": 9879 }, { "epoch": 1.2023121387283238, "grad_norm": 2.6188483238220215, "learning_rate": 7.118335669778934e-06, "loss": 0.335, "step": 9880 }, { "epoch": 1.2024338302403408, "grad_norm": 1.9618849754333496, "learning_rate": 7.116467778414223e-06, "loss": 0.411, "step": 9881 }, { "epoch": 1.2025555217523578, "grad_norm": 1.689078450202942, "learning_rate": 7.114599996773881e-06, "loss": 0.4108, "step": 9882 }, { "epoch": 1.2026772132643748, "grad_norm": 1.631308674812317, "learning_rate": 7.112732324928985e-06, "loss": 0.3441, "step": 9883 }, { "epoch": 1.2027989047763918, "grad_norm": 3.095848798751831, "learning_rate": 7.110864762950598e-06, "loss": 0.4489, "step": 9884 }, { "epoch": 1.2029205962884089, "grad_norm": 3.32705020904541, "learning_rate": 7.1089973109097894e-06, "loss": 0.4167, "step": 9885 }, { "epoch": 1.2030422878004259, "grad_norm": 1.5482968091964722, "learning_rate": 7.1071299688776155e-06, "loss": 0.358, "step": 9886 }, { "epoch": 1.203163979312443, "grad_norm": 3.5254554748535156, "learning_rate": 7.105262736925132e-06, "loss": 0.3866, "step": 9887 }, { "epoch": 1.20328567082446, "grad_norm": 3.1695315837860107, "learning_rate": 7.103395615123396e-06, "loss": 0.3123, "step": 9888 }, { "epoch": 1.203407362336477, "grad_norm": 1.5443141460418701, "learning_rate": 7.101528603543451e-06, "loss": 0.3557, "step": 9889 }, { "epoch": 1.2035290538484942, "grad_norm": 1.659907579421997, "learning_rate": 7.099661702256341e-06, "loss": 0.4285, "step": 9890 }, { "epoch": 1.2036507453605112, "grad_norm": 1.6083381175994873, "learning_rate": 7.097794911333109e-06, "loss": 0.4032, "step": 9891 }, { "epoch": 1.2037724368725282, "grad_norm": 1.3338496685028076, "learning_rate": 7.095928230844786e-06, "loss": 0.3515, "step": 9892 }, { "epoch": 1.2038941283845452, "grad_norm": 3.1042778491973877, "learning_rate": 7.0940616608624056e-06, "loss": 0.3583, "step": 9893 }, { "epoch": 1.2040158198965623, "grad_norm": 2.0684478282928467, "learning_rate": 7.092195201456995e-06, "loss": 0.3558, "step": 9894 }, { "epoch": 1.2041375114085793, "grad_norm": 4.512422561645508, "learning_rate": 7.0903288526995736e-06, "loss": 0.3876, "step": 9895 }, { "epoch": 1.2042592029205963, "grad_norm": 2.115954637527466, "learning_rate": 7.088462614661163e-06, "loss": 0.4278, "step": 9896 }, { "epoch": 1.2043808944326133, "grad_norm": 1.716399073600769, "learning_rate": 7.08659648741278e-06, "loss": 0.3915, "step": 9897 }, { "epoch": 1.2045025859446303, "grad_norm": 1.7902320623397827, "learning_rate": 7.084730471025427e-06, "loss": 0.4327, "step": 9898 }, { "epoch": 1.2046242774566474, "grad_norm": 2.3689777851104736, "learning_rate": 7.0828645655701155e-06, "loss": 0.3079, "step": 9899 }, { "epoch": 1.2047459689686644, "grad_norm": 1.9941939115524292, "learning_rate": 7.080998771117844e-06, "loss": 0.4092, "step": 9900 }, { "epoch": 1.2048676604806814, "grad_norm": 1.7526655197143555, "learning_rate": 7.079133087739611e-06, "loss": 0.3985, "step": 9901 }, { "epoch": 1.2049893519926984, "grad_norm": 1.446198582649231, "learning_rate": 7.077267515506413e-06, "loss": 0.4139, "step": 9902 }, { "epoch": 1.2051110435047154, "grad_norm": 1.6286088228225708, "learning_rate": 7.075402054489234e-06, "loss": 0.3994, "step": 9903 }, { "epoch": 1.2052327350167327, "grad_norm": 1.712848424911499, "learning_rate": 7.073536704759059e-06, "loss": 0.421, "step": 9904 }, { "epoch": 1.2053544265287497, "grad_norm": 1.4952540397644043, "learning_rate": 7.071671466386869e-06, "loss": 0.3555, "step": 9905 }, { "epoch": 1.2054761180407667, "grad_norm": 1.493610143661499, "learning_rate": 7.0698063394436435e-06, "loss": 0.3761, "step": 9906 }, { "epoch": 1.2055978095527837, "grad_norm": 1.6017245054244995, "learning_rate": 7.067941324000352e-06, "loss": 0.3529, "step": 9907 }, { "epoch": 1.2057195010648007, "grad_norm": 1.9543555974960327, "learning_rate": 7.066076420127964e-06, "loss": 0.4133, "step": 9908 }, { "epoch": 1.2058411925768178, "grad_norm": 2.609764575958252, "learning_rate": 7.064211627897437e-06, "loss": 0.4384, "step": 9909 }, { "epoch": 1.2059628840888348, "grad_norm": 2.833123207092285, "learning_rate": 7.062346947379738e-06, "loss": 0.3833, "step": 9910 }, { "epoch": 1.2060845756008518, "grad_norm": 1.7996008396148682, "learning_rate": 7.060482378645814e-06, "loss": 0.3262, "step": 9911 }, { "epoch": 1.2062062671128688, "grad_norm": 1.8962863683700562, "learning_rate": 7.058617921766622e-06, "loss": 0.3332, "step": 9912 }, { "epoch": 1.2063279586248858, "grad_norm": 2.3121402263641357, "learning_rate": 7.056753576813106e-06, "loss": 0.4039, "step": 9913 }, { "epoch": 1.2064496501369029, "grad_norm": 1.6381328105926514, "learning_rate": 7.0548893438562105e-06, "loss": 0.4022, "step": 9914 }, { "epoch": 1.20657134164892, "grad_norm": 1.5254966020584106, "learning_rate": 7.05302522296687e-06, "loss": 0.3737, "step": 9915 }, { "epoch": 1.2066930331609371, "grad_norm": 2.949911594390869, "learning_rate": 7.051161214216018e-06, "loss": 0.4068, "step": 9916 }, { "epoch": 1.2068147246729541, "grad_norm": 2.3714778423309326, "learning_rate": 7.049297317674588e-06, "loss": 0.3557, "step": 9917 }, { "epoch": 1.2069364161849712, "grad_norm": 1.5922006368637085, "learning_rate": 7.047433533413501e-06, "loss": 0.3946, "step": 9918 }, { "epoch": 1.2070581076969882, "grad_norm": 1.5477550029754639, "learning_rate": 7.04556986150368e-06, "loss": 0.3763, "step": 9919 }, { "epoch": 1.2071797992090052, "grad_norm": 2.199549913406372, "learning_rate": 7.0437063020160455e-06, "loss": 0.4445, "step": 9920 }, { "epoch": 1.2073014907210222, "grad_norm": 2.6356499195098877, "learning_rate": 7.041842855021501e-06, "loss": 0.3384, "step": 9921 }, { "epoch": 1.2074231822330392, "grad_norm": 1.3380391597747803, "learning_rate": 7.03997952059096e-06, "loss": 0.3674, "step": 9922 }, { "epoch": 1.2075448737450563, "grad_norm": 1.6883161067962646, "learning_rate": 7.038116298795323e-06, "loss": 0.4058, "step": 9923 }, { "epoch": 1.2076665652570733, "grad_norm": 1.5603445768356323, "learning_rate": 7.036253189705493e-06, "loss": 0.3278, "step": 9924 }, { "epoch": 1.2077882567690903, "grad_norm": 1.529004693031311, "learning_rate": 7.034390193392366e-06, "loss": 0.4173, "step": 9925 }, { "epoch": 1.2079099482811073, "grad_norm": 1.2442429065704346, "learning_rate": 7.032527309926827e-06, "loss": 0.3665, "step": 9926 }, { "epoch": 1.2080316397931243, "grad_norm": 1.8064535856246948, "learning_rate": 7.030664539379768e-06, "loss": 0.3875, "step": 9927 }, { "epoch": 1.2081533313051414, "grad_norm": 2.5870845317840576, "learning_rate": 7.028801881822069e-06, "loss": 0.4509, "step": 9928 }, { "epoch": 1.2082750228171586, "grad_norm": 1.7757675647735596, "learning_rate": 7.026939337324609e-06, "loss": 0.3831, "step": 9929 }, { "epoch": 1.2083967143291756, "grad_norm": 2.2062106132507324, "learning_rate": 7.025076905958259e-06, "loss": 0.3626, "step": 9930 }, { "epoch": 1.2085184058411926, "grad_norm": 1.3745630979537964, "learning_rate": 7.0232145877938965e-06, "loss": 0.372, "step": 9931 }, { "epoch": 1.2086400973532097, "grad_norm": 1.5313496589660645, "learning_rate": 7.021352382902375e-06, "loss": 0.3815, "step": 9932 }, { "epoch": 1.2087617888652267, "grad_norm": 1.932791829109192, "learning_rate": 7.019490291354563e-06, "loss": 0.4015, "step": 9933 }, { "epoch": 1.2088834803772437, "grad_norm": 1.8884142637252808, "learning_rate": 7.0176283132213145e-06, "loss": 0.3278, "step": 9934 }, { "epoch": 1.2090051718892607, "grad_norm": 1.3709759712219238, "learning_rate": 7.01576644857348e-06, "loss": 0.3884, "step": 9935 }, { "epoch": 1.2091268634012777, "grad_norm": 1.4755620956420898, "learning_rate": 7.01390469748191e-06, "loss": 0.3341, "step": 9936 }, { "epoch": 1.2092485549132947, "grad_norm": 1.6526072025299072, "learning_rate": 7.012043060017447e-06, "loss": 0.3783, "step": 9937 }, { "epoch": 1.2093702464253118, "grad_norm": 1.6409109830856323, "learning_rate": 7.01018153625093e-06, "loss": 0.4409, "step": 9938 }, { "epoch": 1.2094919379373288, "grad_norm": 3.184429883956909, "learning_rate": 7.008320126253194e-06, "loss": 0.4623, "step": 9939 }, { "epoch": 1.209613629449346, "grad_norm": 1.2408146858215332, "learning_rate": 7.006458830095072e-06, "loss": 0.3218, "step": 9940 }, { "epoch": 1.209735320961363, "grad_norm": 2.685734748840332, "learning_rate": 7.004597647847386e-06, "loss": 0.3654, "step": 9941 }, { "epoch": 1.20985701247338, "grad_norm": 2.2608585357666016, "learning_rate": 7.002736579580958e-06, "loss": 0.4121, "step": 9942 }, { "epoch": 1.209978703985397, "grad_norm": 1.6932148933410645, "learning_rate": 7.000875625366613e-06, "loss": 0.3663, "step": 9943 }, { "epoch": 1.210100395497414, "grad_norm": 1.6316936016082764, "learning_rate": 6.999014785275155e-06, "loss": 0.3944, "step": 9944 }, { "epoch": 1.2102220870094311, "grad_norm": 1.554034948348999, "learning_rate": 6.9971540593773955e-06, "loss": 0.3744, "step": 9945 }, { "epoch": 1.2103437785214481, "grad_norm": 3.209688663482666, "learning_rate": 6.995293447744138e-06, "loss": 0.344, "step": 9946 }, { "epoch": 1.2104654700334652, "grad_norm": 1.660309910774231, "learning_rate": 6.993432950446187e-06, "loss": 0.3896, "step": 9947 }, { "epoch": 1.2105871615454822, "grad_norm": 1.545854926109314, "learning_rate": 6.991572567554334e-06, "loss": 0.3323, "step": 9948 }, { "epoch": 1.2107088530574992, "grad_norm": 1.3997136354446411, "learning_rate": 6.989712299139372e-06, "loss": 0.3802, "step": 9949 }, { "epoch": 1.2108305445695162, "grad_norm": 1.8171509504318237, "learning_rate": 6.987852145272087e-06, "loss": 0.3664, "step": 9950 }, { "epoch": 1.2109522360815332, "grad_norm": 1.4273338317871094, "learning_rate": 6.985992106023265e-06, "loss": 0.3818, "step": 9951 }, { "epoch": 1.2110739275935503, "grad_norm": 2.940361976623535, "learning_rate": 6.984132181463681e-06, "loss": 0.4578, "step": 9952 }, { "epoch": 1.2111956191055673, "grad_norm": 1.6295067071914673, "learning_rate": 6.9822723716641084e-06, "loss": 0.3527, "step": 9953 }, { "epoch": 1.2113173106175845, "grad_norm": 2.1354970932006836, "learning_rate": 6.9804126766953225e-06, "loss": 0.3786, "step": 9954 }, { "epoch": 1.2114390021296015, "grad_norm": 1.4374134540557861, "learning_rate": 6.978553096628082e-06, "loss": 0.3605, "step": 9955 }, { "epoch": 1.2115606936416186, "grad_norm": 1.7722989320755005, "learning_rate": 6.976693631533151e-06, "loss": 0.3307, "step": 9956 }, { "epoch": 1.2116823851536356, "grad_norm": 1.442394495010376, "learning_rate": 6.974834281481284e-06, "loss": 0.3688, "step": 9957 }, { "epoch": 1.2118040766656526, "grad_norm": 1.6845611333847046, "learning_rate": 6.972975046543233e-06, "loss": 0.3842, "step": 9958 }, { "epoch": 1.2119257681776696, "grad_norm": 2.095054864883423, "learning_rate": 6.971115926789751e-06, "loss": 0.4221, "step": 9959 }, { "epoch": 1.2120474596896866, "grad_norm": 1.698492407798767, "learning_rate": 6.969256922291574e-06, "loss": 0.3146, "step": 9960 }, { "epoch": 1.2121691512017037, "grad_norm": 1.813895583152771, "learning_rate": 6.967398033119445e-06, "loss": 0.339, "step": 9961 }, { "epoch": 1.2122908427137207, "grad_norm": 1.9622679948806763, "learning_rate": 6.9655392593440985e-06, "loss": 0.3981, "step": 9962 }, { "epoch": 1.2124125342257377, "grad_norm": 1.5471811294555664, "learning_rate": 6.963680601036264e-06, "loss": 0.3574, "step": 9963 }, { "epoch": 1.2125342257377547, "grad_norm": 2.0501105785369873, "learning_rate": 6.961822058266667e-06, "loss": 0.3415, "step": 9964 }, { "epoch": 1.212655917249772, "grad_norm": 1.4566954374313354, "learning_rate": 6.959963631106029e-06, "loss": 0.4249, "step": 9965 }, { "epoch": 1.212777608761789, "grad_norm": 1.7529780864715576, "learning_rate": 6.958105319625073e-06, "loss": 0.3655, "step": 9966 }, { "epoch": 1.212899300273806, "grad_norm": 1.6072533130645752, "learning_rate": 6.956247123894502e-06, "loss": 0.3096, "step": 9967 }, { "epoch": 1.213020991785823, "grad_norm": 1.5779048204421997, "learning_rate": 6.954389043985026e-06, "loss": 0.3452, "step": 9968 }, { "epoch": 1.21314268329784, "grad_norm": 2.105333089828491, "learning_rate": 6.952531079967352e-06, "loss": 0.3881, "step": 9969 }, { "epoch": 1.213264374809857, "grad_norm": 1.9414513111114502, "learning_rate": 6.950673231912179e-06, "loss": 0.4065, "step": 9970 }, { "epoch": 1.213386066321874, "grad_norm": 1.4344571828842163, "learning_rate": 6.948815499890201e-06, "loss": 0.376, "step": 9971 }, { "epoch": 1.213507757833891, "grad_norm": 1.4175901412963867, "learning_rate": 6.946957883972107e-06, "loss": 0.3402, "step": 9972 }, { "epoch": 1.213629449345908, "grad_norm": 2.5647964477539062, "learning_rate": 6.945100384228587e-06, "loss": 0.3717, "step": 9973 }, { "epoch": 1.2137511408579251, "grad_norm": 2.6028192043304443, "learning_rate": 6.943243000730322e-06, "loss": 0.3441, "step": 9974 }, { "epoch": 1.2138728323699421, "grad_norm": 2.2155604362487793, "learning_rate": 6.941385733547985e-06, "loss": 0.3948, "step": 9975 }, { "epoch": 1.2139945238819592, "grad_norm": 1.776086449623108, "learning_rate": 6.939528582752253e-06, "loss": 0.3442, "step": 9976 }, { "epoch": 1.2141162153939762, "grad_norm": 2.011115312576294, "learning_rate": 6.937671548413796e-06, "loss": 0.4114, "step": 9977 }, { "epoch": 1.2142379069059932, "grad_norm": 1.7712589502334595, "learning_rate": 6.935814630603275e-06, "loss": 0.36, "step": 9978 }, { "epoch": 1.2143595984180102, "grad_norm": 1.51893150806427, "learning_rate": 6.933957829391346e-06, "loss": 0.3782, "step": 9979 }, { "epoch": 1.2144812899300275, "grad_norm": 2.2671079635620117, "learning_rate": 6.932101144848669e-06, "loss": 0.3938, "step": 9980 }, { "epoch": 1.2146029814420445, "grad_norm": 1.7446645498275757, "learning_rate": 6.930244577045895e-06, "loss": 0.3357, "step": 9981 }, { "epoch": 1.2147246729540615, "grad_norm": 2.676668405532837, "learning_rate": 6.928388126053669e-06, "loss": 0.4248, "step": 9982 }, { "epoch": 1.2148463644660785, "grad_norm": 3.3868157863616943, "learning_rate": 6.92653179194263e-06, "loss": 0.4796, "step": 9983 }, { "epoch": 1.2149680559780955, "grad_norm": 1.732919454574585, "learning_rate": 6.92467557478342e-06, "loss": 0.35, "step": 9984 }, { "epoch": 1.2150897474901126, "grad_norm": 1.8993979692459106, "learning_rate": 6.922819474646672e-06, "loss": 0.3858, "step": 9985 }, { "epoch": 1.2152114390021296, "grad_norm": 1.9380769729614258, "learning_rate": 6.920963491603008e-06, "loss": 0.3871, "step": 9986 }, { "epoch": 1.2153331305141466, "grad_norm": 1.2939344644546509, "learning_rate": 6.919107625723058e-06, "loss": 0.398, "step": 9987 }, { "epoch": 1.2154548220261636, "grad_norm": 1.5331484079360962, "learning_rate": 6.917251877077442e-06, "loss": 0.332, "step": 9988 }, { "epoch": 1.2155765135381806, "grad_norm": 1.5995283126831055, "learning_rate": 6.915396245736773e-06, "loss": 0.3808, "step": 9989 }, { "epoch": 1.2156982050501977, "grad_norm": 1.3807971477508545, "learning_rate": 6.913540731771662e-06, "loss": 0.3857, "step": 9990 }, { "epoch": 1.215819896562215, "grad_norm": 2.8587934970855713, "learning_rate": 6.9116853352527115e-06, "loss": 0.3431, "step": 9991 }, { "epoch": 1.215941588074232, "grad_norm": 1.5146300792694092, "learning_rate": 6.909830056250527e-06, "loss": 0.3872, "step": 9992 }, { "epoch": 1.216063279586249, "grad_norm": 1.8994717597961426, "learning_rate": 6.907974894835708e-06, "loss": 0.387, "step": 9993 }, { "epoch": 1.216184971098266, "grad_norm": 1.5792795419692993, "learning_rate": 6.906119851078841e-06, "loss": 0.3821, "step": 9994 }, { "epoch": 1.216306662610283, "grad_norm": 1.907387137413025, "learning_rate": 6.9042649250505186e-06, "loss": 0.4274, "step": 9995 }, { "epoch": 1.2164283541223, "grad_norm": 2.6051313877105713, "learning_rate": 6.9024101168213255e-06, "loss": 0.3032, "step": 9996 }, { "epoch": 1.216550045634317, "grad_norm": 2.166860818862915, "learning_rate": 6.900555426461837e-06, "loss": 0.3578, "step": 9997 }, { "epoch": 1.216671737146334, "grad_norm": 1.4204481840133667, "learning_rate": 6.89870085404263e-06, "loss": 0.3766, "step": 9998 }, { "epoch": 1.216793428658351, "grad_norm": 1.676345705986023, "learning_rate": 6.8968463996342736e-06, "loss": 0.3709, "step": 9999 }, { "epoch": 1.216915120170368, "grad_norm": 3.2839457988739014, "learning_rate": 6.89499206330734e-06, "loss": 0.3924, "step": 10000 }, { "epoch": 1.217036811682385, "grad_norm": 1.7567495107650757, "learning_rate": 6.8931378451323846e-06, "loss": 0.3009, "step": 10001 }, { "epoch": 1.217158503194402, "grad_norm": 1.9860072135925293, "learning_rate": 6.891283745179962e-06, "loss": 0.4073, "step": 10002 }, { "epoch": 1.2172801947064191, "grad_norm": 1.4669981002807617, "learning_rate": 6.889429763520627e-06, "loss": 0.3701, "step": 10003 }, { "epoch": 1.2174018862184361, "grad_norm": 1.966073751449585, "learning_rate": 6.887575900224931e-06, "loss": 0.3667, "step": 10004 }, { "epoch": 1.2175235777304534, "grad_norm": 2.080537796020508, "learning_rate": 6.885722155363413e-06, "loss": 0.316, "step": 10005 }, { "epoch": 1.2176452692424704, "grad_norm": 1.2806013822555542, "learning_rate": 6.883868529006611e-06, "loss": 0.308, "step": 10006 }, { "epoch": 1.2177669607544874, "grad_norm": 4.204131603240967, "learning_rate": 6.882015021225062e-06, "loss": 0.4191, "step": 10007 }, { "epoch": 1.2178886522665044, "grad_norm": 1.7619069814682007, "learning_rate": 6.8801616320893e-06, "loss": 0.3843, "step": 10008 }, { "epoch": 1.2180103437785215, "grad_norm": 2.0150258541107178, "learning_rate": 6.878308361669841e-06, "loss": 0.3972, "step": 10009 }, { "epoch": 1.2181320352905385, "grad_norm": 1.7189114093780518, "learning_rate": 6.876455210037209e-06, "loss": 0.3605, "step": 10010 }, { "epoch": 1.2182537268025555, "grad_norm": 1.3818416595458984, "learning_rate": 6.874602177261926e-06, "loss": 0.3659, "step": 10011 }, { "epoch": 1.2183754183145725, "grad_norm": 1.6566050052642822, "learning_rate": 6.872749263414502e-06, "loss": 0.3668, "step": 10012 }, { "epoch": 1.2184971098265895, "grad_norm": 1.6086212396621704, "learning_rate": 6.870896468565437e-06, "loss": 0.415, "step": 10013 }, { "epoch": 1.2186188013386066, "grad_norm": 2.278784990310669, "learning_rate": 6.869043792785237e-06, "loss": 0.3187, "step": 10014 }, { "epoch": 1.2187404928506236, "grad_norm": 1.5243163108825684, "learning_rate": 6.867191236144402e-06, "loss": 0.4224, "step": 10015 }, { "epoch": 1.2188621843626408, "grad_norm": 2.868350028991699, "learning_rate": 6.8653387987134275e-06, "loss": 0.4393, "step": 10016 }, { "epoch": 1.2189838758746578, "grad_norm": 1.6022921800613403, "learning_rate": 6.863486480562798e-06, "loss": 0.375, "step": 10017 }, { "epoch": 1.2191055673866749, "grad_norm": 2.5440804958343506, "learning_rate": 6.861634281763e-06, "loss": 0.4253, "step": 10018 }, { "epoch": 1.2192272588986919, "grad_norm": 1.742983341217041, "learning_rate": 6.859782202384515e-06, "loss": 0.3718, "step": 10019 }, { "epoch": 1.219348950410709, "grad_norm": 1.916079044342041, "learning_rate": 6.857930242497817e-06, "loss": 0.3954, "step": 10020 }, { "epoch": 1.219470641922726, "grad_norm": 2.3970232009887695, "learning_rate": 6.8560784021733755e-06, "loss": 0.3671, "step": 10021 }, { "epoch": 1.219592333434743, "grad_norm": 1.591254472732544, "learning_rate": 6.8542266814816594e-06, "loss": 0.3837, "step": 10022 }, { "epoch": 1.21971402494676, "grad_norm": 2.426933765411377, "learning_rate": 6.85237508049313e-06, "loss": 0.346, "step": 10023 }, { "epoch": 1.219835716458777, "grad_norm": 1.8062587976455688, "learning_rate": 6.850523599278246e-06, "loss": 0.431, "step": 10024 }, { "epoch": 1.219957407970794, "grad_norm": 1.314801812171936, "learning_rate": 6.8486722379074545e-06, "loss": 0.3489, "step": 10025 }, { "epoch": 1.220079099482811, "grad_norm": 1.3319562673568726, "learning_rate": 6.8468209964512074e-06, "loss": 0.3814, "step": 10026 }, { "epoch": 1.220200790994828, "grad_norm": 1.550036907196045, "learning_rate": 6.84496987497995e-06, "loss": 0.3752, "step": 10027 }, { "epoch": 1.220322482506845, "grad_norm": 1.9958138465881348, "learning_rate": 6.843118873564119e-06, "loss": 0.3645, "step": 10028 }, { "epoch": 1.220444174018862, "grad_norm": 1.5241824388504028, "learning_rate": 6.841267992274147e-06, "loss": 0.4009, "step": 10029 }, { "epoch": 1.2205658655308793, "grad_norm": 1.9343754053115845, "learning_rate": 6.839417231180469e-06, "loss": 0.3696, "step": 10030 }, { "epoch": 1.2206875570428963, "grad_norm": 1.2802180051803589, "learning_rate": 6.837566590353506e-06, "loss": 0.3188, "step": 10031 }, { "epoch": 1.2208092485549134, "grad_norm": 1.5967847108840942, "learning_rate": 6.835716069863678e-06, "loss": 0.4215, "step": 10032 }, { "epoch": 1.2209309400669304, "grad_norm": 1.9407161474227905, "learning_rate": 6.8338656697814055e-06, "loss": 0.3554, "step": 10033 }, { "epoch": 1.2210526315789474, "grad_norm": 1.7045966386795044, "learning_rate": 6.8320153901770995e-06, "loss": 0.3964, "step": 10034 }, { "epoch": 1.2211743230909644, "grad_norm": 1.8515386581420898, "learning_rate": 6.830165231121166e-06, "loss": 0.418, "step": 10035 }, { "epoch": 1.2212960146029814, "grad_norm": 2.134194850921631, "learning_rate": 6.828315192684004e-06, "loss": 0.4145, "step": 10036 }, { "epoch": 1.2214177061149984, "grad_norm": 2.974844455718994, "learning_rate": 6.8264652749360136e-06, "loss": 0.416, "step": 10037 }, { "epoch": 1.2215393976270155, "grad_norm": 1.8573710918426514, "learning_rate": 6.824615477947589e-06, "loss": 0.4382, "step": 10038 }, { "epoch": 1.2216610891390325, "grad_norm": 3.1503186225891113, "learning_rate": 6.822765801789117e-06, "loss": 0.4425, "step": 10039 }, { "epoch": 1.2217827806510495, "grad_norm": 1.7028475999832153, "learning_rate": 6.820916246530984e-06, "loss": 0.4468, "step": 10040 }, { "epoch": 1.2219044721630667, "grad_norm": 1.371720314025879, "learning_rate": 6.819066812243567e-06, "loss": 0.3636, "step": 10041 }, { "epoch": 1.2220261636750838, "grad_norm": 2.932171106338501, "learning_rate": 6.8172174989972435e-06, "loss": 0.3632, "step": 10042 }, { "epoch": 1.2221478551871008, "grad_norm": 1.4529882669448853, "learning_rate": 6.815368306862382e-06, "loss": 0.3776, "step": 10043 }, { "epoch": 1.2222695466991178, "grad_norm": 2.41349196434021, "learning_rate": 6.813519235909347e-06, "loss": 0.3981, "step": 10044 }, { "epoch": 1.2223912382111348, "grad_norm": 1.9547290802001953, "learning_rate": 6.811670286208503e-06, "loss": 0.3828, "step": 10045 }, { "epoch": 1.2225129297231518, "grad_norm": 3.3234975337982178, "learning_rate": 6.809821457830201e-06, "loss": 0.3358, "step": 10046 }, { "epoch": 1.2226346212351689, "grad_norm": 2.3116579055786133, "learning_rate": 6.807972750844801e-06, "loss": 0.3848, "step": 10047 }, { "epoch": 1.2227563127471859, "grad_norm": 1.3412984609603882, "learning_rate": 6.806124165322641e-06, "loss": 0.4085, "step": 10048 }, { "epoch": 1.222878004259203, "grad_norm": 1.237619161605835, "learning_rate": 6.8042757013340665e-06, "loss": 0.3525, "step": 10049 }, { "epoch": 1.22299969577122, "grad_norm": 1.3679311275482178, "learning_rate": 6.8024273589494195e-06, "loss": 0.3829, "step": 10050 }, { "epoch": 1.223121387283237, "grad_norm": 1.5347281694412231, "learning_rate": 6.800579138239028e-06, "loss": 0.3576, "step": 10051 }, { "epoch": 1.223243078795254, "grad_norm": 2.21738338470459, "learning_rate": 6.7987310392732235e-06, "loss": 0.3385, "step": 10052 }, { "epoch": 1.223364770307271, "grad_norm": 2.2998335361480713, "learning_rate": 6.796883062122329e-06, "loss": 0.4308, "step": 10053 }, { "epoch": 1.223486461819288, "grad_norm": 1.3314573764801025, "learning_rate": 6.795035206856663e-06, "loss": 0.3532, "step": 10054 }, { "epoch": 1.2236081533313052, "grad_norm": 1.3780741691589355, "learning_rate": 6.793187473546541e-06, "loss": 0.3867, "step": 10055 }, { "epoch": 1.2237298448433223, "grad_norm": 2.9927659034729004, "learning_rate": 6.791339862262276e-06, "loss": 0.3601, "step": 10056 }, { "epoch": 1.2238515363553393, "grad_norm": 3.4382476806640625, "learning_rate": 6.789492373074169e-06, "loss": 0.438, "step": 10057 }, { "epoch": 1.2239732278673563, "grad_norm": 2.0436625480651855, "learning_rate": 6.787645006052525e-06, "loss": 0.341, "step": 10058 }, { "epoch": 1.2240949193793733, "grad_norm": 1.3106759786605835, "learning_rate": 6.785797761267636e-06, "loss": 0.3361, "step": 10059 }, { "epoch": 1.2242166108913903, "grad_norm": 2.131434917449951, "learning_rate": 6.7839506387897935e-06, "loss": 0.4025, "step": 10060 }, { "epoch": 1.2243383024034074, "grad_norm": 1.2191104888916016, "learning_rate": 6.7821036386892905e-06, "loss": 0.344, "step": 10061 }, { "epoch": 1.2244599939154244, "grad_norm": 2.634955644607544, "learning_rate": 6.7802567610364e-06, "loss": 0.3743, "step": 10062 }, { "epoch": 1.2245816854274414, "grad_norm": 1.5233964920043945, "learning_rate": 6.778410005901406e-06, "loss": 0.3452, "step": 10063 }, { "epoch": 1.2247033769394584, "grad_norm": 1.5571216344833374, "learning_rate": 6.77656337335458e-06, "loss": 0.4033, "step": 10064 }, { "epoch": 1.2248250684514754, "grad_norm": 2.993842601776123, "learning_rate": 6.774716863466189e-06, "loss": 0.4045, "step": 10065 }, { "epoch": 1.2249467599634927, "grad_norm": 1.3878711462020874, "learning_rate": 6.772870476306496e-06, "loss": 0.395, "step": 10066 }, { "epoch": 1.2250684514755097, "grad_norm": 2.649106502532959, "learning_rate": 6.771024211945762e-06, "loss": 0.379, "step": 10067 }, { "epoch": 1.2251901429875267, "grad_norm": 2.9401559829711914, "learning_rate": 6.769178070454243e-06, "loss": 0.3604, "step": 10068 }, { "epoch": 1.2253118344995437, "grad_norm": 1.7393020391464233, "learning_rate": 6.767332051902182e-06, "loss": 0.3582, "step": 10069 }, { "epoch": 1.2254335260115607, "grad_norm": 2.9692940711975098, "learning_rate": 6.765486156359834e-06, "loss": 0.3832, "step": 10070 }, { "epoch": 1.2255552175235778, "grad_norm": 2.12664532661438, "learning_rate": 6.7636403838974276e-06, "loss": 0.3998, "step": 10071 }, { "epoch": 1.2256769090355948, "grad_norm": 1.749603509902954, "learning_rate": 6.761794734585203e-06, "loss": 0.4046, "step": 10072 }, { "epoch": 1.2257986005476118, "grad_norm": 1.5714327096939087, "learning_rate": 6.759949208493395e-06, "loss": 0.3969, "step": 10073 }, { "epoch": 1.2259202920596288, "grad_norm": 1.6170574426651, "learning_rate": 6.758103805692223e-06, "loss": 0.3675, "step": 10074 }, { "epoch": 1.2260419835716458, "grad_norm": 1.404693603515625, "learning_rate": 6.756258526251912e-06, "loss": 0.3699, "step": 10075 }, { "epoch": 1.2261636750836629, "grad_norm": 1.5548479557037354, "learning_rate": 6.754413370242681e-06, "loss": 0.4125, "step": 10076 }, { "epoch": 1.2262853665956799, "grad_norm": 2.4601943492889404, "learning_rate": 6.752568337734735e-06, "loss": 0.3723, "step": 10077 }, { "epoch": 1.226407058107697, "grad_norm": 1.7282521724700928, "learning_rate": 6.750723428798287e-06, "loss": 0.4007, "step": 10078 }, { "epoch": 1.226528749619714, "grad_norm": 3.0546650886535645, "learning_rate": 6.74887864350354e-06, "loss": 0.3832, "step": 10079 }, { "epoch": 1.226650441131731, "grad_norm": 1.4626129865646362, "learning_rate": 6.747033981920688e-06, "loss": 0.3536, "step": 10080 }, { "epoch": 1.2267721326437482, "grad_norm": 1.3409062623977661, "learning_rate": 6.74518944411993e-06, "loss": 0.3313, "step": 10081 }, { "epoch": 1.2268938241557652, "grad_norm": 1.5369508266448975, "learning_rate": 6.743345030171448e-06, "loss": 0.3651, "step": 10082 }, { "epoch": 1.2270155156677822, "grad_norm": 1.8735958337783813, "learning_rate": 6.741500740145428e-06, "loss": 0.3617, "step": 10083 }, { "epoch": 1.2271372071797992, "grad_norm": 2.009453058242798, "learning_rate": 6.73965657411205e-06, "loss": 0.3338, "step": 10084 }, { "epoch": 1.2272588986918163, "grad_norm": 2.2092716693878174, "learning_rate": 6.737812532141488e-06, "loss": 0.3531, "step": 10085 }, { "epoch": 1.2273805902038333, "grad_norm": 1.8117817640304565, "learning_rate": 6.735968614303911e-06, "loss": 0.4075, "step": 10086 }, { "epoch": 1.2275022817158503, "grad_norm": 2.3265016078948975, "learning_rate": 6.734124820669485e-06, "loss": 0.4198, "step": 10087 }, { "epoch": 1.2276239732278673, "grad_norm": 1.1171107292175293, "learning_rate": 6.732281151308369e-06, "loss": 0.3141, "step": 10088 }, { "epoch": 1.2277456647398843, "grad_norm": 1.3437461853027344, "learning_rate": 6.730437606290718e-06, "loss": 0.3325, "step": 10089 }, { "epoch": 1.2278673562519014, "grad_norm": 1.692828893661499, "learning_rate": 6.728594185686687e-06, "loss": 0.3521, "step": 10090 }, { "epoch": 1.2279890477639184, "grad_norm": 1.748609185218811, "learning_rate": 6.726750889566416e-06, "loss": 0.4422, "step": 10091 }, { "epoch": 1.2281107392759356, "grad_norm": 1.7633051872253418, "learning_rate": 6.724907718000049e-06, "loss": 0.3008, "step": 10092 }, { "epoch": 1.2282324307879526, "grad_norm": 1.5486901998519897, "learning_rate": 6.7230646710577265e-06, "loss": 0.3776, "step": 10093 }, { "epoch": 1.2283541222999697, "grad_norm": 1.6553490161895752, "learning_rate": 6.721221748809571e-06, "loss": 0.3271, "step": 10094 }, { "epoch": 1.2284758138119867, "grad_norm": 4.259578227996826, "learning_rate": 6.719378951325718e-06, "loss": 0.4764, "step": 10095 }, { "epoch": 1.2285975053240037, "grad_norm": 1.6228665113449097, "learning_rate": 6.717536278676284e-06, "loss": 0.4343, "step": 10096 }, { "epoch": 1.2287191968360207, "grad_norm": 3.021451950073242, "learning_rate": 6.715693730931389e-06, "loss": 0.4, "step": 10097 }, { "epoch": 1.2288408883480377, "grad_norm": 1.5281996726989746, "learning_rate": 6.713851308161146e-06, "loss": 0.3604, "step": 10098 }, { "epoch": 1.2289625798600547, "grad_norm": 1.3944039344787598, "learning_rate": 6.712009010435663e-06, "loss": 0.3637, "step": 10099 }, { "epoch": 1.2290842713720718, "grad_norm": 1.44630765914917, "learning_rate": 6.710166837825043e-06, "loss": 0.3759, "step": 10100 }, { "epoch": 1.2292059628840888, "grad_norm": 1.4572608470916748, "learning_rate": 6.708324790399383e-06, "loss": 0.3688, "step": 10101 }, { "epoch": 1.2293276543961058, "grad_norm": 3.140326499938965, "learning_rate": 6.70648286822878e-06, "loss": 0.4223, "step": 10102 }, { "epoch": 1.2294493459081228, "grad_norm": 1.6996397972106934, "learning_rate": 6.70464107138332e-06, "loss": 0.3813, "step": 10103 }, { "epoch": 1.2295710374201398, "grad_norm": 1.4673484563827515, "learning_rate": 6.702799399933093e-06, "loss": 0.3678, "step": 10104 }, { "epoch": 1.2296927289321569, "grad_norm": 1.7116461992263794, "learning_rate": 6.700957853948168e-06, "loss": 0.3603, "step": 10105 }, { "epoch": 1.229814420444174, "grad_norm": 1.863695502281189, "learning_rate": 6.699116433498626e-06, "loss": 0.4474, "step": 10106 }, { "epoch": 1.2299361119561911, "grad_norm": 1.6932259798049927, "learning_rate": 6.6972751386545376e-06, "loss": 0.3891, "step": 10107 }, { "epoch": 1.2300578034682081, "grad_norm": 2.165278196334839, "learning_rate": 6.695433969485965e-06, "loss": 0.3865, "step": 10108 }, { "epoch": 1.2301794949802252, "grad_norm": 2.702542543411255, "learning_rate": 6.693592926062971e-06, "loss": 0.3932, "step": 10109 }, { "epoch": 1.2303011864922422, "grad_norm": 2.1497843265533447, "learning_rate": 6.69175200845561e-06, "loss": 0.3509, "step": 10110 }, { "epoch": 1.2304228780042592, "grad_norm": 1.8543788194656372, "learning_rate": 6.689911216733932e-06, "loss": 0.4296, "step": 10111 }, { "epoch": 1.2305445695162762, "grad_norm": 2.2742533683776855, "learning_rate": 6.688070550967983e-06, "loss": 0.3936, "step": 10112 }, { "epoch": 1.2306662610282932, "grad_norm": 1.6529985666275024, "learning_rate": 6.686230011227808e-06, "loss": 0.3612, "step": 10113 }, { "epoch": 1.2307879525403103, "grad_norm": 2.7607147693634033, "learning_rate": 6.684389597583437e-06, "loss": 0.3032, "step": 10114 }, { "epoch": 1.2309096440523273, "grad_norm": 1.5222597122192383, "learning_rate": 6.682549310104905e-06, "loss": 0.3835, "step": 10115 }, { "epoch": 1.2310313355643443, "grad_norm": 1.828380823135376, "learning_rate": 6.680709148862243e-06, "loss": 0.3611, "step": 10116 }, { "epoch": 1.2311530270763615, "grad_norm": 2.4681479930877686, "learning_rate": 6.678869113925465e-06, "loss": 0.4313, "step": 10117 }, { "epoch": 1.2312747185883786, "grad_norm": 1.53141188621521, "learning_rate": 6.677029205364592e-06, "loss": 0.3677, "step": 10118 }, { "epoch": 1.2313964101003956, "grad_norm": 1.5276798009872437, "learning_rate": 6.675189423249635e-06, "loss": 0.3886, "step": 10119 }, { "epoch": 1.2315181016124126, "grad_norm": 1.2358633279800415, "learning_rate": 6.673349767650602e-06, "loss": 0.3303, "step": 10120 }, { "epoch": 1.2316397931244296, "grad_norm": 1.5187122821807861, "learning_rate": 6.671510238637498e-06, "loss": 0.3367, "step": 10121 }, { "epoch": 1.2317614846364466, "grad_norm": 3.9124903678894043, "learning_rate": 6.6696708362803174e-06, "loss": 0.4285, "step": 10122 }, { "epoch": 1.2318831761484637, "grad_norm": 2.3285555839538574, "learning_rate": 6.667831560649054e-06, "loss": 0.3948, "step": 10123 }, { "epoch": 1.2320048676604807, "grad_norm": 3.6912076473236084, "learning_rate": 6.6659924118136996e-06, "loss": 0.4472, "step": 10124 }, { "epoch": 1.2321265591724977, "grad_norm": 1.5694102048873901, "learning_rate": 6.664153389844234e-06, "loss": 0.317, "step": 10125 }, { "epoch": 1.2322482506845147, "grad_norm": 1.686165690422058, "learning_rate": 6.662314494810636e-06, "loss": 0.3926, "step": 10126 }, { "epoch": 1.2323699421965317, "grad_norm": 4.579761505126953, "learning_rate": 6.660475726782883e-06, "loss": 0.4725, "step": 10127 }, { "epoch": 1.2324916337085488, "grad_norm": 1.8019663095474243, "learning_rate": 6.658637085830939e-06, "loss": 0.4087, "step": 10128 }, { "epoch": 1.2326133252205658, "grad_norm": 2.6613926887512207, "learning_rate": 6.65679857202477e-06, "loss": 0.4409, "step": 10129 }, { "epoch": 1.2327350167325828, "grad_norm": 1.8014506101608276, "learning_rate": 6.6549601854343345e-06, "loss": 0.4228, "step": 10130 }, { "epoch": 1.2328567082446, "grad_norm": 1.81883704662323, "learning_rate": 6.653121926129588e-06, "loss": 0.3724, "step": 10131 }, { "epoch": 1.232978399756617, "grad_norm": 1.9304344654083252, "learning_rate": 6.651283794180479e-06, "loss": 0.4326, "step": 10132 }, { "epoch": 1.233100091268634, "grad_norm": 2.783109188079834, "learning_rate": 6.6494457896569564e-06, "loss": 0.3593, "step": 10133 }, { "epoch": 1.233221782780651, "grad_norm": 2.8495147228240967, "learning_rate": 6.647607912628953e-06, "loss": 0.398, "step": 10134 }, { "epoch": 1.233343474292668, "grad_norm": 2.1942520141601562, "learning_rate": 6.645770163166409e-06, "loss": 0.4162, "step": 10135 }, { "epoch": 1.2334651658046851, "grad_norm": 2.531790256500244, "learning_rate": 6.6439325413392556e-06, "loss": 0.4235, "step": 10136 }, { "epoch": 1.2335868573167021, "grad_norm": 3.0320332050323486, "learning_rate": 6.642095047217412e-06, "loss": 0.3819, "step": 10137 }, { "epoch": 1.2337085488287192, "grad_norm": 1.732704997062683, "learning_rate": 6.640257680870803e-06, "loss": 0.4441, "step": 10138 }, { "epoch": 1.2338302403407362, "grad_norm": 1.34881591796875, "learning_rate": 6.638420442369349e-06, "loss": 0.4119, "step": 10139 }, { "epoch": 1.2339519318527532, "grad_norm": 3.5255260467529297, "learning_rate": 6.636583331782949e-06, "loss": 0.3677, "step": 10140 }, { "epoch": 1.2340736233647702, "grad_norm": 2.842275619506836, "learning_rate": 6.634746349181518e-06, "loss": 0.3659, "step": 10141 }, { "epoch": 1.2341953148767875, "grad_norm": 1.715838074684143, "learning_rate": 6.6329094946349515e-06, "loss": 0.3604, "step": 10142 }, { "epoch": 1.2343170063888045, "grad_norm": 1.959950566291809, "learning_rate": 6.631072768213149e-06, "loss": 0.4179, "step": 10143 }, { "epoch": 1.2344386979008215, "grad_norm": 1.688847303390503, "learning_rate": 6.629236169986004e-06, "loss": 0.3726, "step": 10144 }, { "epoch": 1.2345603894128385, "grad_norm": 1.3267831802368164, "learning_rate": 6.6273997000233955e-06, "loss": 0.3284, "step": 10145 }, { "epoch": 1.2346820809248555, "grad_norm": 1.2984108924865723, "learning_rate": 6.62556335839521e-06, "loss": 0.348, "step": 10146 }, { "epoch": 1.2348037724368726, "grad_norm": 2.858015298843384, "learning_rate": 6.623727145171327e-06, "loss": 0.4159, "step": 10147 }, { "epoch": 1.2349254639488896, "grad_norm": 1.4354840517044067, "learning_rate": 6.621891060421613e-06, "loss": 0.3848, "step": 10148 }, { "epoch": 1.2350471554609066, "grad_norm": 1.2782552242279053, "learning_rate": 6.620055104215933e-06, "loss": 0.3356, "step": 10149 }, { "epoch": 1.2351688469729236, "grad_norm": 1.8523955345153809, "learning_rate": 6.6182192766241596e-06, "loss": 0.3372, "step": 10150 }, { "epoch": 1.2352905384849406, "grad_norm": 4.148677825927734, "learning_rate": 6.616383577716137e-06, "loss": 0.4638, "step": 10151 }, { "epoch": 1.2354122299969577, "grad_norm": 1.3652501106262207, "learning_rate": 6.614548007561725e-06, "loss": 0.3417, "step": 10152 }, { "epoch": 1.2355339215089747, "grad_norm": 1.4691781997680664, "learning_rate": 6.612712566230768e-06, "loss": 0.355, "step": 10153 }, { "epoch": 1.2356556130209917, "grad_norm": 1.8688024282455444, "learning_rate": 6.6108772537931075e-06, "loss": 0.3737, "step": 10154 }, { "epoch": 1.2357773045330087, "grad_norm": 2.493049383163452, "learning_rate": 6.609042070318585e-06, "loss": 0.3017, "step": 10155 }, { "epoch": 1.235898996045026, "grad_norm": 1.7352840900421143, "learning_rate": 6.6072070158770295e-06, "loss": 0.3994, "step": 10156 }, { "epoch": 1.236020687557043, "grad_norm": 1.376338005065918, "learning_rate": 6.605372090538269e-06, "loss": 0.3413, "step": 10157 }, { "epoch": 1.23614237906906, "grad_norm": 1.3053025007247925, "learning_rate": 6.603537294372127e-06, "loss": 0.3294, "step": 10158 }, { "epoch": 1.236264070581077, "grad_norm": 1.4417226314544678, "learning_rate": 6.6017026274484245e-06, "loss": 0.3933, "step": 10159 }, { "epoch": 1.236385762093094, "grad_norm": 3.6771433353424072, "learning_rate": 6.599868089836968e-06, "loss": 0.4538, "step": 10160 }, { "epoch": 1.236507453605111, "grad_norm": 1.7547956705093384, "learning_rate": 6.598033681607568e-06, "loss": 0.3958, "step": 10161 }, { "epoch": 1.236629145117128, "grad_norm": 1.8888362646102905, "learning_rate": 6.596199402830034e-06, "loss": 0.3507, "step": 10162 }, { "epoch": 1.236750836629145, "grad_norm": 2.2229413986206055, "learning_rate": 6.594365253574155e-06, "loss": 0.4569, "step": 10163 }, { "epoch": 1.236872528141162, "grad_norm": 1.3697724342346191, "learning_rate": 6.5925312339097245e-06, "loss": 0.3815, "step": 10164 }, { "epoch": 1.2369942196531791, "grad_norm": 1.8411109447479248, "learning_rate": 6.590697343906535e-06, "loss": 0.3435, "step": 10165 }, { "epoch": 1.2371159111651961, "grad_norm": 2.2527406215667725, "learning_rate": 6.588863583634369e-06, "loss": 0.3543, "step": 10166 }, { "epoch": 1.2372376026772134, "grad_norm": 1.8241209983825684, "learning_rate": 6.587029953163004e-06, "loss": 0.4076, "step": 10167 }, { "epoch": 1.2373592941892304, "grad_norm": 1.5561507940292358, "learning_rate": 6.5851964525622125e-06, "loss": 0.3842, "step": 10168 }, { "epoch": 1.2374809857012474, "grad_norm": 1.294209599494934, "learning_rate": 6.583363081901765e-06, "loss": 0.3522, "step": 10169 }, { "epoch": 1.2376026772132644, "grad_norm": 2.8769376277923584, "learning_rate": 6.581529841251426e-06, "loss": 0.3822, "step": 10170 }, { "epoch": 1.2377243687252815, "grad_norm": 1.2180982828140259, "learning_rate": 6.5796967306809515e-06, "loss": 0.3649, "step": 10171 }, { "epoch": 1.2378460602372985, "grad_norm": 1.7926931381225586, "learning_rate": 6.577863750260094e-06, "loss": 0.4302, "step": 10172 }, { "epoch": 1.2379677517493155, "grad_norm": 1.6825025081634521, "learning_rate": 6.57603090005861e-06, "loss": 0.371, "step": 10173 }, { "epoch": 1.2380894432613325, "grad_norm": 1.562591552734375, "learning_rate": 6.574198180146232e-06, "loss": 0.3653, "step": 10174 }, { "epoch": 1.2382111347733495, "grad_norm": 2.818526029586792, "learning_rate": 6.572365590592706e-06, "loss": 0.4531, "step": 10175 }, { "epoch": 1.2383328262853666, "grad_norm": 1.4376499652862549, "learning_rate": 6.570533131467763e-06, "loss": 0.3616, "step": 10176 }, { "epoch": 1.2384545177973836, "grad_norm": 2.309314012527466, "learning_rate": 6.568700802841134e-06, "loss": 0.3877, "step": 10177 }, { "epoch": 1.2385762093094006, "grad_norm": 1.6091163158416748, "learning_rate": 6.566868604782542e-06, "loss": 0.3977, "step": 10178 }, { "epoch": 1.2386979008214176, "grad_norm": 1.6217697858810425, "learning_rate": 6.565036537361704e-06, "loss": 0.3667, "step": 10179 }, { "epoch": 1.2388195923334346, "grad_norm": 3.172600030899048, "learning_rate": 6.5632046006483375e-06, "loss": 0.4775, "step": 10180 }, { "epoch": 1.2389412838454517, "grad_norm": 1.9307940006256104, "learning_rate": 6.561372794712151e-06, "loss": 0.3849, "step": 10181 }, { "epoch": 1.239062975357469, "grad_norm": 1.5082497596740723, "learning_rate": 6.559541119622844e-06, "loss": 0.3834, "step": 10182 }, { "epoch": 1.239184666869486, "grad_norm": 1.4714469909667969, "learning_rate": 6.55770957545012e-06, "loss": 0.377, "step": 10183 }, { "epoch": 1.239306358381503, "grad_norm": 3.1042087078094482, "learning_rate": 6.555878162263672e-06, "loss": 0.32, "step": 10184 }, { "epoch": 1.23942804989352, "grad_norm": 1.747067928314209, "learning_rate": 6.554046880133192e-06, "loss": 0.3043, "step": 10185 }, { "epoch": 1.239549741405537, "grad_norm": 1.8358150720596313, "learning_rate": 6.552215729128358e-06, "loss": 0.3467, "step": 10186 }, { "epoch": 1.239671432917554, "grad_norm": 1.4600703716278076, "learning_rate": 6.5503847093188515e-06, "loss": 0.3745, "step": 10187 }, { "epoch": 1.239793124429571, "grad_norm": 1.4450266361236572, "learning_rate": 6.548553820774346e-06, "loss": 0.3873, "step": 10188 }, { "epoch": 1.239914815941588, "grad_norm": 2.450169801712036, "learning_rate": 6.546723063564515e-06, "loss": 0.4485, "step": 10189 }, { "epoch": 1.240036507453605, "grad_norm": 2.0614356994628906, "learning_rate": 6.544892437759016e-06, "loss": 0.3637, "step": 10190 }, { "epoch": 1.240158198965622, "grad_norm": 1.4805302619934082, "learning_rate": 6.543061943427513e-06, "loss": 0.4028, "step": 10191 }, { "epoch": 1.240279890477639, "grad_norm": 2.427363634109497, "learning_rate": 6.541231580639657e-06, "loss": 0.4194, "step": 10192 }, { "epoch": 1.2404015819896563, "grad_norm": 2.1027939319610596, "learning_rate": 6.539401349465102e-06, "loss": 0.4009, "step": 10193 }, { "epoch": 1.2405232735016734, "grad_norm": 2.545494794845581, "learning_rate": 6.537571249973487e-06, "loss": 0.3337, "step": 10194 }, { "epoch": 1.2406449650136904, "grad_norm": 1.5310806035995483, "learning_rate": 6.5357412822344515e-06, "loss": 0.3811, "step": 10195 }, { "epoch": 1.2407666565257074, "grad_norm": 1.6960728168487549, "learning_rate": 6.533911446317635e-06, "loss": 0.3441, "step": 10196 }, { "epoch": 1.2408883480377244, "grad_norm": 1.7053056955337524, "learning_rate": 6.532081742292661e-06, "loss": 0.3558, "step": 10197 }, { "epoch": 1.2410100395497414, "grad_norm": 3.8674912452697754, "learning_rate": 6.530252170229152e-06, "loss": 0.4513, "step": 10198 }, { "epoch": 1.2411317310617584, "grad_norm": 1.6030267477035522, "learning_rate": 6.52842273019673e-06, "loss": 0.3747, "step": 10199 }, { "epoch": 1.2412534225737755, "grad_norm": 1.641355276107788, "learning_rate": 6.52659342226501e-06, "loss": 0.3494, "step": 10200 }, { "epoch": 1.2413751140857925, "grad_norm": 1.4010977745056152, "learning_rate": 6.524764246503601e-06, "loss": 0.3742, "step": 10201 }, { "epoch": 1.2414968055978095, "grad_norm": 1.8821971416473389, "learning_rate": 6.522935202982104e-06, "loss": 0.3698, "step": 10202 }, { "epoch": 1.2416184971098265, "grad_norm": 1.9448819160461426, "learning_rate": 6.521106291770118e-06, "loss": 0.3454, "step": 10203 }, { "epoch": 1.2417401886218435, "grad_norm": 2.2565510272979736, "learning_rate": 6.519277512937243e-06, "loss": 0.3388, "step": 10204 }, { "epoch": 1.2418618801338606, "grad_norm": 2.1581931114196777, "learning_rate": 6.51744886655306e-06, "loss": 0.3958, "step": 10205 }, { "epoch": 1.2419835716458776, "grad_norm": 2.816659450531006, "learning_rate": 6.515620352687157e-06, "loss": 0.311, "step": 10206 }, { "epoch": 1.2421052631578948, "grad_norm": 1.8190574645996094, "learning_rate": 6.5137919714091135e-06, "loss": 0.4218, "step": 10207 }, { "epoch": 1.2422269546699118, "grad_norm": 1.860929250717163, "learning_rate": 6.511963722788501e-06, "loss": 0.393, "step": 10208 }, { "epoch": 1.2423486461819289, "grad_norm": 1.3672665357589722, "learning_rate": 6.51013560689489e-06, "loss": 0.3877, "step": 10209 }, { "epoch": 1.2424703376939459, "grad_norm": 2.0359270572662354, "learning_rate": 6.508307623797838e-06, "loss": 0.3544, "step": 10210 }, { "epoch": 1.242592029205963, "grad_norm": 3.789458990097046, "learning_rate": 6.506479773566912e-06, "loss": 0.433, "step": 10211 }, { "epoch": 1.24271372071798, "grad_norm": 1.888265609741211, "learning_rate": 6.504652056271661e-06, "loss": 0.4067, "step": 10212 }, { "epoch": 1.242835412229997, "grad_norm": 2.016385316848755, "learning_rate": 6.502824471981633e-06, "loss": 0.3379, "step": 10213 }, { "epoch": 1.242957103742014, "grad_norm": 2.9620585441589355, "learning_rate": 6.500997020766373e-06, "loss": 0.3188, "step": 10214 }, { "epoch": 1.243078795254031, "grad_norm": 1.5754475593566895, "learning_rate": 6.49916970269542e-06, "loss": 0.402, "step": 10215 }, { "epoch": 1.243200486766048, "grad_norm": 3.2240493297576904, "learning_rate": 6.4973425178383055e-06, "loss": 0.3597, "step": 10216 }, { "epoch": 1.243322178278065, "grad_norm": 1.8452606201171875, "learning_rate": 6.495515466264557e-06, "loss": 0.3952, "step": 10217 }, { "epoch": 1.2434438697900823, "grad_norm": 1.7084141969680786, "learning_rate": 6.4936885480437e-06, "loss": 0.397, "step": 10218 }, { "epoch": 1.2435655613020993, "grad_norm": 1.5936448574066162, "learning_rate": 6.491861763245255e-06, "loss": 0.3668, "step": 10219 }, { "epoch": 1.2436872528141163, "grad_norm": 3.834557294845581, "learning_rate": 6.49003511193873e-06, "loss": 0.3288, "step": 10220 }, { "epoch": 1.2438089443261333, "grad_norm": 2.0085062980651855, "learning_rate": 6.4882085941936305e-06, "loss": 0.3986, "step": 10221 }, { "epoch": 1.2439306358381503, "grad_norm": 2.1349685192108154, "learning_rate": 6.486382210079465e-06, "loss": 0.3752, "step": 10222 }, { "epoch": 1.2440523273501674, "grad_norm": 1.5375348329544067, "learning_rate": 6.48455595966573e-06, "loss": 0.3818, "step": 10223 }, { "epoch": 1.2441740188621844, "grad_norm": 2.064540147781372, "learning_rate": 6.482729843021916e-06, "loss": 0.3664, "step": 10224 }, { "epoch": 1.2442957103742014, "grad_norm": 1.913798213005066, "learning_rate": 6.48090386021751e-06, "loss": 0.3562, "step": 10225 }, { "epoch": 1.2444174018862184, "grad_norm": 1.8298752307891846, "learning_rate": 6.479078011321997e-06, "loss": 0.3599, "step": 10226 }, { "epoch": 1.2445390933982354, "grad_norm": 2.9885013103485107, "learning_rate": 6.477252296404857e-06, "loss": 0.3852, "step": 10227 }, { "epoch": 1.2446607849102524, "grad_norm": 1.758154034614563, "learning_rate": 6.4754267155355555e-06, "loss": 0.3362, "step": 10228 }, { "epoch": 1.2447824764222695, "grad_norm": 1.5940898656845093, "learning_rate": 6.473601268783564e-06, "loss": 0.3527, "step": 10229 }, { "epoch": 1.2449041679342865, "grad_norm": 2.102858543395996, "learning_rate": 6.471775956218345e-06, "loss": 0.4091, "step": 10230 }, { "epoch": 1.2450258594463035, "grad_norm": 1.4504879713058472, "learning_rate": 6.4699507779093555e-06, "loss": 0.3457, "step": 10231 }, { "epoch": 1.2451475509583207, "grad_norm": 2.159846544265747, "learning_rate": 6.4681257339260425e-06, "loss": 0.3592, "step": 10232 }, { "epoch": 1.2452692424703378, "grad_norm": 1.8087475299835205, "learning_rate": 6.466300824337854e-06, "loss": 0.3693, "step": 10233 }, { "epoch": 1.2453909339823548, "grad_norm": 1.5439751148223877, "learning_rate": 6.464476049214235e-06, "loss": 0.4035, "step": 10234 }, { "epoch": 1.2455126254943718, "grad_norm": 2.1327357292175293, "learning_rate": 6.462651408624623e-06, "loss": 0.3915, "step": 10235 }, { "epoch": 1.2456343170063888, "grad_norm": 1.6450012922286987, "learning_rate": 6.460826902638444e-06, "loss": 0.4, "step": 10236 }, { "epoch": 1.2457560085184058, "grad_norm": 1.636935830116272, "learning_rate": 6.459002531325127e-06, "loss": 0.396, "step": 10237 }, { "epoch": 1.2458777000304229, "grad_norm": 2.4607183933258057, "learning_rate": 6.4571782947540965e-06, "loss": 0.363, "step": 10238 }, { "epoch": 1.2459993915424399, "grad_norm": 1.7880399227142334, "learning_rate": 6.4553541929947605e-06, "loss": 0.3323, "step": 10239 }, { "epoch": 1.246121083054457, "grad_norm": 1.579453468322754, "learning_rate": 6.453530226116536e-06, "loss": 0.3864, "step": 10240 }, { "epoch": 1.246242774566474, "grad_norm": 3.1174049377441406, "learning_rate": 6.451706394188829e-06, "loss": 0.4405, "step": 10241 }, { "epoch": 1.246364466078491, "grad_norm": 2.23661470413208, "learning_rate": 6.449882697281038e-06, "loss": 0.398, "step": 10242 }, { "epoch": 1.2464861575905082, "grad_norm": 3.240889072418213, "learning_rate": 6.44805913546256e-06, "loss": 0.2665, "step": 10243 }, { "epoch": 1.2466078491025252, "grad_norm": 1.862520694732666, "learning_rate": 6.446235708802782e-06, "loss": 0.3625, "step": 10244 }, { "epoch": 1.2467295406145422, "grad_norm": 1.716979742050171, "learning_rate": 6.444412417371089e-06, "loss": 0.3963, "step": 10245 }, { "epoch": 1.2468512321265592, "grad_norm": 1.7534376382827759, "learning_rate": 6.442589261236867e-06, "loss": 0.3965, "step": 10246 }, { "epoch": 1.2469729236385763, "grad_norm": 1.667065143585205, "learning_rate": 6.440766240469485e-06, "loss": 0.3614, "step": 10247 }, { "epoch": 1.2470946151505933, "grad_norm": 1.7638033628463745, "learning_rate": 6.438943355138313e-06, "loss": 0.3301, "step": 10248 }, { "epoch": 1.2472163066626103, "grad_norm": 1.2614213228225708, "learning_rate": 6.437120605312721e-06, "loss": 0.3725, "step": 10249 }, { "epoch": 1.2473379981746273, "grad_norm": 1.9589706659317017, "learning_rate": 6.435297991062061e-06, "loss": 0.3194, "step": 10250 }, { "epoch": 1.2474596896866443, "grad_norm": 1.2972359657287598, "learning_rate": 6.433475512455694e-06, "loss": 0.3961, "step": 10251 }, { "epoch": 1.2475813811986614, "grad_norm": 1.3767954111099243, "learning_rate": 6.431653169562964e-06, "loss": 0.3558, "step": 10252 }, { "epoch": 1.2477030727106784, "grad_norm": 3.4938995838165283, "learning_rate": 6.429830962453219e-06, "loss": 0.4091, "step": 10253 }, { "epoch": 1.2478247642226954, "grad_norm": 1.1383659839630127, "learning_rate": 6.428008891195798e-06, "loss": 0.298, "step": 10254 }, { "epoch": 1.2479464557347124, "grad_norm": 2.4070913791656494, "learning_rate": 6.426186955860028e-06, "loss": 0.3771, "step": 10255 }, { "epoch": 1.2480681472467294, "grad_norm": 2.375009536743164, "learning_rate": 6.424365156515242e-06, "loss": 0.437, "step": 10256 }, { "epoch": 1.2481898387587467, "grad_norm": 3.548109769821167, "learning_rate": 6.422543493230765e-06, "loss": 0.4835, "step": 10257 }, { "epoch": 1.2483115302707637, "grad_norm": 2.1627519130706787, "learning_rate": 6.4207219660759116e-06, "loss": 0.3335, "step": 10258 }, { "epoch": 1.2484332217827807, "grad_norm": 1.7186542749404907, "learning_rate": 6.4189005751199955e-06, "loss": 0.3683, "step": 10259 }, { "epoch": 1.2485549132947977, "grad_norm": 2.8404996395111084, "learning_rate": 6.417079320432324e-06, "loss": 0.4033, "step": 10260 }, { "epoch": 1.2486766048068147, "grad_norm": 2.7294185161590576, "learning_rate": 6.415258202082204e-06, "loss": 0.4078, "step": 10261 }, { "epoch": 1.2487982963188318, "grad_norm": 1.9257264137268066, "learning_rate": 6.413437220138927e-06, "loss": 0.3943, "step": 10262 }, { "epoch": 1.2489199878308488, "grad_norm": 2.3244078159332275, "learning_rate": 6.411616374671788e-06, "loss": 0.4044, "step": 10263 }, { "epoch": 1.2490416793428658, "grad_norm": 3.294506311416626, "learning_rate": 6.409795665750074e-06, "loss": 0.3042, "step": 10264 }, { "epoch": 1.2491633708548828, "grad_norm": 2.544189214706421, "learning_rate": 6.407975093443065e-06, "loss": 0.3241, "step": 10265 }, { "epoch": 1.2492850623668998, "grad_norm": 2.8588218688964844, "learning_rate": 6.406154657820043e-06, "loss": 0.3496, "step": 10266 }, { "epoch": 1.2494067538789169, "grad_norm": 2.1826016902923584, "learning_rate": 6.404334358950271e-06, "loss": 0.334, "step": 10267 }, { "epoch": 1.249528445390934, "grad_norm": 1.4517606496810913, "learning_rate": 6.4025141969030205e-06, "loss": 0.3756, "step": 10268 }, { "epoch": 1.2496501369029511, "grad_norm": 1.3809586763381958, "learning_rate": 6.400694171747552e-06, "loss": 0.3607, "step": 10269 }, { "epoch": 1.2497718284149681, "grad_norm": 2.6558356285095215, "learning_rate": 6.398874283553118e-06, "loss": 0.3177, "step": 10270 }, { "epoch": 1.2498935199269852, "grad_norm": 1.7323626279830933, "learning_rate": 6.397054532388972e-06, "loss": 0.3724, "step": 10271 }, { "epoch": 1.2500152114390022, "grad_norm": 2.186450719833374, "learning_rate": 6.39523491832436e-06, "loss": 0.4119, "step": 10272 }, { "epoch": 1.2501369029510192, "grad_norm": 5.2563862800598145, "learning_rate": 6.39341544142852e-06, "loss": 0.471, "step": 10273 }, { "epoch": 1.2502585944630362, "grad_norm": 1.3286327123641968, "learning_rate": 6.391596101770687e-06, "loss": 0.3021, "step": 10274 }, { "epoch": 1.2503802859750532, "grad_norm": 1.8804073333740234, "learning_rate": 6.389776899420094e-06, "loss": 0.3226, "step": 10275 }, { "epoch": 1.2505019774870703, "grad_norm": 2.5674660205841064, "learning_rate": 6.387957834445959e-06, "loss": 0.4329, "step": 10276 }, { "epoch": 1.2506236689990873, "grad_norm": 1.4156824350357056, "learning_rate": 6.386138906917512e-06, "loss": 0.3409, "step": 10277 }, { "epoch": 1.2507453605111043, "grad_norm": 1.4968140125274658, "learning_rate": 6.3843201169039524e-06, "loss": 0.3516, "step": 10278 }, { "epoch": 1.2508670520231213, "grad_norm": 1.9649887084960938, "learning_rate": 6.382501464474499e-06, "loss": 0.3675, "step": 10279 }, { "epoch": 1.2509887435351383, "grad_norm": 1.5103336572647095, "learning_rate": 6.380682949698354e-06, "loss": 0.3452, "step": 10280 }, { "epoch": 1.2511104350471554, "grad_norm": 1.548187017440796, "learning_rate": 6.378864572644712e-06, "loss": 0.3453, "step": 10281 }, { "epoch": 1.2512321265591724, "grad_norm": 2.288419485092163, "learning_rate": 6.377046333382768e-06, "loss": 0.4032, "step": 10282 }, { "epoch": 1.2513538180711896, "grad_norm": 2.0537350177764893, "learning_rate": 6.375228231981711e-06, "loss": 0.4204, "step": 10283 }, { "epoch": 1.2514755095832066, "grad_norm": 1.1866052150726318, "learning_rate": 6.373410268510722e-06, "loss": 0.3567, "step": 10284 }, { "epoch": 1.2515972010952237, "grad_norm": 2.002417802810669, "learning_rate": 6.371592443038981e-06, "loss": 0.394, "step": 10285 }, { "epoch": 1.2517188926072407, "grad_norm": 2.173264741897583, "learning_rate": 6.369774755635655e-06, "loss": 0.3891, "step": 10286 }, { "epoch": 1.2518405841192577, "grad_norm": 1.8141224384307861, "learning_rate": 6.367957206369917e-06, "loss": 0.3315, "step": 10287 }, { "epoch": 1.2519622756312747, "grad_norm": 2.7340219020843506, "learning_rate": 6.3661397953109225e-06, "loss": 0.3624, "step": 10288 }, { "epoch": 1.2520839671432917, "grad_norm": 2.2916109561920166, "learning_rate": 6.364322522527835e-06, "loss": 0.3399, "step": 10289 }, { "epoch": 1.2522056586553088, "grad_norm": 1.6363352537155151, "learning_rate": 6.362505388089797e-06, "loss": 0.3806, "step": 10290 }, { "epoch": 1.2523273501673258, "grad_norm": 1.769532322883606, "learning_rate": 6.360688392065959e-06, "loss": 0.357, "step": 10291 }, { "epoch": 1.2524490416793428, "grad_norm": 2.0367841720581055, "learning_rate": 6.35887153452546e-06, "loss": 0.4071, "step": 10292 }, { "epoch": 1.25257073319136, "grad_norm": 1.7407128810882568, "learning_rate": 6.357054815537435e-06, "loss": 0.4159, "step": 10293 }, { "epoch": 1.252692424703377, "grad_norm": 1.7790228128433228, "learning_rate": 6.355238235171015e-06, "loss": 0.3962, "step": 10294 }, { "epoch": 1.252814116215394, "grad_norm": 3.8156418800354004, "learning_rate": 6.353421793495327e-06, "loss": 0.499, "step": 10295 }, { "epoch": 1.252935807727411, "grad_norm": 2.255209445953369, "learning_rate": 6.351605490579486e-06, "loss": 0.4571, "step": 10296 }, { "epoch": 1.253057499239428, "grad_norm": 1.682839274406433, "learning_rate": 6.349789326492607e-06, "loss": 0.3459, "step": 10297 }, { "epoch": 1.2531791907514451, "grad_norm": 1.8207453489303589, "learning_rate": 6.347973301303802e-06, "loss": 0.4251, "step": 10298 }, { "epoch": 1.2533008822634621, "grad_norm": 1.597184419631958, "learning_rate": 6.34615741508217e-06, "loss": 0.3675, "step": 10299 }, { "epoch": 1.2534225737754792, "grad_norm": 2.73740553855896, "learning_rate": 6.3443416678968165e-06, "loss": 0.4034, "step": 10300 }, { "epoch": 1.2535442652874962, "grad_norm": 2.7080307006835938, "learning_rate": 6.342526059816822e-06, "loss": 0.3327, "step": 10301 }, { "epoch": 1.2536659567995132, "grad_norm": 1.590369701385498, "learning_rate": 6.340710590911285e-06, "loss": 0.3874, "step": 10302 }, { "epoch": 1.2537876483115302, "grad_norm": 1.3904131650924683, "learning_rate": 6.338895261249285e-06, "loss": 0.3501, "step": 10303 }, { "epoch": 1.2539093398235472, "grad_norm": 2.322983980178833, "learning_rate": 6.337080070899897e-06, "loss": 0.3466, "step": 10304 }, { "epoch": 1.2540310313355643, "grad_norm": 1.473681092262268, "learning_rate": 6.335265019932193e-06, "loss": 0.3856, "step": 10305 }, { "epoch": 1.2541527228475813, "grad_norm": 1.3059062957763672, "learning_rate": 6.333450108415243e-06, "loss": 0.3513, "step": 10306 }, { "epoch": 1.2542744143595983, "grad_norm": 1.4276163578033447, "learning_rate": 6.331635336418104e-06, "loss": 0.3417, "step": 10307 }, { "epoch": 1.2543961058716153, "grad_norm": 2.6507444381713867, "learning_rate": 6.329820704009833e-06, "loss": 0.4115, "step": 10308 }, { "epoch": 1.2545177973836326, "grad_norm": 1.5199767351150513, "learning_rate": 6.3280062112594835e-06, "loss": 0.3512, "step": 10309 }, { "epoch": 1.2546394888956496, "grad_norm": 2.149470329284668, "learning_rate": 6.326191858236097e-06, "loss": 0.3676, "step": 10310 }, { "epoch": 1.2547611804076666, "grad_norm": 1.625446081161499, "learning_rate": 6.3243776450087135e-06, "loss": 0.3881, "step": 10311 }, { "epoch": 1.2548828719196836, "grad_norm": 2.1296708583831787, "learning_rate": 6.322563571646373e-06, "loss": 0.4005, "step": 10312 }, { "epoch": 1.2550045634317006, "grad_norm": 1.544885277748108, "learning_rate": 6.320749638218097e-06, "loss": 0.3482, "step": 10313 }, { "epoch": 1.2551262549437177, "grad_norm": 1.4518001079559326, "learning_rate": 6.318935844792915e-06, "loss": 0.3343, "step": 10314 }, { "epoch": 1.2552479464557347, "grad_norm": 2.0721778869628906, "learning_rate": 6.317122191439839e-06, "loss": 0.4879, "step": 10315 }, { "epoch": 1.2553696379677517, "grad_norm": 2.368177890777588, "learning_rate": 6.31530867822789e-06, "loss": 0.4587, "step": 10316 }, { "epoch": 1.2554913294797687, "grad_norm": 2.0135738849639893, "learning_rate": 6.313495305226074e-06, "loss": 0.3674, "step": 10317 }, { "epoch": 1.255613020991786, "grad_norm": 2.9008350372314453, "learning_rate": 6.3116820725033885e-06, "loss": 0.4191, "step": 10318 }, { "epoch": 1.255734712503803, "grad_norm": 1.9180659055709839, "learning_rate": 6.309868980128837e-06, "loss": 0.3526, "step": 10319 }, { "epoch": 1.25585640401582, "grad_norm": 2.159235715866089, "learning_rate": 6.308056028171407e-06, "loss": 0.369, "step": 10320 }, { "epoch": 1.255978095527837, "grad_norm": 1.8591614961624146, "learning_rate": 6.30624321670009e-06, "loss": 0.372, "step": 10321 }, { "epoch": 1.256099787039854, "grad_norm": 3.3553457260131836, "learning_rate": 6.3044305457838615e-06, "loss": 0.3147, "step": 10322 }, { "epoch": 1.256221478551871, "grad_norm": 1.7500940561294556, "learning_rate": 6.3026180154917046e-06, "loss": 0.389, "step": 10323 }, { "epoch": 1.256343170063888, "grad_norm": 2.3331050872802734, "learning_rate": 6.300805625892581e-06, "loss": 0.3456, "step": 10324 }, { "epoch": 1.256464861575905, "grad_norm": 1.8292016983032227, "learning_rate": 6.298993377055463e-06, "loss": 0.3568, "step": 10325 }, { "epoch": 1.256586553087922, "grad_norm": 1.286298155784607, "learning_rate": 6.297181269049306e-06, "loss": 0.3377, "step": 10326 }, { "epoch": 1.2567082445999391, "grad_norm": 1.907997488975525, "learning_rate": 6.295369301943065e-06, "loss": 0.363, "step": 10327 }, { "epoch": 1.2568299361119561, "grad_norm": 1.6424189805984497, "learning_rate": 6.29355747580569e-06, "loss": 0.3547, "step": 10328 }, { "epoch": 1.2569516276239732, "grad_norm": 3.8504080772399902, "learning_rate": 6.291745790706129e-06, "loss": 0.4063, "step": 10329 }, { "epoch": 1.2570733191359902, "grad_norm": 3.5502266883850098, "learning_rate": 6.2899342467133115e-06, "loss": 0.4361, "step": 10330 }, { "epoch": 1.2571950106480072, "grad_norm": 1.7539945840835571, "learning_rate": 6.288122843896179e-06, "loss": 0.3525, "step": 10331 }, { "epoch": 1.2573167021600242, "grad_norm": 1.3641618490219116, "learning_rate": 6.286311582323654e-06, "loss": 0.3734, "step": 10332 }, { "epoch": 1.2574383936720412, "grad_norm": 1.7663099765777588, "learning_rate": 6.28450046206466e-06, "loss": 0.375, "step": 10333 }, { "epoch": 1.2575600851840585, "grad_norm": 1.6508175134658813, "learning_rate": 6.282689483188113e-06, "loss": 0.3093, "step": 10334 }, { "epoch": 1.2576817766960755, "grad_norm": 1.401906132698059, "learning_rate": 6.28087864576293e-06, "loss": 0.4104, "step": 10335 }, { "epoch": 1.2578034682080925, "grad_norm": 1.5382647514343262, "learning_rate": 6.279067949858009e-06, "loss": 0.3662, "step": 10336 }, { "epoch": 1.2579251597201095, "grad_norm": 1.4446736574172974, "learning_rate": 6.277257395542256e-06, "loss": 0.2971, "step": 10337 }, { "epoch": 1.2580468512321266, "grad_norm": 1.5693936347961426, "learning_rate": 6.275446982884563e-06, "loss": 0.4226, "step": 10338 }, { "epoch": 1.2581685427441436, "grad_norm": 3.176318645477295, "learning_rate": 6.273636711953821e-06, "loss": 0.456, "step": 10339 }, { "epoch": 1.2582902342561606, "grad_norm": 2.546290874481201, "learning_rate": 6.271826582818918e-06, "loss": 0.379, "step": 10340 }, { "epoch": 1.2584119257681776, "grad_norm": 2.663435459136963, "learning_rate": 6.270016595548729e-06, "loss": 0.4254, "step": 10341 }, { "epoch": 1.2585336172801946, "grad_norm": 2.669820785522461, "learning_rate": 6.268206750212129e-06, "loss": 0.3611, "step": 10342 }, { "epoch": 1.2586553087922119, "grad_norm": 1.6334837675094604, "learning_rate": 6.266397046877988e-06, "loss": 0.3197, "step": 10343 }, { "epoch": 1.258777000304229, "grad_norm": 1.5402953624725342, "learning_rate": 6.264587485615166e-06, "loss": 0.3859, "step": 10344 }, { "epoch": 1.258898691816246, "grad_norm": 1.406273365020752, "learning_rate": 6.262778066492521e-06, "loss": 0.2967, "step": 10345 }, { "epoch": 1.259020383328263, "grad_norm": 1.5371147394180298, "learning_rate": 6.260968789578911e-06, "loss": 0.3625, "step": 10346 }, { "epoch": 1.25914207484028, "grad_norm": 2.012908697128296, "learning_rate": 6.259159654943173e-06, "loss": 0.3578, "step": 10347 }, { "epoch": 1.259263766352297, "grad_norm": 2.0378217697143555, "learning_rate": 6.2573506626541555e-06, "loss": 0.3543, "step": 10348 }, { "epoch": 1.259385457864314, "grad_norm": 1.4626818895339966, "learning_rate": 6.255541812780692e-06, "loss": 0.3738, "step": 10349 }, { "epoch": 1.259507149376331, "grad_norm": 2.4697868824005127, "learning_rate": 6.2537331053916105e-06, "loss": 0.3798, "step": 10350 }, { "epoch": 1.259628840888348, "grad_norm": 3.879641532897949, "learning_rate": 6.2519245405557424e-06, "loss": 0.4534, "step": 10351 }, { "epoch": 1.259750532400365, "grad_norm": 2.0256338119506836, "learning_rate": 6.250116118341901e-06, "loss": 0.3961, "step": 10352 }, { "epoch": 1.259872223912382, "grad_norm": 1.9154810905456543, "learning_rate": 6.248307838818904e-06, "loss": 0.4165, "step": 10353 }, { "epoch": 1.259993915424399, "grad_norm": 1.7380759716033936, "learning_rate": 6.246499702055559e-06, "loss": 0.4114, "step": 10354 }, { "epoch": 1.260115606936416, "grad_norm": 1.822481393814087, "learning_rate": 6.2446917081206705e-06, "loss": 0.3156, "step": 10355 }, { "epoch": 1.2602372984484331, "grad_norm": 1.9142069816589355, "learning_rate": 6.242883857083034e-06, "loss": 0.4287, "step": 10356 }, { "epoch": 1.2603589899604501, "grad_norm": 2.1088061332702637, "learning_rate": 6.241076149011444e-06, "loss": 0.4112, "step": 10357 }, { "epoch": 1.2604806814724672, "grad_norm": 2.0033791065216064, "learning_rate": 6.239268583974692e-06, "loss": 0.3466, "step": 10358 }, { "epoch": 1.2606023729844844, "grad_norm": 2.1132278442382812, "learning_rate": 6.2374611620415516e-06, "loss": 0.4509, "step": 10359 }, { "epoch": 1.2607240644965014, "grad_norm": 1.5643467903137207, "learning_rate": 6.235653883280799e-06, "loss": 0.4395, "step": 10360 }, { "epoch": 1.2608457560085184, "grad_norm": 1.7210465669631958, "learning_rate": 6.23384674776121e-06, "loss": 0.3896, "step": 10361 }, { "epoch": 1.2609674475205355, "grad_norm": 2.4575791358947754, "learning_rate": 6.2320397555515465e-06, "loss": 0.3392, "step": 10362 }, { "epoch": 1.2610891390325525, "grad_norm": 1.9225492477416992, "learning_rate": 6.230232906720572e-06, "loss": 0.3109, "step": 10363 }, { "epoch": 1.2612108305445695, "grad_norm": 1.8778384923934937, "learning_rate": 6.2284262013370344e-06, "loss": 0.3385, "step": 10364 }, { "epoch": 1.2613325220565865, "grad_norm": 2.1595747470855713, "learning_rate": 6.226619639469689e-06, "loss": 0.3921, "step": 10365 }, { "epoch": 1.2614542135686035, "grad_norm": 2.5503785610198975, "learning_rate": 6.224813221187278e-06, "loss": 0.3422, "step": 10366 }, { "epoch": 1.2615759050806206, "grad_norm": 1.3293473720550537, "learning_rate": 6.223006946558536e-06, "loss": 0.3688, "step": 10367 }, { "epoch": 1.2616975965926376, "grad_norm": 2.0430235862731934, "learning_rate": 6.2212008156521975e-06, "loss": 0.3498, "step": 10368 }, { "epoch": 1.2618192881046548, "grad_norm": 1.8556219339370728, "learning_rate": 6.219394828536996e-06, "loss": 0.3842, "step": 10369 }, { "epoch": 1.2619409796166718, "grad_norm": 1.4079780578613281, "learning_rate": 6.21758898528164e-06, "loss": 0.3325, "step": 10370 }, { "epoch": 1.2620626711286889, "grad_norm": 1.5446381568908691, "learning_rate": 6.215783285954855e-06, "loss": 0.3633, "step": 10371 }, { "epoch": 1.2621843626407059, "grad_norm": 2.038118600845337, "learning_rate": 6.213977730625347e-06, "loss": 0.3862, "step": 10372 }, { "epoch": 1.262306054152723, "grad_norm": 1.4054160118103027, "learning_rate": 6.2121723193618225e-06, "loss": 0.3166, "step": 10373 }, { "epoch": 1.26242774566474, "grad_norm": 3.0983309745788574, "learning_rate": 6.210367052232984e-06, "loss": 0.4216, "step": 10374 }, { "epoch": 1.262549437176757, "grad_norm": 1.440018653869629, "learning_rate": 6.208561929307521e-06, "loss": 0.368, "step": 10375 }, { "epoch": 1.262671128688774, "grad_norm": 2.5499024391174316, "learning_rate": 6.206756950654125e-06, "loss": 0.3805, "step": 10376 }, { "epoch": 1.262792820200791, "grad_norm": 1.522775411605835, "learning_rate": 6.204952116341481e-06, "loss": 0.3586, "step": 10377 }, { "epoch": 1.262914511712808, "grad_norm": 2.750680446624756, "learning_rate": 6.203147426438261e-06, "loss": 0.441, "step": 10378 }, { "epoch": 1.263036203224825, "grad_norm": 2.937898635864258, "learning_rate": 6.20134288101314e-06, "loss": 0.3937, "step": 10379 }, { "epoch": 1.263157894736842, "grad_norm": 1.5787930488586426, "learning_rate": 6.199538480134786e-06, "loss": 0.381, "step": 10380 }, { "epoch": 1.263279586248859, "grad_norm": 1.8676972389221191, "learning_rate": 6.197734223871864e-06, "loss": 0.3818, "step": 10381 }, { "epoch": 1.263401277760876, "grad_norm": 1.419344186782837, "learning_rate": 6.195930112293023e-06, "loss": 0.4017, "step": 10382 }, { "epoch": 1.263522969272893, "grad_norm": 1.6382222175598145, "learning_rate": 6.1941261454669125e-06, "loss": 0.4107, "step": 10383 }, { "epoch": 1.2636446607849103, "grad_norm": 1.8934613466262817, "learning_rate": 6.192322323462181e-06, "loss": 0.4257, "step": 10384 }, { "epoch": 1.2637663522969274, "grad_norm": 1.9939029216766357, "learning_rate": 6.190518646347468e-06, "loss": 0.3989, "step": 10385 }, { "epoch": 1.2638880438089444, "grad_norm": 1.7058422565460205, "learning_rate": 6.1887151141914025e-06, "loss": 0.4125, "step": 10386 }, { "epoch": 1.2640097353209614, "grad_norm": 2.658456325531006, "learning_rate": 6.186911727062617e-06, "loss": 0.3489, "step": 10387 }, { "epoch": 1.2641314268329784, "grad_norm": 3.096217632293701, "learning_rate": 6.185108485029731e-06, "loss": 0.3478, "step": 10388 }, { "epoch": 1.2642531183449954, "grad_norm": 1.8618708848953247, "learning_rate": 6.183305388161369e-06, "loss": 0.3766, "step": 10389 }, { "epoch": 1.2643748098570124, "grad_norm": 1.5148802995681763, "learning_rate": 6.181502436526132e-06, "loss": 0.3652, "step": 10390 }, { "epoch": 1.2644965013690295, "grad_norm": 1.4201743602752686, "learning_rate": 6.179699630192634e-06, "loss": 0.3551, "step": 10391 }, { "epoch": 1.2646181928810465, "grad_norm": 2.339498996734619, "learning_rate": 6.177896969229476e-06, "loss": 0.4219, "step": 10392 }, { "epoch": 1.2647398843930635, "grad_norm": 4.591128349304199, "learning_rate": 6.176094453705246e-06, "loss": 0.4586, "step": 10393 }, { "epoch": 1.2648615759050807, "grad_norm": 1.3773913383483887, "learning_rate": 6.174292083688536e-06, "loss": 0.3465, "step": 10394 }, { "epoch": 1.2649832674170978, "grad_norm": 1.5853371620178223, "learning_rate": 6.17248985924793e-06, "loss": 0.3678, "step": 10395 }, { "epoch": 1.2651049589291148, "grad_norm": 1.7863986492156982, "learning_rate": 6.170687780452009e-06, "loss": 0.3967, "step": 10396 }, { "epoch": 1.2652266504411318, "grad_norm": 1.4064834117889404, "learning_rate": 6.168885847369346e-06, "loss": 0.4242, "step": 10397 }, { "epoch": 1.2653483419531488, "grad_norm": 2.5834035873413086, "learning_rate": 6.167084060068502e-06, "loss": 0.4319, "step": 10398 }, { "epoch": 1.2654700334651658, "grad_norm": 2.945024013519287, "learning_rate": 6.165282418618046e-06, "loss": 0.4173, "step": 10399 }, { "epoch": 1.2655917249771829, "grad_norm": 1.9413625001907349, "learning_rate": 6.163480923086534e-06, "loss": 0.4148, "step": 10400 }, { "epoch": 1.2657134164891999, "grad_norm": 1.3249505758285522, "learning_rate": 6.1616795735425095e-06, "loss": 0.3529, "step": 10401 }, { "epoch": 1.265835108001217, "grad_norm": 2.0023956298828125, "learning_rate": 6.159878370054523e-06, "loss": 0.3941, "step": 10402 }, { "epoch": 1.265956799513234, "grad_norm": 2.4751486778259277, "learning_rate": 6.158077312691116e-06, "loss": 0.3906, "step": 10403 }, { "epoch": 1.266078491025251, "grad_norm": 1.5486822128295898, "learning_rate": 6.156276401520818e-06, "loss": 0.3909, "step": 10404 }, { "epoch": 1.266200182537268, "grad_norm": 1.4864442348480225, "learning_rate": 6.15447563661216e-06, "loss": 0.3464, "step": 10405 }, { "epoch": 1.266321874049285, "grad_norm": 1.8442856073379517, "learning_rate": 6.152675018033662e-06, "loss": 0.3722, "step": 10406 }, { "epoch": 1.266443565561302, "grad_norm": 1.901624321937561, "learning_rate": 6.150874545853842e-06, "loss": 0.4188, "step": 10407 }, { "epoch": 1.266565257073319, "grad_norm": 1.8881466388702393, "learning_rate": 6.149074220141216e-06, "loss": 0.3879, "step": 10408 }, { "epoch": 1.266686948585336, "grad_norm": 1.4268056154251099, "learning_rate": 6.1472740409642854e-06, "loss": 0.3486, "step": 10409 }, { "epoch": 1.2668086400973533, "grad_norm": 2.9135031700134277, "learning_rate": 6.145474008391551e-06, "loss": 0.3641, "step": 10410 }, { "epoch": 1.2669303316093703, "grad_norm": 2.232487440109253, "learning_rate": 6.143674122491512e-06, "loss": 0.3718, "step": 10411 }, { "epoch": 1.2670520231213873, "grad_norm": 2.7165050506591797, "learning_rate": 6.141874383332651e-06, "loss": 0.3647, "step": 10412 }, { "epoch": 1.2671737146334043, "grad_norm": 1.9660296440124512, "learning_rate": 6.140074790983457e-06, "loss": 0.3844, "step": 10413 }, { "epoch": 1.2672954061454214, "grad_norm": 2.1090433597564697, "learning_rate": 6.138275345512407e-06, "loss": 0.4064, "step": 10414 }, { "epoch": 1.2674170976574384, "grad_norm": 2.349531650543213, "learning_rate": 6.136476046987977e-06, "loss": 0.3735, "step": 10415 }, { "epoch": 1.2675387891694554, "grad_norm": 1.6262054443359375, "learning_rate": 6.134676895478628e-06, "loss": 0.3674, "step": 10416 }, { "epoch": 1.2676604806814724, "grad_norm": 1.6123547554016113, "learning_rate": 6.132877891052823e-06, "loss": 0.3453, "step": 10417 }, { "epoch": 1.2677821721934894, "grad_norm": 1.8852487802505493, "learning_rate": 6.131079033779017e-06, "loss": 0.3866, "step": 10418 }, { "epoch": 1.2679038637055067, "grad_norm": 2.070927381515503, "learning_rate": 6.129280323725666e-06, "loss": 0.4192, "step": 10419 }, { "epoch": 1.2680255552175237, "grad_norm": 1.594740629196167, "learning_rate": 6.127481760961208e-06, "loss": 0.363, "step": 10420 }, { "epoch": 1.2681472467295407, "grad_norm": 2.4988176822662354, "learning_rate": 6.125683345554085e-06, "loss": 0.4495, "step": 10421 }, { "epoch": 1.2682689382415577, "grad_norm": 1.6801209449768066, "learning_rate": 6.123885077572729e-06, "loss": 0.2916, "step": 10422 }, { "epoch": 1.2683906297535747, "grad_norm": 2.5788590908050537, "learning_rate": 6.122086957085571e-06, "loss": 0.4272, "step": 10423 }, { "epoch": 1.2685123212655918, "grad_norm": 1.5483750104904175, "learning_rate": 6.120288984161029e-06, "loss": 0.3907, "step": 10424 }, { "epoch": 1.2686340127776088, "grad_norm": 1.8146661520004272, "learning_rate": 6.118491158867523e-06, "loss": 0.4039, "step": 10425 }, { "epoch": 1.2687557042896258, "grad_norm": 1.6989229917526245, "learning_rate": 6.1166934812734635e-06, "loss": 0.3721, "step": 10426 }, { "epoch": 1.2688773958016428, "grad_norm": 1.8849676847457886, "learning_rate": 6.114895951447259e-06, "loss": 0.3449, "step": 10427 }, { "epoch": 1.2689990873136598, "grad_norm": 2.7213823795318604, "learning_rate": 6.113098569457299e-06, "loss": 0.4148, "step": 10428 }, { "epoch": 1.2691207788256769, "grad_norm": 4.174452304840088, "learning_rate": 6.111301335371984e-06, "loss": 0.4482, "step": 10429 }, { "epoch": 1.2692424703376939, "grad_norm": 1.5713180303573608, "learning_rate": 6.109504249259703e-06, "loss": 0.3635, "step": 10430 }, { "epoch": 1.269364161849711, "grad_norm": 3.1803181171417236, "learning_rate": 6.107707311188841e-06, "loss": 0.4462, "step": 10431 }, { "epoch": 1.269485853361728, "grad_norm": 1.4651141166687012, "learning_rate": 6.1059105212277695e-06, "loss": 0.3747, "step": 10432 }, { "epoch": 1.269607544873745, "grad_norm": 1.3476706743240356, "learning_rate": 6.104113879444863e-06, "loss": 0.3531, "step": 10433 }, { "epoch": 1.269729236385762, "grad_norm": 1.4035331010818481, "learning_rate": 6.102317385908489e-06, "loss": 0.3541, "step": 10434 }, { "epoch": 1.2698509278977792, "grad_norm": 2.7053279876708984, "learning_rate": 6.100521040687005e-06, "loss": 0.3502, "step": 10435 }, { "epoch": 1.2699726194097962, "grad_norm": 2.0300729274749756, "learning_rate": 6.098724843848767e-06, "loss": 0.3674, "step": 10436 }, { "epoch": 1.2700943109218132, "grad_norm": 1.6558020114898682, "learning_rate": 6.096928795462125e-06, "loss": 0.3625, "step": 10437 }, { "epoch": 1.2702160024338303, "grad_norm": 3.5191328525543213, "learning_rate": 6.0951328955954225e-06, "loss": 0.468, "step": 10438 }, { "epoch": 1.2703376939458473, "grad_norm": 1.4504538774490356, "learning_rate": 6.093337144316995e-06, "loss": 0.382, "step": 10439 }, { "epoch": 1.2704593854578643, "grad_norm": 1.5944061279296875, "learning_rate": 6.091541541695174e-06, "loss": 0.4136, "step": 10440 }, { "epoch": 1.2705810769698813, "grad_norm": 2.0001142024993896, "learning_rate": 6.089746087798287e-06, "loss": 0.3918, "step": 10441 }, { "epoch": 1.2707027684818983, "grad_norm": 1.9967511892318726, "learning_rate": 6.087950782694657e-06, "loss": 0.3508, "step": 10442 }, { "epoch": 1.2708244599939154, "grad_norm": 1.6681797504425049, "learning_rate": 6.086155626452596e-06, "loss": 0.4393, "step": 10443 }, { "epoch": 1.2709461515059326, "grad_norm": 1.7271015644073486, "learning_rate": 6.084360619140414e-06, "loss": 0.4042, "step": 10444 }, { "epoch": 1.2710678430179496, "grad_norm": 1.9814927577972412, "learning_rate": 6.082565760826417e-06, "loss": 0.3668, "step": 10445 }, { "epoch": 1.2711895345299666, "grad_norm": 1.9423177242279053, "learning_rate": 6.0807710515789e-06, "loss": 0.3419, "step": 10446 }, { "epoch": 1.2713112260419837, "grad_norm": 1.3841135501861572, "learning_rate": 6.078976491466158e-06, "loss": 0.406, "step": 10447 }, { "epoch": 1.2714329175540007, "grad_norm": 1.5829485654830933, "learning_rate": 6.077182080556474e-06, "loss": 0.4288, "step": 10448 }, { "epoch": 1.2715546090660177, "grad_norm": 1.6783592700958252, "learning_rate": 6.0753878189181345e-06, "loss": 0.3824, "step": 10449 }, { "epoch": 1.2716763005780347, "grad_norm": 1.9294824600219727, "learning_rate": 6.073593706619416e-06, "loss": 0.3789, "step": 10450 }, { "epoch": 1.2717979920900517, "grad_norm": 2.5260190963745117, "learning_rate": 6.071799743728578e-06, "loss": 0.4571, "step": 10451 }, { "epoch": 1.2719196836020688, "grad_norm": 2.35833740234375, "learning_rate": 6.070005930313891e-06, "loss": 0.3704, "step": 10452 }, { "epoch": 1.2720413751140858, "grad_norm": 1.851613163948059, "learning_rate": 6.068212266443616e-06, "loss": 0.2806, "step": 10453 }, { "epoch": 1.2721630666261028, "grad_norm": 2.70015811920166, "learning_rate": 6.0664187521859994e-06, "loss": 0.4471, "step": 10454 }, { "epoch": 1.2722847581381198, "grad_norm": 1.429207444190979, "learning_rate": 6.064625387609292e-06, "loss": 0.3852, "step": 10455 }, { "epoch": 1.2724064496501368, "grad_norm": 1.3841800689697266, "learning_rate": 6.062832172781734e-06, "loss": 0.3861, "step": 10456 }, { "epoch": 1.2725281411621538, "grad_norm": 2.082512140274048, "learning_rate": 6.061039107771565e-06, "loss": 0.3865, "step": 10457 }, { "epoch": 1.2726498326741709, "grad_norm": 1.7528374195098877, "learning_rate": 6.0592461926470055e-06, "loss": 0.3884, "step": 10458 }, { "epoch": 1.2727715241861879, "grad_norm": 2.5518510341644287, "learning_rate": 6.057453427476289e-06, "loss": 0.3653, "step": 10459 }, { "epoch": 1.2728932156982051, "grad_norm": 1.5256962776184082, "learning_rate": 6.055660812327631e-06, "loss": 0.3882, "step": 10460 }, { "epoch": 1.2730149072102221, "grad_norm": 1.8758482933044434, "learning_rate": 6.053868347269245e-06, "loss": 0.3497, "step": 10461 }, { "epoch": 1.2731365987222392, "grad_norm": 1.7514450550079346, "learning_rate": 6.052076032369332e-06, "loss": 0.353, "step": 10462 }, { "epoch": 1.2732582902342562, "grad_norm": 1.7371220588684082, "learning_rate": 6.050283867696099e-06, "loss": 0.3624, "step": 10463 }, { "epoch": 1.2733799817462732, "grad_norm": 1.4457263946533203, "learning_rate": 6.04849185331774e-06, "loss": 0.3553, "step": 10464 }, { "epoch": 1.2735016732582902, "grad_norm": 3.211045503616333, "learning_rate": 6.046699989302446e-06, "loss": 0.4267, "step": 10465 }, { "epoch": 1.2736233647703072, "grad_norm": 1.5058374404907227, "learning_rate": 6.0449082757184e-06, "loss": 0.361, "step": 10466 }, { "epoch": 1.2737450562823243, "grad_norm": 2.124441385269165, "learning_rate": 6.043116712633778e-06, "loss": 0.3761, "step": 10467 }, { "epoch": 1.2738667477943413, "grad_norm": 1.8949488401412964, "learning_rate": 6.04132530011676e-06, "loss": 0.4325, "step": 10468 }, { "epoch": 1.2739884393063583, "grad_norm": 1.6625025272369385, "learning_rate": 6.039534038235505e-06, "loss": 0.4106, "step": 10469 }, { "epoch": 1.2741101308183755, "grad_norm": 1.4373219013214111, "learning_rate": 6.037742927058179e-06, "loss": 0.358, "step": 10470 }, { "epoch": 1.2742318223303926, "grad_norm": 1.883716344833374, "learning_rate": 6.035951966652936e-06, "loss": 0.405, "step": 10471 }, { "epoch": 1.2743535138424096, "grad_norm": 2.272364377975464, "learning_rate": 6.034161157087926e-06, "loss": 0.3156, "step": 10472 }, { "epoch": 1.2744752053544266, "grad_norm": 1.715686559677124, "learning_rate": 6.032370498431294e-06, "loss": 0.3643, "step": 10473 }, { "epoch": 1.2745968968664436, "grad_norm": 1.8858438730239868, "learning_rate": 6.030579990751175e-06, "loss": 0.4144, "step": 10474 }, { "epoch": 1.2747185883784606, "grad_norm": 1.255143642425537, "learning_rate": 6.028789634115704e-06, "loss": 0.3419, "step": 10475 }, { "epoch": 1.2748402798904777, "grad_norm": 1.8643040657043457, "learning_rate": 6.026999428593009e-06, "loss": 0.355, "step": 10476 }, { "epoch": 1.2749619714024947, "grad_norm": 2.9114835262298584, "learning_rate": 6.025209374251206e-06, "loss": 0.3684, "step": 10477 }, { "epoch": 1.2750836629145117, "grad_norm": 2.755558729171753, "learning_rate": 6.023419471158416e-06, "loss": 0.4078, "step": 10478 }, { "epoch": 1.2752053544265287, "grad_norm": 2.8887221813201904, "learning_rate": 6.021629719382746e-06, "loss": 0.3392, "step": 10479 }, { "epoch": 1.2753270459385457, "grad_norm": 1.4835281372070312, "learning_rate": 6.0198401189922995e-06, "loss": 0.3395, "step": 10480 }, { "epoch": 1.2754487374505628, "grad_norm": 1.3329451084136963, "learning_rate": 6.018050670055174e-06, "loss": 0.3471, "step": 10481 }, { "epoch": 1.2755704289625798, "grad_norm": 2.1320884227752686, "learning_rate": 6.016261372639464e-06, "loss": 0.3477, "step": 10482 }, { "epoch": 1.2756921204745968, "grad_norm": 1.4166489839553833, "learning_rate": 6.0144722268132546e-06, "loss": 0.3639, "step": 10483 }, { "epoch": 1.2758138119866138, "grad_norm": 2.165843963623047, "learning_rate": 6.01268323264463e-06, "loss": 0.336, "step": 10484 }, { "epoch": 1.275935503498631, "grad_norm": 2.06430721282959, "learning_rate": 6.010894390201658e-06, "loss": 0.381, "step": 10485 }, { "epoch": 1.276057195010648, "grad_norm": 1.5169594287872314, "learning_rate": 6.009105699552411e-06, "loss": 0.3166, "step": 10486 }, { "epoch": 1.276178886522665, "grad_norm": 1.3819568157196045, "learning_rate": 6.007317160764954e-06, "loss": 0.3702, "step": 10487 }, { "epoch": 1.276300578034682, "grad_norm": 2.10349178314209, "learning_rate": 6.005528773907343e-06, "loss": 0.3391, "step": 10488 }, { "epoch": 1.2764222695466991, "grad_norm": 2.059483289718628, "learning_rate": 6.003740539047629e-06, "loss": 0.2821, "step": 10489 }, { "epoch": 1.2765439610587161, "grad_norm": 2.134270429611206, "learning_rate": 6.001952456253859e-06, "loss": 0.3515, "step": 10490 }, { "epoch": 1.2766656525707332, "grad_norm": 2.585449457168579, "learning_rate": 6.000164525594076e-06, "loss": 0.3941, "step": 10491 }, { "epoch": 1.2767873440827502, "grad_norm": 1.4174909591674805, "learning_rate": 5.998376747136311e-06, "loss": 0.3334, "step": 10492 }, { "epoch": 1.2769090355947672, "grad_norm": 2.21463942527771, "learning_rate": 5.996589120948593e-06, "loss": 0.4005, "step": 10493 }, { "epoch": 1.2770307271067842, "grad_norm": 2.017909288406372, "learning_rate": 5.9948016470989465e-06, "loss": 0.3409, "step": 10494 }, { "epoch": 1.2771524186188015, "grad_norm": 2.3163602352142334, "learning_rate": 5.993014325655386e-06, "loss": 0.3779, "step": 10495 }, { "epoch": 1.2772741101308185, "grad_norm": 2.354531764984131, "learning_rate": 5.99122715668593e-06, "loss": 0.3876, "step": 10496 }, { "epoch": 1.2773958016428355, "grad_norm": 2.2206008434295654, "learning_rate": 5.989440140258573e-06, "loss": 0.3968, "step": 10497 }, { "epoch": 1.2775174931548525, "grad_norm": 1.627376914024353, "learning_rate": 5.987653276441321e-06, "loss": 0.4265, "step": 10498 }, { "epoch": 1.2776391846668695, "grad_norm": 2.0795376300811768, "learning_rate": 5.9858665653021675e-06, "loss": 0.4345, "step": 10499 }, { "epoch": 1.2777608761788866, "grad_norm": 1.858400583267212, "learning_rate": 5.984080006909099e-06, "loss": 0.3971, "step": 10500 }, { "epoch": 1.2778825676909036, "grad_norm": 2.803896188735962, "learning_rate": 5.982293601330099e-06, "loss": 0.4094, "step": 10501 }, { "epoch": 1.2780042592029206, "grad_norm": 1.7287530899047852, "learning_rate": 5.980507348633146e-06, "loss": 0.3817, "step": 10502 }, { "epoch": 1.2781259507149376, "grad_norm": 3.074690580368042, "learning_rate": 5.978721248886206e-06, "loss": 0.3365, "step": 10503 }, { "epoch": 1.2782476422269546, "grad_norm": 3.238543748855591, "learning_rate": 5.976935302157245e-06, "loss": 0.3338, "step": 10504 }, { "epoch": 1.2783693337389717, "grad_norm": 1.3183553218841553, "learning_rate": 5.9751495085142254e-06, "loss": 0.3598, "step": 10505 }, { "epoch": 1.2784910252509887, "grad_norm": 1.4870617389678955, "learning_rate": 5.973363868025096e-06, "loss": 0.4025, "step": 10506 }, { "epoch": 1.2786127167630057, "grad_norm": 3.533945083618164, "learning_rate": 5.9715783807578095e-06, "loss": 0.3561, "step": 10507 }, { "epoch": 1.2787344082750227, "grad_norm": 1.8328453302383423, "learning_rate": 5.9697930467803015e-06, "loss": 0.3965, "step": 10508 }, { "epoch": 1.2788560997870397, "grad_norm": 2.160099744796753, "learning_rate": 5.96800786616051e-06, "loss": 0.4167, "step": 10509 }, { "epoch": 1.2789777912990568, "grad_norm": 2.485539436340332, "learning_rate": 5.966222838966367e-06, "loss": 0.4213, "step": 10510 }, { "epoch": 1.279099482811074, "grad_norm": 3.2244744300842285, "learning_rate": 5.96443796526579e-06, "loss": 0.3162, "step": 10511 }, { "epoch": 1.279221174323091, "grad_norm": 1.4829498529434204, "learning_rate": 5.962653245126704e-06, "loss": 0.396, "step": 10512 }, { "epoch": 1.279342865835108, "grad_norm": 2.4302046298980713, "learning_rate": 5.96086867861702e-06, "loss": 0.3655, "step": 10513 }, { "epoch": 1.279464557347125, "grad_norm": 2.0691308975219727, "learning_rate": 5.959084265804643e-06, "loss": 0.3993, "step": 10514 }, { "epoch": 1.279586248859142, "grad_norm": 1.4534651041030884, "learning_rate": 5.957300006757472e-06, "loss": 0.3915, "step": 10515 }, { "epoch": 1.279707940371159, "grad_norm": 3.1938302516937256, "learning_rate": 5.955515901543404e-06, "loss": 0.3721, "step": 10516 }, { "epoch": 1.279829631883176, "grad_norm": 1.5802381038665771, "learning_rate": 5.953731950230331e-06, "loss": 0.3975, "step": 10517 }, { "epoch": 1.2799513233951931, "grad_norm": 1.2114176750183105, "learning_rate": 5.951948152886129e-06, "loss": 0.3343, "step": 10518 }, { "epoch": 1.2800730149072101, "grad_norm": 1.7845396995544434, "learning_rate": 5.950164509578682e-06, "loss": 0.4012, "step": 10519 }, { "epoch": 1.2801947064192274, "grad_norm": 1.4808484315872192, "learning_rate": 5.9483810203758555e-06, "loss": 0.3533, "step": 10520 }, { "epoch": 1.2803163979312444, "grad_norm": 1.723787784576416, "learning_rate": 5.946597685345519e-06, "loss": 0.3638, "step": 10521 }, { "epoch": 1.2804380894432614, "grad_norm": 1.3908257484436035, "learning_rate": 5.9448145045555294e-06, "loss": 0.3404, "step": 10522 }, { "epoch": 1.2805597809552784, "grad_norm": 1.4263622760772705, "learning_rate": 5.9430314780737416e-06, "loss": 0.3533, "step": 10523 }, { "epoch": 1.2806814724672955, "grad_norm": 1.8044443130493164, "learning_rate": 5.941248605968003e-06, "loss": 0.3934, "step": 10524 }, { "epoch": 1.2808031639793125, "grad_norm": 3.0128695964813232, "learning_rate": 5.939465888306159e-06, "loss": 0.4611, "step": 10525 }, { "epoch": 1.2809248554913295, "grad_norm": 1.506617546081543, "learning_rate": 5.93768332515604e-06, "loss": 0.4022, "step": 10526 }, { "epoch": 1.2810465470033465, "grad_norm": 2.5495035648345947, "learning_rate": 5.935900916585478e-06, "loss": 0.3399, "step": 10527 }, { "epoch": 1.2811682385153635, "grad_norm": 2.3683578968048096, "learning_rate": 5.9341186626623025e-06, "loss": 0.4227, "step": 10528 }, { "epoch": 1.2812899300273806, "grad_norm": 2.409839153289795, "learning_rate": 5.932336563454324e-06, "loss": 0.4357, "step": 10529 }, { "epoch": 1.2814116215393976, "grad_norm": 2.0458576679229736, "learning_rate": 5.9305546190293635e-06, "loss": 0.4032, "step": 10530 }, { "epoch": 1.2815333130514146, "grad_norm": 2.188157558441162, "learning_rate": 5.9287728294552195e-06, "loss": 0.3893, "step": 10531 }, { "epoch": 1.2816550045634316, "grad_norm": 2.3667166233062744, "learning_rate": 5.926991194799696e-06, "loss": 0.3384, "step": 10532 }, { "epoch": 1.2817766960754486, "grad_norm": 3.17220401763916, "learning_rate": 5.925209715130591e-06, "loss": 0.309, "step": 10533 }, { "epoch": 1.2818983875874657, "grad_norm": 2.1808674335479736, "learning_rate": 5.923428390515686e-06, "loss": 0.3614, "step": 10534 }, { "epoch": 1.2820200790994827, "grad_norm": 1.6956264972686768, "learning_rate": 5.921647221022772e-06, "loss": 0.393, "step": 10535 }, { "epoch": 1.2821417706115, "grad_norm": 3.252528667449951, "learning_rate": 5.919866206719623e-06, "loss": 0.498, "step": 10536 }, { "epoch": 1.282263462123517, "grad_norm": 1.5510379076004028, "learning_rate": 5.91808534767401e-06, "loss": 0.3572, "step": 10537 }, { "epoch": 1.282385153635534, "grad_norm": 1.9704879522323608, "learning_rate": 5.916304643953696e-06, "loss": 0.3447, "step": 10538 }, { "epoch": 1.282506845147551, "grad_norm": 1.686834454536438, "learning_rate": 5.9145240956264475e-06, "loss": 0.4191, "step": 10539 }, { "epoch": 1.282628536659568, "grad_norm": 2.781834840774536, "learning_rate": 5.912743702760011e-06, "loss": 0.3845, "step": 10540 }, { "epoch": 1.282750228171585, "grad_norm": 1.7784477472305298, "learning_rate": 5.9109634654221356e-06, "loss": 0.3653, "step": 10541 }, { "epoch": 1.282871919683602, "grad_norm": 1.4981151819229126, "learning_rate": 5.90918338368057e-06, "loss": 0.3537, "step": 10542 }, { "epoch": 1.282993611195619, "grad_norm": 1.455384373664856, "learning_rate": 5.907403457603038e-06, "loss": 0.3289, "step": 10543 }, { "epoch": 1.283115302707636, "grad_norm": 1.6621510982513428, "learning_rate": 5.905623687257279e-06, "loss": 0.3691, "step": 10544 }, { "epoch": 1.2832369942196533, "grad_norm": 3.7067105770111084, "learning_rate": 5.903844072711011e-06, "loss": 0.4419, "step": 10545 }, { "epoch": 1.2833586857316703, "grad_norm": 2.1652414798736572, "learning_rate": 5.9020646140319555e-06, "loss": 0.3358, "step": 10546 }, { "epoch": 1.2834803772436874, "grad_norm": 2.1550374031066895, "learning_rate": 5.900285311287826e-06, "loss": 0.4066, "step": 10547 }, { "epoch": 1.2836020687557044, "grad_norm": 1.7441656589508057, "learning_rate": 5.898506164546323e-06, "loss": 0.3564, "step": 10548 }, { "epoch": 1.2837237602677214, "grad_norm": 1.982452392578125, "learning_rate": 5.896727173875151e-06, "loss": 0.3417, "step": 10549 }, { "epoch": 1.2838454517797384, "grad_norm": 2.337707281112671, "learning_rate": 5.894948339342003e-06, "loss": 0.411, "step": 10550 }, { "epoch": 1.2839671432917554, "grad_norm": 1.798502802848816, "learning_rate": 5.89316966101457e-06, "loss": 0.3947, "step": 10551 }, { "epoch": 1.2840888348037724, "grad_norm": 1.6420990228652954, "learning_rate": 5.891391138960529e-06, "loss": 0.3632, "step": 10552 }, { "epoch": 1.2842105263157895, "grad_norm": 1.4075000286102295, "learning_rate": 5.889612773247561e-06, "loss": 0.3591, "step": 10553 }, { "epoch": 1.2843322178278065, "grad_norm": 1.4349812269210815, "learning_rate": 5.887834563943338e-06, "loss": 0.3356, "step": 10554 }, { "epoch": 1.2844539093398235, "grad_norm": 1.5117881298065186, "learning_rate": 5.886056511115519e-06, "loss": 0.3521, "step": 10555 }, { "epoch": 1.2845756008518405, "grad_norm": 1.259488582611084, "learning_rate": 5.884278614831765e-06, "loss": 0.3609, "step": 10556 }, { "epoch": 1.2846972923638575, "grad_norm": 3.270118474960327, "learning_rate": 5.882500875159728e-06, "loss": 0.4136, "step": 10557 }, { "epoch": 1.2848189838758746, "grad_norm": 1.7463117837905884, "learning_rate": 5.880723292167054e-06, "loss": 0.3992, "step": 10558 }, { "epoch": 1.2849406753878916, "grad_norm": 1.4743537902832031, "learning_rate": 5.878945865921389e-06, "loss": 0.3141, "step": 10559 }, { "epoch": 1.2850623668999086, "grad_norm": 1.7920653820037842, "learning_rate": 5.877168596490361e-06, "loss": 0.3667, "step": 10560 }, { "epoch": 1.2851840584119258, "grad_norm": 1.4804673194885254, "learning_rate": 5.875391483941603e-06, "loss": 0.3988, "step": 10561 }, { "epoch": 1.2853057499239429, "grad_norm": 1.9991148710250854, "learning_rate": 5.873614528342737e-06, "loss": 0.3508, "step": 10562 }, { "epoch": 1.2854274414359599, "grad_norm": 1.456757664680481, "learning_rate": 5.871837729761378e-06, "loss": 0.3761, "step": 10563 }, { "epoch": 1.285549132947977, "grad_norm": 1.9267749786376953, "learning_rate": 5.870061088265138e-06, "loss": 0.357, "step": 10564 }, { "epoch": 1.285670824459994, "grad_norm": 1.635720133781433, "learning_rate": 5.868284603921626e-06, "loss": 0.3508, "step": 10565 }, { "epoch": 1.285792515972011, "grad_norm": 1.7783342599868774, "learning_rate": 5.866508276798434e-06, "loss": 0.321, "step": 10566 }, { "epoch": 1.285914207484028, "grad_norm": 1.870968222618103, "learning_rate": 5.864732106963158e-06, "loss": 0.4141, "step": 10567 }, { "epoch": 1.286035898996045, "grad_norm": 1.668931007385254, "learning_rate": 5.862956094483385e-06, "loss": 0.3479, "step": 10568 }, { "epoch": 1.286157590508062, "grad_norm": 1.4777226448059082, "learning_rate": 5.8611802394266944e-06, "loss": 0.3287, "step": 10569 }, { "epoch": 1.286279282020079, "grad_norm": 2.1105763912200928, "learning_rate": 5.859404541860665e-06, "loss": 0.4109, "step": 10570 }, { "epoch": 1.2864009735320963, "grad_norm": 2.2539310455322266, "learning_rate": 5.8576290018528615e-06, "loss": 0.4011, "step": 10571 }, { "epoch": 1.2865226650441133, "grad_norm": 2.440284252166748, "learning_rate": 5.855853619470848e-06, "loss": 0.285, "step": 10572 }, { "epoch": 1.2866443565561303, "grad_norm": 1.7038896083831787, "learning_rate": 5.8540783947821856e-06, "loss": 0.4046, "step": 10573 }, { "epoch": 1.2867660480681473, "grad_norm": 1.7720586061477661, "learning_rate": 5.852303327854418e-06, "loss": 0.3774, "step": 10574 }, { "epoch": 1.2868877395801643, "grad_norm": 2.3250913619995117, "learning_rate": 5.850528418755095e-06, "loss": 0.3411, "step": 10575 }, { "epoch": 1.2870094310921814, "grad_norm": 1.7034109830856323, "learning_rate": 5.848753667551755e-06, "loss": 0.3591, "step": 10576 }, { "epoch": 1.2871311226041984, "grad_norm": 2.0150527954101562, "learning_rate": 5.846979074311934e-06, "loss": 0.4218, "step": 10577 }, { "epoch": 1.2872528141162154, "grad_norm": 2.4230263233184814, "learning_rate": 5.845204639103155e-06, "loss": 0.4298, "step": 10578 }, { "epoch": 1.2873745056282324, "grad_norm": 1.7150936126708984, "learning_rate": 5.8434303619929345e-06, "loss": 0.3683, "step": 10579 }, { "epoch": 1.2874961971402494, "grad_norm": 2.0455844402313232, "learning_rate": 5.841656243048794e-06, "loss": 0.3148, "step": 10580 }, { "epoch": 1.2876178886522665, "grad_norm": 2.2733371257781982, "learning_rate": 5.83988228233824e-06, "loss": 0.4184, "step": 10581 }, { "epoch": 1.2877395801642835, "grad_norm": 1.4156023263931274, "learning_rate": 5.8381084799287776e-06, "loss": 0.3499, "step": 10582 }, { "epoch": 1.2878612716763005, "grad_norm": 1.6243747472763062, "learning_rate": 5.836334835887905e-06, "loss": 0.4141, "step": 10583 }, { "epoch": 1.2879829631883175, "grad_norm": 2.7751576900482178, "learning_rate": 5.834561350283108e-06, "loss": 0.3387, "step": 10584 }, { "epoch": 1.2881046547003345, "grad_norm": 1.280044436454773, "learning_rate": 5.83278802318187e-06, "loss": 0.3603, "step": 10585 }, { "epoch": 1.2882263462123518, "grad_norm": 3.362574815750122, "learning_rate": 5.831014854651678e-06, "loss": 0.3225, "step": 10586 }, { "epoch": 1.2883480377243688, "grad_norm": 1.702264428138733, "learning_rate": 5.829241844759998e-06, "loss": 0.3199, "step": 10587 }, { "epoch": 1.2884697292363858, "grad_norm": 1.7620378732681274, "learning_rate": 5.827468993574301e-06, "loss": 0.2932, "step": 10588 }, { "epoch": 1.2885914207484028, "grad_norm": 2.1318371295928955, "learning_rate": 5.825696301162046e-06, "loss": 0.4398, "step": 10589 }, { "epoch": 1.2887131122604198, "grad_norm": 2.232062578201294, "learning_rate": 5.823923767590682e-06, "loss": 0.302, "step": 10590 }, { "epoch": 1.2888348037724369, "grad_norm": 2.7815849781036377, "learning_rate": 5.822151392927666e-06, "loss": 0.4297, "step": 10591 }, { "epoch": 1.2889564952844539, "grad_norm": 1.9335200786590576, "learning_rate": 5.820379177240438e-06, "loss": 0.4234, "step": 10592 }, { "epoch": 1.289078186796471, "grad_norm": 1.7936642169952393, "learning_rate": 5.81860712059643e-06, "loss": 0.358, "step": 10593 }, { "epoch": 1.289199878308488, "grad_norm": 1.7385996580123901, "learning_rate": 5.81683522306308e-06, "loss": 0.3083, "step": 10594 }, { "epoch": 1.289321569820505, "grad_norm": 1.6687299013137817, "learning_rate": 5.815063484707808e-06, "loss": 0.343, "step": 10595 }, { "epoch": 1.2894432613325222, "grad_norm": 1.531218409538269, "learning_rate": 5.813291905598029e-06, "loss": 0.3396, "step": 10596 }, { "epoch": 1.2895649528445392, "grad_norm": 1.9220924377441406, "learning_rate": 5.811520485801162e-06, "loss": 0.4286, "step": 10597 }, { "epoch": 1.2896866443565562, "grad_norm": 1.8867710828781128, "learning_rate": 5.809749225384611e-06, "loss": 0.4201, "step": 10598 }, { "epoch": 1.2898083358685732, "grad_norm": 1.9965869188308716, "learning_rate": 5.807978124415772e-06, "loss": 0.4259, "step": 10599 }, { "epoch": 1.2899300273805903, "grad_norm": 2.204007625579834, "learning_rate": 5.8062071829620494e-06, "loss": 0.3852, "step": 10600 }, { "epoch": 1.2900517188926073, "grad_norm": 1.8582518100738525, "learning_rate": 5.804436401090816e-06, "loss": 0.355, "step": 10601 }, { "epoch": 1.2901734104046243, "grad_norm": 1.6306055784225464, "learning_rate": 5.802665778869466e-06, "loss": 0.3789, "step": 10602 }, { "epoch": 1.2902951019166413, "grad_norm": 2.7174384593963623, "learning_rate": 5.800895316365371e-06, "loss": 0.3226, "step": 10603 }, { "epoch": 1.2904167934286583, "grad_norm": 1.3473379611968994, "learning_rate": 5.799125013645899e-06, "loss": 0.3615, "step": 10604 }, { "epoch": 1.2905384849406754, "grad_norm": 2.451268434524536, "learning_rate": 5.797354870778418e-06, "loss": 0.4094, "step": 10605 }, { "epoch": 1.2906601764526924, "grad_norm": 1.7882784605026245, "learning_rate": 5.795584887830282e-06, "loss": 0.3843, "step": 10606 }, { "epoch": 1.2907818679647094, "grad_norm": 2.148496389389038, "learning_rate": 5.7938150648688406e-06, "loss": 0.3286, "step": 10607 }, { "epoch": 1.2909035594767264, "grad_norm": 3.206817626953125, "learning_rate": 5.7920454019614455e-06, "loss": 0.3113, "step": 10608 }, { "epoch": 1.2910252509887434, "grad_norm": 2.004594564437866, "learning_rate": 5.790275899175435e-06, "loss": 0.3567, "step": 10609 }, { "epoch": 1.2911469425007605, "grad_norm": 1.7616199254989624, "learning_rate": 5.788506556578135e-06, "loss": 0.4024, "step": 10610 }, { "epoch": 1.2912686340127775, "grad_norm": 2.1099853515625, "learning_rate": 5.786737374236886e-06, "loss": 0.3781, "step": 10611 }, { "epoch": 1.2913903255247947, "grad_norm": 1.5661702156066895, "learning_rate": 5.784968352218991e-06, "loss": 0.3564, "step": 10612 }, { "epoch": 1.2915120170368117, "grad_norm": 2.1131184101104736, "learning_rate": 5.783199490591782e-06, "loss": 0.3801, "step": 10613 }, { "epoch": 1.2916337085488288, "grad_norm": 3.104557991027832, "learning_rate": 5.781430789422559e-06, "loss": 0.3239, "step": 10614 }, { "epoch": 1.2917554000608458, "grad_norm": 2.0992746353149414, "learning_rate": 5.7796622487786225e-06, "loss": 0.3855, "step": 10615 }, { "epoch": 1.2918770915728628, "grad_norm": 1.8904378414154053, "learning_rate": 5.777893868727278e-06, "loss": 0.3703, "step": 10616 }, { "epoch": 1.2919987830848798, "grad_norm": 1.4152432680130005, "learning_rate": 5.7761256493358086e-06, "loss": 0.3598, "step": 10617 }, { "epoch": 1.2921204745968968, "grad_norm": 1.6704233884811401, "learning_rate": 5.7743575906715e-06, "loss": 0.3916, "step": 10618 }, { "epoch": 1.2922421661089138, "grad_norm": 1.9063706398010254, "learning_rate": 5.7725896928016336e-06, "loss": 0.3548, "step": 10619 }, { "epoch": 1.2923638576209309, "grad_norm": 1.5738579034805298, "learning_rate": 5.770821955793482e-06, "loss": 0.3452, "step": 10620 }, { "epoch": 1.292485549132948, "grad_norm": 4.074136734008789, "learning_rate": 5.769054379714303e-06, "loss": 0.4183, "step": 10621 }, { "epoch": 1.2926072406449651, "grad_norm": 1.93488609790802, "learning_rate": 5.767286964631367e-06, "loss": 0.3467, "step": 10622 }, { "epoch": 1.2927289321569821, "grad_norm": 1.6406809091567993, "learning_rate": 5.765519710611922e-06, "loss": 0.3344, "step": 10623 }, { "epoch": 1.2928506236689992, "grad_norm": 1.8336777687072754, "learning_rate": 5.763752617723218e-06, "loss": 0.3799, "step": 10624 }, { "epoch": 1.2929723151810162, "grad_norm": 1.4141950607299805, "learning_rate": 5.7619856860324964e-06, "loss": 0.3339, "step": 10625 }, { "epoch": 1.2930940066930332, "grad_norm": 1.5580123662948608, "learning_rate": 5.760218915606987e-06, "loss": 0.3661, "step": 10626 }, { "epoch": 1.2932156982050502, "grad_norm": 2.565035581588745, "learning_rate": 5.758452306513927e-06, "loss": 0.378, "step": 10627 }, { "epoch": 1.2933373897170672, "grad_norm": 1.2089426517486572, "learning_rate": 5.756685858820532e-06, "loss": 0.3125, "step": 10628 }, { "epoch": 1.2934590812290843, "grad_norm": 1.7160195112228394, "learning_rate": 5.7549195725940284e-06, "loss": 0.3794, "step": 10629 }, { "epoch": 1.2935807727411013, "grad_norm": 1.249646544456482, "learning_rate": 5.753153447901621e-06, "loss": 0.3489, "step": 10630 }, { "epoch": 1.2937024642531183, "grad_norm": 2.717773914337158, "learning_rate": 5.751387484810512e-06, "loss": 0.4082, "step": 10631 }, { "epoch": 1.2938241557651353, "grad_norm": 1.9328533411026, "learning_rate": 5.749621683387907e-06, "loss": 0.3335, "step": 10632 }, { "epoch": 1.2939458472771523, "grad_norm": 1.5630487203598022, "learning_rate": 5.7478560437009955e-06, "loss": 0.373, "step": 10633 }, { "epoch": 1.2940675387891694, "grad_norm": 2.057236671447754, "learning_rate": 5.746090565816962e-06, "loss": 0.3707, "step": 10634 }, { "epoch": 1.2941892303011864, "grad_norm": 1.8337665796279907, "learning_rate": 5.744325249802989e-06, "loss": 0.3275, "step": 10635 }, { "epoch": 1.2943109218132034, "grad_norm": 1.6739068031311035, "learning_rate": 5.742560095726244e-06, "loss": 0.3691, "step": 10636 }, { "epoch": 1.2944326133252206, "grad_norm": 2.1306872367858887, "learning_rate": 5.7407951036539025e-06, "loss": 0.412, "step": 10637 }, { "epoch": 1.2945543048372377, "grad_norm": 2.0116026401519775, "learning_rate": 5.7390302736531255e-06, "loss": 0.4278, "step": 10638 }, { "epoch": 1.2946759963492547, "grad_norm": 1.5661468505859375, "learning_rate": 5.737265605791062e-06, "loss": 0.3651, "step": 10639 }, { "epoch": 1.2947976878612717, "grad_norm": 2.0398988723754883, "learning_rate": 5.7355011001348686e-06, "loss": 0.467, "step": 10640 }, { "epoch": 1.2949193793732887, "grad_norm": 1.9183504581451416, "learning_rate": 5.733736756751686e-06, "loss": 0.4234, "step": 10641 }, { "epoch": 1.2950410708853057, "grad_norm": 2.205601453781128, "learning_rate": 5.731972575708646e-06, "loss": 0.394, "step": 10642 }, { "epoch": 1.2951627623973228, "grad_norm": 2.178201913833618, "learning_rate": 5.730208557072887e-06, "loss": 0.4267, "step": 10643 }, { "epoch": 1.2952844539093398, "grad_norm": 2.3975582122802734, "learning_rate": 5.728444700911533e-06, "loss": 0.3761, "step": 10644 }, { "epoch": 1.2954061454213568, "grad_norm": 1.3895853757858276, "learning_rate": 5.7266810072916936e-06, "loss": 0.376, "step": 10645 }, { "epoch": 1.295527836933374, "grad_norm": 2.3043224811553955, "learning_rate": 5.724917476280496e-06, "loss": 0.4607, "step": 10646 }, { "epoch": 1.295649528445391, "grad_norm": 2.0370213985443115, "learning_rate": 5.723154107945029e-06, "loss": 0.3345, "step": 10647 }, { "epoch": 1.295771219957408, "grad_norm": 2.3514859676361084, "learning_rate": 5.721390902352405e-06, "loss": 0.3619, "step": 10648 }, { "epoch": 1.295892911469425, "grad_norm": 2.2256734371185303, "learning_rate": 5.719627859569714e-06, "loss": 0.3425, "step": 10649 }, { "epoch": 1.296014602981442, "grad_norm": 1.5580493211746216, "learning_rate": 5.717864979664038e-06, "loss": 0.3368, "step": 10650 }, { "epoch": 1.2961362944934591, "grad_norm": 2.3323075771331787, "learning_rate": 5.7161022627024675e-06, "loss": 0.3701, "step": 10651 }, { "epoch": 1.2962579860054761, "grad_norm": 2.9248900413513184, "learning_rate": 5.714339708752072e-06, "loss": 0.4025, "step": 10652 }, { "epoch": 1.2963796775174932, "grad_norm": 1.6240113973617554, "learning_rate": 5.712577317879918e-06, "loss": 0.3722, "step": 10653 }, { "epoch": 1.2965013690295102, "grad_norm": 1.3895801305770874, "learning_rate": 5.7108150901530766e-06, "loss": 0.312, "step": 10654 }, { "epoch": 1.2966230605415272, "grad_norm": 1.9655169248580933, "learning_rate": 5.709053025638598e-06, "loss": 0.3301, "step": 10655 }, { "epoch": 1.2967447520535442, "grad_norm": 1.5269062519073486, "learning_rate": 5.70729112440353e-06, "loss": 0.3574, "step": 10656 }, { "epoch": 1.2968664435655612, "grad_norm": 3.039123773574829, "learning_rate": 5.705529386514928e-06, "loss": 0.4181, "step": 10657 }, { "epoch": 1.2969881350775783, "grad_norm": 3.578644275665283, "learning_rate": 5.703767812039813e-06, "loss": 0.3562, "step": 10658 }, { "epoch": 1.2971098265895953, "grad_norm": 1.9839129447937012, "learning_rate": 5.702006401045231e-06, "loss": 0.3227, "step": 10659 }, { "epoch": 1.2972315181016123, "grad_norm": 1.5482141971588135, "learning_rate": 5.700245153598201e-06, "loss": 0.3848, "step": 10660 }, { "epoch": 1.2973532096136293, "grad_norm": 1.9916326999664307, "learning_rate": 5.698484069765739e-06, "loss": 0.4203, "step": 10661 }, { "epoch": 1.2974749011256466, "grad_norm": 1.2304826974868774, "learning_rate": 5.696723149614866e-06, "loss": 0.3572, "step": 10662 }, { "epoch": 1.2975965926376636, "grad_norm": 1.5947169065475464, "learning_rate": 5.694962393212584e-06, "loss": 0.4098, "step": 10663 }, { "epoch": 1.2977182841496806, "grad_norm": 1.5367302894592285, "learning_rate": 5.693201800625892e-06, "loss": 0.3788, "step": 10664 }, { "epoch": 1.2978399756616976, "grad_norm": 1.4731502532958984, "learning_rate": 5.691441371921787e-06, "loss": 0.3261, "step": 10665 }, { "epoch": 1.2979616671737146, "grad_norm": 1.7778632640838623, "learning_rate": 5.689681107167258e-06, "loss": 0.3284, "step": 10666 }, { "epoch": 1.2980833586857317, "grad_norm": 1.5393034219741821, "learning_rate": 5.68792100642928e-06, "loss": 0.3748, "step": 10667 }, { "epoch": 1.2982050501977487, "grad_norm": 1.6737003326416016, "learning_rate": 5.686161069774837e-06, "loss": 0.3791, "step": 10668 }, { "epoch": 1.2983267417097657, "grad_norm": 2.314873218536377, "learning_rate": 5.684401297270894e-06, "loss": 0.4198, "step": 10669 }, { "epoch": 1.2984484332217827, "grad_norm": 1.4829821586608887, "learning_rate": 5.682641688984416e-06, "loss": 0.3542, "step": 10670 }, { "epoch": 1.2985701247337997, "grad_norm": 1.8030195236206055, "learning_rate": 5.680882244982357e-06, "loss": 0.4122, "step": 10671 }, { "epoch": 1.298691816245817, "grad_norm": 1.9843130111694336, "learning_rate": 5.679122965331665e-06, "loss": 0.3588, "step": 10672 }, { "epoch": 1.298813507757834, "grad_norm": 1.4244213104248047, "learning_rate": 5.677363850099293e-06, "loss": 0.3552, "step": 10673 }, { "epoch": 1.298935199269851, "grad_norm": 2.3985137939453125, "learning_rate": 5.675604899352171e-06, "loss": 0.3475, "step": 10674 }, { "epoch": 1.299056890781868, "grad_norm": 2.278571367263794, "learning_rate": 5.673846113157232e-06, "loss": 0.3637, "step": 10675 }, { "epoch": 1.299178582293885, "grad_norm": 1.2912546396255493, "learning_rate": 5.672087491581406e-06, "loss": 0.3433, "step": 10676 }, { "epoch": 1.299300273805902, "grad_norm": 1.8979377746582031, "learning_rate": 5.670329034691611e-06, "loss": 0.3946, "step": 10677 }, { "epoch": 1.299421965317919, "grad_norm": 1.7651402950286865, "learning_rate": 5.668570742554752e-06, "loss": 0.3755, "step": 10678 }, { "epoch": 1.2995436568299361, "grad_norm": 1.1389776468276978, "learning_rate": 5.666812615237747e-06, "loss": 0.3238, "step": 10679 }, { "epoch": 1.2996653483419531, "grad_norm": 1.4201276302337646, "learning_rate": 5.6650546528074905e-06, "loss": 0.3834, "step": 10680 }, { "epoch": 1.2997870398539701, "grad_norm": 1.8664674758911133, "learning_rate": 5.663296855330878e-06, "loss": 0.3467, "step": 10681 }, { "epoch": 1.2999087313659872, "grad_norm": 2.3464839458465576, "learning_rate": 5.661539222874795e-06, "loss": 0.3807, "step": 10682 }, { "epoch": 1.3000304228780042, "grad_norm": 1.6741420030593872, "learning_rate": 5.659781755506122e-06, "loss": 0.3618, "step": 10683 }, { "epoch": 1.3001521143900212, "grad_norm": 1.6574125289916992, "learning_rate": 5.658024453291741e-06, "loss": 0.3533, "step": 10684 }, { "epoch": 1.3002738059020382, "grad_norm": 1.9747450351715088, "learning_rate": 5.656267316298517e-06, "loss": 0.343, "step": 10685 }, { "epoch": 1.3003954974140552, "grad_norm": 2.5942485332489014, "learning_rate": 5.654510344593309e-06, "loss": 0.2748, "step": 10686 }, { "epoch": 1.3005171889260725, "grad_norm": 1.6068029403686523, "learning_rate": 5.652753538242981e-06, "loss": 0.3765, "step": 10687 }, { "epoch": 1.3006388804380895, "grad_norm": 1.290798306465149, "learning_rate": 5.650996897314374e-06, "loss": 0.2886, "step": 10688 }, { "epoch": 1.3007605719501065, "grad_norm": 2.5814549922943115, "learning_rate": 5.649240421874342e-06, "loss": 0.3965, "step": 10689 }, { "epoch": 1.3008822634621235, "grad_norm": 1.8892725706100464, "learning_rate": 5.647484111989716e-06, "loss": 0.3884, "step": 10690 }, { "epoch": 1.3010039549741406, "grad_norm": 1.7181422710418701, "learning_rate": 5.645727967727327e-06, "loss": 0.3445, "step": 10691 }, { "epoch": 1.3011256464861576, "grad_norm": 2.149254560470581, "learning_rate": 5.643971989154009e-06, "loss": 0.3446, "step": 10692 }, { "epoch": 1.3012473379981746, "grad_norm": 1.5193428993225098, "learning_rate": 5.642216176336568e-06, "loss": 0.3554, "step": 10693 }, { "epoch": 1.3013690295101916, "grad_norm": 1.44844388961792, "learning_rate": 5.640460529341819e-06, "loss": 0.351, "step": 10694 }, { "epoch": 1.3014907210222086, "grad_norm": 2.9043166637420654, "learning_rate": 5.638705048236574e-06, "loss": 0.3533, "step": 10695 }, { "epoch": 1.3016124125342257, "grad_norm": 2.613502025604248, "learning_rate": 5.636949733087626e-06, "loss": 0.3205, "step": 10696 }, { "epoch": 1.301734104046243, "grad_norm": 1.4322253465652466, "learning_rate": 5.635194583961775e-06, "loss": 0.3664, "step": 10697 }, { "epoch": 1.30185579555826, "grad_norm": 2.0034613609313965, "learning_rate": 5.633439600925805e-06, "loss": 0.4041, "step": 10698 }, { "epoch": 1.301977487070277, "grad_norm": 1.4181780815124512, "learning_rate": 5.631684784046492e-06, "loss": 0.3659, "step": 10699 }, { "epoch": 1.302099178582294, "grad_norm": 1.8612855672836304, "learning_rate": 5.6299301333906194e-06, "loss": 0.3833, "step": 10700 }, { "epoch": 1.302220870094311, "grad_norm": 2.5197055339813232, "learning_rate": 5.628175649024951e-06, "loss": 0.4205, "step": 10701 }, { "epoch": 1.302342561606328, "grad_norm": 2.4088521003723145, "learning_rate": 5.626421331016243e-06, "loss": 0.4156, "step": 10702 }, { "epoch": 1.302464253118345, "grad_norm": 1.8909119367599487, "learning_rate": 5.624667179431265e-06, "loss": 0.3487, "step": 10703 }, { "epoch": 1.302585944630362, "grad_norm": 2.771806478500366, "learning_rate": 5.62291319433675e-06, "loss": 0.4214, "step": 10704 }, { "epoch": 1.302707636142379, "grad_norm": 2.0864429473876953, "learning_rate": 5.621159375799452e-06, "loss": 0.3821, "step": 10705 }, { "epoch": 1.302829327654396, "grad_norm": 2.33351993560791, "learning_rate": 5.619405723886102e-06, "loss": 0.4004, "step": 10706 }, { "epoch": 1.302951019166413, "grad_norm": 1.4285186529159546, "learning_rate": 5.6176522386634294e-06, "loss": 0.3605, "step": 10707 }, { "epoch": 1.3030727106784301, "grad_norm": 1.5547945499420166, "learning_rate": 5.615898920198162e-06, "loss": 0.3996, "step": 10708 }, { "epoch": 1.3031944021904471, "grad_norm": 2.567091703414917, "learning_rate": 5.614145768557017e-06, "loss": 0.3799, "step": 10709 }, { "epoch": 1.3033160937024642, "grad_norm": 2.726357936859131, "learning_rate": 5.612392783806699e-06, "loss": 0.3692, "step": 10710 }, { "epoch": 1.3034377852144812, "grad_norm": 1.7168859243392944, "learning_rate": 5.610639966013921e-06, "loss": 0.3679, "step": 10711 }, { "epoch": 1.3035594767264982, "grad_norm": 3.060255289077759, "learning_rate": 5.608887315245378e-06, "loss": 0.3629, "step": 10712 }, { "epoch": 1.3036811682385154, "grad_norm": 1.4523892402648926, "learning_rate": 5.6071348315677585e-06, "loss": 0.304, "step": 10713 }, { "epoch": 1.3038028597505325, "grad_norm": 1.5616751909255981, "learning_rate": 5.605382515047755e-06, "loss": 0.3587, "step": 10714 }, { "epoch": 1.3039245512625495, "grad_norm": 1.6984819173812866, "learning_rate": 5.603630365752043e-06, "loss": 0.3928, "step": 10715 }, { "epoch": 1.3040462427745665, "grad_norm": 1.3906701803207397, "learning_rate": 5.601878383747295e-06, "loss": 0.3763, "step": 10716 }, { "epoch": 1.3041679342865835, "grad_norm": 2.763263702392578, "learning_rate": 5.6001265691001795e-06, "loss": 0.3068, "step": 10717 }, { "epoch": 1.3042896257986005, "grad_norm": 1.929206132888794, "learning_rate": 5.5983749218773496e-06, "loss": 0.4249, "step": 10718 }, { "epoch": 1.3044113173106175, "grad_norm": 2.449132204055786, "learning_rate": 5.59662344214547e-06, "loss": 0.4549, "step": 10719 }, { "epoch": 1.3045330088226346, "grad_norm": 1.7186609506607056, "learning_rate": 5.594872129971183e-06, "loss": 0.3823, "step": 10720 }, { "epoch": 1.3046547003346516, "grad_norm": 1.771635890007019, "learning_rate": 5.593120985421125e-06, "loss": 0.3568, "step": 10721 }, { "epoch": 1.3047763918466688, "grad_norm": 1.5166743993759155, "learning_rate": 5.59137000856194e-06, "loss": 0.384, "step": 10722 }, { "epoch": 1.3048980833586858, "grad_norm": 1.4323418140411377, "learning_rate": 5.58961919946025e-06, "loss": 0.3916, "step": 10723 }, { "epoch": 1.3050197748707029, "grad_norm": 2.5157432556152344, "learning_rate": 5.587868558182676e-06, "loss": 0.314, "step": 10724 }, { "epoch": 1.3051414663827199, "grad_norm": 1.4859994649887085, "learning_rate": 5.58611808479584e-06, "loss": 0.3415, "step": 10725 }, { "epoch": 1.305263157894737, "grad_norm": 1.9007946252822876, "learning_rate": 5.584367779366346e-06, "loss": 0.3763, "step": 10726 }, { "epoch": 1.305384849406754, "grad_norm": 2.0490918159484863, "learning_rate": 5.582617641960799e-06, "loss": 0.3638, "step": 10727 }, { "epoch": 1.305506540918771, "grad_norm": 1.6577391624450684, "learning_rate": 5.580867672645793e-06, "loss": 0.3861, "step": 10728 }, { "epoch": 1.305628232430788, "grad_norm": 2.240692615509033, "learning_rate": 5.579117871487915e-06, "loss": 0.4245, "step": 10729 }, { "epoch": 1.305749923942805, "grad_norm": 1.5075914859771729, "learning_rate": 5.5773682385537575e-06, "loss": 0.3699, "step": 10730 }, { "epoch": 1.305871615454822, "grad_norm": 2.7460334300994873, "learning_rate": 5.5756187739098945e-06, "loss": 0.4151, "step": 10731 }, { "epoch": 1.305993306966839, "grad_norm": 1.6418988704681396, "learning_rate": 5.573869477622889e-06, "loss": 0.3999, "step": 10732 }, { "epoch": 1.306114998478856, "grad_norm": 1.6874862909317017, "learning_rate": 5.5721203497593155e-06, "loss": 0.3925, "step": 10733 }, { "epoch": 1.306236689990873, "grad_norm": 1.4764375686645508, "learning_rate": 5.5703713903857285e-06, "loss": 0.3305, "step": 10734 }, { "epoch": 1.30635838150289, "grad_norm": 1.5199888944625854, "learning_rate": 5.568622599568674e-06, "loss": 0.3697, "step": 10735 }, { "epoch": 1.306480073014907, "grad_norm": 1.8549597263336182, "learning_rate": 5.566873977374707e-06, "loss": 0.3898, "step": 10736 }, { "epoch": 1.3066017645269241, "grad_norm": 2.7467002868652344, "learning_rate": 5.565125523870361e-06, "loss": 0.3731, "step": 10737 }, { "epoch": 1.3067234560389414, "grad_norm": 2.816716432571411, "learning_rate": 5.5633772391221685e-06, "loss": 0.3947, "step": 10738 }, { "epoch": 1.3068451475509584, "grad_norm": 1.8271758556365967, "learning_rate": 5.561629123196656e-06, "loss": 0.3973, "step": 10739 }, { "epoch": 1.3069668390629754, "grad_norm": 1.7588022947311401, "learning_rate": 5.559881176160338e-06, "loss": 0.4333, "step": 10740 }, { "epoch": 1.3070885305749924, "grad_norm": 3.211850643157959, "learning_rate": 5.558133398079735e-06, "loss": 0.4231, "step": 10741 }, { "epoch": 1.3072102220870094, "grad_norm": 2.4309945106506348, "learning_rate": 5.55638578902135e-06, "loss": 0.3266, "step": 10742 }, { "epoch": 1.3073319135990265, "grad_norm": 2.169997215270996, "learning_rate": 5.554638349051679e-06, "loss": 0.3928, "step": 10743 }, { "epoch": 1.3074536051110435, "grad_norm": 1.443649411201477, "learning_rate": 5.552891078237227e-06, "loss": 0.4134, "step": 10744 }, { "epoch": 1.3075752966230605, "grad_norm": 3.0654757022857666, "learning_rate": 5.551143976644473e-06, "loss": 0.4624, "step": 10745 }, { "epoch": 1.3076969881350775, "grad_norm": 1.5582939386367798, "learning_rate": 5.5493970443398945e-06, "loss": 0.3789, "step": 10746 }, { "epoch": 1.3078186796470948, "grad_norm": 1.8564646244049072, "learning_rate": 5.5476502813899755e-06, "loss": 0.3987, "step": 10747 }, { "epoch": 1.3079403711591118, "grad_norm": 1.7201308012008667, "learning_rate": 5.545903687861176e-06, "loss": 0.3771, "step": 10748 }, { "epoch": 1.3080620626711288, "grad_norm": 1.351222038269043, "learning_rate": 5.544157263819967e-06, "loss": 0.3648, "step": 10749 }, { "epoch": 1.3081837541831458, "grad_norm": 1.7537788152694702, "learning_rate": 5.542411009332795e-06, "loss": 0.3215, "step": 10750 }, { "epoch": 1.3083054456951628, "grad_norm": 1.9958311319351196, "learning_rate": 5.540664924466106e-06, "loss": 0.3602, "step": 10751 }, { "epoch": 1.3084271372071798, "grad_norm": 3.8747355937957764, "learning_rate": 5.53891900928635e-06, "loss": 0.4722, "step": 10752 }, { "epoch": 1.3085488287191969, "grad_norm": 1.4203927516937256, "learning_rate": 5.537173263859961e-06, "loss": 0.3344, "step": 10753 }, { "epoch": 1.3086705202312139, "grad_norm": 2.0952188968658447, "learning_rate": 5.535427688253362e-06, "loss": 0.4384, "step": 10754 }, { "epoch": 1.308792211743231, "grad_norm": 2.8315587043762207, "learning_rate": 5.5336822825329836e-06, "loss": 0.3081, "step": 10755 }, { "epoch": 1.308913903255248, "grad_norm": 2.5170958042144775, "learning_rate": 5.531937046765237e-06, "loss": 0.3943, "step": 10756 }, { "epoch": 1.309035594767265, "grad_norm": 1.618561863899231, "learning_rate": 5.530191981016537e-06, "loss": 0.4014, "step": 10757 }, { "epoch": 1.309157286279282, "grad_norm": 1.997988224029541, "learning_rate": 5.5284470853532855e-06, "loss": 0.3795, "step": 10758 }, { "epoch": 1.309278977791299, "grad_norm": 1.512786865234375, "learning_rate": 5.526702359841873e-06, "loss": 0.4177, "step": 10759 }, { "epoch": 1.309400669303316, "grad_norm": 1.3573386669158936, "learning_rate": 5.524957804548698e-06, "loss": 0.3428, "step": 10760 }, { "epoch": 1.309522360815333, "grad_norm": 1.9457066059112549, "learning_rate": 5.523213419540147e-06, "loss": 0.4584, "step": 10761 }, { "epoch": 1.30964405232735, "grad_norm": 2.474031925201416, "learning_rate": 5.521469204882584e-06, "loss": 0.451, "step": 10762 }, { "epoch": 1.3097657438393673, "grad_norm": 1.7259163856506348, "learning_rate": 5.519725160642391e-06, "loss": 0.4013, "step": 10763 }, { "epoch": 1.3098874353513843, "grad_norm": 3.171025514602661, "learning_rate": 5.517981286885925e-06, "loss": 0.3812, "step": 10764 }, { "epoch": 1.3100091268634013, "grad_norm": 1.64792799949646, "learning_rate": 5.516237583679554e-06, "loss": 0.4041, "step": 10765 }, { "epoch": 1.3101308183754183, "grad_norm": 2.7931742668151855, "learning_rate": 5.514494051089622e-06, "loss": 0.3502, "step": 10766 }, { "epoch": 1.3102525098874354, "grad_norm": 1.377469778060913, "learning_rate": 5.512750689182471e-06, "loss": 0.4009, "step": 10767 }, { "epoch": 1.3103742013994524, "grad_norm": 1.6480742692947388, "learning_rate": 5.51100749802445e-06, "loss": 0.3846, "step": 10768 }, { "epoch": 1.3104958929114694, "grad_norm": 2.1429126262664795, "learning_rate": 5.509264477681886e-06, "loss": 0.3927, "step": 10769 }, { "epoch": 1.3106175844234864, "grad_norm": 1.7459510564804077, "learning_rate": 5.507521628221099e-06, "loss": 0.4333, "step": 10770 }, { "epoch": 1.3107392759355034, "grad_norm": 1.4798492193222046, "learning_rate": 5.505778949708416e-06, "loss": 0.3349, "step": 10771 }, { "epoch": 1.3108609674475205, "grad_norm": 1.6968584060668945, "learning_rate": 5.504036442210148e-06, "loss": 0.3759, "step": 10772 }, { "epoch": 1.3109826589595377, "grad_norm": 2.111764907836914, "learning_rate": 5.502294105792598e-06, "loss": 0.3781, "step": 10773 }, { "epoch": 1.3111043504715547, "grad_norm": 1.8388253450393677, "learning_rate": 5.50055194052207e-06, "loss": 0.3338, "step": 10774 }, { "epoch": 1.3112260419835717, "grad_norm": 3.487530469894409, "learning_rate": 5.498809946464847e-06, "loss": 0.3922, "step": 10775 }, { "epoch": 1.3113477334955888, "grad_norm": 1.7745195627212524, "learning_rate": 5.497068123687228e-06, "loss": 0.3338, "step": 10776 }, { "epoch": 1.3114694250076058, "grad_norm": 1.4318186044692993, "learning_rate": 5.495326472255486e-06, "loss": 0.3459, "step": 10777 }, { "epoch": 1.3115911165196228, "grad_norm": 2.126408100128174, "learning_rate": 5.4935849922358944e-06, "loss": 0.3566, "step": 10778 }, { "epoch": 1.3117128080316398, "grad_norm": 1.5190150737762451, "learning_rate": 5.491843683694723e-06, "loss": 0.3168, "step": 10779 }, { "epoch": 1.3118344995436568, "grad_norm": 1.3851282596588135, "learning_rate": 5.490102546698233e-06, "loss": 0.3795, "step": 10780 }, { "epoch": 1.3119561910556738, "grad_norm": 2.327291488647461, "learning_rate": 5.48836158131267e-06, "loss": 0.3821, "step": 10781 }, { "epoch": 1.3120778825676909, "grad_norm": 2.2141382694244385, "learning_rate": 5.486620787604292e-06, "loss": 0.3996, "step": 10782 }, { "epoch": 1.3121995740797079, "grad_norm": 2.753286838531494, "learning_rate": 5.484880165639336e-06, "loss": 0.4084, "step": 10783 }, { "epoch": 1.312321265591725, "grad_norm": 1.3257920742034912, "learning_rate": 5.483139715484035e-06, "loss": 0.331, "step": 10784 }, { "epoch": 1.312442957103742, "grad_norm": 1.4459072351455688, "learning_rate": 5.481399437204618e-06, "loss": 0.3779, "step": 10785 }, { "epoch": 1.312564648615759, "grad_norm": 2.0659801959991455, "learning_rate": 5.4796593308673e-06, "loss": 0.3852, "step": 10786 }, { "epoch": 1.312686340127776, "grad_norm": 1.7968263626098633, "learning_rate": 5.477919396538305e-06, "loss": 0.3869, "step": 10787 }, { "epoch": 1.3128080316397932, "grad_norm": 2.095046281814575, "learning_rate": 5.476179634283838e-06, "loss": 0.3218, "step": 10788 }, { "epoch": 1.3129297231518102, "grad_norm": 1.79927396774292, "learning_rate": 5.474440044170095e-06, "loss": 0.4439, "step": 10789 }, { "epoch": 1.3130514146638272, "grad_norm": 1.4196207523345947, "learning_rate": 5.472700626263281e-06, "loss": 0.3987, "step": 10790 }, { "epoch": 1.3131731061758443, "grad_norm": 1.6785361766815186, "learning_rate": 5.470961380629578e-06, "loss": 0.3916, "step": 10791 }, { "epoch": 1.3132947976878613, "grad_norm": 2.620760917663574, "learning_rate": 5.4692223073351665e-06, "loss": 0.4446, "step": 10792 }, { "epoch": 1.3134164891998783, "grad_norm": 2.1112852096557617, "learning_rate": 5.467483406446228e-06, "loss": 0.3836, "step": 10793 }, { "epoch": 1.3135381807118953, "grad_norm": 2.9808385372161865, "learning_rate": 5.465744678028926e-06, "loss": 0.4082, "step": 10794 }, { "epoch": 1.3136598722239123, "grad_norm": 1.2926630973815918, "learning_rate": 5.464006122149426e-06, "loss": 0.3539, "step": 10795 }, { "epoch": 1.3137815637359294, "grad_norm": 2.858870506286621, "learning_rate": 5.462267738873881e-06, "loss": 0.349, "step": 10796 }, { "epoch": 1.3139032552479464, "grad_norm": 3.8252112865448, "learning_rate": 5.4605295282684375e-06, "loss": 0.3765, "step": 10797 }, { "epoch": 1.3140249467599636, "grad_norm": 2.7853469848632812, "learning_rate": 5.458791490399247e-06, "loss": 0.3613, "step": 10798 }, { "epoch": 1.3141466382719806, "grad_norm": 1.9212602376937866, "learning_rate": 5.457053625332438e-06, "loss": 0.4021, "step": 10799 }, { "epoch": 1.3142683297839977, "grad_norm": 1.397031545639038, "learning_rate": 5.455315933134139e-06, "loss": 0.3773, "step": 10800 }, { "epoch": 1.3143900212960147, "grad_norm": 1.456973910331726, "learning_rate": 5.45357841387048e-06, "loss": 0.3913, "step": 10801 }, { "epoch": 1.3145117128080317, "grad_norm": 3.196429491043091, "learning_rate": 5.451841067607572e-06, "loss": 0.4016, "step": 10802 }, { "epoch": 1.3146334043200487, "grad_norm": 2.8266994953155518, "learning_rate": 5.450103894411522e-06, "loss": 0.3874, "step": 10803 }, { "epoch": 1.3147550958320657, "grad_norm": 2.5611789226531982, "learning_rate": 5.44836689434844e-06, "loss": 0.4418, "step": 10804 }, { "epoch": 1.3148767873440828, "grad_norm": 1.2674380540847778, "learning_rate": 5.446630067484419e-06, "loss": 0.3815, "step": 10805 }, { "epoch": 1.3149984788560998, "grad_norm": 2.272742748260498, "learning_rate": 5.4448934138855466e-06, "loss": 0.3356, "step": 10806 }, { "epoch": 1.3151201703681168, "grad_norm": 2.0775740146636963, "learning_rate": 5.443156933617915e-06, "loss": 0.4188, "step": 10807 }, { "epoch": 1.3152418618801338, "grad_norm": 2.362074375152588, "learning_rate": 5.441420626747586e-06, "loss": 0.3616, "step": 10808 }, { "epoch": 1.3153635533921508, "grad_norm": 1.909151554107666, "learning_rate": 5.439684493340644e-06, "loss": 0.4277, "step": 10809 }, { "epoch": 1.3154852449041678, "grad_norm": 1.517953872680664, "learning_rate": 5.437948533463145e-06, "loss": 0.3889, "step": 10810 }, { "epoch": 1.3156069364161849, "grad_norm": 3.1799468994140625, "learning_rate": 5.436212747181142e-06, "loss": 0.4338, "step": 10811 }, { "epoch": 1.3157286279282019, "grad_norm": 3.3358051776885986, "learning_rate": 5.4344771345606965e-06, "loss": 0.3598, "step": 10812 }, { "epoch": 1.315850319440219, "grad_norm": 2.6447231769561768, "learning_rate": 5.432741695667845e-06, "loss": 0.3384, "step": 10813 }, { "epoch": 1.3159720109522361, "grad_norm": 1.3671926259994507, "learning_rate": 5.431006430568622e-06, "loss": 0.3934, "step": 10814 }, { "epoch": 1.3160937024642532, "grad_norm": 1.8913819789886475, "learning_rate": 5.429271339329065e-06, "loss": 0.4453, "step": 10815 }, { "epoch": 1.3162153939762702, "grad_norm": 2.112213134765625, "learning_rate": 5.42753642201519e-06, "loss": 0.4043, "step": 10816 }, { "epoch": 1.3163370854882872, "grad_norm": 1.8930308818817139, "learning_rate": 5.425801678693023e-06, "loss": 0.4618, "step": 10817 }, { "epoch": 1.3164587770003042, "grad_norm": 1.4577845335006714, "learning_rate": 5.424067109428574e-06, "loss": 0.3792, "step": 10818 }, { "epoch": 1.3165804685123212, "grad_norm": 2.575348138809204, "learning_rate": 5.422332714287837e-06, "loss": 0.3006, "step": 10819 }, { "epoch": 1.3167021600243383, "grad_norm": 1.3824480772018433, "learning_rate": 5.420598493336818e-06, "loss": 0.3531, "step": 10820 }, { "epoch": 1.3168238515363553, "grad_norm": 1.181577444076538, "learning_rate": 5.418864446641505e-06, "loss": 0.3998, "step": 10821 }, { "epoch": 1.3169455430483723, "grad_norm": 1.3310928344726562, "learning_rate": 5.41713057426788e-06, "loss": 0.3639, "step": 10822 }, { "epoch": 1.3170672345603895, "grad_norm": 1.439767837524414, "learning_rate": 5.4153968762819256e-06, "loss": 0.3961, "step": 10823 }, { "epoch": 1.3171889260724066, "grad_norm": 1.7048437595367432, "learning_rate": 5.413663352749605e-06, "loss": 0.3459, "step": 10824 }, { "epoch": 1.3173106175844236, "grad_norm": 1.9653639793395996, "learning_rate": 5.411930003736892e-06, "loss": 0.3475, "step": 10825 }, { "epoch": 1.3174323090964406, "grad_norm": 2.0754644870758057, "learning_rate": 5.410196829309738e-06, "loss": 0.4101, "step": 10826 }, { "epoch": 1.3175540006084576, "grad_norm": 2.066164255142212, "learning_rate": 5.4084638295340915e-06, "loss": 0.343, "step": 10827 }, { "epoch": 1.3176756921204746, "grad_norm": 1.9408520460128784, "learning_rate": 5.406731004475905e-06, "loss": 0.3826, "step": 10828 }, { "epoch": 1.3177973836324917, "grad_norm": 1.6110838651657104, "learning_rate": 5.40499835420111e-06, "loss": 0.3858, "step": 10829 }, { "epoch": 1.3179190751445087, "grad_norm": 2.0025970935821533, "learning_rate": 5.40326587877564e-06, "loss": 0.4049, "step": 10830 }, { "epoch": 1.3180407666565257, "grad_norm": 1.7093302011489868, "learning_rate": 5.401533578265416e-06, "loss": 0.3293, "step": 10831 }, { "epoch": 1.3181624581685427, "grad_norm": 1.944166660308838, "learning_rate": 5.399801452736356e-06, "loss": 0.3493, "step": 10832 }, { "epoch": 1.3182841496805597, "grad_norm": 1.8357899188995361, "learning_rate": 5.3980695022543764e-06, "loss": 0.3223, "step": 10833 }, { "epoch": 1.3184058411925768, "grad_norm": 1.4382829666137695, "learning_rate": 5.396337726885377e-06, "loss": 0.3225, "step": 10834 }, { "epoch": 1.3185275327045938, "grad_norm": 1.2934057712554932, "learning_rate": 5.394606126695252e-06, "loss": 0.3611, "step": 10835 }, { "epoch": 1.3186492242166108, "grad_norm": 2.2172439098358154, "learning_rate": 5.3928747017499e-06, "loss": 0.2899, "step": 10836 }, { "epoch": 1.3187709157286278, "grad_norm": 1.8254777193069458, "learning_rate": 5.391143452115203e-06, "loss": 0.3467, "step": 10837 }, { "epoch": 1.3188926072406448, "grad_norm": 1.2833013534545898, "learning_rate": 5.389412377857033e-06, "loss": 0.318, "step": 10838 }, { "epoch": 1.319014298752662, "grad_norm": 1.3253841400146484, "learning_rate": 5.387681479041269e-06, "loss": 0.2984, "step": 10839 }, { "epoch": 1.319135990264679, "grad_norm": 2.0427513122558594, "learning_rate": 5.385950755733773e-06, "loss": 0.4273, "step": 10840 }, { "epoch": 1.3192576817766961, "grad_norm": 1.7031419277191162, "learning_rate": 5.384220208000396e-06, "loss": 0.388, "step": 10841 }, { "epoch": 1.3193793732887131, "grad_norm": 1.6727646589279175, "learning_rate": 5.382489835907002e-06, "loss": 0.3427, "step": 10842 }, { "epoch": 1.3195010648007302, "grad_norm": 1.460808277130127, "learning_rate": 5.380759639519421e-06, "loss": 0.343, "step": 10843 }, { "epoch": 1.3196227563127472, "grad_norm": 1.7380757331848145, "learning_rate": 5.3790296189035e-06, "loss": 0.3006, "step": 10844 }, { "epoch": 1.3197444478247642, "grad_norm": 3.1223533153533936, "learning_rate": 5.3772997741250665e-06, "loss": 0.4569, "step": 10845 }, { "epoch": 1.3198661393367812, "grad_norm": 2.2328264713287354, "learning_rate": 5.375570105249941e-06, "loss": 0.4092, "step": 10846 }, { "epoch": 1.3199878308487982, "grad_norm": 2.401493787765503, "learning_rate": 5.37384061234395e-06, "loss": 0.4062, "step": 10847 }, { "epoch": 1.3201095223608155, "grad_norm": 1.501154899597168, "learning_rate": 5.372111295472899e-06, "loss": 0.3733, "step": 10848 }, { "epoch": 1.3202312138728325, "grad_norm": 2.488508701324463, "learning_rate": 5.370382154702589e-06, "loss": 0.3935, "step": 10849 }, { "epoch": 1.3203529053848495, "grad_norm": 2.3892276287078857, "learning_rate": 5.368653190098824e-06, "loss": 0.4005, "step": 10850 }, { "epoch": 1.3204745968968665, "grad_norm": 3.7241599559783936, "learning_rate": 5.366924401727391e-06, "loss": 0.3493, "step": 10851 }, { "epoch": 1.3205962884088835, "grad_norm": 3.4036762714385986, "learning_rate": 5.365195789654072e-06, "loss": 0.4669, "step": 10852 }, { "epoch": 1.3207179799209006, "grad_norm": 1.8968693017959595, "learning_rate": 5.3634673539446535e-06, "loss": 0.3649, "step": 10853 }, { "epoch": 1.3208396714329176, "grad_norm": 2.0806210041046143, "learning_rate": 5.361739094664891e-06, "loss": 0.357, "step": 10854 }, { "epoch": 1.3209613629449346, "grad_norm": 2.1857049465179443, "learning_rate": 5.360011011880562e-06, "loss": 0.331, "step": 10855 }, { "epoch": 1.3210830544569516, "grad_norm": 3.2242050170898438, "learning_rate": 5.358283105657417e-06, "loss": 0.3365, "step": 10856 }, { "epoch": 1.3212047459689686, "grad_norm": 1.7904189825057983, "learning_rate": 5.356555376061204e-06, "loss": 0.3895, "step": 10857 }, { "epoch": 1.3213264374809857, "grad_norm": 2.237102508544922, "learning_rate": 5.354827823157674e-06, "loss": 0.3828, "step": 10858 }, { "epoch": 1.3214481289930027, "grad_norm": 2.2091522216796875, "learning_rate": 5.35310044701256e-06, "loss": 0.3804, "step": 10859 }, { "epoch": 1.3215698205050197, "grad_norm": 2.3677759170532227, "learning_rate": 5.35137324769159e-06, "loss": 0.4076, "step": 10860 }, { "epoch": 1.3216915120170367, "grad_norm": 2.804358720779419, "learning_rate": 5.349646225260494e-06, "loss": 0.4119, "step": 10861 }, { "epoch": 1.3218132035290537, "grad_norm": 2.1119935512542725, "learning_rate": 5.347919379784984e-06, "loss": 0.3886, "step": 10862 }, { "epoch": 1.3219348950410708, "grad_norm": 1.7128105163574219, "learning_rate": 5.3461927113307664e-06, "loss": 0.3656, "step": 10863 }, { "epoch": 1.322056586553088, "grad_norm": 1.809964895248413, "learning_rate": 5.344466219963553e-06, "loss": 0.4227, "step": 10864 }, { "epoch": 1.322178278065105, "grad_norm": 1.4565883874893188, "learning_rate": 5.342739905749038e-06, "loss": 0.3923, "step": 10865 }, { "epoch": 1.322299969577122, "grad_norm": 1.6060867309570312, "learning_rate": 5.341013768752908e-06, "loss": 0.3665, "step": 10866 }, { "epoch": 1.322421661089139, "grad_norm": 2.354559898376465, "learning_rate": 5.33928780904085e-06, "loss": 0.3822, "step": 10867 }, { "epoch": 1.322543352601156, "grad_norm": 2.4192521572113037, "learning_rate": 5.3375620266785315e-06, "loss": 0.3545, "step": 10868 }, { "epoch": 1.322665044113173, "grad_norm": 1.8301217555999756, "learning_rate": 5.335836421731633e-06, "loss": 0.3552, "step": 10869 }, { "epoch": 1.3227867356251901, "grad_norm": 1.254237174987793, "learning_rate": 5.3341109942658135e-06, "loss": 0.344, "step": 10870 }, { "epoch": 1.3229084271372071, "grad_norm": 2.161576509475708, "learning_rate": 5.332385744346724e-06, "loss": 0.351, "step": 10871 }, { "epoch": 1.3230301186492242, "grad_norm": 2.287412405014038, "learning_rate": 5.3306606720400224e-06, "loss": 0.3964, "step": 10872 }, { "epoch": 1.3231518101612412, "grad_norm": 2.5778119564056396, "learning_rate": 5.328935777411348e-06, "loss": 0.4066, "step": 10873 }, { "epoch": 1.3232735016732584, "grad_norm": 2.265530586242676, "learning_rate": 5.327211060526331e-06, "loss": 0.398, "step": 10874 }, { "epoch": 1.3233951931852754, "grad_norm": 1.5640454292297363, "learning_rate": 5.3254865214506095e-06, "loss": 0.3738, "step": 10875 }, { "epoch": 1.3235168846972925, "grad_norm": 1.3931158781051636, "learning_rate": 5.323762160249802e-06, "loss": 0.346, "step": 10876 }, { "epoch": 1.3236385762093095, "grad_norm": 2.767368793487549, "learning_rate": 5.322037976989522e-06, "loss": 0.4648, "step": 10877 }, { "epoch": 1.3237602677213265, "grad_norm": 1.3860892057418823, "learning_rate": 5.320313971735383e-06, "loss": 0.3881, "step": 10878 }, { "epoch": 1.3238819592333435, "grad_norm": 2.4217209815979004, "learning_rate": 5.318590144552978e-06, "loss": 0.4659, "step": 10879 }, { "epoch": 1.3240036507453605, "grad_norm": 2.8353214263916016, "learning_rate": 5.316866495507911e-06, "loss": 0.3475, "step": 10880 }, { "epoch": 1.3241253422573775, "grad_norm": 1.6553510427474976, "learning_rate": 5.315143024665769e-06, "loss": 0.3787, "step": 10881 }, { "epoch": 1.3242470337693946, "grad_norm": 1.603233814239502, "learning_rate": 5.3134197320921265e-06, "loss": 0.4088, "step": 10882 }, { "epoch": 1.3243687252814116, "grad_norm": 2.316512107849121, "learning_rate": 5.3116966178525685e-06, "loss": 0.3828, "step": 10883 }, { "epoch": 1.3244904167934286, "grad_norm": 1.7286615371704102, "learning_rate": 5.309973682012655e-06, "loss": 0.3632, "step": 10884 }, { "epoch": 1.3246121083054456, "grad_norm": 1.503751516342163, "learning_rate": 5.308250924637955e-06, "loss": 0.4247, "step": 10885 }, { "epoch": 1.3247337998174626, "grad_norm": 2.024657964706421, "learning_rate": 5.306528345794018e-06, "loss": 0.359, "step": 10886 }, { "epoch": 1.3248554913294797, "grad_norm": 1.592625379562378, "learning_rate": 5.304805945546388e-06, "loss": 0.3936, "step": 10887 }, { "epoch": 1.3249771828414967, "grad_norm": 2.4652585983276367, "learning_rate": 5.303083723960619e-06, "loss": 0.3413, "step": 10888 }, { "epoch": 1.325098874353514, "grad_norm": 1.7822929620742798, "learning_rate": 5.301361681102233e-06, "loss": 0.3564, "step": 10889 }, { "epoch": 1.325220565865531, "grad_norm": 1.8490363359451294, "learning_rate": 5.299639817036754e-06, "loss": 0.377, "step": 10890 }, { "epoch": 1.325342257377548, "grad_norm": 1.8932664394378662, "learning_rate": 5.297918131829716e-06, "loss": 0.3983, "step": 10891 }, { "epoch": 1.325463948889565, "grad_norm": 1.6952601671218872, "learning_rate": 5.296196625546621e-06, "loss": 0.4208, "step": 10892 }, { "epoch": 1.325585640401582, "grad_norm": 1.800462245941162, "learning_rate": 5.294475298252985e-06, "loss": 0.3771, "step": 10893 }, { "epoch": 1.325707331913599, "grad_norm": 1.980384349822998, "learning_rate": 5.292754150014303e-06, "loss": 0.3828, "step": 10894 }, { "epoch": 1.325829023425616, "grad_norm": 1.3604977130889893, "learning_rate": 5.2910331808960655e-06, "loss": 0.3379, "step": 10895 }, { "epoch": 1.325950714937633, "grad_norm": 1.4096249341964722, "learning_rate": 5.289312390963766e-06, "loss": 0.3629, "step": 10896 }, { "epoch": 1.32607240644965, "grad_norm": 1.9162101745605469, "learning_rate": 5.287591780282881e-06, "loss": 0.4101, "step": 10897 }, { "epoch": 1.326194097961667, "grad_norm": 1.796937108039856, "learning_rate": 5.2858713489188795e-06, "loss": 0.3996, "step": 10898 }, { "epoch": 1.3263157894736843, "grad_norm": 2.8475372791290283, "learning_rate": 5.2841510969372375e-06, "loss": 0.354, "step": 10899 }, { "epoch": 1.3264374809857014, "grad_norm": 1.438982367515564, "learning_rate": 5.282431024403401e-06, "loss": 0.3605, "step": 10900 }, { "epoch": 1.3265591724977184, "grad_norm": 1.7382758855819702, "learning_rate": 5.280711131382832e-06, "loss": 0.3984, "step": 10901 }, { "epoch": 1.3266808640097354, "grad_norm": 3.1082699298858643, "learning_rate": 5.278991417940973e-06, "loss": 0.3469, "step": 10902 }, { "epoch": 1.3268025555217524, "grad_norm": 1.4664252996444702, "learning_rate": 5.277271884143259e-06, "loss": 0.3312, "step": 10903 }, { "epoch": 1.3269242470337694, "grad_norm": 1.8658509254455566, "learning_rate": 5.2755525300551295e-06, "loss": 0.3507, "step": 10904 }, { "epoch": 1.3270459385457865, "grad_norm": 2.7156667709350586, "learning_rate": 5.2738333557420045e-06, "loss": 0.3672, "step": 10905 }, { "epoch": 1.3271676300578035, "grad_norm": 2.6489243507385254, "learning_rate": 5.272114361269298e-06, "loss": 0.3829, "step": 10906 }, { "epoch": 1.3272893215698205, "grad_norm": 1.456113576889038, "learning_rate": 5.270395546702433e-06, "loss": 0.307, "step": 10907 }, { "epoch": 1.3274110130818375, "grad_norm": 1.3985824584960938, "learning_rate": 5.268676912106805e-06, "loss": 0.368, "step": 10908 }, { "epoch": 1.3275327045938545, "grad_norm": 2.178978204727173, "learning_rate": 5.26695845754781e-06, "loss": 0.4253, "step": 10909 }, { "epoch": 1.3276543961058715, "grad_norm": 4.004980087280273, "learning_rate": 5.2652401830908474e-06, "loss": 0.3909, "step": 10910 }, { "epoch": 1.3277760876178886, "grad_norm": 1.9495490789413452, "learning_rate": 5.263522088801296e-06, "loss": 0.4048, "step": 10911 }, { "epoch": 1.3278977791299056, "grad_norm": 1.7496061325073242, "learning_rate": 5.261804174744533e-06, "loss": 0.4121, "step": 10912 }, { "epoch": 1.3280194706419226, "grad_norm": 2.009120225906372, "learning_rate": 5.2600864409859275e-06, "loss": 0.3716, "step": 10913 }, { "epoch": 1.3281411621539396, "grad_norm": 1.8285285234451294, "learning_rate": 5.258368887590841e-06, "loss": 0.3674, "step": 10914 }, { "epoch": 1.3282628536659569, "grad_norm": 1.4351909160614014, "learning_rate": 5.256651514624638e-06, "loss": 0.355, "step": 10915 }, { "epoch": 1.3283845451779739, "grad_norm": 1.6309455633163452, "learning_rate": 5.254934322152662e-06, "loss": 0.3654, "step": 10916 }, { "epoch": 1.328506236689991, "grad_norm": 1.6981252431869507, "learning_rate": 5.253217310240253e-06, "loss": 0.3963, "step": 10917 }, { "epoch": 1.328627928202008, "grad_norm": 1.5298811197280884, "learning_rate": 5.2515004789527535e-06, "loss": 0.3659, "step": 10918 }, { "epoch": 1.328749619714025, "grad_norm": 1.5758899450302124, "learning_rate": 5.249783828355492e-06, "loss": 0.3805, "step": 10919 }, { "epoch": 1.328871311226042, "grad_norm": 4.301183700561523, "learning_rate": 5.248067358513782e-06, "loss": 0.3676, "step": 10920 }, { "epoch": 1.328993002738059, "grad_norm": 1.7789143323898315, "learning_rate": 5.24635106949295e-06, "loss": 0.3816, "step": 10921 }, { "epoch": 1.329114694250076, "grad_norm": 2.5354020595550537, "learning_rate": 5.244634961358299e-06, "loss": 0.3886, "step": 10922 }, { "epoch": 1.329236385762093, "grad_norm": 1.7551963329315186, "learning_rate": 5.242919034175131e-06, "loss": 0.369, "step": 10923 }, { "epoch": 1.3293580772741103, "grad_norm": 2.167905569076538, "learning_rate": 5.2412032880087405e-06, "loss": 0.3587, "step": 10924 }, { "epoch": 1.3294797687861273, "grad_norm": 2.0046024322509766, "learning_rate": 5.23948772292441e-06, "loss": 0.4379, "step": 10925 }, { "epoch": 1.3296014602981443, "grad_norm": 2.3789079189300537, "learning_rate": 5.23777233898743e-06, "loss": 0.3971, "step": 10926 }, { "epoch": 1.3297231518101613, "grad_norm": 2.103271484375, "learning_rate": 5.236057136263071e-06, "loss": 0.4176, "step": 10927 }, { "epoch": 1.3298448433221783, "grad_norm": 2.9267852306365967, "learning_rate": 5.234342114816594e-06, "loss": 0.2982, "step": 10928 }, { "epoch": 1.3299665348341954, "grad_norm": 1.86089289188385, "learning_rate": 5.232627274713267e-06, "loss": 0.3925, "step": 10929 }, { "epoch": 1.3300882263462124, "grad_norm": 2.7386250495910645, "learning_rate": 5.230912616018341e-06, "loss": 0.3149, "step": 10930 }, { "epoch": 1.3302099178582294, "grad_norm": 2.2234046459198, "learning_rate": 5.229198138797058e-06, "loss": 0.3928, "step": 10931 }, { "epoch": 1.3303316093702464, "grad_norm": 1.5488901138305664, "learning_rate": 5.227483843114663e-06, "loss": 0.3629, "step": 10932 }, { "epoch": 1.3304533008822634, "grad_norm": 1.6239206790924072, "learning_rate": 5.2257697290363875e-06, "loss": 0.3808, "step": 10933 }, { "epoch": 1.3305749923942805, "grad_norm": 1.7368719577789307, "learning_rate": 5.2240557966274564e-06, "loss": 0.3652, "step": 10934 }, { "epoch": 1.3306966839062975, "grad_norm": 1.4475150108337402, "learning_rate": 5.222342045953087e-06, "loss": 0.3542, "step": 10935 }, { "epoch": 1.3308183754183145, "grad_norm": 1.8466107845306396, "learning_rate": 5.220628477078489e-06, "loss": 0.3924, "step": 10936 }, { "epoch": 1.3309400669303315, "grad_norm": 1.828380823135376, "learning_rate": 5.218915090068873e-06, "loss": 0.3881, "step": 10937 }, { "epoch": 1.3310617584423485, "grad_norm": 2.7511560916900635, "learning_rate": 5.217201884989435e-06, "loss": 0.3828, "step": 10938 }, { "epoch": 1.3311834499543655, "grad_norm": 1.4727199077606201, "learning_rate": 5.21548886190536e-06, "loss": 0.3536, "step": 10939 }, { "epoch": 1.3313051414663828, "grad_norm": 1.5623347759246826, "learning_rate": 5.213776020881842e-06, "loss": 0.386, "step": 10940 }, { "epoch": 1.3314268329783998, "grad_norm": 1.6486202478408813, "learning_rate": 5.212063361984054e-06, "loss": 0.3669, "step": 10941 }, { "epoch": 1.3315485244904168, "grad_norm": 2.885010004043579, "learning_rate": 5.21035088527716e-06, "loss": 0.4332, "step": 10942 }, { "epoch": 1.3316702160024338, "grad_norm": 1.7678548097610474, "learning_rate": 5.208638590826334e-06, "loss": 0.3471, "step": 10943 }, { "epoch": 1.3317919075144509, "grad_norm": 2.591498374938965, "learning_rate": 5.206926478696723e-06, "loss": 0.3486, "step": 10944 }, { "epoch": 1.3319135990264679, "grad_norm": 2.8749332427978516, "learning_rate": 5.2052145489534885e-06, "loss": 0.3323, "step": 10945 }, { "epoch": 1.332035290538485, "grad_norm": 1.5610345602035522, "learning_rate": 5.203502801661762e-06, "loss": 0.3617, "step": 10946 }, { "epoch": 1.332156982050502, "grad_norm": 2.0968525409698486, "learning_rate": 5.2017912368866765e-06, "loss": 0.3361, "step": 10947 }, { "epoch": 1.332278673562519, "grad_norm": 1.8223997354507446, "learning_rate": 5.2000798546933705e-06, "loss": 0.3799, "step": 10948 }, { "epoch": 1.3324003650745362, "grad_norm": 1.9765418767929077, "learning_rate": 5.1983686551469615e-06, "loss": 0.3482, "step": 10949 }, { "epoch": 1.3325220565865532, "grad_norm": 2.3010082244873047, "learning_rate": 5.196657638312561e-06, "loss": 0.3744, "step": 10950 }, { "epoch": 1.3326437480985702, "grad_norm": 1.559601902961731, "learning_rate": 5.194946804255283e-06, "loss": 0.3385, "step": 10951 }, { "epoch": 1.3327654396105872, "grad_norm": 1.601814866065979, "learning_rate": 5.1932361530402195e-06, "loss": 0.3439, "step": 10952 }, { "epoch": 1.3328871311226043, "grad_norm": 2.0287764072418213, "learning_rate": 5.191525684732477e-06, "loss": 0.2864, "step": 10953 }, { "epoch": 1.3330088226346213, "grad_norm": 1.5020554065704346, "learning_rate": 5.189815399397133e-06, "loss": 0.2729, "step": 10954 }, { "epoch": 1.3331305141466383, "grad_norm": 3.7550368309020996, "learning_rate": 5.188105297099266e-06, "loss": 0.4167, "step": 10955 }, { "epoch": 1.3332522056586553, "grad_norm": 2.2288944721221924, "learning_rate": 5.186395377903957e-06, "loss": 0.3785, "step": 10956 }, { "epoch": 1.3333738971706723, "grad_norm": 1.3996793031692505, "learning_rate": 5.18468564187627e-06, "loss": 0.3485, "step": 10957 }, { "epoch": 1.3334955886826894, "grad_norm": 1.8882296085357666, "learning_rate": 5.182976089081255e-06, "loss": 0.3639, "step": 10958 }, { "epoch": 1.3336172801947064, "grad_norm": 2.441636562347412, "learning_rate": 5.181266719583974e-06, "loss": 0.3695, "step": 10959 }, { "epoch": 1.3337389717067234, "grad_norm": 3.7391064167022705, "learning_rate": 5.179557533449464e-06, "loss": 0.4286, "step": 10960 }, { "epoch": 1.3338606632187404, "grad_norm": 2.417163133621216, "learning_rate": 5.177848530742773e-06, "loss": 0.3759, "step": 10961 }, { "epoch": 1.3339823547307574, "grad_norm": 3.05417537689209, "learning_rate": 5.1761397115289255e-06, "loss": 0.3989, "step": 10962 }, { "epoch": 1.3341040462427745, "grad_norm": 1.6889636516571045, "learning_rate": 5.174431075872943e-06, "loss": 0.4065, "step": 10963 }, { "epoch": 1.3342257377547915, "grad_norm": 1.7248908281326294, "learning_rate": 5.172722623839851e-06, "loss": 0.4049, "step": 10964 }, { "epoch": 1.3343474292668087, "grad_norm": 3.922619581222534, "learning_rate": 5.171014355494654e-06, "loss": 0.3583, "step": 10965 }, { "epoch": 1.3344691207788257, "grad_norm": 2.3132288455963135, "learning_rate": 5.169306270902354e-06, "loss": 0.4408, "step": 10966 }, { "epoch": 1.3345908122908428, "grad_norm": 2.1008007526397705, "learning_rate": 5.167598370127952e-06, "loss": 0.3859, "step": 10967 }, { "epoch": 1.3347125038028598, "grad_norm": 2.025282859802246, "learning_rate": 5.165890653236435e-06, "loss": 0.3615, "step": 10968 }, { "epoch": 1.3348341953148768, "grad_norm": 2.9155805110931396, "learning_rate": 5.164183120292785e-06, "loss": 0.3736, "step": 10969 }, { "epoch": 1.3349558868268938, "grad_norm": 2.0699708461761475, "learning_rate": 5.1624757713619766e-06, "loss": 0.3756, "step": 10970 }, { "epoch": 1.3350775783389108, "grad_norm": 2.333455801010132, "learning_rate": 5.160768606508973e-06, "loss": 0.3889, "step": 10971 }, { "epoch": 1.3351992698509279, "grad_norm": 2.908857583999634, "learning_rate": 5.159061625798747e-06, "loss": 0.3909, "step": 10972 }, { "epoch": 1.3353209613629449, "grad_norm": 2.964445114135742, "learning_rate": 5.157354829296244e-06, "loss": 0.3294, "step": 10973 }, { "epoch": 1.3354426528749619, "grad_norm": 1.7204053401947021, "learning_rate": 5.155648217066411e-06, "loss": 0.3511, "step": 10974 }, { "epoch": 1.3355643443869791, "grad_norm": 2.3303165435791016, "learning_rate": 5.153941789174194e-06, "loss": 0.3879, "step": 10975 }, { "epoch": 1.3356860358989961, "grad_norm": 1.7581350803375244, "learning_rate": 5.152235545684523e-06, "loss": 0.4142, "step": 10976 }, { "epoch": 1.3358077274110132, "grad_norm": 1.7393678426742554, "learning_rate": 5.150529486662319e-06, "loss": 0.3486, "step": 10977 }, { "epoch": 1.3359294189230302, "grad_norm": 1.4734742641448975, "learning_rate": 5.148823612172511e-06, "loss": 0.3759, "step": 10978 }, { "epoch": 1.3360511104350472, "grad_norm": 1.6412012577056885, "learning_rate": 5.147117922280005e-06, "loss": 0.3195, "step": 10979 }, { "epoch": 1.3361728019470642, "grad_norm": 2.7351601123809814, "learning_rate": 5.145412417049707e-06, "loss": 0.3785, "step": 10980 }, { "epoch": 1.3362944934590812, "grad_norm": 1.4758644104003906, "learning_rate": 5.143707096546515e-06, "loss": 0.3697, "step": 10981 }, { "epoch": 1.3364161849710983, "grad_norm": 1.6413012742996216, "learning_rate": 5.142001960835316e-06, "loss": 0.3696, "step": 10982 }, { "epoch": 1.3365378764831153, "grad_norm": 2.974024534225464, "learning_rate": 5.140297009981003e-06, "loss": 0.3704, "step": 10983 }, { "epoch": 1.3366595679951323, "grad_norm": 4.0569167137146, "learning_rate": 5.138592244048447e-06, "loss": 0.4529, "step": 10984 }, { "epoch": 1.3367812595071493, "grad_norm": 1.8888123035430908, "learning_rate": 5.136887663102514e-06, "loss": 0.3136, "step": 10985 }, { "epoch": 1.3369029510191663, "grad_norm": 1.581889033317566, "learning_rate": 5.135183267208077e-06, "loss": 0.3576, "step": 10986 }, { "epoch": 1.3370246425311834, "grad_norm": 2.990297317504883, "learning_rate": 5.133479056429986e-06, "loss": 0.3445, "step": 10987 }, { "epoch": 1.3371463340432004, "grad_norm": 3.5888357162475586, "learning_rate": 5.131775030833085e-06, "loss": 0.4068, "step": 10988 }, { "epoch": 1.3372680255552174, "grad_norm": 1.796064019203186, "learning_rate": 5.130071190482225e-06, "loss": 0.3147, "step": 10989 }, { "epoch": 1.3373897170672346, "grad_norm": 2.4908409118652344, "learning_rate": 5.128367535442237e-06, "loss": 0.3955, "step": 10990 }, { "epoch": 1.3375114085792517, "grad_norm": 1.7676397562026978, "learning_rate": 5.126664065777947e-06, "loss": 0.3926, "step": 10991 }, { "epoch": 1.3376331000912687, "grad_norm": 1.6447612047195435, "learning_rate": 5.124960781554178e-06, "loss": 0.3353, "step": 10992 }, { "epoch": 1.3377547916032857, "grad_norm": 1.9016059637069702, "learning_rate": 5.123257682835737e-06, "loss": 0.34, "step": 10993 }, { "epoch": 1.3378764831153027, "grad_norm": 3.1524505615234375, "learning_rate": 5.12155476968744e-06, "loss": 0.3321, "step": 10994 }, { "epoch": 1.3379981746273197, "grad_norm": 1.9542748928070068, "learning_rate": 5.1198520421740816e-06, "loss": 0.39, "step": 10995 }, { "epoch": 1.3381198661393368, "grad_norm": 1.716983675956726, "learning_rate": 5.1181495003604495e-06, "loss": 0.3991, "step": 10996 }, { "epoch": 1.3382415576513538, "grad_norm": 2.202787160873413, "learning_rate": 5.116447144311339e-06, "loss": 0.3492, "step": 10997 }, { "epoch": 1.3383632491633708, "grad_norm": 1.6653982400894165, "learning_rate": 5.114744974091523e-06, "loss": 0.3534, "step": 10998 }, { "epoch": 1.3384849406753878, "grad_norm": 1.9553024768829346, "learning_rate": 5.113042989765769e-06, "loss": 0.3404, "step": 10999 }, { "epoch": 1.338606632187405, "grad_norm": 2.9529871940612793, "learning_rate": 5.111341191398847e-06, "loss": 0.3477, "step": 11000 }, { "epoch": 1.338728323699422, "grad_norm": 1.39569091796875, "learning_rate": 5.109639579055513e-06, "loss": 0.3418, "step": 11001 }, { "epoch": 1.338850015211439, "grad_norm": 1.9004137516021729, "learning_rate": 5.1079381528005115e-06, "loss": 0.3201, "step": 11002 }, { "epoch": 1.3389717067234561, "grad_norm": 1.811944603919983, "learning_rate": 5.106236912698597e-06, "loss": 0.3368, "step": 11003 }, { "epoch": 1.3390933982354731, "grad_norm": 1.8880418539047241, "learning_rate": 5.1045358588144885e-06, "loss": 0.3987, "step": 11004 }, { "epoch": 1.3392150897474902, "grad_norm": 2.5061557292938232, "learning_rate": 5.10283499121293e-06, "loss": 0.3529, "step": 11005 }, { "epoch": 1.3393367812595072, "grad_norm": 2.251542091369629, "learning_rate": 5.101134309958634e-06, "loss": 0.3721, "step": 11006 }, { "epoch": 1.3394584727715242, "grad_norm": 3.39823317527771, "learning_rate": 5.099433815116315e-06, "loss": 0.4286, "step": 11007 }, { "epoch": 1.3395801642835412, "grad_norm": 3.1394879817962646, "learning_rate": 5.097733506750687e-06, "loss": 0.3897, "step": 11008 }, { "epoch": 1.3397018557955582, "grad_norm": 2.324920892715454, "learning_rate": 5.0960333849264465e-06, "loss": 0.3739, "step": 11009 }, { "epoch": 1.3398235473075752, "grad_norm": 1.7832491397857666, "learning_rate": 5.0943334497082815e-06, "loss": 0.3615, "step": 11010 }, { "epoch": 1.3399452388195923, "grad_norm": 1.780424952507019, "learning_rate": 5.0926337011608875e-06, "loss": 0.3665, "step": 11011 }, { "epoch": 1.3400669303316093, "grad_norm": 2.180391788482666, "learning_rate": 5.0909341393489345e-06, "loss": 0.3355, "step": 11012 }, { "epoch": 1.3401886218436263, "grad_norm": 1.671390175819397, "learning_rate": 5.089234764337103e-06, "loss": 0.3471, "step": 11013 }, { "epoch": 1.3403103133556433, "grad_norm": 2.992978096008301, "learning_rate": 5.087535576190057e-06, "loss": 0.4249, "step": 11014 }, { "epoch": 1.3404320048676603, "grad_norm": 1.7745660543441772, "learning_rate": 5.0858365749724425e-06, "loss": 0.3514, "step": 11015 }, { "epoch": 1.3405536963796776, "grad_norm": 2.2330129146575928, "learning_rate": 5.084137760748923e-06, "loss": 0.4246, "step": 11016 }, { "epoch": 1.3406753878916946, "grad_norm": 2.6199281215667725, "learning_rate": 5.082439133584136e-06, "loss": 0.4635, "step": 11017 }, { "epoch": 1.3407970794037116, "grad_norm": 2.5350892543792725, "learning_rate": 5.0807406935427165e-06, "loss": 0.3737, "step": 11018 }, { "epoch": 1.3409187709157286, "grad_norm": 2.2068848609924316, "learning_rate": 5.0790424406892966e-06, "loss": 0.3746, "step": 11019 }, { "epoch": 1.3410404624277457, "grad_norm": 1.8008586168289185, "learning_rate": 5.0773443750884965e-06, "loss": 0.4095, "step": 11020 }, { "epoch": 1.3411621539397627, "grad_norm": 3.852505922317505, "learning_rate": 5.075646496804936e-06, "loss": 0.3295, "step": 11021 }, { "epoch": 1.3412838454517797, "grad_norm": 1.5841361284255981, "learning_rate": 5.073948805903217e-06, "loss": 0.3955, "step": 11022 }, { "epoch": 1.3414055369637967, "grad_norm": 2.9455623626708984, "learning_rate": 5.072251302447942e-06, "loss": 0.3759, "step": 11023 }, { "epoch": 1.3415272284758137, "grad_norm": 2.2216196060180664, "learning_rate": 5.070553986503706e-06, "loss": 0.374, "step": 11024 }, { "epoch": 1.341648919987831, "grad_norm": 3.1855056285858154, "learning_rate": 5.068856858135094e-06, "loss": 0.3967, "step": 11025 }, { "epoch": 1.341770611499848, "grad_norm": 3.5295183658599854, "learning_rate": 5.0671599174066865e-06, "loss": 0.3371, "step": 11026 }, { "epoch": 1.341892303011865, "grad_norm": 2.3580572605133057, "learning_rate": 5.065463164383054e-06, "loss": 0.3989, "step": 11027 }, { "epoch": 1.342013994523882, "grad_norm": 4.957223892211914, "learning_rate": 5.0637665991287585e-06, "loss": 0.3306, "step": 11028 }, { "epoch": 1.342135686035899, "grad_norm": 2.0160727500915527, "learning_rate": 5.0620702217083665e-06, "loss": 0.4112, "step": 11029 }, { "epoch": 1.342257377547916, "grad_norm": 1.795076847076416, "learning_rate": 5.0603740321864215e-06, "loss": 0.323, "step": 11030 }, { "epoch": 1.342379069059933, "grad_norm": 2.9094138145446777, "learning_rate": 5.058678030627466e-06, "loss": 0.275, "step": 11031 }, { "epoch": 1.3425007605719501, "grad_norm": 1.4378584623336792, "learning_rate": 5.056982217096043e-06, "loss": 0.312, "step": 11032 }, { "epoch": 1.3426224520839671, "grad_norm": 1.352764368057251, "learning_rate": 5.055286591656678e-06, "loss": 0.3493, "step": 11033 }, { "epoch": 1.3427441435959842, "grad_norm": 1.5660258531570435, "learning_rate": 5.053591154373891e-06, "loss": 0.3427, "step": 11034 }, { "epoch": 1.3428658351080012, "grad_norm": 3.100874423980713, "learning_rate": 5.051895905312201e-06, "loss": 0.3818, "step": 11035 }, { "epoch": 1.3429875266200182, "grad_norm": 2.558004140853882, "learning_rate": 5.050200844536114e-06, "loss": 0.3746, "step": 11036 }, { "epoch": 1.3431092181320352, "grad_norm": 4.097098350524902, "learning_rate": 5.048505972110131e-06, "loss": 0.4383, "step": 11037 }, { "epoch": 1.3432309096440522, "grad_norm": 2.1375014781951904, "learning_rate": 5.046811288098745e-06, "loss": 0.3771, "step": 11038 }, { "epoch": 1.3433526011560692, "grad_norm": 1.6098134517669678, "learning_rate": 5.045116792566438e-06, "loss": 0.3305, "step": 11039 }, { "epoch": 1.3434742926680863, "grad_norm": 2.4263265132904053, "learning_rate": 5.043422485577696e-06, "loss": 0.3789, "step": 11040 }, { "epoch": 1.3435959841801035, "grad_norm": 2.0546152591705322, "learning_rate": 5.041728367196988e-06, "loss": 0.3486, "step": 11041 }, { "epoch": 1.3437176756921205, "grad_norm": 2.3783087730407715, "learning_rate": 5.0400344374887765e-06, "loss": 0.3779, "step": 11042 }, { "epoch": 1.3438393672041375, "grad_norm": 4.532798767089844, "learning_rate": 5.038340696517523e-06, "loss": 0.4212, "step": 11043 }, { "epoch": 1.3439610587161546, "grad_norm": 1.5730412006378174, "learning_rate": 5.0366471443476775e-06, "loss": 0.353, "step": 11044 }, { "epoch": 1.3440827502281716, "grad_norm": 1.2899422645568848, "learning_rate": 5.034953781043678e-06, "loss": 0.3765, "step": 11045 }, { "epoch": 1.3442044417401886, "grad_norm": 2.337146282196045, "learning_rate": 5.033260606669968e-06, "loss": 0.3261, "step": 11046 }, { "epoch": 1.3443261332522056, "grad_norm": 1.57066011428833, "learning_rate": 5.0315676212909715e-06, "loss": 0.3513, "step": 11047 }, { "epoch": 1.3444478247642226, "grad_norm": 2.039445400238037, "learning_rate": 5.029874824971108e-06, "loss": 0.3483, "step": 11048 }, { "epoch": 1.3445695162762397, "grad_norm": 1.8404277563095093, "learning_rate": 5.028182217774802e-06, "loss": 0.3445, "step": 11049 }, { "epoch": 1.344691207788257, "grad_norm": 1.4669520854949951, "learning_rate": 5.026489799766447e-06, "loss": 0.3664, "step": 11050 }, { "epoch": 1.344812899300274, "grad_norm": 1.6410268545150757, "learning_rate": 5.024797571010454e-06, "loss": 0.3669, "step": 11051 }, { "epoch": 1.344934590812291, "grad_norm": 1.673137903213501, "learning_rate": 5.0231055315712095e-06, "loss": 0.3959, "step": 11052 }, { "epoch": 1.345056282324308, "grad_norm": 1.8528831005096436, "learning_rate": 5.0214136815130985e-06, "loss": 0.3912, "step": 11053 }, { "epoch": 1.345177973836325, "grad_norm": 1.6221925020217896, "learning_rate": 5.019722020900506e-06, "loss": 0.3897, "step": 11054 }, { "epoch": 1.345299665348342, "grad_norm": 2.3154237270355225, "learning_rate": 5.018030549797799e-06, "loss": 0.3461, "step": 11055 }, { "epoch": 1.345421356860359, "grad_norm": 2.1729185581207275, "learning_rate": 5.016339268269338e-06, "loss": 0.3425, "step": 11056 }, { "epoch": 1.345543048372376, "grad_norm": 1.5695834159851074, "learning_rate": 5.014648176379487e-06, "loss": 0.3625, "step": 11057 }, { "epoch": 1.345664739884393, "grad_norm": 2.028552770614624, "learning_rate": 5.012957274192592e-06, "loss": 0.4064, "step": 11058 }, { "epoch": 1.34578643139641, "grad_norm": 2.6498844623565674, "learning_rate": 5.011266561772992e-06, "loss": 0.3956, "step": 11059 }, { "epoch": 1.345908122908427, "grad_norm": 1.7356956005096436, "learning_rate": 5.009576039185034e-06, "loss": 0.3046, "step": 11060 }, { "epoch": 1.3460298144204441, "grad_norm": 2.5046088695526123, "learning_rate": 5.007885706493028e-06, "loss": 0.3949, "step": 11061 }, { "epoch": 1.3461515059324611, "grad_norm": 2.589621067047119, "learning_rate": 5.006195563761307e-06, "loss": 0.4018, "step": 11062 }, { "epoch": 1.3462731974444782, "grad_norm": 1.794716238975525, "learning_rate": 5.004505611054182e-06, "loss": 0.3862, "step": 11063 }, { "epoch": 1.3463948889564952, "grad_norm": 1.9696511030197144, "learning_rate": 5.002815848435956e-06, "loss": 0.3539, "step": 11064 }, { "epoch": 1.3465165804685122, "grad_norm": 2.4545681476593018, "learning_rate": 5.001126275970934e-06, "loss": 0.3609, "step": 11065 }, { "epoch": 1.3466382719805294, "grad_norm": 3.0479249954223633, "learning_rate": 4.999436893723404e-06, "loss": 0.336, "step": 11066 }, { "epoch": 1.3467599634925465, "grad_norm": 1.4956940412521362, "learning_rate": 4.997747701757647e-06, "loss": 0.3373, "step": 11067 }, { "epoch": 1.3468816550045635, "grad_norm": 2.651766300201416, "learning_rate": 4.996058700137948e-06, "loss": 0.3302, "step": 11068 }, { "epoch": 1.3470033465165805, "grad_norm": 2.408039093017578, "learning_rate": 4.994369888928573e-06, "loss": 0.3779, "step": 11069 }, { "epoch": 1.3471250380285975, "grad_norm": 4.037755489349365, "learning_rate": 4.992681268193781e-06, "loss": 0.4393, "step": 11070 }, { "epoch": 1.3472467295406145, "grad_norm": 2.839583158493042, "learning_rate": 4.990992837997836e-06, "loss": 0.4211, "step": 11071 }, { "epoch": 1.3473684210526315, "grad_norm": 1.764530062675476, "learning_rate": 4.989304598404981e-06, "loss": 0.3597, "step": 11072 }, { "epoch": 1.3474901125646486, "grad_norm": 1.5183846950531006, "learning_rate": 4.987616549479456e-06, "loss": 0.3285, "step": 11073 }, { "epoch": 1.3476118040766656, "grad_norm": 4.086575508117676, "learning_rate": 4.9859286912854975e-06, "loss": 0.4522, "step": 11074 }, { "epoch": 1.3477334955886826, "grad_norm": 1.79486882686615, "learning_rate": 4.984241023887327e-06, "loss": 0.3729, "step": 11075 }, { "epoch": 1.3478551871006998, "grad_norm": 2.400681734085083, "learning_rate": 4.982553547349173e-06, "loss": 0.4159, "step": 11076 }, { "epoch": 1.3479768786127169, "grad_norm": 1.9631298780441284, "learning_rate": 4.980866261735237e-06, "loss": 0.3716, "step": 11077 }, { "epoch": 1.3480985701247339, "grad_norm": 1.5370818376541138, "learning_rate": 4.979179167109732e-06, "loss": 0.329, "step": 11078 }, { "epoch": 1.348220261636751, "grad_norm": 1.6928389072418213, "learning_rate": 4.977492263536853e-06, "loss": 0.332, "step": 11079 }, { "epoch": 1.348341953148768, "grad_norm": 1.4783992767333984, "learning_rate": 4.975805551080784e-06, "loss": 0.3804, "step": 11080 }, { "epoch": 1.348463644660785, "grad_norm": 1.3643468618392944, "learning_rate": 4.97411902980572e-06, "loss": 0.3536, "step": 11081 }, { "epoch": 1.348585336172802, "grad_norm": 1.8942606449127197, "learning_rate": 4.972432699775827e-06, "loss": 0.35, "step": 11082 }, { "epoch": 1.348707027684819, "grad_norm": 1.7877579927444458, "learning_rate": 4.970746561055278e-06, "loss": 0.2827, "step": 11083 }, { "epoch": 1.348828719196836, "grad_norm": 1.600560188293457, "learning_rate": 4.969060613708232e-06, "loss": 0.3747, "step": 11084 }, { "epoch": 1.348950410708853, "grad_norm": 1.5231003761291504, "learning_rate": 4.967374857798839e-06, "loss": 0.4006, "step": 11085 }, { "epoch": 1.34907210222087, "grad_norm": 1.6011158227920532, "learning_rate": 4.965689293391254e-06, "loss": 0.3422, "step": 11086 }, { "epoch": 1.349193793732887, "grad_norm": 3.4856338500976562, "learning_rate": 4.964003920549612e-06, "loss": 0.3868, "step": 11087 }, { "epoch": 1.349315485244904, "grad_norm": 4.397075176239014, "learning_rate": 4.962318739338039e-06, "loss": 0.4357, "step": 11088 }, { "epoch": 1.349437176756921, "grad_norm": 1.8966400623321533, "learning_rate": 4.9606337498206715e-06, "loss": 0.4009, "step": 11089 }, { "epoch": 1.3495588682689381, "grad_norm": 1.6261204481124878, "learning_rate": 4.95894895206162e-06, "loss": 0.4101, "step": 11090 }, { "epoch": 1.3496805597809554, "grad_norm": 1.422709345817566, "learning_rate": 4.95726434612499e-06, "loss": 0.3949, "step": 11091 }, { "epoch": 1.3498022512929724, "grad_norm": 1.7959972620010376, "learning_rate": 4.955579932074896e-06, "loss": 0.2994, "step": 11092 }, { "epoch": 1.3499239428049894, "grad_norm": 3.601912498474121, "learning_rate": 4.953895709975425e-06, "loss": 0.4894, "step": 11093 }, { "epoch": 1.3500456343170064, "grad_norm": 1.9235748052597046, "learning_rate": 4.952211679890664e-06, "loss": 0.4036, "step": 11094 }, { "epoch": 1.3501673258290234, "grad_norm": 1.8110328912734985, "learning_rate": 4.950527841884705e-06, "loss": 0.3139, "step": 11095 }, { "epoch": 1.3502890173410405, "grad_norm": 1.5664286613464355, "learning_rate": 4.948844196021605e-06, "loss": 0.3698, "step": 11096 }, { "epoch": 1.3504107088530575, "grad_norm": 1.812437653541565, "learning_rate": 4.947160742365442e-06, "loss": 0.4136, "step": 11097 }, { "epoch": 1.3505324003650745, "grad_norm": 1.8608636856079102, "learning_rate": 4.945477480980273e-06, "loss": 0.3985, "step": 11098 }, { "epoch": 1.3506540918770915, "grad_norm": 1.386445164680481, "learning_rate": 4.943794411930143e-06, "loss": 0.3881, "step": 11099 }, { "epoch": 1.3507757833891085, "grad_norm": 1.3475912809371948, "learning_rate": 4.942111535279106e-06, "loss": 0.33, "step": 11100 }, { "epoch": 1.3508974749011258, "grad_norm": 1.3175837993621826, "learning_rate": 4.940428851091195e-06, "loss": 0.3595, "step": 11101 }, { "epoch": 1.3510191664131428, "grad_norm": 2.505901336669922, "learning_rate": 4.938746359430434e-06, "loss": 0.3406, "step": 11102 }, { "epoch": 1.3511408579251598, "grad_norm": 1.8229089975357056, "learning_rate": 4.937064060360854e-06, "loss": 0.4044, "step": 11103 }, { "epoch": 1.3512625494371768, "grad_norm": 1.2791388034820557, "learning_rate": 4.9353819539464664e-06, "loss": 0.3495, "step": 11104 }, { "epoch": 1.3513842409491938, "grad_norm": 1.715727686882019, "learning_rate": 4.933700040251276e-06, "loss": 0.3581, "step": 11105 }, { "epoch": 1.3515059324612109, "grad_norm": 2.3101534843444824, "learning_rate": 4.932018319339291e-06, "loss": 0.3331, "step": 11106 }, { "epoch": 1.3516276239732279, "grad_norm": 1.4475650787353516, "learning_rate": 4.930336791274494e-06, "loss": 0.3815, "step": 11107 }, { "epoch": 1.351749315485245, "grad_norm": 1.5390148162841797, "learning_rate": 4.928655456120878e-06, "loss": 0.4125, "step": 11108 }, { "epoch": 1.351871006997262, "grad_norm": 1.4437994956970215, "learning_rate": 4.92697431394242e-06, "loss": 0.3737, "step": 11109 }, { "epoch": 1.351992698509279, "grad_norm": 1.6687712669372559, "learning_rate": 4.925293364803084e-06, "loss": 0.3661, "step": 11110 }, { "epoch": 1.352114390021296, "grad_norm": 1.7024102210998535, "learning_rate": 4.923612608766846e-06, "loss": 0.3334, "step": 11111 }, { "epoch": 1.352236081533313, "grad_norm": 2.6565771102905273, "learning_rate": 4.921932045897654e-06, "loss": 0.4211, "step": 11112 }, { "epoch": 1.35235777304533, "grad_norm": 1.6554299592971802, "learning_rate": 4.920251676259455e-06, "loss": 0.3413, "step": 11113 }, { "epoch": 1.352479464557347, "grad_norm": 4.151785373687744, "learning_rate": 4.918571499916199e-06, "loss": 0.455, "step": 11114 }, { "epoch": 1.352601156069364, "grad_norm": 2.1160130500793457, "learning_rate": 4.9168915169318155e-06, "loss": 0.3694, "step": 11115 }, { "epoch": 1.352722847581381, "grad_norm": 2.1712119579315186, "learning_rate": 4.915211727370226e-06, "loss": 0.3696, "step": 11116 }, { "epoch": 1.3528445390933983, "grad_norm": 1.4879486560821533, "learning_rate": 4.91353213129536e-06, "loss": 0.3153, "step": 11117 }, { "epoch": 1.3529662306054153, "grad_norm": 1.6520042419433594, "learning_rate": 4.911852728771125e-06, "loss": 0.365, "step": 11118 }, { "epoch": 1.3530879221174323, "grad_norm": 1.4935072660446167, "learning_rate": 4.910173519861425e-06, "loss": 0.4036, "step": 11119 }, { "epoch": 1.3532096136294494, "grad_norm": 2.27902889251709, "learning_rate": 4.908494504630158e-06, "loss": 0.4058, "step": 11120 }, { "epoch": 1.3533313051414664, "grad_norm": 2.504993200302124, "learning_rate": 4.906815683141211e-06, "loss": 0.3431, "step": 11121 }, { "epoch": 1.3534529966534834, "grad_norm": 2.718082904815674, "learning_rate": 4.905137055458472e-06, "loss": 0.3761, "step": 11122 }, { "epoch": 1.3535746881655004, "grad_norm": 1.9521477222442627, "learning_rate": 4.903458621645815e-06, "loss": 0.3712, "step": 11123 }, { "epoch": 1.3536963796775174, "grad_norm": 1.510922908782959, "learning_rate": 4.901780381767103e-06, "loss": 0.3575, "step": 11124 }, { "epoch": 1.3538180711895345, "grad_norm": 2.4058730602264404, "learning_rate": 4.9001023358862036e-06, "loss": 0.3185, "step": 11125 }, { "epoch": 1.3539397627015517, "grad_norm": 2.4367456436157227, "learning_rate": 4.898424484066966e-06, "loss": 0.3175, "step": 11126 }, { "epoch": 1.3540614542135687, "grad_norm": 1.4776997566223145, "learning_rate": 4.896746826373232e-06, "loss": 0.3438, "step": 11127 }, { "epoch": 1.3541831457255857, "grad_norm": 1.9074993133544922, "learning_rate": 4.8950693628688495e-06, "loss": 0.339, "step": 11128 }, { "epoch": 1.3543048372376028, "grad_norm": 2.4589500427246094, "learning_rate": 4.893392093617645e-06, "loss": 0.2854, "step": 11129 }, { "epoch": 1.3544265287496198, "grad_norm": 3.9764435291290283, "learning_rate": 4.89171501868344e-06, "loss": 0.434, "step": 11130 }, { "epoch": 1.3545482202616368, "grad_norm": 1.4589622020721436, "learning_rate": 4.8900381381300524e-06, "loss": 0.3141, "step": 11131 }, { "epoch": 1.3546699117736538, "grad_norm": 2.069058895111084, "learning_rate": 4.888361452021288e-06, "loss": 0.3738, "step": 11132 }, { "epoch": 1.3547916032856708, "grad_norm": 1.5543010234832764, "learning_rate": 4.886684960420953e-06, "loss": 0.343, "step": 11133 }, { "epoch": 1.3549132947976879, "grad_norm": 2.2902374267578125, "learning_rate": 4.88500866339284e-06, "loss": 0.3737, "step": 11134 }, { "epoch": 1.3550349863097049, "grad_norm": 1.6328562498092651, "learning_rate": 4.883332561000731e-06, "loss": 0.3629, "step": 11135 }, { "epoch": 1.3551566778217219, "grad_norm": 1.4300404787063599, "learning_rate": 4.881656653308413e-06, "loss": 0.3383, "step": 11136 }, { "epoch": 1.355278369333739, "grad_norm": 1.3524949550628662, "learning_rate": 4.879980940379649e-06, "loss": 0.3324, "step": 11137 }, { "epoch": 1.355400060845756, "grad_norm": 1.8797709941864014, "learning_rate": 4.878305422278213e-06, "loss": 0.4218, "step": 11138 }, { "epoch": 1.355521752357773, "grad_norm": 1.634743571281433, "learning_rate": 4.8766300990678576e-06, "loss": 0.3653, "step": 11139 }, { "epoch": 1.35564344386979, "grad_norm": 1.4865691661834717, "learning_rate": 4.874954970812326e-06, "loss": 0.3549, "step": 11140 }, { "epoch": 1.355765135381807, "grad_norm": 2.021141529083252, "learning_rate": 4.873280037575375e-06, "loss": 0.3067, "step": 11141 }, { "epoch": 1.3558868268938242, "grad_norm": 1.5794376134872437, "learning_rate": 4.8716052994207254e-06, "loss": 0.3709, "step": 11142 }, { "epoch": 1.3560085184058412, "grad_norm": 1.9783570766448975, "learning_rate": 4.869930756412107e-06, "loss": 0.3104, "step": 11143 }, { "epoch": 1.3561302099178583, "grad_norm": 1.8349593877792358, "learning_rate": 4.868256408613244e-06, "loss": 0.4265, "step": 11144 }, { "epoch": 1.3562519014298753, "grad_norm": 1.4364620447158813, "learning_rate": 4.866582256087845e-06, "loss": 0.3353, "step": 11145 }, { "epoch": 1.3563735929418923, "grad_norm": 1.933782696723938, "learning_rate": 4.8649082988996185e-06, "loss": 0.3553, "step": 11146 }, { "epoch": 1.3564952844539093, "grad_norm": 1.6134716272354126, "learning_rate": 4.863234537112261e-06, "loss": 0.382, "step": 11147 }, { "epoch": 1.3566169759659263, "grad_norm": 1.8891401290893555, "learning_rate": 4.86156097078946e-06, "loss": 0.3518, "step": 11148 }, { "epoch": 1.3567386674779434, "grad_norm": 3.6986119747161865, "learning_rate": 4.8598875999949015e-06, "loss": 0.457, "step": 11149 }, { "epoch": 1.3568603589899604, "grad_norm": 2.8852577209472656, "learning_rate": 4.8582144247922606e-06, "loss": 0.409, "step": 11150 }, { "epoch": 1.3569820505019776, "grad_norm": 1.6108317375183105, "learning_rate": 4.8565414452451986e-06, "loss": 0.4154, "step": 11151 }, { "epoch": 1.3571037420139946, "grad_norm": 1.7871568202972412, "learning_rate": 4.854868661417385e-06, "loss": 0.3947, "step": 11152 }, { "epoch": 1.3572254335260117, "grad_norm": 1.8654072284698486, "learning_rate": 4.85319607337247e-06, "loss": 0.3953, "step": 11153 }, { "epoch": 1.3573471250380287, "grad_norm": 1.3134747743606567, "learning_rate": 4.8515236811740965e-06, "loss": 0.3352, "step": 11154 }, { "epoch": 1.3574688165500457, "grad_norm": 1.8670462369918823, "learning_rate": 4.849851484885905e-06, "loss": 0.4032, "step": 11155 }, { "epoch": 1.3575905080620627, "grad_norm": 1.9213924407958984, "learning_rate": 4.8481794845715195e-06, "loss": 0.3549, "step": 11156 }, { "epoch": 1.3577121995740797, "grad_norm": 2.5830886363983154, "learning_rate": 4.846507680294572e-06, "loss": 0.3792, "step": 11157 }, { "epoch": 1.3578338910860968, "grad_norm": 2.2905547618865967, "learning_rate": 4.844836072118676e-06, "loss": 0.3398, "step": 11158 }, { "epoch": 1.3579555825981138, "grad_norm": 2.351374626159668, "learning_rate": 4.843164660107432e-06, "loss": 0.3302, "step": 11159 }, { "epoch": 1.3580772741101308, "grad_norm": 1.4515116214752197, "learning_rate": 4.841493444324452e-06, "loss": 0.3071, "step": 11160 }, { "epoch": 1.3581989656221478, "grad_norm": 1.2680836915969849, "learning_rate": 4.839822424833324e-06, "loss": 0.3357, "step": 11161 }, { "epoch": 1.3583206571341648, "grad_norm": 2.178415298461914, "learning_rate": 4.8381516016976305e-06, "loss": 0.3983, "step": 11162 }, { "epoch": 1.3584423486461819, "grad_norm": 1.6810710430145264, "learning_rate": 4.836480974980956e-06, "loss": 0.3884, "step": 11163 }, { "epoch": 1.3585640401581989, "grad_norm": 1.5945277214050293, "learning_rate": 4.834810544746867e-06, "loss": 0.3873, "step": 11164 }, { "epoch": 1.358685731670216, "grad_norm": 5.168517589569092, "learning_rate": 4.83314031105893e-06, "loss": 0.3778, "step": 11165 }, { "epoch": 1.358807423182233, "grad_norm": 2.691084623336792, "learning_rate": 4.8314702739806984e-06, "loss": 0.416, "step": 11166 }, { "epoch": 1.3589291146942502, "grad_norm": 2.2331182956695557, "learning_rate": 4.829800433575717e-06, "loss": 0.4013, "step": 11167 }, { "epoch": 1.3590508062062672, "grad_norm": 1.7573703527450562, "learning_rate": 4.828130789907536e-06, "loss": 0.3484, "step": 11168 }, { "epoch": 1.3591724977182842, "grad_norm": 2.2589328289031982, "learning_rate": 4.826461343039682e-06, "loss": 0.3059, "step": 11169 }, { "epoch": 1.3592941892303012, "grad_norm": 5.430410861968994, "learning_rate": 4.8247920930356795e-06, "loss": 0.4594, "step": 11170 }, { "epoch": 1.3594158807423182, "grad_norm": 1.2312405109405518, "learning_rate": 4.823123039959054e-06, "loss": 0.3406, "step": 11171 }, { "epoch": 1.3595375722543352, "grad_norm": 1.5096276998519897, "learning_rate": 4.821454183873312e-06, "loss": 0.3146, "step": 11172 }, { "epoch": 1.3596592637663523, "grad_norm": 1.4163577556610107, "learning_rate": 4.819785524841954e-06, "loss": 0.3208, "step": 11173 }, { "epoch": 1.3597809552783693, "grad_norm": 1.7854074239730835, "learning_rate": 4.818117062928481e-06, "loss": 0.3653, "step": 11174 }, { "epoch": 1.3599026467903863, "grad_norm": 3.257835865020752, "learning_rate": 4.8164487981963816e-06, "loss": 0.4086, "step": 11175 }, { "epoch": 1.3600243383024035, "grad_norm": 1.231567621231079, "learning_rate": 4.8147807307091345e-06, "loss": 0.3517, "step": 11176 }, { "epoch": 1.3601460298144206, "grad_norm": 1.357016921043396, "learning_rate": 4.813112860530213e-06, "loss": 0.3041, "step": 11177 }, { "epoch": 1.3602677213264376, "grad_norm": 2.265507459640503, "learning_rate": 4.811445187723081e-06, "loss": 0.3566, "step": 11178 }, { "epoch": 1.3603894128384546, "grad_norm": 1.910288691520691, "learning_rate": 4.809777712351202e-06, "loss": 0.3672, "step": 11179 }, { "epoch": 1.3605111043504716, "grad_norm": 2.2209866046905518, "learning_rate": 4.808110434478024e-06, "loss": 0.4218, "step": 11180 }, { "epoch": 1.3606327958624886, "grad_norm": 3.924684524536133, "learning_rate": 4.806443354166986e-06, "loss": 0.4281, "step": 11181 }, { "epoch": 1.3607544873745057, "grad_norm": 1.9214997291564941, "learning_rate": 4.804776471481534e-06, "loss": 0.3956, "step": 11182 }, { "epoch": 1.3608761788865227, "grad_norm": 2.4110934734344482, "learning_rate": 4.803109786485089e-06, "loss": 0.3236, "step": 11183 }, { "epoch": 1.3609978703985397, "grad_norm": 1.5801680088043213, "learning_rate": 4.801443299241072e-06, "loss": 0.375, "step": 11184 }, { "epoch": 1.3611195619105567, "grad_norm": 1.6021701097488403, "learning_rate": 4.7997770098129e-06, "loss": 0.3742, "step": 11185 }, { "epoch": 1.3612412534225737, "grad_norm": 1.6113061904907227, "learning_rate": 4.798110918263978e-06, "loss": 0.2971, "step": 11186 }, { "epoch": 1.3613629449345908, "grad_norm": 1.3568525314331055, "learning_rate": 4.796445024657701e-06, "loss": 0.3095, "step": 11187 }, { "epoch": 1.3614846364466078, "grad_norm": 2.1532278060913086, "learning_rate": 4.794779329057463e-06, "loss": 0.4125, "step": 11188 }, { "epoch": 1.3616063279586248, "grad_norm": 1.948684811592102, "learning_rate": 4.793113831526641e-06, "loss": 0.39, "step": 11189 }, { "epoch": 1.3617280194706418, "grad_norm": 3.202752113342285, "learning_rate": 4.791448532128621e-06, "loss": 0.44, "step": 11190 }, { "epoch": 1.3618497109826588, "grad_norm": 1.9924675226211548, "learning_rate": 4.789783430926763e-06, "loss": 0.3694, "step": 11191 }, { "epoch": 1.361971402494676, "grad_norm": 1.710642695426941, "learning_rate": 4.7881185279844275e-06, "loss": 0.3514, "step": 11192 }, { "epoch": 1.362093094006693, "grad_norm": 1.8457367420196533, "learning_rate": 4.786453823364975e-06, "loss": 0.4265, "step": 11193 }, { "epoch": 1.3622147855187101, "grad_norm": 1.7339402437210083, "learning_rate": 4.784789317131744e-06, "loss": 0.4053, "step": 11194 }, { "epoch": 1.3623364770307271, "grad_norm": 1.512866497039795, "learning_rate": 4.783125009348072e-06, "loss": 0.3624, "step": 11195 }, { "epoch": 1.3624581685427442, "grad_norm": 1.9473291635513306, "learning_rate": 4.781460900077294e-06, "loss": 0.3932, "step": 11196 }, { "epoch": 1.3625798600547612, "grad_norm": 1.3948858976364136, "learning_rate": 4.779796989382728e-06, "loss": 0.3708, "step": 11197 }, { "epoch": 1.3627015515667782, "grad_norm": 1.562318205833435, "learning_rate": 4.7781332773276945e-06, "loss": 0.3409, "step": 11198 }, { "epoch": 1.3628232430787952, "grad_norm": 4.144116401672363, "learning_rate": 4.776469763975503e-06, "loss": 0.3516, "step": 11199 }, { "epoch": 1.3629449345908122, "grad_norm": 4.386763095855713, "learning_rate": 4.774806449389442e-06, "loss": 0.3822, "step": 11200 }, { "epoch": 1.3630666261028292, "grad_norm": 1.58619225025177, "learning_rate": 4.773143333632814e-06, "loss": 0.4008, "step": 11201 }, { "epoch": 1.3631883176148465, "grad_norm": 2.9979045391082764, "learning_rate": 4.771480416768902e-06, "loss": 0.3307, "step": 11202 }, { "epoch": 1.3633100091268635, "grad_norm": 1.6023509502410889, "learning_rate": 4.769817698860978e-06, "loss": 0.3813, "step": 11203 }, { "epoch": 1.3634317006388805, "grad_norm": 2.7294957637786865, "learning_rate": 4.768155179972321e-06, "loss": 0.4011, "step": 11204 }, { "epoch": 1.3635533921508975, "grad_norm": 1.89555823802948, "learning_rate": 4.7664928601661844e-06, "loss": 0.4013, "step": 11205 }, { "epoch": 1.3636750836629146, "grad_norm": 1.9388405084609985, "learning_rate": 4.76483073950583e-06, "loss": 0.437, "step": 11206 }, { "epoch": 1.3637967751749316, "grad_norm": 2.4625535011291504, "learning_rate": 4.763168818054505e-06, "loss": 0.2918, "step": 11207 }, { "epoch": 1.3639184666869486, "grad_norm": 2.10256028175354, "learning_rate": 4.761507095875439e-06, "loss": 0.3831, "step": 11208 }, { "epoch": 1.3640401581989656, "grad_norm": 3.1596407890319824, "learning_rate": 4.759845573031877e-06, "loss": 0.4174, "step": 11209 }, { "epoch": 1.3641618497109826, "grad_norm": 1.5453768968582153, "learning_rate": 4.758184249587041e-06, "loss": 0.3806, "step": 11210 }, { "epoch": 1.3642835412229997, "grad_norm": 2.5782411098480225, "learning_rate": 4.756523125604137e-06, "loss": 0.4121, "step": 11211 }, { "epoch": 1.3644052327350167, "grad_norm": 1.759214162826538, "learning_rate": 4.754862201146384e-06, "loss": 0.3601, "step": 11212 }, { "epoch": 1.3645269242470337, "grad_norm": 3.6430277824401855, "learning_rate": 4.753201476276978e-06, "loss": 0.4007, "step": 11213 }, { "epoch": 1.3646486157590507, "grad_norm": 6.325989246368408, "learning_rate": 4.75154095105912e-06, "loss": 0.3273, "step": 11214 }, { "epoch": 1.3647703072710677, "grad_norm": 2.039947986602783, "learning_rate": 4.749880625555991e-06, "loss": 0.3529, "step": 11215 }, { "epoch": 1.3648919987830848, "grad_norm": 2.276052951812744, "learning_rate": 4.74822049983077e-06, "loss": 0.4259, "step": 11216 }, { "epoch": 1.3650136902951018, "grad_norm": 1.8155701160430908, "learning_rate": 4.746560573946633e-06, "loss": 0.4126, "step": 11217 }, { "epoch": 1.365135381807119, "grad_norm": 2.353361129760742, "learning_rate": 4.744900847966739e-06, "loss": 0.3198, "step": 11218 }, { "epoch": 1.365257073319136, "grad_norm": 2.2139289379119873, "learning_rate": 4.743241321954242e-06, "loss": 0.4392, "step": 11219 }, { "epoch": 1.365378764831153, "grad_norm": 1.3920056819915771, "learning_rate": 4.741581995972298e-06, "loss": 0.3511, "step": 11220 }, { "epoch": 1.36550045634317, "grad_norm": 2.7540769577026367, "learning_rate": 4.739922870084041e-06, "loss": 0.3217, "step": 11221 }, { "epoch": 1.365622147855187, "grad_norm": 1.6999354362487793, "learning_rate": 4.738263944352609e-06, "loss": 0.3583, "step": 11222 }, { "epoch": 1.3657438393672041, "grad_norm": 1.662536382675171, "learning_rate": 4.736605218841124e-06, "loss": 0.3725, "step": 11223 }, { "epoch": 1.3658655308792211, "grad_norm": 1.5856044292449951, "learning_rate": 4.734946693612702e-06, "loss": 0.4252, "step": 11224 }, { "epoch": 1.3659872223912382, "grad_norm": 4.299996852874756, "learning_rate": 4.733288368730459e-06, "loss": 0.3554, "step": 11225 }, { "epoch": 1.3661089139032552, "grad_norm": 1.7832560539245605, "learning_rate": 4.731630244257497e-06, "loss": 0.3323, "step": 11226 }, { "epoch": 1.3662306054152724, "grad_norm": 1.8432408571243286, "learning_rate": 4.729972320256903e-06, "loss": 0.3535, "step": 11227 }, { "epoch": 1.3663522969272894, "grad_norm": 1.6611496210098267, "learning_rate": 4.728314596791777e-06, "loss": 0.3071, "step": 11228 }, { "epoch": 1.3664739884393065, "grad_norm": 1.9368723630905151, "learning_rate": 4.726657073925189e-06, "loss": 0.3531, "step": 11229 }, { "epoch": 1.3665956799513235, "grad_norm": 1.4649137258529663, "learning_rate": 4.724999751720214e-06, "loss": 0.3766, "step": 11230 }, { "epoch": 1.3667173714633405, "grad_norm": 1.8864132165908813, "learning_rate": 4.723342630239919e-06, "loss": 0.3952, "step": 11231 }, { "epoch": 1.3668390629753575, "grad_norm": 3.544740915298462, "learning_rate": 4.72168570954736e-06, "loss": 0.4523, "step": 11232 }, { "epoch": 1.3669607544873745, "grad_norm": 2.1816415786743164, "learning_rate": 4.720028989705584e-06, "loss": 0.3944, "step": 11233 }, { "epoch": 1.3670824459993915, "grad_norm": 1.3318713903427124, "learning_rate": 4.718372470777635e-06, "loss": 0.3632, "step": 11234 }, { "epoch": 1.3672041375114086, "grad_norm": 1.5592565536499023, "learning_rate": 4.716716152826542e-06, "loss": 0.3181, "step": 11235 }, { "epoch": 1.3673258290234256, "grad_norm": 2.314906597137451, "learning_rate": 4.7150600359153385e-06, "loss": 0.3936, "step": 11236 }, { "epoch": 1.3674475205354426, "grad_norm": 1.9761723279953003, "learning_rate": 4.7134041201070406e-06, "loss": 0.4037, "step": 11237 }, { "epoch": 1.3675692120474596, "grad_norm": 3.8374202251434326, "learning_rate": 4.711748405464655e-06, "loss": 0.4521, "step": 11238 }, { "epoch": 1.3676909035594766, "grad_norm": 2.2602813243865967, "learning_rate": 4.710092892051191e-06, "loss": 0.3984, "step": 11239 }, { "epoch": 1.3678125950714937, "grad_norm": 1.8705501556396484, "learning_rate": 4.708437579929642e-06, "loss": 0.4067, "step": 11240 }, { "epoch": 1.3679342865835107, "grad_norm": 1.5998222827911377, "learning_rate": 4.706782469162994e-06, "loss": 0.3658, "step": 11241 }, { "epoch": 1.3680559780955277, "grad_norm": 2.1664841175079346, "learning_rate": 4.705127559814231e-06, "loss": 0.3904, "step": 11242 }, { "epoch": 1.368177669607545, "grad_norm": 1.323347806930542, "learning_rate": 4.703472851946325e-06, "loss": 0.3457, "step": 11243 }, { "epoch": 1.368299361119562, "grad_norm": 1.7622110843658447, "learning_rate": 4.701818345622238e-06, "loss": 0.3091, "step": 11244 }, { "epoch": 1.368421052631579, "grad_norm": 1.828781008720398, "learning_rate": 4.700164040904936e-06, "loss": 0.3753, "step": 11245 }, { "epoch": 1.368542744143596, "grad_norm": 1.8733725547790527, "learning_rate": 4.698509937857354e-06, "loss": 0.4113, "step": 11246 }, { "epoch": 1.368664435655613, "grad_norm": 1.968394160270691, "learning_rate": 4.696856036542447e-06, "loss": 0.3656, "step": 11247 }, { "epoch": 1.36878612716763, "grad_norm": 1.6193249225616455, "learning_rate": 4.695202337023143e-06, "loss": 0.4199, "step": 11248 }, { "epoch": 1.368907818679647, "grad_norm": 2.1883695125579834, "learning_rate": 4.693548839362368e-06, "loss": 0.3244, "step": 11249 }, { "epoch": 1.369029510191664, "grad_norm": 1.4155499935150146, "learning_rate": 4.691895543623048e-06, "loss": 0.375, "step": 11250 }, { "epoch": 1.369151201703681, "grad_norm": 1.5698657035827637, "learning_rate": 4.690242449868089e-06, "loss": 0.3605, "step": 11251 }, { "epoch": 1.3692728932156983, "grad_norm": 2.13388991355896, "learning_rate": 4.6885895581603915e-06, "loss": 0.4531, "step": 11252 }, { "epoch": 1.3693945847277154, "grad_norm": 1.7883191108703613, "learning_rate": 4.686936868562859e-06, "loss": 0.3171, "step": 11253 }, { "epoch": 1.3695162762397324, "grad_norm": 1.669055700302124, "learning_rate": 4.685284381138376e-06, "loss": 0.3846, "step": 11254 }, { "epoch": 1.3696379677517494, "grad_norm": 2.980462074279785, "learning_rate": 4.683632095949821e-06, "loss": 0.3179, "step": 11255 }, { "epoch": 1.3697596592637664, "grad_norm": 3.4764130115509033, "learning_rate": 4.681980013060075e-06, "loss": 0.3374, "step": 11256 }, { "epoch": 1.3698813507757834, "grad_norm": 1.9100525379180908, "learning_rate": 4.680328132531989e-06, "loss": 0.3807, "step": 11257 }, { "epoch": 1.3700030422878005, "grad_norm": 1.5455349683761597, "learning_rate": 4.678676454428434e-06, "loss": 0.3704, "step": 11258 }, { "epoch": 1.3701247337998175, "grad_norm": 1.6455035209655762, "learning_rate": 4.677024978812254e-06, "loss": 0.3814, "step": 11259 }, { "epoch": 1.3702464253118345, "grad_norm": 1.8717092275619507, "learning_rate": 4.675373705746287e-06, "loss": 0.3903, "step": 11260 }, { "epoch": 1.3703681168238515, "grad_norm": 1.6716012954711914, "learning_rate": 4.673722635293375e-06, "loss": 0.3497, "step": 11261 }, { "epoch": 1.3704898083358685, "grad_norm": 2.577040195465088, "learning_rate": 4.672071767516343e-06, "loss": 0.3992, "step": 11262 }, { "epoch": 1.3706114998478856, "grad_norm": 1.626160740852356, "learning_rate": 4.670421102478004e-06, "loss": 0.3655, "step": 11263 }, { "epoch": 1.3707331913599026, "grad_norm": 1.9731955528259277, "learning_rate": 4.668770640241178e-06, "loss": 0.369, "step": 11264 }, { "epoch": 1.3708548828719196, "grad_norm": 1.845909833908081, "learning_rate": 4.667120380868661e-06, "loss": 0.3667, "step": 11265 }, { "epoch": 1.3709765743839366, "grad_norm": 1.9171596765518188, "learning_rate": 4.665470324423255e-06, "loss": 0.302, "step": 11266 }, { "epoch": 1.3710982658959536, "grad_norm": 1.7243751287460327, "learning_rate": 4.6638204709677445e-06, "loss": 0.3544, "step": 11267 }, { "epoch": 1.3712199574079709, "grad_norm": 1.6981006860733032, "learning_rate": 4.6621708205649105e-06, "loss": 0.4065, "step": 11268 }, { "epoch": 1.3713416489199879, "grad_norm": 1.5829240083694458, "learning_rate": 4.660521373277525e-06, "loss": 0.3857, "step": 11269 }, { "epoch": 1.371463340432005, "grad_norm": 1.477675199508667, "learning_rate": 4.658872129168355e-06, "loss": 0.3129, "step": 11270 }, { "epoch": 1.371585031944022, "grad_norm": 1.269054651260376, "learning_rate": 4.657223088300151e-06, "loss": 0.3613, "step": 11271 }, { "epoch": 1.371706723456039, "grad_norm": 3.0516796112060547, "learning_rate": 4.655574250735671e-06, "loss": 0.4244, "step": 11272 }, { "epoch": 1.371828414968056, "grad_norm": 2.2524068355560303, "learning_rate": 4.653925616537651e-06, "loss": 0.3565, "step": 11273 }, { "epoch": 1.371950106480073, "grad_norm": 1.4977946281433105, "learning_rate": 4.65227718576883e-06, "loss": 0.3243, "step": 11274 }, { "epoch": 1.37207179799209, "grad_norm": 3.0709052085876465, "learning_rate": 4.650628958491931e-06, "loss": 0.3919, "step": 11275 }, { "epoch": 1.372193489504107, "grad_norm": 1.6849305629730225, "learning_rate": 4.648980934769668e-06, "loss": 0.348, "step": 11276 }, { "epoch": 1.3723151810161243, "grad_norm": 1.665854811668396, "learning_rate": 4.647333114664762e-06, "loss": 0.3312, "step": 11277 }, { "epoch": 1.3724368725281413, "grad_norm": 1.9021978378295898, "learning_rate": 4.645685498239909e-06, "loss": 0.3391, "step": 11278 }, { "epoch": 1.3725585640401583, "grad_norm": 1.8014719486236572, "learning_rate": 4.644038085557806e-06, "loss": 0.344, "step": 11279 }, { "epoch": 1.3726802555521753, "grad_norm": 1.668756127357483, "learning_rate": 4.642390876681141e-06, "loss": 0.3821, "step": 11280 }, { "epoch": 1.3728019470641923, "grad_norm": 1.6157748699188232, "learning_rate": 4.640743871672588e-06, "loss": 0.3467, "step": 11281 }, { "epoch": 1.3729236385762094, "grad_norm": 1.963357925415039, "learning_rate": 4.639097070594828e-06, "loss": 0.3915, "step": 11282 }, { "epoch": 1.3730453300882264, "grad_norm": 1.9872376918792725, "learning_rate": 4.637450473510521e-06, "loss": 0.3925, "step": 11283 }, { "epoch": 1.3731670216002434, "grad_norm": 2.877149820327759, "learning_rate": 4.635804080482319e-06, "loss": 0.4215, "step": 11284 }, { "epoch": 1.3732887131122604, "grad_norm": 2.3329317569732666, "learning_rate": 4.634157891572879e-06, "loss": 0.4413, "step": 11285 }, { "epoch": 1.3734104046242774, "grad_norm": 2.8469250202178955, "learning_rate": 4.632511906844837e-06, "loss": 0.4668, "step": 11286 }, { "epoch": 1.3735320961362945, "grad_norm": 2.2457942962646484, "learning_rate": 4.630866126360824e-06, "loss": 0.4771, "step": 11287 }, { "epoch": 1.3736537876483115, "grad_norm": 2.9940145015716553, "learning_rate": 4.629220550183473e-06, "loss": 0.296, "step": 11288 }, { "epoch": 1.3737754791603285, "grad_norm": 1.4465837478637695, "learning_rate": 4.6275751783753964e-06, "loss": 0.3473, "step": 11289 }, { "epoch": 1.3738971706723455, "grad_norm": 2.904381036758423, "learning_rate": 4.625930010999201e-06, "loss": 0.3652, "step": 11290 }, { "epoch": 1.3740188621843625, "grad_norm": 1.8450020551681519, "learning_rate": 4.6242850481174995e-06, "loss": 0.4307, "step": 11291 }, { "epoch": 1.3741405536963796, "grad_norm": 2.3697352409362793, "learning_rate": 4.6226402897928715e-06, "loss": 0.3899, "step": 11292 }, { "epoch": 1.3742622452083968, "grad_norm": 1.4843295812606812, "learning_rate": 4.620995736087915e-06, "loss": 0.3708, "step": 11293 }, { "epoch": 1.3743839367204138, "grad_norm": 1.7908962965011597, "learning_rate": 4.619351387065205e-06, "loss": 0.3973, "step": 11294 }, { "epoch": 1.3745056282324308, "grad_norm": 1.9400103092193604, "learning_rate": 4.6177072427873075e-06, "loss": 0.3714, "step": 11295 }, { "epoch": 1.3746273197444479, "grad_norm": 1.4860111474990845, "learning_rate": 4.616063303316795e-06, "loss": 0.4392, "step": 11296 }, { "epoch": 1.3747490112564649, "grad_norm": 1.3192747831344604, "learning_rate": 4.614419568716216e-06, "loss": 0.3788, "step": 11297 }, { "epoch": 1.374870702768482, "grad_norm": 2.877135992050171, "learning_rate": 4.612776039048118e-06, "loss": 0.5236, "step": 11298 }, { "epoch": 1.374992394280499, "grad_norm": 1.6368414163589478, "learning_rate": 4.611132714375046e-06, "loss": 0.3863, "step": 11299 }, { "epoch": 1.375114085792516, "grad_norm": 1.3492391109466553, "learning_rate": 4.609489594759528e-06, "loss": 0.3683, "step": 11300 }, { "epoch": 1.375235777304533, "grad_norm": 1.476997971534729, "learning_rate": 4.6078466802640855e-06, "loss": 0.3085, "step": 11301 }, { "epoch": 1.37535746881655, "grad_norm": 1.7999513149261475, "learning_rate": 4.606203970951245e-06, "loss": 0.3299, "step": 11302 }, { "epoch": 1.3754791603285672, "grad_norm": 1.768798828125, "learning_rate": 4.604561466883502e-06, "loss": 0.3842, "step": 11303 }, { "epoch": 1.3756008518405842, "grad_norm": 4.651552200317383, "learning_rate": 4.602919168123366e-06, "loss": 0.3592, "step": 11304 }, { "epoch": 1.3757225433526012, "grad_norm": 1.3234188556671143, "learning_rate": 4.6012770747333255e-06, "loss": 0.3503, "step": 11305 }, { "epoch": 1.3758442348646183, "grad_norm": 1.2548002004623413, "learning_rate": 4.599635186775865e-06, "loss": 0.3456, "step": 11306 }, { "epoch": 1.3759659263766353, "grad_norm": 3.3660314083099365, "learning_rate": 4.597993504313466e-06, "loss": 0.2967, "step": 11307 }, { "epoch": 1.3760876178886523, "grad_norm": 2.4953486919403076, "learning_rate": 4.596352027408597e-06, "loss": 0.3775, "step": 11308 }, { "epoch": 1.3762093094006693, "grad_norm": 1.842812180519104, "learning_rate": 4.594710756123715e-06, "loss": 0.3858, "step": 11309 }, { "epoch": 1.3763310009126863, "grad_norm": 1.8454877138137817, "learning_rate": 4.593069690521279e-06, "loss": 0.3118, "step": 11310 }, { "epoch": 1.3764526924247034, "grad_norm": 3.179696559906006, "learning_rate": 4.5914288306637346e-06, "loss": 0.476, "step": 11311 }, { "epoch": 1.3765743839367204, "grad_norm": 2.8451809883117676, "learning_rate": 4.5897881766135136e-06, "loss": 0.4487, "step": 11312 }, { "epoch": 1.3766960754487374, "grad_norm": 2.830974817276001, "learning_rate": 4.588147728433055e-06, "loss": 0.2987, "step": 11313 }, { "epoch": 1.3768177669607544, "grad_norm": 1.6668356657028198, "learning_rate": 4.586507486184778e-06, "loss": 0.3726, "step": 11314 }, { "epoch": 1.3769394584727714, "grad_norm": 1.8309497833251953, "learning_rate": 4.584867449931095e-06, "loss": 0.4037, "step": 11315 }, { "epoch": 1.3770611499847885, "grad_norm": 2.203819751739502, "learning_rate": 4.5832276197344165e-06, "loss": 0.4586, "step": 11316 }, { "epoch": 1.3771828414968055, "grad_norm": 1.5618456602096558, "learning_rate": 4.581587995657133e-06, "loss": 0.3356, "step": 11317 }, { "epoch": 1.3773045330088227, "grad_norm": 1.8384875059127808, "learning_rate": 4.579948577761647e-06, "loss": 0.4344, "step": 11318 }, { "epoch": 1.3774262245208397, "grad_norm": 2.093533992767334, "learning_rate": 4.578309366110336e-06, "loss": 0.343, "step": 11319 }, { "epoch": 1.3775479160328568, "grad_norm": 1.7189770936965942, "learning_rate": 4.576670360765572e-06, "loss": 0.3907, "step": 11320 }, { "epoch": 1.3776696075448738, "grad_norm": 1.4578478336334229, "learning_rate": 4.57503156178973e-06, "loss": 0.3138, "step": 11321 }, { "epoch": 1.3777912990568908, "grad_norm": 2.045624256134033, "learning_rate": 4.573392969245167e-06, "loss": 0.4335, "step": 11322 }, { "epoch": 1.3779129905689078, "grad_norm": 1.5540378093719482, "learning_rate": 4.57175458319423e-06, "loss": 0.3568, "step": 11323 }, { "epoch": 1.3780346820809248, "grad_norm": 2.0552492141723633, "learning_rate": 4.5701164036992706e-06, "loss": 0.338, "step": 11324 }, { "epoch": 1.3781563735929419, "grad_norm": 1.56297767162323, "learning_rate": 4.56847843082262e-06, "loss": 0.3403, "step": 11325 }, { "epoch": 1.3782780651049589, "grad_norm": 1.832349181175232, "learning_rate": 4.56684066462661e-06, "loss": 0.4034, "step": 11326 }, { "epoch": 1.378399756616976, "grad_norm": 2.4731876850128174, "learning_rate": 4.565203105173557e-06, "loss": 0.339, "step": 11327 }, { "epoch": 1.3785214481289931, "grad_norm": 1.8244894742965698, "learning_rate": 4.563565752525773e-06, "loss": 0.3866, "step": 11328 }, { "epoch": 1.3786431396410102, "grad_norm": 2.375509023666382, "learning_rate": 4.561928606745567e-06, "loss": 0.2979, "step": 11329 }, { "epoch": 1.3787648311530272, "grad_norm": 1.6645686626434326, "learning_rate": 4.560291667895235e-06, "loss": 0.3358, "step": 11330 }, { "epoch": 1.3788865226650442, "grad_norm": 1.4858492612838745, "learning_rate": 4.55865493603706e-06, "loss": 0.3173, "step": 11331 }, { "epoch": 1.3790082141770612, "grad_norm": 3.204472780227661, "learning_rate": 4.557018411233332e-06, "loss": 0.3732, "step": 11332 }, { "epoch": 1.3791299056890782, "grad_norm": 1.775059700012207, "learning_rate": 4.555382093546316e-06, "loss": 0.3285, "step": 11333 }, { "epoch": 1.3792515972010952, "grad_norm": 2.6282403469085693, "learning_rate": 4.553745983038285e-06, "loss": 0.4236, "step": 11334 }, { "epoch": 1.3793732887131123, "grad_norm": 4.959954738616943, "learning_rate": 4.552110079771491e-06, "loss": 0.4402, "step": 11335 }, { "epoch": 1.3794949802251293, "grad_norm": 1.8816577196121216, "learning_rate": 4.550474383808184e-06, "loss": 0.3597, "step": 11336 }, { "epoch": 1.3796166717371463, "grad_norm": 1.4939597845077515, "learning_rate": 4.548838895210611e-06, "loss": 0.352, "step": 11337 }, { "epoch": 1.3797383632491633, "grad_norm": 1.2710790634155273, "learning_rate": 4.547203614040999e-06, "loss": 0.3423, "step": 11338 }, { "epoch": 1.3798600547611803, "grad_norm": 1.7677258253097534, "learning_rate": 4.545568540361572e-06, "loss": 0.2839, "step": 11339 }, { "epoch": 1.3799817462731974, "grad_norm": 2.359163999557495, "learning_rate": 4.543933674234558e-06, "loss": 0.3293, "step": 11340 }, { "epoch": 1.3801034377852144, "grad_norm": 2.50783634185791, "learning_rate": 4.542299015722155e-06, "loss": 0.3739, "step": 11341 }, { "epoch": 1.3802251292972314, "grad_norm": 1.5659841299057007, "learning_rate": 4.540664564886575e-06, "loss": 0.3727, "step": 11342 }, { "epoch": 1.3803468208092484, "grad_norm": 5.482038974761963, "learning_rate": 4.53903032179001e-06, "loss": 0.482, "step": 11343 }, { "epoch": 1.3804685123212657, "grad_norm": 2.9925808906555176, "learning_rate": 4.5373962864946395e-06, "loss": 0.4054, "step": 11344 }, { "epoch": 1.3805902038332827, "grad_norm": 1.8215121030807495, "learning_rate": 4.535762459062653e-06, "loss": 0.3994, "step": 11345 }, { "epoch": 1.3807118953452997, "grad_norm": 1.8021489381790161, "learning_rate": 4.534128839556213e-06, "loss": 0.3685, "step": 11346 }, { "epoch": 1.3808335868573167, "grad_norm": 3.0243401527404785, "learning_rate": 4.532495428037482e-06, "loss": 0.3313, "step": 11347 }, { "epoch": 1.3809552783693337, "grad_norm": 1.5342028141021729, "learning_rate": 4.530862224568624e-06, "loss": 0.3824, "step": 11348 }, { "epoch": 1.3810769698813508, "grad_norm": 1.3513374328613281, "learning_rate": 4.5292292292117724e-06, "loss": 0.3075, "step": 11349 }, { "epoch": 1.3811986613933678, "grad_norm": 1.4740082025527954, "learning_rate": 4.527596442029075e-06, "loss": 0.3729, "step": 11350 }, { "epoch": 1.3813203529053848, "grad_norm": 1.7651846408843994, "learning_rate": 4.5259638630826605e-06, "loss": 0.4135, "step": 11351 }, { "epoch": 1.3814420444174018, "grad_norm": 2.790583610534668, "learning_rate": 4.5243314924346485e-06, "loss": 0.359, "step": 11352 }, { "epoch": 1.381563735929419, "grad_norm": 2.103055000305176, "learning_rate": 4.52269933014716e-06, "loss": 0.3836, "step": 11353 }, { "epoch": 1.381685427441436, "grad_norm": 1.776471495628357, "learning_rate": 4.521067376282301e-06, "loss": 0.4036, "step": 11354 }, { "epoch": 1.381807118953453, "grad_norm": 1.5256181955337524, "learning_rate": 4.519435630902164e-06, "loss": 0.3902, "step": 11355 }, { "epoch": 1.3819288104654701, "grad_norm": 2.557204484939575, "learning_rate": 4.517804094068849e-06, "loss": 0.4893, "step": 11356 }, { "epoch": 1.3820505019774871, "grad_norm": 3.6220035552978516, "learning_rate": 4.516172765844436e-06, "loss": 0.3794, "step": 11357 }, { "epoch": 1.3821721934895042, "grad_norm": 2.16264009475708, "learning_rate": 4.5145416462909976e-06, "loss": 0.459, "step": 11358 }, { "epoch": 1.3822938850015212, "grad_norm": 1.595890760421753, "learning_rate": 4.512910735470606e-06, "loss": 0.3543, "step": 11359 }, { "epoch": 1.3824155765135382, "grad_norm": 1.916425347328186, "learning_rate": 4.5112800334453185e-06, "loss": 0.3977, "step": 11360 }, { "epoch": 1.3825372680255552, "grad_norm": 1.6924155950546265, "learning_rate": 4.509649540277188e-06, "loss": 0.3828, "step": 11361 }, { "epoch": 1.3826589595375722, "grad_norm": 1.6039602756500244, "learning_rate": 4.5080192560282564e-06, "loss": 0.3664, "step": 11362 }, { "epoch": 1.3827806510495892, "grad_norm": 1.7230364084243774, "learning_rate": 4.506389180760557e-06, "loss": 0.3634, "step": 11363 }, { "epoch": 1.3829023425616063, "grad_norm": 1.7189810276031494, "learning_rate": 4.504759314536123e-06, "loss": 0.4175, "step": 11364 }, { "epoch": 1.3830240340736233, "grad_norm": 1.8771231174468994, "learning_rate": 4.503129657416971e-06, "loss": 0.3689, "step": 11365 }, { "epoch": 1.3831457255856403, "grad_norm": 1.9943089485168457, "learning_rate": 4.50150020946511e-06, "loss": 0.3966, "step": 11366 }, { "epoch": 1.3832674170976573, "grad_norm": 2.8318376541137695, "learning_rate": 4.4998709707425525e-06, "loss": 0.3531, "step": 11367 }, { "epoch": 1.3833891086096743, "grad_norm": 2.465700626373291, "learning_rate": 4.498241941311288e-06, "loss": 0.4314, "step": 11368 }, { "epoch": 1.3835108001216916, "grad_norm": 2.425323009490967, "learning_rate": 4.496613121233302e-06, "loss": 0.3252, "step": 11369 }, { "epoch": 1.3836324916337086, "grad_norm": 1.7423495054244995, "learning_rate": 4.494984510570583e-06, "loss": 0.3892, "step": 11370 }, { "epoch": 1.3837541831457256, "grad_norm": 1.561933994293213, "learning_rate": 4.493356109385097e-06, "loss": 0.3891, "step": 11371 }, { "epoch": 1.3838758746577426, "grad_norm": 2.1172046661376953, "learning_rate": 4.4917279177388085e-06, "loss": 0.402, "step": 11372 }, { "epoch": 1.3839975661697597, "grad_norm": 1.6774377822875977, "learning_rate": 4.490099935693675e-06, "loss": 0.3604, "step": 11373 }, { "epoch": 1.3841192576817767, "grad_norm": 1.4198288917541504, "learning_rate": 4.48847216331164e-06, "loss": 0.3604, "step": 11374 }, { "epoch": 1.3842409491937937, "grad_norm": 1.3024564981460571, "learning_rate": 4.486844600654651e-06, "loss": 0.3492, "step": 11375 }, { "epoch": 1.3843626407058107, "grad_norm": 1.8111578226089478, "learning_rate": 4.485217247784638e-06, "loss": 0.3669, "step": 11376 }, { "epoch": 1.3844843322178277, "grad_norm": 1.1767746210098267, "learning_rate": 4.483590104763519e-06, "loss": 0.3423, "step": 11377 }, { "epoch": 1.384606023729845, "grad_norm": 1.3728400468826294, "learning_rate": 4.481963171653217e-06, "loss": 0.3543, "step": 11378 }, { "epoch": 1.384727715241862, "grad_norm": 1.6849199533462524, "learning_rate": 4.48033644851564e-06, "loss": 0.3278, "step": 11379 }, { "epoch": 1.384849406753879, "grad_norm": 1.7433174848556519, "learning_rate": 4.478709935412681e-06, "loss": 0.4172, "step": 11380 }, { "epoch": 1.384971098265896, "grad_norm": 1.8216497898101807, "learning_rate": 4.477083632406243e-06, "loss": 0.3028, "step": 11381 }, { "epoch": 1.385092789777913, "grad_norm": 1.8246872425079346, "learning_rate": 4.475457539558203e-06, "loss": 0.3328, "step": 11382 }, { "epoch": 1.38521448128993, "grad_norm": 1.909900426864624, "learning_rate": 4.473831656930439e-06, "loss": 0.3353, "step": 11383 }, { "epoch": 1.385336172801947, "grad_norm": 1.9581698179244995, "learning_rate": 4.472205984584821e-06, "loss": 0.3799, "step": 11384 }, { "epoch": 1.3854578643139641, "grad_norm": 2.3372864723205566, "learning_rate": 4.470580522583201e-06, "loss": 0.4281, "step": 11385 }, { "epoch": 1.3855795558259811, "grad_norm": 2.010037422180176, "learning_rate": 4.468955270987443e-06, "loss": 0.3909, "step": 11386 }, { "epoch": 1.3857012473379982, "grad_norm": 1.598394513130188, "learning_rate": 4.467330229859383e-06, "loss": 0.3731, "step": 11387 }, { "epoch": 1.3858229388500152, "grad_norm": 1.558213233947754, "learning_rate": 4.465705399260859e-06, "loss": 0.3479, "step": 11388 }, { "epoch": 1.3859446303620322, "grad_norm": 1.814674735069275, "learning_rate": 4.4640807792537025e-06, "loss": 0.4653, "step": 11389 }, { "epoch": 1.3860663218740492, "grad_norm": 1.3796453475952148, "learning_rate": 4.46245636989973e-06, "loss": 0.3472, "step": 11390 }, { "epoch": 1.3861880133860662, "grad_norm": 1.381128191947937, "learning_rate": 4.460832171260752e-06, "loss": 0.3375, "step": 11391 }, { "epoch": 1.3863097048980833, "grad_norm": 2.082923173904419, "learning_rate": 4.459208183398579e-06, "loss": 0.3844, "step": 11392 }, { "epoch": 1.3864313964101003, "grad_norm": 1.6432932615280151, "learning_rate": 4.457584406374999e-06, "loss": 0.3989, "step": 11393 }, { "epoch": 1.3865530879221175, "grad_norm": 1.6915321350097656, "learning_rate": 4.455960840251813e-06, "loss": 0.4035, "step": 11394 }, { "epoch": 1.3866747794341345, "grad_norm": 1.5092614889144897, "learning_rate": 4.454337485090789e-06, "loss": 0.3719, "step": 11395 }, { "epoch": 1.3867964709461515, "grad_norm": 1.3614698648452759, "learning_rate": 4.452714340953698e-06, "loss": 0.3252, "step": 11396 }, { "epoch": 1.3869181624581686, "grad_norm": 1.6111360788345337, "learning_rate": 4.4510914079023115e-06, "loss": 0.3535, "step": 11397 }, { "epoch": 1.3870398539701856, "grad_norm": 1.8023755550384521, "learning_rate": 4.4494686859983835e-06, "loss": 0.4033, "step": 11398 }, { "epoch": 1.3871615454822026, "grad_norm": 1.939241886138916, "learning_rate": 4.447846175303657e-06, "loss": 0.3782, "step": 11399 }, { "epoch": 1.3872832369942196, "grad_norm": 1.8333767652511597, "learning_rate": 4.44622387587988e-06, "loss": 0.3926, "step": 11400 }, { "epoch": 1.3874049285062366, "grad_norm": 1.9471579790115356, "learning_rate": 4.444601787788777e-06, "loss": 0.2893, "step": 11401 }, { "epoch": 1.3875266200182537, "grad_norm": 2.24674391746521, "learning_rate": 4.442979911092078e-06, "loss": 0.3827, "step": 11402 }, { "epoch": 1.3876483115302707, "grad_norm": 2.093810558319092, "learning_rate": 4.4413582458514955e-06, "loss": 0.3692, "step": 11403 }, { "epoch": 1.387770003042288, "grad_norm": 1.5624829530715942, "learning_rate": 4.439736792128735e-06, "loss": 0.3367, "step": 11404 }, { "epoch": 1.387891694554305, "grad_norm": 1.9874701499938965, "learning_rate": 4.438115549985501e-06, "loss": 0.3984, "step": 11405 }, { "epoch": 1.388013386066322, "grad_norm": 1.513587474822998, "learning_rate": 4.436494519483487e-06, "loss": 0.3688, "step": 11406 }, { "epoch": 1.388135077578339, "grad_norm": 2.258695363998413, "learning_rate": 4.434873700684365e-06, "loss": 0.3907, "step": 11407 }, { "epoch": 1.388256769090356, "grad_norm": 1.6159029006958008, "learning_rate": 4.433253093649822e-06, "loss": 0.4083, "step": 11408 }, { "epoch": 1.388378460602373, "grad_norm": 3.6507537364959717, "learning_rate": 4.431632698441517e-06, "loss": 0.4805, "step": 11409 }, { "epoch": 1.38850015211439, "grad_norm": 1.2714152336120605, "learning_rate": 4.430012515121117e-06, "loss": 0.3325, "step": 11410 }, { "epoch": 1.388621843626407, "grad_norm": 1.7035398483276367, "learning_rate": 4.428392543750271e-06, "loss": 0.3462, "step": 11411 }, { "epoch": 1.388743535138424, "grad_norm": 3.3468666076660156, "learning_rate": 4.426772784390617e-06, "loss": 0.458, "step": 11412 }, { "epoch": 1.388865226650441, "grad_norm": 3.2629191875457764, "learning_rate": 4.425153237103799e-06, "loss": 0.4103, "step": 11413 }, { "epoch": 1.3889869181624581, "grad_norm": 2.212270736694336, "learning_rate": 4.423533901951439e-06, "loss": 0.403, "step": 11414 }, { "epoch": 1.3891086096744751, "grad_norm": 1.7704497575759888, "learning_rate": 4.421914778995154e-06, "loss": 0.4272, "step": 11415 }, { "epoch": 1.3892303011864922, "grad_norm": 2.283195972442627, "learning_rate": 4.4202958682965595e-06, "loss": 0.379, "step": 11416 }, { "epoch": 1.3893519926985092, "grad_norm": 1.9091100692749023, "learning_rate": 4.418677169917257e-06, "loss": 0.4111, "step": 11417 }, { "epoch": 1.3894736842105262, "grad_norm": 1.59117591381073, "learning_rate": 4.417058683918841e-06, "loss": 0.421, "step": 11418 }, { "epoch": 1.3895953757225434, "grad_norm": 1.984761357307434, "learning_rate": 4.415440410362899e-06, "loss": 0.3825, "step": 11419 }, { "epoch": 1.3897170672345605, "grad_norm": 1.6840970516204834, "learning_rate": 4.413822349311005e-06, "loss": 0.4399, "step": 11420 }, { "epoch": 1.3898387587465775, "grad_norm": 2.9814164638519287, "learning_rate": 4.412204500824736e-06, "loss": 0.4524, "step": 11421 }, { "epoch": 1.3899604502585945, "grad_norm": 1.4622747898101807, "learning_rate": 4.410586864965653e-06, "loss": 0.3557, "step": 11422 }, { "epoch": 1.3900821417706115, "grad_norm": 5.3173322677612305, "learning_rate": 4.408969441795305e-06, "loss": 0.3576, "step": 11423 }, { "epoch": 1.3902038332826285, "grad_norm": 4.087156772613525, "learning_rate": 4.407352231375246e-06, "loss": 0.3436, "step": 11424 }, { "epoch": 1.3903255247946456, "grad_norm": 3.7394979000091553, "learning_rate": 4.4057352337670114e-06, "loss": 0.3127, "step": 11425 }, { "epoch": 1.3904472163066626, "grad_norm": 3.23745059967041, "learning_rate": 4.4041184490321265e-06, "loss": 0.3613, "step": 11426 }, { "epoch": 1.3905689078186796, "grad_norm": 2.2304234504699707, "learning_rate": 4.402501877232121e-06, "loss": 0.396, "step": 11427 }, { "epoch": 1.3906905993306966, "grad_norm": 1.6873611211776733, "learning_rate": 4.4008855184285045e-06, "loss": 0.3306, "step": 11428 }, { "epoch": 1.3908122908427138, "grad_norm": 1.6144554615020752, "learning_rate": 4.3992693726827855e-06, "loss": 0.3194, "step": 11429 }, { "epoch": 1.3909339823547309, "grad_norm": 1.2821298837661743, "learning_rate": 4.397653440056459e-06, "loss": 0.3537, "step": 11430 }, { "epoch": 1.3910556738667479, "grad_norm": 1.6831331253051758, "learning_rate": 4.39603772061101e-06, "loss": 0.3916, "step": 11431 }, { "epoch": 1.391177365378765, "grad_norm": 1.4870332479476929, "learning_rate": 4.394422214407932e-06, "loss": 0.3698, "step": 11432 }, { "epoch": 1.391299056890782, "grad_norm": 1.2417694330215454, "learning_rate": 4.3928069215086896e-06, "loss": 0.3397, "step": 11433 }, { "epoch": 1.391420748402799, "grad_norm": 1.4579051733016968, "learning_rate": 4.391191841974747e-06, "loss": 0.304, "step": 11434 }, { "epoch": 1.391542439914816, "grad_norm": 4.942966461181641, "learning_rate": 4.389576975867568e-06, "loss": 0.4639, "step": 11435 }, { "epoch": 1.391664131426833, "grad_norm": 2.9270763397216797, "learning_rate": 4.387962323248598e-06, "loss": 0.3798, "step": 11436 }, { "epoch": 1.39178582293885, "grad_norm": 5.224781513214111, "learning_rate": 4.386347884179274e-06, "loss": 0.4888, "step": 11437 }, { "epoch": 1.391907514450867, "grad_norm": 4.197554111480713, "learning_rate": 4.384733658721036e-06, "loss": 0.4567, "step": 11438 }, { "epoch": 1.392029205962884, "grad_norm": 1.351085901260376, "learning_rate": 4.3831196469353056e-06, "loss": 0.3538, "step": 11439 }, { "epoch": 1.392150897474901, "grad_norm": 2.068093776702881, "learning_rate": 4.381505848883496e-06, "loss": 0.2867, "step": 11440 }, { "epoch": 1.392272588986918, "grad_norm": 1.9180797338485718, "learning_rate": 4.379892264627025e-06, "loss": 0.432, "step": 11441 }, { "epoch": 1.392394280498935, "grad_norm": 2.743220329284668, "learning_rate": 4.378278894227279e-06, "loss": 0.3381, "step": 11442 }, { "epoch": 1.3925159720109521, "grad_norm": 1.7271233797073364, "learning_rate": 4.376665737745661e-06, "loss": 0.3046, "step": 11443 }, { "epoch": 1.3926376635229691, "grad_norm": 2.314553737640381, "learning_rate": 4.3750527952435514e-06, "loss": 0.4137, "step": 11444 }, { "epoch": 1.3927593550349864, "grad_norm": 5.153557300567627, "learning_rate": 4.373440066782323e-06, "loss": 0.4349, "step": 11445 }, { "epoch": 1.3928810465470034, "grad_norm": 3.608574390411377, "learning_rate": 4.371827552423349e-06, "loss": 0.4626, "step": 11446 }, { "epoch": 1.3930027380590204, "grad_norm": 1.3261542320251465, "learning_rate": 4.370215252227987e-06, "loss": 0.3246, "step": 11447 }, { "epoch": 1.3931244295710374, "grad_norm": 2.7190096378326416, "learning_rate": 4.368603166257584e-06, "loss": 0.3735, "step": 11448 }, { "epoch": 1.3932461210830545, "grad_norm": 2.8585147857666016, "learning_rate": 4.36699129457349e-06, "loss": 0.39, "step": 11449 }, { "epoch": 1.3933678125950715, "grad_norm": 2.667886734008789, "learning_rate": 4.365379637237037e-06, "loss": 0.3437, "step": 11450 }, { "epoch": 1.3934895041070885, "grad_norm": 1.6503410339355469, "learning_rate": 4.363768194309549e-06, "loss": 0.3958, "step": 11451 }, { "epoch": 1.3936111956191055, "grad_norm": 1.6660648584365845, "learning_rate": 4.362156965852354e-06, "loss": 0.3694, "step": 11452 }, { "epoch": 1.3937328871311225, "grad_norm": 1.6979115009307861, "learning_rate": 4.36054595192675e-06, "loss": 0.4006, "step": 11453 }, { "epoch": 1.3938545786431398, "grad_norm": 3.540794849395752, "learning_rate": 4.358935152594048e-06, "loss": 0.4581, "step": 11454 }, { "epoch": 1.3939762701551568, "grad_norm": 2.0232386589050293, "learning_rate": 4.357324567915541e-06, "loss": 0.3625, "step": 11455 }, { "epoch": 1.3940979616671738, "grad_norm": 1.5823118686676025, "learning_rate": 4.355714197952508e-06, "loss": 0.4557, "step": 11456 }, { "epoch": 1.3942196531791908, "grad_norm": 2.286123275756836, "learning_rate": 4.354104042766238e-06, "loss": 0.3242, "step": 11457 }, { "epoch": 1.3943413446912079, "grad_norm": 4.621548175811768, "learning_rate": 4.352494102417996e-06, "loss": 0.347, "step": 11458 }, { "epoch": 1.3944630362032249, "grad_norm": 2.9518280029296875, "learning_rate": 4.350884376969039e-06, "loss": 0.3284, "step": 11459 }, { "epoch": 1.394584727715242, "grad_norm": 1.5486581325531006, "learning_rate": 4.349274866480627e-06, "loss": 0.3911, "step": 11460 }, { "epoch": 1.394706419227259, "grad_norm": 2.0710244178771973, "learning_rate": 4.347665571014e-06, "loss": 0.4565, "step": 11461 }, { "epoch": 1.394828110739276, "grad_norm": 2.828049659729004, "learning_rate": 4.346056490630403e-06, "loss": 0.3245, "step": 11462 }, { "epoch": 1.394949802251293, "grad_norm": 1.3766549825668335, "learning_rate": 4.344447625391057e-06, "loss": 0.371, "step": 11463 }, { "epoch": 1.39507149376331, "grad_norm": 1.8920338153839111, "learning_rate": 4.342838975357187e-06, "loss": 0.3637, "step": 11464 }, { "epoch": 1.395193185275327, "grad_norm": 1.9529694318771362, "learning_rate": 4.341230540590003e-06, "loss": 0.3806, "step": 11465 }, { "epoch": 1.395314876787344, "grad_norm": 1.478337287902832, "learning_rate": 4.339622321150709e-06, "loss": 0.3799, "step": 11466 }, { "epoch": 1.395436568299361, "grad_norm": 1.3936914205551147, "learning_rate": 4.3380143171004996e-06, "loss": 0.3239, "step": 11467 }, { "epoch": 1.395558259811378, "grad_norm": 1.5418885946273804, "learning_rate": 4.336406528500567e-06, "loss": 0.3663, "step": 11468 }, { "epoch": 1.395679951323395, "grad_norm": 1.9808118343353271, "learning_rate": 4.3347989554120876e-06, "loss": 0.3557, "step": 11469 }, { "epoch": 1.3958016428354123, "grad_norm": 2.683344841003418, "learning_rate": 4.333191597896237e-06, "loss": 0.443, "step": 11470 }, { "epoch": 1.3959233343474293, "grad_norm": 2.1260132789611816, "learning_rate": 4.331584456014175e-06, "loss": 0.3739, "step": 11471 }, { "epoch": 1.3960450258594463, "grad_norm": 1.494470477104187, "learning_rate": 4.329977529827055e-06, "loss": 0.3471, "step": 11472 }, { "epoch": 1.3961667173714634, "grad_norm": 2.17232084274292, "learning_rate": 4.3283708193960295e-06, "loss": 0.329, "step": 11473 }, { "epoch": 1.3962884088834804, "grad_norm": 2.302095651626587, "learning_rate": 4.326764324782234e-06, "loss": 0.3771, "step": 11474 }, { "epoch": 1.3964101003954974, "grad_norm": 3.183126211166382, "learning_rate": 4.325158046046798e-06, "loss": 0.4264, "step": 11475 }, { "epoch": 1.3965317919075144, "grad_norm": 1.6612156629562378, "learning_rate": 4.323551983250846e-06, "loss": 0.3465, "step": 11476 }, { "epoch": 1.3966534834195314, "grad_norm": 1.7970367670059204, "learning_rate": 4.321946136455486e-06, "loss": 0.4116, "step": 11477 }, { "epoch": 1.3967751749315485, "grad_norm": 2.088991641998291, "learning_rate": 4.320340505721833e-06, "loss": 0.3573, "step": 11478 }, { "epoch": 1.3968968664435657, "grad_norm": 1.435523271560669, "learning_rate": 4.318735091110979e-06, "loss": 0.4107, "step": 11479 }, { "epoch": 1.3970185579555827, "grad_norm": 1.9187802076339722, "learning_rate": 4.3171298926840114e-06, "loss": 0.3931, "step": 11480 }, { "epoch": 1.3971402494675997, "grad_norm": 1.7297875881195068, "learning_rate": 4.315524910502017e-06, "loss": 0.4248, "step": 11481 }, { "epoch": 1.3972619409796168, "grad_norm": 1.4448972940444946, "learning_rate": 4.313920144626067e-06, "loss": 0.334, "step": 11482 }, { "epoch": 1.3973836324916338, "grad_norm": 1.4364911317825317, "learning_rate": 4.312315595117221e-06, "loss": 0.3802, "step": 11483 }, { "epoch": 1.3975053240036508, "grad_norm": 1.8982006311416626, "learning_rate": 4.310711262036543e-06, "loss": 0.3451, "step": 11484 }, { "epoch": 1.3976270155156678, "grad_norm": 1.715415120124817, "learning_rate": 4.3091071454450785e-06, "loss": 0.3993, "step": 11485 }, { "epoch": 1.3977487070276848, "grad_norm": 1.5801661014556885, "learning_rate": 4.307503245403862e-06, "loss": 0.3968, "step": 11486 }, { "epoch": 1.3978703985397019, "grad_norm": 1.4895697832107544, "learning_rate": 4.305899561973937e-06, "loss": 0.4462, "step": 11487 }, { "epoch": 1.3979920900517189, "grad_norm": 1.5394319295883179, "learning_rate": 4.304296095216314e-06, "loss": 0.3441, "step": 11488 }, { "epoch": 1.398113781563736, "grad_norm": 1.4770684242248535, "learning_rate": 4.302692845192016e-06, "loss": 0.4017, "step": 11489 }, { "epoch": 1.398235473075753, "grad_norm": 1.7553646564483643, "learning_rate": 4.301089811962048e-06, "loss": 0.4142, "step": 11490 }, { "epoch": 1.39835716458777, "grad_norm": 2.124732732772827, "learning_rate": 4.299486995587406e-06, "loss": 0.3152, "step": 11491 }, { "epoch": 1.398478856099787, "grad_norm": 2.317671298980713, "learning_rate": 4.2978843961290875e-06, "loss": 0.3979, "step": 11492 }, { "epoch": 1.398600547611804, "grad_norm": 1.6811238527297974, "learning_rate": 4.296282013648069e-06, "loss": 0.3974, "step": 11493 }, { "epoch": 1.398722239123821, "grad_norm": 2.0519702434539795, "learning_rate": 4.294679848205323e-06, "loss": 0.4209, "step": 11494 }, { "epoch": 1.3988439306358382, "grad_norm": 1.7124383449554443, "learning_rate": 4.293077899861821e-06, "loss": 0.3319, "step": 11495 }, { "epoch": 1.3989656221478552, "grad_norm": 1.9717894792556763, "learning_rate": 4.2914761686785186e-06, "loss": 0.3663, "step": 11496 }, { "epoch": 1.3990873136598723, "grad_norm": 1.6932785511016846, "learning_rate": 4.28987465471636e-06, "loss": 0.3505, "step": 11497 }, { "epoch": 1.3992090051718893, "grad_norm": 1.7399998903274536, "learning_rate": 4.288273358036298e-06, "loss": 0.3124, "step": 11498 }, { "epoch": 1.3993306966839063, "grad_norm": 3.1257483959198, "learning_rate": 4.286672278699249e-06, "loss": 0.316, "step": 11499 }, { "epoch": 1.3994523881959233, "grad_norm": 1.7928040027618408, "learning_rate": 4.28507141676615e-06, "loss": 0.4447, "step": 11500 }, { "epoch": 1.3995740797079403, "grad_norm": 3.268521547317505, "learning_rate": 4.283470772297912e-06, "loss": 0.3814, "step": 11501 }, { "epoch": 1.3996957712199574, "grad_norm": 1.739776849746704, "learning_rate": 4.281870345355441e-06, "loss": 0.394, "step": 11502 }, { "epoch": 1.3998174627319744, "grad_norm": 2.0779480934143066, "learning_rate": 4.280270135999642e-06, "loss": 0.3376, "step": 11503 }, { "epoch": 1.3999391542439914, "grad_norm": 2.316032886505127, "learning_rate": 4.278670144291405e-06, "loss": 0.4113, "step": 11504 }, { "epoch": 1.4000608457560086, "grad_norm": 1.5981416702270508, "learning_rate": 4.277070370291606e-06, "loss": 0.3413, "step": 11505 }, { "epoch": 1.4001825372680257, "grad_norm": 2.4327147006988525, "learning_rate": 4.27547081406113e-06, "loss": 0.3933, "step": 11506 }, { "epoch": 1.4003042287800427, "grad_norm": 1.9172366857528687, "learning_rate": 4.273871475660839e-06, "loss": 0.3367, "step": 11507 }, { "epoch": 1.4004259202920597, "grad_norm": 1.5525368452072144, "learning_rate": 4.272272355151586e-06, "loss": 0.3646, "step": 11508 }, { "epoch": 1.4005476118040767, "grad_norm": 1.6372947692871094, "learning_rate": 4.27067345259423e-06, "loss": 0.3979, "step": 11509 }, { "epoch": 1.4006693033160937, "grad_norm": 3.172664165496826, "learning_rate": 4.2690747680496085e-06, "loss": 0.4206, "step": 11510 }, { "epoch": 1.4007909948281108, "grad_norm": 1.5217859745025635, "learning_rate": 4.267476301578554e-06, "loss": 0.3368, "step": 11511 }, { "epoch": 1.4009126863401278, "grad_norm": 2.774683952331543, "learning_rate": 4.265878053241892e-06, "loss": 0.4314, "step": 11512 }, { "epoch": 1.4010343778521448, "grad_norm": 2.561152935028076, "learning_rate": 4.264280023100435e-06, "loss": 0.3476, "step": 11513 }, { "epoch": 1.4011560693641618, "grad_norm": 1.487470269203186, "learning_rate": 4.262682211214999e-06, "loss": 0.3923, "step": 11514 }, { "epoch": 1.4012777608761788, "grad_norm": 3.286255359649658, "learning_rate": 4.261084617646382e-06, "loss": 0.3526, "step": 11515 }, { "epoch": 1.4013994523881959, "grad_norm": 1.7256561517715454, "learning_rate": 4.259487242455369e-06, "loss": 0.3762, "step": 11516 }, { "epoch": 1.4015211439002129, "grad_norm": 1.8141251802444458, "learning_rate": 4.257890085702753e-06, "loss": 0.2966, "step": 11517 }, { "epoch": 1.40164283541223, "grad_norm": 1.8115521669387817, "learning_rate": 4.2562931474493044e-06, "loss": 0.3801, "step": 11518 }, { "epoch": 1.401764526924247, "grad_norm": 1.7352278232574463, "learning_rate": 4.2546964277557854e-06, "loss": 0.3983, "step": 11519 }, { "epoch": 1.4018862184362642, "grad_norm": 1.9483028650283813, "learning_rate": 4.253099926682965e-06, "loss": 0.4182, "step": 11520 }, { "epoch": 1.4020079099482812, "grad_norm": 1.8020724058151245, "learning_rate": 4.251503644291587e-06, "loss": 0.4037, "step": 11521 }, { "epoch": 1.4021296014602982, "grad_norm": 1.4089369773864746, "learning_rate": 4.2499075806423954e-06, "loss": 0.3354, "step": 11522 }, { "epoch": 1.4022512929723152, "grad_norm": 1.9180755615234375, "learning_rate": 4.24831173579612e-06, "loss": 0.3392, "step": 11523 }, { "epoch": 1.4023729844843322, "grad_norm": 1.626389980316162, "learning_rate": 4.2467161098134864e-06, "loss": 0.378, "step": 11524 }, { "epoch": 1.4024946759963492, "grad_norm": 1.5367153882980347, "learning_rate": 4.2451207027552164e-06, "loss": 0.3744, "step": 11525 }, { "epoch": 1.4026163675083663, "grad_norm": 2.8640105724334717, "learning_rate": 4.243525514682017e-06, "loss": 0.3655, "step": 11526 }, { "epoch": 1.4027380590203833, "grad_norm": 1.424145221710205, "learning_rate": 4.241930545654582e-06, "loss": 0.3609, "step": 11527 }, { "epoch": 1.4028597505324003, "grad_norm": 2.524362802505493, "learning_rate": 4.240335795733612e-06, "loss": 0.3687, "step": 11528 }, { "epoch": 1.4029814420444173, "grad_norm": 1.7690109014511108, "learning_rate": 4.238741264979783e-06, "loss": 0.3613, "step": 11529 }, { "epoch": 1.4031031335564346, "grad_norm": 2.6616768836975098, "learning_rate": 4.237146953453779e-06, "loss": 0.389, "step": 11530 }, { "epoch": 1.4032248250684516, "grad_norm": 1.9031298160552979, "learning_rate": 4.235552861216261e-06, "loss": 0.3357, "step": 11531 }, { "epoch": 1.4033465165804686, "grad_norm": 2.886734962463379, "learning_rate": 4.233958988327884e-06, "loss": 0.3888, "step": 11532 }, { "epoch": 1.4034682080924856, "grad_norm": 1.8774120807647705, "learning_rate": 4.232365334849311e-06, "loss": 0.3595, "step": 11533 }, { "epoch": 1.4035898996045026, "grad_norm": 3.600778818130493, "learning_rate": 4.230771900841168e-06, "loss": 0.4511, "step": 11534 }, { "epoch": 1.4037115911165197, "grad_norm": 2.5160017013549805, "learning_rate": 4.229178686364098e-06, "loss": 0.4237, "step": 11535 }, { "epoch": 1.4038332826285367, "grad_norm": 2.9807233810424805, "learning_rate": 4.227585691478726e-06, "loss": 0.3137, "step": 11536 }, { "epoch": 1.4039549741405537, "grad_norm": 2.2398462295532227, "learning_rate": 4.2259929162456616e-06, "loss": 0.4278, "step": 11537 }, { "epoch": 1.4040766656525707, "grad_norm": 1.914557933807373, "learning_rate": 4.224400360725522e-06, "loss": 0.3701, "step": 11538 }, { "epoch": 1.4041983571645877, "grad_norm": 3.069185972213745, "learning_rate": 4.222808024978905e-06, "loss": 0.4091, "step": 11539 }, { "epoch": 1.4043200486766048, "grad_norm": 1.572724461555481, "learning_rate": 4.221215909066395e-06, "loss": 0.3355, "step": 11540 }, { "epoch": 1.4044417401886218, "grad_norm": 2.696105480194092, "learning_rate": 4.219624013048587e-06, "loss": 0.3454, "step": 11541 }, { "epoch": 1.4045634317006388, "grad_norm": 1.6319700479507446, "learning_rate": 4.218032336986048e-06, "loss": 0.3653, "step": 11542 }, { "epoch": 1.4046851232126558, "grad_norm": 1.9677331447601318, "learning_rate": 4.216440880939344e-06, "loss": 0.4175, "step": 11543 }, { "epoch": 1.4048068147246728, "grad_norm": 3.3107752799987793, "learning_rate": 4.214849644969042e-06, "loss": 0.3172, "step": 11544 }, { "epoch": 1.4049285062366899, "grad_norm": 2.2747137546539307, "learning_rate": 4.213258629135679e-06, "loss": 0.3307, "step": 11545 }, { "epoch": 1.405050197748707, "grad_norm": 3.6300511360168457, "learning_rate": 4.211667833499807e-06, "loss": 0.2855, "step": 11546 }, { "epoch": 1.4051718892607241, "grad_norm": 1.8248096704483032, "learning_rate": 4.210077258121955e-06, "loss": 0.4238, "step": 11547 }, { "epoch": 1.4052935807727411, "grad_norm": 1.8915961980819702, "learning_rate": 4.208486903062644e-06, "loss": 0.3683, "step": 11548 }, { "epoch": 1.4054152722847582, "grad_norm": 1.917109727859497, "learning_rate": 4.206896768382398e-06, "loss": 0.4559, "step": 11549 }, { "epoch": 1.4055369637967752, "grad_norm": 1.8176966905593872, "learning_rate": 4.205306854141722e-06, "loss": 0.3801, "step": 11550 }, { "epoch": 1.4056586553087922, "grad_norm": 1.6042671203613281, "learning_rate": 4.2037171604011096e-06, "loss": 0.3847, "step": 11551 }, { "epoch": 1.4057803468208092, "grad_norm": 2.1626269817352295, "learning_rate": 4.2021276872210605e-06, "loss": 0.3817, "step": 11552 }, { "epoch": 1.4059020383328262, "grad_norm": 1.3933879137039185, "learning_rate": 4.200538434662054e-06, "loss": 0.3506, "step": 11553 }, { "epoch": 1.4060237298448433, "grad_norm": 1.5468640327453613, "learning_rate": 4.198949402784561e-06, "loss": 0.3542, "step": 11554 }, { "epoch": 1.4061454213568605, "grad_norm": 2.298234701156616, "learning_rate": 4.197360591649053e-06, "loss": 0.3936, "step": 11555 }, { "epoch": 1.4062671128688775, "grad_norm": 1.9949541091918945, "learning_rate": 4.1957720013159874e-06, "loss": 0.3653, "step": 11556 }, { "epoch": 1.4063888043808945, "grad_norm": 2.873588800430298, "learning_rate": 4.19418363184581e-06, "loss": 0.4122, "step": 11557 }, { "epoch": 1.4065104958929115, "grad_norm": 4.114194393157959, "learning_rate": 4.192595483298964e-06, "loss": 0.4581, "step": 11558 }, { "epoch": 1.4066321874049286, "grad_norm": 2.229585647583008, "learning_rate": 4.191007555735876e-06, "loss": 0.3804, "step": 11559 }, { "epoch": 1.4067538789169456, "grad_norm": 1.6459980010986328, "learning_rate": 4.189419849216977e-06, "loss": 0.3162, "step": 11560 }, { "epoch": 1.4068755704289626, "grad_norm": 2.128148078918457, "learning_rate": 4.187832363802681e-06, "loss": 0.423, "step": 11561 }, { "epoch": 1.4069972619409796, "grad_norm": 2.1356284618377686, "learning_rate": 4.186245099553391e-06, "loss": 0.3911, "step": 11562 }, { "epoch": 1.4071189534529966, "grad_norm": 1.7619588375091553, "learning_rate": 4.184658056529511e-06, "loss": 0.3467, "step": 11563 }, { "epoch": 1.4072406449650137, "grad_norm": 1.9123762845993042, "learning_rate": 4.183071234791431e-06, "loss": 0.415, "step": 11564 }, { "epoch": 1.4073623364770307, "grad_norm": 1.8862640857696533, "learning_rate": 4.181484634399526e-06, "loss": 0.3797, "step": 11565 }, { "epoch": 1.4074840279890477, "grad_norm": 2.488956928253174, "learning_rate": 4.179898255414178e-06, "loss": 0.3823, "step": 11566 }, { "epoch": 1.4076057195010647, "grad_norm": 1.537616491317749, "learning_rate": 4.178312097895748e-06, "loss": 0.3446, "step": 11567 }, { "epoch": 1.4077274110130817, "grad_norm": 2.983016014099121, "learning_rate": 4.176726161904593e-06, "loss": 0.4306, "step": 11568 }, { "epoch": 1.4078491025250988, "grad_norm": 1.7719531059265137, "learning_rate": 4.175140447501061e-06, "loss": 0.3656, "step": 11569 }, { "epoch": 1.4079707940371158, "grad_norm": 1.6745668649673462, "learning_rate": 4.173554954745489e-06, "loss": 0.3083, "step": 11570 }, { "epoch": 1.408092485549133, "grad_norm": 2.064340353012085, "learning_rate": 4.171969683698215e-06, "loss": 0.439, "step": 11571 }, { "epoch": 1.40821417706115, "grad_norm": 1.612450361251831, "learning_rate": 4.1703846344195565e-06, "loss": 0.3397, "step": 11572 }, { "epoch": 1.408335868573167, "grad_norm": 1.6694869995117188, "learning_rate": 4.168799806969827e-06, "loss": 0.388, "step": 11573 }, { "epoch": 1.408457560085184, "grad_norm": 3.5114548206329346, "learning_rate": 4.167215201409337e-06, "loss": 0.2922, "step": 11574 }, { "epoch": 1.408579251597201, "grad_norm": 3.0673043727874756, "learning_rate": 4.165630817798383e-06, "loss": 0.3318, "step": 11575 }, { "epoch": 1.4087009431092181, "grad_norm": 2.271623134613037, "learning_rate": 4.164046656197248e-06, "loss": 0.357, "step": 11576 }, { "epoch": 1.4088226346212351, "grad_norm": 1.4271769523620605, "learning_rate": 4.162462716666222e-06, "loss": 0.3588, "step": 11577 }, { "epoch": 1.4089443261332522, "grad_norm": 1.7905840873718262, "learning_rate": 4.160878999265573e-06, "loss": 0.3455, "step": 11578 }, { "epoch": 1.4090660176452692, "grad_norm": 2.0919485092163086, "learning_rate": 4.159295504055564e-06, "loss": 0.3094, "step": 11579 }, { "epoch": 1.4091877091572864, "grad_norm": 2.895566940307617, "learning_rate": 4.15771223109645e-06, "loss": 0.449, "step": 11580 }, { "epoch": 1.4093094006693034, "grad_norm": 2.123392343521118, "learning_rate": 4.156129180448476e-06, "loss": 0.3325, "step": 11581 }, { "epoch": 1.4094310921813205, "grad_norm": 1.632764458656311, "learning_rate": 4.154546352171885e-06, "loss": 0.3615, "step": 11582 }, { "epoch": 1.4095527836933375, "grad_norm": 2.295473337173462, "learning_rate": 4.152963746326907e-06, "loss": 0.3268, "step": 11583 }, { "epoch": 1.4096744752053545, "grad_norm": 1.784664273262024, "learning_rate": 4.1513813629737556e-06, "loss": 0.3758, "step": 11584 }, { "epoch": 1.4097961667173715, "grad_norm": 2.5662484169006348, "learning_rate": 4.149799202172653e-06, "loss": 0.3607, "step": 11585 }, { "epoch": 1.4099178582293885, "grad_norm": 1.2922815084457397, "learning_rate": 4.1482172639837966e-06, "loss": 0.3072, "step": 11586 }, { "epoch": 1.4100395497414056, "grad_norm": 2.3864541053771973, "learning_rate": 4.14663554846739e-06, "loss": 0.3599, "step": 11587 }, { "epoch": 1.4101612412534226, "grad_norm": 2.0226659774780273, "learning_rate": 4.145054055683616e-06, "loss": 0.3749, "step": 11588 }, { "epoch": 1.4102829327654396, "grad_norm": 2.0993988513946533, "learning_rate": 4.143472785692652e-06, "loss": 0.3672, "step": 11589 }, { "epoch": 1.4104046242774566, "grad_norm": 1.5659104585647583, "learning_rate": 4.1418917385546766e-06, "loss": 0.3596, "step": 11590 }, { "epoch": 1.4105263157894736, "grad_norm": 2.0968754291534424, "learning_rate": 4.140310914329843e-06, "loss": 0.3578, "step": 11591 }, { "epoch": 1.4106480073014906, "grad_norm": 1.5416018962860107, "learning_rate": 4.138730313078304e-06, "loss": 0.3326, "step": 11592 }, { "epoch": 1.4107696988135077, "grad_norm": 2.7538788318634033, "learning_rate": 4.137149934860213e-06, "loss": 0.3819, "step": 11593 }, { "epoch": 1.4108913903255247, "grad_norm": 1.3816417455673218, "learning_rate": 4.1355697797356985e-06, "loss": 0.3746, "step": 11594 }, { "epoch": 1.4110130818375417, "grad_norm": 1.8974199295043945, "learning_rate": 4.133989847764897e-06, "loss": 0.3768, "step": 11595 }, { "epoch": 1.411134773349559, "grad_norm": 1.9509323835372925, "learning_rate": 4.1324101390079215e-06, "loss": 0.3915, "step": 11596 }, { "epoch": 1.411256464861576, "grad_norm": 3.6561601161956787, "learning_rate": 4.130830653524885e-06, "loss": 0.3351, "step": 11597 }, { "epoch": 1.411378156373593, "grad_norm": 1.790881872177124, "learning_rate": 4.129251391375891e-06, "loss": 0.3948, "step": 11598 }, { "epoch": 1.41149984788561, "grad_norm": 1.3854141235351562, "learning_rate": 4.127672352621035e-06, "loss": 0.3927, "step": 11599 }, { "epoch": 1.411621539397627, "grad_norm": 1.6384949684143066, "learning_rate": 4.126093537320398e-06, "loss": 0.3742, "step": 11600 }, { "epoch": 1.411743230909644, "grad_norm": 2.4517855644226074, "learning_rate": 4.124514945534063e-06, "loss": 0.358, "step": 11601 }, { "epoch": 1.411864922421661, "grad_norm": 1.6706990003585815, "learning_rate": 4.122936577322096e-06, "loss": 0.3935, "step": 11602 }, { "epoch": 1.411986613933678, "grad_norm": 2.1631698608398438, "learning_rate": 4.121358432744558e-06, "loss": 0.3471, "step": 11603 }, { "epoch": 1.412108305445695, "grad_norm": 1.5076167583465576, "learning_rate": 4.119780511861499e-06, "loss": 0.3795, "step": 11604 }, { "epoch": 1.4122299969577121, "grad_norm": 3.6191859245300293, "learning_rate": 4.118202814732959e-06, "loss": 0.3478, "step": 11605 }, { "epoch": 1.4123516884697294, "grad_norm": 1.7758814096450806, "learning_rate": 4.11662534141898e-06, "loss": 0.3883, "step": 11606 }, { "epoch": 1.4124733799817464, "grad_norm": 3.163689374923706, "learning_rate": 4.115048091979584e-06, "loss": 0.3568, "step": 11607 }, { "epoch": 1.4125950714937634, "grad_norm": 2.9635977745056152, "learning_rate": 4.113471066474788e-06, "loss": 0.3138, "step": 11608 }, { "epoch": 1.4127167630057804, "grad_norm": 2.9105312824249268, "learning_rate": 4.111894264964604e-06, "loss": 0.3379, "step": 11609 }, { "epoch": 1.4128384545177974, "grad_norm": 2.5093648433685303, "learning_rate": 4.110317687509033e-06, "loss": 0.333, "step": 11610 }, { "epoch": 1.4129601460298145, "grad_norm": 1.6861945390701294, "learning_rate": 4.108741334168059e-06, "loss": 0.4078, "step": 11611 }, { "epoch": 1.4130818375418315, "grad_norm": 1.7538056373596191, "learning_rate": 4.107165205001676e-06, "loss": 0.3883, "step": 11612 }, { "epoch": 1.4132035290538485, "grad_norm": 1.4961466789245605, "learning_rate": 4.105589300069855e-06, "loss": 0.3697, "step": 11613 }, { "epoch": 1.4133252205658655, "grad_norm": 3.3296072483062744, "learning_rate": 4.104013619432561e-06, "loss": 0.4123, "step": 11614 }, { "epoch": 1.4134469120778825, "grad_norm": 1.4338408708572388, "learning_rate": 4.102438163149753e-06, "loss": 0.3279, "step": 11615 }, { "epoch": 1.4135686035898996, "grad_norm": 4.271087169647217, "learning_rate": 4.100862931281378e-06, "loss": 0.4388, "step": 11616 }, { "epoch": 1.4136902951019166, "grad_norm": 1.2837892770767212, "learning_rate": 4.099287923887381e-06, "loss": 0.3554, "step": 11617 }, { "epoch": 1.4138119866139336, "grad_norm": 2.8733601570129395, "learning_rate": 4.097713141027692e-06, "loss": 0.3854, "step": 11618 }, { "epoch": 1.4139336781259506, "grad_norm": 1.828884482383728, "learning_rate": 4.096138582762232e-06, "loss": 0.3675, "step": 11619 }, { "epoch": 1.4140553696379676, "grad_norm": 3.118661403656006, "learning_rate": 4.094564249150923e-06, "loss": 0.3981, "step": 11620 }, { "epoch": 1.4141770611499849, "grad_norm": 1.6310796737670898, "learning_rate": 4.0929901402536676e-06, "loss": 0.4055, "step": 11621 }, { "epoch": 1.414298752662002, "grad_norm": 2.197155714035034, "learning_rate": 4.091416256130362e-06, "loss": 0.3165, "step": 11622 }, { "epoch": 1.414420444174019, "grad_norm": 2.284440755844116, "learning_rate": 4.0898425968409e-06, "loss": 0.3538, "step": 11623 }, { "epoch": 1.414542135686036, "grad_norm": 1.3533656597137451, "learning_rate": 4.088269162445161e-06, "loss": 0.3745, "step": 11624 }, { "epoch": 1.414663827198053, "grad_norm": 2.233342170715332, "learning_rate": 4.086695953003017e-06, "loss": 0.3001, "step": 11625 }, { "epoch": 1.41478551871007, "grad_norm": 3.6351616382598877, "learning_rate": 4.085122968574332e-06, "loss": 0.4268, "step": 11626 }, { "epoch": 1.414907210222087, "grad_norm": 3.2335991859436035, "learning_rate": 4.0835502092189575e-06, "loss": 0.4177, "step": 11627 }, { "epoch": 1.415028901734104, "grad_norm": 2.717853546142578, "learning_rate": 4.081977674996746e-06, "loss": 0.4144, "step": 11628 }, { "epoch": 1.415150593246121, "grad_norm": 2.9840035438537598, "learning_rate": 4.080405365967536e-06, "loss": 0.3131, "step": 11629 }, { "epoch": 1.415272284758138, "grad_norm": 1.9089453220367432, "learning_rate": 4.07883328219115e-06, "loss": 0.3306, "step": 11630 }, { "epoch": 1.4153939762701553, "grad_norm": 1.6232671737670898, "learning_rate": 4.077261423727418e-06, "loss": 0.371, "step": 11631 }, { "epoch": 1.4155156677821723, "grad_norm": 1.6911869049072266, "learning_rate": 4.075689790636147e-06, "loss": 0.3758, "step": 11632 }, { "epoch": 1.4156373592941893, "grad_norm": 1.5354719161987305, "learning_rate": 4.074118382977139e-06, "loss": 0.3222, "step": 11633 }, { "epoch": 1.4157590508062063, "grad_norm": 3.1932930946350098, "learning_rate": 4.072547200810196e-06, "loss": 0.3415, "step": 11634 }, { "epoch": 1.4158807423182234, "grad_norm": 1.4369210004806519, "learning_rate": 4.070976244195102e-06, "loss": 0.337, "step": 11635 }, { "epoch": 1.4160024338302404, "grad_norm": 1.9137746095657349, "learning_rate": 4.069405513191634e-06, "loss": 0.3774, "step": 11636 }, { "epoch": 1.4161241253422574, "grad_norm": 1.9250438213348389, "learning_rate": 4.067835007859562e-06, "loss": 0.3551, "step": 11637 }, { "epoch": 1.4162458168542744, "grad_norm": 1.6549873352050781, "learning_rate": 4.066264728258642e-06, "loss": 0.3784, "step": 11638 }, { "epoch": 1.4163675083662914, "grad_norm": 1.709864616394043, "learning_rate": 4.064694674448636e-06, "loss": 0.3809, "step": 11639 }, { "epoch": 1.4164891998783085, "grad_norm": 1.252031922340393, "learning_rate": 4.0631248464892825e-06, "loss": 0.3522, "step": 11640 }, { "epoch": 1.4166108913903255, "grad_norm": 1.765067458152771, "learning_rate": 4.061555244440314e-06, "loss": 0.3058, "step": 11641 }, { "epoch": 1.4167325829023425, "grad_norm": 1.901914358139038, "learning_rate": 4.059985868361464e-06, "loss": 0.3765, "step": 11642 }, { "epoch": 1.4168542744143595, "grad_norm": 1.3037997484207153, "learning_rate": 4.058416718312447e-06, "loss": 0.3065, "step": 11643 }, { "epoch": 1.4169759659263765, "grad_norm": 2.255718946456909, "learning_rate": 4.056847794352968e-06, "loss": 0.3515, "step": 11644 }, { "epoch": 1.4170976574383936, "grad_norm": 1.8065667152404785, "learning_rate": 4.055279096542736e-06, "loss": 0.3469, "step": 11645 }, { "epoch": 1.4172193489504106, "grad_norm": 1.3460602760314941, "learning_rate": 4.053710624941436e-06, "loss": 0.3414, "step": 11646 }, { "epoch": 1.4173410404624278, "grad_norm": 1.5945611000061035, "learning_rate": 4.052142379608758e-06, "loss": 0.385, "step": 11647 }, { "epoch": 1.4174627319744448, "grad_norm": 1.628516435623169, "learning_rate": 4.050574360604378e-06, "loss": 0.3145, "step": 11648 }, { "epoch": 1.4175844234864619, "grad_norm": 1.7932103872299194, "learning_rate": 4.049006567987951e-06, "loss": 0.3489, "step": 11649 }, { "epoch": 1.4177061149984789, "grad_norm": 1.348899245262146, "learning_rate": 4.047439001819144e-06, "loss": 0.3242, "step": 11650 }, { "epoch": 1.417827806510496, "grad_norm": 1.6273852586746216, "learning_rate": 4.045871662157605e-06, "loss": 0.3842, "step": 11651 }, { "epoch": 1.417949498022513, "grad_norm": 1.4806421995162964, "learning_rate": 4.044304549062972e-06, "loss": 0.3879, "step": 11652 }, { "epoch": 1.41807118953453, "grad_norm": 1.8799042701721191, "learning_rate": 4.04273766259488e-06, "loss": 0.3662, "step": 11653 }, { "epoch": 1.418192881046547, "grad_norm": 1.725543737411499, "learning_rate": 4.041171002812947e-06, "loss": 0.3752, "step": 11654 }, { "epoch": 1.418314572558564, "grad_norm": 1.7824726104736328, "learning_rate": 4.039604569776796e-06, "loss": 0.3588, "step": 11655 }, { "epoch": 1.4184362640705812, "grad_norm": 1.758988380432129, "learning_rate": 4.038038363546028e-06, "loss": 0.3467, "step": 11656 }, { "epoch": 1.4185579555825982, "grad_norm": 1.4466619491577148, "learning_rate": 4.036472384180238e-06, "loss": 0.3394, "step": 11657 }, { "epoch": 1.4186796470946152, "grad_norm": 1.6314854621887207, "learning_rate": 4.03490663173902e-06, "loss": 0.3508, "step": 11658 }, { "epoch": 1.4188013386066323, "grad_norm": 2.778224468231201, "learning_rate": 4.033341106281956e-06, "loss": 0.3291, "step": 11659 }, { "epoch": 1.4189230301186493, "grad_norm": 1.6974302530288696, "learning_rate": 4.031775807868606e-06, "loss": 0.3611, "step": 11660 }, { "epoch": 1.4190447216306663, "grad_norm": 2.893326759338379, "learning_rate": 4.0302107365585445e-06, "loss": 0.4098, "step": 11661 }, { "epoch": 1.4191664131426833, "grad_norm": 1.8200770616531372, "learning_rate": 4.028645892411316e-06, "loss": 0.3686, "step": 11662 }, { "epoch": 1.4192881046547003, "grad_norm": 2.087089776992798, "learning_rate": 4.027081275486476e-06, "loss": 0.3142, "step": 11663 }, { "epoch": 1.4194097961667174, "grad_norm": 1.401525855064392, "learning_rate": 4.025516885843557e-06, "loss": 0.3441, "step": 11664 }, { "epoch": 1.4195314876787344, "grad_norm": 1.5148062705993652, "learning_rate": 4.023952723542083e-06, "loss": 0.3847, "step": 11665 }, { "epoch": 1.4196531791907514, "grad_norm": 1.7427053451538086, "learning_rate": 4.02238878864158e-06, "loss": 0.3287, "step": 11666 }, { "epoch": 1.4197748707027684, "grad_norm": 1.761740803718567, "learning_rate": 4.020825081201558e-06, "loss": 0.4148, "step": 11667 }, { "epoch": 1.4198965622147854, "grad_norm": 1.9368391036987305, "learning_rate": 4.019261601281514e-06, "loss": 0.2846, "step": 11668 }, { "epoch": 1.4200182537268025, "grad_norm": 1.5413599014282227, "learning_rate": 4.017698348940949e-06, "loss": 0.3407, "step": 11669 }, { "epoch": 1.4201399452388195, "grad_norm": 3.175100803375244, "learning_rate": 4.016135324239344e-06, "loss": 0.4225, "step": 11670 }, { "epoch": 1.4202616367508365, "grad_norm": 2.09101939201355, "learning_rate": 4.014572527236177e-06, "loss": 0.4306, "step": 11671 }, { "epoch": 1.4203833282628537, "grad_norm": 1.820353388786316, "learning_rate": 4.013009957990913e-06, "loss": 0.3679, "step": 11672 }, { "epoch": 1.4205050197748708, "grad_norm": 2.646521806716919, "learning_rate": 4.01144761656301e-06, "loss": 0.2648, "step": 11673 }, { "epoch": 1.4206267112868878, "grad_norm": 2.137427568435669, "learning_rate": 4.0098855030119245e-06, "loss": 0.3471, "step": 11674 }, { "epoch": 1.4207484027989048, "grad_norm": 3.1136484146118164, "learning_rate": 4.008323617397094e-06, "loss": 0.3904, "step": 11675 }, { "epoch": 1.4208700943109218, "grad_norm": 1.9761993885040283, "learning_rate": 4.006761959777948e-06, "loss": 0.3784, "step": 11676 }, { "epoch": 1.4209917858229388, "grad_norm": 2.099623441696167, "learning_rate": 4.005200530213919e-06, "loss": 0.3342, "step": 11677 }, { "epoch": 1.4211134773349559, "grad_norm": 1.583484172821045, "learning_rate": 4.003639328764418e-06, "loss": 0.3922, "step": 11678 }, { "epoch": 1.4212351688469729, "grad_norm": 1.5651918649673462, "learning_rate": 4.002078355488849e-06, "loss": 0.4242, "step": 11679 }, { "epoch": 1.42135686035899, "grad_norm": 3.1957945823669434, "learning_rate": 4.000517610446617e-06, "loss": 0.507, "step": 11680 }, { "epoch": 1.4214785518710071, "grad_norm": 1.4976094961166382, "learning_rate": 3.998957093697109e-06, "loss": 0.3484, "step": 11681 }, { "epoch": 1.4216002433830242, "grad_norm": 1.3932868242263794, "learning_rate": 3.997396805299703e-06, "loss": 0.3209, "step": 11682 }, { "epoch": 1.4217219348950412, "grad_norm": 1.8951544761657715, "learning_rate": 3.9958367453137754e-06, "loss": 0.3818, "step": 11683 }, { "epoch": 1.4218436264070582, "grad_norm": 2.4057655334472656, "learning_rate": 3.994276913798684e-06, "loss": 0.3066, "step": 11684 }, { "epoch": 1.4219653179190752, "grad_norm": 1.3851104974746704, "learning_rate": 3.99271731081379e-06, "loss": 0.3387, "step": 11685 }, { "epoch": 1.4220870094310922, "grad_norm": 1.9990328550338745, "learning_rate": 3.991157936418436e-06, "loss": 0.3664, "step": 11686 }, { "epoch": 1.4222087009431092, "grad_norm": 2.7234108448028564, "learning_rate": 3.989598790671957e-06, "loss": 0.3085, "step": 11687 }, { "epoch": 1.4223303924551263, "grad_norm": 1.5538443326950073, "learning_rate": 3.988039873633689e-06, "loss": 0.3131, "step": 11688 }, { "epoch": 1.4224520839671433, "grad_norm": 1.3935569524765015, "learning_rate": 3.986481185362947e-06, "loss": 0.314, "step": 11689 }, { "epoch": 1.4225737754791603, "grad_norm": 1.9987400770187378, "learning_rate": 3.98492272591904e-06, "loss": 0.3539, "step": 11690 }, { "epoch": 1.4226954669911773, "grad_norm": 1.3666478395462036, "learning_rate": 3.9833644953612785e-06, "loss": 0.3419, "step": 11691 }, { "epoch": 1.4228171585031943, "grad_norm": 4.389426231384277, "learning_rate": 3.981806493748949e-06, "loss": 0.4448, "step": 11692 }, { "epoch": 1.4229388500152114, "grad_norm": 1.5672175884246826, "learning_rate": 3.980248721141339e-06, "loss": 0.3707, "step": 11693 }, { "epoch": 1.4230605415272284, "grad_norm": 1.8768202066421509, "learning_rate": 3.978691177597731e-06, "loss": 0.389, "step": 11694 }, { "epoch": 1.4231822330392454, "grad_norm": 1.4674445390701294, "learning_rate": 3.977133863177381e-06, "loss": 0.38, "step": 11695 }, { "epoch": 1.4233039245512624, "grad_norm": 1.3890225887298584, "learning_rate": 3.975576777939556e-06, "loss": 0.3341, "step": 11696 }, { "epoch": 1.4234256160632797, "grad_norm": 1.265735149383545, "learning_rate": 3.974019921943505e-06, "loss": 0.3712, "step": 11697 }, { "epoch": 1.4235473075752967, "grad_norm": 2.511049509048462, "learning_rate": 3.972463295248465e-06, "loss": 0.3819, "step": 11698 }, { "epoch": 1.4236689990873137, "grad_norm": 3.0046520233154297, "learning_rate": 3.970906897913678e-06, "loss": 0.4123, "step": 11699 }, { "epoch": 1.4237906905993307, "grad_norm": 2.1112117767333984, "learning_rate": 3.969350729998362e-06, "loss": 0.3653, "step": 11700 }, { "epoch": 1.4239123821113477, "grad_norm": 1.5638095140457153, "learning_rate": 3.967794791561731e-06, "loss": 0.4055, "step": 11701 }, { "epoch": 1.4240340736233648, "grad_norm": 2.9030213356018066, "learning_rate": 3.966239082662996e-06, "loss": 0.3683, "step": 11702 }, { "epoch": 1.4241557651353818, "grad_norm": 1.6132267713546753, "learning_rate": 3.964683603361355e-06, "loss": 0.4415, "step": 11703 }, { "epoch": 1.4242774566473988, "grad_norm": 1.4999154806137085, "learning_rate": 3.96312835371599e-06, "loss": 0.3537, "step": 11704 }, { "epoch": 1.4243991481594158, "grad_norm": 2.0627658367156982, "learning_rate": 3.9615733337860915e-06, "loss": 0.4057, "step": 11705 }, { "epoch": 1.4245208396714328, "grad_norm": 2.203972339630127, "learning_rate": 3.9600185436308255e-06, "loss": 0.3672, "step": 11706 }, { "epoch": 1.42464253118345, "grad_norm": 1.9013432264328003, "learning_rate": 3.958463983309357e-06, "loss": 0.3331, "step": 11707 }, { "epoch": 1.424764222695467, "grad_norm": 1.4748945236206055, "learning_rate": 3.956909652880839e-06, "loss": 0.3533, "step": 11708 }, { "epoch": 1.4248859142074841, "grad_norm": 1.6467561721801758, "learning_rate": 3.955355552404414e-06, "loss": 0.3832, "step": 11709 }, { "epoch": 1.4250076057195011, "grad_norm": 3.121297597885132, "learning_rate": 3.953801681939225e-06, "loss": 0.3639, "step": 11710 }, { "epoch": 1.4251292972315182, "grad_norm": 1.8061774969100952, "learning_rate": 3.952248041544397e-06, "loss": 0.3764, "step": 11711 }, { "epoch": 1.4252509887435352, "grad_norm": 2.0275731086730957, "learning_rate": 3.950694631279045e-06, "loss": 0.329, "step": 11712 }, { "epoch": 1.4253726802555522, "grad_norm": 1.9685959815979004, "learning_rate": 3.949141451202288e-06, "loss": 0.3271, "step": 11713 }, { "epoch": 1.4254943717675692, "grad_norm": 2.367690324783325, "learning_rate": 3.9475885013732195e-06, "loss": 0.4217, "step": 11714 }, { "epoch": 1.4256160632795862, "grad_norm": 1.5932812690734863, "learning_rate": 3.9460357818509396e-06, "loss": 0.3827, "step": 11715 }, { "epoch": 1.4257377547916033, "grad_norm": 3.4910826683044434, "learning_rate": 3.944483292694531e-06, "loss": 0.4391, "step": 11716 }, { "epoch": 1.4258594463036203, "grad_norm": 2.5507473945617676, "learning_rate": 3.942931033963066e-06, "loss": 0.4097, "step": 11717 }, { "epoch": 1.4259811378156373, "grad_norm": 1.3516994714736938, "learning_rate": 3.9413790057156124e-06, "loss": 0.3461, "step": 11718 }, { "epoch": 1.4261028293276543, "grad_norm": 1.527092695236206, "learning_rate": 3.939827208011228e-06, "loss": 0.3823, "step": 11719 }, { "epoch": 1.4262245208396713, "grad_norm": 1.394440770149231, "learning_rate": 3.938275640908959e-06, "loss": 0.3598, "step": 11720 }, { "epoch": 1.4263462123516883, "grad_norm": 1.4894014596939087, "learning_rate": 3.936724304467853e-06, "loss": 0.3684, "step": 11721 }, { "epoch": 1.4264679038637056, "grad_norm": 1.5485066175460815, "learning_rate": 3.9351731987469336e-06, "loss": 0.37, "step": 11722 }, { "epoch": 1.4265895953757226, "grad_norm": 1.475352168083191, "learning_rate": 3.933622323805231e-06, "loss": 0.3328, "step": 11723 }, { "epoch": 1.4267112868877396, "grad_norm": 1.4506787061691284, "learning_rate": 3.932071679701755e-06, "loss": 0.3116, "step": 11724 }, { "epoch": 1.4268329783997566, "grad_norm": 1.5375412702560425, "learning_rate": 3.93052126649551e-06, "loss": 0.3718, "step": 11725 }, { "epoch": 1.4269546699117737, "grad_norm": 1.848252534866333, "learning_rate": 3.928971084245495e-06, "loss": 0.3976, "step": 11726 }, { "epoch": 1.4270763614237907, "grad_norm": 4.220135688781738, "learning_rate": 3.927421133010698e-06, "loss": 0.408, "step": 11727 }, { "epoch": 1.4271980529358077, "grad_norm": 1.7481589317321777, "learning_rate": 3.925871412850093e-06, "loss": 0.3421, "step": 11728 }, { "epoch": 1.4273197444478247, "grad_norm": 1.9360086917877197, "learning_rate": 3.92432192382266e-06, "loss": 0.3238, "step": 11729 }, { "epoch": 1.4274414359598417, "grad_norm": 1.5617867708206177, "learning_rate": 3.922772665987347e-06, "loss": 0.3866, "step": 11730 }, { "epoch": 1.4275631274718588, "grad_norm": 1.4919909238815308, "learning_rate": 3.921223639403117e-06, "loss": 0.3566, "step": 11731 }, { "epoch": 1.427684818983876, "grad_norm": 1.6973735094070435, "learning_rate": 3.919674844128909e-06, "loss": 0.3859, "step": 11732 }, { "epoch": 1.427806510495893, "grad_norm": 1.8454428911209106, "learning_rate": 3.918126280223657e-06, "loss": 0.3984, "step": 11733 }, { "epoch": 1.42792820200791, "grad_norm": 1.8268356323242188, "learning_rate": 3.91657794774629e-06, "loss": 0.2983, "step": 11734 }, { "epoch": 1.428049893519927, "grad_norm": 3.7695889472961426, "learning_rate": 3.915029846755726e-06, "loss": 0.4432, "step": 11735 }, { "epoch": 1.428171585031944, "grad_norm": 2.152716875076294, "learning_rate": 3.913481977310867e-06, "loss": 0.3962, "step": 11736 }, { "epoch": 1.428293276543961, "grad_norm": 1.473909616470337, "learning_rate": 3.91193433947062e-06, "loss": 0.3062, "step": 11737 }, { "epoch": 1.4284149680559781, "grad_norm": 2.0247116088867188, "learning_rate": 3.9103869332938745e-06, "loss": 0.4168, "step": 11738 }, { "epoch": 1.4285366595679951, "grad_norm": 1.6107152700424194, "learning_rate": 3.908839758839506e-06, "loss": 0.3707, "step": 11739 }, { "epoch": 1.4286583510800122, "grad_norm": 1.9907371997833252, "learning_rate": 3.907292816166401e-06, "loss": 0.3497, "step": 11740 }, { "epoch": 1.4287800425920292, "grad_norm": 1.8290342092514038, "learning_rate": 3.905746105333409e-06, "loss": 0.3482, "step": 11741 }, { "epoch": 1.4289017341040462, "grad_norm": 3.007143020629883, "learning_rate": 3.904199626399395e-06, "loss": 0.384, "step": 11742 }, { "epoch": 1.4290234256160632, "grad_norm": 1.9857935905456543, "learning_rate": 3.902653379423202e-06, "loss": 0.3575, "step": 11743 }, { "epoch": 1.4291451171280802, "grad_norm": 1.9340925216674805, "learning_rate": 3.901107364463666e-06, "loss": 0.4019, "step": 11744 }, { "epoch": 1.4292668086400973, "grad_norm": 1.9911713600158691, "learning_rate": 3.8995615815796214e-06, "loss": 0.3237, "step": 11745 }, { "epoch": 1.4293885001521143, "grad_norm": 1.326897382736206, "learning_rate": 3.898016030829886e-06, "loss": 0.3059, "step": 11746 }, { "epoch": 1.4295101916641313, "grad_norm": 1.8565852642059326, "learning_rate": 3.896470712273269e-06, "loss": 0.3276, "step": 11747 }, { "epoch": 1.4296318831761485, "grad_norm": 1.446336269378662, "learning_rate": 3.894925625968578e-06, "loss": 0.3664, "step": 11748 }, { "epoch": 1.4297535746881656, "grad_norm": 3.1635358333587646, "learning_rate": 3.893380771974603e-06, "loss": 0.4464, "step": 11749 }, { "epoch": 1.4298752662001826, "grad_norm": 1.8254159688949585, "learning_rate": 3.891836150350127e-06, "loss": 0.4186, "step": 11750 }, { "epoch": 1.4299969577121996, "grad_norm": 1.7408673763275146, "learning_rate": 3.89029176115393e-06, "loss": 0.3052, "step": 11751 }, { "epoch": 1.4301186492242166, "grad_norm": 2.184199571609497, "learning_rate": 3.88874760444478e-06, "loss": 0.4172, "step": 11752 }, { "epoch": 1.4302403407362336, "grad_norm": 1.6625468730926514, "learning_rate": 3.887203680281433e-06, "loss": 0.3711, "step": 11753 }, { "epoch": 1.4303620322482506, "grad_norm": 2.4049620628356934, "learning_rate": 3.8856599887226386e-06, "loss": 0.4471, "step": 11754 }, { "epoch": 1.4304837237602677, "grad_norm": 1.7581387758255005, "learning_rate": 3.884116529827134e-06, "loss": 0.4297, "step": 11755 }, { "epoch": 1.4306054152722847, "grad_norm": 1.495323896408081, "learning_rate": 3.882573303653658e-06, "loss": 0.3354, "step": 11756 }, { "epoch": 1.430727106784302, "grad_norm": 2.040177345275879, "learning_rate": 3.88103031026093e-06, "loss": 0.3901, "step": 11757 }, { "epoch": 1.430848798296319, "grad_norm": 1.8757530450820923, "learning_rate": 3.8794875497076625e-06, "loss": 0.368, "step": 11758 }, { "epoch": 1.430970489808336, "grad_norm": 2.0033326148986816, "learning_rate": 3.877945022052564e-06, "loss": 0.3475, "step": 11759 }, { "epoch": 1.431092181320353, "grad_norm": 1.5022252798080444, "learning_rate": 3.87640272735433e-06, "loss": 0.3681, "step": 11760 }, { "epoch": 1.43121387283237, "grad_norm": 1.929874062538147, "learning_rate": 3.874860665671644e-06, "loss": 0.3533, "step": 11761 }, { "epoch": 1.431335564344387, "grad_norm": 1.5449442863464355, "learning_rate": 3.8733188370631915e-06, "loss": 0.3603, "step": 11762 }, { "epoch": 1.431457255856404, "grad_norm": 2.9099409580230713, "learning_rate": 3.871777241587638e-06, "loss": 0.4045, "step": 11763 }, { "epoch": 1.431578947368421, "grad_norm": 2.839965581893921, "learning_rate": 3.870235879303646e-06, "loss": 0.4013, "step": 11764 }, { "epoch": 1.431700638880438, "grad_norm": 1.417090892791748, "learning_rate": 3.868694750269867e-06, "loss": 0.3344, "step": 11765 }, { "epoch": 1.431822330392455, "grad_norm": 1.6607081890106201, "learning_rate": 3.867153854544939e-06, "loss": 0.3903, "step": 11766 }, { "epoch": 1.4319440219044721, "grad_norm": 1.6627159118652344, "learning_rate": 3.865613192187506e-06, "loss": 0.3605, "step": 11767 }, { "epoch": 1.4320657134164891, "grad_norm": 1.7895677089691162, "learning_rate": 3.864072763256188e-06, "loss": 0.3654, "step": 11768 }, { "epoch": 1.4321874049285062, "grad_norm": 1.3444929122924805, "learning_rate": 3.862532567809599e-06, "loss": 0.357, "step": 11769 }, { "epoch": 1.4323090964405232, "grad_norm": 1.7331032752990723, "learning_rate": 3.860992605906353e-06, "loss": 0.3871, "step": 11770 }, { "epoch": 1.4324307879525402, "grad_norm": 2.553884983062744, "learning_rate": 3.859452877605044e-06, "loss": 0.4075, "step": 11771 }, { "epoch": 1.4325524794645572, "grad_norm": 2.045876979827881, "learning_rate": 3.857913382964261e-06, "loss": 0.3976, "step": 11772 }, { "epoch": 1.4326741709765745, "grad_norm": 1.304618239402771, "learning_rate": 3.856374122042591e-06, "loss": 0.3049, "step": 11773 }, { "epoch": 1.4327958624885915, "grad_norm": 1.9653626680374146, "learning_rate": 3.854835094898598e-06, "loss": 0.4501, "step": 11774 }, { "epoch": 1.4329175540006085, "grad_norm": 1.49382483959198, "learning_rate": 3.853296301590857e-06, "loss": 0.335, "step": 11775 }, { "epoch": 1.4330392455126255, "grad_norm": 1.4614157676696777, "learning_rate": 3.851757742177912e-06, "loss": 0.3579, "step": 11776 }, { "epoch": 1.4331609370246425, "grad_norm": 2.047070264816284, "learning_rate": 3.850219416718306e-06, "loss": 0.3805, "step": 11777 }, { "epoch": 1.4332826285366596, "grad_norm": 1.5494405031204224, "learning_rate": 3.848681325270585e-06, "loss": 0.34, "step": 11778 }, { "epoch": 1.4334043200486766, "grad_norm": 1.595807433128357, "learning_rate": 3.847143467893271e-06, "loss": 0.3889, "step": 11779 }, { "epoch": 1.4335260115606936, "grad_norm": 1.5325510501861572, "learning_rate": 3.845605844644881e-06, "loss": 0.3825, "step": 11780 }, { "epoch": 1.4336477030727106, "grad_norm": 1.9219975471496582, "learning_rate": 3.844068455583932e-06, "loss": 0.3713, "step": 11781 }, { "epoch": 1.4337693945847279, "grad_norm": 2.2359395027160645, "learning_rate": 3.842531300768915e-06, "loss": 0.3794, "step": 11782 }, { "epoch": 1.4338910860967449, "grad_norm": 2.6751513481140137, "learning_rate": 3.840994380258332e-06, "loss": 0.325, "step": 11783 }, { "epoch": 1.434012777608762, "grad_norm": 1.601576328277588, "learning_rate": 3.83945769411066e-06, "loss": 0.3825, "step": 11784 }, { "epoch": 1.434134469120779, "grad_norm": 1.6968153715133667, "learning_rate": 3.837921242384372e-06, "loss": 0.3246, "step": 11785 }, { "epoch": 1.434256160632796, "grad_norm": 2.769641160964966, "learning_rate": 3.836385025137942e-06, "loss": 0.3494, "step": 11786 }, { "epoch": 1.434377852144813, "grad_norm": 1.795230507850647, "learning_rate": 3.834849042429817e-06, "loss": 0.3866, "step": 11787 }, { "epoch": 1.43449954365683, "grad_norm": 2.0857882499694824, "learning_rate": 3.833313294318442e-06, "loss": 0.4158, "step": 11788 }, { "epoch": 1.434621235168847, "grad_norm": 2.680990695953369, "learning_rate": 3.831777780862266e-06, "loss": 0.3682, "step": 11789 }, { "epoch": 1.434742926680864, "grad_norm": 1.160776138305664, "learning_rate": 3.830242502119709e-06, "loss": 0.3289, "step": 11790 }, { "epoch": 1.434864618192881, "grad_norm": 1.482572078704834, "learning_rate": 3.828707458149198e-06, "loss": 0.3864, "step": 11791 }, { "epoch": 1.434986309704898, "grad_norm": 1.968894362449646, "learning_rate": 3.827172649009142e-06, "loss": 0.3856, "step": 11792 }, { "epoch": 1.435108001216915, "grad_norm": 2.602078676223755, "learning_rate": 3.825638074757941e-06, "loss": 0.4144, "step": 11793 }, { "epoch": 1.435229692728932, "grad_norm": 2.042684316635132, "learning_rate": 3.824103735453995e-06, "loss": 0.3899, "step": 11794 }, { "epoch": 1.435351384240949, "grad_norm": 1.7161979675292969, "learning_rate": 3.822569631155686e-06, "loss": 0.3642, "step": 11795 }, { "epoch": 1.4354730757529661, "grad_norm": 1.9018559455871582, "learning_rate": 3.821035761921385e-06, "loss": 0.3232, "step": 11796 }, { "epoch": 1.4355947672649831, "grad_norm": 1.9780354499816895, "learning_rate": 3.8195021278094656e-06, "loss": 0.3364, "step": 11797 }, { "epoch": 1.4357164587770004, "grad_norm": 1.8928637504577637, "learning_rate": 3.817968728878284e-06, "loss": 0.3205, "step": 11798 }, { "epoch": 1.4358381502890174, "grad_norm": 1.4485167264938354, "learning_rate": 3.816435565186189e-06, "loss": 0.359, "step": 11799 }, { "epoch": 1.4359598418010344, "grad_norm": 1.6143624782562256, "learning_rate": 3.814902636791519e-06, "loss": 0.3548, "step": 11800 }, { "epoch": 1.4360815333130514, "grad_norm": 1.4651527404785156, "learning_rate": 3.8133699437526016e-06, "loss": 0.2954, "step": 11801 }, { "epoch": 1.4362032248250685, "grad_norm": 1.8096466064453125, "learning_rate": 3.8118374861277685e-06, "loss": 0.3804, "step": 11802 }, { "epoch": 1.4363249163370855, "grad_norm": 3.1749520301818848, "learning_rate": 3.810305263975328e-06, "loss": 0.4436, "step": 11803 }, { "epoch": 1.4364466078491025, "grad_norm": 1.4107646942138672, "learning_rate": 3.80877327735358e-06, "loss": 0.3028, "step": 11804 }, { "epoch": 1.4365682993611195, "grad_norm": 1.6116647720336914, "learning_rate": 3.8072415263208275e-06, "loss": 0.3476, "step": 11805 }, { "epoch": 1.4366899908731365, "grad_norm": 1.5863817930221558, "learning_rate": 3.805710010935354e-06, "loss": 0.3669, "step": 11806 }, { "epoch": 1.4368116823851536, "grad_norm": 2.238461494445801, "learning_rate": 3.8041787312554312e-06, "loss": 0.3938, "step": 11807 }, { "epoch": 1.4369333738971708, "grad_norm": 1.3938432931900024, "learning_rate": 3.802647687339337e-06, "loss": 0.3412, "step": 11808 }, { "epoch": 1.4370550654091878, "grad_norm": 2.802577495574951, "learning_rate": 3.8011168792453256e-06, "loss": 0.2997, "step": 11809 }, { "epoch": 1.4371767569212048, "grad_norm": 2.1231021881103516, "learning_rate": 3.7995863070316473e-06, "loss": 0.4151, "step": 11810 }, { "epoch": 1.4372984484332219, "grad_norm": 1.5988826751708984, "learning_rate": 3.7980559707565455e-06, "loss": 0.4082, "step": 11811 }, { "epoch": 1.4374201399452389, "grad_norm": 2.132887840270996, "learning_rate": 3.796525870478247e-06, "loss": 0.3783, "step": 11812 }, { "epoch": 1.437541831457256, "grad_norm": 1.3722410202026367, "learning_rate": 3.794996006254984e-06, "loss": 0.3728, "step": 11813 }, { "epoch": 1.437663522969273, "grad_norm": 1.8361307382583618, "learning_rate": 3.7934663781449654e-06, "loss": 0.3767, "step": 11814 }, { "epoch": 1.43778521448129, "grad_norm": 1.7051621675491333, "learning_rate": 3.791936986206395e-06, "loss": 0.3715, "step": 11815 }, { "epoch": 1.437906905993307, "grad_norm": 2.7762765884399414, "learning_rate": 3.7904078304974756e-06, "loss": 0.4432, "step": 11816 }, { "epoch": 1.438028597505324, "grad_norm": 1.4747251272201538, "learning_rate": 3.7888789110763915e-06, "loss": 0.3014, "step": 11817 }, { "epoch": 1.438150289017341, "grad_norm": 1.429440975189209, "learning_rate": 3.7873502280013174e-06, "loss": 0.3243, "step": 11818 }, { "epoch": 1.438271980529358, "grad_norm": 2.280829906463623, "learning_rate": 3.785821781330431e-06, "loss": 0.4309, "step": 11819 }, { "epoch": 1.438393672041375, "grad_norm": 1.8403630256652832, "learning_rate": 3.7842935711218876e-06, "loss": 0.4118, "step": 11820 }, { "epoch": 1.438515363553392, "grad_norm": 2.2532782554626465, "learning_rate": 3.7827655974338397e-06, "loss": 0.3616, "step": 11821 }, { "epoch": 1.438637055065409, "grad_norm": 1.6302249431610107, "learning_rate": 3.7812378603244306e-06, "loss": 0.3878, "step": 11822 }, { "epoch": 1.4387587465774263, "grad_norm": 2.0840559005737305, "learning_rate": 3.779710359851788e-06, "loss": 0.3749, "step": 11823 }, { "epoch": 1.4388804380894433, "grad_norm": 2.1594605445861816, "learning_rate": 3.778183096074046e-06, "loss": 0.3359, "step": 11824 }, { "epoch": 1.4390021296014603, "grad_norm": 3.089811086654663, "learning_rate": 3.776656069049316e-06, "loss": 0.4711, "step": 11825 }, { "epoch": 1.4391238211134774, "grad_norm": 1.4995942115783691, "learning_rate": 3.7751292788357007e-06, "loss": 0.3762, "step": 11826 }, { "epoch": 1.4392455126254944, "grad_norm": 1.6174546480178833, "learning_rate": 3.773602725491303e-06, "loss": 0.3667, "step": 11827 }, { "epoch": 1.4393672041375114, "grad_norm": 2.0613558292388916, "learning_rate": 3.7720764090742112e-06, "loss": 0.3034, "step": 11828 }, { "epoch": 1.4394888956495284, "grad_norm": 3.3389575481414795, "learning_rate": 3.7705503296424996e-06, "loss": 0.4531, "step": 11829 }, { "epoch": 1.4396105871615454, "grad_norm": 1.471073865890503, "learning_rate": 3.769024487254245e-06, "loss": 0.4015, "step": 11830 }, { "epoch": 1.4397322786735625, "grad_norm": 2.452979803085327, "learning_rate": 3.7674988819675064e-06, "loss": 0.4444, "step": 11831 }, { "epoch": 1.4398539701855795, "grad_norm": 1.6104811429977417, "learning_rate": 3.7659735138403354e-06, "loss": 0.3864, "step": 11832 }, { "epoch": 1.4399756616975967, "grad_norm": 1.776667833328247, "learning_rate": 3.7644483829307764e-06, "loss": 0.3966, "step": 11833 }, { "epoch": 1.4400973532096137, "grad_norm": 2.803340435028076, "learning_rate": 3.76292348929686e-06, "loss": 0.4458, "step": 11834 }, { "epoch": 1.4402190447216308, "grad_norm": 1.9894449710845947, "learning_rate": 3.761398832996618e-06, "loss": 0.334, "step": 11835 }, { "epoch": 1.4403407362336478, "grad_norm": 1.810025691986084, "learning_rate": 3.759874414088064e-06, "loss": 0.4043, "step": 11836 }, { "epoch": 1.4404624277456648, "grad_norm": 1.4075738191604614, "learning_rate": 3.758350232629201e-06, "loss": 0.4105, "step": 11837 }, { "epoch": 1.4405841192576818, "grad_norm": 1.4784384965896606, "learning_rate": 3.756826288678035e-06, "loss": 0.4071, "step": 11838 }, { "epoch": 1.4407058107696988, "grad_norm": 2.2317841053009033, "learning_rate": 3.7553025822925505e-06, "loss": 0.4204, "step": 11839 }, { "epoch": 1.4408275022817159, "grad_norm": 1.469972848892212, "learning_rate": 3.7537791135307258e-06, "loss": 0.324, "step": 11840 }, { "epoch": 1.4409491937937329, "grad_norm": 1.5468652248382568, "learning_rate": 3.7522558824505384e-06, "loss": 0.3675, "step": 11841 }, { "epoch": 1.44107088530575, "grad_norm": 1.3524500131607056, "learning_rate": 3.7507328891099425e-06, "loss": 0.3084, "step": 11842 }, { "epoch": 1.441192576817767, "grad_norm": 3.782473087310791, "learning_rate": 3.7492101335668995e-06, "loss": 0.3221, "step": 11843 }, { "epoch": 1.441314268329784, "grad_norm": 2.458317518234253, "learning_rate": 3.747687615879353e-06, "loss": 0.2772, "step": 11844 }, { "epoch": 1.441435959841801, "grad_norm": 1.239877700805664, "learning_rate": 3.7461653361052276e-06, "loss": 0.3302, "step": 11845 }, { "epoch": 1.441557651353818, "grad_norm": 3.6148478984832764, "learning_rate": 3.744643294302459e-06, "loss": 0.4576, "step": 11846 }, { "epoch": 1.441679342865835, "grad_norm": 3.3129348754882812, "learning_rate": 3.7431214905289602e-06, "loss": 0.4444, "step": 11847 }, { "epoch": 1.441801034377852, "grad_norm": 1.6403416395187378, "learning_rate": 3.7415999248426373e-06, "loss": 0.3334, "step": 11848 }, { "epoch": 1.4419227258898693, "grad_norm": 1.4138256311416626, "learning_rate": 3.7400785973013944e-06, "loss": 0.336, "step": 11849 }, { "epoch": 1.4420444174018863, "grad_norm": 4.209815502166748, "learning_rate": 3.738557507963115e-06, "loss": 0.4022, "step": 11850 }, { "epoch": 1.4421661089139033, "grad_norm": 2.5665271282196045, "learning_rate": 3.7370366568856864e-06, "loss": 0.3204, "step": 11851 }, { "epoch": 1.4422878004259203, "grad_norm": 2.005777597427368, "learning_rate": 3.735516044126977e-06, "loss": 0.3902, "step": 11852 }, { "epoch": 1.4424094919379373, "grad_norm": 1.5276718139648438, "learning_rate": 3.7339956697448444e-06, "loss": 0.3977, "step": 11853 }, { "epoch": 1.4425311834499543, "grad_norm": 1.3939039707183838, "learning_rate": 3.73247553379715e-06, "loss": 0.4163, "step": 11854 }, { "epoch": 1.4426528749619714, "grad_norm": 3.7330803871154785, "learning_rate": 3.7309556363417388e-06, "loss": 0.4434, "step": 11855 }, { "epoch": 1.4427745664739884, "grad_norm": 2.1777427196502686, "learning_rate": 3.7294359774364343e-06, "loss": 0.3846, "step": 11856 }, { "epoch": 1.4428962579860054, "grad_norm": 2.101963758468628, "learning_rate": 3.727916557139074e-06, "loss": 0.3819, "step": 11857 }, { "epoch": 1.4430179494980226, "grad_norm": 1.8848042488098145, "learning_rate": 3.726397375507468e-06, "loss": 0.3851, "step": 11858 }, { "epoch": 1.4431396410100397, "grad_norm": 1.75052011013031, "learning_rate": 3.72487843259943e-06, "loss": 0.3621, "step": 11859 }, { "epoch": 1.4432613325220567, "grad_norm": 2.2883315086364746, "learning_rate": 3.7233597284727573e-06, "loss": 0.3541, "step": 11860 }, { "epoch": 1.4433830240340737, "grad_norm": 1.6344425678253174, "learning_rate": 3.721841263185235e-06, "loss": 0.3794, "step": 11861 }, { "epoch": 1.4435047155460907, "grad_norm": 1.5160001516342163, "learning_rate": 3.720323036794652e-06, "loss": 0.3713, "step": 11862 }, { "epoch": 1.4436264070581077, "grad_norm": 1.483747124671936, "learning_rate": 3.7188050493587746e-06, "loss": 0.3758, "step": 11863 }, { "epoch": 1.4437480985701248, "grad_norm": 1.8093531131744385, "learning_rate": 3.7172873009353637e-06, "loss": 0.3687, "step": 11864 }, { "epoch": 1.4438697900821418, "grad_norm": 2.2343521118164062, "learning_rate": 3.7157697915821787e-06, "loss": 0.3168, "step": 11865 }, { "epoch": 1.4439914815941588, "grad_norm": 3.938396453857422, "learning_rate": 3.7142525213569615e-06, "loss": 0.3098, "step": 11866 }, { "epoch": 1.4441131731061758, "grad_norm": 1.385934591293335, "learning_rate": 3.712735490317445e-06, "loss": 0.3536, "step": 11867 }, { "epoch": 1.4442348646181928, "grad_norm": 2.027087688446045, "learning_rate": 3.711218698521358e-06, "loss": 0.3843, "step": 11868 }, { "epoch": 1.4443565561302099, "grad_norm": 2.3759968280792236, "learning_rate": 3.709702146026413e-06, "loss": 0.3281, "step": 11869 }, { "epoch": 1.4444782476422269, "grad_norm": 1.6625268459320068, "learning_rate": 3.7081858328903254e-06, "loss": 0.3773, "step": 11870 }, { "epoch": 1.444599939154244, "grad_norm": 3.1017301082611084, "learning_rate": 3.70666975917079e-06, "loss": 0.309, "step": 11871 }, { "epoch": 1.444721630666261, "grad_norm": 1.438114047050476, "learning_rate": 3.705153924925493e-06, "loss": 0.3391, "step": 11872 }, { "epoch": 1.444843322178278, "grad_norm": 1.330441951751709, "learning_rate": 3.703638330212123e-06, "loss": 0.3506, "step": 11873 }, { "epoch": 1.4449650136902952, "grad_norm": 1.717523217201233, "learning_rate": 3.7021229750883456e-06, "loss": 0.3191, "step": 11874 }, { "epoch": 1.4450867052023122, "grad_norm": 3.1649675369262695, "learning_rate": 3.700607859611822e-06, "loss": 0.4068, "step": 11875 }, { "epoch": 1.4452083967143292, "grad_norm": 2.1028671264648438, "learning_rate": 3.6990929838402134e-06, "loss": 0.3753, "step": 11876 }, { "epoch": 1.4453300882263462, "grad_norm": 1.4174188375473022, "learning_rate": 3.6975783478311577e-06, "loss": 0.3268, "step": 11877 }, { "epoch": 1.4454517797383633, "grad_norm": 1.5427979230880737, "learning_rate": 3.6960639516422914e-06, "loss": 0.3437, "step": 11878 }, { "epoch": 1.4455734712503803, "grad_norm": 4.0022969245910645, "learning_rate": 3.6945497953312393e-06, "loss": 0.4219, "step": 11879 }, { "epoch": 1.4456951627623973, "grad_norm": 4.140995025634766, "learning_rate": 3.6930358789556166e-06, "loss": 0.4492, "step": 11880 }, { "epoch": 1.4458168542744143, "grad_norm": 1.6450996398925781, "learning_rate": 3.691522202573037e-06, "loss": 0.3716, "step": 11881 }, { "epoch": 1.4459385457864313, "grad_norm": 2.075960874557495, "learning_rate": 3.6900087662410944e-06, "loss": 0.378, "step": 11882 }, { "epoch": 1.4460602372984486, "grad_norm": 1.8620332479476929, "learning_rate": 3.688495570017376e-06, "loss": 0.3823, "step": 11883 }, { "epoch": 1.4461819288104656, "grad_norm": 1.837510585784912, "learning_rate": 3.68698261395947e-06, "loss": 0.3609, "step": 11884 }, { "epoch": 1.4463036203224826, "grad_norm": 1.5520201921463013, "learning_rate": 3.6854698981249416e-06, "loss": 0.3907, "step": 11885 }, { "epoch": 1.4464253118344996, "grad_norm": 1.6463804244995117, "learning_rate": 3.6839574225713505e-06, "loss": 0.3894, "step": 11886 }, { "epoch": 1.4465470033465166, "grad_norm": 2.3525595664978027, "learning_rate": 3.682445187356257e-06, "loss": 0.4395, "step": 11887 }, { "epoch": 1.4466686948585337, "grad_norm": 2.8774092197418213, "learning_rate": 3.6809331925372005e-06, "loss": 0.3538, "step": 11888 }, { "epoch": 1.4467903863705507, "grad_norm": 1.9676817655563354, "learning_rate": 3.679421438171712e-06, "loss": 0.366, "step": 11889 }, { "epoch": 1.4469120778825677, "grad_norm": 1.7366608381271362, "learning_rate": 3.6779099243173277e-06, "loss": 0.3624, "step": 11890 }, { "epoch": 1.4470337693945847, "grad_norm": 1.9219353199005127, "learning_rate": 3.6763986510315506e-06, "loss": 0.371, "step": 11891 }, { "epoch": 1.4471554609066017, "grad_norm": 3.129209041595459, "learning_rate": 3.674887618371896e-06, "loss": 0.318, "step": 11892 }, { "epoch": 1.4472771524186188, "grad_norm": 2.042206048965454, "learning_rate": 3.6733768263958615e-06, "loss": 0.4412, "step": 11893 }, { "epoch": 1.4473988439306358, "grad_norm": 2.022676706314087, "learning_rate": 3.671866275160929e-06, "loss": 0.3759, "step": 11894 }, { "epoch": 1.4475205354426528, "grad_norm": 2.3494081497192383, "learning_rate": 3.670355964724588e-06, "loss": 0.3779, "step": 11895 }, { "epoch": 1.4476422269546698, "grad_norm": 2.8019931316375732, "learning_rate": 3.668845895144304e-06, "loss": 0.3019, "step": 11896 }, { "epoch": 1.4477639184666868, "grad_norm": 2.2933526039123535, "learning_rate": 3.6673360664775337e-06, "loss": 0.4281, "step": 11897 }, { "epoch": 1.4478856099787039, "grad_norm": 1.5204875469207764, "learning_rate": 3.665826478781739e-06, "loss": 0.3532, "step": 11898 }, { "epoch": 1.448007301490721, "grad_norm": 1.6592719554901123, "learning_rate": 3.6643171321143568e-06, "loss": 0.383, "step": 11899 }, { "epoch": 1.4481289930027381, "grad_norm": 1.5485148429870605, "learning_rate": 3.6628080265328193e-06, "loss": 0.4048, "step": 11900 }, { "epoch": 1.4482506845147551, "grad_norm": 1.8762445449829102, "learning_rate": 3.661299162094559e-06, "loss": 0.4269, "step": 11901 }, { "epoch": 1.4483723760267722, "grad_norm": 1.6729037761688232, "learning_rate": 3.659790538856981e-06, "loss": 0.3873, "step": 11902 }, { "epoch": 1.4484940675387892, "grad_norm": 1.9526728391647339, "learning_rate": 3.6582821568774997e-06, "loss": 0.3149, "step": 11903 }, { "epoch": 1.4486157590508062, "grad_norm": 2.0222623348236084, "learning_rate": 3.656774016213508e-06, "loss": 0.3841, "step": 11904 }, { "epoch": 1.4487374505628232, "grad_norm": 2.0432982444763184, "learning_rate": 3.6552661169223912e-06, "loss": 0.3788, "step": 11905 }, { "epoch": 1.4488591420748402, "grad_norm": 1.6651802062988281, "learning_rate": 3.653758459061536e-06, "loss": 0.3382, "step": 11906 }, { "epoch": 1.4489808335868573, "grad_norm": 1.8043780326843262, "learning_rate": 3.6522510426883064e-06, "loss": 0.4021, "step": 11907 }, { "epoch": 1.4491025250988743, "grad_norm": 1.5165379047393799, "learning_rate": 3.6507438678600604e-06, "loss": 0.3457, "step": 11908 }, { "epoch": 1.4492242166108915, "grad_norm": 1.8150503635406494, "learning_rate": 3.649236934634156e-06, "loss": 0.4514, "step": 11909 }, { "epoch": 1.4493459081229085, "grad_norm": 3.865121841430664, "learning_rate": 3.6477302430679294e-06, "loss": 0.4431, "step": 11910 }, { "epoch": 1.4494675996349256, "grad_norm": 3.333472967147827, "learning_rate": 3.646223793218717e-06, "loss": 0.4465, "step": 11911 }, { "epoch": 1.4495892911469426, "grad_norm": 2.021068572998047, "learning_rate": 3.644717585143842e-06, "loss": 0.3345, "step": 11912 }, { "epoch": 1.4497109826589596, "grad_norm": 2.026867628097534, "learning_rate": 3.6432116189006185e-06, "loss": 0.3145, "step": 11913 }, { "epoch": 1.4498326741709766, "grad_norm": 1.637890100479126, "learning_rate": 3.641705894546349e-06, "loss": 0.3591, "step": 11914 }, { "epoch": 1.4499543656829936, "grad_norm": 1.6168177127838135, "learning_rate": 3.6402004121383316e-06, "loss": 0.3411, "step": 11915 }, { "epoch": 1.4500760571950106, "grad_norm": 1.5202151536941528, "learning_rate": 3.6386951717338494e-06, "loss": 0.3333, "step": 11916 }, { "epoch": 1.4501977487070277, "grad_norm": 2.092015266418457, "learning_rate": 3.637190173390187e-06, "loss": 0.3933, "step": 11917 }, { "epoch": 1.4503194402190447, "grad_norm": 1.5029441118240356, "learning_rate": 3.635685417164605e-06, "loss": 0.3518, "step": 11918 }, { "epoch": 1.4504411317310617, "grad_norm": 1.5704599618911743, "learning_rate": 3.6341809031143695e-06, "loss": 0.3498, "step": 11919 }, { "epoch": 1.4505628232430787, "grad_norm": 1.5989277362823486, "learning_rate": 3.6326766312967267e-06, "loss": 0.3436, "step": 11920 }, { "epoch": 1.4506845147550957, "grad_norm": 1.2575188875198364, "learning_rate": 3.6311726017689154e-06, "loss": 0.3572, "step": 11921 }, { "epoch": 1.4508062062671128, "grad_norm": 3.50934100151062, "learning_rate": 3.629668814588172e-06, "loss": 0.4281, "step": 11922 }, { "epoch": 1.4509278977791298, "grad_norm": 1.7198940515518188, "learning_rate": 3.6281652698117163e-06, "loss": 0.4014, "step": 11923 }, { "epoch": 1.451049589291147, "grad_norm": 1.6251307725906372, "learning_rate": 3.626661967496761e-06, "loss": 0.3663, "step": 11924 }, { "epoch": 1.451171280803164, "grad_norm": 1.9700456857681274, "learning_rate": 3.62515890770051e-06, "loss": 0.4203, "step": 11925 }, { "epoch": 1.451292972315181, "grad_norm": 2.024721622467041, "learning_rate": 3.6236560904801545e-06, "loss": 0.3733, "step": 11926 }, { "epoch": 1.451414663827198, "grad_norm": 2.203974723815918, "learning_rate": 3.6221535158928857e-06, "loss": 0.4132, "step": 11927 }, { "epoch": 1.451536355339215, "grad_norm": 1.67996084690094, "learning_rate": 3.620651183995878e-06, "loss": 0.3888, "step": 11928 }, { "epoch": 1.4516580468512321, "grad_norm": 3.2756850719451904, "learning_rate": 3.6191490948462937e-06, "loss": 0.3353, "step": 11929 }, { "epoch": 1.4517797383632491, "grad_norm": 1.6566343307495117, "learning_rate": 3.617647248501297e-06, "loss": 0.3667, "step": 11930 }, { "epoch": 1.4519014298752662, "grad_norm": 1.4988598823547363, "learning_rate": 3.616145645018033e-06, "loss": 0.365, "step": 11931 }, { "epoch": 1.4520231213872832, "grad_norm": 1.937064290046692, "learning_rate": 3.614644284453639e-06, "loss": 0.391, "step": 11932 }, { "epoch": 1.4521448128993002, "grad_norm": 1.973625659942627, "learning_rate": 3.6131431668652482e-06, "loss": 0.374, "step": 11933 }, { "epoch": 1.4522665044113174, "grad_norm": 1.3987855911254883, "learning_rate": 3.6116422923099814e-06, "loss": 0.3773, "step": 11934 }, { "epoch": 1.4523881959233345, "grad_norm": 2.3750061988830566, "learning_rate": 3.6101416608449457e-06, "loss": 0.4469, "step": 11935 }, { "epoch": 1.4525098874353515, "grad_norm": 1.690173864364624, "learning_rate": 3.608641272527251e-06, "loss": 0.4323, "step": 11936 }, { "epoch": 1.4526315789473685, "grad_norm": 3.122556447982788, "learning_rate": 3.60714112741398e-06, "loss": 0.3131, "step": 11937 }, { "epoch": 1.4527532704593855, "grad_norm": 1.7985584735870361, "learning_rate": 3.605641225562225e-06, "loss": 0.3807, "step": 11938 }, { "epoch": 1.4528749619714025, "grad_norm": 2.649031639099121, "learning_rate": 3.604141567029057e-06, "loss": 0.3515, "step": 11939 }, { "epoch": 1.4529966534834196, "grad_norm": 3.458806037902832, "learning_rate": 3.602642151871537e-06, "loss": 0.4738, "step": 11940 }, { "epoch": 1.4531183449954366, "grad_norm": 1.8226171731948853, "learning_rate": 3.601142980146728e-06, "loss": 0.344, "step": 11941 }, { "epoch": 1.4532400365074536, "grad_norm": 1.8936265707015991, "learning_rate": 3.599644051911674e-06, "loss": 0.3878, "step": 11942 }, { "epoch": 1.4533617280194706, "grad_norm": 1.7052464485168457, "learning_rate": 3.598145367223409e-06, "loss": 0.3156, "step": 11943 }, { "epoch": 1.4534834195314876, "grad_norm": 1.5494877099990845, "learning_rate": 3.596646926138967e-06, "loss": 0.3175, "step": 11944 }, { "epoch": 1.4536051110435046, "grad_norm": 1.7864489555358887, "learning_rate": 3.595148728715363e-06, "loss": 0.3378, "step": 11945 }, { "epoch": 1.4537268025555217, "grad_norm": 1.6891885995864868, "learning_rate": 3.593650775009604e-06, "loss": 0.3571, "step": 11946 }, { "epoch": 1.4538484940675387, "grad_norm": 1.2989020347595215, "learning_rate": 3.5921530650787008e-06, "loss": 0.332, "step": 11947 }, { "epoch": 1.4539701855795557, "grad_norm": 2.4695024490356445, "learning_rate": 3.59065559897963e-06, "loss": 0.4224, "step": 11948 }, { "epoch": 1.4540918770915727, "grad_norm": 2.111696243286133, "learning_rate": 3.5891583767693825e-06, "loss": 0.4453, "step": 11949 }, { "epoch": 1.45421356860359, "grad_norm": 1.526094913482666, "learning_rate": 3.5876613985049302e-06, "loss": 0.3938, "step": 11950 }, { "epoch": 1.454335260115607, "grad_norm": 1.7118867635726929, "learning_rate": 3.5861646642432304e-06, "loss": 0.413, "step": 11951 }, { "epoch": 1.454456951627624, "grad_norm": 1.8664582967758179, "learning_rate": 3.5846681740412447e-06, "loss": 0.4039, "step": 11952 }, { "epoch": 1.454578643139641, "grad_norm": 1.5890673398971558, "learning_rate": 3.583171927955913e-06, "loss": 0.3706, "step": 11953 }, { "epoch": 1.454700334651658, "grad_norm": 1.3090864419937134, "learning_rate": 3.581675926044169e-06, "loss": 0.3452, "step": 11954 }, { "epoch": 1.454822026163675, "grad_norm": 2.5138299465179443, "learning_rate": 3.5801801683629444e-06, "loss": 0.4256, "step": 11955 }, { "epoch": 1.454943717675692, "grad_norm": 1.707141637802124, "learning_rate": 3.5786846549691532e-06, "loss": 0.3863, "step": 11956 }, { "epoch": 1.455065409187709, "grad_norm": 1.729933261871338, "learning_rate": 3.5771893859196984e-06, "loss": 0.4005, "step": 11957 }, { "epoch": 1.4551871006997261, "grad_norm": 2.7479751110076904, "learning_rate": 3.5756943612714856e-06, "loss": 0.33, "step": 11958 }, { "epoch": 1.4553087922117434, "grad_norm": 1.5847742557525635, "learning_rate": 3.574199581081399e-06, "loss": 0.3682, "step": 11959 }, { "epoch": 1.4554304837237604, "grad_norm": 2.085653066635132, "learning_rate": 3.5727050454063205e-06, "loss": 0.3414, "step": 11960 }, { "epoch": 1.4555521752357774, "grad_norm": 2.3502464294433594, "learning_rate": 3.571210754303117e-06, "loss": 0.3308, "step": 11961 }, { "epoch": 1.4556738667477944, "grad_norm": 1.8462797403335571, "learning_rate": 3.56971670782865e-06, "loss": 0.4166, "step": 11962 }, { "epoch": 1.4557955582598114, "grad_norm": 1.438338279724121, "learning_rate": 3.5682229060397744e-06, "loss": 0.3399, "step": 11963 }, { "epoch": 1.4559172497718285, "grad_norm": 2.339810371398926, "learning_rate": 3.5667293489933298e-06, "loss": 0.3943, "step": 11964 }, { "epoch": 1.4560389412838455, "grad_norm": 1.5432193279266357, "learning_rate": 3.565236036746147e-06, "loss": 0.3556, "step": 11965 }, { "epoch": 1.4561606327958625, "grad_norm": 2.055554151535034, "learning_rate": 3.5637429693550552e-06, "loss": 0.4105, "step": 11966 }, { "epoch": 1.4562823243078795, "grad_norm": 1.8492835760116577, "learning_rate": 3.5622501468768654e-06, "loss": 0.3668, "step": 11967 }, { "epoch": 1.4564040158198965, "grad_norm": 2.7835192680358887, "learning_rate": 3.5607575693683806e-06, "loss": 0.452, "step": 11968 }, { "epoch": 1.4565257073319136, "grad_norm": 1.5163910388946533, "learning_rate": 3.559265236886401e-06, "loss": 0.352, "step": 11969 }, { "epoch": 1.4566473988439306, "grad_norm": 1.883076548576355, "learning_rate": 3.557773149487711e-06, "loss": 0.4034, "step": 11970 }, { "epoch": 1.4567690903559476, "grad_norm": 1.7124474048614502, "learning_rate": 3.556281307229088e-06, "loss": 0.3203, "step": 11971 }, { "epoch": 1.4568907818679646, "grad_norm": 1.624720573425293, "learning_rate": 3.554789710167299e-06, "loss": 0.3725, "step": 11972 }, { "epoch": 1.4570124733799816, "grad_norm": 1.664584994316101, "learning_rate": 3.5532983583590985e-06, "loss": 0.3516, "step": 11973 }, { "epoch": 1.4571341648919987, "grad_norm": 1.4626065492630005, "learning_rate": 3.5518072518612434e-06, "loss": 0.4068, "step": 11974 }, { "epoch": 1.457255856404016, "grad_norm": 3.118086576461792, "learning_rate": 3.550316390730468e-06, "loss": 0.4359, "step": 11975 }, { "epoch": 1.457377547916033, "grad_norm": 1.965084195137024, "learning_rate": 3.5488257750235024e-06, "loss": 0.4313, "step": 11976 }, { "epoch": 1.45749923942805, "grad_norm": 1.603909969329834, "learning_rate": 3.547335404797072e-06, "loss": 0.3767, "step": 11977 }, { "epoch": 1.457620930940067, "grad_norm": 1.9170496463775635, "learning_rate": 3.5458452801078814e-06, "loss": 0.4499, "step": 11978 }, { "epoch": 1.457742622452084, "grad_norm": 1.6861101388931274, "learning_rate": 3.5443554010126413e-06, "loss": 0.4329, "step": 11979 }, { "epoch": 1.457864313964101, "grad_norm": 1.5012520551681519, "learning_rate": 3.542865767568041e-06, "loss": 0.3644, "step": 11980 }, { "epoch": 1.457986005476118, "grad_norm": 2.4398927688598633, "learning_rate": 3.54137637983076e-06, "loss": 0.3288, "step": 11981 }, { "epoch": 1.458107696988135, "grad_norm": 1.597055196762085, "learning_rate": 3.539887237857483e-06, "loss": 0.3522, "step": 11982 }, { "epoch": 1.458229388500152, "grad_norm": 2.284586191177368, "learning_rate": 3.538398341704866e-06, "loss": 0.3993, "step": 11983 }, { "epoch": 1.4583510800121693, "grad_norm": 2.395482063293457, "learning_rate": 3.536909691429563e-06, "loss": 0.3262, "step": 11984 }, { "epoch": 1.4584727715241863, "grad_norm": 2.9379451274871826, "learning_rate": 3.5354212870882264e-06, "loss": 0.3193, "step": 11985 }, { "epoch": 1.4585944630362033, "grad_norm": 2.296464681625366, "learning_rate": 3.5339331287374888e-06, "loss": 0.384, "step": 11986 }, { "epoch": 1.4587161545482203, "grad_norm": 2.4101970195770264, "learning_rate": 3.532445216433982e-06, "loss": 0.3865, "step": 11987 }, { "epoch": 1.4588378460602374, "grad_norm": 1.3474007844924927, "learning_rate": 3.530957550234323e-06, "loss": 0.3637, "step": 11988 }, { "epoch": 1.4589595375722544, "grad_norm": 1.4934849739074707, "learning_rate": 3.529470130195115e-06, "loss": 0.3237, "step": 11989 }, { "epoch": 1.4590812290842714, "grad_norm": 2.1972146034240723, "learning_rate": 3.527982956372965e-06, "loss": 0.3321, "step": 11990 }, { "epoch": 1.4592029205962884, "grad_norm": 1.6108638048171997, "learning_rate": 3.526496028824461e-06, "loss": 0.3198, "step": 11991 }, { "epoch": 1.4593246121083054, "grad_norm": 2.217272996902466, "learning_rate": 3.5250093476061777e-06, "loss": 0.3584, "step": 11992 }, { "epoch": 1.4594463036203225, "grad_norm": 3.880554676055908, "learning_rate": 3.5235229127746962e-06, "loss": 0.4702, "step": 11993 }, { "epoch": 1.4595679951323395, "grad_norm": 2.569859027862549, "learning_rate": 3.5220367243865716e-06, "loss": 0.3564, "step": 11994 }, { "epoch": 1.4596896866443565, "grad_norm": 1.963043451309204, "learning_rate": 3.5205507824983597e-06, "loss": 0.3921, "step": 11995 }, { "epoch": 1.4598113781563735, "grad_norm": 1.3961774110794067, "learning_rate": 3.519065087166602e-06, "loss": 0.3369, "step": 11996 }, { "epoch": 1.4599330696683905, "grad_norm": 3.0910370349884033, "learning_rate": 3.517579638447829e-06, "loss": 0.2967, "step": 11997 }, { "epoch": 1.4600547611804076, "grad_norm": 1.6313807964324951, "learning_rate": 3.5160944363985716e-06, "loss": 0.3744, "step": 11998 }, { "epoch": 1.4601764526924246, "grad_norm": 2.917386770248413, "learning_rate": 3.514609481075343e-06, "loss": 0.3837, "step": 11999 }, { "epoch": 1.4602981442044418, "grad_norm": 2.9949278831481934, "learning_rate": 3.513124772534643e-06, "loss": 0.3854, "step": 12000 }, { "epoch": 1.4604198357164588, "grad_norm": 2.0964198112487793, "learning_rate": 3.5116403108329767e-06, "loss": 0.3759, "step": 12001 }, { "epoch": 1.4605415272284759, "grad_norm": 1.702501654624939, "learning_rate": 3.5101560960268267e-06, "loss": 0.3552, "step": 12002 }, { "epoch": 1.4606632187404929, "grad_norm": 1.7093279361724854, "learning_rate": 3.5086721281726665e-06, "loss": 0.3581, "step": 12003 }, { "epoch": 1.46078491025251, "grad_norm": 2.492913246154785, "learning_rate": 3.5071884073269713e-06, "loss": 0.3737, "step": 12004 }, { "epoch": 1.460906601764527, "grad_norm": 2.7656280994415283, "learning_rate": 3.505704933546198e-06, "loss": 0.3497, "step": 12005 }, { "epoch": 1.461028293276544, "grad_norm": 1.679345726966858, "learning_rate": 3.5042217068867935e-06, "loss": 0.368, "step": 12006 }, { "epoch": 1.461149984788561, "grad_norm": 1.9937193393707275, "learning_rate": 3.5027387274051985e-06, "loss": 0.3724, "step": 12007 }, { "epoch": 1.461271676300578, "grad_norm": 2.082627296447754, "learning_rate": 3.5012559951578395e-06, "loss": 0.3904, "step": 12008 }, { "epoch": 1.461393367812595, "grad_norm": 1.383838415145874, "learning_rate": 3.4997735102011464e-06, "loss": 0.3145, "step": 12009 }, { "epoch": 1.4615150593246122, "grad_norm": 2.8954319953918457, "learning_rate": 3.498291272591525e-06, "loss": 0.467, "step": 12010 }, { "epoch": 1.4616367508366293, "grad_norm": 1.8391460180282593, "learning_rate": 3.496809282385375e-06, "loss": 0.3086, "step": 12011 }, { "epoch": 1.4617584423486463, "grad_norm": 2.2016239166259766, "learning_rate": 3.4953275396390972e-06, "loss": 0.3987, "step": 12012 }, { "epoch": 1.4618801338606633, "grad_norm": 4.661727428436279, "learning_rate": 3.4938460444090695e-06, "loss": 0.4281, "step": 12013 }, { "epoch": 1.4620018253726803, "grad_norm": 1.52132248878479, "learning_rate": 3.492364796751664e-06, "loss": 0.3512, "step": 12014 }, { "epoch": 1.4621235168846973, "grad_norm": 1.402281641960144, "learning_rate": 3.49088379672325e-06, "loss": 0.3362, "step": 12015 }, { "epoch": 1.4622452083967143, "grad_norm": 2.3437771797180176, "learning_rate": 3.489403044380182e-06, "loss": 0.3978, "step": 12016 }, { "epoch": 1.4623668999087314, "grad_norm": 1.3682541847229004, "learning_rate": 3.4879225397788042e-06, "loss": 0.3377, "step": 12017 }, { "epoch": 1.4624885914207484, "grad_norm": 1.7647216320037842, "learning_rate": 3.486442282975453e-06, "loss": 0.4121, "step": 12018 }, { "epoch": 1.4626102829327654, "grad_norm": 2.1108624935150146, "learning_rate": 3.484962274026451e-06, "loss": 0.4369, "step": 12019 }, { "epoch": 1.4627319744447824, "grad_norm": 1.7634366750717163, "learning_rate": 3.4834825129881244e-06, "loss": 0.4307, "step": 12020 }, { "epoch": 1.4628536659567994, "grad_norm": 1.9122487306594849, "learning_rate": 3.4820029999167758e-06, "loss": 0.388, "step": 12021 }, { "epoch": 1.4629753574688165, "grad_norm": 2.420741319656372, "learning_rate": 3.4805237348687025e-06, "loss": 0.3697, "step": 12022 }, { "epoch": 1.4630970489808335, "grad_norm": 1.7084821462631226, "learning_rate": 3.4790447179001974e-06, "loss": 0.3942, "step": 12023 }, { "epoch": 1.4632187404928505, "grad_norm": 2.1634058952331543, "learning_rate": 3.4775659490675394e-06, "loss": 0.406, "step": 12024 }, { "epoch": 1.4633404320048677, "grad_norm": 1.580566167831421, "learning_rate": 3.476087428426994e-06, "loss": 0.3885, "step": 12025 }, { "epoch": 1.4634621235168848, "grad_norm": 1.4776626825332642, "learning_rate": 3.4746091560348295e-06, "loss": 0.3787, "step": 12026 }, { "epoch": 1.4635838150289018, "grad_norm": 1.5963472127914429, "learning_rate": 3.4731311319472926e-06, "loss": 0.3267, "step": 12027 }, { "epoch": 1.4637055065409188, "grad_norm": 2.0312659740448, "learning_rate": 3.4716533562206266e-06, "loss": 0.3485, "step": 12028 }, { "epoch": 1.4638271980529358, "grad_norm": 1.6842656135559082, "learning_rate": 3.4701758289110642e-06, "loss": 0.4169, "step": 12029 }, { "epoch": 1.4639488895649528, "grad_norm": 1.9565033912658691, "learning_rate": 3.4686985500748226e-06, "loss": 0.3502, "step": 12030 }, { "epoch": 1.4640705810769699, "grad_norm": 1.7969274520874023, "learning_rate": 3.467221519768125e-06, "loss": 0.3997, "step": 12031 }, { "epoch": 1.4641922725889869, "grad_norm": 1.9222075939178467, "learning_rate": 3.46574473804717e-06, "loss": 0.439, "step": 12032 }, { "epoch": 1.464313964101004, "grad_norm": 3.1811234951019287, "learning_rate": 3.464268204968149e-06, "loss": 0.3264, "step": 12033 }, { "epoch": 1.464435655613021, "grad_norm": 1.7962932586669922, "learning_rate": 3.462791920587254e-06, "loss": 0.3697, "step": 12034 }, { "epoch": 1.4645573471250382, "grad_norm": 1.8668720722198486, "learning_rate": 3.461315884960659e-06, "loss": 0.3675, "step": 12035 }, { "epoch": 1.4646790386370552, "grad_norm": 2.976686954498291, "learning_rate": 3.4598400981445244e-06, "loss": 0.3709, "step": 12036 }, { "epoch": 1.4648007301490722, "grad_norm": 2.3302364349365234, "learning_rate": 3.4583645601950153e-06, "loss": 0.3864, "step": 12037 }, { "epoch": 1.4649224216610892, "grad_norm": 2.78696608543396, "learning_rate": 3.456889271168272e-06, "loss": 0.4148, "step": 12038 }, { "epoch": 1.4650441131731062, "grad_norm": 1.9746683835983276, "learning_rate": 3.4554142311204397e-06, "loss": 0.3723, "step": 12039 }, { "epoch": 1.4651658046851233, "grad_norm": 2.125821113586426, "learning_rate": 3.4539394401076443e-06, "loss": 0.3339, "step": 12040 }, { "epoch": 1.4652874961971403, "grad_norm": 1.5860700607299805, "learning_rate": 3.4524648981859965e-06, "loss": 0.3673, "step": 12041 }, { "epoch": 1.4654091877091573, "grad_norm": 2.7654309272766113, "learning_rate": 3.450990605411616e-06, "loss": 0.4254, "step": 12042 }, { "epoch": 1.4655308792211743, "grad_norm": 1.426876425743103, "learning_rate": 3.4495165618405947e-06, "loss": 0.3114, "step": 12043 }, { "epoch": 1.4656525707331913, "grad_norm": 2.3017098903656006, "learning_rate": 3.4480427675290306e-06, "loss": 0.3791, "step": 12044 }, { "epoch": 1.4657742622452083, "grad_norm": 2.061642646789551, "learning_rate": 3.4465692225330007e-06, "loss": 0.3914, "step": 12045 }, { "epoch": 1.4658959537572254, "grad_norm": 1.9715923070907593, "learning_rate": 3.4450959269085725e-06, "loss": 0.3855, "step": 12046 }, { "epoch": 1.4660176452692424, "grad_norm": 1.894641399383545, "learning_rate": 3.4436228807118168e-06, "loss": 0.3273, "step": 12047 }, { "epoch": 1.4661393367812594, "grad_norm": 1.7752881050109863, "learning_rate": 3.442150083998782e-06, "loss": 0.345, "step": 12048 }, { "epoch": 1.4662610282932764, "grad_norm": 2.1672749519348145, "learning_rate": 3.4406775368255062e-06, "loss": 0.3675, "step": 12049 }, { "epoch": 1.4663827198052934, "grad_norm": 1.4513928890228271, "learning_rate": 3.43920523924803e-06, "loss": 0.3514, "step": 12050 }, { "epoch": 1.4665044113173107, "grad_norm": 1.7190641164779663, "learning_rate": 3.4377331913223756e-06, "loss": 0.3328, "step": 12051 }, { "epoch": 1.4666261028293277, "grad_norm": 1.7609766721725464, "learning_rate": 3.436261393104556e-06, "loss": 0.3829, "step": 12052 }, { "epoch": 1.4667477943413447, "grad_norm": 3.0008814334869385, "learning_rate": 3.434789844650577e-06, "loss": 0.4678, "step": 12053 }, { "epoch": 1.4668694858533617, "grad_norm": 17.622514724731445, "learning_rate": 3.43331854601643e-06, "loss": 0.3467, "step": 12054 }, { "epoch": 1.4669911773653788, "grad_norm": 2.601954460144043, "learning_rate": 3.431847497258107e-06, "loss": 0.3984, "step": 12055 }, { "epoch": 1.4671128688773958, "grad_norm": 1.7869925498962402, "learning_rate": 3.4303766984315834e-06, "loss": 0.3568, "step": 12056 }, { "epoch": 1.4672345603894128, "grad_norm": 1.9167250394821167, "learning_rate": 3.428906149592821e-06, "loss": 0.3639, "step": 12057 }, { "epoch": 1.4673562519014298, "grad_norm": 1.9740310907363892, "learning_rate": 3.4274358507977846e-06, "loss": 0.3733, "step": 12058 }, { "epoch": 1.4674779434134468, "grad_norm": 1.9413151741027832, "learning_rate": 3.425965802102418e-06, "loss": 0.3117, "step": 12059 }, { "epoch": 1.467599634925464, "grad_norm": 1.640803575515747, "learning_rate": 3.424496003562656e-06, "loss": 0.4151, "step": 12060 }, { "epoch": 1.467721326437481, "grad_norm": 2.9612045288085938, "learning_rate": 3.423026455234437e-06, "loss": 0.3484, "step": 12061 }, { "epoch": 1.4678430179494981, "grad_norm": 1.853200078010559, "learning_rate": 3.421557157173674e-06, "loss": 0.4105, "step": 12062 }, { "epoch": 1.4679647094615151, "grad_norm": 1.6028133630752563, "learning_rate": 3.420088109436277e-06, "loss": 0.4252, "step": 12063 }, { "epoch": 1.4680864009735322, "grad_norm": 2.3818438053131104, "learning_rate": 3.4186193120781485e-06, "loss": 0.327, "step": 12064 }, { "epoch": 1.4682080924855492, "grad_norm": 1.8532391786575317, "learning_rate": 3.417150765155174e-06, "loss": 0.3683, "step": 12065 }, { "epoch": 1.4683297839975662, "grad_norm": 1.5387063026428223, "learning_rate": 3.4156824687232413e-06, "loss": 0.3411, "step": 12066 }, { "epoch": 1.4684514755095832, "grad_norm": 1.6694824695587158, "learning_rate": 3.4142144228382203e-06, "loss": 0.3726, "step": 12067 }, { "epoch": 1.4685731670216002, "grad_norm": 2.199007749557495, "learning_rate": 3.4127466275559685e-06, "loss": 0.3869, "step": 12068 }, { "epoch": 1.4686948585336173, "grad_norm": 1.4756526947021484, "learning_rate": 3.411279082932346e-06, "loss": 0.3549, "step": 12069 }, { "epoch": 1.4688165500456343, "grad_norm": 2.005603551864624, "learning_rate": 3.409811789023193e-06, "loss": 0.3589, "step": 12070 }, { "epoch": 1.4689382415576513, "grad_norm": 1.502556324005127, "learning_rate": 3.4083447458843376e-06, "loss": 0.3628, "step": 12071 }, { "epoch": 1.4690599330696683, "grad_norm": 2.0606141090393066, "learning_rate": 3.4068779535716133e-06, "loss": 0.3447, "step": 12072 }, { "epoch": 1.4691816245816853, "grad_norm": 1.632533311843872, "learning_rate": 3.405411412140829e-06, "loss": 0.4003, "step": 12073 }, { "epoch": 1.4693033160937023, "grad_norm": 1.3820217847824097, "learning_rate": 3.4039451216477903e-06, "loss": 0.3776, "step": 12074 }, { "epoch": 1.4694250076057194, "grad_norm": 1.6384810209274292, "learning_rate": 3.402479082148293e-06, "loss": 0.3988, "step": 12075 }, { "epoch": 1.4695466991177366, "grad_norm": 1.8373570442199707, "learning_rate": 3.40101329369812e-06, "loss": 0.4093, "step": 12076 }, { "epoch": 1.4696683906297536, "grad_norm": 1.817745327949524, "learning_rate": 3.3995477563530533e-06, "loss": 0.4081, "step": 12077 }, { "epoch": 1.4697900821417706, "grad_norm": 2.7041759490966797, "learning_rate": 3.3980824701688576e-06, "loss": 0.4207, "step": 12078 }, { "epoch": 1.4699117736537877, "grad_norm": 2.0243911743164062, "learning_rate": 3.396617435201284e-06, "loss": 0.3708, "step": 12079 }, { "epoch": 1.4700334651658047, "grad_norm": 2.2408790588378906, "learning_rate": 3.3951526515060895e-06, "loss": 0.3973, "step": 12080 }, { "epoch": 1.4701551566778217, "grad_norm": 2.189399480819702, "learning_rate": 3.3936881191390083e-06, "loss": 0.351, "step": 12081 }, { "epoch": 1.4702768481898387, "grad_norm": 1.7004191875457764, "learning_rate": 3.392223838155765e-06, "loss": 0.3859, "step": 12082 }, { "epoch": 1.4703985397018557, "grad_norm": 1.840395212173462, "learning_rate": 3.390759808612084e-06, "loss": 0.377, "step": 12083 }, { "epoch": 1.4705202312138728, "grad_norm": 2.119800329208374, "learning_rate": 3.3892960305636747e-06, "loss": 0.3085, "step": 12084 }, { "epoch": 1.47064192272589, "grad_norm": 2.191340208053589, "learning_rate": 3.3878325040662297e-06, "loss": 0.3916, "step": 12085 }, { "epoch": 1.470763614237907, "grad_norm": 2.118105411529541, "learning_rate": 3.3863692291754523e-06, "loss": 0.3561, "step": 12086 }, { "epoch": 1.470885305749924, "grad_norm": 2.6521801948547363, "learning_rate": 3.3849062059470073e-06, "loss": 0.4316, "step": 12087 }, { "epoch": 1.471006997261941, "grad_norm": 2.1146295070648193, "learning_rate": 3.3834434344365784e-06, "loss": 0.3892, "step": 12088 }, { "epoch": 1.471128688773958, "grad_norm": 2.861733913421631, "learning_rate": 3.3819809146998216e-06, "loss": 0.3464, "step": 12089 }, { "epoch": 1.471250380285975, "grad_norm": 1.8602722883224487, "learning_rate": 3.3805186467923855e-06, "loss": 0.3542, "step": 12090 }, { "epoch": 1.4713720717979921, "grad_norm": 1.3949251174926758, "learning_rate": 3.379056630769921e-06, "loss": 0.3686, "step": 12091 }, { "epoch": 1.4714937633100091, "grad_norm": 1.4818003177642822, "learning_rate": 3.3775948666880566e-06, "loss": 0.3793, "step": 12092 }, { "epoch": 1.4716154548220262, "grad_norm": 1.301381230354309, "learning_rate": 3.376133354602411e-06, "loss": 0.3386, "step": 12093 }, { "epoch": 1.4717371463340432, "grad_norm": 2.334848642349243, "learning_rate": 3.374672094568605e-06, "loss": 0.4194, "step": 12094 }, { "epoch": 1.4718588378460602, "grad_norm": 1.5721102952957153, "learning_rate": 3.3732110866422364e-06, "loss": 0.3587, "step": 12095 }, { "epoch": 1.4719805293580772, "grad_norm": 1.7915695905685425, "learning_rate": 3.3717503308789056e-06, "loss": 0.3563, "step": 12096 }, { "epoch": 1.4721022208700942, "grad_norm": 2.92470645904541, "learning_rate": 3.3702898273341987e-06, "loss": 0.3223, "step": 12097 }, { "epoch": 1.4722239123821113, "grad_norm": 3.1266584396362305, "learning_rate": 3.368829576063679e-06, "loss": 0.433, "step": 12098 }, { "epoch": 1.4723456038941283, "grad_norm": 1.9093586206436157, "learning_rate": 3.367369577122924e-06, "loss": 0.3603, "step": 12099 }, { "epoch": 1.4724672954061453, "grad_norm": 2.7054426670074463, "learning_rate": 3.3659098305674855e-06, "loss": 0.4244, "step": 12100 }, { "epoch": 1.4725889869181625, "grad_norm": 1.9046179056167603, "learning_rate": 3.3644503364529047e-06, "loss": 0.3284, "step": 12101 }, { "epoch": 1.4727106784301796, "grad_norm": 1.6244277954101562, "learning_rate": 3.362991094834729e-06, "loss": 0.3715, "step": 12102 }, { "epoch": 1.4728323699421966, "grad_norm": 1.5298324823379517, "learning_rate": 3.3615321057684747e-06, "loss": 0.3457, "step": 12103 }, { "epoch": 1.4729540614542136, "grad_norm": 2.714778423309326, "learning_rate": 3.3600733693096686e-06, "loss": 0.3881, "step": 12104 }, { "epoch": 1.4730757529662306, "grad_norm": 3.4847700595855713, "learning_rate": 3.3586148855138157e-06, "loss": 0.4263, "step": 12105 }, { "epoch": 1.4731974444782476, "grad_norm": 1.8944257497787476, "learning_rate": 3.3571566544364086e-06, "loss": 0.3414, "step": 12106 }, { "epoch": 1.4733191359902647, "grad_norm": 1.9065788984298706, "learning_rate": 3.3556986761329445e-06, "loss": 0.3471, "step": 12107 }, { "epoch": 1.4734408275022817, "grad_norm": 1.49790620803833, "learning_rate": 3.354240950658899e-06, "loss": 0.3368, "step": 12108 }, { "epoch": 1.4735625190142987, "grad_norm": 1.9343204498291016, "learning_rate": 3.352783478069741e-06, "loss": 0.3494, "step": 12109 }, { "epoch": 1.4736842105263157, "grad_norm": 1.758023977279663, "learning_rate": 3.3513262584209315e-06, "loss": 0.3451, "step": 12110 }, { "epoch": 1.473805902038333, "grad_norm": 1.5986489057540894, "learning_rate": 3.3498692917679154e-06, "loss": 0.3503, "step": 12111 }, { "epoch": 1.47392759355035, "grad_norm": 1.632510781288147, "learning_rate": 3.3484125781661414e-06, "loss": 0.34, "step": 12112 }, { "epoch": 1.474049285062367, "grad_norm": 2.4656519889831543, "learning_rate": 3.3469561176710376e-06, "loss": 0.4074, "step": 12113 }, { "epoch": 1.474170976574384, "grad_norm": 1.7424700260162354, "learning_rate": 3.3454999103380202e-06, "loss": 0.3615, "step": 12114 }, { "epoch": 1.474292668086401, "grad_norm": 1.3843655586242676, "learning_rate": 3.344043956222509e-06, "loss": 0.3445, "step": 12115 }, { "epoch": 1.474414359598418, "grad_norm": 1.902854323387146, "learning_rate": 3.3425882553799026e-06, "loss": 0.43, "step": 12116 }, { "epoch": 1.474536051110435, "grad_norm": 1.8997992277145386, "learning_rate": 3.341132807865589e-06, "loss": 0.3671, "step": 12117 }, { "epoch": 1.474657742622452, "grad_norm": 3.5117878913879395, "learning_rate": 3.3396776137349595e-06, "loss": 0.4317, "step": 12118 }, { "epoch": 1.474779434134469, "grad_norm": 1.873831033706665, "learning_rate": 3.338222673043383e-06, "loss": 0.362, "step": 12119 }, { "epoch": 1.4749011256464861, "grad_norm": 1.730852723121643, "learning_rate": 3.336767985846222e-06, "loss": 0.3248, "step": 12120 }, { "epoch": 1.4750228171585031, "grad_norm": 2.0717759132385254, "learning_rate": 3.335313552198832e-06, "loss": 0.422, "step": 12121 }, { "epoch": 1.4751445086705202, "grad_norm": 2.1184513568878174, "learning_rate": 3.333859372156553e-06, "loss": 0.411, "step": 12122 }, { "epoch": 1.4752662001825372, "grad_norm": 2.4771809577941895, "learning_rate": 3.3324054457747258e-06, "loss": 0.3334, "step": 12123 }, { "epoch": 1.4753878916945542, "grad_norm": 1.6757452487945557, "learning_rate": 3.330951773108673e-06, "loss": 0.3808, "step": 12124 }, { "epoch": 1.4755095832065712, "grad_norm": 1.8989259004592896, "learning_rate": 3.3294983542137062e-06, "loss": 0.3271, "step": 12125 }, { "epoch": 1.4756312747185885, "grad_norm": 1.9451688528060913, "learning_rate": 3.328045189145137e-06, "loss": 0.3893, "step": 12126 }, { "epoch": 1.4757529662306055, "grad_norm": 2.464289903640747, "learning_rate": 3.3265922779582593e-06, "loss": 0.3895, "step": 12127 }, { "epoch": 1.4758746577426225, "grad_norm": 3.6289501190185547, "learning_rate": 3.325139620708354e-06, "loss": 0.4726, "step": 12128 }, { "epoch": 1.4759963492546395, "grad_norm": 2.3231496810913086, "learning_rate": 3.3236872174507072e-06, "loss": 0.3482, "step": 12129 }, { "epoch": 1.4761180407666565, "grad_norm": 1.5108734369277954, "learning_rate": 3.32223506824058e-06, "loss": 0.3742, "step": 12130 }, { "epoch": 1.4762397322786736, "grad_norm": 2.2625434398651123, "learning_rate": 3.3207831731332284e-06, "loss": 0.3627, "step": 12131 }, { "epoch": 1.4763614237906906, "grad_norm": 3.0276291370391846, "learning_rate": 3.319331532183908e-06, "loss": 0.3702, "step": 12132 }, { "epoch": 1.4764831153027076, "grad_norm": 2.318291425704956, "learning_rate": 3.317880145447845e-06, "loss": 0.438, "step": 12133 }, { "epoch": 1.4766048068147246, "grad_norm": 2.4994823932647705, "learning_rate": 3.316429012980276e-06, "loss": 0.4382, "step": 12134 }, { "epoch": 1.4767264983267416, "grad_norm": 3.6462714672088623, "learning_rate": 3.3149781348364185e-06, "loss": 0.2762, "step": 12135 }, { "epoch": 1.4768481898387589, "grad_norm": 1.6942640542984009, "learning_rate": 3.313527511071476e-06, "loss": 0.4008, "step": 12136 }, { "epoch": 1.476969881350776, "grad_norm": 1.773508906364441, "learning_rate": 3.312077141740655e-06, "loss": 0.382, "step": 12137 }, { "epoch": 1.477091572862793, "grad_norm": 1.9742419719696045, "learning_rate": 3.3106270268991425e-06, "loss": 0.378, "step": 12138 }, { "epoch": 1.47721326437481, "grad_norm": 1.8255996704101562, "learning_rate": 3.3091771666021146e-06, "loss": 0.435, "step": 12139 }, { "epoch": 1.477334955886827, "grad_norm": 2.394289016723633, "learning_rate": 3.3077275609047486e-06, "loss": 0.3651, "step": 12140 }, { "epoch": 1.477456647398844, "grad_norm": 1.5934715270996094, "learning_rate": 3.3062782098622004e-06, "loss": 0.3511, "step": 12141 }, { "epoch": 1.477578338910861, "grad_norm": 1.7620885372161865, "learning_rate": 3.3048291135296185e-06, "loss": 0.4043, "step": 12142 }, { "epoch": 1.477700030422878, "grad_norm": 2.0183396339416504, "learning_rate": 3.3033802719621533e-06, "loss": 0.3745, "step": 12143 }, { "epoch": 1.477821721934895, "grad_norm": 4.824260711669922, "learning_rate": 3.301931685214924e-06, "loss": 0.4956, "step": 12144 }, { "epoch": 1.477943413446912, "grad_norm": 1.6940417289733887, "learning_rate": 3.3004833533430615e-06, "loss": 0.3681, "step": 12145 }, { "epoch": 1.478065104958929, "grad_norm": 1.8248379230499268, "learning_rate": 3.299035276401674e-06, "loss": 0.4143, "step": 12146 }, { "epoch": 1.478186796470946, "grad_norm": 2.2205722332000732, "learning_rate": 3.297587454445862e-06, "loss": 0.396, "step": 12147 }, { "epoch": 1.478308487982963, "grad_norm": 1.820279598236084, "learning_rate": 3.2961398875307238e-06, "loss": 0.4017, "step": 12148 }, { "epoch": 1.4784301794949801, "grad_norm": 1.601264238357544, "learning_rate": 3.294692575711339e-06, "loss": 0.3632, "step": 12149 }, { "epoch": 1.4785518710069971, "grad_norm": 1.8530502319335938, "learning_rate": 3.293245519042777e-06, "loss": 0.3576, "step": 12150 }, { "epoch": 1.4786735625190142, "grad_norm": 1.7436153888702393, "learning_rate": 3.291798717580109e-06, "loss": 0.3867, "step": 12151 }, { "epoch": 1.4787952540310314, "grad_norm": 1.6185338497161865, "learning_rate": 3.290352171378385e-06, "loss": 0.3272, "step": 12152 }, { "epoch": 1.4789169455430484, "grad_norm": 1.5198289155960083, "learning_rate": 3.2889058804926455e-06, "loss": 0.3829, "step": 12153 }, { "epoch": 1.4790386370550654, "grad_norm": 1.5279617309570312, "learning_rate": 3.287459844977933e-06, "loss": 0.4059, "step": 12154 }, { "epoch": 1.4791603285670825, "grad_norm": 1.4052528142929077, "learning_rate": 3.2860140648892657e-06, "loss": 0.3874, "step": 12155 }, { "epoch": 1.4792820200790995, "grad_norm": 1.8463801145553589, "learning_rate": 3.284568540281662e-06, "loss": 0.387, "step": 12156 }, { "epoch": 1.4794037115911165, "grad_norm": 2.442044496536255, "learning_rate": 3.2831232712101245e-06, "loss": 0.3689, "step": 12157 }, { "epoch": 1.4795254031031335, "grad_norm": 1.3333532810211182, "learning_rate": 3.281678257729647e-06, "loss": 0.3751, "step": 12158 }, { "epoch": 1.4796470946151505, "grad_norm": 1.7679804563522339, "learning_rate": 3.280233499895221e-06, "loss": 0.385, "step": 12159 }, { "epoch": 1.4797687861271676, "grad_norm": 2.173210620880127, "learning_rate": 3.2787889977618194e-06, "loss": 0.388, "step": 12160 }, { "epoch": 1.4798904776391848, "grad_norm": 1.9507856369018555, "learning_rate": 3.2773447513844058e-06, "loss": 0.3477, "step": 12161 }, { "epoch": 1.4800121691512018, "grad_norm": 1.4006422758102417, "learning_rate": 3.275900760817943e-06, "loss": 0.3612, "step": 12162 }, { "epoch": 1.4801338606632188, "grad_norm": 1.785213589668274, "learning_rate": 3.274457026117369e-06, "loss": 0.4075, "step": 12163 }, { "epoch": 1.4802555521752359, "grad_norm": 1.889771819114685, "learning_rate": 3.2730135473376324e-06, "loss": 0.2934, "step": 12164 }, { "epoch": 1.4803772436872529, "grad_norm": 1.497920036315918, "learning_rate": 3.271570324533653e-06, "loss": 0.3495, "step": 12165 }, { "epoch": 1.48049893519927, "grad_norm": 1.4012306928634644, "learning_rate": 3.27012735776035e-06, "loss": 0.3788, "step": 12166 }, { "epoch": 1.480620626711287, "grad_norm": 1.801177978515625, "learning_rate": 3.2686846470726306e-06, "loss": 0.4004, "step": 12167 }, { "epoch": 1.480742318223304, "grad_norm": 1.9686452150344849, "learning_rate": 3.2672421925253938e-06, "loss": 0.3819, "step": 12168 }, { "epoch": 1.480864009735321, "grad_norm": 3.5021603107452393, "learning_rate": 3.265799994173524e-06, "loss": 0.4607, "step": 12169 }, { "epoch": 1.480985701247338, "grad_norm": 1.4304561614990234, "learning_rate": 3.2643580520719075e-06, "loss": 0.3915, "step": 12170 }, { "epoch": 1.481107392759355, "grad_norm": 1.576928973197937, "learning_rate": 3.262916366275406e-06, "loss": 0.3759, "step": 12171 }, { "epoch": 1.481229084271372, "grad_norm": 2.650402545928955, "learning_rate": 3.2614749368388842e-06, "loss": 0.2997, "step": 12172 }, { "epoch": 1.481350775783389, "grad_norm": 2.327786445617676, "learning_rate": 3.2600337638171897e-06, "loss": 0.3778, "step": 12173 }, { "epoch": 1.481472467295406, "grad_norm": 1.6953985691070557, "learning_rate": 3.2585928472651573e-06, "loss": 0.4064, "step": 12174 }, { "epoch": 1.481594158807423, "grad_norm": 2.0793440341949463, "learning_rate": 3.2571521872376243e-06, "loss": 0.4006, "step": 12175 }, { "epoch": 1.48171585031944, "grad_norm": 2.706766128540039, "learning_rate": 3.255711783789408e-06, "loss": 0.3322, "step": 12176 }, { "epoch": 1.4818375418314573, "grad_norm": 1.3230936527252197, "learning_rate": 3.2542716369753156e-06, "loss": 0.3511, "step": 12177 }, { "epoch": 1.4819592333434743, "grad_norm": 1.5271930694580078, "learning_rate": 3.2528317468501557e-06, "loss": 0.3682, "step": 12178 }, { "epoch": 1.4820809248554914, "grad_norm": 3.0575695037841797, "learning_rate": 3.2513921134687086e-06, "loss": 0.3287, "step": 12179 }, { "epoch": 1.4822026163675084, "grad_norm": 1.637450933456421, "learning_rate": 3.249952736885762e-06, "loss": 0.3497, "step": 12180 }, { "epoch": 1.4823243078795254, "grad_norm": 1.9152295589447021, "learning_rate": 3.248513617156087e-06, "loss": 0.365, "step": 12181 }, { "epoch": 1.4824459993915424, "grad_norm": 2.66620135307312, "learning_rate": 3.2470747543344394e-06, "loss": 0.4223, "step": 12182 }, { "epoch": 1.4825676909035594, "grad_norm": 1.3882116079330444, "learning_rate": 3.2456361484755795e-06, "loss": 0.3241, "step": 12183 }, { "epoch": 1.4826893824155765, "grad_norm": 1.5836743116378784, "learning_rate": 3.2441977996342443e-06, "loss": 0.3478, "step": 12184 }, { "epoch": 1.4828110739275935, "grad_norm": 2.3171470165252686, "learning_rate": 3.242759707865163e-06, "loss": 0.3475, "step": 12185 }, { "epoch": 1.4829327654396107, "grad_norm": 1.8339293003082275, "learning_rate": 3.2413218732230643e-06, "loss": 0.3714, "step": 12186 }, { "epoch": 1.4830544569516277, "grad_norm": 1.4502087831497192, "learning_rate": 3.2398842957626596e-06, "loss": 0.3598, "step": 12187 }, { "epoch": 1.4831761484636448, "grad_norm": 1.9981969594955444, "learning_rate": 3.238446975538646e-06, "loss": 0.3509, "step": 12188 }, { "epoch": 1.4832978399756618, "grad_norm": 2.196707010269165, "learning_rate": 3.2370099126057277e-06, "loss": 0.3101, "step": 12189 }, { "epoch": 1.4834195314876788, "grad_norm": 1.7515709400177002, "learning_rate": 3.2355731070185737e-06, "loss": 0.4177, "step": 12190 }, { "epoch": 1.4835412229996958, "grad_norm": 1.9297406673431396, "learning_rate": 3.2341365588318676e-06, "loss": 0.3684, "step": 12191 }, { "epoch": 1.4836629145117128, "grad_norm": 1.7228189706802368, "learning_rate": 3.2327002681002706e-06, "loss": 0.3595, "step": 12192 }, { "epoch": 1.4837846060237299, "grad_norm": 2.0338523387908936, "learning_rate": 3.231264234878432e-06, "loss": 0.4159, "step": 12193 }, { "epoch": 1.4839062975357469, "grad_norm": 1.951887845993042, "learning_rate": 3.229828459221004e-06, "loss": 0.383, "step": 12194 }, { "epoch": 1.484027989047764, "grad_norm": 1.9407411813735962, "learning_rate": 3.2283929411826152e-06, "loss": 0.3774, "step": 12195 }, { "epoch": 1.484149680559781, "grad_norm": 1.7167006731033325, "learning_rate": 3.2269576808178895e-06, "loss": 0.3505, "step": 12196 }, { "epoch": 1.484271372071798, "grad_norm": 1.6156951189041138, "learning_rate": 3.2255226781814465e-06, "loss": 0.3908, "step": 12197 }, { "epoch": 1.484393063583815, "grad_norm": 1.6708869934082031, "learning_rate": 3.2240879333278872e-06, "loss": 0.4116, "step": 12198 }, { "epoch": 1.484514755095832, "grad_norm": 1.7810593843460083, "learning_rate": 3.2226534463118055e-06, "loss": 0.3949, "step": 12199 }, { "epoch": 1.484636446607849, "grad_norm": 1.5908105373382568, "learning_rate": 3.221219217187791e-06, "loss": 0.3814, "step": 12200 }, { "epoch": 1.484758138119866, "grad_norm": 2.0193192958831787, "learning_rate": 3.219785246010416e-06, "loss": 0.4046, "step": 12201 }, { "epoch": 1.4848798296318833, "grad_norm": 1.867391586303711, "learning_rate": 3.2183515328342473e-06, "loss": 0.3854, "step": 12202 }, { "epoch": 1.4850015211439003, "grad_norm": 3.2296524047851562, "learning_rate": 3.2169180777138407e-06, "loss": 0.3279, "step": 12203 }, { "epoch": 1.4851232126559173, "grad_norm": 1.8475428819656372, "learning_rate": 3.2154848807037376e-06, "loss": 0.4015, "step": 12204 }, { "epoch": 1.4852449041679343, "grad_norm": 1.554054856300354, "learning_rate": 3.2140519418584805e-06, "loss": 0.4448, "step": 12205 }, { "epoch": 1.4853665956799513, "grad_norm": 1.6051626205444336, "learning_rate": 3.2126192612325934e-06, "loss": 0.3706, "step": 12206 }, { "epoch": 1.4854882871919683, "grad_norm": 2.1893246173858643, "learning_rate": 3.21118683888059e-06, "loss": 0.4109, "step": 12207 }, { "epoch": 1.4856099787039854, "grad_norm": 3.1736106872558594, "learning_rate": 3.209754674856982e-06, "loss": 0.3119, "step": 12208 }, { "epoch": 1.4857316702160024, "grad_norm": 3.722097158432007, "learning_rate": 3.208322769216264e-06, "loss": 0.318, "step": 12209 }, { "epoch": 1.4858533617280194, "grad_norm": 1.6310083866119385, "learning_rate": 3.2068911220129197e-06, "loss": 0.3542, "step": 12210 }, { "epoch": 1.4859750532400364, "grad_norm": 3.6910557746887207, "learning_rate": 3.2054597333014328e-06, "loss": 0.3327, "step": 12211 }, { "epoch": 1.4860967447520537, "grad_norm": 1.5434637069702148, "learning_rate": 3.2040286031362654e-06, "loss": 0.3696, "step": 12212 }, { "epoch": 1.4862184362640707, "grad_norm": 1.804858684539795, "learning_rate": 3.202597731571878e-06, "loss": 0.4362, "step": 12213 }, { "epoch": 1.4863401277760877, "grad_norm": 1.7253929376602173, "learning_rate": 3.201167118662716e-06, "loss": 0.3495, "step": 12214 }, { "epoch": 1.4864618192881047, "grad_norm": 2.440077781677246, "learning_rate": 3.1997367644632148e-06, "loss": 0.3428, "step": 12215 }, { "epoch": 1.4865835108001217, "grad_norm": 1.3952327966690063, "learning_rate": 3.198306669027809e-06, "loss": 0.3885, "step": 12216 }, { "epoch": 1.4867052023121388, "grad_norm": 4.270806312561035, "learning_rate": 3.1968768324109123e-06, "loss": 0.3218, "step": 12217 }, { "epoch": 1.4868268938241558, "grad_norm": 1.5582537651062012, "learning_rate": 3.1954472546669313e-06, "loss": 0.348, "step": 12218 }, { "epoch": 1.4869485853361728, "grad_norm": 1.860722303390503, "learning_rate": 3.19401793585027e-06, "loss": 0.3393, "step": 12219 }, { "epoch": 1.4870702768481898, "grad_norm": 2.8334057331085205, "learning_rate": 3.1925888760153144e-06, "loss": 0.3748, "step": 12220 }, { "epoch": 1.4871919683602068, "grad_norm": 1.6063847541809082, "learning_rate": 3.1911600752164383e-06, "loss": 0.3489, "step": 12221 }, { "epoch": 1.4873136598722239, "grad_norm": 3.600930690765381, "learning_rate": 3.189731533508019e-06, "loss": 0.4335, "step": 12222 }, { "epoch": 1.4874353513842409, "grad_norm": 2.0729432106018066, "learning_rate": 3.188303250944408e-06, "loss": 0.3805, "step": 12223 }, { "epoch": 1.487557042896258, "grad_norm": 1.4001272916793823, "learning_rate": 3.1868752275799643e-06, "loss": 0.3465, "step": 12224 }, { "epoch": 1.487678734408275, "grad_norm": 2.107030153274536, "learning_rate": 3.185447463469017e-06, "loss": 0.374, "step": 12225 }, { "epoch": 1.487800425920292, "grad_norm": 2.100602865219116, "learning_rate": 3.184019958665896e-06, "loss": 0.4118, "step": 12226 }, { "epoch": 1.4879221174323092, "grad_norm": 3.5554583072662354, "learning_rate": 3.1825927132249266e-06, "loss": 0.4575, "step": 12227 }, { "epoch": 1.4880438089443262, "grad_norm": 1.6292518377304077, "learning_rate": 3.181165727200416e-06, "loss": 0.3751, "step": 12228 }, { "epoch": 1.4881655004563432, "grad_norm": 2.390096664428711, "learning_rate": 3.179739000646661e-06, "loss": 0.3181, "step": 12229 }, { "epoch": 1.4882871919683602, "grad_norm": 1.8347625732421875, "learning_rate": 3.1783125336179577e-06, "loss": 0.4166, "step": 12230 }, { "epoch": 1.4884088834803773, "grad_norm": 3.1557180881500244, "learning_rate": 3.176886326168578e-06, "loss": 0.3343, "step": 12231 }, { "epoch": 1.4885305749923943, "grad_norm": 1.8456231355667114, "learning_rate": 3.1754603783528e-06, "loss": 0.4191, "step": 12232 }, { "epoch": 1.4886522665044113, "grad_norm": 2.0623562335968018, "learning_rate": 3.174034690224882e-06, "loss": 0.3064, "step": 12233 }, { "epoch": 1.4887739580164283, "grad_norm": 1.72364342212677, "learning_rate": 3.172609261839069e-06, "loss": 0.3882, "step": 12234 }, { "epoch": 1.4888956495284453, "grad_norm": 1.563386082649231, "learning_rate": 3.1711840932496117e-06, "loss": 0.3801, "step": 12235 }, { "epoch": 1.4890173410404623, "grad_norm": 3.1208484172821045, "learning_rate": 3.1697591845107324e-06, "loss": 0.3294, "step": 12236 }, { "epoch": 1.4891390325524796, "grad_norm": 1.7949192523956299, "learning_rate": 3.168334535676649e-06, "loss": 0.4018, "step": 12237 }, { "epoch": 1.4892607240644966, "grad_norm": 2.5957248210906982, "learning_rate": 3.166910146801582e-06, "loss": 0.34, "step": 12238 }, { "epoch": 1.4893824155765136, "grad_norm": 2.835603713989258, "learning_rate": 3.165486017939724e-06, "loss": 0.2921, "step": 12239 }, { "epoch": 1.4895041070885306, "grad_norm": 1.5516680479049683, "learning_rate": 3.1640621491452726e-06, "loss": 0.3857, "step": 12240 }, { "epoch": 1.4896257986005477, "grad_norm": 1.36301851272583, "learning_rate": 3.162638540472406e-06, "loss": 0.3229, "step": 12241 }, { "epoch": 1.4897474901125647, "grad_norm": 2.6442418098449707, "learning_rate": 3.161215191975292e-06, "loss": 0.3383, "step": 12242 }, { "epoch": 1.4898691816245817, "grad_norm": 1.591164231300354, "learning_rate": 3.1597921037080993e-06, "loss": 0.3961, "step": 12243 }, { "epoch": 1.4899908731365987, "grad_norm": 1.8963242769241333, "learning_rate": 3.1583692757249752e-06, "loss": 0.431, "step": 12244 }, { "epoch": 1.4901125646486157, "grad_norm": 1.5690912008285522, "learning_rate": 3.156946708080059e-06, "loss": 0.3149, "step": 12245 }, { "epoch": 1.4902342561606328, "grad_norm": 3.6722986698150635, "learning_rate": 3.1555244008274878e-06, "loss": 0.4469, "step": 12246 }, { "epoch": 1.4903559476726498, "grad_norm": 1.4763903617858887, "learning_rate": 3.15410235402138e-06, "loss": 0.343, "step": 12247 }, { "epoch": 1.4904776391846668, "grad_norm": 1.4336321353912354, "learning_rate": 3.1526805677158477e-06, "loss": 0.3849, "step": 12248 }, { "epoch": 1.4905993306966838, "grad_norm": 2.9285664558410645, "learning_rate": 3.1512590419649935e-06, "loss": 0.4136, "step": 12249 }, { "epoch": 1.4907210222087008, "grad_norm": 1.8608766794204712, "learning_rate": 3.149837776822906e-06, "loss": 0.3577, "step": 12250 }, { "epoch": 1.4908427137207179, "grad_norm": 1.4334529638290405, "learning_rate": 3.1484167723436723e-06, "loss": 0.3481, "step": 12251 }, { "epoch": 1.4909644052327349, "grad_norm": 3.0986416339874268, "learning_rate": 3.146996028581363e-06, "loss": 0.3838, "step": 12252 }, { "epoch": 1.4910860967447521, "grad_norm": 2.436950922012329, "learning_rate": 3.145575545590036e-06, "loss": 0.4266, "step": 12253 }, { "epoch": 1.4912077882567691, "grad_norm": 2.039731502532959, "learning_rate": 3.144155323423752e-06, "loss": 0.3808, "step": 12254 }, { "epoch": 1.4913294797687862, "grad_norm": 1.6108180284500122, "learning_rate": 3.1427353621365474e-06, "loss": 0.4109, "step": 12255 }, { "epoch": 1.4914511712808032, "grad_norm": 2.0467262268066406, "learning_rate": 3.1413156617824537e-06, "loss": 0.4496, "step": 12256 }, { "epoch": 1.4915728627928202, "grad_norm": 2.6676440238952637, "learning_rate": 3.1398962224154983e-06, "loss": 0.3434, "step": 12257 }, { "epoch": 1.4916945543048372, "grad_norm": 1.7897522449493408, "learning_rate": 3.1384770440896917e-06, "loss": 0.382, "step": 12258 }, { "epoch": 1.4918162458168542, "grad_norm": 1.4741199016571045, "learning_rate": 3.137058126859036e-06, "loss": 0.3462, "step": 12259 }, { "epoch": 1.4919379373288713, "grad_norm": 2.7347793579101562, "learning_rate": 3.135639470777524e-06, "loss": 0.3285, "step": 12260 }, { "epoch": 1.4920596288408883, "grad_norm": 2.7070937156677246, "learning_rate": 3.1342210758991355e-06, "loss": 0.4233, "step": 12261 }, { "epoch": 1.4921813203529055, "grad_norm": 2.465555191040039, "learning_rate": 3.1328029422778494e-06, "loss": 0.3469, "step": 12262 }, { "epoch": 1.4923030118649225, "grad_norm": 1.5377061367034912, "learning_rate": 3.1313850699676264e-06, "loss": 0.3873, "step": 12263 }, { "epoch": 1.4924247033769396, "grad_norm": 1.359940528869629, "learning_rate": 3.129967459022415e-06, "loss": 0.425, "step": 12264 }, { "epoch": 1.4925463948889566, "grad_norm": 2.1438324451446533, "learning_rate": 3.1285501094961645e-06, "loss": 0.353, "step": 12265 }, { "epoch": 1.4926680864009736, "grad_norm": 1.7696703672409058, "learning_rate": 3.1271330214428063e-06, "loss": 0.3193, "step": 12266 }, { "epoch": 1.4927897779129906, "grad_norm": 2.484936237335205, "learning_rate": 3.1257161949162598e-06, "loss": 0.3175, "step": 12267 }, { "epoch": 1.4929114694250076, "grad_norm": 1.596076250076294, "learning_rate": 3.1242996299704432e-06, "loss": 0.3954, "step": 12268 }, { "epoch": 1.4930331609370247, "grad_norm": 1.520337700843811, "learning_rate": 3.1228833266592585e-06, "loss": 0.351, "step": 12269 }, { "epoch": 1.4931548524490417, "grad_norm": 1.6884106397628784, "learning_rate": 3.121467285036598e-06, "loss": 0.4404, "step": 12270 }, { "epoch": 1.4932765439610587, "grad_norm": 2.1992413997650146, "learning_rate": 3.1200515051563442e-06, "loss": 0.2917, "step": 12271 }, { "epoch": 1.4933982354730757, "grad_norm": 1.7355977296829224, "learning_rate": 3.118635987072369e-06, "loss": 0.4039, "step": 12272 }, { "epoch": 1.4935199269850927, "grad_norm": 2.3108272552490234, "learning_rate": 3.1172207308385406e-06, "loss": 0.3668, "step": 12273 }, { "epoch": 1.4936416184971097, "grad_norm": 1.553948998451233, "learning_rate": 3.11580573650871e-06, "loss": 0.3946, "step": 12274 }, { "epoch": 1.4937633100091268, "grad_norm": 2.9436333179473877, "learning_rate": 3.1143910041367185e-06, "loss": 0.3571, "step": 12275 }, { "epoch": 1.4938850015211438, "grad_norm": 2.040424346923828, "learning_rate": 3.112976533776404e-06, "loss": 0.3794, "step": 12276 }, { "epoch": 1.4940066930331608, "grad_norm": 2.03022837638855, "learning_rate": 3.1115623254815886e-06, "loss": 0.3916, "step": 12277 }, { "epoch": 1.494128384545178, "grad_norm": 1.7078419923782349, "learning_rate": 3.1101483793060816e-06, "loss": 0.391, "step": 12278 }, { "epoch": 1.494250076057195, "grad_norm": 1.7216203212738037, "learning_rate": 3.1087346953036924e-06, "loss": 0.3995, "step": 12279 }, { "epoch": 1.494371767569212, "grad_norm": 2.206731081008911, "learning_rate": 3.107321273528212e-06, "loss": 0.3593, "step": 12280 }, { "epoch": 1.494493459081229, "grad_norm": 2.5501060485839844, "learning_rate": 3.1059081140334224e-06, "loss": 0.3681, "step": 12281 }, { "epoch": 1.4946151505932461, "grad_norm": 2.21803617477417, "learning_rate": 3.1044952168731047e-06, "loss": 0.3587, "step": 12282 }, { "epoch": 1.4947368421052631, "grad_norm": 3.62646484375, "learning_rate": 3.10308258210101e-06, "loss": 0.4794, "step": 12283 }, { "epoch": 1.4948585336172802, "grad_norm": 1.92630934715271, "learning_rate": 3.101670209770903e-06, "loss": 0.3491, "step": 12284 }, { "epoch": 1.4949802251292972, "grad_norm": 3.7176802158355713, "learning_rate": 3.1002580999365228e-06, "loss": 0.3309, "step": 12285 }, { "epoch": 1.4951019166413142, "grad_norm": 1.882136583328247, "learning_rate": 3.0988462526516004e-06, "loss": 0.4068, "step": 12286 }, { "epoch": 1.4952236081533314, "grad_norm": 3.4674339294433594, "learning_rate": 3.0974346679698653e-06, "loss": 0.3601, "step": 12287 }, { "epoch": 1.4953452996653485, "grad_norm": 1.4098377227783203, "learning_rate": 3.0960233459450295e-06, "loss": 0.3596, "step": 12288 }, { "epoch": 1.4954669911773655, "grad_norm": 2.281935691833496, "learning_rate": 3.0946122866307925e-06, "loss": 0.3438, "step": 12289 }, { "epoch": 1.4955886826893825, "grad_norm": 1.6393074989318848, "learning_rate": 3.093201490080854e-06, "loss": 0.3468, "step": 12290 }, { "epoch": 1.4957103742013995, "grad_norm": 2.3345911502838135, "learning_rate": 3.0917909563488924e-06, "loss": 0.3274, "step": 12291 }, { "epoch": 1.4958320657134165, "grad_norm": 1.6504086256027222, "learning_rate": 3.0903806854885875e-06, "loss": 0.3735, "step": 12292 }, { "epoch": 1.4959537572254336, "grad_norm": 1.6430517435073853, "learning_rate": 3.0889706775536023e-06, "loss": 0.3549, "step": 12293 }, { "epoch": 1.4960754487374506, "grad_norm": 2.504863977432251, "learning_rate": 3.0875609325975818e-06, "loss": 0.3972, "step": 12294 }, { "epoch": 1.4961971402494676, "grad_norm": 1.7399461269378662, "learning_rate": 3.0861514506741786e-06, "loss": 0.3185, "step": 12295 }, { "epoch": 1.4963188317614846, "grad_norm": 3.7056572437286377, "learning_rate": 3.0847422318370247e-06, "loss": 0.4252, "step": 12296 }, { "epoch": 1.4964405232735016, "grad_norm": 1.4100056886672974, "learning_rate": 3.083333276139738e-06, "loss": 0.3506, "step": 12297 }, { "epoch": 1.4965622147855187, "grad_norm": 1.956782579421997, "learning_rate": 3.0819245836359413e-06, "loss": 0.3145, "step": 12298 }, { "epoch": 1.4966839062975357, "grad_norm": 1.6348721981048584, "learning_rate": 3.0805161543792293e-06, "loss": 0.3186, "step": 12299 }, { "epoch": 1.4968055978095527, "grad_norm": 2.610935688018799, "learning_rate": 3.0791079884232044e-06, "loss": 0.4086, "step": 12300 }, { "epoch": 1.4969272893215697, "grad_norm": 2.711599588394165, "learning_rate": 3.0777000858214458e-06, "loss": 0.3667, "step": 12301 }, { "epoch": 1.4970489808335867, "grad_norm": 4.114050388336182, "learning_rate": 3.0762924466275244e-06, "loss": 0.4347, "step": 12302 }, { "epoch": 1.497170672345604, "grad_norm": 3.9740138053894043, "learning_rate": 3.0748850708950096e-06, "loss": 0.4042, "step": 12303 }, { "epoch": 1.497292363857621, "grad_norm": 2.804448127746582, "learning_rate": 3.073477958677451e-06, "loss": 0.4266, "step": 12304 }, { "epoch": 1.497414055369638, "grad_norm": 4.193317890167236, "learning_rate": 3.072071110028395e-06, "loss": 0.4139, "step": 12305 }, { "epoch": 1.497535746881655, "grad_norm": 1.7350159883499146, "learning_rate": 3.0706645250013722e-06, "loss": 0.337, "step": 12306 }, { "epoch": 1.497657438393672, "grad_norm": 2.3582441806793213, "learning_rate": 3.069258203649904e-06, "loss": 0.3674, "step": 12307 }, { "epoch": 1.497779129905689, "grad_norm": 1.4542208909988403, "learning_rate": 3.06785214602751e-06, "loss": 0.3679, "step": 12308 }, { "epoch": 1.497900821417706, "grad_norm": 3.017589569091797, "learning_rate": 3.066446352187692e-06, "loss": 0.2985, "step": 12309 }, { "epoch": 1.498022512929723, "grad_norm": 1.587577223777771, "learning_rate": 3.0650408221839365e-06, "loss": 0.3747, "step": 12310 }, { "epoch": 1.4981442044417401, "grad_norm": 1.7981516122817993, "learning_rate": 3.063635556069737e-06, "loss": 0.4031, "step": 12311 }, { "epoch": 1.4982658959537571, "grad_norm": 2.5378201007843018, "learning_rate": 3.062230553898562e-06, "loss": 0.406, "step": 12312 }, { "epoch": 1.4983875874657744, "grad_norm": 1.496023178100586, "learning_rate": 3.060825815723871e-06, "loss": 0.3844, "step": 12313 }, { "epoch": 1.4985092789777914, "grad_norm": 1.455578088760376, "learning_rate": 3.059421341599126e-06, "loss": 0.344, "step": 12314 }, { "epoch": 1.4986309704898084, "grad_norm": 1.6901764869689941, "learning_rate": 3.058017131577763e-06, "loss": 0.3795, "step": 12315 }, { "epoch": 1.4987526620018254, "grad_norm": 1.5380728244781494, "learning_rate": 3.0566131857132186e-06, "loss": 0.4138, "step": 12316 }, { "epoch": 1.4988743535138425, "grad_norm": 1.7879115343093872, "learning_rate": 3.0552095040589134e-06, "loss": 0.3994, "step": 12317 }, { "epoch": 1.4989960450258595, "grad_norm": 3.5872092247009277, "learning_rate": 3.0538060866682596e-06, "loss": 0.3033, "step": 12318 }, { "epoch": 1.4991177365378765, "grad_norm": 1.6339712142944336, "learning_rate": 3.052402933594664e-06, "loss": 0.3853, "step": 12319 }, { "epoch": 1.4992394280498935, "grad_norm": 1.9802073240280151, "learning_rate": 3.0510000448915177e-06, "loss": 0.4178, "step": 12320 }, { "epoch": 1.4993611195619105, "grad_norm": 3.277489185333252, "learning_rate": 3.0495974206122015e-06, "loss": 0.3994, "step": 12321 }, { "epoch": 1.4994828110739276, "grad_norm": 1.8114643096923828, "learning_rate": 3.048195060810092e-06, "loss": 0.4076, "step": 12322 }, { "epoch": 1.4996045025859446, "grad_norm": 1.8180392980575562, "learning_rate": 3.04679296553855e-06, "loss": 0.4102, "step": 12323 }, { "epoch": 1.4997261940979616, "grad_norm": 1.731507658958435, "learning_rate": 3.0453911348509246e-06, "loss": 0.4115, "step": 12324 }, { "epoch": 1.4998478856099786, "grad_norm": 1.3817925453186035, "learning_rate": 3.043989568800565e-06, "loss": 0.4276, "step": 12325 }, { "epoch": 1.4999695771219956, "grad_norm": 1.6861214637756348, "learning_rate": 3.0425882674408012e-06, "loss": 0.3839, "step": 12326 }, { "epoch": 1.5000912686340127, "grad_norm": 2.5139353275299072, "learning_rate": 3.041187230824951e-06, "loss": 0.3891, "step": 12327 }, { "epoch": 1.5002129601460297, "grad_norm": 2.5731089115142822, "learning_rate": 3.0397864590063377e-06, "loss": 0.3831, "step": 12328 }, { "epoch": 1.5003346516580467, "grad_norm": 1.5949252843856812, "learning_rate": 3.0383859520382485e-06, "loss": 0.3552, "step": 12329 }, { "epoch": 1.500456343170064, "grad_norm": 1.2993236780166626, "learning_rate": 3.0369857099739884e-06, "loss": 0.3684, "step": 12330 }, { "epoch": 1.500578034682081, "grad_norm": 2.266195058822632, "learning_rate": 3.0355857328668337e-06, "loss": 0.3409, "step": 12331 }, { "epoch": 1.500699726194098, "grad_norm": 1.638208270072937, "learning_rate": 3.034186020770055e-06, "loss": 0.3756, "step": 12332 }, { "epoch": 1.500821417706115, "grad_norm": 2.017385482788086, "learning_rate": 3.032786573736919e-06, "loss": 0.3811, "step": 12333 }, { "epoch": 1.500943109218132, "grad_norm": 1.5251866579055786, "learning_rate": 3.031387391820676e-06, "loss": 0.3349, "step": 12334 }, { "epoch": 1.501064800730149, "grad_norm": 1.678708553314209, "learning_rate": 3.0299884750745635e-06, "loss": 0.4216, "step": 12335 }, { "epoch": 1.5011864922421663, "grad_norm": 1.482534646987915, "learning_rate": 3.0285898235518197e-06, "loss": 0.3626, "step": 12336 }, { "epoch": 1.5013081837541833, "grad_norm": 2.3882389068603516, "learning_rate": 3.027191437305663e-06, "loss": 0.3867, "step": 12337 }, { "epoch": 1.5014298752662003, "grad_norm": 2.533510684967041, "learning_rate": 3.025793316389303e-06, "loss": 0.4303, "step": 12338 }, { "epoch": 1.5015515667782173, "grad_norm": 1.591119408607483, "learning_rate": 3.024395460855948e-06, "loss": 0.3339, "step": 12339 }, { "epoch": 1.5016732582902343, "grad_norm": 1.8293664455413818, "learning_rate": 3.022997870758779e-06, "loss": 0.3485, "step": 12340 }, { "epoch": 1.5017949498022514, "grad_norm": 2.356353282928467, "learning_rate": 3.021600546150986e-06, "loss": 0.3984, "step": 12341 }, { "epoch": 1.5019166413142684, "grad_norm": 1.9324548244476318, "learning_rate": 3.0202034870857354e-06, "loss": 0.3387, "step": 12342 }, { "epoch": 1.5020383328262854, "grad_norm": 1.3541065454483032, "learning_rate": 3.0188066936161875e-06, "loss": 0.3919, "step": 12343 }, { "epoch": 1.5021600243383024, "grad_norm": 2.017469882965088, "learning_rate": 3.017410165795498e-06, "loss": 0.3456, "step": 12344 }, { "epoch": 1.5022817158503194, "grad_norm": 1.600197672843933, "learning_rate": 3.0160139036768045e-06, "loss": 0.3802, "step": 12345 }, { "epoch": 1.5024034073623365, "grad_norm": 1.4959287643432617, "learning_rate": 3.0146179073132355e-06, "loss": 0.3474, "step": 12346 }, { "epoch": 1.5025250988743535, "grad_norm": 1.5233023166656494, "learning_rate": 3.0132221767579164e-06, "loss": 0.3382, "step": 12347 }, { "epoch": 1.5026467903863705, "grad_norm": 3.6474270820617676, "learning_rate": 3.0118267120639557e-06, "loss": 0.4429, "step": 12348 }, { "epoch": 1.5027684818983875, "grad_norm": 1.9193079471588135, "learning_rate": 3.010431513284451e-06, "loss": 0.3909, "step": 12349 }, { "epoch": 1.5028901734104045, "grad_norm": 1.6091670989990234, "learning_rate": 3.009036580472496e-06, "loss": 0.3952, "step": 12350 }, { "epoch": 1.5030118649224216, "grad_norm": 1.455883502960205, "learning_rate": 3.0076419136811717e-06, "loss": 0.3619, "step": 12351 }, { "epoch": 1.5031335564344386, "grad_norm": 1.6678181886672974, "learning_rate": 3.006247512963545e-06, "loss": 0.3523, "step": 12352 }, { "epoch": 1.5032552479464556, "grad_norm": 2.2384750843048096, "learning_rate": 3.0048533783726774e-06, "loss": 0.4137, "step": 12353 }, { "epoch": 1.5033769394584726, "grad_norm": 1.8170641660690308, "learning_rate": 3.0034595099616137e-06, "loss": 0.3231, "step": 12354 }, { "epoch": 1.5034986309704899, "grad_norm": 1.6375850439071655, "learning_rate": 3.0020659077834014e-06, "loss": 0.3981, "step": 12355 }, { "epoch": 1.5036203224825069, "grad_norm": 1.9837037324905396, "learning_rate": 3.000672571891067e-06, "loss": 0.4079, "step": 12356 }, { "epoch": 1.503742013994524, "grad_norm": 1.7627718448638916, "learning_rate": 2.999279502337624e-06, "loss": 0.3472, "step": 12357 }, { "epoch": 1.503863705506541, "grad_norm": 1.6974141597747803, "learning_rate": 2.9978866991760912e-06, "loss": 0.3918, "step": 12358 }, { "epoch": 1.503985397018558, "grad_norm": 1.7849807739257812, "learning_rate": 2.99649416245946e-06, "loss": 0.3682, "step": 12359 }, { "epoch": 1.504107088530575, "grad_norm": 2.7937188148498535, "learning_rate": 2.995101892240725e-06, "loss": 0.37, "step": 12360 }, { "epoch": 1.5042287800425922, "grad_norm": 2.260521650314331, "learning_rate": 2.993709888572862e-06, "loss": 0.4007, "step": 12361 }, { "epoch": 1.5043504715546092, "grad_norm": 2.6758663654327393, "learning_rate": 2.9923181515088407e-06, "loss": 0.3588, "step": 12362 }, { "epoch": 1.5044721630666262, "grad_norm": 1.8291544914245605, "learning_rate": 2.990926681101619e-06, "loss": 0.3485, "step": 12363 }, { "epoch": 1.5045938545786433, "grad_norm": 1.969672679901123, "learning_rate": 2.989535477404144e-06, "loss": 0.4138, "step": 12364 }, { "epoch": 1.5047155460906603, "grad_norm": 2.022167921066284, "learning_rate": 2.9881445404693533e-06, "loss": 0.3373, "step": 12365 }, { "epoch": 1.5048372376026773, "grad_norm": 1.8693008422851562, "learning_rate": 2.9867538703501788e-06, "loss": 0.3704, "step": 12366 }, { "epoch": 1.5049589291146943, "grad_norm": 2.33036208152771, "learning_rate": 2.9853634670995323e-06, "loss": 0.3394, "step": 12367 }, { "epoch": 1.5050806206267113, "grad_norm": 2.0149638652801514, "learning_rate": 2.98397333077033e-06, "loss": 0.3277, "step": 12368 }, { "epoch": 1.5052023121387283, "grad_norm": 2.071512460708618, "learning_rate": 2.982583461415464e-06, "loss": 0.2941, "step": 12369 }, { "epoch": 1.5053240036507454, "grad_norm": 1.4121744632720947, "learning_rate": 2.9811938590878197e-06, "loss": 0.3543, "step": 12370 }, { "epoch": 1.5054456951627624, "grad_norm": 3.1035492420196533, "learning_rate": 2.9798045238402794e-06, "loss": 0.4504, "step": 12371 }, { "epoch": 1.5055673866747794, "grad_norm": 1.631083369255066, "learning_rate": 2.9784154557257094e-06, "loss": 0.3375, "step": 12372 }, { "epoch": 1.5056890781867964, "grad_norm": 1.757651686668396, "learning_rate": 2.977026654796962e-06, "loss": 0.3235, "step": 12373 }, { "epoch": 1.5058107696988134, "grad_norm": 1.765066146850586, "learning_rate": 2.975638121106892e-06, "loss": 0.3623, "step": 12374 }, { "epoch": 1.5059324612108305, "grad_norm": 2.4763071537017822, "learning_rate": 2.9742498547083254e-06, "loss": 0.4339, "step": 12375 }, { "epoch": 1.5060541527228475, "grad_norm": 1.6039776802062988, "learning_rate": 2.9728618556540976e-06, "loss": 0.3895, "step": 12376 }, { "epoch": 1.5061758442348645, "grad_norm": 1.8766047954559326, "learning_rate": 2.9714741239970223e-06, "loss": 0.3891, "step": 12377 }, { "epoch": 1.5062975357468815, "grad_norm": 3.1880428791046143, "learning_rate": 2.9700866597899016e-06, "loss": 0.3314, "step": 12378 }, { "epoch": 1.5064192272588985, "grad_norm": 1.4541547298431396, "learning_rate": 2.968699463085537e-06, "loss": 0.3526, "step": 12379 }, { "epoch": 1.5065409187709158, "grad_norm": 1.342864990234375, "learning_rate": 2.9673125339367113e-06, "loss": 0.3583, "step": 12380 }, { "epoch": 1.5066626102829328, "grad_norm": 1.548964262008667, "learning_rate": 2.9659258723961993e-06, "loss": 0.358, "step": 12381 }, { "epoch": 1.5067843017949498, "grad_norm": 2.163334846496582, "learning_rate": 2.9645394785167693e-06, "loss": 0.4127, "step": 12382 }, { "epoch": 1.5069059933069668, "grad_norm": 2.184917449951172, "learning_rate": 2.963153352351175e-06, "loss": 0.3757, "step": 12383 }, { "epoch": 1.5070276848189839, "grad_norm": 1.9177147150039673, "learning_rate": 2.961767493952158e-06, "loss": 0.3478, "step": 12384 }, { "epoch": 1.5071493763310009, "grad_norm": 1.805364727973938, "learning_rate": 2.960381903372462e-06, "loss": 0.3776, "step": 12385 }, { "epoch": 1.507271067843018, "grad_norm": 1.5803080797195435, "learning_rate": 2.9589965806648004e-06, "loss": 0.3481, "step": 12386 }, { "epoch": 1.5073927593550351, "grad_norm": 1.3207484483718872, "learning_rate": 2.957611525881896e-06, "loss": 0.3245, "step": 12387 }, { "epoch": 1.5075144508670522, "grad_norm": 2.636009931564331, "learning_rate": 2.9562267390764497e-06, "loss": 0.4136, "step": 12388 }, { "epoch": 1.5076361423790692, "grad_norm": 1.4088382720947266, "learning_rate": 2.9548422203011527e-06, "loss": 0.3853, "step": 12389 }, { "epoch": 1.5077578338910862, "grad_norm": 2.709378957748413, "learning_rate": 2.9534579696086953e-06, "loss": 0.3597, "step": 12390 }, { "epoch": 1.5078795254031032, "grad_norm": 1.9916608333587646, "learning_rate": 2.9520739870517468e-06, "loss": 0.3141, "step": 12391 }, { "epoch": 1.5080012169151202, "grad_norm": 3.2743358612060547, "learning_rate": 2.950690272682969e-06, "loss": 0.366, "step": 12392 }, { "epoch": 1.5081229084271373, "grad_norm": 1.5566741228103638, "learning_rate": 2.9493068265550207e-06, "loss": 0.3648, "step": 12393 }, { "epoch": 1.5082445999391543, "grad_norm": 2.2265734672546387, "learning_rate": 2.947923648720542e-06, "loss": 0.3565, "step": 12394 }, { "epoch": 1.5083662914511713, "grad_norm": 1.8998069763183594, "learning_rate": 2.946540739232162e-06, "loss": 0.3338, "step": 12395 }, { "epoch": 1.5084879829631883, "grad_norm": 1.8418159484863281, "learning_rate": 2.94515809814251e-06, "loss": 0.3828, "step": 12396 }, { "epoch": 1.5086096744752053, "grad_norm": 2.126296043395996, "learning_rate": 2.9437757255041955e-06, "loss": 0.3855, "step": 12397 }, { "epoch": 1.5087313659872224, "grad_norm": 4.020832061767578, "learning_rate": 2.94239362136982e-06, "loss": 0.4502, "step": 12398 }, { "epoch": 1.5088530574992394, "grad_norm": 1.7214140892028809, "learning_rate": 2.941011785791975e-06, "loss": 0.3744, "step": 12399 }, { "epoch": 1.5089747490112564, "grad_norm": 1.624267816543579, "learning_rate": 2.9396302188232405e-06, "loss": 0.3765, "step": 12400 }, { "epoch": 1.5090964405232734, "grad_norm": 2.235553503036499, "learning_rate": 2.9382489205161926e-06, "loss": 0.3796, "step": 12401 }, { "epoch": 1.5092181320352904, "grad_norm": 1.3899976015090942, "learning_rate": 2.9368678909233917e-06, "loss": 0.3368, "step": 12402 }, { "epoch": 1.5093398235473074, "grad_norm": 1.6225268840789795, "learning_rate": 2.935487130097383e-06, "loss": 0.3764, "step": 12403 }, { "epoch": 1.5094615150593245, "grad_norm": 1.8236868381500244, "learning_rate": 2.9341066380907157e-06, "loss": 0.3919, "step": 12404 }, { "epoch": 1.5095832065713415, "grad_norm": 1.3602375984191895, "learning_rate": 2.9327264149559154e-06, "loss": 0.3399, "step": 12405 }, { "epoch": 1.5097048980833587, "grad_norm": 1.8719074726104736, "learning_rate": 2.931346460745501e-06, "loss": 0.3856, "step": 12406 }, { "epoch": 1.5098265895953757, "grad_norm": 2.090416431427002, "learning_rate": 2.929966775511989e-06, "loss": 0.3272, "step": 12407 }, { "epoch": 1.5099482811073928, "grad_norm": 1.2473992109298706, "learning_rate": 2.928587359307874e-06, "loss": 0.3621, "step": 12408 }, { "epoch": 1.5100699726194098, "grad_norm": 2.0594165325164795, "learning_rate": 2.927208212185647e-06, "loss": 0.3726, "step": 12409 }, { "epoch": 1.5101916641314268, "grad_norm": 1.631903052330017, "learning_rate": 2.9258293341977884e-06, "loss": 0.3792, "step": 12410 }, { "epoch": 1.5103133556434438, "grad_norm": 1.4740819931030273, "learning_rate": 2.9244507253967625e-06, "loss": 0.362, "step": 12411 }, { "epoch": 1.510435047155461, "grad_norm": 2.6567625999450684, "learning_rate": 2.9230723858350353e-06, "loss": 0.4339, "step": 12412 }, { "epoch": 1.510556738667478, "grad_norm": 1.7639751434326172, "learning_rate": 2.9216943155650524e-06, "loss": 0.3229, "step": 12413 }, { "epoch": 1.510678430179495, "grad_norm": 1.784854531288147, "learning_rate": 2.9203165146392496e-06, "loss": 0.3856, "step": 12414 }, { "epoch": 1.5108001216915121, "grad_norm": 1.7744238376617432, "learning_rate": 2.9189389831100603e-06, "loss": 0.3556, "step": 12415 }, { "epoch": 1.5109218132035291, "grad_norm": 1.7978202104568481, "learning_rate": 2.917561721029899e-06, "loss": 0.3962, "step": 12416 }, { "epoch": 1.5110435047155462, "grad_norm": 1.4225305318832397, "learning_rate": 2.9161847284511715e-06, "loss": 0.357, "step": 12417 }, { "epoch": 1.5111651962275632, "grad_norm": 1.4157168865203857, "learning_rate": 2.9148080054262805e-06, "loss": 0.3326, "step": 12418 }, { "epoch": 1.5112868877395802, "grad_norm": 1.620936632156372, "learning_rate": 2.9134315520076072e-06, "loss": 0.3285, "step": 12419 }, { "epoch": 1.5114085792515972, "grad_norm": 2.4878740310668945, "learning_rate": 2.9120553682475394e-06, "loss": 0.4731, "step": 12420 }, { "epoch": 1.5115302707636142, "grad_norm": 1.468770146369934, "learning_rate": 2.9106794541984316e-06, "loss": 0.3874, "step": 12421 }, { "epoch": 1.5116519622756313, "grad_norm": 1.8907181024551392, "learning_rate": 2.9093038099126416e-06, "loss": 0.3466, "step": 12422 }, { "epoch": 1.5117736537876483, "grad_norm": 2.9018452167510986, "learning_rate": 2.907928435442522e-06, "loss": 0.3131, "step": 12423 }, { "epoch": 1.5118953452996653, "grad_norm": 1.9180059432983398, "learning_rate": 2.9065533308404046e-06, "loss": 0.3924, "step": 12424 }, { "epoch": 1.5120170368116823, "grad_norm": 3.1403298377990723, "learning_rate": 2.9051784961586138e-06, "loss": 0.367, "step": 12425 }, { "epoch": 1.5121387283236993, "grad_norm": 1.482758641242981, "learning_rate": 2.903803931449469e-06, "loss": 0.3428, "step": 12426 }, { "epoch": 1.5122604198357164, "grad_norm": 1.934027075767517, "learning_rate": 2.902429636765269e-06, "loss": 0.3327, "step": 12427 }, { "epoch": 1.5123821113477334, "grad_norm": 1.9481582641601562, "learning_rate": 2.9010556121583176e-06, "loss": 0.3512, "step": 12428 }, { "epoch": 1.5125038028597504, "grad_norm": 1.6440929174423218, "learning_rate": 2.8996818576808926e-06, "loss": 0.3366, "step": 12429 }, { "epoch": 1.5126254943717674, "grad_norm": 1.5502510070800781, "learning_rate": 2.8983083733852666e-06, "loss": 0.3289, "step": 12430 }, { "epoch": 1.5127471858837847, "grad_norm": 1.5090821981430054, "learning_rate": 2.896935159323714e-06, "loss": 0.3845, "step": 12431 }, { "epoch": 1.5128688773958017, "grad_norm": 2.690147638320923, "learning_rate": 2.8955622155484777e-06, "loss": 0.3743, "step": 12432 }, { "epoch": 1.5129905689078187, "grad_norm": 1.9491065740585327, "learning_rate": 2.8941895421118004e-06, "loss": 0.3994, "step": 12433 }, { "epoch": 1.5131122604198357, "grad_norm": 1.8123835325241089, "learning_rate": 2.8928171390659234e-06, "loss": 0.3529, "step": 12434 }, { "epoch": 1.5132339519318527, "grad_norm": 2.0912837982177734, "learning_rate": 2.891445006463062e-06, "loss": 0.3105, "step": 12435 }, { "epoch": 1.5133556434438697, "grad_norm": 1.463146448135376, "learning_rate": 2.8900731443554354e-06, "loss": 0.3564, "step": 12436 }, { "epoch": 1.513477334955887, "grad_norm": 1.616438865661621, "learning_rate": 2.8887015527952412e-06, "loss": 0.3844, "step": 12437 }, { "epoch": 1.513599026467904, "grad_norm": 1.9377530813217163, "learning_rate": 2.887330231834671e-06, "loss": 0.4028, "step": 12438 }, { "epoch": 1.513720717979921, "grad_norm": 2.1920993328094482, "learning_rate": 2.8859591815259092e-06, "loss": 0.3064, "step": 12439 }, { "epoch": 1.513842409491938, "grad_norm": 1.726955771446228, "learning_rate": 2.8845884019211268e-06, "loss": 0.3637, "step": 12440 }, { "epoch": 1.513964101003955, "grad_norm": 2.236267328262329, "learning_rate": 2.883217893072481e-06, "loss": 0.3733, "step": 12441 }, { "epoch": 1.514085792515972, "grad_norm": 3.0263047218322754, "learning_rate": 2.8818476550321282e-06, "loss": 0.3966, "step": 12442 }, { "epoch": 1.514207484027989, "grad_norm": 2.3909547328948975, "learning_rate": 2.8804776878522056e-06, "loss": 0.3394, "step": 12443 }, { "epoch": 1.5143291755400061, "grad_norm": 1.9603559970855713, "learning_rate": 2.879107991584843e-06, "loss": 0.3672, "step": 12444 }, { "epoch": 1.5144508670520231, "grad_norm": 2.0302340984344482, "learning_rate": 2.8777385662821623e-06, "loss": 0.3737, "step": 12445 }, { "epoch": 1.5145725585640402, "grad_norm": 2.8463363647460938, "learning_rate": 2.8763694119962672e-06, "loss": 0.3996, "step": 12446 }, { "epoch": 1.5146942500760572, "grad_norm": 1.9522300958633423, "learning_rate": 2.875000528779265e-06, "loss": 0.2895, "step": 12447 }, { "epoch": 1.5148159415880742, "grad_norm": 1.9654104709625244, "learning_rate": 2.8736319166832393e-06, "loss": 0.4268, "step": 12448 }, { "epoch": 1.5149376331000912, "grad_norm": 1.757759928703308, "learning_rate": 2.872263575760268e-06, "loss": 0.4314, "step": 12449 }, { "epoch": 1.5150593246121082, "grad_norm": 3.167323350906372, "learning_rate": 2.8708955060624245e-06, "loss": 0.3707, "step": 12450 }, { "epoch": 1.5151810161241253, "grad_norm": 2.120405912399292, "learning_rate": 2.8695277076417626e-06, "loss": 0.3659, "step": 12451 }, { "epoch": 1.5153027076361423, "grad_norm": 3.1846115589141846, "learning_rate": 2.8681601805503278e-06, "loss": 0.312, "step": 12452 }, { "epoch": 1.5154243991481593, "grad_norm": 1.6613367795944214, "learning_rate": 2.8667929248401626e-06, "loss": 0.3389, "step": 12453 }, { "epoch": 1.5155460906601763, "grad_norm": 2.0838582515716553, "learning_rate": 2.865425940563293e-06, "loss": 0.3417, "step": 12454 }, { "epoch": 1.5156677821721933, "grad_norm": 1.655215859413147, "learning_rate": 2.8640592277717337e-06, "loss": 0.3561, "step": 12455 }, { "epoch": 1.5157894736842106, "grad_norm": 2.4793524742126465, "learning_rate": 2.8626927865174914e-06, "loss": 0.2821, "step": 12456 }, { "epoch": 1.5159111651962276, "grad_norm": 1.7385034561157227, "learning_rate": 2.8613266168525577e-06, "loss": 0.3545, "step": 12457 }, { "epoch": 1.5160328567082446, "grad_norm": 1.9146374464035034, "learning_rate": 2.859960718828927e-06, "loss": 0.3674, "step": 12458 }, { "epoch": 1.5161545482202616, "grad_norm": 3.5179266929626465, "learning_rate": 2.8585950924985695e-06, "loss": 0.4101, "step": 12459 }, { "epoch": 1.5162762397322787, "grad_norm": 3.0011661052703857, "learning_rate": 2.8572297379134472e-06, "loss": 0.4241, "step": 12460 }, { "epoch": 1.5163979312442957, "grad_norm": 3.7639846801757812, "learning_rate": 2.855864655125522e-06, "loss": 0.4429, "step": 12461 }, { "epoch": 1.516519622756313, "grad_norm": 2.267141103744507, "learning_rate": 2.8544998441867333e-06, "loss": 0.2562, "step": 12462 }, { "epoch": 1.51664131426833, "grad_norm": 1.5660878419876099, "learning_rate": 2.853135305149014e-06, "loss": 0.3535, "step": 12463 }, { "epoch": 1.516763005780347, "grad_norm": 1.9517383575439453, "learning_rate": 2.8517710380642916e-06, "loss": 0.3922, "step": 12464 }, { "epoch": 1.516884697292364, "grad_norm": 1.3870606422424316, "learning_rate": 2.850407042984478e-06, "loss": 0.3472, "step": 12465 }, { "epoch": 1.517006388804381, "grad_norm": 2.4180305004119873, "learning_rate": 2.849043319961474e-06, "loss": 0.3357, "step": 12466 }, { "epoch": 1.517128080316398, "grad_norm": 2.154930591583252, "learning_rate": 2.8476798690471742e-06, "loss": 0.3861, "step": 12467 }, { "epoch": 1.517249771828415, "grad_norm": 1.5330091714859009, "learning_rate": 2.846316690293457e-06, "loss": 0.3422, "step": 12468 }, { "epoch": 1.517371463340432, "grad_norm": 1.9610263109207153, "learning_rate": 2.8449537837522e-06, "loss": 0.3979, "step": 12469 }, { "epoch": 1.517493154852449, "grad_norm": 1.2669917345046997, "learning_rate": 2.843591149475261e-06, "loss": 0.3288, "step": 12470 }, { "epoch": 1.517614846364466, "grad_norm": 1.6009371280670166, "learning_rate": 2.8422287875144895e-06, "loss": 0.4017, "step": 12471 }, { "epoch": 1.517736537876483, "grad_norm": 2.4372897148132324, "learning_rate": 2.8408666979217315e-06, "loss": 0.4656, "step": 12472 }, { "epoch": 1.5178582293885001, "grad_norm": 2.3142457008361816, "learning_rate": 2.8395048807488136e-06, "loss": 0.4048, "step": 12473 }, { "epoch": 1.5179799209005171, "grad_norm": 1.6589879989624023, "learning_rate": 2.838143336047554e-06, "loss": 0.3546, "step": 12474 }, { "epoch": 1.5181016124125342, "grad_norm": 2.1142783164978027, "learning_rate": 2.8367820638697672e-06, "loss": 0.2929, "step": 12475 }, { "epoch": 1.5182233039245512, "grad_norm": 2.1573996543884277, "learning_rate": 2.8354210642672497e-06, "loss": 0.3343, "step": 12476 }, { "epoch": 1.5183449954365682, "grad_norm": 2.197007179260254, "learning_rate": 2.8340603372917907e-06, "loss": 0.3919, "step": 12477 }, { "epoch": 1.5184666869485852, "grad_norm": 1.6790157556533813, "learning_rate": 2.832699882995169e-06, "loss": 0.3679, "step": 12478 }, { "epoch": 1.5185883784606022, "grad_norm": 2.010957956314087, "learning_rate": 2.8313397014291486e-06, "loss": 0.4218, "step": 12479 }, { "epoch": 1.5187100699726193, "grad_norm": 2.464397668838501, "learning_rate": 2.8299797926454954e-06, "loss": 0.449, "step": 12480 }, { "epoch": 1.5188317614846365, "grad_norm": 1.7504805326461792, "learning_rate": 2.8286201566959504e-06, "loss": 0.3144, "step": 12481 }, { "epoch": 1.5189534529966535, "grad_norm": 1.7859468460083008, "learning_rate": 2.8272607936322504e-06, "loss": 0.3881, "step": 12482 }, { "epoch": 1.5190751445086705, "grad_norm": 1.5858334302902222, "learning_rate": 2.8259017035061266e-06, "loss": 0.3449, "step": 12483 }, { "epoch": 1.5191968360206876, "grad_norm": 3.0674517154693604, "learning_rate": 2.8245428863692925e-06, "loss": 0.3004, "step": 12484 }, { "epoch": 1.5193185275327046, "grad_norm": 2.073861598968506, "learning_rate": 2.8231843422734507e-06, "loss": 0.4116, "step": 12485 }, { "epoch": 1.5194402190447216, "grad_norm": 1.8102351427078247, "learning_rate": 2.821826071270304e-06, "loss": 0.3684, "step": 12486 }, { "epoch": 1.5195619105567386, "grad_norm": 2.0589699745178223, "learning_rate": 2.8204680734115285e-06, "loss": 0.3298, "step": 12487 }, { "epoch": 1.5196836020687559, "grad_norm": 1.4765851497650146, "learning_rate": 2.819110348748808e-06, "loss": 0.3685, "step": 12488 }, { "epoch": 1.5198052935807729, "grad_norm": 1.4881231784820557, "learning_rate": 2.817752897333805e-06, "loss": 0.3507, "step": 12489 }, { "epoch": 1.51992698509279, "grad_norm": 2.6291379928588867, "learning_rate": 2.8163957192181644e-06, "loss": 0.3788, "step": 12490 }, { "epoch": 1.520048676604807, "grad_norm": 1.3584263324737549, "learning_rate": 2.81503881445354e-06, "loss": 0.4073, "step": 12491 }, { "epoch": 1.520170368116824, "grad_norm": 1.951324224472046, "learning_rate": 2.8136821830915593e-06, "loss": 0.4247, "step": 12492 }, { "epoch": 1.520292059628841, "grad_norm": 1.4104790687561035, "learning_rate": 2.812325825183845e-06, "loss": 0.3687, "step": 12493 }, { "epoch": 1.520413751140858, "grad_norm": 2.132877826690674, "learning_rate": 2.8109697407820124e-06, "loss": 0.3875, "step": 12494 }, { "epoch": 1.520535442652875, "grad_norm": 2.3127293586730957, "learning_rate": 2.809613929937659e-06, "loss": 0.3874, "step": 12495 }, { "epoch": 1.520657134164892, "grad_norm": 1.6824495792388916, "learning_rate": 2.8082583927023823e-06, "loss": 0.395, "step": 12496 }, { "epoch": 1.520778825676909, "grad_norm": 1.815323829650879, "learning_rate": 2.8069031291277592e-06, "loss": 0.3297, "step": 12497 }, { "epoch": 1.520900517188926, "grad_norm": 1.9569213390350342, "learning_rate": 2.8055481392653584e-06, "loss": 0.3321, "step": 12498 }, { "epoch": 1.521022208700943, "grad_norm": 1.4917882680892944, "learning_rate": 2.8041934231667445e-06, "loss": 0.3932, "step": 12499 }, { "epoch": 1.52114390021296, "grad_norm": 1.4959702491760254, "learning_rate": 2.802838980883467e-06, "loss": 0.3731, "step": 12500 }, { "epoch": 1.521265591724977, "grad_norm": 2.0857603549957275, "learning_rate": 2.8014848124670626e-06, "loss": 0.4038, "step": 12501 }, { "epoch": 1.5213872832369941, "grad_norm": 2.008976697921753, "learning_rate": 2.8001309179690607e-06, "loss": 0.4068, "step": 12502 }, { "epoch": 1.5215089747490111, "grad_norm": 2.03781795501709, "learning_rate": 2.798777297440979e-06, "loss": 0.3361, "step": 12503 }, { "epoch": 1.5216306662610282, "grad_norm": 1.6787981986999512, "learning_rate": 2.797423950934328e-06, "loss": 0.3921, "step": 12504 }, { "epoch": 1.5217523577730452, "grad_norm": 3.9700543880462646, "learning_rate": 2.796070878500605e-06, "loss": 0.3056, "step": 12505 }, { "epoch": 1.5218740492850622, "grad_norm": 1.6673107147216797, "learning_rate": 2.794718080191293e-06, "loss": 0.3423, "step": 12506 }, { "epoch": 1.5219957407970794, "grad_norm": 1.9476972818374634, "learning_rate": 2.7933655560578767e-06, "loss": 0.2832, "step": 12507 }, { "epoch": 1.5221174323090965, "grad_norm": 1.8355255126953125, "learning_rate": 2.792013306151816e-06, "loss": 0.3628, "step": 12508 }, { "epoch": 1.5222391238211135, "grad_norm": 1.585462212562561, "learning_rate": 2.7906613305245666e-06, "loss": 0.3607, "step": 12509 }, { "epoch": 1.5223608153331305, "grad_norm": 1.41182541847229, "learning_rate": 2.789309629227579e-06, "loss": 0.346, "step": 12510 }, { "epoch": 1.5224825068451475, "grad_norm": 2.200190544128418, "learning_rate": 2.7879582023122854e-06, "loss": 0.4043, "step": 12511 }, { "epoch": 1.5226041983571645, "grad_norm": 1.837247371673584, "learning_rate": 2.7866070498301103e-06, "loss": 0.392, "step": 12512 }, { "epoch": 1.5227258898691818, "grad_norm": 1.808407187461853, "learning_rate": 2.785256171832468e-06, "loss": 0.4363, "step": 12513 }, { "epoch": 1.5228475813811988, "grad_norm": 2.131739377975464, "learning_rate": 2.783905568370758e-06, "loss": 0.3802, "step": 12514 }, { "epoch": 1.5229692728932158, "grad_norm": 1.746535062789917, "learning_rate": 2.782555239496382e-06, "loss": 0.3729, "step": 12515 }, { "epoch": 1.5230909644052328, "grad_norm": 1.4207675457000732, "learning_rate": 2.781205185260717e-06, "loss": 0.3262, "step": 12516 }, { "epoch": 1.5232126559172499, "grad_norm": 2.992722749710083, "learning_rate": 2.779855405715135e-06, "loss": 0.4238, "step": 12517 }, { "epoch": 1.5233343474292669, "grad_norm": 3.160243272781372, "learning_rate": 2.7785059009110025e-06, "loss": 0.4457, "step": 12518 }, { "epoch": 1.523456038941284, "grad_norm": 1.7625850439071655, "learning_rate": 2.7771566708996676e-06, "loss": 0.3357, "step": 12519 }, { "epoch": 1.523577730453301, "grad_norm": 2.4827253818511963, "learning_rate": 2.775807715732469e-06, "loss": 0.4042, "step": 12520 }, { "epoch": 1.523699421965318, "grad_norm": 2.242046594619751, "learning_rate": 2.7744590354607436e-06, "loss": 0.392, "step": 12521 }, { "epoch": 1.523821113477335, "grad_norm": 1.9286634922027588, "learning_rate": 2.7731106301358077e-06, "loss": 0.3805, "step": 12522 }, { "epoch": 1.523942804989352, "grad_norm": 3.6629810333251953, "learning_rate": 2.771762499808971e-06, "loss": 0.4606, "step": 12523 }, { "epoch": 1.524064496501369, "grad_norm": 2.2985353469848633, "learning_rate": 2.7704146445315326e-06, "loss": 0.3612, "step": 12524 }, { "epoch": 1.524186188013386, "grad_norm": 1.843945026397705, "learning_rate": 2.769067064354779e-06, "loss": 0.3587, "step": 12525 }, { "epoch": 1.524307879525403, "grad_norm": 1.5582345724105835, "learning_rate": 2.767719759329993e-06, "loss": 0.3748, "step": 12526 }, { "epoch": 1.52442957103742, "grad_norm": 1.6034371852874756, "learning_rate": 2.766372729508441e-06, "loss": 0.3417, "step": 12527 }, { "epoch": 1.524551262549437, "grad_norm": 1.9791405200958252, "learning_rate": 2.7650259749413765e-06, "loss": 0.4141, "step": 12528 }, { "epoch": 1.524672954061454, "grad_norm": 1.4464818239212036, "learning_rate": 2.7636794956800516e-06, "loss": 0.322, "step": 12529 }, { "epoch": 1.524794645573471, "grad_norm": 1.4464612007141113, "learning_rate": 2.7623332917757005e-06, "loss": 0.3524, "step": 12530 }, { "epoch": 1.5249163370854881, "grad_norm": 3.289478302001953, "learning_rate": 2.7609873632795458e-06, "loss": 0.3381, "step": 12531 }, { "epoch": 1.5250380285975054, "grad_norm": 1.9363148212432861, "learning_rate": 2.7596417102428085e-06, "loss": 0.377, "step": 12532 }, { "epoch": 1.5251597201095224, "grad_norm": 1.9632848501205444, "learning_rate": 2.7582963327166913e-06, "loss": 0.3776, "step": 12533 }, { "epoch": 1.5252814116215394, "grad_norm": 2.836686134338379, "learning_rate": 2.756951230752385e-06, "loss": 0.3297, "step": 12534 }, { "epoch": 1.5254031031335564, "grad_norm": 1.3542596101760864, "learning_rate": 2.7556064044010822e-06, "loss": 0.3276, "step": 12535 }, { "epoch": 1.5255247946455734, "grad_norm": 1.9509390592575073, "learning_rate": 2.7542618537139455e-06, "loss": 0.3747, "step": 12536 }, { "epoch": 1.5256464861575905, "grad_norm": 3.561317205429077, "learning_rate": 2.7529175787421457e-06, "loss": 0.4237, "step": 12537 }, { "epoch": 1.5257681776696077, "grad_norm": 1.8905047178268433, "learning_rate": 2.751573579536834e-06, "loss": 0.3218, "step": 12538 }, { "epoch": 1.5258898691816247, "grad_norm": 2.1115753650665283, "learning_rate": 2.750229856149146e-06, "loss": 0.3886, "step": 12539 }, { "epoch": 1.5260115606936417, "grad_norm": 1.7737010717391968, "learning_rate": 2.7488864086302225e-06, "loss": 0.3517, "step": 12540 }, { "epoch": 1.5261332522056588, "grad_norm": 2.041555643081665, "learning_rate": 2.7475432370311793e-06, "loss": 0.3652, "step": 12541 }, { "epoch": 1.5262549437176758, "grad_norm": 3.035809278488159, "learning_rate": 2.7462003414031245e-06, "loss": 0.389, "step": 12542 }, { "epoch": 1.5263766352296928, "grad_norm": 1.6282237768173218, "learning_rate": 2.744857721797165e-06, "loss": 0.4061, "step": 12543 }, { "epoch": 1.5264983267417098, "grad_norm": 3.464914321899414, "learning_rate": 2.7435153782643863e-06, "loss": 0.4089, "step": 12544 }, { "epoch": 1.5266200182537268, "grad_norm": 3.303316354751587, "learning_rate": 2.7421733108558647e-06, "loss": 0.3361, "step": 12545 }, { "epoch": 1.5267417097657439, "grad_norm": 2.564096689224243, "learning_rate": 2.7408315196226774e-06, "loss": 0.4571, "step": 12546 }, { "epoch": 1.5268634012777609, "grad_norm": 2.6128838062286377, "learning_rate": 2.73949000461587e-06, "loss": 0.3588, "step": 12547 }, { "epoch": 1.526985092789778, "grad_norm": 1.7256296873092651, "learning_rate": 2.7381487658865003e-06, "loss": 0.3735, "step": 12548 }, { "epoch": 1.527106784301795, "grad_norm": 2.496760368347168, "learning_rate": 2.7368078034856004e-06, "loss": 0.3611, "step": 12549 }, { "epoch": 1.527228475813812, "grad_norm": 3.9976139068603516, "learning_rate": 2.7354671174641943e-06, "loss": 0.4633, "step": 12550 }, { "epoch": 1.527350167325829, "grad_norm": 2.3817555904388428, "learning_rate": 2.734126707873306e-06, "loss": 0.3473, "step": 12551 }, { "epoch": 1.527471858837846, "grad_norm": 2.1352765560150146, "learning_rate": 2.7327865747639315e-06, "loss": 0.4786, "step": 12552 }, { "epoch": 1.527593550349863, "grad_norm": 1.520741581916809, "learning_rate": 2.731446718187073e-06, "loss": 0.3758, "step": 12553 }, { "epoch": 1.52771524186188, "grad_norm": 2.741384983062744, "learning_rate": 2.7301071381937115e-06, "loss": 0.3677, "step": 12554 }, { "epoch": 1.527836933373897, "grad_norm": 2.115781307220459, "learning_rate": 2.72876783483482e-06, "loss": 0.3661, "step": 12555 }, { "epoch": 1.527958624885914, "grad_norm": 2.0032315254211426, "learning_rate": 2.7274288081613643e-06, "loss": 0.3689, "step": 12556 }, { "epoch": 1.5280803163979313, "grad_norm": 1.7844867706298828, "learning_rate": 2.7260900582242966e-06, "loss": 0.4048, "step": 12557 }, { "epoch": 1.5282020079099483, "grad_norm": 2.108513593673706, "learning_rate": 2.7247515850745586e-06, "loss": 0.3594, "step": 12558 }, { "epoch": 1.5283236994219653, "grad_norm": 1.8052276372909546, "learning_rate": 2.723413388763082e-06, "loss": 0.3934, "step": 12559 }, { "epoch": 1.5284453909339824, "grad_norm": 3.2891016006469727, "learning_rate": 2.7220754693407834e-06, "loss": 0.3259, "step": 12560 }, { "epoch": 1.5285670824459994, "grad_norm": 2.9912776947021484, "learning_rate": 2.7207378268585817e-06, "loss": 0.4031, "step": 12561 }, { "epoch": 1.5286887739580164, "grad_norm": 1.5246210098266602, "learning_rate": 2.719400461367373e-06, "loss": 0.3852, "step": 12562 }, { "epoch": 1.5288104654700336, "grad_norm": 1.4531372785568237, "learning_rate": 2.7180633729180427e-06, "loss": 0.403, "step": 12563 }, { "epoch": 1.5289321569820506, "grad_norm": 1.4160289764404297, "learning_rate": 2.716726561561478e-06, "loss": 0.3216, "step": 12564 }, { "epoch": 1.5290538484940677, "grad_norm": 2.1517996788024902, "learning_rate": 2.7153900273485424e-06, "loss": 0.3597, "step": 12565 }, { "epoch": 1.5291755400060847, "grad_norm": 1.7108573913574219, "learning_rate": 2.714053770330092e-06, "loss": 0.3644, "step": 12566 }, { "epoch": 1.5292972315181017, "grad_norm": 2.7664637565612793, "learning_rate": 2.7127177905569803e-06, "loss": 0.3552, "step": 12567 }, { "epoch": 1.5294189230301187, "grad_norm": 2.8855767250061035, "learning_rate": 2.711382088080039e-06, "loss": 0.4201, "step": 12568 }, { "epoch": 1.5295406145421357, "grad_norm": 1.5161004066467285, "learning_rate": 2.7100466629500944e-06, "loss": 0.3911, "step": 12569 }, { "epoch": 1.5296623060541528, "grad_norm": 1.2162444591522217, "learning_rate": 2.7087115152179686e-06, "loss": 0.3736, "step": 12570 }, { "epoch": 1.5297839975661698, "grad_norm": 2.8383493423461914, "learning_rate": 2.707376644934456e-06, "loss": 0.3689, "step": 12571 }, { "epoch": 1.5299056890781868, "grad_norm": 3.169105291366577, "learning_rate": 2.7060420521503607e-06, "loss": 0.3228, "step": 12572 }, { "epoch": 1.5300273805902038, "grad_norm": 2.56186842918396, "learning_rate": 2.704707736916462e-06, "loss": 0.3461, "step": 12573 }, { "epoch": 1.5301490721022208, "grad_norm": 2.33776593208313, "learning_rate": 2.7033736992835314e-06, "loss": 0.3976, "step": 12574 }, { "epoch": 1.5302707636142379, "grad_norm": 1.4289809465408325, "learning_rate": 2.7020399393023377e-06, "loss": 0.396, "step": 12575 }, { "epoch": 1.5303924551262549, "grad_norm": 2.5968401432037354, "learning_rate": 2.700706457023631e-06, "loss": 0.3633, "step": 12576 }, { "epoch": 1.530514146638272, "grad_norm": 1.9646767377853394, "learning_rate": 2.6993732524981484e-06, "loss": 0.3955, "step": 12577 }, { "epoch": 1.530635838150289, "grad_norm": 1.4681382179260254, "learning_rate": 2.6980403257766287e-06, "loss": 0.3423, "step": 12578 }, { "epoch": 1.530757529662306, "grad_norm": 1.5288289785385132, "learning_rate": 2.696707676909789e-06, "loss": 0.379, "step": 12579 }, { "epoch": 1.530879221174323, "grad_norm": 1.6512926816940308, "learning_rate": 2.6953753059483357e-06, "loss": 0.4064, "step": 12580 }, { "epoch": 1.53100091268634, "grad_norm": 1.8012267351150513, "learning_rate": 2.6940432129429785e-06, "loss": 0.3575, "step": 12581 }, { "epoch": 1.5311226041983572, "grad_norm": 1.7500895261764526, "learning_rate": 2.692711397944392e-06, "loss": 0.3951, "step": 12582 }, { "epoch": 1.5312442957103742, "grad_norm": 1.5793031454086304, "learning_rate": 2.691379861003266e-06, "loss": 0.35, "step": 12583 }, { "epoch": 1.5313659872223913, "grad_norm": 1.933671236038208, "learning_rate": 2.690048602170264e-06, "loss": 0.3761, "step": 12584 }, { "epoch": 1.5314876787344083, "grad_norm": 2.2148663997650146, "learning_rate": 2.6887176214960408e-06, "loss": 0.4297, "step": 12585 }, { "epoch": 1.5316093702464253, "grad_norm": 1.6534515619277954, "learning_rate": 2.6873869190312483e-06, "loss": 0.3652, "step": 12586 }, { "epoch": 1.5317310617584423, "grad_norm": 1.443554401397705, "learning_rate": 2.686056494826521e-06, "loss": 0.3685, "step": 12587 }, { "epoch": 1.5318527532704593, "grad_norm": 3.234772205352783, "learning_rate": 2.6847263489324793e-06, "loss": 0.3262, "step": 12588 }, { "epoch": 1.5319744447824766, "grad_norm": 1.585789680480957, "learning_rate": 2.683396481399744e-06, "loss": 0.4032, "step": 12589 }, { "epoch": 1.5320961362944936, "grad_norm": 3.123924732208252, "learning_rate": 2.6820668922789185e-06, "loss": 0.3612, "step": 12590 }, { "epoch": 1.5322178278065106, "grad_norm": 1.779894232749939, "learning_rate": 2.6807375816205904e-06, "loss": 0.3907, "step": 12591 }, { "epoch": 1.5323395193185276, "grad_norm": 3.158414363861084, "learning_rate": 2.679408549475352e-06, "loss": 0.3387, "step": 12592 }, { "epoch": 1.5324612108305447, "grad_norm": 2.2403247356414795, "learning_rate": 2.67807979589377e-06, "loss": 0.4036, "step": 12593 }, { "epoch": 1.5325829023425617, "grad_norm": 2.8860385417938232, "learning_rate": 2.6767513209264084e-06, "loss": 0.3161, "step": 12594 }, { "epoch": 1.5327045938545787, "grad_norm": 2.474834680557251, "learning_rate": 2.675423124623816e-06, "loss": 0.346, "step": 12595 }, { "epoch": 1.5328262853665957, "grad_norm": 1.8335844278335571, "learning_rate": 2.674095207036532e-06, "loss": 0.3623, "step": 12596 }, { "epoch": 1.5329479768786127, "grad_norm": 1.397382140159607, "learning_rate": 2.672767568215093e-06, "loss": 0.3943, "step": 12597 }, { "epoch": 1.5330696683906297, "grad_norm": 1.7127958536148071, "learning_rate": 2.6714402082100143e-06, "loss": 0.3074, "step": 12598 }, { "epoch": 1.5331913599026468, "grad_norm": 1.7539894580841064, "learning_rate": 2.6701131270718016e-06, "loss": 0.3474, "step": 12599 }, { "epoch": 1.5333130514146638, "grad_norm": 1.297405481338501, "learning_rate": 2.6687863248509595e-06, "loss": 0.3126, "step": 12600 }, { "epoch": 1.5334347429266808, "grad_norm": 4.5004963874816895, "learning_rate": 2.6674598015979723e-06, "loss": 0.471, "step": 12601 }, { "epoch": 1.5335564344386978, "grad_norm": 3.0616583824157715, "learning_rate": 2.666133557363315e-06, "loss": 0.406, "step": 12602 }, { "epoch": 1.5336781259507148, "grad_norm": 1.5206066370010376, "learning_rate": 2.664807592197458e-06, "loss": 0.3555, "step": 12603 }, { "epoch": 1.5337998174627319, "grad_norm": 1.8446646928787231, "learning_rate": 2.6634819061508564e-06, "loss": 0.3495, "step": 12604 }, { "epoch": 1.5339215089747489, "grad_norm": 2.185856819152832, "learning_rate": 2.6621564992739533e-06, "loss": 0.421, "step": 12605 }, { "epoch": 1.534043200486766, "grad_norm": 1.5439691543579102, "learning_rate": 2.660831371617184e-06, "loss": 0.3377, "step": 12606 }, { "epoch": 1.534164891998783, "grad_norm": 1.750883936882019, "learning_rate": 2.659506523230969e-06, "loss": 0.3909, "step": 12607 }, { "epoch": 1.5342865835108002, "grad_norm": 2.113356828689575, "learning_rate": 2.658181954165728e-06, "loss": 0.3931, "step": 12608 }, { "epoch": 1.5344082750228172, "grad_norm": 1.461066722869873, "learning_rate": 2.65685766447186e-06, "loss": 0.2957, "step": 12609 }, { "epoch": 1.5345299665348342, "grad_norm": 2.9559671878814697, "learning_rate": 2.6555336541997555e-06, "loss": 0.4491, "step": 12610 }, { "epoch": 1.5346516580468512, "grad_norm": 1.8662328720092773, "learning_rate": 2.6542099233997996e-06, "loss": 0.3755, "step": 12611 }, { "epoch": 1.5347733495588682, "grad_norm": 3.054624080657959, "learning_rate": 2.652886472122359e-06, "loss": 0.3513, "step": 12612 }, { "epoch": 1.5348950410708853, "grad_norm": 1.3648021221160889, "learning_rate": 2.6515633004177977e-06, "loss": 0.3407, "step": 12613 }, { "epoch": 1.5350167325829025, "grad_norm": 1.4538992643356323, "learning_rate": 2.650240408336464e-06, "loss": 0.3808, "step": 12614 }, { "epoch": 1.5351384240949195, "grad_norm": 1.3085802793502808, "learning_rate": 2.648917795928693e-06, "loss": 0.3486, "step": 12615 }, { "epoch": 1.5352601156069365, "grad_norm": 1.4782772064208984, "learning_rate": 2.647595463244821e-06, "loss": 0.362, "step": 12616 }, { "epoch": 1.5353818071189536, "grad_norm": 1.7189334630966187, "learning_rate": 2.6462734103351573e-06, "loss": 0.3146, "step": 12617 }, { "epoch": 1.5355034986309706, "grad_norm": 2.4061315059661865, "learning_rate": 2.6449516372500093e-06, "loss": 0.3933, "step": 12618 }, { "epoch": 1.5356251901429876, "grad_norm": 3.2007806301116943, "learning_rate": 2.6436301440396785e-06, "loss": 0.425, "step": 12619 }, { "epoch": 1.5357468816550046, "grad_norm": 1.963255763053894, "learning_rate": 2.6423089307544447e-06, "loss": 0.4086, "step": 12620 }, { "epoch": 1.5358685731670216, "grad_norm": 1.522980809211731, "learning_rate": 2.6409879974445883e-06, "loss": 0.3698, "step": 12621 }, { "epoch": 1.5359902646790387, "grad_norm": 1.7028958797454834, "learning_rate": 2.6396673441603713e-06, "loss": 0.3353, "step": 12622 }, { "epoch": 1.5361119561910557, "grad_norm": 2.8061985969543457, "learning_rate": 2.6383469709520436e-06, "loss": 0.3911, "step": 12623 }, { "epoch": 1.5362336477030727, "grad_norm": 1.8408160209655762, "learning_rate": 2.637026877869855e-06, "loss": 0.3534, "step": 12624 }, { "epoch": 1.5363553392150897, "grad_norm": 2.483170509338379, "learning_rate": 2.635707064964034e-06, "loss": 0.3697, "step": 12625 }, { "epoch": 1.5364770307271067, "grad_norm": 2.4414095878601074, "learning_rate": 2.634387532284799e-06, "loss": 0.2921, "step": 12626 }, { "epoch": 1.5365987222391237, "grad_norm": 1.7491705417633057, "learning_rate": 2.6330682798823704e-06, "loss": 0.3718, "step": 12627 }, { "epoch": 1.5367204137511408, "grad_norm": 3.8672852516174316, "learning_rate": 2.631749307806937e-06, "loss": 0.3717, "step": 12628 }, { "epoch": 1.5368421052631578, "grad_norm": 1.512840986251831, "learning_rate": 2.630430616108696e-06, "loss": 0.3807, "step": 12629 }, { "epoch": 1.5369637967751748, "grad_norm": 1.7544865608215332, "learning_rate": 2.6291122048378246e-06, "loss": 0.3682, "step": 12630 }, { "epoch": 1.5370854882871918, "grad_norm": 1.583282232284546, "learning_rate": 2.6277940740444874e-06, "loss": 0.433, "step": 12631 }, { "epoch": 1.5372071797992088, "grad_norm": 1.7873797416687012, "learning_rate": 2.626476223778849e-06, "loss": 0.4123, "step": 12632 }, { "epoch": 1.537328871311226, "grad_norm": 1.5935440063476562, "learning_rate": 2.6251586540910523e-06, "loss": 0.3075, "step": 12633 }, { "epoch": 1.537450562823243, "grad_norm": 2.0723776817321777, "learning_rate": 2.623841365031231e-06, "loss": 0.3452, "step": 12634 }, { "epoch": 1.5375722543352601, "grad_norm": 1.8005446195602417, "learning_rate": 2.622524356649516e-06, "loss": 0.3785, "step": 12635 }, { "epoch": 1.5376939458472771, "grad_norm": 1.8446670770645142, "learning_rate": 2.6212076289960207e-06, "loss": 0.4001, "step": 12636 }, { "epoch": 1.5378156373592942, "grad_norm": 2.093743324279785, "learning_rate": 2.619891182120845e-06, "loss": 0.3409, "step": 12637 }, { "epoch": 1.5379373288713112, "grad_norm": 1.894636869430542, "learning_rate": 2.6185750160740885e-06, "loss": 0.4198, "step": 12638 }, { "epoch": 1.5380590203833284, "grad_norm": 2.0566294193267822, "learning_rate": 2.617259130905833e-06, "loss": 0.3987, "step": 12639 }, { "epoch": 1.5381807118953454, "grad_norm": 2.1996231079101562, "learning_rate": 2.615943526666147e-06, "loss": 0.3262, "step": 12640 }, { "epoch": 1.5383024034073625, "grad_norm": 1.6906081438064575, "learning_rate": 2.614628203405094e-06, "loss": 0.2881, "step": 12641 }, { "epoch": 1.5384240949193795, "grad_norm": 2.8557193279266357, "learning_rate": 2.6133131611727225e-06, "loss": 0.4133, "step": 12642 }, { "epoch": 1.5385457864313965, "grad_norm": 2.2630748748779297, "learning_rate": 2.611998400019078e-06, "loss": 0.4035, "step": 12643 }, { "epoch": 1.5386674779434135, "grad_norm": 1.6517478227615356, "learning_rate": 2.6106839199941868e-06, "loss": 0.3366, "step": 12644 }, { "epoch": 1.5387891694554305, "grad_norm": 2.2785139083862305, "learning_rate": 2.609369721148064e-06, "loss": 0.4114, "step": 12645 }, { "epoch": 1.5389108609674476, "grad_norm": 2.9818496704101562, "learning_rate": 2.6080558035307237e-06, "loss": 0.3984, "step": 12646 }, { "epoch": 1.5390325524794646, "grad_norm": 1.423867106437683, "learning_rate": 2.606742167192161e-06, "loss": 0.3766, "step": 12647 }, { "epoch": 1.5391542439914816, "grad_norm": 2.039567708969116, "learning_rate": 2.605428812182359e-06, "loss": 0.3352, "step": 12648 }, { "epoch": 1.5392759355034986, "grad_norm": 1.451438069343567, "learning_rate": 2.6041157385512993e-06, "loss": 0.4101, "step": 12649 }, { "epoch": 1.5393976270155156, "grad_norm": 1.6416186094284058, "learning_rate": 2.6028029463489456e-06, "loss": 0.3564, "step": 12650 }, { "epoch": 1.5395193185275327, "grad_norm": 1.7495027780532837, "learning_rate": 2.60149043562525e-06, "loss": 0.3604, "step": 12651 }, { "epoch": 1.5396410100395497, "grad_norm": 1.9021443128585815, "learning_rate": 2.6001782064301572e-06, "loss": 0.4371, "step": 12652 }, { "epoch": 1.5397627015515667, "grad_norm": 2.226966142654419, "learning_rate": 2.598866258813598e-06, "loss": 0.2856, "step": 12653 }, { "epoch": 1.5398843930635837, "grad_norm": 2.5288314819335938, "learning_rate": 2.597554592825501e-06, "loss": 0.3907, "step": 12654 }, { "epoch": 1.5400060845756007, "grad_norm": 1.9600913524627686, "learning_rate": 2.596243208515773e-06, "loss": 0.3877, "step": 12655 }, { "epoch": 1.5401277760876178, "grad_norm": 2.3895750045776367, "learning_rate": 2.5949321059343123e-06, "loss": 0.3088, "step": 12656 }, { "epoch": 1.5402494675996348, "grad_norm": 1.6667580604553223, "learning_rate": 2.593621285131016e-06, "loss": 0.3901, "step": 12657 }, { "epoch": 1.540371159111652, "grad_norm": 1.8805153369903564, "learning_rate": 2.5923107461557604e-06, "loss": 0.3665, "step": 12658 }, { "epoch": 1.540492850623669, "grad_norm": 2.1743226051330566, "learning_rate": 2.5910004890584116e-06, "loss": 0.4053, "step": 12659 }, { "epoch": 1.540614542135686, "grad_norm": 1.501466155052185, "learning_rate": 2.5896905138888316e-06, "loss": 0.3491, "step": 12660 }, { "epoch": 1.540736233647703, "grad_norm": 1.759179711341858, "learning_rate": 2.5883808206968675e-06, "loss": 0.3437, "step": 12661 }, { "epoch": 1.54085792515972, "grad_norm": 2.846353054046631, "learning_rate": 2.5870714095323536e-06, "loss": 0.3885, "step": 12662 }, { "epoch": 1.540979616671737, "grad_norm": 1.4050909280776978, "learning_rate": 2.5857622804451165e-06, "loss": 0.3338, "step": 12663 }, { "epoch": 1.5411013081837543, "grad_norm": 1.7222799062728882, "learning_rate": 2.584453433484968e-06, "loss": 0.3404, "step": 12664 }, { "epoch": 1.5412229996957714, "grad_norm": 2.3639652729034424, "learning_rate": 2.583144868701719e-06, "loss": 0.3892, "step": 12665 }, { "epoch": 1.5413446912077884, "grad_norm": 1.5261552333831787, "learning_rate": 2.5818365861451598e-06, "loss": 0.3724, "step": 12666 }, { "epoch": 1.5414663827198054, "grad_norm": 2.142681837081909, "learning_rate": 2.580528585865071e-06, "loss": 0.3862, "step": 12667 }, { "epoch": 1.5415880742318224, "grad_norm": 1.5395457744598389, "learning_rate": 2.5792208679112285e-06, "loss": 0.3548, "step": 12668 }, { "epoch": 1.5417097657438394, "grad_norm": 1.441227912902832, "learning_rate": 2.5779134323333934e-06, "loss": 0.317, "step": 12669 }, { "epoch": 1.5418314572558565, "grad_norm": 1.4216769933700562, "learning_rate": 2.5766062791813117e-06, "loss": 0.2936, "step": 12670 }, { "epoch": 1.5419531487678735, "grad_norm": 1.5779895782470703, "learning_rate": 2.575299408504729e-06, "loss": 0.3485, "step": 12671 }, { "epoch": 1.5420748402798905, "grad_norm": 1.695778727531433, "learning_rate": 2.5739928203533695e-06, "loss": 0.3479, "step": 12672 }, { "epoch": 1.5421965317919075, "grad_norm": 2.2750704288482666, "learning_rate": 2.572686514776961e-06, "loss": 0.3086, "step": 12673 }, { "epoch": 1.5423182233039245, "grad_norm": 2.7548768520355225, "learning_rate": 2.5713804918252004e-06, "loss": 0.3797, "step": 12674 }, { "epoch": 1.5424399148159416, "grad_norm": 2.270379066467285, "learning_rate": 2.5700747515477853e-06, "loss": 0.2839, "step": 12675 }, { "epoch": 1.5425616063279586, "grad_norm": 3.0020194053649902, "learning_rate": 2.568769293994409e-06, "loss": 0.4161, "step": 12676 }, { "epoch": 1.5426832978399756, "grad_norm": 1.421528697013855, "learning_rate": 2.567464119214742e-06, "loss": 0.3568, "step": 12677 }, { "epoch": 1.5428049893519926, "grad_norm": 2.8901584148406982, "learning_rate": 2.566159227258447e-06, "loss": 0.45, "step": 12678 }, { "epoch": 1.5429266808640096, "grad_norm": 2.137016773223877, "learning_rate": 2.564854618175183e-06, "loss": 0.3737, "step": 12679 }, { "epoch": 1.5430483723760267, "grad_norm": 2.089994192123413, "learning_rate": 2.5635502920145885e-06, "loss": 0.4103, "step": 12680 }, { "epoch": 1.5431700638880437, "grad_norm": 2.317518711090088, "learning_rate": 2.5622462488263012e-06, "loss": 0.3185, "step": 12681 }, { "epoch": 1.5432917554000607, "grad_norm": 1.6309014558792114, "learning_rate": 2.560942488659939e-06, "loss": 0.3335, "step": 12682 }, { "epoch": 1.543413446912078, "grad_norm": 2.184443235397339, "learning_rate": 2.5596390115651105e-06, "loss": 0.364, "step": 12683 }, { "epoch": 1.543535138424095, "grad_norm": 2.5433719158172607, "learning_rate": 2.558335817591421e-06, "loss": 0.3728, "step": 12684 }, { "epoch": 1.543656829936112, "grad_norm": 2.0452566146850586, "learning_rate": 2.5570329067884593e-06, "loss": 0.3588, "step": 12685 }, { "epoch": 1.543778521448129, "grad_norm": 1.9246655702590942, "learning_rate": 2.555730279205797e-06, "loss": 0.3765, "step": 12686 }, { "epoch": 1.543900212960146, "grad_norm": 1.7227977514266968, "learning_rate": 2.5544279348930077e-06, "loss": 0.3605, "step": 12687 }, { "epoch": 1.544021904472163, "grad_norm": 2.0520403385162354, "learning_rate": 2.5531258738996454e-06, "loss": 0.3775, "step": 12688 }, { "epoch": 1.54414359598418, "grad_norm": 2.2975685596466064, "learning_rate": 2.5518240962752603e-06, "loss": 0.3495, "step": 12689 }, { "epoch": 1.5442652874961973, "grad_norm": 1.9765340089797974, "learning_rate": 2.550522602069385e-06, "loss": 0.3245, "step": 12690 }, { "epoch": 1.5443869790082143, "grad_norm": 1.8159561157226562, "learning_rate": 2.549221391331542e-06, "loss": 0.4079, "step": 12691 }, { "epoch": 1.5445086705202313, "grad_norm": 1.4668906927108765, "learning_rate": 2.5479204641112496e-06, "loss": 0.3639, "step": 12692 }, { "epoch": 1.5446303620322483, "grad_norm": 1.7074501514434814, "learning_rate": 2.5466198204580083e-06, "loss": 0.3647, "step": 12693 }, { "epoch": 1.5447520535442654, "grad_norm": 2.0606911182403564, "learning_rate": 2.545319460421308e-06, "loss": 0.4033, "step": 12694 }, { "epoch": 1.5448737450562824, "grad_norm": 1.7580406665802002, "learning_rate": 2.5440193840506353e-06, "loss": 0.378, "step": 12695 }, { "epoch": 1.5449954365682994, "grad_norm": 2.8147008419036865, "learning_rate": 2.5427195913954583e-06, "loss": 0.4229, "step": 12696 }, { "epoch": 1.5451171280803164, "grad_norm": 1.6291056871414185, "learning_rate": 2.541420082505237e-06, "loss": 0.3818, "step": 12697 }, { "epoch": 1.5452388195923334, "grad_norm": 1.9070744514465332, "learning_rate": 2.540120857429419e-06, "loss": 0.4015, "step": 12698 }, { "epoch": 1.5453605111043505, "grad_norm": 1.6448923349380493, "learning_rate": 2.538821916217441e-06, "loss": 0.3558, "step": 12699 }, { "epoch": 1.5454822026163675, "grad_norm": 1.9214787483215332, "learning_rate": 2.537523258918736e-06, "loss": 0.4017, "step": 12700 }, { "epoch": 1.5456038941283845, "grad_norm": 2.2004947662353516, "learning_rate": 2.5362248855827175e-06, "loss": 0.3898, "step": 12701 }, { "epoch": 1.5457255856404015, "grad_norm": 2.111344337463379, "learning_rate": 2.5349267962587875e-06, "loss": 0.4226, "step": 12702 }, { "epoch": 1.5458472771524185, "grad_norm": 1.5048394203186035, "learning_rate": 2.5336289909963484e-06, "loss": 0.3805, "step": 12703 }, { "epoch": 1.5459689686644356, "grad_norm": 1.6047000885009766, "learning_rate": 2.532331469844781e-06, "loss": 0.3461, "step": 12704 }, { "epoch": 1.5460906601764526, "grad_norm": 1.5765515565872192, "learning_rate": 2.5310342328534552e-06, "loss": 0.3516, "step": 12705 }, { "epoch": 1.5462123516884696, "grad_norm": 1.9117792844772339, "learning_rate": 2.529737280071739e-06, "loss": 0.3896, "step": 12706 }, { "epoch": 1.5463340432004866, "grad_norm": 1.7108114957809448, "learning_rate": 2.5284406115489835e-06, "loss": 0.4254, "step": 12707 }, { "epoch": 1.5464557347125036, "grad_norm": 2.0911877155303955, "learning_rate": 2.527144227334527e-06, "loss": 0.3505, "step": 12708 }, { "epoch": 1.5465774262245209, "grad_norm": 2.0561294555664062, "learning_rate": 2.5258481274777e-06, "loss": 0.3383, "step": 12709 }, { "epoch": 1.546699117736538, "grad_norm": 3.7373759746551514, "learning_rate": 2.5245523120278205e-06, "loss": 0.3182, "step": 12710 }, { "epoch": 1.546820809248555, "grad_norm": 2.76027512550354, "learning_rate": 2.5232567810342014e-06, "loss": 0.3901, "step": 12711 }, { "epoch": 1.546942500760572, "grad_norm": 1.8573646545410156, "learning_rate": 2.521961534546138e-06, "loss": 0.3712, "step": 12712 }, { "epoch": 1.547064192272589, "grad_norm": 1.6403520107269287, "learning_rate": 2.520666572612913e-06, "loss": 0.3693, "step": 12713 }, { "epoch": 1.547185883784606, "grad_norm": 1.7222304344177246, "learning_rate": 2.5193718952838096e-06, "loss": 0.4221, "step": 12714 }, { "epoch": 1.5473075752966232, "grad_norm": 1.9656339883804321, "learning_rate": 2.5180775026080905e-06, "loss": 0.3913, "step": 12715 }, { "epoch": 1.5474292668086402, "grad_norm": 1.713310956954956, "learning_rate": 2.5167833946350052e-06, "loss": 0.3801, "step": 12716 }, { "epoch": 1.5475509583206573, "grad_norm": 2.168942928314209, "learning_rate": 2.515489571413805e-06, "loss": 0.3898, "step": 12717 }, { "epoch": 1.5476726498326743, "grad_norm": 1.9084094762802124, "learning_rate": 2.5141960329937175e-06, "loss": 0.3706, "step": 12718 }, { "epoch": 1.5477943413446913, "grad_norm": 2.4918160438537598, "learning_rate": 2.512902779423967e-06, "loss": 0.4113, "step": 12719 }, { "epoch": 1.5479160328567083, "grad_norm": 1.8096450567245483, "learning_rate": 2.5116098107537624e-06, "loss": 0.3314, "step": 12720 }, { "epoch": 1.5480377243687253, "grad_norm": 1.514387607574463, "learning_rate": 2.5103171270323023e-06, "loss": 0.4034, "step": 12721 }, { "epoch": 1.5481594158807424, "grad_norm": 1.594966173171997, "learning_rate": 2.509024728308781e-06, "loss": 0.3761, "step": 12722 }, { "epoch": 1.5482811073927594, "grad_norm": 1.6208900213241577, "learning_rate": 2.5077326146323746e-06, "loss": 0.337, "step": 12723 }, { "epoch": 1.5484027989047764, "grad_norm": 2.314403772354126, "learning_rate": 2.5064407860522465e-06, "loss": 0.4299, "step": 12724 }, { "epoch": 1.5485244904167934, "grad_norm": 2.0890610218048096, "learning_rate": 2.5051492426175604e-06, "loss": 0.406, "step": 12725 }, { "epoch": 1.5486461819288104, "grad_norm": 2.2321109771728516, "learning_rate": 2.503857984377459e-06, "loss": 0.397, "step": 12726 }, { "epoch": 1.5487678734408274, "grad_norm": 1.3876774311065674, "learning_rate": 2.5025670113810753e-06, "loss": 0.3795, "step": 12727 }, { "epoch": 1.5488895649528445, "grad_norm": 1.8247785568237305, "learning_rate": 2.5012763236775385e-06, "loss": 0.4279, "step": 12728 }, { "epoch": 1.5490112564648615, "grad_norm": 2.8193259239196777, "learning_rate": 2.4999859213159584e-06, "loss": 0.4488, "step": 12729 }, { "epoch": 1.5491329479768785, "grad_norm": 2.129063367843628, "learning_rate": 2.4986958043454356e-06, "loss": 0.3799, "step": 12730 }, { "epoch": 1.5492546394888955, "grad_norm": 1.6212902069091797, "learning_rate": 2.4974059728150702e-06, "loss": 0.3337, "step": 12731 }, { "epoch": 1.5493763310009125, "grad_norm": 1.7969318628311157, "learning_rate": 2.4961164267739312e-06, "loss": 0.4288, "step": 12732 }, { "epoch": 1.5494980225129296, "grad_norm": 1.800437569618225, "learning_rate": 2.494827166271098e-06, "loss": 0.4234, "step": 12733 }, { "epoch": 1.5496197140249468, "grad_norm": 1.8014652729034424, "learning_rate": 2.4935381913556243e-06, "loss": 0.3432, "step": 12734 }, { "epoch": 1.5497414055369638, "grad_norm": 2.6505277156829834, "learning_rate": 2.492249502076558e-06, "loss": 0.3559, "step": 12735 }, { "epoch": 1.5498630970489808, "grad_norm": 1.578000545501709, "learning_rate": 2.4909610984829414e-06, "loss": 0.3662, "step": 12736 }, { "epoch": 1.5499847885609979, "grad_norm": 1.5680572986602783, "learning_rate": 2.4896729806237973e-06, "loss": 0.3494, "step": 12737 }, { "epoch": 1.5501064800730149, "grad_norm": 2.3017964363098145, "learning_rate": 2.4883851485481393e-06, "loss": 0.367, "step": 12738 }, { "epoch": 1.550228171585032, "grad_norm": 2.2530407905578613, "learning_rate": 2.4870976023049775e-06, "loss": 0.3815, "step": 12739 }, { "epoch": 1.5503498630970491, "grad_norm": 1.668676495552063, "learning_rate": 2.4858103419433e-06, "loss": 0.3726, "step": 12740 }, { "epoch": 1.5504715546090662, "grad_norm": 1.8456331491470337, "learning_rate": 2.4845233675120948e-06, "loss": 0.3418, "step": 12741 }, { "epoch": 1.5505932461210832, "grad_norm": 1.8216592073440552, "learning_rate": 2.483236679060336e-06, "loss": 0.37, "step": 12742 }, { "epoch": 1.5507149376331002, "grad_norm": 1.434902548789978, "learning_rate": 2.4819502766369728e-06, "loss": 0.3701, "step": 12743 }, { "epoch": 1.5508366291451172, "grad_norm": 2.3084845542907715, "learning_rate": 2.4806641602909675e-06, "loss": 0.3341, "step": 12744 }, { "epoch": 1.5509583206571342, "grad_norm": 2.484041213989258, "learning_rate": 2.4793783300712536e-06, "loss": 0.3357, "step": 12745 }, { "epoch": 1.5510800121691513, "grad_norm": 1.3554069995880127, "learning_rate": 2.478092786026759e-06, "loss": 0.3694, "step": 12746 }, { "epoch": 1.5512017036811683, "grad_norm": 1.8760665655136108, "learning_rate": 2.476807528206406e-06, "loss": 0.3463, "step": 12747 }, { "epoch": 1.5513233951931853, "grad_norm": 1.754461407661438, "learning_rate": 2.4755225566590966e-06, "loss": 0.3425, "step": 12748 }, { "epoch": 1.5514450867052023, "grad_norm": 1.6037449836730957, "learning_rate": 2.474237871433731e-06, "loss": 0.2969, "step": 12749 }, { "epoch": 1.5515667782172193, "grad_norm": 2.366725444793701, "learning_rate": 2.4729534725791915e-06, "loss": 0.3015, "step": 12750 }, { "epoch": 1.5516884697292364, "grad_norm": 1.6609134674072266, "learning_rate": 2.4716693601443507e-06, "loss": 0.3362, "step": 12751 }, { "epoch": 1.5518101612412534, "grad_norm": 1.4698946475982666, "learning_rate": 2.470385534178076e-06, "loss": 0.3661, "step": 12752 }, { "epoch": 1.5519318527532704, "grad_norm": 2.472661018371582, "learning_rate": 2.469101994729216e-06, "loss": 0.4133, "step": 12753 }, { "epoch": 1.5520535442652874, "grad_norm": 2.0256452560424805, "learning_rate": 2.467818741846615e-06, "loss": 0.3491, "step": 12754 }, { "epoch": 1.5521752357773044, "grad_norm": 1.8092035055160522, "learning_rate": 2.4665357755791006e-06, "loss": 0.3854, "step": 12755 }, { "epoch": 1.5522969272893214, "grad_norm": 2.6981921195983887, "learning_rate": 2.4652530959754907e-06, "loss": 0.4196, "step": 12756 }, { "epoch": 1.5524186188013385, "grad_norm": 1.7002185583114624, "learning_rate": 2.4639707030845993e-06, "loss": 0.2888, "step": 12757 }, { "epoch": 1.5525403103133555, "grad_norm": 2.7183339595794678, "learning_rate": 2.462688596955222e-06, "loss": 0.3204, "step": 12758 }, { "epoch": 1.5526620018253727, "grad_norm": 3.830136299133301, "learning_rate": 2.4614067776361406e-06, "loss": 0.427, "step": 12759 }, { "epoch": 1.5527836933373897, "grad_norm": 1.677081823348999, "learning_rate": 2.460125245176139e-06, "loss": 0.3515, "step": 12760 }, { "epoch": 1.5529053848494068, "grad_norm": 2.07553768157959, "learning_rate": 2.458843999623979e-06, "loss": 0.4079, "step": 12761 }, { "epoch": 1.5530270763614238, "grad_norm": 2.483259916305542, "learning_rate": 2.4575630410284112e-06, "loss": 0.4308, "step": 12762 }, { "epoch": 1.5531487678734408, "grad_norm": 1.7689406871795654, "learning_rate": 2.4562823694381845e-06, "loss": 0.3739, "step": 12763 }, { "epoch": 1.5532704593854578, "grad_norm": 1.9724507331848145, "learning_rate": 2.455001984902028e-06, "loss": 0.3871, "step": 12764 }, { "epoch": 1.553392150897475, "grad_norm": 1.475845217704773, "learning_rate": 2.453721887468663e-06, "loss": 0.3674, "step": 12765 }, { "epoch": 1.553513842409492, "grad_norm": 2.475400447845459, "learning_rate": 2.452442077186801e-06, "loss": 0.348, "step": 12766 }, { "epoch": 1.553635533921509, "grad_norm": 1.8045141696929932, "learning_rate": 2.4511625541051365e-06, "loss": 0.3423, "step": 12767 }, { "epoch": 1.5537572254335261, "grad_norm": 2.7508513927459717, "learning_rate": 2.449883318272366e-06, "loss": 0.372, "step": 12768 }, { "epoch": 1.5538789169455431, "grad_norm": 1.6447900533676147, "learning_rate": 2.448604369737162e-06, "loss": 0.372, "step": 12769 }, { "epoch": 1.5540006084575602, "grad_norm": 1.582385778427124, "learning_rate": 2.4473257085481903e-06, "loss": 0.353, "step": 12770 }, { "epoch": 1.5541222999695772, "grad_norm": 1.6140209436416626, "learning_rate": 2.4460473347541112e-06, "loss": 0.337, "step": 12771 }, { "epoch": 1.5542439914815942, "grad_norm": 1.881425142288208, "learning_rate": 2.444769248403567e-06, "loss": 0.3433, "step": 12772 }, { "epoch": 1.5543656829936112, "grad_norm": 1.553879976272583, "learning_rate": 2.443491449545188e-06, "loss": 0.3668, "step": 12773 }, { "epoch": 1.5544873745056282, "grad_norm": 1.7438925504684448, "learning_rate": 2.4422139382276033e-06, "loss": 0.4055, "step": 12774 }, { "epoch": 1.5546090660176453, "grad_norm": 1.6766334772109985, "learning_rate": 2.4409367144994223e-06, "loss": 0.3352, "step": 12775 }, { "epoch": 1.5547307575296623, "grad_norm": 2.2803711891174316, "learning_rate": 2.4396597784092437e-06, "loss": 0.3722, "step": 12776 }, { "epoch": 1.5548524490416793, "grad_norm": 1.987220287322998, "learning_rate": 2.438383130005664e-06, "loss": 0.2965, "step": 12777 }, { "epoch": 1.5549741405536963, "grad_norm": 1.925665020942688, "learning_rate": 2.437106769337253e-06, "loss": 0.4832, "step": 12778 }, { "epoch": 1.5550958320657133, "grad_norm": 1.7567967176437378, "learning_rate": 2.435830696452586e-06, "loss": 0.3726, "step": 12779 }, { "epoch": 1.5552175235777304, "grad_norm": 2.1436469554901123, "learning_rate": 2.434554911400219e-06, "loss": 0.3789, "step": 12780 }, { "epoch": 1.5553392150897474, "grad_norm": 2.099574565887451, "learning_rate": 2.433279414228693e-06, "loss": 0.3108, "step": 12781 }, { "epoch": 1.5554609066017644, "grad_norm": 2.9680776596069336, "learning_rate": 2.432004204986551e-06, "loss": 0.3337, "step": 12782 }, { "epoch": 1.5555825981137814, "grad_norm": 1.6419986486434937, "learning_rate": 2.430729283722314e-06, "loss": 0.3845, "step": 12783 }, { "epoch": 1.5557042896257987, "grad_norm": 2.0534918308258057, "learning_rate": 2.429454650484493e-06, "loss": 0.3865, "step": 12784 }, { "epoch": 1.5558259811378157, "grad_norm": 1.4777759313583374, "learning_rate": 2.4281803053215946e-06, "loss": 0.3446, "step": 12785 }, { "epoch": 1.5559476726498327, "grad_norm": 2.158703327178955, "learning_rate": 2.4269062482821084e-06, "loss": 0.3056, "step": 12786 }, { "epoch": 1.5560693641618497, "grad_norm": 2.2385034561157227, "learning_rate": 2.4256324794145124e-06, "loss": 0.415, "step": 12787 }, { "epoch": 1.5561910556738667, "grad_norm": 1.4018527269363403, "learning_rate": 2.4243589987672845e-06, "loss": 0.3467, "step": 12788 }, { "epoch": 1.5563127471858837, "grad_norm": 1.909488558769226, "learning_rate": 2.4230858063888717e-06, "loss": 0.3654, "step": 12789 }, { "epoch": 1.5564344386979008, "grad_norm": 1.8015897274017334, "learning_rate": 2.421812902327729e-06, "loss": 0.3994, "step": 12790 }, { "epoch": 1.556556130209918, "grad_norm": 1.7957444190979004, "learning_rate": 2.4205402866322938e-06, "loss": 0.3705, "step": 12791 }, { "epoch": 1.556677821721935, "grad_norm": 2.5776891708374023, "learning_rate": 2.419267959350984e-06, "loss": 0.4047, "step": 12792 }, { "epoch": 1.556799513233952, "grad_norm": 1.8470677137374878, "learning_rate": 2.4179959205322234e-06, "loss": 0.352, "step": 12793 }, { "epoch": 1.556921204745969, "grad_norm": 1.678146243095398, "learning_rate": 2.416724170224413e-06, "loss": 0.3677, "step": 12794 }, { "epoch": 1.557042896257986, "grad_norm": 4.77911901473999, "learning_rate": 2.4154527084759393e-06, "loss": 0.4445, "step": 12795 }, { "epoch": 1.557164587770003, "grad_norm": 1.6903306245803833, "learning_rate": 2.414181535335194e-06, "loss": 0.4057, "step": 12796 }, { "epoch": 1.5572862792820201, "grad_norm": 1.584194302558899, "learning_rate": 2.412910650850542e-06, "loss": 0.2984, "step": 12797 }, { "epoch": 1.5574079707940371, "grad_norm": 1.7317028045654297, "learning_rate": 2.4116400550703423e-06, "loss": 0.4066, "step": 12798 }, { "epoch": 1.5575296623060542, "grad_norm": 2.235231399536133, "learning_rate": 2.4103697480429477e-06, "loss": 0.3974, "step": 12799 }, { "epoch": 1.5576513538180712, "grad_norm": 1.6978048086166382, "learning_rate": 2.409099729816694e-06, "loss": 0.3741, "step": 12800 }, { "epoch": 1.5577730453300882, "grad_norm": 1.6473702192306519, "learning_rate": 2.407830000439907e-06, "loss": 0.344, "step": 12801 }, { "epoch": 1.5578947368421052, "grad_norm": 1.9948610067367554, "learning_rate": 2.4065605599609044e-06, "loss": 0.3957, "step": 12802 }, { "epoch": 1.5580164283541222, "grad_norm": 1.2500169277191162, "learning_rate": 2.4052914084279878e-06, "loss": 0.3295, "step": 12803 }, { "epoch": 1.5581381198661393, "grad_norm": 2.2401773929595947, "learning_rate": 2.404022545889455e-06, "loss": 0.3978, "step": 12804 }, { "epoch": 1.5582598113781563, "grad_norm": 2.3625476360321045, "learning_rate": 2.402753972393588e-06, "loss": 0.4333, "step": 12805 }, { "epoch": 1.5583815028901733, "grad_norm": 2.5307910442352295, "learning_rate": 2.401485687988654e-06, "loss": 0.4218, "step": 12806 }, { "epoch": 1.5585031944021903, "grad_norm": 1.6290391683578491, "learning_rate": 2.4002176927229203e-06, "loss": 0.3591, "step": 12807 }, { "epoch": 1.5586248859142073, "grad_norm": 1.7677561044692993, "learning_rate": 2.398949986644632e-06, "loss": 0.3591, "step": 12808 }, { "epoch": 1.5587465774262244, "grad_norm": 1.9134935140609741, "learning_rate": 2.3976825698020336e-06, "loss": 0.3735, "step": 12809 }, { "epoch": 1.5588682689382416, "grad_norm": 2.1838998794555664, "learning_rate": 2.396415442243347e-06, "loss": 0.35, "step": 12810 }, { "epoch": 1.5589899604502586, "grad_norm": 1.5391291379928589, "learning_rate": 2.3951486040167936e-06, "loss": 0.3581, "step": 12811 }, { "epoch": 1.5591116519622756, "grad_norm": 2.85530424118042, "learning_rate": 2.3938820551705766e-06, "loss": 0.2958, "step": 12812 }, { "epoch": 1.5592333434742927, "grad_norm": 2.349602222442627, "learning_rate": 2.3926157957528907e-06, "loss": 0.3646, "step": 12813 }, { "epoch": 1.5593550349863097, "grad_norm": 1.5135033130645752, "learning_rate": 2.391349825811917e-06, "loss": 0.359, "step": 12814 }, { "epoch": 1.5594767264983267, "grad_norm": 2.149428606033325, "learning_rate": 2.3900841453958344e-06, "loss": 0.3531, "step": 12815 }, { "epoch": 1.559598418010344, "grad_norm": 3.258680820465088, "learning_rate": 2.3888187545527995e-06, "loss": 0.403, "step": 12816 }, { "epoch": 1.559720109522361, "grad_norm": 1.4699033498764038, "learning_rate": 2.3875536533309685e-06, "loss": 0.3263, "step": 12817 }, { "epoch": 1.559841801034378, "grad_norm": 1.6422860622406006, "learning_rate": 2.386288841778477e-06, "loss": 0.3478, "step": 12818 }, { "epoch": 1.559963492546395, "grad_norm": 1.3670568466186523, "learning_rate": 2.385024319943452e-06, "loss": 0.4089, "step": 12819 }, { "epoch": 1.560085184058412, "grad_norm": 1.6304458379745483, "learning_rate": 2.3837600878740166e-06, "loss": 0.3463, "step": 12820 }, { "epoch": 1.560206875570429, "grad_norm": 2.3730359077453613, "learning_rate": 2.3824961456182747e-06, "loss": 0.4077, "step": 12821 }, { "epoch": 1.560328567082446, "grad_norm": 3.8978514671325684, "learning_rate": 2.381232493224319e-06, "loss": 0.4965, "step": 12822 }, { "epoch": 1.560450258594463, "grad_norm": 4.243291854858398, "learning_rate": 2.379969130740243e-06, "loss": 0.3972, "step": 12823 }, { "epoch": 1.56057195010648, "grad_norm": 2.884408950805664, "learning_rate": 2.3787060582141074e-06, "loss": 0.4588, "step": 12824 }, { "epoch": 1.560693641618497, "grad_norm": 2.60561203956604, "learning_rate": 2.3774432756939845e-06, "loss": 0.3777, "step": 12825 }, { "epoch": 1.5608153331305141, "grad_norm": 1.5718512535095215, "learning_rate": 2.376180783227924e-06, "loss": 0.3555, "step": 12826 }, { "epoch": 1.5609370246425311, "grad_norm": 2.756662368774414, "learning_rate": 2.3749185808639617e-06, "loss": 0.3112, "step": 12827 }, { "epoch": 1.5610587161545482, "grad_norm": 3.597346782684326, "learning_rate": 2.3736566686501327e-06, "loss": 0.4183, "step": 12828 }, { "epoch": 1.5611804076665652, "grad_norm": 1.5573253631591797, "learning_rate": 2.3723950466344535e-06, "loss": 0.3611, "step": 12829 }, { "epoch": 1.5613020991785822, "grad_norm": 1.6890100240707397, "learning_rate": 2.3711337148649284e-06, "loss": 0.3363, "step": 12830 }, { "epoch": 1.5614237906905992, "grad_norm": 1.6519485712051392, "learning_rate": 2.3698726733895596e-06, "loss": 0.4162, "step": 12831 }, { "epoch": 1.5615454822026162, "grad_norm": 1.9749494791030884, "learning_rate": 2.3686119222563273e-06, "loss": 0.3962, "step": 12832 }, { "epoch": 1.5616671737146333, "grad_norm": 2.018336772918701, "learning_rate": 2.3673514615132067e-06, "loss": 0.3416, "step": 12833 }, { "epoch": 1.5617888652266503, "grad_norm": 2.254579544067383, "learning_rate": 2.3660912912081668e-06, "loss": 0.3467, "step": 12834 }, { "epoch": 1.5619105567386675, "grad_norm": 1.8689029216766357, "learning_rate": 2.364831411389148e-06, "loss": 0.3953, "step": 12835 }, { "epoch": 1.5620322482506845, "grad_norm": 2.1085586547851562, "learning_rate": 2.3635718221041014e-06, "loss": 0.3573, "step": 12836 }, { "epoch": 1.5621539397627016, "grad_norm": 2.7029595375061035, "learning_rate": 2.3623125234009537e-06, "loss": 0.3134, "step": 12837 }, { "epoch": 1.5622756312747186, "grad_norm": 2.319758892059326, "learning_rate": 2.3610535153276202e-06, "loss": 0.3918, "step": 12838 }, { "epoch": 1.5623973227867356, "grad_norm": 1.5745046138763428, "learning_rate": 2.3597947979320157e-06, "loss": 0.3674, "step": 12839 }, { "epoch": 1.5625190142987526, "grad_norm": 1.9525219202041626, "learning_rate": 2.3585363712620333e-06, "loss": 0.3783, "step": 12840 }, { "epoch": 1.5626407058107699, "grad_norm": 3.5742135047912598, "learning_rate": 2.3572782353655555e-06, "loss": 0.4636, "step": 12841 }, { "epoch": 1.5627623973227869, "grad_norm": 2.036787748336792, "learning_rate": 2.3560203902904646e-06, "loss": 0.4117, "step": 12842 }, { "epoch": 1.562884088834804, "grad_norm": 1.618148922920227, "learning_rate": 2.3547628360846188e-06, "loss": 0.3611, "step": 12843 }, { "epoch": 1.563005780346821, "grad_norm": 2.6135871410369873, "learning_rate": 2.353505572795869e-06, "loss": 0.3808, "step": 12844 }, { "epoch": 1.563127471858838, "grad_norm": 2.4590752124786377, "learning_rate": 2.3522486004720635e-06, "loss": 0.3229, "step": 12845 }, { "epoch": 1.563249163370855, "grad_norm": 2.022947072982788, "learning_rate": 2.3509919191610274e-06, "loss": 0.4051, "step": 12846 }, { "epoch": 1.563370854882872, "grad_norm": 3.2890682220458984, "learning_rate": 2.3497355289105827e-06, "loss": 0.4558, "step": 12847 }, { "epoch": 1.563492546394889, "grad_norm": 1.9923243522644043, "learning_rate": 2.3484794297685353e-06, "loss": 0.3799, "step": 12848 }, { "epoch": 1.563614237906906, "grad_norm": 2.6818184852600098, "learning_rate": 2.3472236217826807e-06, "loss": 0.4284, "step": 12849 }, { "epoch": 1.563735929418923, "grad_norm": 1.4004765748977661, "learning_rate": 2.345968105000811e-06, "loss": 0.349, "step": 12850 }, { "epoch": 1.56385762093094, "grad_norm": 2.124483585357666, "learning_rate": 2.3447128794706975e-06, "loss": 0.3637, "step": 12851 }, { "epoch": 1.563979312442957, "grad_norm": 1.6781656742095947, "learning_rate": 2.3434579452401007e-06, "loss": 0.3741, "step": 12852 }, { "epoch": 1.564101003954974, "grad_norm": 1.8484046459197998, "learning_rate": 2.3422033023567814e-06, "loss": 0.4061, "step": 12853 }, { "epoch": 1.564222695466991, "grad_norm": 1.6275197267532349, "learning_rate": 2.3409489508684767e-06, "loss": 0.3517, "step": 12854 }, { "epoch": 1.5643443869790081, "grad_norm": 1.8312777280807495, "learning_rate": 2.3396948908229155e-06, "loss": 0.3217, "step": 12855 }, { "epoch": 1.5644660784910251, "grad_norm": 1.6628172397613525, "learning_rate": 2.3384411222678215e-06, "loss": 0.4065, "step": 12856 }, { "epoch": 1.5645877700030422, "grad_norm": 1.5507392883300781, "learning_rate": 2.3371876452509024e-06, "loss": 0.3574, "step": 12857 }, { "epoch": 1.5647094615150592, "grad_norm": 1.7578529119491577, "learning_rate": 2.3359344598198543e-06, "loss": 0.3231, "step": 12858 }, { "epoch": 1.5648311530270762, "grad_norm": 2.074265956878662, "learning_rate": 2.334681566022363e-06, "loss": 0.3898, "step": 12859 }, { "epoch": 1.5649528445390934, "grad_norm": 1.7688438892364502, "learning_rate": 2.3334289639061026e-06, "loss": 0.3453, "step": 12860 }, { "epoch": 1.5650745360511105, "grad_norm": 1.606223225593567, "learning_rate": 2.332176653518742e-06, "loss": 0.3823, "step": 12861 }, { "epoch": 1.5651962275631275, "grad_norm": 1.3803592920303345, "learning_rate": 2.3309246349079306e-06, "loss": 0.3314, "step": 12862 }, { "epoch": 1.5653179190751445, "grad_norm": 2.2646665573120117, "learning_rate": 2.3296729081213077e-06, "loss": 0.3543, "step": 12863 }, { "epoch": 1.5654396105871615, "grad_norm": 2.098952293395996, "learning_rate": 2.328421473206511e-06, "loss": 0.3423, "step": 12864 }, { "epoch": 1.5655613020991785, "grad_norm": 1.7026997804641724, "learning_rate": 2.327170330211156e-06, "loss": 0.3363, "step": 12865 }, { "epoch": 1.5656829936111958, "grad_norm": 5.189004898071289, "learning_rate": 2.32591947918285e-06, "loss": 0.4746, "step": 12866 }, { "epoch": 1.5658046851232128, "grad_norm": 1.9921131134033203, "learning_rate": 2.3246689201691943e-06, "loss": 0.3542, "step": 12867 }, { "epoch": 1.5659263766352298, "grad_norm": 2.1459908485412598, "learning_rate": 2.3234186532177695e-06, "loss": 0.4049, "step": 12868 }, { "epoch": 1.5660480681472468, "grad_norm": 1.9813066720962524, "learning_rate": 2.3221686783761623e-06, "loss": 0.3422, "step": 12869 }, { "epoch": 1.5661697596592639, "grad_norm": 1.6277923583984375, "learning_rate": 2.3209189956919243e-06, "loss": 0.3814, "step": 12870 }, { "epoch": 1.5662914511712809, "grad_norm": 1.8347305059432983, "learning_rate": 2.3196696052126122e-06, "loss": 0.3676, "step": 12871 }, { "epoch": 1.566413142683298, "grad_norm": 1.9091711044311523, "learning_rate": 2.318420506985771e-06, "loss": 0.4076, "step": 12872 }, { "epoch": 1.566534834195315, "grad_norm": 1.3776190280914307, "learning_rate": 2.317171701058929e-06, "loss": 0.3459, "step": 12873 }, { "epoch": 1.566656525707332, "grad_norm": 1.734372615814209, "learning_rate": 2.315923187479604e-06, "loss": 0.3479, "step": 12874 }, { "epoch": 1.566778217219349, "grad_norm": 1.9317102432250977, "learning_rate": 2.3146749662953105e-06, "loss": 0.355, "step": 12875 }, { "epoch": 1.566899908731366, "grad_norm": 1.7534362077713013, "learning_rate": 2.3134270375535384e-06, "loss": 0.3511, "step": 12876 }, { "epoch": 1.567021600243383, "grad_norm": 2.6851043701171875, "learning_rate": 2.312179401301782e-06, "loss": 0.3539, "step": 12877 }, { "epoch": 1.5671432917554, "grad_norm": 1.8159314393997192, "learning_rate": 2.3109320575875104e-06, "loss": 0.3932, "step": 12878 }, { "epoch": 1.567264983267417, "grad_norm": 2.1902477741241455, "learning_rate": 2.3096850064581878e-06, "loss": 0.3778, "step": 12879 }, { "epoch": 1.567386674779434, "grad_norm": 2.8415133953094482, "learning_rate": 2.308438247961272e-06, "loss": 0.3721, "step": 12880 }, { "epoch": 1.567508366291451, "grad_norm": 2.1026830673217773, "learning_rate": 2.3071917821442037e-06, "loss": 0.4102, "step": 12881 }, { "epoch": 1.567630057803468, "grad_norm": 2.2709245681762695, "learning_rate": 2.3059456090544053e-06, "loss": 0.3372, "step": 12882 }, { "epoch": 1.567751749315485, "grad_norm": 1.5737155675888062, "learning_rate": 2.3046997287393056e-06, "loss": 0.3801, "step": 12883 }, { "epoch": 1.5678734408275021, "grad_norm": 2.4267656803131104, "learning_rate": 2.3034541412463075e-06, "loss": 0.3498, "step": 12884 }, { "epoch": 1.5679951323395194, "grad_norm": 2.2090985774993896, "learning_rate": 2.302208846622812e-06, "loss": 0.3471, "step": 12885 }, { "epoch": 1.5681168238515364, "grad_norm": 2.4376964569091797, "learning_rate": 2.300963844916204e-06, "loss": 0.3633, "step": 12886 }, { "epoch": 1.5682385153635534, "grad_norm": 2.6460013389587402, "learning_rate": 2.2997191361738545e-06, "loss": 0.3374, "step": 12887 }, { "epoch": 1.5683602068755704, "grad_norm": 2.4712672233581543, "learning_rate": 2.2984747204431345e-06, "loss": 0.361, "step": 12888 }, { "epoch": 1.5684818983875874, "grad_norm": 1.6455574035644531, "learning_rate": 2.2972305977713918e-06, "loss": 0.3658, "step": 12889 }, { "epoch": 1.5686035898996045, "grad_norm": 2.4127776622772217, "learning_rate": 2.2959867682059654e-06, "loss": 0.3467, "step": 12890 }, { "epoch": 1.5687252814116215, "grad_norm": 1.2532869577407837, "learning_rate": 2.294743231794193e-06, "loss": 0.3152, "step": 12891 }, { "epoch": 1.5688469729236387, "grad_norm": 2.937251567840576, "learning_rate": 2.2934999885833885e-06, "loss": 0.4389, "step": 12892 }, { "epoch": 1.5689686644356557, "grad_norm": 2.0350735187530518, "learning_rate": 2.292257038620862e-06, "loss": 0.363, "step": 12893 }, { "epoch": 1.5690903559476728, "grad_norm": 2.337066888809204, "learning_rate": 2.2910143819539087e-06, "loss": 0.3391, "step": 12894 }, { "epoch": 1.5692120474596898, "grad_norm": 1.6901695728302002, "learning_rate": 2.289772018629812e-06, "loss": 0.3862, "step": 12895 }, { "epoch": 1.5693337389717068, "grad_norm": 2.0855400562286377, "learning_rate": 2.2885299486958524e-06, "loss": 0.3763, "step": 12896 }, { "epoch": 1.5694554304837238, "grad_norm": 2.361114740371704, "learning_rate": 2.2872881721992903e-06, "loss": 0.3595, "step": 12897 }, { "epoch": 1.5695771219957408, "grad_norm": 1.7107714414596558, "learning_rate": 2.2860466891873743e-06, "loss": 0.3336, "step": 12898 }, { "epoch": 1.5696988135077579, "grad_norm": 2.0851094722747803, "learning_rate": 2.284805499707352e-06, "loss": 0.4057, "step": 12899 }, { "epoch": 1.5698205050197749, "grad_norm": 3.4108095169067383, "learning_rate": 2.283564603806451e-06, "loss": 0.4048, "step": 12900 }, { "epoch": 1.569942196531792, "grad_norm": 1.425576090812683, "learning_rate": 2.282324001531885e-06, "loss": 0.3624, "step": 12901 }, { "epoch": 1.570063888043809, "grad_norm": 2.004263401031494, "learning_rate": 2.281083692930868e-06, "loss": 0.3748, "step": 12902 }, { "epoch": 1.570185579555826, "grad_norm": 2.1680753231048584, "learning_rate": 2.2798436780505937e-06, "loss": 0.3442, "step": 12903 }, { "epoch": 1.570307271067843, "grad_norm": 2.3241710662841797, "learning_rate": 2.278603956938248e-06, "loss": 0.2941, "step": 12904 }, { "epoch": 1.57042896257986, "grad_norm": 1.7455425262451172, "learning_rate": 2.277364529641004e-06, "loss": 0.3813, "step": 12905 }, { "epoch": 1.570550654091877, "grad_norm": 1.4648090600967407, "learning_rate": 2.2761253962060216e-06, "loss": 0.3449, "step": 12906 }, { "epoch": 1.570672345603894, "grad_norm": 2.0846569538116455, "learning_rate": 2.274886556680458e-06, "loss": 0.3482, "step": 12907 }, { "epoch": 1.570794037115911, "grad_norm": 1.5035845041275024, "learning_rate": 2.2736480111114512e-06, "loss": 0.3419, "step": 12908 }, { "epoch": 1.570915728627928, "grad_norm": 2.0656044483184814, "learning_rate": 2.2724097595461283e-06, "loss": 0.3455, "step": 12909 }, { "epoch": 1.571037420139945, "grad_norm": 1.9806627035140991, "learning_rate": 2.271171802031611e-06, "loss": 0.4156, "step": 12910 }, { "epoch": 1.5711591116519623, "grad_norm": 2.290090322494507, "learning_rate": 2.2699341386150044e-06, "loss": 0.3184, "step": 12911 }, { "epoch": 1.5712808031639793, "grad_norm": 2.0771074295043945, "learning_rate": 2.2686967693434024e-06, "loss": 0.385, "step": 12912 }, { "epoch": 1.5714024946759964, "grad_norm": 1.6272902488708496, "learning_rate": 2.2674596942638927e-06, "loss": 0.3513, "step": 12913 }, { "epoch": 1.5715241861880134, "grad_norm": 1.7542093992233276, "learning_rate": 2.2662229134235482e-06, "loss": 0.415, "step": 12914 }, { "epoch": 1.5716458777000304, "grad_norm": 1.5956990718841553, "learning_rate": 2.2649864268694298e-06, "loss": 0.3545, "step": 12915 }, { "epoch": 1.5717675692120474, "grad_norm": 1.7092236280441284, "learning_rate": 2.2637502346485875e-06, "loss": 0.3977, "step": 12916 }, { "epoch": 1.5718892607240647, "grad_norm": 2.1471784114837646, "learning_rate": 2.2625143368080604e-06, "loss": 0.434, "step": 12917 }, { "epoch": 1.5720109522360817, "grad_norm": 1.582375168800354, "learning_rate": 2.2612787333948805e-06, "loss": 0.3339, "step": 12918 }, { "epoch": 1.5721326437480987, "grad_norm": 2.6699843406677246, "learning_rate": 2.2600434244560632e-06, "loss": 0.2913, "step": 12919 }, { "epoch": 1.5722543352601157, "grad_norm": 2.6099820137023926, "learning_rate": 2.258808410038612e-06, "loss": 0.4198, "step": 12920 }, { "epoch": 1.5723760267721327, "grad_norm": 1.3790663480758667, "learning_rate": 2.257573690189526e-06, "loss": 0.3356, "step": 12921 }, { "epoch": 1.5724977182841497, "grad_norm": 1.509026050567627, "learning_rate": 2.256339264955788e-06, "loss": 0.3649, "step": 12922 }, { "epoch": 1.5726194097961668, "grad_norm": 1.3774064779281616, "learning_rate": 2.2551051343843666e-06, "loss": 0.3561, "step": 12923 }, { "epoch": 1.5727411013081838, "grad_norm": 3.1276044845581055, "learning_rate": 2.253871298522229e-06, "loss": 0.3388, "step": 12924 }, { "epoch": 1.5728627928202008, "grad_norm": 2.3575377464294434, "learning_rate": 2.2526377574163204e-06, "loss": 0.3746, "step": 12925 }, { "epoch": 1.5729844843322178, "grad_norm": 2.5346429347991943, "learning_rate": 2.2514045111135796e-06, "loss": 0.3006, "step": 12926 }, { "epoch": 1.5731061758442348, "grad_norm": 1.745758056640625, "learning_rate": 2.250171559660942e-06, "loss": 0.3988, "step": 12927 }, { "epoch": 1.5732278673562519, "grad_norm": 3.1045055389404297, "learning_rate": 2.248938903105312e-06, "loss": 0.4123, "step": 12928 }, { "epoch": 1.5733495588682689, "grad_norm": 2.329341173171997, "learning_rate": 2.247706541493603e-06, "loss": 0.4086, "step": 12929 }, { "epoch": 1.573471250380286, "grad_norm": 1.997171401977539, "learning_rate": 2.2464744748727065e-06, "loss": 0.391, "step": 12930 }, { "epoch": 1.573592941892303, "grad_norm": 1.5768593549728394, "learning_rate": 2.245242703289502e-06, "loss": 0.37, "step": 12931 }, { "epoch": 1.57371463340432, "grad_norm": 1.5939037799835205, "learning_rate": 2.2440112267908678e-06, "loss": 0.3793, "step": 12932 }, { "epoch": 1.573836324916337, "grad_norm": 3.788174629211426, "learning_rate": 2.2427800454236603e-06, "loss": 0.2802, "step": 12933 }, { "epoch": 1.573958016428354, "grad_norm": 1.968440055847168, "learning_rate": 2.241549159234725e-06, "loss": 0.3588, "step": 12934 }, { "epoch": 1.574079707940371, "grad_norm": 1.7688645124435425, "learning_rate": 2.240318568270906e-06, "loss": 0.3725, "step": 12935 }, { "epoch": 1.5742013994523882, "grad_norm": 2.482125759124756, "learning_rate": 2.2390882725790255e-06, "loss": 0.28, "step": 12936 }, { "epoch": 1.5743230909644053, "grad_norm": 1.6216914653778076, "learning_rate": 2.2378582722059027e-06, "loss": 0.3724, "step": 12937 }, { "epoch": 1.5744447824764223, "grad_norm": 1.8637388944625854, "learning_rate": 2.2366285671983424e-06, "loss": 0.3854, "step": 12938 }, { "epoch": 1.5745664739884393, "grad_norm": 2.8731648921966553, "learning_rate": 2.2353991576031286e-06, "loss": 0.4259, "step": 12939 }, { "epoch": 1.5746881655004563, "grad_norm": 1.7532217502593994, "learning_rate": 2.234170043467051e-06, "loss": 0.3367, "step": 12940 }, { "epoch": 1.5748098570124733, "grad_norm": 1.7763198614120483, "learning_rate": 2.2329412248368786e-06, "loss": 0.3825, "step": 12941 }, { "epoch": 1.5749315485244906, "grad_norm": 1.456264615058899, "learning_rate": 2.2317127017593654e-06, "loss": 0.3418, "step": 12942 }, { "epoch": 1.5750532400365076, "grad_norm": 1.8870912790298462, "learning_rate": 2.230484474281267e-06, "loss": 0.4288, "step": 12943 }, { "epoch": 1.5751749315485246, "grad_norm": 1.6714569330215454, "learning_rate": 2.229256542449314e-06, "loss": 0.3915, "step": 12944 }, { "epoch": 1.5752966230605416, "grad_norm": 1.7826646566390991, "learning_rate": 2.2280289063102365e-06, "loss": 0.4295, "step": 12945 }, { "epoch": 1.5754183145725587, "grad_norm": 1.8751949071884155, "learning_rate": 2.2268015659107456e-06, "loss": 0.3883, "step": 12946 }, { "epoch": 1.5755400060845757, "grad_norm": 1.8790628910064697, "learning_rate": 2.2255745212975433e-06, "loss": 0.3593, "step": 12947 }, { "epoch": 1.5756616975965927, "grad_norm": 1.7039514780044556, "learning_rate": 2.2243477725173246e-06, "loss": 0.3979, "step": 12948 }, { "epoch": 1.5757833891086097, "grad_norm": 1.5635474920272827, "learning_rate": 2.2231213196167678e-06, "loss": 0.3082, "step": 12949 }, { "epoch": 1.5759050806206267, "grad_norm": 2.9554731845855713, "learning_rate": 2.221895162642542e-06, "loss": 0.289, "step": 12950 }, { "epoch": 1.5760267721326437, "grad_norm": 2.4131555557250977, "learning_rate": 2.220669301641305e-06, "loss": 0.3073, "step": 12951 }, { "epoch": 1.5761484636446608, "grad_norm": 2.1077520847320557, "learning_rate": 2.219443736659701e-06, "loss": 0.3339, "step": 12952 }, { "epoch": 1.5762701551566778, "grad_norm": 2.155916213989258, "learning_rate": 2.218218467744371e-06, "loss": 0.3404, "step": 12953 }, { "epoch": 1.5763918466686948, "grad_norm": 2.0844171047210693, "learning_rate": 2.2169934949419345e-06, "loss": 0.3724, "step": 12954 }, { "epoch": 1.5765135381807118, "grad_norm": 1.671710729598999, "learning_rate": 2.2157688182990033e-06, "loss": 0.4115, "step": 12955 }, { "epoch": 1.5766352296927288, "grad_norm": 1.7781239748001099, "learning_rate": 2.2145444378621837e-06, "loss": 0.3941, "step": 12956 }, { "epoch": 1.5767569212047459, "grad_norm": 1.587677240371704, "learning_rate": 2.2133203536780635e-06, "loss": 0.363, "step": 12957 }, { "epoch": 1.5768786127167629, "grad_norm": 1.8353302478790283, "learning_rate": 2.212096565793218e-06, "loss": 0.3517, "step": 12958 }, { "epoch": 1.57700030422878, "grad_norm": 2.0180232524871826, "learning_rate": 2.2108730742542215e-06, "loss": 0.3442, "step": 12959 }, { "epoch": 1.577121995740797, "grad_norm": 2.873677968978882, "learning_rate": 2.209649879107627e-06, "loss": 0.4144, "step": 12960 }, { "epoch": 1.5772436872528142, "grad_norm": 2.216658115386963, "learning_rate": 2.2084269803999803e-06, "loss": 0.3685, "step": 12961 }, { "epoch": 1.5773653787648312, "grad_norm": 2.5188417434692383, "learning_rate": 2.207204378177814e-06, "loss": 0.3765, "step": 12962 }, { "epoch": 1.5774870702768482, "grad_norm": 1.9065752029418945, "learning_rate": 2.2059820724876478e-06, "loss": 0.364, "step": 12963 }, { "epoch": 1.5776087617888652, "grad_norm": 1.6783907413482666, "learning_rate": 2.204760063376e-06, "loss": 0.3583, "step": 12964 }, { "epoch": 1.5777304533008822, "grad_norm": 2.5551974773406982, "learning_rate": 2.2035383508893682e-06, "loss": 0.4302, "step": 12965 }, { "epoch": 1.5778521448128993, "grad_norm": 3.1080923080444336, "learning_rate": 2.202316935074236e-06, "loss": 0.339, "step": 12966 }, { "epoch": 1.5779738363249165, "grad_norm": 1.7808754444122314, "learning_rate": 2.201095815977087e-06, "loss": 0.3371, "step": 12967 }, { "epoch": 1.5780955278369335, "grad_norm": 1.6282432079315186, "learning_rate": 2.1998749936443864e-06, "loss": 0.3551, "step": 12968 }, { "epoch": 1.5782172193489505, "grad_norm": 1.782135248184204, "learning_rate": 2.198654468122584e-06, "loss": 0.3512, "step": 12969 }, { "epoch": 1.5783389108609676, "grad_norm": 1.5888426303863525, "learning_rate": 2.1974342394581293e-06, "loss": 0.3344, "step": 12970 }, { "epoch": 1.5784606023729846, "grad_norm": 2.6294143199920654, "learning_rate": 2.196214307697453e-06, "loss": 0.3854, "step": 12971 }, { "epoch": 1.5785822938850016, "grad_norm": 2.163494825363159, "learning_rate": 2.194994672886973e-06, "loss": 0.4103, "step": 12972 }, { "epoch": 1.5787039853970186, "grad_norm": 2.9790916442871094, "learning_rate": 2.193775335073106e-06, "loss": 0.4395, "step": 12973 }, { "epoch": 1.5788256769090356, "grad_norm": 1.5964429378509521, "learning_rate": 2.192556294302239e-06, "loss": 0.3879, "step": 12974 }, { "epoch": 1.5789473684210527, "grad_norm": 3.1039741039276123, "learning_rate": 2.1913375506207703e-06, "loss": 0.4087, "step": 12975 }, { "epoch": 1.5790690599330697, "grad_norm": 1.4071731567382812, "learning_rate": 2.190119104075069e-06, "loss": 0.3467, "step": 12976 }, { "epoch": 1.5791907514450867, "grad_norm": 2.2165424823760986, "learning_rate": 2.1889009547115003e-06, "loss": 0.3726, "step": 12977 }, { "epoch": 1.5793124429571037, "grad_norm": 1.5575865507125854, "learning_rate": 2.18768310257642e-06, "loss": 0.3843, "step": 12978 }, { "epoch": 1.5794341344691207, "grad_norm": 2.5923357009887695, "learning_rate": 2.186465547716168e-06, "loss": 0.4024, "step": 12979 }, { "epoch": 1.5795558259811378, "grad_norm": 2.053741931915283, "learning_rate": 2.1852482901770734e-06, "loss": 0.3783, "step": 12980 }, { "epoch": 1.5796775174931548, "grad_norm": 1.9022127389907837, "learning_rate": 2.1840313300054592e-06, "loss": 0.4115, "step": 12981 }, { "epoch": 1.5797992090051718, "grad_norm": 1.603399634361267, "learning_rate": 2.18281466724763e-06, "loss": 0.3688, "step": 12982 }, { "epoch": 1.5799209005171888, "grad_norm": 1.7983378171920776, "learning_rate": 2.181598301949882e-06, "loss": 0.3906, "step": 12983 }, { "epoch": 1.5800425920292058, "grad_norm": 1.6356163024902344, "learning_rate": 2.1803822341585066e-06, "loss": 0.365, "step": 12984 }, { "epoch": 1.5801642835412228, "grad_norm": 2.31428861618042, "learning_rate": 2.1791664639197683e-06, "loss": 0.3253, "step": 12985 }, { "epoch": 1.58028597505324, "grad_norm": 4.362576961517334, "learning_rate": 2.1779509912799367e-06, "loss": 0.3318, "step": 12986 }, { "epoch": 1.580407666565257, "grad_norm": 1.7996320724487305, "learning_rate": 2.17673581628526e-06, "loss": 0.383, "step": 12987 }, { "epoch": 1.5805293580772741, "grad_norm": 1.5419590473175049, "learning_rate": 2.1755209389819764e-06, "loss": 0.3869, "step": 12988 }, { "epoch": 1.5806510495892911, "grad_norm": 3.000788927078247, "learning_rate": 2.1743063594163205e-06, "loss": 0.4218, "step": 12989 }, { "epoch": 1.5807727411013082, "grad_norm": 3.0800633430480957, "learning_rate": 2.1730920776345043e-06, "loss": 0.3232, "step": 12990 }, { "epoch": 1.5808944326133252, "grad_norm": 1.633508563041687, "learning_rate": 2.171878093682733e-06, "loss": 0.3576, "step": 12991 }, { "epoch": 1.5810161241253422, "grad_norm": 3.2565622329711914, "learning_rate": 2.170664407607207e-06, "loss": 0.2872, "step": 12992 }, { "epoch": 1.5811378156373594, "grad_norm": 1.6280717849731445, "learning_rate": 2.1694510194541063e-06, "loss": 0.322, "step": 12993 }, { "epoch": 1.5812595071493765, "grad_norm": 3.7198541164398193, "learning_rate": 2.1682379292695997e-06, "loss": 0.4513, "step": 12994 }, { "epoch": 1.5813811986613935, "grad_norm": 2.5929200649261475, "learning_rate": 2.167025137099853e-06, "loss": 0.3744, "step": 12995 }, { "epoch": 1.5815028901734105, "grad_norm": 1.6465598344802856, "learning_rate": 2.1658126429910133e-06, "loss": 0.3362, "step": 12996 }, { "epoch": 1.5816245816854275, "grad_norm": 1.9195952415466309, "learning_rate": 2.164600446989219e-06, "loss": 0.3038, "step": 12997 }, { "epoch": 1.5817462731974445, "grad_norm": 1.4766395092010498, "learning_rate": 2.163388549140595e-06, "loss": 0.3336, "step": 12998 }, { "epoch": 1.5818679647094616, "grad_norm": 1.4439332485198975, "learning_rate": 2.1621769494912558e-06, "loss": 0.3296, "step": 12999 }, { "epoch": 1.5819896562214786, "grad_norm": 1.8840376138687134, "learning_rate": 2.1609656480873097e-06, "loss": 0.3503, "step": 13000 }, { "epoch": 1.5821113477334956, "grad_norm": 2.572530508041382, "learning_rate": 2.1597546449748463e-06, "loss": 0.3997, "step": 13001 }, { "epoch": 1.5822330392455126, "grad_norm": 2.019007921218872, "learning_rate": 2.158543940199944e-06, "loss": 0.384, "step": 13002 }, { "epoch": 1.5823547307575296, "grad_norm": 2.364663600921631, "learning_rate": 2.1573335338086785e-06, "loss": 0.4355, "step": 13003 }, { "epoch": 1.5824764222695467, "grad_norm": 1.8007522821426392, "learning_rate": 2.156123425847103e-06, "loss": 0.3396, "step": 13004 }, { "epoch": 1.5825981137815637, "grad_norm": 1.7086308002471924, "learning_rate": 2.1549136163612703e-06, "loss": 0.3405, "step": 13005 }, { "epoch": 1.5827198052935807, "grad_norm": 1.6272873878479004, "learning_rate": 2.153704105397213e-06, "loss": 0.387, "step": 13006 }, { "epoch": 1.5828414968055977, "grad_norm": 1.6640149354934692, "learning_rate": 2.1524948930009548e-06, "loss": 0.3016, "step": 13007 }, { "epoch": 1.5829631883176147, "grad_norm": 2.0438344478607178, "learning_rate": 2.151285979218509e-06, "loss": 0.3294, "step": 13008 }, { "epoch": 1.5830848798296318, "grad_norm": 3.8273937702178955, "learning_rate": 2.150077364095876e-06, "loss": 0.4172, "step": 13009 }, { "epoch": 1.5832065713416488, "grad_norm": 2.200937509536743, "learning_rate": 2.1488690476790497e-06, "loss": 0.378, "step": 13010 }, { "epoch": 1.5833282628536658, "grad_norm": 1.7602485418319702, "learning_rate": 2.147661030014008e-06, "loss": 0.3285, "step": 13011 }, { "epoch": 1.583449954365683, "grad_norm": 2.479182720184326, "learning_rate": 2.1464533111467136e-06, "loss": 0.4261, "step": 13012 }, { "epoch": 1.5835716458777, "grad_norm": 1.859266757965088, "learning_rate": 2.14524589112313e-06, "loss": 0.3381, "step": 13013 }, { "epoch": 1.583693337389717, "grad_norm": 1.5318127870559692, "learning_rate": 2.1440387699891986e-06, "loss": 0.3229, "step": 13014 }, { "epoch": 1.583815028901734, "grad_norm": 1.622456669807434, "learning_rate": 2.14283194779085e-06, "loss": 0.3122, "step": 13015 }, { "epoch": 1.583936720413751, "grad_norm": 1.5536085367202759, "learning_rate": 2.141625424574012e-06, "loss": 0.3398, "step": 13016 }, { "epoch": 1.5840584119257681, "grad_norm": 2.3423728942871094, "learning_rate": 2.1404192003845924e-06, "loss": 0.4037, "step": 13017 }, { "epoch": 1.5841801034377854, "grad_norm": 2.2760283946990967, "learning_rate": 2.1392132752684893e-06, "loss": 0.3952, "step": 13018 }, { "epoch": 1.5843017949498024, "grad_norm": 1.316992163658142, "learning_rate": 2.1380076492715963e-06, "loss": 0.3143, "step": 13019 }, { "epoch": 1.5844234864618194, "grad_norm": 2.216892957687378, "learning_rate": 2.1368023224397793e-06, "loss": 0.4307, "step": 13020 }, { "epoch": 1.5845451779738364, "grad_norm": 1.8537057638168335, "learning_rate": 2.135597294818914e-06, "loss": 0.3856, "step": 13021 }, { "epoch": 1.5846668694858534, "grad_norm": 2.3461999893188477, "learning_rate": 2.1343925664548505e-06, "loss": 0.3605, "step": 13022 }, { "epoch": 1.5847885609978705, "grad_norm": 1.6738178730010986, "learning_rate": 2.133188137393427e-06, "loss": 0.3754, "step": 13023 }, { "epoch": 1.5849102525098875, "grad_norm": 1.6088544130325317, "learning_rate": 2.131984007680481e-06, "loss": 0.3256, "step": 13024 }, { "epoch": 1.5850319440219045, "grad_norm": 1.8200000524520874, "learning_rate": 2.1307801773618296e-06, "loss": 0.4206, "step": 13025 }, { "epoch": 1.5851536355339215, "grad_norm": 1.6171196699142456, "learning_rate": 2.1295766464832777e-06, "loss": 0.3908, "step": 13026 }, { "epoch": 1.5852753270459385, "grad_norm": 1.6358188390731812, "learning_rate": 2.1283734150906288e-06, "loss": 0.3749, "step": 13027 }, { "epoch": 1.5853970185579556, "grad_norm": 2.141417980194092, "learning_rate": 2.127170483229665e-06, "loss": 0.4208, "step": 13028 }, { "epoch": 1.5855187100699726, "grad_norm": 1.5128798484802246, "learning_rate": 2.1259678509461567e-06, "loss": 0.3967, "step": 13029 }, { "epoch": 1.5856404015819896, "grad_norm": 2.1067793369293213, "learning_rate": 2.124765518285875e-06, "loss": 0.3414, "step": 13030 }, { "epoch": 1.5857620930940066, "grad_norm": 2.328778028488159, "learning_rate": 2.123563485294562e-06, "loss": 0.3812, "step": 13031 }, { "epoch": 1.5858837846060236, "grad_norm": 1.975500226020813, "learning_rate": 2.122361752017964e-06, "loss": 0.4166, "step": 13032 }, { "epoch": 1.5860054761180407, "grad_norm": 1.896972417831421, "learning_rate": 2.121160318501807e-06, "loss": 0.3922, "step": 13033 }, { "epoch": 1.5861271676300577, "grad_norm": 2.4338061809539795, "learning_rate": 2.1199591847918053e-06, "loss": 0.4501, "step": 13034 }, { "epoch": 1.5862488591420747, "grad_norm": 4.087060451507568, "learning_rate": 2.1187583509336707e-06, "loss": 0.2745, "step": 13035 }, { "epoch": 1.5863705506540917, "grad_norm": 2.049020528793335, "learning_rate": 2.1175578169730936e-06, "loss": 0.3481, "step": 13036 }, { "epoch": 1.586492242166109, "grad_norm": 1.684091567993164, "learning_rate": 2.116357582955755e-06, "loss": 0.4156, "step": 13037 }, { "epoch": 1.586613933678126, "grad_norm": 1.9463108777999878, "learning_rate": 2.1151576489273306e-06, "loss": 0.3598, "step": 13038 }, { "epoch": 1.586735625190143, "grad_norm": 1.5346837043762207, "learning_rate": 2.113958014933478e-06, "loss": 0.3811, "step": 13039 }, { "epoch": 1.58685731670216, "grad_norm": 1.8729277849197388, "learning_rate": 2.1127586810198454e-06, "loss": 0.371, "step": 13040 }, { "epoch": 1.586979008214177, "grad_norm": 2.132922887802124, "learning_rate": 2.111559647232072e-06, "loss": 0.4112, "step": 13041 }, { "epoch": 1.587100699726194, "grad_norm": 2.478147506713867, "learning_rate": 2.110360913615782e-06, "loss": 0.3597, "step": 13042 }, { "epoch": 1.5872223912382113, "grad_norm": 2.210583448410034, "learning_rate": 2.10916248021659e-06, "loss": 0.3784, "step": 13043 }, { "epoch": 1.5873440827502283, "grad_norm": 3.000032901763916, "learning_rate": 2.107964347080099e-06, "loss": 0.3495, "step": 13044 }, { "epoch": 1.5874657742622453, "grad_norm": 1.791088581085205, "learning_rate": 2.1067665142518976e-06, "loss": 0.3636, "step": 13045 }, { "epoch": 1.5875874657742624, "grad_norm": 1.8577041625976562, "learning_rate": 2.1055689817775714e-06, "loss": 0.3525, "step": 13046 }, { "epoch": 1.5877091572862794, "grad_norm": 1.9904578924179077, "learning_rate": 2.1043717497026863e-06, "loss": 0.3866, "step": 13047 }, { "epoch": 1.5878308487982964, "grad_norm": 1.590952754020691, "learning_rate": 2.103174818072796e-06, "loss": 0.3487, "step": 13048 }, { "epoch": 1.5879525403103134, "grad_norm": 1.942191481590271, "learning_rate": 2.101978186933452e-06, "loss": 0.3676, "step": 13049 }, { "epoch": 1.5880742318223304, "grad_norm": 1.9794538021087646, "learning_rate": 2.1007818563301863e-06, "loss": 0.3179, "step": 13050 }, { "epoch": 1.5881959233343474, "grad_norm": 1.604133129119873, "learning_rate": 2.099585826308518e-06, "loss": 0.3418, "step": 13051 }, { "epoch": 1.5883176148463645, "grad_norm": 1.8325567245483398, "learning_rate": 2.098390096913965e-06, "loss": 0.3253, "step": 13052 }, { "epoch": 1.5884393063583815, "grad_norm": 3.32185435295105, "learning_rate": 2.097194668192025e-06, "loss": 0.4288, "step": 13053 }, { "epoch": 1.5885609978703985, "grad_norm": 1.7816901206970215, "learning_rate": 2.095999540188185e-06, "loss": 0.3651, "step": 13054 }, { "epoch": 1.5886826893824155, "grad_norm": 3.4924569129943848, "learning_rate": 2.094804712947923e-06, "loss": 0.4112, "step": 13055 }, { "epoch": 1.5888043808944325, "grad_norm": 1.9935883283615112, "learning_rate": 2.093610186516701e-06, "loss": 0.4249, "step": 13056 }, { "epoch": 1.5889260724064496, "grad_norm": 1.885443925857544, "learning_rate": 2.0924159609399808e-06, "loss": 0.3687, "step": 13057 }, { "epoch": 1.5890477639184666, "grad_norm": 1.448865294456482, "learning_rate": 2.0912220362632005e-06, "loss": 0.3873, "step": 13058 }, { "epoch": 1.5891694554304836, "grad_norm": 1.9033385515213013, "learning_rate": 2.0900284125317906e-06, "loss": 0.3221, "step": 13059 }, { "epoch": 1.5892911469425006, "grad_norm": 1.7482808828353882, "learning_rate": 2.0888350897911736e-06, "loss": 0.3581, "step": 13060 }, { "epoch": 1.5894128384545176, "grad_norm": 1.5110366344451904, "learning_rate": 2.087642068086754e-06, "loss": 0.3589, "step": 13061 }, { "epoch": 1.5895345299665349, "grad_norm": 1.431944727897644, "learning_rate": 2.0864493474639335e-06, "loss": 0.3495, "step": 13062 }, { "epoch": 1.589656221478552, "grad_norm": 2.4168877601623535, "learning_rate": 2.0852569279680967e-06, "loss": 0.3776, "step": 13063 }, { "epoch": 1.589777912990569, "grad_norm": 1.8519970178604126, "learning_rate": 2.0840648096446135e-06, "loss": 0.3793, "step": 13064 }, { "epoch": 1.589899604502586, "grad_norm": 2.2232577800750732, "learning_rate": 2.0828729925388547e-06, "loss": 0.3782, "step": 13065 }, { "epoch": 1.590021296014603, "grad_norm": 2.82106614112854, "learning_rate": 2.0816814766961633e-06, "loss": 0.4127, "step": 13066 }, { "epoch": 1.59014298752662, "grad_norm": 2.5844547748565674, "learning_rate": 2.0804902621618784e-06, "loss": 0.4223, "step": 13067 }, { "epoch": 1.5902646790386372, "grad_norm": 2.7374610900878906, "learning_rate": 2.079299348981335e-06, "loss": 0.3127, "step": 13068 }, { "epoch": 1.5903863705506542, "grad_norm": 2.436783790588379, "learning_rate": 2.0781087371998433e-06, "loss": 0.3122, "step": 13069 }, { "epoch": 1.5905080620626713, "grad_norm": 2.7289369106292725, "learning_rate": 2.076918426862715e-06, "loss": 0.315, "step": 13070 }, { "epoch": 1.5906297535746883, "grad_norm": 1.6221330165863037, "learning_rate": 2.0757284180152392e-06, "loss": 0.3614, "step": 13071 }, { "epoch": 1.5907514450867053, "grad_norm": 1.865172028541565, "learning_rate": 2.074538710702697e-06, "loss": 0.3647, "step": 13072 }, { "epoch": 1.5908731365987223, "grad_norm": 1.4285074472427368, "learning_rate": 2.073349304970366e-06, "loss": 0.3416, "step": 13073 }, { "epoch": 1.5909948281107393, "grad_norm": 2.0247199535369873, "learning_rate": 2.0721602008634987e-06, "loss": 0.3166, "step": 13074 }, { "epoch": 1.5911165196227564, "grad_norm": 1.9108790159225464, "learning_rate": 2.070971398427345e-06, "loss": 0.3427, "step": 13075 }, { "epoch": 1.5912382111347734, "grad_norm": 1.8942264318466187, "learning_rate": 2.0697828977071456e-06, "loss": 0.3535, "step": 13076 }, { "epoch": 1.5913599026467904, "grad_norm": 3.7403979301452637, "learning_rate": 2.0685946987481165e-06, "loss": 0.4554, "step": 13077 }, { "epoch": 1.5914815941588074, "grad_norm": 2.674006700515747, "learning_rate": 2.0674068015954797e-06, "loss": 0.433, "step": 13078 }, { "epoch": 1.5916032856708244, "grad_norm": 2.5970053672790527, "learning_rate": 2.066219206294433e-06, "loss": 0.3701, "step": 13079 }, { "epoch": 1.5917249771828414, "grad_norm": 1.6705175638198853, "learning_rate": 2.0650319128901654e-06, "loss": 0.3817, "step": 13080 }, { "epoch": 1.5918466686948585, "grad_norm": 1.850736379623413, "learning_rate": 2.06384492142786e-06, "loss": 0.3169, "step": 13081 }, { "epoch": 1.5919683602068755, "grad_norm": 1.7765023708343506, "learning_rate": 2.062658231952682e-06, "loss": 0.4159, "step": 13082 }, { "epoch": 1.5920900517188925, "grad_norm": 1.430756688117981, "learning_rate": 2.061471844509786e-06, "loss": 0.366, "step": 13083 }, { "epoch": 1.5922117432309095, "grad_norm": 2.4478580951690674, "learning_rate": 2.0602857591443205e-06, "loss": 0.3344, "step": 13084 }, { "epoch": 1.5923334347429265, "grad_norm": 1.5864261388778687, "learning_rate": 2.059099975901416e-06, "loss": 0.3823, "step": 13085 }, { "epoch": 1.5924551262549436, "grad_norm": 1.5701196193695068, "learning_rate": 2.0579144948261918e-06, "loss": 0.3122, "step": 13086 }, { "epoch": 1.5925768177669608, "grad_norm": 1.8844201564788818, "learning_rate": 2.0567293159637624e-06, "loss": 0.3892, "step": 13087 }, { "epoch": 1.5926985092789778, "grad_norm": 1.6080620288848877, "learning_rate": 2.055544439359225e-06, "loss": 0.3795, "step": 13088 }, { "epoch": 1.5928202007909948, "grad_norm": 1.5697171688079834, "learning_rate": 2.0543598650576645e-06, "loss": 0.3777, "step": 13089 }, { "epoch": 1.5929418923030119, "grad_norm": 2.5041136741638184, "learning_rate": 2.0531755931041588e-06, "loss": 0.3119, "step": 13090 }, { "epoch": 1.5930635838150289, "grad_norm": 1.8614110946655273, "learning_rate": 2.0519916235437674e-06, "loss": 0.3164, "step": 13091 }, { "epoch": 1.593185275327046, "grad_norm": 3.230666160583496, "learning_rate": 2.05080795642155e-06, "loss": 0.3372, "step": 13092 }, { "epoch": 1.593306966839063, "grad_norm": 2.014282464981079, "learning_rate": 2.049624591782543e-06, "loss": 0.3377, "step": 13093 }, { "epoch": 1.5934286583510802, "grad_norm": 1.7611933946609497, "learning_rate": 2.048441529671774e-06, "loss": 0.3384, "step": 13094 }, { "epoch": 1.5935503498630972, "grad_norm": 2.6231980323791504, "learning_rate": 2.047258770134266e-06, "loss": 0.3954, "step": 13095 }, { "epoch": 1.5936720413751142, "grad_norm": 1.7613611221313477, "learning_rate": 2.046076313215024e-06, "loss": 0.4118, "step": 13096 }, { "epoch": 1.5937937328871312, "grad_norm": 1.9959487915039062, "learning_rate": 2.044894158959039e-06, "loss": 0.3672, "step": 13097 }, { "epoch": 1.5939154243991482, "grad_norm": 1.6007417440414429, "learning_rate": 2.0437123074112996e-06, "loss": 0.4135, "step": 13098 }, { "epoch": 1.5940371159111653, "grad_norm": 3.7402307987213135, "learning_rate": 2.0425307586167763e-06, "loss": 0.3884, "step": 13099 }, { "epoch": 1.5941588074231823, "grad_norm": 1.7162389755249023, "learning_rate": 2.0413495126204274e-06, "loss": 0.3883, "step": 13100 }, { "epoch": 1.5942804989351993, "grad_norm": 2.3073923587799072, "learning_rate": 2.0401685694672045e-06, "loss": 0.3392, "step": 13101 }, { "epoch": 1.5944021904472163, "grad_norm": 2.4966931343078613, "learning_rate": 2.03898792920204e-06, "loss": 0.4019, "step": 13102 }, { "epoch": 1.5945238819592333, "grad_norm": 1.4600239992141724, "learning_rate": 2.037807591869866e-06, "loss": 0.3528, "step": 13103 }, { "epoch": 1.5946455734712504, "grad_norm": 1.5368931293487549, "learning_rate": 2.0366275575155936e-06, "loss": 0.3938, "step": 13104 }, { "epoch": 1.5947672649832674, "grad_norm": 1.6008902788162231, "learning_rate": 2.035447826184124e-06, "loss": 0.3337, "step": 13105 }, { "epoch": 1.5948889564952844, "grad_norm": 2.228052854537964, "learning_rate": 2.0342683979203527e-06, "loss": 0.3756, "step": 13106 }, { "epoch": 1.5950106480073014, "grad_norm": 2.35479736328125, "learning_rate": 2.0330892727691564e-06, "loss": 0.363, "step": 13107 }, { "epoch": 1.5951323395193184, "grad_norm": 2.6611642837524414, "learning_rate": 2.031910450775402e-06, "loss": 0.4209, "step": 13108 }, { "epoch": 1.5952540310313355, "grad_norm": 1.578197956085205, "learning_rate": 2.03073193198395e-06, "loss": 0.3965, "step": 13109 }, { "epoch": 1.5953757225433525, "grad_norm": 1.3807129859924316, "learning_rate": 2.029553716439644e-06, "loss": 0.3383, "step": 13110 }, { "epoch": 1.5954974140553695, "grad_norm": 2.376840591430664, "learning_rate": 2.028375804187317e-06, "loss": 0.2854, "step": 13111 }, { "epoch": 1.5956191055673865, "grad_norm": 2.8759920597076416, "learning_rate": 2.027198195271791e-06, "loss": 0.3942, "step": 13112 }, { "epoch": 1.5957407970794037, "grad_norm": 2.0009405612945557, "learning_rate": 2.026020889737873e-06, "loss": 0.3133, "step": 13113 }, { "epoch": 1.5958624885914208, "grad_norm": 2.055126667022705, "learning_rate": 2.024843887630369e-06, "loss": 0.3896, "step": 13114 }, { "epoch": 1.5959841801034378, "grad_norm": 1.771736741065979, "learning_rate": 2.023667188994063e-06, "loss": 0.3711, "step": 13115 }, { "epoch": 1.5961058716154548, "grad_norm": 2.2798218727111816, "learning_rate": 2.022490793873727e-06, "loss": 0.3171, "step": 13116 }, { "epoch": 1.5962275631274718, "grad_norm": 1.9011422395706177, "learning_rate": 2.021314702314132e-06, "loss": 0.387, "step": 13117 }, { "epoch": 1.5963492546394888, "grad_norm": 3.230950117111206, "learning_rate": 2.020138914360027e-06, "loss": 0.4211, "step": 13118 }, { "epoch": 1.596470946151506, "grad_norm": 3.131316900253296, "learning_rate": 2.0189634300561513e-06, "loss": 0.3896, "step": 13119 }, { "epoch": 1.596592637663523, "grad_norm": 2.0703797340393066, "learning_rate": 2.01778824944724e-06, "loss": 0.2909, "step": 13120 }, { "epoch": 1.5967143291755401, "grad_norm": 2.714980125427246, "learning_rate": 2.016613372578005e-06, "loss": 0.4278, "step": 13121 }, { "epoch": 1.5968360206875571, "grad_norm": 2.3640944957733154, "learning_rate": 2.0154387994931613e-06, "loss": 0.3698, "step": 13122 }, { "epoch": 1.5969577121995742, "grad_norm": 1.8098775148391724, "learning_rate": 2.014264530237395e-06, "loss": 0.3759, "step": 13123 }, { "epoch": 1.5970794037115912, "grad_norm": 3.8114538192749023, "learning_rate": 2.0130905648553922e-06, "loss": 0.4176, "step": 13124 }, { "epoch": 1.5972010952236082, "grad_norm": 1.8549028635025024, "learning_rate": 2.011916903391826e-06, "loss": 0.3216, "step": 13125 }, { "epoch": 1.5973227867356252, "grad_norm": 2.009453535079956, "learning_rate": 2.0107435458913573e-06, "loss": 0.4127, "step": 13126 }, { "epoch": 1.5974444782476422, "grad_norm": 1.6705901622772217, "learning_rate": 2.0095704923986305e-06, "loss": 0.379, "step": 13127 }, { "epoch": 1.5975661697596593, "grad_norm": 1.6024035215377808, "learning_rate": 2.0083977429582892e-06, "loss": 0.3469, "step": 13128 }, { "epoch": 1.5976878612716763, "grad_norm": 1.6293784379959106, "learning_rate": 2.0072252976149508e-06, "loss": 0.3898, "step": 13129 }, { "epoch": 1.5978095527836933, "grad_norm": 1.6171514987945557, "learning_rate": 2.006053156413238e-06, "loss": 0.3543, "step": 13130 }, { "epoch": 1.5979312442957103, "grad_norm": 1.3208644390106201, "learning_rate": 2.004881319397749e-06, "loss": 0.3965, "step": 13131 }, { "epoch": 1.5980529358077273, "grad_norm": 1.501453161239624, "learning_rate": 2.003709786613073e-06, "loss": 0.3518, "step": 13132 }, { "epoch": 1.5981746273197444, "grad_norm": 1.4601036310195923, "learning_rate": 2.0025385581037927e-06, "loss": 0.3484, "step": 13133 }, { "epoch": 1.5982963188317614, "grad_norm": 1.8425116539001465, "learning_rate": 2.001367633914476e-06, "loss": 0.3267, "step": 13134 }, { "epoch": 1.5984180103437784, "grad_norm": 1.508164644241333, "learning_rate": 2.000197014089673e-06, "loss": 0.3286, "step": 13135 }, { "epoch": 1.5985397018557954, "grad_norm": 1.4628454446792603, "learning_rate": 1.999026698673935e-06, "loss": 0.3948, "step": 13136 }, { "epoch": 1.5986613933678124, "grad_norm": 2.1673059463500977, "learning_rate": 1.997856687711789e-06, "loss": 0.411, "step": 13137 }, { "epoch": 1.5987830848798297, "grad_norm": 2.268766164779663, "learning_rate": 1.9966869812477618e-06, "loss": 0.3785, "step": 13138 }, { "epoch": 1.5989047763918467, "grad_norm": 1.3966866731643677, "learning_rate": 1.9955175793263628e-06, "loss": 0.3734, "step": 13139 }, { "epoch": 1.5990264679038637, "grad_norm": 1.3291488885879517, "learning_rate": 1.9943484819920832e-06, "loss": 0.3357, "step": 13140 }, { "epoch": 1.5991481594158807, "grad_norm": 3.306013345718384, "learning_rate": 1.9931796892894186e-06, "loss": 0.2922, "step": 13141 }, { "epoch": 1.5992698509278978, "grad_norm": 1.9285842180252075, "learning_rate": 1.9920112012628403e-06, "loss": 0.4253, "step": 13142 }, { "epoch": 1.5993915424399148, "grad_norm": 1.5274900197982788, "learning_rate": 1.990843017956808e-06, "loss": 0.3867, "step": 13143 }, { "epoch": 1.599513233951932, "grad_norm": 1.4659581184387207, "learning_rate": 1.98967513941578e-06, "loss": 0.3609, "step": 13144 }, { "epoch": 1.599634925463949, "grad_norm": 2.233898878097534, "learning_rate": 1.9885075656841933e-06, "loss": 0.3883, "step": 13145 }, { "epoch": 1.599756616975966, "grad_norm": 1.4824124574661255, "learning_rate": 1.987340296806477e-06, "loss": 0.3154, "step": 13146 }, { "epoch": 1.599878308487983, "grad_norm": 2.120619297027588, "learning_rate": 1.986173332827047e-06, "loss": 0.3706, "step": 13147 }, { "epoch": 1.6, "grad_norm": 1.8847951889038086, "learning_rate": 1.985006673790307e-06, "loss": 0.356, "step": 13148 }, { "epoch": 1.600121691512017, "grad_norm": 1.7535452842712402, "learning_rate": 1.983840319740655e-06, "loss": 0.3707, "step": 13149 }, { "epoch": 1.6002433830240341, "grad_norm": 1.942562222480774, "learning_rate": 1.9826742707224735e-06, "loss": 0.404, "step": 13150 }, { "epoch": 1.6003650745360511, "grad_norm": 3.911184072494507, "learning_rate": 1.981508526780127e-06, "loss": 0.4187, "step": 13151 }, { "epoch": 1.6004867660480682, "grad_norm": 1.8654357194900513, "learning_rate": 1.980343087957981e-06, "loss": 0.3659, "step": 13152 }, { "epoch": 1.6006084575600852, "grad_norm": 2.3797109127044678, "learning_rate": 1.97917795430038e-06, "loss": 0.3535, "step": 13153 }, { "epoch": 1.6007301490721022, "grad_norm": 2.240915298461914, "learning_rate": 1.9780131258516578e-06, "loss": 0.3888, "step": 13154 }, { "epoch": 1.6008518405841192, "grad_norm": 1.3131917715072632, "learning_rate": 1.976848602656144e-06, "loss": 0.3645, "step": 13155 }, { "epoch": 1.6009735320961362, "grad_norm": 1.5870047807693481, "learning_rate": 1.9756843847581463e-06, "loss": 0.3661, "step": 13156 }, { "epoch": 1.6010952236081533, "grad_norm": 1.7342826128005981, "learning_rate": 1.9745204722019683e-06, "loss": 0.3082, "step": 13157 }, { "epoch": 1.6012169151201703, "grad_norm": 1.8618814945220947, "learning_rate": 1.973356865031898e-06, "loss": 0.4428, "step": 13158 }, { "epoch": 1.6013386066321873, "grad_norm": 1.919959545135498, "learning_rate": 1.9721935632922107e-06, "loss": 0.4016, "step": 13159 }, { "epoch": 1.6014602981442043, "grad_norm": 1.9391942024230957, "learning_rate": 1.9710305670271778e-06, "loss": 0.3585, "step": 13160 }, { "epoch": 1.6015819896562213, "grad_norm": 1.6574639081954956, "learning_rate": 1.969867876281051e-06, "loss": 0.3401, "step": 13161 }, { "epoch": 1.6017036811682384, "grad_norm": 3.038248062133789, "learning_rate": 1.9687054910980695e-06, "loss": 0.299, "step": 13162 }, { "epoch": 1.6018253726802556, "grad_norm": 1.6359108686447144, "learning_rate": 1.9675434115224713e-06, "loss": 0.4085, "step": 13163 }, { "epoch": 1.6019470641922726, "grad_norm": 2.4834847450256348, "learning_rate": 1.9663816375984725e-06, "loss": 0.3036, "step": 13164 }, { "epoch": 1.6020687557042896, "grad_norm": 3.4020845890045166, "learning_rate": 1.9652201693702775e-06, "loss": 0.3211, "step": 13165 }, { "epoch": 1.6021904472163067, "grad_norm": 1.8100526332855225, "learning_rate": 1.9640590068820896e-06, "loss": 0.4085, "step": 13166 }, { "epoch": 1.6023121387283237, "grad_norm": 1.6588760614395142, "learning_rate": 1.9628981501780897e-06, "loss": 0.3957, "step": 13167 }, { "epoch": 1.6024338302403407, "grad_norm": 1.6608449220657349, "learning_rate": 1.961737599302449e-06, "loss": 0.3919, "step": 13168 }, { "epoch": 1.602555521752358, "grad_norm": 2.2597062587738037, "learning_rate": 1.9605773542993357e-06, "loss": 0.3197, "step": 13169 }, { "epoch": 1.602677213264375, "grad_norm": 2.0825250148773193, "learning_rate": 1.95941741521289e-06, "loss": 0.3723, "step": 13170 }, { "epoch": 1.602798904776392, "grad_norm": 1.9156825542449951, "learning_rate": 1.958257782087256e-06, "loss": 0.314, "step": 13171 }, { "epoch": 1.602920596288409, "grad_norm": 1.633423924446106, "learning_rate": 1.9570984549665607e-06, "loss": 0.3614, "step": 13172 }, { "epoch": 1.603042287800426, "grad_norm": 1.5988203287124634, "learning_rate": 1.955939433894913e-06, "loss": 0.3831, "step": 13173 }, { "epoch": 1.603163979312443, "grad_norm": 1.5619373321533203, "learning_rate": 1.9547807189164236e-06, "loss": 0.3824, "step": 13174 }, { "epoch": 1.60328567082446, "grad_norm": 1.826727271080017, "learning_rate": 1.9536223100751793e-06, "loss": 0.4226, "step": 13175 }, { "epoch": 1.603407362336477, "grad_norm": 1.3738070726394653, "learning_rate": 1.95246420741526e-06, "loss": 0.3401, "step": 13176 }, { "epoch": 1.603529053848494, "grad_norm": 2.9618113040924072, "learning_rate": 1.951306410980738e-06, "loss": 0.3939, "step": 13177 }, { "epoch": 1.603650745360511, "grad_norm": 1.620686411857605, "learning_rate": 1.9501489208156654e-06, "loss": 0.318, "step": 13178 }, { "epoch": 1.6037724368725281, "grad_norm": 2.3203670978546143, "learning_rate": 1.9489917369640865e-06, "loss": 0.3824, "step": 13179 }, { "epoch": 1.6038941283845451, "grad_norm": 1.9192984104156494, "learning_rate": 1.9478348594700424e-06, "loss": 0.334, "step": 13180 }, { "epoch": 1.6040158198965622, "grad_norm": 1.9321638345718384, "learning_rate": 1.9466782883775437e-06, "loss": 0.4316, "step": 13181 }, { "epoch": 1.6041375114085792, "grad_norm": 2.7354652881622314, "learning_rate": 1.9455220237306085e-06, "loss": 0.4319, "step": 13182 }, { "epoch": 1.6042592029205962, "grad_norm": 3.1546716690063477, "learning_rate": 1.9443660655732312e-06, "loss": 0.4116, "step": 13183 }, { "epoch": 1.6043808944326132, "grad_norm": 2.883622169494629, "learning_rate": 1.943210413949398e-06, "loss": 0.3548, "step": 13184 }, { "epoch": 1.6045025859446302, "grad_norm": 2.8604094982147217, "learning_rate": 1.942055068903087e-06, "loss": 0.3278, "step": 13185 }, { "epoch": 1.6046242774566473, "grad_norm": 2.840311288833618, "learning_rate": 1.9409000304782588e-06, "loss": 0.4517, "step": 13186 }, { "epoch": 1.6047459689686643, "grad_norm": 1.6016236543655396, "learning_rate": 1.9397452987188646e-06, "loss": 0.3896, "step": 13187 }, { "epoch": 1.6048676604806815, "grad_norm": 1.4885835647583008, "learning_rate": 1.9385908736688475e-06, "loss": 0.3517, "step": 13188 }, { "epoch": 1.6049893519926985, "grad_norm": 2.3673877716064453, "learning_rate": 1.937436755372132e-06, "loss": 0.3725, "step": 13189 }, { "epoch": 1.6051110435047156, "grad_norm": 1.6779899597167969, "learning_rate": 1.9362829438726384e-06, "loss": 0.3373, "step": 13190 }, { "epoch": 1.6052327350167326, "grad_norm": 2.5539908409118652, "learning_rate": 1.9351294392142706e-06, "loss": 0.3235, "step": 13191 }, { "epoch": 1.6053544265287496, "grad_norm": 3.1633963584899902, "learning_rate": 1.93397624144092e-06, "loss": 0.4091, "step": 13192 }, { "epoch": 1.6054761180407666, "grad_norm": 1.5882463455200195, "learning_rate": 1.93282335059647e-06, "loss": 0.345, "step": 13193 }, { "epoch": 1.6055978095527836, "grad_norm": 1.4868581295013428, "learning_rate": 1.9316707667247893e-06, "loss": 0.3462, "step": 13194 }, { "epoch": 1.6057195010648009, "grad_norm": 2.319028615951538, "learning_rate": 1.930518489869734e-06, "loss": 0.421, "step": 13195 }, { "epoch": 1.605841192576818, "grad_norm": 1.7415883541107178, "learning_rate": 1.9293665200751545e-06, "loss": 0.4073, "step": 13196 }, { "epoch": 1.605962884088835, "grad_norm": 1.8426426649093628, "learning_rate": 1.9282148573848825e-06, "loss": 0.4099, "step": 13197 }, { "epoch": 1.606084575600852, "grad_norm": 1.710612416267395, "learning_rate": 1.9270635018427463e-06, "loss": 0.3859, "step": 13198 }, { "epoch": 1.606206267112869, "grad_norm": 2.2894039154052734, "learning_rate": 1.9259124534925533e-06, "loss": 0.3537, "step": 13199 }, { "epoch": 1.606327958624886, "grad_norm": 2.2786309719085693, "learning_rate": 1.9247617123781005e-06, "loss": 0.3482, "step": 13200 }, { "epoch": 1.606449650136903, "grad_norm": 1.9055657386779785, "learning_rate": 1.9236112785431825e-06, "loss": 0.3787, "step": 13201 }, { "epoch": 1.60657134164892, "grad_norm": 1.6727559566497803, "learning_rate": 1.9224611520315726e-06, "loss": 0.355, "step": 13202 }, { "epoch": 1.606693033160937, "grad_norm": 1.7929413318634033, "learning_rate": 1.921311332887036e-06, "loss": 0.3634, "step": 13203 }, { "epoch": 1.606814724672954, "grad_norm": 1.9333524703979492, "learning_rate": 1.9201618211533246e-06, "loss": 0.3852, "step": 13204 }, { "epoch": 1.606936416184971, "grad_norm": 1.8331294059753418, "learning_rate": 1.9190126168741776e-06, "loss": 0.4045, "step": 13205 }, { "epoch": 1.607058107696988, "grad_norm": 2.7449755668640137, "learning_rate": 1.9178637200933303e-06, "loss": 0.3119, "step": 13206 }, { "epoch": 1.6071797992090051, "grad_norm": 1.8609424829483032, "learning_rate": 1.9167151308544973e-06, "loss": 0.3977, "step": 13207 }, { "epoch": 1.6073014907210221, "grad_norm": 1.6123592853546143, "learning_rate": 1.9155668492013823e-06, "loss": 0.374, "step": 13208 }, { "epoch": 1.6074231822330391, "grad_norm": 2.7192881107330322, "learning_rate": 1.914418875177685e-06, "loss": 0.3939, "step": 13209 }, { "epoch": 1.6075448737450562, "grad_norm": 2.2225780487060547, "learning_rate": 1.9132712088270854e-06, "loss": 0.3548, "step": 13210 }, { "epoch": 1.6076665652570732, "grad_norm": 4.248149394989014, "learning_rate": 1.9121238501932537e-06, "loss": 0.4465, "step": 13211 }, { "epoch": 1.6077882567690902, "grad_norm": 1.9062058925628662, "learning_rate": 1.9109767993198513e-06, "loss": 0.3828, "step": 13212 }, { "epoch": 1.6079099482811072, "grad_norm": 2.6968767642974854, "learning_rate": 1.9098300562505266e-06, "loss": 0.4181, "step": 13213 }, { "epoch": 1.6080316397931245, "grad_norm": 1.5170238018035889, "learning_rate": 1.9086836210289107e-06, "loss": 0.3497, "step": 13214 }, { "epoch": 1.6081533313051415, "grad_norm": 2.2596681118011475, "learning_rate": 1.907537493698637e-06, "loss": 0.3901, "step": 13215 }, { "epoch": 1.6082750228171585, "grad_norm": 2.2486374378204346, "learning_rate": 1.9063916743033061e-06, "loss": 0.3655, "step": 13216 }, { "epoch": 1.6083967143291755, "grad_norm": 1.4816728830337524, "learning_rate": 1.905246162886527e-06, "loss": 0.364, "step": 13217 }, { "epoch": 1.6085184058411925, "grad_norm": 1.6641440391540527, "learning_rate": 1.904100959491888e-06, "loss": 0.3946, "step": 13218 }, { "epoch": 1.6086400973532096, "grad_norm": 1.7364550828933716, "learning_rate": 1.9029560641629618e-06, "loss": 0.4108, "step": 13219 }, { "epoch": 1.6087617888652268, "grad_norm": 1.4976606369018555, "learning_rate": 1.9018114769433193e-06, "loss": 0.3788, "step": 13220 }, { "epoch": 1.6088834803772438, "grad_norm": 1.3347288370132446, "learning_rate": 1.9006671978765135e-06, "loss": 0.365, "step": 13221 }, { "epoch": 1.6090051718892608, "grad_norm": 1.9878966808319092, "learning_rate": 1.8995232270060826e-06, "loss": 0.3747, "step": 13222 }, { "epoch": 1.6091268634012779, "grad_norm": 2.4338021278381348, "learning_rate": 1.898379564375562e-06, "loss": 0.3688, "step": 13223 }, { "epoch": 1.6092485549132949, "grad_norm": 1.8237535953521729, "learning_rate": 1.897236210028469e-06, "loss": 0.3574, "step": 13224 }, { "epoch": 1.609370246425312, "grad_norm": 2.5246663093566895, "learning_rate": 1.896093164008307e-06, "loss": 0.3475, "step": 13225 }, { "epoch": 1.609491937937329, "grad_norm": 2.1172122955322266, "learning_rate": 1.8949504263585804e-06, "loss": 0.3452, "step": 13226 }, { "epoch": 1.609613629449346, "grad_norm": 1.6885406970977783, "learning_rate": 1.8938079971227608e-06, "loss": 0.3291, "step": 13227 }, { "epoch": 1.609735320961363, "grad_norm": 2.7502851486206055, "learning_rate": 1.8926658763443284e-06, "loss": 0.3592, "step": 13228 }, { "epoch": 1.60985701247338, "grad_norm": 2.3753843307495117, "learning_rate": 1.89152406406674e-06, "loss": 0.3481, "step": 13229 }, { "epoch": 1.609978703985397, "grad_norm": 1.8891501426696777, "learning_rate": 1.8903825603334426e-06, "loss": 0.4036, "step": 13230 }, { "epoch": 1.610100395497414, "grad_norm": 2.0609076023101807, "learning_rate": 1.889241365187877e-06, "loss": 0.3969, "step": 13231 }, { "epoch": 1.610222087009431, "grad_norm": 2.677816390991211, "learning_rate": 1.8881004786734668e-06, "loss": 0.3087, "step": 13232 }, { "epoch": 1.610343778521448, "grad_norm": 1.7660808563232422, "learning_rate": 1.8869599008336203e-06, "loss": 0.332, "step": 13233 }, { "epoch": 1.610465470033465, "grad_norm": 1.5322084426879883, "learning_rate": 1.885819631711746e-06, "loss": 0.3374, "step": 13234 }, { "epoch": 1.610587161545482, "grad_norm": 2.8294854164123535, "learning_rate": 1.8846796713512305e-06, "loss": 0.4058, "step": 13235 }, { "epoch": 1.6107088530574991, "grad_norm": 1.9636582136154175, "learning_rate": 1.8835400197954479e-06, "loss": 0.3704, "step": 13236 }, { "epoch": 1.6108305445695161, "grad_norm": 1.8096963167190552, "learning_rate": 1.8824006770877712e-06, "loss": 0.3478, "step": 13237 }, { "epoch": 1.6109522360815332, "grad_norm": 1.3713093996047974, "learning_rate": 1.8812616432715503e-06, "loss": 0.3484, "step": 13238 }, { "epoch": 1.6110739275935504, "grad_norm": 4.010976314544678, "learning_rate": 1.8801229183901293e-06, "loss": 0.4626, "step": 13239 }, { "epoch": 1.6111956191055674, "grad_norm": 1.7447024583816528, "learning_rate": 1.8789845024868381e-06, "loss": 0.3495, "step": 13240 }, { "epoch": 1.6113173106175844, "grad_norm": 1.570378065109253, "learning_rate": 1.877846395604993e-06, "loss": 0.4024, "step": 13241 }, { "epoch": 1.6114390021296014, "grad_norm": 1.969346523284912, "learning_rate": 1.8767085977879085e-06, "loss": 0.3739, "step": 13242 }, { "epoch": 1.6115606936416185, "grad_norm": 2.5684289932250977, "learning_rate": 1.8755711090788753e-06, "loss": 0.4212, "step": 13243 }, { "epoch": 1.6116823851536355, "grad_norm": 1.56687593460083, "learning_rate": 1.8744339295211755e-06, "loss": 0.3667, "step": 13244 }, { "epoch": 1.6118040766656527, "grad_norm": 2.4005539417266846, "learning_rate": 1.8732970591580857e-06, "loss": 0.3319, "step": 13245 }, { "epoch": 1.6119257681776697, "grad_norm": 2.3146398067474365, "learning_rate": 1.8721604980328656e-06, "loss": 0.3721, "step": 13246 }, { "epoch": 1.6120474596896868, "grad_norm": 1.469888687133789, "learning_rate": 1.8710242461887584e-06, "loss": 0.3417, "step": 13247 }, { "epoch": 1.6121691512017038, "grad_norm": 1.7390164136886597, "learning_rate": 1.8698883036690075e-06, "loss": 0.3542, "step": 13248 }, { "epoch": 1.6122908427137208, "grad_norm": 1.7231286764144897, "learning_rate": 1.8687526705168356e-06, "loss": 0.3592, "step": 13249 }, { "epoch": 1.6124125342257378, "grad_norm": 1.5603148937225342, "learning_rate": 1.8676173467754544e-06, "loss": 0.3409, "step": 13250 }, { "epoch": 1.6125342257377548, "grad_norm": 2.4028549194335938, "learning_rate": 1.8664823324880677e-06, "loss": 0.3115, "step": 13251 }, { "epoch": 1.6126559172497719, "grad_norm": 1.8136959075927734, "learning_rate": 1.8653476276978599e-06, "loss": 0.3528, "step": 13252 }, { "epoch": 1.6127776087617889, "grad_norm": 1.9303573369979858, "learning_rate": 1.8642132324480156e-06, "loss": 0.361, "step": 13253 }, { "epoch": 1.612899300273806, "grad_norm": 1.7942339181900024, "learning_rate": 1.8630791467816979e-06, "loss": 0.379, "step": 13254 }, { "epoch": 1.613020991785823, "grad_norm": 1.3791202306747437, "learning_rate": 1.8619453707420586e-06, "loss": 0.3402, "step": 13255 }, { "epoch": 1.61314268329784, "grad_norm": 1.652978777885437, "learning_rate": 1.8608119043722462e-06, "loss": 0.3817, "step": 13256 }, { "epoch": 1.613264374809857, "grad_norm": 2.38712739944458, "learning_rate": 1.8596787477153844e-06, "loss": 0.3303, "step": 13257 }, { "epoch": 1.613386066321874, "grad_norm": 1.327331781387329, "learning_rate": 1.858545900814599e-06, "loss": 0.3295, "step": 13258 }, { "epoch": 1.613507757833891, "grad_norm": 2.4914231300354004, "learning_rate": 1.8574133637129932e-06, "loss": 0.4301, "step": 13259 }, { "epoch": 1.613629449345908, "grad_norm": 2.065092086791992, "learning_rate": 1.8562811364536614e-06, "loss": 0.3564, "step": 13260 }, { "epoch": 1.613751140857925, "grad_norm": 1.8509316444396973, "learning_rate": 1.855149219079694e-06, "loss": 0.344, "step": 13261 }, { "epoch": 1.613872832369942, "grad_norm": 2.01120924949646, "learning_rate": 1.8540176116341547e-06, "loss": 0.3999, "step": 13262 }, { "epoch": 1.613994523881959, "grad_norm": 2.0076661109924316, "learning_rate": 1.8528863141601038e-06, "loss": 0.3909, "step": 13263 }, { "epoch": 1.6141162153939763, "grad_norm": 2.4938056468963623, "learning_rate": 1.851755326700595e-06, "loss": 0.3445, "step": 13264 }, { "epoch": 1.6142379069059933, "grad_norm": 1.689375638961792, "learning_rate": 1.850624649298659e-06, "loss": 0.4045, "step": 13265 }, { "epoch": 1.6143595984180104, "grad_norm": 1.6277165412902832, "learning_rate": 1.8494942819973272e-06, "loss": 0.3972, "step": 13266 }, { "epoch": 1.6144812899300274, "grad_norm": 2.8080849647521973, "learning_rate": 1.8483642248396072e-06, "loss": 0.4208, "step": 13267 }, { "epoch": 1.6146029814420444, "grad_norm": 1.7079126834869385, "learning_rate": 1.8472344778685002e-06, "loss": 0.3858, "step": 13268 }, { "epoch": 1.6147246729540614, "grad_norm": 2.390308380126953, "learning_rate": 1.8461050411269975e-06, "loss": 0.4407, "step": 13269 }, { "epoch": 1.6148463644660787, "grad_norm": 3.835757255554199, "learning_rate": 1.844975914658077e-06, "loss": 0.3367, "step": 13270 }, { "epoch": 1.6149680559780957, "grad_norm": 1.788669466972351, "learning_rate": 1.8438470985046997e-06, "loss": 0.3264, "step": 13271 }, { "epoch": 1.6150897474901127, "grad_norm": 2.648322582244873, "learning_rate": 1.842718592709829e-06, "loss": 0.4316, "step": 13272 }, { "epoch": 1.6152114390021297, "grad_norm": 1.4995954036712646, "learning_rate": 1.841590397316394e-06, "loss": 0.3349, "step": 13273 }, { "epoch": 1.6153331305141467, "grad_norm": 1.5123441219329834, "learning_rate": 1.8404625123673336e-06, "loss": 0.3521, "step": 13274 }, { "epoch": 1.6154548220261638, "grad_norm": 1.4551340341567993, "learning_rate": 1.8393349379055647e-06, "loss": 0.3822, "step": 13275 }, { "epoch": 1.6155765135381808, "grad_norm": 1.5746889114379883, "learning_rate": 1.8382076739739907e-06, "loss": 0.3589, "step": 13276 }, { "epoch": 1.6156982050501978, "grad_norm": 2.464028835296631, "learning_rate": 1.8370807206155106e-06, "loss": 0.3583, "step": 13277 }, { "epoch": 1.6158198965622148, "grad_norm": 2.8867733478546143, "learning_rate": 1.8359540778730066e-06, "loss": 0.3368, "step": 13278 }, { "epoch": 1.6159415880742318, "grad_norm": 2.114790201187134, "learning_rate": 1.8348277457893449e-06, "loss": 0.3481, "step": 13279 }, { "epoch": 1.6160632795862488, "grad_norm": 1.7462207078933716, "learning_rate": 1.8337017244073907e-06, "loss": 0.392, "step": 13280 }, { "epoch": 1.6161849710982659, "grad_norm": 1.5940678119659424, "learning_rate": 1.83257601376999e-06, "loss": 0.3301, "step": 13281 }, { "epoch": 1.6163066626102829, "grad_norm": 1.5645939111709595, "learning_rate": 1.831450613919975e-06, "loss": 0.3141, "step": 13282 }, { "epoch": 1.6164283541223, "grad_norm": 1.8331551551818848, "learning_rate": 1.830325524900175e-06, "loss": 0.3663, "step": 13283 }, { "epoch": 1.616550045634317, "grad_norm": 2.1362781524658203, "learning_rate": 1.8292007467533978e-06, "loss": 0.4157, "step": 13284 }, { "epoch": 1.616671737146334, "grad_norm": 1.87754487991333, "learning_rate": 1.828076279522446e-06, "loss": 0.3922, "step": 13285 }, { "epoch": 1.616793428658351, "grad_norm": 1.4716932773590088, "learning_rate": 1.8269521232501065e-06, "loss": 0.3364, "step": 13286 }, { "epoch": 1.616915120170368, "grad_norm": 2.4856133460998535, "learning_rate": 1.8258282779791524e-06, "loss": 0.4281, "step": 13287 }, { "epoch": 1.617036811682385, "grad_norm": 1.9493603706359863, "learning_rate": 1.8247047437523557e-06, "loss": 0.3567, "step": 13288 }, { "epoch": 1.6171585031944022, "grad_norm": 3.3769569396972656, "learning_rate": 1.8235815206124653e-06, "loss": 0.4216, "step": 13289 }, { "epoch": 1.6172801947064193, "grad_norm": 1.5918591022491455, "learning_rate": 1.822458608602219e-06, "loss": 0.3517, "step": 13290 }, { "epoch": 1.6174018862184363, "grad_norm": 2.097656011581421, "learning_rate": 1.8213360077643527e-06, "loss": 0.3257, "step": 13291 }, { "epoch": 1.6175235777304533, "grad_norm": 1.5599029064178467, "learning_rate": 1.8202137181415802e-06, "loss": 0.3685, "step": 13292 }, { "epoch": 1.6176452692424703, "grad_norm": 2.296247959136963, "learning_rate": 1.819091739776604e-06, "loss": 0.4051, "step": 13293 }, { "epoch": 1.6177669607544873, "grad_norm": 1.7134901285171509, "learning_rate": 1.817970072712123e-06, "loss": 0.3444, "step": 13294 }, { "epoch": 1.6178886522665044, "grad_norm": 1.8877848386764526, "learning_rate": 1.8168487169908166e-06, "loss": 0.3817, "step": 13295 }, { "epoch": 1.6180103437785216, "grad_norm": 2.377495527267456, "learning_rate": 1.8157276726553552e-06, "loss": 0.3206, "step": 13296 }, { "epoch": 1.6181320352905386, "grad_norm": 1.5760836601257324, "learning_rate": 1.8146069397483956e-06, "loss": 0.356, "step": 13297 }, { "epoch": 1.6182537268025556, "grad_norm": 2.420151710510254, "learning_rate": 1.8134865183125828e-06, "loss": 0.3043, "step": 13298 }, { "epoch": 1.6183754183145727, "grad_norm": 1.7536336183547974, "learning_rate": 1.8123664083905556e-06, "loss": 0.3705, "step": 13299 }, { "epoch": 1.6184971098265897, "grad_norm": 1.9137368202209473, "learning_rate": 1.8112466100249337e-06, "loss": 0.3887, "step": 13300 }, { "epoch": 1.6186188013386067, "grad_norm": 2.0884597301483154, "learning_rate": 1.8101271232583263e-06, "loss": 0.3329, "step": 13301 }, { "epoch": 1.6187404928506237, "grad_norm": 2.6297354698181152, "learning_rate": 1.8090079481333357e-06, "loss": 0.395, "step": 13302 }, { "epoch": 1.6188621843626407, "grad_norm": 2.0360443592071533, "learning_rate": 1.8078890846925478e-06, "loss": 0.3607, "step": 13303 }, { "epoch": 1.6189838758746578, "grad_norm": 3.129077672958374, "learning_rate": 1.8067705329785334e-06, "loss": 0.3456, "step": 13304 }, { "epoch": 1.6191055673866748, "grad_norm": 1.5659916400909424, "learning_rate": 1.8056522930338627e-06, "loss": 0.3676, "step": 13305 }, { "epoch": 1.6192272588986918, "grad_norm": 1.5475733280181885, "learning_rate": 1.8045343649010838e-06, "loss": 0.3443, "step": 13306 }, { "epoch": 1.6193489504107088, "grad_norm": 3.415072202682495, "learning_rate": 1.803416748622736e-06, "loss": 0.4651, "step": 13307 }, { "epoch": 1.6194706419227258, "grad_norm": 1.8677818775177002, "learning_rate": 1.8022994442413466e-06, "loss": 0.4125, "step": 13308 }, { "epoch": 1.6195923334347428, "grad_norm": 1.4603604078292847, "learning_rate": 1.801182451799428e-06, "loss": 0.3511, "step": 13309 }, { "epoch": 1.6197140249467599, "grad_norm": 1.7580389976501465, "learning_rate": 1.800065771339492e-06, "loss": 0.3603, "step": 13310 }, { "epoch": 1.6198357164587769, "grad_norm": 2.6074488162994385, "learning_rate": 1.7989494029040255e-06, "loss": 0.4099, "step": 13311 }, { "epoch": 1.619957407970794, "grad_norm": 1.353723406791687, "learning_rate": 1.797833346535507e-06, "loss": 0.3365, "step": 13312 }, { "epoch": 1.620079099482811, "grad_norm": 1.5630590915679932, "learning_rate": 1.79671760227641e-06, "loss": 0.2888, "step": 13313 }, { "epoch": 1.6202007909948282, "grad_norm": 2.4240448474884033, "learning_rate": 1.7956021701691873e-06, "loss": 0.4259, "step": 13314 }, { "epoch": 1.6203224825068452, "grad_norm": 3.40415620803833, "learning_rate": 1.7944870502562827e-06, "loss": 0.4158, "step": 13315 }, { "epoch": 1.6204441740188622, "grad_norm": 1.6393496990203857, "learning_rate": 1.7933722425801326e-06, "loss": 0.3731, "step": 13316 }, { "epoch": 1.6205658655308792, "grad_norm": 3.0124433040618896, "learning_rate": 1.7922577471831526e-06, "loss": 0.4152, "step": 13317 }, { "epoch": 1.6206875570428962, "grad_norm": 1.7624109983444214, "learning_rate": 1.791143564107759e-06, "loss": 0.3863, "step": 13318 }, { "epoch": 1.6208092485549133, "grad_norm": 1.688957691192627, "learning_rate": 1.7900296933963424e-06, "loss": 0.3945, "step": 13319 }, { "epoch": 1.6209309400669303, "grad_norm": 2.223710536956787, "learning_rate": 1.7889161350912876e-06, "loss": 0.4087, "step": 13320 }, { "epoch": 1.6210526315789475, "grad_norm": 4.020733833312988, "learning_rate": 1.7878028892349719e-06, "loss": 0.2963, "step": 13321 }, { "epoch": 1.6211743230909645, "grad_norm": 1.4559962749481201, "learning_rate": 1.7866899558697549e-06, "loss": 0.3353, "step": 13322 }, { "epoch": 1.6212960146029816, "grad_norm": 3.1958560943603516, "learning_rate": 1.7855773350379824e-06, "loss": 0.3114, "step": 13323 }, { "epoch": 1.6214177061149986, "grad_norm": 1.6606889963150024, "learning_rate": 1.7844650267819973e-06, "loss": 0.3654, "step": 13324 }, { "epoch": 1.6215393976270156, "grad_norm": 1.6357158422470093, "learning_rate": 1.7833530311441215e-06, "loss": 0.3343, "step": 13325 }, { "epoch": 1.6216610891390326, "grad_norm": 2.2711021900177, "learning_rate": 1.7822413481666733e-06, "loss": 0.4024, "step": 13326 }, { "epoch": 1.6217827806510496, "grad_norm": 4.657652378082275, "learning_rate": 1.781129977891951e-06, "loss": 0.4325, "step": 13327 }, { "epoch": 1.6219044721630667, "grad_norm": 1.7236744165420532, "learning_rate": 1.780018920362243e-06, "loss": 0.3315, "step": 13328 }, { "epoch": 1.6220261636750837, "grad_norm": 2.7690746784210205, "learning_rate": 1.7789081756198324e-06, "loss": 0.3449, "step": 13329 }, { "epoch": 1.6221478551871007, "grad_norm": 1.634344220161438, "learning_rate": 1.7777977437069838e-06, "loss": 0.3698, "step": 13330 }, { "epoch": 1.6222695466991177, "grad_norm": 1.9576445817947388, "learning_rate": 1.7766876246659458e-06, "loss": 0.3291, "step": 13331 }, { "epoch": 1.6223912382111347, "grad_norm": 1.6200907230377197, "learning_rate": 1.775577818538967e-06, "loss": 0.3436, "step": 13332 }, { "epoch": 1.6225129297231518, "grad_norm": 2.087721586227417, "learning_rate": 1.7744683253682737e-06, "loss": 0.3358, "step": 13333 }, { "epoch": 1.6226346212351688, "grad_norm": 1.6513155698776245, "learning_rate": 1.7733591451960896e-06, "loss": 0.3475, "step": 13334 }, { "epoch": 1.6227563127471858, "grad_norm": 2.0104594230651855, "learning_rate": 1.7722502780646178e-06, "loss": 0.3424, "step": 13335 }, { "epoch": 1.6228780042592028, "grad_norm": 1.5387104749679565, "learning_rate": 1.77114172401605e-06, "loss": 0.3474, "step": 13336 }, { "epoch": 1.6229996957712198, "grad_norm": 1.7255566120147705, "learning_rate": 1.7700334830925758e-06, "loss": 0.3607, "step": 13337 }, { "epoch": 1.6231213872832368, "grad_norm": 2.8821768760681152, "learning_rate": 1.7689255553363627e-06, "loss": 0.4013, "step": 13338 }, { "epoch": 1.6232430787952539, "grad_norm": 1.585805058479309, "learning_rate": 1.7678179407895667e-06, "loss": 0.3556, "step": 13339 }, { "epoch": 1.623364770307271, "grad_norm": 1.6983035802841187, "learning_rate": 1.7667106394943413e-06, "loss": 0.3919, "step": 13340 }, { "epoch": 1.6234864618192881, "grad_norm": 2.624363422393799, "learning_rate": 1.7656036514928166e-06, "loss": 0.3028, "step": 13341 }, { "epoch": 1.6236081533313051, "grad_norm": 1.7962149381637573, "learning_rate": 1.764496976827118e-06, "loss": 0.3503, "step": 13342 }, { "epoch": 1.6237298448433222, "grad_norm": 1.9360661506652832, "learning_rate": 1.7633906155393566e-06, "loss": 0.3293, "step": 13343 }, { "epoch": 1.6238515363553392, "grad_norm": 2.4604246616363525, "learning_rate": 1.7622845676716271e-06, "loss": 0.3854, "step": 13344 }, { "epoch": 1.6239732278673562, "grad_norm": 1.814049243927002, "learning_rate": 1.761178833266024e-06, "loss": 0.3753, "step": 13345 }, { "epoch": 1.6240949193793734, "grad_norm": 2.614799737930298, "learning_rate": 1.7600734123646202e-06, "loss": 0.4417, "step": 13346 }, { "epoch": 1.6242166108913905, "grad_norm": 3.1902172565460205, "learning_rate": 1.7589683050094763e-06, "loss": 0.4114, "step": 13347 }, { "epoch": 1.6243383024034075, "grad_norm": 1.580715298652649, "learning_rate": 1.757863511242649e-06, "loss": 0.3327, "step": 13348 }, { "epoch": 1.6244599939154245, "grad_norm": 1.7152653932571411, "learning_rate": 1.7567590311061743e-06, "loss": 0.3681, "step": 13349 }, { "epoch": 1.6245816854274415, "grad_norm": 1.7957149744033813, "learning_rate": 1.755654864642079e-06, "loss": 0.3784, "step": 13350 }, { "epoch": 1.6247033769394585, "grad_norm": 1.749819278717041, "learning_rate": 1.7545510118923847e-06, "loss": 0.3579, "step": 13351 }, { "epoch": 1.6248250684514756, "grad_norm": 1.7374850511550903, "learning_rate": 1.7534474728990902e-06, "loss": 0.3683, "step": 13352 }, { "epoch": 1.6249467599634926, "grad_norm": 1.8143401145935059, "learning_rate": 1.7523442477041885e-06, "loss": 0.386, "step": 13353 }, { "epoch": 1.6250684514755096, "grad_norm": 1.9732637405395508, "learning_rate": 1.7512413363496606e-06, "loss": 0.3592, "step": 13354 }, { "epoch": 1.6251901429875266, "grad_norm": 2.349351167678833, "learning_rate": 1.7501387388774716e-06, "loss": 0.3869, "step": 13355 }, { "epoch": 1.6253118344995436, "grad_norm": 2.04888653755188, "learning_rate": 1.7490364553295825e-06, "loss": 0.3378, "step": 13356 }, { "epoch": 1.6254335260115607, "grad_norm": 1.7062122821807861, "learning_rate": 1.7479344857479342e-06, "loss": 0.3666, "step": 13357 }, { "epoch": 1.6255552175235777, "grad_norm": 1.9820282459259033, "learning_rate": 1.7468328301744564e-06, "loss": 0.3677, "step": 13358 }, { "epoch": 1.6256769090355947, "grad_norm": 2.2650251388549805, "learning_rate": 1.7457314886510756e-06, "loss": 0.3629, "step": 13359 }, { "epoch": 1.6257986005476117, "grad_norm": 1.5225366353988647, "learning_rate": 1.7446304612196973e-06, "loss": 0.396, "step": 13360 }, { "epoch": 1.6259202920596287, "grad_norm": 1.715078592300415, "learning_rate": 1.7435297479222157e-06, "loss": 0.3218, "step": 13361 }, { "epoch": 1.6260419835716458, "grad_norm": 2.031444787979126, "learning_rate": 1.7424293488005196e-06, "loss": 0.3531, "step": 13362 }, { "epoch": 1.6261636750836628, "grad_norm": 1.4962328672409058, "learning_rate": 1.7413292638964773e-06, "loss": 0.3546, "step": 13363 }, { "epoch": 1.6262853665956798, "grad_norm": 2.2736563682556152, "learning_rate": 1.7402294932519525e-06, "loss": 0.4149, "step": 13364 }, { "epoch": 1.626407058107697, "grad_norm": 2.057826042175293, "learning_rate": 1.7391300369087928e-06, "loss": 0.3686, "step": 13365 }, { "epoch": 1.626528749619714, "grad_norm": 4.780618667602539, "learning_rate": 1.73803089490883e-06, "loss": 0.4655, "step": 13366 }, { "epoch": 1.626650441131731, "grad_norm": 1.945212721824646, "learning_rate": 1.736932067293896e-06, "loss": 0.413, "step": 13367 }, { "epoch": 1.626772132643748, "grad_norm": 2.0224502086639404, "learning_rate": 1.7358335541058013e-06, "loss": 0.3933, "step": 13368 }, { "epoch": 1.6268938241557651, "grad_norm": 2.113107919692993, "learning_rate": 1.7347353553863433e-06, "loss": 0.3167, "step": 13369 }, { "epoch": 1.6270155156677821, "grad_norm": 1.8089020252227783, "learning_rate": 1.7336374711773152e-06, "loss": 0.3683, "step": 13370 }, { "epoch": 1.6271372071797994, "grad_norm": 2.2153992652893066, "learning_rate": 1.7325399015204913e-06, "loss": 0.3509, "step": 13371 }, { "epoch": 1.6272588986918164, "grad_norm": 1.6916420459747314, "learning_rate": 1.7314426464576351e-06, "loss": 0.4115, "step": 13372 }, { "epoch": 1.6273805902038334, "grad_norm": 2.248929023742676, "learning_rate": 1.730345706030503e-06, "loss": 0.3691, "step": 13373 }, { "epoch": 1.6275022817158504, "grad_norm": 2.0479023456573486, "learning_rate": 1.7292490802808349e-06, "loss": 0.3565, "step": 13374 }, { "epoch": 1.6276239732278674, "grad_norm": 2.097653388977051, "learning_rate": 1.7281527692503553e-06, "loss": 0.4014, "step": 13375 }, { "epoch": 1.6277456647398845, "grad_norm": 1.6671005487442017, "learning_rate": 1.7270567729807897e-06, "loss": 0.3532, "step": 13376 }, { "epoch": 1.6278673562519015, "grad_norm": 1.3938034772872925, "learning_rate": 1.7259610915138336e-06, "loss": 0.3799, "step": 13377 }, { "epoch": 1.6279890477639185, "grad_norm": 1.6788058280944824, "learning_rate": 1.724865724891187e-06, "loss": 0.3988, "step": 13378 }, { "epoch": 1.6281107392759355, "grad_norm": 3.041322708129883, "learning_rate": 1.7237706731545278e-06, "loss": 0.2951, "step": 13379 }, { "epoch": 1.6282324307879525, "grad_norm": 2.0745925903320312, "learning_rate": 1.7226759363455225e-06, "loss": 0.337, "step": 13380 }, { "epoch": 1.6283541222999696, "grad_norm": 2.732539176940918, "learning_rate": 1.7215815145058335e-06, "loss": 0.476, "step": 13381 }, { "epoch": 1.6284758138119866, "grad_norm": 1.6170086860656738, "learning_rate": 1.7204874076771038e-06, "loss": 0.3839, "step": 13382 }, { "epoch": 1.6285975053240036, "grad_norm": 2.6791563034057617, "learning_rate": 1.7193936159009627e-06, "loss": 0.4347, "step": 13383 }, { "epoch": 1.6287191968360206, "grad_norm": 2.8652350902557373, "learning_rate": 1.718300139219037e-06, "loss": 0.401, "step": 13384 }, { "epoch": 1.6288408883480376, "grad_norm": 1.598162293434143, "learning_rate": 1.7172069776729305e-06, "loss": 0.3853, "step": 13385 }, { "epoch": 1.6289625798600547, "grad_norm": 1.6476985216140747, "learning_rate": 1.7161141313042463e-06, "loss": 0.3461, "step": 13386 }, { "epoch": 1.6290842713720717, "grad_norm": 1.8403626680374146, "learning_rate": 1.7150216001545684e-06, "loss": 0.396, "step": 13387 }, { "epoch": 1.6292059628840887, "grad_norm": 2.9103140830993652, "learning_rate": 1.7139293842654625e-06, "loss": 0.4184, "step": 13388 }, { "epoch": 1.6293276543961057, "grad_norm": 2.9887588024139404, "learning_rate": 1.7128374836784967e-06, "loss": 0.423, "step": 13389 }, { "epoch": 1.629449345908123, "grad_norm": 2.2097301483154297, "learning_rate": 1.7117458984352186e-06, "loss": 0.313, "step": 13390 }, { "epoch": 1.62957103742014, "grad_norm": 1.5335193872451782, "learning_rate": 1.7106546285771618e-06, "loss": 0.376, "step": 13391 }, { "epoch": 1.629692728932157, "grad_norm": 2.8846802711486816, "learning_rate": 1.7095636741458576e-06, "loss": 0.3729, "step": 13392 }, { "epoch": 1.629814420444174, "grad_norm": 1.8995273113250732, "learning_rate": 1.7084730351828138e-06, "loss": 0.3517, "step": 13393 }, { "epoch": 1.629936111956191, "grad_norm": 2.5518577098846436, "learning_rate": 1.7073827117295349e-06, "loss": 0.3734, "step": 13394 }, { "epoch": 1.630057803468208, "grad_norm": 2.041529417037964, "learning_rate": 1.706292703827509e-06, "loss": 0.4001, "step": 13395 }, { "epoch": 1.630179494980225, "grad_norm": 1.9116995334625244, "learning_rate": 1.7052030115182105e-06, "loss": 0.4086, "step": 13396 }, { "epoch": 1.6303011864922423, "grad_norm": 1.8903470039367676, "learning_rate": 1.7041136348431087e-06, "loss": 0.3801, "step": 13397 }, { "epoch": 1.6304228780042593, "grad_norm": 1.2369699478149414, "learning_rate": 1.7030245738436547e-06, "loss": 0.3393, "step": 13398 }, { "epoch": 1.6305445695162764, "grad_norm": 2.136042833328247, "learning_rate": 1.7019358285612897e-06, "loss": 0.3791, "step": 13399 }, { "epoch": 1.6306662610282934, "grad_norm": 2.6420884132385254, "learning_rate": 1.700847399037443e-06, "loss": 0.3927, "step": 13400 }, { "epoch": 1.6307879525403104, "grad_norm": 1.8712613582611084, "learning_rate": 1.699759285313528e-06, "loss": 0.3222, "step": 13401 }, { "epoch": 1.6309096440523274, "grad_norm": 1.794637680053711, "learning_rate": 1.698671487430955e-06, "loss": 0.3563, "step": 13402 }, { "epoch": 1.6310313355643444, "grad_norm": 1.964385747909546, "learning_rate": 1.6975840054311143e-06, "loss": 0.3008, "step": 13403 }, { "epoch": 1.6311530270763615, "grad_norm": 1.9887021780014038, "learning_rate": 1.696496839355386e-06, "loss": 0.3737, "step": 13404 }, { "epoch": 1.6312747185883785, "grad_norm": 1.9794068336486816, "learning_rate": 1.6954099892451426e-06, "loss": 0.3321, "step": 13405 }, { "epoch": 1.6313964101003955, "grad_norm": 1.858148455619812, "learning_rate": 1.6943234551417375e-06, "loss": 0.4109, "step": 13406 }, { "epoch": 1.6315181016124125, "grad_norm": 1.4207062721252441, "learning_rate": 1.6932372370865147e-06, "loss": 0.3746, "step": 13407 }, { "epoch": 1.6316397931244295, "grad_norm": 1.9442638158798218, "learning_rate": 1.692151335120812e-06, "loss": 0.4241, "step": 13408 }, { "epoch": 1.6317614846364465, "grad_norm": 2.0604519844055176, "learning_rate": 1.6910657492859473e-06, "loss": 0.3387, "step": 13409 }, { "epoch": 1.6318831761484636, "grad_norm": 1.3367180824279785, "learning_rate": 1.6899804796232288e-06, "loss": 0.3026, "step": 13410 }, { "epoch": 1.6320048676604806, "grad_norm": 1.6336756944656372, "learning_rate": 1.6888955261739549e-06, "loss": 0.4121, "step": 13411 }, { "epoch": 1.6321265591724976, "grad_norm": 2.9144794940948486, "learning_rate": 1.6878108889794065e-06, "loss": 0.2901, "step": 13412 }, { "epoch": 1.6322482506845146, "grad_norm": 2.1106197834014893, "learning_rate": 1.6867265680808608e-06, "loss": 0.3281, "step": 13413 }, { "epoch": 1.6323699421965316, "grad_norm": 1.767088770866394, "learning_rate": 1.6856425635195771e-06, "loss": 0.3519, "step": 13414 }, { "epoch": 1.6324916337085489, "grad_norm": 1.462937593460083, "learning_rate": 1.6845588753368014e-06, "loss": 0.3725, "step": 13415 }, { "epoch": 1.632613325220566, "grad_norm": 1.2741680145263672, "learning_rate": 1.6834755035737749e-06, "loss": 0.3075, "step": 13416 }, { "epoch": 1.632735016732583, "grad_norm": 1.4983042478561401, "learning_rate": 1.6823924482717192e-06, "loss": 0.3593, "step": 13417 }, { "epoch": 1.6328567082446, "grad_norm": 1.7222654819488525, "learning_rate": 1.6813097094718455e-06, "loss": 0.3305, "step": 13418 }, { "epoch": 1.632978399756617, "grad_norm": 2.429203748703003, "learning_rate": 1.680227287215358e-06, "loss": 0.403, "step": 13419 }, { "epoch": 1.633100091268634, "grad_norm": 1.4949259757995605, "learning_rate": 1.6791451815434445e-06, "loss": 0.3288, "step": 13420 }, { "epoch": 1.633221782780651, "grad_norm": 2.1316640377044678, "learning_rate": 1.6780633924972766e-06, "loss": 0.3838, "step": 13421 }, { "epoch": 1.6333434742926682, "grad_norm": 1.9163856506347656, "learning_rate": 1.6769819201180271e-06, "loss": 0.356, "step": 13422 }, { "epoch": 1.6334651658046853, "grad_norm": 1.452164888381958, "learning_rate": 1.6759007644468384e-06, "loss": 0.3316, "step": 13423 }, { "epoch": 1.6335868573167023, "grad_norm": 2.5675241947174072, "learning_rate": 1.6748199255248576e-06, "loss": 0.4283, "step": 13424 }, { "epoch": 1.6337085488287193, "grad_norm": 2.2385644912719727, "learning_rate": 1.6737394033932108e-06, "loss": 0.3341, "step": 13425 }, { "epoch": 1.6338302403407363, "grad_norm": 1.9260796308517456, "learning_rate": 1.6726591980930117e-06, "loss": 0.4016, "step": 13426 }, { "epoch": 1.6339519318527533, "grad_norm": 2.012775421142578, "learning_rate": 1.6715793096653698e-06, "loss": 0.4048, "step": 13427 }, { "epoch": 1.6340736233647704, "grad_norm": 1.357103705406189, "learning_rate": 1.6704997381513744e-06, "loss": 0.3463, "step": 13428 }, { "epoch": 1.6341953148767874, "grad_norm": 2.2745702266693115, "learning_rate": 1.6694204835921014e-06, "loss": 0.3746, "step": 13429 }, { "epoch": 1.6343170063888044, "grad_norm": 1.7849301099777222, "learning_rate": 1.6683415460286256e-06, "loss": 0.3575, "step": 13430 }, { "epoch": 1.6344386979008214, "grad_norm": 1.900254726409912, "learning_rate": 1.667262925501999e-06, "loss": 0.3847, "step": 13431 }, { "epoch": 1.6345603894128384, "grad_norm": 2.481780767440796, "learning_rate": 1.6661846220532641e-06, "loss": 0.3043, "step": 13432 }, { "epoch": 1.6346820809248555, "grad_norm": 1.8348278999328613, "learning_rate": 1.665106635723457e-06, "loss": 0.3284, "step": 13433 }, { "epoch": 1.6348037724368725, "grad_norm": 3.7086610794067383, "learning_rate": 1.6640289665535935e-06, "loss": 0.3311, "step": 13434 }, { "epoch": 1.6349254639488895, "grad_norm": 2.66515851020813, "learning_rate": 1.6629516145846836e-06, "loss": 0.4147, "step": 13435 }, { "epoch": 1.6350471554609065, "grad_norm": 1.8951871395111084, "learning_rate": 1.6618745798577207e-06, "loss": 0.3318, "step": 13436 }, { "epoch": 1.6351688469729235, "grad_norm": 2.4279592037200928, "learning_rate": 1.6607978624136868e-06, "loss": 0.3737, "step": 13437 }, { "epoch": 1.6352905384849405, "grad_norm": 1.6277490854263306, "learning_rate": 1.6597214622935576e-06, "loss": 0.3477, "step": 13438 }, { "epoch": 1.6354122299969576, "grad_norm": 1.6486586332321167, "learning_rate": 1.6586453795382906e-06, "loss": 0.3532, "step": 13439 }, { "epoch": 1.6355339215089746, "grad_norm": 1.428605079650879, "learning_rate": 1.6575696141888298e-06, "loss": 0.3293, "step": 13440 }, { "epoch": 1.6356556130209918, "grad_norm": 3.075319528579712, "learning_rate": 1.6564941662861156e-06, "loss": 0.4263, "step": 13441 }, { "epoch": 1.6357773045330088, "grad_norm": 2.772754192352295, "learning_rate": 1.6554190358710688e-06, "loss": 0.4277, "step": 13442 }, { "epoch": 1.6358989960450259, "grad_norm": 2.264282703399658, "learning_rate": 1.6543442229845973e-06, "loss": 0.3724, "step": 13443 }, { "epoch": 1.6360206875570429, "grad_norm": 2.1718485355377197, "learning_rate": 1.6532697276676056e-06, "loss": 0.3485, "step": 13444 }, { "epoch": 1.63614237906906, "grad_norm": 1.5994768142700195, "learning_rate": 1.6521955499609776e-06, "loss": 0.3993, "step": 13445 }, { "epoch": 1.636264070581077, "grad_norm": 2.743475914001465, "learning_rate": 1.651121689905587e-06, "loss": 0.4191, "step": 13446 }, { "epoch": 1.6363857620930942, "grad_norm": 1.5631766319274902, "learning_rate": 1.650048147542298e-06, "loss": 0.3511, "step": 13447 }, { "epoch": 1.6365074536051112, "grad_norm": 1.707613468170166, "learning_rate": 1.648974922911958e-06, "loss": 0.3621, "step": 13448 }, { "epoch": 1.6366291451171282, "grad_norm": 2.5252771377563477, "learning_rate": 1.6479020160554093e-06, "loss": 0.3122, "step": 13449 }, { "epoch": 1.6367508366291452, "grad_norm": 1.849367380142212, "learning_rate": 1.6468294270134777e-06, "loss": 0.3764, "step": 13450 }, { "epoch": 1.6368725281411622, "grad_norm": 2.094403028488159, "learning_rate": 1.6457571558269747e-06, "loss": 0.3988, "step": 13451 }, { "epoch": 1.6369942196531793, "grad_norm": 3.9948136806488037, "learning_rate": 1.6446852025367055e-06, "loss": 0.3298, "step": 13452 }, { "epoch": 1.6371159111651963, "grad_norm": 1.7776210308074951, "learning_rate": 1.6436135671834574e-06, "loss": 0.3812, "step": 13453 }, { "epoch": 1.6372376026772133, "grad_norm": 1.5117802619934082, "learning_rate": 1.6425422498080112e-06, "loss": 0.3159, "step": 13454 }, { "epoch": 1.6373592941892303, "grad_norm": 2.207461357116699, "learning_rate": 1.641471250451132e-06, "loss": 0.3026, "step": 13455 }, { "epoch": 1.6374809857012473, "grad_norm": 1.9573259353637695, "learning_rate": 1.6404005691535707e-06, "loss": 0.3378, "step": 13456 }, { "epoch": 1.6376026772132644, "grad_norm": 2.1857869625091553, "learning_rate": 1.6393302059560756e-06, "loss": 0.3382, "step": 13457 }, { "epoch": 1.6377243687252814, "grad_norm": 1.6931297779083252, "learning_rate": 1.6382601608993698e-06, "loss": 0.3608, "step": 13458 }, { "epoch": 1.6378460602372984, "grad_norm": 1.9838147163391113, "learning_rate": 1.6371904340241694e-06, "loss": 0.3602, "step": 13459 }, { "epoch": 1.6379677517493154, "grad_norm": 2.7030320167541504, "learning_rate": 1.6361210253711868e-06, "loss": 0.3968, "step": 13460 }, { "epoch": 1.6380894432613324, "grad_norm": 1.8258942365646362, "learning_rate": 1.6350519349811078e-06, "loss": 0.3636, "step": 13461 }, { "epoch": 1.6382111347733495, "grad_norm": 1.7149296998977661, "learning_rate": 1.6339831628946202e-06, "loss": 0.3665, "step": 13462 }, { "epoch": 1.6383328262853665, "grad_norm": 2.036803722381592, "learning_rate": 1.63291470915239e-06, "loss": 0.3888, "step": 13463 }, { "epoch": 1.6384545177973835, "grad_norm": 3.1245319843292236, "learning_rate": 1.6318465737950718e-06, "loss": 0.4389, "step": 13464 }, { "epoch": 1.6385762093094005, "grad_norm": 1.8545536994934082, "learning_rate": 1.630778756863315e-06, "loss": 0.4056, "step": 13465 }, { "epoch": 1.6386979008214178, "grad_norm": 2.2261717319488525, "learning_rate": 1.6297112583977493e-06, "loss": 0.363, "step": 13466 }, { "epoch": 1.6388195923334348, "grad_norm": 2.0839240550994873, "learning_rate": 1.6286440784389934e-06, "loss": 0.3556, "step": 13467 }, { "epoch": 1.6389412838454518, "grad_norm": 1.6345210075378418, "learning_rate": 1.6275772170276638e-06, "loss": 0.3517, "step": 13468 }, { "epoch": 1.6390629753574688, "grad_norm": 2.0055270195007324, "learning_rate": 1.6265106742043446e-06, "loss": 0.3985, "step": 13469 }, { "epoch": 1.6391846668694858, "grad_norm": 1.2477595806121826, "learning_rate": 1.62544445000963e-06, "loss": 0.3402, "step": 13470 }, { "epoch": 1.6393063583815028, "grad_norm": 2.4617700576782227, "learning_rate": 1.6243785444840888e-06, "loss": 0.4238, "step": 13471 }, { "epoch": 1.63942804989352, "grad_norm": 1.89751398563385, "learning_rate": 1.623312957668277e-06, "loss": 0.4055, "step": 13472 }, { "epoch": 1.639549741405537, "grad_norm": 1.828223705291748, "learning_rate": 1.6222476896027495e-06, "loss": 0.349, "step": 13473 }, { "epoch": 1.6396714329175541, "grad_norm": 1.4702894687652588, "learning_rate": 1.6211827403280378e-06, "loss": 0.3763, "step": 13474 }, { "epoch": 1.6397931244295711, "grad_norm": 3.080322504043579, "learning_rate": 1.620118109884663e-06, "loss": 0.4438, "step": 13475 }, { "epoch": 1.6399148159415882, "grad_norm": 2.0052330493927, "learning_rate": 1.6190537983131428e-06, "loss": 0.3738, "step": 13476 }, { "epoch": 1.6400365074536052, "grad_norm": 1.811352252960205, "learning_rate": 1.6179898056539734e-06, "loss": 0.3878, "step": 13477 }, { "epoch": 1.6401581989656222, "grad_norm": 2.2385783195495605, "learning_rate": 1.6169261319476392e-06, "loss": 0.346, "step": 13478 }, { "epoch": 1.6402798904776392, "grad_norm": 1.8236132860183716, "learning_rate": 1.615862777234619e-06, "loss": 0.3227, "step": 13479 }, { "epoch": 1.6404015819896562, "grad_norm": 1.4718546867370605, "learning_rate": 1.6147997415553751e-06, "loss": 0.3456, "step": 13480 }, { "epoch": 1.6405232735016733, "grad_norm": 3.5592145919799805, "learning_rate": 1.6137370249503582e-06, "loss": 0.3065, "step": 13481 }, { "epoch": 1.6406449650136903, "grad_norm": 2.163372755050659, "learning_rate": 1.6126746274600048e-06, "loss": 0.3826, "step": 13482 }, { "epoch": 1.6407666565257073, "grad_norm": 3.2274222373962402, "learning_rate": 1.6116125491247413e-06, "loss": 0.3623, "step": 13483 }, { "epoch": 1.6408883480377243, "grad_norm": 1.6441229581832886, "learning_rate": 1.6105507899849847e-06, "loss": 0.3639, "step": 13484 }, { "epoch": 1.6410100395497413, "grad_norm": 2.035132884979248, "learning_rate": 1.609489350081137e-06, "loss": 0.3367, "step": 13485 }, { "epoch": 1.6411317310617584, "grad_norm": 2.9197516441345215, "learning_rate": 1.6084282294535835e-06, "loss": 0.4271, "step": 13486 }, { "epoch": 1.6412534225737754, "grad_norm": 1.5328617095947266, "learning_rate": 1.6073674281427075e-06, "loss": 0.3806, "step": 13487 }, { "epoch": 1.6413751140857924, "grad_norm": 1.8179408311843872, "learning_rate": 1.6063069461888737e-06, "loss": 0.3669, "step": 13488 }, { "epoch": 1.6414968055978094, "grad_norm": 1.252159595489502, "learning_rate": 1.6052467836324315e-06, "loss": 0.3202, "step": 13489 }, { "epoch": 1.6416184971098264, "grad_norm": 1.6930807828903198, "learning_rate": 1.604186940513729e-06, "loss": 0.3136, "step": 13490 }, { "epoch": 1.6417401886218437, "grad_norm": 3.6778435707092285, "learning_rate": 1.6031274168730903e-06, "loss": 0.4552, "step": 13491 }, { "epoch": 1.6418618801338607, "grad_norm": 1.778458833694458, "learning_rate": 1.6020682127508348e-06, "loss": 0.3638, "step": 13492 }, { "epoch": 1.6419835716458777, "grad_norm": 1.3359156847000122, "learning_rate": 1.6010093281872662e-06, "loss": 0.3473, "step": 13493 }, { "epoch": 1.6421052631578947, "grad_norm": 1.4263921976089478, "learning_rate": 1.5999507632226752e-06, "loss": 0.3303, "step": 13494 }, { "epoch": 1.6422269546699118, "grad_norm": 2.1085805892944336, "learning_rate": 1.598892517897348e-06, "loss": 0.3819, "step": 13495 }, { "epoch": 1.6423486461819288, "grad_norm": 2.020066738128662, "learning_rate": 1.5978345922515481e-06, "loss": 0.3257, "step": 13496 }, { "epoch": 1.6424703376939458, "grad_norm": 1.9756035804748535, "learning_rate": 1.5967769863255322e-06, "loss": 0.3569, "step": 13497 }, { "epoch": 1.642592029205963, "grad_norm": 2.3926239013671875, "learning_rate": 1.595719700159548e-06, "loss": 0.3828, "step": 13498 }, { "epoch": 1.64271372071798, "grad_norm": 1.6480560302734375, "learning_rate": 1.594662733793826e-06, "loss": 0.3631, "step": 13499 }, { "epoch": 1.642835412229997, "grad_norm": 2.0234663486480713, "learning_rate": 1.5936060872685821e-06, "loss": 0.3821, "step": 13500 }, { "epoch": 1.642957103742014, "grad_norm": 1.726359486579895, "learning_rate": 1.592549760624028e-06, "loss": 0.3989, "step": 13501 }, { "epoch": 1.643078795254031, "grad_norm": 1.9464523792266846, "learning_rate": 1.5914937539003594e-06, "loss": 0.4144, "step": 13502 }, { "epoch": 1.6432004867660481, "grad_norm": 2.832329511642456, "learning_rate": 1.5904380671377574e-06, "loss": 0.2845, "step": 13503 }, { "epoch": 1.6433221782780651, "grad_norm": 1.3468236923217773, "learning_rate": 1.589382700376394e-06, "loss": 0.3184, "step": 13504 }, { "epoch": 1.6434438697900822, "grad_norm": 1.4346479177474976, "learning_rate": 1.5883276536564263e-06, "loss": 0.3541, "step": 13505 }, { "epoch": 1.6435655613020992, "grad_norm": 2.4173312187194824, "learning_rate": 1.5872729270180043e-06, "loss": 0.3372, "step": 13506 }, { "epoch": 1.6436872528141162, "grad_norm": 1.4922704696655273, "learning_rate": 1.5862185205012603e-06, "loss": 0.3518, "step": 13507 }, { "epoch": 1.6438089443261332, "grad_norm": 2.3446948528289795, "learning_rate": 1.5851644341463157e-06, "loss": 0.3378, "step": 13508 }, { "epoch": 1.6439306358381502, "grad_norm": 2.416332483291626, "learning_rate": 1.5841106679932838e-06, "loss": 0.4321, "step": 13509 }, { "epoch": 1.6440523273501673, "grad_norm": 1.436244249343872, "learning_rate": 1.5830572220822604e-06, "loss": 0.3636, "step": 13510 }, { "epoch": 1.6441740188621843, "grad_norm": 2.0416808128356934, "learning_rate": 1.5820040964533313e-06, "loss": 0.3768, "step": 13511 }, { "epoch": 1.6442957103742013, "grad_norm": 2.190077304840088, "learning_rate": 1.5809512911465708e-06, "loss": 0.4151, "step": 13512 }, { "epoch": 1.6444174018862183, "grad_norm": 3.4898064136505127, "learning_rate": 1.5798988062020392e-06, "loss": 0.4347, "step": 13513 }, { "epoch": 1.6445390933982353, "grad_norm": 2.559004306793213, "learning_rate": 1.5788466416597914e-06, "loss": 0.383, "step": 13514 }, { "epoch": 1.6446607849102524, "grad_norm": 2.573141574859619, "learning_rate": 1.5777947975598573e-06, "loss": 0.4365, "step": 13515 }, { "epoch": 1.6447824764222696, "grad_norm": 1.6363483667373657, "learning_rate": 1.5767432739422607e-06, "loss": 0.3973, "step": 13516 }, { "epoch": 1.6449041679342866, "grad_norm": 5.089287281036377, "learning_rate": 1.5756920708470213e-06, "loss": 0.2951, "step": 13517 }, { "epoch": 1.6450258594463036, "grad_norm": 1.376924991607666, "learning_rate": 1.5746411883141332e-06, "loss": 0.3305, "step": 13518 }, { "epoch": 1.6451475509583207, "grad_norm": 1.7490031719207764, "learning_rate": 1.5735906263835898e-06, "loss": 0.3357, "step": 13519 }, { "epoch": 1.6452692424703377, "grad_norm": 1.9388436079025269, "learning_rate": 1.5725403850953647e-06, "loss": 0.3967, "step": 13520 }, { "epoch": 1.6453909339823547, "grad_norm": 1.8339444398880005, "learning_rate": 1.571490464489419e-06, "loss": 0.4305, "step": 13521 }, { "epoch": 1.6455126254943717, "grad_norm": 1.4353859424591064, "learning_rate": 1.5704408646057101e-06, "loss": 0.3237, "step": 13522 }, { "epoch": 1.645634317006389, "grad_norm": 1.4388935565948486, "learning_rate": 1.5693915854841747e-06, "loss": 0.4038, "step": 13523 }, { "epoch": 1.645756008518406, "grad_norm": 1.8594416379928589, "learning_rate": 1.5683426271647373e-06, "loss": 0.4112, "step": 13524 }, { "epoch": 1.645877700030423, "grad_norm": 2.123471975326538, "learning_rate": 1.5672939896873184e-06, "loss": 0.41, "step": 13525 }, { "epoch": 1.64599939154244, "grad_norm": 2.739448308944702, "learning_rate": 1.5662456730918174e-06, "loss": 0.4007, "step": 13526 }, { "epoch": 1.646121083054457, "grad_norm": 1.8444348573684692, "learning_rate": 1.5651976774181255e-06, "loss": 0.3317, "step": 13527 }, { "epoch": 1.646242774566474, "grad_norm": 1.8997925519943237, "learning_rate": 1.5641500027061206e-06, "loss": 0.3291, "step": 13528 }, { "epoch": 1.646364466078491, "grad_norm": 2.3686363697052, "learning_rate": 1.563102648995668e-06, "loss": 0.3302, "step": 13529 }, { "epoch": 1.646486157590508, "grad_norm": 4.1441426277160645, "learning_rate": 1.5620556163266244e-06, "loss": 0.4486, "step": 13530 }, { "epoch": 1.6466078491025251, "grad_norm": 2.0939393043518066, "learning_rate": 1.5610089047388311e-06, "loss": 0.3737, "step": 13531 }, { "epoch": 1.6467295406145421, "grad_norm": 1.6156666278839111, "learning_rate": 1.559962514272113e-06, "loss": 0.3721, "step": 13532 }, { "epoch": 1.6468512321265592, "grad_norm": 1.9121696949005127, "learning_rate": 1.558916444966294e-06, "loss": 0.4047, "step": 13533 }, { "epoch": 1.6469729236385762, "grad_norm": 1.62413489818573, "learning_rate": 1.557870696861178e-06, "loss": 0.3614, "step": 13534 }, { "epoch": 1.6470946151505932, "grad_norm": 1.4637845754623413, "learning_rate": 1.5568252699965514e-06, "loss": 0.3871, "step": 13535 }, { "epoch": 1.6472163066626102, "grad_norm": 2.2136142253875732, "learning_rate": 1.555780164412204e-06, "loss": 0.345, "step": 13536 }, { "epoch": 1.6473379981746272, "grad_norm": 1.998266577720642, "learning_rate": 1.5547353801478993e-06, "loss": 0.3507, "step": 13537 }, { "epoch": 1.6474596896866442, "grad_norm": 1.9016863107681274, "learning_rate": 1.5536909172433935e-06, "loss": 0.4068, "step": 13538 }, { "epoch": 1.6475813811986613, "grad_norm": 1.7950299978256226, "learning_rate": 1.5526467757384322e-06, "loss": 0.3323, "step": 13539 }, { "epoch": 1.6477030727106783, "grad_norm": 1.8009721040725708, "learning_rate": 1.551602955672743e-06, "loss": 0.3648, "step": 13540 }, { "epoch": 1.6478247642226953, "grad_norm": 1.5335566997528076, "learning_rate": 1.5505594570860505e-06, "loss": 0.399, "step": 13541 }, { "epoch": 1.6479464557347125, "grad_norm": 2.015314817428589, "learning_rate": 1.5495162800180608e-06, "loss": 0.3872, "step": 13542 }, { "epoch": 1.6480681472467296, "grad_norm": 2.049790382385254, "learning_rate": 1.5484734245084665e-06, "loss": 0.4352, "step": 13543 }, { "epoch": 1.6481898387587466, "grad_norm": 1.487663984298706, "learning_rate": 1.5474308905969537e-06, "loss": 0.3068, "step": 13544 }, { "epoch": 1.6483115302707636, "grad_norm": 1.8583332300186157, "learning_rate": 1.5463886783231906e-06, "loss": 0.3648, "step": 13545 }, { "epoch": 1.6484332217827806, "grad_norm": 1.8660967350006104, "learning_rate": 1.5453467877268346e-06, "loss": 0.343, "step": 13546 }, { "epoch": 1.6485549132947976, "grad_norm": 1.659022331237793, "learning_rate": 1.544305218847536e-06, "loss": 0.4167, "step": 13547 }, { "epoch": 1.6486766048068149, "grad_norm": 1.9124284982681274, "learning_rate": 1.5432639717249266e-06, "loss": 0.308, "step": 13548 }, { "epoch": 1.648798296318832, "grad_norm": 3.2666680812835693, "learning_rate": 1.5422230463986277e-06, "loss": 0.3038, "step": 13549 }, { "epoch": 1.648919987830849, "grad_norm": 2.1303718090057373, "learning_rate": 1.5411824429082478e-06, "loss": 0.365, "step": 13550 }, { "epoch": 1.649041679342866, "grad_norm": 2.429307699203491, "learning_rate": 1.540142161293382e-06, "loss": 0.3163, "step": 13551 }, { "epoch": 1.649163370854883, "grad_norm": 1.8923447132110596, "learning_rate": 1.5391022015936209e-06, "loss": 0.3512, "step": 13552 }, { "epoch": 1.6492850623669, "grad_norm": 2.2367300987243652, "learning_rate": 1.5380625638485337e-06, "loss": 0.4674, "step": 13553 }, { "epoch": 1.649406753878917, "grad_norm": 1.349216341972351, "learning_rate": 1.5370232480976787e-06, "loss": 0.3204, "step": 13554 }, { "epoch": 1.649528445390934, "grad_norm": 1.6021007299423218, "learning_rate": 1.5359842543806103e-06, "loss": 0.3889, "step": 13555 }, { "epoch": 1.649650136902951, "grad_norm": 2.0629358291625977, "learning_rate": 1.5349455827368586e-06, "loss": 0.4152, "step": 13556 }, { "epoch": 1.649771828414968, "grad_norm": 2.156932830810547, "learning_rate": 1.533907233205948e-06, "loss": 0.3068, "step": 13557 }, { "epoch": 1.649893519926985, "grad_norm": 1.9429261684417725, "learning_rate": 1.532869205827393e-06, "loss": 0.3664, "step": 13558 }, { "epoch": 1.650015211439002, "grad_norm": 1.6506551504135132, "learning_rate": 1.5318315006406915e-06, "loss": 0.3775, "step": 13559 }, { "epoch": 1.6501369029510191, "grad_norm": 1.749122977256775, "learning_rate": 1.5307941176853292e-06, "loss": 0.3346, "step": 13560 }, { "epoch": 1.6502585944630361, "grad_norm": 1.6698367595672607, "learning_rate": 1.5297570570007802e-06, "loss": 0.3525, "step": 13561 }, { "epoch": 1.6503802859750532, "grad_norm": 3.209825038909912, "learning_rate": 1.5287203186265055e-06, "loss": 0.423, "step": 13562 }, { "epoch": 1.6505019774870702, "grad_norm": 2.068608522415161, "learning_rate": 1.5276839026019597e-06, "loss": 0.3923, "step": 13563 }, { "epoch": 1.6506236689990872, "grad_norm": 1.9705032110214233, "learning_rate": 1.5266478089665793e-06, "loss": 0.3152, "step": 13564 }, { "epoch": 1.6507453605111042, "grad_norm": 1.3871736526489258, "learning_rate": 1.525612037759785e-06, "loss": 0.3552, "step": 13565 }, { "epoch": 1.6508670520231212, "grad_norm": 2.300062417984009, "learning_rate": 1.5245765890209963e-06, "loss": 0.3359, "step": 13566 }, { "epoch": 1.6509887435351385, "grad_norm": 1.5899654626846313, "learning_rate": 1.5235414627896117e-06, "loss": 0.3174, "step": 13567 }, { "epoch": 1.6511104350471555, "grad_norm": 2.0018270015716553, "learning_rate": 1.5225066591050174e-06, "loss": 0.3696, "step": 13568 }, { "epoch": 1.6512321265591725, "grad_norm": 2.9894344806671143, "learning_rate": 1.5214721780065944e-06, "loss": 0.4339, "step": 13569 }, { "epoch": 1.6513538180711895, "grad_norm": 2.1566507816314697, "learning_rate": 1.5204380195337022e-06, "loss": 0.3463, "step": 13570 }, { "epoch": 1.6514755095832065, "grad_norm": 1.2636442184448242, "learning_rate": 1.5194041837256979e-06, "loss": 0.3406, "step": 13571 }, { "epoch": 1.6515972010952236, "grad_norm": 2.55454683303833, "learning_rate": 1.5183706706219202e-06, "loss": 0.3964, "step": 13572 }, { "epoch": 1.6517188926072408, "grad_norm": 1.8894301652908325, "learning_rate": 1.5173374802616892e-06, "loss": 0.3143, "step": 13573 }, { "epoch": 1.6518405841192578, "grad_norm": 2.53320050239563, "learning_rate": 1.5163046126843283e-06, "loss": 0.389, "step": 13574 }, { "epoch": 1.6519622756312748, "grad_norm": 3.6553306579589844, "learning_rate": 1.5152720679291377e-06, "loss": 0.317, "step": 13575 }, { "epoch": 1.6520839671432919, "grad_norm": 1.83319890499115, "learning_rate": 1.5142398460354036e-06, "loss": 0.3252, "step": 13576 }, { "epoch": 1.6522056586553089, "grad_norm": 1.5476677417755127, "learning_rate": 1.513207947042411e-06, "loss": 0.3582, "step": 13577 }, { "epoch": 1.652327350167326, "grad_norm": 2.0710372924804688, "learning_rate": 1.512176370989421e-06, "loss": 0.3973, "step": 13578 }, { "epoch": 1.652449041679343, "grad_norm": 3.0273845195770264, "learning_rate": 1.511145117915691e-06, "loss": 0.3033, "step": 13579 }, { "epoch": 1.65257073319136, "grad_norm": 2.030923366546631, "learning_rate": 1.5101141878604598e-06, "loss": 0.2935, "step": 13580 }, { "epoch": 1.652692424703377, "grad_norm": 2.000373601913452, "learning_rate": 1.5090835808629544e-06, "loss": 0.4074, "step": 13581 }, { "epoch": 1.652814116215394, "grad_norm": 1.9685217142105103, "learning_rate": 1.5080532969623963e-06, "loss": 0.4195, "step": 13582 }, { "epoch": 1.652935807727411, "grad_norm": 1.5527143478393555, "learning_rate": 1.5070233361979902e-06, "loss": 0.3376, "step": 13583 }, { "epoch": 1.653057499239428, "grad_norm": 2.4072747230529785, "learning_rate": 1.5059936986089208e-06, "loss": 0.4189, "step": 13584 }, { "epoch": 1.653179190751445, "grad_norm": 3.1512880325317383, "learning_rate": 1.5049643842343753e-06, "loss": 0.427, "step": 13585 }, { "epoch": 1.653300882263462, "grad_norm": 1.9627091884613037, "learning_rate": 1.5039353931135169e-06, "loss": 0.3999, "step": 13586 }, { "epoch": 1.653422573775479, "grad_norm": 1.905883550643921, "learning_rate": 1.5029067252855045e-06, "loss": 0.3998, "step": 13587 }, { "epoch": 1.653544265287496, "grad_norm": 2.125684976577759, "learning_rate": 1.5018783807894789e-06, "loss": 0.3692, "step": 13588 }, { "epoch": 1.6536659567995131, "grad_norm": 1.6453704833984375, "learning_rate": 1.5008503596645697e-06, "loss": 0.3312, "step": 13589 }, { "epoch": 1.6537876483115301, "grad_norm": 1.7019684314727783, "learning_rate": 1.4998226619498979e-06, "loss": 0.3556, "step": 13590 }, { "epoch": 1.6539093398235472, "grad_norm": 2.6745917797088623, "learning_rate": 1.498795287684569e-06, "loss": 0.312, "step": 13591 }, { "epoch": 1.6540310313355644, "grad_norm": 2.4666833877563477, "learning_rate": 1.497768236907673e-06, "loss": 0.3479, "step": 13592 }, { "epoch": 1.6541527228475814, "grad_norm": 1.7143189907073975, "learning_rate": 1.4967415096582972e-06, "loss": 0.3898, "step": 13593 }, { "epoch": 1.6542744143595984, "grad_norm": 1.3360440731048584, "learning_rate": 1.495715105975507e-06, "loss": 0.2944, "step": 13594 }, { "epoch": 1.6543961058716155, "grad_norm": 2.0141937732696533, "learning_rate": 1.4946890258983603e-06, "loss": 0.4944, "step": 13595 }, { "epoch": 1.6545177973836325, "grad_norm": 1.803688883781433, "learning_rate": 1.4936632694659004e-06, "loss": 0.3728, "step": 13596 }, { "epoch": 1.6546394888956495, "grad_norm": 1.8607743978500366, "learning_rate": 1.492637836717159e-06, "loss": 0.3296, "step": 13597 }, { "epoch": 1.6547611804076665, "grad_norm": 1.640473484992981, "learning_rate": 1.4916127276911596e-06, "loss": 0.3749, "step": 13598 }, { "epoch": 1.6548828719196838, "grad_norm": 1.659511685371399, "learning_rate": 1.4905879424269066e-06, "loss": 0.3677, "step": 13599 }, { "epoch": 1.6550045634317008, "grad_norm": 3.7554428577423096, "learning_rate": 1.489563480963394e-06, "loss": 0.3897, "step": 13600 }, { "epoch": 1.6551262549437178, "grad_norm": 2.6372745037078857, "learning_rate": 1.4885393433396089e-06, "loss": 0.4192, "step": 13601 }, { "epoch": 1.6552479464557348, "grad_norm": 2.3624267578125, "learning_rate": 1.4875155295945187e-06, "loss": 0.3046, "step": 13602 }, { "epoch": 1.6553696379677518, "grad_norm": 1.6589763164520264, "learning_rate": 1.4864920397670812e-06, "loss": 0.3852, "step": 13603 }, { "epoch": 1.6554913294797688, "grad_norm": 1.946058750152588, "learning_rate": 1.4854688738962452e-06, "loss": 0.3701, "step": 13604 }, { "epoch": 1.6556130209917859, "grad_norm": 2.2866172790527344, "learning_rate": 1.4844460320209431e-06, "loss": 0.2729, "step": 13605 }, { "epoch": 1.6557347125038029, "grad_norm": 2.7910420894622803, "learning_rate": 1.4834235141800957e-06, "loss": 0.3243, "step": 13606 }, { "epoch": 1.65585640401582, "grad_norm": 2.127629041671753, "learning_rate": 1.4824013204126119e-06, "loss": 0.3572, "step": 13607 }, { "epoch": 1.655978095527837, "grad_norm": 1.3885986804962158, "learning_rate": 1.4813794507573865e-06, "loss": 0.3237, "step": 13608 }, { "epoch": 1.656099787039854, "grad_norm": 1.5610814094543457, "learning_rate": 1.4803579052533068e-06, "loss": 0.3115, "step": 13609 }, { "epoch": 1.656221478551871, "grad_norm": 1.7771389484405518, "learning_rate": 1.4793366839392443e-06, "loss": 0.382, "step": 13610 }, { "epoch": 1.656343170063888, "grad_norm": 1.7746970653533936, "learning_rate": 1.4783157868540555e-06, "loss": 0.3823, "step": 13611 }, { "epoch": 1.656464861575905, "grad_norm": 1.6492103338241577, "learning_rate": 1.4772952140365914e-06, "loss": 0.3275, "step": 13612 }, { "epoch": 1.656586553087922, "grad_norm": 1.9623640775680542, "learning_rate": 1.4762749655256859e-06, "loss": 0.396, "step": 13613 }, { "epoch": 1.656708244599939, "grad_norm": 2.6154098510742188, "learning_rate": 1.4752550413601586e-06, "loss": 0.3634, "step": 13614 }, { "epoch": 1.656829936111956, "grad_norm": 1.8920204639434814, "learning_rate": 1.4742354415788252e-06, "loss": 0.2903, "step": 13615 }, { "epoch": 1.656951627623973, "grad_norm": 1.7871285676956177, "learning_rate": 1.4732161662204803e-06, "loss": 0.3853, "step": 13616 }, { "epoch": 1.6570733191359903, "grad_norm": 1.8500959873199463, "learning_rate": 1.4721972153239073e-06, "loss": 0.2934, "step": 13617 }, { "epoch": 1.6571950106480073, "grad_norm": 2.0763282775878906, "learning_rate": 1.4711785889278863e-06, "loss": 0.3775, "step": 13618 }, { "epoch": 1.6573167021600244, "grad_norm": 2.6651928424835205, "learning_rate": 1.4701602870711696e-06, "loss": 0.422, "step": 13619 }, { "epoch": 1.6574383936720414, "grad_norm": 2.9481241703033447, "learning_rate": 1.4691423097925117e-06, "loss": 0.4025, "step": 13620 }, { "epoch": 1.6575600851840584, "grad_norm": 1.7668037414550781, "learning_rate": 1.4681246571306474e-06, "loss": 0.3455, "step": 13621 }, { "epoch": 1.6576817766960754, "grad_norm": 2.107320785522461, "learning_rate": 1.4671073291242965e-06, "loss": 0.3867, "step": 13622 }, { "epoch": 1.6578034682080924, "grad_norm": 1.5007433891296387, "learning_rate": 1.4660903258121773e-06, "loss": 0.3592, "step": 13623 }, { "epoch": 1.6579251597201097, "grad_norm": 1.5743193626403809, "learning_rate": 1.465073647232984e-06, "loss": 0.3674, "step": 13624 }, { "epoch": 1.6580468512321267, "grad_norm": 2.820394992828369, "learning_rate": 1.4640572934254037e-06, "loss": 0.4089, "step": 13625 }, { "epoch": 1.6581685427441437, "grad_norm": 1.837266206741333, "learning_rate": 1.4630412644281133e-06, "loss": 0.3932, "step": 13626 }, { "epoch": 1.6582902342561607, "grad_norm": 1.6454945802688599, "learning_rate": 1.462025560279774e-06, "loss": 0.33, "step": 13627 }, { "epoch": 1.6584119257681778, "grad_norm": 2.0698416233062744, "learning_rate": 1.4610101810190325e-06, "loss": 0.398, "step": 13628 }, { "epoch": 1.6585336172801948, "grad_norm": 2.274641513824463, "learning_rate": 1.4599951266845325e-06, "loss": 0.3592, "step": 13629 }, { "epoch": 1.6586553087922118, "grad_norm": 2.0959949493408203, "learning_rate": 1.458980397314891e-06, "loss": 0.3721, "step": 13630 }, { "epoch": 1.6587770003042288, "grad_norm": 1.6033892631530762, "learning_rate": 1.4579659929487255e-06, "loss": 0.3524, "step": 13631 }, { "epoch": 1.6588986918162458, "grad_norm": 1.241445779800415, "learning_rate": 1.4569519136246357e-06, "loss": 0.3135, "step": 13632 }, { "epoch": 1.6590203833282628, "grad_norm": 2.6747539043426514, "learning_rate": 1.455938159381206e-06, "loss": 0.4672, "step": 13633 }, { "epoch": 1.6591420748402799, "grad_norm": 1.6344259977340698, "learning_rate": 1.4549247302570169e-06, "loss": 0.33, "step": 13634 }, { "epoch": 1.6592637663522969, "grad_norm": 1.8994158506393433, "learning_rate": 1.4539116262906295e-06, "loss": 0.3706, "step": 13635 }, { "epoch": 1.659385457864314, "grad_norm": 1.5130999088287354, "learning_rate": 1.4528988475205918e-06, "loss": 0.328, "step": 13636 }, { "epoch": 1.659507149376331, "grad_norm": 1.4553889036178589, "learning_rate": 1.4518863939854467e-06, "loss": 0.3324, "step": 13637 }, { "epoch": 1.659628840888348, "grad_norm": 1.8493916988372803, "learning_rate": 1.4508742657237152e-06, "loss": 0.3741, "step": 13638 }, { "epoch": 1.659750532400365, "grad_norm": 3.459730863571167, "learning_rate": 1.4498624627739167e-06, "loss": 0.4211, "step": 13639 }, { "epoch": 1.659872223912382, "grad_norm": 1.845629334449768, "learning_rate": 1.4488509851745491e-06, "loss": 0.3729, "step": 13640 }, { "epoch": 1.659993915424399, "grad_norm": 1.6316800117492676, "learning_rate": 1.4478398329641007e-06, "loss": 0.3716, "step": 13641 }, { "epoch": 1.660115606936416, "grad_norm": 2.327666759490967, "learning_rate": 1.4468290061810497e-06, "loss": 0.3982, "step": 13642 }, { "epoch": 1.6602372984484333, "grad_norm": 4.206064224243164, "learning_rate": 1.4458185048638584e-06, "loss": 0.3973, "step": 13643 }, { "epoch": 1.6603589899604503, "grad_norm": 1.6471965312957764, "learning_rate": 1.4448083290509774e-06, "loss": 0.3003, "step": 13644 }, { "epoch": 1.6604806814724673, "grad_norm": 2.684356451034546, "learning_rate": 1.443798478780849e-06, "loss": 0.3836, "step": 13645 }, { "epoch": 1.6606023729844843, "grad_norm": 1.5354562997817993, "learning_rate": 1.442788954091897e-06, "loss": 0.3421, "step": 13646 }, { "epoch": 1.6607240644965013, "grad_norm": 2.4430270195007324, "learning_rate": 1.441779755022541e-06, "loss": 0.3507, "step": 13647 }, { "epoch": 1.6608457560085184, "grad_norm": 2.050832748413086, "learning_rate": 1.4407708816111787e-06, "loss": 0.3486, "step": 13648 }, { "epoch": 1.6609674475205356, "grad_norm": 2.2159924507141113, "learning_rate": 1.4397623338961996e-06, "loss": 0.3305, "step": 13649 }, { "epoch": 1.6610891390325526, "grad_norm": 1.7031782865524292, "learning_rate": 1.4387541119159842e-06, "loss": 0.3476, "step": 13650 }, { "epoch": 1.6612108305445696, "grad_norm": 5.4927263259887695, "learning_rate": 1.437746215708895e-06, "loss": 0.5212, "step": 13651 }, { "epoch": 1.6613325220565867, "grad_norm": 1.991536021232605, "learning_rate": 1.436738645313286e-06, "loss": 0.3299, "step": 13652 }, { "epoch": 1.6614542135686037, "grad_norm": 1.5333884954452515, "learning_rate": 1.4357314007674972e-06, "loss": 0.2913, "step": 13653 }, { "epoch": 1.6615759050806207, "grad_norm": 2.365474224090576, "learning_rate": 1.4347244821098526e-06, "loss": 0.4077, "step": 13654 }, { "epoch": 1.6616975965926377, "grad_norm": 2.1574690341949463, "learning_rate": 1.433717889378673e-06, "loss": 0.4334, "step": 13655 }, { "epoch": 1.6618192881046547, "grad_norm": 1.1991745233535767, "learning_rate": 1.4327116226122584e-06, "loss": 0.3381, "step": 13656 }, { "epoch": 1.6619409796166718, "grad_norm": 1.8424403667449951, "learning_rate": 1.4317056818488983e-06, "loss": 0.3181, "step": 13657 }, { "epoch": 1.6620626711286888, "grad_norm": 2.9418604373931885, "learning_rate": 1.4307000671268746e-06, "loss": 0.3835, "step": 13658 }, { "epoch": 1.6621843626407058, "grad_norm": 1.6366314888000488, "learning_rate": 1.4296947784844505e-06, "loss": 0.3818, "step": 13659 }, { "epoch": 1.6623060541527228, "grad_norm": 2.9563655853271484, "learning_rate": 1.4286898159598772e-06, "loss": 0.4524, "step": 13660 }, { "epoch": 1.6624277456647398, "grad_norm": 2.4889585971832275, "learning_rate": 1.4276851795914003e-06, "loss": 0.3964, "step": 13661 }, { "epoch": 1.6625494371767569, "grad_norm": 2.7162702083587646, "learning_rate": 1.4266808694172463e-06, "loss": 0.2923, "step": 13662 }, { "epoch": 1.6626711286887739, "grad_norm": 1.4771445989608765, "learning_rate": 1.4256768854756286e-06, "loss": 0.3484, "step": 13663 }, { "epoch": 1.6627928202007909, "grad_norm": 1.7932910919189453, "learning_rate": 1.4246732278047582e-06, "loss": 0.3438, "step": 13664 }, { "epoch": 1.662914511712808, "grad_norm": 1.894774079322815, "learning_rate": 1.4236698964428164e-06, "loss": 0.3853, "step": 13665 }, { "epoch": 1.663036203224825, "grad_norm": 1.359831690788269, "learning_rate": 1.4226668914279907e-06, "loss": 0.349, "step": 13666 }, { "epoch": 1.663157894736842, "grad_norm": 1.9268769025802612, "learning_rate": 1.4216642127984426e-06, "loss": 0.3697, "step": 13667 }, { "epoch": 1.6632795862488592, "grad_norm": 2.676494598388672, "learning_rate": 1.420661860592325e-06, "loss": 0.4374, "step": 13668 }, { "epoch": 1.6634012777608762, "grad_norm": 1.6084446907043457, "learning_rate": 1.4196598348477842e-06, "loss": 0.3356, "step": 13669 }, { "epoch": 1.6635229692728932, "grad_norm": 1.7682455778121948, "learning_rate": 1.4186581356029472e-06, "loss": 0.3538, "step": 13670 }, { "epoch": 1.6636446607849102, "grad_norm": 1.6002421379089355, "learning_rate": 1.4176567628959282e-06, "loss": 0.2985, "step": 13671 }, { "epoch": 1.6637663522969273, "grad_norm": 3.169933795928955, "learning_rate": 1.4166557167648365e-06, "loss": 0.4102, "step": 13672 }, { "epoch": 1.6638880438089443, "grad_norm": 3.072920322418213, "learning_rate": 1.4156549972477618e-06, "loss": 0.3968, "step": 13673 }, { "epoch": 1.6640097353209615, "grad_norm": 1.5023761987686157, "learning_rate": 1.4146546043827791e-06, "loss": 0.3693, "step": 13674 }, { "epoch": 1.6641314268329785, "grad_norm": 1.723341703414917, "learning_rate": 1.4136545382079658e-06, "loss": 0.348, "step": 13675 }, { "epoch": 1.6642531183449956, "grad_norm": 2.0811023712158203, "learning_rate": 1.4126547987613649e-06, "loss": 0.3323, "step": 13676 }, { "epoch": 1.6643748098570126, "grad_norm": 2.1050126552581787, "learning_rate": 1.4116553860810256e-06, "loss": 0.351, "step": 13677 }, { "epoch": 1.6644965013690296, "grad_norm": 1.8099220991134644, "learning_rate": 1.4106563002049756e-06, "loss": 0.3854, "step": 13678 }, { "epoch": 1.6646181928810466, "grad_norm": 2.0233612060546875, "learning_rate": 1.4096575411712299e-06, "loss": 0.2744, "step": 13679 }, { "epoch": 1.6647398843930636, "grad_norm": 2.3380508422851562, "learning_rate": 1.4086591090177993e-06, "loss": 0.3445, "step": 13680 }, { "epoch": 1.6648615759050807, "grad_norm": 1.6650595664978027, "learning_rate": 1.4076610037826722e-06, "loss": 0.3802, "step": 13681 }, { "epoch": 1.6649832674170977, "grad_norm": 2.3621175289154053, "learning_rate": 1.406663225503826e-06, "loss": 0.4072, "step": 13682 }, { "epoch": 1.6651049589291147, "grad_norm": 1.7309842109680176, "learning_rate": 1.4056657742192336e-06, "loss": 0.3742, "step": 13683 }, { "epoch": 1.6652266504411317, "grad_norm": 2.152414321899414, "learning_rate": 1.4046686499668493e-06, "loss": 0.3451, "step": 13684 }, { "epoch": 1.6653483419531487, "grad_norm": 1.6705527305603027, "learning_rate": 1.4036718527846104e-06, "loss": 0.3528, "step": 13685 }, { "epoch": 1.6654700334651658, "grad_norm": 2.721670150756836, "learning_rate": 1.4026753827104533e-06, "loss": 0.3112, "step": 13686 }, { "epoch": 1.6655917249771828, "grad_norm": 2.670419216156006, "learning_rate": 1.4016792397822943e-06, "loss": 0.3597, "step": 13687 }, { "epoch": 1.6657134164891998, "grad_norm": 1.557462453842163, "learning_rate": 1.4006834240380373e-06, "loss": 0.327, "step": 13688 }, { "epoch": 1.6658351080012168, "grad_norm": 2.4295411109924316, "learning_rate": 1.3996879355155757e-06, "loss": 0.3235, "step": 13689 }, { "epoch": 1.6659567995132338, "grad_norm": 1.8355549573898315, "learning_rate": 1.3986927742527878e-06, "loss": 0.3706, "step": 13690 }, { "epoch": 1.6660784910252509, "grad_norm": 2.6848819255828857, "learning_rate": 1.3976979402875458e-06, "loss": 0.4611, "step": 13691 }, { "epoch": 1.6662001825372679, "grad_norm": 1.8984572887420654, "learning_rate": 1.3967034336577024e-06, "loss": 0.3423, "step": 13692 }, { "epoch": 1.6663218740492851, "grad_norm": 1.914689302444458, "learning_rate": 1.3957092544010986e-06, "loss": 0.4128, "step": 13693 }, { "epoch": 1.6664435655613021, "grad_norm": 1.6922495365142822, "learning_rate": 1.394715402555571e-06, "loss": 0.3139, "step": 13694 }, { "epoch": 1.6665652570733192, "grad_norm": 2.5484304428100586, "learning_rate": 1.3937218781589335e-06, "loss": 0.4163, "step": 13695 }, { "epoch": 1.6666869485853362, "grad_norm": 2.0473713874816895, "learning_rate": 1.3927286812489904e-06, "loss": 0.3593, "step": 13696 }, { "epoch": 1.6668086400973532, "grad_norm": 2.2794864177703857, "learning_rate": 1.3917358118635394e-06, "loss": 0.2856, "step": 13697 }, { "epoch": 1.6669303316093702, "grad_norm": 1.7777392864227295, "learning_rate": 1.390743270040359e-06, "loss": 0.3394, "step": 13698 }, { "epoch": 1.6670520231213874, "grad_norm": 2.142390251159668, "learning_rate": 1.3897510558172178e-06, "loss": 0.4225, "step": 13699 }, { "epoch": 1.6671737146334045, "grad_norm": 3.37737774848938, "learning_rate": 1.3887591692318704e-06, "loss": 0.3955, "step": 13700 }, { "epoch": 1.6672954061454215, "grad_norm": 1.6774508953094482, "learning_rate": 1.3877676103220594e-06, "loss": 0.3749, "step": 13701 }, { "epoch": 1.6674170976574385, "grad_norm": 1.7301967144012451, "learning_rate": 1.3867763791255184e-06, "loss": 0.4008, "step": 13702 }, { "epoch": 1.6675387891694555, "grad_norm": 2.0341153144836426, "learning_rate": 1.3857854756799649e-06, "loss": 0.3601, "step": 13703 }, { "epoch": 1.6676604806814725, "grad_norm": 1.6317696571350098, "learning_rate": 1.384794900023103e-06, "loss": 0.3255, "step": 13704 }, { "epoch": 1.6677821721934896, "grad_norm": 2.622546911239624, "learning_rate": 1.3838046521926286e-06, "loss": 0.4034, "step": 13705 }, { "epoch": 1.6679038637055066, "grad_norm": 2.03069806098938, "learning_rate": 1.3828147322262209e-06, "loss": 0.4118, "step": 13706 }, { "epoch": 1.6680255552175236, "grad_norm": 1.7591652870178223, "learning_rate": 1.3818251401615501e-06, "loss": 0.3499, "step": 13707 }, { "epoch": 1.6681472467295406, "grad_norm": 2.0143213272094727, "learning_rate": 1.3808358760362728e-06, "loss": 0.3228, "step": 13708 }, { "epoch": 1.6682689382415576, "grad_norm": 1.5251108407974243, "learning_rate": 1.3798469398880277e-06, "loss": 0.3599, "step": 13709 }, { "epoch": 1.6683906297535747, "grad_norm": 1.732371211051941, "learning_rate": 1.3788583317544546e-06, "loss": 0.418, "step": 13710 }, { "epoch": 1.6685123212655917, "grad_norm": 3.6833293437957764, "learning_rate": 1.3778700516731636e-06, "loss": 0.4541, "step": 13711 }, { "epoch": 1.6686340127776087, "grad_norm": 1.495136022567749, "learning_rate": 1.3768820996817623e-06, "loss": 0.3087, "step": 13712 }, { "epoch": 1.6687557042896257, "grad_norm": 1.7879563570022583, "learning_rate": 1.3758944758178482e-06, "loss": 0.3407, "step": 13713 }, { "epoch": 1.6688773958016427, "grad_norm": 2.007225275039673, "learning_rate": 1.3749071801189983e-06, "loss": 0.3569, "step": 13714 }, { "epoch": 1.6689990873136598, "grad_norm": 1.7144052982330322, "learning_rate": 1.3739202126227847e-06, "loss": 0.3957, "step": 13715 }, { "epoch": 1.6691207788256768, "grad_norm": 2.2162551879882812, "learning_rate": 1.3729335733667627e-06, "loss": 0.4748, "step": 13716 }, { "epoch": 1.6692424703376938, "grad_norm": 2.049804210662842, "learning_rate": 1.3719472623884723e-06, "loss": 0.342, "step": 13717 }, { "epoch": 1.669364161849711, "grad_norm": 2.3981854915618896, "learning_rate": 1.3709612797254502e-06, "loss": 0.4169, "step": 13718 }, { "epoch": 1.669485853361728, "grad_norm": 1.3542389869689941, "learning_rate": 1.369975625415213e-06, "loss": 0.3145, "step": 13719 }, { "epoch": 1.669607544873745, "grad_norm": 2.2779381275177, "learning_rate": 1.3689902994952631e-06, "loss": 0.3296, "step": 13720 }, { "epoch": 1.669729236385762, "grad_norm": 2.5263664722442627, "learning_rate": 1.3680053020031004e-06, "loss": 0.2926, "step": 13721 }, { "epoch": 1.6698509278977791, "grad_norm": 1.904555082321167, "learning_rate": 1.3670206329762037e-06, "loss": 0.3938, "step": 13722 }, { "epoch": 1.6699726194097961, "grad_norm": 2.192382574081421, "learning_rate": 1.366036292452041e-06, "loss": 0.3653, "step": 13723 }, { "epoch": 1.6700943109218132, "grad_norm": 2.2709121704101562, "learning_rate": 1.3650522804680676e-06, "loss": 0.3385, "step": 13724 }, { "epoch": 1.6702160024338304, "grad_norm": 1.7489142417907715, "learning_rate": 1.3640685970617274e-06, "loss": 0.3904, "step": 13725 }, { "epoch": 1.6703376939458474, "grad_norm": 1.9560399055480957, "learning_rate": 1.3630852422704544e-06, "loss": 0.4114, "step": 13726 }, { "epoch": 1.6704593854578644, "grad_norm": 2.172668218612671, "learning_rate": 1.3621022161316654e-06, "loss": 0.4002, "step": 13727 }, { "epoch": 1.6705810769698815, "grad_norm": 1.9341093301773071, "learning_rate": 1.361119518682764e-06, "loss": 0.3356, "step": 13728 }, { "epoch": 1.6707027684818985, "grad_norm": 2.005704164505005, "learning_rate": 1.3601371499611483e-06, "loss": 0.3891, "step": 13729 }, { "epoch": 1.6708244599939155, "grad_norm": 2.575528860092163, "learning_rate": 1.3591551100041977e-06, "loss": 0.3683, "step": 13730 }, { "epoch": 1.6709461515059325, "grad_norm": 1.7623628377914429, "learning_rate": 1.3581733988492795e-06, "loss": 0.3643, "step": 13731 }, { "epoch": 1.6710678430179495, "grad_norm": 1.4995167255401611, "learning_rate": 1.3571920165337527e-06, "loss": 0.3649, "step": 13732 }, { "epoch": 1.6711895345299665, "grad_norm": 1.898901104927063, "learning_rate": 1.3562109630949593e-06, "loss": 0.3681, "step": 13733 }, { "epoch": 1.6713112260419836, "grad_norm": 3.507563591003418, "learning_rate": 1.3552302385702297e-06, "loss": 0.2993, "step": 13734 }, { "epoch": 1.6714329175540006, "grad_norm": 2.015133857727051, "learning_rate": 1.3542498429968841e-06, "loss": 0.3996, "step": 13735 }, { "epoch": 1.6715546090660176, "grad_norm": 1.8106026649475098, "learning_rate": 1.353269776412225e-06, "loss": 0.3882, "step": 13736 }, { "epoch": 1.6716763005780346, "grad_norm": 1.6142646074295044, "learning_rate": 1.3522900388535509e-06, "loss": 0.4044, "step": 13737 }, { "epoch": 1.6717979920900516, "grad_norm": 2.2084908485412598, "learning_rate": 1.3513106303581413e-06, "loss": 0.3058, "step": 13738 }, { "epoch": 1.6719196836020687, "grad_norm": 2.259007692337036, "learning_rate": 1.3503315509632608e-06, "loss": 0.4203, "step": 13739 }, { "epoch": 1.6720413751140857, "grad_norm": 1.9059878587722778, "learning_rate": 1.3493528007061718e-06, "loss": 0.3516, "step": 13740 }, { "epoch": 1.6721630666261027, "grad_norm": 2.7167346477508545, "learning_rate": 1.3483743796241144e-06, "loss": 0.3222, "step": 13741 }, { "epoch": 1.6722847581381197, "grad_norm": 2.21913743019104, "learning_rate": 1.3473962877543167e-06, "loss": 0.4206, "step": 13742 }, { "epoch": 1.6724064496501367, "grad_norm": 1.735192894935608, "learning_rate": 1.3464185251340035e-06, "loss": 0.4098, "step": 13743 }, { "epoch": 1.672528141162154, "grad_norm": 1.9332529306411743, "learning_rate": 1.3454410918003758e-06, "loss": 0.4072, "step": 13744 }, { "epoch": 1.672649832674171, "grad_norm": 1.307860255241394, "learning_rate": 1.3444639877906296e-06, "loss": 0.3356, "step": 13745 }, { "epoch": 1.672771524186188, "grad_norm": 3.0803723335266113, "learning_rate": 1.3434872131419452e-06, "loss": 0.3316, "step": 13746 }, { "epoch": 1.672893215698205, "grad_norm": 2.2157514095306396, "learning_rate": 1.3425107678914872e-06, "loss": 0.3123, "step": 13747 }, { "epoch": 1.673014907210222, "grad_norm": 1.4930944442749023, "learning_rate": 1.341534652076417e-06, "loss": 0.3511, "step": 13748 }, { "epoch": 1.673136598722239, "grad_norm": 2.276279926300049, "learning_rate": 1.3405588657338754e-06, "loss": 0.3332, "step": 13749 }, { "epoch": 1.6732582902342563, "grad_norm": 3.006722927093506, "learning_rate": 1.3395834089009906e-06, "loss": 0.3942, "step": 13750 }, { "epoch": 1.6733799817462733, "grad_norm": 1.903573989868164, "learning_rate": 1.3386082816148848e-06, "loss": 0.4068, "step": 13751 }, { "epoch": 1.6735016732582904, "grad_norm": 2.9941470623016357, "learning_rate": 1.3376334839126625e-06, "loss": 0.2982, "step": 13752 }, { "epoch": 1.6736233647703074, "grad_norm": 1.5512161254882812, "learning_rate": 1.336659015831414e-06, "loss": 0.3322, "step": 13753 }, { "epoch": 1.6737450562823244, "grad_norm": 3.2248644828796387, "learning_rate": 1.3356848774082255e-06, "loss": 0.4243, "step": 13754 }, { "epoch": 1.6738667477943414, "grad_norm": 1.7863333225250244, "learning_rate": 1.33471106868016e-06, "loss": 0.3175, "step": 13755 }, { "epoch": 1.6739884393063584, "grad_norm": 1.6618281602859497, "learning_rate": 1.3337375896842753e-06, "loss": 0.3883, "step": 13756 }, { "epoch": 1.6741101308183755, "grad_norm": 1.7667138576507568, "learning_rate": 1.3327644404576135e-06, "loss": 0.368, "step": 13757 }, { "epoch": 1.6742318223303925, "grad_norm": 1.4757273197174072, "learning_rate": 1.3317916210372018e-06, "loss": 0.3654, "step": 13758 }, { "epoch": 1.6743535138424095, "grad_norm": 1.6524502038955688, "learning_rate": 1.330819131460064e-06, "loss": 0.3866, "step": 13759 }, { "epoch": 1.6744752053544265, "grad_norm": 2.698432207107544, "learning_rate": 1.3298469717632023e-06, "loss": 0.317, "step": 13760 }, { "epoch": 1.6745968968664435, "grad_norm": 1.7345067262649536, "learning_rate": 1.3288751419836077e-06, "loss": 0.3255, "step": 13761 }, { "epoch": 1.6747185883784605, "grad_norm": 1.4904327392578125, "learning_rate": 1.3279036421582648e-06, "loss": 0.3853, "step": 13762 }, { "epoch": 1.6748402798904776, "grad_norm": 1.903839349746704, "learning_rate": 1.3269324723241373e-06, "loss": 0.3981, "step": 13763 }, { "epoch": 1.6749619714024946, "grad_norm": 1.7542340755462646, "learning_rate": 1.32596163251818e-06, "loss": 0.3119, "step": 13764 }, { "epoch": 1.6750836629145116, "grad_norm": 1.9150886535644531, "learning_rate": 1.3249911227773393e-06, "loss": 0.3797, "step": 13765 }, { "epoch": 1.6752053544265286, "grad_norm": 1.7638375759124756, "learning_rate": 1.3240209431385399e-06, "loss": 0.3333, "step": 13766 }, { "epoch": 1.6753270459385456, "grad_norm": 2.2814157009124756, "learning_rate": 1.3230510936387042e-06, "loss": 0.4052, "step": 13767 }, { "epoch": 1.6754487374505627, "grad_norm": 1.629783034324646, "learning_rate": 1.3220815743147374e-06, "loss": 0.335, "step": 13768 }, { "epoch": 1.67557042896258, "grad_norm": 2.9230308532714844, "learning_rate": 1.3211123852035245e-06, "loss": 0.39, "step": 13769 }, { "epoch": 1.675692120474597, "grad_norm": 3.9670817852020264, "learning_rate": 1.3201435263419514e-06, "loss": 0.4153, "step": 13770 }, { "epoch": 1.675813811986614, "grad_norm": 2.5139501094818115, "learning_rate": 1.3191749977668833e-06, "loss": 0.4397, "step": 13771 }, { "epoch": 1.675935503498631, "grad_norm": 1.3515489101409912, "learning_rate": 1.3182067995151736e-06, "loss": 0.3472, "step": 13772 }, { "epoch": 1.676057195010648, "grad_norm": 2.188394069671631, "learning_rate": 1.3172389316236667e-06, "loss": 0.3905, "step": 13773 }, { "epoch": 1.676178886522665, "grad_norm": 2.337156057357788, "learning_rate": 1.3162713941291895e-06, "loss": 0.3573, "step": 13774 }, { "epoch": 1.6763005780346822, "grad_norm": 3.3132240772247314, "learning_rate": 1.3153041870685611e-06, "loss": 0.4008, "step": 13775 }, { "epoch": 1.6764222695466993, "grad_norm": 1.6074503660202026, "learning_rate": 1.3143373104785862e-06, "loss": 0.3729, "step": 13776 }, { "epoch": 1.6765439610587163, "grad_norm": 2.6904752254486084, "learning_rate": 1.3133707643960515e-06, "loss": 0.3097, "step": 13777 }, { "epoch": 1.6766656525707333, "grad_norm": 2.233013868331909, "learning_rate": 1.3124045488577419e-06, "loss": 0.3278, "step": 13778 }, { "epoch": 1.6767873440827503, "grad_norm": 2.1369080543518066, "learning_rate": 1.311438663900424e-06, "loss": 0.3418, "step": 13779 }, { "epoch": 1.6769090355947673, "grad_norm": 2.949039936065674, "learning_rate": 1.3104731095608448e-06, "loss": 0.4202, "step": 13780 }, { "epoch": 1.6770307271067844, "grad_norm": 1.3142260313034058, "learning_rate": 1.3095078858757527e-06, "loss": 0.3365, "step": 13781 }, { "epoch": 1.6771524186188014, "grad_norm": 1.9385156631469727, "learning_rate": 1.3085429928818716e-06, "loss": 0.3743, "step": 13782 }, { "epoch": 1.6772741101308184, "grad_norm": 1.6352856159210205, "learning_rate": 1.3075784306159222e-06, "loss": 0.288, "step": 13783 }, { "epoch": 1.6773958016428354, "grad_norm": 1.819472312927246, "learning_rate": 1.3066141991146053e-06, "loss": 0.4, "step": 13784 }, { "epoch": 1.6775174931548524, "grad_norm": 4.280248641967773, "learning_rate": 1.3056502984146103e-06, "loss": 0.4649, "step": 13785 }, { "epoch": 1.6776391846668695, "grad_norm": 1.7480182647705078, "learning_rate": 1.304686728552621e-06, "loss": 0.3945, "step": 13786 }, { "epoch": 1.6777608761788865, "grad_norm": 2.491863250732422, "learning_rate": 1.3037234895652984e-06, "loss": 0.3953, "step": 13787 }, { "epoch": 1.6778825676909035, "grad_norm": 1.9823706150054932, "learning_rate": 1.3027605814892963e-06, "loss": 0.3679, "step": 13788 }, { "epoch": 1.6780042592029205, "grad_norm": 2.13973331451416, "learning_rate": 1.3017980043612578e-06, "loss": 0.396, "step": 13789 }, { "epoch": 1.6781259507149375, "grad_norm": 1.9010581970214844, "learning_rate": 1.3008357582178099e-06, "loss": 0.3497, "step": 13790 }, { "epoch": 1.6782476422269546, "grad_norm": 1.6171354055404663, "learning_rate": 1.299873843095568e-06, "loss": 0.3662, "step": 13791 }, { "epoch": 1.6783693337389716, "grad_norm": 1.9463456869125366, "learning_rate": 1.2989122590311342e-06, "loss": 0.4269, "step": 13792 }, { "epoch": 1.6784910252509886, "grad_norm": 2.181964874267578, "learning_rate": 1.2979510060610978e-06, "loss": 0.4205, "step": 13793 }, { "epoch": 1.6786127167630058, "grad_norm": 1.7647093534469604, "learning_rate": 1.2969900842220396e-06, "loss": 0.3909, "step": 13794 }, { "epoch": 1.6787344082750228, "grad_norm": 3.173133134841919, "learning_rate": 1.2960294935505224e-06, "loss": 0.4152, "step": 13795 }, { "epoch": 1.6788560997870399, "grad_norm": 1.5895024538040161, "learning_rate": 1.2950692340830972e-06, "loss": 0.3698, "step": 13796 }, { "epoch": 1.6789777912990569, "grad_norm": 1.9458640813827515, "learning_rate": 1.2941093058563082e-06, "loss": 0.3907, "step": 13797 }, { "epoch": 1.679099482811074, "grad_norm": 1.5639913082122803, "learning_rate": 1.2931497089066802e-06, "loss": 0.3709, "step": 13798 }, { "epoch": 1.679221174323091, "grad_norm": 2.37680721282959, "learning_rate": 1.2921904432707255e-06, "loss": 0.4122, "step": 13799 }, { "epoch": 1.6793428658351082, "grad_norm": 1.9247478246688843, "learning_rate": 1.2912315089849515e-06, "loss": 0.3492, "step": 13800 }, { "epoch": 1.6794645573471252, "grad_norm": 1.4941282272338867, "learning_rate": 1.2902729060858444e-06, "loss": 0.3421, "step": 13801 }, { "epoch": 1.6795862488591422, "grad_norm": 2.026052951812744, "learning_rate": 1.2893146346098817e-06, "loss": 0.3603, "step": 13802 }, { "epoch": 1.6797079403711592, "grad_norm": 2.213083028793335, "learning_rate": 1.2883566945935266e-06, "loss": 0.3487, "step": 13803 }, { "epoch": 1.6798296318831762, "grad_norm": 2.2838826179504395, "learning_rate": 1.28739908607323e-06, "loss": 0.3832, "step": 13804 }, { "epoch": 1.6799513233951933, "grad_norm": 1.776839017868042, "learning_rate": 1.2864418090854337e-06, "loss": 0.4198, "step": 13805 }, { "epoch": 1.6800730149072103, "grad_norm": 1.4478399753570557, "learning_rate": 1.285484863666563e-06, "loss": 0.3101, "step": 13806 }, { "epoch": 1.6801947064192273, "grad_norm": 1.535431981086731, "learning_rate": 1.2845282498530298e-06, "loss": 0.3448, "step": 13807 }, { "epoch": 1.6803163979312443, "grad_norm": 2.038459300994873, "learning_rate": 1.2835719676812374e-06, "loss": 0.357, "step": 13808 }, { "epoch": 1.6804380894432613, "grad_norm": 3.2336442470550537, "learning_rate": 1.2826160171875746e-06, "loss": 0.35, "step": 13809 }, { "epoch": 1.6805597809552784, "grad_norm": 1.693761944770813, "learning_rate": 1.2816603984084142e-06, "loss": 0.3633, "step": 13810 }, { "epoch": 1.6806814724672954, "grad_norm": 2.309576988220215, "learning_rate": 1.280705111380124e-06, "loss": 0.4252, "step": 13811 }, { "epoch": 1.6808031639793124, "grad_norm": 2.395458221435547, "learning_rate": 1.2797501561390513e-06, "loss": 0.4361, "step": 13812 }, { "epoch": 1.6809248554913294, "grad_norm": 1.6999075412750244, "learning_rate": 1.278795532721534e-06, "loss": 0.3988, "step": 13813 }, { "epoch": 1.6810465470033464, "grad_norm": 1.5286123752593994, "learning_rate": 1.2778412411639018e-06, "loss": 0.3844, "step": 13814 }, { "epoch": 1.6811682385153635, "grad_norm": 2.7576487064361572, "learning_rate": 1.2768872815024614e-06, "loss": 0.3417, "step": 13815 }, { "epoch": 1.6812899300273805, "grad_norm": 1.5065091848373413, "learning_rate": 1.2759336537735168e-06, "loss": 0.334, "step": 13816 }, { "epoch": 1.6814116215393975, "grad_norm": 1.6406195163726807, "learning_rate": 1.2749803580133547e-06, "loss": 0.3307, "step": 13817 }, { "epoch": 1.6815333130514145, "grad_norm": 1.4684678316116333, "learning_rate": 1.2740273942582481e-06, "loss": 0.3893, "step": 13818 }, { "epoch": 1.6816550045634318, "grad_norm": 1.5859050750732422, "learning_rate": 1.273074762544463e-06, "loss": 0.3381, "step": 13819 }, { "epoch": 1.6817766960754488, "grad_norm": 1.3808832168579102, "learning_rate": 1.2721224629082463e-06, "loss": 0.3572, "step": 13820 }, { "epoch": 1.6818983875874658, "grad_norm": 2.258392095565796, "learning_rate": 1.271170495385834e-06, "loss": 0.3928, "step": 13821 }, { "epoch": 1.6820200790994828, "grad_norm": 1.7828283309936523, "learning_rate": 1.2702188600134536e-06, "loss": 0.4007, "step": 13822 }, { "epoch": 1.6821417706114998, "grad_norm": 2.0993125438690186, "learning_rate": 1.269267556827315e-06, "loss": 0.3462, "step": 13823 }, { "epoch": 1.6822634621235169, "grad_norm": 1.4628483057022095, "learning_rate": 1.2683165858636159e-06, "loss": 0.3699, "step": 13824 }, { "epoch": 1.6823851536355339, "grad_norm": 2.096174716949463, "learning_rate": 1.2673659471585486e-06, "loss": 0.426, "step": 13825 }, { "epoch": 1.6825068451475511, "grad_norm": 2.4189236164093018, "learning_rate": 1.2664156407482787e-06, "loss": 0.3812, "step": 13826 }, { "epoch": 1.6826285366595681, "grad_norm": 1.8713542222976685, "learning_rate": 1.2654656666689724e-06, "loss": 0.4129, "step": 13827 }, { "epoch": 1.6827502281715851, "grad_norm": 2.3637099266052246, "learning_rate": 1.2645160249567777e-06, "loss": 0.3983, "step": 13828 }, { "epoch": 1.6828719196836022, "grad_norm": 1.902443766593933, "learning_rate": 1.263566715647827e-06, "loss": 0.3261, "step": 13829 }, { "epoch": 1.6829936111956192, "grad_norm": 2.1366183757781982, "learning_rate": 1.262617738778249e-06, "loss": 0.3142, "step": 13830 }, { "epoch": 1.6831153027076362, "grad_norm": 2.6139023303985596, "learning_rate": 1.2616690943841515e-06, "loss": 0.3623, "step": 13831 }, { "epoch": 1.6832369942196532, "grad_norm": 1.7788294553756714, "learning_rate": 1.26072078250163e-06, "loss": 0.2905, "step": 13832 }, { "epoch": 1.6833586857316702, "grad_norm": 1.4570634365081787, "learning_rate": 1.2597728031667733e-06, "loss": 0.3651, "step": 13833 }, { "epoch": 1.6834803772436873, "grad_norm": 1.4675289392471313, "learning_rate": 1.2588251564156517e-06, "loss": 0.3437, "step": 13834 }, { "epoch": 1.6836020687557043, "grad_norm": 2.492671489715576, "learning_rate": 1.2578778422843275e-06, "loss": 0.4255, "step": 13835 }, { "epoch": 1.6837237602677213, "grad_norm": 1.3692865371704102, "learning_rate": 1.2569308608088471e-06, "loss": 0.341, "step": 13836 }, { "epoch": 1.6838454517797383, "grad_norm": 1.5555249452590942, "learning_rate": 1.2559842120252442e-06, "loss": 0.3959, "step": 13837 }, { "epoch": 1.6839671432917553, "grad_norm": 1.4702510833740234, "learning_rate": 1.255037895969542e-06, "loss": 0.3353, "step": 13838 }, { "epoch": 1.6840888348037724, "grad_norm": 2.1564106941223145, "learning_rate": 1.2540919126777484e-06, "loss": 0.3636, "step": 13839 }, { "epoch": 1.6842105263157894, "grad_norm": 2.056885242462158, "learning_rate": 1.253146262185858e-06, "loss": 0.3657, "step": 13840 }, { "epoch": 1.6843322178278064, "grad_norm": 2.310302495956421, "learning_rate": 1.2522009445298599e-06, "loss": 0.3451, "step": 13841 }, { "epoch": 1.6844539093398234, "grad_norm": 2.25286865234375, "learning_rate": 1.2512559597457208e-06, "loss": 0.3619, "step": 13842 }, { "epoch": 1.6845756008518404, "grad_norm": 1.9163131713867188, "learning_rate": 1.2503113078694028e-06, "loss": 0.4133, "step": 13843 }, { "epoch": 1.6846972923638575, "grad_norm": 2.0300254821777344, "learning_rate": 1.249366988936851e-06, "loss": 0.4232, "step": 13844 }, { "epoch": 1.6848189838758747, "grad_norm": 2.266204833984375, "learning_rate": 1.248423002983996e-06, "loss": 0.4228, "step": 13845 }, { "epoch": 1.6849406753878917, "grad_norm": 2.1649701595306396, "learning_rate": 1.247479350046762e-06, "loss": 0.3464, "step": 13846 }, { "epoch": 1.6850623668999087, "grad_norm": 2.1098644733428955, "learning_rate": 1.2465360301610562e-06, "loss": 0.3811, "step": 13847 }, { "epoch": 1.6851840584119258, "grad_norm": 2.8243815898895264, "learning_rate": 1.2455930433627728e-06, "loss": 0.3138, "step": 13848 }, { "epoch": 1.6853057499239428, "grad_norm": 1.9501984119415283, "learning_rate": 1.2446503896877948e-06, "loss": 0.3886, "step": 13849 }, { "epoch": 1.6854274414359598, "grad_norm": 1.5686861276626587, "learning_rate": 1.2437080691719894e-06, "loss": 0.3512, "step": 13850 }, { "epoch": 1.685549132947977, "grad_norm": 1.653573751449585, "learning_rate": 1.24276608185122e-06, "loss": 0.3179, "step": 13851 }, { "epoch": 1.685670824459994, "grad_norm": 1.947106122970581, "learning_rate": 1.2418244277613267e-06, "loss": 0.3815, "step": 13852 }, { "epoch": 1.685792515972011, "grad_norm": 2.215709924697876, "learning_rate": 1.24088310693814e-06, "loss": 0.4164, "step": 13853 }, { "epoch": 1.685914207484028, "grad_norm": 1.3895610570907593, "learning_rate": 1.2399421194174833e-06, "loss": 0.3508, "step": 13854 }, { "epoch": 1.6860358989960451, "grad_norm": 1.9541537761688232, "learning_rate": 1.2390014652351622e-06, "loss": 0.37, "step": 13855 }, { "epoch": 1.6861575905080621, "grad_norm": 2.2421467304229736, "learning_rate": 1.2380611444269674e-06, "loss": 0.3755, "step": 13856 }, { "epoch": 1.6862792820200792, "grad_norm": 1.8879858255386353, "learning_rate": 1.2371211570286835e-06, "loss": 0.3922, "step": 13857 }, { "epoch": 1.6864009735320962, "grad_norm": 1.4031667709350586, "learning_rate": 1.2361815030760793e-06, "loss": 0.3025, "step": 13858 }, { "epoch": 1.6865226650441132, "grad_norm": 1.7046468257904053, "learning_rate": 1.2352421826049054e-06, "loss": 0.3903, "step": 13859 }, { "epoch": 1.6866443565561302, "grad_norm": 2.5772104263305664, "learning_rate": 1.2343031956509133e-06, "loss": 0.2999, "step": 13860 }, { "epoch": 1.6867660480681472, "grad_norm": 2.414714813232422, "learning_rate": 1.2333645422498241e-06, "loss": 0.3657, "step": 13861 }, { "epoch": 1.6868877395801642, "grad_norm": 1.5761195421218872, "learning_rate": 1.2324262224373628e-06, "loss": 0.3329, "step": 13862 }, { "epoch": 1.6870094310921813, "grad_norm": 1.5399327278137207, "learning_rate": 1.2314882362492308e-06, "loss": 0.3726, "step": 13863 }, { "epoch": 1.6871311226041983, "grad_norm": 2.0807573795318604, "learning_rate": 1.2305505837211195e-06, "loss": 0.3647, "step": 13864 }, { "epoch": 1.6872528141162153, "grad_norm": 1.6232025623321533, "learning_rate": 1.2296132648887128e-06, "loss": 0.3591, "step": 13865 }, { "epoch": 1.6873745056282323, "grad_norm": 1.7631481885910034, "learning_rate": 1.228676279787675e-06, "loss": 0.3674, "step": 13866 }, { "epoch": 1.6874961971402493, "grad_norm": 1.5286754369735718, "learning_rate": 1.227739628453658e-06, "loss": 0.3852, "step": 13867 }, { "epoch": 1.6876178886522664, "grad_norm": 1.6232655048370361, "learning_rate": 1.2268033109223077e-06, "loss": 0.343, "step": 13868 }, { "epoch": 1.6877395801642834, "grad_norm": 1.570472240447998, "learning_rate": 1.2258673272292509e-06, "loss": 0.395, "step": 13869 }, { "epoch": 1.6878612716763006, "grad_norm": 2.6342594623565674, "learning_rate": 1.2249316774101017e-06, "loss": 0.3615, "step": 13870 }, { "epoch": 1.6879829631883176, "grad_norm": 1.5583102703094482, "learning_rate": 1.2239963615004701e-06, "loss": 0.3216, "step": 13871 }, { "epoch": 1.6881046547003347, "grad_norm": 2.0572757720947266, "learning_rate": 1.223061379535938e-06, "loss": 0.3879, "step": 13872 }, { "epoch": 1.6882263462123517, "grad_norm": 1.362613558769226, "learning_rate": 1.222126731552089e-06, "loss": 0.2967, "step": 13873 }, { "epoch": 1.6883480377243687, "grad_norm": 1.8617174625396729, "learning_rate": 1.221192417584488e-06, "loss": 0.343, "step": 13874 }, { "epoch": 1.6884697292363857, "grad_norm": 3.0676445960998535, "learning_rate": 1.2202584376686843e-06, "loss": 0.416, "step": 13875 }, { "epoch": 1.688591420748403, "grad_norm": 1.53612220287323, "learning_rate": 1.2193247918402217e-06, "loss": 0.4226, "step": 13876 }, { "epoch": 1.68871311226042, "grad_norm": 1.6260018348693848, "learning_rate": 1.2183914801346264e-06, "loss": 0.3674, "step": 13877 }, { "epoch": 1.688834803772437, "grad_norm": 1.915148138999939, "learning_rate": 1.2174585025874097e-06, "loss": 0.3835, "step": 13878 }, { "epoch": 1.688956495284454, "grad_norm": 2.3580193519592285, "learning_rate": 1.216525859234079e-06, "loss": 0.4044, "step": 13879 }, { "epoch": 1.689078186796471, "grad_norm": 1.9624818563461304, "learning_rate": 1.2155935501101191e-06, "loss": 0.347, "step": 13880 }, { "epoch": 1.689199878308488, "grad_norm": 1.4094874858856201, "learning_rate": 1.214661575251005e-06, "loss": 0.3167, "step": 13881 }, { "epoch": 1.689321569820505, "grad_norm": 2.3209450244903564, "learning_rate": 1.213729934692205e-06, "loss": 0.4138, "step": 13882 }, { "epoch": 1.689443261332522, "grad_norm": 1.5316599607467651, "learning_rate": 1.2127986284691683e-06, "loss": 0.3751, "step": 13883 }, { "epoch": 1.6895649528445391, "grad_norm": 1.7891693115234375, "learning_rate": 1.2118676566173316e-06, "loss": 0.4153, "step": 13884 }, { "epoch": 1.6896866443565561, "grad_norm": 1.642689824104309, "learning_rate": 1.2109370191721203e-06, "loss": 0.3587, "step": 13885 }, { "epoch": 1.6898083358685732, "grad_norm": 2.266117572784424, "learning_rate": 1.2100067161689465e-06, "loss": 0.4444, "step": 13886 }, { "epoch": 1.6899300273805902, "grad_norm": 1.7410967350006104, "learning_rate": 1.2090767476432142e-06, "loss": 0.3354, "step": 13887 }, { "epoch": 1.6900517188926072, "grad_norm": 1.5513304471969604, "learning_rate": 1.208147113630307e-06, "loss": 0.3374, "step": 13888 }, { "epoch": 1.6901734104046242, "grad_norm": 2.0188941955566406, "learning_rate": 1.2072178141655988e-06, "loss": 0.3215, "step": 13889 }, { "epoch": 1.6902951019166412, "grad_norm": 1.7523757219314575, "learning_rate": 1.2062888492844539e-06, "loss": 0.332, "step": 13890 }, { "epoch": 1.6904167934286582, "grad_norm": 1.408253788948059, "learning_rate": 1.2053602190222214e-06, "loss": 0.3423, "step": 13891 }, { "epoch": 1.6905384849406753, "grad_norm": 2.106708526611328, "learning_rate": 1.2044319234142331e-06, "loss": 0.3765, "step": 13892 }, { "epoch": 1.6906601764526923, "grad_norm": 1.4886817932128906, "learning_rate": 1.203503962495819e-06, "loss": 0.3559, "step": 13893 }, { "epoch": 1.6907818679647093, "grad_norm": 1.8618484735488892, "learning_rate": 1.2025763363022869e-06, "loss": 0.35, "step": 13894 }, { "epoch": 1.6909035594767265, "grad_norm": 1.4520701169967651, "learning_rate": 1.2016490448689344e-06, "loss": 0.322, "step": 13895 }, { "epoch": 1.6910252509887436, "grad_norm": 1.3975701332092285, "learning_rate": 1.2007220882310478e-06, "loss": 0.335, "step": 13896 }, { "epoch": 1.6911469425007606, "grad_norm": 2.045530319213867, "learning_rate": 1.1997954664238964e-06, "loss": 0.4157, "step": 13897 }, { "epoch": 1.6912686340127776, "grad_norm": 1.7703114748001099, "learning_rate": 1.1988691794827457e-06, "loss": 0.3433, "step": 13898 }, { "epoch": 1.6913903255247946, "grad_norm": 1.9776839017868042, "learning_rate": 1.1979432274428405e-06, "loss": 0.3808, "step": 13899 }, { "epoch": 1.6915120170368116, "grad_norm": 1.9306470155715942, "learning_rate": 1.1970176103394115e-06, "loss": 0.307, "step": 13900 }, { "epoch": 1.6916337085488289, "grad_norm": 2.20820951461792, "learning_rate": 1.1960923282076863e-06, "loss": 0.3394, "step": 13901 }, { "epoch": 1.691755400060846, "grad_norm": 1.6490033864974976, "learning_rate": 1.195167381082869e-06, "loss": 0.3451, "step": 13902 }, { "epoch": 1.691877091572863, "grad_norm": 2.2584750652313232, "learning_rate": 1.19424276900016e-06, "loss": 0.3957, "step": 13903 }, { "epoch": 1.69199878308488, "grad_norm": 1.813049554824829, "learning_rate": 1.1933184919947404e-06, "loss": 0.39, "step": 13904 }, { "epoch": 1.692120474596897, "grad_norm": 1.6137909889221191, "learning_rate": 1.1923945501017786e-06, "loss": 0.3493, "step": 13905 }, { "epoch": 1.692242166108914, "grad_norm": 2.3704895973205566, "learning_rate": 1.1914709433564398e-06, "loss": 0.3918, "step": 13906 }, { "epoch": 1.692363857620931, "grad_norm": 2.6435279846191406, "learning_rate": 1.1905476717938614e-06, "loss": 0.3163, "step": 13907 }, { "epoch": 1.692485549132948, "grad_norm": 2.7807271480560303, "learning_rate": 1.1896247354491774e-06, "loss": 0.4237, "step": 13908 }, { "epoch": 1.692607240644965, "grad_norm": 2.0644032955169678, "learning_rate": 1.18870213435751e-06, "loss": 0.4196, "step": 13909 }, { "epoch": 1.692728932156982, "grad_norm": 1.563829779624939, "learning_rate": 1.1877798685539633e-06, "loss": 0.3664, "step": 13910 }, { "epoch": 1.692850623668999, "grad_norm": 2.3204684257507324, "learning_rate": 1.1868579380736344e-06, "loss": 0.3799, "step": 13911 }, { "epoch": 1.692972315181016, "grad_norm": 1.9118934869766235, "learning_rate": 1.1859363429516035e-06, "loss": 0.4313, "step": 13912 }, { "epoch": 1.6930940066930331, "grad_norm": 2.6560192108154297, "learning_rate": 1.1850150832229368e-06, "loss": 0.4295, "step": 13913 }, { "epoch": 1.6932156982050501, "grad_norm": 1.937640905380249, "learning_rate": 1.184094158922695e-06, "loss": 0.3631, "step": 13914 }, { "epoch": 1.6933373897170672, "grad_norm": 2.101738929748535, "learning_rate": 1.183173570085917e-06, "loss": 0.3749, "step": 13915 }, { "epoch": 1.6934590812290842, "grad_norm": 1.9780380725860596, "learning_rate": 1.1822533167476336e-06, "loss": 0.3338, "step": 13916 }, { "epoch": 1.6935807727411012, "grad_norm": 1.7169417142868042, "learning_rate": 1.1813333989428665e-06, "loss": 0.3714, "step": 13917 }, { "epoch": 1.6937024642531182, "grad_norm": 3.403289556503296, "learning_rate": 1.1804138167066136e-06, "loss": 0.4806, "step": 13918 }, { "epoch": 1.6938241557651352, "grad_norm": 1.8074461221694946, "learning_rate": 1.1794945700738724e-06, "loss": 0.3785, "step": 13919 }, { "epoch": 1.6939458472771525, "grad_norm": 1.990272879600525, "learning_rate": 1.1785756590796204e-06, "loss": 0.3894, "step": 13920 }, { "epoch": 1.6940675387891695, "grad_norm": 1.5768609046936035, "learning_rate": 1.1776570837588208e-06, "loss": 0.3387, "step": 13921 }, { "epoch": 1.6941892303011865, "grad_norm": 2.194485902786255, "learning_rate": 1.1767388441464333e-06, "loss": 0.3989, "step": 13922 }, { "epoch": 1.6943109218132035, "grad_norm": 1.8552031517028809, "learning_rate": 1.1758209402773967e-06, "loss": 0.4083, "step": 13923 }, { "epoch": 1.6944326133252205, "grad_norm": 3.6940269470214844, "learning_rate": 1.1749033721866354e-06, "loss": 0.3644, "step": 13924 }, { "epoch": 1.6945543048372376, "grad_norm": 3.08841872215271, "learning_rate": 1.1739861399090702e-06, "loss": 0.39, "step": 13925 }, { "epoch": 1.6946759963492546, "grad_norm": 3.911529302597046, "learning_rate": 1.173069243479601e-06, "loss": 0.356, "step": 13926 }, { "epoch": 1.6947976878612718, "grad_norm": 2.4748220443725586, "learning_rate": 1.1721526829331153e-06, "loss": 0.3627, "step": 13927 }, { "epoch": 1.6949193793732888, "grad_norm": 1.4452297687530518, "learning_rate": 1.1712364583044954e-06, "loss": 0.3711, "step": 13928 }, { "epoch": 1.6950410708853059, "grad_norm": 2.475959539413452, "learning_rate": 1.1703205696286024e-06, "loss": 0.3188, "step": 13929 }, { "epoch": 1.6951627623973229, "grad_norm": 1.9028396606445312, "learning_rate": 1.1694050169402883e-06, "loss": 0.3438, "step": 13930 }, { "epoch": 1.69528445390934, "grad_norm": 2.206341505050659, "learning_rate": 1.1684898002743916e-06, "loss": 0.3854, "step": 13931 }, { "epoch": 1.695406145421357, "grad_norm": 1.3340353965759277, "learning_rate": 1.167574919665736e-06, "loss": 0.3312, "step": 13932 }, { "epoch": 1.695527836933374, "grad_norm": 1.6846473217010498, "learning_rate": 1.1666603751491402e-06, "loss": 0.3982, "step": 13933 }, { "epoch": 1.695649528445391, "grad_norm": 1.9827247858047485, "learning_rate": 1.1657461667593994e-06, "loss": 0.3971, "step": 13934 }, { "epoch": 1.695771219957408, "grad_norm": 1.3461912870407104, "learning_rate": 1.1648322945313017e-06, "loss": 0.3421, "step": 13935 }, { "epoch": 1.695892911469425, "grad_norm": 2.17278790473938, "learning_rate": 1.1639187584996248e-06, "loss": 0.3444, "step": 13936 }, { "epoch": 1.696014602981442, "grad_norm": 1.8735146522521973, "learning_rate": 1.1630055586991285e-06, "loss": 0.3776, "step": 13937 }, { "epoch": 1.696136294493459, "grad_norm": 1.9665249586105347, "learning_rate": 1.1620926951645606e-06, "loss": 0.4169, "step": 13938 }, { "epoch": 1.696257986005476, "grad_norm": 1.7326833009719849, "learning_rate": 1.161180167930661e-06, "loss": 0.3695, "step": 13939 }, { "epoch": 1.696379677517493, "grad_norm": 2.694737195968628, "learning_rate": 1.1602679770321512e-06, "loss": 0.3375, "step": 13940 }, { "epoch": 1.69650136902951, "grad_norm": 1.9490004777908325, "learning_rate": 1.1593561225037431e-06, "loss": 0.3942, "step": 13941 }, { "epoch": 1.6966230605415271, "grad_norm": 2.8919477462768555, "learning_rate": 1.1584446043801323e-06, "loss": 0.4085, "step": 13942 }, { "epoch": 1.6967447520535441, "grad_norm": 1.697523832321167, "learning_rate": 1.1575334226960045e-06, "loss": 0.3673, "step": 13943 }, { "epoch": 1.6968664435655612, "grad_norm": 1.7414662837982178, "learning_rate": 1.156622577486034e-06, "loss": 0.3127, "step": 13944 }, { "epoch": 1.6969881350775782, "grad_norm": 1.6480767726898193, "learning_rate": 1.15571206878488e-06, "loss": 0.3199, "step": 13945 }, { "epoch": 1.6971098265895954, "grad_norm": 1.942179560661316, "learning_rate": 1.1548018966271856e-06, "loss": 0.35, "step": 13946 }, { "epoch": 1.6972315181016124, "grad_norm": 2.7838218212127686, "learning_rate": 1.15389206104759e-06, "loss": 0.3975, "step": 13947 }, { "epoch": 1.6973532096136295, "grad_norm": 1.9651570320129395, "learning_rate": 1.1529825620807121e-06, "loss": 0.4118, "step": 13948 }, { "epoch": 1.6974749011256465, "grad_norm": 3.165354013442993, "learning_rate": 1.1520733997611577e-06, "loss": 0.4557, "step": 13949 }, { "epoch": 1.6975965926376635, "grad_norm": 1.6352638006210327, "learning_rate": 1.1511645741235267e-06, "loss": 0.3404, "step": 13950 }, { "epoch": 1.6977182841496805, "grad_norm": 2.1372931003570557, "learning_rate": 1.1502560852023992e-06, "loss": 0.3674, "step": 13951 }, { "epoch": 1.6978399756616978, "grad_norm": 2.2757043838500977, "learning_rate": 1.1493479330323466e-06, "loss": 0.4067, "step": 13952 }, { "epoch": 1.6979616671737148, "grad_norm": 2.541049003601074, "learning_rate": 1.1484401176479243e-06, "loss": 0.3318, "step": 13953 }, { "epoch": 1.6980833586857318, "grad_norm": 1.514437198638916, "learning_rate": 1.147532639083676e-06, "loss": 0.363, "step": 13954 }, { "epoch": 1.6982050501977488, "grad_norm": 2.9432213306427, "learning_rate": 1.1466254973741364e-06, "loss": 0.3848, "step": 13955 }, { "epoch": 1.6983267417097658, "grad_norm": 3.3483433723449707, "learning_rate": 1.145718692553821e-06, "loss": 0.2824, "step": 13956 }, { "epoch": 1.6984484332217828, "grad_norm": 1.9449719190597534, "learning_rate": 1.1448122246572356e-06, "loss": 0.3729, "step": 13957 }, { "epoch": 1.6985701247337999, "grad_norm": 2.138198137283325, "learning_rate": 1.1439060937188762e-06, "loss": 0.2778, "step": 13958 }, { "epoch": 1.6986918162458169, "grad_norm": 1.4589548110961914, "learning_rate": 1.1430002997732204e-06, "loss": 0.3471, "step": 13959 }, { "epoch": 1.698813507757834, "grad_norm": 2.0117695331573486, "learning_rate": 1.1420948428547352e-06, "loss": 0.309, "step": 13960 }, { "epoch": 1.698935199269851, "grad_norm": 1.8093459606170654, "learning_rate": 1.1411897229978774e-06, "loss": 0.3968, "step": 13961 }, { "epoch": 1.699056890781868, "grad_norm": 1.7502095699310303, "learning_rate": 1.1402849402370853e-06, "loss": 0.4172, "step": 13962 }, { "epoch": 1.699178582293885, "grad_norm": 1.983483910560608, "learning_rate": 1.1393804946067943e-06, "loss": 0.3856, "step": 13963 }, { "epoch": 1.699300273805902, "grad_norm": 1.778069257736206, "learning_rate": 1.138476386141414e-06, "loss": 0.3451, "step": 13964 }, { "epoch": 1.699421965317919, "grad_norm": 1.9403551816940308, "learning_rate": 1.1375726148753464e-06, "loss": 0.3637, "step": 13965 }, { "epoch": 1.699543656829936, "grad_norm": 1.8398736715316772, "learning_rate": 1.1366691808429886e-06, "loss": 0.3688, "step": 13966 }, { "epoch": 1.699665348341953, "grad_norm": 1.6638399362564087, "learning_rate": 1.1357660840787133e-06, "loss": 0.3604, "step": 13967 }, { "epoch": 1.69978703985397, "grad_norm": 1.6198534965515137, "learning_rate": 1.134863324616885e-06, "loss": 0.393, "step": 13968 }, { "epoch": 1.699908731365987, "grad_norm": 2.1225740909576416, "learning_rate": 1.1339609024918586e-06, "loss": 0.3491, "step": 13969 }, { "epoch": 1.700030422878004, "grad_norm": 1.749290108680725, "learning_rate": 1.1330588177379698e-06, "loss": 0.3901, "step": 13970 }, { "epoch": 1.7001521143900213, "grad_norm": 2.071774959564209, "learning_rate": 1.132157070389549e-06, "loss": 0.3573, "step": 13971 }, { "epoch": 1.7002738059020384, "grad_norm": 2.022597312927246, "learning_rate": 1.1312556604809067e-06, "loss": 0.3951, "step": 13972 }, { "epoch": 1.7003954974140554, "grad_norm": 2.1706387996673584, "learning_rate": 1.1303545880463418e-06, "loss": 0.3426, "step": 13973 }, { "epoch": 1.7005171889260724, "grad_norm": 2.5023021697998047, "learning_rate": 1.129453853120146e-06, "loss": 0.338, "step": 13974 }, { "epoch": 1.7006388804380894, "grad_norm": 1.8623839616775513, "learning_rate": 1.1285534557365929e-06, "loss": 0.3595, "step": 13975 }, { "epoch": 1.7007605719501064, "grad_norm": 3.5104005336761475, "learning_rate": 1.127653395929943e-06, "loss": 0.414, "step": 13976 }, { "epoch": 1.7008822634621237, "grad_norm": 1.9361910820007324, "learning_rate": 1.1267536737344465e-06, "loss": 0.3411, "step": 13977 }, { "epoch": 1.7010039549741407, "grad_norm": 3.107074737548828, "learning_rate": 1.125854289184336e-06, "loss": 0.2885, "step": 13978 }, { "epoch": 1.7011256464861577, "grad_norm": 2.2797510623931885, "learning_rate": 1.124955242313842e-06, "loss": 0.3911, "step": 13979 }, { "epoch": 1.7012473379981747, "grad_norm": 1.9399561882019043, "learning_rate": 1.1240565331571707e-06, "loss": 0.3593, "step": 13980 }, { "epoch": 1.7013690295101918, "grad_norm": 2.1943323612213135, "learning_rate": 1.1231581617485176e-06, "loss": 0.3065, "step": 13981 }, { "epoch": 1.7014907210222088, "grad_norm": 1.545628547668457, "learning_rate": 1.1222601281220736e-06, "loss": 0.3482, "step": 13982 }, { "epoch": 1.7016124125342258, "grad_norm": 1.8336818218231201, "learning_rate": 1.1213624323120076e-06, "loss": 0.4191, "step": 13983 }, { "epoch": 1.7017341040462428, "grad_norm": 1.9648090600967407, "learning_rate": 1.1204650743524759e-06, "loss": 0.4103, "step": 13984 }, { "epoch": 1.7018557955582598, "grad_norm": 2.643662929534912, "learning_rate": 1.1195680542776311e-06, "loss": 0.3016, "step": 13985 }, { "epoch": 1.7019774870702769, "grad_norm": 1.5862783193588257, "learning_rate": 1.1186713721216025e-06, "loss": 0.3496, "step": 13986 }, { "epoch": 1.7020991785822939, "grad_norm": 2.0508296489715576, "learning_rate": 1.1177750279185118e-06, "loss": 0.3929, "step": 13987 }, { "epoch": 1.702220870094311, "grad_norm": 1.6170121431350708, "learning_rate": 1.116879021702466e-06, "loss": 0.3313, "step": 13988 }, { "epoch": 1.702342561606328, "grad_norm": 1.9966620206832886, "learning_rate": 1.1159833535075594e-06, "loss": 0.3838, "step": 13989 }, { "epoch": 1.702464253118345, "grad_norm": 1.8811813592910767, "learning_rate": 1.1150880233678764e-06, "loss": 0.3447, "step": 13990 }, { "epoch": 1.702585944630362, "grad_norm": 1.8958288431167603, "learning_rate": 1.1141930313174843e-06, "loss": 0.3547, "step": 13991 }, { "epoch": 1.702707636142379, "grad_norm": 1.6099693775177002, "learning_rate": 1.1132983773904393e-06, "loss": 0.3381, "step": 13992 }, { "epoch": 1.702829327654396, "grad_norm": 1.6218100786209106, "learning_rate": 1.1124040616207866e-06, "loss": 0.359, "step": 13993 }, { "epoch": 1.702951019166413, "grad_norm": 1.8699159622192383, "learning_rate": 1.1115100840425564e-06, "loss": 0.3438, "step": 13994 }, { "epoch": 1.70307271067843, "grad_norm": 1.7054446935653687, "learning_rate": 1.1106164446897628e-06, "loss": 0.3581, "step": 13995 }, { "epoch": 1.7031944021904473, "grad_norm": 3.486537218093872, "learning_rate": 1.1097231435964162e-06, "loss": 0.3141, "step": 13996 }, { "epoch": 1.7033160937024643, "grad_norm": 2.0044102668762207, "learning_rate": 1.1088301807965052e-06, "loss": 0.4258, "step": 13997 }, { "epoch": 1.7034377852144813, "grad_norm": 2.704425096511841, "learning_rate": 1.1079375563240103e-06, "loss": 0.448, "step": 13998 }, { "epoch": 1.7035594767264983, "grad_norm": 2.560760259628296, "learning_rate": 1.1070452702128965e-06, "loss": 0.2965, "step": 13999 }, { "epoch": 1.7036811682385153, "grad_norm": 1.3174242973327637, "learning_rate": 1.1061533224971156e-06, "loss": 0.3196, "step": 14000 }, { "epoch": 1.7038028597505324, "grad_norm": 1.7981939315795898, "learning_rate": 1.1052617132106113e-06, "loss": 0.3245, "step": 14001 }, { "epoch": 1.7039245512625496, "grad_norm": 1.9320846796035767, "learning_rate": 1.1043704423873104e-06, "loss": 0.4217, "step": 14002 }, { "epoch": 1.7040462427745666, "grad_norm": 1.9292234182357788, "learning_rate": 1.1034795100611252e-06, "loss": 0.3626, "step": 14003 }, { "epoch": 1.7041679342865836, "grad_norm": 2.313657760620117, "learning_rate": 1.102588916265962e-06, "loss": 0.3527, "step": 14004 }, { "epoch": 1.7042896257986007, "grad_norm": 1.4426449537277222, "learning_rate": 1.1016986610357072e-06, "loss": 0.3316, "step": 14005 }, { "epoch": 1.7044113173106177, "grad_norm": 2.132216453552246, "learning_rate": 1.1008087444042336e-06, "loss": 0.3235, "step": 14006 }, { "epoch": 1.7045330088226347, "grad_norm": 3.11110258102417, "learning_rate": 1.099919166405411e-06, "loss": 0.4182, "step": 14007 }, { "epoch": 1.7046547003346517, "grad_norm": 2.1369504928588867, "learning_rate": 1.0990299270730854e-06, "loss": 0.3247, "step": 14008 }, { "epoch": 1.7047763918466687, "grad_norm": 2.776984691619873, "learning_rate": 1.098141026441093e-06, "loss": 0.3989, "step": 14009 }, { "epoch": 1.7048980833586858, "grad_norm": 1.7858883142471313, "learning_rate": 1.0972524645432647e-06, "loss": 0.3294, "step": 14010 }, { "epoch": 1.7050197748707028, "grad_norm": 3.6378374099731445, "learning_rate": 1.0963642414134035e-06, "loss": 0.4295, "step": 14011 }, { "epoch": 1.7051414663827198, "grad_norm": 2.4956066608428955, "learning_rate": 1.0954763570853155e-06, "loss": 0.353, "step": 14012 }, { "epoch": 1.7052631578947368, "grad_norm": 1.9461241960525513, "learning_rate": 1.0945888115927816e-06, "loss": 0.3219, "step": 14013 }, { "epoch": 1.7053848494067538, "grad_norm": 1.7975083589553833, "learning_rate": 1.0937016049695759e-06, "loss": 0.3573, "step": 14014 }, { "epoch": 1.7055065409187709, "grad_norm": 2.1224591732025146, "learning_rate": 1.0928147372494613e-06, "loss": 0.3831, "step": 14015 }, { "epoch": 1.7056282324307879, "grad_norm": 1.5556366443634033, "learning_rate": 1.091928208466182e-06, "loss": 0.3425, "step": 14016 }, { "epoch": 1.705749923942805, "grad_norm": 1.7902332544326782, "learning_rate": 1.0910420186534721e-06, "loss": 0.349, "step": 14017 }, { "epoch": 1.705871615454822, "grad_norm": 2.7425386905670166, "learning_rate": 1.0901561678450546e-06, "loss": 0.3289, "step": 14018 }, { "epoch": 1.705993306966839, "grad_norm": 2.3676950931549072, "learning_rate": 1.089270656074638e-06, "loss": 0.3434, "step": 14019 }, { "epoch": 1.706114998478856, "grad_norm": 1.9426798820495605, "learning_rate": 1.0883854833759155e-06, "loss": 0.3944, "step": 14020 }, { "epoch": 1.7062366899908732, "grad_norm": 3.295860767364502, "learning_rate": 1.0875006497825747e-06, "loss": 0.4305, "step": 14021 }, { "epoch": 1.7063583815028902, "grad_norm": 2.0086307525634766, "learning_rate": 1.0866161553282784e-06, "loss": 0.3842, "step": 14022 }, { "epoch": 1.7064800730149072, "grad_norm": 1.6595697402954102, "learning_rate": 1.0857320000466898e-06, "loss": 0.3505, "step": 14023 }, { "epoch": 1.7066017645269242, "grad_norm": 1.8961855173110962, "learning_rate": 1.0848481839714487e-06, "loss": 0.3619, "step": 14024 }, { "epoch": 1.7067234560389413, "grad_norm": 2.3203125, "learning_rate": 1.083964707136187e-06, "loss": 0.3429, "step": 14025 }, { "epoch": 1.7068451475509583, "grad_norm": 1.9095147848129272, "learning_rate": 1.0830815695745245e-06, "loss": 0.2919, "step": 14026 }, { "epoch": 1.7069668390629753, "grad_norm": 1.5612170696258545, "learning_rate": 1.0821987713200631e-06, "loss": 0.3202, "step": 14027 }, { "epoch": 1.7070885305749925, "grad_norm": 2.065566062927246, "learning_rate": 1.0813163124063996e-06, "loss": 0.4277, "step": 14028 }, { "epoch": 1.7072102220870096, "grad_norm": 2.7088770866394043, "learning_rate": 1.0804341928671102e-06, "loss": 0.3151, "step": 14029 }, { "epoch": 1.7073319135990266, "grad_norm": 1.5059829950332642, "learning_rate": 1.0795524127357616e-06, "loss": 0.3335, "step": 14030 }, { "epoch": 1.7074536051110436, "grad_norm": 3.178105354309082, "learning_rate": 1.07867097204591e-06, "loss": 0.4036, "step": 14031 }, { "epoch": 1.7075752966230606, "grad_norm": 1.8254328966140747, "learning_rate": 1.077789870831093e-06, "loss": 0.3365, "step": 14032 }, { "epoch": 1.7076969881350776, "grad_norm": 1.327294111251831, "learning_rate": 1.0769091091248397e-06, "loss": 0.322, "step": 14033 }, { "epoch": 1.7078186796470947, "grad_norm": 1.9347903728485107, "learning_rate": 1.076028686960665e-06, "loss": 0.3764, "step": 14034 }, { "epoch": 1.7079403711591117, "grad_norm": 1.7627278566360474, "learning_rate": 1.0751486043720683e-06, "loss": 0.3264, "step": 14035 }, { "epoch": 1.7080620626711287, "grad_norm": 2.2402665615081787, "learning_rate": 1.0742688613925422e-06, "loss": 0.4097, "step": 14036 }, { "epoch": 1.7081837541831457, "grad_norm": 1.9333844184875488, "learning_rate": 1.0733894580555616e-06, "loss": 0.3718, "step": 14037 }, { "epoch": 1.7083054456951627, "grad_norm": 3.3544297218322754, "learning_rate": 1.072510394394587e-06, "loss": 0.3651, "step": 14038 }, { "epoch": 1.7084271372071798, "grad_norm": 1.7452428340911865, "learning_rate": 1.071631670443073e-06, "loss": 0.3477, "step": 14039 }, { "epoch": 1.7085488287191968, "grad_norm": 1.6241368055343628, "learning_rate": 1.0707532862344538e-06, "loss": 0.3513, "step": 14040 }, { "epoch": 1.7086705202312138, "grad_norm": 1.8884460926055908, "learning_rate": 1.0698752418021542e-06, "loss": 0.3792, "step": 14041 }, { "epoch": 1.7087922117432308, "grad_norm": 1.3426518440246582, "learning_rate": 1.0689975371795868e-06, "loss": 0.3302, "step": 14042 }, { "epoch": 1.7089139032552478, "grad_norm": 2.08935284614563, "learning_rate": 1.0681201724001488e-06, "loss": 0.3719, "step": 14043 }, { "epoch": 1.7090355947672649, "grad_norm": 1.592146873474121, "learning_rate": 1.0672431474972279e-06, "loss": 0.3668, "step": 14044 }, { "epoch": 1.7091572862792819, "grad_norm": 1.8158204555511475, "learning_rate": 1.0663664625041936e-06, "loss": 0.3592, "step": 14045 }, { "epoch": 1.709278977791299, "grad_norm": 1.6156903505325317, "learning_rate": 1.0654901174544053e-06, "loss": 0.3461, "step": 14046 }, { "epoch": 1.7094006693033161, "grad_norm": 2.518251895904541, "learning_rate": 1.0646141123812136e-06, "loss": 0.4092, "step": 14047 }, { "epoch": 1.7095223608153332, "grad_norm": 2.2245280742645264, "learning_rate": 1.0637384473179502e-06, "loss": 0.3436, "step": 14048 }, { "epoch": 1.7096440523273502, "grad_norm": 2.391563653945923, "learning_rate": 1.0628631222979346e-06, "loss": 0.3524, "step": 14049 }, { "epoch": 1.7097657438393672, "grad_norm": 1.895557165145874, "learning_rate": 1.0619881373544772e-06, "loss": 0.3581, "step": 14050 }, { "epoch": 1.7098874353513842, "grad_norm": 1.6016755104064941, "learning_rate": 1.0611134925208722e-06, "loss": 0.2983, "step": 14051 }, { "epoch": 1.7100091268634012, "grad_norm": 1.8078246116638184, "learning_rate": 1.0602391878303987e-06, "loss": 0.3369, "step": 14052 }, { "epoch": 1.7101308183754185, "grad_norm": 1.8278409242630005, "learning_rate": 1.059365223316331e-06, "loss": 0.3903, "step": 14053 }, { "epoch": 1.7102525098874355, "grad_norm": 1.732277274131775, "learning_rate": 1.0584915990119216e-06, "loss": 0.3851, "step": 14054 }, { "epoch": 1.7103742013994525, "grad_norm": 1.5919405221939087, "learning_rate": 1.0576183149504137e-06, "loss": 0.3369, "step": 14055 }, { "epoch": 1.7104958929114695, "grad_norm": 1.998347282409668, "learning_rate": 1.0567453711650432e-06, "loss": 0.2933, "step": 14056 }, { "epoch": 1.7106175844234865, "grad_norm": 2.7652010917663574, "learning_rate": 1.055872767689018e-06, "loss": 0.3579, "step": 14057 }, { "epoch": 1.7107392759355036, "grad_norm": 2.5122175216674805, "learning_rate": 1.055000504555549e-06, "loss": 0.3179, "step": 14058 }, { "epoch": 1.7108609674475206, "grad_norm": 1.6296882629394531, "learning_rate": 1.0541285817978265e-06, "loss": 0.3535, "step": 14059 }, { "epoch": 1.7109826589595376, "grad_norm": 1.8032680749893188, "learning_rate": 1.0532569994490271e-06, "loss": 0.3228, "step": 14060 }, { "epoch": 1.7111043504715546, "grad_norm": 2.861661195755005, "learning_rate": 1.052385757542318e-06, "loss": 0.3332, "step": 14061 }, { "epoch": 1.7112260419835716, "grad_norm": 1.7813626527786255, "learning_rate": 1.0515148561108524e-06, "loss": 0.3764, "step": 14062 }, { "epoch": 1.7113477334955887, "grad_norm": 1.5075675249099731, "learning_rate": 1.050644295187767e-06, "loss": 0.3224, "step": 14063 }, { "epoch": 1.7114694250076057, "grad_norm": 1.7072079181671143, "learning_rate": 1.0497740748061925e-06, "loss": 0.3507, "step": 14064 }, { "epoch": 1.7115911165196227, "grad_norm": 1.846217155456543, "learning_rate": 1.0489041949992395e-06, "loss": 0.3647, "step": 14065 }, { "epoch": 1.7117128080316397, "grad_norm": 1.7694567441940308, "learning_rate": 1.0480346558000076e-06, "loss": 0.3647, "step": 14066 }, { "epoch": 1.7118344995436567, "grad_norm": 1.5622608661651611, "learning_rate": 1.0471654572415912e-06, "loss": 0.358, "step": 14067 }, { "epoch": 1.7119561910556738, "grad_norm": 2.289029121398926, "learning_rate": 1.0462965993570562e-06, "loss": 0.4145, "step": 14068 }, { "epoch": 1.7120778825676908, "grad_norm": 1.8689907789230347, "learning_rate": 1.0454280821794717e-06, "loss": 0.3602, "step": 14069 }, { "epoch": 1.7121995740797078, "grad_norm": 2.038823127746582, "learning_rate": 1.0445599057418821e-06, "loss": 0.3491, "step": 14070 }, { "epoch": 1.7123212655917248, "grad_norm": 2.37129282951355, "learning_rate": 1.0436920700773234e-06, "loss": 0.3959, "step": 14071 }, { "epoch": 1.712442957103742, "grad_norm": 2.348007917404175, "learning_rate": 1.0428245752188226e-06, "loss": 0.3277, "step": 14072 }, { "epoch": 1.712564648615759, "grad_norm": 2.007065773010254, "learning_rate": 1.0419574211993866e-06, "loss": 0.3582, "step": 14073 }, { "epoch": 1.712686340127776, "grad_norm": 2.0469300746917725, "learning_rate": 1.04109060805201e-06, "loss": 0.4017, "step": 14074 }, { "epoch": 1.7128080316397931, "grad_norm": 1.606426477432251, "learning_rate": 1.0402241358096832e-06, "loss": 0.3854, "step": 14075 }, { "epoch": 1.7129297231518101, "grad_norm": 1.681236743927002, "learning_rate": 1.039358004505373e-06, "loss": 0.3097, "step": 14076 }, { "epoch": 1.7130514146638272, "grad_norm": 1.9548197984695435, "learning_rate": 1.0384922141720356e-06, "loss": 0.3289, "step": 14077 }, { "epoch": 1.7131731061758444, "grad_norm": 2.0898630619049072, "learning_rate": 1.037626764842622e-06, "loss": 0.3583, "step": 14078 }, { "epoch": 1.7132947976878614, "grad_norm": 2.0821692943573, "learning_rate": 1.0367616565500604e-06, "loss": 0.322, "step": 14079 }, { "epoch": 1.7134164891998784, "grad_norm": 1.8662997484207153, "learning_rate": 1.0358968893272702e-06, "loss": 0.3544, "step": 14080 }, { "epoch": 1.7135381807118955, "grad_norm": 1.5352036952972412, "learning_rate": 1.0350324632071597e-06, "loss": 0.3544, "step": 14081 }, { "epoch": 1.7136598722239125, "grad_norm": 1.5609310865402222, "learning_rate": 1.0341683782226164e-06, "loss": 0.3731, "step": 14082 }, { "epoch": 1.7137815637359295, "grad_norm": 2.0905563831329346, "learning_rate": 1.0333046344065277e-06, "loss": 0.3993, "step": 14083 }, { "epoch": 1.7139032552479465, "grad_norm": 3.518739938735962, "learning_rate": 1.0324412317917575e-06, "loss": 0.4176, "step": 14084 }, { "epoch": 1.7140249467599635, "grad_norm": 1.8051915168762207, "learning_rate": 1.0315781704111583e-06, "loss": 0.4026, "step": 14085 }, { "epoch": 1.7141466382719805, "grad_norm": 2.331281900405884, "learning_rate": 1.0307154502975759e-06, "loss": 0.4104, "step": 14086 }, { "epoch": 1.7142683297839976, "grad_norm": 1.7676728963851929, "learning_rate": 1.0298530714838328e-06, "loss": 0.3323, "step": 14087 }, { "epoch": 1.7143900212960146, "grad_norm": 1.867126703262329, "learning_rate": 1.0289910340027508e-06, "loss": 0.3528, "step": 14088 }, { "epoch": 1.7145117128080316, "grad_norm": 1.8186051845550537, "learning_rate": 1.0281293378871283e-06, "loss": 0.3911, "step": 14089 }, { "epoch": 1.7146334043200486, "grad_norm": 2.3229928016662598, "learning_rate": 1.0272679831697553e-06, "loss": 0.3946, "step": 14090 }, { "epoch": 1.7147550958320656, "grad_norm": 2.176506519317627, "learning_rate": 1.0264069698834077e-06, "loss": 0.318, "step": 14091 }, { "epoch": 1.7148767873440827, "grad_norm": 3.387120246887207, "learning_rate": 1.025546298060849e-06, "loss": 0.4279, "step": 14092 }, { "epoch": 1.7149984788560997, "grad_norm": 2.252229690551758, "learning_rate": 1.0246859677348287e-06, "loss": 0.3884, "step": 14093 }, { "epoch": 1.7151201703681167, "grad_norm": 1.9507129192352295, "learning_rate": 1.0238259789380867e-06, "loss": 0.3478, "step": 14094 }, { "epoch": 1.7152418618801337, "grad_norm": 2.2231152057647705, "learning_rate": 1.0229663317033433e-06, "loss": 0.3785, "step": 14095 }, { "epoch": 1.7153635533921507, "grad_norm": 2.567385196685791, "learning_rate": 1.022107026063316e-06, "loss": 0.3482, "step": 14096 }, { "epoch": 1.715485244904168, "grad_norm": 2.702180862426758, "learning_rate": 1.0212480620506982e-06, "loss": 0.3353, "step": 14097 }, { "epoch": 1.715606936416185, "grad_norm": 1.8773272037506104, "learning_rate": 1.020389439698175e-06, "loss": 0.413, "step": 14098 }, { "epoch": 1.715728627928202, "grad_norm": 2.5362932682037354, "learning_rate": 1.0195311590384227e-06, "loss": 0.3734, "step": 14099 }, { "epoch": 1.715850319440219, "grad_norm": 2.7064740657806396, "learning_rate": 1.018673220104097e-06, "loss": 0.2847, "step": 14100 }, { "epoch": 1.715972010952236, "grad_norm": 2.8950181007385254, "learning_rate": 1.0178156229278458e-06, "loss": 0.3263, "step": 14101 }, { "epoch": 1.716093702464253, "grad_norm": 1.702979564666748, "learning_rate": 1.0169583675423044e-06, "loss": 0.3786, "step": 14102 }, { "epoch": 1.7162153939762703, "grad_norm": 2.6131691932678223, "learning_rate": 1.0161014539800885e-06, "loss": 0.416, "step": 14103 }, { "epoch": 1.7163370854882873, "grad_norm": 2.7310736179351807, "learning_rate": 1.0152448822738093e-06, "loss": 0.3944, "step": 14104 }, { "epoch": 1.7164587770003044, "grad_norm": 1.8591914176940918, "learning_rate": 1.0143886524560587e-06, "loss": 0.3547, "step": 14105 }, { "epoch": 1.7165804685123214, "grad_norm": 2.777805805206299, "learning_rate": 1.0135327645594184e-06, "loss": 0.3179, "step": 14106 }, { "epoch": 1.7167021600243384, "grad_norm": 1.5503928661346436, "learning_rate": 1.0126772186164592e-06, "loss": 0.3647, "step": 14107 }, { "epoch": 1.7168238515363554, "grad_norm": 2.6914825439453125, "learning_rate": 1.0118220146597334e-06, "loss": 0.3728, "step": 14108 }, { "epoch": 1.7169455430483724, "grad_norm": 1.7153568267822266, "learning_rate": 1.0109671527217835e-06, "loss": 0.3973, "step": 14109 }, { "epoch": 1.7170672345603895, "grad_norm": 1.873653769493103, "learning_rate": 1.0101126328351418e-06, "loss": 0.3731, "step": 14110 }, { "epoch": 1.7171889260724065, "grad_norm": 1.6782232522964478, "learning_rate": 1.0092584550323225e-06, "loss": 0.4116, "step": 14111 }, { "epoch": 1.7173106175844235, "grad_norm": 1.7122968435287476, "learning_rate": 1.0084046193458263e-06, "loss": 0.3643, "step": 14112 }, { "epoch": 1.7174323090964405, "grad_norm": 1.8513596057891846, "learning_rate": 1.0075511258081505e-06, "loss": 0.3981, "step": 14113 }, { "epoch": 1.7175540006084575, "grad_norm": 3.84171199798584, "learning_rate": 1.0066979744517636e-06, "loss": 0.3184, "step": 14114 }, { "epoch": 1.7176756921204746, "grad_norm": 2.873119354248047, "learning_rate": 1.005845165309135e-06, "loss": 0.3286, "step": 14115 }, { "epoch": 1.7177973836324916, "grad_norm": 1.6686207056045532, "learning_rate": 1.0049926984127156e-06, "loss": 0.3981, "step": 14116 }, { "epoch": 1.7179190751445086, "grad_norm": 1.8420835733413696, "learning_rate": 1.0041405737949406e-06, "loss": 0.3602, "step": 14117 }, { "epoch": 1.7180407666565256, "grad_norm": 3.320587396621704, "learning_rate": 1.0032887914882394e-06, "loss": 0.4768, "step": 14118 }, { "epoch": 1.7181624581685426, "grad_norm": 2.6647303104400635, "learning_rate": 1.0024373515250207e-06, "loss": 0.4366, "step": 14119 }, { "epoch": 1.7182841496805596, "grad_norm": 1.8319995403289795, "learning_rate": 1.0015862539376842e-06, "loss": 0.434, "step": 14120 }, { "epoch": 1.7184058411925767, "grad_norm": 1.3675659894943237, "learning_rate": 1.0007354987586171e-06, "loss": 0.3198, "step": 14121 }, { "epoch": 1.718527532704594, "grad_norm": 2.028301239013672, "learning_rate": 9.998850860201925e-07, "loss": 0.3149, "step": 14122 }, { "epoch": 1.718649224216611, "grad_norm": 1.7637901306152344, "learning_rate": 9.99035015754768e-07, "loss": 0.3436, "step": 14123 }, { "epoch": 1.718770915728628, "grad_norm": 1.949903964996338, "learning_rate": 9.98185287994693e-07, "loss": 0.3579, "step": 14124 }, { "epoch": 1.718892607240645, "grad_norm": 1.688572883605957, "learning_rate": 9.973359027723017e-07, "loss": 0.37, "step": 14125 }, { "epoch": 1.719014298752662, "grad_norm": 1.9323683977127075, "learning_rate": 9.964868601199129e-07, "loss": 0.379, "step": 14126 }, { "epoch": 1.719135990264679, "grad_norm": 2.9864230155944824, "learning_rate": 9.95638160069835e-07, "loss": 0.3602, "step": 14127 }, { "epoch": 1.719257681776696, "grad_norm": 1.9064257144927979, "learning_rate": 9.947898026543622e-07, "loss": 0.3822, "step": 14128 }, { "epoch": 1.7193793732887133, "grad_norm": 2.079899549484253, "learning_rate": 9.93941787905779e-07, "loss": 0.316, "step": 14129 }, { "epoch": 1.7195010648007303, "grad_norm": 2.1837902069091797, "learning_rate": 9.930941158563512e-07, "loss": 0.3962, "step": 14130 }, { "epoch": 1.7196227563127473, "grad_norm": 1.7883656024932861, "learning_rate": 9.922467865383344e-07, "loss": 0.3655, "step": 14131 }, { "epoch": 1.7197444478247643, "grad_norm": 2.2328412532806396, "learning_rate": 9.913997999839742e-07, "loss": 0.3123, "step": 14132 }, { "epoch": 1.7198661393367813, "grad_norm": 1.6640217304229736, "learning_rate": 9.905531562254988e-07, "loss": 0.3419, "step": 14133 }, { "epoch": 1.7199878308487984, "grad_norm": 2.8707523345947266, "learning_rate": 9.897068552951228e-07, "loss": 0.4164, "step": 14134 }, { "epoch": 1.7201095223608154, "grad_norm": 1.5813701152801514, "learning_rate": 9.888608972250523e-07, "loss": 0.3304, "step": 14135 }, { "epoch": 1.7202312138728324, "grad_norm": 1.7172472476959229, "learning_rate": 9.880152820474765e-07, "loss": 0.3657, "step": 14136 }, { "epoch": 1.7203529053848494, "grad_norm": 2.234194278717041, "learning_rate": 9.871700097945737e-07, "loss": 0.3952, "step": 14137 }, { "epoch": 1.7204745968968664, "grad_norm": 1.6735930442810059, "learning_rate": 9.86325080498507e-07, "loss": 0.4146, "step": 14138 }, { "epoch": 1.7205962884088835, "grad_norm": 1.8130608797073364, "learning_rate": 9.854804941914264e-07, "loss": 0.3659, "step": 14139 }, { "epoch": 1.7207179799209005, "grad_norm": 1.8729840517044067, "learning_rate": 9.84636250905474e-07, "loss": 0.329, "step": 14140 }, { "epoch": 1.7208396714329175, "grad_norm": 1.7210155725479126, "learning_rate": 9.83792350672773e-07, "loss": 0.3519, "step": 14141 }, { "epoch": 1.7209613629449345, "grad_norm": 3.2441020011901855, "learning_rate": 9.829487935254345e-07, "loss": 0.4159, "step": 14142 }, { "epoch": 1.7210830544569515, "grad_norm": 1.4905451536178589, "learning_rate": 9.821055794955603e-07, "loss": 0.3581, "step": 14143 }, { "epoch": 1.7212047459689686, "grad_norm": 1.6636959314346313, "learning_rate": 9.812627086152338e-07, "loss": 0.3813, "step": 14144 }, { "epoch": 1.7213264374809856, "grad_norm": 3.2838358879089355, "learning_rate": 9.80420180916528e-07, "loss": 0.443, "step": 14145 }, { "epoch": 1.7214481289930026, "grad_norm": 1.567162275314331, "learning_rate": 9.795779964315056e-07, "loss": 0.4183, "step": 14146 }, { "epoch": 1.7215698205050196, "grad_norm": 1.8367794752120972, "learning_rate": 9.787361551922092e-07, "loss": 0.369, "step": 14147 }, { "epoch": 1.7216915120170369, "grad_norm": 2.2278058528900146, "learning_rate": 9.77894657230679e-07, "loss": 0.3683, "step": 14148 }, { "epoch": 1.7218132035290539, "grad_norm": 2.4154059886932373, "learning_rate": 9.770535025789307e-07, "loss": 0.3844, "step": 14149 }, { "epoch": 1.721934895041071, "grad_norm": 1.7388941049575806, "learning_rate": 9.762126912689707e-07, "loss": 0.3821, "step": 14150 }, { "epoch": 1.722056586553088, "grad_norm": 1.5595877170562744, "learning_rate": 9.753722233327977e-07, "loss": 0.3938, "step": 14151 }, { "epoch": 1.722178278065105, "grad_norm": 1.57517409324646, "learning_rate": 9.745320988023921e-07, "loss": 0.2974, "step": 14152 }, { "epoch": 1.722299969577122, "grad_norm": 1.5863057374954224, "learning_rate": 9.7369231770972e-07, "loss": 0.3452, "step": 14153 }, { "epoch": 1.7224216610891392, "grad_norm": 1.5435655117034912, "learning_rate": 9.728528800867398e-07, "loss": 0.3594, "step": 14154 }, { "epoch": 1.7225433526011562, "grad_norm": 1.5653700828552246, "learning_rate": 9.72013785965391e-07, "loss": 0.3922, "step": 14155 }, { "epoch": 1.7226650441131732, "grad_norm": 1.9242876768112183, "learning_rate": 9.711750353776072e-07, "loss": 0.3363, "step": 14156 }, { "epoch": 1.7227867356251902, "grad_norm": 1.4263627529144287, "learning_rate": 9.703366283553006e-07, "loss": 0.3611, "step": 14157 }, { "epoch": 1.7229084271372073, "grad_norm": 1.5623769760131836, "learning_rate": 9.694985649303745e-07, "loss": 0.3852, "step": 14158 }, { "epoch": 1.7230301186492243, "grad_norm": 1.5293095111846924, "learning_rate": 9.686608451347245e-07, "loss": 0.3582, "step": 14159 }, { "epoch": 1.7231518101612413, "grad_norm": 2.105797052383423, "learning_rate": 9.678234690002208e-07, "loss": 0.3962, "step": 14160 }, { "epoch": 1.7232735016732583, "grad_norm": 2.047987937927246, "learning_rate": 9.669864365587279e-07, "loss": 0.3334, "step": 14161 }, { "epoch": 1.7233951931852753, "grad_norm": 1.7671821117401123, "learning_rate": 9.661497478421001e-07, "loss": 0.3821, "step": 14162 }, { "epoch": 1.7235168846972924, "grad_norm": 1.756811499595642, "learning_rate": 9.653134028821709e-07, "loss": 0.3765, "step": 14163 }, { "epoch": 1.7236385762093094, "grad_norm": 2.402477741241455, "learning_rate": 9.644774017107705e-07, "loss": 0.4495, "step": 14164 }, { "epoch": 1.7237602677213264, "grad_norm": 1.4955434799194336, "learning_rate": 9.636417443597067e-07, "loss": 0.3573, "step": 14165 }, { "epoch": 1.7238819592333434, "grad_norm": 1.8756307363510132, "learning_rate": 9.628064308607777e-07, "loss": 0.3663, "step": 14166 }, { "epoch": 1.7240036507453604, "grad_norm": 1.492052674293518, "learning_rate": 9.619714612457708e-07, "loss": 0.3174, "step": 14167 }, { "epoch": 1.7241253422573775, "grad_norm": 1.9347213506698608, "learning_rate": 9.61136835546459e-07, "loss": 0.3399, "step": 14168 }, { "epoch": 1.7242470337693945, "grad_norm": 1.6814157962799072, "learning_rate": 9.603025537945965e-07, "loss": 0.3748, "step": 14169 }, { "epoch": 1.7243687252814115, "grad_norm": 1.3750447034835815, "learning_rate": 9.594686160219358e-07, "loss": 0.3215, "step": 14170 }, { "epoch": 1.7244904167934285, "grad_norm": 1.697706699371338, "learning_rate": 9.58635022260207e-07, "loss": 0.4036, "step": 14171 }, { "epoch": 1.7246121083054455, "grad_norm": 2.6654069423675537, "learning_rate": 9.578017725411304e-07, "loss": 0.3375, "step": 14172 }, { "epoch": 1.7247337998174628, "grad_norm": 1.6381762027740479, "learning_rate": 9.569688668964116e-07, "loss": 0.4025, "step": 14173 }, { "epoch": 1.7248554913294798, "grad_norm": 1.474560022354126, "learning_rate": 9.561363053577443e-07, "loss": 0.3708, "step": 14174 }, { "epoch": 1.7249771828414968, "grad_norm": 3.0399885177612305, "learning_rate": 9.553040879568121e-07, "loss": 0.4065, "step": 14175 }, { "epoch": 1.7250988743535138, "grad_norm": 1.9203499555587769, "learning_rate": 9.544722147252816e-07, "loss": 0.3009, "step": 14176 }, { "epoch": 1.7252205658655309, "grad_norm": 1.8001118898391724, "learning_rate": 9.536406856948043e-07, "loss": 0.3371, "step": 14177 }, { "epoch": 1.7253422573775479, "grad_norm": 2.507826328277588, "learning_rate": 9.528095008970251e-07, "loss": 0.4456, "step": 14178 }, { "epoch": 1.7254639488895651, "grad_norm": 2.496150255203247, "learning_rate": 9.519786603635717e-07, "loss": 0.3957, "step": 14179 }, { "epoch": 1.7255856404015821, "grad_norm": 2.605888605117798, "learning_rate": 9.511481641260567e-07, "loss": 0.4224, "step": 14180 }, { "epoch": 1.7257073319135992, "grad_norm": 3.6426854133605957, "learning_rate": 9.50318012216086e-07, "loss": 0.4519, "step": 14181 }, { "epoch": 1.7258290234256162, "grad_norm": 1.860112190246582, "learning_rate": 9.494882046652465e-07, "loss": 0.3922, "step": 14182 }, { "epoch": 1.7259507149376332, "grad_norm": 1.4793708324432373, "learning_rate": 9.486587415051141e-07, "loss": 0.3461, "step": 14183 }, { "epoch": 1.7260724064496502, "grad_norm": 1.375121831893921, "learning_rate": 9.478296227672523e-07, "loss": 0.3433, "step": 14184 }, { "epoch": 1.7261940979616672, "grad_norm": 1.5559217929840088, "learning_rate": 9.470008484832083e-07, "loss": 0.38, "step": 14185 }, { "epoch": 1.7263157894736842, "grad_norm": 1.5865850448608398, "learning_rate": 9.461724186845222e-07, "loss": 0.3344, "step": 14186 }, { "epoch": 1.7264374809857013, "grad_norm": 1.8463729619979858, "learning_rate": 9.453443334027168e-07, "loss": 0.3334, "step": 14187 }, { "epoch": 1.7265591724977183, "grad_norm": 1.7615233659744263, "learning_rate": 9.445165926692989e-07, "loss": 0.3891, "step": 14188 }, { "epoch": 1.7266808640097353, "grad_norm": 1.7121474742889404, "learning_rate": 9.436891965157713e-07, "loss": 0.4313, "step": 14189 }, { "epoch": 1.7268025555217523, "grad_norm": 1.343726634979248, "learning_rate": 9.428621449736142e-07, "loss": 0.37, "step": 14190 }, { "epoch": 1.7269242470337693, "grad_norm": 1.5749168395996094, "learning_rate": 9.420354380742991e-07, "loss": 0.3003, "step": 14191 }, { "epoch": 1.7270459385457864, "grad_norm": 2.3141303062438965, "learning_rate": 9.412090758492853e-07, "loss": 0.4167, "step": 14192 }, { "epoch": 1.7271676300578034, "grad_norm": 1.5556703805923462, "learning_rate": 9.403830583300166e-07, "loss": 0.3328, "step": 14193 }, { "epoch": 1.7272893215698204, "grad_norm": 2.4870657920837402, "learning_rate": 9.395573855479257e-07, "loss": 0.3366, "step": 14194 }, { "epoch": 1.7274110130818374, "grad_norm": 2.103954553604126, "learning_rate": 9.387320575344305e-07, "loss": 0.3761, "step": 14195 }, { "epoch": 1.7275327045938544, "grad_norm": 1.3927984237670898, "learning_rate": 9.37907074320934e-07, "loss": 0.3617, "step": 14196 }, { "epoch": 1.7276543961058715, "grad_norm": 1.64122474193573, "learning_rate": 9.370824359388331e-07, "loss": 0.3938, "step": 14197 }, { "epoch": 1.7277760876178887, "grad_norm": 1.7214555740356445, "learning_rate": 9.36258142419505e-07, "loss": 0.3528, "step": 14198 }, { "epoch": 1.7278977791299057, "grad_norm": 1.8959749937057495, "learning_rate": 9.354341937943124e-07, "loss": 0.4055, "step": 14199 }, { "epoch": 1.7280194706419227, "grad_norm": 1.9538798332214355, "learning_rate": 9.346105900946145e-07, "loss": 0.3981, "step": 14200 }, { "epoch": 1.7281411621539398, "grad_norm": 3.2480194568634033, "learning_rate": 9.337873313517465e-07, "loss": 0.4434, "step": 14201 }, { "epoch": 1.7282628536659568, "grad_norm": 1.9845527410507202, "learning_rate": 9.329644175970365e-07, "loss": 0.4204, "step": 14202 }, { "epoch": 1.7283845451779738, "grad_norm": 1.5241584777832031, "learning_rate": 9.321418488617983e-07, "loss": 0.3893, "step": 14203 }, { "epoch": 1.728506236689991, "grad_norm": 2.9985358715057373, "learning_rate": 9.313196251773338e-07, "loss": 0.4007, "step": 14204 }, { "epoch": 1.728627928202008, "grad_norm": 1.9829671382904053, "learning_rate": 9.304977465749276e-07, "loss": 0.4106, "step": 14205 }, { "epoch": 1.728749619714025, "grad_norm": 2.017364740371704, "learning_rate": 9.29676213085855e-07, "loss": 0.3168, "step": 14206 }, { "epoch": 1.728871311226042, "grad_norm": 2.7571353912353516, "learning_rate": 9.288550247413752e-07, "loss": 0.3981, "step": 14207 }, { "epoch": 1.7289930027380591, "grad_norm": 2.408407688140869, "learning_rate": 9.2803418157274e-07, "loss": 0.3056, "step": 14208 }, { "epoch": 1.7291146942500761, "grad_norm": 1.5391234159469604, "learning_rate": 9.272136836111822e-07, "loss": 0.3796, "step": 14209 }, { "epoch": 1.7292363857620932, "grad_norm": 1.7516957521438599, "learning_rate": 9.263935308879224e-07, "loss": 0.3899, "step": 14210 }, { "epoch": 1.7293580772741102, "grad_norm": 1.509750485420227, "learning_rate": 9.25573723434171e-07, "loss": 0.3808, "step": 14211 }, { "epoch": 1.7294797687861272, "grad_norm": 1.683774471282959, "learning_rate": 9.247542612811222e-07, "loss": 0.3905, "step": 14212 }, { "epoch": 1.7296014602981442, "grad_norm": 2.21616792678833, "learning_rate": 9.239351444599564e-07, "loss": 0.4452, "step": 14213 }, { "epoch": 1.7297231518101612, "grad_norm": 1.5090255737304688, "learning_rate": 9.231163730018478e-07, "loss": 0.292, "step": 14214 }, { "epoch": 1.7298448433221782, "grad_norm": 1.7390064001083374, "learning_rate": 9.222979469379467e-07, "loss": 0.3731, "step": 14215 }, { "epoch": 1.7299665348341953, "grad_norm": 1.449793815612793, "learning_rate": 9.214798662994006e-07, "loss": 0.3619, "step": 14216 }, { "epoch": 1.7300882263462123, "grad_norm": 2.0085577964782715, "learning_rate": 9.206621311173391e-07, "loss": 0.3728, "step": 14217 }, { "epoch": 1.7302099178582293, "grad_norm": 3.629009246826172, "learning_rate": 9.198447414228728e-07, "loss": 0.4148, "step": 14218 }, { "epoch": 1.7303316093702463, "grad_norm": 2.0576725006103516, "learning_rate": 9.190276972471101e-07, "loss": 0.4253, "step": 14219 }, { "epoch": 1.7304533008822633, "grad_norm": 2.8660964965820312, "learning_rate": 9.182109986211407e-07, "loss": 0.3434, "step": 14220 }, { "epoch": 1.7305749923942804, "grad_norm": 1.589560866355896, "learning_rate": 9.173946455760396e-07, "loss": 0.3497, "step": 14221 }, { "epoch": 1.7306966839062974, "grad_norm": 1.5878167152404785, "learning_rate": 9.165786381428732e-07, "loss": 0.396, "step": 14222 }, { "epoch": 1.7308183754183146, "grad_norm": 4.360469341278076, "learning_rate": 9.15762976352691e-07, "loss": 0.3111, "step": 14223 }, { "epoch": 1.7309400669303316, "grad_norm": 1.77071213722229, "learning_rate": 9.149476602365315e-07, "loss": 0.386, "step": 14224 }, { "epoch": 1.7310617584423487, "grad_norm": 2.8939599990844727, "learning_rate": 9.141326898254188e-07, "loss": 0.3852, "step": 14225 }, { "epoch": 1.7311834499543657, "grad_norm": 1.6369757652282715, "learning_rate": 9.133180651503615e-07, "loss": 0.4005, "step": 14226 }, { "epoch": 1.7313051414663827, "grad_norm": 3.2018487453460693, "learning_rate": 9.125037862423625e-07, "loss": 0.3807, "step": 14227 }, { "epoch": 1.7314268329783997, "grad_norm": 2.0804715156555176, "learning_rate": 9.11689853132407e-07, "loss": 0.3278, "step": 14228 }, { "epoch": 1.7315485244904167, "grad_norm": 1.760297417640686, "learning_rate": 9.108762658514603e-07, "loss": 0.3474, "step": 14229 }, { "epoch": 1.731670216002434, "grad_norm": 2.205538034439087, "learning_rate": 9.100630244304865e-07, "loss": 0.3857, "step": 14230 }, { "epoch": 1.731791907514451, "grad_norm": 1.6773427724838257, "learning_rate": 9.092501289004285e-07, "loss": 0.381, "step": 14231 }, { "epoch": 1.731913599026468, "grad_norm": 2.9241650104522705, "learning_rate": 9.084375792922217e-07, "loss": 0.3144, "step": 14232 }, { "epoch": 1.732035290538485, "grad_norm": 1.6965457201004028, "learning_rate": 9.076253756367836e-07, "loss": 0.3461, "step": 14233 }, { "epoch": 1.732156982050502, "grad_norm": 1.8271918296813965, "learning_rate": 9.068135179650173e-07, "loss": 0.3891, "step": 14234 }, { "epoch": 1.732278673562519, "grad_norm": 1.8727154731750488, "learning_rate": 9.060020063078212e-07, "loss": 0.3893, "step": 14235 }, { "epoch": 1.732400365074536, "grad_norm": 3.413839817047119, "learning_rate": 9.051908406960719e-07, "loss": 0.3922, "step": 14236 }, { "epoch": 1.7325220565865531, "grad_norm": 1.5881644487380981, "learning_rate": 9.043800211606357e-07, "loss": 0.3659, "step": 14237 }, { "epoch": 1.7326437480985701, "grad_norm": 1.6653475761413574, "learning_rate": 9.035695477323669e-07, "loss": 0.3386, "step": 14238 }, { "epoch": 1.7327654396105872, "grad_norm": 1.7836490869522095, "learning_rate": 9.027594204421064e-07, "loss": 0.331, "step": 14239 }, { "epoch": 1.7328871311226042, "grad_norm": 2.1213221549987793, "learning_rate": 9.019496393206794e-07, "loss": 0.3592, "step": 14240 }, { "epoch": 1.7330088226346212, "grad_norm": 2.1884124279022217, "learning_rate": 9.011402043989015e-07, "loss": 0.3906, "step": 14241 }, { "epoch": 1.7331305141466382, "grad_norm": 2.9316279888153076, "learning_rate": 9.0033111570757e-07, "loss": 0.345, "step": 14242 }, { "epoch": 1.7332522056586552, "grad_norm": 2.2177157402038574, "learning_rate": 8.99522373277476e-07, "loss": 0.3075, "step": 14243 }, { "epoch": 1.7333738971706723, "grad_norm": 1.7550849914550781, "learning_rate": 8.987139771393938e-07, "loss": 0.4, "step": 14244 }, { "epoch": 1.7334955886826893, "grad_norm": 3.208246946334839, "learning_rate": 8.979059273240809e-07, "loss": 0.4879, "step": 14245 }, { "epoch": 1.7336172801947063, "grad_norm": 2.1785266399383545, "learning_rate": 8.970982238622905e-07, "loss": 0.4097, "step": 14246 }, { "epoch": 1.7337389717067233, "grad_norm": 2.0655219554901123, "learning_rate": 8.962908667847536e-07, "loss": 0.3392, "step": 14247 }, { "epoch": 1.7338606632187403, "grad_norm": 1.8200823068618774, "learning_rate": 8.954838561221912e-07, "loss": 0.3534, "step": 14248 }, { "epoch": 1.7339823547307576, "grad_norm": 1.938529133796692, "learning_rate": 8.946771919053154e-07, "loss": 0.3548, "step": 14249 }, { "epoch": 1.7341040462427746, "grad_norm": 2.110961437225342, "learning_rate": 8.938708741648194e-07, "loss": 0.407, "step": 14250 }, { "epoch": 1.7342257377547916, "grad_norm": 2.3713085651397705, "learning_rate": 8.930649029313854e-07, "loss": 0.3517, "step": 14251 }, { "epoch": 1.7343474292668086, "grad_norm": 2.8605287075042725, "learning_rate": 8.92259278235682e-07, "loss": 0.3971, "step": 14252 }, { "epoch": 1.7344691207788256, "grad_norm": 3.02201247215271, "learning_rate": 8.914540001083638e-07, "loss": 0.4127, "step": 14253 }, { "epoch": 1.7345908122908427, "grad_norm": 1.4600318670272827, "learning_rate": 8.906490685800761e-07, "loss": 0.3628, "step": 14254 }, { "epoch": 1.73471250380286, "grad_norm": 2.366039752960205, "learning_rate": 8.898444836814457e-07, "loss": 0.4365, "step": 14255 }, { "epoch": 1.734834195314877, "grad_norm": 2.0894320011138916, "learning_rate": 8.89040245443089e-07, "loss": 0.3759, "step": 14256 }, { "epoch": 1.734955886826894, "grad_norm": 1.6230913400650024, "learning_rate": 8.882363538956107e-07, "loss": 0.3321, "step": 14257 }, { "epoch": 1.735077578338911, "grad_norm": 1.7111051082611084, "learning_rate": 8.874328090696005e-07, "loss": 0.3605, "step": 14258 }, { "epoch": 1.735199269850928, "grad_norm": 1.5047584772109985, "learning_rate": 8.866296109956308e-07, "loss": 0.3789, "step": 14259 }, { "epoch": 1.735320961362945, "grad_norm": 2.572357177734375, "learning_rate": 8.858267597042713e-07, "loss": 0.3548, "step": 14260 }, { "epoch": 1.735442652874962, "grad_norm": 1.667695164680481, "learning_rate": 8.850242552260679e-07, "loss": 0.3724, "step": 14261 }, { "epoch": 1.735564344386979, "grad_norm": 1.6652452945709229, "learning_rate": 8.842220975915561e-07, "loss": 0.3255, "step": 14262 }, { "epoch": 1.735686035898996, "grad_norm": 1.9200270175933838, "learning_rate": 8.834202868312669e-07, "loss": 0.3256, "step": 14263 }, { "epoch": 1.735807727411013, "grad_norm": 1.69968843460083, "learning_rate": 8.826188229757027e-07, "loss": 0.3669, "step": 14264 }, { "epoch": 1.73592941892303, "grad_norm": 1.806928277015686, "learning_rate": 8.818177060553645e-07, "loss": 0.3806, "step": 14265 }, { "epoch": 1.7360511104350471, "grad_norm": 1.348667025566101, "learning_rate": 8.81016936100737e-07, "loss": 0.3306, "step": 14266 }, { "epoch": 1.7361728019470641, "grad_norm": 2.4229934215545654, "learning_rate": 8.802165131422891e-07, "loss": 0.4349, "step": 14267 }, { "epoch": 1.7362944934590812, "grad_norm": 1.4708613157272339, "learning_rate": 8.794164372104807e-07, "loss": 0.3745, "step": 14268 }, { "epoch": 1.7364161849710982, "grad_norm": 1.7326991558074951, "learning_rate": 8.786167083357566e-07, "loss": 0.3747, "step": 14269 }, { "epoch": 1.7365378764831152, "grad_norm": 1.5292972326278687, "learning_rate": 8.778173265485446e-07, "loss": 0.3911, "step": 14270 }, { "epoch": 1.7366595679951322, "grad_norm": 1.3409945964813232, "learning_rate": 8.770182918792669e-07, "loss": 0.3427, "step": 14271 }, { "epoch": 1.7367812595071492, "grad_norm": 1.6310046911239624, "learning_rate": 8.76219604358326e-07, "loss": 0.356, "step": 14272 }, { "epoch": 1.7369029510191663, "grad_norm": 1.6166120767593384, "learning_rate": 8.754212640161131e-07, "loss": 0.3769, "step": 14273 }, { "epoch": 1.7370246425311835, "grad_norm": 2.4560840129852295, "learning_rate": 8.746232708830116e-07, "loss": 0.3959, "step": 14274 }, { "epoch": 1.7371463340432005, "grad_norm": 1.5992571115493774, "learning_rate": 8.738256249893795e-07, "loss": 0.3468, "step": 14275 }, { "epoch": 1.7372680255552175, "grad_norm": 2.3675191402435303, "learning_rate": 8.730283263655748e-07, "loss": 0.3028, "step": 14276 }, { "epoch": 1.7373897170672346, "grad_norm": 2.043269634246826, "learning_rate": 8.722313750419343e-07, "loss": 0.331, "step": 14277 }, { "epoch": 1.7375114085792516, "grad_norm": 1.6750531196594238, "learning_rate": 8.714347710487803e-07, "loss": 0.3809, "step": 14278 }, { "epoch": 1.7376331000912686, "grad_norm": 1.8722851276397705, "learning_rate": 8.706385144164309e-07, "loss": 0.3916, "step": 14279 }, { "epoch": 1.7377547916032858, "grad_norm": 2.0383670330047607, "learning_rate": 8.698426051751829e-07, "loss": 0.378, "step": 14280 }, { "epoch": 1.7378764831153029, "grad_norm": 2.141855239868164, "learning_rate": 8.6904704335532e-07, "loss": 0.3251, "step": 14281 }, { "epoch": 1.7379981746273199, "grad_norm": 2.53654408454895, "learning_rate": 8.682518289871189e-07, "loss": 0.2825, "step": 14282 }, { "epoch": 1.7381198661393369, "grad_norm": 2.078965902328491, "learning_rate": 8.674569621008366e-07, "loss": 0.4302, "step": 14283 }, { "epoch": 1.738241557651354, "grad_norm": 2.0348286628723145, "learning_rate": 8.666624427267212e-07, "loss": 0.3378, "step": 14284 }, { "epoch": 1.738363249163371, "grad_norm": 2.64128041267395, "learning_rate": 8.65868270895004e-07, "loss": 0.3456, "step": 14285 }, { "epoch": 1.738484940675388, "grad_norm": 1.6047821044921875, "learning_rate": 8.650744466359074e-07, "loss": 0.3479, "step": 14286 }, { "epoch": 1.738606632187405, "grad_norm": 1.5302748680114746, "learning_rate": 8.64280969979635e-07, "loss": 0.3611, "step": 14287 }, { "epoch": 1.738728323699422, "grad_norm": 2.3115594387054443, "learning_rate": 8.634878409563818e-07, "loss": 0.3515, "step": 14288 }, { "epoch": 1.738850015211439, "grad_norm": 1.7235603332519531, "learning_rate": 8.626950595963269e-07, "loss": 0.3535, "step": 14289 }, { "epoch": 1.738971706723456, "grad_norm": 1.5920923948287964, "learning_rate": 8.619026259296393e-07, "loss": 0.3659, "step": 14290 }, { "epoch": 1.739093398235473, "grad_norm": 1.8308765888214111, "learning_rate": 8.611105399864694e-07, "loss": 0.374, "step": 14291 }, { "epoch": 1.73921508974749, "grad_norm": 3.2287888526916504, "learning_rate": 8.603188017969621e-07, "loss": 0.3936, "step": 14292 }, { "epoch": 1.739336781259507, "grad_norm": 3.4937808513641357, "learning_rate": 8.595274113912432e-07, "loss": 0.4142, "step": 14293 }, { "epoch": 1.739458472771524, "grad_norm": 2.5637383460998535, "learning_rate": 8.587363687994233e-07, "loss": 0.4221, "step": 14294 }, { "epoch": 1.7395801642835411, "grad_norm": 1.9632245302200317, "learning_rate": 8.579456740516079e-07, "loss": 0.3726, "step": 14295 }, { "epoch": 1.7397018557955581, "grad_norm": 1.7519874572753906, "learning_rate": 8.571553271778832e-07, "loss": 0.3518, "step": 14296 }, { "epoch": 1.7398235473075752, "grad_norm": 1.641635537147522, "learning_rate": 8.563653282083206e-07, "loss": 0.3414, "step": 14297 }, { "epoch": 1.7399452388195922, "grad_norm": 3.0177409648895264, "learning_rate": 8.555756771729873e-07, "loss": 0.4441, "step": 14298 }, { "epoch": 1.7400669303316094, "grad_norm": 2.0765628814697266, "learning_rate": 8.547863741019235e-07, "loss": 0.3443, "step": 14299 }, { "epoch": 1.7401886218436264, "grad_norm": 1.8263086080551147, "learning_rate": 8.539974190251699e-07, "loss": 0.3345, "step": 14300 }, { "epoch": 1.7403103133556435, "grad_norm": 1.9698293209075928, "learning_rate": 8.532088119727455e-07, "loss": 0.3348, "step": 14301 }, { "epoch": 1.7404320048676605, "grad_norm": 1.9641646146774292, "learning_rate": 8.524205529746576e-07, "loss": 0.3888, "step": 14302 }, { "epoch": 1.7405536963796775, "grad_norm": 3.123169422149658, "learning_rate": 8.516326420609033e-07, "loss": 0.294, "step": 14303 }, { "epoch": 1.7406753878916945, "grad_norm": 2.204244613647461, "learning_rate": 8.508450792614631e-07, "loss": 0.3043, "step": 14304 }, { "epoch": 1.7407970794037118, "grad_norm": 1.8235293626785278, "learning_rate": 8.500578646063029e-07, "loss": 0.3215, "step": 14305 }, { "epoch": 1.7409187709157288, "grad_norm": 3.7659685611724854, "learning_rate": 8.492709981253822e-07, "loss": 0.4288, "step": 14306 }, { "epoch": 1.7410404624277458, "grad_norm": 1.9809975624084473, "learning_rate": 8.484844798486414e-07, "loss": 0.3776, "step": 14307 }, { "epoch": 1.7411621539397628, "grad_norm": 1.818855881690979, "learning_rate": 8.476983098060076e-07, "loss": 0.3598, "step": 14308 }, { "epoch": 1.7412838454517798, "grad_norm": 1.9004418849945068, "learning_rate": 8.469124880274004e-07, "loss": 0.3407, "step": 14309 }, { "epoch": 1.7414055369637969, "grad_norm": 1.6751792430877686, "learning_rate": 8.461270145427147e-07, "loss": 0.395, "step": 14310 }, { "epoch": 1.7415272284758139, "grad_norm": 1.9813085794448853, "learning_rate": 8.453418893818455e-07, "loss": 0.4133, "step": 14311 }, { "epoch": 1.741648919987831, "grad_norm": 1.6053643226623535, "learning_rate": 8.445571125746677e-07, "loss": 0.3515, "step": 14312 }, { "epoch": 1.741770611499848, "grad_norm": 1.8470227718353271, "learning_rate": 8.437726841510396e-07, "loss": 0.3951, "step": 14313 }, { "epoch": 1.741892303011865, "grad_norm": 2.454641580581665, "learning_rate": 8.429886041408153e-07, "loss": 0.3849, "step": 14314 }, { "epoch": 1.742013994523882, "grad_norm": 1.8138149976730347, "learning_rate": 8.422048725738286e-07, "loss": 0.353, "step": 14315 }, { "epoch": 1.742135686035899, "grad_norm": 1.6105372905731201, "learning_rate": 8.414214894799e-07, "loss": 0.3365, "step": 14316 }, { "epoch": 1.742257377547916, "grad_norm": 1.6992546319961548, "learning_rate": 8.406384548888425e-07, "loss": 0.3342, "step": 14317 }, { "epoch": 1.742379069059933, "grad_norm": 2.5218346118927, "learning_rate": 8.39855768830452e-07, "loss": 0.375, "step": 14318 }, { "epoch": 1.74250076057195, "grad_norm": 1.8298931121826172, "learning_rate": 8.390734313345073e-07, "loss": 0.3968, "step": 14319 }, { "epoch": 1.742622452083967, "grad_norm": 4.075117588043213, "learning_rate": 8.38291442430782e-07, "loss": 0.3403, "step": 14320 }, { "epoch": 1.742744143595984, "grad_norm": 1.9006128311157227, "learning_rate": 8.375098021490313e-07, "loss": 0.3717, "step": 14321 }, { "epoch": 1.742865835108001, "grad_norm": 1.9493316411972046, "learning_rate": 8.367285105189982e-07, "loss": 0.3399, "step": 14322 }, { "epoch": 1.742987526620018, "grad_norm": 1.9841796159744263, "learning_rate": 8.359475675704121e-07, "loss": 0.309, "step": 14323 }, { "epoch": 1.7431092181320353, "grad_norm": 2.4708316326141357, "learning_rate": 8.35166973332987e-07, "loss": 0.3522, "step": 14324 }, { "epoch": 1.7432309096440524, "grad_norm": 1.964381456375122, "learning_rate": 8.343867278364315e-07, "loss": 0.4194, "step": 14325 }, { "epoch": 1.7433526011560694, "grad_norm": 1.8712615966796875, "learning_rate": 8.336068311104328e-07, "loss": 0.421, "step": 14326 }, { "epoch": 1.7434742926680864, "grad_norm": 1.8085203170776367, "learning_rate": 8.328272831846651e-07, "loss": 0.3532, "step": 14327 }, { "epoch": 1.7435959841801034, "grad_norm": 2.2099030017852783, "learning_rate": 8.320480840887968e-07, "loss": 0.4259, "step": 14328 }, { "epoch": 1.7437176756921204, "grad_norm": 2.0019686222076416, "learning_rate": 8.312692338524752e-07, "loss": 0.3269, "step": 14329 }, { "epoch": 1.7438393672041375, "grad_norm": 1.9506771564483643, "learning_rate": 8.304907325053357e-07, "loss": 0.3551, "step": 14330 }, { "epoch": 1.7439610587161547, "grad_norm": 2.5915844440460205, "learning_rate": 8.297125800770056e-07, "loss": 0.4505, "step": 14331 }, { "epoch": 1.7440827502281717, "grad_norm": 1.4320746660232544, "learning_rate": 8.289347765970934e-07, "loss": 0.3492, "step": 14332 }, { "epoch": 1.7442044417401887, "grad_norm": 1.7862533330917358, "learning_rate": 8.281573220951955e-07, "loss": 0.3566, "step": 14333 }, { "epoch": 1.7443261332522058, "grad_norm": 2.7786672115325928, "learning_rate": 8.27380216600896e-07, "loss": 0.3574, "step": 14334 }, { "epoch": 1.7444478247642228, "grad_norm": 2.1462345123291016, "learning_rate": 8.266034601437645e-07, "loss": 0.3105, "step": 14335 }, { "epoch": 1.7445695162762398, "grad_norm": 2.914695978164673, "learning_rate": 8.258270527533607e-07, "loss": 0.325, "step": 14336 }, { "epoch": 1.7446912077882568, "grad_norm": 1.4044674634933472, "learning_rate": 8.250509944592267e-07, "loss": 0.3654, "step": 14337 }, { "epoch": 1.7448128993002738, "grad_norm": 2.0682713985443115, "learning_rate": 8.24275285290892e-07, "loss": 0.3975, "step": 14338 }, { "epoch": 1.7449345908122909, "grad_norm": 1.9333986043930054, "learning_rate": 8.234999252778775e-07, "loss": 0.3841, "step": 14339 }, { "epoch": 1.7450562823243079, "grad_norm": 2.2306602001190186, "learning_rate": 8.227249144496852e-07, "loss": 0.4137, "step": 14340 }, { "epoch": 1.745177973836325, "grad_norm": 1.8189393281936646, "learning_rate": 8.219502528358036e-07, "loss": 0.3604, "step": 14341 }, { "epoch": 1.745299665348342, "grad_norm": 1.6609610319137573, "learning_rate": 8.211759404657138e-07, "loss": 0.3843, "step": 14342 }, { "epoch": 1.745421356860359, "grad_norm": 1.6170681715011597, "learning_rate": 8.204019773688776e-07, "loss": 0.3643, "step": 14343 }, { "epoch": 1.745543048372376, "grad_norm": 1.571121096611023, "learning_rate": 8.196283635747504e-07, "loss": 0.3501, "step": 14344 }, { "epoch": 1.745664739884393, "grad_norm": 2.148019790649414, "learning_rate": 8.188550991127642e-07, "loss": 0.2856, "step": 14345 }, { "epoch": 1.74578643139641, "grad_norm": 2.059107542037964, "learning_rate": 8.180821840123432e-07, "loss": 0.3551, "step": 14346 }, { "epoch": 1.745908122908427, "grad_norm": 2.4106245040893555, "learning_rate": 8.173096183029027e-07, "loss": 0.2867, "step": 14347 }, { "epoch": 1.746029814420444, "grad_norm": 1.6454966068267822, "learning_rate": 8.165374020138384e-07, "loss": 0.3789, "step": 14348 }, { "epoch": 1.746151505932461, "grad_norm": 2.545868158340454, "learning_rate": 8.15765535174533e-07, "loss": 0.4312, "step": 14349 }, { "epoch": 1.7462731974444783, "grad_norm": 8.065531730651855, "learning_rate": 8.149940178143611e-07, "loss": 0.3585, "step": 14350 }, { "epoch": 1.7463948889564953, "grad_norm": 1.8935617208480835, "learning_rate": 8.142228499626769e-07, "loss": 0.3774, "step": 14351 }, { "epoch": 1.7465165804685123, "grad_norm": 1.7391681671142578, "learning_rate": 8.134520316488292e-07, "loss": 0.3394, "step": 14352 }, { "epoch": 1.7466382719805293, "grad_norm": 2.0660479068756104, "learning_rate": 8.126815629021456e-07, "loss": 0.457, "step": 14353 }, { "epoch": 1.7467599634925464, "grad_norm": 1.928277850151062, "learning_rate": 8.119114437519448e-07, "loss": 0.3631, "step": 14354 }, { "epoch": 1.7468816550045634, "grad_norm": 2.140286922454834, "learning_rate": 8.111416742275347e-07, "loss": 0.3132, "step": 14355 }, { "epoch": 1.7470033465165806, "grad_norm": 3.118319511413574, "learning_rate": 8.103722543582016e-07, "loss": 0.3733, "step": 14356 }, { "epoch": 1.7471250380285976, "grad_norm": 2.1216111183166504, "learning_rate": 8.096031841732243e-07, "loss": 0.3299, "step": 14357 }, { "epoch": 1.7472467295406147, "grad_norm": 2.7144100666046143, "learning_rate": 8.088344637018708e-07, "loss": 0.391, "step": 14358 }, { "epoch": 1.7473684210526317, "grad_norm": 1.6950066089630127, "learning_rate": 8.080660929733885e-07, "loss": 0.2955, "step": 14359 }, { "epoch": 1.7474901125646487, "grad_norm": 1.693657636642456, "learning_rate": 8.072980720170198e-07, "loss": 0.3566, "step": 14360 }, { "epoch": 1.7476118040766657, "grad_norm": 2.463639497756958, "learning_rate": 8.065304008619878e-07, "loss": 0.3585, "step": 14361 }, { "epoch": 1.7477334955886827, "grad_norm": 1.5246741771697998, "learning_rate": 8.057630795375004e-07, "loss": 0.3634, "step": 14362 }, { "epoch": 1.7478551871006998, "grad_norm": 2.191861152648926, "learning_rate": 8.049961080727619e-07, "loss": 0.3907, "step": 14363 }, { "epoch": 1.7479768786127168, "grad_norm": 1.908806324005127, "learning_rate": 8.042294864969536e-07, "loss": 0.3199, "step": 14364 }, { "epoch": 1.7480985701247338, "grad_norm": 1.8936090469360352, "learning_rate": 8.034632148392452e-07, "loss": 0.3729, "step": 14365 }, { "epoch": 1.7482202616367508, "grad_norm": 1.4756361246109009, "learning_rate": 8.026972931288001e-07, "loss": 0.3561, "step": 14366 }, { "epoch": 1.7483419531487678, "grad_norm": 1.8983566761016846, "learning_rate": 8.019317213947597e-07, "loss": 0.4401, "step": 14367 }, { "epoch": 1.7484636446607849, "grad_norm": 3.4970991611480713, "learning_rate": 8.011664996662561e-07, "loss": 0.3247, "step": 14368 }, { "epoch": 1.7485853361728019, "grad_norm": 3.1445930004119873, "learning_rate": 8.004016279724081e-07, "loss": 0.4137, "step": 14369 }, { "epoch": 1.748707027684819, "grad_norm": 2.1475565433502197, "learning_rate": 7.996371063423181e-07, "loss": 0.3976, "step": 14370 }, { "epoch": 1.748828719196836, "grad_norm": 2.2538836002349854, "learning_rate": 7.988729348050817e-07, "loss": 0.3747, "step": 14371 }, { "epoch": 1.748950410708853, "grad_norm": 1.8164736032485962, "learning_rate": 7.981091133897756e-07, "loss": 0.3836, "step": 14372 }, { "epoch": 1.74907210222087, "grad_norm": 1.7587019205093384, "learning_rate": 7.973456421254633e-07, "loss": 0.3501, "step": 14373 }, { "epoch": 1.749193793732887, "grad_norm": 1.6233000755310059, "learning_rate": 7.965825210411993e-07, "loss": 0.3591, "step": 14374 }, { "epoch": 1.7493154852449042, "grad_norm": 1.580428123474121, "learning_rate": 7.958197501660203e-07, "loss": 0.3298, "step": 14375 }, { "epoch": 1.7494371767569212, "grad_norm": 1.9146944284439087, "learning_rate": 7.950573295289499e-07, "loss": 0.4258, "step": 14376 }, { "epoch": 1.7495588682689382, "grad_norm": 2.0037081241607666, "learning_rate": 7.942952591590025e-07, "loss": 0.3407, "step": 14377 }, { "epoch": 1.7496805597809553, "grad_norm": 1.64399254322052, "learning_rate": 7.935335390851751e-07, "loss": 0.3668, "step": 14378 }, { "epoch": 1.7498022512929723, "grad_norm": 1.9744069576263428, "learning_rate": 7.927721693364531e-07, "loss": 0.3618, "step": 14379 }, { "epoch": 1.7499239428049893, "grad_norm": 1.8075882196426392, "learning_rate": 7.920111499418082e-07, "loss": 0.3507, "step": 14380 }, { "epoch": 1.7500456343170065, "grad_norm": 1.5442469120025635, "learning_rate": 7.912504809301969e-07, "loss": 0.3364, "step": 14381 }, { "epoch": 1.7501673258290236, "grad_norm": 2.457545757293701, "learning_rate": 7.904901623305672e-07, "loss": 0.4603, "step": 14382 }, { "epoch": 1.7502890173410406, "grad_norm": 2.4344992637634277, "learning_rate": 7.897301941718505e-07, "loss": 0.3341, "step": 14383 }, { "epoch": 1.7504107088530576, "grad_norm": 1.8127057552337646, "learning_rate": 7.889705764829614e-07, "loss": 0.3776, "step": 14384 }, { "epoch": 1.7505324003650746, "grad_norm": 1.8539972305297852, "learning_rate": 7.88211309292809e-07, "loss": 0.3802, "step": 14385 }, { "epoch": 1.7506540918770916, "grad_norm": 1.6395106315612793, "learning_rate": 7.874523926302846e-07, "loss": 0.3587, "step": 14386 }, { "epoch": 1.7507757833891087, "grad_norm": 2.129948616027832, "learning_rate": 7.86693826524263e-07, "loss": 0.3364, "step": 14387 }, { "epoch": 1.7508974749011257, "grad_norm": 2.1072161197662354, "learning_rate": 7.859356110036143e-07, "loss": 0.3612, "step": 14388 }, { "epoch": 1.7510191664131427, "grad_norm": 1.4651302099227905, "learning_rate": 7.851777460971877e-07, "loss": 0.3282, "step": 14389 }, { "epoch": 1.7511408579251597, "grad_norm": 1.5033385753631592, "learning_rate": 7.844202318338212e-07, "loss": 0.3119, "step": 14390 }, { "epoch": 1.7512625494371767, "grad_norm": 3.124924421310425, "learning_rate": 7.836630682423396e-07, "loss": 0.4177, "step": 14391 }, { "epoch": 1.7513842409491938, "grad_norm": 2.8099687099456787, "learning_rate": 7.829062553515543e-07, "loss": 0.3768, "step": 14392 }, { "epoch": 1.7515059324612108, "grad_norm": 2.5157947540283203, "learning_rate": 7.821497931902655e-07, "loss": 0.3346, "step": 14393 }, { "epoch": 1.7516276239732278, "grad_norm": 2.5333807468414307, "learning_rate": 7.81393681787257e-07, "loss": 0.3606, "step": 14394 }, { "epoch": 1.7517493154852448, "grad_norm": 1.9541484117507935, "learning_rate": 7.806379211712978e-07, "loss": 0.3742, "step": 14395 }, { "epoch": 1.7518710069972618, "grad_norm": 1.5055828094482422, "learning_rate": 7.798825113711516e-07, "loss": 0.3868, "step": 14396 }, { "epoch": 1.7519926985092789, "grad_norm": 2.6457931995391846, "learning_rate": 7.791274524155601e-07, "loss": 0.3693, "step": 14397 }, { "epoch": 1.7521143900212959, "grad_norm": 1.9580260515213013, "learning_rate": 7.783727443332534e-07, "loss": 0.3472, "step": 14398 }, { "epoch": 1.752236081533313, "grad_norm": 2.075838327407837, "learning_rate": 7.77618387152953e-07, "loss": 0.3959, "step": 14399 }, { "epoch": 1.7523577730453301, "grad_norm": 1.451200246810913, "learning_rate": 7.768643809033626e-07, "loss": 0.3778, "step": 14400 }, { "epoch": 1.7524794645573472, "grad_norm": 1.5976767539978027, "learning_rate": 7.761107256131739e-07, "loss": 0.324, "step": 14401 }, { "epoch": 1.7526011560693642, "grad_norm": 1.5861172676086426, "learning_rate": 7.753574213110637e-07, "loss": 0.3853, "step": 14402 }, { "epoch": 1.7527228475813812, "grad_norm": 1.511989712715149, "learning_rate": 7.746044680256959e-07, "loss": 0.3411, "step": 14403 }, { "epoch": 1.7528445390933982, "grad_norm": 1.9029408693313599, "learning_rate": 7.738518657857252e-07, "loss": 0.3656, "step": 14404 }, { "epoch": 1.7529662306054152, "grad_norm": 2.7883009910583496, "learning_rate": 7.730996146197889e-07, "loss": 0.4121, "step": 14405 }, { "epoch": 1.7530879221174325, "grad_norm": 2.30157470703125, "learning_rate": 7.723477145565083e-07, "loss": 0.4029, "step": 14406 }, { "epoch": 1.7532096136294495, "grad_norm": 2.4816009998321533, "learning_rate": 7.715961656244997e-07, "loss": 0.4004, "step": 14407 }, { "epoch": 1.7533313051414665, "grad_norm": 1.7638683319091797, "learning_rate": 7.708449678523588e-07, "loss": 0.3736, "step": 14408 }, { "epoch": 1.7534529966534835, "grad_norm": 1.883698582649231, "learning_rate": 7.700941212686674e-07, "loss": 0.3736, "step": 14409 }, { "epoch": 1.7535746881655006, "grad_norm": 2.497305154800415, "learning_rate": 7.693436259020026e-07, "loss": 0.3629, "step": 14410 }, { "epoch": 1.7536963796775176, "grad_norm": 1.5503554344177246, "learning_rate": 7.68593481780917e-07, "loss": 0.3107, "step": 14411 }, { "epoch": 1.7538180711895346, "grad_norm": 1.9252413511276245, "learning_rate": 7.678436889339591e-07, "loss": 0.3278, "step": 14412 }, { "epoch": 1.7539397627015516, "grad_norm": 1.7362934350967407, "learning_rate": 7.670942473896592e-07, "loss": 0.3628, "step": 14413 }, { "epoch": 1.7540614542135686, "grad_norm": 2.065354108810425, "learning_rate": 7.663451571765323e-07, "loss": 0.3384, "step": 14414 }, { "epoch": 1.7541831457255856, "grad_norm": 1.5617612600326538, "learning_rate": 7.655964183230857e-07, "loss": 0.3428, "step": 14415 }, { "epoch": 1.7543048372376027, "grad_norm": 1.2491282224655151, "learning_rate": 7.648480308578088e-07, "loss": 0.3193, "step": 14416 }, { "epoch": 1.7544265287496197, "grad_norm": 1.5279039144515991, "learning_rate": 7.640999948091799e-07, "loss": 0.3744, "step": 14417 }, { "epoch": 1.7545482202616367, "grad_norm": 2.2969255447387695, "learning_rate": 7.633523102056639e-07, "loss": 0.3548, "step": 14418 }, { "epoch": 1.7546699117736537, "grad_norm": 2.429680585861206, "learning_rate": 7.626049770757104e-07, "loss": 0.4012, "step": 14419 }, { "epoch": 1.7547916032856707, "grad_norm": 2.0860016345977783, "learning_rate": 7.618579954477589e-07, "loss": 0.3395, "step": 14420 }, { "epoch": 1.7549132947976878, "grad_norm": 1.5536525249481201, "learning_rate": 7.611113653502333e-07, "loss": 0.3173, "step": 14421 }, { "epoch": 1.7550349863097048, "grad_norm": 1.4375079870224, "learning_rate": 7.60365086811542e-07, "loss": 0.3146, "step": 14422 }, { "epoch": 1.7551566778217218, "grad_norm": 1.7376062870025635, "learning_rate": 7.596191598600854e-07, "loss": 0.3409, "step": 14423 }, { "epoch": 1.7552783693337388, "grad_norm": 1.731907844543457, "learning_rate": 7.588735845242467e-07, "loss": 0.3972, "step": 14424 }, { "epoch": 1.755400060845756, "grad_norm": 2.6565561294555664, "learning_rate": 7.581283608323953e-07, "loss": 0.3874, "step": 14425 }, { "epoch": 1.755521752357773, "grad_norm": 1.4553632736206055, "learning_rate": 7.573834888128906e-07, "loss": 0.3705, "step": 14426 }, { "epoch": 1.75564344386979, "grad_norm": 1.4372460842132568, "learning_rate": 7.566389684940734e-07, "loss": 0.3149, "step": 14427 }, { "epoch": 1.7557651353818071, "grad_norm": 2.6474924087524414, "learning_rate": 7.558947999042765e-07, "loss": 0.3704, "step": 14428 }, { "epoch": 1.7558868268938241, "grad_norm": 1.6344373226165771, "learning_rate": 7.551509830718185e-07, "loss": 0.3461, "step": 14429 }, { "epoch": 1.7560085184058412, "grad_norm": 3.925034523010254, "learning_rate": 7.544075180249988e-07, "loss": 0.413, "step": 14430 }, { "epoch": 1.7561302099178582, "grad_norm": 1.50836980342865, "learning_rate": 7.536644047921116e-07, "loss": 0.3432, "step": 14431 }, { "epoch": 1.7562519014298754, "grad_norm": 2.3834168910980225, "learning_rate": 7.52921643401433e-07, "loss": 0.3944, "step": 14432 }, { "epoch": 1.7563735929418924, "grad_norm": 1.8044732809066772, "learning_rate": 7.52179233881225e-07, "loss": 0.4377, "step": 14433 }, { "epoch": 1.7564952844539095, "grad_norm": 2.5352587699890137, "learning_rate": 7.514371762597405e-07, "loss": 0.394, "step": 14434 }, { "epoch": 1.7566169759659265, "grad_norm": 1.7870938777923584, "learning_rate": 7.506954705652158e-07, "loss": 0.4185, "step": 14435 }, { "epoch": 1.7567386674779435, "grad_norm": 2.400815010070801, "learning_rate": 7.499541168258728e-07, "loss": 0.3877, "step": 14436 }, { "epoch": 1.7568603589899605, "grad_norm": 2.1925830841064453, "learning_rate": 7.492131150699211e-07, "loss": 0.3233, "step": 14437 }, { "epoch": 1.7569820505019775, "grad_norm": 1.7983744144439697, "learning_rate": 7.484724653255581e-07, "loss": 0.3404, "step": 14438 }, { "epoch": 1.7571037420139946, "grad_norm": 2.345271348953247, "learning_rate": 7.477321676209692e-07, "loss": 0.3949, "step": 14439 }, { "epoch": 1.7572254335260116, "grad_norm": 1.5213383436203003, "learning_rate": 7.469922219843218e-07, "loss": 0.3571, "step": 14440 }, { "epoch": 1.7573471250380286, "grad_norm": 1.5926969051361084, "learning_rate": 7.46252628443771e-07, "loss": 0.3249, "step": 14441 }, { "epoch": 1.7574688165500456, "grad_norm": 2.315727472305298, "learning_rate": 7.455133870274645e-07, "loss": 0.3795, "step": 14442 }, { "epoch": 1.7575905080620626, "grad_norm": 1.7920769453048706, "learning_rate": 7.447744977635285e-07, "loss": 0.3804, "step": 14443 }, { "epoch": 1.7577121995740796, "grad_norm": 1.3890974521636963, "learning_rate": 7.440359606800796e-07, "loss": 0.3986, "step": 14444 }, { "epoch": 1.7578338910860967, "grad_norm": 1.3588234186172485, "learning_rate": 7.432977758052217e-07, "loss": 0.3592, "step": 14445 }, { "epoch": 1.7579555825981137, "grad_norm": 2.4143753051757812, "learning_rate": 7.425599431670438e-07, "loss": 0.3905, "step": 14446 }, { "epoch": 1.7580772741101307, "grad_norm": 2.8679070472717285, "learning_rate": 7.418224627936221e-07, "loss": 0.3409, "step": 14447 }, { "epoch": 1.7581989656221477, "grad_norm": 2.49882173538208, "learning_rate": 7.410853347130198e-07, "loss": 0.3791, "step": 14448 }, { "epoch": 1.7583206571341647, "grad_norm": 3.792658567428589, "learning_rate": 7.403485589532833e-07, "loss": 0.2899, "step": 14449 }, { "epoch": 1.7584423486461818, "grad_norm": 2.197274923324585, "learning_rate": 7.396121355424523e-07, "loss": 0.3129, "step": 14450 }, { "epoch": 1.758564040158199, "grad_norm": 2.00026798248291, "learning_rate": 7.388760645085469e-07, "loss": 0.3615, "step": 14451 }, { "epoch": 1.758685731670216, "grad_norm": 1.3426164388656616, "learning_rate": 7.381403458795755e-07, "loss": 0.3495, "step": 14452 }, { "epoch": 1.758807423182233, "grad_norm": 3.0518858432769775, "learning_rate": 7.374049796835369e-07, "loss": 0.4337, "step": 14453 }, { "epoch": 1.75892911469425, "grad_norm": 1.4964326620101929, "learning_rate": 7.366699659484111e-07, "loss": 0.3453, "step": 14454 }, { "epoch": 1.759050806206267, "grad_norm": 2.0643231868743896, "learning_rate": 7.359353047021656e-07, "loss": 0.3474, "step": 14455 }, { "epoch": 1.759172497718284, "grad_norm": 1.6798450946807861, "learning_rate": 7.352009959727591e-07, "loss": 0.3761, "step": 14456 }, { "epoch": 1.7592941892303013, "grad_norm": 2.020517110824585, "learning_rate": 7.344670397881304e-07, "loss": 0.3914, "step": 14457 }, { "epoch": 1.7594158807423184, "grad_norm": 2.052978038787842, "learning_rate": 7.337334361762094e-07, "loss": 0.3522, "step": 14458 }, { "epoch": 1.7595375722543354, "grad_norm": 1.6676478385925293, "learning_rate": 7.330001851649138e-07, "loss": 0.3355, "step": 14459 }, { "epoch": 1.7596592637663524, "grad_norm": 2.627361536026001, "learning_rate": 7.322672867821401e-07, "loss": 0.3036, "step": 14460 }, { "epoch": 1.7597809552783694, "grad_norm": 2.615955114364624, "learning_rate": 7.315347410557804e-07, "loss": 0.2908, "step": 14461 }, { "epoch": 1.7599026467903864, "grad_norm": 1.537392258644104, "learning_rate": 7.308025480137082e-07, "loss": 0.3664, "step": 14462 }, { "epoch": 1.7600243383024035, "grad_norm": 1.6661725044250488, "learning_rate": 7.300707076837843e-07, "loss": 0.355, "step": 14463 }, { "epoch": 1.7601460298144205, "grad_norm": 2.1293396949768066, "learning_rate": 7.293392200938587e-07, "loss": 0.3904, "step": 14464 }, { "epoch": 1.7602677213264375, "grad_norm": 1.4980497360229492, "learning_rate": 7.286080852717659e-07, "loss": 0.3552, "step": 14465 }, { "epoch": 1.7603894128384545, "grad_norm": 3.4383912086486816, "learning_rate": 7.278773032453235e-07, "loss": 0.4565, "step": 14466 }, { "epoch": 1.7605111043504715, "grad_norm": 1.9814101457595825, "learning_rate": 7.271468740423449e-07, "loss": 0.3661, "step": 14467 }, { "epoch": 1.7606327958624886, "grad_norm": 1.84766685962677, "learning_rate": 7.2641679769062e-07, "loss": 0.3867, "step": 14468 }, { "epoch": 1.7607544873745056, "grad_norm": 1.3371798992156982, "learning_rate": 7.2568707421793e-07, "loss": 0.3142, "step": 14469 }, { "epoch": 1.7608761788865226, "grad_norm": 1.9628312587738037, "learning_rate": 7.249577036520472e-07, "loss": 0.3652, "step": 14470 }, { "epoch": 1.7609978703985396, "grad_norm": 1.8537945747375488, "learning_rate": 7.242286860207192e-07, "loss": 0.3374, "step": 14471 }, { "epoch": 1.7611195619105566, "grad_norm": 1.5959616899490356, "learning_rate": 7.235000213516907e-07, "loss": 0.3622, "step": 14472 }, { "epoch": 1.7612412534225736, "grad_norm": 2.3115346431732178, "learning_rate": 7.227717096726872e-07, "loss": 0.361, "step": 14473 }, { "epoch": 1.7613629449345907, "grad_norm": 2.1264383792877197, "learning_rate": 7.22043751011422e-07, "loss": 0.3836, "step": 14474 }, { "epoch": 1.7614846364466077, "grad_norm": 2.4236934185028076, "learning_rate": 7.213161453955974e-07, "loss": 0.4318, "step": 14475 }, { "epoch": 1.761606327958625, "grad_norm": 1.7381917238235474, "learning_rate": 7.205888928529003e-07, "loss": 0.3795, "step": 14476 }, { "epoch": 1.761728019470642, "grad_norm": 2.0093178749084473, "learning_rate": 7.198619934110007e-07, "loss": 0.3751, "step": 14477 }, { "epoch": 1.761849710982659, "grad_norm": 1.860365867614746, "learning_rate": 7.191354470975631e-07, "loss": 0.3591, "step": 14478 }, { "epoch": 1.761971402494676, "grad_norm": 1.6120810508728027, "learning_rate": 7.184092539402298e-07, "loss": 0.3364, "step": 14479 }, { "epoch": 1.762093094006693, "grad_norm": 3.0284183025360107, "learning_rate": 7.176834139666378e-07, "loss": 0.3923, "step": 14480 }, { "epoch": 1.76221478551871, "grad_norm": 3.31282901763916, "learning_rate": 7.169579272044058e-07, "loss": 0.4327, "step": 14481 }, { "epoch": 1.7623364770307273, "grad_norm": 1.8907090425491333, "learning_rate": 7.162327936811397e-07, "loss": 0.3964, "step": 14482 }, { "epoch": 1.7624581685427443, "grad_norm": 1.7526501417160034, "learning_rate": 7.155080134244319e-07, "loss": 0.3163, "step": 14483 }, { "epoch": 1.7625798600547613, "grad_norm": 2.4112749099731445, "learning_rate": 7.147835864618602e-07, "loss": 0.3856, "step": 14484 }, { "epoch": 1.7627015515667783, "grad_norm": 2.010303020477295, "learning_rate": 7.140595128209948e-07, "loss": 0.2901, "step": 14485 }, { "epoch": 1.7628232430787953, "grad_norm": 2.1501972675323486, "learning_rate": 7.133357925293849e-07, "loss": 0.3352, "step": 14486 }, { "epoch": 1.7629449345908124, "grad_norm": 1.453220009803772, "learning_rate": 7.126124256145694e-07, "loss": 0.351, "step": 14487 }, { "epoch": 1.7630666261028294, "grad_norm": 1.5813199281692505, "learning_rate": 7.118894121040754e-07, "loss": 0.3832, "step": 14488 }, { "epoch": 1.7631883176148464, "grad_norm": 2.1204910278320312, "learning_rate": 7.111667520254162e-07, "loss": 0.3537, "step": 14489 }, { "epoch": 1.7633100091268634, "grad_norm": 2.2804338932037354, "learning_rate": 7.104444454060866e-07, "loss": 0.2907, "step": 14490 }, { "epoch": 1.7634317006388804, "grad_norm": 1.4901660680770874, "learning_rate": 7.097224922735757e-07, "loss": 0.3844, "step": 14491 }, { "epoch": 1.7635533921508975, "grad_norm": 1.5875693559646606, "learning_rate": 7.090008926553538e-07, "loss": 0.3641, "step": 14492 }, { "epoch": 1.7636750836629145, "grad_norm": 1.7893704175949097, "learning_rate": 7.082796465788789e-07, "loss": 0.325, "step": 14493 }, { "epoch": 1.7637967751749315, "grad_norm": 2.1332480907440186, "learning_rate": 7.075587540715967e-07, "loss": 0.3292, "step": 14494 }, { "epoch": 1.7639184666869485, "grad_norm": 1.689132571220398, "learning_rate": 7.068382151609366e-07, "loss": 0.3313, "step": 14495 }, { "epoch": 1.7640401581989655, "grad_norm": 1.5373564958572388, "learning_rate": 7.061180298743198e-07, "loss": 0.3407, "step": 14496 }, { "epoch": 1.7641618497109826, "grad_norm": 1.3875147104263306, "learning_rate": 7.053981982391489e-07, "loss": 0.2987, "step": 14497 }, { "epoch": 1.7642835412229996, "grad_norm": 1.885918140411377, "learning_rate": 7.046787202828142e-07, "loss": 0.3586, "step": 14498 }, { "epoch": 1.7644052327350166, "grad_norm": 2.2079832553863525, "learning_rate": 7.039595960326951e-07, "loss": 0.3362, "step": 14499 }, { "epoch": 1.7645269242470336, "grad_norm": 1.5553251504898071, "learning_rate": 7.03240825516156e-07, "loss": 0.3856, "step": 14500 }, { "epoch": 1.7646486157590509, "grad_norm": 2.093270778656006, "learning_rate": 7.025224087605453e-07, "loss": 0.3548, "step": 14501 }, { "epoch": 1.7647703072710679, "grad_norm": 2.286315679550171, "learning_rate": 7.018043457932034e-07, "loss": 0.3291, "step": 14502 }, { "epoch": 1.764891998783085, "grad_norm": 1.7934011220932007, "learning_rate": 7.010866366414514e-07, "loss": 0.325, "step": 14503 }, { "epoch": 1.765013690295102, "grad_norm": 1.7132139205932617, "learning_rate": 7.003692813325991e-07, "loss": 0.3593, "step": 14504 }, { "epoch": 1.765135381807119, "grad_norm": 2.294631242752075, "learning_rate": 6.996522798939487e-07, "loss": 0.4345, "step": 14505 }, { "epoch": 1.765257073319136, "grad_norm": 1.472137451171875, "learning_rate": 6.989356323527763e-07, "loss": 0.3399, "step": 14506 }, { "epoch": 1.7653787648311532, "grad_norm": 1.6393823623657227, "learning_rate": 6.982193387363568e-07, "loss": 0.4025, "step": 14507 }, { "epoch": 1.7655004563431702, "grad_norm": 1.7435812950134277, "learning_rate": 6.975033990719449e-07, "loss": 0.3642, "step": 14508 }, { "epoch": 1.7656221478551872, "grad_norm": 2.0616307258605957, "learning_rate": 6.967878133867822e-07, "loss": 0.3539, "step": 14509 }, { "epoch": 1.7657438393672042, "grad_norm": 1.8341875076293945, "learning_rate": 6.960725817081015e-07, "loss": 0.3802, "step": 14510 }, { "epoch": 1.7658655308792213, "grad_norm": 2.115043878555298, "learning_rate": 6.953577040631176e-07, "loss": 0.3355, "step": 14511 }, { "epoch": 1.7659872223912383, "grad_norm": 1.9199072122573853, "learning_rate": 6.946431804790299e-07, "loss": 0.3446, "step": 14512 }, { "epoch": 1.7661089139032553, "grad_norm": 1.793259620666504, "learning_rate": 6.939290109830322e-07, "loss": 0.3459, "step": 14513 }, { "epoch": 1.7662306054152723, "grad_norm": 1.6848077774047852, "learning_rate": 6.932151956022992e-07, "loss": 0.3637, "step": 14514 }, { "epoch": 1.7663522969272893, "grad_norm": 1.8250707387924194, "learning_rate": 6.925017343639895e-07, "loss": 0.3652, "step": 14515 }, { "epoch": 1.7664739884393064, "grad_norm": 2.0465261936187744, "learning_rate": 6.917886272952578e-07, "loss": 0.3911, "step": 14516 }, { "epoch": 1.7665956799513234, "grad_norm": 4.026511192321777, "learning_rate": 6.910758744232315e-07, "loss": 0.4958, "step": 14517 }, { "epoch": 1.7667173714633404, "grad_norm": 1.5430704355239868, "learning_rate": 6.9036347577504e-07, "loss": 0.316, "step": 14518 }, { "epoch": 1.7668390629753574, "grad_norm": 1.7800544500350952, "learning_rate": 6.896514313777869e-07, "loss": 0.3462, "step": 14519 }, { "epoch": 1.7669607544873744, "grad_norm": 3.350831985473633, "learning_rate": 6.889397412585664e-07, "loss": 0.4432, "step": 14520 }, { "epoch": 1.7670824459993915, "grad_norm": 1.5889924764633179, "learning_rate": 6.882284054444632e-07, "loss": 0.3625, "step": 14521 }, { "epoch": 1.7672041375114085, "grad_norm": 1.7387501001358032, "learning_rate": 6.875174239625448e-07, "loss": 0.3761, "step": 14522 }, { "epoch": 1.7673258290234255, "grad_norm": 1.6038727760314941, "learning_rate": 6.868067968398618e-07, "loss": 0.3546, "step": 14523 }, { "epoch": 1.7674475205354425, "grad_norm": 2.071868896484375, "learning_rate": 6.860965241034589e-07, "loss": 0.3919, "step": 14524 }, { "epoch": 1.7675692120474595, "grad_norm": 3.5060112476348877, "learning_rate": 6.853866057803615e-07, "loss": 0.4265, "step": 14525 }, { "epoch": 1.7676909035594768, "grad_norm": 1.730776309967041, "learning_rate": 6.846770418975835e-07, "loss": 0.4121, "step": 14526 }, { "epoch": 1.7678125950714938, "grad_norm": 1.892000436782837, "learning_rate": 6.839678324821264e-07, "loss": 0.3776, "step": 14527 }, { "epoch": 1.7679342865835108, "grad_norm": 1.9229711294174194, "learning_rate": 6.832589775609766e-07, "loss": 0.361, "step": 14528 }, { "epoch": 1.7680559780955278, "grad_norm": 1.7723015546798706, "learning_rate": 6.825504771611069e-07, "loss": 0.3915, "step": 14529 }, { "epoch": 1.7681776696075449, "grad_norm": 1.7557389736175537, "learning_rate": 6.818423313094791e-07, "loss": 0.3661, "step": 14530 }, { "epoch": 1.7682993611195619, "grad_norm": 2.8860726356506348, "learning_rate": 6.811345400330349e-07, "loss": 0.3782, "step": 14531 }, { "epoch": 1.768421052631579, "grad_norm": 1.7480837106704712, "learning_rate": 6.804271033587129e-07, "loss": 0.3289, "step": 14532 }, { "epoch": 1.7685427441435961, "grad_norm": 1.9398764371871948, "learning_rate": 6.797200213134292e-07, "loss": 0.3304, "step": 14533 }, { "epoch": 1.7686644356556132, "grad_norm": 1.4975380897521973, "learning_rate": 6.790132939240901e-07, "loss": 0.3634, "step": 14534 }, { "epoch": 1.7687861271676302, "grad_norm": 2.078226089477539, "learning_rate": 6.783069212175897e-07, "loss": 0.3395, "step": 14535 }, { "epoch": 1.7689078186796472, "grad_norm": 1.5778584480285645, "learning_rate": 6.776009032208042e-07, "loss": 0.3233, "step": 14536 }, { "epoch": 1.7690295101916642, "grad_norm": 2.097545623779297, "learning_rate": 6.768952399606021e-07, "loss": 0.3869, "step": 14537 }, { "epoch": 1.7691512017036812, "grad_norm": 1.9245706796646118, "learning_rate": 6.761899314638343e-07, "loss": 0.3381, "step": 14538 }, { "epoch": 1.7692728932156983, "grad_norm": 2.8432419300079346, "learning_rate": 6.75484977757338e-07, "loss": 0.4162, "step": 14539 }, { "epoch": 1.7693945847277153, "grad_norm": 1.7467833757400513, "learning_rate": 6.747803788679397e-07, "loss": 0.3607, "step": 14540 }, { "epoch": 1.7695162762397323, "grad_norm": 2.521716356277466, "learning_rate": 6.7407613482245e-07, "loss": 0.4369, "step": 14541 }, { "epoch": 1.7696379677517493, "grad_norm": 1.7306139469146729, "learning_rate": 6.733722456476654e-07, "loss": 0.375, "step": 14542 }, { "epoch": 1.7697596592637663, "grad_norm": 2.0977470874786377, "learning_rate": 6.726687113703733e-07, "loss": 0.3335, "step": 14543 }, { "epoch": 1.7698813507757833, "grad_norm": 2.7140793800354004, "learning_rate": 6.719655320173424e-07, "loss": 0.3927, "step": 14544 }, { "epoch": 1.7700030422878004, "grad_norm": 1.6032614707946777, "learning_rate": 6.712627076153322e-07, "loss": 0.3836, "step": 14545 }, { "epoch": 1.7701247337998174, "grad_norm": 2.5950915813446045, "learning_rate": 6.705602381910848e-07, "loss": 0.3697, "step": 14546 }, { "epoch": 1.7702464253118344, "grad_norm": 1.907840371131897, "learning_rate": 6.698581237713298e-07, "loss": 0.3228, "step": 14547 }, { "epoch": 1.7703681168238514, "grad_norm": 2.101078987121582, "learning_rate": 6.69156364382787e-07, "loss": 0.3372, "step": 14548 }, { "epoch": 1.7704898083358684, "grad_norm": 1.3305882215499878, "learning_rate": 6.684549600521595e-07, "loss": 0.3332, "step": 14549 }, { "epoch": 1.7706114998478855, "grad_norm": 2.558901309967041, "learning_rate": 6.677539108061326e-07, "loss": 0.3533, "step": 14550 }, { "epoch": 1.7707331913599025, "grad_norm": 1.8484891653060913, "learning_rate": 6.670532166713906e-07, "loss": 0.3568, "step": 14551 }, { "epoch": 1.7708548828719197, "grad_norm": 2.8344151973724365, "learning_rate": 6.663528776745886e-07, "loss": 0.3123, "step": 14552 }, { "epoch": 1.7709765743839367, "grad_norm": 2.365994453430176, "learning_rate": 6.6565289384238e-07, "loss": 0.3006, "step": 14553 }, { "epoch": 1.7710982658959538, "grad_norm": 2.0647196769714355, "learning_rate": 6.649532652014002e-07, "loss": 0.3394, "step": 14554 }, { "epoch": 1.7712199574079708, "grad_norm": 3.8903493881225586, "learning_rate": 6.642539917782698e-07, "loss": 0.2907, "step": 14555 }, { "epoch": 1.7713416489199878, "grad_norm": 3.136115074157715, "learning_rate": 6.635550735995999e-07, "loss": 0.337, "step": 14556 }, { "epoch": 1.7714633404320048, "grad_norm": 2.2149486541748047, "learning_rate": 6.628565106919859e-07, "loss": 0.3507, "step": 14557 }, { "epoch": 1.771585031944022, "grad_norm": 1.881397008895874, "learning_rate": 6.621583030820067e-07, "loss": 0.318, "step": 14558 }, { "epoch": 1.771706723456039, "grad_norm": 2.2142624855041504, "learning_rate": 6.61460450796233e-07, "loss": 0.4046, "step": 14559 }, { "epoch": 1.771828414968056, "grad_norm": 1.8249690532684326, "learning_rate": 6.607629538612192e-07, "loss": 0.3841, "step": 14560 }, { "epoch": 1.7719501064800731, "grad_norm": 1.9165360927581787, "learning_rate": 6.600658123035053e-07, "loss": 0.3891, "step": 14561 }, { "epoch": 1.7720717979920901, "grad_norm": 1.7116965055465698, "learning_rate": 6.593690261496222e-07, "loss": 0.3797, "step": 14562 }, { "epoch": 1.7721934895041072, "grad_norm": 1.6220554113388062, "learning_rate": 6.586725954260787e-07, "loss": 0.3658, "step": 14563 }, { "epoch": 1.7723151810161242, "grad_norm": 1.531381368637085, "learning_rate": 6.579765201593802e-07, "loss": 0.3732, "step": 14564 }, { "epoch": 1.7724368725281412, "grad_norm": 2.1630656719207764, "learning_rate": 6.572808003760123e-07, "loss": 0.3413, "step": 14565 }, { "epoch": 1.7725585640401582, "grad_norm": 1.581964135169983, "learning_rate": 6.56585436102446e-07, "loss": 0.3743, "step": 14566 }, { "epoch": 1.7726802555521752, "grad_norm": 1.8824985027313232, "learning_rate": 6.558904273651457e-07, "loss": 0.3843, "step": 14567 }, { "epoch": 1.7728019470641923, "grad_norm": 3.0243945121765137, "learning_rate": 6.551957741905556e-07, "loss": 0.4064, "step": 14568 }, { "epoch": 1.7729236385762093, "grad_norm": 1.7638392448425293, "learning_rate": 6.545014766051084e-07, "loss": 0.389, "step": 14569 }, { "epoch": 1.7730453300882263, "grad_norm": 1.540652871131897, "learning_rate": 6.538075346352246e-07, "loss": 0.3169, "step": 14570 }, { "epoch": 1.7731670216002433, "grad_norm": 3.324019432067871, "learning_rate": 6.531139483073101e-07, "loss": 0.4413, "step": 14571 }, { "epoch": 1.7732887131122603, "grad_norm": 1.8459844589233398, "learning_rate": 6.524207176477549e-07, "loss": 0.3558, "step": 14572 }, { "epoch": 1.7734104046242773, "grad_norm": 1.7923648357391357, "learning_rate": 6.51727842682942e-07, "loss": 0.3417, "step": 14573 }, { "epoch": 1.7735320961362944, "grad_norm": 2.2081899642944336, "learning_rate": 6.510353234392341e-07, "loss": 0.3729, "step": 14574 }, { "epoch": 1.7736537876483114, "grad_norm": 1.6217221021652222, "learning_rate": 6.503431599429833e-07, "loss": 0.3404, "step": 14575 }, { "epoch": 1.7737754791603284, "grad_norm": 1.4036725759506226, "learning_rate": 6.496513522205283e-07, "loss": 0.3215, "step": 14576 }, { "epoch": 1.7738971706723456, "grad_norm": 1.5899603366851807, "learning_rate": 6.489599002981917e-07, "loss": 0.324, "step": 14577 }, { "epoch": 1.7740188621843627, "grad_norm": 1.9212393760681152, "learning_rate": 6.482688042022889e-07, "loss": 0.4103, "step": 14578 }, { "epoch": 1.7741405536963797, "grad_norm": 1.695272445678711, "learning_rate": 6.475780639591145e-07, "loss": 0.358, "step": 14579 }, { "epoch": 1.7742622452083967, "grad_norm": 2.3171448707580566, "learning_rate": 6.468876795949508e-07, "loss": 0.3968, "step": 14580 }, { "epoch": 1.7743839367204137, "grad_norm": 2.148740291595459, "learning_rate": 6.461976511360735e-07, "loss": 0.3952, "step": 14581 }, { "epoch": 1.7745056282324307, "grad_norm": 2.0499255657196045, "learning_rate": 6.45507978608737e-07, "loss": 0.4096, "step": 14582 }, { "epoch": 1.774627319744448, "grad_norm": 1.8709224462509155, "learning_rate": 6.448186620391828e-07, "loss": 0.3363, "step": 14583 }, { "epoch": 1.774749011256465, "grad_norm": 1.5541064739227295, "learning_rate": 6.44129701453644e-07, "loss": 0.3803, "step": 14584 }, { "epoch": 1.774870702768482, "grad_norm": 1.7476093769073486, "learning_rate": 6.434410968783355e-07, "loss": 0.41, "step": 14585 }, { "epoch": 1.774992394280499, "grad_norm": 1.8392903804779053, "learning_rate": 6.427528483394607e-07, "loss": 0.368, "step": 14586 }, { "epoch": 1.775114085792516, "grad_norm": 1.8348569869995117, "learning_rate": 6.420649558632075e-07, "loss": 0.3953, "step": 14587 }, { "epoch": 1.775235777304533, "grad_norm": 2.5158095359802246, "learning_rate": 6.413774194757516e-07, "loss": 0.4437, "step": 14588 }, { "epoch": 1.77535746881655, "grad_norm": 1.5707505941390991, "learning_rate": 6.406902392032588e-07, "loss": 0.4074, "step": 14589 }, { "epoch": 1.7754791603285671, "grad_norm": 2.4350297451019287, "learning_rate": 6.400034150718737e-07, "loss": 0.3898, "step": 14590 }, { "epoch": 1.7756008518405841, "grad_norm": 3.083111047744751, "learning_rate": 6.39316947107732e-07, "loss": 0.4341, "step": 14591 }, { "epoch": 1.7757225433526012, "grad_norm": 1.6954346895217896, "learning_rate": 6.386308353369575e-07, "loss": 0.3665, "step": 14592 }, { "epoch": 1.7758442348646182, "grad_norm": 2.392301559448242, "learning_rate": 6.379450797856557e-07, "loss": 0.3624, "step": 14593 }, { "epoch": 1.7759659263766352, "grad_norm": 1.7334064245224, "learning_rate": 6.372596804799213e-07, "loss": 0.3371, "step": 14594 }, { "epoch": 1.7760876178886522, "grad_norm": 2.2518150806427, "learning_rate": 6.365746374458381e-07, "loss": 0.3714, "step": 14595 }, { "epoch": 1.7762093094006692, "grad_norm": 1.614601492881775, "learning_rate": 6.358899507094684e-07, "loss": 0.325, "step": 14596 }, { "epoch": 1.7763310009126863, "grad_norm": 1.6773942708969116, "learning_rate": 6.352056202968726e-07, "loss": 0.4112, "step": 14597 }, { "epoch": 1.7764526924247033, "grad_norm": 2.3715524673461914, "learning_rate": 6.345216462340853e-07, "loss": 0.4126, "step": 14598 }, { "epoch": 1.7765743839367203, "grad_norm": 1.8623312711715698, "learning_rate": 6.338380285471324e-07, "loss": 0.3809, "step": 14599 }, { "epoch": 1.7766960754487373, "grad_norm": 2.9922993183135986, "learning_rate": 6.331547672620319e-07, "loss": 0.3062, "step": 14600 }, { "epoch": 1.7768177669607543, "grad_norm": 1.7961255311965942, "learning_rate": 6.324718624047809e-07, "loss": 0.3675, "step": 14601 }, { "epoch": 1.7769394584727716, "grad_norm": 2.4649550914764404, "learning_rate": 6.317893140013631e-07, "loss": 0.4175, "step": 14602 }, { "epoch": 1.7770611499847886, "grad_norm": 2.225714921951294, "learning_rate": 6.311071220777554e-07, "loss": 0.4176, "step": 14603 }, { "epoch": 1.7771828414968056, "grad_norm": 1.9400768280029297, "learning_rate": 6.304252866599125e-07, "loss": 0.3301, "step": 14604 }, { "epoch": 1.7773045330088226, "grad_norm": 2.111846923828125, "learning_rate": 6.297438077737816e-07, "loss": 0.3309, "step": 14605 }, { "epoch": 1.7774262245208396, "grad_norm": 1.546210765838623, "learning_rate": 6.290626854452953e-07, "loss": 0.3496, "step": 14606 }, { "epoch": 1.7775479160328567, "grad_norm": 1.7552134990692139, "learning_rate": 6.283819197003693e-07, "loss": 0.3258, "step": 14607 }, { "epoch": 1.777669607544874, "grad_norm": 3.7536916732788086, "learning_rate": 6.27701510564911e-07, "loss": 0.2992, "step": 14608 }, { "epoch": 1.777791299056891, "grad_norm": 1.7868030071258545, "learning_rate": 6.270214580648104e-07, "loss": 0.3631, "step": 14609 }, { "epoch": 1.777912990568908, "grad_norm": 2.1418042182922363, "learning_rate": 6.263417622259427e-07, "loss": 0.4327, "step": 14610 }, { "epoch": 1.778034682080925, "grad_norm": 1.5145152807235718, "learning_rate": 6.256624230741737e-07, "loss": 0.3675, "step": 14611 }, { "epoch": 1.778156373592942, "grad_norm": 2.19519305229187, "learning_rate": 6.249834406353517e-07, "loss": 0.3599, "step": 14612 }, { "epoch": 1.778278065104959, "grad_norm": 1.9231786727905273, "learning_rate": 6.243048149353171e-07, "loss": 0.3864, "step": 14613 }, { "epoch": 1.778399756616976, "grad_norm": 1.6109853982925415, "learning_rate": 6.236265459998914e-07, "loss": 0.3511, "step": 14614 }, { "epoch": 1.778521448128993, "grad_norm": 3.507779598236084, "learning_rate": 6.229486338548807e-07, "loss": 0.4456, "step": 14615 }, { "epoch": 1.77864313964101, "grad_norm": 1.6540296077728271, "learning_rate": 6.222710785260866e-07, "loss": 0.3717, "step": 14616 }, { "epoch": 1.778764831153027, "grad_norm": 2.3607757091522217, "learning_rate": 6.215938800392884e-07, "loss": 0.3278, "step": 14617 }, { "epoch": 1.778886522665044, "grad_norm": 1.7540985345840454, "learning_rate": 6.209170384202534e-07, "loss": 0.367, "step": 14618 }, { "epoch": 1.7790082141770611, "grad_norm": 1.715063452720642, "learning_rate": 6.202405536947409e-07, "loss": 0.3603, "step": 14619 }, { "epoch": 1.7791299056890781, "grad_norm": 1.8997678756713867, "learning_rate": 6.195644258884903e-07, "loss": 0.3851, "step": 14620 }, { "epoch": 1.7792515972010952, "grad_norm": 1.4899977445602417, "learning_rate": 6.188886550272299e-07, "loss": 0.3277, "step": 14621 }, { "epoch": 1.7793732887131122, "grad_norm": 2.952059030532837, "learning_rate": 6.182132411366737e-07, "loss": 0.4346, "step": 14622 }, { "epoch": 1.7794949802251292, "grad_norm": 3.236086368560791, "learning_rate": 6.17538184242521e-07, "loss": 0.344, "step": 14623 }, { "epoch": 1.7796166717371462, "grad_norm": 1.8219189643859863, "learning_rate": 6.168634843704624e-07, "loss": 0.3215, "step": 14624 }, { "epoch": 1.7797383632491632, "grad_norm": 1.7022855281829834, "learning_rate": 6.161891415461718e-07, "loss": 0.3929, "step": 14625 }, { "epoch": 1.7798600547611803, "grad_norm": 1.6795920133590698, "learning_rate": 6.155151557953043e-07, "loss": 0.3315, "step": 14626 }, { "epoch": 1.7799817462731975, "grad_norm": 1.9836817979812622, "learning_rate": 6.148415271435127e-07, "loss": 0.3413, "step": 14627 }, { "epoch": 1.7801034377852145, "grad_norm": 1.9625375270843506, "learning_rate": 6.141682556164274e-07, "loss": 0.3142, "step": 14628 }, { "epoch": 1.7802251292972315, "grad_norm": 1.6465593576431274, "learning_rate": 6.134953412396649e-07, "loss": 0.3313, "step": 14629 }, { "epoch": 1.7803468208092486, "grad_norm": 1.4613187313079834, "learning_rate": 6.128227840388357e-07, "loss": 0.3499, "step": 14630 }, { "epoch": 1.7804685123212656, "grad_norm": 1.7990692853927612, "learning_rate": 6.121505840395303e-07, "loss": 0.3475, "step": 14631 }, { "epoch": 1.7805902038332826, "grad_norm": 1.4610713720321655, "learning_rate": 6.114787412673262e-07, "loss": 0.3392, "step": 14632 }, { "epoch": 1.7807118953452996, "grad_norm": 1.584815502166748, "learning_rate": 6.108072557477906e-07, "loss": 0.3899, "step": 14633 }, { "epoch": 1.7808335868573169, "grad_norm": 2.032013416290283, "learning_rate": 6.10136127506471e-07, "loss": 0.3498, "step": 14634 }, { "epoch": 1.7809552783693339, "grad_norm": 1.9763884544372559, "learning_rate": 6.09465356568909e-07, "loss": 0.339, "step": 14635 }, { "epoch": 1.781076969881351, "grad_norm": 1.6281328201293945, "learning_rate": 6.087949429606277e-07, "loss": 0.3715, "step": 14636 }, { "epoch": 1.781198661393368, "grad_norm": 1.9488134384155273, "learning_rate": 6.081248867071366e-07, "loss": 0.3505, "step": 14637 }, { "epoch": 1.781320352905385, "grad_norm": 2.1327097415924072, "learning_rate": 6.074551878339341e-07, "loss": 0.3973, "step": 14638 }, { "epoch": 1.781442044417402, "grad_norm": 2.186551570892334, "learning_rate": 6.067858463665043e-07, "loss": 0.4052, "step": 14639 }, { "epoch": 1.781563735929419, "grad_norm": 2.0402286052703857, "learning_rate": 6.061168623303137e-07, "loss": 0.4237, "step": 14640 }, { "epoch": 1.781685427441436, "grad_norm": 1.7206637859344482, "learning_rate": 6.054482357508229e-07, "loss": 0.3605, "step": 14641 }, { "epoch": 1.781807118953453, "grad_norm": 1.7428456544876099, "learning_rate": 6.047799666534715e-07, "loss": 0.3451, "step": 14642 }, { "epoch": 1.78192881046547, "grad_norm": 1.942031741142273, "learning_rate": 6.041120550636903e-07, "loss": 0.3564, "step": 14643 }, { "epoch": 1.782050501977487, "grad_norm": 2.4761927127838135, "learning_rate": 6.034445010068934e-07, "loss": 0.3296, "step": 14644 }, { "epoch": 1.782172193489504, "grad_norm": 1.6716270446777344, "learning_rate": 6.027773045084817e-07, "loss": 0.4037, "step": 14645 }, { "epoch": 1.782293885001521, "grad_norm": 1.6377651691436768, "learning_rate": 6.021104655938459e-07, "loss": 0.3719, "step": 14646 }, { "epoch": 1.782415576513538, "grad_norm": 2.054234266281128, "learning_rate": 6.01443984288359e-07, "loss": 0.3563, "step": 14647 }, { "epoch": 1.7825372680255551, "grad_norm": 1.8019675016403198, "learning_rate": 6.00777860617382e-07, "loss": 0.3349, "step": 14648 }, { "epoch": 1.7826589595375721, "grad_norm": 2.7690815925598145, "learning_rate": 6.001120946062633e-07, "loss": 0.4577, "step": 14649 }, { "epoch": 1.7827806510495892, "grad_norm": 3.760871410369873, "learning_rate": 5.994466862803372e-07, "loss": 0.4424, "step": 14650 }, { "epoch": 1.7829023425616062, "grad_norm": 1.5450057983398438, "learning_rate": 5.987816356649201e-07, "loss": 0.3597, "step": 14651 }, { "epoch": 1.7830240340736232, "grad_norm": 2.7767693996429443, "learning_rate": 5.981169427853239e-07, "loss": 0.4472, "step": 14652 }, { "epoch": 1.7831457255856404, "grad_norm": 1.9481687545776367, "learning_rate": 5.974526076668386e-07, "loss": 0.3374, "step": 14653 }, { "epoch": 1.7832674170976575, "grad_norm": 1.608691930770874, "learning_rate": 5.967886303347415e-07, "loss": 0.3735, "step": 14654 }, { "epoch": 1.7833891086096745, "grad_norm": 1.670374870300293, "learning_rate": 5.961250108143035e-07, "loss": 0.3309, "step": 14655 }, { "epoch": 1.7835108001216915, "grad_norm": 2.5748684406280518, "learning_rate": 5.954617491307713e-07, "loss": 0.3914, "step": 14656 }, { "epoch": 1.7836324916337085, "grad_norm": 1.6508787870407104, "learning_rate": 5.947988453093878e-07, "loss": 0.367, "step": 14657 }, { "epoch": 1.7837541831457255, "grad_norm": 1.5863659381866455, "learning_rate": 5.941362993753752e-07, "loss": 0.3813, "step": 14658 }, { "epoch": 1.7838758746577428, "grad_norm": 1.7271991968154907, "learning_rate": 5.934741113539444e-07, "loss": 0.2967, "step": 14659 }, { "epoch": 1.7839975661697598, "grad_norm": 1.7320616245269775, "learning_rate": 5.928122812702963e-07, "loss": 0.3732, "step": 14660 }, { "epoch": 1.7841192576817768, "grad_norm": 2.0183608531951904, "learning_rate": 5.92150809149612e-07, "loss": 0.2761, "step": 14661 }, { "epoch": 1.7842409491937938, "grad_norm": 1.841736912727356, "learning_rate": 5.914896950170601e-07, "loss": 0.3946, "step": 14662 }, { "epoch": 1.7843626407058109, "grad_norm": 1.9585292339324951, "learning_rate": 5.908289388978028e-07, "loss": 0.3385, "step": 14663 }, { "epoch": 1.7844843322178279, "grad_norm": 2.909271001815796, "learning_rate": 5.901685408169777e-07, "loss": 0.415, "step": 14664 }, { "epoch": 1.784606023729845, "grad_norm": 2.813800096511841, "learning_rate": 5.895085007997192e-07, "loss": 0.3874, "step": 14665 }, { "epoch": 1.784727715241862, "grad_norm": 2.633601665496826, "learning_rate": 5.888488188711427e-07, "loss": 0.3921, "step": 14666 }, { "epoch": 1.784849406753879, "grad_norm": 2.4266695976257324, "learning_rate": 5.881894950563449e-07, "loss": 0.3867, "step": 14667 }, { "epoch": 1.784971098265896, "grad_norm": 1.7731428146362305, "learning_rate": 5.875305293804201e-07, "loss": 0.3018, "step": 14668 }, { "epoch": 1.785092789777913, "grad_norm": 2.9412267208099365, "learning_rate": 5.868719218684405e-07, "loss": 0.3444, "step": 14669 }, { "epoch": 1.78521448128993, "grad_norm": 3.0094337463378906, "learning_rate": 5.862136725454681e-07, "loss": 0.4223, "step": 14670 }, { "epoch": 1.785336172801947, "grad_norm": 1.6128140687942505, "learning_rate": 5.85555781436552e-07, "loss": 0.4037, "step": 14671 }, { "epoch": 1.785457864313964, "grad_norm": 1.832460641860962, "learning_rate": 5.848982485667232e-07, "loss": 0.3374, "step": 14672 }, { "epoch": 1.785579555825981, "grad_norm": 2.198385715484619, "learning_rate": 5.842410739610061e-07, "loss": 0.4153, "step": 14673 }, { "epoch": 1.785701247337998, "grad_norm": 1.7360938787460327, "learning_rate": 5.835842576444062e-07, "loss": 0.3957, "step": 14674 }, { "epoch": 1.785822938850015, "grad_norm": 1.9352519512176514, "learning_rate": 5.829277996419135e-07, "loss": 0.4056, "step": 14675 }, { "epoch": 1.785944630362032, "grad_norm": 1.5521093606948853, "learning_rate": 5.822716999785127e-07, "loss": 0.3601, "step": 14676 }, { "epoch": 1.7860663218740491, "grad_norm": 2.041236639022827, "learning_rate": 5.816159586791669e-07, "loss": 0.3897, "step": 14677 }, { "epoch": 1.7861880133860664, "grad_norm": 2.1369783878326416, "learning_rate": 5.809605757688297e-07, "loss": 0.4439, "step": 14678 }, { "epoch": 1.7863097048980834, "grad_norm": 1.8396812677383423, "learning_rate": 5.803055512724387e-07, "loss": 0.3837, "step": 14679 }, { "epoch": 1.7864313964101004, "grad_norm": 1.8973432779312134, "learning_rate": 5.796508852149174e-07, "loss": 0.3535, "step": 14680 }, { "epoch": 1.7865530879221174, "grad_norm": 1.9612892866134644, "learning_rate": 5.789965776211804e-07, "loss": 0.3817, "step": 14681 }, { "epoch": 1.7866747794341344, "grad_norm": 2.2423877716064453, "learning_rate": 5.783426285161243e-07, "loss": 0.4269, "step": 14682 }, { "epoch": 1.7867964709461515, "grad_norm": 2.1847662925720215, "learning_rate": 5.776890379246303e-07, "loss": 0.3473, "step": 14683 }, { "epoch": 1.7869181624581687, "grad_norm": 2.904357433319092, "learning_rate": 5.770358058715742e-07, "loss": 0.3336, "step": 14684 }, { "epoch": 1.7870398539701857, "grad_norm": 2.066694498062134, "learning_rate": 5.763829323818104e-07, "loss": 0.367, "step": 14685 }, { "epoch": 1.7871615454822027, "grad_norm": 2.2317566871643066, "learning_rate": 5.757304174801792e-07, "loss": 0.321, "step": 14686 }, { "epoch": 1.7872832369942198, "grad_norm": 1.8779890537261963, "learning_rate": 5.750782611915151e-07, "loss": 0.3562, "step": 14687 }, { "epoch": 1.7874049285062368, "grad_norm": 1.9288618564605713, "learning_rate": 5.744264635406316e-07, "loss": 0.3415, "step": 14688 }, { "epoch": 1.7875266200182538, "grad_norm": 1.723334550857544, "learning_rate": 5.737750245523311e-07, "loss": 0.3908, "step": 14689 }, { "epoch": 1.7876483115302708, "grad_norm": 2.3007218837738037, "learning_rate": 5.731239442514014e-07, "loss": 0.3273, "step": 14690 }, { "epoch": 1.7877700030422878, "grad_norm": 1.4876885414123535, "learning_rate": 5.724732226626173e-07, "loss": 0.397, "step": 14691 }, { "epoch": 1.7878916945543049, "grad_norm": 2.6491878032684326, "learning_rate": 5.718228598107422e-07, "loss": 0.4083, "step": 14692 }, { "epoch": 1.7880133860663219, "grad_norm": 1.5451598167419434, "learning_rate": 5.711728557205232e-07, "loss": 0.3937, "step": 14693 }, { "epoch": 1.788135077578339, "grad_norm": 1.640594720840454, "learning_rate": 5.705232104166913e-07, "loss": 0.3476, "step": 14694 }, { "epoch": 1.788256769090356, "grad_norm": 1.6284178495407104, "learning_rate": 5.698739239239715e-07, "loss": 0.3906, "step": 14695 }, { "epoch": 1.788378460602373, "grad_norm": 1.477540373802185, "learning_rate": 5.692249962670671e-07, "loss": 0.3591, "step": 14696 }, { "epoch": 1.78850015211439, "grad_norm": 1.769839882850647, "learning_rate": 5.685764274706717e-07, "loss": 0.3395, "step": 14697 }, { "epoch": 1.788621843626407, "grad_norm": 2.4802558422088623, "learning_rate": 5.679282175594669e-07, "loss": 0.3165, "step": 14698 }, { "epoch": 1.788743535138424, "grad_norm": 1.9474537372589111, "learning_rate": 5.672803665581161e-07, "loss": 0.376, "step": 14699 }, { "epoch": 1.788865226650441, "grad_norm": 1.8837029933929443, "learning_rate": 5.666328744912708e-07, "loss": 0.3021, "step": 14700 }, { "epoch": 1.788986918162458, "grad_norm": 1.8293421268463135, "learning_rate": 5.659857413835735e-07, "loss": 0.376, "step": 14701 }, { "epoch": 1.789108609674475, "grad_norm": 2.456915855407715, "learning_rate": 5.653389672596421e-07, "loss": 0.4254, "step": 14702 }, { "epoch": 1.7892303011864923, "grad_norm": 2.629037618637085, "learning_rate": 5.646925521440949e-07, "loss": 0.3457, "step": 14703 }, { "epoch": 1.7893519926985093, "grad_norm": 1.7283847332000732, "learning_rate": 5.640464960615255e-07, "loss": 0.4401, "step": 14704 }, { "epoch": 1.7894736842105263, "grad_norm": 2.4867005348205566, "learning_rate": 5.634007990365165e-07, "loss": 0.3969, "step": 14705 }, { "epoch": 1.7895953757225433, "grad_norm": 2.7062764167785645, "learning_rate": 5.627554610936414e-07, "loss": 0.4235, "step": 14706 }, { "epoch": 1.7897170672345604, "grad_norm": 2.072197675704956, "learning_rate": 5.621104822574542e-07, "loss": 0.3981, "step": 14707 }, { "epoch": 1.7898387587465774, "grad_norm": 1.6879130601882935, "learning_rate": 5.614658625524983e-07, "loss": 0.3516, "step": 14708 }, { "epoch": 1.7899604502585946, "grad_norm": 3.3290631771087646, "learning_rate": 5.608216020033042e-07, "loss": 0.4062, "step": 14709 }, { "epoch": 1.7900821417706116, "grad_norm": 1.9005234241485596, "learning_rate": 5.601777006343856e-07, "loss": 0.3903, "step": 14710 }, { "epoch": 1.7902038332826287, "grad_norm": 1.6743403673171997, "learning_rate": 5.59534158470244e-07, "loss": 0.3932, "step": 14711 }, { "epoch": 1.7903255247946457, "grad_norm": 1.645206332206726, "learning_rate": 5.588909755353711e-07, "loss": 0.3814, "step": 14712 }, { "epoch": 1.7904472163066627, "grad_norm": 2.1297378540039062, "learning_rate": 5.58248151854236e-07, "loss": 0.4193, "step": 14713 }, { "epoch": 1.7905689078186797, "grad_norm": 1.9844765663146973, "learning_rate": 5.576056874513025e-07, "loss": 0.3002, "step": 14714 }, { "epoch": 1.7906905993306967, "grad_norm": 2.4359207153320312, "learning_rate": 5.569635823510178e-07, "loss": 0.3936, "step": 14715 }, { "epoch": 1.7908122908427138, "grad_norm": 3.388141632080078, "learning_rate": 5.563218365778134e-07, "loss": 0.3248, "step": 14716 }, { "epoch": 1.7909339823547308, "grad_norm": 2.2163338661193848, "learning_rate": 5.556804501561131e-07, "loss": 0.3437, "step": 14717 }, { "epoch": 1.7910556738667478, "grad_norm": 1.8829245567321777, "learning_rate": 5.550394231103185e-07, "loss": 0.4333, "step": 14718 }, { "epoch": 1.7911773653787648, "grad_norm": 1.7104471921920776, "learning_rate": 5.543987554648234e-07, "loss": 0.3525, "step": 14719 }, { "epoch": 1.7912990568907818, "grad_norm": 1.5799250602722168, "learning_rate": 5.537584472440083e-07, "loss": 0.3834, "step": 14720 }, { "epoch": 1.7914207484027989, "grad_norm": 1.6776342391967773, "learning_rate": 5.531184984722371e-07, "loss": 0.3475, "step": 14721 }, { "epoch": 1.7915424399148159, "grad_norm": 2.1516518592834473, "learning_rate": 5.524789091738592e-07, "loss": 0.3278, "step": 14722 }, { "epoch": 1.791664131426833, "grad_norm": 2.270328998565674, "learning_rate": 5.51839679373215e-07, "loss": 0.3314, "step": 14723 }, { "epoch": 1.79178582293885, "grad_norm": 1.5874958038330078, "learning_rate": 5.512008090946286e-07, "loss": 0.3335, "step": 14724 }, { "epoch": 1.791907514450867, "grad_norm": 1.5988296270370483, "learning_rate": 5.505622983624093e-07, "loss": 0.3681, "step": 14725 }, { "epoch": 1.792029205962884, "grad_norm": 2.482724905014038, "learning_rate": 5.499241472008532e-07, "loss": 0.3181, "step": 14726 }, { "epoch": 1.792150897474901, "grad_norm": 2.6830341815948486, "learning_rate": 5.49286355634242e-07, "loss": 0.3056, "step": 14727 }, { "epoch": 1.7922725889869182, "grad_norm": 1.734123945236206, "learning_rate": 5.486489236868497e-07, "loss": 0.3785, "step": 14728 }, { "epoch": 1.7923942804989352, "grad_norm": 2.384275197982788, "learning_rate": 5.48011851382928e-07, "loss": 0.3665, "step": 14729 }, { "epoch": 1.7925159720109523, "grad_norm": 1.6589666604995728, "learning_rate": 5.473751387467196e-07, "loss": 0.3716, "step": 14730 }, { "epoch": 1.7926376635229693, "grad_norm": 2.1683309078216553, "learning_rate": 5.467387858024532e-07, "loss": 0.3661, "step": 14731 }, { "epoch": 1.7927593550349863, "grad_norm": 1.929255723953247, "learning_rate": 5.461027925743422e-07, "loss": 0.3525, "step": 14732 }, { "epoch": 1.7928810465470033, "grad_norm": 3.0995941162109375, "learning_rate": 5.4546715908659e-07, "loss": 0.2643, "step": 14733 }, { "epoch": 1.7930027380590203, "grad_norm": 2.034583568572998, "learning_rate": 5.448318853633827e-07, "loss": 0.3769, "step": 14734 }, { "epoch": 1.7931244295710376, "grad_norm": 2.0144851207733154, "learning_rate": 5.441969714288919e-07, "loss": 0.3886, "step": 14735 }, { "epoch": 1.7932461210830546, "grad_norm": 1.7579948902130127, "learning_rate": 5.435624173072807e-07, "loss": 0.295, "step": 14736 }, { "epoch": 1.7933678125950716, "grad_norm": 1.5953644514083862, "learning_rate": 5.429282230226918e-07, "loss": 0.382, "step": 14737 }, { "epoch": 1.7934895041070886, "grad_norm": 1.584346890449524, "learning_rate": 5.422943885992582e-07, "loss": 0.3248, "step": 14738 }, { "epoch": 1.7936111956191056, "grad_norm": 1.7310121059417725, "learning_rate": 5.416609140611006e-07, "loss": 0.3619, "step": 14739 }, { "epoch": 1.7937328871311227, "grad_norm": 3.3062760829925537, "learning_rate": 5.41027799432321e-07, "loss": 0.4008, "step": 14740 }, { "epoch": 1.7938545786431397, "grad_norm": 2.234459638595581, "learning_rate": 5.403950447370154e-07, "loss": 0.3899, "step": 14741 }, { "epoch": 1.7939762701551567, "grad_norm": 1.7101892232894897, "learning_rate": 5.39762649999258e-07, "loss": 0.3742, "step": 14742 }, { "epoch": 1.7940979616671737, "grad_norm": 3.6123085021972656, "learning_rate": 5.39130615243112e-07, "loss": 0.4417, "step": 14743 }, { "epoch": 1.7942196531791907, "grad_norm": 1.5546348094940186, "learning_rate": 5.3849894049263e-07, "loss": 0.381, "step": 14744 }, { "epoch": 1.7943413446912078, "grad_norm": 1.6390193700790405, "learning_rate": 5.378676257718474e-07, "loss": 0.3573, "step": 14745 }, { "epoch": 1.7944630362032248, "grad_norm": 1.231501817703247, "learning_rate": 5.372366711047871e-07, "loss": 0.3133, "step": 14746 }, { "epoch": 1.7945847277152418, "grad_norm": 1.6342763900756836, "learning_rate": 5.366060765154602e-07, "loss": 0.3301, "step": 14747 }, { "epoch": 1.7947064192272588, "grad_norm": 3.78139066696167, "learning_rate": 5.359758420278571e-07, "loss": 0.4606, "step": 14748 }, { "epoch": 1.7948281107392758, "grad_norm": 1.6679884195327759, "learning_rate": 5.353459676659645e-07, "loss": 0.347, "step": 14749 }, { "epoch": 1.7949498022512929, "grad_norm": 1.5740777254104614, "learning_rate": 5.347164534537486e-07, "loss": 0.3313, "step": 14750 }, { "epoch": 1.7950714937633099, "grad_norm": 1.7896395921707153, "learning_rate": 5.340872994151625e-07, "loss": 0.3734, "step": 14751 }, { "epoch": 1.795193185275327, "grad_norm": 3.9227335453033447, "learning_rate": 5.334585055741492e-07, "loss": 0.4533, "step": 14752 }, { "epoch": 1.795314876787344, "grad_norm": 1.646697998046875, "learning_rate": 5.328300719546342e-07, "loss": 0.3643, "step": 14753 }, { "epoch": 1.7954365682993612, "grad_norm": 1.4338403940200806, "learning_rate": 5.322019985805294e-07, "loss": 0.3384, "step": 14754 }, { "epoch": 1.7955582598113782, "grad_norm": 1.9854618310928345, "learning_rate": 5.315742854757378e-07, "loss": 0.4001, "step": 14755 }, { "epoch": 1.7956799513233952, "grad_norm": 1.8640873432159424, "learning_rate": 5.309469326641437e-07, "loss": 0.3835, "step": 14756 }, { "epoch": 1.7958016428354122, "grad_norm": 2.356013059616089, "learning_rate": 5.303199401696157e-07, "loss": 0.4477, "step": 14757 }, { "epoch": 1.7959233343474292, "grad_norm": 2.144286632537842, "learning_rate": 5.296933080160194e-07, "loss": 0.3604, "step": 14758 }, { "epoch": 1.7960450258594463, "grad_norm": 1.867668628692627, "learning_rate": 5.29067036227191e-07, "loss": 0.4201, "step": 14759 }, { "epoch": 1.7961667173714635, "grad_norm": 2.3393490314483643, "learning_rate": 5.284411248269683e-07, "loss": 0.2951, "step": 14760 }, { "epoch": 1.7962884088834805, "grad_norm": 2.8287689685821533, "learning_rate": 5.278155738391655e-07, "loss": 0.3541, "step": 14761 }, { "epoch": 1.7964101003954975, "grad_norm": 1.6722416877746582, "learning_rate": 5.271903832875847e-07, "loss": 0.3737, "step": 14762 }, { "epoch": 1.7965317919075146, "grad_norm": 1.562496304512024, "learning_rate": 5.265655531960189e-07, "loss": 0.3359, "step": 14763 }, { "epoch": 1.7966534834195316, "grad_norm": 2.3614540100097656, "learning_rate": 5.259410835882429e-07, "loss": 0.3255, "step": 14764 }, { "epoch": 1.7967751749315486, "grad_norm": 1.6277563571929932, "learning_rate": 5.253169744880182e-07, "loss": 0.3295, "step": 14765 }, { "epoch": 1.7968968664435656, "grad_norm": 2.1053812503814697, "learning_rate": 5.24693225919094e-07, "loss": 0.4114, "step": 14766 }, { "epoch": 1.7970185579555826, "grad_norm": 1.4443774223327637, "learning_rate": 5.240698379052067e-07, "loss": 0.3216, "step": 14767 }, { "epoch": 1.7971402494675996, "grad_norm": 3.4049203395843506, "learning_rate": 5.234468104700741e-07, "loss": 0.3822, "step": 14768 }, { "epoch": 1.7972619409796167, "grad_norm": 1.7134525775909424, "learning_rate": 5.228241436374071e-07, "loss": 0.3893, "step": 14769 }, { "epoch": 1.7973836324916337, "grad_norm": 1.5483367443084717, "learning_rate": 5.22201837430899e-07, "loss": 0.3281, "step": 14770 }, { "epoch": 1.7975053240036507, "grad_norm": 1.4085713624954224, "learning_rate": 5.215798918742276e-07, "loss": 0.3698, "step": 14771 }, { "epoch": 1.7976270155156677, "grad_norm": 1.5848411321640015, "learning_rate": 5.209583069910617e-07, "loss": 0.3478, "step": 14772 }, { "epoch": 1.7977487070276847, "grad_norm": 1.6619746685028076, "learning_rate": 5.203370828050503e-07, "loss": 0.3173, "step": 14773 }, { "epoch": 1.7978703985397018, "grad_norm": 1.9243499040603638, "learning_rate": 5.197162193398375e-07, "loss": 0.4045, "step": 14774 }, { "epoch": 1.7979920900517188, "grad_norm": 1.8584126234054565, "learning_rate": 5.190957166190447e-07, "loss": 0.3574, "step": 14775 }, { "epoch": 1.7981137815637358, "grad_norm": 1.852081537246704, "learning_rate": 5.184755746662828e-07, "loss": 0.3889, "step": 14776 }, { "epoch": 1.7982354730757528, "grad_norm": 1.581626057624817, "learning_rate": 5.178557935051532e-07, "loss": 0.3373, "step": 14777 }, { "epoch": 1.7983571645877698, "grad_norm": 1.7249395847320557, "learning_rate": 5.172363731592367e-07, "loss": 0.401, "step": 14778 }, { "epoch": 1.798478856099787, "grad_norm": 2.10213303565979, "learning_rate": 5.166173136521035e-07, "loss": 0.3693, "step": 14779 }, { "epoch": 1.798600547611804, "grad_norm": 1.5888627767562866, "learning_rate": 5.159986150073126e-07, "loss": 0.3742, "step": 14780 }, { "epoch": 1.7987222391238211, "grad_norm": 3.600254774093628, "learning_rate": 5.15380277248404e-07, "loss": 0.4371, "step": 14781 }, { "epoch": 1.7988439306358381, "grad_norm": 4.297314643859863, "learning_rate": 5.147623003989089e-07, "loss": 0.4079, "step": 14782 }, { "epoch": 1.7989656221478552, "grad_norm": 1.860015869140625, "learning_rate": 5.141446844823417e-07, "loss": 0.3244, "step": 14783 }, { "epoch": 1.7990873136598722, "grad_norm": 2.0766143798828125, "learning_rate": 5.135274295222026e-07, "loss": 0.4172, "step": 14784 }, { "epoch": 1.7992090051718894, "grad_norm": 3.5105597972869873, "learning_rate": 5.129105355419816e-07, "loss": 0.4489, "step": 14785 }, { "epoch": 1.7993306966839064, "grad_norm": 1.5098934173583984, "learning_rate": 5.122940025651523e-07, "loss": 0.3369, "step": 14786 }, { "epoch": 1.7994523881959235, "grad_norm": 3.6979687213897705, "learning_rate": 5.116778306151737e-07, "loss": 0.4223, "step": 14787 }, { "epoch": 1.7995740797079405, "grad_norm": 1.7551203966140747, "learning_rate": 5.110620197154947e-07, "loss": 0.3753, "step": 14788 }, { "epoch": 1.7996957712199575, "grad_norm": 1.981912612915039, "learning_rate": 5.104465698895456e-07, "loss": 0.3695, "step": 14789 }, { "epoch": 1.7998174627319745, "grad_norm": 1.7611268758773804, "learning_rate": 5.098314811607463e-07, "loss": 0.3461, "step": 14790 }, { "epoch": 1.7999391542439915, "grad_norm": 2.163121461868286, "learning_rate": 5.09216753552505e-07, "loss": 0.4022, "step": 14791 }, { "epoch": 1.8000608457560086, "grad_norm": 1.6740963459014893, "learning_rate": 5.086023870882084e-07, "loss": 0.3712, "step": 14792 }, { "epoch": 1.8001825372680256, "grad_norm": 1.6616315841674805, "learning_rate": 5.079883817912401e-07, "loss": 0.3612, "step": 14793 }, { "epoch": 1.8003042287800426, "grad_norm": 2.0691816806793213, "learning_rate": 5.073747376849602e-07, "loss": 0.3233, "step": 14794 }, { "epoch": 1.8004259202920596, "grad_norm": 1.9312821626663208, "learning_rate": 5.06761454792718e-07, "loss": 0.3963, "step": 14795 }, { "epoch": 1.8005476118040766, "grad_norm": 1.9697308540344238, "learning_rate": 5.061485331378546e-07, "loss": 0.3656, "step": 14796 }, { "epoch": 1.8006693033160937, "grad_norm": 1.6139159202575684, "learning_rate": 5.055359727436904e-07, "loss": 0.329, "step": 14797 }, { "epoch": 1.8007909948281107, "grad_norm": 2.5449719429016113, "learning_rate": 5.049237736335343e-07, "loss": 0.3022, "step": 14798 }, { "epoch": 1.8009126863401277, "grad_norm": 2.2044992446899414, "learning_rate": 5.043119358306825e-07, "loss": 0.3018, "step": 14799 }, { "epoch": 1.8010343778521447, "grad_norm": 2.2404911518096924, "learning_rate": 5.037004593584161e-07, "loss": 0.3494, "step": 14800 }, { "epoch": 1.8011560693641617, "grad_norm": 1.726481318473816, "learning_rate": 5.030893442400042e-07, "loss": 0.3636, "step": 14801 }, { "epoch": 1.8012777608761787, "grad_norm": 3.1688575744628906, "learning_rate": 5.024785904987006e-07, "loss": 0.5256, "step": 14802 }, { "epoch": 1.8013994523881958, "grad_norm": 1.8053702116012573, "learning_rate": 5.018681981577445e-07, "loss": 0.4006, "step": 14803 }, { "epoch": 1.801521143900213, "grad_norm": 2.033916711807251, "learning_rate": 5.012581672403649e-07, "loss": 0.3265, "step": 14804 }, { "epoch": 1.80164283541223, "grad_norm": 2.071986436843872, "learning_rate": 5.006484977697735e-07, "loss": 0.4078, "step": 14805 }, { "epoch": 1.801764526924247, "grad_norm": 1.6140072345733643, "learning_rate": 5.00039189769167e-07, "loss": 0.3562, "step": 14806 }, { "epoch": 1.801886218436264, "grad_norm": 1.711897373199463, "learning_rate": 4.994302432617348e-07, "loss": 0.4249, "step": 14807 }, { "epoch": 1.802007909948281, "grad_norm": 2.0763955116271973, "learning_rate": 4.988216582706451e-07, "loss": 0.3749, "step": 14808 }, { "epoch": 1.802129601460298, "grad_norm": 1.9720592498779297, "learning_rate": 4.982134348190604e-07, "loss": 0.4262, "step": 14809 }, { "epoch": 1.8022512929723153, "grad_norm": 1.66851007938385, "learning_rate": 4.976055729301211e-07, "loss": 0.3761, "step": 14810 }, { "epoch": 1.8023729844843324, "grad_norm": 1.9438172578811646, "learning_rate": 4.969980726269574e-07, "loss": 0.3898, "step": 14811 }, { "epoch": 1.8024946759963494, "grad_norm": 2.012744903564453, "learning_rate": 4.963909339326877e-07, "loss": 0.3273, "step": 14812 }, { "epoch": 1.8026163675083664, "grad_norm": 1.625656008720398, "learning_rate": 4.957841568704158e-07, "loss": 0.3645, "step": 14813 }, { "epoch": 1.8027380590203834, "grad_norm": 1.7917683124542236, "learning_rate": 4.951777414632275e-07, "loss": 0.4012, "step": 14814 }, { "epoch": 1.8028597505324004, "grad_norm": 1.8424171209335327, "learning_rate": 4.945716877342011e-07, "loss": 0.3865, "step": 14815 }, { "epoch": 1.8029814420444175, "grad_norm": 1.930957555770874, "learning_rate": 4.939659957063969e-07, "loss": 0.3136, "step": 14816 }, { "epoch": 1.8031031335564345, "grad_norm": 1.6637803316116333, "learning_rate": 4.933606654028633e-07, "loss": 0.3502, "step": 14817 }, { "epoch": 1.8032248250684515, "grad_norm": 2.7403173446655273, "learning_rate": 4.927556968466351e-07, "loss": 0.3471, "step": 14818 }, { "epoch": 1.8033465165804685, "grad_norm": 1.8273621797561646, "learning_rate": 4.921510900607285e-07, "loss": 0.3741, "step": 14819 }, { "epoch": 1.8034682080924855, "grad_norm": 1.7146549224853516, "learning_rate": 4.915468450681559e-07, "loss": 0.3427, "step": 14820 }, { "epoch": 1.8035898996045026, "grad_norm": 1.6572078466415405, "learning_rate": 4.909429618919059e-07, "loss": 0.3271, "step": 14821 }, { "epoch": 1.8037115911165196, "grad_norm": 1.6790177822113037, "learning_rate": 4.903394405549589e-07, "loss": 0.3641, "step": 14822 }, { "epoch": 1.8038332826285366, "grad_norm": 2.3604867458343506, "learning_rate": 4.897362810802808e-07, "loss": 0.4224, "step": 14823 }, { "epoch": 1.8039549741405536, "grad_norm": 3.3323137760162354, "learning_rate": 4.891334834908224e-07, "loss": 0.4482, "step": 14824 }, { "epoch": 1.8040766656525706, "grad_norm": 2.1242923736572266, "learning_rate": 4.885310478095196e-07, "loss": 0.3864, "step": 14825 }, { "epoch": 1.8041983571645877, "grad_norm": 2.0595483779907227, "learning_rate": 4.879289740592996e-07, "loss": 0.4129, "step": 14826 }, { "epoch": 1.8043200486766047, "grad_norm": 1.8756804466247559, "learning_rate": 4.873272622630709e-07, "loss": 0.3411, "step": 14827 }, { "epoch": 1.8044417401886217, "grad_norm": 2.0814154148101807, "learning_rate": 4.867259124437307e-07, "loss": 0.4108, "step": 14828 }, { "epoch": 1.804563431700639, "grad_norm": 1.3498259782791138, "learning_rate": 4.861249246241606e-07, "loss": 0.2879, "step": 14829 }, { "epoch": 1.804685123212656, "grad_norm": 2.0821359157562256, "learning_rate": 4.855242988272279e-07, "loss": 0.3517, "step": 14830 }, { "epoch": 1.804806814724673, "grad_norm": 1.918953776359558, "learning_rate": 4.849240350757911e-07, "loss": 0.3694, "step": 14831 }, { "epoch": 1.80492850623669, "grad_norm": 2.6444332599639893, "learning_rate": 4.843241333926907e-07, "loss": 0.4012, "step": 14832 }, { "epoch": 1.805050197748707, "grad_norm": 1.9890787601470947, "learning_rate": 4.837245938007518e-07, "loss": 0.3355, "step": 14833 }, { "epoch": 1.805171889260724, "grad_norm": 2.329502582550049, "learning_rate": 4.831254163227906e-07, "loss": 0.3957, "step": 14834 }, { "epoch": 1.805293580772741, "grad_norm": 2.8570895195007324, "learning_rate": 4.825266009816054e-07, "loss": 0.3137, "step": 14835 }, { "epoch": 1.8054152722847583, "grad_norm": 1.8884459733963013, "learning_rate": 4.819281477999826e-07, "loss": 0.3874, "step": 14836 }, { "epoch": 1.8055369637967753, "grad_norm": 2.1398398876190186, "learning_rate": 4.813300568006973e-07, "loss": 0.3895, "step": 14837 }, { "epoch": 1.8056586553087923, "grad_norm": 4.1433186531066895, "learning_rate": 4.807323280065046e-07, "loss": 0.4288, "step": 14838 }, { "epoch": 1.8057803468208093, "grad_norm": 1.590999960899353, "learning_rate": 4.801349614401518e-07, "loss": 0.3796, "step": 14839 }, { "epoch": 1.8059020383328264, "grad_norm": 3.455644369125366, "learning_rate": 4.795379571243675e-07, "loss": 0.2993, "step": 14840 }, { "epoch": 1.8060237298448434, "grad_norm": 2.1806375980377197, "learning_rate": 4.7894131508187e-07, "loss": 0.3579, "step": 14841 }, { "epoch": 1.8061454213568604, "grad_norm": 1.8905553817749023, "learning_rate": 4.783450353353636e-07, "loss": 0.3729, "step": 14842 }, { "epoch": 1.8062671128688774, "grad_norm": 1.8146286010742188, "learning_rate": 4.777491179075378e-07, "loss": 0.3684, "step": 14843 }, { "epoch": 1.8063888043808944, "grad_norm": 2.0119943618774414, "learning_rate": 4.771535628210655e-07, "loss": 0.3966, "step": 14844 }, { "epoch": 1.8065104958929115, "grad_norm": 2.6113085746765137, "learning_rate": 4.7655837009861427e-07, "loss": 0.4367, "step": 14845 }, { "epoch": 1.8066321874049285, "grad_norm": 2.5403854846954346, "learning_rate": 4.7596353976282814e-07, "loss": 0.3695, "step": 14846 }, { "epoch": 1.8067538789169455, "grad_norm": 2.138422727584839, "learning_rate": 4.753690718363424e-07, "loss": 0.294, "step": 14847 }, { "epoch": 1.8068755704289625, "grad_norm": 3.077648162841797, "learning_rate": 4.7477496634178e-07, "loss": 0.3872, "step": 14848 }, { "epoch": 1.8069972619409795, "grad_norm": 1.725256085395813, "learning_rate": 4.741812233017462e-07, "loss": 0.3501, "step": 14849 }, { "epoch": 1.8071189534529966, "grad_norm": 1.8914635181427002, "learning_rate": 4.7358784273883407e-07, "loss": 0.3695, "step": 14850 }, { "epoch": 1.8072406449650136, "grad_norm": 1.6651238203048706, "learning_rate": 4.729948246756222e-07, "loss": 0.3612, "step": 14851 }, { "epoch": 1.8073623364770306, "grad_norm": 1.5391969680786133, "learning_rate": 4.7240216913467697e-07, "loss": 0.3509, "step": 14852 }, { "epoch": 1.8074840279890476, "grad_norm": 1.8276183605194092, "learning_rate": 4.7180987613855147e-07, "loss": 0.3932, "step": 14853 }, { "epoch": 1.8076057195010649, "grad_norm": 1.6572011709213257, "learning_rate": 4.712179457097821e-07, "loss": 0.3907, "step": 14854 }, { "epoch": 1.8077274110130819, "grad_norm": 2.9096972942352295, "learning_rate": 4.706263778708919e-07, "loss": 0.3413, "step": 14855 }, { "epoch": 1.807849102525099, "grad_norm": 2.8286430835723877, "learning_rate": 4.7003517264439413e-07, "loss": 0.4235, "step": 14856 }, { "epoch": 1.807970794037116, "grad_norm": 1.8517764806747437, "learning_rate": 4.69444330052784e-07, "loss": 0.3491, "step": 14857 }, { "epoch": 1.808092485549133, "grad_norm": 1.8784458637237549, "learning_rate": 4.6885385011854243e-07, "loss": 0.3562, "step": 14858 }, { "epoch": 1.80821417706115, "grad_norm": 2.420989751815796, "learning_rate": 4.682637328641426e-07, "loss": 0.43, "step": 14859 }, { "epoch": 1.808335868573167, "grad_norm": 2.6593501567840576, "learning_rate": 4.6767397831203543e-07, "loss": 0.2992, "step": 14860 }, { "epoch": 1.8084575600851842, "grad_norm": 3.873929977416992, "learning_rate": 4.6708458648466625e-07, "loss": 0.4299, "step": 14861 }, { "epoch": 1.8085792515972012, "grad_norm": 2.11767840385437, "learning_rate": 4.664955574044616e-07, "loss": 0.4236, "step": 14862 }, { "epoch": 1.8087009431092183, "grad_norm": 1.478190541267395, "learning_rate": 4.6590689109383136e-07, "loss": 0.3178, "step": 14863 }, { "epoch": 1.8088226346212353, "grad_norm": 1.6360585689544678, "learning_rate": 4.653185875751798e-07, "loss": 0.3348, "step": 14864 }, { "epoch": 1.8089443261332523, "grad_norm": 2.4978084564208984, "learning_rate": 4.647306468708912e-07, "loss": 0.3962, "step": 14865 }, { "epoch": 1.8090660176452693, "grad_norm": 2.796816825866699, "learning_rate": 4.641430690033377e-07, "loss": 0.292, "step": 14866 }, { "epoch": 1.8091877091572863, "grad_norm": 1.680859923362732, "learning_rate": 4.635558539948803e-07, "loss": 0.3859, "step": 14867 }, { "epoch": 1.8093094006693033, "grad_norm": 2.0321059226989746, "learning_rate": 4.629690018678601e-07, "loss": 0.3246, "step": 14868 }, { "epoch": 1.8094310921813204, "grad_norm": 1.5931979417800903, "learning_rate": 4.6238251264461134e-07, "loss": 0.3724, "step": 14869 }, { "epoch": 1.8095527836933374, "grad_norm": 1.7058771848678589, "learning_rate": 4.617963863474495e-07, "loss": 0.3151, "step": 14870 }, { "epoch": 1.8096744752053544, "grad_norm": 1.4641793966293335, "learning_rate": 4.612106229986768e-07, "loss": 0.3245, "step": 14871 }, { "epoch": 1.8097961667173714, "grad_norm": 1.888519525527954, "learning_rate": 4.606252226205854e-07, "loss": 0.3844, "step": 14872 }, { "epoch": 1.8099178582293884, "grad_norm": 1.5693230628967285, "learning_rate": 4.6004018523545077e-07, "loss": 0.374, "step": 14873 }, { "epoch": 1.8100395497414055, "grad_norm": 1.6680448055267334, "learning_rate": 4.5945551086553297e-07, "loss": 0.3775, "step": 14874 }, { "epoch": 1.8101612412534225, "grad_norm": 2.2927894592285156, "learning_rate": 4.588711995330808e-07, "loss": 0.3812, "step": 14875 }, { "epoch": 1.8102829327654395, "grad_norm": 2.147362232208252, "learning_rate": 4.5828725126032646e-07, "loss": 0.3567, "step": 14876 }, { "epoch": 1.8104046242774565, "grad_norm": 2.65213942527771, "learning_rate": 4.5770366606949443e-07, "loss": 0.3201, "step": 14877 }, { "epoch": 1.8105263157894735, "grad_norm": 2.5378568172454834, "learning_rate": 4.5712044398279033e-07, "loss": 0.316, "step": 14878 }, { "epoch": 1.8106480073014906, "grad_norm": 2.001706838607788, "learning_rate": 4.56537585022403e-07, "loss": 0.3454, "step": 14879 }, { "epoch": 1.8107696988135078, "grad_norm": 1.8229354619979858, "learning_rate": 4.559550892105169e-07, "loss": 0.3525, "step": 14880 }, { "epoch": 1.8108913903255248, "grad_norm": 1.6335780620574951, "learning_rate": 4.5537295656929435e-07, "loss": 0.3956, "step": 14881 }, { "epoch": 1.8110130818375418, "grad_norm": 2.341587543487549, "learning_rate": 4.5479118712088656e-07, "loss": 0.3871, "step": 14882 }, { "epoch": 1.8111347733495589, "grad_norm": 2.632615089416504, "learning_rate": 4.5420978088743127e-07, "loss": 0.3112, "step": 14883 }, { "epoch": 1.8112564648615759, "grad_norm": 1.8785203695297241, "learning_rate": 4.536287378910542e-07, "loss": 0.4, "step": 14884 }, { "epoch": 1.811378156373593, "grad_norm": 1.6285902261734009, "learning_rate": 4.530480581538632e-07, "loss": 0.3821, "step": 14885 }, { "epoch": 1.8114998478856101, "grad_norm": 1.6032397747039795, "learning_rate": 4.524677416979539e-07, "loss": 0.3828, "step": 14886 }, { "epoch": 1.8116215393976272, "grad_norm": 1.7675167322158813, "learning_rate": 4.518877885454087e-07, "loss": 0.3975, "step": 14887 }, { "epoch": 1.8117432309096442, "grad_norm": 3.0974245071411133, "learning_rate": 4.5130819871829766e-07, "loss": 0.3921, "step": 14888 }, { "epoch": 1.8118649224216612, "grad_norm": 2.473191976547241, "learning_rate": 4.507289722386743e-07, "loss": 0.3729, "step": 14889 }, { "epoch": 1.8119866139336782, "grad_norm": 1.5676767826080322, "learning_rate": 4.501501091285787e-07, "loss": 0.3165, "step": 14890 }, { "epoch": 1.8121083054456952, "grad_norm": 1.580743432044983, "learning_rate": 4.4957160941003884e-07, "loss": 0.3757, "step": 14891 }, { "epoch": 1.8122299969577123, "grad_norm": 1.9960728883743286, "learning_rate": 4.4899347310506824e-07, "loss": 0.3213, "step": 14892 }, { "epoch": 1.8123516884697293, "grad_norm": 2.9530646800994873, "learning_rate": 4.4841570023566374e-07, "loss": 0.4389, "step": 14893 }, { "epoch": 1.8124733799817463, "grad_norm": 2.1559576988220215, "learning_rate": 4.4783829082381435e-07, "loss": 0.458, "step": 14894 }, { "epoch": 1.8125950714937633, "grad_norm": 1.826980471611023, "learning_rate": 4.472612448914904e-07, "loss": 0.3778, "step": 14895 }, { "epoch": 1.8127167630057803, "grad_norm": 1.6393816471099854, "learning_rate": 4.466845624606464e-07, "loss": 0.3268, "step": 14896 }, { "epoch": 1.8128384545177973, "grad_norm": 1.5732077360153198, "learning_rate": 4.461082435532327e-07, "loss": 0.3603, "step": 14897 }, { "epoch": 1.8129601460298144, "grad_norm": 1.806353211402893, "learning_rate": 4.455322881911728e-07, "loss": 0.3734, "step": 14898 }, { "epoch": 1.8130818375418314, "grad_norm": 1.83968186378479, "learning_rate": 4.4495669639638803e-07, "loss": 0.3991, "step": 14899 }, { "epoch": 1.8132035290538484, "grad_norm": 1.6772584915161133, "learning_rate": 4.4438146819077765e-07, "loss": 0.3343, "step": 14900 }, { "epoch": 1.8133252205658654, "grad_norm": 1.7154022455215454, "learning_rate": 4.4380660359623074e-07, "loss": 0.3976, "step": 14901 }, { "epoch": 1.8134469120778824, "grad_norm": 1.4220342636108398, "learning_rate": 4.432321026346242e-07, "loss": 0.3425, "step": 14902 }, { "epoch": 1.8135686035898995, "grad_norm": 2.66960072517395, "learning_rate": 4.4265796532781737e-07, "loss": 0.3369, "step": 14903 }, { "epoch": 1.8136902951019165, "grad_norm": 2.686824321746826, "learning_rate": 4.4208419169765705e-07, "loss": 0.3861, "step": 14904 }, { "epoch": 1.8138119866139337, "grad_norm": 1.9637571573257446, "learning_rate": 4.4151078176597694e-07, "loss": 0.3985, "step": 14905 }, { "epoch": 1.8139336781259507, "grad_norm": 2.498554229736328, "learning_rate": 4.409377355545974e-07, "loss": 0.3809, "step": 14906 }, { "epoch": 1.8140553696379678, "grad_norm": 3.8453824520111084, "learning_rate": 4.4036505308532207e-07, "loss": 0.3014, "step": 14907 }, { "epoch": 1.8141770611499848, "grad_norm": 1.4222275018692017, "learning_rate": 4.3979273437994575e-07, "loss": 0.3955, "step": 14908 }, { "epoch": 1.8142987526620018, "grad_norm": 2.0273361206054688, "learning_rate": 4.3922077946024213e-07, "loss": 0.4061, "step": 14909 }, { "epoch": 1.8144204441740188, "grad_norm": 1.940845251083374, "learning_rate": 4.3864918834797934e-07, "loss": 0.4214, "step": 14910 }, { "epoch": 1.814542135686036, "grad_norm": 2.002195119857788, "learning_rate": 4.380779610649055e-07, "loss": 0.3415, "step": 14911 }, { "epoch": 1.814663827198053, "grad_norm": 1.8036713600158691, "learning_rate": 4.3750709763275554e-07, "loss": 0.3641, "step": 14912 }, { "epoch": 1.81478551871007, "grad_norm": 1.459445834159851, "learning_rate": 4.369365980732565e-07, "loss": 0.3373, "step": 14913 }, { "epoch": 1.8149072102220871, "grad_norm": 1.7466773986816406, "learning_rate": 4.363664624081132e-07, "loss": 0.3968, "step": 14914 }, { "epoch": 1.8150289017341041, "grad_norm": 2.2615108489990234, "learning_rate": 4.357966906590205e-07, "loss": 0.3276, "step": 14915 }, { "epoch": 1.8151505932461212, "grad_norm": 1.3322075605392456, "learning_rate": 4.3522728284766224e-07, "loss": 0.3451, "step": 14916 }, { "epoch": 1.8152722847581382, "grad_norm": 2.68404221534729, "learning_rate": 4.3465823899570323e-07, "loss": 0.4284, "step": 14917 }, { "epoch": 1.8153939762701552, "grad_norm": 3.1832666397094727, "learning_rate": 4.340895591247962e-07, "loss": 0.3442, "step": 14918 }, { "epoch": 1.8155156677821722, "grad_norm": 1.6485973596572876, "learning_rate": 4.3352124325658385e-07, "loss": 0.4397, "step": 14919 }, { "epoch": 1.8156373592941892, "grad_norm": 2.054135322570801, "learning_rate": 4.3295329141268885e-07, "loss": 0.3407, "step": 14920 }, { "epoch": 1.8157590508062063, "grad_norm": 2.019174098968506, "learning_rate": 4.323857036147239e-07, "loss": 0.3445, "step": 14921 }, { "epoch": 1.8158807423182233, "grad_norm": 1.528096079826355, "learning_rate": 4.3181847988428726e-07, "loss": 0.357, "step": 14922 }, { "epoch": 1.8160024338302403, "grad_norm": 1.6700773239135742, "learning_rate": 4.3125162024296065e-07, "loss": 0.359, "step": 14923 }, { "epoch": 1.8161241253422573, "grad_norm": 1.8885477781295776, "learning_rate": 4.306851247123178e-07, "loss": 0.4057, "step": 14924 }, { "epoch": 1.8162458168542743, "grad_norm": 1.8417980670928955, "learning_rate": 4.3011899331391273e-07, "loss": 0.3414, "step": 14925 }, { "epoch": 1.8163675083662914, "grad_norm": 2.0142858028411865, "learning_rate": 4.2955322606928807e-07, "loss": 0.4231, "step": 14926 }, { "epoch": 1.8164891998783084, "grad_norm": 1.8136248588562012, "learning_rate": 4.2898782299997445e-07, "loss": 0.393, "step": 14927 }, { "epoch": 1.8166108913903254, "grad_norm": 1.5395504236221313, "learning_rate": 4.284227841274824e-07, "loss": 0.3433, "step": 14928 }, { "epoch": 1.8167325829023424, "grad_norm": 1.5359646081924438, "learning_rate": 4.278581094733181e-07, "loss": 0.3847, "step": 14929 }, { "epoch": 1.8168542744143596, "grad_norm": 1.5286551713943481, "learning_rate": 4.272937990589654e-07, "loss": 0.3432, "step": 14930 }, { "epoch": 1.8169759659263767, "grad_norm": 2.710171699523926, "learning_rate": 4.267298529058983e-07, "loss": 0.428, "step": 14931 }, { "epoch": 1.8170976574383937, "grad_norm": 1.830812692642212, "learning_rate": 4.2616627103557516e-07, "loss": 0.3591, "step": 14932 }, { "epoch": 1.8172193489504107, "grad_norm": 1.8068969249725342, "learning_rate": 4.256030534694422e-07, "loss": 0.3809, "step": 14933 }, { "epoch": 1.8173410404624277, "grad_norm": 1.635646939277649, "learning_rate": 4.250402002289311e-07, "loss": 0.3444, "step": 14934 }, { "epoch": 1.8174627319744447, "grad_norm": 1.634432077407837, "learning_rate": 4.2447771133546037e-07, "loss": 0.3948, "step": 14935 }, { "epoch": 1.8175844234864618, "grad_norm": 1.7847291231155396, "learning_rate": 4.2391558681043057e-07, "loss": 0.3804, "step": 14936 }, { "epoch": 1.817706114998479, "grad_norm": 2.725579261779785, "learning_rate": 4.233538266752357e-07, "loss": 0.3558, "step": 14937 }, { "epoch": 1.817827806510496, "grad_norm": 1.476682424545288, "learning_rate": 4.22792430951251e-07, "loss": 0.3558, "step": 14938 }, { "epoch": 1.817949498022513, "grad_norm": 1.8366916179656982, "learning_rate": 4.2223139965983595e-07, "loss": 0.3411, "step": 14939 }, { "epoch": 1.81807118953453, "grad_norm": 2.21850848197937, "learning_rate": 4.216707328223424e-07, "loss": 0.4216, "step": 14940 }, { "epoch": 1.818192881046547, "grad_norm": 1.8436192274093628, "learning_rate": 4.2111043046010434e-07, "loss": 0.3422, "step": 14941 }, { "epoch": 1.818314572558564, "grad_norm": 2.9281909465789795, "learning_rate": 4.2055049259443923e-07, "loss": 0.3926, "step": 14942 }, { "epoch": 1.8184362640705811, "grad_norm": 2.58005690574646, "learning_rate": 4.1999091924666e-07, "loss": 0.3505, "step": 14943 }, { "epoch": 1.8185579555825981, "grad_norm": 2.444199562072754, "learning_rate": 4.19431710438053e-07, "loss": 0.3934, "step": 14944 }, { "epoch": 1.8186796470946152, "grad_norm": 1.4753987789154053, "learning_rate": 4.188728661899e-07, "loss": 0.3076, "step": 14945 }, { "epoch": 1.8188013386066322, "grad_norm": 1.6509904861450195, "learning_rate": 4.1831438652346733e-07, "loss": 0.3872, "step": 14946 }, { "epoch": 1.8189230301186492, "grad_norm": 2.4174916744232178, "learning_rate": 4.177562714600047e-07, "loss": 0.3958, "step": 14947 }, { "epoch": 1.8190447216306662, "grad_norm": 1.382598638534546, "learning_rate": 4.1719852102075076e-07, "loss": 0.326, "step": 14948 }, { "epoch": 1.8191664131426832, "grad_norm": 2.24420428276062, "learning_rate": 4.166411352269284e-07, "loss": 0.3767, "step": 14949 }, { "epoch": 1.8192881046547003, "grad_norm": 2.2212283611297607, "learning_rate": 4.160841140997451e-07, "loss": 0.3377, "step": 14950 }, { "epoch": 1.8194097961667173, "grad_norm": 1.5659985542297363, "learning_rate": 4.1552745766040073e-07, "loss": 0.3968, "step": 14951 }, { "epoch": 1.8195314876787343, "grad_norm": 1.841123104095459, "learning_rate": 4.149711659300759e-07, "loss": 0.3932, "step": 14952 }, { "epoch": 1.8196531791907513, "grad_norm": 2.78578782081604, "learning_rate": 4.1441523892993716e-07, "loss": 0.4348, "step": 14953 }, { "epoch": 1.8197748707027683, "grad_norm": 2.753040313720703, "learning_rate": 4.1385967668114093e-07, "loss": 0.4322, "step": 14954 }, { "epoch": 1.8198965622147856, "grad_norm": 3.5378448963165283, "learning_rate": 4.1330447920482466e-07, "loss": 0.2601, "step": 14955 }, { "epoch": 1.8200182537268026, "grad_norm": 1.5717259645462036, "learning_rate": 4.12749646522117e-07, "loss": 0.382, "step": 14956 }, { "epoch": 1.8201399452388196, "grad_norm": 1.9543402194976807, "learning_rate": 4.1219517865413006e-07, "loss": 0.3695, "step": 14957 }, { "epoch": 1.8202616367508366, "grad_norm": 2.4204328060150146, "learning_rate": 4.116410756219613e-07, "loss": 0.4302, "step": 14958 }, { "epoch": 1.8203833282628537, "grad_norm": 1.7716232538223267, "learning_rate": 4.1108733744669726e-07, "loss": 0.3662, "step": 14959 }, { "epoch": 1.8205050197748707, "grad_norm": 1.4448550939559937, "learning_rate": 4.105339641494077e-07, "loss": 0.3022, "step": 14960 }, { "epoch": 1.8206267112868877, "grad_norm": 2.1423330307006836, "learning_rate": 4.099809557511492e-07, "loss": 0.393, "step": 14961 }, { "epoch": 1.820748402798905, "grad_norm": 1.6046580076217651, "learning_rate": 4.0942831227296695e-07, "loss": 0.4013, "step": 14962 }, { "epoch": 1.820870094310922, "grad_norm": 1.9217573404312134, "learning_rate": 4.088760337358888e-07, "loss": 0.3932, "step": 14963 }, { "epoch": 1.820991785822939, "grad_norm": 1.4996533393859863, "learning_rate": 4.083241201609278e-07, "loss": 0.3465, "step": 14964 }, { "epoch": 1.821113477334956, "grad_norm": 1.8665032386779785, "learning_rate": 4.0777257156909053e-07, "loss": 0.3991, "step": 14965 }, { "epoch": 1.821235168846973, "grad_norm": 1.8192962408065796, "learning_rate": 4.072213879813602e-07, "loss": 0.3755, "step": 14966 }, { "epoch": 1.82135686035899, "grad_norm": 1.821222186088562, "learning_rate": 4.066705694187134e-07, "loss": 0.3722, "step": 14967 }, { "epoch": 1.821478551871007, "grad_norm": 2.230487108230591, "learning_rate": 4.0612011590210775e-07, "loss": 0.328, "step": 14968 }, { "epoch": 1.821600243383024, "grad_norm": 1.6029232740402222, "learning_rate": 4.0557002745248875e-07, "loss": 0.4133, "step": 14969 }, { "epoch": 1.821721934895041, "grad_norm": 3.296739101409912, "learning_rate": 4.0502030409079076e-07, "loss": 0.3287, "step": 14970 }, { "epoch": 1.821843626407058, "grad_norm": 1.5507313013076782, "learning_rate": 4.0447094583793143e-07, "loss": 0.3753, "step": 14971 }, { "epoch": 1.8219653179190751, "grad_norm": 1.5675057172775269, "learning_rate": 4.039219527148119e-07, "loss": 0.3921, "step": 14972 }, { "epoch": 1.8220870094310921, "grad_norm": 1.8070685863494873, "learning_rate": 4.033733247423266e-07, "loss": 0.353, "step": 14973 }, { "epoch": 1.8222087009431092, "grad_norm": 2.161892890930176, "learning_rate": 4.0282506194135096e-07, "loss": 0.317, "step": 14974 }, { "epoch": 1.8223303924551262, "grad_norm": 1.746654987335205, "learning_rate": 4.02277164332745e-07, "loss": 0.3619, "step": 14975 }, { "epoch": 1.8224520839671432, "grad_norm": 1.7208940982818604, "learning_rate": 4.017296319373598e-07, "loss": 0.3373, "step": 14976 }, { "epoch": 1.8225737754791602, "grad_norm": 2.248431444168091, "learning_rate": 4.0118246477602987e-07, "loss": 0.435, "step": 14977 }, { "epoch": 1.8226954669911772, "grad_norm": 1.7662694454193115, "learning_rate": 4.006356628695751e-07, "loss": 0.3162, "step": 14978 }, { "epoch": 1.8228171585031943, "grad_norm": 1.6861603260040283, "learning_rate": 4.000892262388045e-07, "loss": 0.3182, "step": 14979 }, { "epoch": 1.8229388500152113, "grad_norm": 1.6651843786239624, "learning_rate": 3.99543154904507e-07, "loss": 0.3371, "step": 14980 }, { "epoch": 1.8230605415272285, "grad_norm": 1.7793020009994507, "learning_rate": 3.989974488874659e-07, "loss": 0.332, "step": 14981 }, { "epoch": 1.8231822330392455, "grad_norm": 2.287501811981201, "learning_rate": 3.984521082084447e-07, "loss": 0.3918, "step": 14982 }, { "epoch": 1.8233039245512626, "grad_norm": 2.995178461074829, "learning_rate": 3.9790713288819337e-07, "loss": 0.3287, "step": 14983 }, { "epoch": 1.8234256160632796, "grad_norm": 2.0127651691436768, "learning_rate": 3.9736252294745315e-07, "loss": 0.4102, "step": 14984 }, { "epoch": 1.8235473075752966, "grad_norm": 1.4879025220870972, "learning_rate": 3.968182784069441e-07, "loss": 0.3306, "step": 14985 }, { "epoch": 1.8236689990873136, "grad_norm": 1.9716264009475708, "learning_rate": 3.962743992873763e-07, "loss": 0.3785, "step": 14986 }, { "epoch": 1.8237906905993309, "grad_norm": 1.5481476783752441, "learning_rate": 3.957308856094477e-07, "loss": 0.3375, "step": 14987 }, { "epoch": 1.8239123821113479, "grad_norm": 1.5322394371032715, "learning_rate": 3.951877373938373e-07, "loss": 0.3049, "step": 14988 }, { "epoch": 1.824034073623365, "grad_norm": 1.819868803024292, "learning_rate": 3.9464495466121633e-07, "loss": 0.3383, "step": 14989 }, { "epoch": 1.824155765135382, "grad_norm": 2.616342306137085, "learning_rate": 3.9410253743223605e-07, "loss": 0.3958, "step": 14990 }, { "epoch": 1.824277456647399, "grad_norm": 1.671526551246643, "learning_rate": 3.9356048572753655e-07, "loss": 0.3629, "step": 14991 }, { "epoch": 1.824399148159416, "grad_norm": 1.829897165298462, "learning_rate": 3.930187995677459e-07, "loss": 0.3209, "step": 14992 }, { "epoch": 1.824520839671433, "grad_norm": 1.5978320837020874, "learning_rate": 3.924774789734731e-07, "loss": 0.3816, "step": 14993 }, { "epoch": 1.82464253118345, "grad_norm": 2.3925981521606445, "learning_rate": 3.9193652396532056e-07, "loss": 0.3377, "step": 14994 }, { "epoch": 1.824764222695467, "grad_norm": 1.997206687927246, "learning_rate": 3.913959345638707e-07, "loss": 0.4114, "step": 14995 }, { "epoch": 1.824885914207484, "grad_norm": 1.9419773817062378, "learning_rate": 3.9085571078969374e-07, "loss": 0.3384, "step": 14996 }, { "epoch": 1.825007605719501, "grad_norm": 1.563836693763733, "learning_rate": 3.903158526633477e-07, "loss": 0.3305, "step": 14997 }, { "epoch": 1.825129297231518, "grad_norm": 3.358605146408081, "learning_rate": 3.897763602053739e-07, "loss": 0.2995, "step": 14998 }, { "epoch": 1.825250988743535, "grad_norm": 2.1497631072998047, "learning_rate": 3.8923723343630146e-07, "loss": 0.3611, "step": 14999 }, { "epoch": 1.825372680255552, "grad_norm": 1.9597254991531372, "learning_rate": 3.8869847237664627e-07, "loss": 0.332, "step": 15000 }, { "epoch": 1.8254943717675691, "grad_norm": 1.8248443603515625, "learning_rate": 3.881600770469074e-07, "loss": 0.3586, "step": 15001 }, { "epoch": 1.8256160632795861, "grad_norm": 1.559475064277649, "learning_rate": 3.8762204746757403e-07, "loss": 0.3672, "step": 15002 }, { "epoch": 1.8257377547916032, "grad_norm": 2.0782363414764404, "learning_rate": 3.870843836591176e-07, "loss": 0.3074, "step": 15003 }, { "epoch": 1.8258594463036202, "grad_norm": 2.658369302749634, "learning_rate": 3.865470856419973e-07, "loss": 0.405, "step": 15004 }, { "epoch": 1.8259811378156372, "grad_norm": 1.6301521062850952, "learning_rate": 3.8601015343666113e-07, "loss": 0.3411, "step": 15005 }, { "epoch": 1.8261028293276544, "grad_norm": 2.93886399269104, "learning_rate": 3.8547358706353733e-07, "loss": 0.4146, "step": 15006 }, { "epoch": 1.8262245208396715, "grad_norm": 2.226526975631714, "learning_rate": 3.849373865430439e-07, "loss": 0.4016, "step": 15007 }, { "epoch": 1.8263462123516885, "grad_norm": 2.2823421955108643, "learning_rate": 3.844015518955857e-07, "loss": 0.3723, "step": 15008 }, { "epoch": 1.8264679038637055, "grad_norm": 1.8886463642120361, "learning_rate": 3.8386608314155195e-07, "loss": 0.4396, "step": 15009 }, { "epoch": 1.8265895953757225, "grad_norm": 1.7071367502212524, "learning_rate": 3.8333098030131745e-07, "loss": 0.3098, "step": 15010 }, { "epoch": 1.8267112868877395, "grad_norm": 1.6018060445785522, "learning_rate": 3.827962433952448e-07, "loss": 0.3382, "step": 15011 }, { "epoch": 1.8268329783997568, "grad_norm": 2.620126485824585, "learning_rate": 3.8226187244368216e-07, "loss": 0.3337, "step": 15012 }, { "epoch": 1.8269546699117738, "grad_norm": 1.7114201784133911, "learning_rate": 3.8172786746696223e-07, "loss": 0.3633, "step": 15013 }, { "epoch": 1.8270763614237908, "grad_norm": 1.8939762115478516, "learning_rate": 3.8119422848540644e-07, "loss": 0.391, "step": 15014 }, { "epoch": 1.8271980529358078, "grad_norm": 2.332385540008545, "learning_rate": 3.806609555193186e-07, "loss": 0.4033, "step": 15015 }, { "epoch": 1.8273197444478249, "grad_norm": 2.4864838123321533, "learning_rate": 3.8012804858899353e-07, "loss": 0.4233, "step": 15016 }, { "epoch": 1.8274414359598419, "grad_norm": 1.6785238981246948, "learning_rate": 3.7959550771470845e-07, "loss": 0.367, "step": 15017 }, { "epoch": 1.827563127471859, "grad_norm": 1.452405571937561, "learning_rate": 3.7906333291672594e-07, "loss": 0.3055, "step": 15018 }, { "epoch": 1.827684818983876, "grad_norm": 1.5213143825531006, "learning_rate": 3.785315242152998e-07, "loss": 0.2946, "step": 15019 }, { "epoch": 1.827806510495893, "grad_norm": 2.9879708290100098, "learning_rate": 3.7800008163066394e-07, "loss": 0.4212, "step": 15020 }, { "epoch": 1.82792820200791, "grad_norm": 1.889747142791748, "learning_rate": 3.7746900518303984e-07, "loss": 0.3937, "step": 15021 }, { "epoch": 1.828049893519927, "grad_norm": 1.8190747499465942, "learning_rate": 3.769382948926392e-07, "loss": 0.3118, "step": 15022 }, { "epoch": 1.828171585031944, "grad_norm": 1.568833351135254, "learning_rate": 3.7640795077965475e-07, "loss": 0.2836, "step": 15023 }, { "epoch": 1.828293276543961, "grad_norm": 1.5308839082717896, "learning_rate": 3.7587797286426806e-07, "loss": 0.3454, "step": 15024 }, { "epoch": 1.828414968055978, "grad_norm": 2.0651838779449463, "learning_rate": 3.753483611666453e-07, "loss": 0.3839, "step": 15025 }, { "epoch": 1.828536659567995, "grad_norm": 1.687839388847351, "learning_rate": 3.748191157069381e-07, "loss": 0.314, "step": 15026 }, { "epoch": 1.828658351080012, "grad_norm": 1.7700896263122559, "learning_rate": 3.74290236505287e-07, "loss": 0.3191, "step": 15027 }, { "epoch": 1.828780042592029, "grad_norm": 1.647594928741455, "learning_rate": 3.7376172358181696e-07, "loss": 0.3375, "step": 15028 }, { "epoch": 1.828901734104046, "grad_norm": 2.0445783138275146, "learning_rate": 3.7323357695663754e-07, "loss": 0.3821, "step": 15029 }, { "epoch": 1.8290234256160631, "grad_norm": 1.649852991104126, "learning_rate": 3.727057966498482e-07, "loss": 0.3384, "step": 15030 }, { "epoch": 1.8291451171280804, "grad_norm": 3.663666009902954, "learning_rate": 3.721783826815306e-07, "loss": 0.4125, "step": 15031 }, { "epoch": 1.8292668086400974, "grad_norm": 1.822206974029541, "learning_rate": 3.716513350717532e-07, "loss": 0.3325, "step": 15032 }, { "epoch": 1.8293885001521144, "grad_norm": 1.4554885625839233, "learning_rate": 3.7112465384057215e-07, "loss": 0.3452, "step": 15033 }, { "epoch": 1.8295101916641314, "grad_norm": 1.5985687971115112, "learning_rate": 3.705983390080303e-07, "loss": 0.3472, "step": 15034 }, { "epoch": 1.8296318831761484, "grad_norm": 1.9971174001693726, "learning_rate": 3.7007239059415165e-07, "loss": 0.3875, "step": 15035 }, { "epoch": 1.8297535746881655, "grad_norm": 1.4277081489562988, "learning_rate": 3.695468086189524e-07, "loss": 0.3395, "step": 15036 }, { "epoch": 1.8298752662001825, "grad_norm": 2.9293782711029053, "learning_rate": 3.690215931024299e-07, "loss": 0.4317, "step": 15037 }, { "epoch": 1.8299969577121997, "grad_norm": 2.684494733810425, "learning_rate": 3.684967440645715e-07, "loss": 0.3208, "step": 15038 }, { "epoch": 1.8301186492242167, "grad_norm": 1.6981792449951172, "learning_rate": 3.679722615253489e-07, "loss": 0.3645, "step": 15039 }, { "epoch": 1.8302403407362338, "grad_norm": 1.5855158567428589, "learning_rate": 3.674481455047174e-07, "loss": 0.3293, "step": 15040 }, { "epoch": 1.8303620322482508, "grad_norm": 2.08746337890625, "learning_rate": 3.669243960226232e-07, "loss": 0.3535, "step": 15041 }, { "epoch": 1.8304837237602678, "grad_norm": 2.050413131713867, "learning_rate": 3.6640101309899477e-07, "loss": 0.4106, "step": 15042 }, { "epoch": 1.8306054152722848, "grad_norm": 1.5070345401763916, "learning_rate": 3.6587799675374844e-07, "loss": 0.3533, "step": 15043 }, { "epoch": 1.8307271067843018, "grad_norm": 2.5825185775756836, "learning_rate": 3.653553470067861e-07, "loss": 0.3073, "step": 15044 }, { "epoch": 1.8308487982963189, "grad_norm": 1.5110516548156738, "learning_rate": 3.6483306387799397e-07, "loss": 0.3057, "step": 15045 }, { "epoch": 1.8309704898083359, "grad_norm": 2.3110103607177734, "learning_rate": 3.643111473872496e-07, "loss": 0.3908, "step": 15046 }, { "epoch": 1.831092181320353, "grad_norm": 3.019033193588257, "learning_rate": 3.6378959755441035e-07, "loss": 0.324, "step": 15047 }, { "epoch": 1.83121387283237, "grad_norm": 1.8984547853469849, "learning_rate": 3.6326841439932147e-07, "loss": 0.3604, "step": 15048 }, { "epoch": 1.831335564344387, "grad_norm": 2.5512828826904297, "learning_rate": 3.627475979418182e-07, "loss": 0.4046, "step": 15049 }, { "epoch": 1.831457255856404, "grad_norm": 1.5460147857666016, "learning_rate": 3.622271482017159e-07, "loss": 0.3642, "step": 15050 }, { "epoch": 1.831578947368421, "grad_norm": 2.112250804901123, "learning_rate": 3.617070651988186e-07, "loss": 0.3605, "step": 15051 }, { "epoch": 1.831700638880438, "grad_norm": 2.0232927799224854, "learning_rate": 3.611873489529194e-07, "loss": 0.393, "step": 15052 }, { "epoch": 1.831822330392455, "grad_norm": 2.1189792156219482, "learning_rate": 3.606679994837903e-07, "loss": 0.3369, "step": 15053 }, { "epoch": 1.831944021904472, "grad_norm": 2.586393356323242, "learning_rate": 3.601490168111987e-07, "loss": 0.3743, "step": 15054 }, { "epoch": 1.832065713416489, "grad_norm": 1.8087722063064575, "learning_rate": 3.596304009548901e-07, "loss": 0.3956, "step": 15055 }, { "epoch": 1.8321874049285063, "grad_norm": 1.9052002429962158, "learning_rate": 3.591121519345975e-07, "loss": 0.4009, "step": 15056 }, { "epoch": 1.8323090964405233, "grad_norm": 3.31994891166687, "learning_rate": 3.585942697700451e-07, "loss": 0.4156, "step": 15057 }, { "epoch": 1.8324307879525403, "grad_norm": 1.8642053604125977, "learning_rate": 3.580767544809383e-07, "loss": 0.3964, "step": 15058 }, { "epoch": 1.8325524794645573, "grad_norm": 2.8211753368377686, "learning_rate": 3.575596060869657e-07, "loss": 0.3874, "step": 15059 }, { "epoch": 1.8326741709765744, "grad_norm": 1.7911968231201172, "learning_rate": 3.5704282460781057e-07, "loss": 0.3658, "step": 15060 }, { "epoch": 1.8327958624885914, "grad_norm": 1.4197808504104614, "learning_rate": 3.5652641006313384e-07, "loss": 0.3368, "step": 15061 }, { "epoch": 1.8329175540006084, "grad_norm": 1.7975486516952515, "learning_rate": 3.560103624725908e-07, "loss": 0.3851, "step": 15062 }, { "epoch": 1.8330392455126256, "grad_norm": 4.038433074951172, "learning_rate": 3.554946818558136e-07, "loss": 0.2721, "step": 15063 }, { "epoch": 1.8331609370246427, "grad_norm": 1.4514188766479492, "learning_rate": 3.5497936823242653e-07, "loss": 0.3719, "step": 15064 }, { "epoch": 1.8332826285366597, "grad_norm": 2.8439102172851562, "learning_rate": 3.544644216220405e-07, "loss": 0.3495, "step": 15065 }, { "epoch": 1.8334043200486767, "grad_norm": 1.6029181480407715, "learning_rate": 3.5394984204424776e-07, "loss": 0.3445, "step": 15066 }, { "epoch": 1.8335260115606937, "grad_norm": 1.7452284097671509, "learning_rate": 3.534356295186281e-07, "loss": 0.3849, "step": 15067 }, { "epoch": 1.8336477030727107, "grad_norm": 1.9027994871139526, "learning_rate": 3.529217840647514e-07, "loss": 0.3716, "step": 15068 }, { "epoch": 1.8337693945847278, "grad_norm": 2.0823395252227783, "learning_rate": 3.524083057021699e-07, "loss": 0.3348, "step": 15069 }, { "epoch": 1.8338910860967448, "grad_norm": 1.9788453578948975, "learning_rate": 3.518951944504212e-07, "loss": 0.3973, "step": 15070 }, { "epoch": 1.8340127776087618, "grad_norm": 1.5093154907226562, "learning_rate": 3.5138245032903086e-07, "loss": 0.4161, "step": 15071 }, { "epoch": 1.8341344691207788, "grad_norm": 2.215609312057495, "learning_rate": 3.508700733575088e-07, "loss": 0.3169, "step": 15072 }, { "epoch": 1.8342561606327958, "grad_norm": 1.928064227104187, "learning_rate": 3.5035806355535385e-07, "loss": 0.3305, "step": 15073 }, { "epoch": 1.8343778521448129, "grad_norm": 3.4645094871520996, "learning_rate": 3.4984642094204824e-07, "loss": 0.4295, "step": 15074 }, { "epoch": 1.8344995436568299, "grad_norm": 2.571255922317505, "learning_rate": 3.493351455370597e-07, "loss": 0.3183, "step": 15075 }, { "epoch": 1.834621235168847, "grad_norm": 1.6098570823669434, "learning_rate": 3.4882423735984716e-07, "loss": 0.3828, "step": 15076 }, { "epoch": 1.834742926680864, "grad_norm": 1.7994794845581055, "learning_rate": 3.4831369642984724e-07, "loss": 0.381, "step": 15077 }, { "epoch": 1.834864618192881, "grad_norm": 1.593248724937439, "learning_rate": 3.4780352276649e-07, "loss": 0.3426, "step": 15078 }, { "epoch": 1.834986309704898, "grad_norm": 2.4551877975463867, "learning_rate": 3.472937163891876e-07, "loss": 0.3939, "step": 15079 }, { "epoch": 1.835108001216915, "grad_norm": 2.4965217113494873, "learning_rate": 3.467842773173391e-07, "loss": 0.386, "step": 15080 }, { "epoch": 1.835229692728932, "grad_norm": 2.558504581451416, "learning_rate": 3.4627520557033e-07, "loss": 0.3221, "step": 15081 }, { "epoch": 1.8353513842409492, "grad_norm": 1.6214685440063477, "learning_rate": 3.4576650116753265e-07, "loss": 0.3636, "step": 15082 }, { "epoch": 1.8354730757529663, "grad_norm": 1.7680699825286865, "learning_rate": 3.4525816412830036e-07, "loss": 0.3357, "step": 15083 }, { "epoch": 1.8355947672649833, "grad_norm": 1.8350796699523926, "learning_rate": 3.447501944719811e-07, "loss": 0.3534, "step": 15084 }, { "epoch": 1.8357164587770003, "grad_norm": 1.7724733352661133, "learning_rate": 3.4424259221790155e-07, "loss": 0.3712, "step": 15085 }, { "epoch": 1.8358381502890173, "grad_norm": 2.4058008193969727, "learning_rate": 3.4373535738537746e-07, "loss": 0.3934, "step": 15086 }, { "epoch": 1.8359598418010343, "grad_norm": 1.8062288761138916, "learning_rate": 3.4322848999371106e-07, "loss": 0.4319, "step": 15087 }, { "epoch": 1.8360815333130516, "grad_norm": 1.8181830644607544, "learning_rate": 3.427219900621892e-07, "loss": 0.3181, "step": 15088 }, { "epoch": 1.8362032248250686, "grad_norm": 1.898027777671814, "learning_rate": 3.422158576100842e-07, "loss": 0.357, "step": 15089 }, { "epoch": 1.8363249163370856, "grad_norm": 2.4604787826538086, "learning_rate": 3.4171009265665633e-07, "loss": 0.307, "step": 15090 }, { "epoch": 1.8364466078491026, "grad_norm": 2.7061424255371094, "learning_rate": 3.412046952211523e-07, "loss": 0.3301, "step": 15091 }, { "epoch": 1.8365682993611196, "grad_norm": 1.8778622150421143, "learning_rate": 3.4069966532280116e-07, "loss": 0.3737, "step": 15092 }, { "epoch": 1.8366899908731367, "grad_norm": 1.9590085744857788, "learning_rate": 3.401950029808221e-07, "loss": 0.447, "step": 15093 }, { "epoch": 1.8368116823851537, "grad_norm": 1.6582603454589844, "learning_rate": 3.396907082144174e-07, "loss": 0.355, "step": 15094 }, { "epoch": 1.8369333738971707, "grad_norm": 2.134652614593506, "learning_rate": 3.3918678104277737e-07, "loss": 0.3619, "step": 15095 }, { "epoch": 1.8370550654091877, "grad_norm": 1.419028639793396, "learning_rate": 3.386832214850766e-07, "loss": 0.3579, "step": 15096 }, { "epoch": 1.8371767569212047, "grad_norm": 1.4091005325317383, "learning_rate": 3.381800295604765e-07, "loss": 0.3231, "step": 15097 }, { "epoch": 1.8372984484332218, "grad_norm": 1.621938943862915, "learning_rate": 3.376772052881261e-07, "loss": 0.3788, "step": 15098 }, { "epoch": 1.8374201399452388, "grad_norm": 2.6516916751861572, "learning_rate": 3.371747486871579e-07, "loss": 0.3172, "step": 15099 }, { "epoch": 1.8375418314572558, "grad_norm": 1.6465470790863037, "learning_rate": 3.3667265977669117e-07, "loss": 0.369, "step": 15100 }, { "epoch": 1.8376635229692728, "grad_norm": 2.082284688949585, "learning_rate": 3.361709385758316e-07, "loss": 0.3221, "step": 15101 }, { "epoch": 1.8377852144812898, "grad_norm": 1.710175633430481, "learning_rate": 3.3566958510367177e-07, "loss": 0.3632, "step": 15102 }, { "epoch": 1.8379069059933069, "grad_norm": 2.417053699493408, "learning_rate": 3.351685993792864e-07, "loss": 0.3403, "step": 15103 }, { "epoch": 1.8380285975053239, "grad_norm": 1.7789360284805298, "learning_rate": 3.3466798142174353e-07, "loss": 0.3055, "step": 15104 }, { "epoch": 1.838150289017341, "grad_norm": 2.2343626022338867, "learning_rate": 3.3416773125008797e-07, "loss": 0.31, "step": 15105 }, { "epoch": 1.838271980529358, "grad_norm": 1.6454588174819946, "learning_rate": 3.336678488833589e-07, "loss": 0.3604, "step": 15106 }, { "epoch": 1.8383936720413752, "grad_norm": 1.929332971572876, "learning_rate": 3.331683343405756e-07, "loss": 0.3654, "step": 15107 }, { "epoch": 1.8385153635533922, "grad_norm": 2.3010215759277344, "learning_rate": 3.3266918764074616e-07, "loss": 0.4226, "step": 15108 }, { "epoch": 1.8386370550654092, "grad_norm": 2.4542360305786133, "learning_rate": 3.3217040880286435e-07, "loss": 0.398, "step": 15109 }, { "epoch": 1.8387587465774262, "grad_norm": 2.573143243789673, "learning_rate": 3.316719978459104e-07, "loss": 0.4139, "step": 15110 }, { "epoch": 1.8388804380894432, "grad_norm": 2.1813714504241943, "learning_rate": 3.3117395478884815e-07, "loss": 0.3524, "step": 15111 }, { "epoch": 1.8390021296014603, "grad_norm": 1.7164812088012695, "learning_rate": 3.306762796506313e-07, "loss": 0.3183, "step": 15112 }, { "epoch": 1.8391238211134775, "grad_norm": 2.0599563121795654, "learning_rate": 3.3017897245019583e-07, "loss": 0.4082, "step": 15113 }, { "epoch": 1.8392455126254945, "grad_norm": 1.7602815628051758, "learning_rate": 3.2968203320646655e-07, "loss": 0.4196, "step": 15114 }, { "epoch": 1.8393672041375115, "grad_norm": 2.440704584121704, "learning_rate": 3.291854619383539e-07, "loss": 0.3172, "step": 15115 }, { "epoch": 1.8394888956495286, "grad_norm": 1.635888695716858, "learning_rate": 3.286892586647494e-07, "loss": 0.3669, "step": 15116 }, { "epoch": 1.8396105871615456, "grad_norm": 2.2009193897247314, "learning_rate": 3.2819342340453807e-07, "loss": 0.3385, "step": 15117 }, { "epoch": 1.8397322786735626, "grad_norm": 1.8456350564956665, "learning_rate": 3.2769795617658694e-07, "loss": 0.3331, "step": 15118 }, { "epoch": 1.8398539701855796, "grad_norm": 2.1818649768829346, "learning_rate": 3.2720285699974764e-07, "loss": 0.3314, "step": 15119 }, { "epoch": 1.8399756616975966, "grad_norm": 1.5128535032272339, "learning_rate": 3.2670812589286285e-07, "loss": 0.3716, "step": 15120 }, { "epoch": 1.8400973532096137, "grad_norm": 2.060521125793457, "learning_rate": 3.2621376287475416e-07, "loss": 0.3641, "step": 15121 }, { "epoch": 1.8402190447216307, "grad_norm": 1.9721227884292603, "learning_rate": 3.2571976796423767e-07, "loss": 0.3446, "step": 15122 }, { "epoch": 1.8403407362336477, "grad_norm": 2.0915703773498535, "learning_rate": 3.252261411801083e-07, "loss": 0.3902, "step": 15123 }, { "epoch": 1.8404624277456647, "grad_norm": 1.8351625204086304, "learning_rate": 3.2473288254114886e-07, "loss": 0.3585, "step": 15124 }, { "epoch": 1.8405841192576817, "grad_norm": 2.097517967224121, "learning_rate": 3.24239992066131e-07, "loss": 0.3783, "step": 15125 }, { "epoch": 1.8407058107696987, "grad_norm": 3.4559755325317383, "learning_rate": 3.2374746977380965e-07, "loss": 0.3982, "step": 15126 }, { "epoch": 1.8408275022817158, "grad_norm": 1.6628080606460571, "learning_rate": 3.2325531568292545e-07, "loss": 0.3737, "step": 15127 }, { "epoch": 1.8409491937937328, "grad_norm": 1.8398913145065308, "learning_rate": 3.227635298122056e-07, "loss": 0.3331, "step": 15128 }, { "epoch": 1.8410708853057498, "grad_norm": 1.7329350709915161, "learning_rate": 3.2227211218036404e-07, "loss": 0.371, "step": 15129 }, { "epoch": 1.8411925768177668, "grad_norm": 3.174161195755005, "learning_rate": 3.2178106280610135e-07, "loss": 0.4001, "step": 15130 }, { "epoch": 1.8413142683297838, "grad_norm": 1.6231906414031982, "learning_rate": 3.2129038170810143e-07, "loss": 0.3269, "step": 15131 }, { "epoch": 1.841435959841801, "grad_norm": 2.7355408668518066, "learning_rate": 3.208000689050361e-07, "loss": 0.3541, "step": 15132 }, { "epoch": 1.841557651353818, "grad_norm": 2.3104732036590576, "learning_rate": 3.2031012441556375e-07, "loss": 0.3618, "step": 15133 }, { "epoch": 1.8416793428658351, "grad_norm": 1.5579098463058472, "learning_rate": 3.198205482583261e-07, "loss": 0.3479, "step": 15134 }, { "epoch": 1.8418010343778521, "grad_norm": 2.145002841949463, "learning_rate": 3.193313404519538e-07, "loss": 0.366, "step": 15135 }, { "epoch": 1.8419227258898692, "grad_norm": 2.3516693115234375, "learning_rate": 3.18842501015062e-07, "loss": 0.2951, "step": 15136 }, { "epoch": 1.8420444174018862, "grad_norm": 1.9513347148895264, "learning_rate": 3.183540299662524e-07, "loss": 0.3569, "step": 15137 }, { "epoch": 1.8421661089139034, "grad_norm": 2.0186514854431152, "learning_rate": 3.1786592732411136e-07, "loss": 0.3985, "step": 15138 }, { "epoch": 1.8422878004259204, "grad_norm": 1.64278244972229, "learning_rate": 3.1737819310721395e-07, "loss": 0.3467, "step": 15139 }, { "epoch": 1.8424094919379375, "grad_norm": 2.21455979347229, "learning_rate": 3.168908273341154e-07, "loss": 0.4267, "step": 15140 }, { "epoch": 1.8425311834499545, "grad_norm": 1.6781272888183594, "learning_rate": 3.164038300233663e-07, "loss": 0.3837, "step": 15141 }, { "epoch": 1.8426528749619715, "grad_norm": 1.504429817199707, "learning_rate": 3.1591720119349525e-07, "loss": 0.3743, "step": 15142 }, { "epoch": 1.8427745664739885, "grad_norm": 1.5023126602172852, "learning_rate": 3.154309408630174e-07, "loss": 0.3861, "step": 15143 }, { "epoch": 1.8428962579860055, "grad_norm": 1.779693603515625, "learning_rate": 3.149450490504402e-07, "loss": 0.337, "step": 15144 }, { "epoch": 1.8430179494980226, "grad_norm": 2.134334087371826, "learning_rate": 3.144595257742511e-07, "loss": 0.404, "step": 15145 }, { "epoch": 1.8431396410100396, "grad_norm": 3.270294189453125, "learning_rate": 3.1397437105292307e-07, "loss": 0.3595, "step": 15146 }, { "epoch": 1.8432613325220566, "grad_norm": 2.270339250564575, "learning_rate": 3.134895849049213e-07, "loss": 0.271, "step": 15147 }, { "epoch": 1.8433830240340736, "grad_norm": 1.5615133047103882, "learning_rate": 3.1300516734869e-07, "loss": 0.364, "step": 15148 }, { "epoch": 1.8435047155460906, "grad_norm": 1.4227180480957031, "learning_rate": 3.125211184026622e-07, "loss": 0.3286, "step": 15149 }, { "epoch": 1.8436264070581077, "grad_norm": 2.8514857292175293, "learning_rate": 3.1203743808525975e-07, "loss": 0.3491, "step": 15150 }, { "epoch": 1.8437480985701247, "grad_norm": 1.4530268907546997, "learning_rate": 3.1155412641488356e-07, "loss": 0.3486, "step": 15151 }, { "epoch": 1.8438697900821417, "grad_norm": 1.465126872062683, "learning_rate": 3.1107118340992895e-07, "loss": 0.3542, "step": 15152 }, { "epoch": 1.8439914815941587, "grad_norm": 2.551419258117676, "learning_rate": 3.1058860908876997e-07, "loss": 0.3966, "step": 15153 }, { "epoch": 1.8441131731061757, "grad_norm": 2.1102793216705322, "learning_rate": 3.101064034697698e-07, "loss": 0.3871, "step": 15154 }, { "epoch": 1.8442348646181927, "grad_norm": 1.720428466796875, "learning_rate": 3.096245665712783e-07, "loss": 0.3529, "step": 15155 }, { "epoch": 1.8443565561302098, "grad_norm": 2.118997573852539, "learning_rate": 3.091430984116306e-07, "loss": 0.3602, "step": 15156 }, { "epoch": 1.844478247642227, "grad_norm": 1.8666397333145142, "learning_rate": 3.0866199900914553e-07, "loss": 0.3575, "step": 15157 }, { "epoch": 1.844599939154244, "grad_norm": 1.8927935361862183, "learning_rate": 3.081812683821328e-07, "loss": 0.3797, "step": 15158 }, { "epoch": 1.844721630666261, "grad_norm": 1.6338809728622437, "learning_rate": 3.077009065488834e-07, "loss": 0.4088, "step": 15159 }, { "epoch": 1.844843322178278, "grad_norm": 1.8532401323318481, "learning_rate": 3.072209135276749e-07, "loss": 0.3875, "step": 15160 }, { "epoch": 1.844965013690295, "grad_norm": 3.4864485263824463, "learning_rate": 3.0674128933677603e-07, "loss": 0.423, "step": 15161 }, { "epoch": 1.845086705202312, "grad_norm": 3.0972182750701904, "learning_rate": 3.062620339944344e-07, "loss": 0.362, "step": 15162 }, { "epoch": 1.8452083967143291, "grad_norm": 2.235102891921997, "learning_rate": 3.057831475188866e-07, "loss": 0.3345, "step": 15163 }, { "epoch": 1.8453300882263464, "grad_norm": 2.061497688293457, "learning_rate": 3.0530462992835687e-07, "loss": 0.3522, "step": 15164 }, { "epoch": 1.8454517797383634, "grad_norm": 2.01693058013916, "learning_rate": 3.0482648124105176e-07, "loss": 0.3951, "step": 15165 }, { "epoch": 1.8455734712503804, "grad_norm": 3.279359817504883, "learning_rate": 3.043487014751678e-07, "loss": 0.4126, "step": 15166 }, { "epoch": 1.8456951627623974, "grad_norm": 2.221430778503418, "learning_rate": 3.03871290648885e-07, "loss": 0.3545, "step": 15167 }, { "epoch": 1.8458168542744144, "grad_norm": 1.5250096321105957, "learning_rate": 3.033942487803676e-07, "loss": 0.3335, "step": 15168 }, { "epoch": 1.8459385457864315, "grad_norm": 1.7814027070999146, "learning_rate": 3.0291757588777116e-07, "loss": 0.3662, "step": 15169 }, { "epoch": 1.8460602372984485, "grad_norm": 1.7331829071044922, "learning_rate": 3.024412719892333e-07, "loss": 0.36, "step": 15170 }, { "epoch": 1.8461819288104655, "grad_norm": 1.3819475173950195, "learning_rate": 3.0196533710287636e-07, "loss": 0.3452, "step": 15171 }, { "epoch": 1.8463036203224825, "grad_norm": 2.0687620639801025, "learning_rate": 3.0148977124681343e-07, "loss": 0.3245, "step": 15172 }, { "epoch": 1.8464253118344995, "grad_norm": 2.9504642486572266, "learning_rate": 3.010145744391402e-07, "loss": 0.4151, "step": 15173 }, { "epoch": 1.8465470033465166, "grad_norm": 3.3617234230041504, "learning_rate": 3.005397466979376e-07, "loss": 0.3657, "step": 15174 }, { "epoch": 1.8466686948585336, "grad_norm": 2.615053415298462, "learning_rate": 3.0006528804127466e-07, "loss": 0.3949, "step": 15175 }, { "epoch": 1.8467903863705506, "grad_norm": 2.1150665283203125, "learning_rate": 2.995911984872035e-07, "loss": 0.3486, "step": 15176 }, { "epoch": 1.8469120778825676, "grad_norm": 1.580841302871704, "learning_rate": 2.9911747805376754e-07, "loss": 0.3805, "step": 15177 }, { "epoch": 1.8470337693945846, "grad_norm": 2.793536424636841, "learning_rate": 2.9864412675899125e-07, "loss": 0.2972, "step": 15178 }, { "epoch": 1.8471554609066017, "grad_norm": 2.1198718547821045, "learning_rate": 2.981711446208857e-07, "loss": 0.357, "step": 15179 }, { "epoch": 1.8472771524186187, "grad_norm": 1.786293864250183, "learning_rate": 2.9769853165744986e-07, "loss": 0.3824, "step": 15180 }, { "epoch": 1.8473988439306357, "grad_norm": 1.8318227529525757, "learning_rate": 2.972262878866683e-07, "loss": 0.3819, "step": 15181 }, { "epoch": 1.8475205354426527, "grad_norm": 1.9865673780441284, "learning_rate": 2.967544133265099e-07, "loss": 0.3809, "step": 15182 }, { "epoch": 1.84764222695467, "grad_norm": 1.9973698854446411, "learning_rate": 2.962829079949314e-07, "loss": 0.3838, "step": 15183 }, { "epoch": 1.847763918466687, "grad_norm": 1.7482609748840332, "learning_rate": 2.9581177190987185e-07, "loss": 0.3674, "step": 15184 }, { "epoch": 1.847885609978704, "grad_norm": 2.033256769180298, "learning_rate": 2.9534100508926355e-07, "loss": 0.4078, "step": 15185 }, { "epoch": 1.848007301490721, "grad_norm": 1.7239772081375122, "learning_rate": 2.948706075510166e-07, "loss": 0.4165, "step": 15186 }, { "epoch": 1.848128993002738, "grad_norm": 2.1953210830688477, "learning_rate": 2.9440057931303e-07, "loss": 0.4181, "step": 15187 }, { "epoch": 1.848250684514755, "grad_norm": 1.8499325513839722, "learning_rate": 2.9393092039319284e-07, "loss": 0.3764, "step": 15188 }, { "epoch": 1.8483723760267723, "grad_norm": 3.3107144832611084, "learning_rate": 2.93461630809373e-07, "loss": 0.3034, "step": 15189 }, { "epoch": 1.8484940675387893, "grad_norm": 1.5686134099960327, "learning_rate": 2.929927105794306e-07, "loss": 0.3414, "step": 15190 }, { "epoch": 1.8486157590508063, "grad_norm": 1.612724781036377, "learning_rate": 2.925241597212081e-07, "loss": 0.3761, "step": 15191 }, { "epoch": 1.8487374505628233, "grad_norm": 2.137622594833374, "learning_rate": 2.920559782525334e-07, "loss": 0.3147, "step": 15192 }, { "epoch": 1.8488591420748404, "grad_norm": 1.8977779150009155, "learning_rate": 2.9158816619122457e-07, "loss": 0.4063, "step": 15193 }, { "epoch": 1.8489808335868574, "grad_norm": 2.2496049404144287, "learning_rate": 2.911207235550806e-07, "loss": 0.4324, "step": 15194 }, { "epoch": 1.8491025250988744, "grad_norm": 1.6088521480560303, "learning_rate": 2.906536503618884e-07, "loss": 0.3934, "step": 15195 }, { "epoch": 1.8492242166108914, "grad_norm": 2.3476169109344482, "learning_rate": 2.901869466294249e-07, "loss": 0.3294, "step": 15196 }, { "epoch": 1.8493459081229084, "grad_norm": 1.5555016994476318, "learning_rate": 2.897206123754437e-07, "loss": 0.3347, "step": 15197 }, { "epoch": 1.8494675996349255, "grad_norm": 1.8243275880813599, "learning_rate": 2.8925464761769385e-07, "loss": 0.3261, "step": 15198 }, { "epoch": 1.8495892911469425, "grad_norm": 2.080986738204956, "learning_rate": 2.887890523739045e-07, "loss": 0.3702, "step": 15199 }, { "epoch": 1.8497109826589595, "grad_norm": 1.6361138820648193, "learning_rate": 2.883238266617916e-07, "loss": 0.3394, "step": 15200 }, { "epoch": 1.8498326741709765, "grad_norm": 1.9605023860931396, "learning_rate": 2.8785897049906086e-07, "loss": 0.4017, "step": 15201 }, { "epoch": 1.8499543656829935, "grad_norm": 2.717085123062134, "learning_rate": 2.873944839033982e-07, "loss": 0.3728, "step": 15202 }, { "epoch": 1.8500760571950106, "grad_norm": 1.5822906494140625, "learning_rate": 2.8693036689247943e-07, "loss": 0.3602, "step": 15203 }, { "epoch": 1.8501977487070276, "grad_norm": 1.9276150465011597, "learning_rate": 2.8646661948396494e-07, "loss": 0.3797, "step": 15204 }, { "epoch": 1.8503194402190446, "grad_norm": 1.3737303018569946, "learning_rate": 2.860032416955028e-07, "loss": 0.355, "step": 15205 }, { "epoch": 1.8504411317310616, "grad_norm": 1.8732149600982666, "learning_rate": 2.855402335447222e-07, "loss": 0.3691, "step": 15206 }, { "epoch": 1.8505628232430786, "grad_norm": 1.601396083831787, "learning_rate": 2.850775950492457e-07, "loss": 0.3833, "step": 15207 }, { "epoch": 1.8506845147550959, "grad_norm": 2.2502572536468506, "learning_rate": 2.8461532622667486e-07, "loss": 0.3758, "step": 15208 }, { "epoch": 1.850806206267113, "grad_norm": 3.461381196975708, "learning_rate": 2.8415342709460003e-07, "loss": 0.4597, "step": 15209 }, { "epoch": 1.85092789777913, "grad_norm": 1.6245616674423218, "learning_rate": 2.8369189767059823e-07, "loss": 0.3802, "step": 15210 }, { "epoch": 1.851049589291147, "grad_norm": 2.85707950592041, "learning_rate": 2.83230737972231e-07, "loss": 0.3331, "step": 15211 }, { "epoch": 1.851171280803164, "grad_norm": 3.1344103813171387, "learning_rate": 2.827699480170476e-07, "loss": 0.4383, "step": 15212 }, { "epoch": 1.851292972315181, "grad_norm": 1.7445528507232666, "learning_rate": 2.823095278225807e-07, "loss": 0.3499, "step": 15213 }, { "epoch": 1.8514146638271982, "grad_norm": 1.7127130031585693, "learning_rate": 2.818494774063507e-07, "loss": 0.376, "step": 15214 }, { "epoch": 1.8515363553392152, "grad_norm": 1.6919753551483154, "learning_rate": 2.813897967858636e-07, "loss": 0.3692, "step": 15215 }, { "epoch": 1.8516580468512323, "grad_norm": 2.6670053005218506, "learning_rate": 2.8093048597861107e-07, "loss": 0.3234, "step": 15216 }, { "epoch": 1.8517797383632493, "grad_norm": 2.696377992630005, "learning_rate": 2.8047154500207117e-07, "loss": 0.3818, "step": 15217 }, { "epoch": 1.8519014298752663, "grad_norm": 3.575444221496582, "learning_rate": 2.800129738737067e-07, "loss": 0.4169, "step": 15218 }, { "epoch": 1.8520231213872833, "grad_norm": 1.633947491645813, "learning_rate": 2.795547726109693e-07, "loss": 0.4023, "step": 15219 }, { "epoch": 1.8521448128993003, "grad_norm": 1.7125251293182373, "learning_rate": 2.790969412312927e-07, "loss": 0.3347, "step": 15220 }, { "epoch": 1.8522665044113173, "grad_norm": 2.236868143081665, "learning_rate": 2.7863947975209747e-07, "loss": 0.3169, "step": 15221 }, { "epoch": 1.8523881959233344, "grad_norm": 3.2324862480163574, "learning_rate": 2.781823881907919e-07, "loss": 0.3111, "step": 15222 }, { "epoch": 1.8525098874353514, "grad_norm": 1.803200364112854, "learning_rate": 2.7772566656477096e-07, "loss": 0.3723, "step": 15223 }, { "epoch": 1.8526315789473684, "grad_norm": 3.2707302570343018, "learning_rate": 2.7726931489141186e-07, "loss": 0.4346, "step": 15224 }, { "epoch": 1.8527532704593854, "grad_norm": 2.8390579223632812, "learning_rate": 2.7681333318807956e-07, "loss": 0.3423, "step": 15225 }, { "epoch": 1.8528749619714024, "grad_norm": 2.4574379920959473, "learning_rate": 2.763577214721269e-07, "loss": 0.3909, "step": 15226 }, { "epoch": 1.8529966534834195, "grad_norm": 1.9136743545532227, "learning_rate": 2.7590247976089004e-07, "loss": 0.3789, "step": 15227 }, { "epoch": 1.8531183449954365, "grad_norm": 1.6445471048355103, "learning_rate": 2.754476080716906e-07, "loss": 0.3508, "step": 15228 }, { "epoch": 1.8532400365074535, "grad_norm": 1.5453400611877441, "learning_rate": 2.7499310642183807e-07, "loss": 0.407, "step": 15229 }, { "epoch": 1.8533617280194705, "grad_norm": 2.1475095748901367, "learning_rate": 2.745389748286287e-07, "loss": 0.3421, "step": 15230 }, { "epoch": 1.8534834195314875, "grad_norm": 1.6891398429870605, "learning_rate": 2.740852133093419e-07, "loss": 0.3363, "step": 15231 }, { "epoch": 1.8536051110435046, "grad_norm": 3.554314136505127, "learning_rate": 2.7363182188124396e-07, "loss": 0.4546, "step": 15232 }, { "epoch": 1.8537268025555218, "grad_norm": 1.834157109260559, "learning_rate": 2.731788005615865e-07, "loss": 0.3502, "step": 15233 }, { "epoch": 1.8538484940675388, "grad_norm": 2.061800956726074, "learning_rate": 2.7272614936761033e-07, "loss": 0.3579, "step": 15234 }, { "epoch": 1.8539701855795558, "grad_norm": 1.362410306930542, "learning_rate": 2.7227386831653933e-07, "loss": 0.3333, "step": 15235 }, { "epoch": 1.8540918770915729, "grad_norm": 1.7776938676834106, "learning_rate": 2.718219574255809e-07, "loss": 0.3627, "step": 15236 }, { "epoch": 1.8542135686035899, "grad_norm": 1.718324899673462, "learning_rate": 2.7137041671193463e-07, "loss": 0.4081, "step": 15237 }, { "epoch": 1.854335260115607, "grad_norm": 1.710709810256958, "learning_rate": 2.709192461927801e-07, "loss": 0.3901, "step": 15238 }, { "epoch": 1.8544569516276241, "grad_norm": 4.159104347229004, "learning_rate": 2.7046844588528684e-07, "loss": 0.4716, "step": 15239 }, { "epoch": 1.8545786431396412, "grad_norm": 1.7879972457885742, "learning_rate": 2.700180158066079e-07, "loss": 0.3916, "step": 15240 }, { "epoch": 1.8547003346516582, "grad_norm": 1.9301176071166992, "learning_rate": 2.6956795597388396e-07, "loss": 0.3835, "step": 15241 }, { "epoch": 1.8548220261636752, "grad_norm": 1.9738837480545044, "learning_rate": 2.691182664042413e-07, "loss": 0.3659, "step": 15242 }, { "epoch": 1.8549437176756922, "grad_norm": 2.536100387573242, "learning_rate": 2.686689471147896e-07, "loss": 0.4327, "step": 15243 }, { "epoch": 1.8550654091877092, "grad_norm": 6.848067760467529, "learning_rate": 2.682199981226252e-07, "loss": 0.395, "step": 15244 }, { "epoch": 1.8551871006997263, "grad_norm": 1.7462328672409058, "learning_rate": 2.6777141944483553e-07, "loss": 0.3478, "step": 15245 }, { "epoch": 1.8553087922117433, "grad_norm": 2.3958640098571777, "learning_rate": 2.67323211098488e-07, "loss": 0.3869, "step": 15246 }, { "epoch": 1.8554304837237603, "grad_norm": 1.5834935903549194, "learning_rate": 2.6687537310063685e-07, "loss": 0.3268, "step": 15247 }, { "epoch": 1.8555521752357773, "grad_norm": 2.825343608856201, "learning_rate": 2.66427905468325e-07, "loss": 0.3079, "step": 15248 }, { "epoch": 1.8556738667477943, "grad_norm": 1.737581729888916, "learning_rate": 2.659808082185777e-07, "loss": 0.399, "step": 15249 }, { "epoch": 1.8557955582598114, "grad_norm": 2.0236270427703857, "learning_rate": 2.655340813684104e-07, "loss": 0.3246, "step": 15250 }, { "epoch": 1.8559172497718284, "grad_norm": 2.477104425430298, "learning_rate": 2.650877249348205e-07, "loss": 0.4306, "step": 15251 }, { "epoch": 1.8560389412838454, "grad_norm": 1.8327890634536743, "learning_rate": 2.6464173893479104e-07, "loss": 0.403, "step": 15252 }, { "epoch": 1.8561606327958624, "grad_norm": 1.4874261617660522, "learning_rate": 2.641961233852963e-07, "loss": 0.3513, "step": 15253 }, { "epoch": 1.8562823243078794, "grad_norm": 2.209660768508911, "learning_rate": 2.637508783032916e-07, "loss": 0.376, "step": 15254 }, { "epoch": 1.8564040158198964, "grad_norm": 1.6877037286758423, "learning_rate": 2.6330600370571667e-07, "loss": 0.352, "step": 15255 }, { "epoch": 1.8565257073319135, "grad_norm": 2.268897294998169, "learning_rate": 2.6286149960950357e-07, "loss": 0.3148, "step": 15256 }, { "epoch": 1.8566473988439305, "grad_norm": 1.7968549728393555, "learning_rate": 2.6241736603156433e-07, "loss": 0.3961, "step": 15257 }, { "epoch": 1.8567690903559477, "grad_norm": 3.1995365619659424, "learning_rate": 2.619736029888009e-07, "loss": 0.3272, "step": 15258 }, { "epoch": 1.8568907818679647, "grad_norm": 2.673677921295166, "learning_rate": 2.615302104980988e-07, "loss": 0.444, "step": 15259 }, { "epoch": 1.8570124733799818, "grad_norm": 2.7062039375305176, "learning_rate": 2.6108718857632776e-07, "loss": 0.395, "step": 15260 }, { "epoch": 1.8571341648919988, "grad_norm": 1.8717856407165527, "learning_rate": 2.6064453724034986e-07, "loss": 0.376, "step": 15261 }, { "epoch": 1.8572558564040158, "grad_norm": 2.5740532875061035, "learning_rate": 2.602022565070061e-07, "loss": 0.406, "step": 15262 }, { "epoch": 1.8573775479160328, "grad_norm": 2.5122294425964355, "learning_rate": 2.5976034639312533e-07, "loss": 0.3715, "step": 15263 }, { "epoch": 1.8574992394280498, "grad_norm": 2.4028637409210205, "learning_rate": 2.5931880691552613e-07, "loss": 0.3769, "step": 15264 }, { "epoch": 1.857620930940067, "grad_norm": 1.950917363166809, "learning_rate": 2.588776380910074e-07, "loss": 0.3324, "step": 15265 }, { "epoch": 1.857742622452084, "grad_norm": 1.5598349571228027, "learning_rate": 2.5843683993635903e-07, "loss": 0.3523, "step": 15266 }, { "epoch": 1.8578643139641011, "grad_norm": 2.1785759925842285, "learning_rate": 2.5799641246835206e-07, "loss": 0.3828, "step": 15267 }, { "epoch": 1.8579860054761181, "grad_norm": 1.9486876726150513, "learning_rate": 2.5755635570374525e-07, "loss": 0.3908, "step": 15268 }, { "epoch": 1.8581076969881352, "grad_norm": 1.6697043180465698, "learning_rate": 2.571166696592864e-07, "loss": 0.3804, "step": 15269 }, { "epoch": 1.8582293885001522, "grad_norm": 2.2978954315185547, "learning_rate": 2.5667735435170426e-07, "loss": 0.3852, "step": 15270 }, { "epoch": 1.8583510800121692, "grad_norm": 1.653773546218872, "learning_rate": 2.5623840979771665e-07, "loss": 0.3304, "step": 15271 }, { "epoch": 1.8584727715241862, "grad_norm": 2.2193028926849365, "learning_rate": 2.557998360140257e-07, "loss": 0.3923, "step": 15272 }, { "epoch": 1.8585944630362032, "grad_norm": 1.6757093667984009, "learning_rate": 2.553616330173214e-07, "loss": 0.3615, "step": 15273 }, { "epoch": 1.8587161545482203, "grad_norm": 1.5936883687973022, "learning_rate": 2.5492380082427495e-07, "loss": 0.3563, "step": 15274 }, { "epoch": 1.8588378460602373, "grad_norm": 1.9530621767044067, "learning_rate": 2.544863394515507e-07, "loss": 0.3807, "step": 15275 }, { "epoch": 1.8589595375722543, "grad_norm": 4.100805282592773, "learning_rate": 2.540492489157942e-07, "loss": 0.3468, "step": 15276 }, { "epoch": 1.8590812290842713, "grad_norm": 1.8061838150024414, "learning_rate": 2.5361252923363553e-07, "loss": 0.3802, "step": 15277 }, { "epoch": 1.8592029205962883, "grad_norm": 2.2591936588287354, "learning_rate": 2.531761804216948e-07, "loss": 0.3756, "step": 15278 }, { "epoch": 1.8593246121083054, "grad_norm": 2.2334301471710205, "learning_rate": 2.5274020249657414e-07, "loss": 0.3608, "step": 15279 }, { "epoch": 1.8594463036203224, "grad_norm": 1.7504310607910156, "learning_rate": 2.523045954748649e-07, "loss": 0.3622, "step": 15280 }, { "epoch": 1.8595679951323394, "grad_norm": 1.748343825340271, "learning_rate": 2.518693593731425e-07, "loss": 0.3355, "step": 15281 }, { "epoch": 1.8596896866443564, "grad_norm": 1.4133572578430176, "learning_rate": 2.5143449420796826e-07, "loss": 0.3691, "step": 15282 }, { "epoch": 1.8598113781563734, "grad_norm": 3.210618495941162, "learning_rate": 2.509999999958912e-07, "loss": 0.4337, "step": 15283 }, { "epoch": 1.8599330696683907, "grad_norm": 1.4826692342758179, "learning_rate": 2.5056587675344245e-07, "loss": 0.3766, "step": 15284 }, { "epoch": 1.8600547611804077, "grad_norm": 2.6870193481445312, "learning_rate": 2.5013212449714106e-07, "loss": 0.3954, "step": 15285 }, { "epoch": 1.8601764526924247, "grad_norm": 3.0309722423553467, "learning_rate": 2.49698743243495e-07, "loss": 0.3014, "step": 15286 }, { "epoch": 1.8602981442044417, "grad_norm": 2.19174861907959, "learning_rate": 2.4926573300899426e-07, "loss": 0.3673, "step": 15287 }, { "epoch": 1.8604198357164587, "grad_norm": 1.744202733039856, "learning_rate": 2.488330938101147e-07, "loss": 0.3411, "step": 15288 }, { "epoch": 1.8605415272284758, "grad_norm": 2.1084840297698975, "learning_rate": 2.484008256633197e-07, "loss": 0.384, "step": 15289 }, { "epoch": 1.860663218740493, "grad_norm": 1.4459794759750366, "learning_rate": 2.479689285850573e-07, "loss": 0.3464, "step": 15290 }, { "epoch": 1.86078491025251, "grad_norm": 2.2902674674987793, "learning_rate": 2.4753740259176427e-07, "loss": 0.4493, "step": 15291 }, { "epoch": 1.860906601764527, "grad_norm": 1.6270930767059326, "learning_rate": 2.471062476998587e-07, "loss": 0.3248, "step": 15292 }, { "epoch": 1.861028293276544, "grad_norm": 1.6411547660827637, "learning_rate": 2.466754639257474e-07, "loss": 0.3784, "step": 15293 }, { "epoch": 1.861149984788561, "grad_norm": 2.00333833694458, "learning_rate": 2.4624505128582276e-07, "loss": 0.3918, "step": 15294 }, { "epoch": 1.861271676300578, "grad_norm": 1.498854637145996, "learning_rate": 2.45815009796464e-07, "loss": 0.3572, "step": 15295 }, { "epoch": 1.8613933678125951, "grad_norm": 2.089965343475342, "learning_rate": 2.4538533947403355e-07, "loss": 0.3509, "step": 15296 }, { "epoch": 1.8615150593246121, "grad_norm": 1.6366585493087769, "learning_rate": 2.4495604033488165e-07, "loss": 0.3424, "step": 15297 }, { "epoch": 1.8616367508366292, "grad_norm": 1.997127652168274, "learning_rate": 2.445271123953452e-07, "loss": 0.3526, "step": 15298 }, { "epoch": 1.8617584423486462, "grad_norm": 2.454453706741333, "learning_rate": 2.4409855567174347e-07, "loss": 0.3492, "step": 15299 }, { "epoch": 1.8618801338606632, "grad_norm": 1.4662631750106812, "learning_rate": 2.4367037018038777e-07, "loss": 0.2955, "step": 15300 }, { "epoch": 1.8620018253726802, "grad_norm": 2.0737311840057373, "learning_rate": 2.4324255593756727e-07, "loss": 0.3533, "step": 15301 }, { "epoch": 1.8621235168846972, "grad_norm": 1.6240527629852295, "learning_rate": 2.428151129595624e-07, "loss": 0.3655, "step": 15302 }, { "epoch": 1.8622452083967143, "grad_norm": 1.8514087200164795, "learning_rate": 2.4238804126264006e-07, "loss": 0.3334, "step": 15303 }, { "epoch": 1.8623668999087313, "grad_norm": 1.7822703123092651, "learning_rate": 2.4196134086304835e-07, "loss": 0.3779, "step": 15304 }, { "epoch": 1.8624885914207483, "grad_norm": 1.6161086559295654, "learning_rate": 2.4153501177702656e-07, "loss": 0.395, "step": 15305 }, { "epoch": 1.8626102829327653, "grad_norm": 2.832211971282959, "learning_rate": 2.4110905402079723e-07, "loss": 0.4319, "step": 15306 }, { "epoch": 1.8627319744447823, "grad_norm": 1.7324323654174805, "learning_rate": 2.4068346761056627e-07, "loss": 0.3761, "step": 15307 }, { "epoch": 1.8628536659567994, "grad_norm": 1.9043869972229004, "learning_rate": 2.4025825256253186e-07, "loss": 0.374, "step": 15308 }, { "epoch": 1.8629753574688166, "grad_norm": 3.8759820461273193, "learning_rate": 2.398334088928711e-07, "loss": 0.3906, "step": 15309 }, { "epoch": 1.8630970489808336, "grad_norm": 1.8025131225585938, "learning_rate": 2.394089366177521e-07, "loss": 0.3806, "step": 15310 }, { "epoch": 1.8632187404928506, "grad_norm": 1.7823030948638916, "learning_rate": 2.3898483575332866e-07, "loss": 0.391, "step": 15311 }, { "epoch": 1.8633404320048677, "grad_norm": 1.5300712585449219, "learning_rate": 2.3856110631573336e-07, "loss": 0.375, "step": 15312 }, { "epoch": 1.8634621235168847, "grad_norm": 1.6364474296569824, "learning_rate": 2.3813774832109338e-07, "loss": 0.3543, "step": 15313 }, { "epoch": 1.8635838150289017, "grad_norm": 1.6200556755065918, "learning_rate": 2.377147617855191e-07, "loss": 0.3764, "step": 15314 }, { "epoch": 1.863705506540919, "grad_norm": 3.346451997756958, "learning_rate": 2.3729214672510436e-07, "loss": 0.4574, "step": 15315 }, { "epoch": 1.863827198052936, "grad_norm": 2.808462142944336, "learning_rate": 2.3686990315593071e-07, "loss": 0.4139, "step": 15316 }, { "epoch": 1.863948889564953, "grad_norm": 1.7323073148727417, "learning_rate": 2.3644803109406643e-07, "loss": 0.3828, "step": 15317 }, { "epoch": 1.86407058107697, "grad_norm": 1.712723731994629, "learning_rate": 2.3602653055556424e-07, "loss": 0.344, "step": 15318 }, { "epoch": 1.864192272588987, "grad_norm": 2.311335802078247, "learning_rate": 2.356054015564624e-07, "loss": 0.3344, "step": 15319 }, { "epoch": 1.864313964101004, "grad_norm": 1.6981480121612549, "learning_rate": 2.3518464411278586e-07, "loss": 0.3042, "step": 15320 }, { "epoch": 1.864435655613021, "grad_norm": 1.6295275688171387, "learning_rate": 2.3476425824054627e-07, "loss": 0.3374, "step": 15321 }, { "epoch": 1.864557347125038, "grad_norm": 2.2025394439697266, "learning_rate": 2.3434424395573973e-07, "loss": 0.3913, "step": 15322 }, { "epoch": 1.864679038637055, "grad_norm": 1.3795732259750366, "learning_rate": 2.3392460127434903e-07, "loss": 0.3406, "step": 15323 }, { "epoch": 1.864800730149072, "grad_norm": 1.7225010395050049, "learning_rate": 2.3350533021234134e-07, "loss": 0.3303, "step": 15324 }, { "epoch": 1.8649224216610891, "grad_norm": 1.5403178930282593, "learning_rate": 2.3308643078567062e-07, "loss": 0.397, "step": 15325 }, { "epoch": 1.8650441131731061, "grad_norm": 1.648912787437439, "learning_rate": 2.3266790301027853e-07, "loss": 0.311, "step": 15326 }, { "epoch": 1.8651658046851232, "grad_norm": 1.8623329401016235, "learning_rate": 2.3224974690209013e-07, "loss": 0.3852, "step": 15327 }, { "epoch": 1.8652874961971402, "grad_norm": 1.4475737810134888, "learning_rate": 2.318319624770149e-07, "loss": 0.3351, "step": 15328 }, { "epoch": 1.8654091877091572, "grad_norm": 1.4394919872283936, "learning_rate": 2.3141454975095457e-07, "loss": 0.3422, "step": 15329 }, { "epoch": 1.8655308792211742, "grad_norm": 1.9390974044799805, "learning_rate": 2.3099750873979088e-07, "loss": 0.3536, "step": 15330 }, { "epoch": 1.8656525707331912, "grad_norm": 1.6818997859954834, "learning_rate": 2.3058083945939114e-07, "loss": 0.3544, "step": 15331 }, { "epoch": 1.8657742622452083, "grad_norm": 1.7142422199249268, "learning_rate": 2.3016454192561267e-07, "loss": 0.3336, "step": 15332 }, { "epoch": 1.8658959537572253, "grad_norm": 1.8560435771942139, "learning_rate": 2.2974861615429612e-07, "loss": 0.3293, "step": 15333 }, { "epoch": 1.8660176452692425, "grad_norm": 2.3031299114227295, "learning_rate": 2.293330621612677e-07, "loss": 0.3405, "step": 15334 }, { "epoch": 1.8661393367812595, "grad_norm": 4.432735443115234, "learning_rate": 2.289178799623415e-07, "loss": 0.4603, "step": 15335 }, { "epoch": 1.8662610282932766, "grad_norm": 1.8172458410263062, "learning_rate": 2.285030695733137e-07, "loss": 0.3746, "step": 15336 }, { "epoch": 1.8663827198052936, "grad_norm": 2.4081640243530273, "learning_rate": 2.280886310099706e-07, "loss": 0.3503, "step": 15337 }, { "epoch": 1.8665044113173106, "grad_norm": 1.8210499286651611, "learning_rate": 2.2767456428808177e-07, "loss": 0.4008, "step": 15338 }, { "epoch": 1.8666261028293276, "grad_norm": 2.1988589763641357, "learning_rate": 2.2726086942340243e-07, "loss": 0.4072, "step": 15339 }, { "epoch": 1.8667477943413449, "grad_norm": 1.9303128719329834, "learning_rate": 2.2684754643167662e-07, "loss": 0.3619, "step": 15340 }, { "epoch": 1.8668694858533619, "grad_norm": 2.564828872680664, "learning_rate": 2.2643459532863067e-07, "loss": 0.3622, "step": 15341 }, { "epoch": 1.866991177365379, "grad_norm": 1.9748661518096924, "learning_rate": 2.2602201612997754e-07, "loss": 0.3693, "step": 15342 }, { "epoch": 1.867112868877396, "grad_norm": 1.621315360069275, "learning_rate": 2.2560980885141913e-07, "loss": 0.3503, "step": 15343 }, { "epoch": 1.867234560389413, "grad_norm": 1.4281065464019775, "learning_rate": 2.2519797350863848e-07, "loss": 0.3465, "step": 15344 }, { "epoch": 1.86735625190143, "grad_norm": 1.5208765268325806, "learning_rate": 2.2478651011730745e-07, "loss": 0.3136, "step": 15345 }, { "epoch": 1.867477943413447, "grad_norm": 1.624214768409729, "learning_rate": 2.2437541869308577e-07, "loss": 0.3631, "step": 15346 }, { "epoch": 1.867599634925464, "grad_norm": 2.243067979812622, "learning_rate": 2.239646992516109e-07, "loss": 0.4242, "step": 15347 }, { "epoch": 1.867721326437481, "grad_norm": 2.440727710723877, "learning_rate": 2.2355435180851592e-07, "loss": 0.3705, "step": 15348 }, { "epoch": 1.867843017949498, "grad_norm": 1.867019534111023, "learning_rate": 2.231443763794139e-07, "loss": 0.4287, "step": 15349 }, { "epoch": 1.867964709461515, "grad_norm": 1.9152992963790894, "learning_rate": 2.2273477297990453e-07, "loss": 0.3443, "step": 15350 }, { "epoch": 1.868086400973532, "grad_norm": 1.8000767230987549, "learning_rate": 2.223255416255754e-07, "loss": 0.3629, "step": 15351 }, { "epoch": 1.868208092485549, "grad_norm": 1.6828755140304565, "learning_rate": 2.2191668233199848e-07, "loss": 0.3457, "step": 15352 }, { "epoch": 1.868329783997566, "grad_norm": 7.77219820022583, "learning_rate": 2.215081951147302e-07, "loss": 0.3679, "step": 15353 }, { "epoch": 1.8684514755095831, "grad_norm": 1.7122520208358765, "learning_rate": 2.2110007998931705e-07, "loss": 0.3527, "step": 15354 }, { "epoch": 1.8685731670216001, "grad_norm": 1.5874251127243042, "learning_rate": 2.2069233697128657e-07, "loss": 0.3767, "step": 15355 }, { "epoch": 1.8686948585336172, "grad_norm": 1.8478678464889526, "learning_rate": 2.2028496607615303e-07, "loss": 0.3479, "step": 15356 }, { "epoch": 1.8688165500456342, "grad_norm": 2.899851083755493, "learning_rate": 2.1987796731942178e-07, "loss": 0.409, "step": 15357 }, { "epoch": 1.8689382415576512, "grad_norm": 1.7577241659164429, "learning_rate": 2.1947134071657607e-07, "loss": 0.3985, "step": 15358 }, { "epoch": 1.8690599330696684, "grad_norm": 1.868581771850586, "learning_rate": 2.190650862830912e-07, "loss": 0.3799, "step": 15359 }, { "epoch": 1.8691816245816855, "grad_norm": 1.7281532287597656, "learning_rate": 2.1865920403442598e-07, "loss": 0.3182, "step": 15360 }, { "epoch": 1.8693033160937025, "grad_norm": 2.024193525314331, "learning_rate": 2.1825369398602247e-07, "loss": 0.3637, "step": 15361 }, { "epoch": 1.8694250076057195, "grad_norm": 1.9359935522079468, "learning_rate": 2.178485561533139e-07, "loss": 0.3745, "step": 15362 }, { "epoch": 1.8695466991177365, "grad_norm": 3.233109712600708, "learning_rate": 2.174437905517157e-07, "loss": 0.2921, "step": 15363 }, { "epoch": 1.8696683906297535, "grad_norm": 2.986978054046631, "learning_rate": 2.1703939719663004e-07, "loss": 0.3269, "step": 15364 }, { "epoch": 1.8697900821417706, "grad_norm": 1.7951374053955078, "learning_rate": 2.1663537610344453e-07, "loss": 0.3309, "step": 15365 }, { "epoch": 1.8699117736537878, "grad_norm": 1.7709498405456543, "learning_rate": 2.1623172728753472e-07, "loss": 0.4393, "step": 15366 }, { "epoch": 1.8700334651658048, "grad_norm": 1.9826070070266724, "learning_rate": 2.158284507642572e-07, "loss": 0.4589, "step": 15367 }, { "epoch": 1.8701551566778218, "grad_norm": 1.6621332168579102, "learning_rate": 2.1542554654896076e-07, "loss": 0.3578, "step": 15368 }, { "epoch": 1.8702768481898389, "grad_norm": 1.561108946800232, "learning_rate": 2.150230146569743e-07, "loss": 0.3652, "step": 15369 }, { "epoch": 1.8703985397018559, "grad_norm": 1.57069730758667, "learning_rate": 2.1462085510361664e-07, "loss": 0.3255, "step": 15370 }, { "epoch": 1.870520231213873, "grad_norm": 1.6626564264297485, "learning_rate": 2.1421906790419001e-07, "loss": 0.3069, "step": 15371 }, { "epoch": 1.87064192272589, "grad_norm": 1.2337212562561035, "learning_rate": 2.1381765307398216e-07, "loss": 0.3105, "step": 15372 }, { "epoch": 1.870763614237907, "grad_norm": 2.4486405849456787, "learning_rate": 2.134166106282698e-07, "loss": 0.3505, "step": 15373 }, { "epoch": 1.870885305749924, "grad_norm": 1.6470117568969727, "learning_rate": 2.130159405823129e-07, "loss": 0.382, "step": 15374 }, { "epoch": 1.871006997261941, "grad_norm": 2.046447277069092, "learning_rate": 2.1261564295135596e-07, "loss": 0.3482, "step": 15375 }, { "epoch": 1.871128688773958, "grad_norm": 1.8801674842834473, "learning_rate": 2.1221571775063343e-07, "loss": 0.3365, "step": 15376 }, { "epoch": 1.871250380285975, "grad_norm": 2.0557637214660645, "learning_rate": 2.1181616499536206e-07, "loss": 0.342, "step": 15377 }, { "epoch": 1.871372071797992, "grad_norm": 2.9107978343963623, "learning_rate": 2.1141698470074746e-07, "loss": 0.3432, "step": 15378 }, { "epoch": 1.871493763310009, "grad_norm": 2.4534497261047363, "learning_rate": 2.1101817688197633e-07, "loss": 0.3972, "step": 15379 }, { "epoch": 1.871615454822026, "grad_norm": 1.4412715435028076, "learning_rate": 2.106197415542266e-07, "loss": 0.3127, "step": 15380 }, { "epoch": 1.871737146334043, "grad_norm": 2.2530510425567627, "learning_rate": 2.102216787326583e-07, "loss": 0.3331, "step": 15381 }, { "epoch": 1.87185883784606, "grad_norm": 1.7369531393051147, "learning_rate": 2.0982398843241937e-07, "loss": 0.3607, "step": 15382 }, { "epoch": 1.8719805293580771, "grad_norm": 1.5745741128921509, "learning_rate": 2.0942667066864099e-07, "loss": 0.3367, "step": 15383 }, { "epoch": 1.8721022208700941, "grad_norm": 2.732997417449951, "learning_rate": 2.090297254564444e-07, "loss": 0.3978, "step": 15384 }, { "epoch": 1.8722239123821114, "grad_norm": 1.5832599401474, "learning_rate": 2.0863315281093088e-07, "loss": 0.3261, "step": 15385 }, { "epoch": 1.8723456038941284, "grad_norm": 1.58500075340271, "learning_rate": 2.0823695274719503e-07, "loss": 0.3575, "step": 15386 }, { "epoch": 1.8724672954061454, "grad_norm": 1.656009316444397, "learning_rate": 2.078411252803103e-07, "loss": 0.3785, "step": 15387 }, { "epoch": 1.8725889869181624, "grad_norm": 3.3702714443206787, "learning_rate": 2.0744567042533914e-07, "loss": 0.2634, "step": 15388 }, { "epoch": 1.8727106784301795, "grad_norm": 2.3989360332489014, "learning_rate": 2.070505881973306e-07, "loss": 0.3286, "step": 15389 }, { "epoch": 1.8728323699421965, "grad_norm": 1.991233468055725, "learning_rate": 2.0665587861131707e-07, "loss": 0.3526, "step": 15390 }, { "epoch": 1.8729540614542137, "grad_norm": 1.6651912927627563, "learning_rate": 2.062615416823177e-07, "loss": 0.3437, "step": 15391 }, { "epoch": 1.8730757529662307, "grad_norm": 1.644325852394104, "learning_rate": 2.0586757742533937e-07, "loss": 0.3465, "step": 15392 }, { "epoch": 1.8731974444782478, "grad_norm": 2.5114777088165283, "learning_rate": 2.0547398585537225e-07, "loss": 0.413, "step": 15393 }, { "epoch": 1.8733191359902648, "grad_norm": 2.8682193756103516, "learning_rate": 2.0508076698739333e-07, "loss": 0.4729, "step": 15394 }, { "epoch": 1.8734408275022818, "grad_norm": 1.4600131511688232, "learning_rate": 2.0468792083636612e-07, "loss": 0.3513, "step": 15395 }, { "epoch": 1.8735625190142988, "grad_norm": 1.5856443643569946, "learning_rate": 2.042954474172376e-07, "loss": 0.3222, "step": 15396 }, { "epoch": 1.8736842105263158, "grad_norm": 2.255934953689575, "learning_rate": 2.0390334674494473e-07, "loss": 0.3912, "step": 15397 }, { "epoch": 1.8738059020383329, "grad_norm": 1.7907888889312744, "learning_rate": 2.035116188344055e-07, "loss": 0.3687, "step": 15398 }, { "epoch": 1.8739275935503499, "grad_norm": 1.64853036403656, "learning_rate": 2.0312026370052696e-07, "loss": 0.3127, "step": 15399 }, { "epoch": 1.874049285062367, "grad_norm": 1.8663063049316406, "learning_rate": 2.0272928135820047e-07, "loss": 0.3636, "step": 15400 }, { "epoch": 1.874170976574384, "grad_norm": 1.712429165840149, "learning_rate": 2.0233867182230527e-07, "loss": 0.341, "step": 15401 }, { "epoch": 1.874292668086401, "grad_norm": 1.8692585229873657, "learning_rate": 2.019484351077017e-07, "loss": 0.3972, "step": 15402 }, { "epoch": 1.874414359598418, "grad_norm": 2.110351324081421, "learning_rate": 2.0155857122924337e-07, "loss": 0.3629, "step": 15403 }, { "epoch": 1.874536051110435, "grad_norm": 1.7450706958770752, "learning_rate": 2.0116908020176073e-07, "loss": 0.4152, "step": 15404 }, { "epoch": 1.874657742622452, "grad_norm": 1.606277585029602, "learning_rate": 2.0077996204007855e-07, "loss": 0.4053, "step": 15405 }, { "epoch": 1.874779434134469, "grad_norm": 1.9735743999481201, "learning_rate": 2.0039121675900163e-07, "loss": 0.3732, "step": 15406 }, { "epoch": 1.874901125646486, "grad_norm": 2.281611442565918, "learning_rate": 2.0000284437332264e-07, "loss": 0.4196, "step": 15407 }, { "epoch": 1.875022817158503, "grad_norm": 2.0328779220581055, "learning_rate": 1.996148448978208e-07, "loss": 0.3523, "step": 15408 }, { "epoch": 1.87514450867052, "grad_norm": 1.740028738975525, "learning_rate": 1.9922721834725988e-07, "loss": 0.3739, "step": 15409 }, { "epoch": 1.8752662001825373, "grad_norm": 1.7364581823349, "learning_rate": 1.9883996473638922e-07, "loss": 0.3868, "step": 15410 }, { "epoch": 1.8753878916945543, "grad_norm": 2.5917022228240967, "learning_rate": 1.984530840799459e-07, "loss": 0.4013, "step": 15411 }, { "epoch": 1.8755095832065714, "grad_norm": 3.0971415042877197, "learning_rate": 1.9806657639265037e-07, "loss": 0.4174, "step": 15412 }, { "epoch": 1.8756312747185884, "grad_norm": 1.984106183052063, "learning_rate": 1.9768044168920974e-07, "loss": 0.3486, "step": 15413 }, { "epoch": 1.8757529662306054, "grad_norm": 3.0246214866638184, "learning_rate": 1.9729467998432007e-07, "loss": 0.3716, "step": 15414 }, { "epoch": 1.8758746577426224, "grad_norm": 1.9176599979400635, "learning_rate": 1.9690929129265735e-07, "loss": 0.3722, "step": 15415 }, { "epoch": 1.8759963492546397, "grad_norm": 1.8779288530349731, "learning_rate": 1.9652427562888653e-07, "loss": 0.3107, "step": 15416 }, { "epoch": 1.8761180407666567, "grad_norm": 1.471330165863037, "learning_rate": 1.9613963300766038e-07, "loss": 0.3379, "step": 15417 }, { "epoch": 1.8762397322786737, "grad_norm": 2.8038527965545654, "learning_rate": 1.957553634436127e-07, "loss": 0.3973, "step": 15418 }, { "epoch": 1.8763614237906907, "grad_norm": 1.803779125213623, "learning_rate": 1.9537146695136843e-07, "loss": 0.3537, "step": 15419 }, { "epoch": 1.8764831153027077, "grad_norm": 2.6164183616638184, "learning_rate": 1.9498794354553374e-07, "loss": 0.3273, "step": 15420 }, { "epoch": 1.8766048068147247, "grad_norm": 2.1499290466308594, "learning_rate": 1.9460479324070247e-07, "loss": 0.3379, "step": 15421 }, { "epoch": 1.8767264983267418, "grad_norm": 2.333621025085449, "learning_rate": 1.942220160514552e-07, "loss": 0.4227, "step": 15422 }, { "epoch": 1.8768481898387588, "grad_norm": 3.023346185684204, "learning_rate": 1.93839611992358e-07, "loss": 0.4454, "step": 15423 }, { "epoch": 1.8769698813507758, "grad_norm": 1.423231840133667, "learning_rate": 1.9345758107795932e-07, "loss": 0.3203, "step": 15424 }, { "epoch": 1.8770915728627928, "grad_norm": 1.7558668851852417, "learning_rate": 1.9307592332279966e-07, "loss": 0.3339, "step": 15425 }, { "epoch": 1.8772132643748098, "grad_norm": 1.9423383474349976, "learning_rate": 1.926946387413997e-07, "loss": 0.3719, "step": 15426 }, { "epoch": 1.8773349558868269, "grad_norm": 1.695603847503662, "learning_rate": 1.9231372734827004e-07, "loss": 0.3467, "step": 15427 }, { "epoch": 1.8774566473988439, "grad_norm": 1.5282888412475586, "learning_rate": 1.9193318915790237e-07, "loss": 0.401, "step": 15428 }, { "epoch": 1.877578338910861, "grad_norm": 2.061734437942505, "learning_rate": 1.915530241847785e-07, "loss": 0.3678, "step": 15429 }, { "epoch": 1.877700030422878, "grad_norm": 1.6862027645111084, "learning_rate": 1.911732324433646e-07, "loss": 0.4015, "step": 15430 }, { "epoch": 1.877821721934895, "grad_norm": 2.7050788402557373, "learning_rate": 1.907938139481136e-07, "loss": 0.3686, "step": 15431 }, { "epoch": 1.877943413446912, "grad_norm": 2.4505159854888916, "learning_rate": 1.9041476871345943e-07, "loss": 0.4082, "step": 15432 }, { "epoch": 1.878065104958929, "grad_norm": 2.776437997817993, "learning_rate": 1.900360967538306e-07, "loss": 0.4408, "step": 15433 }, { "epoch": 1.878186796470946, "grad_norm": 1.6510276794433594, "learning_rate": 1.8965779808363227e-07, "loss": 0.3673, "step": 15434 }, { "epoch": 1.8783084879829632, "grad_norm": 1.5085569620132446, "learning_rate": 1.8927987271726067e-07, "loss": 0.3514, "step": 15435 }, { "epoch": 1.8784301794949803, "grad_norm": 1.781345009803772, "learning_rate": 1.8890232066909764e-07, "loss": 0.3616, "step": 15436 }, { "epoch": 1.8785518710069973, "grad_norm": 1.3456544876098633, "learning_rate": 1.8852514195350947e-07, "loss": 0.3266, "step": 15437 }, { "epoch": 1.8786735625190143, "grad_norm": 1.8861418962478638, "learning_rate": 1.881483365848491e-07, "loss": 0.404, "step": 15438 }, { "epoch": 1.8787952540310313, "grad_norm": 1.5723501443862915, "learning_rate": 1.8777190457745287e-07, "loss": 0.3765, "step": 15439 }, { "epoch": 1.8789169455430483, "grad_norm": 1.626685619354248, "learning_rate": 1.8739584594564485e-07, "loss": 0.3452, "step": 15440 }, { "epoch": 1.8790386370550656, "grad_norm": 3.0832009315490723, "learning_rate": 1.8702016070373807e-07, "loss": 0.4342, "step": 15441 }, { "epoch": 1.8791603285670826, "grad_norm": 2.1513757705688477, "learning_rate": 1.866448488660244e-07, "loss": 0.402, "step": 15442 }, { "epoch": 1.8792820200790996, "grad_norm": 1.746595859527588, "learning_rate": 1.8626991044678688e-07, "loss": 0.3934, "step": 15443 }, { "epoch": 1.8794037115911166, "grad_norm": 1.7503626346588135, "learning_rate": 1.8589534546029297e-07, "loss": 0.3429, "step": 15444 }, { "epoch": 1.8795254031031337, "grad_norm": 2.16701340675354, "learning_rate": 1.8552115392079462e-07, "loss": 0.3243, "step": 15445 }, { "epoch": 1.8796470946151507, "grad_norm": 3.6273088455200195, "learning_rate": 1.8514733584253153e-07, "loss": 0.3482, "step": 15446 }, { "epoch": 1.8797687861271677, "grad_norm": 1.6590532064437866, "learning_rate": 1.84773891239729e-07, "loss": 0.362, "step": 15447 }, { "epoch": 1.8798904776391847, "grad_norm": 1.4681428670883179, "learning_rate": 1.8440082012659455e-07, "loss": 0.3314, "step": 15448 }, { "epoch": 1.8800121691512017, "grad_norm": 2.2805116176605225, "learning_rate": 1.8402812251732683e-07, "loss": 0.3541, "step": 15449 }, { "epoch": 1.8801338606632187, "grad_norm": 2.2875566482543945, "learning_rate": 1.836557984261078e-07, "loss": 0.4365, "step": 15450 }, { "epoch": 1.8802555521752358, "grad_norm": 1.4187343120574951, "learning_rate": 1.8328384786710396e-07, "loss": 0.3401, "step": 15451 }, { "epoch": 1.8803772436872528, "grad_norm": 1.6635375022888184, "learning_rate": 1.829122708544695e-07, "loss": 0.3574, "step": 15452 }, { "epoch": 1.8804989351992698, "grad_norm": 1.9890598058700562, "learning_rate": 1.82541067402342e-07, "loss": 0.3674, "step": 15453 }, { "epoch": 1.8806206267112868, "grad_norm": 1.6499123573303223, "learning_rate": 1.8217023752484907e-07, "loss": 0.3469, "step": 15454 }, { "epoch": 1.8807423182233038, "grad_norm": 3.207336664199829, "learning_rate": 1.8179978123609942e-07, "loss": 0.4323, "step": 15455 }, { "epoch": 1.8808640097353209, "grad_norm": 1.9012500047683716, "learning_rate": 1.8142969855019067e-07, "loss": 0.4465, "step": 15456 }, { "epoch": 1.8809857012473379, "grad_norm": 2.123323917388916, "learning_rate": 1.8105998948120595e-07, "loss": 0.3544, "step": 15457 }, { "epoch": 1.881107392759355, "grad_norm": 1.8889573812484741, "learning_rate": 1.8069065404321295e-07, "loss": 0.3495, "step": 15458 }, { "epoch": 1.881229084271372, "grad_norm": 2.0151238441467285, "learning_rate": 1.8032169225026487e-07, "loss": 0.4075, "step": 15459 }, { "epoch": 1.8813507757833892, "grad_norm": 1.5618926286697388, "learning_rate": 1.7995310411640266e-07, "loss": 0.3821, "step": 15460 }, { "epoch": 1.8814724672954062, "grad_norm": 2.0830960273742676, "learning_rate": 1.795848896556507e-07, "loss": 0.3659, "step": 15461 }, { "epoch": 1.8815941588074232, "grad_norm": 2.1608448028564453, "learning_rate": 1.7921704888202107e-07, "loss": 0.3866, "step": 15462 }, { "epoch": 1.8817158503194402, "grad_norm": 2.254701852798462, "learning_rate": 1.788495818095104e-07, "loss": 0.3561, "step": 15463 }, { "epoch": 1.8818375418314572, "grad_norm": 2.0414462089538574, "learning_rate": 1.7848248845210192e-07, "loss": 0.3438, "step": 15464 }, { "epoch": 1.8819592333434743, "grad_norm": 1.682150959968567, "learning_rate": 1.7811576882376446e-07, "loss": 0.3432, "step": 15465 }, { "epoch": 1.8820809248554913, "grad_norm": 1.691027045249939, "learning_rate": 1.7774942293845132e-07, "loss": 0.3851, "step": 15466 }, { "epoch": 1.8822026163675085, "grad_norm": 1.6096749305725098, "learning_rate": 1.7738345081010354e-07, "loss": 0.4077, "step": 15467 }, { "epoch": 1.8823243078795255, "grad_norm": 2.0246798992156982, "learning_rate": 1.7701785245264779e-07, "loss": 0.3679, "step": 15468 }, { "epoch": 1.8824459993915426, "grad_norm": 2.1744384765625, "learning_rate": 1.7665262787999625e-07, "loss": 0.4022, "step": 15469 }, { "epoch": 1.8825676909035596, "grad_norm": 2.3079404830932617, "learning_rate": 1.762877771060434e-07, "loss": 0.418, "step": 15470 }, { "epoch": 1.8826893824155766, "grad_norm": 1.723802089691162, "learning_rate": 1.7592330014467586e-07, "loss": 0.3776, "step": 15471 }, { "epoch": 1.8828110739275936, "grad_norm": 1.6644879579544067, "learning_rate": 1.7555919700976144e-07, "loss": 0.3448, "step": 15472 }, { "epoch": 1.8829327654396106, "grad_norm": 1.8033770322799683, "learning_rate": 1.751954677151546e-07, "loss": 0.3741, "step": 15473 }, { "epoch": 1.8830544569516277, "grad_norm": 2.4408578872680664, "learning_rate": 1.7483211227469654e-07, "loss": 0.4361, "step": 15474 }, { "epoch": 1.8831761484636447, "grad_norm": 4.081774711608887, "learning_rate": 1.7446913070221283e-07, "loss": 0.2735, "step": 15475 }, { "epoch": 1.8832978399756617, "grad_norm": 1.7569750547409058, "learning_rate": 1.7410652301151798e-07, "loss": 0.4259, "step": 15476 }, { "epoch": 1.8834195314876787, "grad_norm": 1.794790267944336, "learning_rate": 1.7374428921640761e-07, "loss": 0.3754, "step": 15477 }, { "epoch": 1.8835412229996957, "grad_norm": 1.802484154701233, "learning_rate": 1.733824293306663e-07, "loss": 0.3809, "step": 15478 }, { "epoch": 1.8836629145117127, "grad_norm": 1.5311236381530762, "learning_rate": 1.7302094336806298e-07, "loss": 0.325, "step": 15479 }, { "epoch": 1.8837846060237298, "grad_norm": 1.5800395011901855, "learning_rate": 1.7265983134235442e-07, "loss": 0.3426, "step": 15480 }, { "epoch": 1.8839062975357468, "grad_norm": 1.874714732170105, "learning_rate": 1.7229909326727968e-07, "loss": 0.3422, "step": 15481 }, { "epoch": 1.8840279890477638, "grad_norm": 2.0682287216186523, "learning_rate": 1.7193872915656773e-07, "loss": 0.3549, "step": 15482 }, { "epoch": 1.8841496805597808, "grad_norm": 1.8225303888320923, "learning_rate": 1.7157873902392986e-07, "loss": 0.3752, "step": 15483 }, { "epoch": 1.8842713720717978, "grad_norm": 2.681377649307251, "learning_rate": 1.7121912288306508e-07, "loss": 0.2461, "step": 15484 }, { "epoch": 1.8843930635838149, "grad_norm": 1.4650429487228394, "learning_rate": 1.7085988074765692e-07, "loss": 0.3798, "step": 15485 }, { "epoch": 1.884514755095832, "grad_norm": 1.6445276737213135, "learning_rate": 1.7050101263137443e-07, "loss": 0.3276, "step": 15486 }, { "epoch": 1.8846364466078491, "grad_norm": 2.489896059036255, "learning_rate": 1.7014251854787556e-07, "loss": 0.3001, "step": 15487 }, { "epoch": 1.8847581381198661, "grad_norm": 2.9276480674743652, "learning_rate": 1.6978439851080054e-07, "loss": 0.3048, "step": 15488 }, { "epoch": 1.8848798296318832, "grad_norm": 1.4694931507110596, "learning_rate": 1.6942665253377622e-07, "loss": 0.3504, "step": 15489 }, { "epoch": 1.8850015211439002, "grad_norm": 2.2043704986572266, "learning_rate": 1.6906928063041504e-07, "loss": 0.3718, "step": 15490 }, { "epoch": 1.8851232126559172, "grad_norm": 2.199695587158203, "learning_rate": 1.687122828143184e-07, "loss": 0.3321, "step": 15491 }, { "epoch": 1.8852449041679344, "grad_norm": 1.8922832012176514, "learning_rate": 1.6835565909906758e-07, "loss": 0.3783, "step": 15492 }, { "epoch": 1.8853665956799515, "grad_norm": 1.9197243452072144, "learning_rate": 1.6799940949823513e-07, "loss": 0.3661, "step": 15493 }, { "epoch": 1.8854882871919685, "grad_norm": 1.7733432054519653, "learning_rate": 1.676435340253757e-07, "loss": 0.3622, "step": 15494 }, { "epoch": 1.8856099787039855, "grad_norm": 1.6435930728912354, "learning_rate": 1.6728803269403182e-07, "loss": 0.4033, "step": 15495 }, { "epoch": 1.8857316702160025, "grad_norm": 2.256098747253418, "learning_rate": 1.6693290551773155e-07, "loss": 0.3568, "step": 15496 }, { "epoch": 1.8858533617280195, "grad_norm": 1.4301310777664185, "learning_rate": 1.6657815250998744e-07, "loss": 0.3644, "step": 15497 }, { "epoch": 1.8859750532400366, "grad_norm": 1.7706140279769897, "learning_rate": 1.6622377368429864e-07, "loss": 0.3567, "step": 15498 }, { "epoch": 1.8860967447520536, "grad_norm": 2.2572269439697266, "learning_rate": 1.6586976905414997e-07, "loss": 0.4194, "step": 15499 }, { "epoch": 1.8862184362640706, "grad_norm": 1.787251591682434, "learning_rate": 1.6551613863301063e-07, "loss": 0.3687, "step": 15500 }, { "epoch": 1.8863401277760876, "grad_norm": 1.6112436056137085, "learning_rate": 1.6516288243433986e-07, "loss": 0.3715, "step": 15501 }, { "epoch": 1.8864618192881046, "grad_norm": 2.2957050800323486, "learning_rate": 1.6481000047157803e-07, "loss": 0.3914, "step": 15502 }, { "epoch": 1.8865835108001217, "grad_norm": 1.4749581813812256, "learning_rate": 1.6445749275815326e-07, "loss": 0.3638, "step": 15503 }, { "epoch": 1.8867052023121387, "grad_norm": 2.2407288551330566, "learning_rate": 1.641053593074804e-07, "loss": 0.2987, "step": 15504 }, { "epoch": 1.8868268938241557, "grad_norm": 2.0497801303863525, "learning_rate": 1.637536001329565e-07, "loss": 0.3919, "step": 15505 }, { "epoch": 1.8869485853361727, "grad_norm": 2.0333075523376465, "learning_rate": 1.634022152479686e-07, "loss": 0.4193, "step": 15506 }, { "epoch": 1.8870702768481897, "grad_norm": 1.5225858688354492, "learning_rate": 1.630512046658883e-07, "loss": 0.3381, "step": 15507 }, { "epoch": 1.8871919683602068, "grad_norm": 1.4889533519744873, "learning_rate": 1.6270056840006932e-07, "loss": 0.3417, "step": 15508 }, { "epoch": 1.8873136598722238, "grad_norm": 2.391403913497925, "learning_rate": 1.6235030646385652e-07, "loss": 0.4248, "step": 15509 }, { "epoch": 1.8874353513842408, "grad_norm": 1.5303593873977661, "learning_rate": 1.6200041887057705e-07, "loss": 0.3308, "step": 15510 }, { "epoch": 1.887557042896258, "grad_norm": 1.7096195220947266, "learning_rate": 1.616509056335458e-07, "loss": 0.3759, "step": 15511 }, { "epoch": 1.887678734408275, "grad_norm": 3.1133453845977783, "learning_rate": 1.6130176676606101e-07, "loss": 0.4211, "step": 15512 }, { "epoch": 1.887800425920292, "grad_norm": 1.7224997282028198, "learning_rate": 1.6095300228140876e-07, "loss": 0.357, "step": 15513 }, { "epoch": 1.887922117432309, "grad_norm": 1.5754659175872803, "learning_rate": 1.6060461219286172e-07, "loss": 0.3594, "step": 15514 }, { "epoch": 1.888043808944326, "grad_norm": 2.473769426345825, "learning_rate": 1.6025659651367487e-07, "loss": 0.3307, "step": 15515 }, { "epoch": 1.8881655004563431, "grad_norm": 1.7727367877960205, "learning_rate": 1.5990895525709094e-07, "loss": 0.3692, "step": 15516 }, { "epoch": 1.8882871919683604, "grad_norm": 2.061589002609253, "learning_rate": 1.5956168843634046e-07, "loss": 0.4008, "step": 15517 }, { "epoch": 1.8884088834803774, "grad_norm": 1.6778526306152344, "learning_rate": 1.592147960646362e-07, "loss": 0.4159, "step": 15518 }, { "epoch": 1.8885305749923944, "grad_norm": 3.4384090900421143, "learning_rate": 1.5886827815517757e-07, "loss": 0.4471, "step": 15519 }, { "epoch": 1.8886522665044114, "grad_norm": 1.743391513824463, "learning_rate": 1.5852213472115187e-07, "loss": 0.368, "step": 15520 }, { "epoch": 1.8887739580164284, "grad_norm": 2.0557942390441895, "learning_rate": 1.5817636577572847e-07, "loss": 0.3795, "step": 15521 }, { "epoch": 1.8888956495284455, "grad_norm": 1.49049973487854, "learning_rate": 1.5783097133206694e-07, "loss": 0.3636, "step": 15522 }, { "epoch": 1.8890173410404625, "grad_norm": 1.6343085765838623, "learning_rate": 1.574859514033089e-07, "loss": 0.386, "step": 15523 }, { "epoch": 1.8891390325524795, "grad_norm": 1.5958449840545654, "learning_rate": 1.571413060025828e-07, "loss": 0.3486, "step": 15524 }, { "epoch": 1.8892607240644965, "grad_norm": 2.0006320476531982, "learning_rate": 1.5679703514300372e-07, "loss": 0.3787, "step": 15525 }, { "epoch": 1.8893824155765135, "grad_norm": 1.7781809568405151, "learning_rate": 1.564531388376722e-07, "loss": 0.3478, "step": 15526 }, { "epoch": 1.8895041070885306, "grad_norm": 1.8301674127578735, "learning_rate": 1.5610961709967233e-07, "loss": 0.3798, "step": 15527 }, { "epoch": 1.8896257986005476, "grad_norm": 1.5751827955245972, "learning_rate": 1.5576646994207912e-07, "loss": 0.3759, "step": 15528 }, { "epoch": 1.8897474901125646, "grad_norm": 2.0050911903381348, "learning_rate": 1.554236973779477e-07, "loss": 0.3099, "step": 15529 }, { "epoch": 1.8898691816245816, "grad_norm": 2.4886465072631836, "learning_rate": 1.5508129942032103e-07, "loss": 0.304, "step": 15530 }, { "epoch": 1.8899908731365986, "grad_norm": 2.0082767009735107, "learning_rate": 1.547392760822297e-07, "loss": 0.3541, "step": 15531 }, { "epoch": 1.8901125646486157, "grad_norm": 1.6126676797866821, "learning_rate": 1.5439762737668672e-07, "loss": 0.3408, "step": 15532 }, { "epoch": 1.8902342561606327, "grad_norm": 1.9257162809371948, "learning_rate": 1.540563533166939e-07, "loss": 0.3124, "step": 15533 }, { "epoch": 1.8903559476726497, "grad_norm": 2.029927968978882, "learning_rate": 1.5371545391523636e-07, "loss": 0.3296, "step": 15534 }, { "epoch": 1.8904776391846667, "grad_norm": 2.923900604248047, "learning_rate": 1.5337492918528596e-07, "loss": 0.3741, "step": 15535 }, { "epoch": 1.890599330696684, "grad_norm": 1.9904184341430664, "learning_rate": 1.5303477913980235e-07, "loss": 0.3912, "step": 15536 }, { "epoch": 1.890721022208701, "grad_norm": 1.8978614807128906, "learning_rate": 1.526950037917263e-07, "loss": 0.3184, "step": 15537 }, { "epoch": 1.890842713720718, "grad_norm": 1.5287790298461914, "learning_rate": 1.5235560315398856e-07, "loss": 0.3667, "step": 15538 }, { "epoch": 1.890964405232735, "grad_norm": 1.7170741558074951, "learning_rate": 1.5201657723950326e-07, "loss": 0.4334, "step": 15539 }, { "epoch": 1.891086096744752, "grad_norm": 2.079960823059082, "learning_rate": 1.5167792606117226e-07, "loss": 0.3985, "step": 15540 }, { "epoch": 1.891207788256769, "grad_norm": 1.5038026571273804, "learning_rate": 1.5133964963187975e-07, "loss": 0.362, "step": 15541 }, { "epoch": 1.8913294797687863, "grad_norm": 1.8691158294677734, "learning_rate": 1.5100174796450096e-07, "loss": 0.3201, "step": 15542 }, { "epoch": 1.8914511712808033, "grad_norm": 2.1943020820617676, "learning_rate": 1.5066422107189117e-07, "loss": 0.3209, "step": 15543 }, { "epoch": 1.8915728627928203, "grad_norm": 2.417699098587036, "learning_rate": 1.5032706896689452e-07, "loss": 0.2917, "step": 15544 }, { "epoch": 1.8916945543048374, "grad_norm": 1.629287838935852, "learning_rate": 1.499902916623408e-07, "loss": 0.3766, "step": 15545 }, { "epoch": 1.8918162458168544, "grad_norm": 3.198397159576416, "learning_rate": 1.496538891710442e-07, "loss": 0.4089, "step": 15546 }, { "epoch": 1.8919379373288714, "grad_norm": 1.9084103107452393, "learning_rate": 1.4931786150580775e-07, "loss": 0.3456, "step": 15547 }, { "epoch": 1.8920596288408884, "grad_norm": 1.6851654052734375, "learning_rate": 1.489822086794157e-07, "loss": 0.3023, "step": 15548 }, { "epoch": 1.8921813203529054, "grad_norm": 2.7930757999420166, "learning_rate": 1.486469307046401e-07, "loss": 0.383, "step": 15549 }, { "epoch": 1.8923030118649224, "grad_norm": 1.7203458547592163, "learning_rate": 1.4831202759424068e-07, "loss": 0.3327, "step": 15550 }, { "epoch": 1.8924247033769395, "grad_norm": 2.0448713302612305, "learning_rate": 1.4797749936096173e-07, "loss": 0.4002, "step": 15551 }, { "epoch": 1.8925463948889565, "grad_norm": 2.388394594192505, "learning_rate": 1.4764334601752972e-07, "loss": 0.391, "step": 15552 }, { "epoch": 1.8926680864009735, "grad_norm": 4.091358661651611, "learning_rate": 1.4730956757666337e-07, "loss": 0.4297, "step": 15553 }, { "epoch": 1.8927897779129905, "grad_norm": 1.76665461063385, "learning_rate": 1.469761640510603e-07, "loss": 0.2893, "step": 15554 }, { "epoch": 1.8929114694250075, "grad_norm": 1.7423388957977295, "learning_rate": 1.4664313545340926e-07, "loss": 0.3966, "step": 15555 }, { "epoch": 1.8930331609370246, "grad_norm": 1.6806039810180664, "learning_rate": 1.463104817963823e-07, "loss": 0.3056, "step": 15556 }, { "epoch": 1.8931548524490416, "grad_norm": 1.9396659135818481, "learning_rate": 1.4597820309263712e-07, "loss": 0.424, "step": 15557 }, { "epoch": 1.8932765439610586, "grad_norm": 3.064082145690918, "learning_rate": 1.4564629935481912e-07, "loss": 0.3772, "step": 15558 }, { "epoch": 1.8933982354730756, "grad_norm": 1.7717182636260986, "learning_rate": 1.45314770595556e-07, "loss": 0.3954, "step": 15559 }, { "epoch": 1.8935199269850926, "grad_norm": 1.7230448722839355, "learning_rate": 1.449836168274632e-07, "loss": 0.3335, "step": 15560 }, { "epoch": 1.8936416184971099, "grad_norm": 1.8982417583465576, "learning_rate": 1.4465283806314402e-07, "loss": 0.3958, "step": 15561 }, { "epoch": 1.893763310009127, "grad_norm": 1.5071955919265747, "learning_rate": 1.4432243431518279e-07, "loss": 0.3356, "step": 15562 }, { "epoch": 1.893885001521144, "grad_norm": 1.5439248085021973, "learning_rate": 1.439924055961539e-07, "loss": 0.3759, "step": 15563 }, { "epoch": 1.894006693033161, "grad_norm": 4.547462463378906, "learning_rate": 1.43662751918614e-07, "loss": 0.4813, "step": 15564 }, { "epoch": 1.894128384545178, "grad_norm": 1.606374979019165, "learning_rate": 1.433334732951086e-07, "loss": 0.3566, "step": 15565 }, { "epoch": 1.894250076057195, "grad_norm": 2.2416622638702393, "learning_rate": 1.4300456973816656e-07, "loss": 0.4402, "step": 15566 }, { "epoch": 1.894371767569212, "grad_norm": 1.9301303625106812, "learning_rate": 1.4267604126030454e-07, "loss": 0.3631, "step": 15567 }, { "epoch": 1.8944934590812292, "grad_norm": 3.0308613777160645, "learning_rate": 1.4234788787402033e-07, "loss": 0.3651, "step": 15568 }, { "epoch": 1.8946151505932463, "grad_norm": 2.5868449211120605, "learning_rate": 1.42020109591805e-07, "loss": 0.3535, "step": 15569 }, { "epoch": 1.8947368421052633, "grad_norm": 1.9669126272201538, "learning_rate": 1.4169270642612755e-07, "loss": 0.3292, "step": 15570 }, { "epoch": 1.8948585336172803, "grad_norm": 2.1283349990844727, "learning_rate": 1.4136567838945015e-07, "loss": 0.3361, "step": 15571 }, { "epoch": 1.8949802251292973, "grad_norm": 1.5383472442626953, "learning_rate": 1.41039025494214e-07, "loss": 0.3531, "step": 15572 }, { "epoch": 1.8951019166413143, "grad_norm": 2.2978322505950928, "learning_rate": 1.4071274775285027e-07, "loss": 0.3438, "step": 15573 }, { "epoch": 1.8952236081533314, "grad_norm": 1.8739210367202759, "learning_rate": 1.4038684517777347e-07, "loss": 0.382, "step": 15574 }, { "epoch": 1.8953452996653484, "grad_norm": 1.5733166933059692, "learning_rate": 1.4006131778138698e-07, "loss": 0.3613, "step": 15575 }, { "epoch": 1.8954669911773654, "grad_norm": 1.9286514520645142, "learning_rate": 1.3973616557607538e-07, "loss": 0.4076, "step": 15576 }, { "epoch": 1.8955886826893824, "grad_norm": 4.407425880432129, "learning_rate": 1.3941138857421211e-07, "loss": 0.3411, "step": 15577 }, { "epoch": 1.8957103742013994, "grad_norm": 1.8577276468276978, "learning_rate": 1.39086986788155e-07, "loss": 0.4108, "step": 15578 }, { "epoch": 1.8958320657134164, "grad_norm": 2.144033193588257, "learning_rate": 1.387629602302498e-07, "loss": 0.4027, "step": 15579 }, { "epoch": 1.8959537572254335, "grad_norm": 1.8283655643463135, "learning_rate": 1.3843930891282665e-07, "loss": 0.364, "step": 15580 }, { "epoch": 1.8960754487374505, "grad_norm": 1.5492373704910278, "learning_rate": 1.3811603284819786e-07, "loss": 0.3841, "step": 15581 }, { "epoch": 1.8961971402494675, "grad_norm": 2.024186372756958, "learning_rate": 1.377931320486692e-07, "loss": 0.3897, "step": 15582 }, { "epoch": 1.8963188317614845, "grad_norm": 2.529646396636963, "learning_rate": 1.374706065265241e-07, "loss": 0.3494, "step": 15583 }, { "epoch": 1.8964405232735015, "grad_norm": 2.182511329650879, "learning_rate": 1.371484562940373e-07, "loss": 0.3583, "step": 15584 }, { "epoch": 1.8965622147855186, "grad_norm": 2.0404226779937744, "learning_rate": 1.3682668136346667e-07, "loss": 0.3291, "step": 15585 }, { "epoch": 1.8966839062975356, "grad_norm": 1.476284146308899, "learning_rate": 1.365052817470569e-07, "loss": 0.3515, "step": 15586 }, { "epoch": 1.8968055978095528, "grad_norm": 1.8726391792297363, "learning_rate": 1.36184257457036e-07, "loss": 0.3278, "step": 15587 }, { "epoch": 1.8969272893215698, "grad_norm": 3.92242693901062, "learning_rate": 1.3586360850562307e-07, "loss": 0.4368, "step": 15588 }, { "epoch": 1.8970489808335869, "grad_norm": 1.8666071891784668, "learning_rate": 1.3554333490501614e-07, "loss": 0.3376, "step": 15589 }, { "epoch": 1.8971706723456039, "grad_norm": 1.6082663536071777, "learning_rate": 1.3522343666740435e-07, "loss": 0.3344, "step": 15590 }, { "epoch": 1.897292363857621, "grad_norm": 1.8468172550201416, "learning_rate": 1.3490391380495905e-07, "loss": 0.3396, "step": 15591 }, { "epoch": 1.897414055369638, "grad_norm": 1.9696063995361328, "learning_rate": 1.345847663298394e-07, "loss": 0.3169, "step": 15592 }, { "epoch": 1.8975357468816552, "grad_norm": 1.651789665222168, "learning_rate": 1.342659942541902e-07, "loss": 0.3724, "step": 15593 }, { "epoch": 1.8976574383936722, "grad_norm": 1.7387598752975464, "learning_rate": 1.3394759759014164e-07, "loss": 0.2967, "step": 15594 }, { "epoch": 1.8977791299056892, "grad_norm": 1.9992504119873047, "learning_rate": 1.3362957634980744e-07, "loss": 0.3944, "step": 15595 }, { "epoch": 1.8979008214177062, "grad_norm": 1.7703934907913208, "learning_rate": 1.3331193054529012e-07, "loss": 0.3752, "step": 15596 }, { "epoch": 1.8980225129297232, "grad_norm": 1.9858839511871338, "learning_rate": 1.329946601886778e-07, "loss": 0.3696, "step": 15597 }, { "epoch": 1.8981442044417403, "grad_norm": 1.469612717628479, "learning_rate": 1.326777652920408e-07, "loss": 0.3892, "step": 15598 }, { "epoch": 1.8982658959537573, "grad_norm": 1.9799754619598389, "learning_rate": 1.3236124586744058e-07, "loss": 0.3085, "step": 15599 }, { "epoch": 1.8983875874657743, "grad_norm": 3.9746949672698975, "learning_rate": 1.3204510192691867e-07, "loss": 0.4683, "step": 15600 }, { "epoch": 1.8985092789777913, "grad_norm": 1.6107414960861206, "learning_rate": 1.3172933348250649e-07, "loss": 0.3116, "step": 15601 }, { "epoch": 1.8986309704898083, "grad_norm": 2.864382028579712, "learning_rate": 1.3141394054621892e-07, "loss": 0.4334, "step": 15602 }, { "epoch": 1.8987526620018254, "grad_norm": 1.5157403945922852, "learning_rate": 1.3109892313005856e-07, "loss": 0.3401, "step": 15603 }, { "epoch": 1.8988743535138424, "grad_norm": 1.9197707176208496, "learning_rate": 1.307842812460114e-07, "loss": 0.3541, "step": 15604 }, { "epoch": 1.8989960450258594, "grad_norm": 2.0894527435302734, "learning_rate": 1.3047001490605005e-07, "loss": 0.366, "step": 15605 }, { "epoch": 1.8991177365378764, "grad_norm": 1.8883907794952393, "learning_rate": 1.3015612412213386e-07, "loss": 0.3127, "step": 15606 }, { "epoch": 1.8992394280498934, "grad_norm": 2.121696949005127, "learning_rate": 1.298426089062066e-07, "loss": 0.3748, "step": 15607 }, { "epoch": 1.8993611195619104, "grad_norm": 1.4395122528076172, "learning_rate": 1.2952946927019872e-07, "loss": 0.3828, "step": 15608 }, { "epoch": 1.8994828110739275, "grad_norm": 1.9695862531661987, "learning_rate": 1.2921670522602402e-07, "loss": 0.4084, "step": 15609 }, { "epoch": 1.8996045025859445, "grad_norm": 1.5089620351791382, "learning_rate": 1.2890431678558635e-07, "loss": 0.3479, "step": 15610 }, { "epoch": 1.8997261940979615, "grad_norm": 1.7086600065231323, "learning_rate": 1.285923039607706e-07, "loss": 0.3913, "step": 15611 }, { "epoch": 1.8998478856099787, "grad_norm": 1.719649076461792, "learning_rate": 1.2828066676345064e-07, "loss": 0.3901, "step": 15612 }, { "epoch": 1.8999695771219958, "grad_norm": 2.038633108139038, "learning_rate": 1.2796940520548473e-07, "loss": 0.4471, "step": 15613 }, { "epoch": 1.9000912686340128, "grad_norm": 2.121171712875366, "learning_rate": 1.2765851929871675e-07, "loss": 0.38, "step": 15614 }, { "epoch": 1.9002129601460298, "grad_norm": 2.4557530879974365, "learning_rate": 1.2734800905497725e-07, "loss": 0.4629, "step": 15615 }, { "epoch": 1.9003346516580468, "grad_norm": 1.3311986923217773, "learning_rate": 1.2703787448608118e-07, "loss": 0.3179, "step": 15616 }, { "epoch": 1.9004563431700638, "grad_norm": 3.0475714206695557, "learning_rate": 1.2672811560382913e-07, "loss": 0.4957, "step": 15617 }, { "epoch": 1.900578034682081, "grad_norm": 1.663323163986206, "learning_rate": 1.2641873242000946e-07, "loss": 0.3727, "step": 15618 }, { "epoch": 1.900699726194098, "grad_norm": 2.1675822734832764, "learning_rate": 1.2610972494639384e-07, "loss": 0.3994, "step": 15619 }, { "epoch": 1.9008214177061151, "grad_norm": 1.463021159172058, "learning_rate": 1.2580109319474065e-07, "loss": 0.3273, "step": 15620 }, { "epoch": 1.9009431092181321, "grad_norm": 2.210233211517334, "learning_rate": 1.2549283717679607e-07, "loss": 0.3225, "step": 15621 }, { "epoch": 1.9010648007301492, "grad_norm": 1.7576329708099365, "learning_rate": 1.2518495690428623e-07, "loss": 0.3378, "step": 15622 }, { "epoch": 1.9011864922421662, "grad_norm": 1.4421683549880981, "learning_rate": 1.2487745238892956e-07, "loss": 0.3749, "step": 15623 }, { "epoch": 1.9013081837541832, "grad_norm": 1.74948251247406, "learning_rate": 1.245703236424267e-07, "loss": 0.3327, "step": 15624 }, { "epoch": 1.9014298752662002, "grad_norm": 2.2133705615997314, "learning_rate": 1.2426357067646278e-07, "loss": 0.2968, "step": 15625 }, { "epoch": 1.9015515667782172, "grad_norm": 1.6672260761260986, "learning_rate": 1.2395719350271174e-07, "loss": 0.3839, "step": 15626 }, { "epoch": 1.9016732582902343, "grad_norm": 1.4690799713134766, "learning_rate": 1.2365119213283317e-07, "loss": 0.335, "step": 15627 }, { "epoch": 1.9017949498022513, "grad_norm": 1.741631031036377, "learning_rate": 1.2334556657846774e-07, "loss": 0.3582, "step": 15628 }, { "epoch": 1.9019166413142683, "grad_norm": 2.2371408939361572, "learning_rate": 1.230403168512484e-07, "loss": 0.4062, "step": 15629 }, { "epoch": 1.9020383328262853, "grad_norm": 3.551867723464966, "learning_rate": 1.2273544296278806e-07, "loss": 0.4439, "step": 15630 }, { "epoch": 1.9021600243383023, "grad_norm": 1.673030972480774, "learning_rate": 1.2243094492468964e-07, "loss": 0.383, "step": 15631 }, { "epoch": 1.9022817158503194, "grad_norm": 2.83841609954834, "learning_rate": 1.221268227485395e-07, "loss": 0.4466, "step": 15632 }, { "epoch": 1.9024034073623364, "grad_norm": 1.5153568983078003, "learning_rate": 1.218230764459094e-07, "loss": 0.357, "step": 15633 }, { "epoch": 1.9025250988743534, "grad_norm": 1.9943279027938843, "learning_rate": 1.2151970602835904e-07, "loss": 0.3939, "step": 15634 }, { "epoch": 1.9026467903863704, "grad_norm": 1.7895900011062622, "learning_rate": 1.212167115074303e-07, "loss": 0.4009, "step": 15635 }, { "epoch": 1.9027684818983874, "grad_norm": 2.7744574546813965, "learning_rate": 1.2091409289465283e-07, "loss": 0.3392, "step": 15636 }, { "epoch": 1.9028901734104047, "grad_norm": 2.0328197479248047, "learning_rate": 1.2061185020154409e-07, "loss": 0.3749, "step": 15637 }, { "epoch": 1.9030118649224217, "grad_norm": 2.0816256999969482, "learning_rate": 1.2030998343960265e-07, "loss": 0.4097, "step": 15638 }, { "epoch": 1.9031335564344387, "grad_norm": 1.815958023071289, "learning_rate": 1.200084926203171e-07, "loss": 0.3476, "step": 15639 }, { "epoch": 1.9032552479464557, "grad_norm": 1.5143921375274658, "learning_rate": 1.197073777551583e-07, "loss": 0.3698, "step": 15640 }, { "epoch": 1.9033769394584727, "grad_norm": 2.451936721801758, "learning_rate": 1.1940663885558368e-07, "loss": 0.3102, "step": 15641 }, { "epoch": 1.9034986309704898, "grad_norm": 3.6819612979888916, "learning_rate": 1.1910627593303969e-07, "loss": 0.3438, "step": 15642 }, { "epoch": 1.903620322482507, "grad_norm": 1.8531838655471802, "learning_rate": 1.1880628899895496e-07, "loss": 0.4247, "step": 15643 }, { "epoch": 1.903742013994524, "grad_norm": 2.0949432849884033, "learning_rate": 1.1850667806474148e-07, "loss": 0.3146, "step": 15644 }, { "epoch": 1.903863705506541, "grad_norm": 1.786766529083252, "learning_rate": 1.1820744314180455e-07, "loss": 0.3664, "step": 15645 }, { "epoch": 1.903985397018558, "grad_norm": 2.2288992404937744, "learning_rate": 1.179085842415284e-07, "loss": 0.3379, "step": 15646 }, { "epoch": 1.904107088530575, "grad_norm": 1.7041383981704712, "learning_rate": 1.1761010137528506e-07, "loss": 0.3304, "step": 15647 }, { "epoch": 1.904228780042592, "grad_norm": 3.200002670288086, "learning_rate": 1.173119945544321e-07, "loss": 0.3029, "step": 15648 }, { "epoch": 1.9043504715546091, "grad_norm": 1.7898532152175903, "learning_rate": 1.1701426379031377e-07, "loss": 0.4038, "step": 15649 }, { "epoch": 1.9044721630666261, "grad_norm": 3.10357403755188, "learning_rate": 1.1671690909425992e-07, "loss": 0.2895, "step": 15650 }, { "epoch": 1.9045938545786432, "grad_norm": 1.7661055326461792, "learning_rate": 1.1641993047758593e-07, "loss": 0.3555, "step": 15651 }, { "epoch": 1.9047155460906602, "grad_norm": 1.898592233657837, "learning_rate": 1.1612332795158943e-07, "loss": 0.3322, "step": 15652 }, { "epoch": 1.9048372376026772, "grad_norm": 1.8657114505767822, "learning_rate": 1.1582710152756027e-07, "loss": 0.3779, "step": 15653 }, { "epoch": 1.9049589291146942, "grad_norm": 1.4222372770309448, "learning_rate": 1.1553125121676833e-07, "loss": 0.3662, "step": 15654 }, { "epoch": 1.9050806206267112, "grad_norm": 2.719543695449829, "learning_rate": 1.1523577703047239e-07, "loss": 0.3852, "step": 15655 }, { "epoch": 1.9052023121387283, "grad_norm": 1.905992865562439, "learning_rate": 1.1494067897991568e-07, "loss": 0.4082, "step": 15656 }, { "epoch": 1.9053240036507453, "grad_norm": 3.3642287254333496, "learning_rate": 1.1464595707632697e-07, "loss": 0.4085, "step": 15657 }, { "epoch": 1.9054456951627623, "grad_norm": 4.392092704772949, "learning_rate": 1.1435161133092065e-07, "loss": 0.2837, "step": 15658 }, { "epoch": 1.9055673866747793, "grad_norm": 2.469750165939331, "learning_rate": 1.1405764175489887e-07, "loss": 0.3988, "step": 15659 }, { "epoch": 1.9056890781867963, "grad_norm": 1.797170639038086, "learning_rate": 1.1376404835944488e-07, "loss": 0.3257, "step": 15660 }, { "epoch": 1.9058107696988134, "grad_norm": 1.7183449268341064, "learning_rate": 1.134708311557331e-07, "loss": 0.3756, "step": 15661 }, { "epoch": 1.9059324612108306, "grad_norm": 1.552887201309204, "learning_rate": 1.1317799015492014e-07, "loss": 0.4094, "step": 15662 }, { "epoch": 1.9060541527228476, "grad_norm": 3.746464967727661, "learning_rate": 1.1288552536814934e-07, "loss": 0.3351, "step": 15663 }, { "epoch": 1.9061758442348646, "grad_norm": 2.0496416091918945, "learning_rate": 1.1259343680654955e-07, "loss": 0.2993, "step": 15664 }, { "epoch": 1.9062975357468817, "grad_norm": 2.1597886085510254, "learning_rate": 1.1230172448123522e-07, "loss": 0.3792, "step": 15665 }, { "epoch": 1.9064192272588987, "grad_norm": 2.3849825859069824, "learning_rate": 1.1201038840330636e-07, "loss": 0.3366, "step": 15666 }, { "epoch": 1.9065409187709157, "grad_norm": 1.6846128702163696, "learning_rate": 1.1171942858384966e-07, "loss": 0.34, "step": 15667 }, { "epoch": 1.9066626102829327, "grad_norm": 1.7930631637573242, "learning_rate": 1.1142884503393625e-07, "loss": 0.3384, "step": 15668 }, { "epoch": 1.90678430179495, "grad_norm": 2.7511916160583496, "learning_rate": 1.1113863776462286e-07, "loss": 0.452, "step": 15669 }, { "epoch": 1.906905993306967, "grad_norm": 1.9824519157409668, "learning_rate": 1.1084880678695397e-07, "loss": 0.3485, "step": 15670 }, { "epoch": 1.907027684818984, "grad_norm": 1.8829624652862549, "learning_rate": 1.1055935211195745e-07, "loss": 0.3514, "step": 15671 }, { "epoch": 1.907149376331001, "grad_norm": 1.7604342699050903, "learning_rate": 1.1027027375064669e-07, "loss": 0.3201, "step": 15672 }, { "epoch": 1.907271067843018, "grad_norm": 1.853173851966858, "learning_rate": 1.0998157171402402e-07, "loss": 0.3277, "step": 15673 }, { "epoch": 1.907392759355035, "grad_norm": 1.6976581811904907, "learning_rate": 1.0969324601307174e-07, "loss": 0.3902, "step": 15674 }, { "epoch": 1.907514450867052, "grad_norm": 1.4061689376831055, "learning_rate": 1.0940529665876554e-07, "loss": 0.319, "step": 15675 }, { "epoch": 1.907636142379069, "grad_norm": 1.9750257730484009, "learning_rate": 1.0911772366205886e-07, "loss": 0.3862, "step": 15676 }, { "epoch": 1.907757833891086, "grad_norm": 2.3953728675842285, "learning_rate": 1.0883052703389518e-07, "loss": 0.3656, "step": 15677 }, { "epoch": 1.9078795254031031, "grad_norm": 1.772693395614624, "learning_rate": 1.0854370678520465e-07, "loss": 0.3592, "step": 15678 }, { "epoch": 1.9080012169151201, "grad_norm": 2.816084861755371, "learning_rate": 1.0825726292690075e-07, "loss": 0.3902, "step": 15679 }, { "epoch": 1.9081229084271372, "grad_norm": 1.6737862825393677, "learning_rate": 1.0797119546988255e-07, "loss": 0.325, "step": 15680 }, { "epoch": 1.9082445999391542, "grad_norm": 2.518547296524048, "learning_rate": 1.0768550442503466e-07, "loss": 0.3202, "step": 15681 }, { "epoch": 1.9083662914511712, "grad_norm": 1.8919771909713745, "learning_rate": 1.0740018980322953e-07, "loss": 0.3589, "step": 15682 }, { "epoch": 1.9084879829631882, "grad_norm": 1.4554303884506226, "learning_rate": 1.071152516153251e-07, "loss": 0.3503, "step": 15683 }, { "epoch": 1.9086096744752052, "grad_norm": 1.468931794166565, "learning_rate": 1.068306898721616e-07, "loss": 0.3455, "step": 15684 }, { "epoch": 1.9087313659872223, "grad_norm": 1.76096773147583, "learning_rate": 1.0654650458456705e-07, "loss": 0.3422, "step": 15685 }, { "epoch": 1.9088530574992393, "grad_norm": 1.7063286304473877, "learning_rate": 1.0626269576335723e-07, "loss": 0.3293, "step": 15686 }, { "epoch": 1.9089747490112563, "grad_norm": 1.6289657354354858, "learning_rate": 1.059792634193313e-07, "loss": 0.3133, "step": 15687 }, { "epoch": 1.9090964405232735, "grad_norm": 1.7014787197113037, "learning_rate": 1.0569620756327281e-07, "loss": 0.4098, "step": 15688 }, { "epoch": 1.9092181320352906, "grad_norm": 2.8411800861358643, "learning_rate": 1.054135282059543e-07, "loss": 0.403, "step": 15689 }, { "epoch": 1.9093398235473076, "grad_norm": 2.6975014209747314, "learning_rate": 1.0513122535813158e-07, "loss": 0.464, "step": 15690 }, { "epoch": 1.9094615150593246, "grad_norm": 1.574360728263855, "learning_rate": 1.048492990305483e-07, "loss": 0.3582, "step": 15691 }, { "epoch": 1.9095832065713416, "grad_norm": 1.4731158018112183, "learning_rate": 1.0456774923392921e-07, "loss": 0.3162, "step": 15692 }, { "epoch": 1.9097048980833586, "grad_norm": 1.5337339639663696, "learning_rate": 1.0428657597899016e-07, "loss": 0.3401, "step": 15693 }, { "epoch": 1.9098265895953759, "grad_norm": 1.8068760633468628, "learning_rate": 1.0400577927643041e-07, "loss": 0.3964, "step": 15694 }, { "epoch": 1.909948281107393, "grad_norm": 2.491594076156616, "learning_rate": 1.037253591369336e-07, "loss": 0.3843, "step": 15695 }, { "epoch": 1.91006997261941, "grad_norm": 1.5058473348617554, "learning_rate": 1.034453155711712e-07, "loss": 0.3571, "step": 15696 }, { "epoch": 1.910191664131427, "grad_norm": 2.8071610927581787, "learning_rate": 1.0316564858980027e-07, "loss": 0.3699, "step": 15697 }, { "epoch": 1.910313355643444, "grad_norm": 1.8608394861221313, "learning_rate": 1.0288635820346004e-07, "loss": 0.3273, "step": 15698 }, { "epoch": 1.910435047155461, "grad_norm": 2.825749635696411, "learning_rate": 1.0260744442278093e-07, "loss": 0.4064, "step": 15699 }, { "epoch": 1.910556738667478, "grad_norm": 2.0415709018707275, "learning_rate": 1.0232890725837441e-07, "loss": 0.3803, "step": 15700 }, { "epoch": 1.910678430179495, "grad_norm": 1.860910177230835, "learning_rate": 1.0205074672084092e-07, "loss": 0.4217, "step": 15701 }, { "epoch": 1.910800121691512, "grad_norm": 1.3647487163543701, "learning_rate": 1.017729628207631e-07, "loss": 0.3409, "step": 15702 }, { "epoch": 1.910921813203529, "grad_norm": 2.0112497806549072, "learning_rate": 1.014955555687136e-07, "loss": 0.4113, "step": 15703 }, { "epoch": 1.911043504715546, "grad_norm": 2.5934998989105225, "learning_rate": 1.012185249752462e-07, "loss": 0.4251, "step": 15704 }, { "epoch": 1.911165196227563, "grad_norm": 1.5455267429351807, "learning_rate": 1.0094187105090358e-07, "loss": 0.3572, "step": 15705 }, { "epoch": 1.91128688773958, "grad_norm": 2.318258285522461, "learning_rate": 1.0066559380621177e-07, "loss": 0.4247, "step": 15706 }, { "epoch": 1.9114085792515971, "grad_norm": 1.9367578029632568, "learning_rate": 1.0038969325168569e-07, "loss": 0.3123, "step": 15707 }, { "epoch": 1.9115302707636141, "grad_norm": 3.868952512741089, "learning_rate": 1.0011416939782248e-07, "loss": 0.4518, "step": 15708 }, { "epoch": 1.9116519622756312, "grad_norm": 1.7361475229263306, "learning_rate": 9.9839022255106e-08, "loss": 0.3311, "step": 15709 }, { "epoch": 1.9117736537876482, "grad_norm": 1.4297748804092407, "learning_rate": 9.956425183400786e-08, "loss": 0.3352, "step": 15710 }, { "epoch": 1.9118953452996652, "grad_norm": 1.944142460823059, "learning_rate": 9.928985814498193e-08, "loss": 0.3698, "step": 15711 }, { "epoch": 1.9120170368116822, "grad_norm": 1.6730833053588867, "learning_rate": 9.901584119847096e-08, "loss": 0.365, "step": 15712 }, { "epoch": 1.9121387283236995, "grad_norm": 2.269139289855957, "learning_rate": 9.874220100490106e-08, "loss": 0.3638, "step": 15713 }, { "epoch": 1.9122604198357165, "grad_norm": 1.6421120166778564, "learning_rate": 9.8468937574685e-08, "loss": 0.4002, "step": 15714 }, { "epoch": 1.9123821113477335, "grad_norm": 1.5852395296096802, "learning_rate": 9.819605091822004e-08, "loss": 0.3799, "step": 15715 }, { "epoch": 1.9125038028597505, "grad_norm": 2.432729721069336, "learning_rate": 9.79235410458923e-08, "loss": 0.3478, "step": 15716 }, { "epoch": 1.9126254943717675, "grad_norm": 1.454453945159912, "learning_rate": 9.765140796806904e-08, "loss": 0.3429, "step": 15717 }, { "epoch": 1.9127471858837846, "grad_norm": 2.1835670471191406, "learning_rate": 9.737965169510644e-08, "loss": 0.3391, "step": 15718 }, { "epoch": 1.9128688773958018, "grad_norm": 1.790948748588562, "learning_rate": 9.710827223734621e-08, "loss": 0.3522, "step": 15719 }, { "epoch": 1.9129905689078188, "grad_norm": 3.2709999084472656, "learning_rate": 9.683726960511231e-08, "loss": 0.3517, "step": 15720 }, { "epoch": 1.9131122604198358, "grad_norm": 1.9048463106155396, "learning_rate": 9.656664380872094e-08, "loss": 0.3838, "step": 15721 }, { "epoch": 1.9132339519318529, "grad_norm": 1.8403513431549072, "learning_rate": 9.62963948584661e-08, "loss": 0.3448, "step": 15722 }, { "epoch": 1.9133556434438699, "grad_norm": 1.816890835762024, "learning_rate": 9.602652276463398e-08, "loss": 0.4082, "step": 15723 }, { "epoch": 1.913477334955887, "grad_norm": 1.6230320930480957, "learning_rate": 9.575702753749194e-08, "loss": 0.3814, "step": 15724 }, { "epoch": 1.913599026467904, "grad_norm": 1.7542905807495117, "learning_rate": 9.548790918729622e-08, "loss": 0.3474, "step": 15725 }, { "epoch": 1.913720717979921, "grad_norm": 2.545095205307007, "learning_rate": 9.521916772428752e-08, "loss": 0.4076, "step": 15726 }, { "epoch": 1.913842409491938, "grad_norm": 1.2396000623703003, "learning_rate": 9.495080315868988e-08, "loss": 0.3157, "step": 15727 }, { "epoch": 1.913964101003955, "grad_norm": 1.5171116590499878, "learning_rate": 9.468281550071734e-08, "loss": 0.3628, "step": 15728 }, { "epoch": 1.914085792515972, "grad_norm": 2.3970634937286377, "learning_rate": 9.441520476056621e-08, "loss": 0.3573, "step": 15729 }, { "epoch": 1.914207484027989, "grad_norm": 2.6460959911346436, "learning_rate": 9.414797094842054e-08, "loss": 0.4904, "step": 15730 }, { "epoch": 1.914329175540006, "grad_norm": 1.9427192211151123, "learning_rate": 9.388111407444666e-08, "loss": 0.3427, "step": 15731 }, { "epoch": 1.914450867052023, "grad_norm": 1.7334887981414795, "learning_rate": 9.361463414880311e-08, "loss": 0.4313, "step": 15732 }, { "epoch": 1.91457255856404, "grad_norm": 2.8944931030273438, "learning_rate": 9.334853118162735e-08, "loss": 0.3892, "step": 15733 }, { "epoch": 1.914694250076057, "grad_norm": 2.0976483821868896, "learning_rate": 9.308280518304458e-08, "loss": 0.3385, "step": 15734 }, { "epoch": 1.914815941588074, "grad_norm": 1.70675528049469, "learning_rate": 9.281745616316784e-08, "loss": 0.4182, "step": 15735 }, { "epoch": 1.9149376331000911, "grad_norm": 1.8453956842422485, "learning_rate": 9.25524841320946e-08, "loss": 0.3672, "step": 15736 }, { "epoch": 1.9150593246121081, "grad_norm": 1.8507795333862305, "learning_rate": 9.22878890999046e-08, "loss": 0.3733, "step": 15737 }, { "epoch": 1.9151810161241254, "grad_norm": 2.1185927391052246, "learning_rate": 9.202367107667087e-08, "loss": 0.3521, "step": 15738 }, { "epoch": 1.9153027076361424, "grad_norm": 1.6565144062042236, "learning_rate": 9.175983007244316e-08, "loss": 0.3779, "step": 15739 }, { "epoch": 1.9154243991481594, "grad_norm": 2.4728691577911377, "learning_rate": 9.149636609726231e-08, "loss": 0.3876, "step": 15740 }, { "epoch": 1.9155460906601764, "grad_norm": 1.765580415725708, "learning_rate": 9.123327916115588e-08, "loss": 0.3668, "step": 15741 }, { "epoch": 1.9156677821721935, "grad_norm": 1.8593168258666992, "learning_rate": 9.097056927413139e-08, "loss": 0.3295, "step": 15742 }, { "epoch": 1.9157894736842105, "grad_norm": 1.524196982383728, "learning_rate": 9.070823644618865e-08, "loss": 0.3489, "step": 15743 }, { "epoch": 1.9159111651962277, "grad_norm": 2.2295501232147217, "learning_rate": 9.044628068730854e-08, "loss": 0.4168, "step": 15744 }, { "epoch": 1.9160328567082447, "grad_norm": 2.052743911743164, "learning_rate": 9.018470200745866e-08, "loss": 0.3469, "step": 15745 }, { "epoch": 1.9161545482202618, "grad_norm": 2.7995383739471436, "learning_rate": 8.992350041659325e-08, "loss": 0.2683, "step": 15746 }, { "epoch": 1.9162762397322788, "grad_norm": 1.984447717666626, "learning_rate": 8.966267592465105e-08, "loss": 0.4216, "step": 15747 }, { "epoch": 1.9163979312442958, "grad_norm": 1.9310307502746582, "learning_rate": 8.940222854155745e-08, "loss": 0.4126, "step": 15748 }, { "epoch": 1.9165196227563128, "grad_norm": 2.677833318710327, "learning_rate": 8.914215827722338e-08, "loss": 0.4041, "step": 15749 }, { "epoch": 1.9166413142683298, "grad_norm": 1.8265058994293213, "learning_rate": 8.888246514154431e-08, "loss": 0.3288, "step": 15750 }, { "epoch": 1.9167630057803469, "grad_norm": 1.5710548162460327, "learning_rate": 8.862314914440229e-08, "loss": 0.3424, "step": 15751 }, { "epoch": 1.9168846972923639, "grad_norm": 2.239837408065796, "learning_rate": 8.836421029566388e-08, "loss": 0.3345, "step": 15752 }, { "epoch": 1.917006388804381, "grad_norm": 1.3584866523742676, "learning_rate": 8.810564860518456e-08, "loss": 0.3143, "step": 15753 }, { "epoch": 1.917128080316398, "grad_norm": 2.688485860824585, "learning_rate": 8.784746408280087e-08, "loss": 0.4219, "step": 15754 }, { "epoch": 1.917249771828415, "grad_norm": 1.5568344593048096, "learning_rate": 8.758965673833942e-08, "loss": 0.3493, "step": 15755 }, { "epoch": 1.917371463340432, "grad_norm": 1.7253013849258423, "learning_rate": 8.73322265816079e-08, "loss": 0.3759, "step": 15756 }, { "epoch": 1.917493154852449, "grad_norm": 1.5808874368667603, "learning_rate": 8.707517362240292e-08, "loss": 0.3862, "step": 15757 }, { "epoch": 1.917614846364466, "grad_norm": 1.8901225328445435, "learning_rate": 8.681849787050667e-08, "loss": 0.3183, "step": 15758 }, { "epoch": 1.917736537876483, "grad_norm": 3.3234410285949707, "learning_rate": 8.656219933568688e-08, "loss": 0.3031, "step": 15759 }, { "epoch": 1.9178582293885, "grad_norm": 2.0144050121307373, "learning_rate": 8.630627802769465e-08, "loss": 0.327, "step": 15760 }, { "epoch": 1.917979920900517, "grad_norm": 1.5334062576293945, "learning_rate": 8.605073395626772e-08, "loss": 0.3797, "step": 15761 }, { "epoch": 1.918101612412534, "grad_norm": 1.4984906911849976, "learning_rate": 8.579556713113169e-08, "loss": 0.3441, "step": 15762 }, { "epoch": 1.9182233039245513, "grad_norm": 1.9602434635162354, "learning_rate": 8.554077756199652e-08, "loss": 0.3244, "step": 15763 }, { "epoch": 1.9183449954365683, "grad_norm": 2.1524736881256104, "learning_rate": 8.52863652585556e-08, "loss": 0.4062, "step": 15764 }, { "epoch": 1.9184666869485854, "grad_norm": 1.5244736671447754, "learning_rate": 8.503233023049006e-08, "loss": 0.3846, "step": 15765 }, { "epoch": 1.9185883784606024, "grad_norm": 1.5238925218582153, "learning_rate": 8.477867248746773e-08, "loss": 0.3626, "step": 15766 }, { "epoch": 1.9187100699726194, "grad_norm": 1.6026420593261719, "learning_rate": 8.452539203914089e-08, "loss": 0.3641, "step": 15767 }, { "epoch": 1.9188317614846364, "grad_norm": 2.1201412677764893, "learning_rate": 8.427248889514738e-08, "loss": 0.3758, "step": 15768 }, { "epoch": 1.9189534529966534, "grad_norm": 3.2570815086364746, "learning_rate": 8.40199630651084e-08, "loss": 0.312, "step": 15769 }, { "epoch": 1.9190751445086707, "grad_norm": 3.0580813884735107, "learning_rate": 8.376781455863625e-08, "loss": 0.4261, "step": 15770 }, { "epoch": 1.9191968360206877, "grad_norm": 2.4813880920410156, "learning_rate": 8.351604338532327e-08, "loss": 0.3485, "step": 15771 }, { "epoch": 1.9193185275327047, "grad_norm": 2.4027113914489746, "learning_rate": 8.326464955475177e-08, "loss": 0.4122, "step": 15772 }, { "epoch": 1.9194402190447217, "grad_norm": 1.8295562267303467, "learning_rate": 8.301363307648636e-08, "loss": 0.3475, "step": 15773 }, { "epoch": 1.9195619105567387, "grad_norm": 2.1208078861236572, "learning_rate": 8.276299396007825e-08, "loss": 0.41, "step": 15774 }, { "epoch": 1.9196836020687558, "grad_norm": 2.149620771408081, "learning_rate": 8.251273221506762e-08, "loss": 0.3311, "step": 15775 }, { "epoch": 1.9198052935807728, "grad_norm": 2.189979076385498, "learning_rate": 8.22628478509746e-08, "loss": 0.3161, "step": 15776 }, { "epoch": 1.9199269850927898, "grad_norm": 1.9679685831069946, "learning_rate": 8.201334087730828e-08, "loss": 0.321, "step": 15777 }, { "epoch": 1.9200486766048068, "grad_norm": 2.358872413635254, "learning_rate": 8.176421130356438e-08, "loss": 0.4111, "step": 15778 }, { "epoch": 1.9201703681168238, "grad_norm": 1.8389935493469238, "learning_rate": 8.151545913922198e-08, "loss": 0.4004, "step": 15779 }, { "epoch": 1.9202920596288409, "grad_norm": 2.633004903793335, "learning_rate": 8.126708439374464e-08, "loss": 0.4396, "step": 15780 }, { "epoch": 1.9204137511408579, "grad_norm": 1.9778844118118286, "learning_rate": 8.101908707658702e-08, "loss": 0.3944, "step": 15781 }, { "epoch": 1.920535442652875, "grad_norm": 1.9150971174240112, "learning_rate": 8.077146719718487e-08, "loss": 0.3514, "step": 15782 }, { "epoch": 1.920657134164892, "grad_norm": 1.8329989910125732, "learning_rate": 8.052422476495846e-08, "loss": 0.3841, "step": 15783 }, { "epoch": 1.920778825676909, "grad_norm": 1.71555757522583, "learning_rate": 8.027735978931917e-08, "loss": 0.339, "step": 15784 }, { "epoch": 1.920900517188926, "grad_norm": 1.8899199962615967, "learning_rate": 8.003087227965723e-08, "loss": 0.293, "step": 15785 }, { "epoch": 1.921022208700943, "grad_norm": 1.4862643480300903, "learning_rate": 7.978476224535514e-08, "loss": 0.3697, "step": 15786 }, { "epoch": 1.92114390021296, "grad_norm": 2.145812511444092, "learning_rate": 7.953902969577653e-08, "loss": 0.3748, "step": 15787 }, { "epoch": 1.921265591724977, "grad_norm": 1.9779895544052124, "learning_rate": 7.92936746402717e-08, "loss": 0.3314, "step": 15788 }, { "epoch": 1.9213872832369943, "grad_norm": 2.167039155960083, "learning_rate": 7.904869708817653e-08, "loss": 0.3366, "step": 15789 }, { "epoch": 1.9215089747490113, "grad_norm": 2.6096041202545166, "learning_rate": 7.880409704881464e-08, "loss": 0.3251, "step": 15790 }, { "epoch": 1.9216306662610283, "grad_norm": 1.6183769702911377, "learning_rate": 7.855987453149195e-08, "loss": 0.406, "step": 15791 }, { "epoch": 1.9217523577730453, "grad_norm": 1.6336644887924194, "learning_rate": 7.831602954550322e-08, "loss": 0.3797, "step": 15792 }, { "epoch": 1.9218740492850623, "grad_norm": 2.0201914310455322, "learning_rate": 7.807256210012659e-08, "loss": 0.3829, "step": 15793 }, { "epoch": 1.9219957407970794, "grad_norm": 2.7254021167755127, "learning_rate": 7.782947220462466e-08, "loss": 0.4408, "step": 15794 }, { "epoch": 1.9221174323090966, "grad_norm": 2.432457208633423, "learning_rate": 7.758675986825115e-08, "loss": 0.4338, "step": 15795 }, { "epoch": 1.9222391238211136, "grad_norm": 2.04353666305542, "learning_rate": 7.734442510023754e-08, "loss": 0.339, "step": 15796 }, { "epoch": 1.9223608153331306, "grad_norm": 1.6202785968780518, "learning_rate": 7.710246790980869e-08, "loss": 0.3562, "step": 15797 }, { "epoch": 1.9224825068451477, "grad_norm": 2.105527400970459, "learning_rate": 7.686088830616945e-08, "loss": 0.3797, "step": 15798 }, { "epoch": 1.9226041983571647, "grad_norm": 1.8749754428863525, "learning_rate": 7.661968629851357e-08, "loss": 0.3421, "step": 15799 }, { "epoch": 1.9227258898691817, "grad_norm": 2.8890326023101807, "learning_rate": 7.637886189601817e-08, "loss": 0.2965, "step": 15800 }, { "epoch": 1.9228475813811987, "grad_norm": 1.6345950365066528, "learning_rate": 7.613841510784925e-08, "loss": 0.3284, "step": 15801 }, { "epoch": 1.9229692728932157, "grad_norm": 2.3004817962646484, "learning_rate": 7.589834594315393e-08, "loss": 0.3615, "step": 15802 }, { "epoch": 1.9230909644052328, "grad_norm": 2.3893001079559326, "learning_rate": 7.565865441106823e-08, "loss": 0.3655, "step": 15803 }, { "epoch": 1.9232126559172498, "grad_norm": 2.915879249572754, "learning_rate": 7.541934052071375e-08, "loss": 0.4081, "step": 15804 }, { "epoch": 1.9233343474292668, "grad_norm": 3.374998092651367, "learning_rate": 7.518040428119433e-08, "loss": 0.3004, "step": 15805 }, { "epoch": 1.9234560389412838, "grad_norm": 1.637037992477417, "learning_rate": 7.494184570160601e-08, "loss": 0.338, "step": 15806 }, { "epoch": 1.9235777304533008, "grad_norm": 1.5632528066635132, "learning_rate": 7.470366479102265e-08, "loss": 0.3495, "step": 15807 }, { "epoch": 1.9236994219653178, "grad_norm": 4.725611209869385, "learning_rate": 7.446586155851032e-08, "loss": 0.4816, "step": 15808 }, { "epoch": 1.9238211134773349, "grad_norm": 2.030162811279297, "learning_rate": 7.422843601311624e-08, "loss": 0.4317, "step": 15809 }, { "epoch": 1.9239428049893519, "grad_norm": 1.5780426263809204, "learning_rate": 7.39913881638743e-08, "loss": 0.3599, "step": 15810 }, { "epoch": 1.924064496501369, "grad_norm": 1.7926127910614014, "learning_rate": 7.375471801980727e-08, "loss": 0.3422, "step": 15811 }, { "epoch": 1.924186188013386, "grad_norm": 1.820106029510498, "learning_rate": 7.351842558991906e-08, "loss": 0.3676, "step": 15812 }, { "epoch": 1.924307879525403, "grad_norm": 1.8103939294815063, "learning_rate": 7.328251088320027e-08, "loss": 0.393, "step": 15813 }, { "epoch": 1.9244295710374202, "grad_norm": 2.31059193611145, "learning_rate": 7.304697390863036e-08, "loss": 0.3972, "step": 15814 }, { "epoch": 1.9245512625494372, "grad_norm": 1.330661654472351, "learning_rate": 7.281181467517106e-08, "loss": 0.2745, "step": 15815 }, { "epoch": 1.9246729540614542, "grad_norm": 2.3426334857940674, "learning_rate": 7.257703319176968e-08, "loss": 0.3793, "step": 15816 }, { "epoch": 1.9247946455734712, "grad_norm": 1.8138560056686401, "learning_rate": 7.234262946736015e-08, "loss": 0.424, "step": 15817 }, { "epoch": 1.9249163370854883, "grad_norm": 1.8721749782562256, "learning_rate": 7.210860351086423e-08, "loss": 0.3555, "step": 15818 }, { "epoch": 1.9250380285975053, "grad_norm": 2.384075403213501, "learning_rate": 7.187495533118482e-08, "loss": 0.3465, "step": 15819 }, { "epoch": 1.9251597201095225, "grad_norm": 1.7856847047805786, "learning_rate": 7.164168493721368e-08, "loss": 0.3739, "step": 15820 }, { "epoch": 1.9252814116215395, "grad_norm": 2.844778299331665, "learning_rate": 7.140879233782594e-08, "loss": 0.3887, "step": 15821 }, { "epoch": 1.9254031031335566, "grad_norm": 1.6135432720184326, "learning_rate": 7.117627754188561e-08, "loss": 0.3408, "step": 15822 }, { "epoch": 1.9255247946455736, "grad_norm": 1.8190762996673584, "learning_rate": 7.094414055824006e-08, "loss": 0.3555, "step": 15823 }, { "epoch": 1.9256464861575906, "grad_norm": 2.879199504852295, "learning_rate": 7.071238139572001e-08, "loss": 0.4137, "step": 15824 }, { "epoch": 1.9257681776696076, "grad_norm": 2.9349617958068848, "learning_rate": 7.048100006314839e-08, "loss": 0.4067, "step": 15825 }, { "epoch": 1.9258898691816246, "grad_norm": 1.6504396200180054, "learning_rate": 7.024999656932597e-08, "loss": 0.3466, "step": 15826 }, { "epoch": 1.9260115606936417, "grad_norm": 2.51949143409729, "learning_rate": 7.001937092304568e-08, "loss": 0.4156, "step": 15827 }, { "epoch": 1.9261332522056587, "grad_norm": 1.7395340204238892, "learning_rate": 6.978912313308272e-08, "loss": 0.3635, "step": 15828 }, { "epoch": 1.9262549437176757, "grad_norm": 2.0017318725585938, "learning_rate": 6.955925320819678e-08, "loss": 0.391, "step": 15829 }, { "epoch": 1.9263766352296927, "grad_norm": 2.966348886489868, "learning_rate": 6.932976115713752e-08, "loss": 0.321, "step": 15830 }, { "epoch": 1.9264983267417097, "grad_norm": 1.6849247217178345, "learning_rate": 6.910064698863572e-08, "loss": 0.3536, "step": 15831 }, { "epoch": 1.9266200182537268, "grad_norm": 2.099560260772705, "learning_rate": 6.887191071140886e-08, "loss": 0.3786, "step": 15832 }, { "epoch": 1.9267417097657438, "grad_norm": 2.352947235107422, "learning_rate": 6.86435523341633e-08, "loss": 0.3312, "step": 15833 }, { "epoch": 1.9268634012777608, "grad_norm": 1.2084604501724243, "learning_rate": 6.841557186558767e-08, "loss": 0.3361, "step": 15834 }, { "epoch": 1.9269850927897778, "grad_norm": 1.5980653762817383, "learning_rate": 6.818796931435612e-08, "loss": 0.3373, "step": 15835 }, { "epoch": 1.9271067843017948, "grad_norm": 1.4440498352050781, "learning_rate": 6.796074468913061e-08, "loss": 0.3445, "step": 15836 }, { "epoch": 1.9272284758138118, "grad_norm": 1.4435983896255493, "learning_rate": 6.773389799855534e-08, "loss": 0.3818, "step": 15837 }, { "epoch": 1.9273501673258289, "grad_norm": 2.4774882793426514, "learning_rate": 6.750742925126563e-08, "loss": 0.3883, "step": 15838 }, { "epoch": 1.927471858837846, "grad_norm": 1.8974932432174683, "learning_rate": 6.728133845587681e-08, "loss": 0.415, "step": 15839 }, { "epoch": 1.9275935503498631, "grad_norm": 2.4979612827301025, "learning_rate": 6.70556256209931e-08, "loss": 0.4277, "step": 15840 }, { "epoch": 1.9277152418618801, "grad_norm": 2.7445714473724365, "learning_rate": 6.68302907552032e-08, "loss": 0.4246, "step": 15841 }, { "epoch": 1.9278369333738972, "grad_norm": 1.7106176614761353, "learning_rate": 6.660533386708023e-08, "loss": 0.4145, "step": 15842 }, { "epoch": 1.9279586248859142, "grad_norm": 1.5466960668563843, "learning_rate": 6.638075496518515e-08, "loss": 0.3284, "step": 15843 }, { "epoch": 1.9280803163979312, "grad_norm": 3.4106321334838867, "learning_rate": 6.615655405806442e-08, "loss": 0.3081, "step": 15844 }, { "epoch": 1.9282020079099484, "grad_norm": 1.6802036762237549, "learning_rate": 6.593273115424903e-08, "loss": 0.3772, "step": 15845 }, { "epoch": 1.9283236994219655, "grad_norm": 1.694337010383606, "learning_rate": 6.570928626225548e-08, "loss": 0.3815, "step": 15846 }, { "epoch": 1.9284453909339825, "grad_norm": 2.3030476570129395, "learning_rate": 6.548621939058585e-08, "loss": 0.4042, "step": 15847 }, { "epoch": 1.9285670824459995, "grad_norm": 2.331472158432007, "learning_rate": 6.526353054772894e-08, "loss": 0.4184, "step": 15848 }, { "epoch": 1.9286887739580165, "grad_norm": 2.278980016708374, "learning_rate": 6.504121974215904e-08, "loss": 0.4, "step": 15849 }, { "epoch": 1.9288104654700335, "grad_norm": 1.4729667901992798, "learning_rate": 6.481928698233497e-08, "loss": 0.3584, "step": 15850 }, { "epoch": 1.9289321569820506, "grad_norm": 3.1923413276672363, "learning_rate": 6.459773227670107e-08, "loss": 0.4412, "step": 15851 }, { "epoch": 1.9290538484940676, "grad_norm": 2.1268229484558105, "learning_rate": 6.437655563368838e-08, "loss": 0.3147, "step": 15852 }, { "epoch": 1.9291755400060846, "grad_norm": 1.7822880744934082, "learning_rate": 6.415575706171462e-08, "loss": 0.3684, "step": 15853 }, { "epoch": 1.9292972315181016, "grad_norm": 1.8047879934310913, "learning_rate": 6.393533656917972e-08, "loss": 0.3204, "step": 15854 }, { "epoch": 1.9294189230301186, "grad_norm": 1.5757339000701904, "learning_rate": 6.371529416447254e-08, "loss": 0.3826, "step": 15855 }, { "epoch": 1.9295406145421357, "grad_norm": 1.5452866554260254, "learning_rate": 6.349562985596413e-08, "loss": 0.3321, "step": 15856 }, { "epoch": 1.9296623060541527, "grad_norm": 2.2325963973999023, "learning_rate": 6.327634365201452e-08, "loss": 0.4216, "step": 15857 }, { "epoch": 1.9297839975661697, "grad_norm": 1.651720404624939, "learning_rate": 6.305743556096922e-08, "loss": 0.2962, "step": 15858 }, { "epoch": 1.9299056890781867, "grad_norm": 1.6414620876312256, "learning_rate": 6.28389055911549e-08, "loss": 0.4104, "step": 15859 }, { "epoch": 1.9300273805902037, "grad_norm": 2.2379367351531982, "learning_rate": 6.262075375089049e-08, "loss": 0.3987, "step": 15860 }, { "epoch": 1.9301490721022208, "grad_norm": 1.4453548192977905, "learning_rate": 6.240298004847489e-08, "loss": 0.3214, "step": 15861 }, { "epoch": 1.9302707636142378, "grad_norm": 1.6983914375305176, "learning_rate": 6.218558449219591e-08, "loss": 0.354, "step": 15862 }, { "epoch": 1.9303924551262548, "grad_norm": 1.6801940202713013, "learning_rate": 6.196856709032584e-08, "loss": 0.3805, "step": 15863 }, { "epoch": 1.930514146638272, "grad_norm": 2.0834500789642334, "learning_rate": 6.17519278511225e-08, "loss": 0.4061, "step": 15864 }, { "epoch": 1.930635838150289, "grad_norm": 2.9435620307922363, "learning_rate": 6.15356667828293e-08, "loss": 0.4177, "step": 15865 }, { "epoch": 1.930757529662306, "grad_norm": 2.922804117202759, "learning_rate": 6.131978389367522e-08, "loss": 0.2928, "step": 15866 }, { "epoch": 1.930879221174323, "grad_norm": 2.215226888656616, "learning_rate": 6.110427919187478e-08, "loss": 0.3764, "step": 15867 }, { "epoch": 1.93100091268634, "grad_norm": 2.2295496463775635, "learning_rate": 6.088915268562922e-08, "loss": 0.3259, "step": 15868 }, { "epoch": 1.9311226041983571, "grad_norm": 3.030305862426758, "learning_rate": 6.067440438312532e-08, "loss": 0.436, "step": 15869 }, { "epoch": 1.9312442957103741, "grad_norm": 2.6606290340423584, "learning_rate": 6.046003429253211e-08, "loss": 0.2812, "step": 15870 }, { "epoch": 1.9313659872223914, "grad_norm": 1.3542592525482178, "learning_rate": 6.02460424220086e-08, "loss": 0.3399, "step": 15871 }, { "epoch": 1.9314876787344084, "grad_norm": 1.776929497718811, "learning_rate": 6.003242877969828e-08, "loss": 0.3546, "step": 15872 }, { "epoch": 1.9316093702464254, "grad_norm": 3.7559821605682373, "learning_rate": 5.981919337372688e-08, "loss": 0.292, "step": 15873 }, { "epoch": 1.9317310617584424, "grad_norm": 1.4685168266296387, "learning_rate": 5.960633621221235e-08, "loss": 0.3764, "step": 15874 }, { "epoch": 1.9318527532704595, "grad_norm": 1.5051461458206177, "learning_rate": 5.939385730325042e-08, "loss": 0.3203, "step": 15875 }, { "epoch": 1.9319744447824765, "grad_norm": 1.6219699382781982, "learning_rate": 5.918175665492909e-08, "loss": 0.3176, "step": 15876 }, { "epoch": 1.9320961362944935, "grad_norm": 1.6281224489212036, "learning_rate": 5.897003427531855e-08, "loss": 0.3485, "step": 15877 }, { "epoch": 1.9322178278065105, "grad_norm": 2.3135859966278076, "learning_rate": 5.87586901724746e-08, "loss": 0.3808, "step": 15878 }, { "epoch": 1.9323395193185275, "grad_norm": 1.6280550956726074, "learning_rate": 5.8547724354439673e-08, "loss": 0.3935, "step": 15879 }, { "epoch": 1.9324612108305446, "grad_norm": 1.603858470916748, "learning_rate": 5.83371368292418e-08, "loss": 0.3458, "step": 15880 }, { "epoch": 1.9325829023425616, "grad_norm": 1.6695835590362549, "learning_rate": 5.8126927604893467e-08, "loss": 0.369, "step": 15881 }, { "epoch": 1.9327045938545786, "grad_norm": 1.996770977973938, "learning_rate": 5.791709668939383e-08, "loss": 0.3828, "step": 15882 }, { "epoch": 1.9328262853665956, "grad_norm": 1.8552868366241455, "learning_rate": 5.770764409072871e-08, "loss": 0.3891, "step": 15883 }, { "epoch": 1.9329479768786126, "grad_norm": 1.8187626600265503, "learning_rate": 5.749856981686619e-08, "loss": 0.3679, "step": 15884 }, { "epoch": 1.9330696683906297, "grad_norm": 2.197300672531128, "learning_rate": 5.7289873875763234e-08, "loss": 0.3222, "step": 15885 }, { "epoch": 1.9331913599026467, "grad_norm": 1.9732637405395508, "learning_rate": 5.708155627536127e-08, "loss": 0.398, "step": 15886 }, { "epoch": 1.9333130514146637, "grad_norm": 2.3674983978271484, "learning_rate": 5.687361702358618e-08, "loss": 0.3764, "step": 15887 }, { "epoch": 1.9334347429266807, "grad_norm": 3.2757568359375, "learning_rate": 5.6666056128351634e-08, "loss": 0.4391, "step": 15888 }, { "epoch": 1.9335564344386977, "grad_norm": 1.9373129606246948, "learning_rate": 5.6458873597554645e-08, "loss": 0.4269, "step": 15889 }, { "epoch": 1.933678125950715, "grad_norm": 2.645625352859497, "learning_rate": 5.6252069439080014e-08, "loss": 0.3673, "step": 15890 }, { "epoch": 1.933799817462732, "grad_norm": 1.5224021673202515, "learning_rate": 5.604564366079701e-08, "loss": 0.3461, "step": 15891 }, { "epoch": 1.933921508974749, "grad_norm": 1.5954240560531616, "learning_rate": 5.583959627055935e-08, "loss": 0.3779, "step": 15892 }, { "epoch": 1.934043200486766, "grad_norm": 2.3299670219421387, "learning_rate": 5.563392727620853e-08, "loss": 0.3916, "step": 15893 }, { "epoch": 1.934164891998783, "grad_norm": 2.9327049255371094, "learning_rate": 5.5428636685570524e-08, "loss": 0.4028, "step": 15894 }, { "epoch": 1.9342865835108, "grad_norm": 1.5220563411712646, "learning_rate": 5.522372450645686e-08, "loss": 0.3277, "step": 15895 }, { "epoch": 1.9344082750228173, "grad_norm": 2.57047438621521, "learning_rate": 5.501919074666462e-08, "loss": 0.3375, "step": 15896 }, { "epoch": 1.9345299665348343, "grad_norm": 2.0873324871063232, "learning_rate": 5.481503541397759e-08, "loss": 0.2955, "step": 15897 }, { "epoch": 1.9346516580468514, "grad_norm": 1.910020112991333, "learning_rate": 5.4611258516164e-08, "loss": 0.4043, "step": 15898 }, { "epoch": 1.9347733495588684, "grad_norm": 1.6588650941848755, "learning_rate": 5.440786006097876e-08, "loss": 0.3143, "step": 15899 }, { "epoch": 1.9348950410708854, "grad_norm": 2.2277395725250244, "learning_rate": 5.4204840056159e-08, "loss": 0.3514, "step": 15900 }, { "epoch": 1.9350167325829024, "grad_norm": 2.939955234527588, "learning_rate": 5.4002198509433004e-08, "loss": 0.391, "step": 15901 }, { "epoch": 1.9351384240949194, "grad_norm": 1.6179908514022827, "learning_rate": 5.379993542850903e-08, "loss": 0.3564, "step": 15902 }, { "epoch": 1.9352601156069364, "grad_norm": 2.887336492538452, "learning_rate": 5.359805082108649e-08, "loss": 0.3235, "step": 15903 }, { "epoch": 1.9353818071189535, "grad_norm": 1.9876681566238403, "learning_rate": 5.33965446948459e-08, "loss": 0.3223, "step": 15904 }, { "epoch": 1.9355034986309705, "grad_norm": 3.082040786743164, "learning_rate": 5.3195417057454455e-08, "loss": 0.3496, "step": 15905 }, { "epoch": 1.9356251901429875, "grad_norm": 2.421233654022217, "learning_rate": 5.299466791656604e-08, "loss": 0.3516, "step": 15906 }, { "epoch": 1.9357468816550045, "grad_norm": 2.028790235519409, "learning_rate": 5.279429727982011e-08, "loss": 0.2996, "step": 15907 }, { "epoch": 1.9358685731670215, "grad_norm": 1.9469631910324097, "learning_rate": 5.259430515484054e-08, "loss": 0.3539, "step": 15908 }, { "epoch": 1.9359902646790386, "grad_norm": 3.711719512939453, "learning_rate": 5.239469154923793e-08, "loss": 0.4552, "step": 15909 }, { "epoch": 1.9361119561910556, "grad_norm": 1.7252106666564941, "learning_rate": 5.219545647060731e-08, "loss": 0.389, "step": 15910 }, { "epoch": 1.9362336477030726, "grad_norm": 2.494966506958008, "learning_rate": 5.1996599926531496e-08, "loss": 0.3958, "step": 15911 }, { "epoch": 1.9363553392150896, "grad_norm": 1.5225744247436523, "learning_rate": 5.1798121924575565e-08, "loss": 0.3981, "step": 15912 }, { "epoch": 1.9364770307271066, "grad_norm": 1.4590448141098022, "learning_rate": 5.160002247229234e-08, "loss": 0.321, "step": 15913 }, { "epoch": 1.9365987222391237, "grad_norm": 2.6050050258636475, "learning_rate": 5.140230157722026e-08, "loss": 0.4119, "step": 15914 }, { "epoch": 1.936720413751141, "grad_norm": 2.958155870437622, "learning_rate": 5.120495924688329e-08, "loss": 0.3796, "step": 15915 }, { "epoch": 1.936842105263158, "grad_norm": 2.356903553009033, "learning_rate": 5.100799548879099e-08, "loss": 0.4146, "step": 15916 }, { "epoch": 1.936963796775175, "grad_norm": 2.283723831176758, "learning_rate": 5.0811410310437346e-08, "loss": 0.425, "step": 15917 }, { "epoch": 1.937085488287192, "grad_norm": 1.717651128768921, "learning_rate": 5.061520371930306e-08, "loss": 0.3578, "step": 15918 }, { "epoch": 1.937207179799209, "grad_norm": 2.4509005546569824, "learning_rate": 5.041937572285438e-08, "loss": 0.4109, "step": 15919 }, { "epoch": 1.937328871311226, "grad_norm": 2.104342460632324, "learning_rate": 5.022392632854311e-08, "loss": 0.3676, "step": 15920 }, { "epoch": 1.9374505628232432, "grad_norm": 1.9883087873458862, "learning_rate": 5.0028855543806654e-08, "loss": 0.3678, "step": 15921 }, { "epoch": 1.9375722543352603, "grad_norm": 1.5696316957473755, "learning_rate": 4.9834163376066836e-08, "loss": 0.3506, "step": 15922 }, { "epoch": 1.9376939458472773, "grad_norm": 2.4884350299835205, "learning_rate": 4.963984983273329e-08, "loss": 0.4102, "step": 15923 }, { "epoch": 1.9378156373592943, "grad_norm": 1.818729043006897, "learning_rate": 4.94459149212001e-08, "loss": 0.3828, "step": 15924 }, { "epoch": 1.9379373288713113, "grad_norm": 1.5971750020980835, "learning_rate": 4.925235864884581e-08, "loss": 0.3442, "step": 15925 }, { "epoch": 1.9380590203833283, "grad_norm": 1.9785457849502563, "learning_rate": 4.905918102303564e-08, "loss": 0.3104, "step": 15926 }, { "epoch": 1.9381807118953454, "grad_norm": 1.3062514066696167, "learning_rate": 4.886638205112149e-08, "loss": 0.3409, "step": 15927 }, { "epoch": 1.9383024034073624, "grad_norm": 1.5457227230072021, "learning_rate": 4.86739617404397e-08, "loss": 0.3599, "step": 15928 }, { "epoch": 1.9384240949193794, "grad_norm": 1.4135849475860596, "learning_rate": 4.8481920098311095e-08, "loss": 0.3431, "step": 15929 }, { "epoch": 1.9385457864313964, "grad_norm": 3.181957244873047, "learning_rate": 4.8290257132044274e-08, "loss": 0.345, "step": 15930 }, { "epoch": 1.9386674779434134, "grad_norm": 1.5733072757720947, "learning_rate": 4.809897284893117e-08, "loss": 0.3679, "step": 15931 }, { "epoch": 1.9387891694554305, "grad_norm": 1.9676730632781982, "learning_rate": 4.790806725625263e-08, "loss": 0.3481, "step": 15932 }, { "epoch": 1.9389108609674475, "grad_norm": 2.5600335597991943, "learning_rate": 4.7717540361271744e-08, "loss": 0.3413, "step": 15933 }, { "epoch": 1.9390325524794645, "grad_norm": 1.895156979560852, "learning_rate": 4.752739217123825e-08, "loss": 0.4411, "step": 15934 }, { "epoch": 1.9391542439914815, "grad_norm": 2.6482043266296387, "learning_rate": 4.7337622693387486e-08, "loss": 0.2969, "step": 15935 }, { "epoch": 1.9392759355034985, "grad_norm": 2.233037233352661, "learning_rate": 4.7148231934941445e-08, "loss": 0.3547, "step": 15936 }, { "epoch": 1.9393976270155155, "grad_norm": 1.7740463018417358, "learning_rate": 4.695921990310659e-08, "loss": 0.3599, "step": 15937 }, { "epoch": 1.9395193185275326, "grad_norm": 3.1780738830566406, "learning_rate": 4.6770586605074945e-08, "loss": 0.3708, "step": 15938 }, { "epoch": 1.9396410100395496, "grad_norm": 2.140378952026367, "learning_rate": 4.65823320480252e-08, "loss": 0.3065, "step": 15939 }, { "epoch": 1.9397627015515668, "grad_norm": 1.6263338327407837, "learning_rate": 4.6394456239119424e-08, "loss": 0.389, "step": 15940 }, { "epoch": 1.9398843930635838, "grad_norm": 2.4274227619171143, "learning_rate": 4.6206959185507437e-08, "loss": 0.3412, "step": 15941 }, { "epoch": 1.9400060845756009, "grad_norm": 1.710370421409607, "learning_rate": 4.6019840894323544e-08, "loss": 0.3445, "step": 15942 }, { "epoch": 1.9401277760876179, "grad_norm": 2.23415207862854, "learning_rate": 4.5833101372688706e-08, "loss": 0.3304, "step": 15943 }, { "epoch": 1.940249467599635, "grad_norm": 2.093322992324829, "learning_rate": 4.564674062770835e-08, "loss": 0.3868, "step": 15944 }, { "epoch": 1.940371159111652, "grad_norm": 1.8244776725769043, "learning_rate": 4.546075866647459e-08, "loss": 0.3683, "step": 15945 }, { "epoch": 1.9404928506236692, "grad_norm": 1.6568856239318848, "learning_rate": 4.5275155496062873e-08, "loss": 0.3578, "step": 15946 }, { "epoch": 1.9406145421356862, "grad_norm": 2.0331029891967773, "learning_rate": 4.508993112353754e-08, "loss": 0.4123, "step": 15947 }, { "epoch": 1.9407362336477032, "grad_norm": 1.6933448314666748, "learning_rate": 4.490508555594519e-08, "loss": 0.3644, "step": 15948 }, { "epoch": 1.9408579251597202, "grad_norm": 4.004017353057861, "learning_rate": 4.4720618800319085e-08, "loss": 0.428, "step": 15949 }, { "epoch": 1.9409796166717372, "grad_norm": 1.8626937866210938, "learning_rate": 4.453653086368137e-08, "loss": 0.3997, "step": 15950 }, { "epoch": 1.9411013081837543, "grad_norm": 1.7269723415374756, "learning_rate": 4.4352821753035345e-08, "loss": 0.3515, "step": 15951 }, { "epoch": 1.9412229996957713, "grad_norm": 3.0375661849975586, "learning_rate": 4.4169491475370975e-08, "loss": 0.3747, "step": 15952 }, { "epoch": 1.9413446912077883, "grad_norm": 1.7200435400009155, "learning_rate": 4.3986540037664896e-08, "loss": 0.3163, "step": 15953 }, { "epoch": 1.9414663827198053, "grad_norm": 1.8690217733383179, "learning_rate": 4.3803967446878204e-08, "loss": 0.3644, "step": 15954 }, { "epoch": 1.9415880742318223, "grad_norm": 2.563432216644287, "learning_rate": 4.362177370995979e-08, "loss": 0.3973, "step": 15955 }, { "epoch": 1.9417097657438394, "grad_norm": 2.1452534198760986, "learning_rate": 4.343995883384078e-08, "loss": 0.3255, "step": 15956 }, { "epoch": 1.9418314572558564, "grad_norm": 1.381900668144226, "learning_rate": 4.325852282544119e-08, "loss": 0.3546, "step": 15957 }, { "epoch": 1.9419531487678734, "grad_norm": 1.7779309749603271, "learning_rate": 4.3077465691663264e-08, "loss": 0.3581, "step": 15958 }, { "epoch": 1.9420748402798904, "grad_norm": 1.611080288887024, "learning_rate": 4.289678743939707e-08, "loss": 0.3374, "step": 15959 }, { "epoch": 1.9421965317919074, "grad_norm": 2.6402385234832764, "learning_rate": 4.2716488075519314e-08, "loss": 0.3906, "step": 15960 }, { "epoch": 1.9423182233039245, "grad_norm": 1.5428540706634521, "learning_rate": 4.253656760688896e-08, "loss": 0.3611, "step": 15961 }, { "epoch": 1.9424399148159415, "grad_norm": 1.8161102533340454, "learning_rate": 4.2357026040352744e-08, "loss": 0.3672, "step": 15962 }, { "epoch": 1.9425616063279585, "grad_norm": 2.851447105407715, "learning_rate": 4.21778633827441e-08, "loss": 0.3657, "step": 15963 }, { "epoch": 1.9426832978399755, "grad_norm": 2.0196964740753174, "learning_rate": 4.199907964087757e-08, "loss": 0.3698, "step": 15964 }, { "epoch": 1.9428049893519928, "grad_norm": 2.4098381996154785, "learning_rate": 4.1820674821558825e-08, "loss": 0.3131, "step": 15965 }, { "epoch": 1.9429266808640098, "grad_norm": 1.8653944730758667, "learning_rate": 4.164264893157577e-08, "loss": 0.3446, "step": 15966 }, { "epoch": 1.9430483723760268, "grad_norm": 2.68943190574646, "learning_rate": 4.146500197770298e-08, "loss": 0.4029, "step": 15967 }, { "epoch": 1.9431700638880438, "grad_norm": 2.2966091632843018, "learning_rate": 4.1287733966699495e-08, "loss": 0.394, "step": 15968 }, { "epoch": 1.9432917554000608, "grad_norm": 1.923325538635254, "learning_rate": 4.111084490531214e-08, "loss": 0.3354, "step": 15969 }, { "epoch": 1.9434134469120778, "grad_norm": 1.472470760345459, "learning_rate": 4.093433480026887e-08, "loss": 0.3755, "step": 15970 }, { "epoch": 1.9435351384240949, "grad_norm": 2.5652997493743896, "learning_rate": 4.075820365828986e-08, "loss": 0.4053, "step": 15971 }, { "epoch": 1.943656829936112, "grad_norm": 1.952427625656128, "learning_rate": 4.058245148607532e-08, "loss": 0.3667, "step": 15972 }, { "epoch": 1.9437785214481291, "grad_norm": 1.5961142778396606, "learning_rate": 4.040707829031321e-08, "loss": 0.3591, "step": 15973 }, { "epoch": 1.9439002129601461, "grad_norm": 1.9078994989395142, "learning_rate": 4.023208407767709e-08, "loss": 0.3099, "step": 15974 }, { "epoch": 1.9440219044721632, "grad_norm": 1.7736799716949463, "learning_rate": 4.005746885482609e-08, "loss": 0.4071, "step": 15975 }, { "epoch": 1.9441435959841802, "grad_norm": 2.0563454627990723, "learning_rate": 3.9883232628403766e-08, "loss": 0.3942, "step": 15976 }, { "epoch": 1.9442652874961972, "grad_norm": 1.8751639127731323, "learning_rate": 3.970937540504039e-08, "loss": 0.3518, "step": 15977 }, { "epoch": 1.9443869790082142, "grad_norm": 1.75934636592865, "learning_rate": 3.953589719135287e-08, "loss": 0.3584, "step": 15978 }, { "epoch": 1.9445086705202312, "grad_norm": 2.3915746212005615, "learning_rate": 3.936279799394149e-08, "loss": 0.3663, "step": 15979 }, { "epoch": 1.9446303620322483, "grad_norm": 1.4720486402511597, "learning_rate": 3.9190077819393214e-08, "loss": 0.3363, "step": 15980 }, { "epoch": 1.9447520535442653, "grad_norm": 1.9294682741165161, "learning_rate": 3.901773667427944e-08, "loss": 0.3896, "step": 15981 }, { "epoch": 1.9448737450562823, "grad_norm": 2.3747177124023438, "learning_rate": 3.884577456515826e-08, "loss": 0.4731, "step": 15982 }, { "epoch": 1.9449954365682993, "grad_norm": 1.736438274383545, "learning_rate": 3.867419149857554e-08, "loss": 0.3822, "step": 15983 }, { "epoch": 1.9451171280803163, "grad_norm": 2.718778371810913, "learning_rate": 3.8502987481057185e-08, "loss": 0.3277, "step": 15984 }, { "epoch": 1.9452388195923334, "grad_norm": 2.622084856033325, "learning_rate": 3.8332162519120196e-08, "loss": 0.394, "step": 15985 }, { "epoch": 1.9453605111043504, "grad_norm": 1.9948043823242188, "learning_rate": 3.816171661926382e-08, "loss": 0.3631, "step": 15986 }, { "epoch": 1.9454822026163674, "grad_norm": 1.9194804430007935, "learning_rate": 3.799164978797398e-08, "loss": 0.2973, "step": 15987 }, { "epoch": 1.9456038941283844, "grad_norm": 2.271669864654541, "learning_rate": 3.782196203172217e-08, "loss": 0.3971, "step": 15988 }, { "epoch": 1.9457255856404014, "grad_norm": 1.896245002746582, "learning_rate": 3.765265335696433e-08, "loss": 0.4208, "step": 15989 }, { "epoch": 1.9458472771524185, "grad_norm": 1.5037046670913696, "learning_rate": 3.7483723770145306e-08, "loss": 0.3632, "step": 15990 }, { "epoch": 1.9459689686644357, "grad_norm": 2.6111867427825928, "learning_rate": 3.7315173277691075e-08, "loss": 0.3129, "step": 15991 }, { "epoch": 1.9460906601764527, "grad_norm": 2.1137571334838867, "learning_rate": 3.71470018860165e-08, "loss": 0.3001, "step": 15992 }, { "epoch": 1.9462123516884697, "grad_norm": 1.4770915508270264, "learning_rate": 3.697920960151979e-08, "loss": 0.3151, "step": 15993 }, { "epoch": 1.9463340432004868, "grad_norm": 1.6807974576950073, "learning_rate": 3.6811796430588074e-08, "loss": 0.3619, "step": 15994 }, { "epoch": 1.9464557347125038, "grad_norm": 1.555757999420166, "learning_rate": 3.664476237958847e-08, "loss": 0.3426, "step": 15995 }, { "epoch": 1.9465774262245208, "grad_norm": 2.976696252822876, "learning_rate": 3.6478107454879234e-08, "loss": 0.4211, "step": 15996 }, { "epoch": 1.946699117736538, "grad_norm": 1.7319098711013794, "learning_rate": 3.631183166280194e-08, "loss": 0.3544, "step": 15997 }, { "epoch": 1.946820809248555, "grad_norm": 1.7513951063156128, "learning_rate": 3.6145935009681554e-08, "loss": 0.3818, "step": 15998 }, { "epoch": 1.946942500760572, "grad_norm": 2.1964359283447266, "learning_rate": 3.598041750183412e-08, "loss": 0.3394, "step": 15999 }, { "epoch": 1.947064192272589, "grad_norm": 1.5427451133728027, "learning_rate": 3.5815279145555716e-08, "loss": 0.3621, "step": 16000 }, { "epoch": 1.947185883784606, "grad_norm": 1.4409089088439941, "learning_rate": 3.56505199471302e-08, "loss": 0.3287, "step": 16001 }, { "epoch": 1.9473075752966231, "grad_norm": 1.894395112991333, "learning_rate": 3.548613991282812e-08, "loss": 0.4158, "step": 16002 }, { "epoch": 1.9474292668086401, "grad_norm": 2.9766595363616943, "learning_rate": 3.532213904890336e-08, "loss": 0.3318, "step": 16003 }, { "epoch": 1.9475509583206572, "grad_norm": 2.333193778991699, "learning_rate": 3.515851736159648e-08, "loss": 0.3275, "step": 16004 }, { "epoch": 1.9476726498326742, "grad_norm": 1.5578022003173828, "learning_rate": 3.499527485713583e-08, "loss": 0.3629, "step": 16005 }, { "epoch": 1.9477943413446912, "grad_norm": 1.478871464729309, "learning_rate": 3.483241154172978e-08, "loss": 0.3571, "step": 16006 }, { "epoch": 1.9479160328567082, "grad_norm": 1.7768558263778687, "learning_rate": 3.466992742157782e-08, "loss": 0.3865, "step": 16007 }, { "epoch": 1.9480377243687252, "grad_norm": 4.546865463256836, "learning_rate": 3.4507822502861666e-08, "loss": 0.4083, "step": 16008 }, { "epoch": 1.9481594158807423, "grad_norm": 1.4959570169448853, "learning_rate": 3.4346096791750825e-08, "loss": 0.3811, "step": 16009 }, { "epoch": 1.9482811073927593, "grad_norm": 1.3378942012786865, "learning_rate": 3.418475029439927e-08, "loss": 0.3596, "step": 16010 }, { "epoch": 1.9484027989047763, "grad_norm": 1.8297083377838135, "learning_rate": 3.402378301694431e-08, "loss": 0.3718, "step": 16011 }, { "epoch": 1.9485244904167933, "grad_norm": 2.106321334838867, "learning_rate": 3.386319496551438e-08, "loss": 0.4147, "step": 16012 }, { "epoch": 1.9486461819288103, "grad_norm": 2.4800286293029785, "learning_rate": 3.370298614621903e-08, "loss": 0.3483, "step": 16013 }, { "epoch": 1.9487678734408274, "grad_norm": 1.6255266666412354, "learning_rate": 3.354315656515339e-08, "loss": 0.3275, "step": 16014 }, { "epoch": 1.9488895649528444, "grad_norm": 1.6460814476013184, "learning_rate": 3.338370622840037e-08, "loss": 0.3962, "step": 16015 }, { "epoch": 1.9490112564648616, "grad_norm": 2.4279298782348633, "learning_rate": 3.322463514202623e-08, "loss": 0.3808, "step": 16016 }, { "epoch": 1.9491329479768786, "grad_norm": 2.511500358581543, "learning_rate": 3.306594331208501e-08, "loss": 0.3024, "step": 16017 }, { "epoch": 1.9492546394888957, "grad_norm": 2.590264320373535, "learning_rate": 3.2907630744616335e-08, "loss": 0.4136, "step": 16018 }, { "epoch": 1.9493763310009127, "grad_norm": 2.1603734493255615, "learning_rate": 3.274969744564205e-08, "loss": 0.4408, "step": 16019 }, { "epoch": 1.9494980225129297, "grad_norm": 1.8866347074508667, "learning_rate": 3.259214342117289e-08, "loss": 0.3826, "step": 16020 }, { "epoch": 1.9496197140249467, "grad_norm": 1.7558435201644897, "learning_rate": 3.243496867720408e-08, "loss": 0.4097, "step": 16021 }, { "epoch": 1.949741405536964, "grad_norm": 2.186951160430908, "learning_rate": 3.227817321971638e-08, "loss": 0.3438, "step": 16022 }, { "epoch": 1.949863097048981, "grad_norm": 1.4934724569320679, "learning_rate": 3.212175705467613e-08, "loss": 0.3182, "step": 16023 }, { "epoch": 1.949984788560998, "grad_norm": 2.3411896228790283, "learning_rate": 3.1965720188036345e-08, "loss": 0.3446, "step": 16024 }, { "epoch": 1.950106480073015, "grad_norm": 1.7738183736801147, "learning_rate": 3.1810062625732274e-08, "loss": 0.3253, "step": 16025 }, { "epoch": 1.950228171585032, "grad_norm": 1.5678980350494385, "learning_rate": 3.1654784373690295e-08, "loss": 0.3764, "step": 16026 }, { "epoch": 1.950349863097049, "grad_norm": 2.606781482696533, "learning_rate": 3.149988543781457e-08, "loss": 0.4517, "step": 16027 }, { "epoch": 1.950471554609066, "grad_norm": 2.046215772628784, "learning_rate": 3.134536582400372e-08, "loss": 0.3378, "step": 16028 }, { "epoch": 1.950593246121083, "grad_norm": 1.9816588163375854, "learning_rate": 3.119122553813525e-08, "loss": 0.4039, "step": 16029 }, { "epoch": 1.9507149376331, "grad_norm": 2.3498826026916504, "learning_rate": 3.103746458607448e-08, "loss": 0.4089, "step": 16030 }, { "epoch": 1.9508366291451171, "grad_norm": 1.5352933406829834, "learning_rate": 3.08840829736734e-08, "loss": 0.3538, "step": 16031 }, { "epoch": 1.9509583206571341, "grad_norm": 2.039468288421631, "learning_rate": 3.073108070676733e-08, "loss": 0.3778, "step": 16032 }, { "epoch": 1.9510800121691512, "grad_norm": 2.404261350631714, "learning_rate": 3.0578457791179404e-08, "loss": 0.3906, "step": 16033 }, { "epoch": 1.9512017036811682, "grad_norm": 1.7531213760375977, "learning_rate": 3.042621423271608e-08, "loss": 0.3398, "step": 16034 }, { "epoch": 1.9513233951931852, "grad_norm": 1.822922706604004, "learning_rate": 3.027435003717161e-08, "loss": 0.3521, "step": 16035 }, { "epoch": 1.9514450867052022, "grad_norm": 2.2279117107391357, "learning_rate": 3.0122865210324704e-08, "loss": 0.4432, "step": 16036 }, { "epoch": 1.9515667782172192, "grad_norm": 2.238145112991333, "learning_rate": 2.9971759757939644e-08, "loss": 0.3728, "step": 16037 }, { "epoch": 1.9516884697292363, "grad_norm": 1.7669050693511963, "learning_rate": 2.9821033685764055e-08, "loss": 0.429, "step": 16038 }, { "epoch": 1.9518101612412533, "grad_norm": 1.7786117792129517, "learning_rate": 2.967068699953668e-08, "loss": 0.3257, "step": 16039 }, { "epoch": 1.9519318527532703, "grad_norm": 1.7841204404830933, "learning_rate": 2.9520719704977386e-08, "loss": 0.3802, "step": 16040 }, { "epoch": 1.9520535442652875, "grad_norm": 2.8488128185272217, "learning_rate": 2.937113180779161e-08, "loss": 0.3097, "step": 16041 }, { "epoch": 1.9521752357773046, "grad_norm": 1.5512897968292236, "learning_rate": 2.9221923313672573e-08, "loss": 0.3635, "step": 16042 }, { "epoch": 1.9522969272893216, "grad_norm": 1.670295000076294, "learning_rate": 2.907309422829796e-08, "loss": 0.3826, "step": 16043 }, { "epoch": 1.9524186188013386, "grad_norm": 1.8494594097137451, "learning_rate": 2.8924644557331015e-08, "loss": 0.3266, "step": 16044 }, { "epoch": 1.9525403103133556, "grad_norm": 2.493541955947876, "learning_rate": 2.8776574306419446e-08, "loss": 0.3569, "step": 16045 }, { "epoch": 1.9526620018253726, "grad_norm": 1.8433974981307983, "learning_rate": 2.8628883481198743e-08, "loss": 0.4145, "step": 16046 }, { "epoch": 1.9527836933373899, "grad_norm": 1.6594692468643188, "learning_rate": 2.8481572087288855e-08, "loss": 0.3873, "step": 16047 }, { "epoch": 1.952905384849407, "grad_norm": 1.4882068634033203, "learning_rate": 2.83346401302953e-08, "loss": 0.3582, "step": 16048 }, { "epoch": 1.953027076361424, "grad_norm": 2.0764756202697754, "learning_rate": 2.8188087615809157e-08, "loss": 0.3712, "step": 16049 }, { "epoch": 1.953148767873441, "grad_norm": 2.594630002975464, "learning_rate": 2.8041914549405967e-08, "loss": 0.4236, "step": 16050 }, { "epoch": 1.953270459385458, "grad_norm": 2.0064070224761963, "learning_rate": 2.7896120936650174e-08, "loss": 0.4179, "step": 16051 }, { "epoch": 1.953392150897475, "grad_norm": 2.3302161693573, "learning_rate": 2.7750706783086222e-08, "loss": 0.3522, "step": 16052 }, { "epoch": 1.953513842409492, "grad_norm": 1.6610610485076904, "learning_rate": 2.7605672094250802e-08, "loss": 0.3613, "step": 16053 }, { "epoch": 1.953635533921509, "grad_norm": 5.792760848999023, "learning_rate": 2.7461016875660608e-08, "loss": 0.5273, "step": 16054 }, { "epoch": 1.953757225433526, "grad_norm": 1.9264659881591797, "learning_rate": 2.7316741132820122e-08, "loss": 0.3776, "step": 16055 }, { "epoch": 1.953878916945543, "grad_norm": 1.7694602012634277, "learning_rate": 2.7172844871221625e-08, "loss": 0.3428, "step": 16056 }, { "epoch": 1.95400060845756, "grad_norm": 2.1041338443756104, "learning_rate": 2.7029328096337403e-08, "loss": 0.3819, "step": 16057 }, { "epoch": 1.954122299969577, "grad_norm": 3.1114678382873535, "learning_rate": 2.6886190813630862e-08, "loss": 0.3021, "step": 16058 }, { "epoch": 1.9542439914815941, "grad_norm": 2.371156692504883, "learning_rate": 2.674343302854765e-08, "loss": 0.4205, "step": 16059 }, { "epoch": 1.9543656829936111, "grad_norm": 2.124628782272339, "learning_rate": 2.6601054746521194e-08, "loss": 0.4336, "step": 16060 }, { "epoch": 1.9544873745056281, "grad_norm": 2.71779203414917, "learning_rate": 2.6459055972967162e-08, "loss": 0.3969, "step": 16061 }, { "epoch": 1.9546090660176452, "grad_norm": 1.8790838718414307, "learning_rate": 2.6317436713291233e-08, "loss": 0.3227, "step": 16062 }, { "epoch": 1.9547307575296622, "grad_norm": 1.962717056274414, "learning_rate": 2.6176196972880208e-08, "loss": 0.4185, "step": 16063 }, { "epoch": 1.9548524490416792, "grad_norm": 2.240490198135376, "learning_rate": 2.6035336757110896e-08, "loss": 0.3573, "step": 16064 }, { "epoch": 1.9549741405536962, "grad_norm": 2.883488178253174, "learning_rate": 2.5894856071341236e-08, "loss": 0.3443, "step": 16065 }, { "epoch": 1.9550958320657135, "grad_norm": 1.692785620689392, "learning_rate": 2.5754754920916947e-08, "loss": 0.3398, "step": 16066 }, { "epoch": 1.9552175235777305, "grad_norm": 2.052774667739868, "learning_rate": 2.5615033311170433e-08, "loss": 0.3912, "step": 16067 }, { "epoch": 1.9553392150897475, "grad_norm": 1.7729231119155884, "learning_rate": 2.5475691247417444e-08, "loss": 0.389, "step": 16068 }, { "epoch": 1.9554609066017645, "grad_norm": 4.135091304779053, "learning_rate": 2.5336728734960403e-08, "loss": 0.4294, "step": 16069 }, { "epoch": 1.9555825981137815, "grad_norm": 1.6622675657272339, "learning_rate": 2.5198145779088413e-08, "loss": 0.3551, "step": 16070 }, { "epoch": 1.9557042896257986, "grad_norm": 1.3635361194610596, "learning_rate": 2.505994238507281e-08, "loss": 0.3084, "step": 16071 }, { "epoch": 1.9558259811378156, "grad_norm": 1.7262868881225586, "learning_rate": 2.4922118558173835e-08, "loss": 0.3765, "step": 16072 }, { "epoch": 1.9559476726498328, "grad_norm": 2.049860954284668, "learning_rate": 2.478467430363507e-08, "loss": 0.4249, "step": 16073 }, { "epoch": 1.9560693641618498, "grad_norm": 1.459931492805481, "learning_rate": 2.4647609626686774e-08, "loss": 0.3565, "step": 16074 }, { "epoch": 1.9561910556738669, "grad_norm": 2.6633172035217285, "learning_rate": 2.4510924532544777e-08, "loss": 0.3888, "step": 16075 }, { "epoch": 1.9563127471858839, "grad_norm": 2.477372884750366, "learning_rate": 2.4374619026410473e-08, "loss": 0.3738, "step": 16076 }, { "epoch": 1.956434438697901, "grad_norm": 2.220189094543457, "learning_rate": 2.4238693113470824e-08, "loss": 0.3724, "step": 16077 }, { "epoch": 1.956556130209918, "grad_norm": 2.200385093688965, "learning_rate": 2.4103146798897247e-08, "loss": 0.3123, "step": 16078 }, { "epoch": 1.956677821721935, "grad_norm": 2.3237109184265137, "learning_rate": 2.3967980087847842e-08, "loss": 0.3445, "step": 16079 }, { "epoch": 1.956799513233952, "grad_norm": 2.357635498046875, "learning_rate": 2.383319298546627e-08, "loss": 0.3084, "step": 16080 }, { "epoch": 1.956921204745969, "grad_norm": 1.877802848815918, "learning_rate": 2.369878549688065e-08, "loss": 0.3306, "step": 16081 }, { "epoch": 1.957042896257986, "grad_norm": 1.9022371768951416, "learning_rate": 2.356475762720578e-08, "loss": 0.363, "step": 16082 }, { "epoch": 1.957164587770003, "grad_norm": 1.8036013841629028, "learning_rate": 2.3431109381543137e-08, "loss": 0.3516, "step": 16083 }, { "epoch": 1.95728627928202, "grad_norm": 1.7448277473449707, "learning_rate": 2.3297840764976432e-08, "loss": 0.3648, "step": 16084 }, { "epoch": 1.957407970794037, "grad_norm": 2.354055643081665, "learning_rate": 2.316495178257827e-08, "loss": 0.3157, "step": 16085 }, { "epoch": 1.957529662306054, "grad_norm": 1.772237777709961, "learning_rate": 2.3032442439403502e-08, "loss": 0.3831, "step": 16086 }, { "epoch": 1.957651353818071, "grad_norm": 1.6501935720443726, "learning_rate": 2.2900312740495866e-08, "loss": 0.3756, "step": 16087 }, { "epoch": 1.9577730453300881, "grad_norm": 2.007291078567505, "learning_rate": 2.2768562690883568e-08, "loss": 0.3488, "step": 16088 }, { "epoch": 1.9578947368421051, "grad_norm": 1.6944053173065186, "learning_rate": 2.2637192295578147e-08, "loss": 0.3762, "step": 16089 }, { "epoch": 1.9580164283541222, "grad_norm": 1.4936788082122803, "learning_rate": 2.2506201559580056e-08, "loss": 0.3153, "step": 16090 }, { "epoch": 1.9581381198661392, "grad_norm": 1.573096752166748, "learning_rate": 2.2375590487873077e-08, "loss": 0.377, "step": 16091 }, { "epoch": 1.9582598113781564, "grad_norm": 2.0307915210723877, "learning_rate": 2.2245359085427687e-08, "loss": 0.3704, "step": 16092 }, { "epoch": 1.9583815028901734, "grad_norm": 1.6768741607666016, "learning_rate": 2.2115507357198806e-08, "loss": 0.3644, "step": 16093 }, { "epoch": 1.9585031944021905, "grad_norm": 2.0135579109191895, "learning_rate": 2.198603530812915e-08, "loss": 0.396, "step": 16094 }, { "epoch": 1.9586248859142075, "grad_norm": 1.7168631553649902, "learning_rate": 2.1856942943142556e-08, "loss": 0.3222, "step": 16095 }, { "epoch": 1.9587465774262245, "grad_norm": 1.6933259963989258, "learning_rate": 2.172823026715398e-08, "loss": 0.405, "step": 16096 }, { "epoch": 1.9588682689382415, "grad_norm": 3.757620096206665, "learning_rate": 2.1599897285059514e-08, "loss": 0.406, "step": 16097 }, { "epoch": 1.9589899604502587, "grad_norm": 2.306986093521118, "learning_rate": 2.1471944001744128e-08, "loss": 0.4266, "step": 16098 }, { "epoch": 1.9591116519622758, "grad_norm": 1.6254311800003052, "learning_rate": 2.1344370422075043e-08, "loss": 0.373, "step": 16099 }, { "epoch": 1.9592333434742928, "grad_norm": 1.3391528129577637, "learning_rate": 2.121717655090727e-08, "loss": 0.3522, "step": 16100 }, { "epoch": 1.9593550349863098, "grad_norm": 1.8421125411987305, "learning_rate": 2.1090362393080266e-08, "loss": 0.3164, "step": 16101 }, { "epoch": 1.9594767264983268, "grad_norm": 1.8858824968338013, "learning_rate": 2.0963927953421283e-08, "loss": 0.3655, "step": 16102 }, { "epoch": 1.9595984180103438, "grad_norm": 1.868971824645996, "learning_rate": 2.0837873236739803e-08, "loss": 0.3726, "step": 16103 }, { "epoch": 1.9597201095223609, "grad_norm": 1.9227591753005981, "learning_rate": 2.0712198247831994e-08, "loss": 0.3896, "step": 16104 }, { "epoch": 1.9598418010343779, "grad_norm": 2.1970951557159424, "learning_rate": 2.0586902991482915e-08, "loss": 0.3871, "step": 16105 }, { "epoch": 1.959963492546395, "grad_norm": 1.4184823036193848, "learning_rate": 2.0461987472457644e-08, "loss": 0.3336, "step": 16106 }, { "epoch": 1.960085184058412, "grad_norm": 1.4658305644989014, "learning_rate": 2.033745169551016e-08, "loss": 0.3737, "step": 16107 }, { "epoch": 1.960206875570429, "grad_norm": 2.533212900161743, "learning_rate": 2.0213295665378885e-08, "loss": 0.2941, "step": 16108 }, { "epoch": 1.960328567082446, "grad_norm": 1.6192365884780884, "learning_rate": 2.0089519386788935e-08, "loss": 0.3499, "step": 16109 }, { "epoch": 1.960450258594463, "grad_norm": 1.6580541133880615, "learning_rate": 1.9966122864450986e-08, "loss": 0.3724, "step": 16110 }, { "epoch": 1.96057195010648, "grad_norm": 1.4288220405578613, "learning_rate": 1.9843106103059063e-08, "loss": 0.3662, "step": 16111 }, { "epoch": 1.960693641618497, "grad_norm": 1.8832440376281738, "learning_rate": 1.972046910729386e-08, "loss": 0.4455, "step": 16112 }, { "epoch": 1.960815333130514, "grad_norm": 1.7494819164276123, "learning_rate": 1.9598211881823868e-08, "loss": 0.3949, "step": 16113 }, { "epoch": 1.960937024642531, "grad_norm": 1.9469034671783447, "learning_rate": 1.9476334431299815e-08, "loss": 0.3741, "step": 16114 }, { "epoch": 1.961058716154548, "grad_norm": 1.8368041515350342, "learning_rate": 1.9354836760360206e-08, "loss": 0.4006, "step": 16115 }, { "epoch": 1.961180407666565, "grad_norm": 1.9660885334014893, "learning_rate": 1.9233718873628016e-08, "loss": 0.3155, "step": 16116 }, { "epoch": 1.9613020991785823, "grad_norm": 2.2395100593566895, "learning_rate": 1.9112980775711775e-08, "loss": 0.4024, "step": 16117 }, { "epoch": 1.9614237906905994, "grad_norm": 2.684568405151367, "learning_rate": 1.8992622471205592e-08, "loss": 0.4188, "step": 16118 }, { "epoch": 1.9615454822026164, "grad_norm": 2.0264804363250732, "learning_rate": 1.887264396468913e-08, "loss": 0.3289, "step": 16119 }, { "epoch": 1.9616671737146334, "grad_norm": 1.872266411781311, "learning_rate": 1.8753045260728742e-08, "loss": 0.3763, "step": 16120 }, { "epoch": 1.9617888652266504, "grad_norm": 2.1223602294921875, "learning_rate": 1.863382636387523e-08, "loss": 0.3701, "step": 16121 }, { "epoch": 1.9619105567386674, "grad_norm": 1.5758085250854492, "learning_rate": 1.8514987278664966e-08, "loss": 0.381, "step": 16122 }, { "epoch": 1.9620322482506847, "grad_norm": 1.8471357822418213, "learning_rate": 1.8396528009619887e-08, "loss": 0.3878, "step": 16123 }, { "epoch": 1.9621539397627017, "grad_norm": 2.659285545349121, "learning_rate": 1.8278448561247496e-08, "loss": 0.3419, "step": 16124 }, { "epoch": 1.9622756312747187, "grad_norm": 1.811944603919983, "learning_rate": 1.8160748938041984e-08, "loss": 0.3854, "step": 16125 }, { "epoch": 1.9623973227867357, "grad_norm": 2.0797219276428223, "learning_rate": 1.8043429144480874e-08, "loss": 0.3321, "step": 16126 }, { "epoch": 1.9625190142987528, "grad_norm": 1.619655966758728, "learning_rate": 1.7926489185028373e-08, "loss": 0.3394, "step": 16127 }, { "epoch": 1.9626407058107698, "grad_norm": 1.9596425294876099, "learning_rate": 1.780992906413537e-08, "loss": 0.3439, "step": 16128 }, { "epoch": 1.9627623973227868, "grad_norm": 1.8175820112228394, "learning_rate": 1.7693748786236087e-08, "loss": 0.3718, "step": 16129 }, { "epoch": 1.9628840888348038, "grad_norm": 1.6436271667480469, "learning_rate": 1.757794835575144e-08, "loss": 0.3715, "step": 16130 }, { "epoch": 1.9630057803468208, "grad_norm": 2.266298532485962, "learning_rate": 1.746252777708901e-08, "loss": 0.3947, "step": 16131 }, { "epoch": 1.9631274718588378, "grad_norm": 1.6623202562332153, "learning_rate": 1.7347487054639732e-08, "loss": 0.3988, "step": 16132 }, { "epoch": 1.9632491633708549, "grad_norm": 1.9400012493133545, "learning_rate": 1.7232826192782327e-08, "loss": 0.4185, "step": 16133 }, { "epoch": 1.9633708548828719, "grad_norm": 2.1501822471618652, "learning_rate": 1.7118545195877745e-08, "loss": 0.3492, "step": 16134 }, { "epoch": 1.963492546394889, "grad_norm": 2.000150203704834, "learning_rate": 1.7004644068276955e-08, "loss": 0.3468, "step": 16135 }, { "epoch": 1.963614237906906, "grad_norm": 2.302781820297241, "learning_rate": 1.6891122814313154e-08, "loss": 0.3665, "step": 16136 }, { "epoch": 1.963735929418923, "grad_norm": 1.699919581413269, "learning_rate": 1.6777981438305113e-08, "loss": 0.3105, "step": 16137 }, { "epoch": 1.96385762093094, "grad_norm": 1.5578714609146118, "learning_rate": 1.6665219944560497e-08, "loss": 0.3423, "step": 16138 }, { "epoch": 1.963979312442957, "grad_norm": 1.8221158981323242, "learning_rate": 1.6552838337366983e-08, "loss": 0.3813, "step": 16139 }, { "epoch": 1.964101003954974, "grad_norm": 1.7642605304718018, "learning_rate": 1.6440836621003375e-08, "loss": 0.3716, "step": 16140 }, { "epoch": 1.964222695466991, "grad_norm": 1.5965232849121094, "learning_rate": 1.6329214799731817e-08, "loss": 0.3911, "step": 16141 }, { "epoch": 1.9643443869790083, "grad_norm": 1.5525596141815186, "learning_rate": 1.6217972877796695e-08, "loss": 0.333, "step": 16142 }, { "epoch": 1.9644660784910253, "grad_norm": 1.4319900274276733, "learning_rate": 1.6107110859434617e-08, "loss": 0.3741, "step": 16143 }, { "epoch": 1.9645877700030423, "grad_norm": 2.2056970596313477, "learning_rate": 1.59966287488611e-08, "loss": 0.3785, "step": 16144 }, { "epoch": 1.9647094615150593, "grad_norm": 1.7470972537994385, "learning_rate": 1.5886526550282776e-08, "loss": 0.3641, "step": 16145 }, { "epoch": 1.9648311530270763, "grad_norm": 2.975201368331909, "learning_rate": 1.5776804267887414e-08, "loss": 0.3732, "step": 16146 }, { "epoch": 1.9649528445390934, "grad_norm": 1.6766966581344604, "learning_rate": 1.5667461905850556e-08, "loss": 0.334, "step": 16147 }, { "epoch": 1.9650745360511106, "grad_norm": 1.821400761604309, "learning_rate": 1.5558499468334432e-08, "loss": 0.4138, "step": 16148 }, { "epoch": 1.9651962275631276, "grad_norm": 1.8996427059173584, "learning_rate": 1.5449916959483502e-08, "loss": 0.4095, "step": 16149 }, { "epoch": 1.9653179190751446, "grad_norm": 2.5826408863067627, "learning_rate": 1.5341714383430018e-08, "loss": 0.3497, "step": 16150 }, { "epoch": 1.9654396105871617, "grad_norm": 1.8483394384384155, "learning_rate": 1.5233891744290684e-08, "loss": 0.35, "step": 16151 }, { "epoch": 1.9655613020991787, "grad_norm": 1.6593778133392334, "learning_rate": 1.512644904617e-08, "loss": 0.4012, "step": 16152 }, { "epoch": 1.9656829936111957, "grad_norm": 1.8529874086380005, "learning_rate": 1.50193862931558e-08, "loss": 0.3777, "step": 16153 }, { "epoch": 1.9658046851232127, "grad_norm": 1.5400457382202148, "learning_rate": 1.49127034893215e-08, "loss": 0.3583, "step": 16154 }, { "epoch": 1.9659263766352297, "grad_norm": 2.7630953788757324, "learning_rate": 1.480640063872607e-08, "loss": 0.3829, "step": 16155 }, { "epoch": 1.9660480681472468, "grad_norm": 4.0489068031311035, "learning_rate": 1.4700477745416275e-08, "loss": 0.4039, "step": 16156 }, { "epoch": 1.9661697596592638, "grad_norm": 1.8888777494430542, "learning_rate": 1.4594934813421113e-08, "loss": 0.4238, "step": 16157 }, { "epoch": 1.9662914511712808, "grad_norm": 1.5959055423736572, "learning_rate": 1.4489771846757373e-08, "loss": 0.352, "step": 16158 }, { "epoch": 1.9664131426832978, "grad_norm": 1.896551251411438, "learning_rate": 1.4384988849426296e-08, "loss": 0.4144, "step": 16159 }, { "epoch": 1.9665348341953148, "grad_norm": 1.9961283206939697, "learning_rate": 1.4280585825415805e-08, "loss": 0.3635, "step": 16160 }, { "epoch": 1.9666565257073318, "grad_norm": 2.557892322540283, "learning_rate": 1.4176562778698278e-08, "loss": 0.3433, "step": 16161 }, { "epoch": 1.9667782172193489, "grad_norm": 2.7010657787323, "learning_rate": 1.407291971323277e-08, "loss": 0.3392, "step": 16162 }, { "epoch": 1.9668999087313659, "grad_norm": 1.998677372932434, "learning_rate": 1.3969656632961682e-08, "loss": 0.3896, "step": 16163 }, { "epoch": 1.967021600243383, "grad_norm": 1.8208341598510742, "learning_rate": 1.3866773541815204e-08, "loss": 0.3376, "step": 16164 }, { "epoch": 1.9671432917554, "grad_norm": 3.2257418632507324, "learning_rate": 1.3764270443709094e-08, "loss": 0.3229, "step": 16165 }, { "epoch": 1.967264983267417, "grad_norm": 1.6839380264282227, "learning_rate": 1.3662147342541344e-08, "loss": 0.3679, "step": 16166 }, { "epoch": 1.9673866747794342, "grad_norm": 2.1701269149780273, "learning_rate": 1.3560404242199954e-08, "loss": 0.3186, "step": 16167 }, { "epoch": 1.9675083662914512, "grad_norm": 2.662816286087036, "learning_rate": 1.3459041146556272e-08, "loss": 0.3982, "step": 16168 }, { "epoch": 1.9676300578034682, "grad_norm": 1.735885500907898, "learning_rate": 1.3358058059467217e-08, "loss": 0.373, "step": 16169 }, { "epoch": 1.9677517493154852, "grad_norm": 2.0652430057525635, "learning_rate": 1.3257454984775265e-08, "loss": 0.3565, "step": 16170 }, { "epoch": 1.9678734408275023, "grad_norm": 3.1041510105133057, "learning_rate": 1.3157231926308467e-08, "loss": 0.3567, "step": 16171 }, { "epoch": 1.9679951323395193, "grad_norm": 2.1822738647460938, "learning_rate": 1.305738888788044e-08, "loss": 0.3221, "step": 16172 }, { "epoch": 1.9681168238515363, "grad_norm": 3.687045097351074, "learning_rate": 1.2957925873290367e-08, "loss": 0.3152, "step": 16173 }, { "epoch": 1.9682385153635535, "grad_norm": 1.8525265455245972, "learning_rate": 1.2858842886324108e-08, "loss": 0.3924, "step": 16174 }, { "epoch": 1.9683602068755706, "grad_norm": 1.6356488466262817, "learning_rate": 1.276013993075087e-08, "loss": 0.3358, "step": 16175 }, { "epoch": 1.9684818983875876, "grad_norm": 1.6764427423477173, "learning_rate": 1.2661817010326538e-08, "loss": 0.3529, "step": 16176 }, { "epoch": 1.9686035898996046, "grad_norm": 1.6589916944503784, "learning_rate": 1.2563874128792564e-08, "loss": 0.3771, "step": 16177 }, { "epoch": 1.9687252814116216, "grad_norm": 2.913634777069092, "learning_rate": 1.2466311289875965e-08, "loss": 0.4074, "step": 16178 }, { "epoch": 1.9688469729236386, "grad_norm": 1.6322712898254395, "learning_rate": 1.2369128497289329e-08, "loss": 0.3433, "step": 16179 }, { "epoch": 1.9689686644356557, "grad_norm": 2.0338664054870605, "learning_rate": 1.2272325754730807e-08, "loss": 0.3754, "step": 16180 }, { "epoch": 1.9690903559476727, "grad_norm": 2.6447393894195557, "learning_rate": 1.217590306588301e-08, "loss": 0.3151, "step": 16181 }, { "epoch": 1.9692120474596897, "grad_norm": 2.7490193843841553, "learning_rate": 1.2079860434416335e-08, "loss": 0.2946, "step": 16182 }, { "epoch": 1.9693337389717067, "grad_norm": 1.993751049041748, "learning_rate": 1.1984197863985637e-08, "loss": 0.3396, "step": 16183 }, { "epoch": 1.9694554304837237, "grad_norm": 2.14383602142334, "learning_rate": 1.1888915358229114e-08, "loss": 0.388, "step": 16184 }, { "epoch": 1.9695771219957408, "grad_norm": 1.6024258136749268, "learning_rate": 1.1794012920773867e-08, "loss": 0.4065, "step": 16185 }, { "epoch": 1.9696988135077578, "grad_norm": 1.5283539295196533, "learning_rate": 1.1699490555231452e-08, "loss": 0.3292, "step": 16186 }, { "epoch": 1.9698205050197748, "grad_norm": 2.4187207221984863, "learning_rate": 1.160534826519788e-08, "loss": 0.2955, "step": 16187 }, { "epoch": 1.9699421965317918, "grad_norm": 2.8960108757019043, "learning_rate": 1.1511586054254731e-08, "loss": 0.399, "step": 16188 }, { "epoch": 1.9700638880438088, "grad_norm": 1.844589114189148, "learning_rate": 1.1418203925972482e-08, "loss": 0.4326, "step": 16189 }, { "epoch": 1.9701855795558258, "grad_norm": 1.4832305908203125, "learning_rate": 1.1325201883901626e-08, "loss": 0.3673, "step": 16190 }, { "epoch": 1.9703072710678429, "grad_norm": 2.5775561332702637, "learning_rate": 1.1232579931582666e-08, "loss": 0.3513, "step": 16191 }, { "epoch": 1.9704289625798599, "grad_norm": 1.834261178970337, "learning_rate": 1.1140338072539448e-08, "loss": 0.3442, "step": 16192 }, { "epoch": 1.9705506540918771, "grad_norm": 2.057279348373413, "learning_rate": 1.1048476310281386e-08, "loss": 0.4094, "step": 16193 }, { "epoch": 1.9706723456038941, "grad_norm": 1.5632085800170898, "learning_rate": 1.0956994648305685e-08, "loss": 0.3743, "step": 16194 }, { "epoch": 1.9707940371159112, "grad_norm": 2.784318208694458, "learning_rate": 1.0865893090092894e-08, "loss": 0.3268, "step": 16195 }, { "epoch": 1.9709157286279282, "grad_norm": 2.4518778324127197, "learning_rate": 1.0775171639108018e-08, "loss": 0.4051, "step": 16196 }, { "epoch": 1.9710374201399452, "grad_norm": 2.298598527908325, "learning_rate": 1.068483029880496e-08, "loss": 0.327, "step": 16197 }, { "epoch": 1.9711591116519622, "grad_norm": 1.675595760345459, "learning_rate": 1.0594869072622083e-08, "loss": 0.3892, "step": 16198 }, { "epoch": 1.9712808031639795, "grad_norm": 2.040555715560913, "learning_rate": 1.0505287963979981e-08, "loss": 0.4143, "step": 16199 }, { "epoch": 1.9714024946759965, "grad_norm": 2.217957019805908, "learning_rate": 1.0416086976289263e-08, "loss": 0.2885, "step": 16200 }, { "epoch": 1.9715241861880135, "grad_norm": 1.6215285062789917, "learning_rate": 1.0327266112943879e-08, "loss": 0.3627, "step": 16201 }, { "epoch": 1.9716458777000305, "grad_norm": 2.09826397895813, "learning_rate": 1.0238825377322237e-08, "loss": 0.4525, "step": 16202 }, { "epoch": 1.9717675692120475, "grad_norm": 2.2282772064208984, "learning_rate": 1.0150764772792755e-08, "loss": 0.3621, "step": 16203 }, { "epoch": 1.9718892607240646, "grad_norm": 2.02528715133667, "learning_rate": 1.0063084302703862e-08, "loss": 0.4205, "step": 16204 }, { "epoch": 1.9720109522360816, "grad_norm": 2.36088490486145, "learning_rate": 9.975783970391783e-09, "loss": 0.3504, "step": 16205 }, { "epoch": 1.9721326437480986, "grad_norm": 2.020817518234253, "learning_rate": 9.888863779180524e-09, "loss": 0.3479, "step": 16206 }, { "epoch": 1.9722543352601156, "grad_norm": 2.278749704360962, "learning_rate": 9.802323732375219e-09, "loss": 0.3466, "step": 16207 }, { "epoch": 1.9723760267721326, "grad_norm": 2.411249876022339, "learning_rate": 9.71616383327101e-09, "loss": 0.3689, "step": 16208 }, { "epoch": 1.9724977182841497, "grad_norm": 2.818837881088257, "learning_rate": 9.630384085145273e-09, "loss": 0.2697, "step": 16209 }, { "epoch": 1.9726194097961667, "grad_norm": 1.7738935947418213, "learning_rate": 9.544984491263177e-09, "loss": 0.3966, "step": 16210 }, { "epoch": 1.9727411013081837, "grad_norm": 1.6400485038757324, "learning_rate": 9.459965054872122e-09, "loss": 0.3621, "step": 16211 }, { "epoch": 1.9728627928202007, "grad_norm": 1.6486150026321411, "learning_rate": 9.375325779209521e-09, "loss": 0.3513, "step": 16212 }, { "epoch": 1.9729844843322177, "grad_norm": 1.7734371423721313, "learning_rate": 9.29106666749502e-09, "loss": 0.3557, "step": 16213 }, { "epoch": 1.9731061758442348, "grad_norm": 1.3012028932571411, "learning_rate": 9.20718772293494e-09, "loss": 0.3305, "step": 16214 }, { "epoch": 1.9732278673562518, "grad_norm": 1.6839501857757568, "learning_rate": 9.123688948721176e-09, "loss": 0.3148, "step": 16215 }, { "epoch": 1.9733495588682688, "grad_norm": 3.3136820793151855, "learning_rate": 9.040570348031185e-09, "loss": 0.4215, "step": 16216 }, { "epoch": 1.9734712503802858, "grad_norm": 1.7796372175216675, "learning_rate": 8.957831924027993e-09, "loss": 0.3669, "step": 16217 }, { "epoch": 1.973592941892303, "grad_norm": 1.924067497253418, "learning_rate": 8.87547367985908e-09, "loss": 0.4166, "step": 16218 }, { "epoch": 1.97371463340432, "grad_norm": 3.0644567012786865, "learning_rate": 8.79349561865861e-09, "loss": 0.3381, "step": 16219 }, { "epoch": 1.973836324916337, "grad_norm": 1.8464536666870117, "learning_rate": 8.711897743547416e-09, "loss": 0.3084, "step": 16220 }, { "epoch": 1.9739580164283541, "grad_norm": 1.678406000137329, "learning_rate": 8.630680057628572e-09, "loss": 0.382, "step": 16221 }, { "epoch": 1.9740797079403711, "grad_norm": 2.144219160079956, "learning_rate": 8.549842563992939e-09, "loss": 0.4112, "step": 16222 }, { "epoch": 1.9742013994523882, "grad_norm": 2.707542896270752, "learning_rate": 8.469385265718055e-09, "loss": 0.4175, "step": 16223 }, { "epoch": 1.9743230909644054, "grad_norm": 2.1397154331207275, "learning_rate": 8.389308165863696e-09, "loss": 0.3802, "step": 16224 }, { "epoch": 1.9744447824764224, "grad_norm": 2.093106269836426, "learning_rate": 8.309611267477424e-09, "loss": 0.365, "step": 16225 }, { "epoch": 1.9745664739884394, "grad_norm": 1.6024620532989502, "learning_rate": 8.230294573592368e-09, "loss": 0.3809, "step": 16226 }, { "epoch": 1.9746881655004564, "grad_norm": 1.8772417306900024, "learning_rate": 8.151358087226113e-09, "loss": 0.3627, "step": 16227 }, { "epoch": 1.9748098570124735, "grad_norm": 2.2356367111206055, "learning_rate": 8.072801811384034e-09, "loss": 0.3675, "step": 16228 }, { "epoch": 1.9749315485244905, "grad_norm": 1.6852242946624756, "learning_rate": 7.994625749052632e-09, "loss": 0.3483, "step": 16229 }, { "epoch": 1.9750532400365075, "grad_norm": 2.2831907272338867, "learning_rate": 7.916829903208411e-09, "loss": 0.2979, "step": 16230 }, { "epoch": 1.9751749315485245, "grad_norm": 1.5482177734375, "learning_rate": 7.83941427681123e-09, "loss": 0.3406, "step": 16231 }, { "epoch": 1.9752966230605415, "grad_norm": 3.005664825439453, "learning_rate": 7.762378872806509e-09, "loss": 0.3234, "step": 16232 }, { "epoch": 1.9754183145725586, "grad_norm": 2.0898256301879883, "learning_rate": 7.685723694127456e-09, "loss": 0.3982, "step": 16233 }, { "epoch": 1.9755400060845756, "grad_norm": 2.306986093521118, "learning_rate": 7.609448743688407e-09, "loss": 0.3629, "step": 16234 }, { "epoch": 1.9756616975965926, "grad_norm": 1.5853625535964966, "learning_rate": 7.533554024392598e-09, "loss": 0.373, "step": 16235 }, { "epoch": 1.9757833891086096, "grad_norm": 1.8018343448638916, "learning_rate": 7.458039539129935e-09, "loss": 0.403, "step": 16236 }, { "epoch": 1.9759050806206266, "grad_norm": 3.2835536003112793, "learning_rate": 7.382905290770348e-09, "loss": 0.4353, "step": 16237 }, { "epoch": 1.9760267721326437, "grad_norm": 1.5987604856491089, "learning_rate": 7.30815128217599e-09, "loss": 0.3654, "step": 16238 }, { "epoch": 1.9761484636446607, "grad_norm": 1.9460461139678955, "learning_rate": 7.233777516190144e-09, "loss": 0.3835, "step": 16239 }, { "epoch": 1.9762701551566777, "grad_norm": 1.9640945196151733, "learning_rate": 7.1597839956427665e-09, "loss": 0.3879, "step": 16240 }, { "epoch": 1.9763918466686947, "grad_norm": 3.649320363998413, "learning_rate": 7.086170723350494e-09, "loss": 0.2882, "step": 16241 }, { "epoch": 1.9765135381807117, "grad_norm": 2.2662365436553955, "learning_rate": 7.0129377021121996e-09, "loss": 0.409, "step": 16242 }, { "epoch": 1.976635229692729, "grad_norm": 1.8585344552993774, "learning_rate": 6.940084934716762e-09, "loss": 0.4072, "step": 16243 }, { "epoch": 1.976756921204746, "grad_norm": 2.306539297103882, "learning_rate": 6.867612423935299e-09, "loss": 0.3298, "step": 16244 }, { "epoch": 1.976878612716763, "grad_norm": 2.2497708797454834, "learning_rate": 6.795520172525605e-09, "loss": 0.4123, "step": 16245 }, { "epoch": 1.97700030422878, "grad_norm": 1.7580674886703491, "learning_rate": 6.72380818323215e-09, "loss": 0.3592, "step": 16246 }, { "epoch": 1.977121995740797, "grad_norm": 2.7794220447540283, "learning_rate": 6.6524764587816425e-09, "loss": 0.4345, "step": 16247 }, { "epoch": 1.977243687252814, "grad_norm": 1.8133848905563354, "learning_rate": 6.581525001890798e-09, "loss": 0.3562, "step": 16248 }, { "epoch": 1.9773653787648313, "grad_norm": 2.026087760925293, "learning_rate": 6.510953815256349e-09, "loss": 0.3765, "step": 16249 }, { "epoch": 1.9774870702768483, "grad_norm": 2.466622829437256, "learning_rate": 6.440762901567254e-09, "loss": 0.3347, "step": 16250 }, { "epoch": 1.9776087617888654, "grad_norm": 3.245797872543335, "learning_rate": 6.370952263491381e-09, "loss": 0.421, "step": 16251 }, { "epoch": 1.9777304533008824, "grad_norm": 1.4088108539581299, "learning_rate": 6.301521903687713e-09, "loss": 0.3869, "step": 16252 }, { "epoch": 1.9778521448128994, "grad_norm": 2.29914927482605, "learning_rate": 6.23247182479525e-09, "loss": 0.3552, "step": 16253 }, { "epoch": 1.9779738363249164, "grad_norm": 1.9885870218276978, "learning_rate": 6.1638020294441105e-09, "loss": 0.4096, "step": 16254 }, { "epoch": 1.9780955278369334, "grad_norm": 2.1441006660461426, "learning_rate": 6.095512520246649e-09, "loss": 0.3472, "step": 16255 }, { "epoch": 1.9782172193489505, "grad_norm": 1.488221526145935, "learning_rate": 6.027603299800788e-09, "loss": 0.3222, "step": 16256 }, { "epoch": 1.9783389108609675, "grad_norm": 1.8380842208862305, "learning_rate": 5.960074370692237e-09, "loss": 0.3661, "step": 16257 }, { "epoch": 1.9784606023729845, "grad_norm": 2.064291000366211, "learning_rate": 5.892925735487831e-09, "loss": 0.3727, "step": 16258 }, { "epoch": 1.9785822938850015, "grad_norm": 2.0364344120025635, "learning_rate": 5.826157396744414e-09, "loss": 0.3331, "step": 16259 }, { "epoch": 1.9787039853970185, "grad_norm": 1.9616774320602417, "learning_rate": 5.759769357003287e-09, "loss": 0.3762, "step": 16260 }, { "epoch": 1.9788256769090355, "grad_norm": 1.8364132642745972, "learning_rate": 5.6937616187890954e-09, "loss": 0.3147, "step": 16261 }, { "epoch": 1.9789473684210526, "grad_norm": 1.9974987506866455, "learning_rate": 5.628134184615386e-09, "loss": 0.357, "step": 16262 }, { "epoch": 1.9790690599330696, "grad_norm": 1.3815975189208984, "learning_rate": 5.562887056977939e-09, "loss": 0.3701, "step": 16263 }, { "epoch": 1.9791907514450866, "grad_norm": 2.103435754776001, "learning_rate": 5.498020238359214e-09, "loss": 0.3786, "step": 16264 }, { "epoch": 1.9793124429571036, "grad_norm": 1.6844791173934937, "learning_rate": 5.4335337312283465e-09, "loss": 0.3856, "step": 16265 }, { "epoch": 1.9794341344691206, "grad_norm": 1.456583857536316, "learning_rate": 5.369427538040039e-09, "loss": 0.3776, "step": 16266 }, { "epoch": 1.9795558259811377, "grad_norm": 2.071108341217041, "learning_rate": 5.305701661232343e-09, "loss": 0.3246, "step": 16267 }, { "epoch": 1.979677517493155, "grad_norm": 1.4558171033859253, "learning_rate": 5.242356103231094e-09, "loss": 0.3382, "step": 16268 }, { "epoch": 1.979799209005172, "grad_norm": 1.5601192712783813, "learning_rate": 5.179390866445477e-09, "loss": 0.3436, "step": 16269 }, { "epoch": 1.979920900517189, "grad_norm": 1.646645426750183, "learning_rate": 5.116805953272464e-09, "loss": 0.4212, "step": 16270 }, { "epoch": 1.980042592029206, "grad_norm": 2.3790793418884277, "learning_rate": 5.054601366093481e-09, "loss": 0.3075, "step": 16271 }, { "epoch": 1.980164283541223, "grad_norm": 1.6709163188934326, "learning_rate": 4.9927771072744154e-09, "loss": 0.3358, "step": 16272 }, { "epoch": 1.98028597505324, "grad_norm": 1.6739990711212158, "learning_rate": 4.93133317917005e-09, "loss": 0.3622, "step": 16273 }, { "epoch": 1.980407666565257, "grad_norm": 1.8143036365509033, "learning_rate": 4.870269584116294e-09, "loss": 0.3606, "step": 16274 }, { "epoch": 1.9805293580772743, "grad_norm": 2.032560348510742, "learning_rate": 4.809586324437954e-09, "loss": 0.3398, "step": 16275 }, { "epoch": 1.9806510495892913, "grad_norm": 1.6154775619506836, "learning_rate": 4.749283402443183e-09, "loss": 0.3544, "step": 16276 }, { "epoch": 1.9807727411013083, "grad_norm": 1.6917407512664795, "learning_rate": 4.689360820427924e-09, "loss": 0.3929, "step": 16277 }, { "epoch": 1.9808944326133253, "grad_norm": 1.994578242301941, "learning_rate": 4.629818580670353e-09, "loss": 0.3394, "step": 16278 }, { "epoch": 1.9810161241253423, "grad_norm": 1.7406855821609497, "learning_rate": 4.570656685438657e-09, "loss": 0.3199, "step": 16279 }, { "epoch": 1.9811378156373594, "grad_norm": 1.9072710275650024, "learning_rate": 4.511875136983257e-09, "loss": 0.3633, "step": 16280 }, { "epoch": 1.9812595071493764, "grad_norm": 1.6412389278411865, "learning_rate": 4.453473937539033e-09, "loss": 0.3626, "step": 16281 }, { "epoch": 1.9813811986613934, "grad_norm": 1.6243566274642944, "learning_rate": 4.39545308933087e-09, "loss": 0.324, "step": 16282 }, { "epoch": 1.9815028901734104, "grad_norm": 1.5476268529891968, "learning_rate": 4.3378125945647835e-09, "loss": 0.3918, "step": 16283 }, { "epoch": 1.9816245816854274, "grad_norm": 1.8445093631744385, "learning_rate": 4.280552455435683e-09, "loss": 0.2988, "step": 16284 }, { "epoch": 1.9817462731974445, "grad_norm": 2.0630104541778564, "learning_rate": 4.223672674120716e-09, "loss": 0.346, "step": 16285 }, { "epoch": 1.9818679647094615, "grad_norm": 1.8288735151290894, "learning_rate": 4.167173252785928e-09, "loss": 0.3808, "step": 16286 }, { "epoch": 1.9819896562214785, "grad_norm": 2.116935968399048, "learning_rate": 4.1110541935796e-09, "loss": 0.4183, "step": 16287 }, { "epoch": 1.9821113477334955, "grad_norm": 2.6456518173217773, "learning_rate": 4.055315498638912e-09, "loss": 0.3277, "step": 16288 }, { "epoch": 1.9822330392455125, "grad_norm": 1.4434562921524048, "learning_rate": 3.999957170083279e-09, "loss": 0.3202, "step": 16289 }, { "epoch": 1.9823547307575295, "grad_norm": 1.547217845916748, "learning_rate": 3.944979210019906e-09, "loss": 0.3672, "step": 16290 }, { "epoch": 1.9824764222695466, "grad_norm": 1.7337281703948975, "learning_rate": 3.890381620541561e-09, "loss": 0.3867, "step": 16291 }, { "epoch": 1.9825981137815636, "grad_norm": 2.7446372509002686, "learning_rate": 3.836164403724363e-09, "loss": 0.4059, "step": 16292 }, { "epoch": 1.9827198052935808, "grad_norm": 2.2903504371643066, "learning_rate": 3.7823275616322155e-09, "loss": 0.4208, "step": 16293 }, { "epoch": 1.9828414968055978, "grad_norm": 2.8361003398895264, "learning_rate": 3.728871096312369e-09, "loss": 0.4194, "step": 16294 }, { "epoch": 1.9829631883176149, "grad_norm": 1.6007697582244873, "learning_rate": 3.6757950098020854e-09, "loss": 0.338, "step": 16295 }, { "epoch": 1.9830848798296319, "grad_norm": 2.583404064178467, "learning_rate": 3.623099304117528e-09, "loss": 0.4514, "step": 16296 }, { "epoch": 1.983206571341649, "grad_norm": 1.7916226387023926, "learning_rate": 3.5707839812659796e-09, "loss": 0.3888, "step": 16297 }, { "epoch": 1.983328262853666, "grad_norm": 2.3017799854278564, "learning_rate": 3.5188490432380704e-09, "loss": 0.3621, "step": 16298 }, { "epoch": 1.983449954365683, "grad_norm": 1.813517451286316, "learning_rate": 3.467294492008888e-09, "loss": 0.3652, "step": 16299 }, { "epoch": 1.9835716458777002, "grad_norm": 1.5048482418060303, "learning_rate": 3.4161203295401955e-09, "loss": 0.3617, "step": 16300 }, { "epoch": 1.9836933373897172, "grad_norm": 1.6744508743286133, "learning_rate": 3.3653265577815454e-09, "loss": 0.356, "step": 16301 }, { "epoch": 1.9838150289017342, "grad_norm": 1.656672716140747, "learning_rate": 3.3149131786625046e-09, "loss": 0.3753, "step": 16302 }, { "epoch": 1.9839367204137512, "grad_norm": 1.8303706645965576, "learning_rate": 3.264880194103759e-09, "loss": 0.3477, "step": 16303 }, { "epoch": 1.9840584119257683, "grad_norm": 2.753491163253784, "learning_rate": 3.2152276060093413e-09, "loss": 0.4346, "step": 16304 }, { "epoch": 1.9841801034377853, "grad_norm": 2.1673684120178223, "learning_rate": 3.1659554162666307e-09, "loss": 0.3974, "step": 16305 }, { "epoch": 1.9843017949498023, "grad_norm": 2.2132246494293213, "learning_rate": 3.117063626751904e-09, "loss": 0.3672, "step": 16306 }, { "epoch": 1.9844234864618193, "grad_norm": 1.270691990852356, "learning_rate": 3.0685522393247846e-09, "loss": 0.3121, "step": 16307 }, { "epoch": 1.9845451779738363, "grad_norm": 3.200900077819824, "learning_rate": 3.020421255832684e-09, "loss": 0.4375, "step": 16308 }, { "epoch": 1.9846668694858534, "grad_norm": 1.888856053352356, "learning_rate": 2.9726706781052493e-09, "loss": 0.3802, "step": 16309 }, { "epoch": 1.9847885609978704, "grad_norm": 2.113400459289551, "learning_rate": 2.9253005079610265e-09, "loss": 0.341, "step": 16310 }, { "epoch": 1.9849102525098874, "grad_norm": 1.684683084487915, "learning_rate": 2.878310747201907e-09, "loss": 0.2993, "step": 16311 }, { "epoch": 1.9850319440219044, "grad_norm": 1.3620672225952148, "learning_rate": 2.8317013976153496e-09, "loss": 0.2926, "step": 16312 }, { "epoch": 1.9851536355339214, "grad_norm": 2.3848178386688232, "learning_rate": 2.7854724609766014e-09, "loss": 0.3355, "step": 16313 }, { "epoch": 1.9852753270459385, "grad_norm": 2.0870959758758545, "learning_rate": 2.7396239390431455e-09, "loss": 0.3394, "step": 16314 }, { "epoch": 1.9853970185579555, "grad_norm": 1.4668195247650146, "learning_rate": 2.6941558335602523e-09, "loss": 0.3769, "step": 16315 }, { "epoch": 1.9855187100699725, "grad_norm": 1.9599826335906982, "learning_rate": 2.6490681462576497e-09, "loss": 0.337, "step": 16316 }, { "epoch": 1.9856404015819895, "grad_norm": 1.9957466125488281, "learning_rate": 2.6043608788517415e-09, "loss": 0.4528, "step": 16317 }, { "epoch": 1.9857620930940065, "grad_norm": 1.765285849571228, "learning_rate": 2.5600340330433905e-09, "loss": 0.3745, "step": 16318 }, { "epoch": 1.9858837846060238, "grad_norm": 2.070235252380371, "learning_rate": 2.5160876105190245e-09, "loss": 0.3708, "step": 16319 }, { "epoch": 1.9860054761180408, "grad_norm": 1.7341270446777344, "learning_rate": 2.4725216129517506e-09, "loss": 0.3626, "step": 16320 }, { "epoch": 1.9861271676300578, "grad_norm": 1.7934695482254028, "learning_rate": 2.4293360419980204e-09, "loss": 0.3657, "step": 16321 }, { "epoch": 1.9862488591420748, "grad_norm": 2.795962333679199, "learning_rate": 2.3865308993020754e-09, "loss": 0.3037, "step": 16322 }, { "epoch": 1.9863705506540918, "grad_norm": 2.0234384536743164, "learning_rate": 2.3441061864926118e-09, "loss": 0.4152, "step": 16323 }, { "epoch": 1.9864922421661089, "grad_norm": 2.041461229324341, "learning_rate": 2.3020619051850046e-09, "loss": 0.3394, "step": 16324 }, { "epoch": 1.986613933678126, "grad_norm": 1.6153767108917236, "learning_rate": 2.2603980569768648e-09, "loss": 0.358, "step": 16325 }, { "epoch": 1.9867356251901431, "grad_norm": 1.8474339246749878, "learning_rate": 2.2191146434547006e-09, "loss": 0.3957, "step": 16326 }, { "epoch": 1.9868573167021601, "grad_norm": 2.107572078704834, "learning_rate": 2.1782116661894782e-09, "loss": 0.3802, "step": 16327 }, { "epoch": 1.9869790082141772, "grad_norm": 1.6258896589279175, "learning_rate": 2.13768912673884e-09, "loss": 0.3612, "step": 16328 }, { "epoch": 1.9871006997261942, "grad_norm": 3.6315109729766846, "learning_rate": 2.0975470266426657e-09, "loss": 0.4427, "step": 16329 }, { "epoch": 1.9872223912382112, "grad_norm": 2.224144220352173, "learning_rate": 2.057785367428622e-09, "loss": 0.4167, "step": 16330 }, { "epoch": 1.9873440827502282, "grad_norm": 1.6486867666244507, "learning_rate": 2.0184041506121633e-09, "loss": 0.3673, "step": 16331 }, { "epoch": 1.9874657742622452, "grad_norm": 1.7491037845611572, "learning_rate": 1.97940337768876e-09, "loss": 0.4017, "step": 16332 }, { "epoch": 1.9875874657742623, "grad_norm": 2.3490676879882812, "learning_rate": 1.9407830501438907e-09, "loss": 0.3274, "step": 16333 }, { "epoch": 1.9877091572862793, "grad_norm": 1.7487815618515015, "learning_rate": 1.9025431694474907e-09, "loss": 0.3174, "step": 16334 }, { "epoch": 1.9878308487982963, "grad_norm": 2.012241840362549, "learning_rate": 1.8646837370539515e-09, "loss": 0.3311, "step": 16335 }, { "epoch": 1.9879525403103133, "grad_norm": 1.857872724533081, "learning_rate": 1.8272047544043436e-09, "loss": 0.3877, "step": 16336 }, { "epoch": 1.9880742318223303, "grad_norm": 1.7917633056640625, "learning_rate": 1.790106222925303e-09, "loss": 0.3811, "step": 16337 }, { "epoch": 1.9881959233343474, "grad_norm": 2.7612621784210205, "learning_rate": 1.7533881440268131e-09, "loss": 0.4067, "step": 16338 }, { "epoch": 1.9883176148463644, "grad_norm": 1.765322208404541, "learning_rate": 1.7170505191077546e-09, "loss": 0.3418, "step": 16339 }, { "epoch": 1.9884393063583814, "grad_norm": 2.567042350769043, "learning_rate": 1.6810933495503556e-09, "loss": 0.3368, "step": 16340 }, { "epoch": 1.9885609978703984, "grad_norm": 1.4109476804733276, "learning_rate": 1.6455166367224106e-09, "loss": 0.3518, "step": 16341 }, { "epoch": 1.9886826893824154, "grad_norm": 1.6678557395935059, "learning_rate": 1.610320381978392e-09, "loss": 0.399, "step": 16342 }, { "epoch": 1.9888043808944325, "grad_norm": 1.952375054359436, "learning_rate": 1.5755045866572283e-09, "loss": 0.3809, "step": 16343 }, { "epoch": 1.9889260724064497, "grad_norm": 2.4348909854888916, "learning_rate": 1.5410692520834158e-09, "loss": 0.359, "step": 16344 }, { "epoch": 1.9890477639184667, "grad_norm": 1.7893550395965576, "learning_rate": 1.5070143795670177e-09, "loss": 0.3626, "step": 16345 }, { "epoch": 1.9891694554304837, "grad_norm": 1.501473307609558, "learning_rate": 1.4733399704058848e-09, "loss": 0.3764, "step": 16346 }, { "epoch": 1.9892911469425008, "grad_norm": 1.5196964740753174, "learning_rate": 1.440046025878994e-09, "loss": 0.3354, "step": 16347 }, { "epoch": 1.9894128384545178, "grad_norm": 1.4251240491867065, "learning_rate": 1.4071325472542197e-09, "loss": 0.3727, "step": 16348 }, { "epoch": 1.9895345299665348, "grad_norm": 1.7477946281433105, "learning_rate": 1.3745995357838937e-09, "loss": 0.2962, "step": 16349 }, { "epoch": 1.989656221478552, "grad_norm": 2.126129150390625, "learning_rate": 1.342446992707025e-09, "loss": 0.3324, "step": 16350 }, { "epoch": 1.989777912990569, "grad_norm": 1.8252509832382202, "learning_rate": 1.3106749192448587e-09, "loss": 0.381, "step": 16351 }, { "epoch": 1.989899604502586, "grad_norm": 2.0943539142608643, "learning_rate": 1.2792833166086483e-09, "loss": 0.3907, "step": 16352 }, { "epoch": 1.990021296014603, "grad_norm": 2.352362632751465, "learning_rate": 1.2482721859907731e-09, "loss": 0.4269, "step": 16353 }, { "epoch": 1.99014298752662, "grad_norm": 1.5144728422164917, "learning_rate": 1.2176415285736209e-09, "loss": 0.3615, "step": 16354 }, { "epoch": 1.9902646790386371, "grad_norm": 2.0436851978302, "learning_rate": 1.1873913455207053e-09, "loss": 0.3611, "step": 16355 }, { "epoch": 1.9903863705506541, "grad_norm": 1.8111693859100342, "learning_rate": 1.1575216379844378e-09, "loss": 0.3684, "step": 16356 }, { "epoch": 1.9905080620626712, "grad_norm": 1.826930046081543, "learning_rate": 1.1280324071016868e-09, "loss": 0.3116, "step": 16357 }, { "epoch": 1.9906297535746882, "grad_norm": 2.86380672454834, "learning_rate": 1.0989236539926673e-09, "loss": 0.3789, "step": 16358 }, { "epoch": 1.9907514450867052, "grad_norm": 3.0312790870666504, "learning_rate": 1.070195379766492e-09, "loss": 0.4541, "step": 16359 }, { "epoch": 1.9908731365987222, "grad_norm": 2.439685583114624, "learning_rate": 1.0418475855156207e-09, "loss": 0.3987, "step": 16360 }, { "epoch": 1.9909948281107392, "grad_norm": 2.344956159591675, "learning_rate": 1.0138802723203001e-09, "loss": 0.2862, "step": 16361 }, { "epoch": 1.9911165196227563, "grad_norm": 2.3051962852478027, "learning_rate": 9.862934412430135e-10, "loss": 0.4021, "step": 16362 }, { "epoch": 1.9912382111347733, "grad_norm": 2.1198222637176514, "learning_rate": 9.590870933340324e-10, "loss": 0.3418, "step": 16363 }, { "epoch": 1.9913599026467903, "grad_norm": 1.7135363817214966, "learning_rate": 9.322612296280842e-10, "loss": 0.4111, "step": 16364 }, { "epoch": 1.9914815941588073, "grad_norm": 2.4856932163238525, "learning_rate": 9.058158511476845e-10, "loss": 0.4134, "step": 16365 }, { "epoch": 1.9916032856708243, "grad_norm": 1.8987823724746704, "learning_rate": 8.797509588964748e-10, "loss": 0.4072, "step": 16366 }, { "epoch": 1.9917249771828414, "grad_norm": 1.743479609489441, "learning_rate": 8.54066553869215e-10, "loss": 0.3501, "step": 16367 }, { "epoch": 1.9918466686948584, "grad_norm": 1.894911527633667, "learning_rate": 8.287626370406809e-10, "loss": 0.3745, "step": 16368 }, { "epoch": 1.9919683602068756, "grad_norm": 2.3067073822021484, "learning_rate": 8.038392093756564e-10, "loss": 0.3584, "step": 16369 }, { "epoch": 1.9920900517188926, "grad_norm": 1.5864689350128174, "learning_rate": 7.792962718222719e-10, "loss": 0.3831, "step": 16370 }, { "epoch": 1.9922117432309097, "grad_norm": 2.287158250808716, "learning_rate": 7.551338253131147e-10, "loss": 0.4143, "step": 16371 }, { "epoch": 1.9923334347429267, "grad_norm": 2.90242862701416, "learning_rate": 7.313518707685596e-10, "loss": 0.4469, "step": 16372 }, { "epoch": 1.9924551262549437, "grad_norm": 1.7477468252182007, "learning_rate": 7.079504090934386e-10, "loss": 0.3422, "step": 16373 }, { "epoch": 1.9925768177669607, "grad_norm": 1.8733853101730347, "learning_rate": 6.849294411781504e-10, "loss": 0.3696, "step": 16374 }, { "epoch": 1.9926985092789777, "grad_norm": 3.6517434120178223, "learning_rate": 6.62288967898661e-10, "loss": 0.397, "step": 16375 }, { "epoch": 1.992820200790995, "grad_norm": 1.9230009317398071, "learning_rate": 6.400289901165036e-10, "loss": 0.3587, "step": 16376 }, { "epoch": 1.992941892303012, "grad_norm": 2.0829169750213623, "learning_rate": 6.181495086787781e-10, "loss": 0.3699, "step": 16377 }, { "epoch": 1.993063583815029, "grad_norm": 1.6313719749450684, "learning_rate": 5.966505244181519e-10, "loss": 0.362, "step": 16378 }, { "epoch": 1.993185275327046, "grad_norm": 1.5598270893096924, "learning_rate": 5.755320381528595e-10, "loss": 0.3335, "step": 16379 }, { "epoch": 1.993306966839063, "grad_norm": 2.51648211479187, "learning_rate": 5.54794050685592e-10, "loss": 0.4075, "step": 16380 }, { "epoch": 1.99342865835108, "grad_norm": 2.1215717792510986, "learning_rate": 5.344365628068282e-10, "loss": 0.3458, "step": 16381 }, { "epoch": 1.993550349863097, "grad_norm": 2.1789610385894775, "learning_rate": 5.144595752903936e-10, "loss": 0.3712, "step": 16382 }, { "epoch": 1.9936720413751141, "grad_norm": 1.5859107971191406, "learning_rate": 4.948630888956807e-10, "loss": 0.3295, "step": 16383 }, { "epoch": 1.9937937328871311, "grad_norm": 1.3667292594909668, "learning_rate": 4.756471043698696e-10, "loss": 0.339, "step": 16384 }, { "epoch": 1.9939154243991482, "grad_norm": 1.7453447580337524, "learning_rate": 4.568116224445973e-10, "loss": 0.3853, "step": 16385 }, { "epoch": 1.9940371159111652, "grad_norm": 2.116302728652954, "learning_rate": 4.3835664383373724e-10, "loss": 0.3441, "step": 16386 }, { "epoch": 1.9941588074231822, "grad_norm": 1.5490386486053467, "learning_rate": 4.202821692422809e-10, "loss": 0.3576, "step": 16387 }, { "epoch": 1.9942804989351992, "grad_norm": 1.4974098205566406, "learning_rate": 4.0258819935745654e-10, "loss": 0.3656, "step": 16388 }, { "epoch": 1.9944021904472162, "grad_norm": 1.8241342306137085, "learning_rate": 3.852747348520591e-10, "loss": 0.398, "step": 16389 }, { "epoch": 1.9945238819592332, "grad_norm": 1.5747286081314087, "learning_rate": 3.68341776384451e-10, "loss": 0.3647, "step": 16390 }, { "epoch": 1.9946455734712503, "grad_norm": 2.0876684188842773, "learning_rate": 3.5178932459967175e-10, "loss": 0.339, "step": 16391 }, { "epoch": 1.9947672649832673, "grad_norm": 1.552120566368103, "learning_rate": 3.35617380128328e-10, "loss": 0.3347, "step": 16392 }, { "epoch": 1.9948889564952843, "grad_norm": 2.3483457565307617, "learning_rate": 3.198259435832629e-10, "loss": 0.4246, "step": 16393 }, { "epoch": 1.9950106480073015, "grad_norm": 1.9285962581634521, "learning_rate": 3.044150155684378e-10, "loss": 0.3341, "step": 16394 }, { "epoch": 1.9951323395193186, "grad_norm": 1.5609934329986572, "learning_rate": 2.8938459666894016e-10, "loss": 0.3261, "step": 16395 }, { "epoch": 1.9952540310313356, "grad_norm": 1.5706086158752441, "learning_rate": 2.747346874554246e-10, "loss": 0.368, "step": 16396 }, { "epoch": 1.9953757225433526, "grad_norm": 1.583938717842102, "learning_rate": 2.604652884874437e-10, "loss": 0.3634, "step": 16397 }, { "epoch": 1.9954974140553696, "grad_norm": 1.7183619737625122, "learning_rate": 2.4657640030678607e-10, "loss": 0.34, "step": 16398 }, { "epoch": 1.9956191055673866, "grad_norm": 4.436192989349365, "learning_rate": 2.3306802344191804e-10, "loss": 0.4817, "step": 16399 }, { "epoch": 1.9957407970794037, "grad_norm": 1.836579442024231, "learning_rate": 2.19940158407983e-10, "loss": 0.3699, "step": 16400 }, { "epoch": 1.995862488591421, "grad_norm": 1.6603862047195435, "learning_rate": 2.0719280570347112e-10, "loss": 0.3768, "step": 16401 }, { "epoch": 1.995984180103438, "grad_norm": 3.0537452697753906, "learning_rate": 1.9482596581354984e-10, "loss": 0.3005, "step": 16402 }, { "epoch": 1.996105871615455, "grad_norm": 1.9862174987792969, "learning_rate": 1.8283963920895375e-10, "loss": 0.3786, "step": 16403 }, { "epoch": 1.996227563127472, "grad_norm": 1.6122006177902222, "learning_rate": 1.712338263459845e-10, "loss": 0.3504, "step": 16404 }, { "epoch": 1.996349254639489, "grad_norm": 2.2194674015045166, "learning_rate": 1.6000852766540064e-10, "loss": 0.3099, "step": 16405 }, { "epoch": 1.996470946151506, "grad_norm": 1.7488921880722046, "learning_rate": 1.4916374359574825e-10, "loss": 0.3493, "step": 16406 }, { "epoch": 1.996592637663523, "grad_norm": 1.438949704170227, "learning_rate": 1.3869947454892007e-10, "loss": 0.339, "step": 16407 }, { "epoch": 1.99671432917554, "grad_norm": 2.3451156616210938, "learning_rate": 1.2861572092237595e-10, "loss": 0.4242, "step": 16408 }, { "epoch": 1.996836020687557, "grad_norm": 1.373085618019104, "learning_rate": 1.1891248310136328e-10, "loss": 0.3164, "step": 16409 }, { "epoch": 1.996957712199574, "grad_norm": 1.339638352394104, "learning_rate": 1.0958976145447609e-10, "loss": 0.3573, "step": 16410 }, { "epoch": 1.997079403711591, "grad_norm": 2.091158866882324, "learning_rate": 1.0064755633587553e-10, "loss": 0.3794, "step": 16411 }, { "epoch": 1.9972010952236081, "grad_norm": 2.4368090629577637, "learning_rate": 9.208586808640008e-11, "loss": 0.2876, "step": 16412 }, { "epoch": 1.9973227867356251, "grad_norm": 1.7995102405548096, "learning_rate": 8.390469703134507e-11, "loss": 0.393, "step": 16413 }, { "epoch": 1.9974444782476422, "grad_norm": 2.7665796279907227, "learning_rate": 7.610404348379341e-11, "loss": 0.4315, "step": 16414 }, { "epoch": 1.9975661697596592, "grad_norm": 1.7597237825393677, "learning_rate": 6.868390773795419e-11, "loss": 0.2918, "step": 16415 }, { "epoch": 1.9976878612716762, "grad_norm": 2.6422579288482666, "learning_rate": 6.16442900780445e-11, "loss": 0.4158, "step": 16416 }, { "epoch": 1.9978095527836932, "grad_norm": 1.8573795557022095, "learning_rate": 5.498519077051789e-11, "loss": 0.394, "step": 16417 }, { "epoch": 1.9979312442957102, "grad_norm": 1.8805248737335205, "learning_rate": 4.870661006961541e-11, "loss": 0.3527, "step": 16418 }, { "epoch": 1.9980529358077272, "grad_norm": 1.790338397026062, "learning_rate": 4.280854821514524e-11, "loss": 0.3372, "step": 16419 }, { "epoch": 1.9981746273197445, "grad_norm": 1.7787131071090698, "learning_rate": 3.729100543026221e-11, "loss": 0.3965, "step": 16420 }, { "epoch": 1.9982963188317615, "grad_norm": 1.8633769750595093, "learning_rate": 3.215398192479846e-11, "loss": 0.3527, "step": 16421 }, { "epoch": 1.9984180103437785, "grad_norm": 2.000217914581299, "learning_rate": 2.7397477894153257e-11, "loss": 0.349, "step": 16422 }, { "epoch": 1.9985397018557955, "grad_norm": 2.3797690868377686, "learning_rate": 2.3021493521513395e-11, "loss": 0.3906, "step": 16423 }, { "epoch": 1.9986613933678126, "grad_norm": 2.995281457901001, "learning_rate": 1.9026028970081656e-11, "loss": 0.4525, "step": 16424 }, { "epoch": 1.9987830848798296, "grad_norm": 1.4523077011108398, "learning_rate": 1.5411084394179044e-11, "loss": 0.3783, "step": 16425 }, { "epoch": 1.9989047763918468, "grad_norm": 1.8615294694900513, "learning_rate": 1.2176659931473212e-11, "loss": 0.3783, "step": 16426 }, { "epoch": 1.9990264679038638, "grad_norm": 2.6837189197540283, "learning_rate": 9.32275570297847e-12, "loss": 0.3881, "step": 16427 }, { "epoch": 1.9991481594158809, "grad_norm": 1.8235241174697876, "learning_rate": 6.849371819717121e-12, "loss": 0.3603, "step": 16428 }, { "epoch": 1.9992698509278979, "grad_norm": 1.722519040107727, "learning_rate": 4.756508373837676e-12, "loss": 0.3998, "step": 16429 }, { "epoch": 1.999391542439915, "grad_norm": 3.307969808578491, "learning_rate": 3.0441654463864156e-12, "loss": 0.2877, "step": 16430 }, { "epoch": 1.999513233951932, "grad_norm": 2.423255443572998, "learning_rate": 1.7123431017562752e-12, "loss": 0.4201, "step": 16431 }, { "epoch": 1.999634925463949, "grad_norm": 1.7691155672073364, "learning_rate": 7.61041391017514e-13, "loss": 0.3697, "step": 16432 }, { "epoch": 1.999756616975966, "grad_norm": 1.6485546827316284, "learning_rate": 1.902603496972688e-13, "loss": 0.3719, "step": 16433 }, { "epoch": 1.999878308487983, "grad_norm": 1.541142463684082, "learning_rate": 0.0, "loss": 0.4099, "step": 16434 }, { "epoch": 1.999878308487983, "step": 16434, "total_flos": 7.693823835761541e+17, "train_loss": 0.4233635055173034, "train_runtime": 43491.2027, "train_samples_per_second": 48.37, "train_steps_per_second": 0.378 } ], "logging_steps": 1.0, "max_steps": 16434, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.693823835761541e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }