{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 18531, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.3963628514381306e-05, "grad_norm": 20.62471154119197, "learning_rate": 9.999999935332727e-06, "loss": 1.1326, "step": 1 }, { "epoch": 5.3963628514381306e-05, "eval_loss": 1.2850651741027832, "eval_runtime": 163.9451, "eval_samples_per_second": 20.977, "eval_steps_per_second": 0.878, "step": 1 }, { "epoch": 0.00010792725702876261, "grad_norm": 11.978215884570838, "learning_rate": 9.99999974133091e-06, "loss": 0.69, "step": 2 }, { "epoch": 0.00016189088554314392, "grad_norm": 2.7920195200003564, "learning_rate": 9.999999417994552e-06, "loss": 0.8313, "step": 3 }, { "epoch": 0.00021585451405752522, "grad_norm": 2.495653632936459, "learning_rate": 9.999998965323662e-06, "loss": 0.7776, "step": 4 }, { "epoch": 0.00026981814257190655, "grad_norm": 2.9616137141489753, "learning_rate": 9.999998383318257e-06, "loss": 0.8333, "step": 5 }, { "epoch": 0.00032378177108628785, "grad_norm": 1.6624487327415252, "learning_rate": 9.999997671978352e-06, "loss": 0.6244, "step": 6 }, { "epoch": 0.00037774539960066915, "grad_norm": 1.6137268553442474, "learning_rate": 9.999996831303965e-06, "loss": 0.6916, "step": 7 }, { "epoch": 0.00043170902811505045, "grad_norm": 1.4937969414472936, "learning_rate": 9.999995861295124e-06, "loss": 0.7256, "step": 8 }, { "epoch": 0.00048567265662943174, "grad_norm": 1.5617292166348637, "learning_rate": 9.999994761951853e-06, "loss": 0.682, "step": 9 }, { "epoch": 0.0005396362851438131, "grad_norm": 1.378846924064355, "learning_rate": 9.999993533274188e-06, "loss": 0.5825, "step": 10 }, { "epoch": 0.0005935999136581943, "grad_norm": 1.5976562369786855, "learning_rate": 9.999992175262161e-06, "loss": 0.5589, "step": 11 }, { "epoch": 0.0006475635421725757, "grad_norm": 1.6012385037210424, "learning_rate": 9.999990687915811e-06, "loss": 0.6812, "step": 12 }, { "epoch": 0.0007015271706869569, "grad_norm": 1.2509010209304539, "learning_rate": 9.999989071235184e-06, "loss": 0.6066, "step": 13 }, { "epoch": 0.0007554907992013383, "grad_norm": 1.8035441373911127, "learning_rate": 9.999987325220323e-06, "loss": 0.7727, "step": 14 }, { "epoch": 0.0008094544277157196, "grad_norm": 1.3921621567950277, "learning_rate": 9.999985449871278e-06, "loss": 0.4671, "step": 15 }, { "epoch": 0.0008634180562301009, "grad_norm": 1.6855287366327114, "learning_rate": 9.999983445188106e-06, "loss": 0.7438, "step": 16 }, { "epoch": 0.0009173816847444822, "grad_norm": 1.3435022323213992, "learning_rate": 9.999981311170862e-06, "loss": 0.6278, "step": 17 }, { "epoch": 0.0009713453132588635, "grad_norm": 1.2677017155543135, "learning_rate": 9.99997904781961e-06, "loss": 0.5984, "step": 18 }, { "epoch": 0.0010253089417732448, "grad_norm": 1.6430046461611512, "learning_rate": 9.99997665513441e-06, "loss": 0.88, "step": 19 }, { "epoch": 0.0010792725702876262, "grad_norm": 1.4463352162823673, "learning_rate": 9.999974133115337e-06, "loss": 0.6748, "step": 20 }, { "epoch": 0.0011332361988020075, "grad_norm": 1.5050398589101073, "learning_rate": 9.999971481762459e-06, "loss": 0.6264, "step": 21 }, { "epoch": 0.0011871998273163887, "grad_norm": 1.5644834623203105, "learning_rate": 9.999968701075853e-06, "loss": 0.6322, "step": 22 }, { "epoch": 0.00124116345583077, "grad_norm": 1.3403191435319328, "learning_rate": 9.999965791055601e-06, "loss": 0.5249, "step": 23 }, { "epoch": 
0.0012951270843451514, "grad_norm": 1.493128153009394, "learning_rate": 9.999962751701786e-06, "loss": 0.7377, "step": 24 }, { "epoch": 0.0013490907128595327, "grad_norm": 1.1182061816796247, "learning_rate": 9.999959583014494e-06, "loss": 0.5741, "step": 25 }, { "epoch": 0.0014030543413739139, "grad_norm": 1.2693796235233346, "learning_rate": 9.999956284993815e-06, "loss": 0.5187, "step": 26 }, { "epoch": 0.0014570179698882952, "grad_norm": 1.0740755829937843, "learning_rate": 9.999952857639847e-06, "loss": 0.4692, "step": 27 }, { "epoch": 0.0015109815984026766, "grad_norm": 1.4554543012598844, "learning_rate": 9.999949300952688e-06, "loss": 0.6432, "step": 28 }, { "epoch": 0.001564945226917058, "grad_norm": 1.5263483734100178, "learning_rate": 9.999945614932437e-06, "loss": 0.5959, "step": 29 }, { "epoch": 0.0016189088554314393, "grad_norm": 1.3596137256673293, "learning_rate": 9.999941799579203e-06, "loss": 0.754, "step": 30 }, { "epoch": 0.0016728724839458204, "grad_norm": 0.9196760705015845, "learning_rate": 9.999937854893096e-06, "loss": 0.4175, "step": 31 }, { "epoch": 0.0017268361124602018, "grad_norm": 1.2698619328595582, "learning_rate": 9.999933780874227e-06, "loss": 0.5936, "step": 32 }, { "epoch": 0.0017807997409745831, "grad_norm": 1.1624589243802812, "learning_rate": 9.999929577522715e-06, "loss": 0.6055, "step": 33 }, { "epoch": 0.0018347633694889645, "grad_norm": 1.1733684912023161, "learning_rate": 9.999925244838679e-06, "loss": 0.7958, "step": 34 }, { "epoch": 0.0018887269980033458, "grad_norm": 1.4988701157981699, "learning_rate": 9.999920782822246e-06, "loss": 0.7344, "step": 35 }, { "epoch": 0.001942690626517727, "grad_norm": 1.1064293874678428, "learning_rate": 9.999916191473542e-06, "loss": 0.4257, "step": 36 }, { "epoch": 0.0019966542550321083, "grad_norm": 1.098798309417731, "learning_rate": 9.9999114707927e-06, "loss": 0.4599, "step": 37 }, { "epoch": 0.0020506178835464897, "grad_norm": 1.2564662535885602, "learning_rate": 9.999906620779854e-06, "loss": 0.5248, "step": 38 }, { "epoch": 0.002104581512060871, "grad_norm": 1.1966810881700105, "learning_rate": 9.999901641435147e-06, "loss": 0.5687, "step": 39 }, { "epoch": 0.0021585451405752524, "grad_norm": 1.1032986453241542, "learning_rate": 9.99989653275872e-06, "loss": 0.5577, "step": 40 }, { "epoch": 0.0022125087690896337, "grad_norm": 1.1920229090432144, "learning_rate": 9.999891294750719e-06, "loss": 0.6813, "step": 41 }, { "epoch": 0.002266472397604015, "grad_norm": 1.5572389614879893, "learning_rate": 9.999885927411296e-06, "loss": 0.6559, "step": 42 }, { "epoch": 0.002320436026118396, "grad_norm": 1.4067941217710171, "learning_rate": 9.999880430740603e-06, "loss": 0.6079, "step": 43 }, { "epoch": 0.0023743996546327774, "grad_norm": 1.2737559465331796, "learning_rate": 9.999874804738801e-06, "loss": 0.6533, "step": 44 }, { "epoch": 0.0024283632831471587, "grad_norm": 1.1590656997016004, "learning_rate": 9.99986904940605e-06, "loss": 0.5842, "step": 45 }, { "epoch": 0.00248232691166154, "grad_norm": 1.3696445997560607, "learning_rate": 9.999863164742515e-06, "loss": 0.6552, "step": 46 }, { "epoch": 0.0025362905401759214, "grad_norm": 1.5172040261890776, "learning_rate": 9.999857150748368e-06, "loss": 0.7312, "step": 47 }, { "epoch": 0.0025902541686903028, "grad_norm": 1.2528565188917922, "learning_rate": 9.99985100742378e-06, "loss": 0.615, "step": 48 }, { "epoch": 0.002644217797204684, "grad_norm": 1.589497912926135, "learning_rate": 9.999844734768924e-06, "loss": 0.5708, "step": 49 }, { "epoch": 
0.0026981814257190655, "grad_norm": 1.2507827369389508, "learning_rate": 9.999838332783986e-06, "loss": 0.5464, "step": 50 }, { "epoch": 0.002752145054233447, "grad_norm": 1.1368103015379805, "learning_rate": 9.999831801469148e-06, "loss": 0.5777, "step": 51 }, { "epoch": 0.0028061086827478278, "grad_norm": 1.414047683271421, "learning_rate": 9.999825140824596e-06, "loss": 0.4937, "step": 52 }, { "epoch": 0.002860072311262209, "grad_norm": 1.5281045248145013, "learning_rate": 9.999818350850521e-06, "loss": 0.5942, "step": 53 }, { "epoch": 0.0029140359397765905, "grad_norm": 1.2491596458976133, "learning_rate": 9.999811431547122e-06, "loss": 0.5425, "step": 54 }, { "epoch": 0.002967999568290972, "grad_norm": 1.2783137365804016, "learning_rate": 9.999804382914596e-06, "loss": 0.5798, "step": 55 }, { "epoch": 0.003021963196805353, "grad_norm": 1.3064064194461447, "learning_rate": 9.999797204953144e-06, "loss": 0.5842, "step": 56 }, { "epoch": 0.0030759268253197345, "grad_norm": 1.137432305904066, "learning_rate": 9.999789897662975e-06, "loss": 0.4564, "step": 57 }, { "epoch": 0.003129890453834116, "grad_norm": 1.3204701904729959, "learning_rate": 9.999782461044297e-06, "loss": 0.5736, "step": 58 }, { "epoch": 0.0031838540823484972, "grad_norm": 1.3948398219985885, "learning_rate": 9.999774895097325e-06, "loss": 0.7644, "step": 59 }, { "epoch": 0.0032378177108628786, "grad_norm": 1.174379552130365, "learning_rate": 9.999767199822274e-06, "loss": 0.4547, "step": 60 }, { "epoch": 0.00329178133937726, "grad_norm": 1.1842108746350566, "learning_rate": 9.999759375219368e-06, "loss": 0.5331, "step": 61 }, { "epoch": 0.003345744967891641, "grad_norm": 1.2289334165755277, "learning_rate": 9.999751421288831e-06, "loss": 0.6269, "step": 62 }, { "epoch": 0.003399708596406022, "grad_norm": 1.2482406798625927, "learning_rate": 9.999743338030893e-06, "loss": 0.5734, "step": 63 }, { "epoch": 0.0034536722249204036, "grad_norm": 1.2028190488134864, "learning_rate": 9.999735125445784e-06, "loss": 0.6226, "step": 64 }, { "epoch": 0.003507635853434785, "grad_norm": 1.3563974287565117, "learning_rate": 9.99972678353374e-06, "loss": 0.6946, "step": 65 }, { "epoch": 0.0035615994819491663, "grad_norm": 1.2969861284073565, "learning_rate": 9.999718312295001e-06, "loss": 0.608, "step": 66 }, { "epoch": 0.0036155631104635476, "grad_norm": 1.109704224926767, "learning_rate": 9.999709711729813e-06, "loss": 0.5284, "step": 67 }, { "epoch": 0.003669526738977929, "grad_norm": 1.0926354182967197, "learning_rate": 9.999700981838419e-06, "loss": 0.5231, "step": 68 }, { "epoch": 0.0037234903674923103, "grad_norm": 1.316966278875892, "learning_rate": 9.999692122621074e-06, "loss": 0.6912, "step": 69 }, { "epoch": 0.0037774539960066917, "grad_norm": 1.1028038098378286, "learning_rate": 9.99968313407803e-06, "loss": 0.5388, "step": 70 }, { "epoch": 0.0038314176245210726, "grad_norm": 1.4592761701618011, "learning_rate": 9.999674016209545e-06, "loss": 0.691, "step": 71 }, { "epoch": 0.003885381253035454, "grad_norm": 1.2055763924979754, "learning_rate": 9.999664769015885e-06, "loss": 0.8268, "step": 72 }, { "epoch": 0.003939344881549836, "grad_norm": 1.5208705766562787, "learning_rate": 9.999655392497311e-06, "loss": 0.5616, "step": 73 }, { "epoch": 0.003993308510064217, "grad_norm": 1.5467569289119585, "learning_rate": 9.999645886654094e-06, "loss": 0.6716, "step": 74 }, { "epoch": 0.0040472721385785984, "grad_norm": 1.1406202607003082, "learning_rate": 9.999636251486508e-06, "loss": 0.5915, "step": 75 }, { "epoch": 
0.004101235767092979, "grad_norm": 1.2502422721883082, "learning_rate": 9.999626486994831e-06, "loss": 0.5715, "step": 76 }, { "epoch": 0.00415519939560736, "grad_norm": 1.24141874973768, "learning_rate": 9.999616593179342e-06, "loss": 0.6205, "step": 77 }, { "epoch": 0.004209163024121742, "grad_norm": 1.074528911526831, "learning_rate": 9.999606570040324e-06, "loss": 0.5283, "step": 78 }, { "epoch": 0.004263126652636123, "grad_norm": 1.183869723262245, "learning_rate": 9.999596417578067e-06, "loss": 0.6861, "step": 79 }, { "epoch": 0.004317090281150505, "grad_norm": 1.3348217393736452, "learning_rate": 9.999586135792863e-06, "loss": 0.7591, "step": 80 }, { "epoch": 0.004371053909664886, "grad_norm": 1.1321295692058997, "learning_rate": 9.999575724685008e-06, "loss": 0.5951, "step": 81 }, { "epoch": 0.0044250175381792675, "grad_norm": 1.1306837011159268, "learning_rate": 9.999565184254798e-06, "loss": 0.5639, "step": 82 }, { "epoch": 0.004478981166693648, "grad_norm": 1.382248266536327, "learning_rate": 9.99955451450254e-06, "loss": 0.6085, "step": 83 }, { "epoch": 0.00453294479520803, "grad_norm": 1.0991967140033378, "learning_rate": 9.99954371542854e-06, "loss": 0.6118, "step": 84 }, { "epoch": 0.004586908423722411, "grad_norm": 1.0619382352967455, "learning_rate": 9.999532787033103e-06, "loss": 0.6348, "step": 85 }, { "epoch": 0.004640872052236792, "grad_norm": 1.2339151264107624, "learning_rate": 9.999521729316549e-06, "loss": 0.5824, "step": 86 }, { "epoch": 0.004694835680751174, "grad_norm": 1.3638922454106583, "learning_rate": 9.999510542279196e-06, "loss": 0.6507, "step": 87 }, { "epoch": 0.004748799309265555, "grad_norm": 1.0353403520223565, "learning_rate": 9.999499225921362e-06, "loss": 0.4242, "step": 88 }, { "epoch": 0.0048027629377799365, "grad_norm": 1.1388152200198804, "learning_rate": 9.999487780243374e-06, "loss": 0.6553, "step": 89 }, { "epoch": 0.0048567265662943174, "grad_norm": 1.0088999050018588, "learning_rate": 9.999476205245559e-06, "loss": 0.5447, "step": 90 }, { "epoch": 0.004910690194808699, "grad_norm": 1.3343741994178577, "learning_rate": 9.999464500928253e-06, "loss": 0.7178, "step": 91 }, { "epoch": 0.00496465382332308, "grad_norm": 1.2193554718190278, "learning_rate": 9.999452667291792e-06, "loss": 0.6227, "step": 92 }, { "epoch": 0.005018617451837462, "grad_norm": 1.2707108112552563, "learning_rate": 9.999440704336514e-06, "loss": 0.503, "step": 93 }, { "epoch": 0.005072581080351843, "grad_norm": 0.8699897258652027, "learning_rate": 9.999428612062763e-06, "loss": 0.3978, "step": 94 }, { "epoch": 0.005126544708866224, "grad_norm": 1.0285133671896063, "learning_rate": 9.99941639047089e-06, "loss": 0.5334, "step": 95 }, { "epoch": 0.0051805083373806056, "grad_norm": 1.3935214679247594, "learning_rate": 9.99940403956124e-06, "loss": 0.6722, "step": 96 }, { "epoch": 0.0052344719658949865, "grad_norm": 1.1390441173917967, "learning_rate": 9.999391559334175e-06, "loss": 0.6349, "step": 97 }, { "epoch": 0.005288435594409368, "grad_norm": 1.215342377859159, "learning_rate": 9.999378949790047e-06, "loss": 0.5232, "step": 98 }, { "epoch": 0.005342399222923749, "grad_norm": 1.2510327002090098, "learning_rate": 9.999366210929225e-06, "loss": 0.5196, "step": 99 }, { "epoch": 0.005396362851438131, "grad_norm": 1.2511226145939793, "learning_rate": 9.99935334275207e-06, "loss": 0.4622, "step": 100 }, { "epoch": 0.005450326479952512, "grad_norm": 1.2557162363186618, "learning_rate": 9.999340345258954e-06, "loss": 0.7242, "step": 101 }, { "epoch": 0.005504290108466894, 
"grad_norm": 1.361854651875888, "learning_rate": 9.99932721845025e-06, "loss": 0.5739, "step": 102 }, { "epoch": 0.005558253736981275, "grad_norm": 1.0151654094784182, "learning_rate": 9.999313962326335e-06, "loss": 0.4747, "step": 103 }, { "epoch": 0.0056122173654956555, "grad_norm": 1.307395363961812, "learning_rate": 9.999300576887592e-06, "loss": 0.5408, "step": 104 }, { "epoch": 0.005666180994010037, "grad_norm": 0.9999351567265667, "learning_rate": 9.999287062134402e-06, "loss": 0.4409, "step": 105 }, { "epoch": 0.005720144622524418, "grad_norm": 1.4198227189438999, "learning_rate": 9.999273418067157e-06, "loss": 0.6053, "step": 106 }, { "epoch": 0.0057741082510388, "grad_norm": 1.0283706964326431, "learning_rate": 9.999259644686248e-06, "loss": 0.4966, "step": 107 }, { "epoch": 0.005828071879553181, "grad_norm": 1.3975065339436565, "learning_rate": 9.99924574199207e-06, "loss": 0.5233, "step": 108 }, { "epoch": 0.005882035508067563, "grad_norm": 1.564248936330875, "learning_rate": 9.999231709985024e-06, "loss": 0.7572, "step": 109 }, { "epoch": 0.005935999136581944, "grad_norm": 0.953825958818951, "learning_rate": 9.999217548665513e-06, "loss": 0.5019, "step": 110 }, { "epoch": 0.005989962765096325, "grad_norm": 1.0989677737430716, "learning_rate": 9.999203258033943e-06, "loss": 0.5178, "step": 111 }, { "epoch": 0.006043926393610706, "grad_norm": 1.0873136252986095, "learning_rate": 9.999188838090723e-06, "loss": 0.5897, "step": 112 }, { "epoch": 0.006097890022125087, "grad_norm": 1.29257268513247, "learning_rate": 9.999174288836271e-06, "loss": 0.5261, "step": 113 }, { "epoch": 0.006151853650639469, "grad_norm": 1.4362780268116098, "learning_rate": 9.999159610271006e-06, "loss": 0.6261, "step": 114 }, { "epoch": 0.00620581727915385, "grad_norm": 1.4334912621392732, "learning_rate": 9.999144802395347e-06, "loss": 0.6802, "step": 115 }, { "epoch": 0.006259780907668232, "grad_norm": 1.3026939446003196, "learning_rate": 9.999129865209719e-06, "loss": 0.6078, "step": 116 }, { "epoch": 0.006313744536182613, "grad_norm": 1.3023261900733292, "learning_rate": 9.999114798714553e-06, "loss": 0.6115, "step": 117 }, { "epoch": 0.0063677081646969945, "grad_norm": 1.1621921082069646, "learning_rate": 9.999099602910282e-06, "loss": 0.6119, "step": 118 }, { "epoch": 0.006421671793211375, "grad_norm": 1.1321372544966917, "learning_rate": 9.99908427779734e-06, "loss": 0.518, "step": 119 }, { "epoch": 0.006475635421725757, "grad_norm": 1.1943533030251596, "learning_rate": 9.999068823376174e-06, "loss": 0.6093, "step": 120 }, { "epoch": 0.006529599050240138, "grad_norm": 1.240932756137203, "learning_rate": 9.99905323964722e-06, "loss": 0.5732, "step": 121 }, { "epoch": 0.00658356267875452, "grad_norm": 1.2798199696856134, "learning_rate": 9.999037526610934e-06, "loss": 0.6628, "step": 122 }, { "epoch": 0.006637526307268901, "grad_norm": 1.0920364951156052, "learning_rate": 9.999021684267761e-06, "loss": 0.4364, "step": 123 }, { "epoch": 0.006691489935783282, "grad_norm": 0.8643131495764302, "learning_rate": 9.999005712618159e-06, "loss": 0.3643, "step": 124 }, { "epoch": 0.0067454535642976635, "grad_norm": 1.2001944603703765, "learning_rate": 9.998989611662588e-06, "loss": 0.5291, "step": 125 }, { "epoch": 0.006799417192812044, "grad_norm": 1.191811116474948, "learning_rate": 9.998973381401509e-06, "loss": 0.5764, "step": 126 }, { "epoch": 0.006853380821326426, "grad_norm": 1.054067689722635, "learning_rate": 9.998957021835389e-06, "loss": 0.4844, "step": 127 }, { "epoch": 0.006907344449840807, 
"grad_norm": 1.1270242027136157, "learning_rate": 9.998940532964699e-06, "loss": 0.4923, "step": 128 }, { "epoch": 0.006961308078355189, "grad_norm": 1.0726357001289062, "learning_rate": 9.998923914789911e-06, "loss": 0.5963, "step": 129 }, { "epoch": 0.00701527170686957, "grad_norm": 1.1435268473899969, "learning_rate": 9.998907167311504e-06, "loss": 0.5464, "step": 130 }, { "epoch": 0.007069235335383952, "grad_norm": 1.2686625720572489, "learning_rate": 9.99889029052996e-06, "loss": 0.7449, "step": 131 }, { "epoch": 0.0071231989638983325, "grad_norm": 1.0793489291384164, "learning_rate": 9.998873284445762e-06, "loss": 0.4533, "step": 132 }, { "epoch": 0.0071771625924127135, "grad_norm": 1.3885398932409267, "learning_rate": 9.998856149059403e-06, "loss": 0.7217, "step": 133 }, { "epoch": 0.007231126220927095, "grad_norm": 1.285567221125925, "learning_rate": 9.99883888437137e-06, "loss": 0.4857, "step": 134 }, { "epoch": 0.007285089849441476, "grad_norm": 1.0822792865405872, "learning_rate": 9.998821490382161e-06, "loss": 0.4783, "step": 135 }, { "epoch": 0.007339053477955858, "grad_norm": 1.0501515400470738, "learning_rate": 9.99880396709228e-06, "loss": 0.4415, "step": 136 }, { "epoch": 0.007393017106470239, "grad_norm": 1.1488759595832994, "learning_rate": 9.998786314502224e-06, "loss": 0.4706, "step": 137 }, { "epoch": 0.007446980734984621, "grad_norm": 1.103982892567204, "learning_rate": 9.998768532612505e-06, "loss": 0.5226, "step": 138 }, { "epoch": 0.007500944363499002, "grad_norm": 0.9421778278661262, "learning_rate": 9.998750621423631e-06, "loss": 0.4667, "step": 139 }, { "epoch": 0.007554907992013383, "grad_norm": 1.0143824011867635, "learning_rate": 9.998732580936121e-06, "loss": 0.5049, "step": 140 }, { "epoch": 0.007608871620527764, "grad_norm": 1.1633847473650814, "learning_rate": 9.99871441115049e-06, "loss": 0.502, "step": 141 }, { "epoch": 0.007662835249042145, "grad_norm": 1.3041350676189563, "learning_rate": 9.99869611206726e-06, "loss": 0.7025, "step": 142 }, { "epoch": 0.007716798877556527, "grad_norm": 1.3908993686152158, "learning_rate": 9.99867768368696e-06, "loss": 0.6131, "step": 143 }, { "epoch": 0.007770762506070908, "grad_norm": 1.0207539270593207, "learning_rate": 9.998659126010118e-06, "loss": 0.5482, "step": 144 }, { "epoch": 0.00782472613458529, "grad_norm": 1.3608943167624925, "learning_rate": 9.998640439037265e-06, "loss": 0.7621, "step": 145 }, { "epoch": 0.007878689763099671, "grad_norm": 1.2548745897722715, "learning_rate": 9.998621622768942e-06, "loss": 0.5751, "step": 146 }, { "epoch": 0.007932653391614052, "grad_norm": 1.3718821597112052, "learning_rate": 9.998602677205687e-06, "loss": 0.6654, "step": 147 }, { "epoch": 0.007986617020128433, "grad_norm": 1.4261535885004726, "learning_rate": 9.998583602348047e-06, "loss": 0.6473, "step": 148 }, { "epoch": 0.008040580648642815, "grad_norm": 1.1217430186640438, "learning_rate": 9.99856439819657e-06, "loss": 0.4908, "step": 149 }, { "epoch": 0.008094544277157197, "grad_norm": 1.034071060097991, "learning_rate": 9.998545064751804e-06, "loss": 0.5102, "step": 150 }, { "epoch": 0.008148507905671577, "grad_norm": 1.0843017956342151, "learning_rate": 9.998525602014306e-06, "loss": 0.5048, "step": 151 }, { "epoch": 0.008202471534185959, "grad_norm": 1.4186154439889314, "learning_rate": 9.998506009984639e-06, "loss": 0.5596, "step": 152 }, { "epoch": 0.00825643516270034, "grad_norm": 1.012367711029462, "learning_rate": 9.998486288663364e-06, "loss": 0.603, "step": 153 }, { "epoch": 0.00831039879121472, 
"grad_norm": 1.09990662038871, "learning_rate": 9.998466438051047e-06, "loss": 0.5676, "step": 154 }, { "epoch": 0.008364362419729102, "grad_norm": 1.6328102766295634, "learning_rate": 9.99844645814826e-06, "loss": 0.6774, "step": 155 }, { "epoch": 0.008418326048243484, "grad_norm": 1.3320551667138403, "learning_rate": 9.998426348955575e-06, "loss": 0.6241, "step": 156 }, { "epoch": 0.008472289676757866, "grad_norm": 1.1788922269729352, "learning_rate": 9.998406110473573e-06, "loss": 0.6458, "step": 157 }, { "epoch": 0.008526253305272246, "grad_norm": 0.888558931597322, "learning_rate": 9.998385742702833e-06, "loss": 0.4062, "step": 158 }, { "epoch": 0.008580216933786628, "grad_norm": 1.0140068397136668, "learning_rate": 9.998365245643942e-06, "loss": 0.5355, "step": 159 }, { "epoch": 0.00863418056230101, "grad_norm": 1.2011625074559469, "learning_rate": 9.998344619297488e-06, "loss": 0.4706, "step": 160 }, { "epoch": 0.00868814419081539, "grad_norm": 1.1389715522250872, "learning_rate": 9.998323863664065e-06, "loss": 0.5874, "step": 161 }, { "epoch": 0.008742107819329771, "grad_norm": 1.2187759337676876, "learning_rate": 9.99830297874427e-06, "loss": 0.6923, "step": 162 }, { "epoch": 0.008796071447844153, "grad_norm": 1.6551973215397675, "learning_rate": 9.9982819645387e-06, "loss": 0.5792, "step": 163 }, { "epoch": 0.008850035076358535, "grad_norm": 0.8574771873796844, "learning_rate": 9.998260821047963e-06, "loss": 0.372, "step": 164 }, { "epoch": 0.008903998704872915, "grad_norm": 1.6701912055897754, "learning_rate": 9.998239548272663e-06, "loss": 0.6745, "step": 165 }, { "epoch": 0.008957962333387297, "grad_norm": 1.2714762488887172, "learning_rate": 9.998218146213416e-06, "loss": 0.6262, "step": 166 }, { "epoch": 0.009011925961901679, "grad_norm": 1.1103967321141592, "learning_rate": 9.998196614870831e-06, "loss": 0.5723, "step": 167 }, { "epoch": 0.00906588959041606, "grad_norm": 1.2833257398994573, "learning_rate": 9.998174954245532e-06, "loss": 0.583, "step": 168 }, { "epoch": 0.00911985321893044, "grad_norm": 1.2738967003285746, "learning_rate": 9.998153164338139e-06, "loss": 0.5546, "step": 169 }, { "epoch": 0.009173816847444822, "grad_norm": 1.2287790353967385, "learning_rate": 9.99813124514928e-06, "loss": 0.5358, "step": 170 }, { "epoch": 0.009227780475959204, "grad_norm": 1.203846588721628, "learning_rate": 9.998109196679583e-06, "loss": 0.6192, "step": 171 }, { "epoch": 0.009281744104473584, "grad_norm": 1.1397105672979877, "learning_rate": 9.998087018929685e-06, "loss": 0.5762, "step": 172 }, { "epoch": 0.009335707732987966, "grad_norm": 1.191304309249943, "learning_rate": 9.998064711900218e-06, "loss": 0.5048, "step": 173 }, { "epoch": 0.009389671361502348, "grad_norm": 1.4096998782102477, "learning_rate": 9.998042275591827e-06, "loss": 0.6585, "step": 174 }, { "epoch": 0.00944363499001673, "grad_norm": 1.3498881860364034, "learning_rate": 9.998019710005156e-06, "loss": 0.6566, "step": 175 }, { "epoch": 0.00949759861853111, "grad_norm": 1.0220711731954275, "learning_rate": 9.997997015140854e-06, "loss": 0.3949, "step": 176 }, { "epoch": 0.009551562247045491, "grad_norm": 1.122898194797035, "learning_rate": 9.997974190999574e-06, "loss": 0.573, "step": 177 }, { "epoch": 0.009605525875559873, "grad_norm": 1.1424439408128342, "learning_rate": 9.99795123758197e-06, "loss": 0.635, "step": 178 }, { "epoch": 0.009659489504074253, "grad_norm": 1.401557558251473, "learning_rate": 9.997928154888702e-06, "loss": 0.6309, "step": 179 }, { "epoch": 0.009713453132588635, "grad_norm": 
1.222168741639048, "learning_rate": 9.997904942920435e-06, "loss": 0.7144, "step": 180 }, { "epoch": 0.009767416761103017, "grad_norm": 1.1875438927738955, "learning_rate": 9.997881601677836e-06, "loss": 0.6016, "step": 181 }, { "epoch": 0.009821380389617398, "grad_norm": 1.006927343303014, "learning_rate": 9.997858131161573e-06, "loss": 0.4256, "step": 182 }, { "epoch": 0.009875344018131779, "grad_norm": 1.0639553067807195, "learning_rate": 9.997834531372322e-06, "loss": 0.5162, "step": 183 }, { "epoch": 0.00992930764664616, "grad_norm": 1.537529849129402, "learning_rate": 9.997810802310763e-06, "loss": 0.9302, "step": 184 }, { "epoch": 0.009983271275160542, "grad_norm": 0.9034875159543039, "learning_rate": 9.997786943977578e-06, "loss": 0.3756, "step": 185 }, { "epoch": 0.010037234903674924, "grad_norm": 1.2064675194157035, "learning_rate": 9.997762956373451e-06, "loss": 0.5162, "step": 186 }, { "epoch": 0.010091198532189304, "grad_norm": 1.088112796798825, "learning_rate": 9.99773883949907e-06, "loss": 0.4671, "step": 187 }, { "epoch": 0.010145162160703686, "grad_norm": 1.1473843801769765, "learning_rate": 9.997714593355132e-06, "loss": 0.6305, "step": 188 }, { "epoch": 0.010199125789218067, "grad_norm": 1.2458415600123915, "learning_rate": 9.997690217942331e-06, "loss": 0.594, "step": 189 }, { "epoch": 0.010253089417732448, "grad_norm": 1.0425721869778186, "learning_rate": 9.99766571326137e-06, "loss": 0.6579, "step": 190 }, { "epoch": 0.01030705304624683, "grad_norm": 1.201821517106067, "learning_rate": 9.997641079312951e-06, "loss": 0.6153, "step": 191 }, { "epoch": 0.010361016674761211, "grad_norm": 1.2309229809549613, "learning_rate": 9.997616316097782e-06, "loss": 0.6137, "step": 192 }, { "epoch": 0.010414980303275593, "grad_norm": 1.2235037143833754, "learning_rate": 9.997591423616575e-06, "loss": 0.6649, "step": 193 }, { "epoch": 0.010468943931789973, "grad_norm": 1.2498932392147601, "learning_rate": 9.99756640187005e-06, "loss": 0.46, "step": 194 }, { "epoch": 0.010522907560304355, "grad_norm": 1.4750830159701147, "learning_rate": 9.997541250858917e-06, "loss": 0.5676, "step": 195 }, { "epoch": 0.010576871188818737, "grad_norm": 1.0791524250500986, "learning_rate": 9.997515970583907e-06, "loss": 0.4956, "step": 196 }, { "epoch": 0.010630834817333118, "grad_norm": 1.2643503979391109, "learning_rate": 9.997490561045744e-06, "loss": 0.5893, "step": 197 }, { "epoch": 0.010684798445847498, "grad_norm": 1.1432182544177327, "learning_rate": 9.997465022245157e-06, "loss": 0.5914, "step": 198 }, { "epoch": 0.01073876207436188, "grad_norm": 1.573535653351988, "learning_rate": 9.99743935418288e-06, "loss": 0.7028, "step": 199 }, { "epoch": 0.010792725702876262, "grad_norm": 1.3389910777902252, "learning_rate": 9.997413556859654e-06, "loss": 0.6192, "step": 200 }, { "epoch": 0.010846689331390642, "grad_norm": 0.8941336546169296, "learning_rate": 9.997387630276214e-06, "loss": 0.3799, "step": 201 }, { "epoch": 0.010900652959905024, "grad_norm": 1.1648849081985506, "learning_rate": 9.997361574433313e-06, "loss": 0.5682, "step": 202 }, { "epoch": 0.010954616588419406, "grad_norm": 1.053113316445482, "learning_rate": 9.997335389331693e-06, "loss": 0.6232, "step": 203 }, { "epoch": 0.011008580216933787, "grad_norm": 1.0761303152202273, "learning_rate": 9.997309074972112e-06, "loss": 0.4745, "step": 204 }, { "epoch": 0.011062543845448167, "grad_norm": 1.1473439558487417, "learning_rate": 9.997282631355325e-06, "loss": 0.4957, "step": 205 }, { "epoch": 0.01111650747396255, "grad_norm": 
1.2223679691873086, "learning_rate": 9.99725605848209e-06, "loss": 0.5827, "step": 206 }, { "epoch": 0.011170471102476931, "grad_norm": 1.0981509552274973, "learning_rate": 9.99722935635317e-06, "loss": 0.5869, "step": 207 }, { "epoch": 0.011224434730991311, "grad_norm": 1.4704546867499864, "learning_rate": 9.997202524969337e-06, "loss": 0.6123, "step": 208 }, { "epoch": 0.011278398359505693, "grad_norm": 1.0645843133318582, "learning_rate": 9.997175564331359e-06, "loss": 0.4807, "step": 209 }, { "epoch": 0.011332361988020075, "grad_norm": 1.1274735657508994, "learning_rate": 9.997148474440009e-06, "loss": 0.5622, "step": 210 }, { "epoch": 0.011386325616534456, "grad_norm": 1.2358385027704084, "learning_rate": 9.997121255296068e-06, "loss": 0.6144, "step": 211 }, { "epoch": 0.011440289245048836, "grad_norm": 1.319621430143422, "learning_rate": 9.99709390690032e-06, "loss": 0.7473, "step": 212 }, { "epoch": 0.011494252873563218, "grad_norm": 1.2811889357342763, "learning_rate": 9.997066429253546e-06, "loss": 0.4787, "step": 213 }, { "epoch": 0.0115482165020776, "grad_norm": 1.171717692968343, "learning_rate": 9.997038822356543e-06, "loss": 0.4688, "step": 214 }, { "epoch": 0.011602180130591982, "grad_norm": 0.9580409637170038, "learning_rate": 9.997011086210096e-06, "loss": 0.3909, "step": 215 }, { "epoch": 0.011656143759106362, "grad_norm": 1.3138056995523446, "learning_rate": 9.996983220815009e-06, "loss": 0.6829, "step": 216 }, { "epoch": 0.011710107387620744, "grad_norm": 1.458926745441639, "learning_rate": 9.99695522617208e-06, "loss": 0.8704, "step": 217 }, { "epoch": 0.011764071016135125, "grad_norm": 1.3303970112972667, "learning_rate": 9.996927102282115e-06, "loss": 0.6338, "step": 218 }, { "epoch": 0.011818034644649505, "grad_norm": 1.2138386431991741, "learning_rate": 9.99689884914592e-06, "loss": 0.6906, "step": 219 }, { "epoch": 0.011871998273163887, "grad_norm": 1.1858395508401343, "learning_rate": 9.99687046676431e-06, "loss": 0.5059, "step": 220 }, { "epoch": 0.011925961901678269, "grad_norm": 1.1115608879719827, "learning_rate": 9.996841955138098e-06, "loss": 0.5521, "step": 221 }, { "epoch": 0.01197992553019265, "grad_norm": 1.0733546674988221, "learning_rate": 9.996813314268103e-06, "loss": 0.4899, "step": 222 }, { "epoch": 0.012033889158707031, "grad_norm": 1.4498359325116648, "learning_rate": 9.99678454415515e-06, "loss": 1.0193, "step": 223 }, { "epoch": 0.012087852787221413, "grad_norm": 1.2563070702223789, "learning_rate": 9.996755644800068e-06, "loss": 0.6354, "step": 224 }, { "epoch": 0.012141816415735794, "grad_norm": 1.0268052194602335, "learning_rate": 9.996726616203682e-06, "loss": 0.4692, "step": 225 }, { "epoch": 0.012195780044250175, "grad_norm": 1.8204979456323878, "learning_rate": 9.996697458366832e-06, "loss": 0.5616, "step": 226 }, { "epoch": 0.012249743672764556, "grad_norm": 1.2717677288653928, "learning_rate": 9.996668171290353e-06, "loss": 0.6308, "step": 227 }, { "epoch": 0.012303707301278938, "grad_norm": 1.1932236773195832, "learning_rate": 9.996638754975085e-06, "loss": 0.5354, "step": 228 }, { "epoch": 0.01235767092979332, "grad_norm": 0.9449431584018725, "learning_rate": 9.996609209421877e-06, "loss": 0.3974, "step": 229 }, { "epoch": 0.0124116345583077, "grad_norm": 1.161763024048585, "learning_rate": 9.996579534631578e-06, "loss": 0.4988, "step": 230 }, { "epoch": 0.012465598186822082, "grad_norm": 0.8690161676114153, "learning_rate": 9.996549730605039e-06, "loss": 0.3913, "step": 231 }, { "epoch": 0.012519561815336464, "grad_norm": 
1.247403134820183, "learning_rate": 9.996519797343117e-06, "loss": 0.5993, "step": 232 }, { "epoch": 0.012573525443850845, "grad_norm": 1.11321460397835, "learning_rate": 9.996489734846672e-06, "loss": 0.5337, "step": 233 }, { "epoch": 0.012627489072365225, "grad_norm": 1.0978134832882454, "learning_rate": 9.99645954311657e-06, "loss": 0.6005, "step": 234 }, { "epoch": 0.012681452700879607, "grad_norm": 1.0541623598870151, "learning_rate": 9.996429222153676e-06, "loss": 0.4935, "step": 235 }, { "epoch": 0.012735416329393989, "grad_norm": 1.1543669864396497, "learning_rate": 9.996398771958865e-06, "loss": 0.5047, "step": 236 }, { "epoch": 0.012789379957908369, "grad_norm": 1.0640833183002085, "learning_rate": 9.996368192533009e-06, "loss": 0.4428, "step": 237 }, { "epoch": 0.01284334358642275, "grad_norm": 1.3678945497868689, "learning_rate": 9.996337483876987e-06, "loss": 0.7162, "step": 238 }, { "epoch": 0.012897307214937133, "grad_norm": 1.0984548994354322, "learning_rate": 9.996306645991682e-06, "loss": 0.5046, "step": 239 }, { "epoch": 0.012951270843451514, "grad_norm": 1.3848200621596305, "learning_rate": 9.996275678877982e-06, "loss": 0.7497, "step": 240 }, { "epoch": 0.013005234471965894, "grad_norm": 1.0761524958808466, "learning_rate": 9.996244582536775e-06, "loss": 0.5328, "step": 241 }, { "epoch": 0.013059198100480276, "grad_norm": 0.9775089979906237, "learning_rate": 9.996213356968957e-06, "loss": 0.5706, "step": 242 }, { "epoch": 0.013113161728994658, "grad_norm": 1.5040696987581101, "learning_rate": 9.996182002175423e-06, "loss": 0.6973, "step": 243 }, { "epoch": 0.01316712535750904, "grad_norm": 0.9630029498630243, "learning_rate": 9.996150518157077e-06, "loss": 0.3413, "step": 244 }, { "epoch": 0.01322108898602342, "grad_norm": 1.2080131338079794, "learning_rate": 9.996118904914818e-06, "loss": 0.6624, "step": 245 }, { "epoch": 0.013275052614537802, "grad_norm": 1.0703602760499134, "learning_rate": 9.996087162449562e-06, "loss": 0.5848, "step": 246 }, { "epoch": 0.013329016243052183, "grad_norm": 1.2659525460953978, "learning_rate": 9.996055290762217e-06, "loss": 0.5972, "step": 247 }, { "epoch": 0.013382979871566563, "grad_norm": 1.3476130462115583, "learning_rate": 9.9960232898537e-06, "loss": 0.5393, "step": 248 }, { "epoch": 0.013436943500080945, "grad_norm": 1.2763433889992966, "learning_rate": 9.995991159724933e-06, "loss": 0.6175, "step": 249 }, { "epoch": 0.013490907128595327, "grad_norm": 1.3606932218582706, "learning_rate": 9.995958900376835e-06, "loss": 0.7005, "step": 250 }, { "epoch": 0.013544870757109709, "grad_norm": 1.4461851927365759, "learning_rate": 9.995926511810336e-06, "loss": 0.558, "step": 251 }, { "epoch": 0.013598834385624089, "grad_norm": 1.192890069735771, "learning_rate": 9.995893994026365e-06, "loss": 0.6835, "step": 252 }, { "epoch": 0.01365279801413847, "grad_norm": 0.9060194145350097, "learning_rate": 9.995861347025858e-06, "loss": 0.4069, "step": 253 }, { "epoch": 0.013706761642652852, "grad_norm": 1.4397082880115948, "learning_rate": 9.995828570809753e-06, "loss": 0.6054, "step": 254 }, { "epoch": 0.013760725271167232, "grad_norm": 1.1939353182809131, "learning_rate": 9.995795665378993e-06, "loss": 0.621, "step": 255 }, { "epoch": 0.013814688899681614, "grad_norm": 1.273747209553719, "learning_rate": 9.995762630734524e-06, "loss": 0.67, "step": 256 }, { "epoch": 0.013868652528195996, "grad_norm": 1.2463987479425098, "learning_rate": 9.995729466877294e-06, "loss": 0.6794, "step": 257 }, { "epoch": 0.013922616156710378, "grad_norm": 
1.3382261672022895, "learning_rate": 9.995696173808254e-06, "loss": 0.5532, "step": 258 }, { "epoch": 0.013976579785224758, "grad_norm": 1.1127761869896213, "learning_rate": 9.995662751528366e-06, "loss": 0.4956, "step": 259 }, { "epoch": 0.01403054341373914, "grad_norm": 1.3641595580702635, "learning_rate": 9.995629200038586e-06, "loss": 0.6197, "step": 260 }, { "epoch": 0.014084507042253521, "grad_norm": 1.123741530834796, "learning_rate": 9.995595519339882e-06, "loss": 0.6066, "step": 261 }, { "epoch": 0.014138470670767903, "grad_norm": 1.2068617191567075, "learning_rate": 9.995561709433219e-06, "loss": 0.5622, "step": 262 }, { "epoch": 0.014192434299282283, "grad_norm": 1.346334566713258, "learning_rate": 9.99552777031957e-06, "loss": 0.6559, "step": 263 }, { "epoch": 0.014246397927796665, "grad_norm": 1.0053081925345104, "learning_rate": 9.995493701999912e-06, "loss": 0.516, "step": 264 }, { "epoch": 0.014300361556311047, "grad_norm": 1.2112570428487228, "learning_rate": 9.995459504475223e-06, "loss": 0.4491, "step": 265 }, { "epoch": 0.014354325184825427, "grad_norm": 1.2986392377260056, "learning_rate": 9.995425177746484e-06, "loss": 0.6181, "step": 266 }, { "epoch": 0.014408288813339809, "grad_norm": 1.0653526638799027, "learning_rate": 9.995390721814683e-06, "loss": 0.5026, "step": 267 }, { "epoch": 0.01446225244185419, "grad_norm": 1.259812320370971, "learning_rate": 9.995356136680811e-06, "loss": 0.4447, "step": 268 }, { "epoch": 0.014516216070368572, "grad_norm": 0.9736190445696172, "learning_rate": 9.995321422345862e-06, "loss": 0.3902, "step": 269 }, { "epoch": 0.014570179698882952, "grad_norm": 1.2958778367226536, "learning_rate": 9.995286578810833e-06, "loss": 0.5193, "step": 270 }, { "epoch": 0.014624143327397334, "grad_norm": 1.164966261479915, "learning_rate": 9.995251606076724e-06, "loss": 0.5022, "step": 271 }, { "epoch": 0.014678106955911716, "grad_norm": 0.9666983690245742, "learning_rate": 9.995216504144543e-06, "loss": 0.5106, "step": 272 }, { "epoch": 0.014732070584426096, "grad_norm": 1.1069920244729308, "learning_rate": 9.995181273015297e-06, "loss": 0.5576, "step": 273 }, { "epoch": 0.014786034212940478, "grad_norm": 1.1128100553232345, "learning_rate": 9.995145912689998e-06, "loss": 0.5319, "step": 274 }, { "epoch": 0.01483999784145486, "grad_norm": 1.2769181693810012, "learning_rate": 9.995110423169665e-06, "loss": 0.6883, "step": 275 }, { "epoch": 0.014893961469969241, "grad_norm": 1.255845242513406, "learning_rate": 9.995074804455317e-06, "loss": 0.5589, "step": 276 }, { "epoch": 0.014947925098483621, "grad_norm": 1.4030259725520333, "learning_rate": 9.995039056547977e-06, "loss": 0.7846, "step": 277 }, { "epoch": 0.015001888726998003, "grad_norm": 1.09429455887684, "learning_rate": 9.995003179448673e-06, "loss": 0.5616, "step": 278 }, { "epoch": 0.015055852355512385, "grad_norm": 0.9757421804231383, "learning_rate": 9.994967173158434e-06, "loss": 0.5016, "step": 279 }, { "epoch": 0.015109815984026767, "grad_norm": 0.8847690502095292, "learning_rate": 9.994931037678297e-06, "loss": 0.3401, "step": 280 }, { "epoch": 0.015163779612541147, "grad_norm": 1.1057166785274695, "learning_rate": 9.994894773009301e-06, "loss": 0.5028, "step": 281 }, { "epoch": 0.015217743241055529, "grad_norm": 1.101812150891309, "learning_rate": 9.994858379152486e-06, "loss": 0.5727, "step": 282 }, { "epoch": 0.01527170686956991, "grad_norm": 1.2747397536257468, "learning_rate": 9.994821856108902e-06, "loss": 0.6635, "step": 283 }, { "epoch": 0.01532567049808429, "grad_norm": 
1.1942069829536837, "learning_rate": 9.994785203879594e-06, "loss": 0.6384, "step": 284 }, { "epoch": 0.015379634126598672, "grad_norm": 1.1015489903141056, "learning_rate": 9.994748422465618e-06, "loss": 0.7011, "step": 285 }, { "epoch": 0.015433597755113054, "grad_norm": 1.0161278765443331, "learning_rate": 9.994711511868032e-06, "loss": 0.4708, "step": 286 }, { "epoch": 0.015487561383627436, "grad_norm": 1.121874126321933, "learning_rate": 9.994674472087896e-06, "loss": 0.4531, "step": 287 }, { "epoch": 0.015541525012141816, "grad_norm": 1.0492910723138273, "learning_rate": 9.994637303126272e-06, "loss": 0.398, "step": 288 }, { "epoch": 0.015595488640656198, "grad_norm": 1.1045915167009837, "learning_rate": 9.99460000498423e-06, "loss": 0.5987, "step": 289 }, { "epoch": 0.01564945226917058, "grad_norm": 1.2438915845902245, "learning_rate": 9.994562577662845e-06, "loss": 0.6903, "step": 290 }, { "epoch": 0.01570341589768496, "grad_norm": 0.8592788112987058, "learning_rate": 9.994525021163188e-06, "loss": 0.3996, "step": 291 }, { "epoch": 0.015757379526199343, "grad_norm": 0.9456141115471028, "learning_rate": 9.994487335486342e-06, "loss": 0.5333, "step": 292 }, { "epoch": 0.015811343154713723, "grad_norm": 1.4482688719577594, "learning_rate": 9.994449520633386e-06, "loss": 0.5056, "step": 293 }, { "epoch": 0.015865306783228103, "grad_norm": 1.0433010803417053, "learning_rate": 9.994411576605413e-06, "loss": 0.5134, "step": 294 }, { "epoch": 0.015919270411742487, "grad_norm": 1.0792885939124144, "learning_rate": 9.994373503403507e-06, "loss": 0.5134, "step": 295 }, { "epoch": 0.015973234040256867, "grad_norm": 1.1533137276573562, "learning_rate": 9.994335301028768e-06, "loss": 0.7336, "step": 296 }, { "epoch": 0.016027197668771247, "grad_norm": 1.2103948179761086, "learning_rate": 9.99429696948229e-06, "loss": 0.6977, "step": 297 }, { "epoch": 0.01608116129728563, "grad_norm": 1.2714543342806681, "learning_rate": 9.994258508765175e-06, "loss": 0.5472, "step": 298 }, { "epoch": 0.01613512492580001, "grad_norm": 1.64434725699464, "learning_rate": 9.994219918878531e-06, "loss": 0.772, "step": 299 }, { "epoch": 0.016189088554314394, "grad_norm": 1.1441854407927474, "learning_rate": 9.994181199823463e-06, "loss": 0.5516, "step": 300 }, { "epoch": 0.016243052182828774, "grad_norm": 1.2754927387713395, "learning_rate": 9.994142351601089e-06, "loss": 0.5164, "step": 301 }, { "epoch": 0.016297015811343154, "grad_norm": 1.0388992215157398, "learning_rate": 9.994103374212522e-06, "loss": 0.3441, "step": 302 }, { "epoch": 0.016350979439857537, "grad_norm": 0.9307976971500482, "learning_rate": 9.994064267658882e-06, "loss": 0.4018, "step": 303 }, { "epoch": 0.016404943068371917, "grad_norm": 1.434324345445989, "learning_rate": 9.994025031941295e-06, "loss": 0.5894, "step": 304 }, { "epoch": 0.016458906696886298, "grad_norm": 1.3557578833374386, "learning_rate": 9.993985667060887e-06, "loss": 0.6722, "step": 305 }, { "epoch": 0.01651287032540068, "grad_norm": 1.4210356035802283, "learning_rate": 9.993946173018792e-06, "loss": 0.7326, "step": 306 }, { "epoch": 0.01656683395391506, "grad_norm": 1.3176899298059377, "learning_rate": 9.993906549816141e-06, "loss": 0.7488, "step": 307 }, { "epoch": 0.01662079758242944, "grad_norm": 1.1605402034219734, "learning_rate": 9.993866797454076e-06, "loss": 0.5744, "step": 308 }, { "epoch": 0.016674761210943825, "grad_norm": 1.2348787683587534, "learning_rate": 9.993826915933738e-06, "loss": 0.5132, "step": 309 }, { "epoch": 0.016728724839458205, "grad_norm": 
1.3295254195522013, "learning_rate": 9.993786905256273e-06, "loss": 0.7401, "step": 310 }, { "epoch": 0.016782688467972585, "grad_norm": 1.1704290073676298, "learning_rate": 9.993746765422834e-06, "loss": 0.5894, "step": 311 }, { "epoch": 0.01683665209648697, "grad_norm": 1.3489950408119658, "learning_rate": 9.99370649643457e-06, "loss": 0.5088, "step": 312 }, { "epoch": 0.01689061572500135, "grad_norm": 1.1663551477633096, "learning_rate": 9.993666098292643e-06, "loss": 0.5308, "step": 313 }, { "epoch": 0.016944579353515732, "grad_norm": 1.2444978826388653, "learning_rate": 9.99362557099821e-06, "loss": 0.5979, "step": 314 }, { "epoch": 0.016998542982030112, "grad_norm": 1.1669848128935618, "learning_rate": 9.993584914552437e-06, "loss": 0.5185, "step": 315 }, { "epoch": 0.017052506610544492, "grad_norm": 1.1785288436941468, "learning_rate": 9.993544128956494e-06, "loss": 0.6184, "step": 316 }, { "epoch": 0.017106470239058875, "grad_norm": 1.1287095546326185, "learning_rate": 9.993503214211556e-06, "loss": 0.6079, "step": 317 }, { "epoch": 0.017160433867573256, "grad_norm": 1.3208306869926496, "learning_rate": 9.993462170318791e-06, "loss": 0.6181, "step": 318 }, { "epoch": 0.017214397496087636, "grad_norm": 1.2687226372856646, "learning_rate": 9.993420997279383e-06, "loss": 0.607, "step": 319 }, { "epoch": 0.01726836112460202, "grad_norm": 1.0532544589435897, "learning_rate": 9.993379695094516e-06, "loss": 0.5164, "step": 320 }, { "epoch": 0.0173223247531164, "grad_norm": 1.146514029063011, "learning_rate": 9.993338263765377e-06, "loss": 0.5759, "step": 321 }, { "epoch": 0.01737628838163078, "grad_norm": 1.2409108189053872, "learning_rate": 9.993296703293156e-06, "loss": 0.7701, "step": 322 }, { "epoch": 0.017430252010145163, "grad_norm": 1.239396038545281, "learning_rate": 9.993255013679047e-06, "loss": 0.6736, "step": 323 }, { "epoch": 0.017484215638659543, "grad_norm": 1.0987468594332794, "learning_rate": 9.99321319492425e-06, "loss": 0.5417, "step": 324 }, { "epoch": 0.017538179267173926, "grad_norm": 1.2087645651398047, "learning_rate": 9.993171247029963e-06, "loss": 0.5748, "step": 325 }, { "epoch": 0.017592142895688306, "grad_norm": 1.0571439326868024, "learning_rate": 9.993129169997397e-06, "loss": 0.4707, "step": 326 }, { "epoch": 0.017646106524202686, "grad_norm": 1.2947374398426923, "learning_rate": 9.993086963827758e-06, "loss": 0.5796, "step": 327 }, { "epoch": 0.01770007015271707, "grad_norm": 1.157335974063005, "learning_rate": 9.993044628522258e-06, "loss": 0.5345, "step": 328 }, { "epoch": 0.01775403378123145, "grad_norm": 0.8450354570160381, "learning_rate": 9.993002164082118e-06, "loss": 0.4465, "step": 329 }, { "epoch": 0.01780799740974583, "grad_norm": 1.0798109763088657, "learning_rate": 9.992959570508554e-06, "loss": 0.5593, "step": 330 }, { "epoch": 0.017861961038260214, "grad_norm": 1.4764884866302763, "learning_rate": 9.992916847802793e-06, "loss": 0.7197, "step": 331 }, { "epoch": 0.017915924666774594, "grad_norm": 1.029910839549723, "learning_rate": 9.992873995966063e-06, "loss": 0.5725, "step": 332 }, { "epoch": 0.017969888295288974, "grad_norm": 1.1273273151709464, "learning_rate": 9.992831014999595e-06, "loss": 0.4723, "step": 333 }, { "epoch": 0.018023851923803357, "grad_norm": 1.1154601701917475, "learning_rate": 9.992787904904621e-06, "loss": 0.4921, "step": 334 }, { "epoch": 0.018077815552317737, "grad_norm": 1.155569195738189, "learning_rate": 9.992744665682386e-06, "loss": 0.6977, "step": 335 }, { "epoch": 0.01813177918083212, "grad_norm": 
1.1784027045666452, "learning_rate": 9.992701297334127e-06, "loss": 0.4818, "step": 336 }, { "epoch": 0.0181857428093465, "grad_norm": 1.155068223585959, "learning_rate": 9.992657799861095e-06, "loss": 0.6108, "step": 337 }, { "epoch": 0.01823970643786088, "grad_norm": 1.1407259509958554, "learning_rate": 9.992614173264538e-06, "loss": 0.5003, "step": 338 }, { "epoch": 0.018293670066375264, "grad_norm": 1.0176430995940269, "learning_rate": 9.99257041754571e-06, "loss": 0.478, "step": 339 }, { "epoch": 0.018347633694889644, "grad_norm": 1.1118635127507424, "learning_rate": 9.992526532705869e-06, "loss": 0.5654, "step": 340 }, { "epoch": 0.018401597323404024, "grad_norm": 1.2135320201288273, "learning_rate": 9.992482518746274e-06, "loss": 0.5657, "step": 341 }, { "epoch": 0.018455560951918408, "grad_norm": 1.085537399883959, "learning_rate": 9.992438375668196e-06, "loss": 0.5541, "step": 342 }, { "epoch": 0.018509524580432788, "grad_norm": 1.0052941103448916, "learning_rate": 9.992394103472899e-06, "loss": 0.5616, "step": 343 }, { "epoch": 0.018563488208947168, "grad_norm": 1.2665821633476784, "learning_rate": 9.992349702161653e-06, "loss": 0.5954, "step": 344 }, { "epoch": 0.01861745183746155, "grad_norm": 1.0918213076130114, "learning_rate": 9.99230517173574e-06, "loss": 0.4426, "step": 345 }, { "epoch": 0.01867141546597593, "grad_norm": 1.073841582140135, "learning_rate": 9.992260512196437e-06, "loss": 0.5025, "step": 346 }, { "epoch": 0.018725379094490315, "grad_norm": 1.0383813865163982, "learning_rate": 9.992215723545029e-06, "loss": 0.5631, "step": 347 }, { "epoch": 0.018779342723004695, "grad_norm": 1.3164985062021122, "learning_rate": 9.992170805782799e-06, "loss": 0.6187, "step": 348 }, { "epoch": 0.018833306351519075, "grad_norm": 1.1021367368461157, "learning_rate": 9.992125758911043e-06, "loss": 0.5302, "step": 349 }, { "epoch": 0.01888726998003346, "grad_norm": 0.8934010962199753, "learning_rate": 9.992080582931052e-06, "loss": 0.3645, "step": 350 }, { "epoch": 0.01894123360854784, "grad_norm": 1.1996360550617136, "learning_rate": 9.99203527784413e-06, "loss": 0.4724, "step": 351 }, { "epoch": 0.01899519723706222, "grad_norm": 1.1280992512564252, "learning_rate": 9.99198984365157e-06, "loss": 0.5557, "step": 352 }, { "epoch": 0.019049160865576602, "grad_norm": 1.2136948086761201, "learning_rate": 9.991944280354685e-06, "loss": 0.6769, "step": 353 }, { "epoch": 0.019103124494090983, "grad_norm": 1.1039408042652477, "learning_rate": 9.991898587954784e-06, "loss": 0.442, "step": 354 }, { "epoch": 0.019157088122605363, "grad_norm": 1.356495134931273, "learning_rate": 9.991852766453179e-06, "loss": 0.6489, "step": 355 }, { "epoch": 0.019211051751119746, "grad_norm": 1.0219343512193042, "learning_rate": 9.991806815851186e-06, "loss": 0.4407, "step": 356 }, { "epoch": 0.019265015379634126, "grad_norm": 1.1804707429959662, "learning_rate": 9.991760736150126e-06, "loss": 0.5527, "step": 357 }, { "epoch": 0.019318979008148506, "grad_norm": 1.1591357203925168, "learning_rate": 9.991714527351323e-06, "loss": 0.5369, "step": 358 }, { "epoch": 0.01937294263666289, "grad_norm": 0.9582203943450978, "learning_rate": 9.99166818945611e-06, "loss": 0.414, "step": 359 }, { "epoch": 0.01942690626517727, "grad_norm": 1.0443032853878906, "learning_rate": 9.991621722465812e-06, "loss": 0.4966, "step": 360 }, { "epoch": 0.019480869893691653, "grad_norm": 1.2341283036013393, "learning_rate": 9.991575126381767e-06, "loss": 0.488, "step": 361 }, { "epoch": 0.019534833522206033, "grad_norm": 
1.1218387784437682, "learning_rate": 9.991528401205315e-06, "loss": 0.5758, "step": 362 }, { "epoch": 0.019588797150720413, "grad_norm": 0.9356431766914127, "learning_rate": 9.991481546937797e-06, "loss": 0.3837, "step": 363 }, { "epoch": 0.019642760779234797, "grad_norm": 1.1757396086902396, "learning_rate": 9.991434563580561e-06, "loss": 0.5802, "step": 364 }, { "epoch": 0.019696724407749177, "grad_norm": 1.2136740074861603, "learning_rate": 9.99138745113496e-06, "loss": 0.545, "step": 365 }, { "epoch": 0.019750688036263557, "grad_norm": 1.5243756054514035, "learning_rate": 9.991340209602343e-06, "loss": 0.6434, "step": 366 }, { "epoch": 0.01980465166477794, "grad_norm": 0.9236937535852299, "learning_rate": 9.991292838984071e-06, "loss": 0.3782, "step": 367 }, { "epoch": 0.01985861529329232, "grad_norm": 1.131405185565812, "learning_rate": 9.991245339281507e-06, "loss": 0.6485, "step": 368 }, { "epoch": 0.0199125789218067, "grad_norm": 1.039846342455159, "learning_rate": 9.991197710496012e-06, "loss": 0.5499, "step": 369 }, { "epoch": 0.019966542550321084, "grad_norm": 1.0289320772557986, "learning_rate": 9.991149952628954e-06, "loss": 0.4486, "step": 370 }, { "epoch": 0.020020506178835464, "grad_norm": 0.893959457797538, "learning_rate": 9.991102065681713e-06, "loss": 0.4515, "step": 371 }, { "epoch": 0.020074469807349848, "grad_norm": 1.0074706151635824, "learning_rate": 9.991054049655657e-06, "loss": 0.5068, "step": 372 }, { "epoch": 0.020128433435864228, "grad_norm": 1.2604886614806916, "learning_rate": 9.991005904552171e-06, "loss": 0.5376, "step": 373 }, { "epoch": 0.020182397064378608, "grad_norm": 1.288867864182396, "learning_rate": 9.99095763037264e-06, "loss": 0.6698, "step": 374 }, { "epoch": 0.02023636069289299, "grad_norm": 1.0604201301316702, "learning_rate": 9.990909227118445e-06, "loss": 0.6035, "step": 375 }, { "epoch": 0.02029032432140737, "grad_norm": 1.4511241524576817, "learning_rate": 9.990860694790984e-06, "loss": 0.515, "step": 376 }, { "epoch": 0.02034428794992175, "grad_norm": 0.9691096093259752, "learning_rate": 9.990812033391647e-06, "loss": 0.5259, "step": 377 }, { "epoch": 0.020398251578436135, "grad_norm": 1.1933954800722981, "learning_rate": 9.990763242921836e-06, "loss": 0.4883, "step": 378 }, { "epoch": 0.020452215206950515, "grad_norm": 0.9890971821504816, "learning_rate": 9.99071432338295e-06, "loss": 0.5103, "step": 379 }, { "epoch": 0.020506178835464895, "grad_norm": 1.6161190384179327, "learning_rate": 9.990665274776397e-06, "loss": 0.7775, "step": 380 }, { "epoch": 0.02056014246397928, "grad_norm": 1.1588996284220572, "learning_rate": 9.990616097103589e-06, "loss": 0.6452, "step": 381 }, { "epoch": 0.02061410609249366, "grad_norm": 1.6387354365251323, "learning_rate": 9.990566790365934e-06, "loss": 0.538, "step": 382 }, { "epoch": 0.020668069721008042, "grad_norm": 1.181503408307723, "learning_rate": 9.990517354564854e-06, "loss": 0.6847, "step": 383 }, { "epoch": 0.020722033349522422, "grad_norm": 1.1210325924067448, "learning_rate": 9.990467789701768e-06, "loss": 0.4307, "step": 384 }, { "epoch": 0.020775996978036802, "grad_norm": 1.2351073551458729, "learning_rate": 9.990418095778099e-06, "loss": 0.5608, "step": 385 }, { "epoch": 0.020829960606551186, "grad_norm": 0.980133901174093, "learning_rate": 9.990368272795278e-06, "loss": 0.4729, "step": 386 }, { "epoch": 0.020883924235065566, "grad_norm": 0.9846342161427473, "learning_rate": 9.990318320754735e-06, "loss": 0.4843, "step": 387 }, { "epoch": 0.020937887863579946, "grad_norm": 
1.0602643921251473, "learning_rate": 9.990268239657905e-06, "loss": 0.6411, "step": 388 }, { "epoch": 0.02099185149209433, "grad_norm": 1.152417197857347, "learning_rate": 9.99021802950623e-06, "loss": 0.6283, "step": 389 }, { "epoch": 0.02104581512060871, "grad_norm": 1.0971472320188196, "learning_rate": 9.990167690301153e-06, "loss": 0.5246, "step": 390 }, { "epoch": 0.02109977874912309, "grad_norm": 1.1513246271661204, "learning_rate": 9.99011722204412e-06, "loss": 0.6266, "step": 391 }, { "epoch": 0.021153742377637473, "grad_norm": 0.8725236933810684, "learning_rate": 9.990066624736577e-06, "loss": 0.3411, "step": 392 }, { "epoch": 0.021207706006151853, "grad_norm": 1.1007994871284874, "learning_rate": 9.990015898379985e-06, "loss": 0.603, "step": 393 }, { "epoch": 0.021261669634666237, "grad_norm": 0.981791162436866, "learning_rate": 9.9899650429758e-06, "loss": 0.5486, "step": 394 }, { "epoch": 0.021315633263180617, "grad_norm": 1.0712790034863355, "learning_rate": 9.989914058525483e-06, "loss": 0.482, "step": 395 }, { "epoch": 0.021369596891694997, "grad_norm": 1.281567362686101, "learning_rate": 9.989862945030497e-06, "loss": 0.6381, "step": 396 }, { "epoch": 0.02142356052020938, "grad_norm": 1.4066313108261324, "learning_rate": 9.989811702492315e-06, "loss": 0.7477, "step": 397 }, { "epoch": 0.02147752414872376, "grad_norm": 0.9009563673648116, "learning_rate": 9.989760330912407e-06, "loss": 0.4479, "step": 398 }, { "epoch": 0.02153148777723814, "grad_norm": 1.220952938845963, "learning_rate": 9.989708830292251e-06, "loss": 0.6219, "step": 399 }, { "epoch": 0.021585451405752524, "grad_norm": 1.2806449811219345, "learning_rate": 9.989657200633327e-06, "loss": 0.7035, "step": 400 }, { "epoch": 0.021639415034266904, "grad_norm": 1.2115281442409713, "learning_rate": 9.98960544193712e-06, "loss": 0.5445, "step": 401 }, { "epoch": 0.021693378662781284, "grad_norm": 0.9687200143125931, "learning_rate": 9.989553554205114e-06, "loss": 0.3712, "step": 402 }, { "epoch": 0.021747342291295668, "grad_norm": 0.9159610194814729, "learning_rate": 9.989501537438803e-06, "loss": 0.3589, "step": 403 }, { "epoch": 0.021801305919810048, "grad_norm": 1.1767781408873939, "learning_rate": 9.989449391639682e-06, "loss": 0.6396, "step": 404 }, { "epoch": 0.021855269548324428, "grad_norm": 1.08831950478531, "learning_rate": 9.989397116809248e-06, "loss": 0.6823, "step": 405 }, { "epoch": 0.02190923317683881, "grad_norm": 1.1423996719973566, "learning_rate": 9.989344712949007e-06, "loss": 0.6034, "step": 406 }, { "epoch": 0.02196319680535319, "grad_norm": 1.1634817089721479, "learning_rate": 9.98929218006046e-06, "loss": 0.5996, "step": 407 }, { "epoch": 0.022017160433867575, "grad_norm": 1.27239761726615, "learning_rate": 9.98923951814512e-06, "loss": 0.7378, "step": 408 }, { "epoch": 0.022071124062381955, "grad_norm": 1.2099833306386703, "learning_rate": 9.989186727204502e-06, "loss": 0.5914, "step": 409 }, { "epoch": 0.022125087690896335, "grad_norm": 1.142430265696326, "learning_rate": 9.989133807240121e-06, "loss": 0.5911, "step": 410 }, { "epoch": 0.02217905131941072, "grad_norm": 1.098549992682177, "learning_rate": 9.989080758253497e-06, "loss": 0.4004, "step": 411 }, { "epoch": 0.0222330149479251, "grad_norm": 1.182378957105608, "learning_rate": 9.989027580246157e-06, "loss": 0.5299, "step": 412 }, { "epoch": 0.02228697857643948, "grad_norm": 1.025952821533961, "learning_rate": 9.98897427321963e-06, "loss": 0.3932, "step": 413 }, { "epoch": 0.022340942204953862, "grad_norm": 1.2688654595356164, 
"learning_rate": 9.988920837175445e-06, "loss": 0.551, "step": 414 }, { "epoch": 0.022394905833468242, "grad_norm": 1.186693883564988, "learning_rate": 9.98886727211514e-06, "loss": 0.6896, "step": 415 }, { "epoch": 0.022448869461982622, "grad_norm": 1.1981808509414456, "learning_rate": 9.988813578040254e-06, "loss": 0.5842, "step": 416 }, { "epoch": 0.022502833090497006, "grad_norm": 1.3802870957789928, "learning_rate": 9.988759754952329e-06, "loss": 0.6488, "step": 417 }, { "epoch": 0.022556796719011386, "grad_norm": 1.323892653382377, "learning_rate": 9.988705802852913e-06, "loss": 0.5748, "step": 418 }, { "epoch": 0.02261076034752577, "grad_norm": 2.2002603319561653, "learning_rate": 9.988651721743558e-06, "loss": 0.6829, "step": 419 }, { "epoch": 0.02266472397604015, "grad_norm": 1.2480406558189578, "learning_rate": 9.988597511625818e-06, "loss": 0.6835, "step": 420 }, { "epoch": 0.02271868760455453, "grad_norm": 1.4122743308123062, "learning_rate": 9.988543172501247e-06, "loss": 0.7582, "step": 421 }, { "epoch": 0.022772651233068913, "grad_norm": 1.277262217326017, "learning_rate": 9.988488704371413e-06, "loss": 0.6366, "step": 422 }, { "epoch": 0.022826614861583293, "grad_norm": 0.9723536348145589, "learning_rate": 9.988434107237877e-06, "loss": 0.413, "step": 423 }, { "epoch": 0.022880578490097673, "grad_norm": 0.9223259724942151, "learning_rate": 9.98837938110221e-06, "loss": 0.3605, "step": 424 }, { "epoch": 0.022934542118612056, "grad_norm": 1.3905536665566678, "learning_rate": 9.988324525965984e-06, "loss": 0.6113, "step": 425 }, { "epoch": 0.022988505747126436, "grad_norm": 1.2123117468654485, "learning_rate": 9.988269541830775e-06, "loss": 0.6777, "step": 426 }, { "epoch": 0.023042469375640817, "grad_norm": 1.2493484390535818, "learning_rate": 9.988214428698166e-06, "loss": 0.4866, "step": 427 }, { "epoch": 0.0230964330041552, "grad_norm": 1.1509258716763837, "learning_rate": 9.988159186569738e-06, "loss": 0.4522, "step": 428 }, { "epoch": 0.02315039663266958, "grad_norm": 1.0837610111659282, "learning_rate": 9.988103815447082e-06, "loss": 0.4903, "step": 429 }, { "epoch": 0.023204360261183964, "grad_norm": 1.1704103386781648, "learning_rate": 9.988048315331784e-06, "loss": 0.546, "step": 430 }, { "epoch": 0.023258323889698344, "grad_norm": 1.089398629817117, "learning_rate": 9.987992686225444e-06, "loss": 0.5504, "step": 431 }, { "epoch": 0.023312287518212724, "grad_norm": 1.2721274937374005, "learning_rate": 9.987936928129662e-06, "loss": 0.539, "step": 432 }, { "epoch": 0.023366251146727107, "grad_norm": 0.9155705808375275, "learning_rate": 9.987881041046035e-06, "loss": 0.4581, "step": 433 }, { "epoch": 0.023420214775241487, "grad_norm": 1.0098221179095268, "learning_rate": 9.987825024976174e-06, "loss": 0.4936, "step": 434 }, { "epoch": 0.023474178403755867, "grad_norm": 1.1719022207004701, "learning_rate": 9.987768879921685e-06, "loss": 0.514, "step": 435 }, { "epoch": 0.02352814203227025, "grad_norm": 1.0511233620834823, "learning_rate": 9.987712605884185e-06, "loss": 0.5475, "step": 436 }, { "epoch": 0.02358210566078463, "grad_norm": 1.3894231984902898, "learning_rate": 9.987656202865291e-06, "loss": 0.5608, "step": 437 }, { "epoch": 0.02363606928929901, "grad_norm": 1.394301736069319, "learning_rate": 9.987599670866623e-06, "loss": 0.6223, "step": 438 }, { "epoch": 0.023690032917813394, "grad_norm": 1.1258535515648678, "learning_rate": 9.987543009889806e-06, "loss": 0.4578, "step": 439 }, { "epoch": 0.023743996546327775, "grad_norm": 1.0492990611438835, 
"learning_rate": 9.987486219936468e-06, "loss": 0.5358, "step": 440 }, { "epoch": 0.023797960174842158, "grad_norm": 0.9301331576169605, "learning_rate": 9.987429301008243e-06, "loss": 0.5961, "step": 441 }, { "epoch": 0.023851923803356538, "grad_norm": 1.147881652309137, "learning_rate": 9.987372253106764e-06, "loss": 0.5741, "step": 442 }, { "epoch": 0.023905887431870918, "grad_norm": 1.2789401882768119, "learning_rate": 9.987315076233673e-06, "loss": 0.5482, "step": 443 }, { "epoch": 0.0239598510603853, "grad_norm": 0.9678701947635667, "learning_rate": 9.987257770390612e-06, "loss": 0.5344, "step": 444 }, { "epoch": 0.024013814688899682, "grad_norm": 1.519705404305563, "learning_rate": 9.987200335579229e-06, "loss": 0.5431, "step": 445 }, { "epoch": 0.024067778317414062, "grad_norm": 1.3675015921979972, "learning_rate": 9.987142771801174e-06, "loss": 0.5168, "step": 446 }, { "epoch": 0.024121741945928445, "grad_norm": 1.0776624481236126, "learning_rate": 9.987085079058105e-06, "loss": 0.4961, "step": 447 }, { "epoch": 0.024175705574442825, "grad_norm": 1.1015060691686396, "learning_rate": 9.987027257351674e-06, "loss": 0.4721, "step": 448 }, { "epoch": 0.024229669202957205, "grad_norm": 0.9745202617892904, "learning_rate": 9.986969306683545e-06, "loss": 0.4596, "step": 449 }, { "epoch": 0.02428363283147159, "grad_norm": 1.4453594364528202, "learning_rate": 9.986911227055385e-06, "loss": 0.6479, "step": 450 }, { "epoch": 0.02433759645998597, "grad_norm": 1.3303851080342572, "learning_rate": 9.986853018468865e-06, "loss": 0.5806, "step": 451 }, { "epoch": 0.02439156008850035, "grad_norm": 1.1128854103209724, "learning_rate": 9.986794680925654e-06, "loss": 0.4633, "step": 452 }, { "epoch": 0.024445523717014733, "grad_norm": 1.382592561502497, "learning_rate": 9.986736214427428e-06, "loss": 0.5454, "step": 453 }, { "epoch": 0.024499487345529113, "grad_norm": 1.2571146851520059, "learning_rate": 9.986677618975875e-06, "loss": 0.6085, "step": 454 }, { "epoch": 0.024553450974043496, "grad_norm": 1.3180036253007132, "learning_rate": 9.986618894572667e-06, "loss": 0.602, "step": 455 }, { "epoch": 0.024607414602557876, "grad_norm": 0.9924928677849718, "learning_rate": 9.986560041219504e-06, "loss": 0.5012, "step": 456 }, { "epoch": 0.024661378231072256, "grad_norm": 1.0916553984789978, "learning_rate": 9.98650105891807e-06, "loss": 0.6007, "step": 457 }, { "epoch": 0.02471534185958664, "grad_norm": 1.1282895420477115, "learning_rate": 9.986441947670064e-06, "loss": 0.4772, "step": 458 }, { "epoch": 0.02476930548810102, "grad_norm": 1.042439146813708, "learning_rate": 9.98638270747718e-06, "loss": 0.5285, "step": 459 }, { "epoch": 0.0248232691166154, "grad_norm": 1.0563715968159886, "learning_rate": 9.986323338341127e-06, "loss": 0.5221, "step": 460 }, { "epoch": 0.024877232745129783, "grad_norm": 1.2986207979017148, "learning_rate": 9.986263840263606e-06, "loss": 0.6796, "step": 461 }, { "epoch": 0.024931196373644163, "grad_norm": 1.1084671828072887, "learning_rate": 9.98620421324633e-06, "loss": 0.6311, "step": 462 }, { "epoch": 0.024985160002158543, "grad_norm": 1.1879179337387835, "learning_rate": 9.986144457291012e-06, "loss": 0.6492, "step": 463 }, { "epoch": 0.025039123630672927, "grad_norm": 1.1349278759199277, "learning_rate": 9.986084572399369e-06, "loss": 0.4431, "step": 464 }, { "epoch": 0.025093087259187307, "grad_norm": 1.3227190707778786, "learning_rate": 9.986024558573122e-06, "loss": 0.5417, "step": 465 }, { "epoch": 0.02514705088770169, "grad_norm": 1.2593625285913357, 
"learning_rate": 9.985964415813997e-06, "loss": 0.5563, "step": 466 }, { "epoch": 0.02520101451621607, "grad_norm": 1.3550316587819038, "learning_rate": 9.985904144123723e-06, "loss": 0.4973, "step": 467 }, { "epoch": 0.02525497814473045, "grad_norm": 1.238208407663588, "learning_rate": 9.985843743504031e-06, "loss": 0.6036, "step": 468 }, { "epoch": 0.025308941773244834, "grad_norm": 1.3082385934445693, "learning_rate": 9.985783213956655e-06, "loss": 0.5714, "step": 469 }, { "epoch": 0.025362905401759214, "grad_norm": 1.0957314862399292, "learning_rate": 9.98572255548334e-06, "loss": 0.4711, "step": 470 }, { "epoch": 0.025416869030273594, "grad_norm": 1.3106289250592238, "learning_rate": 9.985661768085824e-06, "loss": 0.6982, "step": 471 }, { "epoch": 0.025470832658787978, "grad_norm": 1.1484042676375412, "learning_rate": 9.985600851765857e-06, "loss": 0.5221, "step": 472 }, { "epoch": 0.025524796287302358, "grad_norm": 1.0584262230980137, "learning_rate": 9.985539806525189e-06, "loss": 0.5805, "step": 473 }, { "epoch": 0.025578759915816738, "grad_norm": 1.0453924181359033, "learning_rate": 9.985478632365575e-06, "loss": 0.407, "step": 474 }, { "epoch": 0.02563272354433112, "grad_norm": 1.1058111944764053, "learning_rate": 9.985417329288774e-06, "loss": 0.516, "step": 475 }, { "epoch": 0.0256866871728455, "grad_norm": 1.2577401678807718, "learning_rate": 9.985355897296544e-06, "loss": 0.5843, "step": 476 }, { "epoch": 0.025740650801359885, "grad_norm": 1.07662485373891, "learning_rate": 9.985294336390656e-06, "loss": 0.5407, "step": 477 }, { "epoch": 0.025794614429874265, "grad_norm": 1.0695165179724182, "learning_rate": 9.985232646572874e-06, "loss": 0.4783, "step": 478 }, { "epoch": 0.025848578058388645, "grad_norm": 1.1276310322838505, "learning_rate": 9.985170827844976e-06, "loss": 0.7169, "step": 479 }, { "epoch": 0.02590254168690303, "grad_norm": 1.2047066021553443, "learning_rate": 9.985108880208736e-06, "loss": 0.4976, "step": 480 }, { "epoch": 0.02595650531541741, "grad_norm": 1.2593515997905331, "learning_rate": 9.985046803665934e-06, "loss": 0.5791, "step": 481 }, { "epoch": 0.02601046894393179, "grad_norm": 1.2016432572228684, "learning_rate": 9.984984598218357e-06, "loss": 0.5673, "step": 482 }, { "epoch": 0.026064432572446172, "grad_norm": 1.0189816790974007, "learning_rate": 9.984922263867789e-06, "loss": 0.4651, "step": 483 }, { "epoch": 0.026118396200960552, "grad_norm": 0.8988387572382215, "learning_rate": 9.984859800616024e-06, "loss": 0.3962, "step": 484 }, { "epoch": 0.026172359829474932, "grad_norm": 1.324787047734092, "learning_rate": 9.984797208464856e-06, "loss": 0.468, "step": 485 }, { "epoch": 0.026226323457989316, "grad_norm": 1.2617512630988965, "learning_rate": 9.984734487416086e-06, "loss": 0.5054, "step": 486 }, { "epoch": 0.026280287086503696, "grad_norm": 1.16735705539683, "learning_rate": 9.984671637471514e-06, "loss": 0.7776, "step": 487 }, { "epoch": 0.02633425071501808, "grad_norm": 1.004076616605721, "learning_rate": 9.98460865863295e-06, "loss": 0.4508, "step": 488 }, { "epoch": 0.02638821434353246, "grad_norm": 1.1462526187827538, "learning_rate": 9.9845455509022e-06, "loss": 0.4452, "step": 489 }, { "epoch": 0.02644217797204684, "grad_norm": 1.117487523543565, "learning_rate": 9.98448231428108e-06, "loss": 0.5695, "step": 490 }, { "epoch": 0.026496141600561223, "grad_norm": 0.9761634834798999, "learning_rate": 9.984418948771406e-06, "loss": 0.4342, "step": 491 }, { "epoch": 0.026550105229075603, "grad_norm": 1.0872276274350663, "learning_rate": 
9.984355454375003e-06, "loss": 0.5526, "step": 492 }, { "epoch": 0.026604068857589983, "grad_norm": 1.0285732840486945, "learning_rate": 9.98429183109369e-06, "loss": 0.4997, "step": 493 }, { "epoch": 0.026658032486104367, "grad_norm": 1.5225246708669666, "learning_rate": 9.9842280789293e-06, "loss": 0.761, "step": 494 }, { "epoch": 0.026711996114618747, "grad_norm": 1.074960375956489, "learning_rate": 9.984164197883664e-06, "loss": 0.6129, "step": 495 }, { "epoch": 0.026765959743133127, "grad_norm": 1.006262347022148, "learning_rate": 9.984100187958618e-06, "loss": 0.4775, "step": 496 }, { "epoch": 0.02681992337164751, "grad_norm": 1.0262174107545028, "learning_rate": 9.984036049156002e-06, "loss": 0.4773, "step": 497 }, { "epoch": 0.02687388700016189, "grad_norm": 1.414132776550392, "learning_rate": 9.98397178147766e-06, "loss": 0.6144, "step": 498 }, { "epoch": 0.02692785062867627, "grad_norm": 1.1644426924260391, "learning_rate": 9.983907384925436e-06, "loss": 0.6346, "step": 499 }, { "epoch": 0.026981814257190654, "grad_norm": 1.0919694577033208, "learning_rate": 9.983842859501185e-06, "loss": 0.4418, "step": 500 }, { "epoch": 0.026981814257190654, "eval_loss": 0.6459314227104187, "eval_runtime": 164.1726, "eval_samples_per_second": 20.947, "eval_steps_per_second": 0.877, "step": 500 }, { "epoch": 0.027035777885705034, "grad_norm": 1.110109343155162, "learning_rate": 9.98377820520676e-06, "loss": 0.476, "step": 501 }, { "epoch": 0.027089741514219418, "grad_norm": 0.964460134527613, "learning_rate": 9.983713422044016e-06, "loss": 0.5242, "step": 502 }, { "epoch": 0.027143705142733798, "grad_norm": 1.2029588344530528, "learning_rate": 9.983648510014821e-06, "loss": 0.5624, "step": 503 }, { "epoch": 0.027197668771248178, "grad_norm": 1.0453470072965703, "learning_rate": 9.983583469121035e-06, "loss": 0.492, "step": 504 }, { "epoch": 0.02725163239976256, "grad_norm": 1.1008645815352225, "learning_rate": 9.983518299364531e-06, "loss": 0.5241, "step": 505 }, { "epoch": 0.02730559602827694, "grad_norm": 0.9139297808075197, "learning_rate": 9.983453000747181e-06, "loss": 0.4855, "step": 506 }, { "epoch": 0.02735955965679132, "grad_norm": 1.1774237738671027, "learning_rate": 9.983387573270861e-06, "loss": 0.4794, "step": 507 }, { "epoch": 0.027413523285305705, "grad_norm": 1.018400989814044, "learning_rate": 9.983322016937452e-06, "loss": 0.5955, "step": 508 }, { "epoch": 0.027467486913820085, "grad_norm": 1.1984477181782833, "learning_rate": 9.983256331748839e-06, "loss": 0.6075, "step": 509 }, { "epoch": 0.027521450542334465, "grad_norm": 1.2314614349710196, "learning_rate": 9.983190517706907e-06, "loss": 0.4281, "step": 510 }, { "epoch": 0.02757541417084885, "grad_norm": 1.1892243260952917, "learning_rate": 9.983124574813551e-06, "loss": 0.4865, "step": 511 }, { "epoch": 0.02762937779936323, "grad_norm": 1.3971079333132348, "learning_rate": 9.983058503070665e-06, "loss": 0.6548, "step": 512 }, { "epoch": 0.027683341427877612, "grad_norm": 1.233557591521009, "learning_rate": 9.982992302480148e-06, "loss": 0.5943, "step": 513 }, { "epoch": 0.027737305056391992, "grad_norm": 1.2284263058598675, "learning_rate": 9.982925973043902e-06, "loss": 0.5194, "step": 514 }, { "epoch": 0.027791268684906372, "grad_norm": 1.178338326028012, "learning_rate": 9.982859514763835e-06, "loss": 0.6166, "step": 515 }, { "epoch": 0.027845232313420756, "grad_norm": 0.9951496679963399, "learning_rate": 9.982792927641854e-06, "loss": 0.4512, "step": 516 }, { "epoch": 0.027899195941935136, "grad_norm": 
0.8887784595612234, "learning_rate": 9.982726211679876e-06, "loss": 0.3754, "step": 517 }, { "epoch": 0.027953159570449516, "grad_norm": 1.24765799032227, "learning_rate": 9.982659366879817e-06, "loss": 0.548, "step": 518 }, { "epoch": 0.0280071231989639, "grad_norm": 1.2782875460704954, "learning_rate": 9.982592393243599e-06, "loss": 0.5232, "step": 519 }, { "epoch": 0.02806108682747828, "grad_norm": 0.9872821160150816, "learning_rate": 9.982525290773146e-06, "loss": 0.452, "step": 520 }, { "epoch": 0.02811505045599266, "grad_norm": 1.2844856524366668, "learning_rate": 9.982458059470386e-06, "loss": 0.7114, "step": 521 }, { "epoch": 0.028169014084507043, "grad_norm": 1.0825546900075842, "learning_rate": 9.982390699337253e-06, "loss": 0.5508, "step": 522 }, { "epoch": 0.028222977713021423, "grad_norm": 1.1895834434933414, "learning_rate": 9.982323210375681e-06, "loss": 0.5057, "step": 523 }, { "epoch": 0.028276941341535806, "grad_norm": 1.0794046514082432, "learning_rate": 9.982255592587612e-06, "loss": 0.4881, "step": 524 }, { "epoch": 0.028330904970050187, "grad_norm": 0.7589128492820196, "learning_rate": 9.982187845974988e-06, "loss": 0.3514, "step": 525 }, { "epoch": 0.028384868598564567, "grad_norm": 0.9777096703164789, "learning_rate": 9.982119970539757e-06, "loss": 0.4498, "step": 526 }, { "epoch": 0.02843883222707895, "grad_norm": 0.8718822369283625, "learning_rate": 9.98205196628387e-06, "loss": 0.405, "step": 527 }, { "epoch": 0.02849279585559333, "grad_norm": 1.116196074637517, "learning_rate": 9.981983833209279e-06, "loss": 0.6034, "step": 528 }, { "epoch": 0.02854675948410771, "grad_norm": 1.1676009518710115, "learning_rate": 9.981915571317942e-06, "loss": 0.4965, "step": 529 }, { "epoch": 0.028600723112622094, "grad_norm": 1.4052539130464754, "learning_rate": 9.981847180611826e-06, "loss": 0.6058, "step": 530 }, { "epoch": 0.028654686741136474, "grad_norm": 1.2334131614113848, "learning_rate": 9.981778661092893e-06, "loss": 0.6446, "step": 531 }, { "epoch": 0.028708650369650854, "grad_norm": 1.2741530240332686, "learning_rate": 9.981710012763111e-06, "loss": 0.5937, "step": 532 }, { "epoch": 0.028762613998165237, "grad_norm": 1.0247503718233553, "learning_rate": 9.981641235624457e-06, "loss": 0.6278, "step": 533 }, { "epoch": 0.028816577626679617, "grad_norm": 0.6977777911839362, "learning_rate": 9.981572329678904e-06, "loss": 0.3395, "step": 534 }, { "epoch": 0.028870541255194, "grad_norm": 1.1374177735247473, "learning_rate": 9.981503294928435e-06, "loss": 0.5862, "step": 535 }, { "epoch": 0.02892450488370838, "grad_norm": 1.1837132664625118, "learning_rate": 9.981434131375032e-06, "loss": 0.5048, "step": 536 }, { "epoch": 0.02897846851222276, "grad_norm": 1.110877117304451, "learning_rate": 9.981364839020685e-06, "loss": 0.6427, "step": 537 }, { "epoch": 0.029032432140737145, "grad_norm": 1.0662793460128148, "learning_rate": 9.981295417867384e-06, "loss": 0.4295, "step": 538 }, { "epoch": 0.029086395769251525, "grad_norm": 1.1779090743849445, "learning_rate": 9.981225867917124e-06, "loss": 0.5002, "step": 539 }, { "epoch": 0.029140359397765905, "grad_norm": 1.3172880780435805, "learning_rate": 9.981156189171907e-06, "loss": 0.6471, "step": 540 }, { "epoch": 0.029194323026280288, "grad_norm": 0.9266136440174673, "learning_rate": 9.98108638163373e-06, "loss": 0.3623, "step": 541 }, { "epoch": 0.029248286654794668, "grad_norm": 1.2667509480810353, "learning_rate": 9.981016445304604e-06, "loss": 0.7001, "step": 542 }, { "epoch": 0.02930225028330905, "grad_norm": 
1.097025919017542, "learning_rate": 9.980946380186538e-06, "loss": 0.4448, "step": 543 }, { "epoch": 0.029356213911823432, "grad_norm": 1.0759727430485406, "learning_rate": 9.980876186281545e-06, "loss": 0.5169, "step": 544 }, { "epoch": 0.029410177540337812, "grad_norm": 0.9975899107011369, "learning_rate": 9.980805863591643e-06, "loss": 0.5351, "step": 545 }, { "epoch": 0.029464141168852192, "grad_norm": 1.1942104104934133, "learning_rate": 9.980735412118852e-06, "loss": 0.6671, "step": 546 }, { "epoch": 0.029518104797366575, "grad_norm": 0.9218625952339685, "learning_rate": 9.9806648318652e-06, "loss": 0.3542, "step": 547 }, { "epoch": 0.029572068425880955, "grad_norm": 1.1310218531366758, "learning_rate": 9.98059412283271e-06, "loss": 0.4743, "step": 548 }, { "epoch": 0.02962603205439534, "grad_norm": 1.0386891504578748, "learning_rate": 9.98052328502342e-06, "loss": 0.5505, "step": 549 }, { "epoch": 0.02967999568290972, "grad_norm": 1.3161231300084628, "learning_rate": 9.980452318439364e-06, "loss": 0.653, "step": 550 }, { "epoch": 0.0297339593114241, "grad_norm": 1.1787767975767434, "learning_rate": 9.980381223082581e-06, "loss": 0.6018, "step": 551 }, { "epoch": 0.029787922939938483, "grad_norm": 1.2327431095876908, "learning_rate": 9.980309998955113e-06, "loss": 0.4534, "step": 552 }, { "epoch": 0.029841886568452863, "grad_norm": 1.0123075863963638, "learning_rate": 9.980238646059009e-06, "loss": 0.5781, "step": 553 }, { "epoch": 0.029895850196967243, "grad_norm": 1.2074894084933527, "learning_rate": 9.980167164396321e-06, "loss": 0.6345, "step": 554 }, { "epoch": 0.029949813825481626, "grad_norm": 1.2016242071003427, "learning_rate": 9.980095553969101e-06, "loss": 0.5373, "step": 555 }, { "epoch": 0.030003777453996006, "grad_norm": 0.8842356569430259, "learning_rate": 9.980023814779407e-06, "loss": 0.3865, "step": 556 }, { "epoch": 0.030057741082510386, "grad_norm": 1.0238042871637816, "learning_rate": 9.979951946829302e-06, "loss": 0.3351, "step": 557 }, { "epoch": 0.03011170471102477, "grad_norm": 1.4603793159419332, "learning_rate": 9.979879950120852e-06, "loss": 0.7998, "step": 558 }, { "epoch": 0.03016566833953915, "grad_norm": 1.1312441024107023, "learning_rate": 9.979807824656125e-06, "loss": 0.435, "step": 559 }, { "epoch": 0.030219631968053533, "grad_norm": 1.1573531913038548, "learning_rate": 9.979735570437196e-06, "loss": 0.53, "step": 560 }, { "epoch": 0.030273595596567913, "grad_norm": 1.0057770230043936, "learning_rate": 9.979663187466139e-06, "loss": 0.4392, "step": 561 }, { "epoch": 0.030327559225082294, "grad_norm": 1.2583548279674015, "learning_rate": 9.979590675745036e-06, "loss": 0.5539, "step": 562 }, { "epoch": 0.030381522853596677, "grad_norm": 1.3151013253146489, "learning_rate": 9.979518035275972e-06, "loss": 0.4692, "step": 563 }, { "epoch": 0.030435486482111057, "grad_norm": 1.1230285182334558, "learning_rate": 9.97944526606103e-06, "loss": 0.6521, "step": 564 }, { "epoch": 0.030489450110625437, "grad_norm": 1.2629477082608564, "learning_rate": 9.979372368102307e-06, "loss": 0.5995, "step": 565 }, { "epoch": 0.03054341373913982, "grad_norm": 0.9839501803153108, "learning_rate": 9.979299341401896e-06, "loss": 0.4172, "step": 566 }, { "epoch": 0.0305973773676542, "grad_norm": 1.0221909418443713, "learning_rate": 9.979226185961898e-06, "loss": 0.4144, "step": 567 }, { "epoch": 0.03065134099616858, "grad_norm": 1.2836373848293925, "learning_rate": 9.979152901784411e-06, "loss": 0.6641, "step": 568 }, { "epoch": 0.030705304624682964, "grad_norm": 
0.9175894057112968, "learning_rate": 9.979079488871544e-06, "loss": 0.4045, "step": 569 }, { "epoch": 0.030759268253197344, "grad_norm": 0.7862773944221269, "learning_rate": 9.979005947225407e-06, "loss": 0.3637, "step": 570 }, { "epoch": 0.030813231881711728, "grad_norm": 1.1386824914638112, "learning_rate": 9.978932276848112e-06, "loss": 0.4089, "step": 571 }, { "epoch": 0.030867195510226108, "grad_norm": 1.1261748321601806, "learning_rate": 9.97885847774178e-06, "loss": 0.5087, "step": 572 }, { "epoch": 0.030921159138740488, "grad_norm": 1.106878664327847, "learning_rate": 9.97878454990853e-06, "loss": 0.5337, "step": 573 }, { "epoch": 0.03097512276725487, "grad_norm": 1.2136879322490512, "learning_rate": 9.978710493350485e-06, "loss": 0.5178, "step": 574 }, { "epoch": 0.03102908639576925, "grad_norm": 0.9820022755592402, "learning_rate": 9.978636308069776e-06, "loss": 0.487, "step": 575 }, { "epoch": 0.03108305002428363, "grad_norm": 1.0470547617291561, "learning_rate": 9.978561994068532e-06, "loss": 0.4949, "step": 576 }, { "epoch": 0.031137013652798015, "grad_norm": 1.055441311740176, "learning_rate": 9.978487551348894e-06, "loss": 0.5426, "step": 577 }, { "epoch": 0.031190977281312395, "grad_norm": 1.2417993655060524, "learning_rate": 9.978412979912996e-06, "loss": 0.6439, "step": 578 }, { "epoch": 0.031244940909826775, "grad_norm": 1.1683808146319994, "learning_rate": 9.978338279762986e-06, "loss": 0.4765, "step": 579 }, { "epoch": 0.03129890453834116, "grad_norm": 1.1795114220484655, "learning_rate": 9.978263450901008e-06, "loss": 0.7299, "step": 580 }, { "epoch": 0.03135286816685554, "grad_norm": 1.1473697552121351, "learning_rate": 9.978188493329215e-06, "loss": 0.4675, "step": 581 }, { "epoch": 0.03140683179536992, "grad_norm": 0.9443838275186192, "learning_rate": 9.978113407049756e-06, "loss": 0.5326, "step": 582 }, { "epoch": 0.0314607954238843, "grad_norm": 1.1037380205963083, "learning_rate": 9.978038192064794e-06, "loss": 0.4849, "step": 583 }, { "epoch": 0.031514759052398686, "grad_norm": 1.1646219496312937, "learning_rate": 9.977962848376492e-06, "loss": 0.5496, "step": 584 }, { "epoch": 0.03156872268091306, "grad_norm": 1.0951060593829312, "learning_rate": 9.977887375987011e-06, "loss": 0.5345, "step": 585 }, { "epoch": 0.031622686309427446, "grad_norm": 1.3235756308444222, "learning_rate": 9.977811774898523e-06, "loss": 0.5481, "step": 586 }, { "epoch": 0.03167664993794183, "grad_norm": 0.9793840181262841, "learning_rate": 9.977736045113201e-06, "loss": 0.5435, "step": 587 }, { "epoch": 0.031730613566456206, "grad_norm": 1.105382311223432, "learning_rate": 9.977660186633219e-06, "loss": 0.5132, "step": 588 }, { "epoch": 0.03178457719497059, "grad_norm": 1.1200007439249184, "learning_rate": 9.977584199460762e-06, "loss": 0.4973, "step": 589 }, { "epoch": 0.03183854082348497, "grad_norm": 1.249298460297479, "learning_rate": 9.977508083598006e-06, "loss": 0.6439, "step": 590 }, { "epoch": 0.03189250445199935, "grad_norm": 1.2530124122559925, "learning_rate": 9.977431839047146e-06, "loss": 0.8403, "step": 591 }, { "epoch": 0.03194646808051373, "grad_norm": 0.997271518388178, "learning_rate": 9.977355465810371e-06, "loss": 0.4812, "step": 592 }, { "epoch": 0.03200043170902812, "grad_norm": 1.3074725702897192, "learning_rate": 9.977278963889875e-06, "loss": 0.6025, "step": 593 }, { "epoch": 0.03205439533754249, "grad_norm": 1.038322668837078, "learning_rate": 9.97720233328786e-06, "loss": 0.4901, "step": 594 }, { "epoch": 0.03210835896605688, "grad_norm": 1.1451343706709212, 
"learning_rate": 9.977125574006525e-06, "loss": 0.5426, "step": 595 }, { "epoch": 0.03216232259457126, "grad_norm": 1.2168745905748297, "learning_rate": 9.977048686048075e-06, "loss": 0.5768, "step": 596 }, { "epoch": 0.03221628622308564, "grad_norm": 1.1127259232538744, "learning_rate": 9.976971669414724e-06, "loss": 0.5462, "step": 597 }, { "epoch": 0.03227024985160002, "grad_norm": 1.1520816202174538, "learning_rate": 9.976894524108683e-06, "loss": 0.6138, "step": 598 }, { "epoch": 0.032324213480114404, "grad_norm": 1.0405659237347167, "learning_rate": 9.97681725013217e-06, "loss": 0.4405, "step": 599 }, { "epoch": 0.03237817710862879, "grad_norm": 1.2517486504367097, "learning_rate": 9.976739847487405e-06, "loss": 0.4688, "step": 600 }, { "epoch": 0.032432140737143164, "grad_norm": 0.9766098049663949, "learning_rate": 9.976662316176615e-06, "loss": 0.4981, "step": 601 }, { "epoch": 0.03248610436565755, "grad_norm": 1.3559931862898909, "learning_rate": 9.976584656202025e-06, "loss": 0.5543, "step": 602 }, { "epoch": 0.03254006799417193, "grad_norm": 1.4593350177312336, "learning_rate": 9.976506867565871e-06, "loss": 0.7446, "step": 603 }, { "epoch": 0.03259403162268631, "grad_norm": 1.0917633627665384, "learning_rate": 9.976428950270383e-06, "loss": 0.3551, "step": 604 }, { "epoch": 0.03264799525120069, "grad_norm": 1.283019691463763, "learning_rate": 9.976350904317806e-06, "loss": 0.5902, "step": 605 }, { "epoch": 0.032701958879715075, "grad_norm": 1.1034219477728264, "learning_rate": 9.976272729710383e-06, "loss": 0.4989, "step": 606 }, { "epoch": 0.03275592250822945, "grad_norm": 1.424922751570215, "learning_rate": 9.976194426450355e-06, "loss": 0.7038, "step": 607 }, { "epoch": 0.032809886136743835, "grad_norm": 1.1740696710472487, "learning_rate": 9.97611599453998e-06, "loss": 0.4829, "step": 608 }, { "epoch": 0.03286384976525822, "grad_norm": 1.1535403703450355, "learning_rate": 9.976037433981505e-06, "loss": 0.4847, "step": 609 }, { "epoch": 0.032917813393772595, "grad_norm": 0.9846926316613003, "learning_rate": 9.975958744777193e-06, "loss": 0.448, "step": 610 }, { "epoch": 0.03297177702228698, "grad_norm": 1.101924282582547, "learning_rate": 9.975879926929303e-06, "loss": 0.4891, "step": 611 }, { "epoch": 0.03302574065080136, "grad_norm": 1.3461251105948324, "learning_rate": 9.975800980440103e-06, "loss": 0.5654, "step": 612 }, { "epoch": 0.03307970427931574, "grad_norm": 1.6082354103507113, "learning_rate": 9.97572190531186e-06, "loss": 0.738, "step": 613 }, { "epoch": 0.03313366790783012, "grad_norm": 0.8235800550886619, "learning_rate": 9.975642701546846e-06, "loss": 0.3482, "step": 614 }, { "epoch": 0.033187631536344506, "grad_norm": 0.9766243693482498, "learning_rate": 9.97556336914734e-06, "loss": 0.4409, "step": 615 }, { "epoch": 0.03324159516485888, "grad_norm": 1.089966889925386, "learning_rate": 9.97548390811562e-06, "loss": 0.5387, "step": 616 }, { "epoch": 0.033295558793373266, "grad_norm": 1.1246783363316304, "learning_rate": 9.975404318453968e-06, "loss": 0.4865, "step": 617 }, { "epoch": 0.03334952242188765, "grad_norm": 1.180499738036913, "learning_rate": 9.975324600164678e-06, "loss": 0.5316, "step": 618 }, { "epoch": 0.033403486050402026, "grad_norm": 0.9249326514007677, "learning_rate": 9.975244753250035e-06, "loss": 0.4847, "step": 619 }, { "epoch": 0.03345744967891641, "grad_norm": 0.9733414663728588, "learning_rate": 9.975164777712335e-06, "loss": 0.3706, "step": 620 }, { "epoch": 0.03351141330743079, "grad_norm": 1.2304692361003597, "learning_rate": 
9.975084673553878e-06, "loss": 0.5161, "step": 621 }, { "epoch": 0.03356537693594517, "grad_norm": 1.1947034439345585, "learning_rate": 9.975004440776965e-06, "loss": 0.5345, "step": 622 }, { "epoch": 0.03361934056445955, "grad_norm": 0.9441431259346416, "learning_rate": 9.974924079383905e-06, "loss": 0.4389, "step": 623 }, { "epoch": 0.03367330419297394, "grad_norm": 1.2286766556983493, "learning_rate": 9.974843589377003e-06, "loss": 0.4402, "step": 624 }, { "epoch": 0.03372726782148832, "grad_norm": 1.000082075862358, "learning_rate": 9.974762970758576e-06, "loss": 0.4443, "step": 625 }, { "epoch": 0.0337812314500027, "grad_norm": 1.1755997199423103, "learning_rate": 9.97468222353094e-06, "loss": 0.4008, "step": 626 }, { "epoch": 0.03383519507851708, "grad_norm": 0.9723235074841852, "learning_rate": 9.974601347696414e-06, "loss": 0.5516, "step": 627 }, { "epoch": 0.033889158707031464, "grad_norm": 0.9221374364490146, "learning_rate": 9.974520343257328e-06, "loss": 0.5571, "step": 628 }, { "epoch": 0.03394312233554584, "grad_norm": 1.0773764898622797, "learning_rate": 9.974439210216002e-06, "loss": 0.4685, "step": 629 }, { "epoch": 0.033997085964060224, "grad_norm": 1.107310253089235, "learning_rate": 9.974357948574774e-06, "loss": 0.4057, "step": 630 }, { "epoch": 0.03405104959257461, "grad_norm": 0.9162615412650591, "learning_rate": 9.974276558335975e-06, "loss": 0.3731, "step": 631 }, { "epoch": 0.034105013221088984, "grad_norm": 1.2586936932631536, "learning_rate": 9.974195039501949e-06, "loss": 0.5618, "step": 632 }, { "epoch": 0.03415897684960337, "grad_norm": 1.005459526986536, "learning_rate": 9.974113392075036e-06, "loss": 0.469, "step": 633 }, { "epoch": 0.03421294047811775, "grad_norm": 1.2129018180995976, "learning_rate": 9.974031616057584e-06, "loss": 0.5849, "step": 634 }, { "epoch": 0.03426690410663213, "grad_norm": 1.1055894264113686, "learning_rate": 9.973949711451943e-06, "loss": 0.5218, "step": 635 }, { "epoch": 0.03432086773514651, "grad_norm": 1.0244879733632453, "learning_rate": 9.973867678260466e-06, "loss": 0.4557, "step": 636 }, { "epoch": 0.034374831363660895, "grad_norm": 0.8427496611135044, "learning_rate": 9.97378551648551e-06, "loss": 0.3866, "step": 637 }, { "epoch": 0.03442879499217527, "grad_norm": 0.9940139937032199, "learning_rate": 9.97370322612944e-06, "loss": 0.4317, "step": 638 }, { "epoch": 0.034482758620689655, "grad_norm": 0.9907925129356866, "learning_rate": 9.97362080719462e-06, "loss": 0.4346, "step": 639 }, { "epoch": 0.03453672224920404, "grad_norm": 1.044847070827823, "learning_rate": 9.973538259683414e-06, "loss": 0.5501, "step": 640 }, { "epoch": 0.034590685877718415, "grad_norm": 1.2963142234244194, "learning_rate": 9.973455583598202e-06, "loss": 0.5729, "step": 641 }, { "epoch": 0.0346446495062328, "grad_norm": 1.1338078291252127, "learning_rate": 9.973372778941355e-06, "loss": 0.5105, "step": 642 }, { "epoch": 0.03469861313474718, "grad_norm": 1.2530568643852007, "learning_rate": 9.973289845715253e-06, "loss": 0.6123, "step": 643 }, { "epoch": 0.03475257676326156, "grad_norm": 1.0265232539412, "learning_rate": 9.973206783922282e-06, "loss": 0.4767, "step": 644 }, { "epoch": 0.03480654039177594, "grad_norm": 1.1588369685666042, "learning_rate": 9.973123593564828e-06, "loss": 0.6759, "step": 645 }, { "epoch": 0.034860504020290325, "grad_norm": 1.0746359596862989, "learning_rate": 9.973040274645282e-06, "loss": 0.4395, "step": 646 }, { "epoch": 0.03491446764880471, "grad_norm": 1.162708447503128, "learning_rate": 9.97295682716604e-06, 
"loss": 0.6355, "step": 647 }, { "epoch": 0.034968431277319086, "grad_norm": 1.1489117358412222, "learning_rate": 9.972873251129497e-06, "loss": 0.5061, "step": 648 }, { "epoch": 0.03502239490583347, "grad_norm": 1.052277830082588, "learning_rate": 9.972789546538059e-06, "loss": 0.5428, "step": 649 }, { "epoch": 0.03507635853434785, "grad_norm": 1.382702133255439, "learning_rate": 9.97270571339413e-06, "loss": 0.6205, "step": 650 }, { "epoch": 0.03513032216286223, "grad_norm": 1.1370874959665158, "learning_rate": 9.972621751700118e-06, "loss": 0.528, "step": 651 }, { "epoch": 0.03518428579137661, "grad_norm": 1.204968053967647, "learning_rate": 9.97253766145844e-06, "loss": 0.4759, "step": 652 }, { "epoch": 0.035238249419890996, "grad_norm": 1.1342280326210048, "learning_rate": 9.972453442671508e-06, "loss": 0.5389, "step": 653 }, { "epoch": 0.03529221304840537, "grad_norm": 0.8431360675819526, "learning_rate": 9.972369095341745e-06, "loss": 0.2807, "step": 654 }, { "epoch": 0.035346176676919756, "grad_norm": 1.185401144989261, "learning_rate": 9.972284619471577e-06, "loss": 0.5785, "step": 655 }, { "epoch": 0.03540014030543414, "grad_norm": 1.1749968169977327, "learning_rate": 9.97220001506343e-06, "loss": 0.6228, "step": 656 }, { "epoch": 0.035454103933948516, "grad_norm": 1.4371790572942826, "learning_rate": 9.972115282119736e-06, "loss": 0.6856, "step": 657 }, { "epoch": 0.0355080675624629, "grad_norm": 0.9043149055984723, "learning_rate": 9.972030420642927e-06, "loss": 0.4869, "step": 658 }, { "epoch": 0.035562031190977283, "grad_norm": 1.2276827475819863, "learning_rate": 9.971945430635448e-06, "loss": 0.6236, "step": 659 }, { "epoch": 0.03561599481949166, "grad_norm": 1.164631647536771, "learning_rate": 9.971860312099737e-06, "loss": 0.5166, "step": 660 }, { "epoch": 0.035669958448006044, "grad_norm": 0.9230512322782697, "learning_rate": 9.971775065038246e-06, "loss": 0.3973, "step": 661 }, { "epoch": 0.03572392207652043, "grad_norm": 1.053792932336806, "learning_rate": 9.971689689453416e-06, "loss": 0.4266, "step": 662 }, { "epoch": 0.035777885705034804, "grad_norm": 1.100343683454817, "learning_rate": 9.97160418534771e-06, "loss": 0.5455, "step": 663 }, { "epoch": 0.03583184933354919, "grad_norm": 1.1305454234247014, "learning_rate": 9.971518552723578e-06, "loss": 0.5087, "step": 664 }, { "epoch": 0.03588581296206357, "grad_norm": 1.3512641711109625, "learning_rate": 9.971432791583487e-06, "loss": 0.7354, "step": 665 }, { "epoch": 0.03593977659057795, "grad_norm": 1.44580082437797, "learning_rate": 9.971346901929898e-06, "loss": 0.731, "step": 666 }, { "epoch": 0.03599374021909233, "grad_norm": 1.1961213600145169, "learning_rate": 9.971260883765284e-06, "loss": 0.5212, "step": 667 }, { "epoch": 0.036047703847606714, "grad_norm": 1.1945660289743327, "learning_rate": 9.97117473709211e-06, "loss": 0.6593, "step": 668 }, { "epoch": 0.03610166747612109, "grad_norm": 1.254461644769536, "learning_rate": 9.97108846191286e-06, "loss": 0.6091, "step": 669 }, { "epoch": 0.036155631104635474, "grad_norm": 1.1047742408465668, "learning_rate": 9.971002058230009e-06, "loss": 0.5427, "step": 670 }, { "epoch": 0.03620959473314986, "grad_norm": 1.0630864865456484, "learning_rate": 9.970915526046042e-06, "loss": 0.5027, "step": 671 }, { "epoch": 0.03626355836166424, "grad_norm": 1.0951737585047432, "learning_rate": 9.970828865363443e-06, "loss": 0.6427, "step": 672 }, { "epoch": 0.03631752199017862, "grad_norm": 0.9025619571489237, "learning_rate": 9.970742076184706e-06, "loss": 0.4295, "step": 673 
}, { "epoch": 0.036371485618693, "grad_norm": 1.1537471806161166, "learning_rate": 9.970655158512324e-06, "loss": 0.7454, "step": 674 }, { "epoch": 0.036425449247207385, "grad_norm": 1.2074926990006456, "learning_rate": 9.970568112348795e-06, "loss": 0.6205, "step": 675 }, { "epoch": 0.03647941287572176, "grad_norm": 1.1626190471402271, "learning_rate": 9.970480937696622e-06, "loss": 0.6302, "step": 676 }, { "epoch": 0.036533376504236145, "grad_norm": 1.135494630396891, "learning_rate": 9.970393634558311e-06, "loss": 0.6574, "step": 677 }, { "epoch": 0.03658734013275053, "grad_norm": 0.8561688553054742, "learning_rate": 9.970306202936368e-06, "loss": 0.3822, "step": 678 }, { "epoch": 0.036641303761264905, "grad_norm": 1.1306487387899604, "learning_rate": 9.970218642833308e-06, "loss": 0.5182, "step": 679 }, { "epoch": 0.03669526738977929, "grad_norm": 1.2058304980077048, "learning_rate": 9.970130954251648e-06, "loss": 0.6108, "step": 680 }, { "epoch": 0.03674923101829367, "grad_norm": 1.3117141478546073, "learning_rate": 9.970043137193908e-06, "loss": 0.6338, "step": 681 }, { "epoch": 0.03680319464680805, "grad_norm": 0.9791418578661547, "learning_rate": 9.969955191662612e-06, "loss": 0.427, "step": 682 }, { "epoch": 0.03685715827532243, "grad_norm": 1.069373859691777, "learning_rate": 9.969867117660287e-06, "loss": 0.4667, "step": 683 }, { "epoch": 0.036911121903836816, "grad_norm": 1.0868409328214808, "learning_rate": 9.969778915189462e-06, "loss": 0.5161, "step": 684 }, { "epoch": 0.03696508553235119, "grad_norm": 1.3856610061553871, "learning_rate": 9.969690584252678e-06, "loss": 0.6275, "step": 685 }, { "epoch": 0.037019049160865576, "grad_norm": 1.0561004318363176, "learning_rate": 9.969602124852469e-06, "loss": 0.503, "step": 686 }, { "epoch": 0.03707301278937996, "grad_norm": 1.0766005898443531, "learning_rate": 9.96951353699138e-06, "loss": 0.4514, "step": 687 }, { "epoch": 0.037126976417894336, "grad_norm": 1.1592447461780662, "learning_rate": 9.969424820671953e-06, "loss": 0.7777, "step": 688 }, { "epoch": 0.03718094004640872, "grad_norm": 1.4019311171946947, "learning_rate": 9.969335975896742e-06, "loss": 0.6162, "step": 689 }, { "epoch": 0.0372349036749231, "grad_norm": 1.2303440577817262, "learning_rate": 9.969247002668299e-06, "loss": 0.5196, "step": 690 }, { "epoch": 0.03728886730343748, "grad_norm": 1.154785159243188, "learning_rate": 9.969157900989182e-06, "loss": 0.5386, "step": 691 }, { "epoch": 0.03734283093195186, "grad_norm": 1.0959239657347508, "learning_rate": 9.96906867086195e-06, "loss": 0.4789, "step": 692 }, { "epoch": 0.03739679456046625, "grad_norm": 1.1183022099067816, "learning_rate": 9.96897931228917e-06, "loss": 0.5246, "step": 693 }, { "epoch": 0.03745075818898063, "grad_norm": 1.357914733647718, "learning_rate": 9.96888982527341e-06, "loss": 0.6085, "step": 694 }, { "epoch": 0.03750472181749501, "grad_norm": 1.3160612315164668, "learning_rate": 9.968800209817237e-06, "loss": 0.6066, "step": 695 }, { "epoch": 0.03755868544600939, "grad_norm": 1.0354159921439854, "learning_rate": 9.968710465923233e-06, "loss": 0.5688, "step": 696 }, { "epoch": 0.037612649074523774, "grad_norm": 1.0373869100624307, "learning_rate": 9.968620593593975e-06, "loss": 0.7545, "step": 697 }, { "epoch": 0.03766661270303815, "grad_norm": 1.2213722833832241, "learning_rate": 9.968530592832046e-06, "loss": 0.604, "step": 698 }, { "epoch": 0.037720576331552534, "grad_norm": 1.3194654548763127, "learning_rate": 9.968440463640032e-06, "loss": 0.5765, "step": 699 }, { "epoch": 
0.03777453996006692, "grad_norm": 1.0516278070602276, "learning_rate": 9.968350206020526e-06, "loss": 0.4966, "step": 700 }, { "epoch": 0.037828503588581294, "grad_norm": 1.1148408206527374, "learning_rate": 9.968259819976118e-06, "loss": 0.4844, "step": 701 }, { "epoch": 0.03788246721709568, "grad_norm": 1.0103582606632104, "learning_rate": 9.96816930550941e-06, "loss": 0.5215, "step": 702 }, { "epoch": 0.03793643084561006, "grad_norm": 1.0529674441321135, "learning_rate": 9.968078662622999e-06, "loss": 0.5016, "step": 703 }, { "epoch": 0.03799039447412444, "grad_norm": 1.157328780634197, "learning_rate": 9.967987891319494e-06, "loss": 0.6078, "step": 704 }, { "epoch": 0.03804435810263882, "grad_norm": 1.5097333201657452, "learning_rate": 9.9678969916015e-06, "loss": 0.659, "step": 705 }, { "epoch": 0.038098321731153205, "grad_norm": 1.1056360268423069, "learning_rate": 9.967805963471634e-06, "loss": 0.5458, "step": 706 }, { "epoch": 0.03815228535966758, "grad_norm": 1.0179121258300226, "learning_rate": 9.967714806932511e-06, "loss": 0.4811, "step": 707 }, { "epoch": 0.038206248988181965, "grad_norm": 1.0966970010341872, "learning_rate": 9.96762352198675e-06, "loss": 0.5329, "step": 708 }, { "epoch": 0.03826021261669635, "grad_norm": 0.9904963679221083, "learning_rate": 9.967532108636973e-06, "loss": 0.5471, "step": 709 }, { "epoch": 0.038314176245210725, "grad_norm": 0.9967960124460453, "learning_rate": 9.96744056688581e-06, "loss": 0.4234, "step": 710 }, { "epoch": 0.03836813987372511, "grad_norm": 1.0362773978965538, "learning_rate": 9.967348896735892e-06, "loss": 0.4708, "step": 711 }, { "epoch": 0.03842210350223949, "grad_norm": 0.982630399151348, "learning_rate": 9.967257098189853e-06, "loss": 0.5004, "step": 712 }, { "epoch": 0.03847606713075387, "grad_norm": 1.0962644362782163, "learning_rate": 9.967165171250329e-06, "loss": 0.5692, "step": 713 }, { "epoch": 0.03853003075926825, "grad_norm": 0.8983170214180445, "learning_rate": 9.967073115919965e-06, "loss": 0.4079, "step": 714 }, { "epoch": 0.038583994387782636, "grad_norm": 1.0356706644538067, "learning_rate": 9.966980932201407e-06, "loss": 0.5331, "step": 715 }, { "epoch": 0.03863795801629701, "grad_norm": 1.1588322063890113, "learning_rate": 9.966888620097303e-06, "loss": 0.5561, "step": 716 }, { "epoch": 0.038691921644811396, "grad_norm": 1.1961146208660638, "learning_rate": 9.966796179610306e-06, "loss": 0.5388, "step": 717 }, { "epoch": 0.03874588527332578, "grad_norm": 1.1051479235994306, "learning_rate": 9.966703610743073e-06, "loss": 0.4948, "step": 718 }, { "epoch": 0.03879984890184016, "grad_norm": 0.7661323142531153, "learning_rate": 9.966610913498267e-06, "loss": 0.3183, "step": 719 }, { "epoch": 0.03885381253035454, "grad_norm": 1.083168886419266, "learning_rate": 9.96651808787855e-06, "loss": 0.4921, "step": 720 }, { "epoch": 0.03890777615886892, "grad_norm": 1.298634486799853, "learning_rate": 9.966425133886588e-06, "loss": 0.6332, "step": 721 }, { "epoch": 0.03896173978738331, "grad_norm": 1.2791251606448744, "learning_rate": 9.966332051525055e-06, "loss": 0.5252, "step": 722 }, { "epoch": 0.03901570341589768, "grad_norm": 1.1249022204155936, "learning_rate": 9.966238840796628e-06, "loss": 0.5595, "step": 723 }, { "epoch": 0.03906966704441207, "grad_norm": 0.8190816465698044, "learning_rate": 9.966145501703982e-06, "loss": 0.4424, "step": 724 }, { "epoch": 0.03912363067292645, "grad_norm": 1.1047061045742799, "learning_rate": 9.966052034249802e-06, "loss": 0.4675, "step": 725 }, { "epoch": 0.03917759430144083, 
"grad_norm": 1.0200768627096775, "learning_rate": 9.965958438436775e-06, "loss": 0.389, "step": 726 }, { "epoch": 0.03923155792995521, "grad_norm": 1.161695423127608, "learning_rate": 9.965864714267587e-06, "loss": 0.5093, "step": 727 }, { "epoch": 0.039285521558469594, "grad_norm": 1.0126559050060262, "learning_rate": 9.965770861744937e-06, "loss": 0.5076, "step": 728 }, { "epoch": 0.03933948518698397, "grad_norm": 1.168264868095318, "learning_rate": 9.965676880871522e-06, "loss": 0.5194, "step": 729 }, { "epoch": 0.039393448815498354, "grad_norm": 1.0322137644895344, "learning_rate": 9.965582771650038e-06, "loss": 0.5804, "step": 730 }, { "epoch": 0.03944741244401274, "grad_norm": 0.9355836663779755, "learning_rate": 9.965488534083195e-06, "loss": 0.469, "step": 731 }, { "epoch": 0.039501376072527114, "grad_norm": 1.0586181045442185, "learning_rate": 9.965394168173697e-06, "loss": 0.5508, "step": 732 }, { "epoch": 0.0395553397010415, "grad_norm": 1.0999258315100529, "learning_rate": 9.965299673924261e-06, "loss": 0.5656, "step": 733 }, { "epoch": 0.03960930332955588, "grad_norm": 1.0945696751619651, "learning_rate": 9.965205051337599e-06, "loss": 0.4721, "step": 734 }, { "epoch": 0.03966326695807026, "grad_norm": 1.2661034122021886, "learning_rate": 9.965110300416435e-06, "loss": 0.5925, "step": 735 }, { "epoch": 0.03971723058658464, "grad_norm": 1.1482636546647562, "learning_rate": 9.965015421163485e-06, "loss": 0.5825, "step": 736 }, { "epoch": 0.039771194215099025, "grad_norm": 1.1320438218871105, "learning_rate": 9.964920413581483e-06, "loss": 0.5023, "step": 737 }, { "epoch": 0.0398251578436134, "grad_norm": 0.8789083858187371, "learning_rate": 9.964825277673157e-06, "loss": 0.3935, "step": 738 }, { "epoch": 0.039879121472127785, "grad_norm": 1.0207605324573932, "learning_rate": 9.964730013441239e-06, "loss": 0.503, "step": 739 }, { "epoch": 0.03993308510064217, "grad_norm": 1.0698855451377445, "learning_rate": 9.96463462088847e-06, "loss": 0.4421, "step": 740 }, { "epoch": 0.03998704872915655, "grad_norm": 1.1635708748769076, "learning_rate": 9.964539100017592e-06, "loss": 0.4286, "step": 741 }, { "epoch": 0.04004101235767093, "grad_norm": 0.9524480092919881, "learning_rate": 9.964443450831348e-06, "loss": 0.4072, "step": 742 }, { "epoch": 0.04009497598618531, "grad_norm": 1.2787711364777035, "learning_rate": 9.964347673332488e-06, "loss": 0.6577, "step": 743 }, { "epoch": 0.040148939614699695, "grad_norm": 1.096966389940386, "learning_rate": 9.964251767523767e-06, "loss": 0.4192, "step": 744 }, { "epoch": 0.04020290324321407, "grad_norm": 1.1309243953832508, "learning_rate": 9.964155733407938e-06, "loss": 0.5705, "step": 745 }, { "epoch": 0.040256866871728456, "grad_norm": 1.2166427205383552, "learning_rate": 9.964059570987762e-06, "loss": 0.6032, "step": 746 }, { "epoch": 0.04031083050024284, "grad_norm": 1.0858889845400743, "learning_rate": 9.963963280266004e-06, "loss": 0.4877, "step": 747 }, { "epoch": 0.040364794128757216, "grad_norm": 1.275153474652414, "learning_rate": 9.96386686124543e-06, "loss": 0.5672, "step": 748 }, { "epoch": 0.0404187577572716, "grad_norm": 1.1052013607832605, "learning_rate": 9.963770313928814e-06, "loss": 0.4938, "step": 749 }, { "epoch": 0.04047272138578598, "grad_norm": 1.2679158762662448, "learning_rate": 9.963673638318927e-06, "loss": 0.5668, "step": 750 }, { "epoch": 0.04052668501430036, "grad_norm": 1.013042529904683, "learning_rate": 9.96357683441855e-06, "loss": 0.5224, "step": 751 }, { "epoch": 0.04058064864281474, "grad_norm": 
1.208600292875119, "learning_rate": 9.963479902230466e-06, "loss": 0.5954, "step": 752 }, { "epoch": 0.040634612271329126, "grad_norm": 1.3402670358806734, "learning_rate": 9.963382841757456e-06, "loss": 0.6935, "step": 753 }, { "epoch": 0.0406885758998435, "grad_norm": 1.069309414095788, "learning_rate": 9.963285653002316e-06, "loss": 0.5321, "step": 754 }, { "epoch": 0.040742539528357886, "grad_norm": 1.1890978890324924, "learning_rate": 9.963188335967835e-06, "loss": 0.6441, "step": 755 }, { "epoch": 0.04079650315687227, "grad_norm": 1.1666412611464825, "learning_rate": 9.963090890656813e-06, "loss": 0.4805, "step": 756 }, { "epoch": 0.04085046678538665, "grad_norm": 1.126455463257009, "learning_rate": 9.962993317072049e-06, "loss": 0.6256, "step": 757 }, { "epoch": 0.04090443041390103, "grad_norm": 1.0874174072404954, "learning_rate": 9.962895615216347e-06, "loss": 0.6864, "step": 758 }, { "epoch": 0.040958394042415414, "grad_norm": 0.8814123576436613, "learning_rate": 9.962797785092515e-06, "loss": 0.4101, "step": 759 }, { "epoch": 0.04101235767092979, "grad_norm": 1.1746721637441901, "learning_rate": 9.962699826703366e-06, "loss": 0.6292, "step": 760 }, { "epoch": 0.041066321299444174, "grad_norm": 1.1348673779038585, "learning_rate": 9.962601740051717e-06, "loss": 0.4951, "step": 761 }, { "epoch": 0.04112028492795856, "grad_norm": 1.1290437096486725, "learning_rate": 9.962503525140381e-06, "loss": 0.4875, "step": 762 }, { "epoch": 0.041174248556472934, "grad_norm": 1.3367491564369545, "learning_rate": 9.962405181972188e-06, "loss": 0.5412, "step": 763 }, { "epoch": 0.04122821218498732, "grad_norm": 0.9932825015552385, "learning_rate": 9.962306710549958e-06, "loss": 0.4933, "step": 764 }, { "epoch": 0.0412821758135017, "grad_norm": 1.0256004888977786, "learning_rate": 9.962208110876528e-06, "loss": 0.6563, "step": 765 }, { "epoch": 0.041336139442016084, "grad_norm": 0.9104868137756835, "learning_rate": 9.962109382954724e-06, "loss": 0.3745, "step": 766 }, { "epoch": 0.04139010307053046, "grad_norm": 1.1415664441224562, "learning_rate": 9.962010526787389e-06, "loss": 0.5475, "step": 767 }, { "epoch": 0.041444066699044844, "grad_norm": 0.9550361610308175, "learning_rate": 9.961911542377365e-06, "loss": 0.3728, "step": 768 }, { "epoch": 0.04149803032755923, "grad_norm": 1.0722487816760142, "learning_rate": 9.961812429727493e-06, "loss": 0.6561, "step": 769 }, { "epoch": 0.041551993956073605, "grad_norm": 1.0537783222149142, "learning_rate": 9.961713188840624e-06, "loss": 0.5233, "step": 770 }, { "epoch": 0.04160595758458799, "grad_norm": 0.9343983623875682, "learning_rate": 9.96161381971961e-06, "loss": 0.3844, "step": 771 }, { "epoch": 0.04165992121310237, "grad_norm": 1.1496691780489372, "learning_rate": 9.961514322367305e-06, "loss": 0.6206, "step": 772 }, { "epoch": 0.04171388484161675, "grad_norm": 1.1983814688496892, "learning_rate": 9.961414696786573e-06, "loss": 0.5378, "step": 773 }, { "epoch": 0.04176784847013113, "grad_norm": 1.1836631645660005, "learning_rate": 9.961314942980273e-06, "loss": 0.5352, "step": 774 }, { "epoch": 0.041821812098645515, "grad_norm": 1.1869502593578356, "learning_rate": 9.961215060951274e-06, "loss": 0.5422, "step": 775 }, { "epoch": 0.04187577572715989, "grad_norm": 1.2992153032934948, "learning_rate": 9.961115050702445e-06, "loss": 0.5854, "step": 776 }, { "epoch": 0.041929739355674275, "grad_norm": 1.2240265961873171, "learning_rate": 9.961014912236664e-06, "loss": 0.5855, "step": 777 }, { "epoch": 0.04198370298418866, "grad_norm": 
1.1310116533800991, "learning_rate": 9.960914645556806e-06, "loss": 0.5042, "step": 778 }, { "epoch": 0.042037666612703035, "grad_norm": 1.305660290961825, "learning_rate": 9.960814250665755e-06, "loss": 0.543, "step": 779 }, { "epoch": 0.04209163024121742, "grad_norm": 0.9109453758486583, "learning_rate": 9.960713727566392e-06, "loss": 0.4382, "step": 780 }, { "epoch": 0.0421455938697318, "grad_norm": 0.9582772793026495, "learning_rate": 9.960613076261612e-06, "loss": 0.4755, "step": 781 }, { "epoch": 0.04219955749824618, "grad_norm": 1.3905042558023346, "learning_rate": 9.960512296754305e-06, "loss": 0.6492, "step": 782 }, { "epoch": 0.04225352112676056, "grad_norm": 1.4237769289679167, "learning_rate": 9.960411389047366e-06, "loss": 0.8267, "step": 783 }, { "epoch": 0.042307484755274946, "grad_norm": 0.9865989322565573, "learning_rate": 9.960310353143697e-06, "loss": 0.4543, "step": 784 }, { "epoch": 0.04236144838378932, "grad_norm": 1.3675667607957243, "learning_rate": 9.960209189046203e-06, "loss": 0.504, "step": 785 }, { "epoch": 0.042415412012303706, "grad_norm": 1.3955889883374388, "learning_rate": 9.96010789675779e-06, "loss": 0.6794, "step": 786 }, { "epoch": 0.04246937564081809, "grad_norm": 1.3628001778666408, "learning_rate": 9.960006476281369e-06, "loss": 0.7429, "step": 787 }, { "epoch": 0.04252333926933247, "grad_norm": 0.9853353125060482, "learning_rate": 9.959904927619855e-06, "loss": 0.476, "step": 788 }, { "epoch": 0.04257730289784685, "grad_norm": 1.1642060412638036, "learning_rate": 9.959803250776168e-06, "loss": 0.5423, "step": 789 }, { "epoch": 0.04263126652636123, "grad_norm": 0.9657027265109951, "learning_rate": 9.959701445753228e-06, "loss": 0.4265, "step": 790 }, { "epoch": 0.04268523015487562, "grad_norm": 1.283198805129021, "learning_rate": 9.959599512553962e-06, "loss": 0.6696, "step": 791 }, { "epoch": 0.042739193783389993, "grad_norm": 1.141233984693726, "learning_rate": 9.959497451181302e-06, "loss": 0.6223, "step": 792 }, { "epoch": 0.04279315741190438, "grad_norm": 1.1750742000585968, "learning_rate": 9.959395261638175e-06, "loss": 0.573, "step": 793 }, { "epoch": 0.04284712104041876, "grad_norm": 1.1161770824778332, "learning_rate": 9.959292943927527e-06, "loss": 0.5537, "step": 794 }, { "epoch": 0.04290108466893314, "grad_norm": 1.0599726030986878, "learning_rate": 9.959190498052292e-06, "loss": 0.5516, "step": 795 }, { "epoch": 0.04295504829744752, "grad_norm": 1.0998115533102952, "learning_rate": 9.959087924015417e-06, "loss": 0.4421, "step": 796 }, { "epoch": 0.043009011925961904, "grad_norm": 0.8207845985207481, "learning_rate": 9.958985221819848e-06, "loss": 0.3529, "step": 797 }, { "epoch": 0.04306297555447628, "grad_norm": 1.170251041300329, "learning_rate": 9.958882391468541e-06, "loss": 0.5737, "step": 798 }, { "epoch": 0.043116939182990664, "grad_norm": 1.3622069799585852, "learning_rate": 9.958779432964446e-06, "loss": 0.6869, "step": 799 }, { "epoch": 0.04317090281150505, "grad_norm": 1.0832448392828693, "learning_rate": 9.958676346310526e-06, "loss": 0.4923, "step": 800 }, { "epoch": 0.043224866440019424, "grad_norm": 0.9694841309202773, "learning_rate": 9.958573131509742e-06, "loss": 0.4431, "step": 801 }, { "epoch": 0.04327883006853381, "grad_norm": 1.1173316212145825, "learning_rate": 9.958469788565061e-06, "loss": 0.5131, "step": 802 }, { "epoch": 0.04333279369704819, "grad_norm": 1.2816631455841192, "learning_rate": 9.958366317479456e-06, "loss": 0.6544, "step": 803 }, { "epoch": 0.04338675732556257, "grad_norm": 1.1648494294915082, 
"learning_rate": 9.958262718255896e-06, "loss": 0.6186, "step": 804 }, { "epoch": 0.04344072095407695, "grad_norm": 1.1711850844075506, "learning_rate": 9.958158990897359e-06, "loss": 0.4915, "step": 805 }, { "epoch": 0.043494684582591335, "grad_norm": 1.0606035622789363, "learning_rate": 9.95805513540683e-06, "loss": 0.4366, "step": 806 }, { "epoch": 0.04354864821110571, "grad_norm": 1.037981123602516, "learning_rate": 9.957951151787292e-06, "loss": 0.4313, "step": 807 }, { "epoch": 0.043602611839620095, "grad_norm": 1.3303904934753212, "learning_rate": 9.957847040041734e-06, "loss": 0.6739, "step": 808 }, { "epoch": 0.04365657546813448, "grad_norm": 1.0767940870560473, "learning_rate": 9.957742800173146e-06, "loss": 0.4243, "step": 809 }, { "epoch": 0.043710539096648855, "grad_norm": 1.1296467372940135, "learning_rate": 9.957638432184528e-06, "loss": 0.5154, "step": 810 }, { "epoch": 0.04376450272516324, "grad_norm": 0.969125918397227, "learning_rate": 9.957533936078874e-06, "loss": 0.4604, "step": 811 }, { "epoch": 0.04381846635367762, "grad_norm": 1.3247012212267448, "learning_rate": 9.957429311859193e-06, "loss": 0.5916, "step": 812 }, { "epoch": 0.043872429982192006, "grad_norm": 1.2946218747124218, "learning_rate": 9.95732455952849e-06, "loss": 0.7409, "step": 813 }, { "epoch": 0.04392639361070638, "grad_norm": 1.0921285610742266, "learning_rate": 9.957219679089774e-06, "loss": 0.6463, "step": 814 }, { "epoch": 0.043980357239220766, "grad_norm": 1.0250104908913187, "learning_rate": 9.957114670546062e-06, "loss": 0.4082, "step": 815 }, { "epoch": 0.04403432086773515, "grad_norm": 1.24791909525768, "learning_rate": 9.95700953390037e-06, "loss": 0.5775, "step": 816 }, { "epoch": 0.044088284496249526, "grad_norm": 1.1199302467618155, "learning_rate": 9.956904269155722e-06, "loss": 0.6594, "step": 817 }, { "epoch": 0.04414224812476391, "grad_norm": 1.0729047403541216, "learning_rate": 9.95679887631514e-06, "loss": 0.5091, "step": 818 }, { "epoch": 0.04419621175327829, "grad_norm": 0.9236947214435037, "learning_rate": 9.956693355381657e-06, "loss": 0.4626, "step": 819 }, { "epoch": 0.04425017538179267, "grad_norm": 1.1600202430517659, "learning_rate": 9.9565877063583e-06, "loss": 0.5589, "step": 820 }, { "epoch": 0.04430413901030705, "grad_norm": 1.23908090319664, "learning_rate": 9.956481929248113e-06, "loss": 0.6001, "step": 821 }, { "epoch": 0.04435810263882144, "grad_norm": 1.2081352033935002, "learning_rate": 9.95637602405413e-06, "loss": 0.5244, "step": 822 }, { "epoch": 0.04441206626733581, "grad_norm": 1.1158389757429827, "learning_rate": 9.956269990779399e-06, "loss": 0.6365, "step": 823 }, { "epoch": 0.0444660298958502, "grad_norm": 1.0228014766103806, "learning_rate": 9.956163829426964e-06, "loss": 0.5234, "step": 824 }, { "epoch": 0.04451999352436458, "grad_norm": 0.9379362357714647, "learning_rate": 9.956057539999879e-06, "loss": 0.5101, "step": 825 }, { "epoch": 0.04457395715287896, "grad_norm": 1.0419729226602952, "learning_rate": 9.955951122501198e-06, "loss": 0.5367, "step": 826 }, { "epoch": 0.04462792078139334, "grad_norm": 0.9449896021359578, "learning_rate": 9.955844576933979e-06, "loss": 0.4264, "step": 827 }, { "epoch": 0.044681884409907724, "grad_norm": 1.2890742429392263, "learning_rate": 9.955737903301285e-06, "loss": 0.5736, "step": 828 }, { "epoch": 0.0447358480384221, "grad_norm": 1.2929235394842682, "learning_rate": 9.955631101606181e-06, "loss": 0.5301, "step": 829 }, { "epoch": 0.044789811666936484, "grad_norm": 1.2910754231569337, "learning_rate": 
9.955524171851737e-06, "loss": 0.5764, "step": 830 }, { "epoch": 0.04484377529545087, "grad_norm": 1.1522987846466173, "learning_rate": 9.955417114041027e-06, "loss": 0.5704, "step": 831 }, { "epoch": 0.044897738923965244, "grad_norm": 1.1825395933693907, "learning_rate": 9.955309928177125e-06, "loss": 0.6493, "step": 832 }, { "epoch": 0.04495170255247963, "grad_norm": 1.0917887930299823, "learning_rate": 9.955202614263118e-06, "loss": 0.5403, "step": 833 }, { "epoch": 0.04500566618099401, "grad_norm": 1.1539025758259183, "learning_rate": 9.955095172302082e-06, "loss": 0.5588, "step": 834 }, { "epoch": 0.045059629809508395, "grad_norm": 0.9618472043347563, "learning_rate": 9.954987602297112e-06, "loss": 0.39, "step": 835 }, { "epoch": 0.04511359343802277, "grad_norm": 1.0470025305708133, "learning_rate": 9.954879904251296e-06, "loss": 0.6432, "step": 836 }, { "epoch": 0.045167557066537155, "grad_norm": 1.0480055933972139, "learning_rate": 9.95477207816773e-06, "loss": 0.5512, "step": 837 }, { "epoch": 0.04522152069505154, "grad_norm": 0.999995877405877, "learning_rate": 9.954664124049513e-06, "loss": 0.4842, "step": 838 }, { "epoch": 0.045275484323565915, "grad_norm": 0.942848391250373, "learning_rate": 9.95455604189975e-06, "loss": 0.3936, "step": 839 }, { "epoch": 0.0453294479520803, "grad_norm": 1.2739432987117487, "learning_rate": 9.954447831721544e-06, "loss": 0.6475, "step": 840 }, { "epoch": 0.04538341158059468, "grad_norm": 0.9509442896722112, "learning_rate": 9.954339493518004e-06, "loss": 0.398, "step": 841 }, { "epoch": 0.04543737520910906, "grad_norm": 0.9667800908523281, "learning_rate": 9.95423102729225e-06, "loss": 0.4467, "step": 842 }, { "epoch": 0.04549133883762344, "grad_norm": 1.1650107026895582, "learning_rate": 9.954122433047394e-06, "loss": 0.6361, "step": 843 }, { "epoch": 0.045545302466137826, "grad_norm": 0.8457595407140776, "learning_rate": 9.954013710786558e-06, "loss": 0.4393, "step": 844 }, { "epoch": 0.0455992660946522, "grad_norm": 1.0185578443042687, "learning_rate": 9.953904860512869e-06, "loss": 0.5241, "step": 845 }, { "epoch": 0.045653229723166586, "grad_norm": 1.011919273653372, "learning_rate": 9.953795882229454e-06, "loss": 0.553, "step": 846 }, { "epoch": 0.04570719335168097, "grad_norm": 0.9421095555057688, "learning_rate": 9.953686775939443e-06, "loss": 0.4638, "step": 847 }, { "epoch": 0.045761156980195346, "grad_norm": 1.0513831834952192, "learning_rate": 9.953577541645975e-06, "loss": 0.4686, "step": 848 }, { "epoch": 0.04581512060870973, "grad_norm": 1.3461692569628683, "learning_rate": 9.953468179352189e-06, "loss": 0.6178, "step": 849 }, { "epoch": 0.04586908423722411, "grad_norm": 1.2294571450271108, "learning_rate": 9.953358689061228e-06, "loss": 0.6248, "step": 850 }, { "epoch": 0.04592304786573849, "grad_norm": 1.207253596824993, "learning_rate": 9.953249070776236e-06, "loss": 0.613, "step": 851 }, { "epoch": 0.04597701149425287, "grad_norm": 1.0742117125075181, "learning_rate": 9.95313932450037e-06, "loss": 0.5171, "step": 852 }, { "epoch": 0.046030975122767256, "grad_norm": 1.024532321451586, "learning_rate": 9.953029450236777e-06, "loss": 0.6241, "step": 853 }, { "epoch": 0.04608493875128163, "grad_norm": 1.3082001301235946, "learning_rate": 9.952919447988619e-06, "loss": 0.5491, "step": 854 }, { "epoch": 0.04613890237979602, "grad_norm": 1.0039499261884828, "learning_rate": 9.952809317759057e-06, "loss": 0.4348, "step": 855 }, { "epoch": 0.0461928660083104, "grad_norm": 1.0606995065463638, "learning_rate": 9.952699059551255e-06, 
"loss": 0.4873, "step": 856 }, { "epoch": 0.04624682963682478, "grad_norm": 1.1013389386979189, "learning_rate": 9.952588673368385e-06, "loss": 0.5586, "step": 857 }, { "epoch": 0.04630079326533916, "grad_norm": 1.1326071564789608, "learning_rate": 9.952478159213615e-06, "loss": 0.558, "step": 858 }, { "epoch": 0.046354756893853544, "grad_norm": 1.1829065474044214, "learning_rate": 9.952367517090126e-06, "loss": 0.5743, "step": 859 }, { "epoch": 0.04640872052236793, "grad_norm": 1.335348778037312, "learning_rate": 9.952256747001095e-06, "loss": 0.7002, "step": 860 }, { "epoch": 0.046462684150882304, "grad_norm": 1.0657595400051647, "learning_rate": 9.952145848949705e-06, "loss": 0.4487, "step": 861 }, { "epoch": 0.04651664777939669, "grad_norm": 0.9456656967812963, "learning_rate": 9.952034822939146e-06, "loss": 0.4797, "step": 862 }, { "epoch": 0.04657061140791107, "grad_norm": 1.1449853335406464, "learning_rate": 9.95192366897261e-06, "loss": 0.5472, "step": 863 }, { "epoch": 0.04662457503642545, "grad_norm": 0.9749603834609615, "learning_rate": 9.951812387053285e-06, "loss": 0.4366, "step": 864 }, { "epoch": 0.04667853866493983, "grad_norm": 1.2354880999088815, "learning_rate": 9.951700977184376e-06, "loss": 0.5315, "step": 865 }, { "epoch": 0.046732502293454214, "grad_norm": 1.1462689644973072, "learning_rate": 9.951589439369085e-06, "loss": 0.5435, "step": 866 }, { "epoch": 0.04678646592196859, "grad_norm": 1.0732309054100402, "learning_rate": 9.951477773610612e-06, "loss": 0.4782, "step": 867 }, { "epoch": 0.046840429550482975, "grad_norm": 1.172253476056852, "learning_rate": 9.951365979912171e-06, "loss": 0.6239, "step": 868 }, { "epoch": 0.04689439317899736, "grad_norm": 1.169417301170583, "learning_rate": 9.951254058276973e-06, "loss": 0.6685, "step": 869 }, { "epoch": 0.046948356807511735, "grad_norm": 1.0336422807383343, "learning_rate": 9.951142008708238e-06, "loss": 0.5499, "step": 870 }, { "epoch": 0.04700232043602612, "grad_norm": 1.0172000053778143, "learning_rate": 9.951029831209182e-06, "loss": 0.5051, "step": 871 }, { "epoch": 0.0470562840645405, "grad_norm": 1.1388258783549963, "learning_rate": 9.950917525783032e-06, "loss": 0.4595, "step": 872 }, { "epoch": 0.04711024769305488, "grad_norm": 1.173881312158791, "learning_rate": 9.950805092433016e-06, "loss": 0.648, "step": 873 }, { "epoch": 0.04716421132156926, "grad_norm": 1.1788809034420238, "learning_rate": 9.950692531162363e-06, "loss": 0.6048, "step": 874 }, { "epoch": 0.047218174950083645, "grad_norm": 1.0858016137565945, "learning_rate": 9.95057984197431e-06, "loss": 0.5727, "step": 875 }, { "epoch": 0.04727213857859802, "grad_norm": 1.1040578102124057, "learning_rate": 9.950467024872096e-06, "loss": 0.5268, "step": 876 }, { "epoch": 0.047326102207112405, "grad_norm": 1.0572361901994995, "learning_rate": 9.950354079858962e-06, "loss": 0.5294, "step": 877 }, { "epoch": 0.04738006583562679, "grad_norm": 1.4501507525450223, "learning_rate": 9.950241006938154e-06, "loss": 0.533, "step": 878 }, { "epoch": 0.047434029464141166, "grad_norm": 1.1775183486804905, "learning_rate": 9.950127806112927e-06, "loss": 0.6693, "step": 879 }, { "epoch": 0.04748799309265555, "grad_norm": 1.033222834721493, "learning_rate": 9.950014477386527e-06, "loss": 0.5367, "step": 880 }, { "epoch": 0.04754195672116993, "grad_norm": 1.226964582999013, "learning_rate": 9.949901020762215e-06, "loss": 0.6071, "step": 881 }, { "epoch": 0.047595920349684316, "grad_norm": 1.2851448583804694, "learning_rate": 9.94978743624325e-06, "loss": 0.6728, 
"step": 882 }, { "epoch": 0.04764988397819869, "grad_norm": 1.0985996569725616, "learning_rate": 9.9496737238329e-06, "loss": 0.6255, "step": 883 }, { "epoch": 0.047703847606713076, "grad_norm": 1.278570103286649, "learning_rate": 9.94955988353443e-06, "loss": 0.4899, "step": 884 }, { "epoch": 0.04775781123522746, "grad_norm": 1.0622605455885163, "learning_rate": 9.949445915351114e-06, "loss": 0.5208, "step": 885 }, { "epoch": 0.047811774863741836, "grad_norm": 1.14813735808111, "learning_rate": 9.949331819286224e-06, "loss": 0.6805, "step": 886 }, { "epoch": 0.04786573849225622, "grad_norm": 1.0978744242379226, "learning_rate": 9.949217595343044e-06, "loss": 0.445, "step": 887 }, { "epoch": 0.0479197021207706, "grad_norm": 1.3050579388952461, "learning_rate": 9.949103243524855e-06, "loss": 0.7052, "step": 888 }, { "epoch": 0.04797366574928498, "grad_norm": 1.1568123728537694, "learning_rate": 9.948988763834943e-06, "loss": 0.6378, "step": 889 }, { "epoch": 0.048027629377799363, "grad_norm": 1.0555064994304, "learning_rate": 9.948874156276599e-06, "loss": 0.5383, "step": 890 }, { "epoch": 0.04808159300631375, "grad_norm": 1.3732364076772197, "learning_rate": 9.948759420853114e-06, "loss": 0.4914, "step": 891 }, { "epoch": 0.048135556634828124, "grad_norm": 1.3621135636183506, "learning_rate": 9.94864455756779e-06, "loss": 0.5587, "step": 892 }, { "epoch": 0.04818952026334251, "grad_norm": 1.2161033354683788, "learning_rate": 9.948529566423926e-06, "loss": 0.5355, "step": 893 }, { "epoch": 0.04824348389185689, "grad_norm": 1.2347943493873368, "learning_rate": 9.948414447424828e-06, "loss": 0.6741, "step": 894 }, { "epoch": 0.04829744752037127, "grad_norm": 1.0990730119464749, "learning_rate": 9.948299200573804e-06, "loss": 0.5162, "step": 895 }, { "epoch": 0.04835141114888565, "grad_norm": 1.1517453969953158, "learning_rate": 9.948183825874165e-06, "loss": 0.6907, "step": 896 }, { "epoch": 0.048405374777400034, "grad_norm": 1.0916894811709865, "learning_rate": 9.94806832332923e-06, "loss": 0.6028, "step": 897 }, { "epoch": 0.04845933840591441, "grad_norm": 1.131941499588161, "learning_rate": 9.947952692942315e-06, "loss": 0.4923, "step": 898 }, { "epoch": 0.048513302034428794, "grad_norm": 1.0827248735353647, "learning_rate": 9.947836934716748e-06, "loss": 0.5119, "step": 899 }, { "epoch": 0.04856726566294318, "grad_norm": 1.142551533747074, "learning_rate": 9.947721048655851e-06, "loss": 0.5733, "step": 900 }, { "epoch": 0.048621229291457554, "grad_norm": 1.1691018272197893, "learning_rate": 9.947605034762957e-06, "loss": 0.5001, "step": 901 }, { "epoch": 0.04867519291997194, "grad_norm": 1.2452394132365128, "learning_rate": 9.9474888930414e-06, "loss": 0.5879, "step": 902 }, { "epoch": 0.04872915654848632, "grad_norm": 1.121845133387402, "learning_rate": 9.947372623494521e-06, "loss": 0.5128, "step": 903 }, { "epoch": 0.0487831201770007, "grad_norm": 0.8206533386720382, "learning_rate": 9.947256226125655e-06, "loss": 0.3144, "step": 904 }, { "epoch": 0.04883708380551508, "grad_norm": 1.0988307642935975, "learning_rate": 9.947139700938155e-06, "loss": 0.4608, "step": 905 }, { "epoch": 0.048891047434029465, "grad_norm": 1.2311889179252746, "learning_rate": 9.947023047935366e-06, "loss": 0.5923, "step": 906 }, { "epoch": 0.04894501106254385, "grad_norm": 1.1869626886398155, "learning_rate": 9.94690626712064e-06, "loss": 0.616, "step": 907 }, { "epoch": 0.048998974691058225, "grad_norm": 1.1377227457111447, "learning_rate": 9.946789358497334e-06, "loss": 0.5944, "step": 908 }, { "epoch": 
0.04905293831957261, "grad_norm": 0.8850516222750494, "learning_rate": 9.94667232206881e-06, "loss": 0.3872, "step": 909 }, { "epoch": 0.04910690194808699, "grad_norm": 1.1855855414506806, "learning_rate": 9.946555157838431e-06, "loss": 0.592, "step": 910 }, { "epoch": 0.04916086557660137, "grad_norm": 1.043740924519021, "learning_rate": 9.946437865809563e-06, "loss": 0.3715, "step": 911 }, { "epoch": 0.04921482920511575, "grad_norm": 1.1200607490411842, "learning_rate": 9.946320445985577e-06, "loss": 0.609, "step": 912 }, { "epoch": 0.049268792833630136, "grad_norm": 1.1435492781265595, "learning_rate": 9.94620289836985e-06, "loss": 0.6849, "step": 913 }, { "epoch": 0.04932275646214451, "grad_norm": 1.1800112011495618, "learning_rate": 9.946085222965759e-06, "loss": 0.597, "step": 914 }, { "epoch": 0.049376720090658896, "grad_norm": 1.2704387357312605, "learning_rate": 9.945967419776687e-06, "loss": 0.5696, "step": 915 }, { "epoch": 0.04943068371917328, "grad_norm": 1.2884289142012513, "learning_rate": 9.945849488806017e-06, "loss": 0.5663, "step": 916 }, { "epoch": 0.049484647347687656, "grad_norm": 1.175951397470401, "learning_rate": 9.945731430057142e-06, "loss": 0.5097, "step": 917 }, { "epoch": 0.04953861097620204, "grad_norm": 1.1128480787952366, "learning_rate": 9.945613243533454e-06, "loss": 0.5986, "step": 918 }, { "epoch": 0.04959257460471642, "grad_norm": 1.0409962664966061, "learning_rate": 9.945494929238347e-06, "loss": 0.5551, "step": 919 }, { "epoch": 0.0496465382332308, "grad_norm": 1.3161197113986414, "learning_rate": 9.945376487175225e-06, "loss": 0.6711, "step": 920 }, { "epoch": 0.04970050186174518, "grad_norm": 1.0603301510247114, "learning_rate": 9.945257917347494e-06, "loss": 0.498, "step": 921 }, { "epoch": 0.04975446549025957, "grad_norm": 1.2689361906824057, "learning_rate": 9.945139219758557e-06, "loss": 0.7067, "step": 922 }, { "epoch": 0.04980842911877394, "grad_norm": 1.1863340445933492, "learning_rate": 9.945020394411825e-06, "loss": 0.5514, "step": 923 }, { "epoch": 0.04986239274728833, "grad_norm": 1.1305097743943155, "learning_rate": 9.944901441310718e-06, "loss": 0.4715, "step": 924 }, { "epoch": 0.04991635637580271, "grad_norm": 1.182687493870884, "learning_rate": 9.944782360458651e-06, "loss": 0.534, "step": 925 }, { "epoch": 0.04997032000431709, "grad_norm": 1.2140043040050221, "learning_rate": 9.944663151859047e-06, "loss": 0.7507, "step": 926 }, { "epoch": 0.05002428363283147, "grad_norm": 0.8890439120387297, "learning_rate": 9.944543815515334e-06, "loss": 0.3346, "step": 927 }, { "epoch": 0.050078247261345854, "grad_norm": 0.9995037436744633, "learning_rate": 9.944424351430942e-06, "loss": 0.4698, "step": 928 }, { "epoch": 0.05013221088986024, "grad_norm": 1.0680249722463102, "learning_rate": 9.944304759609303e-06, "loss": 0.4948, "step": 929 }, { "epoch": 0.050186174518374614, "grad_norm": 1.0249911843876578, "learning_rate": 9.944185040053855e-06, "loss": 0.4454, "step": 930 }, { "epoch": 0.050240138146889, "grad_norm": 1.1014612078908064, "learning_rate": 9.944065192768037e-06, "loss": 0.4797, "step": 931 }, { "epoch": 0.05029410177540338, "grad_norm": 0.9924841059203763, "learning_rate": 9.943945217755296e-06, "loss": 0.4721, "step": 932 }, { "epoch": 0.05034806540391776, "grad_norm": 1.1356030242662922, "learning_rate": 9.94382511501908e-06, "loss": 0.5683, "step": 933 }, { "epoch": 0.05040202903243214, "grad_norm": 1.2249284315308386, "learning_rate": 9.943704884562838e-06, "loss": 0.6563, "step": 934 }, { "epoch": 0.050455992660946525, 
"grad_norm": 1.0910310936758012, "learning_rate": 9.943584526390028e-06, "loss": 0.4433, "step": 935 }, { "epoch": 0.0505099562894609, "grad_norm": 1.06334702100724, "learning_rate": 9.94346404050411e-06, "loss": 0.5281, "step": 936 }, { "epoch": 0.050563919917975285, "grad_norm": 0.9941522802311998, "learning_rate": 9.943343426908544e-06, "loss": 0.5305, "step": 937 }, { "epoch": 0.05061788354648967, "grad_norm": 1.1969860609642315, "learning_rate": 9.943222685606799e-06, "loss": 0.4892, "step": 938 }, { "epoch": 0.050671847175004045, "grad_norm": 0.9532988061705191, "learning_rate": 9.943101816602344e-06, "loss": 0.3916, "step": 939 }, { "epoch": 0.05072581080351843, "grad_norm": 1.073260374045587, "learning_rate": 9.942980819898655e-06, "loss": 0.5817, "step": 940 }, { "epoch": 0.05077977443203281, "grad_norm": 1.1386205153888997, "learning_rate": 9.942859695499206e-06, "loss": 0.491, "step": 941 }, { "epoch": 0.05083373806054719, "grad_norm": 1.4006842128178485, "learning_rate": 9.94273844340748e-06, "loss": 0.5655, "step": 942 }, { "epoch": 0.05088770168906157, "grad_norm": 1.2238228760547045, "learning_rate": 9.942617063626964e-06, "loss": 0.5998, "step": 943 }, { "epoch": 0.050941665317575956, "grad_norm": 1.3009739056071936, "learning_rate": 9.942495556161143e-06, "loss": 0.7357, "step": 944 }, { "epoch": 0.05099562894609033, "grad_norm": 1.2108375479876154, "learning_rate": 9.942373921013512e-06, "loss": 0.5261, "step": 945 }, { "epoch": 0.051049592574604716, "grad_norm": 1.190659206324907, "learning_rate": 9.942252158187567e-06, "loss": 0.7053, "step": 946 }, { "epoch": 0.0511035562031191, "grad_norm": 0.8601414145935143, "learning_rate": 9.942130267686804e-06, "loss": 0.3742, "step": 947 }, { "epoch": 0.051157519831633476, "grad_norm": 1.1685961538246128, "learning_rate": 9.94200824951473e-06, "loss": 0.4824, "step": 948 }, { "epoch": 0.05121148346014786, "grad_norm": 1.1359198702540192, "learning_rate": 9.94188610367485e-06, "loss": 0.5011, "step": 949 }, { "epoch": 0.05126544708866224, "grad_norm": 1.1763716493388692, "learning_rate": 9.941763830170675e-06, "loss": 0.4977, "step": 950 }, { "epoch": 0.05131941071717662, "grad_norm": 1.2452579498304215, "learning_rate": 9.94164142900572e-06, "loss": 0.5505, "step": 951 }, { "epoch": 0.051373374345691, "grad_norm": 0.9287950381384263, "learning_rate": 9.941518900183501e-06, "loss": 0.3928, "step": 952 }, { "epoch": 0.05142733797420539, "grad_norm": 0.9733887430579281, "learning_rate": 9.941396243707544e-06, "loss": 0.4204, "step": 953 }, { "epoch": 0.05148130160271977, "grad_norm": 1.210708109561921, "learning_rate": 9.941273459581368e-06, "loss": 0.6957, "step": 954 }, { "epoch": 0.05153526523123415, "grad_norm": 1.2437245782495712, "learning_rate": 9.941150547808508e-06, "loss": 0.6411, "step": 955 }, { "epoch": 0.05158922885974853, "grad_norm": 1.339490879085528, "learning_rate": 9.941027508392493e-06, "loss": 0.6383, "step": 956 }, { "epoch": 0.051643192488262914, "grad_norm": 0.9581110984262579, "learning_rate": 9.940904341336859e-06, "loss": 0.4195, "step": 957 }, { "epoch": 0.05169715611677729, "grad_norm": 1.2973297179797718, "learning_rate": 9.940781046645147e-06, "loss": 0.642, "step": 958 }, { "epoch": 0.051751119745291674, "grad_norm": 1.1188470742790184, "learning_rate": 9.940657624320901e-06, "loss": 0.607, "step": 959 }, { "epoch": 0.05180508337380606, "grad_norm": 1.0749905459353266, "learning_rate": 9.940534074367667e-06, "loss": 0.5667, "step": 960 }, { "epoch": 0.051859047002320434, "grad_norm": 
1.1126112661538172, "learning_rate": 9.940410396788997e-06, "loss": 0.5524, "step": 961 }, { "epoch": 0.05191301063083482, "grad_norm": 1.068142493588087, "learning_rate": 9.940286591588448e-06, "loss": 0.4731, "step": 962 }, { "epoch": 0.0519669742593492, "grad_norm": 0.9837912435942615, "learning_rate": 9.940162658769574e-06, "loss": 0.5821, "step": 963 }, { "epoch": 0.05202093788786358, "grad_norm": 1.2557086886793707, "learning_rate": 9.94003859833594e-06, "loss": 0.5706, "step": 964 }, { "epoch": 0.05207490151637796, "grad_norm": 1.0808312491617713, "learning_rate": 9.939914410291109e-06, "loss": 0.4327, "step": 965 }, { "epoch": 0.052128865144892345, "grad_norm": 0.9920629003973672, "learning_rate": 9.939790094638651e-06, "loss": 0.4885, "step": 966 }, { "epoch": 0.05218282877340672, "grad_norm": 1.0616517555397011, "learning_rate": 9.93966565138214e-06, "loss": 0.4787, "step": 967 }, { "epoch": 0.052236792401921105, "grad_norm": 1.1129504896846398, "learning_rate": 9.939541080525153e-06, "loss": 0.4747, "step": 968 }, { "epoch": 0.05229075603043549, "grad_norm": 1.2973136398948457, "learning_rate": 9.939416382071269e-06, "loss": 0.6651, "step": 969 }, { "epoch": 0.052344719658949865, "grad_norm": 1.124001588908291, "learning_rate": 9.939291556024072e-06, "loss": 0.5799, "step": 970 }, { "epoch": 0.05239868328746425, "grad_norm": 1.1290480849471018, "learning_rate": 9.939166602387152e-06, "loss": 0.7056, "step": 971 }, { "epoch": 0.05245264691597863, "grad_norm": 1.0055457271969237, "learning_rate": 9.939041521164097e-06, "loss": 0.5228, "step": 972 }, { "epoch": 0.05250661054449301, "grad_norm": 1.2953131890566674, "learning_rate": 9.938916312358505e-06, "loss": 0.619, "step": 973 }, { "epoch": 0.05256057417300739, "grad_norm": 1.1553552381650756, "learning_rate": 9.93879097597397e-06, "loss": 0.5775, "step": 974 }, { "epoch": 0.052614537801521775, "grad_norm": 1.0619771952527604, "learning_rate": 9.938665512014098e-06, "loss": 0.5347, "step": 975 }, { "epoch": 0.05266850143003616, "grad_norm": 1.265294361991696, "learning_rate": 9.938539920482494e-06, "loss": 0.6542, "step": 976 }, { "epoch": 0.052722465058550536, "grad_norm": 1.2430706697411082, "learning_rate": 9.93841420138277e-06, "loss": 0.5138, "step": 977 }, { "epoch": 0.05277642868706492, "grad_norm": 1.1635300732565532, "learning_rate": 9.938288354718536e-06, "loss": 0.4974, "step": 978 }, { "epoch": 0.0528303923155793, "grad_norm": 1.1917326046310424, "learning_rate": 9.93816238049341e-06, "loss": 0.5582, "step": 979 }, { "epoch": 0.05288435594409368, "grad_norm": 0.8240484699433677, "learning_rate": 9.938036278711015e-06, "loss": 0.3748, "step": 980 }, { "epoch": 0.05293831957260806, "grad_norm": 1.1330558016273184, "learning_rate": 9.937910049374971e-06, "loss": 0.4984, "step": 981 }, { "epoch": 0.052992283201122446, "grad_norm": 1.3269596197489946, "learning_rate": 9.937783692488906e-06, "loss": 0.5885, "step": 982 }, { "epoch": 0.05304624682963682, "grad_norm": 0.9910824435900697, "learning_rate": 9.937657208056458e-06, "loss": 0.4172, "step": 983 }, { "epoch": 0.053100210458151206, "grad_norm": 1.0741608027785567, "learning_rate": 9.937530596081254e-06, "loss": 0.6688, "step": 984 }, { "epoch": 0.05315417408666559, "grad_norm": 1.0619712769687613, "learning_rate": 9.937403856566939e-06, "loss": 0.4817, "step": 985 }, { "epoch": 0.053208137715179966, "grad_norm": 1.2688578788517515, "learning_rate": 9.937276989517153e-06, "loss": 0.6519, "step": 986 }, { "epoch": 0.05326210134369435, "grad_norm": 0.9383043202362142, 
"learning_rate": 9.937149994935543e-06, "loss": 0.405, "step": 987 }, { "epoch": 0.053316064972208733, "grad_norm": 1.001995972414624, "learning_rate": 9.937022872825756e-06, "loss": 0.5951, "step": 988 }, { "epoch": 0.05337002860072311, "grad_norm": 1.016055997196146, "learning_rate": 9.936895623191453e-06, "loss": 0.4707, "step": 989 }, { "epoch": 0.053423992229237494, "grad_norm": 1.0738252874343075, "learning_rate": 9.936768246036283e-06, "loss": 0.5661, "step": 990 }, { "epoch": 0.05347795585775188, "grad_norm": 1.0950234445594653, "learning_rate": 9.936640741363913e-06, "loss": 0.4569, "step": 991 }, { "epoch": 0.053531919486266254, "grad_norm": 0.8663372520293344, "learning_rate": 9.936513109178003e-06, "loss": 0.423, "step": 992 }, { "epoch": 0.05358588311478064, "grad_norm": 1.143248129276903, "learning_rate": 9.936385349482226e-06, "loss": 0.6686, "step": 993 }, { "epoch": 0.05363984674329502, "grad_norm": 1.159089527874994, "learning_rate": 9.936257462280251e-06, "loss": 0.5181, "step": 994 }, { "epoch": 0.0536938103718094, "grad_norm": 0.9020277036544208, "learning_rate": 9.936129447575752e-06, "loss": 0.3769, "step": 995 }, { "epoch": 0.05374777400032378, "grad_norm": 1.320910263474217, "learning_rate": 9.936001305372412e-06, "loss": 0.6481, "step": 996 }, { "epoch": 0.053801737628838164, "grad_norm": 1.2050958636270876, "learning_rate": 9.935873035673911e-06, "loss": 0.5827, "step": 997 }, { "epoch": 0.05385570125735254, "grad_norm": 0.9171114944046602, "learning_rate": 9.935744638483938e-06, "loss": 0.4028, "step": 998 }, { "epoch": 0.053909664885866924, "grad_norm": 0.7184313624066699, "learning_rate": 9.935616113806182e-06, "loss": 0.3021, "step": 999 }, { "epoch": 0.05396362851438131, "grad_norm": 1.0547786803586519, "learning_rate": 9.935487461644336e-06, "loss": 0.5216, "step": 1000 }, { "epoch": 0.05396362851438131, "eval_loss": 0.6226067543029785, "eval_runtime": 164.0493, "eval_samples_per_second": 20.963, "eval_steps_per_second": 0.878, "step": 1000 }, { "epoch": 0.05401759214289569, "grad_norm": 1.2023994644950398, "learning_rate": 9.9353586820021e-06, "loss": 0.6233, "step": 1001 }, { "epoch": 0.05407155577141007, "grad_norm": 0.8612062687703272, "learning_rate": 9.935229774883172e-06, "loss": 0.4073, "step": 1002 }, { "epoch": 0.05412551939992445, "grad_norm": 1.0617764289252274, "learning_rate": 9.935100740291259e-06, "loss": 0.5222, "step": 1003 }, { "epoch": 0.054179483028438835, "grad_norm": 1.3426345089131284, "learning_rate": 9.934971578230072e-06, "loss": 0.6215, "step": 1004 }, { "epoch": 0.05423344665695321, "grad_norm": 1.0878461397701575, "learning_rate": 9.934842288703318e-06, "loss": 0.614, "step": 1005 }, { "epoch": 0.054287410285467595, "grad_norm": 1.3384591563841215, "learning_rate": 9.934712871714716e-06, "loss": 0.5782, "step": 1006 }, { "epoch": 0.05434137391398198, "grad_norm": 1.607785271003181, "learning_rate": 9.934583327267986e-06, "loss": 0.7857, "step": 1007 }, { "epoch": 0.054395337542496355, "grad_norm": 1.3370854430986108, "learning_rate": 9.93445365536685e-06, "loss": 0.5012, "step": 1008 }, { "epoch": 0.05444930117101074, "grad_norm": 0.8279419905356157, "learning_rate": 9.934323856015036e-06, "loss": 0.5149, "step": 1009 }, { "epoch": 0.05450326479952512, "grad_norm": 1.130515250980697, "learning_rate": 9.934193929216271e-06, "loss": 0.5142, "step": 1010 }, { "epoch": 0.0545572284280395, "grad_norm": 0.8609342972741308, "learning_rate": 9.934063874974295e-06, "loss": 0.4632, "step": 1011 }, { "epoch": 0.05461119205655388, 
"grad_norm": 1.0774058847067693, "learning_rate": 9.93393369329284e-06, "loss": 0.5084, "step": 1012 }, { "epoch": 0.054665155685068266, "grad_norm": 1.0401120193620315, "learning_rate": 9.933803384175654e-06, "loss": 0.5945, "step": 1013 }, { "epoch": 0.05471911931358264, "grad_norm": 1.1362958751041685, "learning_rate": 9.933672947626476e-06, "loss": 0.5032, "step": 1014 }, { "epoch": 0.054773082942097026, "grad_norm": 1.1736921606921518, "learning_rate": 9.93354238364906e-06, "loss": 0.5598, "step": 1015 }, { "epoch": 0.05482704657061141, "grad_norm": 0.8712858333084482, "learning_rate": 9.933411692247155e-06, "loss": 0.3491, "step": 1016 }, { "epoch": 0.054881010199125786, "grad_norm": 0.90698567107981, "learning_rate": 9.933280873424517e-06, "loss": 0.3474, "step": 1017 }, { "epoch": 0.05493497382764017, "grad_norm": 0.9719069159174657, "learning_rate": 9.93314992718491e-06, "loss": 0.4207, "step": 1018 }, { "epoch": 0.05498893745615455, "grad_norm": 1.1279934819881443, "learning_rate": 9.933018853532093e-06, "loss": 0.5144, "step": 1019 }, { "epoch": 0.05504290108466893, "grad_norm": 1.0669557952489312, "learning_rate": 9.932887652469836e-06, "loss": 0.5522, "step": 1020 }, { "epoch": 0.05509686471318331, "grad_norm": 1.3518367262557713, "learning_rate": 9.932756324001907e-06, "loss": 0.5376, "step": 1021 }, { "epoch": 0.0551508283416977, "grad_norm": 0.970990148943271, "learning_rate": 9.932624868132082e-06, "loss": 0.4878, "step": 1022 }, { "epoch": 0.05520479197021208, "grad_norm": 1.495257366842139, "learning_rate": 9.93249328486414e-06, "loss": 0.6202, "step": 1023 }, { "epoch": 0.05525875559872646, "grad_norm": 1.4963237547364954, "learning_rate": 9.932361574201865e-06, "loss": 0.9286, "step": 1024 }, { "epoch": 0.05531271922724084, "grad_norm": 1.093292082047647, "learning_rate": 9.932229736149037e-06, "loss": 0.591, "step": 1025 }, { "epoch": 0.055366682855755224, "grad_norm": 1.073237667282872, "learning_rate": 9.932097770709447e-06, "loss": 0.4965, "step": 1026 }, { "epoch": 0.0554206464842696, "grad_norm": 1.1836114874130435, "learning_rate": 9.931965677886892e-06, "loss": 0.6085, "step": 1027 }, { "epoch": 0.055474610112783984, "grad_norm": 1.0930939128299604, "learning_rate": 9.931833457685163e-06, "loss": 0.4859, "step": 1028 }, { "epoch": 0.05552857374129837, "grad_norm": 1.063142817823407, "learning_rate": 9.931701110108064e-06, "loss": 0.5838, "step": 1029 }, { "epoch": 0.055582537369812744, "grad_norm": 1.1531640236787277, "learning_rate": 9.931568635159396e-06, "loss": 0.7143, "step": 1030 }, { "epoch": 0.05563650099832713, "grad_norm": 0.9688223805285074, "learning_rate": 9.931436032842965e-06, "loss": 0.4661, "step": 1031 }, { "epoch": 0.05569046462684151, "grad_norm": 1.1774047790315187, "learning_rate": 9.93130330316259e-06, "loss": 0.5187, "step": 1032 }, { "epoch": 0.05574442825535589, "grad_norm": 1.1309853070142364, "learning_rate": 9.931170446122077e-06, "loss": 0.4226, "step": 1033 }, { "epoch": 0.05579839188387027, "grad_norm": 1.1410310957879708, "learning_rate": 9.931037461725249e-06, "loss": 0.6308, "step": 1034 }, { "epoch": 0.055852355512384655, "grad_norm": 1.2339220973976475, "learning_rate": 9.930904349975927e-06, "loss": 0.7152, "step": 1035 }, { "epoch": 0.05590631914089903, "grad_norm": 1.218639895042501, "learning_rate": 9.930771110877938e-06, "loss": 0.6497, "step": 1036 }, { "epoch": 0.055960282769413415, "grad_norm": 1.2044668967273855, "learning_rate": 9.930637744435108e-06, "loss": 0.7125, "step": 1037 }, { "epoch": 0.0560142463979278, 
"grad_norm": 1.1617520760101143, "learning_rate": 9.930504250651274e-06, "loss": 0.5038, "step": 1038 }, { "epoch": 0.056068210026442175, "grad_norm": 0.9601996092976851, "learning_rate": 9.930370629530272e-06, "loss": 0.5533, "step": 1039 }, { "epoch": 0.05612217365495656, "grad_norm": 1.0620232727884185, "learning_rate": 9.93023688107594e-06, "loss": 0.4111, "step": 1040 }, { "epoch": 0.05617613728347094, "grad_norm": 0.9838885960634436, "learning_rate": 9.930103005292125e-06, "loss": 0.4681, "step": 1041 }, { "epoch": 0.05623010091198532, "grad_norm": 1.041853751086941, "learning_rate": 9.929969002182672e-06, "loss": 0.486, "step": 1042 }, { "epoch": 0.0562840645404997, "grad_norm": 1.3353148178216587, "learning_rate": 9.929834871751434e-06, "loss": 0.5567, "step": 1043 }, { "epoch": 0.056338028169014086, "grad_norm": 1.014180967498552, "learning_rate": 9.929700614002265e-06, "loss": 0.5128, "step": 1044 }, { "epoch": 0.05639199179752846, "grad_norm": 0.7958715730878317, "learning_rate": 9.929566228939025e-06, "loss": 0.4399, "step": 1045 }, { "epoch": 0.056445955426042846, "grad_norm": 1.0517660060868628, "learning_rate": 9.929431716565577e-06, "loss": 0.557, "step": 1046 }, { "epoch": 0.05649991905455723, "grad_norm": 1.012651541210173, "learning_rate": 9.929297076885785e-06, "loss": 0.4803, "step": 1047 }, { "epoch": 0.05655388268307161, "grad_norm": 0.9239130151281298, "learning_rate": 9.92916230990352e-06, "loss": 0.4523, "step": 1048 }, { "epoch": 0.05660784631158599, "grad_norm": 1.1115792548533128, "learning_rate": 9.929027415622655e-06, "loss": 0.5136, "step": 1049 }, { "epoch": 0.05666180994010037, "grad_norm": 1.14445002946903, "learning_rate": 9.928892394047066e-06, "loss": 0.652, "step": 1050 }, { "epoch": 0.05671577356861476, "grad_norm": 1.080993708601679, "learning_rate": 9.928757245180635e-06, "loss": 0.5891, "step": 1051 }, { "epoch": 0.05676973719712913, "grad_norm": 1.0509488370260447, "learning_rate": 9.928621969027246e-06, "loss": 0.4039, "step": 1052 }, { "epoch": 0.05682370082564352, "grad_norm": 1.2389372245481627, "learning_rate": 9.928486565590785e-06, "loss": 0.6045, "step": 1053 }, { "epoch": 0.0568776644541579, "grad_norm": 0.9911170868183955, "learning_rate": 9.928351034875149e-06, "loss": 0.5899, "step": 1054 }, { "epoch": 0.05693162808267228, "grad_norm": 1.0892073043968464, "learning_rate": 9.928215376884227e-06, "loss": 0.6116, "step": 1055 }, { "epoch": 0.05698559171118666, "grad_norm": 1.4176933196564943, "learning_rate": 9.928079591621922e-06, "loss": 0.57, "step": 1056 }, { "epoch": 0.057039555339701044, "grad_norm": 1.076250954270727, "learning_rate": 9.927943679092134e-06, "loss": 0.5164, "step": 1057 }, { "epoch": 0.05709351896821542, "grad_norm": 0.8958215243377342, "learning_rate": 9.927807639298772e-06, "loss": 0.3483, "step": 1058 }, { "epoch": 0.057147482596729804, "grad_norm": 1.3218711661544011, "learning_rate": 9.927671472245743e-06, "loss": 0.6363, "step": 1059 }, { "epoch": 0.05720144622524419, "grad_norm": 1.215084343070576, "learning_rate": 9.927535177936964e-06, "loss": 0.5085, "step": 1060 }, { "epoch": 0.057255409853758564, "grad_norm": 1.172103929714746, "learning_rate": 9.927398756376348e-06, "loss": 0.6634, "step": 1061 }, { "epoch": 0.05730937348227295, "grad_norm": 0.8655345151576931, "learning_rate": 9.927262207567817e-06, "loss": 0.3874, "step": 1062 }, { "epoch": 0.05736333711078733, "grad_norm": 1.046629513682674, "learning_rate": 9.9271255315153e-06, "loss": 0.5689, "step": 1063 }, { "epoch": 0.05741730073930171, 
"grad_norm": 1.3747233185738972, "learning_rate": 9.926988728222721e-06, "loss": 0.7255, "step": 1064 }, { "epoch": 0.05747126436781609, "grad_norm": 1.215027122982026, "learning_rate": 9.926851797694012e-06, "loss": 0.5023, "step": 1065 }, { "epoch": 0.057525227996330475, "grad_norm": 1.0386643954071433, "learning_rate": 9.926714739933111e-06, "loss": 0.4577, "step": 1066 }, { "epoch": 0.05757919162484485, "grad_norm": 0.9931422641125638, "learning_rate": 9.926577554943955e-06, "loss": 0.3957, "step": 1067 }, { "epoch": 0.057633155253359235, "grad_norm": 1.0768245900535862, "learning_rate": 9.926440242730486e-06, "loss": 0.4253, "step": 1068 }, { "epoch": 0.05768711888187362, "grad_norm": 1.075005360701322, "learning_rate": 9.926302803296654e-06, "loss": 0.5134, "step": 1069 }, { "epoch": 0.057741082510388, "grad_norm": 1.1832985481611653, "learning_rate": 9.926165236646405e-06, "loss": 0.5456, "step": 1070 }, { "epoch": 0.05779504613890238, "grad_norm": 1.2279499461384902, "learning_rate": 9.926027542783695e-06, "loss": 0.4491, "step": 1071 }, { "epoch": 0.05784900976741676, "grad_norm": 1.0539094085718188, "learning_rate": 9.925889721712482e-06, "loss": 0.4674, "step": 1072 }, { "epoch": 0.057902973395931145, "grad_norm": 1.036909349965767, "learning_rate": 9.925751773436726e-06, "loss": 0.5682, "step": 1073 }, { "epoch": 0.05795693702444552, "grad_norm": 1.3448201955446006, "learning_rate": 9.925613697960394e-06, "loss": 0.5136, "step": 1074 }, { "epoch": 0.058010900652959906, "grad_norm": 1.3956994166612555, "learning_rate": 9.925475495287449e-06, "loss": 0.697, "step": 1075 }, { "epoch": 0.05806486428147429, "grad_norm": 1.2027719043061338, "learning_rate": 9.92533716542187e-06, "loss": 0.5785, "step": 1076 }, { "epoch": 0.058118827909988666, "grad_norm": 1.1998103977282748, "learning_rate": 9.925198708367627e-06, "loss": 0.5562, "step": 1077 }, { "epoch": 0.05817279153850305, "grad_norm": 1.0382680257462724, "learning_rate": 9.925060124128702e-06, "loss": 0.5405, "step": 1078 }, { "epoch": 0.05822675516701743, "grad_norm": 0.7110051637436826, "learning_rate": 9.924921412709078e-06, "loss": 0.2886, "step": 1079 }, { "epoch": 0.05828071879553181, "grad_norm": 1.0381054017020952, "learning_rate": 9.924782574112743e-06, "loss": 0.4543, "step": 1080 }, { "epoch": 0.05833468242404619, "grad_norm": 1.3173854726163965, "learning_rate": 9.924643608343685e-06, "loss": 0.5803, "step": 1081 }, { "epoch": 0.058388646052560576, "grad_norm": 0.9498448360556373, "learning_rate": 9.9245045154059e-06, "loss": 0.4187, "step": 1082 }, { "epoch": 0.05844260968107495, "grad_norm": 0.8521959964681866, "learning_rate": 9.924365295303382e-06, "loss": 0.392, "step": 1083 }, { "epoch": 0.058496573309589336, "grad_norm": 1.0725999110184812, "learning_rate": 9.924225948040136e-06, "loss": 0.5742, "step": 1084 }, { "epoch": 0.05855053693810372, "grad_norm": 1.075339973980148, "learning_rate": 9.924086473620165e-06, "loss": 0.6659, "step": 1085 }, { "epoch": 0.0586045005666181, "grad_norm": 1.0862911255040697, "learning_rate": 9.923946872047481e-06, "loss": 0.6123, "step": 1086 }, { "epoch": 0.05865846419513248, "grad_norm": 1.2162331075707398, "learning_rate": 9.923807143326092e-06, "loss": 0.4996, "step": 1087 }, { "epoch": 0.058712427823646864, "grad_norm": 1.172245830967354, "learning_rate": 9.923667287460017e-06, "loss": 0.5834, "step": 1088 }, { "epoch": 0.05876639145216124, "grad_norm": 1.1463887060631397, "learning_rate": 9.923527304453273e-06, "loss": 0.5358, "step": 1089 }, { "epoch": 
0.058820355080675624, "grad_norm": 1.1476867291157467, "learning_rate": 9.923387194309887e-06, "loss": 0.8617, "step": 1090 }, { "epoch": 0.05887431870919001, "grad_norm": 1.3333360690245066, "learning_rate": 9.923246957033881e-06, "loss": 0.4891, "step": 1091 }, { "epoch": 0.058928282337704384, "grad_norm": 1.169477520921293, "learning_rate": 9.92310659262929e-06, "loss": 0.5805, "step": 1092 }, { "epoch": 0.05898224596621877, "grad_norm": 1.3279530837610412, "learning_rate": 9.922966101100143e-06, "loss": 0.585, "step": 1093 }, { "epoch": 0.05903620959473315, "grad_norm": 1.2730111755866398, "learning_rate": 9.922825482450484e-06, "loss": 0.5245, "step": 1094 }, { "epoch": 0.059090173223247534, "grad_norm": 0.851589004597301, "learning_rate": 9.922684736684351e-06, "loss": 0.365, "step": 1095 }, { "epoch": 0.05914413685176191, "grad_norm": 0.9334940177851558, "learning_rate": 9.92254386380579e-06, "loss": 0.4499, "step": 1096 }, { "epoch": 0.059198100480276294, "grad_norm": 1.3891174706133667, "learning_rate": 9.92240286381885e-06, "loss": 0.4977, "step": 1097 }, { "epoch": 0.05925206410879068, "grad_norm": 1.1099315485360044, "learning_rate": 9.922261736727583e-06, "loss": 0.4729, "step": 1098 }, { "epoch": 0.059306027737305055, "grad_norm": 1.207761323144064, "learning_rate": 9.922120482536045e-06, "loss": 0.6132, "step": 1099 }, { "epoch": 0.05935999136581944, "grad_norm": 1.0804922418361151, "learning_rate": 9.921979101248296e-06, "loss": 0.4625, "step": 1100 }, { "epoch": 0.05941395499433382, "grad_norm": 1.2669076847139964, "learning_rate": 9.9218375928684e-06, "loss": 0.5528, "step": 1101 }, { "epoch": 0.0594679186228482, "grad_norm": 1.0411547516200543, "learning_rate": 9.921695957400425e-06, "loss": 0.4875, "step": 1102 }, { "epoch": 0.05952188225136258, "grad_norm": 0.8224949030006246, "learning_rate": 9.921554194848438e-06, "loss": 0.3581, "step": 1103 }, { "epoch": 0.059575845879876965, "grad_norm": 1.2174325731156257, "learning_rate": 9.921412305216516e-06, "loss": 0.5318, "step": 1104 }, { "epoch": 0.05962980950839134, "grad_norm": 1.176216394943767, "learning_rate": 9.921270288508738e-06, "loss": 0.4839, "step": 1105 }, { "epoch": 0.059683773136905725, "grad_norm": 1.173592501411679, "learning_rate": 9.921128144729185e-06, "loss": 0.5715, "step": 1106 }, { "epoch": 0.05973773676542011, "grad_norm": 0.9408335407170557, "learning_rate": 9.92098587388194e-06, "loss": 0.3462, "step": 1107 }, { "epoch": 0.059791700393934485, "grad_norm": 1.2356074106048929, "learning_rate": 9.920843475971096e-06, "loss": 0.7519, "step": 1108 }, { "epoch": 0.05984566402244887, "grad_norm": 1.256223513423086, "learning_rate": 9.92070095100074e-06, "loss": 0.615, "step": 1109 }, { "epoch": 0.05989962765096325, "grad_norm": 0.9682461083343752, "learning_rate": 9.920558298974974e-06, "loss": 0.4698, "step": 1110 }, { "epoch": 0.05995359127947763, "grad_norm": 1.112808887345549, "learning_rate": 9.920415519897898e-06, "loss": 0.5708, "step": 1111 }, { "epoch": 0.06000755490799201, "grad_norm": 1.294263814030072, "learning_rate": 9.920272613773611e-06, "loss": 0.5738, "step": 1112 }, { "epoch": 0.060061518536506396, "grad_norm": 1.1806926711202252, "learning_rate": 9.920129580606224e-06, "loss": 0.4584, "step": 1113 }, { "epoch": 0.06011548216502077, "grad_norm": 1.2492989748049381, "learning_rate": 9.919986420399846e-06, "loss": 0.5246, "step": 1114 }, { "epoch": 0.060169445793535156, "grad_norm": 1.149865129144684, "learning_rate": 9.919843133158592e-06, "loss": 0.4929, "step": 1115 }, { "epoch": 
0.06022340942204954, "grad_norm": 1.0603190437917798, "learning_rate": 9.91969971888658e-06, "loss": 0.5278, "step": 1116 }, { "epoch": 0.06027737305056392, "grad_norm": 1.221355498686458, "learning_rate": 9.919556177587932e-06, "loss": 0.5234, "step": 1117 }, { "epoch": 0.0603313366790783, "grad_norm": 1.3173457859269384, "learning_rate": 9.919412509266775e-06, "loss": 0.6905, "step": 1118 }, { "epoch": 0.06038530030759268, "grad_norm": 0.976047658477216, "learning_rate": 9.919268713927236e-06, "loss": 0.445, "step": 1119 }, { "epoch": 0.06043926393610707, "grad_norm": 1.1178463171268915, "learning_rate": 9.91912479157345e-06, "loss": 0.5306, "step": 1120 }, { "epoch": 0.06049322756462144, "grad_norm": 2.1512471198640797, "learning_rate": 9.918980742209552e-06, "loss": 0.6731, "step": 1121 }, { "epoch": 0.06054719119313583, "grad_norm": 1.1287806516156254, "learning_rate": 9.918836565839682e-06, "loss": 0.5513, "step": 1122 }, { "epoch": 0.06060115482165021, "grad_norm": 1.2546453911339908, "learning_rate": 9.918692262467985e-06, "loss": 0.5559, "step": 1123 }, { "epoch": 0.06065511845016459, "grad_norm": 1.1685790377528955, "learning_rate": 9.918547832098606e-06, "loss": 0.6592, "step": 1124 }, { "epoch": 0.06070908207867897, "grad_norm": 1.2586584645164742, "learning_rate": 9.9184032747357e-06, "loss": 0.5594, "step": 1125 }, { "epoch": 0.060763045707193354, "grad_norm": 1.020357620807058, "learning_rate": 9.918258590383417e-06, "loss": 0.4455, "step": 1126 }, { "epoch": 0.06081700933570773, "grad_norm": 0.8355037912170709, "learning_rate": 9.91811377904592e-06, "loss": 0.3881, "step": 1127 }, { "epoch": 0.060870972964222114, "grad_norm": 1.0330667095735402, "learning_rate": 9.917968840727366e-06, "loss": 0.4268, "step": 1128 }, { "epoch": 0.0609249365927365, "grad_norm": 1.1430983240252517, "learning_rate": 9.917823775431927e-06, "loss": 0.4948, "step": 1129 }, { "epoch": 0.060978900221250874, "grad_norm": 1.0255848846305782, "learning_rate": 9.917678583163766e-06, "loss": 0.5068, "step": 1130 }, { "epoch": 0.06103286384976526, "grad_norm": 1.5068935437787216, "learning_rate": 9.91753326392706e-06, "loss": 0.6864, "step": 1131 }, { "epoch": 0.06108682747827964, "grad_norm": 0.9076958075822859, "learning_rate": 9.917387817725984e-06, "loss": 0.4973, "step": 1132 }, { "epoch": 0.06114079110679402, "grad_norm": 1.082157768190573, "learning_rate": 9.91724224456472e-06, "loss": 0.6103, "step": 1133 }, { "epoch": 0.0611947547353084, "grad_norm": 0.8786983823623391, "learning_rate": 9.917096544447448e-06, "loss": 0.392, "step": 1134 }, { "epoch": 0.061248718363822785, "grad_norm": 1.25416503663267, "learning_rate": 9.916950717378361e-06, "loss": 0.5387, "step": 1135 }, { "epoch": 0.06130268199233716, "grad_norm": 0.8202764755749875, "learning_rate": 9.916804763361644e-06, "loss": 0.3534, "step": 1136 }, { "epoch": 0.061356645620851545, "grad_norm": 1.2845513020235255, "learning_rate": 9.916658682401497e-06, "loss": 0.6599, "step": 1137 }, { "epoch": 0.06141060924936593, "grad_norm": 1.1322518063569047, "learning_rate": 9.91651247450212e-06, "loss": 0.6219, "step": 1138 }, { "epoch": 0.061464572877880305, "grad_norm": 1.215808317793313, "learning_rate": 9.916366139667707e-06, "loss": 0.5506, "step": 1139 }, { "epoch": 0.06151853650639469, "grad_norm": 0.9393399404357899, "learning_rate": 9.91621967790247e-06, "loss": 0.5155, "step": 1140 }, { "epoch": 0.06157250013490907, "grad_norm": 1.1820679116000006, "learning_rate": 9.916073089210619e-06, "loss": 0.5404, "step": 1141 }, { "epoch": 
0.061626463763423456, "grad_norm": 1.239971889166963, "learning_rate": 9.915926373596364e-06, "loss": 0.6447, "step": 1142 }, { "epoch": 0.06168042739193783, "grad_norm": 0.944595241054684, "learning_rate": 9.915779531063923e-06, "loss": 0.4077, "step": 1143 }, { "epoch": 0.061734391020452216, "grad_norm": 1.0904850277250273, "learning_rate": 9.915632561617518e-06, "loss": 0.4909, "step": 1144 }, { "epoch": 0.0617883546489666, "grad_norm": 0.7567115234235193, "learning_rate": 9.91548546526137e-06, "loss": 0.3562, "step": 1145 }, { "epoch": 0.061842318277480976, "grad_norm": 1.21617298381758, "learning_rate": 9.915338241999707e-06, "loss": 0.6208, "step": 1146 }, { "epoch": 0.06189628190599536, "grad_norm": 1.0513148380621817, "learning_rate": 9.915190891836764e-06, "loss": 0.4853, "step": 1147 }, { "epoch": 0.06195024553450974, "grad_norm": 1.1441813281399302, "learning_rate": 9.91504341477677e-06, "loss": 0.6087, "step": 1148 }, { "epoch": 0.06200420916302412, "grad_norm": 1.1407880484823956, "learning_rate": 9.914895810823971e-06, "loss": 0.5844, "step": 1149 }, { "epoch": 0.0620581727915385, "grad_norm": 1.4385825400129781, "learning_rate": 9.914748079982605e-06, "loss": 0.6704, "step": 1150 }, { "epoch": 0.06211213642005289, "grad_norm": 1.0480631466226744, "learning_rate": 9.914600222256919e-06, "loss": 0.4473, "step": 1151 }, { "epoch": 0.06216610004856726, "grad_norm": 1.1797892665469236, "learning_rate": 9.914452237651158e-06, "loss": 0.5103, "step": 1152 }, { "epoch": 0.06222006367708165, "grad_norm": 1.5032582818757396, "learning_rate": 9.914304126169583e-06, "loss": 0.6047, "step": 1153 }, { "epoch": 0.06227402730559603, "grad_norm": 0.8684947886284589, "learning_rate": 9.914155887816446e-06, "loss": 0.3546, "step": 1154 }, { "epoch": 0.06232799093411041, "grad_norm": 1.5666880896107755, "learning_rate": 9.914007522596007e-06, "loss": 0.5919, "step": 1155 }, { "epoch": 0.06238195456262479, "grad_norm": 1.0510489293032053, "learning_rate": 9.913859030512534e-06, "loss": 0.3881, "step": 1156 }, { "epoch": 0.062435918191139174, "grad_norm": 1.4114247599651213, "learning_rate": 9.913710411570292e-06, "loss": 0.6325, "step": 1157 }, { "epoch": 0.06248988181965355, "grad_norm": 1.2126095428117198, "learning_rate": 9.913561665773554e-06, "loss": 0.5722, "step": 1158 }, { "epoch": 0.06254384544816793, "grad_norm": 1.057885219383531, "learning_rate": 9.913412793126592e-06, "loss": 0.5153, "step": 1159 }, { "epoch": 0.06259780907668232, "grad_norm": 1.2375173887892084, "learning_rate": 9.913263793633687e-06, "loss": 0.5986, "step": 1160 }, { "epoch": 0.0626517727051967, "grad_norm": 1.0036003195251175, "learning_rate": 9.913114667299122e-06, "loss": 0.5251, "step": 1161 }, { "epoch": 0.06270573633371108, "grad_norm": 1.0323354816185977, "learning_rate": 9.912965414127182e-06, "loss": 0.6265, "step": 1162 }, { "epoch": 0.06275969996222545, "grad_norm": 1.1574169313900733, "learning_rate": 9.91281603412216e-06, "loss": 0.6769, "step": 1163 }, { "epoch": 0.06281366359073984, "grad_norm": 1.0494706011749473, "learning_rate": 9.912666527288341e-06, "loss": 0.4362, "step": 1164 }, { "epoch": 0.06286762721925422, "grad_norm": 1.2761413413548277, "learning_rate": 9.912516893630031e-06, "loss": 0.6058, "step": 1165 }, { "epoch": 0.0629215908477686, "grad_norm": 1.4476528384056986, "learning_rate": 9.912367133151528e-06, "loss": 0.5537, "step": 1166 }, { "epoch": 0.06297555447628299, "grad_norm": 1.152500597590952, "learning_rate": 9.912217245857132e-06, "loss": 0.3886, "step": 1167 }, { "epoch": 
0.06302951810479737, "grad_norm": 1.480417601860406, "learning_rate": 9.912067231751156e-06, "loss": 0.5057, "step": 1168 }, { "epoch": 0.06308348173331174, "grad_norm": 1.194157368716829, "learning_rate": 9.91191709083791e-06, "loss": 0.4916, "step": 1169 }, { "epoch": 0.06313744536182612, "grad_norm": 0.8836345252791998, "learning_rate": 9.911766823121706e-06, "loss": 0.3916, "step": 1170 }, { "epoch": 0.06319140899034051, "grad_norm": 1.3154308440943532, "learning_rate": 9.91161642860687e-06, "loss": 0.5987, "step": 1171 }, { "epoch": 0.06324537261885489, "grad_norm": 1.0400054157092624, "learning_rate": 9.911465907297718e-06, "loss": 0.4697, "step": 1172 }, { "epoch": 0.06329933624736928, "grad_norm": 1.1416142832602578, "learning_rate": 9.911315259198579e-06, "loss": 0.4946, "step": 1173 }, { "epoch": 0.06335329987588366, "grad_norm": 0.9914967202066232, "learning_rate": 9.91116448431378e-06, "loss": 0.4809, "step": 1174 }, { "epoch": 0.06340726350439804, "grad_norm": 1.1495280753890167, "learning_rate": 9.911013582647658e-06, "loss": 0.5652, "step": 1175 }, { "epoch": 0.06346122713291241, "grad_norm": 1.2240616377358424, "learning_rate": 9.910862554204548e-06, "loss": 0.6197, "step": 1176 }, { "epoch": 0.0635151907614268, "grad_norm": 1.209973008453103, "learning_rate": 9.910711398988793e-06, "loss": 0.6588, "step": 1177 }, { "epoch": 0.06356915438994118, "grad_norm": 1.1091934374707177, "learning_rate": 9.910560117004734e-06, "loss": 0.5144, "step": 1178 }, { "epoch": 0.06362311801845556, "grad_norm": 1.0043435373715903, "learning_rate": 9.910408708256722e-06, "loss": 0.4508, "step": 1179 }, { "epoch": 0.06367708164696995, "grad_norm": 1.3952575583295528, "learning_rate": 9.910257172749106e-06, "loss": 0.6002, "step": 1180 }, { "epoch": 0.06373104527548433, "grad_norm": 0.8719066868090879, "learning_rate": 9.910105510486244e-06, "loss": 0.3516, "step": 1181 }, { "epoch": 0.0637850089039987, "grad_norm": 1.1934117753541411, "learning_rate": 9.909953721472495e-06, "loss": 0.7167, "step": 1182 }, { "epoch": 0.06383897253251308, "grad_norm": 0.8587043230756479, "learning_rate": 9.909801805712218e-06, "loss": 0.3118, "step": 1183 }, { "epoch": 0.06389293616102747, "grad_norm": 1.1854522015607856, "learning_rate": 9.90964976320978e-06, "loss": 0.5166, "step": 1184 }, { "epoch": 0.06394689978954185, "grad_norm": 1.0099459697692823, "learning_rate": 9.909497593969554e-06, "loss": 0.5383, "step": 1185 }, { "epoch": 0.06400086341805623, "grad_norm": 1.293744297887391, "learning_rate": 9.90934529799591e-06, "loss": 0.6095, "step": 1186 }, { "epoch": 0.06405482704657062, "grad_norm": 1.0666799915816103, "learning_rate": 9.909192875293229e-06, "loss": 0.5046, "step": 1187 }, { "epoch": 0.06410879067508499, "grad_norm": 1.1739432941456882, "learning_rate": 9.909040325865888e-06, "loss": 0.621, "step": 1188 }, { "epoch": 0.06416275430359937, "grad_norm": 1.1147396812816694, "learning_rate": 9.908887649718274e-06, "loss": 0.6104, "step": 1189 }, { "epoch": 0.06421671793211375, "grad_norm": 1.1497117814066808, "learning_rate": 9.908734846854774e-06, "loss": 0.4546, "step": 1190 }, { "epoch": 0.06427068156062814, "grad_norm": 1.23405205033504, "learning_rate": 9.90858191727978e-06, "loss": 0.5482, "step": 1191 }, { "epoch": 0.06432464518914252, "grad_norm": 1.273379325808436, "learning_rate": 9.908428860997686e-06, "loss": 0.5062, "step": 1192 }, { "epoch": 0.0643786088176569, "grad_norm": 1.1045224742364133, "learning_rate": 9.908275678012892e-06, "loss": 0.548, "step": 1193 }, { "epoch": 
0.06443257244617127, "grad_norm": 1.0930908233723715, "learning_rate": 9.908122368329801e-06, "loss": 0.6525, "step": 1194 }, { "epoch": 0.06448653607468566, "grad_norm": 1.0660427665845988, "learning_rate": 9.907968931952821e-06, "loss": 0.6039, "step": 1195 }, { "epoch": 0.06454049970320004, "grad_norm": 0.8476648078949227, "learning_rate": 9.90781536888636e-06, "loss": 0.432, "step": 1196 }, { "epoch": 0.06459446333171442, "grad_norm": 1.3671380001878286, "learning_rate": 9.907661679134829e-06, "loss": 0.8111, "step": 1197 }, { "epoch": 0.06464842696022881, "grad_norm": 0.9528680484148464, "learning_rate": 9.907507862702649e-06, "loss": 0.4975, "step": 1198 }, { "epoch": 0.06470239058874319, "grad_norm": 1.2964981045499149, "learning_rate": 9.90735391959424e-06, "loss": 0.7153, "step": 1199 }, { "epoch": 0.06475635421725758, "grad_norm": 1.0355378456042084, "learning_rate": 9.907199849814025e-06, "loss": 0.5623, "step": 1200 }, { "epoch": 0.06481031784577194, "grad_norm": 1.2537173730438371, "learning_rate": 9.907045653366435e-06, "loss": 0.5, "step": 1201 }, { "epoch": 0.06486428147428633, "grad_norm": 1.212170226608698, "learning_rate": 9.906891330255898e-06, "loss": 0.5577, "step": 1202 }, { "epoch": 0.06491824510280071, "grad_norm": 1.1121775734259214, "learning_rate": 9.906736880486852e-06, "loss": 0.5889, "step": 1203 }, { "epoch": 0.0649722087313151, "grad_norm": 1.038469497238206, "learning_rate": 9.906582304063735e-06, "loss": 0.5965, "step": 1204 }, { "epoch": 0.06502617235982948, "grad_norm": 1.1056631997683497, "learning_rate": 9.90642760099099e-06, "loss": 0.5221, "step": 1205 }, { "epoch": 0.06508013598834386, "grad_norm": 1.0720565748716335, "learning_rate": 9.906272771273064e-06, "loss": 0.4575, "step": 1206 }, { "epoch": 0.06513409961685823, "grad_norm": 1.1773842941732493, "learning_rate": 9.906117814914406e-06, "loss": 0.5201, "step": 1207 }, { "epoch": 0.06518806324537262, "grad_norm": 1.1483471120084654, "learning_rate": 9.905962731919471e-06, "loss": 0.5771, "step": 1208 }, { "epoch": 0.065242026873887, "grad_norm": 0.9350855019275369, "learning_rate": 9.905807522292714e-06, "loss": 0.4265, "step": 1209 }, { "epoch": 0.06529599050240138, "grad_norm": 1.0033984360258812, "learning_rate": 9.905652186038597e-06, "loss": 0.4818, "step": 1210 }, { "epoch": 0.06534995413091577, "grad_norm": 0.9918315308051188, "learning_rate": 9.905496723161585e-06, "loss": 0.511, "step": 1211 }, { "epoch": 0.06540391775943015, "grad_norm": 1.0145604266526105, "learning_rate": 9.905341133666145e-06, "loss": 0.4, "step": 1212 }, { "epoch": 0.06545788138794452, "grad_norm": 1.008670119335899, "learning_rate": 9.90518541755675e-06, "loss": 0.5809, "step": 1213 }, { "epoch": 0.0655118450164589, "grad_norm": 1.0720431660775398, "learning_rate": 9.905029574837876e-06, "loss": 0.4742, "step": 1214 }, { "epoch": 0.06556580864497329, "grad_norm": 1.1780792982451493, "learning_rate": 9.904873605514001e-06, "loss": 0.5215, "step": 1215 }, { "epoch": 0.06561977227348767, "grad_norm": 1.056284730122944, "learning_rate": 9.904717509589607e-06, "loss": 0.5105, "step": 1216 }, { "epoch": 0.06567373590200205, "grad_norm": 1.1381611853573148, "learning_rate": 9.90456128706918e-06, "loss": 0.4028, "step": 1217 }, { "epoch": 0.06572769953051644, "grad_norm": 1.20677167314594, "learning_rate": 9.904404937957213e-06, "loss": 0.5369, "step": 1218 }, { "epoch": 0.0657816631590308, "grad_norm": 1.1081183549244946, "learning_rate": 9.904248462258197e-06, "loss": 0.5269, "step": 1219 }, { "epoch": 
0.06583562678754519, "grad_norm": 1.2323146476458697, "learning_rate": 9.90409185997663e-06, "loss": 0.55, "step": 1220 }, { "epoch": 0.06588959041605957, "grad_norm": 0.9112362945863107, "learning_rate": 9.903935131117011e-06, "loss": 0.4685, "step": 1221 }, { "epoch": 0.06594355404457396, "grad_norm": 1.0296930429924567, "learning_rate": 9.90377827568385e-06, "loss": 0.4595, "step": 1222 }, { "epoch": 0.06599751767308834, "grad_norm": 0.9943557725840793, "learning_rate": 9.903621293681651e-06, "loss": 0.469, "step": 1223 }, { "epoch": 0.06605148130160272, "grad_norm": 1.130129501746787, "learning_rate": 9.903464185114925e-06, "loss": 0.4656, "step": 1224 }, { "epoch": 0.06610544493011711, "grad_norm": 1.038727764660228, "learning_rate": 9.90330694998819e-06, "loss": 0.5105, "step": 1225 }, { "epoch": 0.06615940855863148, "grad_norm": 1.1432536959078887, "learning_rate": 9.903149588305962e-06, "loss": 0.4604, "step": 1226 }, { "epoch": 0.06621337218714586, "grad_norm": 1.1401384528976566, "learning_rate": 9.902992100072768e-06, "loss": 0.5932, "step": 1227 }, { "epoch": 0.06626733581566024, "grad_norm": 0.9822748241981794, "learning_rate": 9.902834485293132e-06, "loss": 0.4737, "step": 1228 }, { "epoch": 0.06632129944417463, "grad_norm": 1.3974625908209215, "learning_rate": 9.902676743971584e-06, "loss": 0.6035, "step": 1229 }, { "epoch": 0.06637526307268901, "grad_norm": 0.7875303962578376, "learning_rate": 9.902518876112657e-06, "loss": 0.3551, "step": 1230 }, { "epoch": 0.0664292267012034, "grad_norm": 0.8107934174695123, "learning_rate": 9.902360881720889e-06, "loss": 0.3788, "step": 1231 }, { "epoch": 0.06648319032971776, "grad_norm": 1.2018556053868974, "learning_rate": 9.902202760800823e-06, "loss": 0.5138, "step": 1232 }, { "epoch": 0.06653715395823215, "grad_norm": 1.0767159731328886, "learning_rate": 9.902044513357e-06, "loss": 0.5205, "step": 1233 }, { "epoch": 0.06659111758674653, "grad_norm": 1.0604471707474723, "learning_rate": 9.901886139393968e-06, "loss": 0.4008, "step": 1234 }, { "epoch": 0.06664508121526092, "grad_norm": 1.0346428995736863, "learning_rate": 9.901727638916284e-06, "loss": 0.5544, "step": 1235 }, { "epoch": 0.0666990448437753, "grad_norm": 1.0109933551344352, "learning_rate": 9.9015690119285e-06, "loss": 0.568, "step": 1236 }, { "epoch": 0.06675300847228968, "grad_norm": 1.077339599258623, "learning_rate": 9.901410258435172e-06, "loss": 0.5509, "step": 1237 }, { "epoch": 0.06680697210080405, "grad_norm": 1.0272052979550161, "learning_rate": 9.901251378440867e-06, "loss": 0.5267, "step": 1238 }, { "epoch": 0.06686093572931844, "grad_norm": 0.6809533432630285, "learning_rate": 9.90109237195015e-06, "loss": 0.2503, "step": 1239 }, { "epoch": 0.06691489935783282, "grad_norm": 1.196197606909191, "learning_rate": 9.900933238967594e-06, "loss": 0.5416, "step": 1240 }, { "epoch": 0.0669688629863472, "grad_norm": 0.9891962346539578, "learning_rate": 9.900773979497766e-06, "loss": 0.3866, "step": 1241 }, { "epoch": 0.06702282661486159, "grad_norm": 1.09445159942569, "learning_rate": 9.900614593545249e-06, "loss": 0.5561, "step": 1242 }, { "epoch": 0.06707679024337597, "grad_norm": 1.1695187770888185, "learning_rate": 9.900455081114623e-06, "loss": 0.5888, "step": 1243 }, { "epoch": 0.06713075387189034, "grad_norm": 1.0641905539721739, "learning_rate": 9.90029544221047e-06, "loss": 0.4525, "step": 1244 }, { "epoch": 0.06718471750040472, "grad_norm": 0.8808208480544224, "learning_rate": 9.900135676837382e-06, "loss": 0.4773, "step": 1245 }, { "epoch": 
0.0672386811289191, "grad_norm": 1.0536364789447765, "learning_rate": 9.899975784999946e-06, "loss": 0.507, "step": 1246 }, { "epoch": 0.06729264475743349, "grad_norm": 1.049070033875011, "learning_rate": 9.899815766702762e-06, "loss": 0.522, "step": 1247 }, { "epoch": 0.06734660838594787, "grad_norm": 1.0865736584788848, "learning_rate": 9.899655621950426e-06, "loss": 0.8252, "step": 1248 }, { "epoch": 0.06740057201446226, "grad_norm": 1.1332885240116637, "learning_rate": 9.899495350747543e-06, "loss": 0.559, "step": 1249 }, { "epoch": 0.06745453564297664, "grad_norm": 0.998592122689888, "learning_rate": 9.899334953098718e-06, "loss": 0.5819, "step": 1250 }, { "epoch": 0.06750849927149101, "grad_norm": 0.9424013288908004, "learning_rate": 9.89917442900856e-06, "loss": 0.5392, "step": 1251 }, { "epoch": 0.0675624629000054, "grad_norm": 1.0570605717238244, "learning_rate": 9.899013778481685e-06, "loss": 0.4872, "step": 1252 }, { "epoch": 0.06761642652851978, "grad_norm": 1.3281893454625193, "learning_rate": 9.898853001522709e-06, "loss": 0.6775, "step": 1253 }, { "epoch": 0.06767039015703416, "grad_norm": 1.2270893852637632, "learning_rate": 9.898692098136252e-06, "loss": 0.6019, "step": 1254 }, { "epoch": 0.06772435378554854, "grad_norm": 0.9933541028848147, "learning_rate": 9.898531068326941e-06, "loss": 0.4468, "step": 1255 }, { "epoch": 0.06777831741406293, "grad_norm": 1.2519848674573097, "learning_rate": 9.898369912099401e-06, "loss": 0.6209, "step": 1256 }, { "epoch": 0.0678322810425773, "grad_norm": 1.022355968046646, "learning_rate": 9.898208629458267e-06, "loss": 0.4097, "step": 1257 }, { "epoch": 0.06788624467109168, "grad_norm": 1.3561032280506533, "learning_rate": 9.898047220408174e-06, "loss": 0.4646, "step": 1258 }, { "epoch": 0.06794020829960606, "grad_norm": 1.5410274124922365, "learning_rate": 9.897885684953758e-06, "loss": 0.5176, "step": 1259 }, { "epoch": 0.06799417192812045, "grad_norm": 1.0242001978910307, "learning_rate": 9.897724023099663e-06, "loss": 0.511, "step": 1260 }, { "epoch": 0.06804813555663483, "grad_norm": 0.808023448397742, "learning_rate": 9.897562234850538e-06, "loss": 0.3101, "step": 1261 }, { "epoch": 0.06810209918514921, "grad_norm": 1.2318290141400206, "learning_rate": 9.89740032021103e-06, "loss": 0.7173, "step": 1262 }, { "epoch": 0.06815606281366358, "grad_norm": 1.5815542248607608, "learning_rate": 9.897238279185793e-06, "loss": 0.7235, "step": 1263 }, { "epoch": 0.06821002644217797, "grad_norm": 1.0770135176453377, "learning_rate": 9.897076111779487e-06, "loss": 0.4103, "step": 1264 }, { "epoch": 0.06826399007069235, "grad_norm": 0.9576938412338435, "learning_rate": 9.896913817996769e-06, "loss": 0.4226, "step": 1265 }, { "epoch": 0.06831795369920673, "grad_norm": 1.3209514863119294, "learning_rate": 9.896751397842304e-06, "loss": 0.6787, "step": 1266 }, { "epoch": 0.06837191732772112, "grad_norm": 1.0564972787844833, "learning_rate": 9.896588851320763e-06, "loss": 0.5264, "step": 1267 }, { "epoch": 0.0684258809562355, "grad_norm": 0.8508933352273291, "learning_rate": 9.896426178436815e-06, "loss": 0.45, "step": 1268 }, { "epoch": 0.06847984458474989, "grad_norm": 0.9507977363069192, "learning_rate": 9.896263379195137e-06, "loss": 0.4189, "step": 1269 }, { "epoch": 0.06853380821326426, "grad_norm": 0.8244573927480834, "learning_rate": 9.896100453600407e-06, "loss": 0.4949, "step": 1270 }, { "epoch": 0.06858777184177864, "grad_norm": 1.064085097786398, "learning_rate": 9.895937401657309e-06, "loss": 0.489, "step": 1271 }, { "epoch": 
0.06864173547029302, "grad_norm": 1.0549572332879322, "learning_rate": 9.895774223370526e-06, "loss": 0.5162, "step": 1272 }, { "epoch": 0.0686956990988074, "grad_norm": 1.120855272304418, "learning_rate": 9.895610918744752e-06, "loss": 0.4999, "step": 1273 }, { "epoch": 0.06874966272732179, "grad_norm": 1.1686633508697848, "learning_rate": 9.895447487784679e-06, "loss": 0.6084, "step": 1274 }, { "epoch": 0.06880362635583617, "grad_norm": 1.0405055309000513, "learning_rate": 9.895283930495003e-06, "loss": 0.4964, "step": 1275 }, { "epoch": 0.06885758998435054, "grad_norm": 1.2960881877758363, "learning_rate": 9.895120246880427e-06, "loss": 0.506, "step": 1276 }, { "epoch": 0.06891155361286493, "grad_norm": 1.0686435472564517, "learning_rate": 9.894956436945653e-06, "loss": 0.5458, "step": 1277 }, { "epoch": 0.06896551724137931, "grad_norm": 1.0905883560152692, "learning_rate": 9.89479250069539e-06, "loss": 0.4494, "step": 1278 }, { "epoch": 0.06901948086989369, "grad_norm": 1.0044385912776697, "learning_rate": 9.894628438134351e-06, "loss": 0.4121, "step": 1279 }, { "epoch": 0.06907344449840808, "grad_norm": 1.388799715247707, "learning_rate": 9.89446424926725e-06, "loss": 0.6794, "step": 1280 }, { "epoch": 0.06912740812692246, "grad_norm": 1.2259726622717861, "learning_rate": 9.894299934098805e-06, "loss": 0.5546, "step": 1281 }, { "epoch": 0.06918137175543683, "grad_norm": 1.090031038346463, "learning_rate": 9.894135492633742e-06, "loss": 0.491, "step": 1282 }, { "epoch": 0.06923533538395121, "grad_norm": 1.1025694813959253, "learning_rate": 9.893970924876783e-06, "loss": 0.4197, "step": 1283 }, { "epoch": 0.0692892990124656, "grad_norm": 1.1436354480458757, "learning_rate": 9.89380623083266e-06, "loss": 0.7405, "step": 1284 }, { "epoch": 0.06934326264097998, "grad_norm": 1.137637109686756, "learning_rate": 9.893641410506107e-06, "loss": 0.4212, "step": 1285 }, { "epoch": 0.06939722626949436, "grad_norm": 1.3106430304823857, "learning_rate": 9.89347646390186e-06, "loss": 0.541, "step": 1286 }, { "epoch": 0.06945118989800875, "grad_norm": 1.438737980772488, "learning_rate": 9.89331139102466e-06, "loss": 0.523, "step": 1287 }, { "epoch": 0.06950515352652312, "grad_norm": 1.0443079539185955, "learning_rate": 9.893146191879254e-06, "loss": 0.4721, "step": 1288 }, { "epoch": 0.0695591171550375, "grad_norm": 1.083102361040646, "learning_rate": 9.892980866470383e-06, "loss": 0.4348, "step": 1289 }, { "epoch": 0.06961308078355188, "grad_norm": 1.0899998628729872, "learning_rate": 9.892815414802807e-06, "loss": 0.5119, "step": 1290 }, { "epoch": 0.06966704441206627, "grad_norm": 1.061828024703141, "learning_rate": 9.892649836881276e-06, "loss": 0.4667, "step": 1291 }, { "epoch": 0.06972100804058065, "grad_norm": 0.9373877736100461, "learning_rate": 9.892484132710549e-06, "loss": 0.4201, "step": 1292 }, { "epoch": 0.06977497166909503, "grad_norm": 1.0883307848605492, "learning_rate": 9.892318302295392e-06, "loss": 0.5018, "step": 1293 }, { "epoch": 0.06982893529760942, "grad_norm": 0.9985812999678902, "learning_rate": 9.892152345640568e-06, "loss": 0.4986, "step": 1294 }, { "epoch": 0.06988289892612379, "grad_norm": 1.0702767801569917, "learning_rate": 9.891986262750846e-06, "loss": 0.4875, "step": 1295 }, { "epoch": 0.06993686255463817, "grad_norm": 1.3495093881975602, "learning_rate": 9.891820053631003e-06, "loss": 0.8506, "step": 1296 }, { "epoch": 0.06999082618315255, "grad_norm": 1.2873538896620391, "learning_rate": 9.891653718285815e-06, "loss": 0.5292, "step": 1297 }, { "epoch": 
0.07004478981166694, "grad_norm": 1.0949458463462969, "learning_rate": 9.891487256720058e-06, "loss": 0.4176, "step": 1298 }, { "epoch": 0.07009875344018132, "grad_norm": 1.0192077829341488, "learning_rate": 9.891320668938523e-06, "loss": 0.5131, "step": 1299 }, { "epoch": 0.0701527170686957, "grad_norm": 1.217215663047037, "learning_rate": 9.891153954945994e-06, "loss": 0.5815, "step": 1300 }, { "epoch": 0.07020668069721007, "grad_norm": 1.0709972056787778, "learning_rate": 9.890987114747262e-06, "loss": 0.6182, "step": 1301 }, { "epoch": 0.07026064432572446, "grad_norm": 1.0689849256355781, "learning_rate": 9.890820148347126e-06, "loss": 0.4765, "step": 1302 }, { "epoch": 0.07031460795423884, "grad_norm": 1.2774043846790137, "learning_rate": 9.89065305575038e-06, "loss": 0.6815, "step": 1303 }, { "epoch": 0.07036857158275323, "grad_norm": 0.8352990073885136, "learning_rate": 9.89048583696183e-06, "loss": 0.408, "step": 1304 }, { "epoch": 0.07042253521126761, "grad_norm": 1.0086871424985875, "learning_rate": 9.890318491986282e-06, "loss": 0.4671, "step": 1305 }, { "epoch": 0.07047649883978199, "grad_norm": 0.8656607341025481, "learning_rate": 9.890151020828542e-06, "loss": 0.3313, "step": 1306 }, { "epoch": 0.07053046246829636, "grad_norm": 1.0708131139642576, "learning_rate": 9.889983423493428e-06, "loss": 0.6434, "step": 1307 }, { "epoch": 0.07058442609681075, "grad_norm": 1.0731101149836713, "learning_rate": 9.889815699985755e-06, "loss": 0.6455, "step": 1308 }, { "epoch": 0.07063838972532513, "grad_norm": 1.1210721798942302, "learning_rate": 9.88964785031034e-06, "loss": 0.6771, "step": 1309 }, { "epoch": 0.07069235335383951, "grad_norm": 1.170011729357879, "learning_rate": 9.889479874472012e-06, "loss": 0.5006, "step": 1310 }, { "epoch": 0.0707463169823539, "grad_norm": 1.1552290898873112, "learning_rate": 9.889311772475597e-06, "loss": 0.5545, "step": 1311 }, { "epoch": 0.07080028061086828, "grad_norm": 1.3156333314460178, "learning_rate": 9.889143544325926e-06, "loss": 0.601, "step": 1312 }, { "epoch": 0.07085424423938265, "grad_norm": 1.1998178584109462, "learning_rate": 9.888975190027837e-06, "loss": 0.7065, "step": 1313 }, { "epoch": 0.07090820786789703, "grad_norm": 1.1722387328498052, "learning_rate": 9.888806709586163e-06, "loss": 0.6057, "step": 1314 }, { "epoch": 0.07096217149641142, "grad_norm": 0.953568028644597, "learning_rate": 9.888638103005753e-06, "loss": 0.5316, "step": 1315 }, { "epoch": 0.0710161351249258, "grad_norm": 0.9464147533806352, "learning_rate": 9.888469370291448e-06, "loss": 0.4641, "step": 1316 }, { "epoch": 0.07107009875344018, "grad_norm": 1.220563680429435, "learning_rate": 9.888300511448099e-06, "loss": 0.6344, "step": 1317 }, { "epoch": 0.07112406238195457, "grad_norm": 0.9093703810615675, "learning_rate": 9.88813152648056e-06, "loss": 0.4236, "step": 1318 }, { "epoch": 0.07117802601046895, "grad_norm": 0.9583875339446923, "learning_rate": 9.887962415393685e-06, "loss": 0.466, "step": 1319 }, { "epoch": 0.07123198963898332, "grad_norm": 1.032828488882854, "learning_rate": 9.887793178192338e-06, "loss": 0.5059, "step": 1320 }, { "epoch": 0.0712859532674977, "grad_norm": 1.227250122945056, "learning_rate": 9.887623814881382e-06, "loss": 0.5929, "step": 1321 }, { "epoch": 0.07133991689601209, "grad_norm": 1.20230556777309, "learning_rate": 9.887454325465683e-06, "loss": 0.5909, "step": 1322 }, { "epoch": 0.07139388052452647, "grad_norm": 1.472206886931071, "learning_rate": 9.887284709950113e-06, "loss": 0.6516, "step": 1323 }, { "epoch": 
0.07144784415304085, "grad_norm": 1.019024353052126, "learning_rate": 9.88711496833955e-06, "loss": 0.4596, "step": 1324 }, { "epoch": 0.07150180778155524, "grad_norm": 1.1060622738391286, "learning_rate": 9.886945100638868e-06, "loss": 0.4561, "step": 1325 }, { "epoch": 0.07155577141006961, "grad_norm": 1.164882065672562, "learning_rate": 9.88677510685295e-06, "loss": 0.421, "step": 1326 }, { "epoch": 0.07160973503858399, "grad_norm": 1.0693042439763076, "learning_rate": 9.886604986986685e-06, "loss": 0.5095, "step": 1327 }, { "epoch": 0.07166369866709837, "grad_norm": 1.3070559497267162, "learning_rate": 9.886434741044962e-06, "loss": 0.6779, "step": 1328 }, { "epoch": 0.07171766229561276, "grad_norm": 1.2486835781892738, "learning_rate": 9.886264369032669e-06, "loss": 0.6945, "step": 1329 }, { "epoch": 0.07177162592412714, "grad_norm": 1.007144743360955, "learning_rate": 9.886093870954707e-06, "loss": 0.4336, "step": 1330 }, { "epoch": 0.07182558955264153, "grad_norm": 1.277452180819719, "learning_rate": 9.885923246815974e-06, "loss": 0.6454, "step": 1331 }, { "epoch": 0.0718795531811559, "grad_norm": 1.0270566050532783, "learning_rate": 9.885752496621378e-06, "loss": 0.4727, "step": 1332 }, { "epoch": 0.07193351680967028, "grad_norm": 1.6895702394247003, "learning_rate": 9.885581620375823e-06, "loss": 0.4697, "step": 1333 }, { "epoch": 0.07198748043818466, "grad_norm": 1.142009293051695, "learning_rate": 9.885410618084218e-06, "loss": 0.4569, "step": 1334 }, { "epoch": 0.07204144406669905, "grad_norm": 1.024199754538245, "learning_rate": 9.885239489751485e-06, "loss": 0.5457, "step": 1335 }, { "epoch": 0.07209540769521343, "grad_norm": 1.0721261665930917, "learning_rate": 9.885068235382535e-06, "loss": 0.7341, "step": 1336 }, { "epoch": 0.07214937132372781, "grad_norm": 0.9571357893767014, "learning_rate": 9.884896854982295e-06, "loss": 0.4335, "step": 1337 }, { "epoch": 0.07220333495224218, "grad_norm": 1.0376508475427155, "learning_rate": 9.88472534855569e-06, "loss": 0.5813, "step": 1338 }, { "epoch": 0.07225729858075657, "grad_norm": 1.1844641911457416, "learning_rate": 9.884553716107646e-06, "loss": 0.5705, "step": 1339 }, { "epoch": 0.07231126220927095, "grad_norm": 1.22886962162792, "learning_rate": 9.884381957643096e-06, "loss": 0.5847, "step": 1340 }, { "epoch": 0.07236522583778533, "grad_norm": 1.1919558046026268, "learning_rate": 9.884210073166982e-06, "loss": 0.5849, "step": 1341 }, { "epoch": 0.07241918946629972, "grad_norm": 1.2204931291727565, "learning_rate": 9.884038062684239e-06, "loss": 0.5792, "step": 1342 }, { "epoch": 0.0724731530948141, "grad_norm": 0.8214518194672884, "learning_rate": 9.883865926199812e-06, "loss": 0.4572, "step": 1343 }, { "epoch": 0.07252711672332848, "grad_norm": 1.025309764037712, "learning_rate": 9.883693663718648e-06, "loss": 0.5484, "step": 1344 }, { "epoch": 0.07258108035184285, "grad_norm": 1.1530513234583, "learning_rate": 9.8835212752457e-06, "loss": 0.6541, "step": 1345 }, { "epoch": 0.07263504398035724, "grad_norm": 1.1307589246480472, "learning_rate": 9.883348760785922e-06, "loss": 0.5462, "step": 1346 }, { "epoch": 0.07268900760887162, "grad_norm": 1.2814630177058313, "learning_rate": 9.88317612034427e-06, "loss": 0.6591, "step": 1347 }, { "epoch": 0.072742971237386, "grad_norm": 1.0184193120221992, "learning_rate": 9.883003353925709e-06, "loss": 0.4958, "step": 1348 }, { "epoch": 0.07279693486590039, "grad_norm": 1.0652729315050362, "learning_rate": 9.882830461535202e-06, "loss": 0.4572, "step": 1349 }, { "epoch": 
0.07285089849441477, "grad_norm": 0.8468286121757141, "learning_rate": 9.88265744317772e-06, "loss": 0.3354, "step": 1350 }, { "epoch": 0.07290486212292914, "grad_norm": 1.122813364663986, "learning_rate": 9.882484298858232e-06, "loss": 0.5723, "step": 1351 }, { "epoch": 0.07295882575144352, "grad_norm": 0.8808273744649087, "learning_rate": 9.88231102858172e-06, "loss": 0.3491, "step": 1352 }, { "epoch": 0.07301278937995791, "grad_norm": 1.100059421113247, "learning_rate": 9.88213763235316e-06, "loss": 0.5995, "step": 1353 }, { "epoch": 0.07306675300847229, "grad_norm": 1.0491102575485138, "learning_rate": 9.881964110177537e-06, "loss": 0.4412, "step": 1354 }, { "epoch": 0.07312071663698667, "grad_norm": 1.025894748978225, "learning_rate": 9.881790462059836e-06, "loss": 0.4851, "step": 1355 }, { "epoch": 0.07317468026550106, "grad_norm": 1.0081592749449473, "learning_rate": 9.881616688005052e-06, "loss": 0.6059, "step": 1356 }, { "epoch": 0.07322864389401543, "grad_norm": 1.3253708635397425, "learning_rate": 9.881442788018176e-06, "loss": 0.4249, "step": 1357 }, { "epoch": 0.07328260752252981, "grad_norm": 1.0151580121964023, "learning_rate": 9.881268762104207e-06, "loss": 0.4598, "step": 1358 }, { "epoch": 0.0733365711510442, "grad_norm": 1.2406743002990386, "learning_rate": 9.881094610268146e-06, "loss": 0.5223, "step": 1359 }, { "epoch": 0.07339053477955858, "grad_norm": 1.3562233124710563, "learning_rate": 9.880920332515001e-06, "loss": 0.6148, "step": 1360 }, { "epoch": 0.07344449840807296, "grad_norm": 1.0973597451727162, "learning_rate": 9.880745928849777e-06, "loss": 0.48, "step": 1361 }, { "epoch": 0.07349846203658734, "grad_norm": 0.9897650962413922, "learning_rate": 9.880571399277491e-06, "loss": 0.5059, "step": 1362 }, { "epoch": 0.07355242566510173, "grad_norm": 1.1310911829283203, "learning_rate": 9.880396743803155e-06, "loss": 0.4973, "step": 1363 }, { "epoch": 0.0736063892936161, "grad_norm": 1.131130288405654, "learning_rate": 9.880221962431793e-06, "loss": 0.5682, "step": 1364 }, { "epoch": 0.07366035292213048, "grad_norm": 0.9595916027242318, "learning_rate": 9.880047055168422e-06, "loss": 0.4736, "step": 1365 }, { "epoch": 0.07371431655064486, "grad_norm": 0.9826388128317063, "learning_rate": 9.879872022018076e-06, "loss": 0.4291, "step": 1366 }, { "epoch": 0.07376828017915925, "grad_norm": 0.8814962570745393, "learning_rate": 9.87969686298578e-06, "loss": 0.3894, "step": 1367 }, { "epoch": 0.07382224380767363, "grad_norm": 1.410852632537319, "learning_rate": 9.879521578076571e-06, "loss": 0.7539, "step": 1368 }, { "epoch": 0.07387620743618802, "grad_norm": 1.2642213593429998, "learning_rate": 9.87934616729549e-06, "loss": 0.6153, "step": 1369 }, { "epoch": 0.07393017106470239, "grad_norm": 1.0651239703821658, "learning_rate": 9.879170630647573e-06, "loss": 0.5788, "step": 1370 }, { "epoch": 0.07398413469321677, "grad_norm": 1.2538256270353412, "learning_rate": 9.878994968137867e-06, "loss": 0.6711, "step": 1371 }, { "epoch": 0.07403809832173115, "grad_norm": 1.3448755346794439, "learning_rate": 9.878819179771421e-06, "loss": 0.7628, "step": 1372 }, { "epoch": 0.07409206195024554, "grad_norm": 1.0957245802380502, "learning_rate": 9.878643265553289e-06, "loss": 0.4997, "step": 1373 }, { "epoch": 0.07414602557875992, "grad_norm": 1.0756860387728229, "learning_rate": 9.878467225488524e-06, "loss": 0.5338, "step": 1374 }, { "epoch": 0.0741999892072743, "grad_norm": 1.3450133524875882, "learning_rate": 9.878291059582185e-06, "loss": 0.6876, "step": 1375 }, { "epoch": 
0.07425395283578867, "grad_norm": 1.0086571871272343, "learning_rate": 9.878114767839339e-06, "loss": 0.5276, "step": 1376 }, { "epoch": 0.07430791646430306, "grad_norm": 1.2376565219085747, "learning_rate": 9.87793835026505e-06, "loss": 0.4949, "step": 1377 }, { "epoch": 0.07436188009281744, "grad_norm": 0.9668329278997928, "learning_rate": 9.87776180686439e-06, "loss": 0.4691, "step": 1378 }, { "epoch": 0.07441584372133182, "grad_norm": 0.9407464409393771, "learning_rate": 9.877585137642431e-06, "loss": 0.3982, "step": 1379 }, { "epoch": 0.0744698073498462, "grad_norm": 0.9930611373186313, "learning_rate": 9.877408342604254e-06, "loss": 0.3368, "step": 1380 }, { "epoch": 0.07452377097836059, "grad_norm": 1.1122083373628897, "learning_rate": 9.877231421754939e-06, "loss": 0.5944, "step": 1381 }, { "epoch": 0.07457773460687496, "grad_norm": 1.3592230765430233, "learning_rate": 9.877054375099565e-06, "loss": 0.4754, "step": 1382 }, { "epoch": 0.07463169823538934, "grad_norm": 1.231853515584301, "learning_rate": 9.87687720264323e-06, "loss": 0.5812, "step": 1383 }, { "epoch": 0.07468566186390373, "grad_norm": 1.0627488331679096, "learning_rate": 9.876699904391019e-06, "loss": 0.4706, "step": 1384 }, { "epoch": 0.07473962549241811, "grad_norm": 1.2249040447217507, "learning_rate": 9.876522480348034e-06, "loss": 0.622, "step": 1385 }, { "epoch": 0.0747935891209325, "grad_norm": 1.0469375712052675, "learning_rate": 9.876344930519368e-06, "loss": 0.5364, "step": 1386 }, { "epoch": 0.07484755274944688, "grad_norm": 1.0517305301550146, "learning_rate": 9.876167254910127e-06, "loss": 0.6213, "step": 1387 }, { "epoch": 0.07490151637796126, "grad_norm": 1.1405961334518535, "learning_rate": 9.87598945352542e-06, "loss": 0.4842, "step": 1388 }, { "epoch": 0.07495548000647563, "grad_norm": 1.0798106593091088, "learning_rate": 9.875811526370349e-06, "loss": 0.5571, "step": 1389 }, { "epoch": 0.07500944363499001, "grad_norm": 0.9299544024296335, "learning_rate": 9.875633473450038e-06, "loss": 0.4723, "step": 1390 }, { "epoch": 0.0750634072635044, "grad_norm": 1.0266914149986648, "learning_rate": 9.875455294769598e-06, "loss": 0.5307, "step": 1391 }, { "epoch": 0.07511737089201878, "grad_norm": 0.8777907072326566, "learning_rate": 9.87527699033415e-06, "loss": 0.3752, "step": 1392 }, { "epoch": 0.07517133452053316, "grad_norm": 1.0296573583662345, "learning_rate": 9.875098560148823e-06, "loss": 0.6315, "step": 1393 }, { "epoch": 0.07522529814904755, "grad_norm": 1.1710635386858284, "learning_rate": 9.874920004218741e-06, "loss": 0.5093, "step": 1394 }, { "epoch": 0.07527926177756192, "grad_norm": 1.3032163605375449, "learning_rate": 9.87474132254904e-06, "loss": 0.6582, "step": 1395 }, { "epoch": 0.0753332254060763, "grad_norm": 1.0891684767103764, "learning_rate": 9.87456251514485e-06, "loss": 0.5775, "step": 1396 }, { "epoch": 0.07538718903459068, "grad_norm": 1.1123695657048627, "learning_rate": 9.874383582011314e-06, "loss": 0.5737, "step": 1397 }, { "epoch": 0.07544115266310507, "grad_norm": 1.1437853422300601, "learning_rate": 9.874204523153576e-06, "loss": 0.5532, "step": 1398 }, { "epoch": 0.07549511629161945, "grad_norm": 1.221134660243962, "learning_rate": 9.874025338576778e-06, "loss": 0.5409, "step": 1399 }, { "epoch": 0.07554907992013384, "grad_norm": 1.2284744208021552, "learning_rate": 9.873846028286073e-06, "loss": 0.6292, "step": 1400 }, { "epoch": 0.0756030435486482, "grad_norm": 1.1128867258560233, "learning_rate": 9.873666592286613e-06, "loss": 0.4633, "step": 1401 }, { "epoch": 
0.07565700717716259, "grad_norm": 1.0580769400216088, "learning_rate": 9.873487030583557e-06, "loss": 0.472, "step": 1402 }, { "epoch": 0.07571097080567697, "grad_norm": 1.1255150435270471, "learning_rate": 9.873307343182065e-06, "loss": 0.5454, "step": 1403 }, { "epoch": 0.07576493443419136, "grad_norm": 1.1817209302522385, "learning_rate": 9.8731275300873e-06, "loss": 0.5515, "step": 1404 }, { "epoch": 0.07581889806270574, "grad_norm": 0.9668033613228058, "learning_rate": 9.87294759130443e-06, "loss": 0.4567, "step": 1405 }, { "epoch": 0.07587286169122012, "grad_norm": 1.367242744903834, "learning_rate": 9.87276752683863e-06, "loss": 0.7216, "step": 1406 }, { "epoch": 0.07592682531973449, "grad_norm": 0.9028418658167503, "learning_rate": 9.87258733669507e-06, "loss": 0.4066, "step": 1407 }, { "epoch": 0.07598078894824888, "grad_norm": 1.0117044010494998, "learning_rate": 9.872407020878935e-06, "loss": 0.497, "step": 1408 }, { "epoch": 0.07603475257676326, "grad_norm": 1.0982062937724875, "learning_rate": 9.872226579395403e-06, "loss": 0.4744, "step": 1409 }, { "epoch": 0.07608871620527764, "grad_norm": 1.4153922946353177, "learning_rate": 9.87204601224966e-06, "loss": 0.5274, "step": 1410 }, { "epoch": 0.07614267983379203, "grad_norm": 1.2327376197321842, "learning_rate": 9.871865319446898e-06, "loss": 0.8642, "step": 1411 }, { "epoch": 0.07619664346230641, "grad_norm": 1.083491103854235, "learning_rate": 9.871684500992311e-06, "loss": 0.4422, "step": 1412 }, { "epoch": 0.0762506070908208, "grad_norm": 1.160377295489073, "learning_rate": 9.871503556891091e-06, "loss": 0.5039, "step": 1413 }, { "epoch": 0.07630457071933516, "grad_norm": 0.930025103853537, "learning_rate": 9.871322487148443e-06, "loss": 0.5091, "step": 1414 }, { "epoch": 0.07635853434784955, "grad_norm": 1.115305167417295, "learning_rate": 9.87114129176957e-06, "loss": 0.5114, "step": 1415 }, { "epoch": 0.07641249797636393, "grad_norm": 1.0751915996349761, "learning_rate": 9.87095997075968e-06, "loss": 0.4455, "step": 1416 }, { "epoch": 0.07646646160487831, "grad_norm": 0.7236439217019938, "learning_rate": 9.870778524123984e-06, "loss": 0.3525, "step": 1417 }, { "epoch": 0.0765204252333927, "grad_norm": 1.1651640949299422, "learning_rate": 9.870596951867695e-06, "loss": 0.6346, "step": 1418 }, { "epoch": 0.07657438886190708, "grad_norm": 1.1150536829259694, "learning_rate": 9.870415253996035e-06, "loss": 0.4391, "step": 1419 }, { "epoch": 0.07662835249042145, "grad_norm": 0.898592522230811, "learning_rate": 9.870233430514224e-06, "loss": 0.427, "step": 1420 }, { "epoch": 0.07668231611893583, "grad_norm": 0.9342003491093377, "learning_rate": 9.87005148142749e-06, "loss": 0.4328, "step": 1421 }, { "epoch": 0.07673627974745022, "grad_norm": 1.2016248332588375, "learning_rate": 9.86986940674106e-06, "loss": 0.5819, "step": 1422 }, { "epoch": 0.0767902433759646, "grad_norm": 1.191696133549967, "learning_rate": 9.869687206460167e-06, "loss": 0.5651, "step": 1423 }, { "epoch": 0.07684420700447898, "grad_norm": 1.0476512389761121, "learning_rate": 9.869504880590048e-06, "loss": 0.5231, "step": 1424 }, { "epoch": 0.07689817063299337, "grad_norm": 1.3408752448458274, "learning_rate": 9.869322429135945e-06, "loss": 0.5502, "step": 1425 }, { "epoch": 0.07695213426150774, "grad_norm": 0.9776976465401988, "learning_rate": 9.8691398521031e-06, "loss": 0.5854, "step": 1426 }, { "epoch": 0.07700609789002212, "grad_norm": 1.0394366221764808, "learning_rate": 9.86895714949676e-06, "loss": 0.3746, "step": 1427 }, { "epoch": 
0.0770600615185365, "grad_norm": 1.1964174509006205, "learning_rate": 9.868774321322179e-06, "loss": 0.5364, "step": 1428 }, { "epoch": 0.07711402514705089, "grad_norm": 0.9831195754750282, "learning_rate": 9.868591367584609e-06, "loss": 0.4765, "step": 1429 }, { "epoch": 0.07716798877556527, "grad_norm": 0.9867185352555774, "learning_rate": 9.868408288289309e-06, "loss": 0.5155, "step": 1430 }, { "epoch": 0.07722195240407966, "grad_norm": 0.8251961683724085, "learning_rate": 9.86822508344154e-06, "loss": 0.4613, "step": 1431 }, { "epoch": 0.07727591603259402, "grad_norm": 1.0345335763712429, "learning_rate": 9.86804175304657e-06, "loss": 0.4671, "step": 1432 }, { "epoch": 0.07732987966110841, "grad_norm": 0.9341757938183023, "learning_rate": 9.867858297109663e-06, "loss": 0.4752, "step": 1433 }, { "epoch": 0.07738384328962279, "grad_norm": 1.160587148930441, "learning_rate": 9.867674715636098e-06, "loss": 0.491, "step": 1434 }, { "epoch": 0.07743780691813718, "grad_norm": 1.0836427319372677, "learning_rate": 9.867491008631148e-06, "loss": 0.434, "step": 1435 }, { "epoch": 0.07749177054665156, "grad_norm": 1.0559453818687794, "learning_rate": 9.867307176100093e-06, "loss": 0.507, "step": 1436 }, { "epoch": 0.07754573417516594, "grad_norm": 1.2529640389859145, "learning_rate": 9.867123218048217e-06, "loss": 0.596, "step": 1437 }, { "epoch": 0.07759969780368033, "grad_norm": 1.0323130023185692, "learning_rate": 9.866939134480809e-06, "loss": 0.4515, "step": 1438 }, { "epoch": 0.0776536614321947, "grad_norm": 0.9039489075926891, "learning_rate": 9.866754925403155e-06, "loss": 0.4464, "step": 1439 }, { "epoch": 0.07770762506070908, "grad_norm": 0.9353282870534332, "learning_rate": 9.866570590820552e-06, "loss": 0.4157, "step": 1440 }, { "epoch": 0.07776158868922346, "grad_norm": 1.1436730301977664, "learning_rate": 9.8663861307383e-06, "loss": 0.4728, "step": 1441 }, { "epoch": 0.07781555231773785, "grad_norm": 1.205298017870878, "learning_rate": 9.866201545161697e-06, "loss": 0.6542, "step": 1442 }, { "epoch": 0.07786951594625223, "grad_norm": 1.2026611316019373, "learning_rate": 9.866016834096051e-06, "loss": 0.5678, "step": 1443 }, { "epoch": 0.07792347957476661, "grad_norm": 1.2245263511227187, "learning_rate": 9.865831997546671e-06, "loss": 0.6857, "step": 1444 }, { "epoch": 0.07797744320328098, "grad_norm": 1.2355437471738135, "learning_rate": 9.865647035518864e-06, "loss": 0.689, "step": 1445 }, { "epoch": 0.07803140683179537, "grad_norm": 1.2277745201727839, "learning_rate": 9.865461948017954e-06, "loss": 0.5433, "step": 1446 }, { "epoch": 0.07808537046030975, "grad_norm": 1.186068147842787, "learning_rate": 9.865276735049257e-06, "loss": 0.543, "step": 1447 }, { "epoch": 0.07813933408882413, "grad_norm": 1.1277496567219691, "learning_rate": 9.865091396618094e-06, "loss": 0.5915, "step": 1448 }, { "epoch": 0.07819329771733852, "grad_norm": 1.2531167963137233, "learning_rate": 9.864905932729793e-06, "loss": 0.6918, "step": 1449 }, { "epoch": 0.0782472613458529, "grad_norm": 1.0245060141670959, "learning_rate": 9.864720343389685e-06, "loss": 0.4987, "step": 1450 }, { "epoch": 0.07830122497436727, "grad_norm": 1.1206027549310897, "learning_rate": 9.864534628603104e-06, "loss": 0.5332, "step": 1451 }, { "epoch": 0.07835518860288165, "grad_norm": 1.0691952766003536, "learning_rate": 9.86434878837539e-06, "loss": 0.5743, "step": 1452 }, { "epoch": 0.07840915223139604, "grad_norm": 1.2801306728083763, "learning_rate": 9.86416282271188e-06, "loss": 0.591, "step": 1453 }, { "epoch": 
0.07846311585991042, "grad_norm": 0.8782631910423312, "learning_rate": 9.863976731617922e-06, "loss": 0.5008, "step": 1454 }, { "epoch": 0.0785170794884248, "grad_norm": 1.5073536444498166, "learning_rate": 9.863790515098863e-06, "loss": 0.6005, "step": 1455 }, { "epoch": 0.07857104311693919, "grad_norm": 0.9509012874588001, "learning_rate": 9.863604173160054e-06, "loss": 0.395, "step": 1456 }, { "epoch": 0.07862500674545357, "grad_norm": 0.992038118489577, "learning_rate": 9.863417705806854e-06, "loss": 0.4284, "step": 1457 }, { "epoch": 0.07867897037396794, "grad_norm": 0.9145494405449114, "learning_rate": 9.86323111304462e-06, "loss": 0.3893, "step": 1458 }, { "epoch": 0.07873293400248232, "grad_norm": 0.8142662446402474, "learning_rate": 9.863044394878712e-06, "loss": 0.427, "step": 1459 }, { "epoch": 0.07878689763099671, "grad_norm": 0.9737968481837906, "learning_rate": 9.862857551314503e-06, "loss": 0.4625, "step": 1460 }, { "epoch": 0.07884086125951109, "grad_norm": 1.05966516755037, "learning_rate": 9.862670582357358e-06, "loss": 0.4998, "step": 1461 }, { "epoch": 0.07889482488802547, "grad_norm": 1.237120469588189, "learning_rate": 9.862483488012653e-06, "loss": 0.6764, "step": 1462 }, { "epoch": 0.07894878851653986, "grad_norm": 1.2112161188587183, "learning_rate": 9.862296268285764e-06, "loss": 0.5315, "step": 1463 }, { "epoch": 0.07900275214505423, "grad_norm": 0.9788383146859465, "learning_rate": 9.862108923182073e-06, "loss": 0.51, "step": 1464 }, { "epoch": 0.07905671577356861, "grad_norm": 1.2412483063590298, "learning_rate": 9.861921452706964e-06, "loss": 0.5567, "step": 1465 }, { "epoch": 0.079110679402083, "grad_norm": 1.265692075720178, "learning_rate": 9.861733856865825e-06, "loss": 0.5197, "step": 1466 }, { "epoch": 0.07916464303059738, "grad_norm": 1.206037172647099, "learning_rate": 9.861546135664047e-06, "loss": 0.5381, "step": 1467 }, { "epoch": 0.07921860665911176, "grad_norm": 0.6921957406159033, "learning_rate": 9.861358289107028e-06, "loss": 0.3112, "step": 1468 }, { "epoch": 0.07927257028762615, "grad_norm": 1.08216762357684, "learning_rate": 9.861170317200164e-06, "loss": 0.4626, "step": 1469 }, { "epoch": 0.07932653391614052, "grad_norm": 1.2075259022925382, "learning_rate": 9.860982219948856e-06, "loss": 0.5026, "step": 1470 }, { "epoch": 0.0793804975446549, "grad_norm": 1.2446012654277088, "learning_rate": 9.860793997358515e-06, "loss": 0.4357, "step": 1471 }, { "epoch": 0.07943446117316928, "grad_norm": 0.9854572307082262, "learning_rate": 9.860605649434547e-06, "loss": 0.3771, "step": 1472 }, { "epoch": 0.07948842480168367, "grad_norm": 1.1193974224597572, "learning_rate": 9.860417176182368e-06, "loss": 0.651, "step": 1473 }, { "epoch": 0.07954238843019805, "grad_norm": 1.0916857462805774, "learning_rate": 9.860228577607392e-06, "loss": 0.486, "step": 1474 }, { "epoch": 0.07959635205871243, "grad_norm": 1.2815082610516313, "learning_rate": 9.860039853715043e-06, "loss": 0.5562, "step": 1475 }, { "epoch": 0.0796503156872268, "grad_norm": 1.1651273032580152, "learning_rate": 9.859851004510741e-06, "loss": 0.6071, "step": 1476 }, { "epoch": 0.07970427931574119, "grad_norm": 1.0956430590425383, "learning_rate": 9.859662029999917e-06, "loss": 0.4904, "step": 1477 }, { "epoch": 0.07975824294425557, "grad_norm": 0.9472708300017778, "learning_rate": 9.859472930188002e-06, "loss": 0.4158, "step": 1478 }, { "epoch": 0.07981220657276995, "grad_norm": 1.0615490832922667, "learning_rate": 9.85928370508043e-06, "loss": 0.5284, "step": 1479 }, { "epoch": 
0.07986617020128434, "grad_norm": 1.0237575294712045, "learning_rate": 9.859094354682636e-06, "loss": 0.4561, "step": 1480 }, { "epoch": 0.07992013382979872, "grad_norm": 1.1954722898403902, "learning_rate": 9.858904879000068e-06, "loss": 0.6271, "step": 1481 }, { "epoch": 0.0799740974583131, "grad_norm": 0.9246314156408526, "learning_rate": 9.85871527803817e-06, "loss": 0.3628, "step": 1482 }, { "epoch": 0.08002806108682747, "grad_norm": 1.1535051229274245, "learning_rate": 9.85852555180239e-06, "loss": 0.5989, "step": 1483 }, { "epoch": 0.08008202471534186, "grad_norm": 1.0107034488730695, "learning_rate": 9.858335700298183e-06, "loss": 0.4543, "step": 1484 }, { "epoch": 0.08013598834385624, "grad_norm": 1.0933195350337488, "learning_rate": 9.858145723531003e-06, "loss": 0.5148, "step": 1485 }, { "epoch": 0.08018995197237062, "grad_norm": 1.2817658042371736, "learning_rate": 9.857955621506312e-06, "loss": 0.5432, "step": 1486 }, { "epoch": 0.08024391560088501, "grad_norm": 1.1124611024398199, "learning_rate": 9.857765394229573e-06, "loss": 0.5106, "step": 1487 }, { "epoch": 0.08029787922939939, "grad_norm": 1.090046228360641, "learning_rate": 9.857575041706253e-06, "loss": 0.5011, "step": 1488 }, { "epoch": 0.08035184285791376, "grad_norm": 0.9952267743209943, "learning_rate": 9.857384563941822e-06, "loss": 0.4861, "step": 1489 }, { "epoch": 0.08040580648642814, "grad_norm": 1.1146891235017142, "learning_rate": 9.857193960941757e-06, "loss": 0.5959, "step": 1490 }, { "epoch": 0.08045977011494253, "grad_norm": 0.9742156618232539, "learning_rate": 9.857003232711535e-06, "loss": 0.4979, "step": 1491 }, { "epoch": 0.08051373374345691, "grad_norm": 1.2594637760359897, "learning_rate": 9.856812379256637e-06, "loss": 0.5615, "step": 1492 }, { "epoch": 0.0805676973719713, "grad_norm": 1.024781234168918, "learning_rate": 9.85662140058255e-06, "loss": 0.481, "step": 1493 }, { "epoch": 0.08062166100048568, "grad_norm": 1.1244158600069667, "learning_rate": 9.856430296694762e-06, "loss": 0.6746, "step": 1494 }, { "epoch": 0.08067562462900005, "grad_norm": 1.1677602531457634, "learning_rate": 9.856239067598766e-06, "loss": 0.6207, "step": 1495 }, { "epoch": 0.08072958825751443, "grad_norm": 1.0726082817996365, "learning_rate": 9.856047713300055e-06, "loss": 0.5905, "step": 1496 }, { "epoch": 0.08078355188602881, "grad_norm": 1.2114995837635878, "learning_rate": 9.855856233804134e-06, "loss": 0.5323, "step": 1497 }, { "epoch": 0.0808375155145432, "grad_norm": 1.16000317153991, "learning_rate": 9.855664629116502e-06, "loss": 0.6126, "step": 1498 }, { "epoch": 0.08089147914305758, "grad_norm": 1.0492276381754382, "learning_rate": 9.855472899242668e-06, "loss": 0.4683, "step": 1499 }, { "epoch": 0.08094544277157197, "grad_norm": 1.1663522259630936, "learning_rate": 9.855281044188142e-06, "loss": 0.6185, "step": 1500 }, { "epoch": 0.08094544277157197, "eval_loss": 0.6146706938743591, "eval_runtime": 164.3247, "eval_samples_per_second": 20.928, "eval_steps_per_second": 0.876, "step": 1500 }, { "epoch": 0.08099940640008634, "grad_norm": 0.9798140207891484, "learning_rate": 9.855089063958438e-06, "loss": 0.5013, "step": 1501 }, { "epoch": 0.08105337002860072, "grad_norm": 1.2582772065254066, "learning_rate": 9.854896958559075e-06, "loss": 0.5821, "step": 1502 }, { "epoch": 0.0811073336571151, "grad_norm": 1.2520645620663025, "learning_rate": 9.854704727995572e-06, "loss": 0.453, "step": 1503 }, { "epoch": 0.08116129728562949, "grad_norm": 1.2157887560299088, "learning_rate": 9.854512372273453e-06, "loss": 
0.5886, "step": 1504 }, { "epoch": 0.08121526091414387, "grad_norm": 1.1146875166182604, "learning_rate": 9.854319891398251e-06, "loss": 0.4069, "step": 1505 }, { "epoch": 0.08126922454265825, "grad_norm": 1.4202003350134589, "learning_rate": 9.854127285375495e-06, "loss": 0.6642, "step": 1506 }, { "epoch": 0.08132318817117264, "grad_norm": 1.3259499177089509, "learning_rate": 9.853934554210722e-06, "loss": 0.5552, "step": 1507 }, { "epoch": 0.081377151799687, "grad_norm": 1.1060516920685122, "learning_rate": 9.853741697909469e-06, "loss": 0.5061, "step": 1508 }, { "epoch": 0.08143111542820139, "grad_norm": 1.0258360711480912, "learning_rate": 9.853548716477282e-06, "loss": 0.6109, "step": 1509 }, { "epoch": 0.08148507905671577, "grad_norm": 1.2655816403044782, "learning_rate": 9.853355609919704e-06, "loss": 0.5719, "step": 1510 }, { "epoch": 0.08153904268523016, "grad_norm": 1.1343385881609889, "learning_rate": 9.853162378242288e-06, "loss": 0.5053, "step": 1511 }, { "epoch": 0.08159300631374454, "grad_norm": 1.0328876512863703, "learning_rate": 9.852969021450588e-06, "loss": 0.5016, "step": 1512 }, { "epoch": 0.08164696994225892, "grad_norm": 0.878102636532819, "learning_rate": 9.852775539550158e-06, "loss": 0.4054, "step": 1513 }, { "epoch": 0.0817009335707733, "grad_norm": 1.0514038860420063, "learning_rate": 9.852581932546564e-06, "loss": 0.3864, "step": 1514 }, { "epoch": 0.08175489719928768, "grad_norm": 1.0362282012694082, "learning_rate": 9.852388200445364e-06, "loss": 0.4653, "step": 1515 }, { "epoch": 0.08180886082780206, "grad_norm": 1.0185811148611448, "learning_rate": 9.85219434325213e-06, "loss": 0.424, "step": 1516 }, { "epoch": 0.08186282445631644, "grad_norm": 1.1444059859700202, "learning_rate": 9.852000360972435e-06, "loss": 0.5455, "step": 1517 }, { "epoch": 0.08191678808483083, "grad_norm": 1.199478908214755, "learning_rate": 9.85180625361185e-06, "loss": 0.6757, "step": 1518 }, { "epoch": 0.08197075171334521, "grad_norm": 1.115159861386223, "learning_rate": 9.851612021175958e-06, "loss": 0.5406, "step": 1519 }, { "epoch": 0.08202471534185958, "grad_norm": 0.9312085379169391, "learning_rate": 9.851417663670338e-06, "loss": 0.4855, "step": 1520 }, { "epoch": 0.08207867897037396, "grad_norm": 1.0896347948181393, "learning_rate": 9.851223181100577e-06, "loss": 0.451, "step": 1521 }, { "epoch": 0.08213264259888835, "grad_norm": 1.0466600772463077, "learning_rate": 9.851028573472267e-06, "loss": 0.5355, "step": 1522 }, { "epoch": 0.08218660622740273, "grad_norm": 1.1596608974361473, "learning_rate": 9.850833840790996e-06, "loss": 0.4676, "step": 1523 }, { "epoch": 0.08224056985591711, "grad_norm": 1.0780443075418005, "learning_rate": 9.850638983062367e-06, "loss": 0.5934, "step": 1524 }, { "epoch": 0.0822945334844315, "grad_norm": 1.0496587614329123, "learning_rate": 9.850444000291977e-06, "loss": 0.4462, "step": 1525 }, { "epoch": 0.08234849711294587, "grad_norm": 0.9084719891997122, "learning_rate": 9.850248892485434e-06, "loss": 0.3869, "step": 1526 }, { "epoch": 0.08240246074146025, "grad_norm": 1.0413229017852348, "learning_rate": 9.850053659648338e-06, "loss": 0.5009, "step": 1527 }, { "epoch": 0.08245642436997463, "grad_norm": 1.1587227563029678, "learning_rate": 9.849858301786305e-06, "loss": 0.5335, "step": 1528 }, { "epoch": 0.08251038799848902, "grad_norm": 1.1680125356628455, "learning_rate": 9.84966281890495e-06, "loss": 0.573, "step": 1529 }, { "epoch": 0.0825643516270034, "grad_norm": 0.8598853255457625, "learning_rate": 9.84946721100989e-06, "loss": 0.2967, 
"step": 1530 }, { "epoch": 0.08261831525551779, "grad_norm": 0.9157247685416676, "learning_rate": 9.849271478106747e-06, "loss": 0.4403, "step": 1531 }, { "epoch": 0.08267227888403217, "grad_norm": 1.0124714028200428, "learning_rate": 9.849075620201148e-06, "loss": 0.4885, "step": 1532 }, { "epoch": 0.08272624251254654, "grad_norm": 0.9958019600890563, "learning_rate": 9.848879637298724e-06, "loss": 0.5238, "step": 1533 }, { "epoch": 0.08278020614106092, "grad_norm": 1.3366909708225285, "learning_rate": 9.8486835294051e-06, "loss": 0.6829, "step": 1534 }, { "epoch": 0.0828341697695753, "grad_norm": 0.9181762422893593, "learning_rate": 9.84848729652592e-06, "loss": 0.4989, "step": 1535 }, { "epoch": 0.08288813339808969, "grad_norm": 1.1826804758244975, "learning_rate": 9.848290938666821e-06, "loss": 0.5855, "step": 1536 }, { "epoch": 0.08294209702660407, "grad_norm": 1.0940651721566208, "learning_rate": 9.848094455833447e-06, "loss": 0.5753, "step": 1537 }, { "epoch": 0.08299606065511846, "grad_norm": 1.2331370009972131, "learning_rate": 9.847897848031444e-06, "loss": 0.656, "step": 1538 }, { "epoch": 0.08305002428363283, "grad_norm": 1.2762993941501477, "learning_rate": 9.847701115266464e-06, "loss": 0.7644, "step": 1539 }, { "epoch": 0.08310398791214721, "grad_norm": 0.9726644290765585, "learning_rate": 9.847504257544163e-06, "loss": 0.5011, "step": 1540 }, { "epoch": 0.08315795154066159, "grad_norm": 0.9584812503039151, "learning_rate": 9.847307274870193e-06, "loss": 0.4976, "step": 1541 }, { "epoch": 0.08321191516917598, "grad_norm": 1.1750148556944287, "learning_rate": 9.847110167250223e-06, "loss": 0.58, "step": 1542 }, { "epoch": 0.08326587879769036, "grad_norm": 1.222355056247674, "learning_rate": 9.84691293468991e-06, "loss": 0.6011, "step": 1543 }, { "epoch": 0.08331984242620474, "grad_norm": 1.1254245067873054, "learning_rate": 9.846715577194931e-06, "loss": 0.4862, "step": 1544 }, { "epoch": 0.08337380605471911, "grad_norm": 1.0455939198311845, "learning_rate": 9.846518094770951e-06, "loss": 0.4624, "step": 1545 }, { "epoch": 0.0834277696832335, "grad_norm": 1.228633884409697, "learning_rate": 9.846320487423653e-06, "loss": 0.5733, "step": 1546 }, { "epoch": 0.08348173331174788, "grad_norm": 0.7365752934107501, "learning_rate": 9.84612275515871e-06, "loss": 0.2979, "step": 1547 }, { "epoch": 0.08353569694026226, "grad_norm": 1.297037613261306, "learning_rate": 9.845924897981809e-06, "loss": 0.5991, "step": 1548 }, { "epoch": 0.08358966056877665, "grad_norm": 1.0307260290793485, "learning_rate": 9.845726915898634e-06, "loss": 0.438, "step": 1549 }, { "epoch": 0.08364362419729103, "grad_norm": 1.083529896053623, "learning_rate": 9.845528808914876e-06, "loss": 0.5046, "step": 1550 }, { "epoch": 0.08369758782580541, "grad_norm": 1.0815254116768536, "learning_rate": 9.845330577036229e-06, "loss": 0.515, "step": 1551 }, { "epoch": 0.08375155145431978, "grad_norm": 0.942989789903542, "learning_rate": 9.845132220268393e-06, "loss": 0.4579, "step": 1552 }, { "epoch": 0.08380551508283417, "grad_norm": 1.1382890688730474, "learning_rate": 9.844933738617063e-06, "loss": 0.4499, "step": 1553 }, { "epoch": 0.08385947871134855, "grad_norm": 0.8690473800277624, "learning_rate": 9.84473513208795e-06, "loss": 0.4089, "step": 1554 }, { "epoch": 0.08391344233986293, "grad_norm": 1.1505086404814973, "learning_rate": 9.84453640068676e-06, "loss": 0.5531, "step": 1555 }, { "epoch": 0.08396740596837732, "grad_norm": 1.0483287103181118, "learning_rate": 9.844337544419201e-06, "loss": 0.4522, "step": 
1556 }, { "epoch": 0.0840213695968917, "grad_norm": 1.1589430216527066, "learning_rate": 9.844138563290993e-06, "loss": 0.6265, "step": 1557 }, { "epoch": 0.08407533322540607, "grad_norm": 1.262231200769264, "learning_rate": 9.843939457307853e-06, "loss": 0.6673, "step": 1558 }, { "epoch": 0.08412929685392045, "grad_norm": 0.8240812365831286, "learning_rate": 9.843740226475505e-06, "loss": 0.3645, "step": 1559 }, { "epoch": 0.08418326048243484, "grad_norm": 1.2755777293978097, "learning_rate": 9.843540870799671e-06, "loss": 0.5795, "step": 1560 }, { "epoch": 0.08423722411094922, "grad_norm": 1.1559880831674183, "learning_rate": 9.843341390286087e-06, "loss": 0.7272, "step": 1561 }, { "epoch": 0.0842911877394636, "grad_norm": 1.3779131757746121, "learning_rate": 9.84314178494048e-06, "loss": 0.6434, "step": 1562 }, { "epoch": 0.08434515136797799, "grad_norm": 1.346690560850989, "learning_rate": 9.84294205476859e-06, "loss": 0.7369, "step": 1563 }, { "epoch": 0.08439911499649236, "grad_norm": 1.2208802990487055, "learning_rate": 9.84274219977616e-06, "loss": 0.5447, "step": 1564 }, { "epoch": 0.08445307862500674, "grad_norm": 1.156660916184976, "learning_rate": 9.842542219968926e-06, "loss": 0.5326, "step": 1565 }, { "epoch": 0.08450704225352113, "grad_norm": 1.1089401599190514, "learning_rate": 9.842342115352647e-06, "loss": 0.5314, "step": 1566 }, { "epoch": 0.08456100588203551, "grad_norm": 0.9395095432669703, "learning_rate": 9.842141885933063e-06, "loss": 0.4176, "step": 1567 }, { "epoch": 0.08461496951054989, "grad_norm": 1.1210758235682878, "learning_rate": 9.841941531715936e-06, "loss": 0.5447, "step": 1568 }, { "epoch": 0.08466893313906428, "grad_norm": 1.2527745425116128, "learning_rate": 9.841741052707021e-06, "loss": 0.6707, "step": 1569 }, { "epoch": 0.08472289676757865, "grad_norm": 0.9981604489122831, "learning_rate": 9.841540448912083e-06, "loss": 0.4276, "step": 1570 }, { "epoch": 0.08477686039609303, "grad_norm": 1.0319580285317156, "learning_rate": 9.841339720336885e-06, "loss": 0.5513, "step": 1571 }, { "epoch": 0.08483082402460741, "grad_norm": 1.0105271934793305, "learning_rate": 9.841138866987196e-06, "loss": 0.5267, "step": 1572 }, { "epoch": 0.0848847876531218, "grad_norm": 0.9073355854440462, "learning_rate": 9.84093788886879e-06, "loss": 0.4137, "step": 1573 }, { "epoch": 0.08493875128163618, "grad_norm": 1.1546972256070236, "learning_rate": 9.840736785987443e-06, "loss": 0.515, "step": 1574 }, { "epoch": 0.08499271491015056, "grad_norm": 1.22179328513042, "learning_rate": 9.840535558348934e-06, "loss": 0.5654, "step": 1575 }, { "epoch": 0.08504667853866495, "grad_norm": 1.2223317208394844, "learning_rate": 9.840334205959047e-06, "loss": 0.5655, "step": 1576 }, { "epoch": 0.08510064216717932, "grad_norm": 1.1658447829794572, "learning_rate": 9.84013272882357e-06, "loss": 0.5585, "step": 1577 }, { "epoch": 0.0851546057956937, "grad_norm": 1.1713083436248977, "learning_rate": 9.839931126948294e-06, "loss": 0.5375, "step": 1578 }, { "epoch": 0.08520856942420808, "grad_norm": 1.0074625828735708, "learning_rate": 9.83972940033901e-06, "loss": 0.4389, "step": 1579 }, { "epoch": 0.08526253305272247, "grad_norm": 1.0141735897654298, "learning_rate": 9.839527549001522e-06, "loss": 0.3648, "step": 1580 }, { "epoch": 0.08531649668123685, "grad_norm": 1.086980057179789, "learning_rate": 9.839325572941624e-06, "loss": 0.4758, "step": 1581 }, { "epoch": 0.08537046030975123, "grad_norm": 1.0944083611542792, "learning_rate": 9.839123472165125e-06, "loss": 0.5433, "step": 1582 }, { 
"epoch": 0.0854244239382656, "grad_norm": 0.9484114757413327, "learning_rate": 9.838921246677833e-06, "loss": 0.529, "step": 1583 }, { "epoch": 0.08547838756677999, "grad_norm": 1.249388415303616, "learning_rate": 9.838718896485562e-06, "loss": 0.5792, "step": 1584 }, { "epoch": 0.08553235119529437, "grad_norm": 1.2069786134825093, "learning_rate": 9.838516421594123e-06, "loss": 0.611, "step": 1585 }, { "epoch": 0.08558631482380875, "grad_norm": 1.1367159383841587, "learning_rate": 9.838313822009342e-06, "loss": 0.5743, "step": 1586 }, { "epoch": 0.08564027845232314, "grad_norm": 1.0690548361561774, "learning_rate": 9.838111097737034e-06, "loss": 0.5715, "step": 1587 }, { "epoch": 0.08569424208083752, "grad_norm": 1.0807623232506285, "learning_rate": 9.837908248783033e-06, "loss": 0.6421, "step": 1588 }, { "epoch": 0.08574820570935189, "grad_norm": 1.0783708764592845, "learning_rate": 9.837705275153163e-06, "loss": 0.4799, "step": 1589 }, { "epoch": 0.08580216933786627, "grad_norm": 0.9061602416524287, "learning_rate": 9.837502176853263e-06, "loss": 0.51, "step": 1590 }, { "epoch": 0.08585613296638066, "grad_norm": 1.151846880949086, "learning_rate": 9.837298953889167e-06, "loss": 0.5705, "step": 1591 }, { "epoch": 0.08591009659489504, "grad_norm": 1.0876547236078546, "learning_rate": 9.837095606266716e-06, "loss": 0.57, "step": 1592 }, { "epoch": 0.08596406022340942, "grad_norm": 1.1138372740908353, "learning_rate": 9.836892133991756e-06, "loss": 0.5763, "step": 1593 }, { "epoch": 0.08601802385192381, "grad_norm": 0.852474862855587, "learning_rate": 9.836688537070133e-06, "loss": 0.4211, "step": 1594 }, { "epoch": 0.08607198748043818, "grad_norm": 1.152205613848411, "learning_rate": 9.8364848155077e-06, "loss": 0.436, "step": 1595 }, { "epoch": 0.08612595110895256, "grad_norm": 0.9890111297566225, "learning_rate": 9.836280969310311e-06, "loss": 0.4744, "step": 1596 }, { "epoch": 0.08617991473746694, "grad_norm": 1.1267327430720884, "learning_rate": 9.836076998483827e-06, "loss": 0.5306, "step": 1597 }, { "epoch": 0.08623387836598133, "grad_norm": 1.256551240729594, "learning_rate": 9.835872903034107e-06, "loss": 0.6612, "step": 1598 }, { "epoch": 0.08628784199449571, "grad_norm": 1.1490930821353862, "learning_rate": 9.83566868296702e-06, "loss": 0.5736, "step": 1599 }, { "epoch": 0.0863418056230101, "grad_norm": 0.8801770895212956, "learning_rate": 9.835464338288433e-06, "loss": 0.337, "step": 1600 }, { "epoch": 0.08639576925152448, "grad_norm": 1.1206498024505585, "learning_rate": 9.83525986900422e-06, "loss": 0.6459, "step": 1601 }, { "epoch": 0.08644973288003885, "grad_norm": 1.0631613107392668, "learning_rate": 9.83505527512026e-06, "loss": 0.5481, "step": 1602 }, { "epoch": 0.08650369650855323, "grad_norm": 0.787002373070203, "learning_rate": 9.834850556642429e-06, "loss": 0.365, "step": 1603 }, { "epoch": 0.08655766013706762, "grad_norm": 1.2890055639787825, "learning_rate": 9.834645713576615e-06, "loss": 0.6179, "step": 1604 }, { "epoch": 0.086611623765582, "grad_norm": 1.2914020715362726, "learning_rate": 9.8344407459287e-06, "loss": 0.667, "step": 1605 }, { "epoch": 0.08666558739409638, "grad_norm": 1.1843440352892518, "learning_rate": 9.834235653704583e-06, "loss": 0.4882, "step": 1606 }, { "epoch": 0.08671955102261077, "grad_norm": 0.9121622957994678, "learning_rate": 9.83403043691015e-06, "loss": 0.4114, "step": 1607 }, { "epoch": 0.08677351465112514, "grad_norm": 1.1316613787339864, "learning_rate": 9.833825095551304e-06, "loss": 0.5457, "step": 1608 }, { "epoch": 
0.08682747827963952, "grad_norm": 1.3354579259543042, "learning_rate": 9.833619629633946e-06, "loss": 0.6108, "step": 1609 }, { "epoch": 0.0868814419081539, "grad_norm": 1.004378439975859, "learning_rate": 9.833414039163983e-06, "loss": 0.5821, "step": 1610 }, { "epoch": 0.08693540553666829, "grad_norm": 0.9257516759832259, "learning_rate": 9.833208324147318e-06, "loss": 0.4459, "step": 1611 }, { "epoch": 0.08698936916518267, "grad_norm": 1.3658895076135296, "learning_rate": 9.833002484589871e-06, "loss": 0.617, "step": 1612 }, { "epoch": 0.08704333279369705, "grad_norm": 1.3484458340651433, "learning_rate": 9.832796520497552e-06, "loss": 0.5209, "step": 1613 }, { "epoch": 0.08709729642221142, "grad_norm": 1.3788447896712182, "learning_rate": 9.832590431876285e-06, "loss": 0.7221, "step": 1614 }, { "epoch": 0.0871512600507258, "grad_norm": 1.0761047530954517, "learning_rate": 9.83238421873199e-06, "loss": 0.6012, "step": 1615 }, { "epoch": 0.08720522367924019, "grad_norm": 0.8347598100449255, "learning_rate": 9.832177881070594e-06, "loss": 0.289, "step": 1616 }, { "epoch": 0.08725918730775457, "grad_norm": 1.240941394104531, "learning_rate": 9.831971418898029e-06, "loss": 0.5491, "step": 1617 }, { "epoch": 0.08731315093626896, "grad_norm": 1.12464094020021, "learning_rate": 9.83176483222023e-06, "loss": 0.5414, "step": 1618 }, { "epoch": 0.08736711456478334, "grad_norm": 1.2160853220993917, "learning_rate": 9.831558121043132e-06, "loss": 0.5913, "step": 1619 }, { "epoch": 0.08742107819329771, "grad_norm": 1.1736920426391768, "learning_rate": 9.831351285372677e-06, "loss": 0.4621, "step": 1620 }, { "epoch": 0.0874750418218121, "grad_norm": 0.8947489008974683, "learning_rate": 9.831144325214807e-06, "loss": 0.4323, "step": 1621 }, { "epoch": 0.08752900545032648, "grad_norm": 0.8576290615011672, "learning_rate": 9.830937240575476e-06, "loss": 0.3433, "step": 1622 }, { "epoch": 0.08758296907884086, "grad_norm": 0.8202390883169676, "learning_rate": 9.830730031460631e-06, "loss": 0.3983, "step": 1623 }, { "epoch": 0.08763693270735524, "grad_norm": 1.0073211066754821, "learning_rate": 9.83052269787623e-06, "loss": 0.4526, "step": 1624 }, { "epoch": 0.08769089633586963, "grad_norm": 1.2100917935625481, "learning_rate": 9.83031523982823e-06, "loss": 0.5639, "step": 1625 }, { "epoch": 0.08774485996438401, "grad_norm": 1.0518192301087639, "learning_rate": 9.830107657322595e-06, "loss": 0.5762, "step": 1626 }, { "epoch": 0.08779882359289838, "grad_norm": 1.4607934577686827, "learning_rate": 9.829899950365291e-06, "loss": 0.7201, "step": 1627 }, { "epoch": 0.08785278722141276, "grad_norm": 1.1780400437548577, "learning_rate": 9.829692118962288e-06, "loss": 0.5046, "step": 1628 }, { "epoch": 0.08790675084992715, "grad_norm": 1.0305028798455964, "learning_rate": 9.829484163119558e-06, "loss": 0.555, "step": 1629 }, { "epoch": 0.08796071447844153, "grad_norm": 0.9483324996319525, "learning_rate": 9.829276082843077e-06, "loss": 0.3648, "step": 1630 }, { "epoch": 0.08801467810695592, "grad_norm": 1.0524614300174187, "learning_rate": 9.829067878138828e-06, "loss": 0.4713, "step": 1631 }, { "epoch": 0.0880686417354703, "grad_norm": 0.9599722735210796, "learning_rate": 9.828859549012795e-06, "loss": 0.4018, "step": 1632 }, { "epoch": 0.08812260536398467, "grad_norm": 1.0558785725193331, "learning_rate": 9.828651095470963e-06, "loss": 0.6398, "step": 1633 }, { "epoch": 0.08817656899249905, "grad_norm": 1.2490202671484132, "learning_rate": 9.828442517519325e-06, "loss": 0.6621, "step": 1634 }, { "epoch": 
0.08823053262101344, "grad_norm": 0.9597501357336855, "learning_rate": 9.828233815163877e-06, "loss": 0.4455, "step": 1635 }, { "epoch": 0.08828449624952782, "grad_norm": 1.1436511229580923, "learning_rate": 9.828024988410614e-06, "loss": 0.5793, "step": 1636 }, { "epoch": 0.0883384598780422, "grad_norm": 1.1270627241489257, "learning_rate": 9.827816037265541e-06, "loss": 0.5021, "step": 1637 }, { "epoch": 0.08839242350655659, "grad_norm": 1.4219463515828699, "learning_rate": 9.827606961734661e-06, "loss": 0.735, "step": 1638 }, { "epoch": 0.08844638713507096, "grad_norm": 0.9714128992279043, "learning_rate": 9.827397761823984e-06, "loss": 0.3808, "step": 1639 }, { "epoch": 0.08850035076358534, "grad_norm": 1.4904570373380337, "learning_rate": 9.827188437539524e-06, "loss": 0.7883, "step": 1640 }, { "epoch": 0.08855431439209972, "grad_norm": 1.1838036727180905, "learning_rate": 9.826978988887297e-06, "loss": 0.5839, "step": 1641 }, { "epoch": 0.0886082780206141, "grad_norm": 1.1617384767629275, "learning_rate": 9.82676941587332e-06, "loss": 0.4723, "step": 1642 }, { "epoch": 0.08866224164912849, "grad_norm": 1.3683706335596868, "learning_rate": 9.826559718503617e-06, "loss": 0.5093, "step": 1643 }, { "epoch": 0.08871620527764287, "grad_norm": 1.2675392424392362, "learning_rate": 9.826349896784219e-06, "loss": 0.6712, "step": 1644 }, { "epoch": 0.08877016890615726, "grad_norm": 1.3289530387490138, "learning_rate": 9.826139950721151e-06, "loss": 0.6452, "step": 1645 }, { "epoch": 0.08882413253467163, "grad_norm": 1.2228801552837718, "learning_rate": 9.825929880320452e-06, "loss": 0.5753, "step": 1646 }, { "epoch": 0.08887809616318601, "grad_norm": 1.0343104073204008, "learning_rate": 9.825719685588156e-06, "loss": 0.6086, "step": 1647 }, { "epoch": 0.0889320597917004, "grad_norm": 1.1870806049082987, "learning_rate": 9.825509366530303e-06, "loss": 0.5365, "step": 1648 }, { "epoch": 0.08898602342021478, "grad_norm": 1.0046447938937908, "learning_rate": 9.825298923152944e-06, "loss": 0.5849, "step": 1649 }, { "epoch": 0.08903998704872916, "grad_norm": 0.9978182875348294, "learning_rate": 9.825088355462122e-06, "loss": 0.5223, "step": 1650 }, { "epoch": 0.08909395067724354, "grad_norm": 1.044125844081303, "learning_rate": 9.82487766346389e-06, "loss": 0.482, "step": 1651 }, { "epoch": 0.08914791430575791, "grad_norm": 1.225174758023279, "learning_rate": 9.824666847164305e-06, "loss": 0.6918, "step": 1652 }, { "epoch": 0.0892018779342723, "grad_norm": 1.06438626007324, "learning_rate": 9.824455906569423e-06, "loss": 0.5024, "step": 1653 }, { "epoch": 0.08925584156278668, "grad_norm": 1.245934185643498, "learning_rate": 9.824244841685312e-06, "loss": 0.6606, "step": 1654 }, { "epoch": 0.08930980519130106, "grad_norm": 1.1185088785196011, "learning_rate": 9.824033652518031e-06, "loss": 0.482, "step": 1655 }, { "epoch": 0.08936376881981545, "grad_norm": 1.0840397608763517, "learning_rate": 9.823822339073656e-06, "loss": 0.5517, "step": 1656 }, { "epoch": 0.08941773244832983, "grad_norm": 1.329610598092162, "learning_rate": 9.823610901358258e-06, "loss": 0.6059, "step": 1657 }, { "epoch": 0.0894716960768442, "grad_norm": 1.3229738100958877, "learning_rate": 9.823399339377913e-06, "loss": 0.591, "step": 1658 }, { "epoch": 0.08952565970535858, "grad_norm": 1.0775612463918058, "learning_rate": 9.823187653138704e-06, "loss": 0.5562, "step": 1659 }, { "epoch": 0.08957962333387297, "grad_norm": 0.9785007886055614, "learning_rate": 9.822975842646713e-06, "loss": 0.4405, "step": 1660 }, { "epoch": 
0.08963358696238735, "grad_norm": 0.8586064196390926, "learning_rate": 9.822763907908028e-06, "loss": 0.3278, "step": 1661 }, { "epoch": 0.08968755059090174, "grad_norm": 0.9333122172315669, "learning_rate": 9.822551848928742e-06, "loss": 0.3912, "step": 1662 }, { "epoch": 0.08974151421941612, "grad_norm": 0.9869739468486504, "learning_rate": 9.822339665714946e-06, "loss": 0.4953, "step": 1663 }, { "epoch": 0.08979547784793049, "grad_norm": 1.1252225806440233, "learning_rate": 9.822127358272742e-06, "loss": 0.5658, "step": 1664 }, { "epoch": 0.08984944147644487, "grad_norm": 1.3715820146447948, "learning_rate": 9.82191492660823e-06, "loss": 0.6715, "step": 1665 }, { "epoch": 0.08990340510495926, "grad_norm": 1.2153187702701098, "learning_rate": 9.821702370727517e-06, "loss": 0.6509, "step": 1666 }, { "epoch": 0.08995736873347364, "grad_norm": 0.8146975489692445, "learning_rate": 9.82148969063671e-06, "loss": 0.3417, "step": 1667 }, { "epoch": 0.09001133236198802, "grad_norm": 1.25428516083054, "learning_rate": 9.821276886341921e-06, "loss": 0.652, "step": 1668 }, { "epoch": 0.0900652959905024, "grad_norm": 0.7877677891714017, "learning_rate": 9.82106395784927e-06, "loss": 0.2937, "step": 1669 }, { "epoch": 0.09011925961901679, "grad_norm": 1.1984374000287379, "learning_rate": 9.820850905164875e-06, "loss": 0.5934, "step": 1670 }, { "epoch": 0.09017322324753116, "grad_norm": 1.2228934987459703, "learning_rate": 9.820637728294859e-06, "loss": 0.5393, "step": 1671 }, { "epoch": 0.09022718687604554, "grad_norm": 1.2555563442989714, "learning_rate": 9.820424427245349e-06, "loss": 0.5592, "step": 1672 }, { "epoch": 0.09028115050455993, "grad_norm": 1.0774177619090028, "learning_rate": 9.820211002022475e-06, "loss": 0.6326, "step": 1673 }, { "epoch": 0.09033511413307431, "grad_norm": 0.924511641561153, "learning_rate": 9.819997452632372e-06, "loss": 0.3981, "step": 1674 }, { "epoch": 0.0903890777615887, "grad_norm": 1.08463739646336, "learning_rate": 9.819783779081176e-06, "loss": 0.4948, "step": 1675 }, { "epoch": 0.09044304139010308, "grad_norm": 1.2105462433132683, "learning_rate": 9.819569981375031e-06, "loss": 0.4794, "step": 1676 }, { "epoch": 0.09049700501861745, "grad_norm": 1.0756202429582895, "learning_rate": 9.81935605952008e-06, "loss": 0.5709, "step": 1677 }, { "epoch": 0.09055096864713183, "grad_norm": 0.8921964399721621, "learning_rate": 9.81914201352247e-06, "loss": 0.466, "step": 1678 }, { "epoch": 0.09060493227564621, "grad_norm": 1.069290093281084, "learning_rate": 9.818927843388353e-06, "loss": 0.59, "step": 1679 }, { "epoch": 0.0906588959041606, "grad_norm": 1.1030831464156623, "learning_rate": 9.81871354912389e-06, "loss": 0.5554, "step": 1680 }, { "epoch": 0.09071285953267498, "grad_norm": 1.0161444169948883, "learning_rate": 9.818499130735236e-06, "loss": 0.421, "step": 1681 }, { "epoch": 0.09076682316118936, "grad_norm": 1.0531489691120555, "learning_rate": 9.818284588228549e-06, "loss": 0.6346, "step": 1682 }, { "epoch": 0.09082078678970373, "grad_norm": 0.927684292184937, "learning_rate": 9.818069921610002e-06, "loss": 0.4583, "step": 1683 }, { "epoch": 0.09087475041821812, "grad_norm": 1.073151799884988, "learning_rate": 9.817855130885762e-06, "loss": 0.498, "step": 1684 }, { "epoch": 0.0909287140467325, "grad_norm": 1.0582736106515125, "learning_rate": 9.817640216062003e-06, "loss": 0.4456, "step": 1685 }, { "epoch": 0.09098267767524688, "grad_norm": 1.025772844411493, "learning_rate": 9.817425177144903e-06, "loss": 0.3691, "step": 1686 }, { "epoch": 
0.09103664130376127, "grad_norm": 1.0891343214577762, "learning_rate": 9.81721001414064e-06, "loss": 0.4927, "step": 1687 }, { "epoch": 0.09109060493227565, "grad_norm": 0.9504154617191033, "learning_rate": 9.816994727055397e-06, "loss": 0.4328, "step": 1688 }, { "epoch": 0.09114456856079002, "grad_norm": 1.0943679321142035, "learning_rate": 9.816779315895366e-06, "loss": 0.4348, "step": 1689 }, { "epoch": 0.0911985321893044, "grad_norm": 1.1139241373970679, "learning_rate": 9.816563780666734e-06, "loss": 0.464, "step": 1690 }, { "epoch": 0.09125249581781879, "grad_norm": 1.137816870929604, "learning_rate": 9.816348121375697e-06, "loss": 0.4801, "step": 1691 }, { "epoch": 0.09130645944633317, "grad_norm": 1.1393581216763444, "learning_rate": 9.816132338028455e-06, "loss": 0.5652, "step": 1692 }, { "epoch": 0.09136042307484755, "grad_norm": 1.0317802011950623, "learning_rate": 9.815916430631209e-06, "loss": 0.5314, "step": 1693 }, { "epoch": 0.09141438670336194, "grad_norm": 1.029407606259921, "learning_rate": 9.815700399190162e-06, "loss": 0.4722, "step": 1694 }, { "epoch": 0.09146835033187632, "grad_norm": 1.140417592765902, "learning_rate": 9.815484243711525e-06, "loss": 0.4671, "step": 1695 }, { "epoch": 0.09152231396039069, "grad_norm": 1.178258194091602, "learning_rate": 9.815267964201511e-06, "loss": 0.4784, "step": 1696 }, { "epoch": 0.09157627758890508, "grad_norm": 1.2934630824706141, "learning_rate": 9.815051560666337e-06, "loss": 0.5439, "step": 1697 }, { "epoch": 0.09163024121741946, "grad_norm": 1.1753392952512376, "learning_rate": 9.81483503311222e-06, "loss": 0.587, "step": 1698 }, { "epoch": 0.09168420484593384, "grad_norm": 1.0874961980852107, "learning_rate": 9.814618381545385e-06, "loss": 0.5716, "step": 1699 }, { "epoch": 0.09173816847444823, "grad_norm": 1.1789266039726995, "learning_rate": 9.814401605972056e-06, "loss": 0.6019, "step": 1700 }, { "epoch": 0.09179213210296261, "grad_norm": 0.813923817098091, "learning_rate": 9.814184706398467e-06, "loss": 0.3539, "step": 1701 }, { "epoch": 0.09184609573147698, "grad_norm": 1.3132245363138273, "learning_rate": 9.813967682830851e-06, "loss": 0.5825, "step": 1702 }, { "epoch": 0.09190005935999136, "grad_norm": 1.0948781294439975, "learning_rate": 9.813750535275443e-06, "loss": 0.3776, "step": 1703 }, { "epoch": 0.09195402298850575, "grad_norm": 1.3664642869615802, "learning_rate": 9.813533263738486e-06, "loss": 0.7115, "step": 1704 }, { "epoch": 0.09200798661702013, "grad_norm": 1.079512338213839, "learning_rate": 9.813315868226226e-06, "loss": 0.6042, "step": 1705 }, { "epoch": 0.09206195024553451, "grad_norm": 1.2216389626565454, "learning_rate": 9.813098348744908e-06, "loss": 0.6331, "step": 1706 }, { "epoch": 0.0921159138740489, "grad_norm": 0.9557344063718698, "learning_rate": 9.812880705300788e-06, "loss": 0.5468, "step": 1707 }, { "epoch": 0.09216987750256327, "grad_norm": 1.0944854324097417, "learning_rate": 9.812662937900115e-06, "loss": 0.6354, "step": 1708 }, { "epoch": 0.09222384113107765, "grad_norm": 1.021843422721648, "learning_rate": 9.812445046549156e-06, "loss": 0.5327, "step": 1709 }, { "epoch": 0.09227780475959203, "grad_norm": 0.9678859433516638, "learning_rate": 9.812227031254165e-06, "loss": 0.4132, "step": 1710 }, { "epoch": 0.09233176838810642, "grad_norm": 0.8988119562999242, "learning_rate": 9.812008892021414e-06, "loss": 0.3923, "step": 1711 }, { "epoch": 0.0923857320166208, "grad_norm": 1.1646460322989367, "learning_rate": 9.81179062885717e-06, "loss": 0.4816, "step": 1712 }, { "epoch": 
0.09243969564513518, "grad_norm": 1.1162727452001793, "learning_rate": 9.811572241767706e-06, "loss": 0.5885, "step": 1713 }, { "epoch": 0.09249365927364955, "grad_norm": 1.0996732397513724, "learning_rate": 9.8113537307593e-06, "loss": 0.4349, "step": 1714 }, { "epoch": 0.09254762290216394, "grad_norm": 0.999500261909367, "learning_rate": 9.811135095838231e-06, "loss": 0.4081, "step": 1715 }, { "epoch": 0.09260158653067832, "grad_norm": 0.8847185037684455, "learning_rate": 9.810916337010783e-06, "loss": 0.4378, "step": 1716 }, { "epoch": 0.0926555501591927, "grad_norm": 1.0378386908800896, "learning_rate": 9.810697454283244e-06, "loss": 0.4723, "step": 1717 }, { "epoch": 0.09270951378770709, "grad_norm": 1.0533433981723221, "learning_rate": 9.810478447661905e-06, "loss": 0.4544, "step": 1718 }, { "epoch": 0.09276347741622147, "grad_norm": 0.9962979316321182, "learning_rate": 9.81025931715306e-06, "loss": 0.4935, "step": 1719 }, { "epoch": 0.09281744104473585, "grad_norm": 1.093176694811763, "learning_rate": 9.810040062763007e-06, "loss": 0.5344, "step": 1720 }, { "epoch": 0.09287140467325022, "grad_norm": 1.0962559073863924, "learning_rate": 9.809820684498047e-06, "loss": 0.5263, "step": 1721 }, { "epoch": 0.09292536830176461, "grad_norm": 1.1360375754944454, "learning_rate": 9.809601182364484e-06, "loss": 0.5281, "step": 1722 }, { "epoch": 0.09297933193027899, "grad_norm": 0.9765547124984795, "learning_rate": 9.809381556368632e-06, "loss": 0.3518, "step": 1723 }, { "epoch": 0.09303329555879337, "grad_norm": 1.0779994570172466, "learning_rate": 9.809161806516797e-06, "loss": 0.4334, "step": 1724 }, { "epoch": 0.09308725918730776, "grad_norm": 1.003737619795514, "learning_rate": 9.8089419328153e-06, "loss": 0.4246, "step": 1725 }, { "epoch": 0.09314122281582214, "grad_norm": 1.2274996988254032, "learning_rate": 9.808721935270457e-06, "loss": 0.6325, "step": 1726 }, { "epoch": 0.09319518644433651, "grad_norm": 1.0822542723169275, "learning_rate": 9.80850181388859e-06, "loss": 0.5053, "step": 1727 }, { "epoch": 0.0932491500728509, "grad_norm": 1.120562931452158, "learning_rate": 9.808281568676032e-06, "loss": 0.4591, "step": 1728 }, { "epoch": 0.09330311370136528, "grad_norm": 1.0021283275466906, "learning_rate": 9.808061199639105e-06, "loss": 0.5276, "step": 1729 }, { "epoch": 0.09335707732987966, "grad_norm": 1.2798593177691098, "learning_rate": 9.807840706784146e-06, "loss": 0.5406, "step": 1730 }, { "epoch": 0.09341104095839405, "grad_norm": 0.8934191089316476, "learning_rate": 9.807620090117494e-06, "loss": 0.4272, "step": 1731 }, { "epoch": 0.09346500458690843, "grad_norm": 1.1813582621611651, "learning_rate": 9.807399349645486e-06, "loss": 0.6701, "step": 1732 }, { "epoch": 0.0935189682154228, "grad_norm": 1.1172849750615685, "learning_rate": 9.80717848537447e-06, "loss": 0.4658, "step": 1733 }, { "epoch": 0.09357293184393718, "grad_norm": 1.1189582016106767, "learning_rate": 9.806957497310792e-06, "loss": 0.4672, "step": 1734 }, { "epoch": 0.09362689547245157, "grad_norm": 1.038513728809273, "learning_rate": 9.806736385460804e-06, "loss": 0.4507, "step": 1735 }, { "epoch": 0.09368085910096595, "grad_norm": 0.9949872424989441, "learning_rate": 9.80651514983086e-06, "loss": 0.5313, "step": 1736 }, { "epoch": 0.09373482272948033, "grad_norm": 1.268630327038093, "learning_rate": 9.80629379042732e-06, "loss": 0.6811, "step": 1737 }, { "epoch": 0.09378878635799472, "grad_norm": 0.9852084071710385, "learning_rate": 9.806072307256544e-06, "loss": 0.4773, "step": 1738 }, { "epoch": 
0.0938427499865091, "grad_norm": 1.3832159594107338, "learning_rate": 9.8058507003249e-06, "loss": 0.5529, "step": 1739 }, { "epoch": 0.09389671361502347, "grad_norm": 1.1398019688137881, "learning_rate": 9.805628969638757e-06, "loss": 0.4677, "step": 1740 }, { "epoch": 0.09395067724353785, "grad_norm": 1.2312960713727539, "learning_rate": 9.805407115204485e-06, "loss": 0.6004, "step": 1741 }, { "epoch": 0.09400464087205224, "grad_norm": 1.3078251924291642, "learning_rate": 9.805185137028462e-06, "loss": 0.5637, "step": 1742 }, { "epoch": 0.09405860450056662, "grad_norm": 1.273966544786765, "learning_rate": 9.804963035117068e-06, "loss": 0.652, "step": 1743 }, { "epoch": 0.094112568129081, "grad_norm": 1.1369745280783805, "learning_rate": 9.804740809476688e-06, "loss": 0.6, "step": 1744 }, { "epoch": 0.09416653175759539, "grad_norm": 0.8609721359305004, "learning_rate": 9.804518460113706e-06, "loss": 0.4077, "step": 1745 }, { "epoch": 0.09422049538610976, "grad_norm": 1.139629018203808, "learning_rate": 9.804295987034515e-06, "loss": 0.5478, "step": 1746 }, { "epoch": 0.09427445901462414, "grad_norm": 0.9120199284557523, "learning_rate": 9.804073390245508e-06, "loss": 0.4141, "step": 1747 }, { "epoch": 0.09432842264313852, "grad_norm": 0.9657490366756944, "learning_rate": 9.803850669753083e-06, "loss": 0.413, "step": 1748 }, { "epoch": 0.09438238627165291, "grad_norm": 1.0892512503475946, "learning_rate": 9.803627825563641e-06, "loss": 0.5968, "step": 1749 }, { "epoch": 0.09443634990016729, "grad_norm": 1.3626845502378782, "learning_rate": 9.803404857683588e-06, "loss": 0.516, "step": 1750 }, { "epoch": 0.09449031352868167, "grad_norm": 1.1209065797502615, "learning_rate": 9.80318176611933e-06, "loss": 0.5044, "step": 1751 }, { "epoch": 0.09454427715719604, "grad_norm": 1.2686221369868365, "learning_rate": 9.80295855087728e-06, "loss": 0.7517, "step": 1752 }, { "epoch": 0.09459824078571043, "grad_norm": 1.3084350909515148, "learning_rate": 9.802735211963853e-06, "loss": 0.6232, "step": 1753 }, { "epoch": 0.09465220441422481, "grad_norm": 1.261863390994182, "learning_rate": 9.802511749385468e-06, "loss": 0.572, "step": 1754 }, { "epoch": 0.0947061680427392, "grad_norm": 0.9056923726438948, "learning_rate": 9.80228816314855e-06, "loss": 0.4114, "step": 1755 }, { "epoch": 0.09476013167125358, "grad_norm": 0.9738461539492286, "learning_rate": 9.802064453259521e-06, "loss": 0.3676, "step": 1756 }, { "epoch": 0.09481409529976796, "grad_norm": 1.1412149410123702, "learning_rate": 9.801840619724813e-06, "loss": 0.5079, "step": 1757 }, { "epoch": 0.09486805892828233, "grad_norm": 1.1537521167338853, "learning_rate": 9.80161666255086e-06, "loss": 0.5674, "step": 1758 }, { "epoch": 0.09492202255679671, "grad_norm": 1.123774222484853, "learning_rate": 9.801392581744097e-06, "loss": 0.5131, "step": 1759 }, { "epoch": 0.0949759861853111, "grad_norm": 1.0388352692224978, "learning_rate": 9.801168377310965e-06, "loss": 0.5121, "step": 1760 }, { "epoch": 0.09502994981382548, "grad_norm": 0.9909389700218928, "learning_rate": 9.800944049257908e-06, "loss": 0.4373, "step": 1761 }, { "epoch": 0.09508391344233987, "grad_norm": 0.9891729319816386, "learning_rate": 9.800719597591371e-06, "loss": 0.519, "step": 1762 }, { "epoch": 0.09513787707085425, "grad_norm": 1.0633666047469092, "learning_rate": 9.80049502231781e-06, "loss": 0.499, "step": 1763 }, { "epoch": 0.09519184069936863, "grad_norm": 1.0431196568476266, "learning_rate": 9.800270323443678e-06, "loss": 0.4682, "step": 1764 }, { "epoch": 
0.095245804327883, "grad_norm": 1.4035536849589416, "learning_rate": 9.800045500975428e-06, "loss": 0.688, "step": 1765 }, { "epoch": 0.09529976795639739, "grad_norm": 0.9686498850327151, "learning_rate": 9.799820554919528e-06, "loss": 0.4384, "step": 1766 }, { "epoch": 0.09535373158491177, "grad_norm": 0.9894101258175626, "learning_rate": 9.79959548528244e-06, "loss": 0.4117, "step": 1767 }, { "epoch": 0.09540769521342615, "grad_norm": 1.3496947995128765, "learning_rate": 9.799370292070634e-06, "loss": 0.5401, "step": 1768 }, { "epoch": 0.09546165884194054, "grad_norm": 0.9816714609340523, "learning_rate": 9.799144975290579e-06, "loss": 0.4971, "step": 1769 }, { "epoch": 0.09551562247045492, "grad_norm": 1.2409106331323252, "learning_rate": 9.798919534948756e-06, "loss": 0.5532, "step": 1770 }, { "epoch": 0.09556958609896929, "grad_norm": 1.0021526498451414, "learning_rate": 9.798693971051641e-06, "loss": 0.4412, "step": 1771 }, { "epoch": 0.09562354972748367, "grad_norm": 1.0864988722263433, "learning_rate": 9.798468283605716e-06, "loss": 0.4165, "step": 1772 }, { "epoch": 0.09567751335599806, "grad_norm": 1.0066569326563153, "learning_rate": 9.798242472617471e-06, "loss": 0.4913, "step": 1773 }, { "epoch": 0.09573147698451244, "grad_norm": 1.2266450087530774, "learning_rate": 9.798016538093396e-06, "loss": 0.562, "step": 1774 }, { "epoch": 0.09578544061302682, "grad_norm": 1.237983520389527, "learning_rate": 9.79779048003998e-06, "loss": 0.651, "step": 1775 }, { "epoch": 0.0958394042415412, "grad_norm": 1.299106086132227, "learning_rate": 9.797564298463725e-06, "loss": 0.5381, "step": 1776 }, { "epoch": 0.09589336787005558, "grad_norm": 1.0839563005024957, "learning_rate": 9.797337993371128e-06, "loss": 0.517, "step": 1777 }, { "epoch": 0.09594733149856996, "grad_norm": 1.1913820795935592, "learning_rate": 9.797111564768696e-06, "loss": 0.453, "step": 1778 }, { "epoch": 0.09600129512708434, "grad_norm": 1.1079006742861335, "learning_rate": 9.796885012662936e-06, "loss": 0.5147, "step": 1779 }, { "epoch": 0.09605525875559873, "grad_norm": 1.129216268516609, "learning_rate": 9.796658337060357e-06, "loss": 0.4553, "step": 1780 }, { "epoch": 0.09610922238411311, "grad_norm": 1.0722575770751104, "learning_rate": 9.796431537967477e-06, "loss": 0.5327, "step": 1781 }, { "epoch": 0.0961631860126275, "grad_norm": 1.1813500952193647, "learning_rate": 9.796204615390814e-06, "loss": 0.6856, "step": 1782 }, { "epoch": 0.09621714964114186, "grad_norm": 1.0913983396043994, "learning_rate": 9.795977569336891e-06, "loss": 0.4578, "step": 1783 }, { "epoch": 0.09627111326965625, "grad_norm": 1.301097327416357, "learning_rate": 9.79575039981223e-06, "loss": 0.6079, "step": 1784 }, { "epoch": 0.09632507689817063, "grad_norm": 1.0548437808049047, "learning_rate": 9.79552310682336e-06, "loss": 0.4871, "step": 1785 }, { "epoch": 0.09637904052668501, "grad_norm": 1.0807952374555794, "learning_rate": 9.795295690376818e-06, "loss": 0.4061, "step": 1786 }, { "epoch": 0.0964330041551994, "grad_norm": 0.9448667901122797, "learning_rate": 9.79506815047914e-06, "loss": 0.3571, "step": 1787 }, { "epoch": 0.09648696778371378, "grad_norm": 0.9529909949888399, "learning_rate": 9.794840487136858e-06, "loss": 0.4449, "step": 1788 }, { "epoch": 0.09654093141222816, "grad_norm": 1.1814576992176162, "learning_rate": 9.794612700356525e-06, "loss": 0.4849, "step": 1789 }, { "epoch": 0.09659489504074253, "grad_norm": 0.9458414707473647, "learning_rate": 9.794384790144682e-06, "loss": 0.4459, "step": 1790 }, { "epoch": 
0.09664885866925692, "grad_norm": 1.062069498421552, "learning_rate": 9.794156756507881e-06, "loss": 0.5639, "step": 1791 }, { "epoch": 0.0967028222977713, "grad_norm": 1.123282884208523, "learning_rate": 9.793928599452675e-06, "loss": 0.71, "step": 1792 }, { "epoch": 0.09675678592628568, "grad_norm": 1.217075319920651, "learning_rate": 9.793700318985624e-06, "loss": 0.5692, "step": 1793 }, { "epoch": 0.09681074955480007, "grad_norm": 1.35123143598289, "learning_rate": 9.793471915113287e-06, "loss": 0.7978, "step": 1794 }, { "epoch": 0.09686471318331445, "grad_norm": 1.019110242222147, "learning_rate": 9.79324338784223e-06, "loss": 0.5335, "step": 1795 }, { "epoch": 0.09691867681182882, "grad_norm": 1.0913769935948132, "learning_rate": 9.79301473717902e-06, "loss": 0.564, "step": 1796 }, { "epoch": 0.0969726404403432, "grad_norm": 1.1513368941302295, "learning_rate": 9.792785963130225e-06, "loss": 0.5794, "step": 1797 }, { "epoch": 0.09702660406885759, "grad_norm": 1.0586796465639843, "learning_rate": 9.792557065702427e-06, "loss": 0.4919, "step": 1798 }, { "epoch": 0.09708056769737197, "grad_norm": 1.0502276852068373, "learning_rate": 9.792328044902202e-06, "loss": 0.4535, "step": 1799 }, { "epoch": 0.09713453132588636, "grad_norm": 1.063088844539688, "learning_rate": 9.79209890073613e-06, "loss": 0.5, "step": 1800 }, { "epoch": 0.09718849495440074, "grad_norm": 0.9636081820731449, "learning_rate": 9.791869633210798e-06, "loss": 0.4989, "step": 1801 }, { "epoch": 0.09724245858291511, "grad_norm": 1.1209434214124236, "learning_rate": 9.7916402423328e-06, "loss": 0.4977, "step": 1802 }, { "epoch": 0.09729642221142949, "grad_norm": 1.1209376618342108, "learning_rate": 9.791410728108722e-06, "loss": 0.5485, "step": 1803 }, { "epoch": 0.09735038583994388, "grad_norm": 1.0658658566139085, "learning_rate": 9.791181090545164e-06, "loss": 0.5307, "step": 1804 }, { "epoch": 0.09740434946845826, "grad_norm": 0.9498123257113005, "learning_rate": 9.790951329648725e-06, "loss": 0.4237, "step": 1805 }, { "epoch": 0.09745831309697264, "grad_norm": 1.444550404742815, "learning_rate": 9.790721445426011e-06, "loss": 0.6454, "step": 1806 }, { "epoch": 0.09751227672548703, "grad_norm": 1.101732025832489, "learning_rate": 9.790491437883625e-06, "loss": 0.5342, "step": 1807 }, { "epoch": 0.0975662403540014, "grad_norm": 1.1787976384381782, "learning_rate": 9.790261307028183e-06, "loss": 0.5147, "step": 1808 }, { "epoch": 0.09762020398251578, "grad_norm": 0.951082357310187, "learning_rate": 9.790031052866293e-06, "loss": 0.5273, "step": 1809 }, { "epoch": 0.09767416761103016, "grad_norm": 0.9810628269929993, "learning_rate": 9.789800675404578e-06, "loss": 0.4761, "step": 1810 }, { "epoch": 0.09772813123954455, "grad_norm": 0.8623410505750264, "learning_rate": 9.789570174649655e-06, "loss": 0.4157, "step": 1811 }, { "epoch": 0.09778209486805893, "grad_norm": 0.9726672898715044, "learning_rate": 9.789339550608153e-06, "loss": 0.472, "step": 1812 }, { "epoch": 0.09783605849657331, "grad_norm": 0.9353190594260716, "learning_rate": 9.789108803286695e-06, "loss": 0.4506, "step": 1813 }, { "epoch": 0.0978900221250877, "grad_norm": 1.1331545689286646, "learning_rate": 9.78887793269192e-06, "loss": 0.6281, "step": 1814 }, { "epoch": 0.09794398575360207, "grad_norm": 1.2319173037194096, "learning_rate": 9.788646938830457e-06, "loss": 0.6923, "step": 1815 }, { "epoch": 0.09799794938211645, "grad_norm": 1.0209871537627047, "learning_rate": 9.78841582170895e-06, "loss": 0.392, "step": 1816 }, { "epoch": 0.09805191301063083, 
"grad_norm": 0.9867241653850197, "learning_rate": 9.788184581334036e-06, "loss": 0.496, "step": 1817 }, { "epoch": 0.09810587663914522, "grad_norm": 1.0244230279437918, "learning_rate": 9.787953217712367e-06, "loss": 0.4196, "step": 1818 }, { "epoch": 0.0981598402676596, "grad_norm": 1.0899912281690927, "learning_rate": 9.787721730850588e-06, "loss": 0.4899, "step": 1819 }, { "epoch": 0.09821380389617398, "grad_norm": 1.0335985348672776, "learning_rate": 9.787490120755355e-06, "loss": 0.4097, "step": 1820 }, { "epoch": 0.09826776752468835, "grad_norm": 1.2996907616305213, "learning_rate": 9.787258387433323e-06, "loss": 0.6559, "step": 1821 }, { "epoch": 0.09832173115320274, "grad_norm": 0.9206905533173715, "learning_rate": 9.787026530891153e-06, "loss": 0.4209, "step": 1822 }, { "epoch": 0.09837569478171712, "grad_norm": 1.0649630561016947, "learning_rate": 9.786794551135509e-06, "loss": 0.4777, "step": 1823 }, { "epoch": 0.0984296584102315, "grad_norm": 1.1307178076231366, "learning_rate": 9.786562448173059e-06, "loss": 0.6167, "step": 1824 }, { "epoch": 0.09848362203874589, "grad_norm": 1.0260615660667647, "learning_rate": 9.786330222010471e-06, "loss": 0.4616, "step": 1825 }, { "epoch": 0.09853758566726027, "grad_norm": 0.8048563765739971, "learning_rate": 9.786097872654423e-06, "loss": 0.4392, "step": 1826 }, { "epoch": 0.09859154929577464, "grad_norm": 1.3138791829106728, "learning_rate": 9.785865400111593e-06, "loss": 0.5068, "step": 1827 }, { "epoch": 0.09864551292428902, "grad_norm": 1.1264502419904723, "learning_rate": 9.785632804388658e-06, "loss": 0.4967, "step": 1828 }, { "epoch": 0.09869947655280341, "grad_norm": 0.9930704575281883, "learning_rate": 9.785400085492307e-06, "loss": 0.512, "step": 1829 }, { "epoch": 0.09875344018131779, "grad_norm": 1.2054757116428756, "learning_rate": 9.785167243429226e-06, "loss": 0.8699, "step": 1830 }, { "epoch": 0.09880740380983218, "grad_norm": 1.2464665362339837, "learning_rate": 9.78493427820611e-06, "loss": 0.5111, "step": 1831 }, { "epoch": 0.09886136743834656, "grad_norm": 0.995354033237861, "learning_rate": 9.784701189829654e-06, "loss": 0.3865, "step": 1832 }, { "epoch": 0.09891533106686094, "grad_norm": 0.9982282218056565, "learning_rate": 9.784467978306556e-06, "loss": 0.5704, "step": 1833 }, { "epoch": 0.09896929469537531, "grad_norm": 1.1208261196725686, "learning_rate": 9.784234643643518e-06, "loss": 0.6302, "step": 1834 }, { "epoch": 0.0990232583238897, "grad_norm": 0.9558653700993097, "learning_rate": 9.78400118584725e-06, "loss": 0.5035, "step": 1835 }, { "epoch": 0.09907722195240408, "grad_norm": 1.0012625298386546, "learning_rate": 9.783767604924455e-06, "loss": 0.6002, "step": 1836 }, { "epoch": 0.09913118558091846, "grad_norm": 1.6742097593036334, "learning_rate": 9.783533900881853e-06, "loss": 0.7644, "step": 1837 }, { "epoch": 0.09918514920943285, "grad_norm": 1.059317947976064, "learning_rate": 9.78330007372616e-06, "loss": 0.3876, "step": 1838 }, { "epoch": 0.09923911283794723, "grad_norm": 1.1821808255801756, "learning_rate": 9.783066123464091e-06, "loss": 0.5948, "step": 1839 }, { "epoch": 0.0992930764664616, "grad_norm": 1.238947540278007, "learning_rate": 9.782832050102378e-06, "loss": 0.6476, "step": 1840 }, { "epoch": 0.09934704009497598, "grad_norm": 0.9430134847310206, "learning_rate": 9.782597853647743e-06, "loss": 0.4044, "step": 1841 }, { "epoch": 0.09940100372349037, "grad_norm": 1.2301054647503713, "learning_rate": 9.782363534106917e-06, "loss": 0.6224, "step": 1842 }, { "epoch": 0.09945496735200475, 
"grad_norm": 1.178245806928214, "learning_rate": 9.782129091486638e-06, "loss": 0.5336, "step": 1843 }, { "epoch": 0.09950893098051913, "grad_norm": 1.0221590079033773, "learning_rate": 9.781894525793641e-06, "loss": 0.494, "step": 1844 }, { "epoch": 0.09956289460903352, "grad_norm": 1.1996440833919009, "learning_rate": 9.78165983703467e-06, "loss": 0.6876, "step": 1845 }, { "epoch": 0.09961685823754789, "grad_norm": 1.081663977146522, "learning_rate": 9.781425025216467e-06, "loss": 0.4538, "step": 1846 }, { "epoch": 0.09967082186606227, "grad_norm": 1.2547613743094457, "learning_rate": 9.781190090345784e-06, "loss": 0.6449, "step": 1847 }, { "epoch": 0.09972478549457665, "grad_norm": 1.087594038997108, "learning_rate": 9.780955032429372e-06, "loss": 0.5822, "step": 1848 }, { "epoch": 0.09977874912309104, "grad_norm": 1.1864787589616799, "learning_rate": 9.780719851473985e-06, "loss": 0.5884, "step": 1849 }, { "epoch": 0.09983271275160542, "grad_norm": 1.1043706902386765, "learning_rate": 9.780484547486388e-06, "loss": 0.5356, "step": 1850 }, { "epoch": 0.0998866763801198, "grad_norm": 1.1004756822246937, "learning_rate": 9.780249120473336e-06, "loss": 0.4644, "step": 1851 }, { "epoch": 0.09994064000863417, "grad_norm": 1.0306662951937564, "learning_rate": 9.780013570441601e-06, "loss": 0.5103, "step": 1852 }, { "epoch": 0.09999460363714856, "grad_norm": 1.172975462899251, "learning_rate": 9.779777897397951e-06, "loss": 0.6517, "step": 1853 }, { "epoch": 0.10004856726566294, "grad_norm": 1.2606032253971604, "learning_rate": 9.779542101349159e-06, "loss": 0.6306, "step": 1854 }, { "epoch": 0.10010253089417732, "grad_norm": 1.0079193323931124, "learning_rate": 9.779306182302007e-06, "loss": 0.4927, "step": 1855 }, { "epoch": 0.10015649452269171, "grad_norm": 1.1727876550411127, "learning_rate": 9.779070140263268e-06, "loss": 0.4622, "step": 1856 }, { "epoch": 0.10021045815120609, "grad_norm": 0.8888248268363738, "learning_rate": 9.77883397523973e-06, "loss": 0.4503, "step": 1857 }, { "epoch": 0.10026442177972048, "grad_norm": 1.0650611495111002, "learning_rate": 9.77859768723818e-06, "loss": 0.5226, "step": 1858 }, { "epoch": 0.10031838540823484, "grad_norm": 1.1361345648201215, "learning_rate": 9.77836127626541e-06, "loss": 0.4927, "step": 1859 }, { "epoch": 0.10037234903674923, "grad_norm": 0.9875975064934887, "learning_rate": 9.778124742328212e-06, "loss": 0.5089, "step": 1860 }, { "epoch": 0.10042631266526361, "grad_norm": 1.208188369414098, "learning_rate": 9.777888085433387e-06, "loss": 0.5662, "step": 1861 }, { "epoch": 0.100480276293778, "grad_norm": 0.9986593616226938, "learning_rate": 9.777651305587736e-06, "loss": 0.4445, "step": 1862 }, { "epoch": 0.10053423992229238, "grad_norm": 0.9914849452851272, "learning_rate": 9.777414402798065e-06, "loss": 0.4619, "step": 1863 }, { "epoch": 0.10058820355080676, "grad_norm": 1.1413348890840276, "learning_rate": 9.777177377071183e-06, "loss": 0.4622, "step": 1864 }, { "epoch": 0.10064216717932113, "grad_norm": 1.1994138704593589, "learning_rate": 9.776940228413899e-06, "loss": 0.5304, "step": 1865 }, { "epoch": 0.10069613080783552, "grad_norm": 1.0976185142941253, "learning_rate": 9.776702956833032e-06, "loss": 0.5702, "step": 1866 }, { "epoch": 0.1007500944363499, "grad_norm": 0.8346531393376027, "learning_rate": 9.7764655623354e-06, "loss": 0.3277, "step": 1867 }, { "epoch": 0.10080405806486428, "grad_norm": 1.121980705541515, "learning_rate": 9.776228044927828e-06, "loss": 0.6302, "step": 1868 }, { "epoch": 0.10085802169337867, 
"grad_norm": 1.2308518065437721, "learning_rate": 9.775990404617141e-06, "loss": 0.6476, "step": 1869 }, { "epoch": 0.10091198532189305, "grad_norm": 1.2447969601492699, "learning_rate": 9.775752641410169e-06, "loss": 0.6093, "step": 1870 }, { "epoch": 0.10096594895040742, "grad_norm": 0.9467125711371354, "learning_rate": 9.775514755313746e-06, "loss": 0.458, "step": 1871 }, { "epoch": 0.1010199125789218, "grad_norm": 1.208223856424701, "learning_rate": 9.775276746334708e-06, "loss": 0.5962, "step": 1872 }, { "epoch": 0.10107387620743619, "grad_norm": 0.9845535196052614, "learning_rate": 9.775038614479898e-06, "loss": 0.4954, "step": 1873 }, { "epoch": 0.10112783983595057, "grad_norm": 1.1984163519840867, "learning_rate": 9.774800359756158e-06, "loss": 0.6807, "step": 1874 }, { "epoch": 0.10118180346446495, "grad_norm": 1.126261603044235, "learning_rate": 9.774561982170337e-06, "loss": 0.6708, "step": 1875 }, { "epoch": 0.10123576709297934, "grad_norm": 1.4660249265947256, "learning_rate": 9.774323481729284e-06, "loss": 0.7612, "step": 1876 }, { "epoch": 0.1012897307214937, "grad_norm": 1.3514992175069376, "learning_rate": 9.774084858439857e-06, "loss": 0.7213, "step": 1877 }, { "epoch": 0.10134369435000809, "grad_norm": 1.165358703418103, "learning_rate": 9.77384611230891e-06, "loss": 0.5559, "step": 1878 }, { "epoch": 0.10139765797852247, "grad_norm": 0.9486946037174541, "learning_rate": 9.773607243343308e-06, "loss": 0.4351, "step": 1879 }, { "epoch": 0.10145162160703686, "grad_norm": 0.6860129533340775, "learning_rate": 9.773368251549918e-06, "loss": 0.2685, "step": 1880 }, { "epoch": 0.10150558523555124, "grad_norm": 1.0110395434502522, "learning_rate": 9.773129136935602e-06, "loss": 0.4514, "step": 1881 }, { "epoch": 0.10155954886406562, "grad_norm": 0.9933075677480542, "learning_rate": 9.772889899507242e-06, "loss": 0.5124, "step": 1882 }, { "epoch": 0.10161351249258001, "grad_norm": 1.237437058665746, "learning_rate": 9.772650539271705e-06, "loss": 0.4625, "step": 1883 }, { "epoch": 0.10166747612109438, "grad_norm": 0.9749338352717853, "learning_rate": 9.772411056235878e-06, "loss": 0.5221, "step": 1884 }, { "epoch": 0.10172143974960876, "grad_norm": 1.1970607217938785, "learning_rate": 9.772171450406637e-06, "loss": 0.6625, "step": 1885 }, { "epoch": 0.10177540337812314, "grad_norm": 1.169662906668592, "learning_rate": 9.771931721790874e-06, "loss": 0.5919, "step": 1886 }, { "epoch": 0.10182936700663753, "grad_norm": 1.2846094887357278, "learning_rate": 9.771691870395475e-06, "loss": 0.449, "step": 1887 }, { "epoch": 0.10188333063515191, "grad_norm": 1.0579844204100555, "learning_rate": 9.771451896227336e-06, "loss": 0.4532, "step": 1888 }, { "epoch": 0.1019372942636663, "grad_norm": 1.1597314171062565, "learning_rate": 9.771211799293354e-06, "loss": 0.5575, "step": 1889 }, { "epoch": 0.10199125789218066, "grad_norm": 0.9659196655786471, "learning_rate": 9.77097157960043e-06, "loss": 0.375, "step": 1890 }, { "epoch": 0.10204522152069505, "grad_norm": 0.8475788025608892, "learning_rate": 9.770731237155465e-06, "loss": 0.3533, "step": 1891 }, { "epoch": 0.10209918514920943, "grad_norm": 0.9958470031529646, "learning_rate": 9.77049077196537e-06, "loss": 0.4248, "step": 1892 }, { "epoch": 0.10215314877772382, "grad_norm": 1.1857783460869353, "learning_rate": 9.770250184037056e-06, "loss": 0.4717, "step": 1893 }, { "epoch": 0.1022071124062382, "grad_norm": 1.0591714113278927, "learning_rate": 9.770009473377437e-06, "loss": 0.5194, "step": 1894 }, { "epoch": 0.10226107603475258, 
"grad_norm": 1.122486941608562, "learning_rate": 9.76976863999343e-06, "loss": 0.5574, "step": 1895 }, { "epoch": 0.10231503966326695, "grad_norm": 1.2220364464661535, "learning_rate": 9.769527683891958e-06, "loss": 0.6314, "step": 1896 }, { "epoch": 0.10236900329178134, "grad_norm": 1.0533765017318564, "learning_rate": 9.769286605079947e-06, "loss": 0.5347, "step": 1897 }, { "epoch": 0.10242296692029572, "grad_norm": 1.2544918180915918, "learning_rate": 9.769045403564325e-06, "loss": 0.5127, "step": 1898 }, { "epoch": 0.1024769305488101, "grad_norm": 1.1509187331144999, "learning_rate": 9.768804079352025e-06, "loss": 0.5742, "step": 1899 }, { "epoch": 0.10253089417732449, "grad_norm": 1.218467989454776, "learning_rate": 9.768562632449982e-06, "loss": 0.6117, "step": 1900 }, { "epoch": 0.10258485780583887, "grad_norm": 0.9654140422632717, "learning_rate": 9.768321062865135e-06, "loss": 0.4095, "step": 1901 }, { "epoch": 0.10263882143435324, "grad_norm": 0.9060808909925768, "learning_rate": 9.768079370604431e-06, "loss": 0.457, "step": 1902 }, { "epoch": 0.10269278506286762, "grad_norm": 1.0915297243852264, "learning_rate": 9.76783755567481e-06, "loss": 0.5095, "step": 1903 }, { "epoch": 0.102746748691382, "grad_norm": 0.854957363160912, "learning_rate": 9.767595618083227e-06, "loss": 0.4444, "step": 1904 }, { "epoch": 0.10280071231989639, "grad_norm": 1.3057005954878795, "learning_rate": 9.767353557836633e-06, "loss": 0.7067, "step": 1905 }, { "epoch": 0.10285467594841077, "grad_norm": 1.2888731693275481, "learning_rate": 9.767111374941987e-06, "loss": 0.5779, "step": 1906 }, { "epoch": 0.10290863957692516, "grad_norm": 1.0462384817391857, "learning_rate": 9.766869069406245e-06, "loss": 0.5162, "step": 1907 }, { "epoch": 0.10296260320543954, "grad_norm": 0.868789715317468, "learning_rate": 9.766626641236379e-06, "loss": 0.3558, "step": 1908 }, { "epoch": 0.10301656683395391, "grad_norm": 1.0344716985952944, "learning_rate": 9.76638409043935e-06, "loss": 0.395, "step": 1909 }, { "epoch": 0.1030705304624683, "grad_norm": 1.1302954093544293, "learning_rate": 9.766141417022131e-06, "loss": 0.5591, "step": 1910 }, { "epoch": 0.10312449409098268, "grad_norm": 1.1115187130104567, "learning_rate": 9.765898620991699e-06, "loss": 0.5296, "step": 1911 }, { "epoch": 0.10317845771949706, "grad_norm": 1.0707296622789542, "learning_rate": 9.765655702355028e-06, "loss": 0.5228, "step": 1912 }, { "epoch": 0.10323242134801144, "grad_norm": 1.135206198426863, "learning_rate": 9.765412661119103e-06, "loss": 0.5881, "step": 1913 }, { "epoch": 0.10328638497652583, "grad_norm": 1.0137421601106908, "learning_rate": 9.765169497290908e-06, "loss": 0.4427, "step": 1914 }, { "epoch": 0.1033403486050402, "grad_norm": 1.041405489898531, "learning_rate": 9.764926210877434e-06, "loss": 0.5667, "step": 1915 }, { "epoch": 0.10339431223355458, "grad_norm": 0.8015744955615476, "learning_rate": 9.76468280188567e-06, "loss": 0.4133, "step": 1916 }, { "epoch": 0.10344827586206896, "grad_norm": 1.2761945552458638, "learning_rate": 9.764439270322612e-06, "loss": 0.5874, "step": 1917 }, { "epoch": 0.10350223949058335, "grad_norm": 0.8768767769631532, "learning_rate": 9.764195616195261e-06, "loss": 0.5022, "step": 1918 }, { "epoch": 0.10355620311909773, "grad_norm": 1.1827522028036292, "learning_rate": 9.76395183951062e-06, "loss": 0.6599, "step": 1919 }, { "epoch": 0.10361016674761211, "grad_norm": 1.0709601727285045, "learning_rate": 9.763707940275695e-06, "loss": 0.5559, "step": 1920 }, { "epoch": 0.10366413037612648, 
"grad_norm": 0.8464960943503373, "learning_rate": 9.763463918497494e-06, "loss": 0.4622, "step": 1921 }, { "epoch": 0.10371809400464087, "grad_norm": 1.0747608486689184, "learning_rate": 9.763219774183034e-06, "loss": 0.5414, "step": 1922 }, { "epoch": 0.10377205763315525, "grad_norm": 1.0538094625918362, "learning_rate": 9.762975507339328e-06, "loss": 0.4669, "step": 1923 }, { "epoch": 0.10382602126166963, "grad_norm": 1.0218935444697155, "learning_rate": 9.762731117973399e-06, "loss": 0.4382, "step": 1924 }, { "epoch": 0.10387998489018402, "grad_norm": 1.2765613911370342, "learning_rate": 9.76248660609227e-06, "loss": 0.6027, "step": 1925 }, { "epoch": 0.1039339485186984, "grad_norm": 1.1135633334890616, "learning_rate": 9.76224197170297e-06, "loss": 0.4015, "step": 1926 }, { "epoch": 0.10398791214721279, "grad_norm": 1.188900621126394, "learning_rate": 9.76199721481253e-06, "loss": 0.6685, "step": 1927 }, { "epoch": 0.10404187577572716, "grad_norm": 1.3723524816982617, "learning_rate": 9.761752335427984e-06, "loss": 0.6583, "step": 1928 }, { "epoch": 0.10409583940424154, "grad_norm": 1.051500901366465, "learning_rate": 9.761507333556368e-06, "loss": 0.5768, "step": 1929 }, { "epoch": 0.10414980303275592, "grad_norm": 1.2811555752558006, "learning_rate": 9.761262209204724e-06, "loss": 0.6703, "step": 1930 }, { "epoch": 0.1042037666612703, "grad_norm": 1.2065967281972392, "learning_rate": 9.7610169623801e-06, "loss": 0.79, "step": 1931 }, { "epoch": 0.10425773028978469, "grad_norm": 1.0255104229522647, "learning_rate": 9.760771593089544e-06, "loss": 0.4396, "step": 1932 }, { "epoch": 0.10431169391829907, "grad_norm": 0.9753122444393113, "learning_rate": 9.760526101340106e-06, "loss": 0.3077, "step": 1933 }, { "epoch": 0.10436565754681344, "grad_norm": 1.369651160929907, "learning_rate": 9.760280487138845e-06, "loss": 0.5699, "step": 1934 }, { "epoch": 0.10441962117532783, "grad_norm": 1.0337622269568871, "learning_rate": 9.760034750492815e-06, "loss": 0.4813, "step": 1935 }, { "epoch": 0.10447358480384221, "grad_norm": 1.0734827658009616, "learning_rate": 9.759788891409083e-06, "loss": 0.5315, "step": 1936 }, { "epoch": 0.10452754843235659, "grad_norm": 0.9120105406121684, "learning_rate": 9.759542909894717e-06, "loss": 0.4425, "step": 1937 }, { "epoch": 0.10458151206087098, "grad_norm": 1.1344171256380333, "learning_rate": 9.759296805956778e-06, "loss": 0.5243, "step": 1938 }, { "epoch": 0.10463547568938536, "grad_norm": 1.3691033689696215, "learning_rate": 9.75905057960235e-06, "loss": 0.6494, "step": 1939 }, { "epoch": 0.10468943931789973, "grad_norm": 1.0077312221045187, "learning_rate": 9.758804230838502e-06, "loss": 0.4312, "step": 1940 }, { "epoch": 0.10474340294641411, "grad_norm": 1.151571797197468, "learning_rate": 9.758557759672318e-06, "loss": 0.5536, "step": 1941 }, { "epoch": 0.1047973665749285, "grad_norm": 1.366696536457912, "learning_rate": 9.758311166110882e-06, "loss": 0.6511, "step": 1942 }, { "epoch": 0.10485133020344288, "grad_norm": 1.1269916955119221, "learning_rate": 9.758064450161278e-06, "loss": 0.4899, "step": 1943 }, { "epoch": 0.10490529383195726, "grad_norm": 0.8255439351284474, "learning_rate": 9.757817611830602e-06, "loss": 0.3823, "step": 1944 }, { "epoch": 0.10495925746047165, "grad_norm": 1.1549598155843972, "learning_rate": 9.757570651125945e-06, "loss": 0.5465, "step": 1945 }, { "epoch": 0.10501322108898602, "grad_norm": 1.2461128575807885, "learning_rate": 9.757323568054405e-06, "loss": 0.6161, "step": 1946 }, { "epoch": 0.1050671847175004, 
"grad_norm": 1.1585262251951964, "learning_rate": 9.757076362623083e-06, "loss": 0.5056, "step": 1947 }, { "epoch": 0.10512114834601478, "grad_norm": 1.3014714051098684, "learning_rate": 9.756829034839086e-06, "loss": 0.7011, "step": 1948 }, { "epoch": 0.10517511197452917, "grad_norm": 1.0012839196414296, "learning_rate": 9.75658158470952e-06, "loss": 0.6167, "step": 1949 }, { "epoch": 0.10522907560304355, "grad_norm": 1.1769901577350173, "learning_rate": 9.756334012241501e-06, "loss": 0.5975, "step": 1950 }, { "epoch": 0.10528303923155793, "grad_norm": 1.1265232575338653, "learning_rate": 9.756086317442141e-06, "loss": 0.4979, "step": 1951 }, { "epoch": 0.10533700286007232, "grad_norm": 1.3404783447215725, "learning_rate": 9.755838500318558e-06, "loss": 0.5928, "step": 1952 }, { "epoch": 0.10539096648858669, "grad_norm": 1.0660806744304923, "learning_rate": 9.755590560877878e-06, "loss": 0.4715, "step": 1953 }, { "epoch": 0.10544493011710107, "grad_norm": 1.3687208201551049, "learning_rate": 9.755342499127223e-06, "loss": 0.7336, "step": 1954 }, { "epoch": 0.10549889374561545, "grad_norm": 1.0976752123329738, "learning_rate": 9.755094315073728e-06, "loss": 0.5251, "step": 1955 }, { "epoch": 0.10555285737412984, "grad_norm": 1.0175485014124146, "learning_rate": 9.754846008724522e-06, "loss": 0.5214, "step": 1956 }, { "epoch": 0.10560682100264422, "grad_norm": 1.0757956040202814, "learning_rate": 9.754597580086741e-06, "loss": 0.6126, "step": 1957 }, { "epoch": 0.1056607846311586, "grad_norm": 1.3058368333925987, "learning_rate": 9.754349029167529e-06, "loss": 0.7183, "step": 1958 }, { "epoch": 0.10571474825967297, "grad_norm": 1.0926702304581917, "learning_rate": 9.754100355974024e-06, "loss": 0.5207, "step": 1959 }, { "epoch": 0.10576871188818736, "grad_norm": 1.2346418114399282, "learning_rate": 9.75385156051338e-06, "loss": 0.4418, "step": 1960 }, { "epoch": 0.10582267551670174, "grad_norm": 1.1853217431163343, "learning_rate": 9.753602642792739e-06, "loss": 0.6107, "step": 1961 }, { "epoch": 0.10587663914521613, "grad_norm": 1.0503762219815762, "learning_rate": 9.753353602819264e-06, "loss": 0.5852, "step": 1962 }, { "epoch": 0.10593060277373051, "grad_norm": 1.1059707897708044, "learning_rate": 9.753104440600107e-06, "loss": 0.4826, "step": 1963 }, { "epoch": 0.10598456640224489, "grad_norm": 1.0704021597905693, "learning_rate": 9.752855156142433e-06, "loss": 0.5172, "step": 1964 }, { "epoch": 0.10603853003075926, "grad_norm": 1.231331718989751, "learning_rate": 9.752605749453404e-06, "loss": 0.5151, "step": 1965 }, { "epoch": 0.10609249365927365, "grad_norm": 1.1303527860448364, "learning_rate": 9.752356220540188e-06, "loss": 0.5823, "step": 1966 }, { "epoch": 0.10614645728778803, "grad_norm": 0.9725785999061783, "learning_rate": 9.752106569409958e-06, "loss": 0.3771, "step": 1967 }, { "epoch": 0.10620042091630241, "grad_norm": 1.2282665862506168, "learning_rate": 9.751856796069887e-06, "loss": 0.507, "step": 1968 }, { "epoch": 0.1062543845448168, "grad_norm": 1.227974916413366, "learning_rate": 9.751606900527157e-06, "loss": 0.5988, "step": 1969 }, { "epoch": 0.10630834817333118, "grad_norm": 0.9658033899837036, "learning_rate": 9.75135688278895e-06, "loss": 0.4305, "step": 1970 }, { "epoch": 0.10636231180184555, "grad_norm": 0.9774777307316999, "learning_rate": 9.751106742862449e-06, "loss": 0.4212, "step": 1971 }, { "epoch": 0.10641627543035993, "grad_norm": 1.1506168800102283, "learning_rate": 9.750856480754845e-06, "loss": 0.6729, "step": 1972 }, { "epoch": 0.10647023905887432, 
"grad_norm": 1.0171715937156907, "learning_rate": 9.75060609647333e-06, "loss": 0.4997, "step": 1973 }, { "epoch": 0.1065242026873887, "grad_norm": 1.23918714731053, "learning_rate": 9.750355590025102e-06, "loss": 0.5971, "step": 1974 }, { "epoch": 0.10657816631590308, "grad_norm": 1.1331798037533234, "learning_rate": 9.75010496141736e-06, "loss": 0.6696, "step": 1975 }, { "epoch": 0.10663212994441747, "grad_norm": 0.862378184423155, "learning_rate": 9.749854210657304e-06, "loss": 0.4031, "step": 1976 }, { "epoch": 0.10668609357293185, "grad_norm": 1.3031839012483875, "learning_rate": 9.749603337752147e-06, "loss": 0.4922, "step": 1977 }, { "epoch": 0.10674005720144622, "grad_norm": 0.834720570968053, "learning_rate": 9.749352342709094e-06, "loss": 0.3966, "step": 1978 }, { "epoch": 0.1067940208299606, "grad_norm": 0.9881052096592996, "learning_rate": 9.749101225535364e-06, "loss": 0.4956, "step": 1979 }, { "epoch": 0.10684798445847499, "grad_norm": 1.1064283541599667, "learning_rate": 9.748849986238168e-06, "loss": 0.4747, "step": 1980 }, { "epoch": 0.10690194808698937, "grad_norm": 1.255985580021028, "learning_rate": 9.748598624824732e-06, "loss": 0.6021, "step": 1981 }, { "epoch": 0.10695591171550375, "grad_norm": 1.4403620330298401, "learning_rate": 9.748347141302278e-06, "loss": 0.8397, "step": 1982 }, { "epoch": 0.10700987534401814, "grad_norm": 1.0841775932535316, "learning_rate": 9.748095535678034e-06, "loss": 0.4786, "step": 1983 }, { "epoch": 0.10706383897253251, "grad_norm": 1.2428417221318295, "learning_rate": 9.747843807959234e-06, "loss": 0.5827, "step": 1984 }, { "epoch": 0.10711780260104689, "grad_norm": 0.971418060580997, "learning_rate": 9.74759195815311e-06, "loss": 0.4969, "step": 1985 }, { "epoch": 0.10717176622956127, "grad_norm": 1.304176071243054, "learning_rate": 9.7473399862669e-06, "loss": 0.6962, "step": 1986 }, { "epoch": 0.10722572985807566, "grad_norm": 1.2448690984846835, "learning_rate": 9.747087892307849e-06, "loss": 0.5538, "step": 1987 }, { "epoch": 0.10727969348659004, "grad_norm": 1.0846650476547828, "learning_rate": 9.746835676283198e-06, "loss": 0.5185, "step": 1988 }, { "epoch": 0.10733365711510442, "grad_norm": 1.100617112690641, "learning_rate": 9.746583338200202e-06, "loss": 0.6433, "step": 1989 }, { "epoch": 0.1073876207436188, "grad_norm": 0.8669020274209454, "learning_rate": 9.746330878066107e-06, "loss": 0.3392, "step": 1990 }, { "epoch": 0.10744158437213318, "grad_norm": 1.0775369463586923, "learning_rate": 9.746078295888173e-06, "loss": 0.5139, "step": 1991 }, { "epoch": 0.10749554800064756, "grad_norm": 0.8529980238729221, "learning_rate": 9.745825591673658e-06, "loss": 0.2807, "step": 1992 }, { "epoch": 0.10754951162916195, "grad_norm": 0.925742204592247, "learning_rate": 9.745572765429827e-06, "loss": 0.483, "step": 1993 }, { "epoch": 0.10760347525767633, "grad_norm": 1.0341754172552462, "learning_rate": 9.745319817163945e-06, "loss": 0.5378, "step": 1994 }, { "epoch": 0.10765743888619071, "grad_norm": 1.1535429085742888, "learning_rate": 9.74506674688328e-06, "loss": 0.4762, "step": 1995 }, { "epoch": 0.10771140251470508, "grad_norm": 1.09618826383207, "learning_rate": 9.744813554595108e-06, "loss": 0.5686, "step": 1996 }, { "epoch": 0.10776536614321947, "grad_norm": 1.2670251559217747, "learning_rate": 9.744560240306703e-06, "loss": 0.6233, "step": 1997 }, { "epoch": 0.10781932977173385, "grad_norm": 1.1273646205289356, "learning_rate": 9.744306804025351e-06, "loss": 0.5205, "step": 1998 }, { "epoch": 0.10787329340024823, "grad_norm": 
1.2114931709774968, "learning_rate": 9.744053245758334e-06, "loss": 0.5368, "step": 1999 }, { "epoch": 0.10792725702876262, "grad_norm": 0.9709901278473052, "learning_rate": 9.743799565512937e-06, "loss": 0.3807, "step": 2000 }, { "epoch": 0.10792725702876262, "eval_loss": 0.6009196639060974, "eval_runtime": 165.0692, "eval_samples_per_second": 20.834, "eval_steps_per_second": 0.872, "step": 2000 }, { "epoch": 0.107981220657277, "grad_norm": 1.086327057488277, "learning_rate": 9.743545763296451e-06, "loss": 0.5048, "step": 2001 }, { "epoch": 0.10803518428579138, "grad_norm": 0.9618518192303469, "learning_rate": 9.743291839116173e-06, "loss": 0.4861, "step": 2002 }, { "epoch": 0.10808914791430575, "grad_norm": 1.0349094577675364, "learning_rate": 9.7430377929794e-06, "loss": 0.5862, "step": 2003 }, { "epoch": 0.10814311154282014, "grad_norm": 1.0807198401468234, "learning_rate": 9.742783624893433e-06, "loss": 0.5229, "step": 2004 }, { "epoch": 0.10819707517133452, "grad_norm": 1.1473711659554733, "learning_rate": 9.74252933486558e-06, "loss": 0.4916, "step": 2005 }, { "epoch": 0.1082510387998489, "grad_norm": 1.1290787695001119, "learning_rate": 9.742274922903144e-06, "loss": 0.5355, "step": 2006 }, { "epoch": 0.10830500242836329, "grad_norm": 0.9942238419896946, "learning_rate": 9.742020389013443e-06, "loss": 0.3975, "step": 2007 }, { "epoch": 0.10835896605687767, "grad_norm": 0.7861395908615773, "learning_rate": 9.741765733203787e-06, "loss": 0.3258, "step": 2008 }, { "epoch": 0.10841292968539204, "grad_norm": 1.033835244951, "learning_rate": 9.7415109554815e-06, "loss": 0.4824, "step": 2009 }, { "epoch": 0.10846689331390642, "grad_norm": 1.0998800748785262, "learning_rate": 9.7412560558539e-06, "loss": 0.45, "step": 2010 }, { "epoch": 0.1085208569424208, "grad_norm": 1.0364260881550968, "learning_rate": 9.741001034328317e-06, "loss": 0.466, "step": 2011 }, { "epoch": 0.10857482057093519, "grad_norm": 1.558558225493801, "learning_rate": 9.74074589091208e-06, "loss": 0.6723, "step": 2012 }, { "epoch": 0.10862878419944957, "grad_norm": 1.0105016772140298, "learning_rate": 9.740490625612521e-06, "loss": 0.5403, "step": 2013 }, { "epoch": 0.10868274782796396, "grad_norm": 0.8278394022293022, "learning_rate": 9.740235238436975e-06, "loss": 0.4206, "step": 2014 }, { "epoch": 0.10873671145647833, "grad_norm": 1.1417752499540412, "learning_rate": 9.739979729392786e-06, "loss": 0.5724, "step": 2015 }, { "epoch": 0.10879067508499271, "grad_norm": 0.8586195525028376, "learning_rate": 9.739724098487294e-06, "loss": 0.4163, "step": 2016 }, { "epoch": 0.1088446387135071, "grad_norm": 1.1747285478241036, "learning_rate": 9.739468345727847e-06, "loss": 0.5618, "step": 2017 }, { "epoch": 0.10889860234202148, "grad_norm": 1.2407179989122474, "learning_rate": 9.739212471121796e-06, "loss": 0.603, "step": 2018 }, { "epoch": 0.10895256597053586, "grad_norm": 1.275554684918799, "learning_rate": 9.738956474676497e-06, "loss": 0.6539, "step": 2019 }, { "epoch": 0.10900652959905024, "grad_norm": 1.280343135592382, "learning_rate": 9.738700356399305e-06, "loss": 0.5992, "step": 2020 }, { "epoch": 0.10906049322756463, "grad_norm": 1.019487658172093, "learning_rate": 9.738444116297581e-06, "loss": 0.4199, "step": 2021 }, { "epoch": 0.109114456856079, "grad_norm": 0.9937539966186173, "learning_rate": 9.738187754378692e-06, "loss": 0.4821, "step": 2022 }, { "epoch": 0.10916842048459338, "grad_norm": 0.7000724513408018, "learning_rate": 9.737931270650002e-06, "loss": 0.2971, "step": 2023 }, { "epoch": 
0.10922238411310776, "grad_norm": 1.4876554142497398, "learning_rate": 9.737674665118887e-06, "loss": 0.7589, "step": 2024 }, { "epoch": 0.10927634774162215, "grad_norm": 0.9254853080874313, "learning_rate": 9.73741793779272e-06, "loss": 0.3833, "step": 2025 }, { "epoch": 0.10933031137013653, "grad_norm": 0.9243946785957957, "learning_rate": 9.73716108867888e-06, "loss": 0.3512, "step": 2026 }, { "epoch": 0.10938427499865092, "grad_norm": 1.071068732509766, "learning_rate": 9.736904117784748e-06, "loss": 0.5512, "step": 2027 }, { "epoch": 0.10943823862716529, "grad_norm": 1.2049337134569917, "learning_rate": 9.736647025117711e-06, "loss": 0.7301, "step": 2028 }, { "epoch": 0.10949220225567967, "grad_norm": 1.3019665514348748, "learning_rate": 9.736389810685158e-06, "loss": 0.6059, "step": 2029 }, { "epoch": 0.10954616588419405, "grad_norm": 1.0494434705119964, "learning_rate": 9.736132474494481e-06, "loss": 0.4897, "step": 2030 }, { "epoch": 0.10960012951270844, "grad_norm": 1.0506933083696381, "learning_rate": 9.735875016553078e-06, "loss": 0.5215, "step": 2031 }, { "epoch": 0.10965409314122282, "grad_norm": 1.110877362099513, "learning_rate": 9.735617436868346e-06, "loss": 0.5188, "step": 2032 }, { "epoch": 0.1097080567697372, "grad_norm": 1.1617723752266298, "learning_rate": 9.735359735447688e-06, "loss": 0.4617, "step": 2033 }, { "epoch": 0.10976202039825157, "grad_norm": 1.2478676094178631, "learning_rate": 9.735101912298513e-06, "loss": 0.7055, "step": 2034 }, { "epoch": 0.10981598402676596, "grad_norm": 0.9557792821728467, "learning_rate": 9.73484396742823e-06, "loss": 0.3877, "step": 2035 }, { "epoch": 0.10986994765528034, "grad_norm": 1.1573665753897102, "learning_rate": 9.734585900844253e-06, "loss": 0.4788, "step": 2036 }, { "epoch": 0.10992391128379472, "grad_norm": 1.0370546997934993, "learning_rate": 9.734327712553995e-06, "loss": 0.573, "step": 2037 }, { "epoch": 0.1099778749123091, "grad_norm": 0.980348856038039, "learning_rate": 9.734069402564882e-06, "loss": 0.4304, "step": 2038 }, { "epoch": 0.11003183854082349, "grad_norm": 1.1168487005494403, "learning_rate": 9.733810970884337e-06, "loss": 0.6007, "step": 2039 }, { "epoch": 0.11008580216933786, "grad_norm": 0.8303364341660308, "learning_rate": 9.733552417519787e-06, "loss": 0.3367, "step": 2040 }, { "epoch": 0.11013976579785224, "grad_norm": 1.0739726817212272, "learning_rate": 9.733293742478661e-06, "loss": 0.3936, "step": 2041 }, { "epoch": 0.11019372942636663, "grad_norm": 1.1208176391013152, "learning_rate": 9.733034945768396e-06, "loss": 0.4646, "step": 2042 }, { "epoch": 0.11024769305488101, "grad_norm": 0.9835334674808709, "learning_rate": 9.73277602739643e-06, "loss": 0.4459, "step": 2043 }, { "epoch": 0.1103016566833954, "grad_norm": 0.8932043869611919, "learning_rate": 9.732516987370204e-06, "loss": 0.4296, "step": 2044 }, { "epoch": 0.11035562031190978, "grad_norm": 1.1156003941281467, "learning_rate": 9.732257825697164e-06, "loss": 0.5599, "step": 2045 }, { "epoch": 0.11040958394042416, "grad_norm": 1.2569202916685973, "learning_rate": 9.731998542384755e-06, "loss": 0.6855, "step": 2046 }, { "epoch": 0.11046354756893853, "grad_norm": 1.0683697151603757, "learning_rate": 9.731739137440435e-06, "loss": 0.4774, "step": 2047 }, { "epoch": 0.11051751119745291, "grad_norm": 1.1041546828753652, "learning_rate": 9.731479610871653e-06, "loss": 0.6182, "step": 2048 }, { "epoch": 0.1105714748259673, "grad_norm": 1.0594516865997277, "learning_rate": 9.731219962685875e-06, "loss": 0.4786, "step": 2049 }, { "epoch": 
0.11062543845448168, "grad_norm": 0.9481132721538373, "learning_rate": 9.730960192890557e-06, "loss": 0.4332, "step": 2050 }, { "epoch": 0.11067940208299606, "grad_norm": 1.2386574979502512, "learning_rate": 9.73070030149317e-06, "loss": 0.6605, "step": 2051 }, { "epoch": 0.11073336571151045, "grad_norm": 1.1749002189703048, "learning_rate": 9.73044028850118e-06, "loss": 0.74, "step": 2052 }, { "epoch": 0.11078732934002482, "grad_norm": 1.0390648996291156, "learning_rate": 9.730180153922064e-06, "loss": 0.4984, "step": 2053 }, { "epoch": 0.1108412929685392, "grad_norm": 1.2089202836836257, "learning_rate": 9.729919897763292e-06, "loss": 0.6758, "step": 2054 }, { "epoch": 0.11089525659705358, "grad_norm": 1.0501131657131468, "learning_rate": 9.729659520032351e-06, "loss": 0.4171, "step": 2055 }, { "epoch": 0.11094922022556797, "grad_norm": 0.8490220409725224, "learning_rate": 9.729399020736721e-06, "loss": 0.3382, "step": 2056 }, { "epoch": 0.11100318385408235, "grad_norm": 1.2036475796245694, "learning_rate": 9.729138399883889e-06, "loss": 0.5028, "step": 2057 }, { "epoch": 0.11105714748259674, "grad_norm": 0.9829412932223817, "learning_rate": 9.728877657481348e-06, "loss": 0.4841, "step": 2058 }, { "epoch": 0.1111111111111111, "grad_norm": 1.2777568068937704, "learning_rate": 9.728616793536588e-06, "loss": 0.6849, "step": 2059 }, { "epoch": 0.11116507473962549, "grad_norm": 1.172701257470385, "learning_rate": 9.72835580805711e-06, "loss": 0.4159, "step": 2060 }, { "epoch": 0.11121903836813987, "grad_norm": 1.0582032856621537, "learning_rate": 9.728094701050413e-06, "loss": 0.4911, "step": 2061 }, { "epoch": 0.11127300199665426, "grad_norm": 1.3071440638814404, "learning_rate": 9.727833472524003e-06, "loss": 0.6866, "step": 2062 }, { "epoch": 0.11132696562516864, "grad_norm": 1.0590361342197327, "learning_rate": 9.727572122485386e-06, "loss": 0.497, "step": 2063 }, { "epoch": 0.11138092925368302, "grad_norm": 1.4589507959826196, "learning_rate": 9.727310650942075e-06, "loss": 0.6057, "step": 2064 }, { "epoch": 0.11143489288219739, "grad_norm": 1.0155104005182347, "learning_rate": 9.727049057901586e-06, "loss": 0.4684, "step": 2065 }, { "epoch": 0.11148885651071178, "grad_norm": 1.0711181466199533, "learning_rate": 9.726787343371433e-06, "loss": 0.5111, "step": 2066 }, { "epoch": 0.11154282013922616, "grad_norm": 1.0975477574969559, "learning_rate": 9.726525507359143e-06, "loss": 0.5015, "step": 2067 }, { "epoch": 0.11159678376774054, "grad_norm": 1.0752889361973546, "learning_rate": 9.726263549872238e-06, "loss": 0.4686, "step": 2068 }, { "epoch": 0.11165074739625493, "grad_norm": 1.1687453568198447, "learning_rate": 9.726001470918249e-06, "loss": 0.4929, "step": 2069 }, { "epoch": 0.11170471102476931, "grad_norm": 1.3647900262835757, "learning_rate": 9.725739270504707e-06, "loss": 0.7172, "step": 2070 }, { "epoch": 0.1117586746532837, "grad_norm": 1.0630016687011967, "learning_rate": 9.725476948639152e-06, "loss": 0.6157, "step": 2071 }, { "epoch": 0.11181263828179806, "grad_norm": 1.0252247452586676, "learning_rate": 9.725214505329114e-06, "loss": 0.5517, "step": 2072 }, { "epoch": 0.11186660191031245, "grad_norm": 1.1792647687740614, "learning_rate": 9.724951940582146e-06, "loss": 0.5353, "step": 2073 }, { "epoch": 0.11192056553882683, "grad_norm": 1.0774365003462847, "learning_rate": 9.724689254405789e-06, "loss": 0.481, "step": 2074 }, { "epoch": 0.11197452916734121, "grad_norm": 1.124972102210081, "learning_rate": 9.724426446807595e-06, "loss": 0.57, "step": 2075 }, { "epoch": 
0.1120284927958556, "grad_norm": 0.9853076749628312, "learning_rate": 9.724163517795116e-06, "loss": 0.4656, "step": 2076 }, { "epoch": 0.11208245642436998, "grad_norm": 0.8760299877697736, "learning_rate": 9.72390046737591e-06, "loss": 0.3396, "step": 2077 }, { "epoch": 0.11213642005288435, "grad_norm": 0.8879348514567834, "learning_rate": 9.723637295557535e-06, "loss": 0.445, "step": 2078 }, { "epoch": 0.11219038368139873, "grad_norm": 1.024961794933285, "learning_rate": 9.723374002347557e-06, "loss": 0.4248, "step": 2079 }, { "epoch": 0.11224434730991312, "grad_norm": 0.9135338012918697, "learning_rate": 9.723110587753544e-06, "loss": 0.4054, "step": 2080 }, { "epoch": 0.1122983109384275, "grad_norm": 1.019269099574995, "learning_rate": 9.722847051783062e-06, "loss": 0.3603, "step": 2081 }, { "epoch": 0.11235227456694188, "grad_norm": 1.2011374791691507, "learning_rate": 9.722583394443692e-06, "loss": 0.6786, "step": 2082 }, { "epoch": 0.11240623819545627, "grad_norm": 1.1884726055708354, "learning_rate": 9.722319615743009e-06, "loss": 0.5652, "step": 2083 }, { "epoch": 0.11246020182397064, "grad_norm": 1.0785529997291525, "learning_rate": 9.722055715688593e-06, "loss": 0.5207, "step": 2084 }, { "epoch": 0.11251416545248502, "grad_norm": 1.026048563046313, "learning_rate": 9.721791694288029e-06, "loss": 0.5358, "step": 2085 }, { "epoch": 0.1125681290809994, "grad_norm": 1.0613392585949002, "learning_rate": 9.721527551548908e-06, "loss": 0.5013, "step": 2086 }, { "epoch": 0.11262209270951379, "grad_norm": 1.245909574567919, "learning_rate": 9.721263287478817e-06, "loss": 0.5602, "step": 2087 }, { "epoch": 0.11267605633802817, "grad_norm": 1.0464217842666608, "learning_rate": 9.720998902085354e-06, "loss": 0.5457, "step": 2088 }, { "epoch": 0.11273001996654256, "grad_norm": 1.1439479559145347, "learning_rate": 9.72073439537612e-06, "loss": 0.4971, "step": 2089 }, { "epoch": 0.11278398359505692, "grad_norm": 1.182967193168024, "learning_rate": 9.720469767358712e-06, "loss": 0.5949, "step": 2090 }, { "epoch": 0.11283794722357131, "grad_norm": 1.154401640187526, "learning_rate": 9.720205018040739e-06, "loss": 0.6422, "step": 2091 }, { "epoch": 0.11289191085208569, "grad_norm": 1.119996773183585, "learning_rate": 9.71994014742981e-06, "loss": 0.5825, "step": 2092 }, { "epoch": 0.11294587448060008, "grad_norm": 1.0596990776268982, "learning_rate": 9.719675155533538e-06, "loss": 0.5041, "step": 2093 }, { "epoch": 0.11299983810911446, "grad_norm": 0.8844227592405739, "learning_rate": 9.719410042359538e-06, "loss": 0.3153, "step": 2094 }, { "epoch": 0.11305380173762884, "grad_norm": 1.010530722002409, "learning_rate": 9.71914480791543e-06, "loss": 0.5955, "step": 2095 }, { "epoch": 0.11310776536614323, "grad_norm": 1.2464321068180946, "learning_rate": 9.718879452208836e-06, "loss": 0.5403, "step": 2096 }, { "epoch": 0.1131617289946576, "grad_norm": 1.0799884153596904, "learning_rate": 9.718613975247385e-06, "loss": 0.6415, "step": 2097 }, { "epoch": 0.11321569262317198, "grad_norm": 1.0544805841691753, "learning_rate": 9.718348377038705e-06, "loss": 0.4599, "step": 2098 }, { "epoch": 0.11326965625168636, "grad_norm": 1.0942206196361584, "learning_rate": 9.71808265759043e-06, "loss": 0.615, "step": 2099 }, { "epoch": 0.11332361988020075, "grad_norm": 1.178566236604381, "learning_rate": 9.7178168169102e-06, "loss": 0.4576, "step": 2100 }, { "epoch": 0.11337758350871513, "grad_norm": 1.1328586071696003, "learning_rate": 9.71755085500565e-06, "loss": 0.468, "step": 2101 }, { "epoch": 
0.11343154713722951, "grad_norm": 0.7794337763447532, "learning_rate": 9.717284771884427e-06, "loss": 0.4533, "step": 2102 }, { "epoch": 0.11348551076574388, "grad_norm": 1.0031295924330543, "learning_rate": 9.71701856755418e-06, "loss": 0.4203, "step": 2103 }, { "epoch": 0.11353947439425827, "grad_norm": 0.966836137429084, "learning_rate": 9.716752242022557e-06, "loss": 0.4889, "step": 2104 }, { "epoch": 0.11359343802277265, "grad_norm": 1.1146599231197085, "learning_rate": 9.716485795297213e-06, "loss": 0.4987, "step": 2105 }, { "epoch": 0.11364740165128703, "grad_norm": 1.1189504959809826, "learning_rate": 9.716219227385809e-06, "loss": 0.4945, "step": 2106 }, { "epoch": 0.11370136527980142, "grad_norm": 1.1303991596120908, "learning_rate": 9.715952538296004e-06, "loss": 0.6319, "step": 2107 }, { "epoch": 0.1137553289083158, "grad_norm": 0.911771599076188, "learning_rate": 9.715685728035462e-06, "loss": 0.4308, "step": 2108 }, { "epoch": 0.11380929253683017, "grad_norm": 1.0914598689834962, "learning_rate": 9.715418796611854e-06, "loss": 0.4169, "step": 2109 }, { "epoch": 0.11386325616534455, "grad_norm": 0.8205296786086751, "learning_rate": 9.715151744032849e-06, "loss": 0.3976, "step": 2110 }, { "epoch": 0.11391721979385894, "grad_norm": 1.2377533912758019, "learning_rate": 9.714884570306123e-06, "loss": 0.493, "step": 2111 }, { "epoch": 0.11397118342237332, "grad_norm": 1.1933932254179174, "learning_rate": 9.714617275439358e-06, "loss": 0.4986, "step": 2112 }, { "epoch": 0.1140251470508877, "grad_norm": 1.094748515201122, "learning_rate": 9.714349859440232e-06, "loss": 0.5111, "step": 2113 }, { "epoch": 0.11407911067940209, "grad_norm": 1.1550967078214478, "learning_rate": 9.714082322316434e-06, "loss": 0.5567, "step": 2114 }, { "epoch": 0.11413307430791647, "grad_norm": 1.0381901687122022, "learning_rate": 9.713814664075652e-06, "loss": 0.4062, "step": 2115 }, { "epoch": 0.11418703793643084, "grad_norm": 0.8926215387923336, "learning_rate": 9.713546884725578e-06, "loss": 0.4334, "step": 2116 }, { "epoch": 0.11424100156494522, "grad_norm": 1.121271114812557, "learning_rate": 9.71327898427391e-06, "loss": 0.6181, "step": 2117 }, { "epoch": 0.11429496519345961, "grad_norm": 1.1486350849731022, "learning_rate": 9.713010962728346e-06, "loss": 0.549, "step": 2118 }, { "epoch": 0.11434892882197399, "grad_norm": 1.0531303113515242, "learning_rate": 9.71274282009659e-06, "loss": 0.5114, "step": 2119 }, { "epoch": 0.11440289245048837, "grad_norm": 1.0070433807681014, "learning_rate": 9.712474556386349e-06, "loss": 0.468, "step": 2120 }, { "epoch": 0.11445685607900276, "grad_norm": 0.8818546760986574, "learning_rate": 9.712206171605333e-06, "loss": 0.4355, "step": 2121 }, { "epoch": 0.11451081970751713, "grad_norm": 0.9920858563948186, "learning_rate": 9.711937665761255e-06, "loss": 0.4312, "step": 2122 }, { "epoch": 0.11456478333603151, "grad_norm": 1.0192576805920608, "learning_rate": 9.711669038861833e-06, "loss": 0.5495, "step": 2123 }, { "epoch": 0.1146187469645459, "grad_norm": 0.9418997920985216, "learning_rate": 9.711400290914787e-06, "loss": 0.3956, "step": 2124 }, { "epoch": 0.11467271059306028, "grad_norm": 1.1400140711658797, "learning_rate": 9.71113142192784e-06, "loss": 0.6297, "step": 2125 }, { "epoch": 0.11472667422157466, "grad_norm": 0.955892471460207, "learning_rate": 9.710862431908723e-06, "loss": 0.425, "step": 2126 }, { "epoch": 0.11478063785008905, "grad_norm": 1.2328722045887308, "learning_rate": 9.710593320865165e-06, "loss": 0.7923, "step": 2127 }, { "epoch": 
0.11483460147860342, "grad_norm": 1.2988420613874465, "learning_rate": 9.710324088804899e-06, "loss": 0.653, "step": 2128 }, { "epoch": 0.1148885651071178, "grad_norm": 1.094077090228835, "learning_rate": 9.710054735735665e-06, "loss": 0.6028, "step": 2129 }, { "epoch": 0.11494252873563218, "grad_norm": 1.2222961951220337, "learning_rate": 9.709785261665205e-06, "loss": 0.4907, "step": 2130 }, { "epoch": 0.11499649236414657, "grad_norm": 0.9443231244877854, "learning_rate": 9.709515666601262e-06, "loss": 0.4208, "step": 2131 }, { "epoch": 0.11505045599266095, "grad_norm": 1.0563208397163193, "learning_rate": 9.709245950551586e-06, "loss": 0.5139, "step": 2132 }, { "epoch": 0.11510441962117533, "grad_norm": 0.928467590525105, "learning_rate": 9.708976113523929e-06, "loss": 0.4348, "step": 2133 }, { "epoch": 0.1151583832496897, "grad_norm": 1.0240504443384908, "learning_rate": 9.708706155526042e-06, "loss": 0.4389, "step": 2134 }, { "epoch": 0.11521234687820409, "grad_norm": 1.1788583005461306, "learning_rate": 9.70843607656569e-06, "loss": 0.4491, "step": 2135 }, { "epoch": 0.11526631050671847, "grad_norm": 1.0255206433579533, "learning_rate": 9.708165876650634e-06, "loss": 0.5015, "step": 2136 }, { "epoch": 0.11532027413523285, "grad_norm": 1.100632821414947, "learning_rate": 9.707895555788638e-06, "loss": 0.6681, "step": 2137 }, { "epoch": 0.11537423776374724, "grad_norm": 1.2760326420241366, "learning_rate": 9.707625113987468e-06, "loss": 0.7037, "step": 2138 }, { "epoch": 0.11542820139226162, "grad_norm": 1.184046177513834, "learning_rate": 9.707354551254906e-06, "loss": 0.5489, "step": 2139 }, { "epoch": 0.115482165020776, "grad_norm": 0.941891360003267, "learning_rate": 9.707083867598722e-06, "loss": 0.4095, "step": 2140 }, { "epoch": 0.11553612864929037, "grad_norm": 0.9592418910260238, "learning_rate": 9.706813063026694e-06, "loss": 0.5687, "step": 2141 }, { "epoch": 0.11559009227780476, "grad_norm": 1.216384524866234, "learning_rate": 9.70654213754661e-06, "loss": 0.6112, "step": 2142 }, { "epoch": 0.11564405590631914, "grad_norm": 1.1271086372560108, "learning_rate": 9.706271091166254e-06, "loss": 0.4728, "step": 2143 }, { "epoch": 0.11569801953483352, "grad_norm": 1.035078179931676, "learning_rate": 9.705999923893417e-06, "loss": 0.558, "step": 2144 }, { "epoch": 0.11575198316334791, "grad_norm": 1.1459829936066768, "learning_rate": 9.705728635735892e-06, "loss": 0.5734, "step": 2145 }, { "epoch": 0.11580594679186229, "grad_norm": 1.025290354986629, "learning_rate": 9.705457226701476e-06, "loss": 0.4987, "step": 2146 }, { "epoch": 0.11585991042037666, "grad_norm": 1.1494116639809646, "learning_rate": 9.705185696797972e-06, "loss": 0.5385, "step": 2147 }, { "epoch": 0.11591387404889104, "grad_norm": 0.9120287383364258, "learning_rate": 9.70491404603318e-06, "loss": 0.6142, "step": 2148 }, { "epoch": 0.11596783767740543, "grad_norm": 1.0465135255117215, "learning_rate": 9.70464227441491e-06, "loss": 0.4155, "step": 2149 }, { "epoch": 0.11602180130591981, "grad_norm": 1.0130069439592695, "learning_rate": 9.704370381950974e-06, "loss": 0.4597, "step": 2150 }, { "epoch": 0.1160757649344342, "grad_norm": 1.1512564818300701, "learning_rate": 9.704098368649181e-06, "loss": 0.6123, "step": 2151 }, { "epoch": 0.11612972856294858, "grad_norm": 1.2423992686953171, "learning_rate": 9.703826234517358e-06, "loss": 0.5082, "step": 2152 }, { "epoch": 0.11618369219146295, "grad_norm": 0.8925834830733531, "learning_rate": 9.70355397956332e-06, "loss": 0.4523, "step": 2153 }, { "epoch": 
0.11623765581997733, "grad_norm": 0.967594413798547, "learning_rate": 9.70328160379489e-06, "loss": 0.443, "step": 2154 }, { "epoch": 0.11629161944849171, "grad_norm": 0.8453519442799124, "learning_rate": 9.703009107219904e-06, "loss": 0.3905, "step": 2155 }, { "epoch": 0.1163455830770061, "grad_norm": 1.1059504958176312, "learning_rate": 9.702736489846187e-06, "loss": 0.5401, "step": 2156 }, { "epoch": 0.11639954670552048, "grad_norm": 1.3918652504879416, "learning_rate": 9.702463751681577e-06, "loss": 0.6284, "step": 2157 }, { "epoch": 0.11645351033403487, "grad_norm": 0.9582342909149959, "learning_rate": 9.702190892733913e-06, "loss": 0.4074, "step": 2158 }, { "epoch": 0.11650747396254924, "grad_norm": 1.035499039427595, "learning_rate": 9.701917913011035e-06, "loss": 0.3915, "step": 2159 }, { "epoch": 0.11656143759106362, "grad_norm": 1.2348681022612513, "learning_rate": 9.701644812520792e-06, "loss": 0.5226, "step": 2160 }, { "epoch": 0.116615401219578, "grad_norm": 1.0316289082892915, "learning_rate": 9.701371591271031e-06, "loss": 0.4473, "step": 2161 }, { "epoch": 0.11666936484809239, "grad_norm": 1.0642023489563224, "learning_rate": 9.701098249269606e-06, "loss": 0.4486, "step": 2162 }, { "epoch": 0.11672332847660677, "grad_norm": 1.026526047463095, "learning_rate": 9.700824786524372e-06, "loss": 0.5221, "step": 2163 }, { "epoch": 0.11677729210512115, "grad_norm": 1.135013428447993, "learning_rate": 9.70055120304319e-06, "loss": 0.4675, "step": 2164 }, { "epoch": 0.11683125573363554, "grad_norm": 1.0201577017599577, "learning_rate": 9.700277498833921e-06, "loss": 0.5379, "step": 2165 }, { "epoch": 0.1168852193621499, "grad_norm": 0.9371695966850042, "learning_rate": 9.700003673904433e-06, "loss": 0.5225, "step": 2166 }, { "epoch": 0.11693918299066429, "grad_norm": 1.0880857939485777, "learning_rate": 9.699729728262596e-06, "loss": 0.529, "step": 2167 }, { "epoch": 0.11699314661917867, "grad_norm": 1.0726276715929168, "learning_rate": 9.699455661916281e-06, "loss": 0.5636, "step": 2168 }, { "epoch": 0.11704711024769306, "grad_norm": 0.9398092630930535, "learning_rate": 9.69918147487337e-06, "loss": 0.4371, "step": 2169 }, { "epoch": 0.11710107387620744, "grad_norm": 0.8844183672272802, "learning_rate": 9.698907167141739e-06, "loss": 0.4259, "step": 2170 }, { "epoch": 0.11715503750472182, "grad_norm": 1.140494840944878, "learning_rate": 9.698632738729273e-06, "loss": 0.5548, "step": 2171 }, { "epoch": 0.1172090011332362, "grad_norm": 1.099954541875123, "learning_rate": 9.69835818964386e-06, "loss": 0.4904, "step": 2172 }, { "epoch": 0.11726296476175058, "grad_norm": 1.0900031468786004, "learning_rate": 9.69808351989339e-06, "loss": 0.6291, "step": 2173 }, { "epoch": 0.11731692839026496, "grad_norm": 1.0851603119621536, "learning_rate": 9.69780872948576e-06, "loss": 0.4439, "step": 2174 }, { "epoch": 0.11737089201877934, "grad_norm": 0.9631078378601368, "learning_rate": 9.697533818428863e-06, "loss": 0.4114, "step": 2175 }, { "epoch": 0.11742485564729373, "grad_norm": 0.9950717121723265, "learning_rate": 9.697258786730603e-06, "loss": 0.5193, "step": 2176 }, { "epoch": 0.11747881927580811, "grad_norm": 1.22702270470133, "learning_rate": 9.696983634398886e-06, "loss": 0.6386, "step": 2177 }, { "epoch": 0.11753278290432248, "grad_norm": 1.0893143520594046, "learning_rate": 9.696708361441617e-06, "loss": 0.5014, "step": 2178 }, { "epoch": 0.11758674653283686, "grad_norm": 0.9031315969572481, "learning_rate": 9.696432967866712e-06, "loss": 0.4056, "step": 2179 }, { "epoch": 
0.11764071016135125, "grad_norm": 0.9875513301486594, "learning_rate": 9.69615745368208e-06, "loss": 0.5105, "step": 2180 }, { "epoch": 0.11769467378986563, "grad_norm": 1.0112005447768027, "learning_rate": 9.695881818895642e-06, "loss": 0.4693, "step": 2181 }, { "epoch": 0.11774863741838001, "grad_norm": 0.9654037146706186, "learning_rate": 9.695606063515325e-06, "loss": 0.3687, "step": 2182 }, { "epoch": 0.1178026010468944, "grad_norm": 1.2173417540977598, "learning_rate": 9.695330187549048e-06, "loss": 0.5959, "step": 2183 }, { "epoch": 0.11785656467540877, "grad_norm": 0.8912592311311957, "learning_rate": 9.695054191004743e-06, "loss": 0.4056, "step": 2184 }, { "epoch": 0.11791052830392315, "grad_norm": 1.0473782442352688, "learning_rate": 9.694778073890342e-06, "loss": 0.4809, "step": 2185 }, { "epoch": 0.11796449193243753, "grad_norm": 0.9326417039400774, "learning_rate": 9.69450183621378e-06, "loss": 0.4508, "step": 2186 }, { "epoch": 0.11801845556095192, "grad_norm": 1.04928607430521, "learning_rate": 9.694225477982996e-06, "loss": 0.5075, "step": 2187 }, { "epoch": 0.1180724191894663, "grad_norm": 1.0011047276138676, "learning_rate": 9.693948999205935e-06, "loss": 0.7541, "step": 2188 }, { "epoch": 0.11812638281798069, "grad_norm": 1.3456540891225421, "learning_rate": 9.693672399890543e-06, "loss": 0.7273, "step": 2189 }, { "epoch": 0.11818034644649507, "grad_norm": 1.2161558121391915, "learning_rate": 9.693395680044766e-06, "loss": 0.5821, "step": 2190 }, { "epoch": 0.11823431007500944, "grad_norm": 0.9572900643636869, "learning_rate": 9.693118839676562e-06, "loss": 0.4236, "step": 2191 }, { "epoch": 0.11828827370352382, "grad_norm": 1.0412734889470079, "learning_rate": 9.692841878793888e-06, "loss": 0.6252, "step": 2192 }, { "epoch": 0.1183422373320382, "grad_norm": 1.122067561259171, "learning_rate": 9.692564797404698e-06, "loss": 0.565, "step": 2193 }, { "epoch": 0.11839620096055259, "grad_norm": 1.0317664850900574, "learning_rate": 9.69228759551696e-06, "loss": 0.4377, "step": 2194 }, { "epoch": 0.11845016458906697, "grad_norm": 1.2601597990560485, "learning_rate": 9.692010273138642e-06, "loss": 0.5287, "step": 2195 }, { "epoch": 0.11850412821758136, "grad_norm": 1.08477682890788, "learning_rate": 9.691732830277712e-06, "loss": 0.6646, "step": 2196 }, { "epoch": 0.11855809184609573, "grad_norm": 1.0696842005743334, "learning_rate": 9.691455266942146e-06, "loss": 0.4496, "step": 2197 }, { "epoch": 0.11861205547461011, "grad_norm": 1.026353876095435, "learning_rate": 9.69117758313992e-06, "loss": 0.5071, "step": 2198 }, { "epoch": 0.11866601910312449, "grad_norm": 1.051444886850701, "learning_rate": 9.690899778879016e-06, "loss": 0.4464, "step": 2199 }, { "epoch": 0.11871998273163888, "grad_norm": 1.1875435174597502, "learning_rate": 9.690621854167415e-06, "loss": 0.5663, "step": 2200 }, { "epoch": 0.11877394636015326, "grad_norm": 1.131346145204395, "learning_rate": 9.690343809013111e-06, "loss": 0.6198, "step": 2201 }, { "epoch": 0.11882790998866764, "grad_norm": 1.1486586422763523, "learning_rate": 9.69006564342409e-06, "loss": 0.427, "step": 2202 }, { "epoch": 0.11888187361718201, "grad_norm": 1.0837775288585423, "learning_rate": 9.689787357408348e-06, "loss": 0.4634, "step": 2203 }, { "epoch": 0.1189358372456964, "grad_norm": 1.3740114987384053, "learning_rate": 9.689508950973888e-06, "loss": 0.72, "step": 2204 }, { "epoch": 0.11898980087421078, "grad_norm": 1.162623860963007, "learning_rate": 9.689230424128703e-06, "loss": 0.5259, "step": 2205 }, { "epoch": 
0.11904376450272516, "grad_norm": 1.1643884469495673, "learning_rate": 9.688951776880805e-06, "loss": 0.5399, "step": 2206 }, { "epoch": 0.11909772813123955, "grad_norm": 1.059434341566082, "learning_rate": 9.688673009238199e-06, "loss": 0.5182, "step": 2207 }, { "epoch": 0.11915169175975393, "grad_norm": 1.1165082545626912, "learning_rate": 9.688394121208899e-06, "loss": 0.6969, "step": 2208 }, { "epoch": 0.11920565538826831, "grad_norm": 1.0883483656850015, "learning_rate": 9.688115112800922e-06, "loss": 0.6342, "step": 2209 }, { "epoch": 0.11925961901678268, "grad_norm": 0.9494426199177266, "learning_rate": 9.687835984022281e-06, "loss": 0.6165, "step": 2210 }, { "epoch": 0.11931358264529707, "grad_norm": 0.9686385992805834, "learning_rate": 9.687556734881006e-06, "loss": 0.4665, "step": 2211 }, { "epoch": 0.11936754627381145, "grad_norm": 1.157418220068164, "learning_rate": 9.687277365385116e-06, "loss": 0.6218, "step": 2212 }, { "epoch": 0.11942150990232583, "grad_norm": 1.0293779911615994, "learning_rate": 9.686997875542644e-06, "loss": 0.4685, "step": 2213 }, { "epoch": 0.11947547353084022, "grad_norm": 1.5861511364682184, "learning_rate": 9.686718265361625e-06, "loss": 0.7018, "step": 2214 }, { "epoch": 0.1195294371593546, "grad_norm": 1.0054865831088418, "learning_rate": 9.68643853485009e-06, "loss": 0.3506, "step": 2215 }, { "epoch": 0.11958340078786897, "grad_norm": 1.2206746111948432, "learning_rate": 9.686158684016084e-06, "loss": 0.6254, "step": 2216 }, { "epoch": 0.11963736441638335, "grad_norm": 0.9718927206405142, "learning_rate": 9.685878712867644e-06, "loss": 0.4597, "step": 2217 }, { "epoch": 0.11969132804489774, "grad_norm": 1.0444441606983377, "learning_rate": 9.685598621412824e-06, "loss": 0.5662, "step": 2218 }, { "epoch": 0.11974529167341212, "grad_norm": 0.9501010885692707, "learning_rate": 9.685318409659668e-06, "loss": 0.4915, "step": 2219 }, { "epoch": 0.1197992553019265, "grad_norm": 1.137605788339061, "learning_rate": 9.685038077616231e-06, "loss": 0.5529, "step": 2220 }, { "epoch": 0.11985321893044089, "grad_norm": 1.0397386718597894, "learning_rate": 9.684757625290574e-06, "loss": 0.6095, "step": 2221 }, { "epoch": 0.11990718255895526, "grad_norm": 0.9318530954776885, "learning_rate": 9.684477052690754e-06, "loss": 0.3826, "step": 2222 }, { "epoch": 0.11996114618746964, "grad_norm": 1.0119627906820465, "learning_rate": 9.684196359824833e-06, "loss": 0.569, "step": 2223 }, { "epoch": 0.12001510981598403, "grad_norm": 1.1090213373768198, "learning_rate": 9.683915546700883e-06, "loss": 0.623, "step": 2224 }, { "epoch": 0.12006907344449841, "grad_norm": 0.9471935628760784, "learning_rate": 9.683634613326972e-06, "loss": 0.4909, "step": 2225 }, { "epoch": 0.12012303707301279, "grad_norm": 1.3845759877807065, "learning_rate": 9.683353559711175e-06, "loss": 0.598, "step": 2226 }, { "epoch": 0.12017700070152718, "grad_norm": 0.8221055747147173, "learning_rate": 9.683072385861569e-06, "loss": 0.4061, "step": 2227 }, { "epoch": 0.12023096433004155, "grad_norm": 0.9664110823160382, "learning_rate": 9.682791091786237e-06, "loss": 0.504, "step": 2228 }, { "epoch": 0.12028492795855593, "grad_norm": 1.079246691711793, "learning_rate": 9.68250967749326e-06, "loss": 0.456, "step": 2229 }, { "epoch": 0.12033889158707031, "grad_norm": 1.1104342349265683, "learning_rate": 9.682228142990732e-06, "loss": 0.5795, "step": 2230 }, { "epoch": 0.1203928552155847, "grad_norm": 1.2027124158240485, "learning_rate": 9.68194648828674e-06, "loss": 0.7313, "step": 2231 }, { "epoch": 
0.12044681884409908, "grad_norm": 1.0009290953727872, "learning_rate": 9.681664713389382e-06, "loss": 0.4615, "step": 2232 }, { "epoch": 0.12050078247261346, "grad_norm": 0.9520443193405584, "learning_rate": 9.681382818306754e-06, "loss": 0.433, "step": 2233 }, { "epoch": 0.12055474610112785, "grad_norm": 0.8669540435783489, "learning_rate": 9.681100803046956e-06, "loss": 0.4159, "step": 2234 }, { "epoch": 0.12060870972964222, "grad_norm": 1.4765173908952733, "learning_rate": 9.680818667618101e-06, "loss": 0.6846, "step": 2235 }, { "epoch": 0.1206626733581566, "grad_norm": 1.0214499606258602, "learning_rate": 9.680536412028291e-06, "loss": 0.4474, "step": 2236 }, { "epoch": 0.12071663698667098, "grad_norm": 1.2052665544356185, "learning_rate": 9.680254036285638e-06, "loss": 0.7253, "step": 2237 }, { "epoch": 0.12077060061518537, "grad_norm": 1.1073765270459122, "learning_rate": 9.679971540398263e-06, "loss": 0.7594, "step": 2238 }, { "epoch": 0.12082456424369975, "grad_norm": 0.8110268777075951, "learning_rate": 9.679688924374283e-06, "loss": 0.4053, "step": 2239 }, { "epoch": 0.12087852787221413, "grad_norm": 1.1120168263411978, "learning_rate": 9.679406188221818e-06, "loss": 0.5394, "step": 2240 }, { "epoch": 0.1209324915007285, "grad_norm": 1.3392086023213707, "learning_rate": 9.679123331948995e-06, "loss": 0.5211, "step": 2241 }, { "epoch": 0.12098645512924289, "grad_norm": 1.0364699707537088, "learning_rate": 9.678840355563947e-06, "loss": 0.5329, "step": 2242 }, { "epoch": 0.12104041875775727, "grad_norm": 1.157960590526774, "learning_rate": 9.678557259074805e-06, "loss": 0.5674, "step": 2243 }, { "epoch": 0.12109438238627165, "grad_norm": 1.1101877591724363, "learning_rate": 9.678274042489704e-06, "loss": 0.4706, "step": 2244 }, { "epoch": 0.12114834601478604, "grad_norm": 1.1488828078726339, "learning_rate": 9.677990705816787e-06, "loss": 0.496, "step": 2245 }, { "epoch": 0.12120230964330042, "grad_norm": 0.9298744766980935, "learning_rate": 9.677707249064193e-06, "loss": 0.3834, "step": 2246 }, { "epoch": 0.12125627327181479, "grad_norm": 1.2496939382762586, "learning_rate": 9.677423672240073e-06, "loss": 0.5499, "step": 2247 }, { "epoch": 0.12131023690032917, "grad_norm": 1.1698730037388965, "learning_rate": 9.677139975352577e-06, "loss": 0.5242, "step": 2248 }, { "epoch": 0.12136420052884356, "grad_norm": 1.142238733423812, "learning_rate": 9.676856158409854e-06, "loss": 0.3603, "step": 2249 }, { "epoch": 0.12141816415735794, "grad_norm": 1.229077501052302, "learning_rate": 9.676572221420067e-06, "loss": 0.5999, "step": 2250 }, { "epoch": 0.12147212778587232, "grad_norm": 1.1711008649042807, "learning_rate": 9.676288164391373e-06, "loss": 0.552, "step": 2251 }, { "epoch": 0.12152609141438671, "grad_norm": 1.062514411032166, "learning_rate": 9.676003987331939e-06, "loss": 0.5048, "step": 2252 }, { "epoch": 0.12158005504290108, "grad_norm": 1.1474293897922299, "learning_rate": 9.67571969024993e-06, "loss": 0.4124, "step": 2253 }, { "epoch": 0.12163401867141546, "grad_norm": 1.0463006830783068, "learning_rate": 9.675435273153515e-06, "loss": 0.4837, "step": 2254 }, { "epoch": 0.12168798229992984, "grad_norm": 1.1629971896909328, "learning_rate": 9.675150736050875e-06, "loss": 0.6153, "step": 2255 }, { "epoch": 0.12174194592844423, "grad_norm": 1.185342591314412, "learning_rate": 9.674866078950184e-06, "loss": 0.5341, "step": 2256 }, { "epoch": 0.12179590955695861, "grad_norm": 1.2347685339359427, "learning_rate": 9.674581301859624e-06, "loss": 0.656, "step": 2257 }, { "epoch": 
0.121849873185473, "grad_norm": 0.831253140334588, "learning_rate": 9.674296404787376e-06, "loss": 0.3719, "step": 2258 }, { "epoch": 0.12190383681398738, "grad_norm": 0.9277944807722457, "learning_rate": 9.674011387741635e-06, "loss": 0.4361, "step": 2259 }, { "epoch": 0.12195780044250175, "grad_norm": 1.0897314234304791, "learning_rate": 9.673726250730587e-06, "loss": 0.4018, "step": 2260 }, { "epoch": 0.12201176407101613, "grad_norm": 0.8230387588679382, "learning_rate": 9.673440993762429e-06, "loss": 0.3782, "step": 2261 }, { "epoch": 0.12206572769953052, "grad_norm": 0.9421259183686265, "learning_rate": 9.673155616845362e-06, "loss": 0.5124, "step": 2262 }, { "epoch": 0.1221196913280449, "grad_norm": 0.907467768225828, "learning_rate": 9.672870119987584e-06, "loss": 0.428, "step": 2263 }, { "epoch": 0.12217365495655928, "grad_norm": 1.5072659227187655, "learning_rate": 9.672584503197303e-06, "loss": 0.5897, "step": 2264 }, { "epoch": 0.12222761858507367, "grad_norm": 0.9317382001346334, "learning_rate": 9.672298766482728e-06, "loss": 0.4433, "step": 2265 }, { "epoch": 0.12228158221358804, "grad_norm": 1.0146027636011954, "learning_rate": 9.67201290985207e-06, "loss": 0.5124, "step": 2266 }, { "epoch": 0.12233554584210242, "grad_norm": 1.0785171894259167, "learning_rate": 9.671726933313545e-06, "loss": 0.647, "step": 2267 }, { "epoch": 0.1223895094706168, "grad_norm": 1.1040007523715156, "learning_rate": 9.671440836875374e-06, "loss": 0.5698, "step": 2268 }, { "epoch": 0.12244347309913119, "grad_norm": 1.0678578174289854, "learning_rate": 9.671154620545775e-06, "loss": 0.5571, "step": 2269 }, { "epoch": 0.12249743672764557, "grad_norm": 1.0381194634940218, "learning_rate": 9.670868284332982e-06, "loss": 0.46, "step": 2270 }, { "epoch": 0.12255140035615995, "grad_norm": 1.1021253867203855, "learning_rate": 9.670581828245218e-06, "loss": 0.5695, "step": 2271 }, { "epoch": 0.12260536398467432, "grad_norm": 1.0731139164555994, "learning_rate": 9.670295252290717e-06, "loss": 0.5801, "step": 2272 }, { "epoch": 0.1226593276131887, "grad_norm": 0.866065270712914, "learning_rate": 9.67000855647772e-06, "loss": 0.3971, "step": 2273 }, { "epoch": 0.12271329124170309, "grad_norm": 0.9131932290987935, "learning_rate": 9.669721740814462e-06, "loss": 0.4184, "step": 2274 }, { "epoch": 0.12276725487021747, "grad_norm": 0.9333520435832217, "learning_rate": 9.669434805309187e-06, "loss": 0.4704, "step": 2275 }, { "epoch": 0.12282121849873186, "grad_norm": 0.9112828751784572, "learning_rate": 9.669147749970143e-06, "loss": 0.4396, "step": 2276 }, { "epoch": 0.12287518212724624, "grad_norm": 1.252045603114486, "learning_rate": 9.668860574805581e-06, "loss": 0.5901, "step": 2277 }, { "epoch": 0.12292914575576061, "grad_norm": 1.040656827069334, "learning_rate": 9.668573279823753e-06, "loss": 0.5295, "step": 2278 }, { "epoch": 0.122983109384275, "grad_norm": 1.1830336186386003, "learning_rate": 9.668285865032917e-06, "loss": 0.5786, "step": 2279 }, { "epoch": 0.12303707301278938, "grad_norm": 1.2300312025901774, "learning_rate": 9.667998330441335e-06, "loss": 0.5892, "step": 2280 }, { "epoch": 0.12309103664130376, "grad_norm": 1.0161061153877486, "learning_rate": 9.667710676057268e-06, "loss": 0.4717, "step": 2281 }, { "epoch": 0.12314500026981814, "grad_norm": 0.9500036499035054, "learning_rate": 9.667422901888985e-06, "loss": 0.5285, "step": 2282 }, { "epoch": 0.12319896389833253, "grad_norm": 1.082961516705248, "learning_rate": 9.667135007944757e-06, "loss": 0.611, "step": 2283 }, { "epoch": 
0.12325292752684691, "grad_norm": 1.5818763667705031, "learning_rate": 9.66684699423286e-06, "loss": 0.8236, "step": 2284 }, { "epoch": 0.12330689115536128, "grad_norm": 1.091196762263724, "learning_rate": 9.666558860761567e-06, "loss": 0.5574, "step": 2285 }, { "epoch": 0.12336085478387566, "grad_norm": 0.8856862786818642, "learning_rate": 9.666270607539163e-06, "loss": 0.424, "step": 2286 }, { "epoch": 0.12341481841239005, "grad_norm": 1.1437200014543105, "learning_rate": 9.665982234573936e-06, "loss": 0.5005, "step": 2287 }, { "epoch": 0.12346878204090443, "grad_norm": 1.1362864378908073, "learning_rate": 9.665693741874166e-06, "loss": 0.4611, "step": 2288 }, { "epoch": 0.12352274566941882, "grad_norm": 1.1231989414190497, "learning_rate": 9.665405129448148e-06, "loss": 0.5942, "step": 2289 }, { "epoch": 0.1235767092979332, "grad_norm": 0.8697400029417068, "learning_rate": 9.66511639730418e-06, "loss": 0.4915, "step": 2290 }, { "epoch": 0.12363067292644757, "grad_norm": 1.3202433195580263, "learning_rate": 9.664827545450558e-06, "loss": 0.6408, "step": 2291 }, { "epoch": 0.12368463655496195, "grad_norm": 1.108800565583756, "learning_rate": 9.664538573895584e-06, "loss": 0.4425, "step": 2292 }, { "epoch": 0.12373860018347634, "grad_norm": 0.8900464895758888, "learning_rate": 9.664249482647563e-06, "loss": 0.4278, "step": 2293 }, { "epoch": 0.12379256381199072, "grad_norm": 1.0069899345340934, "learning_rate": 9.663960271714805e-06, "loss": 0.3956, "step": 2294 }, { "epoch": 0.1238465274405051, "grad_norm": 1.0908276370743775, "learning_rate": 9.66367094110562e-06, "loss": 0.5027, "step": 2295 }, { "epoch": 0.12390049106901949, "grad_norm": 1.2590867700383381, "learning_rate": 9.663381490828326e-06, "loss": 0.4648, "step": 2296 }, { "epoch": 0.12395445469753386, "grad_norm": 0.9943299838390417, "learning_rate": 9.663091920891241e-06, "loss": 0.5348, "step": 2297 }, { "epoch": 0.12400841832604824, "grad_norm": 0.895696595881996, "learning_rate": 9.662802231302687e-06, "loss": 0.4343, "step": 2298 }, { "epoch": 0.12406238195456262, "grad_norm": 0.988818066946634, "learning_rate": 9.662512422070994e-06, "loss": 0.4817, "step": 2299 }, { "epoch": 0.124116345583077, "grad_norm": 1.0628598546050112, "learning_rate": 9.662222493204485e-06, "loss": 0.4844, "step": 2300 }, { "epoch": 0.12417030921159139, "grad_norm": 1.0788173755181631, "learning_rate": 9.661932444711495e-06, "loss": 0.4939, "step": 2301 }, { "epoch": 0.12422427284010577, "grad_norm": 1.130407900785564, "learning_rate": 9.661642276600365e-06, "loss": 0.5054, "step": 2302 }, { "epoch": 0.12427823646862016, "grad_norm": 1.0700600279021275, "learning_rate": 9.661351988879427e-06, "loss": 0.4695, "step": 2303 }, { "epoch": 0.12433220009713453, "grad_norm": 1.2969314259022702, "learning_rate": 9.661061581557031e-06, "loss": 0.6024, "step": 2304 }, { "epoch": 0.12438616372564891, "grad_norm": 1.1191176049592504, "learning_rate": 9.66077105464152e-06, "loss": 0.4106, "step": 2305 }, { "epoch": 0.1244401273541633, "grad_norm": 0.971310521576742, "learning_rate": 9.660480408141244e-06, "loss": 0.4564, "step": 2306 }, { "epoch": 0.12449409098267768, "grad_norm": 1.0429586913915552, "learning_rate": 9.660189642064558e-06, "loss": 0.4254, "step": 2307 }, { "epoch": 0.12454805461119206, "grad_norm": 1.1586945413085292, "learning_rate": 9.659898756419817e-06, "loss": 0.4639, "step": 2308 }, { "epoch": 0.12460201823970644, "grad_norm": 1.230196555922898, "learning_rate": 9.659607751215383e-06, "loss": 0.7129, "step": 2309 }, { "epoch": 
0.12465598186822081, "grad_norm": 1.0697726836246857, "learning_rate": 9.65931662645962e-06, "loss": 0.4951, "step": 2310 }, { "epoch": 0.1247099454967352, "grad_norm": 1.1048557328586497, "learning_rate": 9.659025382160893e-06, "loss": 0.4879, "step": 2311 }, { "epoch": 0.12476390912524958, "grad_norm": 1.0084590359615, "learning_rate": 9.658734018327573e-06, "loss": 0.3901, "step": 2312 }, { "epoch": 0.12481787275376396, "grad_norm": 1.2009920220889132, "learning_rate": 9.65844253496804e-06, "loss": 0.7065, "step": 2313 }, { "epoch": 0.12487183638227835, "grad_norm": 0.986576097524222, "learning_rate": 9.658150932090663e-06, "loss": 0.4532, "step": 2314 }, { "epoch": 0.12492580001079273, "grad_norm": 0.8220129343918802, "learning_rate": 9.657859209703827e-06, "loss": 0.3863, "step": 2315 }, { "epoch": 0.1249797636393071, "grad_norm": 1.0604531856680535, "learning_rate": 9.657567367815916e-06, "loss": 0.5317, "step": 2316 }, { "epoch": 0.1250337272678215, "grad_norm": 1.0088592003408487, "learning_rate": 9.657275406435319e-06, "loss": 0.5537, "step": 2317 }, { "epoch": 0.12508769089633587, "grad_norm": 1.1099986590988968, "learning_rate": 9.656983325570426e-06, "loss": 0.5502, "step": 2318 }, { "epoch": 0.12514165452485024, "grad_norm": 0.8910825962336899, "learning_rate": 9.65669112522963e-06, "loss": 0.4911, "step": 2319 }, { "epoch": 0.12519561815336464, "grad_norm": 1.121673789514824, "learning_rate": 9.656398805421335e-06, "loss": 0.5092, "step": 2320 }, { "epoch": 0.125249581781879, "grad_norm": 1.0019397725685486, "learning_rate": 9.656106366153936e-06, "loss": 0.5038, "step": 2321 }, { "epoch": 0.1253035454103934, "grad_norm": 1.0513763789138886, "learning_rate": 9.655813807435841e-06, "loss": 0.5735, "step": 2322 }, { "epoch": 0.12535750903890777, "grad_norm": 0.9652003909575069, "learning_rate": 9.655521129275458e-06, "loss": 0.4019, "step": 2323 }, { "epoch": 0.12541147266742217, "grad_norm": 1.166351544480196, "learning_rate": 9.655228331681197e-06, "loss": 0.5495, "step": 2324 }, { "epoch": 0.12546543629593654, "grad_norm": 1.109953569517171, "learning_rate": 9.654935414661478e-06, "loss": 0.5282, "step": 2325 }, { "epoch": 0.1255193999244509, "grad_norm": 0.7984846021363996, "learning_rate": 9.654642378224716e-06, "loss": 0.3519, "step": 2326 }, { "epoch": 0.1255733635529653, "grad_norm": 1.059791634150416, "learning_rate": 9.654349222379334e-06, "loss": 0.6065, "step": 2327 }, { "epoch": 0.12562732718147968, "grad_norm": 1.1047908768219616, "learning_rate": 9.654055947133758e-06, "loss": 0.511, "step": 2328 }, { "epoch": 0.12568129080999407, "grad_norm": 1.2578559335080715, "learning_rate": 9.653762552496417e-06, "loss": 0.7344, "step": 2329 }, { "epoch": 0.12573525443850844, "grad_norm": 1.3406460761544932, "learning_rate": 9.653469038475742e-06, "loss": 0.5372, "step": 2330 }, { "epoch": 0.12578921806702284, "grad_norm": 0.9756130876637941, "learning_rate": 9.65317540508017e-06, "loss": 0.4718, "step": 2331 }, { "epoch": 0.1258431816955372, "grad_norm": 1.0531367259468962, "learning_rate": 9.652881652318142e-06, "loss": 0.5154, "step": 2332 }, { "epoch": 0.12589714532405158, "grad_norm": 0.9901447002548144, "learning_rate": 9.652587780198098e-06, "loss": 0.502, "step": 2333 }, { "epoch": 0.12595110895256598, "grad_norm": 1.1715081027693313, "learning_rate": 9.652293788728486e-06, "loss": 0.6447, "step": 2334 }, { "epoch": 0.12600507258108035, "grad_norm": 1.0931870998648412, "learning_rate": 9.651999677917753e-06, "loss": 0.6276, "step": 2335 }, { "epoch": 
0.12605903620959474, "grad_norm": 1.1298716728195484, "learning_rate": 9.651705447774357e-06, "loss": 0.5572, "step": 2336 }, { "epoch": 0.1261129998381091, "grad_norm": 1.1339630199520756, "learning_rate": 9.65141109830675e-06, "loss": 0.6943, "step": 2337 }, { "epoch": 0.12616696346662348, "grad_norm": 1.045219148018875, "learning_rate": 9.651116629523393e-06, "loss": 0.5688, "step": 2338 }, { "epoch": 0.12622092709513788, "grad_norm": 1.123672007235184, "learning_rate": 9.65082204143275e-06, "loss": 0.5997, "step": 2339 }, { "epoch": 0.12627489072365225, "grad_norm": 0.9582358570183779, "learning_rate": 9.650527334043286e-06, "loss": 0.38, "step": 2340 }, { "epoch": 0.12632885435216665, "grad_norm": 0.9553825726036964, "learning_rate": 9.650232507363474e-06, "loss": 0.4944, "step": 2341 }, { "epoch": 0.12638281798068102, "grad_norm": 1.0644758202658042, "learning_rate": 9.649937561401786e-06, "loss": 0.5437, "step": 2342 }, { "epoch": 0.12643678160919541, "grad_norm": 0.8911858578923589, "learning_rate": 9.6496424961667e-06, "loss": 0.3247, "step": 2343 }, { "epoch": 0.12649074523770978, "grad_norm": 1.0732386497563131, "learning_rate": 9.649347311666694e-06, "loss": 0.5302, "step": 2344 }, { "epoch": 0.12654470886622415, "grad_norm": 1.2324268362974626, "learning_rate": 9.649052007910255e-06, "loss": 0.5527, "step": 2345 }, { "epoch": 0.12659867249473855, "grad_norm": 0.9658054703646195, "learning_rate": 9.64875658490587e-06, "loss": 0.4225, "step": 2346 }, { "epoch": 0.12665263612325292, "grad_norm": 0.9332139358562981, "learning_rate": 9.648461042662027e-06, "loss": 0.3635, "step": 2347 }, { "epoch": 0.12670659975176732, "grad_norm": 1.0599404283870741, "learning_rate": 9.648165381187223e-06, "loss": 0.5378, "step": 2348 }, { "epoch": 0.1267605633802817, "grad_norm": 1.2145549993630358, "learning_rate": 9.647869600489954e-06, "loss": 0.6625, "step": 2349 }, { "epoch": 0.12681452700879609, "grad_norm": 0.9751897129335274, "learning_rate": 9.647573700578721e-06, "loss": 0.3854, "step": 2350 }, { "epoch": 0.12686849063731045, "grad_norm": 0.9768421948855404, "learning_rate": 9.64727768146203e-06, "loss": 0.4006, "step": 2351 }, { "epoch": 0.12692245426582482, "grad_norm": 1.1018301961283798, "learning_rate": 9.646981543148387e-06, "loss": 0.421, "step": 2352 }, { "epoch": 0.12697641789433922, "grad_norm": 1.286981143639693, "learning_rate": 9.646685285646304e-06, "loss": 0.6644, "step": 2353 }, { "epoch": 0.1270303815228536, "grad_norm": 1.1121912385109616, "learning_rate": 9.646388908964297e-06, "loss": 0.5332, "step": 2354 }, { "epoch": 0.127084345151368, "grad_norm": 1.0553933835029896, "learning_rate": 9.646092413110883e-06, "loss": 0.5645, "step": 2355 }, { "epoch": 0.12713830877988236, "grad_norm": 0.7894738988967905, "learning_rate": 9.645795798094583e-06, "loss": 0.4628, "step": 2356 }, { "epoch": 0.12719227240839673, "grad_norm": 0.8986074065583881, "learning_rate": 9.645499063923925e-06, "loss": 0.4192, "step": 2357 }, { "epoch": 0.12724623603691113, "grad_norm": 0.9779527614674771, "learning_rate": 9.645202210607433e-06, "loss": 0.4687, "step": 2358 }, { "epoch": 0.1273001996654255, "grad_norm": 1.2292497155955948, "learning_rate": 9.644905238153642e-06, "loss": 0.4825, "step": 2359 }, { "epoch": 0.1273541632939399, "grad_norm": 0.9467758623028063, "learning_rate": 9.644608146571084e-06, "loss": 0.4671, "step": 2360 }, { "epoch": 0.12740812692245426, "grad_norm": 1.091059325211132, "learning_rate": 9.644310935868303e-06, "loss": 0.4729, "step": 2361 }, { "epoch": 
0.12746209055096866, "grad_norm": 1.0600944026062862, "learning_rate": 9.644013606053836e-06, "loss": 0.4672, "step": 2362 }, { "epoch": 0.12751605417948303, "grad_norm": 0.7840662638831936, "learning_rate": 9.643716157136232e-06, "loss": 0.3388, "step": 2363 }, { "epoch": 0.1275700178079974, "grad_norm": 1.076131696848735, "learning_rate": 9.643418589124038e-06, "loss": 0.4295, "step": 2364 }, { "epoch": 0.1276239814365118, "grad_norm": 1.299056985877537, "learning_rate": 9.643120902025808e-06, "loss": 0.6662, "step": 2365 }, { "epoch": 0.12767794506502617, "grad_norm": 1.1212532029125037, "learning_rate": 9.642823095850097e-06, "loss": 0.5579, "step": 2366 }, { "epoch": 0.12773190869354056, "grad_norm": 1.090655243646464, "learning_rate": 9.642525170605464e-06, "loss": 0.5574, "step": 2367 }, { "epoch": 0.12778587232205493, "grad_norm": 0.9412617905299603, "learning_rate": 9.642227126300472e-06, "loss": 0.4045, "step": 2368 }, { "epoch": 0.1278398359505693, "grad_norm": 1.0765261510123068, "learning_rate": 9.641928962943687e-06, "loss": 0.3827, "step": 2369 }, { "epoch": 0.1278937995790837, "grad_norm": 1.130923581847816, "learning_rate": 9.64163068054368e-06, "loss": 0.5618, "step": 2370 }, { "epoch": 0.12794776320759807, "grad_norm": 0.9371509506883278, "learning_rate": 9.64133227910902e-06, "loss": 0.451, "step": 2371 }, { "epoch": 0.12800172683611247, "grad_norm": 1.0830884506795813, "learning_rate": 9.641033758648286e-06, "loss": 0.5312, "step": 2372 }, { "epoch": 0.12805569046462684, "grad_norm": 1.0213862469568282, "learning_rate": 9.640735119170058e-06, "loss": 0.4496, "step": 2373 }, { "epoch": 0.12810965409314123, "grad_norm": 1.0960032223707379, "learning_rate": 9.640436360682918e-06, "loss": 0.4521, "step": 2374 }, { "epoch": 0.1281636177216556, "grad_norm": 0.9606648476859436, "learning_rate": 9.640137483195454e-06, "loss": 0.4681, "step": 2375 }, { "epoch": 0.12821758135016997, "grad_norm": 1.1172880473254505, "learning_rate": 9.639838486716256e-06, "loss": 0.5794, "step": 2376 }, { "epoch": 0.12827154497868437, "grad_norm": 1.1058131600254175, "learning_rate": 9.639539371253917e-06, "loss": 0.5061, "step": 2377 }, { "epoch": 0.12832550860719874, "grad_norm": 1.0313060732410173, "learning_rate": 9.639240136817033e-06, "loss": 0.4649, "step": 2378 }, { "epoch": 0.12837947223571314, "grad_norm": 1.1953392544536947, "learning_rate": 9.638940783414205e-06, "loss": 0.6473, "step": 2379 }, { "epoch": 0.1284334358642275, "grad_norm": 0.7873834199318798, "learning_rate": 9.638641311054037e-06, "loss": 0.4563, "step": 2380 }, { "epoch": 0.1284873994927419, "grad_norm": 1.103509483071724, "learning_rate": 9.638341719745135e-06, "loss": 0.5284, "step": 2381 }, { "epoch": 0.12854136312125627, "grad_norm": 1.17896761435768, "learning_rate": 9.638042009496113e-06, "loss": 0.4917, "step": 2382 }, { "epoch": 0.12859532674977064, "grad_norm": 1.042597902144271, "learning_rate": 9.63774218031558e-06, "loss": 0.5803, "step": 2383 }, { "epoch": 0.12864929037828504, "grad_norm": 1.2876964421340653, "learning_rate": 9.637442232212156e-06, "loss": 0.742, "step": 2384 }, { "epoch": 0.1287032540067994, "grad_norm": 1.0124103943886325, "learning_rate": 9.637142165194463e-06, "loss": 0.4369, "step": 2385 }, { "epoch": 0.1287572176353138, "grad_norm": 1.0195095112655648, "learning_rate": 9.636841979271124e-06, "loss": 0.4574, "step": 2386 }, { "epoch": 0.12881118126382818, "grad_norm": 1.2009140099321645, "learning_rate": 9.636541674450766e-06, "loss": 0.493, "step": 2387 }, { "epoch": 
0.12886514489234255, "grad_norm": 0.9056379203867875, "learning_rate": 9.63624125074202e-06, "loss": 0.4328, "step": 2388 }, { "epoch": 0.12891910852085695, "grad_norm": 1.1481425038859074, "learning_rate": 9.635940708153522e-06, "loss": 0.5056, "step": 2389 }, { "epoch": 0.12897307214937132, "grad_norm": 0.9157110951050635, "learning_rate": 9.635640046693908e-06, "loss": 0.5054, "step": 2390 }, { "epoch": 0.1290270357778857, "grad_norm": 1.07460102493212, "learning_rate": 9.635339266371822e-06, "loss": 0.5474, "step": 2391 }, { "epoch": 0.12908099940640008, "grad_norm": 1.0030390726620408, "learning_rate": 9.635038367195905e-06, "loss": 0.4663, "step": 2392 }, { "epoch": 0.12913496303491448, "grad_norm": 1.117900712629355, "learning_rate": 9.634737349174808e-06, "loss": 0.5975, "step": 2393 }, { "epoch": 0.12918892666342885, "grad_norm": 1.3555245158028257, "learning_rate": 9.634436212317183e-06, "loss": 0.666, "step": 2394 }, { "epoch": 0.12924289029194322, "grad_norm": 0.6534616139106463, "learning_rate": 9.634134956631682e-06, "loss": 0.2691, "step": 2395 }, { "epoch": 0.12929685392045762, "grad_norm": 0.9775394153258076, "learning_rate": 9.633833582126965e-06, "loss": 0.4342, "step": 2396 }, { "epoch": 0.12935081754897199, "grad_norm": 1.0146337512552532, "learning_rate": 9.633532088811695e-06, "loss": 0.4693, "step": 2397 }, { "epoch": 0.12940478117748638, "grad_norm": 0.967699387726096, "learning_rate": 9.633230476694537e-06, "loss": 0.4306, "step": 2398 }, { "epoch": 0.12945874480600075, "grad_norm": 0.857916673328048, "learning_rate": 9.632928745784155e-06, "loss": 0.3807, "step": 2399 }, { "epoch": 0.12951270843451515, "grad_norm": 1.0676377315763095, "learning_rate": 9.632626896089227e-06, "loss": 0.5414, "step": 2400 }, { "epoch": 0.12956667206302952, "grad_norm": 1.2251926894520226, "learning_rate": 9.632324927618427e-06, "loss": 0.5675, "step": 2401 }, { "epoch": 0.1296206356915439, "grad_norm": 0.8927103427909798, "learning_rate": 9.632022840380434e-06, "loss": 0.4772, "step": 2402 }, { "epoch": 0.1296745993200583, "grad_norm": 0.8765908472064589, "learning_rate": 9.631720634383928e-06, "loss": 0.4286, "step": 2403 }, { "epoch": 0.12972856294857266, "grad_norm": 1.1782956295449456, "learning_rate": 9.631418309637596e-06, "loss": 0.5105, "step": 2404 }, { "epoch": 0.12978252657708705, "grad_norm": 0.9034348128422381, "learning_rate": 9.631115866150128e-06, "loss": 0.4183, "step": 2405 }, { "epoch": 0.12983649020560142, "grad_norm": 1.0964704607944813, "learning_rate": 9.630813303930216e-06, "loss": 0.5026, "step": 2406 }, { "epoch": 0.1298904538341158, "grad_norm": 1.1136694357283998, "learning_rate": 9.630510622986554e-06, "loss": 0.6531, "step": 2407 }, { "epoch": 0.1299444174626302, "grad_norm": 1.4270243400488787, "learning_rate": 9.630207823327846e-06, "loss": 0.675, "step": 2408 }, { "epoch": 0.12999838109114456, "grad_norm": 1.2708352520950916, "learning_rate": 9.629904904962791e-06, "loss": 0.7248, "step": 2409 }, { "epoch": 0.13005234471965896, "grad_norm": 1.2031617042791496, "learning_rate": 9.629601867900093e-06, "loss": 0.5103, "step": 2410 }, { "epoch": 0.13010630834817333, "grad_norm": 1.0085558137015336, "learning_rate": 9.629298712148469e-06, "loss": 0.4638, "step": 2411 }, { "epoch": 0.13016027197668772, "grad_norm": 0.8869487714830846, "learning_rate": 9.628995437716625e-06, "loss": 0.2859, "step": 2412 }, { "epoch": 0.1302142356052021, "grad_norm": 0.9736740855142443, "learning_rate": 9.62869204461328e-06, "loss": 0.5188, "step": 2413 }, { "epoch": 
0.13026819923371646, "grad_norm": 1.1797904952668532, "learning_rate": 9.628388532847154e-06, "loss": 0.5881, "step": 2414 }, { "epoch": 0.13032216286223086, "grad_norm": 1.166877476662343, "learning_rate": 9.628084902426972e-06, "loss": 0.5259, "step": 2415 }, { "epoch": 0.13037612649074523, "grad_norm": 1.3834053765911938, "learning_rate": 9.627781153361457e-06, "loss": 0.6617, "step": 2416 }, { "epoch": 0.13043009011925963, "grad_norm": 1.2555975142595714, "learning_rate": 9.627477285659342e-06, "loss": 0.4746, "step": 2417 }, { "epoch": 0.130484053747774, "grad_norm": 1.0448532464614437, "learning_rate": 9.627173299329357e-06, "loss": 0.5222, "step": 2418 }, { "epoch": 0.13053801737628837, "grad_norm": 0.9829225057644765, "learning_rate": 9.626869194380244e-06, "loss": 0.5243, "step": 2419 }, { "epoch": 0.13059198100480277, "grad_norm": 0.957158270450429, "learning_rate": 9.62656497082074e-06, "loss": 0.4233, "step": 2420 }, { "epoch": 0.13064594463331713, "grad_norm": 1.0449644453801614, "learning_rate": 9.626260628659588e-06, "loss": 0.4826, "step": 2421 }, { "epoch": 0.13069990826183153, "grad_norm": 1.0207092747101252, "learning_rate": 9.625956167905537e-06, "loss": 0.4251, "step": 2422 }, { "epoch": 0.1307538718903459, "grad_norm": 0.8998483236584914, "learning_rate": 9.625651588567336e-06, "loss": 0.4228, "step": 2423 }, { "epoch": 0.1308078355188603, "grad_norm": 1.2680256486807358, "learning_rate": 9.62534689065374e-06, "loss": 0.5302, "step": 2424 }, { "epoch": 0.13086179914737467, "grad_norm": 1.2111756956184165, "learning_rate": 9.625042074173508e-06, "loss": 0.5867, "step": 2425 }, { "epoch": 0.13091576277588904, "grad_norm": 1.069740453486259, "learning_rate": 9.624737139135394e-06, "loss": 0.4858, "step": 2426 }, { "epoch": 0.13096972640440344, "grad_norm": 1.3376218797658184, "learning_rate": 9.624432085548171e-06, "loss": 0.4978, "step": 2427 }, { "epoch": 0.1310236900329178, "grad_norm": 1.1430438302576718, "learning_rate": 9.624126913420602e-06, "loss": 0.6198, "step": 2428 }, { "epoch": 0.1310776536614322, "grad_norm": 1.0004736159347272, "learning_rate": 9.623821622761457e-06, "loss": 0.4199, "step": 2429 }, { "epoch": 0.13113161728994657, "grad_norm": 1.2692414954178206, "learning_rate": 9.623516213579513e-06, "loss": 0.4773, "step": 2430 }, { "epoch": 0.13118558091846097, "grad_norm": 0.9002773963320035, "learning_rate": 9.623210685883545e-06, "loss": 0.4066, "step": 2431 }, { "epoch": 0.13123954454697534, "grad_norm": 0.9869864656682837, "learning_rate": 9.622905039682336e-06, "loss": 0.393, "step": 2432 }, { "epoch": 0.1312935081754897, "grad_norm": 0.9590246192881277, "learning_rate": 9.622599274984671e-06, "loss": 0.4118, "step": 2433 }, { "epoch": 0.1313474718040041, "grad_norm": 1.1492526357596056, "learning_rate": 9.622293391799337e-06, "loss": 0.5548, "step": 2434 }, { "epoch": 0.13140143543251848, "grad_norm": 0.9370903017822045, "learning_rate": 9.621987390135125e-06, "loss": 0.4195, "step": 2435 }, { "epoch": 0.13145539906103287, "grad_norm": 1.0136113905246822, "learning_rate": 9.621681270000833e-06, "loss": 0.4666, "step": 2436 }, { "epoch": 0.13150936268954724, "grad_norm": 1.085792518089485, "learning_rate": 9.621375031405255e-06, "loss": 0.4646, "step": 2437 }, { "epoch": 0.1315633263180616, "grad_norm": 1.2352015013851452, "learning_rate": 9.621068674357193e-06, "loss": 0.5272, "step": 2438 }, { "epoch": 0.131617289946576, "grad_norm": 0.8028213251695808, "learning_rate": 9.620762198865454e-06, "loss": 0.3269, "step": 2439 }, { "epoch": 
0.13167125357509038, "grad_norm": 0.9976372090632596, "learning_rate": 9.620455604938848e-06, "loss": 0.3987, "step": 2440 }, { "epoch": 0.13172521720360478, "grad_norm": 1.1127761942622467, "learning_rate": 9.620148892586182e-06, "loss": 0.5511, "step": 2441 }, { "epoch": 0.13177918083211915, "grad_norm": 0.8015034349488216, "learning_rate": 9.619842061816274e-06, "loss": 0.336, "step": 2442 }, { "epoch": 0.13183314446063354, "grad_norm": 1.1426894223164443, "learning_rate": 9.61953511263794e-06, "loss": 0.5223, "step": 2443 }, { "epoch": 0.13188710808914791, "grad_norm": 1.0900535494826618, "learning_rate": 9.619228045060009e-06, "loss": 0.481, "step": 2444 }, { "epoch": 0.13194107171766228, "grad_norm": 1.1322871389096443, "learning_rate": 9.618920859091302e-06, "loss": 0.5074, "step": 2445 }, { "epoch": 0.13199503534617668, "grad_norm": 1.043328564956112, "learning_rate": 9.618613554740643e-06, "loss": 0.4115, "step": 2446 }, { "epoch": 0.13204899897469105, "grad_norm": 1.1264655513708715, "learning_rate": 9.61830613201687e-06, "loss": 0.5749, "step": 2447 }, { "epoch": 0.13210296260320545, "grad_norm": 1.0118079909092637, "learning_rate": 9.61799859092882e-06, "loss": 0.5687, "step": 2448 }, { "epoch": 0.13215692623171982, "grad_norm": 1.020229301501136, "learning_rate": 9.61769093148533e-06, "loss": 0.5671, "step": 2449 }, { "epoch": 0.13221088986023422, "grad_norm": 1.0828895202750095, "learning_rate": 9.617383153695239e-06, "loss": 0.3592, "step": 2450 }, { "epoch": 0.13226485348874858, "grad_norm": 1.0596406078708909, "learning_rate": 9.617075257567396e-06, "loss": 0.6218, "step": 2451 }, { "epoch": 0.13231881711726295, "grad_norm": 0.9752104492561213, "learning_rate": 9.616767243110652e-06, "loss": 0.5265, "step": 2452 }, { "epoch": 0.13237278074577735, "grad_norm": 1.3085855838016145, "learning_rate": 9.616459110333857e-06, "loss": 0.6442, "step": 2453 }, { "epoch": 0.13242674437429172, "grad_norm": 1.121650074806892, "learning_rate": 9.61615085924587e-06, "loss": 0.5288, "step": 2454 }, { "epoch": 0.13248070800280612, "grad_norm": 1.1055303019320688, "learning_rate": 9.615842489855544e-06, "loss": 0.5232, "step": 2455 }, { "epoch": 0.1325346716313205, "grad_norm": 1.2129562679317283, "learning_rate": 9.615534002171748e-06, "loss": 0.6321, "step": 2456 }, { "epoch": 0.13258863525983486, "grad_norm": 1.1957399176974686, "learning_rate": 9.615225396203346e-06, "loss": 0.5964, "step": 2457 }, { "epoch": 0.13264259888834926, "grad_norm": 1.2377423166940396, "learning_rate": 9.61491667195921e-06, "loss": 0.5543, "step": 2458 }, { "epoch": 0.13269656251686363, "grad_norm": 1.346362147879752, "learning_rate": 9.614607829448208e-06, "loss": 0.7215, "step": 2459 }, { "epoch": 0.13275052614537802, "grad_norm": 0.9535335356999718, "learning_rate": 9.61429886867922e-06, "loss": 0.4411, "step": 2460 }, { "epoch": 0.1328044897738924, "grad_norm": 1.0247754056072995, "learning_rate": 9.613989789661129e-06, "loss": 0.5801, "step": 2461 }, { "epoch": 0.1328584534024068, "grad_norm": 0.977434445340742, "learning_rate": 9.613680592402811e-06, "loss": 0.4674, "step": 2462 }, { "epoch": 0.13291241703092116, "grad_norm": 1.04626538900482, "learning_rate": 9.613371276913157e-06, "loss": 0.6238, "step": 2463 }, { "epoch": 0.13296638065943553, "grad_norm": 0.9885610191677416, "learning_rate": 9.613061843201056e-06, "loss": 0.6203, "step": 2464 }, { "epoch": 0.13302034428794993, "grad_norm": 1.2455341685581247, "learning_rate": 9.612752291275402e-06, "loss": 0.4998, "step": 2465 }, { "epoch": 
0.1330743079164643, "grad_norm": 1.169939118486903, "learning_rate": 9.61244262114509e-06, "loss": 0.5575, "step": 2466 }, { "epoch": 0.1331282715449787, "grad_norm": 1.0664988788314986, "learning_rate": 9.612132832819026e-06, "loss": 0.4612, "step": 2467 }, { "epoch": 0.13318223517349306, "grad_norm": 1.0206933584213325, "learning_rate": 9.611822926306105e-06, "loss": 0.4886, "step": 2468 }, { "epoch": 0.13323619880200746, "grad_norm": 0.9988512482641646, "learning_rate": 9.61151290161524e-06, "loss": 0.5729, "step": 2469 }, { "epoch": 0.13329016243052183, "grad_norm": 1.1047423093010345, "learning_rate": 9.61120275875534e-06, "loss": 0.7915, "step": 2470 }, { "epoch": 0.1333441260590362, "grad_norm": 0.988310547692114, "learning_rate": 9.610892497735318e-06, "loss": 0.4449, "step": 2471 }, { "epoch": 0.1333980896875506, "grad_norm": 1.0445904806882997, "learning_rate": 9.610582118564094e-06, "loss": 0.4446, "step": 2472 }, { "epoch": 0.13345205331606497, "grad_norm": 1.0821206944813198, "learning_rate": 9.610271621250583e-06, "loss": 0.5476, "step": 2473 }, { "epoch": 0.13350601694457936, "grad_norm": 1.2224905940279551, "learning_rate": 9.609961005803717e-06, "loss": 0.6291, "step": 2474 }, { "epoch": 0.13355998057309373, "grad_norm": 0.9004469412710865, "learning_rate": 9.609650272232414e-06, "loss": 0.4127, "step": 2475 }, { "epoch": 0.1336139442016081, "grad_norm": 1.1188922327563204, "learning_rate": 9.609339420545612e-06, "loss": 0.6003, "step": 2476 }, { "epoch": 0.1336679078301225, "grad_norm": 0.8726946473185105, "learning_rate": 9.609028450752244e-06, "loss": 0.3706, "step": 2477 }, { "epoch": 0.13372187145863687, "grad_norm": 0.8963312001084753, "learning_rate": 9.608717362861244e-06, "loss": 0.4538, "step": 2478 }, { "epoch": 0.13377583508715127, "grad_norm": 0.987510092117334, "learning_rate": 9.608406156881556e-06, "loss": 0.4215, "step": 2479 }, { "epoch": 0.13382979871566564, "grad_norm": 0.951825455480239, "learning_rate": 9.608094832822125e-06, "loss": 0.4191, "step": 2480 }, { "epoch": 0.13388376234418004, "grad_norm": 1.194530000304039, "learning_rate": 9.607783390691897e-06, "loss": 0.6759, "step": 2481 }, { "epoch": 0.1339377259726944, "grad_norm": 0.8948112319041206, "learning_rate": 9.607471830499824e-06, "loss": 0.414, "step": 2482 }, { "epoch": 0.13399168960120877, "grad_norm": 1.0094061253921351, "learning_rate": 9.607160152254859e-06, "loss": 0.4218, "step": 2483 }, { "epoch": 0.13404565322972317, "grad_norm": 0.7844190787087654, "learning_rate": 9.606848355965964e-06, "loss": 0.3715, "step": 2484 }, { "epoch": 0.13409961685823754, "grad_norm": 0.7606854328208149, "learning_rate": 9.606536441642095e-06, "loss": 0.3128, "step": 2485 }, { "epoch": 0.13415358048675194, "grad_norm": 1.071083198019202, "learning_rate": 9.60622440929222e-06, "loss": 0.6381, "step": 2486 }, { "epoch": 0.1342075441152663, "grad_norm": 1.040828527890293, "learning_rate": 9.605912258925307e-06, "loss": 0.5335, "step": 2487 }, { "epoch": 0.13426150774378068, "grad_norm": 1.174359419468511, "learning_rate": 9.605599990550328e-06, "loss": 0.5561, "step": 2488 }, { "epoch": 0.13431547137229508, "grad_norm": 0.9748030315859836, "learning_rate": 9.605287604176254e-06, "loss": 0.4449, "step": 2489 }, { "epoch": 0.13436943500080945, "grad_norm": 1.0200785089753444, "learning_rate": 9.604975099812067e-06, "loss": 0.5766, "step": 2490 }, { "epoch": 0.13442339862932384, "grad_norm": 1.1279461252828833, "learning_rate": 9.604662477466749e-06, "loss": 0.4621, "step": 2491 }, { "epoch": 
0.1344773622578382, "grad_norm": 1.1348280737827194, "learning_rate": 9.604349737149284e-06, "loss": 0.5523, "step": 2492 }, { "epoch": 0.1345313258863526, "grad_norm": 1.2243609099980748, "learning_rate": 9.604036878868659e-06, "loss": 0.6548, "step": 2493 }, { "epoch": 0.13458528951486698, "grad_norm": 1.24533581395501, "learning_rate": 9.60372390263387e-06, "loss": 0.6079, "step": 2494 }, { "epoch": 0.13463925314338135, "grad_norm": 0.9909986314270336, "learning_rate": 9.603410808453908e-06, "loss": 0.367, "step": 2495 }, { "epoch": 0.13469321677189575, "grad_norm": 1.0652539883547631, "learning_rate": 9.603097596337771e-06, "loss": 0.4058, "step": 2496 }, { "epoch": 0.13474718040041012, "grad_norm": 1.1147590269355505, "learning_rate": 9.602784266294465e-06, "loss": 0.4668, "step": 2497 }, { "epoch": 0.1348011440289245, "grad_norm": 1.2660133946669825, "learning_rate": 9.602470818332994e-06, "loss": 0.6954, "step": 2498 }, { "epoch": 0.13485510765743888, "grad_norm": 1.015002525451692, "learning_rate": 9.602157252462366e-06, "loss": 0.4105, "step": 2499 }, { "epoch": 0.13490907128595328, "grad_norm": 0.9577233129333887, "learning_rate": 9.601843568691593e-06, "loss": 0.4719, "step": 2500 }, { "epoch": 0.13490907128595328, "eval_loss": 0.5979181528091431, "eval_runtime": 164.2476, "eval_samples_per_second": 20.938, "eval_steps_per_second": 0.877, "step": 2500 }, { "epoch": 0.13496303491446765, "grad_norm": 1.029910066759321, "learning_rate": 9.601529767029693e-06, "loss": 0.528, "step": 2501 }, { "epoch": 0.13501699854298202, "grad_norm": 1.134005129744382, "learning_rate": 9.601215847485683e-06, "loss": 0.4481, "step": 2502 }, { "epoch": 0.13507096217149642, "grad_norm": 0.9967761863757987, "learning_rate": 9.600901810068583e-06, "loss": 0.4424, "step": 2503 }, { "epoch": 0.1351249258000108, "grad_norm": 0.9506240119650282, "learning_rate": 9.600587654787424e-06, "loss": 0.4962, "step": 2504 }, { "epoch": 0.13517888942852518, "grad_norm": 1.0109301091251528, "learning_rate": 9.600273381651231e-06, "loss": 0.4526, "step": 2505 }, { "epoch": 0.13523285305703955, "grad_norm": 1.1042680615407552, "learning_rate": 9.599958990669038e-06, "loss": 0.5816, "step": 2506 }, { "epoch": 0.13528681668555392, "grad_norm": 1.0954231731115147, "learning_rate": 9.599644481849882e-06, "loss": 0.494, "step": 2507 }, { "epoch": 0.13534078031406832, "grad_norm": 1.0758162220892895, "learning_rate": 9.5993298552028e-06, "loss": 0.7047, "step": 2508 }, { "epoch": 0.1353947439425827, "grad_norm": 0.8056014486717537, "learning_rate": 9.599015110736837e-06, "loss": 0.3388, "step": 2509 }, { "epoch": 0.1354487075710971, "grad_norm": 1.0099862056561784, "learning_rate": 9.598700248461037e-06, "loss": 0.5323, "step": 2510 }, { "epoch": 0.13550267119961146, "grad_norm": 1.0855549940230655, "learning_rate": 9.598385268384451e-06, "loss": 0.4472, "step": 2511 }, { "epoch": 0.13555663482812585, "grad_norm": 1.0834810297140982, "learning_rate": 9.59807017051613e-06, "loss": 0.4654, "step": 2512 }, { "epoch": 0.13561059845664022, "grad_norm": 0.9839433055733857, "learning_rate": 9.597754954865135e-06, "loss": 0.4587, "step": 2513 }, { "epoch": 0.1356645620851546, "grad_norm": 1.0058060991557032, "learning_rate": 9.597439621440518e-06, "loss": 0.4803, "step": 2514 }, { "epoch": 0.135718525713669, "grad_norm": 0.9913408751687934, "learning_rate": 9.597124170251348e-06, "loss": 0.4609, "step": 2515 }, { "epoch": 0.13577248934218336, "grad_norm": 1.1227426972215355, "learning_rate": 9.59680860130669e-06, "loss": 0.567, 
"step": 2516 }, { "epoch": 0.13582645297069776, "grad_norm": 0.9683478453412216, "learning_rate": 9.596492914615612e-06, "loss": 0.4141, "step": 2517 }, { "epoch": 0.13588041659921213, "grad_norm": 1.0552723142140177, "learning_rate": 9.596177110187189e-06, "loss": 0.5502, "step": 2518 }, { "epoch": 0.13593438022772653, "grad_norm": 1.141275200682066, "learning_rate": 9.595861188030497e-06, "loss": 0.6188, "step": 2519 }, { "epoch": 0.1359883438562409, "grad_norm": 0.9973201886089194, "learning_rate": 9.595545148154615e-06, "loss": 0.5388, "step": 2520 }, { "epoch": 0.13604230748475526, "grad_norm": 1.340008910568637, "learning_rate": 9.595228990568629e-06, "loss": 0.6626, "step": 2521 }, { "epoch": 0.13609627111326966, "grad_norm": 1.2491490711296749, "learning_rate": 9.594912715281624e-06, "loss": 0.5527, "step": 2522 }, { "epoch": 0.13615023474178403, "grad_norm": 0.9329716158670116, "learning_rate": 9.594596322302688e-06, "loss": 0.4582, "step": 2523 }, { "epoch": 0.13620419837029843, "grad_norm": 0.8732251108470945, "learning_rate": 9.594279811640919e-06, "loss": 0.4118, "step": 2524 }, { "epoch": 0.1362581619988128, "grad_norm": 1.1890024672951138, "learning_rate": 9.593963183305408e-06, "loss": 0.4772, "step": 2525 }, { "epoch": 0.13631212562732717, "grad_norm": 1.3836628478930972, "learning_rate": 9.59364643730526e-06, "loss": 0.4739, "step": 2526 }, { "epoch": 0.13636608925584157, "grad_norm": 1.2902259612881681, "learning_rate": 9.593329573649578e-06, "loss": 0.6125, "step": 2527 }, { "epoch": 0.13642005288435594, "grad_norm": 0.8087477486409129, "learning_rate": 9.593012592347468e-06, "loss": 0.3189, "step": 2528 }, { "epoch": 0.13647401651287033, "grad_norm": 1.1602530983014696, "learning_rate": 9.592695493408039e-06, "loss": 0.4625, "step": 2529 }, { "epoch": 0.1365279801413847, "grad_norm": 0.9521072564287097, "learning_rate": 9.592378276840406e-06, "loss": 0.3513, "step": 2530 }, { "epoch": 0.1365819437698991, "grad_norm": 1.0132130018012742, "learning_rate": 9.592060942653687e-06, "loss": 0.5234, "step": 2531 }, { "epoch": 0.13663590739841347, "grad_norm": 1.165906448192873, "learning_rate": 9.591743490857002e-06, "loss": 0.5985, "step": 2532 }, { "epoch": 0.13668987102692784, "grad_norm": 0.7884327482710408, "learning_rate": 9.591425921459475e-06, "loss": 0.3133, "step": 2533 }, { "epoch": 0.13674383465544224, "grad_norm": 0.9240095470179405, "learning_rate": 9.591108234470232e-06, "loss": 0.4745, "step": 2534 }, { "epoch": 0.1367977982839566, "grad_norm": 1.065547895464235, "learning_rate": 9.590790429898405e-06, "loss": 0.5158, "step": 2535 }, { "epoch": 0.136851761912471, "grad_norm": 0.9777372636408309, "learning_rate": 9.590472507753127e-06, "loss": 0.4382, "step": 2536 }, { "epoch": 0.13690572554098537, "grad_norm": 1.1308187633334466, "learning_rate": 9.590154468043536e-06, "loss": 0.4877, "step": 2537 }, { "epoch": 0.13695968916949977, "grad_norm": 0.7940184553753229, "learning_rate": 9.589836310778775e-06, "loss": 0.3455, "step": 2538 }, { "epoch": 0.13701365279801414, "grad_norm": 1.102230633105096, "learning_rate": 9.589518035967985e-06, "loss": 0.7786, "step": 2539 }, { "epoch": 0.1370676164265285, "grad_norm": 1.304275919149733, "learning_rate": 9.589199643620313e-06, "loss": 0.6737, "step": 2540 }, { "epoch": 0.1371215800550429, "grad_norm": 1.0210457107753166, "learning_rate": 9.588881133744912e-06, "loss": 0.4063, "step": 2541 }, { "epoch": 0.13717554368355728, "grad_norm": 1.0386254495498621, "learning_rate": 9.588562506350935e-06, "loss": 0.6125, "step": 
2542 }, { "epoch": 0.13722950731207167, "grad_norm": 0.8895917731810747, "learning_rate": 9.588243761447542e-06, "loss": 0.408, "step": 2543 }, { "epoch": 0.13728347094058604, "grad_norm": 1.067530374667688, "learning_rate": 9.587924899043891e-06, "loss": 0.4107, "step": 2544 }, { "epoch": 0.13733743456910041, "grad_norm": 1.0277261987723012, "learning_rate": 9.58760591914915e-06, "loss": 0.686, "step": 2545 }, { "epoch": 0.1373913981976148, "grad_norm": 1.0365401907144827, "learning_rate": 9.587286821772482e-06, "loss": 0.5248, "step": 2546 }, { "epoch": 0.13744536182612918, "grad_norm": 1.0368307252113131, "learning_rate": 9.586967606923064e-06, "loss": 0.393, "step": 2547 }, { "epoch": 0.13749932545464358, "grad_norm": 1.0273634170984998, "learning_rate": 9.586648274610064e-06, "loss": 0.3996, "step": 2548 }, { "epoch": 0.13755328908315795, "grad_norm": 1.2538092172866357, "learning_rate": 9.586328824842666e-06, "loss": 0.6681, "step": 2549 }, { "epoch": 0.13760725271167235, "grad_norm": 1.302772807289807, "learning_rate": 9.586009257630047e-06, "loss": 0.4649, "step": 2550 }, { "epoch": 0.13766121634018672, "grad_norm": 1.1219474209095346, "learning_rate": 9.585689572981394e-06, "loss": 0.4485, "step": 2551 }, { "epoch": 0.13771517996870108, "grad_norm": 1.0831766812714303, "learning_rate": 9.585369770905893e-06, "loss": 0.6332, "step": 2552 }, { "epoch": 0.13776914359721548, "grad_norm": 1.0076020834204702, "learning_rate": 9.585049851412739e-06, "loss": 0.5317, "step": 2553 }, { "epoch": 0.13782310722572985, "grad_norm": 1.0280097172550269, "learning_rate": 9.584729814511123e-06, "loss": 0.4816, "step": 2554 }, { "epoch": 0.13787707085424425, "grad_norm": 1.1147073858086427, "learning_rate": 9.584409660210246e-06, "loss": 0.4072, "step": 2555 }, { "epoch": 0.13793103448275862, "grad_norm": 1.1536095536285746, "learning_rate": 9.584089388519307e-06, "loss": 0.5672, "step": 2556 }, { "epoch": 0.137984998111273, "grad_norm": 1.3147134998710537, "learning_rate": 9.583768999447514e-06, "loss": 0.6541, "step": 2557 }, { "epoch": 0.13803896173978739, "grad_norm": 0.7982761535559738, "learning_rate": 9.583448493004072e-06, "loss": 0.3368, "step": 2558 }, { "epoch": 0.13809292536830176, "grad_norm": 1.0077510203567732, "learning_rate": 9.583127869198196e-06, "loss": 0.4421, "step": 2559 }, { "epoch": 0.13814688899681615, "grad_norm": 1.3678041966263175, "learning_rate": 9.5828071280391e-06, "loss": 0.5334, "step": 2560 }, { "epoch": 0.13820085262533052, "grad_norm": 1.1255253080048273, "learning_rate": 9.582486269535998e-06, "loss": 0.5211, "step": 2561 }, { "epoch": 0.13825481625384492, "grad_norm": 1.0549558665271186, "learning_rate": 9.582165293698117e-06, "loss": 0.6559, "step": 2562 }, { "epoch": 0.1383087798823593, "grad_norm": 1.0888693478731601, "learning_rate": 9.581844200534682e-06, "loss": 0.5551, "step": 2563 }, { "epoch": 0.13836274351087366, "grad_norm": 1.016183020598562, "learning_rate": 9.58152299005492e-06, "loss": 0.4505, "step": 2564 }, { "epoch": 0.13841670713938806, "grad_norm": 1.110625805188764, "learning_rate": 9.581201662268063e-06, "loss": 0.4914, "step": 2565 }, { "epoch": 0.13847067076790243, "grad_norm": 0.9435012255574498, "learning_rate": 9.580880217183346e-06, "loss": 0.4038, "step": 2566 }, { "epoch": 0.13852463439641682, "grad_norm": 1.085540641750587, "learning_rate": 9.580558654810009e-06, "loss": 0.444, "step": 2567 }, { "epoch": 0.1385785980249312, "grad_norm": 0.8813751323089457, "learning_rate": 9.580236975157292e-06, "loss": 0.3689, "step": 2568 }, { 
"epoch": 0.1386325616534456, "grad_norm": 0.820317559867008, "learning_rate": 9.579915178234442e-06, "loss": 0.4265, "step": 2569 }, { "epoch": 0.13868652528195996, "grad_norm": 0.8693620741878239, "learning_rate": 9.579593264050708e-06, "loss": 0.3753, "step": 2570 }, { "epoch": 0.13874048891047433, "grad_norm": 1.2816812160027693, "learning_rate": 9.57927123261534e-06, "loss": 0.6314, "step": 2571 }, { "epoch": 0.13879445253898873, "grad_norm": 0.9904263251854922, "learning_rate": 9.578949083937597e-06, "loss": 0.4331, "step": 2572 }, { "epoch": 0.1388484161675031, "grad_norm": 0.9953179851933146, "learning_rate": 9.578626818026734e-06, "loss": 0.476, "step": 2573 }, { "epoch": 0.1389023797960175, "grad_norm": 1.1534774527531024, "learning_rate": 9.578304434892015e-06, "loss": 0.4492, "step": 2574 }, { "epoch": 0.13895634342453186, "grad_norm": 1.0394417402054497, "learning_rate": 9.577981934542708e-06, "loss": 0.5753, "step": 2575 }, { "epoch": 0.13901030705304623, "grad_norm": 1.1484559018496852, "learning_rate": 9.577659316988078e-06, "loss": 0.5677, "step": 2576 }, { "epoch": 0.13906427068156063, "grad_norm": 1.009247268310256, "learning_rate": 9.5773365822374e-06, "loss": 0.5041, "step": 2577 }, { "epoch": 0.139118234310075, "grad_norm": 1.072964181643425, "learning_rate": 9.57701373029995e-06, "loss": 0.4502, "step": 2578 }, { "epoch": 0.1391721979385894, "grad_norm": 0.9549320934435183, "learning_rate": 9.576690761185003e-06, "loss": 0.4482, "step": 2579 }, { "epoch": 0.13922616156710377, "grad_norm": 1.1089248740062063, "learning_rate": 9.576367674901844e-06, "loss": 0.6669, "step": 2580 }, { "epoch": 0.13928012519561817, "grad_norm": 1.0193315557908955, "learning_rate": 9.57604447145976e-06, "loss": 0.5047, "step": 2581 }, { "epoch": 0.13933408882413253, "grad_norm": 0.9607659260039592, "learning_rate": 9.57572115086804e-06, "loss": 0.4033, "step": 2582 }, { "epoch": 0.1393880524526469, "grad_norm": 1.14485580605853, "learning_rate": 9.575397713135976e-06, "loss": 0.4549, "step": 2583 }, { "epoch": 0.1394420160811613, "grad_norm": 1.0256920449242288, "learning_rate": 9.575074158272863e-06, "loss": 0.5008, "step": 2584 }, { "epoch": 0.13949597970967567, "grad_norm": 0.9360501234966311, "learning_rate": 9.574750486288002e-06, "loss": 0.4252, "step": 2585 }, { "epoch": 0.13954994333819007, "grad_norm": 1.2979261413733383, "learning_rate": 9.574426697190693e-06, "loss": 0.5747, "step": 2586 }, { "epoch": 0.13960390696670444, "grad_norm": 1.0418664474978445, "learning_rate": 9.574102790990248e-06, "loss": 0.4653, "step": 2587 }, { "epoch": 0.13965787059521884, "grad_norm": 1.045132092266482, "learning_rate": 9.57377876769597e-06, "loss": 0.4415, "step": 2588 }, { "epoch": 0.1397118342237332, "grad_norm": 0.9916717423858653, "learning_rate": 9.573454627317173e-06, "loss": 0.314, "step": 2589 }, { "epoch": 0.13976579785224758, "grad_norm": 0.9970491284596702, "learning_rate": 9.573130369863174e-06, "loss": 0.4225, "step": 2590 }, { "epoch": 0.13981976148076197, "grad_norm": 1.0390133110786588, "learning_rate": 9.572805995343295e-06, "loss": 0.4093, "step": 2591 }, { "epoch": 0.13987372510927634, "grad_norm": 0.9727906022994752, "learning_rate": 9.572481503766853e-06, "loss": 0.4695, "step": 2592 }, { "epoch": 0.13992768873779074, "grad_norm": 0.9022336383234881, "learning_rate": 9.57215689514318e-06, "loss": 0.4672, "step": 2593 }, { "epoch": 0.1399816523663051, "grad_norm": 1.0385106267425408, "learning_rate": 9.571832169481602e-06, "loss": 0.5557, "step": 2594 }, { "epoch": 
0.14003561599481948, "grad_norm": 0.9654756810000235, "learning_rate": 9.571507326791453e-06, "loss": 0.4752, "step": 2595 }, { "epoch": 0.14008957962333388, "grad_norm": 1.1548396564453938, "learning_rate": 9.57118236708207e-06, "loss": 0.4955, "step": 2596 }, { "epoch": 0.14014354325184825, "grad_norm": 1.186801450845665, "learning_rate": 9.570857290362794e-06, "loss": 0.5206, "step": 2597 }, { "epoch": 0.14019750688036264, "grad_norm": 0.8664592693871139, "learning_rate": 9.570532096642963e-06, "loss": 0.3991, "step": 2598 }, { "epoch": 0.140251470508877, "grad_norm": 0.969692784429654, "learning_rate": 9.570206785931927e-06, "loss": 0.4325, "step": 2599 }, { "epoch": 0.1403054341373914, "grad_norm": 1.0492571386861889, "learning_rate": 9.569881358239037e-06, "loss": 0.5411, "step": 2600 }, { "epoch": 0.14035939776590578, "grad_norm": 1.0060693669054226, "learning_rate": 9.569555813573644e-06, "loss": 0.5672, "step": 2601 }, { "epoch": 0.14041336139442015, "grad_norm": 1.143113838901137, "learning_rate": 9.569230151945105e-06, "loss": 0.5534, "step": 2602 }, { "epoch": 0.14046732502293455, "grad_norm": 1.087133662747898, "learning_rate": 9.568904373362779e-06, "loss": 0.4858, "step": 2603 }, { "epoch": 0.14052128865144892, "grad_norm": 1.1522554951874138, "learning_rate": 9.56857847783603e-06, "loss": 0.5182, "step": 2604 }, { "epoch": 0.14057525227996331, "grad_norm": 1.0480853831318047, "learning_rate": 9.568252465374224e-06, "loss": 0.506, "step": 2605 }, { "epoch": 0.14062921590847768, "grad_norm": 0.9186438405095925, "learning_rate": 9.567926335986733e-06, "loss": 0.4158, "step": 2606 }, { "epoch": 0.14068317953699205, "grad_norm": 0.8678569034327193, "learning_rate": 9.567600089682928e-06, "loss": 0.3834, "step": 2607 }, { "epoch": 0.14073714316550645, "grad_norm": 1.091831426426634, "learning_rate": 9.567273726472187e-06, "loss": 0.5875, "step": 2608 }, { "epoch": 0.14079110679402082, "grad_norm": 0.8421450490830088, "learning_rate": 9.566947246363888e-06, "loss": 0.4066, "step": 2609 }, { "epoch": 0.14084507042253522, "grad_norm": 0.9666356438661695, "learning_rate": 9.566620649367418e-06, "loss": 0.4077, "step": 2610 }, { "epoch": 0.1408990340510496, "grad_norm": 1.2187214965749216, "learning_rate": 9.566293935492162e-06, "loss": 0.8097, "step": 2611 }, { "epoch": 0.14095299767956398, "grad_norm": 0.9571714197283464, "learning_rate": 9.565967104747507e-06, "loss": 0.3968, "step": 2612 }, { "epoch": 0.14100696130807835, "grad_norm": 1.028097354324081, "learning_rate": 9.56564015714285e-06, "loss": 0.5724, "step": 2613 }, { "epoch": 0.14106092493659272, "grad_norm": 1.1586571950425648, "learning_rate": 9.565313092687587e-06, "loss": 0.5813, "step": 2614 }, { "epoch": 0.14111488856510712, "grad_norm": 1.0418364353844511, "learning_rate": 9.564985911391117e-06, "loss": 0.4542, "step": 2615 }, { "epoch": 0.1411688521936215, "grad_norm": 1.0598796307897886, "learning_rate": 9.564658613262845e-06, "loss": 0.5188, "step": 2616 }, { "epoch": 0.1412228158221359, "grad_norm": 1.133300172474042, "learning_rate": 9.564331198312177e-06, "loss": 0.4732, "step": 2617 }, { "epoch": 0.14127677945065026, "grad_norm": 1.0259005484867296, "learning_rate": 9.564003666548526e-06, "loss": 0.5847, "step": 2618 }, { "epoch": 0.14133074307916466, "grad_norm": 0.7893556267682196, "learning_rate": 9.563676017981302e-06, "loss": 0.334, "step": 2619 }, { "epoch": 0.14138470670767903, "grad_norm": 1.070962047424158, "learning_rate": 9.563348252619923e-06, "loss": 0.5642, "step": 2620 }, { "epoch": 
0.1414386703361934, "grad_norm": 1.0480827546547675, "learning_rate": 9.56302037047381e-06, "loss": 0.4962, "step": 2621 }, { "epoch": 0.1414926339647078, "grad_norm": 1.1513529929024682, "learning_rate": 9.562692371552385e-06, "loss": 0.5157, "step": 2622 }, { "epoch": 0.14154659759322216, "grad_norm": 1.0422364332615788, "learning_rate": 9.562364255865075e-06, "loss": 0.5357, "step": 2623 }, { "epoch": 0.14160056122173656, "grad_norm": 0.9789437352776942, "learning_rate": 9.562036023421314e-06, "loss": 0.454, "step": 2624 }, { "epoch": 0.14165452485025093, "grad_norm": 1.2250638756945975, "learning_rate": 9.561707674230534e-06, "loss": 0.5736, "step": 2625 }, { "epoch": 0.1417084884787653, "grad_norm": 1.0655496105126427, "learning_rate": 9.56137920830217e-06, "loss": 0.514, "step": 2626 }, { "epoch": 0.1417624521072797, "grad_norm": 0.9673476574120283, "learning_rate": 9.561050625645664e-06, "loss": 0.4301, "step": 2627 }, { "epoch": 0.14181641573579407, "grad_norm": 1.0647956149616595, "learning_rate": 9.56072192627046e-06, "loss": 0.475, "step": 2628 }, { "epoch": 0.14187037936430846, "grad_norm": 0.9021195591978254, "learning_rate": 9.560393110186003e-06, "loss": 0.4324, "step": 2629 }, { "epoch": 0.14192434299282283, "grad_norm": 1.0802933818558718, "learning_rate": 9.560064177401747e-06, "loss": 0.6331, "step": 2630 }, { "epoch": 0.14197830662133723, "grad_norm": 1.2837754742114944, "learning_rate": 9.559735127927144e-06, "loss": 0.6139, "step": 2631 }, { "epoch": 0.1420322702498516, "grad_norm": 0.9536782547287448, "learning_rate": 9.55940596177165e-06, "loss": 0.4511, "step": 2632 }, { "epoch": 0.14208623387836597, "grad_norm": 1.11044933185833, "learning_rate": 9.559076678944728e-06, "loss": 0.5647, "step": 2633 }, { "epoch": 0.14214019750688037, "grad_norm": 0.8873046372163033, "learning_rate": 9.558747279455842e-06, "loss": 0.4095, "step": 2634 }, { "epoch": 0.14219416113539474, "grad_norm": 1.199580699078788, "learning_rate": 9.558417763314457e-06, "loss": 0.6209, "step": 2635 }, { "epoch": 0.14224812476390913, "grad_norm": 0.8584095858866477, "learning_rate": 9.558088130530044e-06, "loss": 0.3892, "step": 2636 }, { "epoch": 0.1423020883924235, "grad_norm": 1.1019214451448518, "learning_rate": 9.557758381112078e-06, "loss": 0.4975, "step": 2637 }, { "epoch": 0.1423560520209379, "grad_norm": 1.2253871231919182, "learning_rate": 9.557428515070038e-06, "loss": 0.5796, "step": 2638 }, { "epoch": 0.14241001564945227, "grad_norm": 1.0227896510673953, "learning_rate": 9.557098532413401e-06, "loss": 0.4383, "step": 2639 }, { "epoch": 0.14246397927796664, "grad_norm": 1.0502765388406958, "learning_rate": 9.556768433151653e-06, "loss": 0.4901, "step": 2640 }, { "epoch": 0.14251794290648104, "grad_norm": 0.9921377963357235, "learning_rate": 9.556438217294281e-06, "loss": 0.4664, "step": 2641 }, { "epoch": 0.1425719065349954, "grad_norm": 1.0081820587697776, "learning_rate": 9.556107884850775e-06, "loss": 0.5231, "step": 2642 }, { "epoch": 0.1426258701635098, "grad_norm": 1.2791367590435578, "learning_rate": 9.555777435830631e-06, "loss": 0.4995, "step": 2643 }, { "epoch": 0.14267983379202417, "grad_norm": 0.9611821180144038, "learning_rate": 9.555446870243347e-06, "loss": 0.3968, "step": 2644 }, { "epoch": 0.14273379742053854, "grad_norm": 1.0454212039989443, "learning_rate": 9.55511618809842e-06, "loss": 0.5337, "step": 2645 }, { "epoch": 0.14278776104905294, "grad_norm": 0.8810747894593064, "learning_rate": 9.554785389405358e-06, "loss": 0.3838, "step": 2646 }, { "epoch": 
0.1428417246775673, "grad_norm": 0.8947913990143506, "learning_rate": 9.554454474173666e-06, "loss": 0.3778, "step": 2647 }, { "epoch": 0.1428956883060817, "grad_norm": 1.0221665550766166, "learning_rate": 9.554123442412854e-06, "loss": 0.4618, "step": 2648 }, { "epoch": 0.14294965193459608, "grad_norm": 1.038492341392835, "learning_rate": 9.55379229413244e-06, "loss": 0.5345, "step": 2649 }, { "epoch": 0.14300361556311048, "grad_norm": 1.0760012700412676, "learning_rate": 9.553461029341939e-06, "loss": 0.5385, "step": 2650 }, { "epoch": 0.14305757919162485, "grad_norm": 1.16707773826301, "learning_rate": 9.553129648050874e-06, "loss": 0.6774, "step": 2651 }, { "epoch": 0.14311154282013921, "grad_norm": 1.0638068705902184, "learning_rate": 9.552798150268765e-06, "loss": 0.6202, "step": 2652 }, { "epoch": 0.1431655064486536, "grad_norm": 1.1093648067862005, "learning_rate": 9.552466536005142e-06, "loss": 0.6886, "step": 2653 }, { "epoch": 0.14321947007716798, "grad_norm": 0.8265117177854048, "learning_rate": 9.552134805269536e-06, "loss": 0.3481, "step": 2654 }, { "epoch": 0.14327343370568238, "grad_norm": 1.0110327824272152, "learning_rate": 9.551802958071483e-06, "loss": 0.4439, "step": 2655 }, { "epoch": 0.14332739733419675, "grad_norm": 0.8972082467465926, "learning_rate": 9.551470994420517e-06, "loss": 0.4069, "step": 2656 }, { "epoch": 0.14338136096271115, "grad_norm": 0.927206713277317, "learning_rate": 9.551138914326184e-06, "loss": 0.426, "step": 2657 }, { "epoch": 0.14343532459122552, "grad_norm": 0.8414110153102673, "learning_rate": 9.550806717798022e-06, "loss": 0.3912, "step": 2658 }, { "epoch": 0.14348928821973989, "grad_norm": 1.1585750829838957, "learning_rate": 9.550474404845584e-06, "loss": 0.4648, "step": 2659 }, { "epoch": 0.14354325184825428, "grad_norm": 1.1449548531018088, "learning_rate": 9.550141975478418e-06, "loss": 0.5446, "step": 2660 }, { "epoch": 0.14359721547676865, "grad_norm": 0.8992677296285637, "learning_rate": 9.54980942970608e-06, "loss": 0.3789, "step": 2661 }, { "epoch": 0.14365117910528305, "grad_norm": 1.152628082428434, "learning_rate": 9.549476767538126e-06, "loss": 0.6134, "step": 2662 }, { "epoch": 0.14370514273379742, "grad_norm": 0.9587653189423988, "learning_rate": 9.549143988984118e-06, "loss": 0.404, "step": 2663 }, { "epoch": 0.1437591063623118, "grad_norm": 1.0115456045337243, "learning_rate": 9.548811094053621e-06, "loss": 0.6834, "step": 2664 }, { "epoch": 0.1438130699908262, "grad_norm": 1.1208233546789934, "learning_rate": 9.548478082756201e-06, "loss": 0.5224, "step": 2665 }, { "epoch": 0.14386703361934056, "grad_norm": 1.142640644520483, "learning_rate": 9.548144955101432e-06, "loss": 0.4811, "step": 2666 }, { "epoch": 0.14392099724785495, "grad_norm": 1.0974440880571412, "learning_rate": 9.547811711098884e-06, "loss": 0.5431, "step": 2667 }, { "epoch": 0.14397496087636932, "grad_norm": 1.09323246984495, "learning_rate": 9.547478350758139e-06, "loss": 0.5424, "step": 2668 }, { "epoch": 0.14402892450488372, "grad_norm": 0.9954539985184983, "learning_rate": 9.547144874088778e-06, "loss": 0.4651, "step": 2669 }, { "epoch": 0.1440828881333981, "grad_norm": 1.1383665457547127, "learning_rate": 9.546811281100382e-06, "loss": 0.5504, "step": 2670 }, { "epoch": 0.14413685176191246, "grad_norm": 1.3613893937646355, "learning_rate": 9.546477571802543e-06, "loss": 0.6844, "step": 2671 }, { "epoch": 0.14419081539042686, "grad_norm": 1.0474214759758598, "learning_rate": 9.546143746204846e-06, "loss": 0.3626, "step": 2672 }, { "epoch": 
0.14424477901894123, "grad_norm": 1.0865055636534702, "learning_rate": 9.545809804316892e-06, "loss": 0.6026, "step": 2673 }, { "epoch": 0.14429874264745562, "grad_norm": 1.1863800958005426, "learning_rate": 9.545475746148276e-06, "loss": 0.5424, "step": 2674 }, { "epoch": 0.14435270627597, "grad_norm": 1.0058743943550288, "learning_rate": 9.5451415717086e-06, "loss": 0.6409, "step": 2675 }, { "epoch": 0.14440666990448436, "grad_norm": 1.0251103526293064, "learning_rate": 9.544807281007468e-06, "loss": 0.4847, "step": 2676 }, { "epoch": 0.14446063353299876, "grad_norm": 0.9777966961140083, "learning_rate": 9.544472874054486e-06, "loss": 0.3921, "step": 2677 }, { "epoch": 0.14451459716151313, "grad_norm": 1.099678643169673, "learning_rate": 9.544138350859268e-06, "loss": 0.6029, "step": 2678 }, { "epoch": 0.14456856079002753, "grad_norm": 1.0037004095857347, "learning_rate": 9.543803711431426e-06, "loss": 0.4527, "step": 2679 }, { "epoch": 0.1446225244185419, "grad_norm": 1.437348677265869, "learning_rate": 9.543468955780582e-06, "loss": 0.5544, "step": 2680 }, { "epoch": 0.1446764880470563, "grad_norm": 1.0707494506682007, "learning_rate": 9.543134083916352e-06, "loss": 0.5283, "step": 2681 }, { "epoch": 0.14473045167557066, "grad_norm": 0.9294263595550656, "learning_rate": 9.542799095848365e-06, "loss": 0.4903, "step": 2682 }, { "epoch": 0.14478441530408503, "grad_norm": 0.9520122378533792, "learning_rate": 9.542463991586248e-06, "loss": 0.4262, "step": 2683 }, { "epoch": 0.14483837893259943, "grad_norm": 1.122554724878602, "learning_rate": 9.54212877113963e-06, "loss": 0.4245, "step": 2684 }, { "epoch": 0.1448923425611138, "grad_norm": 1.133346608105541, "learning_rate": 9.541793434518146e-06, "loss": 0.4565, "step": 2685 }, { "epoch": 0.1449463061896282, "grad_norm": 1.0526754529488411, "learning_rate": 9.541457981731434e-06, "loss": 0.4691, "step": 2686 }, { "epoch": 0.14500026981814257, "grad_norm": 1.208048125468102, "learning_rate": 9.54112241278914e-06, "loss": 0.4247, "step": 2687 }, { "epoch": 0.14505423344665697, "grad_norm": 1.0733951365423606, "learning_rate": 9.540786727700901e-06, "loss": 0.6435, "step": 2688 }, { "epoch": 0.14510819707517134, "grad_norm": 1.1257436806259096, "learning_rate": 9.540450926476368e-06, "loss": 0.492, "step": 2689 }, { "epoch": 0.1451621607036857, "grad_norm": 1.006434784909184, "learning_rate": 9.540115009125196e-06, "loss": 0.4799, "step": 2690 }, { "epoch": 0.1452161243322001, "grad_norm": 0.9722214088182579, "learning_rate": 9.539778975657034e-06, "loss": 0.4375, "step": 2691 }, { "epoch": 0.14527008796071447, "grad_norm": 1.1070342600110088, "learning_rate": 9.539442826081544e-06, "loss": 0.6341, "step": 2692 }, { "epoch": 0.14532405158922887, "grad_norm": 1.132535510797178, "learning_rate": 9.539106560408383e-06, "loss": 0.5306, "step": 2693 }, { "epoch": 0.14537801521774324, "grad_norm": 1.195577648501966, "learning_rate": 9.538770178647221e-06, "loss": 0.5398, "step": 2694 }, { "epoch": 0.1454319788462576, "grad_norm": 0.9729369863913087, "learning_rate": 9.538433680807722e-06, "loss": 0.4369, "step": 2695 }, { "epoch": 0.145485942474772, "grad_norm": 1.039248128363371, "learning_rate": 9.53809706689956e-06, "loss": 0.5098, "step": 2696 }, { "epoch": 0.14553990610328638, "grad_norm": 1.0501415495755502, "learning_rate": 9.537760336932406e-06, "loss": 0.7323, "step": 2697 }, { "epoch": 0.14559386973180077, "grad_norm": 1.0263063462553053, "learning_rate": 9.53742349091594e-06, "loss": 0.5112, "step": 2698 }, { "epoch": 0.14564783336031514, 
"grad_norm": 1.2392996840627537, "learning_rate": 9.537086528859844e-06, "loss": 0.5922, "step": 2699 }, { "epoch": 0.14570179698882954, "grad_norm": 1.157408317157222, "learning_rate": 9.536749450773803e-06, "loss": 0.5631, "step": 2700 }, { "epoch": 0.1457557606173439, "grad_norm": 1.0618491489102444, "learning_rate": 9.536412256667502e-06, "loss": 0.5086, "step": 2701 }, { "epoch": 0.14580972424585828, "grad_norm": 1.1903306678334087, "learning_rate": 9.536074946550637e-06, "loss": 0.5743, "step": 2702 }, { "epoch": 0.14586368787437268, "grad_norm": 0.9564412282544611, "learning_rate": 9.535737520432898e-06, "loss": 0.4714, "step": 2703 }, { "epoch": 0.14591765150288705, "grad_norm": 1.0997757318815997, "learning_rate": 9.535399978323983e-06, "loss": 0.5966, "step": 2704 }, { "epoch": 0.14597161513140144, "grad_norm": 1.1125981078194969, "learning_rate": 9.535062320233598e-06, "loss": 0.5394, "step": 2705 }, { "epoch": 0.14602557875991581, "grad_norm": 1.026991788916191, "learning_rate": 9.534724546171443e-06, "loss": 0.4997, "step": 2706 }, { "epoch": 0.1460795423884302, "grad_norm": 1.4259261434919575, "learning_rate": 9.53438665614723e-06, "loss": 0.4821, "step": 2707 }, { "epoch": 0.14613350601694458, "grad_norm": 1.1792280050864627, "learning_rate": 9.534048650170666e-06, "loss": 0.5849, "step": 2708 }, { "epoch": 0.14618746964545895, "grad_norm": 1.1801925982652528, "learning_rate": 9.533710528251468e-06, "loss": 0.5223, "step": 2709 }, { "epoch": 0.14624143327397335, "grad_norm": 0.8929005822487396, "learning_rate": 9.533372290399355e-06, "loss": 0.4584, "step": 2710 }, { "epoch": 0.14629539690248772, "grad_norm": 0.9546866661356682, "learning_rate": 9.533033936624046e-06, "loss": 0.5108, "step": 2711 }, { "epoch": 0.14634936053100212, "grad_norm": 0.8599338529564818, "learning_rate": 9.532695466935268e-06, "loss": 0.3805, "step": 2712 }, { "epoch": 0.14640332415951648, "grad_norm": 1.2149036956078132, "learning_rate": 9.532356881342745e-06, "loss": 0.5312, "step": 2713 }, { "epoch": 0.14645728778803085, "grad_norm": 1.1915843959350059, "learning_rate": 9.532018179856212e-06, "loss": 0.5733, "step": 2714 }, { "epoch": 0.14651125141654525, "grad_norm": 1.024591019475785, "learning_rate": 9.531679362485403e-06, "loss": 0.4889, "step": 2715 }, { "epoch": 0.14656521504505962, "grad_norm": 1.0559808657112495, "learning_rate": 9.531340429240054e-06, "loss": 0.5174, "step": 2716 }, { "epoch": 0.14661917867357402, "grad_norm": 0.9567789831665745, "learning_rate": 9.531001380129908e-06, "loss": 0.5371, "step": 2717 }, { "epoch": 0.1466731423020884, "grad_norm": 0.8765303698997371, "learning_rate": 9.530662215164711e-06, "loss": 0.4249, "step": 2718 }, { "epoch": 0.14672710593060279, "grad_norm": 0.8809714435190494, "learning_rate": 9.530322934354207e-06, "loss": 0.4663, "step": 2719 }, { "epoch": 0.14678106955911716, "grad_norm": 0.9975744852914546, "learning_rate": 9.529983537708152e-06, "loss": 0.5255, "step": 2720 }, { "epoch": 0.14683503318763153, "grad_norm": 1.168151205435896, "learning_rate": 9.529644025236296e-06, "loss": 0.4823, "step": 2721 }, { "epoch": 0.14688899681614592, "grad_norm": 0.910044789382668, "learning_rate": 9.5293043969484e-06, "loss": 0.4541, "step": 2722 }, { "epoch": 0.1469429604446603, "grad_norm": 1.1770842834344528, "learning_rate": 9.528964652854225e-06, "loss": 0.5935, "step": 2723 }, { "epoch": 0.1469969240731747, "grad_norm": 1.0066274310321626, "learning_rate": 9.528624792963532e-06, "loss": 0.4019, "step": 2724 }, { "epoch": 0.14705088770168906, 
"grad_norm": 1.0085593712731897, "learning_rate": 9.528284817286095e-06, "loss": 0.5454, "step": 2725 }, { "epoch": 0.14710485133020346, "grad_norm": 0.8915287296790081, "learning_rate": 9.52794472583168e-06, "loss": 0.4547, "step": 2726 }, { "epoch": 0.14715881495871783, "grad_norm": 1.0002889739111178, "learning_rate": 9.527604518610065e-06, "loss": 0.5321, "step": 2727 }, { "epoch": 0.1472127785872322, "grad_norm": 1.1038141629037825, "learning_rate": 9.527264195631027e-06, "loss": 0.5067, "step": 2728 }, { "epoch": 0.1472667422157466, "grad_norm": 1.196890746349286, "learning_rate": 9.526923756904345e-06, "loss": 0.5626, "step": 2729 }, { "epoch": 0.14732070584426096, "grad_norm": 1.0503680774394206, "learning_rate": 9.526583202439807e-06, "loss": 0.5281, "step": 2730 }, { "epoch": 0.14737466947277536, "grad_norm": 1.1897915063209588, "learning_rate": 9.526242532247198e-06, "loss": 0.5781, "step": 2731 }, { "epoch": 0.14742863310128973, "grad_norm": 0.8366426721531985, "learning_rate": 9.525901746336311e-06, "loss": 0.5152, "step": 2732 }, { "epoch": 0.1474825967298041, "grad_norm": 1.12820813958084, "learning_rate": 9.525560844716939e-06, "loss": 0.675, "step": 2733 }, { "epoch": 0.1475365603583185, "grad_norm": 1.310594039995791, "learning_rate": 9.525219827398883e-06, "loss": 0.6133, "step": 2734 }, { "epoch": 0.14759052398683287, "grad_norm": 1.0766343103037874, "learning_rate": 9.524878694391941e-06, "loss": 0.4979, "step": 2735 }, { "epoch": 0.14764448761534726, "grad_norm": 0.7882782885328782, "learning_rate": 9.524537445705916e-06, "loss": 0.3136, "step": 2736 }, { "epoch": 0.14769845124386163, "grad_norm": 1.3486679751539103, "learning_rate": 9.52419608135062e-06, "loss": 0.6084, "step": 2737 }, { "epoch": 0.14775241487237603, "grad_norm": 1.0556444452364526, "learning_rate": 9.523854601335863e-06, "loss": 0.6846, "step": 2738 }, { "epoch": 0.1478063785008904, "grad_norm": 1.3341109686927861, "learning_rate": 9.523513005671458e-06, "loss": 0.5441, "step": 2739 }, { "epoch": 0.14786034212940477, "grad_norm": 1.0711440183458931, "learning_rate": 9.523171294367227e-06, "loss": 0.5149, "step": 2740 }, { "epoch": 0.14791430575791917, "grad_norm": 1.0223612217807727, "learning_rate": 9.522829467432983e-06, "loss": 0.5387, "step": 2741 }, { "epoch": 0.14796826938643354, "grad_norm": 1.0822431884422958, "learning_rate": 9.522487524878559e-06, "loss": 0.5001, "step": 2742 }, { "epoch": 0.14802223301494793, "grad_norm": 1.2800366587387562, "learning_rate": 9.522145466713776e-06, "loss": 0.5449, "step": 2743 }, { "epoch": 0.1480761966434623, "grad_norm": 1.1838853836813952, "learning_rate": 9.521803292948472e-06, "loss": 0.6643, "step": 2744 }, { "epoch": 0.14813016027197667, "grad_norm": 0.9141166364874875, "learning_rate": 9.521461003592475e-06, "loss": 0.4307, "step": 2745 }, { "epoch": 0.14818412390049107, "grad_norm": 1.1705889006255623, "learning_rate": 9.521118598655625e-06, "loss": 0.597, "step": 2746 }, { "epoch": 0.14823808752900544, "grad_norm": 0.7804647602362148, "learning_rate": 9.520776078147764e-06, "loss": 0.3506, "step": 2747 }, { "epoch": 0.14829205115751984, "grad_norm": 0.8681293886515719, "learning_rate": 9.520433442078736e-06, "loss": 0.4139, "step": 2748 }, { "epoch": 0.1483460147860342, "grad_norm": 1.4558325911129746, "learning_rate": 9.52009069045839e-06, "loss": 0.7303, "step": 2749 }, { "epoch": 0.1483999784145486, "grad_norm": 1.3023893387614927, "learning_rate": 9.519747823296575e-06, "loss": 0.6067, "step": 2750 }, { "epoch": 0.14845394204306298, 
"grad_norm": 0.8390630174981729, "learning_rate": 9.519404840603144e-06, "loss": 0.4123, "step": 2751 }, { "epoch": 0.14850790567157734, "grad_norm": 1.0628666991988718, "learning_rate": 9.519061742387958e-06, "loss": 0.5666, "step": 2752 }, { "epoch": 0.14856186930009174, "grad_norm": 1.2775771207586568, "learning_rate": 9.518718528660876e-06, "loss": 0.5672, "step": 2753 }, { "epoch": 0.1486158329286061, "grad_norm": 1.2759656925981926, "learning_rate": 9.518375199431765e-06, "loss": 0.5179, "step": 2754 }, { "epoch": 0.1486697965571205, "grad_norm": 0.744565339672643, "learning_rate": 9.518031754710487e-06, "loss": 0.3222, "step": 2755 }, { "epoch": 0.14872376018563488, "grad_norm": 1.267383341314063, "learning_rate": 9.51768819450692e-06, "loss": 0.4921, "step": 2756 }, { "epoch": 0.14877772381414928, "grad_norm": 1.3700116211689632, "learning_rate": 9.517344518830933e-06, "loss": 0.6167, "step": 2757 }, { "epoch": 0.14883168744266365, "grad_norm": 1.0318331241769707, "learning_rate": 9.517000727692404e-06, "loss": 0.4929, "step": 2758 }, { "epoch": 0.14888565107117802, "grad_norm": 1.0691213584611883, "learning_rate": 9.516656821101216e-06, "loss": 0.4211, "step": 2759 }, { "epoch": 0.1489396146996924, "grad_norm": 0.872281370383322, "learning_rate": 9.516312799067252e-06, "loss": 0.3697, "step": 2760 }, { "epoch": 0.14899357832820678, "grad_norm": 1.0177898649819634, "learning_rate": 9.5159686616004e-06, "loss": 0.5554, "step": 2761 }, { "epoch": 0.14904754195672118, "grad_norm": 0.9670687956150409, "learning_rate": 9.515624408710553e-06, "loss": 0.4102, "step": 2762 }, { "epoch": 0.14910150558523555, "grad_norm": 1.3021185340492363, "learning_rate": 9.5152800404076e-06, "loss": 0.6789, "step": 2763 }, { "epoch": 0.14915546921374992, "grad_norm": 0.9597878396484694, "learning_rate": 9.514935556701444e-06, "loss": 0.4563, "step": 2764 }, { "epoch": 0.14920943284226432, "grad_norm": 1.0462466572931999, "learning_rate": 9.514590957601982e-06, "loss": 0.5444, "step": 2765 }, { "epoch": 0.1492633964707787, "grad_norm": 0.9962500727661605, "learning_rate": 9.51424624311912e-06, "loss": 0.4202, "step": 2766 }, { "epoch": 0.14931736009929308, "grad_norm": 1.1550706051308917, "learning_rate": 9.513901413262765e-06, "loss": 0.5264, "step": 2767 }, { "epoch": 0.14937132372780745, "grad_norm": 1.068799516294129, "learning_rate": 9.513556468042827e-06, "loss": 0.4578, "step": 2768 }, { "epoch": 0.14942528735632185, "grad_norm": 1.2645756935412595, "learning_rate": 9.51321140746922e-06, "loss": 0.6239, "step": 2769 }, { "epoch": 0.14947925098483622, "grad_norm": 1.244606082613979, "learning_rate": 9.512866231551864e-06, "loss": 0.5557, "step": 2770 }, { "epoch": 0.1495332146133506, "grad_norm": 1.1317309432732894, "learning_rate": 9.512520940300677e-06, "loss": 0.5896, "step": 2771 }, { "epoch": 0.149587178241865, "grad_norm": 1.1140591289980817, "learning_rate": 9.512175533725584e-06, "loss": 0.4529, "step": 2772 }, { "epoch": 0.14964114187037936, "grad_norm": 1.0242381038144122, "learning_rate": 9.511830011836511e-06, "loss": 0.5195, "step": 2773 }, { "epoch": 0.14969510549889375, "grad_norm": 1.0701669212234728, "learning_rate": 9.51148437464339e-06, "loss": 0.4555, "step": 2774 }, { "epoch": 0.14974906912740812, "grad_norm": 1.2178655764423278, "learning_rate": 9.511138622156156e-06, "loss": 0.6977, "step": 2775 }, { "epoch": 0.14980303275592252, "grad_norm": 0.9112326183885575, "learning_rate": 9.510792754384746e-06, "loss": 0.4007, "step": 2776 }, { "epoch": 0.1498569963844369, "grad_norm": 
0.9579132603705647, "learning_rate": 9.510446771339097e-06, "loss": 0.4015, "step": 2777 }, { "epoch": 0.14991096001295126, "grad_norm": 1.0872432074395484, "learning_rate": 9.510100673029157e-06, "loss": 0.4934, "step": 2778 }, { "epoch": 0.14996492364146566, "grad_norm": 1.2052227289733362, "learning_rate": 9.509754459464871e-06, "loss": 0.6154, "step": 2779 }, { "epoch": 0.15001888726998003, "grad_norm": 1.002968709589042, "learning_rate": 9.50940813065619e-06, "loss": 0.4652, "step": 2780 }, { "epoch": 0.15007285089849443, "grad_norm": 0.8737905108910214, "learning_rate": 9.50906168661307e-06, "loss": 0.4181, "step": 2781 }, { "epoch": 0.1501268145270088, "grad_norm": 0.9193251672944583, "learning_rate": 9.508715127345463e-06, "loss": 0.3952, "step": 2782 }, { "epoch": 0.15018077815552316, "grad_norm": 1.0084630576533118, "learning_rate": 9.508368452863337e-06, "loss": 0.4859, "step": 2783 }, { "epoch": 0.15023474178403756, "grad_norm": 1.1291265525635383, "learning_rate": 9.508021663176648e-06, "loss": 0.4592, "step": 2784 }, { "epoch": 0.15028870541255193, "grad_norm": 1.4834595867164335, "learning_rate": 9.50767475829537e-06, "loss": 0.715, "step": 2785 }, { "epoch": 0.15034266904106633, "grad_norm": 1.4135220165690807, "learning_rate": 9.507327738229466e-06, "loss": 0.6709, "step": 2786 }, { "epoch": 0.1503966326695807, "grad_norm": 1.1418754550040022, "learning_rate": 9.506980602988916e-06, "loss": 0.4757, "step": 2787 }, { "epoch": 0.1504505962980951, "grad_norm": 1.0410461761137528, "learning_rate": 9.506633352583693e-06, "loss": 0.5803, "step": 2788 }, { "epoch": 0.15050455992660947, "grad_norm": 1.0915138686218968, "learning_rate": 9.506285987023781e-06, "loss": 0.5661, "step": 2789 }, { "epoch": 0.15055852355512384, "grad_norm": 0.997682727949255, "learning_rate": 9.50593850631916e-06, "loss": 0.4109, "step": 2790 }, { "epoch": 0.15061248718363823, "grad_norm": 0.9804741684481599, "learning_rate": 9.50559091047982e-06, "loss": 0.481, "step": 2791 }, { "epoch": 0.1506664508121526, "grad_norm": 1.0043907499911815, "learning_rate": 9.50524319951575e-06, "loss": 0.5298, "step": 2792 }, { "epoch": 0.150720414440667, "grad_norm": 1.1952438444683178, "learning_rate": 9.504895373436944e-06, "loss": 0.5577, "step": 2793 }, { "epoch": 0.15077437806918137, "grad_norm": 1.1238283573149006, "learning_rate": 9.504547432253397e-06, "loss": 0.472, "step": 2794 }, { "epoch": 0.15082834169769577, "grad_norm": 1.374206317281408, "learning_rate": 9.50419937597511e-06, "loss": 0.8451, "step": 2795 }, { "epoch": 0.15088230532621014, "grad_norm": 0.9650478445037453, "learning_rate": 9.503851204612087e-06, "loss": 0.4587, "step": 2796 }, { "epoch": 0.1509362689547245, "grad_norm": 0.9839652655826362, "learning_rate": 9.503502918174335e-06, "loss": 0.3269, "step": 2797 }, { "epoch": 0.1509902325832389, "grad_norm": 0.9964073919264328, "learning_rate": 9.503154516671865e-06, "loss": 0.4895, "step": 2798 }, { "epoch": 0.15104419621175327, "grad_norm": 1.0898911423807338, "learning_rate": 9.50280600011469e-06, "loss": 0.4066, "step": 2799 }, { "epoch": 0.15109815984026767, "grad_norm": 1.330056718842153, "learning_rate": 9.502457368512822e-06, "loss": 0.601, "step": 2800 }, { "epoch": 0.15115212346878204, "grad_norm": 1.0027926633446427, "learning_rate": 9.50210862187629e-06, "loss": 0.4215, "step": 2801 }, { "epoch": 0.1512060870972964, "grad_norm": 1.1353000132097928, "learning_rate": 9.50175976021511e-06, "loss": 0.5901, "step": 2802 }, { "epoch": 0.1512600507258108, "grad_norm": 0.9208197119539769, 
"learning_rate": 9.501410783539311e-06, "loss": 0.483, "step": 2803 }, { "epoch": 0.15131401435432518, "grad_norm": 1.1516905524126346, "learning_rate": 9.501061691858923e-06, "loss": 0.5956, "step": 2804 }, { "epoch": 0.15136797798283957, "grad_norm": 1.4497308701953389, "learning_rate": 9.500712485183979e-06, "loss": 0.6845, "step": 2805 }, { "epoch": 0.15142194161135394, "grad_norm": 1.0025938544902167, "learning_rate": 9.500363163524517e-06, "loss": 0.5178, "step": 2806 }, { "epoch": 0.15147590523986834, "grad_norm": 0.9873272024920466, "learning_rate": 9.500013726890573e-06, "loss": 0.4225, "step": 2807 }, { "epoch": 0.1515298688683827, "grad_norm": 0.9196890987899318, "learning_rate": 9.499664175292194e-06, "loss": 0.3996, "step": 2808 }, { "epoch": 0.15158383249689708, "grad_norm": 1.1451173550985723, "learning_rate": 9.499314508739427e-06, "loss": 0.4811, "step": 2809 }, { "epoch": 0.15163779612541148, "grad_norm": 1.0659462029621256, "learning_rate": 9.498964727242319e-06, "loss": 0.5329, "step": 2810 }, { "epoch": 0.15169175975392585, "grad_norm": 1.0703593982816626, "learning_rate": 9.498614830810922e-06, "loss": 0.4612, "step": 2811 }, { "epoch": 0.15174572338244025, "grad_norm": 0.9695327974457811, "learning_rate": 9.498264819455298e-06, "loss": 0.4831, "step": 2812 }, { "epoch": 0.15179968701095461, "grad_norm": 1.0082917882854028, "learning_rate": 9.497914693185502e-06, "loss": 0.5948, "step": 2813 }, { "epoch": 0.15185365063946898, "grad_norm": 1.0637152304507682, "learning_rate": 9.497564452011596e-06, "loss": 0.4635, "step": 2814 }, { "epoch": 0.15190761426798338, "grad_norm": 1.1357602928253683, "learning_rate": 9.49721409594365e-06, "loss": 0.5427, "step": 2815 }, { "epoch": 0.15196157789649775, "grad_norm": 1.1644111266702526, "learning_rate": 9.496863624991731e-06, "loss": 0.5459, "step": 2816 }, { "epoch": 0.15201554152501215, "grad_norm": 0.981129528489329, "learning_rate": 9.496513039165914e-06, "loss": 0.5778, "step": 2817 }, { "epoch": 0.15206950515352652, "grad_norm": 0.9531871700703887, "learning_rate": 9.496162338476273e-06, "loss": 0.412, "step": 2818 }, { "epoch": 0.15212346878204092, "grad_norm": 0.8599717573928912, "learning_rate": 9.495811522932891e-06, "loss": 0.3252, "step": 2819 }, { "epoch": 0.15217743241055529, "grad_norm": 1.2000203757639407, "learning_rate": 9.495460592545847e-06, "loss": 0.6232, "step": 2820 }, { "epoch": 0.15223139603906966, "grad_norm": 1.1921194369363222, "learning_rate": 9.495109547325226e-06, "loss": 0.6013, "step": 2821 }, { "epoch": 0.15228535966758405, "grad_norm": 1.2644511454902831, "learning_rate": 9.494758387281124e-06, "loss": 0.615, "step": 2822 }, { "epoch": 0.15233932329609842, "grad_norm": 0.9013605252302545, "learning_rate": 9.494407112423626e-06, "loss": 0.3422, "step": 2823 }, { "epoch": 0.15239328692461282, "grad_norm": 1.086334142432782, "learning_rate": 9.494055722762832e-06, "loss": 0.4352, "step": 2824 }, { "epoch": 0.1524472505531272, "grad_norm": 1.1042704671775259, "learning_rate": 9.493704218308844e-06, "loss": 0.4663, "step": 2825 }, { "epoch": 0.1525012141816416, "grad_norm": 0.9857749382345511, "learning_rate": 9.493352599071758e-06, "loss": 0.4665, "step": 2826 }, { "epoch": 0.15255517781015596, "grad_norm": 1.0179170188940228, "learning_rate": 9.493000865061685e-06, "loss": 0.4057, "step": 2827 }, { "epoch": 0.15260914143867033, "grad_norm": 1.024508144899749, "learning_rate": 9.492649016288734e-06, "loss": 0.487, "step": 2828 }, { "epoch": 0.15266310506718472, "grad_norm": 1.0224996333437304, 
"learning_rate": 9.492297052763013e-06, "loss": 0.4659, "step": 2829 }, { "epoch": 0.1527170686956991, "grad_norm": 1.1015211610625972, "learning_rate": 9.491944974494643e-06, "loss": 0.5576, "step": 2830 }, { "epoch": 0.1527710323242135, "grad_norm": 1.0702420403191497, "learning_rate": 9.49159278149374e-06, "loss": 0.5733, "step": 2831 }, { "epoch": 0.15282499595272786, "grad_norm": 0.9954699410982839, "learning_rate": 9.491240473770426e-06, "loss": 0.5624, "step": 2832 }, { "epoch": 0.15287895958124223, "grad_norm": 0.9127593904418583, "learning_rate": 9.49088805133483e-06, "loss": 0.4448, "step": 2833 }, { "epoch": 0.15293292320975663, "grad_norm": 0.9246399420247385, "learning_rate": 9.490535514197077e-06, "loss": 0.4123, "step": 2834 }, { "epoch": 0.152986886838271, "grad_norm": 1.0703769462733244, "learning_rate": 9.490182862367302e-06, "loss": 0.5527, "step": 2835 }, { "epoch": 0.1530408504667854, "grad_norm": 0.9900364888738452, "learning_rate": 9.48983009585564e-06, "loss": 0.5052, "step": 2836 }, { "epoch": 0.15309481409529976, "grad_norm": 1.4593715585423634, "learning_rate": 9.48947721467223e-06, "loss": 0.6111, "step": 2837 }, { "epoch": 0.15314877772381416, "grad_norm": 1.104102787868028, "learning_rate": 9.489124218827212e-06, "loss": 0.3644, "step": 2838 }, { "epoch": 0.15320274135232853, "grad_norm": 0.987638030896048, "learning_rate": 9.488771108330734e-06, "loss": 0.4787, "step": 2839 }, { "epoch": 0.1532567049808429, "grad_norm": 1.0613157413275411, "learning_rate": 9.488417883192945e-06, "loss": 0.6394, "step": 2840 }, { "epoch": 0.1533106686093573, "grad_norm": 0.9458738190414954, "learning_rate": 9.488064543423996e-06, "loss": 0.4025, "step": 2841 }, { "epoch": 0.15336463223787167, "grad_norm": 1.2169479780082078, "learning_rate": 9.487711089034041e-06, "loss": 0.7105, "step": 2842 }, { "epoch": 0.15341859586638606, "grad_norm": 0.9601533896935032, "learning_rate": 9.48735752003324e-06, "loss": 0.494, "step": 2843 }, { "epoch": 0.15347255949490043, "grad_norm": 1.1641318249446895, "learning_rate": 9.487003836431756e-06, "loss": 0.5663, "step": 2844 }, { "epoch": 0.15352652312341483, "grad_norm": 0.9563745737604212, "learning_rate": 9.486650038239753e-06, "loss": 0.3602, "step": 2845 }, { "epoch": 0.1535804867519292, "grad_norm": 0.9180793400155437, "learning_rate": 9.4862961254674e-06, "loss": 0.4173, "step": 2846 }, { "epoch": 0.15363445038044357, "grad_norm": 1.1258785878347464, "learning_rate": 9.485942098124869e-06, "loss": 0.5589, "step": 2847 }, { "epoch": 0.15368841400895797, "grad_norm": 0.883033392317904, "learning_rate": 9.485587956222334e-06, "loss": 0.4165, "step": 2848 }, { "epoch": 0.15374237763747234, "grad_norm": 0.8361668081627076, "learning_rate": 9.485233699769973e-06, "loss": 0.3216, "step": 2849 }, { "epoch": 0.15379634126598674, "grad_norm": 1.1900547827422676, "learning_rate": 9.48487932877797e-06, "loss": 0.5849, "step": 2850 }, { "epoch": 0.1538503048945011, "grad_norm": 1.3838262025952157, "learning_rate": 9.484524843256508e-06, "loss": 0.7638, "step": 2851 }, { "epoch": 0.15390426852301548, "grad_norm": 1.083019812992553, "learning_rate": 9.484170243215776e-06, "loss": 0.6214, "step": 2852 }, { "epoch": 0.15395823215152987, "grad_norm": 1.0219788986930345, "learning_rate": 9.483815528665965e-06, "loss": 0.4615, "step": 2853 }, { "epoch": 0.15401219578004424, "grad_norm": 1.6232248513350636, "learning_rate": 9.48346069961727e-06, "loss": 0.5991, "step": 2854 }, { "epoch": 0.15406615940855864, "grad_norm": 1.0951075975276294, 
"learning_rate": 9.48310575607989e-06, "loss": 0.3977, "step": 2855 }, { "epoch": 0.154120123037073, "grad_norm": 1.1375374670957732, "learning_rate": 9.482750698064028e-06, "loss": 0.5911, "step": 2856 }, { "epoch": 0.1541740866655874, "grad_norm": 1.0645224308179708, "learning_rate": 9.482395525579884e-06, "loss": 0.6209, "step": 2857 }, { "epoch": 0.15422805029410178, "grad_norm": 0.9923636920012383, "learning_rate": 9.48204023863767e-06, "loss": 0.5374, "step": 2858 }, { "epoch": 0.15428201392261615, "grad_norm": 0.9609218034965911, "learning_rate": 9.481684837247595e-06, "loss": 0.4595, "step": 2859 }, { "epoch": 0.15433597755113054, "grad_norm": 0.7647594926047586, "learning_rate": 9.481329321419877e-06, "loss": 0.301, "step": 2860 }, { "epoch": 0.1543899411796449, "grad_norm": 1.0551851841784197, "learning_rate": 9.480973691164728e-06, "loss": 0.4641, "step": 2861 }, { "epoch": 0.1544439048081593, "grad_norm": 0.8545558320980838, "learning_rate": 9.480617946492374e-06, "loss": 0.3856, "step": 2862 }, { "epoch": 0.15449786843667368, "grad_norm": 1.011789944460799, "learning_rate": 9.480262087413039e-06, "loss": 0.4297, "step": 2863 }, { "epoch": 0.15455183206518805, "grad_norm": 1.2599715393855264, "learning_rate": 9.47990611393695e-06, "loss": 0.4941, "step": 2864 }, { "epoch": 0.15460579569370245, "grad_norm": 0.9677200898396942, "learning_rate": 9.479550026074335e-06, "loss": 0.403, "step": 2865 }, { "epoch": 0.15465975932221682, "grad_norm": 1.0377728072893408, "learning_rate": 9.479193823835434e-06, "loss": 0.5074, "step": 2866 }, { "epoch": 0.15471372295073121, "grad_norm": 1.025915069495186, "learning_rate": 9.47883750723048e-06, "loss": 0.4828, "step": 2867 }, { "epoch": 0.15476768657924558, "grad_norm": 1.1834059300264819, "learning_rate": 9.478481076269719e-06, "loss": 0.5575, "step": 2868 }, { "epoch": 0.15482165020775998, "grad_norm": 0.9636221667891641, "learning_rate": 9.47812453096339e-06, "loss": 0.4809, "step": 2869 }, { "epoch": 0.15487561383627435, "grad_norm": 0.9704198580116432, "learning_rate": 9.477767871321741e-06, "loss": 0.5914, "step": 2870 }, { "epoch": 0.15492957746478872, "grad_norm": 0.9750063028919201, "learning_rate": 9.477411097355025e-06, "loss": 0.4519, "step": 2871 }, { "epoch": 0.15498354109330312, "grad_norm": 1.0979218847502932, "learning_rate": 9.477054209073497e-06, "loss": 0.4913, "step": 2872 }, { "epoch": 0.1550375047218175, "grad_norm": 1.0876237145767753, "learning_rate": 9.47669720648741e-06, "loss": 0.4799, "step": 2873 }, { "epoch": 0.15509146835033188, "grad_norm": 1.0078010239957849, "learning_rate": 9.476340089607028e-06, "loss": 0.5526, "step": 2874 }, { "epoch": 0.15514543197884625, "grad_norm": 1.1930517622787165, "learning_rate": 9.475982858442614e-06, "loss": 0.5519, "step": 2875 }, { "epoch": 0.15519939560736065, "grad_norm": 1.1500906430899376, "learning_rate": 9.475625513004437e-06, "loss": 0.4688, "step": 2876 }, { "epoch": 0.15525335923587502, "grad_norm": 1.0790049906878143, "learning_rate": 9.475268053302763e-06, "loss": 0.5366, "step": 2877 }, { "epoch": 0.1553073228643894, "grad_norm": 1.2458981329388064, "learning_rate": 9.47491047934787e-06, "loss": 0.5673, "step": 2878 }, { "epoch": 0.1553612864929038, "grad_norm": 1.064254343830126, "learning_rate": 9.474552791150033e-06, "loss": 0.5475, "step": 2879 }, { "epoch": 0.15541525012141816, "grad_norm": 0.9097563265657234, "learning_rate": 9.474194988719533e-06, "loss": 0.3253, "step": 2880 }, { "epoch": 0.15546921374993256, "grad_norm": 1.3085861653582225, 
"learning_rate": 9.473837072066651e-06, "loss": 0.485, "step": 2881 }, { "epoch": 0.15552317737844693, "grad_norm": 1.1867553902854862, "learning_rate": 9.473479041201679e-06, "loss": 0.398, "step": 2882 }, { "epoch": 0.1555771410069613, "grad_norm": 0.824277977218826, "learning_rate": 9.473120896134904e-06, "loss": 0.3572, "step": 2883 }, { "epoch": 0.1556311046354757, "grad_norm": 0.9281228279094402, "learning_rate": 9.47276263687662e-06, "loss": 0.4196, "step": 2884 }, { "epoch": 0.15568506826399006, "grad_norm": 1.1900765997057472, "learning_rate": 9.472404263437123e-06, "loss": 0.6778, "step": 2885 }, { "epoch": 0.15573903189250446, "grad_norm": 0.9890690930687212, "learning_rate": 9.472045775826714e-06, "loss": 0.5263, "step": 2886 }, { "epoch": 0.15579299552101883, "grad_norm": 1.1837927520805074, "learning_rate": 9.471687174055696e-06, "loss": 0.6636, "step": 2887 }, { "epoch": 0.15584695914953323, "grad_norm": 1.2344221976148766, "learning_rate": 9.471328458134375e-06, "loss": 0.5035, "step": 2888 }, { "epoch": 0.1559009227780476, "grad_norm": 1.0464913022132651, "learning_rate": 9.470969628073063e-06, "loss": 0.461, "step": 2889 }, { "epoch": 0.15595488640656197, "grad_norm": 1.1958548996745026, "learning_rate": 9.470610683882072e-06, "loss": 0.6025, "step": 2890 }, { "epoch": 0.15600885003507636, "grad_norm": 0.9076619095644475, "learning_rate": 9.470251625571715e-06, "loss": 0.4786, "step": 2891 }, { "epoch": 0.15606281366359073, "grad_norm": 1.0783035884236594, "learning_rate": 9.469892453152315e-06, "loss": 0.6129, "step": 2892 }, { "epoch": 0.15611677729210513, "grad_norm": 0.9887525022195978, "learning_rate": 9.469533166634196e-06, "loss": 0.532, "step": 2893 }, { "epoch": 0.1561707409206195, "grad_norm": 0.8576644728034248, "learning_rate": 9.469173766027682e-06, "loss": 0.3744, "step": 2894 }, { "epoch": 0.1562247045491339, "grad_norm": 0.9829558603837744, "learning_rate": 9.468814251343103e-06, "loss": 0.4242, "step": 2895 }, { "epoch": 0.15627866817764827, "grad_norm": 1.193911322653794, "learning_rate": 9.468454622590794e-06, "loss": 0.4999, "step": 2896 }, { "epoch": 0.15633263180616264, "grad_norm": 1.103571413898659, "learning_rate": 9.46809487978109e-06, "loss": 0.4886, "step": 2897 }, { "epoch": 0.15638659543467703, "grad_norm": 1.1004416436398254, "learning_rate": 9.467735022924327e-06, "loss": 0.626, "step": 2898 }, { "epoch": 0.1564405590631914, "grad_norm": 1.0489516331262132, "learning_rate": 9.467375052030852e-06, "loss": 0.419, "step": 2899 }, { "epoch": 0.1564945226917058, "grad_norm": 1.2073312628495325, "learning_rate": 9.467014967111009e-06, "loss": 0.5705, "step": 2900 }, { "epoch": 0.15654848632022017, "grad_norm": 1.0119178368994157, "learning_rate": 9.466654768175147e-06, "loss": 0.4512, "step": 2901 }, { "epoch": 0.15660244994873454, "grad_norm": 1.0875765578620804, "learning_rate": 9.466294455233619e-06, "loss": 0.5958, "step": 2902 }, { "epoch": 0.15665641357724894, "grad_norm": 1.0478843241285938, "learning_rate": 9.465934028296779e-06, "loss": 0.4958, "step": 2903 }, { "epoch": 0.1567103772057633, "grad_norm": 0.9884385864044685, "learning_rate": 9.46557348737499e-06, "loss": 0.3717, "step": 2904 }, { "epoch": 0.1567643408342777, "grad_norm": 1.0627591161496852, "learning_rate": 9.465212832478612e-06, "loss": 0.4862, "step": 2905 }, { "epoch": 0.15681830446279207, "grad_norm": 1.1562972027559206, "learning_rate": 9.46485206361801e-06, "loss": 0.5356, "step": 2906 }, { "epoch": 0.15687226809130647, "grad_norm": 0.9603524675841937, 
"learning_rate": 9.464491180803554e-06, "loss": 0.5413, "step": 2907 }, { "epoch": 0.15692623171982084, "grad_norm": 1.1836173381727348, "learning_rate": 9.464130184045616e-06, "loss": 0.571, "step": 2908 }, { "epoch": 0.1569801953483352, "grad_norm": 1.313629758337978, "learning_rate": 9.46376907335457e-06, "loss": 0.8012, "step": 2909 }, { "epoch": 0.1570341589768496, "grad_norm": 1.0209304454351344, "learning_rate": 9.463407848740795e-06, "loss": 0.4758, "step": 2910 }, { "epoch": 0.15708812260536398, "grad_norm": 0.8563920806117512, "learning_rate": 9.463046510214675e-06, "loss": 0.3656, "step": 2911 }, { "epoch": 0.15714208623387838, "grad_norm": 0.9523887574688876, "learning_rate": 9.462685057786596e-06, "loss": 0.4095, "step": 2912 }, { "epoch": 0.15719604986239274, "grad_norm": 1.025706722008397, "learning_rate": 9.462323491466941e-06, "loss": 0.5064, "step": 2913 }, { "epoch": 0.15725001349090714, "grad_norm": 1.0188317608498416, "learning_rate": 9.461961811266105e-06, "loss": 0.5028, "step": 2914 }, { "epoch": 0.1573039771194215, "grad_norm": 0.9845470517114103, "learning_rate": 9.461600017194487e-06, "loss": 0.3829, "step": 2915 }, { "epoch": 0.15735794074793588, "grad_norm": 0.8352989680949157, "learning_rate": 9.461238109262479e-06, "loss": 0.3657, "step": 2916 }, { "epoch": 0.15741190437645028, "grad_norm": 1.0666792908952116, "learning_rate": 9.460876087480486e-06, "loss": 0.5231, "step": 2917 }, { "epoch": 0.15746586800496465, "grad_norm": 1.2502735000746397, "learning_rate": 9.46051395185891e-06, "loss": 0.473, "step": 2918 }, { "epoch": 0.15751983163347905, "grad_norm": 1.0384826580623279, "learning_rate": 9.460151702408164e-06, "loss": 0.5164, "step": 2919 }, { "epoch": 0.15757379526199342, "grad_norm": 1.2383654112918259, "learning_rate": 9.459789339138655e-06, "loss": 0.526, "step": 2920 }, { "epoch": 0.15762775889050779, "grad_norm": 1.1991652787562606, "learning_rate": 9.4594268620608e-06, "loss": 0.6141, "step": 2921 }, { "epoch": 0.15768172251902218, "grad_norm": 1.1661750393832249, "learning_rate": 9.459064271185015e-06, "loss": 0.5834, "step": 2922 }, { "epoch": 0.15773568614753655, "grad_norm": 1.0844068000511828, "learning_rate": 9.458701566521725e-06, "loss": 0.6405, "step": 2923 }, { "epoch": 0.15778964977605095, "grad_norm": 1.02503241827222, "learning_rate": 9.45833874808135e-06, "loss": 0.5292, "step": 2924 }, { "epoch": 0.15784361340456532, "grad_norm": 0.9831051027170661, "learning_rate": 9.45797581587432e-06, "loss": 0.4583, "step": 2925 }, { "epoch": 0.15789757703307972, "grad_norm": 0.9056946053694395, "learning_rate": 9.457612769911067e-06, "loss": 0.3339, "step": 2926 }, { "epoch": 0.1579515406615941, "grad_norm": 0.8803807335797599, "learning_rate": 9.457249610202022e-06, "loss": 0.3508, "step": 2927 }, { "epoch": 0.15800550429010846, "grad_norm": 0.938112782940209, "learning_rate": 9.456886336757627e-06, "loss": 0.473, "step": 2928 }, { "epoch": 0.15805946791862285, "grad_norm": 0.8830492335405095, "learning_rate": 9.456522949588319e-06, "loss": 0.4094, "step": 2929 }, { "epoch": 0.15811343154713722, "grad_norm": 1.1349872542128703, "learning_rate": 9.456159448704544e-06, "loss": 0.5457, "step": 2930 }, { "epoch": 0.15816739517565162, "grad_norm": 1.0307333397601042, "learning_rate": 9.455795834116748e-06, "loss": 0.5877, "step": 2931 }, { "epoch": 0.158221358804166, "grad_norm": 0.7006549112109498, "learning_rate": 9.455432105835385e-06, "loss": 0.3124, "step": 2932 }, { "epoch": 0.15827532243268036, "grad_norm": 0.9668862621859664, 
"learning_rate": 9.455068263870905e-06, "loss": 0.4285, "step": 2933 }, { "epoch": 0.15832928606119476, "grad_norm": 0.9029724476936948, "learning_rate": 9.454704308233767e-06, "loss": 0.4115, "step": 2934 }, { "epoch": 0.15838324968970913, "grad_norm": 1.1087508953689431, "learning_rate": 9.454340238934431e-06, "loss": 0.5999, "step": 2935 }, { "epoch": 0.15843721331822352, "grad_norm": 1.075836463843238, "learning_rate": 9.453976055983363e-06, "loss": 0.5349, "step": 2936 }, { "epoch": 0.1584911769467379, "grad_norm": 1.20229994550125, "learning_rate": 9.453611759391026e-06, "loss": 0.5615, "step": 2937 }, { "epoch": 0.1585451405752523, "grad_norm": 1.226899527228933, "learning_rate": 9.453247349167892e-06, "loss": 0.5455, "step": 2938 }, { "epoch": 0.15859910420376666, "grad_norm": 1.0544148396871993, "learning_rate": 9.452882825324436e-06, "loss": 0.4789, "step": 2939 }, { "epoch": 0.15865306783228103, "grad_norm": 0.9006553287205337, "learning_rate": 9.452518187871132e-06, "loss": 0.4425, "step": 2940 }, { "epoch": 0.15870703146079543, "grad_norm": 1.0720855721461386, "learning_rate": 9.452153436818462e-06, "loss": 0.6386, "step": 2941 }, { "epoch": 0.1587609950893098, "grad_norm": 1.0565794148654033, "learning_rate": 9.45178857217691e-06, "loss": 0.5664, "step": 2942 }, { "epoch": 0.1588149587178242, "grad_norm": 1.007972009815946, "learning_rate": 9.45142359395696e-06, "loss": 0.4532, "step": 2943 }, { "epoch": 0.15886892234633856, "grad_norm": 1.0495827968349112, "learning_rate": 9.451058502169106e-06, "loss": 0.4482, "step": 2944 }, { "epoch": 0.15892288597485296, "grad_norm": 0.8590267904173257, "learning_rate": 9.450693296823835e-06, "loss": 0.4679, "step": 2945 }, { "epoch": 0.15897684960336733, "grad_norm": 1.1121114511714107, "learning_rate": 9.450327977931647e-06, "loss": 0.4488, "step": 2946 }, { "epoch": 0.1590308132318817, "grad_norm": 0.978526513636428, "learning_rate": 9.449962545503045e-06, "loss": 0.4382, "step": 2947 }, { "epoch": 0.1590847768603961, "grad_norm": 1.1629246545472256, "learning_rate": 9.449596999548525e-06, "loss": 0.6222, "step": 2948 }, { "epoch": 0.15913874048891047, "grad_norm": 1.071501081712184, "learning_rate": 9.449231340078598e-06, "loss": 0.545, "step": 2949 }, { "epoch": 0.15919270411742487, "grad_norm": 1.2254608374115032, "learning_rate": 9.44886556710377e-06, "loss": 0.7286, "step": 2950 }, { "epoch": 0.15924666774593924, "grad_norm": 1.001361155889579, "learning_rate": 9.448499680634556e-06, "loss": 0.5595, "step": 2951 }, { "epoch": 0.1593006313744536, "grad_norm": 1.3320681292760825, "learning_rate": 9.448133680681474e-06, "loss": 0.5593, "step": 2952 }, { "epoch": 0.159354595002968, "grad_norm": 1.0290498009540576, "learning_rate": 9.447767567255038e-06, "loss": 0.5075, "step": 2953 }, { "epoch": 0.15940855863148237, "grad_norm": 0.9937671870003342, "learning_rate": 9.447401340365773e-06, "loss": 0.4641, "step": 2954 }, { "epoch": 0.15946252225999677, "grad_norm": 1.1828711994574164, "learning_rate": 9.447035000024208e-06, "loss": 0.5113, "step": 2955 }, { "epoch": 0.15951648588851114, "grad_norm": 1.1963924723036305, "learning_rate": 9.446668546240866e-06, "loss": 0.4672, "step": 2956 }, { "epoch": 0.15957044951702554, "grad_norm": 1.0002823889427979, "learning_rate": 9.446301979026284e-06, "loss": 0.4434, "step": 2957 }, { "epoch": 0.1596244131455399, "grad_norm": 1.302124221685511, "learning_rate": 9.445935298390994e-06, "loss": 0.5243, "step": 2958 }, { "epoch": 0.15967837677405428, "grad_norm": 1.1380623406917816, 
"learning_rate": 9.445568504345537e-06, "loss": 0.627, "step": 2959 }, { "epoch": 0.15973234040256867, "grad_norm": 1.1150746968545358, "learning_rate": 9.445201596900455e-06, "loss": 0.5856, "step": 2960 }, { "epoch": 0.15978630403108304, "grad_norm": 0.9819504343339145, "learning_rate": 9.444834576066294e-06, "loss": 0.5137, "step": 2961 }, { "epoch": 0.15984026765959744, "grad_norm": 0.980496432471336, "learning_rate": 9.444467441853599e-06, "loss": 0.3801, "step": 2962 }, { "epoch": 0.1598942312881118, "grad_norm": 1.1309617074388112, "learning_rate": 9.444100194272925e-06, "loss": 0.4412, "step": 2963 }, { "epoch": 0.1599481949166262, "grad_norm": 0.9049356981254731, "learning_rate": 9.443732833334827e-06, "loss": 0.357, "step": 2964 }, { "epoch": 0.16000215854514058, "grad_norm": 1.1552843340026835, "learning_rate": 9.443365359049861e-06, "loss": 0.6825, "step": 2965 }, { "epoch": 0.16005612217365495, "grad_norm": 1.2585909640919055, "learning_rate": 9.442997771428592e-06, "loss": 0.6187, "step": 2966 }, { "epoch": 0.16011008580216934, "grad_norm": 0.9453560526947975, "learning_rate": 9.442630070481581e-06, "loss": 0.4266, "step": 2967 }, { "epoch": 0.1601640494306837, "grad_norm": 1.1399775819290876, "learning_rate": 9.442262256219398e-06, "loss": 0.4922, "step": 2968 }, { "epoch": 0.1602180130591981, "grad_norm": 1.1522101107397504, "learning_rate": 9.441894328652616e-06, "loss": 0.6737, "step": 2969 }, { "epoch": 0.16027197668771248, "grad_norm": 1.0093176766089809, "learning_rate": 9.441526287791808e-06, "loss": 0.5261, "step": 2970 }, { "epoch": 0.16032594031622685, "grad_norm": 1.1617219249125292, "learning_rate": 9.44115813364755e-06, "loss": 0.5208, "step": 2971 }, { "epoch": 0.16037990394474125, "grad_norm": 0.9555423481831041, "learning_rate": 9.440789866230425e-06, "loss": 0.5283, "step": 2972 }, { "epoch": 0.16043386757325562, "grad_norm": 1.38965754978105, "learning_rate": 9.44042148555102e-06, "loss": 0.673, "step": 2973 }, { "epoch": 0.16048783120177001, "grad_norm": 1.3841161533500563, "learning_rate": 9.440052991619916e-06, "loss": 0.5446, "step": 2974 }, { "epoch": 0.16054179483028438, "grad_norm": 0.9949161992789052, "learning_rate": 9.43968438444771e-06, "loss": 0.4264, "step": 2975 }, { "epoch": 0.16059575845879878, "grad_norm": 0.9895256678105773, "learning_rate": 9.439315664044995e-06, "loss": 0.3891, "step": 2976 }, { "epoch": 0.16064972208731315, "grad_norm": 1.1226751756829116, "learning_rate": 9.438946830422368e-06, "loss": 0.5229, "step": 2977 }, { "epoch": 0.16070368571582752, "grad_norm": 0.9613734894365971, "learning_rate": 9.438577883590429e-06, "loss": 0.4265, "step": 2978 }, { "epoch": 0.16075764934434192, "grad_norm": 0.8707514694948728, "learning_rate": 9.43820882355978e-06, "loss": 0.3897, "step": 2979 }, { "epoch": 0.1608116129728563, "grad_norm": 0.9865680178703112, "learning_rate": 9.437839650341029e-06, "loss": 0.5033, "step": 2980 }, { "epoch": 0.16086557660137069, "grad_norm": 0.8215037060126451, "learning_rate": 9.43747036394479e-06, "loss": 0.3681, "step": 2981 }, { "epoch": 0.16091954022988506, "grad_norm": 1.037724137563405, "learning_rate": 9.437100964381674e-06, "loss": 0.4976, "step": 2982 }, { "epoch": 0.16097350385839945, "grad_norm": 1.3051493987429608, "learning_rate": 9.436731451662298e-06, "loss": 0.6589, "step": 2983 }, { "epoch": 0.16102746748691382, "grad_norm": 1.1884775347865255, "learning_rate": 9.436361825797282e-06, "loss": 0.482, "step": 2984 }, { "epoch": 0.1610814311154282, "grad_norm": 1.0206626874955227, 
"learning_rate": 9.43599208679725e-06, "loss": 0.383, "step": 2985 }, { "epoch": 0.1611353947439426, "grad_norm": 1.1000631040821316, "learning_rate": 9.435622234672828e-06, "loss": 0.6137, "step": 2986 }, { "epoch": 0.16118935837245696, "grad_norm": 1.028320014277096, "learning_rate": 9.435252269434648e-06, "loss": 0.4819, "step": 2987 }, { "epoch": 0.16124332200097136, "grad_norm": 0.9466939472657221, "learning_rate": 9.43488219109334e-06, "loss": 0.5253, "step": 2988 }, { "epoch": 0.16129728562948573, "grad_norm": 1.1445087675353023, "learning_rate": 9.43451199965954e-06, "loss": 0.6027, "step": 2989 }, { "epoch": 0.1613512492580001, "grad_norm": 1.0650579875920114, "learning_rate": 9.434141695143892e-06, "loss": 0.4571, "step": 2990 }, { "epoch": 0.1614052128865145, "grad_norm": 0.979517265359635, "learning_rate": 9.433771277557037e-06, "loss": 0.3932, "step": 2991 }, { "epoch": 0.16145917651502886, "grad_norm": 1.0275475366138263, "learning_rate": 9.43340074690962e-06, "loss": 0.5097, "step": 2992 }, { "epoch": 0.16151314014354326, "grad_norm": 1.452466258311937, "learning_rate": 9.43303010321229e-06, "loss": 0.7083, "step": 2993 }, { "epoch": 0.16156710377205763, "grad_norm": 1.184735503574892, "learning_rate": 9.432659346475701e-06, "loss": 0.6073, "step": 2994 }, { "epoch": 0.16162106740057203, "grad_norm": 1.1704524150563005, "learning_rate": 9.43228847671051e-06, "loss": 0.5536, "step": 2995 }, { "epoch": 0.1616750310290864, "grad_norm": 1.177285702112136, "learning_rate": 9.431917493927375e-06, "loss": 0.5353, "step": 2996 }, { "epoch": 0.16172899465760077, "grad_norm": 1.0310662876272836, "learning_rate": 9.431546398136958e-06, "loss": 0.493, "step": 2997 }, { "epoch": 0.16178295828611516, "grad_norm": 0.9884204856095337, "learning_rate": 9.431175189349923e-06, "loss": 0.3979, "step": 2998 }, { "epoch": 0.16183692191462953, "grad_norm": 1.1622571114156945, "learning_rate": 9.430803867576943e-06, "loss": 0.6345, "step": 2999 }, { "epoch": 0.16189088554314393, "grad_norm": 0.817843255602589, "learning_rate": 9.430432432828689e-06, "loss": 0.437, "step": 3000 }, { "epoch": 0.16189088554314393, "eval_loss": 0.5924049019813538, "eval_runtime": 164.6583, "eval_samples_per_second": 20.886, "eval_steps_per_second": 0.875, "step": 3000 }, { "epoch": 0.1619448491716583, "grad_norm": 0.9556694643284128, "learning_rate": 9.430060885115833e-06, "loss": 0.3971, "step": 3001 }, { "epoch": 0.16199881280017267, "grad_norm": 1.0818664172169097, "learning_rate": 9.429689224449057e-06, "loss": 0.3972, "step": 3002 }, { "epoch": 0.16205277642868707, "grad_norm": 1.273605535210558, "learning_rate": 9.429317450839042e-06, "loss": 0.6418, "step": 3003 }, { "epoch": 0.16210674005720144, "grad_norm": 1.2352653813107772, "learning_rate": 9.428945564296475e-06, "loss": 0.4909, "step": 3004 }, { "epoch": 0.16216070368571583, "grad_norm": 1.1533149620424703, "learning_rate": 9.42857356483204e-06, "loss": 0.6047, "step": 3005 }, { "epoch": 0.1622146673142302, "grad_norm": 1.171398421840585, "learning_rate": 9.428201452456432e-06, "loss": 0.566, "step": 3006 }, { "epoch": 0.1622686309427446, "grad_norm": 0.9813872132490498, "learning_rate": 9.427829227180344e-06, "loss": 0.4373, "step": 3007 }, { "epoch": 0.16232259457125897, "grad_norm": 1.1626122944699004, "learning_rate": 9.427456889014477e-06, "loss": 0.5867, "step": 3008 }, { "epoch": 0.16237655819977334, "grad_norm": 0.9626729888386892, "learning_rate": 9.427084437969532e-06, "loss": 0.4243, "step": 3009 }, { "epoch": 0.16243052182828774, "grad_norm": 
1.2500473323898016, "learning_rate": 9.42671187405621e-06, "loss": 0.5294, "step": 3010 }, { "epoch": 0.1624844854568021, "grad_norm": 0.9623809037424444, "learning_rate": 9.426339197285222e-06, "loss": 0.5611, "step": 3011 }, { "epoch": 0.1625384490853165, "grad_norm": 0.9289052195571385, "learning_rate": 9.425966407667277e-06, "loss": 0.3921, "step": 3012 }, { "epoch": 0.16259241271383088, "grad_norm": 1.0527435465638553, "learning_rate": 9.42559350521309e-06, "loss": 0.664, "step": 3013 }, { "epoch": 0.16264637634234527, "grad_norm": 0.9429453793611975, "learning_rate": 9.425220489933381e-06, "loss": 0.4107, "step": 3014 }, { "epoch": 0.16270033997085964, "grad_norm": 0.9151679554357353, "learning_rate": 9.424847361838869e-06, "loss": 0.4721, "step": 3015 }, { "epoch": 0.162754303599374, "grad_norm": 1.037592736754966, "learning_rate": 9.424474120940277e-06, "loss": 0.5925, "step": 3016 }, { "epoch": 0.1628082672278884, "grad_norm": 1.2248237437559515, "learning_rate": 9.424100767248334e-06, "loss": 0.6128, "step": 3017 }, { "epoch": 0.16286223085640278, "grad_norm": 1.1470604338214256, "learning_rate": 9.423727300773768e-06, "loss": 0.5027, "step": 3018 }, { "epoch": 0.16291619448491718, "grad_norm": 1.0431866760864006, "learning_rate": 9.423353721527318e-06, "loss": 0.6309, "step": 3019 }, { "epoch": 0.16297015811343155, "grad_norm": 0.6933722126647592, "learning_rate": 9.422980029519715e-06, "loss": 0.2494, "step": 3020 }, { "epoch": 0.16302412174194592, "grad_norm": 1.1536956453898288, "learning_rate": 9.422606224761704e-06, "loss": 0.6348, "step": 3021 }, { "epoch": 0.1630780853704603, "grad_norm": 1.13266758935705, "learning_rate": 9.422232307264026e-06, "loss": 0.4723, "step": 3022 }, { "epoch": 0.16313204899897468, "grad_norm": 1.2221156784475118, "learning_rate": 9.421858277037428e-06, "loss": 0.4799, "step": 3023 }, { "epoch": 0.16318601262748908, "grad_norm": 0.9748698185272858, "learning_rate": 9.42148413409266e-06, "loss": 0.5042, "step": 3024 }, { "epoch": 0.16323997625600345, "grad_norm": 1.0014204569606169, "learning_rate": 9.421109878440477e-06, "loss": 0.4705, "step": 3025 }, { "epoch": 0.16329393988451785, "grad_norm": 1.1399509315661114, "learning_rate": 9.420735510091632e-06, "loss": 0.5868, "step": 3026 }, { "epoch": 0.16334790351303222, "grad_norm": 0.9899289293266449, "learning_rate": 9.420361029056888e-06, "loss": 0.4035, "step": 3027 }, { "epoch": 0.1634018671415466, "grad_norm": 1.01127412264611, "learning_rate": 9.419986435347008e-06, "loss": 0.4397, "step": 3028 }, { "epoch": 0.16345583077006098, "grad_norm": 0.9588630463431623, "learning_rate": 9.419611728972755e-06, "loss": 0.4449, "step": 3029 }, { "epoch": 0.16350979439857535, "grad_norm": 1.2380666327172114, "learning_rate": 9.419236909944901e-06, "loss": 0.5602, "step": 3030 }, { "epoch": 0.16356375802708975, "grad_norm": 1.127776180023369, "learning_rate": 9.418861978274218e-06, "loss": 0.5521, "step": 3031 }, { "epoch": 0.16361772165560412, "grad_norm": 0.9306796501593076, "learning_rate": 9.418486933971484e-06, "loss": 0.3872, "step": 3032 }, { "epoch": 0.16367168528411852, "grad_norm": 1.0351015360996014, "learning_rate": 9.418111777047474e-06, "loss": 0.3687, "step": 3033 }, { "epoch": 0.1637256489126329, "grad_norm": 0.8924275562872201, "learning_rate": 9.417736507512973e-06, "loss": 0.3921, "step": 3034 }, { "epoch": 0.16377961254114726, "grad_norm": 1.0661561176176064, "learning_rate": 9.417361125378766e-06, "loss": 0.6482, "step": 3035 }, { "epoch": 0.16383357616966165, "grad_norm": 
1.241335436368178, "learning_rate": 9.416985630655644e-06, "loss": 0.5584, "step": 3036 }, { "epoch": 0.16388753979817602, "grad_norm": 0.8888631290440462, "learning_rate": 9.416610023354394e-06, "loss": 0.4033, "step": 3037 }, { "epoch": 0.16394150342669042, "grad_norm": 0.8972331457001452, "learning_rate": 9.416234303485818e-06, "loss": 0.4195, "step": 3038 }, { "epoch": 0.1639954670552048, "grad_norm": 1.054331208898059, "learning_rate": 9.415858471060708e-06, "loss": 0.4508, "step": 3039 }, { "epoch": 0.16404943068371916, "grad_norm": 0.7954476759868314, "learning_rate": 9.41548252608987e-06, "loss": 0.4296, "step": 3040 }, { "epoch": 0.16410339431223356, "grad_norm": 1.030451652512724, "learning_rate": 9.415106468584107e-06, "loss": 0.5327, "step": 3041 }, { "epoch": 0.16415735794074793, "grad_norm": 1.015956215361609, "learning_rate": 9.414730298554228e-06, "loss": 0.4974, "step": 3042 }, { "epoch": 0.16421132156926233, "grad_norm": 0.9272709245301843, "learning_rate": 9.414354016011047e-06, "loss": 0.3483, "step": 3043 }, { "epoch": 0.1642652851977767, "grad_norm": 1.0150350771222634, "learning_rate": 9.413977620965375e-06, "loss": 0.5064, "step": 3044 }, { "epoch": 0.1643192488262911, "grad_norm": 1.19288810364433, "learning_rate": 9.413601113428032e-06, "loss": 0.6861, "step": 3045 }, { "epoch": 0.16437321245480546, "grad_norm": 0.9624948077538739, "learning_rate": 9.413224493409838e-06, "loss": 0.4252, "step": 3046 }, { "epoch": 0.16442717608331983, "grad_norm": 1.2253308895360053, "learning_rate": 9.412847760921619e-06, "loss": 0.6518, "step": 3047 }, { "epoch": 0.16448113971183423, "grad_norm": 1.1671457288957277, "learning_rate": 9.412470915974199e-06, "loss": 0.651, "step": 3048 }, { "epoch": 0.1645351033403486, "grad_norm": 0.8765505935801314, "learning_rate": 9.412093958578415e-06, "loss": 0.3962, "step": 3049 }, { "epoch": 0.164589066968863, "grad_norm": 1.1450536790754162, "learning_rate": 9.411716888745095e-06, "loss": 0.5284, "step": 3050 }, { "epoch": 0.16464303059737737, "grad_norm": 1.2483592298579325, "learning_rate": 9.411339706485081e-06, "loss": 0.6087, "step": 3051 }, { "epoch": 0.16469699422589174, "grad_norm": 1.0439490842009356, "learning_rate": 9.41096241180921e-06, "loss": 0.4902, "step": 3052 }, { "epoch": 0.16475095785440613, "grad_norm": 1.056495166148305, "learning_rate": 9.41058500472833e-06, "loss": 0.5415, "step": 3053 }, { "epoch": 0.1648049214829205, "grad_norm": 1.2145790304172641, "learning_rate": 9.410207485253282e-06, "loss": 0.5508, "step": 3054 }, { "epoch": 0.1648588851114349, "grad_norm": 0.8904792645399217, "learning_rate": 9.409829853394922e-06, "loss": 0.3888, "step": 3055 }, { "epoch": 0.16491284873994927, "grad_norm": 1.0552352552400153, "learning_rate": 9.409452109164103e-06, "loss": 0.4298, "step": 3056 }, { "epoch": 0.16496681236846367, "grad_norm": 1.106455965800329, "learning_rate": 9.409074252571679e-06, "loss": 0.5506, "step": 3057 }, { "epoch": 0.16502077599697804, "grad_norm": 1.0744145272574606, "learning_rate": 9.40869628362851e-06, "loss": 0.5517, "step": 3058 }, { "epoch": 0.1650747396254924, "grad_norm": 0.9307517810607085, "learning_rate": 9.408318202345461e-06, "loss": 0.4244, "step": 3059 }, { "epoch": 0.1651287032540068, "grad_norm": 1.2019725571847832, "learning_rate": 9.407940008733399e-06, "loss": 0.575, "step": 3060 }, { "epoch": 0.16518266688252117, "grad_norm": 1.1416811803351057, "learning_rate": 9.40756170280319e-06, "loss": 0.5289, "step": 3061 }, { "epoch": 0.16523663051103557, "grad_norm": 
1.2665498059748768, "learning_rate": 9.407183284565712e-06, "loss": 0.5472, "step": 3062 }, { "epoch": 0.16529059413954994, "grad_norm": 0.9739350308640315, "learning_rate": 9.406804754031837e-06, "loss": 0.3614, "step": 3063 }, { "epoch": 0.16534455776806434, "grad_norm": 0.8335953100771376, "learning_rate": 9.406426111212446e-06, "loss": 0.3219, "step": 3064 }, { "epoch": 0.1653985213965787, "grad_norm": 0.9032177672871006, "learning_rate": 9.40604735611842e-06, "loss": 0.4387, "step": 3065 }, { "epoch": 0.16545248502509308, "grad_norm": 1.0115050926061735, "learning_rate": 9.40566848876065e-06, "loss": 0.5073, "step": 3066 }, { "epoch": 0.16550644865360747, "grad_norm": 1.086252264671102, "learning_rate": 9.405289509150017e-06, "loss": 0.5555, "step": 3067 }, { "epoch": 0.16556041228212184, "grad_norm": 0.9018130363882, "learning_rate": 9.40491041729742e-06, "loss": 0.4076, "step": 3068 }, { "epoch": 0.16561437591063624, "grad_norm": 1.439400994384993, "learning_rate": 9.404531213213752e-06, "loss": 0.5329, "step": 3069 }, { "epoch": 0.1656683395391506, "grad_norm": 0.7145592529461156, "learning_rate": 9.404151896909911e-06, "loss": 0.3082, "step": 3070 }, { "epoch": 0.16572230316766498, "grad_norm": 0.8870963461821364, "learning_rate": 9.4037724683968e-06, "loss": 0.3478, "step": 3071 }, { "epoch": 0.16577626679617938, "grad_norm": 1.182121552084783, "learning_rate": 9.403392927685322e-06, "loss": 0.5312, "step": 3072 }, { "epoch": 0.16583023042469375, "grad_norm": 1.1421941915077434, "learning_rate": 9.40301327478639e-06, "loss": 0.572, "step": 3073 }, { "epoch": 0.16588419405320814, "grad_norm": 1.38019449890495, "learning_rate": 9.402633509710913e-06, "loss": 0.6512, "step": 3074 }, { "epoch": 0.16593815768172251, "grad_norm": 0.8797995630826165, "learning_rate": 9.402253632469803e-06, "loss": 0.4058, "step": 3075 }, { "epoch": 0.1659921213102369, "grad_norm": 1.1216965841314825, "learning_rate": 9.401873643073981e-06, "loss": 0.4823, "step": 3076 }, { "epoch": 0.16604608493875128, "grad_norm": 1.071196255031116, "learning_rate": 9.40149354153437e-06, "loss": 0.4646, "step": 3077 }, { "epoch": 0.16610004856726565, "grad_norm": 1.1689014808512714, "learning_rate": 9.401113327861891e-06, "loss": 0.5937, "step": 3078 }, { "epoch": 0.16615401219578005, "grad_norm": 1.1496105290643719, "learning_rate": 9.400733002067472e-06, "loss": 0.634, "step": 3079 }, { "epoch": 0.16620797582429442, "grad_norm": 0.9086245552587248, "learning_rate": 9.400352564162047e-06, "loss": 0.4068, "step": 3080 }, { "epoch": 0.16626193945280882, "grad_norm": 1.23963551141849, "learning_rate": 9.399972014156548e-06, "loss": 0.5891, "step": 3081 }, { "epoch": 0.16631590308132319, "grad_norm": 0.8077414700341824, "learning_rate": 9.399591352061914e-06, "loss": 0.3298, "step": 3082 }, { "epoch": 0.16636986670983758, "grad_norm": 0.9904344852496164, "learning_rate": 9.39921057788908e-06, "loss": 0.4816, "step": 3083 }, { "epoch": 0.16642383033835195, "grad_norm": 0.8854066282235407, "learning_rate": 9.398829691648997e-06, "loss": 0.4646, "step": 3084 }, { "epoch": 0.16647779396686632, "grad_norm": 1.2972689095176384, "learning_rate": 9.39844869335261e-06, "loss": 0.5955, "step": 3085 }, { "epoch": 0.16653175759538072, "grad_norm": 0.9793100068946493, "learning_rate": 9.398067583010867e-06, "loss": 0.3875, "step": 3086 }, { "epoch": 0.1665857212238951, "grad_norm": 1.145494148297733, "learning_rate": 9.397686360634723e-06, "loss": 0.6392, "step": 3087 }, { "epoch": 0.1666396848524095, "grad_norm": 1.1219676551495839, 
"learning_rate": 9.397305026235136e-06, "loss": 0.5467, "step": 3088 }, { "epoch": 0.16669364848092386, "grad_norm": 1.4293147408556335, "learning_rate": 9.396923579823063e-06, "loss": 0.5576, "step": 3089 }, { "epoch": 0.16674761210943823, "grad_norm": 1.0875718719882101, "learning_rate": 9.39654202140947e-06, "loss": 0.6202, "step": 3090 }, { "epoch": 0.16680157573795262, "grad_norm": 1.1120292049738836, "learning_rate": 9.39616035100532e-06, "loss": 0.5581, "step": 3091 }, { "epoch": 0.166855539366467, "grad_norm": 1.0647139724354546, "learning_rate": 9.395778568621587e-06, "loss": 0.5451, "step": 3092 }, { "epoch": 0.1669095029949814, "grad_norm": 1.1375409615676562, "learning_rate": 9.395396674269242e-06, "loss": 0.4928, "step": 3093 }, { "epoch": 0.16696346662349576, "grad_norm": 0.815365525349067, "learning_rate": 9.39501466795926e-06, "loss": 0.396, "step": 3094 }, { "epoch": 0.16701743025201016, "grad_norm": 0.9776969430339892, "learning_rate": 9.39463254970262e-06, "loss": 0.5518, "step": 3095 }, { "epoch": 0.16707139388052453, "grad_norm": 1.0601102856547375, "learning_rate": 9.394250319510303e-06, "loss": 0.4114, "step": 3096 }, { "epoch": 0.1671253575090389, "grad_norm": 1.5050886559918186, "learning_rate": 9.393867977393301e-06, "loss": 0.516, "step": 3097 }, { "epoch": 0.1671793211375533, "grad_norm": 0.8597350697318071, "learning_rate": 9.393485523362598e-06, "loss": 0.435, "step": 3098 }, { "epoch": 0.16723328476606766, "grad_norm": 0.9872186768310999, "learning_rate": 9.393102957429186e-06, "loss": 0.4943, "step": 3099 }, { "epoch": 0.16728724839458206, "grad_norm": 1.0445663431609618, "learning_rate": 9.39272027960406e-06, "loss": 0.4823, "step": 3100 }, { "epoch": 0.16734121202309643, "grad_norm": 1.0634782719924891, "learning_rate": 9.392337489898221e-06, "loss": 0.5365, "step": 3101 }, { "epoch": 0.16739517565161083, "grad_norm": 0.998589561500629, "learning_rate": 9.391954588322671e-06, "loss": 0.4343, "step": 3102 }, { "epoch": 0.1674491392801252, "grad_norm": 0.9660567173279985, "learning_rate": 9.391571574888413e-06, "loss": 0.4845, "step": 3103 }, { "epoch": 0.16750310290863957, "grad_norm": 1.1680840656489786, "learning_rate": 9.391188449606455e-06, "loss": 0.5011, "step": 3104 }, { "epoch": 0.16755706653715396, "grad_norm": 0.8028264854819163, "learning_rate": 9.390805212487812e-06, "loss": 0.3304, "step": 3105 }, { "epoch": 0.16761103016566833, "grad_norm": 0.9638020102365698, "learning_rate": 9.390421863543493e-06, "loss": 0.4739, "step": 3106 }, { "epoch": 0.16766499379418273, "grad_norm": 1.2420489557987215, "learning_rate": 9.390038402784518e-06, "loss": 0.583, "step": 3107 }, { "epoch": 0.1677189574226971, "grad_norm": 0.8764612160942299, "learning_rate": 9.38965483022191e-06, "loss": 0.396, "step": 3108 }, { "epoch": 0.16777292105121147, "grad_norm": 0.9298058848908223, "learning_rate": 9.389271145866692e-06, "loss": 0.4538, "step": 3109 }, { "epoch": 0.16782688467972587, "grad_norm": 0.8843831186291994, "learning_rate": 9.388887349729892e-06, "loss": 0.3868, "step": 3110 }, { "epoch": 0.16788084830824024, "grad_norm": 0.9683556653072294, "learning_rate": 9.388503441822538e-06, "loss": 0.4548, "step": 3111 }, { "epoch": 0.16793481193675464, "grad_norm": 1.1594149310781166, "learning_rate": 9.388119422155669e-06, "loss": 0.5885, "step": 3112 }, { "epoch": 0.167988775565269, "grad_norm": 1.0731272354600097, "learning_rate": 9.387735290740318e-06, "loss": 0.5017, "step": 3113 }, { "epoch": 0.1680427391937834, "grad_norm": 1.2248377523260037, 
"learning_rate": 9.387351047587526e-06, "loss": 0.6311, "step": 3114 }, { "epoch": 0.16809670282229777, "grad_norm": 0.9878766630350391, "learning_rate": 9.386966692708337e-06, "loss": 0.5302, "step": 3115 }, { "epoch": 0.16815066645081214, "grad_norm": 1.1054680257046703, "learning_rate": 9.386582226113797e-06, "loss": 0.6424, "step": 3116 }, { "epoch": 0.16820463007932654, "grad_norm": 1.3494022635303744, "learning_rate": 9.386197647814957e-06, "loss": 0.6652, "step": 3117 }, { "epoch": 0.1682585937078409, "grad_norm": 0.908662956904971, "learning_rate": 9.385812957822872e-06, "loss": 0.282, "step": 3118 }, { "epoch": 0.1683125573363553, "grad_norm": 0.9769440317855215, "learning_rate": 9.385428156148594e-06, "loss": 0.4228, "step": 3119 }, { "epoch": 0.16836652096486968, "grad_norm": 1.3345610696828134, "learning_rate": 9.385043242803185e-06, "loss": 0.6465, "step": 3120 }, { "epoch": 0.16842048459338405, "grad_norm": 1.0283701558408973, "learning_rate": 9.384658217797709e-06, "loss": 0.5418, "step": 3121 }, { "epoch": 0.16847444822189844, "grad_norm": 1.0473900974859198, "learning_rate": 9.384273081143231e-06, "loss": 0.4831, "step": 3122 }, { "epoch": 0.1685284118504128, "grad_norm": 1.1231703724221165, "learning_rate": 9.383887832850818e-06, "loss": 0.4373, "step": 3123 }, { "epoch": 0.1685823754789272, "grad_norm": 0.9920505908283784, "learning_rate": 9.383502472931544e-06, "loss": 0.466, "step": 3124 }, { "epoch": 0.16863633910744158, "grad_norm": 0.9248699283439118, "learning_rate": 9.383117001396486e-06, "loss": 0.439, "step": 3125 }, { "epoch": 0.16869030273595598, "grad_norm": 1.1028877674318893, "learning_rate": 9.382731418256722e-06, "loss": 0.6322, "step": 3126 }, { "epoch": 0.16874426636447035, "grad_norm": 0.9265599504493932, "learning_rate": 9.382345723523334e-06, "loss": 0.4509, "step": 3127 }, { "epoch": 0.16879822999298472, "grad_norm": 1.074225159766606, "learning_rate": 9.381959917207406e-06, "loss": 0.4589, "step": 3128 }, { "epoch": 0.1688521936214991, "grad_norm": 1.3126964215060857, "learning_rate": 9.38157399932003e-06, "loss": 0.5964, "step": 3129 }, { "epoch": 0.16890615725001348, "grad_norm": 1.130087571507967, "learning_rate": 9.381187969872293e-06, "loss": 0.4258, "step": 3130 }, { "epoch": 0.16896012087852788, "grad_norm": 1.051620216315125, "learning_rate": 9.380801828875293e-06, "loss": 0.498, "step": 3131 }, { "epoch": 0.16901408450704225, "grad_norm": 1.0424098212459374, "learning_rate": 9.380415576340127e-06, "loss": 0.4507, "step": 3132 }, { "epoch": 0.16906804813555665, "grad_norm": 1.2055373278127448, "learning_rate": 9.380029212277894e-06, "loss": 0.4558, "step": 3133 }, { "epoch": 0.16912201176407102, "grad_norm": 1.1492780036121855, "learning_rate": 9.379642736699705e-06, "loss": 0.5091, "step": 3134 }, { "epoch": 0.1691759753925854, "grad_norm": 0.8874085665919307, "learning_rate": 9.37925614961666e-06, "loss": 0.4329, "step": 3135 }, { "epoch": 0.16922993902109978, "grad_norm": 1.0191130000583988, "learning_rate": 9.378869451039875e-06, "loss": 0.546, "step": 3136 }, { "epoch": 0.16928390264961415, "grad_norm": 1.1253211184489675, "learning_rate": 9.378482640980464e-06, "loss": 0.5971, "step": 3137 }, { "epoch": 0.16933786627812855, "grad_norm": 0.7600441046962375, "learning_rate": 9.378095719449542e-06, "loss": 0.3519, "step": 3138 }, { "epoch": 0.16939182990664292, "grad_norm": 0.9521796236362152, "learning_rate": 9.377708686458232e-06, "loss": 0.399, "step": 3139 }, { "epoch": 0.1694457935351573, "grad_norm": 1.050674325423393, 
"learning_rate": 9.377321542017653e-06, "loss": 0.5083, "step": 3140 }, { "epoch": 0.1694997571636717, "grad_norm": 1.1269299485204087, "learning_rate": 9.376934286138937e-06, "loss": 0.6998, "step": 3141 }, { "epoch": 0.16955372079218606, "grad_norm": 1.1621490340719962, "learning_rate": 9.376546918833213e-06, "loss": 0.5044, "step": 3142 }, { "epoch": 0.16960768442070046, "grad_norm": 1.0469693139614664, "learning_rate": 9.376159440111612e-06, "loss": 0.5744, "step": 3143 }, { "epoch": 0.16966164804921482, "grad_norm": 1.192464924568567, "learning_rate": 9.375771849985274e-06, "loss": 0.5223, "step": 3144 }, { "epoch": 0.16971561167772922, "grad_norm": 1.1163127645661057, "learning_rate": 9.375384148465336e-06, "loss": 0.4891, "step": 3145 }, { "epoch": 0.1697695753062436, "grad_norm": 1.102258566582523, "learning_rate": 9.37499633556294e-06, "loss": 0.5244, "step": 3146 }, { "epoch": 0.16982353893475796, "grad_norm": 1.0329631513717354, "learning_rate": 9.374608411289235e-06, "loss": 0.5739, "step": 3147 }, { "epoch": 0.16987750256327236, "grad_norm": 1.1048982274665755, "learning_rate": 9.37422037565537e-06, "loss": 0.5065, "step": 3148 }, { "epoch": 0.16993146619178673, "grad_norm": 1.2623232069609536, "learning_rate": 9.373832228672496e-06, "loss": 0.6486, "step": 3149 }, { "epoch": 0.16998542982030113, "grad_norm": 1.0598765701628676, "learning_rate": 9.37344397035177e-06, "loss": 0.5006, "step": 3150 }, { "epoch": 0.1700393934488155, "grad_norm": 1.1317557180272588, "learning_rate": 9.37305560070435e-06, "loss": 0.5813, "step": 3151 }, { "epoch": 0.1700933570773299, "grad_norm": 1.1754520574515648, "learning_rate": 9.3726671197414e-06, "loss": 0.6186, "step": 3152 }, { "epoch": 0.17014732070584426, "grad_norm": 0.8678252653715242, "learning_rate": 9.372278527474082e-06, "loss": 0.441, "step": 3153 }, { "epoch": 0.17020128433435863, "grad_norm": 1.08115063063288, "learning_rate": 9.371889823913568e-06, "loss": 0.598, "step": 3154 }, { "epoch": 0.17025524796287303, "grad_norm": 0.950660372804506, "learning_rate": 9.371501009071028e-06, "loss": 0.3981, "step": 3155 }, { "epoch": 0.1703092115913874, "grad_norm": 1.4665065497095973, "learning_rate": 9.371112082957636e-06, "loss": 0.6937, "step": 3156 }, { "epoch": 0.1703631752199018, "grad_norm": 0.7324722046788382, "learning_rate": 9.370723045584572e-06, "loss": 0.3634, "step": 3157 }, { "epoch": 0.17041713884841617, "grad_norm": 0.8367632956957777, "learning_rate": 9.370333896963016e-06, "loss": 0.311, "step": 3158 }, { "epoch": 0.17047110247693054, "grad_norm": 0.9664147070769433, "learning_rate": 9.369944637104154e-06, "loss": 0.3895, "step": 3159 }, { "epoch": 0.17052506610544493, "grad_norm": 0.7592322372672324, "learning_rate": 9.369555266019172e-06, "loss": 0.2704, "step": 3160 }, { "epoch": 0.1705790297339593, "grad_norm": 1.0911157977038604, "learning_rate": 9.369165783719263e-06, "loss": 0.5379, "step": 3161 }, { "epoch": 0.1706329933624737, "grad_norm": 1.066173691287015, "learning_rate": 9.368776190215621e-06, "loss": 0.5272, "step": 3162 }, { "epoch": 0.17068695699098807, "grad_norm": 1.3363042425935967, "learning_rate": 9.36838648551944e-06, "loss": 0.6508, "step": 3163 }, { "epoch": 0.17074092061950247, "grad_norm": 1.2487725767035265, "learning_rate": 9.367996669641922e-06, "loss": 0.6781, "step": 3164 }, { "epoch": 0.17079488424801684, "grad_norm": 0.9304481930527686, "learning_rate": 9.367606742594272e-06, "loss": 0.4372, "step": 3165 }, { "epoch": 0.1708488478765312, "grad_norm": 1.047154252405805, "learning_rate": 
9.367216704387697e-06, "loss": 0.4792, "step": 3166 }, { "epoch": 0.1709028115050456, "grad_norm": 1.0416008138735509, "learning_rate": 9.366826555033407e-06, "loss": 0.4857, "step": 3167 }, { "epoch": 0.17095677513355997, "grad_norm": 1.069034862353067, "learning_rate": 9.366436294542613e-06, "loss": 0.5767, "step": 3168 }, { "epoch": 0.17101073876207437, "grad_norm": 0.9678460143389742, "learning_rate": 9.366045922926533e-06, "loss": 0.5385, "step": 3169 }, { "epoch": 0.17106470239058874, "grad_norm": 1.06093944632542, "learning_rate": 9.36565544019639e-06, "loss": 0.5412, "step": 3170 }, { "epoch": 0.17111866601910314, "grad_norm": 1.1966492709091994, "learning_rate": 9.3652648463634e-06, "loss": 0.5807, "step": 3171 }, { "epoch": 0.1711726296476175, "grad_norm": 0.9414369136681316, "learning_rate": 9.364874141438794e-06, "loss": 0.445, "step": 3172 }, { "epoch": 0.17122659327613188, "grad_norm": 0.9007917942082062, "learning_rate": 9.3644833254338e-06, "loss": 0.4046, "step": 3173 }, { "epoch": 0.17128055690464628, "grad_norm": 0.8691328597730115, "learning_rate": 9.36409239835965e-06, "loss": 0.4779, "step": 3174 }, { "epoch": 0.17133452053316064, "grad_norm": 0.98268329044992, "learning_rate": 9.363701360227582e-06, "loss": 0.4573, "step": 3175 }, { "epoch": 0.17138848416167504, "grad_norm": 1.1831323615552372, "learning_rate": 9.363310211048831e-06, "loss": 0.5779, "step": 3176 }, { "epoch": 0.1714424477901894, "grad_norm": 1.0182781491082997, "learning_rate": 9.36291895083464e-06, "loss": 0.44, "step": 3177 }, { "epoch": 0.17149641141870378, "grad_norm": 1.4296176623865242, "learning_rate": 9.36252757959626e-06, "loss": 0.7297, "step": 3178 }, { "epoch": 0.17155037504721818, "grad_norm": 0.9030071143021786, "learning_rate": 9.36213609734493e-06, "loss": 0.4115, "step": 3179 }, { "epoch": 0.17160433867573255, "grad_norm": 1.111632939999548, "learning_rate": 9.361744504091908e-06, "loss": 0.6168, "step": 3180 }, { "epoch": 0.17165830230424695, "grad_norm": 1.0346813646290283, "learning_rate": 9.361352799848445e-06, "loss": 0.5056, "step": 3181 }, { "epoch": 0.17171226593276132, "grad_norm": 1.2234205017832493, "learning_rate": 9.360960984625804e-06, "loss": 0.6338, "step": 3182 }, { "epoch": 0.1717662295612757, "grad_norm": 1.3361862240278448, "learning_rate": 9.360569058435241e-06, "loss": 0.4717, "step": 3183 }, { "epoch": 0.17182019318979008, "grad_norm": 1.0484649300735198, "learning_rate": 9.360177021288023e-06, "loss": 0.4227, "step": 3184 }, { "epoch": 0.17187415681830445, "grad_norm": 1.0655346388463542, "learning_rate": 9.359784873195416e-06, "loss": 0.464, "step": 3185 }, { "epoch": 0.17192812044681885, "grad_norm": 1.0355144351054362, "learning_rate": 9.359392614168693e-06, "loss": 0.5299, "step": 3186 }, { "epoch": 0.17198208407533322, "grad_norm": 0.9740925109337184, "learning_rate": 9.359000244219126e-06, "loss": 0.5065, "step": 3187 }, { "epoch": 0.17203604770384762, "grad_norm": 1.0953196260892126, "learning_rate": 9.358607763357993e-06, "loss": 0.5905, "step": 3188 }, { "epoch": 0.172090011332362, "grad_norm": 1.0523016671119958, "learning_rate": 9.358215171596573e-06, "loss": 0.5661, "step": 3189 }, { "epoch": 0.17214397496087636, "grad_norm": 1.2276682040883806, "learning_rate": 9.357822468946151e-06, "loss": 0.4918, "step": 3190 }, { "epoch": 0.17219793858939075, "grad_norm": 0.9251643008960653, "learning_rate": 9.357429655418015e-06, "loss": 0.4246, "step": 3191 }, { "epoch": 0.17225190221790512, "grad_norm": 0.8470788817677216, "learning_rate": 
9.357036731023453e-06, "loss": 0.3816, "step": 3192 }, { "epoch": 0.17230586584641952, "grad_norm": 1.026408059475465, "learning_rate": 9.356643695773756e-06, "loss": 0.417, "step": 3193 }, { "epoch": 0.1723598294749339, "grad_norm": 0.9647376766991798, "learning_rate": 9.356250549680222e-06, "loss": 0.4904, "step": 3194 }, { "epoch": 0.1724137931034483, "grad_norm": 0.6809438894547291, "learning_rate": 9.355857292754152e-06, "loss": 0.2818, "step": 3195 }, { "epoch": 0.17246775673196266, "grad_norm": 1.2558224240508546, "learning_rate": 9.355463925006847e-06, "loss": 0.6028, "step": 3196 }, { "epoch": 0.17252172036047703, "grad_norm": 1.0383872508159173, "learning_rate": 9.355070446449614e-06, "loss": 0.4985, "step": 3197 }, { "epoch": 0.17257568398899142, "grad_norm": 1.0932739056654472, "learning_rate": 9.35467685709376e-06, "loss": 0.5205, "step": 3198 }, { "epoch": 0.1726296476175058, "grad_norm": 1.0351938552964597, "learning_rate": 9.354283156950599e-06, "loss": 0.514, "step": 3199 }, { "epoch": 0.1726836112460202, "grad_norm": 1.0586882652954654, "learning_rate": 9.353889346031445e-06, "loss": 0.4492, "step": 3200 }, { "epoch": 0.17273757487453456, "grad_norm": 1.0500731483979102, "learning_rate": 9.353495424347619e-06, "loss": 0.4969, "step": 3201 }, { "epoch": 0.17279153850304896, "grad_norm": 0.8389163707053308, "learning_rate": 9.353101391910438e-06, "loss": 0.3551, "step": 3202 }, { "epoch": 0.17284550213156333, "grad_norm": 1.304678420501069, "learning_rate": 9.352707248731232e-06, "loss": 0.6063, "step": 3203 }, { "epoch": 0.1728994657600777, "grad_norm": 1.2252522932989551, "learning_rate": 9.352312994821326e-06, "loss": 0.6369, "step": 3204 }, { "epoch": 0.1729534293885921, "grad_norm": 0.8893561599290669, "learning_rate": 9.35191863019205e-06, "loss": 0.3713, "step": 3205 }, { "epoch": 0.17300739301710646, "grad_norm": 1.1692360566313433, "learning_rate": 9.351524154854744e-06, "loss": 0.4404, "step": 3206 }, { "epoch": 0.17306135664562086, "grad_norm": 1.1258113219579153, "learning_rate": 9.351129568820741e-06, "loss": 0.7105, "step": 3207 }, { "epoch": 0.17311532027413523, "grad_norm": 1.1096574469198752, "learning_rate": 9.350734872101382e-06, "loss": 0.4757, "step": 3208 }, { "epoch": 0.1731692839026496, "grad_norm": 0.9013096492377883, "learning_rate": 9.350340064708012e-06, "loss": 0.4479, "step": 3209 }, { "epoch": 0.173223247531164, "grad_norm": 1.0969597487443385, "learning_rate": 9.34994514665198e-06, "loss": 0.4586, "step": 3210 }, { "epoch": 0.17327721115967837, "grad_norm": 1.2034394522788074, "learning_rate": 9.34955011794463e-06, "loss": 0.5529, "step": 3211 }, { "epoch": 0.17333117478819277, "grad_norm": 1.2147045614543763, "learning_rate": 9.349154978597323e-06, "loss": 0.6469, "step": 3212 }, { "epoch": 0.17338513841670714, "grad_norm": 1.1006007247622636, "learning_rate": 9.348759728621414e-06, "loss": 0.4802, "step": 3213 }, { "epoch": 0.17343910204522153, "grad_norm": 0.7835335407997517, "learning_rate": 9.348364368028258e-06, "loss": 0.3803, "step": 3214 }, { "epoch": 0.1734930656737359, "grad_norm": 0.9533931856040292, "learning_rate": 9.347968896829223e-06, "loss": 0.441, "step": 3215 }, { "epoch": 0.17354702930225027, "grad_norm": 1.0442213999689582, "learning_rate": 9.347573315035674e-06, "loss": 0.462, "step": 3216 }, { "epoch": 0.17360099293076467, "grad_norm": 1.303094685739711, "learning_rate": 9.347177622658981e-06, "loss": 0.4358, "step": 3217 }, { "epoch": 0.17365495655927904, "grad_norm": 1.0106174444243956, "learning_rate": 
9.346781819710517e-06, "loss": 0.52, "step": 3218 }, { "epoch": 0.17370892018779344, "grad_norm": 1.1849587960670875, "learning_rate": 9.346385906201653e-06, "loss": 0.6035, "step": 3219 }, { "epoch": 0.1737628838163078, "grad_norm": 0.9121007311677226, "learning_rate": 9.345989882143774e-06, "loss": 0.4114, "step": 3220 }, { "epoch": 0.1738168474448222, "grad_norm": 1.138535168778012, "learning_rate": 9.34559374754826e-06, "loss": 0.643, "step": 3221 }, { "epoch": 0.17387081107333657, "grad_norm": 1.1697686250851338, "learning_rate": 9.345197502426493e-06, "loss": 0.5988, "step": 3222 }, { "epoch": 0.17392477470185094, "grad_norm": 1.0428752617830082, "learning_rate": 9.344801146789868e-06, "loss": 0.5793, "step": 3223 }, { "epoch": 0.17397873833036534, "grad_norm": 1.2675938613389781, "learning_rate": 9.344404680649772e-06, "loss": 0.4874, "step": 3224 }, { "epoch": 0.1740327019588797, "grad_norm": 1.06273226312279, "learning_rate": 9.344008104017602e-06, "loss": 0.5549, "step": 3225 }, { "epoch": 0.1740866655873941, "grad_norm": 0.9938776312782399, "learning_rate": 9.343611416904753e-06, "loss": 0.4367, "step": 3226 }, { "epoch": 0.17414062921590848, "grad_norm": 1.0033742631460751, "learning_rate": 9.34321461932263e-06, "loss": 0.5087, "step": 3227 }, { "epoch": 0.17419459284442285, "grad_norm": 1.0527137173871501, "learning_rate": 9.342817711282632e-06, "loss": 0.5159, "step": 3228 }, { "epoch": 0.17424855647293724, "grad_norm": 0.9823574897939077, "learning_rate": 9.342420692796174e-06, "loss": 0.5646, "step": 3229 }, { "epoch": 0.1743025201014516, "grad_norm": 0.8467193379999528, "learning_rate": 9.342023563874662e-06, "loss": 0.366, "step": 3230 }, { "epoch": 0.174356483729966, "grad_norm": 0.9368740858494746, "learning_rate": 9.34162632452951e-06, "loss": 0.3576, "step": 3231 }, { "epoch": 0.17441044735848038, "grad_norm": 1.099694877484971, "learning_rate": 9.341228974772136e-06, "loss": 0.5787, "step": 3232 }, { "epoch": 0.17446441098699478, "grad_norm": 1.2123322009480553, "learning_rate": 9.34083151461396e-06, "loss": 0.5488, "step": 3233 }, { "epoch": 0.17451837461550915, "grad_norm": 1.2148609589199066, "learning_rate": 9.340433944066404e-06, "loss": 0.569, "step": 3234 }, { "epoch": 0.17457233824402352, "grad_norm": 0.917629070811702, "learning_rate": 9.340036263140897e-06, "loss": 0.4794, "step": 3235 }, { "epoch": 0.17462630187253791, "grad_norm": 1.114222829729946, "learning_rate": 9.339638471848868e-06, "loss": 0.5525, "step": 3236 }, { "epoch": 0.17468026550105228, "grad_norm": 1.1922208820225446, "learning_rate": 9.339240570201751e-06, "loss": 0.5042, "step": 3237 }, { "epoch": 0.17473422912956668, "grad_norm": 0.9856681273461215, "learning_rate": 9.338842558210978e-06, "loss": 0.4067, "step": 3238 }, { "epoch": 0.17478819275808105, "grad_norm": 0.9974510264838814, "learning_rate": 9.338444435887992e-06, "loss": 0.4181, "step": 3239 }, { "epoch": 0.17484215638659542, "grad_norm": 1.247648327619207, "learning_rate": 9.338046203244236e-06, "loss": 0.6015, "step": 3240 }, { "epoch": 0.17489612001510982, "grad_norm": 0.9566208872313153, "learning_rate": 9.337647860291153e-06, "loss": 0.4391, "step": 3241 }, { "epoch": 0.1749500836436242, "grad_norm": 1.1677003247093316, "learning_rate": 9.337249407040192e-06, "loss": 0.7163, "step": 3242 }, { "epoch": 0.17500404727213859, "grad_norm": 1.045418805789118, "learning_rate": 9.336850843502806e-06, "loss": 0.5056, "step": 3243 }, { "epoch": 0.17505801090065295, "grad_norm": 0.9652699574270031, "learning_rate": 
9.336452169690452e-06, "loss": 0.534, "step": 3244 }, { "epoch": 0.17511197452916735, "grad_norm": 1.047371902013245, "learning_rate": 9.336053385614583e-06, "loss": 0.4443, "step": 3245 }, { "epoch": 0.17516593815768172, "grad_norm": 1.0320307181082093, "learning_rate": 9.335654491286665e-06, "loss": 0.5416, "step": 3246 }, { "epoch": 0.1752199017861961, "grad_norm": 0.9316780206979535, "learning_rate": 9.335255486718163e-06, "loss": 0.4266, "step": 3247 }, { "epoch": 0.1752738654147105, "grad_norm": 1.0811059928942819, "learning_rate": 9.334856371920541e-06, "loss": 0.5733, "step": 3248 }, { "epoch": 0.17532782904322486, "grad_norm": 1.014106130214839, "learning_rate": 9.334457146905272e-06, "loss": 0.5464, "step": 3249 }, { "epoch": 0.17538179267173926, "grad_norm": 1.0217909285725855, "learning_rate": 9.33405781168383e-06, "loss": 0.5105, "step": 3250 }, { "epoch": 0.17543575630025363, "grad_norm": 1.0955810019076024, "learning_rate": 9.333658366267696e-06, "loss": 0.5502, "step": 3251 }, { "epoch": 0.17548971992876802, "grad_norm": 1.1791534620278106, "learning_rate": 9.333258810668342e-06, "loss": 0.6343, "step": 3252 }, { "epoch": 0.1755436835572824, "grad_norm": 0.9547441940443747, "learning_rate": 9.332859144897259e-06, "loss": 0.5532, "step": 3253 }, { "epoch": 0.17559764718579676, "grad_norm": 0.9940651715911237, "learning_rate": 9.332459368965931e-06, "loss": 0.4437, "step": 3254 }, { "epoch": 0.17565161081431116, "grad_norm": 1.1651945156910053, "learning_rate": 9.332059482885848e-06, "loss": 0.5651, "step": 3255 }, { "epoch": 0.17570557444282553, "grad_norm": 0.9455896428110837, "learning_rate": 9.331659486668504e-06, "loss": 0.5146, "step": 3256 }, { "epoch": 0.17575953807133993, "grad_norm": 1.1119095488510455, "learning_rate": 9.331259380325394e-06, "loss": 0.7488, "step": 3257 }, { "epoch": 0.1758135016998543, "grad_norm": 1.2603621376809295, "learning_rate": 9.33085916386802e-06, "loss": 0.5016, "step": 3258 }, { "epoch": 0.17586746532836867, "grad_norm": 0.9792258642789261, "learning_rate": 9.33045883730788e-06, "loss": 0.41, "step": 3259 }, { "epoch": 0.17592142895688306, "grad_norm": 0.9719434036938298, "learning_rate": 9.330058400656485e-06, "loss": 0.4073, "step": 3260 }, { "epoch": 0.17597539258539743, "grad_norm": 1.0244334264908586, "learning_rate": 9.329657853925341e-06, "loss": 0.4301, "step": 3261 }, { "epoch": 0.17602935621391183, "grad_norm": 1.2994254823114377, "learning_rate": 9.32925719712596e-06, "loss": 0.6954, "step": 3262 }, { "epoch": 0.1760833198424262, "grad_norm": 1.1513503786063108, "learning_rate": 9.32885643026986e-06, "loss": 0.4671, "step": 3263 }, { "epoch": 0.1761372834709406, "grad_norm": 1.0002628255651296, "learning_rate": 9.328455553368555e-06, "loss": 0.5593, "step": 3264 }, { "epoch": 0.17619124709945497, "grad_norm": 1.26962893102264, "learning_rate": 9.32805456643357e-06, "loss": 0.7464, "step": 3265 }, { "epoch": 0.17624521072796934, "grad_norm": 0.7824678197452039, "learning_rate": 9.327653469476429e-06, "loss": 0.3755, "step": 3266 }, { "epoch": 0.17629917435648373, "grad_norm": 1.051999440894643, "learning_rate": 9.327252262508658e-06, "loss": 0.4842, "step": 3267 }, { "epoch": 0.1763531379849981, "grad_norm": 1.15630611433821, "learning_rate": 9.326850945541792e-06, "loss": 0.4028, "step": 3268 }, { "epoch": 0.1764071016135125, "grad_norm": 1.1440553563911282, "learning_rate": 9.326449518587362e-06, "loss": 0.4883, "step": 3269 }, { "epoch": 0.17646106524202687, "grad_norm": 1.0617842800495336, "learning_rate": 
9.326047981656907e-06, "loss": 0.676, "step": 3270 }, { "epoch": 0.17651502887054127, "grad_norm": 1.5971815549102086, "learning_rate": 9.325646334761968e-06, "loss": 0.683, "step": 3271 }, { "epoch": 0.17656899249905564, "grad_norm": 1.1659009036449632, "learning_rate": 9.325244577914087e-06, "loss": 0.5659, "step": 3272 }, { "epoch": 0.17662295612757, "grad_norm": 0.9452383221006792, "learning_rate": 9.32484271112481e-06, "loss": 0.4219, "step": 3273 }, { "epoch": 0.1766769197560844, "grad_norm": 0.9412400911991763, "learning_rate": 9.324440734405691e-06, "loss": 0.551, "step": 3274 }, { "epoch": 0.17673088338459877, "grad_norm": 0.8808292727968727, "learning_rate": 9.32403864776828e-06, "loss": 0.4037, "step": 3275 }, { "epoch": 0.17678484701311317, "grad_norm": 1.039745305177311, "learning_rate": 9.323636451224134e-06, "loss": 0.5608, "step": 3276 }, { "epoch": 0.17683881064162754, "grad_norm": 0.9620416422151222, "learning_rate": 9.323234144784813e-06, "loss": 0.4995, "step": 3277 }, { "epoch": 0.1768927742701419, "grad_norm": 0.815618409367972, "learning_rate": 9.32283172846188e-06, "loss": 0.3628, "step": 3278 }, { "epoch": 0.1769467378986563, "grad_norm": 1.0547044649589403, "learning_rate": 9.3224292022669e-06, "loss": 0.51, "step": 3279 }, { "epoch": 0.17700070152717068, "grad_norm": 1.3070943342158574, "learning_rate": 9.322026566211445e-06, "loss": 0.5071, "step": 3280 }, { "epoch": 0.17705466515568508, "grad_norm": 1.1735337901435394, "learning_rate": 9.321623820307083e-06, "loss": 0.5709, "step": 3281 }, { "epoch": 0.17710862878419945, "grad_norm": 1.2701511872697282, "learning_rate": 9.321220964565389e-06, "loss": 0.4199, "step": 3282 }, { "epoch": 0.17716259241271384, "grad_norm": 1.1539412480526907, "learning_rate": 9.320817998997945e-06, "loss": 0.6511, "step": 3283 }, { "epoch": 0.1772165560412282, "grad_norm": 1.0263610283883908, "learning_rate": 9.32041492361633e-06, "loss": 0.4635, "step": 3284 }, { "epoch": 0.17727051966974258, "grad_norm": 0.9856630254617735, "learning_rate": 9.320011738432131e-06, "loss": 0.4157, "step": 3285 }, { "epoch": 0.17732448329825698, "grad_norm": 0.8543579872459929, "learning_rate": 9.319608443456935e-06, "loss": 0.3369, "step": 3286 }, { "epoch": 0.17737844692677135, "grad_norm": 1.2337477370201717, "learning_rate": 9.319205038702332e-06, "loss": 0.7126, "step": 3287 }, { "epoch": 0.17743241055528575, "grad_norm": 0.9419621395036354, "learning_rate": 9.318801524179916e-06, "loss": 0.3209, "step": 3288 }, { "epoch": 0.17748637418380012, "grad_norm": 1.0582641777403659, "learning_rate": 9.318397899901288e-06, "loss": 0.633, "step": 3289 }, { "epoch": 0.1775403378123145, "grad_norm": 0.9910102928148292, "learning_rate": 9.317994165878047e-06, "loss": 0.4484, "step": 3290 }, { "epoch": 0.17759430144082888, "grad_norm": 0.9121037577456016, "learning_rate": 9.317590322121793e-06, "loss": 0.404, "step": 3291 }, { "epoch": 0.17764826506934325, "grad_norm": 0.9437541509326917, "learning_rate": 9.317186368644136e-06, "loss": 0.3371, "step": 3292 }, { "epoch": 0.17770222869785765, "grad_norm": 1.0328304649893163, "learning_rate": 9.316782305456687e-06, "loss": 0.6038, "step": 3293 }, { "epoch": 0.17775619232637202, "grad_norm": 0.9688883352092765, "learning_rate": 9.316378132571057e-06, "loss": 0.531, "step": 3294 }, { "epoch": 0.17781015595488642, "grad_norm": 1.105327546125664, "learning_rate": 9.315973849998865e-06, "loss": 0.5082, "step": 3295 }, { "epoch": 0.1778641195834008, "grad_norm": 0.8824029983111507, "learning_rate": 
9.315569457751727e-06, "loss": 0.3592, "step": 3296 }, { "epoch": 0.17791808321191516, "grad_norm": 0.9852205024300851, "learning_rate": 9.31516495584127e-06, "loss": 0.5855, "step": 3297 }, { "epoch": 0.17797204684042955, "grad_norm": 0.9786839663225962, "learning_rate": 9.314760344279115e-06, "loss": 0.3922, "step": 3298 }, { "epoch": 0.17802601046894392, "grad_norm": 1.0900601031427941, "learning_rate": 9.314355623076893e-06, "loss": 0.4363, "step": 3299 }, { "epoch": 0.17807997409745832, "grad_norm": 1.3321222386509908, "learning_rate": 9.313950792246235e-06, "loss": 0.6845, "step": 3300 }, { "epoch": 0.1781339377259727, "grad_norm": 1.0227741303943747, "learning_rate": 9.313545851798781e-06, "loss": 0.4957, "step": 3301 }, { "epoch": 0.1781879013544871, "grad_norm": 1.0989740181221477, "learning_rate": 9.313140801746163e-06, "loss": 0.5653, "step": 3302 }, { "epoch": 0.17824186498300146, "grad_norm": 1.170586723336589, "learning_rate": 9.312735642100028e-06, "loss": 0.7054, "step": 3303 }, { "epoch": 0.17829582861151583, "grad_norm": 1.1674203575265656, "learning_rate": 9.312330372872017e-06, "loss": 0.4465, "step": 3304 }, { "epoch": 0.17834979224003022, "grad_norm": 0.9738439055009164, "learning_rate": 9.311924994073778e-06, "loss": 0.3496, "step": 3305 }, { "epoch": 0.1784037558685446, "grad_norm": 1.0672067509862484, "learning_rate": 9.311519505716963e-06, "loss": 0.5504, "step": 3306 }, { "epoch": 0.178457719497059, "grad_norm": 1.041829780251987, "learning_rate": 9.31111390781323e-06, "loss": 0.4337, "step": 3307 }, { "epoch": 0.17851168312557336, "grad_norm": 1.075398756699929, "learning_rate": 9.310708200374228e-06, "loss": 0.62, "step": 3308 }, { "epoch": 0.17856564675408773, "grad_norm": 1.02371527646056, "learning_rate": 9.310302383411622e-06, "loss": 0.4891, "step": 3309 }, { "epoch": 0.17861961038260213, "grad_norm": 0.9560526469992675, "learning_rate": 9.309896456937078e-06, "loss": 0.357, "step": 3310 }, { "epoch": 0.1786735740111165, "grad_norm": 0.9730731129826901, "learning_rate": 9.309490420962258e-06, "loss": 0.4397, "step": 3311 }, { "epoch": 0.1787275376396309, "grad_norm": 1.0152941707725756, "learning_rate": 9.309084275498835e-06, "loss": 0.44, "step": 3312 }, { "epoch": 0.17878150126814527, "grad_norm": 1.0142110520354706, "learning_rate": 9.308678020558483e-06, "loss": 0.6049, "step": 3313 }, { "epoch": 0.17883546489665966, "grad_norm": 1.0297795741479252, "learning_rate": 9.308271656152874e-06, "loss": 0.5356, "step": 3314 }, { "epoch": 0.17888942852517403, "grad_norm": 0.998483510934037, "learning_rate": 9.30786518229369e-06, "loss": 0.4465, "step": 3315 }, { "epoch": 0.1789433921536884, "grad_norm": 1.2140340542509656, "learning_rate": 9.307458598992614e-06, "loss": 0.6528, "step": 3316 }, { "epoch": 0.1789973557822028, "grad_norm": 1.1416732007239871, "learning_rate": 9.30705190626133e-06, "loss": 0.6565, "step": 3317 }, { "epoch": 0.17905131941071717, "grad_norm": 1.0686693098573046, "learning_rate": 9.306645104111526e-06, "loss": 0.5676, "step": 3318 }, { "epoch": 0.17910528303923157, "grad_norm": 1.1185008636101514, "learning_rate": 9.306238192554897e-06, "loss": 0.5591, "step": 3319 }, { "epoch": 0.17915924666774594, "grad_norm": 1.2855477621968296, "learning_rate": 9.305831171603137e-06, "loss": 0.6093, "step": 3320 }, { "epoch": 0.17921321029626033, "grad_norm": 1.1158796433255014, "learning_rate": 9.305424041267943e-06, "loss": 0.4844, "step": 3321 }, { "epoch": 0.1792671739247747, "grad_norm": 0.7429794435644391, "learning_rate": 
9.305016801561017e-06, "loss": 0.3228, "step": 3322 }, { "epoch": 0.17932113755328907, "grad_norm": 1.3104756570822056, "learning_rate": 9.304609452494064e-06, "loss": 0.6458, "step": 3323 }, { "epoch": 0.17937510118180347, "grad_norm": 1.2936074729727534, "learning_rate": 9.30420199407879e-06, "loss": 0.7339, "step": 3324 }, { "epoch": 0.17942906481031784, "grad_norm": 0.8958662939192786, "learning_rate": 9.303794426326906e-06, "loss": 0.4413, "step": 3325 }, { "epoch": 0.17948302843883224, "grad_norm": 1.1710054067687343, "learning_rate": 9.303386749250129e-06, "loss": 0.5735, "step": 3326 }, { "epoch": 0.1795369920673466, "grad_norm": 1.0309293850514063, "learning_rate": 9.302978962860172e-06, "loss": 0.559, "step": 3327 }, { "epoch": 0.17959095569586098, "grad_norm": 1.0674542284716562, "learning_rate": 9.302571067168758e-06, "loss": 0.4708, "step": 3328 }, { "epoch": 0.17964491932437537, "grad_norm": 1.2177004632352904, "learning_rate": 9.30216306218761e-06, "loss": 0.6017, "step": 3329 }, { "epoch": 0.17969888295288974, "grad_norm": 1.007711145174143, "learning_rate": 9.301754947928451e-06, "loss": 0.4493, "step": 3330 }, { "epoch": 0.17975284658140414, "grad_norm": 0.9700419388779866, "learning_rate": 9.301346724403015e-06, "loss": 0.5282, "step": 3331 }, { "epoch": 0.1798068102099185, "grad_norm": 0.8812928901911473, "learning_rate": 9.300938391623034e-06, "loss": 0.3274, "step": 3332 }, { "epoch": 0.1798607738384329, "grad_norm": 0.960260917232155, "learning_rate": 9.300529949600243e-06, "loss": 0.4369, "step": 3333 }, { "epoch": 0.17991473746694728, "grad_norm": 0.9199512596536168, "learning_rate": 9.30012139834638e-06, "loss": 0.4801, "step": 3334 }, { "epoch": 0.17996870109546165, "grad_norm": 1.1603865245724476, "learning_rate": 9.299712737873188e-06, "loss": 0.5125, "step": 3335 }, { "epoch": 0.18002266472397604, "grad_norm": 0.8271989275867807, "learning_rate": 9.299303968192414e-06, "loss": 0.3391, "step": 3336 }, { "epoch": 0.18007662835249041, "grad_norm": 1.1428907308232006, "learning_rate": 9.298895089315802e-06, "loss": 0.4528, "step": 3337 }, { "epoch": 0.1801305919810048, "grad_norm": 1.0306078452013545, "learning_rate": 9.29848610125511e-06, "loss": 0.5307, "step": 3338 }, { "epoch": 0.18018455560951918, "grad_norm": 1.0699440464244176, "learning_rate": 9.298077004022089e-06, "loss": 0.5289, "step": 3339 }, { "epoch": 0.18023851923803358, "grad_norm": 1.2736183734576514, "learning_rate": 9.297667797628498e-06, "loss": 0.4943, "step": 3340 }, { "epoch": 0.18029248286654795, "grad_norm": 1.0752536850350756, "learning_rate": 9.297258482086095e-06, "loss": 0.5378, "step": 3341 }, { "epoch": 0.18034644649506232, "grad_norm": 1.0861044149618735, "learning_rate": 9.296849057406646e-06, "loss": 0.5236, "step": 3342 }, { "epoch": 0.18040041012357672, "grad_norm": 0.880314867656759, "learning_rate": 9.29643952360192e-06, "loss": 0.4368, "step": 3343 }, { "epoch": 0.18045437375209109, "grad_norm": 1.108225132249348, "learning_rate": 9.296029880683687e-06, "loss": 0.5206, "step": 3344 }, { "epoch": 0.18050833738060548, "grad_norm": 0.9241104486220661, "learning_rate": 9.295620128663718e-06, "loss": 0.4938, "step": 3345 }, { "epoch": 0.18056230100911985, "grad_norm": 1.2673764791630482, "learning_rate": 9.29521026755379e-06, "loss": 0.4529, "step": 3346 }, { "epoch": 0.18061626463763422, "grad_norm": 0.9680492789836844, "learning_rate": 9.294800297365686e-06, "loss": 0.5137, "step": 3347 }, { "epoch": 0.18067022826614862, "grad_norm": 0.8640926526322925, "learning_rate": 
9.294390218111189e-06, "loss": 0.4024, "step": 3348 }, { "epoch": 0.180724191894663, "grad_norm": 0.8898384559968857, "learning_rate": 9.293980029802081e-06, "loss": 0.3496, "step": 3349 }, { "epoch": 0.1807781555231774, "grad_norm": 1.0346616866166647, "learning_rate": 9.293569732450152e-06, "loss": 0.4422, "step": 3350 }, { "epoch": 0.18083211915169176, "grad_norm": 0.9168741296014665, "learning_rate": 9.293159326067199e-06, "loss": 0.4061, "step": 3351 }, { "epoch": 0.18088608278020615, "grad_norm": 1.1574232540369924, "learning_rate": 9.292748810665011e-06, "loss": 0.7215, "step": 3352 }, { "epoch": 0.18094004640872052, "grad_norm": 1.0864359202445553, "learning_rate": 9.292338186255392e-06, "loss": 0.489, "step": 3353 }, { "epoch": 0.1809940100372349, "grad_norm": 1.1430777170980695, "learning_rate": 9.291927452850142e-06, "loss": 0.6622, "step": 3354 }, { "epoch": 0.1810479736657493, "grad_norm": 0.9433634004684675, "learning_rate": 9.291516610461064e-06, "loss": 0.6419, "step": 3355 }, { "epoch": 0.18110193729426366, "grad_norm": 0.8835197466182724, "learning_rate": 9.291105659099969e-06, "loss": 0.4394, "step": 3356 }, { "epoch": 0.18115590092277806, "grad_norm": 1.0789301874473527, "learning_rate": 9.290694598778665e-06, "loss": 0.4438, "step": 3357 }, { "epoch": 0.18120986455129243, "grad_norm": 0.9060626523725631, "learning_rate": 9.29028342950897e-06, "loss": 0.4452, "step": 3358 }, { "epoch": 0.18126382817980682, "grad_norm": 1.1660051766039203, "learning_rate": 9.2898721513027e-06, "loss": 0.5056, "step": 3359 }, { "epoch": 0.1813177918083212, "grad_norm": 1.1512519747094312, "learning_rate": 9.289460764171672e-06, "loss": 0.5044, "step": 3360 }, { "epoch": 0.18137175543683556, "grad_norm": 0.9380121697826385, "learning_rate": 9.289049268127713e-06, "loss": 0.4698, "step": 3361 }, { "epoch": 0.18142571906534996, "grad_norm": 1.0702939795518407, "learning_rate": 9.288637663182651e-06, "loss": 0.4988, "step": 3362 }, { "epoch": 0.18147968269386433, "grad_norm": 1.1465321957186811, "learning_rate": 9.288225949348314e-06, "loss": 0.6344, "step": 3363 }, { "epoch": 0.18153364632237873, "grad_norm": 1.162711795318182, "learning_rate": 9.287814126636536e-06, "loss": 0.7037, "step": 3364 }, { "epoch": 0.1815876099508931, "grad_norm": 0.913961290755787, "learning_rate": 9.287402195059152e-06, "loss": 0.4598, "step": 3365 }, { "epoch": 0.18164157357940747, "grad_norm": 0.9974714743613253, "learning_rate": 9.286990154628003e-06, "loss": 0.409, "step": 3366 }, { "epoch": 0.18169553720792186, "grad_norm": 0.938876778683415, "learning_rate": 9.286578005354929e-06, "loss": 0.3952, "step": 3367 }, { "epoch": 0.18174950083643623, "grad_norm": 1.2622041805409798, "learning_rate": 9.286165747251777e-06, "loss": 0.4929, "step": 3368 }, { "epoch": 0.18180346446495063, "grad_norm": 1.0862165460584956, "learning_rate": 9.285753380330397e-06, "loss": 0.4817, "step": 3369 }, { "epoch": 0.181857428093465, "grad_norm": 1.1038101367234663, "learning_rate": 9.28534090460264e-06, "loss": 0.4966, "step": 3370 }, { "epoch": 0.1819113917219794, "grad_norm": 1.1529182834691796, "learning_rate": 9.284928320080359e-06, "loss": 0.4643, "step": 3371 }, { "epoch": 0.18196535535049377, "grad_norm": 0.9137565597669305, "learning_rate": 9.284515626775416e-06, "loss": 0.3574, "step": 3372 }, { "epoch": 0.18201931897900814, "grad_norm": 0.8673277395531713, "learning_rate": 9.284102824699667e-06, "loss": 0.3566, "step": 3373 }, { "epoch": 0.18207328260752254, "grad_norm": 1.1428203569066189, "learning_rate": 
9.283689913864982e-06, "loss": 0.7193, "step": 3374 }, { "epoch": 0.1821272462360369, "grad_norm": 1.1846749187720897, "learning_rate": 9.283276894283224e-06, "loss": 0.6534, "step": 3375 }, { "epoch": 0.1821812098645513, "grad_norm": 1.2627483587706738, "learning_rate": 9.282863765966264e-06, "loss": 0.6325, "step": 3376 }, { "epoch": 0.18223517349306567, "grad_norm": 1.0622289099828244, "learning_rate": 9.28245052892598e-06, "loss": 0.4054, "step": 3377 }, { "epoch": 0.18228913712158004, "grad_norm": 0.8909687306982086, "learning_rate": 9.282037183174246e-06, "loss": 0.3957, "step": 3378 }, { "epoch": 0.18234310075009444, "grad_norm": 0.9890775774933492, "learning_rate": 9.28162372872294e-06, "loss": 0.4631, "step": 3379 }, { "epoch": 0.1823970643786088, "grad_norm": 0.9572318660208106, "learning_rate": 9.281210165583948e-06, "loss": 0.4878, "step": 3380 }, { "epoch": 0.1824510280071232, "grad_norm": 0.7574305009864738, "learning_rate": 9.280796493769154e-06, "loss": 0.3604, "step": 3381 }, { "epoch": 0.18250499163563758, "grad_norm": 1.0412049330231905, "learning_rate": 9.280382713290449e-06, "loss": 0.6203, "step": 3382 }, { "epoch": 0.18255895526415197, "grad_norm": 0.8475356447430414, "learning_rate": 9.279968824159726e-06, "loss": 0.4097, "step": 3383 }, { "epoch": 0.18261291889266634, "grad_norm": 1.0736785644586544, "learning_rate": 9.279554826388878e-06, "loss": 0.5533, "step": 3384 }, { "epoch": 0.1826668825211807, "grad_norm": 1.1067516300997777, "learning_rate": 9.279140719989806e-06, "loss": 0.4882, "step": 3385 }, { "epoch": 0.1827208461496951, "grad_norm": 1.0654879691357955, "learning_rate": 9.278726504974412e-06, "loss": 0.5979, "step": 3386 }, { "epoch": 0.18277480977820948, "grad_norm": 0.9850064813014754, "learning_rate": 9.278312181354599e-06, "loss": 0.5414, "step": 3387 }, { "epoch": 0.18282877340672388, "grad_norm": 0.8460700886497378, "learning_rate": 9.277897749142276e-06, "loss": 0.3356, "step": 3388 }, { "epoch": 0.18288273703523825, "grad_norm": 0.9617044534574388, "learning_rate": 9.277483208349357e-06, "loss": 0.5169, "step": 3389 }, { "epoch": 0.18293670066375264, "grad_norm": 1.023612753160458, "learning_rate": 9.27706855898775e-06, "loss": 0.4592, "step": 3390 }, { "epoch": 0.182990664292267, "grad_norm": 0.8381025289763231, "learning_rate": 9.276653801069379e-06, "loss": 0.3695, "step": 3391 }, { "epoch": 0.18304462792078138, "grad_norm": 1.0997135090772479, "learning_rate": 9.27623893460616e-06, "loss": 0.4523, "step": 3392 }, { "epoch": 0.18309859154929578, "grad_norm": 0.8530527387299963, "learning_rate": 9.275823959610019e-06, "loss": 0.425, "step": 3393 }, { "epoch": 0.18315255517781015, "grad_norm": 0.9607476891833553, "learning_rate": 9.275408876092884e-06, "loss": 0.4553, "step": 3394 }, { "epoch": 0.18320651880632455, "grad_norm": 1.1152480720624944, "learning_rate": 9.27499368406668e-06, "loss": 0.5299, "step": 3395 }, { "epoch": 0.18326048243483892, "grad_norm": 0.9176787252004119, "learning_rate": 9.274578383543346e-06, "loss": 0.4425, "step": 3396 }, { "epoch": 0.1833144460633533, "grad_norm": 1.0642023660642137, "learning_rate": 9.274162974534814e-06, "loss": 0.4749, "step": 3397 }, { "epoch": 0.18336840969186768, "grad_norm": 1.2651195464192453, "learning_rate": 9.273747457053025e-06, "loss": 0.6713, "step": 3398 }, { "epoch": 0.18342237332038205, "grad_norm": 1.0580599975443903, "learning_rate": 9.27333183110992e-06, "loss": 0.48, "step": 3399 }, { "epoch": 0.18347633694889645, "grad_norm": 1.0845136587764994, "learning_rate": 
9.272916096717447e-06, "loss": 0.481, "step": 3400 }, { "epoch": 0.18353030057741082, "grad_norm": 1.2201654816547105, "learning_rate": 9.272500253887551e-06, "loss": 0.5575, "step": 3401 }, { "epoch": 0.18358426420592522, "grad_norm": 1.025715500242841, "learning_rate": 9.272084302632187e-06, "loss": 0.4312, "step": 3402 }, { "epoch": 0.1836382278344396, "grad_norm": 1.2405464753238307, "learning_rate": 9.27166824296331e-06, "loss": 0.6212, "step": 3403 }, { "epoch": 0.18369219146295396, "grad_norm": 0.9546881179283732, "learning_rate": 9.271252074892875e-06, "loss": 0.4591, "step": 3404 }, { "epoch": 0.18374615509146836, "grad_norm": 1.4233247038148675, "learning_rate": 9.270835798432845e-06, "loss": 0.6821, "step": 3405 }, { "epoch": 0.18380011871998272, "grad_norm": 0.9455876618440167, "learning_rate": 9.270419413595184e-06, "loss": 0.3976, "step": 3406 }, { "epoch": 0.18385408234849712, "grad_norm": 0.9898702111549852, "learning_rate": 9.27000292039186e-06, "loss": 0.5434, "step": 3407 }, { "epoch": 0.1839080459770115, "grad_norm": 0.9388878687183496, "learning_rate": 9.269586318834841e-06, "loss": 0.4107, "step": 3408 }, { "epoch": 0.1839620096055259, "grad_norm": 1.2529233911758717, "learning_rate": 9.269169608936105e-06, "loss": 0.6721, "step": 3409 }, { "epoch": 0.18401597323404026, "grad_norm": 0.9736496451066581, "learning_rate": 9.268752790707625e-06, "loss": 0.4169, "step": 3410 }, { "epoch": 0.18406993686255463, "grad_norm": 1.0346357610468684, "learning_rate": 9.268335864161381e-06, "loss": 0.6374, "step": 3411 }, { "epoch": 0.18412390049106903, "grad_norm": 1.0285352222222677, "learning_rate": 9.267918829309356e-06, "loss": 0.4478, "step": 3412 }, { "epoch": 0.1841778641195834, "grad_norm": 1.1646391593992762, "learning_rate": 9.267501686163539e-06, "loss": 0.5021, "step": 3413 }, { "epoch": 0.1842318277480978, "grad_norm": 1.1542285800713197, "learning_rate": 9.267084434735916e-06, "loss": 0.5561, "step": 3414 }, { "epoch": 0.18428579137661216, "grad_norm": 0.8891687949404169, "learning_rate": 9.26666707503848e-06, "loss": 0.4995, "step": 3415 }, { "epoch": 0.18433975500512653, "grad_norm": 1.0160714571981209, "learning_rate": 9.266249607083225e-06, "loss": 0.5065, "step": 3416 }, { "epoch": 0.18439371863364093, "grad_norm": 1.0374493742540898, "learning_rate": 9.265832030882152e-06, "loss": 0.566, "step": 3417 }, { "epoch": 0.1844476822621553, "grad_norm": 0.8579524403058835, "learning_rate": 9.265414346447261e-06, "loss": 0.4901, "step": 3418 }, { "epoch": 0.1845016458906697, "grad_norm": 0.9946558535269783, "learning_rate": 9.264996553790557e-06, "loss": 0.6282, "step": 3419 }, { "epoch": 0.18455560951918407, "grad_norm": 1.0222891000111585, "learning_rate": 9.264578652924047e-06, "loss": 0.3936, "step": 3420 }, { "epoch": 0.18460957314769846, "grad_norm": 1.2257043053142351, "learning_rate": 9.264160643859744e-06, "loss": 0.5604, "step": 3421 }, { "epoch": 0.18466353677621283, "grad_norm": 1.2365215955352395, "learning_rate": 9.263742526609659e-06, "loss": 0.6103, "step": 3422 }, { "epoch": 0.1847175004047272, "grad_norm": 1.4488132129875555, "learning_rate": 9.263324301185813e-06, "loss": 0.5988, "step": 3423 }, { "epoch": 0.1847714640332416, "grad_norm": 1.1252588567062876, "learning_rate": 9.262905967600223e-06, "loss": 0.5053, "step": 3424 }, { "epoch": 0.18482542766175597, "grad_norm": 1.0981007806889382, "learning_rate": 9.262487525864912e-06, "loss": 0.5106, "step": 3425 }, { "epoch": 0.18487939129027037, "grad_norm": 1.1648618486236735, "learning_rate": 
9.262068975991909e-06, "loss": 0.4392, "step": 3426 }, { "epoch": 0.18493335491878474, "grad_norm": 1.0314005729561397, "learning_rate": 9.261650317993243e-06, "loss": 0.5458, "step": 3427 }, { "epoch": 0.1849873185472991, "grad_norm": 0.8779380437821133, "learning_rate": 9.261231551880944e-06, "loss": 0.4519, "step": 3428 }, { "epoch": 0.1850412821758135, "grad_norm": 1.325374382885057, "learning_rate": 9.260812677667053e-06, "loss": 0.6434, "step": 3429 }, { "epoch": 0.18509524580432787, "grad_norm": 1.130735575226392, "learning_rate": 9.260393695363603e-06, "loss": 0.5674, "step": 3430 }, { "epoch": 0.18514920943284227, "grad_norm": 1.159351325546032, "learning_rate": 9.259974604982639e-06, "loss": 0.557, "step": 3431 }, { "epoch": 0.18520317306135664, "grad_norm": 0.9846036727499488, "learning_rate": 9.259555406536206e-06, "loss": 0.4268, "step": 3432 }, { "epoch": 0.18525713668987104, "grad_norm": 0.7502301814664805, "learning_rate": 9.259136100036352e-06, "loss": 0.3467, "step": 3433 }, { "epoch": 0.1853111003183854, "grad_norm": 0.9692974823179602, "learning_rate": 9.258716685495128e-06, "loss": 0.4271, "step": 3434 }, { "epoch": 0.18536506394689978, "grad_norm": 0.7814698153113552, "learning_rate": 9.25829716292459e-06, "loss": 0.3268, "step": 3435 }, { "epoch": 0.18541902757541417, "grad_norm": 1.3395346973117894, "learning_rate": 9.25787753233679e-06, "loss": 0.6795, "step": 3436 }, { "epoch": 0.18547299120392854, "grad_norm": 0.9789329771624914, "learning_rate": 9.257457793743797e-06, "loss": 0.5786, "step": 3437 }, { "epoch": 0.18552695483244294, "grad_norm": 1.0635472203588612, "learning_rate": 9.257037947157669e-06, "loss": 0.4254, "step": 3438 }, { "epoch": 0.1855809184609573, "grad_norm": 1.060849791958406, "learning_rate": 9.256617992590474e-06, "loss": 0.4779, "step": 3439 }, { "epoch": 0.1856348820894717, "grad_norm": 1.115484162825859, "learning_rate": 9.256197930054282e-06, "loss": 0.4942, "step": 3440 }, { "epoch": 0.18568884571798608, "grad_norm": 1.1556526716139666, "learning_rate": 9.255777759561165e-06, "loss": 0.5941, "step": 3441 }, { "epoch": 0.18574280934650045, "grad_norm": 1.1886439767462937, "learning_rate": 9.255357481123202e-06, "loss": 0.4939, "step": 3442 }, { "epoch": 0.18579677297501485, "grad_norm": 1.283475024261839, "learning_rate": 9.25493709475247e-06, "loss": 0.7041, "step": 3443 }, { "epoch": 0.18585073660352922, "grad_norm": 0.9722056209296598, "learning_rate": 9.254516600461051e-06, "loss": 0.4517, "step": 3444 }, { "epoch": 0.1859047002320436, "grad_norm": 1.1458358473171781, "learning_rate": 9.254095998261032e-06, "loss": 0.5273, "step": 3445 }, { "epoch": 0.18595866386055798, "grad_norm": 0.8864873146178459, "learning_rate": 9.2536752881645e-06, "loss": 0.4115, "step": 3446 }, { "epoch": 0.18601262748907235, "grad_norm": 1.1084416753074882, "learning_rate": 9.25325447018355e-06, "loss": 0.5521, "step": 3447 }, { "epoch": 0.18606659111758675, "grad_norm": 0.8723446745150307, "learning_rate": 9.252833544330271e-06, "loss": 0.4044, "step": 3448 }, { "epoch": 0.18612055474610112, "grad_norm": 1.00134020577514, "learning_rate": 9.252412510616766e-06, "loss": 0.4214, "step": 3449 }, { "epoch": 0.18617451837461552, "grad_norm": 1.2697332778957287, "learning_rate": 9.251991369055133e-06, "loss": 0.5776, "step": 3450 }, { "epoch": 0.18622848200312989, "grad_norm": 1.0148362788993683, "learning_rate": 9.251570119657477e-06, "loss": 0.3954, "step": 3451 }, { "epoch": 0.18628244563164428, "grad_norm": 0.8625814751685347, "learning_rate": 
9.251148762435904e-06, "loss": 0.4109, "step": 3452 }, { "epoch": 0.18633640926015865, "grad_norm": 0.7929076942918725, "learning_rate": 9.250727297402526e-06, "loss": 0.3682, "step": 3453 }, { "epoch": 0.18639037288867302, "grad_norm": 1.1025806886945726, "learning_rate": 9.250305724569458e-06, "loss": 0.5482, "step": 3454 }, { "epoch": 0.18644433651718742, "grad_norm": 0.9318397616702896, "learning_rate": 9.24988404394881e-06, "loss": 0.4039, "step": 3455 }, { "epoch": 0.1864983001457018, "grad_norm": 1.3202777018261782, "learning_rate": 9.249462255552708e-06, "loss": 0.718, "step": 3456 }, { "epoch": 0.1865522637742162, "grad_norm": 1.0447643179725168, "learning_rate": 9.249040359393273e-06, "loss": 0.4465, "step": 3457 }, { "epoch": 0.18660622740273056, "grad_norm": 1.1410350321319078, "learning_rate": 9.248618355482629e-06, "loss": 0.6849, "step": 3458 }, { "epoch": 0.18666019103124495, "grad_norm": 0.9879405716650461, "learning_rate": 9.248196243832906e-06, "loss": 0.4913, "step": 3459 }, { "epoch": 0.18671415465975932, "grad_norm": 1.1763099788805969, "learning_rate": 9.247774024456235e-06, "loss": 0.4552, "step": 3460 }, { "epoch": 0.1867681182882737, "grad_norm": 0.9573549291367229, "learning_rate": 9.247351697364751e-06, "loss": 0.3575, "step": 3461 }, { "epoch": 0.1868220819167881, "grad_norm": 1.2663126514944159, "learning_rate": 9.246929262570594e-06, "loss": 0.5593, "step": 3462 }, { "epoch": 0.18687604554530246, "grad_norm": 1.0076383687905381, "learning_rate": 9.246506720085904e-06, "loss": 0.5205, "step": 3463 }, { "epoch": 0.18693000917381686, "grad_norm": 1.1711432966567077, "learning_rate": 9.246084069922825e-06, "loss": 0.5753, "step": 3464 }, { "epoch": 0.18698397280233123, "grad_norm": 1.1069830558264135, "learning_rate": 9.245661312093504e-06, "loss": 0.4602, "step": 3465 }, { "epoch": 0.1870379364308456, "grad_norm": 0.9064463942479322, "learning_rate": 9.245238446610093e-06, "loss": 0.4034, "step": 3466 }, { "epoch": 0.18709190005936, "grad_norm": 1.0574835197248882, "learning_rate": 9.244815473484746e-06, "loss": 0.4813, "step": 3467 }, { "epoch": 0.18714586368787436, "grad_norm": 0.9738614849331148, "learning_rate": 9.244392392729617e-06, "loss": 0.4542, "step": 3468 }, { "epoch": 0.18719982731638876, "grad_norm": 1.189935112463329, "learning_rate": 9.243969204356866e-06, "loss": 0.675, "step": 3469 }, { "epoch": 0.18725379094490313, "grad_norm": 1.1330631411943373, "learning_rate": 9.24354590837866e-06, "loss": 0.43, "step": 3470 }, { "epoch": 0.18730775457341753, "grad_norm": 0.9281664275659937, "learning_rate": 9.24312250480716e-06, "loss": 0.55, "step": 3471 }, { "epoch": 0.1873617182019319, "grad_norm": 1.0399880070461784, "learning_rate": 9.242698993654537e-06, "loss": 0.5427, "step": 3472 }, { "epoch": 0.18741568183044627, "grad_norm": 1.1990096032698496, "learning_rate": 9.242275374932964e-06, "loss": 0.6653, "step": 3473 }, { "epoch": 0.18746964545896067, "grad_norm": 1.0844353409882246, "learning_rate": 9.241851648654615e-06, "loss": 0.4249, "step": 3474 }, { "epoch": 0.18752360908747503, "grad_norm": 0.9047991933503821, "learning_rate": 9.24142781483167e-06, "loss": 0.4016, "step": 3475 }, { "epoch": 0.18757757271598943, "grad_norm": 1.029046545861315, "learning_rate": 9.241003873476308e-06, "loss": 0.5697, "step": 3476 }, { "epoch": 0.1876315363445038, "grad_norm": 1.0062966383079572, "learning_rate": 9.240579824600715e-06, "loss": 0.494, "step": 3477 }, { "epoch": 0.1876854999730182, "grad_norm": 1.186940306943247, "learning_rate": 
9.240155668217077e-06, "loss": 0.5438, "step": 3478 }, { "epoch": 0.18773946360153257, "grad_norm": 1.2879449774601435, "learning_rate": 9.239731404337586e-06, "loss": 0.5318, "step": 3479 }, { "epoch": 0.18779342723004694, "grad_norm": 1.0613821417380622, "learning_rate": 9.239307032974438e-06, "loss": 0.5963, "step": 3480 }, { "epoch": 0.18784739085856134, "grad_norm": 1.6503328933051795, "learning_rate": 9.238882554139826e-06, "loss": 0.6752, "step": 3481 }, { "epoch": 0.1879013544870757, "grad_norm": 1.2321961575999703, "learning_rate": 9.238457967845952e-06, "loss": 0.5486, "step": 3482 }, { "epoch": 0.1879553181155901, "grad_norm": 1.2075999319593684, "learning_rate": 9.238033274105019e-06, "loss": 0.6074, "step": 3483 }, { "epoch": 0.18800928174410447, "grad_norm": 1.1141759103653237, "learning_rate": 9.23760847292923e-06, "loss": 0.5654, "step": 3484 }, { "epoch": 0.18806324537261884, "grad_norm": 1.0834347698686992, "learning_rate": 9.237183564330799e-06, "loss": 0.4681, "step": 3485 }, { "epoch": 0.18811720900113324, "grad_norm": 1.2143611832361731, "learning_rate": 9.236758548321936e-06, "loss": 0.5697, "step": 3486 }, { "epoch": 0.1881711726296476, "grad_norm": 1.0168337874864963, "learning_rate": 9.236333424914858e-06, "loss": 0.5645, "step": 3487 }, { "epoch": 0.188225136258162, "grad_norm": 1.0190346442508489, "learning_rate": 9.23590819412178e-06, "loss": 0.6447, "step": 3488 }, { "epoch": 0.18827909988667638, "grad_norm": 1.05238358321972, "learning_rate": 9.235482855954927e-06, "loss": 0.5029, "step": 3489 }, { "epoch": 0.18833306351519077, "grad_norm": 0.777758048009598, "learning_rate": 9.235057410426523e-06, "loss": 0.3958, "step": 3490 }, { "epoch": 0.18838702714370514, "grad_norm": 1.0656339621105695, "learning_rate": 9.234631857548793e-06, "loss": 0.4746, "step": 3491 }, { "epoch": 0.1884409907722195, "grad_norm": 1.1436061916244185, "learning_rate": 9.23420619733397e-06, "loss": 0.5674, "step": 3492 }, { "epoch": 0.1884949544007339, "grad_norm": 1.1530082096168304, "learning_rate": 9.233780429794289e-06, "loss": 0.5282, "step": 3493 }, { "epoch": 0.18854891802924828, "grad_norm": 1.0219645271003759, "learning_rate": 9.233354554941985e-06, "loss": 0.5054, "step": 3494 }, { "epoch": 0.18860288165776268, "grad_norm": 1.1820011605223422, "learning_rate": 9.232928572789298e-06, "loss": 0.4161, "step": 3495 }, { "epoch": 0.18865684528627705, "grad_norm": 1.007723844492105, "learning_rate": 9.232502483348473e-06, "loss": 0.4088, "step": 3496 }, { "epoch": 0.18871080891479142, "grad_norm": 1.359710262049616, "learning_rate": 9.232076286631755e-06, "loss": 0.5925, "step": 3497 }, { "epoch": 0.18876477254330581, "grad_norm": 0.967431021953732, "learning_rate": 9.231649982651395e-06, "loss": 0.4017, "step": 3498 }, { "epoch": 0.18881873617182018, "grad_norm": 0.9829602598227183, "learning_rate": 9.231223571419641e-06, "loss": 0.4106, "step": 3499 }, { "epoch": 0.18887269980033458, "grad_norm": 1.099857209487639, "learning_rate": 9.230797052948753e-06, "loss": 0.5176, "step": 3500 }, { "epoch": 0.18887269980033458, "eval_loss": 0.5863323211669922, "eval_runtime": 160.241, "eval_samples_per_second": 21.461, "eval_steps_per_second": 0.899, "step": 3500 }, { "epoch": 0.18892666342884895, "grad_norm": 0.9413599538002425, "learning_rate": 9.23037042725099e-06, "loss": 0.425, "step": 3501 }, { "epoch": 0.18898062705736335, "grad_norm": 1.0803720114828943, "learning_rate": 9.22994369433861e-06, "loss": 0.4157, "step": 3502 }, { "epoch": 0.18903459068587772, "grad_norm": 
0.978064266852157, "learning_rate": 9.229516854223878e-06, "loss": 0.5308, "step": 3503 }, { "epoch": 0.1890885543143921, "grad_norm": 1.236389627655464, "learning_rate": 9.229089906919064e-06, "loss": 0.6252, "step": 3504 }, { "epoch": 0.18914251794290649, "grad_norm": 1.0322789006066662, "learning_rate": 9.228662852436438e-06, "loss": 0.5367, "step": 3505 }, { "epoch": 0.18919648157142085, "grad_norm": 1.1344734389597244, "learning_rate": 9.228235690788274e-06, "loss": 0.4481, "step": 3506 }, { "epoch": 0.18925044519993525, "grad_norm": 0.9318181110759796, "learning_rate": 9.227808421986851e-06, "loss": 0.4044, "step": 3507 }, { "epoch": 0.18930440882844962, "grad_norm": 0.939628546952394, "learning_rate": 9.227381046044444e-06, "loss": 0.4319, "step": 3508 }, { "epoch": 0.18935837245696402, "grad_norm": 0.8636356009954431, "learning_rate": 9.226953562973343e-06, "loss": 0.4298, "step": 3509 }, { "epoch": 0.1894123360854784, "grad_norm": 1.3868254043795416, "learning_rate": 9.226525972785827e-06, "loss": 0.5882, "step": 3510 }, { "epoch": 0.18946629971399276, "grad_norm": 1.1375186306338791, "learning_rate": 9.226098275494192e-06, "loss": 0.3922, "step": 3511 }, { "epoch": 0.18952026334250716, "grad_norm": 1.0638640024274246, "learning_rate": 9.225670471110728e-06, "loss": 0.6124, "step": 3512 }, { "epoch": 0.18957422697102153, "grad_norm": 1.1454161836759171, "learning_rate": 9.225242559647729e-06, "loss": 0.5908, "step": 3513 }, { "epoch": 0.18962819059953592, "grad_norm": 0.9042197694399127, "learning_rate": 9.224814541117493e-06, "loss": 0.4233, "step": 3514 }, { "epoch": 0.1896821542280503, "grad_norm": 1.0858707555162825, "learning_rate": 9.224386415532327e-06, "loss": 0.4924, "step": 3515 }, { "epoch": 0.18973611785656466, "grad_norm": 0.9007210257822292, "learning_rate": 9.22395818290453e-06, "loss": 0.4306, "step": 3516 }, { "epoch": 0.18979008148507906, "grad_norm": 1.2072767463676732, "learning_rate": 9.223529843246412e-06, "loss": 0.5953, "step": 3517 }, { "epoch": 0.18984404511359343, "grad_norm": 1.18181194335848, "learning_rate": 9.223101396570285e-06, "loss": 0.6356, "step": 3518 }, { "epoch": 0.18989800874210783, "grad_norm": 0.8823631791466998, "learning_rate": 9.22267284288846e-06, "loss": 0.4222, "step": 3519 }, { "epoch": 0.1899519723706222, "grad_norm": 1.1492767388930047, "learning_rate": 9.222244182213258e-06, "loss": 0.5605, "step": 3520 }, { "epoch": 0.1900059359991366, "grad_norm": 0.915733559599143, "learning_rate": 9.221815414556998e-06, "loss": 0.4578, "step": 3521 }, { "epoch": 0.19005989962765096, "grad_norm": 0.8558759151395682, "learning_rate": 9.221386539932e-06, "loss": 0.3731, "step": 3522 }, { "epoch": 0.19011386325616533, "grad_norm": 1.2633267791968665, "learning_rate": 9.220957558350594e-06, "loss": 0.6015, "step": 3523 }, { "epoch": 0.19016782688467973, "grad_norm": 0.9069104546218806, "learning_rate": 9.220528469825107e-06, "loss": 0.3843, "step": 3524 }, { "epoch": 0.1902217905131941, "grad_norm": 1.100535321502579, "learning_rate": 9.220099274367874e-06, "loss": 0.6306, "step": 3525 }, { "epoch": 0.1902757541417085, "grad_norm": 1.0841256832675252, "learning_rate": 9.219669971991228e-06, "loss": 0.6069, "step": 3526 }, { "epoch": 0.19032971777022287, "grad_norm": 0.8915221394550601, "learning_rate": 9.219240562707508e-06, "loss": 0.4238, "step": 3527 }, { "epoch": 0.19038368139873726, "grad_norm": 1.0889425069056702, "learning_rate": 9.218811046529055e-06, "loss": 0.5184, "step": 3528 }, { "epoch": 0.19043764502725163, "grad_norm": 
0.9314305189264818, "learning_rate": 9.218381423468218e-06, "loss": 0.4617, "step": 3529 }, { "epoch": 0.190491608655766, "grad_norm": 1.0642877726546343, "learning_rate": 9.217951693537341e-06, "loss": 0.442, "step": 3530 }, { "epoch": 0.1905455722842804, "grad_norm": 1.1320889404173977, "learning_rate": 9.217521856748776e-06, "loss": 0.4866, "step": 3531 }, { "epoch": 0.19059953591279477, "grad_norm": 1.3647397692872159, "learning_rate": 9.217091913114874e-06, "loss": 0.6245, "step": 3532 }, { "epoch": 0.19065349954130917, "grad_norm": 1.117233877126864, "learning_rate": 9.216661862647997e-06, "loss": 0.5378, "step": 3533 }, { "epoch": 0.19070746316982354, "grad_norm": 1.2285993461638316, "learning_rate": 9.216231705360502e-06, "loss": 0.5282, "step": 3534 }, { "epoch": 0.1907614267983379, "grad_norm": 1.1024656763318308, "learning_rate": 9.215801441264752e-06, "loss": 0.6157, "step": 3535 }, { "epoch": 0.1908153904268523, "grad_norm": 1.0410446994674691, "learning_rate": 9.215371070373114e-06, "loss": 0.5525, "step": 3536 }, { "epoch": 0.19086935405536667, "grad_norm": 0.818203398417902, "learning_rate": 9.214940592697959e-06, "loss": 0.3322, "step": 3537 }, { "epoch": 0.19092331768388107, "grad_norm": 1.2003426936065618, "learning_rate": 9.214510008251656e-06, "loss": 0.4921, "step": 3538 }, { "epoch": 0.19097728131239544, "grad_norm": 0.9449314471918646, "learning_rate": 9.214079317046583e-06, "loss": 0.5402, "step": 3539 }, { "epoch": 0.19103124494090984, "grad_norm": 1.0881166789556225, "learning_rate": 9.213648519095118e-06, "loss": 0.5106, "step": 3540 }, { "epoch": 0.1910852085694242, "grad_norm": 1.258125117877621, "learning_rate": 9.213217614409641e-06, "loss": 0.4694, "step": 3541 }, { "epoch": 0.19113917219793858, "grad_norm": 1.1953205087009697, "learning_rate": 9.21278660300254e-06, "loss": 0.6117, "step": 3542 }, { "epoch": 0.19119313582645298, "grad_norm": 1.0545429903101766, "learning_rate": 9.212355484886198e-06, "loss": 0.57, "step": 3543 }, { "epoch": 0.19124709945496735, "grad_norm": 1.2301276625917152, "learning_rate": 9.21192426007301e-06, "loss": 0.563, "step": 3544 }, { "epoch": 0.19130106308348174, "grad_norm": 1.2361063244687553, "learning_rate": 9.211492928575367e-06, "loss": 0.4705, "step": 3545 }, { "epoch": 0.1913550267119961, "grad_norm": 1.157708055320251, "learning_rate": 9.211061490405667e-06, "loss": 0.5523, "step": 3546 }, { "epoch": 0.1914089903405105, "grad_norm": 1.111581194563244, "learning_rate": 9.21062994557631e-06, "loss": 0.4602, "step": 3547 }, { "epoch": 0.19146295396902488, "grad_norm": 1.2308133479713206, "learning_rate": 9.2101982940997e-06, "loss": 0.6038, "step": 3548 }, { "epoch": 0.19151691759753925, "grad_norm": 1.4173235606196728, "learning_rate": 9.209766535988242e-06, "loss": 0.6588, "step": 3549 }, { "epoch": 0.19157088122605365, "grad_norm": 1.1828597239765997, "learning_rate": 9.209334671254346e-06, "loss": 0.5733, "step": 3550 }, { "epoch": 0.19162484485456802, "grad_norm": 1.019645518792412, "learning_rate": 9.208902699910422e-06, "loss": 0.524, "step": 3551 }, { "epoch": 0.1916788084830824, "grad_norm": 1.4321370889472942, "learning_rate": 9.208470621968888e-06, "loss": 0.5143, "step": 3552 }, { "epoch": 0.19173277211159678, "grad_norm": 0.9771196529761064, "learning_rate": 9.20803843744216e-06, "loss": 0.4762, "step": 3553 }, { "epoch": 0.19178673574011115, "grad_norm": 0.8453750798422196, "learning_rate": 9.207606146342661e-06, "loss": 0.3185, "step": 3554 }, { "epoch": 0.19184069936862555, "grad_norm": 
1.1337413271824397, "learning_rate": 9.207173748682815e-06, "loss": 0.5927, "step": 3555 }, { "epoch": 0.19189466299713992, "grad_norm": 1.1851828234013277, "learning_rate": 9.20674124447505e-06, "loss": 0.5292, "step": 3556 }, { "epoch": 0.19194862662565432, "grad_norm": 1.130782551043283, "learning_rate": 9.206308633731796e-06, "loss": 0.5354, "step": 3557 }, { "epoch": 0.1920025902541687, "grad_norm": 1.0285656058686468, "learning_rate": 9.205875916465487e-06, "loss": 0.498, "step": 3558 }, { "epoch": 0.19205655388268308, "grad_norm": 0.8044955352133234, "learning_rate": 9.20544309268856e-06, "loss": 0.3501, "step": 3559 }, { "epoch": 0.19211051751119745, "grad_norm": 1.130479066625325, "learning_rate": 9.205010162413453e-06, "loss": 0.604, "step": 3560 }, { "epoch": 0.19216448113971182, "grad_norm": 1.183288302034953, "learning_rate": 9.204577125652613e-06, "loss": 0.5448, "step": 3561 }, { "epoch": 0.19221844476822622, "grad_norm": 1.1491433880773687, "learning_rate": 9.204143982418482e-06, "loss": 0.4885, "step": 3562 }, { "epoch": 0.1922724083967406, "grad_norm": 1.05871317408975, "learning_rate": 9.20371073272351e-06, "loss": 0.5711, "step": 3563 }, { "epoch": 0.192326372025255, "grad_norm": 0.9612177767402478, "learning_rate": 9.203277376580148e-06, "loss": 0.4386, "step": 3564 }, { "epoch": 0.19238033565376936, "grad_norm": 1.1435755817938, "learning_rate": 9.202843914000855e-06, "loss": 0.5742, "step": 3565 }, { "epoch": 0.19243429928228373, "grad_norm": 1.086469307128442, "learning_rate": 9.202410344998084e-06, "loss": 0.5575, "step": 3566 }, { "epoch": 0.19248826291079812, "grad_norm": 1.1501097092990864, "learning_rate": 9.201976669584299e-06, "loss": 0.4186, "step": 3567 }, { "epoch": 0.1925422265393125, "grad_norm": 0.9696458262977752, "learning_rate": 9.201542887771964e-06, "loss": 0.4917, "step": 3568 }, { "epoch": 0.1925961901678269, "grad_norm": 0.9993446749816595, "learning_rate": 9.201108999573546e-06, "loss": 0.536, "step": 3569 }, { "epoch": 0.19265015379634126, "grad_norm": 1.025394678729257, "learning_rate": 9.200675005001518e-06, "loss": 0.496, "step": 3570 }, { "epoch": 0.19270411742485566, "grad_norm": 1.2479100996140824, "learning_rate": 9.20024090406835e-06, "loss": 0.8365, "step": 3571 }, { "epoch": 0.19275808105337003, "grad_norm": 1.0274313489907139, "learning_rate": 9.199806696786517e-06, "loss": 0.5111, "step": 3572 }, { "epoch": 0.1928120446818844, "grad_norm": 1.0007183310603083, "learning_rate": 9.199372383168505e-06, "loss": 0.4077, "step": 3573 }, { "epoch": 0.1928660083103988, "grad_norm": 0.9281619648660502, "learning_rate": 9.198937963226791e-06, "loss": 0.3626, "step": 3574 }, { "epoch": 0.19291997193891317, "grad_norm": 1.141218285658685, "learning_rate": 9.198503436973862e-06, "loss": 0.5597, "step": 3575 }, { "epoch": 0.19297393556742756, "grad_norm": 1.1108387673604245, "learning_rate": 9.198068804422206e-06, "loss": 0.524, "step": 3576 }, { "epoch": 0.19302789919594193, "grad_norm": 1.0749912164667301, "learning_rate": 9.197634065584319e-06, "loss": 0.3801, "step": 3577 }, { "epoch": 0.19308186282445633, "grad_norm": 1.0637333940611702, "learning_rate": 9.197199220472691e-06, "loss": 0.455, "step": 3578 }, { "epoch": 0.1931358264529707, "grad_norm": 1.2942680509329512, "learning_rate": 9.19676426909982e-06, "loss": 0.5516, "step": 3579 }, { "epoch": 0.19318979008148507, "grad_norm": 1.1753712397057503, "learning_rate": 9.196329211478212e-06, "loss": 0.5056, "step": 3580 }, { "epoch": 0.19324375370999947, "grad_norm": 1.0742807121379698, 
"learning_rate": 9.195894047620367e-06, "loss": 0.6193, "step": 3581 }, { "epoch": 0.19329771733851384, "grad_norm": 1.3563544006951858, "learning_rate": 9.195458777538792e-06, "loss": 0.5342, "step": 3582 }, { "epoch": 0.19335168096702823, "grad_norm": 1.0839770191043454, "learning_rate": 9.195023401245998e-06, "loss": 0.4762, "step": 3583 }, { "epoch": 0.1934056445955426, "grad_norm": 1.0275590327005384, "learning_rate": 9.194587918754496e-06, "loss": 0.439, "step": 3584 }, { "epoch": 0.19345960822405697, "grad_norm": 0.9458721337676708, "learning_rate": 9.194152330076806e-06, "loss": 0.4961, "step": 3585 }, { "epoch": 0.19351357185257137, "grad_norm": 0.9496046857940559, "learning_rate": 9.193716635225445e-06, "loss": 0.4236, "step": 3586 }, { "epoch": 0.19356753548108574, "grad_norm": 0.9427693237687016, "learning_rate": 9.193280834212935e-06, "loss": 0.4386, "step": 3587 }, { "epoch": 0.19362149910960014, "grad_norm": 1.0891050792957426, "learning_rate": 9.192844927051802e-06, "loss": 0.5124, "step": 3588 }, { "epoch": 0.1936754627381145, "grad_norm": 0.9779880912946277, "learning_rate": 9.192408913754575e-06, "loss": 0.4483, "step": 3589 }, { "epoch": 0.1937294263666289, "grad_norm": 1.1842951511195103, "learning_rate": 9.191972794333786e-06, "loss": 0.5986, "step": 3590 }, { "epoch": 0.19378338999514327, "grad_norm": 1.060208841482625, "learning_rate": 9.191536568801967e-06, "loss": 0.4817, "step": 3591 }, { "epoch": 0.19383735362365764, "grad_norm": 1.0208953406081516, "learning_rate": 9.191100237171657e-06, "loss": 0.4607, "step": 3592 }, { "epoch": 0.19389131725217204, "grad_norm": 1.0014919263055355, "learning_rate": 9.190663799455397e-06, "loss": 0.4642, "step": 3593 }, { "epoch": 0.1939452808806864, "grad_norm": 1.0442691432330509, "learning_rate": 9.19022725566573e-06, "loss": 0.4865, "step": 3594 }, { "epoch": 0.1939992445092008, "grad_norm": 1.018641699783968, "learning_rate": 9.189790605815203e-06, "loss": 0.3655, "step": 3595 }, { "epoch": 0.19405320813771518, "grad_norm": 1.1244596123138388, "learning_rate": 9.189353849916366e-06, "loss": 0.5873, "step": 3596 }, { "epoch": 0.19410717176622957, "grad_norm": 1.1316079909347732, "learning_rate": 9.188916987981772e-06, "loss": 0.6233, "step": 3597 }, { "epoch": 0.19416113539474394, "grad_norm": 0.850623357354391, "learning_rate": 9.188480020023976e-06, "loss": 0.3491, "step": 3598 }, { "epoch": 0.19421509902325831, "grad_norm": 1.1388085581199436, "learning_rate": 9.188042946055536e-06, "loss": 0.5394, "step": 3599 }, { "epoch": 0.1942690626517727, "grad_norm": 1.328353140314693, "learning_rate": 9.187605766089017e-06, "loss": 0.673, "step": 3600 }, { "epoch": 0.19432302628028708, "grad_norm": 0.9615930435843016, "learning_rate": 9.187168480136982e-06, "loss": 0.3963, "step": 3601 }, { "epoch": 0.19437698990880148, "grad_norm": 0.9483627916873354, "learning_rate": 9.186731088212e-06, "loss": 0.4975, "step": 3602 }, { "epoch": 0.19443095353731585, "grad_norm": 0.8542570093009577, "learning_rate": 9.186293590326639e-06, "loss": 0.3762, "step": 3603 }, { "epoch": 0.19448491716583022, "grad_norm": 0.9211920276186257, "learning_rate": 9.185855986493478e-06, "loss": 0.4036, "step": 3604 }, { "epoch": 0.19453888079434462, "grad_norm": 1.1779246821180303, "learning_rate": 9.185418276725089e-06, "loss": 0.661, "step": 3605 }, { "epoch": 0.19459284442285898, "grad_norm": 0.9507352115524731, "learning_rate": 9.184980461034056e-06, "loss": 0.352, "step": 3606 }, { "epoch": 0.19464680805137338, "grad_norm": 1.4180710691701253, 
"learning_rate": 9.184542539432963e-06, "loss": 0.6853, "step": 3607 }, { "epoch": 0.19470077167988775, "grad_norm": 1.025037776645239, "learning_rate": 9.184104511934392e-06, "loss": 0.5463, "step": 3608 }, { "epoch": 0.19475473530840215, "grad_norm": 0.8767518913857991, "learning_rate": 9.183666378550935e-06, "loss": 0.3942, "step": 3609 }, { "epoch": 0.19480869893691652, "grad_norm": 1.1743405552100048, "learning_rate": 9.183228139295185e-06, "loss": 0.4834, "step": 3610 }, { "epoch": 0.1948626625654309, "grad_norm": 1.198131092729544, "learning_rate": 9.182789794179737e-06, "loss": 0.619, "step": 3611 }, { "epoch": 0.19491662619394529, "grad_norm": 1.338304467082019, "learning_rate": 9.18235134321719e-06, "loss": 0.6273, "step": 3612 }, { "epoch": 0.19497058982245966, "grad_norm": 1.1673223970546338, "learning_rate": 9.181912786420142e-06, "loss": 0.5269, "step": 3613 }, { "epoch": 0.19502455345097405, "grad_norm": 0.7890082003084596, "learning_rate": 9.181474123801201e-06, "loss": 0.4055, "step": 3614 }, { "epoch": 0.19507851707948842, "grad_norm": 1.132574254869667, "learning_rate": 9.181035355372974e-06, "loss": 0.5497, "step": 3615 }, { "epoch": 0.1951324807080028, "grad_norm": 0.9038162642220137, "learning_rate": 9.180596481148072e-06, "loss": 0.4027, "step": 3616 }, { "epoch": 0.1951864443365172, "grad_norm": 0.8315942463006982, "learning_rate": 9.180157501139108e-06, "loss": 0.416, "step": 3617 }, { "epoch": 0.19524040796503156, "grad_norm": 1.2392969593251206, "learning_rate": 9.179718415358698e-06, "loss": 0.5767, "step": 3618 }, { "epoch": 0.19529437159354596, "grad_norm": 1.0479830495919433, "learning_rate": 9.179279223819464e-06, "loss": 0.5535, "step": 3619 }, { "epoch": 0.19534833522206033, "grad_norm": 0.9311288840124303, "learning_rate": 9.178839926534027e-06, "loss": 0.4557, "step": 3620 }, { "epoch": 0.19540229885057472, "grad_norm": 1.0557026472005977, "learning_rate": 9.178400523515013e-06, "loss": 0.5382, "step": 3621 }, { "epoch": 0.1954562624790891, "grad_norm": 1.1854025420247465, "learning_rate": 9.177961014775052e-06, "loss": 0.6454, "step": 3622 }, { "epoch": 0.19551022610760346, "grad_norm": 0.8185365233147655, "learning_rate": 9.177521400326774e-06, "loss": 0.4338, "step": 3623 }, { "epoch": 0.19556418973611786, "grad_norm": 1.1220356407791416, "learning_rate": 9.177081680182817e-06, "loss": 0.5525, "step": 3624 }, { "epoch": 0.19561815336463223, "grad_norm": 0.9482150235546478, "learning_rate": 9.176641854355815e-06, "loss": 0.4341, "step": 3625 }, { "epoch": 0.19567211699314663, "grad_norm": 0.7004963440572141, "learning_rate": 9.176201922858411e-06, "loss": 0.2866, "step": 3626 }, { "epoch": 0.195726080621661, "grad_norm": 1.0806331857400004, "learning_rate": 9.175761885703253e-06, "loss": 0.4915, "step": 3627 }, { "epoch": 0.1957800442501754, "grad_norm": 1.1965538613778017, "learning_rate": 9.17532174290298e-06, "loss": 0.5701, "step": 3628 }, { "epoch": 0.19583400787868976, "grad_norm": 1.0450893480707388, "learning_rate": 9.174881494470247e-06, "loss": 0.4841, "step": 3629 }, { "epoch": 0.19588797150720413, "grad_norm": 0.9204543684344223, "learning_rate": 9.174441140417709e-06, "loss": 0.4216, "step": 3630 }, { "epoch": 0.19594193513571853, "grad_norm": 0.8022516175723758, "learning_rate": 9.174000680758019e-06, "loss": 0.399, "step": 3631 }, { "epoch": 0.1959958987642329, "grad_norm": 1.19860940407008, "learning_rate": 9.173560115503836e-06, "loss": 0.5936, "step": 3632 }, { "epoch": 0.1960498623927473, "grad_norm": 0.8835650103042798, 
"learning_rate": 9.173119444667822e-06, "loss": 0.3686, "step": 3633 }, { "epoch": 0.19610382602126167, "grad_norm": 1.0362649541666233, "learning_rate": 9.172678668262648e-06, "loss": 0.4747, "step": 3634 }, { "epoch": 0.19615778964977604, "grad_norm": 1.1694835300634574, "learning_rate": 9.172237786300976e-06, "loss": 0.6331, "step": 3635 }, { "epoch": 0.19621175327829043, "grad_norm": 1.134872845981011, "learning_rate": 9.17179679879548e-06, "loss": 0.4929, "step": 3636 }, { "epoch": 0.1962657169068048, "grad_norm": 1.0970020471593487, "learning_rate": 9.171355705758833e-06, "loss": 0.5916, "step": 3637 }, { "epoch": 0.1963196805353192, "grad_norm": 1.1800775629307798, "learning_rate": 9.170914507203714e-06, "loss": 0.591, "step": 3638 }, { "epoch": 0.19637364416383357, "grad_norm": 1.1359360906745266, "learning_rate": 9.170473203142801e-06, "loss": 0.5648, "step": 3639 }, { "epoch": 0.19642760779234797, "grad_norm": 0.9354847798201783, "learning_rate": 9.17003179358878e-06, "loss": 0.4316, "step": 3640 }, { "epoch": 0.19648157142086234, "grad_norm": 0.9907227227255224, "learning_rate": 9.16959027855434e-06, "loss": 0.4445, "step": 3641 }, { "epoch": 0.1965355350493767, "grad_norm": 1.0098236455436445, "learning_rate": 9.169148658052165e-06, "loss": 0.5278, "step": 3642 }, { "epoch": 0.1965894986778911, "grad_norm": 1.0996804161678086, "learning_rate": 9.16870693209495e-06, "loss": 0.6128, "step": 3643 }, { "epoch": 0.19664346230640548, "grad_norm": 1.1627519776013067, "learning_rate": 9.168265100695391e-06, "loss": 0.5903, "step": 3644 }, { "epoch": 0.19669742593491987, "grad_norm": 1.1378521650086106, "learning_rate": 9.167823163866188e-06, "loss": 0.6362, "step": 3645 }, { "epoch": 0.19675138956343424, "grad_norm": 1.1276788386610797, "learning_rate": 9.16738112162004e-06, "loss": 0.5274, "step": 3646 }, { "epoch": 0.19680535319194864, "grad_norm": 0.9694403167256228, "learning_rate": 9.166938973969653e-06, "loss": 0.4906, "step": 3647 }, { "epoch": 0.196859316820463, "grad_norm": 1.154591828039086, "learning_rate": 9.166496720927734e-06, "loss": 0.5508, "step": 3648 }, { "epoch": 0.19691328044897738, "grad_norm": 1.125946973321343, "learning_rate": 9.166054362506995e-06, "loss": 0.4575, "step": 3649 }, { "epoch": 0.19696724407749178, "grad_norm": 1.0707590538363734, "learning_rate": 9.16561189872015e-06, "loss": 0.4795, "step": 3650 }, { "epoch": 0.19702120770600615, "grad_norm": 1.049676985488393, "learning_rate": 9.165169329579917e-06, "loss": 0.5877, "step": 3651 }, { "epoch": 0.19707517133452054, "grad_norm": 1.0865584386024731, "learning_rate": 9.16472665509901e-06, "loss": 0.5218, "step": 3652 }, { "epoch": 0.1971291349630349, "grad_norm": 0.9581409334255403, "learning_rate": 9.16428387529016e-06, "loss": 0.4987, "step": 3653 }, { "epoch": 0.19718309859154928, "grad_norm": 0.9707156414098648, "learning_rate": 9.163840990166085e-06, "loss": 0.4199, "step": 3654 }, { "epoch": 0.19723706222006368, "grad_norm": 1.1806341562036526, "learning_rate": 9.16339799973952e-06, "loss": 0.64, "step": 3655 }, { "epoch": 0.19729102584857805, "grad_norm": 1.026361694002647, "learning_rate": 9.162954904023195e-06, "loss": 0.5245, "step": 3656 }, { "epoch": 0.19734498947709245, "grad_norm": 1.1930545809305801, "learning_rate": 9.162511703029842e-06, "loss": 0.5413, "step": 3657 }, { "epoch": 0.19739895310560682, "grad_norm": 0.9907684552810251, "learning_rate": 9.162068396772207e-06, "loss": 0.389, "step": 3658 }, { "epoch": 0.19745291673412121, "grad_norm": 1.319194534790042, "learning_rate": 
9.161624985263023e-06, "loss": 0.5117, "step": 3659 }, { "epoch": 0.19750688036263558, "grad_norm": 1.0702382399427064, "learning_rate": 9.161181468515037e-06, "loss": 0.5431, "step": 3660 }, { "epoch": 0.19756084399114995, "grad_norm": 1.0315151508177454, "learning_rate": 9.160737846540997e-06, "loss": 0.4605, "step": 3661 }, { "epoch": 0.19761480761966435, "grad_norm": 1.1267962159886362, "learning_rate": 9.160294119353652e-06, "loss": 0.5176, "step": 3662 }, { "epoch": 0.19766877124817872, "grad_norm": 0.8535401806708524, "learning_rate": 9.159850286965757e-06, "loss": 0.4492, "step": 3663 }, { "epoch": 0.19772273487669312, "grad_norm": 0.8655100307829648, "learning_rate": 9.159406349390065e-06, "loss": 0.4324, "step": 3664 }, { "epoch": 0.1977766985052075, "grad_norm": 0.8172992160695234, "learning_rate": 9.158962306639338e-06, "loss": 0.4176, "step": 3665 }, { "epoch": 0.19783066213372189, "grad_norm": 0.993936509723193, "learning_rate": 9.158518158726335e-06, "loss": 0.5083, "step": 3666 }, { "epoch": 0.19788462576223625, "grad_norm": 1.2083528942114232, "learning_rate": 9.158073905663827e-06, "loss": 0.6151, "step": 3667 }, { "epoch": 0.19793858939075062, "grad_norm": 1.130946843712377, "learning_rate": 9.157629547464576e-06, "loss": 0.5618, "step": 3668 }, { "epoch": 0.19799255301926502, "grad_norm": 0.9998668629923408, "learning_rate": 9.157185084141356e-06, "loss": 0.4444, "step": 3669 }, { "epoch": 0.1980465166477794, "grad_norm": 1.0889537463800498, "learning_rate": 9.156740515706943e-06, "loss": 0.4777, "step": 3670 }, { "epoch": 0.1981004802762938, "grad_norm": 1.1091655375607095, "learning_rate": 9.156295842174113e-06, "loss": 0.5302, "step": 3671 }, { "epoch": 0.19815444390480816, "grad_norm": 1.1412984108149076, "learning_rate": 9.155851063555644e-06, "loss": 0.496, "step": 3672 }, { "epoch": 0.19820840753332253, "grad_norm": 0.9304990469459504, "learning_rate": 9.155406179864323e-06, "loss": 0.4858, "step": 3673 }, { "epoch": 0.19826237116183693, "grad_norm": 1.214694080711274, "learning_rate": 9.154961191112935e-06, "loss": 0.6366, "step": 3674 }, { "epoch": 0.1983163347903513, "grad_norm": 0.8526081098641014, "learning_rate": 9.15451609731427e-06, "loss": 0.3443, "step": 3675 }, { "epoch": 0.1983702984188657, "grad_norm": 1.138549151178086, "learning_rate": 9.154070898481116e-06, "loss": 0.5303, "step": 3676 }, { "epoch": 0.19842426204738006, "grad_norm": 1.1186069512379517, "learning_rate": 9.153625594626275e-06, "loss": 0.6459, "step": 3677 }, { "epoch": 0.19847822567589446, "grad_norm": 1.0135586168488384, "learning_rate": 9.153180185762543e-06, "loss": 0.4083, "step": 3678 }, { "epoch": 0.19853218930440883, "grad_norm": 1.0903371526228016, "learning_rate": 9.15273467190272e-06, "loss": 0.4743, "step": 3679 }, { "epoch": 0.1985861529329232, "grad_norm": 1.3198972853086282, "learning_rate": 9.152289053059613e-06, "loss": 0.487, "step": 3680 }, { "epoch": 0.1986401165614376, "grad_norm": 0.9482080660441373, "learning_rate": 9.151843329246027e-06, "loss": 0.5716, "step": 3681 }, { "epoch": 0.19869408018995197, "grad_norm": 0.9466516083637744, "learning_rate": 9.151397500474774e-06, "loss": 0.4856, "step": 3682 }, { "epoch": 0.19874804381846636, "grad_norm": 1.067625054948517, "learning_rate": 9.15095156675867e-06, "loss": 0.4292, "step": 3683 }, { "epoch": 0.19880200744698073, "grad_norm": 1.0892470711529207, "learning_rate": 9.150505528110525e-06, "loss": 0.4628, "step": 3684 }, { "epoch": 0.1988559710754951, "grad_norm": 1.2898357736097448, "learning_rate": 
9.150059384543165e-06, "loss": 0.5822, "step": 3685 }, { "epoch": 0.1989099347040095, "grad_norm": 1.1066441917713825, "learning_rate": 9.149613136069409e-06, "loss": 0.519, "step": 3686 }, { "epoch": 0.19896389833252387, "grad_norm": 1.101692089019711, "learning_rate": 9.149166782702085e-06, "loss": 0.663, "step": 3687 }, { "epoch": 0.19901786196103827, "grad_norm": 0.8307843789271808, "learning_rate": 9.14872032445402e-06, "loss": 0.374, "step": 3688 }, { "epoch": 0.19907182558955264, "grad_norm": 1.0642949265532295, "learning_rate": 9.148273761338046e-06, "loss": 0.6425, "step": 3689 }, { "epoch": 0.19912578921806703, "grad_norm": 1.096181684833071, "learning_rate": 9.147827093366996e-06, "loss": 0.4325, "step": 3690 }, { "epoch": 0.1991797528465814, "grad_norm": 1.1574670603263386, "learning_rate": 9.147380320553712e-06, "loss": 0.4986, "step": 3691 }, { "epoch": 0.19923371647509577, "grad_norm": 1.005501035949866, "learning_rate": 9.146933442911032e-06, "loss": 0.5354, "step": 3692 }, { "epoch": 0.19928768010361017, "grad_norm": 1.0195661926515458, "learning_rate": 9.146486460451797e-06, "loss": 0.4688, "step": 3693 }, { "epoch": 0.19934164373212454, "grad_norm": 1.1124136235004647, "learning_rate": 9.14603937318886e-06, "loss": 0.4182, "step": 3694 }, { "epoch": 0.19939560736063894, "grad_norm": 1.234936762727358, "learning_rate": 9.145592181135066e-06, "loss": 0.5513, "step": 3695 }, { "epoch": 0.1994495709891533, "grad_norm": 1.0893680973489885, "learning_rate": 9.145144884303268e-06, "loss": 0.5407, "step": 3696 }, { "epoch": 0.1995035346176677, "grad_norm": 1.3464603067962544, "learning_rate": 9.144697482706324e-06, "loss": 0.6722, "step": 3697 }, { "epoch": 0.19955749824618207, "grad_norm": 1.0284302168979524, "learning_rate": 9.144249976357093e-06, "loss": 0.4518, "step": 3698 }, { "epoch": 0.19961146187469644, "grad_norm": 0.9949188187012509, "learning_rate": 9.143802365268435e-06, "loss": 0.5139, "step": 3699 }, { "epoch": 0.19966542550321084, "grad_norm": 1.0823165027009942, "learning_rate": 9.143354649453212e-06, "loss": 0.5281, "step": 3700 }, { "epoch": 0.1997193891317252, "grad_norm": 1.0907409762550577, "learning_rate": 9.1429068289243e-06, "loss": 0.4633, "step": 3701 }, { "epoch": 0.1997733527602396, "grad_norm": 1.0132207005497866, "learning_rate": 9.14245890369456e-06, "loss": 0.5515, "step": 3702 }, { "epoch": 0.19982731638875398, "grad_norm": 0.8478118733129804, "learning_rate": 9.142010873776875e-06, "loss": 0.4438, "step": 3703 }, { "epoch": 0.19988128001726835, "grad_norm": 0.9427127130661139, "learning_rate": 9.141562739184115e-06, "loss": 0.4157, "step": 3704 }, { "epoch": 0.19993524364578275, "grad_norm": 1.1250699601499659, "learning_rate": 9.141114499929162e-06, "loss": 0.7275, "step": 3705 }, { "epoch": 0.19998920727429711, "grad_norm": 0.9680254469744685, "learning_rate": 9.1406661560249e-06, "loss": 0.5907, "step": 3706 }, { "epoch": 0.2000431709028115, "grad_norm": 0.9195594819190348, "learning_rate": 9.140217707484214e-06, "loss": 0.4476, "step": 3707 }, { "epoch": 0.20009713453132588, "grad_norm": 0.8535184568247356, "learning_rate": 9.139769154319993e-06, "loss": 0.413, "step": 3708 }, { "epoch": 0.20015109815984028, "grad_norm": 1.0798688658374924, "learning_rate": 9.139320496545129e-06, "loss": 0.4226, "step": 3709 }, { "epoch": 0.20020506178835465, "grad_norm": 1.0475628489345057, "learning_rate": 9.138871734172516e-06, "loss": 0.4836, "step": 3710 }, { "epoch": 0.20025902541686902, "grad_norm": 1.1049357465146818, "learning_rate": 
9.138422867215053e-06, "loss": 0.5274, "step": 3711 }, { "epoch": 0.20031298904538342, "grad_norm": 1.3199057348444236, "learning_rate": 9.137973895685641e-06, "loss": 0.59, "step": 3712 }, { "epoch": 0.20036695267389779, "grad_norm": 0.809125376913214, "learning_rate": 9.137524819597182e-06, "loss": 0.3552, "step": 3713 }, { "epoch": 0.20042091630241218, "grad_norm": 0.9454948767301519, "learning_rate": 9.137075638962584e-06, "loss": 0.3995, "step": 3714 }, { "epoch": 0.20047487993092655, "grad_norm": 1.111817631093611, "learning_rate": 9.13662635379476e-06, "loss": 0.521, "step": 3715 }, { "epoch": 0.20052884355944095, "grad_norm": 1.051126541186184, "learning_rate": 9.136176964106617e-06, "loss": 0.5015, "step": 3716 }, { "epoch": 0.20058280718795532, "grad_norm": 1.4695960205959748, "learning_rate": 9.135727469911075e-06, "loss": 0.645, "step": 3717 }, { "epoch": 0.2006367708164697, "grad_norm": 0.9996250279813591, "learning_rate": 9.135277871221051e-06, "loss": 0.4357, "step": 3718 }, { "epoch": 0.2006907344449841, "grad_norm": 1.11234064852628, "learning_rate": 9.134828168049469e-06, "loss": 0.5853, "step": 3719 }, { "epoch": 0.20074469807349846, "grad_norm": 0.9939452332084993, "learning_rate": 9.134378360409255e-06, "loss": 0.452, "step": 3720 }, { "epoch": 0.20079866170201285, "grad_norm": 1.0526604119645906, "learning_rate": 9.133928448313332e-06, "loss": 0.5914, "step": 3721 }, { "epoch": 0.20085262533052722, "grad_norm": 1.00049085068388, "learning_rate": 9.133478431774631e-06, "loss": 0.4602, "step": 3722 }, { "epoch": 0.2009065889590416, "grad_norm": 0.8405873770395913, "learning_rate": 9.133028310806093e-06, "loss": 0.4055, "step": 3723 }, { "epoch": 0.200960552587556, "grad_norm": 1.1341216314451776, "learning_rate": 9.132578085420648e-06, "loss": 0.564, "step": 3724 }, { "epoch": 0.20101451621607036, "grad_norm": 1.0759309560977517, "learning_rate": 9.132127755631239e-06, "loss": 0.397, "step": 3725 }, { "epoch": 0.20106847984458476, "grad_norm": 0.9968363196507504, "learning_rate": 9.131677321450809e-06, "loss": 0.4783, "step": 3726 }, { "epoch": 0.20112244347309913, "grad_norm": 1.195439300642246, "learning_rate": 9.131226782892303e-06, "loss": 0.6821, "step": 3727 }, { "epoch": 0.20117640710161352, "grad_norm": 1.0593748786844126, "learning_rate": 9.13077613996867e-06, "loss": 0.5606, "step": 3728 }, { "epoch": 0.2012303707301279, "grad_norm": 1.2347747200174293, "learning_rate": 9.130325392692861e-06, "loss": 0.5657, "step": 3729 }, { "epoch": 0.20128433435864226, "grad_norm": 1.1766522706375906, "learning_rate": 9.129874541077832e-06, "loss": 0.5685, "step": 3730 }, { "epoch": 0.20133829798715666, "grad_norm": 0.6688025576594917, "learning_rate": 9.129423585136542e-06, "loss": 0.2463, "step": 3731 }, { "epoch": 0.20139226161567103, "grad_norm": 0.9700431086726674, "learning_rate": 9.128972524881952e-06, "loss": 0.3616, "step": 3732 }, { "epoch": 0.20144622524418543, "grad_norm": 1.1510830981224824, "learning_rate": 9.128521360327025e-06, "loss": 0.4724, "step": 3733 }, { "epoch": 0.2015001888726998, "grad_norm": 1.1139481495240269, "learning_rate": 9.128070091484725e-06, "loss": 0.525, "step": 3734 }, { "epoch": 0.2015541525012142, "grad_norm": 1.0892795807328075, "learning_rate": 9.127618718368026e-06, "loss": 0.546, "step": 3735 }, { "epoch": 0.20160811612972857, "grad_norm": 1.21382346680645, "learning_rate": 9.127167240989899e-06, "loss": 0.681, "step": 3736 }, { "epoch": 0.20166207975824293, "grad_norm": 1.0981012544491835, "learning_rate": 9.126715659363322e-06, 
"loss": 0.5568, "step": 3737 }, { "epoch": 0.20171604338675733, "grad_norm": 1.2338161400663619, "learning_rate": 9.126263973501271e-06, "loss": 0.4916, "step": 3738 }, { "epoch": 0.2017700070152717, "grad_norm": 1.2021458283340636, "learning_rate": 9.12581218341673e-06, "loss": 0.5919, "step": 3739 }, { "epoch": 0.2018239706437861, "grad_norm": 0.8029228772857078, "learning_rate": 9.125360289122685e-06, "loss": 0.3895, "step": 3740 }, { "epoch": 0.20187793427230047, "grad_norm": 1.1617322613679533, "learning_rate": 9.124908290632119e-06, "loss": 0.5522, "step": 3741 }, { "epoch": 0.20193189790081484, "grad_norm": 1.11771296420502, "learning_rate": 9.124456187958027e-06, "loss": 0.6752, "step": 3742 }, { "epoch": 0.20198586152932924, "grad_norm": 0.7245114922881668, "learning_rate": 9.124003981113402e-06, "loss": 0.4101, "step": 3743 }, { "epoch": 0.2020398251578436, "grad_norm": 0.898860249771672, "learning_rate": 9.123551670111242e-06, "loss": 0.4668, "step": 3744 }, { "epoch": 0.202093788786358, "grad_norm": 1.2234294778727197, "learning_rate": 9.123099254964544e-06, "loss": 0.6372, "step": 3745 }, { "epoch": 0.20214775241487237, "grad_norm": 1.1270426971777125, "learning_rate": 9.122646735686313e-06, "loss": 0.5213, "step": 3746 }, { "epoch": 0.20220171604338677, "grad_norm": 1.204079728903731, "learning_rate": 9.122194112289554e-06, "loss": 0.6473, "step": 3747 }, { "epoch": 0.20225567967190114, "grad_norm": 0.9090023866417571, "learning_rate": 9.121741384787278e-06, "loss": 0.4942, "step": 3748 }, { "epoch": 0.2023096433004155, "grad_norm": 0.9556497317556221, "learning_rate": 9.121288553192494e-06, "loss": 0.4741, "step": 3749 }, { "epoch": 0.2023636069289299, "grad_norm": 0.9240768386839322, "learning_rate": 9.120835617518216e-06, "loss": 0.4037, "step": 3750 }, { "epoch": 0.20241757055744428, "grad_norm": 1.233303339592849, "learning_rate": 9.120382577777468e-06, "loss": 0.5834, "step": 3751 }, { "epoch": 0.20247153418595867, "grad_norm": 1.024209839920656, "learning_rate": 9.119929433983263e-06, "loss": 0.5692, "step": 3752 }, { "epoch": 0.20252549781447304, "grad_norm": 1.0479240752985408, "learning_rate": 9.11947618614863e-06, "loss": 0.6407, "step": 3753 }, { "epoch": 0.2025794614429874, "grad_norm": 0.9362282824222296, "learning_rate": 9.119022834286594e-06, "loss": 0.4625, "step": 3754 }, { "epoch": 0.2026334250715018, "grad_norm": 1.238717836371045, "learning_rate": 9.118569378410183e-06, "loss": 0.6564, "step": 3755 }, { "epoch": 0.20268738870001618, "grad_norm": 0.9541062575767985, "learning_rate": 9.118115818532434e-06, "loss": 0.4446, "step": 3756 }, { "epoch": 0.20274135232853058, "grad_norm": 1.157429130169837, "learning_rate": 9.117662154666382e-06, "loss": 0.4296, "step": 3757 }, { "epoch": 0.20279531595704495, "grad_norm": 1.2658198841988768, "learning_rate": 9.117208386825061e-06, "loss": 0.5239, "step": 3758 }, { "epoch": 0.20284927958555934, "grad_norm": 0.9807121924602964, "learning_rate": 9.116754515021519e-06, "loss": 0.3702, "step": 3759 }, { "epoch": 0.20290324321407371, "grad_norm": 1.169113994911858, "learning_rate": 9.116300539268794e-06, "loss": 0.4832, "step": 3760 }, { "epoch": 0.20295720684258808, "grad_norm": 1.112474941085719, "learning_rate": 9.11584645957994e-06, "loss": 0.4955, "step": 3761 }, { "epoch": 0.20301117047110248, "grad_norm": 0.8078690227852009, "learning_rate": 9.115392275968005e-06, "loss": 0.329, "step": 3762 }, { "epoch": 0.20306513409961685, "grad_norm": 0.9659226500099185, "learning_rate": 9.114937988446044e-06, "loss": 0.5848, 
"step": 3763 }, { "epoch": 0.20311909772813125, "grad_norm": 1.1254703567857467, "learning_rate": 9.114483597027111e-06, "loss": 0.4772, "step": 3764 }, { "epoch": 0.20317306135664562, "grad_norm": 1.0774208147264799, "learning_rate": 9.114029101724268e-06, "loss": 0.5636, "step": 3765 }, { "epoch": 0.20322702498516002, "grad_norm": 0.8925182485383784, "learning_rate": 9.113574502550576e-06, "loss": 0.4924, "step": 3766 }, { "epoch": 0.20328098861367438, "grad_norm": 1.1340742932385532, "learning_rate": 9.113119799519103e-06, "loss": 0.6247, "step": 3767 }, { "epoch": 0.20333495224218875, "grad_norm": 1.0360772061554622, "learning_rate": 9.112664992642915e-06, "loss": 0.5195, "step": 3768 }, { "epoch": 0.20338891587070315, "grad_norm": 0.9709576731864566, "learning_rate": 9.112210081935084e-06, "loss": 0.5441, "step": 3769 }, { "epoch": 0.20344287949921752, "grad_norm": 0.8208956677653393, "learning_rate": 9.111755067408688e-06, "loss": 0.3147, "step": 3770 }, { "epoch": 0.20349684312773192, "grad_norm": 0.9670784941304759, "learning_rate": 9.111299949076801e-06, "loss": 0.4656, "step": 3771 }, { "epoch": 0.2035508067562463, "grad_norm": 1.0733556297819917, "learning_rate": 9.110844726952503e-06, "loss": 0.4475, "step": 3772 }, { "epoch": 0.20360477038476066, "grad_norm": 1.1590222183027303, "learning_rate": 9.110389401048879e-06, "loss": 0.5622, "step": 3773 }, { "epoch": 0.20365873401327506, "grad_norm": 0.9668901144273859, "learning_rate": 9.109933971379016e-06, "loss": 0.4204, "step": 3774 }, { "epoch": 0.20371269764178943, "grad_norm": 1.080854489945085, "learning_rate": 9.109478437956005e-06, "loss": 0.4893, "step": 3775 }, { "epoch": 0.20376666127030382, "grad_norm": 1.2927781961112563, "learning_rate": 9.109022800792935e-06, "loss": 0.7542, "step": 3776 }, { "epoch": 0.2038206248988182, "grad_norm": 0.9185852179216725, "learning_rate": 9.108567059902903e-06, "loss": 0.3752, "step": 3777 }, { "epoch": 0.2038745885273326, "grad_norm": 1.1452906267976544, "learning_rate": 9.108111215299008e-06, "loss": 0.7748, "step": 3778 }, { "epoch": 0.20392855215584696, "grad_norm": 1.0824200291017472, "learning_rate": 9.10765526699435e-06, "loss": 0.4636, "step": 3779 }, { "epoch": 0.20398251578436133, "grad_norm": 0.8368675507202236, "learning_rate": 9.107199215002036e-06, "loss": 0.3172, "step": 3780 }, { "epoch": 0.20403647941287573, "grad_norm": 1.0780568229109078, "learning_rate": 9.106743059335171e-06, "loss": 0.4018, "step": 3781 }, { "epoch": 0.2040904430413901, "grad_norm": 0.9992955101218016, "learning_rate": 9.106286800006867e-06, "loss": 0.4154, "step": 3782 }, { "epoch": 0.2041444066699045, "grad_norm": 1.1579744631904958, "learning_rate": 9.105830437030234e-06, "loss": 0.6817, "step": 3783 }, { "epoch": 0.20419837029841886, "grad_norm": 0.8295782847520651, "learning_rate": 9.105373970418392e-06, "loss": 0.3972, "step": 3784 }, { "epoch": 0.20425233392693326, "grad_norm": 1.127411411720756, "learning_rate": 9.10491740018446e-06, "loss": 0.6368, "step": 3785 }, { "epoch": 0.20430629755544763, "grad_norm": 0.937387790429257, "learning_rate": 9.104460726341558e-06, "loss": 0.457, "step": 3786 }, { "epoch": 0.204360261183962, "grad_norm": 1.3483683723351014, "learning_rate": 9.104003948902814e-06, "loss": 0.6207, "step": 3787 }, { "epoch": 0.2044142248124764, "grad_norm": 1.238710290776146, "learning_rate": 9.103547067881355e-06, "loss": 0.5052, "step": 3788 }, { "epoch": 0.20446818844099077, "grad_norm": 1.0984899629588436, "learning_rate": 9.103090083290312e-06, "loss": 0.6517, "step": 
3789 }, { "epoch": 0.20452215206950516, "grad_norm": 1.084297066461451, "learning_rate": 9.102632995142818e-06, "loss": 0.4235, "step": 3790 }, { "epoch": 0.20457611569801953, "grad_norm": 0.9898888746164872, "learning_rate": 9.102175803452012e-06, "loss": 0.4578, "step": 3791 }, { "epoch": 0.2046300793265339, "grad_norm": 1.0459420712389298, "learning_rate": 9.101718508231034e-06, "loss": 0.5295, "step": 3792 }, { "epoch": 0.2046840429550483, "grad_norm": 0.8450037866062065, "learning_rate": 9.101261109493029e-06, "loss": 0.363, "step": 3793 }, { "epoch": 0.20473800658356267, "grad_norm": 1.0126135952629696, "learning_rate": 9.100803607251138e-06, "loss": 0.4806, "step": 3794 }, { "epoch": 0.20479197021207707, "grad_norm": 1.1794469538180645, "learning_rate": 9.100346001518512e-06, "loss": 0.5338, "step": 3795 }, { "epoch": 0.20484593384059144, "grad_norm": 0.9852862596143186, "learning_rate": 9.099888292308308e-06, "loss": 0.4918, "step": 3796 }, { "epoch": 0.20489989746910584, "grad_norm": 1.1002683158656994, "learning_rate": 9.099430479633674e-06, "loss": 0.4877, "step": 3797 }, { "epoch": 0.2049538610976202, "grad_norm": 1.3273032700084622, "learning_rate": 9.098972563507771e-06, "loss": 0.5508, "step": 3798 }, { "epoch": 0.20500782472613457, "grad_norm": 0.9520863986741406, "learning_rate": 9.098514543943762e-06, "loss": 0.373, "step": 3799 }, { "epoch": 0.20506178835464897, "grad_norm": 1.0555605223861957, "learning_rate": 9.098056420954807e-06, "loss": 0.4654, "step": 3800 }, { "epoch": 0.20511575198316334, "grad_norm": 0.8038614412779561, "learning_rate": 9.097598194554076e-06, "loss": 0.4105, "step": 3801 }, { "epoch": 0.20516971561167774, "grad_norm": 1.0295001955289393, "learning_rate": 9.097139864754738e-06, "loss": 0.5183, "step": 3802 }, { "epoch": 0.2052236792401921, "grad_norm": 1.23551425545782, "learning_rate": 9.096681431569965e-06, "loss": 0.5704, "step": 3803 }, { "epoch": 0.20527764286870648, "grad_norm": 1.148326710491872, "learning_rate": 9.096222895012935e-06, "loss": 0.6574, "step": 3804 }, { "epoch": 0.20533160649722088, "grad_norm": 1.021426779912693, "learning_rate": 9.095764255096823e-06, "loss": 0.4965, "step": 3805 }, { "epoch": 0.20538557012573525, "grad_norm": 1.0937830868746914, "learning_rate": 9.095305511834813e-06, "loss": 0.5664, "step": 3806 }, { "epoch": 0.20543953375424964, "grad_norm": 0.8990240022806519, "learning_rate": 9.094846665240092e-06, "loss": 0.467, "step": 3807 }, { "epoch": 0.205493497382764, "grad_norm": 1.2601528761778267, "learning_rate": 9.094387715325844e-06, "loss": 0.6459, "step": 3808 }, { "epoch": 0.2055474610112784, "grad_norm": 1.0765649235795685, "learning_rate": 9.093928662105261e-06, "loss": 0.5058, "step": 3809 }, { "epoch": 0.20560142463979278, "grad_norm": 1.1583687805552179, "learning_rate": 9.093469505591537e-06, "loss": 0.581, "step": 3810 }, { "epoch": 0.20565538826830715, "grad_norm": 0.8588458958621885, "learning_rate": 9.09301024579787e-06, "loss": 0.4083, "step": 3811 }, { "epoch": 0.20570935189682155, "grad_norm": 1.0205325869096444, "learning_rate": 9.092550882737458e-06, "loss": 0.529, "step": 3812 }, { "epoch": 0.20576331552533592, "grad_norm": 1.1514347715487017, "learning_rate": 9.092091416423502e-06, "loss": 0.5511, "step": 3813 }, { "epoch": 0.2058172791538503, "grad_norm": 0.9946967484657464, "learning_rate": 9.091631846869208e-06, "loss": 0.5226, "step": 3814 }, { "epoch": 0.20587124278236468, "grad_norm": 1.1848641096765617, "learning_rate": 9.091172174087789e-06, "loss": 0.5819, "step": 3815 }, { 
"epoch": 0.20592520641087908, "grad_norm": 0.9366671692202229, "learning_rate": 9.090712398092451e-06, "loss": 0.4026, "step": 3816 }, { "epoch": 0.20597917003939345, "grad_norm": 0.7898139235707033, "learning_rate": 9.090252518896412e-06, "loss": 0.4523, "step": 3817 }, { "epoch": 0.20603313366790782, "grad_norm": 0.6649074646820992, "learning_rate": 9.089792536512887e-06, "loss": 0.3197, "step": 3818 }, { "epoch": 0.20608709729642222, "grad_norm": 1.3778380184118229, "learning_rate": 9.089332450955097e-06, "loss": 0.6503, "step": 3819 }, { "epoch": 0.2061410609249366, "grad_norm": 1.0365057247961833, "learning_rate": 9.088872262236266e-06, "loss": 0.4165, "step": 3820 }, { "epoch": 0.20619502455345098, "grad_norm": 1.2670053733517477, "learning_rate": 9.088411970369619e-06, "loss": 0.6734, "step": 3821 }, { "epoch": 0.20624898818196535, "grad_norm": 0.9117451318292406, "learning_rate": 9.087951575368386e-06, "loss": 0.3714, "step": 3822 }, { "epoch": 0.20630295181047972, "grad_norm": 1.0465414362507464, "learning_rate": 9.087491077245801e-06, "loss": 0.4708, "step": 3823 }, { "epoch": 0.20635691543899412, "grad_norm": 1.2517457139784924, "learning_rate": 9.087030476015096e-06, "loss": 0.4552, "step": 3824 }, { "epoch": 0.2064108790675085, "grad_norm": 1.257565526312639, "learning_rate": 9.086569771689511e-06, "loss": 0.6989, "step": 3825 }, { "epoch": 0.2064648426960229, "grad_norm": 1.0479561616606654, "learning_rate": 9.086108964282288e-06, "loss": 0.3842, "step": 3826 }, { "epoch": 0.20651880632453726, "grad_norm": 0.7239587247970184, "learning_rate": 9.08564805380667e-06, "loss": 0.2905, "step": 3827 }, { "epoch": 0.20657276995305165, "grad_norm": 0.7892006314927505, "learning_rate": 9.085187040275903e-06, "loss": 0.4043, "step": 3828 }, { "epoch": 0.20662673358156602, "grad_norm": 1.0232482133900491, "learning_rate": 9.084725923703239e-06, "loss": 0.4761, "step": 3829 }, { "epoch": 0.2066806972100804, "grad_norm": 1.076150120569057, "learning_rate": 9.084264704101928e-06, "loss": 0.3932, "step": 3830 }, { "epoch": 0.2067346608385948, "grad_norm": 0.9726391786688343, "learning_rate": 9.083803381485228e-06, "loss": 0.5401, "step": 3831 }, { "epoch": 0.20678862446710916, "grad_norm": 0.9680419362008221, "learning_rate": 9.083341955866397e-06, "loss": 0.4337, "step": 3832 }, { "epoch": 0.20684258809562356, "grad_norm": 1.152372370389519, "learning_rate": 9.0828804272587e-06, "loss": 0.4794, "step": 3833 }, { "epoch": 0.20689655172413793, "grad_norm": 1.1488019150465112, "learning_rate": 9.082418795675397e-06, "loss": 0.4996, "step": 3834 }, { "epoch": 0.20695051535265233, "grad_norm": 1.1613562490404625, "learning_rate": 9.081957061129759e-06, "loss": 0.5326, "step": 3835 }, { "epoch": 0.2070044789811667, "grad_norm": 1.0321620061465928, "learning_rate": 9.081495223635056e-06, "loss": 0.5818, "step": 3836 }, { "epoch": 0.20705844260968106, "grad_norm": 1.1031430448277395, "learning_rate": 9.081033283204562e-06, "loss": 0.5467, "step": 3837 }, { "epoch": 0.20711240623819546, "grad_norm": 1.17344392101849, "learning_rate": 9.080571239851553e-06, "loss": 0.5775, "step": 3838 }, { "epoch": 0.20716636986670983, "grad_norm": 1.0929116623317745, "learning_rate": 9.080109093589307e-06, "loss": 0.4895, "step": 3839 }, { "epoch": 0.20722033349522423, "grad_norm": 1.1104152489632335, "learning_rate": 9.079646844431109e-06, "loss": 0.5181, "step": 3840 }, { "epoch": 0.2072742971237386, "grad_norm": 1.172165218288601, "learning_rate": 9.079184492390243e-06, "loss": 0.7755, "step": 3841 }, { "epoch": 
0.20732826075225297, "grad_norm": 0.862232851144642, "learning_rate": 9.07872203748e-06, "loss": 0.3866, "step": 3842 }, { "epoch": 0.20738222438076737, "grad_norm": 1.145180630667375, "learning_rate": 9.078259479713668e-06, "loss": 0.4741, "step": 3843 }, { "epoch": 0.20743618800928174, "grad_norm": 0.8201913840696834, "learning_rate": 9.077796819104543e-06, "loss": 0.323, "step": 3844 }, { "epoch": 0.20749015163779613, "grad_norm": 0.9465618237701664, "learning_rate": 9.077334055665924e-06, "loss": 0.51, "step": 3845 }, { "epoch": 0.2075441152663105, "grad_norm": 0.730568131899897, "learning_rate": 9.076871189411109e-06, "loss": 0.262, "step": 3846 }, { "epoch": 0.2075980788948249, "grad_norm": 1.3392263889868612, "learning_rate": 9.0764082203534e-06, "loss": 0.4794, "step": 3847 }, { "epoch": 0.20765204252333927, "grad_norm": 1.1405824253588246, "learning_rate": 9.075945148506108e-06, "loss": 0.8523, "step": 3848 }, { "epoch": 0.20770600615185364, "grad_norm": 1.121804347175139, "learning_rate": 9.075481973882536e-06, "loss": 0.6648, "step": 3849 }, { "epoch": 0.20775996978036804, "grad_norm": 0.8206467960411512, "learning_rate": 9.075018696496002e-06, "loss": 0.3791, "step": 3850 }, { "epoch": 0.2078139334088824, "grad_norm": 0.9918088821101608, "learning_rate": 9.074555316359817e-06, "loss": 0.4499, "step": 3851 }, { "epoch": 0.2078678970373968, "grad_norm": 0.9970567092088943, "learning_rate": 9.0740918334873e-06, "loss": 0.4116, "step": 3852 }, { "epoch": 0.20792186066591117, "grad_norm": 0.9813827498885892, "learning_rate": 9.073628247891775e-06, "loss": 0.2799, "step": 3853 }, { "epoch": 0.20797582429442557, "grad_norm": 1.1529450533520045, "learning_rate": 9.07316455958656e-06, "loss": 0.5303, "step": 3854 }, { "epoch": 0.20802978792293994, "grad_norm": 1.0205694942858632, "learning_rate": 9.072700768584986e-06, "loss": 0.4358, "step": 3855 }, { "epoch": 0.2080837515514543, "grad_norm": 1.0648678275716918, "learning_rate": 9.072236874900385e-06, "loss": 0.5501, "step": 3856 }, { "epoch": 0.2081377151799687, "grad_norm": 1.3816538057096799, "learning_rate": 9.071772878546083e-06, "loss": 0.6802, "step": 3857 }, { "epoch": 0.20819167880848308, "grad_norm": 1.0495137518250075, "learning_rate": 9.07130877953542e-06, "loss": 0.4967, "step": 3858 }, { "epoch": 0.20824564243699747, "grad_norm": 1.1863997644347717, "learning_rate": 9.070844577881735e-06, "loss": 0.5231, "step": 3859 }, { "epoch": 0.20829960606551184, "grad_norm": 0.9326985312300379, "learning_rate": 9.070380273598368e-06, "loss": 0.3829, "step": 3860 }, { "epoch": 0.2083535696940262, "grad_norm": 1.1410302104902033, "learning_rate": 9.069915866698666e-06, "loss": 0.5768, "step": 3861 }, { "epoch": 0.2084075333225406, "grad_norm": 1.0875319487257407, "learning_rate": 9.069451357195974e-06, "loss": 0.6122, "step": 3862 }, { "epoch": 0.20846149695105498, "grad_norm": 0.731919192222664, "learning_rate": 9.068986745103643e-06, "loss": 0.3615, "step": 3863 }, { "epoch": 0.20851546057956938, "grad_norm": 1.108033990239275, "learning_rate": 9.068522030435025e-06, "loss": 0.6173, "step": 3864 }, { "epoch": 0.20856942420808375, "grad_norm": 1.126197723277647, "learning_rate": 9.068057213203481e-06, "loss": 0.5498, "step": 3865 }, { "epoch": 0.20862338783659815, "grad_norm": 1.2839405504606491, "learning_rate": 9.067592293422366e-06, "loss": 0.6349, "step": 3866 }, { "epoch": 0.20867735146511251, "grad_norm": 0.9764652265971667, "learning_rate": 9.067127271105043e-06, "loss": 0.4433, "step": 3867 }, { "epoch": 0.20873131509362688, 
"grad_norm": 0.7594458240336066, "learning_rate": 9.06666214626488e-06, "loss": 0.4252, "step": 3868 }, { "epoch": 0.20878527872214128, "grad_norm": 1.0964259034579231, "learning_rate": 9.066196918915243e-06, "loss": 0.5883, "step": 3869 }, { "epoch": 0.20883924235065565, "grad_norm": 1.1874433087104141, "learning_rate": 9.0657315890695e-06, "loss": 0.6546, "step": 3870 }, { "epoch": 0.20889320597917005, "grad_norm": 1.2360349584605888, "learning_rate": 9.065266156741031e-06, "loss": 0.5418, "step": 3871 }, { "epoch": 0.20894716960768442, "grad_norm": 0.9329975163968138, "learning_rate": 9.06480062194321e-06, "loss": 0.4156, "step": 3872 }, { "epoch": 0.2090011332361988, "grad_norm": 1.4863437214203792, "learning_rate": 9.064334984689417e-06, "loss": 0.6885, "step": 3873 }, { "epoch": 0.20905509686471319, "grad_norm": 0.8371105586740093, "learning_rate": 9.063869244993035e-06, "loss": 0.3983, "step": 3874 }, { "epoch": 0.20910906049322756, "grad_norm": 1.0338699673628178, "learning_rate": 9.06340340286745e-06, "loss": 0.49, "step": 3875 }, { "epoch": 0.20916302412174195, "grad_norm": 0.9841241600143728, "learning_rate": 9.06293745832605e-06, "loss": 0.4871, "step": 3876 }, { "epoch": 0.20921698775025632, "grad_norm": 1.1415178268292643, "learning_rate": 9.062471411382228e-06, "loss": 0.517, "step": 3877 }, { "epoch": 0.20927095137877072, "grad_norm": 1.1482227277060528, "learning_rate": 9.062005262049377e-06, "loss": 0.4614, "step": 3878 }, { "epoch": 0.2093249150072851, "grad_norm": 1.1838484878353295, "learning_rate": 9.061539010340898e-06, "loss": 0.6213, "step": 3879 }, { "epoch": 0.20937887863579946, "grad_norm": 1.2359974630529107, "learning_rate": 9.061072656270186e-06, "loss": 0.7294, "step": 3880 }, { "epoch": 0.20943284226431386, "grad_norm": 1.3210752697964867, "learning_rate": 9.060606199850648e-06, "loss": 0.6458, "step": 3881 }, { "epoch": 0.20948680589282823, "grad_norm": 1.0389529672318072, "learning_rate": 9.060139641095694e-06, "loss": 0.3766, "step": 3882 }, { "epoch": 0.20954076952134262, "grad_norm": 1.0025935314030558, "learning_rate": 9.059672980018725e-06, "loss": 0.5016, "step": 3883 }, { "epoch": 0.209594733149857, "grad_norm": 1.1218859341736258, "learning_rate": 9.059206216633161e-06, "loss": 0.5728, "step": 3884 }, { "epoch": 0.2096486967783714, "grad_norm": 1.202372198238103, "learning_rate": 9.058739350952411e-06, "loss": 0.5637, "step": 3885 }, { "epoch": 0.20970266040688576, "grad_norm": 1.0351766674514313, "learning_rate": 9.058272382989899e-06, "loss": 0.5758, "step": 3886 }, { "epoch": 0.20975662403540013, "grad_norm": 1.135669097182712, "learning_rate": 9.057805312759042e-06, "loss": 0.6708, "step": 3887 }, { "epoch": 0.20981058766391453, "grad_norm": 1.1728584558035258, "learning_rate": 9.057338140273263e-06, "loss": 0.7437, "step": 3888 }, { "epoch": 0.2098645512924289, "grad_norm": 0.9670312473946563, "learning_rate": 9.056870865545995e-06, "loss": 0.4158, "step": 3889 }, { "epoch": 0.2099185149209433, "grad_norm": 1.0169606853053872, "learning_rate": 9.056403488590664e-06, "loss": 0.4104, "step": 3890 }, { "epoch": 0.20997247854945766, "grad_norm": 1.0513006911578175, "learning_rate": 9.055936009420701e-06, "loss": 0.4837, "step": 3891 }, { "epoch": 0.21002644217797203, "grad_norm": 0.994266635767907, "learning_rate": 9.055468428049547e-06, "loss": 0.5132, "step": 3892 }, { "epoch": 0.21008040580648643, "grad_norm": 1.3176735817535101, "learning_rate": 9.055000744490636e-06, "loss": 0.7013, "step": 3893 }, { "epoch": 0.2101343694350008, "grad_norm": 
1.0985426132081926, "learning_rate": 9.054532958757413e-06, "loss": 0.6176, "step": 3894 }, { "epoch": 0.2101883330635152, "grad_norm": 0.9023904960073221, "learning_rate": 9.05406507086332e-06, "loss": 0.4917, "step": 3895 }, { "epoch": 0.21024229669202957, "grad_norm": 0.9832362482417746, "learning_rate": 9.053597080821806e-06, "loss": 0.4066, "step": 3896 }, { "epoch": 0.21029626032054397, "grad_norm": 1.021488711386533, "learning_rate": 9.05312898864632e-06, "loss": 0.4585, "step": 3897 }, { "epoch": 0.21035022394905833, "grad_norm": 1.2145369172421092, "learning_rate": 9.052660794350318e-06, "loss": 0.5546, "step": 3898 }, { "epoch": 0.2104041875775727, "grad_norm": 0.9983674272525982, "learning_rate": 9.052192497947255e-06, "loss": 0.4014, "step": 3899 }, { "epoch": 0.2104581512060871, "grad_norm": 1.0745283458527848, "learning_rate": 9.051724099450591e-06, "loss": 0.5116, "step": 3900 }, { "epoch": 0.21051211483460147, "grad_norm": 0.9298405422290709, "learning_rate": 9.051255598873787e-06, "loss": 0.5071, "step": 3901 }, { "epoch": 0.21056607846311587, "grad_norm": 1.1694964336381588, "learning_rate": 9.050786996230308e-06, "loss": 0.425, "step": 3902 }, { "epoch": 0.21062004209163024, "grad_norm": 1.125314301489721, "learning_rate": 9.050318291533624e-06, "loss": 0.6144, "step": 3903 }, { "epoch": 0.21067400572014464, "grad_norm": 0.9432318833764584, "learning_rate": 9.049849484797205e-06, "loss": 0.4255, "step": 3904 }, { "epoch": 0.210727969348659, "grad_norm": 1.1475244897572214, "learning_rate": 9.049380576034524e-06, "loss": 0.5721, "step": 3905 }, { "epoch": 0.21078193297717338, "grad_norm": 1.0482920789783552, "learning_rate": 9.04891156525906e-06, "loss": 0.4578, "step": 3906 }, { "epoch": 0.21083589660568777, "grad_norm": 0.9043257434772238, "learning_rate": 9.04844245248429e-06, "loss": 0.4027, "step": 3907 }, { "epoch": 0.21088986023420214, "grad_norm": 1.2417123813265774, "learning_rate": 9.0479732377237e-06, "loss": 0.7197, "step": 3908 }, { "epoch": 0.21094382386271654, "grad_norm": 0.9912213476465899, "learning_rate": 9.047503920990773e-06, "loss": 0.4283, "step": 3909 }, { "epoch": 0.2109977874912309, "grad_norm": 1.0625242799866899, "learning_rate": 9.047034502299001e-06, "loss": 0.4694, "step": 3910 }, { "epoch": 0.21105175111974528, "grad_norm": 1.184741449080488, "learning_rate": 9.046564981661872e-06, "loss": 0.5428, "step": 3911 }, { "epoch": 0.21110571474825968, "grad_norm": 1.0232591537901663, "learning_rate": 9.046095359092882e-06, "loss": 0.6114, "step": 3912 }, { "epoch": 0.21115967837677405, "grad_norm": 1.0483303416013794, "learning_rate": 9.045625634605528e-06, "loss": 0.4922, "step": 3913 }, { "epoch": 0.21121364200528844, "grad_norm": 1.0152066138835125, "learning_rate": 9.04515580821331e-06, "loss": 0.4821, "step": 3914 }, { "epoch": 0.2112676056338028, "grad_norm": 0.9005491964594909, "learning_rate": 9.044685879929734e-06, "loss": 0.5389, "step": 3915 }, { "epoch": 0.2113215692623172, "grad_norm": 0.8736380425750268, "learning_rate": 9.044215849768301e-06, "loss": 0.3748, "step": 3916 }, { "epoch": 0.21137553289083158, "grad_norm": 1.1130456743586536, "learning_rate": 9.043745717742527e-06, "loss": 0.4542, "step": 3917 }, { "epoch": 0.21142949651934595, "grad_norm": 0.9087707234184306, "learning_rate": 9.043275483865918e-06, "loss": 0.3466, "step": 3918 }, { "epoch": 0.21148346014786035, "grad_norm": 1.1589649440131629, "learning_rate": 9.042805148151991e-06, "loss": 0.5031, "step": 3919 }, { "epoch": 0.21153742377637472, "grad_norm": 
0.916612921848723, "learning_rate": 9.042334710614267e-06, "loss": 0.4077, "step": 3920 }, { "epoch": 0.21159138740488911, "grad_norm": 1.09943628477085, "learning_rate": 9.041864171266261e-06, "loss": 0.6119, "step": 3921 }, { "epoch": 0.21164535103340348, "grad_norm": 1.0573910691570063, "learning_rate": 9.041393530121503e-06, "loss": 0.5843, "step": 3922 }, { "epoch": 0.21169931466191788, "grad_norm": 1.0216860887252937, "learning_rate": 9.040922787193514e-06, "loss": 0.4922, "step": 3923 }, { "epoch": 0.21175327829043225, "grad_norm": 1.0388331986792314, "learning_rate": 9.040451942495827e-06, "loss": 0.377, "step": 3924 }, { "epoch": 0.21180724191894662, "grad_norm": 1.3289374662872535, "learning_rate": 9.039980996041974e-06, "loss": 0.5038, "step": 3925 }, { "epoch": 0.21186120554746102, "grad_norm": 1.2046519157742162, "learning_rate": 9.03950994784549e-06, "loss": 0.628, "step": 3926 }, { "epoch": 0.2119151691759754, "grad_norm": 1.0317793064865266, "learning_rate": 9.039038797919915e-06, "loss": 0.4974, "step": 3927 }, { "epoch": 0.21196913280448978, "grad_norm": 0.8901791870227522, "learning_rate": 9.038567546278788e-06, "loss": 0.3548, "step": 3928 }, { "epoch": 0.21202309643300415, "grad_norm": 1.0571825549646474, "learning_rate": 9.038096192935655e-06, "loss": 0.5301, "step": 3929 }, { "epoch": 0.21207706006151852, "grad_norm": 0.9965957672642536, "learning_rate": 9.03762473790406e-06, "loss": 0.4422, "step": 3930 }, { "epoch": 0.21213102369003292, "grad_norm": 1.135320265719766, "learning_rate": 9.037153181197558e-06, "loss": 0.5513, "step": 3931 }, { "epoch": 0.2121849873185473, "grad_norm": 0.891997805803228, "learning_rate": 9.036681522829698e-06, "loss": 0.5171, "step": 3932 }, { "epoch": 0.2122389509470617, "grad_norm": 0.7622237444844877, "learning_rate": 9.036209762814038e-06, "loss": 0.3067, "step": 3933 }, { "epoch": 0.21229291457557606, "grad_norm": 1.0378832163556038, "learning_rate": 9.035737901164138e-06, "loss": 0.5174, "step": 3934 }, { "epoch": 0.21234687820409046, "grad_norm": 1.155870327987508, "learning_rate": 9.035265937893556e-06, "loss": 0.6494, "step": 3935 }, { "epoch": 0.21240084183260483, "grad_norm": 1.1442332615468283, "learning_rate": 9.03479387301586e-06, "loss": 0.513, "step": 3936 }, { "epoch": 0.2124548054611192, "grad_norm": 0.874162254995752, "learning_rate": 9.034321706544617e-06, "loss": 0.44, "step": 3937 }, { "epoch": 0.2125087690896336, "grad_norm": 1.184770792385275, "learning_rate": 9.033849438493396e-06, "loss": 0.5672, "step": 3938 }, { "epoch": 0.21256273271814796, "grad_norm": 1.063807350405684, "learning_rate": 9.03337706887577e-06, "loss": 0.4988, "step": 3939 }, { "epoch": 0.21261669634666236, "grad_norm": 1.1456144386108473, "learning_rate": 9.032904597705318e-06, "loss": 0.5399, "step": 3940 }, { "epoch": 0.21267065997517673, "grad_norm": 0.9093594370494846, "learning_rate": 9.032432024995618e-06, "loss": 0.3614, "step": 3941 }, { "epoch": 0.2127246236036911, "grad_norm": 1.1817771519574174, "learning_rate": 9.031959350760254e-06, "loss": 0.6147, "step": 3942 }, { "epoch": 0.2127785872322055, "grad_norm": 0.9428467533589016, "learning_rate": 9.031486575012809e-06, "loss": 0.4093, "step": 3943 }, { "epoch": 0.21283255086071987, "grad_norm": 0.7599893495075416, "learning_rate": 9.031013697766871e-06, "loss": 0.4031, "step": 3944 }, { "epoch": 0.21288651448923426, "grad_norm": 1.1440116340229138, "learning_rate": 9.03054071903603e-06, "loss": 0.515, "step": 3945 }, { "epoch": 0.21294047811774863, "grad_norm": 0.8962400584161349, 
"learning_rate": 9.030067638833882e-06, "loss": 0.3129, "step": 3946 }, { "epoch": 0.21299444174626303, "grad_norm": 1.1504491610991356, "learning_rate": 9.029594457174025e-06, "loss": 0.4481, "step": 3947 }, { "epoch": 0.2130484053747774, "grad_norm": 1.0946426863870848, "learning_rate": 9.029121174070055e-06, "loss": 0.4452, "step": 3948 }, { "epoch": 0.21310236900329177, "grad_norm": 0.9903173583548343, "learning_rate": 9.028647789535575e-06, "loss": 0.5246, "step": 3949 }, { "epoch": 0.21315633263180617, "grad_norm": 1.0607844823404882, "learning_rate": 9.028174303584198e-06, "loss": 0.5039, "step": 3950 }, { "epoch": 0.21321029626032054, "grad_norm": 1.0008862319530052, "learning_rate": 9.027700716229522e-06, "loss": 0.4335, "step": 3951 }, { "epoch": 0.21326425988883493, "grad_norm": 1.2022074823755349, "learning_rate": 9.027227027485163e-06, "loss": 0.4486, "step": 3952 }, { "epoch": 0.2133182235173493, "grad_norm": 0.9284403411512231, "learning_rate": 9.026753237364735e-06, "loss": 0.3795, "step": 3953 }, { "epoch": 0.2133721871458637, "grad_norm": 0.6324710945038569, "learning_rate": 9.026279345881858e-06, "loss": 0.2983, "step": 3954 }, { "epoch": 0.21342615077437807, "grad_norm": 1.1952451858348805, "learning_rate": 9.025805353050149e-06, "loss": 0.7813, "step": 3955 }, { "epoch": 0.21348011440289244, "grad_norm": 1.260142705979331, "learning_rate": 9.02533125888323e-06, "loss": 0.6504, "step": 3956 }, { "epoch": 0.21353407803140684, "grad_norm": 1.2540320144076724, "learning_rate": 9.024857063394729e-06, "loss": 0.6934, "step": 3957 }, { "epoch": 0.2135880416599212, "grad_norm": 1.1477533983017443, "learning_rate": 9.024382766598274e-06, "loss": 0.5293, "step": 3958 }, { "epoch": 0.2136420052884356, "grad_norm": 0.9493108751403639, "learning_rate": 9.023908368507498e-06, "loss": 0.5305, "step": 3959 }, { "epoch": 0.21369596891694997, "grad_norm": 0.9524559818208181, "learning_rate": 9.023433869136034e-06, "loss": 0.4008, "step": 3960 }, { "epoch": 0.21374993254546434, "grad_norm": 0.7820605858019544, "learning_rate": 9.022959268497522e-06, "loss": 0.3139, "step": 3961 }, { "epoch": 0.21380389617397874, "grad_norm": 1.1350732499094303, "learning_rate": 9.022484566605599e-06, "loss": 0.5454, "step": 3962 }, { "epoch": 0.2138578598024931, "grad_norm": 0.9023840038650534, "learning_rate": 9.022009763473912e-06, "loss": 0.4492, "step": 3963 }, { "epoch": 0.2139118234310075, "grad_norm": 0.9966316783480835, "learning_rate": 9.021534859116104e-06, "loss": 0.5936, "step": 3964 }, { "epoch": 0.21396578705952188, "grad_norm": 1.3261019470858544, "learning_rate": 9.021059853545828e-06, "loss": 0.7163, "step": 3965 }, { "epoch": 0.21401975068803628, "grad_norm": 0.8726886947641488, "learning_rate": 9.020584746776731e-06, "loss": 0.4816, "step": 3966 }, { "epoch": 0.21407371431655065, "grad_norm": 1.2910453457306585, "learning_rate": 9.020109538822474e-06, "loss": 0.7472, "step": 3967 }, { "epoch": 0.21412767794506501, "grad_norm": 1.2231730028898598, "learning_rate": 9.019634229696711e-06, "loss": 0.8247, "step": 3968 }, { "epoch": 0.2141816415735794, "grad_norm": 1.1891277143936283, "learning_rate": 9.019158819413104e-06, "loss": 0.6231, "step": 3969 }, { "epoch": 0.21423560520209378, "grad_norm": 1.0118545804911891, "learning_rate": 9.018683307985317e-06, "loss": 0.5139, "step": 3970 }, { "epoch": 0.21428956883060818, "grad_norm": 0.9973902877485001, "learning_rate": 9.018207695427015e-06, "loss": 0.4673, "step": 3971 }, { "epoch": 0.21434353245912255, "grad_norm": 0.8424684420498934, 
"learning_rate": 9.01773198175187e-06, "loss": 0.364, "step": 3972 }, { "epoch": 0.21439749608763695, "grad_norm": 1.0674028067795285, "learning_rate": 9.017256166973554e-06, "loss": 0.523, "step": 3973 }, { "epoch": 0.21445145971615132, "grad_norm": 1.1139550831641658, "learning_rate": 9.01678025110574e-06, "loss": 0.4778, "step": 3974 }, { "epoch": 0.21450542334466569, "grad_norm": 0.8965690917412549, "learning_rate": 9.01630423416211e-06, "loss": 0.369, "step": 3975 }, { "epoch": 0.21455938697318008, "grad_norm": 0.8215376518556305, "learning_rate": 9.015828116156343e-06, "loss": 0.3582, "step": 3976 }, { "epoch": 0.21461335060169445, "grad_norm": 0.8693451111709126, "learning_rate": 9.015351897102123e-06, "loss": 0.4038, "step": 3977 }, { "epoch": 0.21466731423020885, "grad_norm": 1.4012206145220893, "learning_rate": 9.014875577013137e-06, "loss": 0.5622, "step": 3978 }, { "epoch": 0.21472127785872322, "grad_norm": 1.0657626753983134, "learning_rate": 9.014399155903077e-06, "loss": 0.482, "step": 3979 }, { "epoch": 0.2147752414872376, "grad_norm": 1.129379227255306, "learning_rate": 9.013922633785632e-06, "loss": 0.5782, "step": 3980 }, { "epoch": 0.214829205115752, "grad_norm": 1.2337238539095285, "learning_rate": 9.0134460106745e-06, "loss": 0.6213, "step": 3981 }, { "epoch": 0.21488316874426636, "grad_norm": 1.0357518292975791, "learning_rate": 9.012969286583381e-06, "loss": 0.5107, "step": 3982 }, { "epoch": 0.21493713237278075, "grad_norm": 1.0196893526290927, "learning_rate": 9.012492461525976e-06, "loss": 0.4394, "step": 3983 }, { "epoch": 0.21499109600129512, "grad_norm": 0.8946379944661457, "learning_rate": 9.012015535515987e-06, "loss": 0.4383, "step": 3984 }, { "epoch": 0.21504505962980952, "grad_norm": 1.0568577099659149, "learning_rate": 9.011538508567122e-06, "loss": 0.5242, "step": 3985 }, { "epoch": 0.2150990232583239, "grad_norm": 0.9451234441609712, "learning_rate": 9.011061380693094e-06, "loss": 0.426, "step": 3986 }, { "epoch": 0.21515298688683826, "grad_norm": 1.0153537833073967, "learning_rate": 9.010584151907615e-06, "loss": 0.5366, "step": 3987 }, { "epoch": 0.21520695051535266, "grad_norm": 1.1497605468617007, "learning_rate": 9.010106822224398e-06, "loss": 0.6683, "step": 3988 }, { "epoch": 0.21526091414386703, "grad_norm": 1.212212990329426, "learning_rate": 9.009629391657166e-06, "loss": 0.596, "step": 3989 }, { "epoch": 0.21531487777238142, "grad_norm": 0.950751734965433, "learning_rate": 9.009151860219637e-06, "loss": 0.4963, "step": 3990 }, { "epoch": 0.2153688414008958, "grad_norm": 1.0972075578089489, "learning_rate": 9.008674227925539e-06, "loss": 0.4146, "step": 3991 }, { "epoch": 0.21542280502941016, "grad_norm": 1.1645867041581603, "learning_rate": 9.0081964947886e-06, "loss": 0.6042, "step": 3992 }, { "epoch": 0.21547676865792456, "grad_norm": 1.4023136307320923, "learning_rate": 9.007718660822547e-06, "loss": 0.6124, "step": 3993 }, { "epoch": 0.21553073228643893, "grad_norm": 1.0124618801017364, "learning_rate": 9.007240726041115e-06, "loss": 0.4454, "step": 3994 }, { "epoch": 0.21558469591495333, "grad_norm": 0.9208581619967927, "learning_rate": 9.00676269045804e-06, "loss": 0.4426, "step": 3995 }, { "epoch": 0.2156386595434677, "grad_norm": 0.8666223155897976, "learning_rate": 9.006284554087065e-06, "loss": 0.4424, "step": 3996 }, { "epoch": 0.2156926231719821, "grad_norm": 1.3599242310249426, "learning_rate": 9.005806316941927e-06, "loss": 0.6218, "step": 3997 }, { "epoch": 0.21574658680049646, "grad_norm": 0.8411577808261556, "learning_rate": 
9.005327979036373e-06, "loss": 0.3873, "step": 3998 }, { "epoch": 0.21580055042901083, "grad_norm": 0.9657131027277541, "learning_rate": 9.004849540384153e-06, "loss": 0.5001, "step": 3999 }, { "epoch": 0.21585451405752523, "grad_norm": 1.292862194383677, "learning_rate": 9.004371000999013e-06, "loss": 0.6114, "step": 4000 }, { "epoch": 0.21585451405752523, "eval_loss": 0.5817294120788574, "eval_runtime": 160.8162, "eval_samples_per_second": 21.385, "eval_steps_per_second": 0.895, "step": 4000 }, { "epoch": 0.2159084776860396, "grad_norm": 1.0563048429126018, "learning_rate": 9.003892360894711e-06, "loss": 0.5492, "step": 4001 }, { "epoch": 0.215962441314554, "grad_norm": 1.0515773777035606, "learning_rate": 9.003413620085002e-06, "loss": 0.448, "step": 4002 }, { "epoch": 0.21601640494306837, "grad_norm": 1.1816263709952683, "learning_rate": 9.002934778583647e-06, "loss": 0.5911, "step": 4003 }, { "epoch": 0.21607036857158277, "grad_norm": 0.9884789111703723, "learning_rate": 9.002455836404404e-06, "loss": 0.5374, "step": 4004 }, { "epoch": 0.21612433220009714, "grad_norm": 0.9484853864647014, "learning_rate": 9.001976793561044e-06, "loss": 0.5617, "step": 4005 }, { "epoch": 0.2161782958286115, "grad_norm": 1.18031054644485, "learning_rate": 9.001497650067331e-06, "loss": 0.4511, "step": 4006 }, { "epoch": 0.2162322594571259, "grad_norm": 1.252263560155836, "learning_rate": 9.001018405937038e-06, "loss": 0.5487, "step": 4007 }, { "epoch": 0.21628622308564027, "grad_norm": 1.0061238804944843, "learning_rate": 9.000539061183939e-06, "loss": 0.5622, "step": 4008 }, { "epoch": 0.21634018671415467, "grad_norm": 0.8942974252079775, "learning_rate": 9.00005961582181e-06, "loss": 0.3772, "step": 4009 }, { "epoch": 0.21639415034266904, "grad_norm": 0.9514468082825852, "learning_rate": 8.999580069864429e-06, "loss": 0.4783, "step": 4010 }, { "epoch": 0.2164481139711834, "grad_norm": 1.0411101974513266, "learning_rate": 8.999100423325585e-06, "loss": 0.4902, "step": 4011 }, { "epoch": 0.2165020775996978, "grad_norm": 1.0168004115480387, "learning_rate": 8.998620676219057e-06, "loss": 0.5075, "step": 4012 }, { "epoch": 0.21655604122821218, "grad_norm": 1.2457042345560991, "learning_rate": 8.998140828558634e-06, "loss": 0.5782, "step": 4013 }, { "epoch": 0.21661000485672657, "grad_norm": 0.8665809394188911, "learning_rate": 8.997660880358111e-06, "loss": 0.4262, "step": 4014 }, { "epoch": 0.21666396848524094, "grad_norm": 1.1878243764904397, "learning_rate": 8.997180831631279e-06, "loss": 0.5639, "step": 4015 }, { "epoch": 0.21671793211375534, "grad_norm": 1.0888283580903304, "learning_rate": 8.996700682391938e-06, "loss": 0.3735, "step": 4016 }, { "epoch": 0.2167718957422697, "grad_norm": 1.2367023549717704, "learning_rate": 8.996220432653884e-06, "loss": 0.6938, "step": 4017 }, { "epoch": 0.21682585937078408, "grad_norm": 0.9808439328255408, "learning_rate": 8.995740082430924e-06, "loss": 0.4821, "step": 4018 }, { "epoch": 0.21687982299929848, "grad_norm": 1.0366699148700493, "learning_rate": 8.995259631736862e-06, "loss": 0.5017, "step": 4019 }, { "epoch": 0.21693378662781285, "grad_norm": 0.9273114940176643, "learning_rate": 8.994779080585503e-06, "loss": 0.3972, "step": 4020 }, { "epoch": 0.21698775025632724, "grad_norm": 1.2573622007462064, "learning_rate": 8.994298428990667e-06, "loss": 0.6585, "step": 4021 }, { "epoch": 0.2170417138848416, "grad_norm": 1.0937169778287301, "learning_rate": 8.99381767696616e-06, "loss": 0.5648, "step": 4022 }, { "epoch": 0.217095677513356, "grad_norm": 
1.33401456577074, "learning_rate": 8.993336824525804e-06, "loss": 0.6267, "step": 4023 }, { "epoch": 0.21714964114187038, "grad_norm": 1.0501639380316055, "learning_rate": 8.992855871683417e-06, "loss": 0.4715, "step": 4024 }, { "epoch": 0.21720360477038475, "grad_norm": 1.2312575956203569, "learning_rate": 8.992374818452823e-06, "loss": 0.5575, "step": 4025 }, { "epoch": 0.21725756839889915, "grad_norm": 1.0149235200835898, "learning_rate": 8.99189366484785e-06, "loss": 0.4831, "step": 4026 }, { "epoch": 0.21731153202741352, "grad_norm": 1.000839083245443, "learning_rate": 8.991412410882323e-06, "loss": 0.451, "step": 4027 }, { "epoch": 0.21736549565592791, "grad_norm": 1.0214543211395493, "learning_rate": 8.990931056570075e-06, "loss": 0.5052, "step": 4028 }, { "epoch": 0.21741945928444228, "grad_norm": 0.967045541897537, "learning_rate": 8.990449601924943e-06, "loss": 0.4166, "step": 4029 }, { "epoch": 0.21747342291295665, "grad_norm": 1.3024119976346453, "learning_rate": 8.989968046960761e-06, "loss": 0.6361, "step": 4030 }, { "epoch": 0.21752738654147105, "grad_norm": 1.3366669988509314, "learning_rate": 8.989486391691372e-06, "loss": 0.5269, "step": 4031 }, { "epoch": 0.21758135016998542, "grad_norm": 1.029736233840357, "learning_rate": 8.989004636130618e-06, "loss": 0.4635, "step": 4032 }, { "epoch": 0.21763531379849982, "grad_norm": 1.1642415151600105, "learning_rate": 8.988522780292345e-06, "loss": 0.5301, "step": 4033 }, { "epoch": 0.2176892774270142, "grad_norm": 1.3044939567494471, "learning_rate": 8.988040824190402e-06, "loss": 0.5727, "step": 4034 }, { "epoch": 0.21774324105552859, "grad_norm": 1.243176583591493, "learning_rate": 8.987558767838642e-06, "loss": 0.6035, "step": 4035 }, { "epoch": 0.21779720468404296, "grad_norm": 1.2813814586958956, "learning_rate": 8.987076611250918e-06, "loss": 0.5033, "step": 4036 }, { "epoch": 0.21785116831255733, "grad_norm": 1.0621116910036337, "learning_rate": 8.98659435444109e-06, "loss": 0.3963, "step": 4037 }, { "epoch": 0.21790513194107172, "grad_norm": 1.0541490348535043, "learning_rate": 8.986111997423017e-06, "loss": 0.4538, "step": 4038 }, { "epoch": 0.2179590955695861, "grad_norm": 0.8236509031673342, "learning_rate": 8.985629540210561e-06, "loss": 0.4094, "step": 4039 }, { "epoch": 0.2180130591981005, "grad_norm": 1.2004135326224785, "learning_rate": 8.985146982817593e-06, "loss": 0.4932, "step": 4040 }, { "epoch": 0.21806702282661486, "grad_norm": 1.1167471145002856, "learning_rate": 8.984664325257977e-06, "loss": 0.4979, "step": 4041 }, { "epoch": 0.21812098645512926, "grad_norm": 1.0271973268916812, "learning_rate": 8.984181567545591e-06, "loss": 0.5463, "step": 4042 }, { "epoch": 0.21817495008364363, "grad_norm": 1.3530530212199736, "learning_rate": 8.983698709694301e-06, "loss": 0.631, "step": 4043 }, { "epoch": 0.218228913712158, "grad_norm": 1.1438034775101846, "learning_rate": 8.983215751717993e-06, "loss": 0.5026, "step": 4044 }, { "epoch": 0.2182828773406724, "grad_norm": 1.1642893761609479, "learning_rate": 8.982732693630544e-06, "loss": 0.7373, "step": 4045 }, { "epoch": 0.21833684096918676, "grad_norm": 1.0935518027083104, "learning_rate": 8.982249535445839e-06, "loss": 0.3831, "step": 4046 }, { "epoch": 0.21839080459770116, "grad_norm": 1.2980427621761101, "learning_rate": 8.981766277177764e-06, "loss": 0.6428, "step": 4047 }, { "epoch": 0.21844476822621553, "grad_norm": 1.017452541800854, "learning_rate": 8.981282918840207e-06, "loss": 0.5347, "step": 4048 }, { "epoch": 0.2184987318547299, "grad_norm": 
0.9050196178244304, "learning_rate": 8.980799460447062e-06, "loss": 0.3752, "step": 4049 }, { "epoch": 0.2185526954832443, "grad_norm": 0.9787900219809772, "learning_rate": 8.980315902012225e-06, "loss": 0.5528, "step": 4050 }, { "epoch": 0.21860665911175867, "grad_norm": 0.9616656827000798, "learning_rate": 8.97983224354959e-06, "loss": 0.4803, "step": 4051 }, { "epoch": 0.21866062274027306, "grad_norm": 0.9220009905140023, "learning_rate": 8.97934848507306e-06, "loss": 0.5308, "step": 4052 }, { "epoch": 0.21871458636878743, "grad_norm": 1.161256216281666, "learning_rate": 8.978864626596539e-06, "loss": 0.6176, "step": 4053 }, { "epoch": 0.21876854999730183, "grad_norm": 1.0634278657312266, "learning_rate": 8.978380668133935e-06, "loss": 0.39, "step": 4054 }, { "epoch": 0.2188225136258162, "grad_norm": 1.030157959467724, "learning_rate": 8.977896609699155e-06, "loss": 0.3982, "step": 4055 }, { "epoch": 0.21887647725433057, "grad_norm": 0.8235031171575286, "learning_rate": 8.977412451306113e-06, "loss": 0.3665, "step": 4056 }, { "epoch": 0.21893044088284497, "grad_norm": 1.1368618754906603, "learning_rate": 8.976928192968724e-06, "loss": 0.4891, "step": 4057 }, { "epoch": 0.21898440451135934, "grad_norm": 1.0438595482066209, "learning_rate": 8.976443834700903e-06, "loss": 0.6424, "step": 4058 }, { "epoch": 0.21903836813987373, "grad_norm": 0.9972526266260556, "learning_rate": 8.975959376516575e-06, "loss": 0.4432, "step": 4059 }, { "epoch": 0.2190923317683881, "grad_norm": 0.957352258519993, "learning_rate": 8.975474818429664e-06, "loss": 0.5539, "step": 4060 }, { "epoch": 0.21914629539690247, "grad_norm": 1.3009896276290596, "learning_rate": 8.974990160454094e-06, "loss": 0.5961, "step": 4061 }, { "epoch": 0.21920025902541687, "grad_norm": 1.2429791798238328, "learning_rate": 8.974505402603793e-06, "loss": 0.5173, "step": 4062 }, { "epoch": 0.21925422265393124, "grad_norm": 1.0283813761778038, "learning_rate": 8.974020544892699e-06, "loss": 0.5579, "step": 4063 }, { "epoch": 0.21930818628244564, "grad_norm": 0.8985381319945738, "learning_rate": 8.973535587334743e-06, "loss": 0.4627, "step": 4064 }, { "epoch": 0.21936214991096, "grad_norm": 0.9537275172497061, "learning_rate": 8.973050529943863e-06, "loss": 0.4152, "step": 4065 }, { "epoch": 0.2194161135394744, "grad_norm": 0.865533992664969, "learning_rate": 8.972565372734004e-06, "loss": 0.4835, "step": 4066 }, { "epoch": 0.21947007716798878, "grad_norm": 0.8934062523831986, "learning_rate": 8.972080115719107e-06, "loss": 0.3278, "step": 4067 }, { "epoch": 0.21952404079650314, "grad_norm": 1.0477904992543636, "learning_rate": 8.971594758913119e-06, "loss": 0.5012, "step": 4068 }, { "epoch": 0.21957800442501754, "grad_norm": 0.9616828304255225, "learning_rate": 8.97110930232999e-06, "loss": 0.5428, "step": 4069 }, { "epoch": 0.2196319680535319, "grad_norm": 1.1313341683599356, "learning_rate": 8.970623745983673e-06, "loss": 0.5242, "step": 4070 }, { "epoch": 0.2196859316820463, "grad_norm": 1.0418498418774882, "learning_rate": 8.97013808988812e-06, "loss": 0.5005, "step": 4071 }, { "epoch": 0.21973989531056068, "grad_norm": 0.7414784823001763, "learning_rate": 8.969652334057296e-06, "loss": 0.2434, "step": 4072 }, { "epoch": 0.21979385893907508, "grad_norm": 1.072182239328063, "learning_rate": 8.969166478505155e-06, "loss": 0.5964, "step": 4073 }, { "epoch": 0.21984782256758945, "grad_norm": 1.1612831183380452, "learning_rate": 8.968680523245665e-06, "loss": 0.3883, "step": 4074 }, { "epoch": 0.21990178619610382, "grad_norm": 
1.0297712400615584, "learning_rate": 8.968194468292792e-06, "loss": 0.4257, "step": 4075 }, { "epoch": 0.2199557498246182, "grad_norm": 0.7961173237206042, "learning_rate": 8.967708313660504e-06, "loss": 0.3833, "step": 4076 }, { "epoch": 0.22000971345313258, "grad_norm": 1.167388854994612, "learning_rate": 8.967222059362777e-06, "loss": 0.6036, "step": 4077 }, { "epoch": 0.22006367708164698, "grad_norm": 1.0281012597884969, "learning_rate": 8.966735705413584e-06, "loss": 0.5783, "step": 4078 }, { "epoch": 0.22011764071016135, "grad_norm": 0.9551695348081268, "learning_rate": 8.966249251826905e-06, "loss": 0.4604, "step": 4079 }, { "epoch": 0.22017160433867572, "grad_norm": 1.124765000567545, "learning_rate": 8.96576269861672e-06, "loss": 0.571, "step": 4080 }, { "epoch": 0.22022556796719012, "grad_norm": 0.9112201646827748, "learning_rate": 8.965276045797011e-06, "loss": 0.437, "step": 4081 }, { "epoch": 0.2202795315957045, "grad_norm": 1.0292561958525943, "learning_rate": 8.96478929338177e-06, "loss": 0.4876, "step": 4082 }, { "epoch": 0.22033349522421888, "grad_norm": 0.9877788416416108, "learning_rate": 8.964302441384983e-06, "loss": 0.4398, "step": 4083 }, { "epoch": 0.22038745885273325, "grad_norm": 0.9322745236600732, "learning_rate": 8.96381548982064e-06, "loss": 0.4167, "step": 4084 }, { "epoch": 0.22044142248124765, "grad_norm": 0.9877958398889292, "learning_rate": 8.963328438702744e-06, "loss": 0.4249, "step": 4085 }, { "epoch": 0.22049538610976202, "grad_norm": 1.1734113771186048, "learning_rate": 8.962841288045287e-06, "loss": 0.524, "step": 4086 }, { "epoch": 0.2205493497382764, "grad_norm": 1.0822743828770462, "learning_rate": 8.962354037862274e-06, "loss": 0.5177, "step": 4087 }, { "epoch": 0.2206033133667908, "grad_norm": 0.9996964330874292, "learning_rate": 8.961866688167708e-06, "loss": 0.4903, "step": 4088 }, { "epoch": 0.22065727699530516, "grad_norm": 1.6177500774623486, "learning_rate": 8.961379238975594e-06, "loss": 0.5319, "step": 4089 }, { "epoch": 0.22071124062381955, "grad_norm": 0.9509287108513386, "learning_rate": 8.960891690299944e-06, "loss": 0.4636, "step": 4090 }, { "epoch": 0.22076520425233392, "grad_norm": 1.031238015015512, "learning_rate": 8.960404042154767e-06, "loss": 0.6424, "step": 4091 }, { "epoch": 0.22081916788084832, "grad_norm": 1.0106914192316385, "learning_rate": 8.959916294554084e-06, "loss": 0.3962, "step": 4092 }, { "epoch": 0.2208731315093627, "grad_norm": 1.0910875891908924, "learning_rate": 8.959428447511912e-06, "loss": 0.5329, "step": 4093 }, { "epoch": 0.22092709513787706, "grad_norm": 1.0385840012897831, "learning_rate": 8.95894050104227e-06, "loss": 0.4289, "step": 4094 }, { "epoch": 0.22098105876639146, "grad_norm": 1.0045327230079815, "learning_rate": 8.95845245515918e-06, "loss": 0.5755, "step": 4095 }, { "epoch": 0.22103502239490583, "grad_norm": 0.8823817879051734, "learning_rate": 8.957964309876675e-06, "loss": 0.4123, "step": 4096 }, { "epoch": 0.22108898602342023, "grad_norm": 0.8540134158441888, "learning_rate": 8.957476065208782e-06, "loss": 0.379, "step": 4097 }, { "epoch": 0.2211429496519346, "grad_norm": 1.1624954516311459, "learning_rate": 8.956987721169532e-06, "loss": 0.594, "step": 4098 }, { "epoch": 0.22119691328044896, "grad_norm": 1.13859785979404, "learning_rate": 8.956499277772961e-06, "loss": 0.4572, "step": 4099 }, { "epoch": 0.22125087690896336, "grad_norm": 0.9655043430426752, "learning_rate": 8.956010735033109e-06, "loss": 0.4984, "step": 4100 }, { "epoch": 0.22130484053747773, "grad_norm": 
1.1517416895404495, "learning_rate": 8.955522092964017e-06, "loss": 0.5561, "step": 4101 }, { "epoch": 0.22135880416599213, "grad_norm": 1.0374749142356428, "learning_rate": 8.955033351579729e-06, "loss": 0.4836, "step": 4102 }, { "epoch": 0.2214127677945065, "grad_norm": 1.4497411288788788, "learning_rate": 8.954544510894291e-06, "loss": 0.7018, "step": 4103 }, { "epoch": 0.2214667314230209, "grad_norm": 0.8015117899857264, "learning_rate": 8.954055570921751e-06, "loss": 0.3448, "step": 4104 }, { "epoch": 0.22152069505153527, "grad_norm": 0.9796133860791367, "learning_rate": 8.953566531676166e-06, "loss": 0.4345, "step": 4105 }, { "epoch": 0.22157465868004964, "grad_norm": 1.2846425814363263, "learning_rate": 8.95307739317159e-06, "loss": 0.6509, "step": 4106 }, { "epoch": 0.22162862230856403, "grad_norm": 0.9288885421021263, "learning_rate": 8.952588155422079e-06, "loss": 0.4174, "step": 4107 }, { "epoch": 0.2216825859370784, "grad_norm": 0.9458164143901633, "learning_rate": 8.952098818441697e-06, "loss": 0.3676, "step": 4108 }, { "epoch": 0.2217365495655928, "grad_norm": 1.1121638402023792, "learning_rate": 8.951609382244506e-06, "loss": 0.5679, "step": 4109 }, { "epoch": 0.22179051319410717, "grad_norm": 0.9729036549505502, "learning_rate": 8.951119846844576e-06, "loss": 0.4568, "step": 4110 }, { "epoch": 0.22184447682262157, "grad_norm": 1.171064719184763, "learning_rate": 8.950630212255971e-06, "loss": 0.4814, "step": 4111 }, { "epoch": 0.22189844045113594, "grad_norm": 0.9611435363428809, "learning_rate": 8.95014047849277e-06, "loss": 0.4274, "step": 4112 }, { "epoch": 0.2219524040796503, "grad_norm": 1.1424155034886319, "learning_rate": 8.949650645569042e-06, "loss": 0.5637, "step": 4113 }, { "epoch": 0.2220063677081647, "grad_norm": 0.967686409838488, "learning_rate": 8.949160713498872e-06, "loss": 0.411, "step": 4114 }, { "epoch": 0.22206033133667907, "grad_norm": 0.8086050482496685, "learning_rate": 8.948670682296338e-06, "loss": 0.3656, "step": 4115 }, { "epoch": 0.22211429496519347, "grad_norm": 1.0702450413453835, "learning_rate": 8.948180551975522e-06, "loss": 0.6477, "step": 4116 }, { "epoch": 0.22216825859370784, "grad_norm": 1.01536331206658, "learning_rate": 8.947690322550514e-06, "loss": 0.439, "step": 4117 }, { "epoch": 0.2222222222222222, "grad_norm": 0.903582903828541, "learning_rate": 8.947199994035402e-06, "loss": 0.4243, "step": 4118 }, { "epoch": 0.2222761858507366, "grad_norm": 0.9374751026841127, "learning_rate": 8.94670956644428e-06, "loss": 0.4227, "step": 4119 }, { "epoch": 0.22233014947925098, "grad_norm": 1.0596942250130763, "learning_rate": 8.94621903979124e-06, "loss": 0.62, "step": 4120 }, { "epoch": 0.22238411310776537, "grad_norm": 0.8436998136427647, "learning_rate": 8.945728414090384e-06, "loss": 0.4039, "step": 4121 }, { "epoch": 0.22243807673627974, "grad_norm": 0.7628293416739909, "learning_rate": 8.945237689355814e-06, "loss": 0.3252, "step": 4122 }, { "epoch": 0.22249204036479414, "grad_norm": 1.0350417374084566, "learning_rate": 8.944746865601629e-06, "loss": 0.5435, "step": 4123 }, { "epoch": 0.2225460039933085, "grad_norm": 0.9698398203088243, "learning_rate": 8.94425594284194e-06, "loss": 0.4395, "step": 4124 }, { "epoch": 0.22259996762182288, "grad_norm": 1.0930077205524755, "learning_rate": 8.943764921090854e-06, "loss": 0.5986, "step": 4125 }, { "epoch": 0.22265393125033728, "grad_norm": 1.1529034604248085, "learning_rate": 8.943273800362485e-06, "loss": 0.622, "step": 4126 }, { "epoch": 0.22270789487885165, "grad_norm": 
0.980949974799224, "learning_rate": 8.942782580670946e-06, "loss": 0.5569, "step": 4127 }, { "epoch": 0.22276185850736605, "grad_norm": 1.2002415814334453, "learning_rate": 8.94229126203036e-06, "loss": 0.7676, "step": 4128 }, { "epoch": 0.22281582213588041, "grad_norm": 1.1664303687141202, "learning_rate": 8.941799844454842e-06, "loss": 0.5861, "step": 4129 }, { "epoch": 0.22286978576439478, "grad_norm": 1.0338148026822138, "learning_rate": 8.94130832795852e-06, "loss": 0.3795, "step": 4130 }, { "epoch": 0.22292374939290918, "grad_norm": 1.1187286175572075, "learning_rate": 8.94081671255552e-06, "loss": 0.4514, "step": 4131 }, { "epoch": 0.22297771302142355, "grad_norm": 1.046668632098277, "learning_rate": 8.94032499825997e-06, "loss": 0.5655, "step": 4132 }, { "epoch": 0.22303167664993795, "grad_norm": 0.9836688524422647, "learning_rate": 8.939833185086005e-06, "loss": 0.3575, "step": 4133 }, { "epoch": 0.22308564027845232, "grad_norm": 0.9791946066732348, "learning_rate": 8.939341273047758e-06, "loss": 0.4513, "step": 4134 }, { "epoch": 0.22313960390696672, "grad_norm": 0.8850210780279401, "learning_rate": 8.938849262159368e-06, "loss": 0.4186, "step": 4135 }, { "epoch": 0.22319356753548109, "grad_norm": 1.2294477585170056, "learning_rate": 8.938357152434975e-06, "loss": 0.6458, "step": 4136 }, { "epoch": 0.22324753116399546, "grad_norm": 1.241116771816486, "learning_rate": 8.937864943888721e-06, "loss": 0.6709, "step": 4137 }, { "epoch": 0.22330149479250985, "grad_norm": 1.0100196945962354, "learning_rate": 8.937372636534757e-06, "loss": 0.5567, "step": 4138 }, { "epoch": 0.22335545842102422, "grad_norm": 1.1201647378160644, "learning_rate": 8.93688023038723e-06, "loss": 0.5282, "step": 4139 }, { "epoch": 0.22340942204953862, "grad_norm": 1.2610779113955102, "learning_rate": 8.936387725460293e-06, "loss": 0.5776, "step": 4140 }, { "epoch": 0.223463385678053, "grad_norm": 1.0957387229292392, "learning_rate": 8.9358951217681e-06, "loss": 0.5212, "step": 4141 }, { "epoch": 0.2235173493065674, "grad_norm": 1.1782102401401422, "learning_rate": 8.935402419324809e-06, "loss": 0.5886, "step": 4142 }, { "epoch": 0.22357131293508176, "grad_norm": 1.1713213594844538, "learning_rate": 8.934909618144583e-06, "loss": 0.5134, "step": 4143 }, { "epoch": 0.22362527656359613, "grad_norm": 1.4572245958079553, "learning_rate": 8.934416718241582e-06, "loss": 0.58, "step": 4144 }, { "epoch": 0.22367924019211052, "grad_norm": 0.8982852214507034, "learning_rate": 8.933923719629975e-06, "loss": 0.4228, "step": 4145 }, { "epoch": 0.2237332038206249, "grad_norm": 1.0367242252408408, "learning_rate": 8.93343062232393e-06, "loss": 0.36, "step": 4146 }, { "epoch": 0.2237871674491393, "grad_norm": 1.106136579679524, "learning_rate": 8.932937426337619e-06, "loss": 0.4767, "step": 4147 }, { "epoch": 0.22384113107765366, "grad_norm": 0.9708779660580348, "learning_rate": 8.93244413168522e-06, "loss": 0.4327, "step": 4148 }, { "epoch": 0.22389509470616803, "grad_norm": 0.8729816428325439, "learning_rate": 8.931950738380907e-06, "loss": 0.4061, "step": 4149 }, { "epoch": 0.22394905833468243, "grad_norm": 1.1169144939160642, "learning_rate": 8.931457246438862e-06, "loss": 0.4832, "step": 4150 }, { "epoch": 0.2240030219631968, "grad_norm": 0.8734765413553331, "learning_rate": 8.930963655873268e-06, "loss": 0.3442, "step": 4151 }, { "epoch": 0.2240569855917112, "grad_norm": 1.1580977068059468, "learning_rate": 8.930469966698313e-06, "loss": 0.5714, "step": 4152 }, { "epoch": 0.22411094922022556, "grad_norm": 
1.3200087552695383, "learning_rate": 8.929976178928185e-06, "loss": 0.4888, "step": 4153 }, { "epoch": 0.22416491284873996, "grad_norm": 1.0242800604911402, "learning_rate": 8.929482292577075e-06, "loss": 0.4936, "step": 4154 }, { "epoch": 0.22421887647725433, "grad_norm": 1.200200188353733, "learning_rate": 8.928988307659178e-06, "loss": 0.5812, "step": 4155 }, { "epoch": 0.2242728401057687, "grad_norm": 0.9934708027308374, "learning_rate": 8.928494224188694e-06, "loss": 0.4851, "step": 4156 }, { "epoch": 0.2243268037342831, "grad_norm": 0.8781530264094205, "learning_rate": 8.92800004217982e-06, "loss": 0.4091, "step": 4157 }, { "epoch": 0.22438076736279747, "grad_norm": 1.0636847590192327, "learning_rate": 8.927505761646764e-06, "loss": 0.6683, "step": 4158 }, { "epoch": 0.22443473099131186, "grad_norm": 0.8592389729553446, "learning_rate": 8.927011382603725e-06, "loss": 0.3564, "step": 4159 }, { "epoch": 0.22448869461982623, "grad_norm": 1.1971357086116088, "learning_rate": 8.92651690506492e-06, "loss": 0.5926, "step": 4160 }, { "epoch": 0.22454265824834063, "grad_norm": 0.8433865543220043, "learning_rate": 8.926022329044555e-06, "loss": 0.4982, "step": 4161 }, { "epoch": 0.224596621876855, "grad_norm": 1.2121082482377088, "learning_rate": 8.925527654556846e-06, "loss": 0.6006, "step": 4162 }, { "epoch": 0.22465058550536937, "grad_norm": 1.0063795255508543, "learning_rate": 8.925032881616013e-06, "loss": 0.4056, "step": 4163 }, { "epoch": 0.22470454913388377, "grad_norm": 1.1077478268428798, "learning_rate": 8.924538010236273e-06, "loss": 0.4408, "step": 4164 }, { "epoch": 0.22475851276239814, "grad_norm": 1.0763533584744858, "learning_rate": 8.92404304043185e-06, "loss": 0.6446, "step": 4165 }, { "epoch": 0.22481247639091254, "grad_norm": 1.17910620580634, "learning_rate": 8.92354797221697e-06, "loss": 0.5861, "step": 4166 }, { "epoch": 0.2248664400194269, "grad_norm": 1.1481134849077221, "learning_rate": 8.923052805605861e-06, "loss": 0.4669, "step": 4167 }, { "epoch": 0.22492040364794127, "grad_norm": 0.9608982268793681, "learning_rate": 8.922557540612759e-06, "loss": 0.3779, "step": 4168 }, { "epoch": 0.22497436727645567, "grad_norm": 1.018488835116828, "learning_rate": 8.922062177251893e-06, "loss": 0.53, "step": 4169 }, { "epoch": 0.22502833090497004, "grad_norm": 1.2949182836445468, "learning_rate": 8.921566715537502e-06, "loss": 0.6546, "step": 4170 }, { "epoch": 0.22508229453348444, "grad_norm": 1.1440041555810876, "learning_rate": 8.921071155483827e-06, "loss": 0.5664, "step": 4171 }, { "epoch": 0.2251362581619988, "grad_norm": 0.9680233122609794, "learning_rate": 8.920575497105111e-06, "loss": 0.3881, "step": 4172 }, { "epoch": 0.2251902217905132, "grad_norm": 0.9276079948297263, "learning_rate": 8.920079740415598e-06, "loss": 0.4471, "step": 4173 }, { "epoch": 0.22524418541902758, "grad_norm": 1.1584316767595777, "learning_rate": 8.919583885429537e-06, "loss": 0.5973, "step": 4174 }, { "epoch": 0.22529814904754195, "grad_norm": 1.0531476263513175, "learning_rate": 8.919087932161182e-06, "loss": 0.4471, "step": 4175 }, { "epoch": 0.22535211267605634, "grad_norm": 1.1209996820580306, "learning_rate": 8.918591880624783e-06, "loss": 0.6233, "step": 4176 }, { "epoch": 0.2254060763045707, "grad_norm": 0.9819268238797572, "learning_rate": 8.9180957308346e-06, "loss": 0.4733, "step": 4177 }, { "epoch": 0.2254600399330851, "grad_norm": 0.970423572480629, "learning_rate": 8.917599482804892e-06, "loss": 0.4026, "step": 4178 }, { "epoch": 0.22551400356159948, "grad_norm": 
0.9536961469082397, "learning_rate": 8.91710313654992e-06, "loss": 0.4697, "step": 4179 }, { "epoch": 0.22556796719011385, "grad_norm": 1.0970101624574227, "learning_rate": 8.916606692083953e-06, "loss": 0.554, "step": 4180 }, { "epoch": 0.22562193081862825, "grad_norm": 1.2337050362657989, "learning_rate": 8.916110149421257e-06, "loss": 0.5846, "step": 4181 }, { "epoch": 0.22567589444714262, "grad_norm": 0.9530236030275984, "learning_rate": 8.915613508576104e-06, "loss": 0.4349, "step": 4182 }, { "epoch": 0.22572985807565701, "grad_norm": 1.0512358300529536, "learning_rate": 8.915116769562768e-06, "loss": 0.5618, "step": 4183 }, { "epoch": 0.22578382170417138, "grad_norm": 0.7197642291378159, "learning_rate": 8.914619932395523e-06, "loss": 0.2788, "step": 4184 }, { "epoch": 0.22583778533268578, "grad_norm": 1.165800358170503, "learning_rate": 8.914122997088652e-06, "loss": 0.71, "step": 4185 }, { "epoch": 0.22589174896120015, "grad_norm": 1.1494566248230678, "learning_rate": 8.913625963656437e-06, "loss": 0.6532, "step": 4186 }, { "epoch": 0.22594571258971452, "grad_norm": 1.0426722431275415, "learning_rate": 8.913128832113164e-06, "loss": 0.4893, "step": 4187 }, { "epoch": 0.22599967621822892, "grad_norm": 0.9716829600960734, "learning_rate": 8.912631602473117e-06, "loss": 0.585, "step": 4188 }, { "epoch": 0.2260536398467433, "grad_norm": 0.8563201914162332, "learning_rate": 8.91213427475059e-06, "loss": 0.3973, "step": 4189 }, { "epoch": 0.22610760347525768, "grad_norm": 1.2341370433452616, "learning_rate": 8.911636848959876e-06, "loss": 0.6115, "step": 4190 }, { "epoch": 0.22616156710377205, "grad_norm": 1.1409364542777995, "learning_rate": 8.911139325115274e-06, "loss": 0.4954, "step": 4191 }, { "epoch": 0.22621553073228645, "grad_norm": 1.0698605080022052, "learning_rate": 8.910641703231079e-06, "loss": 0.5339, "step": 4192 }, { "epoch": 0.22626949436080082, "grad_norm": 0.9940865210791319, "learning_rate": 8.910143983321596e-06, "loss": 0.5827, "step": 4193 }, { "epoch": 0.2263234579893152, "grad_norm": 1.2022944062679384, "learning_rate": 8.909646165401128e-06, "loss": 0.5125, "step": 4194 }, { "epoch": 0.2263774216178296, "grad_norm": 1.1178263358501914, "learning_rate": 8.909148249483984e-06, "loss": 0.5641, "step": 4195 }, { "epoch": 0.22643138524634396, "grad_norm": 0.7443849723704626, "learning_rate": 8.908650235584477e-06, "loss": 0.2891, "step": 4196 }, { "epoch": 0.22648534887485836, "grad_norm": 1.040331495967735, "learning_rate": 8.908152123716917e-06, "loss": 0.4695, "step": 4197 }, { "epoch": 0.22653931250337273, "grad_norm": 1.0893988743850729, "learning_rate": 8.907653913895622e-06, "loss": 0.4992, "step": 4198 }, { "epoch": 0.2265932761318871, "grad_norm": 1.0320143366137184, "learning_rate": 8.907155606134907e-06, "loss": 0.5432, "step": 4199 }, { "epoch": 0.2266472397604015, "grad_norm": 1.0771158856277476, "learning_rate": 8.9066572004491e-06, "loss": 0.6623, "step": 4200 }, { "epoch": 0.22670120338891586, "grad_norm": 1.3125658385317989, "learning_rate": 8.906158696852523e-06, "loss": 0.5609, "step": 4201 }, { "epoch": 0.22675516701743026, "grad_norm": 0.7501022545065628, "learning_rate": 8.905660095359502e-06, "loss": 0.3314, "step": 4202 }, { "epoch": 0.22680913064594463, "grad_norm": 1.0513668045645406, "learning_rate": 8.90516139598437e-06, "loss": 0.38, "step": 4203 }, { "epoch": 0.22686309427445903, "grad_norm": 1.1045705465964784, "learning_rate": 8.904662598741457e-06, "loss": 0.4299, "step": 4204 }, { "epoch": 0.2269170579029734, "grad_norm": 
1.0976583189423483, "learning_rate": 8.904163703645102e-06, "loss": 0.6709, "step": 4205 }, { "epoch": 0.22697102153148777, "grad_norm": 0.9951642942765463, "learning_rate": 8.903664710709643e-06, "loss": 0.5125, "step": 4206 }, { "epoch": 0.22702498516000216, "grad_norm": 0.9617935680681392, "learning_rate": 8.90316561994942e-06, "loss": 0.4374, "step": 4207 }, { "epoch": 0.22707894878851653, "grad_norm": 0.998701680744596, "learning_rate": 8.90266643137878e-06, "loss": 0.4762, "step": 4208 }, { "epoch": 0.22713291241703093, "grad_norm": 0.949788765590773, "learning_rate": 8.902167145012067e-06, "loss": 0.4375, "step": 4209 }, { "epoch": 0.2271868760455453, "grad_norm": 1.1922421756692756, "learning_rate": 8.901667760863633e-06, "loss": 0.5052, "step": 4210 }, { "epoch": 0.2272408396740597, "grad_norm": 1.0026072902164263, "learning_rate": 8.901168278947832e-06, "loss": 0.4409, "step": 4211 }, { "epoch": 0.22729480330257407, "grad_norm": 1.1952331852131484, "learning_rate": 8.900668699279015e-06, "loss": 0.4889, "step": 4212 }, { "epoch": 0.22734876693108844, "grad_norm": 0.955925820758912, "learning_rate": 8.900169021871544e-06, "loss": 0.4665, "step": 4213 }, { "epoch": 0.22740273055960283, "grad_norm": 1.1559229761283465, "learning_rate": 8.899669246739783e-06, "loss": 0.7371, "step": 4214 }, { "epoch": 0.2274566941881172, "grad_norm": 1.1060319091781599, "learning_rate": 8.899169373898091e-06, "loss": 0.5395, "step": 4215 }, { "epoch": 0.2275106578166316, "grad_norm": 1.0297372939628304, "learning_rate": 8.898669403360835e-06, "loss": 0.4694, "step": 4216 }, { "epoch": 0.22756462144514597, "grad_norm": 0.913239694210688, "learning_rate": 8.898169335142388e-06, "loss": 0.4383, "step": 4217 }, { "epoch": 0.22761858507366034, "grad_norm": 1.0180784321261644, "learning_rate": 8.89766916925712e-06, "loss": 0.4424, "step": 4218 }, { "epoch": 0.22767254870217474, "grad_norm": 0.8786052806762851, "learning_rate": 8.897168905719406e-06, "loss": 0.4493, "step": 4219 }, { "epoch": 0.2277265123306891, "grad_norm": 1.171917696377168, "learning_rate": 8.896668544543628e-06, "loss": 0.5056, "step": 4220 }, { "epoch": 0.2277804759592035, "grad_norm": 1.1776391001404922, "learning_rate": 8.89616808574416e-06, "loss": 0.5048, "step": 4221 }, { "epoch": 0.22783443958771787, "grad_norm": 1.012316150941073, "learning_rate": 8.895667529335393e-06, "loss": 0.4847, "step": 4222 }, { "epoch": 0.22788840321623227, "grad_norm": 0.9242215709219594, "learning_rate": 8.89516687533171e-06, "loss": 0.3184, "step": 4223 }, { "epoch": 0.22794236684474664, "grad_norm": 1.0644696411350414, "learning_rate": 8.894666123747497e-06, "loss": 0.5301, "step": 4224 }, { "epoch": 0.227996330473261, "grad_norm": 1.2414733290516247, "learning_rate": 8.894165274597153e-06, "loss": 0.5959, "step": 4225 }, { "epoch": 0.2280502941017754, "grad_norm": 1.2311808232261539, "learning_rate": 8.893664327895067e-06, "loss": 0.5504, "step": 4226 }, { "epoch": 0.22810425773028978, "grad_norm": 1.1023302476738075, "learning_rate": 8.893163283655642e-06, "loss": 0.4164, "step": 4227 }, { "epoch": 0.22815822135880418, "grad_norm": 1.1397767435811703, "learning_rate": 8.892662141893274e-06, "loss": 0.6805, "step": 4228 }, { "epoch": 0.22821218498731854, "grad_norm": 1.003397468731344, "learning_rate": 8.892160902622368e-06, "loss": 0.5144, "step": 4229 }, { "epoch": 0.22826614861583294, "grad_norm": 1.0697723309582954, "learning_rate": 8.891659565857331e-06, "loss": 0.4131, "step": 4230 }, { "epoch": 0.2283201122443473, "grad_norm": 
1.0861622113310254, "learning_rate": 8.891158131612568e-06, "loss": 0.4786, "step": 4231 }, { "epoch": 0.22837407587286168, "grad_norm": 0.9903712633444866, "learning_rate": 8.890656599902497e-06, "loss": 0.4743, "step": 4232 }, { "epoch": 0.22842803950137608, "grad_norm": 1.0875230942155125, "learning_rate": 8.890154970741527e-06, "loss": 0.5137, "step": 4233 }, { "epoch": 0.22848200312989045, "grad_norm": 0.7760738260224667, "learning_rate": 8.88965324414408e-06, "loss": 0.2923, "step": 4234 }, { "epoch": 0.22853596675840485, "grad_norm": 1.1025028646817099, "learning_rate": 8.889151420124573e-06, "loss": 0.5875, "step": 4235 }, { "epoch": 0.22858993038691922, "grad_norm": 0.9491971020061925, "learning_rate": 8.88864949869743e-06, "loss": 0.5046, "step": 4236 }, { "epoch": 0.22864389401543359, "grad_norm": 0.8726322996835082, "learning_rate": 8.888147479877076e-06, "loss": 0.3897, "step": 4237 }, { "epoch": 0.22869785764394798, "grad_norm": 1.0318043402707135, "learning_rate": 8.887645363677939e-06, "loss": 0.4022, "step": 4238 }, { "epoch": 0.22875182127246235, "grad_norm": 0.9883519579100625, "learning_rate": 8.887143150114453e-06, "loss": 0.4923, "step": 4239 }, { "epoch": 0.22880578490097675, "grad_norm": 0.8780799273560774, "learning_rate": 8.88664083920105e-06, "loss": 0.4429, "step": 4240 }, { "epoch": 0.22885974852949112, "grad_norm": 1.0400042114873702, "learning_rate": 8.886138430952166e-06, "loss": 0.3872, "step": 4241 }, { "epoch": 0.22891371215800552, "grad_norm": 1.0524801245914936, "learning_rate": 8.885635925382244e-06, "loss": 0.4657, "step": 4242 }, { "epoch": 0.2289676757865199, "grad_norm": 0.8991194025114374, "learning_rate": 8.885133322505724e-06, "loss": 0.5374, "step": 4243 }, { "epoch": 0.22902163941503426, "grad_norm": 0.9400100698539551, "learning_rate": 8.88463062233705e-06, "loss": 0.3829, "step": 4244 }, { "epoch": 0.22907560304354865, "grad_norm": 0.9265237855648651, "learning_rate": 8.884127824890673e-06, "loss": 0.3829, "step": 4245 }, { "epoch": 0.22912956667206302, "grad_norm": 1.1374307795545215, "learning_rate": 8.883624930181045e-06, "loss": 0.5186, "step": 4246 }, { "epoch": 0.22918353030057742, "grad_norm": 1.3715941049808282, "learning_rate": 8.883121938222614e-06, "loss": 0.6195, "step": 4247 }, { "epoch": 0.2292374939290918, "grad_norm": 0.962725685851421, "learning_rate": 8.882618849029844e-06, "loss": 0.4638, "step": 4248 }, { "epoch": 0.22929145755760616, "grad_norm": 0.9344335651635133, "learning_rate": 8.882115662617188e-06, "loss": 0.4889, "step": 4249 }, { "epoch": 0.22934542118612056, "grad_norm": 1.21679266699377, "learning_rate": 8.88161237899911e-06, "loss": 0.5355, "step": 4250 }, { "epoch": 0.22939938481463493, "grad_norm": 0.952017146765123, "learning_rate": 8.881108998190077e-06, "loss": 0.523, "step": 4251 }, { "epoch": 0.22945334844314932, "grad_norm": 1.053957002530713, "learning_rate": 8.880605520204554e-06, "loss": 0.4929, "step": 4252 }, { "epoch": 0.2295073120716637, "grad_norm": 1.0988128107147797, "learning_rate": 8.880101945057012e-06, "loss": 0.452, "step": 4253 }, { "epoch": 0.2295612757001781, "grad_norm": 1.1092556005888314, "learning_rate": 8.879598272761927e-06, "loss": 0.5861, "step": 4254 }, { "epoch": 0.22961523932869246, "grad_norm": 1.184420475611664, "learning_rate": 8.879094503333772e-06, "loss": 0.5513, "step": 4255 }, { "epoch": 0.22966920295720683, "grad_norm": 1.3435321256644301, "learning_rate": 8.878590636787024e-06, "loss": 0.8189, "step": 4256 }, { "epoch": 0.22972316658572123, "grad_norm": 
0.9108882913517538, "learning_rate": 8.878086673136169e-06, "loss": 0.3758, "step": 4257 }, { "epoch": 0.2297771302142356, "grad_norm": 1.1369536192820615, "learning_rate": 8.87758261239569e-06, "loss": 0.493, "step": 4258 }, { "epoch": 0.22983109384275, "grad_norm": 0.8503592174469613, "learning_rate": 8.877078454580075e-06, "loss": 0.3754, "step": 4259 }, { "epoch": 0.22988505747126436, "grad_norm": 0.8446263629628106, "learning_rate": 8.87657419970381e-06, "loss": 0.3653, "step": 4260 }, { "epoch": 0.22993902109977876, "grad_norm": 1.209461872833154, "learning_rate": 8.876069847781394e-06, "loss": 0.7052, "step": 4261 }, { "epoch": 0.22999298472829313, "grad_norm": 1.356339812298453, "learning_rate": 8.875565398827318e-06, "loss": 0.5134, "step": 4262 }, { "epoch": 0.2300469483568075, "grad_norm": 1.1153141163887883, "learning_rate": 8.875060852856082e-06, "loss": 0.471, "step": 4263 }, { "epoch": 0.2301009119853219, "grad_norm": 0.9865368783059812, "learning_rate": 8.874556209882185e-06, "loss": 0.3982, "step": 4264 }, { "epoch": 0.23015487561383627, "grad_norm": 0.9855722530215885, "learning_rate": 8.874051469920135e-06, "loss": 0.525, "step": 4265 }, { "epoch": 0.23020883924235067, "grad_norm": 1.0158313877708975, "learning_rate": 8.873546632984435e-06, "loss": 0.4927, "step": 4266 }, { "epoch": 0.23026280287086504, "grad_norm": 0.9871122763443364, "learning_rate": 8.873041699089597e-06, "loss": 0.3966, "step": 4267 }, { "epoch": 0.2303167664993794, "grad_norm": 0.91906101940436, "learning_rate": 8.872536668250132e-06, "loss": 0.488, "step": 4268 }, { "epoch": 0.2303707301278938, "grad_norm": 0.8603050805399844, "learning_rate": 8.872031540480557e-06, "loss": 0.4352, "step": 4269 }, { "epoch": 0.23042469375640817, "grad_norm": 1.0133474465620715, "learning_rate": 8.871526315795386e-06, "loss": 0.4464, "step": 4270 }, { "epoch": 0.23047865738492257, "grad_norm": 1.111747218373013, "learning_rate": 8.871020994209143e-06, "loss": 0.4935, "step": 4271 }, { "epoch": 0.23053262101343694, "grad_norm": 1.0149561846708304, "learning_rate": 8.87051557573635e-06, "loss": 0.5624, "step": 4272 }, { "epoch": 0.23058658464195134, "grad_norm": 0.9442732151882096, "learning_rate": 8.870010060391534e-06, "loss": 0.4153, "step": 4273 }, { "epoch": 0.2306405482704657, "grad_norm": 1.0096363296590476, "learning_rate": 8.869504448189223e-06, "loss": 0.495, "step": 4274 }, { "epoch": 0.23069451189898008, "grad_norm": 0.90175713182598, "learning_rate": 8.86899873914395e-06, "loss": 0.4138, "step": 4275 }, { "epoch": 0.23074847552749447, "grad_norm": 1.1139798751347338, "learning_rate": 8.86849293327025e-06, "loss": 0.5741, "step": 4276 }, { "epoch": 0.23080243915600884, "grad_norm": 1.108189972750971, "learning_rate": 8.86798703058266e-06, "loss": 0.549, "step": 4277 }, { "epoch": 0.23085640278452324, "grad_norm": 0.9335382908852994, "learning_rate": 8.867481031095719e-06, "loss": 0.5081, "step": 4278 }, { "epoch": 0.2309103664130376, "grad_norm": 1.0813560804343791, "learning_rate": 8.866974934823969e-06, "loss": 0.4865, "step": 4279 }, { "epoch": 0.230964330041552, "grad_norm": 1.0954516980452589, "learning_rate": 8.86646874178196e-06, "loss": 0.5032, "step": 4280 }, { "epoch": 0.23101829367006638, "grad_norm": 0.8311954357144007, "learning_rate": 8.865962451984235e-06, "loss": 0.3464, "step": 4281 }, { "epoch": 0.23107225729858075, "grad_norm": 0.8966321243707828, "learning_rate": 8.865456065445348e-06, "loss": 0.3913, "step": 4282 }, { "epoch": 0.23112622092709514, "grad_norm": 0.9846125816863257, 
"learning_rate": 8.864949582179854e-06, "loss": 0.4771, "step": 4283 }, { "epoch": 0.2311801845556095, "grad_norm": 0.8726933707142843, "learning_rate": 8.86444300220231e-06, "loss": 0.5076, "step": 4284 }, { "epoch": 0.2312341481841239, "grad_norm": 1.3004201856524287, "learning_rate": 8.863936325527274e-06, "loss": 0.5173, "step": 4285 }, { "epoch": 0.23128811181263828, "grad_norm": 1.0130975329242455, "learning_rate": 8.86342955216931e-06, "loss": 0.5646, "step": 4286 }, { "epoch": 0.23134207544115265, "grad_norm": 1.030676721432551, "learning_rate": 8.862922682142981e-06, "loss": 0.4488, "step": 4287 }, { "epoch": 0.23139603906966705, "grad_norm": 1.008906818299179, "learning_rate": 8.862415715462855e-06, "loss": 0.4991, "step": 4288 }, { "epoch": 0.23145000269818142, "grad_norm": 1.0543323730433567, "learning_rate": 8.861908652143506e-06, "loss": 0.5383, "step": 4289 }, { "epoch": 0.23150396632669581, "grad_norm": 1.1115893777788908, "learning_rate": 8.861401492199505e-06, "loss": 0.415, "step": 4290 }, { "epoch": 0.23155792995521018, "grad_norm": 0.8637530526451926, "learning_rate": 8.860894235645427e-06, "loss": 0.3965, "step": 4291 }, { "epoch": 0.23161189358372458, "grad_norm": 1.1014743917742287, "learning_rate": 8.860386882495854e-06, "loss": 0.6244, "step": 4292 }, { "epoch": 0.23166585721223895, "grad_norm": 0.8478111687240644, "learning_rate": 8.859879432765366e-06, "loss": 0.3925, "step": 4293 }, { "epoch": 0.23171982084075332, "grad_norm": 1.08294437995867, "learning_rate": 8.859371886468549e-06, "loss": 0.5316, "step": 4294 }, { "epoch": 0.23177378446926772, "grad_norm": 0.9591093469183052, "learning_rate": 8.85886424361999e-06, "loss": 0.3865, "step": 4295 }, { "epoch": 0.2318277480977821, "grad_norm": 0.9580500099307673, "learning_rate": 8.858356504234278e-06, "loss": 0.4226, "step": 4296 }, { "epoch": 0.23188171172629649, "grad_norm": 1.2238897863570526, "learning_rate": 8.857848668326005e-06, "loss": 0.6211, "step": 4297 }, { "epoch": 0.23193567535481086, "grad_norm": 1.15164731754067, "learning_rate": 8.857340735909772e-06, "loss": 0.4269, "step": 4298 }, { "epoch": 0.23198963898332525, "grad_norm": 0.9037249263569062, "learning_rate": 8.856832707000172e-06, "loss": 0.3706, "step": 4299 }, { "epoch": 0.23204360261183962, "grad_norm": 1.1237870327045316, "learning_rate": 8.856324581611808e-06, "loss": 0.421, "step": 4300 }, { "epoch": 0.232097566240354, "grad_norm": 0.8781357554467456, "learning_rate": 8.855816359759283e-06, "loss": 0.3878, "step": 4301 }, { "epoch": 0.2321515298688684, "grad_norm": 0.8677405088798672, "learning_rate": 8.855308041457208e-06, "loss": 0.3667, "step": 4302 }, { "epoch": 0.23220549349738276, "grad_norm": 1.1553070055635584, "learning_rate": 8.854799626720187e-06, "loss": 0.6579, "step": 4303 }, { "epoch": 0.23225945712589716, "grad_norm": 0.9748291144533766, "learning_rate": 8.854291115562837e-06, "loss": 0.4439, "step": 4304 }, { "epoch": 0.23231342075441153, "grad_norm": 1.029576453505428, "learning_rate": 8.853782507999769e-06, "loss": 0.5533, "step": 4305 }, { "epoch": 0.2323673843829259, "grad_norm": 1.19724482777032, "learning_rate": 8.853273804045605e-06, "loss": 0.4949, "step": 4306 }, { "epoch": 0.2324213480114403, "grad_norm": 1.035621633122897, "learning_rate": 8.852765003714963e-06, "loss": 0.495, "step": 4307 }, { "epoch": 0.23247531163995466, "grad_norm": 1.1270690519523168, "learning_rate": 8.852256107022465e-06, "loss": 0.4842, "step": 4308 }, { "epoch": 0.23252927526846906, "grad_norm": 1.0166842657456197, "learning_rate": 
8.85174711398274e-06, "loss": 0.4985, "step": 4309 }, { "epoch": 0.23258323889698343, "grad_norm": 1.2902067126071957, "learning_rate": 8.851238024610418e-06, "loss": 0.5769, "step": 4310 }, { "epoch": 0.23263720252549783, "grad_norm": 0.9294636511265644, "learning_rate": 8.85072883892013e-06, "loss": 0.5055, "step": 4311 }, { "epoch": 0.2326911661540122, "grad_norm": 1.0175295390551642, "learning_rate": 8.850219556926506e-06, "loss": 0.5916, "step": 4312 }, { "epoch": 0.23274512978252657, "grad_norm": 0.960058267579395, "learning_rate": 8.84971017864419e-06, "loss": 0.4849, "step": 4313 }, { "epoch": 0.23279909341104096, "grad_norm": 0.8607329467182275, "learning_rate": 8.849200704087818e-06, "loss": 0.453, "step": 4314 }, { "epoch": 0.23285305703955533, "grad_norm": 1.1464748530021955, "learning_rate": 8.848691133272033e-06, "loss": 0.6183, "step": 4315 }, { "epoch": 0.23290702066806973, "grad_norm": 1.066488444254917, "learning_rate": 8.84818146621148e-06, "loss": 0.4407, "step": 4316 }, { "epoch": 0.2329609842965841, "grad_norm": 1.4024568753450393, "learning_rate": 8.84767170292081e-06, "loss": 0.7498, "step": 4317 }, { "epoch": 0.23301494792509847, "grad_norm": 0.9403023759037108, "learning_rate": 8.847161843414674e-06, "loss": 0.4197, "step": 4318 }, { "epoch": 0.23306891155361287, "grad_norm": 1.1335302063864279, "learning_rate": 8.84665188770772e-06, "loss": 0.4275, "step": 4319 }, { "epoch": 0.23312287518212724, "grad_norm": 1.1692816942166677, "learning_rate": 8.846141835814613e-06, "loss": 0.4992, "step": 4320 }, { "epoch": 0.23317683881064163, "grad_norm": 0.9359651896370513, "learning_rate": 8.845631687750008e-06, "loss": 0.5537, "step": 4321 }, { "epoch": 0.233230802439156, "grad_norm": 1.0666729337528817, "learning_rate": 8.845121443528567e-06, "loss": 0.5709, "step": 4322 }, { "epoch": 0.2332847660676704, "grad_norm": 0.9650158325398492, "learning_rate": 8.844611103164957e-06, "loss": 0.3949, "step": 4323 }, { "epoch": 0.23333872969618477, "grad_norm": 0.8499119303607209, "learning_rate": 8.844100666673845e-06, "loss": 0.3177, "step": 4324 }, { "epoch": 0.23339269332469914, "grad_norm": 0.8397536315262232, "learning_rate": 8.8435901340699e-06, "loss": 0.4382, "step": 4325 }, { "epoch": 0.23344665695321354, "grad_norm": 1.1845849999537605, "learning_rate": 8.843079505367794e-06, "loss": 0.5236, "step": 4326 }, { "epoch": 0.2335006205817279, "grad_norm": 0.8372986391265121, "learning_rate": 8.842568780582206e-06, "loss": 0.4106, "step": 4327 }, { "epoch": 0.2335545842102423, "grad_norm": 1.259611438903873, "learning_rate": 8.842057959727815e-06, "loss": 0.5597, "step": 4328 }, { "epoch": 0.23360854783875667, "grad_norm": 1.0067055960977618, "learning_rate": 8.841547042819301e-06, "loss": 0.4928, "step": 4329 }, { "epoch": 0.23366251146727107, "grad_norm": 0.8551285691669201, "learning_rate": 8.84103602987135e-06, "loss": 0.4835, "step": 4330 }, { "epoch": 0.23371647509578544, "grad_norm": 1.1137524682225863, "learning_rate": 8.840524920898645e-06, "loss": 0.6417, "step": 4331 }, { "epoch": 0.2337704387242998, "grad_norm": 1.191704192076781, "learning_rate": 8.84001371591588e-06, "loss": 0.6284, "step": 4332 }, { "epoch": 0.2338244023528142, "grad_norm": 1.208408802379505, "learning_rate": 8.839502414937745e-06, "loss": 0.5869, "step": 4333 }, { "epoch": 0.23387836598132858, "grad_norm": 1.1256126131622404, "learning_rate": 8.838991017978937e-06, "loss": 0.5215, "step": 4334 }, { "epoch": 0.23393232960984298, "grad_norm": 1.3315455106979754, "learning_rate": 
8.838479525054153e-06, "loss": 0.5281, "step": 4335 }, { "epoch": 0.23398629323835735, "grad_norm": 0.9910674903674576, "learning_rate": 8.837967936178095e-06, "loss": 0.4993, "step": 4336 }, { "epoch": 0.23404025686687172, "grad_norm": 1.12609720932742, "learning_rate": 8.837456251365466e-06, "loss": 0.6089, "step": 4337 }, { "epoch": 0.2340942204953861, "grad_norm": 0.9407906525347404, "learning_rate": 8.83694447063097e-06, "loss": 0.3663, "step": 4338 }, { "epoch": 0.23414818412390048, "grad_norm": 1.2233821566603786, "learning_rate": 8.83643259398932e-06, "loss": 0.6363, "step": 4339 }, { "epoch": 0.23420214775241488, "grad_norm": 0.9711851058494575, "learning_rate": 8.835920621455225e-06, "loss": 0.4572, "step": 4340 }, { "epoch": 0.23425611138092925, "grad_norm": 0.8615266852364176, "learning_rate": 8.8354085530434e-06, "loss": 0.4759, "step": 4341 }, { "epoch": 0.23431007500944365, "grad_norm": 1.1759350800963684, "learning_rate": 8.834896388768565e-06, "loss": 0.574, "step": 4342 }, { "epoch": 0.23436403863795802, "grad_norm": 1.184254392667908, "learning_rate": 8.834384128645439e-06, "loss": 0.5895, "step": 4343 }, { "epoch": 0.2344180022664724, "grad_norm": 1.132207594382214, "learning_rate": 8.833871772688741e-06, "loss": 0.5023, "step": 4344 }, { "epoch": 0.23447196589498678, "grad_norm": 0.9776216068914209, "learning_rate": 8.8333593209132e-06, "loss": 0.4647, "step": 4345 }, { "epoch": 0.23452592952350115, "grad_norm": 0.8597875662842117, "learning_rate": 8.832846773333544e-06, "loss": 0.383, "step": 4346 }, { "epoch": 0.23457989315201555, "grad_norm": 1.059448780233623, "learning_rate": 8.832334129964505e-06, "loss": 0.4694, "step": 4347 }, { "epoch": 0.23463385678052992, "grad_norm": 1.0831799127979564, "learning_rate": 8.831821390820815e-06, "loss": 0.4467, "step": 4348 }, { "epoch": 0.23468782040904432, "grad_norm": 1.0980445228647002, "learning_rate": 8.831308555917213e-06, "loss": 0.5461, "step": 4349 }, { "epoch": 0.2347417840375587, "grad_norm": 0.8204431415290188, "learning_rate": 8.830795625268437e-06, "loss": 0.3559, "step": 4350 }, { "epoch": 0.23479574766607306, "grad_norm": 0.9757006411880733, "learning_rate": 8.83028259888923e-06, "loss": 0.5887, "step": 4351 }, { "epoch": 0.23484971129458745, "grad_norm": 1.0592479496790406, "learning_rate": 8.829769476794334e-06, "loss": 0.4722, "step": 4352 }, { "epoch": 0.23490367492310182, "grad_norm": 0.9104820507918483, "learning_rate": 8.8292562589985e-06, "loss": 0.3461, "step": 4353 }, { "epoch": 0.23495763855161622, "grad_norm": 0.9501502804441881, "learning_rate": 8.828742945516478e-06, "loss": 0.3982, "step": 4354 }, { "epoch": 0.2350116021801306, "grad_norm": 1.0975843306868056, "learning_rate": 8.828229536363018e-06, "loss": 0.4222, "step": 4355 }, { "epoch": 0.23506556580864496, "grad_norm": 1.0185578537571258, "learning_rate": 8.82771603155288e-06, "loss": 0.4954, "step": 4356 }, { "epoch": 0.23511952943715936, "grad_norm": 1.1985899142667398, "learning_rate": 8.82720243110082e-06, "loss": 0.7365, "step": 4357 }, { "epoch": 0.23517349306567373, "grad_norm": 1.038119897581883, "learning_rate": 8.8266887350216e-06, "loss": 0.4322, "step": 4358 }, { "epoch": 0.23522745669418813, "grad_norm": 1.1311527530084275, "learning_rate": 8.826174943329986e-06, "loss": 0.6467, "step": 4359 }, { "epoch": 0.2352814203227025, "grad_norm": 1.2843945716059986, "learning_rate": 8.82566105604074e-06, "loss": 0.6104, "step": 4360 }, { "epoch": 0.2353353839512169, "grad_norm": 1.0773562189818182, "learning_rate": 
8.825147073168639e-06, "loss": 0.506, "step": 4361 }, { "epoch": 0.23538934757973126, "grad_norm": 1.046791650762785, "learning_rate": 8.824632994728449e-06, "loss": 0.5448, "step": 4362 }, { "epoch": 0.23544331120824563, "grad_norm": 1.23133381745459, "learning_rate": 8.824118820734948e-06, "loss": 0.5516, "step": 4363 }, { "epoch": 0.23549727483676003, "grad_norm": 0.8857200985938232, "learning_rate": 8.823604551202912e-06, "loss": 0.3874, "step": 4364 }, { "epoch": 0.2355512384652744, "grad_norm": 1.0780822095268483, "learning_rate": 8.823090186147125e-06, "loss": 0.5135, "step": 4365 }, { "epoch": 0.2356052020937888, "grad_norm": 1.1562755635990478, "learning_rate": 8.822575725582367e-06, "loss": 0.4561, "step": 4366 }, { "epoch": 0.23565916572230317, "grad_norm": 0.8888984401583951, "learning_rate": 8.822061169523426e-06, "loss": 0.361, "step": 4367 }, { "epoch": 0.23571312935081754, "grad_norm": 1.15908386920342, "learning_rate": 8.82154651798509e-06, "loss": 0.6063, "step": 4368 }, { "epoch": 0.23576709297933193, "grad_norm": 1.0772012280512686, "learning_rate": 8.82103177098215e-06, "loss": 0.5387, "step": 4369 }, { "epoch": 0.2358210566078463, "grad_norm": 0.969742164426776, "learning_rate": 8.820516928529402e-06, "loss": 0.4737, "step": 4370 }, { "epoch": 0.2358750202363607, "grad_norm": 1.0789167882819684, "learning_rate": 8.820001990641644e-06, "loss": 0.5025, "step": 4371 }, { "epoch": 0.23592898386487507, "grad_norm": 0.7671262894132683, "learning_rate": 8.819486957333672e-06, "loss": 0.333, "step": 4372 }, { "epoch": 0.23598294749338947, "grad_norm": 1.0675713687322526, "learning_rate": 8.818971828620291e-06, "loss": 0.3698, "step": 4373 }, { "epoch": 0.23603691112190384, "grad_norm": 1.0901986807445843, "learning_rate": 8.818456604516305e-06, "loss": 0.5537, "step": 4374 }, { "epoch": 0.2360908747504182, "grad_norm": 1.1726107244228667, "learning_rate": 8.817941285036525e-06, "loss": 0.5997, "step": 4375 }, { "epoch": 0.2361448383789326, "grad_norm": 1.0352167424115302, "learning_rate": 8.81742587019576e-06, "loss": 0.4877, "step": 4376 }, { "epoch": 0.23619880200744697, "grad_norm": 0.9938193562936292, "learning_rate": 8.816910360008824e-06, "loss": 0.4993, "step": 4377 }, { "epoch": 0.23625276563596137, "grad_norm": 0.9389130512500483, "learning_rate": 8.816394754490531e-06, "loss": 0.3606, "step": 4378 }, { "epoch": 0.23630672926447574, "grad_norm": 0.9404745506794373, "learning_rate": 8.815879053655702e-06, "loss": 0.3905, "step": 4379 }, { "epoch": 0.23636069289299014, "grad_norm": 0.7739994838801398, "learning_rate": 8.81536325751916e-06, "loss": 0.3042, "step": 4380 }, { "epoch": 0.2364146565215045, "grad_norm": 0.797544898263942, "learning_rate": 8.814847366095728e-06, "loss": 0.4998, "step": 4381 }, { "epoch": 0.23646862015001888, "grad_norm": 0.9841069057124918, "learning_rate": 8.814331379400232e-06, "loss": 0.5548, "step": 4382 }, { "epoch": 0.23652258377853327, "grad_norm": 1.1407222679650029, "learning_rate": 8.813815297447505e-06, "loss": 0.5294, "step": 4383 }, { "epoch": 0.23657654740704764, "grad_norm": 1.191495804750151, "learning_rate": 8.813299120252376e-06, "loss": 0.6453, "step": 4384 }, { "epoch": 0.23663051103556204, "grad_norm": 1.150917484385976, "learning_rate": 8.812782847829682e-06, "loss": 0.5789, "step": 4385 }, { "epoch": 0.2366844746640764, "grad_norm": 1.223642259170739, "learning_rate": 8.812266480194263e-06, "loss": 0.6294, "step": 4386 }, { "epoch": 0.23673843829259078, "grad_norm": 0.8941445824384119, "learning_rate": 
8.811750017360957e-06, "loss": 0.4205, "step": 4387 }, { "epoch": 0.23679240192110518, "grad_norm": 1.052619234823713, "learning_rate": 8.811233459344612e-06, "loss": 0.4018, "step": 4388 }, { "epoch": 0.23684636554961955, "grad_norm": 1.084935333204135, "learning_rate": 8.810716806160071e-06, "loss": 0.5192, "step": 4389 }, { "epoch": 0.23690032917813394, "grad_norm": 0.9903791009425426, "learning_rate": 8.810200057822183e-06, "loss": 0.5643, "step": 4390 }, { "epoch": 0.23695429280664831, "grad_norm": 1.1959885157959225, "learning_rate": 8.8096832143458e-06, "loss": 0.6143, "step": 4391 }, { "epoch": 0.2370082564351627, "grad_norm": 1.2361495594681793, "learning_rate": 8.809166275745778e-06, "loss": 0.4855, "step": 4392 }, { "epoch": 0.23706222006367708, "grad_norm": 1.4232033412954477, "learning_rate": 8.808649242036973e-06, "loss": 0.7707, "step": 4393 }, { "epoch": 0.23711618369219145, "grad_norm": 0.8590172392863666, "learning_rate": 8.808132113234246e-06, "loss": 0.366, "step": 4394 }, { "epoch": 0.23717014732070585, "grad_norm": 1.1060641175115506, "learning_rate": 8.80761488935246e-06, "loss": 0.5636, "step": 4395 }, { "epoch": 0.23722411094922022, "grad_norm": 1.4626189751077419, "learning_rate": 8.807097570406479e-06, "loss": 0.592, "step": 4396 }, { "epoch": 0.23727807457773462, "grad_norm": 0.9107326919099172, "learning_rate": 8.806580156411171e-06, "loss": 0.4456, "step": 4397 }, { "epoch": 0.23733203820624899, "grad_norm": 0.9756624810973178, "learning_rate": 8.80606264738141e-06, "loss": 0.5083, "step": 4398 }, { "epoch": 0.23738600183476338, "grad_norm": 1.041742857273947, "learning_rate": 8.805545043332068e-06, "loss": 0.4233, "step": 4399 }, { "epoch": 0.23743996546327775, "grad_norm": 1.0038686305036868, "learning_rate": 8.805027344278022e-06, "loss": 0.4605, "step": 4400 }, { "epoch": 0.23749392909179212, "grad_norm": 1.1723838648696772, "learning_rate": 8.80450955023415e-06, "loss": 0.7379, "step": 4401 }, { "epoch": 0.23754789272030652, "grad_norm": 1.0051713261276964, "learning_rate": 8.803991661215334e-06, "loss": 0.4267, "step": 4402 }, { "epoch": 0.2376018563488209, "grad_norm": 1.0785563032732313, "learning_rate": 8.803473677236461e-06, "loss": 0.585, "step": 4403 }, { "epoch": 0.2376558199773353, "grad_norm": 1.2915591474910588, "learning_rate": 8.802955598312416e-06, "loss": 0.5827, "step": 4404 }, { "epoch": 0.23770978360584966, "grad_norm": 0.9870151625822382, "learning_rate": 8.80243742445809e-06, "loss": 0.4676, "step": 4405 }, { "epoch": 0.23776374723436403, "grad_norm": 1.059162165622893, "learning_rate": 8.801919155688374e-06, "loss": 0.4648, "step": 4406 }, { "epoch": 0.23781771086287842, "grad_norm": 0.844387260639048, "learning_rate": 8.801400792018166e-06, "loss": 0.4754, "step": 4407 }, { "epoch": 0.2378716744913928, "grad_norm": 1.1646843032869052, "learning_rate": 8.800882333462361e-06, "loss": 0.4835, "step": 4408 }, { "epoch": 0.2379256381199072, "grad_norm": 1.082992591250399, "learning_rate": 8.800363780035865e-06, "loss": 0.6019, "step": 4409 }, { "epoch": 0.23797960174842156, "grad_norm": 1.0856228009803985, "learning_rate": 8.799845131753581e-06, "loss": 0.5902, "step": 4410 }, { "epoch": 0.23803356537693596, "grad_norm": 0.9463257680748529, "learning_rate": 8.799326388630409e-06, "loss": 0.3682, "step": 4411 }, { "epoch": 0.23808752900545033, "grad_norm": 1.231707749218174, "learning_rate": 8.798807550681266e-06, "loss": 0.634, "step": 4412 }, { "epoch": 0.2381414926339647, "grad_norm": 0.9902154080387326, "learning_rate": 
8.798288617921061e-06, "loss": 0.5658, "step": 4413 }, { "epoch": 0.2381954562624791, "grad_norm": 0.956782362443882, "learning_rate": 8.797769590364706e-06, "loss": 0.5058, "step": 4414 }, { "epoch": 0.23824941989099346, "grad_norm": 0.9829207728761068, "learning_rate": 8.797250468027124e-06, "loss": 0.5784, "step": 4415 }, { "epoch": 0.23830338351950786, "grad_norm": 0.9314631121396891, "learning_rate": 8.796731250923229e-06, "loss": 0.4859, "step": 4416 }, { "epoch": 0.23835734714802223, "grad_norm": 1.1093704822387043, "learning_rate": 8.796211939067948e-06, "loss": 0.506, "step": 4417 }, { "epoch": 0.23841131077653663, "grad_norm": 1.118440513473199, "learning_rate": 8.795692532476206e-06, "loss": 0.5183, "step": 4418 }, { "epoch": 0.238465274405051, "grad_norm": 1.0125155068683966, "learning_rate": 8.79517303116293e-06, "loss": 0.5391, "step": 4419 }, { "epoch": 0.23851923803356537, "grad_norm": 1.0554181451704647, "learning_rate": 8.794653435143052e-06, "loss": 0.4988, "step": 4420 }, { "epoch": 0.23857320166207976, "grad_norm": 0.9053544133653306, "learning_rate": 8.794133744431504e-06, "loss": 0.5167, "step": 4421 }, { "epoch": 0.23862716529059413, "grad_norm": 1.2086001400434754, "learning_rate": 8.793613959043224e-06, "loss": 0.618, "step": 4422 }, { "epoch": 0.23868112891910853, "grad_norm": 1.2116479905549473, "learning_rate": 8.793094078993152e-06, "loss": 0.5234, "step": 4423 }, { "epoch": 0.2387350925476229, "grad_norm": 0.9732075073145292, "learning_rate": 8.792574104296227e-06, "loss": 0.4703, "step": 4424 }, { "epoch": 0.23878905617613727, "grad_norm": 1.0514097967596792, "learning_rate": 8.792054034967396e-06, "loss": 0.6827, "step": 4425 }, { "epoch": 0.23884301980465167, "grad_norm": 1.1053612680069402, "learning_rate": 8.791533871021607e-06, "loss": 0.5606, "step": 4426 }, { "epoch": 0.23889698343316604, "grad_norm": 1.0476676490188268, "learning_rate": 8.791013612473808e-06, "loss": 0.4679, "step": 4427 }, { "epoch": 0.23895094706168044, "grad_norm": 1.194465980594172, "learning_rate": 8.790493259338951e-06, "loss": 0.5016, "step": 4428 }, { "epoch": 0.2390049106901948, "grad_norm": 0.9391728881187966, "learning_rate": 8.789972811631995e-06, "loss": 0.5013, "step": 4429 }, { "epoch": 0.2390588743187092, "grad_norm": 1.2862266063235424, "learning_rate": 8.789452269367895e-06, "loss": 0.6144, "step": 4430 }, { "epoch": 0.23911283794722357, "grad_norm": 0.9844588027101957, "learning_rate": 8.788931632561614e-06, "loss": 0.4269, "step": 4431 }, { "epoch": 0.23916680157573794, "grad_norm": 1.1532586292136673, "learning_rate": 8.788410901228112e-06, "loss": 0.4794, "step": 4432 }, { "epoch": 0.23922076520425234, "grad_norm": 0.8392037506548712, "learning_rate": 8.787890075382362e-06, "loss": 0.307, "step": 4433 }, { "epoch": 0.2392747288327667, "grad_norm": 0.9301486269190737, "learning_rate": 8.787369155039327e-06, "loss": 0.4579, "step": 4434 }, { "epoch": 0.2393286924612811, "grad_norm": 1.0157799882379444, "learning_rate": 8.78684814021398e-06, "loss": 0.4332, "step": 4435 }, { "epoch": 0.23938265608979548, "grad_norm": 1.0209303728763566, "learning_rate": 8.786327030921297e-06, "loss": 0.6182, "step": 4436 }, { "epoch": 0.23943661971830985, "grad_norm": 0.8414072933796239, "learning_rate": 8.785805827176256e-06, "loss": 0.3736, "step": 4437 }, { "epoch": 0.23949058334682424, "grad_norm": 1.114723735839323, "learning_rate": 8.785284528993832e-06, "loss": 0.684, "step": 4438 }, { "epoch": 0.2395445469753386, "grad_norm": 1.2288926229374904, "learning_rate": 
8.784763136389015e-06, "loss": 0.5597, "step": 4439 }, { "epoch": 0.239598510603853, "grad_norm": 1.1283941129174084, "learning_rate": 8.784241649376784e-06, "loss": 0.5406, "step": 4440 }, { "epoch": 0.23965247423236738, "grad_norm": 1.1032518288304136, "learning_rate": 8.78372006797213e-06, "loss": 0.501, "step": 4441 }, { "epoch": 0.23970643786088178, "grad_norm": 0.8231821667735801, "learning_rate": 8.783198392190043e-06, "loss": 0.4031, "step": 4442 }, { "epoch": 0.23976040148939615, "grad_norm": 0.9304145448977094, "learning_rate": 8.782676622045517e-06, "loss": 0.4174, "step": 4443 }, { "epoch": 0.23981436511791052, "grad_norm": 0.9457067954719116, "learning_rate": 8.78215475755355e-06, "loss": 0.3959, "step": 4444 }, { "epoch": 0.2398683287464249, "grad_norm": 0.9999599936551482, "learning_rate": 8.781632798729137e-06, "loss": 0.4854, "step": 4445 }, { "epoch": 0.23992229237493928, "grad_norm": 1.0319051381283428, "learning_rate": 8.781110745587281e-06, "loss": 0.5168, "step": 4446 }, { "epoch": 0.23997625600345368, "grad_norm": 1.0042353296128097, "learning_rate": 8.780588598142987e-06, "loss": 0.4266, "step": 4447 }, { "epoch": 0.24003021963196805, "grad_norm": 0.994443631715621, "learning_rate": 8.780066356411262e-06, "loss": 0.5309, "step": 4448 }, { "epoch": 0.24008418326048245, "grad_norm": 1.0916162812550392, "learning_rate": 8.779544020407116e-06, "loss": 0.4831, "step": 4449 }, { "epoch": 0.24013814688899682, "grad_norm": 1.0945821112302112, "learning_rate": 8.779021590145562e-06, "loss": 0.4311, "step": 4450 }, { "epoch": 0.2401921105175112, "grad_norm": 1.0857189120778294, "learning_rate": 8.778499065641614e-06, "loss": 0.6337, "step": 4451 }, { "epoch": 0.24024607414602558, "grad_norm": 0.8548613471329187, "learning_rate": 8.77797644691029e-06, "loss": 0.3708, "step": 4452 }, { "epoch": 0.24030003777453995, "grad_norm": 0.8836519630135827, "learning_rate": 8.777453733966609e-06, "loss": 0.4169, "step": 4453 }, { "epoch": 0.24035400140305435, "grad_norm": 1.0071023958163081, "learning_rate": 8.776930926825598e-06, "loss": 0.6848, "step": 4454 }, { "epoch": 0.24040796503156872, "grad_norm": 0.7260458908256008, "learning_rate": 8.776408025502281e-06, "loss": 0.3105, "step": 4455 }, { "epoch": 0.2404619286600831, "grad_norm": 0.8954039160270562, "learning_rate": 8.775885030011686e-06, "loss": 0.4436, "step": 4456 }, { "epoch": 0.2405158922885975, "grad_norm": 0.7670009622812443, "learning_rate": 8.775361940368845e-06, "loss": 0.2746, "step": 4457 }, { "epoch": 0.24056985591711186, "grad_norm": 1.176189635313819, "learning_rate": 8.774838756588794e-06, "loss": 0.5389, "step": 4458 }, { "epoch": 0.24062381954562626, "grad_norm": 0.9860281371505387, "learning_rate": 8.774315478686566e-06, "loss": 0.3606, "step": 4459 }, { "epoch": 0.24067778317414062, "grad_norm": 0.9569033458670732, "learning_rate": 8.773792106677203e-06, "loss": 0.4682, "step": 4460 }, { "epoch": 0.24073174680265502, "grad_norm": 0.852850415670999, "learning_rate": 8.773268640575748e-06, "loss": 0.3728, "step": 4461 }, { "epoch": 0.2407857104311694, "grad_norm": 0.9681582402641141, "learning_rate": 8.772745080397245e-06, "loss": 0.5061, "step": 4462 }, { "epoch": 0.24083967405968376, "grad_norm": 0.8825902441889901, "learning_rate": 8.772221426156741e-06, "loss": 0.5365, "step": 4463 }, { "epoch": 0.24089363768819816, "grad_norm": 1.0585990052253709, "learning_rate": 8.771697677869286e-06, "loss": 0.5712, "step": 4464 }, { "epoch": 0.24094760131671253, "grad_norm": 1.2143698230657496, "learning_rate": 
8.771173835549936e-06, "loss": 0.6284, "step": 4465 }, { "epoch": 0.24100156494522693, "grad_norm": 0.9470806233844631, "learning_rate": 8.770649899213744e-06, "loss": 0.3699, "step": 4466 }, { "epoch": 0.2410555285737413, "grad_norm": 1.2449869724174918, "learning_rate": 8.770125868875768e-06, "loss": 0.5524, "step": 4467 }, { "epoch": 0.2411094922022557, "grad_norm": 1.1135492611520865, "learning_rate": 8.769601744551072e-06, "loss": 0.4846, "step": 4468 }, { "epoch": 0.24116345583077006, "grad_norm": 0.893999334963022, "learning_rate": 8.769077526254715e-06, "loss": 0.3236, "step": 4469 }, { "epoch": 0.24121741945928443, "grad_norm": 0.9806389429710375, "learning_rate": 8.768553214001769e-06, "loss": 0.3784, "step": 4470 }, { "epoch": 0.24127138308779883, "grad_norm": 0.971481123607927, "learning_rate": 8.7680288078073e-06, "loss": 0.5386, "step": 4471 }, { "epoch": 0.2413253467163132, "grad_norm": 0.8908321580481223, "learning_rate": 8.767504307686383e-06, "loss": 0.3698, "step": 4472 }, { "epoch": 0.2413793103448276, "grad_norm": 1.1808912837909107, "learning_rate": 8.76697971365409e-06, "loss": 0.604, "step": 4473 }, { "epoch": 0.24143327397334197, "grad_norm": 0.9058774534773671, "learning_rate": 8.766455025725497e-06, "loss": 0.4287, "step": 4474 }, { "epoch": 0.24148723760185634, "grad_norm": 1.0613054771463337, "learning_rate": 8.765930243915689e-06, "loss": 0.585, "step": 4475 }, { "epoch": 0.24154120123037073, "grad_norm": 1.0406556690078919, "learning_rate": 8.765405368239743e-06, "loss": 0.5328, "step": 4476 }, { "epoch": 0.2415951648588851, "grad_norm": 1.1153393510559482, "learning_rate": 8.76488039871275e-06, "loss": 0.4759, "step": 4477 }, { "epoch": 0.2416491284873995, "grad_norm": 1.0594136475555267, "learning_rate": 8.764355335349794e-06, "loss": 0.5356, "step": 4478 }, { "epoch": 0.24170309211591387, "grad_norm": 1.0543849984150937, "learning_rate": 8.763830178165967e-06, "loss": 0.5712, "step": 4479 }, { "epoch": 0.24175705574442827, "grad_norm": 1.1520437234916578, "learning_rate": 8.763304927176363e-06, "loss": 0.5626, "step": 4480 }, { "epoch": 0.24181101937294264, "grad_norm": 0.9355460169629677, "learning_rate": 8.762779582396078e-06, "loss": 0.4356, "step": 4481 }, { "epoch": 0.241864983001457, "grad_norm": 0.8855893023183755, "learning_rate": 8.762254143840211e-06, "loss": 0.3247, "step": 4482 }, { "epoch": 0.2419189466299714, "grad_norm": 1.1672813772507327, "learning_rate": 8.761728611523864e-06, "loss": 0.4471, "step": 4483 }, { "epoch": 0.24197291025848577, "grad_norm": 1.231238592597526, "learning_rate": 8.761202985462141e-06, "loss": 0.6042, "step": 4484 }, { "epoch": 0.24202687388700017, "grad_norm": 1.05754937086084, "learning_rate": 8.760677265670149e-06, "loss": 0.6493, "step": 4485 }, { "epoch": 0.24208083751551454, "grad_norm": 1.0607517925350525, "learning_rate": 8.760151452162996e-06, "loss": 0.4747, "step": 4486 }, { "epoch": 0.24213480114402894, "grad_norm": 1.123298156123676, "learning_rate": 8.759625544955797e-06, "loss": 0.5499, "step": 4487 }, { "epoch": 0.2421887647725433, "grad_norm": 0.9238540280104023, "learning_rate": 8.759099544063668e-06, "loss": 0.4266, "step": 4488 }, { "epoch": 0.24224272840105768, "grad_norm": 0.9390164836135448, "learning_rate": 8.758573449501724e-06, "loss": 0.4411, "step": 4489 }, { "epoch": 0.24229669202957207, "grad_norm": 1.0727487274331275, "learning_rate": 8.758047261285085e-06, "loss": 0.5513, "step": 4490 }, { "epoch": 0.24235065565808644, "grad_norm": 0.9476608451235209, "learning_rate": 
8.757520979428878e-06, "loss": 0.458, "step": 4491 }, { "epoch": 0.24240461928660084, "grad_norm": 1.1311171234811987, "learning_rate": 8.756994603948226e-06, "loss": 0.4286, "step": 4492 }, { "epoch": 0.2424585829151152, "grad_norm": 1.1529072357283119, "learning_rate": 8.756468134858258e-06, "loss": 0.58, "step": 4493 }, { "epoch": 0.24251254654362958, "grad_norm": 1.073373146549898, "learning_rate": 8.755941572174104e-06, "loss": 0.4255, "step": 4494 }, { "epoch": 0.24256651017214398, "grad_norm": 1.1113101286638392, "learning_rate": 8.755414915910901e-06, "loss": 0.5512, "step": 4495 }, { "epoch": 0.24262047380065835, "grad_norm": 1.277170819662614, "learning_rate": 8.754888166083783e-06, "loss": 0.5204, "step": 4496 }, { "epoch": 0.24267443742917275, "grad_norm": 0.7686999413496634, "learning_rate": 8.754361322707891e-06, "loss": 0.3238, "step": 4497 }, { "epoch": 0.24272840105768712, "grad_norm": 1.0838427561996915, "learning_rate": 8.753834385798365e-06, "loss": 0.4976, "step": 4498 }, { "epoch": 0.2427823646862015, "grad_norm": 0.9685891805430745, "learning_rate": 8.753307355370353e-06, "loss": 0.46, "step": 4499 }, { "epoch": 0.24283632831471588, "grad_norm": 1.0452559920421545, "learning_rate": 8.752780231438999e-06, "loss": 0.4608, "step": 4500 }, { "epoch": 0.24283632831471588, "eval_loss": 0.5786745548248291, "eval_runtime": 159.742, "eval_samples_per_second": 21.528, "eval_steps_per_second": 0.901, "step": 4500 }, { "epoch": 0.24289029194323025, "grad_norm": 1.0347496393922702, "learning_rate": 8.752253014019455e-06, "loss": 0.4134, "step": 4501 }, { "epoch": 0.24294425557174465, "grad_norm": 0.9911194331172879, "learning_rate": 8.751725703126872e-06, "loss": 0.499, "step": 4502 }, { "epoch": 0.24299821920025902, "grad_norm": 1.1719826253193577, "learning_rate": 8.751198298776408e-06, "loss": 0.6023, "step": 4503 }, { "epoch": 0.24305218282877342, "grad_norm": 1.0240929812232697, "learning_rate": 8.75067080098322e-06, "loss": 0.5047, "step": 4504 }, { "epoch": 0.2431061464572878, "grad_norm": 1.0952239543049223, "learning_rate": 8.750143209762468e-06, "loss": 0.5975, "step": 4505 }, { "epoch": 0.24316011008580216, "grad_norm": 0.9423099116349676, "learning_rate": 8.749615525129317e-06, "loss": 0.4691, "step": 4506 }, { "epoch": 0.24321407371431655, "grad_norm": 0.9685305262234355, "learning_rate": 8.74908774709893e-06, "loss": 0.4043, "step": 4507 }, { "epoch": 0.24326803734283092, "grad_norm": 0.9591719677662082, "learning_rate": 8.74855987568648e-06, "loss": 0.4351, "step": 4508 }, { "epoch": 0.24332200097134532, "grad_norm": 1.3645078583975547, "learning_rate": 8.748031910907136e-06, "loss": 0.392, "step": 4509 }, { "epoch": 0.2433759645998597, "grad_norm": 1.1078272797837547, "learning_rate": 8.747503852776073e-06, "loss": 0.4798, "step": 4510 }, { "epoch": 0.2434299282283741, "grad_norm": 1.0606201467634875, "learning_rate": 8.746975701308468e-06, "loss": 0.485, "step": 4511 }, { "epoch": 0.24348389185688846, "grad_norm": 1.0712520787198745, "learning_rate": 8.746447456519501e-06, "loss": 0.5411, "step": 4512 }, { "epoch": 0.24353785548540283, "grad_norm": 1.1683380252992273, "learning_rate": 8.745919118424355e-06, "loss": 0.4768, "step": 4513 }, { "epoch": 0.24359181911391722, "grad_norm": 1.2297470620274826, "learning_rate": 8.74539068703821e-06, "loss": 0.6862, "step": 4514 }, { "epoch": 0.2436457827424316, "grad_norm": 1.0616846827148587, "learning_rate": 8.744862162376263e-06, "loss": 0.5352, "step": 4515 }, { "epoch": 0.243699746370946, "grad_norm": 
1.3467223574754694, "learning_rate": 8.744333544453695e-06, "loss": 0.6953, "step": 4516 }, { "epoch": 0.24375370999946036, "grad_norm": 1.1024770718147714, "learning_rate": 8.743804833285701e-06, "loss": 0.5203, "step": 4517 }, { "epoch": 0.24380767362797476, "grad_norm": 0.9266411634138806, "learning_rate": 8.743276028887482e-06, "loss": 0.4766, "step": 4518 }, { "epoch": 0.24386163725648913, "grad_norm": 1.0790471917638484, "learning_rate": 8.742747131274232e-06, "loss": 0.5278, "step": 4519 }, { "epoch": 0.2439156008850035, "grad_norm": 1.2317009842365523, "learning_rate": 8.742218140461153e-06, "loss": 0.5706, "step": 4520 }, { "epoch": 0.2439695645135179, "grad_norm": 0.9959228615098635, "learning_rate": 8.741689056463448e-06, "loss": 0.5652, "step": 4521 }, { "epoch": 0.24402352814203226, "grad_norm": 1.0977625070963575, "learning_rate": 8.741159879296324e-06, "loss": 0.5457, "step": 4522 }, { "epoch": 0.24407749177054666, "grad_norm": 1.0317165271240092, "learning_rate": 8.74063060897499e-06, "loss": 0.5447, "step": 4523 }, { "epoch": 0.24413145539906103, "grad_norm": 0.9779113411044444, "learning_rate": 8.740101245514659e-06, "loss": 0.4748, "step": 4524 }, { "epoch": 0.2441854190275754, "grad_norm": 0.9710308012077197, "learning_rate": 8.739571788930543e-06, "loss": 0.47, "step": 4525 }, { "epoch": 0.2442393826560898, "grad_norm": 1.113005808511701, "learning_rate": 8.73904223923786e-06, "loss": 0.638, "step": 4526 }, { "epoch": 0.24429334628460417, "grad_norm": 1.075946744567272, "learning_rate": 8.738512596451832e-06, "loss": 0.4959, "step": 4527 }, { "epoch": 0.24434730991311857, "grad_norm": 1.0972062178771205, "learning_rate": 8.73798286058768e-06, "loss": 0.5097, "step": 4528 }, { "epoch": 0.24440127354163294, "grad_norm": 1.154828910206549, "learning_rate": 8.737453031660628e-06, "loss": 0.523, "step": 4529 }, { "epoch": 0.24445523717014733, "grad_norm": 0.9821061456345133, "learning_rate": 8.736923109685904e-06, "loss": 0.4468, "step": 4530 }, { "epoch": 0.2445092007986617, "grad_norm": 0.6205791818245048, "learning_rate": 8.736393094678738e-06, "loss": 0.2753, "step": 4531 }, { "epoch": 0.24456316442717607, "grad_norm": 0.8305769708879828, "learning_rate": 8.735862986654368e-06, "loss": 0.4028, "step": 4532 }, { "epoch": 0.24461712805569047, "grad_norm": 1.0737823363226737, "learning_rate": 8.735332785628024e-06, "loss": 0.4925, "step": 4533 }, { "epoch": 0.24467109168420484, "grad_norm": 1.203826068989072, "learning_rate": 8.734802491614945e-06, "loss": 0.6011, "step": 4534 }, { "epoch": 0.24472505531271924, "grad_norm": 0.9569006456756168, "learning_rate": 8.734272104630375e-06, "loss": 0.5013, "step": 4535 }, { "epoch": 0.2447790189412336, "grad_norm": 1.1281118489319313, "learning_rate": 8.73374162468956e-06, "loss": 0.4843, "step": 4536 }, { "epoch": 0.244832982569748, "grad_norm": 0.9571279540509768, "learning_rate": 8.733211051807738e-06, "loss": 0.428, "step": 4537 }, { "epoch": 0.24488694619826237, "grad_norm": 1.3644121347277292, "learning_rate": 8.732680386000167e-06, "loss": 0.7089, "step": 4538 }, { "epoch": 0.24494090982677674, "grad_norm": 1.221640813412554, "learning_rate": 8.732149627282095e-06, "loss": 0.7131, "step": 4539 }, { "epoch": 0.24499487345529114, "grad_norm": 1.2587239234793959, "learning_rate": 8.731618775668775e-06, "loss": 0.6507, "step": 4540 }, { "epoch": 0.2450488370838055, "grad_norm": 0.9690081162761357, "learning_rate": 8.731087831175467e-06, "loss": 0.4993, "step": 4541 }, { "epoch": 0.2451028007123199, "grad_norm": 
0.9919115195721688, "learning_rate": 8.73055679381743e-06, "loss": 0.4952, "step": 4542 }, { "epoch": 0.24515676434083428, "grad_norm": 1.1416151141268795, "learning_rate": 8.730025663609927e-06, "loss": 0.6717, "step": 4543 }, { "epoch": 0.24521072796934865, "grad_norm": 1.0119859741074868, "learning_rate": 8.729494440568223e-06, "loss": 0.5031, "step": 4544 }, { "epoch": 0.24526469159786304, "grad_norm": 1.1496931830843466, "learning_rate": 8.728963124707586e-06, "loss": 0.4927, "step": 4545 }, { "epoch": 0.2453186552263774, "grad_norm": 0.9607133828245882, "learning_rate": 8.728431716043286e-06, "loss": 0.531, "step": 4546 }, { "epoch": 0.2453726188548918, "grad_norm": 1.0663621337918092, "learning_rate": 8.727900214590593e-06, "loss": 0.5069, "step": 4547 }, { "epoch": 0.24542658248340618, "grad_norm": 1.1490887573087747, "learning_rate": 8.72736862036479e-06, "loss": 0.6, "step": 4548 }, { "epoch": 0.24548054611192058, "grad_norm": 0.8979682376756449, "learning_rate": 8.72683693338115e-06, "loss": 0.4038, "step": 4549 }, { "epoch": 0.24553450974043495, "grad_norm": 1.201568651160385, "learning_rate": 8.726305153654957e-06, "loss": 0.5398, "step": 4550 }, { "epoch": 0.24558847336894932, "grad_norm": 1.1372986400999774, "learning_rate": 8.725773281201494e-06, "loss": 0.4798, "step": 4551 }, { "epoch": 0.24564243699746371, "grad_norm": 1.085641349381679, "learning_rate": 8.725241316036047e-06, "loss": 0.4917, "step": 4552 }, { "epoch": 0.24569640062597808, "grad_norm": 1.0872764441269733, "learning_rate": 8.724709258173907e-06, "loss": 0.5352, "step": 4553 }, { "epoch": 0.24575036425449248, "grad_norm": 1.1493950975859577, "learning_rate": 8.724177107630361e-06, "loss": 0.4649, "step": 4554 }, { "epoch": 0.24580432788300685, "grad_norm": 1.0032706957121922, "learning_rate": 8.723644864420712e-06, "loss": 0.5106, "step": 4555 }, { "epoch": 0.24585829151152122, "grad_norm": 1.058277303548084, "learning_rate": 8.72311252856025e-06, "loss": 0.6369, "step": 4556 }, { "epoch": 0.24591225514003562, "grad_norm": 1.2471526354021933, "learning_rate": 8.722580100064277e-06, "loss": 0.5429, "step": 4557 }, { "epoch": 0.24596621876855, "grad_norm": 1.1521586280422114, "learning_rate": 8.722047578948096e-06, "loss": 0.5377, "step": 4558 }, { "epoch": 0.24602018239706439, "grad_norm": 0.9485490723422761, "learning_rate": 8.721514965227011e-06, "loss": 0.4998, "step": 4559 }, { "epoch": 0.24607414602557875, "grad_norm": 1.156746842364887, "learning_rate": 8.720982258916333e-06, "loss": 0.5515, "step": 4560 }, { "epoch": 0.24612810965409315, "grad_norm": 0.9616277434630006, "learning_rate": 8.720449460031367e-06, "loss": 0.4737, "step": 4561 }, { "epoch": 0.24618207328260752, "grad_norm": 1.1271548662016844, "learning_rate": 8.719916568587434e-06, "loss": 0.5528, "step": 4562 }, { "epoch": 0.2462360369111219, "grad_norm": 1.033726686748811, "learning_rate": 8.719383584599841e-06, "loss": 0.4057, "step": 4563 }, { "epoch": 0.2462900005396363, "grad_norm": 0.862506593182918, "learning_rate": 8.718850508083913e-06, "loss": 0.4427, "step": 4564 }, { "epoch": 0.24634396416815066, "grad_norm": 1.2206997004035272, "learning_rate": 8.718317339054969e-06, "loss": 0.5024, "step": 4565 }, { "epoch": 0.24639792779666506, "grad_norm": 1.2122725844275872, "learning_rate": 8.717784077528332e-06, "loss": 0.5288, "step": 4566 }, { "epoch": 0.24645189142517943, "grad_norm": 1.1507799811112978, "learning_rate": 8.71725072351933e-06, "loss": 0.5805, "step": 4567 }, { "epoch": 0.24650585505369382, "grad_norm": 
0.9667649730452603, "learning_rate": 8.71671727704329e-06, "loss": 0.377, "step": 4568 }, { "epoch": 0.2465598186822082, "grad_norm": 1.0151565570693313, "learning_rate": 8.716183738115549e-06, "loss": 0.5779, "step": 4569 }, { "epoch": 0.24661378231072256, "grad_norm": 1.1091655036166475, "learning_rate": 8.715650106751434e-06, "loss": 0.4461, "step": 4570 }, { "epoch": 0.24666774593923696, "grad_norm": 0.8237104050226594, "learning_rate": 8.715116382966285e-06, "loss": 0.4064, "step": 4571 }, { "epoch": 0.24672170956775133, "grad_norm": 1.1276838481599933, "learning_rate": 8.714582566775444e-06, "loss": 0.5253, "step": 4572 }, { "epoch": 0.24677567319626573, "grad_norm": 0.933199669287803, "learning_rate": 8.714048658194253e-06, "loss": 0.3671, "step": 4573 }, { "epoch": 0.2468296368247801, "grad_norm": 1.196799895364356, "learning_rate": 8.713514657238055e-06, "loss": 0.5556, "step": 4574 }, { "epoch": 0.24688360045329447, "grad_norm": 1.1835107715234925, "learning_rate": 8.7129805639222e-06, "loss": 0.5963, "step": 4575 }, { "epoch": 0.24693756408180886, "grad_norm": 1.1221259317842405, "learning_rate": 8.712446378262035e-06, "loss": 0.5739, "step": 4576 }, { "epoch": 0.24699152771032323, "grad_norm": 0.9202137034395859, "learning_rate": 8.711912100272917e-06, "loss": 0.4501, "step": 4577 }, { "epoch": 0.24704549133883763, "grad_norm": 1.0304341995620017, "learning_rate": 8.711377729970198e-06, "loss": 0.4529, "step": 4578 }, { "epoch": 0.247099454967352, "grad_norm": 1.2867382828846046, "learning_rate": 8.71084326736924e-06, "loss": 0.6419, "step": 4579 }, { "epoch": 0.2471534185958664, "grad_norm": 1.162557532593156, "learning_rate": 8.710308712485401e-06, "loss": 0.4967, "step": 4580 }, { "epoch": 0.24720738222438077, "grad_norm": 0.8662074185769372, "learning_rate": 8.709774065334046e-06, "loss": 0.4078, "step": 4581 }, { "epoch": 0.24726134585289514, "grad_norm": 1.1568262463563586, "learning_rate": 8.709239325930543e-06, "loss": 0.4416, "step": 4582 }, { "epoch": 0.24731530948140953, "grad_norm": 1.120356437351626, "learning_rate": 8.708704494290257e-06, "loss": 0.5596, "step": 4583 }, { "epoch": 0.2473692731099239, "grad_norm": 1.083889146639063, "learning_rate": 8.708169570428563e-06, "loss": 0.4399, "step": 4584 }, { "epoch": 0.2474232367384383, "grad_norm": 1.060589039276693, "learning_rate": 8.707634554360832e-06, "loss": 0.7136, "step": 4585 }, { "epoch": 0.24747720036695267, "grad_norm": 1.0301517348437907, "learning_rate": 8.707099446102443e-06, "loss": 0.5168, "step": 4586 }, { "epoch": 0.24753116399546707, "grad_norm": 1.0932143106425458, "learning_rate": 8.706564245668778e-06, "loss": 0.5495, "step": 4587 }, { "epoch": 0.24758512762398144, "grad_norm": 0.9755221315096876, "learning_rate": 8.706028953075214e-06, "loss": 0.4375, "step": 4588 }, { "epoch": 0.2476390912524958, "grad_norm": 1.002573067407256, "learning_rate": 8.705493568337138e-06, "loss": 0.4836, "step": 4589 }, { "epoch": 0.2476930548810102, "grad_norm": 1.0374628231733365, "learning_rate": 8.70495809146994e-06, "loss": 0.4863, "step": 4590 }, { "epoch": 0.24774701850952457, "grad_norm": 0.9067463974134781, "learning_rate": 8.704422522489005e-06, "loss": 0.5309, "step": 4591 }, { "epoch": 0.24780098213803897, "grad_norm": 0.8930549565284348, "learning_rate": 8.703886861409732e-06, "loss": 0.4249, "step": 4592 }, { "epoch": 0.24785494576655334, "grad_norm": 1.0995963555824488, "learning_rate": 8.703351108247514e-06, "loss": 0.4133, "step": 4593 }, { "epoch": 0.2479089093950677, "grad_norm": 
1.0436639421212672, "learning_rate": 8.702815263017747e-06, "loss": 0.3836, "step": 4594 }, { "epoch": 0.2479628730235821, "grad_norm": 0.8309231500220468, "learning_rate": 8.702279325735831e-06, "loss": 0.3313, "step": 4595 }, { "epoch": 0.24801683665209648, "grad_norm": 0.7904761241845633, "learning_rate": 8.701743296417175e-06, "loss": 0.2885, "step": 4596 }, { "epoch": 0.24807080028061088, "grad_norm": 0.9934532964507723, "learning_rate": 8.70120717507718e-06, "loss": 0.447, "step": 4597 }, { "epoch": 0.24812476390912525, "grad_norm": 1.0000756396733694, "learning_rate": 8.700670961731257e-06, "loss": 0.477, "step": 4598 }, { "epoch": 0.24817872753763964, "grad_norm": 0.9746253116354061, "learning_rate": 8.700134656394814e-06, "loss": 0.4913, "step": 4599 }, { "epoch": 0.248232691166154, "grad_norm": 1.2629598790773646, "learning_rate": 8.699598259083269e-06, "loss": 0.5343, "step": 4600 }, { "epoch": 0.24828665479466838, "grad_norm": 0.9645849442478929, "learning_rate": 8.699061769812038e-06, "loss": 0.3381, "step": 4601 }, { "epoch": 0.24834061842318278, "grad_norm": 1.1471534748000067, "learning_rate": 8.698525188596537e-06, "loss": 0.4834, "step": 4602 }, { "epoch": 0.24839458205169715, "grad_norm": 1.205853159750565, "learning_rate": 8.697988515452194e-06, "loss": 0.5552, "step": 4603 }, { "epoch": 0.24844854568021155, "grad_norm": 1.0358730504018554, "learning_rate": 8.697451750394425e-06, "loss": 0.4595, "step": 4604 }, { "epoch": 0.24850250930872592, "grad_norm": 1.02513956247496, "learning_rate": 8.696914893438665e-06, "loss": 0.3858, "step": 4605 }, { "epoch": 0.2485564729372403, "grad_norm": 0.8755438204696631, "learning_rate": 8.696377944600342e-06, "loss": 0.4549, "step": 4606 }, { "epoch": 0.24861043656575468, "grad_norm": 0.7804218915302031, "learning_rate": 8.695840903894886e-06, "loss": 0.3907, "step": 4607 }, { "epoch": 0.24866440019426905, "grad_norm": 1.0136461016388192, "learning_rate": 8.69530377133773e-06, "loss": 0.5114, "step": 4608 }, { "epoch": 0.24871836382278345, "grad_norm": 1.0196432768227743, "learning_rate": 8.69476654694432e-06, "loss": 0.3728, "step": 4609 }, { "epoch": 0.24877232745129782, "grad_norm": 1.0685559132464502, "learning_rate": 8.694229230730087e-06, "loss": 0.4677, "step": 4610 }, { "epoch": 0.24882629107981222, "grad_norm": 1.3060421254073247, "learning_rate": 8.69369182271048e-06, "loss": 0.5106, "step": 4611 }, { "epoch": 0.2488802547083266, "grad_norm": 0.9558738119319248, "learning_rate": 8.693154322900944e-06, "loss": 0.4536, "step": 4612 }, { "epoch": 0.24893421833684096, "grad_norm": 1.0111618166878278, "learning_rate": 8.692616731316924e-06, "loss": 0.5893, "step": 4613 }, { "epoch": 0.24898818196535535, "grad_norm": 1.0162019025276867, "learning_rate": 8.692079047973876e-06, "loss": 0.4364, "step": 4614 }, { "epoch": 0.24904214559386972, "grad_norm": 1.0202567031837155, "learning_rate": 8.691541272887251e-06, "loss": 0.4729, "step": 4615 }, { "epoch": 0.24909610922238412, "grad_norm": 1.0198511187883077, "learning_rate": 8.691003406072503e-06, "loss": 0.4855, "step": 4616 }, { "epoch": 0.2491500728508985, "grad_norm": 1.0879680664225109, "learning_rate": 8.690465447545093e-06, "loss": 0.5632, "step": 4617 }, { "epoch": 0.2492040364794129, "grad_norm": 1.1209853007557333, "learning_rate": 8.689927397320484e-06, "loss": 0.4848, "step": 4618 }, { "epoch": 0.24925800010792726, "grad_norm": 1.0108392411398617, "learning_rate": 8.689389255414137e-06, "loss": 0.5416, "step": 4619 }, { "epoch": 0.24931196373644163, "grad_norm": 
1.1903438858376374, "learning_rate": 8.68885102184152e-06, "loss": 0.7676, "step": 4620 }, { "epoch": 0.24936592736495602, "grad_norm": 1.178054306411386, "learning_rate": 8.688312696618103e-06, "loss": 0.5978, "step": 4621 }, { "epoch": 0.2494198909934704, "grad_norm": 1.0194057410781583, "learning_rate": 8.687774279759357e-06, "loss": 0.4899, "step": 4622 }, { "epoch": 0.2494738546219848, "grad_norm": 1.1247838937534858, "learning_rate": 8.687235771280757e-06, "loss": 0.5165, "step": 4623 }, { "epoch": 0.24952781825049916, "grad_norm": 0.8413363716071782, "learning_rate": 8.686697171197782e-06, "loss": 0.4829, "step": 4624 }, { "epoch": 0.24958178187901353, "grad_norm": 1.4362665728065536, "learning_rate": 8.68615847952591e-06, "loss": 0.6991, "step": 4625 }, { "epoch": 0.24963574550752793, "grad_norm": 1.3413767496700388, "learning_rate": 8.685619696280624e-06, "loss": 0.4819, "step": 4626 }, { "epoch": 0.2496897091360423, "grad_norm": 1.0329409385564143, "learning_rate": 8.685080821477407e-06, "loss": 0.4053, "step": 4627 }, { "epoch": 0.2497436727645567, "grad_norm": 0.7942215860910685, "learning_rate": 8.684541855131751e-06, "loss": 0.4179, "step": 4628 }, { "epoch": 0.24979763639307107, "grad_norm": 0.9339792277485138, "learning_rate": 8.684002797259144e-06, "loss": 0.531, "step": 4629 }, { "epoch": 0.24985160002158546, "grad_norm": 1.119468146350275, "learning_rate": 8.683463647875079e-06, "loss": 0.5996, "step": 4630 }, { "epoch": 0.24990556365009983, "grad_norm": 1.0354026908687988, "learning_rate": 8.682924406995052e-06, "loss": 0.5106, "step": 4631 }, { "epoch": 0.2499595272786142, "grad_norm": 1.1310648875565443, "learning_rate": 8.682385074634562e-06, "loss": 0.4588, "step": 4632 }, { "epoch": 0.2500134909071286, "grad_norm": 0.834884580377105, "learning_rate": 8.681845650809109e-06, "loss": 0.384, "step": 4633 }, { "epoch": 0.250067454535643, "grad_norm": 1.1593628471354747, "learning_rate": 8.681306135534198e-06, "loss": 0.597, "step": 4634 }, { "epoch": 0.25012141816415734, "grad_norm": 0.8096407218154917, "learning_rate": 8.680766528825332e-06, "loss": 0.3914, "step": 4635 }, { "epoch": 0.25017538179267174, "grad_norm": 1.167017215959199, "learning_rate": 8.680226830698023e-06, "loss": 0.6153, "step": 4636 }, { "epoch": 0.25022934542118613, "grad_norm": 1.1686417031431657, "learning_rate": 8.679687041167782e-06, "loss": 0.5213, "step": 4637 }, { "epoch": 0.2502833090497005, "grad_norm": 1.1654903952526496, "learning_rate": 8.679147160250122e-06, "loss": 0.564, "step": 4638 }, { "epoch": 0.2503372726782149, "grad_norm": 1.0893147217891122, "learning_rate": 8.678607187960561e-06, "loss": 0.5024, "step": 4639 }, { "epoch": 0.25039123630672927, "grad_norm": 1.0939666921570177, "learning_rate": 8.678067124314618e-06, "loss": 0.7687, "step": 4640 }, { "epoch": 0.25044519993524367, "grad_norm": 1.30004042268799, "learning_rate": 8.677526969327814e-06, "loss": 0.4862, "step": 4641 }, { "epoch": 0.250499163563758, "grad_norm": 1.051924153650951, "learning_rate": 8.676986723015674e-06, "loss": 0.6033, "step": 4642 }, { "epoch": 0.2505531271922724, "grad_norm": 0.9541341161489371, "learning_rate": 8.676446385393726e-06, "loss": 0.3822, "step": 4643 }, { "epoch": 0.2506070908207868, "grad_norm": 1.176884088381866, "learning_rate": 8.675905956477498e-06, "loss": 0.5733, "step": 4644 }, { "epoch": 0.25066105444930115, "grad_norm": 1.092383829590621, "learning_rate": 8.675365436282527e-06, "loss": 0.7483, "step": 4645 }, { "epoch": 0.25071501807781554, "grad_norm": 0.9064241608921764, 
"learning_rate": 8.674824824824342e-06, "loss": 0.5765, "step": 4646 }, { "epoch": 0.25076898170632994, "grad_norm": 1.2936825788828394, "learning_rate": 8.674284122118484e-06, "loss": 0.6663, "step": 4647 }, { "epoch": 0.25082294533484434, "grad_norm": 0.9757480576532195, "learning_rate": 8.673743328180494e-06, "loss": 0.3793, "step": 4648 }, { "epoch": 0.2508769089633587, "grad_norm": 1.1495317543832295, "learning_rate": 8.673202443025913e-06, "loss": 0.585, "step": 4649 }, { "epoch": 0.2509308725918731, "grad_norm": 0.9352192046083353, "learning_rate": 8.672661466670287e-06, "loss": 0.4025, "step": 4650 }, { "epoch": 0.2509848362203875, "grad_norm": 1.0526747709530804, "learning_rate": 8.672120399129164e-06, "loss": 0.4262, "step": 4651 }, { "epoch": 0.2510387998489018, "grad_norm": 1.045492351445694, "learning_rate": 8.671579240418099e-06, "loss": 0.4032, "step": 4652 }, { "epoch": 0.2510927634774162, "grad_norm": 1.2557011290679319, "learning_rate": 8.67103799055264e-06, "loss": 0.806, "step": 4653 }, { "epoch": 0.2511467271059306, "grad_norm": 0.9694591174202163, "learning_rate": 8.670496649548344e-06, "loss": 0.3825, "step": 4654 }, { "epoch": 0.251200690734445, "grad_norm": 0.9899254989407549, "learning_rate": 8.669955217420771e-06, "loss": 0.3894, "step": 4655 }, { "epoch": 0.25125465436295935, "grad_norm": 1.0420059948494929, "learning_rate": 8.669413694185485e-06, "loss": 0.5142, "step": 4656 }, { "epoch": 0.25130861799147375, "grad_norm": 1.0823639015600264, "learning_rate": 8.668872079858042e-06, "loss": 0.4501, "step": 4657 }, { "epoch": 0.25136258161998815, "grad_norm": 1.0829709161739571, "learning_rate": 8.668330374454016e-06, "loss": 0.4438, "step": 4658 }, { "epoch": 0.2514165452485025, "grad_norm": 1.1079061031720523, "learning_rate": 8.667788577988975e-06, "loss": 0.6563, "step": 4659 }, { "epoch": 0.2514705088770169, "grad_norm": 1.062552492316104, "learning_rate": 8.66724669047849e-06, "loss": 0.4529, "step": 4660 }, { "epoch": 0.2515244725055313, "grad_norm": 1.133973701737392, "learning_rate": 8.666704711938133e-06, "loss": 0.5176, "step": 4661 }, { "epoch": 0.2515784361340457, "grad_norm": 1.1103955260711251, "learning_rate": 8.666162642383481e-06, "loss": 0.5229, "step": 4662 }, { "epoch": 0.25163239976256, "grad_norm": 1.1537072699447342, "learning_rate": 8.66562048183012e-06, "loss": 0.5537, "step": 4663 }, { "epoch": 0.2516863633910744, "grad_norm": 1.1018935525465061, "learning_rate": 8.665078230293626e-06, "loss": 0.5703, "step": 4664 }, { "epoch": 0.2517403270195888, "grad_norm": 1.2030832666004732, "learning_rate": 8.664535887789583e-06, "loss": 0.5521, "step": 4665 }, { "epoch": 0.25179429064810316, "grad_norm": 0.9748328118325305, "learning_rate": 8.663993454333583e-06, "loss": 0.494, "step": 4666 }, { "epoch": 0.25184825427661756, "grad_norm": 0.9875485350776795, "learning_rate": 8.663450929941214e-06, "loss": 0.4388, "step": 4667 }, { "epoch": 0.25190221790513195, "grad_norm": 1.338492272955238, "learning_rate": 8.662908314628069e-06, "loss": 0.6516, "step": 4668 }, { "epoch": 0.2519561815336463, "grad_norm": 0.8767492182970065, "learning_rate": 8.662365608409744e-06, "loss": 0.3735, "step": 4669 }, { "epoch": 0.2520101451621607, "grad_norm": 0.9947005162480036, "learning_rate": 8.661822811301834e-06, "loss": 0.4555, "step": 4670 }, { "epoch": 0.2520641087906751, "grad_norm": 1.1429969419517736, "learning_rate": 8.661279923319944e-06, "loss": 0.5985, "step": 4671 }, { "epoch": 0.2521180724191895, "grad_norm": 1.008991925784486, "learning_rate": 
8.660736944479673e-06, "loss": 0.5753, "step": 4672 }, { "epoch": 0.25217203604770383, "grad_norm": 0.9489067867810032, "learning_rate": 8.660193874796629e-06, "loss": 0.4668, "step": 4673 }, { "epoch": 0.2522259996762182, "grad_norm": 0.994584421856532, "learning_rate": 8.659650714286419e-06, "loss": 0.5281, "step": 4674 }, { "epoch": 0.2522799633047326, "grad_norm": 0.8639435658018252, "learning_rate": 8.659107462964657e-06, "loss": 0.3761, "step": 4675 }, { "epoch": 0.25233392693324697, "grad_norm": 1.2739342995905965, "learning_rate": 8.658564120846953e-06, "loss": 0.7129, "step": 4676 }, { "epoch": 0.25238789056176136, "grad_norm": 1.1252140282275422, "learning_rate": 8.658020687948925e-06, "loss": 0.5598, "step": 4677 }, { "epoch": 0.25244185419027576, "grad_norm": 0.8989640921268353, "learning_rate": 8.65747716428619e-06, "loss": 0.3795, "step": 4678 }, { "epoch": 0.25249581781879016, "grad_norm": 1.0563550262086794, "learning_rate": 8.656933549874374e-06, "loss": 0.4486, "step": 4679 }, { "epoch": 0.2525497814473045, "grad_norm": 1.0427272650172887, "learning_rate": 8.656389844729094e-06, "loss": 0.4763, "step": 4680 }, { "epoch": 0.2526037450758189, "grad_norm": 1.2025475470600222, "learning_rate": 8.655846048865983e-06, "loss": 0.5987, "step": 4681 }, { "epoch": 0.2526577087043333, "grad_norm": 0.911868503447582, "learning_rate": 8.655302162300668e-06, "loss": 0.3402, "step": 4682 }, { "epoch": 0.25271167233284764, "grad_norm": 1.0489916125764895, "learning_rate": 8.654758185048781e-06, "loss": 0.4968, "step": 4683 }, { "epoch": 0.25276563596136203, "grad_norm": 1.0519211908890327, "learning_rate": 8.654214117125957e-06, "loss": 0.4823, "step": 4684 }, { "epoch": 0.25281959958987643, "grad_norm": 1.0858183812793807, "learning_rate": 8.65366995854783e-06, "loss": 0.5445, "step": 4685 }, { "epoch": 0.25287356321839083, "grad_norm": 1.0512473453056037, "learning_rate": 8.65312570933004e-06, "loss": 0.4689, "step": 4686 }, { "epoch": 0.25292752684690517, "grad_norm": 1.243521561722281, "learning_rate": 8.652581369488235e-06, "loss": 0.5978, "step": 4687 }, { "epoch": 0.25298149047541957, "grad_norm": 1.0360413851599632, "learning_rate": 8.652036939038055e-06, "loss": 0.5051, "step": 4688 }, { "epoch": 0.25303545410393397, "grad_norm": 1.0659968738774683, "learning_rate": 8.651492417995147e-06, "loss": 0.5013, "step": 4689 }, { "epoch": 0.2530894177324483, "grad_norm": 1.220151833199879, "learning_rate": 8.650947806375165e-06, "loss": 0.5658, "step": 4690 }, { "epoch": 0.2531433813609627, "grad_norm": 1.1767838769975547, "learning_rate": 8.650403104193758e-06, "loss": 0.5583, "step": 4691 }, { "epoch": 0.2531973449894771, "grad_norm": 1.0289736242325367, "learning_rate": 8.649858311466581e-06, "loss": 0.4434, "step": 4692 }, { "epoch": 0.2532513086179915, "grad_norm": 1.001042167713907, "learning_rate": 8.649313428209295e-06, "loss": 0.4333, "step": 4693 }, { "epoch": 0.25330527224650584, "grad_norm": 0.9721962469303661, "learning_rate": 8.648768454437559e-06, "loss": 0.5783, "step": 4694 }, { "epoch": 0.25335923587502024, "grad_norm": 1.0639881155303819, "learning_rate": 8.648223390167036e-06, "loss": 0.606, "step": 4695 }, { "epoch": 0.25341319950353464, "grad_norm": 0.9781002664198847, "learning_rate": 8.647678235413392e-06, "loss": 0.3875, "step": 4696 }, { "epoch": 0.253467163132049, "grad_norm": 1.0792283403050487, "learning_rate": 8.647132990192294e-06, "loss": 0.4838, "step": 4697 }, { "epoch": 0.2535211267605634, "grad_norm": 0.9225755664128706, "learning_rate": 
8.646587654519413e-06, "loss": 0.394, "step": 4698 }, { "epoch": 0.2535750903890778, "grad_norm": 0.983300300991792, "learning_rate": 8.646042228410427e-06, "loss": 0.5214, "step": 4699 }, { "epoch": 0.25362905401759217, "grad_norm": 1.1094239308384897, "learning_rate": 8.645496711881005e-06, "loss": 0.4959, "step": 4700 }, { "epoch": 0.2536830176461065, "grad_norm": 1.0289085413727432, "learning_rate": 8.64495110494683e-06, "loss": 0.4549, "step": 4701 }, { "epoch": 0.2537369812746209, "grad_norm": 1.1405343903525262, "learning_rate": 8.644405407623584e-06, "loss": 0.6822, "step": 4702 }, { "epoch": 0.2537909449031353, "grad_norm": 1.067754758540559, "learning_rate": 8.64385961992695e-06, "loss": 0.5095, "step": 4703 }, { "epoch": 0.25384490853164965, "grad_norm": 1.126055992031456, "learning_rate": 8.64331374187261e-06, "loss": 0.6047, "step": 4704 }, { "epoch": 0.25389887216016405, "grad_norm": 1.2390534775340674, "learning_rate": 8.642767773476258e-06, "loss": 0.7254, "step": 4705 }, { "epoch": 0.25395283578867844, "grad_norm": 0.84808052360508, "learning_rate": 8.642221714753588e-06, "loss": 0.4104, "step": 4706 }, { "epoch": 0.2540067994171928, "grad_norm": 1.1264051857222837, "learning_rate": 8.641675565720289e-06, "loss": 0.5063, "step": 4707 }, { "epoch": 0.2540607630457072, "grad_norm": 1.0694037360849165, "learning_rate": 8.64112932639206e-06, "loss": 0.4464, "step": 4708 }, { "epoch": 0.2541147266742216, "grad_norm": 1.110202349014479, "learning_rate": 8.640582996784597e-06, "loss": 0.5092, "step": 4709 }, { "epoch": 0.254168690302736, "grad_norm": 1.0926368279848881, "learning_rate": 8.640036576913609e-06, "loss": 0.6214, "step": 4710 }, { "epoch": 0.2542226539312503, "grad_norm": 1.0403056060885083, "learning_rate": 8.639490066794797e-06, "loss": 0.649, "step": 4711 }, { "epoch": 0.2542766175597647, "grad_norm": 1.1521916394916742, "learning_rate": 8.638943466443867e-06, "loss": 0.654, "step": 4712 }, { "epoch": 0.2543305811882791, "grad_norm": 1.1973875911827312, "learning_rate": 8.638396775876531e-06, "loss": 0.4643, "step": 4713 }, { "epoch": 0.25438454481679346, "grad_norm": 0.9985086186284929, "learning_rate": 8.6378499951085e-06, "loss": 0.4789, "step": 4714 }, { "epoch": 0.25443850844530785, "grad_norm": 1.0028378786621082, "learning_rate": 8.63730312415549e-06, "loss": 0.4989, "step": 4715 }, { "epoch": 0.25449247207382225, "grad_norm": 1.1766990006039972, "learning_rate": 8.636756163033218e-06, "loss": 0.5806, "step": 4716 }, { "epoch": 0.25454643570233665, "grad_norm": 0.8926750708551197, "learning_rate": 8.636209111757405e-06, "loss": 0.4293, "step": 4717 }, { "epoch": 0.254600399330851, "grad_norm": 0.9527827225045191, "learning_rate": 8.635661970343773e-06, "loss": 0.3837, "step": 4718 }, { "epoch": 0.2546543629593654, "grad_norm": 0.9513594112110718, "learning_rate": 8.635114738808045e-06, "loss": 0.3435, "step": 4719 }, { "epoch": 0.2547083265878798, "grad_norm": 1.0428734444886056, "learning_rate": 8.634567417165956e-06, "loss": 0.4809, "step": 4720 }, { "epoch": 0.2547622902163941, "grad_norm": 1.1991795804777174, "learning_rate": 8.634020005433229e-06, "loss": 0.5313, "step": 4721 }, { "epoch": 0.2548162538449085, "grad_norm": 0.8578801078833512, "learning_rate": 8.633472503625602e-06, "loss": 0.3417, "step": 4722 }, { "epoch": 0.2548702174734229, "grad_norm": 1.0048990019304103, "learning_rate": 8.632924911758808e-06, "loss": 0.4366, "step": 4723 }, { "epoch": 0.2549241811019373, "grad_norm": 0.9515931558967206, "learning_rate": 8.632377229848586e-06, 
"loss": 0.4103, "step": 4724 }, { "epoch": 0.25497814473045166, "grad_norm": 0.9422457322772619, "learning_rate": 8.631829457910679e-06, "loss": 0.3797, "step": 4725 }, { "epoch": 0.25503210835896606, "grad_norm": 1.1299191544604263, "learning_rate": 8.63128159596083e-06, "loss": 0.5568, "step": 4726 }, { "epoch": 0.25508607198748046, "grad_norm": 1.174319083036384, "learning_rate": 8.630733644014781e-06, "loss": 0.5016, "step": 4727 }, { "epoch": 0.2551400356159948, "grad_norm": 1.1703259551196623, "learning_rate": 8.630185602088285e-06, "loss": 0.6325, "step": 4728 }, { "epoch": 0.2551939992445092, "grad_norm": 1.065845328081628, "learning_rate": 8.62963747019709e-06, "loss": 0.4913, "step": 4729 }, { "epoch": 0.2552479628730236, "grad_norm": 1.0061706344992347, "learning_rate": 8.629089248356955e-06, "loss": 0.5379, "step": 4730 }, { "epoch": 0.255301926501538, "grad_norm": 1.1611418906501025, "learning_rate": 8.628540936583632e-06, "loss": 0.6365, "step": 4731 }, { "epoch": 0.25535589013005233, "grad_norm": 0.8567499291371931, "learning_rate": 8.62799253489288e-06, "loss": 0.5439, "step": 4732 }, { "epoch": 0.25540985375856673, "grad_norm": 1.3126388618857745, "learning_rate": 8.627444043300463e-06, "loss": 0.6789, "step": 4733 }, { "epoch": 0.2554638173870811, "grad_norm": 0.9697156031059926, "learning_rate": 8.626895461822146e-06, "loss": 0.4409, "step": 4734 }, { "epoch": 0.25551778101559547, "grad_norm": 1.2984711627583192, "learning_rate": 8.626346790473693e-06, "loss": 0.4832, "step": 4735 }, { "epoch": 0.25557174464410987, "grad_norm": 1.2373761000460908, "learning_rate": 8.625798029270873e-06, "loss": 0.6429, "step": 4736 }, { "epoch": 0.25562570827262426, "grad_norm": 1.0110518775283, "learning_rate": 8.62524917822946e-06, "loss": 0.418, "step": 4737 }, { "epoch": 0.2556796719011386, "grad_norm": 0.9960210432015533, "learning_rate": 8.624700237365226e-06, "loss": 0.4677, "step": 4738 }, { "epoch": 0.255733635529653, "grad_norm": 0.8954507144754973, "learning_rate": 8.624151206693953e-06, "loss": 0.3791, "step": 4739 }, { "epoch": 0.2557875991581674, "grad_norm": 1.0543805923328142, "learning_rate": 8.623602086231417e-06, "loss": 0.4617, "step": 4740 }, { "epoch": 0.2558415627866818, "grad_norm": 1.2261070394313822, "learning_rate": 8.6230528759934e-06, "loss": 0.5016, "step": 4741 }, { "epoch": 0.25589552641519614, "grad_norm": 1.193618167482218, "learning_rate": 8.622503575995686e-06, "loss": 0.6098, "step": 4742 }, { "epoch": 0.25594949004371054, "grad_norm": 1.1007036449198757, "learning_rate": 8.621954186254067e-06, "loss": 0.5304, "step": 4743 }, { "epoch": 0.25600345367222493, "grad_norm": 1.1212114280309335, "learning_rate": 8.621404706784328e-06, "loss": 0.5112, "step": 4744 }, { "epoch": 0.2560574173007393, "grad_norm": 1.0832913545722138, "learning_rate": 8.620855137602267e-06, "loss": 0.5702, "step": 4745 }, { "epoch": 0.2561113809292537, "grad_norm": 1.3125030567538019, "learning_rate": 8.620305478723674e-06, "loss": 0.642, "step": 4746 }, { "epoch": 0.25616534455776807, "grad_norm": 1.1424828112775594, "learning_rate": 8.619755730164346e-06, "loss": 0.6617, "step": 4747 }, { "epoch": 0.25621930818628247, "grad_norm": 0.9609140987008827, "learning_rate": 8.619205891940089e-06, "loss": 0.6349, "step": 4748 }, { "epoch": 0.2562732718147968, "grad_norm": 1.0533856788269076, "learning_rate": 8.618655964066701e-06, "loss": 0.5189, "step": 4749 }, { "epoch": 0.2563272354433112, "grad_norm": 0.9012173701100783, "learning_rate": 8.618105946559993e-06, "loss": 0.4672, 
"step": 4750 }, { "epoch": 0.2563811990718256, "grad_norm": 1.0927629992181256, "learning_rate": 8.617555839435767e-06, "loss": 0.5131, "step": 4751 }, { "epoch": 0.25643516270033995, "grad_norm": 1.1157358564048347, "learning_rate": 8.617005642709836e-06, "loss": 0.5583, "step": 4752 }, { "epoch": 0.25648912632885434, "grad_norm": 1.3540776232728797, "learning_rate": 8.616455356398015e-06, "loss": 0.6633, "step": 4753 }, { "epoch": 0.25654308995736874, "grad_norm": 1.0385672344907089, "learning_rate": 8.615904980516116e-06, "loss": 0.6293, "step": 4754 }, { "epoch": 0.25659705358588314, "grad_norm": 1.13875050764832, "learning_rate": 8.61535451507996e-06, "loss": 0.509, "step": 4755 }, { "epoch": 0.2566510172143975, "grad_norm": 1.1093511562642127, "learning_rate": 8.614803960105368e-06, "loss": 0.5437, "step": 4756 }, { "epoch": 0.2567049808429119, "grad_norm": 1.0687098996725628, "learning_rate": 8.614253315608164e-06, "loss": 0.4725, "step": 4757 }, { "epoch": 0.2567589444714263, "grad_norm": 0.902088830126422, "learning_rate": 8.61370258160417e-06, "loss": 0.3781, "step": 4758 }, { "epoch": 0.2568129080999406, "grad_norm": 1.0420752523569332, "learning_rate": 8.613151758109221e-06, "loss": 0.4699, "step": 4759 }, { "epoch": 0.256866871728455, "grad_norm": 0.9266623243198918, "learning_rate": 8.612600845139144e-06, "loss": 0.4729, "step": 4760 }, { "epoch": 0.2569208353569694, "grad_norm": 1.180925513821991, "learning_rate": 8.612049842709771e-06, "loss": 0.5895, "step": 4761 }, { "epoch": 0.2569747989854838, "grad_norm": 1.237327064292216, "learning_rate": 8.611498750836942e-06, "loss": 0.5759, "step": 4762 }, { "epoch": 0.25702876261399815, "grad_norm": 1.2130067591257077, "learning_rate": 8.610947569536498e-06, "loss": 0.5098, "step": 4763 }, { "epoch": 0.25708272624251255, "grad_norm": 1.066143716042951, "learning_rate": 8.610396298824275e-06, "loss": 0.6153, "step": 4764 }, { "epoch": 0.25713668987102695, "grad_norm": 0.7871548833746083, "learning_rate": 8.60984493871612e-06, "loss": 0.3356, "step": 4765 }, { "epoch": 0.2571906534995413, "grad_norm": 1.0239917491078074, "learning_rate": 8.609293489227878e-06, "loss": 0.4657, "step": 4766 }, { "epoch": 0.2572446171280557, "grad_norm": 1.1054470581966795, "learning_rate": 8.6087419503754e-06, "loss": 0.4631, "step": 4767 }, { "epoch": 0.2572985807565701, "grad_norm": 0.997525205037016, "learning_rate": 8.608190322174539e-06, "loss": 0.536, "step": 4768 }, { "epoch": 0.2573525443850845, "grad_norm": 1.0420573831908493, "learning_rate": 8.607638604641145e-06, "loss": 0.5231, "step": 4769 }, { "epoch": 0.2574065080135988, "grad_norm": 1.0683902052910277, "learning_rate": 8.60708679779108e-06, "loss": 0.5861, "step": 4770 }, { "epoch": 0.2574604716421132, "grad_norm": 0.8923409518475283, "learning_rate": 8.606534901640198e-06, "loss": 0.3813, "step": 4771 }, { "epoch": 0.2575144352706276, "grad_norm": 1.0450148437435072, "learning_rate": 8.605982916204366e-06, "loss": 0.4895, "step": 4772 }, { "epoch": 0.25756839889914196, "grad_norm": 0.8725860693353078, "learning_rate": 8.605430841499446e-06, "loss": 0.4524, "step": 4773 }, { "epoch": 0.25762236252765636, "grad_norm": 1.1051453773295155, "learning_rate": 8.604878677541307e-06, "loss": 0.5623, "step": 4774 }, { "epoch": 0.25767632615617075, "grad_norm": 1.158119773584772, "learning_rate": 8.604326424345817e-06, "loss": 0.4481, "step": 4775 }, { "epoch": 0.2577302897846851, "grad_norm": 1.2949127443596018, "learning_rate": 8.603774081928848e-06, "loss": 0.66, "step": 4776 }, { "epoch": 
0.2577842534131995, "grad_norm": 1.2503810331559044, "learning_rate": 8.603221650306276e-06, "loss": 0.5866, "step": 4777 }, { "epoch": 0.2578382170417139, "grad_norm": 0.8804330828311822, "learning_rate": 8.602669129493979e-06, "loss": 0.3406, "step": 4778 }, { "epoch": 0.2578921806702283, "grad_norm": 1.0764854027936386, "learning_rate": 8.602116519507835e-06, "loss": 0.5813, "step": 4779 }, { "epoch": 0.25794614429874263, "grad_norm": 1.0327333828383412, "learning_rate": 8.601563820363728e-06, "loss": 0.5389, "step": 4780 }, { "epoch": 0.258000107927257, "grad_norm": 1.0117585824266941, "learning_rate": 8.601011032077544e-06, "loss": 0.4145, "step": 4781 }, { "epoch": 0.2580540715557714, "grad_norm": 0.9219203401855592, "learning_rate": 8.600458154665167e-06, "loss": 0.3668, "step": 4782 }, { "epoch": 0.25810803518428577, "grad_norm": 1.0590268948442758, "learning_rate": 8.599905188142491e-06, "loss": 0.6278, "step": 4783 }, { "epoch": 0.25816199881280016, "grad_norm": 1.1117320646419748, "learning_rate": 8.59935213252541e-06, "loss": 0.4273, "step": 4784 }, { "epoch": 0.25821596244131456, "grad_norm": 1.021085077853354, "learning_rate": 8.598798987829816e-06, "loss": 0.4567, "step": 4785 }, { "epoch": 0.25826992606982896, "grad_norm": 1.2565843972287152, "learning_rate": 8.598245754071606e-06, "loss": 0.6665, "step": 4786 }, { "epoch": 0.2583238896983433, "grad_norm": 1.0343178498932173, "learning_rate": 8.597692431266685e-06, "loss": 0.551, "step": 4787 }, { "epoch": 0.2583778533268577, "grad_norm": 0.877592881943391, "learning_rate": 8.597139019430954e-06, "loss": 0.3768, "step": 4788 }, { "epoch": 0.2584318169553721, "grad_norm": 0.9277158610719495, "learning_rate": 8.596585518580315e-06, "loss": 0.3807, "step": 4789 }, { "epoch": 0.25848578058388644, "grad_norm": 1.067034470540038, "learning_rate": 8.59603192873068e-06, "loss": 0.5563, "step": 4790 }, { "epoch": 0.25853974421240083, "grad_norm": 1.1430864644634144, "learning_rate": 8.595478249897961e-06, "loss": 0.5265, "step": 4791 }, { "epoch": 0.25859370784091523, "grad_norm": 0.9795650958628277, "learning_rate": 8.594924482098069e-06, "loss": 0.4726, "step": 4792 }, { "epoch": 0.25864767146942963, "grad_norm": 1.6141125782572077, "learning_rate": 8.59437062534692e-06, "loss": 0.6229, "step": 4793 }, { "epoch": 0.25870163509794397, "grad_norm": 1.09011336030748, "learning_rate": 8.59381667966043e-06, "loss": 0.5511, "step": 4794 }, { "epoch": 0.25875559872645837, "grad_norm": 0.962058958335601, "learning_rate": 8.593262645054525e-06, "loss": 0.4118, "step": 4795 }, { "epoch": 0.25880956235497277, "grad_norm": 1.0616623335983002, "learning_rate": 8.592708521545125e-06, "loss": 0.5242, "step": 4796 }, { "epoch": 0.2588635259834871, "grad_norm": 0.7732391387288949, "learning_rate": 8.592154309148159e-06, "loss": 0.4085, "step": 4797 }, { "epoch": 0.2589174896120015, "grad_norm": 1.0836440835062717, "learning_rate": 8.591600007879552e-06, "loss": 0.7179, "step": 4798 }, { "epoch": 0.2589714532405159, "grad_norm": 0.7692170737726116, "learning_rate": 8.591045617755238e-06, "loss": 0.329, "step": 4799 }, { "epoch": 0.2590254168690303, "grad_norm": 1.1732359820706788, "learning_rate": 8.590491138791148e-06, "loss": 0.6607, "step": 4800 }, { "epoch": 0.25907938049754464, "grad_norm": 1.0737690690773984, "learning_rate": 8.589936571003223e-06, "loss": 0.5566, "step": 4801 }, { "epoch": 0.25913334412605904, "grad_norm": 1.2648455408968318, "learning_rate": 8.589381914407397e-06, "loss": 0.6435, "step": 4802 }, { "epoch": 
0.25918730775457344, "grad_norm": 1.1154547488812638, "learning_rate": 8.588827169019613e-06, "loss": 0.6349, "step": 4803 }, { "epoch": 0.2592412713830878, "grad_norm": 1.1333387201788943, "learning_rate": 8.588272334855814e-06, "loss": 0.5413, "step": 4804 }, { "epoch": 0.2592952350116022, "grad_norm": 2.2278715805878457, "learning_rate": 8.58771741193195e-06, "loss": 0.4389, "step": 4805 }, { "epoch": 0.2593491986401166, "grad_norm": 1.0932450822569113, "learning_rate": 8.587162400263968e-06, "loss": 0.547, "step": 4806 }, { "epoch": 0.2594031622686309, "grad_norm": 1.228222949934109, "learning_rate": 8.586607299867816e-06, "loss": 0.4077, "step": 4807 }, { "epoch": 0.2594571258971453, "grad_norm": 1.1545562886297172, "learning_rate": 8.586052110759452e-06, "loss": 0.5296, "step": 4808 }, { "epoch": 0.2595110895256597, "grad_norm": 1.3216791437503053, "learning_rate": 8.585496832954835e-06, "loss": 0.628, "step": 4809 }, { "epoch": 0.2595650531541741, "grad_norm": 1.0028265697163208, "learning_rate": 8.58494146646992e-06, "loss": 0.5562, "step": 4810 }, { "epoch": 0.25961901678268845, "grad_norm": 1.2758895808057582, "learning_rate": 8.584386011320669e-06, "loss": 0.4651, "step": 4811 }, { "epoch": 0.25967298041120285, "grad_norm": 1.1479876864300085, "learning_rate": 8.583830467523048e-06, "loss": 0.4849, "step": 4812 }, { "epoch": 0.25972694403971724, "grad_norm": 1.0633183976589873, "learning_rate": 8.583274835093024e-06, "loss": 0.5501, "step": 4813 }, { "epoch": 0.2597809076682316, "grad_norm": 1.1512892941996538, "learning_rate": 8.582719114046566e-06, "loss": 0.6046, "step": 4814 }, { "epoch": 0.259834871296746, "grad_norm": 1.2058438850158357, "learning_rate": 8.582163304399646e-06, "loss": 0.3819, "step": 4815 }, { "epoch": 0.2598888349252604, "grad_norm": 0.8939949582031429, "learning_rate": 8.581607406168238e-06, "loss": 0.3644, "step": 4816 }, { "epoch": 0.2599427985537748, "grad_norm": 1.031048229794335, "learning_rate": 8.581051419368319e-06, "loss": 0.4676, "step": 4817 }, { "epoch": 0.2599967621822891, "grad_norm": 0.8493255738632824, "learning_rate": 8.580495344015869e-06, "loss": 0.4403, "step": 4818 }, { "epoch": 0.2600507258108035, "grad_norm": 0.9576952536351344, "learning_rate": 8.57993918012687e-06, "loss": 0.3935, "step": 4819 }, { "epoch": 0.2601046894393179, "grad_norm": 1.0039936739113335, "learning_rate": 8.579382927717306e-06, "loss": 0.4725, "step": 4820 }, { "epoch": 0.26015865306783226, "grad_norm": 1.3889602150207494, "learning_rate": 8.578826586803168e-06, "loss": 0.525, "step": 4821 }, { "epoch": 0.26021261669634665, "grad_norm": 0.9748389840729356, "learning_rate": 8.578270157400441e-06, "loss": 0.4622, "step": 4822 }, { "epoch": 0.26026658032486105, "grad_norm": 1.0615486641873189, "learning_rate": 8.57771363952512e-06, "loss": 0.4935, "step": 4823 }, { "epoch": 0.26032054395337545, "grad_norm": 0.973205392136642, "learning_rate": 8.577157033193197e-06, "loss": 0.4625, "step": 4824 }, { "epoch": 0.2603745075818898, "grad_norm": 0.9057937014557815, "learning_rate": 8.576600338420676e-06, "loss": 0.4211, "step": 4825 }, { "epoch": 0.2604284712104042, "grad_norm": 1.0158299482663928, "learning_rate": 8.57604355522355e-06, "loss": 0.4789, "step": 4826 }, { "epoch": 0.2604824348389186, "grad_norm": 0.9249741783503165, "learning_rate": 8.575486683617823e-06, "loss": 0.3642, "step": 4827 }, { "epoch": 0.26053639846743293, "grad_norm": 0.8584313182640777, "learning_rate": 8.574929723619502e-06, "loss": 0.4919, "step": 4828 }, { "epoch": 0.2605903620959473, 
"grad_norm": 1.115599877706248, "learning_rate": 8.574372675244595e-06, "loss": 0.5763, "step": 4829 }, { "epoch": 0.2606443257244617, "grad_norm": 1.308816181793228, "learning_rate": 8.57381553850911e-06, "loss": 0.7704, "step": 4830 }, { "epoch": 0.2606982893529761, "grad_norm": 1.128873806299612, "learning_rate": 8.57325831342906e-06, "loss": 0.5451, "step": 4831 }, { "epoch": 0.26075225298149046, "grad_norm": 1.2670500738382617, "learning_rate": 8.572701000020462e-06, "loss": 0.5222, "step": 4832 }, { "epoch": 0.26080621661000486, "grad_norm": 1.1234382018255646, "learning_rate": 8.572143598299332e-06, "loss": 0.547, "step": 4833 }, { "epoch": 0.26086018023851926, "grad_norm": 1.125368324949549, "learning_rate": 8.57158610828169e-06, "loss": 0.5177, "step": 4834 }, { "epoch": 0.2609141438670336, "grad_norm": 1.072537057266257, "learning_rate": 8.571028529983561e-06, "loss": 0.5756, "step": 4835 }, { "epoch": 0.260968107495548, "grad_norm": 1.0296411196191309, "learning_rate": 8.570470863420968e-06, "loss": 0.4859, "step": 4836 }, { "epoch": 0.2610220711240624, "grad_norm": 0.9138756588227304, "learning_rate": 8.56991310860994e-06, "loss": 0.459, "step": 4837 }, { "epoch": 0.26107603475257674, "grad_norm": 0.9511752396065486, "learning_rate": 8.569355265566508e-06, "loss": 0.4063, "step": 4838 }, { "epoch": 0.26112999838109113, "grad_norm": 1.104098685877078, "learning_rate": 8.568797334306703e-06, "loss": 0.4338, "step": 4839 }, { "epoch": 0.26118396200960553, "grad_norm": 1.1786502798592364, "learning_rate": 8.568239314846563e-06, "loss": 0.4616, "step": 4840 }, { "epoch": 0.2612379256381199, "grad_norm": 1.244208070747673, "learning_rate": 8.567681207202124e-06, "loss": 0.4382, "step": 4841 }, { "epoch": 0.26129188926663427, "grad_norm": 1.0455108342913553, "learning_rate": 8.56712301138943e-06, "loss": 0.4487, "step": 4842 }, { "epoch": 0.26134585289514867, "grad_norm": 1.0602898811387886, "learning_rate": 8.566564727424518e-06, "loss": 0.4714, "step": 4843 }, { "epoch": 0.26139981652366306, "grad_norm": 1.083045654232036, "learning_rate": 8.566006355323439e-06, "loss": 0.5861, "step": 4844 }, { "epoch": 0.2614537801521774, "grad_norm": 0.9407909075359852, "learning_rate": 8.56544789510224e-06, "loss": 0.5161, "step": 4845 }, { "epoch": 0.2615077437806918, "grad_norm": 1.2272369157506438, "learning_rate": 8.56488934677697e-06, "loss": 0.6047, "step": 4846 }, { "epoch": 0.2615617074092062, "grad_norm": 0.7988983948925199, "learning_rate": 8.564330710363682e-06, "loss": 0.2992, "step": 4847 }, { "epoch": 0.2616156710377206, "grad_norm": 1.1346296053853278, "learning_rate": 8.563771985878437e-06, "loss": 0.54, "step": 4848 }, { "epoch": 0.26166963466623494, "grad_norm": 1.113461239886182, "learning_rate": 8.563213173337287e-06, "loss": 0.4907, "step": 4849 }, { "epoch": 0.26172359829474934, "grad_norm": 0.9232650349284862, "learning_rate": 8.562654272756294e-06, "loss": 0.463, "step": 4850 }, { "epoch": 0.26177756192326374, "grad_norm": 1.2781360042073813, "learning_rate": 8.562095284151524e-06, "loss": 0.5849, "step": 4851 }, { "epoch": 0.2618315255517781, "grad_norm": 1.103956884495305, "learning_rate": 8.561536207539041e-06, "loss": 0.5006, "step": 4852 }, { "epoch": 0.2618854891802925, "grad_norm": 1.1024423561607866, "learning_rate": 8.560977042934915e-06, "loss": 0.5186, "step": 4853 }, { "epoch": 0.26193945280880687, "grad_norm": 0.8615639776757269, "learning_rate": 8.560417790355215e-06, "loss": 0.4259, "step": 4854 }, { "epoch": 0.26199341643732127, "grad_norm": 
1.050811366327978, "learning_rate": 8.559858449816016e-06, "loss": 0.4822, "step": 4855 }, { "epoch": 0.2620473800658356, "grad_norm": 0.9780810424185326, "learning_rate": 8.559299021333392e-06, "loss": 0.4573, "step": 4856 }, { "epoch": 0.26210134369435, "grad_norm": 1.0230735501133428, "learning_rate": 8.558739504923424e-06, "loss": 0.4432, "step": 4857 }, { "epoch": 0.2621553073228644, "grad_norm": 1.0485820287888148, "learning_rate": 8.558179900602192e-06, "loss": 0.4724, "step": 4858 }, { "epoch": 0.26220927095137875, "grad_norm": 1.0497026283300308, "learning_rate": 8.55762020838578e-06, "loss": 0.4598, "step": 4859 }, { "epoch": 0.26226323457989315, "grad_norm": 1.0007377310284702, "learning_rate": 8.557060428290274e-06, "loss": 0.415, "step": 4860 }, { "epoch": 0.26231719820840754, "grad_norm": 1.3460855932211668, "learning_rate": 8.556500560331761e-06, "loss": 0.5424, "step": 4861 }, { "epoch": 0.26237116183692194, "grad_norm": 1.158346629858631, "learning_rate": 8.555940604526334e-06, "loss": 0.5684, "step": 4862 }, { "epoch": 0.2624251254654363, "grad_norm": 0.9854158072195656, "learning_rate": 8.555380560890086e-06, "loss": 0.4751, "step": 4863 }, { "epoch": 0.2624790890939507, "grad_norm": 1.1011620411044138, "learning_rate": 8.554820429439114e-06, "loss": 0.4901, "step": 4864 }, { "epoch": 0.2625330527224651, "grad_norm": 1.0275198858264731, "learning_rate": 8.554260210189516e-06, "loss": 0.5032, "step": 4865 }, { "epoch": 0.2625870163509794, "grad_norm": 0.8822803516546975, "learning_rate": 8.553699903157394e-06, "loss": 0.4158, "step": 4866 }, { "epoch": 0.2626409799794938, "grad_norm": 0.7976567262389546, "learning_rate": 8.55313950835885e-06, "loss": 0.3918, "step": 4867 }, { "epoch": 0.2626949436080082, "grad_norm": 1.2190675538317952, "learning_rate": 8.552579025809991e-06, "loss": 0.6225, "step": 4868 }, { "epoch": 0.2627489072365226, "grad_norm": 0.9706520185451039, "learning_rate": 8.552018455526928e-06, "loss": 0.4973, "step": 4869 }, { "epoch": 0.26280287086503695, "grad_norm": 1.1317902498235322, "learning_rate": 8.551457797525772e-06, "loss": 0.5281, "step": 4870 }, { "epoch": 0.26285683449355135, "grad_norm": 0.8883466169361715, "learning_rate": 8.550897051822633e-06, "loss": 0.3693, "step": 4871 }, { "epoch": 0.26291079812206575, "grad_norm": 1.0641957214042368, "learning_rate": 8.550336218433631e-06, "loss": 0.5204, "step": 4872 }, { "epoch": 0.2629647617505801, "grad_norm": 1.1442668294680263, "learning_rate": 8.549775297374884e-06, "loss": 0.5386, "step": 4873 }, { "epoch": 0.2630187253790945, "grad_norm": 1.2115103196298687, "learning_rate": 8.549214288662514e-06, "loss": 0.6344, "step": 4874 }, { "epoch": 0.2630726890076089, "grad_norm": 0.8010888174486609, "learning_rate": 8.548653192312644e-06, "loss": 0.4189, "step": 4875 }, { "epoch": 0.2631266526361232, "grad_norm": 1.262382806077851, "learning_rate": 8.548092008341401e-06, "loss": 0.627, "step": 4876 }, { "epoch": 0.2631806162646376, "grad_norm": 1.0041215661238831, "learning_rate": 8.547530736764914e-06, "loss": 0.5993, "step": 4877 }, { "epoch": 0.263234579893152, "grad_norm": 0.8825581149445243, "learning_rate": 8.546969377599312e-06, "loss": 0.3771, "step": 4878 }, { "epoch": 0.2632885435216664, "grad_norm": 1.1668402127016324, "learning_rate": 8.546407930860733e-06, "loss": 0.498, "step": 4879 }, { "epoch": 0.26334250715018076, "grad_norm": 0.8934903740322111, "learning_rate": 8.545846396565311e-06, "loss": 0.4427, "step": 4880 }, { "epoch": 0.26339647077869516, "grad_norm": 1.3561539850324098, 
"learning_rate": 8.545284774729187e-06, "loss": 0.522, "step": 4881 }, { "epoch": 0.26345043440720955, "grad_norm": 1.002778313222618, "learning_rate": 8.544723065368502e-06, "loss": 0.5016, "step": 4882 }, { "epoch": 0.2635043980357239, "grad_norm": 1.0891355731923946, "learning_rate": 8.544161268499399e-06, "loss": 0.4248, "step": 4883 }, { "epoch": 0.2635583616642383, "grad_norm": 1.0929498516502842, "learning_rate": 8.543599384138025e-06, "loss": 0.5241, "step": 4884 }, { "epoch": 0.2636123252927527, "grad_norm": 0.9937245367859349, "learning_rate": 8.54303741230053e-06, "loss": 0.5438, "step": 4885 }, { "epoch": 0.2636662889212671, "grad_norm": 0.945441882544221, "learning_rate": 8.542475353003064e-06, "loss": 0.4658, "step": 4886 }, { "epoch": 0.26372025254978143, "grad_norm": 1.2119461221591072, "learning_rate": 8.541913206261783e-06, "loss": 0.507, "step": 4887 }, { "epoch": 0.26377421617829583, "grad_norm": 0.9805697269663831, "learning_rate": 8.541350972092843e-06, "loss": 0.423, "step": 4888 }, { "epoch": 0.2638281798068102, "grad_norm": 1.3733588413033384, "learning_rate": 8.540788650512401e-06, "loss": 0.4798, "step": 4889 }, { "epoch": 0.26388214343532457, "grad_norm": 0.9058825153051512, "learning_rate": 8.540226241536625e-06, "loss": 0.4633, "step": 4890 }, { "epoch": 0.26393610706383897, "grad_norm": 1.1885466969877745, "learning_rate": 8.539663745181671e-06, "loss": 0.4967, "step": 4891 }, { "epoch": 0.26399007069235336, "grad_norm": 1.3401219680281002, "learning_rate": 8.539101161463713e-06, "loss": 0.7331, "step": 4892 }, { "epoch": 0.26404403432086776, "grad_norm": 1.24458237950023, "learning_rate": 8.538538490398912e-06, "loss": 0.6674, "step": 4893 }, { "epoch": 0.2640979979493821, "grad_norm": 1.1992251325676055, "learning_rate": 8.537975732003447e-06, "loss": 0.5936, "step": 4894 }, { "epoch": 0.2641519615778965, "grad_norm": 1.1983170242143621, "learning_rate": 8.53741288629349e-06, "loss": 0.5659, "step": 4895 }, { "epoch": 0.2642059252064109, "grad_norm": 1.0960972870316203, "learning_rate": 8.53684995328522e-06, "loss": 0.5348, "step": 4896 }, { "epoch": 0.26425988883492524, "grad_norm": 1.1468152081448304, "learning_rate": 8.536286932994811e-06, "loss": 0.5875, "step": 4897 }, { "epoch": 0.26431385246343964, "grad_norm": 0.8837263576493469, "learning_rate": 8.535723825438447e-06, "loss": 0.3534, "step": 4898 }, { "epoch": 0.26436781609195403, "grad_norm": 1.083306136706758, "learning_rate": 8.535160630632312e-06, "loss": 0.5039, "step": 4899 }, { "epoch": 0.26442177972046843, "grad_norm": 0.8487465289892849, "learning_rate": 8.534597348592597e-06, "loss": 0.3825, "step": 4900 }, { "epoch": 0.2644757433489828, "grad_norm": 0.8917678155121717, "learning_rate": 8.534033979335487e-06, "loss": 0.4749, "step": 4901 }, { "epoch": 0.26452970697749717, "grad_norm": 1.063237531771167, "learning_rate": 8.533470522877174e-06, "loss": 0.425, "step": 4902 }, { "epoch": 0.26458367060601157, "grad_norm": 1.022171581995924, "learning_rate": 8.532906979233852e-06, "loss": 0.4709, "step": 4903 }, { "epoch": 0.2646376342345259, "grad_norm": 1.1751425070862658, "learning_rate": 8.532343348421721e-06, "loss": 0.598, "step": 4904 }, { "epoch": 0.2646915978630403, "grad_norm": 0.9413060174357715, "learning_rate": 8.531779630456978e-06, "loss": 0.4074, "step": 4905 }, { "epoch": 0.2647455614915547, "grad_norm": 1.0362937557240024, "learning_rate": 8.531215825355824e-06, "loss": 0.5182, "step": 4906 }, { "epoch": 0.26479952512006905, "grad_norm": 1.082178090386478, "learning_rate": 
8.530651933134465e-06, "loss": 0.4217, "step": 4907 }, { "epoch": 0.26485348874858344, "grad_norm": 1.1357642393200462, "learning_rate": 8.530087953809107e-06, "loss": 0.5889, "step": 4908 }, { "epoch": 0.26490745237709784, "grad_norm": 0.7936709247551555, "learning_rate": 8.529523887395961e-06, "loss": 0.3453, "step": 4909 }, { "epoch": 0.26496141600561224, "grad_norm": 0.9507990614044645, "learning_rate": 8.528959733911235e-06, "loss": 0.4072, "step": 4910 }, { "epoch": 0.2650153796341266, "grad_norm": 0.8993053600341796, "learning_rate": 8.528395493371146e-06, "loss": 0.3692, "step": 4911 }, { "epoch": 0.265069343262641, "grad_norm": 1.114215692651555, "learning_rate": 8.527831165791912e-06, "loss": 0.5838, "step": 4912 }, { "epoch": 0.2651233068911554, "grad_norm": 1.039676739180002, "learning_rate": 8.52726675118975e-06, "loss": 0.5503, "step": 4913 }, { "epoch": 0.2651772705196697, "grad_norm": 0.9297622857246778, "learning_rate": 8.526702249580883e-06, "loss": 0.4542, "step": 4914 }, { "epoch": 0.2652312341481841, "grad_norm": 1.0250481983396156, "learning_rate": 8.526137660981534e-06, "loss": 0.4836, "step": 4915 }, { "epoch": 0.2652851977766985, "grad_norm": 1.0766404804137226, "learning_rate": 8.525572985407933e-06, "loss": 0.5208, "step": 4916 }, { "epoch": 0.2653391614052129, "grad_norm": 0.9764260789777678, "learning_rate": 8.525008222876306e-06, "loss": 0.391, "step": 4917 }, { "epoch": 0.26539312503372725, "grad_norm": 0.9706112110198597, "learning_rate": 8.524443373402886e-06, "loss": 0.5071, "step": 4918 }, { "epoch": 0.26544708866224165, "grad_norm": 1.0054135618310376, "learning_rate": 8.52387843700391e-06, "loss": 0.3903, "step": 4919 }, { "epoch": 0.26550105229075605, "grad_norm": 1.0689764702778293, "learning_rate": 8.523313413695607e-06, "loss": 0.5215, "step": 4920 }, { "epoch": 0.2655550159192704, "grad_norm": 1.1136459293285068, "learning_rate": 8.522748303494224e-06, "loss": 0.4378, "step": 4921 }, { "epoch": 0.2656089795477848, "grad_norm": 1.0620473495105778, "learning_rate": 8.522183106416003e-06, "loss": 0.514, "step": 4922 }, { "epoch": 0.2656629431762992, "grad_norm": 1.0800005742134127, "learning_rate": 8.521617822477184e-06, "loss": 0.5729, "step": 4923 }, { "epoch": 0.2657169068048136, "grad_norm": 1.29501546969231, "learning_rate": 8.521052451694016e-06, "loss": 0.6134, "step": 4924 }, { "epoch": 0.2657708704333279, "grad_norm": 1.0374424261155721, "learning_rate": 8.520486994082747e-06, "loss": 0.4423, "step": 4925 }, { "epoch": 0.2658248340618423, "grad_norm": 0.9416477634223471, "learning_rate": 8.519921449659631e-06, "loss": 0.4271, "step": 4926 }, { "epoch": 0.2658787976903567, "grad_norm": 1.0619787232161133, "learning_rate": 8.51935581844092e-06, "loss": 0.5613, "step": 4927 }, { "epoch": 0.26593276131887106, "grad_norm": 1.261604275249425, "learning_rate": 8.518790100442873e-06, "loss": 0.538, "step": 4928 }, { "epoch": 0.26598672494738546, "grad_norm": 0.8690823222648841, "learning_rate": 8.518224295681748e-06, "loss": 0.4226, "step": 4929 }, { "epoch": 0.26604068857589985, "grad_norm": 1.1368489490418927, "learning_rate": 8.517658404173807e-06, "loss": 0.5564, "step": 4930 }, { "epoch": 0.26609465220441425, "grad_norm": 0.8633800490488659, "learning_rate": 8.517092425935313e-06, "loss": 0.2955, "step": 4931 }, { "epoch": 0.2661486158329286, "grad_norm": 0.9210270128927458, "learning_rate": 8.516526360982535e-06, "loss": 0.4041, "step": 4932 }, { "epoch": 0.266202579461443, "grad_norm": 1.0228828297817194, "learning_rate": 
8.515960209331743e-06, "loss": 0.4931, "step": 4933 }, { "epoch": 0.2662565430899574, "grad_norm": 1.1669014263107473, "learning_rate": 8.515393970999206e-06, "loss": 0.6052, "step": 4934 }, { "epoch": 0.26631050671847173, "grad_norm": 1.2023129409372522, "learning_rate": 8.5148276460012e-06, "loss": 0.573, "step": 4935 }, { "epoch": 0.2663644703469861, "grad_norm": 0.867230737750647, "learning_rate": 8.514261234354e-06, "loss": 0.4794, "step": 4936 }, { "epoch": 0.2664184339755005, "grad_norm": 1.0197875658726432, "learning_rate": 8.513694736073886e-06, "loss": 0.5603, "step": 4937 }, { "epoch": 0.2664723976040149, "grad_norm": 1.4296621744182947, "learning_rate": 8.513128151177141e-06, "loss": 0.6773, "step": 4938 }, { "epoch": 0.26652636123252926, "grad_norm": 0.9917595113619254, "learning_rate": 8.512561479680049e-06, "loss": 0.4913, "step": 4939 }, { "epoch": 0.26658032486104366, "grad_norm": 1.0137842238279766, "learning_rate": 8.511994721598897e-06, "loss": 0.5145, "step": 4940 }, { "epoch": 0.26663428848955806, "grad_norm": 1.1369499223294355, "learning_rate": 8.511427876949971e-06, "loss": 0.4622, "step": 4941 }, { "epoch": 0.2666882521180724, "grad_norm": 1.047667903117249, "learning_rate": 8.510860945749566e-06, "loss": 0.4462, "step": 4942 }, { "epoch": 0.2667422157465868, "grad_norm": 1.132218455917715, "learning_rate": 8.510293928013976e-06, "loss": 0.6244, "step": 4943 }, { "epoch": 0.2667961793751012, "grad_norm": 0.9104460672372381, "learning_rate": 8.509726823759496e-06, "loss": 0.4047, "step": 4944 }, { "epoch": 0.26685014300361554, "grad_norm": 1.0510498449981627, "learning_rate": 8.509159633002427e-06, "loss": 0.5043, "step": 4945 }, { "epoch": 0.26690410663212993, "grad_norm": 1.2238303298889028, "learning_rate": 8.508592355759067e-06, "loss": 0.7582, "step": 4946 }, { "epoch": 0.26695807026064433, "grad_norm": 1.1010395345598394, "learning_rate": 8.508024992045724e-06, "loss": 0.6039, "step": 4947 }, { "epoch": 0.26701203388915873, "grad_norm": 0.8863813520161247, "learning_rate": 8.507457541878701e-06, "loss": 0.4679, "step": 4948 }, { "epoch": 0.26706599751767307, "grad_norm": 0.889051889445057, "learning_rate": 8.506890005274314e-06, "loss": 0.3708, "step": 4949 }, { "epoch": 0.26711996114618747, "grad_norm": 1.104902030318065, "learning_rate": 8.506322382248868e-06, "loss": 0.4639, "step": 4950 }, { "epoch": 0.26717392477470187, "grad_norm": 1.043100441457956, "learning_rate": 8.50575467281868e-06, "loss": 0.4699, "step": 4951 }, { "epoch": 0.2672278884032162, "grad_norm": 1.273542753520947, "learning_rate": 8.505186877000064e-06, "loss": 0.6534, "step": 4952 }, { "epoch": 0.2672818520317306, "grad_norm": 1.0827495330451984, "learning_rate": 8.504618994809342e-06, "loss": 0.4986, "step": 4953 }, { "epoch": 0.267335815660245, "grad_norm": 1.0283460024200892, "learning_rate": 8.504051026262832e-06, "loss": 0.4184, "step": 4954 }, { "epoch": 0.2673897792887594, "grad_norm": 0.8188646051556691, "learning_rate": 8.503482971376862e-06, "loss": 0.3102, "step": 4955 }, { "epoch": 0.26744374291727374, "grad_norm": 1.2076085310113696, "learning_rate": 8.502914830167757e-06, "loss": 0.5662, "step": 4956 }, { "epoch": 0.26749770654578814, "grad_norm": 1.3076179903454865, "learning_rate": 8.502346602651844e-06, "loss": 0.5597, "step": 4957 }, { "epoch": 0.26755167017430254, "grad_norm": 1.0339943961687594, "learning_rate": 8.501778288845459e-06, "loss": 0.5177, "step": 4958 }, { "epoch": 0.2676056338028169, "grad_norm": 1.1157164637839228, "learning_rate": 
8.501209888764928e-06, "loss": 0.6153, "step": 4959 }, { "epoch": 0.2676595974313313, "grad_norm": 1.400569285351183, "learning_rate": 8.500641402426597e-06, "loss": 0.572, "step": 4960 }, { "epoch": 0.2677135610598457, "grad_norm": 0.886495899557425, "learning_rate": 8.500072829846796e-06, "loss": 0.4799, "step": 4961 }, { "epoch": 0.26776752468836007, "grad_norm": 1.0416668842599197, "learning_rate": 8.499504171041873e-06, "loss": 0.5245, "step": 4962 }, { "epoch": 0.2678214883168744, "grad_norm": 0.9161240387089902, "learning_rate": 8.49893542602817e-06, "loss": 0.4196, "step": 4963 }, { "epoch": 0.2678754519453888, "grad_norm": 0.7864410285310075, "learning_rate": 8.498366594822031e-06, "loss": 0.3162, "step": 4964 }, { "epoch": 0.2679294155739032, "grad_norm": 0.8805952860698942, "learning_rate": 8.497797677439808e-06, "loss": 0.3687, "step": 4965 }, { "epoch": 0.26798337920241755, "grad_norm": 1.1513061744265791, "learning_rate": 8.49722867389785e-06, "loss": 0.4061, "step": 4966 }, { "epoch": 0.26803734283093195, "grad_norm": 1.1342884541900962, "learning_rate": 8.496659584212512e-06, "loss": 0.506, "step": 4967 }, { "epoch": 0.26809130645944634, "grad_norm": 1.038042241661417, "learning_rate": 8.496090408400148e-06, "loss": 0.3968, "step": 4968 }, { "epoch": 0.26814527008796074, "grad_norm": 1.1320287049013158, "learning_rate": 8.495521146477119e-06, "loss": 0.4365, "step": 4969 }, { "epoch": 0.2681992337164751, "grad_norm": 0.8811255769842016, "learning_rate": 8.494951798459785e-06, "loss": 0.5295, "step": 4970 }, { "epoch": 0.2682531973449895, "grad_norm": 1.0109772481397439, "learning_rate": 8.494382364364512e-06, "loss": 0.4931, "step": 4971 }, { "epoch": 0.2683071609735039, "grad_norm": 1.0770758110378873, "learning_rate": 8.493812844207663e-06, "loss": 0.4884, "step": 4972 }, { "epoch": 0.2683611246020182, "grad_norm": 0.9012339892556664, "learning_rate": 8.493243238005607e-06, "loss": 0.4217, "step": 4973 }, { "epoch": 0.2684150882305326, "grad_norm": 0.987062628624061, "learning_rate": 8.492673545774718e-06, "loss": 0.5145, "step": 4974 }, { "epoch": 0.268469051859047, "grad_norm": 0.9595980027413639, "learning_rate": 8.492103767531366e-06, "loss": 0.4872, "step": 4975 }, { "epoch": 0.26852301548756136, "grad_norm": 0.8470756611748547, "learning_rate": 8.491533903291929e-06, "loss": 0.3724, "step": 4976 }, { "epoch": 0.26857697911607575, "grad_norm": 0.9357610969520691, "learning_rate": 8.490963953072784e-06, "loss": 0.4369, "step": 4977 }, { "epoch": 0.26863094274459015, "grad_norm": 1.100253710923987, "learning_rate": 8.490393916890313e-06, "loss": 0.4602, "step": 4978 }, { "epoch": 0.26868490637310455, "grad_norm": 0.9279529459673218, "learning_rate": 8.489823794760898e-06, "loss": 0.384, "step": 4979 }, { "epoch": 0.2687388700016189, "grad_norm": 1.3728200765506169, "learning_rate": 8.48925358670093e-06, "loss": 0.4891, "step": 4980 }, { "epoch": 0.2687928336301333, "grad_norm": 0.8957614056233371, "learning_rate": 8.488683292726791e-06, "loss": 0.4558, "step": 4981 }, { "epoch": 0.2688467972586477, "grad_norm": 0.727349194462971, "learning_rate": 8.488112912854875e-06, "loss": 0.3004, "step": 4982 }, { "epoch": 0.268900760887162, "grad_norm": 1.1094459118875597, "learning_rate": 8.487542447101574e-06, "loss": 0.5764, "step": 4983 }, { "epoch": 0.2689547245156764, "grad_norm": 0.8575342359738434, "learning_rate": 8.486971895483285e-06, "loss": 0.3176, "step": 4984 }, { "epoch": 0.2690086881441908, "grad_norm": 1.016538916087091, "learning_rate": 8.486401258016404e-06, 
"loss": 0.6115, "step": 4985 }, { "epoch": 0.2690626517727052, "grad_norm": 0.9891547486858964, "learning_rate": 8.485830534717334e-06, "loss": 0.5239, "step": 4986 }, { "epoch": 0.26911661540121956, "grad_norm": 1.0167122264955455, "learning_rate": 8.485259725602478e-06, "loss": 0.4932, "step": 4987 }, { "epoch": 0.26917057902973396, "grad_norm": 1.1717447412479178, "learning_rate": 8.484688830688243e-06, "loss": 0.4245, "step": 4988 }, { "epoch": 0.26922454265824836, "grad_norm": 1.2847463695021393, "learning_rate": 8.48411784999103e-06, "loss": 0.5168, "step": 4989 }, { "epoch": 0.2692785062867627, "grad_norm": 1.3156048681390915, "learning_rate": 8.48354678352726e-06, "loss": 0.498, "step": 4990 }, { "epoch": 0.2693324699152771, "grad_norm": 1.0090518804458701, "learning_rate": 8.48297563131334e-06, "loss": 0.4793, "step": 4991 }, { "epoch": 0.2693864335437915, "grad_norm": 1.0475593233037548, "learning_rate": 8.482404393365685e-06, "loss": 0.5675, "step": 4992 }, { "epoch": 0.2694403971723059, "grad_norm": 1.0197142873335316, "learning_rate": 8.481833069700714e-06, "loss": 0.5549, "step": 4993 }, { "epoch": 0.26949436080082023, "grad_norm": 0.9922306095291865, "learning_rate": 8.481261660334849e-06, "loss": 0.405, "step": 4994 }, { "epoch": 0.26954832442933463, "grad_norm": 0.7935425579329741, "learning_rate": 8.48069016528451e-06, "loss": 0.344, "step": 4995 }, { "epoch": 0.269602288057849, "grad_norm": 0.9424635511999723, "learning_rate": 8.480118584566124e-06, "loss": 0.3767, "step": 4996 }, { "epoch": 0.26965625168636337, "grad_norm": 1.1325714535020714, "learning_rate": 8.479546918196119e-06, "loss": 0.5869, "step": 4997 }, { "epoch": 0.26971021531487777, "grad_norm": 1.0151959178975154, "learning_rate": 8.478975166190925e-06, "loss": 0.4681, "step": 4998 }, { "epoch": 0.26976417894339216, "grad_norm": 1.0493906335144532, "learning_rate": 8.478403328566974e-06, "loss": 0.4868, "step": 4999 }, { "epoch": 0.26981814257190656, "grad_norm": 1.0784442292445506, "learning_rate": 8.477831405340704e-06, "loss": 0.4453, "step": 5000 }, { "epoch": 0.26981814257190656, "eval_loss": 0.574369490146637, "eval_runtime": 160.0872, "eval_samples_per_second": 21.482, "eval_steps_per_second": 0.9, "step": 5000 }, { "epoch": 0.2698721062004209, "grad_norm": 1.0659093442740304, "learning_rate": 8.477259396528548e-06, "loss": 0.4954, "step": 5001 }, { "epoch": 0.2699260698289353, "grad_norm": 1.0070598494129597, "learning_rate": 8.47668730214695e-06, "loss": 0.4619, "step": 5002 }, { "epoch": 0.2699800334574497, "grad_norm": 1.2496494051013718, "learning_rate": 8.476115122212352e-06, "loss": 0.604, "step": 5003 }, { "epoch": 0.27003399708596404, "grad_norm": 0.9105106750094834, "learning_rate": 8.475542856741197e-06, "loss": 0.6233, "step": 5004 }, { "epoch": 0.27008796071447844, "grad_norm": 1.0788889917140985, "learning_rate": 8.474970505749934e-06, "loss": 0.4729, "step": 5005 }, { "epoch": 0.27014192434299283, "grad_norm": 1.057682448366313, "learning_rate": 8.474398069255012e-06, "loss": 0.5027, "step": 5006 }, { "epoch": 0.27019588797150723, "grad_norm": 1.0392111686279433, "learning_rate": 8.473825547272887e-06, "loss": 0.4613, "step": 5007 }, { "epoch": 0.2702498516000216, "grad_norm": 1.090733174890004, "learning_rate": 8.473252939820007e-06, "loss": 0.563, "step": 5008 }, { "epoch": 0.27030381522853597, "grad_norm": 1.1095414170467692, "learning_rate": 8.472680246912836e-06, "loss": 0.5478, "step": 5009 }, { "epoch": 0.27035777885705037, "grad_norm": 0.7964808135127515, "learning_rate": 
8.472107468567829e-06, "loss": 0.399, "step": 5010 }, { "epoch": 0.2704117424855647, "grad_norm": 1.184984786508031, "learning_rate": 8.471534604801451e-06, "loss": 0.5999, "step": 5011 }, { "epoch": 0.2704657061140791, "grad_norm": 1.0160100527368403, "learning_rate": 8.470961655630166e-06, "loss": 0.4221, "step": 5012 }, { "epoch": 0.2705196697425935, "grad_norm": 1.1686602202508165, "learning_rate": 8.47038862107044e-06, "loss": 0.5626, "step": 5013 }, { "epoch": 0.27057363337110785, "grad_norm": 1.2751461168384453, "learning_rate": 8.469815501138747e-06, "loss": 0.5128, "step": 5014 }, { "epoch": 0.27062759699962224, "grad_norm": 1.0325140761141676, "learning_rate": 8.469242295851553e-06, "loss": 0.4823, "step": 5015 }, { "epoch": 0.27068156062813664, "grad_norm": 0.9454161180952948, "learning_rate": 8.468669005225335e-06, "loss": 0.6885, "step": 5016 }, { "epoch": 0.27073552425665104, "grad_norm": 0.9153203827656389, "learning_rate": 8.468095629276571e-06, "loss": 0.4928, "step": 5017 }, { "epoch": 0.2707894878851654, "grad_norm": 0.8143847071242002, "learning_rate": 8.467522168021741e-06, "loss": 0.3772, "step": 5018 }, { "epoch": 0.2708434515136798, "grad_norm": 1.1255830015338906, "learning_rate": 8.466948621477323e-06, "loss": 0.4239, "step": 5019 }, { "epoch": 0.2708974151421942, "grad_norm": 0.9930946754694855, "learning_rate": 8.466374989659805e-06, "loss": 0.5661, "step": 5020 }, { "epoch": 0.2709513787707085, "grad_norm": 1.061281546571973, "learning_rate": 8.465801272585672e-06, "loss": 0.493, "step": 5021 }, { "epoch": 0.2710053423992229, "grad_norm": 1.116821178570163, "learning_rate": 8.465227470271414e-06, "loss": 0.673, "step": 5022 }, { "epoch": 0.2710593060277373, "grad_norm": 0.8850789010441871, "learning_rate": 8.464653582733521e-06, "loss": 0.4216, "step": 5023 }, { "epoch": 0.2711132696562517, "grad_norm": 1.2158504254998508, "learning_rate": 8.464079609988492e-06, "loss": 0.6665, "step": 5024 }, { "epoch": 0.27116723328476605, "grad_norm": 1.1467165349665343, "learning_rate": 8.463505552052819e-06, "loss": 0.5846, "step": 5025 }, { "epoch": 0.27122119691328045, "grad_norm": 1.1285256474554934, "learning_rate": 8.462931408942999e-06, "loss": 0.4812, "step": 5026 }, { "epoch": 0.27127516054179485, "grad_norm": 1.0025213875598828, "learning_rate": 8.462357180675537e-06, "loss": 0.4575, "step": 5027 }, { "epoch": 0.2713291241703092, "grad_norm": 1.0703177228345229, "learning_rate": 8.46178286726694e-06, "loss": 0.5122, "step": 5028 }, { "epoch": 0.2713830877988236, "grad_norm": 0.911516789226813, "learning_rate": 8.461208468733708e-06, "loss": 0.3802, "step": 5029 }, { "epoch": 0.271437051427338, "grad_norm": 1.0006674963119548, "learning_rate": 8.460633985092352e-06, "loss": 0.5366, "step": 5030 }, { "epoch": 0.2714910150558524, "grad_norm": 1.1789945161060549, "learning_rate": 8.460059416359386e-06, "loss": 0.4999, "step": 5031 }, { "epoch": 0.2715449786843667, "grad_norm": 1.0395320282108502, "learning_rate": 8.45948476255132e-06, "loss": 0.3344, "step": 5032 }, { "epoch": 0.2715989423128811, "grad_norm": 0.9791130688822701, "learning_rate": 8.458910023684673e-06, "loss": 0.4367, "step": 5033 }, { "epoch": 0.2716529059413955, "grad_norm": 0.9840433360157309, "learning_rate": 8.45833519977596e-06, "loss": 0.3535, "step": 5034 }, { "epoch": 0.27170686956990986, "grad_norm": 0.9739610477492691, "learning_rate": 8.457760290841705e-06, "loss": 0.4588, "step": 5035 }, { "epoch": 0.27176083319842426, "grad_norm": 1.2253675091163532, "learning_rate": 
8.457185296898431e-06, "loss": 0.6463, "step": 5036 }, { "epoch": 0.27181479682693865, "grad_norm": 1.1277861928319362, "learning_rate": 8.456610217962662e-06, "loss": 0.4692, "step": 5037 }, { "epoch": 0.27186876045545305, "grad_norm": 1.143112260972578, "learning_rate": 8.45603505405093e-06, "loss": 0.5268, "step": 5038 }, { "epoch": 0.2719227240839674, "grad_norm": 1.0815323573744626, "learning_rate": 8.455459805179764e-06, "loss": 0.5321, "step": 5039 }, { "epoch": 0.2719766877124818, "grad_norm": 1.197674826231189, "learning_rate": 8.454884471365694e-06, "loss": 0.6476, "step": 5040 }, { "epoch": 0.2720306513409962, "grad_norm": 0.9576522395300567, "learning_rate": 8.45430905262526e-06, "loss": 0.5474, "step": 5041 }, { "epoch": 0.27208461496951053, "grad_norm": 0.9074578102020335, "learning_rate": 8.453733548975001e-06, "loss": 0.4108, "step": 5042 }, { "epoch": 0.2721385785980249, "grad_norm": 0.964708093340206, "learning_rate": 8.453157960431453e-06, "loss": 0.451, "step": 5043 }, { "epoch": 0.2721925422265393, "grad_norm": 1.1017540635767715, "learning_rate": 8.452582287011161e-06, "loss": 0.739, "step": 5044 }, { "epoch": 0.27224650585505367, "grad_norm": 1.3695125860384991, "learning_rate": 8.452006528730672e-06, "loss": 0.6403, "step": 5045 }, { "epoch": 0.27230046948356806, "grad_norm": 1.146474040394263, "learning_rate": 8.451430685606532e-06, "loss": 0.5609, "step": 5046 }, { "epoch": 0.27235443311208246, "grad_norm": 0.8505454320237756, "learning_rate": 8.450854757655291e-06, "loss": 0.3811, "step": 5047 }, { "epoch": 0.27240839674059686, "grad_norm": 0.9784521947218713, "learning_rate": 8.450278744893504e-06, "loss": 0.4416, "step": 5048 }, { "epoch": 0.2724623603691112, "grad_norm": 1.295507548389036, "learning_rate": 8.449702647337724e-06, "loss": 0.6001, "step": 5049 }, { "epoch": 0.2725163239976256, "grad_norm": 0.9381827749944345, "learning_rate": 8.44912646500451e-06, "loss": 0.4663, "step": 5050 }, { "epoch": 0.27257028762614, "grad_norm": 0.7350919808304873, "learning_rate": 8.448550197910421e-06, "loss": 0.3603, "step": 5051 }, { "epoch": 0.27262425125465434, "grad_norm": 1.126397535695852, "learning_rate": 8.447973846072021e-06, "loss": 0.6336, "step": 5052 }, { "epoch": 0.27267821488316873, "grad_norm": 0.9048991170752444, "learning_rate": 8.447397409505874e-06, "loss": 0.4232, "step": 5053 }, { "epoch": 0.27273217851168313, "grad_norm": 1.1225133092607755, "learning_rate": 8.446820888228547e-06, "loss": 0.596, "step": 5054 }, { "epoch": 0.27278614214019753, "grad_norm": 1.2472138112996929, "learning_rate": 8.446244282256609e-06, "loss": 0.5805, "step": 5055 }, { "epoch": 0.27284010576871187, "grad_norm": 0.9917401274010875, "learning_rate": 8.445667591606634e-06, "loss": 0.4142, "step": 5056 }, { "epoch": 0.27289406939722627, "grad_norm": 0.902867282072962, "learning_rate": 8.445090816295195e-06, "loss": 0.4012, "step": 5057 }, { "epoch": 0.27294803302574067, "grad_norm": 1.1800769062465017, "learning_rate": 8.44451395633887e-06, "loss": 0.7055, "step": 5058 }, { "epoch": 0.273001996654255, "grad_norm": 1.10792888884941, "learning_rate": 8.443937011754242e-06, "loss": 0.4622, "step": 5059 }, { "epoch": 0.2730559602827694, "grad_norm": 1.173149378108911, "learning_rate": 8.443359982557887e-06, "loss": 0.5471, "step": 5060 }, { "epoch": 0.2731099239112838, "grad_norm": 1.0168270021926529, "learning_rate": 8.442782868766394e-06, "loss": 0.433, "step": 5061 }, { "epoch": 0.2731638875397982, "grad_norm": 1.1966712446255392, "learning_rate": 8.442205670396344e-06, 
"loss": 0.4853, "step": 5062 }, { "epoch": 0.27321785116831254, "grad_norm": 1.0627865811473995, "learning_rate": 8.441628387464335e-06, "loss": 0.5376, "step": 5063 }, { "epoch": 0.27327181479682694, "grad_norm": 0.9919371942135345, "learning_rate": 8.44105101998695e-06, "loss": 0.4975, "step": 5064 }, { "epoch": 0.27332577842534134, "grad_norm": 1.1146194685456778, "learning_rate": 8.44047356798079e-06, "loss": 0.6138, "step": 5065 }, { "epoch": 0.2733797420538557, "grad_norm": 1.168554625383551, "learning_rate": 8.439896031462449e-06, "loss": 0.4881, "step": 5066 }, { "epoch": 0.2734337056823701, "grad_norm": 0.8791749401725523, "learning_rate": 8.439318410448522e-06, "loss": 0.4225, "step": 5067 }, { "epoch": 0.2734876693108845, "grad_norm": 1.2825232490679803, "learning_rate": 8.438740704955615e-06, "loss": 0.6178, "step": 5068 }, { "epoch": 0.27354163293939887, "grad_norm": 1.3052020244565279, "learning_rate": 8.43816291500033e-06, "loss": 0.5441, "step": 5069 }, { "epoch": 0.2735955965679132, "grad_norm": 0.9120281730842009, "learning_rate": 8.437585040599276e-06, "loss": 0.4658, "step": 5070 }, { "epoch": 0.2736495601964276, "grad_norm": 0.9253218427223191, "learning_rate": 8.437007081769059e-06, "loss": 0.4107, "step": 5071 }, { "epoch": 0.273703523824942, "grad_norm": 0.99747864890601, "learning_rate": 8.436429038526291e-06, "loss": 0.5365, "step": 5072 }, { "epoch": 0.27375748745345635, "grad_norm": 0.8795267358098525, "learning_rate": 8.435850910887585e-06, "loss": 0.3622, "step": 5073 }, { "epoch": 0.27381145108197075, "grad_norm": 0.9495593184306, "learning_rate": 8.435272698869558e-06, "loss": 0.4213, "step": 5074 }, { "epoch": 0.27386541471048514, "grad_norm": 0.9732769515514071, "learning_rate": 8.434694402488828e-06, "loss": 0.5532, "step": 5075 }, { "epoch": 0.27391937833899954, "grad_norm": 1.238460596029063, "learning_rate": 8.434116021762013e-06, "loss": 0.4655, "step": 5076 }, { "epoch": 0.2739733419675139, "grad_norm": 1.023957344398195, "learning_rate": 8.433537556705741e-06, "loss": 0.467, "step": 5077 }, { "epoch": 0.2740273055960283, "grad_norm": 1.176393334647037, "learning_rate": 8.432959007336637e-06, "loss": 0.5832, "step": 5078 }, { "epoch": 0.2740812692245427, "grad_norm": 1.1472160680536063, "learning_rate": 8.432380373671325e-06, "loss": 0.7125, "step": 5079 }, { "epoch": 0.274135232853057, "grad_norm": 0.9315799581981671, "learning_rate": 8.43180165572644e-06, "loss": 0.4408, "step": 5080 }, { "epoch": 0.2741891964815714, "grad_norm": 1.0141928686721873, "learning_rate": 8.43122285351861e-06, "loss": 0.3821, "step": 5081 }, { "epoch": 0.2742431601100858, "grad_norm": 1.1766900985608466, "learning_rate": 8.430643967064476e-06, "loss": 0.5704, "step": 5082 }, { "epoch": 0.27429712373860016, "grad_norm": 0.9988370693113201, "learning_rate": 8.430064996380672e-06, "loss": 0.6988, "step": 5083 }, { "epoch": 0.27435108736711455, "grad_norm": 0.9876301173428249, "learning_rate": 8.42948594148384e-06, "loss": 0.3864, "step": 5084 }, { "epoch": 0.27440505099562895, "grad_norm": 1.0257953218399134, "learning_rate": 8.428906802390623e-06, "loss": 0.488, "step": 5085 }, { "epoch": 0.27445901462414335, "grad_norm": 1.0362404675287735, "learning_rate": 8.428327579117664e-06, "loss": 0.4953, "step": 5086 }, { "epoch": 0.2745129782526577, "grad_norm": 1.168402919507919, "learning_rate": 8.427748271681612e-06, "loss": 0.5544, "step": 5087 }, { "epoch": 0.2745669418811721, "grad_norm": 0.9326917561398205, "learning_rate": 8.427168880099115e-06, "loss": 0.4308, "step": 
5088 }, { "epoch": 0.2746209055096865, "grad_norm": 0.9266602203628659, "learning_rate": 8.426589404386827e-06, "loss": 0.4007, "step": 5089 }, { "epoch": 0.27467486913820083, "grad_norm": 0.9349769053192981, "learning_rate": 8.426009844561403e-06, "loss": 0.6142, "step": 5090 }, { "epoch": 0.2747288327667152, "grad_norm": 1.112542268307128, "learning_rate": 8.4254302006395e-06, "loss": 0.5465, "step": 5091 }, { "epoch": 0.2747827963952296, "grad_norm": 1.1530111627123463, "learning_rate": 8.424850472637777e-06, "loss": 0.6653, "step": 5092 }, { "epoch": 0.274836760023744, "grad_norm": 1.1455839179672693, "learning_rate": 8.424270660572896e-06, "loss": 0.635, "step": 5093 }, { "epoch": 0.27489072365225836, "grad_norm": 1.259626763088531, "learning_rate": 8.423690764461522e-06, "loss": 0.6676, "step": 5094 }, { "epoch": 0.27494468728077276, "grad_norm": 0.9358174837673061, "learning_rate": 8.42311078432032e-06, "loss": 0.4808, "step": 5095 }, { "epoch": 0.27499865090928716, "grad_norm": 0.9240641023458724, "learning_rate": 8.422530720165962e-06, "loss": 0.3619, "step": 5096 }, { "epoch": 0.2750526145378015, "grad_norm": 1.2744223508868635, "learning_rate": 8.421950572015115e-06, "loss": 0.6097, "step": 5097 }, { "epoch": 0.2751065781663159, "grad_norm": 0.8359664883577728, "learning_rate": 8.421370339884459e-06, "loss": 0.4016, "step": 5098 }, { "epoch": 0.2751605417948303, "grad_norm": 1.0883275352335537, "learning_rate": 8.420790023790665e-06, "loss": 0.5547, "step": 5099 }, { "epoch": 0.2752145054233447, "grad_norm": 1.1598786792810918, "learning_rate": 8.420209623750418e-06, "loss": 0.6056, "step": 5100 }, { "epoch": 0.27526846905185903, "grad_norm": 1.028744452882345, "learning_rate": 8.419629139780393e-06, "loss": 0.4726, "step": 5101 }, { "epoch": 0.27532243268037343, "grad_norm": 0.9337552247886279, "learning_rate": 8.419048571897278e-06, "loss": 0.4362, "step": 5102 }, { "epoch": 0.2753763963088878, "grad_norm": 1.0963768947238157, "learning_rate": 8.418467920117756e-06, "loss": 0.5055, "step": 5103 }, { "epoch": 0.27543035993740217, "grad_norm": 0.893469145270453, "learning_rate": 8.41788718445852e-06, "loss": 0.4225, "step": 5104 }, { "epoch": 0.27548432356591657, "grad_norm": 1.176830963694303, "learning_rate": 8.417306364936256e-06, "loss": 0.5374, "step": 5105 }, { "epoch": 0.27553828719443096, "grad_norm": 1.0025504708369914, "learning_rate": 8.416725461567659e-06, "loss": 0.6486, "step": 5106 }, { "epoch": 0.27559225082294536, "grad_norm": 1.0959336492329297, "learning_rate": 8.416144474369426e-06, "loss": 0.4322, "step": 5107 }, { "epoch": 0.2756462144514597, "grad_norm": 1.1516850452897203, "learning_rate": 8.415563403358253e-06, "loss": 0.4025, "step": 5108 }, { "epoch": 0.2757001780799741, "grad_norm": 1.0731944077722275, "learning_rate": 8.414982248550843e-06, "loss": 0.4932, "step": 5109 }, { "epoch": 0.2757541417084885, "grad_norm": 0.7721074221055555, "learning_rate": 8.414401009963897e-06, "loss": 0.2987, "step": 5110 }, { "epoch": 0.27580810533700284, "grad_norm": 1.2119601217712772, "learning_rate": 8.413819687614122e-06, "loss": 0.5232, "step": 5111 }, { "epoch": 0.27586206896551724, "grad_norm": 1.3612455101183176, "learning_rate": 8.413238281518225e-06, "loss": 0.5678, "step": 5112 }, { "epoch": 0.27591603259403163, "grad_norm": 1.4390519145365697, "learning_rate": 8.412656791692917e-06, "loss": 0.7584, "step": 5113 }, { "epoch": 0.275969996222546, "grad_norm": 1.0156598127384096, "learning_rate": 8.412075218154908e-06, "loss": 0.4622, "step": 5114 }, { 
"epoch": 0.2760239598510604, "grad_norm": 1.0791792903533781, "learning_rate": 8.411493560920915e-06, "loss": 0.5272, "step": 5115 }, { "epoch": 0.27607792347957477, "grad_norm": 1.045566604854827, "learning_rate": 8.410911820007656e-06, "loss": 0.5111, "step": 5116 }, { "epoch": 0.27613188710808917, "grad_norm": 1.007578660937908, "learning_rate": 8.410329995431848e-06, "loss": 0.3793, "step": 5117 }, { "epoch": 0.2761858507366035, "grad_norm": 1.1568028031999509, "learning_rate": 8.409748087210217e-06, "loss": 0.4382, "step": 5118 }, { "epoch": 0.2762398143651179, "grad_norm": 0.9806255099862317, "learning_rate": 8.409166095359487e-06, "loss": 0.48, "step": 5119 }, { "epoch": 0.2762937779936323, "grad_norm": 0.9619326671059396, "learning_rate": 8.408584019896383e-06, "loss": 0.4683, "step": 5120 }, { "epoch": 0.27634774162214665, "grad_norm": 1.020673975996857, "learning_rate": 8.408001860837635e-06, "loss": 0.4197, "step": 5121 }, { "epoch": 0.27640170525066105, "grad_norm": 0.8747233951471021, "learning_rate": 8.407419618199975e-06, "loss": 0.3709, "step": 5122 }, { "epoch": 0.27645566887917544, "grad_norm": 0.8181535948024199, "learning_rate": 8.406837292000138e-06, "loss": 0.3481, "step": 5123 }, { "epoch": 0.27650963250768984, "grad_norm": 1.2639556375780803, "learning_rate": 8.40625488225486e-06, "loss": 0.5555, "step": 5124 }, { "epoch": 0.2765635961362042, "grad_norm": 1.0061299410825792, "learning_rate": 8.405672388980879e-06, "loss": 0.5004, "step": 5125 }, { "epoch": 0.2766175597647186, "grad_norm": 1.0941971069456888, "learning_rate": 8.405089812194938e-06, "loss": 0.4235, "step": 5126 }, { "epoch": 0.276671523393233, "grad_norm": 1.2281585729837157, "learning_rate": 8.404507151913782e-06, "loss": 0.6125, "step": 5127 }, { "epoch": 0.2767254870217473, "grad_norm": 0.9635223137437686, "learning_rate": 8.403924408154154e-06, "loss": 0.4063, "step": 5128 }, { "epoch": 0.2767794506502617, "grad_norm": 1.0648375165815833, "learning_rate": 8.403341580932805e-06, "loss": 0.5049, "step": 5129 }, { "epoch": 0.2768334142787761, "grad_norm": 0.998859144410337, "learning_rate": 8.402758670266486e-06, "loss": 0.3863, "step": 5130 }, { "epoch": 0.2768873779072905, "grad_norm": 0.964514918225284, "learning_rate": 8.402175676171947e-06, "loss": 0.6031, "step": 5131 }, { "epoch": 0.27694134153580485, "grad_norm": 0.9582730367230397, "learning_rate": 8.401592598665949e-06, "loss": 0.5754, "step": 5132 }, { "epoch": 0.27699530516431925, "grad_norm": 1.097272178854074, "learning_rate": 8.401009437765248e-06, "loss": 0.5147, "step": 5133 }, { "epoch": 0.27704926879283365, "grad_norm": 0.9315169631393253, "learning_rate": 8.400426193486606e-06, "loss": 0.4174, "step": 5134 }, { "epoch": 0.277103232421348, "grad_norm": 1.0025438596871863, "learning_rate": 8.399842865846781e-06, "loss": 0.5341, "step": 5135 }, { "epoch": 0.2771571960498624, "grad_norm": 1.1752054780841596, "learning_rate": 8.399259454862545e-06, "loss": 0.518, "step": 5136 }, { "epoch": 0.2772111596783768, "grad_norm": 1.0799194183602907, "learning_rate": 8.39867596055066e-06, "loss": 0.5096, "step": 5137 }, { "epoch": 0.2772651233068912, "grad_norm": 1.2059157867343997, "learning_rate": 8.3980923829279e-06, "loss": 0.5018, "step": 5138 }, { "epoch": 0.2773190869354055, "grad_norm": 1.0835478518157242, "learning_rate": 8.397508722011037e-06, "loss": 0.4718, "step": 5139 }, { "epoch": 0.2773730505639199, "grad_norm": 1.0264742609630149, "learning_rate": 8.396924977816845e-06, "loss": 0.5005, "step": 5140 }, { "epoch": 
0.2774270141924343, "grad_norm": 0.804563495151556, "learning_rate": 8.396341150362102e-06, "loss": 0.3667, "step": 5141 }, { "epoch": 0.27748097782094866, "grad_norm": 0.9031802363689143, "learning_rate": 8.395757239663589e-06, "loss": 0.3818, "step": 5142 }, { "epoch": 0.27753494144946306, "grad_norm": 1.020885677895989, "learning_rate": 8.395173245738087e-06, "loss": 0.5764, "step": 5143 }, { "epoch": 0.27758890507797745, "grad_norm": 1.2462240574241217, "learning_rate": 8.39458916860238e-06, "loss": 0.5994, "step": 5144 }, { "epoch": 0.27764286870649185, "grad_norm": 1.0495763160945715, "learning_rate": 8.394005008273255e-06, "loss": 0.4174, "step": 5145 }, { "epoch": 0.2776968323350062, "grad_norm": 1.2908751391208328, "learning_rate": 8.393420764767503e-06, "loss": 0.5662, "step": 5146 }, { "epoch": 0.2777507959635206, "grad_norm": 1.0671278977322058, "learning_rate": 8.392836438101914e-06, "loss": 0.5382, "step": 5147 }, { "epoch": 0.277804759592035, "grad_norm": 0.9417373730120165, "learning_rate": 8.39225202829328e-06, "loss": 0.3589, "step": 5148 }, { "epoch": 0.27785872322054933, "grad_norm": 1.0548223592855124, "learning_rate": 8.391667535358405e-06, "loss": 0.4992, "step": 5149 }, { "epoch": 0.27791268684906373, "grad_norm": 1.1374598782576733, "learning_rate": 8.39108295931408e-06, "loss": 0.5503, "step": 5150 }, { "epoch": 0.2779666504775781, "grad_norm": 1.0293922911470434, "learning_rate": 8.390498300177111e-06, "loss": 0.4669, "step": 5151 }, { "epoch": 0.27802061410609247, "grad_norm": 1.0088674529971597, "learning_rate": 8.3899135579643e-06, "loss": 0.486, "step": 5152 }, { "epoch": 0.27807457773460686, "grad_norm": 1.1140643532631689, "learning_rate": 8.389328732692454e-06, "loss": 0.5382, "step": 5153 }, { "epoch": 0.27812854136312126, "grad_norm": 1.098818075820503, "learning_rate": 8.38874382437838e-06, "loss": 0.5075, "step": 5154 }, { "epoch": 0.27818250499163566, "grad_norm": 1.1217444561658938, "learning_rate": 8.388158833038888e-06, "loss": 0.5473, "step": 5155 }, { "epoch": 0.27823646862015, "grad_norm": 1.0302331305246388, "learning_rate": 8.387573758690796e-06, "loss": 0.5617, "step": 5156 }, { "epoch": 0.2782904322486644, "grad_norm": 0.9602298764945383, "learning_rate": 8.386988601350916e-06, "loss": 0.4704, "step": 5157 }, { "epoch": 0.2783443958771788, "grad_norm": 1.1842450604403707, "learning_rate": 8.386403361036064e-06, "loss": 0.6777, "step": 5158 }, { "epoch": 0.27839835950569314, "grad_norm": 1.0848056302635525, "learning_rate": 8.385818037763064e-06, "loss": 0.6154, "step": 5159 }, { "epoch": 0.27845232313420754, "grad_norm": 1.321408288417881, "learning_rate": 8.38523263154874e-06, "loss": 0.5374, "step": 5160 }, { "epoch": 0.27850628676272193, "grad_norm": 0.9098792536009153, "learning_rate": 8.384647142409911e-06, "loss": 0.4813, "step": 5161 }, { "epoch": 0.27856025039123633, "grad_norm": 1.0456677386888666, "learning_rate": 8.384061570363411e-06, "loss": 0.5235, "step": 5162 }, { "epoch": 0.2786142140197507, "grad_norm": 1.1096475108821384, "learning_rate": 8.383475915426067e-06, "loss": 0.5031, "step": 5163 }, { "epoch": 0.27866817764826507, "grad_norm": 1.023470968276582, "learning_rate": 8.382890177614711e-06, "loss": 0.4926, "step": 5164 }, { "epoch": 0.27872214127677947, "grad_norm": 1.1851831795830645, "learning_rate": 8.382304356946178e-06, "loss": 0.6504, "step": 5165 }, { "epoch": 0.2787761049052938, "grad_norm": 1.079702018321339, "learning_rate": 8.381718453437307e-06, "loss": 0.5236, "step": 5166 }, { "epoch": 0.2788300685338082, 
"grad_norm": 0.9268666736659805, "learning_rate": 8.381132467104936e-06, "loss": 0.5269, "step": 5167 }, { "epoch": 0.2788840321623226, "grad_norm": 0.8786112763719611, "learning_rate": 8.380546397965904e-06, "loss": 0.4765, "step": 5168 }, { "epoch": 0.278937995790837, "grad_norm": 0.9877041493944586, "learning_rate": 8.37996024603706e-06, "loss": 0.4075, "step": 5169 }, { "epoch": 0.27899195941935134, "grad_norm": 0.8463161806803607, "learning_rate": 8.379374011335248e-06, "loss": 0.4173, "step": 5170 }, { "epoch": 0.27904592304786574, "grad_norm": 1.0933164236708315, "learning_rate": 8.378787693877318e-06, "loss": 0.6097, "step": 5171 }, { "epoch": 0.27909988667638014, "grad_norm": 0.9657070377376511, "learning_rate": 8.378201293680121e-06, "loss": 0.4661, "step": 5172 }, { "epoch": 0.2791538503048945, "grad_norm": 0.9071057576735806, "learning_rate": 8.377614810760509e-06, "loss": 0.3571, "step": 5173 }, { "epoch": 0.2792078139334089, "grad_norm": 0.9614238337293085, "learning_rate": 8.377028245135343e-06, "loss": 0.5366, "step": 5174 }, { "epoch": 0.2792617775619233, "grad_norm": 1.0032450035532046, "learning_rate": 8.376441596821475e-06, "loss": 0.5753, "step": 5175 }, { "epoch": 0.27931574119043767, "grad_norm": 0.9096085950586701, "learning_rate": 8.375854865835769e-06, "loss": 0.4688, "step": 5176 }, { "epoch": 0.279369704818952, "grad_norm": 0.9313417626631812, "learning_rate": 8.375268052195092e-06, "loss": 0.4133, "step": 5177 }, { "epoch": 0.2794236684474664, "grad_norm": 0.8858556868687976, "learning_rate": 8.374681155916302e-06, "loss": 0.4179, "step": 5178 }, { "epoch": 0.2794776320759808, "grad_norm": 1.241038069763822, "learning_rate": 8.374094177016271e-06, "loss": 0.71, "step": 5179 }, { "epoch": 0.27953159570449515, "grad_norm": 0.726303800177769, "learning_rate": 8.373507115511871e-06, "loss": 0.2853, "step": 5180 }, { "epoch": 0.27958555933300955, "grad_norm": 1.2834513512672878, "learning_rate": 8.372919971419974e-06, "loss": 0.6339, "step": 5181 }, { "epoch": 0.27963952296152395, "grad_norm": 1.2907785633539508, "learning_rate": 8.372332744757451e-06, "loss": 0.7897, "step": 5182 }, { "epoch": 0.2796934865900383, "grad_norm": 1.0138731266807657, "learning_rate": 8.371745435541187e-06, "loss": 0.4844, "step": 5183 }, { "epoch": 0.2797474502185527, "grad_norm": 0.9742985069461777, "learning_rate": 8.371158043788053e-06, "loss": 0.365, "step": 5184 }, { "epoch": 0.2798014138470671, "grad_norm": 1.0211065872616585, "learning_rate": 8.370570569514939e-06, "loss": 0.5078, "step": 5185 }, { "epoch": 0.2798553774755815, "grad_norm": 0.8583967322819118, "learning_rate": 8.369983012738725e-06, "loss": 0.3169, "step": 5186 }, { "epoch": 0.2799093411040958, "grad_norm": 0.9737478527592637, "learning_rate": 8.3693953734763e-06, "loss": 0.528, "step": 5187 }, { "epoch": 0.2799633047326102, "grad_norm": 1.1991064662352786, "learning_rate": 8.36880765174455e-06, "loss": 0.6277, "step": 5188 }, { "epoch": 0.2800172683611246, "grad_norm": 0.9292688528088241, "learning_rate": 8.368219847560372e-06, "loss": 0.4335, "step": 5189 }, { "epoch": 0.28007123198963896, "grad_norm": 1.0233726886781838, "learning_rate": 8.367631960940656e-06, "loss": 0.495, "step": 5190 }, { "epoch": 0.28012519561815336, "grad_norm": 1.1341116880415643, "learning_rate": 8.367043991902302e-06, "loss": 0.3519, "step": 5191 }, { "epoch": 0.28017915924666775, "grad_norm": 1.1029131077407095, "learning_rate": 8.366455940462205e-06, "loss": 0.4744, "step": 5192 }, { "epoch": 0.28023312287518215, "grad_norm": 
1.3243149742718867, "learning_rate": 8.365867806637267e-06, "loss": 0.6258, "step": 5193 }, { "epoch": 0.2802870865036965, "grad_norm": 0.861426740239744, "learning_rate": 8.365279590444396e-06, "loss": 0.3822, "step": 5194 }, { "epoch": 0.2803410501322109, "grad_norm": 1.0789063539814836, "learning_rate": 8.364691291900491e-06, "loss": 0.5185, "step": 5195 }, { "epoch": 0.2803950137607253, "grad_norm": 1.1925559505285184, "learning_rate": 8.364102911022465e-06, "loss": 0.5673, "step": 5196 }, { "epoch": 0.28044897738923963, "grad_norm": 0.9768618989750608, "learning_rate": 8.36351444782723e-06, "loss": 0.5069, "step": 5197 }, { "epoch": 0.280502941017754, "grad_norm": 1.0007996356663025, "learning_rate": 8.362925902331692e-06, "loss": 0.5036, "step": 5198 }, { "epoch": 0.2805569046462684, "grad_norm": 0.9449320212842772, "learning_rate": 8.362337274552773e-06, "loss": 0.3695, "step": 5199 }, { "epoch": 0.2806108682747828, "grad_norm": 0.9884088893361652, "learning_rate": 8.361748564507387e-06, "loss": 0.5037, "step": 5200 }, { "epoch": 0.28066483190329716, "grad_norm": 1.2831977828391317, "learning_rate": 8.361159772212457e-06, "loss": 0.5981, "step": 5201 }, { "epoch": 0.28071879553181156, "grad_norm": 1.1509976665718125, "learning_rate": 8.360570897684905e-06, "loss": 0.6116, "step": 5202 }, { "epoch": 0.28077275916032596, "grad_norm": 0.7157367843923668, "learning_rate": 8.359981940941653e-06, "loss": 0.2681, "step": 5203 }, { "epoch": 0.2808267227888403, "grad_norm": 1.129770776732526, "learning_rate": 8.35939290199963e-06, "loss": 0.62, "step": 5204 }, { "epoch": 0.2808806864173547, "grad_norm": 1.0754692553439553, "learning_rate": 8.358803780875768e-06, "loss": 0.4591, "step": 5205 }, { "epoch": 0.2809346500458691, "grad_norm": 0.9795256363186552, "learning_rate": 8.358214577586995e-06, "loss": 0.4919, "step": 5206 }, { "epoch": 0.2809886136743835, "grad_norm": 0.9203708557747434, "learning_rate": 8.357625292150248e-06, "loss": 0.433, "step": 5207 }, { "epoch": 0.28104257730289783, "grad_norm": 1.2695843481038314, "learning_rate": 8.357035924582463e-06, "loss": 0.4863, "step": 5208 }, { "epoch": 0.28109654093141223, "grad_norm": 1.3246282664478144, "learning_rate": 8.356446474900578e-06, "loss": 0.4565, "step": 5209 }, { "epoch": 0.28115050455992663, "grad_norm": 1.143089835735994, "learning_rate": 8.355856943121536e-06, "loss": 0.4816, "step": 5210 }, { "epoch": 0.28120446818844097, "grad_norm": 1.0187325172334678, "learning_rate": 8.355267329262278e-06, "loss": 0.4471, "step": 5211 }, { "epoch": 0.28125843181695537, "grad_norm": 1.305157118218297, "learning_rate": 8.354677633339754e-06, "loss": 0.6088, "step": 5212 }, { "epoch": 0.28131239544546977, "grad_norm": 0.9146035159197472, "learning_rate": 8.354087855370909e-06, "loss": 0.3738, "step": 5213 }, { "epoch": 0.2813663590739841, "grad_norm": 0.8960768025370934, "learning_rate": 8.353497995372696e-06, "loss": 0.359, "step": 5214 }, { "epoch": 0.2814203227024985, "grad_norm": 1.0725420479656078, "learning_rate": 8.352908053362067e-06, "loss": 0.4419, "step": 5215 }, { "epoch": 0.2814742863310129, "grad_norm": 0.8507771988656984, "learning_rate": 8.352318029355977e-06, "loss": 0.4253, "step": 5216 }, { "epoch": 0.2815282499595273, "grad_norm": 0.9314833281648784, "learning_rate": 8.351727923371384e-06, "loss": 0.57, "step": 5217 }, { "epoch": 0.28158221358804164, "grad_norm": 1.1495705960788871, "learning_rate": 8.351137735425252e-06, "loss": 0.4345, "step": 5218 }, { "epoch": 0.28163617721655604, "grad_norm": 0.9791286271581207, 
"learning_rate": 8.350547465534538e-06, "loss": 0.5251, "step": 5219 }, { "epoch": 0.28169014084507044, "grad_norm": 1.0515167392180507, "learning_rate": 8.349957113716213e-06, "loss": 0.4363, "step": 5220 }, { "epoch": 0.2817441044735848, "grad_norm": 0.8349613592534751, "learning_rate": 8.349366679987237e-06, "loss": 0.3719, "step": 5221 }, { "epoch": 0.2817980681020992, "grad_norm": 0.8292161500472444, "learning_rate": 8.348776164364587e-06, "loss": 0.3255, "step": 5222 }, { "epoch": 0.2818520317306136, "grad_norm": 1.2565793912489867, "learning_rate": 8.34818556686523e-06, "loss": 0.6643, "step": 5223 }, { "epoch": 0.28190599535912797, "grad_norm": 0.9101156071186645, "learning_rate": 8.347594887506141e-06, "loss": 0.4416, "step": 5224 }, { "epoch": 0.2819599589876423, "grad_norm": 1.1524709155773445, "learning_rate": 8.347004126304297e-06, "loss": 0.5683, "step": 5225 }, { "epoch": 0.2820139226161567, "grad_norm": 0.8341321018357162, "learning_rate": 8.34641328327668e-06, "loss": 0.3497, "step": 5226 }, { "epoch": 0.2820678862446711, "grad_norm": 1.2668434836325178, "learning_rate": 8.345822358440268e-06, "loss": 0.6403, "step": 5227 }, { "epoch": 0.28212184987318545, "grad_norm": 1.050822214182496, "learning_rate": 8.345231351812045e-06, "loss": 0.4323, "step": 5228 }, { "epoch": 0.28217581350169985, "grad_norm": 1.1273374653670862, "learning_rate": 8.344640263408999e-06, "loss": 0.5613, "step": 5229 }, { "epoch": 0.28222977713021424, "grad_norm": 1.11068001568026, "learning_rate": 8.344049093248118e-06, "loss": 0.5278, "step": 5230 }, { "epoch": 0.28228374075872864, "grad_norm": 1.1057827306191248, "learning_rate": 8.343457841346391e-06, "loss": 0.6577, "step": 5231 }, { "epoch": 0.282337704387243, "grad_norm": 1.1927247126790186, "learning_rate": 8.342866507720817e-06, "loss": 0.503, "step": 5232 }, { "epoch": 0.2823916680157574, "grad_norm": 1.0893226374327662, "learning_rate": 8.342275092388382e-06, "loss": 0.5403, "step": 5233 }, { "epoch": 0.2824456316442718, "grad_norm": 1.2062931197595659, "learning_rate": 8.341683595366091e-06, "loss": 0.5266, "step": 5234 }, { "epoch": 0.2824995952727861, "grad_norm": 1.088510171170987, "learning_rate": 8.341092016670942e-06, "loss": 0.4691, "step": 5235 }, { "epoch": 0.2825535589013005, "grad_norm": 1.1393829029002434, "learning_rate": 8.340500356319938e-06, "loss": 0.4648, "step": 5236 }, { "epoch": 0.2826075225298149, "grad_norm": 1.2383087402811706, "learning_rate": 8.33990861433008e-06, "loss": 0.5759, "step": 5237 }, { "epoch": 0.2826614861583293, "grad_norm": 1.158388393443371, "learning_rate": 8.339316790718384e-06, "loss": 0.6915, "step": 5238 }, { "epoch": 0.28271544978684365, "grad_norm": 1.0733591859590972, "learning_rate": 8.338724885501852e-06, "loss": 0.5101, "step": 5239 }, { "epoch": 0.28276941341535805, "grad_norm": 1.0831872317622955, "learning_rate": 8.338132898697498e-06, "loss": 0.6607, "step": 5240 }, { "epoch": 0.28282337704387245, "grad_norm": 1.1098006949879207, "learning_rate": 8.337540830322338e-06, "loss": 0.4747, "step": 5241 }, { "epoch": 0.2828773406723868, "grad_norm": 0.8594101318684795, "learning_rate": 8.336948680393389e-06, "loss": 0.4554, "step": 5242 }, { "epoch": 0.2829313043009012, "grad_norm": 0.8475147965094816, "learning_rate": 8.336356448927665e-06, "loss": 0.4089, "step": 5243 }, { "epoch": 0.2829852679294156, "grad_norm": 1.1360751967876888, "learning_rate": 8.335764135942192e-06, "loss": 0.5592, "step": 5244 }, { "epoch": 0.28303923155793, "grad_norm": 1.0307022365279201, "learning_rate": 
8.335171741453994e-06, "loss": 0.5353, "step": 5245 }, { "epoch": 0.2830931951864443, "grad_norm": 1.1730224545012737, "learning_rate": 8.334579265480094e-06, "loss": 0.5233, "step": 5246 }, { "epoch": 0.2831471588149587, "grad_norm": 0.7916927050990835, "learning_rate": 8.33398670803752e-06, "loss": 0.3586, "step": 5247 }, { "epoch": 0.2832011224434731, "grad_norm": 0.9436581499927891, "learning_rate": 8.333394069143309e-06, "loss": 0.3952, "step": 5248 }, { "epoch": 0.28325508607198746, "grad_norm": 1.070323017967436, "learning_rate": 8.332801348814485e-06, "loss": 0.4616, "step": 5249 }, { "epoch": 0.28330904970050186, "grad_norm": 0.9573372477518666, "learning_rate": 8.33220854706809e-06, "loss": 0.5162, "step": 5250 }, { "epoch": 0.28336301332901626, "grad_norm": 1.1585513949342028, "learning_rate": 8.331615663921159e-06, "loss": 0.6672, "step": 5251 }, { "epoch": 0.2834169769575306, "grad_norm": 0.8834631878106044, "learning_rate": 8.331022699390734e-06, "loss": 0.4705, "step": 5252 }, { "epoch": 0.283470940586045, "grad_norm": 1.2864485291397298, "learning_rate": 8.330429653493856e-06, "loss": 0.6313, "step": 5253 }, { "epoch": 0.2835249042145594, "grad_norm": 0.8733286481763205, "learning_rate": 8.329836526247568e-06, "loss": 0.4253, "step": 5254 }, { "epoch": 0.2835788678430738, "grad_norm": 1.0304861908664968, "learning_rate": 8.32924331766892e-06, "loss": 0.5078, "step": 5255 }, { "epoch": 0.28363283147158813, "grad_norm": 0.9393371350873889, "learning_rate": 8.328650027774961e-06, "loss": 0.4085, "step": 5256 }, { "epoch": 0.28368679510010253, "grad_norm": 1.2698276204761005, "learning_rate": 8.328056656582741e-06, "loss": 0.5969, "step": 5257 }, { "epoch": 0.2837407587286169, "grad_norm": 0.9069514263330534, "learning_rate": 8.327463204109316e-06, "loss": 0.4597, "step": 5258 }, { "epoch": 0.28379472235713127, "grad_norm": 0.9513637608508044, "learning_rate": 8.32686967037174e-06, "loss": 0.5246, "step": 5259 }, { "epoch": 0.28384868598564567, "grad_norm": 0.9858167083018621, "learning_rate": 8.326276055387074e-06, "loss": 0.5333, "step": 5260 }, { "epoch": 0.28390264961416006, "grad_norm": 1.252218898042466, "learning_rate": 8.32568235917238e-06, "loss": 0.5273, "step": 5261 }, { "epoch": 0.28395661324267446, "grad_norm": 1.1096524984315819, "learning_rate": 8.325088581744718e-06, "loss": 0.4854, "step": 5262 }, { "epoch": 0.2840105768711888, "grad_norm": 1.1571064917502285, "learning_rate": 8.324494723121156e-06, "loss": 0.4816, "step": 5263 }, { "epoch": 0.2840645404997032, "grad_norm": 1.0214729625428483, "learning_rate": 8.323900783318763e-06, "loss": 0.5225, "step": 5264 }, { "epoch": 0.2841185041282176, "grad_norm": 0.9602134731411627, "learning_rate": 8.323306762354606e-06, "loss": 0.4346, "step": 5265 }, { "epoch": 0.28417246775673194, "grad_norm": 1.003491878466424, "learning_rate": 8.32271266024576e-06, "loss": 0.4838, "step": 5266 }, { "epoch": 0.28422643138524634, "grad_norm": 0.9961159876873469, "learning_rate": 8.322118477009302e-06, "loss": 0.5509, "step": 5267 }, { "epoch": 0.28428039501376073, "grad_norm": 1.10443576954379, "learning_rate": 8.321524212662306e-06, "loss": 0.5677, "step": 5268 }, { "epoch": 0.28433435864227513, "grad_norm": 1.059302300848515, "learning_rate": 8.320929867221853e-06, "loss": 0.4434, "step": 5269 }, { "epoch": 0.2843883222707895, "grad_norm": 1.116302503945931, "learning_rate": 8.320335440705027e-06, "loss": 0.6942, "step": 5270 }, { "epoch": 0.28444228589930387, "grad_norm": 1.1731409931129784, "learning_rate": 
8.319740933128909e-06, "loss": 0.655, "step": 5271 }, { "epoch": 0.28449624952781827, "grad_norm": 0.9802973559467687, "learning_rate": 8.319146344510587e-06, "loss": 0.4073, "step": 5272 }, { "epoch": 0.2845502131563326, "grad_norm": 0.9818750488104417, "learning_rate": 8.318551674867154e-06, "loss": 0.4637, "step": 5273 }, { "epoch": 0.284604176784847, "grad_norm": 0.9684884559716106, "learning_rate": 8.317956924215694e-06, "loss": 0.5124, "step": 5274 }, { "epoch": 0.2846581404133614, "grad_norm": 1.153653840275477, "learning_rate": 8.317362092573305e-06, "loss": 0.4813, "step": 5275 }, { "epoch": 0.2847121040418758, "grad_norm": 1.227678657381375, "learning_rate": 8.316767179957086e-06, "loss": 0.5169, "step": 5276 }, { "epoch": 0.28476606767039014, "grad_norm": 1.1822483721550656, "learning_rate": 8.316172186384128e-06, "loss": 0.5857, "step": 5277 }, { "epoch": 0.28482003129890454, "grad_norm": 1.107453794051031, "learning_rate": 8.315577111871539e-06, "loss": 0.4236, "step": 5278 }, { "epoch": 0.28487399492741894, "grad_norm": 1.055904068297378, "learning_rate": 8.314981956436418e-06, "loss": 0.4489, "step": 5279 }, { "epoch": 0.2849279585559333, "grad_norm": 1.0862839078490334, "learning_rate": 8.314386720095871e-06, "loss": 0.5057, "step": 5280 }, { "epoch": 0.2849819221844477, "grad_norm": 1.1746623265796625, "learning_rate": 8.313791402867005e-06, "loss": 0.4907, "step": 5281 }, { "epoch": 0.2850358858129621, "grad_norm": 0.8413264516204407, "learning_rate": 8.313196004766931e-06, "loss": 0.3666, "step": 5282 }, { "epoch": 0.2850898494414764, "grad_norm": 1.2320785224116908, "learning_rate": 8.312600525812764e-06, "loss": 0.6733, "step": 5283 }, { "epoch": 0.2851438130699908, "grad_norm": 1.0121220179431036, "learning_rate": 8.312004966021614e-06, "loss": 0.5218, "step": 5284 }, { "epoch": 0.2851977766985052, "grad_norm": 0.8601793472793967, "learning_rate": 8.311409325410598e-06, "loss": 0.4177, "step": 5285 }, { "epoch": 0.2852517403270196, "grad_norm": 1.17044729092144, "learning_rate": 8.31081360399684e-06, "loss": 0.6768, "step": 5286 }, { "epoch": 0.28530570395553395, "grad_norm": 0.9818135941724351, "learning_rate": 8.31021780179746e-06, "loss": 0.4669, "step": 5287 }, { "epoch": 0.28535966758404835, "grad_norm": 1.109802723527431, "learning_rate": 8.309621918829577e-06, "loss": 0.5982, "step": 5288 }, { "epoch": 0.28541363121256275, "grad_norm": 1.1965973490015482, "learning_rate": 8.309025955110322e-06, "loss": 0.5572, "step": 5289 }, { "epoch": 0.2854675948410771, "grad_norm": 0.8545864074578304, "learning_rate": 8.308429910656825e-06, "loss": 0.3617, "step": 5290 }, { "epoch": 0.2855215584695915, "grad_norm": 1.0131251750944372, "learning_rate": 8.307833785486214e-06, "loss": 0.4626, "step": 5291 }, { "epoch": 0.2855755220981059, "grad_norm": 0.8344219881157394, "learning_rate": 8.307237579615622e-06, "loss": 0.4027, "step": 5292 }, { "epoch": 0.2856294857266203, "grad_norm": 1.0832303553142841, "learning_rate": 8.306641293062185e-06, "loss": 0.5916, "step": 5293 }, { "epoch": 0.2856834493551346, "grad_norm": 1.102988197371885, "learning_rate": 8.306044925843044e-06, "loss": 0.7076, "step": 5294 }, { "epoch": 0.285737412983649, "grad_norm": 1.2008423341904468, "learning_rate": 8.305448477975334e-06, "loss": 0.5923, "step": 5295 }, { "epoch": 0.2857913766121634, "grad_norm": 0.8749593249745591, "learning_rate": 8.304851949476202e-06, "loss": 0.4231, "step": 5296 }, { "epoch": 0.28584534024067776, "grad_norm": 1.0504811931287372, "learning_rate": 8.30425534036279e-06, 
"loss": 0.4763, "step": 5297 }, { "epoch": 0.28589930386919216, "grad_norm": 1.084806656253144, "learning_rate": 8.303658650652245e-06, "loss": 0.5237, "step": 5298 }, { "epoch": 0.28595326749770655, "grad_norm": 1.0845437186622442, "learning_rate": 8.303061880361717e-06, "loss": 0.5559, "step": 5299 }, { "epoch": 0.28600723112622095, "grad_norm": 1.0155568524574485, "learning_rate": 8.30246502950836e-06, "loss": 0.5147, "step": 5300 }, { "epoch": 0.2860611947547353, "grad_norm": 1.012945340281663, "learning_rate": 8.301868098109326e-06, "loss": 0.5269, "step": 5301 }, { "epoch": 0.2861151583832497, "grad_norm": 1.0600868776244337, "learning_rate": 8.30127108618177e-06, "loss": 0.6879, "step": 5302 }, { "epoch": 0.2861691220117641, "grad_norm": 0.8685829709623949, "learning_rate": 8.300673993742856e-06, "loss": 0.3714, "step": 5303 }, { "epoch": 0.28622308564027843, "grad_norm": 0.9758221399855582, "learning_rate": 8.300076820809739e-06, "loss": 0.4791, "step": 5304 }, { "epoch": 0.2862770492687928, "grad_norm": 0.9391237851130257, "learning_rate": 8.299479567399585e-06, "loss": 0.4553, "step": 5305 }, { "epoch": 0.2863310128973072, "grad_norm": 1.058592126971488, "learning_rate": 8.29888223352956e-06, "loss": 0.5271, "step": 5306 }, { "epoch": 0.2863849765258216, "grad_norm": 1.0226100985913054, "learning_rate": 8.29828481921683e-06, "loss": 0.5826, "step": 5307 }, { "epoch": 0.28643894015433596, "grad_norm": 0.9555447593355231, "learning_rate": 8.297687324478572e-06, "loss": 0.4977, "step": 5308 }, { "epoch": 0.28649290378285036, "grad_norm": 1.0506232493996632, "learning_rate": 8.297089749331948e-06, "loss": 0.5216, "step": 5309 }, { "epoch": 0.28654686741136476, "grad_norm": 1.0382295158113921, "learning_rate": 8.29649209379414e-06, "loss": 0.3467, "step": 5310 }, { "epoch": 0.2866008310398791, "grad_norm": 1.0318329523797682, "learning_rate": 8.295894357882325e-06, "loss": 0.5723, "step": 5311 }, { "epoch": 0.2866547946683935, "grad_norm": 0.8639993145164058, "learning_rate": 8.29529654161368e-06, "loss": 0.4378, "step": 5312 }, { "epoch": 0.2867087582969079, "grad_norm": 0.9627058230658225, "learning_rate": 8.294698645005388e-06, "loss": 0.4302, "step": 5313 }, { "epoch": 0.2867627219254223, "grad_norm": 1.0104795783410847, "learning_rate": 8.294100668074632e-06, "loss": 0.4711, "step": 5314 }, { "epoch": 0.28681668555393663, "grad_norm": 0.9163214770613735, "learning_rate": 8.293502610838603e-06, "loss": 0.3796, "step": 5315 }, { "epoch": 0.28687064918245103, "grad_norm": 0.7381929135575839, "learning_rate": 8.292904473314482e-06, "loss": 0.3027, "step": 5316 }, { "epoch": 0.28692461281096543, "grad_norm": 1.009542838191959, "learning_rate": 8.292306255519467e-06, "loss": 0.4176, "step": 5317 }, { "epoch": 0.28697857643947977, "grad_norm": 1.1070272269681232, "learning_rate": 8.291707957470748e-06, "loss": 0.6255, "step": 5318 }, { "epoch": 0.28703254006799417, "grad_norm": 1.0066582367907981, "learning_rate": 8.29110957918552e-06, "loss": 0.5236, "step": 5319 }, { "epoch": 0.28708650369650857, "grad_norm": 1.2580977754001146, "learning_rate": 8.290511120680983e-06, "loss": 0.4868, "step": 5320 }, { "epoch": 0.2871404673250229, "grad_norm": 0.9549151537489389, "learning_rate": 8.28991258197434e-06, "loss": 0.4468, "step": 5321 }, { "epoch": 0.2871944309535373, "grad_norm": 1.0200049538453886, "learning_rate": 8.28931396308279e-06, "loss": 0.4361, "step": 5322 }, { "epoch": 0.2872483945820517, "grad_norm": 1.16405211470237, "learning_rate": 8.288715264023534e-06, "loss": 0.4582, 
"step": 5323 }, { "epoch": 0.2873023582105661, "grad_norm": 1.005309631343437, "learning_rate": 8.288116484813786e-06, "loss": 0.4996, "step": 5324 }, { "epoch": 0.28735632183908044, "grad_norm": 0.8889590951230237, "learning_rate": 8.287517625470754e-06, "loss": 0.4026, "step": 5325 }, { "epoch": 0.28741028546759484, "grad_norm": 0.8720753981019168, "learning_rate": 8.28691868601165e-06, "loss": 0.3754, "step": 5326 }, { "epoch": 0.28746424909610924, "grad_norm": 1.2623302572009456, "learning_rate": 8.286319666453682e-06, "loss": 0.7516, "step": 5327 }, { "epoch": 0.2875182127246236, "grad_norm": 1.0459545899110336, "learning_rate": 8.285720566814076e-06, "loss": 0.4001, "step": 5328 }, { "epoch": 0.287572176353138, "grad_norm": 1.0079595754939419, "learning_rate": 8.285121387110044e-06, "loss": 0.4526, "step": 5329 }, { "epoch": 0.2876261399816524, "grad_norm": 1.1201170004147303, "learning_rate": 8.284522127358809e-06, "loss": 0.5641, "step": 5330 }, { "epoch": 0.28768010361016677, "grad_norm": 0.9532457026941401, "learning_rate": 8.283922787577596e-06, "loss": 0.4008, "step": 5331 }, { "epoch": 0.2877340672386811, "grad_norm": 1.1028198846853186, "learning_rate": 8.283323367783627e-06, "loss": 0.5664, "step": 5332 }, { "epoch": 0.2877880308671955, "grad_norm": 1.144328468829172, "learning_rate": 8.282723867994135e-06, "loss": 0.5574, "step": 5333 }, { "epoch": 0.2878419944957099, "grad_norm": 1.0425593906311137, "learning_rate": 8.282124288226346e-06, "loss": 0.3982, "step": 5334 }, { "epoch": 0.28789595812422425, "grad_norm": 0.9455468236766429, "learning_rate": 8.281524628497492e-06, "loss": 0.5002, "step": 5335 }, { "epoch": 0.28794992175273865, "grad_norm": 1.2031298857041257, "learning_rate": 8.280924888824812e-06, "loss": 0.6162, "step": 5336 }, { "epoch": 0.28800388538125304, "grad_norm": 1.1849757761009654, "learning_rate": 8.28032506922554e-06, "loss": 0.5512, "step": 5337 }, { "epoch": 0.28805784900976744, "grad_norm": 1.0288548564280657, "learning_rate": 8.279725169716917e-06, "loss": 0.3496, "step": 5338 }, { "epoch": 0.2881118126382818, "grad_norm": 0.9502486230723481, "learning_rate": 8.279125190316182e-06, "loss": 0.4609, "step": 5339 }, { "epoch": 0.2881657762667962, "grad_norm": 1.212778276609102, "learning_rate": 8.278525131040582e-06, "loss": 0.6149, "step": 5340 }, { "epoch": 0.2882197398953106, "grad_norm": 1.1084392221827315, "learning_rate": 8.277924991907363e-06, "loss": 0.3774, "step": 5341 }, { "epoch": 0.2882737035238249, "grad_norm": 1.0635753482369519, "learning_rate": 8.277324772933772e-06, "loss": 0.4498, "step": 5342 }, { "epoch": 0.2883276671523393, "grad_norm": 0.8876907625015934, "learning_rate": 8.27672447413706e-06, "loss": 0.3878, "step": 5343 }, { "epoch": 0.2883816307808537, "grad_norm": 0.9340236997187584, "learning_rate": 8.27612409553448e-06, "loss": 0.4761, "step": 5344 }, { "epoch": 0.2884355944093681, "grad_norm": 1.2588573342806322, "learning_rate": 8.275523637143292e-06, "loss": 0.6165, "step": 5345 }, { "epoch": 0.28848955803788245, "grad_norm": 0.8950565095279595, "learning_rate": 8.274923098980749e-06, "loss": 0.4173, "step": 5346 }, { "epoch": 0.28854352166639685, "grad_norm": 1.2283973344435488, "learning_rate": 8.274322481064112e-06, "loss": 0.4533, "step": 5347 }, { "epoch": 0.28859748529491125, "grad_norm": 1.1358369546547467, "learning_rate": 8.273721783410643e-06, "loss": 0.5539, "step": 5348 }, { "epoch": 0.2886514489234256, "grad_norm": 1.0353221302404354, "learning_rate": 8.273121006037608e-06, "loss": 0.5357, "step": 5349 }, 
{ "epoch": 0.28870541255194, "grad_norm": 1.0787186213548108, "learning_rate": 8.272520148962272e-06, "loss": 0.5179, "step": 5350 }, { "epoch": 0.2887593761804544, "grad_norm": 0.9430812535413787, "learning_rate": 8.271919212201907e-06, "loss": 0.4222, "step": 5351 }, { "epoch": 0.2888133398089687, "grad_norm": 0.9956531377766678, "learning_rate": 8.271318195773782e-06, "loss": 0.4742, "step": 5352 }, { "epoch": 0.2888673034374831, "grad_norm": 1.2357996457291027, "learning_rate": 8.270717099695173e-06, "loss": 0.4661, "step": 5353 }, { "epoch": 0.2889212670659975, "grad_norm": 0.986131967300041, "learning_rate": 8.270115923983355e-06, "loss": 0.3932, "step": 5354 }, { "epoch": 0.2889752306945119, "grad_norm": 1.2507282945597666, "learning_rate": 8.269514668655606e-06, "loss": 0.534, "step": 5355 }, { "epoch": 0.28902919432302626, "grad_norm": 0.9649999800689361, "learning_rate": 8.268913333729206e-06, "loss": 0.6083, "step": 5356 }, { "epoch": 0.28908315795154066, "grad_norm": 1.0600703490750132, "learning_rate": 8.268311919221439e-06, "loss": 0.4797, "step": 5357 }, { "epoch": 0.28913712158005506, "grad_norm": 1.002010915110171, "learning_rate": 8.267710425149592e-06, "loss": 0.438, "step": 5358 }, { "epoch": 0.2891910852085694, "grad_norm": 1.0761274338600197, "learning_rate": 8.267108851530949e-06, "loss": 0.6289, "step": 5359 }, { "epoch": 0.2892450488370838, "grad_norm": 1.0051417189596616, "learning_rate": 8.266507198382801e-06, "loss": 0.4645, "step": 5360 }, { "epoch": 0.2892990124655982, "grad_norm": 1.255253736523476, "learning_rate": 8.265905465722442e-06, "loss": 0.4766, "step": 5361 }, { "epoch": 0.2893529760941126, "grad_norm": 1.2243599995429308, "learning_rate": 8.265303653567166e-06, "loss": 0.6498, "step": 5362 }, { "epoch": 0.28940693972262693, "grad_norm": 0.9891044353067284, "learning_rate": 8.264701761934268e-06, "loss": 0.4538, "step": 5363 }, { "epoch": 0.28946090335114133, "grad_norm": 0.9263801064862729, "learning_rate": 8.264099790841047e-06, "loss": 0.4086, "step": 5364 }, { "epoch": 0.2895148669796557, "grad_norm": 1.3073434674959985, "learning_rate": 8.263497740304807e-06, "loss": 0.6906, "step": 5365 }, { "epoch": 0.28956883060817007, "grad_norm": 1.0404513363794408, "learning_rate": 8.26289561034285e-06, "loss": 0.5747, "step": 5366 }, { "epoch": 0.28962279423668447, "grad_norm": 1.427748007810944, "learning_rate": 8.262293400972478e-06, "loss": 0.5797, "step": 5367 }, { "epoch": 0.28967675786519886, "grad_norm": 0.9544937010317941, "learning_rate": 8.261691112211006e-06, "loss": 0.401, "step": 5368 }, { "epoch": 0.28973072149371326, "grad_norm": 1.0175496589820658, "learning_rate": 8.261088744075739e-06, "loss": 0.4157, "step": 5369 }, { "epoch": 0.2897846851222276, "grad_norm": 1.1014040030809185, "learning_rate": 8.260486296583992e-06, "loss": 0.4536, "step": 5370 }, { "epoch": 0.289838648750742, "grad_norm": 1.116412236204789, "learning_rate": 8.25988376975308e-06, "loss": 0.5688, "step": 5371 }, { "epoch": 0.2898926123792564, "grad_norm": 1.1399711376563233, "learning_rate": 8.25928116360032e-06, "loss": 0.5436, "step": 5372 }, { "epoch": 0.28994657600777074, "grad_norm": 0.847831458932473, "learning_rate": 8.258678478143032e-06, "loss": 0.4155, "step": 5373 }, { "epoch": 0.29000053963628514, "grad_norm": 1.2379818308535322, "learning_rate": 8.258075713398536e-06, "loss": 0.4429, "step": 5374 }, { "epoch": 0.29005450326479953, "grad_norm": 1.2381041974942528, "learning_rate": 8.257472869384156e-06, "loss": 0.5696, "step": 5375 }, { "epoch": 
0.29010846689331393, "grad_norm": 1.1872922720443846, "learning_rate": 8.25686994611722e-06, "loss": 0.6659, "step": 5376 }, { "epoch": 0.2901624305218283, "grad_norm": 1.040378457305383, "learning_rate": 8.256266943615059e-06, "loss": 0.5299, "step": 5377 }, { "epoch": 0.29021639415034267, "grad_norm": 0.9826808545351431, "learning_rate": 8.255663861894998e-06, "loss": 0.4029, "step": 5378 }, { "epoch": 0.29027035777885707, "grad_norm": 0.9791381762969836, "learning_rate": 8.255060700974374e-06, "loss": 0.4387, "step": 5379 }, { "epoch": 0.2903243214073714, "grad_norm": 1.0324158777726515, "learning_rate": 8.254457460870523e-06, "loss": 0.5707, "step": 5380 }, { "epoch": 0.2903782850358858, "grad_norm": 1.0881804681399938, "learning_rate": 8.25385414160078e-06, "loss": 0.5091, "step": 5381 }, { "epoch": 0.2904322486644002, "grad_norm": 0.9469636009164442, "learning_rate": 8.253250743182486e-06, "loss": 0.4769, "step": 5382 }, { "epoch": 0.2904862122929146, "grad_norm": 0.7687497521535107, "learning_rate": 8.252647265632983e-06, "loss": 0.2819, "step": 5383 }, { "epoch": 0.29054017592142894, "grad_norm": 0.9433582375627291, "learning_rate": 8.252043708969619e-06, "loss": 0.4617, "step": 5384 }, { "epoch": 0.29059413954994334, "grad_norm": 0.827458323168548, "learning_rate": 8.251440073209735e-06, "loss": 0.3072, "step": 5385 }, { "epoch": 0.29064810317845774, "grad_norm": 0.9467999625777371, "learning_rate": 8.250836358370686e-06, "loss": 0.4146, "step": 5386 }, { "epoch": 0.2907020668069721, "grad_norm": 0.9899421326637341, "learning_rate": 8.250232564469819e-06, "loss": 0.536, "step": 5387 }, { "epoch": 0.2907560304354865, "grad_norm": 1.1909411469299798, "learning_rate": 8.24962869152449e-06, "loss": 0.5406, "step": 5388 }, { "epoch": 0.2908099940640009, "grad_norm": 1.146237262181166, "learning_rate": 8.249024739552053e-06, "loss": 0.5784, "step": 5389 }, { "epoch": 0.2908639576925152, "grad_norm": 1.1087033144287817, "learning_rate": 8.248420708569867e-06, "loss": 0.5568, "step": 5390 }, { "epoch": 0.2909179213210296, "grad_norm": 0.967375010073956, "learning_rate": 8.247816598595295e-06, "loss": 0.4035, "step": 5391 }, { "epoch": 0.290971884949544, "grad_norm": 1.041482864774634, "learning_rate": 8.247212409645695e-06, "loss": 0.4772, "step": 5392 }, { "epoch": 0.2910258485780584, "grad_norm": 1.073923591731376, "learning_rate": 8.246608141738436e-06, "loss": 0.5403, "step": 5393 }, { "epoch": 0.29107981220657275, "grad_norm": 1.0275768764893314, "learning_rate": 8.246003794890885e-06, "loss": 0.543, "step": 5394 }, { "epoch": 0.29113377583508715, "grad_norm": 1.2735076082564012, "learning_rate": 8.245399369120407e-06, "loss": 0.4239, "step": 5395 }, { "epoch": 0.29118773946360155, "grad_norm": 0.7554750894448626, "learning_rate": 8.244794864444378e-06, "loss": 0.3589, "step": 5396 }, { "epoch": 0.2912417030921159, "grad_norm": 1.045449726593084, "learning_rate": 8.244190280880175e-06, "loss": 0.4992, "step": 5397 }, { "epoch": 0.2912956667206303, "grad_norm": 1.1965004638833823, "learning_rate": 8.243585618445166e-06, "loss": 0.4952, "step": 5398 }, { "epoch": 0.2913496303491447, "grad_norm": 0.9199266289175159, "learning_rate": 8.242980877156735e-06, "loss": 0.4529, "step": 5399 }, { "epoch": 0.2914035939776591, "grad_norm": 0.9442601209321192, "learning_rate": 8.242376057032263e-06, "loss": 0.3833, "step": 5400 }, { "epoch": 0.2914575576061734, "grad_norm": 0.9376888997010562, "learning_rate": 8.241771158089132e-06, "loss": 0.4161, "step": 5401 }, { "epoch": 0.2915115212346878, 
"grad_norm": 1.0500417413452352, "learning_rate": 8.241166180344728e-06, "loss": 0.4168, "step": 5402 }, { "epoch": 0.2915654848632022, "grad_norm": 1.1573073174924724, "learning_rate": 8.240561123816437e-06, "loss": 0.62, "step": 5403 }, { "epoch": 0.29161944849171656, "grad_norm": 1.0446495661094266, "learning_rate": 8.23995598852165e-06, "loss": 0.467, "step": 5404 }, { "epoch": 0.29167341212023096, "grad_norm": 0.9583283215429682, "learning_rate": 8.239350774477761e-06, "loss": 0.3966, "step": 5405 }, { "epoch": 0.29172737574874535, "grad_norm": 0.8797972135789057, "learning_rate": 8.238745481702161e-06, "loss": 0.3979, "step": 5406 }, { "epoch": 0.29178133937725975, "grad_norm": 1.1298912186093513, "learning_rate": 8.238140110212249e-06, "loss": 0.4898, "step": 5407 }, { "epoch": 0.2918353030057741, "grad_norm": 1.0661438980055717, "learning_rate": 8.237534660025425e-06, "loss": 0.6284, "step": 5408 }, { "epoch": 0.2918892666342885, "grad_norm": 1.215853836598387, "learning_rate": 8.236929131159088e-06, "loss": 0.6196, "step": 5409 }, { "epoch": 0.2919432302628029, "grad_norm": 1.1409039838798734, "learning_rate": 8.236323523630642e-06, "loss": 0.3982, "step": 5410 }, { "epoch": 0.29199719389131723, "grad_norm": 0.8400040183169017, "learning_rate": 8.235717837457493e-06, "loss": 0.3328, "step": 5411 }, { "epoch": 0.29205115751983163, "grad_norm": 1.0488940463657699, "learning_rate": 8.23511207265705e-06, "loss": 0.5608, "step": 5412 }, { "epoch": 0.292105121148346, "grad_norm": 1.1366563299138206, "learning_rate": 8.234506229246721e-06, "loss": 0.4682, "step": 5413 }, { "epoch": 0.2921590847768604, "grad_norm": 1.1224182857202638, "learning_rate": 8.233900307243921e-06, "loss": 0.4539, "step": 5414 }, { "epoch": 0.29221304840537476, "grad_norm": 0.9959704626024016, "learning_rate": 8.233294306666063e-06, "loss": 0.5272, "step": 5415 }, { "epoch": 0.29226701203388916, "grad_norm": 1.0323317438566149, "learning_rate": 8.232688227530567e-06, "loss": 0.5355, "step": 5416 }, { "epoch": 0.29232097566240356, "grad_norm": 1.1367329705569469, "learning_rate": 8.232082069854847e-06, "loss": 0.4943, "step": 5417 }, { "epoch": 0.2923749392909179, "grad_norm": 1.246354004340486, "learning_rate": 8.23147583365633e-06, "loss": 0.4472, "step": 5418 }, { "epoch": 0.2924289029194323, "grad_norm": 1.1123869751782323, "learning_rate": 8.230869518952436e-06, "loss": 0.4595, "step": 5419 }, { "epoch": 0.2924828665479467, "grad_norm": 0.9773400274911657, "learning_rate": 8.230263125760593e-06, "loss": 0.4384, "step": 5420 }, { "epoch": 0.29253683017646104, "grad_norm": 0.9407495478043133, "learning_rate": 8.22965665409823e-06, "loss": 0.5721, "step": 5421 }, { "epoch": 0.29259079380497544, "grad_norm": 0.8399569173836842, "learning_rate": 8.229050103982776e-06, "loss": 0.3927, "step": 5422 }, { "epoch": 0.29264475743348983, "grad_norm": 1.1422291009629884, "learning_rate": 8.228443475431665e-06, "loss": 0.5209, "step": 5423 }, { "epoch": 0.29269872106200423, "grad_norm": 1.1079426577120186, "learning_rate": 8.22783676846233e-06, "loss": 0.51, "step": 5424 }, { "epoch": 0.29275268469051857, "grad_norm": 0.9478205754385052, "learning_rate": 8.227229983092211e-06, "loss": 0.4824, "step": 5425 }, { "epoch": 0.29280664831903297, "grad_norm": 1.0131658723271872, "learning_rate": 8.22662311933875e-06, "loss": 0.6823, "step": 5426 }, { "epoch": 0.29286061194754737, "grad_norm": 0.9083996947801121, "learning_rate": 8.22601617721938e-06, "loss": 0.4977, "step": 5427 }, { "epoch": 0.2929145755760617, "grad_norm": 
1.0545415853905382, "learning_rate": 8.225409156751554e-06, "loss": 0.5247, "step": 5428 }, { "epoch": 0.2929685392045761, "grad_norm": 0.9476851606799219, "learning_rate": 8.224802057952714e-06, "loss": 0.6508, "step": 5429 }, { "epoch": 0.2930225028330905, "grad_norm": 1.0718557642674063, "learning_rate": 8.22419488084031e-06, "loss": 0.5035, "step": 5430 }, { "epoch": 0.2930764664616049, "grad_norm": 1.1004390861485833, "learning_rate": 8.223587625431791e-06, "loss": 0.5303, "step": 5431 }, { "epoch": 0.29313043009011924, "grad_norm": 0.9707714179839401, "learning_rate": 8.222980291744613e-06, "loss": 0.3874, "step": 5432 }, { "epoch": 0.29318439371863364, "grad_norm": 1.1599962859974324, "learning_rate": 8.22237287979623e-06, "loss": 0.5409, "step": 5433 }, { "epoch": 0.29323835734714804, "grad_norm": 1.319992450834645, "learning_rate": 8.221765389604099e-06, "loss": 0.6826, "step": 5434 }, { "epoch": 0.2932923209756624, "grad_norm": 1.1222617387727523, "learning_rate": 8.221157821185682e-06, "loss": 0.5894, "step": 5435 }, { "epoch": 0.2933462846041768, "grad_norm": 1.0258580884477058, "learning_rate": 8.220550174558439e-06, "loss": 0.4643, "step": 5436 }, { "epoch": 0.2934002482326912, "grad_norm": 0.8929660844363109, "learning_rate": 8.219942449739835e-06, "loss": 0.3826, "step": 5437 }, { "epoch": 0.29345421186120557, "grad_norm": 1.260848110419985, "learning_rate": 8.219334646747337e-06, "loss": 0.6333, "step": 5438 }, { "epoch": 0.2935081754897199, "grad_norm": 0.916291206183131, "learning_rate": 8.218726765598415e-06, "loss": 0.486, "step": 5439 }, { "epoch": 0.2935621391182343, "grad_norm": 0.9959265291854632, "learning_rate": 8.218118806310535e-06, "loss": 0.4151, "step": 5440 }, { "epoch": 0.2936161027467487, "grad_norm": 1.1356767435419513, "learning_rate": 8.217510768901174e-06, "loss": 0.4802, "step": 5441 }, { "epoch": 0.29367006637526305, "grad_norm": 0.9158152730978059, "learning_rate": 8.216902653387812e-06, "loss": 0.3979, "step": 5442 }, { "epoch": 0.29372403000377745, "grad_norm": 0.8560654957978295, "learning_rate": 8.216294459787918e-06, "loss": 0.3533, "step": 5443 }, { "epoch": 0.29377799363229185, "grad_norm": 1.1253049266915858, "learning_rate": 8.215686188118979e-06, "loss": 0.6517, "step": 5444 }, { "epoch": 0.29383195726080624, "grad_norm": 0.9451847467936215, "learning_rate": 8.215077838398475e-06, "loss": 0.5125, "step": 5445 }, { "epoch": 0.2938859208893206, "grad_norm": 1.1226813490154475, "learning_rate": 8.214469410643891e-06, "loss": 0.484, "step": 5446 }, { "epoch": 0.293939884517835, "grad_norm": 0.9629203593706895, "learning_rate": 8.21386090487271e-06, "loss": 0.3967, "step": 5447 }, { "epoch": 0.2939938481463494, "grad_norm": 1.1345575066789746, "learning_rate": 8.213252321102428e-06, "loss": 0.5399, "step": 5448 }, { "epoch": 0.2940478117748637, "grad_norm": 1.174680487968514, "learning_rate": 8.212643659350532e-06, "loss": 0.6315, "step": 5449 }, { "epoch": 0.2941017754033781, "grad_norm": 0.970722312137542, "learning_rate": 8.212034919634516e-06, "loss": 0.4666, "step": 5450 }, { "epoch": 0.2941557390318925, "grad_norm": 1.1106574122583126, "learning_rate": 8.211426101971876e-06, "loss": 0.6176, "step": 5451 }, { "epoch": 0.2942097026604069, "grad_norm": 0.9572675762355692, "learning_rate": 8.21081720638011e-06, "loss": 0.6453, "step": 5452 }, { "epoch": 0.29426366628892126, "grad_norm": 1.0082633080405732, "learning_rate": 8.210208232876721e-06, "loss": 0.4474, "step": 5453 }, { "epoch": 0.29431762991743565, "grad_norm": 0.9692325946468063, 
"learning_rate": 8.209599181479205e-06, "loss": 0.3964, "step": 5454 }, { "epoch": 0.29437159354595005, "grad_norm": 0.9185319853993519, "learning_rate": 8.208990052205073e-06, "loss": 0.4206, "step": 5455 }, { "epoch": 0.2944255571744644, "grad_norm": 1.0465981560266164, "learning_rate": 8.208380845071828e-06, "loss": 0.5051, "step": 5456 }, { "epoch": 0.2944795208029788, "grad_norm": 1.1622069843671032, "learning_rate": 8.207771560096982e-06, "loss": 0.5873, "step": 5457 }, { "epoch": 0.2945334844314932, "grad_norm": 1.2042948373974285, "learning_rate": 8.207162197298045e-06, "loss": 0.5492, "step": 5458 }, { "epoch": 0.29458744806000753, "grad_norm": 1.0334129981830786, "learning_rate": 8.206552756692532e-06, "loss": 0.5539, "step": 5459 }, { "epoch": 0.2946414116885219, "grad_norm": 0.967767523028003, "learning_rate": 8.205943238297956e-06, "loss": 0.3979, "step": 5460 }, { "epoch": 0.2946953753170363, "grad_norm": 1.030113174711838, "learning_rate": 8.205333642131838e-06, "loss": 0.4857, "step": 5461 }, { "epoch": 0.2947493389455507, "grad_norm": 1.0032352160325175, "learning_rate": 8.204723968211699e-06, "loss": 0.443, "step": 5462 }, { "epoch": 0.29480330257406506, "grad_norm": 1.2518514833718986, "learning_rate": 8.204114216555059e-06, "loss": 0.6762, "step": 5463 }, { "epoch": 0.29485726620257946, "grad_norm": 0.9502072659825652, "learning_rate": 8.203504387179446e-06, "loss": 0.4451, "step": 5464 }, { "epoch": 0.29491122983109386, "grad_norm": 1.308942790203988, "learning_rate": 8.202894480102382e-06, "loss": 0.6533, "step": 5465 }, { "epoch": 0.2949651934596082, "grad_norm": 1.2274345047975126, "learning_rate": 8.202284495341402e-06, "loss": 0.5171, "step": 5466 }, { "epoch": 0.2950191570881226, "grad_norm": 1.2400624548111532, "learning_rate": 8.201674432914033e-06, "loss": 0.6303, "step": 5467 }, { "epoch": 0.295073120716637, "grad_norm": 1.124512448773544, "learning_rate": 8.201064292837814e-06, "loss": 0.5204, "step": 5468 }, { "epoch": 0.2951270843451514, "grad_norm": 1.037354415023125, "learning_rate": 8.200454075130275e-06, "loss": 0.5452, "step": 5469 }, { "epoch": 0.29518104797366573, "grad_norm": 0.8969797021299134, "learning_rate": 8.19984377980896e-06, "loss": 0.3452, "step": 5470 }, { "epoch": 0.29523501160218013, "grad_norm": 1.0063473326281613, "learning_rate": 8.199233406891404e-06, "loss": 0.5309, "step": 5471 }, { "epoch": 0.29528897523069453, "grad_norm": 1.4009649960631787, "learning_rate": 8.198622956395153e-06, "loss": 0.686, "step": 5472 }, { "epoch": 0.29534293885920887, "grad_norm": 1.168596336176328, "learning_rate": 8.198012428337753e-06, "loss": 0.502, "step": 5473 }, { "epoch": 0.29539690248772327, "grad_norm": 1.0552223572901842, "learning_rate": 8.197401822736749e-06, "loss": 0.6253, "step": 5474 }, { "epoch": 0.29545086611623766, "grad_norm": 0.9684397244117144, "learning_rate": 8.196791139609691e-06, "loss": 0.4244, "step": 5475 }, { "epoch": 0.29550482974475206, "grad_norm": 1.0450748823856952, "learning_rate": 8.19618037897413e-06, "loss": 0.5186, "step": 5476 }, { "epoch": 0.2955587933732664, "grad_norm": 1.068329761333297, "learning_rate": 8.195569540847622e-06, "loss": 0.4829, "step": 5477 }, { "epoch": 0.2956127570017808, "grad_norm": 0.8647067709651073, "learning_rate": 8.194958625247721e-06, "loss": 0.4079, "step": 5478 }, { "epoch": 0.2956667206302952, "grad_norm": 1.0098411927804525, "learning_rate": 8.194347632191987e-06, "loss": 0.4356, "step": 5479 }, { "epoch": 0.29572068425880954, "grad_norm": 0.9538474646247859, "learning_rate": 
8.193736561697979e-06, "loss": 0.3655, "step": 5480 }, { "epoch": 0.29577464788732394, "grad_norm": 1.0720353565271512, "learning_rate": 8.19312541378326e-06, "loss": 0.4534, "step": 5481 }, { "epoch": 0.29582861151583834, "grad_norm": 1.0095604090945147, "learning_rate": 8.192514188465395e-06, "loss": 0.5597, "step": 5482 }, { "epoch": 0.29588257514435273, "grad_norm": 0.935768151951977, "learning_rate": 8.19190288576195e-06, "loss": 0.3941, "step": 5483 }, { "epoch": 0.2959365387728671, "grad_norm": 1.3040973182569031, "learning_rate": 8.1912915056905e-06, "loss": 0.6695, "step": 5484 }, { "epoch": 0.2959905024013815, "grad_norm": 0.9114292938686618, "learning_rate": 8.190680048268611e-06, "loss": 0.4794, "step": 5485 }, { "epoch": 0.29604446602989587, "grad_norm": 1.3142024446146092, "learning_rate": 8.190068513513859e-06, "loss": 0.5565, "step": 5486 }, { "epoch": 0.2960984296584102, "grad_norm": 0.8929360891297731, "learning_rate": 8.189456901443821e-06, "loss": 0.4014, "step": 5487 }, { "epoch": 0.2961523932869246, "grad_norm": 0.9409169018175859, "learning_rate": 8.18884521207607e-06, "loss": 0.5017, "step": 5488 }, { "epoch": 0.296206356915439, "grad_norm": 1.1118146943914398, "learning_rate": 8.188233445428194e-06, "loss": 0.4724, "step": 5489 }, { "epoch": 0.29626032054395335, "grad_norm": 1.0800866486821898, "learning_rate": 8.187621601517773e-06, "loss": 0.4814, "step": 5490 }, { "epoch": 0.29631428417246775, "grad_norm": 1.0933208759476125, "learning_rate": 8.18700968036239e-06, "loss": 0.4988, "step": 5491 }, { "epoch": 0.29636824780098214, "grad_norm": 1.030853753670057, "learning_rate": 8.186397681979635e-06, "loss": 0.3688, "step": 5492 }, { "epoch": 0.29642221142949654, "grad_norm": 1.0572947431436854, "learning_rate": 8.185785606387095e-06, "loss": 0.6066, "step": 5493 }, { "epoch": 0.2964761750580109, "grad_norm": 0.9663742219454755, "learning_rate": 8.185173453602363e-06, "loss": 0.4927, "step": 5494 }, { "epoch": 0.2965301386865253, "grad_norm": 0.8451379195775102, "learning_rate": 8.184561223643034e-06, "loss": 0.4113, "step": 5495 }, { "epoch": 0.2965841023150397, "grad_norm": 1.1458128831754122, "learning_rate": 8.183948916526701e-06, "loss": 0.5873, "step": 5496 }, { "epoch": 0.296638065943554, "grad_norm": 0.8250334071608266, "learning_rate": 8.183336532270966e-06, "loss": 0.2951, "step": 5497 }, { "epoch": 0.2966920295720684, "grad_norm": 1.075284745846835, "learning_rate": 8.182724070893427e-06, "loss": 0.4702, "step": 5498 }, { "epoch": 0.2967459932005828, "grad_norm": 0.8906695467443344, "learning_rate": 8.182111532411686e-06, "loss": 0.3468, "step": 5499 }, { "epoch": 0.2967999568290972, "grad_norm": 0.9297036636633297, "learning_rate": 8.181498916843352e-06, "loss": 0.4743, "step": 5500 }, { "epoch": 0.2967999568290972, "eval_loss": 0.5707113742828369, "eval_runtime": 163.793, "eval_samples_per_second": 20.996, "eval_steps_per_second": 0.879, "step": 5500 }, { "epoch": 0.29685392045761155, "grad_norm": 0.9712918253502731, "learning_rate": 8.180886224206028e-06, "loss": 0.5744, "step": 5501 }, { "epoch": 0.29690788408612595, "grad_norm": 1.1044244633896194, "learning_rate": 8.180273454517325e-06, "loss": 0.3832, "step": 5502 }, { "epoch": 0.29696184771464035, "grad_norm": 1.273284977741108, "learning_rate": 8.179660607794855e-06, "loss": 0.5419, "step": 5503 }, { "epoch": 0.2970158113431547, "grad_norm": 0.9816134931221421, "learning_rate": 8.179047684056233e-06, "loss": 0.4513, "step": 5504 }, { "epoch": 0.2970697749716691, "grad_norm": 1.0999858716250308, 
"learning_rate": 8.178434683319072e-06, "loss": 0.6246, "step": 5505 }, { "epoch": 0.2971237386001835, "grad_norm": 1.1085406786154242, "learning_rate": 8.177821605600992e-06, "loss": 0.5914, "step": 5506 }, { "epoch": 0.2971777022286979, "grad_norm": 0.9654091907524764, "learning_rate": 8.177208450919613e-06, "loss": 0.4483, "step": 5507 }, { "epoch": 0.2972316658572122, "grad_norm": 1.5915707278993687, "learning_rate": 8.17659521929256e-06, "loss": 0.604, "step": 5508 }, { "epoch": 0.2972856294857266, "grad_norm": 1.1974455303308587, "learning_rate": 8.175981910737454e-06, "loss": 0.5802, "step": 5509 }, { "epoch": 0.297339593114241, "grad_norm": 0.9642831386101156, "learning_rate": 8.175368525271924e-06, "loss": 0.6693, "step": 5510 }, { "epoch": 0.29739355674275536, "grad_norm": 1.290045038065177, "learning_rate": 8.174755062913601e-06, "loss": 0.4657, "step": 5511 }, { "epoch": 0.29744752037126976, "grad_norm": 1.0461644271114532, "learning_rate": 8.174141523680114e-06, "loss": 0.5876, "step": 5512 }, { "epoch": 0.29750148399978416, "grad_norm": 0.8306402278676918, "learning_rate": 8.173527907589097e-06, "loss": 0.4327, "step": 5513 }, { "epoch": 0.29755544762829855, "grad_norm": 1.5404685765421697, "learning_rate": 8.172914214658187e-06, "loss": 0.7321, "step": 5514 }, { "epoch": 0.2976094112568129, "grad_norm": 0.9319422185521044, "learning_rate": 8.172300444905023e-06, "loss": 0.4465, "step": 5515 }, { "epoch": 0.2976633748853273, "grad_norm": 1.017641275949194, "learning_rate": 8.17168659834724e-06, "loss": 0.5397, "step": 5516 }, { "epoch": 0.2977173385138417, "grad_norm": 1.1637338596536795, "learning_rate": 8.171072675002487e-06, "loss": 0.4962, "step": 5517 }, { "epoch": 0.29777130214235603, "grad_norm": 1.1100342430534302, "learning_rate": 8.170458674888408e-06, "loss": 0.4538, "step": 5518 }, { "epoch": 0.29782526577087043, "grad_norm": 1.0310449234586758, "learning_rate": 8.169844598022647e-06, "loss": 0.4852, "step": 5519 }, { "epoch": 0.2978792293993848, "grad_norm": 1.1038785231301893, "learning_rate": 8.169230444422854e-06, "loss": 0.4856, "step": 5520 }, { "epoch": 0.2979331930278992, "grad_norm": 0.8231013447904444, "learning_rate": 8.168616214106682e-06, "loss": 0.4012, "step": 5521 }, { "epoch": 0.29798715665641357, "grad_norm": 1.1562261800992588, "learning_rate": 8.168001907091783e-06, "loss": 0.4114, "step": 5522 }, { "epoch": 0.29804112028492796, "grad_norm": 1.1682632849547983, "learning_rate": 8.167387523395814e-06, "loss": 0.5034, "step": 5523 }, { "epoch": 0.29809508391344236, "grad_norm": 1.0879205718093867, "learning_rate": 8.166773063036431e-06, "loss": 0.4802, "step": 5524 }, { "epoch": 0.2981490475419567, "grad_norm": 0.8309307951049498, "learning_rate": 8.166158526031296e-06, "loss": 0.3696, "step": 5525 }, { "epoch": 0.2982030111704711, "grad_norm": 1.1950457863844937, "learning_rate": 8.165543912398073e-06, "loss": 0.6335, "step": 5526 }, { "epoch": 0.2982569747989855, "grad_norm": 0.990844990881177, "learning_rate": 8.16492922215442e-06, "loss": 0.4283, "step": 5527 }, { "epoch": 0.29831093842749984, "grad_norm": 1.28814764784975, "learning_rate": 8.164314455318013e-06, "loss": 0.4542, "step": 5528 }, { "epoch": 0.29836490205601424, "grad_norm": 0.9699348030926074, "learning_rate": 8.163699611906517e-06, "loss": 0.4362, "step": 5529 }, { "epoch": 0.29841886568452863, "grad_norm": 1.0363943659220578, "learning_rate": 8.163084691937599e-06, "loss": 0.5889, "step": 5530 }, { "epoch": 0.29847282931304303, "grad_norm": 1.2902372375169315, 
"learning_rate": 8.162469695428938e-06, "loss": 0.6224, "step": 5531 }, { "epoch": 0.2985267929415574, "grad_norm": 1.143048644818585, "learning_rate": 8.161854622398207e-06, "loss": 0.5849, "step": 5532 }, { "epoch": 0.29858075657007177, "grad_norm": 0.7877927090924, "learning_rate": 8.161239472863086e-06, "loss": 0.3388, "step": 5533 }, { "epoch": 0.29863472019858617, "grad_norm": 1.1883476167480753, "learning_rate": 8.16062424684125e-06, "loss": 0.5812, "step": 5534 }, { "epoch": 0.2986886838271005, "grad_norm": 0.9611499252872551, "learning_rate": 8.160008944350389e-06, "loss": 0.5783, "step": 5535 }, { "epoch": 0.2987426474556149, "grad_norm": 1.167194486148674, "learning_rate": 8.159393565408181e-06, "loss": 0.6517, "step": 5536 }, { "epoch": 0.2987966110841293, "grad_norm": 0.8945729996759143, "learning_rate": 8.158778110032316e-06, "loss": 0.418, "step": 5537 }, { "epoch": 0.2988505747126437, "grad_norm": 0.938910285460627, "learning_rate": 8.158162578240479e-06, "loss": 0.502, "step": 5538 }, { "epoch": 0.29890453834115804, "grad_norm": 1.0090214275223737, "learning_rate": 8.157546970050368e-06, "loss": 0.4514, "step": 5539 }, { "epoch": 0.29895850196967244, "grad_norm": 0.9330970410632513, "learning_rate": 8.15693128547967e-06, "loss": 0.4529, "step": 5540 }, { "epoch": 0.29901246559818684, "grad_norm": 1.0695322642703895, "learning_rate": 8.15631552454608e-06, "loss": 0.4586, "step": 5541 }, { "epoch": 0.2990664292267012, "grad_norm": 0.8922299103968803, "learning_rate": 8.155699687267302e-06, "loss": 0.462, "step": 5542 }, { "epoch": 0.2991203928552156, "grad_norm": 1.1305598644689097, "learning_rate": 8.155083773661027e-06, "loss": 0.5908, "step": 5543 }, { "epoch": 0.29917435648373, "grad_norm": 1.4083299711571309, "learning_rate": 8.154467783744962e-06, "loss": 0.5023, "step": 5544 }, { "epoch": 0.2992283201122444, "grad_norm": 0.9540100168050423, "learning_rate": 8.153851717536813e-06, "loss": 0.4728, "step": 5545 }, { "epoch": 0.2992822837407587, "grad_norm": 1.0334761589473818, "learning_rate": 8.153235575054284e-06, "loss": 0.4552, "step": 5546 }, { "epoch": 0.2993362473692731, "grad_norm": 1.1799571760495575, "learning_rate": 8.152619356315083e-06, "loss": 0.6491, "step": 5547 }, { "epoch": 0.2993902109977875, "grad_norm": 0.9253563342651993, "learning_rate": 8.152003061336923e-06, "loss": 0.4588, "step": 5548 }, { "epoch": 0.29944417462630185, "grad_norm": 0.9901137242563935, "learning_rate": 8.151386690137513e-06, "loss": 0.4987, "step": 5549 }, { "epoch": 0.29949813825481625, "grad_norm": 0.8852548257952214, "learning_rate": 8.150770242734572e-06, "loss": 0.4499, "step": 5550 }, { "epoch": 0.29955210188333065, "grad_norm": 0.7695439621827244, "learning_rate": 8.150153719145816e-06, "loss": 0.291, "step": 5551 }, { "epoch": 0.29960606551184504, "grad_norm": 0.9319945764255112, "learning_rate": 8.149537119388962e-06, "loss": 0.4422, "step": 5552 }, { "epoch": 0.2996600291403594, "grad_norm": 1.4321809363268323, "learning_rate": 8.148920443481735e-06, "loss": 0.827, "step": 5553 }, { "epoch": 0.2997139927688738, "grad_norm": 0.9170146925411133, "learning_rate": 8.14830369144186e-06, "loss": 0.4488, "step": 5554 }, { "epoch": 0.2997679563973882, "grad_norm": 1.099364464947338, "learning_rate": 8.147686863287059e-06, "loss": 0.4642, "step": 5555 }, { "epoch": 0.2998219200259025, "grad_norm": 1.1007866771080732, "learning_rate": 8.147069959035064e-06, "loss": 0.5789, "step": 5556 }, { "epoch": 0.2998758836544169, "grad_norm": 1.0343223268387993, "learning_rate": 
8.146452978703602e-06, "loss": 0.5172, "step": 5557 }, { "epoch": 0.2999298472829313, "grad_norm": 1.190795450972479, "learning_rate": 8.145835922310408e-06, "loss": 0.5474, "step": 5558 }, { "epoch": 0.29998381091144566, "grad_norm": 1.163337483113725, "learning_rate": 8.145218789873215e-06, "loss": 0.5431, "step": 5559 }, { "epoch": 0.30003777453996006, "grad_norm": 0.8093379823966488, "learning_rate": 8.144601581409764e-06, "loss": 0.3599, "step": 5560 }, { "epoch": 0.30009173816847445, "grad_norm": 1.0080482557997457, "learning_rate": 8.143984296937788e-06, "loss": 0.5517, "step": 5561 }, { "epoch": 0.30014570179698885, "grad_norm": 1.0964024267343018, "learning_rate": 8.143366936475033e-06, "loss": 0.5297, "step": 5562 }, { "epoch": 0.3001996654255032, "grad_norm": 1.290066255719801, "learning_rate": 8.142749500039243e-06, "loss": 0.6599, "step": 5563 }, { "epoch": 0.3002536290540176, "grad_norm": 1.0838978780428834, "learning_rate": 8.14213198764816e-06, "loss": 0.5452, "step": 5564 }, { "epoch": 0.300307592682532, "grad_norm": 0.9728809230325576, "learning_rate": 8.141514399319534e-06, "loss": 0.4632, "step": 5565 }, { "epoch": 0.30036155631104633, "grad_norm": 0.813989758108876, "learning_rate": 8.140896735071117e-06, "loss": 0.3638, "step": 5566 }, { "epoch": 0.3004155199395607, "grad_norm": 1.1119372025840815, "learning_rate": 8.140278994920657e-06, "loss": 0.4794, "step": 5567 }, { "epoch": 0.3004694835680751, "grad_norm": 1.0935860759829532, "learning_rate": 8.139661178885912e-06, "loss": 0.4132, "step": 5568 }, { "epoch": 0.3005234471965895, "grad_norm": 1.0107179224779197, "learning_rate": 8.139043286984638e-06, "loss": 0.4807, "step": 5569 }, { "epoch": 0.30057741082510386, "grad_norm": 0.8825258336629854, "learning_rate": 8.138425319234593e-06, "loss": 0.4398, "step": 5570 }, { "epoch": 0.30063137445361826, "grad_norm": 1.1179407831170807, "learning_rate": 8.137807275653538e-06, "loss": 0.4667, "step": 5571 }, { "epoch": 0.30068533808213266, "grad_norm": 1.1469055847717473, "learning_rate": 8.137189156259238e-06, "loss": 0.5493, "step": 5572 }, { "epoch": 0.300739301710647, "grad_norm": 1.084633207072137, "learning_rate": 8.136570961069456e-06, "loss": 0.441, "step": 5573 }, { "epoch": 0.3007932653391614, "grad_norm": 1.0297921058476203, "learning_rate": 8.13595269010196e-06, "loss": 0.589, "step": 5574 }, { "epoch": 0.3008472289676758, "grad_norm": 0.8372349074843124, "learning_rate": 8.135334343374522e-06, "loss": 0.3748, "step": 5575 }, { "epoch": 0.3009011925961902, "grad_norm": 1.0258839611266815, "learning_rate": 8.13471592090491e-06, "loss": 0.5307, "step": 5576 }, { "epoch": 0.30095515622470453, "grad_norm": 0.8096973955557034, "learning_rate": 8.1340974227109e-06, "loss": 0.3455, "step": 5577 }, { "epoch": 0.30100911985321893, "grad_norm": 1.1399273209511402, "learning_rate": 8.13347884881027e-06, "loss": 0.5883, "step": 5578 }, { "epoch": 0.30106308348173333, "grad_norm": 0.9137459273293648, "learning_rate": 8.132860199220796e-06, "loss": 0.4103, "step": 5579 }, { "epoch": 0.30111704711024767, "grad_norm": 0.7986444779728575, "learning_rate": 8.132241473960262e-06, "loss": 0.4176, "step": 5580 }, { "epoch": 0.30117101073876207, "grad_norm": 1.023032533506991, "learning_rate": 8.131622673046446e-06, "loss": 0.4219, "step": 5581 }, { "epoch": 0.30122497436727647, "grad_norm": 1.441354694703945, "learning_rate": 8.131003796497136e-06, "loss": 0.6018, "step": 5582 }, { "epoch": 0.30127893799579086, "grad_norm": 0.9453804870701896, "learning_rate": 
8.130384844330118e-06, "loss": 0.3586, "step": 5583 }, { "epoch": 0.3013329016243052, "grad_norm": 1.0246365104445057, "learning_rate": 8.129765816563182e-06, "loss": 0.5022, "step": 5584 }, { "epoch": 0.3013868652528196, "grad_norm": 1.1052074177657991, "learning_rate": 8.129146713214119e-06, "loss": 0.5767, "step": 5585 }, { "epoch": 0.301440828881334, "grad_norm": 1.1818970903401833, "learning_rate": 8.128527534300723e-06, "loss": 0.6273, "step": 5586 }, { "epoch": 0.30149479250984834, "grad_norm": 1.170423927567819, "learning_rate": 8.127908279840791e-06, "loss": 0.5209, "step": 5587 }, { "epoch": 0.30154875613836274, "grad_norm": 1.320802168194481, "learning_rate": 8.127288949852118e-06, "loss": 0.5008, "step": 5588 }, { "epoch": 0.30160271976687714, "grad_norm": 1.0899490896311732, "learning_rate": 8.126669544352506e-06, "loss": 0.5052, "step": 5589 }, { "epoch": 0.30165668339539153, "grad_norm": 0.9903482526565055, "learning_rate": 8.126050063359757e-06, "loss": 0.5444, "step": 5590 }, { "epoch": 0.3017106470239059, "grad_norm": 0.9486053527463052, "learning_rate": 8.125430506891677e-06, "loss": 0.4294, "step": 5591 }, { "epoch": 0.3017646106524203, "grad_norm": 1.1565609846487668, "learning_rate": 8.12481087496607e-06, "loss": 0.5562, "step": 5592 }, { "epoch": 0.30181857428093467, "grad_norm": 1.2788610653093737, "learning_rate": 8.124191167600749e-06, "loss": 0.5142, "step": 5593 }, { "epoch": 0.301872537909449, "grad_norm": 1.0357093273374591, "learning_rate": 8.123571384813521e-06, "loss": 0.4588, "step": 5594 }, { "epoch": 0.3019265015379634, "grad_norm": 1.2197215911674228, "learning_rate": 8.122951526622199e-06, "loss": 0.5764, "step": 5595 }, { "epoch": 0.3019804651664778, "grad_norm": 1.1537585243467803, "learning_rate": 8.1223315930446e-06, "loss": 0.4942, "step": 5596 }, { "epoch": 0.30203442879499215, "grad_norm": 0.9162902118186534, "learning_rate": 8.121711584098544e-06, "loss": 0.4406, "step": 5597 }, { "epoch": 0.30208839242350655, "grad_norm": 0.8782032433844676, "learning_rate": 8.121091499801846e-06, "loss": 0.589, "step": 5598 }, { "epoch": 0.30214235605202094, "grad_norm": 0.9464663188097919, "learning_rate": 8.120471340172328e-06, "loss": 0.4401, "step": 5599 }, { "epoch": 0.30219631968053534, "grad_norm": 1.006411203481918, "learning_rate": 8.119851105227819e-06, "loss": 0.3438, "step": 5600 }, { "epoch": 0.3022502833090497, "grad_norm": 0.953975744019338, "learning_rate": 8.119230794986143e-06, "loss": 0.4899, "step": 5601 }, { "epoch": 0.3023042469375641, "grad_norm": 1.0974408589153004, "learning_rate": 8.118610409465125e-06, "loss": 0.4289, "step": 5602 }, { "epoch": 0.3023582105660785, "grad_norm": 1.0855741626603244, "learning_rate": 8.117989948682597e-06, "loss": 0.4042, "step": 5603 }, { "epoch": 0.3024121741945928, "grad_norm": 0.9991161230718788, "learning_rate": 8.117369412656395e-06, "loss": 0.4602, "step": 5604 }, { "epoch": 0.3024661378231072, "grad_norm": 0.8678952774258104, "learning_rate": 8.11674880140435e-06, "loss": 0.3918, "step": 5605 }, { "epoch": 0.3025201014516216, "grad_norm": 0.9224038609131077, "learning_rate": 8.1161281149443e-06, "loss": 0.4366, "step": 5606 }, { "epoch": 0.302574065080136, "grad_norm": 1.2580085546909257, "learning_rate": 8.115507353294086e-06, "loss": 0.5465, "step": 5607 }, { "epoch": 0.30262802870865035, "grad_norm": 1.010569757245223, "learning_rate": 8.114886516471547e-06, "loss": 0.4961, "step": 5608 }, { "epoch": 0.30268199233716475, "grad_norm": 0.9199854508735003, "learning_rate": 8.114265604494528e-06, 
"loss": 0.3388, "step": 5609 }, { "epoch": 0.30273595596567915, "grad_norm": 1.1129787015246886, "learning_rate": 8.113644617380874e-06, "loss": 0.4383, "step": 5610 }, { "epoch": 0.3027899195941935, "grad_norm": 1.041937101636486, "learning_rate": 8.113023555148432e-06, "loss": 0.5354, "step": 5611 }, { "epoch": 0.3028438832227079, "grad_norm": 1.0270096053696323, "learning_rate": 8.112402417815053e-06, "loss": 0.5185, "step": 5612 }, { "epoch": 0.3028978468512223, "grad_norm": 0.9702692240517283, "learning_rate": 8.11178120539859e-06, "loss": 0.3952, "step": 5613 }, { "epoch": 0.3029518104797367, "grad_norm": 1.1345670728996977, "learning_rate": 8.111159917916895e-06, "loss": 0.7042, "step": 5614 }, { "epoch": 0.303005774108251, "grad_norm": 0.9973945848772792, "learning_rate": 8.110538555387825e-06, "loss": 0.5149, "step": 5615 }, { "epoch": 0.3030597377367654, "grad_norm": 0.9103133840872114, "learning_rate": 8.10991711782924e-06, "loss": 0.4237, "step": 5616 }, { "epoch": 0.3031137013652798, "grad_norm": 0.9735867666074121, "learning_rate": 8.109295605258999e-06, "loss": 0.4926, "step": 5617 }, { "epoch": 0.30316766499379416, "grad_norm": 0.9424365079311553, "learning_rate": 8.108674017694967e-06, "loss": 0.4675, "step": 5618 }, { "epoch": 0.30322162862230856, "grad_norm": 0.9558619225225495, "learning_rate": 8.108052355155009e-06, "loss": 0.4793, "step": 5619 }, { "epoch": 0.30327559225082296, "grad_norm": 0.8643693298393155, "learning_rate": 8.107430617656988e-06, "loss": 0.3747, "step": 5620 }, { "epoch": 0.30332955587933735, "grad_norm": 1.2971067720772373, "learning_rate": 8.106808805218778e-06, "loss": 0.5317, "step": 5621 }, { "epoch": 0.3033835195078517, "grad_norm": 1.3828155170950338, "learning_rate": 8.10618691785825e-06, "loss": 0.6234, "step": 5622 }, { "epoch": 0.3034374831363661, "grad_norm": 1.1002875177925207, "learning_rate": 8.105564955593276e-06, "loss": 0.5742, "step": 5623 }, { "epoch": 0.3034914467648805, "grad_norm": 1.0621698177276115, "learning_rate": 8.10494291844173e-06, "loss": 0.5223, "step": 5624 }, { "epoch": 0.30354541039339483, "grad_norm": 1.115907113000123, "learning_rate": 8.104320806421495e-06, "loss": 0.5872, "step": 5625 }, { "epoch": 0.30359937402190923, "grad_norm": 0.9397455022294577, "learning_rate": 8.103698619550448e-06, "loss": 0.5793, "step": 5626 }, { "epoch": 0.3036533376504236, "grad_norm": 0.9175872459607314, "learning_rate": 8.103076357846473e-06, "loss": 0.4196, "step": 5627 }, { "epoch": 0.30370730127893797, "grad_norm": 0.628694421975127, "learning_rate": 8.102454021327453e-06, "loss": 0.2351, "step": 5628 }, { "epoch": 0.30376126490745237, "grad_norm": 0.9911911831146212, "learning_rate": 8.101831610011273e-06, "loss": 0.4012, "step": 5629 }, { "epoch": 0.30381522853596676, "grad_norm": 1.0363664495500855, "learning_rate": 8.101209123915824e-06, "loss": 0.5631, "step": 5630 }, { "epoch": 0.30386919216448116, "grad_norm": 1.0672446721283464, "learning_rate": 8.100586563058997e-06, "loss": 0.5057, "step": 5631 }, { "epoch": 0.3039231557929955, "grad_norm": 0.8957773164484549, "learning_rate": 8.099963927458684e-06, "loss": 0.4355, "step": 5632 }, { "epoch": 0.3039771194215099, "grad_norm": 1.6643713377153944, "learning_rate": 8.09934121713278e-06, "loss": 0.7048, "step": 5633 }, { "epoch": 0.3040310830500243, "grad_norm": 0.8934258465842406, "learning_rate": 8.098718432099186e-06, "loss": 0.3854, "step": 5634 }, { "epoch": 0.30408504667853864, "grad_norm": 1.0817116820246995, "learning_rate": 8.098095572375796e-06, "loss": 0.7578, 
"step": 5635 }, { "epoch": 0.30413901030705304, "grad_norm": 1.2359778806944424, "learning_rate": 8.097472637980515e-06, "loss": 0.6524, "step": 5636 }, { "epoch": 0.30419297393556743, "grad_norm": 1.3710073335821784, "learning_rate": 8.096849628931244e-06, "loss": 0.643, "step": 5637 }, { "epoch": 0.30424693756408183, "grad_norm": 1.1954399048586752, "learning_rate": 8.096226545245893e-06, "loss": 0.5407, "step": 5638 }, { "epoch": 0.3043009011925962, "grad_norm": 1.0643216815620422, "learning_rate": 8.095603386942366e-06, "loss": 0.5492, "step": 5639 }, { "epoch": 0.30435486482111057, "grad_norm": 0.9259441105607429, "learning_rate": 8.094980154038576e-06, "loss": 0.4543, "step": 5640 }, { "epoch": 0.30440882844962497, "grad_norm": 0.9137327181493116, "learning_rate": 8.094356846552435e-06, "loss": 0.3863, "step": 5641 }, { "epoch": 0.3044627920781393, "grad_norm": 1.0378723688587277, "learning_rate": 8.093733464501856e-06, "loss": 0.4317, "step": 5642 }, { "epoch": 0.3045167557066537, "grad_norm": 1.130225795149892, "learning_rate": 8.093110007904758e-06, "loss": 0.6817, "step": 5643 }, { "epoch": 0.3045707193351681, "grad_norm": 0.9528413177732721, "learning_rate": 8.092486476779055e-06, "loss": 0.4801, "step": 5644 }, { "epoch": 0.3046246829636825, "grad_norm": 0.8584354188463329, "learning_rate": 8.091862871142673e-06, "loss": 0.416, "step": 5645 }, { "epoch": 0.30467864659219684, "grad_norm": 1.0655929187142261, "learning_rate": 8.091239191013534e-06, "loss": 0.5465, "step": 5646 }, { "epoch": 0.30473261022071124, "grad_norm": 1.1479349997764827, "learning_rate": 8.09061543640956e-06, "loss": 0.5168, "step": 5647 }, { "epoch": 0.30478657384922564, "grad_norm": 1.1849258972780432, "learning_rate": 8.089991607348682e-06, "loss": 0.5216, "step": 5648 }, { "epoch": 0.30484053747774, "grad_norm": 1.1107529981419964, "learning_rate": 8.08936770384883e-06, "loss": 0.5037, "step": 5649 }, { "epoch": 0.3048945011062544, "grad_norm": 1.0657859810291685, "learning_rate": 8.088743725927932e-06, "loss": 0.5348, "step": 5650 }, { "epoch": 0.3049484647347688, "grad_norm": 1.0764146558525167, "learning_rate": 8.088119673603924e-06, "loss": 0.3931, "step": 5651 }, { "epoch": 0.3050024283632832, "grad_norm": 1.1273693100484672, "learning_rate": 8.087495546894742e-06, "loss": 0.5869, "step": 5652 }, { "epoch": 0.3050563919917975, "grad_norm": 1.3430042820025332, "learning_rate": 8.086871345818322e-06, "loss": 0.5854, "step": 5653 }, { "epoch": 0.3051103556203119, "grad_norm": 0.9747383968458961, "learning_rate": 8.086247070392606e-06, "loss": 0.4819, "step": 5654 }, { "epoch": 0.3051643192488263, "grad_norm": 1.0013086931490989, "learning_rate": 8.085622720635536e-06, "loss": 0.3576, "step": 5655 }, { "epoch": 0.30521828287734065, "grad_norm": 1.2214685092822675, "learning_rate": 8.084998296565057e-06, "loss": 0.5054, "step": 5656 }, { "epoch": 0.30527224650585505, "grad_norm": 0.982081155987322, "learning_rate": 8.084373798199117e-06, "loss": 0.4717, "step": 5657 }, { "epoch": 0.30532621013436945, "grad_norm": 1.0092086351766365, "learning_rate": 8.083749225555659e-06, "loss": 0.4944, "step": 5658 }, { "epoch": 0.3053801737628838, "grad_norm": 1.1491669676617895, "learning_rate": 8.08312457865264e-06, "loss": 0.4769, "step": 5659 }, { "epoch": 0.3054341373913982, "grad_norm": 1.305334787842396, "learning_rate": 8.082499857508009e-06, "loss": 0.5999, "step": 5660 }, { "epoch": 0.3054881010199126, "grad_norm": 0.9042370450695271, "learning_rate": 8.081875062139724e-06, "loss": 0.428, "step": 5661 }, { 
"epoch": 0.305542064648427, "grad_norm": 0.9052514722664241, "learning_rate": 8.081250192565739e-06, "loss": 0.4168, "step": 5662 }, { "epoch": 0.3055960282769413, "grad_norm": 1.2640739692276923, "learning_rate": 8.080625248804018e-06, "loss": 0.684, "step": 5663 }, { "epoch": 0.3056499919054557, "grad_norm": 1.1584083082491656, "learning_rate": 8.080000230872518e-06, "loss": 0.5252, "step": 5664 }, { "epoch": 0.3057039555339701, "grad_norm": 0.9232452297622722, "learning_rate": 8.079375138789204e-06, "loss": 0.4807, "step": 5665 }, { "epoch": 0.30575791916248446, "grad_norm": 0.8430881472269192, "learning_rate": 8.078749972572044e-06, "loss": 0.4157, "step": 5666 }, { "epoch": 0.30581188279099886, "grad_norm": 0.8897413254190745, "learning_rate": 8.078124732239004e-06, "loss": 0.3522, "step": 5667 }, { "epoch": 0.30586584641951325, "grad_norm": 0.9793038600260184, "learning_rate": 8.077499417808053e-06, "loss": 0.4852, "step": 5668 }, { "epoch": 0.30591981004802765, "grad_norm": 1.1763457571185545, "learning_rate": 8.076874029297165e-06, "loss": 0.6048, "step": 5669 }, { "epoch": 0.305973773676542, "grad_norm": 1.0377660944345446, "learning_rate": 8.076248566724314e-06, "loss": 0.561, "step": 5670 }, { "epoch": 0.3060277373050564, "grad_norm": 1.012156394474549, "learning_rate": 8.075623030107475e-06, "loss": 0.5114, "step": 5671 }, { "epoch": 0.3060817009335708, "grad_norm": 1.1404360054140812, "learning_rate": 8.074997419464629e-06, "loss": 0.5797, "step": 5672 }, { "epoch": 0.30613566456208513, "grad_norm": 0.9314242918976164, "learning_rate": 8.074371734813754e-06, "loss": 0.5464, "step": 5673 }, { "epoch": 0.3061896281905995, "grad_norm": 1.0005502443048095, "learning_rate": 8.073745976172834e-06, "loss": 0.4868, "step": 5674 }, { "epoch": 0.3062435918191139, "grad_norm": 1.3320044308351207, "learning_rate": 8.073120143559855e-06, "loss": 0.546, "step": 5675 }, { "epoch": 0.3062975554476283, "grad_norm": 0.9117754759949876, "learning_rate": 8.072494236992803e-06, "loss": 0.3933, "step": 5676 }, { "epoch": 0.30635151907614266, "grad_norm": 1.0396727281547622, "learning_rate": 8.071868256489668e-06, "loss": 0.4479, "step": 5677 }, { "epoch": 0.30640548270465706, "grad_norm": 1.0364374186814083, "learning_rate": 8.071242202068439e-06, "loss": 0.3505, "step": 5678 }, { "epoch": 0.30645944633317146, "grad_norm": 0.9642933101022922, "learning_rate": 8.070616073747112e-06, "loss": 0.4248, "step": 5679 }, { "epoch": 0.3065134099616858, "grad_norm": 1.2408825193579636, "learning_rate": 8.069989871543683e-06, "loss": 0.6152, "step": 5680 }, { "epoch": 0.3065673735902002, "grad_norm": 0.9698010325738592, "learning_rate": 8.069363595476146e-06, "loss": 0.4464, "step": 5681 }, { "epoch": 0.3066213372187146, "grad_norm": 0.9346970151768461, "learning_rate": 8.068737245562504e-06, "loss": 0.4185, "step": 5682 }, { "epoch": 0.306675300847229, "grad_norm": 1.121652419064162, "learning_rate": 8.068110821820758e-06, "loss": 0.5193, "step": 5683 }, { "epoch": 0.30672926447574334, "grad_norm": 0.8499846399476231, "learning_rate": 8.067484324268912e-06, "loss": 0.375, "step": 5684 }, { "epoch": 0.30678322810425773, "grad_norm": 1.0615598606914582, "learning_rate": 8.066857752924973e-06, "loss": 0.5325, "step": 5685 }, { "epoch": 0.30683719173277213, "grad_norm": 0.864465587817257, "learning_rate": 8.066231107806947e-06, "loss": 0.4728, "step": 5686 }, { "epoch": 0.30689115536128647, "grad_norm": 0.9238729385419961, "learning_rate": 8.065604388932847e-06, "loss": 0.4502, "step": 5687 }, { "epoch": 
0.30694511898980087, "grad_norm": 0.704653921444429, "learning_rate": 8.064977596320684e-06, "loss": 0.2941, "step": 5688 }, { "epoch": 0.30699908261831527, "grad_norm": 0.7367501687721607, "learning_rate": 8.064350729988474e-06, "loss": 0.3706, "step": 5689 }, { "epoch": 0.30705304624682966, "grad_norm": 0.8970047055803194, "learning_rate": 8.063723789954233e-06, "loss": 0.5277, "step": 5690 }, { "epoch": 0.307107009875344, "grad_norm": 1.2819597049044584, "learning_rate": 8.063096776235978e-06, "loss": 0.6626, "step": 5691 }, { "epoch": 0.3071609735038584, "grad_norm": 1.1492900280504672, "learning_rate": 8.062469688851734e-06, "loss": 0.6614, "step": 5692 }, { "epoch": 0.3072149371323728, "grad_norm": 1.056764534893476, "learning_rate": 8.06184252781952e-06, "loss": 0.5453, "step": 5693 }, { "epoch": 0.30726890076088714, "grad_norm": 0.7326550091123926, "learning_rate": 8.061215293157365e-06, "loss": 0.2709, "step": 5694 }, { "epoch": 0.30732286438940154, "grad_norm": 0.9771661138204013, "learning_rate": 8.060587984883294e-06, "loss": 0.4315, "step": 5695 }, { "epoch": 0.30737682801791594, "grad_norm": 0.9582993041205412, "learning_rate": 8.059960603015337e-06, "loss": 0.3735, "step": 5696 }, { "epoch": 0.3074307916464303, "grad_norm": 0.8858920976016671, "learning_rate": 8.059333147571525e-06, "loss": 0.4375, "step": 5697 }, { "epoch": 0.3074847552749447, "grad_norm": 1.060543980001729, "learning_rate": 8.058705618569893e-06, "loss": 0.6025, "step": 5698 }, { "epoch": 0.3075387189034591, "grad_norm": 1.140007371132824, "learning_rate": 8.058078016028475e-06, "loss": 0.5694, "step": 5699 }, { "epoch": 0.30759268253197347, "grad_norm": 1.1097039745244839, "learning_rate": 8.057450339965313e-06, "loss": 0.5164, "step": 5700 }, { "epoch": 0.3076466461604878, "grad_norm": 1.064130736033157, "learning_rate": 8.056822590398441e-06, "loss": 0.6087, "step": 5701 }, { "epoch": 0.3077006097890022, "grad_norm": 0.8300452785622948, "learning_rate": 8.056194767345905e-06, "loss": 0.3429, "step": 5702 }, { "epoch": 0.3077545734175166, "grad_norm": 1.1591852350902305, "learning_rate": 8.05556687082575e-06, "loss": 0.5618, "step": 5703 }, { "epoch": 0.30780853704603095, "grad_norm": 1.0442871038567847, "learning_rate": 8.05493890085602e-06, "loss": 0.4645, "step": 5704 }, { "epoch": 0.30786250067454535, "grad_norm": 0.9316574993115146, "learning_rate": 8.054310857454764e-06, "loss": 0.4848, "step": 5705 }, { "epoch": 0.30791646430305974, "grad_norm": 1.0489283846846142, "learning_rate": 8.053682740640033e-06, "loss": 0.4811, "step": 5706 }, { "epoch": 0.30797042793157414, "grad_norm": 1.0731036279912236, "learning_rate": 8.05305455042988e-06, "loss": 0.508, "step": 5707 }, { "epoch": 0.3080243915600885, "grad_norm": 0.958058172229073, "learning_rate": 8.05242628684236e-06, "loss": 0.4211, "step": 5708 }, { "epoch": 0.3080783551886029, "grad_norm": 1.2974062314193937, "learning_rate": 8.051797949895529e-06, "loss": 0.6721, "step": 5709 }, { "epoch": 0.3081323188171173, "grad_norm": 1.0019057625137746, "learning_rate": 8.051169539607447e-06, "loss": 0.3736, "step": 5710 }, { "epoch": 0.3081862824456316, "grad_norm": 1.0763100277308877, "learning_rate": 8.050541055996173e-06, "loss": 0.4442, "step": 5711 }, { "epoch": 0.308240246074146, "grad_norm": 0.9031026163772028, "learning_rate": 8.049912499079773e-06, "loss": 0.4676, "step": 5712 }, { "epoch": 0.3082942097026604, "grad_norm": 1.083523042534364, "learning_rate": 8.049283868876312e-06, "loss": 0.4232, "step": 5713 }, { "epoch": 0.3083481733311748, 
"grad_norm": 1.0477609441170574, "learning_rate": 8.048655165403856e-06, "loss": 0.4302, "step": 5714 }, { "epoch": 0.30840213695968915, "grad_norm": 1.0982322460224874, "learning_rate": 8.048026388680476e-06, "loss": 0.4446, "step": 5715 }, { "epoch": 0.30845610058820355, "grad_norm": 1.1789833701804686, "learning_rate": 8.04739753872424e-06, "loss": 0.56, "step": 5716 }, { "epoch": 0.30851006421671795, "grad_norm": 0.8298565856289954, "learning_rate": 8.046768615553229e-06, "loss": 0.4233, "step": 5717 }, { "epoch": 0.3085640278452323, "grad_norm": 0.99642188653052, "learning_rate": 8.046139619185514e-06, "loss": 0.3782, "step": 5718 }, { "epoch": 0.3086179914737467, "grad_norm": 0.9188234053634301, "learning_rate": 8.045510549639173e-06, "loss": 0.5027, "step": 5719 }, { "epoch": 0.3086719551022611, "grad_norm": 0.9858820941218008, "learning_rate": 8.044881406932285e-06, "loss": 0.5159, "step": 5720 }, { "epoch": 0.3087259187307755, "grad_norm": 1.1112577772595984, "learning_rate": 8.044252191082936e-06, "loss": 0.4953, "step": 5721 }, { "epoch": 0.3087798823592898, "grad_norm": 1.1428043491063755, "learning_rate": 8.043622902109207e-06, "loss": 0.5777, "step": 5722 }, { "epoch": 0.3088338459878042, "grad_norm": 0.9696183711473637, "learning_rate": 8.042993540029186e-06, "loss": 0.4286, "step": 5723 }, { "epoch": 0.3088878096163186, "grad_norm": 0.777400507112986, "learning_rate": 8.042364104860962e-06, "loss": 0.3256, "step": 5724 }, { "epoch": 0.30894177324483296, "grad_norm": 1.0089457247572042, "learning_rate": 8.041734596622623e-06, "loss": 0.4298, "step": 5725 }, { "epoch": 0.30899573687334736, "grad_norm": 0.987856301123255, "learning_rate": 8.041105015332264e-06, "loss": 0.3837, "step": 5726 }, { "epoch": 0.30904970050186176, "grad_norm": 1.1344529722271766, "learning_rate": 8.040475361007982e-06, "loss": 0.4597, "step": 5727 }, { "epoch": 0.3091036641303761, "grad_norm": 1.1778670888542118, "learning_rate": 8.039845633667867e-06, "loss": 0.5681, "step": 5728 }, { "epoch": 0.3091576277588905, "grad_norm": 1.0456648659823868, "learning_rate": 8.039215833330026e-06, "loss": 0.5533, "step": 5729 }, { "epoch": 0.3092115913874049, "grad_norm": 1.0425636848769562, "learning_rate": 8.038585960012553e-06, "loss": 0.4264, "step": 5730 }, { "epoch": 0.3092655550159193, "grad_norm": 0.9565466919863763, "learning_rate": 8.037956013733556e-06, "loss": 0.5255, "step": 5731 }, { "epoch": 0.30931951864443363, "grad_norm": 1.0693905086280673, "learning_rate": 8.03732599451114e-06, "loss": 0.4671, "step": 5732 }, { "epoch": 0.30937348227294803, "grad_norm": 1.0379982879624805, "learning_rate": 8.036695902363408e-06, "loss": 0.4308, "step": 5733 }, { "epoch": 0.30942744590146243, "grad_norm": 0.8924010759329344, "learning_rate": 8.036065737308476e-06, "loss": 0.3995, "step": 5734 }, { "epoch": 0.30948140952997677, "grad_norm": 0.9129497072366776, "learning_rate": 8.03543549936445e-06, "loss": 0.4324, "step": 5735 }, { "epoch": 0.30953537315849117, "grad_norm": 0.9611986766029539, "learning_rate": 8.034805188549446e-06, "loss": 0.5475, "step": 5736 }, { "epoch": 0.30958933678700556, "grad_norm": 0.8381527660725501, "learning_rate": 8.03417480488158e-06, "loss": 0.3226, "step": 5737 }, { "epoch": 0.30964330041551996, "grad_norm": 1.1066767969846116, "learning_rate": 8.03354434837897e-06, "loss": 0.5505, "step": 5738 }, { "epoch": 0.3096972640440343, "grad_norm": 0.9499330874896346, "learning_rate": 8.032913819059734e-06, "loss": 0.4768, "step": 5739 }, { "epoch": 0.3097512276725487, "grad_norm": 
0.8730288590704799, "learning_rate": 8.032283216941997e-06, "loss": 0.4084, "step": 5740 }, { "epoch": 0.3098051913010631, "grad_norm": 0.952662245864132, "learning_rate": 8.031652542043881e-06, "loss": 0.3509, "step": 5741 }, { "epoch": 0.30985915492957744, "grad_norm": 1.0508404580333337, "learning_rate": 8.031021794383513e-06, "loss": 0.4786, "step": 5742 }, { "epoch": 0.30991311855809184, "grad_norm": 1.0277263668851035, "learning_rate": 8.030390973979021e-06, "loss": 0.4977, "step": 5743 }, { "epoch": 0.30996708218660624, "grad_norm": 0.9290790689098543, "learning_rate": 8.029760080848537e-06, "loss": 0.462, "step": 5744 }, { "epoch": 0.31002104581512063, "grad_norm": 0.9266918610806855, "learning_rate": 8.029129115010191e-06, "loss": 0.3753, "step": 5745 }, { "epoch": 0.310075009443635, "grad_norm": 0.9818755824028417, "learning_rate": 8.028498076482119e-06, "loss": 0.6027, "step": 5746 }, { "epoch": 0.31012897307214937, "grad_norm": 0.9790688257925357, "learning_rate": 8.027866965282457e-06, "loss": 0.5445, "step": 5747 }, { "epoch": 0.31018293670066377, "grad_norm": 0.921062702338916, "learning_rate": 8.027235781429345e-06, "loss": 0.4154, "step": 5748 }, { "epoch": 0.3102369003291781, "grad_norm": 0.94225339249066, "learning_rate": 8.026604524940922e-06, "loss": 0.487, "step": 5749 }, { "epoch": 0.3102908639576925, "grad_norm": 0.9280899582904373, "learning_rate": 8.025973195835332e-06, "loss": 0.5227, "step": 5750 }, { "epoch": 0.3103448275862069, "grad_norm": 1.2554964797386488, "learning_rate": 8.025341794130722e-06, "loss": 0.5629, "step": 5751 }, { "epoch": 0.3103987912147213, "grad_norm": 0.914519474507173, "learning_rate": 8.024710319845233e-06, "loss": 0.5197, "step": 5752 }, { "epoch": 0.31045275484323565, "grad_norm": 1.1169964808535187, "learning_rate": 8.024078772997024e-06, "loss": 0.4897, "step": 5753 }, { "epoch": 0.31050671847175004, "grad_norm": 1.0009252970778322, "learning_rate": 8.023447153604236e-06, "loss": 0.5467, "step": 5754 }, { "epoch": 0.31056068210026444, "grad_norm": 0.7320710509581911, "learning_rate": 8.02281546168503e-06, "loss": 0.3095, "step": 5755 }, { "epoch": 0.3106146457287788, "grad_norm": 1.1227087692522757, "learning_rate": 8.022183697257557e-06, "loss": 0.5622, "step": 5756 }, { "epoch": 0.3106686093572932, "grad_norm": 0.9435790618574552, "learning_rate": 8.021551860339976e-06, "loss": 0.4308, "step": 5757 }, { "epoch": 0.3107225729858076, "grad_norm": 1.1943472855606505, "learning_rate": 8.020919950950448e-06, "loss": 0.5189, "step": 5758 }, { "epoch": 0.310776536614322, "grad_norm": 1.1136469037947343, "learning_rate": 8.020287969107132e-06, "loss": 0.5543, "step": 5759 }, { "epoch": 0.3108305002428363, "grad_norm": 1.0289136561858603, "learning_rate": 8.019655914828195e-06, "loss": 0.4599, "step": 5760 }, { "epoch": 0.3108844638713507, "grad_norm": 0.9797790273514165, "learning_rate": 8.0190237881318e-06, "loss": 0.4706, "step": 5761 }, { "epoch": 0.3109384274998651, "grad_norm": 0.7853063307729679, "learning_rate": 8.018391589036115e-06, "loss": 0.3983, "step": 5762 }, { "epoch": 0.31099239112837945, "grad_norm": 1.1835599872882585, "learning_rate": 8.017759317559312e-06, "loss": 0.5284, "step": 5763 }, { "epoch": 0.31104635475689385, "grad_norm": 1.0717012281299545, "learning_rate": 8.017126973719561e-06, "loss": 0.5832, "step": 5764 }, { "epoch": 0.31110031838540825, "grad_norm": 0.915749955156974, "learning_rate": 8.01649455753504e-06, "loss": 0.3854, "step": 5765 }, { "epoch": 0.3111542820139226, "grad_norm": 0.8535666670822716, 
"learning_rate": 8.01586206902392e-06, "loss": 0.4165, "step": 5766 }, { "epoch": 0.311208245642437, "grad_norm": 0.9842008297824502, "learning_rate": 8.015229508204383e-06, "loss": 0.4358, "step": 5767 }, { "epoch": 0.3112622092709514, "grad_norm": 1.076046957118243, "learning_rate": 8.014596875094609e-06, "loss": 0.507, "step": 5768 }, { "epoch": 0.3113161728994658, "grad_norm": 0.9890596293772697, "learning_rate": 8.01396416971278e-06, "loss": 0.5824, "step": 5769 }, { "epoch": 0.3113701365279801, "grad_norm": 1.2345176238394864, "learning_rate": 8.01333139207708e-06, "loss": 0.5423, "step": 5770 }, { "epoch": 0.3114241001564945, "grad_norm": 1.1293622033199444, "learning_rate": 8.012698542205698e-06, "loss": 0.5652, "step": 5771 }, { "epoch": 0.3114780637850089, "grad_norm": 1.0954738166785163, "learning_rate": 8.012065620116819e-06, "loss": 0.538, "step": 5772 }, { "epoch": 0.31153202741352326, "grad_norm": 0.7255140756272943, "learning_rate": 8.011432625828636e-06, "loss": 0.2899, "step": 5773 }, { "epoch": 0.31158599104203766, "grad_norm": 0.8835809701839762, "learning_rate": 8.010799559359342e-06, "loss": 0.4119, "step": 5774 }, { "epoch": 0.31163995467055206, "grad_norm": 0.9444065081302631, "learning_rate": 8.010166420727132e-06, "loss": 0.4164, "step": 5775 }, { "epoch": 0.31169391829906645, "grad_norm": 1.251770748116836, "learning_rate": 8.009533209950201e-06, "loss": 0.7404, "step": 5776 }, { "epoch": 0.3117478819275808, "grad_norm": 1.0206917175047399, "learning_rate": 8.008899927046752e-06, "loss": 0.4548, "step": 5777 }, { "epoch": 0.3118018455560952, "grad_norm": 0.6878741713820212, "learning_rate": 8.008266572034984e-06, "loss": 0.2496, "step": 5778 }, { "epoch": 0.3118558091846096, "grad_norm": 1.025049048254685, "learning_rate": 8.0076331449331e-06, "loss": 0.5123, "step": 5779 }, { "epoch": 0.31190977281312393, "grad_norm": 1.2347249780374974, "learning_rate": 8.006999645759305e-06, "loss": 0.6378, "step": 5780 }, { "epoch": 0.31196373644163833, "grad_norm": 0.9175210376702057, "learning_rate": 8.006366074531808e-06, "loss": 0.5526, "step": 5781 }, { "epoch": 0.3120177000701527, "grad_norm": 1.0859465031888946, "learning_rate": 8.005732431268817e-06, "loss": 0.604, "step": 5782 }, { "epoch": 0.3120716636986671, "grad_norm": 1.2249113840135557, "learning_rate": 8.005098715988543e-06, "loss": 0.477, "step": 5783 }, { "epoch": 0.31212562732718147, "grad_norm": 1.0952431674957677, "learning_rate": 8.004464928709203e-06, "loss": 0.4421, "step": 5784 }, { "epoch": 0.31217959095569586, "grad_norm": 0.9588434052193681, "learning_rate": 8.003831069449008e-06, "loss": 0.3682, "step": 5785 }, { "epoch": 0.31223355458421026, "grad_norm": 1.2124834730959386, "learning_rate": 8.003197138226178e-06, "loss": 0.5892, "step": 5786 }, { "epoch": 0.3122875182127246, "grad_norm": 1.0599732871955534, "learning_rate": 8.002563135058935e-06, "loss": 0.5058, "step": 5787 }, { "epoch": 0.312341481841239, "grad_norm": 1.1721815324702174, "learning_rate": 8.001929059965499e-06, "loss": 0.6736, "step": 5788 }, { "epoch": 0.3123954454697534, "grad_norm": 0.9858698274947841, "learning_rate": 8.001294912964092e-06, "loss": 0.4858, "step": 5789 }, { "epoch": 0.3124494090982678, "grad_norm": 1.0448021397704002, "learning_rate": 8.000660694072944e-06, "loss": 0.6486, "step": 5790 }, { "epoch": 0.31250337272678214, "grad_norm": 0.929609520581377, "learning_rate": 8.000026403310278e-06, "loss": 0.4042, "step": 5791 }, { "epoch": 0.31255733635529653, "grad_norm": 0.9196163368492284, "learning_rate": 
7.999392040694329e-06, "loss": 0.3877, "step": 5792 }, { "epoch": 0.31261129998381093, "grad_norm": 0.9868209275203024, "learning_rate": 7.998757606243326e-06, "loss": 0.4454, "step": 5793 }, { "epoch": 0.3126652636123253, "grad_norm": 1.0883217893689185, "learning_rate": 7.998123099975507e-06, "loss": 0.6294, "step": 5794 }, { "epoch": 0.31271922724083967, "grad_norm": 0.9490428151289455, "learning_rate": 7.997488521909103e-06, "loss": 0.5898, "step": 5795 }, { "epoch": 0.31277319086935407, "grad_norm": 1.1350095340638253, "learning_rate": 7.996853872062359e-06, "loss": 0.5844, "step": 5796 }, { "epoch": 0.3128271544978684, "grad_norm": 0.8904157067709089, "learning_rate": 7.996219150453509e-06, "loss": 0.4268, "step": 5797 }, { "epoch": 0.3128811181263828, "grad_norm": 0.906666002328019, "learning_rate": 7.995584357100798e-06, "loss": 0.4028, "step": 5798 }, { "epoch": 0.3129350817548972, "grad_norm": 1.0366802775180255, "learning_rate": 7.994949492022472e-06, "loss": 0.4525, "step": 5799 }, { "epoch": 0.3129890453834116, "grad_norm": 0.9826857575165051, "learning_rate": 7.994314555236776e-06, "loss": 0.4226, "step": 5800 }, { "epoch": 0.31304300901192594, "grad_norm": 0.7873717905009159, "learning_rate": 7.993679546761962e-06, "loss": 0.3608, "step": 5801 }, { "epoch": 0.31309697264044034, "grad_norm": 0.8321285685663172, "learning_rate": 7.993044466616275e-06, "loss": 0.3378, "step": 5802 }, { "epoch": 0.31315093626895474, "grad_norm": 0.8949440503810001, "learning_rate": 7.99240931481797e-06, "loss": 0.3047, "step": 5803 }, { "epoch": 0.3132048998974691, "grad_norm": 1.257705208503103, "learning_rate": 7.991774091385306e-06, "loss": 0.6254, "step": 5804 }, { "epoch": 0.3132588635259835, "grad_norm": 1.399848691701927, "learning_rate": 7.991138796336537e-06, "loss": 0.5634, "step": 5805 }, { "epoch": 0.3133128271544979, "grad_norm": 1.0304363885464278, "learning_rate": 7.99050342968992e-06, "loss": 0.4374, "step": 5806 }, { "epoch": 0.3133667907830123, "grad_norm": 1.1844254310899136, "learning_rate": 7.989867991463717e-06, "loss": 0.6405, "step": 5807 }, { "epoch": 0.3134207544115266, "grad_norm": 1.000691911414819, "learning_rate": 7.989232481676196e-06, "loss": 0.429, "step": 5808 }, { "epoch": 0.313474718040041, "grad_norm": 1.2173707285541955, "learning_rate": 7.988596900345615e-06, "loss": 0.552, "step": 5809 }, { "epoch": 0.3135286816685554, "grad_norm": 1.0927331513298872, "learning_rate": 7.987961247490246e-06, "loss": 0.5113, "step": 5810 }, { "epoch": 0.31358264529706975, "grad_norm": 0.9120528394989482, "learning_rate": 7.987325523128357e-06, "loss": 0.4621, "step": 5811 }, { "epoch": 0.31363660892558415, "grad_norm": 1.0221986062073876, "learning_rate": 7.986689727278219e-06, "loss": 0.4781, "step": 5812 }, { "epoch": 0.31369057255409855, "grad_norm": 1.1210470670955046, "learning_rate": 7.986053859958104e-06, "loss": 0.652, "step": 5813 }, { "epoch": 0.31374453618261294, "grad_norm": 0.9254124183949032, "learning_rate": 7.985417921186292e-06, "loss": 0.4828, "step": 5814 }, { "epoch": 0.3137984998111273, "grad_norm": 1.044871074608017, "learning_rate": 7.984781910981055e-06, "loss": 0.5252, "step": 5815 }, { "epoch": 0.3138524634396417, "grad_norm": 1.1903180790776156, "learning_rate": 7.984145829360676e-06, "loss": 0.5789, "step": 5816 }, { "epoch": 0.3139064270681561, "grad_norm": 1.1783099741528913, "learning_rate": 7.983509676343435e-06, "loss": 0.4649, "step": 5817 }, { "epoch": 0.3139603906966704, "grad_norm": 1.1873672591705628, "learning_rate": 
7.98287345194762e-06, "loss": 0.4212, "step": 5818 }, { "epoch": 0.3140143543251848, "grad_norm": 1.0455420534572923, "learning_rate": 7.98223715619151e-06, "loss": 0.5026, "step": 5819 }, { "epoch": 0.3140683179536992, "grad_norm": 0.874271130430988, "learning_rate": 7.981600789093395e-06, "loss": 0.3778, "step": 5820 }, { "epoch": 0.3141222815822136, "grad_norm": 0.9671985515972982, "learning_rate": 7.98096435067157e-06, "loss": 0.4618, "step": 5821 }, { "epoch": 0.31417624521072796, "grad_norm": 1.0907235402612039, "learning_rate": 7.980327840944318e-06, "loss": 0.5757, "step": 5822 }, { "epoch": 0.31423020883924235, "grad_norm": 0.9567141576071382, "learning_rate": 7.979691259929942e-06, "loss": 0.4432, "step": 5823 }, { "epoch": 0.31428417246775675, "grad_norm": 0.973129899444177, "learning_rate": 7.97905460764673e-06, "loss": 0.4555, "step": 5824 }, { "epoch": 0.3143381360962711, "grad_norm": 1.265399572361619, "learning_rate": 7.978417884112986e-06, "loss": 0.5079, "step": 5825 }, { "epoch": 0.3143920997247855, "grad_norm": 1.0640899919627675, "learning_rate": 7.977781089347007e-06, "loss": 0.4992, "step": 5826 }, { "epoch": 0.3144460633532999, "grad_norm": 0.9683520723134889, "learning_rate": 7.977144223367097e-06, "loss": 0.387, "step": 5827 }, { "epoch": 0.3145000269818143, "grad_norm": 0.930444249927146, "learning_rate": 7.976507286191557e-06, "loss": 0.4317, "step": 5828 }, { "epoch": 0.3145539906103286, "grad_norm": 0.9596739957461282, "learning_rate": 7.975870277838695e-06, "loss": 0.5599, "step": 5829 }, { "epoch": 0.314607954238843, "grad_norm": 1.0553082050145277, "learning_rate": 7.97523319832682e-06, "loss": 0.4343, "step": 5830 }, { "epoch": 0.3146619178673574, "grad_norm": 0.835048279175396, "learning_rate": 7.974596047674244e-06, "loss": 0.3504, "step": 5831 }, { "epoch": 0.31471588149587176, "grad_norm": 1.120678880375561, "learning_rate": 7.973958825899276e-06, "loss": 0.6657, "step": 5832 }, { "epoch": 0.31476984512438616, "grad_norm": 0.9853529486491105, "learning_rate": 7.973321533020232e-06, "loss": 0.5031, "step": 5833 }, { "epoch": 0.31482380875290056, "grad_norm": 0.8893062074578372, "learning_rate": 7.972684169055427e-06, "loss": 0.369, "step": 5834 }, { "epoch": 0.3148777723814149, "grad_norm": 1.0920588924263088, "learning_rate": 7.97204673402318e-06, "loss": 0.4106, "step": 5835 }, { "epoch": 0.3149317360099293, "grad_norm": 1.1086154064300722, "learning_rate": 7.971409227941814e-06, "loss": 0.4241, "step": 5836 }, { "epoch": 0.3149856996384437, "grad_norm": 0.9581381485147547, "learning_rate": 7.97077165082965e-06, "loss": 0.4289, "step": 5837 }, { "epoch": 0.3150396632669581, "grad_norm": 1.22657202320478, "learning_rate": 7.97013400270501e-06, "loss": 0.5311, "step": 5838 }, { "epoch": 0.31509362689547243, "grad_norm": 0.9912941421355445, "learning_rate": 7.969496283586224e-06, "loss": 0.4534, "step": 5839 }, { "epoch": 0.31514759052398683, "grad_norm": 1.15814048669185, "learning_rate": 7.968858493491618e-06, "loss": 0.5488, "step": 5840 }, { "epoch": 0.31520155415250123, "grad_norm": 1.1456935878843448, "learning_rate": 7.968220632439524e-06, "loss": 0.4011, "step": 5841 }, { "epoch": 0.31525551778101557, "grad_norm": 1.2962206486990293, "learning_rate": 7.967582700448279e-06, "loss": 0.5436, "step": 5842 }, { "epoch": 0.31530948140952997, "grad_norm": 0.8289831987761082, "learning_rate": 7.966944697536211e-06, "loss": 0.3543, "step": 5843 }, { "epoch": 0.31536344503804437, "grad_norm": 0.8939314937567485, "learning_rate": 7.96630662372166e-06, 
"loss": 0.3883, "step": 5844 }, { "epoch": 0.31541740866655876, "grad_norm": 0.9971532050688644, "learning_rate": 7.965668479022964e-06, "loss": 0.4185, "step": 5845 }, { "epoch": 0.3154713722950731, "grad_norm": 1.0901568973577713, "learning_rate": 7.965030263458466e-06, "loss": 0.4931, "step": 5846 }, { "epoch": 0.3155253359235875, "grad_norm": 1.259989299005883, "learning_rate": 7.964391977046508e-06, "loss": 0.523, "step": 5847 }, { "epoch": 0.3155792995521019, "grad_norm": 1.0859776653563968, "learning_rate": 7.96375361980543e-06, "loss": 0.5298, "step": 5848 }, { "epoch": 0.31563326318061624, "grad_norm": 1.1906997893829152, "learning_rate": 7.96311519175359e-06, "loss": 0.6941, "step": 5849 }, { "epoch": 0.31568722680913064, "grad_norm": 0.9914111111225924, "learning_rate": 7.962476692909327e-06, "loss": 0.5361, "step": 5850 }, { "epoch": 0.31574119043764504, "grad_norm": 1.1997176849686149, "learning_rate": 7.961838123290997e-06, "loss": 0.5156, "step": 5851 }, { "epoch": 0.31579515406615943, "grad_norm": 1.0835697909731707, "learning_rate": 7.96119948291695e-06, "loss": 0.4638, "step": 5852 }, { "epoch": 0.3158491176946738, "grad_norm": 1.298205417606924, "learning_rate": 7.960560771805544e-06, "loss": 0.5146, "step": 5853 }, { "epoch": 0.3159030813231882, "grad_norm": 0.9745858262658678, "learning_rate": 7.959921989975136e-06, "loss": 0.4748, "step": 5854 }, { "epoch": 0.31595704495170257, "grad_norm": 1.1012545910573555, "learning_rate": 7.959283137444083e-06, "loss": 0.5379, "step": 5855 }, { "epoch": 0.3160110085802169, "grad_norm": 0.9030981716785355, "learning_rate": 7.958644214230749e-06, "loss": 0.4077, "step": 5856 }, { "epoch": 0.3160649722087313, "grad_norm": 0.9730020436697281, "learning_rate": 7.958005220353495e-06, "loss": 0.4376, "step": 5857 }, { "epoch": 0.3161189358372457, "grad_norm": 1.055955379773911, "learning_rate": 7.957366155830687e-06, "loss": 0.5935, "step": 5858 }, { "epoch": 0.3161728994657601, "grad_norm": 1.0181651174423476, "learning_rate": 7.956727020680692e-06, "loss": 0.6437, "step": 5859 }, { "epoch": 0.31622686309427445, "grad_norm": 0.9700126992750109, "learning_rate": 7.956087814921884e-06, "loss": 0.4049, "step": 5860 }, { "epoch": 0.31628082672278884, "grad_norm": 1.0385669158530928, "learning_rate": 7.955448538572626e-06, "loss": 0.5072, "step": 5861 }, { "epoch": 0.31633479035130324, "grad_norm": 1.2194739998610478, "learning_rate": 7.954809191651298e-06, "loss": 0.8048, "step": 5862 }, { "epoch": 0.3163887539798176, "grad_norm": 1.0953804244036105, "learning_rate": 7.954169774176275e-06, "loss": 0.4738, "step": 5863 }, { "epoch": 0.316442717608332, "grad_norm": 0.9178850994733903, "learning_rate": 7.95353028616593e-06, "loss": 0.4045, "step": 5864 }, { "epoch": 0.3164966812368464, "grad_norm": 1.073957676644468, "learning_rate": 7.952890727638646e-06, "loss": 0.4289, "step": 5865 }, { "epoch": 0.3165506448653607, "grad_norm": 1.1100709617649702, "learning_rate": 7.952251098612807e-06, "loss": 0.4919, "step": 5866 }, { "epoch": 0.3166046084938751, "grad_norm": 0.9488450181157873, "learning_rate": 7.951611399106792e-06, "loss": 0.3607, "step": 5867 }, { "epoch": 0.3166585721223895, "grad_norm": 1.0707686511755923, "learning_rate": 7.950971629138986e-06, "loss": 0.6324, "step": 5868 }, { "epoch": 0.3167125357509039, "grad_norm": 1.3027139485558845, "learning_rate": 7.95033178872778e-06, "loss": 0.7684, "step": 5869 }, { "epoch": 0.31676649937941825, "grad_norm": 1.1559163865572664, "learning_rate": 7.949691877891564e-06, "loss": 0.6084, 
"step": 5870 }, { "epoch": 0.31682046300793265, "grad_norm": 1.154749064820439, "learning_rate": 7.949051896648726e-06, "loss": 0.5162, "step": 5871 }, { "epoch": 0.31687442663644705, "grad_norm": 0.8885038138718508, "learning_rate": 7.948411845017665e-06, "loss": 0.3846, "step": 5872 }, { "epoch": 0.3169283902649614, "grad_norm": 1.277908554544182, "learning_rate": 7.94777172301677e-06, "loss": 0.5957, "step": 5873 }, { "epoch": 0.3169823538934758, "grad_norm": 0.9126303217641203, "learning_rate": 7.947131530664444e-06, "loss": 0.4912, "step": 5874 }, { "epoch": 0.3170363175219902, "grad_norm": 0.9861993525521239, "learning_rate": 7.946491267979084e-06, "loss": 0.5479, "step": 5875 }, { "epoch": 0.3170902811505046, "grad_norm": 1.1387184460532889, "learning_rate": 7.945850934979095e-06, "loss": 0.5, "step": 5876 }, { "epoch": 0.3171442447790189, "grad_norm": 0.8190446527054122, "learning_rate": 7.945210531682878e-06, "loss": 0.3435, "step": 5877 }, { "epoch": 0.3171982084075333, "grad_norm": 0.9781052880177178, "learning_rate": 7.94457005810884e-06, "loss": 0.4093, "step": 5878 }, { "epoch": 0.3172521720360477, "grad_norm": 0.8602296661362321, "learning_rate": 7.943929514275388e-06, "loss": 0.3782, "step": 5879 }, { "epoch": 0.31730613566456206, "grad_norm": 0.9073202524499124, "learning_rate": 7.943288900200932e-06, "loss": 0.3531, "step": 5880 }, { "epoch": 0.31736009929307646, "grad_norm": 0.8346163380267516, "learning_rate": 7.942648215903886e-06, "loss": 0.3661, "step": 5881 }, { "epoch": 0.31741406292159086, "grad_norm": 1.009301338151871, "learning_rate": 7.94200746140266e-06, "loss": 0.454, "step": 5882 }, { "epoch": 0.31746802655010525, "grad_norm": 0.777601415227095, "learning_rate": 7.941366636715673e-06, "loss": 0.2918, "step": 5883 }, { "epoch": 0.3175219901786196, "grad_norm": 0.9679503802474361, "learning_rate": 7.940725741861342e-06, "loss": 0.363, "step": 5884 }, { "epoch": 0.317575953807134, "grad_norm": 1.024693736225454, "learning_rate": 7.940084776858089e-06, "loss": 0.4917, "step": 5885 }, { "epoch": 0.3176299174356484, "grad_norm": 1.1321107069169487, "learning_rate": 7.939443741724329e-06, "loss": 0.4444, "step": 5886 }, { "epoch": 0.31768388106416273, "grad_norm": 1.0918332090350371, "learning_rate": 7.938802636478496e-06, "loss": 0.3264, "step": 5887 }, { "epoch": 0.31773784469267713, "grad_norm": 1.0715956475824344, "learning_rate": 7.938161461139009e-06, "loss": 0.4277, "step": 5888 }, { "epoch": 0.3177918083211915, "grad_norm": 1.1311215407989392, "learning_rate": 7.937520215724297e-06, "loss": 0.6258, "step": 5889 }, { "epoch": 0.3178457719497059, "grad_norm": 1.1986855642251864, "learning_rate": 7.936878900252793e-06, "loss": 0.627, "step": 5890 }, { "epoch": 0.31789973557822027, "grad_norm": 1.0196362554573277, "learning_rate": 7.936237514742927e-06, "loss": 0.5161, "step": 5891 }, { "epoch": 0.31795369920673466, "grad_norm": 1.074218132442038, "learning_rate": 7.93559605921313e-06, "loss": 0.5126, "step": 5892 }, { "epoch": 0.31800766283524906, "grad_norm": 0.9410662748087897, "learning_rate": 7.934954533681843e-06, "loss": 0.4155, "step": 5893 }, { "epoch": 0.3180616264637634, "grad_norm": 0.9072156683166208, "learning_rate": 7.934312938167505e-06, "loss": 0.4293, "step": 5894 }, { "epoch": 0.3181155900922778, "grad_norm": 0.9187281987882211, "learning_rate": 7.93367127268855e-06, "loss": 0.4486, "step": 5895 }, { "epoch": 0.3181695537207922, "grad_norm": 1.1444334076752136, "learning_rate": 7.933029537263426e-06, "loss": 0.5998, "step": 5896 }, { 
"epoch": 0.3182235173493066, "grad_norm": 0.9043559026228211, "learning_rate": 7.932387731910573e-06, "loss": 0.3689, "step": 5897 }, { "epoch": 0.31827748097782094, "grad_norm": 0.9624484762918142, "learning_rate": 7.93174585664844e-06, "loss": 0.4615, "step": 5898 }, { "epoch": 0.31833144460633533, "grad_norm": 1.126440355890838, "learning_rate": 7.931103911495474e-06, "loss": 0.5642, "step": 5899 }, { "epoch": 0.31838540823484973, "grad_norm": 1.272228930320947, "learning_rate": 7.930461896470125e-06, "loss": 0.5955, "step": 5900 }, { "epoch": 0.3184393718633641, "grad_norm": 0.8470531449091266, "learning_rate": 7.929819811590844e-06, "loss": 0.3538, "step": 5901 }, { "epoch": 0.31849333549187847, "grad_norm": 0.8624163210271384, "learning_rate": 7.929177656876088e-06, "loss": 0.3359, "step": 5902 }, { "epoch": 0.31854729912039287, "grad_norm": 0.9890881209901963, "learning_rate": 7.928535432344311e-06, "loss": 0.4398, "step": 5903 }, { "epoch": 0.3186012627489072, "grad_norm": 0.9508911334110701, "learning_rate": 7.927893138013971e-06, "loss": 0.3783, "step": 5904 }, { "epoch": 0.3186552263774216, "grad_norm": 1.0773084056504816, "learning_rate": 7.92725077390353e-06, "loss": 0.5059, "step": 5905 }, { "epoch": 0.318709190005936, "grad_norm": 1.1008550347381039, "learning_rate": 7.92660834003145e-06, "loss": 0.5392, "step": 5906 }, { "epoch": 0.3187631536344504, "grad_norm": 1.0224588585450411, "learning_rate": 7.925965836416193e-06, "loss": 0.3786, "step": 5907 }, { "epoch": 0.31881711726296474, "grad_norm": 0.948906860440352, "learning_rate": 7.925323263076227e-06, "loss": 0.4795, "step": 5908 }, { "epoch": 0.31887108089147914, "grad_norm": 0.9097690473329768, "learning_rate": 7.924680620030019e-06, "loss": 0.4127, "step": 5909 }, { "epoch": 0.31892504451999354, "grad_norm": 1.214642090980527, "learning_rate": 7.924037907296042e-06, "loss": 0.5129, "step": 5910 }, { "epoch": 0.3189790081485079, "grad_norm": 0.9667265393100248, "learning_rate": 7.923395124892764e-06, "loss": 0.3679, "step": 5911 }, { "epoch": 0.3190329717770223, "grad_norm": 0.9335198752441802, "learning_rate": 7.922752272838664e-06, "loss": 0.3908, "step": 5912 }, { "epoch": 0.3190869354055367, "grad_norm": 1.1824299136461927, "learning_rate": 7.922109351152214e-06, "loss": 0.4857, "step": 5913 }, { "epoch": 0.3191408990340511, "grad_norm": 1.002311810325618, "learning_rate": 7.921466359851892e-06, "loss": 0.5593, "step": 5914 }, { "epoch": 0.3191948626625654, "grad_norm": 0.8861196534846978, "learning_rate": 7.920823298956184e-06, "loss": 0.4333, "step": 5915 }, { "epoch": 0.3192488262910798, "grad_norm": 1.1235941111438703, "learning_rate": 7.920180168483565e-06, "loss": 0.5012, "step": 5916 }, { "epoch": 0.3193027899195942, "grad_norm": 1.1314726926058671, "learning_rate": 7.919536968452524e-06, "loss": 0.5409, "step": 5917 }, { "epoch": 0.31935675354810855, "grad_norm": 0.9919398993156224, "learning_rate": 7.918893698881547e-06, "loss": 0.4717, "step": 5918 }, { "epoch": 0.31941071717662295, "grad_norm": 0.8342868306808415, "learning_rate": 7.91825035978912e-06, "loss": 0.3469, "step": 5919 }, { "epoch": 0.31946468080513735, "grad_norm": 0.9287318330248803, "learning_rate": 7.917606951193734e-06, "loss": 0.4345, "step": 5920 }, { "epoch": 0.31951864443365174, "grad_norm": 1.028241427927597, "learning_rate": 7.916963473113882e-06, "loss": 0.5754, "step": 5921 }, { "epoch": 0.3195726080621661, "grad_norm": 1.1402164802305959, "learning_rate": 7.916319925568056e-06, "loss": 0.428, "step": 5922 }, { "epoch": 
0.3196265716906805, "grad_norm": 1.0302098615496027, "learning_rate": 7.915676308574755e-06, "loss": 0.457, "step": 5923 }, { "epoch": 0.3196805353191949, "grad_norm": 0.6309800117100535, "learning_rate": 7.915032622152475e-06, "loss": 0.2743, "step": 5924 }, { "epoch": 0.3197344989477092, "grad_norm": 1.1632934329006708, "learning_rate": 7.91438886631972e-06, "loss": 0.4602, "step": 5925 }, { "epoch": 0.3197884625762236, "grad_norm": 0.913815986530333, "learning_rate": 7.913745041094987e-06, "loss": 0.4002, "step": 5926 }, { "epoch": 0.319842426204738, "grad_norm": 0.9356763597533835, "learning_rate": 7.913101146496783e-06, "loss": 0.5539, "step": 5927 }, { "epoch": 0.3198963898332524, "grad_norm": 1.1499240588052162, "learning_rate": 7.912457182543613e-06, "loss": 0.4764, "step": 5928 }, { "epoch": 0.31995035346176676, "grad_norm": 1.11358676688239, "learning_rate": 7.91181314925399e-06, "loss": 0.4975, "step": 5929 }, { "epoch": 0.32000431709028115, "grad_norm": 0.9132441045556481, "learning_rate": 7.911169046646416e-06, "loss": 0.4274, "step": 5930 }, { "epoch": 0.32005828071879555, "grad_norm": 0.9228528240488291, "learning_rate": 7.910524874739407e-06, "loss": 0.5065, "step": 5931 }, { "epoch": 0.3201122443473099, "grad_norm": 0.9091138510326043, "learning_rate": 7.90988063355148e-06, "loss": 0.5389, "step": 5932 }, { "epoch": 0.3201662079758243, "grad_norm": 0.9745784632652947, "learning_rate": 7.909236323101147e-06, "loss": 0.4594, "step": 5933 }, { "epoch": 0.3202201716043387, "grad_norm": 1.1059827753247802, "learning_rate": 7.908591943406927e-06, "loss": 0.6131, "step": 5934 }, { "epoch": 0.32027413523285303, "grad_norm": 0.9785017759485038, "learning_rate": 7.907947494487343e-06, "loss": 0.4735, "step": 5935 }, { "epoch": 0.3203280988613674, "grad_norm": 1.000623208514124, "learning_rate": 7.907302976360913e-06, "loss": 0.378, "step": 5936 }, { "epoch": 0.3203820624898818, "grad_norm": 0.9689126532603618, "learning_rate": 7.906658389046165e-06, "loss": 0.3999, "step": 5937 }, { "epoch": 0.3204360261183962, "grad_norm": 0.9401649502626862, "learning_rate": 7.906013732561622e-06, "loss": 0.4357, "step": 5938 }, { "epoch": 0.32048998974691056, "grad_norm": 0.935819315183388, "learning_rate": 7.905369006925813e-06, "loss": 0.5898, "step": 5939 }, { "epoch": 0.32054395337542496, "grad_norm": 1.2529673858211547, "learning_rate": 7.904724212157267e-06, "loss": 0.527, "step": 5940 }, { "epoch": 0.32059791700393936, "grad_norm": 1.3005659348505847, "learning_rate": 7.90407934827452e-06, "loss": 0.5526, "step": 5941 }, { "epoch": 0.3206518806324537, "grad_norm": 0.91918447169636, "learning_rate": 7.903434415296103e-06, "loss": 0.4816, "step": 5942 }, { "epoch": 0.3207058442609681, "grad_norm": 1.3140619574175858, "learning_rate": 7.902789413240551e-06, "loss": 0.6179, "step": 5943 }, { "epoch": 0.3207598078894825, "grad_norm": 0.8075932533080238, "learning_rate": 7.902144342126405e-06, "loss": 0.3865, "step": 5944 }, { "epoch": 0.3208137715179969, "grad_norm": 1.0526313229442101, "learning_rate": 7.901499201972202e-06, "loss": 0.5472, "step": 5945 }, { "epoch": 0.32086773514651123, "grad_norm": 1.1604395833397465, "learning_rate": 7.900853992796486e-06, "loss": 0.4839, "step": 5946 }, { "epoch": 0.32092169877502563, "grad_norm": 0.9962635402597405, "learning_rate": 7.9002087146178e-06, "loss": 0.4912, "step": 5947 }, { "epoch": 0.32097566240354003, "grad_norm": 1.0304913103294047, "learning_rate": 7.899563367454692e-06, "loss": 0.4947, "step": 5948 }, { "epoch": 0.32102962603205437, 
"grad_norm": 1.20271684228534, "learning_rate": 7.898917951325706e-06, "loss": 0.636, "step": 5949 }, { "epoch": 0.32108358966056877, "grad_norm": 1.0036392943660768, "learning_rate": 7.898272466249396e-06, "loss": 0.4575, "step": 5950 }, { "epoch": 0.32113755328908317, "grad_norm": 1.0462348981071277, "learning_rate": 7.897626912244311e-06, "loss": 0.4681, "step": 5951 }, { "epoch": 0.32119151691759756, "grad_norm": 1.034491830458183, "learning_rate": 7.896981289329005e-06, "loss": 0.4538, "step": 5952 }, { "epoch": 0.3212454805461119, "grad_norm": 1.102045428683058, "learning_rate": 7.896335597522037e-06, "loss": 0.5755, "step": 5953 }, { "epoch": 0.3212994441746263, "grad_norm": 1.0231528289320657, "learning_rate": 7.895689836841961e-06, "loss": 0.6004, "step": 5954 }, { "epoch": 0.3213534078031407, "grad_norm": 1.0053977450325944, "learning_rate": 7.895044007307338e-06, "loss": 0.4682, "step": 5955 }, { "epoch": 0.32140737143165504, "grad_norm": 1.0194904388710258, "learning_rate": 7.894398108936733e-06, "loss": 0.4243, "step": 5956 }, { "epoch": 0.32146133506016944, "grad_norm": 0.8586913189830022, "learning_rate": 7.893752141748705e-06, "loss": 0.4316, "step": 5957 }, { "epoch": 0.32151529868868384, "grad_norm": 1.1836899440149393, "learning_rate": 7.89310610576182e-06, "loss": 0.5903, "step": 5958 }, { "epoch": 0.32156926231719823, "grad_norm": 1.0327946598185154, "learning_rate": 7.892460000994649e-06, "loss": 0.5217, "step": 5959 }, { "epoch": 0.3216232259457126, "grad_norm": 0.9772371785492189, "learning_rate": 7.891813827465761e-06, "loss": 0.4047, "step": 5960 }, { "epoch": 0.321677189574227, "grad_norm": 0.928707106540232, "learning_rate": 7.891167585193724e-06, "loss": 0.3613, "step": 5961 }, { "epoch": 0.32173115320274137, "grad_norm": 0.9004139349383272, "learning_rate": 7.890521274197119e-06, "loss": 0.3914, "step": 5962 }, { "epoch": 0.3217851168312557, "grad_norm": 0.9258773042001399, "learning_rate": 7.889874894494515e-06, "loss": 0.3772, "step": 5963 }, { "epoch": 0.3218390804597701, "grad_norm": 1.2108257169204986, "learning_rate": 7.889228446104492e-06, "loss": 0.474, "step": 5964 }, { "epoch": 0.3218930440882845, "grad_norm": 1.0170126463214018, "learning_rate": 7.888581929045629e-06, "loss": 0.5159, "step": 5965 }, { "epoch": 0.3219470077167989, "grad_norm": 1.0522850283683047, "learning_rate": 7.887935343336506e-06, "loss": 0.5003, "step": 5966 }, { "epoch": 0.32200097134531325, "grad_norm": 1.2083278056572362, "learning_rate": 7.88728868899571e-06, "loss": 0.5081, "step": 5967 }, { "epoch": 0.32205493497382764, "grad_norm": 1.1254941612702527, "learning_rate": 7.886641966041826e-06, "loss": 0.4811, "step": 5968 }, { "epoch": 0.32210889860234204, "grad_norm": 0.895796721462946, "learning_rate": 7.88599517449344e-06, "loss": 0.4238, "step": 5969 }, { "epoch": 0.3221628622308564, "grad_norm": 0.8962440626420475, "learning_rate": 7.885348314369141e-06, "loss": 0.3583, "step": 5970 }, { "epoch": 0.3222168258593708, "grad_norm": 1.0338502518591037, "learning_rate": 7.884701385687522e-06, "loss": 0.4516, "step": 5971 }, { "epoch": 0.3222707894878852, "grad_norm": 1.0770589366193566, "learning_rate": 7.884054388467176e-06, "loss": 0.4435, "step": 5972 }, { "epoch": 0.3223247531163995, "grad_norm": 1.0027776555865757, "learning_rate": 7.883407322726696e-06, "loss": 0.5537, "step": 5973 }, { "epoch": 0.3223787167449139, "grad_norm": 1.0257954816733534, "learning_rate": 7.882760188484683e-06, "loss": 0.3707, "step": 5974 }, { "epoch": 0.3224326803734283, "grad_norm": 
0.9372059086577426, "learning_rate": 7.882112985759734e-06, "loss": 0.4735, "step": 5975 }, { "epoch": 0.3224866440019427, "grad_norm": 0.7959259272351011, "learning_rate": 7.88146571457045e-06, "loss": 0.343, "step": 5976 }, { "epoch": 0.32254060763045705, "grad_norm": 0.8314271617331521, "learning_rate": 7.880818374935436e-06, "loss": 0.3982, "step": 5977 }, { "epoch": 0.32259457125897145, "grad_norm": 1.1371707843625738, "learning_rate": 7.880170966873296e-06, "loss": 0.45, "step": 5978 }, { "epoch": 0.32264853488748585, "grad_norm": 1.0329817177978784, "learning_rate": 7.879523490402638e-06, "loss": 0.4057, "step": 5979 }, { "epoch": 0.3227024985160002, "grad_norm": 0.9584914980517119, "learning_rate": 7.878875945542071e-06, "loss": 0.4011, "step": 5980 }, { "epoch": 0.3227564621445146, "grad_norm": 1.053067823833615, "learning_rate": 7.878228332310204e-06, "loss": 0.5175, "step": 5981 }, { "epoch": 0.322810425773029, "grad_norm": 0.9883802204666124, "learning_rate": 7.877580650725653e-06, "loss": 0.4732, "step": 5982 }, { "epoch": 0.3228643894015434, "grad_norm": 0.9003743700416751, "learning_rate": 7.876932900807029e-06, "loss": 0.5378, "step": 5983 }, { "epoch": 0.3229183530300577, "grad_norm": 1.0234876987748245, "learning_rate": 7.876285082572956e-06, "loss": 0.4916, "step": 5984 }, { "epoch": 0.3229723166585721, "grad_norm": 0.8167169409750263, "learning_rate": 7.875637196042046e-06, "loss": 0.3551, "step": 5985 }, { "epoch": 0.3230262802870865, "grad_norm": 1.0531019347989066, "learning_rate": 7.874989241232923e-06, "loss": 0.4865, "step": 5986 }, { "epoch": 0.32308024391560086, "grad_norm": 1.0819642828732705, "learning_rate": 7.87434121816421e-06, "loss": 0.4772, "step": 5987 }, { "epoch": 0.32313420754411526, "grad_norm": 0.9951657783778652, "learning_rate": 7.87369312685453e-06, "loss": 0.5682, "step": 5988 }, { "epoch": 0.32318817117262966, "grad_norm": 1.0192453957095002, "learning_rate": 7.873044967322511e-06, "loss": 0.5713, "step": 5989 }, { "epoch": 0.32324213480114405, "grad_norm": 1.0250968739478206, "learning_rate": 7.872396739586784e-06, "loss": 0.4743, "step": 5990 }, { "epoch": 0.3232960984296584, "grad_norm": 1.1481343947313774, "learning_rate": 7.871748443665977e-06, "loss": 0.5565, "step": 5991 }, { "epoch": 0.3233500620581728, "grad_norm": 0.9749919528663178, "learning_rate": 7.871100079578722e-06, "loss": 0.3781, "step": 5992 }, { "epoch": 0.3234040256866872, "grad_norm": 1.2675135877431165, "learning_rate": 7.870451647343656e-06, "loss": 0.5373, "step": 5993 }, { "epoch": 0.32345798931520153, "grad_norm": 0.8214021154524622, "learning_rate": 7.869803146979414e-06, "loss": 0.3419, "step": 5994 }, { "epoch": 0.32351195294371593, "grad_norm": 1.0231918047672879, "learning_rate": 7.869154578504634e-06, "loss": 0.5955, "step": 5995 }, { "epoch": 0.3235659165722303, "grad_norm": 1.0119235350260083, "learning_rate": 7.868505941937959e-06, "loss": 0.3894, "step": 5996 }, { "epoch": 0.3236198802007447, "grad_norm": 1.1171954318871131, "learning_rate": 7.86785723729803e-06, "loss": 0.4735, "step": 5997 }, { "epoch": 0.32367384382925907, "grad_norm": 0.9953878269664985, "learning_rate": 7.86720846460349e-06, "loss": 0.4938, "step": 5998 }, { "epoch": 0.32372780745777346, "grad_norm": 0.7900486873321353, "learning_rate": 7.86655962387299e-06, "loss": 0.3387, "step": 5999 }, { "epoch": 0.32378177108628786, "grad_norm": 1.3181226899486023, "learning_rate": 7.865910715125172e-06, "loss": 0.5042, "step": 6000 }, { "epoch": 0.32378177108628786, "eval_loss": 
0.5695191025733948, "eval_runtime": 163.9743, "eval_samples_per_second": 20.973, "eval_steps_per_second": 0.878, "step": 6000 }, { "epoch": 0.3238357347148022, "grad_norm": 0.8016413255225768, "learning_rate": 7.86526173837869e-06, "loss": 0.2859, "step": 6001 }, { "epoch": 0.3238896983433166, "grad_norm": 1.0263554092244853, "learning_rate": 7.864612693652198e-06, "loss": 0.4303, "step": 6002 }, { "epoch": 0.323943661971831, "grad_norm": 1.0507886807859228, "learning_rate": 7.863963580964344e-06, "loss": 0.4679, "step": 6003 }, { "epoch": 0.32399762560034534, "grad_norm": 0.8509495752549467, "learning_rate": 7.863314400333793e-06, "loss": 0.4705, "step": 6004 }, { "epoch": 0.32405158922885974, "grad_norm": 1.1381288010712423, "learning_rate": 7.862665151779195e-06, "loss": 0.5968, "step": 6005 }, { "epoch": 0.32410555285737414, "grad_norm": 1.1859335784464307, "learning_rate": 7.862015835319213e-06, "loss": 0.5179, "step": 6006 }, { "epoch": 0.32415951648588853, "grad_norm": 1.2668019825465817, "learning_rate": 7.861366450972509e-06, "loss": 0.4522, "step": 6007 }, { "epoch": 0.3242134801144029, "grad_norm": 1.1165754504194687, "learning_rate": 7.86071699875775e-06, "loss": 0.4782, "step": 6008 }, { "epoch": 0.32426744374291727, "grad_norm": 1.1151598077623261, "learning_rate": 7.860067478693597e-06, "loss": 0.6063, "step": 6009 }, { "epoch": 0.32432140737143167, "grad_norm": 1.0404140266536035, "learning_rate": 7.85941789079872e-06, "loss": 0.3934, "step": 6010 }, { "epoch": 0.324375370999946, "grad_norm": 1.0403130950851807, "learning_rate": 7.858768235091788e-06, "loss": 0.3591, "step": 6011 }, { "epoch": 0.3244293346284604, "grad_norm": 1.0106328791158328, "learning_rate": 7.858118511591476e-06, "loss": 0.5218, "step": 6012 }, { "epoch": 0.3244832982569748, "grad_norm": 0.9074220419301285, "learning_rate": 7.857468720316454e-06, "loss": 0.4859, "step": 6013 }, { "epoch": 0.3245372618854892, "grad_norm": 1.1941177222021893, "learning_rate": 7.856818861285398e-06, "loss": 0.5575, "step": 6014 }, { "epoch": 0.32459122551400355, "grad_norm": 1.2394127047785763, "learning_rate": 7.856168934516987e-06, "loss": 0.6421, "step": 6015 }, { "epoch": 0.32464518914251794, "grad_norm": 1.037366397795698, "learning_rate": 7.8555189400299e-06, "loss": 0.4506, "step": 6016 }, { "epoch": 0.32469915277103234, "grad_norm": 1.0801336652199232, "learning_rate": 7.85486887784282e-06, "loss": 0.5151, "step": 6017 }, { "epoch": 0.3247531163995467, "grad_norm": 0.9538346787866813, "learning_rate": 7.854218747974429e-06, "loss": 0.2954, "step": 6018 }, { "epoch": 0.3248070800280611, "grad_norm": 0.9276513311268766, "learning_rate": 7.853568550443412e-06, "loss": 0.3824, "step": 6019 }, { "epoch": 0.3248610436565755, "grad_norm": 0.9627758705059813, "learning_rate": 7.852918285268455e-06, "loss": 0.4308, "step": 6020 }, { "epoch": 0.3249150072850899, "grad_norm": 1.1566862255646715, "learning_rate": 7.85226795246825e-06, "loss": 0.6689, "step": 6021 }, { "epoch": 0.3249689709136042, "grad_norm": 0.9065902930994093, "learning_rate": 7.851617552061488e-06, "loss": 0.4199, "step": 6022 }, { "epoch": 0.3250229345421186, "grad_norm": 1.201369124323388, "learning_rate": 7.850967084066861e-06, "loss": 0.6766, "step": 6023 }, { "epoch": 0.325076898170633, "grad_norm": 1.1384224649745283, "learning_rate": 7.850316548503063e-06, "loss": 0.5562, "step": 6024 }, { "epoch": 0.32513086179914735, "grad_norm": 1.0720609338241982, "learning_rate": 7.849665945388797e-06, "loss": 0.5318, "step": 6025 }, { "epoch": 
0.32518482542766175, "grad_norm": 1.00911565792164, "learning_rate": 7.849015274742753e-06, "loss": 0.4561, "step": 6026 }, { "epoch": 0.32523878905617615, "grad_norm": 0.8873024972829716, "learning_rate": 7.848364536583638e-06, "loss": 0.3531, "step": 6027 }, { "epoch": 0.32529275268469054, "grad_norm": 1.1154447628880586, "learning_rate": 7.847713730930155e-06, "loss": 0.5626, "step": 6028 }, { "epoch": 0.3253467163132049, "grad_norm": 1.1178920203049403, "learning_rate": 7.847062857801005e-06, "loss": 0.5527, "step": 6029 }, { "epoch": 0.3254006799417193, "grad_norm": 0.9609055520142648, "learning_rate": 7.846411917214897e-06, "loss": 0.4658, "step": 6030 }, { "epoch": 0.3254546435702337, "grad_norm": 1.0578684828601208, "learning_rate": 7.845760909190542e-06, "loss": 0.5102, "step": 6031 }, { "epoch": 0.325508607198748, "grad_norm": 1.0343317965987715, "learning_rate": 7.845109833746644e-06, "loss": 0.4733, "step": 6032 }, { "epoch": 0.3255625708272624, "grad_norm": 1.0830611706693334, "learning_rate": 7.844458690901923e-06, "loss": 0.5699, "step": 6033 }, { "epoch": 0.3256165344557768, "grad_norm": 0.9023550897494328, "learning_rate": 7.84380748067509e-06, "loss": 0.4296, "step": 6034 }, { "epoch": 0.32567049808429116, "grad_norm": 1.2816201584222446, "learning_rate": 7.843156203084861e-06, "loss": 0.604, "step": 6035 }, { "epoch": 0.32572446171280556, "grad_norm": 1.036893186826935, "learning_rate": 7.842504858149956e-06, "loss": 0.412, "step": 6036 }, { "epoch": 0.32577842534131995, "grad_norm": 0.9973101259191083, "learning_rate": 7.841853445889096e-06, "loss": 0.3588, "step": 6037 }, { "epoch": 0.32583238896983435, "grad_norm": 1.1612846766152949, "learning_rate": 7.841201966320999e-06, "loss": 0.4536, "step": 6038 }, { "epoch": 0.3258863525983487, "grad_norm": 1.010176772537534, "learning_rate": 7.84055041946439e-06, "loss": 0.3987, "step": 6039 }, { "epoch": 0.3259403162268631, "grad_norm": 1.2053397749336856, "learning_rate": 7.839898805338002e-06, "loss": 0.6834, "step": 6040 }, { "epoch": 0.3259942798553775, "grad_norm": 1.1000928622467565, "learning_rate": 7.839247123960554e-06, "loss": 0.6532, "step": 6041 }, { "epoch": 0.32604824348389183, "grad_norm": 0.8847699387158673, "learning_rate": 7.838595375350781e-06, "loss": 0.4449, "step": 6042 }, { "epoch": 0.32610220711240623, "grad_norm": 1.1879070708546324, "learning_rate": 7.837943559527416e-06, "loss": 0.611, "step": 6043 }, { "epoch": 0.3261561707409206, "grad_norm": 0.9433047923938608, "learning_rate": 7.837291676509188e-06, "loss": 0.3984, "step": 6044 }, { "epoch": 0.326210134369435, "grad_norm": 0.9425278924284124, "learning_rate": 7.836639726314837e-06, "loss": 0.4435, "step": 6045 }, { "epoch": 0.32626409799794936, "grad_norm": 0.9889509525049538, "learning_rate": 7.8359877089631e-06, "loss": 0.3967, "step": 6046 }, { "epoch": 0.32631806162646376, "grad_norm": 0.9562498165914874, "learning_rate": 7.835335624472714e-06, "loss": 0.4304, "step": 6047 }, { "epoch": 0.32637202525497816, "grad_norm": 0.8245261955777404, "learning_rate": 7.834683472862424e-06, "loss": 0.3383, "step": 6048 }, { "epoch": 0.3264259888834925, "grad_norm": 0.9901776927061094, "learning_rate": 7.834031254150972e-06, "loss": 0.6345, "step": 6049 }, { "epoch": 0.3264799525120069, "grad_norm": 0.8124534871056996, "learning_rate": 7.833378968357103e-06, "loss": 0.4048, "step": 6050 }, { "epoch": 0.3265339161405213, "grad_norm": 1.1018172149430634, "learning_rate": 7.832726615499566e-06, "loss": 0.5347, "step": 6051 }, { "epoch": 0.3265878797690357, 
"grad_norm": 0.8540654018758913, "learning_rate": 7.832074195597107e-06, "loss": 0.3897, "step": 6052 }, { "epoch": 0.32664184339755004, "grad_norm": 1.0156765117065754, "learning_rate": 7.831421708668479e-06, "loss": 0.5805, "step": 6053 }, { "epoch": 0.32669580702606443, "grad_norm": 0.9635602139293212, "learning_rate": 7.830769154732436e-06, "loss": 0.5176, "step": 6054 }, { "epoch": 0.32674977065457883, "grad_norm": 1.0171674463810585, "learning_rate": 7.830116533807733e-06, "loss": 0.4853, "step": 6055 }, { "epoch": 0.3268037342830932, "grad_norm": 1.1800036474157283, "learning_rate": 7.829463845913126e-06, "loss": 0.5722, "step": 6056 }, { "epoch": 0.32685769791160757, "grad_norm": 1.2229143633185457, "learning_rate": 7.828811091067374e-06, "loss": 0.5646, "step": 6057 }, { "epoch": 0.32691166154012197, "grad_norm": 0.7801234341026698, "learning_rate": 7.828158269289239e-06, "loss": 0.3733, "step": 6058 }, { "epoch": 0.32696562516863636, "grad_norm": 1.005833340571049, "learning_rate": 7.827505380597481e-06, "loss": 0.4422, "step": 6059 }, { "epoch": 0.3270195887971507, "grad_norm": 0.9441850532543788, "learning_rate": 7.826852425010868e-06, "loss": 0.489, "step": 6060 }, { "epoch": 0.3270735524256651, "grad_norm": 0.8921613351754532, "learning_rate": 7.826199402548167e-06, "loss": 0.367, "step": 6061 }, { "epoch": 0.3271275160541795, "grad_norm": 0.8861007731079703, "learning_rate": 7.825546313228141e-06, "loss": 0.3417, "step": 6062 }, { "epoch": 0.32718147968269384, "grad_norm": 1.1268070936083874, "learning_rate": 7.824893157069568e-06, "loss": 0.6046, "step": 6063 }, { "epoch": 0.32723544331120824, "grad_norm": 1.026040299302559, "learning_rate": 7.824239934091214e-06, "loss": 0.4673, "step": 6064 }, { "epoch": 0.32728940693972264, "grad_norm": 1.0507323097998134, "learning_rate": 7.823586644311857e-06, "loss": 0.5246, "step": 6065 }, { "epoch": 0.32734337056823704, "grad_norm": 0.9128044276434362, "learning_rate": 7.82293328775027e-06, "loss": 0.3925, "step": 6066 }, { "epoch": 0.3273973341967514, "grad_norm": 0.8892358679105018, "learning_rate": 7.822279864425235e-06, "loss": 0.3187, "step": 6067 }, { "epoch": 0.3274512978252658, "grad_norm": 1.0746749303298049, "learning_rate": 7.82162637435553e-06, "loss": 0.5598, "step": 6068 }, { "epoch": 0.32750526145378017, "grad_norm": 1.0253778098937847, "learning_rate": 7.820972817559938e-06, "loss": 0.5352, "step": 6069 }, { "epoch": 0.3275592250822945, "grad_norm": 1.1504190017318505, "learning_rate": 7.820319194057241e-06, "loss": 0.6271, "step": 6070 }, { "epoch": 0.3276131887108089, "grad_norm": 1.1438704935625597, "learning_rate": 7.819665503866226e-06, "loss": 0.4766, "step": 6071 }, { "epoch": 0.3276671523393233, "grad_norm": 0.9461197678211211, "learning_rate": 7.819011747005679e-06, "loss": 0.5585, "step": 6072 }, { "epoch": 0.32772111596783765, "grad_norm": 1.010602382751191, "learning_rate": 7.818357923494393e-06, "loss": 0.4191, "step": 6073 }, { "epoch": 0.32777507959635205, "grad_norm": 0.9575444124062902, "learning_rate": 7.817704033351157e-06, "loss": 0.4572, "step": 6074 }, { "epoch": 0.32782904322486645, "grad_norm": 0.8752066227157896, "learning_rate": 7.817050076594767e-06, "loss": 0.373, "step": 6075 }, { "epoch": 0.32788300685338084, "grad_norm": 1.1749051834514366, "learning_rate": 7.816396053244014e-06, "loss": 0.4921, "step": 6076 }, { "epoch": 0.3279369704818952, "grad_norm": 0.91179918091204, "learning_rate": 7.8157419633177e-06, "loss": 0.3591, "step": 6077 }, { "epoch": 0.3279909341104096, "grad_norm": 
1.0242596440257616, "learning_rate": 7.815087806834622e-06, "loss": 0.4468, "step": 6078 }, { "epoch": 0.328044897738924, "grad_norm": 1.1730953189174727, "learning_rate": 7.814433583813579e-06, "loss": 0.5415, "step": 6079 }, { "epoch": 0.3280988613674383, "grad_norm": 1.1579758203193515, "learning_rate": 7.813779294273378e-06, "loss": 0.562, "step": 6080 }, { "epoch": 0.3281528249959527, "grad_norm": 0.8083202886741409, "learning_rate": 7.813124938232824e-06, "loss": 0.3884, "step": 6081 }, { "epoch": 0.3282067886244671, "grad_norm": 1.0111330168355748, "learning_rate": 7.812470515710719e-06, "loss": 0.4727, "step": 6082 }, { "epoch": 0.3282607522529815, "grad_norm": 0.9842406225630427, "learning_rate": 7.811816026725874e-06, "loss": 0.422, "step": 6083 }, { "epoch": 0.32831471588149586, "grad_norm": 1.2312340463962488, "learning_rate": 7.811161471297105e-06, "loss": 0.5857, "step": 6084 }, { "epoch": 0.32836867951001025, "grad_norm": 1.0622988280755192, "learning_rate": 7.810506849443216e-06, "loss": 0.4229, "step": 6085 }, { "epoch": 0.32842264313852465, "grad_norm": 1.0680878961896851, "learning_rate": 7.809852161183026e-06, "loss": 0.4025, "step": 6086 }, { "epoch": 0.328476606767039, "grad_norm": 1.1132893641940282, "learning_rate": 7.809197406535352e-06, "loss": 0.5829, "step": 6087 }, { "epoch": 0.3285305703955534, "grad_norm": 1.1023019765738784, "learning_rate": 7.808542585519009e-06, "loss": 0.6425, "step": 6088 }, { "epoch": 0.3285845340240678, "grad_norm": 0.940321954427669, "learning_rate": 7.807887698152822e-06, "loss": 0.4212, "step": 6089 }, { "epoch": 0.3286384976525822, "grad_norm": 1.031814900473182, "learning_rate": 7.80723274445561e-06, "loss": 0.4511, "step": 6090 }, { "epoch": 0.3286924612810965, "grad_norm": 0.908503840395287, "learning_rate": 7.806577724446198e-06, "loss": 0.4227, "step": 6091 }, { "epoch": 0.3287464249096109, "grad_norm": 0.763864642759267, "learning_rate": 7.80592263814341e-06, "loss": 0.3654, "step": 6092 }, { "epoch": 0.3288003885381253, "grad_norm": 0.9987776396446228, "learning_rate": 7.805267485566078e-06, "loss": 0.3385, "step": 6093 }, { "epoch": 0.32885435216663966, "grad_norm": 0.8743003862823229, "learning_rate": 7.804612266733027e-06, "loss": 0.3894, "step": 6094 }, { "epoch": 0.32890831579515406, "grad_norm": 1.0113020870084648, "learning_rate": 7.803956981663093e-06, "loss": 0.5101, "step": 6095 }, { "epoch": 0.32896227942366846, "grad_norm": 0.9141782010831855, "learning_rate": 7.803301630375106e-06, "loss": 0.3546, "step": 6096 }, { "epoch": 0.32901624305218286, "grad_norm": 0.9479588818707817, "learning_rate": 7.802646212887902e-06, "loss": 0.4038, "step": 6097 }, { "epoch": 0.3290702066806972, "grad_norm": 0.8217898268444614, "learning_rate": 7.80199072922032e-06, "loss": 0.3269, "step": 6098 }, { "epoch": 0.3291241703092116, "grad_norm": 0.8950533932566802, "learning_rate": 7.801335179391199e-06, "loss": 0.4485, "step": 6099 }, { "epoch": 0.329178133937726, "grad_norm": 1.1121206135623902, "learning_rate": 7.800679563419378e-06, "loss": 0.5339, "step": 6100 }, { "epoch": 0.32923209756624033, "grad_norm": 0.8633564223702286, "learning_rate": 7.800023881323705e-06, "loss": 0.5134, "step": 6101 }, { "epoch": 0.32928606119475473, "grad_norm": 0.810312885318976, "learning_rate": 7.79936813312302e-06, "loss": 0.5086, "step": 6102 }, { "epoch": 0.32934002482326913, "grad_norm": 1.0229570710582174, "learning_rate": 7.79871231883617e-06, "loss": 0.3955, "step": 6103 }, { "epoch": 0.32939398845178347, "grad_norm": 1.0115095598305568, 
"learning_rate": 7.798056438482006e-06, "loss": 0.513, "step": 6104 }, { "epoch": 0.32944795208029787, "grad_norm": 0.7328385130147109, "learning_rate": 7.797400492079379e-06, "loss": 0.3241, "step": 6105 }, { "epoch": 0.32950191570881227, "grad_norm": 1.1570114396302806, "learning_rate": 7.796744479647138e-06, "loss": 0.5975, "step": 6106 }, { "epoch": 0.32955587933732666, "grad_norm": 1.1896064684835053, "learning_rate": 7.796088401204142e-06, "loss": 0.6235, "step": 6107 }, { "epoch": 0.329609842965841, "grad_norm": 1.0584511717228382, "learning_rate": 7.795432256769247e-06, "loss": 0.5006, "step": 6108 }, { "epoch": 0.3296638065943554, "grad_norm": 1.139777008966188, "learning_rate": 7.794776046361306e-06, "loss": 0.5461, "step": 6109 }, { "epoch": 0.3297177702228698, "grad_norm": 0.9015283740802661, "learning_rate": 7.794119769999183e-06, "loss": 0.4175, "step": 6110 }, { "epoch": 0.32977173385138414, "grad_norm": 1.0879177556968282, "learning_rate": 7.793463427701741e-06, "loss": 0.5745, "step": 6111 }, { "epoch": 0.32982569747989854, "grad_norm": 1.0406410240543096, "learning_rate": 7.792807019487845e-06, "loss": 0.5017, "step": 6112 }, { "epoch": 0.32987966110841294, "grad_norm": 1.2146470574222767, "learning_rate": 7.792150545376354e-06, "loss": 0.5486, "step": 6113 }, { "epoch": 0.32993362473692733, "grad_norm": 0.9175806778670064, "learning_rate": 7.791494005386144e-06, "loss": 0.4245, "step": 6114 }, { "epoch": 0.3299875883654417, "grad_norm": 1.0663173678806595, "learning_rate": 7.79083739953608e-06, "loss": 0.4961, "step": 6115 }, { "epoch": 0.3300415519939561, "grad_norm": 1.1251536064299132, "learning_rate": 7.790180727845033e-06, "loss": 0.5283, "step": 6116 }, { "epoch": 0.33009551562247047, "grad_norm": 0.7970967079000394, "learning_rate": 7.789523990331877e-06, "loss": 0.3398, "step": 6117 }, { "epoch": 0.3301494792509848, "grad_norm": 1.469769274472929, "learning_rate": 7.78886718701549e-06, "loss": 0.6187, "step": 6118 }, { "epoch": 0.3302034428794992, "grad_norm": 0.721677327935083, "learning_rate": 7.788210317914747e-06, "loss": 0.3028, "step": 6119 }, { "epoch": 0.3302574065080136, "grad_norm": 0.9598269193925923, "learning_rate": 7.787553383048529e-06, "loss": 0.5038, "step": 6120 }, { "epoch": 0.330311370136528, "grad_norm": 0.8890415062612552, "learning_rate": 7.786896382435712e-06, "loss": 0.51, "step": 6121 }, { "epoch": 0.33036533376504235, "grad_norm": 0.8767350977759248, "learning_rate": 7.786239316095186e-06, "loss": 0.3517, "step": 6122 }, { "epoch": 0.33041929739355674, "grad_norm": 0.9129863341236272, "learning_rate": 7.78558218404583e-06, "loss": 0.49, "step": 6123 }, { "epoch": 0.33047326102207114, "grad_norm": 0.8351938611984346, "learning_rate": 7.78492498630653e-06, "loss": 0.3829, "step": 6124 }, { "epoch": 0.3305272246505855, "grad_norm": 0.9214872936628962, "learning_rate": 7.784267722896182e-06, "loss": 0.4818, "step": 6125 }, { "epoch": 0.3305811882790999, "grad_norm": 0.9772387074633102, "learning_rate": 7.78361039383367e-06, "loss": 0.665, "step": 6126 }, { "epoch": 0.3306351519076143, "grad_norm": 0.8771553690406717, "learning_rate": 7.782952999137888e-06, "loss": 0.3572, "step": 6127 }, { "epoch": 0.3306891155361287, "grad_norm": 1.0081758631176496, "learning_rate": 7.782295538827727e-06, "loss": 0.5799, "step": 6128 }, { "epoch": 0.330743079164643, "grad_norm": 1.1598085416065091, "learning_rate": 7.78163801292209e-06, "loss": 0.5301, "step": 6129 }, { "epoch": 0.3307970427931574, "grad_norm": 1.0784440956454608, "learning_rate": 
7.780980421439868e-06, "loss": 0.6295, "step": 6130 }, { "epoch": 0.3308510064216718, "grad_norm": 1.0730411088684984, "learning_rate": 7.780322764399966e-06, "loss": 0.5644, "step": 6131 }, { "epoch": 0.33090497005018615, "grad_norm": 0.9585971642264112, "learning_rate": 7.779665041821283e-06, "loss": 0.4031, "step": 6132 }, { "epoch": 0.33095893367870055, "grad_norm": 0.9090169181178056, "learning_rate": 7.779007253722724e-06, "loss": 0.3768, "step": 6133 }, { "epoch": 0.33101289730721495, "grad_norm": 0.9027988457670335, "learning_rate": 7.778349400123193e-06, "loss": 0.4037, "step": 6134 }, { "epoch": 0.33106686093572935, "grad_norm": 1.2268859048647611, "learning_rate": 7.777691481041597e-06, "loss": 0.6937, "step": 6135 }, { "epoch": 0.3311208245642437, "grad_norm": 1.1372995277245583, "learning_rate": 7.77703349649685e-06, "loss": 0.6176, "step": 6136 }, { "epoch": 0.3311747881927581, "grad_norm": 1.0567387595288629, "learning_rate": 7.776375446507857e-06, "loss": 0.5809, "step": 6137 }, { "epoch": 0.3312287518212725, "grad_norm": 1.081897335836043, "learning_rate": 7.775717331093533e-06, "loss": 0.5538, "step": 6138 }, { "epoch": 0.3312827154497868, "grad_norm": 1.1047635189905596, "learning_rate": 7.775059150272795e-06, "loss": 0.6363, "step": 6139 }, { "epoch": 0.3313366790783012, "grad_norm": 1.2515148565500125, "learning_rate": 7.774400904064556e-06, "loss": 0.4816, "step": 6140 }, { "epoch": 0.3313906427068156, "grad_norm": 0.9158682287962409, "learning_rate": 7.773742592487739e-06, "loss": 0.3639, "step": 6141 }, { "epoch": 0.33144460633532996, "grad_norm": 0.8884769213525272, "learning_rate": 7.773084215561262e-06, "loss": 0.3785, "step": 6142 }, { "epoch": 0.33149856996384436, "grad_norm": 1.1432520493471308, "learning_rate": 7.772425773304047e-06, "loss": 0.5964, "step": 6143 }, { "epoch": 0.33155253359235876, "grad_norm": 1.1356593622416467, "learning_rate": 7.771767265735019e-06, "loss": 0.467, "step": 6144 }, { "epoch": 0.33160649722087315, "grad_norm": 0.9889063966056117, "learning_rate": 7.771108692873106e-06, "loss": 0.4611, "step": 6145 }, { "epoch": 0.3316604608493875, "grad_norm": 1.175240397130465, "learning_rate": 7.770450054737234e-06, "loss": 0.6055, "step": 6146 }, { "epoch": 0.3317144244779019, "grad_norm": 0.9047121406246202, "learning_rate": 7.769791351346332e-06, "loss": 0.3788, "step": 6147 }, { "epoch": 0.3317683881064163, "grad_norm": 0.8664363420739913, "learning_rate": 7.769132582719334e-06, "loss": 0.3846, "step": 6148 }, { "epoch": 0.33182235173493063, "grad_norm": 1.2441924006650231, "learning_rate": 7.768473748875174e-06, "loss": 0.6713, "step": 6149 }, { "epoch": 0.33187631536344503, "grad_norm": 1.2550275352176894, "learning_rate": 7.767814849832785e-06, "loss": 0.6879, "step": 6150 }, { "epoch": 0.3319302789919594, "grad_norm": 0.7888016212071025, "learning_rate": 7.767155885611105e-06, "loss": 0.3727, "step": 6151 }, { "epoch": 0.3319842426204738, "grad_norm": 0.8147802624082152, "learning_rate": 7.766496856229076e-06, "loss": 0.3738, "step": 6152 }, { "epoch": 0.33203820624898817, "grad_norm": 1.0351181504952358, "learning_rate": 7.765837761705636e-06, "loss": 0.5501, "step": 6153 }, { "epoch": 0.33209216987750256, "grad_norm": 1.2782201053461395, "learning_rate": 7.76517860205973e-06, "loss": 0.6329, "step": 6154 }, { "epoch": 0.33214613350601696, "grad_norm": 0.8739673044433726, "learning_rate": 7.7645193773103e-06, "loss": 0.3975, "step": 6155 }, { "epoch": 0.3322000971345313, "grad_norm": 1.0526898003005154, "learning_rate": 
7.763860087476299e-06, "loss": 0.4815, "step": 6156 }, { "epoch": 0.3322540607630457, "grad_norm": 1.0634299284524442, "learning_rate": 7.763200732576667e-06, "loss": 0.5085, "step": 6157 }, { "epoch": 0.3323080243915601, "grad_norm": 1.0657653731444394, "learning_rate": 7.762541312630362e-06, "loss": 0.5456, "step": 6158 }, { "epoch": 0.3323619880200745, "grad_norm": 1.0837744262351066, "learning_rate": 7.76188182765633e-06, "loss": 0.5756, "step": 6159 }, { "epoch": 0.33241595164858884, "grad_norm": 0.9058608486351204, "learning_rate": 7.761222277673534e-06, "loss": 0.5253, "step": 6160 }, { "epoch": 0.33246991527710323, "grad_norm": 0.9383867067102744, "learning_rate": 7.760562662700922e-06, "loss": 0.393, "step": 6161 }, { "epoch": 0.33252387890561763, "grad_norm": 1.0754609082770223, "learning_rate": 7.759902982757455e-06, "loss": 0.5065, "step": 6162 }, { "epoch": 0.332577842534132, "grad_norm": 1.1052932132644804, "learning_rate": 7.759243237862094e-06, "loss": 0.5199, "step": 6163 }, { "epoch": 0.33263180616264637, "grad_norm": 0.9241645044762838, "learning_rate": 7.758583428033798e-06, "loss": 0.3702, "step": 6164 }, { "epoch": 0.33268576979116077, "grad_norm": 1.1045685198886126, "learning_rate": 7.757923553291534e-06, "loss": 0.7153, "step": 6165 }, { "epoch": 0.33273973341967517, "grad_norm": 1.0452794996703634, "learning_rate": 7.757263613654264e-06, "loss": 0.5346, "step": 6166 }, { "epoch": 0.3327936970481895, "grad_norm": 0.7905622623354926, "learning_rate": 7.756603609140958e-06, "loss": 0.4622, "step": 6167 }, { "epoch": 0.3328476606767039, "grad_norm": 1.1683529243915605, "learning_rate": 7.755943539770583e-06, "loss": 0.5473, "step": 6168 }, { "epoch": 0.3329016243052183, "grad_norm": 0.9113725120770961, "learning_rate": 7.755283405562111e-06, "loss": 0.5397, "step": 6169 }, { "epoch": 0.33295558793373264, "grad_norm": 0.9219700550871036, "learning_rate": 7.754623206534517e-06, "loss": 0.4165, "step": 6170 }, { "epoch": 0.33300955156224704, "grad_norm": 0.9884210014034804, "learning_rate": 7.753962942706774e-06, "loss": 0.466, "step": 6171 }, { "epoch": 0.33306351519076144, "grad_norm": 1.168377542711775, "learning_rate": 7.753302614097856e-06, "loss": 0.5802, "step": 6172 }, { "epoch": 0.3331174788192758, "grad_norm": 0.9469792470858159, "learning_rate": 7.752642220726746e-06, "loss": 0.4147, "step": 6173 }, { "epoch": 0.3331714424477902, "grad_norm": 1.0928192095907618, "learning_rate": 7.751981762612422e-06, "loss": 0.4672, "step": 6174 }, { "epoch": 0.3332254060763046, "grad_norm": 1.053552205707948, "learning_rate": 7.751321239773867e-06, "loss": 0.552, "step": 6175 }, { "epoch": 0.333279369704819, "grad_norm": 1.151135806094273, "learning_rate": 7.750660652230066e-06, "loss": 0.5742, "step": 6176 }, { "epoch": 0.3333333333333333, "grad_norm": 0.9427588102285339, "learning_rate": 7.75e-06, "loss": 0.4847, "step": 6177 }, { "epoch": 0.3333872969618477, "grad_norm": 0.9781882049791273, "learning_rate": 7.749339283102664e-06, "loss": 0.3721, "step": 6178 }, { "epoch": 0.3334412605903621, "grad_norm": 1.1147208162108093, "learning_rate": 7.748678501557042e-06, "loss": 0.5947, "step": 6179 }, { "epoch": 0.33349522421887645, "grad_norm": 0.9711244454710616, "learning_rate": 7.74801765538213e-06, "loss": 0.4781, "step": 6180 }, { "epoch": 0.33354918784739085, "grad_norm": 1.1561837661608232, "learning_rate": 7.747356744596917e-06, "loss": 0.5519, "step": 6181 }, { "epoch": 0.33360315147590525, "grad_norm": 0.9588564490847111, "learning_rate": 7.7466957692204e-06, "loss": 
0.3841, "step": 6182 }, { "epoch": 0.33365711510441964, "grad_norm": 1.0385495945361292, "learning_rate": 7.74603472927158e-06, "loss": 0.444, "step": 6183 }, { "epoch": 0.333711078732934, "grad_norm": 1.1460838471311547, "learning_rate": 7.74537362476945e-06, "loss": 0.5644, "step": 6184 }, { "epoch": 0.3337650423614484, "grad_norm": 1.162062961537598, "learning_rate": 7.744712455733013e-06, "loss": 0.5541, "step": 6185 }, { "epoch": 0.3338190059899628, "grad_norm": 1.1094366700974185, "learning_rate": 7.74405122218127e-06, "loss": 0.548, "step": 6186 }, { "epoch": 0.3338729696184771, "grad_norm": 0.8991752029372594, "learning_rate": 7.743389924133233e-06, "loss": 0.4114, "step": 6187 }, { "epoch": 0.3339269332469915, "grad_norm": 0.9009820680520623, "learning_rate": 7.742728561607899e-06, "loss": 0.4457, "step": 6188 }, { "epoch": 0.3339808968755059, "grad_norm": 1.0331677867944369, "learning_rate": 7.74206713462428e-06, "loss": 0.4025, "step": 6189 }, { "epoch": 0.3340348605040203, "grad_norm": 1.047441014607124, "learning_rate": 7.741405643201386e-06, "loss": 0.5374, "step": 6190 }, { "epoch": 0.33408882413253466, "grad_norm": 0.9205806518078077, "learning_rate": 7.74074408735823e-06, "loss": 0.4519, "step": 6191 }, { "epoch": 0.33414278776104905, "grad_norm": 1.1531118003245446, "learning_rate": 7.740082467113825e-06, "loss": 0.5055, "step": 6192 }, { "epoch": 0.33419675138956345, "grad_norm": 1.2584658364447374, "learning_rate": 7.739420782487186e-06, "loss": 0.6092, "step": 6193 }, { "epoch": 0.3342507150180778, "grad_norm": 1.0165848053520155, "learning_rate": 7.73875903349733e-06, "loss": 0.5802, "step": 6194 }, { "epoch": 0.3343046786465922, "grad_norm": 0.8266691740490584, "learning_rate": 7.738097220163277e-06, "loss": 0.4005, "step": 6195 }, { "epoch": 0.3343586422751066, "grad_norm": 0.9807061306459408, "learning_rate": 7.737435342504051e-06, "loss": 0.5693, "step": 6196 }, { "epoch": 0.334412605903621, "grad_norm": 1.1439539392576559, "learning_rate": 7.73677340053867e-06, "loss": 0.5825, "step": 6197 }, { "epoch": 0.3344665695321353, "grad_norm": 1.3212221441915841, "learning_rate": 7.736111394286163e-06, "loss": 0.7168, "step": 6198 }, { "epoch": 0.3345205331606497, "grad_norm": 1.1349413721010104, "learning_rate": 7.735449323765555e-06, "loss": 0.4282, "step": 6199 }, { "epoch": 0.3345744967891641, "grad_norm": 0.8921006412308018, "learning_rate": 7.734787188995873e-06, "loss": 0.3258, "step": 6200 }, { "epoch": 0.33462846041767846, "grad_norm": 1.0736174761664117, "learning_rate": 7.73412498999615e-06, "loss": 0.5256, "step": 6201 }, { "epoch": 0.33468242404619286, "grad_norm": 1.0787289276179206, "learning_rate": 7.73346272678542e-06, "loss": 0.6061, "step": 6202 }, { "epoch": 0.33473638767470726, "grad_norm": 1.0814675129678235, "learning_rate": 7.73280039938271e-06, "loss": 0.5207, "step": 6203 }, { "epoch": 0.33479035130322166, "grad_norm": 1.131049104851575, "learning_rate": 7.732138007807064e-06, "loss": 0.5389, "step": 6204 }, { "epoch": 0.334844314931736, "grad_norm": 1.2451629938135993, "learning_rate": 7.731475552077515e-06, "loss": 0.5252, "step": 6205 }, { "epoch": 0.3348982785602504, "grad_norm": 0.7658844056184926, "learning_rate": 7.730813032213103e-06, "loss": 0.3439, "step": 6206 }, { "epoch": 0.3349522421887648, "grad_norm": 0.9075539313989543, "learning_rate": 7.730150448232872e-06, "loss": 0.524, "step": 6207 }, { "epoch": 0.33500620581727913, "grad_norm": 1.0886275514523147, "learning_rate": 7.729487800155864e-06, "loss": 0.4539, "step": 6208 }, { 
"epoch": 0.33506016944579353, "grad_norm": 1.1341538757109026, "learning_rate": 7.728825088001122e-06, "loss": 0.536, "step": 6209 }, { "epoch": 0.33511413307430793, "grad_norm": 0.9472875223922648, "learning_rate": 7.728162311787696e-06, "loss": 0.4728, "step": 6210 }, { "epoch": 0.33516809670282227, "grad_norm": 1.1595884962346827, "learning_rate": 7.727499471534635e-06, "loss": 0.3942, "step": 6211 }, { "epoch": 0.33522206033133667, "grad_norm": 1.0454994808364215, "learning_rate": 7.726836567260986e-06, "loss": 0.4795, "step": 6212 }, { "epoch": 0.33527602395985107, "grad_norm": 0.9659529030000801, "learning_rate": 7.726173598985803e-06, "loss": 0.4252, "step": 6213 }, { "epoch": 0.33532998758836546, "grad_norm": 0.9990540779486091, "learning_rate": 7.725510566728145e-06, "loss": 0.4897, "step": 6214 }, { "epoch": 0.3353839512168798, "grad_norm": 1.1385907335869565, "learning_rate": 7.724847470507062e-06, "loss": 0.5397, "step": 6215 }, { "epoch": 0.3354379148453942, "grad_norm": 0.9090975220667553, "learning_rate": 7.724184310341614e-06, "loss": 0.4305, "step": 6216 }, { "epoch": 0.3354918784739086, "grad_norm": 0.9399110094263099, "learning_rate": 7.723521086250865e-06, "loss": 0.4619, "step": 6217 }, { "epoch": 0.33554584210242294, "grad_norm": 1.2387712828669781, "learning_rate": 7.72285779825387e-06, "loss": 0.5123, "step": 6218 }, { "epoch": 0.33559980573093734, "grad_norm": 1.0545356541682946, "learning_rate": 7.722194446369698e-06, "loss": 0.5675, "step": 6219 }, { "epoch": 0.33565376935945174, "grad_norm": 1.0968888939082861, "learning_rate": 7.721531030617412e-06, "loss": 0.5025, "step": 6220 }, { "epoch": 0.33570773298796613, "grad_norm": 1.0857646422151292, "learning_rate": 7.720867551016078e-06, "loss": 0.4928, "step": 6221 }, { "epoch": 0.3357616966164805, "grad_norm": 0.9409480017119642, "learning_rate": 7.720204007584769e-06, "loss": 0.4365, "step": 6222 }, { "epoch": 0.3358156602449949, "grad_norm": 1.1023841549887154, "learning_rate": 7.71954040034255e-06, "loss": 0.4462, "step": 6223 }, { "epoch": 0.33586962387350927, "grad_norm": 0.8566536236688128, "learning_rate": 7.7188767293085e-06, "loss": 0.3889, "step": 6224 }, { "epoch": 0.3359235875020236, "grad_norm": 0.9221969152580449, "learning_rate": 7.71821299450169e-06, "loss": 0.4176, "step": 6225 }, { "epoch": 0.335977551130538, "grad_norm": 1.131641950374379, "learning_rate": 7.717549195941197e-06, "loss": 0.5448, "step": 6226 }, { "epoch": 0.3360315147590524, "grad_norm": 1.0508744107354993, "learning_rate": 7.7168853336461e-06, "loss": 0.5044, "step": 6227 }, { "epoch": 0.3360854783875668, "grad_norm": 1.1458849771886326, "learning_rate": 7.716221407635476e-06, "loss": 0.794, "step": 6228 }, { "epoch": 0.33613944201608115, "grad_norm": 1.0531669851335395, "learning_rate": 7.715557417928414e-06, "loss": 0.483, "step": 6229 }, { "epoch": 0.33619340564459554, "grad_norm": 0.9396037936660145, "learning_rate": 7.714893364543989e-06, "loss": 0.4818, "step": 6230 }, { "epoch": 0.33624736927310994, "grad_norm": 1.0367994153004367, "learning_rate": 7.714229247501293e-06, "loss": 0.4946, "step": 6231 }, { "epoch": 0.3363013329016243, "grad_norm": 0.8949548810558738, "learning_rate": 7.713565066819411e-06, "loss": 0.3695, "step": 6232 }, { "epoch": 0.3363552965301387, "grad_norm": 1.1761031374673392, "learning_rate": 7.712900822517432e-06, "loss": 0.5905, "step": 6233 }, { "epoch": 0.3364092601586531, "grad_norm": 0.9355424164463529, "learning_rate": 7.712236514614448e-06, "loss": 0.4164, "step": 6234 }, { "epoch": 
0.3364632237871675, "grad_norm": 1.1632342365180592, "learning_rate": 7.71157214312955e-06, "loss": 0.4868, "step": 6235 }, { "epoch": 0.3365171874156818, "grad_norm": 0.7976187222577481, "learning_rate": 7.710907708081836e-06, "loss": 0.2694, "step": 6236 }, { "epoch": 0.3365711510441962, "grad_norm": 0.8439531299283709, "learning_rate": 7.710243209490398e-06, "loss": 0.3382, "step": 6237 }, { "epoch": 0.3366251146727106, "grad_norm": 1.0469436978608435, "learning_rate": 7.709578647374337e-06, "loss": 0.4593, "step": 6238 }, { "epoch": 0.33667907830122495, "grad_norm": 0.920572205891937, "learning_rate": 7.708914021752753e-06, "loss": 0.3301, "step": 6239 }, { "epoch": 0.33673304192973935, "grad_norm": 1.546360424269349, "learning_rate": 7.70824933264475e-06, "loss": 0.6702, "step": 6240 }, { "epoch": 0.33678700555825375, "grad_norm": 0.9578930665605552, "learning_rate": 7.70758458006943e-06, "loss": 0.4949, "step": 6241 }, { "epoch": 0.3368409691867681, "grad_norm": 0.8931781657743159, "learning_rate": 7.706919764045896e-06, "loss": 0.3517, "step": 6242 }, { "epoch": 0.3368949328152825, "grad_norm": 1.0050605057702025, "learning_rate": 7.70625488459326e-06, "loss": 0.4211, "step": 6243 }, { "epoch": 0.3369488964437969, "grad_norm": 0.8907736225700217, "learning_rate": 7.705589941730627e-06, "loss": 0.4586, "step": 6244 }, { "epoch": 0.3370028600723113, "grad_norm": 1.1853935600242616, "learning_rate": 7.704924935477114e-06, "loss": 0.4748, "step": 6245 }, { "epoch": 0.3370568237008256, "grad_norm": 1.1082499456181796, "learning_rate": 7.704259865851828e-06, "loss": 0.5586, "step": 6246 }, { "epoch": 0.33711078732934, "grad_norm": 0.8452663073810047, "learning_rate": 7.703594732873887e-06, "loss": 0.3875, "step": 6247 }, { "epoch": 0.3371647509578544, "grad_norm": 1.2142050814123375, "learning_rate": 7.702929536562409e-06, "loss": 0.5466, "step": 6248 }, { "epoch": 0.33721871458636876, "grad_norm": 1.078643778358689, "learning_rate": 7.702264276936508e-06, "loss": 0.4407, "step": 6249 }, { "epoch": 0.33727267821488316, "grad_norm": 1.1167831486414013, "learning_rate": 7.701598954015306e-06, "loss": 0.4881, "step": 6250 }, { "epoch": 0.33732664184339756, "grad_norm": 1.2160464689199002, "learning_rate": 7.700933567817926e-06, "loss": 0.7051, "step": 6251 }, { "epoch": 0.33738060547191195, "grad_norm": 1.1510221384976584, "learning_rate": 7.70026811836349e-06, "loss": 0.5096, "step": 6252 }, { "epoch": 0.3374345691004263, "grad_norm": 1.2141072527163788, "learning_rate": 7.699602605671128e-06, "loss": 0.5092, "step": 6253 }, { "epoch": 0.3374885327289407, "grad_norm": 0.9539107797251729, "learning_rate": 7.698937029759964e-06, "loss": 0.6115, "step": 6254 }, { "epoch": 0.3375424963574551, "grad_norm": 1.072742963942722, "learning_rate": 7.698271390649126e-06, "loss": 0.6071, "step": 6255 }, { "epoch": 0.33759645998596943, "grad_norm": 1.289193193092255, "learning_rate": 7.697605688357748e-06, "loss": 0.647, "step": 6256 }, { "epoch": 0.33765042361448383, "grad_norm": 1.0025469314040807, "learning_rate": 7.696939922904962e-06, "loss": 0.4288, "step": 6257 }, { "epoch": 0.3377043872429982, "grad_norm": 1.1559997467984333, "learning_rate": 7.696274094309902e-06, "loss": 0.5281, "step": 6258 }, { "epoch": 0.3377583508715126, "grad_norm": 1.1146118335200494, "learning_rate": 7.695608202591705e-06, "loss": 0.5448, "step": 6259 }, { "epoch": 0.33781231450002697, "grad_norm": 1.2992741138294905, "learning_rate": 7.694942247769514e-06, "loss": 0.6642, "step": 6260 }, { "epoch": 0.33786627812854136, 
"grad_norm": 1.4254929804782697, "learning_rate": 7.694276229862461e-06, "loss": 0.6635, "step": 6261 }, { "epoch": 0.33792024175705576, "grad_norm": 0.9549017920870384, "learning_rate": 7.693610148889692e-06, "loss": 0.388, "step": 6262 }, { "epoch": 0.3379742053855701, "grad_norm": 1.138202996844142, "learning_rate": 7.692944004870352e-06, "loss": 0.5313, "step": 6263 }, { "epoch": 0.3380281690140845, "grad_norm": 0.9567849978964882, "learning_rate": 7.692277797823585e-06, "loss": 0.3199, "step": 6264 }, { "epoch": 0.3380821326425989, "grad_norm": 1.024072452779663, "learning_rate": 7.69161152776854e-06, "loss": 0.4674, "step": 6265 }, { "epoch": 0.3381360962711133, "grad_norm": 0.7157583320499319, "learning_rate": 7.690945194724365e-06, "loss": 0.2732, "step": 6266 }, { "epoch": 0.33819005989962764, "grad_norm": 0.8954539961358388, "learning_rate": 7.690278798710209e-06, "loss": 0.3866, "step": 6267 }, { "epoch": 0.33824402352814203, "grad_norm": 0.8744872246341053, "learning_rate": 7.689612339745228e-06, "loss": 0.4, "step": 6268 }, { "epoch": 0.33829798715665643, "grad_norm": 0.7116845345196735, "learning_rate": 7.688945817848578e-06, "loss": 0.2993, "step": 6269 }, { "epoch": 0.3383519507851708, "grad_norm": 0.7845956396475425, "learning_rate": 7.688279233039413e-06, "loss": 0.2657, "step": 6270 }, { "epoch": 0.33840591441368517, "grad_norm": 1.1770705891807658, "learning_rate": 7.68761258533689e-06, "loss": 0.6383, "step": 6271 }, { "epoch": 0.33845987804219957, "grad_norm": 0.9761786362237939, "learning_rate": 7.686945874760172e-06, "loss": 0.418, "step": 6272 }, { "epoch": 0.33851384167071397, "grad_norm": 0.7805130628555754, "learning_rate": 7.686279101328419e-06, "loss": 0.4591, "step": 6273 }, { "epoch": 0.3385678052992283, "grad_norm": 0.9493394770658493, "learning_rate": 7.685612265060798e-06, "loss": 0.3988, "step": 6274 }, { "epoch": 0.3386217689277427, "grad_norm": 1.125565518828023, "learning_rate": 7.68494536597647e-06, "loss": 0.4915, "step": 6275 }, { "epoch": 0.3386757325562571, "grad_norm": 0.9511463352002476, "learning_rate": 7.684278404094605e-06, "loss": 0.4452, "step": 6276 }, { "epoch": 0.33872969618477144, "grad_norm": 1.1563436443726094, "learning_rate": 7.68361137943437e-06, "loss": 0.5193, "step": 6277 }, { "epoch": 0.33878365981328584, "grad_norm": 1.0792499628518062, "learning_rate": 7.682944292014938e-06, "loss": 0.5096, "step": 6278 }, { "epoch": 0.33883762344180024, "grad_norm": 1.0937024864006741, "learning_rate": 7.682277141855484e-06, "loss": 0.4247, "step": 6279 }, { "epoch": 0.3388915870703146, "grad_norm": 0.8723439174311844, "learning_rate": 7.681609928975178e-06, "loss": 0.441, "step": 6280 }, { "epoch": 0.338945550698829, "grad_norm": 0.9670619914367017, "learning_rate": 7.680942653393199e-06, "loss": 0.4241, "step": 6281 }, { "epoch": 0.3389995143273434, "grad_norm": 0.9456894080266762, "learning_rate": 7.680275315128723e-06, "loss": 0.5151, "step": 6282 }, { "epoch": 0.3390534779558578, "grad_norm": 0.914106705642595, "learning_rate": 7.679607914200933e-06, "loss": 0.378, "step": 6283 }, { "epoch": 0.3391074415843721, "grad_norm": 1.0857173479311941, "learning_rate": 7.678940450629009e-06, "loss": 0.5901, "step": 6284 }, { "epoch": 0.3391614052128865, "grad_norm": 1.043204465634035, "learning_rate": 7.678272924432134e-06, "loss": 0.5068, "step": 6285 }, { "epoch": 0.3392153688414009, "grad_norm": 0.9517043860576055, "learning_rate": 7.677605335629495e-06, "loss": 0.4279, "step": 6286 }, { "epoch": 0.33926933246991525, "grad_norm": 
0.9713022561636512, "learning_rate": 7.676937684240278e-06, "loss": 0.5305, "step": 6287 }, { "epoch": 0.33932329609842965, "grad_norm": 0.8537366096349396, "learning_rate": 7.676269970283671e-06, "loss": 0.4337, "step": 6288 }, { "epoch": 0.33937725972694405, "grad_norm": 0.9047815139704588, "learning_rate": 7.675602193778868e-06, "loss": 0.3887, "step": 6289 }, { "epoch": 0.33943122335545844, "grad_norm": 0.973979252588054, "learning_rate": 7.67493435474506e-06, "loss": 0.5091, "step": 6290 }, { "epoch": 0.3394851869839728, "grad_norm": 1.0036115990364909, "learning_rate": 7.674266453201439e-06, "loss": 0.595, "step": 6291 }, { "epoch": 0.3395391506124872, "grad_norm": 0.910612619538208, "learning_rate": 7.673598489167204e-06, "loss": 0.4023, "step": 6292 }, { "epoch": 0.3395931142410016, "grad_norm": 0.9692480446749643, "learning_rate": 7.672930462661552e-06, "loss": 0.4907, "step": 6293 }, { "epoch": 0.3396470778695159, "grad_norm": 1.0176595291929587, "learning_rate": 7.672262373703684e-06, "loss": 0.5278, "step": 6294 }, { "epoch": 0.3397010414980303, "grad_norm": 0.8389376156128697, "learning_rate": 7.6715942223128e-06, "loss": 0.3492, "step": 6295 }, { "epoch": 0.3397550051265447, "grad_norm": 1.0074580725478366, "learning_rate": 7.670926008508103e-06, "loss": 0.6242, "step": 6296 }, { "epoch": 0.3398089687550591, "grad_norm": 1.2622812273116983, "learning_rate": 7.670257732308798e-06, "loss": 0.591, "step": 6297 }, { "epoch": 0.33986293238357346, "grad_norm": 0.8862328220889185, "learning_rate": 7.669589393734096e-06, "loss": 0.5103, "step": 6298 }, { "epoch": 0.33991689601208785, "grad_norm": 1.0370404965270081, "learning_rate": 7.668920992803201e-06, "loss": 0.4119, "step": 6299 }, { "epoch": 0.33997085964060225, "grad_norm": 0.9787020411238342, "learning_rate": 7.668252529535325e-06, "loss": 0.5462, "step": 6300 }, { "epoch": 0.3400248232691166, "grad_norm": 1.0101759048242935, "learning_rate": 7.667584003949681e-06, "loss": 0.454, "step": 6301 }, { "epoch": 0.340078786897631, "grad_norm": 1.0867862169794416, "learning_rate": 7.666915416065483e-06, "loss": 0.6773, "step": 6302 }, { "epoch": 0.3401327505261454, "grad_norm": 1.0849707605236367, "learning_rate": 7.666246765901945e-06, "loss": 0.508, "step": 6303 }, { "epoch": 0.3401867141546598, "grad_norm": 0.9020795056785497, "learning_rate": 7.665578053478287e-06, "loss": 0.4128, "step": 6304 }, { "epoch": 0.34024067778317413, "grad_norm": 1.0632992691286802, "learning_rate": 7.664909278813727e-06, "loss": 0.4166, "step": 6305 }, { "epoch": 0.3402946414116885, "grad_norm": 1.1723284530761957, "learning_rate": 7.664240441927487e-06, "loss": 0.5434, "step": 6306 }, { "epoch": 0.3403486050402029, "grad_norm": 0.8624393610562399, "learning_rate": 7.663571542838789e-06, "loss": 0.5296, "step": 6307 }, { "epoch": 0.34040256866871726, "grad_norm": 1.0564669912287514, "learning_rate": 7.662902581566863e-06, "loss": 0.4807, "step": 6308 }, { "epoch": 0.34045653229723166, "grad_norm": 1.112544418472094, "learning_rate": 7.662233558130927e-06, "loss": 0.4677, "step": 6309 }, { "epoch": 0.34051049592574606, "grad_norm": 1.0163153145371064, "learning_rate": 7.661564472550214e-06, "loss": 0.527, "step": 6310 }, { "epoch": 0.3405644595542604, "grad_norm": 0.9668505684434294, "learning_rate": 7.660895324843957e-06, "loss": 0.4929, "step": 6311 }, { "epoch": 0.3406184231827748, "grad_norm": 1.2603419245206888, "learning_rate": 7.660226115031382e-06, "loss": 0.6693, "step": 6312 }, { "epoch": 0.3406723868112892, "grad_norm": 0.9117661213439325, 
"learning_rate": 7.659556843131725e-06, "loss": 0.4289, "step": 6313 }, { "epoch": 0.3407263504398036, "grad_norm": 1.3583767620233498, "learning_rate": 7.658887509164224e-06, "loss": 0.6548, "step": 6314 }, { "epoch": 0.34078031406831794, "grad_norm": 1.1418352331171917, "learning_rate": 7.658218113148114e-06, "loss": 0.4595, "step": 6315 }, { "epoch": 0.34083427769683233, "grad_norm": 1.0193413813616137, "learning_rate": 7.657548655102636e-06, "loss": 0.5823, "step": 6316 }, { "epoch": 0.34088824132534673, "grad_norm": 0.7149267136920753, "learning_rate": 7.656879135047027e-06, "loss": 0.3041, "step": 6317 }, { "epoch": 0.34094220495386107, "grad_norm": 1.046913567743191, "learning_rate": 7.656209553000534e-06, "loss": 0.4253, "step": 6318 }, { "epoch": 0.34099616858237547, "grad_norm": 1.3113168800793509, "learning_rate": 7.6555399089824e-06, "loss": 0.4963, "step": 6319 }, { "epoch": 0.34105013221088987, "grad_norm": 1.138870550778005, "learning_rate": 7.654870203011871e-06, "loss": 0.5497, "step": 6320 }, { "epoch": 0.34110409583940426, "grad_norm": 0.8557430901305412, "learning_rate": 7.654200435108194e-06, "loss": 0.4814, "step": 6321 }, { "epoch": 0.3411580594679186, "grad_norm": 0.8848039623278529, "learning_rate": 7.653530605290623e-06, "loss": 0.4062, "step": 6322 }, { "epoch": 0.341212023096433, "grad_norm": 1.4748674041503242, "learning_rate": 7.652860713578402e-06, "loss": 0.6646, "step": 6323 }, { "epoch": 0.3412659867249474, "grad_norm": 1.0496727986508256, "learning_rate": 7.65219075999079e-06, "loss": 0.6126, "step": 6324 }, { "epoch": 0.34131995035346174, "grad_norm": 1.071841800651816, "learning_rate": 7.651520744547044e-06, "loss": 0.546, "step": 6325 }, { "epoch": 0.34137391398197614, "grad_norm": 0.9689676452905024, "learning_rate": 7.650850667266414e-06, "loss": 0.5725, "step": 6326 }, { "epoch": 0.34142787761049054, "grad_norm": 1.0099071982322663, "learning_rate": 7.650180528168164e-06, "loss": 0.5473, "step": 6327 }, { "epoch": 0.34148184123900494, "grad_norm": 1.1909987987018726, "learning_rate": 7.649510327271553e-06, "loss": 0.5555, "step": 6328 }, { "epoch": 0.3415358048675193, "grad_norm": 0.9131746471027308, "learning_rate": 7.648840064595844e-06, "loss": 0.3918, "step": 6329 }, { "epoch": 0.3415897684960337, "grad_norm": 0.7974986153292442, "learning_rate": 7.6481697401603e-06, "loss": 0.3634, "step": 6330 }, { "epoch": 0.34164373212454807, "grad_norm": 0.8314613240049529, "learning_rate": 7.647499353984188e-06, "loss": 0.4014, "step": 6331 }, { "epoch": 0.3416976957530624, "grad_norm": 1.0361843510210693, "learning_rate": 7.646828906086772e-06, "loss": 0.4472, "step": 6332 }, { "epoch": 0.3417516593815768, "grad_norm": 0.9235053291665339, "learning_rate": 7.646158396487327e-06, "loss": 0.4443, "step": 6333 }, { "epoch": 0.3418056230100912, "grad_norm": 1.00072126004096, "learning_rate": 7.645487825205118e-06, "loss": 0.4256, "step": 6334 }, { "epoch": 0.3418595866386056, "grad_norm": 1.142601042215496, "learning_rate": 7.644817192259423e-06, "loss": 0.4915, "step": 6335 }, { "epoch": 0.34191355026711995, "grad_norm": 0.8576856261709185, "learning_rate": 7.644146497669515e-06, "loss": 0.5452, "step": 6336 }, { "epoch": 0.34196751389563435, "grad_norm": 1.149481291781249, "learning_rate": 7.643475741454668e-06, "loss": 0.5219, "step": 6337 }, { "epoch": 0.34202147752414874, "grad_norm": 1.0423935225361187, "learning_rate": 7.642804923634165e-06, "loss": 0.4498, "step": 6338 }, { "epoch": 0.3420754411526631, "grad_norm": 0.9969054322548437, "learning_rate": 
7.642134044227282e-06, "loss": 0.3759, "step": 6339 }, { "epoch": 0.3421294047811775, "grad_norm": 0.987731582957206, "learning_rate": 7.641463103253304e-06, "loss": 0.4883, "step": 6340 }, { "epoch": 0.3421833684096919, "grad_norm": 1.0382894479850697, "learning_rate": 7.640792100731509e-06, "loss": 0.5929, "step": 6341 }, { "epoch": 0.3422373320382063, "grad_norm": 1.1907527047385358, "learning_rate": 7.640121036681188e-06, "loss": 0.5691, "step": 6342 }, { "epoch": 0.3422912956667206, "grad_norm": 1.0946507105809948, "learning_rate": 7.639449911121626e-06, "loss": 0.504, "step": 6343 }, { "epoch": 0.342345259295235, "grad_norm": 0.8565754923633495, "learning_rate": 7.638778724072112e-06, "loss": 0.3902, "step": 6344 }, { "epoch": 0.3423992229237494, "grad_norm": 1.0617283960807233, "learning_rate": 7.638107475551935e-06, "loss": 0.4494, "step": 6345 }, { "epoch": 0.34245318655226376, "grad_norm": 0.910302382480241, "learning_rate": 7.63743616558039e-06, "loss": 0.3844, "step": 6346 }, { "epoch": 0.34250715018077815, "grad_norm": 0.9198534408101244, "learning_rate": 7.63676479417677e-06, "loss": 0.5034, "step": 6347 }, { "epoch": 0.34256111380929255, "grad_norm": 1.0720400159709258, "learning_rate": 7.63609336136037e-06, "loss": 0.568, "step": 6348 }, { "epoch": 0.3426150774378069, "grad_norm": 1.0838660416382222, "learning_rate": 7.63542186715049e-06, "loss": 0.5605, "step": 6349 }, { "epoch": 0.3426690410663213, "grad_norm": 1.0181251805640879, "learning_rate": 7.634750311566426e-06, "loss": 0.5865, "step": 6350 }, { "epoch": 0.3427230046948357, "grad_norm": 1.2847340280195056, "learning_rate": 7.634078694627483e-06, "loss": 0.6857, "step": 6351 }, { "epoch": 0.3427769683233501, "grad_norm": 0.9259508363518663, "learning_rate": 7.63340701635296e-06, "loss": 0.4335, "step": 6352 }, { "epoch": 0.3428309319518644, "grad_norm": 1.0443402344006814, "learning_rate": 7.632735276762166e-06, "loss": 0.5123, "step": 6353 }, { "epoch": 0.3428848955803788, "grad_norm": 0.8426121279675487, "learning_rate": 7.632063475874402e-06, "loss": 0.3425, "step": 6354 }, { "epoch": 0.3429388592088932, "grad_norm": 1.1210732642770516, "learning_rate": 7.631391613708982e-06, "loss": 0.5385, "step": 6355 }, { "epoch": 0.34299282283740756, "grad_norm": 0.9400363009568219, "learning_rate": 7.630719690285213e-06, "loss": 0.4866, "step": 6356 }, { "epoch": 0.34304678646592196, "grad_norm": 0.9403605898886146, "learning_rate": 7.630047705622406e-06, "loss": 0.4823, "step": 6357 }, { "epoch": 0.34310075009443636, "grad_norm": 1.1084938714531694, "learning_rate": 7.629375659739875e-06, "loss": 0.4763, "step": 6358 }, { "epoch": 0.34315471372295075, "grad_norm": 0.981265325152567, "learning_rate": 7.628703552656937e-06, "loss": 0.4047, "step": 6359 }, { "epoch": 0.3432086773514651, "grad_norm": 1.0064003155348655, "learning_rate": 7.628031384392909e-06, "loss": 0.4409, "step": 6360 }, { "epoch": 0.3432626409799795, "grad_norm": 1.1013551358252252, "learning_rate": 7.627359154967109e-06, "loss": 0.4745, "step": 6361 }, { "epoch": 0.3433166046084939, "grad_norm": 0.8141790756362646, "learning_rate": 7.626686864398856e-06, "loss": 0.4019, "step": 6362 }, { "epoch": 0.34337056823700823, "grad_norm": 1.3989751451926509, "learning_rate": 7.6260145127074746e-06, "loss": 0.5176, "step": 6363 }, { "epoch": 0.34342453186552263, "grad_norm": 0.874110896221824, "learning_rate": 7.6253420999122875e-06, "loss": 0.3832, "step": 6364 }, { "epoch": 0.34347849549403703, "grad_norm": 1.0114588233995112, "learning_rate": 
7.624669626032621e-06, "loss": 0.5306, "step": 6365 }, { "epoch": 0.3435324591225514, "grad_norm": 0.982987149687826, "learning_rate": 7.623997091087802e-06, "loss": 0.4903, "step": 6366 }, { "epoch": 0.34358642275106577, "grad_norm": 0.8828418394548824, "learning_rate": 7.623324495097162e-06, "loss": 0.5846, "step": 6367 }, { "epoch": 0.34364038637958016, "grad_norm": 0.9420449875319705, "learning_rate": 7.6226518380800275e-06, "loss": 0.4316, "step": 6368 }, { "epoch": 0.34369435000809456, "grad_norm": 0.9188101349918704, "learning_rate": 7.621979120055737e-06, "loss": 0.5859, "step": 6369 }, { "epoch": 0.3437483136366089, "grad_norm": 1.0419763229538284, "learning_rate": 7.6213063410436216e-06, "loss": 0.4107, "step": 6370 }, { "epoch": 0.3438022772651233, "grad_norm": 1.199786812127711, "learning_rate": 7.620633501063018e-06, "loss": 0.626, "step": 6371 }, { "epoch": 0.3438562408936377, "grad_norm": 1.0553500629029329, "learning_rate": 7.619960600133266e-06, "loss": 0.4565, "step": 6372 }, { "epoch": 0.3439102045221521, "grad_norm": 0.794081552326649, "learning_rate": 7.619287638273704e-06, "loss": 0.4156, "step": 6373 }, { "epoch": 0.34396416815066644, "grad_norm": 0.997361412584346, "learning_rate": 7.618614615503674e-06, "loss": 0.3859, "step": 6374 }, { "epoch": 0.34401813177918084, "grad_norm": 0.98300758544729, "learning_rate": 7.6179415318425195e-06, "loss": 0.4705, "step": 6375 }, { "epoch": 0.34407209540769523, "grad_norm": 1.079058765860522, "learning_rate": 7.617268387309584e-06, "loss": 0.4356, "step": 6376 }, { "epoch": 0.3441260590362096, "grad_norm": 0.9422696275862578, "learning_rate": 7.616595181924218e-06, "loss": 0.4377, "step": 6377 }, { "epoch": 0.344180022664724, "grad_norm": 1.0578269437497625, "learning_rate": 7.615921915705765e-06, "loss": 0.4188, "step": 6378 }, { "epoch": 0.34423398629323837, "grad_norm": 1.0310118964689827, "learning_rate": 7.61524858867358e-06, "loss": 0.4343, "step": 6379 }, { "epoch": 0.3442879499217527, "grad_norm": 1.0829446227000492, "learning_rate": 7.614575200847014e-06, "loss": 0.5215, "step": 6380 }, { "epoch": 0.3443419135502671, "grad_norm": 0.9737894662501605, "learning_rate": 7.61390175224542e-06, "loss": 0.4392, "step": 6381 }, { "epoch": 0.3443958771787815, "grad_norm": 0.9360616719228899, "learning_rate": 7.613228242888151e-06, "loss": 0.4848, "step": 6382 }, { "epoch": 0.3444498408072959, "grad_norm": 0.8024323938970329, "learning_rate": 7.612554672794569e-06, "loss": 0.3335, "step": 6383 }, { "epoch": 0.34450380443581025, "grad_norm": 0.920052733755465, "learning_rate": 7.6118810419840304e-06, "loss": 0.4222, "step": 6384 }, { "epoch": 0.34455776806432464, "grad_norm": 1.117902105427638, "learning_rate": 7.611207350475898e-06, "loss": 0.7098, "step": 6385 }, { "epoch": 0.34461173169283904, "grad_norm": 1.2803140830960997, "learning_rate": 7.6105335982895315e-06, "loss": 0.619, "step": 6386 }, { "epoch": 0.3446656953213534, "grad_norm": 1.0674378230252821, "learning_rate": 7.609859785444299e-06, "loss": 0.6925, "step": 6387 }, { "epoch": 0.3447196589498678, "grad_norm": 1.0580062888092039, "learning_rate": 7.609185911959562e-06, "loss": 0.4609, "step": 6388 }, { "epoch": 0.3447736225783822, "grad_norm": 0.8912327249496358, "learning_rate": 7.608511977854692e-06, "loss": 0.4097, "step": 6389 }, { "epoch": 0.3448275862068966, "grad_norm": 0.8336328877984187, "learning_rate": 7.607837983149057e-06, "loss": 0.4029, "step": 6390 }, { "epoch": 0.3448815498354109, "grad_norm": 0.8578264416157988, "learning_rate": 
7.607163927862029e-06, "loss": 0.3493, "step": 6391 }, { "epoch": 0.3449355134639253, "grad_norm": 1.167348057195879, "learning_rate": 7.606489812012979e-06, "loss": 0.4624, "step": 6392 }, { "epoch": 0.3449894770924397, "grad_norm": 0.8676766903105042, "learning_rate": 7.605815635621285e-06, "loss": 0.3989, "step": 6393 }, { "epoch": 0.34504344072095405, "grad_norm": 1.0999557762317755, "learning_rate": 7.60514139870632e-06, "loss": 0.5148, "step": 6394 }, { "epoch": 0.34509740434946845, "grad_norm": 0.952408679345257, "learning_rate": 7.604467101287466e-06, "loss": 0.4578, "step": 6395 }, { "epoch": 0.34515136797798285, "grad_norm": 1.2131888393093284, "learning_rate": 7.603792743384101e-06, "loss": 0.5792, "step": 6396 }, { "epoch": 0.34520533160649725, "grad_norm": 1.063457761280014, "learning_rate": 7.603118325015605e-06, "loss": 0.5566, "step": 6397 }, { "epoch": 0.3452592952350116, "grad_norm": 1.1188837544843955, "learning_rate": 7.602443846201366e-06, "loss": 0.4646, "step": 6398 }, { "epoch": 0.345313258863526, "grad_norm": 0.9044692449588837, "learning_rate": 7.601769306960765e-06, "loss": 0.5402, "step": 6399 }, { "epoch": 0.3453672224920404, "grad_norm": 1.0051518446811119, "learning_rate": 7.60109470731319e-06, "loss": 0.5386, "step": 6400 }, { "epoch": 0.3454211861205547, "grad_norm": 1.0566452822436465, "learning_rate": 7.600420047278031e-06, "loss": 0.5006, "step": 6401 }, { "epoch": 0.3454751497490691, "grad_norm": 0.9832971256714372, "learning_rate": 7.599745326874678e-06, "loss": 0.4828, "step": 6402 }, { "epoch": 0.3455291133775835, "grad_norm": 0.9721892342294046, "learning_rate": 7.599070546122521e-06, "loss": 0.4232, "step": 6403 }, { "epoch": 0.3455830770060979, "grad_norm": 0.8665740092486355, "learning_rate": 7.5983957050409565e-06, "loss": 0.3887, "step": 6404 }, { "epoch": 0.34563704063461226, "grad_norm": 0.9207762282099392, "learning_rate": 7.59772080364938e-06, "loss": 0.3645, "step": 6405 }, { "epoch": 0.34569100426312666, "grad_norm": 1.1279087747897294, "learning_rate": 7.597045841967186e-06, "loss": 0.4083, "step": 6406 }, { "epoch": 0.34574496789164105, "grad_norm": 1.0425945935703738, "learning_rate": 7.596370820013777e-06, "loss": 0.3998, "step": 6407 }, { "epoch": 0.3457989315201554, "grad_norm": 1.232002556156866, "learning_rate": 7.595695737808553e-06, "loss": 0.68, "step": 6408 }, { "epoch": 0.3458528951486698, "grad_norm": 1.156549679973464, "learning_rate": 7.595020595370915e-06, "loss": 0.4332, "step": 6409 }, { "epoch": 0.3459068587771842, "grad_norm": 0.940999975926922, "learning_rate": 7.594345392720268e-06, "loss": 0.4218, "step": 6410 }, { "epoch": 0.34596082240569853, "grad_norm": 1.0924012627886959, "learning_rate": 7.5936701298760195e-06, "loss": 0.5578, "step": 6411 }, { "epoch": 0.34601478603421293, "grad_norm": 1.0841953500309107, "learning_rate": 7.592994806857575e-06, "loss": 0.4731, "step": 6412 }, { "epoch": 0.3460687496627273, "grad_norm": 0.8754921314138775, "learning_rate": 7.592319423684344e-06, "loss": 0.3733, "step": 6413 }, { "epoch": 0.3461227132912417, "grad_norm": 1.2619835290026082, "learning_rate": 7.591643980375739e-06, "loss": 0.6186, "step": 6414 }, { "epoch": 0.34617667691975607, "grad_norm": 1.1960137828326032, "learning_rate": 7.590968476951175e-06, "loss": 0.6281, "step": 6415 }, { "epoch": 0.34623064054827046, "grad_norm": 1.1911410052819897, "learning_rate": 7.5902929134300615e-06, "loss": 0.5291, "step": 6416 }, { "epoch": 0.34628460417678486, "grad_norm": 1.0711483248649838, "learning_rate": 
7.589617289831817e-06, "loss": 0.5239, "step": 6417 }, { "epoch": 0.3463385678052992, "grad_norm": 0.8714906748261642, "learning_rate": 7.5889416061758625e-06, "loss": 0.4281, "step": 6418 }, { "epoch": 0.3463925314338136, "grad_norm": 1.0642068525464896, "learning_rate": 7.588265862481614e-06, "loss": 0.5423, "step": 6419 }, { "epoch": 0.346446495062328, "grad_norm": 0.9575506429811761, "learning_rate": 7.587590058768497e-06, "loss": 0.4753, "step": 6420 }, { "epoch": 0.3465004586908424, "grad_norm": 1.00335644490642, "learning_rate": 7.58691419505593e-06, "loss": 0.4925, "step": 6421 }, { "epoch": 0.34655442231935674, "grad_norm": 1.0086906323913811, "learning_rate": 7.586238271363341e-06, "loss": 0.503, "step": 6422 }, { "epoch": 0.34660838594787113, "grad_norm": 1.0230368435058932, "learning_rate": 7.585562287710156e-06, "loss": 0.596, "step": 6423 }, { "epoch": 0.34666234957638553, "grad_norm": 1.0827476034878034, "learning_rate": 7.584886244115803e-06, "loss": 0.5902, "step": 6424 }, { "epoch": 0.3467163132048999, "grad_norm": 0.8223604910409316, "learning_rate": 7.584210140599714e-06, "loss": 0.3812, "step": 6425 }, { "epoch": 0.34677027683341427, "grad_norm": 0.8966546820624178, "learning_rate": 7.583533977181319e-06, "loss": 0.3258, "step": 6426 }, { "epoch": 0.34682424046192867, "grad_norm": 0.8394177718062036, "learning_rate": 7.5828577538800505e-06, "loss": 0.5146, "step": 6427 }, { "epoch": 0.34687820409044307, "grad_norm": 1.0897835582347766, "learning_rate": 7.582181470715347e-06, "loss": 0.5358, "step": 6428 }, { "epoch": 0.3469321677189574, "grad_norm": 1.11215251980922, "learning_rate": 7.5815051277066455e-06, "loss": 0.5131, "step": 6429 }, { "epoch": 0.3469861313474718, "grad_norm": 0.9644162958511308, "learning_rate": 7.580828724873382e-06, "loss": 0.5649, "step": 6430 }, { "epoch": 0.3470400949759862, "grad_norm": 1.0695343312091405, "learning_rate": 7.580152262234999e-06, "loss": 0.4032, "step": 6431 }, { "epoch": 0.34709405860450054, "grad_norm": 1.1788988180911955, "learning_rate": 7.579475739810938e-06, "loss": 0.4835, "step": 6432 }, { "epoch": 0.34714802223301494, "grad_norm": 1.0003558963080261, "learning_rate": 7.5787991576206435e-06, "loss": 0.5289, "step": 6433 }, { "epoch": 0.34720198586152934, "grad_norm": 1.1524254463321606, "learning_rate": 7.578122515683559e-06, "loss": 0.5713, "step": 6434 }, { "epoch": 0.34725594949004374, "grad_norm": 0.9615286333214317, "learning_rate": 7.577445814019135e-06, "loss": 0.3906, "step": 6435 }, { "epoch": 0.3473099131185581, "grad_norm": 1.101815021687922, "learning_rate": 7.576769052646819e-06, "loss": 0.5202, "step": 6436 }, { "epoch": 0.3473638767470725, "grad_norm": 1.063094103031705, "learning_rate": 7.576092231586062e-06, "loss": 0.5565, "step": 6437 }, { "epoch": 0.3474178403755869, "grad_norm": 1.1340573369368097, "learning_rate": 7.575415350856316e-06, "loss": 0.5966, "step": 6438 }, { "epoch": 0.3474718040041012, "grad_norm": 1.0566919540456226, "learning_rate": 7.574738410477037e-06, "loss": 0.4258, "step": 6439 }, { "epoch": 0.3475257676326156, "grad_norm": 0.9569617990190707, "learning_rate": 7.57406141046768e-06, "loss": 0.5104, "step": 6440 }, { "epoch": 0.34757973126113, "grad_norm": 1.0831590881197488, "learning_rate": 7.573384350847701e-06, "loss": 0.5075, "step": 6441 }, { "epoch": 0.3476336948896444, "grad_norm": 1.0824118263068179, "learning_rate": 7.5727072316365605e-06, "loss": 0.4955, "step": 6442 }, { "epoch": 0.34768765851815875, "grad_norm": 0.9063023030182364, "learning_rate": 
7.572030052853722e-06, "loss": 0.467, "step": 6443 }, { "epoch": 0.34774162214667315, "grad_norm": 1.1201981532855316, "learning_rate": 7.5713528145186435e-06, "loss": 0.4068, "step": 6444 }, { "epoch": 0.34779558577518754, "grad_norm": 1.1609110774948603, "learning_rate": 7.570675516650793e-06, "loss": 0.5264, "step": 6445 }, { "epoch": 0.3478495494037019, "grad_norm": 1.1637518139264371, "learning_rate": 7.5699981592696345e-06, "loss": 0.6107, "step": 6446 }, { "epoch": 0.3479035130322163, "grad_norm": 0.9272526193771339, "learning_rate": 7.56932074239464e-06, "loss": 0.4033, "step": 6447 }, { "epoch": 0.3479574766607307, "grad_norm": 1.2123822935211714, "learning_rate": 7.568643266045275e-06, "loss": 0.7548, "step": 6448 }, { "epoch": 0.348011440289245, "grad_norm": 0.9616844197235982, "learning_rate": 7.567965730241012e-06, "loss": 0.3641, "step": 6449 }, { "epoch": 0.3480654039177594, "grad_norm": 0.9304163656923773, "learning_rate": 7.567288135001326e-06, "loss": 0.4787, "step": 6450 }, { "epoch": 0.3481193675462738, "grad_norm": 1.043286908052124, "learning_rate": 7.56661048034569e-06, "loss": 0.545, "step": 6451 }, { "epoch": 0.3481733311747882, "grad_norm": 1.069173449791837, "learning_rate": 7.5659327662935786e-06, "loss": 0.5335, "step": 6452 }, { "epoch": 0.34822729480330256, "grad_norm": 1.0317317417452494, "learning_rate": 7.565254992864474e-06, "loss": 0.4982, "step": 6453 }, { "epoch": 0.34828125843181695, "grad_norm": 0.8975768981766608, "learning_rate": 7.564577160077854e-06, "loss": 0.3754, "step": 6454 }, { "epoch": 0.34833522206033135, "grad_norm": 0.9326442682545582, "learning_rate": 7.5638992679532e-06, "loss": 0.3907, "step": 6455 }, { "epoch": 0.3483891856888457, "grad_norm": 1.1291538707618298, "learning_rate": 7.563221316509996e-06, "loss": 0.7164, "step": 6456 }, { "epoch": 0.3484431493173601, "grad_norm": 1.044709528421894, "learning_rate": 7.562543305767726e-06, "loss": 0.486, "step": 6457 }, { "epoch": 0.3484971129458745, "grad_norm": 0.9766970730675506, "learning_rate": 7.561865235745876e-06, "loss": 0.5857, "step": 6458 }, { "epoch": 0.3485510765743889, "grad_norm": 1.1383732920925034, "learning_rate": 7.561187106463936e-06, "loss": 0.6222, "step": 6459 }, { "epoch": 0.3486050402029032, "grad_norm": 0.9393773196610555, "learning_rate": 7.560508917941399e-06, "loss": 0.4116, "step": 6460 }, { "epoch": 0.3486590038314176, "grad_norm": 1.0125907231067426, "learning_rate": 7.5598306701977505e-06, "loss": 0.5227, "step": 6461 }, { "epoch": 0.348712967459932, "grad_norm": 1.2114878822826867, "learning_rate": 7.559152363252489e-06, "loss": 0.472, "step": 6462 }, { "epoch": 0.34876693108844636, "grad_norm": 1.0400701747371557, "learning_rate": 7.558473997125108e-06, "loss": 0.6635, "step": 6463 }, { "epoch": 0.34882089471696076, "grad_norm": 0.8836531776509655, "learning_rate": 7.557795571835103e-06, "loss": 0.4867, "step": 6464 }, { "epoch": 0.34887485834547516, "grad_norm": 1.141639891334631, "learning_rate": 7.5571170874019754e-06, "loss": 0.4431, "step": 6465 }, { "epoch": 0.34892882197398956, "grad_norm": 1.0156102734384294, "learning_rate": 7.556438543845223e-06, "loss": 0.4458, "step": 6466 }, { "epoch": 0.3489827856025039, "grad_norm": 1.0374516517955577, "learning_rate": 7.555759941184351e-06, "loss": 0.467, "step": 6467 }, { "epoch": 0.3490367492310183, "grad_norm": 1.115149400148132, "learning_rate": 7.55508127943886e-06, "loss": 0.5661, "step": 6468 }, { "epoch": 0.3490907128595327, "grad_norm": 1.0427405465174675, "learning_rate": 
7.5544025586282556e-06, "loss": 0.5215, "step": 6469 }, { "epoch": 0.34914467648804703, "grad_norm": 0.9750814615296937, "learning_rate": 7.553723778772045e-06, "loss": 0.4367, "step": 6470 }, { "epoch": 0.34919864011656143, "grad_norm": 0.9036776672247029, "learning_rate": 7.5530449398897395e-06, "loss": 0.435, "step": 6471 }, { "epoch": 0.34925260374507583, "grad_norm": 1.1634490762494756, "learning_rate": 7.552366042000847e-06, "loss": 0.649, "step": 6472 }, { "epoch": 0.3493065673735902, "grad_norm": 0.7996603303044308, "learning_rate": 7.551687085124884e-06, "loss": 0.3644, "step": 6473 }, { "epoch": 0.34936053100210457, "grad_norm": 0.9898006183722865, "learning_rate": 7.551008069281357e-06, "loss": 0.5369, "step": 6474 }, { "epoch": 0.34941449463061897, "grad_norm": 0.99000172000474, "learning_rate": 7.550328994489786e-06, "loss": 0.4508, "step": 6475 }, { "epoch": 0.34946845825913336, "grad_norm": 1.0784946671790563, "learning_rate": 7.54964986076969e-06, "loss": 0.473, "step": 6476 }, { "epoch": 0.3495224218876477, "grad_norm": 0.9638909696345807, "learning_rate": 7.548970668140585e-06, "loss": 0.4623, "step": 6477 }, { "epoch": 0.3495763855161621, "grad_norm": 1.1759166298808386, "learning_rate": 7.548291416621994e-06, "loss": 0.4774, "step": 6478 }, { "epoch": 0.3496303491446765, "grad_norm": 0.9478636487886638, "learning_rate": 7.547612106233436e-06, "loss": 0.402, "step": 6479 }, { "epoch": 0.34968431277319084, "grad_norm": 1.2501544237995892, "learning_rate": 7.546932736994438e-06, "loss": 0.5943, "step": 6480 }, { "epoch": 0.34973827640170524, "grad_norm": 1.0661834044753822, "learning_rate": 7.546253308924525e-06, "loss": 0.6138, "step": 6481 }, { "epoch": 0.34979224003021964, "grad_norm": 1.236486177431717, "learning_rate": 7.545573822043224e-06, "loss": 0.5332, "step": 6482 }, { "epoch": 0.34984620365873403, "grad_norm": 0.8428806868503281, "learning_rate": 7.544894276370065e-06, "loss": 0.4421, "step": 6483 }, { "epoch": 0.3499001672872484, "grad_norm": 0.9053618769874671, "learning_rate": 7.544214671924579e-06, "loss": 0.3339, "step": 6484 }, { "epoch": 0.3499541309157628, "grad_norm": 0.9496733088647022, "learning_rate": 7.543535008726296e-06, "loss": 0.4169, "step": 6485 }, { "epoch": 0.35000809454427717, "grad_norm": 0.9239762575429569, "learning_rate": 7.542855286794752e-06, "loss": 0.3784, "step": 6486 }, { "epoch": 0.3500620581727915, "grad_norm": 1.2124299029352064, "learning_rate": 7.542175506149484e-06, "loss": 0.506, "step": 6487 }, { "epoch": 0.3501160218013059, "grad_norm": 0.9059247553173704, "learning_rate": 7.541495666810029e-06, "loss": 0.4454, "step": 6488 }, { "epoch": 0.3501699854298203, "grad_norm": 1.0510207747074805, "learning_rate": 7.540815768795924e-06, "loss": 0.418, "step": 6489 }, { "epoch": 0.3502239490583347, "grad_norm": 1.0799165418452552, "learning_rate": 7.540135812126712e-06, "loss": 0.5981, "step": 6490 }, { "epoch": 0.35027791268684905, "grad_norm": 1.1378964315927171, "learning_rate": 7.539455796821936e-06, "loss": 0.5029, "step": 6491 }, { "epoch": 0.35033187631536344, "grad_norm": 0.9716436585584425, "learning_rate": 7.53877572290114e-06, "loss": 0.3808, "step": 6492 }, { "epoch": 0.35038583994387784, "grad_norm": 0.9631044257974124, "learning_rate": 7.53809559038387e-06, "loss": 0.4207, "step": 6493 }, { "epoch": 0.3504398035723922, "grad_norm": 1.1221484059369171, "learning_rate": 7.537415399289672e-06, "loss": 0.697, "step": 6494 }, { "epoch": 0.3504937672009066, "grad_norm": 0.9437282279109885, "learning_rate": 
7.536735149638098e-06, "loss": 0.3959, "step": 6495 }, { "epoch": 0.350547730829421, "grad_norm": 0.9971717406194858, "learning_rate": 7.536054841448698e-06, "loss": 0.4324, "step": 6496 }, { "epoch": 0.3506016944579354, "grad_norm": 0.9334461101355187, "learning_rate": 7.535374474741023e-06, "loss": 0.4975, "step": 6497 }, { "epoch": 0.3506556580864497, "grad_norm": 1.089678497834821, "learning_rate": 7.53469404953463e-06, "loss": 0.3305, "step": 6498 }, { "epoch": 0.3507096217149641, "grad_norm": 1.112337886551743, "learning_rate": 7.534013565849073e-06, "loss": 0.5749, "step": 6499 }, { "epoch": 0.3507635853434785, "grad_norm": 0.9452904016541749, "learning_rate": 7.533333023703912e-06, "loss": 0.3516, "step": 6500 }, { "epoch": 0.3507635853434785, "eval_loss": 0.5656527876853943, "eval_runtime": 163.6851, "eval_samples_per_second": 21.01, "eval_steps_per_second": 0.88, "step": 6500 }, { "epoch": 0.35081754897199285, "grad_norm": 0.9767187745997931, "learning_rate": 7.532652423118705e-06, "loss": 0.4503, "step": 6501 }, { "epoch": 0.35087151260050725, "grad_norm": 1.0899514245120523, "learning_rate": 7.531971764113015e-06, "loss": 0.5035, "step": 6502 }, { "epoch": 0.35092547622902165, "grad_norm": 1.2373739896553027, "learning_rate": 7.531291046706402e-06, "loss": 0.5972, "step": 6503 }, { "epoch": 0.35097943985753605, "grad_norm": 0.9063542039805706, "learning_rate": 7.53061027091843e-06, "loss": 0.492, "step": 6504 }, { "epoch": 0.3510334034860504, "grad_norm": 0.8782547005794519, "learning_rate": 7.529929436768671e-06, "loss": 0.3857, "step": 6505 }, { "epoch": 0.3510873671145648, "grad_norm": 1.2906422581490622, "learning_rate": 7.5292485442766865e-06, "loss": 0.5738, "step": 6506 }, { "epoch": 0.3511413307430792, "grad_norm": 0.8534822401540496, "learning_rate": 7.528567593462049e-06, "loss": 0.4218, "step": 6507 }, { "epoch": 0.3511952943715935, "grad_norm": 0.7167377293175881, "learning_rate": 7.527886584344329e-06, "loss": 0.3629, "step": 6508 }, { "epoch": 0.3512492580001079, "grad_norm": 1.0704166342047348, "learning_rate": 7.527205516943099e-06, "loss": 0.5124, "step": 6509 }, { "epoch": 0.3513032216286223, "grad_norm": 0.9980188363034932, "learning_rate": 7.526524391277935e-06, "loss": 0.4384, "step": 6510 }, { "epoch": 0.3513571852571367, "grad_norm": 1.0428380111100113, "learning_rate": 7.525843207368411e-06, "loss": 0.4504, "step": 6511 }, { "epoch": 0.35141114888565106, "grad_norm": 1.2156763880472938, "learning_rate": 7.525161965234108e-06, "loss": 0.6225, "step": 6512 }, { "epoch": 0.35146511251416546, "grad_norm": 0.9182257561093974, "learning_rate": 7.524480664894602e-06, "loss": 0.3537, "step": 6513 }, { "epoch": 0.35151907614267985, "grad_norm": 0.9104483078398737, "learning_rate": 7.523799306369477e-06, "loss": 0.4191, "step": 6514 }, { "epoch": 0.3515730397711942, "grad_norm": 0.8672700304103346, "learning_rate": 7.523117889678317e-06, "loss": 0.4061, "step": 6515 }, { "epoch": 0.3516270033997086, "grad_norm": 1.0509974525092312, "learning_rate": 7.5224364148407026e-06, "loss": 0.4303, "step": 6516 }, { "epoch": 0.351680967028223, "grad_norm": 0.9847922725703373, "learning_rate": 7.521754881876221e-06, "loss": 0.441, "step": 6517 }, { "epoch": 0.35173493065673733, "grad_norm": 0.9925256941585913, "learning_rate": 7.521073290804464e-06, "loss": 0.4132, "step": 6518 }, { "epoch": 0.35178889428525173, "grad_norm": 1.0322812676550408, "learning_rate": 7.520391641645016e-06, "loss": 0.5567, "step": 6519 }, { "epoch": 0.3518428579137661, "grad_norm": 
1.0933715668745543, "learning_rate": 7.519709934417472e-06, "loss": 0.4357, "step": 6520 }, { "epoch": 0.3518968215422805, "grad_norm": 1.0906053014846768, "learning_rate": 7.5190281691414235e-06, "loss": 0.4322, "step": 6521 }, { "epoch": 0.35195078517079487, "grad_norm": 0.9217831467895617, "learning_rate": 7.518346345836465e-06, "loss": 0.4451, "step": 6522 }, { "epoch": 0.35200474879930926, "grad_norm": 1.0339158278913738, "learning_rate": 7.517664464522193e-06, "loss": 0.5398, "step": 6523 }, { "epoch": 0.35205871242782366, "grad_norm": 0.9235251329321243, "learning_rate": 7.516982525218206e-06, "loss": 0.4561, "step": 6524 }, { "epoch": 0.352112676056338, "grad_norm": 1.0244854001814672, "learning_rate": 7.516300527944104e-06, "loss": 0.3849, "step": 6525 }, { "epoch": 0.3521666396848524, "grad_norm": 1.012666720232384, "learning_rate": 7.515618472719487e-06, "loss": 0.5758, "step": 6526 }, { "epoch": 0.3522206033133668, "grad_norm": 0.8983934466627281, "learning_rate": 7.514936359563958e-06, "loss": 0.4833, "step": 6527 }, { "epoch": 0.3522745669418812, "grad_norm": 1.0900704030007031, "learning_rate": 7.514254188497122e-06, "loss": 0.5407, "step": 6528 }, { "epoch": 0.35232853057039554, "grad_norm": 0.9486340976612919, "learning_rate": 7.513571959538586e-06, "loss": 0.4836, "step": 6529 }, { "epoch": 0.35238249419890993, "grad_norm": 0.8820742830405071, "learning_rate": 7.512889672707957e-06, "loss": 0.3309, "step": 6530 }, { "epoch": 0.35243645782742433, "grad_norm": 1.075369047919462, "learning_rate": 7.5122073280248445e-06, "loss": 0.5763, "step": 6531 }, { "epoch": 0.3524904214559387, "grad_norm": 1.1301400030523072, "learning_rate": 7.5115249255088615e-06, "loss": 0.6124, "step": 6532 }, { "epoch": 0.35254438508445307, "grad_norm": 1.1899868255866837, "learning_rate": 7.510842465179619e-06, "loss": 0.4622, "step": 6533 }, { "epoch": 0.35259834871296747, "grad_norm": 1.283437097140067, "learning_rate": 7.510159947056731e-06, "loss": 0.4292, "step": 6534 }, { "epoch": 0.35265231234148187, "grad_norm": 1.2186542485318304, "learning_rate": 7.509477371159816e-06, "loss": 0.7543, "step": 6535 }, { "epoch": 0.3527062759699962, "grad_norm": 0.9148757560334622, "learning_rate": 7.5087947375084915e-06, "loss": 0.4527, "step": 6536 }, { "epoch": 0.3527602395985106, "grad_norm": 1.1039072354820263, "learning_rate": 7.508112046122377e-06, "loss": 0.5021, "step": 6537 }, { "epoch": 0.352814203227025, "grad_norm": 1.1977852556720494, "learning_rate": 7.507429297021091e-06, "loss": 0.4306, "step": 6538 }, { "epoch": 0.35286816685553934, "grad_norm": 1.0543272646655182, "learning_rate": 7.506746490224261e-06, "loss": 0.4738, "step": 6539 }, { "epoch": 0.35292213048405374, "grad_norm": 1.1317986055465181, "learning_rate": 7.506063625751508e-06, "loss": 0.601, "step": 6540 }, { "epoch": 0.35297609411256814, "grad_norm": 0.8227484742377368, "learning_rate": 7.505380703622459e-06, "loss": 0.3848, "step": 6541 }, { "epoch": 0.35303005774108254, "grad_norm": 1.0516969749419087, "learning_rate": 7.504697723856742e-06, "loss": 0.5662, "step": 6542 }, { "epoch": 0.3530840213695969, "grad_norm": 0.8543176655609518, "learning_rate": 7.504014686473988e-06, "loss": 0.3906, "step": 6543 }, { "epoch": 0.3531379849981113, "grad_norm": 0.9048863952231032, "learning_rate": 7.503331591493827e-06, "loss": 0.4213, "step": 6544 }, { "epoch": 0.3531919486266257, "grad_norm": 1.034883559470357, "learning_rate": 7.5026484389358915e-06, "loss": 0.5135, "step": 6545 }, { "epoch": 0.35324591225514, "grad_norm": 
0.8891547021946267, "learning_rate": 7.501965228819815e-06, "loss": 0.3996, "step": 6546 }, { "epoch": 0.3532998758836544, "grad_norm": 1.1161633245858142, "learning_rate": 7.501281961165236e-06, "loss": 0.4659, "step": 6547 }, { "epoch": 0.3533538395121688, "grad_norm": 0.9282455990120431, "learning_rate": 7.500598635991793e-06, "loss": 0.458, "step": 6548 }, { "epoch": 0.35340780314068315, "grad_norm": 1.126714993253324, "learning_rate": 7.499915253319121e-06, "loss": 0.6586, "step": 6549 }, { "epoch": 0.35346176676919755, "grad_norm": 1.1556134500078927, "learning_rate": 7.499231813166865e-06, "loss": 0.5005, "step": 6550 }, { "epoch": 0.35351573039771195, "grad_norm": 1.0527378431484553, "learning_rate": 7.498548315554667e-06, "loss": 0.5111, "step": 6551 }, { "epoch": 0.35356969402622634, "grad_norm": 0.9064509540699975, "learning_rate": 7.49786476050217e-06, "loss": 0.4669, "step": 6552 }, { "epoch": 0.3536236576547407, "grad_norm": 1.0730732019894063, "learning_rate": 7.4971811480290225e-06, "loss": 0.4403, "step": 6553 }, { "epoch": 0.3536776212832551, "grad_norm": 0.9676578458527063, "learning_rate": 7.49649747815487e-06, "loss": 0.3726, "step": 6554 }, { "epoch": 0.3537315849117695, "grad_norm": 1.1235517391637238, "learning_rate": 7.495813750899363e-06, "loss": 0.5071, "step": 6555 }, { "epoch": 0.3537855485402838, "grad_norm": 0.8700223500916819, "learning_rate": 7.495129966282153e-06, "loss": 0.4991, "step": 6556 }, { "epoch": 0.3538395121687982, "grad_norm": 0.8530719531778566, "learning_rate": 7.4944461243228904e-06, "loss": 0.3444, "step": 6557 }, { "epoch": 0.3538934757973126, "grad_norm": 0.9832266779014823, "learning_rate": 7.4937622250412325e-06, "loss": 0.431, "step": 6558 }, { "epoch": 0.353947439425827, "grad_norm": 0.976178697446358, "learning_rate": 7.493078268456832e-06, "loss": 0.4827, "step": 6559 }, { "epoch": 0.35400140305434136, "grad_norm": 1.3005500017645097, "learning_rate": 7.492394254589348e-06, "loss": 0.6174, "step": 6560 }, { "epoch": 0.35405536668285575, "grad_norm": 0.7978554418033181, "learning_rate": 7.491710183458441e-06, "loss": 0.3386, "step": 6561 }, { "epoch": 0.35410933031137015, "grad_norm": 0.9614706703767442, "learning_rate": 7.491026055083771e-06, "loss": 0.505, "step": 6562 }, { "epoch": 0.3541632939398845, "grad_norm": 0.7489083952812767, "learning_rate": 7.4903418694849995e-06, "loss": 0.3307, "step": 6563 }, { "epoch": 0.3542172575683989, "grad_norm": 1.3067046354755372, "learning_rate": 7.489657626681792e-06, "loss": 0.6683, "step": 6564 }, { "epoch": 0.3542712211969133, "grad_norm": 1.0315985869882875, "learning_rate": 7.488973326693815e-06, "loss": 0.4483, "step": 6565 }, { "epoch": 0.3543251848254277, "grad_norm": 1.0352344968906375, "learning_rate": 7.488288969540732e-06, "loss": 0.4214, "step": 6566 }, { "epoch": 0.35437914845394203, "grad_norm": 1.029079623392603, "learning_rate": 7.487604555242218e-06, "loss": 0.4392, "step": 6567 }, { "epoch": 0.3544331120824564, "grad_norm": 1.0465136385765714, "learning_rate": 7.48692008381794e-06, "loss": 0.4642, "step": 6568 }, { "epoch": 0.3544870757109708, "grad_norm": 1.0752119066245482, "learning_rate": 7.48623555528757e-06, "loss": 0.5067, "step": 6569 }, { "epoch": 0.35454103933948516, "grad_norm": 1.0812256978833386, "learning_rate": 7.485550969670785e-06, "loss": 0.4937, "step": 6570 }, { "epoch": 0.35459500296799956, "grad_norm": 0.9426732062988696, "learning_rate": 7.484866326987258e-06, "loss": 0.523, "step": 6571 }, { "epoch": 0.35464896659651396, "grad_norm": 
0.9659435840074309, "learning_rate": 7.484181627256667e-06, "loss": 0.5122, "step": 6572 }, { "epoch": 0.35470293022502836, "grad_norm": 0.8643323539680438, "learning_rate": 7.48349687049869e-06, "loss": 0.5218, "step": 6573 }, { "epoch": 0.3547568938535427, "grad_norm": 1.017844928597624, "learning_rate": 7.482812056733011e-06, "loss": 0.418, "step": 6574 }, { "epoch": 0.3548108574820571, "grad_norm": 0.9180222678695006, "learning_rate": 7.482127185979308e-06, "loss": 0.3677, "step": 6575 }, { "epoch": 0.3548648211105715, "grad_norm": 1.128070416482334, "learning_rate": 7.4814422582572675e-06, "loss": 0.5085, "step": 6576 }, { "epoch": 0.35491878473908584, "grad_norm": 1.0541985998890318, "learning_rate": 7.480757273586575e-06, "loss": 0.5239, "step": 6577 }, { "epoch": 0.35497274836760023, "grad_norm": 1.4431963200329356, "learning_rate": 7.4800722319869166e-06, "loss": 0.5252, "step": 6578 }, { "epoch": 0.35502671199611463, "grad_norm": 0.835595730971848, "learning_rate": 7.479387133477982e-06, "loss": 0.4226, "step": 6579 }, { "epoch": 0.355080675624629, "grad_norm": 1.1275770036973884, "learning_rate": 7.478701978079461e-06, "loss": 0.4153, "step": 6580 }, { "epoch": 0.35513463925314337, "grad_norm": 1.0814275394069335, "learning_rate": 7.478016765811047e-06, "loss": 0.501, "step": 6581 }, { "epoch": 0.35518860288165777, "grad_norm": 1.0176986314889893, "learning_rate": 7.477331496692429e-06, "loss": 0.412, "step": 6582 }, { "epoch": 0.35524256651017216, "grad_norm": 1.091184424651562, "learning_rate": 7.476646170743309e-06, "loss": 0.4449, "step": 6583 }, { "epoch": 0.3552965301386865, "grad_norm": 1.1409242080403468, "learning_rate": 7.475960787983381e-06, "loss": 0.5783, "step": 6584 }, { "epoch": 0.3553504937672009, "grad_norm": 1.0044724139352164, "learning_rate": 7.475275348432342e-06, "loss": 0.5442, "step": 6585 }, { "epoch": 0.3554044573957153, "grad_norm": 0.9760728680467642, "learning_rate": 7.474589852109893e-06, "loss": 0.4575, "step": 6586 }, { "epoch": 0.35545842102422964, "grad_norm": 0.9342750166931196, "learning_rate": 7.473904299035738e-06, "loss": 0.4684, "step": 6587 }, { "epoch": 0.35551238465274404, "grad_norm": 1.1136080273960873, "learning_rate": 7.4732186892295775e-06, "loss": 0.4052, "step": 6588 }, { "epoch": 0.35556634828125844, "grad_norm": 0.8539770458239477, "learning_rate": 7.4725330227111195e-06, "loss": 0.3837, "step": 6589 }, { "epoch": 0.35562031190977283, "grad_norm": 1.0565883288543376, "learning_rate": 7.471847299500068e-06, "loss": 0.6871, "step": 6590 }, { "epoch": 0.3556742755382872, "grad_norm": 1.0309164332032599, "learning_rate": 7.471161519616135e-06, "loss": 0.4717, "step": 6591 }, { "epoch": 0.3557282391668016, "grad_norm": 1.077510731868254, "learning_rate": 7.470475683079026e-06, "loss": 0.4438, "step": 6592 }, { "epoch": 0.35578220279531597, "grad_norm": 0.8440084373183798, "learning_rate": 7.469789789908455e-06, "loss": 0.3238, "step": 6593 }, { "epoch": 0.3558361664238303, "grad_norm": 1.2947619627388036, "learning_rate": 7.469103840124137e-06, "loss": 0.841, "step": 6594 }, { "epoch": 0.3558901300523447, "grad_norm": 0.8581337425178368, "learning_rate": 7.468417833745782e-06, "loss": 0.4336, "step": 6595 }, { "epoch": 0.3559440936808591, "grad_norm": 0.9937365637669544, "learning_rate": 7.467731770793112e-06, "loss": 0.4611, "step": 6596 }, { "epoch": 0.3559980573093735, "grad_norm": 0.9230737949764661, "learning_rate": 7.46704565128584e-06, "loss": 0.4621, "step": 6597 }, { "epoch": 0.35605202093788785, "grad_norm": 
1.0466849447499893, "learning_rate": 7.466359475243692e-06, "loss": 0.4781, "step": 6598 }, { "epoch": 0.35610598456640224, "grad_norm": 1.0625913204688138, "learning_rate": 7.4656732426863835e-06, "loss": 0.6118, "step": 6599 }, { "epoch": 0.35615994819491664, "grad_norm": 0.9835674576466229, "learning_rate": 7.464986953633639e-06, "loss": 0.3728, "step": 6600 }, { "epoch": 0.356213911823431, "grad_norm": 1.3895064200829803, "learning_rate": 7.464300608105186e-06, "loss": 0.5864, "step": 6601 }, { "epoch": 0.3562678754519454, "grad_norm": 0.8891016292741093, "learning_rate": 7.463614206120748e-06, "loss": 0.4361, "step": 6602 }, { "epoch": 0.3563218390804598, "grad_norm": 1.0638876528766092, "learning_rate": 7.462927747700054e-06, "loss": 0.5618, "step": 6603 }, { "epoch": 0.3563758027089742, "grad_norm": 1.1073684957876997, "learning_rate": 7.462241232862833e-06, "loss": 0.4606, "step": 6604 }, { "epoch": 0.3564297663374885, "grad_norm": 1.276653625763433, "learning_rate": 7.461554661628816e-06, "loss": 0.4649, "step": 6605 }, { "epoch": 0.3564837299660029, "grad_norm": 1.1282786489095455, "learning_rate": 7.460868034017737e-06, "loss": 0.5665, "step": 6606 }, { "epoch": 0.3565376935945173, "grad_norm": 0.9956975657041539, "learning_rate": 7.460181350049328e-06, "loss": 0.5908, "step": 6607 }, { "epoch": 0.35659165722303166, "grad_norm": 1.1835863813987963, "learning_rate": 7.459494609743327e-06, "loss": 0.5879, "step": 6608 }, { "epoch": 0.35664562085154605, "grad_norm": 1.014259400012214, "learning_rate": 7.4588078131194705e-06, "loss": 0.476, "step": 6609 }, { "epoch": 0.35669958448006045, "grad_norm": 1.1079682667691544, "learning_rate": 7.458120960197498e-06, "loss": 0.5974, "step": 6610 }, { "epoch": 0.35675354810857485, "grad_norm": 1.0329480755026565, "learning_rate": 7.457434050997152e-06, "loss": 0.563, "step": 6611 }, { "epoch": 0.3568075117370892, "grad_norm": 1.1903914532945965, "learning_rate": 7.456747085538173e-06, "loss": 0.6955, "step": 6612 }, { "epoch": 0.3568614753656036, "grad_norm": 1.063247988350127, "learning_rate": 7.456060063840304e-06, "loss": 0.4794, "step": 6613 }, { "epoch": 0.356915438994118, "grad_norm": 0.9356931714980239, "learning_rate": 7.455372985923295e-06, "loss": 0.4093, "step": 6614 }, { "epoch": 0.3569694026226323, "grad_norm": 0.9886556058041656, "learning_rate": 7.45468585180689e-06, "loss": 0.5079, "step": 6615 }, { "epoch": 0.3570233662511467, "grad_norm": 1.0649377861656881, "learning_rate": 7.4539986615108365e-06, "loss": 0.6654, "step": 6616 }, { "epoch": 0.3570773298796611, "grad_norm": 1.0171607866143828, "learning_rate": 7.453311415054889e-06, "loss": 0.3463, "step": 6617 }, { "epoch": 0.35713129350817546, "grad_norm": 0.9053378066952775, "learning_rate": 7.452624112458797e-06, "loss": 0.3593, "step": 6618 }, { "epoch": 0.35718525713668986, "grad_norm": 1.0969382588410779, "learning_rate": 7.451936753742316e-06, "loss": 0.463, "step": 6619 }, { "epoch": 0.35723922076520426, "grad_norm": 1.5047068387374998, "learning_rate": 7.4512493389252e-06, "loss": 0.6974, "step": 6620 }, { "epoch": 0.35729318439371865, "grad_norm": 1.142019670340147, "learning_rate": 7.450561868027204e-06, "loss": 0.535, "step": 6621 }, { "epoch": 0.357347148022233, "grad_norm": 1.0439734278783264, "learning_rate": 7.449874341068092e-06, "loss": 0.5284, "step": 6622 }, { "epoch": 0.3574011116507474, "grad_norm": 1.33118939664702, "learning_rate": 7.4491867580676206e-06, "loss": 0.5786, "step": 6623 }, { "epoch": 0.3574550752792618, "grad_norm": 0.8699417205292298, 
"learning_rate": 7.4484991190455515e-06, "loss": 0.4434, "step": 6624 }, { "epoch": 0.35750903890777613, "grad_norm": 1.0501206693270315, "learning_rate": 7.447811424021649e-06, "loss": 0.5014, "step": 6625 }, { "epoch": 0.35756300253629053, "grad_norm": 0.920471822603437, "learning_rate": 7.447123673015678e-06, "loss": 0.4456, "step": 6626 }, { "epoch": 0.35761696616480493, "grad_norm": 0.9494119732148438, "learning_rate": 7.446435866047405e-06, "loss": 0.3606, "step": 6627 }, { "epoch": 0.3576709297933193, "grad_norm": 0.9767816035143059, "learning_rate": 7.445748003136599e-06, "loss": 0.4785, "step": 6628 }, { "epoch": 0.35772489342183367, "grad_norm": 0.9626289423710078, "learning_rate": 7.44506008430303e-06, "loss": 0.4139, "step": 6629 }, { "epoch": 0.35777885705034806, "grad_norm": 1.1833292100571229, "learning_rate": 7.4443721095664674e-06, "loss": 0.5452, "step": 6630 }, { "epoch": 0.35783282067886246, "grad_norm": 0.8705835170467068, "learning_rate": 7.443684078946687e-06, "loss": 0.4015, "step": 6631 }, { "epoch": 0.3578867843073768, "grad_norm": 0.9627277422019352, "learning_rate": 7.442995992463463e-06, "loss": 0.5252, "step": 6632 }, { "epoch": 0.3579407479358912, "grad_norm": 0.898825001323243, "learning_rate": 7.442307850136569e-06, "loss": 0.4017, "step": 6633 }, { "epoch": 0.3579947115644056, "grad_norm": 0.9257550988748362, "learning_rate": 7.441619651985786e-06, "loss": 0.3955, "step": 6634 }, { "epoch": 0.35804867519292, "grad_norm": 0.9279711635707943, "learning_rate": 7.440931398030892e-06, "loss": 0.4296, "step": 6635 }, { "epoch": 0.35810263882143434, "grad_norm": 0.915673266293192, "learning_rate": 7.440243088291667e-06, "loss": 0.4268, "step": 6636 }, { "epoch": 0.35815660244994874, "grad_norm": 1.0158044843888405, "learning_rate": 7.439554722787898e-06, "loss": 0.437, "step": 6637 }, { "epoch": 0.35821056607846313, "grad_norm": 0.8455530392246313, "learning_rate": 7.438866301539364e-06, "loss": 0.4003, "step": 6638 }, { "epoch": 0.3582645297069775, "grad_norm": 1.0440569166205138, "learning_rate": 7.4381778245658555e-06, "loss": 0.5095, "step": 6639 }, { "epoch": 0.35831849333549187, "grad_norm": 1.0839259295131853, "learning_rate": 7.437489291887157e-06, "loss": 0.5261, "step": 6640 }, { "epoch": 0.35837245696400627, "grad_norm": 1.0241333524541356, "learning_rate": 7.436800703523058e-06, "loss": 0.4799, "step": 6641 }, { "epoch": 0.35842642059252067, "grad_norm": 1.2342266809693268, "learning_rate": 7.436112059493349e-06, "loss": 0.4906, "step": 6642 }, { "epoch": 0.358480384221035, "grad_norm": 1.1129063157727925, "learning_rate": 7.435423359817826e-06, "loss": 0.7385, "step": 6643 }, { "epoch": 0.3585343478495494, "grad_norm": 0.9509114101784311, "learning_rate": 7.434734604516278e-06, "loss": 0.4911, "step": 6644 }, { "epoch": 0.3585883114780638, "grad_norm": 1.151058866627913, "learning_rate": 7.4340457936085016e-06, "loss": 0.5691, "step": 6645 }, { "epoch": 0.35864227510657815, "grad_norm": 0.8654342396644933, "learning_rate": 7.433356927114298e-06, "loss": 0.442, "step": 6646 }, { "epoch": 0.35869623873509254, "grad_norm": 1.0237116974661722, "learning_rate": 7.43266800505346e-06, "loss": 0.4589, "step": 6647 }, { "epoch": 0.35875020236360694, "grad_norm": 1.2367458811592502, "learning_rate": 7.431979027445792e-06, "loss": 0.6021, "step": 6648 }, { "epoch": 0.35880416599212134, "grad_norm": 1.0690113091558766, "learning_rate": 7.431289994311094e-06, "loss": 0.45, "step": 6649 }, { "epoch": 0.3588581296206357, "grad_norm": 1.317585001034018, 
"learning_rate": 7.4306009056691694e-06, "loss": 0.7059, "step": 6650 }, { "epoch": 0.3589120932491501, "grad_norm": 0.81493175067679, "learning_rate": 7.429911761539826e-06, "loss": 0.4105, "step": 6651 }, { "epoch": 0.3589660568776645, "grad_norm": 1.1712616130160447, "learning_rate": 7.429222561942867e-06, "loss": 0.6172, "step": 6652 }, { "epoch": 0.3590200205061788, "grad_norm": 0.9000987095954782, "learning_rate": 7.4285333068981035e-06, "loss": 0.3895, "step": 6653 }, { "epoch": 0.3590739841346932, "grad_norm": 1.1259739074898663, "learning_rate": 7.427843996425344e-06, "loss": 0.5202, "step": 6654 }, { "epoch": 0.3591279477632076, "grad_norm": 0.850591060161571, "learning_rate": 7.427154630544399e-06, "loss": 0.341, "step": 6655 }, { "epoch": 0.35918191139172195, "grad_norm": 0.9268074658266506, "learning_rate": 7.4264652092750836e-06, "loss": 0.3238, "step": 6656 }, { "epoch": 0.35923587502023635, "grad_norm": 0.8237024535448539, "learning_rate": 7.425775732637211e-06, "loss": 0.4207, "step": 6657 }, { "epoch": 0.35928983864875075, "grad_norm": 1.2149992370561, "learning_rate": 7.425086200650599e-06, "loss": 0.6533, "step": 6658 }, { "epoch": 0.35934380227726515, "grad_norm": 0.9600226648529733, "learning_rate": 7.424396613335062e-06, "loss": 0.5061, "step": 6659 }, { "epoch": 0.3593977659057795, "grad_norm": 0.9437161543975005, "learning_rate": 7.423706970710424e-06, "loss": 0.4779, "step": 6660 }, { "epoch": 0.3594517295342939, "grad_norm": 0.950617017309305, "learning_rate": 7.423017272796503e-06, "loss": 0.3685, "step": 6661 }, { "epoch": 0.3595056931628083, "grad_norm": 1.1192587061868684, "learning_rate": 7.422327519613123e-06, "loss": 0.5192, "step": 6662 }, { "epoch": 0.3595596567913226, "grad_norm": 1.0468027724457487, "learning_rate": 7.421637711180108e-06, "loss": 0.4558, "step": 6663 }, { "epoch": 0.359613620419837, "grad_norm": 1.0207220348239847, "learning_rate": 7.420947847517284e-06, "loss": 0.5137, "step": 6664 }, { "epoch": 0.3596675840483514, "grad_norm": 1.0022578132574254, "learning_rate": 7.420257928644477e-06, "loss": 0.4817, "step": 6665 }, { "epoch": 0.3597215476768658, "grad_norm": 1.0066828029688761, "learning_rate": 7.419567954581516e-06, "loss": 0.5416, "step": 6666 }, { "epoch": 0.35977551130538016, "grad_norm": 1.0927096085989094, "learning_rate": 7.418877925348234e-06, "loss": 0.5381, "step": 6667 }, { "epoch": 0.35982947493389456, "grad_norm": 0.936941111777973, "learning_rate": 7.41818784096446e-06, "loss": 0.5296, "step": 6668 }, { "epoch": 0.35988343856240895, "grad_norm": 1.163979528349895, "learning_rate": 7.41749770145003e-06, "loss": 0.4771, "step": 6669 }, { "epoch": 0.3599374021909233, "grad_norm": 1.0830612309611383, "learning_rate": 7.41680750682478e-06, "loss": 0.468, "step": 6670 }, { "epoch": 0.3599913658194377, "grad_norm": 0.8573546730914839, "learning_rate": 7.416117257108544e-06, "loss": 0.3908, "step": 6671 }, { "epoch": 0.3600453294479521, "grad_norm": 0.883265865964376, "learning_rate": 7.4154269523211616e-06, "loss": 0.5056, "step": 6672 }, { "epoch": 0.3600992930764665, "grad_norm": 1.0043717075995802, "learning_rate": 7.4147365924824735e-06, "loss": 0.445, "step": 6673 }, { "epoch": 0.36015325670498083, "grad_norm": 0.9320814636889937, "learning_rate": 7.414046177612321e-06, "loss": 0.4516, "step": 6674 }, { "epoch": 0.3602072203334952, "grad_norm": 1.0804595867182527, "learning_rate": 7.413355707730548e-06, "loss": 0.4971, "step": 6675 }, { "epoch": 0.3602611839620096, "grad_norm": 1.0582082211768769, "learning_rate": 
7.412665182856999e-06, "loss": 0.4123, "step": 6676 }, { "epoch": 0.36031514759052397, "grad_norm": 0.6598923968975566, "learning_rate": 7.411974603011518e-06, "loss": 0.3254, "step": 6677 }, { "epoch": 0.36036911121903836, "grad_norm": 1.0818196534636064, "learning_rate": 7.411283968213956e-06, "loss": 0.5554, "step": 6678 }, { "epoch": 0.36042307484755276, "grad_norm": 0.9761849826331294, "learning_rate": 7.410593278484162e-06, "loss": 0.501, "step": 6679 }, { "epoch": 0.36047703847606716, "grad_norm": 1.1007499939513266, "learning_rate": 7.409902533841986e-06, "loss": 0.4952, "step": 6680 }, { "epoch": 0.3605310021045815, "grad_norm": 1.3077730535378476, "learning_rate": 7.409211734307282e-06, "loss": 0.742, "step": 6681 }, { "epoch": 0.3605849657330959, "grad_norm": 0.7882348467555511, "learning_rate": 7.408520879899903e-06, "loss": 0.4254, "step": 6682 }, { "epoch": 0.3606389293616103, "grad_norm": 1.0117600772958226, "learning_rate": 7.407829970639706e-06, "loss": 0.4548, "step": 6683 }, { "epoch": 0.36069289299012464, "grad_norm": 0.8551459933241918, "learning_rate": 7.407139006546547e-06, "loss": 0.3204, "step": 6684 }, { "epoch": 0.36074685661863903, "grad_norm": 0.9810011601361532, "learning_rate": 7.406447987640289e-06, "loss": 0.4861, "step": 6685 }, { "epoch": 0.36080082024715343, "grad_norm": 1.040858574485072, "learning_rate": 7.405756913940786e-06, "loss": 0.583, "step": 6686 }, { "epoch": 0.3608547838756678, "grad_norm": 1.0847896698081008, "learning_rate": 7.405065785467906e-06, "loss": 0.4994, "step": 6687 }, { "epoch": 0.36090874750418217, "grad_norm": 1.2445319816296931, "learning_rate": 7.404374602241508e-06, "loss": 0.4734, "step": 6688 }, { "epoch": 0.36096271113269657, "grad_norm": 1.054148207895995, "learning_rate": 7.403683364281461e-06, "loss": 0.5537, "step": 6689 }, { "epoch": 0.36101667476121097, "grad_norm": 1.1023535643928675, "learning_rate": 7.402992071607631e-06, "loss": 0.5372, "step": 6690 }, { "epoch": 0.3610706383897253, "grad_norm": 1.0589661849690708, "learning_rate": 7.402300724239887e-06, "loss": 0.4729, "step": 6691 }, { "epoch": 0.3611246020182397, "grad_norm": 0.9399510942983993, "learning_rate": 7.401609322198096e-06, "loss": 0.4094, "step": 6692 }, { "epoch": 0.3611785656467541, "grad_norm": 1.0725266899455026, "learning_rate": 7.400917865502132e-06, "loss": 0.419, "step": 6693 }, { "epoch": 0.36123252927526844, "grad_norm": 1.0747433337379437, "learning_rate": 7.4002263541718685e-06, "loss": 0.5058, "step": 6694 }, { "epoch": 0.36128649290378284, "grad_norm": 1.0717118611584169, "learning_rate": 7.39953478822718e-06, "loss": 0.4893, "step": 6695 }, { "epoch": 0.36134045653229724, "grad_norm": 0.9782585599158999, "learning_rate": 7.3988431676879405e-06, "loss": 0.5225, "step": 6696 }, { "epoch": 0.36139442016081164, "grad_norm": 1.0822589882995608, "learning_rate": 7.39815149257403e-06, "loss": 0.4938, "step": 6697 }, { "epoch": 0.361448383789326, "grad_norm": 1.0983818378486014, "learning_rate": 7.397459762905329e-06, "loss": 0.4546, "step": 6698 }, { "epoch": 0.3615023474178404, "grad_norm": 1.0132096328113913, "learning_rate": 7.3967679787017166e-06, "loss": 0.6245, "step": 6699 }, { "epoch": 0.3615563110463548, "grad_norm": 0.9391089371362723, "learning_rate": 7.396076139983076e-06, "loss": 0.4106, "step": 6700 }, { "epoch": 0.3616102746748691, "grad_norm": 0.9183376458775473, "learning_rate": 7.395384246769292e-06, "loss": 0.322, "step": 6701 }, { "epoch": 0.3616642383033835, "grad_norm": 1.1309536239002644, "learning_rate": 
7.394692299080247e-06, "loss": 0.4504, "step": 6702 }, { "epoch": 0.3617182019318979, "grad_norm": 1.0681396062355577, "learning_rate": 7.394000296935833e-06, "loss": 0.5795, "step": 6703 }, { "epoch": 0.3617721655604123, "grad_norm": 0.9164680790865626, "learning_rate": 7.393308240355936e-06, "loss": 0.5164, "step": 6704 }, { "epoch": 0.36182612918892665, "grad_norm": 0.9206541736296995, "learning_rate": 7.392616129360448e-06, "loss": 0.4118, "step": 6705 }, { "epoch": 0.36188009281744105, "grad_norm": 1.215608484834527, "learning_rate": 7.39192396396926e-06, "loss": 0.5381, "step": 6706 }, { "epoch": 0.36193405644595544, "grad_norm": 0.9448301818952969, "learning_rate": 7.391231744202265e-06, "loss": 0.5094, "step": 6707 }, { "epoch": 0.3619880200744698, "grad_norm": 0.8599721981162372, "learning_rate": 7.390539470079361e-06, "loss": 0.4683, "step": 6708 }, { "epoch": 0.3620419837029842, "grad_norm": 1.0573677053051795, "learning_rate": 7.389847141620439e-06, "loss": 0.4512, "step": 6709 }, { "epoch": 0.3620959473314986, "grad_norm": 0.8777779451716963, "learning_rate": 7.3891547588454015e-06, "loss": 0.4748, "step": 6710 }, { "epoch": 0.362149910960013, "grad_norm": 0.8271504574280402, "learning_rate": 7.388462321774148e-06, "loss": 0.2948, "step": 6711 }, { "epoch": 0.3622038745885273, "grad_norm": 0.9427055373983112, "learning_rate": 7.387769830426579e-06, "loss": 0.4516, "step": 6712 }, { "epoch": 0.3622578382170417, "grad_norm": 1.0110359234713924, "learning_rate": 7.387077284822598e-06, "loss": 0.5316, "step": 6713 }, { "epoch": 0.3623118018455561, "grad_norm": 0.9097815869809164, "learning_rate": 7.3863846849821085e-06, "loss": 0.349, "step": 6714 }, { "epoch": 0.36236576547407046, "grad_norm": 1.1034829534612796, "learning_rate": 7.385692030925018e-06, "loss": 0.549, "step": 6715 }, { "epoch": 0.36241972910258485, "grad_norm": 1.197758370180457, "learning_rate": 7.384999322671232e-06, "loss": 0.4943, "step": 6716 }, { "epoch": 0.36247369273109925, "grad_norm": 0.9404653769179567, "learning_rate": 7.384306560240661e-06, "loss": 0.457, "step": 6717 }, { "epoch": 0.36252765635961365, "grad_norm": 1.0665030339637867, "learning_rate": 7.383613743653216e-06, "loss": 0.4754, "step": 6718 }, { "epoch": 0.362581619988128, "grad_norm": 1.2037917709838055, "learning_rate": 7.382920872928809e-06, "loss": 0.5744, "step": 6719 }, { "epoch": 0.3626355836166424, "grad_norm": 1.0682383961243649, "learning_rate": 7.382227948087354e-06, "loss": 0.6927, "step": 6720 }, { "epoch": 0.3626895472451568, "grad_norm": 0.9922792020421781, "learning_rate": 7.381534969148764e-06, "loss": 0.4456, "step": 6721 }, { "epoch": 0.3627435108736711, "grad_norm": 1.0411597229247016, "learning_rate": 7.38084193613296e-06, "loss": 0.4731, "step": 6722 }, { "epoch": 0.3627974745021855, "grad_norm": 0.9100601383430312, "learning_rate": 7.380148849059856e-06, "loss": 0.4141, "step": 6723 }, { "epoch": 0.3628514381306999, "grad_norm": 1.1851920921373118, "learning_rate": 7.379455707949376e-06, "loss": 0.5014, "step": 6724 }, { "epoch": 0.36290540175921426, "grad_norm": 1.4700881879327952, "learning_rate": 7.378762512821439e-06, "loss": 0.6874, "step": 6725 }, { "epoch": 0.36295936538772866, "grad_norm": 1.099857309494144, "learning_rate": 7.37806926369597e-06, "loss": 0.5948, "step": 6726 }, { "epoch": 0.36301332901624306, "grad_norm": 0.8385107764540245, "learning_rate": 7.377375960592892e-06, "loss": 0.3613, "step": 6727 }, { "epoch": 0.36306729264475746, "grad_norm": 1.1439698893188082, "learning_rate": 
7.376682603532131e-06, "loss": 0.6483, "step": 6728 }, { "epoch": 0.3631212562732718, "grad_norm": 0.8628354532109298, "learning_rate": 7.375989192533619e-06, "loss": 0.4436, "step": 6729 }, { "epoch": 0.3631752199017862, "grad_norm": 1.0303970011329506, "learning_rate": 7.3752957276172784e-06, "loss": 0.4842, "step": 6730 }, { "epoch": 0.3632291835303006, "grad_norm": 1.0660396660635163, "learning_rate": 7.374602208803046e-06, "loss": 0.5123, "step": 6731 }, { "epoch": 0.36328314715881493, "grad_norm": 1.1877683390870848, "learning_rate": 7.3739086361108514e-06, "loss": 0.4013, "step": 6732 }, { "epoch": 0.36333711078732933, "grad_norm": 1.0748515356608208, "learning_rate": 7.373215009560628e-06, "loss": 0.5335, "step": 6733 }, { "epoch": 0.36339107441584373, "grad_norm": 0.9536344040334294, "learning_rate": 7.372521329172314e-06, "loss": 0.4339, "step": 6734 }, { "epoch": 0.3634450380443581, "grad_norm": 0.9361802437442717, "learning_rate": 7.371827594965845e-06, "loss": 0.3478, "step": 6735 }, { "epoch": 0.36349900167287247, "grad_norm": 1.051282707297924, "learning_rate": 7.371133806961159e-06, "loss": 0.4401, "step": 6736 }, { "epoch": 0.36355296530138687, "grad_norm": 1.526781427329277, "learning_rate": 7.370439965178197e-06, "loss": 0.6135, "step": 6737 }, { "epoch": 0.36360692892990126, "grad_norm": 0.9731838856606003, "learning_rate": 7.369746069636899e-06, "loss": 0.5292, "step": 6738 }, { "epoch": 0.3636608925584156, "grad_norm": 1.125094888019104, "learning_rate": 7.369052120357213e-06, "loss": 0.549, "step": 6739 }, { "epoch": 0.36371485618693, "grad_norm": 1.1012858175380296, "learning_rate": 7.368358117359077e-06, "loss": 0.5166, "step": 6740 }, { "epoch": 0.3637688198154444, "grad_norm": 1.0385585502371224, "learning_rate": 7.367664060662443e-06, "loss": 0.483, "step": 6741 }, { "epoch": 0.3638227834439588, "grad_norm": 0.9916849295281638, "learning_rate": 7.366969950287255e-06, "loss": 0.4475, "step": 6742 }, { "epoch": 0.36387674707247314, "grad_norm": 1.0764620030564624, "learning_rate": 7.366275786253467e-06, "loss": 0.6334, "step": 6743 }, { "epoch": 0.36393071070098754, "grad_norm": 0.9802143050498165, "learning_rate": 7.365581568581025e-06, "loss": 0.4284, "step": 6744 }, { "epoch": 0.36398467432950193, "grad_norm": 0.9968729323424695, "learning_rate": 7.364887297289885e-06, "loss": 0.543, "step": 6745 }, { "epoch": 0.3640386379580163, "grad_norm": 0.8621732472834347, "learning_rate": 7.364192972399999e-06, "loss": 0.4126, "step": 6746 }, { "epoch": 0.3640926015865307, "grad_norm": 1.0447857769164788, "learning_rate": 7.363498593931322e-06, "loss": 0.4791, "step": 6747 }, { "epoch": 0.36414656521504507, "grad_norm": 1.3142185446720986, "learning_rate": 7.362804161903814e-06, "loss": 0.4826, "step": 6748 }, { "epoch": 0.36420052884355947, "grad_norm": 1.4009547425371287, "learning_rate": 7.362109676337431e-06, "loss": 0.7531, "step": 6749 }, { "epoch": 0.3642544924720738, "grad_norm": 0.852144177773267, "learning_rate": 7.361415137252137e-06, "loss": 0.3479, "step": 6750 }, { "epoch": 0.3643084561005882, "grad_norm": 1.10314631748616, "learning_rate": 7.3607205446678885e-06, "loss": 0.4608, "step": 6751 }, { "epoch": 0.3643624197291026, "grad_norm": 0.8659837677213514, "learning_rate": 7.360025898604652e-06, "loss": 0.3841, "step": 6752 }, { "epoch": 0.36441638335761695, "grad_norm": 1.0278529941264452, "learning_rate": 7.359331199082391e-06, "loss": 0.429, "step": 6753 }, { "epoch": 0.36447034698613134, "grad_norm": 0.9328477401193067, "learning_rate": 
7.3586364461210745e-06, "loss": 0.396, "step": 6754 }, { "epoch": 0.36452431061464574, "grad_norm": 0.9633088038107659, "learning_rate": 7.357941639740668e-06, "loss": 0.4499, "step": 6755 }, { "epoch": 0.3645782742431601, "grad_norm": 1.2395475181597273, "learning_rate": 7.357246779961141e-06, "loss": 0.5976, "step": 6756 }, { "epoch": 0.3646322378716745, "grad_norm": 1.0431985116152698, "learning_rate": 7.356551866802466e-06, "loss": 0.5749, "step": 6757 }, { "epoch": 0.3646862015001889, "grad_norm": 1.083703758947604, "learning_rate": 7.3558569002846125e-06, "loss": 0.4558, "step": 6758 }, { "epoch": 0.3647401651287033, "grad_norm": 1.055396242941038, "learning_rate": 7.355161880427557e-06, "loss": 0.602, "step": 6759 }, { "epoch": 0.3647941287572176, "grad_norm": 0.9180587924791588, "learning_rate": 7.354466807251276e-06, "loss": 0.411, "step": 6760 }, { "epoch": 0.364848092385732, "grad_norm": 0.8500713506775616, "learning_rate": 7.353771680775743e-06, "loss": 0.3704, "step": 6761 }, { "epoch": 0.3649020560142464, "grad_norm": 0.9362406625538126, "learning_rate": 7.3530765010209405e-06, "loss": 0.4435, "step": 6762 }, { "epoch": 0.36495601964276075, "grad_norm": 1.029283665363141, "learning_rate": 7.352381268006848e-06, "loss": 0.5332, "step": 6763 }, { "epoch": 0.36500998327127515, "grad_norm": 1.0327972417398112, "learning_rate": 7.3516859817534436e-06, "loss": 0.4883, "step": 6764 }, { "epoch": 0.36506394689978955, "grad_norm": 0.8603522833494767, "learning_rate": 7.3509906422807145e-06, "loss": 0.3843, "step": 6765 }, { "epoch": 0.36511791052830395, "grad_norm": 0.9824524103640179, "learning_rate": 7.350295249608642e-06, "loss": 0.4324, "step": 6766 }, { "epoch": 0.3651718741568183, "grad_norm": 0.899226797922604, "learning_rate": 7.349599803757217e-06, "loss": 0.4087, "step": 6767 }, { "epoch": 0.3652258377853327, "grad_norm": 0.9153123616413975, "learning_rate": 7.348904304746424e-06, "loss": 0.3655, "step": 6768 }, { "epoch": 0.3652798014138471, "grad_norm": 0.8767536905973499, "learning_rate": 7.348208752596253e-06, "loss": 0.4583, "step": 6769 }, { "epoch": 0.3653337650423614, "grad_norm": 0.7768088193198115, "learning_rate": 7.347513147326696e-06, "loss": 0.3936, "step": 6770 }, { "epoch": 0.3653877286708758, "grad_norm": 0.9988745331901796, "learning_rate": 7.346817488957742e-06, "loss": 0.396, "step": 6771 }, { "epoch": 0.3654416922993902, "grad_norm": 1.1541971983895656, "learning_rate": 7.34612177750939e-06, "loss": 0.5361, "step": 6772 }, { "epoch": 0.3654956559279046, "grad_norm": 1.079823346857731, "learning_rate": 7.345426013001631e-06, "loss": 0.5324, "step": 6773 }, { "epoch": 0.36554961955641896, "grad_norm": 1.0177048358787961, "learning_rate": 7.344730195454463e-06, "loss": 0.5315, "step": 6774 }, { "epoch": 0.36560358318493336, "grad_norm": 0.919720585672862, "learning_rate": 7.344034324887888e-06, "loss": 0.412, "step": 6775 }, { "epoch": 0.36565754681344775, "grad_norm": 0.9423818155170164, "learning_rate": 7.3433384013219e-06, "loss": 0.4551, "step": 6776 }, { "epoch": 0.3657115104419621, "grad_norm": 1.0032051801446293, "learning_rate": 7.342642424776505e-06, "loss": 0.488, "step": 6777 }, { "epoch": 0.3657654740704765, "grad_norm": 1.245420045383414, "learning_rate": 7.341946395271705e-06, "loss": 0.5852, "step": 6778 }, { "epoch": 0.3658194376989909, "grad_norm": 1.0175203319425785, "learning_rate": 7.341250312827506e-06, "loss": 0.3845, "step": 6779 }, { "epoch": 0.3658734013275053, "grad_norm": 1.0922527579502934, "learning_rate": 7.34055417746391e-06, 
"loss": 0.5221, "step": 6780 }, { "epoch": 0.36592736495601963, "grad_norm": 1.1655791005276739, "learning_rate": 7.3398579892009295e-06, "loss": 0.6484, "step": 6781 }, { "epoch": 0.365981328584534, "grad_norm": 1.2719842826560663, "learning_rate": 7.3391617480585695e-06, "loss": 0.5424, "step": 6782 }, { "epoch": 0.3660352922130484, "grad_norm": 1.1313166981019636, "learning_rate": 7.338465454056843e-06, "loss": 0.6351, "step": 6783 }, { "epoch": 0.36608925584156277, "grad_norm": 0.8634822717469182, "learning_rate": 7.337769107215763e-06, "loss": 0.3591, "step": 6784 }, { "epoch": 0.36614321947007716, "grad_norm": 0.8793026577942592, "learning_rate": 7.337072707555342e-06, "loss": 0.3948, "step": 6785 }, { "epoch": 0.36619718309859156, "grad_norm": 0.9692756615984774, "learning_rate": 7.336376255095592e-06, "loss": 0.4096, "step": 6786 }, { "epoch": 0.3662511467271059, "grad_norm": 1.018498899543461, "learning_rate": 7.335679749856536e-06, "loss": 0.5883, "step": 6787 }, { "epoch": 0.3663051103556203, "grad_norm": 0.879902373920261, "learning_rate": 7.334983191858188e-06, "loss": 0.3519, "step": 6788 }, { "epoch": 0.3663590739841347, "grad_norm": 1.3018838435427131, "learning_rate": 7.33428658112057e-06, "loss": 0.6822, "step": 6789 }, { "epoch": 0.3664130376126491, "grad_norm": 1.1609380989156672, "learning_rate": 7.3335899176637004e-06, "loss": 0.5357, "step": 6790 }, { "epoch": 0.36646700124116344, "grad_norm": 0.9938051622691001, "learning_rate": 7.332893201507605e-06, "loss": 0.4368, "step": 6791 }, { "epoch": 0.36652096486967783, "grad_norm": 1.0330275915559484, "learning_rate": 7.3321964326723075e-06, "loss": 0.5243, "step": 6792 }, { "epoch": 0.36657492849819223, "grad_norm": 0.9081434788455025, "learning_rate": 7.331499611177832e-06, "loss": 0.3684, "step": 6793 }, { "epoch": 0.3666288921267066, "grad_norm": 1.0061672223206344, "learning_rate": 7.330802737044207e-06, "loss": 0.5064, "step": 6794 }, { "epoch": 0.36668285575522097, "grad_norm": 1.0520981249052732, "learning_rate": 7.330105810291462e-06, "loss": 0.4099, "step": 6795 }, { "epoch": 0.36673681938373537, "grad_norm": 0.9068023570325237, "learning_rate": 7.329408830939627e-06, "loss": 0.4412, "step": 6796 }, { "epoch": 0.36679078301224977, "grad_norm": 0.8533863335025363, "learning_rate": 7.328711799008733e-06, "loss": 0.3613, "step": 6797 }, { "epoch": 0.3668447466407641, "grad_norm": 1.0696801033942818, "learning_rate": 7.328014714518816e-06, "loss": 0.5622, "step": 6798 }, { "epoch": 0.3668987102692785, "grad_norm": 1.0404485813062858, "learning_rate": 7.327317577489908e-06, "loss": 0.4903, "step": 6799 }, { "epoch": 0.3669526738977929, "grad_norm": 0.919094672518673, "learning_rate": 7.326620387942045e-06, "loss": 0.4139, "step": 6800 }, { "epoch": 0.36700663752630724, "grad_norm": 1.1653548100057403, "learning_rate": 7.325923145895268e-06, "loss": 0.5486, "step": 6801 }, { "epoch": 0.36706060115482164, "grad_norm": 1.1840614639628242, "learning_rate": 7.325225851369614e-06, "loss": 0.5902, "step": 6802 }, { "epoch": 0.36711456478333604, "grad_norm": 1.0502965559448754, "learning_rate": 7.3245285043851255e-06, "loss": 0.5338, "step": 6803 }, { "epoch": 0.36716852841185044, "grad_norm": 1.223795166334336, "learning_rate": 7.323831104961845e-06, "loss": 0.6876, "step": 6804 }, { "epoch": 0.3672224920403648, "grad_norm": 0.9326514393996463, "learning_rate": 7.323133653119814e-06, "loss": 0.4537, "step": 6805 }, { "epoch": 0.3672764556688792, "grad_norm": 0.8687848273581751, "learning_rate": 7.322436148879079e-06, 
"loss": 0.4288, "step": 6806 }, { "epoch": 0.3673304192973936, "grad_norm": 1.106684911721268, "learning_rate": 7.321738592259689e-06, "loss": 0.5906, "step": 6807 }, { "epoch": 0.3673843829259079, "grad_norm": 1.0910248129547129, "learning_rate": 7.321040983281692e-06, "loss": 0.6057, "step": 6808 }, { "epoch": 0.3674383465544223, "grad_norm": 1.223105410979182, "learning_rate": 7.320343321965136e-06, "loss": 0.5443, "step": 6809 }, { "epoch": 0.3674923101829367, "grad_norm": 1.0586091743606716, "learning_rate": 7.319645608330074e-06, "loss": 0.519, "step": 6810 }, { "epoch": 0.3675462738114511, "grad_norm": 0.8879723847207124, "learning_rate": 7.318947842396558e-06, "loss": 0.3314, "step": 6811 }, { "epoch": 0.36760023743996545, "grad_norm": 0.9014242052605601, "learning_rate": 7.3182500241846446e-06, "loss": 0.3639, "step": 6812 }, { "epoch": 0.36765420106847985, "grad_norm": 0.9510600066553361, "learning_rate": 7.3175521537143855e-06, "loss": 0.4216, "step": 6813 }, { "epoch": 0.36770816469699424, "grad_norm": 1.1350347922085335, "learning_rate": 7.316854231005842e-06, "loss": 0.4323, "step": 6814 }, { "epoch": 0.3677621283255086, "grad_norm": 1.0833143899584625, "learning_rate": 7.316156256079075e-06, "loss": 0.5838, "step": 6815 }, { "epoch": 0.367816091954023, "grad_norm": 1.2685845869287136, "learning_rate": 7.31545822895414e-06, "loss": 0.7139, "step": 6816 }, { "epoch": 0.3678700555825374, "grad_norm": 0.8756989809879977, "learning_rate": 7.314760149651099e-06, "loss": 0.4728, "step": 6817 }, { "epoch": 0.3679240192110518, "grad_norm": 0.9481467330858468, "learning_rate": 7.31406201819002e-06, "loss": 0.4305, "step": 6818 }, { "epoch": 0.3679779828395661, "grad_norm": 0.9881342203174235, "learning_rate": 7.3133638345909665e-06, "loss": 0.4666, "step": 6819 }, { "epoch": 0.3680319464680805, "grad_norm": 0.9640593117887198, "learning_rate": 7.312665598874001e-06, "loss": 0.4426, "step": 6820 }, { "epoch": 0.3680859100965949, "grad_norm": 1.0341250681327931, "learning_rate": 7.311967311059197e-06, "loss": 0.4275, "step": 6821 }, { "epoch": 0.36813987372510926, "grad_norm": 0.9227435855685743, "learning_rate": 7.3112689711666215e-06, "loss": 0.5436, "step": 6822 }, { "epoch": 0.36819383735362365, "grad_norm": 0.8982453947467975, "learning_rate": 7.310570579216345e-06, "loss": 0.4178, "step": 6823 }, { "epoch": 0.36824780098213805, "grad_norm": 1.063232771217943, "learning_rate": 7.309872135228441e-06, "loss": 0.5482, "step": 6824 }, { "epoch": 0.3683017646106524, "grad_norm": 0.8710197797161899, "learning_rate": 7.309173639222983e-06, "loss": 0.5107, "step": 6825 }, { "epoch": 0.3683557282391668, "grad_norm": 0.9562294351354688, "learning_rate": 7.308475091220047e-06, "loss": 0.4227, "step": 6826 }, { "epoch": 0.3684096918676812, "grad_norm": 0.8647796533499764, "learning_rate": 7.307776491239709e-06, "loss": 0.4426, "step": 6827 }, { "epoch": 0.3684636554961956, "grad_norm": 1.026384558891628, "learning_rate": 7.307077839302048e-06, "loss": 0.48, "step": 6828 }, { "epoch": 0.3685176191247099, "grad_norm": 0.9938155297542473, "learning_rate": 7.306379135427144e-06, "loss": 0.4332, "step": 6829 }, { "epoch": 0.3685715827532243, "grad_norm": 0.8363350697339719, "learning_rate": 7.305680379635078e-06, "loss": 0.3891, "step": 6830 }, { "epoch": 0.3686255463817387, "grad_norm": 1.0187486961096952, "learning_rate": 7.304981571945933e-06, "loss": 0.4412, "step": 6831 }, { "epoch": 0.36867951001025306, "grad_norm": 0.9464343226330632, "learning_rate": 7.304282712379796e-06, "loss": 0.3759, 
"step": 6832 }, { "epoch": 0.36873347363876746, "grad_norm": 1.1792880015819223, "learning_rate": 7.3035838009567495e-06, "loss": 0.5158, "step": 6833 }, { "epoch": 0.36878743726728186, "grad_norm": 0.9018161663533822, "learning_rate": 7.302884837696881e-06, "loss": 0.4473, "step": 6834 }, { "epoch": 0.36884140089579626, "grad_norm": 0.8056861465554771, "learning_rate": 7.302185822620281e-06, "loss": 0.3965, "step": 6835 }, { "epoch": 0.3688953645243106, "grad_norm": 1.1069205291340383, "learning_rate": 7.301486755747041e-06, "loss": 0.4831, "step": 6836 }, { "epoch": 0.368949328152825, "grad_norm": 0.8899513588353588, "learning_rate": 7.300787637097251e-06, "loss": 0.3716, "step": 6837 }, { "epoch": 0.3690032917813394, "grad_norm": 0.698910719433765, "learning_rate": 7.3000884666910045e-06, "loss": 0.2599, "step": 6838 }, { "epoch": 0.36905725540985374, "grad_norm": 1.0015956398595878, "learning_rate": 7.299389244548396e-06, "loss": 0.4188, "step": 6839 }, { "epoch": 0.36911121903836813, "grad_norm": 1.1282404778416344, "learning_rate": 7.298689970689522e-06, "loss": 0.5693, "step": 6840 }, { "epoch": 0.36916518266688253, "grad_norm": 0.9199500199619473, "learning_rate": 7.297990645134483e-06, "loss": 0.4674, "step": 6841 }, { "epoch": 0.3692191462953969, "grad_norm": 1.0239895821875582, "learning_rate": 7.2972912679033735e-06, "loss": 0.4962, "step": 6842 }, { "epoch": 0.36927310992391127, "grad_norm": 1.0135742176839524, "learning_rate": 7.296591839016299e-06, "loss": 0.5337, "step": 6843 }, { "epoch": 0.36932707355242567, "grad_norm": 1.0702536271566394, "learning_rate": 7.295892358493358e-06, "loss": 0.4959, "step": 6844 }, { "epoch": 0.36938103718094006, "grad_norm": 1.1999581757319084, "learning_rate": 7.295192826354657e-06, "loss": 0.5454, "step": 6845 }, { "epoch": 0.3694350008094544, "grad_norm": 0.9430599959552601, "learning_rate": 7.294493242620301e-06, "loss": 0.3899, "step": 6846 }, { "epoch": 0.3694889644379688, "grad_norm": 0.9974049073239135, "learning_rate": 7.293793607310396e-06, "loss": 0.4955, "step": 6847 }, { "epoch": 0.3695429280664832, "grad_norm": 0.9805359863885424, "learning_rate": 7.293093920445048e-06, "loss": 0.3468, "step": 6848 }, { "epoch": 0.3695968916949976, "grad_norm": 0.6685714629164718, "learning_rate": 7.292394182044371e-06, "loss": 0.2802, "step": 6849 }, { "epoch": 0.36965085532351194, "grad_norm": 1.1984384060536521, "learning_rate": 7.291694392128474e-06, "loss": 0.596, "step": 6850 }, { "epoch": 0.36970481895202634, "grad_norm": 1.0330509015130926, "learning_rate": 7.290994550717468e-06, "loss": 0.5566, "step": 6851 }, { "epoch": 0.36975878258054073, "grad_norm": 0.9591512299014374, "learning_rate": 7.290294657831471e-06, "loss": 0.3498, "step": 6852 }, { "epoch": 0.3698127462090551, "grad_norm": 0.9684663808235012, "learning_rate": 7.289594713490595e-06, "loss": 0.4258, "step": 6853 }, { "epoch": 0.3698667098375695, "grad_norm": 1.3749001239394132, "learning_rate": 7.28889471771496e-06, "loss": 0.6131, "step": 6854 }, { "epoch": 0.36992067346608387, "grad_norm": 1.1857924658353167, "learning_rate": 7.288194670524682e-06, "loss": 0.5378, "step": 6855 }, { "epoch": 0.3699746370945982, "grad_norm": 0.8308104950786264, "learning_rate": 7.287494571939884e-06, "loss": 0.3556, "step": 6856 }, { "epoch": 0.3700286007231126, "grad_norm": 0.9826560432428025, "learning_rate": 7.286794421980684e-06, "loss": 0.4588, "step": 6857 }, { "epoch": 0.370082564351627, "grad_norm": 1.0942386209436197, "learning_rate": 7.286094220667208e-06, "loss": 0.7319, "step": 
6858 }, { "epoch": 0.3701365279801414, "grad_norm": 1.0793873734521517, "learning_rate": 7.28539396801958e-06, "loss": 0.539, "step": 6859 }, { "epoch": 0.37019049160865575, "grad_norm": 1.0280388303082675, "learning_rate": 7.284693664057923e-06, "loss": 0.4153, "step": 6860 }, { "epoch": 0.37024445523717014, "grad_norm": 0.9995849166646903, "learning_rate": 7.28399330880237e-06, "loss": 0.4356, "step": 6861 }, { "epoch": 0.37029841886568454, "grad_norm": 1.0563650370088837, "learning_rate": 7.2832929022730445e-06, "loss": 0.5329, "step": 6862 }, { "epoch": 0.3703523824941989, "grad_norm": 0.926292493214605, "learning_rate": 7.282592444490081e-06, "loss": 0.3783, "step": 6863 }, { "epoch": 0.3704063461227133, "grad_norm": 0.9241841036465496, "learning_rate": 7.281891935473607e-06, "loss": 0.3038, "step": 6864 }, { "epoch": 0.3704603097512277, "grad_norm": 1.0562348090282008, "learning_rate": 7.281191375243759e-06, "loss": 0.5533, "step": 6865 }, { "epoch": 0.3705142733797421, "grad_norm": 1.1850226921764784, "learning_rate": 7.280490763820671e-06, "loss": 0.4244, "step": 6866 }, { "epoch": 0.3705682370082564, "grad_norm": 1.2534287872455863, "learning_rate": 7.279790101224481e-06, "loss": 0.4805, "step": 6867 }, { "epoch": 0.3706222006367708, "grad_norm": 0.9391626210322489, "learning_rate": 7.2790893874753244e-06, "loss": 0.3739, "step": 6868 }, { "epoch": 0.3706761642652852, "grad_norm": 1.195295820641455, "learning_rate": 7.27838862259334e-06, "loss": 0.5032, "step": 6869 }, { "epoch": 0.37073012789379955, "grad_norm": 1.1516459235652914, "learning_rate": 7.277687806598671e-06, "loss": 0.5955, "step": 6870 }, { "epoch": 0.37078409152231395, "grad_norm": 0.9614959256784787, "learning_rate": 7.276986939511458e-06, "loss": 0.478, "step": 6871 }, { "epoch": 0.37083805515082835, "grad_norm": 1.0052149746700163, "learning_rate": 7.276286021351844e-06, "loss": 0.4797, "step": 6872 }, { "epoch": 0.37089201877934275, "grad_norm": 1.1422647860935524, "learning_rate": 7.275585052139975e-06, "loss": 0.5694, "step": 6873 }, { "epoch": 0.3709459824078571, "grad_norm": 1.1802395162122075, "learning_rate": 7.274884031895999e-06, "loss": 0.5292, "step": 6874 }, { "epoch": 0.3709999460363715, "grad_norm": 1.016477199622449, "learning_rate": 7.27418296064006e-06, "loss": 0.4325, "step": 6875 }, { "epoch": 0.3710539096648859, "grad_norm": 0.8485650411447343, "learning_rate": 7.273481838392312e-06, "loss": 0.3347, "step": 6876 }, { "epoch": 0.3711078732934002, "grad_norm": 1.2880122715545421, "learning_rate": 7.2727806651729045e-06, "loss": 0.6218, "step": 6877 }, { "epoch": 0.3711618369219146, "grad_norm": 1.3238818269936965, "learning_rate": 7.272079441001988e-06, "loss": 0.5616, "step": 6878 }, { "epoch": 0.371215800550429, "grad_norm": 0.9741192111599163, "learning_rate": 7.271378165899718e-06, "loss": 0.4729, "step": 6879 }, { "epoch": 0.3712697641789434, "grad_norm": 0.9497978842002315, "learning_rate": 7.270676839886251e-06, "loss": 0.4322, "step": 6880 }, { "epoch": 0.37132372780745776, "grad_norm": 0.9180193116411299, "learning_rate": 7.26997546298174e-06, "loss": 0.4834, "step": 6881 }, { "epoch": 0.37137769143597216, "grad_norm": 0.9810016952992067, "learning_rate": 7.269274035206347e-06, "loss": 0.3654, "step": 6882 }, { "epoch": 0.37143165506448655, "grad_norm": 0.9305524799307355, "learning_rate": 7.268572556580231e-06, "loss": 0.4371, "step": 6883 }, { "epoch": 0.3714856186930009, "grad_norm": 1.083050374895501, "learning_rate": 7.2678710271235545e-06, "loss": 0.512, "step": 6884 }, { 
"epoch": 0.3715395823215153, "grad_norm": 0.9474965006218362, "learning_rate": 7.267169446856475e-06, "loss": 0.4707, "step": 6885 }, { "epoch": 0.3715935459500297, "grad_norm": 1.3560878492325625, "learning_rate": 7.26646781579916e-06, "loss": 0.6223, "step": 6886 }, { "epoch": 0.3716475095785441, "grad_norm": 1.2013716884832917, "learning_rate": 7.265766133971777e-06, "loss": 0.6074, "step": 6887 }, { "epoch": 0.37170147320705843, "grad_norm": 0.8049158069036503, "learning_rate": 7.26506440139449e-06, "loss": 0.3855, "step": 6888 }, { "epoch": 0.37175543683557283, "grad_norm": 1.151713368184195, "learning_rate": 7.2643626180874705e-06, "loss": 0.6237, "step": 6889 }, { "epoch": 0.3718094004640872, "grad_norm": 1.1672530263551806, "learning_rate": 7.263660784070884e-06, "loss": 0.4497, "step": 6890 }, { "epoch": 0.37186336409260157, "grad_norm": 0.9076097961775188, "learning_rate": 7.262958899364907e-06, "loss": 0.3824, "step": 6891 }, { "epoch": 0.37191732772111596, "grad_norm": 1.2590068386288806, "learning_rate": 7.262256963989709e-06, "loss": 0.6934, "step": 6892 }, { "epoch": 0.37197129134963036, "grad_norm": 1.0511018630651543, "learning_rate": 7.261554977965464e-06, "loss": 0.5735, "step": 6893 }, { "epoch": 0.3720252549781447, "grad_norm": 0.8467117673473454, "learning_rate": 7.260852941312351e-06, "loss": 0.487, "step": 6894 }, { "epoch": 0.3720792186066591, "grad_norm": 1.1022197033192875, "learning_rate": 7.260150854050543e-06, "loss": 0.5453, "step": 6895 }, { "epoch": 0.3721331822351735, "grad_norm": 1.2078930120898936, "learning_rate": 7.259448716200223e-06, "loss": 0.5233, "step": 6896 }, { "epoch": 0.3721871458636879, "grad_norm": 1.036284215848604, "learning_rate": 7.258746527781567e-06, "loss": 0.5514, "step": 6897 }, { "epoch": 0.37224110949220224, "grad_norm": 1.1762736776713212, "learning_rate": 7.258044288814761e-06, "loss": 0.569, "step": 6898 }, { "epoch": 0.37229507312071664, "grad_norm": 0.9654807400439736, "learning_rate": 7.257341999319985e-06, "loss": 0.3936, "step": 6899 }, { "epoch": 0.37234903674923103, "grad_norm": 0.872319520455724, "learning_rate": 7.256639659317424e-06, "loss": 0.3645, "step": 6900 }, { "epoch": 0.3724030003777454, "grad_norm": 0.8793321009032167, "learning_rate": 7.255937268827264e-06, "loss": 0.4179, "step": 6901 }, { "epoch": 0.37245696400625977, "grad_norm": 0.6308134596303165, "learning_rate": 7.255234827869693e-06, "loss": 0.2526, "step": 6902 }, { "epoch": 0.37251092763477417, "grad_norm": 1.1208581845341734, "learning_rate": 7.254532336464899e-06, "loss": 0.3858, "step": 6903 }, { "epoch": 0.37256489126328857, "grad_norm": 1.0487431509815637, "learning_rate": 7.253829794633073e-06, "loss": 0.4305, "step": 6904 }, { "epoch": 0.3726188548918029, "grad_norm": 0.9606937545923605, "learning_rate": 7.253127202394407e-06, "loss": 0.5142, "step": 6905 }, { "epoch": 0.3726728185203173, "grad_norm": 1.1876231003642062, "learning_rate": 7.252424559769092e-06, "loss": 0.4935, "step": 6906 }, { "epoch": 0.3727267821488317, "grad_norm": 1.1160672901766309, "learning_rate": 7.251721866777326e-06, "loss": 0.4706, "step": 6907 }, { "epoch": 0.37278074577734605, "grad_norm": 1.427491097467988, "learning_rate": 7.251019123439305e-06, "loss": 0.6918, "step": 6908 }, { "epoch": 0.37283470940586044, "grad_norm": 0.8796965688808642, "learning_rate": 7.250316329775223e-06, "loss": 0.442, "step": 6909 }, { "epoch": 0.37288867303437484, "grad_norm": 1.0571475014453204, "learning_rate": 7.249613485805282e-06, "loss": 0.5762, "step": 6910 }, { "epoch": 
0.37294263666288924, "grad_norm": 0.8377632021848902, "learning_rate": 7.2489105915496805e-06, "loss": 0.3352, "step": 6911 }, { "epoch": 0.3729966002914036, "grad_norm": 1.2836702198123393, "learning_rate": 7.248207647028624e-06, "loss": 0.5446, "step": 6912 }, { "epoch": 0.373050563919918, "grad_norm": 1.048118427575878, "learning_rate": 7.247504652262312e-06, "loss": 0.4362, "step": 6913 }, { "epoch": 0.3731045275484324, "grad_norm": 1.0222376806806541, "learning_rate": 7.246801607270949e-06, "loss": 0.4374, "step": 6914 }, { "epoch": 0.3731584911769467, "grad_norm": 0.9966544161430414, "learning_rate": 7.246098512074745e-06, "loss": 0.4106, "step": 6915 }, { "epoch": 0.3732124548054611, "grad_norm": 0.9545944967871358, "learning_rate": 7.245395366693903e-06, "loss": 0.4766, "step": 6916 }, { "epoch": 0.3732664184339755, "grad_norm": 0.9999704765408531, "learning_rate": 7.2446921711486364e-06, "loss": 0.3495, "step": 6917 }, { "epoch": 0.3733203820624899, "grad_norm": 1.310731377898673, "learning_rate": 7.243988925459154e-06, "loss": 0.6372, "step": 6918 }, { "epoch": 0.37337434569100425, "grad_norm": 0.9129002855118924, "learning_rate": 7.243285629645667e-06, "loss": 0.403, "step": 6919 }, { "epoch": 0.37342830931951865, "grad_norm": 0.9680231435862053, "learning_rate": 7.24258228372839e-06, "loss": 0.4785, "step": 6920 }, { "epoch": 0.37348227294803304, "grad_norm": 1.0513572833424338, "learning_rate": 7.241878887727537e-06, "loss": 0.3996, "step": 6921 }, { "epoch": 0.3735362365765474, "grad_norm": 1.3077776428608345, "learning_rate": 7.2411754416633255e-06, "loss": 0.5792, "step": 6922 }, { "epoch": 0.3735902002050618, "grad_norm": 0.8118315962513533, "learning_rate": 7.240471945555971e-06, "loss": 0.5007, "step": 6923 }, { "epoch": 0.3736441638335762, "grad_norm": 0.9665198880486371, "learning_rate": 7.239768399425695e-06, "loss": 0.3869, "step": 6924 }, { "epoch": 0.3736981274620905, "grad_norm": 1.063033097673364, "learning_rate": 7.239064803292718e-06, "loss": 0.5949, "step": 6925 }, { "epoch": 0.3737520910906049, "grad_norm": 0.7361857673166283, "learning_rate": 7.238361157177261e-06, "loss": 0.2602, "step": 6926 }, { "epoch": 0.3738060547191193, "grad_norm": 0.9562627325551762, "learning_rate": 7.2376574610995475e-06, "loss": 0.4432, "step": 6927 }, { "epoch": 0.3738600183476337, "grad_norm": 0.9847762688950488, "learning_rate": 7.2369537150798035e-06, "loss": 0.4827, "step": 6928 }, { "epoch": 0.37391398197614806, "grad_norm": 0.8316062011741119, "learning_rate": 7.236249919138253e-06, "loss": 0.3833, "step": 6929 }, { "epoch": 0.37396794560466246, "grad_norm": 1.277086817830862, "learning_rate": 7.235546073295127e-06, "loss": 0.6237, "step": 6930 }, { "epoch": 0.37402190923317685, "grad_norm": 1.0772332081083147, "learning_rate": 7.234842177570653e-06, "loss": 0.5001, "step": 6931 }, { "epoch": 0.3740758728616912, "grad_norm": 1.0876688477877543, "learning_rate": 7.2341382319850606e-06, "loss": 0.6204, "step": 6932 }, { "epoch": 0.3741298364902056, "grad_norm": 0.9715396695637002, "learning_rate": 7.2334342365585845e-06, "loss": 0.5307, "step": 6933 }, { "epoch": 0.37418380011872, "grad_norm": 0.8099095639290982, "learning_rate": 7.232730191311456e-06, "loss": 0.4243, "step": 6934 }, { "epoch": 0.3742377637472344, "grad_norm": 1.1073060570472009, "learning_rate": 7.232026096263911e-06, "loss": 0.6736, "step": 6935 }, { "epoch": 0.37429172737574873, "grad_norm": 0.9519820450138246, "learning_rate": 7.2313219514361875e-06, "loss": 0.457, "step": 6936 }, { "epoch": 
0.3743456910042631, "grad_norm": 0.8273514978522809, "learning_rate": 7.230617756848521e-06, "loss": 0.3054, "step": 6937 }, { "epoch": 0.3743996546327775, "grad_norm": 1.1390732580964629, "learning_rate": 7.2299135125211505e-06, "loss": 0.5186, "step": 6938 }, { "epoch": 0.37445361826129187, "grad_norm": 1.0343797298995598, "learning_rate": 7.22920921847432e-06, "loss": 0.5556, "step": 6939 }, { "epoch": 0.37450758188980626, "grad_norm": 1.1886789778263926, "learning_rate": 7.228504874728266e-06, "loss": 0.5518, "step": 6940 }, { "epoch": 0.37456154551832066, "grad_norm": 0.9866180615249266, "learning_rate": 7.227800481303238e-06, "loss": 0.5597, "step": 6941 }, { "epoch": 0.37461550914683506, "grad_norm": 1.223121878808218, "learning_rate": 7.227096038219477e-06, "loss": 0.5435, "step": 6942 }, { "epoch": 0.3746694727753494, "grad_norm": 1.140965152571399, "learning_rate": 7.226391545497232e-06, "loss": 0.5376, "step": 6943 }, { "epoch": 0.3747234364038638, "grad_norm": 0.852521675509372, "learning_rate": 7.225687003156749e-06, "loss": 0.3093, "step": 6944 }, { "epoch": 0.3747774000323782, "grad_norm": 0.9306056139332489, "learning_rate": 7.224982411218278e-06, "loss": 0.4905, "step": 6945 }, { "epoch": 0.37483136366089254, "grad_norm": 1.1901022228595592, "learning_rate": 7.22427776970207e-06, "loss": 0.4879, "step": 6946 }, { "epoch": 0.37488532728940693, "grad_norm": 0.8133459499009361, "learning_rate": 7.223573078628375e-06, "loss": 0.3526, "step": 6947 }, { "epoch": 0.37493929091792133, "grad_norm": 0.9415855804708764, "learning_rate": 7.222868338017449e-06, "loss": 0.4503, "step": 6948 }, { "epoch": 0.37499325454643573, "grad_norm": 0.9265492366376306, "learning_rate": 7.222163547889546e-06, "loss": 0.4509, "step": 6949 }, { "epoch": 0.37504721817495007, "grad_norm": 1.3150095320973068, "learning_rate": 7.221458708264923e-06, "loss": 0.5171, "step": 6950 }, { "epoch": 0.37510118180346447, "grad_norm": 1.2551290560767592, "learning_rate": 7.220753819163838e-06, "loss": 0.5046, "step": 6951 }, { "epoch": 0.37515514543197886, "grad_norm": 0.898760396950055, "learning_rate": 7.220048880606548e-06, "loss": 0.3871, "step": 6952 }, { "epoch": 0.3752091090604932, "grad_norm": 1.1307511394296448, "learning_rate": 7.219343892613316e-06, "loss": 0.6383, "step": 6953 }, { "epoch": 0.3752630726890076, "grad_norm": 1.1146084760259063, "learning_rate": 7.218638855204403e-06, "loss": 0.3827, "step": 6954 }, { "epoch": 0.375317036317522, "grad_norm": 0.9600914785032998, "learning_rate": 7.217933768400072e-06, "loss": 0.4991, "step": 6955 }, { "epoch": 0.3753709999460364, "grad_norm": 0.8010863772686861, "learning_rate": 7.21722863222059e-06, "loss": 0.4678, "step": 6956 }, { "epoch": 0.37542496357455074, "grad_norm": 0.9864037905308912, "learning_rate": 7.216523446686221e-06, "loss": 0.4681, "step": 6957 }, { "epoch": 0.37547892720306514, "grad_norm": 0.9902117511594252, "learning_rate": 7.215818211817235e-06, "loss": 0.4924, "step": 6958 }, { "epoch": 0.37553289083157954, "grad_norm": 0.8551210867602257, "learning_rate": 7.215112927633898e-06, "loss": 0.4244, "step": 6959 }, { "epoch": 0.3755868544600939, "grad_norm": 1.18228049747753, "learning_rate": 7.2144075941564835e-06, "loss": 0.5987, "step": 6960 }, { "epoch": 0.3756408180886083, "grad_norm": 1.1942132227087285, "learning_rate": 7.213702211405263e-06, "loss": 0.5088, "step": 6961 }, { "epoch": 0.37569478171712267, "grad_norm": 1.0980434364519334, "learning_rate": 7.212996779400508e-06, "loss": 0.5012, "step": 6962 }, { "epoch": 
0.375748745345637, "grad_norm": 0.8752062709162536, "learning_rate": 7.212291298162497e-06, "loss": 0.3579, "step": 6963 }, { "epoch": 0.3758027089741514, "grad_norm": 1.0596314658242807, "learning_rate": 7.211585767711502e-06, "loss": 0.488, "step": 6964 }, { "epoch": 0.3758566726026658, "grad_norm": 0.8897832942721821, "learning_rate": 7.210880188067803e-06, "loss": 0.4717, "step": 6965 }, { "epoch": 0.3759106362311802, "grad_norm": 1.1883846321806144, "learning_rate": 7.210174559251677e-06, "loss": 0.5669, "step": 6966 }, { "epoch": 0.37596459985969455, "grad_norm": 1.2266080684763248, "learning_rate": 7.209468881283409e-06, "loss": 0.5221, "step": 6967 }, { "epoch": 0.37601856348820895, "grad_norm": 0.9804230657604749, "learning_rate": 7.208763154183277e-06, "loss": 0.472, "step": 6968 }, { "epoch": 0.37607252711672334, "grad_norm": 1.122334968323653, "learning_rate": 7.208057377971565e-06, "loss": 0.5787, "step": 6969 }, { "epoch": 0.3761264907452377, "grad_norm": 0.9331703277946313, "learning_rate": 7.207351552668559e-06, "loss": 0.4536, "step": 6970 }, { "epoch": 0.3761804543737521, "grad_norm": 0.9659017422710044, "learning_rate": 7.2066456782945435e-06, "loss": 0.504, "step": 6971 }, { "epoch": 0.3762344180022665, "grad_norm": 1.100575609088681, "learning_rate": 7.205939754869807e-06, "loss": 0.5687, "step": 6972 }, { "epoch": 0.3762883816307809, "grad_norm": 0.6354951599506269, "learning_rate": 7.205233782414638e-06, "loss": 0.2376, "step": 6973 }, { "epoch": 0.3763423452592952, "grad_norm": 1.055794778160967, "learning_rate": 7.204527760949328e-06, "loss": 0.6155, "step": 6974 }, { "epoch": 0.3763963088878096, "grad_norm": 1.0610542222257762, "learning_rate": 7.203821690494166e-06, "loss": 0.5295, "step": 6975 }, { "epoch": 0.376450272516324, "grad_norm": 0.9284585272566351, "learning_rate": 7.203115571069449e-06, "loss": 0.4823, "step": 6976 }, { "epoch": 0.37650423614483836, "grad_norm": 1.1248439335483837, "learning_rate": 7.20240940269547e-06, "loss": 0.6971, "step": 6977 }, { "epoch": 0.37655819977335275, "grad_norm": 0.8955474565088192, "learning_rate": 7.201703185392523e-06, "loss": 0.4394, "step": 6978 }, { "epoch": 0.37661216340186715, "grad_norm": 0.8675733039893668, "learning_rate": 7.200996919180906e-06, "loss": 0.364, "step": 6979 }, { "epoch": 0.37666612703038155, "grad_norm": 0.8343671115061105, "learning_rate": 7.200290604080919e-06, "loss": 0.3756, "step": 6980 }, { "epoch": 0.3767200906588959, "grad_norm": 0.8942856079959411, "learning_rate": 7.199584240112865e-06, "loss": 0.3729, "step": 6981 }, { "epoch": 0.3767740542874103, "grad_norm": 1.0885419410682318, "learning_rate": 7.198877827297039e-06, "loss": 0.5373, "step": 6982 }, { "epoch": 0.3768280179159247, "grad_norm": 0.9875712028888981, "learning_rate": 7.19817136565375e-06, "loss": 0.4567, "step": 6983 }, { "epoch": 0.376881981544439, "grad_norm": 1.2221349923454623, "learning_rate": 7.1974648552033e-06, "loss": 0.6445, "step": 6984 }, { "epoch": 0.3769359451729534, "grad_norm": 1.0718006483040892, "learning_rate": 7.196758295965994e-06, "loss": 0.494, "step": 6985 }, { "epoch": 0.3769899088014678, "grad_norm": 0.9588058399856357, "learning_rate": 7.1960516879621415e-06, "loss": 0.4885, "step": 6986 }, { "epoch": 0.3770438724299822, "grad_norm": 0.9238615292036465, "learning_rate": 7.195345031212048e-06, "loss": 0.4155, "step": 6987 }, { "epoch": 0.37709783605849656, "grad_norm": 1.1020279510813313, "learning_rate": 7.1946383257360264e-06, "loss": 0.5185, "step": 6988 }, { "epoch": 0.37715179968701096, 
"grad_norm": 1.1048907541370179, "learning_rate": 7.193931571554387e-06, "loss": 0.4229, "step": 6989 }, { "epoch": 0.37720576331552536, "grad_norm": 1.0209909538555413, "learning_rate": 7.193224768687443e-06, "loss": 0.5509, "step": 6990 }, { "epoch": 0.3772597269440397, "grad_norm": 1.039764491174435, "learning_rate": 7.1925179171555075e-06, "loss": 0.6809, "step": 6991 }, { "epoch": 0.3773136905725541, "grad_norm": 0.8557102767080685, "learning_rate": 7.191811016978896e-06, "loss": 0.4039, "step": 6992 }, { "epoch": 0.3773676542010685, "grad_norm": 1.0941310598784544, "learning_rate": 7.191104068177927e-06, "loss": 0.5908, "step": 6993 }, { "epoch": 0.37742161782958283, "grad_norm": 0.9379140360496497, "learning_rate": 7.19039707077292e-06, "loss": 0.4297, "step": 6994 }, { "epoch": 0.37747558145809723, "grad_norm": 0.9177692414666432, "learning_rate": 7.189690024784192e-06, "loss": 0.3998, "step": 6995 }, { "epoch": 0.37752954508661163, "grad_norm": 1.1018460862449784, "learning_rate": 7.188982930232066e-06, "loss": 0.3711, "step": 6996 }, { "epoch": 0.377583508715126, "grad_norm": 1.1596327361759011, "learning_rate": 7.188275787136862e-06, "loss": 0.5013, "step": 6997 }, { "epoch": 0.37763747234364037, "grad_norm": 1.0455865358557612, "learning_rate": 7.187568595518908e-06, "loss": 0.4654, "step": 6998 }, { "epoch": 0.37769143597215477, "grad_norm": 0.8575421319902543, "learning_rate": 7.186861355398526e-06, "loss": 0.3939, "step": 6999 }, { "epoch": 0.37774539960066916, "grad_norm": 1.1218358121342649, "learning_rate": 7.186154066796045e-06, "loss": 0.5401, "step": 7000 }, { "epoch": 0.37774539960066916, "eval_loss": 0.5605905055999756, "eval_runtime": 162.8616, "eval_samples_per_second": 21.116, "eval_steps_per_second": 0.884, "step": 7000 }, { "epoch": 0.3777993632291835, "grad_norm": 1.1560634531374228, "learning_rate": 7.1854467297317935e-06, "loss": 0.4905, "step": 7001 }, { "epoch": 0.3778533268576979, "grad_norm": 1.0053200433683418, "learning_rate": 7.184739344226098e-06, "loss": 0.5393, "step": 7002 }, { "epoch": 0.3779072904862123, "grad_norm": 1.1615479575939966, "learning_rate": 7.184031910299292e-06, "loss": 0.587, "step": 7003 }, { "epoch": 0.3779612541147267, "grad_norm": 1.0279025794405205, "learning_rate": 7.183324427971707e-06, "loss": 0.5261, "step": 7004 }, { "epoch": 0.37801521774324104, "grad_norm": 0.9725617125236297, "learning_rate": 7.182616897263678e-06, "loss": 0.5119, "step": 7005 }, { "epoch": 0.37806918137175544, "grad_norm": 1.1637327927669623, "learning_rate": 7.181909318195539e-06, "loss": 0.5084, "step": 7006 }, { "epoch": 0.37812314500026983, "grad_norm": 1.2187877283703035, "learning_rate": 7.181201690787626e-06, "loss": 0.5985, "step": 7007 }, { "epoch": 0.3781771086287842, "grad_norm": 1.1171912107555106, "learning_rate": 7.180494015060278e-06, "loss": 0.4276, "step": 7008 }, { "epoch": 0.3782310722572986, "grad_norm": 0.8828222769684416, "learning_rate": 7.179786291033834e-06, "loss": 0.3548, "step": 7009 }, { "epoch": 0.37828503588581297, "grad_norm": 0.8914654082081784, "learning_rate": 7.179078518728635e-06, "loss": 0.3229, "step": 7010 }, { "epoch": 0.37833899951432737, "grad_norm": 0.9884145728287924, "learning_rate": 7.178370698165022e-06, "loss": 0.485, "step": 7011 }, { "epoch": 0.3783929631428417, "grad_norm": 1.042006120200116, "learning_rate": 7.177662829363342e-06, "loss": 0.6208, "step": 7012 }, { "epoch": 0.3784469267713561, "grad_norm": 1.0842866049885342, "learning_rate": 7.176954912343934e-06, "loss": 0.5097, "step": 7013 }, { 
"epoch": 0.3785008903998705, "grad_norm": 1.17449811129165, "learning_rate": 7.176246947127148e-06, "loss": 0.6594, "step": 7014 }, { "epoch": 0.37855485402838485, "grad_norm": 0.9878203238988538, "learning_rate": 7.175538933733332e-06, "loss": 0.4787, "step": 7015 }, { "epoch": 0.37860881765689924, "grad_norm": 0.959912623142717, "learning_rate": 7.174830872182833e-06, "loss": 0.4025, "step": 7016 }, { "epoch": 0.37866278128541364, "grad_norm": 0.9555376274760874, "learning_rate": 7.174122762496003e-06, "loss": 0.6747, "step": 7017 }, { "epoch": 0.37871674491392804, "grad_norm": 1.0175567596831172, "learning_rate": 7.173414604693193e-06, "loss": 0.4455, "step": 7018 }, { "epoch": 0.3787707085424424, "grad_norm": 0.8651521471195998, "learning_rate": 7.172706398794758e-06, "loss": 0.4644, "step": 7019 }, { "epoch": 0.3788246721709568, "grad_norm": 0.9310049279941783, "learning_rate": 7.171998144821049e-06, "loss": 0.4601, "step": 7020 }, { "epoch": 0.3788786357994712, "grad_norm": 1.1056088395125412, "learning_rate": 7.1712898427924235e-06, "loss": 0.5281, "step": 7021 }, { "epoch": 0.3789325994279855, "grad_norm": 0.9711011152256366, "learning_rate": 7.170581492729242e-06, "loss": 0.3924, "step": 7022 }, { "epoch": 0.3789865630564999, "grad_norm": 1.0036545459654718, "learning_rate": 7.169873094651858e-06, "loss": 0.3833, "step": 7023 }, { "epoch": 0.3790405266850143, "grad_norm": 1.251563186491286, "learning_rate": 7.169164648580633e-06, "loss": 0.5144, "step": 7024 }, { "epoch": 0.3790944903135287, "grad_norm": 1.0437386377767044, "learning_rate": 7.168456154535931e-06, "loss": 0.6036, "step": 7025 }, { "epoch": 0.37914845394204305, "grad_norm": 1.1557792825783992, "learning_rate": 7.167747612538115e-06, "loss": 0.5523, "step": 7026 }, { "epoch": 0.37920241757055745, "grad_norm": 0.972454590403336, "learning_rate": 7.167039022607546e-06, "loss": 0.4995, "step": 7027 }, { "epoch": 0.37925638119907185, "grad_norm": 1.0444963863269934, "learning_rate": 7.166330384764591e-06, "loss": 0.6478, "step": 7028 }, { "epoch": 0.3793103448275862, "grad_norm": 1.1356458291397966, "learning_rate": 7.165621699029615e-06, "loss": 0.5202, "step": 7029 }, { "epoch": 0.3793643084561006, "grad_norm": 0.846887285174065, "learning_rate": 7.164912965422991e-06, "loss": 0.4425, "step": 7030 }, { "epoch": 0.379418272084615, "grad_norm": 0.9697369823725206, "learning_rate": 7.164204183965085e-06, "loss": 0.5323, "step": 7031 }, { "epoch": 0.3794722357131293, "grad_norm": 0.8751957801557593, "learning_rate": 7.163495354676269e-06, "loss": 0.3903, "step": 7032 }, { "epoch": 0.3795261993416437, "grad_norm": 0.9173218878604463, "learning_rate": 7.162786477576916e-06, "loss": 0.4004, "step": 7033 }, { "epoch": 0.3795801629701581, "grad_norm": 0.9336291817344491, "learning_rate": 7.1620775526874e-06, "loss": 0.4734, "step": 7034 }, { "epoch": 0.3796341265986725, "grad_norm": 1.2907097609536318, "learning_rate": 7.161368580028095e-06, "loss": 0.6509, "step": 7035 }, { "epoch": 0.37968809022718686, "grad_norm": 1.0339099622969616, "learning_rate": 7.160659559619379e-06, "loss": 0.5301, "step": 7036 }, { "epoch": 0.37974205385570126, "grad_norm": 1.0253728472001105, "learning_rate": 7.15995049148163e-06, "loss": 0.5609, "step": 7037 }, { "epoch": 0.37979601748421565, "grad_norm": 0.9944978363864054, "learning_rate": 7.159241375635224e-06, "loss": 0.5077, "step": 7038 }, { "epoch": 0.37984998111273, "grad_norm": 0.9770593445708848, "learning_rate": 7.158532212100548e-06, "loss": 0.6439, "step": 7039 }, { "epoch": 
0.3799039447412444, "grad_norm": 1.1082486835219814, "learning_rate": 7.157823000897977e-06, "loss": 0.4602, "step": 7040 }, { "epoch": 0.3799579083697588, "grad_norm": 0.9180680762962835, "learning_rate": 7.157113742047899e-06, "loss": 0.3974, "step": 7041 }, { "epoch": 0.3800118719982732, "grad_norm": 1.0153429842225434, "learning_rate": 7.156404435570698e-06, "loss": 0.435, "step": 7042 }, { "epoch": 0.38006583562678753, "grad_norm": 1.1245672119684262, "learning_rate": 7.1556950814867595e-06, "loss": 0.4595, "step": 7043 }, { "epoch": 0.3801197992553019, "grad_norm": 1.0118821672640739, "learning_rate": 7.1549856798164715e-06, "loss": 0.4963, "step": 7044 }, { "epoch": 0.3801737628838163, "grad_norm": 1.0733545313084645, "learning_rate": 7.1542762305802225e-06, "loss": 0.5423, "step": 7045 }, { "epoch": 0.38022772651233067, "grad_norm": 0.9808536887973087, "learning_rate": 7.153566733798405e-06, "loss": 0.4952, "step": 7046 }, { "epoch": 0.38028169014084506, "grad_norm": 1.1204979177439232, "learning_rate": 7.152857189491406e-06, "loss": 0.603, "step": 7047 }, { "epoch": 0.38033565376935946, "grad_norm": 0.9924163898705943, "learning_rate": 7.152147597679623e-06, "loss": 0.5996, "step": 7048 }, { "epoch": 0.38038961739787386, "grad_norm": 0.7976745628240199, "learning_rate": 7.151437958383448e-06, "loss": 0.3166, "step": 7049 }, { "epoch": 0.3804435810263882, "grad_norm": 1.2239695684630665, "learning_rate": 7.150728271623277e-06, "loss": 0.5438, "step": 7050 }, { "epoch": 0.3804975446549026, "grad_norm": 0.8369295927919345, "learning_rate": 7.150018537419508e-06, "loss": 0.3885, "step": 7051 }, { "epoch": 0.380551508283417, "grad_norm": 1.0463418150811383, "learning_rate": 7.14930875579254e-06, "loss": 0.4608, "step": 7052 }, { "epoch": 0.38060547191193134, "grad_norm": 1.1286567120340327, "learning_rate": 7.148598926762772e-06, "loss": 0.594, "step": 7053 }, { "epoch": 0.38065943554044573, "grad_norm": 0.982988578476732, "learning_rate": 7.1478890503506035e-06, "loss": 0.5006, "step": 7054 }, { "epoch": 0.38071339916896013, "grad_norm": 0.9470661952728016, "learning_rate": 7.147179126576439e-06, "loss": 0.4293, "step": 7055 }, { "epoch": 0.38076736279747453, "grad_norm": 1.1500669028368993, "learning_rate": 7.146469155460683e-06, "loss": 0.5725, "step": 7056 }, { "epoch": 0.38082132642598887, "grad_norm": 1.0273649273453214, "learning_rate": 7.145759137023739e-06, "loss": 0.5557, "step": 7057 }, { "epoch": 0.38087529005450327, "grad_norm": 0.7927788227930566, "learning_rate": 7.145049071286013e-06, "loss": 0.3164, "step": 7058 }, { "epoch": 0.38092925368301767, "grad_norm": 0.9403495569723332, "learning_rate": 7.144338958267917e-06, "loss": 0.4916, "step": 7059 }, { "epoch": 0.380983217311532, "grad_norm": 0.697577001371987, "learning_rate": 7.143628797989858e-06, "loss": 0.3165, "step": 7060 }, { "epoch": 0.3810371809400464, "grad_norm": 1.039598110387755, "learning_rate": 7.142918590472244e-06, "loss": 0.4882, "step": 7061 }, { "epoch": 0.3810911445685608, "grad_norm": 1.16144188994013, "learning_rate": 7.142208335735491e-06, "loss": 0.5649, "step": 7062 }, { "epoch": 0.38114510819707514, "grad_norm": 0.9066929074648491, "learning_rate": 7.141498033800012e-06, "loss": 0.5044, "step": 7063 }, { "epoch": 0.38119907182558954, "grad_norm": 1.2474159253922128, "learning_rate": 7.1407876846862205e-06, "loss": 0.6316, "step": 7064 }, { "epoch": 0.38125303545410394, "grad_norm": 1.0056805627989949, "learning_rate": 7.140077288414532e-06, "loss": 0.4375, "step": 7065 }, { "epoch": 
0.38130699908261834, "grad_norm": 1.0336610517154494, "learning_rate": 7.139366845005366e-06, "loss": 0.4706, "step": 7066 }, { "epoch": 0.3813609627111327, "grad_norm": 0.9399374537163359, "learning_rate": 7.13865635447914e-06, "loss": 0.4487, "step": 7067 }, { "epoch": 0.3814149263396471, "grad_norm": 0.9529239917125278, "learning_rate": 7.137945816856276e-06, "loss": 0.4648, "step": 7068 }, { "epoch": 0.3814688899681615, "grad_norm": 0.9913642253176614, "learning_rate": 7.137235232157192e-06, "loss": 0.4311, "step": 7069 }, { "epoch": 0.3815228535966758, "grad_norm": 1.462036318179698, "learning_rate": 7.136524600402315e-06, "loss": 0.6083, "step": 7070 }, { "epoch": 0.3815768172251902, "grad_norm": 0.9929831928580635, "learning_rate": 7.1358139216120655e-06, "loss": 0.4829, "step": 7071 }, { "epoch": 0.3816307808537046, "grad_norm": 1.060270646546809, "learning_rate": 7.135103195806872e-06, "loss": 0.4438, "step": 7072 }, { "epoch": 0.381684744482219, "grad_norm": 1.1636922748726746, "learning_rate": 7.134392423007162e-06, "loss": 0.4459, "step": 7073 }, { "epoch": 0.38173870811073335, "grad_norm": 1.0072669476342324, "learning_rate": 7.13368160323336e-06, "loss": 0.4551, "step": 7074 }, { "epoch": 0.38179267173924775, "grad_norm": 1.0058699790543133, "learning_rate": 7.1329707365059e-06, "loss": 0.5872, "step": 7075 }, { "epoch": 0.38184663536776214, "grad_norm": 1.2077065808833078, "learning_rate": 7.132259822845209e-06, "loss": 0.4921, "step": 7076 }, { "epoch": 0.3819005989962765, "grad_norm": 1.1241221299731419, "learning_rate": 7.131548862271722e-06, "loss": 0.5221, "step": 7077 }, { "epoch": 0.3819545626247909, "grad_norm": 1.2889684412514781, "learning_rate": 7.130837854805873e-06, "loss": 0.6595, "step": 7078 }, { "epoch": 0.3820085262533053, "grad_norm": 0.6497624686592273, "learning_rate": 7.1301268004680945e-06, "loss": 0.3274, "step": 7079 }, { "epoch": 0.3820624898818197, "grad_norm": 0.9274043740305404, "learning_rate": 7.1294156992788265e-06, "loss": 0.434, "step": 7080 }, { "epoch": 0.382116453510334, "grad_norm": 0.9295793633690538, "learning_rate": 7.128704551258505e-06, "loss": 0.3892, "step": 7081 }, { "epoch": 0.3821704171388484, "grad_norm": 1.1044574355017553, "learning_rate": 7.127993356427568e-06, "loss": 0.6711, "step": 7082 }, { "epoch": 0.3822243807673628, "grad_norm": 1.103033647412057, "learning_rate": 7.127282114806456e-06, "loss": 0.5789, "step": 7083 }, { "epoch": 0.38227834439587716, "grad_norm": 0.8156842862976554, "learning_rate": 7.126570826415614e-06, "loss": 0.2496, "step": 7084 }, { "epoch": 0.38233230802439155, "grad_norm": 1.2599284771480748, "learning_rate": 7.125859491275482e-06, "loss": 0.569, "step": 7085 }, { "epoch": 0.38238627165290595, "grad_norm": 0.919740635654696, "learning_rate": 7.125148109406504e-06, "loss": 0.494, "step": 7086 }, { "epoch": 0.38244023528142035, "grad_norm": 0.9637185972817239, "learning_rate": 7.124436680829128e-06, "loss": 0.3611, "step": 7087 }, { "epoch": 0.3824941989099347, "grad_norm": 0.9849685295863924, "learning_rate": 7.123725205563803e-06, "loss": 0.4652, "step": 7088 }, { "epoch": 0.3825481625384491, "grad_norm": 1.0029198575451135, "learning_rate": 7.123013683630972e-06, "loss": 0.4336, "step": 7089 }, { "epoch": 0.3826021261669635, "grad_norm": 1.1075525525727516, "learning_rate": 7.1223021150510885e-06, "loss": 0.4762, "step": 7090 }, { "epoch": 0.3826560897954778, "grad_norm": 0.7420565547532824, "learning_rate": 7.121590499844604e-06, "loss": 0.3496, "step": 7091 }, { "epoch": 0.3827100534239922, 
"grad_norm": 1.060954325935764, "learning_rate": 7.120878838031969e-06, "loss": 0.3414, "step": 7092 }, { "epoch": 0.3827640170525066, "grad_norm": 0.9773295965412607, "learning_rate": 7.120167129633638e-06, "loss": 0.423, "step": 7093 }, { "epoch": 0.382817980681021, "grad_norm": 0.9642921070438216, "learning_rate": 7.119455374670068e-06, "loss": 0.3988, "step": 7094 }, { "epoch": 0.38287194430953536, "grad_norm": 1.1385182171735764, "learning_rate": 7.1187435731617135e-06, "loss": 0.5359, "step": 7095 }, { "epoch": 0.38292590793804976, "grad_norm": 0.9663787184249133, "learning_rate": 7.118031725129033e-06, "loss": 0.5206, "step": 7096 }, { "epoch": 0.38297987156656416, "grad_norm": 1.062153457706783, "learning_rate": 7.117319830592485e-06, "loss": 0.4935, "step": 7097 }, { "epoch": 0.3830338351950785, "grad_norm": 1.2165884168473715, "learning_rate": 7.116607889572533e-06, "loss": 0.716, "step": 7098 }, { "epoch": 0.3830877988235929, "grad_norm": 0.8724377963198745, "learning_rate": 7.115895902089635e-06, "loss": 0.3099, "step": 7099 }, { "epoch": 0.3831417624521073, "grad_norm": 1.1120702537414002, "learning_rate": 7.115183868164257e-06, "loss": 0.5846, "step": 7100 }, { "epoch": 0.38319572608062163, "grad_norm": 1.0464143765369258, "learning_rate": 7.114471787816862e-06, "loss": 0.5569, "step": 7101 }, { "epoch": 0.38324968970913603, "grad_norm": 0.9213089670858199, "learning_rate": 7.113759661067917e-06, "loss": 0.3718, "step": 7102 }, { "epoch": 0.38330365333765043, "grad_norm": 1.2132145901734046, "learning_rate": 7.1130474879378886e-06, "loss": 0.6988, "step": 7103 }, { "epoch": 0.3833576169661648, "grad_norm": 1.034736106727486, "learning_rate": 7.1123352684472445e-06, "loss": 0.4799, "step": 7104 }, { "epoch": 0.38341158059467917, "grad_norm": 1.063297091877675, "learning_rate": 7.111623002616459e-06, "loss": 0.6107, "step": 7105 }, { "epoch": 0.38346554422319357, "grad_norm": 1.05256114320061, "learning_rate": 7.1109106904659964e-06, "loss": 0.4745, "step": 7106 }, { "epoch": 0.38351950785170796, "grad_norm": 0.9915479572382622, "learning_rate": 7.110198332016334e-06, "loss": 0.4537, "step": 7107 }, { "epoch": 0.3835734714802223, "grad_norm": 1.1606877691514548, "learning_rate": 7.1094859272879435e-06, "loss": 0.5435, "step": 7108 }, { "epoch": 0.3836274351087367, "grad_norm": 1.0426600000798874, "learning_rate": 7.108773476301302e-06, "loss": 0.4918, "step": 7109 }, { "epoch": 0.3836813987372511, "grad_norm": 0.9562706075557893, "learning_rate": 7.108060979076885e-06, "loss": 0.4266, "step": 7110 }, { "epoch": 0.3837353623657655, "grad_norm": 1.1648405529061945, "learning_rate": 7.1073484356351715e-06, "loss": 0.5566, "step": 7111 }, { "epoch": 0.38378932599427984, "grad_norm": 0.9192670509764957, "learning_rate": 7.10663584599664e-06, "loss": 0.4417, "step": 7112 }, { "epoch": 0.38384328962279424, "grad_norm": 0.9929416534731506, "learning_rate": 7.10592321018177e-06, "loss": 0.4811, "step": 7113 }, { "epoch": 0.38389725325130863, "grad_norm": 1.0627248494896002, "learning_rate": 7.105210528211044e-06, "loss": 0.5636, "step": 7114 }, { "epoch": 0.383951216879823, "grad_norm": 1.0732106681059543, "learning_rate": 7.104497800104947e-06, "loss": 0.4384, "step": 7115 }, { "epoch": 0.3840051805083374, "grad_norm": 0.8965534118919051, "learning_rate": 7.10378502588396e-06, "loss": 0.3766, "step": 7116 }, { "epoch": 0.38405914413685177, "grad_norm": 1.2682003521416387, "learning_rate": 7.103072205568573e-06, "loss": 0.6621, "step": 7117 }, { "epoch": 0.38411310776536617, 
"grad_norm": 0.9471164574207247, "learning_rate": 7.1023593391792686e-06, "loss": 0.4164, "step": 7118 }, { "epoch": 0.3841670713938805, "grad_norm": 1.0882104084512663, "learning_rate": 7.1016464267365404e-06, "loss": 0.6048, "step": 7119 }, { "epoch": 0.3842210350223949, "grad_norm": 0.7458542911698984, "learning_rate": 7.100933468260874e-06, "loss": 0.3401, "step": 7120 }, { "epoch": 0.3842749986509093, "grad_norm": 0.7901450629168862, "learning_rate": 7.100220463772762e-06, "loss": 0.4414, "step": 7121 }, { "epoch": 0.38432896227942365, "grad_norm": 1.023105860278067, "learning_rate": 7.099507413292697e-06, "loss": 0.4411, "step": 7122 }, { "epoch": 0.38438292590793804, "grad_norm": 0.9229392990431791, "learning_rate": 7.098794316841175e-06, "loss": 0.4575, "step": 7123 }, { "epoch": 0.38443688953645244, "grad_norm": 0.8662001339135197, "learning_rate": 7.0980811744386855e-06, "loss": 0.4488, "step": 7124 }, { "epoch": 0.38449085316496684, "grad_norm": 0.9372548480448775, "learning_rate": 7.09736798610573e-06, "loss": 0.4763, "step": 7125 }, { "epoch": 0.3845448167934812, "grad_norm": 0.9088288956530416, "learning_rate": 7.096654751862805e-06, "loss": 0.4207, "step": 7126 }, { "epoch": 0.3845987804219956, "grad_norm": 1.0762002911766866, "learning_rate": 7.095941471730408e-06, "loss": 0.5215, "step": 7127 }, { "epoch": 0.38465274405051, "grad_norm": 1.2464436997477302, "learning_rate": 7.095228145729041e-06, "loss": 0.5932, "step": 7128 }, { "epoch": 0.3847067076790243, "grad_norm": 0.8331466414265337, "learning_rate": 7.0945147738792055e-06, "loss": 0.3068, "step": 7129 }, { "epoch": 0.3847606713075387, "grad_norm": 1.3065869811930102, "learning_rate": 7.093801356201403e-06, "loss": 0.6571, "step": 7130 }, { "epoch": 0.3848146349360531, "grad_norm": 1.226712268185015, "learning_rate": 7.0930878927161405e-06, "loss": 0.4885, "step": 7131 }, { "epoch": 0.38486859856456745, "grad_norm": 1.2026442888993771, "learning_rate": 7.0923743834439215e-06, "loss": 0.6074, "step": 7132 }, { "epoch": 0.38492256219308185, "grad_norm": 1.0776890496189788, "learning_rate": 7.0916608284052544e-06, "loss": 0.561, "step": 7133 }, { "epoch": 0.38497652582159625, "grad_norm": 0.9843011732735338, "learning_rate": 7.090947227620646e-06, "loss": 0.3488, "step": 7134 }, { "epoch": 0.38503048945011065, "grad_norm": 1.186524276821606, "learning_rate": 7.090233581110608e-06, "loss": 0.5732, "step": 7135 }, { "epoch": 0.385084453078625, "grad_norm": 1.0979776879028649, "learning_rate": 7.089519888895648e-06, "loss": 0.453, "step": 7136 }, { "epoch": 0.3851384167071394, "grad_norm": 0.9995108687191485, "learning_rate": 7.088806150996281e-06, "loss": 0.5055, "step": 7137 }, { "epoch": 0.3851923803356538, "grad_norm": 0.9120729093562829, "learning_rate": 7.088092367433022e-06, "loss": 0.3815, "step": 7138 }, { "epoch": 0.3852463439641681, "grad_norm": 1.0158641614489672, "learning_rate": 7.087378538226381e-06, "loss": 0.4488, "step": 7139 }, { "epoch": 0.3853003075926825, "grad_norm": 1.0344503246881993, "learning_rate": 7.0866646633968795e-06, "loss": 0.6048, "step": 7140 }, { "epoch": 0.3853542712211969, "grad_norm": 1.1971698488834972, "learning_rate": 7.0859507429650296e-06, "loss": 0.5987, "step": 7141 }, { "epoch": 0.3854082348497113, "grad_norm": 0.9401294787234151, "learning_rate": 7.085236776951354e-06, "loss": 0.394, "step": 7142 }, { "epoch": 0.38546219847822566, "grad_norm": 0.8449491398687429, "learning_rate": 7.084522765376374e-06, "loss": 0.4778, "step": 7143 }, { "epoch": 0.38551616210674006, 
"grad_norm": 0.953354842794333, "learning_rate": 7.0838087082606054e-06, "loss": 0.5349, "step": 7144 }, { "epoch": 0.38557012573525445, "grad_norm": 1.0468489862211805, "learning_rate": 7.083094605624574e-06, "loss": 0.5007, "step": 7145 }, { "epoch": 0.3856240893637688, "grad_norm": 0.9471131130888596, "learning_rate": 7.0823804574888065e-06, "loss": 0.501, "step": 7146 }, { "epoch": 0.3856780529922832, "grad_norm": 0.9130830609528583, "learning_rate": 7.081666263873824e-06, "loss": 0.3816, "step": 7147 }, { "epoch": 0.3857320166207976, "grad_norm": 0.9303626655705144, "learning_rate": 7.0809520248001565e-06, "loss": 0.4286, "step": 7148 }, { "epoch": 0.385785980249312, "grad_norm": 1.034402357264734, "learning_rate": 7.080237740288328e-06, "loss": 0.4928, "step": 7149 }, { "epoch": 0.38583994387782633, "grad_norm": 0.9764894319236294, "learning_rate": 7.079523410358873e-06, "loss": 0.4552, "step": 7150 }, { "epoch": 0.3858939075063407, "grad_norm": 1.0555463097969435, "learning_rate": 7.078809035032317e-06, "loss": 0.5333, "step": 7151 }, { "epoch": 0.3859478711348551, "grad_norm": 1.098031922007203, "learning_rate": 7.078094614329195e-06, "loss": 0.5165, "step": 7152 }, { "epoch": 0.38600183476336947, "grad_norm": 0.9919541710985963, "learning_rate": 7.07738014827004e-06, "loss": 0.4473, "step": 7153 }, { "epoch": 0.38605579839188386, "grad_norm": 1.1764797714051183, "learning_rate": 7.076665636875384e-06, "loss": 0.5697, "step": 7154 }, { "epoch": 0.38610976202039826, "grad_norm": 1.023379832879976, "learning_rate": 7.075951080165765e-06, "loss": 0.5068, "step": 7155 }, { "epoch": 0.38616372564891266, "grad_norm": 0.7060380034492929, "learning_rate": 7.07523647816172e-06, "loss": 0.347, "step": 7156 }, { "epoch": 0.386217689277427, "grad_norm": 0.9274208367146264, "learning_rate": 7.074521830883787e-06, "loss": 0.4828, "step": 7157 }, { "epoch": 0.3862716529059414, "grad_norm": 0.8945485547409492, "learning_rate": 7.073807138352506e-06, "loss": 0.3628, "step": 7158 }, { "epoch": 0.3863256165344558, "grad_norm": 0.9861317239492713, "learning_rate": 7.0730924005884165e-06, "loss": 0.5182, "step": 7159 }, { "epoch": 0.38637958016297014, "grad_norm": 0.9266853032703605, "learning_rate": 7.072377617612062e-06, "loss": 0.4944, "step": 7160 }, { "epoch": 0.38643354379148454, "grad_norm": 0.7917828122443779, "learning_rate": 7.071662789443986e-06, "loss": 0.3137, "step": 7161 }, { "epoch": 0.38648750741999893, "grad_norm": 1.1751427811865813, "learning_rate": 7.070947916104735e-06, "loss": 0.4857, "step": 7162 }, { "epoch": 0.3865414710485133, "grad_norm": 1.277641393297814, "learning_rate": 7.070232997614852e-06, "loss": 0.7244, "step": 7163 }, { "epoch": 0.38659543467702767, "grad_norm": 1.0282769903913904, "learning_rate": 7.069518033994886e-06, "loss": 0.4645, "step": 7164 }, { "epoch": 0.38664939830554207, "grad_norm": 1.077618043518193, "learning_rate": 7.068803025265386e-06, "loss": 0.4832, "step": 7165 }, { "epoch": 0.38670336193405647, "grad_norm": 0.930397629419174, "learning_rate": 7.068087971446902e-06, "loss": 0.5129, "step": 7166 }, { "epoch": 0.3867573255625708, "grad_norm": 1.1015822832660194, "learning_rate": 7.0673728725599855e-06, "loss": 0.5738, "step": 7167 }, { "epoch": 0.3868112891910852, "grad_norm": 1.0767470700972241, "learning_rate": 7.0666577286251895e-06, "loss": 0.3527, "step": 7168 }, { "epoch": 0.3868652528195996, "grad_norm": 1.1710833788466661, "learning_rate": 7.0659425396630665e-06, "loss": 0.4915, "step": 7169 }, { "epoch": 0.38691921644811395, 
"grad_norm": 1.255486128019382, "learning_rate": 7.065227305694173e-06, "loss": 0.6422, "step": 7170 }, { "epoch": 0.38697318007662834, "grad_norm": 0.941733134473826, "learning_rate": 7.064512026739064e-06, "loss": 0.459, "step": 7171 }, { "epoch": 0.38702714370514274, "grad_norm": 1.0107485023545026, "learning_rate": 7.0637967028183e-06, "loss": 0.3339, "step": 7172 }, { "epoch": 0.38708110733365714, "grad_norm": 1.034652975828915, "learning_rate": 7.063081333952439e-06, "loss": 0.4795, "step": 7173 }, { "epoch": 0.3871350709621715, "grad_norm": 0.9516667697952352, "learning_rate": 7.062365920162041e-06, "loss": 0.4958, "step": 7174 }, { "epoch": 0.3871890345906859, "grad_norm": 1.057235885495645, "learning_rate": 7.061650461467666e-06, "loss": 0.4966, "step": 7175 }, { "epoch": 0.3872429982192003, "grad_norm": 1.132586087963951, "learning_rate": 7.06093495788988e-06, "loss": 0.4475, "step": 7176 }, { "epoch": 0.3872969618477146, "grad_norm": 1.0395971098901065, "learning_rate": 7.060219409449247e-06, "loss": 0.5493, "step": 7177 }, { "epoch": 0.387350925476229, "grad_norm": 1.0317731792643587, "learning_rate": 7.0595038161663295e-06, "loss": 0.4934, "step": 7178 }, { "epoch": 0.3874048891047434, "grad_norm": 0.9666342972898421, "learning_rate": 7.058788178061698e-06, "loss": 0.4802, "step": 7179 }, { "epoch": 0.3874588527332578, "grad_norm": 0.9085884539362401, "learning_rate": 7.0580724951559185e-06, "loss": 0.4202, "step": 7180 }, { "epoch": 0.38751281636177215, "grad_norm": 0.8730183313954777, "learning_rate": 7.057356767469563e-06, "loss": 0.4206, "step": 7181 }, { "epoch": 0.38756677999028655, "grad_norm": 0.9509450669788764, "learning_rate": 7.056640995023199e-06, "loss": 0.6737, "step": 7182 }, { "epoch": 0.38762074361880094, "grad_norm": 0.8717379001901906, "learning_rate": 7.0559251778373995e-06, "loss": 0.3701, "step": 7183 }, { "epoch": 0.3876747072473153, "grad_norm": 1.2082073343371884, "learning_rate": 7.055209315932738e-06, "loss": 0.5439, "step": 7184 }, { "epoch": 0.3877286708758297, "grad_norm": 0.785822838791441, "learning_rate": 7.05449340932979e-06, "loss": 0.3603, "step": 7185 }, { "epoch": 0.3877826345043441, "grad_norm": 0.9010915637090584, "learning_rate": 7.0537774580491325e-06, "loss": 0.3701, "step": 7186 }, { "epoch": 0.3878365981328585, "grad_norm": 1.15367957180958, "learning_rate": 7.053061462111338e-06, "loss": 0.4921, "step": 7187 }, { "epoch": 0.3878905617613728, "grad_norm": 0.9970428394282319, "learning_rate": 7.052345421536989e-06, "loss": 0.4775, "step": 7188 }, { "epoch": 0.3879445253898872, "grad_norm": 0.8959197416137857, "learning_rate": 7.0516293363466636e-06, "loss": 0.4573, "step": 7189 }, { "epoch": 0.3879984890184016, "grad_norm": 1.1117595046405957, "learning_rate": 7.0509132065609446e-06, "loss": 0.5436, "step": 7190 }, { "epoch": 0.38805245264691596, "grad_norm": 0.8900668561743117, "learning_rate": 7.050197032200412e-06, "loss": 0.3922, "step": 7191 }, { "epoch": 0.38810641627543035, "grad_norm": 1.1924314925482522, "learning_rate": 7.049480813285651e-06, "loss": 0.5425, "step": 7192 }, { "epoch": 0.38816037990394475, "grad_norm": 1.0180136829689388, "learning_rate": 7.048764549837244e-06, "loss": 0.4372, "step": 7193 }, { "epoch": 0.38821434353245915, "grad_norm": 0.9585067729255137, "learning_rate": 7.0480482418757805e-06, "loss": 0.4317, "step": 7194 }, { "epoch": 0.3882683071609735, "grad_norm": 1.0963372241770633, "learning_rate": 7.047331889421846e-06, "loss": 0.4755, "step": 7195 }, { "epoch": 0.3883222707894879, "grad_norm": 
1.0365354550424546, "learning_rate": 7.0466154924960296e-06, "loss": 0.426, "step": 7196 }, { "epoch": 0.3883762344180023, "grad_norm": 0.9419839395520084, "learning_rate": 7.045899051118921e-06, "loss": 0.4087, "step": 7197 }, { "epoch": 0.38843019804651663, "grad_norm": 1.0303980386663267, "learning_rate": 7.045182565311112e-06, "loss": 0.4673, "step": 7198 }, { "epoch": 0.388484161675031, "grad_norm": 1.0721416271194737, "learning_rate": 7.044466035093194e-06, "loss": 0.5022, "step": 7199 }, { "epoch": 0.3885381253035454, "grad_norm": 1.211098016480153, "learning_rate": 7.043749460485763e-06, "loss": 0.5835, "step": 7200 }, { "epoch": 0.38859208893205976, "grad_norm": 0.9670605427881132, "learning_rate": 7.043032841509412e-06, "loss": 0.4229, "step": 7201 }, { "epoch": 0.38864605256057416, "grad_norm": 0.7833572812396475, "learning_rate": 7.042316178184737e-06, "loss": 0.3889, "step": 7202 }, { "epoch": 0.38870001618908856, "grad_norm": 0.9266040312720747, "learning_rate": 7.041599470532339e-06, "loss": 0.41, "step": 7203 }, { "epoch": 0.38875397981760296, "grad_norm": 1.1931329029825855, "learning_rate": 7.040882718572811e-06, "loss": 0.4791, "step": 7204 }, { "epoch": 0.3888079434461173, "grad_norm": 0.8907613060056407, "learning_rate": 7.04016592232676e-06, "loss": 0.381, "step": 7205 }, { "epoch": 0.3888619070746317, "grad_norm": 0.8936559938593469, "learning_rate": 7.039449081814782e-06, "loss": 0.4726, "step": 7206 }, { "epoch": 0.3889158707031461, "grad_norm": 0.9096899482588223, "learning_rate": 7.038732197057483e-06, "loss": 0.5035, "step": 7207 }, { "epoch": 0.38896983433166044, "grad_norm": 0.8564365435946649, "learning_rate": 7.0380152680754665e-06, "loss": 0.293, "step": 7208 }, { "epoch": 0.38902379796017483, "grad_norm": 1.030939401818031, "learning_rate": 7.037298294889337e-06, "loss": 0.5405, "step": 7209 }, { "epoch": 0.38907776158868923, "grad_norm": 0.8156352019225015, "learning_rate": 7.0365812775197e-06, "loss": 0.3407, "step": 7210 }, { "epoch": 0.38913172521720363, "grad_norm": 0.8959917160228557, "learning_rate": 7.035864215987164e-06, "loss": 0.416, "step": 7211 }, { "epoch": 0.38918568884571797, "grad_norm": 0.8564084387389894, "learning_rate": 7.035147110312338e-06, "loss": 0.4639, "step": 7212 }, { "epoch": 0.38923965247423237, "grad_norm": 0.8421483994153907, "learning_rate": 7.034429960515835e-06, "loss": 0.3208, "step": 7213 }, { "epoch": 0.38929361610274676, "grad_norm": 1.0619468884089582, "learning_rate": 7.033712766618264e-06, "loss": 0.3954, "step": 7214 }, { "epoch": 0.3893475797312611, "grad_norm": 1.1193664515127848, "learning_rate": 7.032995528640239e-06, "loss": 0.4765, "step": 7215 }, { "epoch": 0.3894015433597755, "grad_norm": 0.8177462030389688, "learning_rate": 7.032278246602373e-06, "loss": 0.3515, "step": 7216 }, { "epoch": 0.3894555069882899, "grad_norm": 0.9966554277408534, "learning_rate": 7.031560920525281e-06, "loss": 0.4903, "step": 7217 }, { "epoch": 0.3895094706168043, "grad_norm": 1.0337519609089925, "learning_rate": 7.0308435504295815e-06, "loss": 0.4779, "step": 7218 }, { "epoch": 0.38956343424531864, "grad_norm": 0.9800705214860455, "learning_rate": 7.0301261363358915e-06, "loss": 0.3946, "step": 7219 }, { "epoch": 0.38961739787383304, "grad_norm": 0.8525677807156149, "learning_rate": 7.029408678264831e-06, "loss": 0.3557, "step": 7220 }, { "epoch": 0.38967136150234744, "grad_norm": 0.8058616122151263, "learning_rate": 7.028691176237018e-06, "loss": 0.349, "step": 7221 }, { "epoch": 0.3897253251308618, "grad_norm": 
1.1761745000742074, "learning_rate": 7.027973630273077e-06, "loss": 0.4281, "step": 7222 }, { "epoch": 0.3897792887593762, "grad_norm": 0.8373419086931093, "learning_rate": 7.02725604039363e-06, "loss": 0.4475, "step": 7223 }, { "epoch": 0.38983325238789057, "grad_norm": 1.0985215493215759, "learning_rate": 7.026538406619301e-06, "loss": 0.6293, "step": 7224 }, { "epoch": 0.38988721601640497, "grad_norm": 1.0229374001690932, "learning_rate": 7.0258207289707145e-06, "loss": 0.5391, "step": 7225 }, { "epoch": 0.3899411796449193, "grad_norm": 0.8769962120893854, "learning_rate": 7.025103007468502e-06, "loss": 0.4002, "step": 7226 }, { "epoch": 0.3899951432734337, "grad_norm": 1.0958594134710034, "learning_rate": 7.024385242133286e-06, "loss": 0.435, "step": 7227 }, { "epoch": 0.3900491069019481, "grad_norm": 0.8972533948034126, "learning_rate": 7.023667432985697e-06, "loss": 0.4173, "step": 7228 }, { "epoch": 0.39010307053046245, "grad_norm": 1.0789616426778064, "learning_rate": 7.022949580046367e-06, "loss": 0.6232, "step": 7229 }, { "epoch": 0.39015703415897685, "grad_norm": 0.9537355588163308, "learning_rate": 7.0222316833359275e-06, "loss": 0.53, "step": 7230 }, { "epoch": 0.39021099778749124, "grad_norm": 0.9543028721252254, "learning_rate": 7.021513742875011e-06, "loss": 0.4977, "step": 7231 }, { "epoch": 0.3902649614160056, "grad_norm": 0.7659776709532105, "learning_rate": 7.020795758684253e-06, "loss": 0.3067, "step": 7232 }, { "epoch": 0.39031892504452, "grad_norm": 1.0748354462136311, "learning_rate": 7.0200777307842875e-06, "loss": 0.5044, "step": 7233 }, { "epoch": 0.3903728886730344, "grad_norm": 0.9853379338503038, "learning_rate": 7.019359659195752e-06, "loss": 0.4546, "step": 7234 }, { "epoch": 0.3904268523015488, "grad_norm": 0.7333583136660703, "learning_rate": 7.0186415439392855e-06, "loss": 0.3826, "step": 7235 }, { "epoch": 0.3904808159300631, "grad_norm": 1.0292533974858298, "learning_rate": 7.017923385035525e-06, "loss": 0.473, "step": 7236 }, { "epoch": 0.3905347795585775, "grad_norm": 1.1348003935542461, "learning_rate": 7.0172051825051136e-06, "loss": 0.4709, "step": 7237 }, { "epoch": 0.3905887431870919, "grad_norm": 1.15543624233146, "learning_rate": 7.016486936368693e-06, "loss": 0.5017, "step": 7238 }, { "epoch": 0.39064270681560626, "grad_norm": 1.1295906102138595, "learning_rate": 7.015768646646905e-06, "loss": 0.5352, "step": 7239 }, { "epoch": 0.39069667044412065, "grad_norm": 1.1053646692394374, "learning_rate": 7.0150503133603935e-06, "loss": 0.4332, "step": 7240 }, { "epoch": 0.39075063407263505, "grad_norm": 0.9590893703691303, "learning_rate": 7.014331936529806e-06, "loss": 0.5376, "step": 7241 }, { "epoch": 0.39080459770114945, "grad_norm": 1.0086592089816349, "learning_rate": 7.013613516175788e-06, "loss": 0.4296, "step": 7242 }, { "epoch": 0.3908585613296638, "grad_norm": 1.03641713735584, "learning_rate": 7.01289505231899e-06, "loss": 0.575, "step": 7243 }, { "epoch": 0.3909125249581782, "grad_norm": 1.0200168185723724, "learning_rate": 7.012176544980057e-06, "loss": 0.4415, "step": 7244 }, { "epoch": 0.3909664885866926, "grad_norm": 0.9762736029474288, "learning_rate": 7.011457994179643e-06, "loss": 0.4961, "step": 7245 }, { "epoch": 0.3910204522152069, "grad_norm": 1.0959573340426039, "learning_rate": 7.010739399938401e-06, "loss": 0.5827, "step": 7246 }, { "epoch": 0.3910744158437213, "grad_norm": 1.0635511777680617, "learning_rate": 7.01002076227698e-06, "loss": 0.5779, "step": 7247 }, { "epoch": 0.3911283794722357, "grad_norm": 
1.128534522223739, "learning_rate": 7.009302081216036e-06, "loss": 0.4173, "step": 7248 }, { "epoch": 0.3911823431007501, "grad_norm": 0.8696701511681613, "learning_rate": 7.008583356776226e-06, "loss": 0.3832, "step": 7249 }, { "epoch": 0.39123630672926446, "grad_norm": 0.8690374324591784, "learning_rate": 7.007864588978208e-06, "loss": 0.3103, "step": 7250 }, { "epoch": 0.39129027035777886, "grad_norm": 1.042111707988464, "learning_rate": 7.0071457778426366e-06, "loss": 0.4952, "step": 7251 }, { "epoch": 0.39134423398629326, "grad_norm": 0.9734874268073874, "learning_rate": 7.006426923390171e-06, "loss": 0.5903, "step": 7252 }, { "epoch": 0.3913981976148076, "grad_norm": 1.0030761967790662, "learning_rate": 7.005708025641475e-06, "loss": 0.4342, "step": 7253 }, { "epoch": 0.391452161243322, "grad_norm": 1.1670486393762667, "learning_rate": 7.00498908461721e-06, "loss": 0.4916, "step": 7254 }, { "epoch": 0.3915061248718364, "grad_norm": 1.2599757416224033, "learning_rate": 7.004270100338037e-06, "loss": 0.5497, "step": 7255 }, { "epoch": 0.3915600885003508, "grad_norm": 1.19803033672981, "learning_rate": 7.00355107282462e-06, "loss": 0.5243, "step": 7256 }, { "epoch": 0.39161405212886513, "grad_norm": 0.9254486315159782, "learning_rate": 7.002832002097628e-06, "loss": 0.4854, "step": 7257 }, { "epoch": 0.39166801575737953, "grad_norm": 1.1035220708748605, "learning_rate": 7.002112888177725e-06, "loss": 0.7131, "step": 7258 }, { "epoch": 0.3917219793858939, "grad_norm": 1.2591655873019791, "learning_rate": 7.001393731085578e-06, "loss": 0.6249, "step": 7259 }, { "epoch": 0.39177594301440827, "grad_norm": 0.9636921861142513, "learning_rate": 7.000674530841861e-06, "loss": 0.3811, "step": 7260 }, { "epoch": 0.39182990664292267, "grad_norm": 1.101326954642365, "learning_rate": 6.99995528746724e-06, "loss": 0.6531, "step": 7261 }, { "epoch": 0.39188387027143706, "grad_norm": 1.0332783310162637, "learning_rate": 6.9992360009823905e-06, "loss": 0.4783, "step": 7262 }, { "epoch": 0.39193783389995146, "grad_norm": 0.9562874449110266, "learning_rate": 6.998516671407981e-06, "loss": 0.5066, "step": 7263 }, { "epoch": 0.3919917975284658, "grad_norm": 0.7928153506579074, "learning_rate": 6.997797298764689e-06, "loss": 0.3062, "step": 7264 }, { "epoch": 0.3920457611569802, "grad_norm": 1.191335448375976, "learning_rate": 6.9970778830731885e-06, "loss": 0.5226, "step": 7265 }, { "epoch": 0.3920997247854946, "grad_norm": 0.9146161021733743, "learning_rate": 6.9963584243541585e-06, "loss": 0.3885, "step": 7266 }, { "epoch": 0.39215368841400894, "grad_norm": 0.8630724937085266, "learning_rate": 6.9956389226282765e-06, "loss": 0.355, "step": 7267 }, { "epoch": 0.39220765204252334, "grad_norm": 0.9138588201961969, "learning_rate": 6.994919377916217e-06, "loss": 0.4228, "step": 7268 }, { "epoch": 0.39226161567103773, "grad_norm": 0.7964711662199958, "learning_rate": 6.994199790238667e-06, "loss": 0.3266, "step": 7269 }, { "epoch": 0.3923155792995521, "grad_norm": 1.1155913337915295, "learning_rate": 6.993480159616305e-06, "loss": 0.5108, "step": 7270 }, { "epoch": 0.3923695429280665, "grad_norm": 0.9455559613478185, "learning_rate": 6.992760486069812e-06, "loss": 0.4448, "step": 7271 }, { "epoch": 0.39242350655658087, "grad_norm": 0.8345209239811572, "learning_rate": 6.992040769619876e-06, "loss": 0.3708, "step": 7272 }, { "epoch": 0.39247747018509527, "grad_norm": 0.9116268634002278, "learning_rate": 6.99132101028718e-06, "loss": 0.5158, "step": 7273 }, { "epoch": 0.3925314338136096, "grad_norm": 
0.9327978182193459, "learning_rate": 6.990601208092413e-06, "loss": 0.4622, "step": 7274 }, { "epoch": 0.392585397442124, "grad_norm": 0.9193700729033507, "learning_rate": 6.989881363056259e-06, "loss": 0.4865, "step": 7275 }, { "epoch": 0.3926393610706384, "grad_norm": 1.0297730939952272, "learning_rate": 6.989161475199409e-06, "loss": 0.5207, "step": 7276 }, { "epoch": 0.39269332469915275, "grad_norm": 1.010191024112803, "learning_rate": 6.988441544542556e-06, "loss": 0.5104, "step": 7277 }, { "epoch": 0.39274728832766714, "grad_norm": 0.9285584334818006, "learning_rate": 6.987721571106387e-06, "loss": 0.3639, "step": 7278 }, { "epoch": 0.39280125195618154, "grad_norm": 1.0291656196273573, "learning_rate": 6.987001554911598e-06, "loss": 0.3606, "step": 7279 }, { "epoch": 0.39285521558469594, "grad_norm": 1.0278733303581613, "learning_rate": 6.98628149597888e-06, "loss": 0.57, "step": 7280 }, { "epoch": 0.3929091792132103, "grad_norm": 1.4485565601033363, "learning_rate": 6.9855613943289326e-06, "loss": 0.5595, "step": 7281 }, { "epoch": 0.3929631428417247, "grad_norm": 1.0134234040328098, "learning_rate": 6.984841249982447e-06, "loss": 0.4677, "step": 7282 }, { "epoch": 0.3930171064702391, "grad_norm": 1.0850167792433496, "learning_rate": 6.9841210629601255e-06, "loss": 0.5431, "step": 7283 }, { "epoch": 0.3930710700987534, "grad_norm": 1.2117763336471414, "learning_rate": 6.983400833282665e-06, "loss": 0.6201, "step": 7284 }, { "epoch": 0.3931250337272678, "grad_norm": 0.9551810417962849, "learning_rate": 6.982680560970766e-06, "loss": 0.4107, "step": 7285 }, { "epoch": 0.3931789973557822, "grad_norm": 0.9634884333069673, "learning_rate": 6.981960246045128e-06, "loss": 0.5562, "step": 7286 }, { "epoch": 0.3932329609842966, "grad_norm": 0.9664024358454562, "learning_rate": 6.981239888526454e-06, "loss": 0.4762, "step": 7287 }, { "epoch": 0.39328692461281095, "grad_norm": 0.8788152396077874, "learning_rate": 6.980519488435451e-06, "loss": 0.4106, "step": 7288 }, { "epoch": 0.39334088824132535, "grad_norm": 0.965978351735014, "learning_rate": 6.9797990457928215e-06, "loss": 0.3329, "step": 7289 }, { "epoch": 0.39339485186983975, "grad_norm": 1.0332800407573766, "learning_rate": 6.9790785606192725e-06, "loss": 0.4745, "step": 7290 }, { "epoch": 0.3934488154983541, "grad_norm": 1.1177772761216382, "learning_rate": 6.978358032935512e-06, "loss": 0.5537, "step": 7291 }, { "epoch": 0.3935027791268685, "grad_norm": 1.0574971493488603, "learning_rate": 6.977637462762247e-06, "loss": 0.4304, "step": 7292 }, { "epoch": 0.3935567427553829, "grad_norm": 1.0835481091205992, "learning_rate": 6.976916850120186e-06, "loss": 0.5322, "step": 7293 }, { "epoch": 0.3936107063838973, "grad_norm": 1.011807646620418, "learning_rate": 6.976196195030045e-06, "loss": 0.4487, "step": 7294 }, { "epoch": 0.3936646700124116, "grad_norm": 1.1481946888803534, "learning_rate": 6.975475497512535e-06, "loss": 0.4752, "step": 7295 }, { "epoch": 0.393718633640926, "grad_norm": 1.1472699112619476, "learning_rate": 6.974754757588366e-06, "loss": 0.5453, "step": 7296 }, { "epoch": 0.3937725972694404, "grad_norm": 1.01127671171724, "learning_rate": 6.974033975278256e-06, "loss": 0.429, "step": 7297 }, { "epoch": 0.39382656089795476, "grad_norm": 1.0999096657764258, "learning_rate": 6.973313150602919e-06, "loss": 0.5535, "step": 7298 }, { "epoch": 0.39388052452646916, "grad_norm": 0.9157420891503849, "learning_rate": 6.972592283583075e-06, "loss": 0.4409, "step": 7299 }, { "epoch": 0.39393448815498355, "grad_norm": 
0.6961270851459377, "learning_rate": 6.971871374239439e-06, "loss": 0.2959, "step": 7300 }, { "epoch": 0.3939884517834979, "grad_norm": 0.9839009173822624, "learning_rate": 6.971150422592735e-06, "loss": 0.5245, "step": 7301 }, { "epoch": 0.3940424154120123, "grad_norm": 0.9890475129561677, "learning_rate": 6.97042942866368e-06, "loss": 0.5016, "step": 7302 }, { "epoch": 0.3940963790405267, "grad_norm": 0.8942564408350887, "learning_rate": 6.969708392472997e-06, "loss": 0.3809, "step": 7303 }, { "epoch": 0.3941503426690411, "grad_norm": 1.1199570224069297, "learning_rate": 6.96898731404141e-06, "loss": 0.6085, "step": 7304 }, { "epoch": 0.39420430629755543, "grad_norm": 0.8810069279824563, "learning_rate": 6.968266193389645e-06, "loss": 0.4505, "step": 7305 }, { "epoch": 0.3942582699260698, "grad_norm": 0.9089628114739008, "learning_rate": 6.967545030538424e-06, "loss": 0.3548, "step": 7306 }, { "epoch": 0.3943122335545842, "grad_norm": 0.9842616244967018, "learning_rate": 6.966823825508475e-06, "loss": 0.4781, "step": 7307 }, { "epoch": 0.39436619718309857, "grad_norm": 0.8971557710401814, "learning_rate": 6.966102578320531e-06, "loss": 0.2965, "step": 7308 }, { "epoch": 0.39442016081161296, "grad_norm": 0.941848702688513, "learning_rate": 6.965381288995315e-06, "loss": 0.4286, "step": 7309 }, { "epoch": 0.39447412444012736, "grad_norm": 0.9010608924667852, "learning_rate": 6.964659957553559e-06, "loss": 0.4724, "step": 7310 }, { "epoch": 0.39452808806864176, "grad_norm": 0.9989821157248231, "learning_rate": 6.963938584015997e-06, "loss": 0.7211, "step": 7311 }, { "epoch": 0.3945820516971561, "grad_norm": 1.007520951789721, "learning_rate": 6.9632171684033614e-06, "loss": 0.499, "step": 7312 }, { "epoch": 0.3946360153256705, "grad_norm": 0.9393299682377289, "learning_rate": 6.962495710736386e-06, "loss": 0.3612, "step": 7313 }, { "epoch": 0.3946899789541849, "grad_norm": 1.115652712677839, "learning_rate": 6.961774211035805e-06, "loss": 0.4956, "step": 7314 }, { "epoch": 0.39474394258269924, "grad_norm": 0.9499080566972553, "learning_rate": 6.9610526693223565e-06, "loss": 0.341, "step": 7315 }, { "epoch": 0.39479790621121363, "grad_norm": 0.9507301207502706, "learning_rate": 6.960331085616778e-06, "loss": 0.5336, "step": 7316 }, { "epoch": 0.39485186983972803, "grad_norm": 1.1490695044847667, "learning_rate": 6.959609459939809e-06, "loss": 0.6681, "step": 7317 }, { "epoch": 0.39490583346824243, "grad_norm": 1.2738683892734768, "learning_rate": 6.9588877923121886e-06, "loss": 0.5697, "step": 7318 }, { "epoch": 0.39495979709675677, "grad_norm": 1.133284752242553, "learning_rate": 6.958166082754661e-06, "loss": 0.535, "step": 7319 }, { "epoch": 0.39501376072527117, "grad_norm": 0.8796959147060349, "learning_rate": 6.957444331287965e-06, "loss": 0.3798, "step": 7320 }, { "epoch": 0.39506772435378557, "grad_norm": 1.0908859427825204, "learning_rate": 6.9567225379328464e-06, "loss": 0.5771, "step": 7321 }, { "epoch": 0.3951216879822999, "grad_norm": 1.3565909705024344, "learning_rate": 6.956000702710051e-06, "loss": 0.602, "step": 7322 }, { "epoch": 0.3951756516108143, "grad_norm": 1.0733969199820161, "learning_rate": 6.955278825640323e-06, "loss": 0.4623, "step": 7323 }, { "epoch": 0.3952296152393287, "grad_norm": 1.168757410621451, "learning_rate": 6.954556906744412e-06, "loss": 0.5599, "step": 7324 }, { "epoch": 0.3952835788678431, "grad_norm": 0.9379821457022032, "learning_rate": 6.953834946043065e-06, "loss": 0.3355, "step": 7325 }, { "epoch": 0.39533754249635744, "grad_norm": 
0.8635305358219658, "learning_rate": 6.953112943557035e-06, "loss": 0.4587, "step": 7326 }, { "epoch": 0.39539150612487184, "grad_norm": 1.175969633333356, "learning_rate": 6.952390899307066e-06, "loss": 0.487, "step": 7327 }, { "epoch": 0.39544546975338624, "grad_norm": 1.0520500182100254, "learning_rate": 6.951668813313919e-06, "loss": 0.5829, "step": 7328 }, { "epoch": 0.3954994333819006, "grad_norm": 1.0003832009783513, "learning_rate": 6.950946685598342e-06, "loss": 0.4606, "step": 7329 }, { "epoch": 0.395553397010415, "grad_norm": 0.8753920467546847, "learning_rate": 6.950224516181092e-06, "loss": 0.3909, "step": 7330 }, { "epoch": 0.3956073606389294, "grad_norm": 0.8331379090004266, "learning_rate": 6.949502305082923e-06, "loss": 0.4033, "step": 7331 }, { "epoch": 0.39566132426744377, "grad_norm": 1.145611283936693, "learning_rate": 6.948780052324594e-06, "loss": 0.5329, "step": 7332 }, { "epoch": 0.3957152878959581, "grad_norm": 0.987691911883217, "learning_rate": 6.948057757926863e-06, "loss": 0.4965, "step": 7333 }, { "epoch": 0.3957692515244725, "grad_norm": 1.0621368921858534, "learning_rate": 6.947335421910487e-06, "loss": 0.4641, "step": 7334 }, { "epoch": 0.3958232151529869, "grad_norm": 0.9370148205996476, "learning_rate": 6.9466130442962294e-06, "loss": 0.4491, "step": 7335 }, { "epoch": 0.39587717878150125, "grad_norm": 0.9166660282323302, "learning_rate": 6.945890625104852e-06, "loss": 0.4576, "step": 7336 }, { "epoch": 0.39593114241001565, "grad_norm": 0.8319097175821002, "learning_rate": 6.945168164357117e-06, "loss": 0.3466, "step": 7337 }, { "epoch": 0.39598510603853004, "grad_norm": 0.8842716313565426, "learning_rate": 6.944445662073787e-06, "loss": 0.4657, "step": 7338 }, { "epoch": 0.3960390696670444, "grad_norm": 1.2257993139512444, "learning_rate": 6.943723118275631e-06, "loss": 0.5387, "step": 7339 }, { "epoch": 0.3960930332955588, "grad_norm": 1.0719152401027197, "learning_rate": 6.943000532983413e-06, "loss": 0.5147, "step": 7340 }, { "epoch": 0.3961469969240732, "grad_norm": 1.0999414301822612, "learning_rate": 6.9422779062179015e-06, "loss": 0.5486, "step": 7341 }, { "epoch": 0.3962009605525876, "grad_norm": 1.0946976938022532, "learning_rate": 6.941555237999866e-06, "loss": 0.4823, "step": 7342 }, { "epoch": 0.3962549241811019, "grad_norm": 0.8518769356416481, "learning_rate": 6.940832528350077e-06, "loss": 0.3982, "step": 7343 }, { "epoch": 0.3963088878096163, "grad_norm": 0.8772455185616743, "learning_rate": 6.940109777289304e-06, "loss": 0.434, "step": 7344 }, { "epoch": 0.3963628514381307, "grad_norm": 0.9882598322136172, "learning_rate": 6.9393869848383214e-06, "loss": 0.4677, "step": 7345 }, { "epoch": 0.39641681506664506, "grad_norm": 0.9952991142534502, "learning_rate": 6.938664151017903e-06, "loss": 0.4449, "step": 7346 }, { "epoch": 0.39647077869515945, "grad_norm": 0.9814742887229432, "learning_rate": 6.937941275848823e-06, "loss": 0.4139, "step": 7347 }, { "epoch": 0.39652474232367385, "grad_norm": 1.155573138112056, "learning_rate": 6.937218359351857e-06, "loss": 0.5893, "step": 7348 }, { "epoch": 0.39657870595218825, "grad_norm": 0.9376832875631692, "learning_rate": 6.936495401547784e-06, "loss": 0.3567, "step": 7349 }, { "epoch": 0.3966326695807026, "grad_norm": 0.9704350101745409, "learning_rate": 6.935772402457383e-06, "loss": 0.4834, "step": 7350 }, { "epoch": 0.396686633209217, "grad_norm": 1.2282985235158532, "learning_rate": 6.9350493621014294e-06, "loss": 0.698, "step": 7351 }, { "epoch": 0.3967405968377314, "grad_norm": 
1.2233005019209653, "learning_rate": 6.9343262805007086e-06, "loss": 0.492, "step": 7352 }, { "epoch": 0.3967945604662457, "grad_norm": 0.9335233370039169, "learning_rate": 6.933603157676001e-06, "loss": 0.4409, "step": 7353 }, { "epoch": 0.3968485240947601, "grad_norm": 0.9817664727059547, "learning_rate": 6.93287999364809e-06, "loss": 0.5589, "step": 7354 }, { "epoch": 0.3969024877232745, "grad_norm": 1.1679786123238152, "learning_rate": 6.932156788437761e-06, "loss": 0.4845, "step": 7355 }, { "epoch": 0.3969564513517889, "grad_norm": 1.0199058549748143, "learning_rate": 6.9314335420657985e-06, "loss": 0.4011, "step": 7356 }, { "epoch": 0.39701041498030326, "grad_norm": 0.9181191607718128, "learning_rate": 6.930710254552989e-06, "loss": 0.4672, "step": 7357 }, { "epoch": 0.39706437860881766, "grad_norm": 0.78227009410512, "learning_rate": 6.929986925920121e-06, "loss": 0.2682, "step": 7358 }, { "epoch": 0.39711834223733206, "grad_norm": 0.8952285701485744, "learning_rate": 6.929263556187985e-06, "loss": 0.4715, "step": 7359 }, { "epoch": 0.3971723058658464, "grad_norm": 0.9644531706227256, "learning_rate": 6.92854014537737e-06, "loss": 0.4303, "step": 7360 }, { "epoch": 0.3972262694943608, "grad_norm": 1.0519019190237167, "learning_rate": 6.927816693509067e-06, "loss": 0.569, "step": 7361 }, { "epoch": 0.3972802331228752, "grad_norm": 0.9866100255963471, "learning_rate": 6.92709320060387e-06, "loss": 0.3967, "step": 7362 }, { "epoch": 0.3973341967513896, "grad_norm": 1.0318057617500094, "learning_rate": 6.926369666682573e-06, "loss": 0.4257, "step": 7363 }, { "epoch": 0.39738816037990393, "grad_norm": 0.9264055715588506, "learning_rate": 6.925646091765969e-06, "loss": 0.3904, "step": 7364 }, { "epoch": 0.39744212400841833, "grad_norm": 1.1007287131951302, "learning_rate": 6.924922475874855e-06, "loss": 0.4957, "step": 7365 }, { "epoch": 0.3974960876369327, "grad_norm": 0.9599885795092227, "learning_rate": 6.924198819030032e-06, "loss": 0.444, "step": 7366 }, { "epoch": 0.39755005126544707, "grad_norm": 0.7760640523358238, "learning_rate": 6.923475121252295e-06, "loss": 0.3337, "step": 7367 }, { "epoch": 0.39760401489396147, "grad_norm": 1.0094978284850458, "learning_rate": 6.922751382562445e-06, "loss": 0.437, "step": 7368 }, { "epoch": 0.39765797852247586, "grad_norm": 0.9207834787686776, "learning_rate": 6.9220276029812825e-06, "loss": 0.484, "step": 7369 }, { "epoch": 0.3977119421509902, "grad_norm": 0.8557650507547608, "learning_rate": 6.9213037825296095e-06, "loss": 0.4155, "step": 7370 }, { "epoch": 0.3977659057795046, "grad_norm": 1.101239006968196, "learning_rate": 6.92057992122823e-06, "loss": 0.6449, "step": 7371 }, { "epoch": 0.397819869408019, "grad_norm": 1.171464565375767, "learning_rate": 6.919856019097948e-06, "loss": 0.5332, "step": 7372 }, { "epoch": 0.3978738330365334, "grad_norm": 1.020880702065345, "learning_rate": 6.919132076159569e-06, "loss": 0.4323, "step": 7373 }, { "epoch": 0.39792779666504774, "grad_norm": 1.0971454769292677, "learning_rate": 6.918408092433902e-06, "loss": 0.4816, "step": 7374 }, { "epoch": 0.39798176029356214, "grad_norm": 1.2714749410406507, "learning_rate": 6.917684067941751e-06, "loss": 0.6774, "step": 7375 }, { "epoch": 0.39803572392207653, "grad_norm": 1.1708828346481746, "learning_rate": 6.916960002703927e-06, "loss": 0.5032, "step": 7376 }, { "epoch": 0.3980896875505909, "grad_norm": 1.0156931386360397, "learning_rate": 6.916235896741243e-06, "loss": 0.428, "step": 7377 }, { "epoch": 0.3981436511791053, "grad_norm": 1.1102423042048066, 
"learning_rate": 6.9155117500745065e-06, "loss": 0.4033, "step": 7378 }, { "epoch": 0.39819761480761967, "grad_norm": 0.8929356063250794, "learning_rate": 6.914787562724533e-06, "loss": 0.3748, "step": 7379 }, { "epoch": 0.39825157843613407, "grad_norm": 0.9073345389577914, "learning_rate": 6.914063334712136e-06, "loss": 0.4249, "step": 7380 }, { "epoch": 0.3983055420646484, "grad_norm": 0.8023287248387875, "learning_rate": 6.913339066058129e-06, "loss": 0.3931, "step": 7381 }, { "epoch": 0.3983595056931628, "grad_norm": 1.288212264311498, "learning_rate": 6.912614756783329e-06, "loss": 0.7606, "step": 7382 }, { "epoch": 0.3984134693216772, "grad_norm": 1.0888621182649214, "learning_rate": 6.911890406908554e-06, "loss": 0.5458, "step": 7383 }, { "epoch": 0.39846743295019155, "grad_norm": 1.2057243393225157, "learning_rate": 6.9111660164546225e-06, "loss": 0.7101, "step": 7384 }, { "epoch": 0.39852139657870594, "grad_norm": 1.0782923834299436, "learning_rate": 6.910441585442353e-06, "loss": 0.4761, "step": 7385 }, { "epoch": 0.39857536020722034, "grad_norm": 1.0737868329773743, "learning_rate": 6.909717113892567e-06, "loss": 0.4911, "step": 7386 }, { "epoch": 0.39862932383573474, "grad_norm": 1.087461947949343, "learning_rate": 6.908992601826087e-06, "loss": 0.4479, "step": 7387 }, { "epoch": 0.3986832874642491, "grad_norm": 1.1554375256611862, "learning_rate": 6.9082680492637364e-06, "loss": 0.5771, "step": 7388 }, { "epoch": 0.3987372510927635, "grad_norm": 0.921951538945157, "learning_rate": 6.907543456226339e-06, "loss": 0.406, "step": 7389 }, { "epoch": 0.3987912147212779, "grad_norm": 1.1882786370681226, "learning_rate": 6.90681882273472e-06, "loss": 0.6643, "step": 7390 }, { "epoch": 0.3988451783497922, "grad_norm": 1.059806028332631, "learning_rate": 6.906094148809708e-06, "loss": 0.4533, "step": 7391 }, { "epoch": 0.3988991419783066, "grad_norm": 0.9999288894521997, "learning_rate": 6.905369434472127e-06, "loss": 0.4158, "step": 7392 }, { "epoch": 0.398953105606821, "grad_norm": 1.0450696152711116, "learning_rate": 6.904644679742809e-06, "loss": 0.4818, "step": 7393 }, { "epoch": 0.3990070692353354, "grad_norm": 1.2644153026381855, "learning_rate": 6.9039198846425836e-06, "loss": 0.4476, "step": 7394 }, { "epoch": 0.39906103286384975, "grad_norm": 0.8985276129022756, "learning_rate": 6.903195049192285e-06, "loss": 0.361, "step": 7395 }, { "epoch": 0.39911499649236415, "grad_norm": 0.7512154533590325, "learning_rate": 6.902470173412739e-06, "loss": 0.3129, "step": 7396 }, { "epoch": 0.39916896012087855, "grad_norm": 0.8918173668190926, "learning_rate": 6.901745257324786e-06, "loss": 0.3586, "step": 7397 }, { "epoch": 0.3992229237493929, "grad_norm": 1.2094021443982044, "learning_rate": 6.901020300949257e-06, "loss": 0.601, "step": 7398 }, { "epoch": 0.3992768873779073, "grad_norm": 1.23906409757255, "learning_rate": 6.900295304306987e-06, "loss": 0.5486, "step": 7399 }, { "epoch": 0.3993308510064217, "grad_norm": 1.2584505419173238, "learning_rate": 6.899570267418818e-06, "loss": 0.5424, "step": 7400 }, { "epoch": 0.3993848146349361, "grad_norm": 0.9360162736217269, "learning_rate": 6.898845190305583e-06, "loss": 0.4827, "step": 7401 }, { "epoch": 0.3994387782634504, "grad_norm": 1.046184058017311, "learning_rate": 6.898120072988127e-06, "loss": 0.4719, "step": 7402 }, { "epoch": 0.3994927418919648, "grad_norm": 0.8787748917289807, "learning_rate": 6.897394915487284e-06, "loss": 0.4037, "step": 7403 }, { "epoch": 0.3995467055204792, "grad_norm": 1.0198588765783025, "learning_rate": 
6.8966697178239e-06, "loss": 0.519, "step": 7404 }, { "epoch": 0.39960066914899356, "grad_norm": 1.092223303801719, "learning_rate": 6.895944480018819e-06, "loss": 0.4816, "step": 7405 }, { "epoch": 0.39965463277750796, "grad_norm": 1.1756305919461671, "learning_rate": 6.895219202092882e-06, "loss": 0.6118, "step": 7406 }, { "epoch": 0.39970859640602235, "grad_norm": 1.1759752493577824, "learning_rate": 6.894493884066934e-06, "loss": 0.4735, "step": 7407 }, { "epoch": 0.3997625600345367, "grad_norm": 0.9398615015415274, "learning_rate": 6.893768525961825e-06, "loss": 0.3725, "step": 7408 }, { "epoch": 0.3998165236630511, "grad_norm": 1.0730535084503168, "learning_rate": 6.893043127798398e-06, "loss": 0.536, "step": 7409 }, { "epoch": 0.3998704872915655, "grad_norm": 0.9915702891297549, "learning_rate": 6.8923176895975065e-06, "loss": 0.5206, "step": 7410 }, { "epoch": 0.3999244509200799, "grad_norm": 1.1174807128643185, "learning_rate": 6.891592211379995e-06, "loss": 0.5537, "step": 7411 }, { "epoch": 0.39997841454859423, "grad_norm": 0.8705396537027286, "learning_rate": 6.890866693166719e-06, "loss": 0.3489, "step": 7412 }, { "epoch": 0.4000323781771086, "grad_norm": 0.9287044392606056, "learning_rate": 6.890141134978529e-06, "loss": 0.5821, "step": 7413 }, { "epoch": 0.400086341805623, "grad_norm": 1.0479318322055813, "learning_rate": 6.8894155368362766e-06, "loss": 0.3686, "step": 7414 }, { "epoch": 0.40014030543413737, "grad_norm": 0.799069691525588, "learning_rate": 6.888689898760819e-06, "loss": 0.4302, "step": 7415 }, { "epoch": 0.40019426906265176, "grad_norm": 0.8590001903091148, "learning_rate": 6.88796422077301e-06, "loss": 0.3657, "step": 7416 }, { "epoch": 0.40024823269116616, "grad_norm": 1.1169965896271177, "learning_rate": 6.887238502893707e-06, "loss": 0.5201, "step": 7417 }, { "epoch": 0.40030219631968056, "grad_norm": 1.0343760707293472, "learning_rate": 6.886512745143768e-06, "loss": 0.4912, "step": 7418 }, { "epoch": 0.4003561599481949, "grad_norm": 1.053534393407756, "learning_rate": 6.8857869475440505e-06, "loss": 0.5298, "step": 7419 }, { "epoch": 0.4004101235767093, "grad_norm": 1.0355645691339856, "learning_rate": 6.885061110115418e-06, "loss": 0.6798, "step": 7420 }, { "epoch": 0.4004640872052237, "grad_norm": 0.9813152412824858, "learning_rate": 6.884335232878728e-06, "loss": 0.456, "step": 7421 }, { "epoch": 0.40051805083373804, "grad_norm": 1.0180771388818712, "learning_rate": 6.883609315854846e-06, "loss": 0.5082, "step": 7422 }, { "epoch": 0.40057201446225243, "grad_norm": 1.0068640739746724, "learning_rate": 6.882883359064634e-06, "loss": 0.5287, "step": 7423 }, { "epoch": 0.40062597809076683, "grad_norm": 1.1611763703645002, "learning_rate": 6.882157362528955e-06, "loss": 0.506, "step": 7424 }, { "epoch": 0.40067994171928123, "grad_norm": 1.1598748801714478, "learning_rate": 6.881431326268678e-06, "loss": 0.5628, "step": 7425 }, { "epoch": 0.40073390534779557, "grad_norm": 1.057387444705196, "learning_rate": 6.88070525030467e-06, "loss": 0.4574, "step": 7426 }, { "epoch": 0.40078786897630997, "grad_norm": 1.236511696570724, "learning_rate": 6.879979134657797e-06, "loss": 0.6323, "step": 7427 }, { "epoch": 0.40084183260482437, "grad_norm": 1.0618581146782926, "learning_rate": 6.8792529793489294e-06, "loss": 0.6071, "step": 7428 }, { "epoch": 0.4008957962333387, "grad_norm": 0.9514098954990928, "learning_rate": 6.878526784398938e-06, "loss": 0.5519, "step": 7429 }, { "epoch": 0.4009497598618531, "grad_norm": 0.9513325328957637, "learning_rate": 
6.877800549828694e-06, "loss": 0.3678, "step": 7430 }, { "epoch": 0.4010037234903675, "grad_norm": 1.2687720805135903, "learning_rate": 6.877074275659069e-06, "loss": 0.5549, "step": 7431 }, { "epoch": 0.4010576871188819, "grad_norm": 0.8961639237626571, "learning_rate": 6.876347961910938e-06, "loss": 0.3899, "step": 7432 }, { "epoch": 0.40111165074739624, "grad_norm": 0.8503756505926594, "learning_rate": 6.8756216086051766e-06, "loss": 0.4103, "step": 7433 }, { "epoch": 0.40116561437591064, "grad_norm": 0.9034626615202787, "learning_rate": 6.87489521576266e-06, "loss": 0.41, "step": 7434 }, { "epoch": 0.40121957800442504, "grad_norm": 1.2056837234997013, "learning_rate": 6.874168783404266e-06, "loss": 0.564, "step": 7435 }, { "epoch": 0.4012735416329394, "grad_norm": 1.1357757377519186, "learning_rate": 6.873442311550873e-06, "loss": 0.5908, "step": 7436 }, { "epoch": 0.4013275052614538, "grad_norm": 0.9313218853284131, "learning_rate": 6.872715800223359e-06, "loss": 0.3783, "step": 7437 }, { "epoch": 0.4013814688899682, "grad_norm": 1.155563053243819, "learning_rate": 6.871989249442605e-06, "loss": 0.5926, "step": 7438 }, { "epoch": 0.4014354325184825, "grad_norm": 0.8996284899361667, "learning_rate": 6.871262659229496e-06, "loss": 0.4025, "step": 7439 }, { "epoch": 0.4014893961469969, "grad_norm": 0.8845793529901812, "learning_rate": 6.870536029604912e-06, "loss": 0.407, "step": 7440 }, { "epoch": 0.4015433597755113, "grad_norm": 0.8223896169049989, "learning_rate": 6.8698093605897365e-06, "loss": 0.4421, "step": 7441 }, { "epoch": 0.4015973234040257, "grad_norm": 0.9414673973948463, "learning_rate": 6.869082652204857e-06, "loss": 0.408, "step": 7442 }, { "epoch": 0.40165128703254005, "grad_norm": 0.9085305304075144, "learning_rate": 6.868355904471158e-06, "loss": 0.4668, "step": 7443 }, { "epoch": 0.40170525066105445, "grad_norm": 1.0312921882842883, "learning_rate": 6.867629117409529e-06, "loss": 0.4487, "step": 7444 }, { "epoch": 0.40175921428956884, "grad_norm": 1.200632082204616, "learning_rate": 6.866902291040856e-06, "loss": 0.5366, "step": 7445 }, { "epoch": 0.4018131779180832, "grad_norm": 0.9881660640315135, "learning_rate": 6.86617542538603e-06, "loss": 0.3806, "step": 7446 }, { "epoch": 0.4018671415465976, "grad_norm": 0.929625133655214, "learning_rate": 6.865448520465943e-06, "loss": 0.3999, "step": 7447 }, { "epoch": 0.401921105175112, "grad_norm": 1.1174250382244793, "learning_rate": 6.8647215763014845e-06, "loss": 0.6896, "step": 7448 }, { "epoch": 0.4019750688036264, "grad_norm": 1.0038643175666273, "learning_rate": 6.8639945929135485e-06, "loss": 0.3346, "step": 7449 }, { "epoch": 0.4020290324321407, "grad_norm": 1.1563592668375249, "learning_rate": 6.863267570323032e-06, "loss": 0.5085, "step": 7450 }, { "epoch": 0.4020829960606551, "grad_norm": 1.1116327283424083, "learning_rate": 6.862540508550827e-06, "loss": 0.564, "step": 7451 }, { "epoch": 0.4021369596891695, "grad_norm": 1.1603719437970714, "learning_rate": 6.86181340761783e-06, "loss": 0.568, "step": 7452 }, { "epoch": 0.40219092331768386, "grad_norm": 1.058065333800366, "learning_rate": 6.861086267544942e-06, "loss": 0.5885, "step": 7453 }, { "epoch": 0.40224488694619825, "grad_norm": 1.0069163159700363, "learning_rate": 6.860359088353057e-06, "loss": 0.5453, "step": 7454 }, { "epoch": 0.40229885057471265, "grad_norm": 0.9385666636293563, "learning_rate": 6.859631870063077e-06, "loss": 0.6166, "step": 7455 }, { "epoch": 0.40235281420322705, "grad_norm": 1.3012322873709101, "learning_rate": 
6.858904612695906e-06, "loss": 0.5792, "step": 7456 }, { "epoch": 0.4024067778317414, "grad_norm": 0.9776935961570965, "learning_rate": 6.858177316272441e-06, "loss": 0.3797, "step": 7457 }, { "epoch": 0.4024607414602558, "grad_norm": 1.1118260821267112, "learning_rate": 6.857449980813589e-06, "loss": 0.6064, "step": 7458 }, { "epoch": 0.4025147050887702, "grad_norm": 1.3267568383972237, "learning_rate": 6.856722606340251e-06, "loss": 0.6381, "step": 7459 }, { "epoch": 0.40256866871728453, "grad_norm": 1.04924190290178, "learning_rate": 6.855995192873336e-06, "loss": 0.4451, "step": 7460 }, { "epoch": 0.4026226323457989, "grad_norm": 1.0624287744130938, "learning_rate": 6.855267740433748e-06, "loss": 0.5318, "step": 7461 }, { "epoch": 0.4026765959743133, "grad_norm": 0.9697913526782592, "learning_rate": 6.854540249042397e-06, "loss": 0.4564, "step": 7462 }, { "epoch": 0.4027305596028277, "grad_norm": 0.8758203230983929, "learning_rate": 6.85381271872019e-06, "loss": 0.4214, "step": 7463 }, { "epoch": 0.40278452323134206, "grad_norm": 1.0358890284903266, "learning_rate": 6.853085149488038e-06, "loss": 0.4522, "step": 7464 }, { "epoch": 0.40283848685985646, "grad_norm": 1.0074240561037584, "learning_rate": 6.852357541366851e-06, "loss": 0.5253, "step": 7465 }, { "epoch": 0.40289245048837086, "grad_norm": 1.0884130227220628, "learning_rate": 6.851629894377542e-06, "loss": 0.4924, "step": 7466 }, { "epoch": 0.4029464141168852, "grad_norm": 1.1759556250156717, "learning_rate": 6.850902208541024e-06, "loss": 0.4528, "step": 7467 }, { "epoch": 0.4030003777453996, "grad_norm": 0.880225177163445, "learning_rate": 6.850174483878211e-06, "loss": 0.4197, "step": 7468 }, { "epoch": 0.403054341373914, "grad_norm": 1.0382024707070134, "learning_rate": 6.849446720410019e-06, "loss": 0.4766, "step": 7469 }, { "epoch": 0.4031083050024284, "grad_norm": 0.953585543028039, "learning_rate": 6.8487189181573656e-06, "loss": 0.4752, "step": 7470 }, { "epoch": 0.40316226863094273, "grad_norm": 1.0264949522896405, "learning_rate": 6.847991077141168e-06, "loss": 0.5286, "step": 7471 }, { "epoch": 0.40321623225945713, "grad_norm": 1.0825797290921273, "learning_rate": 6.847263197382345e-06, "loss": 0.4685, "step": 7472 }, { "epoch": 0.4032701958879715, "grad_norm": 1.0800804442899046, "learning_rate": 6.846535278901815e-06, "loss": 0.4152, "step": 7473 }, { "epoch": 0.40332415951648587, "grad_norm": 0.9680618608713457, "learning_rate": 6.845807321720503e-06, "loss": 0.6097, "step": 7474 }, { "epoch": 0.40337812314500027, "grad_norm": 0.8809066812618978, "learning_rate": 6.845079325859326e-06, "loss": 0.3879, "step": 7475 }, { "epoch": 0.40343208677351466, "grad_norm": 0.9438149331246248, "learning_rate": 6.844351291339211e-06, "loss": 0.4552, "step": 7476 }, { "epoch": 0.403486050402029, "grad_norm": 1.1426728305193914, "learning_rate": 6.843623218181081e-06, "loss": 0.4203, "step": 7477 }, { "epoch": 0.4035400140305434, "grad_norm": 0.9797420152753918, "learning_rate": 6.8428951064058634e-06, "loss": 0.4744, "step": 7478 }, { "epoch": 0.4035939776590578, "grad_norm": 1.217060320735071, "learning_rate": 6.842166956034483e-06, "loss": 0.6055, "step": 7479 }, { "epoch": 0.4036479412875722, "grad_norm": 0.9245265391375899, "learning_rate": 6.841438767087867e-06, "loss": 0.4398, "step": 7480 }, { "epoch": 0.40370190491608654, "grad_norm": 0.965177937195023, "learning_rate": 6.840710539586948e-06, "loss": 0.4907, "step": 7481 }, { "epoch": 0.40375586854460094, "grad_norm": 1.2629263114110907, "learning_rate": 
6.839982273552651e-06, "loss": 0.6956, "step": 7482 }, { "epoch": 0.40380983217311534, "grad_norm": 1.1470679668772006, "learning_rate": 6.83925396900591e-06, "loss": 0.4389, "step": 7483 }, { "epoch": 0.4038637958016297, "grad_norm": 0.9178001033842014, "learning_rate": 6.838525625967657e-06, "loss": 0.4685, "step": 7484 }, { "epoch": 0.4039177594301441, "grad_norm": 1.0222283180441178, "learning_rate": 6.837797244458826e-06, "loss": 0.5736, "step": 7485 }, { "epoch": 0.40397172305865847, "grad_norm": 0.7666455893795111, "learning_rate": 6.837068824500348e-06, "loss": 0.2732, "step": 7486 }, { "epoch": 0.40402568668717287, "grad_norm": 1.0843833044925837, "learning_rate": 6.836340366113163e-06, "loss": 0.435, "step": 7487 }, { "epoch": 0.4040796503156872, "grad_norm": 1.0063179762407373, "learning_rate": 6.835611869318206e-06, "loss": 0.5379, "step": 7488 }, { "epoch": 0.4041336139442016, "grad_norm": 0.9843475557023736, "learning_rate": 6.834883334136413e-06, "loss": 0.5725, "step": 7489 }, { "epoch": 0.404187577572716, "grad_norm": 1.0185877060506705, "learning_rate": 6.834154760588723e-06, "loss": 0.5264, "step": 7490 }, { "epoch": 0.40424154120123035, "grad_norm": 0.9590359741842849, "learning_rate": 6.83342614869608e-06, "loss": 0.4472, "step": 7491 }, { "epoch": 0.40429550482974475, "grad_norm": 1.183126844254113, "learning_rate": 6.832697498479419e-06, "loss": 0.7068, "step": 7492 }, { "epoch": 0.40434946845825914, "grad_norm": 1.1307991526578705, "learning_rate": 6.831968809959687e-06, "loss": 0.5922, "step": 7493 }, { "epoch": 0.40440343208677354, "grad_norm": 1.0458904175067592, "learning_rate": 6.831240083157827e-06, "loss": 0.4676, "step": 7494 }, { "epoch": 0.4044573957152879, "grad_norm": 0.9768321621729886, "learning_rate": 6.83051131809478e-06, "loss": 0.4164, "step": 7495 }, { "epoch": 0.4045113593438023, "grad_norm": 1.0137345756288594, "learning_rate": 6.829782514791494e-06, "loss": 0.4322, "step": 7496 }, { "epoch": 0.4045653229723167, "grad_norm": 1.110675615134294, "learning_rate": 6.829053673268914e-06, "loss": 0.5366, "step": 7497 }, { "epoch": 0.404619286600831, "grad_norm": 0.8597882266362329, "learning_rate": 6.8283247935479905e-06, "loss": 0.3379, "step": 7498 }, { "epoch": 0.4046732502293454, "grad_norm": 1.3741169040876424, "learning_rate": 6.827595875649668e-06, "loss": 0.5546, "step": 7499 }, { "epoch": 0.4047272138578598, "grad_norm": 1.219943334540381, "learning_rate": 6.8268669195949e-06, "loss": 0.4878, "step": 7500 }, { "epoch": 0.4047272138578598, "eval_loss": 0.5583013892173767, "eval_runtime": 165.7815, "eval_samples_per_second": 20.744, "eval_steps_per_second": 0.869, "step": 7500 }, { "epoch": 0.4047811774863742, "grad_norm": 1.0552853424627657, "learning_rate": 6.826137925404635e-06, "loss": 0.6024, "step": 7501 }, { "epoch": 0.40483514111488855, "grad_norm": 0.9131848846130454, "learning_rate": 6.825408893099828e-06, "loss": 0.2693, "step": 7502 }, { "epoch": 0.40488910474340295, "grad_norm": 1.140690442601447, "learning_rate": 6.824679822701428e-06, "loss": 0.5655, "step": 7503 }, { "epoch": 0.40494306837191735, "grad_norm": 0.8196183357076992, "learning_rate": 6.823950714230393e-06, "loss": 0.4239, "step": 7504 }, { "epoch": 0.4049970320004317, "grad_norm": 0.9605209575037147, "learning_rate": 6.823221567707677e-06, "loss": 0.4902, "step": 7505 }, { "epoch": 0.4050509956289461, "grad_norm": 0.9419530362151369, "learning_rate": 6.822492383154235e-06, "loss": 0.346, "step": 7506 }, { "epoch": 0.4051049592574605, "grad_norm": 0.9590137525125092, 
"learning_rate": 6.821763160591024e-06, "loss": 0.576, "step": 7507 }, { "epoch": 0.4051589228859748, "grad_norm": 0.8877022729819032, "learning_rate": 6.821033900039008e-06, "loss": 0.4281, "step": 7508 }, { "epoch": 0.4052128865144892, "grad_norm": 0.8905284049940813, "learning_rate": 6.8203046015191395e-06, "loss": 0.4098, "step": 7509 }, { "epoch": 0.4052668501430036, "grad_norm": 1.2081350397547608, "learning_rate": 6.819575265052384e-06, "loss": 0.6063, "step": 7510 }, { "epoch": 0.405320813771518, "grad_norm": 1.294975696563038, "learning_rate": 6.818845890659702e-06, "loss": 0.5337, "step": 7511 }, { "epoch": 0.40537477740003236, "grad_norm": 0.7946295939688212, "learning_rate": 6.818116478362058e-06, "loss": 0.2971, "step": 7512 }, { "epoch": 0.40542874102854676, "grad_norm": 1.1609785129977837, "learning_rate": 6.817387028180412e-06, "loss": 0.6851, "step": 7513 }, { "epoch": 0.40548270465706115, "grad_norm": 0.773158934580305, "learning_rate": 6.816657540135733e-06, "loss": 0.3672, "step": 7514 }, { "epoch": 0.4055366682855755, "grad_norm": 0.9595224472036586, "learning_rate": 6.815928014248986e-06, "loss": 0.5144, "step": 7515 }, { "epoch": 0.4055906319140899, "grad_norm": 1.031984536946006, "learning_rate": 6.8151984505411385e-06, "loss": 0.4525, "step": 7516 }, { "epoch": 0.4056445955426043, "grad_norm": 1.3337563226311617, "learning_rate": 6.814468849033158e-06, "loss": 0.7379, "step": 7517 }, { "epoch": 0.4056985591711187, "grad_norm": 1.1204448930587543, "learning_rate": 6.813739209746014e-06, "loss": 0.4586, "step": 7518 }, { "epoch": 0.40575252279963303, "grad_norm": 1.081771678850783, "learning_rate": 6.8130095327006785e-06, "loss": 0.4894, "step": 7519 }, { "epoch": 0.40580648642814743, "grad_norm": 0.915494290361841, "learning_rate": 6.812279817918122e-06, "loss": 0.3839, "step": 7520 }, { "epoch": 0.4058604500566618, "grad_norm": 1.094493672970002, "learning_rate": 6.8115500654193175e-06, "loss": 0.6199, "step": 7521 }, { "epoch": 0.40591441368517617, "grad_norm": 1.0194901174355704, "learning_rate": 6.81082027522524e-06, "loss": 0.491, "step": 7522 }, { "epoch": 0.40596837731369056, "grad_norm": 1.1279999869250843, "learning_rate": 6.810090447356863e-06, "loss": 0.5424, "step": 7523 }, { "epoch": 0.40602234094220496, "grad_norm": 0.9787689440138151, "learning_rate": 6.809360581835161e-06, "loss": 0.304, "step": 7524 }, { "epoch": 0.40607630457071936, "grad_norm": 0.9254681192031328, "learning_rate": 6.8086306786811145e-06, "loss": 0.3386, "step": 7525 }, { "epoch": 0.4061302681992337, "grad_norm": 1.0553603089785617, "learning_rate": 6.8079007379157e-06, "loss": 0.4456, "step": 7526 }, { "epoch": 0.4061842318277481, "grad_norm": 1.0933990115909586, "learning_rate": 6.807170759559896e-06, "loss": 0.4727, "step": 7527 }, { "epoch": 0.4062381954562625, "grad_norm": 1.172876062336393, "learning_rate": 6.806440743634683e-06, "loss": 0.5729, "step": 7528 }, { "epoch": 0.40629215908477684, "grad_norm": 1.2760309340383158, "learning_rate": 6.805710690161046e-06, "loss": 0.684, "step": 7529 }, { "epoch": 0.40634612271329124, "grad_norm": 1.1133473582942983, "learning_rate": 6.804980599159963e-06, "loss": 0.5705, "step": 7530 }, { "epoch": 0.40640008634180563, "grad_norm": 0.991127554625731, "learning_rate": 6.804250470652417e-06, "loss": 0.4539, "step": 7531 }, { "epoch": 0.40645404997032003, "grad_norm": 0.9841668022017447, "learning_rate": 6.803520304659396e-06, "loss": 0.3919, "step": 7532 }, { "epoch": 0.4065080135988344, "grad_norm": 1.065858408731654, "learning_rate": 
6.802790101201885e-06, "loss": 0.5211, "step": 7533 }, { "epoch": 0.40656197722734877, "grad_norm": 1.054092506582284, "learning_rate": 6.802059860300869e-06, "loss": 0.4725, "step": 7534 }, { "epoch": 0.40661594085586317, "grad_norm": 1.39948928729766, "learning_rate": 6.801329581977338e-06, "loss": 0.5703, "step": 7535 }, { "epoch": 0.4066699044843775, "grad_norm": 1.1653015386019334, "learning_rate": 6.800599266252279e-06, "loss": 0.6272, "step": 7536 }, { "epoch": 0.4067238681128919, "grad_norm": 1.0273994479014035, "learning_rate": 6.799868913146683e-06, "loss": 0.5823, "step": 7537 }, { "epoch": 0.4067778317414063, "grad_norm": 0.9892809482026573, "learning_rate": 6.799138522681542e-06, "loss": 0.429, "step": 7538 }, { "epoch": 0.40683179536992065, "grad_norm": 1.2576271613221424, "learning_rate": 6.798408094877847e-06, "loss": 0.7948, "step": 7539 }, { "epoch": 0.40688575899843504, "grad_norm": 1.114618605925581, "learning_rate": 6.797677629756591e-06, "loss": 0.4514, "step": 7540 }, { "epoch": 0.40693972262694944, "grad_norm": 0.9140679929472066, "learning_rate": 6.796947127338768e-06, "loss": 0.4563, "step": 7541 }, { "epoch": 0.40699368625546384, "grad_norm": 1.0625122294154816, "learning_rate": 6.796216587645374e-06, "loss": 0.5374, "step": 7542 }, { "epoch": 0.4070476498839782, "grad_norm": 1.3134810360989808, "learning_rate": 6.7954860106974065e-06, "loss": 0.6455, "step": 7543 }, { "epoch": 0.4071016135124926, "grad_norm": 1.0996546381645484, "learning_rate": 6.794755396515862e-06, "loss": 0.4961, "step": 7544 }, { "epoch": 0.407155577141007, "grad_norm": 1.3112552158266912, "learning_rate": 6.794024745121738e-06, "loss": 0.5662, "step": 7545 }, { "epoch": 0.4072095407695213, "grad_norm": 0.959326163198918, "learning_rate": 6.7932940565360375e-06, "loss": 0.3728, "step": 7546 }, { "epoch": 0.4072635043980357, "grad_norm": 0.9186566530650735, "learning_rate": 6.792563330779757e-06, "loss": 0.5392, "step": 7547 }, { "epoch": 0.4073174680265501, "grad_norm": 0.992646347534432, "learning_rate": 6.791832567873902e-06, "loss": 0.4142, "step": 7548 }, { "epoch": 0.4073714316550645, "grad_norm": 0.8873433798106148, "learning_rate": 6.791101767839472e-06, "loss": 0.4271, "step": 7549 }, { "epoch": 0.40742539528357885, "grad_norm": 0.9594934973262861, "learning_rate": 6.7903709306974745e-06, "loss": 0.515, "step": 7550 }, { "epoch": 0.40747935891209325, "grad_norm": 1.043207877365533, "learning_rate": 6.789640056468911e-06, "loss": 0.3814, "step": 7551 }, { "epoch": 0.40753332254060765, "grad_norm": 0.922967190895919, "learning_rate": 6.7889091451747894e-06, "loss": 0.4577, "step": 7552 }, { "epoch": 0.407587286169122, "grad_norm": 0.8386069079612846, "learning_rate": 6.788178196836118e-06, "loss": 0.4068, "step": 7553 }, { "epoch": 0.4076412497976364, "grad_norm": 1.1453995378585662, "learning_rate": 6.787447211473902e-06, "loss": 0.4606, "step": 7554 }, { "epoch": 0.4076952134261508, "grad_norm": 1.0152585487905392, "learning_rate": 6.786716189109153e-06, "loss": 0.4023, "step": 7555 }, { "epoch": 0.4077491770546652, "grad_norm": 1.1243032267600435, "learning_rate": 6.785985129762881e-06, "loss": 0.5693, "step": 7556 }, { "epoch": 0.4078031406831795, "grad_norm": 0.6307953743109266, "learning_rate": 6.785254033456096e-06, "loss": 0.2699, "step": 7557 }, { "epoch": 0.4078571043116939, "grad_norm": 1.067236559805759, "learning_rate": 6.784522900209811e-06, "loss": 0.4552, "step": 7558 }, { "epoch": 0.4079110679402083, "grad_norm": 1.086559953574209, "learning_rate": 
6.7837917300450414e-06, "loss": 0.3796, "step": 7559 }, { "epoch": 0.40796503156872266, "grad_norm": 0.9973013518950935, "learning_rate": 6.7830605229828005e-06, "loss": 0.4176, "step": 7560 }, { "epoch": 0.40801899519723706, "grad_norm": 1.1122066223938112, "learning_rate": 6.782329279044103e-06, "loss": 0.5679, "step": 7561 }, { "epoch": 0.40807295882575145, "grad_norm": 0.840760887229507, "learning_rate": 6.7815979982499655e-06, "loss": 0.3165, "step": 7562 }, { "epoch": 0.40812692245426585, "grad_norm": 0.9728957970376029, "learning_rate": 6.780866680621407e-06, "loss": 0.4235, "step": 7563 }, { "epoch": 0.4081808860827802, "grad_norm": 1.187367273392454, "learning_rate": 6.780135326179447e-06, "loss": 0.5339, "step": 7564 }, { "epoch": 0.4082348497112946, "grad_norm": 0.9677355542074414, "learning_rate": 6.779403934945103e-06, "loss": 0.4031, "step": 7565 }, { "epoch": 0.408288813339809, "grad_norm": 1.0238675966647772, "learning_rate": 6.778672506939398e-06, "loss": 0.5097, "step": 7566 }, { "epoch": 0.40834277696832333, "grad_norm": 0.912666045631566, "learning_rate": 6.7779410421833525e-06, "loss": 0.4783, "step": 7567 }, { "epoch": 0.4083967405968377, "grad_norm": 0.976084008734294, "learning_rate": 6.77720954069799e-06, "loss": 0.444, "step": 7568 }, { "epoch": 0.4084507042253521, "grad_norm": 0.9435906068948745, "learning_rate": 6.776478002504335e-06, "loss": 0.396, "step": 7569 }, { "epoch": 0.4085046678538665, "grad_norm": 1.005116291594624, "learning_rate": 6.775746427623413e-06, "loss": 0.4793, "step": 7570 }, { "epoch": 0.40855863148238086, "grad_norm": 0.8904701157419485, "learning_rate": 6.77501481607625e-06, "loss": 0.4546, "step": 7571 }, { "epoch": 0.40861259511089526, "grad_norm": 1.168635157384942, "learning_rate": 6.774283167883871e-06, "loss": 0.5257, "step": 7572 }, { "epoch": 0.40866655873940966, "grad_norm": 0.9131103124197575, "learning_rate": 6.773551483067308e-06, "loss": 0.4541, "step": 7573 }, { "epoch": 0.408720522367924, "grad_norm": 1.0470166082191037, "learning_rate": 6.772819761647588e-06, "loss": 0.425, "step": 7574 }, { "epoch": 0.4087744859964384, "grad_norm": 1.1555396530150739, "learning_rate": 6.772088003645741e-06, "loss": 0.5683, "step": 7575 }, { "epoch": 0.4088284496249528, "grad_norm": 1.0130522384780438, "learning_rate": 6.771356209082801e-06, "loss": 0.507, "step": 7576 }, { "epoch": 0.40888241325346714, "grad_norm": 1.1290576311986695, "learning_rate": 6.770624377979798e-06, "loss": 0.5615, "step": 7577 }, { "epoch": 0.40893637688198153, "grad_norm": 1.1804053488068822, "learning_rate": 6.7698925103577676e-06, "loss": 0.531, "step": 7578 }, { "epoch": 0.40899034051049593, "grad_norm": 1.1233449814464416, "learning_rate": 6.769160606237741e-06, "loss": 0.6211, "step": 7579 }, { "epoch": 0.40904430413901033, "grad_norm": 0.8064305283203516, "learning_rate": 6.768428665640758e-06, "loss": 0.3043, "step": 7580 }, { "epoch": 0.40909826776752467, "grad_norm": 0.9317865720600396, "learning_rate": 6.767696688587854e-06, "loss": 0.6461, "step": 7581 }, { "epoch": 0.40915223139603907, "grad_norm": 1.0756165890056868, "learning_rate": 6.766964675100065e-06, "loss": 0.4348, "step": 7582 }, { "epoch": 0.40920619502455347, "grad_norm": 0.9861352697882854, "learning_rate": 6.766232625198431e-06, "loss": 0.4023, "step": 7583 }, { "epoch": 0.4092601586530678, "grad_norm": 0.9577546817200113, "learning_rate": 6.765500538903993e-06, "loss": 0.3159, "step": 7584 }, { "epoch": 0.4093141222815822, "grad_norm": 0.9234481047849848, "learning_rate": 
6.76476841623779e-06, "loss": 0.5196, "step": 7585 }, { "epoch": 0.4093680859100966, "grad_norm": 1.0339590924349675, "learning_rate": 6.764036257220865e-06, "loss": 0.4502, "step": 7586 }, { "epoch": 0.409422049538611, "grad_norm": 0.9744250438024435, "learning_rate": 6.76330406187426e-06, "loss": 0.4142, "step": 7587 }, { "epoch": 0.40947601316712534, "grad_norm": 0.9067440202221327, "learning_rate": 6.7625718302190215e-06, "loss": 0.423, "step": 7588 }, { "epoch": 0.40952997679563974, "grad_norm": 1.0331148539466044, "learning_rate": 6.7618395622761915e-06, "loss": 0.4945, "step": 7589 }, { "epoch": 0.40958394042415414, "grad_norm": 0.9843819692798136, "learning_rate": 6.761107258066818e-06, "loss": 0.4529, "step": 7590 }, { "epoch": 0.4096379040526685, "grad_norm": 0.9131506729743512, "learning_rate": 6.760374917611948e-06, "loss": 0.4373, "step": 7591 }, { "epoch": 0.4096918676811829, "grad_norm": 1.033680759540545, "learning_rate": 6.759642540932628e-06, "loss": 0.404, "step": 7592 }, { "epoch": 0.4097458313096973, "grad_norm": 1.1867959390970622, "learning_rate": 6.758910128049909e-06, "loss": 0.6558, "step": 7593 }, { "epoch": 0.40979979493821167, "grad_norm": 0.8973494477708069, "learning_rate": 6.758177678984842e-06, "loss": 0.3911, "step": 7594 }, { "epoch": 0.409853758566726, "grad_norm": 1.2041954804215937, "learning_rate": 6.757445193758476e-06, "loss": 0.5895, "step": 7595 }, { "epoch": 0.4099077221952404, "grad_norm": 1.0594497875463043, "learning_rate": 6.756712672391865e-06, "loss": 0.5099, "step": 7596 }, { "epoch": 0.4099616858237548, "grad_norm": 1.2560997138109604, "learning_rate": 6.7559801149060625e-06, "loss": 0.6206, "step": 7597 }, { "epoch": 0.41001564945226915, "grad_norm": 0.9994304033489262, "learning_rate": 6.755247521322122e-06, "loss": 0.546, "step": 7598 }, { "epoch": 0.41006961308078355, "grad_norm": 1.1646336927008052, "learning_rate": 6.754514891661099e-06, "loss": 0.5776, "step": 7599 }, { "epoch": 0.41012357670929794, "grad_norm": 1.1702044695655607, "learning_rate": 6.753782225944051e-06, "loss": 0.6134, "step": 7600 }, { "epoch": 0.41017754033781234, "grad_norm": 0.8865512008750267, "learning_rate": 6.7530495241920334e-06, "loss": 0.3912, "step": 7601 }, { "epoch": 0.4102315039663267, "grad_norm": 1.178952043974545, "learning_rate": 6.7523167864261095e-06, "loss": 0.5268, "step": 7602 }, { "epoch": 0.4102854675948411, "grad_norm": 1.1564745680355186, "learning_rate": 6.751584012667334e-06, "loss": 0.5357, "step": 7603 }, { "epoch": 0.4103394312233555, "grad_norm": 1.008678514949802, "learning_rate": 6.750851202936769e-06, "loss": 0.443, "step": 7604 }, { "epoch": 0.4103933948518698, "grad_norm": 1.073093196636254, "learning_rate": 6.750118357255477e-06, "loss": 0.5584, "step": 7605 }, { "epoch": 0.4104473584803842, "grad_norm": 1.2173997063428343, "learning_rate": 6.74938547564452e-06, "loss": 0.4574, "step": 7606 }, { "epoch": 0.4105013221088986, "grad_norm": 1.1077892642005789, "learning_rate": 6.7486525581249614e-06, "loss": 0.548, "step": 7607 }, { "epoch": 0.41055528573741296, "grad_norm": 1.2658895522079476, "learning_rate": 6.7479196047178695e-06, "loss": 0.6846, "step": 7608 }, { "epoch": 0.41060924936592735, "grad_norm": 1.246820772395013, "learning_rate": 6.747186615444304e-06, "loss": 0.5081, "step": 7609 }, { "epoch": 0.41066321299444175, "grad_norm": 1.096987552047168, "learning_rate": 6.746453590325336e-06, "loss": 0.5769, "step": 7610 }, { "epoch": 0.41071717662295615, "grad_norm": 1.0653859903713836, "learning_rate": 
6.745720529382033e-06, "loss": 0.5094, "step": 7611 }, { "epoch": 0.4107711402514705, "grad_norm": 1.0272659153562347, "learning_rate": 6.744987432635463e-06, "loss": 0.5134, "step": 7612 }, { "epoch": 0.4108251038799849, "grad_norm": 1.1667007636446742, "learning_rate": 6.7442543001066954e-06, "loss": 0.4401, "step": 7613 }, { "epoch": 0.4108790675084993, "grad_norm": 0.971640466841577, "learning_rate": 6.743521131816801e-06, "loss": 0.3558, "step": 7614 }, { "epoch": 0.4109330311370136, "grad_norm": 1.1786801765716473, "learning_rate": 6.742787927786857e-06, "loss": 0.6721, "step": 7615 }, { "epoch": 0.410986994765528, "grad_norm": 1.103240351659209, "learning_rate": 6.742054688037929e-06, "loss": 0.4973, "step": 7616 }, { "epoch": 0.4110409583940424, "grad_norm": 1.122231384761107, "learning_rate": 6.741321412591094e-06, "loss": 0.6702, "step": 7617 }, { "epoch": 0.4110949220225568, "grad_norm": 1.1076903084176246, "learning_rate": 6.740588101467428e-06, "loss": 0.5632, "step": 7618 }, { "epoch": 0.41114888565107116, "grad_norm": 1.0619176271266038, "learning_rate": 6.739854754688006e-06, "loss": 0.567, "step": 7619 }, { "epoch": 0.41120284927958556, "grad_norm": 0.9297136655990516, "learning_rate": 6.7391213722739055e-06, "loss": 0.4369, "step": 7620 }, { "epoch": 0.41125681290809996, "grad_norm": 1.0258217704285317, "learning_rate": 6.738387954246206e-06, "loss": 0.6, "step": 7621 }, { "epoch": 0.4113107765366143, "grad_norm": 0.9677333729556463, "learning_rate": 6.737654500625985e-06, "loss": 0.5075, "step": 7622 }, { "epoch": 0.4113647401651287, "grad_norm": 1.0784996383055916, "learning_rate": 6.736921011434323e-06, "loss": 0.3545, "step": 7623 }, { "epoch": 0.4114187037936431, "grad_norm": 1.103381403928868, "learning_rate": 6.736187486692302e-06, "loss": 0.6194, "step": 7624 }, { "epoch": 0.4114726674221575, "grad_norm": 1.1527952272591555, "learning_rate": 6.735453926421001e-06, "loss": 0.5563, "step": 7625 }, { "epoch": 0.41152663105067183, "grad_norm": 0.9284164357452473, "learning_rate": 6.734720330641509e-06, "loss": 0.3699, "step": 7626 }, { "epoch": 0.41158059467918623, "grad_norm": 1.04408371727465, "learning_rate": 6.733986699374906e-06, "loss": 0.3747, "step": 7627 }, { "epoch": 0.4116345583077006, "grad_norm": 0.9516816500079481, "learning_rate": 6.733253032642277e-06, "loss": 0.3226, "step": 7628 }, { "epoch": 0.41168852193621497, "grad_norm": 1.1020687602089865, "learning_rate": 6.732519330464711e-06, "loss": 0.513, "step": 7629 }, { "epoch": 0.41174248556472937, "grad_norm": 1.1504802521293893, "learning_rate": 6.731785592863293e-06, "loss": 0.528, "step": 7630 }, { "epoch": 0.41179644919324376, "grad_norm": 0.9661424828438891, "learning_rate": 6.731051819859115e-06, "loss": 0.4744, "step": 7631 }, { "epoch": 0.41185041282175816, "grad_norm": 1.0654390011746704, "learning_rate": 6.730318011473261e-06, "loss": 0.5528, "step": 7632 }, { "epoch": 0.4119043764502725, "grad_norm": 0.7943126007116983, "learning_rate": 6.729584167726826e-06, "loss": 0.4139, "step": 7633 }, { "epoch": 0.4119583400787869, "grad_norm": 0.9154252549707663, "learning_rate": 6.728850288640899e-06, "loss": 0.4672, "step": 7634 }, { "epoch": 0.4120123037073013, "grad_norm": 0.9923976890999333, "learning_rate": 6.728116374236573e-06, "loss": 0.4917, "step": 7635 }, { "epoch": 0.41206626733581564, "grad_norm": 1.0353572604473646, "learning_rate": 6.727382424534942e-06, "loss": 0.572, "step": 7636 }, { "epoch": 0.41212023096433004, "grad_norm": 1.027880488652444, "learning_rate": 
6.726648439557099e-06, "loss": 0.3535, "step": 7637 }, { "epoch": 0.41217419459284443, "grad_norm": 1.1868125580913245, "learning_rate": 6.725914419324141e-06, "loss": 0.5437, "step": 7638 }, { "epoch": 0.41222815822135883, "grad_norm": 0.8630820527547929, "learning_rate": 6.725180363857162e-06, "loss": 0.3486, "step": 7639 }, { "epoch": 0.4122821218498732, "grad_norm": 1.0683834325749735, "learning_rate": 6.7244462731772645e-06, "loss": 0.4609, "step": 7640 }, { "epoch": 0.41233608547838757, "grad_norm": 1.234195366409918, "learning_rate": 6.723712147305542e-06, "loss": 0.596, "step": 7641 }, { "epoch": 0.41239004910690197, "grad_norm": 1.0066001917032066, "learning_rate": 6.722977986263096e-06, "loss": 0.3547, "step": 7642 }, { "epoch": 0.4124440127354163, "grad_norm": 1.0046143968257766, "learning_rate": 6.722243790071027e-06, "loss": 0.5508, "step": 7643 }, { "epoch": 0.4124979763639307, "grad_norm": 0.8886237134452628, "learning_rate": 6.721509558750438e-06, "loss": 0.4647, "step": 7644 }, { "epoch": 0.4125519399924451, "grad_norm": 1.1018432310557105, "learning_rate": 6.720775292322429e-06, "loss": 0.6361, "step": 7645 }, { "epoch": 0.41260590362095945, "grad_norm": 0.9665163822249392, "learning_rate": 6.720040990808104e-06, "loss": 0.5047, "step": 7646 }, { "epoch": 0.41265986724947384, "grad_norm": 0.9567722169420492, "learning_rate": 6.719306654228569e-06, "loss": 0.4024, "step": 7647 }, { "epoch": 0.41271383087798824, "grad_norm": 0.9222497535105044, "learning_rate": 6.718572282604927e-06, "loss": 0.4185, "step": 7648 }, { "epoch": 0.41276779450650264, "grad_norm": 1.1656640278794532, "learning_rate": 6.717837875958287e-06, "loss": 0.4187, "step": 7649 }, { "epoch": 0.412821758135017, "grad_norm": 1.1641689629686858, "learning_rate": 6.717103434309757e-06, "loss": 0.483, "step": 7650 }, { "epoch": 0.4128757217635314, "grad_norm": 0.7923833910831188, "learning_rate": 6.716368957680445e-06, "loss": 0.3003, "step": 7651 }, { "epoch": 0.4129296853920458, "grad_norm": 1.1185727426654974, "learning_rate": 6.7156344460914604e-06, "loss": 0.4648, "step": 7652 }, { "epoch": 0.4129836490205601, "grad_norm": 1.0204013316384901, "learning_rate": 6.714899899563912e-06, "loss": 0.4921, "step": 7653 }, { "epoch": 0.4130376126490745, "grad_norm": 0.9869879680395924, "learning_rate": 6.714165318118912e-06, "loss": 0.3916, "step": 7654 }, { "epoch": 0.4130915762775889, "grad_norm": 1.0032480439215195, "learning_rate": 6.713430701777576e-06, "loss": 0.5511, "step": 7655 }, { "epoch": 0.4131455399061033, "grad_norm": 1.1799242645062122, "learning_rate": 6.712696050561014e-06, "loss": 0.5597, "step": 7656 }, { "epoch": 0.41319950353461765, "grad_norm": 0.981315717594818, "learning_rate": 6.7119613644903435e-06, "loss": 0.5777, "step": 7657 }, { "epoch": 0.41325346716313205, "grad_norm": 0.8924887310239625, "learning_rate": 6.711226643586679e-06, "loss": 0.3983, "step": 7658 }, { "epoch": 0.41330743079164645, "grad_norm": 1.0721082397556114, "learning_rate": 6.710491887871135e-06, "loss": 0.5701, "step": 7659 }, { "epoch": 0.4133613944201608, "grad_norm": 0.8629083679093378, "learning_rate": 6.7097570973648326e-06, "loss": 0.4454, "step": 7660 }, { "epoch": 0.4134153580486752, "grad_norm": 0.9365858958194917, "learning_rate": 6.7090222720888884e-06, "loss": 0.485, "step": 7661 }, { "epoch": 0.4134693216771896, "grad_norm": 0.9985219025884867, "learning_rate": 6.708287412064421e-06, "loss": 0.3911, "step": 7662 }, { "epoch": 0.413523285305704, "grad_norm": 0.9419055835739972, "learning_rate": 
6.707552517312555e-06, "loss": 0.4184, "step": 7663 }, { "epoch": 0.4135772489342183, "grad_norm": 1.0049469833636604, "learning_rate": 6.706817587854408e-06, "loss": 0.4152, "step": 7664 }, { "epoch": 0.4136312125627327, "grad_norm": 0.8778789838192276, "learning_rate": 6.7060826237111056e-06, "loss": 0.3587, "step": 7665 }, { "epoch": 0.4136851761912471, "grad_norm": 1.3522987053697317, "learning_rate": 6.705347624903767e-06, "loss": 0.63, "step": 7666 }, { "epoch": 0.41373913981976146, "grad_norm": 0.892107317966253, "learning_rate": 6.704612591453523e-06, "loss": 0.397, "step": 7667 }, { "epoch": 0.41379310344827586, "grad_norm": 0.8372897259500505, "learning_rate": 6.703877523381495e-06, "loss": 0.3236, "step": 7668 }, { "epoch": 0.41384706707679025, "grad_norm": 1.1426882829906473, "learning_rate": 6.70314242070881e-06, "loss": 0.6227, "step": 7669 }, { "epoch": 0.41390103070530465, "grad_norm": 1.1475143961058738, "learning_rate": 6.7024072834565955e-06, "loss": 0.6112, "step": 7670 }, { "epoch": 0.413954994333819, "grad_norm": 0.9217568536702735, "learning_rate": 6.701672111645982e-06, "loss": 0.414, "step": 7671 }, { "epoch": 0.4140089579623334, "grad_norm": 1.1276839494115682, "learning_rate": 6.700936905298096e-06, "loss": 0.4869, "step": 7672 }, { "epoch": 0.4140629215908478, "grad_norm": 0.9951110757381574, "learning_rate": 6.700201664434072e-06, "loss": 0.4377, "step": 7673 }, { "epoch": 0.41411688521936213, "grad_norm": 0.862866744033978, "learning_rate": 6.6994663890750386e-06, "loss": 0.4175, "step": 7674 }, { "epoch": 0.4141708488478765, "grad_norm": 0.865574237957905, "learning_rate": 6.69873107924213e-06, "loss": 0.3683, "step": 7675 }, { "epoch": 0.4142248124763909, "grad_norm": 0.720673719841367, "learning_rate": 6.697995734956477e-06, "loss": 0.2366, "step": 7676 }, { "epoch": 0.41427877610490527, "grad_norm": 1.0836532266259111, "learning_rate": 6.697260356239218e-06, "loss": 0.5442, "step": 7677 }, { "epoch": 0.41433273973341966, "grad_norm": 1.0823555761561603, "learning_rate": 6.696524943111485e-06, "loss": 0.4443, "step": 7678 }, { "epoch": 0.41438670336193406, "grad_norm": 1.0562360158260566, "learning_rate": 6.695789495594417e-06, "loss": 0.4682, "step": 7679 }, { "epoch": 0.41444066699044846, "grad_norm": 0.9224165312237081, "learning_rate": 6.695054013709152e-06, "loss": 0.3597, "step": 7680 }, { "epoch": 0.4144946306189628, "grad_norm": 1.226348400478821, "learning_rate": 6.694318497476826e-06, "loss": 0.5449, "step": 7681 }, { "epoch": 0.4145485942474772, "grad_norm": 0.923017727645661, "learning_rate": 6.69358294691858e-06, "loss": 0.5326, "step": 7682 }, { "epoch": 0.4146025578759916, "grad_norm": 0.9118756884023664, "learning_rate": 6.692847362055552e-06, "loss": 0.3969, "step": 7683 }, { "epoch": 0.41465652150450594, "grad_norm": 1.1353698458089057, "learning_rate": 6.692111742908888e-06, "loss": 0.4939, "step": 7684 }, { "epoch": 0.41471048513302033, "grad_norm": 1.1423593138128627, "learning_rate": 6.691376089499727e-06, "loss": 0.5041, "step": 7685 }, { "epoch": 0.41476444876153473, "grad_norm": 0.9874111637696947, "learning_rate": 6.6906404018492135e-06, "loss": 0.5262, "step": 7686 }, { "epoch": 0.41481841239004913, "grad_norm": 1.0111341929282947, "learning_rate": 6.689904679978491e-06, "loss": 0.4222, "step": 7687 }, { "epoch": 0.41487237601856347, "grad_norm": 0.838523897787651, "learning_rate": 6.689168923908707e-06, "loss": 0.359, "step": 7688 }, { "epoch": 0.41492633964707787, "grad_norm": 1.1918205631180137, "learning_rate": 
6.688433133661007e-06, "loss": 0.4779, "step": 7689 }, { "epoch": 0.41498030327559227, "grad_norm": 0.9968139343557337, "learning_rate": 6.687697309256536e-06, "loss": 0.434, "step": 7690 }, { "epoch": 0.4150342669041066, "grad_norm": 1.0881959282736553, "learning_rate": 6.686961450716446e-06, "loss": 0.5286, "step": 7691 }, { "epoch": 0.415088230532621, "grad_norm": 0.9653996081541671, "learning_rate": 6.686225558061883e-06, "loss": 0.39, "step": 7692 }, { "epoch": 0.4151421941611354, "grad_norm": 0.9859411725158641, "learning_rate": 6.685489631313999e-06, "loss": 0.4415, "step": 7693 }, { "epoch": 0.4151961577896498, "grad_norm": 0.7722805353414243, "learning_rate": 6.684753670493945e-06, "loss": 0.3524, "step": 7694 }, { "epoch": 0.41525012141816414, "grad_norm": 0.8425649568227274, "learning_rate": 6.684017675622875e-06, "loss": 0.3295, "step": 7695 }, { "epoch": 0.41530408504667854, "grad_norm": 1.0692116092359243, "learning_rate": 6.683281646721939e-06, "loss": 0.589, "step": 7696 }, { "epoch": 0.41535804867519294, "grad_norm": 0.8988281527593945, "learning_rate": 6.682545583812294e-06, "loss": 0.4246, "step": 7697 }, { "epoch": 0.4154120123037073, "grad_norm": 1.2523741898570613, "learning_rate": 6.681809486915093e-06, "loss": 0.5495, "step": 7698 }, { "epoch": 0.4154659759322217, "grad_norm": 1.1238742463383296, "learning_rate": 6.681073356051494e-06, "loss": 0.4113, "step": 7699 }, { "epoch": 0.4155199395607361, "grad_norm": 0.9417358033363995, "learning_rate": 6.680337191242653e-06, "loss": 0.5045, "step": 7700 }, { "epoch": 0.41557390318925047, "grad_norm": 0.9573415323956321, "learning_rate": 6.67960099250973e-06, "loss": 0.4619, "step": 7701 }, { "epoch": 0.4156278668177648, "grad_norm": 1.074991429956297, "learning_rate": 6.678864759873881e-06, "loss": 0.6141, "step": 7702 }, { "epoch": 0.4156818304462792, "grad_norm": 0.9922477652108836, "learning_rate": 6.6781284933562675e-06, "loss": 0.3896, "step": 7703 }, { "epoch": 0.4157357940747936, "grad_norm": 0.9340324758366886, "learning_rate": 6.677392192978051e-06, "loss": 0.4116, "step": 7704 }, { "epoch": 0.41578975770330795, "grad_norm": 1.0372221152312187, "learning_rate": 6.676655858760394e-06, "loss": 0.4833, "step": 7705 }, { "epoch": 0.41584372133182235, "grad_norm": 0.852785633166194, "learning_rate": 6.675919490724456e-06, "loss": 0.4208, "step": 7706 }, { "epoch": 0.41589768496033674, "grad_norm": 1.0002412466781019, "learning_rate": 6.675183088891406e-06, "loss": 0.4517, "step": 7707 }, { "epoch": 0.41595164858885114, "grad_norm": 1.2568111893381517, "learning_rate": 6.674446653282407e-06, "loss": 0.7358, "step": 7708 }, { "epoch": 0.4160056122173655, "grad_norm": 1.0051378579336638, "learning_rate": 6.6737101839186245e-06, "loss": 0.4831, "step": 7709 }, { "epoch": 0.4160595758458799, "grad_norm": 1.0475563322612476, "learning_rate": 6.672973680821224e-06, "loss": 0.4927, "step": 7710 }, { "epoch": 0.4161135394743943, "grad_norm": 0.9512274702627562, "learning_rate": 6.672237144011374e-06, "loss": 0.4255, "step": 7711 }, { "epoch": 0.4161675031029086, "grad_norm": 0.9418195881251247, "learning_rate": 6.6715005735102466e-06, "loss": 0.4199, "step": 7712 }, { "epoch": 0.416221466731423, "grad_norm": 0.926315245279255, "learning_rate": 6.670763969339008e-06, "loss": 0.5246, "step": 7713 }, { "epoch": 0.4162754303599374, "grad_norm": 0.8153015445235215, "learning_rate": 6.67002733151883e-06, "loss": 0.3984, "step": 7714 }, { "epoch": 0.41632939398845176, "grad_norm": 0.9711997703379833, "learning_rate": 
6.669290660070884e-06, "loss": 0.4662, "step": 7715 }, { "epoch": 0.41638335761696615, "grad_norm": 1.1483770499637191, "learning_rate": 6.668553955016343e-06, "loss": 0.5364, "step": 7716 }, { "epoch": 0.41643732124548055, "grad_norm": 0.7748353607065362, "learning_rate": 6.66781721637638e-06, "loss": 0.2755, "step": 7717 }, { "epoch": 0.41649128487399495, "grad_norm": 1.039990759613914, "learning_rate": 6.66708044417217e-06, "loss": 0.4846, "step": 7718 }, { "epoch": 0.4165452485025093, "grad_norm": 0.9964149495377834, "learning_rate": 6.666343638424891e-06, "loss": 0.4021, "step": 7719 }, { "epoch": 0.4165992121310237, "grad_norm": 1.1052553345654368, "learning_rate": 6.665606799155715e-06, "loss": 0.5379, "step": 7720 }, { "epoch": 0.4166531757595381, "grad_norm": 1.0363708610845994, "learning_rate": 6.664869926385823e-06, "loss": 0.4762, "step": 7721 }, { "epoch": 0.4167071393880524, "grad_norm": 1.060145899787564, "learning_rate": 6.664133020136391e-06, "loss": 0.4845, "step": 7722 }, { "epoch": 0.4167611030165668, "grad_norm": 0.8813489712731463, "learning_rate": 6.663396080428601e-06, "loss": 0.496, "step": 7723 }, { "epoch": 0.4168150666450812, "grad_norm": 1.0281069388119133, "learning_rate": 6.662659107283631e-06, "loss": 0.5066, "step": 7724 }, { "epoch": 0.4168690302735956, "grad_norm": 0.8442008940412116, "learning_rate": 6.661922100722665e-06, "loss": 0.3357, "step": 7725 }, { "epoch": 0.41692299390210996, "grad_norm": 1.0519496665098675, "learning_rate": 6.661185060766884e-06, "loss": 0.5384, "step": 7726 }, { "epoch": 0.41697695753062436, "grad_norm": 0.9794761132221237, "learning_rate": 6.660447987437469e-06, "loss": 0.5539, "step": 7727 }, { "epoch": 0.41703092115913876, "grad_norm": 0.8945343286169914, "learning_rate": 6.659710880755608e-06, "loss": 0.4365, "step": 7728 }, { "epoch": 0.4170848847876531, "grad_norm": 0.8514127289364112, "learning_rate": 6.658973740742484e-06, "loss": 0.4296, "step": 7729 }, { "epoch": 0.4171388484161675, "grad_norm": 0.9341048714044063, "learning_rate": 6.658236567419282e-06, "loss": 0.3508, "step": 7730 }, { "epoch": 0.4171928120446819, "grad_norm": 0.9119270917524162, "learning_rate": 6.657499360807192e-06, "loss": 0.4233, "step": 7731 }, { "epoch": 0.4172467756731963, "grad_norm": 1.2114645856764281, "learning_rate": 6.656762120927402e-06, "loss": 0.5369, "step": 7732 }, { "epoch": 0.41730073930171063, "grad_norm": 1.1093220251875684, "learning_rate": 6.6560248478010995e-06, "loss": 0.5401, "step": 7733 }, { "epoch": 0.41735470293022503, "grad_norm": 1.0821100436005389, "learning_rate": 6.655287541449475e-06, "loss": 0.6453, "step": 7734 }, { "epoch": 0.4174086665587394, "grad_norm": 1.053260896540987, "learning_rate": 6.654550201893718e-06, "loss": 0.4305, "step": 7735 }, { "epoch": 0.41746263018725377, "grad_norm": 1.0245598962400617, "learning_rate": 6.653812829155022e-06, "loss": 0.5153, "step": 7736 }, { "epoch": 0.41751659381576817, "grad_norm": 0.9888540950043225, "learning_rate": 6.65307542325458e-06, "loss": 0.4652, "step": 7737 }, { "epoch": 0.41757055744428256, "grad_norm": 1.057157088648307, "learning_rate": 6.652337984213584e-06, "loss": 0.4525, "step": 7738 }, { "epoch": 0.41762452107279696, "grad_norm": 1.2039896405930992, "learning_rate": 6.651600512053231e-06, "loss": 0.509, "step": 7739 }, { "epoch": 0.4176784847013113, "grad_norm": 1.0975000453399402, "learning_rate": 6.650863006794716e-06, "loss": 0.5685, "step": 7740 }, { "epoch": 0.4177324483298257, "grad_norm": 0.9682522167155524, "learning_rate": 
6.650125468459233e-06, "loss": 0.4628, "step": 7741 }, { "epoch": 0.4177864119583401, "grad_norm": 0.943441741037434, "learning_rate": 6.6493878970679836e-06, "loss": 0.4206, "step": 7742 }, { "epoch": 0.41784037558685444, "grad_norm": 1.0846250769761805, "learning_rate": 6.648650292642166e-06, "loss": 0.4928, "step": 7743 }, { "epoch": 0.41789433921536884, "grad_norm": 0.9727695251646855, "learning_rate": 6.647912655202977e-06, "loss": 0.4406, "step": 7744 }, { "epoch": 0.41794830284388323, "grad_norm": 1.0276312545126804, "learning_rate": 6.647174984771619e-06, "loss": 0.4769, "step": 7745 }, { "epoch": 0.4180022664723976, "grad_norm": 1.018220624298367, "learning_rate": 6.646437281369292e-06, "loss": 0.4867, "step": 7746 }, { "epoch": 0.418056230100912, "grad_norm": 1.1243014255378048, "learning_rate": 6.645699545017199e-06, "loss": 0.4464, "step": 7747 }, { "epoch": 0.41811019372942637, "grad_norm": 1.0579772006245831, "learning_rate": 6.644961775736543e-06, "loss": 0.4905, "step": 7748 }, { "epoch": 0.41816415735794077, "grad_norm": 1.055102604163941, "learning_rate": 6.6442239735485295e-06, "loss": 0.4954, "step": 7749 }, { "epoch": 0.4182181209864551, "grad_norm": 0.8708371009626861, "learning_rate": 6.643486138474363e-06, "loss": 0.3513, "step": 7750 }, { "epoch": 0.4182720846149695, "grad_norm": 1.1079858020279711, "learning_rate": 6.642748270535249e-06, "loss": 0.5938, "step": 7751 }, { "epoch": 0.4183260482434839, "grad_norm": 1.0988526306356867, "learning_rate": 6.6420103697523956e-06, "loss": 0.5741, "step": 7752 }, { "epoch": 0.41838001187199825, "grad_norm": 1.062089854354666, "learning_rate": 6.641272436147011e-06, "loss": 0.6019, "step": 7753 }, { "epoch": 0.41843397550051264, "grad_norm": 1.008804784232101, "learning_rate": 6.6405344697403015e-06, "loss": 0.5008, "step": 7754 }, { "epoch": 0.41848793912902704, "grad_norm": 0.8921464394982095, "learning_rate": 6.63979647055348e-06, "loss": 0.3818, "step": 7755 }, { "epoch": 0.41854190275754144, "grad_norm": 1.0961383607238422, "learning_rate": 6.639058438607755e-06, "loss": 0.4628, "step": 7756 }, { "epoch": 0.4185958663860558, "grad_norm": 1.147905413376192, "learning_rate": 6.638320373924342e-06, "loss": 0.4621, "step": 7757 }, { "epoch": 0.4186498300145702, "grad_norm": 0.8585187053076972, "learning_rate": 6.63758227652445e-06, "loss": 0.3995, "step": 7758 }, { "epoch": 0.4187037936430846, "grad_norm": 1.0111918491004452, "learning_rate": 6.6368441464292945e-06, "loss": 0.4617, "step": 7759 }, { "epoch": 0.4187577572715989, "grad_norm": 1.0938188146804737, "learning_rate": 6.636105983660089e-06, "loss": 0.5124, "step": 7760 }, { "epoch": 0.4188117209001133, "grad_norm": 1.000891344108365, "learning_rate": 6.635367788238048e-06, "loss": 0.4309, "step": 7761 }, { "epoch": 0.4188656845286277, "grad_norm": 1.0588034394319876, "learning_rate": 6.634629560184391e-06, "loss": 0.4612, "step": 7762 }, { "epoch": 0.4189196481571421, "grad_norm": 1.0453863050710182, "learning_rate": 6.633891299520334e-06, "loss": 0.5576, "step": 7763 }, { "epoch": 0.41897361178565645, "grad_norm": 1.2692290279377507, "learning_rate": 6.633153006267096e-06, "loss": 0.6017, "step": 7764 }, { "epoch": 0.41902757541417085, "grad_norm": 0.9496727426924458, "learning_rate": 6.632414680445894e-06, "loss": 0.3274, "step": 7765 }, { "epoch": 0.41908153904268525, "grad_norm": 1.0591229154831199, "learning_rate": 6.6316763220779514e-06, "loss": 0.4753, "step": 7766 }, { "epoch": 0.4191355026711996, "grad_norm": 1.130285433723368, "learning_rate": 
6.630937931184489e-06, "loss": 0.6046, "step": 7767 }, { "epoch": 0.419189466299714, "grad_norm": 0.9900435987533374, "learning_rate": 6.630199507786724e-06, "loss": 0.3196, "step": 7768 }, { "epoch": 0.4192434299282284, "grad_norm": 0.9460307522221565, "learning_rate": 6.6294610519058866e-06, "loss": 0.4983, "step": 7769 }, { "epoch": 0.4192973935567428, "grad_norm": 1.2514171052386276, "learning_rate": 6.628722563563194e-06, "loss": 0.7269, "step": 7770 }, { "epoch": 0.4193513571852571, "grad_norm": 0.8667510067228321, "learning_rate": 6.627984042779877e-06, "loss": 0.3671, "step": 7771 }, { "epoch": 0.4194053208137715, "grad_norm": 1.1306985620698198, "learning_rate": 6.627245489577159e-06, "loss": 0.6191, "step": 7772 }, { "epoch": 0.4194592844422859, "grad_norm": 0.9324429709424953, "learning_rate": 6.626506903976265e-06, "loss": 0.3762, "step": 7773 }, { "epoch": 0.41951324807080026, "grad_norm": 0.9669956294515418, "learning_rate": 6.625768285998425e-06, "loss": 0.4218, "step": 7774 }, { "epoch": 0.41956721169931466, "grad_norm": 1.1285087648423466, "learning_rate": 6.625029635664867e-06, "loss": 0.5254, "step": 7775 }, { "epoch": 0.41962117532782905, "grad_norm": 1.1683379288462716, "learning_rate": 6.62429095299682e-06, "loss": 0.6028, "step": 7776 }, { "epoch": 0.41967513895634345, "grad_norm": 0.8868467489553826, "learning_rate": 6.623552238015516e-06, "loss": 0.3803, "step": 7777 }, { "epoch": 0.4197291025848578, "grad_norm": 1.0676556399560408, "learning_rate": 6.622813490742186e-06, "loss": 0.4608, "step": 7778 }, { "epoch": 0.4197830662133722, "grad_norm": 0.8193665734649929, "learning_rate": 6.62207471119806e-06, "loss": 0.3257, "step": 7779 }, { "epoch": 0.4198370298418866, "grad_norm": 0.9798255341882586, "learning_rate": 6.621335899404373e-06, "loss": 0.5325, "step": 7780 }, { "epoch": 0.41989099347040093, "grad_norm": 0.8458572165580065, "learning_rate": 6.6205970553823615e-06, "loss": 0.4301, "step": 7781 }, { "epoch": 0.41994495709891533, "grad_norm": 0.8991486852518086, "learning_rate": 6.619858179153257e-06, "loss": 0.4399, "step": 7782 }, { "epoch": 0.4199989207274297, "grad_norm": 1.1562903790624, "learning_rate": 6.619119270738296e-06, "loss": 0.5605, "step": 7783 }, { "epoch": 0.42005288435594407, "grad_norm": 0.9199516659641727, "learning_rate": 6.618380330158718e-06, "loss": 0.48, "step": 7784 }, { "epoch": 0.42010684798445846, "grad_norm": 1.0491509727781185, "learning_rate": 6.617641357435758e-06, "loss": 0.489, "step": 7785 }, { "epoch": 0.42016081161297286, "grad_norm": 1.0145074191748635, "learning_rate": 6.616902352590656e-06, "loss": 0.5132, "step": 7786 }, { "epoch": 0.42021477524148726, "grad_norm": 1.124281629184787, "learning_rate": 6.6161633156446525e-06, "loss": 0.4838, "step": 7787 }, { "epoch": 0.4202687388700016, "grad_norm": 1.002743952280283, "learning_rate": 6.615424246618987e-06, "loss": 0.4723, "step": 7788 }, { "epoch": 0.420322702498516, "grad_norm": 0.9995524800169185, "learning_rate": 6.614685145534902e-06, "loss": 0.5729, "step": 7789 }, { "epoch": 0.4203766661270304, "grad_norm": 1.0752440887621597, "learning_rate": 6.61394601241364e-06, "loss": 0.6043, "step": 7790 }, { "epoch": 0.42043062975554474, "grad_norm": 1.1501232180622876, "learning_rate": 6.613206847276444e-06, "loss": 0.588, "step": 7791 }, { "epoch": 0.42048459338405914, "grad_norm": 1.0263677919693812, "learning_rate": 6.612467650144558e-06, "loss": 0.5683, "step": 7792 }, { "epoch": 0.42053855701257353, "grad_norm": 0.7675123401213942, "learning_rate": 
6.611728421039227e-06, "loss": 0.3715, "step": 7793 }, { "epoch": 0.42059252064108793, "grad_norm": 1.1036730561362893, "learning_rate": 6.6109891599816985e-06, "loss": 0.4553, "step": 7794 }, { "epoch": 0.42064648426960227, "grad_norm": 0.9581763454745819, "learning_rate": 6.61024986699322e-06, "loss": 0.4299, "step": 7795 }, { "epoch": 0.42070044789811667, "grad_norm": 0.8279836596270003, "learning_rate": 6.609510542095037e-06, "loss": 0.4236, "step": 7796 }, { "epoch": 0.42075441152663107, "grad_norm": 0.9513580628012458, "learning_rate": 6.608771185308401e-06, "loss": 0.5073, "step": 7797 }, { "epoch": 0.4208083751551454, "grad_norm": 1.0735693349817763, "learning_rate": 6.608031796654559e-06, "loss": 0.4844, "step": 7798 }, { "epoch": 0.4208623387836598, "grad_norm": 0.8969257117589575, "learning_rate": 6.607292376154766e-06, "loss": 0.4285, "step": 7799 }, { "epoch": 0.4209163024121742, "grad_norm": 1.143306665451789, "learning_rate": 6.6065529238302685e-06, "loss": 0.6008, "step": 7800 }, { "epoch": 0.4209702660406886, "grad_norm": 1.064741603621822, "learning_rate": 6.605813439702323e-06, "loss": 0.4753, "step": 7801 }, { "epoch": 0.42102422966920294, "grad_norm": 0.92499046748397, "learning_rate": 6.605073923792183e-06, "loss": 0.4098, "step": 7802 }, { "epoch": 0.42107819329771734, "grad_norm": 0.9423727333617983, "learning_rate": 6.604334376121101e-06, "loss": 0.4073, "step": 7803 }, { "epoch": 0.42113215692623174, "grad_norm": 0.9512374344461975, "learning_rate": 6.6035947967103345e-06, "loss": 0.5366, "step": 7804 }, { "epoch": 0.4211861205547461, "grad_norm": 1.1776820621942818, "learning_rate": 6.602855185581136e-06, "loss": 0.5083, "step": 7805 }, { "epoch": 0.4212400841832605, "grad_norm": 1.0242391241129205, "learning_rate": 6.602115542754768e-06, "loss": 0.516, "step": 7806 }, { "epoch": 0.4212940478117749, "grad_norm": 1.0217435792485094, "learning_rate": 6.601375868252483e-06, "loss": 0.519, "step": 7807 }, { "epoch": 0.42134801144028927, "grad_norm": 1.3606981229417652, "learning_rate": 6.600636162095544e-06, "loss": 0.4309, "step": 7808 }, { "epoch": 0.4214019750688036, "grad_norm": 0.8945565279044887, "learning_rate": 6.5998964243052095e-06, "loss": 0.2956, "step": 7809 }, { "epoch": 0.421455938697318, "grad_norm": 0.7762997011529004, "learning_rate": 6.5991566549027406e-06, "loss": 0.3351, "step": 7810 }, { "epoch": 0.4215099023258324, "grad_norm": 1.034762129693803, "learning_rate": 6.5984168539093985e-06, "loss": 0.5062, "step": 7811 }, { "epoch": 0.42156386595434675, "grad_norm": 1.3049134480734557, "learning_rate": 6.5976770213464466e-06, "loss": 0.5295, "step": 7812 }, { "epoch": 0.42161782958286115, "grad_norm": 1.167667406003611, "learning_rate": 6.596937157235147e-06, "loss": 0.4985, "step": 7813 }, { "epoch": 0.42167179321137555, "grad_norm": 1.1247294035236357, "learning_rate": 6.596197261596765e-06, "loss": 0.4675, "step": 7814 }, { "epoch": 0.4217257568398899, "grad_norm": 1.088702954794712, "learning_rate": 6.595457334452568e-06, "loss": 0.4403, "step": 7815 }, { "epoch": 0.4217797204684043, "grad_norm": 0.9485167101058809, "learning_rate": 6.594717375823819e-06, "loss": 0.5457, "step": 7816 }, { "epoch": 0.4218336840969187, "grad_norm": 1.049024637549353, "learning_rate": 6.593977385731786e-06, "loss": 0.3848, "step": 7817 }, { "epoch": 0.4218876477254331, "grad_norm": 1.263031342058167, "learning_rate": 6.5932373641977376e-06, "loss": 0.5934, "step": 7818 }, { "epoch": 0.4219416113539474, "grad_norm": 1.000460521712132, "learning_rate": 
6.592497311242944e-06, "loss": 0.4632, "step": 7819 }, { "epoch": 0.4219955749824618, "grad_norm": 0.9092427495045363, "learning_rate": 6.5917572268886746e-06, "loss": 0.3836, "step": 7820 }, { "epoch": 0.4220495386109762, "grad_norm": 0.8142914628193029, "learning_rate": 6.591017111156196e-06, "loss": 0.4127, "step": 7821 }, { "epoch": 0.42210350223949056, "grad_norm": 1.0069381525057215, "learning_rate": 6.590276964066785e-06, "loss": 0.5575, "step": 7822 }, { "epoch": 0.42215746586800496, "grad_norm": 0.717327075830327, "learning_rate": 6.589536785641712e-06, "loss": 0.3264, "step": 7823 }, { "epoch": 0.42221142949651935, "grad_norm": 1.0302790952393663, "learning_rate": 6.588796575902253e-06, "loss": 0.4521, "step": 7824 }, { "epoch": 0.42226539312503375, "grad_norm": 1.2320076566501479, "learning_rate": 6.588056334869678e-06, "loss": 0.6709, "step": 7825 }, { "epoch": 0.4223193567535481, "grad_norm": 1.0190578352246416, "learning_rate": 6.587316062565265e-06, "loss": 0.5236, "step": 7826 }, { "epoch": 0.4223733203820625, "grad_norm": 1.034545015001414, "learning_rate": 6.586575759010289e-06, "loss": 0.5559, "step": 7827 }, { "epoch": 0.4224272840105769, "grad_norm": 1.0071350049547674, "learning_rate": 6.585835424226029e-06, "loss": 0.4282, "step": 7828 }, { "epoch": 0.42248124763909123, "grad_norm": 1.2191075359663384, "learning_rate": 6.585095058233761e-06, "loss": 0.5636, "step": 7829 }, { "epoch": 0.4225352112676056, "grad_norm": 0.9658901177075248, "learning_rate": 6.584354661054765e-06, "loss": 0.4765, "step": 7830 }, { "epoch": 0.42258917489612, "grad_norm": 0.9374580914162814, "learning_rate": 6.58361423271032e-06, "loss": 0.3498, "step": 7831 }, { "epoch": 0.4226431385246344, "grad_norm": 0.895758330164406, "learning_rate": 6.582873773221707e-06, "loss": 0.4861, "step": 7832 }, { "epoch": 0.42269710215314876, "grad_norm": 0.8944225336029444, "learning_rate": 6.582133282610209e-06, "loss": 0.4296, "step": 7833 }, { "epoch": 0.42275106578166316, "grad_norm": 1.1077189302112191, "learning_rate": 6.581392760897106e-06, "loss": 0.5182, "step": 7834 }, { "epoch": 0.42280502941017756, "grad_norm": 0.8301822107658756, "learning_rate": 6.5806522081036826e-06, "loss": 0.4132, "step": 7835 }, { "epoch": 0.4228589930386919, "grad_norm": 0.6268144217525957, "learning_rate": 6.5799116242512225e-06, "loss": 0.2906, "step": 7836 }, { "epoch": 0.4229129566672063, "grad_norm": 0.9896523217798194, "learning_rate": 6.579171009361012e-06, "loss": 0.6334, "step": 7837 }, { "epoch": 0.4229669202957207, "grad_norm": 0.8350419924873439, "learning_rate": 6.578430363454335e-06, "loss": 0.3916, "step": 7838 }, { "epoch": 0.4230208839242351, "grad_norm": 1.0517521928880973, "learning_rate": 6.577689686552481e-06, "loss": 0.5376, "step": 7839 }, { "epoch": 0.42307484755274943, "grad_norm": 1.11797029679992, "learning_rate": 6.576948978676736e-06, "loss": 0.5482, "step": 7840 }, { "epoch": 0.42312881118126383, "grad_norm": 1.2988290814149865, "learning_rate": 6.576208239848389e-06, "loss": 0.6326, "step": 7841 }, { "epoch": 0.42318277480977823, "grad_norm": 0.9337397808453255, "learning_rate": 6.5754674700887275e-06, "loss": 0.344, "step": 7842 }, { "epoch": 0.42323673843829257, "grad_norm": 0.8631833910426425, "learning_rate": 6.574726669419048e-06, "loss": 0.346, "step": 7843 }, { "epoch": 0.42329070206680697, "grad_norm": 1.192643046022319, "learning_rate": 6.573985837860636e-06, "loss": 0.6669, "step": 7844 }, { "epoch": 0.42334466569532136, "grad_norm": 0.9537288597040855, "learning_rate": 
6.573244975434786e-06, "loss": 0.457, "step": 7845 }, { "epoch": 0.42339862932383576, "grad_norm": 1.1434855409991436, "learning_rate": 6.572504082162791e-06, "loss": 0.5275, "step": 7846 }, { "epoch": 0.4234525929523501, "grad_norm": 1.1836437635085957, "learning_rate": 6.571763158065946e-06, "loss": 0.6804, "step": 7847 }, { "epoch": 0.4235065565808645, "grad_norm": 0.9160717618933008, "learning_rate": 6.5710222031655446e-06, "loss": 0.5165, "step": 7848 }, { "epoch": 0.4235605202093789, "grad_norm": 0.9201978977447625, "learning_rate": 6.570281217482882e-06, "loss": 0.3791, "step": 7849 }, { "epoch": 0.42361448383789324, "grad_norm": 1.066159198059642, "learning_rate": 6.5695402010392566e-06, "loss": 0.4569, "step": 7850 }, { "epoch": 0.42366844746640764, "grad_norm": 0.7939236768458465, "learning_rate": 6.5687991538559635e-06, "loss": 0.3678, "step": 7851 }, { "epoch": 0.42372241109492204, "grad_norm": 1.2515332684550198, "learning_rate": 6.568058075954304e-06, "loss": 0.5549, "step": 7852 }, { "epoch": 0.4237763747234364, "grad_norm": 1.1243208012733275, "learning_rate": 6.567316967355577e-06, "loss": 0.4527, "step": 7853 }, { "epoch": 0.4238303383519508, "grad_norm": 0.952789979296829, "learning_rate": 6.566575828081081e-06, "loss": 0.4472, "step": 7854 }, { "epoch": 0.4238843019804652, "grad_norm": 0.8750794234103747, "learning_rate": 6.565834658152117e-06, "loss": 0.4304, "step": 7855 }, { "epoch": 0.42393826560897957, "grad_norm": 1.0518528209974933, "learning_rate": 6.56509345758999e-06, "loss": 0.471, "step": 7856 }, { "epoch": 0.4239922292374939, "grad_norm": 1.0571308562445403, "learning_rate": 6.564352226416001e-06, "loss": 0.4665, "step": 7857 }, { "epoch": 0.4240461928660083, "grad_norm": 1.0594644800431554, "learning_rate": 6.563610964651451e-06, "loss": 0.6139, "step": 7858 }, { "epoch": 0.4241001564945227, "grad_norm": 1.0454436526851603, "learning_rate": 6.562869672317649e-06, "loss": 0.5317, "step": 7859 }, { "epoch": 0.42415412012303705, "grad_norm": 1.24020282265456, "learning_rate": 6.562128349435899e-06, "loss": 0.7382, "step": 7860 }, { "epoch": 0.42420808375155145, "grad_norm": 1.0892418148459613, "learning_rate": 6.561386996027507e-06, "loss": 0.4053, "step": 7861 }, { "epoch": 0.42426204738006584, "grad_norm": 1.0220002922575258, "learning_rate": 6.560645612113779e-06, "loss": 0.5085, "step": 7862 }, { "epoch": 0.42431601100858024, "grad_norm": 1.4223807911278348, "learning_rate": 6.559904197716025e-06, "loss": 0.5826, "step": 7863 }, { "epoch": 0.4243699746370946, "grad_norm": 1.164251164994881, "learning_rate": 6.559162752855554e-06, "loss": 0.6624, "step": 7864 }, { "epoch": 0.424423938265609, "grad_norm": 1.0749444945715358, "learning_rate": 6.558421277553675e-06, "loss": 0.5624, "step": 7865 }, { "epoch": 0.4244779018941234, "grad_norm": 1.048261327446229, "learning_rate": 6.557679771831698e-06, "loss": 0.4769, "step": 7866 }, { "epoch": 0.4245318655226377, "grad_norm": 0.9442833418615849, "learning_rate": 6.556938235710938e-06, "loss": 0.3909, "step": 7867 }, { "epoch": 0.4245858291511521, "grad_norm": 1.1319720414956345, "learning_rate": 6.556196669212702e-06, "loss": 0.4852, "step": 7868 }, { "epoch": 0.4246397927796665, "grad_norm": 1.052014469079334, "learning_rate": 6.5554550723583075e-06, "loss": 0.4921, "step": 7869 }, { "epoch": 0.4246937564081809, "grad_norm": 1.1235685861392024, "learning_rate": 6.554713445169068e-06, "loss": 0.5263, "step": 7870 }, { "epoch": 0.42474772003669525, "grad_norm": 1.1386474321578377, "learning_rate": 
6.5539717876663e-06, "loss": 0.4861, "step": 7871 }, { "epoch": 0.42480168366520965, "grad_norm": 0.9575977368058407, "learning_rate": 6.553230099871318e-06, "loss": 0.4009, "step": 7872 }, { "epoch": 0.42485564729372405, "grad_norm": 1.1063285148284108, "learning_rate": 6.552488381805436e-06, "loss": 0.492, "step": 7873 }, { "epoch": 0.4249096109222384, "grad_norm": 1.204554810975995, "learning_rate": 6.551746633489976e-06, "loss": 0.6741, "step": 7874 }, { "epoch": 0.4249635745507528, "grad_norm": 0.930057334471502, "learning_rate": 6.551004854946255e-06, "loss": 0.421, "step": 7875 }, { "epoch": 0.4250175381792672, "grad_norm": 1.2133036502479575, "learning_rate": 6.550263046195591e-06, "loss": 0.5385, "step": 7876 }, { "epoch": 0.4250715018077816, "grad_norm": 0.935436758230232, "learning_rate": 6.549521207259308e-06, "loss": 0.4051, "step": 7877 }, { "epoch": 0.4251254654362959, "grad_norm": 1.1831219363151877, "learning_rate": 6.548779338158724e-06, "loss": 0.4557, "step": 7878 }, { "epoch": 0.4251794290648103, "grad_norm": 1.0288225156355828, "learning_rate": 6.548037438915161e-06, "loss": 0.479, "step": 7879 }, { "epoch": 0.4252333926933247, "grad_norm": 1.0688235620197069, "learning_rate": 6.547295509549946e-06, "loss": 0.4923, "step": 7880 }, { "epoch": 0.42528735632183906, "grad_norm": 1.0801609640581717, "learning_rate": 6.546553550084398e-06, "loss": 0.5184, "step": 7881 }, { "epoch": 0.42534131995035346, "grad_norm": 0.8725134693275037, "learning_rate": 6.545811560539843e-06, "loss": 0.3938, "step": 7882 }, { "epoch": 0.42539528357886786, "grad_norm": 0.8710258728784571, "learning_rate": 6.545069540937607e-06, "loss": 0.41, "step": 7883 }, { "epoch": 0.4254492472073822, "grad_norm": 0.8592848460981608, "learning_rate": 6.544327491299019e-06, "loss": 0.347, "step": 7884 }, { "epoch": 0.4255032108358966, "grad_norm": 0.8782234756180605, "learning_rate": 6.543585411645401e-06, "loss": 0.3697, "step": 7885 }, { "epoch": 0.425557174464411, "grad_norm": 1.031162586977422, "learning_rate": 6.542843301998085e-06, "loss": 0.5044, "step": 7886 }, { "epoch": 0.4256111380929254, "grad_norm": 0.8883330611783522, "learning_rate": 6.542101162378397e-06, "loss": 0.4087, "step": 7887 }, { "epoch": 0.42566510172143973, "grad_norm": 1.2102891257776733, "learning_rate": 6.541358992807671e-06, "loss": 0.5828, "step": 7888 }, { "epoch": 0.42571906534995413, "grad_norm": 1.0374352880276512, "learning_rate": 6.540616793307235e-06, "loss": 0.5721, "step": 7889 }, { "epoch": 0.4257730289784685, "grad_norm": 1.1349447291995236, "learning_rate": 6.53987456389842e-06, "loss": 0.5534, "step": 7890 }, { "epoch": 0.42582699260698287, "grad_norm": 1.2036280693015913, "learning_rate": 6.53913230460256e-06, "loss": 0.5168, "step": 7891 }, { "epoch": 0.42588095623549727, "grad_norm": 0.9093076054244045, "learning_rate": 6.538390015440987e-06, "loss": 0.397, "step": 7892 }, { "epoch": 0.42593491986401166, "grad_norm": 1.0136557091778857, "learning_rate": 6.537647696435035e-06, "loss": 0.4915, "step": 7893 }, { "epoch": 0.42598888349252606, "grad_norm": 1.0698799942849961, "learning_rate": 6.53690534760604e-06, "loss": 0.5014, "step": 7894 }, { "epoch": 0.4260428471210404, "grad_norm": 1.0501582154162956, "learning_rate": 6.5361629689753395e-06, "loss": 0.5126, "step": 7895 }, { "epoch": 0.4260968107495548, "grad_norm": 0.9840439418147563, "learning_rate": 6.535420560564267e-06, "loss": 0.4919, "step": 7896 }, { "epoch": 0.4261507743780692, "grad_norm": 1.0671901704533855, "learning_rate": 6.534678122394161e-06, 
"loss": 0.5331, "step": 7897 }, { "epoch": 0.42620473800658354, "grad_norm": 0.7918471542269024, "learning_rate": 6.533935654486361e-06, "loss": 0.3214, "step": 7898 }, { "epoch": 0.42625870163509794, "grad_norm": 0.9011671399727269, "learning_rate": 6.533193156862206e-06, "loss": 0.4676, "step": 7899 }, { "epoch": 0.42631266526361233, "grad_norm": 1.1985156063571485, "learning_rate": 6.532450629543035e-06, "loss": 0.441, "step": 7900 }, { "epoch": 0.42636662889212673, "grad_norm": 0.8999357944710695, "learning_rate": 6.5317080725501895e-06, "loss": 0.5057, "step": 7901 }, { "epoch": 0.4264205925206411, "grad_norm": 1.1319670999135547, "learning_rate": 6.5309654859050105e-06, "loss": 0.556, "step": 7902 }, { "epoch": 0.42647455614915547, "grad_norm": 1.080390029645909, "learning_rate": 6.530222869628844e-06, "loss": 0.5142, "step": 7903 }, { "epoch": 0.42652851977766987, "grad_norm": 0.9117917229278997, "learning_rate": 6.529480223743029e-06, "loss": 0.4969, "step": 7904 }, { "epoch": 0.4265824834061842, "grad_norm": 0.9411410592243175, "learning_rate": 6.5287375482689144e-06, "loss": 0.3978, "step": 7905 }, { "epoch": 0.4266364470346986, "grad_norm": 1.0826400551446058, "learning_rate": 6.527994843227842e-06, "loss": 0.4793, "step": 7906 }, { "epoch": 0.426690410663213, "grad_norm": 1.03550714822426, "learning_rate": 6.527252108641159e-06, "loss": 0.4833, "step": 7907 }, { "epoch": 0.4267443742917274, "grad_norm": 1.2501866983120542, "learning_rate": 6.526509344530212e-06, "loss": 0.5302, "step": 7908 }, { "epoch": 0.42679833792024174, "grad_norm": 0.8819800977656523, "learning_rate": 6.525766550916352e-06, "loss": 0.3731, "step": 7909 }, { "epoch": 0.42685230154875614, "grad_norm": 1.1198496565464324, "learning_rate": 6.525023727820922e-06, "loss": 0.5125, "step": 7910 }, { "epoch": 0.42690626517727054, "grad_norm": 1.1310846654143047, "learning_rate": 6.524280875265274e-06, "loss": 0.5418, "step": 7911 }, { "epoch": 0.4269602288057849, "grad_norm": 0.960488343703718, "learning_rate": 6.523537993270761e-06, "loss": 0.4327, "step": 7912 }, { "epoch": 0.4270141924342993, "grad_norm": 0.784766116185314, "learning_rate": 6.52279508185873e-06, "loss": 0.354, "step": 7913 }, { "epoch": 0.4270681560628137, "grad_norm": 1.0290480890895213, "learning_rate": 6.522052141050536e-06, "loss": 0.5283, "step": 7914 }, { "epoch": 0.427122119691328, "grad_norm": 0.9935958855490318, "learning_rate": 6.521309170867532e-06, "loss": 0.3786, "step": 7915 }, { "epoch": 0.4271760833198424, "grad_norm": 0.7352870374874995, "learning_rate": 6.520566171331067e-06, "loss": 0.3569, "step": 7916 }, { "epoch": 0.4272300469483568, "grad_norm": 0.963731007082754, "learning_rate": 6.519823142462501e-06, "loss": 0.4505, "step": 7917 }, { "epoch": 0.4272840105768712, "grad_norm": 1.0030082120337285, "learning_rate": 6.519080084283187e-06, "loss": 0.3375, "step": 7918 }, { "epoch": 0.42733797420538555, "grad_norm": 1.03661784886935, "learning_rate": 6.518336996814483e-06, "loss": 0.4212, "step": 7919 }, { "epoch": 0.42739193783389995, "grad_norm": 1.1157506103807897, "learning_rate": 6.517593880077744e-06, "loss": 0.5509, "step": 7920 }, { "epoch": 0.42744590146241435, "grad_norm": 0.9911867851820126, "learning_rate": 6.516850734094328e-06, "loss": 0.3999, "step": 7921 }, { "epoch": 0.4274998650909287, "grad_norm": 0.9850298105711124, "learning_rate": 6.516107558885595e-06, "loss": 0.4765, "step": 7922 }, { "epoch": 0.4275538287194431, "grad_norm": 0.8724980641331733, "learning_rate": 6.515364354472904e-06, "loss": 0.4526, 
"step": 7923 }, { "epoch": 0.4276077923479575, "grad_norm": 0.9373870837600559, "learning_rate": 6.514621120877614e-06, "loss": 0.4205, "step": 7924 }, { "epoch": 0.4276617559764719, "grad_norm": 1.112776809122554, "learning_rate": 6.51387785812109e-06, "loss": 0.582, "step": 7925 }, { "epoch": 0.4277157196049862, "grad_norm": 1.0519421505793276, "learning_rate": 6.5131345662246905e-06, "loss": 0.4908, "step": 7926 }, { "epoch": 0.4277696832335006, "grad_norm": 1.2510851087084056, "learning_rate": 6.51239124520978e-06, "loss": 0.5699, "step": 7927 }, { "epoch": 0.427823646862015, "grad_norm": 1.0172699519012407, "learning_rate": 6.5116478950977224e-06, "loss": 0.5477, "step": 7928 }, { "epoch": 0.42787761049052936, "grad_norm": 0.9979069677691887, "learning_rate": 6.510904515909884e-06, "loss": 0.4683, "step": 7929 }, { "epoch": 0.42793157411904376, "grad_norm": 0.8898701623591555, "learning_rate": 6.510161107667627e-06, "loss": 0.3595, "step": 7930 }, { "epoch": 0.42798553774755815, "grad_norm": 1.0802252419158502, "learning_rate": 6.5094176703923175e-06, "loss": 0.6147, "step": 7931 }, { "epoch": 0.42803950137607255, "grad_norm": 1.0973492830116818, "learning_rate": 6.508674204105324e-06, "loss": 0.4953, "step": 7932 }, { "epoch": 0.4280934650045869, "grad_norm": 0.9303407388314952, "learning_rate": 6.507930708828019e-06, "loss": 0.4524, "step": 7933 }, { "epoch": 0.4281474286331013, "grad_norm": 0.8978471583569569, "learning_rate": 6.507187184581763e-06, "loss": 0.4538, "step": 7934 }, { "epoch": 0.4282013922616157, "grad_norm": 0.9287534694005591, "learning_rate": 6.5064436313879306e-06, "loss": 0.3743, "step": 7935 }, { "epoch": 0.42825535589013003, "grad_norm": 0.9964726067406385, "learning_rate": 6.505700049267893e-06, "loss": 0.6128, "step": 7936 }, { "epoch": 0.4283093195186444, "grad_norm": 1.104509746118136, "learning_rate": 6.504956438243017e-06, "loss": 0.5888, "step": 7937 }, { "epoch": 0.4283632831471588, "grad_norm": 0.9144669923308995, "learning_rate": 6.504212798334678e-06, "loss": 0.4597, "step": 7938 }, { "epoch": 0.4284172467756732, "grad_norm": 0.8718643045464086, "learning_rate": 6.503469129564249e-06, "loss": 0.3489, "step": 7939 }, { "epoch": 0.42847121040418756, "grad_norm": 1.0144561307939426, "learning_rate": 6.502725431953105e-06, "loss": 0.5418, "step": 7940 }, { "epoch": 0.42852517403270196, "grad_norm": 0.8393232357207013, "learning_rate": 6.501981705522617e-06, "loss": 0.3163, "step": 7941 }, { "epoch": 0.42857913766121636, "grad_norm": 1.0852202392465276, "learning_rate": 6.501237950294162e-06, "loss": 0.4852, "step": 7942 }, { "epoch": 0.4286331012897307, "grad_norm": 1.0253867678180413, "learning_rate": 6.500494166289119e-06, "loss": 0.5402, "step": 7943 }, { "epoch": 0.4286870649182451, "grad_norm": 0.906074018292121, "learning_rate": 6.499750353528861e-06, "loss": 0.2975, "step": 7944 }, { "epoch": 0.4287410285467595, "grad_norm": 1.0271366324591424, "learning_rate": 6.499006512034768e-06, "loss": 0.4251, "step": 7945 }, { "epoch": 0.4287949921752739, "grad_norm": 0.821837432737237, "learning_rate": 6.498262641828218e-06, "loss": 0.3485, "step": 7946 }, { "epoch": 0.42884895580378823, "grad_norm": 1.356700536686015, "learning_rate": 6.497518742930591e-06, "loss": 0.6673, "step": 7947 }, { "epoch": 0.42890291943230263, "grad_norm": 0.9300836948882877, "learning_rate": 6.496774815363268e-06, "loss": 0.4523, "step": 7948 }, { "epoch": 0.42895688306081703, "grad_norm": 1.2611585801026053, "learning_rate": 6.496030859147628e-06, "loss": 0.4314, "step": 7949 
}, { "epoch": 0.42901084668933137, "grad_norm": 1.237487737794435, "learning_rate": 6.495286874305057e-06, "loss": 0.4343, "step": 7950 }, { "epoch": 0.42906481031784577, "grad_norm": 0.8231913275096322, "learning_rate": 6.494542860856935e-06, "loss": 0.3724, "step": 7951 }, { "epoch": 0.42911877394636017, "grad_norm": 1.0103044372241177, "learning_rate": 6.493798818824646e-06, "loss": 0.4141, "step": 7952 }, { "epoch": 0.4291727375748745, "grad_norm": 1.063909439250591, "learning_rate": 6.493054748229573e-06, "loss": 0.4624, "step": 7953 }, { "epoch": 0.4292267012033889, "grad_norm": 0.9053833963494851, "learning_rate": 6.4923106490931055e-06, "loss": 0.5142, "step": 7954 }, { "epoch": 0.4292806648319033, "grad_norm": 1.1783021034140946, "learning_rate": 6.491566521436627e-06, "loss": 0.5219, "step": 7955 }, { "epoch": 0.4293346284604177, "grad_norm": 0.9624738544314634, "learning_rate": 6.4908223652815236e-06, "loss": 0.4415, "step": 7956 }, { "epoch": 0.42938859208893204, "grad_norm": 0.9711778814964833, "learning_rate": 6.490078180649186e-06, "loss": 0.4294, "step": 7957 }, { "epoch": 0.42944255571744644, "grad_norm": 0.9972705904645048, "learning_rate": 6.489333967561e-06, "loss": 0.5048, "step": 7958 }, { "epoch": 0.42949651934596084, "grad_norm": 1.0269519863186192, "learning_rate": 6.4885897260383556e-06, "loss": 0.5705, "step": 7959 }, { "epoch": 0.4295504829744752, "grad_norm": 1.114008004273625, "learning_rate": 6.487845456102645e-06, "loss": 0.4088, "step": 7960 }, { "epoch": 0.4296044466029896, "grad_norm": 1.1968023049346563, "learning_rate": 6.487101157775257e-06, "loss": 0.674, "step": 7961 }, { "epoch": 0.429658410231504, "grad_norm": 1.061390498161719, "learning_rate": 6.486356831077584e-06, "loss": 0.5433, "step": 7962 }, { "epoch": 0.42971237386001837, "grad_norm": 1.0596716119619518, "learning_rate": 6.485612476031019e-06, "loss": 0.4144, "step": 7963 }, { "epoch": 0.4297663374885327, "grad_norm": 1.1300407892256106, "learning_rate": 6.4848680926569565e-06, "loss": 0.5082, "step": 7964 }, { "epoch": 0.4298203011170471, "grad_norm": 1.081227335739126, "learning_rate": 6.48412368097679e-06, "loss": 0.4936, "step": 7965 }, { "epoch": 0.4298742647455615, "grad_norm": 0.8463809941918291, "learning_rate": 6.483379241011913e-06, "loss": 0.4332, "step": 7966 }, { "epoch": 0.42992822837407585, "grad_norm": 1.0603295311294068, "learning_rate": 6.482634772783725e-06, "loss": 0.4259, "step": 7967 }, { "epoch": 0.42998219200259025, "grad_norm": 1.00497304319385, "learning_rate": 6.4818902763136195e-06, "loss": 0.4144, "step": 7968 }, { "epoch": 0.43003615563110464, "grad_norm": 1.090616774902251, "learning_rate": 6.481145751622994e-06, "loss": 0.4345, "step": 7969 }, { "epoch": 0.43009011925961904, "grad_norm": 1.0708404719021594, "learning_rate": 6.48040119873325e-06, "loss": 0.5187, "step": 7970 }, { "epoch": 0.4301440828881334, "grad_norm": 0.9919900045996579, "learning_rate": 6.479656617665785e-06, "loss": 0.4554, "step": 7971 }, { "epoch": 0.4301980465166478, "grad_norm": 1.2814516421469635, "learning_rate": 6.478912008441999e-06, "loss": 0.5316, "step": 7972 }, { "epoch": 0.4302520101451622, "grad_norm": 1.1140540841017712, "learning_rate": 6.478167371083292e-06, "loss": 0.4904, "step": 7973 }, { "epoch": 0.4303059737736765, "grad_norm": 1.1315510742878296, "learning_rate": 6.477422705611067e-06, "loss": 0.481, "step": 7974 }, { "epoch": 0.4303599374021909, "grad_norm": 1.0761918450490597, "learning_rate": 6.476678012046725e-06, "loss": 0.5082, "step": 7975 }, { "epoch": 
0.4304139010307053, "grad_norm": 1.0460941366449146, "learning_rate": 6.47593329041167e-06, "loss": 0.5471, "step": 7976 }, { "epoch": 0.4304678646592197, "grad_norm": 1.119650556548085, "learning_rate": 6.475188540727306e-06, "loss": 0.4849, "step": 7977 }, { "epoch": 0.43052182828773405, "grad_norm": 1.0214626505058457, "learning_rate": 6.474443763015039e-06, "loss": 0.4893, "step": 7978 }, { "epoch": 0.43057579191624845, "grad_norm": 0.9433567318662003, "learning_rate": 6.4736989572962725e-06, "loss": 0.4156, "step": 7979 }, { "epoch": 0.43062975554476285, "grad_norm": 0.9496464732559519, "learning_rate": 6.472954123592415e-06, "loss": 0.4403, "step": 7980 }, { "epoch": 0.4306837191732772, "grad_norm": 1.0101994253906252, "learning_rate": 6.472209261924874e-06, "loss": 0.5364, "step": 7981 }, { "epoch": 0.4307376828017916, "grad_norm": 1.5101717681756666, "learning_rate": 6.471464372315055e-06, "loss": 0.5231, "step": 7982 }, { "epoch": 0.430791646430306, "grad_norm": 1.1165661716952662, "learning_rate": 6.470719454784369e-06, "loss": 0.5773, "step": 7983 }, { "epoch": 0.4308456100588203, "grad_norm": 1.0694760095824354, "learning_rate": 6.4699745093542254e-06, "loss": 0.453, "step": 7984 }, { "epoch": 0.4308995736873347, "grad_norm": 1.0302393261749057, "learning_rate": 6.469229536046033e-06, "loss": 0.4842, "step": 7985 }, { "epoch": 0.4309535373158491, "grad_norm": 0.9388323192944762, "learning_rate": 6.468484534881206e-06, "loss": 0.3682, "step": 7986 }, { "epoch": 0.4310075009443635, "grad_norm": 1.0427892575394033, "learning_rate": 6.467739505881152e-06, "loss": 0.4745, "step": 7987 }, { "epoch": 0.43106146457287786, "grad_norm": 1.03478702783235, "learning_rate": 6.466994449067289e-06, "loss": 0.5533, "step": 7988 }, { "epoch": 0.43111542820139226, "grad_norm": 1.0044919513893809, "learning_rate": 6.466249364461028e-06, "loss": 0.4237, "step": 7989 }, { "epoch": 0.43116939182990666, "grad_norm": 1.0943553435622573, "learning_rate": 6.465504252083784e-06, "loss": 0.6121, "step": 7990 }, { "epoch": 0.431223355458421, "grad_norm": 1.0595969140536157, "learning_rate": 6.464759111956973e-06, "loss": 0.5318, "step": 7991 }, { "epoch": 0.4312773190869354, "grad_norm": 1.1863521769814103, "learning_rate": 6.46401394410201e-06, "loss": 0.6781, "step": 7992 }, { "epoch": 0.4313312827154498, "grad_norm": 1.136189988061752, "learning_rate": 6.4632687485403115e-06, "loss": 0.4817, "step": 7993 }, { "epoch": 0.4313852463439642, "grad_norm": 0.8079749771212793, "learning_rate": 6.462523525293295e-06, "loss": 0.3373, "step": 7994 }, { "epoch": 0.43143920997247853, "grad_norm": 1.0524757804639808, "learning_rate": 6.461778274382382e-06, "loss": 0.43, "step": 7995 }, { "epoch": 0.43149317360099293, "grad_norm": 0.7703881499431162, "learning_rate": 6.461032995828989e-06, "loss": 0.3599, "step": 7996 }, { "epoch": 0.4315471372295073, "grad_norm": 1.3202555821405633, "learning_rate": 6.460287689654535e-06, "loss": 0.6113, "step": 7997 }, { "epoch": 0.43160110085802167, "grad_norm": 1.0655317222366374, "learning_rate": 6.459542355880445e-06, "loss": 0.4125, "step": 7998 }, { "epoch": 0.43165506448653607, "grad_norm": 1.0805607215880697, "learning_rate": 6.458796994528136e-06, "loss": 0.4727, "step": 7999 }, { "epoch": 0.43170902811505046, "grad_norm": 0.8582092338717855, "learning_rate": 6.4580516056190334e-06, "loss": 0.4791, "step": 8000 }, { "epoch": 0.43170902811505046, "eval_loss": 0.555820882320404, "eval_runtime": 166.1295, "eval_samples_per_second": 20.701, "eval_steps_per_second": 0.867, 
"step": 8000 }, { "epoch": 0.43176299174356486, "grad_norm": 0.9966235165546949, "learning_rate": 6.457306189174559e-06, "loss": 0.3833, "step": 8001 }, { "epoch": 0.4318169553720792, "grad_norm": 1.027525741745414, "learning_rate": 6.456560745216139e-06, "loss": 0.3956, "step": 8002 }, { "epoch": 0.4318709190005936, "grad_norm": 1.1317187601111425, "learning_rate": 6.455815273765195e-06, "loss": 0.5781, "step": 8003 }, { "epoch": 0.431924882629108, "grad_norm": 1.0803088013711326, "learning_rate": 6.4550697748431545e-06, "loss": 0.4742, "step": 8004 }, { "epoch": 0.43197884625762234, "grad_norm": 0.9001692389401343, "learning_rate": 6.454324248471444e-06, "loss": 0.5093, "step": 8005 }, { "epoch": 0.43203280988613674, "grad_norm": 0.8206242173169033, "learning_rate": 6.453578694671491e-06, "loss": 0.4099, "step": 8006 }, { "epoch": 0.43208677351465113, "grad_norm": 0.955584064034573, "learning_rate": 6.452833113464722e-06, "loss": 0.4345, "step": 8007 }, { "epoch": 0.43214073714316553, "grad_norm": 1.0185153974031833, "learning_rate": 6.452087504872566e-06, "loss": 0.525, "step": 8008 }, { "epoch": 0.4321947007716799, "grad_norm": 1.0616851725071146, "learning_rate": 6.451341868916454e-06, "loss": 0.4836, "step": 8009 }, { "epoch": 0.43224866440019427, "grad_norm": 1.1175284008490858, "learning_rate": 6.450596205617815e-06, "loss": 0.4566, "step": 8010 }, { "epoch": 0.43230262802870867, "grad_norm": 1.0602954402529055, "learning_rate": 6.4498505149980815e-06, "loss": 0.6617, "step": 8011 }, { "epoch": 0.432356591657223, "grad_norm": 1.1091641879052583, "learning_rate": 6.4491047970786844e-06, "loss": 0.5464, "step": 8012 }, { "epoch": 0.4324105552857374, "grad_norm": 0.9664216949819006, "learning_rate": 6.448359051881055e-06, "loss": 0.4639, "step": 8013 }, { "epoch": 0.4324645189142518, "grad_norm": 1.0671588085083237, "learning_rate": 6.447613279426629e-06, "loss": 0.5246, "step": 8014 }, { "epoch": 0.4325184825427662, "grad_norm": 0.9953050598485057, "learning_rate": 6.446867479736839e-06, "loss": 0.4657, "step": 8015 }, { "epoch": 0.43257244617128054, "grad_norm": 1.136955749124388, "learning_rate": 6.446121652833123e-06, "loss": 0.494, "step": 8016 }, { "epoch": 0.43262640979979494, "grad_norm": 0.9929719131533222, "learning_rate": 6.445375798736913e-06, "loss": 0.4886, "step": 8017 }, { "epoch": 0.43268037342830934, "grad_norm": 1.087271087464239, "learning_rate": 6.444629917469647e-06, "loss": 0.5384, "step": 8018 }, { "epoch": 0.4327343370568237, "grad_norm": 1.1264946557462712, "learning_rate": 6.443884009052764e-06, "loss": 0.517, "step": 8019 }, { "epoch": 0.4327883006853381, "grad_norm": 1.2124720119031085, "learning_rate": 6.443138073507701e-06, "loss": 0.4419, "step": 8020 }, { "epoch": 0.4328422643138525, "grad_norm": 0.955610731677924, "learning_rate": 6.442392110855896e-06, "loss": 0.4156, "step": 8021 }, { "epoch": 0.4328962279423668, "grad_norm": 1.1352347708537454, "learning_rate": 6.44164612111879e-06, "loss": 0.5334, "step": 8022 }, { "epoch": 0.4329501915708812, "grad_norm": 0.9740608868368066, "learning_rate": 6.440900104317823e-06, "loss": 0.5386, "step": 8023 }, { "epoch": 0.4330041551993956, "grad_norm": 1.0113133293311571, "learning_rate": 6.440154060474435e-06, "loss": 0.4102, "step": 8024 }, { "epoch": 0.43305811882791, "grad_norm": 1.0627563267923887, "learning_rate": 6.43940798961007e-06, "loss": 0.5465, "step": 8025 }, { "epoch": 0.43311208245642435, "grad_norm": 0.9189982080698558, "learning_rate": 6.438661891746171e-06, "loss": 0.3948, "step": 8026 }, { 
"epoch": 0.43316604608493875, "grad_norm": 0.9608110351552288, "learning_rate": 6.43791576690418e-06, "loss": 0.3871, "step": 8027 }, { "epoch": 0.43322000971345315, "grad_norm": 1.128664149700366, "learning_rate": 6.437169615105543e-06, "loss": 0.6029, "step": 8028 }, { "epoch": 0.4332739733419675, "grad_norm": 1.23838706342371, "learning_rate": 6.436423436371704e-06, "loss": 0.6062, "step": 8029 }, { "epoch": 0.4333279369704819, "grad_norm": 1.1331837253287147, "learning_rate": 6.435677230724108e-06, "loss": 0.444, "step": 8030 }, { "epoch": 0.4333819005989963, "grad_norm": 0.9055049538561607, "learning_rate": 6.434930998184203e-06, "loss": 0.4771, "step": 8031 }, { "epoch": 0.4334358642275107, "grad_norm": 1.15706699723877, "learning_rate": 6.434184738773438e-06, "loss": 0.6248, "step": 8032 }, { "epoch": 0.433489827856025, "grad_norm": 0.9927963828321534, "learning_rate": 6.43343845251326e-06, "loss": 0.4158, "step": 8033 }, { "epoch": 0.4335437914845394, "grad_norm": 1.0006950383734416, "learning_rate": 6.4326921394251155e-06, "loss": 0.4932, "step": 8034 }, { "epoch": 0.4335977551130538, "grad_norm": 1.0255084737081837, "learning_rate": 6.431945799530458e-06, "loss": 0.4654, "step": 8035 }, { "epoch": 0.43365171874156816, "grad_norm": 0.9161309082014335, "learning_rate": 6.431199432850738e-06, "loss": 0.6028, "step": 8036 }, { "epoch": 0.43370568237008256, "grad_norm": 1.0022456988498338, "learning_rate": 6.430453039407403e-06, "loss": 0.4768, "step": 8037 }, { "epoch": 0.43375964599859695, "grad_norm": 1.115672070292445, "learning_rate": 6.429706619221907e-06, "loss": 0.6394, "step": 8038 }, { "epoch": 0.43381360962711135, "grad_norm": 1.0370494415761422, "learning_rate": 6.428960172315705e-06, "loss": 0.6024, "step": 8039 }, { "epoch": 0.4338675732556257, "grad_norm": 0.9400693565634222, "learning_rate": 6.42821369871025e-06, "loss": 0.4955, "step": 8040 }, { "epoch": 0.4339215368841401, "grad_norm": 1.0786392721469908, "learning_rate": 6.427467198426993e-06, "loss": 0.5728, "step": 8041 }, { "epoch": 0.4339755005126545, "grad_norm": 1.1366709880506471, "learning_rate": 6.426720671487392e-06, "loss": 0.6078, "step": 8042 }, { "epoch": 0.43402946414116883, "grad_norm": 1.0084031768941273, "learning_rate": 6.425974117912904e-06, "loss": 0.4871, "step": 8043 }, { "epoch": 0.4340834277696832, "grad_norm": 1.1338140760266902, "learning_rate": 6.4252275377249824e-06, "loss": 0.5438, "step": 8044 }, { "epoch": 0.4341373913981976, "grad_norm": 1.0537999960133295, "learning_rate": 6.424480930945087e-06, "loss": 0.5842, "step": 8045 }, { "epoch": 0.434191355026712, "grad_norm": 1.0903483285334878, "learning_rate": 6.423734297594676e-06, "loss": 0.4026, "step": 8046 }, { "epoch": 0.43424531865522636, "grad_norm": 1.012008774465215, "learning_rate": 6.422987637695208e-06, "loss": 0.4515, "step": 8047 }, { "epoch": 0.43429928228374076, "grad_norm": 1.1304014399621816, "learning_rate": 6.422240951268144e-06, "loss": 0.4715, "step": 8048 }, { "epoch": 0.43435324591225516, "grad_norm": 1.064400887053191, "learning_rate": 6.42149423833494e-06, "loss": 0.4188, "step": 8049 }, { "epoch": 0.4344072095407695, "grad_norm": 1.149042268668801, "learning_rate": 6.4207474989170634e-06, "loss": 0.4762, "step": 8050 }, { "epoch": 0.4344611731692839, "grad_norm": 1.1475140499568488, "learning_rate": 6.420000733035973e-06, "loss": 0.4585, "step": 8051 }, { "epoch": 0.4345151367977983, "grad_norm": 1.083605079893737, "learning_rate": 6.41925394071313e-06, "loss": 0.5235, "step": 8052 }, { "epoch": 
0.43456910042631264, "grad_norm": 1.033835674097321, "learning_rate": 6.4185071219700005e-06, "loss": 0.5947, "step": 8053 }, { "epoch": 0.43462306405482704, "grad_norm": 0.8582916096244454, "learning_rate": 6.417760276828048e-06, "loss": 0.393, "step": 8054 }, { "epoch": 0.43467702768334143, "grad_norm": 1.0419185696017907, "learning_rate": 6.417013405308738e-06, "loss": 0.3591, "step": 8055 }, { "epoch": 0.43473099131185583, "grad_norm": 1.0709311587178505, "learning_rate": 6.416266507433537e-06, "loss": 0.4724, "step": 8056 }, { "epoch": 0.43478495494037017, "grad_norm": 1.074971733883941, "learning_rate": 6.415519583223912e-06, "loss": 0.5165, "step": 8057 }, { "epoch": 0.43483891856888457, "grad_norm": 1.1353685124645925, "learning_rate": 6.414772632701326e-06, "loss": 0.591, "step": 8058 }, { "epoch": 0.43489288219739897, "grad_norm": 1.1320142978827112, "learning_rate": 6.414025655887251e-06, "loss": 0.6093, "step": 8059 }, { "epoch": 0.4349468458259133, "grad_norm": 0.9892705211490621, "learning_rate": 6.413278652803158e-06, "loss": 0.4913, "step": 8060 }, { "epoch": 0.4350008094544277, "grad_norm": 0.9529040242943534, "learning_rate": 6.412531623470512e-06, "loss": 0.4892, "step": 8061 }, { "epoch": 0.4350547730829421, "grad_norm": 0.8702550960721849, "learning_rate": 6.411784567910783e-06, "loss": 0.388, "step": 8062 }, { "epoch": 0.4351087367114565, "grad_norm": 0.8945978521745916, "learning_rate": 6.411037486145446e-06, "loss": 0.3707, "step": 8063 }, { "epoch": 0.43516270033997084, "grad_norm": 1.0473644185786157, "learning_rate": 6.410290378195971e-06, "loss": 0.5794, "step": 8064 }, { "epoch": 0.43521666396848524, "grad_norm": 1.288300269468444, "learning_rate": 6.409543244083831e-06, "loss": 0.6389, "step": 8065 }, { "epoch": 0.43527062759699964, "grad_norm": 1.042710238895446, "learning_rate": 6.4087960838304995e-06, "loss": 0.459, "step": 8066 }, { "epoch": 0.435324591225514, "grad_norm": 0.9528224469913216, "learning_rate": 6.40804889745745e-06, "loss": 0.3872, "step": 8067 }, { "epoch": 0.4353785548540284, "grad_norm": 1.0420034195641645, "learning_rate": 6.407301684986157e-06, "loss": 0.4581, "step": 8068 }, { "epoch": 0.4354325184825428, "grad_norm": 1.030198547140232, "learning_rate": 6.406554446438097e-06, "loss": 0.4401, "step": 8069 }, { "epoch": 0.43548648211105717, "grad_norm": 1.082601079448098, "learning_rate": 6.405807181834747e-06, "loss": 0.5042, "step": 8070 }, { "epoch": 0.4355404457395715, "grad_norm": 1.163902238198311, "learning_rate": 6.4050598911975825e-06, "loss": 0.6521, "step": 8071 }, { "epoch": 0.4355944093680859, "grad_norm": 0.916552049491407, "learning_rate": 6.404312574548083e-06, "loss": 0.4371, "step": 8072 }, { "epoch": 0.4356483729966003, "grad_norm": 1.0182016372993206, "learning_rate": 6.403565231907724e-06, "loss": 0.564, "step": 8073 }, { "epoch": 0.43570233662511465, "grad_norm": 1.12107038092545, "learning_rate": 6.402817863297991e-06, "loss": 0.4994, "step": 8074 }, { "epoch": 0.43575630025362905, "grad_norm": 1.0470176871027743, "learning_rate": 6.402070468740358e-06, "loss": 0.4609, "step": 8075 }, { "epoch": 0.43581026388214344, "grad_norm": 1.0647090637761971, "learning_rate": 6.4013230482563086e-06, "loss": 0.653, "step": 8076 }, { "epoch": 0.43586422751065784, "grad_norm": 0.8831818413182673, "learning_rate": 6.400575601867323e-06, "loss": 0.4519, "step": 8077 }, { "epoch": 0.4359181911391722, "grad_norm": 0.8472336573540319, "learning_rate": 6.3998281295948865e-06, "loss": 0.3507, "step": 8078 }, { "epoch": 
0.4359721547676866, "grad_norm": 1.068479235534307, "learning_rate": 6.399080631460479e-06, "loss": 0.5811, "step": 8079 }, { "epoch": 0.436026118396201, "grad_norm": 1.0104117912516317, "learning_rate": 6.398333107485587e-06, "loss": 0.5943, "step": 8080 }, { "epoch": 0.4360800820247153, "grad_norm": 1.0608152851224375, "learning_rate": 6.3975855576916934e-06, "loss": 0.6378, "step": 8081 }, { "epoch": 0.4361340456532297, "grad_norm": 0.9965171404627531, "learning_rate": 6.396837982100284e-06, "loss": 0.483, "step": 8082 }, { "epoch": 0.4361880092817441, "grad_norm": 0.9961876180060363, "learning_rate": 6.3960903807328435e-06, "loss": 0.5455, "step": 8083 }, { "epoch": 0.4362419729102585, "grad_norm": 1.0989478057070279, "learning_rate": 6.395342753610861e-06, "loss": 0.4914, "step": 8084 }, { "epoch": 0.43629593653877285, "grad_norm": 0.8721894517490578, "learning_rate": 6.394595100755824e-06, "loss": 0.4328, "step": 8085 }, { "epoch": 0.43634990016728725, "grad_norm": 1.098108333041575, "learning_rate": 6.393847422189218e-06, "loss": 0.5682, "step": 8086 }, { "epoch": 0.43640386379580165, "grad_norm": 0.8411764767745809, "learning_rate": 6.3930997179325345e-06, "loss": 0.4709, "step": 8087 }, { "epoch": 0.436457827424316, "grad_norm": 0.9541046927701743, "learning_rate": 6.392351988007264e-06, "loss": 0.4826, "step": 8088 }, { "epoch": 0.4365117910528304, "grad_norm": 0.9456780092062623, "learning_rate": 6.391604232434895e-06, "loss": 0.4175, "step": 8089 }, { "epoch": 0.4365657546813448, "grad_norm": 1.1913724530178627, "learning_rate": 6.390856451236917e-06, "loss": 0.6237, "step": 8090 }, { "epoch": 0.43661971830985913, "grad_norm": 1.1821435120283528, "learning_rate": 6.390108644434828e-06, "loss": 0.48, "step": 8091 }, { "epoch": 0.4366736819383735, "grad_norm": 1.0206977484768158, "learning_rate": 6.389360812050117e-06, "loss": 0.4698, "step": 8092 }, { "epoch": 0.4367276455668879, "grad_norm": 1.165265685388421, "learning_rate": 6.388612954104276e-06, "loss": 0.6669, "step": 8093 }, { "epoch": 0.4367816091954023, "grad_norm": 1.3769500238470522, "learning_rate": 6.387865070618801e-06, "loss": 0.7257, "step": 8094 }, { "epoch": 0.43683557282391666, "grad_norm": 1.0852223288059764, "learning_rate": 6.3871171616151874e-06, "loss": 0.4733, "step": 8095 }, { "epoch": 0.43688953645243106, "grad_norm": 0.9689878733323666, "learning_rate": 6.38636922711493e-06, "loss": 0.3621, "step": 8096 }, { "epoch": 0.43694350008094546, "grad_norm": 1.0001688379111937, "learning_rate": 6.385621267139525e-06, "loss": 0.4352, "step": 8097 }, { "epoch": 0.4369974637094598, "grad_norm": 1.0424637729186745, "learning_rate": 6.3848732817104705e-06, "loss": 0.5924, "step": 8098 }, { "epoch": 0.4370514273379742, "grad_norm": 1.200248853860949, "learning_rate": 6.384125270849263e-06, "loss": 0.5096, "step": 8099 }, { "epoch": 0.4371053909664886, "grad_norm": 1.028276266773512, "learning_rate": 6.383377234577403e-06, "loss": 0.5292, "step": 8100 }, { "epoch": 0.437159354595003, "grad_norm": 0.9455452204419401, "learning_rate": 6.382629172916388e-06, "loss": 0.4091, "step": 8101 }, { "epoch": 0.43721331822351733, "grad_norm": 1.1147202392381956, "learning_rate": 6.381881085887719e-06, "loss": 0.6211, "step": 8102 }, { "epoch": 0.43726728185203173, "grad_norm": 1.071053832735288, "learning_rate": 6.381132973512896e-06, "loss": 0.5623, "step": 8103 }, { "epoch": 0.43732124548054613, "grad_norm": 1.0535799831939132, "learning_rate": 6.380384835813421e-06, "loss": 0.4967, "step": 8104 }, { "epoch": 
0.43737520910906047, "grad_norm": 0.9339735034567102, "learning_rate": 6.379636672810796e-06, "loss": 0.457, "step": 8105 }, { "epoch": 0.43742917273757487, "grad_norm": 0.8375217093843261, "learning_rate": 6.378888484526525e-06, "loss": 0.3512, "step": 8106 }, { "epoch": 0.43748313636608926, "grad_norm": 0.9654754565033535, "learning_rate": 6.378140270982111e-06, "loss": 0.4298, "step": 8107 }, { "epoch": 0.43753709999460366, "grad_norm": 1.0205784661289765, "learning_rate": 6.377392032199057e-06, "loss": 0.3585, "step": 8108 }, { "epoch": 0.437591063623118, "grad_norm": 0.9327675162870027, "learning_rate": 6.376643768198871e-06, "loss": 0.5, "step": 8109 }, { "epoch": 0.4376450272516324, "grad_norm": 0.9970830839253926, "learning_rate": 6.3758954790030565e-06, "loss": 0.4923, "step": 8110 }, { "epoch": 0.4376989908801468, "grad_norm": 1.1855793239058616, "learning_rate": 6.375147164633121e-06, "loss": 0.6608, "step": 8111 }, { "epoch": 0.43775295450866114, "grad_norm": 0.9869786164084325, "learning_rate": 6.374398825110572e-06, "loss": 0.4502, "step": 8112 }, { "epoch": 0.43780691813717554, "grad_norm": 0.9700881786400077, "learning_rate": 6.373650460456918e-06, "loss": 0.4897, "step": 8113 }, { "epoch": 0.43786088176568994, "grad_norm": 1.0699086688969823, "learning_rate": 6.372902070693666e-06, "loss": 0.5657, "step": 8114 }, { "epoch": 0.43791484539420433, "grad_norm": 0.9806957932412118, "learning_rate": 6.372153655842326e-06, "loss": 0.4564, "step": 8115 }, { "epoch": 0.4379688090227187, "grad_norm": 1.1931456001925733, "learning_rate": 6.37140521592441e-06, "loss": 0.5545, "step": 8116 }, { "epoch": 0.43802277265123307, "grad_norm": 0.757168779368349, "learning_rate": 6.370656750961427e-06, "loss": 0.31, "step": 8117 }, { "epoch": 0.43807673627974747, "grad_norm": 0.9856305847307492, "learning_rate": 6.369908260974889e-06, "loss": 0.3844, "step": 8118 }, { "epoch": 0.4381306999082618, "grad_norm": 0.8907570899339623, "learning_rate": 6.369159745986309e-06, "loss": 0.5752, "step": 8119 }, { "epoch": 0.4381846635367762, "grad_norm": 0.9880571819847156, "learning_rate": 6.368411206017199e-06, "loss": 0.6538, "step": 8120 }, { "epoch": 0.4382386271652906, "grad_norm": 0.9933601646139172, "learning_rate": 6.367662641089074e-06, "loss": 0.4543, "step": 8121 }, { "epoch": 0.43829259079380495, "grad_norm": 1.0133913158345669, "learning_rate": 6.366914051223449e-06, "loss": 0.4424, "step": 8122 }, { "epoch": 0.43834655442231935, "grad_norm": 1.1396161296497513, "learning_rate": 6.366165436441836e-06, "loss": 0.5371, "step": 8123 }, { "epoch": 0.43840051805083374, "grad_norm": 1.0524884732792306, "learning_rate": 6.365416796765755e-06, "loss": 0.5011, "step": 8124 }, { "epoch": 0.43845448167934814, "grad_norm": 1.1626336410302738, "learning_rate": 6.3646681322167196e-06, "loss": 0.559, "step": 8125 }, { "epoch": 0.4385084453078625, "grad_norm": 1.1335010796602056, "learning_rate": 6.36391944281625e-06, "loss": 0.5815, "step": 8126 }, { "epoch": 0.4385624089363769, "grad_norm": 0.9666629066518239, "learning_rate": 6.3631707285858605e-06, "loss": 0.4598, "step": 8127 }, { "epoch": 0.4386163725648913, "grad_norm": 1.0461815096795528, "learning_rate": 6.362421989547073e-06, "loss": 0.4925, "step": 8128 }, { "epoch": 0.4386703361934056, "grad_norm": 1.0858235241843988, "learning_rate": 6.361673225721406e-06, "loss": 0.5601, "step": 8129 }, { "epoch": 0.43872429982192, "grad_norm": 0.8876433346724394, "learning_rate": 6.36092443713038e-06, "loss": 0.3838, "step": 8130 }, { "epoch": 
0.4387782634504344, "grad_norm": 1.0780156616152339, "learning_rate": 6.360175623795516e-06, "loss": 0.5177, "step": 8131 }, { "epoch": 0.4388322270789488, "grad_norm": 0.9840045639439228, "learning_rate": 6.359426785738334e-06, "loss": 0.4199, "step": 8132 }, { "epoch": 0.43888619070746315, "grad_norm": 1.123332377726366, "learning_rate": 6.358677922980361e-06, "loss": 0.4749, "step": 8133 }, { "epoch": 0.43894015433597755, "grad_norm": 0.8783892431051276, "learning_rate": 6.357929035543116e-06, "loss": 0.3921, "step": 8134 }, { "epoch": 0.43899411796449195, "grad_norm": 1.034136451834459, "learning_rate": 6.357180123448121e-06, "loss": 0.4977, "step": 8135 }, { "epoch": 0.4390480815930063, "grad_norm": 0.9879846902445798, "learning_rate": 6.356431186716905e-06, "loss": 0.5931, "step": 8136 }, { "epoch": 0.4391020452215207, "grad_norm": 0.8512448211390865, "learning_rate": 6.355682225370991e-06, "loss": 0.3508, "step": 8137 }, { "epoch": 0.4391560088500351, "grad_norm": 0.8924670967537378, "learning_rate": 6.354933239431905e-06, "loss": 0.5193, "step": 8138 }, { "epoch": 0.4392099724785495, "grad_norm": 1.0896040941650025, "learning_rate": 6.354184228921174e-06, "loss": 0.524, "step": 8139 }, { "epoch": 0.4392639361070638, "grad_norm": 1.1589943101813422, "learning_rate": 6.353435193860326e-06, "loss": 0.5319, "step": 8140 }, { "epoch": 0.4393178997355782, "grad_norm": 0.8871687377890529, "learning_rate": 6.352686134270888e-06, "loss": 0.4259, "step": 8141 }, { "epoch": 0.4393718633640926, "grad_norm": 0.7771725599384475, "learning_rate": 6.3519370501743885e-06, "loss": 0.3838, "step": 8142 }, { "epoch": 0.43942582699260696, "grad_norm": 1.200714298117275, "learning_rate": 6.3511879415923586e-06, "loss": 0.64, "step": 8143 }, { "epoch": 0.43947979062112136, "grad_norm": 1.047937932962672, "learning_rate": 6.350438808546324e-06, "loss": 0.5083, "step": 8144 }, { "epoch": 0.43953375424963576, "grad_norm": 1.1978782473578398, "learning_rate": 6.349689651057821e-06, "loss": 0.3857, "step": 8145 }, { "epoch": 0.43958771787815015, "grad_norm": 1.2029055049494446, "learning_rate": 6.3489404691483795e-06, "loss": 0.6547, "step": 8146 }, { "epoch": 0.4396416815066645, "grad_norm": 0.9581872662538926, "learning_rate": 6.348191262839529e-06, "loss": 0.3754, "step": 8147 }, { "epoch": 0.4396956451351789, "grad_norm": 1.083073680084396, "learning_rate": 6.347442032152806e-06, "loss": 0.4862, "step": 8148 }, { "epoch": 0.4397496087636933, "grad_norm": 1.1286799181299354, "learning_rate": 6.346692777109742e-06, "loss": 0.4204, "step": 8149 }, { "epoch": 0.43980357239220763, "grad_norm": 0.9743331328771458, "learning_rate": 6.345943497731873e-06, "loss": 0.3359, "step": 8150 }, { "epoch": 0.43985753602072203, "grad_norm": 0.9023054104083283, "learning_rate": 6.345194194040733e-06, "loss": 0.3835, "step": 8151 }, { "epoch": 0.4399114996492364, "grad_norm": 1.1186681761070714, "learning_rate": 6.344444866057857e-06, "loss": 0.5416, "step": 8152 }, { "epoch": 0.4399654632777508, "grad_norm": 1.2603636586906632, "learning_rate": 6.343695513804783e-06, "loss": 0.5895, "step": 8153 }, { "epoch": 0.44001942690626517, "grad_norm": 0.8864033774209207, "learning_rate": 6.342946137303046e-06, "loss": 0.5217, "step": 8154 }, { "epoch": 0.44007339053477956, "grad_norm": 0.9497093675618014, "learning_rate": 6.342196736574188e-06, "loss": 0.4329, "step": 8155 }, { "epoch": 0.44012735416329396, "grad_norm": 0.9765983099653647, "learning_rate": 6.341447311639744e-06, "loss": 0.5436, "step": 8156 }, { "epoch": 
0.4401813177918083, "grad_norm": 1.1042772422743248, "learning_rate": 6.340697862521252e-06, "loss": 0.4615, "step": 8157 }, { "epoch": 0.4402352814203227, "grad_norm": 0.9380930829445366, "learning_rate": 6.339948389240256e-06, "loss": 0.3542, "step": 8158 }, { "epoch": 0.4402892450488371, "grad_norm": 1.1455537076511488, "learning_rate": 6.339198891818294e-06, "loss": 0.5423, "step": 8159 }, { "epoch": 0.44034320867735144, "grad_norm": 1.1138363715485986, "learning_rate": 6.338449370276908e-06, "loss": 0.6028, "step": 8160 }, { "epoch": 0.44039717230586584, "grad_norm": 0.8219761166196592, "learning_rate": 6.3376998246376385e-06, "loss": 0.3883, "step": 8161 }, { "epoch": 0.44045113593438023, "grad_norm": 1.2886158321449712, "learning_rate": 6.336950254922032e-06, "loss": 0.6709, "step": 8162 }, { "epoch": 0.44050509956289463, "grad_norm": 1.1059109511587166, "learning_rate": 6.336200661151627e-06, "loss": 0.5769, "step": 8163 }, { "epoch": 0.440559063191409, "grad_norm": 1.0291512470794295, "learning_rate": 6.335451043347972e-06, "loss": 0.3985, "step": 8164 }, { "epoch": 0.44061302681992337, "grad_norm": 0.924734911999811, "learning_rate": 6.334701401532609e-06, "loss": 0.4196, "step": 8165 }, { "epoch": 0.44066699044843777, "grad_norm": 1.039201638852191, "learning_rate": 6.333951735727085e-06, "loss": 0.5746, "step": 8166 }, { "epoch": 0.4407209540769521, "grad_norm": 1.0363687422139771, "learning_rate": 6.333202045952945e-06, "loss": 0.4912, "step": 8167 }, { "epoch": 0.4407749177054665, "grad_norm": 1.1160911142843102, "learning_rate": 6.332452332231737e-06, "loss": 0.5686, "step": 8168 }, { "epoch": 0.4408288813339809, "grad_norm": 0.9348136448116612, "learning_rate": 6.3317025945850064e-06, "loss": 0.3763, "step": 8169 }, { "epoch": 0.4408828449624953, "grad_norm": 0.8627989511814855, "learning_rate": 6.330952833034304e-06, "loss": 0.4318, "step": 8170 }, { "epoch": 0.44093680859100964, "grad_norm": 0.9499500987728281, "learning_rate": 6.330203047601178e-06, "loss": 0.5124, "step": 8171 }, { "epoch": 0.44099077221952404, "grad_norm": 0.9239559119230113, "learning_rate": 6.329453238307176e-06, "loss": 0.3238, "step": 8172 }, { "epoch": 0.44104473584803844, "grad_norm": 0.9714223164850403, "learning_rate": 6.328703405173851e-06, "loss": 0.3856, "step": 8173 }, { "epoch": 0.4410986994765528, "grad_norm": 1.2146435155933755, "learning_rate": 6.327953548222753e-06, "loss": 0.7072, "step": 8174 }, { "epoch": 0.4411526631050672, "grad_norm": 1.0192817279028203, "learning_rate": 6.327203667475432e-06, "loss": 0.5503, "step": 8175 }, { "epoch": 0.4412066267335816, "grad_norm": 0.807412687359062, "learning_rate": 6.326453762953442e-06, "loss": 0.4799, "step": 8176 }, { "epoch": 0.441260590362096, "grad_norm": 0.9864247211217492, "learning_rate": 6.3257038346783374e-06, "loss": 0.459, "step": 8177 }, { "epoch": 0.4413145539906103, "grad_norm": 1.0599515214229398, "learning_rate": 6.32495388267167e-06, "loss": 0.4568, "step": 8178 }, { "epoch": 0.4413685176191247, "grad_norm": 1.023233013595788, "learning_rate": 6.324203906954993e-06, "loss": 0.506, "step": 8179 }, { "epoch": 0.4414224812476391, "grad_norm": 0.9538386719147528, "learning_rate": 6.323453907549864e-06, "loss": 0.3746, "step": 8180 }, { "epoch": 0.44147644487615345, "grad_norm": 0.8962383666827701, "learning_rate": 6.322703884477836e-06, "loss": 0.5594, "step": 8181 }, { "epoch": 0.44153040850466785, "grad_norm": 0.8091687386812749, "learning_rate": 6.3219538377604695e-06, "loss": 0.3542, "step": 8182 }, { "epoch": 
0.44158437213318225, "grad_norm": 1.0838700672742936, "learning_rate": 6.321203767419317e-06, "loss": 0.6996, "step": 8183 }, { "epoch": 0.44163833576169664, "grad_norm": 0.9145949421530559, "learning_rate": 6.320453673475939e-06, "loss": 0.3333, "step": 8184 }, { "epoch": 0.441692299390211, "grad_norm": 0.9877080777036605, "learning_rate": 6.319703555951895e-06, "loss": 0.4811, "step": 8185 }, { "epoch": 0.4417462630187254, "grad_norm": 1.2919528204982245, "learning_rate": 6.3189534148687405e-06, "loss": 0.4917, "step": 8186 }, { "epoch": 0.4418002266472398, "grad_norm": 0.9677924561092979, "learning_rate": 6.318203250248038e-06, "loss": 0.4048, "step": 8187 }, { "epoch": 0.4418541902757541, "grad_norm": 1.1427039498956448, "learning_rate": 6.317453062111349e-06, "loss": 0.5838, "step": 8188 }, { "epoch": 0.4419081539042685, "grad_norm": 1.0162853819867161, "learning_rate": 6.31670285048023e-06, "loss": 0.4848, "step": 8189 }, { "epoch": 0.4419621175327829, "grad_norm": 1.118976014419981, "learning_rate": 6.3159526153762486e-06, "loss": 0.4782, "step": 8190 }, { "epoch": 0.44201608116129726, "grad_norm": 1.0351736930399245, "learning_rate": 6.3152023568209644e-06, "loss": 0.4111, "step": 8191 }, { "epoch": 0.44207004478981166, "grad_norm": 1.0442399579263657, "learning_rate": 6.314452074835939e-06, "loss": 0.4251, "step": 8192 }, { "epoch": 0.44212400841832605, "grad_norm": 1.213416172423367, "learning_rate": 6.313701769442739e-06, "loss": 0.5543, "step": 8193 }, { "epoch": 0.44217797204684045, "grad_norm": 1.0284956045707423, "learning_rate": 6.312951440662929e-06, "loss": 0.3544, "step": 8194 }, { "epoch": 0.4422319356753548, "grad_norm": 0.987123132657458, "learning_rate": 6.312201088518072e-06, "loss": 0.4435, "step": 8195 }, { "epoch": 0.4422858993038692, "grad_norm": 1.2694454068825594, "learning_rate": 6.311450713029735e-06, "loss": 0.587, "step": 8196 }, { "epoch": 0.4423398629323836, "grad_norm": 0.8288299916585606, "learning_rate": 6.3107003142194845e-06, "loss": 0.3729, "step": 8197 }, { "epoch": 0.44239382656089793, "grad_norm": 0.9023748236410828, "learning_rate": 6.309949892108888e-06, "loss": 0.3403, "step": 8198 }, { "epoch": 0.4424477901894123, "grad_norm": 1.0731385703159089, "learning_rate": 6.309199446719514e-06, "loss": 0.4381, "step": 8199 }, { "epoch": 0.4425017538179267, "grad_norm": 0.9221213853348443, "learning_rate": 6.308448978072929e-06, "loss": 0.373, "step": 8200 }, { "epoch": 0.4425557174464411, "grad_norm": 0.9584848058545735, "learning_rate": 6.307698486190703e-06, "loss": 0.4064, "step": 8201 }, { "epoch": 0.44260968107495546, "grad_norm": 1.0326958618881306, "learning_rate": 6.306947971094408e-06, "loss": 0.4916, "step": 8202 }, { "epoch": 0.44266364470346986, "grad_norm": 1.0361854963113555, "learning_rate": 6.306197432805613e-06, "loss": 0.6035, "step": 8203 }, { "epoch": 0.44271760833198426, "grad_norm": 1.1644927444367013, "learning_rate": 6.305446871345887e-06, "loss": 0.4753, "step": 8204 }, { "epoch": 0.4427715719604986, "grad_norm": 1.0237868915227617, "learning_rate": 6.304696286736806e-06, "loss": 0.4134, "step": 8205 }, { "epoch": 0.442825535589013, "grad_norm": 1.0892502066123266, "learning_rate": 6.30394567899994e-06, "loss": 0.6368, "step": 8206 }, { "epoch": 0.4428794992175274, "grad_norm": 0.8741782152441725, "learning_rate": 6.303195048156864e-06, "loss": 0.4734, "step": 8207 }, { "epoch": 0.4429334628460418, "grad_norm": 0.8381479574640173, "learning_rate": 6.30244439422915e-06, "loss": 0.2939, "step": 8208 }, { "epoch": 
0.44298742647455613, "grad_norm": 1.09090737181508, "learning_rate": 6.301693717238373e-06, "loss": 0.4413, "step": 8209 }, { "epoch": 0.44304139010307053, "grad_norm": 1.054117729719635, "learning_rate": 6.300943017206108e-06, "loss": 0.4736, "step": 8210 }, { "epoch": 0.44309535373158493, "grad_norm": 1.1298269340883949, "learning_rate": 6.300192294153932e-06, "loss": 0.4254, "step": 8211 }, { "epoch": 0.44314931736009927, "grad_norm": 1.0717817649100072, "learning_rate": 6.299441548103421e-06, "loss": 0.5625, "step": 8212 }, { "epoch": 0.44320328098861367, "grad_norm": 0.9637728890140532, "learning_rate": 6.298690779076153e-06, "loss": 0.3913, "step": 8213 }, { "epoch": 0.44325724461712807, "grad_norm": 0.8325067726490245, "learning_rate": 6.297939987093703e-06, "loss": 0.2968, "step": 8214 }, { "epoch": 0.44331120824564246, "grad_norm": 1.154098914995121, "learning_rate": 6.297189172177652e-06, "loss": 0.5368, "step": 8215 }, { "epoch": 0.4433651718741568, "grad_norm": 1.1915841719540385, "learning_rate": 6.2964383343495806e-06, "loss": 0.4341, "step": 8216 }, { "epoch": 0.4434191355026712, "grad_norm": 0.9890774317884556, "learning_rate": 6.295687473631066e-06, "loss": 0.634, "step": 8217 }, { "epoch": 0.4434730991311856, "grad_norm": 0.9752077991895186, "learning_rate": 6.294936590043688e-06, "loss": 0.4282, "step": 8218 }, { "epoch": 0.44352706275969994, "grad_norm": 1.1067859025873192, "learning_rate": 6.29418568360903e-06, "loss": 0.4941, "step": 8219 }, { "epoch": 0.44358102638821434, "grad_norm": 1.1825300788237498, "learning_rate": 6.293434754348673e-06, "loss": 0.5656, "step": 8220 }, { "epoch": 0.44363499001672874, "grad_norm": 0.8317822048245727, "learning_rate": 6.292683802284199e-06, "loss": 0.3694, "step": 8221 }, { "epoch": 0.44368895364524313, "grad_norm": 0.8759998119374629, "learning_rate": 6.291932827437191e-06, "loss": 0.4113, "step": 8222 }, { "epoch": 0.4437429172737575, "grad_norm": 0.8129601882371275, "learning_rate": 6.291181829829236e-06, "loss": 0.3685, "step": 8223 }, { "epoch": 0.4437968809022719, "grad_norm": 0.7864725287095008, "learning_rate": 6.2904308094819145e-06, "loss": 0.2948, "step": 8224 }, { "epoch": 0.44385084453078627, "grad_norm": 0.954843239468029, "learning_rate": 6.289679766416812e-06, "loss": 0.3572, "step": 8225 }, { "epoch": 0.4439048081593006, "grad_norm": 1.1197378145195935, "learning_rate": 6.288928700655516e-06, "loss": 0.4824, "step": 8226 }, { "epoch": 0.443958771787815, "grad_norm": 1.1840832461807584, "learning_rate": 6.288177612219612e-06, "loss": 0.6109, "step": 8227 }, { "epoch": 0.4440127354163294, "grad_norm": 1.0799232856004923, "learning_rate": 6.287426501130686e-06, "loss": 0.5627, "step": 8228 }, { "epoch": 0.44406669904484375, "grad_norm": 1.4080392936876376, "learning_rate": 6.286675367410329e-06, "loss": 0.7502, "step": 8229 }, { "epoch": 0.44412066267335815, "grad_norm": 1.0806069744294307, "learning_rate": 6.285924211080124e-06, "loss": 0.4956, "step": 8230 }, { "epoch": 0.44417462630187254, "grad_norm": 0.7834499491958148, "learning_rate": 6.285173032161665e-06, "loss": 0.3141, "step": 8231 }, { "epoch": 0.44422858993038694, "grad_norm": 1.0866560282979345, "learning_rate": 6.284421830676538e-06, "loss": 0.4347, "step": 8232 }, { "epoch": 0.4442825535589013, "grad_norm": 0.9480702697277158, "learning_rate": 6.283670606646336e-06, "loss": 0.4358, "step": 8233 }, { "epoch": 0.4443365171874157, "grad_norm": 0.9220059359209927, "learning_rate": 6.282919360092649e-06, "loss": 0.4317, "step": 8234 }, { "epoch": 
0.4443904808159301, "grad_norm": 1.1988366135052093, "learning_rate": 6.282168091037067e-06, "loss": 0.669, "step": 8235 }, { "epoch": 0.4444444444444444, "grad_norm": 1.1111110270476083, "learning_rate": 6.281416799501188e-06, "loss": 0.5348, "step": 8236 }, { "epoch": 0.4444984080729588, "grad_norm": 1.0270444262977099, "learning_rate": 6.280665485506596e-06, "loss": 0.4979, "step": 8237 }, { "epoch": 0.4445523717014732, "grad_norm": 1.0021732095779479, "learning_rate": 6.2799141490748915e-06, "loss": 0.4451, "step": 8238 }, { "epoch": 0.4446063353299876, "grad_norm": 0.9185118107048618, "learning_rate": 6.279162790227666e-06, "loss": 0.4588, "step": 8239 }, { "epoch": 0.44466029895850195, "grad_norm": 0.7817167615419632, "learning_rate": 6.278411408986515e-06, "loss": 0.3816, "step": 8240 }, { "epoch": 0.44471426258701635, "grad_norm": 1.0672024629976542, "learning_rate": 6.277660005373033e-06, "loss": 0.6394, "step": 8241 }, { "epoch": 0.44476822621553075, "grad_norm": 0.8798847536131332, "learning_rate": 6.276908579408817e-06, "loss": 0.4219, "step": 8242 }, { "epoch": 0.4448221898440451, "grad_norm": 1.3314417753540595, "learning_rate": 6.276157131115463e-06, "loss": 0.5775, "step": 8243 }, { "epoch": 0.4448761534725595, "grad_norm": 0.8686468218945674, "learning_rate": 6.27540566051457e-06, "loss": 0.4752, "step": 8244 }, { "epoch": 0.4449301171010739, "grad_norm": 0.9270347874248582, "learning_rate": 6.274654167627732e-06, "loss": 0.4318, "step": 8245 }, { "epoch": 0.4449840807295883, "grad_norm": 1.1602155861773826, "learning_rate": 6.2739026524765535e-06, "loss": 0.5141, "step": 8246 }, { "epoch": 0.4450380443581026, "grad_norm": 0.8589735913922834, "learning_rate": 6.27315111508263e-06, "loss": 0.3548, "step": 8247 }, { "epoch": 0.445092007986617, "grad_norm": 0.8935179007468842, "learning_rate": 6.272399555467562e-06, "loss": 0.5043, "step": 8248 }, { "epoch": 0.4451459716151314, "grad_norm": 1.1337664081952104, "learning_rate": 6.27164797365295e-06, "loss": 0.5161, "step": 8249 }, { "epoch": 0.44519993524364576, "grad_norm": 1.0080019638462099, "learning_rate": 6.270896369660396e-06, "loss": 0.3475, "step": 8250 }, { "epoch": 0.44525389887216016, "grad_norm": 0.8889086586750827, "learning_rate": 6.2701447435115e-06, "loss": 0.3803, "step": 8251 }, { "epoch": 0.44530786250067456, "grad_norm": 1.0485007018478847, "learning_rate": 6.269393095227867e-06, "loss": 0.4782, "step": 8252 }, { "epoch": 0.44536182612918895, "grad_norm": 0.976537839521626, "learning_rate": 6.268641424831099e-06, "loss": 0.4625, "step": 8253 }, { "epoch": 0.4454157897577033, "grad_norm": 0.9007209484689772, "learning_rate": 6.267889732342801e-06, "loss": 0.342, "step": 8254 }, { "epoch": 0.4454697533862177, "grad_norm": 0.9943723513430234, "learning_rate": 6.267138017784574e-06, "loss": 0.4716, "step": 8255 }, { "epoch": 0.4455237170147321, "grad_norm": 1.1286679947400704, "learning_rate": 6.266386281178027e-06, "loss": 0.6132, "step": 8256 }, { "epoch": 0.44557768064324643, "grad_norm": 1.207032797681148, "learning_rate": 6.265634522544764e-06, "loss": 0.4628, "step": 8257 }, { "epoch": 0.44563164427176083, "grad_norm": 0.9660067187717021, "learning_rate": 6.2648827419063895e-06, "loss": 0.5171, "step": 8258 }, { "epoch": 0.4456856079002752, "grad_norm": 1.141916691625657, "learning_rate": 6.264130939284512e-06, "loss": 0.481, "step": 8259 }, { "epoch": 0.44573957152878957, "grad_norm": 1.0683508061794467, "learning_rate": 6.2633791147007405e-06, "loss": 0.4122, "step": 8260 }, { "epoch": 
0.44579353515730397, "grad_norm": 0.8414162974271109, "learning_rate": 6.262627268176682e-06, "loss": 0.4005, "step": 8261 }, { "epoch": 0.44584749878581836, "grad_norm": 0.7957324879708162, "learning_rate": 6.261875399733944e-06, "loss": 0.3489, "step": 8262 }, { "epoch": 0.44590146241433276, "grad_norm": 1.1980065814666387, "learning_rate": 6.261123509394138e-06, "loss": 0.4865, "step": 8263 }, { "epoch": 0.4459554260428471, "grad_norm": 1.0324751228374605, "learning_rate": 6.2603715971788735e-06, "loss": 0.5615, "step": 8264 }, { "epoch": 0.4460093896713615, "grad_norm": 1.0205668026300676, "learning_rate": 6.259619663109762e-06, "loss": 0.4992, "step": 8265 }, { "epoch": 0.4460633532998759, "grad_norm": 0.9961386734942749, "learning_rate": 6.258867707208412e-06, "loss": 0.5174, "step": 8266 }, { "epoch": 0.44611731692839024, "grad_norm": 0.9390337589808205, "learning_rate": 6.258115729496438e-06, "loss": 0.4027, "step": 8267 }, { "epoch": 0.44617128055690464, "grad_norm": 0.9822247636911285, "learning_rate": 6.257363729995452e-06, "loss": 0.4304, "step": 8268 }, { "epoch": 0.44622524418541903, "grad_norm": 0.9810901313736503, "learning_rate": 6.256611708727067e-06, "loss": 0.4036, "step": 8269 }, { "epoch": 0.44627920781393343, "grad_norm": 1.0819434472966634, "learning_rate": 6.255859665712897e-06, "loss": 0.5296, "step": 8270 }, { "epoch": 0.4463331714424478, "grad_norm": 0.8559395133237707, "learning_rate": 6.255107600974557e-06, "loss": 0.3429, "step": 8271 }, { "epoch": 0.44638713507096217, "grad_norm": 1.0165905916419231, "learning_rate": 6.254355514533661e-06, "loss": 0.4707, "step": 8272 }, { "epoch": 0.44644109869947657, "grad_norm": 1.1912514391587248, "learning_rate": 6.2536034064118245e-06, "loss": 0.5647, "step": 8273 }, { "epoch": 0.4464950623279909, "grad_norm": 0.8507150505266678, "learning_rate": 6.2528512766306674e-06, "loss": 0.4495, "step": 8274 }, { "epoch": 0.4465490259565053, "grad_norm": 1.1838393451015867, "learning_rate": 6.2520991252118005e-06, "loss": 0.5475, "step": 8275 }, { "epoch": 0.4466029895850197, "grad_norm": 0.8320536995197675, "learning_rate": 6.251346952176847e-06, "loss": 0.3026, "step": 8276 }, { "epoch": 0.4466569532135341, "grad_norm": 1.2358260879971017, "learning_rate": 6.250594757547422e-06, "loss": 0.3825, "step": 8277 }, { "epoch": 0.44671091684204844, "grad_norm": 1.2582339478268636, "learning_rate": 6.2498425413451455e-06, "loss": 0.6025, "step": 8278 }, { "epoch": 0.44676488047056284, "grad_norm": 1.0614539401854632, "learning_rate": 6.2490903035916365e-06, "loss": 0.4201, "step": 8279 }, { "epoch": 0.44681884409907724, "grad_norm": 1.0489981466148692, "learning_rate": 6.248338044308515e-06, "loss": 0.5237, "step": 8280 }, { "epoch": 0.4468728077275916, "grad_norm": 1.0946284703330373, "learning_rate": 6.247585763517402e-06, "loss": 0.4316, "step": 8281 }, { "epoch": 0.446926771356106, "grad_norm": 0.9949303875092664, "learning_rate": 6.246833461239921e-06, "loss": 0.4473, "step": 8282 }, { "epoch": 0.4469807349846204, "grad_norm": 1.124923389048746, "learning_rate": 6.246081137497689e-06, "loss": 0.5006, "step": 8283 }, { "epoch": 0.4470346986131348, "grad_norm": 1.1364614517118319, "learning_rate": 6.245328792312332e-06, "loss": 0.6883, "step": 8284 }, { "epoch": 0.4470886622416491, "grad_norm": 0.8968541085967444, "learning_rate": 6.2445764257054715e-06, "loss": 0.4419, "step": 8285 }, { "epoch": 0.4471426258701635, "grad_norm": 1.01887116537739, "learning_rate": 6.243824037698734e-06, "loss": 0.5444, "step": 8286 }, { "epoch": 
0.4471965894986779, "grad_norm": 1.1322230658838248, "learning_rate": 6.243071628313741e-06, "loss": 0.5171, "step": 8287 }, { "epoch": 0.44725055312719225, "grad_norm": 0.9174488917296533, "learning_rate": 6.2423191975721195e-06, "loss": 0.4484, "step": 8288 }, { "epoch": 0.44730451675570665, "grad_norm": 1.0608661624047486, "learning_rate": 6.241566745495493e-06, "loss": 0.4541, "step": 8289 }, { "epoch": 0.44735848038422105, "grad_norm": 0.9951625557270805, "learning_rate": 6.24081427210549e-06, "loss": 0.4436, "step": 8290 }, { "epoch": 0.44741244401273544, "grad_norm": 1.0419475416538353, "learning_rate": 6.2400617774237345e-06, "loss": 0.5386, "step": 8291 }, { "epoch": 0.4474664076412498, "grad_norm": 1.2347215256909756, "learning_rate": 6.2393092614718585e-06, "loss": 0.6277, "step": 8292 }, { "epoch": 0.4475203712697642, "grad_norm": 0.7676799061597555, "learning_rate": 6.238556724271485e-06, "loss": 0.382, "step": 8293 }, { "epoch": 0.4475743348982786, "grad_norm": 0.9588430752199348, "learning_rate": 6.237804165844247e-06, "loss": 0.4675, "step": 8294 }, { "epoch": 0.4476282985267929, "grad_norm": 0.9119965845666558, "learning_rate": 6.237051586211771e-06, "loss": 0.4194, "step": 8295 }, { "epoch": 0.4476822621553073, "grad_norm": 1.118102031488165, "learning_rate": 6.236298985395687e-06, "loss": 0.5585, "step": 8296 }, { "epoch": 0.4477362257838217, "grad_norm": 1.272839837054946, "learning_rate": 6.235546363417626e-06, "loss": 0.7868, "step": 8297 }, { "epoch": 0.44779018941233606, "grad_norm": 1.2080708868311492, "learning_rate": 6.23479372029922e-06, "loss": 0.4729, "step": 8298 }, { "epoch": 0.44784415304085046, "grad_norm": 0.882292242508779, "learning_rate": 6.2340410560621e-06, "loss": 0.4413, "step": 8299 }, { "epoch": 0.44789811666936485, "grad_norm": 1.1725534847153798, "learning_rate": 6.233288370727898e-06, "loss": 0.7341, "step": 8300 }, { "epoch": 0.44795208029787925, "grad_norm": 0.8902496313702011, "learning_rate": 6.232535664318248e-06, "loss": 0.3883, "step": 8301 }, { "epoch": 0.4480060439263936, "grad_norm": 0.9755453942074706, "learning_rate": 6.231782936854783e-06, "loss": 0.4675, "step": 8302 }, { "epoch": 0.448060007554908, "grad_norm": 1.0832986342892634, "learning_rate": 6.231030188359136e-06, "loss": 0.469, "step": 8303 }, { "epoch": 0.4481139711834224, "grad_norm": 0.9594125867379704, "learning_rate": 6.230277418852942e-06, "loss": 0.5057, "step": 8304 }, { "epoch": 0.44816793481193673, "grad_norm": 1.0079701577881373, "learning_rate": 6.229524628357838e-06, "loss": 0.3428, "step": 8305 }, { "epoch": 0.4482218984404511, "grad_norm": 0.962475534638248, "learning_rate": 6.228771816895459e-06, "loss": 0.5037, "step": 8306 }, { "epoch": 0.4482758620689655, "grad_norm": 1.4026839396885045, "learning_rate": 6.228018984487443e-06, "loss": 0.6352, "step": 8307 }, { "epoch": 0.4483298256974799, "grad_norm": 1.0513368844937503, "learning_rate": 6.2272661311554235e-06, "loss": 0.4776, "step": 8308 }, { "epoch": 0.44838378932599426, "grad_norm": 0.9121626387900661, "learning_rate": 6.226513256921042e-06, "loss": 0.4673, "step": 8309 }, { "epoch": 0.44843775295450866, "grad_norm": 0.8371355145290016, "learning_rate": 6.2257603618059335e-06, "loss": 0.359, "step": 8310 }, { "epoch": 0.44849171658302306, "grad_norm": 1.151421452181055, "learning_rate": 6.225007445831741e-06, "loss": 0.5794, "step": 8311 }, { "epoch": 0.4485456802115374, "grad_norm": 0.9334231884552951, "learning_rate": 6.2242545090201025e-06, "loss": 0.4177, "step": 8312 }, { "epoch": 
0.4485996438400518, "grad_norm": 1.0115269486691854, "learning_rate": 6.223501551392656e-06, "loss": 0.4407, "step": 8313 }, { "epoch": 0.4486536074685662, "grad_norm": 1.098379406537149, "learning_rate": 6.222748572971044e-06, "loss": 0.5261, "step": 8314 }, { "epoch": 0.4487075710970806, "grad_norm": 0.9984254964387551, "learning_rate": 6.221995573776907e-06, "loss": 0.4602, "step": 8315 }, { "epoch": 0.44876153472559493, "grad_norm": 1.0385443864372967, "learning_rate": 6.221242553831889e-06, "loss": 0.5939, "step": 8316 }, { "epoch": 0.44881549835410933, "grad_norm": 0.8991944262663476, "learning_rate": 6.2204895131576315e-06, "loss": 0.4789, "step": 8317 }, { "epoch": 0.44886946198262373, "grad_norm": 1.0129856418267855, "learning_rate": 6.219736451775777e-06, "loss": 0.4682, "step": 8318 }, { "epoch": 0.44892342561113807, "grad_norm": 0.9235633118556752, "learning_rate": 6.218983369707969e-06, "loss": 0.52, "step": 8319 }, { "epoch": 0.44897738923965247, "grad_norm": 0.8832354205589996, "learning_rate": 6.2182302669758534e-06, "loss": 0.4493, "step": 8320 }, { "epoch": 0.44903135286816687, "grad_norm": 1.131605401898967, "learning_rate": 6.217477143601073e-06, "loss": 0.4993, "step": 8321 }, { "epoch": 0.44908531649668126, "grad_norm": 1.0258323143659747, "learning_rate": 6.216723999605275e-06, "loss": 0.4683, "step": 8322 }, { "epoch": 0.4491392801251956, "grad_norm": 0.8246839525219615, "learning_rate": 6.2159708350101064e-06, "loss": 0.3551, "step": 8323 }, { "epoch": 0.44919324375371, "grad_norm": 1.1903603655530142, "learning_rate": 6.215217649837212e-06, "loss": 0.4971, "step": 8324 }, { "epoch": 0.4492472073822244, "grad_norm": 1.04491046368811, "learning_rate": 6.214464444108238e-06, "loss": 0.5212, "step": 8325 }, { "epoch": 0.44930117101073874, "grad_norm": 0.7817496636603269, "learning_rate": 6.2137112178448356e-06, "loss": 0.4048, "step": 8326 }, { "epoch": 0.44935513463925314, "grad_norm": 1.0367474577104214, "learning_rate": 6.212957971068652e-06, "loss": 0.5398, "step": 8327 }, { "epoch": 0.44940909826776754, "grad_norm": 0.9083244646446756, "learning_rate": 6.212204703801334e-06, "loss": 0.3629, "step": 8328 }, { "epoch": 0.4494630618962819, "grad_norm": 0.9703374855091751, "learning_rate": 6.211451416064536e-06, "loss": 0.4534, "step": 8329 }, { "epoch": 0.4495170255247963, "grad_norm": 1.0219002599005267, "learning_rate": 6.210698107879903e-06, "loss": 0.538, "step": 8330 }, { "epoch": 0.4495709891533107, "grad_norm": 0.8619922925486874, "learning_rate": 6.209944779269088e-06, "loss": 0.353, "step": 8331 }, { "epoch": 0.44962495278182507, "grad_norm": 0.9930825798100148, "learning_rate": 6.209191430253743e-06, "loss": 0.3964, "step": 8332 }, { "epoch": 0.4496789164103394, "grad_norm": 0.9691815783001185, "learning_rate": 6.208438060855522e-06, "loss": 0.3916, "step": 8333 }, { "epoch": 0.4497328800388538, "grad_norm": 1.0870076654952308, "learning_rate": 6.207684671096073e-06, "loss": 0.5516, "step": 8334 }, { "epoch": 0.4497868436673682, "grad_norm": 1.0839231078343547, "learning_rate": 6.206931260997052e-06, "loss": 0.4722, "step": 8335 }, { "epoch": 0.44984080729588255, "grad_norm": 1.1291991686581544, "learning_rate": 6.206177830580113e-06, "loss": 0.5012, "step": 8336 }, { "epoch": 0.44989477092439695, "grad_norm": 1.1284066685510572, "learning_rate": 6.205424379866909e-06, "loss": 0.5494, "step": 8337 }, { "epoch": 0.44994873455291134, "grad_norm": 0.9301465594589194, "learning_rate": 6.204670908879095e-06, "loss": 0.3552, "step": 8338 }, { "epoch": 
0.45000269818142574, "grad_norm": 1.1763205306953284, "learning_rate": 6.203917417638327e-06, "loss": 0.5184, "step": 8339 }, { "epoch": 0.4500566618099401, "grad_norm": 1.06251513760377, "learning_rate": 6.203163906166263e-06, "loss": 0.5114, "step": 8340 }, { "epoch": 0.4501106254384545, "grad_norm": 1.1333362481581362, "learning_rate": 6.202410374484557e-06, "loss": 0.5058, "step": 8341 }, { "epoch": 0.4501645890669689, "grad_norm": 0.9318898124912248, "learning_rate": 6.201656822614865e-06, "loss": 0.3844, "step": 8342 }, { "epoch": 0.4502185526954832, "grad_norm": 1.0563534968644528, "learning_rate": 6.20090325057885e-06, "loss": 0.528, "step": 8343 }, { "epoch": 0.4502725163239976, "grad_norm": 0.9993405420382931, "learning_rate": 6.200149658398166e-06, "loss": 0.5493, "step": 8344 }, { "epoch": 0.450326479952512, "grad_norm": 0.9457576795836067, "learning_rate": 6.1993960460944736e-06, "loss": 0.4024, "step": 8345 }, { "epoch": 0.4503804435810264, "grad_norm": 1.1096445902737497, "learning_rate": 6.198642413689432e-06, "loss": 0.5194, "step": 8346 }, { "epoch": 0.45043440720954075, "grad_norm": 1.2304162848135645, "learning_rate": 6.197888761204702e-06, "loss": 0.4624, "step": 8347 }, { "epoch": 0.45048837083805515, "grad_norm": 1.0012605899037046, "learning_rate": 6.197135088661943e-06, "loss": 0.3959, "step": 8348 }, { "epoch": 0.45054233446656955, "grad_norm": 1.216003438390205, "learning_rate": 6.1963813960828165e-06, "loss": 0.6588, "step": 8349 }, { "epoch": 0.4505962980950839, "grad_norm": 0.994540218474625, "learning_rate": 6.195627683488987e-06, "loss": 0.5286, "step": 8350 }, { "epoch": 0.4506502617235983, "grad_norm": 1.038760729273064, "learning_rate": 6.194873950902114e-06, "loss": 0.5037, "step": 8351 }, { "epoch": 0.4507042253521127, "grad_norm": 1.120989378994448, "learning_rate": 6.19412019834386e-06, "loss": 0.673, "step": 8352 }, { "epoch": 0.4507581889806271, "grad_norm": 1.0111310770412425, "learning_rate": 6.193366425835893e-06, "loss": 0.5204, "step": 8353 }, { "epoch": 0.4508121526091414, "grad_norm": 0.9703600015052282, "learning_rate": 6.1926126333998734e-06, "loss": 0.5822, "step": 8354 }, { "epoch": 0.4508661162376558, "grad_norm": 0.9622357499434295, "learning_rate": 6.191858821057466e-06, "loss": 0.45, "step": 8355 }, { "epoch": 0.4509200798661702, "grad_norm": 1.039420922409652, "learning_rate": 6.191104988830338e-06, "loss": 0.4756, "step": 8356 }, { "epoch": 0.45097404349468456, "grad_norm": 0.9776075490361592, "learning_rate": 6.190351136740153e-06, "loss": 0.5327, "step": 8357 }, { "epoch": 0.45102800712319896, "grad_norm": 1.0125723817976613, "learning_rate": 6.1895972648085804e-06, "loss": 0.4998, "step": 8358 }, { "epoch": 0.45108197075171336, "grad_norm": 0.9999117314031796, "learning_rate": 6.188843373057284e-06, "loss": 0.5464, "step": 8359 }, { "epoch": 0.4511359343802277, "grad_norm": 1.0246430134366973, "learning_rate": 6.188089461507933e-06, "loss": 0.5534, "step": 8360 }, { "epoch": 0.4511898980087421, "grad_norm": 0.9853291286500127, "learning_rate": 6.187335530182197e-06, "loss": 0.4887, "step": 8361 }, { "epoch": 0.4512438616372565, "grad_norm": 0.8041286237980081, "learning_rate": 6.1865815791017435e-06, "loss": 0.2845, "step": 8362 }, { "epoch": 0.4512978252657709, "grad_norm": 1.012319729362179, "learning_rate": 6.1858276082882406e-06, "loss": 0.5868, "step": 8363 }, { "epoch": 0.45135178889428523, "grad_norm": 1.0324055590509715, "learning_rate": 6.185073617763359e-06, "loss": 0.5689, "step": 8364 }, { "epoch": 
0.45140575252279963, "grad_norm": 0.980216297373514, "learning_rate": 6.184319607548771e-06, "loss": 0.5346, "step": 8365 }, { "epoch": 0.45145971615131403, "grad_norm": 0.9669955495748339, "learning_rate": 6.183565577666144e-06, "loss": 0.5843, "step": 8366 }, { "epoch": 0.45151367977982837, "grad_norm": 1.1596689194399683, "learning_rate": 6.182811528137154e-06, "loss": 0.6075, "step": 8367 }, { "epoch": 0.45156764340834277, "grad_norm": 0.842196522859393, "learning_rate": 6.182057458983469e-06, "loss": 0.4574, "step": 8368 }, { "epoch": 0.45162160703685716, "grad_norm": 1.0362438583159617, "learning_rate": 6.181303370226764e-06, "loss": 0.4925, "step": 8369 }, { "epoch": 0.45167557066537156, "grad_norm": 1.2152759413158918, "learning_rate": 6.180549261888711e-06, "loss": 0.5789, "step": 8370 }, { "epoch": 0.4517295342938859, "grad_norm": 0.9103548226380835, "learning_rate": 6.179795133990986e-06, "loss": 0.4999, "step": 8371 }, { "epoch": 0.4517834979224003, "grad_norm": 1.1487731602898519, "learning_rate": 6.179040986555261e-06, "loss": 0.5211, "step": 8372 }, { "epoch": 0.4518374615509147, "grad_norm": 1.0812676407560937, "learning_rate": 6.178286819603212e-06, "loss": 0.4619, "step": 8373 }, { "epoch": 0.45189142517942904, "grad_norm": 0.8267472326923944, "learning_rate": 6.1775326331565154e-06, "loss": 0.273, "step": 8374 }, { "epoch": 0.45194538880794344, "grad_norm": 0.9225753201805581, "learning_rate": 6.1767784272368445e-06, "loss": 0.441, "step": 8375 }, { "epoch": 0.45199935243645784, "grad_norm": 1.204706833911825, "learning_rate": 6.176024201865879e-06, "loss": 0.5823, "step": 8376 }, { "epoch": 0.45205331606497223, "grad_norm": 0.9391251589629964, "learning_rate": 6.175269957065294e-06, "loss": 0.3979, "step": 8377 }, { "epoch": 0.4521072796934866, "grad_norm": 0.9858315946353605, "learning_rate": 6.174515692856769e-06, "loss": 0.392, "step": 8378 }, { "epoch": 0.45216124332200097, "grad_norm": 0.9188130667507144, "learning_rate": 6.173761409261982e-06, "loss": 0.4426, "step": 8379 }, { "epoch": 0.45221520695051537, "grad_norm": 1.0188839161177743, "learning_rate": 6.1730071063026095e-06, "loss": 0.4881, "step": 8380 }, { "epoch": 0.4522691705790297, "grad_norm": 0.9640000889921666, "learning_rate": 6.1722527840003354e-06, "loss": 0.4899, "step": 8381 }, { "epoch": 0.4523231342075441, "grad_norm": 1.0662342160455445, "learning_rate": 6.171498442376834e-06, "loss": 0.4939, "step": 8382 }, { "epoch": 0.4523770978360585, "grad_norm": 1.173047501051285, "learning_rate": 6.170744081453791e-06, "loss": 0.4825, "step": 8383 }, { "epoch": 0.4524310614645729, "grad_norm": 1.0085298781216618, "learning_rate": 6.169989701252884e-06, "loss": 0.501, "step": 8384 }, { "epoch": 0.45248502509308725, "grad_norm": 0.9506675705246955, "learning_rate": 6.169235301795796e-06, "loss": 0.3623, "step": 8385 }, { "epoch": 0.45253898872160164, "grad_norm": 1.0177712703639705, "learning_rate": 6.16848088310421e-06, "loss": 0.4606, "step": 8386 }, { "epoch": 0.45259295235011604, "grad_norm": 1.1285838234095045, "learning_rate": 6.167726445199808e-06, "loss": 0.5457, "step": 8387 }, { "epoch": 0.4526469159786304, "grad_norm": 1.2700764543458856, "learning_rate": 6.166971988104272e-06, "loss": 0.6198, "step": 8388 }, { "epoch": 0.4527008796071448, "grad_norm": 1.0182056657847405, "learning_rate": 6.1662175118392886e-06, "loss": 0.4384, "step": 8389 }, { "epoch": 0.4527548432356592, "grad_norm": 1.007168745803475, "learning_rate": 6.1654630164265406e-06, "loss": 0.4493, "step": 8390 }, { "epoch": 
0.4528088068641736, "grad_norm": 1.082975471430919, "learning_rate": 6.164708501887713e-06, "loss": 0.5555, "step": 8391 }, { "epoch": 0.4528627704926879, "grad_norm": 1.20443162823091, "learning_rate": 6.1639539682444915e-06, "loss": 0.6704, "step": 8392 }, { "epoch": 0.4529167341212023, "grad_norm": 1.0178248637649168, "learning_rate": 6.163199415518562e-06, "loss": 0.5266, "step": 8393 }, { "epoch": 0.4529706977497167, "grad_norm": 0.9568505557307015, "learning_rate": 6.162444843731611e-06, "loss": 0.4566, "step": 8394 }, { "epoch": 0.45302466137823105, "grad_norm": 1.1306115302604292, "learning_rate": 6.161690252905325e-06, "loss": 0.5377, "step": 8395 }, { "epoch": 0.45307862500674545, "grad_norm": 0.9686777562326195, "learning_rate": 6.160935643061394e-06, "loss": 0.3678, "step": 8396 }, { "epoch": 0.45313258863525985, "grad_norm": 1.4316369198651668, "learning_rate": 6.160181014221503e-06, "loss": 0.4487, "step": 8397 }, { "epoch": 0.4531865522637742, "grad_norm": 0.9194427818136042, "learning_rate": 6.159426366407345e-06, "loss": 0.4422, "step": 8398 }, { "epoch": 0.4532405158922886, "grad_norm": 1.0726317007948751, "learning_rate": 6.158671699640606e-06, "loss": 0.5604, "step": 8399 }, { "epoch": 0.453294479520803, "grad_norm": 1.1646538564684907, "learning_rate": 6.157917013942977e-06, "loss": 0.4353, "step": 8400 }, { "epoch": 0.4533484431493174, "grad_norm": 1.013790709910214, "learning_rate": 6.157162309336146e-06, "loss": 0.4955, "step": 8401 }, { "epoch": 0.4534024067778317, "grad_norm": 0.8692143727078429, "learning_rate": 6.156407585841809e-06, "loss": 0.3412, "step": 8402 }, { "epoch": 0.4534563704063461, "grad_norm": 1.0236675467290766, "learning_rate": 6.155652843481655e-06, "loss": 0.4563, "step": 8403 }, { "epoch": 0.4535103340348605, "grad_norm": 1.1577464304035896, "learning_rate": 6.154898082277374e-06, "loss": 0.6952, "step": 8404 }, { "epoch": 0.45356429766337486, "grad_norm": 0.9653272623430008, "learning_rate": 6.154143302250661e-06, "loss": 0.4835, "step": 8405 }, { "epoch": 0.45361826129188926, "grad_norm": 1.2152141772947251, "learning_rate": 6.153388503423208e-06, "loss": 0.6698, "step": 8406 }, { "epoch": 0.45367222492040366, "grad_norm": 0.9519933989119045, "learning_rate": 6.1526336858167095e-06, "loss": 0.4883, "step": 8407 }, { "epoch": 0.45372618854891805, "grad_norm": 1.0341868436421364, "learning_rate": 6.1518788494528596e-06, "loss": 0.4129, "step": 8408 }, { "epoch": 0.4537801521774324, "grad_norm": 1.3135231331330393, "learning_rate": 6.1511239943533515e-06, "loss": 0.6155, "step": 8409 }, { "epoch": 0.4538341158059468, "grad_norm": 1.0899223236121354, "learning_rate": 6.1503691205398855e-06, "loss": 0.578, "step": 8410 }, { "epoch": 0.4538880794344612, "grad_norm": 1.3154815500184927, "learning_rate": 6.1496142280341506e-06, "loss": 0.6484, "step": 8411 }, { "epoch": 0.45394204306297553, "grad_norm": 1.1646261336130328, "learning_rate": 6.148859316857847e-06, "loss": 0.6288, "step": 8412 }, { "epoch": 0.45399600669148993, "grad_norm": 1.0544223359081173, "learning_rate": 6.1481043870326705e-06, "loss": 0.5162, "step": 8413 }, { "epoch": 0.4540499703200043, "grad_norm": 1.0392206489482072, "learning_rate": 6.1473494385803205e-06, "loss": 0.4698, "step": 8414 }, { "epoch": 0.4541039339485187, "grad_norm": 0.8609037082670334, "learning_rate": 6.146594471522492e-06, "loss": 0.4052, "step": 8415 }, { "epoch": 0.45415789757703307, "grad_norm": 1.0197117928968813, "learning_rate": 6.145839485880888e-06, "loss": 0.6074, "step": 8416 }, { "epoch": 
0.45421186120554746, "grad_norm": 1.2330149711482408, "learning_rate": 6.145084481677202e-06, "loss": 0.5834, "step": 8417 }, { "epoch": 0.45426582483406186, "grad_norm": 0.9762581434531056, "learning_rate": 6.144329458933136e-06, "loss": 0.5018, "step": 8418 }, { "epoch": 0.4543197884625762, "grad_norm": 1.0473449784789306, "learning_rate": 6.143574417670392e-06, "loss": 0.5464, "step": 8419 }, { "epoch": 0.4543737520910906, "grad_norm": 1.025767839612092, "learning_rate": 6.142819357910668e-06, "loss": 0.4865, "step": 8420 }, { "epoch": 0.454427715719605, "grad_norm": 0.8045375711693429, "learning_rate": 6.142064279675666e-06, "loss": 0.4349, "step": 8421 }, { "epoch": 0.4544816793481194, "grad_norm": 1.4000566304986641, "learning_rate": 6.141309182987088e-06, "loss": 0.7409, "step": 8422 }, { "epoch": 0.45453564297663374, "grad_norm": 1.1533345558626389, "learning_rate": 6.1405540678666355e-06, "loss": 0.5492, "step": 8423 }, { "epoch": 0.45458960660514813, "grad_norm": 1.1651246286190822, "learning_rate": 6.1397989343360135e-06, "loss": 0.5318, "step": 8424 }, { "epoch": 0.45464357023366253, "grad_norm": 0.8395877309237959, "learning_rate": 6.139043782416922e-06, "loss": 0.3905, "step": 8425 }, { "epoch": 0.4546975338621769, "grad_norm": 1.0018641529477679, "learning_rate": 6.138288612131068e-06, "loss": 0.4648, "step": 8426 }, { "epoch": 0.45475149749069127, "grad_norm": 0.9188233555625183, "learning_rate": 6.1375334235001525e-06, "loss": 0.44, "step": 8427 }, { "epoch": 0.45480546111920567, "grad_norm": 1.1617255350860531, "learning_rate": 6.136778216545883e-06, "loss": 0.4955, "step": 8428 }, { "epoch": 0.45485942474772, "grad_norm": 0.9770796015753895, "learning_rate": 6.136022991289965e-06, "loss": 0.4278, "step": 8429 }, { "epoch": 0.4549133883762344, "grad_norm": 1.1101169000782731, "learning_rate": 6.135267747754102e-06, "loss": 0.473, "step": 8430 }, { "epoch": 0.4549673520047488, "grad_norm": 0.9638366606720424, "learning_rate": 6.134512485960003e-06, "loss": 0.4238, "step": 8431 }, { "epoch": 0.4550213156332632, "grad_norm": 0.9352684946429645, "learning_rate": 6.133757205929375e-06, "loss": 0.3438, "step": 8432 }, { "epoch": 0.45507527926177754, "grad_norm": 1.0500956939638022, "learning_rate": 6.133001907683923e-06, "loss": 0.3817, "step": 8433 }, { "epoch": 0.45512924289029194, "grad_norm": 1.107319073815282, "learning_rate": 6.132246591245358e-06, "loss": 0.5657, "step": 8434 }, { "epoch": 0.45518320651880634, "grad_norm": 1.0755078350298863, "learning_rate": 6.131491256635386e-06, "loss": 0.4565, "step": 8435 }, { "epoch": 0.4552371701473207, "grad_norm": 1.021547240217654, "learning_rate": 6.130735903875717e-06, "loss": 0.4506, "step": 8436 }, { "epoch": 0.4552911337758351, "grad_norm": 0.9503372261437775, "learning_rate": 6.12998053298806e-06, "loss": 0.3925, "step": 8437 }, { "epoch": 0.4553450974043495, "grad_norm": 0.8986971076519543, "learning_rate": 6.129225143994128e-06, "loss": 0.3797, "step": 8438 }, { "epoch": 0.45539906103286387, "grad_norm": 1.0924280393162988, "learning_rate": 6.1284697369156276e-06, "loss": 0.5575, "step": 8439 }, { "epoch": 0.4554530246613782, "grad_norm": 1.0662171701577052, "learning_rate": 6.127714311774274e-06, "loss": 0.6119, "step": 8440 }, { "epoch": 0.4555069882898926, "grad_norm": 1.1499326283133573, "learning_rate": 6.126958868591776e-06, "loss": 0.442, "step": 8441 }, { "epoch": 0.455560951918407, "grad_norm": 1.0986606405606907, "learning_rate": 6.126203407389847e-06, "loss": 0.3998, "step": 8442 }, { "epoch": 
0.45561491554692135, "grad_norm": 0.8962837891663541, "learning_rate": 6.125447928190198e-06, "loss": 0.5006, "step": 8443 }, { "epoch": 0.45566887917543575, "grad_norm": 0.9971383229845352, "learning_rate": 6.124692431014545e-06, "loss": 0.6899, "step": 8444 }, { "epoch": 0.45572284280395015, "grad_norm": 0.8485723512707384, "learning_rate": 6.1239369158846e-06, "loss": 0.3514, "step": 8445 }, { "epoch": 0.45577680643246454, "grad_norm": 1.0580834044856593, "learning_rate": 6.1231813828220765e-06, "loss": 0.432, "step": 8446 }, { "epoch": 0.4558307700609789, "grad_norm": 1.1477871486260398, "learning_rate": 6.122425831848692e-06, "loss": 0.5182, "step": 8447 }, { "epoch": 0.4558847336894933, "grad_norm": 0.9197826516206705, "learning_rate": 6.121670262986159e-06, "loss": 0.4773, "step": 8448 }, { "epoch": 0.4559386973180077, "grad_norm": 1.028800803676154, "learning_rate": 6.120914676256194e-06, "loss": 0.4905, "step": 8449 }, { "epoch": 0.455992660946522, "grad_norm": 1.1323639001759163, "learning_rate": 6.120159071680516e-06, "loss": 0.6318, "step": 8450 }, { "epoch": 0.4560466245750364, "grad_norm": 0.9226195557320429, "learning_rate": 6.119403449280838e-06, "loss": 0.5202, "step": 8451 }, { "epoch": 0.4561005882035508, "grad_norm": 1.0149540456819108, "learning_rate": 6.118647809078879e-06, "loss": 0.4867, "step": 8452 }, { "epoch": 0.4561545518320652, "grad_norm": 1.240496641598694, "learning_rate": 6.117892151096357e-06, "loss": 0.7369, "step": 8453 }, { "epoch": 0.45620851546057956, "grad_norm": 1.1263990257026721, "learning_rate": 6.117136475354992e-06, "loss": 0.5227, "step": 8454 }, { "epoch": 0.45626247908909395, "grad_norm": 0.9613535018947932, "learning_rate": 6.1163807818765e-06, "loss": 0.5261, "step": 8455 }, { "epoch": 0.45631644271760835, "grad_norm": 0.9330383900526112, "learning_rate": 6.115625070682599e-06, "loss": 0.4279, "step": 8456 }, { "epoch": 0.4563704063461227, "grad_norm": 1.1216426898183989, "learning_rate": 6.114869341795016e-06, "loss": 0.4996, "step": 8457 }, { "epoch": 0.4564243699746371, "grad_norm": 1.0693226349887006, "learning_rate": 6.114113595235463e-06, "loss": 0.4831, "step": 8458 }, { "epoch": 0.4564783336031515, "grad_norm": 0.9244463053224956, "learning_rate": 6.113357831025666e-06, "loss": 0.4473, "step": 8459 }, { "epoch": 0.4565322972316659, "grad_norm": 1.1140635189354953, "learning_rate": 6.112602049187346e-06, "loss": 0.5454, "step": 8460 }, { "epoch": 0.4565862608601802, "grad_norm": 0.8572697561533181, "learning_rate": 6.111846249742223e-06, "loss": 0.3565, "step": 8461 }, { "epoch": 0.4566402244886946, "grad_norm": 0.9353356161947738, "learning_rate": 6.11109043271202e-06, "loss": 0.4668, "step": 8462 }, { "epoch": 0.456694188117209, "grad_norm": 0.8861661180500667, "learning_rate": 6.110334598118463e-06, "loss": 0.4136, "step": 8463 }, { "epoch": 0.45674815174572336, "grad_norm": 0.9218541589092455, "learning_rate": 6.109578745983271e-06, "loss": 0.5015, "step": 8464 }, { "epoch": 0.45680211537423776, "grad_norm": 0.9450886038216134, "learning_rate": 6.10882287632817e-06, "loss": 0.4391, "step": 8465 }, { "epoch": 0.45685607900275216, "grad_norm": 0.9693866807681021, "learning_rate": 6.108066989174885e-06, "loss": 0.435, "step": 8466 }, { "epoch": 0.4569100426312665, "grad_norm": 1.1243327528541787, "learning_rate": 6.10731108454514e-06, "loss": 0.4661, "step": 8467 }, { "epoch": 0.4569640062597809, "grad_norm": 1.0630308142947953, "learning_rate": 6.1065551624606604e-06, "loss": 0.445, "step": 8468 }, { "epoch": 0.4570179698882953, 
"grad_norm": 1.1700574430663637, "learning_rate": 6.105799222943174e-06, "loss": 0.5276, "step": 8469 }, { "epoch": 0.4570719335168097, "grad_norm": 1.0265078683983655, "learning_rate": 6.105043266014403e-06, "loss": 0.4411, "step": 8470 }, { "epoch": 0.45712589714532403, "grad_norm": 1.1374559221522296, "learning_rate": 6.1042872916960806e-06, "loss": 0.5157, "step": 8471 }, { "epoch": 0.45717986077383843, "grad_norm": 0.998708861454792, "learning_rate": 6.1035313000099295e-06, "loss": 0.4489, "step": 8472 }, { "epoch": 0.45723382440235283, "grad_norm": 1.169383222178501, "learning_rate": 6.102775290977677e-06, "loss": 0.4883, "step": 8473 }, { "epoch": 0.45728778803086717, "grad_norm": 1.0020367954374028, "learning_rate": 6.102019264621056e-06, "loss": 0.5212, "step": 8474 }, { "epoch": 0.45734175165938157, "grad_norm": 1.1450746089380885, "learning_rate": 6.101263220961793e-06, "loss": 0.5074, "step": 8475 }, { "epoch": 0.45739571528789597, "grad_norm": 1.0397486987654283, "learning_rate": 6.100507160021616e-06, "loss": 0.4283, "step": 8476 }, { "epoch": 0.45744967891641036, "grad_norm": 1.0796175625232014, "learning_rate": 6.099751081822258e-06, "loss": 0.5379, "step": 8477 }, { "epoch": 0.4575036425449247, "grad_norm": 1.1806903062657337, "learning_rate": 6.098994986385447e-06, "loss": 0.5238, "step": 8478 }, { "epoch": 0.4575576061734391, "grad_norm": 1.1241456515390165, "learning_rate": 6.098238873732916e-06, "loss": 0.4644, "step": 8479 }, { "epoch": 0.4576115698019535, "grad_norm": 1.0745543983247552, "learning_rate": 6.097482743886394e-06, "loss": 0.4618, "step": 8480 }, { "epoch": 0.45766553343046784, "grad_norm": 1.027403151556168, "learning_rate": 6.096726596867615e-06, "loss": 0.4792, "step": 8481 }, { "epoch": 0.45771949705898224, "grad_norm": 1.0969413909195633, "learning_rate": 6.09597043269831e-06, "loss": 0.5457, "step": 8482 }, { "epoch": 0.45777346068749664, "grad_norm": 1.1293282512055978, "learning_rate": 6.0952142514002136e-06, "loss": 0.504, "step": 8483 }, { "epoch": 0.45782742431601103, "grad_norm": 0.9197931789113908, "learning_rate": 6.094458052995056e-06, "loss": 0.3922, "step": 8484 }, { "epoch": 0.4578813879445254, "grad_norm": 1.0631695486416, "learning_rate": 6.093701837504575e-06, "loss": 0.4285, "step": 8485 }, { "epoch": 0.4579353515730398, "grad_norm": 0.8850271693059165, "learning_rate": 6.092945604950504e-06, "loss": 0.3399, "step": 8486 }, { "epoch": 0.45798931520155417, "grad_norm": 1.0660977822856967, "learning_rate": 6.092189355354575e-06, "loss": 0.4323, "step": 8487 }, { "epoch": 0.4580432788300685, "grad_norm": 0.9442403405791846, "learning_rate": 6.091433088738528e-06, "loss": 0.6314, "step": 8488 }, { "epoch": 0.4580972424585829, "grad_norm": 0.8417808395386996, "learning_rate": 6.090676805124094e-06, "loss": 0.3953, "step": 8489 }, { "epoch": 0.4581512060870973, "grad_norm": 1.177309652042817, "learning_rate": 6.089920504533012e-06, "loss": 0.5619, "step": 8490 }, { "epoch": 0.4582051697156117, "grad_norm": 1.2190354503390892, "learning_rate": 6.08916418698702e-06, "loss": 0.4667, "step": 8491 }, { "epoch": 0.45825913334412605, "grad_norm": 1.2528474275779622, "learning_rate": 6.088407852507854e-06, "loss": 0.6317, "step": 8492 }, { "epoch": 0.45831309697264044, "grad_norm": 1.009060114392703, "learning_rate": 6.087651501117252e-06, "loss": 0.4385, "step": 8493 }, { "epoch": 0.45836706060115484, "grad_norm": 0.9796257034831455, "learning_rate": 6.0868951328369505e-06, "loss": 0.4888, "step": 8494 }, { "epoch": 0.4584210242296692, 
"grad_norm": 0.8566386081332966, "learning_rate": 6.086138747688691e-06, "loss": 0.3991, "step": 8495 }, { "epoch": 0.4584749878581836, "grad_norm": 0.7232924677461603, "learning_rate": 6.085382345694212e-06, "loss": 0.3592, "step": 8496 }, { "epoch": 0.458528951486698, "grad_norm": 1.0287008324676763, "learning_rate": 6.084625926875251e-06, "loss": 0.3856, "step": 8497 }, { "epoch": 0.4585829151152123, "grad_norm": 0.7519772674964819, "learning_rate": 6.083869491253553e-06, "loss": 0.2965, "step": 8498 }, { "epoch": 0.4586368787437267, "grad_norm": 0.8789876564779575, "learning_rate": 6.083113038850854e-06, "loss": 0.389, "step": 8499 }, { "epoch": 0.4586908423722411, "grad_norm": 0.9747420238012463, "learning_rate": 6.082356569688898e-06, "loss": 0.4102, "step": 8500 }, { "epoch": 0.4586908423722411, "eval_loss": 0.5534877777099609, "eval_runtime": 165.1603, "eval_samples_per_second": 20.822, "eval_steps_per_second": 0.872, "step": 8500 }, { "epoch": 0.4587448060007555, "grad_norm": 1.259660922576211, "learning_rate": 6.081600083789424e-06, "loss": 0.5101, "step": 8501 }, { "epoch": 0.45879876962926985, "grad_norm": 0.8878362879094391, "learning_rate": 6.080843581174179e-06, "loss": 0.428, "step": 8502 }, { "epoch": 0.45885273325778425, "grad_norm": 0.9917270658596542, "learning_rate": 6.0800870618649e-06, "loss": 0.4654, "step": 8503 }, { "epoch": 0.45890669688629865, "grad_norm": 1.027263575300223, "learning_rate": 6.079330525883333e-06, "loss": 0.4326, "step": 8504 }, { "epoch": 0.458960660514813, "grad_norm": 1.3970476087036428, "learning_rate": 6.0785739732512216e-06, "loss": 0.4995, "step": 8505 }, { "epoch": 0.4590146241433274, "grad_norm": 0.787434660045765, "learning_rate": 6.07781740399031e-06, "loss": 0.324, "step": 8506 }, { "epoch": 0.4590685877718418, "grad_norm": 0.8530666082730316, "learning_rate": 6.077060818122343e-06, "loss": 0.3912, "step": 8507 }, { "epoch": 0.4591225514003562, "grad_norm": 0.8339493756166022, "learning_rate": 6.076304215669063e-06, "loss": 0.3573, "step": 8508 }, { "epoch": 0.4591765150288705, "grad_norm": 1.2327967212482644, "learning_rate": 6.0755475966522196e-06, "loss": 0.5945, "step": 8509 }, { "epoch": 0.4592304786573849, "grad_norm": 0.9543070845205326, "learning_rate": 6.074790961093555e-06, "loss": 0.4308, "step": 8510 }, { "epoch": 0.4592844422858993, "grad_norm": 1.0061190070855432, "learning_rate": 6.074034309014818e-06, "loss": 0.3884, "step": 8511 }, { "epoch": 0.45933840591441366, "grad_norm": 1.0139016435576889, "learning_rate": 6.073277640437754e-06, "loss": 0.4559, "step": 8512 }, { "epoch": 0.45939236954292806, "grad_norm": 1.1283108096314673, "learning_rate": 6.072520955384112e-06, "loss": 0.4811, "step": 8513 }, { "epoch": 0.45944633317144246, "grad_norm": 1.357177500835346, "learning_rate": 6.0717642538756405e-06, "loss": 0.6323, "step": 8514 }, { "epoch": 0.45950029679995685, "grad_norm": 1.0322337971309237, "learning_rate": 6.071007535934085e-06, "loss": 0.4207, "step": 8515 }, { "epoch": 0.4595542604284712, "grad_norm": 0.9192357711273357, "learning_rate": 6.0702508015811975e-06, "loss": 0.4171, "step": 8516 }, { "epoch": 0.4596082240569856, "grad_norm": 1.1949038511022465, "learning_rate": 6.069494050838725e-06, "loss": 0.55, "step": 8517 }, { "epoch": 0.4596621876855, "grad_norm": 0.9064737040866253, "learning_rate": 6.068737283728418e-06, "loss": 0.4357, "step": 8518 }, { "epoch": 0.45971615131401433, "grad_norm": 1.0812144808727229, "learning_rate": 6.067980500272028e-06, "loss": 0.5141, "step": 8519 }, { "epoch": 
0.45977011494252873, "grad_norm": 1.316884031013826, "learning_rate": 6.067223700491303e-06, "loss": 0.5405, "step": 8520 }, { "epoch": 0.4598240785710431, "grad_norm": 1.0552758836854492, "learning_rate": 6.066466884407996e-06, "loss": 0.3664, "step": 8521 }, { "epoch": 0.4598780421995575, "grad_norm": 1.358218242937385, "learning_rate": 6.0657100520438575e-06, "loss": 0.8129, "step": 8522 }, { "epoch": 0.45993200582807187, "grad_norm": 0.9676680314635113, "learning_rate": 6.064953203420643e-06, "loss": 0.3972, "step": 8523 }, { "epoch": 0.45998596945658626, "grad_norm": 0.9761665341879637, "learning_rate": 6.0641963385601e-06, "loss": 0.4191, "step": 8524 }, { "epoch": 0.46003993308510066, "grad_norm": 1.1934176106614867, "learning_rate": 6.063439457483985e-06, "loss": 0.6565, "step": 8525 }, { "epoch": 0.460093896713615, "grad_norm": 0.7876575221085872, "learning_rate": 6.062682560214053e-06, "loss": 0.3919, "step": 8526 }, { "epoch": 0.4601478603421294, "grad_norm": 1.0183255491891021, "learning_rate": 6.061925646772052e-06, "loss": 0.5872, "step": 8527 }, { "epoch": 0.4602018239706438, "grad_norm": 0.999810071179452, "learning_rate": 6.061168717179742e-06, "loss": 0.4011, "step": 8528 }, { "epoch": 0.4602557875991582, "grad_norm": 0.8695218284031195, "learning_rate": 6.060411771458875e-06, "loss": 0.3683, "step": 8529 }, { "epoch": 0.46030975122767254, "grad_norm": 0.8227514524752766, "learning_rate": 6.0596548096312094e-06, "loss": 0.3394, "step": 8530 }, { "epoch": 0.46036371485618693, "grad_norm": 1.0136288952084063, "learning_rate": 6.058897831718497e-06, "loss": 0.5219, "step": 8531 }, { "epoch": 0.46041767848470133, "grad_norm": 0.9860509814102855, "learning_rate": 6.0581408377424975e-06, "loss": 0.466, "step": 8532 }, { "epoch": 0.4604716421132157, "grad_norm": 0.9043191050195626, "learning_rate": 6.057383827724967e-06, "loss": 0.3647, "step": 8533 }, { "epoch": 0.46052560574173007, "grad_norm": 1.1274046832432478, "learning_rate": 6.056626801687659e-06, "loss": 0.5511, "step": 8534 }, { "epoch": 0.46057956937024447, "grad_norm": 0.9500314073784283, "learning_rate": 6.055869759652337e-06, "loss": 0.4194, "step": 8535 }, { "epoch": 0.4606335329987588, "grad_norm": 1.078723657235146, "learning_rate": 6.055112701640755e-06, "loss": 0.4527, "step": 8536 }, { "epoch": 0.4606874966272732, "grad_norm": 1.0509973134061226, "learning_rate": 6.054355627674675e-06, "loss": 0.5997, "step": 8537 }, { "epoch": 0.4607414602557876, "grad_norm": 1.0567412526116664, "learning_rate": 6.0535985377758526e-06, "loss": 0.605, "step": 8538 }, { "epoch": 0.460795423884302, "grad_norm": 1.0501482710398908, "learning_rate": 6.052841431966049e-06, "loss": 0.4637, "step": 8539 }, { "epoch": 0.46084938751281634, "grad_norm": 0.742983539175541, "learning_rate": 6.052084310267024e-06, "loss": 0.28, "step": 8540 }, { "epoch": 0.46090335114133074, "grad_norm": 0.98456093525691, "learning_rate": 6.051327172700538e-06, "loss": 0.553, "step": 8541 }, { "epoch": 0.46095731476984514, "grad_norm": 0.9686361649333369, "learning_rate": 6.050570019288352e-06, "loss": 0.4231, "step": 8542 }, { "epoch": 0.4610112783983595, "grad_norm": 0.9895918654959686, "learning_rate": 6.049812850052228e-06, "loss": 0.367, "step": 8543 }, { "epoch": 0.4610652420268739, "grad_norm": 1.1165659280192266, "learning_rate": 6.049055665013928e-06, "loss": 0.4272, "step": 8544 }, { "epoch": 0.4611192056553883, "grad_norm": 0.9276420419133237, "learning_rate": 6.048298464195211e-06, "loss": 0.423, "step": 8545 }, { "epoch": 0.4611731692839027, 
"grad_norm": 1.2714418871058233, "learning_rate": 6.047541247617842e-06, "loss": 0.5129, "step": 8546 }, { "epoch": 0.461227132912417, "grad_norm": 1.077623074923214, "learning_rate": 6.046784015303587e-06, "loss": 0.5662, "step": 8547 }, { "epoch": 0.4612810965409314, "grad_norm": 1.029902406998799, "learning_rate": 6.046026767274205e-06, "loss": 0.6473, "step": 8548 }, { "epoch": 0.4613350601694458, "grad_norm": 0.9062009741714702, "learning_rate": 6.045269503551462e-06, "loss": 0.3871, "step": 8549 }, { "epoch": 0.46138902379796015, "grad_norm": 1.0724087982549324, "learning_rate": 6.044512224157124e-06, "loss": 0.5333, "step": 8550 }, { "epoch": 0.46144298742647455, "grad_norm": 0.9772232457464044, "learning_rate": 6.0437549291129535e-06, "loss": 0.4184, "step": 8551 }, { "epoch": 0.46149695105498895, "grad_norm": 1.2521652587631042, "learning_rate": 6.042997618440717e-06, "loss": 0.5943, "step": 8552 }, { "epoch": 0.46155091468350334, "grad_norm": 1.0367157104738083, "learning_rate": 6.04224029216218e-06, "loss": 0.4879, "step": 8553 }, { "epoch": 0.4616048783120177, "grad_norm": 0.9224847184811149, "learning_rate": 6.04148295029911e-06, "loss": 0.3864, "step": 8554 }, { "epoch": 0.4616588419405321, "grad_norm": 1.2778128402413684, "learning_rate": 6.040725592873271e-06, "loss": 0.6752, "step": 8555 }, { "epoch": 0.4617128055690465, "grad_norm": 1.0600632123132452, "learning_rate": 6.0399682199064335e-06, "loss": 0.4502, "step": 8556 }, { "epoch": 0.4617667691975608, "grad_norm": 0.9617921555886252, "learning_rate": 6.039210831420364e-06, "loss": 0.3858, "step": 8557 }, { "epoch": 0.4618207328260752, "grad_norm": 1.0125221631731958, "learning_rate": 6.03845342743683e-06, "loss": 0.4271, "step": 8558 }, { "epoch": 0.4618746964545896, "grad_norm": 0.9749110099984198, "learning_rate": 6.0376960079776e-06, "loss": 0.4663, "step": 8559 }, { "epoch": 0.461928660083104, "grad_norm": 1.0148470171082413, "learning_rate": 6.036938573064444e-06, "loss": 0.3924, "step": 8560 }, { "epoch": 0.46198262371161836, "grad_norm": 1.0651568150976634, "learning_rate": 6.03618112271913e-06, "loss": 0.5021, "step": 8561 }, { "epoch": 0.46203658734013275, "grad_norm": 0.8561499505579699, "learning_rate": 6.03542365696343e-06, "loss": 0.425, "step": 8562 }, { "epoch": 0.46209055096864715, "grad_norm": 1.1554439844180773, "learning_rate": 6.034666175819112e-06, "loss": 0.6158, "step": 8563 }, { "epoch": 0.4621445145971615, "grad_norm": 1.042240371432287, "learning_rate": 6.033908679307948e-06, "loss": 0.4431, "step": 8564 }, { "epoch": 0.4621984782256759, "grad_norm": 0.9429062317031697, "learning_rate": 6.03315116745171e-06, "loss": 0.4238, "step": 8565 }, { "epoch": 0.4622524418541903, "grad_norm": 1.038968178225938, "learning_rate": 6.032393640272168e-06, "loss": 0.5498, "step": 8566 }, { "epoch": 0.46230640548270463, "grad_norm": 0.9996180026665001, "learning_rate": 6.031636097791095e-06, "loss": 0.4259, "step": 8567 }, { "epoch": 0.462360369111219, "grad_norm": 1.4088311085072287, "learning_rate": 6.030878540030263e-06, "loss": 0.5896, "step": 8568 }, { "epoch": 0.4624143327397334, "grad_norm": 1.2196789446862595, "learning_rate": 6.0301209670114455e-06, "loss": 0.57, "step": 8569 }, { "epoch": 0.4624682963682478, "grad_norm": 1.069297016181253, "learning_rate": 6.029363378756416e-06, "loss": 0.4972, "step": 8570 }, { "epoch": 0.46252225999676216, "grad_norm": 0.847628714537642, "learning_rate": 6.028605775286949e-06, "loss": 0.4613, "step": 8571 }, { "epoch": 0.46257622362527656, "grad_norm": 
0.9614153166371943, "learning_rate": 6.027848156624817e-06, "loss": 0.4015, "step": 8572 }, { "epoch": 0.46263018725379096, "grad_norm": 1.194854146475304, "learning_rate": 6.027090522791795e-06, "loss": 0.5305, "step": 8573 }, { "epoch": 0.4626841508823053, "grad_norm": 0.9591179197863521, "learning_rate": 6.0263328738096605e-06, "loss": 0.4321, "step": 8574 }, { "epoch": 0.4627381145108197, "grad_norm": 1.1022285839550956, "learning_rate": 6.025575209700185e-06, "loss": 0.4578, "step": 8575 }, { "epoch": 0.4627920781393341, "grad_norm": 1.1976645143557847, "learning_rate": 6.0248175304851485e-06, "loss": 0.4736, "step": 8576 }, { "epoch": 0.4628460417678485, "grad_norm": 0.8064290764371849, "learning_rate": 6.024059836186325e-06, "loss": 0.3826, "step": 8577 }, { "epoch": 0.46290000539636283, "grad_norm": 1.21187762228379, "learning_rate": 6.0233021268254945e-06, "loss": 0.6037, "step": 8578 }, { "epoch": 0.46295396902487723, "grad_norm": 1.000005090807188, "learning_rate": 6.02254440242443e-06, "loss": 0.4842, "step": 8579 }, { "epoch": 0.46300793265339163, "grad_norm": 1.0915912759227546, "learning_rate": 6.021786663004911e-06, "loss": 0.4485, "step": 8580 }, { "epoch": 0.46306189628190597, "grad_norm": 1.1506182195244363, "learning_rate": 6.021028908588718e-06, "loss": 0.4641, "step": 8581 }, { "epoch": 0.46311585991042037, "grad_norm": 1.306168161121709, "learning_rate": 6.0202711391976266e-06, "loss": 0.6068, "step": 8582 }, { "epoch": 0.46316982353893477, "grad_norm": 0.8050353487310665, "learning_rate": 6.019513354853417e-06, "loss": 0.3136, "step": 8583 }, { "epoch": 0.46322378716744916, "grad_norm": 1.1093392832045035, "learning_rate": 6.0187555555778684e-06, "loss": 0.5977, "step": 8584 }, { "epoch": 0.4632777507959635, "grad_norm": 1.0378005311193506, "learning_rate": 6.0179977413927615e-06, "loss": 0.4783, "step": 8585 }, { "epoch": 0.4633317144244779, "grad_norm": 0.8708008323294189, "learning_rate": 6.017239912319876e-06, "loss": 0.3866, "step": 8586 }, { "epoch": 0.4633856780529923, "grad_norm": 0.8506874453996385, "learning_rate": 6.0164820683809925e-06, "loss": 0.4495, "step": 8587 }, { "epoch": 0.46343964168150664, "grad_norm": 1.1436534590196734, "learning_rate": 6.015724209597892e-06, "loss": 0.54, "step": 8588 }, { "epoch": 0.46349360531002104, "grad_norm": 1.0292667667921005, "learning_rate": 6.014966335992357e-06, "loss": 0.3911, "step": 8589 }, { "epoch": 0.46354756893853544, "grad_norm": 1.143158253477684, "learning_rate": 6.01420844758617e-06, "loss": 0.4598, "step": 8590 }, { "epoch": 0.46360153256704983, "grad_norm": 1.1126650464912493, "learning_rate": 6.013450544401112e-06, "loss": 0.4511, "step": 8591 }, { "epoch": 0.4636554961955642, "grad_norm": 0.9293196319675322, "learning_rate": 6.012692626458966e-06, "loss": 0.4007, "step": 8592 }, { "epoch": 0.4637094598240786, "grad_norm": 1.1344476813603983, "learning_rate": 6.011934693781516e-06, "loss": 0.6098, "step": 8593 }, { "epoch": 0.46376342345259297, "grad_norm": 1.0066490869710114, "learning_rate": 6.011176746390546e-06, "loss": 0.5155, "step": 8594 }, { "epoch": 0.4638173870811073, "grad_norm": 1.147548677211478, "learning_rate": 6.010418784307841e-06, "loss": 0.5037, "step": 8595 }, { "epoch": 0.4638713507096217, "grad_norm": 0.9827103486089183, "learning_rate": 6.0096608075551836e-06, "loss": 0.6119, "step": 8596 }, { "epoch": 0.4639253143381361, "grad_norm": 1.0262904440112586, "learning_rate": 6.0089028161543584e-06, "loss": 0.5174, "step": 8597 }, { "epoch": 0.4639792779666505, "grad_norm": 
0.8985165450390238, "learning_rate": 6.008144810127152e-06, "loss": 0.4626, "step": 8598 }, { "epoch": 0.46403324159516485, "grad_norm": 0.8878817196548485, "learning_rate": 6.007386789495353e-06, "loss": 0.3629, "step": 8599 }, { "epoch": 0.46408720522367924, "grad_norm": 1.0643114841731385, "learning_rate": 6.006628754280743e-06, "loss": 0.4279, "step": 8600 }, { "epoch": 0.46414116885219364, "grad_norm": 1.1400574849213891, "learning_rate": 6.005870704505111e-06, "loss": 0.503, "step": 8601 }, { "epoch": 0.464195132480708, "grad_norm": 0.9674012203029255, "learning_rate": 6.005112640190245e-06, "loss": 0.3654, "step": 8602 }, { "epoch": 0.4642490961092224, "grad_norm": 0.8927207597936652, "learning_rate": 6.004354561357929e-06, "loss": 0.3698, "step": 8603 }, { "epoch": 0.4643030597377368, "grad_norm": 1.016120966265503, "learning_rate": 6.003596468029955e-06, "loss": 0.4365, "step": 8604 }, { "epoch": 0.4643570233662511, "grad_norm": 0.9689884486453738, "learning_rate": 6.002838360228109e-06, "loss": 0.4496, "step": 8605 }, { "epoch": 0.4644109869947655, "grad_norm": 1.0609104025785, "learning_rate": 6.00208023797418e-06, "loss": 0.5653, "step": 8606 }, { "epoch": 0.4644649506232799, "grad_norm": 1.017684071468246, "learning_rate": 6.001322101289958e-06, "loss": 0.4175, "step": 8607 }, { "epoch": 0.4645189142517943, "grad_norm": 0.9377182470974565, "learning_rate": 6.000563950197232e-06, "loss": 0.4425, "step": 8608 }, { "epoch": 0.46457287788030865, "grad_norm": 1.2566410031905575, "learning_rate": 5.999805784717795e-06, "loss": 0.6786, "step": 8609 }, { "epoch": 0.46462684150882305, "grad_norm": 1.1302888762913454, "learning_rate": 5.9990476048734315e-06, "loss": 0.4598, "step": 8610 }, { "epoch": 0.46468080513733745, "grad_norm": 0.971848437441932, "learning_rate": 5.998289410685936e-06, "loss": 0.4333, "step": 8611 }, { "epoch": 0.4647347687658518, "grad_norm": 0.8410969303393462, "learning_rate": 5.997531202177101e-06, "loss": 0.3987, "step": 8612 }, { "epoch": 0.4647887323943662, "grad_norm": 0.9409245952108177, "learning_rate": 5.996772979368715e-06, "loss": 0.5592, "step": 8613 }, { "epoch": 0.4648426960228806, "grad_norm": 0.9086144598938792, "learning_rate": 5.996014742282572e-06, "loss": 0.4397, "step": 8614 }, { "epoch": 0.464896659651395, "grad_norm": 1.0321295483225879, "learning_rate": 5.9952564909404644e-06, "loss": 0.4302, "step": 8615 }, { "epoch": 0.4649506232799093, "grad_norm": 0.9587372313047655, "learning_rate": 5.994498225364185e-06, "loss": 0.4664, "step": 8616 }, { "epoch": 0.4650045869084237, "grad_norm": 0.9677270038644902, "learning_rate": 5.993739945575525e-06, "loss": 0.487, "step": 8617 }, { "epoch": 0.4650585505369381, "grad_norm": 0.9520085588485381, "learning_rate": 5.992981651596281e-06, "loss": 0.3724, "step": 8618 }, { "epoch": 0.46511251416545246, "grad_norm": 1.1850861090941784, "learning_rate": 5.992223343448248e-06, "loss": 0.5672, "step": 8619 }, { "epoch": 0.46516647779396686, "grad_norm": 0.9174174007326882, "learning_rate": 5.9914650211532175e-06, "loss": 0.4056, "step": 8620 }, { "epoch": 0.46522044142248126, "grad_norm": 0.8492182845552501, "learning_rate": 5.990706684732986e-06, "loss": 0.3057, "step": 8621 }, { "epoch": 0.46527440505099565, "grad_norm": 0.9928685979559576, "learning_rate": 5.989948334209348e-06, "loss": 0.5669, "step": 8622 }, { "epoch": 0.46532836867951, "grad_norm": 0.8756728017447322, "learning_rate": 5.9891899696041e-06, "loss": 0.3668, "step": 8623 }, { "epoch": 0.4653823323080244, "grad_norm": 1.3894544617329678, 
"learning_rate": 5.988431590939039e-06, "loss": 0.6959, "step": 8624 }, { "epoch": 0.4654362959365388, "grad_norm": 1.1278402905927059, "learning_rate": 5.9876731982359595e-06, "loss": 0.6408, "step": 8625 }, { "epoch": 0.46549025956505313, "grad_norm": 0.9755040935430775, "learning_rate": 5.986914791516661e-06, "loss": 0.4474, "step": 8626 }, { "epoch": 0.46554422319356753, "grad_norm": 1.0804591375034982, "learning_rate": 5.986156370802937e-06, "loss": 0.6488, "step": 8627 }, { "epoch": 0.4655981868220819, "grad_norm": 0.7571997016695278, "learning_rate": 5.9853979361165895e-06, "loss": 0.4047, "step": 8628 }, { "epoch": 0.4656521504505963, "grad_norm": 0.959641176439372, "learning_rate": 5.984639487479413e-06, "loss": 0.4029, "step": 8629 }, { "epoch": 0.46570611407911067, "grad_norm": 0.8377408388289379, "learning_rate": 5.983881024913209e-06, "loss": 0.4099, "step": 8630 }, { "epoch": 0.46576007770762506, "grad_norm": 1.1169547614008717, "learning_rate": 5.983122548439776e-06, "loss": 0.5216, "step": 8631 }, { "epoch": 0.46581404133613946, "grad_norm": 1.1271155065417413, "learning_rate": 5.982364058080913e-06, "loss": 0.4254, "step": 8632 }, { "epoch": 0.4658680049646538, "grad_norm": 1.045926416685868, "learning_rate": 5.9816055538584185e-06, "loss": 0.4406, "step": 8633 }, { "epoch": 0.4659219685931682, "grad_norm": 1.0132631284220903, "learning_rate": 5.980847035794095e-06, "loss": 0.4225, "step": 8634 }, { "epoch": 0.4659759322216826, "grad_norm": 1.3212278499133725, "learning_rate": 5.98008850390974e-06, "loss": 0.6129, "step": 8635 }, { "epoch": 0.46602989585019694, "grad_norm": 0.9909720703523236, "learning_rate": 5.979329958227158e-06, "loss": 0.6022, "step": 8636 }, { "epoch": 0.46608385947871134, "grad_norm": 1.013249023681921, "learning_rate": 5.978571398768148e-06, "loss": 0.5471, "step": 8637 }, { "epoch": 0.46613782310722573, "grad_norm": 0.9600659820943548, "learning_rate": 5.977812825554512e-06, "loss": 0.4279, "step": 8638 }, { "epoch": 0.46619178673574013, "grad_norm": 1.0294527471016084, "learning_rate": 5.977054238608053e-06, "loss": 0.4615, "step": 8639 }, { "epoch": 0.4662457503642545, "grad_norm": 0.9970590549086533, "learning_rate": 5.976295637950573e-06, "loss": 0.4488, "step": 8640 }, { "epoch": 0.46629971399276887, "grad_norm": 1.1568941941632573, "learning_rate": 5.975537023603874e-06, "loss": 0.4906, "step": 8641 }, { "epoch": 0.46635367762128327, "grad_norm": 1.1536367244848806, "learning_rate": 5.974778395589762e-06, "loss": 0.5514, "step": 8642 }, { "epoch": 0.4664076412497976, "grad_norm": 1.106082519604652, "learning_rate": 5.9740197539300395e-06, "loss": 0.4758, "step": 8643 }, { "epoch": 0.466461604878312, "grad_norm": 0.9092684144906557, "learning_rate": 5.973261098646509e-06, "loss": 0.3812, "step": 8644 }, { "epoch": 0.4665155685068264, "grad_norm": 1.1398591100678763, "learning_rate": 5.972502429760977e-06, "loss": 0.6398, "step": 8645 }, { "epoch": 0.4665695321353408, "grad_norm": 0.91177618828031, "learning_rate": 5.971743747295248e-06, "loss": 0.4378, "step": 8646 }, { "epoch": 0.46662349576385515, "grad_norm": 0.9769211124444062, "learning_rate": 5.970985051271126e-06, "loss": 0.4691, "step": 8647 }, { "epoch": 0.46667745939236954, "grad_norm": 0.8792803592308595, "learning_rate": 5.970226341710419e-06, "loss": 0.4188, "step": 8648 }, { "epoch": 0.46673142302088394, "grad_norm": 1.2153514875671256, "learning_rate": 5.969467618634932e-06, "loss": 0.671, "step": 8649 }, { "epoch": 0.4667853866493983, "grad_norm": 1.134562666337781, 
"learning_rate": 5.968708882066471e-06, "loss": 0.4859, "step": 8650 }, { "epoch": 0.4668393502779127, "grad_norm": 1.0308161043834947, "learning_rate": 5.967950132026842e-06, "loss": 0.5612, "step": 8651 }, { "epoch": 0.4668933139064271, "grad_norm": 1.2681787615113551, "learning_rate": 5.967191368537854e-06, "loss": 0.5963, "step": 8652 }, { "epoch": 0.4669472775349415, "grad_norm": 0.9695833567676125, "learning_rate": 5.966432591621314e-06, "loss": 0.5549, "step": 8653 }, { "epoch": 0.4670012411634558, "grad_norm": 0.9944462198205192, "learning_rate": 5.965673801299031e-06, "loss": 0.4263, "step": 8654 }, { "epoch": 0.4670552047919702, "grad_norm": 1.211725473564839, "learning_rate": 5.964914997592811e-06, "loss": 0.8016, "step": 8655 }, { "epoch": 0.4671091684204846, "grad_norm": 1.1130634568421225, "learning_rate": 5.964156180524465e-06, "loss": 0.4792, "step": 8656 }, { "epoch": 0.46716313204899895, "grad_norm": 0.9272633091932245, "learning_rate": 5.9633973501158015e-06, "loss": 0.3858, "step": 8657 }, { "epoch": 0.46721709567751335, "grad_norm": 1.111980051623778, "learning_rate": 5.962638506388631e-06, "loss": 0.5334, "step": 8658 }, { "epoch": 0.46727105930602775, "grad_norm": 0.9959655793165458, "learning_rate": 5.96187964936476e-06, "loss": 0.3664, "step": 8659 }, { "epoch": 0.46732502293454214, "grad_norm": 0.9450880319227349, "learning_rate": 5.961120779066002e-06, "loss": 0.4359, "step": 8660 }, { "epoch": 0.4673789865630565, "grad_norm": 0.7740723716997817, "learning_rate": 5.960361895514168e-06, "loss": 0.3293, "step": 8661 }, { "epoch": 0.4674329501915709, "grad_norm": 1.1629766557514967, "learning_rate": 5.959602998731066e-06, "loss": 0.3929, "step": 8662 }, { "epoch": 0.4674869138200853, "grad_norm": 1.0484223106265693, "learning_rate": 5.9588440887385105e-06, "loss": 0.4318, "step": 8663 }, { "epoch": 0.4675408774485996, "grad_norm": 1.070297364540619, "learning_rate": 5.958085165558312e-06, "loss": 0.6086, "step": 8664 }, { "epoch": 0.467594841077114, "grad_norm": 1.0386263023486975, "learning_rate": 5.957326229212283e-06, "loss": 0.5537, "step": 8665 }, { "epoch": 0.4676488047056284, "grad_norm": 0.9901979181556742, "learning_rate": 5.9565672797222365e-06, "loss": 0.3861, "step": 8666 }, { "epoch": 0.4677027683341428, "grad_norm": 0.9169226527247082, "learning_rate": 5.955808317109985e-06, "loss": 0.3829, "step": 8667 }, { "epoch": 0.46775673196265716, "grad_norm": 0.9014195737659311, "learning_rate": 5.9550493413973405e-06, "loss": 0.4217, "step": 8668 }, { "epoch": 0.46781069559117155, "grad_norm": 1.0898136639518043, "learning_rate": 5.9542903526061195e-06, "loss": 0.5368, "step": 8669 }, { "epoch": 0.46786465921968595, "grad_norm": 1.08831008164724, "learning_rate": 5.9535313507581346e-06, "loss": 0.4792, "step": 8670 }, { "epoch": 0.4679186228482003, "grad_norm": 1.0856731585625872, "learning_rate": 5.952772335875201e-06, "loss": 0.4616, "step": 8671 }, { "epoch": 0.4679725864767147, "grad_norm": 1.0994229227965029, "learning_rate": 5.952013307979132e-06, "loss": 0.4915, "step": 8672 }, { "epoch": 0.4680265501052291, "grad_norm": 1.2464641631293885, "learning_rate": 5.951254267091744e-06, "loss": 0.5125, "step": 8673 }, { "epoch": 0.46808051373374343, "grad_norm": 0.9589187534829755, "learning_rate": 5.950495213234853e-06, "loss": 0.4439, "step": 8674 }, { "epoch": 0.46813447736225783, "grad_norm": 1.052962174195603, "learning_rate": 5.949736146430274e-06, "loss": 0.419, "step": 8675 }, { "epoch": 0.4681884409907722, "grad_norm": 0.9779969625861619, 
"learning_rate": 5.9489770666998234e-06, "loss": 0.452, "step": 8676 }, { "epoch": 0.4682424046192866, "grad_norm": 1.075875119603226, "learning_rate": 5.9482179740653175e-06, "loss": 0.5157, "step": 8677 }, { "epoch": 0.46829636824780096, "grad_norm": 1.0908216055182927, "learning_rate": 5.947458868548577e-06, "loss": 0.5152, "step": 8678 }, { "epoch": 0.46835033187631536, "grad_norm": 1.1153021726561794, "learning_rate": 5.946699750171414e-06, "loss": 0.5496, "step": 8679 }, { "epoch": 0.46840429550482976, "grad_norm": 1.056936103234692, "learning_rate": 5.945940618955649e-06, "loss": 0.5754, "step": 8680 }, { "epoch": 0.4684582591333441, "grad_norm": 1.0353464689814496, "learning_rate": 5.9451814749231005e-06, "loss": 0.5063, "step": 8681 }, { "epoch": 0.4685122227618585, "grad_norm": 1.1607496901134593, "learning_rate": 5.944422318095588e-06, "loss": 0.5439, "step": 8682 }, { "epoch": 0.4685661863903729, "grad_norm": 0.9532401056867751, "learning_rate": 5.943663148494926e-06, "loss": 0.4003, "step": 8683 }, { "epoch": 0.4686201500188873, "grad_norm": 1.0164448079897388, "learning_rate": 5.942903966142939e-06, "loss": 0.4375, "step": 8684 }, { "epoch": 0.46867411364740164, "grad_norm": 1.0201298576660234, "learning_rate": 5.942144771061445e-06, "loss": 0.4595, "step": 8685 }, { "epoch": 0.46872807727591603, "grad_norm": 1.10065015961312, "learning_rate": 5.941385563272262e-06, "loss": 0.3963, "step": 8686 }, { "epoch": 0.46878204090443043, "grad_norm": 1.1932535709773886, "learning_rate": 5.9406263427972126e-06, "loss": 0.6041, "step": 8687 }, { "epoch": 0.4688360045329448, "grad_norm": 0.8615032633122801, "learning_rate": 5.939867109658117e-06, "loss": 0.3318, "step": 8688 }, { "epoch": 0.46888996816145917, "grad_norm": 1.0298053379530727, "learning_rate": 5.939107863876795e-06, "loss": 0.4638, "step": 8689 }, { "epoch": 0.46894393178997357, "grad_norm": 0.9034787322488955, "learning_rate": 5.938348605475071e-06, "loss": 0.3577, "step": 8690 }, { "epoch": 0.46899789541848796, "grad_norm": 1.0053786197675298, "learning_rate": 5.937589334474763e-06, "loss": 0.4011, "step": 8691 }, { "epoch": 0.4690518590470023, "grad_norm": 1.2246981481002477, "learning_rate": 5.936830050897697e-06, "loss": 0.545, "step": 8692 }, { "epoch": 0.4691058226755167, "grad_norm": 1.0296028200638963, "learning_rate": 5.936070754765693e-06, "loss": 0.4039, "step": 8693 }, { "epoch": 0.4691597863040311, "grad_norm": 0.8887519278600245, "learning_rate": 5.935311446100574e-06, "loss": 0.3496, "step": 8694 }, { "epoch": 0.46921374993254544, "grad_norm": 0.9470570573745989, "learning_rate": 5.9345521249241674e-06, "loss": 0.4731, "step": 8695 }, { "epoch": 0.46926771356105984, "grad_norm": 0.8639015663189205, "learning_rate": 5.933792791258293e-06, "loss": 0.3973, "step": 8696 }, { "epoch": 0.46932167718957424, "grad_norm": 1.0628684888056679, "learning_rate": 5.933033445124773e-06, "loss": 0.5114, "step": 8697 }, { "epoch": 0.46937564081808864, "grad_norm": 1.0055262234465847, "learning_rate": 5.932274086545435e-06, "loss": 0.4691, "step": 8698 }, { "epoch": 0.469429604446603, "grad_norm": 0.8454228379320959, "learning_rate": 5.9315147155421045e-06, "loss": 0.318, "step": 8699 }, { "epoch": 0.4694835680751174, "grad_norm": 0.9329160293069266, "learning_rate": 5.930755332136604e-06, "loss": 0.456, "step": 8700 }, { "epoch": 0.46953753170363177, "grad_norm": 0.9703792854480903, "learning_rate": 5.929995936350761e-06, "loss": 0.4318, "step": 8701 }, { "epoch": 0.4695914953321461, "grad_norm": 0.911391456820878, 
"learning_rate": 5.929236528206399e-06, "loss": 0.4449, "step": 8702 }, { "epoch": 0.4696454589606605, "grad_norm": 1.2299975917880814, "learning_rate": 5.928477107725347e-06, "loss": 0.6134, "step": 8703 }, { "epoch": 0.4696994225891749, "grad_norm": 0.8713363324081187, "learning_rate": 5.927717674929428e-06, "loss": 0.3967, "step": 8704 }, { "epoch": 0.46975338621768925, "grad_norm": 1.0527699428703525, "learning_rate": 5.926958229840473e-06, "loss": 0.3955, "step": 8705 }, { "epoch": 0.46980734984620365, "grad_norm": 0.956870204832798, "learning_rate": 5.926198772480305e-06, "loss": 0.3996, "step": 8706 }, { "epoch": 0.46986131347471805, "grad_norm": 0.9865029857600905, "learning_rate": 5.925439302870755e-06, "loss": 0.4696, "step": 8707 }, { "epoch": 0.46991527710323244, "grad_norm": 1.102505368236341, "learning_rate": 5.924679821033649e-06, "loss": 0.7191, "step": 8708 }, { "epoch": 0.4699692407317468, "grad_norm": 0.8619072770788635, "learning_rate": 5.923920326990816e-06, "loss": 0.3307, "step": 8709 }, { "epoch": 0.4700232043602612, "grad_norm": 1.031311878318018, "learning_rate": 5.923160820764083e-06, "loss": 0.46, "step": 8710 }, { "epoch": 0.4700771679887756, "grad_norm": 1.033092906980219, "learning_rate": 5.922401302375282e-06, "loss": 0.5759, "step": 8711 }, { "epoch": 0.4701311316172899, "grad_norm": 0.9717009675402507, "learning_rate": 5.921641771846241e-06, "loss": 0.5654, "step": 8712 }, { "epoch": 0.4701850952458043, "grad_norm": 1.0111446128673538, "learning_rate": 5.92088222919879e-06, "loss": 0.4698, "step": 8713 }, { "epoch": 0.4702390588743187, "grad_norm": 1.0409147179508291, "learning_rate": 5.920122674454757e-06, "loss": 0.4988, "step": 8714 }, { "epoch": 0.4702930225028331, "grad_norm": 0.8807493532515743, "learning_rate": 5.919363107635975e-06, "loss": 0.4112, "step": 8715 }, { "epoch": 0.47034698613134746, "grad_norm": 0.9305971199727108, "learning_rate": 5.9186035287642715e-06, "loss": 0.4356, "step": 8716 }, { "epoch": 0.47040094975986185, "grad_norm": 1.14520039606566, "learning_rate": 5.9178439378614816e-06, "loss": 0.3361, "step": 8717 }, { "epoch": 0.47045491338837625, "grad_norm": 1.160185690780033, "learning_rate": 5.917084334949435e-06, "loss": 0.4936, "step": 8718 }, { "epoch": 0.4705088770168906, "grad_norm": 0.9328728690203343, "learning_rate": 5.916324720049961e-06, "loss": 0.4566, "step": 8719 }, { "epoch": 0.470562840645405, "grad_norm": 1.1490939620644454, "learning_rate": 5.9155650931848955e-06, "loss": 0.5615, "step": 8720 }, { "epoch": 0.4706168042739194, "grad_norm": 1.0537279684521474, "learning_rate": 5.914805454376069e-06, "loss": 0.4399, "step": 8721 }, { "epoch": 0.4706707679024338, "grad_norm": 1.0606354504826565, "learning_rate": 5.9140458036453144e-06, "loss": 0.5231, "step": 8722 }, { "epoch": 0.4707247315309481, "grad_norm": 1.182499202142192, "learning_rate": 5.913286141014465e-06, "loss": 0.7202, "step": 8723 }, { "epoch": 0.4707786951594625, "grad_norm": 1.0442918064632247, "learning_rate": 5.912526466505355e-06, "loss": 0.5068, "step": 8724 }, { "epoch": 0.4708326587879769, "grad_norm": 1.0746182252657717, "learning_rate": 5.911766780139817e-06, "loss": 0.4413, "step": 8725 }, { "epoch": 0.47088662241649126, "grad_norm": 1.2134429067828767, "learning_rate": 5.911007081939686e-06, "loss": 0.6278, "step": 8726 }, { "epoch": 0.47094058604500566, "grad_norm": 1.0318070352898485, "learning_rate": 5.910247371926795e-06, "loss": 0.4455, "step": 8727 }, { "epoch": 0.47099454967352006, "grad_norm": 1.1686103154068637, "learning_rate": 
5.909487650122981e-06, "loss": 0.4437, "step": 8728 }, { "epoch": 0.47104851330203446, "grad_norm": 0.9316429964173001, "learning_rate": 5.9087279165500775e-06, "loss": 0.3522, "step": 8729 }, { "epoch": 0.4711024769305488, "grad_norm": 1.1070740778675323, "learning_rate": 5.907968171229923e-06, "loss": 0.6205, "step": 8730 }, { "epoch": 0.4711564405590632, "grad_norm": 0.8607908448060101, "learning_rate": 5.907208414184349e-06, "loss": 0.3616, "step": 8731 }, { "epoch": 0.4712104041875776, "grad_norm": 0.6657758524236569, "learning_rate": 5.906448645435195e-06, "loss": 0.3562, "step": 8732 }, { "epoch": 0.47126436781609193, "grad_norm": 1.0550156162622841, "learning_rate": 5.905688865004295e-06, "loss": 0.5256, "step": 8733 }, { "epoch": 0.47131833144460633, "grad_norm": 1.0044533736145986, "learning_rate": 5.9049290729134865e-06, "loss": 0.4409, "step": 8734 }, { "epoch": 0.47137229507312073, "grad_norm": 1.0346477928542372, "learning_rate": 5.904169269184609e-06, "loss": 0.3886, "step": 8735 }, { "epoch": 0.47142625870163507, "grad_norm": 1.2445807670001123, "learning_rate": 5.903409453839498e-06, "loss": 0.6073, "step": 8736 }, { "epoch": 0.47148022233014947, "grad_norm": 0.9606007460620334, "learning_rate": 5.902649626899991e-06, "loss": 0.4071, "step": 8737 }, { "epoch": 0.47153418595866387, "grad_norm": 1.0416415081183634, "learning_rate": 5.901889788387927e-06, "loss": 0.59, "step": 8738 }, { "epoch": 0.47158814958717826, "grad_norm": 1.090679098085388, "learning_rate": 5.901129938325143e-06, "loss": 0.6608, "step": 8739 }, { "epoch": 0.4716421132156926, "grad_norm": 0.9906550294828262, "learning_rate": 5.900370076733483e-06, "loss": 0.4756, "step": 8740 }, { "epoch": 0.471696076844207, "grad_norm": 0.8664855916541196, "learning_rate": 5.89961020363478e-06, "loss": 0.3704, "step": 8741 }, { "epoch": 0.4717500404727214, "grad_norm": 1.0384096293963592, "learning_rate": 5.898850319050875e-06, "loss": 0.4755, "step": 8742 }, { "epoch": 0.47180400410123574, "grad_norm": 0.8807543215386913, "learning_rate": 5.898090423003609e-06, "loss": 0.4151, "step": 8743 }, { "epoch": 0.47185796772975014, "grad_norm": 1.026807749795398, "learning_rate": 5.897330515514823e-06, "loss": 0.4386, "step": 8744 }, { "epoch": 0.47191193135826454, "grad_norm": 1.045354845483807, "learning_rate": 5.896570596606357e-06, "loss": 0.5102, "step": 8745 }, { "epoch": 0.47196589498677893, "grad_norm": 1.1406799114762336, "learning_rate": 5.895810666300051e-06, "loss": 0.43, "step": 8746 }, { "epoch": 0.4720198586152933, "grad_norm": 0.9189202221858719, "learning_rate": 5.895050724617747e-06, "loss": 0.4319, "step": 8747 }, { "epoch": 0.4720738222438077, "grad_norm": 0.9277152913673969, "learning_rate": 5.894290771581285e-06, "loss": 0.4071, "step": 8748 }, { "epoch": 0.47212778587232207, "grad_norm": 0.8011755125156785, "learning_rate": 5.893530807212508e-06, "loss": 0.3745, "step": 8749 }, { "epoch": 0.4721817495008364, "grad_norm": 1.1348237593190482, "learning_rate": 5.892770831533258e-06, "loss": 0.5047, "step": 8750 }, { "epoch": 0.4722357131293508, "grad_norm": 0.8834888466402445, "learning_rate": 5.892010844565379e-06, "loss": 0.4554, "step": 8751 }, { "epoch": 0.4722896767578652, "grad_norm": 1.1351987211800108, "learning_rate": 5.891250846330709e-06, "loss": 0.4732, "step": 8752 }, { "epoch": 0.4723436403863796, "grad_norm": 0.9316793659205794, "learning_rate": 5.890490836851096e-06, "loss": 0.3597, "step": 8753 }, { "epoch": 0.47239760401489395, "grad_norm": 1.049573675622062, "learning_rate": 
5.889730816148382e-06, "loss": 0.3616, "step": 8754 }, { "epoch": 0.47245156764340834, "grad_norm": 1.0981922241342856, "learning_rate": 5.888970784244411e-06, "loss": 0.4813, "step": 8755 }, { "epoch": 0.47250553127192274, "grad_norm": 0.9289055139216408, "learning_rate": 5.888210741161027e-06, "loss": 0.4903, "step": 8756 }, { "epoch": 0.4725594949004371, "grad_norm": 0.9748833317215747, "learning_rate": 5.887450686920074e-06, "loss": 0.4475, "step": 8757 }, { "epoch": 0.4726134585289515, "grad_norm": 1.1077311348093375, "learning_rate": 5.886690621543395e-06, "loss": 0.4738, "step": 8758 }, { "epoch": 0.4726674221574659, "grad_norm": 1.1282389816139495, "learning_rate": 5.885930545052838e-06, "loss": 0.5652, "step": 8759 }, { "epoch": 0.4727213857859803, "grad_norm": 0.9665546034335349, "learning_rate": 5.885170457470246e-06, "loss": 0.5715, "step": 8760 }, { "epoch": 0.4727753494144946, "grad_norm": 1.1470002795291008, "learning_rate": 5.884410358817467e-06, "loss": 0.5121, "step": 8761 }, { "epoch": 0.472829313043009, "grad_norm": 1.0676241257440153, "learning_rate": 5.883650249116346e-06, "loss": 0.5323, "step": 8762 }, { "epoch": 0.4728832766715234, "grad_norm": 1.1827647929069065, "learning_rate": 5.882890128388729e-06, "loss": 0.5387, "step": 8763 }, { "epoch": 0.47293724030003775, "grad_norm": 1.1355278077546374, "learning_rate": 5.882129996656464e-06, "loss": 0.4728, "step": 8764 }, { "epoch": 0.47299120392855215, "grad_norm": 1.1107131982025897, "learning_rate": 5.881369853941394e-06, "loss": 0.5407, "step": 8765 }, { "epoch": 0.47304516755706655, "grad_norm": 1.086083125628493, "learning_rate": 5.8806097002653705e-06, "loss": 0.4038, "step": 8766 }, { "epoch": 0.47309913118558095, "grad_norm": 1.2543490257033674, "learning_rate": 5.879849535650239e-06, "loss": 0.7451, "step": 8767 }, { "epoch": 0.4731530948140953, "grad_norm": 1.0975104520520291, "learning_rate": 5.879089360117849e-06, "loss": 0.5605, "step": 8768 }, { "epoch": 0.4732070584426097, "grad_norm": 1.1561746000599853, "learning_rate": 5.878329173690047e-06, "loss": 0.5764, "step": 8769 }, { "epoch": 0.4732610220711241, "grad_norm": 1.0525536988902107, "learning_rate": 5.877568976388683e-06, "loss": 0.565, "step": 8770 }, { "epoch": 0.4733149856996384, "grad_norm": 0.8676917791022474, "learning_rate": 5.8768087682356054e-06, "loss": 0.3781, "step": 8771 }, { "epoch": 0.4733689493281528, "grad_norm": 1.0211882753078003, "learning_rate": 5.876048549252662e-06, "loss": 0.5628, "step": 8772 }, { "epoch": 0.4734229129566672, "grad_norm": 0.9702473802518192, "learning_rate": 5.875288319461704e-06, "loss": 0.3836, "step": 8773 }, { "epoch": 0.47347687658518156, "grad_norm": 1.045512879272203, "learning_rate": 5.87452807888458e-06, "loss": 0.613, "step": 8774 }, { "epoch": 0.47353084021369596, "grad_norm": 0.8589056271062326, "learning_rate": 5.873767827543142e-06, "loss": 0.3663, "step": 8775 }, { "epoch": 0.47358480384221036, "grad_norm": 1.038677143323233, "learning_rate": 5.873007565459238e-06, "loss": 0.4317, "step": 8776 }, { "epoch": 0.47363876747072475, "grad_norm": 0.9065100783460267, "learning_rate": 5.872247292654718e-06, "loss": 0.4689, "step": 8777 }, { "epoch": 0.4736927310992391, "grad_norm": 0.9402962827237474, "learning_rate": 5.871487009151438e-06, "loss": 0.365, "step": 8778 }, { "epoch": 0.4737466947277535, "grad_norm": 1.1068375045079477, "learning_rate": 5.870726714971243e-06, "loss": 0.5468, "step": 8779 }, { "epoch": 0.4738006583562679, "grad_norm": 1.1018901936084629, "learning_rate": 
5.869966410135989e-06, "loss": 0.4568, "step": 8780 }, { "epoch": 0.47385462198478223, "grad_norm": 1.0685733288575598, "learning_rate": 5.869206094667526e-06, "loss": 0.3838, "step": 8781 }, { "epoch": 0.47390858561329663, "grad_norm": 0.7847559028156487, "learning_rate": 5.868445768587708e-06, "loss": 0.3136, "step": 8782 }, { "epoch": 0.473962549241811, "grad_norm": 0.8654011210424204, "learning_rate": 5.867685431918385e-06, "loss": 0.4267, "step": 8783 }, { "epoch": 0.4740165128703254, "grad_norm": 0.9764441472458033, "learning_rate": 5.866925084681412e-06, "loss": 0.4245, "step": 8784 }, { "epoch": 0.47407047649883977, "grad_norm": 1.3181882997220347, "learning_rate": 5.866164726898642e-06, "loss": 0.707, "step": 8785 }, { "epoch": 0.47412444012735416, "grad_norm": 1.0049553244972391, "learning_rate": 5.865404358591927e-06, "loss": 0.4488, "step": 8786 }, { "epoch": 0.47417840375586856, "grad_norm": 0.9071038064302532, "learning_rate": 5.86464397978312e-06, "loss": 0.4201, "step": 8787 }, { "epoch": 0.4742323673843829, "grad_norm": 0.9515277722318186, "learning_rate": 5.863883590494078e-06, "loss": 0.5063, "step": 8788 }, { "epoch": 0.4742863310128973, "grad_norm": 0.9897699471133655, "learning_rate": 5.863123190746655e-06, "loss": 0.4586, "step": 8789 }, { "epoch": 0.4743402946414117, "grad_norm": 0.9092217277370972, "learning_rate": 5.862362780562703e-06, "loss": 0.4236, "step": 8790 }, { "epoch": 0.4743942582699261, "grad_norm": 0.6363771073935938, "learning_rate": 5.861602359964078e-06, "loss": 0.2239, "step": 8791 }, { "epoch": 0.47444822189844044, "grad_norm": 1.199072511816314, "learning_rate": 5.8608419289726356e-06, "loss": 0.6411, "step": 8792 }, { "epoch": 0.47450218552695483, "grad_norm": 0.9913110526765556, "learning_rate": 5.860081487610234e-06, "loss": 0.4923, "step": 8793 }, { "epoch": 0.47455614915546923, "grad_norm": 1.114043437313138, "learning_rate": 5.859321035898725e-06, "loss": 0.4765, "step": 8794 }, { "epoch": 0.4746101127839836, "grad_norm": 1.1520958352645954, "learning_rate": 5.858560573859967e-06, "loss": 0.5756, "step": 8795 }, { "epoch": 0.47466407641249797, "grad_norm": 1.027757942422187, "learning_rate": 5.857800101515815e-06, "loss": 0.4738, "step": 8796 }, { "epoch": 0.47471804004101237, "grad_norm": 0.9624334918723162, "learning_rate": 5.857039618888126e-06, "loss": 0.3564, "step": 8797 }, { "epoch": 0.47477200366952677, "grad_norm": 0.7870307953188028, "learning_rate": 5.856279125998759e-06, "loss": 0.3258, "step": 8798 }, { "epoch": 0.4748259672980411, "grad_norm": 0.8278499175913212, "learning_rate": 5.85551862286957e-06, "loss": 0.4551, "step": 8799 }, { "epoch": 0.4748799309265555, "grad_norm": 1.0707484917174876, "learning_rate": 5.854758109522416e-06, "loss": 0.4567, "step": 8800 }, { "epoch": 0.4749338945550699, "grad_norm": 1.3592703504371253, "learning_rate": 5.853997585979155e-06, "loss": 0.5649, "step": 8801 }, { "epoch": 0.47498785818358424, "grad_norm": 1.1069057732051715, "learning_rate": 5.853237052261648e-06, "loss": 0.6157, "step": 8802 }, { "epoch": 0.47504182181209864, "grad_norm": 0.8948879096422329, "learning_rate": 5.852476508391748e-06, "loss": 0.4407, "step": 8803 }, { "epoch": 0.47509578544061304, "grad_norm": 1.2106875010054567, "learning_rate": 5.8517159543913185e-06, "loss": 0.5007, "step": 8804 }, { "epoch": 0.4751497490691274, "grad_norm": 0.8598634972787603, "learning_rate": 5.850955390282218e-06, "loss": 0.3567, "step": 8805 }, { "epoch": 0.4752037126976418, "grad_norm": 0.89823265893177, "learning_rate": 
5.8501948160863035e-06, "loss": 0.3608, "step": 8806 }, { "epoch": 0.4752576763261562, "grad_norm": 0.9711783303233786, "learning_rate": 5.8494342318254375e-06, "loss": 0.3961, "step": 8807 }, { "epoch": 0.4753116399546706, "grad_norm": 0.9930348829191323, "learning_rate": 5.848673637521477e-06, "loss": 0.482, "step": 8808 }, { "epoch": 0.4753656035831849, "grad_norm": 0.9192731867855013, "learning_rate": 5.847913033196285e-06, "loss": 0.3906, "step": 8809 }, { "epoch": 0.4754195672116993, "grad_norm": 0.948206606991972, "learning_rate": 5.84715241887172e-06, "loss": 0.4848, "step": 8810 }, { "epoch": 0.4754735308402137, "grad_norm": 1.0680382514799773, "learning_rate": 5.846391794569643e-06, "loss": 0.5713, "step": 8811 }, { "epoch": 0.47552749446872805, "grad_norm": 1.1390566520511052, "learning_rate": 5.845631160311919e-06, "loss": 0.5061, "step": 8812 }, { "epoch": 0.47558145809724245, "grad_norm": 0.7545615613501151, "learning_rate": 5.844870516120405e-06, "loss": 0.3131, "step": 8813 }, { "epoch": 0.47563542172575685, "grad_norm": 0.843464459593225, "learning_rate": 5.844109862016962e-06, "loss": 0.3776, "step": 8814 }, { "epoch": 0.47568938535427124, "grad_norm": 1.0688402726424056, "learning_rate": 5.843349198023455e-06, "loss": 0.626, "step": 8815 }, { "epoch": 0.4757433489827856, "grad_norm": 0.9552895495145576, "learning_rate": 5.842588524161745e-06, "loss": 0.4727, "step": 8816 }, { "epoch": 0.4757973126113, "grad_norm": 0.9293578122797493, "learning_rate": 5.841827840453695e-06, "loss": 0.394, "step": 8817 }, { "epoch": 0.4758512762398144, "grad_norm": 1.0782008322830479, "learning_rate": 5.841067146921167e-06, "loss": 0.4722, "step": 8818 }, { "epoch": 0.4759052398683287, "grad_norm": 0.9676852045502109, "learning_rate": 5.840306443586024e-06, "loss": 0.527, "step": 8819 }, { "epoch": 0.4759592034968431, "grad_norm": 0.9034596292845017, "learning_rate": 5.83954573047013e-06, "loss": 0.3565, "step": 8820 }, { "epoch": 0.4760131671253575, "grad_norm": 1.147294163414714, "learning_rate": 5.83878500759535e-06, "loss": 0.4802, "step": 8821 }, { "epoch": 0.4760671307538719, "grad_norm": 1.0050032962143893, "learning_rate": 5.838024274983545e-06, "loss": 0.4135, "step": 8822 }, { "epoch": 0.47612109438238626, "grad_norm": 1.0925049397520792, "learning_rate": 5.837263532656583e-06, "loss": 0.6075, "step": 8823 }, { "epoch": 0.47617505801090065, "grad_norm": 0.9550104830098863, "learning_rate": 5.836502780636325e-06, "loss": 0.4455, "step": 8824 }, { "epoch": 0.47622902163941505, "grad_norm": 1.171530761690344, "learning_rate": 5.835742018944637e-06, "loss": 0.4156, "step": 8825 }, { "epoch": 0.4762829852679294, "grad_norm": 0.8162749723093464, "learning_rate": 5.834981247603384e-06, "loss": 0.3656, "step": 8826 }, { "epoch": 0.4763369488964438, "grad_norm": 0.9758331264386221, "learning_rate": 5.834220466634431e-06, "loss": 0.429, "step": 8827 }, { "epoch": 0.4763909125249582, "grad_norm": 1.0787803110694003, "learning_rate": 5.8334596760596455e-06, "loss": 0.4664, "step": 8828 }, { "epoch": 0.4764448761534726, "grad_norm": 0.9959427480544031, "learning_rate": 5.832698875900891e-06, "loss": 0.4272, "step": 8829 }, { "epoch": 0.4764988397819869, "grad_norm": 0.901218094320874, "learning_rate": 5.831938066180036e-06, "loss": 0.4958, "step": 8830 }, { "epoch": 0.4765528034105013, "grad_norm": 1.0304045555586219, "learning_rate": 5.831177246918944e-06, "loss": 0.5764, "step": 8831 }, { "epoch": 0.4766067670390157, "grad_norm": 1.234312144147616, "learning_rate": 5.830416418139484e-06, 
"loss": 0.7064, "step": 8832 }, { "epoch": 0.47666073066753006, "grad_norm": 0.9952506306494329, "learning_rate": 5.829655579863522e-06, "loss": 0.3757, "step": 8833 }, { "epoch": 0.47671469429604446, "grad_norm": 0.863998548222133, "learning_rate": 5.828894732112925e-06, "loss": 0.3876, "step": 8834 }, { "epoch": 0.47676865792455886, "grad_norm": 0.9270008079067744, "learning_rate": 5.828133874909562e-06, "loss": 0.4776, "step": 8835 }, { "epoch": 0.47682262155307326, "grad_norm": 0.8174685301978881, "learning_rate": 5.827373008275299e-06, "loss": 0.3228, "step": 8836 }, { "epoch": 0.4768765851815876, "grad_norm": 0.9579412923260727, "learning_rate": 5.826612132232007e-06, "loss": 0.4507, "step": 8837 }, { "epoch": 0.476930548810102, "grad_norm": 1.024534861713695, "learning_rate": 5.825851246801551e-06, "loss": 0.5037, "step": 8838 }, { "epoch": 0.4769845124386164, "grad_norm": 1.0105057738566499, "learning_rate": 5.8250903520058e-06, "loss": 0.4007, "step": 8839 }, { "epoch": 0.47703847606713073, "grad_norm": 0.9023289093199663, "learning_rate": 5.824329447866625e-06, "loss": 0.4965, "step": 8840 }, { "epoch": 0.47709243969564513, "grad_norm": 1.0848484974574562, "learning_rate": 5.823568534405893e-06, "loss": 0.5123, "step": 8841 }, { "epoch": 0.47714640332415953, "grad_norm": 0.831128821514199, "learning_rate": 5.822807611645475e-06, "loss": 0.3673, "step": 8842 }, { "epoch": 0.47720036695267387, "grad_norm": 0.9704444334724761, "learning_rate": 5.8220466796072395e-06, "loss": 0.427, "step": 8843 }, { "epoch": 0.47725433058118827, "grad_norm": 0.8256573141878355, "learning_rate": 5.821285738313057e-06, "loss": 0.3416, "step": 8844 }, { "epoch": 0.47730829420970267, "grad_norm": 0.9464244578589652, "learning_rate": 5.820524787784798e-06, "loss": 0.454, "step": 8845 }, { "epoch": 0.47736225783821706, "grad_norm": 1.0101224134531765, "learning_rate": 5.819763828044334e-06, "loss": 0.468, "step": 8846 }, { "epoch": 0.4774162214667314, "grad_norm": 0.8547167430240799, "learning_rate": 5.819002859113533e-06, "loss": 0.3831, "step": 8847 }, { "epoch": 0.4774701850952458, "grad_norm": 0.9272011612944001, "learning_rate": 5.818241881014267e-06, "loss": 0.5051, "step": 8848 }, { "epoch": 0.4775241487237602, "grad_norm": 1.1679801913986676, "learning_rate": 5.8174808937684095e-06, "loss": 0.4902, "step": 8849 }, { "epoch": 0.47757811235227454, "grad_norm": 1.1501105898356971, "learning_rate": 5.816719897397829e-06, "loss": 0.509, "step": 8850 }, { "epoch": 0.47763207598078894, "grad_norm": 1.2881002030575865, "learning_rate": 5.8159588919244e-06, "loss": 0.6213, "step": 8851 }, { "epoch": 0.47768603960930334, "grad_norm": 0.9764972360754548, "learning_rate": 5.815197877369991e-06, "loss": 0.4409, "step": 8852 }, { "epoch": 0.47774000323781773, "grad_norm": 0.9469126734973976, "learning_rate": 5.814436853756479e-06, "loss": 0.5842, "step": 8853 }, { "epoch": 0.4777939668663321, "grad_norm": 0.7807368992074802, "learning_rate": 5.813675821105733e-06, "loss": 0.3347, "step": 8854 }, { "epoch": 0.4778479304948465, "grad_norm": 1.0268379417114026, "learning_rate": 5.812914779439626e-06, "loss": 0.5405, "step": 8855 }, { "epoch": 0.47790189412336087, "grad_norm": 1.0826772298220964, "learning_rate": 5.812153728780034e-06, "loss": 0.4706, "step": 8856 }, { "epoch": 0.4779558577518752, "grad_norm": 1.0563077415437647, "learning_rate": 5.8113926691488274e-06, "loss": 0.6322, "step": 8857 }, { "epoch": 0.4780098213803896, "grad_norm": 0.783275506876491, "learning_rate": 5.810631600567881e-06, "loss": 
0.3034, "step": 8858 }, { "epoch": 0.478063785008904, "grad_norm": 1.055601073925802, "learning_rate": 5.809870523059068e-06, "loss": 0.6073, "step": 8859 }, { "epoch": 0.4781177486374184, "grad_norm": 1.3724594431857093, "learning_rate": 5.809109436644263e-06, "loss": 0.6055, "step": 8860 }, { "epoch": 0.47817171226593275, "grad_norm": 0.8948912670954919, "learning_rate": 5.808348341345341e-06, "loss": 0.3601, "step": 8861 }, { "epoch": 0.47822567589444714, "grad_norm": 1.1041785679653497, "learning_rate": 5.807587237184176e-06, "loss": 0.4815, "step": 8862 }, { "epoch": 0.47827963952296154, "grad_norm": 0.9840051277294879, "learning_rate": 5.806826124182643e-06, "loss": 0.5368, "step": 8863 }, { "epoch": 0.4783336031514759, "grad_norm": 0.9680965164665438, "learning_rate": 5.806065002362618e-06, "loss": 0.382, "step": 8864 }, { "epoch": 0.4783875667799903, "grad_norm": 1.2248253460092964, "learning_rate": 5.8053038717459754e-06, "loss": 0.5901, "step": 8865 }, { "epoch": 0.4784415304085047, "grad_norm": 0.8389630193506976, "learning_rate": 5.80454273235459e-06, "loss": 0.3816, "step": 8866 }, { "epoch": 0.4784954940370191, "grad_norm": 0.7991333292661946, "learning_rate": 5.803781584210338e-06, "loss": 0.3301, "step": 8867 }, { "epoch": 0.4785494576655334, "grad_norm": 0.9073954995637065, "learning_rate": 5.803020427335099e-06, "loss": 0.479, "step": 8868 }, { "epoch": 0.4786034212940478, "grad_norm": 0.9065046598187955, "learning_rate": 5.802259261750745e-06, "loss": 0.364, "step": 8869 }, { "epoch": 0.4786573849225622, "grad_norm": 1.0245535135295143, "learning_rate": 5.801498087479154e-06, "loss": 0.445, "step": 8870 }, { "epoch": 0.47871134855107655, "grad_norm": 0.8779865305973908, "learning_rate": 5.800736904542205e-06, "loss": 0.4138, "step": 8871 }, { "epoch": 0.47876531217959095, "grad_norm": 1.0852887459607508, "learning_rate": 5.799975712961771e-06, "loss": 0.6283, "step": 8872 }, { "epoch": 0.47881927580810535, "grad_norm": 0.8733702788836811, "learning_rate": 5.799214512759733e-06, "loss": 0.4734, "step": 8873 }, { "epoch": 0.4788732394366197, "grad_norm": 1.0456845555660599, "learning_rate": 5.798453303957968e-06, "loss": 0.4103, "step": 8874 }, { "epoch": 0.4789272030651341, "grad_norm": 1.2275330415314751, "learning_rate": 5.797692086578354e-06, "loss": 0.5701, "step": 8875 }, { "epoch": 0.4789811666936485, "grad_norm": 1.1256097613380478, "learning_rate": 5.796930860642768e-06, "loss": 0.5225, "step": 8876 }, { "epoch": 0.4790351303221629, "grad_norm": 1.1349965115451373, "learning_rate": 5.79616962617309e-06, "loss": 0.4443, "step": 8877 }, { "epoch": 0.4790890939506772, "grad_norm": 0.9681892691867793, "learning_rate": 5.795408383191196e-06, "loss": 0.5399, "step": 8878 }, { "epoch": 0.4791430575791916, "grad_norm": 1.00383575244915, "learning_rate": 5.794647131718967e-06, "loss": 0.4244, "step": 8879 }, { "epoch": 0.479197021207706, "grad_norm": 0.92672660583232, "learning_rate": 5.793885871778282e-06, "loss": 0.4541, "step": 8880 }, { "epoch": 0.47925098483622036, "grad_norm": 0.9288889071343785, "learning_rate": 5.79312460339102e-06, "loss": 0.4369, "step": 8881 }, { "epoch": 0.47930494846473476, "grad_norm": 1.0633120284826474, "learning_rate": 5.792363326579061e-06, "loss": 0.5531, "step": 8882 }, { "epoch": 0.47935891209324916, "grad_norm": 0.8600137114115991, "learning_rate": 5.791602041364285e-06, "loss": 0.3956, "step": 8883 }, { "epoch": 0.47941287572176355, "grad_norm": 1.0490335759406204, "learning_rate": 5.79084074776857e-06, "loss": 0.6398, "step": 8884 
}, { "epoch": 0.4794668393502779, "grad_norm": 0.9509894936400564, "learning_rate": 5.790079445813801e-06, "loss": 0.4231, "step": 8885 }, { "epoch": 0.4795208029787923, "grad_norm": 1.0457517323743697, "learning_rate": 5.789318135521853e-06, "loss": 0.5844, "step": 8886 }, { "epoch": 0.4795747666073067, "grad_norm": 0.8268696246696647, "learning_rate": 5.78855681691461e-06, "loss": 0.3996, "step": 8887 }, { "epoch": 0.47962873023582103, "grad_norm": 0.9871129221685812, "learning_rate": 5.787795490013954e-06, "loss": 0.5157, "step": 8888 }, { "epoch": 0.47968269386433543, "grad_norm": 1.129557657669863, "learning_rate": 5.787034154841763e-06, "loss": 0.4982, "step": 8889 }, { "epoch": 0.4797366574928498, "grad_norm": 0.7994494212548594, "learning_rate": 5.7862728114199205e-06, "loss": 0.3599, "step": 8890 }, { "epoch": 0.4797906211213642, "grad_norm": 0.9375638558451366, "learning_rate": 5.785511459770308e-06, "loss": 0.4736, "step": 8891 }, { "epoch": 0.47984458474987857, "grad_norm": 1.0015626751652345, "learning_rate": 5.784750099914809e-06, "loss": 0.4188, "step": 8892 }, { "epoch": 0.47989854837839296, "grad_norm": 1.102586695031359, "learning_rate": 5.7839887318753026e-06, "loss": 0.4733, "step": 8893 }, { "epoch": 0.47995251200690736, "grad_norm": 1.1645781076203017, "learning_rate": 5.783227355673675e-06, "loss": 0.5567, "step": 8894 }, { "epoch": 0.4800064756354217, "grad_norm": 0.9574314256299064, "learning_rate": 5.782465971331805e-06, "loss": 0.3687, "step": 8895 }, { "epoch": 0.4800604392639361, "grad_norm": 1.0190531593213792, "learning_rate": 5.7817045788715795e-06, "loss": 0.5341, "step": 8896 }, { "epoch": 0.4801144028924505, "grad_norm": 1.0621539401027396, "learning_rate": 5.780943178314878e-06, "loss": 0.399, "step": 8897 }, { "epoch": 0.4801683665209649, "grad_norm": 0.8507418523449214, "learning_rate": 5.780181769683587e-06, "loss": 0.3957, "step": 8898 }, { "epoch": 0.48022233014947924, "grad_norm": 0.9612091584900498, "learning_rate": 5.77942035299959e-06, "loss": 0.4325, "step": 8899 }, { "epoch": 0.48027629377799363, "grad_norm": 1.011532456107316, "learning_rate": 5.778658928284767e-06, "loss": 0.5285, "step": 8900 }, { "epoch": 0.48033025740650803, "grad_norm": 0.9365141855321122, "learning_rate": 5.777897495561007e-06, "loss": 0.4502, "step": 8901 }, { "epoch": 0.4803842210350224, "grad_norm": 1.0771903439266712, "learning_rate": 5.777136054850192e-06, "loss": 0.4592, "step": 8902 }, { "epoch": 0.48043818466353677, "grad_norm": 1.2452574762112747, "learning_rate": 5.776374606174204e-06, "loss": 0.5045, "step": 8903 }, { "epoch": 0.48049214829205117, "grad_norm": 1.0037633303829268, "learning_rate": 5.775613149554934e-06, "loss": 0.4026, "step": 8904 }, { "epoch": 0.48054611192056557, "grad_norm": 1.0867070098645655, "learning_rate": 5.774851685014262e-06, "loss": 0.5337, "step": 8905 }, { "epoch": 0.4806000755490799, "grad_norm": 0.9195884251987443, "learning_rate": 5.774090212574075e-06, "loss": 0.392, "step": 8906 }, { "epoch": 0.4806540391775943, "grad_norm": 1.1452438588087077, "learning_rate": 5.7733287322562595e-06, "loss": 0.5456, "step": 8907 }, { "epoch": 0.4807080028061087, "grad_norm": 0.9567835244027647, "learning_rate": 5.772567244082697e-06, "loss": 0.462, "step": 8908 }, { "epoch": 0.48076196643462304, "grad_norm": 1.0962937067542196, "learning_rate": 5.77180574807528e-06, "loss": 0.5603, "step": 8909 }, { "epoch": 0.48081593006313744, "grad_norm": 0.9258578759401118, "learning_rate": 5.771044244255889e-06, "loss": 0.4985, "step": 8910 }, { 
"epoch": 0.48086989369165184, "grad_norm": 0.8311079611091676, "learning_rate": 5.7702827326464124e-06, "loss": 0.4456, "step": 8911 }, { "epoch": 0.4809238573201662, "grad_norm": 1.2334326954154025, "learning_rate": 5.769521213268739e-06, "loss": 0.529, "step": 8912 }, { "epoch": 0.4809778209486806, "grad_norm": 1.1006202110605932, "learning_rate": 5.768759686144751e-06, "loss": 0.5085, "step": 8913 }, { "epoch": 0.481031784577195, "grad_norm": 1.094079420476509, "learning_rate": 5.76799815129634e-06, "loss": 0.5607, "step": 8914 }, { "epoch": 0.4810857482057094, "grad_norm": 1.1132249211671637, "learning_rate": 5.76723660874539e-06, "loss": 0.4689, "step": 8915 }, { "epoch": 0.4811397118342237, "grad_norm": 1.1060881852396283, "learning_rate": 5.766475058513792e-06, "loss": 0.6089, "step": 8916 }, { "epoch": 0.4811936754627381, "grad_norm": 1.0425288247847708, "learning_rate": 5.76571350062343e-06, "loss": 0.5859, "step": 8917 }, { "epoch": 0.4812476390912525, "grad_norm": 1.2397901988894628, "learning_rate": 5.764951935096194e-06, "loss": 0.578, "step": 8918 }, { "epoch": 0.48130160271976685, "grad_norm": 1.172354984819246, "learning_rate": 5.764190361953971e-06, "loss": 0.5803, "step": 8919 }, { "epoch": 0.48135556634828125, "grad_norm": 1.463508585005897, "learning_rate": 5.7634287812186495e-06, "loss": 0.7346, "step": 8920 }, { "epoch": 0.48140952997679565, "grad_norm": 1.1229682052135896, "learning_rate": 5.762667192912119e-06, "loss": 0.5595, "step": 8921 }, { "epoch": 0.48146349360531004, "grad_norm": 0.8261681550016152, "learning_rate": 5.761905597056269e-06, "loss": 0.3684, "step": 8922 }, { "epoch": 0.4815174572338244, "grad_norm": 1.3801487299326027, "learning_rate": 5.761143993672988e-06, "loss": 0.6978, "step": 8923 }, { "epoch": 0.4815714208623388, "grad_norm": 1.2878417445671688, "learning_rate": 5.760382382784163e-06, "loss": 0.5309, "step": 8924 }, { "epoch": 0.4816253844908532, "grad_norm": 1.2579304255340757, "learning_rate": 5.759620764411685e-06, "loss": 0.6228, "step": 8925 }, { "epoch": 0.4816793481193675, "grad_norm": 1.0495904391998365, "learning_rate": 5.758859138577445e-06, "loss": 0.6004, "step": 8926 }, { "epoch": 0.4817333117478819, "grad_norm": 0.9783846015441312, "learning_rate": 5.758097505303332e-06, "loss": 0.5088, "step": 8927 }, { "epoch": 0.4817872753763963, "grad_norm": 1.0104148047712003, "learning_rate": 5.7573358646112355e-06, "loss": 0.5835, "step": 8928 }, { "epoch": 0.4818412390049107, "grad_norm": 1.1334875837146274, "learning_rate": 5.756574216523045e-06, "loss": 0.4572, "step": 8929 }, { "epoch": 0.48189520263342506, "grad_norm": 1.22182243226313, "learning_rate": 5.755812561060655e-06, "loss": 0.5662, "step": 8930 }, { "epoch": 0.48194916626193945, "grad_norm": 0.8294062249920503, "learning_rate": 5.755050898245953e-06, "loss": 0.4676, "step": 8931 }, { "epoch": 0.48200312989045385, "grad_norm": 1.1460688883928374, "learning_rate": 5.754289228100828e-06, "loss": 0.4912, "step": 8932 }, { "epoch": 0.4820570935189682, "grad_norm": 1.1348808406924946, "learning_rate": 5.753527550647177e-06, "loss": 0.504, "step": 8933 }, { "epoch": 0.4821110571474826, "grad_norm": 1.1285900077698594, "learning_rate": 5.7527658659068855e-06, "loss": 0.7409, "step": 8934 }, { "epoch": 0.482165020775997, "grad_norm": 1.2746779428259416, "learning_rate": 5.752004173901849e-06, "loss": 0.6315, "step": 8935 }, { "epoch": 0.4822189844045114, "grad_norm": 1.0116447882994264, "learning_rate": 5.751242474653955e-06, "loss": 0.4994, "step": 8936 }, { "epoch": 
0.48227294803302573, "grad_norm": 1.1203678939909256, "learning_rate": 5.750480768185103e-06, "loss": 0.6108, "step": 8937 }, { "epoch": 0.4823269116615401, "grad_norm": 1.0700397519916547, "learning_rate": 5.749719054517176e-06, "loss": 0.5585, "step": 8938 }, { "epoch": 0.4823808752900545, "grad_norm": 0.9071431914159915, "learning_rate": 5.748957333672075e-06, "loss": 0.4203, "step": 8939 }, { "epoch": 0.48243483891856886, "grad_norm": 0.9787891481946263, "learning_rate": 5.748195605671687e-06, "loss": 0.4756, "step": 8940 }, { "epoch": 0.48248880254708326, "grad_norm": 0.9368911078968601, "learning_rate": 5.747433870537906e-06, "loss": 0.3959, "step": 8941 }, { "epoch": 0.48254276617559766, "grad_norm": 1.0210008027774962, "learning_rate": 5.746672128292626e-06, "loss": 0.4669, "step": 8942 }, { "epoch": 0.482596729804112, "grad_norm": 0.9871705550131088, "learning_rate": 5.745910378957741e-06, "loss": 0.5155, "step": 8943 }, { "epoch": 0.4826506934326264, "grad_norm": 1.0248368452345042, "learning_rate": 5.745148622555143e-06, "loss": 0.414, "step": 8944 }, { "epoch": 0.4827046570611408, "grad_norm": 1.1171592727394923, "learning_rate": 5.744386859106723e-06, "loss": 0.6185, "step": 8945 }, { "epoch": 0.4827586206896552, "grad_norm": 0.9800103183825898, "learning_rate": 5.74362508863438e-06, "loss": 0.3584, "step": 8946 }, { "epoch": 0.48281258431816954, "grad_norm": 1.0649573867502167, "learning_rate": 5.7428633111600055e-06, "loss": 0.6278, "step": 8947 }, { "epoch": 0.48286654794668393, "grad_norm": 1.014284563745636, "learning_rate": 5.742101526705492e-06, "loss": 0.3942, "step": 8948 }, { "epoch": 0.48292051157519833, "grad_norm": 1.0984175549142965, "learning_rate": 5.741339735292739e-06, "loss": 0.5857, "step": 8949 }, { "epoch": 0.48297447520371267, "grad_norm": 1.175613186975254, "learning_rate": 5.740577936943639e-06, "loss": 0.5437, "step": 8950 }, { "epoch": 0.48302843883222707, "grad_norm": 0.986151807392606, "learning_rate": 5.739816131680083e-06, "loss": 0.3765, "step": 8951 }, { "epoch": 0.48308240246074147, "grad_norm": 1.075751531113955, "learning_rate": 5.73905431952397e-06, "loss": 0.4863, "step": 8952 }, { "epoch": 0.48313636608925586, "grad_norm": 1.1144126332986921, "learning_rate": 5.738292500497195e-06, "loss": 0.6746, "step": 8953 }, { "epoch": 0.4831903297177702, "grad_norm": 0.9633828516217373, "learning_rate": 5.737530674621653e-06, "loss": 0.4162, "step": 8954 }, { "epoch": 0.4832442933462846, "grad_norm": 0.9439306333038188, "learning_rate": 5.736768841919238e-06, "loss": 0.5008, "step": 8955 }, { "epoch": 0.483298256974799, "grad_norm": 1.2358260784603383, "learning_rate": 5.736007002411847e-06, "loss": 0.6265, "step": 8956 }, { "epoch": 0.48335222060331334, "grad_norm": 1.0836553524257568, "learning_rate": 5.735245156121379e-06, "loss": 0.6006, "step": 8957 }, { "epoch": 0.48340618423182774, "grad_norm": 1.0012580915814944, "learning_rate": 5.734483303069724e-06, "loss": 0.4641, "step": 8958 }, { "epoch": 0.48346014786034214, "grad_norm": 0.9668970168525486, "learning_rate": 5.733721443278785e-06, "loss": 0.3939, "step": 8959 }, { "epoch": 0.48351411148885654, "grad_norm": 1.1900248120154215, "learning_rate": 5.7329595767704534e-06, "loss": 0.4988, "step": 8960 }, { "epoch": 0.4835680751173709, "grad_norm": 1.0152406186454868, "learning_rate": 5.73219770356663e-06, "loss": 0.499, "step": 8961 }, { "epoch": 0.4836220387458853, "grad_norm": 1.1021822170069868, "learning_rate": 5.731435823689209e-06, "loss": 0.6884, "step": 8962 }, { "epoch": 
0.48367600237439967, "grad_norm": 1.0006494810227646, "learning_rate": 5.730673937160088e-06, "loss": 0.287, "step": 8963 }, { "epoch": 0.483729966002914, "grad_norm": 1.1068327646258418, "learning_rate": 5.729912044001167e-06, "loss": 0.5092, "step": 8964 }, { "epoch": 0.4837839296314284, "grad_norm": 1.058126201211116, "learning_rate": 5.72915014423434e-06, "loss": 0.4685, "step": 8965 }, { "epoch": 0.4838378932599428, "grad_norm": 1.0360707359613686, "learning_rate": 5.728388237881506e-06, "loss": 0.5583, "step": 8966 }, { "epoch": 0.4838918568884572, "grad_norm": 1.01881566521573, "learning_rate": 5.727626324964563e-06, "loss": 0.4436, "step": 8967 }, { "epoch": 0.48394582051697155, "grad_norm": 1.1077278620378985, "learning_rate": 5.72686440550541e-06, "loss": 0.6414, "step": 8968 }, { "epoch": 0.48399978414548595, "grad_norm": 0.9909253950888102, "learning_rate": 5.726102479525946e-06, "loss": 0.5359, "step": 8969 }, { "epoch": 0.48405374777400034, "grad_norm": 0.9259317706327108, "learning_rate": 5.725340547048067e-06, "loss": 0.4111, "step": 8970 }, { "epoch": 0.4841077114025147, "grad_norm": 1.1040840041914934, "learning_rate": 5.724578608093671e-06, "loss": 0.5298, "step": 8971 }, { "epoch": 0.4841616750310291, "grad_norm": 0.8878371820939249, "learning_rate": 5.7238166626846625e-06, "loss": 0.4146, "step": 8972 }, { "epoch": 0.4842156386595435, "grad_norm": 1.0624860652347219, "learning_rate": 5.7230547108429356e-06, "loss": 0.4169, "step": 8973 }, { "epoch": 0.4842696022880579, "grad_norm": 0.9531422758317465, "learning_rate": 5.722292752590391e-06, "loss": 0.4657, "step": 8974 }, { "epoch": 0.4843235659165722, "grad_norm": 1.2090817474443107, "learning_rate": 5.721530787948927e-06, "loss": 0.5508, "step": 8975 }, { "epoch": 0.4843775295450866, "grad_norm": 1.0376787396018732, "learning_rate": 5.720768816940446e-06, "loss": 0.5389, "step": 8976 }, { "epoch": 0.484431493173601, "grad_norm": 1.068804389898577, "learning_rate": 5.720006839586845e-06, "loss": 0.5748, "step": 8977 }, { "epoch": 0.48448545680211536, "grad_norm": 1.0994788034046652, "learning_rate": 5.719244855910027e-06, "loss": 0.4193, "step": 8978 }, { "epoch": 0.48453942043062975, "grad_norm": 0.9513739707833284, "learning_rate": 5.7184828659318866e-06, "loss": 0.4356, "step": 8979 }, { "epoch": 0.48459338405914415, "grad_norm": 0.8428064281541723, "learning_rate": 5.71772086967433e-06, "loss": 0.3693, "step": 8980 }, { "epoch": 0.4846473476876585, "grad_norm": 0.963015309586769, "learning_rate": 5.716958867159257e-06, "loss": 0.4981, "step": 8981 }, { "epoch": 0.4847013113161729, "grad_norm": 0.8541293958376956, "learning_rate": 5.7161968584085645e-06, "loss": 0.3019, "step": 8982 }, { "epoch": 0.4847552749446873, "grad_norm": 0.9988183500721246, "learning_rate": 5.715434843444158e-06, "loss": 0.4503, "step": 8983 }, { "epoch": 0.4848092385732017, "grad_norm": 0.9363316495309228, "learning_rate": 5.714672822287934e-06, "loss": 0.4624, "step": 8984 }, { "epoch": 0.484863202201716, "grad_norm": 0.9293958585906826, "learning_rate": 5.7139107949618004e-06, "loss": 0.298, "step": 8985 }, { "epoch": 0.4849171658302304, "grad_norm": 1.032124933885376, "learning_rate": 5.713148761487649e-06, "loss": 0.4632, "step": 8986 }, { "epoch": 0.4849711294587448, "grad_norm": 1.0153976170584875, "learning_rate": 5.71238672188739e-06, "loss": 0.4461, "step": 8987 }, { "epoch": 0.48502509308725916, "grad_norm": 1.0499519929240382, "learning_rate": 5.711624676182922e-06, "loss": 0.5287, "step": 8988 }, { "epoch": 0.48507905671577356, 
"grad_norm": 1.0115417408825789, "learning_rate": 5.710862624396146e-06, "loss": 0.4504, "step": 8989 }, { "epoch": 0.48513302034428796, "grad_norm": 0.971420612604982, "learning_rate": 5.710100566548966e-06, "loss": 0.5301, "step": 8990 }, { "epoch": 0.48518698397280235, "grad_norm": 1.0257195551501725, "learning_rate": 5.709338502663283e-06, "loss": 0.4326, "step": 8991 }, { "epoch": 0.4852409476013167, "grad_norm": 0.9799627868139753, "learning_rate": 5.7085764327610006e-06, "loss": 0.5442, "step": 8992 }, { "epoch": 0.4852949112298311, "grad_norm": 1.1205979618437802, "learning_rate": 5.707814356864019e-06, "loss": 0.5538, "step": 8993 }, { "epoch": 0.4853488748583455, "grad_norm": 1.3131174789829034, "learning_rate": 5.707052274994243e-06, "loss": 0.5984, "step": 8994 }, { "epoch": 0.48540283848685983, "grad_norm": 0.7982198698311004, "learning_rate": 5.706290187173577e-06, "loss": 0.348, "step": 8995 }, { "epoch": 0.48545680211537423, "grad_norm": 1.0185883179732644, "learning_rate": 5.705528093423922e-06, "loss": 0.5653, "step": 8996 }, { "epoch": 0.48551076574388863, "grad_norm": 0.8967157836487984, "learning_rate": 5.704765993767182e-06, "loss": 0.4219, "step": 8997 }, { "epoch": 0.485564729372403, "grad_norm": 1.0141133278094963, "learning_rate": 5.704003888225259e-06, "loss": 0.4431, "step": 8998 }, { "epoch": 0.48561869300091737, "grad_norm": 1.1906450569622669, "learning_rate": 5.703241776820059e-06, "loss": 0.6254, "step": 8999 }, { "epoch": 0.48567265662943176, "grad_norm": 1.0696067044142734, "learning_rate": 5.702479659573485e-06, "loss": 0.438, "step": 9000 }, { "epoch": 0.48567265662943176, "eval_loss": 0.5507768988609314, "eval_runtime": 163.5998, "eval_samples_per_second": 21.021, "eval_steps_per_second": 0.88, "step": 9000 }, { "epoch": 0.48572662025794616, "grad_norm": 0.8258489387750173, "learning_rate": 5.701717536507441e-06, "loss": 0.4522, "step": 9001 }, { "epoch": 0.4857805838864605, "grad_norm": 1.0708704774637918, "learning_rate": 5.700955407643833e-06, "loss": 0.5225, "step": 9002 }, { "epoch": 0.4858345475149749, "grad_norm": 1.191189551222171, "learning_rate": 5.700193273004562e-06, "loss": 0.5774, "step": 9003 }, { "epoch": 0.4858885111434893, "grad_norm": 1.0683967910429446, "learning_rate": 5.699431132611532e-06, "loss": 0.4572, "step": 9004 }, { "epoch": 0.4859424747720037, "grad_norm": 0.7811941286369862, "learning_rate": 5.698668986486652e-06, "loss": 0.3449, "step": 9005 }, { "epoch": 0.48599643840051804, "grad_norm": 0.8997353390492202, "learning_rate": 5.697906834651825e-06, "loss": 0.3701, "step": 9006 }, { "epoch": 0.48605040202903244, "grad_norm": 1.0160993455366434, "learning_rate": 5.697144677128955e-06, "loss": 0.5007, "step": 9007 }, { "epoch": 0.48610436565754683, "grad_norm": 0.9810074999233304, "learning_rate": 5.696382513939947e-06, "loss": 0.4599, "step": 9008 }, { "epoch": 0.4861583292860612, "grad_norm": 1.175000637691287, "learning_rate": 5.695620345106709e-06, "loss": 0.5142, "step": 9009 }, { "epoch": 0.4862122929145756, "grad_norm": 0.9986410358138473, "learning_rate": 5.694858170651144e-06, "loss": 0.5835, "step": 9010 }, { "epoch": 0.48626625654308997, "grad_norm": 1.1869735562438195, "learning_rate": 5.694095990595158e-06, "loss": 0.8689, "step": 9011 }, { "epoch": 0.4863202201716043, "grad_norm": 1.1913636863835737, "learning_rate": 5.693333804960657e-06, "loss": 0.5273, "step": 9012 }, { "epoch": 0.4863741838001187, "grad_norm": 0.9477564463264486, "learning_rate": 5.692571613769546e-06, "loss": 0.4125, "step": 9013 }, { 
"epoch": 0.4864281474286331, "grad_norm": 1.0995654302186082, "learning_rate": 5.691809417043733e-06, "loss": 0.7489, "step": 9014 }, { "epoch": 0.4864821110571475, "grad_norm": 1.0543152392843842, "learning_rate": 5.691047214805123e-06, "loss": 0.6065, "step": 9015 }, { "epoch": 0.48653607468566185, "grad_norm": 1.0807222990521688, "learning_rate": 5.6902850070756255e-06, "loss": 0.4433, "step": 9016 }, { "epoch": 0.48659003831417624, "grad_norm": 1.0448691638347891, "learning_rate": 5.689522793877141e-06, "loss": 0.6474, "step": 9017 }, { "epoch": 0.48664400194269064, "grad_norm": 1.004580854451043, "learning_rate": 5.6887605752315825e-06, "loss": 0.3622, "step": 9018 }, { "epoch": 0.486697965571205, "grad_norm": 1.0538013577635714, "learning_rate": 5.687998351160855e-06, "loss": 0.5465, "step": 9019 }, { "epoch": 0.4867519291997194, "grad_norm": 1.0095164775349887, "learning_rate": 5.687236121686862e-06, "loss": 0.4933, "step": 9020 }, { "epoch": 0.4868058928282338, "grad_norm": 1.0856562773300655, "learning_rate": 5.686473886831516e-06, "loss": 0.4879, "step": 9021 }, { "epoch": 0.4868598564567482, "grad_norm": 0.9298221523035938, "learning_rate": 5.685711646616722e-06, "loss": 0.4063, "step": 9022 }, { "epoch": 0.4869138200852625, "grad_norm": 1.2314667157371089, "learning_rate": 5.684949401064385e-06, "loss": 0.4448, "step": 9023 }, { "epoch": 0.4869677837137769, "grad_norm": 1.2143350841957286, "learning_rate": 5.6841871501964175e-06, "loss": 0.3794, "step": 9024 }, { "epoch": 0.4870217473422913, "grad_norm": 0.8478134258752489, "learning_rate": 5.6834248940347245e-06, "loss": 0.3676, "step": 9025 }, { "epoch": 0.48707571097080565, "grad_norm": 1.0983243100547224, "learning_rate": 5.682662632601216e-06, "loss": 0.5803, "step": 9026 }, { "epoch": 0.48712967459932005, "grad_norm": 1.0237021261518908, "learning_rate": 5.6819003659177984e-06, "loss": 0.4509, "step": 9027 }, { "epoch": 0.48718363822783445, "grad_norm": 0.9966202257210777, "learning_rate": 5.68113809400638e-06, "loss": 0.368, "step": 9028 }, { "epoch": 0.48723760185634885, "grad_norm": 0.9748564726544412, "learning_rate": 5.680375816888869e-06, "loss": 0.3873, "step": 9029 }, { "epoch": 0.4872915654848632, "grad_norm": 0.8974139130744889, "learning_rate": 5.679613534587176e-06, "loss": 0.4133, "step": 9030 }, { "epoch": 0.4873455291133776, "grad_norm": 1.011780184903026, "learning_rate": 5.678851247123208e-06, "loss": 0.4575, "step": 9031 }, { "epoch": 0.487399492741892, "grad_norm": 1.1732641803251664, "learning_rate": 5.678088954518874e-06, "loss": 0.4771, "step": 9032 }, { "epoch": 0.4874534563704063, "grad_norm": 1.0343273495426395, "learning_rate": 5.6773266567960864e-06, "loss": 0.436, "step": 9033 }, { "epoch": 0.4875074199989207, "grad_norm": 1.161626929202276, "learning_rate": 5.676564353976748e-06, "loss": 0.5725, "step": 9034 }, { "epoch": 0.4875613836274351, "grad_norm": 1.0270623916816533, "learning_rate": 5.675802046082772e-06, "loss": 0.3951, "step": 9035 }, { "epoch": 0.4876153472559495, "grad_norm": 0.9967003999386687, "learning_rate": 5.67503973313607e-06, "loss": 0.4063, "step": 9036 }, { "epoch": 0.48766931088446386, "grad_norm": 1.015640738263481, "learning_rate": 5.674277415158548e-06, "loss": 0.4719, "step": 9037 }, { "epoch": 0.48772327451297826, "grad_norm": 1.0997044271434895, "learning_rate": 5.673515092172115e-06, "loss": 0.5361, "step": 9038 }, { "epoch": 0.48777723814149265, "grad_norm": 1.0144318479358967, "learning_rate": 5.6727527641986854e-06, "loss": 0.529, "step": 9039 }, { "epoch": 
0.487831201770007, "grad_norm": 1.134180627063445, "learning_rate": 5.671990431260166e-06, "loss": 0.5206, "step": 9040 }, { "epoch": 0.4878851653985214, "grad_norm": 1.0445275579652313, "learning_rate": 5.671228093378468e-06, "loss": 0.5604, "step": 9041 }, { "epoch": 0.4879391290270358, "grad_norm": 1.0195743761493503, "learning_rate": 5.670465750575501e-06, "loss": 0.6556, "step": 9042 }, { "epoch": 0.4879930926555502, "grad_norm": 0.8494023680929432, "learning_rate": 5.669703402873177e-06, "loss": 0.4281, "step": 9043 }, { "epoch": 0.48804705628406453, "grad_norm": 1.0654554688769102, "learning_rate": 5.668941050293405e-06, "loss": 0.4872, "step": 9044 }, { "epoch": 0.4881010199125789, "grad_norm": 0.9595716703938717, "learning_rate": 5.6681786928580965e-06, "loss": 0.4723, "step": 9045 }, { "epoch": 0.4881549835410933, "grad_norm": 1.0024533210719815, "learning_rate": 5.667416330589161e-06, "loss": 0.4153, "step": 9046 }, { "epoch": 0.48820894716960767, "grad_norm": 0.9008841006155078, "learning_rate": 5.666653963508514e-06, "loss": 0.3582, "step": 9047 }, { "epoch": 0.48826291079812206, "grad_norm": 1.2054046148338748, "learning_rate": 5.665891591638063e-06, "loss": 0.6435, "step": 9048 }, { "epoch": 0.48831687442663646, "grad_norm": 0.9455730915596545, "learning_rate": 5.665129214999719e-06, "loss": 0.4715, "step": 9049 }, { "epoch": 0.4883708380551508, "grad_norm": 0.9921144873349975, "learning_rate": 5.664366833615395e-06, "loss": 0.4686, "step": 9050 }, { "epoch": 0.4884248016836652, "grad_norm": 1.110386277828727, "learning_rate": 5.663604447507001e-06, "loss": 0.6002, "step": 9051 }, { "epoch": 0.4884787653121796, "grad_norm": 1.060541804088775, "learning_rate": 5.66284205669645e-06, "loss": 0.5314, "step": 9052 }, { "epoch": 0.488532728940694, "grad_norm": 0.9038001244693592, "learning_rate": 5.662079661205655e-06, "loss": 0.413, "step": 9053 }, { "epoch": 0.48858669256920834, "grad_norm": 1.0155073119807618, "learning_rate": 5.661317261056527e-06, "loss": 0.5412, "step": 9054 }, { "epoch": 0.48864065619772273, "grad_norm": 1.080357609652199, "learning_rate": 5.660554856270978e-06, "loss": 0.401, "step": 9055 }, { "epoch": 0.48869461982623713, "grad_norm": 0.9733955886591145, "learning_rate": 5.659792446870919e-06, "loss": 0.4119, "step": 9056 }, { "epoch": 0.4887485834547515, "grad_norm": 0.9331906517111229, "learning_rate": 5.659030032878264e-06, "loss": 0.4778, "step": 9057 }, { "epoch": 0.48880254708326587, "grad_norm": 0.9812455375904994, "learning_rate": 5.658267614314926e-06, "loss": 0.4181, "step": 9058 }, { "epoch": 0.48885651071178027, "grad_norm": 1.0148244433348792, "learning_rate": 5.657505191202815e-06, "loss": 0.4687, "step": 9059 }, { "epoch": 0.48891047434029467, "grad_norm": 1.0032508778608455, "learning_rate": 5.656742763563848e-06, "loss": 0.4211, "step": 9060 }, { "epoch": 0.488964437968809, "grad_norm": 0.9839858023374348, "learning_rate": 5.655980331419933e-06, "loss": 0.4306, "step": 9061 }, { "epoch": 0.4890184015973234, "grad_norm": 0.9618285731339128, "learning_rate": 5.655217894792988e-06, "loss": 0.4284, "step": 9062 }, { "epoch": 0.4890723652258378, "grad_norm": 1.3204996827588074, "learning_rate": 5.654455453704924e-06, "loss": 0.5638, "step": 9063 }, { "epoch": 0.48912632885435214, "grad_norm": 0.9190775710802204, "learning_rate": 5.653693008177652e-06, "loss": 0.4546, "step": 9064 }, { "epoch": 0.48918029248286654, "grad_norm": 0.9498044763175791, "learning_rate": 5.65293055823309e-06, "loss": 0.4811, "step": 9065 }, { "epoch": 
0.48923425611138094, "grad_norm": 0.9502161538461522, "learning_rate": 5.652168103893146e-06, "loss": 0.4583, "step": 9066 }, { "epoch": 0.48928821973989534, "grad_norm": 1.1162640153105996, "learning_rate": 5.651405645179739e-06, "loss": 0.591, "step": 9067 }, { "epoch": 0.4893421833684097, "grad_norm": 1.0754056215436516, "learning_rate": 5.650643182114782e-06, "loss": 0.4767, "step": 9068 }, { "epoch": 0.4893961469969241, "grad_norm": 1.1564695733771335, "learning_rate": 5.649880714720186e-06, "loss": 0.4673, "step": 9069 }, { "epoch": 0.4894501106254385, "grad_norm": 1.063908188974962, "learning_rate": 5.649118243017867e-06, "loss": 0.4176, "step": 9070 }, { "epoch": 0.4895040742539528, "grad_norm": 0.9433518356948424, "learning_rate": 5.6483557670297395e-06, "loss": 0.4994, "step": 9071 }, { "epoch": 0.4895580378824672, "grad_norm": 1.10802606014558, "learning_rate": 5.647593286777717e-06, "loss": 0.4953, "step": 9072 }, { "epoch": 0.4896120015109816, "grad_norm": 0.8317653896410824, "learning_rate": 5.646830802283715e-06, "loss": 0.3691, "step": 9073 }, { "epoch": 0.489665965139496, "grad_norm": 1.0105601461502003, "learning_rate": 5.6460683135696455e-06, "loss": 0.4486, "step": 9074 }, { "epoch": 0.48971992876801035, "grad_norm": 1.1308052178158472, "learning_rate": 5.645305820657427e-06, "loss": 0.5394, "step": 9075 }, { "epoch": 0.48977389239652475, "grad_norm": 1.2222763163571557, "learning_rate": 5.644543323568972e-06, "loss": 0.666, "step": 9076 }, { "epoch": 0.48982785602503914, "grad_norm": 0.9258783262699528, "learning_rate": 5.643780822326195e-06, "loss": 0.407, "step": 9077 }, { "epoch": 0.4898818196535535, "grad_norm": 1.079257025317513, "learning_rate": 5.643018316951013e-06, "loss": 0.5201, "step": 9078 }, { "epoch": 0.4899357832820679, "grad_norm": 0.9093672035536486, "learning_rate": 5.642255807465339e-06, "loss": 0.4614, "step": 9079 }, { "epoch": 0.4899897469105823, "grad_norm": 1.0528566429842607, "learning_rate": 5.641493293891091e-06, "loss": 0.5733, "step": 9080 }, { "epoch": 0.4900437105390966, "grad_norm": 1.148805807435015, "learning_rate": 5.640730776250182e-06, "loss": 0.5154, "step": 9081 }, { "epoch": 0.490097674167611, "grad_norm": 1.0766487726531782, "learning_rate": 5.639968254564528e-06, "loss": 0.4668, "step": 9082 }, { "epoch": 0.4901516377961254, "grad_norm": 1.1070000175776042, "learning_rate": 5.639205728856044e-06, "loss": 0.5848, "step": 9083 }, { "epoch": 0.4902056014246398, "grad_norm": 0.9774337320541031, "learning_rate": 5.6384431991466474e-06, "loss": 0.3472, "step": 9084 }, { "epoch": 0.49025956505315416, "grad_norm": 1.1373754921727341, "learning_rate": 5.637680665458254e-06, "loss": 0.5523, "step": 9085 }, { "epoch": 0.49031352868166855, "grad_norm": 1.0730813291771926, "learning_rate": 5.6369181278127785e-06, "loss": 0.5177, "step": 9086 }, { "epoch": 0.49036749231018295, "grad_norm": 1.072558548709178, "learning_rate": 5.636155586232138e-06, "loss": 0.3806, "step": 9087 }, { "epoch": 0.4904214559386973, "grad_norm": 1.073174073297064, "learning_rate": 5.635393040738248e-06, "loss": 0.4183, "step": 9088 }, { "epoch": 0.4904754195672117, "grad_norm": 1.0484761729931467, "learning_rate": 5.6346304913530255e-06, "loss": 0.4522, "step": 9089 }, { "epoch": 0.4905293831957261, "grad_norm": 0.9510815213356895, "learning_rate": 5.633867938098387e-06, "loss": 0.4634, "step": 9090 }, { "epoch": 0.4905833468242405, "grad_norm": 1.3693507439791255, "learning_rate": 5.633105380996248e-06, "loss": 0.6434, "step": 9091 }, { "epoch": 
0.4906373104527548, "grad_norm": 0.983070201544626, "learning_rate": 5.632342820068527e-06, "loss": 0.4089, "step": 9092 }, { "epoch": 0.4906912740812692, "grad_norm": 0.919827321286587, "learning_rate": 5.631580255337139e-06, "loss": 0.339, "step": 9093 }, { "epoch": 0.4907452377097836, "grad_norm": 0.9665890643810782, "learning_rate": 5.630817686824001e-06, "loss": 0.4679, "step": 9094 }, { "epoch": 0.49079920133829796, "grad_norm": 0.96136881018418, "learning_rate": 5.630055114551031e-06, "loss": 0.3986, "step": 9095 }, { "epoch": 0.49085316496681236, "grad_norm": 1.004987836870924, "learning_rate": 5.629292538540146e-06, "loss": 0.4103, "step": 9096 }, { "epoch": 0.49090712859532676, "grad_norm": 0.9927249506479977, "learning_rate": 5.628529958813262e-06, "loss": 0.48, "step": 9097 }, { "epoch": 0.49096109222384116, "grad_norm": 1.1901896279463655, "learning_rate": 5.627767375392295e-06, "loss": 0.4074, "step": 9098 }, { "epoch": 0.4910150558523555, "grad_norm": 1.0203343233747315, "learning_rate": 5.6270047882991675e-06, "loss": 0.4849, "step": 9099 }, { "epoch": 0.4910690194808699, "grad_norm": 0.9277309253238674, "learning_rate": 5.6262421975557935e-06, "loss": 0.3905, "step": 9100 }, { "epoch": 0.4911229831093843, "grad_norm": 1.1629862645431597, "learning_rate": 5.625479603184092e-06, "loss": 0.5546, "step": 9101 }, { "epoch": 0.49117694673789863, "grad_norm": 1.0872630804343892, "learning_rate": 5.624717005205978e-06, "loss": 0.4649, "step": 9102 }, { "epoch": 0.49123091036641303, "grad_norm": 1.0316687539540388, "learning_rate": 5.623954403643372e-06, "loss": 0.5985, "step": 9103 }, { "epoch": 0.49128487399492743, "grad_norm": 0.9655504277761057, "learning_rate": 5.623191798518192e-06, "loss": 0.4303, "step": 9104 }, { "epoch": 0.4913388376234418, "grad_norm": 1.0202583867849238, "learning_rate": 5.6224291898523545e-06, "loss": 0.385, "step": 9105 }, { "epoch": 0.49139280125195617, "grad_norm": 1.0597243845431228, "learning_rate": 5.621666577667779e-06, "loss": 0.4107, "step": 9106 }, { "epoch": 0.49144676488047057, "grad_norm": 0.9343221986159986, "learning_rate": 5.620903961986383e-06, "loss": 0.3732, "step": 9107 }, { "epoch": 0.49150072850898496, "grad_norm": 0.9614633658009251, "learning_rate": 5.620141342830083e-06, "loss": 0.4382, "step": 9108 }, { "epoch": 0.4915546921374993, "grad_norm": 1.001043862398232, "learning_rate": 5.619378720220803e-06, "loss": 0.5076, "step": 9109 }, { "epoch": 0.4916086557660137, "grad_norm": 0.9541530184519891, "learning_rate": 5.618616094180455e-06, "loss": 0.4421, "step": 9110 }, { "epoch": 0.4916626193945281, "grad_norm": 1.182443582300601, "learning_rate": 5.617853464730961e-06, "loss": 0.4961, "step": 9111 }, { "epoch": 0.49171658302304244, "grad_norm": 1.0014452499363702, "learning_rate": 5.617090831894242e-06, "loss": 0.3892, "step": 9112 }, { "epoch": 0.49177054665155684, "grad_norm": 0.984245254941679, "learning_rate": 5.616328195692211e-06, "loss": 0.5264, "step": 9113 }, { "epoch": 0.49182451028007124, "grad_norm": 0.9097772916548543, "learning_rate": 5.615565556146792e-06, "loss": 0.445, "step": 9114 }, { "epoch": 0.49187847390858563, "grad_norm": 0.9628613731593044, "learning_rate": 5.614802913279901e-06, "loss": 0.5766, "step": 9115 }, { "epoch": 0.4919324375371, "grad_norm": 0.8477536070441106, "learning_rate": 5.614040267113459e-06, "loss": 0.3414, "step": 9116 }, { "epoch": 0.4919864011656144, "grad_norm": 0.9971225396369336, "learning_rate": 5.6132776176693844e-06, "loss": 0.4782, "step": 9117 }, { "epoch": 
0.49204036479412877, "grad_norm": 0.8657982783810172, "learning_rate": 5.612514964969596e-06, "loss": 0.3855, "step": 9118 }, { "epoch": 0.4920943284226431, "grad_norm": 1.05908034585393, "learning_rate": 5.611752309036015e-06, "loss": 0.5481, "step": 9119 }, { "epoch": 0.4921482920511575, "grad_norm": 1.054190345423335, "learning_rate": 5.610989649890559e-06, "loss": 0.4964, "step": 9120 }, { "epoch": 0.4922022556796719, "grad_norm": 1.147373574075141, "learning_rate": 5.610226987555148e-06, "loss": 0.4547, "step": 9121 }, { "epoch": 0.4922562193081863, "grad_norm": 1.2670287892522556, "learning_rate": 5.609464322051702e-06, "loss": 0.5442, "step": 9122 }, { "epoch": 0.49231018293670065, "grad_norm": 0.9524589264005422, "learning_rate": 5.608701653402141e-06, "loss": 0.4617, "step": 9123 }, { "epoch": 0.49236414656521504, "grad_norm": 1.0393432499066622, "learning_rate": 5.607938981628385e-06, "loss": 0.5551, "step": 9124 }, { "epoch": 0.49241811019372944, "grad_norm": 0.9679025535074631, "learning_rate": 5.607176306752352e-06, "loss": 0.5185, "step": 9125 }, { "epoch": 0.4924720738222438, "grad_norm": 0.9778398508519132, "learning_rate": 5.606413628795965e-06, "loss": 0.5109, "step": 9126 }, { "epoch": 0.4925260374507582, "grad_norm": 0.97562800218688, "learning_rate": 5.605650947781143e-06, "loss": 0.576, "step": 9127 }, { "epoch": 0.4925800010792726, "grad_norm": 1.0387596127610503, "learning_rate": 5.604888263729805e-06, "loss": 0.4762, "step": 9128 }, { "epoch": 0.492633964707787, "grad_norm": 1.2629110189231894, "learning_rate": 5.604125576663872e-06, "loss": 0.6897, "step": 9129 }, { "epoch": 0.4926879283363013, "grad_norm": 0.9378347697326365, "learning_rate": 5.603362886605267e-06, "loss": 0.3219, "step": 9130 }, { "epoch": 0.4927418919648157, "grad_norm": 1.0130724593233944, "learning_rate": 5.6026001935759076e-06, "loss": 0.5077, "step": 9131 }, { "epoch": 0.4927958555933301, "grad_norm": 0.7874646816805916, "learning_rate": 5.601837497597714e-06, "loss": 0.3519, "step": 9132 }, { "epoch": 0.49284981922184445, "grad_norm": 1.1014547059404938, "learning_rate": 5.6010747986926086e-06, "loss": 0.7056, "step": 9133 }, { "epoch": 0.49290378285035885, "grad_norm": 1.1006761529465736, "learning_rate": 5.600312096882511e-06, "loss": 0.641, "step": 9134 }, { "epoch": 0.49295774647887325, "grad_norm": 0.9865244187156262, "learning_rate": 5.5995493921893415e-06, "loss": 0.5426, "step": 9135 }, { "epoch": 0.49301171010738765, "grad_norm": 1.1003810468793087, "learning_rate": 5.598786684635022e-06, "loss": 0.534, "step": 9136 }, { "epoch": 0.493065673735902, "grad_norm": 1.1356140947380464, "learning_rate": 5.598023974241475e-06, "loss": 0.5856, "step": 9137 }, { "epoch": 0.4931196373644164, "grad_norm": 0.9528600031966235, "learning_rate": 5.597261261030619e-06, "loss": 0.4131, "step": 9138 }, { "epoch": 0.4931736009929308, "grad_norm": 0.9006135093163227, "learning_rate": 5.596498545024376e-06, "loss": 0.4254, "step": 9139 }, { "epoch": 0.4932275646214451, "grad_norm": 0.8812376560515882, "learning_rate": 5.595735826244668e-06, "loss": 0.4722, "step": 9140 }, { "epoch": 0.4932815282499595, "grad_norm": 0.9026527212597094, "learning_rate": 5.594973104713415e-06, "loss": 0.3664, "step": 9141 }, { "epoch": 0.4933354918784739, "grad_norm": 0.9195701211827959, "learning_rate": 5.594210380452538e-06, "loss": 0.4012, "step": 9142 }, { "epoch": 0.4933894555069883, "grad_norm": 1.0571645343750486, "learning_rate": 5.593447653483961e-06, "loss": 0.4507, "step": 9143 }, { "epoch": 
0.49344341913550266, "grad_norm": 0.8309722422917568, "learning_rate": 5.592684923829603e-06, "loss": 0.3504, "step": 9144 }, { "epoch": 0.49349738276401706, "grad_norm": 0.9683668234971872, "learning_rate": 5.591922191511387e-06, "loss": 0.4697, "step": 9145 }, { "epoch": 0.49355134639253145, "grad_norm": 1.1082981706169845, "learning_rate": 5.591159456551234e-06, "loss": 0.5393, "step": 9146 }, { "epoch": 0.4936053100210458, "grad_norm": 0.9223557175453133, "learning_rate": 5.590396718971067e-06, "loss": 0.4114, "step": 9147 }, { "epoch": 0.4936592736495602, "grad_norm": 0.9308925023962136, "learning_rate": 5.589633978792808e-06, "loss": 0.4053, "step": 9148 }, { "epoch": 0.4937132372780746, "grad_norm": 0.754557871028483, "learning_rate": 5.588871236038376e-06, "loss": 0.3232, "step": 9149 }, { "epoch": 0.49376720090658893, "grad_norm": 1.1737933400497087, "learning_rate": 5.588108490729694e-06, "loss": 0.5379, "step": 9150 }, { "epoch": 0.49382116453510333, "grad_norm": 1.0854744385004083, "learning_rate": 5.587345742888687e-06, "loss": 0.5697, "step": 9151 }, { "epoch": 0.4938751281636177, "grad_norm": 1.0048893362232754, "learning_rate": 5.586582992537273e-06, "loss": 0.489, "step": 9152 }, { "epoch": 0.4939290917921321, "grad_norm": 0.9457083795549985, "learning_rate": 5.5858202396973785e-06, "loss": 0.4913, "step": 9153 }, { "epoch": 0.49398305542064647, "grad_norm": 1.584170562344879, "learning_rate": 5.585057484390923e-06, "loss": 0.7078, "step": 9154 }, { "epoch": 0.49403701904916086, "grad_norm": 1.1707641613345312, "learning_rate": 5.584294726639828e-06, "loss": 0.5097, "step": 9155 }, { "epoch": 0.49409098267767526, "grad_norm": 1.3367624003119714, "learning_rate": 5.583531966466019e-06, "loss": 0.7103, "step": 9156 }, { "epoch": 0.4941449463061896, "grad_norm": 1.1309348125955154, "learning_rate": 5.582769203891417e-06, "loss": 0.438, "step": 9157 }, { "epoch": 0.494198909934704, "grad_norm": 0.9272721258130063, "learning_rate": 5.582006438937943e-06, "loss": 0.5267, "step": 9158 }, { "epoch": 0.4942528735632184, "grad_norm": 1.1768842573059317, "learning_rate": 5.581243671627522e-06, "loss": 0.5009, "step": 9159 }, { "epoch": 0.4943068371917328, "grad_norm": 1.162302490526769, "learning_rate": 5.580480901982076e-06, "loss": 0.5607, "step": 9160 }, { "epoch": 0.49436080082024714, "grad_norm": 0.8319147912767002, "learning_rate": 5.5797181300235285e-06, "loss": 0.3761, "step": 9161 }, { "epoch": 0.49441476444876153, "grad_norm": 0.7800196772310112, "learning_rate": 5.5789553557737996e-06, "loss": 0.3261, "step": 9162 }, { "epoch": 0.49446872807727593, "grad_norm": 1.1257978153167363, "learning_rate": 5.578192579254814e-06, "loss": 0.5072, "step": 9163 }, { "epoch": 0.4945226917057903, "grad_norm": 0.917675092514413, "learning_rate": 5.5774298004884965e-06, "loss": 0.4871, "step": 9164 }, { "epoch": 0.49457665533430467, "grad_norm": 0.9970625752675731, "learning_rate": 5.576667019496767e-06, "loss": 0.5463, "step": 9165 }, { "epoch": 0.49463061896281907, "grad_norm": 1.316769497799427, "learning_rate": 5.5759042363015505e-06, "loss": 0.5336, "step": 9166 }, { "epoch": 0.49468458259133347, "grad_norm": 1.0351388890162914, "learning_rate": 5.575141450924768e-06, "loss": 0.6001, "step": 9167 }, { "epoch": 0.4947385462198478, "grad_norm": 0.916664722167747, "learning_rate": 5.574378663388346e-06, "loss": 0.3953, "step": 9168 }, { "epoch": 0.4947925098483622, "grad_norm": 0.996479421177259, "learning_rate": 5.573615873714204e-06, "loss": 0.4465, "step": 9169 }, { "epoch": 
0.4948464734768766, "grad_norm": 0.9919557817409324, "learning_rate": 5.572853081924269e-06, "loss": 0.4387, "step": 9170 }, { "epoch": 0.49490043710539094, "grad_norm": 1.1587231129343423, "learning_rate": 5.5720902880404635e-06, "loss": 0.3653, "step": 9171 }, { "epoch": 0.49495440073390534, "grad_norm": 0.8569568172603038, "learning_rate": 5.57132749208471e-06, "loss": 0.4106, "step": 9172 }, { "epoch": 0.49500836436241974, "grad_norm": 1.0413459163096053, "learning_rate": 5.5705646940789305e-06, "loss": 0.4568, "step": 9173 }, { "epoch": 0.49506232799093414, "grad_norm": 0.9020983332997344, "learning_rate": 5.569801894045052e-06, "loss": 0.41, "step": 9174 }, { "epoch": 0.4951162916194485, "grad_norm": 0.9899595180164241, "learning_rate": 5.569039092004995e-06, "loss": 0.5751, "step": 9175 }, { "epoch": 0.4951702552479629, "grad_norm": 0.9368445909766963, "learning_rate": 5.568276287980686e-06, "loss": 0.4496, "step": 9176 }, { "epoch": 0.4952242188764773, "grad_norm": 1.0512549977278118, "learning_rate": 5.567513481994047e-06, "loss": 0.4305, "step": 9177 }, { "epoch": 0.4952781825049916, "grad_norm": 1.1869171094548334, "learning_rate": 5.566750674067003e-06, "loss": 0.4879, "step": 9178 }, { "epoch": 0.495332146133506, "grad_norm": 0.8402338715223964, "learning_rate": 5.565987864221476e-06, "loss": 0.3184, "step": 9179 }, { "epoch": 0.4953861097620204, "grad_norm": 0.956738886567115, "learning_rate": 5.565225052479391e-06, "loss": 0.4244, "step": 9180 }, { "epoch": 0.49544007339053475, "grad_norm": 0.7991380589482813, "learning_rate": 5.5644622388626745e-06, "loss": 0.387, "step": 9181 }, { "epoch": 0.49549403701904915, "grad_norm": 0.965212856278813, "learning_rate": 5.563699423393248e-06, "loss": 0.4728, "step": 9182 }, { "epoch": 0.49554800064756355, "grad_norm": 0.8815015073861363, "learning_rate": 5.562936606093033e-06, "loss": 0.3656, "step": 9183 }, { "epoch": 0.49560196427607794, "grad_norm": 0.9814643405929492, "learning_rate": 5.562173786983958e-06, "loss": 0.3579, "step": 9184 }, { "epoch": 0.4956559279045923, "grad_norm": 0.7747149258364787, "learning_rate": 5.561410966087945e-06, "loss": 0.3506, "step": 9185 }, { "epoch": 0.4957098915331067, "grad_norm": 1.241510409918767, "learning_rate": 5.560648143426919e-06, "loss": 0.6019, "step": 9186 }, { "epoch": 0.4957638551616211, "grad_norm": 0.9719412275299124, "learning_rate": 5.559885319022803e-06, "loss": 0.4331, "step": 9187 }, { "epoch": 0.4958178187901354, "grad_norm": 0.9079525774259637, "learning_rate": 5.559122492897525e-06, "loss": 0.419, "step": 9188 }, { "epoch": 0.4958717824186498, "grad_norm": 0.9050783759407539, "learning_rate": 5.5583596650730055e-06, "loss": 0.4644, "step": 9189 }, { "epoch": 0.4959257460471642, "grad_norm": 0.7765840191259948, "learning_rate": 5.55759683557117e-06, "loss": 0.361, "step": 9190 }, { "epoch": 0.4959797096756786, "grad_norm": 0.892423073722221, "learning_rate": 5.5568340044139425e-06, "loss": 0.3408, "step": 9191 }, { "epoch": 0.49603367330419296, "grad_norm": 0.9367320150823351, "learning_rate": 5.55607117162325e-06, "loss": 0.3644, "step": 9192 }, { "epoch": 0.49608763693270735, "grad_norm": 1.0959787115678488, "learning_rate": 5.555308337221014e-06, "loss": 0.4929, "step": 9193 }, { "epoch": 0.49614160056122175, "grad_norm": 0.9750656341521331, "learning_rate": 5.55454550122916e-06, "loss": 0.4788, "step": 9194 }, { "epoch": 0.4961955641897361, "grad_norm": 1.3758883528219292, "learning_rate": 5.5537826636696155e-06, "loss": 0.6005, "step": 9195 }, { "epoch": 
0.4962495278182505, "grad_norm": 1.0745855843060585, "learning_rate": 5.553019824564301e-06, "loss": 0.4725, "step": 9196 }, { "epoch": 0.4963034914467649, "grad_norm": 1.0661749098050677, "learning_rate": 5.552256983935144e-06, "loss": 0.51, "step": 9197 }, { "epoch": 0.4963574550752793, "grad_norm": 0.9633597528343006, "learning_rate": 5.551494141804068e-06, "loss": 0.4341, "step": 9198 }, { "epoch": 0.4964114187037936, "grad_norm": 1.00159650606954, "learning_rate": 5.550731298193e-06, "loss": 0.5516, "step": 9199 }, { "epoch": 0.496465382332308, "grad_norm": 1.3447225878837499, "learning_rate": 5.549968453123861e-06, "loss": 0.6996, "step": 9200 }, { "epoch": 0.4965193459608224, "grad_norm": 1.280700006308369, "learning_rate": 5.549205606618578e-06, "loss": 0.5394, "step": 9201 }, { "epoch": 0.49657330958933676, "grad_norm": 1.1108567295515825, "learning_rate": 5.548442758699077e-06, "loss": 0.5923, "step": 9202 }, { "epoch": 0.49662727321785116, "grad_norm": 1.1432750104183798, "learning_rate": 5.547679909387282e-06, "loss": 0.5436, "step": 9203 }, { "epoch": 0.49668123684636556, "grad_norm": 1.0983881682648324, "learning_rate": 5.546917058705119e-06, "loss": 0.5283, "step": 9204 }, { "epoch": 0.49673520047487996, "grad_norm": 1.2771346967703645, "learning_rate": 5.5461542066745114e-06, "loss": 0.7373, "step": 9205 }, { "epoch": 0.4967891641033943, "grad_norm": 0.971490736383745, "learning_rate": 5.545391353317386e-06, "loss": 0.5015, "step": 9206 }, { "epoch": 0.4968431277319087, "grad_norm": 0.8623511271125768, "learning_rate": 5.544628498655666e-06, "loss": 0.4362, "step": 9207 }, { "epoch": 0.4968970913604231, "grad_norm": 1.1503290824124768, "learning_rate": 5.543865642711279e-06, "loss": 0.4935, "step": 9208 }, { "epoch": 0.49695105498893744, "grad_norm": 1.0619661857935878, "learning_rate": 5.54310278550615e-06, "loss": 0.578, "step": 9209 }, { "epoch": 0.49700501861745183, "grad_norm": 0.9650471928730295, "learning_rate": 5.542339927062201e-06, "loss": 0.3765, "step": 9210 }, { "epoch": 0.49705898224596623, "grad_norm": 0.794416352227625, "learning_rate": 5.541577067401361e-06, "loss": 0.3715, "step": 9211 }, { "epoch": 0.4971129458744806, "grad_norm": 1.0229566478639398, "learning_rate": 5.540814206545554e-06, "loss": 0.4574, "step": 9212 }, { "epoch": 0.49716690950299497, "grad_norm": 0.8964337511047894, "learning_rate": 5.5400513445167056e-06, "loss": 0.3848, "step": 9213 }, { "epoch": 0.49722087313150937, "grad_norm": 1.0810765826900421, "learning_rate": 5.539288481336742e-06, "loss": 0.5469, "step": 9214 }, { "epoch": 0.49727483676002376, "grad_norm": 1.0535324683642482, "learning_rate": 5.538525617027586e-06, "loss": 0.4165, "step": 9215 }, { "epoch": 0.4973288003885381, "grad_norm": 0.9027860304592159, "learning_rate": 5.537762751611166e-06, "loss": 0.4026, "step": 9216 }, { "epoch": 0.4973827640170525, "grad_norm": 0.7965163023205577, "learning_rate": 5.536999885109406e-06, "loss": 0.339, "step": 9217 }, { "epoch": 0.4974367276455669, "grad_norm": 1.0239802585658118, "learning_rate": 5.536237017544232e-06, "loss": 0.3952, "step": 9218 }, { "epoch": 0.49749069127408124, "grad_norm": 0.9568623628908487, "learning_rate": 5.535474148937569e-06, "loss": 0.3961, "step": 9219 }, { "epoch": 0.49754465490259564, "grad_norm": 1.093409086915177, "learning_rate": 5.534711279311344e-06, "loss": 0.5768, "step": 9220 }, { "epoch": 0.49759861853111004, "grad_norm": 1.1255604276549973, "learning_rate": 5.5339484086874815e-06, "loss": 0.4515, "step": 9221 }, { "epoch": 
0.49765258215962443, "grad_norm": 1.086018540774263, "learning_rate": 5.533185537087906e-06, "loss": 0.5212, "step": 9222 }, { "epoch": 0.4977065457881388, "grad_norm": 1.0183055843779207, "learning_rate": 5.532422664534547e-06, "loss": 0.5544, "step": 9223 }, { "epoch": 0.4977605094166532, "grad_norm": 1.3098950719552949, "learning_rate": 5.531659791049326e-06, "loss": 0.6574, "step": 9224 }, { "epoch": 0.49781447304516757, "grad_norm": 0.9864156168474227, "learning_rate": 5.530896916654171e-06, "loss": 0.3561, "step": 9225 }, { "epoch": 0.4978684366736819, "grad_norm": 0.9766306184664869, "learning_rate": 5.530134041371008e-06, "loss": 0.4734, "step": 9226 }, { "epoch": 0.4979224003021963, "grad_norm": 1.2778619486007325, "learning_rate": 5.529371165221761e-06, "loss": 0.6621, "step": 9227 }, { "epoch": 0.4979763639307107, "grad_norm": 1.1254282599609082, "learning_rate": 5.528608288228359e-06, "loss": 0.4042, "step": 9228 }, { "epoch": 0.4980303275592251, "grad_norm": 0.9641622553088225, "learning_rate": 5.527845410412725e-06, "loss": 0.4534, "step": 9229 }, { "epoch": 0.49808429118773945, "grad_norm": 0.9693040039312797, "learning_rate": 5.5270825317967855e-06, "loss": 0.4398, "step": 9230 }, { "epoch": 0.49813825481625384, "grad_norm": 0.9995685021550145, "learning_rate": 5.5263196524024665e-06, "loss": 0.4426, "step": 9231 }, { "epoch": 0.49819221844476824, "grad_norm": 1.0956018001855417, "learning_rate": 5.525556772251694e-06, "loss": 0.4537, "step": 9232 }, { "epoch": 0.4982461820732826, "grad_norm": 0.8623665474077875, "learning_rate": 5.524793891366396e-06, "loss": 0.3951, "step": 9233 }, { "epoch": 0.498300145701797, "grad_norm": 1.0760636575520381, "learning_rate": 5.524031009768494e-06, "loss": 0.4087, "step": 9234 }, { "epoch": 0.4983541093303114, "grad_norm": 1.1893784505041916, "learning_rate": 5.523268127479916e-06, "loss": 0.5255, "step": 9235 }, { "epoch": 0.4984080729588258, "grad_norm": 1.042270717143089, "learning_rate": 5.5225052445225906e-06, "loss": 0.4471, "step": 9236 }, { "epoch": 0.4984620365873401, "grad_norm": 1.1287401873514937, "learning_rate": 5.5217423609184416e-06, "loss": 0.5576, "step": 9237 }, { "epoch": 0.4985160002158545, "grad_norm": 1.2856432888116804, "learning_rate": 5.520979476689394e-06, "loss": 0.4732, "step": 9238 }, { "epoch": 0.4985699638443689, "grad_norm": 0.8867049112659914, "learning_rate": 5.520216591857376e-06, "loss": 0.3618, "step": 9239 }, { "epoch": 0.49862392747288325, "grad_norm": 1.0127296504223997, "learning_rate": 5.519453706444312e-06, "loss": 0.3582, "step": 9240 }, { "epoch": 0.49867789110139765, "grad_norm": 0.9843132711681426, "learning_rate": 5.518690820472129e-06, "loss": 0.4582, "step": 9241 }, { "epoch": 0.49873185472991205, "grad_norm": 0.8510992699167186, "learning_rate": 5.5179279339627535e-06, "loss": 0.3387, "step": 9242 }, { "epoch": 0.49878581835842645, "grad_norm": 0.97645120635193, "learning_rate": 5.517165046938111e-06, "loss": 0.4712, "step": 9243 }, { "epoch": 0.4988397819869408, "grad_norm": 0.9981143885678362, "learning_rate": 5.516402159420128e-06, "loss": 0.42, "step": 9244 }, { "epoch": 0.4988937456154552, "grad_norm": 1.1732276826333454, "learning_rate": 5.51563927143073e-06, "loss": 0.6728, "step": 9245 }, { "epoch": 0.4989477092439696, "grad_norm": 0.8261095128713817, "learning_rate": 5.514876382991841e-06, "loss": 0.3446, "step": 9246 }, { "epoch": 0.4990016728724839, "grad_norm": 0.925854981310481, "learning_rate": 5.514113494125393e-06, "loss": 0.4093, "step": 9247 }, { "epoch": 
0.4990556365009983, "grad_norm": 1.0880842125976935, "learning_rate": 5.513350604853308e-06, "loss": 0.6809, "step": 9248 }, { "epoch": 0.4991096001295127, "grad_norm": 1.2506835701187293, "learning_rate": 5.512587715197514e-06, "loss": 0.4694, "step": 9249 }, { "epoch": 0.49916356375802706, "grad_norm": 0.9370986148548095, "learning_rate": 5.511824825179935e-06, "loss": 0.3858, "step": 9250 }, { "epoch": 0.49921752738654146, "grad_norm": 1.0524354732457204, "learning_rate": 5.511061934822499e-06, "loss": 0.4471, "step": 9251 }, { "epoch": 0.49927149101505586, "grad_norm": 0.9602750206154824, "learning_rate": 5.510299044147133e-06, "loss": 0.52, "step": 9252 }, { "epoch": 0.49932545464357025, "grad_norm": 1.177104723010309, "learning_rate": 5.50953615317576e-06, "loss": 0.523, "step": 9253 }, { "epoch": 0.4993794182720846, "grad_norm": 0.966807735667726, "learning_rate": 5.508773261930309e-06, "loss": 0.3839, "step": 9254 }, { "epoch": 0.499433381900599, "grad_norm": 1.003084264183784, "learning_rate": 5.508010370432705e-06, "loss": 0.4512, "step": 9255 }, { "epoch": 0.4994873455291134, "grad_norm": 0.9322349644105417, "learning_rate": 5.5072474787048754e-06, "loss": 0.3882, "step": 9256 }, { "epoch": 0.49954130915762773, "grad_norm": 0.9147706700142655, "learning_rate": 5.506484586768746e-06, "loss": 0.5042, "step": 9257 }, { "epoch": 0.49959527278614213, "grad_norm": 0.9575750800222728, "learning_rate": 5.5057216946462436e-06, "loss": 0.4438, "step": 9258 }, { "epoch": 0.49964923641465653, "grad_norm": 0.9482651653102838, "learning_rate": 5.504958802359294e-06, "loss": 0.418, "step": 9259 }, { "epoch": 0.4997032000431709, "grad_norm": 1.006118194211023, "learning_rate": 5.504195909929823e-06, "loss": 0.5049, "step": 9260 }, { "epoch": 0.49975716367168527, "grad_norm": 1.3057530844110719, "learning_rate": 5.5034330173797566e-06, "loss": 0.5077, "step": 9261 }, { "epoch": 0.49981112730019966, "grad_norm": 1.178316076609424, "learning_rate": 5.502670124731022e-06, "loss": 0.6718, "step": 9262 }, { "epoch": 0.49986509092871406, "grad_norm": 0.8997310399230333, "learning_rate": 5.501907232005547e-06, "loss": 0.4621, "step": 9263 }, { "epoch": 0.4999190545572284, "grad_norm": 1.02915296005842, "learning_rate": 5.5011443392252554e-06, "loss": 0.4749, "step": 9264 }, { "epoch": 0.4999730181857428, "grad_norm": 0.9485332686788208, "learning_rate": 5.500381446412073e-06, "loss": 0.389, "step": 9265 }, { "epoch": 0.5000269818142572, "grad_norm": 0.8658289734556776, "learning_rate": 5.499618553587929e-06, "loss": 0.3832, "step": 9266 }, { "epoch": 0.5000809454427716, "grad_norm": 1.0496372779751484, "learning_rate": 5.498855660774747e-06, "loss": 0.5114, "step": 9267 }, { "epoch": 0.500134909071286, "grad_norm": 1.0776992093626383, "learning_rate": 5.4980927679944544e-06, "loss": 0.5814, "step": 9268 }, { "epoch": 0.5001888726998003, "grad_norm": 1.0385389210045457, "learning_rate": 5.4973298752689795e-06, "loss": 0.4194, "step": 9269 }, { "epoch": 0.5002428363283147, "grad_norm": 1.2160638007009277, "learning_rate": 5.496566982620245e-06, "loss": 0.4903, "step": 9270 }, { "epoch": 0.5002967999568291, "grad_norm": 1.1032424548174509, "learning_rate": 5.495804090070179e-06, "loss": 0.5506, "step": 9271 }, { "epoch": 0.5003507635853435, "grad_norm": 1.0774081891951837, "learning_rate": 5.495041197640708e-06, "loss": 0.5145, "step": 9272 }, { "epoch": 0.5004047272138579, "grad_norm": 1.138146251468353, "learning_rate": 5.494278305353757e-06, "loss": 0.5634, "step": 9273 }, { "epoch": 
0.5004586908423723, "grad_norm": 0.9021893819040978, "learning_rate": 5.493515413231255e-06, "loss": 0.4193, "step": 9274 }, { "epoch": 0.5005126544708867, "grad_norm": 1.1440714437176214, "learning_rate": 5.492752521295125e-06, "loss": 0.4581, "step": 9275 }, { "epoch": 0.500566618099401, "grad_norm": 1.05128732285786, "learning_rate": 5.4919896295672966e-06, "loss": 0.5209, "step": 9276 }, { "epoch": 0.5006205817279153, "grad_norm": 1.1391533285256124, "learning_rate": 5.491226738069694e-06, "loss": 0.5411, "step": 9277 }, { "epoch": 0.5006745453564297, "grad_norm": 1.112966057322348, "learning_rate": 5.490463846824242e-06, "loss": 0.5193, "step": 9278 }, { "epoch": 0.5007285089849441, "grad_norm": 1.0982054750071115, "learning_rate": 5.48970095585287e-06, "loss": 0.6495, "step": 9279 }, { "epoch": 0.5007824726134585, "grad_norm": 1.327618210267992, "learning_rate": 5.488938065177503e-06, "loss": 0.6983, "step": 9280 }, { "epoch": 0.5008364362419729, "grad_norm": 0.9109597438027638, "learning_rate": 5.488175174820066e-06, "loss": 0.4684, "step": 9281 }, { "epoch": 0.5008903998704873, "grad_norm": 0.9343854830522487, "learning_rate": 5.487412284802488e-06, "loss": 0.4773, "step": 9282 }, { "epoch": 0.5009443634990016, "grad_norm": 0.9932142469256169, "learning_rate": 5.486649395146694e-06, "loss": 0.4988, "step": 9283 }, { "epoch": 0.500998327127516, "grad_norm": 0.8842115771216675, "learning_rate": 5.485886505874608e-06, "loss": 0.42, "step": 9284 }, { "epoch": 0.5010522907560304, "grad_norm": 1.0530549179037192, "learning_rate": 5.485123617008159e-06, "loss": 0.4296, "step": 9285 }, { "epoch": 0.5011062543845448, "grad_norm": 0.9727413825082329, "learning_rate": 5.4843607285692724e-06, "loss": 0.386, "step": 9286 }, { "epoch": 0.5011602180130592, "grad_norm": 1.0484008924346282, "learning_rate": 5.483597840579874e-06, "loss": 0.493, "step": 9287 }, { "epoch": 0.5012141816415736, "grad_norm": 0.7750573658290201, "learning_rate": 5.48283495306189e-06, "loss": 0.3215, "step": 9288 }, { "epoch": 0.501268145270088, "grad_norm": 0.8958063048715763, "learning_rate": 5.482072066037247e-06, "loss": 0.4441, "step": 9289 }, { "epoch": 0.5013221088986023, "grad_norm": 1.3033664504080293, "learning_rate": 5.4813091795278725e-06, "loss": 0.4811, "step": 9290 }, { "epoch": 0.5013760725271167, "grad_norm": 1.0047672702741697, "learning_rate": 5.48054629355569e-06, "loss": 0.464, "step": 9291 }, { "epoch": 0.5014300361556311, "grad_norm": 0.975623346071451, "learning_rate": 5.479783408142626e-06, "loss": 0.5071, "step": 9292 }, { "epoch": 0.5014839997841455, "grad_norm": 0.9963806716973526, "learning_rate": 5.479020523310607e-06, "loss": 0.5277, "step": 9293 }, { "epoch": 0.5015379634126599, "grad_norm": 0.9461420698157814, "learning_rate": 5.47825763908156e-06, "loss": 0.4261, "step": 9294 }, { "epoch": 0.5015919270411743, "grad_norm": 0.8015498138152314, "learning_rate": 5.477494755477411e-06, "loss": 0.3557, "step": 9295 }, { "epoch": 0.5016458906696887, "grad_norm": 1.1572102211494004, "learning_rate": 5.476731872520083e-06, "loss": 0.6191, "step": 9296 }, { "epoch": 0.501699854298203, "grad_norm": 1.5270574786718074, "learning_rate": 5.4759689902315095e-06, "loss": 0.8715, "step": 9297 }, { "epoch": 0.5017538179267174, "grad_norm": 1.0661110022046727, "learning_rate": 5.475206108633608e-06, "loss": 0.4475, "step": 9298 }, { "epoch": 0.5018077815552318, "grad_norm": 1.0660481773833614, "learning_rate": 5.4744432277483086e-06, "loss": 0.4851, "step": 9299 }, { "epoch": 0.5018617451837462, 
"grad_norm": 1.2121572373473974, "learning_rate": 5.473680347597535e-06, "loss": 0.713, "step": 9300 }, { "epoch": 0.5019157088122606, "grad_norm": 0.9451347813859317, "learning_rate": 5.472917468203216e-06, "loss": 0.3828, "step": 9301 }, { "epoch": 0.501969672440775, "grad_norm": 0.9571776064547108, "learning_rate": 5.472154589587276e-06, "loss": 0.4208, "step": 9302 }, { "epoch": 0.5020236360692893, "grad_norm": 1.2981520078704187, "learning_rate": 5.471391711771642e-06, "loss": 0.5711, "step": 9303 }, { "epoch": 0.5020775996978036, "grad_norm": 0.9095358867037191, "learning_rate": 5.47062883477824e-06, "loss": 0.3791, "step": 9304 }, { "epoch": 0.502131563326318, "grad_norm": 0.946052561951035, "learning_rate": 5.4698659586289934e-06, "loss": 0.4169, "step": 9305 }, { "epoch": 0.5021855269548324, "grad_norm": 0.7803281694778149, "learning_rate": 5.46910308334583e-06, "loss": 0.3566, "step": 9306 }, { "epoch": 0.5022394905833468, "grad_norm": 1.1505084518122253, "learning_rate": 5.468340208950675e-06, "loss": 0.5978, "step": 9307 }, { "epoch": 0.5022934542118612, "grad_norm": 1.041247158163933, "learning_rate": 5.467577335465455e-06, "loss": 0.5054, "step": 9308 }, { "epoch": 0.5023474178403756, "grad_norm": 1.1785699052511733, "learning_rate": 5.4668144629120945e-06, "loss": 0.5466, "step": 9309 }, { "epoch": 0.50240138146889, "grad_norm": 1.142400932213463, "learning_rate": 5.46605159131252e-06, "loss": 0.5006, "step": 9310 }, { "epoch": 0.5024553450974043, "grad_norm": 1.0966349856264155, "learning_rate": 5.465288720688659e-06, "loss": 0.5167, "step": 9311 }, { "epoch": 0.5025093087259187, "grad_norm": 1.104963842653995, "learning_rate": 5.464525851062433e-06, "loss": 0.4846, "step": 9312 }, { "epoch": 0.5025632723544331, "grad_norm": 1.0829797624443036, "learning_rate": 5.4637629824557705e-06, "loss": 0.4616, "step": 9313 }, { "epoch": 0.5026172359829475, "grad_norm": 0.8689456465601028, "learning_rate": 5.463000114890595e-06, "loss": 0.4699, "step": 9314 }, { "epoch": 0.5026711996114619, "grad_norm": 0.7422690248079703, "learning_rate": 5.462237248388835e-06, "loss": 0.2795, "step": 9315 }, { "epoch": 0.5027251632399763, "grad_norm": 1.0830910701152139, "learning_rate": 5.461474382972415e-06, "loss": 0.4245, "step": 9316 }, { "epoch": 0.5027791268684907, "grad_norm": 1.2100651752704135, "learning_rate": 5.460711518663259e-06, "loss": 0.5455, "step": 9317 }, { "epoch": 0.502833090497005, "grad_norm": 0.9480068540747455, "learning_rate": 5.459948655483296e-06, "loss": 0.4692, "step": 9318 }, { "epoch": 0.5028870541255194, "grad_norm": 0.8907689997269927, "learning_rate": 5.459185793454448e-06, "loss": 0.4237, "step": 9319 }, { "epoch": 0.5029410177540338, "grad_norm": 1.0470853913637472, "learning_rate": 5.45842293259864e-06, "loss": 0.5164, "step": 9320 }, { "epoch": 0.5029949813825482, "grad_norm": 0.9642900590236484, "learning_rate": 5.4576600729378e-06, "loss": 0.3899, "step": 9321 }, { "epoch": 0.5030489450110626, "grad_norm": 1.1159835458701577, "learning_rate": 5.456897214493851e-06, "loss": 0.5039, "step": 9322 }, { "epoch": 0.503102908639577, "grad_norm": 1.1579526554987467, "learning_rate": 5.4561343572887215e-06, "loss": 0.4681, "step": 9323 }, { "epoch": 0.5031568722680914, "grad_norm": 1.276968390987905, "learning_rate": 5.455371501344334e-06, "loss": 0.5481, "step": 9324 }, { "epoch": 0.5032108358966056, "grad_norm": 1.0388321532843348, "learning_rate": 5.454608646682617e-06, "loss": 0.6374, "step": 9325 }, { "epoch": 0.50326479952512, "grad_norm": 1.2064306882348976, 
"learning_rate": 5.453845793325491e-06, "loss": 0.6834, "step": 9326 }, { "epoch": 0.5033187631536344, "grad_norm": 1.2186904519316717, "learning_rate": 5.453082941294883e-06, "loss": 0.5471, "step": 9327 }, { "epoch": 0.5033727267821488, "grad_norm": 1.2250099647692714, "learning_rate": 5.452320090612719e-06, "loss": 0.4969, "step": 9328 }, { "epoch": 0.5034266904106632, "grad_norm": 1.021723245023374, "learning_rate": 5.451557241300924e-06, "loss": 0.4369, "step": 9329 }, { "epoch": 0.5034806540391776, "grad_norm": 0.9541591774335554, "learning_rate": 5.450794393381423e-06, "loss": 0.436, "step": 9330 }, { "epoch": 0.503534617667692, "grad_norm": 0.92751245803067, "learning_rate": 5.4500315468761414e-06, "loss": 0.4154, "step": 9331 }, { "epoch": 0.5035885812962063, "grad_norm": 1.135144226257816, "learning_rate": 5.449268701807003e-06, "loss": 0.5144, "step": 9332 }, { "epoch": 0.5036425449247207, "grad_norm": 1.2550214622349016, "learning_rate": 5.448505858195933e-06, "loss": 0.5789, "step": 9333 }, { "epoch": 0.5036965085532351, "grad_norm": 0.9298814035593941, "learning_rate": 5.447743016064858e-06, "loss": 0.4026, "step": 9334 }, { "epoch": 0.5037504721817495, "grad_norm": 0.7773789389700129, "learning_rate": 5.4469801754357e-06, "loss": 0.3697, "step": 9335 }, { "epoch": 0.5038044358102639, "grad_norm": 0.9350294791123803, "learning_rate": 5.446217336330386e-06, "loss": 0.4988, "step": 9336 }, { "epoch": 0.5038583994387783, "grad_norm": 1.2738846720526575, "learning_rate": 5.44545449877084e-06, "loss": 0.555, "step": 9337 }, { "epoch": 0.5039123630672926, "grad_norm": 0.924502726983835, "learning_rate": 5.444691662778989e-06, "loss": 0.4182, "step": 9338 }, { "epoch": 0.503966326695807, "grad_norm": 1.057086603303938, "learning_rate": 5.443928828376753e-06, "loss": 0.5273, "step": 9339 }, { "epoch": 0.5040202903243214, "grad_norm": 0.9676354184286606, "learning_rate": 5.44316599558606e-06, "loss": 0.4544, "step": 9340 }, { "epoch": 0.5040742539528358, "grad_norm": 1.3511246881178542, "learning_rate": 5.442403164428832e-06, "loss": 0.6052, "step": 9341 }, { "epoch": 0.5041282175813502, "grad_norm": 1.044260220963436, "learning_rate": 5.441640334926997e-06, "loss": 0.5119, "step": 9342 }, { "epoch": 0.5041821812098646, "grad_norm": 0.993627760405897, "learning_rate": 5.440877507102476e-06, "loss": 0.395, "step": 9343 }, { "epoch": 0.504236144838379, "grad_norm": 0.9960225566513855, "learning_rate": 5.440114680977196e-06, "loss": 0.4481, "step": 9344 }, { "epoch": 0.5042901084668933, "grad_norm": 0.9026869645671672, "learning_rate": 5.439351856573084e-06, "loss": 0.5248, "step": 9345 }, { "epoch": 0.5043440720954077, "grad_norm": 1.1328770918576176, "learning_rate": 5.438589033912057e-06, "loss": 0.427, "step": 9346 }, { "epoch": 0.5043980357239221, "grad_norm": 1.180925041264739, "learning_rate": 5.4378262130160445e-06, "loss": 0.703, "step": 9347 }, { "epoch": 0.5044519993524365, "grad_norm": 1.0564949353217257, "learning_rate": 5.437063393906969e-06, "loss": 0.5086, "step": 9348 }, { "epoch": 0.5045059629809509, "grad_norm": 1.0312572608066808, "learning_rate": 5.436300576606755e-06, "loss": 0.4891, "step": 9349 }, { "epoch": 0.5045599266094652, "grad_norm": 1.1668966911616434, "learning_rate": 5.435537761137327e-06, "loss": 0.6023, "step": 9350 }, { "epoch": 0.5046138902379796, "grad_norm": 1.1934751376681627, "learning_rate": 5.434774947520609e-06, "loss": 0.6304, "step": 9351 }, { "epoch": 0.5046678538664939, "grad_norm": 1.066615198981386, "learning_rate": 
5.4340121357785246e-06, "loss": 0.526, "step": 9352 }, { "epoch": 0.5047218174950083, "grad_norm": 1.1245175247197177, "learning_rate": 5.433249325932999e-06, "loss": 0.4364, "step": 9353 }, { "epoch": 0.5047757811235227, "grad_norm": 0.957982125928268, "learning_rate": 5.4324865180059535e-06, "loss": 0.4167, "step": 9354 }, { "epoch": 0.5048297447520371, "grad_norm": 0.7667702722615145, "learning_rate": 5.431723712019315e-06, "loss": 0.2979, "step": 9355 }, { "epoch": 0.5048837083805515, "grad_norm": 1.0031592646840641, "learning_rate": 5.430960907995006e-06, "loss": 0.4156, "step": 9356 }, { "epoch": 0.5049376720090659, "grad_norm": 0.8468498740891265, "learning_rate": 5.43019810595495e-06, "loss": 0.3304, "step": 9357 }, { "epoch": 0.5049916356375803, "grad_norm": 1.0078863985549582, "learning_rate": 5.429435305921072e-06, "loss": 0.3914, "step": 9358 }, { "epoch": 0.5050455992660946, "grad_norm": 0.9361936802532981, "learning_rate": 5.428672507915293e-06, "loss": 0.4904, "step": 9359 }, { "epoch": 0.505099562894609, "grad_norm": 0.9259988693189075, "learning_rate": 5.42790971195954e-06, "loss": 0.4591, "step": 9360 }, { "epoch": 0.5051535265231234, "grad_norm": 0.9686011991388224, "learning_rate": 5.427146918075733e-06, "loss": 0.4575, "step": 9361 }, { "epoch": 0.5052074901516378, "grad_norm": 1.0954276125105042, "learning_rate": 5.426384126285798e-06, "loss": 0.5353, "step": 9362 }, { "epoch": 0.5052614537801522, "grad_norm": 1.1005415685953186, "learning_rate": 5.425621336611656e-06, "loss": 0.5944, "step": 9363 }, { "epoch": 0.5053154174086666, "grad_norm": 0.9103563331726027, "learning_rate": 5.4248585490752324e-06, "loss": 0.3945, "step": 9364 }, { "epoch": 0.505369381037181, "grad_norm": 1.0351248138314637, "learning_rate": 5.424095763698452e-06, "loss": 0.4892, "step": 9365 }, { "epoch": 0.5054233446656953, "grad_norm": 1.059695637475595, "learning_rate": 5.423332980503237e-06, "loss": 0.3783, "step": 9366 }, { "epoch": 0.5054773082942097, "grad_norm": 1.0411662641081108, "learning_rate": 5.422570199511507e-06, "loss": 0.5338, "step": 9367 }, { "epoch": 0.5055312719227241, "grad_norm": 1.0413124501990882, "learning_rate": 5.421807420745188e-06, "loss": 0.4821, "step": 9368 }, { "epoch": 0.5055852355512385, "grad_norm": 1.116746175507263, "learning_rate": 5.421044644226202e-06, "loss": 0.47, "step": 9369 }, { "epoch": 0.5056391991797529, "grad_norm": 0.8935173049739824, "learning_rate": 5.420281869976474e-06, "loss": 0.3627, "step": 9370 }, { "epoch": 0.5056931628082673, "grad_norm": 1.1863786720400902, "learning_rate": 5.419519098017925e-06, "loss": 0.5352, "step": 9371 }, { "epoch": 0.5057471264367817, "grad_norm": 1.051763762279436, "learning_rate": 5.418756328372477e-06, "loss": 0.3915, "step": 9372 }, { "epoch": 0.5058010900652959, "grad_norm": 1.048466154531086, "learning_rate": 5.417993561062059e-06, "loss": 0.3858, "step": 9373 }, { "epoch": 0.5058550536938103, "grad_norm": 1.0695401828727997, "learning_rate": 5.417230796108586e-06, "loss": 0.4799, "step": 9374 }, { "epoch": 0.5059090173223247, "grad_norm": 1.0065354471231704, "learning_rate": 5.416468033533982e-06, "loss": 0.351, "step": 9375 }, { "epoch": 0.5059629809508391, "grad_norm": 1.042728182586893, "learning_rate": 5.415705273360173e-06, "loss": 0.5501, "step": 9376 }, { "epoch": 0.5060169445793535, "grad_norm": 1.010689750315454, "learning_rate": 5.414942515609078e-06, "loss": 0.3562, "step": 9377 }, { "epoch": 0.5060709082078679, "grad_norm": 0.8755094726805982, "learning_rate": 5.414179760302623e-06, "loss": 
0.3633, "step": 9378 }, { "epoch": 0.5061248718363823, "grad_norm": 0.9495270741376999, "learning_rate": 5.413417007462728e-06, "loss": 0.421, "step": 9379 }, { "epoch": 0.5061788354648966, "grad_norm": 0.8593685531327266, "learning_rate": 5.412654257111315e-06, "loss": 0.4134, "step": 9380 }, { "epoch": 0.506232799093411, "grad_norm": 1.0699045811038568, "learning_rate": 5.411891509270307e-06, "loss": 0.4354, "step": 9381 }, { "epoch": 0.5062867627219254, "grad_norm": 0.8893994365592468, "learning_rate": 5.411128763961626e-06, "loss": 0.3717, "step": 9382 }, { "epoch": 0.5063407263504398, "grad_norm": 1.3139095691493066, "learning_rate": 5.410366021207194e-06, "loss": 0.6111, "step": 9383 }, { "epoch": 0.5063946899789542, "grad_norm": 1.1950607669509459, "learning_rate": 5.409603281028933e-06, "loss": 0.5038, "step": 9384 }, { "epoch": 0.5064486536074686, "grad_norm": 0.9788738478249895, "learning_rate": 5.408840543448766e-06, "loss": 0.4019, "step": 9385 }, { "epoch": 0.506502617235983, "grad_norm": 0.8288213805411832, "learning_rate": 5.408077808488613e-06, "loss": 0.3655, "step": 9386 }, { "epoch": 0.5065565808644973, "grad_norm": 1.2524665935527506, "learning_rate": 5.407315076170398e-06, "loss": 0.6362, "step": 9387 }, { "epoch": 0.5066105444930117, "grad_norm": 1.0746720028850933, "learning_rate": 5.406552346516042e-06, "loss": 0.3777, "step": 9388 }, { "epoch": 0.5066645081215261, "grad_norm": 0.9934691439107699, "learning_rate": 5.405789619547463e-06, "loss": 0.5021, "step": 9389 }, { "epoch": 0.5067184717500405, "grad_norm": 0.8203333770994206, "learning_rate": 5.4050268952865866e-06, "loss": 0.438, "step": 9390 }, { "epoch": 0.5067724353785549, "grad_norm": 0.7816133871320595, "learning_rate": 5.404264173755334e-06, "loss": 0.2878, "step": 9391 }, { "epoch": 0.5068263990070693, "grad_norm": 1.3144088759289876, "learning_rate": 5.403501454975625e-06, "loss": 0.6818, "step": 9392 }, { "epoch": 0.5068803626355837, "grad_norm": 0.8907729879494586, "learning_rate": 5.402738738969382e-06, "loss": 0.4069, "step": 9393 }, { "epoch": 0.506934326264098, "grad_norm": 0.9302364476112674, "learning_rate": 5.401976025758527e-06, "loss": 0.4518, "step": 9394 }, { "epoch": 0.5069882898926124, "grad_norm": 1.034071783507473, "learning_rate": 5.401213315364979e-06, "loss": 0.4416, "step": 9395 }, { "epoch": 0.5070422535211268, "grad_norm": 0.9394050142986388, "learning_rate": 5.40045060781066e-06, "loss": 0.4656, "step": 9396 }, { "epoch": 0.5070962171496411, "grad_norm": 1.0506621977933974, "learning_rate": 5.399687903117491e-06, "loss": 0.4468, "step": 9397 }, { "epoch": 0.5071501807781555, "grad_norm": 0.9107891719361408, "learning_rate": 5.398925201307394e-06, "loss": 0.4129, "step": 9398 }, { "epoch": 0.5072041444066699, "grad_norm": 1.1058774710196875, "learning_rate": 5.398162502402287e-06, "loss": 0.5214, "step": 9399 }, { "epoch": 0.5072581080351843, "grad_norm": 1.0996782278554278, "learning_rate": 5.397399806424094e-06, "loss": 0.5603, "step": 9400 }, { "epoch": 0.5073120716636986, "grad_norm": 1.00967021738363, "learning_rate": 5.396637113394735e-06, "loss": 0.4306, "step": 9401 }, { "epoch": 0.507366035292213, "grad_norm": 1.1243844499726794, "learning_rate": 5.3958744233361285e-06, "loss": 0.5971, "step": 9402 }, { "epoch": 0.5074199989207274, "grad_norm": 1.0569794155400694, "learning_rate": 5.395111736270196e-06, "loss": 0.4198, "step": 9403 }, { "epoch": 0.5074739625492418, "grad_norm": 1.0131338626089779, "learning_rate": 5.394349052218859e-06, "loss": 0.7423, "step": 9404 }, { 
"epoch": 0.5075279261777562, "grad_norm": 0.9786948776115592, "learning_rate": 5.393586371204036e-06, "loss": 0.4106, "step": 9405 }, { "epoch": 0.5075818898062706, "grad_norm": 1.1244090308719106, "learning_rate": 5.3928236932476484e-06, "loss": 0.5618, "step": 9406 }, { "epoch": 0.5076358534347849, "grad_norm": 0.893154450020052, "learning_rate": 5.392061018371618e-06, "loss": 0.3424, "step": 9407 }, { "epoch": 0.5076898170632993, "grad_norm": 1.1119116900667911, "learning_rate": 5.391298346597862e-06, "loss": 0.5376, "step": 9408 }, { "epoch": 0.5077437806918137, "grad_norm": 0.8961298384644844, "learning_rate": 5.390535677948301e-06, "loss": 0.468, "step": 9409 }, { "epoch": 0.5077977443203281, "grad_norm": 1.0652336094787611, "learning_rate": 5.3897730124448545e-06, "loss": 0.5296, "step": 9410 }, { "epoch": 0.5078517079488425, "grad_norm": 1.006605202122239, "learning_rate": 5.3890103501094435e-06, "loss": 0.4375, "step": 9411 }, { "epoch": 0.5079056715773569, "grad_norm": 1.0441137668653337, "learning_rate": 5.3882476909639865e-06, "loss": 0.5252, "step": 9412 }, { "epoch": 0.5079596352058713, "grad_norm": 1.0806736314001495, "learning_rate": 5.387485035030406e-06, "loss": 0.5954, "step": 9413 }, { "epoch": 0.5080135988343856, "grad_norm": 1.1180207899052648, "learning_rate": 5.3867223823306195e-06, "loss": 0.4105, "step": 9414 }, { "epoch": 0.5080675624629, "grad_norm": 1.2201159616781307, "learning_rate": 5.3859597328865435e-06, "loss": 0.5592, "step": 9415 }, { "epoch": 0.5081215260914144, "grad_norm": 1.0405259013909465, "learning_rate": 5.385197086720101e-06, "loss": 0.3972, "step": 9416 }, { "epoch": 0.5081754897199288, "grad_norm": 0.8797325875213612, "learning_rate": 5.3844344438532105e-06, "loss": 0.4242, "step": 9417 }, { "epoch": 0.5082294533484432, "grad_norm": 0.8144842574907457, "learning_rate": 5.383671804307789e-06, "loss": 0.3655, "step": 9418 }, { "epoch": 0.5082834169769576, "grad_norm": 1.2600851776122295, "learning_rate": 5.38290916810576e-06, "loss": 0.5017, "step": 9419 }, { "epoch": 0.508337380605472, "grad_norm": 1.376890497699021, "learning_rate": 5.3821465352690384e-06, "loss": 0.7611, "step": 9420 }, { "epoch": 0.5083913442339862, "grad_norm": 1.110166377031791, "learning_rate": 5.381383905819547e-06, "loss": 0.6109, "step": 9421 }, { "epoch": 0.5084453078625006, "grad_norm": 1.1443386370529234, "learning_rate": 5.3806212797792004e-06, "loss": 0.6569, "step": 9422 }, { "epoch": 0.508499271491015, "grad_norm": 0.8865349740549396, "learning_rate": 5.3798586571699176e-06, "loss": 0.4071, "step": 9423 }, { "epoch": 0.5085532351195294, "grad_norm": 0.9851042462143774, "learning_rate": 5.3790960380136196e-06, "loss": 0.4647, "step": 9424 }, { "epoch": 0.5086071987480438, "grad_norm": 1.0921081506411567, "learning_rate": 5.3783334223322226e-06, "loss": 0.5272, "step": 9425 }, { "epoch": 0.5086611623765582, "grad_norm": 1.028663338945143, "learning_rate": 5.377570810147646e-06, "loss": 0.4466, "step": 9426 }, { "epoch": 0.5087151260050726, "grad_norm": 0.9695302465418382, "learning_rate": 5.376808201481809e-06, "loss": 0.5154, "step": 9427 }, { "epoch": 0.5087690896335869, "grad_norm": 1.0613315868994098, "learning_rate": 5.376045596356629e-06, "loss": 0.5275, "step": 9428 }, { "epoch": 0.5088230532621013, "grad_norm": 0.9600155062573511, "learning_rate": 5.375282994794023e-06, "loss": 0.5677, "step": 9429 }, { "epoch": 0.5088770168906157, "grad_norm": 1.2369017440829464, "learning_rate": 5.37452039681591e-06, "loss": 0.7897, "step": 9430 }, { "epoch": 
0.5089309805191301, "grad_norm": 0.8346290853701288, "learning_rate": 5.373757802444208e-06, "loss": 0.3288, "step": 9431 }, { "epoch": 0.5089849441476445, "grad_norm": 1.152197974099437, "learning_rate": 5.372995211700833e-06, "loss": 0.4831, "step": 9432 }, { "epoch": 0.5090389077761589, "grad_norm": 0.9976002295486909, "learning_rate": 5.372232624607705e-06, "loss": 0.5326, "step": 9433 }, { "epoch": 0.5090928714046733, "grad_norm": 1.105605891311398, "learning_rate": 5.3714700411867405e-06, "loss": 0.4563, "step": 9434 }, { "epoch": 0.5091468350331876, "grad_norm": 0.9801942386580135, "learning_rate": 5.3707074614598575e-06, "loss": 0.5085, "step": 9435 }, { "epoch": 0.509200798661702, "grad_norm": 0.8388572354012555, "learning_rate": 5.3699448854489725e-06, "loss": 0.3667, "step": 9436 }, { "epoch": 0.5092547622902164, "grad_norm": 0.9767356605098251, "learning_rate": 5.3691823131760015e-06, "loss": 0.401, "step": 9437 }, { "epoch": 0.5093087259187308, "grad_norm": 0.8999967874237264, "learning_rate": 5.368419744662864e-06, "loss": 0.3666, "step": 9438 }, { "epoch": 0.5093626895472452, "grad_norm": 1.0605515908494463, "learning_rate": 5.367657179931475e-06, "loss": 0.5408, "step": 9439 }, { "epoch": 0.5094166531757596, "grad_norm": 0.8918331066264457, "learning_rate": 5.366894619003753e-06, "loss": 0.3501, "step": 9440 }, { "epoch": 0.509470616804274, "grad_norm": 1.019115625299005, "learning_rate": 5.3661320619016135e-06, "loss": 0.5123, "step": 9441 }, { "epoch": 0.5095245804327883, "grad_norm": 1.4176605397142628, "learning_rate": 5.365369508646977e-06, "loss": 0.4697, "step": 9442 }, { "epoch": 0.5095785440613027, "grad_norm": 1.0553679959858606, "learning_rate": 5.364606959261754e-06, "loss": 0.4904, "step": 9443 }, { "epoch": 0.509632507689817, "grad_norm": 1.3222229409071806, "learning_rate": 5.363844413767864e-06, "loss": 0.4557, "step": 9444 }, { "epoch": 0.5096864713183314, "grad_norm": 1.0511353815921147, "learning_rate": 5.363081872187224e-06, "loss": 0.5261, "step": 9445 }, { "epoch": 0.5097404349468458, "grad_norm": 1.0385258689720664, "learning_rate": 5.362319334541748e-06, "loss": 0.5468, "step": 9446 }, { "epoch": 0.5097943985753602, "grad_norm": 1.0546387415950829, "learning_rate": 5.361556800853354e-06, "loss": 0.4149, "step": 9447 }, { "epoch": 0.5098483622038746, "grad_norm": 1.1728757843616993, "learning_rate": 5.360794271143956e-06, "loss": 0.5437, "step": 9448 }, { "epoch": 0.5099023258323889, "grad_norm": 0.872400591515787, "learning_rate": 5.3600317454354755e-06, "loss": 0.4188, "step": 9449 }, { "epoch": 0.5099562894609033, "grad_norm": 0.8344911400215077, "learning_rate": 5.359269223749821e-06, "loss": 0.3577, "step": 9450 }, { "epoch": 0.5100102530894177, "grad_norm": 1.095191739649592, "learning_rate": 5.358506706108912e-06, "loss": 0.5297, "step": 9451 }, { "epoch": 0.5100642167179321, "grad_norm": 1.1650270543834973, "learning_rate": 5.357744192534662e-06, "loss": 0.6104, "step": 9452 }, { "epoch": 0.5101181803464465, "grad_norm": 0.9825647577285964, "learning_rate": 5.356981683048989e-06, "loss": 0.4341, "step": 9453 }, { "epoch": 0.5101721439749609, "grad_norm": 0.9988132219776641, "learning_rate": 5.356219177673806e-06, "loss": 0.4224, "step": 9454 }, { "epoch": 0.5102261076034753, "grad_norm": 1.2549631660856984, "learning_rate": 5.3554566764310286e-06, "loss": 0.6569, "step": 9455 }, { "epoch": 0.5102800712319896, "grad_norm": 1.2811279465075511, "learning_rate": 5.354694179342575e-06, "loss": 0.5737, "step": 9456 }, { "epoch": 0.510334034860504, 
"grad_norm": 1.1614188992985968, "learning_rate": 5.353931686430356e-06, "loss": 0.5508, "step": 9457 }, { "epoch": 0.5103879984890184, "grad_norm": 0.9940653919749042, "learning_rate": 5.353169197716287e-06, "loss": 0.3259, "step": 9458 }, { "epoch": 0.5104419621175328, "grad_norm": 1.0397730949597248, "learning_rate": 5.352406713222284e-06, "loss": 0.4376, "step": 9459 }, { "epoch": 0.5104959257460472, "grad_norm": 1.1631554022276065, "learning_rate": 5.351644232970261e-06, "loss": 0.6159, "step": 9460 }, { "epoch": 0.5105498893745616, "grad_norm": 1.3605443280130733, "learning_rate": 5.350881756982134e-06, "loss": 0.554, "step": 9461 }, { "epoch": 0.510603853003076, "grad_norm": 0.9794408626484726, "learning_rate": 5.350119285279816e-06, "loss": 0.5235, "step": 9462 }, { "epoch": 0.5106578166315903, "grad_norm": 0.9025172651539392, "learning_rate": 5.349356817885221e-06, "loss": 0.3805, "step": 9463 }, { "epoch": 0.5107117802601047, "grad_norm": 0.8985388723300692, "learning_rate": 5.348594354820262e-06, "loss": 0.4272, "step": 9464 }, { "epoch": 0.5107657438886191, "grad_norm": 0.9332492290462433, "learning_rate": 5.347831896106855e-06, "loss": 0.3846, "step": 9465 }, { "epoch": 0.5108197075171335, "grad_norm": 1.1903522300638305, "learning_rate": 5.3470694417669125e-06, "loss": 0.7522, "step": 9466 }, { "epoch": 0.5108736711456479, "grad_norm": 1.052427708161447, "learning_rate": 5.34630699182235e-06, "loss": 0.4175, "step": 9467 }, { "epoch": 0.5109276347741623, "grad_norm": 1.22906191734881, "learning_rate": 5.345544546295079e-06, "loss": 0.5449, "step": 9468 }, { "epoch": 0.5109815984026767, "grad_norm": 1.0489256322244487, "learning_rate": 5.344782105207013e-06, "loss": 0.4993, "step": 9469 }, { "epoch": 0.5110355620311909, "grad_norm": 0.8407264555357644, "learning_rate": 5.344019668580069e-06, "loss": 0.3819, "step": 9470 }, { "epoch": 0.5110895256597053, "grad_norm": 1.2270976850278346, "learning_rate": 5.343257236436155e-06, "loss": 0.4704, "step": 9471 }, { "epoch": 0.5111434892882197, "grad_norm": 0.9334152416333615, "learning_rate": 5.342494808797186e-06, "loss": 0.5237, "step": 9472 }, { "epoch": 0.5111974529167341, "grad_norm": 1.0388990990932683, "learning_rate": 5.341732385685077e-06, "loss": 0.4912, "step": 9473 }, { "epoch": 0.5112514165452485, "grad_norm": 0.767349263070513, "learning_rate": 5.3409699671217375e-06, "loss": 0.2651, "step": 9474 }, { "epoch": 0.5113053801737629, "grad_norm": 0.9853482995029555, "learning_rate": 5.340207553129082e-06, "loss": 0.5135, "step": 9475 }, { "epoch": 0.5113593438022772, "grad_norm": 0.9075800943197744, "learning_rate": 5.339445143729025e-06, "loss": 0.4034, "step": 9476 }, { "epoch": 0.5114133074307916, "grad_norm": 0.967253491797828, "learning_rate": 5.338682738943476e-06, "loss": 0.5876, "step": 9477 }, { "epoch": 0.511467271059306, "grad_norm": 1.1017140193584496, "learning_rate": 5.337920338794347e-06, "loss": 0.521, "step": 9478 }, { "epoch": 0.5115212346878204, "grad_norm": 0.9055936794609204, "learning_rate": 5.33715794330355e-06, "loss": 0.4222, "step": 9479 }, { "epoch": 0.5115751983163348, "grad_norm": 1.0025306627528165, "learning_rate": 5.336395552493e-06, "loss": 0.4276, "step": 9480 }, { "epoch": 0.5116291619448492, "grad_norm": 1.1533059748446113, "learning_rate": 5.335633166384608e-06, "loss": 0.5143, "step": 9481 }, { "epoch": 0.5116831255733636, "grad_norm": 0.9801555103829211, "learning_rate": 5.334870785000283e-06, "loss": 0.4028, "step": 9482 }, { "epoch": 0.5117370892018779, "grad_norm": 
1.1639309709593904, "learning_rate": 5.33410840836194e-06, "loss": 0.4526, "step": 9483 }, { "epoch": 0.5117910528303923, "grad_norm": 1.2616770189198094, "learning_rate": 5.333346036491488e-06, "loss": 0.554, "step": 9484 }, { "epoch": 0.5118450164589067, "grad_norm": 1.0434637248640053, "learning_rate": 5.332583669410839e-06, "loss": 0.5137, "step": 9485 }, { "epoch": 0.5118989800874211, "grad_norm": 1.1223669099574738, "learning_rate": 5.331821307141906e-06, "loss": 0.3561, "step": 9486 }, { "epoch": 0.5119529437159355, "grad_norm": 1.0142832592253106, "learning_rate": 5.331058949706597e-06, "loss": 0.5087, "step": 9487 }, { "epoch": 0.5120069073444499, "grad_norm": 0.7799546641256196, "learning_rate": 5.330296597126825e-06, "loss": 0.3253, "step": 9488 }, { "epoch": 0.5120608709729643, "grad_norm": 0.9415499244953475, "learning_rate": 5.3295342494244995e-06, "loss": 0.4006, "step": 9489 }, { "epoch": 0.5121148346014786, "grad_norm": 0.9812011633594355, "learning_rate": 5.328771906621535e-06, "loss": 0.3876, "step": 9490 }, { "epoch": 0.512168798229993, "grad_norm": 0.8517014601324987, "learning_rate": 5.3280095687398355e-06, "loss": 0.4424, "step": 9491 }, { "epoch": 0.5122227618585073, "grad_norm": 0.8686420387461525, "learning_rate": 5.327247235801317e-06, "loss": 0.3667, "step": 9492 }, { "epoch": 0.5122767254870217, "grad_norm": 1.0073453193635682, "learning_rate": 5.326484907827886e-06, "loss": 0.4401, "step": 9493 }, { "epoch": 0.5123306891155361, "grad_norm": 1.1297567902788728, "learning_rate": 5.325722584841454e-06, "loss": 0.5509, "step": 9494 }, { "epoch": 0.5123846527440505, "grad_norm": 1.3931697942261212, "learning_rate": 5.3249602668639324e-06, "loss": 0.621, "step": 9495 }, { "epoch": 0.5124386163725649, "grad_norm": 1.0443065346933265, "learning_rate": 5.3241979539172286e-06, "loss": 0.5686, "step": 9496 }, { "epoch": 0.5124925800010792, "grad_norm": 1.007221749009845, "learning_rate": 5.323435646023255e-06, "loss": 0.4122, "step": 9497 }, { "epoch": 0.5125465436295936, "grad_norm": 0.7622185133305635, "learning_rate": 5.322673343203917e-06, "loss": 0.3528, "step": 9498 }, { "epoch": 0.512600507258108, "grad_norm": 0.8833510376897722, "learning_rate": 5.321911045481127e-06, "loss": 0.3657, "step": 9499 }, { "epoch": 0.5126544708866224, "grad_norm": 0.9310437674593675, "learning_rate": 5.321148752876794e-06, "loss": 0.443, "step": 9500 }, { "epoch": 0.5126544708866224, "eval_loss": 0.5477094054222107, "eval_runtime": 163.9886, "eval_samples_per_second": 20.971, "eval_steps_per_second": 0.878, "step": 9500 }, { "epoch": 0.5127084345151368, "grad_norm": 1.0137443824087755, "learning_rate": 5.3203864654128255e-06, "loss": 0.521, "step": 9501 }, { "epoch": 0.5127623981436512, "grad_norm": 1.0439389148806155, "learning_rate": 5.319624183111132e-06, "loss": 0.5964, "step": 9502 }, { "epoch": 0.5128163617721656, "grad_norm": 0.9730908661302556, "learning_rate": 5.318861905993621e-06, "loss": 0.5301, "step": 9503 }, { "epoch": 0.5128703254006799, "grad_norm": 0.944760340021586, "learning_rate": 5.318099634082205e-06, "loss": 0.6459, "step": 9504 }, { "epoch": 0.5129242890291943, "grad_norm": 0.7458453262845326, "learning_rate": 5.317337367398785e-06, "loss": 0.2956, "step": 9505 }, { "epoch": 0.5129782526577087, "grad_norm": 1.1366312343871146, "learning_rate": 5.316575105965277e-06, "loss": 0.4922, "step": 9506 }, { "epoch": 0.5130322162862231, "grad_norm": 0.8555854261054769, "learning_rate": 5.315812849803585e-06, "loss": 0.3898, "step": 9507 }, { "epoch": 
0.5130861799147375, "grad_norm": 0.8645922830620804, "learning_rate": 5.315050598935616e-06, "loss": 0.3572, "step": 9508 }, { "epoch": 0.5131401435432519, "grad_norm": 1.0909336148921638, "learning_rate": 5.314288353383281e-06, "loss": 0.5596, "step": 9509 }, { "epoch": 0.5131941071717663, "grad_norm": 1.0284089170641122, "learning_rate": 5.313526113168486e-06, "loss": 0.5023, "step": 9510 }, { "epoch": 0.5132480708002806, "grad_norm": 1.0809901877259969, "learning_rate": 5.3127638783131394e-06, "loss": 0.5193, "step": 9511 }, { "epoch": 0.513302034428795, "grad_norm": 0.9226007170378812, "learning_rate": 5.312001648839148e-06, "loss": 0.4076, "step": 9512 }, { "epoch": 0.5133559980573094, "grad_norm": 1.0523353943724705, "learning_rate": 5.31123942476842e-06, "loss": 0.4356, "step": 9513 }, { "epoch": 0.5134099616858238, "grad_norm": 1.171670516093928, "learning_rate": 5.3104772061228595e-06, "loss": 0.4759, "step": 9514 }, { "epoch": 0.5134639253143382, "grad_norm": 0.8514167938978103, "learning_rate": 5.3097149929243776e-06, "loss": 0.356, "step": 9515 }, { "epoch": 0.5135178889428526, "grad_norm": 1.0705113257051169, "learning_rate": 5.308952785194877e-06, "loss": 0.6298, "step": 9516 }, { "epoch": 0.513571852571367, "grad_norm": 0.9728649254817006, "learning_rate": 5.308190582956267e-06, "loss": 0.4238, "step": 9517 }, { "epoch": 0.5136258161998812, "grad_norm": 0.8395213143387513, "learning_rate": 5.307428386230456e-06, "loss": 0.3951, "step": 9518 }, { "epoch": 0.5136797798283956, "grad_norm": 1.0715402601181856, "learning_rate": 5.306666195039346e-06, "loss": 0.5053, "step": 9519 }, { "epoch": 0.51373374345691, "grad_norm": 0.7847792768446136, "learning_rate": 5.305904009404845e-06, "loss": 0.3116, "step": 9520 }, { "epoch": 0.5137877070854244, "grad_norm": 0.875714052729186, "learning_rate": 5.305141829348858e-06, "loss": 0.4605, "step": 9521 }, { "epoch": 0.5138416707139388, "grad_norm": 1.065208468147302, "learning_rate": 5.3043796548932925e-06, "loss": 0.5866, "step": 9522 }, { "epoch": 0.5138956343424532, "grad_norm": 1.2548103475711776, "learning_rate": 5.303617486060053e-06, "loss": 0.3948, "step": 9523 }, { "epoch": 0.5139495979709676, "grad_norm": 1.262662462725494, "learning_rate": 5.3028553228710455e-06, "loss": 0.4929, "step": 9524 }, { "epoch": 0.5140035615994819, "grad_norm": 1.1166389490465882, "learning_rate": 5.302093165348176e-06, "loss": 0.6332, "step": 9525 }, { "epoch": 0.5140575252279963, "grad_norm": 1.1650809976765597, "learning_rate": 5.301331013513349e-06, "loss": 0.4387, "step": 9526 }, { "epoch": 0.5141114888565107, "grad_norm": 1.0101074439112827, "learning_rate": 5.300568867388469e-06, "loss": 0.5445, "step": 9527 }, { "epoch": 0.5141654524850251, "grad_norm": 0.9128755355254814, "learning_rate": 5.299806726995441e-06, "loss": 0.4714, "step": 9528 }, { "epoch": 0.5142194161135395, "grad_norm": 1.0520169465363594, "learning_rate": 5.299044592356169e-06, "loss": 0.5564, "step": 9529 }, { "epoch": 0.5142733797420539, "grad_norm": 0.9331334447036628, "learning_rate": 5.298282463492559e-06, "loss": 0.4303, "step": 9530 }, { "epoch": 0.5143273433705683, "grad_norm": 1.1891690189439506, "learning_rate": 5.297520340426515e-06, "loss": 0.5342, "step": 9531 }, { "epoch": 0.5143813069990826, "grad_norm": 1.227969558026063, "learning_rate": 5.296758223179942e-06, "loss": 0.4861, "step": 9532 }, { "epoch": 0.514435270627597, "grad_norm": 1.0628138205456097, "learning_rate": 5.295996111774743e-06, "loss": 0.5621, "step": 9533 }, { "epoch": 0.5144892342561114, 
"grad_norm": 0.8539649117378358, "learning_rate": 5.29523400623282e-06, "loss": 0.3493, "step": 9534 }, { "epoch": 0.5145431978846258, "grad_norm": 0.7919215544234193, "learning_rate": 5.29447190657608e-06, "loss": 0.3505, "step": 9535 }, { "epoch": 0.5145971615131402, "grad_norm": 0.9642900684788295, "learning_rate": 5.293709812826424e-06, "loss": 0.3663, "step": 9536 }, { "epoch": 0.5146511251416546, "grad_norm": 0.8900006720983381, "learning_rate": 5.292947725005758e-06, "loss": 0.3536, "step": 9537 }, { "epoch": 0.514705088770169, "grad_norm": 1.0900926058327856, "learning_rate": 5.292185643135983e-06, "loss": 0.483, "step": 9538 }, { "epoch": 0.5147590523986832, "grad_norm": 0.8644080685866212, "learning_rate": 5.291423567239003e-06, "loss": 0.3063, "step": 9539 }, { "epoch": 0.5148130160271976, "grad_norm": 1.1578621567822183, "learning_rate": 5.29066149733672e-06, "loss": 0.676, "step": 9540 }, { "epoch": 0.514866979655712, "grad_norm": 1.1870323480816298, "learning_rate": 5.2898994334510365e-06, "loss": 0.5014, "step": 9541 }, { "epoch": 0.5149209432842264, "grad_norm": 1.169619843763151, "learning_rate": 5.289137375603855e-06, "loss": 0.5368, "step": 9542 }, { "epoch": 0.5149749069127408, "grad_norm": 1.0494188523794414, "learning_rate": 5.28837532381708e-06, "loss": 0.5156, "step": 9543 }, { "epoch": 0.5150288705412552, "grad_norm": 0.8637406124301432, "learning_rate": 5.2876132781126114e-06, "loss": 0.3875, "step": 9544 }, { "epoch": 0.5150828341697695, "grad_norm": 1.0281518088010726, "learning_rate": 5.286851238512353e-06, "loss": 0.3397, "step": 9545 }, { "epoch": 0.5151367977982839, "grad_norm": 0.8775821082840113, "learning_rate": 5.286089205038204e-06, "loss": 0.3369, "step": 9546 }, { "epoch": 0.5151907614267983, "grad_norm": 1.1620883236728918, "learning_rate": 5.285327177712067e-06, "loss": 0.4857, "step": 9547 }, { "epoch": 0.5152447250553127, "grad_norm": 1.1157724760052157, "learning_rate": 5.284565156555845e-06, "loss": 0.5505, "step": 9548 }, { "epoch": 0.5152986886838271, "grad_norm": 0.8643473621903006, "learning_rate": 5.283803141591437e-06, "loss": 0.3599, "step": 9549 }, { "epoch": 0.5153526523123415, "grad_norm": 1.099209185598772, "learning_rate": 5.283041132840745e-06, "loss": 0.4954, "step": 9550 }, { "epoch": 0.5154066159408559, "grad_norm": 1.164662847270761, "learning_rate": 5.28227913032567e-06, "loss": 0.5588, "step": 9551 }, { "epoch": 0.5154605795693702, "grad_norm": 0.8709885667218966, "learning_rate": 5.281517134068115e-06, "loss": 0.3723, "step": 9552 }, { "epoch": 0.5155145431978846, "grad_norm": 1.1699189129428964, "learning_rate": 5.280755144089977e-06, "loss": 0.661, "step": 9553 }, { "epoch": 0.515568506826399, "grad_norm": 0.9847006379240044, "learning_rate": 5.279993160413157e-06, "loss": 0.4695, "step": 9554 }, { "epoch": 0.5156224704549134, "grad_norm": 0.9497383863625719, "learning_rate": 5.279231183059557e-06, "loss": 0.4238, "step": 9555 }, { "epoch": 0.5156764340834278, "grad_norm": 0.9695758096426631, "learning_rate": 5.278469212051074e-06, "loss": 0.3756, "step": 9556 }, { "epoch": 0.5157303977119422, "grad_norm": 1.0765203072650535, "learning_rate": 5.277707247409612e-06, "loss": 0.5415, "step": 9557 }, { "epoch": 0.5157843613404566, "grad_norm": 1.0372300941708161, "learning_rate": 5.276945289157066e-06, "loss": 0.4963, "step": 9558 }, { "epoch": 0.5158383249689709, "grad_norm": 0.921820973349719, "learning_rate": 5.276183337315339e-06, "loss": 0.3731, "step": 9559 }, { "epoch": 0.5158922885974853, "grad_norm": 
0.8242762583385296, "learning_rate": 5.275421391906329e-06, "loss": 0.4566, "step": 9560 }, { "epoch": 0.5159462522259997, "grad_norm": 1.201772590949609, "learning_rate": 5.274659452951936e-06, "loss": 0.4693, "step": 9561 }, { "epoch": 0.516000215854514, "grad_norm": 1.0646673926222765, "learning_rate": 5.273897520474056e-06, "loss": 0.4803, "step": 9562 }, { "epoch": 0.5160541794830285, "grad_norm": 1.1631062746456144, "learning_rate": 5.273135594494591e-06, "loss": 0.4397, "step": 9563 }, { "epoch": 0.5161081431115428, "grad_norm": 1.0313084597967017, "learning_rate": 5.272373675035437e-06, "loss": 0.5303, "step": 9564 }, { "epoch": 0.5161621067400572, "grad_norm": 1.1074489744138354, "learning_rate": 5.2716117621184946e-06, "loss": 0.7014, "step": 9565 }, { "epoch": 0.5162160703685715, "grad_norm": 0.8563501782023145, "learning_rate": 5.270849855765663e-06, "loss": 0.4292, "step": 9566 }, { "epoch": 0.5162700339970859, "grad_norm": 0.8505697005500529, "learning_rate": 5.270087955998836e-06, "loss": 0.4029, "step": 9567 }, { "epoch": 0.5163239976256003, "grad_norm": 0.9458912590907358, "learning_rate": 5.269326062839913e-06, "loss": 0.3972, "step": 9568 }, { "epoch": 0.5163779612541147, "grad_norm": 1.0422540532051232, "learning_rate": 5.268564176310793e-06, "loss": 0.4776, "step": 9569 }, { "epoch": 0.5164319248826291, "grad_norm": 0.8701788992545315, "learning_rate": 5.267802296433372e-06, "loss": 0.2903, "step": 9570 }, { "epoch": 0.5164858885111435, "grad_norm": 1.2606677083653655, "learning_rate": 5.267040423229547e-06, "loss": 0.5112, "step": 9571 }, { "epoch": 0.5165398521396579, "grad_norm": 1.1168572702493915, "learning_rate": 5.266278556721216e-06, "loss": 0.4891, "step": 9572 }, { "epoch": 0.5165938157681722, "grad_norm": 1.0315528970050492, "learning_rate": 5.265516696930277e-06, "loss": 0.5358, "step": 9573 }, { "epoch": 0.5166477793966866, "grad_norm": 1.0307100551744068, "learning_rate": 5.2647548438786245e-06, "loss": 0.559, "step": 9574 }, { "epoch": 0.516701743025201, "grad_norm": 1.1524939902816118, "learning_rate": 5.263992997588154e-06, "loss": 0.6051, "step": 9575 }, { "epoch": 0.5167557066537154, "grad_norm": 1.0103818164779266, "learning_rate": 5.263231158080764e-06, "loss": 0.4504, "step": 9576 }, { "epoch": 0.5168096702822298, "grad_norm": 1.0147891319081173, "learning_rate": 5.262469325378349e-06, "loss": 0.4511, "step": 9577 }, { "epoch": 0.5168636339107442, "grad_norm": 1.1409476991649232, "learning_rate": 5.2617074995028065e-06, "loss": 0.5462, "step": 9578 }, { "epoch": 0.5169175975392586, "grad_norm": 1.0825736282629932, "learning_rate": 5.26094568047603e-06, "loss": 0.5441, "step": 9579 }, { "epoch": 0.5169715611677729, "grad_norm": 0.899972200415747, "learning_rate": 5.260183868319919e-06, "loss": 0.4443, "step": 9580 }, { "epoch": 0.5170255247962873, "grad_norm": 0.8737871669079942, "learning_rate": 5.259422063056364e-06, "loss": 0.3562, "step": 9581 }, { "epoch": 0.5170794884248017, "grad_norm": 0.9721542723808108, "learning_rate": 5.258660264707262e-06, "loss": 0.4826, "step": 9582 }, { "epoch": 0.5171334520533161, "grad_norm": 0.8235930607982394, "learning_rate": 5.257898473294508e-06, "loss": 0.2592, "step": 9583 }, { "epoch": 0.5171874156818305, "grad_norm": 1.0629933665998887, "learning_rate": 5.257136688839997e-06, "loss": 0.5594, "step": 9584 }, { "epoch": 0.5172413793103449, "grad_norm": 0.8996526125580303, "learning_rate": 5.256374911365621e-06, "loss": 0.3901, "step": 9585 }, { "epoch": 0.5172953429388593, "grad_norm": 0.8905619906635535, 
"learning_rate": 5.255613140893278e-06, "loss": 0.451, "step": 9586 }, { "epoch": 0.5173493065673735, "grad_norm": 1.2150883996855757, "learning_rate": 5.25485137744486e-06, "loss": 0.521, "step": 9587 }, { "epoch": 0.5174032701958879, "grad_norm": 0.9989084333432823, "learning_rate": 5.2540896210422625e-06, "loss": 0.4543, "step": 9588 }, { "epoch": 0.5174572338244023, "grad_norm": 0.8805579091778825, "learning_rate": 5.253327871707375e-06, "loss": 0.4405, "step": 9589 }, { "epoch": 0.5175111974529167, "grad_norm": 0.8715443077854018, "learning_rate": 5.252566129462095e-06, "loss": 0.3297, "step": 9590 }, { "epoch": 0.5175651610814311, "grad_norm": 0.8712234379622795, "learning_rate": 5.251804394328315e-06, "loss": 0.4039, "step": 9591 }, { "epoch": 0.5176191247099455, "grad_norm": 1.1152068881845316, "learning_rate": 5.251042666327926e-06, "loss": 0.5832, "step": 9592 }, { "epoch": 0.5176730883384599, "grad_norm": 1.056480608975123, "learning_rate": 5.250280945482823e-06, "loss": 0.553, "step": 9593 }, { "epoch": 0.5177270519669742, "grad_norm": 0.9371457814465499, "learning_rate": 5.2495192318149e-06, "loss": 0.3972, "step": 9594 }, { "epoch": 0.5177810155954886, "grad_norm": 1.114822068390724, "learning_rate": 5.2487575253460464e-06, "loss": 0.6291, "step": 9595 }, { "epoch": 0.517834979224003, "grad_norm": 1.2113965559819555, "learning_rate": 5.247995826098154e-06, "loss": 0.7008, "step": 9596 }, { "epoch": 0.5178889428525174, "grad_norm": 0.8392142517577571, "learning_rate": 5.247234134093117e-06, "loss": 0.3539, "step": 9597 }, { "epoch": 0.5179429064810318, "grad_norm": 0.9314994870542145, "learning_rate": 5.246472449352825e-06, "loss": 0.3526, "step": 9598 }, { "epoch": 0.5179968701095462, "grad_norm": 1.0381821101085098, "learning_rate": 5.245710771899173e-06, "loss": 0.4492, "step": 9599 }, { "epoch": 0.5180508337380606, "grad_norm": 1.0942671314693733, "learning_rate": 5.244949101754049e-06, "loss": 0.5046, "step": 9600 }, { "epoch": 0.5181047973665749, "grad_norm": 0.9804402824645503, "learning_rate": 5.244187438939348e-06, "loss": 0.4116, "step": 9601 }, { "epoch": 0.5181587609950893, "grad_norm": 1.1841844825590797, "learning_rate": 5.2434257834769555e-06, "loss": 0.4875, "step": 9602 }, { "epoch": 0.5182127246236037, "grad_norm": 1.1257837833422584, "learning_rate": 5.242664135388767e-06, "loss": 0.4932, "step": 9603 }, { "epoch": 0.5182666882521181, "grad_norm": 1.136020172781561, "learning_rate": 5.24190249469667e-06, "loss": 0.6086, "step": 9604 }, { "epoch": 0.5183206518806325, "grad_norm": 0.8686481367272051, "learning_rate": 5.2411408614225555e-06, "loss": 0.4531, "step": 9605 }, { "epoch": 0.5183746155091469, "grad_norm": 0.9211521146023607, "learning_rate": 5.240379235588315e-06, "loss": 0.4132, "step": 9606 }, { "epoch": 0.5184285791376613, "grad_norm": 0.9926402972011605, "learning_rate": 5.239617617215839e-06, "loss": 0.4225, "step": 9607 }, { "epoch": 0.5184825427661756, "grad_norm": 1.00860105509254, "learning_rate": 5.238856006327015e-06, "loss": 0.4877, "step": 9608 }, { "epoch": 0.51853650639469, "grad_norm": 0.9283166513551118, "learning_rate": 5.238094402943732e-06, "loss": 0.5285, "step": 9609 }, { "epoch": 0.5185904700232044, "grad_norm": 0.9518125216308094, "learning_rate": 5.237332807087881e-06, "loss": 0.4046, "step": 9610 }, { "epoch": 0.5186444336517188, "grad_norm": 0.796793160783964, "learning_rate": 5.236571218781351e-06, "loss": 0.3363, "step": 9611 }, { "epoch": 0.5186983972802331, "grad_norm": 0.927304765102309, "learning_rate": 
5.235809638046032e-06, "loss": 0.4423, "step": 9612 }, { "epoch": 0.5187523609087475, "grad_norm": 0.8501849320155819, "learning_rate": 5.2350480649038084e-06, "loss": 0.3526, "step": 9613 }, { "epoch": 0.5188063245372618, "grad_norm": 1.0280089690016438, "learning_rate": 5.234286499376573e-06, "loss": 0.4963, "step": 9614 }, { "epoch": 0.5188602881657762, "grad_norm": 1.0473035276511051, "learning_rate": 5.233524941486211e-06, "loss": 0.5226, "step": 9615 }, { "epoch": 0.5189142517942906, "grad_norm": 1.1359285623169324, "learning_rate": 5.2327633912546105e-06, "loss": 0.4455, "step": 9616 }, { "epoch": 0.518968215422805, "grad_norm": 0.9654088114948347, "learning_rate": 5.232001848703662e-06, "loss": 0.4283, "step": 9617 }, { "epoch": 0.5190221790513194, "grad_norm": 1.1705489238758828, "learning_rate": 5.231240313855249e-06, "loss": 0.6863, "step": 9618 }, { "epoch": 0.5190761426798338, "grad_norm": 0.7650585532366093, "learning_rate": 5.230478786731262e-06, "loss": 0.2856, "step": 9619 }, { "epoch": 0.5191301063083482, "grad_norm": 1.2775566433219763, "learning_rate": 5.229717267353588e-06, "loss": 0.614, "step": 9620 }, { "epoch": 0.5191840699368625, "grad_norm": 0.9309559301011927, "learning_rate": 5.228955755744114e-06, "loss": 0.4593, "step": 9621 }, { "epoch": 0.5192380335653769, "grad_norm": 0.9348982285625476, "learning_rate": 5.2281942519247234e-06, "loss": 0.482, "step": 9622 }, { "epoch": 0.5192919971938913, "grad_norm": 0.9444850887417824, "learning_rate": 5.227432755917304e-06, "loss": 0.4012, "step": 9623 }, { "epoch": 0.5193459608224057, "grad_norm": 0.9874277516399572, "learning_rate": 5.2266712677437436e-06, "loss": 0.5133, "step": 9624 }, { "epoch": 0.5193999244509201, "grad_norm": 0.8525642564249177, "learning_rate": 5.2259097874259265e-06, "loss": 0.4084, "step": 9625 }, { "epoch": 0.5194538880794345, "grad_norm": 0.9824873847583103, "learning_rate": 5.2251483149857396e-06, "loss": 0.585, "step": 9626 }, { "epoch": 0.5195078517079489, "grad_norm": 0.9556821911991316, "learning_rate": 5.2243868504450665e-06, "loss": 0.4259, "step": 9627 }, { "epoch": 0.5195618153364632, "grad_norm": 1.090584512215743, "learning_rate": 5.223625393825797e-06, "loss": 0.5117, "step": 9628 }, { "epoch": 0.5196157789649776, "grad_norm": 1.3172272869469277, "learning_rate": 5.222863945149811e-06, "loss": 0.5551, "step": 9629 }, { "epoch": 0.519669742593492, "grad_norm": 1.265358764081254, "learning_rate": 5.222102504438995e-06, "loss": 0.58, "step": 9630 }, { "epoch": 0.5197237062220064, "grad_norm": 1.0239914984229466, "learning_rate": 5.2213410717152345e-06, "loss": 0.4849, "step": 9631 }, { "epoch": 0.5197776698505208, "grad_norm": 0.9013851257388786, "learning_rate": 5.2205796470004125e-06, "loss": 0.3961, "step": 9632 }, { "epoch": 0.5198316334790352, "grad_norm": 0.9196823912790001, "learning_rate": 5.219818230316413e-06, "loss": 0.3843, "step": 9633 }, { "epoch": 0.5198855971075496, "grad_norm": 1.1576426952802408, "learning_rate": 5.219056821685122e-06, "loss": 0.544, "step": 9634 }, { "epoch": 0.5199395607360638, "grad_norm": 1.1172076297874685, "learning_rate": 5.218295421128422e-06, "loss": 0.4564, "step": 9635 }, { "epoch": 0.5199935243645782, "grad_norm": 0.9876044477715409, "learning_rate": 5.217534028668195e-06, "loss": 0.4979, "step": 9636 }, { "epoch": 0.5200474879930926, "grad_norm": 1.0034846833950475, "learning_rate": 5.216772644326326e-06, "loss": 0.4413, "step": 9637 }, { "epoch": 0.520101451621607, "grad_norm": 1.0444907992183496, "learning_rate": 
5.216011268124698e-06, "loss": 0.5136, "step": 9638 }, { "epoch": 0.5201554152501214, "grad_norm": 1.1570987083669353, "learning_rate": 5.215249900085192e-06, "loss": 0.5991, "step": 9639 }, { "epoch": 0.5202093788786358, "grad_norm": 0.8909997092140797, "learning_rate": 5.214488540229693e-06, "loss": 0.3892, "step": 9640 }, { "epoch": 0.5202633425071502, "grad_norm": 0.7484242538203026, "learning_rate": 5.21372718858008e-06, "loss": 0.3142, "step": 9641 }, { "epoch": 0.5203173061356645, "grad_norm": 1.0435869672094704, "learning_rate": 5.21296584515824e-06, "loss": 0.4225, "step": 9642 }, { "epoch": 0.5203712697641789, "grad_norm": 0.9497086924896089, "learning_rate": 5.21220450998605e-06, "loss": 0.4226, "step": 9643 }, { "epoch": 0.5204252333926933, "grad_norm": 0.9809168577951985, "learning_rate": 5.211443183085392e-06, "loss": 0.5073, "step": 9644 }, { "epoch": 0.5204791970212077, "grad_norm": 0.805141581807964, "learning_rate": 5.210681864478149e-06, "loss": 0.3329, "step": 9645 }, { "epoch": 0.5205331606497221, "grad_norm": 1.0790794260263412, "learning_rate": 5.2099205541862e-06, "loss": 0.5211, "step": 9646 }, { "epoch": 0.5205871242782365, "grad_norm": 0.8919513825787239, "learning_rate": 5.209159252231431e-06, "loss": 0.3996, "step": 9647 }, { "epoch": 0.5206410879067509, "grad_norm": 0.8367033247102641, "learning_rate": 5.2083979586357156e-06, "loss": 0.3125, "step": 9648 }, { "epoch": 0.5206950515352652, "grad_norm": 1.028778199112551, "learning_rate": 5.207636673420942e-06, "loss": 0.4538, "step": 9649 }, { "epoch": 0.5207490151637796, "grad_norm": 1.186487267235031, "learning_rate": 5.206875396608981e-06, "loss": 0.5198, "step": 9650 }, { "epoch": 0.520802978792294, "grad_norm": 1.1448527637653159, "learning_rate": 5.20611412822172e-06, "loss": 0.4441, "step": 9651 }, { "epoch": 0.5208569424208084, "grad_norm": 1.1945708064507647, "learning_rate": 5.2053528682810335e-06, "loss": 0.4916, "step": 9652 }, { "epoch": 0.5209109060493228, "grad_norm": 1.3514620088073783, "learning_rate": 5.204591616808805e-06, "loss": 0.6016, "step": 9653 }, { "epoch": 0.5209648696778372, "grad_norm": 0.7900433796031581, "learning_rate": 5.2038303738269125e-06, "loss": 0.406, "step": 9654 }, { "epoch": 0.5210188333063516, "grad_norm": 0.9605800611054289, "learning_rate": 5.203069139357233e-06, "loss": 0.5516, "step": 9655 }, { "epoch": 0.5210727969348659, "grad_norm": 1.0022009859103247, "learning_rate": 5.202307913421649e-06, "loss": 0.4124, "step": 9656 }, { "epoch": 0.5211267605633803, "grad_norm": 0.8831524478882585, "learning_rate": 5.201546696042033e-06, "loss": 0.3616, "step": 9657 }, { "epoch": 0.5211807241918947, "grad_norm": 0.9199655354610686, "learning_rate": 5.200785487240268e-06, "loss": 0.4455, "step": 9658 }, { "epoch": 0.521234687820409, "grad_norm": 1.0644316527177442, "learning_rate": 5.2000242870382316e-06, "loss": 0.5296, "step": 9659 }, { "epoch": 0.5212886514489234, "grad_norm": 0.8342310291393389, "learning_rate": 5.1992630954577984e-06, "loss": 0.3433, "step": 9660 }, { "epoch": 0.5213426150774378, "grad_norm": 1.0519974042422013, "learning_rate": 5.198501912520848e-06, "loss": 0.4637, "step": 9661 }, { "epoch": 0.5213965787059522, "grad_norm": 0.9568982919293628, "learning_rate": 5.197740738249258e-06, "loss": 0.4854, "step": 9662 }, { "epoch": 0.5214505423344665, "grad_norm": 0.9744712278976847, "learning_rate": 5.196979572664904e-06, "loss": 0.4107, "step": 9663 }, { "epoch": 0.5215045059629809, "grad_norm": 1.1361613026257185, "learning_rate": 5.196218415789663e-06, 
"loss": 0.6605, "step": 9664 }, { "epoch": 0.5215584695914953, "grad_norm": 0.9598370269010816, "learning_rate": 5.195457267645412e-06, "loss": 0.5433, "step": 9665 }, { "epoch": 0.5216124332200097, "grad_norm": 1.2191054239814343, "learning_rate": 5.194696128254028e-06, "loss": 0.5207, "step": 9666 }, { "epoch": 0.5216663968485241, "grad_norm": 1.1836060091430065, "learning_rate": 5.193934997637383e-06, "loss": 0.4466, "step": 9667 }, { "epoch": 0.5217203604770385, "grad_norm": 1.0235342200021762, "learning_rate": 5.193173875817358e-06, "loss": 0.4278, "step": 9668 }, { "epoch": 0.5217743241055529, "grad_norm": 1.0416863034567074, "learning_rate": 5.192412762815824e-06, "loss": 0.551, "step": 9669 }, { "epoch": 0.5218282877340672, "grad_norm": 0.9843409050045359, "learning_rate": 5.19165165865466e-06, "loss": 0.4125, "step": 9670 }, { "epoch": 0.5218822513625816, "grad_norm": 0.9027373064956576, "learning_rate": 5.190890563355738e-06, "loss": 0.3863, "step": 9671 }, { "epoch": 0.521936214991096, "grad_norm": 0.9916368384077192, "learning_rate": 5.190129476940934e-06, "loss": 0.6031, "step": 9672 }, { "epoch": 0.5219901786196104, "grad_norm": 1.152862172148206, "learning_rate": 5.189368399432121e-06, "loss": 0.6138, "step": 9673 }, { "epoch": 0.5220441422481248, "grad_norm": 1.0157933030915443, "learning_rate": 5.188607330851174e-06, "loss": 0.4258, "step": 9674 }, { "epoch": 0.5220981058766392, "grad_norm": 1.0182717921002815, "learning_rate": 5.187846271219967e-06, "loss": 0.3822, "step": 9675 }, { "epoch": 0.5221520695051535, "grad_norm": 1.2205893676101491, "learning_rate": 5.187085220560376e-06, "loss": 0.6016, "step": 9676 }, { "epoch": 0.5222060331336679, "grad_norm": 1.03013158869447, "learning_rate": 5.1863241788942695e-06, "loss": 0.462, "step": 9677 }, { "epoch": 0.5222599967621823, "grad_norm": 1.0468405134984857, "learning_rate": 5.185563146243523e-06, "loss": 0.518, "step": 9678 }, { "epoch": 0.5223139603906967, "grad_norm": 1.0541604364729629, "learning_rate": 5.184802122630009e-06, "loss": 0.6002, "step": 9679 }, { "epoch": 0.5223679240192111, "grad_norm": 1.0861210682141307, "learning_rate": 5.184041108075602e-06, "loss": 0.4807, "step": 9680 }, { "epoch": 0.5224218876477255, "grad_norm": 1.0760653494054, "learning_rate": 5.183280102602172e-06, "loss": 0.6313, "step": 9681 }, { "epoch": 0.5224758512762399, "grad_norm": 1.161952978821668, "learning_rate": 5.182519106231592e-06, "loss": 0.4907, "step": 9682 }, { "epoch": 0.5225298149047541, "grad_norm": 0.9485251956637524, "learning_rate": 5.181758118985735e-06, "loss": 0.4728, "step": 9683 }, { "epoch": 0.5225837785332685, "grad_norm": 0.9870468666583867, "learning_rate": 5.180997140886469e-06, "loss": 0.4828, "step": 9684 }, { "epoch": 0.5226377421617829, "grad_norm": 1.0064886256724106, "learning_rate": 5.180236171955668e-06, "loss": 0.4354, "step": 9685 }, { "epoch": 0.5226917057902973, "grad_norm": 0.9184619198469036, "learning_rate": 5.1794752122152035e-06, "loss": 0.4824, "step": 9686 }, { "epoch": 0.5227456694188117, "grad_norm": 1.1486371908113884, "learning_rate": 5.178714261686945e-06, "loss": 0.4845, "step": 9687 }, { "epoch": 0.5227996330473261, "grad_norm": 1.17066951596827, "learning_rate": 5.177953320392763e-06, "loss": 0.5198, "step": 9688 }, { "epoch": 0.5228535966758405, "grad_norm": 0.8143927021320968, "learning_rate": 5.177192388354527e-06, "loss": 0.3615, "step": 9689 }, { "epoch": 0.5229075603043548, "grad_norm": 1.1883570313485465, "learning_rate": 5.176431465594109e-06, "loss": 0.5521, "step": 9690 
}, { "epoch": 0.5229615239328692, "grad_norm": 0.8149011046811768, "learning_rate": 5.175670552133378e-06, "loss": 0.4208, "step": 9691 }, { "epoch": 0.5230154875613836, "grad_norm": 1.0687523226023858, "learning_rate": 5.174909647994202e-06, "loss": 0.5109, "step": 9692 }, { "epoch": 0.523069451189898, "grad_norm": 1.1371724134509407, "learning_rate": 5.174148753198452e-06, "loss": 0.4535, "step": 9693 }, { "epoch": 0.5231234148184124, "grad_norm": 1.1033189258389058, "learning_rate": 5.173387867767995e-06, "loss": 0.5574, "step": 9694 }, { "epoch": 0.5231773784469268, "grad_norm": 0.9905204814640702, "learning_rate": 5.1726269917247015e-06, "loss": 0.3895, "step": 9695 }, { "epoch": 0.5232313420754412, "grad_norm": 0.7891119086685983, "learning_rate": 5.171866125090439e-06, "loss": 0.329, "step": 9696 }, { "epoch": 0.5232853057039555, "grad_norm": 1.0163090467758964, "learning_rate": 5.171105267887077e-06, "loss": 0.4421, "step": 9697 }, { "epoch": 0.5233392693324699, "grad_norm": 0.9143440246336719, "learning_rate": 5.170344420136481e-06, "loss": 0.3859, "step": 9698 }, { "epoch": 0.5233932329609843, "grad_norm": 0.9692121901936291, "learning_rate": 5.169583581860519e-06, "loss": 0.6322, "step": 9699 }, { "epoch": 0.5234471965894987, "grad_norm": 0.6842032840651432, "learning_rate": 5.168822753081058e-06, "loss": 0.3103, "step": 9700 }, { "epoch": 0.5235011602180131, "grad_norm": 0.9378063048005031, "learning_rate": 5.168061933819966e-06, "loss": 0.4271, "step": 9701 }, { "epoch": 0.5235551238465275, "grad_norm": 1.0662412278005384, "learning_rate": 5.16730112409911e-06, "loss": 0.5343, "step": 9702 }, { "epoch": 0.5236090874750419, "grad_norm": 0.967490425313805, "learning_rate": 5.166540323940356e-06, "loss": 0.4082, "step": 9703 }, { "epoch": 0.5236630511035562, "grad_norm": 0.9505329831695838, "learning_rate": 5.16577953336557e-06, "loss": 0.4676, "step": 9704 }, { "epoch": 0.5237170147320706, "grad_norm": 0.9684178693793721, "learning_rate": 5.165018752396618e-06, "loss": 0.3817, "step": 9705 }, { "epoch": 0.523770978360585, "grad_norm": 1.1749223918869116, "learning_rate": 5.164257981055365e-06, "loss": 0.6451, "step": 9706 }, { "epoch": 0.5238249419890993, "grad_norm": 1.034066185665713, "learning_rate": 5.163497219363677e-06, "loss": 0.426, "step": 9707 }, { "epoch": 0.5238789056176137, "grad_norm": 0.9465132270021277, "learning_rate": 5.1627364673434185e-06, "loss": 0.4616, "step": 9708 }, { "epoch": 0.5239328692461281, "grad_norm": 1.153542668255986, "learning_rate": 5.161975725016455e-06, "loss": 0.4784, "step": 9709 }, { "epoch": 0.5239868328746425, "grad_norm": 1.156759960239962, "learning_rate": 5.1612149924046505e-06, "loss": 0.5767, "step": 9710 }, { "epoch": 0.5240407965031568, "grad_norm": 0.8827901210162064, "learning_rate": 5.160454269529871e-06, "loss": 0.3561, "step": 9711 }, { "epoch": 0.5240947601316712, "grad_norm": 0.9261287038070382, "learning_rate": 5.159693556413977e-06, "loss": 0.5562, "step": 9712 }, { "epoch": 0.5241487237601856, "grad_norm": 1.0377818799449898, "learning_rate": 5.158932853078835e-06, "loss": 0.4596, "step": 9713 }, { "epoch": 0.5242026873887, "grad_norm": 1.032974823125582, "learning_rate": 5.158172159546308e-06, "loss": 0.4947, "step": 9714 }, { "epoch": 0.5242566510172144, "grad_norm": 1.050730777489599, "learning_rate": 5.157411475838256e-06, "loss": 0.5466, "step": 9715 }, { "epoch": 0.5243106146457288, "grad_norm": 1.267960430937756, "learning_rate": 5.1566508019765475e-06, "loss": 0.5167, "step": 9716 }, { "epoch": 
0.5243645782742432, "grad_norm": 0.9730603581028664, "learning_rate": 5.15589013798304e-06, "loss": 0.5152, "step": 9717 }, { "epoch": 0.5244185419027575, "grad_norm": 1.1921470184726675, "learning_rate": 5.155129483879599e-06, "loss": 0.5117, "step": 9718 }, { "epoch": 0.5244725055312719, "grad_norm": 0.9014781575035112, "learning_rate": 5.154368839688084e-06, "loss": 0.4296, "step": 9719 }, { "epoch": 0.5245264691597863, "grad_norm": 1.0229823205713489, "learning_rate": 5.153608205430357e-06, "loss": 0.4074, "step": 9720 }, { "epoch": 0.5245804327883007, "grad_norm": 0.9435346840281509, "learning_rate": 5.152847581128282e-06, "loss": 0.4303, "step": 9721 }, { "epoch": 0.5246343964168151, "grad_norm": 0.9785791051136097, "learning_rate": 5.152086966803716e-06, "loss": 0.4187, "step": 9722 }, { "epoch": 0.5246883600453295, "grad_norm": 1.1992421363377896, "learning_rate": 5.151326362478525e-06, "loss": 0.6337, "step": 9723 }, { "epoch": 0.5247423236738439, "grad_norm": 1.150152440823792, "learning_rate": 5.150565768174565e-06, "loss": 0.456, "step": 9724 }, { "epoch": 0.5247962873023582, "grad_norm": 1.137896889556343, "learning_rate": 5.1498051839137e-06, "loss": 0.4635, "step": 9725 }, { "epoch": 0.5248502509308726, "grad_norm": 0.9788570605792491, "learning_rate": 5.149044609717785e-06, "loss": 0.5053, "step": 9726 }, { "epoch": 0.524904214559387, "grad_norm": 0.9961449611766471, "learning_rate": 5.148284045608684e-06, "loss": 0.4729, "step": 9727 }, { "epoch": 0.5249581781879014, "grad_norm": 1.0248468072717873, "learning_rate": 5.147523491608253e-06, "loss": 0.4127, "step": 9728 }, { "epoch": 0.5250121418164158, "grad_norm": 0.9235939572103329, "learning_rate": 5.146762947738354e-06, "loss": 0.4414, "step": 9729 }, { "epoch": 0.5250661054449302, "grad_norm": 1.1854696067097619, "learning_rate": 5.1460024140208455e-06, "loss": 0.5815, "step": 9730 }, { "epoch": 0.5251200690734446, "grad_norm": 1.1380335324446935, "learning_rate": 5.145241890477585e-06, "loss": 0.5735, "step": 9731 }, { "epoch": 0.5251740327019588, "grad_norm": 0.96580988556463, "learning_rate": 5.144481377130432e-06, "loss": 0.4909, "step": 9732 }, { "epoch": 0.5252279963304732, "grad_norm": 0.9923326802890604, "learning_rate": 5.143720874001243e-06, "loss": 0.3871, "step": 9733 }, { "epoch": 0.5252819599589876, "grad_norm": 1.056261914166459, "learning_rate": 5.142960381111875e-06, "loss": 0.4823, "step": 9734 }, { "epoch": 0.525335923587502, "grad_norm": 0.9262506560552561, "learning_rate": 5.142199898484187e-06, "loss": 0.3555, "step": 9735 }, { "epoch": 0.5253898872160164, "grad_norm": 1.1033609192732796, "learning_rate": 5.141439426140035e-06, "loss": 0.5104, "step": 9736 }, { "epoch": 0.5254438508445308, "grad_norm": 0.8725494373337509, "learning_rate": 5.1406789641012764e-06, "loss": 0.3962, "step": 9737 }, { "epoch": 0.5254978144730452, "grad_norm": 0.8399309984908676, "learning_rate": 5.139918512389768e-06, "loss": 0.2853, "step": 9738 }, { "epoch": 0.5255517781015595, "grad_norm": 1.3021287229923415, "learning_rate": 5.139158071027366e-06, "loss": 0.585, "step": 9739 }, { "epoch": 0.5256057417300739, "grad_norm": 1.116731359758357, "learning_rate": 5.138397640035924e-06, "loss": 0.536, "step": 9740 }, { "epoch": 0.5256597053585883, "grad_norm": 0.9405679181978963, "learning_rate": 5.1376372194372996e-06, "loss": 0.5167, "step": 9741 }, { "epoch": 0.5257136689871027, "grad_norm": 1.0816966282508211, "learning_rate": 5.136876809253348e-06, "loss": 0.5286, "step": 9742 }, { "epoch": 0.5257676326156171, 
"grad_norm": 0.9043627126847292, "learning_rate": 5.136116409505924e-06, "loss": 0.3525, "step": 9743 }, { "epoch": 0.5258215962441315, "grad_norm": 1.0831634123468201, "learning_rate": 5.13535602021688e-06, "loss": 0.4502, "step": 9744 }, { "epoch": 0.5258755598726458, "grad_norm": 1.00175676361393, "learning_rate": 5.1345956414080764e-06, "loss": 0.4922, "step": 9745 }, { "epoch": 0.5259295235011602, "grad_norm": 1.2639944575337376, "learning_rate": 5.133835273101362e-06, "loss": 0.5872, "step": 9746 }, { "epoch": 0.5259834871296746, "grad_norm": 0.8658766603647664, "learning_rate": 5.13307491531859e-06, "loss": 0.3746, "step": 9747 }, { "epoch": 0.526037450758189, "grad_norm": 0.9334430512708828, "learning_rate": 5.132314568081617e-06, "loss": 0.3774, "step": 9748 }, { "epoch": 0.5260914143867034, "grad_norm": 1.029208643965735, "learning_rate": 5.131554231412293e-06, "loss": 0.4748, "step": 9749 }, { "epoch": 0.5261453780152178, "grad_norm": 1.2091086446753636, "learning_rate": 5.130793905332475e-06, "loss": 0.4154, "step": 9750 }, { "epoch": 0.5261993416437322, "grad_norm": 0.9722527354998728, "learning_rate": 5.1300335898640115e-06, "loss": 0.3803, "step": 9751 }, { "epoch": 0.5262533052722465, "grad_norm": 0.8565574203080233, "learning_rate": 5.12927328502876e-06, "loss": 0.4375, "step": 9752 }, { "epoch": 0.5263072689007608, "grad_norm": 1.0021909028308913, "learning_rate": 5.128512990848565e-06, "loss": 0.4623, "step": 9753 }, { "epoch": 0.5263612325292752, "grad_norm": 1.0879252995567534, "learning_rate": 5.127752707345284e-06, "loss": 0.5057, "step": 9754 }, { "epoch": 0.5264151961577896, "grad_norm": 1.0638904576507002, "learning_rate": 5.126992434540764e-06, "loss": 0.5198, "step": 9755 }, { "epoch": 0.526469159786304, "grad_norm": 1.0222525476895574, "learning_rate": 5.126232172456861e-06, "loss": 0.4152, "step": 9756 }, { "epoch": 0.5265231234148184, "grad_norm": 1.276592011051086, "learning_rate": 5.1254719211154205e-06, "loss": 0.5514, "step": 9757 }, { "epoch": 0.5265770870433328, "grad_norm": 1.0060512533536143, "learning_rate": 5.1247116805382965e-06, "loss": 0.5332, "step": 9758 }, { "epoch": 0.5266310506718471, "grad_norm": 1.2220599719406897, "learning_rate": 5.12395145074734e-06, "loss": 0.3746, "step": 9759 }, { "epoch": 0.5266850143003615, "grad_norm": 0.9574842687567705, "learning_rate": 5.123191231764396e-06, "loss": 0.4802, "step": 9760 }, { "epoch": 0.5267389779288759, "grad_norm": 1.5667954497497976, "learning_rate": 5.122431023611319e-06, "loss": 0.6564, "step": 9761 }, { "epoch": 0.5267929415573903, "grad_norm": 1.075828019871153, "learning_rate": 5.121670826309954e-06, "loss": 0.4728, "step": 9762 }, { "epoch": 0.5268469051859047, "grad_norm": 1.3283465965055488, "learning_rate": 5.120910639882153e-06, "loss": 0.5628, "step": 9763 }, { "epoch": 0.5269008688144191, "grad_norm": 1.2881249581082195, "learning_rate": 5.120150464349762e-06, "loss": 0.5282, "step": 9764 }, { "epoch": 0.5269548324429335, "grad_norm": 0.9648565593053944, "learning_rate": 5.119390299734631e-06, "loss": 0.3575, "step": 9765 }, { "epoch": 0.5270087960714478, "grad_norm": 0.8737912074238181, "learning_rate": 5.118630146058608e-06, "loss": 0.3233, "step": 9766 }, { "epoch": 0.5270627596999622, "grad_norm": 0.9581093052205283, "learning_rate": 5.117870003343539e-06, "loss": 0.4413, "step": 9767 }, { "epoch": 0.5271167233284766, "grad_norm": 1.1056816767299782, "learning_rate": 5.117109871611274e-06, "loss": 0.5592, "step": 9768 }, { "epoch": 0.527170686956991, "grad_norm": 
1.219521000045024, "learning_rate": 5.116349750883656e-06, "loss": 0.5524, "step": 9769 }, { "epoch": 0.5272246505855054, "grad_norm": 1.0594574157616343, "learning_rate": 5.115589641182534e-06, "loss": 0.5359, "step": 9770 }, { "epoch": 0.5272786142140198, "grad_norm": 1.0074772940578125, "learning_rate": 5.114829542529754e-06, "loss": 0.4576, "step": 9771 }, { "epoch": 0.5273325778425342, "grad_norm": 1.0455002313685924, "learning_rate": 5.1140694549471635e-06, "loss": 0.4928, "step": 9772 }, { "epoch": 0.5273865414710485, "grad_norm": 0.7869968859913611, "learning_rate": 5.113309378456608e-06, "loss": 0.3415, "step": 9773 }, { "epoch": 0.5274405050995629, "grad_norm": 1.315459094930469, "learning_rate": 5.11254931307993e-06, "loss": 0.5753, "step": 9774 }, { "epoch": 0.5274944687280773, "grad_norm": 0.9700689783685743, "learning_rate": 5.111789258838975e-06, "loss": 0.4855, "step": 9775 }, { "epoch": 0.5275484323565917, "grad_norm": 1.1195646080759265, "learning_rate": 5.111029215755591e-06, "loss": 0.4962, "step": 9776 }, { "epoch": 0.527602395985106, "grad_norm": 0.861425438986499, "learning_rate": 5.1102691838516185e-06, "loss": 0.3528, "step": 9777 }, { "epoch": 0.5276563596136205, "grad_norm": 1.1611344618701709, "learning_rate": 5.109509163148904e-06, "loss": 0.6867, "step": 9778 }, { "epoch": 0.5277103232421348, "grad_norm": 0.9277800859284727, "learning_rate": 5.108749153669291e-06, "loss": 0.4409, "step": 9779 }, { "epoch": 0.5277642868706491, "grad_norm": 0.9421141034002796, "learning_rate": 5.107989155434625e-06, "loss": 0.3411, "step": 9780 }, { "epoch": 0.5278182504991635, "grad_norm": 0.9459179862262426, "learning_rate": 5.107229168466744e-06, "loss": 0.4397, "step": 9781 }, { "epoch": 0.5278722141276779, "grad_norm": 0.8666449002820407, "learning_rate": 5.106469192787494e-06, "loss": 0.4316, "step": 9782 }, { "epoch": 0.5279261777561923, "grad_norm": 0.9954097631324498, "learning_rate": 5.105709228418717e-06, "loss": 0.5242, "step": 9783 }, { "epoch": 0.5279801413847067, "grad_norm": 0.9910910667758349, "learning_rate": 5.104949275382255e-06, "loss": 0.4613, "step": 9784 }, { "epoch": 0.5280341050132211, "grad_norm": 0.9236007786414026, "learning_rate": 5.10418933369995e-06, "loss": 0.4041, "step": 9785 }, { "epoch": 0.5280880686417355, "grad_norm": 0.8410941077246671, "learning_rate": 5.1034294033936435e-06, "loss": 0.3278, "step": 9786 }, { "epoch": 0.5281420322702498, "grad_norm": 0.9254965677205332, "learning_rate": 5.102669484485177e-06, "loss": 0.3481, "step": 9787 }, { "epoch": 0.5281959958987642, "grad_norm": 1.1727121435124424, "learning_rate": 5.101909576996392e-06, "loss": 0.7748, "step": 9788 }, { "epoch": 0.5282499595272786, "grad_norm": 0.9330127096834071, "learning_rate": 5.101149680949127e-06, "loss": 0.4355, "step": 9789 }, { "epoch": 0.528303923155793, "grad_norm": 1.0660258403400662, "learning_rate": 5.100389796365223e-06, "loss": 0.5036, "step": 9790 }, { "epoch": 0.5283578867843074, "grad_norm": 1.2624947285709311, "learning_rate": 5.099629923266519e-06, "loss": 0.6403, "step": 9791 }, { "epoch": 0.5284118504128218, "grad_norm": 0.9868363276105038, "learning_rate": 5.0988700616748575e-06, "loss": 0.416, "step": 9792 }, { "epoch": 0.5284658140413362, "grad_norm": 0.8434235886846871, "learning_rate": 5.098110211612075e-06, "loss": 0.3119, "step": 9793 }, { "epoch": 0.5285197776698505, "grad_norm": 1.0241521899178454, "learning_rate": 5.097350373100012e-06, "loss": 0.4987, "step": 9794 }, { "epoch": 0.5285737412983649, "grad_norm": 1.1017898221716476, 
"learning_rate": 5.096590546160505e-06, "loss": 0.4884, "step": 9795 }, { "epoch": 0.5286277049268793, "grad_norm": 0.9052401377852753, "learning_rate": 5.095830730815393e-06, "loss": 0.4229, "step": 9796 }, { "epoch": 0.5286816685553937, "grad_norm": 1.091726520591521, "learning_rate": 5.095070927086515e-06, "loss": 0.4981, "step": 9797 }, { "epoch": 0.5287356321839081, "grad_norm": 1.129908766802422, "learning_rate": 5.094311134995707e-06, "loss": 0.4722, "step": 9798 }, { "epoch": 0.5287895958124225, "grad_norm": 1.1067903806506734, "learning_rate": 5.093551354564808e-06, "loss": 0.5548, "step": 9799 }, { "epoch": 0.5288435594409369, "grad_norm": 1.164834368183249, "learning_rate": 5.092791585815652e-06, "loss": 0.5298, "step": 9800 }, { "epoch": 0.5288975230694511, "grad_norm": 0.9318925025580266, "learning_rate": 5.0920318287700795e-06, "loss": 0.3744, "step": 9801 }, { "epoch": 0.5289514866979655, "grad_norm": 1.011214731257614, "learning_rate": 5.091272083449923e-06, "loss": 0.4661, "step": 9802 }, { "epoch": 0.5290054503264799, "grad_norm": 1.0672583452064317, "learning_rate": 5.09051234987702e-06, "loss": 0.4989, "step": 9803 }, { "epoch": 0.5290594139549943, "grad_norm": 1.1166791342204359, "learning_rate": 5.089752628073205e-06, "loss": 0.4372, "step": 9804 }, { "epoch": 0.5291133775835087, "grad_norm": 1.0902932370358502, "learning_rate": 5.088992918060316e-06, "loss": 0.4328, "step": 9805 }, { "epoch": 0.5291673412120231, "grad_norm": 1.0634458562082594, "learning_rate": 5.088233219860185e-06, "loss": 0.4843, "step": 9806 }, { "epoch": 0.5292213048405375, "grad_norm": 1.0908089063490158, "learning_rate": 5.087473533494646e-06, "loss": 0.4787, "step": 9807 }, { "epoch": 0.5292752684690518, "grad_norm": 0.8399636273841661, "learning_rate": 5.086713858985537e-06, "loss": 0.3228, "step": 9808 }, { "epoch": 0.5293292320975662, "grad_norm": 0.7762796229377825, "learning_rate": 5.085954196354688e-06, "loss": 0.435, "step": 9809 }, { "epoch": 0.5293831957260806, "grad_norm": 1.0468609322474196, "learning_rate": 5.085194545623933e-06, "loss": 0.4905, "step": 9810 }, { "epoch": 0.529437159354595, "grad_norm": 1.0231173852644253, "learning_rate": 5.084434906815107e-06, "loss": 0.5216, "step": 9811 }, { "epoch": 0.5294911229831094, "grad_norm": 0.895246879334178, "learning_rate": 5.083675279950041e-06, "loss": 0.4641, "step": 9812 }, { "epoch": 0.5295450866116238, "grad_norm": 1.1319540334632623, "learning_rate": 5.0829156650505675e-06, "loss": 0.4614, "step": 9813 }, { "epoch": 0.5295990502401381, "grad_norm": 1.103725361563143, "learning_rate": 5.08215606213852e-06, "loss": 0.401, "step": 9814 }, { "epoch": 0.5296530138686525, "grad_norm": 0.8238025325801022, "learning_rate": 5.08139647123573e-06, "loss": 0.3562, "step": 9815 }, { "epoch": 0.5297069774971669, "grad_norm": 0.8809782446819693, "learning_rate": 5.080636892364027e-06, "loss": 0.3809, "step": 9816 }, { "epoch": 0.5297609411256813, "grad_norm": 1.0699923369727389, "learning_rate": 5.079877325545245e-06, "loss": 0.4714, "step": 9817 }, { "epoch": 0.5298149047541957, "grad_norm": 1.2589858822775974, "learning_rate": 5.079117770801212e-06, "loss": 0.5853, "step": 9818 }, { "epoch": 0.5298688683827101, "grad_norm": 1.0082969473806576, "learning_rate": 5.078358228153759e-06, "loss": 0.4772, "step": 9819 }, { "epoch": 0.5299228320112245, "grad_norm": 0.7968675671658431, "learning_rate": 5.077598697624718e-06, "loss": 0.3038, "step": 9820 }, { "epoch": 0.5299767956397388, "grad_norm": 1.200072866757801, "learning_rate": 
5.076839179235918e-06, "loss": 0.6611, "step": 9821 }, { "epoch": 0.5300307592682532, "grad_norm": 0.9198703527120782, "learning_rate": 5.076079673009187e-06, "loss": 0.4199, "step": 9822 }, { "epoch": 0.5300847228967676, "grad_norm": 0.8070481286233315, "learning_rate": 5.075320178966353e-06, "loss": 0.3776, "step": 9823 }, { "epoch": 0.530138686525282, "grad_norm": 1.2011079495854977, "learning_rate": 5.074560697129247e-06, "loss": 0.5155, "step": 9824 }, { "epoch": 0.5301926501537964, "grad_norm": 0.9693674232436924, "learning_rate": 5.073801227519697e-06, "loss": 0.5198, "step": 9825 }, { "epoch": 0.5302466137823107, "grad_norm": 0.9793365794881753, "learning_rate": 5.073041770159529e-06, "loss": 0.4599, "step": 9826 }, { "epoch": 0.5303005774108251, "grad_norm": 1.1465256907083443, "learning_rate": 5.072282325070573e-06, "loss": 0.4597, "step": 9827 }, { "epoch": 0.5303545410393394, "grad_norm": 1.19222344909917, "learning_rate": 5.0715228922746565e-06, "loss": 0.9406, "step": 9828 }, { "epoch": 0.5304085046678538, "grad_norm": 0.8308316256432817, "learning_rate": 5.070763471793603e-06, "loss": 0.4297, "step": 9829 }, { "epoch": 0.5304624682963682, "grad_norm": 1.032557663753453, "learning_rate": 5.070004063649241e-06, "loss": 0.4065, "step": 9830 }, { "epoch": 0.5305164319248826, "grad_norm": 0.9130205273050845, "learning_rate": 5.069244667863397e-06, "loss": 0.3661, "step": 9831 }, { "epoch": 0.530570395553397, "grad_norm": 1.2980879046068137, "learning_rate": 5.068485284457897e-06, "loss": 0.5156, "step": 9832 }, { "epoch": 0.5306243591819114, "grad_norm": 1.1081940422911116, "learning_rate": 5.067725913454565e-06, "loss": 0.5407, "step": 9833 }, { "epoch": 0.5306783228104258, "grad_norm": 0.9250727196067227, "learning_rate": 5.066966554875228e-06, "loss": 0.4882, "step": 9834 }, { "epoch": 0.5307322864389401, "grad_norm": 1.233912346123534, "learning_rate": 5.06620720874171e-06, "loss": 0.4335, "step": 9835 }, { "epoch": 0.5307862500674545, "grad_norm": 1.249312703962299, "learning_rate": 5.065447875075835e-06, "loss": 0.588, "step": 9836 }, { "epoch": 0.5308402136959689, "grad_norm": 1.0927634448580101, "learning_rate": 5.0646885538994264e-06, "loss": 0.5695, "step": 9837 }, { "epoch": 0.5308941773244833, "grad_norm": 0.990576666123403, "learning_rate": 5.063929245234309e-06, "loss": 0.5136, "step": 9838 }, { "epoch": 0.5309481409529977, "grad_norm": 0.8747830891475921, "learning_rate": 5.063169949102306e-06, "loss": 0.4227, "step": 9839 }, { "epoch": 0.5310021045815121, "grad_norm": 1.050525506360563, "learning_rate": 5.062410665525239e-06, "loss": 0.5972, "step": 9840 }, { "epoch": 0.5310560682100265, "grad_norm": 1.0301176009511661, "learning_rate": 5.061651394524932e-06, "loss": 0.4896, "step": 9841 }, { "epoch": 0.5311100318385408, "grad_norm": 1.0053303710032866, "learning_rate": 5.060892136123208e-06, "loss": 0.5311, "step": 9842 }, { "epoch": 0.5311639954670552, "grad_norm": 1.0620610276914073, "learning_rate": 5.060132890341887e-06, "loss": 0.4438, "step": 9843 }, { "epoch": 0.5312179590955696, "grad_norm": 1.1317969337548908, "learning_rate": 5.0593736572027905e-06, "loss": 0.4412, "step": 9844 }, { "epoch": 0.531271922724084, "grad_norm": 1.0404419773815328, "learning_rate": 5.05861443672774e-06, "loss": 0.5023, "step": 9845 }, { "epoch": 0.5313258863525984, "grad_norm": 1.0141537173468835, "learning_rate": 5.0578552289385575e-06, "loss": 0.4462, "step": 9846 }, { "epoch": 0.5313798499811128, "grad_norm": 0.9401250978202378, "learning_rate": 5.0570960338570615e-06, 
"loss": 0.463, "step": 9847 }, { "epoch": 0.5314338136096272, "grad_norm": 1.1221144048164005, "learning_rate": 5.056336851505075e-06, "loss": 0.4806, "step": 9848 }, { "epoch": 0.5314877772381414, "grad_norm": 1.1534553205309213, "learning_rate": 5.055577681904415e-06, "loss": 0.5452, "step": 9849 }, { "epoch": 0.5315417408666558, "grad_norm": 1.2663836573677207, "learning_rate": 5.054818525076901e-06, "loss": 0.4412, "step": 9850 }, { "epoch": 0.5315957044951702, "grad_norm": 1.0019763584537527, "learning_rate": 5.054059381044353e-06, "loss": 0.3483, "step": 9851 }, { "epoch": 0.5316496681236846, "grad_norm": 1.1120904249583754, "learning_rate": 5.053300249828588e-06, "loss": 0.6354, "step": 9852 }, { "epoch": 0.531703631752199, "grad_norm": 0.9386835938236144, "learning_rate": 5.052541131451425e-06, "loss": 0.4406, "step": 9853 }, { "epoch": 0.5317575953807134, "grad_norm": 0.9317790243009573, "learning_rate": 5.051782025934683e-06, "loss": 0.4363, "step": 9854 }, { "epoch": 0.5318115590092278, "grad_norm": 1.0522881030211095, "learning_rate": 5.051022933300178e-06, "loss": 0.5451, "step": 9855 }, { "epoch": 0.5318655226377421, "grad_norm": 0.8655048394895103, "learning_rate": 5.050263853569729e-06, "loss": 0.4232, "step": 9856 }, { "epoch": 0.5319194862662565, "grad_norm": 0.9568939782338598, "learning_rate": 5.049504786765149e-06, "loss": 0.3889, "step": 9857 }, { "epoch": 0.5319734498947709, "grad_norm": 1.1367502463525687, "learning_rate": 5.048745732908258e-06, "loss": 0.5957, "step": 9858 }, { "epoch": 0.5320274135232853, "grad_norm": 0.7633109806817424, "learning_rate": 5.047986692020869e-06, "loss": 0.3029, "step": 9859 }, { "epoch": 0.5320813771517997, "grad_norm": 0.8417295438660892, "learning_rate": 5.047227664124801e-06, "loss": 0.3256, "step": 9860 }, { "epoch": 0.5321353407803141, "grad_norm": 1.068478106083507, "learning_rate": 5.046468649241867e-06, "loss": 0.4265, "step": 9861 }, { "epoch": 0.5321893044088285, "grad_norm": 1.1689198391610829, "learning_rate": 5.045709647393882e-06, "loss": 0.49, "step": 9862 }, { "epoch": 0.5322432680373428, "grad_norm": 1.0050888489163547, "learning_rate": 5.044950658602661e-06, "loss": 0.4901, "step": 9863 }, { "epoch": 0.5322972316658572, "grad_norm": 0.9339358575043493, "learning_rate": 5.044191682890017e-06, "loss": 0.4808, "step": 9864 }, { "epoch": 0.5323511952943716, "grad_norm": 1.0027021901464566, "learning_rate": 5.043432720277766e-06, "loss": 0.4548, "step": 9865 }, { "epoch": 0.532405158922886, "grad_norm": 1.030074293410242, "learning_rate": 5.042673770787718e-06, "loss": 0.4445, "step": 9866 }, { "epoch": 0.5324591225514004, "grad_norm": 1.1744694775459374, "learning_rate": 5.04191483444169e-06, "loss": 0.6395, "step": 9867 }, { "epoch": 0.5325130861799148, "grad_norm": 1.0277624429924037, "learning_rate": 5.041155911261492e-06, "loss": 0.451, "step": 9868 }, { "epoch": 0.5325670498084292, "grad_norm": 1.1838013682332493, "learning_rate": 5.040397001268936e-06, "loss": 0.4901, "step": 9869 }, { "epoch": 0.5326210134369435, "grad_norm": 0.9336277616702371, "learning_rate": 5.039638104485835e-06, "loss": 0.3854, "step": 9870 }, { "epoch": 0.5326749770654579, "grad_norm": 0.9841520244418016, "learning_rate": 5.038879220934e-06, "loss": 0.4295, "step": 9871 }, { "epoch": 0.5327289406939723, "grad_norm": 1.3609108323575496, "learning_rate": 5.038120350635241e-06, "loss": 0.645, "step": 9872 }, { "epoch": 0.5327829043224867, "grad_norm": 1.1932349376146079, "learning_rate": 5.0373614936113725e-06, "loss": 0.6341, "step": 9873 
}, { "epoch": 0.532836867951001, "grad_norm": 0.9936516635062216, "learning_rate": 5.0366026498842e-06, "loss": 0.5415, "step": 9874 }, { "epoch": 0.5328908315795154, "grad_norm": 1.2946549071647346, "learning_rate": 5.035843819475535e-06, "loss": 0.6859, "step": 9875 }, { "epoch": 0.5329447952080298, "grad_norm": 0.8782416467466894, "learning_rate": 5.03508500240719e-06, "loss": 0.4893, "step": 9876 }, { "epoch": 0.5329987588365441, "grad_norm": 0.8622742496677499, "learning_rate": 5.034326198700972e-06, "loss": 0.4459, "step": 9877 }, { "epoch": 0.5330527224650585, "grad_norm": 1.001155572713495, "learning_rate": 5.033567408378688e-06, "loss": 0.5214, "step": 9878 }, { "epoch": 0.5331066860935729, "grad_norm": 0.9413203695713206, "learning_rate": 5.032808631462147e-06, "loss": 0.5098, "step": 9879 }, { "epoch": 0.5331606497220873, "grad_norm": 0.9853599885948897, "learning_rate": 5.032049867973159e-06, "loss": 0.5277, "step": 9880 }, { "epoch": 0.5332146133506017, "grad_norm": 0.8919222299896387, "learning_rate": 5.031291117933531e-06, "loss": 0.3975, "step": 9881 }, { "epoch": 0.5332685769791161, "grad_norm": 0.8801126447326915, "learning_rate": 5.030532381365069e-06, "loss": 0.4566, "step": 9882 }, { "epoch": 0.5333225406076304, "grad_norm": 0.8732902618164328, "learning_rate": 5.029773658289583e-06, "loss": 0.415, "step": 9883 }, { "epoch": 0.5333765042361448, "grad_norm": 0.8632648443910056, "learning_rate": 5.029014948728875e-06, "loss": 0.3298, "step": 9884 }, { "epoch": 0.5334304678646592, "grad_norm": 1.0198642654019867, "learning_rate": 5.028256252704754e-06, "loss": 0.4438, "step": 9885 }, { "epoch": 0.5334844314931736, "grad_norm": 0.921745908386737, "learning_rate": 5.027497570239024e-06, "loss": 0.3589, "step": 9886 }, { "epoch": 0.533538395121688, "grad_norm": 1.1292652179956546, "learning_rate": 5.026738901353493e-06, "loss": 0.5313, "step": 9887 }, { "epoch": 0.5335923587502024, "grad_norm": 1.2291531847651713, "learning_rate": 5.025980246069963e-06, "loss": 0.5267, "step": 9888 }, { "epoch": 0.5336463223787168, "grad_norm": 1.2463751830352283, "learning_rate": 5.025221604410239e-06, "loss": 0.6619, "step": 9889 }, { "epoch": 0.5337002860072311, "grad_norm": 1.0508197961317378, "learning_rate": 5.024462976396127e-06, "loss": 0.4837, "step": 9890 }, { "epoch": 0.5337542496357455, "grad_norm": 1.3351591006918475, "learning_rate": 5.02370436204943e-06, "loss": 0.6931, "step": 9891 }, { "epoch": 0.5338082132642599, "grad_norm": 1.2738846483289565, "learning_rate": 5.022945761391949e-06, "loss": 0.6689, "step": 9892 }, { "epoch": 0.5338621768927743, "grad_norm": 1.003723637203252, "learning_rate": 5.02218717444549e-06, "loss": 0.385, "step": 9893 }, { "epoch": 0.5339161405212887, "grad_norm": 0.8228043027279296, "learning_rate": 5.021428601231854e-06, "loss": 0.3498, "step": 9894 }, { "epoch": 0.5339701041498031, "grad_norm": 0.9422487904435871, "learning_rate": 5.020670041772844e-06, "loss": 0.3991, "step": 9895 }, { "epoch": 0.5340240677783175, "grad_norm": 1.072372447185218, "learning_rate": 5.019911496090261e-06, "loss": 0.4389, "step": 9896 }, { "epoch": 0.5340780314068317, "grad_norm": 1.0247539251499445, "learning_rate": 5.019152964205909e-06, "loss": 0.4601, "step": 9897 }, { "epoch": 0.5341319950353461, "grad_norm": 0.9346400612150642, "learning_rate": 5.018394446141584e-06, "loss": 0.4112, "step": 9898 }, { "epoch": 0.5341859586638605, "grad_norm": 1.0157134136928683, "learning_rate": 5.01763594191909e-06, "loss": 0.4248, "step": 9899 }, { "epoch": 
0.5342399222923749, "grad_norm": 1.1000510907534347, "learning_rate": 5.016877451560226e-06, "loss": 0.5227, "step": 9900 }, { "epoch": 0.5342938859208893, "grad_norm": 0.9351609773266695, "learning_rate": 5.016118975086792e-06, "loss": 0.3496, "step": 9901 }, { "epoch": 0.5343478495494037, "grad_norm": 0.9744558684718088, "learning_rate": 5.015360512520588e-06, "loss": 0.3825, "step": 9902 }, { "epoch": 0.5344018131779181, "grad_norm": 0.9809932430668968, "learning_rate": 5.014602063883413e-06, "loss": 0.4075, "step": 9903 }, { "epoch": 0.5344557768064324, "grad_norm": 1.0857427377407478, "learning_rate": 5.013843629197066e-06, "loss": 0.5079, "step": 9904 }, { "epoch": 0.5345097404349468, "grad_norm": 1.2321502834617455, "learning_rate": 5.0130852084833425e-06, "loss": 0.639, "step": 9905 }, { "epoch": 0.5345637040634612, "grad_norm": 0.9583361535841615, "learning_rate": 5.012326801764043e-06, "loss": 0.4372, "step": 9906 }, { "epoch": 0.5346176676919756, "grad_norm": 1.1510542377943596, "learning_rate": 5.011568409060963e-06, "loss": 0.5659, "step": 9907 }, { "epoch": 0.53467163132049, "grad_norm": 0.9507329476502328, "learning_rate": 5.0108100303959e-06, "loss": 0.5195, "step": 9908 }, { "epoch": 0.5347255949490044, "grad_norm": 1.1224062928378957, "learning_rate": 5.010051665790653e-06, "loss": 0.5348, "step": 9909 }, { "epoch": 0.5347795585775188, "grad_norm": 1.065135566106261, "learning_rate": 5.009293315267015e-06, "loss": 0.4981, "step": 9910 }, { "epoch": 0.5348335222060331, "grad_norm": 0.9873706183018753, "learning_rate": 5.008534978846785e-06, "loss": 0.3475, "step": 9911 }, { "epoch": 0.5348874858345475, "grad_norm": 0.8889299051670229, "learning_rate": 5.007776656551755e-06, "loss": 0.4004, "step": 9912 }, { "epoch": 0.5349414494630619, "grad_norm": 0.828539413485709, "learning_rate": 5.00701834840372e-06, "loss": 0.4221, "step": 9913 }, { "epoch": 0.5349954130915763, "grad_norm": 0.8820196211274777, "learning_rate": 5.006260054424477e-06, "loss": 0.4072, "step": 9914 }, { "epoch": 0.5350493767200907, "grad_norm": 1.036560531293208, "learning_rate": 5.005501774635818e-06, "loss": 0.4643, "step": 9915 }, { "epoch": 0.5351033403486051, "grad_norm": 0.9400599873703808, "learning_rate": 5.004743509059538e-06, "loss": 0.5429, "step": 9916 }, { "epoch": 0.5351573039771195, "grad_norm": 0.9222972212389214, "learning_rate": 5.003985257717429e-06, "loss": 0.5607, "step": 9917 }, { "epoch": 0.5352112676056338, "grad_norm": 1.01275295247848, "learning_rate": 5.003227020631287e-06, "loss": 0.4581, "step": 9918 }, { "epoch": 0.5352652312341482, "grad_norm": 0.937493077123418, "learning_rate": 5.002468797822901e-06, "loss": 0.3713, "step": 9919 }, { "epoch": 0.5353191948626626, "grad_norm": 0.9202956312894247, "learning_rate": 5.001710589314065e-06, "loss": 0.3609, "step": 9920 }, { "epoch": 0.535373158491177, "grad_norm": 0.6868329139027787, "learning_rate": 5.000952395126571e-06, "loss": 0.294, "step": 9921 }, { "epoch": 0.5354271221196913, "grad_norm": 0.9196054520244205, "learning_rate": 5.000194215282207e-06, "loss": 0.4313, "step": 9922 }, { "epoch": 0.5354810857482057, "grad_norm": 0.9474701432519063, "learning_rate": 4.9994360498027684e-06, "loss": 0.5766, "step": 9923 }, { "epoch": 0.5355350493767201, "grad_norm": 1.03804268584054, "learning_rate": 4.998677898710042e-06, "loss": 0.5034, "step": 9924 }, { "epoch": 0.5355890130052344, "grad_norm": 1.2472522864702218, "learning_rate": 4.997919762025821e-06, "loss": 0.6893, "step": 9925 }, { "epoch": 0.5356429766337488, "grad_norm": 
1.050550411378059, "learning_rate": 4.997161639771892e-06, "loss": 0.4198, "step": 9926 }, { "epoch": 0.5356969402622632, "grad_norm": 0.9629876644259304, "learning_rate": 4.996403531970048e-06, "loss": 0.4744, "step": 9927 }, { "epoch": 0.5357509038907776, "grad_norm": 1.1310545570967452, "learning_rate": 4.995645438642071e-06, "loss": 0.4855, "step": 9928 }, { "epoch": 0.535804867519292, "grad_norm": 1.1131251685025116, "learning_rate": 4.9948873598097565e-06, "loss": 0.6041, "step": 9929 }, { "epoch": 0.5358588311478064, "grad_norm": 0.9593199274930416, "learning_rate": 4.994129295494889e-06, "loss": 0.3513, "step": 9930 }, { "epoch": 0.5359127947763208, "grad_norm": 0.9418600222505135, "learning_rate": 4.993371245719257e-06, "loss": 0.4123, "step": 9931 }, { "epoch": 0.5359667584048351, "grad_norm": 1.0131850971198793, "learning_rate": 4.99261321050465e-06, "loss": 0.5146, "step": 9932 }, { "epoch": 0.5360207220333495, "grad_norm": 1.042539650127272, "learning_rate": 4.991855189872848e-06, "loss": 0.5174, "step": 9933 }, { "epoch": 0.5360746856618639, "grad_norm": 1.0708511066650659, "learning_rate": 4.991097183845642e-06, "loss": 0.5208, "step": 9934 }, { "epoch": 0.5361286492903783, "grad_norm": 0.743763493488394, "learning_rate": 4.990339192444819e-06, "loss": 0.2784, "step": 9935 }, { "epoch": 0.5361826129188927, "grad_norm": 1.0882365536218006, "learning_rate": 4.989581215692161e-06, "loss": 0.4035, "step": 9936 }, { "epoch": 0.5362365765474071, "grad_norm": 1.0055235448222235, "learning_rate": 4.988823253609454e-06, "loss": 0.501, "step": 9937 }, { "epoch": 0.5362905401759215, "grad_norm": 0.9908096396198701, "learning_rate": 4.9880653062184835e-06, "loss": 0.5071, "step": 9938 }, { "epoch": 0.5363445038044358, "grad_norm": 0.6381296162085466, "learning_rate": 4.987307373541035e-06, "loss": 0.2287, "step": 9939 }, { "epoch": 0.5363984674329502, "grad_norm": 1.0868617962544562, "learning_rate": 4.9865494555988895e-06, "loss": 0.5341, "step": 9940 }, { "epoch": 0.5364524310614646, "grad_norm": 1.186078147742559, "learning_rate": 4.985791552413831e-06, "loss": 0.5795, "step": 9941 }, { "epoch": 0.536506394689979, "grad_norm": 0.8673224930155873, "learning_rate": 4.985033664007644e-06, "loss": 0.3904, "step": 9942 }, { "epoch": 0.5365603583184934, "grad_norm": 0.9759421130768049, "learning_rate": 4.98427579040211e-06, "loss": 0.4662, "step": 9943 }, { "epoch": 0.5366143219470078, "grad_norm": 0.8927853030036168, "learning_rate": 4.98351793161901e-06, "loss": 0.3303, "step": 9944 }, { "epoch": 0.5366682855755222, "grad_norm": 1.1988665229002389, "learning_rate": 4.982760087680125e-06, "loss": 0.6174, "step": 9945 }, { "epoch": 0.5367222492040364, "grad_norm": 1.0085113795051772, "learning_rate": 4.98200225860724e-06, "loss": 0.3234, "step": 9946 }, { "epoch": 0.5367762128325508, "grad_norm": 0.9738048223442025, "learning_rate": 4.981244444422134e-06, "loss": 0.5131, "step": 9947 }, { "epoch": 0.5368301764610652, "grad_norm": 0.9273330592169257, "learning_rate": 4.980486645146585e-06, "loss": 0.5312, "step": 9948 }, { "epoch": 0.5368841400895796, "grad_norm": 0.8856463690253309, "learning_rate": 4.979728860802375e-06, "loss": 0.3729, "step": 9949 }, { "epoch": 0.536938103718094, "grad_norm": 1.015102081148679, "learning_rate": 4.978971091411283e-06, "loss": 0.488, "step": 9950 }, { "epoch": 0.5369920673466084, "grad_norm": 0.9493161455619169, "learning_rate": 4.978213336995089e-06, "loss": 0.4124, "step": 9951 }, { "epoch": 0.5370460309751227, "grad_norm": 1.0560354669019085, 
"learning_rate": 4.977455597575572e-06, "loss": 0.4308, "step": 9952 }, { "epoch": 0.5370999946036371, "grad_norm": 1.0921978231321052, "learning_rate": 4.976697873174509e-06, "loss": 0.5585, "step": 9953 }, { "epoch": 0.5371539582321515, "grad_norm": 0.8200884871965322, "learning_rate": 4.975940163813676e-06, "loss": 0.3604, "step": 9954 }, { "epoch": 0.5372079218606659, "grad_norm": 0.9025525865995081, "learning_rate": 4.975182469514853e-06, "loss": 0.407, "step": 9955 }, { "epoch": 0.5372618854891803, "grad_norm": 1.2137695871309955, "learning_rate": 4.9744247902998155e-06, "loss": 0.5944, "step": 9956 }, { "epoch": 0.5373158491176947, "grad_norm": 1.2539473727861978, "learning_rate": 4.973667126190341e-06, "loss": 0.5053, "step": 9957 }, { "epoch": 0.5373698127462091, "grad_norm": 0.9461718165150234, "learning_rate": 4.972909477208206e-06, "loss": 0.3656, "step": 9958 }, { "epoch": 0.5374237763747234, "grad_norm": 1.0754887069747932, "learning_rate": 4.972151843375186e-06, "loss": 0.4998, "step": 9959 }, { "epoch": 0.5374777400032378, "grad_norm": 1.1649645093618626, "learning_rate": 4.971394224713053e-06, "loss": 0.5293, "step": 9960 }, { "epoch": 0.5375317036317522, "grad_norm": 0.9499067076848354, "learning_rate": 4.9706366212435856e-06, "loss": 0.413, "step": 9961 }, { "epoch": 0.5375856672602666, "grad_norm": 1.051429901273323, "learning_rate": 4.969879032988556e-06, "loss": 0.4286, "step": 9962 }, { "epoch": 0.537639630888781, "grad_norm": 1.2339737274249403, "learning_rate": 4.969121459969738e-06, "loss": 0.5373, "step": 9963 }, { "epoch": 0.5376935945172954, "grad_norm": 0.7793456059658681, "learning_rate": 4.968363902208906e-06, "loss": 0.3721, "step": 9964 }, { "epoch": 0.5377475581458098, "grad_norm": 1.3145167940958153, "learning_rate": 4.967606359727834e-06, "loss": 0.5204, "step": 9965 }, { "epoch": 0.537801521774324, "grad_norm": 1.1498640974596428, "learning_rate": 4.966848832548293e-06, "loss": 0.523, "step": 9966 }, { "epoch": 0.5378554854028385, "grad_norm": 0.9926044738266445, "learning_rate": 4.9660913206920535e-06, "loss": 0.5252, "step": 9967 }, { "epoch": 0.5379094490313528, "grad_norm": 1.046409600341498, "learning_rate": 4.965333824180889e-06, "loss": 0.4583, "step": 9968 }, { "epoch": 0.5379634126598672, "grad_norm": 0.9396402268187883, "learning_rate": 4.964576343036572e-06, "loss": 0.4133, "step": 9969 }, { "epoch": 0.5380173762883816, "grad_norm": 0.9818600047451499, "learning_rate": 4.963818877280871e-06, "loss": 0.4937, "step": 9970 }, { "epoch": 0.538071339916896, "grad_norm": 1.0704144035904015, "learning_rate": 4.963061426935557e-06, "loss": 0.3779, "step": 9971 }, { "epoch": 0.5381253035454104, "grad_norm": 0.9758671262158307, "learning_rate": 4.962303992022401e-06, "loss": 0.3514, "step": 9972 }, { "epoch": 0.5381792671739247, "grad_norm": 1.0071932936259704, "learning_rate": 4.9615465725631725e-06, "loss": 0.3695, "step": 9973 }, { "epoch": 0.5382332308024391, "grad_norm": 0.9053524776508026, "learning_rate": 4.960789168579639e-06, "loss": 0.3778, "step": 9974 }, { "epoch": 0.5382871944309535, "grad_norm": 1.0483345315799397, "learning_rate": 4.960031780093568e-06, "loss": 0.4662, "step": 9975 }, { "epoch": 0.5383411580594679, "grad_norm": 0.9474343835230155, "learning_rate": 4.95927440712673e-06, "loss": 0.4068, "step": 9976 }, { "epoch": 0.5383951216879823, "grad_norm": 1.0865828373788853, "learning_rate": 4.958517049700891e-06, "loss": 0.4529, "step": 9977 }, { "epoch": 0.5384490853164967, "grad_norm": 0.8694051795362834, "learning_rate": 
4.957759707837821e-06, "loss": 0.5141, "step": 9978 }, { "epoch": 0.5385030489450111, "grad_norm": 1.0242475093677368, "learning_rate": 4.957002381559284e-06, "loss": 0.4869, "step": 9979 }, { "epoch": 0.5385570125735254, "grad_norm": 0.923632879600402, "learning_rate": 4.956245070887049e-06, "loss": 0.3909, "step": 9980 }, { "epoch": 0.5386109762020398, "grad_norm": 0.965631436444083, "learning_rate": 4.955487775842878e-06, "loss": 0.4837, "step": 9981 }, { "epoch": 0.5386649398305542, "grad_norm": 1.1582940810796427, "learning_rate": 4.9547304964485385e-06, "loss": 0.5154, "step": 9982 }, { "epoch": 0.5387189034590686, "grad_norm": 1.1526870170586134, "learning_rate": 4.953973232725796e-06, "loss": 0.5078, "step": 9983 }, { "epoch": 0.538772867087583, "grad_norm": 1.0148047372193552, "learning_rate": 4.953215984696414e-06, "loss": 0.3773, "step": 9984 }, { "epoch": 0.5388268307160974, "grad_norm": 0.897624058709063, "learning_rate": 4.9524587523821574e-06, "loss": 0.3919, "step": 9985 }, { "epoch": 0.5388807943446118, "grad_norm": 0.9733251239242692, "learning_rate": 4.95170153580479e-06, "loss": 0.403, "step": 9986 }, { "epoch": 0.5389347579731261, "grad_norm": 1.1212704429787337, "learning_rate": 4.9509443349860755e-06, "loss": 0.4445, "step": 9987 }, { "epoch": 0.5389887216016405, "grad_norm": 1.1846771442444723, "learning_rate": 4.9501871499477735e-06, "loss": 0.5658, "step": 9988 }, { "epoch": 0.5390426852301549, "grad_norm": 0.7852794876464328, "learning_rate": 4.9494299807116495e-06, "loss": 0.3895, "step": 9989 }, { "epoch": 0.5390966488586693, "grad_norm": 0.8609658882641552, "learning_rate": 4.948672827299463e-06, "loss": 0.5473, "step": 9990 }, { "epoch": 0.5391506124871837, "grad_norm": 1.0706317109875338, "learning_rate": 4.9479156897329776e-06, "loss": 0.5802, "step": 9991 }, { "epoch": 0.539204576115698, "grad_norm": 0.7599801541566236, "learning_rate": 4.947158568033953e-06, "loss": 0.3802, "step": 9992 }, { "epoch": 0.5392585397442125, "grad_norm": 1.0736956105114253, "learning_rate": 4.946401462224149e-06, "loss": 0.5108, "step": 9993 }, { "epoch": 0.5393125033727267, "grad_norm": 0.8649068876074764, "learning_rate": 4.945644372325328e-06, "loss": 0.4074, "step": 9994 }, { "epoch": 0.5393664670012411, "grad_norm": 0.8941368180658905, "learning_rate": 4.9448872983592455e-06, "loss": 0.3707, "step": 9995 }, { "epoch": 0.5394204306297555, "grad_norm": 0.8867481953064631, "learning_rate": 4.944130240347664e-06, "loss": 0.3967, "step": 9996 }, { "epoch": 0.5394743942582699, "grad_norm": 0.8797379594661722, "learning_rate": 4.943373198312341e-06, "loss": 0.3776, "step": 9997 }, { "epoch": 0.5395283578867843, "grad_norm": 1.2252260373158261, "learning_rate": 4.9426161722750355e-06, "loss": 0.5581, "step": 9998 }, { "epoch": 0.5395823215152987, "grad_norm": 1.1651144408525247, "learning_rate": 4.941859162257503e-06, "loss": 0.6326, "step": 9999 }, { "epoch": 0.5396362851438131, "grad_norm": 1.0691149570888858, "learning_rate": 4.941102168281503e-06, "loss": 0.6408, "step": 10000 }, { "epoch": 0.5396362851438131, "eval_loss": 0.5460962057113647, "eval_runtime": 160.341, "eval_samples_per_second": 21.448, "eval_steps_per_second": 0.898, "step": 10000 }, { "epoch": 0.5396902487723274, "grad_norm": 1.0460036031545223, "learning_rate": 4.940345190368794e-06, "loss": 0.5537, "step": 10001 }, { "epoch": 0.5397442124008418, "grad_norm": 1.3047804085059709, "learning_rate": 4.9395882285411255e-06, "loss": 0.5249, "step": 10002 }, { "epoch": 0.5397981760293562, "grad_norm": 
1.113219962245879, "learning_rate": 4.93883128282026e-06, "loss": 0.5417, "step": 10003 }, { "epoch": 0.5398521396578706, "grad_norm": 0.9920627395802261, "learning_rate": 4.93807435322795e-06, "loss": 0.4565, "step": 10004 }, { "epoch": 0.539906103286385, "grad_norm": 1.090895486822463, "learning_rate": 4.937317439785949e-06, "loss": 0.4838, "step": 10005 }, { "epoch": 0.5399600669148994, "grad_norm": 0.7996832714004838, "learning_rate": 4.9365605425160155e-06, "loss": 0.4138, "step": 10006 }, { "epoch": 0.5400140305434138, "grad_norm": 0.7779324032367219, "learning_rate": 4.9358036614399e-06, "loss": 0.2693, "step": 10007 }, { "epoch": 0.5400679941719281, "grad_norm": 1.078114989951544, "learning_rate": 4.9350467965793605e-06, "loss": 0.4834, "step": 10008 }, { "epoch": 0.5401219578004425, "grad_norm": 0.9475549291097144, "learning_rate": 4.934289947956143e-06, "loss": 0.4158, "step": 10009 }, { "epoch": 0.5401759214289569, "grad_norm": 0.9848080847361335, "learning_rate": 4.933533115592006e-06, "loss": 0.369, "step": 10010 }, { "epoch": 0.5402298850574713, "grad_norm": 0.9623952623872648, "learning_rate": 4.932776299508699e-06, "loss": 0.4438, "step": 10011 }, { "epoch": 0.5402838486859857, "grad_norm": 1.033107810705618, "learning_rate": 4.932019499727973e-06, "loss": 0.4444, "step": 10012 }, { "epoch": 0.5403378123145001, "grad_norm": 0.8691818524181658, "learning_rate": 4.931262716271582e-06, "loss": 0.4057, "step": 10013 }, { "epoch": 0.5403917759430145, "grad_norm": 1.1479433592950212, "learning_rate": 4.930505949161275e-06, "loss": 0.4497, "step": 10014 }, { "epoch": 0.5404457395715287, "grad_norm": 1.135297485942887, "learning_rate": 4.929749198418803e-06, "loss": 0.4509, "step": 10015 }, { "epoch": 0.5404997032000431, "grad_norm": 0.9789666214374919, "learning_rate": 4.928992464065916e-06, "loss": 0.4278, "step": 10016 }, { "epoch": 0.5405536668285575, "grad_norm": 0.826805450607612, "learning_rate": 4.928235746124361e-06, "loss": 0.3297, "step": 10017 }, { "epoch": 0.5406076304570719, "grad_norm": 0.9670934693603411, "learning_rate": 4.927479044615888e-06, "loss": 0.401, "step": 10018 }, { "epoch": 0.5406615940855863, "grad_norm": 1.0652428773874405, "learning_rate": 4.926722359562247e-06, "loss": 0.4934, "step": 10019 }, { "epoch": 0.5407155577141007, "grad_norm": 1.0266094398929384, "learning_rate": 4.925965690985184e-06, "loss": 0.6038, "step": 10020 }, { "epoch": 0.540769521342615, "grad_norm": 0.8747862429657675, "learning_rate": 4.925209038906448e-06, "loss": 0.4803, "step": 10021 }, { "epoch": 0.5408234849711294, "grad_norm": 0.9755478536013618, "learning_rate": 4.9244524033477835e-06, "loss": 0.4547, "step": 10022 }, { "epoch": 0.5408774485996438, "grad_norm": 0.8774215344013134, "learning_rate": 4.923695784330938e-06, "loss": 0.3665, "step": 10023 }, { "epoch": 0.5409314122281582, "grad_norm": 0.8419542736608732, "learning_rate": 4.92293918187766e-06, "loss": 0.2918, "step": 10024 }, { "epoch": 0.5409853758566726, "grad_norm": 1.0856967300985787, "learning_rate": 4.922182596009692e-06, "loss": 0.4888, "step": 10025 }, { "epoch": 0.541039339485187, "grad_norm": 0.7610645358828035, "learning_rate": 4.921426026748779e-06, "loss": 0.3412, "step": 10026 }, { "epoch": 0.5410933031137014, "grad_norm": 0.8724876682098054, "learning_rate": 4.920669474116668e-06, "loss": 0.4232, "step": 10027 }, { "epoch": 0.5411472667422157, "grad_norm": 0.9894381552941685, "learning_rate": 4.919912938135103e-06, "loss": 0.453, "step": 10028 }, { "epoch": 0.5412012303707301, "grad_norm": 
1.0764689276696202, "learning_rate": 4.9191564188258244e-06, "loss": 0.6472, "step": 10029 }, { "epoch": 0.5412551939992445, "grad_norm": 1.0761294432476125, "learning_rate": 4.918399916210577e-06, "loss": 0.442, "step": 10030 }, { "epoch": 0.5413091576277589, "grad_norm": 0.9752200158593429, "learning_rate": 4.9176434303111045e-06, "loss": 0.4475, "step": 10031 }, { "epoch": 0.5413631212562733, "grad_norm": 0.9590485593894853, "learning_rate": 4.916886961149148e-06, "loss": 0.4649, "step": 10032 }, { "epoch": 0.5414170848847877, "grad_norm": 0.9209448271919018, "learning_rate": 4.916130508746448e-06, "loss": 0.3363, "step": 10033 }, { "epoch": 0.5414710485133021, "grad_norm": 1.0818282371243608, "learning_rate": 4.915374073124749e-06, "loss": 0.5428, "step": 10034 }, { "epoch": 0.5415250121418164, "grad_norm": 1.0841808133412505, "learning_rate": 4.914617654305791e-06, "loss": 0.4051, "step": 10035 }, { "epoch": 0.5415789757703308, "grad_norm": 0.8157092094833543, "learning_rate": 4.913861252311312e-06, "loss": 0.3263, "step": 10036 }, { "epoch": 0.5416329393988452, "grad_norm": 0.9894636229782733, "learning_rate": 4.913104867163051e-06, "loss": 0.4179, "step": 10037 }, { "epoch": 0.5416869030273596, "grad_norm": 0.963067478343932, "learning_rate": 4.91234849888275e-06, "loss": 0.4128, "step": 10038 }, { "epoch": 0.541740866655874, "grad_norm": 0.9331472020106104, "learning_rate": 4.911592147492147e-06, "loss": 0.385, "step": 10039 }, { "epoch": 0.5417948302843884, "grad_norm": 1.1097720578583075, "learning_rate": 4.910835813012981e-06, "loss": 0.4093, "step": 10040 }, { "epoch": 0.5418487939129027, "grad_norm": 1.1768655186314665, "learning_rate": 4.910079495466988e-06, "loss": 0.5621, "step": 10041 }, { "epoch": 0.541902757541417, "grad_norm": 1.0523722635616122, "learning_rate": 4.909323194875908e-06, "loss": 0.5737, "step": 10042 }, { "epoch": 0.5419567211699314, "grad_norm": 0.9069458967634363, "learning_rate": 4.9085669112614755e-06, "loss": 0.5352, "step": 10043 }, { "epoch": 0.5420106847984458, "grad_norm": 1.0660887694605456, "learning_rate": 4.9078106446454255e-06, "loss": 0.5132, "step": 10044 }, { "epoch": 0.5420646484269602, "grad_norm": 0.9375065848868652, "learning_rate": 4.907054395049499e-06, "loss": 0.4219, "step": 10045 }, { "epoch": 0.5421186120554746, "grad_norm": 0.9866024911446263, "learning_rate": 4.906298162495427e-06, "loss": 0.3414, "step": 10046 }, { "epoch": 0.542172575683989, "grad_norm": 0.8233499271211256, "learning_rate": 4.905541947004945e-06, "loss": 0.3012, "step": 10047 }, { "epoch": 0.5422265393125034, "grad_norm": 1.1914272914686268, "learning_rate": 4.9047857485997895e-06, "loss": 0.6139, "step": 10048 }, { "epoch": 0.5422805029410177, "grad_norm": 1.012695305364322, "learning_rate": 4.904029567301692e-06, "loss": 0.4112, "step": 10049 }, { "epoch": 0.5423344665695321, "grad_norm": 1.0713953518486306, "learning_rate": 4.903273403132387e-06, "loss": 0.4641, "step": 10050 }, { "epoch": 0.5423884301980465, "grad_norm": 1.2846421812882145, "learning_rate": 4.902517256113608e-06, "loss": 0.4693, "step": 10051 }, { "epoch": 0.5424423938265609, "grad_norm": 1.0671676153528726, "learning_rate": 4.9017611262670865e-06, "loss": 0.5072, "step": 10052 }, { "epoch": 0.5424963574550753, "grad_norm": 0.9815334947591683, "learning_rate": 4.901005013614553e-06, "loss": 0.4252, "step": 10053 }, { "epoch": 0.5425503210835897, "grad_norm": 0.9159102003711225, "learning_rate": 4.900248918177744e-06, "loss": 0.4775, "step": 10054 }, { "epoch": 0.5426042847121041, 
"grad_norm": 0.8925917658732192, "learning_rate": 4.8994928399783834e-06, "loss": 0.3451, "step": 10055 }, { "epoch": 0.5426582483406184, "grad_norm": 1.2067211020211048, "learning_rate": 4.89873677903821e-06, "loss": 0.6311, "step": 10056 }, { "epoch": 0.5427122119691328, "grad_norm": 1.0215505870181143, "learning_rate": 4.897980735378945e-06, "loss": 0.5236, "step": 10057 }, { "epoch": 0.5427661755976472, "grad_norm": 0.9422642332253948, "learning_rate": 4.897224709022324e-06, "loss": 0.4161, "step": 10058 }, { "epoch": 0.5428201392261616, "grad_norm": 1.268678819630988, "learning_rate": 4.896468699990072e-06, "loss": 0.4688, "step": 10059 }, { "epoch": 0.542874102854676, "grad_norm": 1.1040914323827407, "learning_rate": 4.895712708303922e-06, "loss": 0.4855, "step": 10060 }, { "epoch": 0.5429280664831904, "grad_norm": 0.9762128020518631, "learning_rate": 4.894956733985597e-06, "loss": 0.3322, "step": 10061 }, { "epoch": 0.5429820301117048, "grad_norm": 0.9195992236037852, "learning_rate": 4.894200777056828e-06, "loss": 0.3587, "step": 10062 }, { "epoch": 0.543035993740219, "grad_norm": 1.1120762401909288, "learning_rate": 4.893444837539342e-06, "loss": 0.4589, "step": 10063 }, { "epoch": 0.5430899573687334, "grad_norm": 1.1186098283803123, "learning_rate": 4.892688915454863e-06, "loss": 0.4956, "step": 10064 }, { "epoch": 0.5431439209972478, "grad_norm": 1.089940696333436, "learning_rate": 4.8919330108251175e-06, "loss": 0.5896, "step": 10065 }, { "epoch": 0.5431978846257622, "grad_norm": 0.8267140401939764, "learning_rate": 4.891177123671832e-06, "loss": 0.3524, "step": 10066 }, { "epoch": 0.5432518482542766, "grad_norm": 1.2715905733356314, "learning_rate": 4.890421254016731e-06, "loss": 0.4445, "step": 10067 }, { "epoch": 0.543305811882791, "grad_norm": 0.9587949906170379, "learning_rate": 4.8896654018815395e-06, "loss": 0.4968, "step": 10068 }, { "epoch": 0.5433597755113054, "grad_norm": 0.8571205321545999, "learning_rate": 4.888909567287981e-06, "loss": 0.4886, "step": 10069 }, { "epoch": 0.5434137391398197, "grad_norm": 1.0677616369454366, "learning_rate": 4.8881537502577794e-06, "loss": 0.5033, "step": 10070 }, { "epoch": 0.5434677027683341, "grad_norm": 1.165182158297888, "learning_rate": 4.8873979508126565e-06, "loss": 0.4358, "step": 10071 }, { "epoch": 0.5435216663968485, "grad_norm": 0.8428853274469854, "learning_rate": 4.886642168974335e-06, "loss": 0.3917, "step": 10072 }, { "epoch": 0.5435756300253629, "grad_norm": 1.1473159697484987, "learning_rate": 4.885886404764538e-06, "loss": 0.6079, "step": 10073 }, { "epoch": 0.5436295936538773, "grad_norm": 1.074672074395011, "learning_rate": 4.885130658204986e-06, "loss": 0.4893, "step": 10074 }, { "epoch": 0.5436835572823917, "grad_norm": 1.0645377659775097, "learning_rate": 4.8843749293174e-06, "loss": 0.5073, "step": 10075 }, { "epoch": 0.5437375209109061, "grad_norm": 1.1284101119197298, "learning_rate": 4.883619218123502e-06, "loss": 0.5104, "step": 10076 }, { "epoch": 0.5437914845394204, "grad_norm": 1.1978952910048142, "learning_rate": 4.882863524645011e-06, "loss": 0.6018, "step": 10077 }, { "epoch": 0.5438454481679348, "grad_norm": 1.0585317063678539, "learning_rate": 4.882107848903644e-06, "loss": 0.5555, "step": 10078 }, { "epoch": 0.5438994117964492, "grad_norm": 1.078133838785871, "learning_rate": 4.881352190921121e-06, "loss": 0.4932, "step": 10079 }, { "epoch": 0.5439533754249636, "grad_norm": 0.9953243752933106, "learning_rate": 4.880596550719163e-06, "loss": 0.4866, "step": 10080 }, { "epoch": 0.544007339053478, 
"grad_norm": 1.1148912906319999, "learning_rate": 4.879840928319484e-06, "loss": 0.4633, "step": 10081 }, { "epoch": 0.5440613026819924, "grad_norm": 1.1878424466476991, "learning_rate": 4.8790853237438065e-06, "loss": 0.6407, "step": 10082 }, { "epoch": 0.5441152663105068, "grad_norm": 1.2347691360368283, "learning_rate": 4.8783297370138415e-06, "loss": 0.5109, "step": 10083 }, { "epoch": 0.5441692299390211, "grad_norm": 0.9403822645295723, "learning_rate": 4.877574168151311e-06, "loss": 0.4186, "step": 10084 }, { "epoch": 0.5442231935675355, "grad_norm": 1.2653640760678184, "learning_rate": 4.876818617177925e-06, "loss": 0.7212, "step": 10085 }, { "epoch": 0.5442771571960499, "grad_norm": 1.0439933556708731, "learning_rate": 4.8760630841154024e-06, "loss": 0.4429, "step": 10086 }, { "epoch": 0.5443311208245643, "grad_norm": 1.0104414914523685, "learning_rate": 4.875307568985457e-06, "loss": 0.5066, "step": 10087 }, { "epoch": 0.5443850844530786, "grad_norm": 1.0171476113574158, "learning_rate": 4.874552071809803e-06, "loss": 0.4849, "step": 10088 }, { "epoch": 0.544439048081593, "grad_norm": 1.0187759134941643, "learning_rate": 4.873796592610155e-06, "loss": 0.435, "step": 10089 }, { "epoch": 0.5444930117101073, "grad_norm": 0.8534047055566423, "learning_rate": 4.873041131408226e-06, "loss": 0.3745, "step": 10090 }, { "epoch": 0.5445469753386217, "grad_norm": 0.7743411408548803, "learning_rate": 4.872285688225729e-06, "loss": 0.3582, "step": 10091 }, { "epoch": 0.5446009389671361, "grad_norm": 0.8456901152937784, "learning_rate": 4.871530263084373e-06, "loss": 0.3922, "step": 10092 }, { "epoch": 0.5446549025956505, "grad_norm": 1.341337948499867, "learning_rate": 4.8707748560058745e-06, "loss": 0.5813, "step": 10093 }, { "epoch": 0.5447088662241649, "grad_norm": 1.0919036356716096, "learning_rate": 4.87001946701194e-06, "loss": 0.6109, "step": 10094 }, { "epoch": 0.5447628298526793, "grad_norm": 1.0407258768248533, "learning_rate": 4.8692640961242855e-06, "loss": 0.4193, "step": 10095 }, { "epoch": 0.5448167934811937, "grad_norm": 1.3374896324397492, "learning_rate": 4.8685087433646165e-06, "loss": 0.7184, "step": 10096 }, { "epoch": 0.544870757109708, "grad_norm": 0.9436021911567432, "learning_rate": 4.867753408754645e-06, "loss": 0.5342, "step": 10097 }, { "epoch": 0.5449247207382224, "grad_norm": 0.9338648097614709, "learning_rate": 4.866998092316078e-06, "loss": 0.4535, "step": 10098 }, { "epoch": 0.5449786843667368, "grad_norm": 1.1797188905490101, "learning_rate": 4.8662427940706266e-06, "loss": 0.5205, "step": 10099 }, { "epoch": 0.5450326479952512, "grad_norm": 0.904833413008134, "learning_rate": 4.865487514039997e-06, "loss": 0.4053, "step": 10100 }, { "epoch": 0.5450866116237656, "grad_norm": 0.9812252315431945, "learning_rate": 4.864732252245899e-06, "loss": 0.5477, "step": 10101 }, { "epoch": 0.54514057525228, "grad_norm": 1.061445273418434, "learning_rate": 4.8639770087100355e-06, "loss": 0.467, "step": 10102 }, { "epoch": 0.5451945388807944, "grad_norm": 1.023094589972531, "learning_rate": 4.8632217834541175e-06, "loss": 0.5108, "step": 10103 }, { "epoch": 0.5452485025093087, "grad_norm": 0.8636095479759734, "learning_rate": 4.8624665764998506e-06, "loss": 0.3861, "step": 10104 }, { "epoch": 0.5453024661378231, "grad_norm": 0.9951955658888558, "learning_rate": 4.861711387868935e-06, "loss": 0.4629, "step": 10105 }, { "epoch": 0.5453564297663375, "grad_norm": 1.1172705293650023, "learning_rate": 4.860956217583079e-06, "loss": 0.5238, "step": 10106 }, { "epoch": 
0.5454103933948519, "grad_norm": 1.113965900086544, "learning_rate": 4.860201065663989e-06, "loss": 0.4558, "step": 10107 }, { "epoch": 0.5454643570233663, "grad_norm": 1.1307587822442025, "learning_rate": 4.859445932133365e-06, "loss": 0.5166, "step": 10108 }, { "epoch": 0.5455183206518807, "grad_norm": 1.025323824278754, "learning_rate": 4.858690817012914e-06, "loss": 0.4964, "step": 10109 }, { "epoch": 0.5455722842803951, "grad_norm": 1.132443870729538, "learning_rate": 4.8579357203243345e-06, "loss": 0.4828, "step": 10110 }, { "epoch": 0.5456262479089093, "grad_norm": 0.9694021887443264, "learning_rate": 4.857180642089334e-06, "loss": 0.4107, "step": 10111 }, { "epoch": 0.5456802115374237, "grad_norm": 1.0530655278990793, "learning_rate": 4.856425582329611e-06, "loss": 0.4334, "step": 10112 }, { "epoch": 0.5457341751659381, "grad_norm": 0.7849756342693849, "learning_rate": 4.855670541066866e-06, "loss": 0.3966, "step": 10113 }, { "epoch": 0.5457881387944525, "grad_norm": 1.2989024381280894, "learning_rate": 4.8549155183228005e-06, "loss": 0.5991, "step": 10114 }, { "epoch": 0.5458421024229669, "grad_norm": 0.8053688271961718, "learning_rate": 4.854160514119114e-06, "loss": 0.3575, "step": 10115 }, { "epoch": 0.5458960660514813, "grad_norm": 0.9720315078822583, "learning_rate": 4.853405528477509e-06, "loss": 0.5081, "step": 10116 }, { "epoch": 0.5459500296799957, "grad_norm": 1.0142142787212018, "learning_rate": 4.852650561419681e-06, "loss": 0.5354, "step": 10117 }, { "epoch": 0.54600399330851, "grad_norm": 0.9367326978498313, "learning_rate": 4.851895612967331e-06, "loss": 0.4371, "step": 10118 }, { "epoch": 0.5460579569370244, "grad_norm": 0.9608228274296237, "learning_rate": 4.8511406831421545e-06, "loss": 0.4488, "step": 10119 }, { "epoch": 0.5461119205655388, "grad_norm": 1.0183371551362592, "learning_rate": 4.850385771965851e-06, "loss": 0.3782, "step": 10120 }, { "epoch": 0.5461658841940532, "grad_norm": 0.8492904538225049, "learning_rate": 4.849630879460118e-06, "loss": 0.3618, "step": 10121 }, { "epoch": 0.5462198478225676, "grad_norm": 1.0613187820146008, "learning_rate": 4.84887600564665e-06, "loss": 0.5298, "step": 10122 }, { "epoch": 0.546273811451082, "grad_norm": 0.7135037531631587, "learning_rate": 4.848121150547143e-06, "loss": 0.2906, "step": 10123 }, { "epoch": 0.5463277750795964, "grad_norm": 1.1071287555377285, "learning_rate": 4.847366314183293e-06, "loss": 0.5518, "step": 10124 }, { "epoch": 0.5463817387081107, "grad_norm": 0.8476278089783764, "learning_rate": 4.846611496576794e-06, "loss": 0.409, "step": 10125 }, { "epoch": 0.5464357023366251, "grad_norm": 1.1916891455276686, "learning_rate": 4.845856697749342e-06, "loss": 0.5251, "step": 10126 }, { "epoch": 0.5464896659651395, "grad_norm": 1.2993961793037963, "learning_rate": 4.845101917722629e-06, "loss": 0.7282, "step": 10127 }, { "epoch": 0.5465436295936539, "grad_norm": 0.9134930556393765, "learning_rate": 4.844347156518348e-06, "loss": 0.5476, "step": 10128 }, { "epoch": 0.5465975932221683, "grad_norm": 0.8945965089679371, "learning_rate": 4.843592414158192e-06, "loss": 0.4168, "step": 10129 }, { "epoch": 0.5466515568506827, "grad_norm": 1.0023803035357055, "learning_rate": 4.842837690663854e-06, "loss": 0.5185, "step": 10130 }, { "epoch": 0.5467055204791971, "grad_norm": 0.9875160783261898, "learning_rate": 4.842082986057025e-06, "loss": 0.4236, "step": 10131 }, { "epoch": 0.5467594841077114, "grad_norm": 1.1311747593268053, "learning_rate": 4.841328300359397e-06, "loss": 0.75, "step": 10132 }, { "epoch": 
0.5468134477362258, "grad_norm": 0.85962438670122, "learning_rate": 4.8405736335926565e-06, "loss": 0.3594, "step": 10133 }, { "epoch": 0.5468674113647402, "grad_norm": 0.9775138055976144, "learning_rate": 4.839818985778497e-06, "loss": 0.4128, "step": 10134 }, { "epoch": 0.5469213749932545, "grad_norm": 1.0053394860653033, "learning_rate": 4.839064356938607e-06, "loss": 0.3958, "step": 10135 }, { "epoch": 0.546975338621769, "grad_norm": 0.9765038287632535, "learning_rate": 4.838309747094676e-06, "loss": 0.4358, "step": 10136 }, { "epoch": 0.5470293022502833, "grad_norm": 1.207070199257001, "learning_rate": 4.837555156268391e-06, "loss": 0.507, "step": 10137 }, { "epoch": 0.5470832658787977, "grad_norm": 1.0279505109572353, "learning_rate": 4.8368005844814396e-06, "loss": 0.4798, "step": 10138 }, { "epoch": 0.547137229507312, "grad_norm": 1.0312270395773961, "learning_rate": 4.836046031755512e-06, "loss": 0.3675, "step": 10139 }, { "epoch": 0.5471911931358264, "grad_norm": 1.0706055442087286, "learning_rate": 4.835291498112289e-06, "loss": 0.6133, "step": 10140 }, { "epoch": 0.5472451567643408, "grad_norm": 1.0325200161317947, "learning_rate": 4.834536983573461e-06, "loss": 0.5421, "step": 10141 }, { "epoch": 0.5472991203928552, "grad_norm": 1.0647905565242464, "learning_rate": 4.833782488160712e-06, "loss": 0.4928, "step": 10142 }, { "epoch": 0.5473530840213696, "grad_norm": 0.9017628764961584, "learning_rate": 4.833028011895728e-06, "loss": 0.3699, "step": 10143 }, { "epoch": 0.547407047649884, "grad_norm": 1.0990059499606692, "learning_rate": 4.832273554800193e-06, "loss": 0.6716, "step": 10144 }, { "epoch": 0.5474610112783984, "grad_norm": 1.0550403869627913, "learning_rate": 4.831519116895791e-06, "loss": 0.5123, "step": 10145 }, { "epoch": 0.5475149749069127, "grad_norm": 1.1458203034689012, "learning_rate": 4.830764698204205e-06, "loss": 0.5986, "step": 10146 }, { "epoch": 0.5475689385354271, "grad_norm": 0.7503629029605383, "learning_rate": 4.830010298747117e-06, "loss": 0.3477, "step": 10147 }, { "epoch": 0.5476229021639415, "grad_norm": 1.1423138266253003, "learning_rate": 4.829255918546211e-06, "loss": 0.6131, "step": 10148 }, { "epoch": 0.5476768657924559, "grad_norm": 1.0922818105317555, "learning_rate": 4.8285015576231664e-06, "loss": 0.5069, "step": 10149 }, { "epoch": 0.5477308294209703, "grad_norm": 1.0559832041656216, "learning_rate": 4.827747215999666e-06, "loss": 0.439, "step": 10150 }, { "epoch": 0.5477847930494847, "grad_norm": 1.060495737339165, "learning_rate": 4.826992893697391e-06, "loss": 0.491, "step": 10151 }, { "epoch": 0.5478387566779991, "grad_norm": 0.908339879116432, "learning_rate": 4.826238590738019e-06, "loss": 0.3903, "step": 10152 }, { "epoch": 0.5478927203065134, "grad_norm": 1.0751515849450888, "learning_rate": 4.825484307143232e-06, "loss": 0.4713, "step": 10153 }, { "epoch": 0.5479466839350278, "grad_norm": 1.1502604159284042, "learning_rate": 4.824730042934707e-06, "loss": 0.6197, "step": 10154 }, { "epoch": 0.5480006475635422, "grad_norm": 0.7076065677351586, "learning_rate": 4.823975798134123e-06, "loss": 0.3027, "step": 10155 }, { "epoch": 0.5480546111920566, "grad_norm": 0.838870099653771, "learning_rate": 4.823221572763156e-06, "loss": 0.3826, "step": 10156 }, { "epoch": 0.548108574820571, "grad_norm": 1.031514280937807, "learning_rate": 4.822467366843486e-06, "loss": 0.4521, "step": 10157 }, { "epoch": 0.5481625384490854, "grad_norm": 0.6882318817625689, "learning_rate": 4.821713180396789e-06, "loss": 0.2865, "step": 10158 }, { "epoch": 
0.5482165020775996, "grad_norm": 0.8819565570025678, "learning_rate": 4.820959013444742e-06, "loss": 0.3892, "step": 10159 }, { "epoch": 0.548270465706114, "grad_norm": 1.1650488152540242, "learning_rate": 4.820204866009016e-06, "loss": 0.5201, "step": 10160 }, { "epoch": 0.5483244293346284, "grad_norm": 0.9043989842634114, "learning_rate": 4.81945073811129e-06, "loss": 0.3352, "step": 10161 }, { "epoch": 0.5483783929631428, "grad_norm": 1.1341756484848076, "learning_rate": 4.818696629773238e-06, "loss": 0.4933, "step": 10162 }, { "epoch": 0.5484323565916572, "grad_norm": 1.048289506404977, "learning_rate": 4.8179425410165325e-06, "loss": 0.3982, "step": 10163 }, { "epoch": 0.5484863202201716, "grad_norm": 0.9961651137355694, "learning_rate": 4.817188471862847e-06, "loss": 0.4461, "step": 10164 }, { "epoch": 0.548540283848686, "grad_norm": 1.0685822700024377, "learning_rate": 4.816434422333855e-06, "loss": 0.5126, "step": 10165 }, { "epoch": 0.5485942474772003, "grad_norm": 1.2861509159406181, "learning_rate": 4.815680392451232e-06, "loss": 0.4937, "step": 10166 }, { "epoch": 0.5486482111057147, "grad_norm": 1.1959423250793557, "learning_rate": 4.814926382236642e-06, "loss": 0.5418, "step": 10167 }, { "epoch": 0.5487021747342291, "grad_norm": 1.0382135733047877, "learning_rate": 4.814172391711761e-06, "loss": 0.5095, "step": 10168 }, { "epoch": 0.5487561383627435, "grad_norm": 0.9363566732613944, "learning_rate": 4.813418420898258e-06, "loss": 0.4322, "step": 10169 }, { "epoch": 0.5488101019912579, "grad_norm": 1.084394209049667, "learning_rate": 4.812664469817805e-06, "loss": 0.5534, "step": 10170 }, { "epoch": 0.5488640656197723, "grad_norm": 0.9725164263125391, "learning_rate": 4.811910538492067e-06, "loss": 0.4819, "step": 10171 }, { "epoch": 0.5489180292482867, "grad_norm": 0.8808300563152796, "learning_rate": 4.811156626942717e-06, "loss": 0.4685, "step": 10172 }, { "epoch": 0.548971992876801, "grad_norm": 1.225782638417651, "learning_rate": 4.810402735191422e-06, "loss": 0.5322, "step": 10173 }, { "epoch": 0.5490259565053154, "grad_norm": 0.9346113816031715, "learning_rate": 4.809648863259848e-06, "loss": 0.3837, "step": 10174 }, { "epoch": 0.5490799201338298, "grad_norm": 1.1201767342807656, "learning_rate": 4.8088950111696645e-06, "loss": 0.489, "step": 10175 }, { "epoch": 0.5491338837623442, "grad_norm": 0.9654303071116841, "learning_rate": 4.808141178942536e-06, "loss": 0.533, "step": 10176 }, { "epoch": 0.5491878473908586, "grad_norm": 1.0952241542248298, "learning_rate": 4.807387366600128e-06, "loss": 0.4848, "step": 10177 }, { "epoch": 0.549241811019373, "grad_norm": 1.0182050519390895, "learning_rate": 4.806633574164108e-06, "loss": 0.3627, "step": 10178 }, { "epoch": 0.5492957746478874, "grad_norm": 1.1924516726252343, "learning_rate": 4.80587980165614e-06, "loss": 0.5689, "step": 10179 }, { "epoch": 0.5493497382764017, "grad_norm": 1.0082750116228008, "learning_rate": 4.805126049097889e-06, "loss": 0.5119, "step": 10180 }, { "epoch": 0.549403701904916, "grad_norm": 1.1573479790102197, "learning_rate": 4.804372316511016e-06, "loss": 0.5091, "step": 10181 }, { "epoch": 0.5494576655334305, "grad_norm": 0.8059179467705652, "learning_rate": 4.803618603917185e-06, "loss": 0.3048, "step": 10182 }, { "epoch": 0.5495116291619448, "grad_norm": 0.9296067892523999, "learning_rate": 4.802864911338059e-06, "loss": 0.4063, "step": 10183 }, { "epoch": 0.5495655927904592, "grad_norm": 1.3034874442404973, "learning_rate": 4.802111238795301e-06, "loss": 0.7265, "step": 10184 }, { "epoch": 
0.5496195564189736, "grad_norm": 1.2165091147154479, "learning_rate": 4.801357586310568e-06, "loss": 0.6054, "step": 10185 }, { "epoch": 0.549673520047488, "grad_norm": 0.9001674583197556, "learning_rate": 4.800603953905527e-06, "loss": 0.3899, "step": 10186 }, { "epoch": 0.5497274836760023, "grad_norm": 0.9694670468378022, "learning_rate": 4.799850341601837e-06, "loss": 0.4123, "step": 10187 }, { "epoch": 0.5497814473045167, "grad_norm": 0.8711597879727461, "learning_rate": 4.799096749421153e-06, "loss": 0.3831, "step": 10188 }, { "epoch": 0.5498354109330311, "grad_norm": 1.0837823283548245, "learning_rate": 4.798343177385136e-06, "loss": 0.5189, "step": 10189 }, { "epoch": 0.5498893745615455, "grad_norm": 1.2699595327442252, "learning_rate": 4.797589625515445e-06, "loss": 0.5129, "step": 10190 }, { "epoch": 0.5499433381900599, "grad_norm": 0.8767937280574267, "learning_rate": 4.796836093833739e-06, "loss": 0.4932, "step": 10191 }, { "epoch": 0.5499973018185743, "grad_norm": 1.0644126146759445, "learning_rate": 4.796082582361673e-06, "loss": 0.5038, "step": 10192 }, { "epoch": 0.5500512654470887, "grad_norm": 1.1863097674946566, "learning_rate": 4.795329091120906e-06, "loss": 0.5607, "step": 10193 }, { "epoch": 0.550105229075603, "grad_norm": 0.9209684565015405, "learning_rate": 4.7945756201330925e-06, "loss": 0.4339, "step": 10194 }, { "epoch": 0.5501591927041174, "grad_norm": 1.0434422441652182, "learning_rate": 4.793822169419889e-06, "loss": 0.5297, "step": 10195 }, { "epoch": 0.5502131563326318, "grad_norm": 0.935853174121805, "learning_rate": 4.793068739002949e-06, "loss": 0.4205, "step": 10196 }, { "epoch": 0.5502671199611462, "grad_norm": 1.311141643473365, "learning_rate": 4.792315328903929e-06, "loss": 0.5382, "step": 10197 }, { "epoch": 0.5503210835896606, "grad_norm": 1.1209685771968527, "learning_rate": 4.791561939144481e-06, "loss": 0.558, "step": 10198 }, { "epoch": 0.550375047218175, "grad_norm": 0.9997063853539551, "learning_rate": 4.790808569746257e-06, "loss": 0.5262, "step": 10199 }, { "epoch": 0.5504290108466894, "grad_norm": 1.0263681881017197, "learning_rate": 4.790055220730912e-06, "loss": 0.4061, "step": 10200 }, { "epoch": 0.5504829744752037, "grad_norm": 0.796017157360773, "learning_rate": 4.7893018921201e-06, "loss": 0.4078, "step": 10201 }, { "epoch": 0.5505369381037181, "grad_norm": 1.1706118447440286, "learning_rate": 4.788548583935467e-06, "loss": 0.5886, "step": 10202 }, { "epoch": 0.5505909017322325, "grad_norm": 1.4474117792966905, "learning_rate": 4.787795296198667e-06, "loss": 0.5283, "step": 10203 }, { "epoch": 0.5506448653607469, "grad_norm": 1.0736548543631448, "learning_rate": 4.7870420289313505e-06, "loss": 0.5232, "step": 10204 }, { "epoch": 0.5506988289892613, "grad_norm": 1.0092395427755219, "learning_rate": 4.786288782155165e-06, "loss": 0.4274, "step": 10205 }, { "epoch": 0.5507527926177757, "grad_norm": 1.0551846437496304, "learning_rate": 4.785535555891763e-06, "loss": 0.6667, "step": 10206 }, { "epoch": 0.55080675624629, "grad_norm": 0.9530261154188043, "learning_rate": 4.784782350162789e-06, "loss": 0.3778, "step": 10207 }, { "epoch": 0.5508607198748043, "grad_norm": 1.0155369939974788, "learning_rate": 4.784029164989896e-06, "loss": 0.567, "step": 10208 }, { "epoch": 0.5509146835033187, "grad_norm": 0.9374044281201305, "learning_rate": 4.783276000394726e-06, "loss": 0.3894, "step": 10209 }, { "epoch": 0.5509686471318331, "grad_norm": 1.1416842935611224, "learning_rate": 4.782522856398928e-06, "loss": 0.5874, "step": 10210 }, { "epoch": 
0.5510226107603475, "grad_norm": 0.7849281439782066, "learning_rate": 4.781769733024149e-06, "loss": 0.277, "step": 10211 }, { "epoch": 0.5510765743888619, "grad_norm": 0.9982174028871303, "learning_rate": 4.781016630292032e-06, "loss": 0.4531, "step": 10212 }, { "epoch": 0.5511305380173763, "grad_norm": 0.7533142949942875, "learning_rate": 4.780263548224224e-06, "loss": 0.395, "step": 10213 }, { "epoch": 0.5511845016458907, "grad_norm": 0.9525022370103455, "learning_rate": 4.779510486842369e-06, "loss": 0.3992, "step": 10214 }, { "epoch": 0.551238465274405, "grad_norm": 1.1182411432279644, "learning_rate": 4.778757446168113e-06, "loss": 0.5592, "step": 10215 }, { "epoch": 0.5512924289029194, "grad_norm": 1.0255616737499014, "learning_rate": 4.778004426223094e-06, "loss": 0.5167, "step": 10216 }, { "epoch": 0.5513463925314338, "grad_norm": 1.0626322283527228, "learning_rate": 4.7772514270289584e-06, "loss": 0.4521, "step": 10217 }, { "epoch": 0.5514003561599482, "grad_norm": 1.0257338349132643, "learning_rate": 4.7764984486073466e-06, "loss": 0.6385, "step": 10218 }, { "epoch": 0.5514543197884626, "grad_norm": 0.8545786876518366, "learning_rate": 4.7757454909799e-06, "loss": 0.3482, "step": 10219 }, { "epoch": 0.551508283416977, "grad_norm": 0.9154630045847885, "learning_rate": 4.77499255416826e-06, "loss": 0.4659, "step": 10220 }, { "epoch": 0.5515622470454914, "grad_norm": 1.092531444540202, "learning_rate": 4.774239638194066e-06, "loss": 0.5415, "step": 10221 }, { "epoch": 0.5516162106740057, "grad_norm": 0.9256002932522819, "learning_rate": 4.77348674307896e-06, "loss": 0.3517, "step": 10222 }, { "epoch": 0.5516701743025201, "grad_norm": 0.9338821816473993, "learning_rate": 4.772733868844578e-06, "loss": 0.4667, "step": 10223 }, { "epoch": 0.5517241379310345, "grad_norm": 0.9630922760303035, "learning_rate": 4.771981015512559e-06, "loss": 0.4452, "step": 10224 }, { "epoch": 0.5517781015595489, "grad_norm": 0.9014473940182888, "learning_rate": 4.771228183104541e-06, "loss": 0.424, "step": 10225 }, { "epoch": 0.5518320651880633, "grad_norm": 0.907884479640725, "learning_rate": 4.770475371642163e-06, "loss": 0.415, "step": 10226 }, { "epoch": 0.5518860288165777, "grad_norm": 0.9991091313920106, "learning_rate": 4.769722581147058e-06, "loss": 0.4348, "step": 10227 }, { "epoch": 0.551939992445092, "grad_norm": 0.9527270253063992, "learning_rate": 4.768969811640868e-06, "loss": 0.4083, "step": 10228 }, { "epoch": 0.5519939560736064, "grad_norm": 1.0942630076749227, "learning_rate": 4.7682170631452205e-06, "loss": 0.6173, "step": 10229 }, { "epoch": 0.5520479197021207, "grad_norm": 0.9767507647862661, "learning_rate": 4.767464335681754e-06, "loss": 0.4472, "step": 10230 }, { "epoch": 0.5521018833306351, "grad_norm": 0.9307617629281258, "learning_rate": 4.766711629272104e-06, "loss": 0.4253, "step": 10231 }, { "epoch": 0.5521558469591495, "grad_norm": 1.0363925811977672, "learning_rate": 4.765958943937901e-06, "loss": 0.577, "step": 10232 }, { "epoch": 0.5522098105876639, "grad_norm": 0.894798172195993, "learning_rate": 4.765206279700782e-06, "loss": 0.3854, "step": 10233 }, { "epoch": 0.5522637742161783, "grad_norm": 1.1636358822625577, "learning_rate": 4.764453636582375e-06, "loss": 0.6263, "step": 10234 }, { "epoch": 0.5523177378446926, "grad_norm": 0.8120591636313294, "learning_rate": 4.763701014604316e-06, "loss": 0.4309, "step": 10235 }, { "epoch": 0.552371701473207, "grad_norm": 0.7925082482907375, "learning_rate": 4.762948413788232e-06, "loss": 0.3227, "step": 10236 }, { "epoch": 
0.5524256651017214, "grad_norm": 1.0425905580305348, "learning_rate": 4.762195834155755e-06, "loss": 0.4828, "step": 10237 }, { "epoch": 0.5524796287302358, "grad_norm": 1.222189188197597, "learning_rate": 4.7614432757285155e-06, "loss": 0.6606, "step": 10238 }, { "epoch": 0.5525335923587502, "grad_norm": 0.8679516864558767, "learning_rate": 4.760690738528143e-06, "loss": 0.4159, "step": 10239 }, { "epoch": 0.5525875559872646, "grad_norm": 1.002349065062371, "learning_rate": 4.759938222576264e-06, "loss": 0.4983, "step": 10240 }, { "epoch": 0.552641519615779, "grad_norm": 1.1123171888072971, "learning_rate": 4.7591857278945105e-06, "loss": 0.4693, "step": 10241 }, { "epoch": 0.5526954832442933, "grad_norm": 1.0877396510968529, "learning_rate": 4.7584332545045095e-06, "loss": 0.5048, "step": 10242 }, { "epoch": 0.5527494468728077, "grad_norm": 1.2604156914624927, "learning_rate": 4.757680802427883e-06, "loss": 0.6495, "step": 10243 }, { "epoch": 0.5528034105013221, "grad_norm": 1.1552629241205457, "learning_rate": 4.756928371686262e-06, "loss": 0.4867, "step": 10244 }, { "epoch": 0.5528573741298365, "grad_norm": 1.1070536703481821, "learning_rate": 4.756175962301267e-06, "loss": 0.4325, "step": 10245 }, { "epoch": 0.5529113377583509, "grad_norm": 1.058587355404886, "learning_rate": 4.755423574294529e-06, "loss": 0.4808, "step": 10246 }, { "epoch": 0.5529653013868653, "grad_norm": 1.0021605741817001, "learning_rate": 4.75467120768767e-06, "loss": 0.482, "step": 10247 }, { "epoch": 0.5530192650153797, "grad_norm": 1.3458160113344066, "learning_rate": 4.753918862502312e-06, "loss": 0.8167, "step": 10248 }, { "epoch": 0.553073228643894, "grad_norm": 0.8343208415239175, "learning_rate": 4.7531665387600825e-06, "loss": 0.4041, "step": 10249 }, { "epoch": 0.5531271922724084, "grad_norm": 1.052267778054434, "learning_rate": 4.752414236482599e-06, "loss": 0.546, "step": 10250 }, { "epoch": 0.5531811559009228, "grad_norm": 0.9773511789038181, "learning_rate": 4.7516619556914854e-06, "loss": 0.5469, "step": 10251 }, { "epoch": 0.5532351195294372, "grad_norm": 1.0166159145370344, "learning_rate": 4.750909696408365e-06, "loss": 0.3872, "step": 10252 }, { "epoch": 0.5532890831579516, "grad_norm": 0.9896540224926904, "learning_rate": 4.750157458654856e-06, "loss": 0.4426, "step": 10253 }, { "epoch": 0.553343046786466, "grad_norm": 1.4930105902271418, "learning_rate": 4.749405242452578e-06, "loss": 0.5779, "step": 10254 }, { "epoch": 0.5533970104149804, "grad_norm": 0.8990369041479075, "learning_rate": 4.748653047823154e-06, "loss": 0.4047, "step": 10255 }, { "epoch": 0.5534509740434946, "grad_norm": 1.1535149741319002, "learning_rate": 4.747900874788202e-06, "loss": 0.4823, "step": 10256 }, { "epoch": 0.553504937672009, "grad_norm": 0.8703006694703891, "learning_rate": 4.747148723369336e-06, "loss": 0.4143, "step": 10257 }, { "epoch": 0.5535589013005234, "grad_norm": 1.0574048226244162, "learning_rate": 4.746396593588176e-06, "loss": 0.4691, "step": 10258 }, { "epoch": 0.5536128649290378, "grad_norm": 1.1203648416963612, "learning_rate": 4.745644485466341e-06, "loss": 0.549, "step": 10259 }, { "epoch": 0.5536668285575522, "grad_norm": 1.0940808620306717, "learning_rate": 4.744892399025445e-06, "loss": 0.5694, "step": 10260 }, { "epoch": 0.5537207921860666, "grad_norm": 1.1395646913357078, "learning_rate": 4.744140334287104e-06, "loss": 0.4712, "step": 10261 }, { "epoch": 0.553774755814581, "grad_norm": 0.9147035778624767, "learning_rate": 4.743388291272934e-06, "loss": 0.4687, "step": 10262 }, { "epoch": 
0.5538287194430953, "grad_norm": 0.9686000647321907, "learning_rate": 4.74263627000455e-06, "loss": 0.5505, "step": 10263 }, { "epoch": 0.5538826830716097, "grad_norm": 0.9784204950372776, "learning_rate": 4.741884270503564e-06, "loss": 0.4249, "step": 10264 }, { "epoch": 0.5539366467001241, "grad_norm": 0.8685226968130856, "learning_rate": 4.74113229279159e-06, "loss": 0.423, "step": 10265 }, { "epoch": 0.5539906103286385, "grad_norm": 0.8385328130206176, "learning_rate": 4.74038033689024e-06, "loss": 0.4223, "step": 10266 }, { "epoch": 0.5540445739571529, "grad_norm": 0.8986874191908752, "learning_rate": 4.739628402821127e-06, "loss": 0.3658, "step": 10267 }, { "epoch": 0.5540985375856673, "grad_norm": 0.9513013969724722, "learning_rate": 4.738876490605861e-06, "loss": 0.5437, "step": 10268 }, { "epoch": 0.5541525012141817, "grad_norm": 1.0610797976526958, "learning_rate": 4.738124600266056e-06, "loss": 0.5378, "step": 10269 }, { "epoch": 0.554206464842696, "grad_norm": 0.8899970332221802, "learning_rate": 4.7373727318233194e-06, "loss": 0.4063, "step": 10270 }, { "epoch": 0.5542604284712104, "grad_norm": 0.8809406767453768, "learning_rate": 4.73662088529926e-06, "loss": 0.3815, "step": 10271 }, { "epoch": 0.5543143920997248, "grad_norm": 1.0927379263485222, "learning_rate": 4.7358690607154886e-06, "loss": 0.5555, "step": 10272 }, { "epoch": 0.5543683557282392, "grad_norm": 0.9590401989227892, "learning_rate": 4.735117258093612e-06, "loss": 0.503, "step": 10273 }, { "epoch": 0.5544223193567536, "grad_norm": 1.1191268165293693, "learning_rate": 4.7343654774552395e-06, "loss": 0.6532, "step": 10274 }, { "epoch": 0.554476282985268, "grad_norm": 0.9705838694303852, "learning_rate": 4.733613718821976e-06, "loss": 0.5342, "step": 10275 }, { "epoch": 0.5545302466137824, "grad_norm": 1.0288376979707352, "learning_rate": 4.732861982215426e-06, "loss": 0.4631, "step": 10276 }, { "epoch": 0.5545842102422966, "grad_norm": 1.1563276114208567, "learning_rate": 4.732110267657202e-06, "loss": 0.4684, "step": 10277 }, { "epoch": 0.554638173870811, "grad_norm": 1.1987156128250958, "learning_rate": 4.731358575168903e-06, "loss": 0.5821, "step": 10278 }, { "epoch": 0.5546921374993254, "grad_norm": 0.7402913880802539, "learning_rate": 4.730606904772134e-06, "loss": 0.3138, "step": 10279 }, { "epoch": 0.5547461011278398, "grad_norm": 1.0177408728973936, "learning_rate": 4.7298552564885005e-06, "loss": 0.5228, "step": 10280 }, { "epoch": 0.5548000647563542, "grad_norm": 1.061933210216232, "learning_rate": 4.729103630339605e-06, "loss": 0.4396, "step": 10281 }, { "epoch": 0.5548540283848686, "grad_norm": 1.066323276000388, "learning_rate": 4.728352026347052e-06, "loss": 0.4738, "step": 10282 }, { "epoch": 0.554907992013383, "grad_norm": 1.0989357653910055, "learning_rate": 4.72760044453244e-06, "loss": 0.5498, "step": 10283 }, { "epoch": 0.5549619556418973, "grad_norm": 0.9866766493055045, "learning_rate": 4.726848884917372e-06, "loss": 0.4766, "step": 10284 }, { "epoch": 0.5550159192704117, "grad_norm": 1.0429959951674592, "learning_rate": 4.726097347523449e-06, "loss": 0.5653, "step": 10285 }, { "epoch": 0.5550698828989261, "grad_norm": 0.8772269261031262, "learning_rate": 4.725345832372269e-06, "loss": 0.4621, "step": 10286 }, { "epoch": 0.5551238465274405, "grad_norm": 1.100554652342403, "learning_rate": 4.724594339485432e-06, "loss": 0.5058, "step": 10287 }, { "epoch": 0.5551778101559549, "grad_norm": 0.9390972055032538, "learning_rate": 4.7238428688845375e-06, "loss": 0.5574, "step": 10288 }, { "epoch": 
0.5552317737844693, "grad_norm": 0.9432022490620339, "learning_rate": 4.723091420591184e-06, "loss": 0.4056, "step": 10289 }, { "epoch": 0.5552857374129837, "grad_norm": 1.0956778361955692, "learning_rate": 4.7223399946269675e-06, "loss": 0.46, "step": 10290 }, { "epoch": 0.555339701041498, "grad_norm": 1.0654124151829423, "learning_rate": 4.721588591013487e-06, "loss": 0.4788, "step": 10291 }, { "epoch": 0.5553936646700124, "grad_norm": 0.9235022120454096, "learning_rate": 4.720837209772335e-06, "loss": 0.3209, "step": 10292 }, { "epoch": 0.5554476282985268, "grad_norm": 0.9046613525752183, "learning_rate": 4.720085850925109e-06, "loss": 0.4432, "step": 10293 }, { "epoch": 0.5555015919270412, "grad_norm": 0.8742799378313397, "learning_rate": 4.719334514493405e-06, "loss": 0.4184, "step": 10294 }, { "epoch": 0.5555555555555556, "grad_norm": 0.9375443184693335, "learning_rate": 4.718583200498814e-06, "loss": 0.3687, "step": 10295 }, { "epoch": 0.55560951918407, "grad_norm": 1.1688956820111163, "learning_rate": 4.717831908962932e-06, "loss": 0.6131, "step": 10296 }, { "epoch": 0.5556634828125843, "grad_norm": 0.988552118354493, "learning_rate": 4.717080639907352e-06, "loss": 0.4555, "step": 10297 }, { "epoch": 0.5557174464410987, "grad_norm": 1.233525082950925, "learning_rate": 4.716329393353666e-06, "loss": 0.7664, "step": 10298 }, { "epoch": 0.5557714100696131, "grad_norm": 0.9869199192969014, "learning_rate": 4.715578169323463e-06, "loss": 0.4248, "step": 10299 }, { "epoch": 0.5558253736981275, "grad_norm": 0.9557645692277662, "learning_rate": 4.7148269678383376e-06, "loss": 0.4374, "step": 10300 }, { "epoch": 0.5558793373266419, "grad_norm": 1.058738572841535, "learning_rate": 4.714075788919877e-06, "loss": 0.4271, "step": 10301 }, { "epoch": 0.5559333009551563, "grad_norm": 1.0677287243199753, "learning_rate": 4.713324632589673e-06, "loss": 0.5769, "step": 10302 }, { "epoch": 0.5559872645836706, "grad_norm": 0.9241402289432602, "learning_rate": 4.712573498869315e-06, "loss": 0.3105, "step": 10303 }, { "epoch": 0.5560412282121849, "grad_norm": 1.059241895708833, "learning_rate": 4.711822387780391e-06, "loss": 0.4902, "step": 10304 }, { "epoch": 0.5560951918406993, "grad_norm": 0.9678383960371283, "learning_rate": 4.7110712993444865e-06, "loss": 0.5583, "step": 10305 }, { "epoch": 0.5561491554692137, "grad_norm": 0.9706934018175043, "learning_rate": 4.71032023358319e-06, "loss": 0.3878, "step": 10306 }, { "epoch": 0.5562031190977281, "grad_norm": 0.9747668616863375, "learning_rate": 4.709569190518088e-06, "loss": 0.3958, "step": 10307 }, { "epoch": 0.5562570827262425, "grad_norm": 1.1214627847961955, "learning_rate": 4.708818170170765e-06, "loss": 0.5152, "step": 10308 }, { "epoch": 0.5563110463547569, "grad_norm": 1.066287530183705, "learning_rate": 4.708067172562808e-06, "loss": 0.5932, "step": 10309 }, { "epoch": 0.5563650099832713, "grad_norm": 0.807244235892807, "learning_rate": 4.707316197715801e-06, "loss": 0.4033, "step": 10310 }, { "epoch": 0.5564189736117856, "grad_norm": 0.8484979177009955, "learning_rate": 4.70656524565133e-06, "loss": 0.3811, "step": 10311 }, { "epoch": 0.5564729372403, "grad_norm": 0.9418938308117809, "learning_rate": 4.705814316390972e-06, "loss": 0.3566, "step": 10312 }, { "epoch": 0.5565269008688144, "grad_norm": 1.0772264892751013, "learning_rate": 4.705063409956314e-06, "loss": 0.4949, "step": 10313 }, { "epoch": 0.5565808644973288, "grad_norm": 1.0399389949659381, "learning_rate": 4.704312526368937e-06, "loss": 0.4414, "step": 10314 }, { "epoch": 
0.5566348281258432, "grad_norm": 1.1515561338852636, "learning_rate": 4.703561665650422e-06, "loss": 0.4481, "step": 10315 }, { "epoch": 0.5566887917543576, "grad_norm": 0.8989238641097997, "learning_rate": 4.702810827822348e-06, "loss": 0.4189, "step": 10316 }, { "epoch": 0.556742755382872, "grad_norm": 1.3655556027095446, "learning_rate": 4.702060012906298e-06, "loss": 0.4912, "step": 10317 }, { "epoch": 0.5567967190113863, "grad_norm": 0.9973029937030303, "learning_rate": 4.7013092209238506e-06, "loss": 0.4994, "step": 10318 }, { "epoch": 0.5568506826399007, "grad_norm": 1.0107992210332357, "learning_rate": 4.700558451896581e-06, "loss": 0.6377, "step": 10319 }, { "epoch": 0.5569046462684151, "grad_norm": 1.56091262859535, "learning_rate": 4.69980770584607e-06, "loss": 0.7337, "step": 10320 }, { "epoch": 0.5569586098969295, "grad_norm": 0.9575313192000734, "learning_rate": 4.699056982793894e-06, "loss": 0.4798, "step": 10321 }, { "epoch": 0.5570125735254439, "grad_norm": 0.8975024309737544, "learning_rate": 4.698306282761628e-06, "loss": 0.4212, "step": 10322 }, { "epoch": 0.5570665371539583, "grad_norm": 0.9973815460233287, "learning_rate": 4.697555605770851e-06, "loss": 0.5311, "step": 10323 }, { "epoch": 0.5571205007824727, "grad_norm": 1.034387272682623, "learning_rate": 4.696804951843137e-06, "loss": 0.5166, "step": 10324 }, { "epoch": 0.557174464410987, "grad_norm": 0.8841292317090952, "learning_rate": 4.696054321000061e-06, "loss": 0.3655, "step": 10325 }, { "epoch": 0.5572284280395013, "grad_norm": 1.0798317920732936, "learning_rate": 4.695303713263195e-06, "loss": 0.3811, "step": 10326 }, { "epoch": 0.5572823916680157, "grad_norm": 0.8429098436538636, "learning_rate": 4.694553128654114e-06, "loss": 0.3134, "step": 10327 }, { "epoch": 0.5573363552965301, "grad_norm": 1.0595349968234722, "learning_rate": 4.6938025671943885e-06, "loss": 0.6119, "step": 10328 }, { "epoch": 0.5573903189250445, "grad_norm": 1.278004580885724, "learning_rate": 4.693052028905593e-06, "loss": 0.5509, "step": 10329 }, { "epoch": 0.5574442825535589, "grad_norm": 1.056036466539982, "learning_rate": 4.692301513809297e-06, "loss": 0.4784, "step": 10330 }, { "epoch": 0.5574982461820733, "grad_norm": 1.2453560375326012, "learning_rate": 4.691551021927072e-06, "loss": 0.7517, "step": 10331 }, { "epoch": 0.5575522098105876, "grad_norm": 0.974916877890773, "learning_rate": 4.690800553280489e-06, "loss": 0.377, "step": 10332 }, { "epoch": 0.557606173439102, "grad_norm": 0.9814487984076121, "learning_rate": 4.690050107891114e-06, "loss": 0.4509, "step": 10333 }, { "epoch": 0.5576601370676164, "grad_norm": 0.9914481128189413, "learning_rate": 4.689299685780517e-06, "loss": 0.4907, "step": 10334 }, { "epoch": 0.5577141006961308, "grad_norm": 1.030210175492681, "learning_rate": 4.688549286970267e-06, "loss": 0.407, "step": 10335 }, { "epoch": 0.5577680643246452, "grad_norm": 0.8338832597687463, "learning_rate": 4.68779891148193e-06, "loss": 0.4251, "step": 10336 }, { "epoch": 0.5578220279531596, "grad_norm": 0.933504888556169, "learning_rate": 4.687048559337073e-06, "loss": 0.3399, "step": 10337 }, { "epoch": 0.557875991581674, "grad_norm": 0.6813383150552782, "learning_rate": 4.686298230557261e-06, "loss": 0.3027, "step": 10338 }, { "epoch": 0.5579299552101883, "grad_norm": 0.91641431890194, "learning_rate": 4.685547925164062e-06, "loss": 0.4568, "step": 10339 }, { "epoch": 0.5579839188387027, "grad_norm": 1.3059834266021237, "learning_rate": 4.684797643179039e-06, "loss": 0.6158, "step": 10340 }, { "epoch": 
0.5580378824672171, "grad_norm": 1.2015672687602583, "learning_rate": 4.684047384623752e-06, "loss": 0.5358, "step": 10341 }, { "epoch": 0.5580918460957315, "grad_norm": 0.9299500080692359, "learning_rate": 4.68329714951977e-06, "loss": 0.433, "step": 10342 }, { "epoch": 0.5581458097242459, "grad_norm": 1.0319829592040375, "learning_rate": 4.682546937888653e-06, "loss": 0.4536, "step": 10343 }, { "epoch": 0.5581997733527603, "grad_norm": 1.1179217687877732, "learning_rate": 4.6817967497519626e-06, "loss": 0.6583, "step": 10344 }, { "epoch": 0.5582537369812747, "grad_norm": 0.9120345116987045, "learning_rate": 4.681046585131259e-06, "loss": 0.3801, "step": 10345 }, { "epoch": 0.558307700609789, "grad_norm": 0.9452728209767552, "learning_rate": 4.680296444048108e-06, "loss": 0.3677, "step": 10346 }, { "epoch": 0.5583616642383034, "grad_norm": 0.9484777119482529, "learning_rate": 4.679546326524062e-06, "loss": 0.3287, "step": 10347 }, { "epoch": 0.5584156278668178, "grad_norm": 1.153388427550987, "learning_rate": 4.678796232580685e-06, "loss": 0.5118, "step": 10348 }, { "epoch": 0.5584695914953322, "grad_norm": 0.9389343122756456, "learning_rate": 4.678046162239534e-06, "loss": 0.4184, "step": 10349 }, { "epoch": 0.5585235551238465, "grad_norm": 0.9148941316547532, "learning_rate": 4.677296115522165e-06, "loss": 0.434, "step": 10350 }, { "epoch": 0.558577518752361, "grad_norm": 0.7269979920895444, "learning_rate": 4.676546092450139e-06, "loss": 0.3065, "step": 10351 }, { "epoch": 0.5586314823808753, "grad_norm": 1.2668829189340294, "learning_rate": 4.675796093045008e-06, "loss": 0.4927, "step": 10352 }, { "epoch": 0.5586854460093896, "grad_norm": 0.9273894968222307, "learning_rate": 4.675046117328333e-06, "loss": 0.4039, "step": 10353 }, { "epoch": 0.558739409637904, "grad_norm": 0.9703100687260712, "learning_rate": 4.674296165321665e-06, "loss": 0.4668, "step": 10354 }, { "epoch": 0.5587933732664184, "grad_norm": 1.2265326607000822, "learning_rate": 4.673546237046559e-06, "loss": 0.5099, "step": 10355 }, { "epoch": 0.5588473368949328, "grad_norm": 0.9750918512898227, "learning_rate": 4.672796332524569e-06, "loss": 0.4729, "step": 10356 }, { "epoch": 0.5589013005234472, "grad_norm": 0.8957956448216362, "learning_rate": 4.672046451777249e-06, "loss": 0.4092, "step": 10357 }, { "epoch": 0.5589552641519616, "grad_norm": 1.1747097759498604, "learning_rate": 4.67129659482615e-06, "loss": 0.5837, "step": 10358 }, { "epoch": 0.559009227780476, "grad_norm": 1.0594049763871234, "learning_rate": 4.670546761692825e-06, "loss": 0.4992, "step": 10359 }, { "epoch": 0.5590631914089903, "grad_norm": 0.9353801735714632, "learning_rate": 4.669796952398825e-06, "loss": 0.3859, "step": 10360 }, { "epoch": 0.5591171550375047, "grad_norm": 0.9586907908343285, "learning_rate": 4.669047166965698e-06, "loss": 0.3863, "step": 10361 }, { "epoch": 0.5591711186660191, "grad_norm": 1.0103313890937382, "learning_rate": 4.668297405414994e-06, "loss": 0.5386, "step": 10362 }, { "epoch": 0.5592250822945335, "grad_norm": 1.1178698162789336, "learning_rate": 4.667547667768265e-06, "loss": 0.4697, "step": 10363 }, { "epoch": 0.5592790459230479, "grad_norm": 0.9780613485700601, "learning_rate": 4.666797954047055e-06, "loss": 0.4018, "step": 10364 }, { "epoch": 0.5593330095515623, "grad_norm": 1.0507323922150107, "learning_rate": 4.666048264272915e-06, "loss": 0.3662, "step": 10365 }, { "epoch": 0.5593869731800766, "grad_norm": 1.1241988512681806, "learning_rate": 4.665298598467393e-06, "loss": 0.6178, "step": 10366 }, { "epoch": 
0.559440936808591, "grad_norm": 0.9354840129688691, "learning_rate": 4.66454895665203e-06, "loss": 0.4456, "step": 10367 }, { "epoch": 0.5594949004371054, "grad_norm": 1.232332675183214, "learning_rate": 4.663799338848374e-06, "loss": 0.6272, "step": 10368 }, { "epoch": 0.5595488640656198, "grad_norm": 1.0986903744621233, "learning_rate": 4.66304974507797e-06, "loss": 0.5754, "step": 10369 }, { "epoch": 0.5596028276941342, "grad_norm": 1.2114691018425239, "learning_rate": 4.662300175362362e-06, "loss": 0.5058, "step": 10370 }, { "epoch": 0.5596567913226486, "grad_norm": 1.1960355565851375, "learning_rate": 4.661550629723094e-06, "loss": 0.4867, "step": 10371 }, { "epoch": 0.559710754951163, "grad_norm": 1.0588153431375231, "learning_rate": 4.660801108181707e-06, "loss": 0.49, "step": 10372 }, { "epoch": 0.5597647185796772, "grad_norm": 0.784890716721979, "learning_rate": 4.660051610759746e-06, "loss": 0.3851, "step": 10373 }, { "epoch": 0.5598186822081916, "grad_norm": 1.126102239523937, "learning_rate": 4.65930213747875e-06, "loss": 0.5445, "step": 10374 }, { "epoch": 0.559872645836706, "grad_norm": 0.8160524589359321, "learning_rate": 4.6585526883602584e-06, "loss": 0.3581, "step": 10375 }, { "epoch": 0.5599266094652204, "grad_norm": 0.8390079957255429, "learning_rate": 4.657803263425813e-06, "loss": 0.341, "step": 10376 }, { "epoch": 0.5599805730937348, "grad_norm": 1.1101222067601637, "learning_rate": 4.657053862696954e-06, "loss": 0.5306, "step": 10377 }, { "epoch": 0.5600345367222492, "grad_norm": 1.112411984487719, "learning_rate": 4.656304486195219e-06, "loss": 0.4873, "step": 10378 }, { "epoch": 0.5600885003507636, "grad_norm": 1.2377705301373216, "learning_rate": 4.655555133942144e-06, "loss": 0.5217, "step": 10379 }, { "epoch": 0.5601424639792779, "grad_norm": 0.9428711233239776, "learning_rate": 4.6548058059592696e-06, "loss": 0.415, "step": 10380 }, { "epoch": 0.5601964276077923, "grad_norm": 0.9031629897496262, "learning_rate": 4.654056502268129e-06, "loss": 0.3823, "step": 10381 }, { "epoch": 0.5602503912363067, "grad_norm": 0.9982631272634211, "learning_rate": 4.653307222890259e-06, "loss": 0.4761, "step": 10382 }, { "epoch": 0.5603043548648211, "grad_norm": 0.9469601002023227, "learning_rate": 4.652557967847195e-06, "loss": 0.3737, "step": 10383 }, { "epoch": 0.5603583184933355, "grad_norm": 1.0020789463514839, "learning_rate": 4.651808737160471e-06, "loss": 0.5069, "step": 10384 }, { "epoch": 0.5604122821218499, "grad_norm": 1.0781346539748349, "learning_rate": 4.651059530851623e-06, "loss": 0.4856, "step": 10385 }, { "epoch": 0.5604662457503643, "grad_norm": 0.8875768184666442, "learning_rate": 4.650310348942179e-06, "loss": 0.4225, "step": 10386 }, { "epoch": 0.5605202093788786, "grad_norm": 0.8852392179681133, "learning_rate": 4.6495611914536775e-06, "loss": 0.4509, "step": 10387 }, { "epoch": 0.560574173007393, "grad_norm": 1.037107063079202, "learning_rate": 4.6488120584076445e-06, "loss": 0.5758, "step": 10388 }, { "epoch": 0.5606281366359074, "grad_norm": 1.0655736864421468, "learning_rate": 4.648062949825614e-06, "loss": 0.4682, "step": 10389 }, { "epoch": 0.5606821002644218, "grad_norm": 1.180016693349369, "learning_rate": 4.647313865729114e-06, "loss": 0.567, "step": 10390 }, { "epoch": 0.5607360638929362, "grad_norm": 1.1068824354536526, "learning_rate": 4.646564806139675e-06, "loss": 0.4522, "step": 10391 }, { "epoch": 0.5607900275214506, "grad_norm": 1.0252659796430574, "learning_rate": 4.645815771078827e-06, "loss": 0.4729, "step": 10392 }, { "epoch": 
0.560843991149965, "grad_norm": 0.9864252674172859, "learning_rate": 4.645066760568095e-06, "loss": 0.4693, "step": 10393 }, { "epoch": 0.5608979547784793, "grad_norm": 0.8943367445684924, "learning_rate": 4.644317774629011e-06, "loss": 0.4325, "step": 10394 }, { "epoch": 0.5609519184069937, "grad_norm": 0.9934470974525529, "learning_rate": 4.643568813283097e-06, "loss": 0.4272, "step": 10395 }, { "epoch": 0.561005882035508, "grad_norm": 1.1938919721042003, "learning_rate": 4.6428198765518804e-06, "loss": 0.548, "step": 10396 }, { "epoch": 0.5610598456640224, "grad_norm": 0.8847724981310143, "learning_rate": 4.642070964456887e-06, "loss": 0.373, "step": 10397 }, { "epoch": 0.5611138092925368, "grad_norm": 1.0059593041145987, "learning_rate": 4.641322077019641e-06, "loss": 0.509, "step": 10398 }, { "epoch": 0.5611677729210512, "grad_norm": 1.0306981004710065, "learning_rate": 4.640573214261665e-06, "loss": 0.4168, "step": 10399 }, { "epoch": 0.5612217365495656, "grad_norm": 0.927691135238471, "learning_rate": 4.639824376204485e-06, "loss": 0.4628, "step": 10400 }, { "epoch": 0.5612757001780799, "grad_norm": 1.1861306931197144, "learning_rate": 4.639075562869622e-06, "loss": 0.488, "step": 10401 }, { "epoch": 0.5613296638065943, "grad_norm": 1.1430739716277543, "learning_rate": 4.638326774278595e-06, "loss": 0.5462, "step": 10402 }, { "epoch": 0.5613836274351087, "grad_norm": 0.9861494154601086, "learning_rate": 4.637578010452928e-06, "loss": 0.4978, "step": 10403 }, { "epoch": 0.5614375910636231, "grad_norm": 0.9051246520748504, "learning_rate": 4.636829271414141e-06, "loss": 0.375, "step": 10404 }, { "epoch": 0.5614915546921375, "grad_norm": 0.9659828462594624, "learning_rate": 4.636080557183752e-06, "loss": 0.5012, "step": 10405 }, { "epoch": 0.5615455183206519, "grad_norm": 1.0747341826820602, "learning_rate": 4.63533186778328e-06, "loss": 0.376, "step": 10406 }, { "epoch": 0.5615994819491663, "grad_norm": 0.9398439520446701, "learning_rate": 4.634583203234245e-06, "loss": 0.4071, "step": 10407 }, { "epoch": 0.5616534455776806, "grad_norm": 1.1323527955155106, "learning_rate": 4.6338345635581665e-06, "loss": 0.4492, "step": 10408 }, { "epoch": 0.561707409206195, "grad_norm": 0.987406525424712, "learning_rate": 4.633085948776555e-06, "loss": 0.5324, "step": 10409 }, { "epoch": 0.5617613728347094, "grad_norm": 0.956725352837083, "learning_rate": 4.632337358910928e-06, "loss": 0.4319, "step": 10410 }, { "epoch": 0.5618153364632238, "grad_norm": 1.0033426839608848, "learning_rate": 4.631588793982802e-06, "loss": 0.4015, "step": 10411 }, { "epoch": 0.5618693000917382, "grad_norm": 1.0007141754290345, "learning_rate": 4.630840254013692e-06, "loss": 0.5036, "step": 10412 }, { "epoch": 0.5619232637202526, "grad_norm": 1.012303998099746, "learning_rate": 4.630091739025112e-06, "loss": 0.462, "step": 10413 }, { "epoch": 0.561977227348767, "grad_norm": 1.0860760668900513, "learning_rate": 4.629343249038573e-06, "loss": 0.6, "step": 10414 }, { "epoch": 0.5620311909772813, "grad_norm": 1.235164221337869, "learning_rate": 4.6285947840755915e-06, "loss": 0.446, "step": 10415 }, { "epoch": 0.5620851546057957, "grad_norm": 1.1982023044453771, "learning_rate": 4.6278463441576756e-06, "loss": 0.5431, "step": 10416 }, { "epoch": 0.5621391182343101, "grad_norm": 1.0865833190933252, "learning_rate": 4.627097929306336e-06, "loss": 0.5236, "step": 10417 }, { "epoch": 0.5621930818628245, "grad_norm": 1.1288206630423583, "learning_rate": 4.626349539543083e-06, "loss": 0.4695, "step": 10418 }, { "epoch": 
0.5622470454913389, "grad_norm": 1.1538249430664769, "learning_rate": 4.625601174889429e-06, "loss": 0.6827, "step": 10419 }, { "epoch": 0.5623010091198533, "grad_norm": 0.8459246648399261, "learning_rate": 4.62485283536688e-06, "loss": 0.3615, "step": 10420 }, { "epoch": 0.5623549727483677, "grad_norm": 1.0960837412505011, "learning_rate": 4.624104520996944e-06, "loss": 0.4261, "step": 10421 }, { "epoch": 0.5624089363768819, "grad_norm": 1.0664625288175344, "learning_rate": 4.623356231801131e-06, "loss": 0.4545, "step": 10422 }, { "epoch": 0.5624629000053963, "grad_norm": 0.9941248482187476, "learning_rate": 4.6226079678009436e-06, "loss": 0.4084, "step": 10423 }, { "epoch": 0.5625168636339107, "grad_norm": 1.0659811941456354, "learning_rate": 4.6218597290178915e-06, "loss": 0.4793, "step": 10424 }, { "epoch": 0.5625708272624251, "grad_norm": 0.7504981626510868, "learning_rate": 4.621111515473476e-06, "loss": 0.2855, "step": 10425 }, { "epoch": 0.5626247908909395, "grad_norm": 1.0426496642181562, "learning_rate": 4.6203633271892055e-06, "loss": 0.5239, "step": 10426 }, { "epoch": 0.5626787545194539, "grad_norm": 1.066076224474066, "learning_rate": 4.61961516418658e-06, "loss": 0.4354, "step": 10427 }, { "epoch": 0.5627327181479682, "grad_norm": 1.0952431732132881, "learning_rate": 4.618867026487106e-06, "loss": 0.4694, "step": 10428 }, { "epoch": 0.5627866817764826, "grad_norm": 0.9520782178437929, "learning_rate": 4.618118914112284e-06, "loss": 0.6378, "step": 10429 }, { "epoch": 0.562840645404997, "grad_norm": 0.8067231581362336, "learning_rate": 4.617370827083613e-06, "loss": 0.3572, "step": 10430 }, { "epoch": 0.5628946090335114, "grad_norm": 0.8462480082730182, "learning_rate": 4.6166227654225994e-06, "loss": 0.4812, "step": 10431 }, { "epoch": 0.5629485726620258, "grad_norm": 0.9639543458116495, "learning_rate": 4.615874729150737e-06, "loss": 0.5463, "step": 10432 }, { "epoch": 0.5630025362905402, "grad_norm": 0.9982364353732058, "learning_rate": 4.615126718289531e-06, "loss": 0.5208, "step": 10433 }, { "epoch": 0.5630564999190546, "grad_norm": 0.8243331603684473, "learning_rate": 4.614378732860476e-06, "loss": 0.4104, "step": 10434 }, { "epoch": 0.5631104635475689, "grad_norm": 1.1125961336960704, "learning_rate": 4.613630772885073e-06, "loss": 0.6076, "step": 10435 }, { "epoch": 0.5631644271760833, "grad_norm": 1.2530837806331192, "learning_rate": 4.612882838384815e-06, "loss": 0.5736, "step": 10436 }, { "epoch": 0.5632183908045977, "grad_norm": 0.8418762342834804, "learning_rate": 4.6121349293812015e-06, "loss": 0.304, "step": 10437 }, { "epoch": 0.5632723544331121, "grad_norm": 0.7864831688443276, "learning_rate": 4.611387045895726e-06, "loss": 0.2755, "step": 10438 }, { "epoch": 0.5633263180616265, "grad_norm": 1.2358056639912849, "learning_rate": 4.610639187949886e-06, "loss": 0.4849, "step": 10439 }, { "epoch": 0.5633802816901409, "grad_norm": 1.23260109049792, "learning_rate": 4.609891355565172e-06, "loss": 0.5104, "step": 10440 }, { "epoch": 0.5634342453186553, "grad_norm": 1.2231120755727316, "learning_rate": 4.609143548763082e-06, "loss": 0.4937, "step": 10441 }, { "epoch": 0.5634882089471696, "grad_norm": 0.8460552938705836, "learning_rate": 4.608395767565108e-06, "loss": 0.3399, "step": 10442 }, { "epoch": 0.563542172575684, "grad_norm": 1.0533703303904305, "learning_rate": 4.607648011992738e-06, "loss": 0.6159, "step": 10443 }, { "epoch": 0.5635961362041984, "grad_norm": 0.8970560104302894, "learning_rate": 4.606900282067467e-06, "loss": 0.4594, "step": 10444 }, { 
"epoch": 0.5636500998327127, "grad_norm": 0.854141211172588, "learning_rate": 4.6061525778107845e-06, "loss": 0.4102, "step": 10445 }, { "epoch": 0.5637040634612271, "grad_norm": 1.2398508588764234, "learning_rate": 4.605404899244178e-06, "loss": 0.5239, "step": 10446 }, { "epoch": 0.5637580270897415, "grad_norm": 1.0856103298333295, "learning_rate": 4.604657246389141e-06, "loss": 0.5266, "step": 10447 }, { "epoch": 0.5638119907182559, "grad_norm": 0.7770382137721871, "learning_rate": 4.603909619267157e-06, "loss": 0.3225, "step": 10448 }, { "epoch": 0.5638659543467702, "grad_norm": 1.1715355614581755, "learning_rate": 4.603162017899718e-06, "loss": 0.5408, "step": 10449 }, { "epoch": 0.5639199179752846, "grad_norm": 0.8882758862806005, "learning_rate": 4.602414442308308e-06, "loss": 0.4413, "step": 10450 }, { "epoch": 0.563973881603799, "grad_norm": 0.9127773076845014, "learning_rate": 4.601666892514415e-06, "loss": 0.4923, "step": 10451 }, { "epoch": 0.5640278452323134, "grad_norm": 0.8631816417602077, "learning_rate": 4.600919368539523e-06, "loss": 0.4085, "step": 10452 }, { "epoch": 0.5640818088608278, "grad_norm": 0.9017419735588537, "learning_rate": 4.600171870405116e-06, "loss": 0.4682, "step": 10453 }, { "epoch": 0.5641357724893422, "grad_norm": 1.016984510076863, "learning_rate": 4.599424398132678e-06, "loss": 0.5359, "step": 10454 }, { "epoch": 0.5641897361178566, "grad_norm": 0.8852944412240826, "learning_rate": 4.598676951743694e-06, "loss": 0.3275, "step": 10455 }, { "epoch": 0.5642436997463709, "grad_norm": 1.0186523742949374, "learning_rate": 4.597929531259645e-06, "loss": 0.4358, "step": 10456 }, { "epoch": 0.5642976633748853, "grad_norm": 0.946002290808795, "learning_rate": 4.597182136702012e-06, "loss": 0.3759, "step": 10457 }, { "epoch": 0.5643516270033997, "grad_norm": 1.0167468786411347, "learning_rate": 4.596434768092277e-06, "loss": 0.493, "step": 10458 }, { "epoch": 0.5644055906319141, "grad_norm": 0.9069401324439225, "learning_rate": 4.595687425451919e-06, "loss": 0.4318, "step": 10459 }, { "epoch": 0.5644595542604285, "grad_norm": 0.8631004982072913, "learning_rate": 4.594940108802419e-06, "loss": 0.4057, "step": 10460 }, { "epoch": 0.5645135178889429, "grad_norm": 0.9299250777982595, "learning_rate": 4.594192818165255e-06, "loss": 0.3949, "step": 10461 }, { "epoch": 0.5645674815174573, "grad_norm": 1.0309033625358532, "learning_rate": 4.593445553561904e-06, "loss": 0.5333, "step": 10462 }, { "epoch": 0.5646214451459716, "grad_norm": 0.9561423030004743, "learning_rate": 4.592698315013845e-06, "loss": 0.4953, "step": 10463 }, { "epoch": 0.564675408774486, "grad_norm": 0.9671555622210293, "learning_rate": 4.591951102542553e-06, "loss": 0.4201, "step": 10464 }, { "epoch": 0.5647293724030004, "grad_norm": 0.8754228020017677, "learning_rate": 4.5912039161695036e-06, "loss": 0.3495, "step": 10465 }, { "epoch": 0.5647833360315148, "grad_norm": 1.1302772054924015, "learning_rate": 4.590456755916171e-06, "loss": 0.53, "step": 10466 }, { "epoch": 0.5648372996600292, "grad_norm": 0.8351331953368962, "learning_rate": 4.5897096218040294e-06, "loss": 0.3962, "step": 10467 }, { "epoch": 0.5648912632885436, "grad_norm": 0.9440805249199721, "learning_rate": 4.588962513854555e-06, "loss": 0.513, "step": 10468 }, { "epoch": 0.564945226917058, "grad_norm": 1.101379508163217, "learning_rate": 4.588215432089217e-06, "loss": 0.4718, "step": 10469 }, { "epoch": 0.5649991905455722, "grad_norm": 1.2364221436105636, "learning_rate": 4.5874683765294915e-06, "loss": 0.6042, "step": 10470 }, { 
"epoch": 0.5650531541740866, "grad_norm": 0.985800850701892, "learning_rate": 4.5867213471968455e-06, "loss": 0.3376, "step": 10471 }, { "epoch": 0.565107117802601, "grad_norm": 0.9129079325302729, "learning_rate": 4.585974344112749e-06, "loss": 0.4402, "step": 10472 }, { "epoch": 0.5651610814311154, "grad_norm": 1.0760406374597122, "learning_rate": 4.585227367298674e-06, "loss": 0.5239, "step": 10473 }, { "epoch": 0.5652150450596298, "grad_norm": 1.2340386890757566, "learning_rate": 4.5844804167760906e-06, "loss": 0.6443, "step": 10474 }, { "epoch": 0.5652690086881442, "grad_norm": 1.0604224505118627, "learning_rate": 4.583733492566463e-06, "loss": 0.4868, "step": 10475 }, { "epoch": 0.5653229723166586, "grad_norm": 0.9722103964779522, "learning_rate": 4.582986594691262e-06, "loss": 0.3633, "step": 10476 }, { "epoch": 0.5653769359451729, "grad_norm": 0.8566737740791921, "learning_rate": 4.582239723171953e-06, "loss": 0.3369, "step": 10477 }, { "epoch": 0.5654308995736873, "grad_norm": 1.0583734588262836, "learning_rate": 4.581492878030002e-06, "loss": 0.574, "step": 10478 }, { "epoch": 0.5654848632022017, "grad_norm": 1.0488481127007796, "learning_rate": 4.580746059286871e-06, "loss": 0.4544, "step": 10479 }, { "epoch": 0.5655388268307161, "grad_norm": 1.0206252778138516, "learning_rate": 4.579999266964029e-06, "loss": 0.4877, "step": 10480 }, { "epoch": 0.5655927904592305, "grad_norm": 0.6842875634433881, "learning_rate": 4.579252501082938e-06, "loss": 0.2895, "step": 10481 }, { "epoch": 0.5656467540877449, "grad_norm": 0.8091021866258207, "learning_rate": 4.578505761665061e-06, "loss": 0.3336, "step": 10482 }, { "epoch": 0.5657007177162593, "grad_norm": 1.0663593237080324, "learning_rate": 4.5777590487318595e-06, "loss": 0.4363, "step": 10483 }, { "epoch": 0.5657546813447736, "grad_norm": 0.9985080998935604, "learning_rate": 4.577012362304794e-06, "loss": 0.4481, "step": 10484 }, { "epoch": 0.565808644973288, "grad_norm": 1.0259516177533448, "learning_rate": 4.576265702405326e-06, "loss": 0.5135, "step": 10485 }, { "epoch": 0.5658626086018024, "grad_norm": 0.7960509646590144, "learning_rate": 4.575519069054914e-06, "loss": 0.3009, "step": 10486 }, { "epoch": 0.5659165722303168, "grad_norm": 1.1318196518923591, "learning_rate": 4.574772462275019e-06, "loss": 0.5344, "step": 10487 }, { "epoch": 0.5659705358588312, "grad_norm": 1.1599812341538092, "learning_rate": 4.574025882087097e-06, "loss": 0.5148, "step": 10488 }, { "epoch": 0.5660244994873456, "grad_norm": 1.104193440300905, "learning_rate": 4.5732793285126085e-06, "loss": 0.6625, "step": 10489 }, { "epoch": 0.56607846311586, "grad_norm": 0.971664785097364, "learning_rate": 4.572532801573008e-06, "loss": 0.382, "step": 10490 }, { "epoch": 0.5661324267443743, "grad_norm": 0.847045839342596, "learning_rate": 4.571786301289753e-06, "loss": 0.3419, "step": 10491 }, { "epoch": 0.5661863903728886, "grad_norm": 1.0025076018083392, "learning_rate": 4.571039827684297e-06, "loss": 0.6109, "step": 10492 }, { "epoch": 0.566240354001403, "grad_norm": 0.9719561822674162, "learning_rate": 4.570293380778095e-06, "loss": 0.4424, "step": 10493 }, { "epoch": 0.5662943176299174, "grad_norm": 0.8319105236420241, "learning_rate": 4.569546960592599e-06, "loss": 0.3909, "step": 10494 }, { "epoch": 0.5663482812584318, "grad_norm": 0.9925270799723463, "learning_rate": 4.568800567149265e-06, "loss": 0.4677, "step": 10495 }, { "epoch": 0.5664022448869462, "grad_norm": 0.9520855293244139, "learning_rate": 4.568054200469543e-06, "loss": 0.3786, "step": 10496 }, { 
"epoch": 0.5664562085154605, "grad_norm": 1.3646821439645345, "learning_rate": 4.567307860574887e-06, "loss": 0.6379, "step": 10497 }, { "epoch": 0.5665101721439749, "grad_norm": 1.161037060780476, "learning_rate": 4.566561547486743e-06, "loss": 0.527, "step": 10498 }, { "epoch": 0.5665641357724893, "grad_norm": 1.1139793078565807, "learning_rate": 4.565815261226564e-06, "loss": 0.6897, "step": 10499 }, { "epoch": 0.5666180994010037, "grad_norm": 0.9759634546330336, "learning_rate": 4.565069001815798e-06, "loss": 0.3925, "step": 10500 }, { "epoch": 0.5666180994010037, "eval_loss": 0.5438081622123718, "eval_runtime": 164.3918, "eval_samples_per_second": 20.92, "eval_steps_per_second": 0.876, "step": 10500 }, { "epoch": 0.5666720630295181, "grad_norm": 1.2132508406829081, "learning_rate": 4.564322769275893e-06, "loss": 0.6681, "step": 10501 }, { "epoch": 0.5667260266580325, "grad_norm": 0.8951656295534676, "learning_rate": 4.563576563628298e-06, "loss": 0.3908, "step": 10502 }, { "epoch": 0.5667799902865469, "grad_norm": 1.034408194809722, "learning_rate": 4.562830384894459e-06, "loss": 0.6627, "step": 10503 }, { "epoch": 0.5668339539150612, "grad_norm": 0.8452687506975001, "learning_rate": 4.562084233095821e-06, "loss": 0.3675, "step": 10504 }, { "epoch": 0.5668879175435756, "grad_norm": 1.211241568699674, "learning_rate": 4.561338108253831e-06, "loss": 0.6211, "step": 10505 }, { "epoch": 0.56694188117209, "grad_norm": 0.9143935184570449, "learning_rate": 4.560592010389931e-06, "loss": 0.4774, "step": 10506 }, { "epoch": 0.5669958448006044, "grad_norm": 1.2234019226153698, "learning_rate": 4.559845939525567e-06, "loss": 0.5969, "step": 10507 }, { "epoch": 0.5670498084291188, "grad_norm": 0.8991553793622392, "learning_rate": 4.559099895682179e-06, "loss": 0.421, "step": 10508 }, { "epoch": 0.5671037720576332, "grad_norm": 1.0017559533969251, "learning_rate": 4.558353878881211e-06, "loss": 0.4093, "step": 10509 }, { "epoch": 0.5671577356861476, "grad_norm": 1.1653015337490409, "learning_rate": 4.557607889144104e-06, "loss": 0.497, "step": 10510 }, { "epoch": 0.5672116993146619, "grad_norm": 1.255258636203648, "learning_rate": 4.556861926492302e-06, "loss": 0.5213, "step": 10511 }, { "epoch": 0.5672656629431763, "grad_norm": 1.1708837881373115, "learning_rate": 4.556115990947237e-06, "loss": 0.5367, "step": 10512 }, { "epoch": 0.5673196265716907, "grad_norm": 0.9824199753056545, "learning_rate": 4.555370082530354e-06, "loss": 0.4713, "step": 10513 }, { "epoch": 0.5673735902002051, "grad_norm": 1.0134020256880298, "learning_rate": 4.554624201263088e-06, "loss": 0.4971, "step": 10514 }, { "epoch": 0.5674275538287195, "grad_norm": 1.0959336425207347, "learning_rate": 4.553878347166878e-06, "loss": 0.4196, "step": 10515 }, { "epoch": 0.5674815174572339, "grad_norm": 1.0679688270717111, "learning_rate": 4.553132520263161e-06, "loss": 0.4829, "step": 10516 }, { "epoch": 0.5675354810857483, "grad_norm": 0.9045756208258133, "learning_rate": 4.552386720573372e-06, "loss": 0.4122, "step": 10517 }, { "epoch": 0.5675894447142625, "grad_norm": 1.0700682652821185, "learning_rate": 4.551640948118947e-06, "loss": 0.4049, "step": 10518 }, { "epoch": 0.5676434083427769, "grad_norm": 0.9938818995689936, "learning_rate": 4.550895202921319e-06, "loss": 0.4334, "step": 10519 }, { "epoch": 0.5676973719712913, "grad_norm": 1.1922130131925306, "learning_rate": 4.550149485001922e-06, "loss": 0.5675, "step": 10520 }, { "epoch": 0.5677513355998057, "grad_norm": 1.1517116584619278, "learning_rate": 4.5494037943821855e-06, 
"loss": 0.6336, "step": 10521 }, { "epoch": 0.5678052992283201, "grad_norm": 0.9102326012757607, "learning_rate": 4.548658131083546e-06, "loss": 0.3859, "step": 10522 }, { "epoch": 0.5678592628568345, "grad_norm": 0.9100686524729895, "learning_rate": 4.547912495127435e-06, "loss": 0.5856, "step": 10523 }, { "epoch": 0.5679132264853489, "grad_norm": 1.1612250822560735, "learning_rate": 4.547166886535279e-06, "loss": 0.4224, "step": 10524 }, { "epoch": 0.5679671901138632, "grad_norm": 1.124810719943975, "learning_rate": 4.5464213053285115e-06, "loss": 0.622, "step": 10525 }, { "epoch": 0.5680211537423776, "grad_norm": 0.8666698667896425, "learning_rate": 4.545675751528556e-06, "loss": 0.3742, "step": 10526 }, { "epoch": 0.568075117370892, "grad_norm": 1.2144187730549996, "learning_rate": 4.544930225156847e-06, "loss": 0.6003, "step": 10527 }, { "epoch": 0.5681290809994064, "grad_norm": 1.091008511310262, "learning_rate": 4.544184726234807e-06, "loss": 0.5005, "step": 10528 }, { "epoch": 0.5681830446279208, "grad_norm": 0.9474822575642348, "learning_rate": 4.543439254783864e-06, "loss": 0.3841, "step": 10529 }, { "epoch": 0.5682370082564352, "grad_norm": 0.9525027683818699, "learning_rate": 4.542693810825443e-06, "loss": 0.4796, "step": 10530 }, { "epoch": 0.5682909718849496, "grad_norm": 1.1096716407691811, "learning_rate": 4.541948394380968e-06, "loss": 0.5117, "step": 10531 }, { "epoch": 0.5683449355134639, "grad_norm": 1.1559454816488712, "learning_rate": 4.541203005471867e-06, "loss": 0.5466, "step": 10532 }, { "epoch": 0.5683988991419783, "grad_norm": 1.0178992963985396, "learning_rate": 4.540457644119558e-06, "loss": 0.5088, "step": 10533 }, { "epoch": 0.5684528627704927, "grad_norm": 1.037623615809952, "learning_rate": 4.5397123103454665e-06, "loss": 0.5513, "step": 10534 }, { "epoch": 0.5685068263990071, "grad_norm": 1.0430502586912973, "learning_rate": 4.538967004171013e-06, "loss": 0.5399, "step": 10535 }, { "epoch": 0.5685607900275215, "grad_norm": 0.9913932180355542, "learning_rate": 4.53822172561762e-06, "loss": 0.4715, "step": 10536 }, { "epoch": 0.5686147536560359, "grad_norm": 1.0173395766093782, "learning_rate": 4.537476474706706e-06, "loss": 0.5516, "step": 10537 }, { "epoch": 0.5686687172845503, "grad_norm": 1.0182770347367338, "learning_rate": 4.536731251459689e-06, "loss": 0.4234, "step": 10538 }, { "epoch": 0.5687226809130645, "grad_norm": 0.7806052958255845, "learning_rate": 4.535986055897993e-06, "loss": 0.3633, "step": 10539 }, { "epoch": 0.568776644541579, "grad_norm": 0.9409031177040648, "learning_rate": 4.535240888043029e-06, "loss": 0.4104, "step": 10540 }, { "epoch": 0.5688306081700933, "grad_norm": 1.0698967036525588, "learning_rate": 4.534495747916218e-06, "loss": 0.5668, "step": 10541 }, { "epoch": 0.5688845717986077, "grad_norm": 1.0822913122984983, "learning_rate": 4.533750635538974e-06, "loss": 0.6055, "step": 10542 }, { "epoch": 0.5689385354271221, "grad_norm": 1.134195390052243, "learning_rate": 4.533005550932712e-06, "loss": 0.6183, "step": 10543 }, { "epoch": 0.5689924990556365, "grad_norm": 1.1415572245749117, "learning_rate": 4.532260494118849e-06, "loss": 0.5873, "step": 10544 }, { "epoch": 0.5690464626841509, "grad_norm": 0.9114946320112447, "learning_rate": 4.531515465118797e-06, "loss": 0.3716, "step": 10545 }, { "epoch": 0.5691004263126652, "grad_norm": 0.8388432682843823, "learning_rate": 4.530770463953969e-06, "loss": 0.4114, "step": 10546 }, { "epoch": 0.5691543899411796, "grad_norm": 1.0848657138134543, "learning_rate": 
4.530025490645778e-06, "loss": 0.529, "step": 10547 }, { "epoch": 0.569208353569694, "grad_norm": 0.9650432036842284, "learning_rate": 4.529280545215633e-06, "loss": 0.4884, "step": 10548 }, { "epoch": 0.5692623171982084, "grad_norm": 0.9424371966893221, "learning_rate": 4.528535627684946e-06, "loss": 0.4177, "step": 10549 }, { "epoch": 0.5693162808267228, "grad_norm": 1.1418985813333165, "learning_rate": 4.527790738075128e-06, "loss": 0.5627, "step": 10550 }, { "epoch": 0.5693702444552372, "grad_norm": 0.7918763054499546, "learning_rate": 4.5270458764075855e-06, "loss": 0.2944, "step": 10551 }, { "epoch": 0.5694242080837516, "grad_norm": 1.175055885712422, "learning_rate": 4.526301042703727e-06, "loss": 0.6097, "step": 10552 }, { "epoch": 0.5694781717122659, "grad_norm": 0.9414820088729619, "learning_rate": 4.5255562369849625e-06, "loss": 0.3798, "step": 10553 }, { "epoch": 0.5695321353407803, "grad_norm": 0.8678124951116019, "learning_rate": 4.524811459272694e-06, "loss": 0.382, "step": 10554 }, { "epoch": 0.5695860989692947, "grad_norm": 1.0811301802284357, "learning_rate": 4.524066709588332e-06, "loss": 0.5468, "step": 10555 }, { "epoch": 0.5696400625978091, "grad_norm": 1.0969387736888854, "learning_rate": 4.523321987953276e-06, "loss": 0.589, "step": 10556 }, { "epoch": 0.5696940262263235, "grad_norm": 1.459383894529047, "learning_rate": 4.522577294388934e-06, "loss": 0.6391, "step": 10557 }, { "epoch": 0.5697479898548379, "grad_norm": 1.078736179186585, "learning_rate": 4.5218326289167106e-06, "loss": 0.4589, "step": 10558 }, { "epoch": 0.5698019534833523, "grad_norm": 0.8551121739217271, "learning_rate": 4.521087991558003e-06, "loss": 0.4184, "step": 10559 }, { "epoch": 0.5698559171118666, "grad_norm": 0.9925483654509739, "learning_rate": 4.520343382334217e-06, "loss": 0.4486, "step": 10560 }, { "epoch": 0.569909880740381, "grad_norm": 1.0781736582206884, "learning_rate": 4.519598801266753e-06, "loss": 0.359, "step": 10561 }, { "epoch": 0.5699638443688954, "grad_norm": 0.9239417380792921, "learning_rate": 4.518854248377007e-06, "loss": 0.4309, "step": 10562 }, { "epoch": 0.5700178079974098, "grad_norm": 0.9330596678201261, "learning_rate": 4.518109723686383e-06, "loss": 0.5132, "step": 10563 }, { "epoch": 0.5700717716259242, "grad_norm": 0.9500103334051261, "learning_rate": 4.5173652272162775e-06, "loss": 0.4419, "step": 10564 }, { "epoch": 0.5701257352544385, "grad_norm": 1.0728585293580428, "learning_rate": 4.516620758988087e-06, "loss": 0.5091, "step": 10565 }, { "epoch": 0.5701796988829528, "grad_norm": 0.984952903966271, "learning_rate": 4.515876319023213e-06, "loss": 0.4436, "step": 10566 }, { "epoch": 0.5702336625114672, "grad_norm": 1.3209893927019474, "learning_rate": 4.515131907343046e-06, "loss": 0.7137, "step": 10567 }, { "epoch": 0.5702876261399816, "grad_norm": 0.8544703789693543, "learning_rate": 4.514387523968983e-06, "loss": 0.3787, "step": 10568 }, { "epoch": 0.570341589768496, "grad_norm": 1.2009008387930713, "learning_rate": 4.513643168922418e-06, "loss": 0.459, "step": 10569 }, { "epoch": 0.5703955533970104, "grad_norm": 1.0320861994311454, "learning_rate": 4.512898842224745e-06, "loss": 0.4445, "step": 10570 }, { "epoch": 0.5704495170255248, "grad_norm": 0.9264477902612831, "learning_rate": 4.512154543897357e-06, "loss": 0.3697, "step": 10571 }, { "epoch": 0.5705034806540392, "grad_norm": 0.9267906066561483, "learning_rate": 4.511410273961645e-06, "loss": 0.5279, "step": 10572 }, { "epoch": 0.5705574442825535, "grad_norm": 1.117232184546726, "learning_rate": 
4.510666032439003e-06, "loss": 0.4677, "step": 10573 }, { "epoch": 0.5706114079110679, "grad_norm": 1.3201633979718521, "learning_rate": 4.509921819350817e-06, "loss": 0.6103, "step": 10574 }, { "epoch": 0.5706653715395823, "grad_norm": 0.7394646500033647, "learning_rate": 4.509177634718478e-06, "loss": 0.291, "step": 10575 }, { "epoch": 0.5707193351680967, "grad_norm": 0.8164238954965022, "learning_rate": 4.508433478563375e-06, "loss": 0.337, "step": 10576 }, { "epoch": 0.5707732987966111, "grad_norm": 0.9377936993896832, "learning_rate": 4.507689350906895e-06, "loss": 0.4177, "step": 10577 }, { "epoch": 0.5708272624251255, "grad_norm": 1.0285659302703003, "learning_rate": 4.506945251770427e-06, "loss": 0.4399, "step": 10578 }, { "epoch": 0.5708812260536399, "grad_norm": 1.2379819057240777, "learning_rate": 4.506201181175356e-06, "loss": 0.5213, "step": 10579 }, { "epoch": 0.5709351896821542, "grad_norm": 0.8786634095111288, "learning_rate": 4.505457139143068e-06, "loss": 0.3471, "step": 10580 }, { "epoch": 0.5709891533106686, "grad_norm": 1.0687462454976182, "learning_rate": 4.504713125694945e-06, "loss": 0.5849, "step": 10581 }, { "epoch": 0.571043116939183, "grad_norm": 0.9770542060711697, "learning_rate": 4.5039691408523725e-06, "loss": 0.4603, "step": 10582 }, { "epoch": 0.5710970805676974, "grad_norm": 0.8196514535962424, "learning_rate": 4.5032251846367335e-06, "loss": 0.3196, "step": 10583 }, { "epoch": 0.5711510441962118, "grad_norm": 1.0666495916125962, "learning_rate": 4.50248125706941e-06, "loss": 0.5725, "step": 10584 }, { "epoch": 0.5712050078247262, "grad_norm": 0.8880110331413505, "learning_rate": 4.501737358171784e-06, "loss": 0.4414, "step": 10585 }, { "epoch": 0.5712589714532406, "grad_norm": 1.0806083702655236, "learning_rate": 4.5009934879652325e-06, "loss": 0.5349, "step": 10586 }, { "epoch": 0.5713129350817548, "grad_norm": 0.976760398180387, "learning_rate": 4.500249646471142e-06, "loss": 0.522, "step": 10587 }, { "epoch": 0.5713668987102692, "grad_norm": 1.1099936156874142, "learning_rate": 4.499505833710884e-06, "loss": 0.4811, "step": 10588 }, { "epoch": 0.5714208623387836, "grad_norm": 1.1365399227678186, "learning_rate": 4.498762049705839e-06, "loss": 0.4581, "step": 10589 }, { "epoch": 0.571474825967298, "grad_norm": 1.0310848613716224, "learning_rate": 4.498018294477384e-06, "loss": 0.5865, "step": 10590 }, { "epoch": 0.5715287895958124, "grad_norm": 1.023656860971758, "learning_rate": 4.497274568046896e-06, "loss": 0.4429, "step": 10591 }, { "epoch": 0.5715827532243268, "grad_norm": 1.1138134135035493, "learning_rate": 4.4965308704357515e-06, "loss": 0.5776, "step": 10592 }, { "epoch": 0.5716367168528412, "grad_norm": 0.8935108769966343, "learning_rate": 4.495787201665321e-06, "loss": 0.4136, "step": 10593 }, { "epoch": 0.5716906804813555, "grad_norm": 1.2970485389581448, "learning_rate": 4.495043561756986e-06, "loss": 0.6513, "step": 10594 }, { "epoch": 0.5717446441098699, "grad_norm": 1.2812015334423443, "learning_rate": 4.49429995073211e-06, "loss": 0.5031, "step": 10595 }, { "epoch": 0.5717986077383843, "grad_norm": 1.1613189386346907, "learning_rate": 4.493556368612071e-06, "loss": 0.404, "step": 10596 }, { "epoch": 0.5718525713668987, "grad_norm": 1.0982884112344797, "learning_rate": 4.492812815418239e-06, "loss": 0.468, "step": 10597 }, { "epoch": 0.5719065349954131, "grad_norm": 1.026060798855499, "learning_rate": 4.492069291171984e-06, "loss": 0.4663, "step": 10598 }, { "epoch": 0.5719604986239275, "grad_norm": 1.0152422338255567, "learning_rate": 
4.491325795894675e-06, "loss": 0.4974, "step": 10599 }, { "epoch": 0.5720144622524419, "grad_norm": 1.0062215408303345, "learning_rate": 4.490582329607683e-06, "loss": 0.4519, "step": 10600 }, { "epoch": 0.5720684258809562, "grad_norm": 1.0536011911807657, "learning_rate": 4.4898388923323755e-06, "loss": 0.4505, "step": 10601 }, { "epoch": 0.5721223895094706, "grad_norm": 0.9955333466003488, "learning_rate": 4.489095484090119e-06, "loss": 0.3786, "step": 10602 }, { "epoch": 0.572176353137985, "grad_norm": 0.8181339198428329, "learning_rate": 4.488352104902278e-06, "loss": 0.3635, "step": 10603 }, { "epoch": 0.5722303167664994, "grad_norm": 1.0718829164782495, "learning_rate": 4.487608754790221e-06, "loss": 0.5469, "step": 10604 }, { "epoch": 0.5722842803950138, "grad_norm": 1.020375693885567, "learning_rate": 4.48686543377531e-06, "loss": 0.3884, "step": 10605 }, { "epoch": 0.5723382440235282, "grad_norm": 1.158302511788771, "learning_rate": 4.486122141878912e-06, "loss": 0.6245, "step": 10606 }, { "epoch": 0.5723922076520426, "grad_norm": 0.9122582134344321, "learning_rate": 4.4853788791223865e-06, "loss": 0.4675, "step": 10607 }, { "epoch": 0.5724461712805569, "grad_norm": 1.0154385974893372, "learning_rate": 4.484635645527099e-06, "loss": 0.4066, "step": 10608 }, { "epoch": 0.5725001349090713, "grad_norm": 1.010129177438139, "learning_rate": 4.483892441114408e-06, "loss": 0.4488, "step": 10609 }, { "epoch": 0.5725540985375857, "grad_norm": 1.0495565327298668, "learning_rate": 4.4831492659056745e-06, "loss": 0.5249, "step": 10610 }, { "epoch": 0.5726080621661, "grad_norm": 0.8551594604325177, "learning_rate": 4.482406119922258e-06, "loss": 0.4357, "step": 10611 }, { "epoch": 0.5726620257946144, "grad_norm": 1.0453406455494891, "learning_rate": 4.481663003185519e-06, "loss": 0.4747, "step": 10612 }, { "epoch": 0.5727159894231288, "grad_norm": 1.0537798755337189, "learning_rate": 4.480919915716814e-06, "loss": 0.4889, "step": 10613 }, { "epoch": 0.5727699530516432, "grad_norm": 0.9098585356323315, "learning_rate": 4.480176857537499e-06, "loss": 0.3944, "step": 10614 }, { "epoch": 0.5728239166801575, "grad_norm": 0.9713307100252243, "learning_rate": 4.479433828668935e-06, "loss": 0.4, "step": 10615 }, { "epoch": 0.5728778803086719, "grad_norm": 0.7627267409792713, "learning_rate": 4.478690829132471e-06, "loss": 0.3067, "step": 10616 }, { "epoch": 0.5729318439371863, "grad_norm": 1.1906731762058105, "learning_rate": 4.477947858949465e-06, "loss": 0.5543, "step": 10617 }, { "epoch": 0.5729858075657007, "grad_norm": 0.8867902300040209, "learning_rate": 4.477204918141271e-06, "loss": 0.3774, "step": 10618 }, { "epoch": 0.5730397711942151, "grad_norm": 1.1020224007125945, "learning_rate": 4.476462006729241e-06, "loss": 0.4897, "step": 10619 }, { "epoch": 0.5730937348227295, "grad_norm": 1.1515094448383796, "learning_rate": 4.475719124734725e-06, "loss": 0.5414, "step": 10620 }, { "epoch": 0.5731476984512439, "grad_norm": 1.208875848794995, "learning_rate": 4.474976272179079e-06, "loss": 0.5603, "step": 10621 }, { "epoch": 0.5732016620797582, "grad_norm": 1.3763739111325086, "learning_rate": 4.474233449083651e-06, "loss": 0.6205, "step": 10622 }, { "epoch": 0.5732556257082726, "grad_norm": 1.2380235108393458, "learning_rate": 4.473490655469789e-06, "loss": 0.4517, "step": 10623 }, { "epoch": 0.573309589336787, "grad_norm": 1.0572127500698172, "learning_rate": 4.472747891358843e-06, "loss": 0.5109, "step": 10624 }, { "epoch": 0.5733635529653014, "grad_norm": 1.0261652073233762, "learning_rate": 
4.47200515677216e-06, "loss": 0.4524, "step": 10625 }, { "epoch": 0.5734175165938158, "grad_norm": 1.1489572176933056, "learning_rate": 4.471262451731086e-06, "loss": 0.4725, "step": 10626 }, { "epoch": 0.5734714802223302, "grad_norm": 1.5247047165162657, "learning_rate": 4.470519776256971e-06, "loss": 0.6385, "step": 10627 }, { "epoch": 0.5735254438508446, "grad_norm": 0.910473574632374, "learning_rate": 4.469777130371158e-06, "loss": 0.3324, "step": 10628 }, { "epoch": 0.5735794074793589, "grad_norm": 1.02306860790074, "learning_rate": 4.46903451409499e-06, "loss": 0.5237, "step": 10629 }, { "epoch": 0.5736333711078733, "grad_norm": 1.0074265986182689, "learning_rate": 4.468291927449813e-06, "loss": 0.4344, "step": 10630 }, { "epoch": 0.5736873347363877, "grad_norm": 0.9534049489737269, "learning_rate": 4.467549370456966e-06, "loss": 0.5183, "step": 10631 }, { "epoch": 0.5737412983649021, "grad_norm": 0.9788153041361316, "learning_rate": 4.466806843137795e-06, "loss": 0.484, "step": 10632 }, { "epoch": 0.5737952619934165, "grad_norm": 1.1465662917526995, "learning_rate": 4.466064345513641e-06, "loss": 0.6373, "step": 10633 }, { "epoch": 0.5738492256219309, "grad_norm": 1.0909256431101533, "learning_rate": 4.46532187760584e-06, "loss": 0.5888, "step": 10634 }, { "epoch": 0.5739031892504451, "grad_norm": 1.0076671982183465, "learning_rate": 4.464579439435736e-06, "loss": 0.4381, "step": 10635 }, { "epoch": 0.5739571528789595, "grad_norm": 1.1519822433028821, "learning_rate": 4.463837031024663e-06, "loss": 0.6434, "step": 10636 }, { "epoch": 0.5740111165074739, "grad_norm": 0.941164973166366, "learning_rate": 4.463094652393961e-06, "loss": 0.5318, "step": 10637 }, { "epoch": 0.5740650801359883, "grad_norm": 0.7363305232511407, "learning_rate": 4.462352303564966e-06, "loss": 0.2642, "step": 10638 }, { "epoch": 0.5741190437645027, "grad_norm": 1.0489660972652277, "learning_rate": 4.461609984559014e-06, "loss": 0.4037, "step": 10639 }, { "epoch": 0.5741730073930171, "grad_norm": 0.9438426645962671, "learning_rate": 4.4608676953974416e-06, "loss": 0.3441, "step": 10640 }, { "epoch": 0.5742269710215315, "grad_norm": 0.925922080447645, "learning_rate": 4.4601254361015815e-06, "loss": 0.3727, "step": 10641 }, { "epoch": 0.5742809346500458, "grad_norm": 0.9954552695095218, "learning_rate": 4.459383206692768e-06, "loss": 0.42, "step": 10642 }, { "epoch": 0.5743348982785602, "grad_norm": 1.1461463009291533, "learning_rate": 4.458641007192332e-06, "loss": 0.5276, "step": 10643 }, { "epoch": 0.5743888619070746, "grad_norm": 0.848896681246346, "learning_rate": 4.457898837621604e-06, "loss": 0.2856, "step": 10644 }, { "epoch": 0.574442825535589, "grad_norm": 0.9696876079010146, "learning_rate": 4.457156698001917e-06, "loss": 0.5137, "step": 10645 }, { "epoch": 0.5744967891641034, "grad_norm": 0.8506717179810759, "learning_rate": 4.4564145883546e-06, "loss": 0.4366, "step": 10646 }, { "epoch": 0.5745507527926178, "grad_norm": 1.1390513692599813, "learning_rate": 4.4556725087009835e-06, "loss": 0.5321, "step": 10647 }, { "epoch": 0.5746047164211322, "grad_norm": 0.9331765094324005, "learning_rate": 4.454930459062393e-06, "loss": 0.4524, "step": 10648 }, { "epoch": 0.5746586800496465, "grad_norm": 1.0193378712012588, "learning_rate": 4.45418843946016e-06, "loss": 0.5267, "step": 10649 }, { "epoch": 0.5747126436781609, "grad_norm": 1.1351950304079719, "learning_rate": 4.453446449915605e-06, "loss": 0.5341, "step": 10650 }, { "epoch": 0.5747666073066753, "grad_norm": 0.9623760350941655, "learning_rate": 
4.452704490450057e-06, "loss": 0.401, "step": 10651 }, { "epoch": 0.5748205709351897, "grad_norm": 1.1465314665200284, "learning_rate": 4.45196256108484e-06, "loss": 0.6252, "step": 10652 }, { "epoch": 0.5748745345637041, "grad_norm": 1.0205692254614633, "learning_rate": 4.4512206618412775e-06, "loss": 0.4225, "step": 10653 }, { "epoch": 0.5749284981922185, "grad_norm": 0.8575756349485733, "learning_rate": 4.450478792740693e-06, "loss": 0.3653, "step": 10654 }, { "epoch": 0.5749824618207329, "grad_norm": 0.968824270851426, "learning_rate": 4.44973695380441e-06, "loss": 0.3721, "step": 10655 }, { "epoch": 0.5750364254492472, "grad_norm": 0.8818761568495516, "learning_rate": 4.448995145053747e-06, "loss": 0.3959, "step": 10656 }, { "epoch": 0.5750903890777616, "grad_norm": 1.0353503820231238, "learning_rate": 4.448253366510027e-06, "loss": 0.5265, "step": 10657 }, { "epoch": 0.575144352706276, "grad_norm": 1.1088991750348278, "learning_rate": 4.447511618194566e-06, "loss": 0.5284, "step": 10658 }, { "epoch": 0.5751983163347903, "grad_norm": 0.9110503012042285, "learning_rate": 4.446769900128685e-06, "loss": 0.3901, "step": 10659 }, { "epoch": 0.5752522799633047, "grad_norm": 1.2965241922704283, "learning_rate": 4.446028212333702e-06, "loss": 0.5571, "step": 10660 }, { "epoch": 0.5753062435918191, "grad_norm": 0.7805924178267505, "learning_rate": 4.445286554830932e-06, "loss": 0.3155, "step": 10661 }, { "epoch": 0.5753602072203335, "grad_norm": 0.8118498411789405, "learning_rate": 4.444544927641692e-06, "loss": 0.3785, "step": 10662 }, { "epoch": 0.5754141708488478, "grad_norm": 1.0804680718586033, "learning_rate": 4.4438033307873e-06, "loss": 0.5537, "step": 10663 }, { "epoch": 0.5754681344773622, "grad_norm": 0.89237425371023, "learning_rate": 4.443061764289066e-06, "loss": 0.4197, "step": 10664 }, { "epoch": 0.5755220981058766, "grad_norm": 0.7820333766537252, "learning_rate": 4.442320228168303e-06, "loss": 0.2762, "step": 10665 }, { "epoch": 0.575576061734391, "grad_norm": 1.1846033614085174, "learning_rate": 4.441578722446327e-06, "loss": 0.5004, "step": 10666 }, { "epoch": 0.5756300253629054, "grad_norm": 1.1890490110874712, "learning_rate": 4.440837247144447e-06, "loss": 0.5309, "step": 10667 }, { "epoch": 0.5756839889914198, "grad_norm": 1.1940633524984023, "learning_rate": 4.440095802283976e-06, "loss": 0.5053, "step": 10668 }, { "epoch": 0.5757379526199342, "grad_norm": 1.0428366995388378, "learning_rate": 4.439354387886222e-06, "loss": 0.4148, "step": 10669 }, { "epoch": 0.5757919162484485, "grad_norm": 1.0682556446517424, "learning_rate": 4.438613003972496e-06, "loss": 0.5071, "step": 10670 }, { "epoch": 0.5758458798769629, "grad_norm": 0.8569192878246306, "learning_rate": 4.4378716505641025e-06, "loss": 0.3738, "step": 10671 }, { "epoch": 0.5758998435054773, "grad_norm": 1.050186583096519, "learning_rate": 4.437130327682353e-06, "loss": 0.3937, "step": 10672 }, { "epoch": 0.5759538071339917, "grad_norm": 0.9761970650772003, "learning_rate": 4.4363890353485495e-06, "loss": 0.3677, "step": 10673 }, { "epoch": 0.5760077707625061, "grad_norm": 1.0570058227268315, "learning_rate": 4.435647773584001e-06, "loss": 0.4751, "step": 10674 }, { "epoch": 0.5760617343910205, "grad_norm": 1.0875588764862072, "learning_rate": 4.434906542410011e-06, "loss": 0.5224, "step": 10675 }, { "epoch": 0.5761156980195349, "grad_norm": 1.0527043768908537, "learning_rate": 4.434165341847883e-06, "loss": 0.475, "step": 10676 }, { "epoch": 0.5761696616480492, "grad_norm": 1.2398426839470986, "learning_rate": 
4.433424171918922e-06, "loss": 0.5993, "step": 10677 }, { "epoch": 0.5762236252765636, "grad_norm": 1.2808480113548504, "learning_rate": 4.4326830326444246e-06, "loss": 0.7322, "step": 10678 }, { "epoch": 0.576277588905078, "grad_norm": 0.9556415749767011, "learning_rate": 4.431941924045697e-06, "loss": 0.4069, "step": 10679 }, { "epoch": 0.5763315525335924, "grad_norm": 1.013089484999727, "learning_rate": 4.431200846144038e-06, "loss": 0.5584, "step": 10680 }, { "epoch": 0.5763855161621068, "grad_norm": 1.086361037713495, "learning_rate": 4.430459798960746e-06, "loss": 0.4947, "step": 10681 }, { "epoch": 0.5764394797906212, "grad_norm": 1.077873560168301, "learning_rate": 4.429718782517119e-06, "loss": 0.4885, "step": 10682 }, { "epoch": 0.5764934434191356, "grad_norm": 0.9366757726525804, "learning_rate": 4.428977796834457e-06, "loss": 0.4907, "step": 10683 }, { "epoch": 0.5765474070476498, "grad_norm": 1.0187354445144805, "learning_rate": 4.428236841934056e-06, "loss": 0.425, "step": 10684 }, { "epoch": 0.5766013706761642, "grad_norm": 1.089358238605998, "learning_rate": 4.42749591783721e-06, "loss": 0.4437, "step": 10685 }, { "epoch": 0.5766553343046786, "grad_norm": 0.9985471785190964, "learning_rate": 4.426755024565215e-06, "loss": 0.47, "step": 10686 }, { "epoch": 0.576709297933193, "grad_norm": 0.9170018732528163, "learning_rate": 4.426014162139365e-06, "loss": 0.4732, "step": 10687 }, { "epoch": 0.5767632615617074, "grad_norm": 1.2895337996614005, "learning_rate": 4.425273330580954e-06, "loss": 0.5087, "step": 10688 }, { "epoch": 0.5768172251902218, "grad_norm": 0.8628008861982333, "learning_rate": 4.424532529911271e-06, "loss": 0.4137, "step": 10689 }, { "epoch": 0.5768711888187362, "grad_norm": 1.0196109767323036, "learning_rate": 4.4237917601516135e-06, "loss": 0.5107, "step": 10690 }, { "epoch": 0.5769251524472505, "grad_norm": 0.9033378578818797, "learning_rate": 4.423051021323267e-06, "loss": 0.3724, "step": 10691 }, { "epoch": 0.5769791160757649, "grad_norm": 0.9289069913292367, "learning_rate": 4.422310313447522e-06, "loss": 0.3745, "step": 10692 }, { "epoch": 0.5770330797042793, "grad_norm": 0.9805429895073225, "learning_rate": 4.421569636545666e-06, "loss": 0.5291, "step": 10693 }, { "epoch": 0.5770870433327937, "grad_norm": 0.959044927461277, "learning_rate": 4.42082899063899e-06, "loss": 0.4176, "step": 10694 }, { "epoch": 0.5771410069613081, "grad_norm": 0.9193011070176517, "learning_rate": 4.420088375748779e-06, "loss": 0.5035, "step": 10695 }, { "epoch": 0.5771949705898225, "grad_norm": 0.9038313444328734, "learning_rate": 4.419347791896318e-06, "loss": 0.4653, "step": 10696 }, { "epoch": 0.5772489342183369, "grad_norm": 1.1645046616095565, "learning_rate": 4.418607239102894e-06, "loss": 0.6183, "step": 10697 }, { "epoch": 0.5773028978468512, "grad_norm": 1.068819902974211, "learning_rate": 4.417866717389793e-06, "loss": 0.5308, "step": 10698 }, { "epoch": 0.5773568614753656, "grad_norm": 0.922305986933035, "learning_rate": 4.417126226778293e-06, "loss": 0.4655, "step": 10699 }, { "epoch": 0.57741082510388, "grad_norm": 1.0278258551557862, "learning_rate": 4.416385767289681e-06, "loss": 0.5378, "step": 10700 }, { "epoch": 0.5774647887323944, "grad_norm": 0.9128904159691642, "learning_rate": 4.415645338945236e-06, "loss": 0.4132, "step": 10701 }, { "epoch": 0.5775187523609088, "grad_norm": 1.010410235458784, "learning_rate": 4.4149049417662394e-06, "loss": 0.5185, "step": 10702 }, { "epoch": 0.5775727159894232, "grad_norm": 0.925047176939598, "learning_rate": 
4.414164575773971e-06, "loss": 0.4077, "step": 10703 }, { "epoch": 0.5776266796179375, "grad_norm": 1.1104317523572533, "learning_rate": 4.413424240989712e-06, "loss": 0.5038, "step": 10704 }, { "epoch": 0.5776806432464519, "grad_norm": 1.151220457906447, "learning_rate": 4.412683937434736e-06, "loss": 0.5185, "step": 10705 }, { "epoch": 0.5777346068749662, "grad_norm": 0.972394500919524, "learning_rate": 4.411943665130324e-06, "loss": 0.4348, "step": 10706 }, { "epoch": 0.5777885705034806, "grad_norm": 1.0822357706189414, "learning_rate": 4.41120342409775e-06, "loss": 0.4733, "step": 10707 }, { "epoch": 0.577842534131995, "grad_norm": 0.9794546826784469, "learning_rate": 4.410463214358289e-06, "loss": 0.4705, "step": 10708 }, { "epoch": 0.5778964977605094, "grad_norm": 0.9326873245421238, "learning_rate": 4.409723035933217e-06, "loss": 0.3413, "step": 10709 }, { "epoch": 0.5779504613890238, "grad_norm": 1.1384562096545403, "learning_rate": 4.408982888843804e-06, "loss": 0.5466, "step": 10710 }, { "epoch": 0.5780044250175381, "grad_norm": 1.0499275563929176, "learning_rate": 4.4082427731113285e-06, "loss": 0.4472, "step": 10711 }, { "epoch": 0.5780583886460525, "grad_norm": 0.9693497199020219, "learning_rate": 4.407502688757057e-06, "loss": 0.4061, "step": 10712 }, { "epoch": 0.5781123522745669, "grad_norm": 0.9586103448723158, "learning_rate": 4.406762635802263e-06, "loss": 0.4148, "step": 10713 }, { "epoch": 0.5781663159030813, "grad_norm": 1.1252187925730974, "learning_rate": 4.406022614268215e-06, "loss": 0.5635, "step": 10714 }, { "epoch": 0.5782202795315957, "grad_norm": 1.2956607810945564, "learning_rate": 4.4052826241761825e-06, "loss": 0.4976, "step": 10715 }, { "epoch": 0.5782742431601101, "grad_norm": 1.0677613788344091, "learning_rate": 4.404542665547433e-06, "loss": 0.4254, "step": 10716 }, { "epoch": 0.5783282067886245, "grad_norm": 0.8851473988704571, "learning_rate": 4.403802738403235e-06, "loss": 0.4147, "step": 10717 }, { "epoch": 0.5783821704171388, "grad_norm": 0.9811366972264037, "learning_rate": 4.403062842764855e-06, "loss": 0.5393, "step": 10718 }, { "epoch": 0.5784361340456532, "grad_norm": 1.173380936114301, "learning_rate": 4.402322978653556e-06, "loss": 0.6141, "step": 10719 }, { "epoch": 0.5784900976741676, "grad_norm": 0.9562298706410602, "learning_rate": 4.401583146090604e-06, "loss": 0.4646, "step": 10720 }, { "epoch": 0.578544061302682, "grad_norm": 1.465345874806283, "learning_rate": 4.400843345097261e-06, "loss": 0.5805, "step": 10721 }, { "epoch": 0.5785980249311964, "grad_norm": 0.8865917511141234, "learning_rate": 4.400103575694792e-06, "loss": 0.3894, "step": 10722 }, { "epoch": 0.5786519885597108, "grad_norm": 1.0434905189899335, "learning_rate": 4.3993638379044565e-06, "loss": 0.3687, "step": 10723 }, { "epoch": 0.5787059521882252, "grad_norm": 0.9350023757670994, "learning_rate": 4.398624131747518e-06, "loss": 0.5278, "step": 10724 }, { "epoch": 0.5787599158167395, "grad_norm": 0.889622592589819, "learning_rate": 4.397884457245235e-06, "loss": 0.3485, "step": 10725 }, { "epoch": 0.5788138794452539, "grad_norm": 0.9378720637746693, "learning_rate": 4.397144814418865e-06, "loss": 0.404, "step": 10726 }, { "epoch": 0.5788678430737683, "grad_norm": 1.1750232080292062, "learning_rate": 4.396405203289668e-06, "loss": 0.5155, "step": 10727 }, { "epoch": 0.5789218067022827, "grad_norm": 0.9745553733650358, "learning_rate": 4.3956656238789e-06, "loss": 0.4584, "step": 10728 }, { "epoch": 0.5789757703307971, "grad_norm": 1.2098082755827806, "learning_rate": 
4.394926076207818e-06, "loss": 0.5191, "step": 10729 }, { "epoch": 0.5790297339593115, "grad_norm": 1.0230243290545324, "learning_rate": 4.394186560297678e-06, "loss": 0.4768, "step": 10730 }, { "epoch": 0.5790836975878259, "grad_norm": 1.093045095738849, "learning_rate": 4.393447076169731e-06, "loss": 0.5434, "step": 10731 }, { "epoch": 0.5791376612163401, "grad_norm": 1.0880120882430226, "learning_rate": 4.392707623845237e-06, "loss": 0.4579, "step": 10732 }, { "epoch": 0.5791916248448545, "grad_norm": 0.8478929725453419, "learning_rate": 4.3919682033454425e-06, "loss": 0.4097, "step": 10733 }, { "epoch": 0.5792455884733689, "grad_norm": 1.0737463149842836, "learning_rate": 4.391228814691601e-06, "loss": 0.5087, "step": 10734 }, { "epoch": 0.5792995521018833, "grad_norm": 1.0005002300606136, "learning_rate": 4.390489457904964e-06, "loss": 0.4141, "step": 10735 }, { "epoch": 0.5793535157303977, "grad_norm": 1.084228463245065, "learning_rate": 4.3897501330067815e-06, "loss": 0.4655, "step": 10736 }, { "epoch": 0.5794074793589121, "grad_norm": 1.0542054481846073, "learning_rate": 4.389010840018302e-06, "loss": 0.517, "step": 10737 }, { "epoch": 0.5794614429874265, "grad_norm": 0.9412062021707129, "learning_rate": 4.388271578960773e-06, "loss": 0.3553, "step": 10738 }, { "epoch": 0.5795154066159408, "grad_norm": 1.0003873180596075, "learning_rate": 4.387532349855444e-06, "loss": 0.3571, "step": 10739 }, { "epoch": 0.5795693702444552, "grad_norm": 1.0103696822250403, "learning_rate": 4.386793152723558e-06, "loss": 0.3991, "step": 10740 }, { "epoch": 0.5796233338729696, "grad_norm": 1.1160594619609254, "learning_rate": 4.386053987586361e-06, "loss": 0.528, "step": 10741 }, { "epoch": 0.579677297501484, "grad_norm": 1.0247355203711692, "learning_rate": 4.385314854465099e-06, "loss": 0.4178, "step": 10742 }, { "epoch": 0.5797312611299984, "grad_norm": 1.1503297863100375, "learning_rate": 4.384575753381014e-06, "loss": 0.5421, "step": 10743 }, { "epoch": 0.5797852247585128, "grad_norm": 1.032341492681949, "learning_rate": 4.383836684355348e-06, "loss": 0.5091, "step": 10744 }, { "epoch": 0.5798391883870272, "grad_norm": 0.9398492212170682, "learning_rate": 4.383097647409345e-06, "loss": 0.3908, "step": 10745 }, { "epoch": 0.5798931520155415, "grad_norm": 0.9183518931796758, "learning_rate": 4.382358642564244e-06, "loss": 0.4364, "step": 10746 }, { "epoch": 0.5799471156440559, "grad_norm": 0.83805866417832, "learning_rate": 4.381619669841285e-06, "loss": 0.321, "step": 10747 }, { "epoch": 0.5800010792725703, "grad_norm": 1.0194551879246887, "learning_rate": 4.380880729261705e-06, "loss": 0.3638, "step": 10748 }, { "epoch": 0.5800550429010847, "grad_norm": 0.9561142511228675, "learning_rate": 4.380141820846745e-06, "loss": 0.5029, "step": 10749 }, { "epoch": 0.5801090065295991, "grad_norm": 1.0704908048926518, "learning_rate": 4.37940294461764e-06, "loss": 0.4526, "step": 10750 }, { "epoch": 0.5801629701581135, "grad_norm": 1.287964066610987, "learning_rate": 4.378664100595627e-06, "loss": 0.4876, "step": 10751 }, { "epoch": 0.5802169337866279, "grad_norm": 0.8227485709827045, "learning_rate": 4.377925288801941e-06, "loss": 0.3847, "step": 10752 }, { "epoch": 0.5802708974151422, "grad_norm": 0.7725858120272292, "learning_rate": 4.377186509257817e-06, "loss": 0.3806, "step": 10753 }, { "epoch": 0.5803248610436565, "grad_norm": 0.9012299215742978, "learning_rate": 4.376447761984486e-06, "loss": 0.4186, "step": 10754 }, { "epoch": 0.580378824672171, "grad_norm": 1.0802492000612998, "learning_rate": 
4.375709047003181e-06, "loss": 0.5985, "step": 10755 }, { "epoch": 0.5804327883006853, "grad_norm": 0.9649869887112489, "learning_rate": 4.374970364335135e-06, "loss": 0.3539, "step": 10756 }, { "epoch": 0.5804867519291997, "grad_norm": 0.9502144952947014, "learning_rate": 4.374231714001576e-06, "loss": 0.4596, "step": 10757 }, { "epoch": 0.5805407155577141, "grad_norm": 0.9863384687469474, "learning_rate": 4.3734930960237365e-06, "loss": 0.4203, "step": 10758 }, { "epoch": 0.5805946791862285, "grad_norm": 1.271304599468789, "learning_rate": 4.372754510422843e-06, "loss": 0.4717, "step": 10759 }, { "epoch": 0.5806486428147428, "grad_norm": 1.0226275209427604, "learning_rate": 4.372015957220125e-06, "loss": 0.4646, "step": 10760 }, { "epoch": 0.5807026064432572, "grad_norm": 0.8933758352341065, "learning_rate": 4.371277436436806e-06, "loss": 0.2568, "step": 10761 }, { "epoch": 0.5807565700717716, "grad_norm": 0.9956996771238652, "learning_rate": 4.3705389480941165e-06, "loss": 0.5058, "step": 10762 }, { "epoch": 0.580810533700286, "grad_norm": 1.0226492048875213, "learning_rate": 4.369800492213277e-06, "loss": 0.4423, "step": 10763 }, { "epoch": 0.5808644973288004, "grad_norm": 0.8527859636131525, "learning_rate": 4.369062068815514e-06, "loss": 0.4503, "step": 10764 }, { "epoch": 0.5809184609573148, "grad_norm": 0.9321737229116237, "learning_rate": 4.368323677922048e-06, "loss": 0.3511, "step": 10765 }, { "epoch": 0.5809724245858292, "grad_norm": 0.9270458977674326, "learning_rate": 4.367585319554105e-06, "loss": 0.4409, "step": 10766 }, { "epoch": 0.5810263882143435, "grad_norm": 1.0391385472247665, "learning_rate": 4.366846993732906e-06, "loss": 0.4824, "step": 10767 }, { "epoch": 0.5810803518428579, "grad_norm": 0.9624931505974168, "learning_rate": 4.366108700479667e-06, "loss": 0.4835, "step": 10768 }, { "epoch": 0.5811343154713723, "grad_norm": 0.7472747789017256, "learning_rate": 4.36537043981561e-06, "loss": 0.3508, "step": 10769 }, { "epoch": 0.5811882790998867, "grad_norm": 1.1021552275199522, "learning_rate": 4.364632211761953e-06, "loss": 0.5976, "step": 10770 }, { "epoch": 0.5812422427284011, "grad_norm": 1.1204508576785062, "learning_rate": 4.363894016339914e-06, "loss": 0.561, "step": 10771 }, { "epoch": 0.5812962063569155, "grad_norm": 1.0369134733842182, "learning_rate": 4.363155853570708e-06, "loss": 0.3448, "step": 10772 }, { "epoch": 0.5813501699854298, "grad_norm": 1.1810078589221615, "learning_rate": 4.362417723475553e-06, "loss": 0.666, "step": 10773 }, { "epoch": 0.5814041336139442, "grad_norm": 0.7891491105205495, "learning_rate": 4.361679626075661e-06, "loss": 0.2872, "step": 10774 }, { "epoch": 0.5814580972424586, "grad_norm": 0.9043243054565318, "learning_rate": 4.360941561392246e-06, "loss": 0.4276, "step": 10775 }, { "epoch": 0.581512060870973, "grad_norm": 1.1136636303600596, "learning_rate": 4.360203529446522e-06, "loss": 0.5374, "step": 10776 }, { "epoch": 0.5815660244994874, "grad_norm": 1.2072567896009667, "learning_rate": 4.3594655302597e-06, "loss": 0.3906, "step": 10777 }, { "epoch": 0.5816199881280018, "grad_norm": 0.8975909923707904, "learning_rate": 4.3587275638529906e-06, "loss": 0.3099, "step": 10778 }, { "epoch": 0.5816739517565161, "grad_norm": 0.8710788601996851, "learning_rate": 4.357989630247606e-06, "loss": 0.3546, "step": 10779 }, { "epoch": 0.5817279153850304, "grad_norm": 1.1459380014212872, "learning_rate": 4.357251729464752e-06, "loss": 0.4851, "step": 10780 }, { "epoch": 0.5817818790135448, "grad_norm": 0.9658110368898553, 
"learning_rate": 4.356513861525639e-06, "loss": 0.4495, "step": 10781 }, { "epoch": 0.5818358426420592, "grad_norm": 0.7521307472138556, "learning_rate": 4.355776026451473e-06, "loss": 0.3269, "step": 10782 }, { "epoch": 0.5818898062705736, "grad_norm": 0.967765688113511, "learning_rate": 4.355038224263459e-06, "loss": 0.4625, "step": 10783 }, { "epoch": 0.581943769899088, "grad_norm": 1.0955319101143879, "learning_rate": 4.354300454982803e-06, "loss": 0.4658, "step": 10784 }, { "epoch": 0.5819977335276024, "grad_norm": 0.9261400265988177, "learning_rate": 4.353562718630711e-06, "loss": 0.4705, "step": 10785 }, { "epoch": 0.5820516971561168, "grad_norm": 1.042367974870492, "learning_rate": 4.352825015228384e-06, "loss": 0.5001, "step": 10786 }, { "epoch": 0.5821056607846311, "grad_norm": 0.9510692325560809, "learning_rate": 4.352087344797026e-06, "loss": 0.5201, "step": 10787 }, { "epoch": 0.5821596244131455, "grad_norm": 1.3282907518336626, "learning_rate": 4.351349707357836e-06, "loss": 0.5732, "step": 10788 }, { "epoch": 0.5822135880416599, "grad_norm": 0.9341402362951141, "learning_rate": 4.350612102932017e-06, "loss": 0.5467, "step": 10789 }, { "epoch": 0.5822675516701743, "grad_norm": 0.9752629086916158, "learning_rate": 4.3498745315407675e-06, "loss": 0.4426, "step": 10790 }, { "epoch": 0.5823215152986887, "grad_norm": 0.8748259599752354, "learning_rate": 4.349136993205286e-06, "loss": 0.4168, "step": 10791 }, { "epoch": 0.5823754789272031, "grad_norm": 0.8329306424613497, "learning_rate": 4.34839948794677e-06, "loss": 0.3949, "step": 10792 }, { "epoch": 0.5824294425557175, "grad_norm": 0.9880349383847927, "learning_rate": 4.347662015786417e-06, "loss": 0.382, "step": 10793 }, { "epoch": 0.5824834061842318, "grad_norm": 0.9321874005482529, "learning_rate": 4.346924576745423e-06, "loss": 0.2706, "step": 10794 }, { "epoch": 0.5825373698127462, "grad_norm": 0.8658902059789751, "learning_rate": 4.3461871708449796e-06, "loss": 0.3523, "step": 10795 }, { "epoch": 0.5825913334412606, "grad_norm": 1.0350407139473208, "learning_rate": 4.3454497981062835e-06, "loss": 0.4838, "step": 10796 }, { "epoch": 0.582645297069775, "grad_norm": 1.2771424303257874, "learning_rate": 4.3447124585505275e-06, "loss": 0.5801, "step": 10797 }, { "epoch": 0.5826992606982894, "grad_norm": 1.3376493542889678, "learning_rate": 4.343975152198902e-06, "loss": 0.5823, "step": 10798 }, { "epoch": 0.5827532243268038, "grad_norm": 0.9344517897367045, "learning_rate": 4.343237879072598e-06, "loss": 0.3396, "step": 10799 }, { "epoch": 0.5828071879553182, "grad_norm": 1.2324296848730487, "learning_rate": 4.342500639192807e-06, "loss": 0.5686, "step": 10800 }, { "epoch": 0.5828611515838324, "grad_norm": 1.0680883324252228, "learning_rate": 4.341763432580719e-06, "loss": 0.4781, "step": 10801 }, { "epoch": 0.5829151152123468, "grad_norm": 0.799447953191825, "learning_rate": 4.3410262592575184e-06, "loss": 0.354, "step": 10802 }, { "epoch": 0.5829690788408612, "grad_norm": 1.1411180690895073, "learning_rate": 4.340289119244394e-06, "loss": 0.5158, "step": 10803 }, { "epoch": 0.5830230424693756, "grad_norm": 1.1657415667613888, "learning_rate": 4.339552012562532e-06, "loss": 0.5316, "step": 10804 }, { "epoch": 0.58307700609789, "grad_norm": 1.0247373722521023, "learning_rate": 4.338814939233118e-06, "loss": 0.4158, "step": 10805 }, { "epoch": 0.5831309697264044, "grad_norm": 0.9201124043069568, "learning_rate": 4.338077899277337e-06, "loss": 0.3458, "step": 10806 }, { "epoch": 0.5831849333549188, "grad_norm": 
1.0067653679319337, "learning_rate": 4.337340892716369e-06, "loss": 0.4111, "step": 10807 }, { "epoch": 0.5832388969834331, "grad_norm": 0.8808130783274866, "learning_rate": 4.3366039195714e-06, "loss": 0.3608, "step": 10808 }, { "epoch": 0.5832928606119475, "grad_norm": 1.1524358544832196, "learning_rate": 4.33586697986361e-06, "loss": 0.4674, "step": 10809 }, { "epoch": 0.5833468242404619, "grad_norm": 0.9515742596894681, "learning_rate": 4.335130073614179e-06, "loss": 0.4628, "step": 10810 }, { "epoch": 0.5834007878689763, "grad_norm": 1.2789779005108033, "learning_rate": 4.334393200844287e-06, "loss": 0.6095, "step": 10811 }, { "epoch": 0.5834547514974907, "grad_norm": 1.0708047013285145, "learning_rate": 4.333656361575112e-06, "loss": 0.5121, "step": 10812 }, { "epoch": 0.5835087151260051, "grad_norm": 0.7750442829321201, "learning_rate": 4.332919555827831e-06, "loss": 0.3292, "step": 10813 }, { "epoch": 0.5835626787545195, "grad_norm": 1.1093352160515706, "learning_rate": 4.332182783623622e-06, "loss": 0.5163, "step": 10814 }, { "epoch": 0.5836166423830338, "grad_norm": 0.9674289541564522, "learning_rate": 4.33144604498366e-06, "loss": 0.3577, "step": 10815 }, { "epoch": 0.5836706060115482, "grad_norm": 1.0330852815972185, "learning_rate": 4.330709339929118e-06, "loss": 0.4843, "step": 10816 }, { "epoch": 0.5837245696400626, "grad_norm": 0.9367013911498115, "learning_rate": 4.329972668481173e-06, "loss": 0.368, "step": 10817 }, { "epoch": 0.583778533268577, "grad_norm": 0.6968575785944818, "learning_rate": 4.329236030660994e-06, "loss": 0.3159, "step": 10818 }, { "epoch": 0.5838324968970914, "grad_norm": 1.1251998352061752, "learning_rate": 4.328499426489754e-06, "loss": 0.5352, "step": 10819 }, { "epoch": 0.5838864605256058, "grad_norm": 0.9258307630179584, "learning_rate": 4.3277628559886254e-06, "loss": 0.4271, "step": 10820 }, { "epoch": 0.5839404241541202, "grad_norm": 1.0012220052547136, "learning_rate": 4.327026319178776e-06, "loss": 0.5471, "step": 10821 }, { "epoch": 0.5839943877826345, "grad_norm": 0.9385059050329881, "learning_rate": 4.3262898160813786e-06, "loss": 0.3767, "step": 10822 }, { "epoch": 0.5840483514111489, "grad_norm": 1.083170503772952, "learning_rate": 4.325553346717595e-06, "loss": 0.4818, "step": 10823 }, { "epoch": 0.5841023150396633, "grad_norm": 0.8806323862327279, "learning_rate": 4.324816911108594e-06, "loss": 0.3769, "step": 10824 }, { "epoch": 0.5841562786681777, "grad_norm": 1.159151686787587, "learning_rate": 4.324080509275544e-06, "loss": 0.5388, "step": 10825 }, { "epoch": 0.584210242296692, "grad_norm": 0.8232237332657785, "learning_rate": 4.323344141239608e-06, "loss": 0.3064, "step": 10826 }, { "epoch": 0.5842642059252064, "grad_norm": 0.8778969859905339, "learning_rate": 4.32260780702195e-06, "loss": 0.3977, "step": 10827 }, { "epoch": 0.5843181695537208, "grad_norm": 1.212872101587933, "learning_rate": 4.321871506643733e-06, "loss": 0.5722, "step": 10828 }, { "epoch": 0.5843721331822351, "grad_norm": 0.9882109998239202, "learning_rate": 4.321135240126122e-06, "loss": 0.5125, "step": 10829 }, { "epoch": 0.5844260968107495, "grad_norm": 1.08578912023213, "learning_rate": 4.320399007490273e-06, "loss": 0.406, "step": 10830 }, { "epoch": 0.5844800604392639, "grad_norm": 0.9167977630868087, "learning_rate": 4.319662808757349e-06, "loss": 0.3846, "step": 10831 }, { "epoch": 0.5845340240677783, "grad_norm": 0.8798173879334827, "learning_rate": 4.318926643948507e-06, "loss": 0.3324, "step": 10832 }, { "epoch": 0.5845879876962927, "grad_norm": 
0.9011126364450855, "learning_rate": 4.318190513084908e-06, "loss": 0.4119, "step": 10833 }, { "epoch": 0.5846419513248071, "grad_norm": 0.9703719198288479, "learning_rate": 4.317454416187707e-06, "loss": 0.3779, "step": 10834 }, { "epoch": 0.5846959149533215, "grad_norm": 1.1866939449654437, "learning_rate": 4.316718353278062e-06, "loss": 0.4641, "step": 10835 }, { "epoch": 0.5847498785818358, "grad_norm": 0.8453886282589783, "learning_rate": 4.315982324377127e-06, "loss": 0.376, "step": 10836 }, { "epoch": 0.5848038422103502, "grad_norm": 1.124974207219576, "learning_rate": 4.315246329506057e-06, "loss": 0.5529, "step": 10837 }, { "epoch": 0.5848578058388646, "grad_norm": 1.0374886720657028, "learning_rate": 4.314510368686002e-06, "loss": 0.664, "step": 10838 }, { "epoch": 0.584911769467379, "grad_norm": 1.0229583540418845, "learning_rate": 4.313774441938119e-06, "loss": 0.5487, "step": 10839 }, { "epoch": 0.5849657330958934, "grad_norm": 1.0790251155164094, "learning_rate": 4.313038549283556e-06, "loss": 0.5783, "step": 10840 }, { "epoch": 0.5850196967244078, "grad_norm": 1.0428318145074607, "learning_rate": 4.312302690743465e-06, "loss": 0.4164, "step": 10841 }, { "epoch": 0.5850736603529221, "grad_norm": 1.0282500801496532, "learning_rate": 4.311566866338996e-06, "loss": 0.3854, "step": 10842 }, { "epoch": 0.5851276239814365, "grad_norm": 1.133535232107666, "learning_rate": 4.310831076091295e-06, "loss": 0.7857, "step": 10843 }, { "epoch": 0.5851815876099509, "grad_norm": 0.6956020773895513, "learning_rate": 4.31009532002151e-06, "loss": 0.2686, "step": 10844 }, { "epoch": 0.5852355512384653, "grad_norm": 1.0546692555437611, "learning_rate": 4.309359598150789e-06, "loss": 0.4177, "step": 10845 }, { "epoch": 0.5852895148669797, "grad_norm": 1.0277867848398559, "learning_rate": 4.308623910500275e-06, "loss": 0.4699, "step": 10846 }, { "epoch": 0.5853434784954941, "grad_norm": 1.2401841224841403, "learning_rate": 4.307888257091113e-06, "loss": 0.6137, "step": 10847 }, { "epoch": 0.5853974421240085, "grad_norm": 0.8335089252766369, "learning_rate": 4.3071526379444486e-06, "loss": 0.397, "step": 10848 }, { "epoch": 0.5854514057525227, "grad_norm": 0.9933158566030164, "learning_rate": 4.306417053081424e-06, "loss": 0.6724, "step": 10849 }, { "epoch": 0.5855053693810371, "grad_norm": 1.091579636715559, "learning_rate": 4.305681502523177e-06, "loss": 0.3932, "step": 10850 }, { "epoch": 0.5855593330095515, "grad_norm": 0.8379033096099369, "learning_rate": 4.304945986290851e-06, "loss": 0.3129, "step": 10851 }, { "epoch": 0.5856132966380659, "grad_norm": 1.038774404522165, "learning_rate": 4.304210504405584e-06, "loss": 0.5046, "step": 10852 }, { "epoch": 0.5856672602665803, "grad_norm": 1.2852988567723174, "learning_rate": 4.303475056888515e-06, "loss": 0.5627, "step": 10853 }, { "epoch": 0.5857212238950947, "grad_norm": 0.8225177280784101, "learning_rate": 4.302739643760782e-06, "loss": 0.3726, "step": 10854 }, { "epoch": 0.5857751875236091, "grad_norm": 1.045845087073345, "learning_rate": 4.302004265043523e-06, "loss": 0.4598, "step": 10855 }, { "epoch": 0.5858291511521234, "grad_norm": 1.0717066151245913, "learning_rate": 4.301268920757873e-06, "loss": 0.5606, "step": 10856 }, { "epoch": 0.5858831147806378, "grad_norm": 0.8963611134135105, "learning_rate": 4.300533610924963e-06, "loss": 0.478, "step": 10857 }, { "epoch": 0.5859370784091522, "grad_norm": 0.8709740646325408, "learning_rate": 4.29979833556593e-06, "loss": 0.383, "step": 10858 }, { "epoch": 0.5859910420376666, "grad_norm": 
1.0690207644758276, "learning_rate": 4.299063094701905e-06, "loss": 0.4522, "step": 10859 }, { "epoch": 0.586045005666181, "grad_norm": 0.8937104318276762, "learning_rate": 4.29832788835402e-06, "loss": 0.3466, "step": 10860 }, { "epoch": 0.5860989692946954, "grad_norm": 0.9421285976601806, "learning_rate": 4.297592716543406e-06, "loss": 0.4696, "step": 10861 }, { "epoch": 0.5861529329232098, "grad_norm": 0.9720992628483462, "learning_rate": 4.296857579291191e-06, "loss": 0.4732, "step": 10862 }, { "epoch": 0.5862068965517241, "grad_norm": 1.0572632106703115, "learning_rate": 4.296122476618507e-06, "loss": 0.5633, "step": 10863 }, { "epoch": 0.5862608601802385, "grad_norm": 1.108269161130122, "learning_rate": 4.295387408546479e-06, "loss": 0.5243, "step": 10864 }, { "epoch": 0.5863148238087529, "grad_norm": 1.0534266002803678, "learning_rate": 4.294652375096233e-06, "loss": 0.5458, "step": 10865 }, { "epoch": 0.5863687874372673, "grad_norm": 1.1145857121059677, "learning_rate": 4.293917376288897e-06, "loss": 0.535, "step": 10866 }, { "epoch": 0.5864227510657817, "grad_norm": 1.151301459396368, "learning_rate": 4.293182412145592e-06, "loss": 0.527, "step": 10867 }, { "epoch": 0.5864767146942961, "grad_norm": 1.0733259557287107, "learning_rate": 4.292447482687446e-06, "loss": 0.4791, "step": 10868 }, { "epoch": 0.5865306783228105, "grad_norm": 0.8876370979203263, "learning_rate": 4.291712587935578e-06, "loss": 0.3931, "step": 10869 }, { "epoch": 0.5865846419513248, "grad_norm": 0.9630137750287598, "learning_rate": 4.290977727911114e-06, "loss": 0.3678, "step": 10870 }, { "epoch": 0.5866386055798392, "grad_norm": 1.0168923090682573, "learning_rate": 4.29024290263517e-06, "loss": 0.4729, "step": 10871 }, { "epoch": 0.5866925692083536, "grad_norm": 0.9943916396497462, "learning_rate": 4.289508112128866e-06, "loss": 0.4032, "step": 10872 }, { "epoch": 0.586746532836868, "grad_norm": 0.9730199429147925, "learning_rate": 4.288773356413324e-06, "loss": 0.4756, "step": 10873 }, { "epoch": 0.5868004964653823, "grad_norm": 1.0029776063797764, "learning_rate": 4.288038635509658e-06, "loss": 0.4288, "step": 10874 }, { "epoch": 0.5868544600938967, "grad_norm": 1.0779102457076701, "learning_rate": 4.287303949438987e-06, "loss": 0.5281, "step": 10875 }, { "epoch": 0.5869084237224111, "grad_norm": 0.8321280427550214, "learning_rate": 4.286569298222425e-06, "loss": 0.3829, "step": 10876 }, { "epoch": 0.5869623873509254, "grad_norm": 1.1089223767173197, "learning_rate": 4.285834681881089e-06, "loss": 0.5705, "step": 10877 }, { "epoch": 0.5870163509794398, "grad_norm": 1.0358090001915436, "learning_rate": 4.2851001004360905e-06, "loss": 0.4361, "step": 10878 }, { "epoch": 0.5870703146079542, "grad_norm": 0.9789316428592371, "learning_rate": 4.284365553908543e-06, "loss": 0.4622, "step": 10879 }, { "epoch": 0.5871242782364686, "grad_norm": 1.0251788989402013, "learning_rate": 4.2836310423195556e-06, "loss": 0.5573, "step": 10880 }, { "epoch": 0.587178241864983, "grad_norm": 1.2117162583343548, "learning_rate": 4.282896565690243e-06, "loss": 0.5384, "step": 10881 }, { "epoch": 0.5872322054934974, "grad_norm": 1.1053199197618562, "learning_rate": 4.282162124041712e-06, "loss": 0.5394, "step": 10882 }, { "epoch": 0.5872861691220118, "grad_norm": 0.8952581201269858, "learning_rate": 4.281427717395072e-06, "loss": 0.3734, "step": 10883 }, { "epoch": 0.5873401327505261, "grad_norm": 0.9335331053590437, "learning_rate": 4.280693345771433e-06, "loss": 0.4799, "step": 10884 }, { "epoch": 0.5873940963790405, "grad_norm": 
0.9379927928947436, "learning_rate": 4.279959009191897e-06, "loss": 0.3793, "step": 10885 }, { "epoch": 0.5874480600075549, "grad_norm": 1.18284250982107, "learning_rate": 4.2792247076775726e-06, "loss": 0.5128, "step": 10886 }, { "epoch": 0.5875020236360693, "grad_norm": 1.0331819794274721, "learning_rate": 4.278490441249563e-06, "loss": 0.3905, "step": 10887 }, { "epoch": 0.5875559872645837, "grad_norm": 1.1079970568605035, "learning_rate": 4.277756209928974e-06, "loss": 0.495, "step": 10888 }, { "epoch": 0.5876099508930981, "grad_norm": 1.0994439850388666, "learning_rate": 4.277022013736904e-06, "loss": 0.5144, "step": 10889 }, { "epoch": 0.5876639145216125, "grad_norm": 1.0032966823530296, "learning_rate": 4.276287852694458e-06, "loss": 0.4645, "step": 10890 }, { "epoch": 0.5877178781501268, "grad_norm": 1.056717497916176, "learning_rate": 4.275553726822738e-06, "loss": 0.4559, "step": 10891 }, { "epoch": 0.5877718417786412, "grad_norm": 1.0291998105957025, "learning_rate": 4.274819636142838e-06, "loss": 0.4223, "step": 10892 }, { "epoch": 0.5878258054071556, "grad_norm": 0.8278834667878908, "learning_rate": 4.27408558067586e-06, "loss": 0.3389, "step": 10893 }, { "epoch": 0.58787976903567, "grad_norm": 1.0500008397392324, "learning_rate": 4.2733515604429034e-06, "loss": 0.5824, "step": 10894 }, { "epoch": 0.5879337326641844, "grad_norm": 1.034437968727046, "learning_rate": 4.272617575465059e-06, "loss": 0.5581, "step": 10895 }, { "epoch": 0.5879876962926988, "grad_norm": 0.9031477917485106, "learning_rate": 4.271883625763427e-06, "loss": 0.3773, "step": 10896 }, { "epoch": 0.5880416599212132, "grad_norm": 1.0592669512390782, "learning_rate": 4.271149711359101e-06, "loss": 0.4596, "step": 10897 }, { "epoch": 0.5880956235497274, "grad_norm": 1.0367256768854838, "learning_rate": 4.270415832273176e-06, "loss": 0.4513, "step": 10898 }, { "epoch": 0.5881495871782418, "grad_norm": 1.076352784151165, "learning_rate": 4.26968198852674e-06, "loss": 0.4788, "step": 10899 }, { "epoch": 0.5882035508067562, "grad_norm": 1.0136429941729483, "learning_rate": 4.268948180140888e-06, "loss": 0.3847, "step": 10900 }, { "epoch": 0.5882575144352706, "grad_norm": 1.2676501530475186, "learning_rate": 4.268214407136707e-06, "loss": 0.5682, "step": 10901 }, { "epoch": 0.588311478063785, "grad_norm": 1.1276807041776558, "learning_rate": 4.2674806695352895e-06, "loss": 0.5382, "step": 10902 }, { "epoch": 0.5883654416922994, "grad_norm": 0.7867281974868271, "learning_rate": 4.266746967357723e-06, "loss": 0.3327, "step": 10903 }, { "epoch": 0.5884194053208138, "grad_norm": 0.8484602686788164, "learning_rate": 4.266013300625095e-06, "loss": 0.3477, "step": 10904 }, { "epoch": 0.5884733689493281, "grad_norm": 1.0067213999635962, "learning_rate": 4.265279669358494e-06, "loss": 0.4097, "step": 10905 }, { "epoch": 0.5885273325778425, "grad_norm": 1.0334072156383585, "learning_rate": 4.264546073579e-06, "loss": 0.4632, "step": 10906 }, { "epoch": 0.5885812962063569, "grad_norm": 0.9097239938002113, "learning_rate": 4.2638125133077005e-06, "loss": 0.3261, "step": 10907 }, { "epoch": 0.5886352598348713, "grad_norm": 1.1914365165735643, "learning_rate": 4.2630789885656786e-06, "loss": 0.6019, "step": 10908 }, { "epoch": 0.5886892234633857, "grad_norm": 1.0119170170784235, "learning_rate": 4.2623454993740165e-06, "loss": 0.4213, "step": 10909 }, { "epoch": 0.5887431870919001, "grad_norm": 1.11262761958445, "learning_rate": 4.2616120457537945e-06, "loss": 0.4331, "step": 10910 }, { "epoch": 0.5887971507204144, "grad_norm": 
0.8625303474073208, "learning_rate": 4.260878627726095e-06, "loss": 0.3676, "step": 10911 }, { "epoch": 0.5888511143489288, "grad_norm": 1.1086800243531476, "learning_rate": 4.260145245311995e-06, "loss": 0.4922, "step": 10912 }, { "epoch": 0.5889050779774432, "grad_norm": 0.9305531051231767, "learning_rate": 4.259411898532573e-06, "loss": 0.7115, "step": 10913 }, { "epoch": 0.5889590416059576, "grad_norm": 1.1418046148283931, "learning_rate": 4.258678587408908e-06, "loss": 0.4106, "step": 10914 }, { "epoch": 0.589013005234472, "grad_norm": 1.0324747865478328, "learning_rate": 4.257945311962073e-06, "loss": 0.628, "step": 10915 }, { "epoch": 0.5890669688629864, "grad_norm": 0.7050640644084826, "learning_rate": 4.257212072213145e-06, "loss": 0.2229, "step": 10916 }, { "epoch": 0.5891209324915008, "grad_norm": 1.0445930200132412, "learning_rate": 4.256478868183198e-06, "loss": 0.6044, "step": 10917 }, { "epoch": 0.5891748961200151, "grad_norm": 0.9012154964583723, "learning_rate": 4.255745699893307e-06, "loss": 0.389, "step": 10918 }, { "epoch": 0.5892288597485295, "grad_norm": 1.0303306259436482, "learning_rate": 4.2550125673645396e-06, "loss": 0.4991, "step": 10919 }, { "epoch": 0.5892828233770439, "grad_norm": 0.9747157673974188, "learning_rate": 4.254279470617969e-06, "loss": 0.4067, "step": 10920 }, { "epoch": 0.5893367870055582, "grad_norm": 1.1064949097690493, "learning_rate": 4.253546409674664e-06, "loss": 0.5987, "step": 10921 }, { "epoch": 0.5893907506340726, "grad_norm": 1.138372555435741, "learning_rate": 4.252813384555697e-06, "loss": 0.6081, "step": 10922 }, { "epoch": 0.589444714262587, "grad_norm": 1.0446330340382701, "learning_rate": 4.252080395282132e-06, "loss": 0.465, "step": 10923 }, { "epoch": 0.5894986778911014, "grad_norm": 1.0389905734468081, "learning_rate": 4.251347441875039e-06, "loss": 0.4168, "step": 10924 }, { "epoch": 0.5895526415196157, "grad_norm": 0.75425978295224, "learning_rate": 4.250614524355482e-06, "loss": 0.379, "step": 10925 }, { "epoch": 0.5896066051481301, "grad_norm": 1.024857856384912, "learning_rate": 4.249881642744526e-06, "loss": 0.5811, "step": 10926 }, { "epoch": 0.5896605687766445, "grad_norm": 1.5484273708574183, "learning_rate": 4.249148797063233e-06, "loss": 0.4995, "step": 10927 }, { "epoch": 0.5897145324051589, "grad_norm": 0.9159910174500486, "learning_rate": 4.248415987332668e-06, "loss": 0.4257, "step": 10928 }, { "epoch": 0.5897684960336733, "grad_norm": 1.0177357543080732, "learning_rate": 4.247683213573892e-06, "loss": 0.3942, "step": 10929 }, { "epoch": 0.5898224596621877, "grad_norm": 0.941862968318894, "learning_rate": 4.246950475807966e-06, "loss": 0.4498, "step": 10930 }, { "epoch": 0.5898764232907021, "grad_norm": 0.858240013492606, "learning_rate": 4.2462177740559495e-06, "loss": 0.4284, "step": 10931 }, { "epoch": 0.5899303869192164, "grad_norm": 0.8802814664276891, "learning_rate": 4.245485108338904e-06, "loss": 0.4374, "step": 10932 }, { "epoch": 0.5899843505477308, "grad_norm": 1.1833059533532375, "learning_rate": 4.244752478677881e-06, "loss": 0.4409, "step": 10933 }, { "epoch": 0.5900383141762452, "grad_norm": 0.944253774977486, "learning_rate": 4.24401988509394e-06, "loss": 0.3817, "step": 10934 }, { "epoch": 0.5900922778047596, "grad_norm": 0.9804645323411332, "learning_rate": 4.243287327608137e-06, "loss": 0.4321, "step": 10935 }, { "epoch": 0.590146241433274, "grad_norm": 1.125208681986375, "learning_rate": 4.242554806241526e-06, "loss": 0.5304, "step": 10936 }, { "epoch": 0.5902002050617884, "grad_norm": 
0.9128708235564581, "learning_rate": 4.24182232101516e-06, "loss": 0.3897, "step": 10937 }, { "epoch": 0.5902541686903028, "grad_norm": 0.9230092535442815, "learning_rate": 4.241089871950092e-06, "loss": 0.4343, "step": 10938 }, { "epoch": 0.5903081323188171, "grad_norm": 0.9902241926987247, "learning_rate": 4.240357459067374e-06, "loss": 0.3782, "step": 10939 }, { "epoch": 0.5903620959473315, "grad_norm": 1.1040283943714986, "learning_rate": 4.239625082388055e-06, "loss": 0.5776, "step": 10940 }, { "epoch": 0.5904160595758459, "grad_norm": 1.075195945291856, "learning_rate": 4.238892741933184e-06, "loss": 0.5503, "step": 10941 }, { "epoch": 0.5904700232043603, "grad_norm": 1.0895552890798734, "learning_rate": 4.23816043772381e-06, "loss": 0.5449, "step": 10942 }, { "epoch": 0.5905239868328747, "grad_norm": 1.2302439170135944, "learning_rate": 4.237428169780981e-06, "loss": 0.5053, "step": 10943 }, { "epoch": 0.5905779504613891, "grad_norm": 1.1474995459810682, "learning_rate": 4.23669593812574e-06, "loss": 0.5107, "step": 10944 }, { "epoch": 0.5906319140899035, "grad_norm": 1.0008069167958131, "learning_rate": 4.235963742779135e-06, "loss": 0.4149, "step": 10945 }, { "epoch": 0.5906858777184177, "grad_norm": 1.2629172697375057, "learning_rate": 4.235231583762212e-06, "loss": 0.6186, "step": 10946 }, { "epoch": 0.5907398413469321, "grad_norm": 1.0195046247805273, "learning_rate": 4.234499461096009e-06, "loss": 0.4649, "step": 10947 }, { "epoch": 0.5907938049754465, "grad_norm": 0.9799907110608739, "learning_rate": 4.23376737480157e-06, "loss": 0.4574, "step": 10948 }, { "epoch": 0.5908477686039609, "grad_norm": 0.9912336471024078, "learning_rate": 4.233035324899937e-06, "loss": 0.4562, "step": 10949 }, { "epoch": 0.5909017322324753, "grad_norm": 0.9893554526041589, "learning_rate": 4.232303311412147e-06, "loss": 0.45, "step": 10950 }, { "epoch": 0.5909556958609897, "grad_norm": 1.0729675223156676, "learning_rate": 4.231571334359243e-06, "loss": 0.4365, "step": 10951 }, { "epoch": 0.5910096594895041, "grad_norm": 1.1519138825643778, "learning_rate": 4.230839393762258e-06, "loss": 0.5148, "step": 10952 }, { "epoch": 0.5910636231180184, "grad_norm": 1.0198104645819699, "learning_rate": 4.2301074896422355e-06, "loss": 0.3782, "step": 10953 }, { "epoch": 0.5911175867465328, "grad_norm": 0.9859417453629512, "learning_rate": 4.229375622020204e-06, "loss": 0.4581, "step": 10954 }, { "epoch": 0.5911715503750472, "grad_norm": 0.9317699594587162, "learning_rate": 4.228643790917201e-06, "loss": 0.396, "step": 10955 }, { "epoch": 0.5912255140035616, "grad_norm": 0.9036768758710577, "learning_rate": 4.22791199635426e-06, "loss": 0.3495, "step": 10956 }, { "epoch": 0.591279477632076, "grad_norm": 1.249592755366539, "learning_rate": 4.227180238352413e-06, "loss": 0.5615, "step": 10957 }, { "epoch": 0.5913334412605904, "grad_norm": 0.9045935649528373, "learning_rate": 4.226448516932693e-06, "loss": 0.3599, "step": 10958 }, { "epoch": 0.5913874048891048, "grad_norm": 1.0425835038176108, "learning_rate": 4.225716832116129e-06, "loss": 0.4709, "step": 10959 }, { "epoch": 0.5914413685176191, "grad_norm": 1.0455443291793665, "learning_rate": 4.2249851839237525e-06, "loss": 0.5269, "step": 10960 }, { "epoch": 0.5914953321461335, "grad_norm": 1.09307464010701, "learning_rate": 4.2242535723765875e-06, "loss": 0.4098, "step": 10961 }, { "epoch": 0.5915492957746479, "grad_norm": 1.2405492994648102, "learning_rate": 4.223521997495665e-06, "loss": 0.5689, "step": 10962 }, { "epoch": 0.5916032594031623, "grad_norm": 
0.9646559549033716, "learning_rate": 4.222790459302012e-06, "loss": 0.5031, "step": 10963 }, { "epoch": 0.5916572230316767, "grad_norm": 1.1191743640952783, "learning_rate": 4.22205895781665e-06, "loss": 0.5382, "step": 10964 }, { "epoch": 0.5917111866601911, "grad_norm": 0.9495121834730811, "learning_rate": 4.221327493060604e-06, "loss": 0.3825, "step": 10965 }, { "epoch": 0.5917651502887055, "grad_norm": 1.0037463072654413, "learning_rate": 4.220596065054898e-06, "loss": 0.3966, "step": 10966 }, { "epoch": 0.5918191139172198, "grad_norm": 1.0281347819325084, "learning_rate": 4.219864673820555e-06, "loss": 0.4116, "step": 10967 }, { "epoch": 0.5918730775457341, "grad_norm": 0.8442836264011336, "learning_rate": 4.219133319378594e-06, "loss": 0.4382, "step": 10968 }, { "epoch": 0.5919270411742485, "grad_norm": 0.8964448032455127, "learning_rate": 4.218402001750036e-06, "loss": 0.3927, "step": 10969 }, { "epoch": 0.591981004802763, "grad_norm": 1.1047450201423767, "learning_rate": 4.217670720955899e-06, "loss": 0.579, "step": 10970 }, { "epoch": 0.5920349684312773, "grad_norm": 1.0464273411284535, "learning_rate": 4.216939477017201e-06, "loss": 0.4891, "step": 10971 }, { "epoch": 0.5920889320597917, "grad_norm": 0.924752206528834, "learning_rate": 4.216208269954959e-06, "loss": 0.506, "step": 10972 }, { "epoch": 0.5921428956883061, "grad_norm": 0.8799690263330859, "learning_rate": 4.215477099790188e-06, "loss": 0.4437, "step": 10973 }, { "epoch": 0.5921968593168204, "grad_norm": 1.2194662988368745, "learning_rate": 4.214745966543906e-06, "loss": 0.5223, "step": 10974 }, { "epoch": 0.5922508229453348, "grad_norm": 1.0456472439515208, "learning_rate": 4.214014870237122e-06, "loss": 0.4262, "step": 10975 }, { "epoch": 0.5923047865738492, "grad_norm": 0.9632941951523539, "learning_rate": 4.213283810890848e-06, "loss": 0.5406, "step": 10976 }, { "epoch": 0.5923587502023636, "grad_norm": 1.1258323437335487, "learning_rate": 4.212552788526099e-06, "loss": 0.4284, "step": 10977 }, { "epoch": 0.592412713830878, "grad_norm": 1.0600155517591565, "learning_rate": 4.2118218031638836e-06, "loss": 0.4368, "step": 10978 }, { "epoch": 0.5924666774593924, "grad_norm": 1.1047790665009887, "learning_rate": 4.211090854825212e-06, "loss": 0.5582, "step": 10979 }, { "epoch": 0.5925206410879067, "grad_norm": 1.1235383859113703, "learning_rate": 4.210359943531092e-06, "loss": 0.5523, "step": 10980 }, { "epoch": 0.5925746047164211, "grad_norm": 1.141720060811772, "learning_rate": 4.209629069302528e-06, "loss": 0.3788, "step": 10981 }, { "epoch": 0.5926285683449355, "grad_norm": 1.1159423796231283, "learning_rate": 4.20889823216053e-06, "loss": 0.5596, "step": 10982 }, { "epoch": 0.5926825319734499, "grad_norm": 1.1312680790317653, "learning_rate": 4.2081674321261e-06, "loss": 0.4226, "step": 10983 }, { "epoch": 0.5927364956019643, "grad_norm": 0.8156913292471246, "learning_rate": 4.207436669220244e-06, "loss": 0.3924, "step": 10984 }, { "epoch": 0.5927904592304787, "grad_norm": 1.070964873656353, "learning_rate": 4.206705943463965e-06, "loss": 0.4112, "step": 10985 }, { "epoch": 0.5928444228589931, "grad_norm": 1.0689752087315099, "learning_rate": 4.205975254878262e-06, "loss": 0.4293, "step": 10986 }, { "epoch": 0.5928983864875074, "grad_norm": 0.8555779194583981, "learning_rate": 4.20524460348414e-06, "loss": 0.3485, "step": 10987 }, { "epoch": 0.5929523501160218, "grad_norm": 1.020338374713288, "learning_rate": 4.204513989302596e-06, "loss": 0.3761, "step": 10988 }, { "epoch": 0.5930063137445362, "grad_norm": 
0.9481835443397031, "learning_rate": 4.203783412354627e-06, "loss": 0.4614, "step": 10989 }, { "epoch": 0.5930602773730506, "grad_norm": 1.0045156591355506, "learning_rate": 4.203052872661233e-06, "loss": 0.5837, "step": 10990 }, { "epoch": 0.593114241001565, "grad_norm": 0.9565793372810458, "learning_rate": 4.202322370243411e-06, "loss": 0.5078, "step": 10991 }, { "epoch": 0.5931682046300794, "grad_norm": 0.8538731237522023, "learning_rate": 4.201591905122154e-06, "loss": 0.4059, "step": 10992 }, { "epoch": 0.5932221682585938, "grad_norm": 1.0011376409007693, "learning_rate": 4.200861477318459e-06, "loss": 0.4344, "step": 10993 }, { "epoch": 0.593276131887108, "grad_norm": 1.0020034555379485, "learning_rate": 4.200131086853319e-06, "loss": 0.4371, "step": 10994 }, { "epoch": 0.5933300955156224, "grad_norm": 0.8960779027843356, "learning_rate": 4.1994007337477226e-06, "loss": 0.4, "step": 10995 }, { "epoch": 0.5933840591441368, "grad_norm": 1.2059822367220299, "learning_rate": 4.198670418022663e-06, "loss": 0.5408, "step": 10996 }, { "epoch": 0.5934380227726512, "grad_norm": 0.9767187437282322, "learning_rate": 4.197940139699132e-06, "loss": 0.4182, "step": 10997 }, { "epoch": 0.5934919864011656, "grad_norm": 0.9917065346923863, "learning_rate": 4.197209898798117e-06, "loss": 0.4166, "step": 10998 }, { "epoch": 0.59354595002968, "grad_norm": 1.072529580279694, "learning_rate": 4.196479695340604e-06, "loss": 0.5371, "step": 10999 }, { "epoch": 0.5935999136581944, "grad_norm": 0.7097591923694451, "learning_rate": 4.195749529347583e-06, "loss": 0.2791, "step": 11000 }, { "epoch": 0.5935999136581944, "eval_loss": 0.5420788526535034, "eval_runtime": 164.0786, "eval_samples_per_second": 20.959, "eval_steps_per_second": 0.878, "step": 11000 }, { "epoch": 0.5936538772867087, "grad_norm": 1.028865403702055, "learning_rate": 4.195019400840041e-06, "loss": 0.5185, "step": 11001 }, { "epoch": 0.5937078409152231, "grad_norm": 1.1283669384031108, "learning_rate": 4.1942893098389555e-06, "loss": 0.6308, "step": 11002 }, { "epoch": 0.5937618045437375, "grad_norm": 0.9673086622878548, "learning_rate": 4.193559256365317e-06, "loss": 0.4362, "step": 11003 }, { "epoch": 0.5938157681722519, "grad_norm": 0.8802151664381347, "learning_rate": 4.192829240440105e-06, "loss": 0.3538, "step": 11004 }, { "epoch": 0.5938697318007663, "grad_norm": 0.9016033848380937, "learning_rate": 4.192099262084301e-06, "loss": 0.358, "step": 11005 }, { "epoch": 0.5939236954292807, "grad_norm": 0.7625127469033044, "learning_rate": 4.191369321318887e-06, "loss": 0.3233, "step": 11006 }, { "epoch": 0.5939776590577951, "grad_norm": 0.9507115592672235, "learning_rate": 4.190639418164839e-06, "loss": 0.4246, "step": 11007 }, { "epoch": 0.5940316226863094, "grad_norm": 0.8898432588010998, "learning_rate": 4.18990955264314e-06, "loss": 0.4136, "step": 11008 }, { "epoch": 0.5940855863148238, "grad_norm": 0.8777419717265519, "learning_rate": 4.189179724774763e-06, "loss": 0.4508, "step": 11009 }, { "epoch": 0.5941395499433382, "grad_norm": 0.8776384775585993, "learning_rate": 4.188449934580684e-06, "loss": 0.3434, "step": 11010 }, { "epoch": 0.5941935135718526, "grad_norm": 1.1437768352895774, "learning_rate": 4.187720182081879e-06, "loss": 0.525, "step": 11011 }, { "epoch": 0.594247477200367, "grad_norm": 1.0836989671138357, "learning_rate": 4.186990467299323e-06, "loss": 0.5341, "step": 11012 }, { "epoch": 0.5943014408288814, "grad_norm": 1.1362705109385673, "learning_rate": 4.186260790253987e-06, "loss": 0.4704, "step": 11013 }, { "epoch": 
0.5943554044573958, "grad_norm": 0.8755400876777962, "learning_rate": 4.185531150966844e-06, "loss": 0.3317, "step": 11014 }, { "epoch": 0.59440936808591, "grad_norm": 0.8288124314186148, "learning_rate": 4.184801549458864e-06, "loss": 0.3753, "step": 11015 }, { "epoch": 0.5944633317144244, "grad_norm": 0.9970224306304769, "learning_rate": 4.184071985751016e-06, "loss": 0.3971, "step": 11016 }, { "epoch": 0.5945172953429388, "grad_norm": 1.0831541854501883, "learning_rate": 4.183342459864268e-06, "loss": 0.427, "step": 11017 }, { "epoch": 0.5945712589714532, "grad_norm": 1.037879113522104, "learning_rate": 4.182612971819588e-06, "loss": 0.396, "step": 11018 }, { "epoch": 0.5946252225999676, "grad_norm": 1.119223511566342, "learning_rate": 4.181883521637944e-06, "loss": 0.5169, "step": 11019 }, { "epoch": 0.594679186228482, "grad_norm": 0.92575984695025, "learning_rate": 4.181154109340298e-06, "loss": 0.4228, "step": 11020 }, { "epoch": 0.5947331498569964, "grad_norm": 1.0855645789952875, "learning_rate": 4.1804247349476155e-06, "loss": 0.4118, "step": 11021 }, { "epoch": 0.5947871134855107, "grad_norm": 0.9294617428348763, "learning_rate": 4.179695398480862e-06, "loss": 0.3601, "step": 11022 }, { "epoch": 0.5948410771140251, "grad_norm": 0.9255505656009985, "learning_rate": 4.1789660999609945e-06, "loss": 0.3787, "step": 11023 }, { "epoch": 0.5948950407425395, "grad_norm": 1.2488653349202081, "learning_rate": 4.178236839408977e-06, "loss": 0.5802, "step": 11024 }, { "epoch": 0.5949490043710539, "grad_norm": 1.1189188437555928, "learning_rate": 4.177507616845767e-06, "loss": 0.4753, "step": 11025 }, { "epoch": 0.5950029679995683, "grad_norm": 0.8385555694729772, "learning_rate": 4.1767784322923256e-06, "loss": 0.4826, "step": 11026 }, { "epoch": 0.5950569316280827, "grad_norm": 1.078048020443036, "learning_rate": 4.176049285769608e-06, "loss": 0.5416, "step": 11027 }, { "epoch": 0.5951108952565971, "grad_norm": 1.054186354216756, "learning_rate": 4.175320177298572e-06, "loss": 0.4772, "step": 11028 }, { "epoch": 0.5951648588851114, "grad_norm": 1.1510285179635926, "learning_rate": 4.174591106900175e-06, "loss": 0.4312, "step": 11029 }, { "epoch": 0.5952188225136258, "grad_norm": 1.0265001060574979, "learning_rate": 4.173862074595366e-06, "loss": 0.5201, "step": 11030 }, { "epoch": 0.5952727861421402, "grad_norm": 0.990120254343981, "learning_rate": 4.173133080405102e-06, "loss": 0.4517, "step": 11031 }, { "epoch": 0.5953267497706546, "grad_norm": 1.0270092135342384, "learning_rate": 4.172404124350333e-06, "loss": 0.4751, "step": 11032 }, { "epoch": 0.595380713399169, "grad_norm": 0.7784894600066139, "learning_rate": 4.17167520645201e-06, "loss": 0.2956, "step": 11033 }, { "epoch": 0.5954346770276834, "grad_norm": 0.8476745930597585, "learning_rate": 4.170946326731087e-06, "loss": 0.304, "step": 11034 }, { "epoch": 0.5954886406561978, "grad_norm": 0.8928641309592352, "learning_rate": 4.170217485208507e-06, "loss": 0.4078, "step": 11035 }, { "epoch": 0.5955426042847121, "grad_norm": 0.8916848477178771, "learning_rate": 4.169488681905222e-06, "loss": 0.368, "step": 11036 }, { "epoch": 0.5955965679132265, "grad_norm": 0.9546895677210628, "learning_rate": 4.168759916842174e-06, "loss": 0.4251, "step": 11037 }, { "epoch": 0.5956505315417409, "grad_norm": 0.9104164135397887, "learning_rate": 4.168031190040313e-06, "loss": 0.3288, "step": 11038 }, { "epoch": 0.5957044951702553, "grad_norm": 1.0103756370951251, "learning_rate": 4.167302501520582e-06, "loss": 0.4401, "step": 11039 }, { "epoch": 
0.5957584587987697, "grad_norm": 1.0856257774737101, "learning_rate": 4.166573851303922e-06, "loss": 0.5565, "step": 11040 }, { "epoch": 0.595812422427284, "grad_norm": 1.145908636457165, "learning_rate": 4.165845239411278e-06, "loss": 0.4111, "step": 11041 }, { "epoch": 0.5958663860557984, "grad_norm": 1.083402425971385, "learning_rate": 4.165116665863589e-06, "loss": 0.4486, "step": 11042 }, { "epoch": 0.5959203496843127, "grad_norm": 0.8986021797672753, "learning_rate": 4.164388130681798e-06, "loss": 0.3536, "step": 11043 }, { "epoch": 0.5959743133128271, "grad_norm": 1.1192005033601518, "learning_rate": 4.163659633886839e-06, "loss": 0.425, "step": 11044 }, { "epoch": 0.5960282769413415, "grad_norm": 0.9323640530768588, "learning_rate": 4.1629311754996526e-06, "loss": 0.4896, "step": 11045 }, { "epoch": 0.5960822405698559, "grad_norm": 1.2529437056820838, "learning_rate": 4.162202755541177e-06, "loss": 0.424, "step": 11046 }, { "epoch": 0.5961362041983703, "grad_norm": 1.078663730478356, "learning_rate": 4.161474374032343e-06, "loss": 0.6525, "step": 11047 }, { "epoch": 0.5961901678268847, "grad_norm": 0.7096034667796915, "learning_rate": 4.160746030994091e-06, "loss": 0.2438, "step": 11048 }, { "epoch": 0.596244131455399, "grad_norm": 1.009539121957056, "learning_rate": 4.160017726447352e-06, "loss": 0.5328, "step": 11049 }, { "epoch": 0.5962980950839134, "grad_norm": 1.0927938745605676, "learning_rate": 4.159289460413055e-06, "loss": 0.4793, "step": 11050 }, { "epoch": 0.5963520587124278, "grad_norm": 0.944710588803073, "learning_rate": 4.158561232912134e-06, "loss": 0.5425, "step": 11051 }, { "epoch": 0.5964060223409422, "grad_norm": 1.0289983325038785, "learning_rate": 4.157833043965519e-06, "loss": 0.5512, "step": 11052 }, { "epoch": 0.5964599859694566, "grad_norm": 0.9774162841184549, "learning_rate": 4.157104893594138e-06, "loss": 0.5631, "step": 11053 }, { "epoch": 0.596513949597971, "grad_norm": 1.227700178299568, "learning_rate": 4.156376781818919e-06, "loss": 0.5685, "step": 11054 }, { "epoch": 0.5965679132264854, "grad_norm": 1.076849690762014, "learning_rate": 4.15564870866079e-06, "loss": 0.4993, "step": 11055 }, { "epoch": 0.5966218768549997, "grad_norm": 0.9867993404681888, "learning_rate": 4.154920674140677e-06, "loss": 0.369, "step": 11056 }, { "epoch": 0.5966758404835141, "grad_norm": 1.092557576359699, "learning_rate": 4.1541926782794996e-06, "loss": 0.4592, "step": 11057 }, { "epoch": 0.5967298041120285, "grad_norm": 0.7978266195198108, "learning_rate": 4.153464721098186e-06, "loss": 0.3689, "step": 11058 }, { "epoch": 0.5967837677405429, "grad_norm": 1.2542044294996602, "learning_rate": 4.152736802617658e-06, "loss": 0.6038, "step": 11059 }, { "epoch": 0.5968377313690573, "grad_norm": 0.896623635364973, "learning_rate": 4.152008922858833e-06, "loss": 0.3393, "step": 11060 }, { "epoch": 0.5968916949975717, "grad_norm": 0.9930942647356851, "learning_rate": 4.151281081842634e-06, "loss": 0.4753, "step": 11061 }, { "epoch": 0.5969456586260861, "grad_norm": 1.1102018972534509, "learning_rate": 4.150553279589981e-06, "loss": 0.502, "step": 11062 }, { "epoch": 0.5969996222546003, "grad_norm": 0.9762769152249332, "learning_rate": 4.14982551612179e-06, "loss": 0.4787, "step": 11063 }, { "epoch": 0.5970535858831147, "grad_norm": 1.0425334979906387, "learning_rate": 4.149097791458977e-06, "loss": 0.5247, "step": 11064 }, { "epoch": 0.5971075495116291, "grad_norm": 0.9631213831060839, "learning_rate": 4.148370105622459e-06, "loss": 0.4293, "step": 11065 }, { "epoch": 
0.5971615131401435, "grad_norm": 1.0976698658961441, "learning_rate": 4.14764245863315e-06, "loss": 0.5572, "step": 11066 }, { "epoch": 0.5972154767686579, "grad_norm": 1.02968370095937, "learning_rate": 4.146914850511963e-06, "loss": 0.4131, "step": 11067 }, { "epoch": 0.5972694403971723, "grad_norm": 1.1296387326346575, "learning_rate": 4.14618728127981e-06, "loss": 0.6333, "step": 11068 }, { "epoch": 0.5973234040256867, "grad_norm": 1.1236492104062799, "learning_rate": 4.145459750957603e-06, "loss": 0.5888, "step": 11069 }, { "epoch": 0.597377367654201, "grad_norm": 0.964787254159277, "learning_rate": 4.144732259566253e-06, "loss": 0.5035, "step": 11070 }, { "epoch": 0.5974313312827154, "grad_norm": 1.3613712403618075, "learning_rate": 4.144004807126666e-06, "loss": 0.5237, "step": 11071 }, { "epoch": 0.5974852949112298, "grad_norm": 1.024765481324011, "learning_rate": 4.14327739365975e-06, "loss": 0.451, "step": 11072 }, { "epoch": 0.5975392585397442, "grad_norm": 1.0978986366597525, "learning_rate": 4.142550019186413e-06, "loss": 0.6871, "step": 11073 }, { "epoch": 0.5975932221682586, "grad_norm": 1.2786828735229758, "learning_rate": 4.14182268372756e-06, "loss": 0.5489, "step": 11074 }, { "epoch": 0.597647185796773, "grad_norm": 1.1012207074807878, "learning_rate": 4.141095387304096e-06, "loss": 0.4988, "step": 11075 }, { "epoch": 0.5977011494252874, "grad_norm": 0.8669211180704689, "learning_rate": 4.140368129936923e-06, "loss": 0.3829, "step": 11076 }, { "epoch": 0.5977551130538017, "grad_norm": 1.1906745704057233, "learning_rate": 4.139640911646945e-06, "loss": 0.6462, "step": 11077 }, { "epoch": 0.5978090766823161, "grad_norm": 1.0056326209611721, "learning_rate": 4.138913732455061e-06, "loss": 0.3973, "step": 11078 }, { "epoch": 0.5978630403108305, "grad_norm": 1.0589156739683308, "learning_rate": 4.138186592382172e-06, "loss": 0.4976, "step": 11079 }, { "epoch": 0.5979170039393449, "grad_norm": 1.1146954849022401, "learning_rate": 4.137459491449175e-06, "loss": 0.5627, "step": 11080 }, { "epoch": 0.5979709675678593, "grad_norm": 0.9292250575332416, "learning_rate": 4.136732429676969e-06, "loss": 0.4187, "step": 11081 }, { "epoch": 0.5980249311963737, "grad_norm": 0.8994965637779799, "learning_rate": 4.13600540708645e-06, "loss": 0.3583, "step": 11082 }, { "epoch": 0.5980788948248881, "grad_norm": 1.0064433197629927, "learning_rate": 4.135278423698517e-06, "loss": 0.4779, "step": 11083 }, { "epoch": 0.5981328584534024, "grad_norm": 0.7803995534844482, "learning_rate": 4.13455147953406e-06, "loss": 0.3904, "step": 11084 }, { "epoch": 0.5981868220819168, "grad_norm": 1.0762144639325626, "learning_rate": 4.1338245746139715e-06, "loss": 0.5632, "step": 11085 }, { "epoch": 0.5982407857104312, "grad_norm": 0.8302205804069329, "learning_rate": 4.133097708959146e-06, "loss": 0.3815, "step": 11086 }, { "epoch": 0.5982947493389456, "grad_norm": 1.1696139085767192, "learning_rate": 4.132370882590473e-06, "loss": 0.5876, "step": 11087 }, { "epoch": 0.59834871296746, "grad_norm": 0.8558123368203989, "learning_rate": 4.131644095528842e-06, "loss": 0.339, "step": 11088 }, { "epoch": 0.5984026765959743, "grad_norm": 1.0514783904370386, "learning_rate": 4.130917347795145e-06, "loss": 0.6101, "step": 11089 }, { "epoch": 0.5984566402244887, "grad_norm": 1.0520827567189084, "learning_rate": 4.130190639410264e-06, "loss": 0.4474, "step": 11090 }, { "epoch": 0.598510603853003, "grad_norm": 1.00140140364508, "learning_rate": 4.1294639703950904e-06, "loss": 0.4425, "step": 11091 }, { "epoch": 
0.5985645674815174, "grad_norm": 1.4009344594298783, "learning_rate": 4.1287373407705055e-06, "loss": 0.5972, "step": 11092 }, { "epoch": 0.5986185311100318, "grad_norm": 0.8548737380540052, "learning_rate": 4.1280107505573955e-06, "loss": 0.3991, "step": 11093 }, { "epoch": 0.5986724947385462, "grad_norm": 1.0938090088163557, "learning_rate": 4.127284199776643e-06, "loss": 0.5764, "step": 11094 }, { "epoch": 0.5987264583670606, "grad_norm": 0.9655497813858812, "learning_rate": 4.1265576884491285e-06, "loss": 0.4183, "step": 11095 }, { "epoch": 0.598780421995575, "grad_norm": 1.1308355362410822, "learning_rate": 4.1258312165957345e-06, "loss": 0.5124, "step": 11096 }, { "epoch": 0.5988343856240894, "grad_norm": 1.037591215472827, "learning_rate": 4.1251047842373404e-06, "loss": 0.455, "step": 11097 }, { "epoch": 0.5988883492526037, "grad_norm": 1.0136892349564408, "learning_rate": 4.124378391394826e-06, "loss": 0.4771, "step": 11098 }, { "epoch": 0.5989423128811181, "grad_norm": 1.024007181548137, "learning_rate": 4.123652038089063e-06, "loss": 0.4521, "step": 11099 }, { "epoch": 0.5989962765096325, "grad_norm": 0.9965188050366893, "learning_rate": 4.122925724340933e-06, "loss": 0.4217, "step": 11100 }, { "epoch": 0.5990502401381469, "grad_norm": 1.1604751137164162, "learning_rate": 4.122199450171307e-06, "loss": 0.5727, "step": 11101 }, { "epoch": 0.5991042037666613, "grad_norm": 0.9532426435535281, "learning_rate": 4.121473215601063e-06, "loss": 0.5046, "step": 11102 }, { "epoch": 0.5991581673951757, "grad_norm": 1.0199831398009973, "learning_rate": 4.120747020651071e-06, "loss": 0.5105, "step": 11103 }, { "epoch": 0.5992121310236901, "grad_norm": 1.1549708449921712, "learning_rate": 4.120020865342204e-06, "loss": 0.5553, "step": 11104 }, { "epoch": 0.5992660946522044, "grad_norm": 1.0487922200195547, "learning_rate": 4.119294749695332e-06, "loss": 0.6123, "step": 11105 }, { "epoch": 0.5993200582807188, "grad_norm": 1.2835984534530167, "learning_rate": 4.118568673731323e-06, "loss": 0.5415, "step": 11106 }, { "epoch": 0.5993740219092332, "grad_norm": 1.038710219444791, "learning_rate": 4.117842637471045e-06, "loss": 0.516, "step": 11107 }, { "epoch": 0.5994279855377476, "grad_norm": 1.102123358937724, "learning_rate": 4.117116640935368e-06, "loss": 0.5308, "step": 11108 }, { "epoch": 0.599481949166262, "grad_norm": 1.1227984213177935, "learning_rate": 4.116390684145155e-06, "loss": 0.4512, "step": 11109 }, { "epoch": 0.5995359127947764, "grad_norm": 1.4802644942721959, "learning_rate": 4.115664767121272e-06, "loss": 0.559, "step": 11110 }, { "epoch": 0.5995898764232908, "grad_norm": 0.9165525815853295, "learning_rate": 4.114938889884583e-06, "loss": 0.4082, "step": 11111 }, { "epoch": 0.599643840051805, "grad_norm": 1.113086088166424, "learning_rate": 4.114213052455951e-06, "loss": 0.4559, "step": 11112 }, { "epoch": 0.5996978036803194, "grad_norm": 1.1129592296582869, "learning_rate": 4.1134872548562345e-06, "loss": 0.4443, "step": 11113 }, { "epoch": 0.5997517673088338, "grad_norm": 1.0356120912083353, "learning_rate": 4.112761497106294e-06, "loss": 0.4075, "step": 11114 }, { "epoch": 0.5998057309373482, "grad_norm": 1.1899599388708462, "learning_rate": 4.112035779226993e-06, "loss": 0.5519, "step": 11115 }, { "epoch": 0.5998596945658626, "grad_norm": 0.918012525697702, "learning_rate": 4.1113101012391835e-06, "loss": 0.4563, "step": 11116 }, { "epoch": 0.599913658194377, "grad_norm": 1.0632704671898991, "learning_rate": 4.110584463163725e-06, "loss": 0.5708, "step": 11117 }, { 
"epoch": 0.5999676218228913, "grad_norm": 1.2562895987150515, "learning_rate": 4.109858865021474e-06, "loss": 0.6185, "step": 11118 }, { "epoch": 0.6000215854514057, "grad_norm": 0.9052682160717231, "learning_rate": 4.1091333068332836e-06, "loss": 0.5206, "step": 11119 }, { "epoch": 0.6000755490799201, "grad_norm": 0.8562382794630141, "learning_rate": 4.108407788620007e-06, "loss": 0.363, "step": 11120 }, { "epoch": 0.6001295127084345, "grad_norm": 0.8699963359042175, "learning_rate": 4.1076823104024966e-06, "loss": 0.4149, "step": 11121 }, { "epoch": 0.6001834763369489, "grad_norm": 1.0942963162580295, "learning_rate": 4.106956872201603e-06, "loss": 0.5174, "step": 11122 }, { "epoch": 0.6002374399654633, "grad_norm": 0.9254979158562309, "learning_rate": 4.106231474038177e-06, "loss": 0.3445, "step": 11123 }, { "epoch": 0.6002914035939777, "grad_norm": 1.204049852626815, "learning_rate": 4.105506115933067e-06, "loss": 0.5187, "step": 11124 }, { "epoch": 0.600345367222492, "grad_norm": 1.0054002505406736, "learning_rate": 4.104780797907121e-06, "loss": 0.4222, "step": 11125 }, { "epoch": 0.6003993308510064, "grad_norm": 1.106748688314904, "learning_rate": 4.104055519981184e-06, "loss": 0.4265, "step": 11126 }, { "epoch": 0.6004532944795208, "grad_norm": 1.2019503518294674, "learning_rate": 4.103330282176101e-06, "loss": 0.6356, "step": 11127 }, { "epoch": 0.6005072581080352, "grad_norm": 0.8831026764362648, "learning_rate": 4.102605084512717e-06, "loss": 0.4038, "step": 11128 }, { "epoch": 0.6005612217365496, "grad_norm": 0.9192533864005417, "learning_rate": 4.101879927011875e-06, "loss": 0.4076, "step": 11129 }, { "epoch": 0.600615185365064, "grad_norm": 0.9696283404685454, "learning_rate": 4.101154809694417e-06, "loss": 0.4362, "step": 11130 }, { "epoch": 0.6006691489935784, "grad_norm": 0.9486057472646162, "learning_rate": 4.100429732581184e-06, "loss": 0.3748, "step": 11131 }, { "epoch": 0.6007231126220927, "grad_norm": 0.926499981709349, "learning_rate": 4.099704695693014e-06, "loss": 0.437, "step": 11132 }, { "epoch": 0.6007770762506071, "grad_norm": 1.0451168739242667, "learning_rate": 4.098979699050746e-06, "loss": 0.6189, "step": 11133 }, { "epoch": 0.6008310398791215, "grad_norm": 0.8783930523802455, "learning_rate": 4.098254742675216e-06, "loss": 0.4574, "step": 11134 }, { "epoch": 0.6008850035076359, "grad_norm": 0.9890280154005837, "learning_rate": 4.097529826587262e-06, "loss": 0.508, "step": 11135 }, { "epoch": 0.6009389671361502, "grad_norm": 1.2559583508585266, "learning_rate": 4.096804950807717e-06, "loss": 0.5074, "step": 11136 }, { "epoch": 0.6009929307646646, "grad_norm": 0.7828456042890977, "learning_rate": 4.096080115357416e-06, "loss": 0.2782, "step": 11137 }, { "epoch": 0.601046894393179, "grad_norm": 1.0178313358440585, "learning_rate": 4.095355320257192e-06, "loss": 0.482, "step": 11138 }, { "epoch": 0.6011008580216933, "grad_norm": 0.7592620423274989, "learning_rate": 4.094630565527875e-06, "loss": 0.295, "step": 11139 }, { "epoch": 0.6011548216502077, "grad_norm": 0.9720104620195043, "learning_rate": 4.093905851190295e-06, "loss": 0.5308, "step": 11140 }, { "epoch": 0.6012087852787221, "grad_norm": 0.8580692696298982, "learning_rate": 4.093181177265282e-06, "loss": 0.4205, "step": 11141 }, { "epoch": 0.6012627489072365, "grad_norm": 1.168052625981166, "learning_rate": 4.092456543773662e-06, "loss": 0.4426, "step": 11142 }, { "epoch": 0.6013167125357509, "grad_norm": 0.9856739201489465, "learning_rate": 4.091731950736267e-06, "loss": 0.5008, "step": 11143 }, { 
"epoch": 0.6013706761642653, "grad_norm": 0.8610228738080602, "learning_rate": 4.091007398173914e-06, "loss": 0.3731, "step": 11144 }, { "epoch": 0.6014246397927797, "grad_norm": 1.0039160803527343, "learning_rate": 4.090282886107435e-06, "loss": 0.447, "step": 11145 }, { "epoch": 0.601478603421294, "grad_norm": 1.0989502001758973, "learning_rate": 4.089558414557649e-06, "loss": 0.5375, "step": 11146 }, { "epoch": 0.6015325670498084, "grad_norm": 1.023059728165426, "learning_rate": 4.088833983545381e-06, "loss": 0.4267, "step": 11147 }, { "epoch": 0.6015865306783228, "grad_norm": 0.902676412882943, "learning_rate": 4.088109593091447e-06, "loss": 0.371, "step": 11148 }, { "epoch": 0.6016404943068372, "grad_norm": 1.3501117122312973, "learning_rate": 4.087385243216673e-06, "loss": 0.5264, "step": 11149 }, { "epoch": 0.6016944579353516, "grad_norm": 0.9844987660451298, "learning_rate": 4.0866609339418725e-06, "loss": 0.4929, "step": 11150 }, { "epoch": 0.601748421563866, "grad_norm": 0.867126965495711, "learning_rate": 4.085936665287866e-06, "loss": 0.2327, "step": 11151 }, { "epoch": 0.6018023851923804, "grad_norm": 0.9341213020281629, "learning_rate": 4.085212437275467e-06, "loss": 0.4569, "step": 11152 }, { "epoch": 0.6018563488208947, "grad_norm": 0.9921423548551549, "learning_rate": 4.084488249925496e-06, "loss": 0.4025, "step": 11153 }, { "epoch": 0.6019103124494091, "grad_norm": 0.8241113122216805, "learning_rate": 4.08376410325876e-06, "loss": 0.3608, "step": 11154 }, { "epoch": 0.6019642760779235, "grad_norm": 1.0623265571884324, "learning_rate": 4.083039997296074e-06, "loss": 0.4376, "step": 11155 }, { "epoch": 0.6020182397064379, "grad_norm": 1.094521984738121, "learning_rate": 4.082315932058251e-06, "loss": 0.4961, "step": 11156 }, { "epoch": 0.6020722033349523, "grad_norm": 1.2371559182834297, "learning_rate": 4.0815919075661005e-06, "loss": 0.5837, "step": 11157 }, { "epoch": 0.6021261669634667, "grad_norm": 1.0657022523809114, "learning_rate": 4.080867923840431e-06, "loss": 0.5608, "step": 11158 }, { "epoch": 0.602180130591981, "grad_norm": 0.9645873010833275, "learning_rate": 4.080143980902054e-06, "loss": 0.4554, "step": 11159 }, { "epoch": 0.6022340942204953, "grad_norm": 0.978124875082468, "learning_rate": 4.079420078771773e-06, "loss": 0.4249, "step": 11160 }, { "epoch": 0.6022880578490097, "grad_norm": 0.9928962415851953, "learning_rate": 4.078696217470393e-06, "loss": 0.5787, "step": 11161 }, { "epoch": 0.6023420214775241, "grad_norm": 0.8782434874175966, "learning_rate": 4.077972397018719e-06, "loss": 0.3386, "step": 11162 }, { "epoch": 0.6023959851060385, "grad_norm": 1.1792525495011137, "learning_rate": 4.077248617437557e-06, "loss": 0.5919, "step": 11163 }, { "epoch": 0.6024499487345529, "grad_norm": 1.0055279733841165, "learning_rate": 4.076524878747706e-06, "loss": 0.5882, "step": 11164 }, { "epoch": 0.6025039123630673, "grad_norm": 1.310291546384891, "learning_rate": 4.075801180969968e-06, "loss": 0.5572, "step": 11165 }, { "epoch": 0.6025578759915817, "grad_norm": 1.1568031256848617, "learning_rate": 4.0750775241251436e-06, "loss": 0.4854, "step": 11166 }, { "epoch": 0.602611839620096, "grad_norm": 1.0775031331520695, "learning_rate": 4.074353908234032e-06, "loss": 0.4417, "step": 11167 }, { "epoch": 0.6026658032486104, "grad_norm": 1.1198670721037167, "learning_rate": 4.073630333317429e-06, "loss": 0.5437, "step": 11168 }, { "epoch": 0.6027197668771248, "grad_norm": 0.830277299231369, "learning_rate": 4.072906799396131e-06, "loss": 0.3909, "step": 11169 }, { 
"epoch": 0.6027737305056392, "grad_norm": 1.1974260041234048, "learning_rate": 4.072183306490934e-06, "loss": 0.4742, "step": 11170 }, { "epoch": 0.6028276941341536, "grad_norm": 1.041167079402187, "learning_rate": 4.071459854622631e-06, "loss": 0.558, "step": 11171 }, { "epoch": 0.602881657762668, "grad_norm": 0.9705731832509925, "learning_rate": 4.070736443812016e-06, "loss": 0.4601, "step": 11172 }, { "epoch": 0.6029356213911824, "grad_norm": 0.9558034544135781, "learning_rate": 4.070013074079879e-06, "loss": 0.5728, "step": 11173 }, { "epoch": 0.6029895850196967, "grad_norm": 1.2057312226077863, "learning_rate": 4.069289745447014e-06, "loss": 0.6178, "step": 11174 }, { "epoch": 0.6030435486482111, "grad_norm": 1.1842008051411952, "learning_rate": 4.068566457934203e-06, "loss": 0.4608, "step": 11175 }, { "epoch": 0.6030975122767255, "grad_norm": 1.0712938234486276, "learning_rate": 4.067843211562241e-06, "loss": 0.4298, "step": 11176 }, { "epoch": 0.6031514759052399, "grad_norm": 0.8413818085047241, "learning_rate": 4.067120006351911e-06, "loss": 0.3308, "step": 11177 }, { "epoch": 0.6032054395337543, "grad_norm": 0.9530148131435706, "learning_rate": 4.066396842323999e-06, "loss": 0.3881, "step": 11178 }, { "epoch": 0.6032594031622687, "grad_norm": 1.0553866418160338, "learning_rate": 4.065673719499292e-06, "loss": 0.4634, "step": 11179 }, { "epoch": 0.6033133667907831, "grad_norm": 1.0936798041969351, "learning_rate": 4.064950637898571e-06, "loss": 0.4742, "step": 11180 }, { "epoch": 0.6033673304192974, "grad_norm": 1.0645548660066442, "learning_rate": 4.06422759754262e-06, "loss": 0.4355, "step": 11181 }, { "epoch": 0.6034212940478118, "grad_norm": 1.11366095245495, "learning_rate": 4.063504598452218e-06, "loss": 0.5224, "step": 11182 }, { "epoch": 0.6034752576763261, "grad_norm": 0.979854962647158, "learning_rate": 4.062781640648144e-06, "loss": 0.4482, "step": 11183 }, { "epoch": 0.6035292213048405, "grad_norm": 0.8925371881412476, "learning_rate": 4.062058724151179e-06, "loss": 0.448, "step": 11184 }, { "epoch": 0.6035831849333549, "grad_norm": 0.9980494150895586, "learning_rate": 4.061335848982098e-06, "loss": 0.4539, "step": 11185 }, { "epoch": 0.6036371485618693, "grad_norm": 1.0742368810953316, "learning_rate": 4.06061301516168e-06, "loss": 0.5282, "step": 11186 }, { "epoch": 0.6036911121903836, "grad_norm": 1.0711779630734388, "learning_rate": 4.059890222710698e-06, "loss": 0.5313, "step": 11187 }, { "epoch": 0.603745075818898, "grad_norm": 1.0236163220939785, "learning_rate": 4.059167471649925e-06, "loss": 0.619, "step": 11188 }, { "epoch": 0.6037990394474124, "grad_norm": 1.0974989171775191, "learning_rate": 4.058444762000136e-06, "loss": 0.4331, "step": 11189 }, { "epoch": 0.6038530030759268, "grad_norm": 0.8455528999996135, "learning_rate": 4.057722093782101e-06, "loss": 0.3037, "step": 11190 }, { "epoch": 0.6039069667044412, "grad_norm": 1.1080437931127518, "learning_rate": 4.056999467016589e-06, "loss": 0.5232, "step": 11191 }, { "epoch": 0.6039609303329556, "grad_norm": 0.9852243743975097, "learning_rate": 4.056276881724371e-06, "loss": 0.565, "step": 11192 }, { "epoch": 0.60401489396147, "grad_norm": 0.8364547995679051, "learning_rate": 4.055554337926213e-06, "loss": 0.3522, "step": 11193 }, { "epoch": 0.6040688575899843, "grad_norm": 1.1448954804611817, "learning_rate": 4.054831835642885e-06, "loss": 0.7172, "step": 11194 }, { "epoch": 0.6041228212184987, "grad_norm": 0.8578572920096436, "learning_rate": 4.05410937489515e-06, "loss": 0.3283, "step": 11195 }, { 
"epoch": 0.6041767848470131, "grad_norm": 0.9489393845096502, "learning_rate": 4.053386955703772e-06, "loss": 0.416, "step": 11196 }, { "epoch": 0.6042307484755275, "grad_norm": 0.9614522477109886, "learning_rate": 4.052664578089514e-06, "loss": 0.7413, "step": 11197 }, { "epoch": 0.6042847121040419, "grad_norm": 0.9779521516273103, "learning_rate": 4.051942242073138e-06, "loss": 0.4593, "step": 11198 }, { "epoch": 0.6043386757325563, "grad_norm": 1.0346443853426976, "learning_rate": 4.051219947675407e-06, "loss": 0.3502, "step": 11199 }, { "epoch": 0.6043926393610707, "grad_norm": 0.9582755719812408, "learning_rate": 4.050497694917077e-06, "loss": 0.4681, "step": 11200 }, { "epoch": 0.604446602989585, "grad_norm": 0.9785315924871287, "learning_rate": 4.04977548381891e-06, "loss": 0.6152, "step": 11201 }, { "epoch": 0.6045005666180994, "grad_norm": 1.0953920484232824, "learning_rate": 4.049053314401659e-06, "loss": 0.4702, "step": 11202 }, { "epoch": 0.6045545302466138, "grad_norm": 0.836031336137012, "learning_rate": 4.048331186686083e-06, "loss": 0.4372, "step": 11203 }, { "epoch": 0.6046084938751282, "grad_norm": 0.9624096658059166, "learning_rate": 4.047609100692934e-06, "loss": 0.4611, "step": 11204 }, { "epoch": 0.6046624575036426, "grad_norm": 0.8876849527379131, "learning_rate": 4.046887056442968e-06, "loss": 0.3707, "step": 11205 }, { "epoch": 0.604716421132157, "grad_norm": 1.0003480444159765, "learning_rate": 4.046165053956936e-06, "loss": 0.5174, "step": 11206 }, { "epoch": 0.6047703847606714, "grad_norm": 1.178171876768977, "learning_rate": 4.04544309325559e-06, "loss": 0.5236, "step": 11207 }, { "epoch": 0.6048243483891856, "grad_norm": 0.9582173598008707, "learning_rate": 4.044721174359679e-06, "loss": 0.3862, "step": 11208 }, { "epoch": 0.6048783120177, "grad_norm": 0.7561630217454359, "learning_rate": 4.043999297289952e-06, "loss": 0.3351, "step": 11209 }, { "epoch": 0.6049322756462144, "grad_norm": 0.9727632021534015, "learning_rate": 4.043277462067156e-06, "loss": 0.4063, "step": 11210 }, { "epoch": 0.6049862392747288, "grad_norm": 1.1143997843851183, "learning_rate": 4.042555668712037e-06, "loss": 0.5563, "step": 11211 }, { "epoch": 0.6050402029032432, "grad_norm": 1.0074117849287367, "learning_rate": 4.0418339172453415e-06, "loss": 0.3397, "step": 11212 }, { "epoch": 0.6050941665317576, "grad_norm": 1.1368392369018505, "learning_rate": 4.041112207687812e-06, "loss": 0.5316, "step": 11213 }, { "epoch": 0.605148130160272, "grad_norm": 0.8113507982873562, "learning_rate": 4.040390540060191e-06, "loss": 0.3837, "step": 11214 }, { "epoch": 0.6052020937887863, "grad_norm": 0.9625014928434787, "learning_rate": 4.039668914383223e-06, "loss": 0.3434, "step": 11215 }, { "epoch": 0.6052560574173007, "grad_norm": 0.8931921044733949, "learning_rate": 4.038947330677645e-06, "loss": 0.379, "step": 11216 }, { "epoch": 0.6053100210458151, "grad_norm": 0.7900778083025959, "learning_rate": 4.038225788964196e-06, "loss": 0.2854, "step": 11217 }, { "epoch": 0.6053639846743295, "grad_norm": 1.138471180529493, "learning_rate": 4.0375042892636175e-06, "loss": 0.5592, "step": 11218 }, { "epoch": 0.6054179483028439, "grad_norm": 1.2745133822451102, "learning_rate": 4.036782831596641e-06, "loss": 0.5865, "step": 11219 }, { "epoch": 0.6054719119313583, "grad_norm": 0.8390068216580323, "learning_rate": 4.036061415984004e-06, "loss": 0.4344, "step": 11220 }, { "epoch": 0.6055258755598727, "grad_norm": 0.9161442200485773, "learning_rate": 4.035340042446443e-06, "loss": 0.4559, "step": 11221 }, { 
"epoch": 0.605579839188387, "grad_norm": 0.8694574772102286, "learning_rate": 4.034618711004688e-06, "loss": 0.4036, "step": 11222 }, { "epoch": 0.6056338028169014, "grad_norm": 1.05057122255945, "learning_rate": 4.033897421679472e-06, "loss": 0.3781, "step": 11223 }, { "epoch": 0.6056877664454158, "grad_norm": 1.0697882861140928, "learning_rate": 4.0331761744915255e-06, "loss": 0.4211, "step": 11224 }, { "epoch": 0.6057417300739302, "grad_norm": 1.031551210494694, "learning_rate": 4.0324549694615774e-06, "loss": 0.4547, "step": 11225 }, { "epoch": 0.6057956937024446, "grad_norm": 1.0243567610927513, "learning_rate": 4.0317338066103574e-06, "loss": 0.5351, "step": 11226 }, { "epoch": 0.605849657330959, "grad_norm": 0.9196717714318754, "learning_rate": 4.031012685958591e-06, "loss": 0.472, "step": 11227 }, { "epoch": 0.6059036209594734, "grad_norm": 0.9704136689534313, "learning_rate": 4.0302916075270035e-06, "loss": 0.4621, "step": 11228 }, { "epoch": 0.6059575845879877, "grad_norm": 0.8834547003530765, "learning_rate": 4.029570571336322e-06, "loss": 0.5115, "step": 11229 }, { "epoch": 0.606011548216502, "grad_norm": 0.9287278887285458, "learning_rate": 4.028849577407267e-06, "loss": 0.3732, "step": 11230 }, { "epoch": 0.6060655118450164, "grad_norm": 0.8865994125435757, "learning_rate": 4.028128625760562e-06, "loss": 0.4696, "step": 11231 }, { "epoch": 0.6061194754735308, "grad_norm": 1.1979263696097264, "learning_rate": 4.027407716416927e-06, "loss": 0.6034, "step": 11232 }, { "epoch": 0.6061734391020452, "grad_norm": 1.1846333399617008, "learning_rate": 4.026686849397081e-06, "loss": 0.6374, "step": 11233 }, { "epoch": 0.6062274027305596, "grad_norm": 0.8927272867057675, "learning_rate": 4.025966024721745e-06, "loss": 0.4132, "step": 11234 }, { "epoch": 0.606281366359074, "grad_norm": 0.9280986887753325, "learning_rate": 4.025245242411635e-06, "loss": 0.4912, "step": 11235 }, { "epoch": 0.6063353299875883, "grad_norm": 1.0552384756896573, "learning_rate": 4.024524502487468e-06, "loss": 0.4228, "step": 11236 }, { "epoch": 0.6063892936161027, "grad_norm": 0.8725498666984352, "learning_rate": 4.023803804969955e-06, "loss": 0.4231, "step": 11237 }, { "epoch": 0.6064432572446171, "grad_norm": 0.9727194629391644, "learning_rate": 4.023083149879815e-06, "loss": 0.4395, "step": 11238 }, { "epoch": 0.6064972208731315, "grad_norm": 1.1081886015991267, "learning_rate": 4.022362537237755e-06, "loss": 0.4966, "step": 11239 }, { "epoch": 0.6065511845016459, "grad_norm": 1.138666482144874, "learning_rate": 4.021641967064491e-06, "loss": 0.464, "step": 11240 }, { "epoch": 0.6066051481301603, "grad_norm": 1.162865405313143, "learning_rate": 4.020921439380728e-06, "loss": 0.561, "step": 11241 }, { "epoch": 0.6066591117586747, "grad_norm": 0.8368755219871664, "learning_rate": 4.020200954207178e-06, "loss": 0.3409, "step": 11242 }, { "epoch": 0.606713075387189, "grad_norm": 1.3050285815486153, "learning_rate": 4.01948051156455e-06, "loss": 0.4797, "step": 11243 }, { "epoch": 0.6067670390157034, "grad_norm": 0.9251036156066423, "learning_rate": 4.018760111473547e-06, "loss": 0.3434, "step": 11244 }, { "epoch": 0.6068210026442178, "grad_norm": 1.1043664981818386, "learning_rate": 4.018039753954875e-06, "loss": 0.4941, "step": 11245 }, { "epoch": 0.6068749662727322, "grad_norm": 0.9070523821739639, "learning_rate": 4.0173194390292366e-06, "loss": 0.3461, "step": 11246 }, { "epoch": 0.6069289299012466, "grad_norm": 0.8274946098131355, "learning_rate": 4.0165991667173365e-06, "loss": 0.4236, "step": 11247 }, { 
"epoch": 0.606982893529761, "grad_norm": 1.0246469991913778, "learning_rate": 4.015878937039875e-06, "loss": 0.4298, "step": 11248 }, { "epoch": 0.6070368571582753, "grad_norm": 0.9601031049789809, "learning_rate": 4.0151587500175545e-06, "loss": 0.4337, "step": 11249 }, { "epoch": 0.6070908207867897, "grad_norm": 1.1381191133275408, "learning_rate": 4.0144386056710705e-06, "loss": 0.6221, "step": 11250 }, { "epoch": 0.6071447844153041, "grad_norm": 0.8835880995541003, "learning_rate": 4.013718504021121e-06, "loss": 0.3881, "step": 11251 }, { "epoch": 0.6071987480438185, "grad_norm": 0.945169545507159, "learning_rate": 4.012998445088404e-06, "loss": 0.3469, "step": 11252 }, { "epoch": 0.6072527116723329, "grad_norm": 1.0768625747641027, "learning_rate": 4.0122784288936145e-06, "loss": 0.4484, "step": 11253 }, { "epoch": 0.6073066753008473, "grad_norm": 0.8217469368707649, "learning_rate": 4.011558455457446e-06, "loss": 0.3629, "step": 11254 }, { "epoch": 0.6073606389293617, "grad_norm": 1.0332280458208443, "learning_rate": 4.010838524800591e-06, "loss": 0.4356, "step": 11255 }, { "epoch": 0.6074146025578759, "grad_norm": 0.950266112529411, "learning_rate": 4.010118636943743e-06, "loss": 0.4377, "step": 11256 }, { "epoch": 0.6074685661863903, "grad_norm": 0.990426265626684, "learning_rate": 4.00939879190759e-06, "loss": 0.3485, "step": 11257 }, { "epoch": 0.6075225298149047, "grad_norm": 0.878715608521457, "learning_rate": 4.008678989712822e-06, "loss": 0.4263, "step": 11258 }, { "epoch": 0.6075764934434191, "grad_norm": 1.0054317251161695, "learning_rate": 4.007959230380125e-06, "loss": 0.5952, "step": 11259 }, { "epoch": 0.6076304570719335, "grad_norm": 0.9309127020208549, "learning_rate": 4.007239513930189e-06, "loss": 0.5628, "step": 11260 }, { "epoch": 0.6076844207004479, "grad_norm": 1.0287018787504136, "learning_rate": 4.006519840383697e-06, "loss": 0.4848, "step": 11261 }, { "epoch": 0.6077383843289623, "grad_norm": 1.097060620967825, "learning_rate": 4.0058002097613344e-06, "loss": 0.4431, "step": 11262 }, { "epoch": 0.6077923479574766, "grad_norm": 0.9122367920296621, "learning_rate": 4.0050806220837845e-06, "loss": 0.4725, "step": 11263 }, { "epoch": 0.607846311585991, "grad_norm": 0.9532191921072232, "learning_rate": 4.004361077371727e-06, "loss": 0.4694, "step": 11264 }, { "epoch": 0.6079002752145054, "grad_norm": 1.2189006182858988, "learning_rate": 4.003641575645842e-06, "loss": 0.5467, "step": 11265 }, { "epoch": 0.6079542388430198, "grad_norm": 1.0129672702730645, "learning_rate": 4.002922116926812e-06, "loss": 0.5341, "step": 11266 }, { "epoch": 0.6080082024715342, "grad_norm": 0.9196009372406767, "learning_rate": 4.0022027012353125e-06, "loss": 0.4533, "step": 11267 }, { "epoch": 0.6080621661000486, "grad_norm": 0.9665268030127374, "learning_rate": 4.00148332859202e-06, "loss": 0.3507, "step": 11268 }, { "epoch": 0.608116129728563, "grad_norm": 1.2051561613292476, "learning_rate": 4.000763999017612e-06, "loss": 0.4799, "step": 11269 }, { "epoch": 0.6081700933570773, "grad_norm": 1.049088487036102, "learning_rate": 4.000044712532761e-06, "loss": 0.4496, "step": 11270 }, { "epoch": 0.6082240569855917, "grad_norm": 0.9317509571044644, "learning_rate": 3.999325469158141e-06, "loss": 0.4395, "step": 11271 }, { "epoch": 0.6082780206141061, "grad_norm": 1.128061141658857, "learning_rate": 3.9986062689144225e-06, "loss": 0.527, "step": 11272 }, { "epoch": 0.6083319842426205, "grad_norm": 1.0142018782937168, "learning_rate": 3.997887111822277e-06, "loss": 0.536, "step": 11273 }, { 
"epoch": 0.6083859478711349, "grad_norm": 1.108691741727035, "learning_rate": 3.997167997902373e-06, "loss": 0.5016, "step": 11274 }, { "epoch": 0.6084399114996493, "grad_norm": 0.9486114255410073, "learning_rate": 3.99644892717538e-06, "loss": 0.3954, "step": 11275 }, { "epoch": 0.6084938751281637, "grad_norm": 0.8068982327668329, "learning_rate": 3.9957298996619646e-06, "loss": 0.5372, "step": 11276 }, { "epoch": 0.608547838756678, "grad_norm": 0.8989840810385241, "learning_rate": 3.9950109153827934e-06, "loss": 0.4155, "step": 11277 }, { "epoch": 0.6086018023851923, "grad_norm": 0.9784852480309489, "learning_rate": 3.994291974358526e-06, "loss": 0.4356, "step": 11278 }, { "epoch": 0.6086557660137067, "grad_norm": 1.0910800867370731, "learning_rate": 3.99357307660983e-06, "loss": 0.5441, "step": 11279 }, { "epoch": 0.6087097296422211, "grad_norm": 1.1021726474859521, "learning_rate": 3.992854222157365e-06, "loss": 0.5106, "step": 11280 }, { "epoch": 0.6087636932707355, "grad_norm": 0.9179396649128452, "learning_rate": 3.992135411021793e-06, "loss": 0.3822, "step": 11281 }, { "epoch": 0.6088176568992499, "grad_norm": 1.0017417596112426, "learning_rate": 3.991416643223773e-06, "loss": 0.4303, "step": 11282 }, { "epoch": 0.6088716205277643, "grad_norm": 1.0908943941028675, "learning_rate": 3.990697918783964e-06, "loss": 0.3898, "step": 11283 }, { "epoch": 0.6089255841562786, "grad_norm": 0.9333723012773977, "learning_rate": 3.989979237723023e-06, "loss": 0.3628, "step": 11284 }, { "epoch": 0.608979547784793, "grad_norm": 1.1999071680927131, "learning_rate": 3.989260600061602e-06, "loss": 0.3874, "step": 11285 }, { "epoch": 0.6090335114133074, "grad_norm": 1.2242123147858932, "learning_rate": 3.988542005820358e-06, "loss": 0.5281, "step": 11286 }, { "epoch": 0.6090874750418218, "grad_norm": 0.9149691471030462, "learning_rate": 3.9878234550199445e-06, "loss": 0.3081, "step": 11287 }, { "epoch": 0.6091414386703362, "grad_norm": 1.0853354841819784, "learning_rate": 3.987104947681012e-06, "loss": 0.6947, "step": 11288 }, { "epoch": 0.6091954022988506, "grad_norm": 0.8720304927225979, "learning_rate": 3.986386483824212e-06, "loss": 0.3764, "step": 11289 }, { "epoch": 0.609249365927365, "grad_norm": 0.9241425905475186, "learning_rate": 3.985668063470194e-06, "loss": 0.4038, "step": 11290 }, { "epoch": 0.6093033295558793, "grad_norm": 0.7620496624255647, "learning_rate": 3.984949686639608e-06, "loss": 0.293, "step": 11291 }, { "epoch": 0.6093572931843937, "grad_norm": 0.9510751321047908, "learning_rate": 3.984231353353097e-06, "loss": 0.4689, "step": 11292 }, { "epoch": 0.6094112568129081, "grad_norm": 1.1450204389556082, "learning_rate": 3.9835130636313094e-06, "loss": 0.4885, "step": 11293 }, { "epoch": 0.6094652204414225, "grad_norm": 0.9832060345427881, "learning_rate": 3.982794817494889e-06, "loss": 0.4229, "step": 11294 }, { "epoch": 0.6095191840699369, "grad_norm": 0.8454917103125924, "learning_rate": 3.982076614964476e-06, "loss": 0.3147, "step": 11295 }, { "epoch": 0.6095731476984513, "grad_norm": 1.07712178224362, "learning_rate": 3.981358456060717e-06, "loss": 0.4777, "step": 11296 }, { "epoch": 0.6096271113269657, "grad_norm": 0.8990886360331498, "learning_rate": 3.980640340804249e-06, "loss": 0.3585, "step": 11297 }, { "epoch": 0.60968107495548, "grad_norm": 0.8947749828679066, "learning_rate": 3.979922269215715e-06, "loss": 0.4773, "step": 11298 }, { "epoch": 0.6097350385839944, "grad_norm": 1.022364391588578, "learning_rate": 3.97920424131575e-06, "loss": 0.3734, "step": 11299 }, { 
"epoch": 0.6097890022125088, "grad_norm": 1.0316290097436582, "learning_rate": 3.97848625712499e-06, "loss": 0.401, "step": 11300 }, { "epoch": 0.6098429658410232, "grad_norm": 0.9267509025853948, "learning_rate": 3.977768316664073e-06, "loss": 0.4116, "step": 11301 }, { "epoch": 0.6098969294695376, "grad_norm": 0.9505863823076416, "learning_rate": 3.977050419953634e-06, "loss": 0.3948, "step": 11302 }, { "epoch": 0.609950893098052, "grad_norm": 0.7493025113473181, "learning_rate": 3.9763325670143034e-06, "loss": 0.3892, "step": 11303 }, { "epoch": 0.6100048567265663, "grad_norm": 1.1288665355762224, "learning_rate": 3.975614757866716e-06, "loss": 0.4823, "step": 11304 }, { "epoch": 0.6100588203550806, "grad_norm": 1.1464619682545463, "learning_rate": 3.9748969925315e-06, "loss": 0.4859, "step": 11305 }, { "epoch": 0.610112783983595, "grad_norm": 0.7375602844699507, "learning_rate": 3.974179271029286e-06, "loss": 0.334, "step": 11306 }, { "epoch": 0.6101667476121094, "grad_norm": 1.3288730669615159, "learning_rate": 3.9734615933807e-06, "loss": 0.52, "step": 11307 }, { "epoch": 0.6102207112406238, "grad_norm": 0.9940409182134845, "learning_rate": 3.972743959606371e-06, "loss": 0.5769, "step": 11308 }, { "epoch": 0.6102746748691382, "grad_norm": 0.9266444884063763, "learning_rate": 3.972026369726924e-06, "loss": 0.45, "step": 11309 }, { "epoch": 0.6103286384976526, "grad_norm": 0.9482166234084654, "learning_rate": 3.971308823762983e-06, "loss": 0.5219, "step": 11310 }, { "epoch": 0.610382602126167, "grad_norm": 1.0241842221373116, "learning_rate": 3.97059132173517e-06, "loss": 0.5138, "step": 11311 }, { "epoch": 0.6104365657546813, "grad_norm": 0.9318765932640736, "learning_rate": 3.969873863664111e-06, "loss": 0.5197, "step": 11312 }, { "epoch": 0.6104905293831957, "grad_norm": 1.1533524464942064, "learning_rate": 3.969156449570421e-06, "loss": 0.5235, "step": 11313 }, { "epoch": 0.6105444930117101, "grad_norm": 1.104617232147211, "learning_rate": 3.9684390794747204e-06, "loss": 0.4893, "step": 11314 }, { "epoch": 0.6105984566402245, "grad_norm": 1.1844849853542643, "learning_rate": 3.96772175339763e-06, "loss": 0.4907, "step": 11315 }, { "epoch": 0.6106524202687389, "grad_norm": 1.2470168125160055, "learning_rate": 3.967004471359763e-06, "loss": 0.4528, "step": 11316 }, { "epoch": 0.6107063838972533, "grad_norm": 0.9134421342224568, "learning_rate": 3.966287233381736e-06, "loss": 0.5379, "step": 11317 }, { "epoch": 0.6107603475257676, "grad_norm": 1.175179464143768, "learning_rate": 3.965570039484166e-06, "loss": 0.5529, "step": 11318 }, { "epoch": 0.610814311154282, "grad_norm": 0.921827071728991, "learning_rate": 3.9648528896876625e-06, "loss": 0.4175, "step": 11319 }, { "epoch": 0.6108682747827964, "grad_norm": 1.0057821878975541, "learning_rate": 3.964135784012838e-06, "loss": 0.4846, "step": 11320 }, { "epoch": 0.6109222384113108, "grad_norm": 0.9838859749531169, "learning_rate": 3.963418722480302e-06, "loss": 0.473, "step": 11321 }, { "epoch": 0.6109762020398252, "grad_norm": 1.040033944748168, "learning_rate": 3.9627017051106665e-06, "loss": 0.5781, "step": 11322 }, { "epoch": 0.6110301656683396, "grad_norm": 0.9999801329215182, "learning_rate": 3.961984731924535e-06, "loss": 0.4678, "step": 11323 }, { "epoch": 0.611084129296854, "grad_norm": 0.8500727626235808, "learning_rate": 3.9612678029425185e-06, "loss": 0.3925, "step": 11324 }, { "epoch": 0.6111380929253682, "grad_norm": 1.1472490616729791, "learning_rate": 3.96055091818522e-06, "loss": 0.5328, "step": 11325 }, { "epoch": 
0.6111920565538826, "grad_norm": 1.3022803009646637, "learning_rate": 3.9598340776732426e-06, "loss": 0.4849, "step": 11326 }, { "epoch": 0.611246020182397, "grad_norm": 0.7861098314050887, "learning_rate": 3.9591172814271895e-06, "loss": 0.2796, "step": 11327 }, { "epoch": 0.6112999838109114, "grad_norm": 1.0270197924999247, "learning_rate": 3.9584005294676645e-06, "loss": 0.5157, "step": 11328 }, { "epoch": 0.6113539474394258, "grad_norm": 1.0044299850429308, "learning_rate": 3.957683821815263e-06, "loss": 0.4847, "step": 11329 }, { "epoch": 0.6114079110679402, "grad_norm": 0.9869121887662532, "learning_rate": 3.95696715849059e-06, "loss": 0.3961, "step": 11330 }, { "epoch": 0.6114618746964546, "grad_norm": 0.9573290077054151, "learning_rate": 3.956250539514238e-06, "loss": 0.4011, "step": 11331 }, { "epoch": 0.6115158383249689, "grad_norm": 1.0723433862467202, "learning_rate": 3.955533964906808e-06, "loss": 0.4481, "step": 11332 }, { "epoch": 0.6115698019534833, "grad_norm": 0.9566390207463136, "learning_rate": 3.95481743468889e-06, "loss": 0.477, "step": 11333 }, { "epoch": 0.6116237655819977, "grad_norm": 0.9678578739383089, "learning_rate": 3.954100948881081e-06, "loss": 0.4287, "step": 11334 }, { "epoch": 0.6116777292105121, "grad_norm": 1.0341314444805665, "learning_rate": 3.953384507503973e-06, "loss": 0.3936, "step": 11335 }, { "epoch": 0.6117316928390265, "grad_norm": 1.049427669531221, "learning_rate": 3.952668110578155e-06, "loss": 0.6302, "step": 11336 }, { "epoch": 0.6117856564675409, "grad_norm": 1.102549125071285, "learning_rate": 3.951951758124221e-06, "loss": 0.5986, "step": 11337 }, { "epoch": 0.6118396200960553, "grad_norm": 0.9558056077294073, "learning_rate": 3.951235450162756e-06, "loss": 0.397, "step": 11338 }, { "epoch": 0.6118935837245696, "grad_norm": 1.1043143411560228, "learning_rate": 3.9505191867143525e-06, "loss": 0.496, "step": 11339 }, { "epoch": 0.611947547353084, "grad_norm": 0.9033914830563022, "learning_rate": 3.949802967799591e-06, "loss": 0.336, "step": 11340 }, { "epoch": 0.6120015109815984, "grad_norm": 1.1288104514135848, "learning_rate": 3.949086793439058e-06, "loss": 0.4377, "step": 11341 }, { "epoch": 0.6120554746101128, "grad_norm": 1.0305232594037712, "learning_rate": 3.948370663653338e-06, "loss": 0.4801, "step": 11342 }, { "epoch": 0.6121094382386272, "grad_norm": 1.1164455975398875, "learning_rate": 3.9476545784630125e-06, "loss": 0.5317, "step": 11343 }, { "epoch": 0.6121634018671416, "grad_norm": 1.0240971335785225, "learning_rate": 3.946938537888663e-06, "loss": 0.4814, "step": 11344 }, { "epoch": 0.612217365495656, "grad_norm": 1.1278119673777605, "learning_rate": 3.946222541950871e-06, "loss": 0.4445, "step": 11345 }, { "epoch": 0.6122713291241703, "grad_norm": 1.1860059635101987, "learning_rate": 3.945506590670211e-06, "loss": 0.4612, "step": 11346 }, { "epoch": 0.6123252927526847, "grad_norm": 0.9259185709228956, "learning_rate": 3.944790684067263e-06, "loss": 0.4434, "step": 11347 }, { "epoch": 0.612379256381199, "grad_norm": 1.2722907658905114, "learning_rate": 3.944074822162603e-06, "loss": 0.558, "step": 11348 }, { "epoch": 0.6124332200097135, "grad_norm": 1.0705340437569038, "learning_rate": 3.943359004976802e-06, "loss": 0.5351, "step": 11349 }, { "epoch": 0.6124871836382279, "grad_norm": 0.9711782636713336, "learning_rate": 3.942643232530439e-06, "loss": 0.5056, "step": 11350 }, { "epoch": 0.6125411472667422, "grad_norm": 0.94954848956124, "learning_rate": 3.941927504844082e-06, "loss": 0.4982, "step": 11351 }, { "epoch": 
0.6125951108952566, "grad_norm": 0.7447161519533517, "learning_rate": 3.9412118219383025e-06, "loss": 0.2876, "step": 11352 }, { "epoch": 0.6126490745237709, "grad_norm": 0.7304063072846637, "learning_rate": 3.940496183833672e-06, "loss": 0.2848, "step": 11353 }, { "epoch": 0.6127030381522853, "grad_norm": 1.1220299937164737, "learning_rate": 3.939780590550756e-06, "loss": 0.4673, "step": 11354 }, { "epoch": 0.6127570017807997, "grad_norm": 1.2590760863479662, "learning_rate": 3.939065042110122e-06, "loss": 0.4112, "step": 11355 }, { "epoch": 0.6128109654093141, "grad_norm": 1.1391221164871697, "learning_rate": 3.938349538532336e-06, "loss": 0.7944, "step": 11356 }, { "epoch": 0.6128649290378285, "grad_norm": 0.9200274034782757, "learning_rate": 3.937634079837961e-06, "loss": 0.3994, "step": 11357 }, { "epoch": 0.6129188926663429, "grad_norm": 1.0678471602267254, "learning_rate": 3.936918666047562e-06, "loss": 0.5252, "step": 11358 }, { "epoch": 0.6129728562948573, "grad_norm": 0.7951082511264022, "learning_rate": 3.9362032971817e-06, "loss": 0.3044, "step": 11359 }, { "epoch": 0.6130268199233716, "grad_norm": 0.8574973323391991, "learning_rate": 3.935487973260937e-06, "loss": 0.348, "step": 11360 }, { "epoch": 0.613080783551886, "grad_norm": 0.8524268238533316, "learning_rate": 3.934772694305829e-06, "loss": 0.3456, "step": 11361 }, { "epoch": 0.6131347471804004, "grad_norm": 1.2041623870486018, "learning_rate": 3.934057460336935e-06, "loss": 0.5385, "step": 11362 }, { "epoch": 0.6131887108089148, "grad_norm": 1.1360069684638865, "learning_rate": 3.933342271374813e-06, "loss": 0.648, "step": 11363 }, { "epoch": 0.6132426744374292, "grad_norm": 1.1425654876444762, "learning_rate": 3.932627127440015e-06, "loss": 0.4447, "step": 11364 }, { "epoch": 0.6132966380659436, "grad_norm": 0.870169580811279, "learning_rate": 3.9319120285530985e-06, "loss": 0.3058, "step": 11365 }, { "epoch": 0.613350601694458, "grad_norm": 1.0835800513759921, "learning_rate": 3.931196974734615e-06, "loss": 0.622, "step": 11366 }, { "epoch": 0.6134045653229723, "grad_norm": 1.183276211146257, "learning_rate": 3.930481966005115e-06, "loss": 0.585, "step": 11367 }, { "epoch": 0.6134585289514867, "grad_norm": 1.1415271785443588, "learning_rate": 3.9297670023851505e-06, "loss": 0.413, "step": 11368 }, { "epoch": 0.6135124925800011, "grad_norm": 1.0587290888220424, "learning_rate": 3.929052083895268e-06, "loss": 0.4679, "step": 11369 }, { "epoch": 0.6135664562085155, "grad_norm": 1.0787767415357277, "learning_rate": 3.928337210556015e-06, "loss": 0.4086, "step": 11370 }, { "epoch": 0.6136204198370299, "grad_norm": 0.9412736754068449, "learning_rate": 3.92762238238794e-06, "loss": 0.3181, "step": 11371 }, { "epoch": 0.6136743834655443, "grad_norm": 0.9659676813436732, "learning_rate": 3.926907599411586e-06, "loss": 0.4165, "step": 11372 }, { "epoch": 0.6137283470940587, "grad_norm": 1.3078832925827035, "learning_rate": 3.926192861647496e-06, "loss": 0.6013, "step": 11373 }, { "epoch": 0.6137823107225729, "grad_norm": 0.8160400340412286, "learning_rate": 3.9254781691162145e-06, "loss": 0.3615, "step": 11374 }, { "epoch": 0.6138362743510873, "grad_norm": 0.9626993194917328, "learning_rate": 3.924763521838282e-06, "loss": 0.4029, "step": 11375 }, { "epoch": 0.6138902379796017, "grad_norm": 1.034955304457003, "learning_rate": 3.924048919834236e-06, "loss": 0.4818, "step": 11376 }, { "epoch": 0.6139442016081161, "grad_norm": 1.154633534537917, "learning_rate": 3.923334363124617e-06, "loss": 0.5516, "step": 11377 }, { "epoch": 
0.6139981652366305, "grad_norm": 1.0324987596350268, "learning_rate": 3.922619851729962e-06, "loss": 0.4121, "step": 11378 }, { "epoch": 0.6140521288651449, "grad_norm": 0.8402165325146292, "learning_rate": 3.921905385670805e-06, "loss": 0.4406, "step": 11379 }, { "epoch": 0.6141060924936593, "grad_norm": 0.8553724450173478, "learning_rate": 3.921190964967682e-06, "loss": 0.2889, "step": 11380 }, { "epoch": 0.6141600561221736, "grad_norm": 0.9188794730076365, "learning_rate": 3.920476589641129e-06, "loss": 0.4676, "step": 11381 }, { "epoch": 0.614214019750688, "grad_norm": 1.2150269024533378, "learning_rate": 3.9197622597116726e-06, "loss": 0.6813, "step": 11382 }, { "epoch": 0.6142679833792024, "grad_norm": 0.9875817965702387, "learning_rate": 3.919047975199846e-06, "loss": 0.4119, "step": 11383 }, { "epoch": 0.6143219470077168, "grad_norm": 1.0636522390771102, "learning_rate": 3.918333736126176e-06, "loss": 0.5994, "step": 11384 }, { "epoch": 0.6143759106362312, "grad_norm": 0.9014549650377265, "learning_rate": 3.917619542511195e-06, "loss": 0.3782, "step": 11385 }, { "epoch": 0.6144298742647456, "grad_norm": 1.0987780576560622, "learning_rate": 3.916905394375426e-06, "loss": 0.5547, "step": 11386 }, { "epoch": 0.6144838378932599, "grad_norm": 0.9307305231399055, "learning_rate": 3.916191291739398e-06, "loss": 0.4329, "step": 11387 }, { "epoch": 0.6145378015217743, "grad_norm": 0.7540857507865159, "learning_rate": 3.91547723462363e-06, "loss": 0.2783, "step": 11388 }, { "epoch": 0.6145917651502887, "grad_norm": 1.0433548829536956, "learning_rate": 3.914763223048646e-06, "loss": 0.584, "step": 11389 }, { "epoch": 0.6146457287788031, "grad_norm": 0.8278777772606793, "learning_rate": 3.914049257034971e-06, "loss": 0.3441, "step": 11390 }, { "epoch": 0.6146996924073175, "grad_norm": 1.1716801881399788, "learning_rate": 3.913335336603123e-06, "loss": 0.4449, "step": 11391 }, { "epoch": 0.6147536560358319, "grad_norm": 1.0817451277724341, "learning_rate": 3.912621461773619e-06, "loss": 0.3719, "step": 11392 }, { "epoch": 0.6148076196643463, "grad_norm": 0.9996869358622376, "learning_rate": 3.911907632566979e-06, "loss": 0.3916, "step": 11393 }, { "epoch": 0.6148615832928606, "grad_norm": 1.2747924363523395, "learning_rate": 3.911193849003719e-06, "loss": 0.5107, "step": 11394 }, { "epoch": 0.614915546921375, "grad_norm": 1.1357981867112188, "learning_rate": 3.910480111104352e-06, "loss": 0.4713, "step": 11395 }, { "epoch": 0.6149695105498894, "grad_norm": 1.4094727035356098, "learning_rate": 3.909766418889393e-06, "loss": 0.5831, "step": 11396 }, { "epoch": 0.6150234741784038, "grad_norm": 1.1255776665657375, "learning_rate": 3.9090527723793545e-06, "loss": 0.6059, "step": 11397 }, { "epoch": 0.6150774378069181, "grad_norm": 1.353476561975284, "learning_rate": 3.908339171594746e-06, "loss": 0.5773, "step": 11398 }, { "epoch": 0.6151314014354325, "grad_norm": 0.8935846918933449, "learning_rate": 3.90762561655608e-06, "loss": 0.4178, "step": 11399 }, { "epoch": 0.6151853650639469, "grad_norm": 0.9385304526931998, "learning_rate": 3.906912107283861e-06, "loss": 0.4162, "step": 11400 }, { "epoch": 0.6152393286924612, "grad_norm": 1.0131944832670243, "learning_rate": 3.906198643798599e-06, "loss": 0.4113, "step": 11401 }, { "epoch": 0.6152932923209756, "grad_norm": 0.7997704549820639, "learning_rate": 3.905485226120797e-06, "loss": 0.3259, "step": 11402 }, { "epoch": 0.61534725594949, "grad_norm": 1.0826006397685939, "learning_rate": 3.904771854270961e-06, "loss": 0.4991, "step": 11403 }, { 
"epoch": 0.6154012195780044, "grad_norm": 1.2134416407939599, "learning_rate": 3.904058528269593e-06, "loss": 0.5557, "step": 11404 }, { "epoch": 0.6154551832065188, "grad_norm": 1.0225824935604995, "learning_rate": 3.903345248137197e-06, "loss": 0.4356, "step": 11405 }, { "epoch": 0.6155091468350332, "grad_norm": 1.023086088323714, "learning_rate": 3.902632013894271e-06, "loss": 0.4712, "step": 11406 }, { "epoch": 0.6155631104635476, "grad_norm": 1.3351196117211133, "learning_rate": 3.901918825561315e-06, "loss": 0.4948, "step": 11407 }, { "epoch": 0.6156170740920619, "grad_norm": 1.0496823885962128, "learning_rate": 3.901205683158828e-06, "loss": 0.5264, "step": 11408 }, { "epoch": 0.6156710377205763, "grad_norm": 0.9826439793469758, "learning_rate": 3.900492586707304e-06, "loss": 0.6002, "step": 11409 }, { "epoch": 0.6157250013490907, "grad_norm": 0.808257663058477, "learning_rate": 3.899779536227239e-06, "loss": 0.3979, "step": 11410 }, { "epoch": 0.6157789649776051, "grad_norm": 0.8216850049701667, "learning_rate": 3.899066531739127e-06, "loss": 0.4501, "step": 11411 }, { "epoch": 0.6158329286061195, "grad_norm": 0.7943792179157726, "learning_rate": 3.89835357326346e-06, "loss": 0.2852, "step": 11412 }, { "epoch": 0.6158868922346339, "grad_norm": 0.9521426765254029, "learning_rate": 3.89764066082073e-06, "loss": 0.4051, "step": 11413 }, { "epoch": 0.6159408558631483, "grad_norm": 1.0656886205552583, "learning_rate": 3.8969277944314275e-06, "loss": 0.5224, "step": 11414 }, { "epoch": 0.6159948194916626, "grad_norm": 0.9467145612543645, "learning_rate": 3.896214974116041e-06, "loss": 0.4484, "step": 11415 }, { "epoch": 0.616048783120177, "grad_norm": 1.0500070992365433, "learning_rate": 3.895502199895055e-06, "loss": 0.5246, "step": 11416 }, { "epoch": 0.6161027467486914, "grad_norm": 0.9435026002494481, "learning_rate": 3.894789471788957e-06, "loss": 0.3799, "step": 11417 }, { "epoch": 0.6161567103772058, "grad_norm": 1.0582754191481545, "learning_rate": 3.894076789818232e-06, "loss": 0.4585, "step": 11418 }, { "epoch": 0.6162106740057202, "grad_norm": 0.9713238340541142, "learning_rate": 3.893364154003362e-06, "loss": 0.5277, "step": 11419 }, { "epoch": 0.6162646376342346, "grad_norm": 0.9682246208696998, "learning_rate": 3.892651564364829e-06, "loss": 0.3068, "step": 11420 }, { "epoch": 0.616318601262749, "grad_norm": 0.9863308735562876, "learning_rate": 3.891939020923115e-06, "loss": 0.4454, "step": 11421 }, { "epoch": 0.6163725648912632, "grad_norm": 1.0710780247419451, "learning_rate": 3.891226523698699e-06, "loss": 0.5203, "step": 11422 }, { "epoch": 0.6164265285197776, "grad_norm": 0.9910546340473102, "learning_rate": 3.890514072712057e-06, "loss": 0.4533, "step": 11423 }, { "epoch": 0.616480492148292, "grad_norm": 0.9325890643103935, "learning_rate": 3.8898016679836685e-06, "loss": 0.3863, "step": 11424 }, { "epoch": 0.6165344557768064, "grad_norm": 1.1363982421534473, "learning_rate": 3.889089309534006e-06, "loss": 0.5264, "step": 11425 }, { "epoch": 0.6165884194053208, "grad_norm": 0.9989220282256581, "learning_rate": 3.8883769973835435e-06, "loss": 0.4758, "step": 11426 }, { "epoch": 0.6166423830338352, "grad_norm": 0.9133871461934632, "learning_rate": 3.887664731552755e-06, "loss": 0.4096, "step": 11427 }, { "epoch": 0.6166963466623496, "grad_norm": 1.1683678551750676, "learning_rate": 3.886952512062112e-06, "loss": 0.4642, "step": 11428 }, { "epoch": 0.6167503102908639, "grad_norm": 1.1604436041027186, "learning_rate": 3.886240338932086e-06, "loss": 0.568, "step": 11429 }, 
{ "epoch": 0.6168042739193783, "grad_norm": 0.967325888881782, "learning_rate": 3.885528212183139e-06, "loss": 0.4379, "step": 11430 }, { "epoch": 0.6168582375478927, "grad_norm": 0.919805564126186, "learning_rate": 3.884816131835744e-06, "loss": 0.4255, "step": 11431 }, { "epoch": 0.6169122011764071, "grad_norm": 1.0602917015298179, "learning_rate": 3.884104097910366e-06, "loss": 0.4573, "step": 11432 }, { "epoch": 0.6169661648049215, "grad_norm": 0.923098332264558, "learning_rate": 3.883392110427468e-06, "loss": 0.5404, "step": 11433 }, { "epoch": 0.6170201284334359, "grad_norm": 1.0945801072774433, "learning_rate": 3.882680169407515e-06, "loss": 0.5095, "step": 11434 }, { "epoch": 0.6170740920619503, "grad_norm": 0.9266970306344515, "learning_rate": 3.881968274870968e-06, "loss": 0.4464, "step": 11435 }, { "epoch": 0.6171280556904646, "grad_norm": 1.1850975503395336, "learning_rate": 3.881256426838289e-06, "loss": 0.4836, "step": 11436 }, { "epoch": 0.617182019318979, "grad_norm": 0.968073522372537, "learning_rate": 3.880544625329934e-06, "loss": 0.3456, "step": 11437 }, { "epoch": 0.6172359829474934, "grad_norm": 1.1617521938932986, "learning_rate": 3.8798328703663635e-06, "loss": 0.5371, "step": 11438 }, { "epoch": 0.6172899465760078, "grad_norm": 0.9807574171001371, "learning_rate": 3.879121161968032e-06, "loss": 0.5165, "step": 11439 }, { "epoch": 0.6173439102045222, "grad_norm": 1.0466857776840641, "learning_rate": 3.878409500155398e-06, "loss": 0.6067, "step": 11440 }, { "epoch": 0.6173978738330366, "grad_norm": 0.9986824633950354, "learning_rate": 3.877697884948912e-06, "loss": 0.5418, "step": 11441 }, { "epoch": 0.617451837461551, "grad_norm": 1.1004020969794024, "learning_rate": 3.876986316369028e-06, "loss": 0.5621, "step": 11442 }, { "epoch": 0.6175058010900653, "grad_norm": 0.8397005873392287, "learning_rate": 3.8762747944362e-06, "loss": 0.3675, "step": 11443 }, { "epoch": 0.6175597647185797, "grad_norm": 0.9378993021068696, "learning_rate": 3.875563319170873e-06, "loss": 0.4543, "step": 11444 }, { "epoch": 0.617613728347094, "grad_norm": 1.0046204971321318, "learning_rate": 3.874851890593497e-06, "loss": 0.5213, "step": 11445 }, { "epoch": 0.6176676919756084, "grad_norm": 0.9706273419803746, "learning_rate": 3.87414050872452e-06, "loss": 0.4766, "step": 11446 }, { "epoch": 0.6177216556041228, "grad_norm": 0.9953685792457094, "learning_rate": 3.873429173584387e-06, "loss": 0.4447, "step": 11447 }, { "epoch": 0.6177756192326372, "grad_norm": 0.9604981397911373, "learning_rate": 3.872717885193544e-06, "loss": 0.5047, "step": 11448 }, { "epoch": 0.6178295828611516, "grad_norm": 0.8993117503768179, "learning_rate": 3.872006643572433e-06, "loss": 0.269, "step": 11449 }, { "epoch": 0.6178835464896659, "grad_norm": 1.1723964686671366, "learning_rate": 3.871295448741498e-06, "loss": 0.4847, "step": 11450 }, { "epoch": 0.6179375101181803, "grad_norm": 0.9408961096829096, "learning_rate": 3.870584300721175e-06, "loss": 0.5554, "step": 11451 }, { "epoch": 0.6179914737466947, "grad_norm": 0.8559072370455617, "learning_rate": 3.869873199531906e-06, "loss": 0.4573, "step": 11452 }, { "epoch": 0.6180454373752091, "grad_norm": 1.0202856679161696, "learning_rate": 3.8691621451941285e-06, "loss": 0.4923, "step": 11453 }, { "epoch": 0.6180994010037235, "grad_norm": 1.328097931154329, "learning_rate": 3.8684511377282785e-06, "loss": 0.6606, "step": 11454 }, { "epoch": 0.6181533646322379, "grad_norm": 1.0628884969936774, "learning_rate": 3.867740177154792e-06, "loss": 0.5592, "step": 11455 }, { 
"epoch": 0.6182073282607522, "grad_norm": 1.0387541442125758, "learning_rate": 3.867029263494104e-06, "loss": 0.4998, "step": 11456 }, { "epoch": 0.6182612918892666, "grad_norm": 0.9587580865184013, "learning_rate": 3.866318396766642e-06, "loss": 0.3968, "step": 11457 }, { "epoch": 0.618315255517781, "grad_norm": 0.7560806714294208, "learning_rate": 3.865607576992841e-06, "loss": 0.2952, "step": 11458 }, { "epoch": 0.6183692191462954, "grad_norm": 0.9997306079514394, "learning_rate": 3.864896804193129e-06, "loss": 0.3599, "step": 11459 }, { "epoch": 0.6184231827748098, "grad_norm": 1.1649511912681678, "learning_rate": 3.864186078387935e-06, "loss": 0.5497, "step": 11460 }, { "epoch": 0.6184771464033242, "grad_norm": 0.9898316178760267, "learning_rate": 3.863475399597687e-06, "loss": 0.4326, "step": 11461 }, { "epoch": 0.6185311100318386, "grad_norm": 0.906481412932685, "learning_rate": 3.862764767842808e-06, "loss": 0.3772, "step": 11462 }, { "epoch": 0.6185850736603529, "grad_norm": 1.0477973505032714, "learning_rate": 3.862054183143727e-06, "loss": 0.3018, "step": 11463 }, { "epoch": 0.6186390372888673, "grad_norm": 1.0614606528497068, "learning_rate": 3.861343645520862e-06, "loss": 0.5207, "step": 11464 }, { "epoch": 0.6186930009173817, "grad_norm": 0.9976053254100097, "learning_rate": 3.860633154994635e-06, "loss": 0.5785, "step": 11465 }, { "epoch": 0.6187469645458961, "grad_norm": 1.03776987200864, "learning_rate": 3.85992271158547e-06, "loss": 0.6092, "step": 11466 }, { "epoch": 0.6188009281744105, "grad_norm": 0.788305797193371, "learning_rate": 3.859212315313782e-06, "loss": 0.3818, "step": 11467 }, { "epoch": 0.6188548918029249, "grad_norm": 1.2241588443619074, "learning_rate": 3.858501966199989e-06, "loss": 0.5175, "step": 11468 }, { "epoch": 0.6189088554314393, "grad_norm": 1.1170619082439357, "learning_rate": 3.85779166426451e-06, "loss": 0.4356, "step": 11469 }, { "epoch": 0.6189628190599535, "grad_norm": 0.9396062236296504, "learning_rate": 3.857081409527758e-06, "loss": 0.4082, "step": 11470 }, { "epoch": 0.6190167826884679, "grad_norm": 0.9578321713087723, "learning_rate": 3.856371202010144e-06, "loss": 0.5265, "step": 11471 }, { "epoch": 0.6190707463169823, "grad_norm": 0.9918621728368521, "learning_rate": 3.855661041732084e-06, "loss": 0.4304, "step": 11472 }, { "epoch": 0.6191247099454967, "grad_norm": 1.0004506619323135, "learning_rate": 3.854950928713987e-06, "loss": 0.4864, "step": 11473 }, { "epoch": 0.6191786735740111, "grad_norm": 1.082242422461479, "learning_rate": 3.854240862976264e-06, "loss": 0.436, "step": 11474 }, { "epoch": 0.6192326372025255, "grad_norm": 1.0581453688847322, "learning_rate": 3.8535308445393195e-06, "loss": 0.4712, "step": 11475 }, { "epoch": 0.6192866008310399, "grad_norm": 0.8274408248305202, "learning_rate": 3.852820873423563e-06, "loss": 0.3285, "step": 11476 }, { "epoch": 0.6193405644595542, "grad_norm": 0.8522411039665704, "learning_rate": 3.852110949649399e-06, "loss": 0.4077, "step": 11477 }, { "epoch": 0.6193945280880686, "grad_norm": 1.064807303019347, "learning_rate": 3.851401073237231e-06, "loss": 0.5056, "step": 11478 }, { "epoch": 0.619448491716583, "grad_norm": 0.9304164998566877, "learning_rate": 3.8506912442074615e-06, "loss": 0.459, "step": 11479 }, { "epoch": 0.6195024553450974, "grad_norm": 1.0238521124548599, "learning_rate": 3.849981462580492e-06, "loss": 0.458, "step": 11480 }, { "epoch": 0.6195564189736118, "grad_norm": 0.8619783848628629, "learning_rate": 3.8492717283767235e-06, "loss": 0.3337, "step": 11481 }, { 
"epoch": 0.6196103826021262, "grad_norm": 0.9648695344089687, "learning_rate": 3.8485620416165525e-06, "loss": 0.4244, "step": 11482 }, { "epoch": 0.6196643462306406, "grad_norm": 1.0257681974629396, "learning_rate": 3.8478524023203775e-06, "loss": 0.5546, "step": 11483 }, { "epoch": 0.6197183098591549, "grad_norm": 0.951172681978254, "learning_rate": 3.847142810508596e-06, "loss": 0.4347, "step": 11484 }, { "epoch": 0.6197722734876693, "grad_norm": 0.9130287918399902, "learning_rate": 3.8464332662015975e-06, "loss": 0.4301, "step": 11485 }, { "epoch": 0.6198262371161837, "grad_norm": 1.1828126088566135, "learning_rate": 3.845723769419779e-06, "loss": 0.5594, "step": 11486 }, { "epoch": 0.6198802007446981, "grad_norm": 1.0826286496555992, "learning_rate": 3.84501432018353e-06, "loss": 0.5103, "step": 11487 }, { "epoch": 0.6199341643732125, "grad_norm": 1.0644337873115821, "learning_rate": 3.844304918513242e-06, "loss": 0.3847, "step": 11488 }, { "epoch": 0.6199881280017269, "grad_norm": 1.0283876173864241, "learning_rate": 3.843595564429303e-06, "loss": 0.3769, "step": 11489 }, { "epoch": 0.6200420916302413, "grad_norm": 1.0292078668534241, "learning_rate": 3.842886257952102e-06, "loss": 0.5957, "step": 11490 }, { "epoch": 0.6200960552587556, "grad_norm": 1.0296125768024416, "learning_rate": 3.8421769991020255e-06, "loss": 0.3724, "step": 11491 }, { "epoch": 0.62015001888727, "grad_norm": 1.0605237476882372, "learning_rate": 3.8414677878994555e-06, "loss": 0.3722, "step": 11492 }, { "epoch": 0.6202039825157843, "grad_norm": 1.3004397249037534, "learning_rate": 3.840758624364777e-06, "loss": 0.5276, "step": 11493 }, { "epoch": 0.6202579461442987, "grad_norm": 1.0172165784815832, "learning_rate": 3.840049508518373e-06, "loss": 0.3925, "step": 11494 }, { "epoch": 0.6203119097728131, "grad_norm": 1.1777293288787547, "learning_rate": 3.839340440380622e-06, "loss": 0.4593, "step": 11495 }, { "epoch": 0.6203658734013275, "grad_norm": 0.8538588004947577, "learning_rate": 3.838631419971906e-06, "loss": 0.3505, "step": 11496 }, { "epoch": 0.6204198370298419, "grad_norm": 1.2546813991769616, "learning_rate": 3.837922447312601e-06, "loss": 0.5106, "step": 11497 }, { "epoch": 0.6204738006583562, "grad_norm": 1.19454985496936, "learning_rate": 3.837213522423085e-06, "loss": 0.4484, "step": 11498 }, { "epoch": 0.6205277642868706, "grad_norm": 1.0578902677266486, "learning_rate": 3.836504645323732e-06, "loss": 0.4383, "step": 11499 }, { "epoch": 0.620581727915385, "grad_norm": 1.2225484671526736, "learning_rate": 3.835795816034917e-06, "loss": 0.5431, "step": 11500 }, { "epoch": 0.620581727915385, "eval_loss": 0.5394495725631714, "eval_runtime": 162.1491, "eval_samples_per_second": 21.209, "eval_steps_per_second": 0.888, "step": 11500 }, { "epoch": 0.6206356915438994, "grad_norm": 1.0928242331320581, "learning_rate": 3.835087034577011e-06, "loss": 0.6188, "step": 11501 }, { "epoch": 0.6206896551724138, "grad_norm": 1.0426165233477172, "learning_rate": 3.834378300970385e-06, "loss": 0.4243, "step": 11502 }, { "epoch": 0.6207436188009282, "grad_norm": 1.018727726745797, "learning_rate": 3.8336696152354114e-06, "loss": 0.4492, "step": 11503 }, { "epoch": 0.6207975824294426, "grad_norm": 1.062229775389288, "learning_rate": 3.8329609773924555e-06, "loss": 0.4622, "step": 11504 }, { "epoch": 0.6208515460579569, "grad_norm": 1.1672120435524331, "learning_rate": 3.832252387461888e-06, "loss": 0.6572, "step": 11505 }, { "epoch": 0.6209055096864713, "grad_norm": 0.9702766509155669, "learning_rate": 
3.83154384546407e-06, "loss": 0.5688, "step": 11506 }, { "epoch": 0.6209594733149857, "grad_norm": 0.8592360899075672, "learning_rate": 3.8308353514193675e-06, "loss": 0.3563, "step": 11507 }, { "epoch": 0.6210134369435001, "grad_norm": 1.2011842094061358, "learning_rate": 3.830126905348144e-06, "loss": 0.5057, "step": 11508 }, { "epoch": 0.6210674005720145, "grad_norm": 1.098551600577112, "learning_rate": 3.8294185072707605e-06, "loss": 0.5195, "step": 11509 }, { "epoch": 0.6211213642005289, "grad_norm": 1.2878262394424043, "learning_rate": 3.828710157207577e-06, "loss": 0.4814, "step": 11510 }, { "epoch": 0.6211753278290433, "grad_norm": 0.8629109397765728, "learning_rate": 3.828001855178951e-06, "loss": 0.4156, "step": 11511 }, { "epoch": 0.6212292914575576, "grad_norm": 1.0924538212688848, "learning_rate": 3.827293601205245e-06, "loss": 0.4687, "step": 11512 }, { "epoch": 0.621283255086072, "grad_norm": 1.0299379912474689, "learning_rate": 3.826585395306808e-06, "loss": 0.4794, "step": 11513 }, { "epoch": 0.6213372187145864, "grad_norm": 0.9461871914481407, "learning_rate": 3.8258772375039986e-06, "loss": 0.5357, "step": 11514 }, { "epoch": 0.6213911823431008, "grad_norm": 1.0261442283498754, "learning_rate": 3.825169127817168e-06, "loss": 0.4804, "step": 11515 }, { "epoch": 0.6214451459716152, "grad_norm": 0.8504956219411951, "learning_rate": 3.824461066266669e-06, "loss": 0.4666, "step": 11516 }, { "epoch": 0.6214991096001296, "grad_norm": 0.8728832273600415, "learning_rate": 3.823753052872853e-06, "loss": 0.3662, "step": 11517 }, { "epoch": 0.621553073228644, "grad_norm": 1.0526580014321663, "learning_rate": 3.823045087656066e-06, "loss": 0.5461, "step": 11518 }, { "epoch": 0.6216070368571582, "grad_norm": 1.0568995136112054, "learning_rate": 3.822337170636662e-06, "loss": 0.4336, "step": 11519 }, { "epoch": 0.6216610004856726, "grad_norm": 1.042633568113139, "learning_rate": 3.821629301834979e-06, "loss": 0.4872, "step": 11520 }, { "epoch": 0.621714964114187, "grad_norm": 1.241596452194854, "learning_rate": 3.820921481271366e-06, "loss": 0.5869, "step": 11521 }, { "epoch": 0.6217689277427014, "grad_norm": 1.0091101443320372, "learning_rate": 3.820213708966167e-06, "loss": 0.4826, "step": 11522 }, { "epoch": 0.6218228913712158, "grad_norm": 1.1996616074585864, "learning_rate": 3.819505984939723e-06, "loss": 0.5847, "step": 11523 }, { "epoch": 0.6218768549997302, "grad_norm": 1.3203179278497292, "learning_rate": 3.818798309212375e-06, "loss": 0.5566, "step": 11524 }, { "epoch": 0.6219308186282445, "grad_norm": 1.0512340690060629, "learning_rate": 3.818090681804463e-06, "loss": 0.5127, "step": 11525 }, { "epoch": 0.6219847822567589, "grad_norm": 0.8009916852283765, "learning_rate": 3.817383102736324e-06, "loss": 0.3141, "step": 11526 }, { "epoch": 0.6220387458852733, "grad_norm": 1.1437517560317418, "learning_rate": 3.8166755720282944e-06, "loss": 0.4351, "step": 11527 }, { "epoch": 0.6220927095137877, "grad_norm": 1.317766066981831, "learning_rate": 3.815968089700709e-06, "loss": 0.5724, "step": 11528 }, { "epoch": 0.6221466731423021, "grad_norm": 0.9348700614452657, "learning_rate": 3.815260655773903e-06, "loss": 0.444, "step": 11529 }, { "epoch": 0.6222006367708165, "grad_norm": 0.9068526737380279, "learning_rate": 3.8145532702682087e-06, "loss": 0.4259, "step": 11530 }, { "epoch": 0.6222546003993309, "grad_norm": 1.1522108505644295, "learning_rate": 3.8138459332039557e-06, "loss": 0.5051, "step": 11531 }, { "epoch": 0.6223085640278452, "grad_norm": 0.9558540264445917, 
"learning_rate": 3.8131386446014764e-06, "loss": 0.5014, "step": 11532 }, { "epoch": 0.6223625276563596, "grad_norm": 1.1481603827623297, "learning_rate": 3.812431404481094e-06, "loss": 0.3994, "step": 11533 }, { "epoch": 0.622416491284874, "grad_norm": 0.7937764020181572, "learning_rate": 3.8117242128631393e-06, "loss": 0.3154, "step": 11534 }, { "epoch": 0.6224704549133884, "grad_norm": 1.0477472948637327, "learning_rate": 3.811017069767937e-06, "loss": 0.478, "step": 11535 }, { "epoch": 0.6225244185419028, "grad_norm": 1.3814974313038366, "learning_rate": 3.81030997521581e-06, "loss": 0.4965, "step": 11536 }, { "epoch": 0.6225783821704172, "grad_norm": 1.0559092380340673, "learning_rate": 3.809602929227081e-06, "loss": 0.4572, "step": 11537 }, { "epoch": 0.6226323457989316, "grad_norm": 0.9408874081125741, "learning_rate": 3.8088959318220732e-06, "loss": 0.3563, "step": 11538 }, { "epoch": 0.6226863094274458, "grad_norm": 0.9783586702548881, "learning_rate": 3.808188983021106e-06, "loss": 0.4092, "step": 11539 }, { "epoch": 0.6227402730559602, "grad_norm": 1.0623124699118882, "learning_rate": 3.8074820828444956e-06, "loss": 0.659, "step": 11540 }, { "epoch": 0.6227942366844746, "grad_norm": 0.8290981627078367, "learning_rate": 3.8067752313125604e-06, "loss": 0.4249, "step": 11541 }, { "epoch": 0.622848200312989, "grad_norm": 0.7806879860138479, "learning_rate": 3.8060684284456152e-06, "loss": 0.2828, "step": 11542 }, { "epoch": 0.6229021639415034, "grad_norm": 0.8421508172221233, "learning_rate": 3.8053616742639754e-06, "loss": 0.3875, "step": 11543 }, { "epoch": 0.6229561275700178, "grad_norm": 1.1226874465740895, "learning_rate": 3.804654968787953e-06, "loss": 0.6562, "step": 11544 }, { "epoch": 0.6230100911985322, "grad_norm": 0.980367538310028, "learning_rate": 3.8039483120378594e-06, "loss": 0.4623, "step": 11545 }, { "epoch": 0.6230640548270465, "grad_norm": 1.1271966600707952, "learning_rate": 3.8032417040340075e-06, "loss": 0.5555, "step": 11546 }, { "epoch": 0.6231180184555609, "grad_norm": 1.1825399020404714, "learning_rate": 3.8025351447967006e-06, "loss": 0.6318, "step": 11547 }, { "epoch": 0.6231719820840753, "grad_norm": 0.9766405443489511, "learning_rate": 3.801828634346252e-06, "loss": 0.4287, "step": 11548 }, { "epoch": 0.6232259457125897, "grad_norm": 1.1523084417664822, "learning_rate": 3.8011221727029623e-06, "loss": 0.5187, "step": 11549 }, { "epoch": 0.6232799093411041, "grad_norm": 1.2479132252919225, "learning_rate": 3.800415759887137e-06, "loss": 0.5145, "step": 11550 }, { "epoch": 0.6233338729696185, "grad_norm": 0.924546418782295, "learning_rate": 3.7997093959190813e-06, "loss": 0.3334, "step": 11551 }, { "epoch": 0.6233878365981329, "grad_norm": 0.9780000855073663, "learning_rate": 3.7990030808190945e-06, "loss": 0.4709, "step": 11552 }, { "epoch": 0.6234418002266472, "grad_norm": 0.9713783685808701, "learning_rate": 3.7982968146074804e-06, "loss": 0.4029, "step": 11553 }, { "epoch": 0.6234957638551616, "grad_norm": 1.2003220978670202, "learning_rate": 3.7975905973045337e-06, "loss": 0.5824, "step": 11554 }, { "epoch": 0.623549727483676, "grad_norm": 0.9366943475626447, "learning_rate": 3.796884428930553e-06, "loss": 0.4996, "step": 11555 }, { "epoch": 0.6236036911121904, "grad_norm": 0.8945406279443906, "learning_rate": 3.7961783095058347e-06, "loss": 0.3327, "step": 11556 }, { "epoch": 0.6236576547407048, "grad_norm": 0.9688078471149181, "learning_rate": 3.795472239050674e-06, "loss": 0.3942, "step": 11557 }, { "epoch": 0.6237116183692192, "grad_norm": 
0.9170887510164533, "learning_rate": 3.7947662175853627e-06, "loss": 0.3205, "step": 11558 }, { "epoch": 0.6237655819977336, "grad_norm": 1.0084836674977515, "learning_rate": 3.7940602451301935e-06, "loss": 0.4654, "step": 11559 }, { "epoch": 0.6238195456262479, "grad_norm": 1.122805904996504, "learning_rate": 3.793354321705458e-06, "loss": 0.4057, "step": 11560 }, { "epoch": 0.6238735092547623, "grad_norm": 0.9506578959961349, "learning_rate": 3.7926484473314427e-06, "loss": 0.4768, "step": 11561 }, { "epoch": 0.6239274728832767, "grad_norm": 0.9091604382347543, "learning_rate": 3.791942622028436e-06, "loss": 0.4445, "step": 11562 }, { "epoch": 0.623981436511791, "grad_norm": 0.8983705326791864, "learning_rate": 3.7912368458167248e-06, "loss": 0.418, "step": 11563 }, { "epoch": 0.6240354001403055, "grad_norm": 0.8401039737429548, "learning_rate": 3.7905311187165915e-06, "loss": 0.3606, "step": 11564 }, { "epoch": 0.6240893637688198, "grad_norm": 0.9302936093242125, "learning_rate": 3.7898254407483228e-06, "loss": 0.3379, "step": 11565 }, { "epoch": 0.6241433273973342, "grad_norm": 0.9538797343565371, "learning_rate": 3.789119811932198e-06, "loss": 0.4008, "step": 11566 }, { "epoch": 0.6241972910258485, "grad_norm": 1.1463706446827544, "learning_rate": 3.788414232288501e-06, "loss": 0.462, "step": 11567 }, { "epoch": 0.6242512546543629, "grad_norm": 1.1583146393320152, "learning_rate": 3.787708701837507e-06, "loss": 0.5341, "step": 11568 }, { "epoch": 0.6243052182828773, "grad_norm": 1.0697244820346914, "learning_rate": 3.7870032205994932e-06, "loss": 0.4715, "step": 11569 }, { "epoch": 0.6243591819113917, "grad_norm": 1.0959630518959362, "learning_rate": 3.7862977885947383e-06, "loss": 0.502, "step": 11570 }, { "epoch": 0.6244131455399061, "grad_norm": 0.9497984371441222, "learning_rate": 3.785592405843518e-06, "loss": 0.4457, "step": 11571 }, { "epoch": 0.6244671091684205, "grad_norm": 1.1821280780503531, "learning_rate": 3.7848870723661034e-06, "loss": 0.5548, "step": 11572 }, { "epoch": 0.6245210727969349, "grad_norm": 1.030132976792053, "learning_rate": 3.784181788182767e-06, "loss": 0.451, "step": 11573 }, { "epoch": 0.6245750364254492, "grad_norm": 1.1111811361507382, "learning_rate": 3.7834765533137806e-06, "loss": 0.5717, "step": 11574 }, { "epoch": 0.6246290000539636, "grad_norm": 0.8147422995366819, "learning_rate": 3.782771367779412e-06, "loss": 0.3314, "step": 11575 }, { "epoch": 0.624682963682478, "grad_norm": 1.0774416585168225, "learning_rate": 3.782066231599929e-06, "loss": 0.5377, "step": 11576 }, { "epoch": 0.6247369273109924, "grad_norm": 1.1165243815721473, "learning_rate": 3.781361144795599e-06, "loss": 0.5664, "step": 11577 }, { "epoch": 0.6247908909395068, "grad_norm": 1.1599817289948917, "learning_rate": 3.7806561073866856e-06, "loss": 0.5419, "step": 11578 }, { "epoch": 0.6248448545680212, "grad_norm": 0.9204214764305291, "learning_rate": 3.7799511193934536e-06, "loss": 0.4776, "step": 11579 }, { "epoch": 0.6248988181965356, "grad_norm": 1.211367137196393, "learning_rate": 3.7792461808361646e-06, "loss": 0.5244, "step": 11580 }, { "epoch": 0.6249527818250499, "grad_norm": 1.1054190769534502, "learning_rate": 3.7785412917350784e-06, "loss": 0.5759, "step": 11581 }, { "epoch": 0.6250067454535643, "grad_norm": 1.0429684465018547, "learning_rate": 3.7778364521104562e-06, "loss": 0.4359, "step": 11582 }, { "epoch": 0.6250607090820787, "grad_norm": 1.0714344919891698, "learning_rate": 3.7771316619825525e-06, "loss": 0.4272, "step": 11583 }, { "epoch": 
0.6251146727105931, "grad_norm": 1.059130732047016, "learning_rate": 3.7764269213716265e-06, "loss": 0.4472, "step": 11584 }, { "epoch": 0.6251686363391075, "grad_norm": 1.0573654642325392, "learning_rate": 3.7757222302979325e-06, "loss": 0.4428, "step": 11585 }, { "epoch": 0.6252225999676219, "grad_norm": 0.8975516685406597, "learning_rate": 3.775017588781723e-06, "loss": 0.4552, "step": 11586 }, { "epoch": 0.6252765635961363, "grad_norm": 0.9811041392208626, "learning_rate": 3.774312996843251e-06, "loss": 0.4632, "step": 11587 }, { "epoch": 0.6253305272246505, "grad_norm": 1.0903223695925204, "learning_rate": 3.7736084545027705e-06, "loss": 0.4566, "step": 11588 }, { "epoch": 0.6253844908531649, "grad_norm": 1.1923819753638967, "learning_rate": 3.772903961780524e-06, "loss": 0.6346, "step": 11589 }, { "epoch": 0.6254384544816793, "grad_norm": 0.9354916848440237, "learning_rate": 3.772199518696763e-06, "loss": 0.4485, "step": 11590 }, { "epoch": 0.6254924181101937, "grad_norm": 0.7347372659440651, "learning_rate": 3.7714951252717347e-06, "loss": 0.2314, "step": 11591 }, { "epoch": 0.6255463817387081, "grad_norm": 0.8559321772126645, "learning_rate": 3.7707907815256816e-06, "loss": 0.2799, "step": 11592 }, { "epoch": 0.6256003453672225, "grad_norm": 1.1525961488874417, "learning_rate": 3.77008648747885e-06, "loss": 0.4059, "step": 11593 }, { "epoch": 0.6256543089957368, "grad_norm": 0.9276976353189834, "learning_rate": 3.7693822431514816e-06, "loss": 0.3989, "step": 11594 }, { "epoch": 0.6257082726242512, "grad_norm": 0.9178772225317449, "learning_rate": 3.768678048563814e-06, "loss": 0.4041, "step": 11595 }, { "epoch": 0.6257622362527656, "grad_norm": 1.2488143611067255, "learning_rate": 3.7679739037360895e-06, "loss": 0.5703, "step": 11596 }, { "epoch": 0.62581619988128, "grad_norm": 1.0591032936061235, "learning_rate": 3.767269808688545e-06, "loss": 0.459, "step": 11597 }, { "epoch": 0.6258701635097944, "grad_norm": 0.7958163705623074, "learning_rate": 3.7665657634414177e-06, "loss": 0.3811, "step": 11598 }, { "epoch": 0.6259241271383088, "grad_norm": 1.2330052091864983, "learning_rate": 3.7658617680149413e-06, "loss": 0.5289, "step": 11599 }, { "epoch": 0.6259780907668232, "grad_norm": 1.0478984650886545, "learning_rate": 3.7651578224293496e-06, "loss": 0.5487, "step": 11600 }, { "epoch": 0.6260320543953375, "grad_norm": 0.7845806347612052, "learning_rate": 3.764453926704875e-06, "loss": 0.4793, "step": 11601 }, { "epoch": 0.6260860180238519, "grad_norm": 0.8435117173153428, "learning_rate": 3.763750080861749e-06, "loss": 0.3384, "step": 11602 }, { "epoch": 0.6261399816523663, "grad_norm": 0.9056833438074271, "learning_rate": 3.7630462849202e-06, "loss": 0.4874, "step": 11603 }, { "epoch": 0.6261939452808807, "grad_norm": 1.0203060482471775, "learning_rate": 3.7623425389004544e-06, "loss": 0.5253, "step": 11604 }, { "epoch": 0.6262479089093951, "grad_norm": 0.8984658379203077, "learning_rate": 3.7616388428227402e-06, "loss": 0.3284, "step": 11605 }, { "epoch": 0.6263018725379095, "grad_norm": 0.8403292830617339, "learning_rate": 3.7609351967072827e-06, "loss": 0.3541, "step": 11606 }, { "epoch": 0.6263558361664239, "grad_norm": 1.2006389251135348, "learning_rate": 3.7602316005743043e-06, "loss": 0.5912, "step": 11607 }, { "epoch": 0.6264097997949382, "grad_norm": 0.922217729377116, "learning_rate": 3.7595280544440304e-06, "loss": 0.4611, "step": 11608 }, { "epoch": 0.6264637634234526, "grad_norm": 1.0451672658523248, "learning_rate": 3.758824558336677e-06, "loss": 0.4556, "step": 11609 
}, { "epoch": 0.626517727051967, "grad_norm": 1.0518079519448627, "learning_rate": 3.7581211122724647e-06, "loss": 0.4753, "step": 11610 }, { "epoch": 0.6265716906804814, "grad_norm": 1.0395275725651656, "learning_rate": 3.757417716271612e-06, "loss": 0.4022, "step": 11611 }, { "epoch": 0.6266256543089957, "grad_norm": 0.8110092596466567, "learning_rate": 3.756714370354334e-06, "loss": 0.4436, "step": 11612 }, { "epoch": 0.6266796179375101, "grad_norm": 1.0364811668645844, "learning_rate": 3.7560110745408473e-06, "loss": 0.4835, "step": 11613 }, { "epoch": 0.6267335815660245, "grad_norm": 1.007840666062246, "learning_rate": 3.7553078288513646e-06, "loss": 0.4058, "step": 11614 }, { "epoch": 0.6267875451945388, "grad_norm": 1.080112167871475, "learning_rate": 3.754604633306098e-06, "loss": 0.4381, "step": 11615 }, { "epoch": 0.6268415088230532, "grad_norm": 0.7776489531961565, "learning_rate": 3.753901487925258e-06, "loss": 0.2505, "step": 11616 }, { "epoch": 0.6268954724515676, "grad_norm": 1.0771798668283927, "learning_rate": 3.7531983927290527e-06, "loss": 0.5081, "step": 11617 }, { "epoch": 0.626949436080082, "grad_norm": 0.8841763735992666, "learning_rate": 3.75249534773769e-06, "loss": 0.4043, "step": 11618 }, { "epoch": 0.6270033997085964, "grad_norm": 0.996249346525534, "learning_rate": 3.7517923529713782e-06, "loss": 0.3962, "step": 11619 }, { "epoch": 0.6270573633371108, "grad_norm": 0.9461680106798867, "learning_rate": 3.7510894084503192e-06, "loss": 0.3866, "step": 11620 }, { "epoch": 0.6271113269656252, "grad_norm": 1.1548245341890442, "learning_rate": 3.7503865141947194e-06, "loss": 0.4428, "step": 11621 }, { "epoch": 0.6271652905941395, "grad_norm": 1.1337988179244398, "learning_rate": 3.7496836702247785e-06, "loss": 0.5397, "step": 11622 }, { "epoch": 0.6272192542226539, "grad_norm": 1.1702019146193667, "learning_rate": 3.7489808765606982e-06, "loss": 0.4561, "step": 11623 }, { "epoch": 0.6272732178511683, "grad_norm": 1.21364607321981, "learning_rate": 3.7482781332226747e-06, "loss": 0.6952, "step": 11624 }, { "epoch": 0.6273271814796827, "grad_norm": 1.0767799230851904, "learning_rate": 3.7475754402309094e-06, "loss": 0.3205, "step": 11625 }, { "epoch": 0.6273811451081971, "grad_norm": 1.138832281184855, "learning_rate": 3.746872797605595e-06, "loss": 0.523, "step": 11626 }, { "epoch": 0.6274351087367115, "grad_norm": 1.1422907069043178, "learning_rate": 3.746170205366929e-06, "loss": 0.5401, "step": 11627 }, { "epoch": 0.6274890723652259, "grad_norm": 0.9796105119088424, "learning_rate": 3.745467663535103e-06, "loss": 0.3963, "step": 11628 }, { "epoch": 0.6275430359937402, "grad_norm": 0.8546858449491889, "learning_rate": 3.74476517213031e-06, "loss": 0.4562, "step": 11629 }, { "epoch": 0.6275969996222546, "grad_norm": 0.8136013168374662, "learning_rate": 3.7440627311727383e-06, "loss": 0.3517, "step": 11630 }, { "epoch": 0.627650963250769, "grad_norm": 1.1031326340416858, "learning_rate": 3.743360340682578e-06, "loss": 0.4331, "step": 11631 }, { "epoch": 0.6277049268792834, "grad_norm": 0.9879294550014037, "learning_rate": 3.7426580006800174e-06, "loss": 0.5062, "step": 11632 }, { "epoch": 0.6277588905077978, "grad_norm": 0.9691511852109512, "learning_rate": 3.7419557111852413e-06, "loss": 0.3562, "step": 11633 }, { "epoch": 0.6278128541363122, "grad_norm": 1.1109121074475676, "learning_rate": 3.7412534722184336e-06, "loss": 0.5568, "step": 11634 }, { "epoch": 0.6278668177648266, "grad_norm": 1.227783362767662, "learning_rate": 3.740551283799778e-06, "loss": 0.6361, 
"step": 11635 }, { "epoch": 0.6279207813933408, "grad_norm": 1.0118662703843477, "learning_rate": 3.739849145949459e-06, "loss": 0.4964, "step": 11636 }, { "epoch": 0.6279747450218552, "grad_norm": 1.0274253880676425, "learning_rate": 3.739147058687652e-06, "loss": 0.4158, "step": 11637 }, { "epoch": 0.6280287086503696, "grad_norm": 1.0182225473257707, "learning_rate": 3.738445022034537e-06, "loss": 0.3259, "step": 11638 }, { "epoch": 0.628082672278884, "grad_norm": 1.1090592997009976, "learning_rate": 3.737743036010294e-06, "loss": 0.4758, "step": 11639 }, { "epoch": 0.6281366359073984, "grad_norm": 1.0986149292798815, "learning_rate": 3.7370411006350947e-06, "loss": 0.384, "step": 11640 }, { "epoch": 0.6281905995359128, "grad_norm": 0.972626637905103, "learning_rate": 3.7363392159291157e-06, "loss": 0.3796, "step": 11641 }, { "epoch": 0.6282445631644272, "grad_norm": 1.0386424619811678, "learning_rate": 3.73563738191253e-06, "loss": 0.4509, "step": 11642 }, { "epoch": 0.6282985267929415, "grad_norm": 1.2220456985026242, "learning_rate": 3.7349355986055114e-06, "loss": 0.4609, "step": 11643 }, { "epoch": 0.6283524904214559, "grad_norm": 1.054705295893981, "learning_rate": 3.7342338660282253e-06, "loss": 0.5099, "step": 11644 }, { "epoch": 0.6284064540499703, "grad_norm": 0.9647182094566822, "learning_rate": 3.733532184200841e-06, "loss": 0.4806, "step": 11645 }, { "epoch": 0.6284604176784847, "grad_norm": 0.8836595615388159, "learning_rate": 3.732830553143527e-06, "loss": 0.4165, "step": 11646 }, { "epoch": 0.6285143813069991, "grad_norm": 1.3702280279025734, "learning_rate": 3.7321289728764486e-06, "loss": 0.5697, "step": 11647 }, { "epoch": 0.6285683449355135, "grad_norm": 1.4315060187621809, "learning_rate": 3.7314274434197693e-06, "loss": 0.4328, "step": 11648 }, { "epoch": 0.6286223085640279, "grad_norm": 1.0472382471392156, "learning_rate": 3.7307259647936532e-06, "loss": 0.6339, "step": 11649 }, { "epoch": 0.6286762721925422, "grad_norm": 0.8660328281585147, "learning_rate": 3.730024537018261e-06, "loss": 0.3906, "step": 11650 }, { "epoch": 0.6287302358210566, "grad_norm": 1.2445898764758339, "learning_rate": 3.729323160113751e-06, "loss": 0.4481, "step": 11651 }, { "epoch": 0.628784199449571, "grad_norm": 0.933750700421194, "learning_rate": 3.728621834100283e-06, "loss": 0.3148, "step": 11652 }, { "epoch": 0.6288381630780854, "grad_norm": 1.1723498380488844, "learning_rate": 3.727920558998013e-06, "loss": 0.6755, "step": 11653 }, { "epoch": 0.6288921267065998, "grad_norm": 1.1932719201980875, "learning_rate": 3.727219334827098e-06, "loss": 0.5757, "step": 11654 }, { "epoch": 0.6289460903351142, "grad_norm": 0.9868582756941889, "learning_rate": 3.7265181616076885e-06, "loss": 0.4277, "step": 11655 }, { "epoch": 0.6290000539636286, "grad_norm": 1.0309852601601595, "learning_rate": 3.7258170393599404e-06, "loss": 0.409, "step": 11656 }, { "epoch": 0.6290540175921429, "grad_norm": 0.9173323510012511, "learning_rate": 3.725115968104004e-06, "loss": 0.3147, "step": 11657 }, { "epoch": 0.6291079812206573, "grad_norm": 1.1190774515763384, "learning_rate": 3.724414947860027e-06, "loss": 0.4382, "step": 11658 }, { "epoch": 0.6291619448491717, "grad_norm": 1.199977446228294, "learning_rate": 3.723713978648157e-06, "loss": 0.6637, "step": 11659 }, { "epoch": 0.629215908477686, "grad_norm": 0.8427856822086506, "learning_rate": 3.7230130604885437e-06, "loss": 0.4044, "step": 11660 }, { "epoch": 0.6292698721062004, "grad_norm": 0.731891875933744, "learning_rate": 3.7223121934013306e-06, "loss": 
0.3277, "step": 11661 }, { "epoch": 0.6293238357347148, "grad_norm": 1.0048410123749991, "learning_rate": 3.72161137740666e-06, "loss": 0.4143, "step": 11662 }, { "epoch": 0.6293777993632291, "grad_norm": 1.0700409042219783, "learning_rate": 3.7209106125246787e-06, "loss": 0.5134, "step": 11663 }, { "epoch": 0.6294317629917435, "grad_norm": 0.970091099034213, "learning_rate": 3.720209898775522e-06, "loss": 0.4349, "step": 11664 }, { "epoch": 0.6294857266202579, "grad_norm": 1.2023324763135415, "learning_rate": 3.7195092361793306e-06, "loss": 0.6205, "step": 11665 }, { "epoch": 0.6295396902487723, "grad_norm": 1.1679626523955267, "learning_rate": 3.7188086247562427e-06, "loss": 0.624, "step": 11666 }, { "epoch": 0.6295936538772867, "grad_norm": 1.0010094433406067, "learning_rate": 3.7181080645263946e-06, "loss": 0.4779, "step": 11667 }, { "epoch": 0.6296476175058011, "grad_norm": 0.9958824760280602, "learning_rate": 3.717407555509922e-06, "loss": 0.3253, "step": 11668 }, { "epoch": 0.6297015811343155, "grad_norm": 1.0197433857095815, "learning_rate": 3.716707097726957e-06, "loss": 0.4008, "step": 11669 }, { "epoch": 0.6297555447628298, "grad_norm": 1.1055915310681876, "learning_rate": 3.7160066911976334e-06, "loss": 0.6593, "step": 11670 }, { "epoch": 0.6298095083913442, "grad_norm": 1.286844932121823, "learning_rate": 3.715306335942078e-06, "loss": 0.7143, "step": 11671 }, { "epoch": 0.6298634720198586, "grad_norm": 1.256101813301474, "learning_rate": 3.7146060319804224e-06, "loss": 0.5866, "step": 11672 }, { "epoch": 0.629917435648373, "grad_norm": 0.8722356688289632, "learning_rate": 3.7139057793327938e-06, "loss": 0.376, "step": 11673 }, { "epoch": 0.6299713992768874, "grad_norm": 0.8813420606954312, "learning_rate": 3.7132055780193166e-06, "loss": 0.3838, "step": 11674 }, { "epoch": 0.6300253629054018, "grad_norm": 0.9021867270093612, "learning_rate": 3.712505428060118e-06, "loss": 0.3751, "step": 11675 }, { "epoch": 0.6300793265339162, "grad_norm": 0.9959021176662156, "learning_rate": 3.711805329475319e-06, "loss": 0.3355, "step": 11676 }, { "epoch": 0.6301332901624305, "grad_norm": 0.9858848285381606, "learning_rate": 3.7111052822850425e-06, "loss": 0.4605, "step": 11677 }, { "epoch": 0.6301872537909449, "grad_norm": 1.076277645842063, "learning_rate": 3.7104052865094065e-06, "loss": 0.504, "step": 11678 }, { "epoch": 0.6302412174194593, "grad_norm": 0.9976856307869747, "learning_rate": 3.7097053421685315e-06, "loss": 0.4596, "step": 11679 }, { "epoch": 0.6302951810479737, "grad_norm": 0.7545665241132605, "learning_rate": 3.7090054492825335e-06, "loss": 0.2854, "step": 11680 }, { "epoch": 0.6303491446764881, "grad_norm": 1.095332499610052, "learning_rate": 3.7083056078715273e-06, "loss": 0.4593, "step": 11681 }, { "epoch": 0.6304031083050025, "grad_norm": 0.8453865941654672, "learning_rate": 3.7076058179556295e-06, "loss": 0.3531, "step": 11682 }, { "epoch": 0.6304570719335169, "grad_norm": 1.076654476342824, "learning_rate": 3.7069060795549516e-06, "loss": 0.6186, "step": 11683 }, { "epoch": 0.6305110355620311, "grad_norm": 1.128150137968342, "learning_rate": 3.7062063926896073e-06, "loss": 0.498, "step": 11684 }, { "epoch": 0.6305649991905455, "grad_norm": 1.042648985356353, "learning_rate": 3.705506757379701e-06, "loss": 0.5151, "step": 11685 }, { "epoch": 0.6306189628190599, "grad_norm": 0.8118238529726451, "learning_rate": 3.704807173645344e-06, "loss": 0.3448, "step": 11686 }, { "epoch": 0.6306729264475743, "grad_norm": 0.8986399490274769, "learning_rate": 
3.7041076415066425e-06, "loss": 0.4204, "step": 11687 }, { "epoch": 0.6307268900760887, "grad_norm": 0.9099447086226061, "learning_rate": 3.7034081609837025e-06, "loss": 0.4171, "step": 11688 }, { "epoch": 0.6307808537046031, "grad_norm": 0.9891140489437573, "learning_rate": 3.702708732096627e-06, "loss": 0.5149, "step": 11689 }, { "epoch": 0.6308348173331175, "grad_norm": 0.8196665577549329, "learning_rate": 3.702009354865518e-06, "loss": 0.2636, "step": 11690 }, { "epoch": 0.6308887809616318, "grad_norm": 1.083380036235683, "learning_rate": 3.70131002931048e-06, "loss": 0.5408, "step": 11691 }, { "epoch": 0.6309427445901462, "grad_norm": 0.8782187997392663, "learning_rate": 3.7006107554516057e-06, "loss": 0.3047, "step": 11692 }, { "epoch": 0.6309967082186606, "grad_norm": 0.8269741178468035, "learning_rate": 3.6999115333089986e-06, "loss": 0.3119, "step": 11693 }, { "epoch": 0.631050671847175, "grad_norm": 0.914725171557676, "learning_rate": 3.699212362902751e-06, "loss": 0.3776, "step": 11694 }, { "epoch": 0.6311046354756894, "grad_norm": 0.9846181252164405, "learning_rate": 3.6985132442529605e-06, "loss": 0.4353, "step": 11695 }, { "epoch": 0.6311585991042038, "grad_norm": 0.9741012372124475, "learning_rate": 3.6978141773797194e-06, "loss": 0.4273, "step": 11696 }, { "epoch": 0.6312125627327182, "grad_norm": 1.2728170911635095, "learning_rate": 3.69711516230312e-06, "loss": 0.5774, "step": 11697 }, { "epoch": 0.6312665263612325, "grad_norm": 1.0066299198571715, "learning_rate": 3.6964161990432545e-06, "loss": 0.5732, "step": 11698 }, { "epoch": 0.6313204899897469, "grad_norm": 0.9843562650606072, "learning_rate": 3.6957172876202074e-06, "loss": 0.5185, "step": 11699 }, { "epoch": 0.6313744536182613, "grad_norm": 1.144806269912816, "learning_rate": 3.695018428054068e-06, "loss": 0.5157, "step": 11700 }, { "epoch": 0.6314284172467757, "grad_norm": 0.934304918588507, "learning_rate": 3.6943196203649246e-06, "loss": 0.41, "step": 11701 }, { "epoch": 0.6314823808752901, "grad_norm": 1.030752103267561, "learning_rate": 3.693620864572859e-06, "loss": 0.4689, "step": 11702 }, { "epoch": 0.6315363445038045, "grad_norm": 0.976610073811376, "learning_rate": 3.6929221606979536e-06, "loss": 0.4614, "step": 11703 }, { "epoch": 0.6315903081323189, "grad_norm": 1.114618296797929, "learning_rate": 3.692223508760293e-06, "loss": 0.4964, "step": 11704 }, { "epoch": 0.6316442717608332, "grad_norm": 0.9587586310768876, "learning_rate": 3.6915249087799554e-06, "loss": 0.5071, "step": 11705 }, { "epoch": 0.6316982353893476, "grad_norm": 1.1630814868514119, "learning_rate": 3.690826360777019e-06, "loss": 0.4578, "step": 11706 }, { "epoch": 0.631752199017862, "grad_norm": 1.3410467696147954, "learning_rate": 3.69012786477156e-06, "loss": 0.5614, "step": 11707 }, { "epoch": 0.6318061626463763, "grad_norm": 0.9651012589296448, "learning_rate": 3.6894294207836556e-06, "loss": 0.547, "step": 11708 }, { "epoch": 0.6318601262748907, "grad_norm": 0.8981317530965073, "learning_rate": 3.68873102883338e-06, "loss": 0.4149, "step": 11709 }, { "epoch": 0.6319140899034051, "grad_norm": 1.0997023135446904, "learning_rate": 3.6880326889408034e-06, "loss": 0.471, "step": 11710 }, { "epoch": 0.6319680535319195, "grad_norm": 1.266019058637315, "learning_rate": 3.6873344011259983e-06, "loss": 0.424, "step": 11711 }, { "epoch": 0.6320220171604338, "grad_norm": 0.9887344924106649, "learning_rate": 3.6866361654090367e-06, "loss": 0.4476, "step": 11712 }, { "epoch": 0.6320759807889482, "grad_norm": 1.1167670976026245, 
"learning_rate": 3.6859379818099818e-06, "loss": 0.7563, "step": 11713 }, { "epoch": 0.6321299444174626, "grad_norm": 1.0084734912661986, "learning_rate": 3.6852398503489016e-06, "loss": 0.4309, "step": 11714 }, { "epoch": 0.632183908045977, "grad_norm": 1.0384013179529583, "learning_rate": 3.684541771045862e-06, "loss": 0.5946, "step": 11715 }, { "epoch": 0.6322378716744914, "grad_norm": 0.7992303223349975, "learning_rate": 3.6838437439209274e-06, "loss": 0.4608, "step": 11716 }, { "epoch": 0.6322918353030058, "grad_norm": 0.9190972725837366, "learning_rate": 3.6831457689941574e-06, "loss": 0.3273, "step": 11717 }, { "epoch": 0.6323457989315202, "grad_norm": 1.082777640117104, "learning_rate": 3.682447846285615e-06, "loss": 0.4226, "step": 11718 }, { "epoch": 0.6323997625600345, "grad_norm": 1.0624136579897412, "learning_rate": 3.6817499758153585e-06, "loss": 0.48, "step": 11719 }, { "epoch": 0.6324537261885489, "grad_norm": 0.9306937391065592, "learning_rate": 3.6810521576034443e-06, "loss": 0.4004, "step": 11720 }, { "epoch": 0.6325076898170633, "grad_norm": 1.1300526621415414, "learning_rate": 3.680354391669928e-06, "loss": 0.6041, "step": 11721 }, { "epoch": 0.6325616534455777, "grad_norm": 0.9389758204929867, "learning_rate": 3.6796566780348654e-06, "loss": 0.4431, "step": 11722 }, { "epoch": 0.6326156170740921, "grad_norm": 0.9998577474096917, "learning_rate": 3.6789590167183096e-06, "loss": 0.4533, "step": 11723 }, { "epoch": 0.6326695807026065, "grad_norm": 0.9364030542920666, "learning_rate": 3.6782614077403113e-06, "loss": 0.3821, "step": 11724 }, { "epoch": 0.6327235443311209, "grad_norm": 0.9977902826417548, "learning_rate": 3.677563851120921e-06, "loss": 0.449, "step": 11725 }, { "epoch": 0.6327775079596352, "grad_norm": 1.014305020670388, "learning_rate": 3.676866346880188e-06, "loss": 0.3804, "step": 11726 }, { "epoch": 0.6328314715881496, "grad_norm": 1.3218627861607557, "learning_rate": 3.676168895038158e-06, "loss": 0.6249, "step": 11727 }, { "epoch": 0.632885435216664, "grad_norm": 1.0301926009667381, "learning_rate": 3.675471495614875e-06, "loss": 0.4775, "step": 11728 }, { "epoch": 0.6329393988451784, "grad_norm": 1.0086342543379974, "learning_rate": 3.674774148630388e-06, "loss": 0.4421, "step": 11729 }, { "epoch": 0.6329933624736928, "grad_norm": 0.8618297181325053, "learning_rate": 3.6740768541047333e-06, "loss": 0.3693, "step": 11730 }, { "epoch": 0.6330473261022072, "grad_norm": 1.0623789744551002, "learning_rate": 3.6733796120579557e-06, "loss": 0.351, "step": 11731 }, { "epoch": 0.6331012897307214, "grad_norm": 0.9965590613522162, "learning_rate": 3.6726824225100942e-06, "loss": 0.4471, "step": 11732 }, { "epoch": 0.6331552533592358, "grad_norm": 0.9457573040209599, "learning_rate": 3.6719852854811868e-06, "loss": 0.4117, "step": 11733 }, { "epoch": 0.6332092169877502, "grad_norm": 1.010615084047796, "learning_rate": 3.6712882009912677e-06, "loss": 0.3935, "step": 11734 }, { "epoch": 0.6332631806162646, "grad_norm": 0.7802987425201245, "learning_rate": 3.6705911690603745e-06, "loss": 0.2624, "step": 11735 }, { "epoch": 0.633317144244779, "grad_norm": 1.1635770869940834, "learning_rate": 3.669894189708538e-06, "loss": 0.5546, "step": 11736 }, { "epoch": 0.6333711078732934, "grad_norm": 0.9650047359301667, "learning_rate": 3.6691972629557936e-06, "loss": 0.4187, "step": 11737 }, { "epoch": 0.6334250715018078, "grad_norm": 1.0599117757331322, "learning_rate": 3.6685003888221694e-06, "loss": 0.4452, "step": 11738 }, { "epoch": 0.6334790351303221, "grad_norm": 
1.0180645462683335, "learning_rate": 3.6678035673276956e-06, "loss": 0.4997, "step": 11739 }, { "epoch": 0.6335329987588365, "grad_norm": 1.1214621536844407, "learning_rate": 3.667106798492397e-06, "loss": 0.4597, "step": 11740 }, { "epoch": 0.6335869623873509, "grad_norm": 0.9338134232366744, "learning_rate": 3.6664100823363014e-06, "loss": 0.4799, "step": 11741 }, { "epoch": 0.6336409260158653, "grad_norm": 1.0793192232475415, "learning_rate": 3.665713418879433e-06, "loss": 0.5026, "step": 11742 }, { "epoch": 0.6336948896443797, "grad_norm": 1.2837248177445746, "learning_rate": 3.6650168081418137e-06, "loss": 0.675, "step": 11743 }, { "epoch": 0.6337488532728941, "grad_norm": 0.8821112382703615, "learning_rate": 3.664320250143465e-06, "loss": 0.4615, "step": 11744 }, { "epoch": 0.6338028169014085, "grad_norm": 1.0390053588204784, "learning_rate": 3.6636237449044077e-06, "loss": 0.5128, "step": 11745 }, { "epoch": 0.6338567805299228, "grad_norm": 1.1083850881743846, "learning_rate": 3.6629272924446622e-06, "loss": 0.5426, "step": 11746 }, { "epoch": 0.6339107441584372, "grad_norm": 0.9221822393442772, "learning_rate": 3.66223089278424e-06, "loss": 0.3907, "step": 11747 }, { "epoch": 0.6339647077869516, "grad_norm": 1.1716208385700353, "learning_rate": 3.6615345459431583e-06, "loss": 0.5321, "step": 11748 }, { "epoch": 0.634018671415466, "grad_norm": 1.3624881507810442, "learning_rate": 3.660838251941432e-06, "loss": 0.4521, "step": 11749 }, { "epoch": 0.6340726350439804, "grad_norm": 0.9901953072390286, "learning_rate": 3.6601420107990727e-06, "loss": 0.4687, "step": 11750 }, { "epoch": 0.6341265986724948, "grad_norm": 1.0978701902977286, "learning_rate": 3.6594458225360906e-06, "loss": 0.5425, "step": 11751 }, { "epoch": 0.6341805623010092, "grad_norm": 0.9191404888677512, "learning_rate": 3.658749687172496e-06, "loss": 0.4357, "step": 11752 }, { "epoch": 0.6342345259295235, "grad_norm": 1.0095608054477112, "learning_rate": 3.6580536047282955e-06, "loss": 0.5322, "step": 11753 }, { "epoch": 0.6342884895580378, "grad_norm": 0.9293680994779544, "learning_rate": 3.657357575223496e-06, "loss": 0.4135, "step": 11754 }, { "epoch": 0.6343424531865522, "grad_norm": 1.0221860814496269, "learning_rate": 3.6566615986781006e-06, "loss": 0.46, "step": 11755 }, { "epoch": 0.6343964168150666, "grad_norm": 1.1175126011922853, "learning_rate": 3.655965675112114e-06, "loss": 0.6329, "step": 11756 }, { "epoch": 0.634450380443581, "grad_norm": 0.9387935258083498, "learning_rate": 3.655269804545537e-06, "loss": 0.6033, "step": 11757 }, { "epoch": 0.6345043440720954, "grad_norm": 1.0754432684328101, "learning_rate": 3.654573986998371e-06, "loss": 0.4818, "step": 11758 }, { "epoch": 0.6345583077006098, "grad_norm": 0.8793464758021582, "learning_rate": 3.653878222490611e-06, "loss": 0.398, "step": 11759 }, { "epoch": 0.6346122713291241, "grad_norm": 0.9718448636916581, "learning_rate": 3.653182511042259e-06, "loss": 0.419, "step": 11760 }, { "epoch": 0.6346662349576385, "grad_norm": 1.11300667295268, "learning_rate": 3.6524868526733073e-06, "loss": 0.5988, "step": 11761 }, { "epoch": 0.6347201985861529, "grad_norm": 1.0403284241725854, "learning_rate": 3.6517912474037476e-06, "loss": 0.4666, "step": 11762 }, { "epoch": 0.6347741622146673, "grad_norm": 0.8139932392406047, "learning_rate": 3.651095695253577e-06, "loss": 0.3183, "step": 11763 }, { "epoch": 0.6348281258431817, "grad_norm": 1.2571364390478301, "learning_rate": 3.650400196242784e-06, "loss": 0.5483, "step": 11764 }, { "epoch": 0.6348820894716961, 
"grad_norm": 1.1759958343925172, "learning_rate": 3.6497047503913572e-06, "loss": 0.5401, "step": 11765 }, { "epoch": 0.6349360531002105, "grad_norm": 1.2313514524367877, "learning_rate": 3.6490093577192865e-06, "loss": 0.648, "step": 11766 }, { "epoch": 0.6349900167287248, "grad_norm": 1.330833572791146, "learning_rate": 3.6483140182465583e-06, "loss": 0.523, "step": 11767 }, { "epoch": 0.6350439803572392, "grad_norm": 1.0048408878482038, "learning_rate": 3.6476187319931545e-06, "loss": 0.3735, "step": 11768 }, { "epoch": 0.6350979439857536, "grad_norm": 1.11546434498582, "learning_rate": 3.6469234989790605e-06, "loss": 0.4847, "step": 11769 }, { "epoch": 0.635151907614268, "grad_norm": 1.0820873463712257, "learning_rate": 3.6462283192242577e-06, "loss": 0.4686, "step": 11770 }, { "epoch": 0.6352058712427824, "grad_norm": 1.1136450400106024, "learning_rate": 3.6455331927487257e-06, "loss": 0.4593, "step": 11771 }, { "epoch": 0.6352598348712968, "grad_norm": 1.431720286416296, "learning_rate": 3.6448381195724436e-06, "loss": 0.4203, "step": 11772 }, { "epoch": 0.6353137984998112, "grad_norm": 1.002835473352176, "learning_rate": 3.644143099715388e-06, "loss": 0.4591, "step": 11773 }, { "epoch": 0.6353677621283255, "grad_norm": 0.8744532022576739, "learning_rate": 3.643448133197538e-06, "loss": 0.4838, "step": 11774 }, { "epoch": 0.6354217257568399, "grad_norm": 0.978742622056173, "learning_rate": 3.6427532200388617e-06, "loss": 0.4442, "step": 11775 }, { "epoch": 0.6354756893853543, "grad_norm": 1.1064385219350261, "learning_rate": 3.642058360259334e-06, "loss": 0.4714, "step": 11776 }, { "epoch": 0.6355296530138687, "grad_norm": 0.9806712768613906, "learning_rate": 3.6413635538789274e-06, "loss": 0.3649, "step": 11777 }, { "epoch": 0.635583616642383, "grad_norm": 1.0775012060147213, "learning_rate": 3.6406688009176095e-06, "loss": 0.651, "step": 11778 }, { "epoch": 0.6356375802708975, "grad_norm": 1.0076089895086626, "learning_rate": 3.6399741013953495e-06, "loss": 0.4275, "step": 11779 }, { "epoch": 0.6356915438994118, "grad_norm": 0.8334387672030034, "learning_rate": 3.6392794553321133e-06, "loss": 0.2848, "step": 11780 }, { "epoch": 0.6357455075279261, "grad_norm": 0.9330924501678919, "learning_rate": 3.638584862747866e-06, "loss": 0.447, "step": 11781 }, { "epoch": 0.6357994711564405, "grad_norm": 1.0187147514537467, "learning_rate": 3.637890323662571e-06, "loss": 0.4628, "step": 11782 }, { "epoch": 0.6358534347849549, "grad_norm": 0.8587067300209367, "learning_rate": 3.6371958380961882e-06, "loss": 0.3701, "step": 11783 }, { "epoch": 0.6359073984134693, "grad_norm": 0.8742220274216445, "learning_rate": 3.6365014060686788e-06, "loss": 0.3538, "step": 11784 }, { "epoch": 0.6359613620419837, "grad_norm": 1.2212896191119018, "learning_rate": 3.6358070276000036e-06, "loss": 0.5523, "step": 11785 }, { "epoch": 0.6360153256704981, "grad_norm": 1.0571852103517925, "learning_rate": 3.6351127027101175e-06, "loss": 0.4595, "step": 11786 }, { "epoch": 0.6360692892990125, "grad_norm": 1.046079150814298, "learning_rate": 3.6344184314189755e-06, "loss": 0.4774, "step": 11787 }, { "epoch": 0.6361232529275268, "grad_norm": 1.1446625630041969, "learning_rate": 3.6337242137465356e-06, "loss": 0.6144, "step": 11788 }, { "epoch": 0.6361772165560412, "grad_norm": 0.9637619789927471, "learning_rate": 3.633030049712746e-06, "loss": 0.4927, "step": 11789 }, { "epoch": 0.6362311801845556, "grad_norm": 1.1293497807962525, "learning_rate": 3.632335939337559e-06, "loss": 0.5546, "step": 11790 }, { "epoch": 
0.63628514381307, "grad_norm": 1.1177844797491676, "learning_rate": 3.6316418826409238e-06, "loss": 0.5566, "step": 11791 }, { "epoch": 0.6363391074415844, "grad_norm": 0.9074640985434141, "learning_rate": 3.6309478796427887e-06, "loss": 0.4249, "step": 11792 }, { "epoch": 0.6363930710700988, "grad_norm": 0.8334220979043656, "learning_rate": 3.6302539303631003e-06, "loss": 0.3264, "step": 11793 }, { "epoch": 0.6364470346986132, "grad_norm": 1.008592552734655, "learning_rate": 3.629560034821804e-06, "loss": 0.411, "step": 11794 }, { "epoch": 0.6365009983271275, "grad_norm": 1.0296156176220612, "learning_rate": 3.6288661930388434e-06, "loss": 0.4417, "step": 11795 }, { "epoch": 0.6365549619556419, "grad_norm": 0.9906240420321379, "learning_rate": 3.628172405034157e-06, "loss": 0.456, "step": 11796 }, { "epoch": 0.6366089255841563, "grad_norm": 1.0160956466584452, "learning_rate": 3.6274786708276875e-06, "loss": 0.4539, "step": 11797 }, { "epoch": 0.6366628892126707, "grad_norm": 0.7707062941858106, "learning_rate": 3.6267849904393726e-06, "loss": 0.3556, "step": 11798 }, { "epoch": 0.6367168528411851, "grad_norm": 1.1040790885441074, "learning_rate": 3.6260913638891504e-06, "loss": 0.4418, "step": 11799 }, { "epoch": 0.6367708164696995, "grad_norm": 0.8577942777848409, "learning_rate": 3.6253977911969553e-06, "loss": 0.3636, "step": 11800 }, { "epoch": 0.6368247800982137, "grad_norm": 1.4709207495565477, "learning_rate": 3.624704272382723e-06, "loss": 0.6872, "step": 11801 }, { "epoch": 0.6368787437267281, "grad_norm": 0.9317767469216888, "learning_rate": 3.624010807466385e-06, "loss": 0.3399, "step": 11802 }, { "epoch": 0.6369327073552425, "grad_norm": 1.1684498908522445, "learning_rate": 3.6233173964678693e-06, "loss": 0.5054, "step": 11803 }, { "epoch": 0.6369866709837569, "grad_norm": 1.1519871793443182, "learning_rate": 3.622624039407109e-06, "loss": 0.6649, "step": 11804 }, { "epoch": 0.6370406346122713, "grad_norm": 1.1529575706695985, "learning_rate": 3.621930736304031e-06, "loss": 0.5264, "step": 11805 }, { "epoch": 0.6370945982407857, "grad_norm": 0.9194174560045032, "learning_rate": 3.6212374871785616e-06, "loss": 0.4148, "step": 11806 }, { "epoch": 0.6371485618693001, "grad_norm": 0.9341801292140862, "learning_rate": 3.620544292050625e-06, "loss": 0.3989, "step": 11807 }, { "epoch": 0.6372025254978144, "grad_norm": 1.0024498158620978, "learning_rate": 3.6198511509401467e-06, "loss": 0.4114, "step": 11808 }, { "epoch": 0.6372564891263288, "grad_norm": 0.957103362197787, "learning_rate": 3.6191580638670433e-06, "loss": 0.3812, "step": 11809 }, { "epoch": 0.6373104527548432, "grad_norm": 1.0058274417991109, "learning_rate": 3.618465030851238e-06, "loss": 0.3655, "step": 11810 }, { "epoch": 0.6373644163833576, "grad_norm": 0.9730580400485673, "learning_rate": 3.617772051912649e-06, "loss": 0.4483, "step": 11811 }, { "epoch": 0.637418380011872, "grad_norm": 1.0455260279646992, "learning_rate": 3.6170791270711925e-06, "loss": 0.5493, "step": 11812 }, { "epoch": 0.6374723436403864, "grad_norm": 0.9864622350730408, "learning_rate": 3.6163862563467854e-06, "loss": 0.4234, "step": 11813 }, { "epoch": 0.6375263072689008, "grad_norm": 0.8609351932831913, "learning_rate": 3.61569343975934e-06, "loss": 0.2874, "step": 11814 }, { "epoch": 0.6375802708974151, "grad_norm": 1.203990877520913, "learning_rate": 3.6150006773287694e-06, "loss": 0.5322, "step": 11815 }, { "epoch": 0.6376342345259295, "grad_norm": 0.9629819169263105, "learning_rate": 3.614307969074984e-06, "loss": 0.5394, "step": 11816 
}, { "epoch": 0.6376881981544439, "grad_norm": 0.9846492842809291, "learning_rate": 3.613615315017892e-06, "loss": 0.3247, "step": 11817 }, { "epoch": 0.6377421617829583, "grad_norm": 0.9230098423040671, "learning_rate": 3.6129227151774027e-06, "loss": 0.5149, "step": 11818 }, { "epoch": 0.6377961254114727, "grad_norm": 0.9835220657053889, "learning_rate": 3.612230169573423e-06, "loss": 0.4401, "step": 11819 }, { "epoch": 0.6378500890399871, "grad_norm": 1.083103854230946, "learning_rate": 3.611537678225854e-06, "loss": 0.4839, "step": 11820 }, { "epoch": 0.6379040526685015, "grad_norm": 1.0107211048620754, "learning_rate": 3.610845241154599e-06, "loss": 0.4062, "step": 11821 }, { "epoch": 0.6379580162970158, "grad_norm": 0.8200151887418612, "learning_rate": 3.610152858379563e-06, "loss": 0.3761, "step": 11822 }, { "epoch": 0.6380119799255302, "grad_norm": 1.049109013075734, "learning_rate": 3.609460529920642e-06, "loss": 0.483, "step": 11823 }, { "epoch": 0.6380659435540446, "grad_norm": 1.2449391376260857, "learning_rate": 3.6087682557977354e-06, "loss": 0.5463, "step": 11824 }, { "epoch": 0.638119907182559, "grad_norm": 1.0392869453789984, "learning_rate": 3.60807603603074e-06, "loss": 0.5292, "step": 11825 }, { "epoch": 0.6381738708110734, "grad_norm": 0.9623057773497181, "learning_rate": 3.6073838706395515e-06, "loss": 0.4404, "step": 11826 }, { "epoch": 0.6382278344395877, "grad_norm": 1.0243394298402049, "learning_rate": 3.606691759644063e-06, "loss": 0.4221, "step": 11827 }, { "epoch": 0.6382817980681021, "grad_norm": 0.9227254197408107, "learning_rate": 3.6059997030641667e-06, "loss": 0.4289, "step": 11828 }, { "epoch": 0.6383357616966164, "grad_norm": 0.7130346358499351, "learning_rate": 3.6053077009197533e-06, "loss": 0.3031, "step": 11829 }, { "epoch": 0.6383897253251308, "grad_norm": 1.37439822397219, "learning_rate": 3.6046157532307102e-06, "loss": 0.6102, "step": 11830 }, { "epoch": 0.6384436889536452, "grad_norm": 0.9087633221020424, "learning_rate": 3.6039238600169246e-06, "loss": 0.3659, "step": 11831 }, { "epoch": 0.6384976525821596, "grad_norm": 1.2668161293989115, "learning_rate": 3.603232021298284e-06, "loss": 0.6256, "step": 11832 }, { "epoch": 0.638551616210674, "grad_norm": 0.959193550555773, "learning_rate": 3.6025402370946716e-06, "loss": 0.4063, "step": 11833 }, { "epoch": 0.6386055798391884, "grad_norm": 1.0784065121318387, "learning_rate": 3.60184850742597e-06, "loss": 0.6447, "step": 11834 }, { "epoch": 0.6386595434677028, "grad_norm": 1.1486671849872434, "learning_rate": 3.6011568323120593e-06, "loss": 0.5143, "step": 11835 }, { "epoch": 0.6387135070962171, "grad_norm": 1.0886315358567045, "learning_rate": 3.6004652117728233e-06, "loss": 0.4846, "step": 11836 }, { "epoch": 0.6387674707247315, "grad_norm": 1.059717028293982, "learning_rate": 3.599773645828134e-06, "loss": 0.4171, "step": 11837 }, { "epoch": 0.6388214343532459, "grad_norm": 0.9765178831125351, "learning_rate": 3.59908213449787e-06, "loss": 0.4814, "step": 11838 }, { "epoch": 0.6388753979817603, "grad_norm": 1.0601128174880687, "learning_rate": 3.598390677801906e-06, "loss": 0.4595, "step": 11839 }, { "epoch": 0.6389293616102747, "grad_norm": 1.156416807405208, "learning_rate": 3.597699275760115e-06, "loss": 0.4891, "step": 11840 }, { "epoch": 0.6389833252387891, "grad_norm": 1.0141580000687147, "learning_rate": 3.59700792839237e-06, "loss": 0.4731, "step": 11841 }, { "epoch": 0.6390372888673035, "grad_norm": 0.9870610302995407, "learning_rate": 3.5963166357185386e-06, "loss": 0.3475, "step": 
11842 }, { "epoch": 0.6390912524958178, "grad_norm": 0.8680721287595856, "learning_rate": 3.595625397758493e-06, "loss": 0.3664, "step": 11843 }, { "epoch": 0.6391452161243322, "grad_norm": 1.2814171187417949, "learning_rate": 3.5949342145320963e-06, "loss": 0.5359, "step": 11844 }, { "epoch": 0.6391991797528466, "grad_norm": 0.8516095364958165, "learning_rate": 3.5942430860592157e-06, "loss": 0.3809, "step": 11845 }, { "epoch": 0.639253143381361, "grad_norm": 1.155759997730833, "learning_rate": 3.5935520123597135e-06, "loss": 0.5307, "step": 11846 }, { "epoch": 0.6393071070098754, "grad_norm": 1.0822458140400364, "learning_rate": 3.592860993453453e-06, "loss": 0.444, "step": 11847 }, { "epoch": 0.6393610706383898, "grad_norm": 0.9613989606704618, "learning_rate": 3.5921700293602945e-06, "loss": 0.4564, "step": 11848 }, { "epoch": 0.6394150342669042, "grad_norm": 1.0888574610948478, "learning_rate": 3.5914791201000976e-06, "loss": 0.4911, "step": 11849 }, { "epoch": 0.6394689978954184, "grad_norm": 1.1989843537209741, "learning_rate": 3.5907882656927197e-06, "loss": 0.6406, "step": 11850 }, { "epoch": 0.6395229615239328, "grad_norm": 0.990515817940133, "learning_rate": 3.5900974661580158e-06, "loss": 0.4356, "step": 11851 }, { "epoch": 0.6395769251524472, "grad_norm": 0.7850713174218881, "learning_rate": 3.58940672151584e-06, "loss": 0.3019, "step": 11852 }, { "epoch": 0.6396308887809616, "grad_norm": 0.7590479416073787, "learning_rate": 3.588716031786046e-06, "loss": 0.4237, "step": 11853 }, { "epoch": 0.639684852409476, "grad_norm": 1.1047549655136097, "learning_rate": 3.5880253969884836e-06, "loss": 0.4799, "step": 11854 }, { "epoch": 0.6397388160379904, "grad_norm": 1.1125404702873787, "learning_rate": 3.5873348171430032e-06, "loss": 0.5958, "step": 11855 }, { "epoch": 0.6397927796665048, "grad_norm": 0.9105936432571181, "learning_rate": 3.5866442922694533e-06, "loss": 0.3617, "step": 11856 }, { "epoch": 0.6398467432950191, "grad_norm": 1.2031948758219544, "learning_rate": 3.585953822387681e-06, "loss": 0.6519, "step": 11857 }, { "epoch": 0.6399007069235335, "grad_norm": 0.8786240910467211, "learning_rate": 3.5852634075175296e-06, "loss": 0.4227, "step": 11858 }, { "epoch": 0.6399546705520479, "grad_norm": 1.076703702229077, "learning_rate": 3.584573047678841e-06, "loss": 0.446, "step": 11859 }, { "epoch": 0.6400086341805623, "grad_norm": 0.9352386752843622, "learning_rate": 3.583882742891458e-06, "loss": 0.4432, "step": 11860 }, { "epoch": 0.6400625978090767, "grad_norm": 1.1917024856374494, "learning_rate": 3.5831924931752228e-06, "loss": 0.5753, "step": 11861 }, { "epoch": 0.6401165614375911, "grad_norm": 0.8973400470615696, "learning_rate": 3.5825022985499714e-06, "loss": 0.4015, "step": 11862 }, { "epoch": 0.6401705250661055, "grad_norm": 1.1400537019160801, "learning_rate": 3.5818121590355406e-06, "loss": 0.5438, "step": 11863 }, { "epoch": 0.6402244886946198, "grad_norm": 1.0355132503485394, "learning_rate": 3.5811220746517695e-06, "loss": 0.5055, "step": 11864 }, { "epoch": 0.6402784523231342, "grad_norm": 1.036482477837173, "learning_rate": 3.580432045418486e-06, "loss": 0.5454, "step": 11865 }, { "epoch": 0.6403324159516486, "grad_norm": 1.0180056641327966, "learning_rate": 3.5797420713555265e-06, "loss": 0.4254, "step": 11866 }, { "epoch": 0.640386379580163, "grad_norm": 0.917505645711224, "learning_rate": 3.5790521524827192e-06, "loss": 0.4709, "step": 11867 }, { "epoch": 0.6404403432086774, "grad_norm": 1.1545946425835654, "learning_rate": 3.5783622888198943e-06, "loss": 
0.5769, "step": 11868 }, { "epoch": 0.6404943068371918, "grad_norm": 1.0431991640835372, "learning_rate": 3.577672480386878e-06, "loss": 0.5169, "step": 11869 }, { "epoch": 0.6405482704657061, "grad_norm": 0.9701285881827786, "learning_rate": 3.5769827272034975e-06, "loss": 0.5117, "step": 11870 }, { "epoch": 0.6406022340942205, "grad_norm": 1.0616998107306277, "learning_rate": 3.576293029289577e-06, "loss": 0.4275, "step": 11871 }, { "epoch": 0.6406561977227349, "grad_norm": 0.8677877830975936, "learning_rate": 3.5756033866649386e-06, "loss": 0.4124, "step": 11872 }, { "epoch": 0.6407101613512493, "grad_norm": 0.9352147137459771, "learning_rate": 3.5749137993494033e-06, "loss": 0.3787, "step": 11873 }, { "epoch": 0.6407641249797636, "grad_norm": 1.0011680153862383, "learning_rate": 3.5742242673627894e-06, "loss": 0.5526, "step": 11874 }, { "epoch": 0.640818088608278, "grad_norm": 1.053986654617322, "learning_rate": 3.5735347907249174e-06, "loss": 0.5636, "step": 11875 }, { "epoch": 0.6408720522367924, "grad_norm": 1.0706253210945322, "learning_rate": 3.572845369455601e-06, "loss": 0.5475, "step": 11876 }, { "epoch": 0.6409260158653067, "grad_norm": 0.9202411363457638, "learning_rate": 3.572156003574658e-06, "loss": 0.3487, "step": 11877 }, { "epoch": 0.6409799794938211, "grad_norm": 0.899863689164434, "learning_rate": 3.5714666931018983e-06, "loss": 0.3778, "step": 11878 }, { "epoch": 0.6410339431223355, "grad_norm": 1.0236463650333387, "learning_rate": 3.5707774380571335e-06, "loss": 0.4522, "step": 11879 }, { "epoch": 0.6410879067508499, "grad_norm": 1.1261955438551223, "learning_rate": 3.5700882384601755e-06, "loss": 0.5915, "step": 11880 }, { "epoch": 0.6411418703793643, "grad_norm": 1.0258030500347606, "learning_rate": 3.56939909433083e-06, "loss": 0.5951, "step": 11881 }, { "epoch": 0.6411958340078787, "grad_norm": 1.2158896633769478, "learning_rate": 3.5687100056889067e-06, "loss": 0.5773, "step": 11882 }, { "epoch": 0.6412497976363931, "grad_norm": 0.8365068509555995, "learning_rate": 3.5680209725542092e-06, "loss": 0.3966, "step": 11883 }, { "epoch": 0.6413037612649074, "grad_norm": 1.2697214576393172, "learning_rate": 3.5673319949465423e-06, "loss": 0.6151, "step": 11884 }, { "epoch": 0.6413577248934218, "grad_norm": 1.0620040751671826, "learning_rate": 3.566643072885705e-06, "loss": 0.4062, "step": 11885 }, { "epoch": 0.6414116885219362, "grad_norm": 1.1544363238427984, "learning_rate": 3.5659542063914986e-06, "loss": 0.4378, "step": 11886 }, { "epoch": 0.6414656521504506, "grad_norm": 1.1052501798928747, "learning_rate": 3.5652653954837248e-06, "loss": 0.3971, "step": 11887 }, { "epoch": 0.641519615778965, "grad_norm": 1.1378833052133042, "learning_rate": 3.5645766401821757e-06, "loss": 0.5962, "step": 11888 }, { "epoch": 0.6415735794074794, "grad_norm": 1.0205537698597862, "learning_rate": 3.563887940506651e-06, "loss": 0.4951, "step": 11889 }, { "epoch": 0.6416275430359938, "grad_norm": 0.8769584623939011, "learning_rate": 3.5631992964769435e-06, "loss": 0.4205, "step": 11890 }, { "epoch": 0.6416815066645081, "grad_norm": 0.9806507707233583, "learning_rate": 3.5625107081128465e-06, "loss": 0.4013, "step": 11891 }, { "epoch": 0.6417354702930225, "grad_norm": 1.158147346907811, "learning_rate": 3.5618221754341476e-06, "loss": 0.5503, "step": 11892 }, { "epoch": 0.6417894339215369, "grad_norm": 0.8313650267337248, "learning_rate": 3.561133698460637e-06, "loss": 0.3713, "step": 11893 }, { "epoch": 0.6418433975500513, "grad_norm": 1.0546089949668522, "learning_rate": 
3.5604452772121047e-06, "loss": 0.4169, "step": 11894 }, { "epoch": 0.6418973611785657, "grad_norm": 1.071391496847875, "learning_rate": 3.559756911708333e-06, "loss": 0.4536, "step": 11895 }, { "epoch": 0.6419513248070801, "grad_norm": 1.006932852231791, "learning_rate": 3.55906860196911e-06, "loss": 0.4264, "step": 11896 }, { "epoch": 0.6420052884355945, "grad_norm": 1.2268579885616375, "learning_rate": 3.5583803480142153e-06, "loss": 0.5848, "step": 11897 }, { "epoch": 0.6420592520641087, "grad_norm": 1.217043942412284, "learning_rate": 3.5576921498634332e-06, "loss": 0.5177, "step": 11898 }, { "epoch": 0.6421132156926231, "grad_norm": 1.0088739460244016, "learning_rate": 3.5570040075365397e-06, "loss": 0.4419, "step": 11899 }, { "epoch": 0.6421671793211375, "grad_norm": 1.003730399324751, "learning_rate": 3.556315921053315e-06, "loss": 0.4928, "step": 11900 }, { "epoch": 0.6422211429496519, "grad_norm": 1.0670291735385713, "learning_rate": 3.5556278904335327e-06, "loss": 0.4995, "step": 11901 }, { "epoch": 0.6422751065781663, "grad_norm": 1.1270239846743635, "learning_rate": 3.554939915696971e-06, "loss": 0.725, "step": 11902 }, { "epoch": 0.6423290702066807, "grad_norm": 1.1539306702718852, "learning_rate": 3.5542519968634014e-06, "loss": 0.5147, "step": 11903 }, { "epoch": 0.6423830338351951, "grad_norm": 1.0589081125115505, "learning_rate": 3.553564133952595e-06, "loss": 0.4141, "step": 11904 }, { "epoch": 0.6424369974637094, "grad_norm": 0.8704627889021651, "learning_rate": 3.552876326984325e-06, "loss": 0.4628, "step": 11905 }, { "epoch": 0.6424909610922238, "grad_norm": 1.2241137542495393, "learning_rate": 3.5521885759783538e-06, "loss": 0.5089, "step": 11906 }, { "epoch": 0.6425449247207382, "grad_norm": 1.0431986454290278, "learning_rate": 3.5515008809544516e-06, "loss": 0.4745, "step": 11907 }, { "epoch": 0.6425988883492526, "grad_norm": 1.0667832693960269, "learning_rate": 3.5508132419323825e-06, "loss": 0.5309, "step": 11908 }, { "epoch": 0.642652851977767, "grad_norm": 1.0231897734631454, "learning_rate": 3.55012565893191e-06, "loss": 0.4872, "step": 11909 }, { "epoch": 0.6427068156062814, "grad_norm": 1.0107114621207502, "learning_rate": 3.549438131972797e-06, "loss": 0.5285, "step": 11910 }, { "epoch": 0.6427607792347958, "grad_norm": 0.851671503612425, "learning_rate": 3.5487506610748035e-06, "loss": 0.4303, "step": 11911 }, { "epoch": 0.6428147428633101, "grad_norm": 1.0272095779233312, "learning_rate": 3.548063246257688e-06, "loss": 0.4818, "step": 11912 }, { "epoch": 0.6428687064918245, "grad_norm": 1.084939575481687, "learning_rate": 3.5473758875412058e-06, "loss": 0.5225, "step": 11913 }, { "epoch": 0.6429226701203389, "grad_norm": 0.8491112396046413, "learning_rate": 3.546688584945114e-06, "loss": 0.3734, "step": 11914 }, { "epoch": 0.6429766337488533, "grad_norm": 0.8532936301543291, "learning_rate": 3.5460013384891658e-06, "loss": 0.352, "step": 11915 }, { "epoch": 0.6430305973773677, "grad_norm": 0.849745824626063, "learning_rate": 3.545314148193113e-06, "loss": 0.4371, "step": 11916 }, { "epoch": 0.6430845610058821, "grad_norm": 1.0984460195119274, "learning_rate": 3.544627014076707e-06, "loss": 0.5781, "step": 11917 }, { "epoch": 0.6431385246343965, "grad_norm": 1.2499596474846406, "learning_rate": 3.543939936159697e-06, "loss": 0.6563, "step": 11918 }, { "epoch": 0.6431924882629108, "grad_norm": 1.1322220896063537, "learning_rate": 3.5432529144618287e-06, "loss": 0.3856, "step": 11919 }, { "epoch": 0.6432464518914252, "grad_norm": 1.217226763452663, 
"learning_rate": 3.5425659490028484e-06, "loss": 0.5335, "step": 11920 }, { "epoch": 0.6433004155199396, "grad_norm": 1.1192795845824188, "learning_rate": 3.5418790398025017e-06, "loss": 0.4824, "step": 11921 }, { "epoch": 0.643354379148454, "grad_norm": 0.9877301624910121, "learning_rate": 3.54119218688053e-06, "loss": 0.4844, "step": 11922 }, { "epoch": 0.6434083427769683, "grad_norm": 1.1772423062509456, "learning_rate": 3.540505390256673e-06, "loss": 0.7378, "step": 11923 }, { "epoch": 0.6434623064054827, "grad_norm": 1.140672273415446, "learning_rate": 3.5398186499506727e-06, "loss": 0.5456, "step": 11924 }, { "epoch": 0.6435162700339971, "grad_norm": 1.1576295079430625, "learning_rate": 3.5391319659822655e-06, "loss": 0.5606, "step": 11925 }, { "epoch": 0.6435702336625114, "grad_norm": 1.1158734007624782, "learning_rate": 3.538445338371185e-06, "loss": 0.441, "step": 11926 }, { "epoch": 0.6436241972910258, "grad_norm": 1.0895387961647696, "learning_rate": 3.5377587671371684e-06, "loss": 0.4575, "step": 11927 }, { "epoch": 0.6436781609195402, "grad_norm": 0.9028356284805057, "learning_rate": 3.5370722522999468e-06, "loss": 0.3728, "step": 11928 }, { "epoch": 0.6437321245480546, "grad_norm": 1.194750387660433, "learning_rate": 3.536385793879252e-06, "loss": 0.5629, "step": 11929 }, { "epoch": 0.643786088176569, "grad_norm": 1.0048290462940337, "learning_rate": 3.5356993918948144e-06, "loss": 0.4298, "step": 11930 }, { "epoch": 0.6438400518050834, "grad_norm": 0.9120050962829911, "learning_rate": 3.53501304636636e-06, "loss": 0.3681, "step": 11931 }, { "epoch": 0.6438940154335978, "grad_norm": 1.0980971405553193, "learning_rate": 3.5343267573136162e-06, "loss": 0.4277, "step": 11932 }, { "epoch": 0.6439479790621121, "grad_norm": 0.9674674229318213, "learning_rate": 3.533640524756311e-06, "loss": 0.358, "step": 11933 }, { "epoch": 0.6440019426906265, "grad_norm": 1.0497592456279705, "learning_rate": 3.5329543487141605e-06, "loss": 0.4791, "step": 11934 }, { "epoch": 0.6440559063191409, "grad_norm": 1.0397268461070002, "learning_rate": 3.53226822920689e-06, "loss": 0.4646, "step": 11935 }, { "epoch": 0.6441098699476553, "grad_norm": 1.013968402626697, "learning_rate": 3.5315821662542184e-06, "loss": 0.4242, "step": 11936 }, { "epoch": 0.6441638335761697, "grad_norm": 0.686591279484563, "learning_rate": 3.530896159875865e-06, "loss": 0.2521, "step": 11937 }, { "epoch": 0.6442177972046841, "grad_norm": 1.3280297322686236, "learning_rate": 3.5302102100915457e-06, "loss": 0.5804, "step": 11938 }, { "epoch": 0.6442717608331984, "grad_norm": 0.9911727796565426, "learning_rate": 3.529524316920977e-06, "loss": 0.4233, "step": 11939 }, { "epoch": 0.6443257244617128, "grad_norm": 1.0949596262836387, "learning_rate": 3.5288384803838684e-06, "loss": 0.4113, "step": 11940 }, { "epoch": 0.6443796880902272, "grad_norm": 0.954651637684788, "learning_rate": 3.5281527004999327e-06, "loss": 0.5168, "step": 11941 }, { "epoch": 0.6444336517187416, "grad_norm": 0.758775086446862, "learning_rate": 3.5274669772888823e-06, "loss": 0.3177, "step": 11942 }, { "epoch": 0.644487615347256, "grad_norm": 1.0077864652770185, "learning_rate": 3.526781310770423e-06, "loss": 0.5296, "step": 11943 }, { "epoch": 0.6445415789757704, "grad_norm": 0.8561574002161556, "learning_rate": 3.5260957009642637e-06, "loss": 0.3607, "step": 11944 }, { "epoch": 0.6445955426042848, "grad_norm": 1.0364908697485768, "learning_rate": 3.525410147890107e-06, "loss": 0.4743, "step": 11945 }, { "epoch": 0.644649506232799, "grad_norm": 
1.0588290200723498, "learning_rate": 3.5247246515676604e-06, "loss": 0.3644, "step": 11946 }, { "epoch": 0.6447034698613134, "grad_norm": 0.9094091043401185, "learning_rate": 3.5240392120166213e-06, "loss": 0.3983, "step": 11947 }, { "epoch": 0.6447574334898278, "grad_norm": 1.0012866905639008, "learning_rate": 3.5233538292566926e-06, "loss": 0.446, "step": 11948 }, { "epoch": 0.6448113971183422, "grad_norm": 0.9925163396189625, "learning_rate": 3.522668503307571e-06, "loss": 0.5024, "step": 11949 }, { "epoch": 0.6448653607468566, "grad_norm": 1.0756836157076615, "learning_rate": 3.5219832341889547e-06, "loss": 0.5328, "step": 11950 }, { "epoch": 0.644919324375371, "grad_norm": 0.9860805759431336, "learning_rate": 3.5212980219205393e-06, "loss": 0.4539, "step": 11951 }, { "epoch": 0.6449732880038854, "grad_norm": 1.0339469568083965, "learning_rate": 3.5206128665220185e-06, "loss": 0.4384, "step": 11952 }, { "epoch": 0.6450272516323997, "grad_norm": 0.9929974677253116, "learning_rate": 3.519927768013085e-06, "loss": 0.4799, "step": 11953 }, { "epoch": 0.6450812152609141, "grad_norm": 1.1859969354093964, "learning_rate": 3.5192427264134265e-06, "loss": 0.6296, "step": 11954 }, { "epoch": 0.6451351788894285, "grad_norm": 1.072597003008902, "learning_rate": 3.518557741742734e-06, "loss": 0.5244, "step": 11955 }, { "epoch": 0.6451891425179429, "grad_norm": 0.7052607853891402, "learning_rate": 3.517872814020693e-06, "loss": 0.2658, "step": 11956 }, { "epoch": 0.6452431061464573, "grad_norm": 1.1657041904256165, "learning_rate": 3.5171879432669907e-06, "loss": 0.4379, "step": 11957 }, { "epoch": 0.6452970697749717, "grad_norm": 1.130605479739754, "learning_rate": 3.516503129501311e-06, "loss": 0.4864, "step": 11958 }, { "epoch": 0.6453510334034861, "grad_norm": 0.980993114354136, "learning_rate": 3.515818372743335e-06, "loss": 0.4699, "step": 11959 }, { "epoch": 0.6454049970320004, "grad_norm": 0.8947894708812496, "learning_rate": 3.5151336730127462e-06, "loss": 0.4314, "step": 11960 }, { "epoch": 0.6454589606605148, "grad_norm": 0.8610539387097311, "learning_rate": 3.514449030329218e-06, "loss": 0.4741, "step": 11961 }, { "epoch": 0.6455129242890292, "grad_norm": 1.1871442600655469, "learning_rate": 3.513764444712432e-06, "loss": 0.5105, "step": 11962 }, { "epoch": 0.6455668879175436, "grad_norm": 1.0590143693726168, "learning_rate": 3.513079916182063e-06, "loss": 0.4842, "step": 11963 }, { "epoch": 0.645620851546058, "grad_norm": 0.9741806404140553, "learning_rate": 3.5123954447577836e-06, "loss": 0.607, "step": 11964 }, { "epoch": 0.6456748151745724, "grad_norm": 1.374233784852015, "learning_rate": 3.5117110304592682e-06, "loss": 0.5261, "step": 11965 }, { "epoch": 0.6457287788030868, "grad_norm": 1.1013421291945056, "learning_rate": 3.511026673306187e-06, "loss": 0.4686, "step": 11966 }, { "epoch": 0.645782742431601, "grad_norm": 1.0729944528064397, "learning_rate": 3.5103423733182086e-06, "loss": 0.4908, "step": 11967 }, { "epoch": 0.6458367060601155, "grad_norm": 0.8531198874159256, "learning_rate": 3.5096581305150036e-06, "loss": 0.4415, "step": 11968 }, { "epoch": 0.6458906696886298, "grad_norm": 0.9055682881444185, "learning_rate": 3.508973944916232e-06, "loss": 0.4578, "step": 11969 }, { "epoch": 0.6459446333171442, "grad_norm": 1.0994233941725013, "learning_rate": 3.5082898165415614e-06, "loss": 0.5986, "step": 11970 }, { "epoch": 0.6459985969456586, "grad_norm": 1.0705638019984045, "learning_rate": 3.507605745410654e-06, "loss": 0.4163, "step": 11971 }, { "epoch": 0.646052560574173, 
"grad_norm": 1.2316473614039445, "learning_rate": 3.5069217315431705e-06, "loss": 0.7238, "step": 11972 }, { "epoch": 0.6461065242026874, "grad_norm": 0.9890461397716981, "learning_rate": 3.5062377749587706e-06, "loss": 0.4961, "step": 11973 }, { "epoch": 0.6461604878312017, "grad_norm": 1.2127660521699346, "learning_rate": 3.5055538756771114e-06, "loss": 0.3634, "step": 11974 }, { "epoch": 0.6462144514597161, "grad_norm": 1.2597035317663392, "learning_rate": 3.5048700337178487e-06, "loss": 0.6444, "step": 11975 }, { "epoch": 0.6462684150882305, "grad_norm": 1.026868310020619, "learning_rate": 3.5041862491006375e-06, "loss": 0.4596, "step": 11976 }, { "epoch": 0.6463223787167449, "grad_norm": 1.0539895145314626, "learning_rate": 3.5035025218451302e-06, "loss": 0.4728, "step": 11977 }, { "epoch": 0.6463763423452593, "grad_norm": 0.993709324151771, "learning_rate": 3.502818851970977e-06, "loss": 0.5062, "step": 11978 }, { "epoch": 0.6464303059737737, "grad_norm": 0.8855176811870283, "learning_rate": 3.502135239497829e-06, "loss": 0.3457, "step": 11979 }, { "epoch": 0.6464842696022881, "grad_norm": 0.7861643009498145, "learning_rate": 3.501451684445333e-06, "loss": 0.2857, "step": 11980 }, { "epoch": 0.6465382332308024, "grad_norm": 0.8551614489047985, "learning_rate": 3.5007681868331357e-06, "loss": 0.364, "step": 11981 }, { "epoch": 0.6465921968593168, "grad_norm": 0.983725984186243, "learning_rate": 3.5000847466808795e-06, "loss": 0.4826, "step": 11982 }, { "epoch": 0.6466461604878312, "grad_norm": 1.0616287305277774, "learning_rate": 3.499401364008209e-06, "loss": 0.5545, "step": 11983 }, { "epoch": 0.6467001241163456, "grad_norm": 1.0858162030902956, "learning_rate": 3.498718038834764e-06, "loss": 0.535, "step": 11984 }, { "epoch": 0.64675408774486, "grad_norm": 1.0123656383763082, "learning_rate": 3.4980347711801853e-06, "loss": 0.4825, "step": 11985 }, { "epoch": 0.6468080513733744, "grad_norm": 1.082969447071066, "learning_rate": 3.4973515610641095e-06, "loss": 0.5023, "step": 11986 }, { "epoch": 0.6468620150018888, "grad_norm": 0.9688310911420915, "learning_rate": 3.4966684085061743e-06, "loss": 0.4552, "step": 11987 }, { "epoch": 0.6469159786304031, "grad_norm": 0.9405301421491425, "learning_rate": 3.4959853135260147e-06, "loss": 0.4323, "step": 11988 }, { "epoch": 0.6469699422589175, "grad_norm": 1.008925601390938, "learning_rate": 3.49530227614326e-06, "loss": 0.4316, "step": 11989 }, { "epoch": 0.6470239058874319, "grad_norm": 0.9466784918981792, "learning_rate": 3.4946192963775426e-06, "loss": 0.5245, "step": 11990 }, { "epoch": 0.6470778695159463, "grad_norm": 1.0912654405501792, "learning_rate": 3.493936374248494e-06, "loss": 0.4359, "step": 11991 }, { "epoch": 0.6471318331444607, "grad_norm": 0.8532623630940516, "learning_rate": 3.4932535097757415e-06, "loss": 0.364, "step": 11992 }, { "epoch": 0.647185796772975, "grad_norm": 1.223304457568847, "learning_rate": 3.49257070297891e-06, "loss": 0.3755, "step": 11993 }, { "epoch": 0.6472397604014895, "grad_norm": 0.9421915313611954, "learning_rate": 3.491887953877625e-06, "loss": 0.4689, "step": 11994 }, { "epoch": 0.6472937240300037, "grad_norm": 1.1605546645582148, "learning_rate": 3.4912052624915103e-06, "loss": 0.5835, "step": 11995 }, { "epoch": 0.6473476876585181, "grad_norm": 1.1380754956260215, "learning_rate": 3.490522628840185e-06, "loss": 0.4987, "step": 11996 }, { "epoch": 0.6474016512870325, "grad_norm": 1.0582605546154766, "learning_rate": 3.4898400529432703e-06, "loss": 0.4886, "step": 11997 }, { "epoch": 
0.6474556149155469, "grad_norm": 1.2750742778075972, "learning_rate": 3.489157534820383e-06, "loss": 0.5606, "step": 11998 }, { "epoch": 0.6475095785440613, "grad_norm": 0.7461107255434049, "learning_rate": 3.48847507449114e-06, "loss": 0.3248, "step": 11999 }, { "epoch": 0.6475635421725757, "grad_norm": 1.1380250153657816, "learning_rate": 3.4877926719751565e-06, "loss": 0.4932, "step": 12000 }, { "epoch": 0.6475635421725757, "eval_loss": 0.5388334393501282, "eval_runtime": 161.06, "eval_samples_per_second": 21.352, "eval_steps_per_second": 0.894, "step": 12000 }, { "epoch": 0.64761750580109, "grad_norm": 1.0115408255471097, "learning_rate": 3.4871103272920454e-06, "loss": 0.5404, "step": 12001 }, { "epoch": 0.6476714694296044, "grad_norm": 0.9856560116097753, "learning_rate": 3.486428040461416e-06, "loss": 0.6018, "step": 12002 }, { "epoch": 0.6477254330581188, "grad_norm": 1.2572996292485785, "learning_rate": 3.48574581150288e-06, "loss": 0.6961, "step": 12003 }, { "epoch": 0.6477793966866332, "grad_norm": 1.0654168950996765, "learning_rate": 3.485063640436044e-06, "loss": 0.4839, "step": 12004 }, { "epoch": 0.6478333603151476, "grad_norm": 0.8445614781514537, "learning_rate": 3.484381527280515e-06, "loss": 0.3003, "step": 12005 }, { "epoch": 0.647887323943662, "grad_norm": 1.107279703029957, "learning_rate": 3.483699472055897e-06, "loss": 0.4626, "step": 12006 }, { "epoch": 0.6479412875721764, "grad_norm": 0.7782653290260381, "learning_rate": 3.4830174747817945e-06, "loss": 0.3494, "step": 12007 }, { "epoch": 0.6479952512006907, "grad_norm": 0.8418006945509531, "learning_rate": 3.4823355354778095e-06, "loss": 0.3649, "step": 12008 }, { "epoch": 0.6480492148292051, "grad_norm": 1.0051765011949316, "learning_rate": 3.481653654163537e-06, "loss": 0.4649, "step": 12009 }, { "epoch": 0.6481031784577195, "grad_norm": 1.0418608877793682, "learning_rate": 3.480971830858579e-06, "loss": 0.5339, "step": 12010 }, { "epoch": 0.6481571420862339, "grad_norm": 0.9858402214220267, "learning_rate": 3.4802900655825306e-06, "loss": 0.395, "step": 12011 }, { "epoch": 0.6482111057147483, "grad_norm": 0.8972181112207309, "learning_rate": 3.4796083583549857e-06, "loss": 0.4511, "step": 12012 }, { "epoch": 0.6482650693432627, "grad_norm": 0.9486437575474898, "learning_rate": 3.4789267091955393e-06, "loss": 0.3876, "step": 12013 }, { "epoch": 0.6483190329717771, "grad_norm": 1.3014709305218788, "learning_rate": 3.47824511812378e-06, "loss": 0.6734, "step": 12014 }, { "epoch": 0.6483729966002914, "grad_norm": 1.19203353433188, "learning_rate": 3.4775635851593014e-06, "loss": 0.4587, "step": 12015 }, { "epoch": 0.6484269602288057, "grad_norm": 1.0150378010398968, "learning_rate": 3.4768821103216877e-06, "loss": 0.505, "step": 12016 }, { "epoch": 0.6484809238573201, "grad_norm": 1.0467699279193174, "learning_rate": 3.4762006936305247e-06, "loss": 0.3933, "step": 12017 }, { "epoch": 0.6485348874858345, "grad_norm": 1.1245701343259038, "learning_rate": 3.4755193351053997e-06, "loss": 0.559, "step": 12018 }, { "epoch": 0.6485888511143489, "grad_norm": 1.2285838020178694, "learning_rate": 3.474838034765895e-06, "loss": 0.5629, "step": 12019 }, { "epoch": 0.6486428147428633, "grad_norm": 1.0260145817164394, "learning_rate": 3.47415679263159e-06, "loss": 0.4311, "step": 12020 }, { "epoch": 0.6486967783713777, "grad_norm": 1.0992158387974078, "learning_rate": 3.473475608722067e-06, "loss": 0.4847, "step": 12021 }, { "epoch": 0.648750741999892, "grad_norm": 0.7984862352711917, "learning_rate": 3.472794483056903e-06, 
"loss": 0.3833, "step": 12022 }, { "epoch": 0.6488047056284064, "grad_norm": 0.9009574698005273, "learning_rate": 3.4721134156556728e-06, "loss": 0.374, "step": 12023 }, { "epoch": 0.6488586692569208, "grad_norm": 0.9332665180892579, "learning_rate": 3.4714324065379523e-06, "loss": 0.591, "step": 12024 }, { "epoch": 0.6489126328854352, "grad_norm": 1.0429458478307991, "learning_rate": 3.4707514557233145e-06, "loss": 0.453, "step": 12025 }, { "epoch": 0.6489665965139496, "grad_norm": 0.8104326481644326, "learning_rate": 3.47007056323133e-06, "loss": 0.4274, "step": 12026 }, { "epoch": 0.649020560142464, "grad_norm": 1.4430002574786829, "learning_rate": 3.4693897290815694e-06, "loss": 0.6101, "step": 12027 }, { "epoch": 0.6490745237709784, "grad_norm": 1.1476886511454112, "learning_rate": 3.468708953293599e-06, "loss": 0.5493, "step": 12028 }, { "epoch": 0.6491284873994927, "grad_norm": 1.1376286674194502, "learning_rate": 3.468028235886987e-06, "loss": 0.5055, "step": 12029 }, { "epoch": 0.6491824510280071, "grad_norm": 1.1045598809183939, "learning_rate": 3.467347576881296e-06, "loss": 0.4155, "step": 12030 }, { "epoch": 0.6492364146565215, "grad_norm": 0.9987958477232202, "learning_rate": 3.4666669762960887e-06, "loss": 0.4178, "step": 12031 }, { "epoch": 0.6492903782850359, "grad_norm": 0.9558059388619287, "learning_rate": 3.4659864341509274e-06, "loss": 0.4288, "step": 12032 }, { "epoch": 0.6493443419135503, "grad_norm": 1.1274401728056826, "learning_rate": 3.465305950465371e-06, "loss": 0.4567, "step": 12033 }, { "epoch": 0.6493983055420647, "grad_norm": 0.9761423869164767, "learning_rate": 3.4646255252589778e-06, "loss": 0.4831, "step": 12034 }, { "epoch": 0.6494522691705791, "grad_norm": 0.8673761080874788, "learning_rate": 3.463945158551303e-06, "loss": 0.4881, "step": 12035 }, { "epoch": 0.6495062327990934, "grad_norm": 0.907881271846122, "learning_rate": 3.4632648503619043e-06, "loss": 0.3465, "step": 12036 }, { "epoch": 0.6495601964276078, "grad_norm": 0.9106197536196463, "learning_rate": 3.462584600710329e-06, "loss": 0.3285, "step": 12037 }, { "epoch": 0.6496141600561222, "grad_norm": 0.9239975593297185, "learning_rate": 3.461904409616132e-06, "loss": 0.5045, "step": 12038 }, { "epoch": 0.6496681236846366, "grad_norm": 1.2736656382242142, "learning_rate": 3.4612242770988613e-06, "loss": 0.6152, "step": 12039 }, { "epoch": 0.649722087313151, "grad_norm": 0.868424591061882, "learning_rate": 3.4605442031780646e-06, "loss": 0.4453, "step": 12040 }, { "epoch": 0.6497760509416654, "grad_norm": 0.9160307972520578, "learning_rate": 3.4598641878732888e-06, "loss": 0.415, "step": 12041 }, { "epoch": 0.6498300145701797, "grad_norm": 0.9909482378209951, "learning_rate": 3.459184231204077e-06, "loss": 0.3878, "step": 12042 }, { "epoch": 0.649883978198694, "grad_norm": 0.8570057976277617, "learning_rate": 3.458504333189974e-06, "loss": 0.4288, "step": 12043 }, { "epoch": 0.6499379418272084, "grad_norm": 1.0381066731193054, "learning_rate": 3.457824493850518e-06, "loss": 0.4066, "step": 12044 }, { "epoch": 0.6499919054557228, "grad_norm": 1.061688414586502, "learning_rate": 3.4571447132052494e-06, "loss": 0.4623, "step": 12045 }, { "epoch": 0.6500458690842372, "grad_norm": 0.925140010291013, "learning_rate": 3.456464991273705e-06, "loss": 0.4449, "step": 12046 }, { "epoch": 0.6500998327127516, "grad_norm": 0.9381655655012658, "learning_rate": 3.4557853280754234e-06, "loss": 0.4348, "step": 12047 }, { "epoch": 0.650153796341266, "grad_norm": 0.9690739719938267, "learning_rate": 
3.455105723629936e-06, "loss": 0.4933, "step": 12048 }, { "epoch": 0.6502077599697804, "grad_norm": 1.1263254684311492, "learning_rate": 3.4544261779567765e-06, "loss": 0.4487, "step": 12049 }, { "epoch": 0.6502617235982947, "grad_norm": 1.0967202822851743, "learning_rate": 3.4537466910754767e-06, "loss": 0.589, "step": 12050 }, { "epoch": 0.6503156872268091, "grad_norm": 1.1323356630119494, "learning_rate": 3.453067263005564e-06, "loss": 0.5127, "step": 12051 }, { "epoch": 0.6503696508553235, "grad_norm": 1.0420852224694899, "learning_rate": 3.4523878937665654e-06, "loss": 0.3772, "step": 12052 }, { "epoch": 0.6504236144838379, "grad_norm": 1.114835595117865, "learning_rate": 3.4517085833780086e-06, "loss": 0.4884, "step": 12053 }, { "epoch": 0.6504775781123523, "grad_norm": 0.9306999061265518, "learning_rate": 3.4510293318594156e-06, "loss": 0.3552, "step": 12054 }, { "epoch": 0.6505315417408667, "grad_norm": 1.0722482279023124, "learning_rate": 3.450350139230311e-06, "loss": 0.5598, "step": 12055 }, { "epoch": 0.6505855053693811, "grad_norm": 1.139652697175992, "learning_rate": 3.449671005510213e-06, "loss": 0.5758, "step": 12056 }, { "epoch": 0.6506394689978954, "grad_norm": 0.9341892524726666, "learning_rate": 3.4489919307186457e-06, "loss": 0.4022, "step": 12057 }, { "epoch": 0.6506934326264098, "grad_norm": 1.1893047058579265, "learning_rate": 3.44831291487512e-06, "loss": 0.5705, "step": 12058 }, { "epoch": 0.6507473962549242, "grad_norm": 0.8711572846257112, "learning_rate": 3.4476339579991534e-06, "loss": 0.4066, "step": 12059 }, { "epoch": 0.6508013598834386, "grad_norm": 0.7171390747338711, "learning_rate": 3.446955060110262e-06, "loss": 0.2923, "step": 12060 }, { "epoch": 0.650855323511953, "grad_norm": 0.9185297130487637, "learning_rate": 3.446276221227956e-06, "loss": 0.4733, "step": 12061 }, { "epoch": 0.6509092871404674, "grad_norm": 0.9299591708073434, "learning_rate": 3.4455974413717463e-06, "loss": 0.4266, "step": 12062 }, { "epoch": 0.6509632507689818, "grad_norm": 1.070053569526316, "learning_rate": 3.4449187205611424e-06, "loss": 0.4518, "step": 12063 }, { "epoch": 0.651017214397496, "grad_norm": 1.016086445760248, "learning_rate": 3.4442400588156527e-06, "loss": 0.5487, "step": 12064 }, { "epoch": 0.6510711780260104, "grad_norm": 1.5090633676511294, "learning_rate": 3.443561456154779e-06, "loss": 0.5515, "step": 12065 }, { "epoch": 0.6511251416545248, "grad_norm": 0.991835774058149, "learning_rate": 3.442882912598027e-06, "loss": 0.4629, "step": 12066 }, { "epoch": 0.6511791052830392, "grad_norm": 1.0720584303687162, "learning_rate": 3.442204428164898e-06, "loss": 0.5837, "step": 12067 }, { "epoch": 0.6512330689115536, "grad_norm": 0.9888823288419507, "learning_rate": 3.441526002874894e-06, "loss": 0.4492, "step": 12068 }, { "epoch": 0.651287032540068, "grad_norm": 0.9160279570221956, "learning_rate": 3.440847636747512e-06, "loss": 0.3868, "step": 12069 }, { "epoch": 0.6513409961685823, "grad_norm": 0.8870083060447506, "learning_rate": 3.44016932980225e-06, "loss": 0.4323, "step": 12070 }, { "epoch": 0.6513949597970967, "grad_norm": 0.7644712840650895, "learning_rate": 3.4394910820586023e-06, "loss": 0.3361, "step": 12071 }, { "epoch": 0.6514489234256111, "grad_norm": 1.0460522085101054, "learning_rate": 3.438812893536063e-06, "loss": 0.4521, "step": 12072 }, { "epoch": 0.6515028870541255, "grad_norm": 0.9723179160435146, "learning_rate": 3.438134764254124e-06, "loss": 0.4652, "step": 12073 }, { "epoch": 0.6515568506826399, "grad_norm": 1.034454536650743, 
"learning_rate": 3.4374566942322767e-06, "loss": 0.537, "step": 12074 }, { "epoch": 0.6516108143111543, "grad_norm": 1.1125324832046957, "learning_rate": 3.436778683490007e-06, "loss": 0.4935, "step": 12075 }, { "epoch": 0.6516647779396687, "grad_norm": 0.9541508221699821, "learning_rate": 3.436100732046802e-06, "loss": 0.3821, "step": 12076 }, { "epoch": 0.651718741568183, "grad_norm": 1.073856585479336, "learning_rate": 3.435422839922148e-06, "loss": 0.4752, "step": 12077 }, { "epoch": 0.6517727051966974, "grad_norm": 1.0164899017747888, "learning_rate": 3.434745007135527e-06, "loss": 0.6277, "step": 12078 }, { "epoch": 0.6518266688252118, "grad_norm": 0.8463394398607668, "learning_rate": 3.434067233706422e-06, "loss": 0.3472, "step": 12079 }, { "epoch": 0.6518806324537262, "grad_norm": 1.068269652892902, "learning_rate": 3.4333895196543116e-06, "loss": 0.5364, "step": 12080 }, { "epoch": 0.6519345960822406, "grad_norm": 1.0198114206388127, "learning_rate": 3.4327118649986747e-06, "loss": 0.4892, "step": 12081 }, { "epoch": 0.651988559710755, "grad_norm": 1.2469448596883117, "learning_rate": 3.432034269758987e-06, "loss": 0.4737, "step": 12082 }, { "epoch": 0.6520425233392694, "grad_norm": 1.0515914250814684, "learning_rate": 3.431356733954725e-06, "loss": 0.5124, "step": 12083 }, { "epoch": 0.6520964869677837, "grad_norm": 0.9210027819503481, "learning_rate": 3.4306792576053614e-06, "loss": 0.3991, "step": 12084 }, { "epoch": 0.6521504505962981, "grad_norm": 0.9785544251808219, "learning_rate": 3.430001840730366e-06, "loss": 0.5077, "step": 12085 }, { "epoch": 0.6522044142248125, "grad_norm": 1.0972261552613138, "learning_rate": 3.429324483349209e-06, "loss": 0.5601, "step": 12086 }, { "epoch": 0.6522583778533269, "grad_norm": 0.8870965489347724, "learning_rate": 3.4286471854813587e-06, "loss": 0.4261, "step": 12087 }, { "epoch": 0.6523123414818413, "grad_norm": 1.1421663772785913, "learning_rate": 3.427969947146281e-06, "loss": 0.5266, "step": 12088 }, { "epoch": 0.6523663051103556, "grad_norm": 0.8273664677084684, "learning_rate": 3.4272927683634405e-06, "loss": 0.3535, "step": 12089 }, { "epoch": 0.65242026873887, "grad_norm": 1.034966027311295, "learning_rate": 3.4266156491523006e-06, "loss": 0.4722, "step": 12090 }, { "epoch": 0.6524742323673843, "grad_norm": 1.054308112110283, "learning_rate": 3.425938589532323e-06, "loss": 0.5454, "step": 12091 }, { "epoch": 0.6525281959958987, "grad_norm": 1.1345221444458555, "learning_rate": 3.4252615895229644e-06, "loss": 0.5197, "step": 12092 }, { "epoch": 0.6525821596244131, "grad_norm": 0.9995206426003981, "learning_rate": 3.424584649143685e-06, "loss": 0.2966, "step": 12093 }, { "epoch": 0.6526361232529275, "grad_norm": 1.0702710767274346, "learning_rate": 3.423907768413939e-06, "loss": 0.527, "step": 12094 }, { "epoch": 0.6526900868814419, "grad_norm": 1.14933448866209, "learning_rate": 3.423230947353182e-06, "loss": 0.5051, "step": 12095 }, { "epoch": 0.6527440505099563, "grad_norm": 1.6334132017073408, "learning_rate": 3.422554185980865e-06, "loss": 0.5419, "step": 12096 }, { "epoch": 0.6527980141384707, "grad_norm": 0.8394070519835319, "learning_rate": 3.4218774843164416e-06, "loss": 0.4215, "step": 12097 }, { "epoch": 0.652851977766985, "grad_norm": 1.135597535306332, "learning_rate": 3.4212008423793596e-06, "loss": 0.4697, "step": 12098 }, { "epoch": 0.6529059413954994, "grad_norm": 1.1096776193644327, "learning_rate": 3.420524260189064e-06, "loss": 0.4905, "step": 12099 }, { "epoch": 0.6529599050240138, "grad_norm": 
0.9544527290955739, "learning_rate": 3.419847737765003e-06, "loss": 0.4403, "step": 12100 }, { "epoch": 0.6530138686525282, "grad_norm": 1.110385338918088, "learning_rate": 3.4191712751266195e-06, "loss": 0.4987, "step": 12101 }, { "epoch": 0.6530678322810426, "grad_norm": 0.9694162372826827, "learning_rate": 3.418494872293356e-06, "loss": 0.408, "step": 12102 }, { "epoch": 0.653121795909557, "grad_norm": 0.9754280321498997, "learning_rate": 3.4178185292846534e-06, "loss": 0.3983, "step": 12103 }, { "epoch": 0.6531757595380714, "grad_norm": 1.306879773451148, "learning_rate": 3.417142246119949e-06, "loss": 0.5704, "step": 12104 }, { "epoch": 0.6532297231665857, "grad_norm": 0.9515799919138271, "learning_rate": 3.416466022818684e-06, "loss": 0.4152, "step": 12105 }, { "epoch": 0.6532836867951001, "grad_norm": 1.1135366818530072, "learning_rate": 3.4157898594002884e-06, "loss": 0.5425, "step": 12106 }, { "epoch": 0.6533376504236145, "grad_norm": 1.0089123108444913, "learning_rate": 3.4151137558841984e-06, "loss": 0.4075, "step": 12107 }, { "epoch": 0.6533916140521289, "grad_norm": 1.0120473532717573, "learning_rate": 3.4144377122898465e-06, "loss": 0.4212, "step": 12108 }, { "epoch": 0.6534455776806433, "grad_norm": 0.952685159746953, "learning_rate": 3.4137617286366608e-06, "loss": 0.3671, "step": 12109 }, { "epoch": 0.6534995413091577, "grad_norm": 0.9980305494096618, "learning_rate": 3.4130858049440717e-06, "loss": 0.3239, "step": 12110 }, { "epoch": 0.6535535049376721, "grad_norm": 1.008994558582359, "learning_rate": 3.412409941231505e-06, "loss": 0.4312, "step": 12111 }, { "epoch": 0.6536074685661863, "grad_norm": 1.2048666213483523, "learning_rate": 3.4117341375183875e-06, "loss": 0.6028, "step": 12112 }, { "epoch": 0.6536614321947007, "grad_norm": 1.3484625756463664, "learning_rate": 3.4110583938241393e-06, "loss": 0.5498, "step": 12113 }, { "epoch": 0.6537153958232151, "grad_norm": 0.9989149539752589, "learning_rate": 3.410382710168184e-06, "loss": 0.5203, "step": 12114 }, { "epoch": 0.6537693594517295, "grad_norm": 1.1562384366566094, "learning_rate": 3.40970708656994e-06, "loss": 0.5376, "step": 12115 }, { "epoch": 0.6538233230802439, "grad_norm": 1.1213423113443282, "learning_rate": 3.409031523048828e-06, "loss": 0.5057, "step": 12116 }, { "epoch": 0.6538772867087583, "grad_norm": 0.8861429552945734, "learning_rate": 3.4083560196242617e-06, "loss": 0.4784, "step": 12117 }, { "epoch": 0.6539312503372727, "grad_norm": 1.112429768513656, "learning_rate": 3.4076805763156573e-06, "loss": 0.491, "step": 12118 }, { "epoch": 0.653985213965787, "grad_norm": 1.1086948563787358, "learning_rate": 3.4070051931424284e-06, "loss": 0.4846, "step": 12119 }, { "epoch": 0.6540391775943014, "grad_norm": 1.1354289306587633, "learning_rate": 3.4063298701239832e-06, "loss": 0.395, "step": 12120 }, { "epoch": 0.6540931412228158, "grad_norm": 1.0378982492090187, "learning_rate": 3.4056546072797344e-06, "loss": 0.5287, "step": 12121 }, { "epoch": 0.6541471048513302, "grad_norm": 1.2266164045772887, "learning_rate": 3.404979404629087e-06, "loss": 0.5329, "step": 12122 }, { "epoch": 0.6542010684798446, "grad_norm": 1.0441612695699238, "learning_rate": 3.404304262191449e-06, "loss": 0.5705, "step": 12123 }, { "epoch": 0.654255032108359, "grad_norm": 0.9754754466286323, "learning_rate": 3.4036291799862237e-06, "loss": 0.3627, "step": 12124 }, { "epoch": 0.6543089957368734, "grad_norm": 1.121923839022091, "learning_rate": 3.402954158032814e-06, "loss": 0.3975, "step": 12125 }, { "epoch": 0.6543629593653877, 
"grad_norm": 0.9047502521131885, "learning_rate": 3.402279196350622e-06, "loss": 0.3991, "step": 12126 }, { "epoch": 0.6544169229939021, "grad_norm": 0.9075175389431006, "learning_rate": 3.401604294959044e-06, "loss": 0.5051, "step": 12127 }, { "epoch": 0.6544708866224165, "grad_norm": 0.9063448776144766, "learning_rate": 3.4009294538774786e-06, "loss": 0.3976, "step": 12128 }, { "epoch": 0.6545248502509309, "grad_norm": 0.8582429870616064, "learning_rate": 3.400254673125323e-06, "loss": 0.3812, "step": 12129 }, { "epoch": 0.6545788138794453, "grad_norm": 1.302243887851338, "learning_rate": 3.3995799527219687e-06, "loss": 0.6367, "step": 12130 }, { "epoch": 0.6546327775079597, "grad_norm": 0.9951293836612463, "learning_rate": 3.3989052926868094e-06, "loss": 0.4002, "step": 12131 }, { "epoch": 0.6546867411364741, "grad_norm": 0.8580841558076736, "learning_rate": 3.398230693039235e-06, "loss": 0.3496, "step": 12132 }, { "epoch": 0.6547407047649884, "grad_norm": 1.14243652280191, "learning_rate": 3.3975561537986356e-06, "loss": 0.6479, "step": 12133 }, { "epoch": 0.6547946683935028, "grad_norm": 0.8949416506701103, "learning_rate": 3.396881674984395e-06, "loss": 0.491, "step": 12134 }, { "epoch": 0.6548486320220172, "grad_norm": 0.85843642984806, "learning_rate": 3.3962072566159e-06, "loss": 0.4148, "step": 12135 }, { "epoch": 0.6549025956505315, "grad_norm": 1.0867720046662457, "learning_rate": 3.3955328987125348e-06, "loss": 0.4745, "step": 12136 }, { "epoch": 0.654956559279046, "grad_norm": 0.9093568248135352, "learning_rate": 3.394858601293679e-06, "loss": 0.444, "step": 12137 }, { "epoch": 0.6550105229075603, "grad_norm": 1.0751662244535751, "learning_rate": 3.3941843643787166e-06, "loss": 0.456, "step": 12138 }, { "epoch": 0.6550644865360746, "grad_norm": 1.0402423735756046, "learning_rate": 3.3935101879870225e-06, "loss": 0.459, "step": 12139 }, { "epoch": 0.655118450164589, "grad_norm": 1.0151058524208336, "learning_rate": 3.392836072137974e-06, "loss": 0.3637, "step": 12140 }, { "epoch": 0.6551724137931034, "grad_norm": 1.1234945874573554, "learning_rate": 3.392162016850945e-06, "loss": 0.6899, "step": 12141 }, { "epoch": 0.6552263774216178, "grad_norm": 1.0044285895397762, "learning_rate": 3.39148802214531e-06, "loss": 0.476, "step": 12142 }, { "epoch": 0.6552803410501322, "grad_norm": 1.0742968907249537, "learning_rate": 3.3908140880404395e-06, "loss": 0.6608, "step": 12143 }, { "epoch": 0.6553343046786466, "grad_norm": 1.1635184753210615, "learning_rate": 3.3901402145557035e-06, "loss": 0.5652, "step": 12144 }, { "epoch": 0.655388268307161, "grad_norm": 0.9494032212360588, "learning_rate": 3.3894664017104695e-06, "loss": 0.3351, "step": 12145 }, { "epoch": 0.6554422319356753, "grad_norm": 1.00114269483659, "learning_rate": 3.3887926495241054e-06, "loss": 0.4381, "step": 12146 }, { "epoch": 0.6554961955641897, "grad_norm": 1.042188379874871, "learning_rate": 3.388118958015972e-06, "loss": 0.4573, "step": 12147 }, { "epoch": 0.6555501591927041, "grad_norm": 1.0939901414315485, "learning_rate": 3.387445327205433e-06, "loss": 0.4892, "step": 12148 }, { "epoch": 0.6556041228212185, "grad_norm": 1.1528318508086728, "learning_rate": 3.386771757111851e-06, "loss": 0.5992, "step": 12149 }, { "epoch": 0.6556580864497329, "grad_norm": 1.0067458181955184, "learning_rate": 3.386098247754583e-06, "loss": 0.6315, "step": 12150 }, { "epoch": 0.6557120500782473, "grad_norm": 1.0938134982814334, "learning_rate": 3.385424799152988e-06, "loss": 0.5071, "step": 12151 }, { "epoch": 0.6557660137067617, 
"grad_norm": 1.00058059127814, "learning_rate": 3.384751411326421e-06, "loss": 0.4661, "step": 12152 }, { "epoch": 0.655819977335276, "grad_norm": 1.2303754868518313, "learning_rate": 3.384078084294237e-06, "loss": 0.4711, "step": 12153 }, { "epoch": 0.6558739409637904, "grad_norm": 1.1312480019703492, "learning_rate": 3.383404818075785e-06, "loss": 0.4892, "step": 12154 }, { "epoch": 0.6559279045923048, "grad_norm": 1.022243255742534, "learning_rate": 3.382731612690417e-06, "loss": 0.4483, "step": 12155 }, { "epoch": 0.6559818682208192, "grad_norm": 0.9637087863840178, "learning_rate": 3.3820584681574824e-06, "loss": 0.5963, "step": 12156 }, { "epoch": 0.6560358318493336, "grad_norm": 1.0522111684803257, "learning_rate": 3.3813853844963273e-06, "loss": 0.409, "step": 12157 }, { "epoch": 0.656089795477848, "grad_norm": 0.954550152796691, "learning_rate": 3.3807123617262972e-06, "loss": 0.4487, "step": 12158 }, { "epoch": 0.6561437591063624, "grad_norm": 0.9322011756917016, "learning_rate": 3.3800393998667346e-06, "loss": 0.4575, "step": 12159 }, { "epoch": 0.6561977227348766, "grad_norm": 1.022577014050822, "learning_rate": 3.3793664989369836e-06, "loss": 0.4251, "step": 12160 }, { "epoch": 0.656251686363391, "grad_norm": 1.185693590072727, "learning_rate": 3.3786936589563803e-06, "loss": 0.5736, "step": 12161 }, { "epoch": 0.6563056499919054, "grad_norm": 0.9442073239862351, "learning_rate": 3.378020879944265e-06, "loss": 0.4278, "step": 12162 }, { "epoch": 0.6563596136204198, "grad_norm": 0.7815244467994428, "learning_rate": 3.3773481619199743e-06, "loss": 0.3923, "step": 12163 }, { "epoch": 0.6564135772489342, "grad_norm": 0.9368062150422383, "learning_rate": 3.3766755049028417e-06, "loss": 0.3805, "step": 12164 }, { "epoch": 0.6564675408774486, "grad_norm": 1.1804718088695256, "learning_rate": 3.3760029089121993e-06, "loss": 0.6681, "step": 12165 }, { "epoch": 0.656521504505963, "grad_norm": 1.0801534754427602, "learning_rate": 3.3753303739673816e-06, "loss": 0.3612, "step": 12166 }, { "epoch": 0.6565754681344773, "grad_norm": 1.1841713812398418, "learning_rate": 3.374657900087716e-06, "loss": 0.6189, "step": 12167 }, { "epoch": 0.6566294317629917, "grad_norm": 0.9380881393035884, "learning_rate": 3.3739854872925285e-06, "loss": 0.4989, "step": 12168 }, { "epoch": 0.6566833953915061, "grad_norm": 0.9790180037542231, "learning_rate": 3.3733131356011468e-06, "loss": 0.5181, "step": 12169 }, { "epoch": 0.6567373590200205, "grad_norm": 0.9522124072034627, "learning_rate": 3.372640845032894e-06, "loss": 0.5104, "step": 12170 }, { "epoch": 0.6567913226485349, "grad_norm": 1.0645186726080271, "learning_rate": 3.371968615607093e-06, "loss": 0.4319, "step": 12171 }, { "epoch": 0.6568452862770493, "grad_norm": 1.037184924207552, "learning_rate": 3.3712964473430633e-06, "loss": 0.3866, "step": 12172 }, { "epoch": 0.6568992499055637, "grad_norm": 0.9373937148245929, "learning_rate": 3.3706243402601257e-06, "loss": 0.3651, "step": 12173 }, { "epoch": 0.656953213534078, "grad_norm": 1.0244755554822482, "learning_rate": 3.3699522943775957e-06, "loss": 0.5154, "step": 12174 }, { "epoch": 0.6570071771625924, "grad_norm": 1.1886502395125522, "learning_rate": 3.369280309714789e-06, "loss": 0.5904, "step": 12175 }, { "epoch": 0.6570611407911068, "grad_norm": 0.9286856637993067, "learning_rate": 3.3686083862910197e-06, "loss": 0.3797, "step": 12176 }, { "epoch": 0.6571151044196212, "grad_norm": 1.0879204370384175, "learning_rate": 3.3679365241255984e-06, "loss": 0.3942, "step": 12177 }, { "epoch": 
0.6571690680481356, "grad_norm": 1.0388791497416898, "learning_rate": 3.3672647232378362e-06, "loss": 0.4734, "step": 12178 }, { "epoch": 0.65722303167665, "grad_norm": 0.8996147259832636, "learning_rate": 3.36659298364704e-06, "loss": 0.4159, "step": 12179 }, { "epoch": 0.6572769953051644, "grad_norm": 1.0553742792449772, "learning_rate": 3.365921305372519e-06, "loss": 0.4839, "step": 12180 }, { "epoch": 0.6573309589336787, "grad_norm": 0.9078863643115785, "learning_rate": 3.3652496884335752e-06, "loss": 0.3723, "step": 12181 }, { "epoch": 0.657384922562193, "grad_norm": 1.0884198503250355, "learning_rate": 3.3645781328495116e-06, "loss": 0.4603, "step": 12182 }, { "epoch": 0.6574388861907074, "grad_norm": 1.1130969762554492, "learning_rate": 3.36390663863963e-06, "loss": 0.5096, "step": 12183 }, { "epoch": 0.6574928498192218, "grad_norm": 0.9538956939587255, "learning_rate": 3.3632352058232304e-06, "loss": 0.4605, "step": 12184 }, { "epoch": 0.6575468134477362, "grad_norm": 1.048531128880802, "learning_rate": 3.36256383441961e-06, "loss": 0.5209, "step": 12185 }, { "epoch": 0.6576007770762506, "grad_norm": 1.1202353800658622, "learning_rate": 3.361892524448065e-06, "loss": 0.4167, "step": 12186 }, { "epoch": 0.657654740704765, "grad_norm": 0.8865106313273421, "learning_rate": 3.3612212759278876e-06, "loss": 0.3767, "step": 12187 }, { "epoch": 0.6577087043332793, "grad_norm": 1.0838164709522071, "learning_rate": 3.3605500888783756e-06, "loss": 0.5275, "step": 12188 }, { "epoch": 0.6577626679617937, "grad_norm": 0.948477124170272, "learning_rate": 3.359878963318813e-06, "loss": 0.5345, "step": 12189 }, { "epoch": 0.6578166315903081, "grad_norm": 1.02294667275426, "learning_rate": 3.3592078992684916e-06, "loss": 0.4662, "step": 12190 }, { "epoch": 0.6578705952188225, "grad_norm": 0.9452756299791263, "learning_rate": 3.3585368967466986e-06, "loss": 0.453, "step": 12191 }, { "epoch": 0.6579245588473369, "grad_norm": 0.8044540639299731, "learning_rate": 3.357865955772719e-06, "loss": 0.3359, "step": 12192 }, { "epoch": 0.6579785224758513, "grad_norm": 1.0882995784077858, "learning_rate": 3.357195076365836e-06, "loss": 0.5098, "step": 12193 }, { "epoch": 0.6580324861043657, "grad_norm": 0.782231330441835, "learning_rate": 3.3565242585453313e-06, "loss": 0.2558, "step": 12194 }, { "epoch": 0.65808644973288, "grad_norm": 1.2483132067618739, "learning_rate": 3.3558535023304873e-06, "loss": 0.6465, "step": 12195 }, { "epoch": 0.6581404133613944, "grad_norm": 0.9884210505857033, "learning_rate": 3.355182807740579e-06, "loss": 0.4795, "step": 12196 }, { "epoch": 0.6581943769899088, "grad_norm": 0.9737533426779393, "learning_rate": 3.3545121747948834e-06, "loss": 0.5507, "step": 12197 }, { "epoch": 0.6582483406184232, "grad_norm": 0.9812998041549411, "learning_rate": 3.353841603512676e-06, "loss": 0.4807, "step": 12198 }, { "epoch": 0.6583023042469376, "grad_norm": 0.7624453323717157, "learning_rate": 3.353171093913229e-06, "loss": 0.3031, "step": 12199 }, { "epoch": 0.658356267875452, "grad_norm": 1.0202084758022034, "learning_rate": 3.352500646015814e-06, "loss": 0.4007, "step": 12200 }, { "epoch": 0.6584102315039664, "grad_norm": 0.8602368584755781, "learning_rate": 3.3518302598397006e-06, "loss": 0.3394, "step": 12201 }, { "epoch": 0.6584641951324807, "grad_norm": 1.2541901143756347, "learning_rate": 3.3511599354041578e-06, "loss": 0.6662, "step": 12202 }, { "epoch": 0.6585181587609951, "grad_norm": 0.9702604009711312, "learning_rate": 3.3504896727284485e-06, "loss": 0.4823, "step": 12203 }, { 
"epoch": 0.6585721223895095, "grad_norm": 0.9427146167118085, "learning_rate": 3.3498194718318373e-06, "loss": 0.4446, "step": 12204 }, { "epoch": 0.6586260860180239, "grad_norm": 0.9716802761429558, "learning_rate": 3.3491493327335865e-06, "loss": 0.405, "step": 12205 }, { "epoch": 0.6586800496465383, "grad_norm": 1.1890106636612499, "learning_rate": 3.3484792554529577e-06, "loss": 0.573, "step": 12206 }, { "epoch": 0.6587340132750527, "grad_norm": 0.9226141506328585, "learning_rate": 3.3478092400092094e-06, "loss": 0.3758, "step": 12207 }, { "epoch": 0.6587879769035669, "grad_norm": 0.8356932889245835, "learning_rate": 3.3471392864216e-06, "loss": 0.3322, "step": 12208 }, { "epoch": 0.6588419405320813, "grad_norm": 0.9571008278539685, "learning_rate": 3.3464693947093806e-06, "loss": 0.3961, "step": 12209 }, { "epoch": 0.6588959041605957, "grad_norm": 0.9777187331600683, "learning_rate": 3.3457995648918074e-06, "loss": 0.4027, "step": 12210 }, { "epoch": 0.6589498677891101, "grad_norm": 0.9358816641031806, "learning_rate": 3.345129796988131e-06, "loss": 0.366, "step": 12211 }, { "epoch": 0.6590038314176245, "grad_norm": 0.7826089547171353, "learning_rate": 3.344460091017601e-06, "loss": 0.325, "step": 12212 }, { "epoch": 0.6590577950461389, "grad_norm": 1.1502360347298302, "learning_rate": 3.3437904469994666e-06, "loss": 0.4785, "step": 12213 }, { "epoch": 0.6591117586746533, "grad_norm": 1.173928151965994, "learning_rate": 3.3431208649529736e-06, "loss": 0.4297, "step": 12214 }, { "epoch": 0.6591657223031676, "grad_norm": 1.0587934037257585, "learning_rate": 3.3424513448973673e-06, "loss": 0.4156, "step": 12215 }, { "epoch": 0.659219685931682, "grad_norm": 1.0392485305965347, "learning_rate": 3.341781886851888e-06, "loss": 0.5544, "step": 12216 }, { "epoch": 0.6592736495601964, "grad_norm": 1.1981757940821114, "learning_rate": 3.3411124908357783e-06, "loss": 0.7293, "step": 12217 }, { "epoch": 0.6593276131887108, "grad_norm": 0.9546313280702993, "learning_rate": 3.340443156868277e-06, "loss": 0.4546, "step": 12218 }, { "epoch": 0.6593815768172252, "grad_norm": 0.949185195549188, "learning_rate": 3.339773884968621e-06, "loss": 0.3623, "step": 12219 }, { "epoch": 0.6594355404457396, "grad_norm": 1.0788401839087185, "learning_rate": 3.339104675156047e-06, "loss": 0.5909, "step": 12220 }, { "epoch": 0.659489504074254, "grad_norm": 1.1581845294250535, "learning_rate": 3.3384355274497875e-06, "loss": 0.473, "step": 12221 }, { "epoch": 0.6595434677027683, "grad_norm": 0.8208718733004026, "learning_rate": 3.3377664418690746e-06, "loss": 0.2774, "step": 12222 }, { "epoch": 0.6595974313312827, "grad_norm": 0.8504207927978109, "learning_rate": 3.33709741843314e-06, "loss": 0.4262, "step": 12223 }, { "epoch": 0.6596513949597971, "grad_norm": 1.2549731288314645, "learning_rate": 3.3364284571612125e-06, "loss": 0.4444, "step": 12224 }, { "epoch": 0.6597053585883115, "grad_norm": 1.240237445929437, "learning_rate": 3.335759558072515e-06, "loss": 0.6003, "step": 12225 }, { "epoch": 0.6597593222168259, "grad_norm": 1.188913708534342, "learning_rate": 3.335090721186275e-06, "loss": 0.4332, "step": 12226 }, { "epoch": 0.6598132858453403, "grad_norm": 1.0778273829687348, "learning_rate": 3.3344219465217155e-06, "loss": 0.4514, "step": 12227 }, { "epoch": 0.6598672494738547, "grad_norm": 0.7479582206195925, "learning_rate": 3.3337532340980573e-06, "loss": 0.3055, "step": 12228 }, { "epoch": 0.659921213102369, "grad_norm": 1.0737175098820917, "learning_rate": 3.3330845839345196e-06, "loss": 0.4033, "step": 
12229 }, { "epoch": 0.6599751767308834, "grad_norm": 1.0598536169339137, "learning_rate": 3.3324159960503198e-06, "loss": 0.4562, "step": 12230 }, { "epoch": 0.6600291403593977, "grad_norm": 0.8780672378325353, "learning_rate": 3.3317474704646753e-06, "loss": 0.3296, "step": 12231 }, { "epoch": 0.6600831039879121, "grad_norm": 0.9867162385675662, "learning_rate": 3.331079007196799e-06, "loss": 0.5471, "step": 12232 }, { "epoch": 0.6601370676164265, "grad_norm": 1.0508204795855765, "learning_rate": 3.330410606265904e-06, "loss": 0.4962, "step": 12233 }, { "epoch": 0.6601910312449409, "grad_norm": 1.0612917671591466, "learning_rate": 3.3297422676912007e-06, "loss": 0.5287, "step": 12234 }, { "epoch": 0.6602449948734553, "grad_norm": 0.8008331881374267, "learning_rate": 3.329073991491897e-06, "loss": 0.3163, "step": 12235 }, { "epoch": 0.6602989585019696, "grad_norm": 1.0390195876166075, "learning_rate": 3.328405777687202e-06, "loss": 0.5613, "step": 12236 }, { "epoch": 0.660352922130484, "grad_norm": 0.799754072595658, "learning_rate": 3.3277376262963174e-06, "loss": 0.3514, "step": 12237 }, { "epoch": 0.6604068857589984, "grad_norm": 1.4848576046570632, "learning_rate": 3.3270695373384488e-06, "loss": 0.5934, "step": 12238 }, { "epoch": 0.6604608493875128, "grad_norm": 1.0513071092814519, "learning_rate": 3.3264015108327962e-06, "loss": 0.6486, "step": 12239 }, { "epoch": 0.6605148130160272, "grad_norm": 1.2070462145444176, "learning_rate": 3.325733546798562e-06, "loss": 0.4518, "step": 12240 }, { "epoch": 0.6605687766445416, "grad_norm": 0.9529697645243869, "learning_rate": 3.325065645254941e-06, "loss": 0.4636, "step": 12241 }, { "epoch": 0.660622740273056, "grad_norm": 1.0020539449320847, "learning_rate": 3.3243978062211325e-06, "loss": 0.4623, "step": 12242 }, { "epoch": 0.6606767039015703, "grad_norm": 1.1228428477912078, "learning_rate": 3.32373002971633e-06, "loss": 0.5096, "step": 12243 }, { "epoch": 0.6607306675300847, "grad_norm": 1.1918665723736026, "learning_rate": 3.323062315759724e-06, "loss": 0.6541, "step": 12244 }, { "epoch": 0.6607846311585991, "grad_norm": 1.1690607812554088, "learning_rate": 3.322394664370507e-06, "loss": 0.4254, "step": 12245 }, { "epoch": 0.6608385947871135, "grad_norm": 1.009091810995177, "learning_rate": 3.321727075567867e-06, "loss": 0.4312, "step": 12246 }, { "epoch": 0.6608925584156279, "grad_norm": 0.8977342482618287, "learning_rate": 3.3210595493709928e-06, "loss": 0.3218, "step": 12247 }, { "epoch": 0.6609465220441423, "grad_norm": 0.8755678387028514, "learning_rate": 3.320392085799068e-06, "loss": 0.3261, "step": 12248 }, { "epoch": 0.6610004856726567, "grad_norm": 0.915321156602366, "learning_rate": 3.319724684871277e-06, "loss": 0.4698, "step": 12249 }, { "epoch": 0.661054449301171, "grad_norm": 1.0091092637069579, "learning_rate": 3.3190573466068044e-06, "loss": 0.3502, "step": 12250 }, { "epoch": 0.6611084129296854, "grad_norm": 1.0408562249313136, "learning_rate": 3.318390071024824e-06, "loss": 0.5303, "step": 12251 }, { "epoch": 0.6611623765581998, "grad_norm": 1.1440654510925317, "learning_rate": 3.3177228581445175e-06, "loss": 0.4794, "step": 12252 }, { "epoch": 0.6612163401867142, "grad_norm": 1.0742478070175858, "learning_rate": 3.317055707985062e-06, "loss": 0.5111, "step": 12253 }, { "epoch": 0.6612703038152286, "grad_norm": 1.1070637108901493, "learning_rate": 3.3163886205656307e-06, "loss": 0.4847, "step": 12254 }, { "epoch": 0.661324267443743, "grad_norm": 1.0902545059334203, "learning_rate": 3.315721595905397e-06, "loss": 
0.5897, "step": 12255 }, { "epoch": 0.6613782310722573, "grad_norm": 1.1273818527927026, "learning_rate": 3.3150546340235316e-06, "loss": 0.5434, "step": 12256 }, { "epoch": 0.6614321947007716, "grad_norm": 0.9067226109925057, "learning_rate": 3.3143877349392055e-06, "loss": 0.4589, "step": 12257 }, { "epoch": 0.661486158329286, "grad_norm": 1.140436368949196, "learning_rate": 3.3137208986715823e-06, "loss": 0.4286, "step": 12258 }, { "epoch": 0.6615401219578004, "grad_norm": 1.203566615149496, "learning_rate": 3.3130541252398298e-06, "loss": 0.5266, "step": 12259 }, { "epoch": 0.6615940855863148, "grad_norm": 0.9657645858743545, "learning_rate": 3.3123874146631117e-06, "loss": 0.4039, "step": 12260 }, { "epoch": 0.6616480492148292, "grad_norm": 0.9895307653846924, "learning_rate": 3.311720766960589e-06, "loss": 0.4219, "step": 12261 }, { "epoch": 0.6617020128433436, "grad_norm": 0.9627087101560815, "learning_rate": 3.311054182151423e-06, "loss": 0.5045, "step": 12262 }, { "epoch": 0.661755976471858, "grad_norm": 1.0996241050073883, "learning_rate": 3.310387660254772e-06, "loss": 0.3977, "step": 12263 }, { "epoch": 0.6618099401003723, "grad_norm": 1.0173905744812322, "learning_rate": 3.309721201289794e-06, "loss": 0.4094, "step": 12264 }, { "epoch": 0.6618639037288867, "grad_norm": 1.0867268094893792, "learning_rate": 3.309054805275639e-06, "loss": 0.4563, "step": 12265 }, { "epoch": 0.6619178673574011, "grad_norm": 0.9975244524229353, "learning_rate": 3.3083884722314624e-06, "loss": 0.4086, "step": 12266 }, { "epoch": 0.6619718309859155, "grad_norm": 1.1802889749439103, "learning_rate": 3.307722202176417e-06, "loss": 0.5342, "step": 12267 }, { "epoch": 0.6620257946144299, "grad_norm": 1.0131807846438936, "learning_rate": 3.30705599512965e-06, "loss": 0.4973, "step": 12268 }, { "epoch": 0.6620797582429443, "grad_norm": 1.0110332500984178, "learning_rate": 3.30638985111031e-06, "loss": 0.4427, "step": 12269 }, { "epoch": 0.6621337218714587, "grad_norm": 1.0122444327216822, "learning_rate": 3.305723770137541e-06, "loss": 0.5084, "step": 12270 }, { "epoch": 0.662187685499973, "grad_norm": 0.9487475464827033, "learning_rate": 3.3050577522304903e-06, "loss": 0.4468, "step": 12271 }, { "epoch": 0.6622416491284874, "grad_norm": 1.2752246457949856, "learning_rate": 3.304391797408296e-06, "loss": 0.6757, "step": 12272 }, { "epoch": 0.6622956127570018, "grad_norm": 1.1066168130289051, "learning_rate": 3.3037259056900994e-06, "loss": 0.4638, "step": 12273 }, { "epoch": 0.6623495763855162, "grad_norm": 1.197230076595792, "learning_rate": 3.3030600770950406e-06, "loss": 0.5658, "step": 12274 }, { "epoch": 0.6624035400140306, "grad_norm": 0.8226303821835945, "learning_rate": 3.302394311642254e-06, "loss": 0.3699, "step": 12275 }, { "epoch": 0.662457503642545, "grad_norm": 0.9290210444676861, "learning_rate": 3.301728609350876e-06, "loss": 0.3833, "step": 12276 }, { "epoch": 0.6625114672710593, "grad_norm": 1.0479046055117764, "learning_rate": 3.3010629702400387e-06, "loss": 0.51, "step": 12277 }, { "epoch": 0.6625654308995736, "grad_norm": 1.0536524189436811, "learning_rate": 3.3003973943288735e-06, "loss": 0.428, "step": 12278 }, { "epoch": 0.662619394528088, "grad_norm": 0.8761353433537651, "learning_rate": 3.29973188163651e-06, "loss": 0.3461, "step": 12279 }, { "epoch": 0.6626733581566024, "grad_norm": 1.13687573786356, "learning_rate": 3.2990664321820753e-06, "loss": 0.4621, "step": 12280 }, { "epoch": 0.6627273217851168, "grad_norm": 1.0900065377158878, "learning_rate": 3.2984010459846948e-06, 
"loss": 0.6846, "step": 12281 }, { "epoch": 0.6627812854136312, "grad_norm": 1.2131994482478845, "learning_rate": 3.297735723063493e-06, "loss": 0.4263, "step": 12282 }, { "epoch": 0.6628352490421456, "grad_norm": 1.1246783217029643, "learning_rate": 3.2970704634375915e-06, "loss": 0.504, "step": 12283 }, { "epoch": 0.6628892126706599, "grad_norm": 1.0641911186818613, "learning_rate": 3.296405267126114e-06, "loss": 0.456, "step": 12284 }, { "epoch": 0.6629431762991743, "grad_norm": 0.940960316702678, "learning_rate": 3.295740134148172e-06, "loss": 0.4507, "step": 12285 }, { "epoch": 0.6629971399276887, "grad_norm": 0.9832344728519783, "learning_rate": 3.2950750645228868e-06, "loss": 0.4535, "step": 12286 }, { "epoch": 0.6630511035562031, "grad_norm": 1.1385669144373034, "learning_rate": 3.2944100582693717e-06, "loss": 0.5047, "step": 12287 }, { "epoch": 0.6631050671847175, "grad_norm": 1.0054123034935745, "learning_rate": 3.2937451154067403e-06, "loss": 0.4942, "step": 12288 }, { "epoch": 0.6631590308132319, "grad_norm": 0.8871940820885871, "learning_rate": 3.293080235954104e-06, "loss": 0.3786, "step": 12289 }, { "epoch": 0.6632129944417463, "grad_norm": 1.2173029324688913, "learning_rate": 3.292415419930571e-06, "loss": 0.367, "step": 12290 }, { "epoch": 0.6632669580702606, "grad_norm": 1.0567106506106088, "learning_rate": 3.2917506673552514e-06, "loss": 0.4384, "step": 12291 }, { "epoch": 0.663320921698775, "grad_norm": 1.2591132056098104, "learning_rate": 3.291085978247247e-06, "loss": 0.6048, "step": 12292 }, { "epoch": 0.6633748853272894, "grad_norm": 0.9730099870654649, "learning_rate": 3.290421352625664e-06, "loss": 0.6353, "step": 12293 }, { "epoch": 0.6634288489558038, "grad_norm": 0.8050400126153652, "learning_rate": 3.289756790509604e-06, "loss": 0.3587, "step": 12294 }, { "epoch": 0.6634828125843182, "grad_norm": 1.1382932967756263, "learning_rate": 3.289092291918167e-06, "loss": 0.5188, "step": 12295 }, { "epoch": 0.6635367762128326, "grad_norm": 1.3276462869402965, "learning_rate": 3.288427856870451e-06, "loss": 0.789, "step": 12296 }, { "epoch": 0.663590739841347, "grad_norm": 1.2317524044319534, "learning_rate": 3.2877634853855534e-06, "loss": 0.524, "step": 12297 }, { "epoch": 0.6636447034698613, "grad_norm": 1.1769963930182263, "learning_rate": 3.28709917748257e-06, "loss": 0.4946, "step": 12298 }, { "epoch": 0.6636986670983757, "grad_norm": 1.0155236338182156, "learning_rate": 3.28643493318059e-06, "loss": 0.5435, "step": 12299 }, { "epoch": 0.6637526307268901, "grad_norm": 0.8761272135918134, "learning_rate": 3.285770752498708e-06, "loss": 0.3819, "step": 12300 }, { "epoch": 0.6638065943554045, "grad_norm": 0.9450125650388921, "learning_rate": 3.2851066354560113e-06, "loss": 0.4806, "step": 12301 }, { "epoch": 0.6638605579839189, "grad_norm": 0.886842047027112, "learning_rate": 3.2844425820715874e-06, "loss": 0.3645, "step": 12302 }, { "epoch": 0.6639145216124333, "grad_norm": 0.9534722180372669, "learning_rate": 3.283778592364523e-06, "loss": 0.4082, "step": 12303 }, { "epoch": 0.6639684852409476, "grad_norm": 1.0419762164358175, "learning_rate": 3.2831146663539005e-06, "loss": 0.4179, "step": 12304 }, { "epoch": 0.6640224488694619, "grad_norm": 1.0995510255803589, "learning_rate": 3.282450804058804e-06, "loss": 0.4446, "step": 12305 }, { "epoch": 0.6640764124979763, "grad_norm": 1.1564253375772022, "learning_rate": 3.2817870054983127e-06, "loss": 0.6377, "step": 12306 }, { "epoch": 0.6641303761264907, "grad_norm": 1.0337355511529431, "learning_rate": 
3.2811232706915018e-06, "loss": 0.5734, "step": 12307 }, { "epoch": 0.6641843397550051, "grad_norm": 0.9636586743795145, "learning_rate": 3.2804595996574507e-06, "loss": 0.3831, "step": 12308 }, { "epoch": 0.6642383033835195, "grad_norm": 1.213459088576215, "learning_rate": 3.2797959924152335e-06, "loss": 0.614, "step": 12309 }, { "epoch": 0.6642922670120339, "grad_norm": 0.9499648162673885, "learning_rate": 3.279132448983923e-06, "loss": 0.6809, "step": 12310 }, { "epoch": 0.6643462306405483, "grad_norm": 1.0484144984404247, "learning_rate": 3.27846896938259e-06, "loss": 0.4262, "step": 12311 }, { "epoch": 0.6644001942690626, "grad_norm": 0.9221714612338353, "learning_rate": 3.2778055536303042e-06, "loss": 0.2954, "step": 12312 }, { "epoch": 0.664454157897577, "grad_norm": 0.996160777510321, "learning_rate": 3.277142201746132e-06, "loss": 0.4754, "step": 12313 }, { "epoch": 0.6645081215260914, "grad_norm": 1.0575855839198571, "learning_rate": 3.2764789137491375e-06, "loss": 0.4332, "step": 12314 }, { "epoch": 0.6645620851546058, "grad_norm": 0.9931279686967059, "learning_rate": 3.2758156896583865e-06, "loss": 0.4825, "step": 12315 }, { "epoch": 0.6646160487831202, "grad_norm": 0.9875673519489118, "learning_rate": 3.2751525294929398e-06, "loss": 0.5105, "step": 12316 }, { "epoch": 0.6646700124116346, "grad_norm": 0.8880909339402102, "learning_rate": 3.2744894332718576e-06, "loss": 0.3762, "step": 12317 }, { "epoch": 0.664723976040149, "grad_norm": 0.9615724606234294, "learning_rate": 3.2738264010141974e-06, "loss": 0.5275, "step": 12318 }, { "epoch": 0.6647779396686633, "grad_norm": 1.1590976449051928, "learning_rate": 3.2731634327390182e-06, "loss": 0.5842, "step": 12319 }, { "epoch": 0.6648319032971777, "grad_norm": 0.9555928918133184, "learning_rate": 3.27250052846537e-06, "loss": 0.366, "step": 12320 }, { "epoch": 0.6648858669256921, "grad_norm": 0.9934289291026825, "learning_rate": 3.271837688212307e-06, "loss": 0.668, "step": 12321 }, { "epoch": 0.6649398305542065, "grad_norm": 1.2568837232677808, "learning_rate": 3.2711749119988812e-06, "loss": 0.4813, "step": 12322 }, { "epoch": 0.6649937941827209, "grad_norm": 1.1192395104947572, "learning_rate": 3.2705121998441384e-06, "loss": 0.4267, "step": 12323 }, { "epoch": 0.6650477578112353, "grad_norm": 1.1633132361437082, "learning_rate": 3.2698495517671297e-06, "loss": 0.5635, "step": 12324 }, { "epoch": 0.6651017214397497, "grad_norm": 0.8846721829242467, "learning_rate": 3.2691869677868977e-06, "loss": 0.4594, "step": 12325 }, { "epoch": 0.665155685068264, "grad_norm": 0.9198175363408206, "learning_rate": 3.2685244479224866e-06, "loss": 0.411, "step": 12326 }, { "epoch": 0.6652096486967783, "grad_norm": 0.924933117782707, "learning_rate": 3.267861992192937e-06, "loss": 0.3775, "step": 12327 }, { "epoch": 0.6652636123252927, "grad_norm": 0.7929835001724763, "learning_rate": 3.267199600617289e-06, "loss": 0.3076, "step": 12328 }, { "epoch": 0.6653175759538071, "grad_norm": 1.0058604091851406, "learning_rate": 3.266537273214582e-06, "loss": 0.5648, "step": 12329 }, { "epoch": 0.6653715395823215, "grad_norm": 0.8256984226627744, "learning_rate": 3.2658750100038507e-06, "loss": 0.3371, "step": 12330 }, { "epoch": 0.6654255032108359, "grad_norm": 1.2823918901619331, "learning_rate": 3.2652128110041277e-06, "loss": 0.6623, "step": 12331 }, { "epoch": 0.6654794668393503, "grad_norm": 1.01794539808815, "learning_rate": 3.264550676234447e-06, "loss": 0.5861, "step": 12332 }, { "epoch": 0.6655334304678646, "grad_norm": 1.0043160156983775, 
"learning_rate": 3.263888605713838e-06, "loss": 0.4525, "step": 12333 }, { "epoch": 0.665587394096379, "grad_norm": 1.26761373447209, "learning_rate": 3.2632265994613305e-06, "loss": 0.5509, "step": 12334 }, { "epoch": 0.6656413577248934, "grad_norm": 1.1971648009554738, "learning_rate": 3.262564657495951e-06, "loss": 0.594, "step": 12335 }, { "epoch": 0.6656953213534078, "grad_norm": 1.0144595246822048, "learning_rate": 3.261902779836723e-06, "loss": 0.4115, "step": 12336 }, { "epoch": 0.6657492849819222, "grad_norm": 1.0223204975749298, "learning_rate": 3.2612409665026712e-06, "loss": 0.4523, "step": 12337 }, { "epoch": 0.6658032486104366, "grad_norm": 0.895107628570298, "learning_rate": 3.260579217512815e-06, "loss": 0.3806, "step": 12338 }, { "epoch": 0.665857212238951, "grad_norm": 1.1548502267808474, "learning_rate": 3.2599175328861764e-06, "loss": 0.644, "step": 12339 }, { "epoch": 0.6659111758674653, "grad_norm": 0.9730964702599441, "learning_rate": 3.2592559126417723e-06, "loss": 0.4599, "step": 12340 }, { "epoch": 0.6659651394959797, "grad_norm": 0.9591535512049275, "learning_rate": 3.2585943567986156e-06, "loss": 0.4469, "step": 12341 }, { "epoch": 0.6660191031244941, "grad_norm": 0.7377201917412024, "learning_rate": 3.257932865375722e-06, "loss": 0.3553, "step": 12342 }, { "epoch": 0.6660730667530085, "grad_norm": 0.8051966424979499, "learning_rate": 3.2572714383921035e-06, "loss": 0.3587, "step": 12343 }, { "epoch": 0.6661270303815229, "grad_norm": 0.7890411760952944, "learning_rate": 3.2566100758667698e-06, "loss": 0.3621, "step": 12344 }, { "epoch": 0.6661809940100373, "grad_norm": 1.1731285025545173, "learning_rate": 3.255948777818729e-06, "loss": 0.5876, "step": 12345 }, { "epoch": 0.6662349576385516, "grad_norm": 0.9205837391813163, "learning_rate": 3.2552875442669896e-06, "loss": 0.4056, "step": 12346 }, { "epoch": 0.666288921267066, "grad_norm": 1.1253506943727885, "learning_rate": 3.254626375230553e-06, "loss": 0.4969, "step": 12347 }, { "epoch": 0.6663428848955804, "grad_norm": 0.9814810509970907, "learning_rate": 3.253965270728423e-06, "loss": 0.412, "step": 12348 }, { "epoch": 0.6663968485240948, "grad_norm": 0.8854114314641077, "learning_rate": 3.2533042307796004e-06, "loss": 0.3381, "step": 12349 }, { "epoch": 0.6664508121526092, "grad_norm": 1.1745992740023736, "learning_rate": 3.252643255403084e-06, "loss": 0.5069, "step": 12350 }, { "epoch": 0.6665047757811235, "grad_norm": 1.078594956025498, "learning_rate": 3.251982344617872e-06, "loss": 0.4386, "step": 12351 }, { "epoch": 0.666558739409638, "grad_norm": 1.2407835714824633, "learning_rate": 3.251321498442959e-06, "loss": 0.6154, "step": 12352 }, { "epoch": 0.6666127030381522, "grad_norm": 1.0439835417324956, "learning_rate": 3.2506607168973385e-06, "loss": 0.5407, "step": 12353 }, { "epoch": 0.6666666666666666, "grad_norm": 1.1629096088472213, "learning_rate": 3.2500000000000015e-06, "loss": 0.623, "step": 12354 }, { "epoch": 0.666720630295181, "grad_norm": 0.936880660740031, "learning_rate": 3.249339347769937e-06, "loss": 0.5012, "step": 12355 }, { "epoch": 0.6667745939236954, "grad_norm": 0.8823209558608377, "learning_rate": 3.248678760226135e-06, "loss": 0.4077, "step": 12356 }, { "epoch": 0.6668285575522098, "grad_norm": 1.037434378890483, "learning_rate": 3.248018237387579e-06, "loss": 0.4423, "step": 12357 }, { "epoch": 0.6668825211807242, "grad_norm": 0.9469526083012364, "learning_rate": 3.247357779273255e-06, "loss": 0.4474, "step": 12358 }, { "epoch": 0.6669364848092386, "grad_norm": 
0.852142360381308, "learning_rate": 3.2466973859021446e-06, "loss": 0.3878, "step": 12359 }, { "epoch": 0.6669904484377529, "grad_norm": 1.1153423285679775, "learning_rate": 3.2460370572932292e-06, "loss": 0.499, "step": 12360 }, { "epoch": 0.6670444120662673, "grad_norm": 0.9520157046969236, "learning_rate": 3.245376793465485e-06, "loss": 0.5672, "step": 12361 }, { "epoch": 0.6670983756947817, "grad_norm": 1.173193303402801, "learning_rate": 3.24471659443789e-06, "loss": 0.5541, "step": 12362 }, { "epoch": 0.6671523393232961, "grad_norm": 1.1141271844721001, "learning_rate": 3.2440564602294185e-06, "loss": 0.3981, "step": 12363 }, { "epoch": 0.6672063029518105, "grad_norm": 0.7936050093678741, "learning_rate": 3.243396390859044e-06, "loss": 0.3551, "step": 12364 }, { "epoch": 0.6672602665803249, "grad_norm": 1.003281988979681, "learning_rate": 3.242736386345737e-06, "loss": 0.4323, "step": 12365 }, { "epoch": 0.6673142302088393, "grad_norm": 0.9956373957679652, "learning_rate": 3.2420764467084674e-06, "loss": 0.318, "step": 12366 }, { "epoch": 0.6673681938373536, "grad_norm": 0.9899000592190348, "learning_rate": 3.241416571966204e-06, "loss": 0.5113, "step": 12367 }, { "epoch": 0.667422157465868, "grad_norm": 0.932198131396753, "learning_rate": 3.240756762137909e-06, "loss": 0.3277, "step": 12368 }, { "epoch": 0.6674761210943824, "grad_norm": 1.1901650500322785, "learning_rate": 3.2400970172425472e-06, "loss": 0.5311, "step": 12369 }, { "epoch": 0.6675300847228968, "grad_norm": 1.0037583520224604, "learning_rate": 3.2394373372990806e-06, "loss": 0.4447, "step": 12370 }, { "epoch": 0.6675840483514112, "grad_norm": 0.8120452855265773, "learning_rate": 3.238777722326468e-06, "loss": 0.2604, "step": 12371 }, { "epoch": 0.6676380119799256, "grad_norm": 1.1063214074375392, "learning_rate": 3.2381181723436707e-06, "loss": 0.6125, "step": 12372 }, { "epoch": 0.66769197560844, "grad_norm": 1.043449808974125, "learning_rate": 3.2374586873696406e-06, "loss": 0.5612, "step": 12373 }, { "epoch": 0.6677459392369542, "grad_norm": 0.9424853978419362, "learning_rate": 3.236799267423336e-06, "loss": 0.4018, "step": 12374 }, { "epoch": 0.6677999028654686, "grad_norm": 1.3523067849054293, "learning_rate": 3.2361399125237062e-06, "loss": 0.5797, "step": 12375 }, { "epoch": 0.667853866493983, "grad_norm": 1.116793307043734, "learning_rate": 3.235480622689703e-06, "loss": 0.5967, "step": 12376 }, { "epoch": 0.6679078301224974, "grad_norm": 1.0768017663188467, "learning_rate": 3.2348213979402743e-06, "loss": 0.4332, "step": 12377 }, { "epoch": 0.6679617937510118, "grad_norm": 1.0053062289491403, "learning_rate": 3.234162238294367e-06, "loss": 0.4378, "step": 12378 }, { "epoch": 0.6680157573795262, "grad_norm": 0.9628041666990697, "learning_rate": 3.2335031437709267e-06, "loss": 0.5465, "step": 12379 }, { "epoch": 0.6680697210080406, "grad_norm": 0.8611795754559531, "learning_rate": 3.2328441143888965e-06, "loss": 0.329, "step": 12380 }, { "epoch": 0.6681236846365549, "grad_norm": 1.0501431251896747, "learning_rate": 3.2321851501672173e-06, "loss": 0.466, "step": 12381 }, { "epoch": 0.6681776482650693, "grad_norm": 0.9775546361625078, "learning_rate": 3.231526251124828e-06, "loss": 0.5297, "step": 12382 }, { "epoch": 0.6682316118935837, "grad_norm": 1.0058615389391146, "learning_rate": 3.2308674172806666e-06, "loss": 0.4203, "step": 12383 }, { "epoch": 0.6682855755220981, "grad_norm": 1.357306495521644, "learning_rate": 3.230208648653668e-06, "loss": 0.6187, "step": 12384 }, { "epoch": 0.6683395391506125, 
"grad_norm": 0.904370164125031, "learning_rate": 3.2295499452627665e-06, "loss": 0.3995, "step": 12385 }, { "epoch": 0.6683935027791269, "grad_norm": 0.7658731597397775, "learning_rate": 3.2288913071268945e-06, "loss": 0.3277, "step": 12386 }, { "epoch": 0.6684474664076413, "grad_norm": 1.0038712937876297, "learning_rate": 3.22823273426498e-06, "loss": 0.4448, "step": 12387 }, { "epoch": 0.6685014300361556, "grad_norm": 1.098301133661135, "learning_rate": 3.227574226695954e-06, "loss": 0.4787, "step": 12388 }, { "epoch": 0.66855539366467, "grad_norm": 1.0980722861209757, "learning_rate": 3.2269157844387394e-06, "loss": 0.4573, "step": 12389 }, { "epoch": 0.6686093572931844, "grad_norm": 0.8850143425657481, "learning_rate": 3.226257407512262e-06, "loss": 0.3725, "step": 12390 }, { "epoch": 0.6686633209216988, "grad_norm": 0.9754567345400976, "learning_rate": 3.2255990959354444e-06, "loss": 0.6684, "step": 12391 }, { "epoch": 0.6687172845502132, "grad_norm": 0.9759492136339822, "learning_rate": 3.2249408497272067e-06, "loss": 0.3893, "step": 12392 }, { "epoch": 0.6687712481787276, "grad_norm": 1.1703930849686768, "learning_rate": 3.2242826689064676e-06, "loss": 0.5033, "step": 12393 }, { "epoch": 0.668825211807242, "grad_norm": 1.1181579206684296, "learning_rate": 3.223624553492144e-06, "loss": 0.4967, "step": 12394 }, { "epoch": 0.6688791754357563, "grad_norm": 1.0791806859611004, "learning_rate": 3.2229665035031533e-06, "loss": 0.5056, "step": 12395 }, { "epoch": 0.6689331390642707, "grad_norm": 0.9122266460404179, "learning_rate": 3.2223085189584036e-06, "loss": 0.3611, "step": 12396 }, { "epoch": 0.668987102692785, "grad_norm": 0.873889389224243, "learning_rate": 3.2216505998768083e-06, "loss": 0.2659, "step": 12397 }, { "epoch": 0.6690410663212994, "grad_norm": 0.7465301798098738, "learning_rate": 3.2209927462772774e-06, "loss": 0.3581, "step": 12398 }, { "epoch": 0.6690950299498138, "grad_norm": 1.072929820076802, "learning_rate": 3.2203349581787175e-06, "loss": 0.3616, "step": 12399 }, { "epoch": 0.6691489935783282, "grad_norm": 1.2073481417732908, "learning_rate": 3.219677235600034e-06, "loss": 0.5546, "step": 12400 }, { "epoch": 0.6692029572068426, "grad_norm": 1.0435399179487563, "learning_rate": 3.219019578560132e-06, "loss": 0.3519, "step": 12401 }, { "epoch": 0.6692569208353569, "grad_norm": 1.0598047841911067, "learning_rate": 3.2183619870779125e-06, "loss": 0.6494, "step": 12402 }, { "epoch": 0.6693108844638713, "grad_norm": 0.7997497311296136, "learning_rate": 3.217704461172274e-06, "loss": 0.3476, "step": 12403 }, { "epoch": 0.6693648480923857, "grad_norm": 1.0992737888719173, "learning_rate": 3.2170470008621145e-06, "loss": 0.5185, "step": 12404 }, { "epoch": 0.6694188117209001, "grad_norm": 0.9501856939987475, "learning_rate": 3.216389606166332e-06, "loss": 0.3849, "step": 12405 }, { "epoch": 0.6694727753494145, "grad_norm": 0.7045781394764885, "learning_rate": 3.215732277103819e-06, "loss": 0.252, "step": 12406 }, { "epoch": 0.6695267389779289, "grad_norm": 1.0382597754865721, "learning_rate": 3.2150750136934687e-06, "loss": 0.3968, "step": 12407 }, { "epoch": 0.6695807026064433, "grad_norm": 1.038354696335282, "learning_rate": 3.2144178159541716e-06, "loss": 0.5212, "step": 12408 }, { "epoch": 0.6696346662349576, "grad_norm": 1.0738892607190234, "learning_rate": 3.2137606839048174e-06, "loss": 0.5239, "step": 12409 }, { "epoch": 0.669688629863472, "grad_norm": 1.2663652441750548, "learning_rate": 3.2131036175642884e-06, "loss": 0.5935, "step": 12410 }, { "epoch": 
0.6697425934919864, "grad_norm": 0.9570454757476815, "learning_rate": 3.2124466169514734e-06, "loss": 0.4188, "step": 12411 }, { "epoch": 0.6697965571205008, "grad_norm": 1.0159692430604688, "learning_rate": 3.2117896820852534e-06, "loss": 0.4211, "step": 12412 }, { "epoch": 0.6698505207490152, "grad_norm": 1.0113274848904037, "learning_rate": 3.2111328129845103e-06, "loss": 0.3937, "step": 12413 }, { "epoch": 0.6699044843775296, "grad_norm": 1.055042077771013, "learning_rate": 3.210476009668123e-06, "loss": 0.5291, "step": 12414 }, { "epoch": 0.6699584480060439, "grad_norm": 1.1283024921285176, "learning_rate": 3.2098192721549704e-06, "loss": 0.5489, "step": 12415 }, { "epoch": 0.6700124116345583, "grad_norm": 1.2015417853271846, "learning_rate": 3.2091626004639237e-06, "loss": 0.5348, "step": 12416 }, { "epoch": 0.6700663752630727, "grad_norm": 1.11953106961071, "learning_rate": 3.208505994613859e-06, "loss": 0.5668, "step": 12417 }, { "epoch": 0.6701203388915871, "grad_norm": 0.9880332969908763, "learning_rate": 3.207849454623647e-06, "loss": 0.4615, "step": 12418 }, { "epoch": 0.6701743025201015, "grad_norm": 0.8284850153408269, "learning_rate": 3.207192980512159e-06, "loss": 0.3403, "step": 12419 }, { "epoch": 0.6702282661486159, "grad_norm": 1.0439980346025834, "learning_rate": 3.20653657229826e-06, "loss": 0.5373, "step": 12420 }, { "epoch": 0.6702822297771303, "grad_norm": 1.3162178922125916, "learning_rate": 3.205880230000818e-06, "loss": 0.6363, "step": 12421 }, { "epoch": 0.6703361934056445, "grad_norm": 0.995620564904958, "learning_rate": 3.2052239536386974e-06, "loss": 0.4452, "step": 12422 }, { "epoch": 0.6703901570341589, "grad_norm": 1.0322492076761298, "learning_rate": 3.2045677432307582e-06, "loss": 0.4927, "step": 12423 }, { "epoch": 0.6704441206626733, "grad_norm": 1.3726583166210202, "learning_rate": 3.20391159879586e-06, "loss": 0.5544, "step": 12424 }, { "epoch": 0.6704980842911877, "grad_norm": 1.2022952020588167, "learning_rate": 3.2032555203528637e-06, "loss": 0.5544, "step": 12425 }, { "epoch": 0.6705520479197021, "grad_norm": 1.1149517435214429, "learning_rate": 3.202599507920624e-06, "loss": 0.4547, "step": 12426 }, { "epoch": 0.6706060115482165, "grad_norm": 1.1178131258619826, "learning_rate": 3.201943561517996e-06, "loss": 0.4186, "step": 12427 }, { "epoch": 0.6706599751767309, "grad_norm": 1.0663680378726743, "learning_rate": 3.2012876811638325e-06, "loss": 0.5685, "step": 12428 }, { "epoch": 0.6707139388052452, "grad_norm": 0.8602105913374478, "learning_rate": 3.2006318668769833e-06, "loss": 0.382, "step": 12429 }, { "epoch": 0.6707679024337596, "grad_norm": 0.9097605924457389, "learning_rate": 3.1999761186762968e-06, "loss": 0.4184, "step": 12430 }, { "epoch": 0.670821866062274, "grad_norm": 1.293219107223365, "learning_rate": 3.199320436580622e-06, "loss": 0.4675, "step": 12431 }, { "epoch": 0.6708758296907884, "grad_norm": 1.0250787071955714, "learning_rate": 3.198664820608801e-06, "loss": 0.4823, "step": 12432 }, { "epoch": 0.6709297933193028, "grad_norm": 1.4885123468534653, "learning_rate": 3.1980092707796795e-06, "loss": 0.6486, "step": 12433 }, { "epoch": 0.6709837569478172, "grad_norm": 0.9703350876185489, "learning_rate": 3.1973537871120975e-06, "loss": 0.5426, "step": 12434 }, { "epoch": 0.6710377205763316, "grad_norm": 1.660026920460805, "learning_rate": 3.196698369624896e-06, "loss": 0.5425, "step": 12435 }, { "epoch": 0.6710916842048459, "grad_norm": 1.3465265811464069, "learning_rate": 3.1960430183369086e-06, "loss": 0.7166, "step": 12436 }, 
{ "epoch": 0.6711456478333603, "grad_norm": 1.0216537302216104, "learning_rate": 3.195387733266973e-06, "loss": 0.4944, "step": 12437 }, { "epoch": 0.6711996114618747, "grad_norm": 1.0928065064463952, "learning_rate": 3.1947325144339224e-06, "loss": 0.4976, "step": 12438 }, { "epoch": 0.6712535750903891, "grad_norm": 0.8220869152026234, "learning_rate": 3.194077361856589e-06, "loss": 0.307, "step": 12439 }, { "epoch": 0.6713075387189035, "grad_norm": 1.0947173590911028, "learning_rate": 3.1934222755538026e-06, "loss": 0.6814, "step": 12440 }, { "epoch": 0.6713615023474179, "grad_norm": 0.8437172096917992, "learning_rate": 3.19276725554439e-06, "loss": 0.3311, "step": 12441 }, { "epoch": 0.6714154659759323, "grad_norm": 0.920484881339541, "learning_rate": 3.1921123018471777e-06, "loss": 0.4069, "step": 12442 }, { "epoch": 0.6714694296044466, "grad_norm": 0.9815091229544484, "learning_rate": 3.1914574144809916e-06, "loss": 0.5763, "step": 12443 }, { "epoch": 0.671523393232961, "grad_norm": 1.2478437619276084, "learning_rate": 3.19080259346465e-06, "loss": 0.4371, "step": 12444 }, { "epoch": 0.6715773568614753, "grad_norm": 1.0432916696216878, "learning_rate": 3.1901478388169754e-06, "loss": 0.4391, "step": 12445 }, { "epoch": 0.6716313204899897, "grad_norm": 1.0707674860605227, "learning_rate": 3.189493150556786e-06, "loss": 0.4256, "step": 12446 }, { "epoch": 0.6716852841185041, "grad_norm": 1.080439289176709, "learning_rate": 3.1888385287028983e-06, "loss": 0.3821, "step": 12447 }, { "epoch": 0.6717392477470185, "grad_norm": 1.2384454951813317, "learning_rate": 3.188183973274126e-06, "loss": 0.5385, "step": 12448 }, { "epoch": 0.6717932113755329, "grad_norm": 0.9833797260594502, "learning_rate": 3.1875294842892823e-06, "loss": 0.3995, "step": 12449 }, { "epoch": 0.6718471750040472, "grad_norm": 0.866106399746397, "learning_rate": 3.1868750617671793e-06, "loss": 0.3224, "step": 12450 }, { "epoch": 0.6719011386325616, "grad_norm": 0.8393215875288713, "learning_rate": 3.1862207057266235e-06, "loss": 0.3459, "step": 12451 }, { "epoch": 0.671955102261076, "grad_norm": 0.8941907409047861, "learning_rate": 3.185566416186422e-06, "loss": 0.3054, "step": 12452 }, { "epoch": 0.6720090658895904, "grad_norm": 1.1537474588429242, "learning_rate": 3.1849121931653797e-06, "loss": 0.3669, "step": 12453 }, { "epoch": 0.6720630295181048, "grad_norm": 0.9469888563771627, "learning_rate": 3.1842580366823007e-06, "loss": 0.3626, "step": 12454 }, { "epoch": 0.6721169931466192, "grad_norm": 0.889708163693672, "learning_rate": 3.183603946755986e-06, "loss": 0.4095, "step": 12455 }, { "epoch": 0.6721709567751336, "grad_norm": 1.09812605875971, "learning_rate": 3.182949923405234e-06, "loss": 0.6064, "step": 12456 }, { "epoch": 0.6722249204036479, "grad_norm": 0.8955850993905421, "learning_rate": 3.182295966648844e-06, "loss": 0.3595, "step": 12457 }, { "epoch": 0.6722788840321623, "grad_norm": 1.0510632759448908, "learning_rate": 3.181642076505608e-06, "loss": 0.4565, "step": 12458 }, { "epoch": 0.6723328476606767, "grad_norm": 0.8492451313010254, "learning_rate": 3.1809882529943212e-06, "loss": 0.3064, "step": 12459 }, { "epoch": 0.6723868112891911, "grad_norm": 0.7597745033783163, "learning_rate": 3.1803344961337758e-06, "loss": 0.2643, "step": 12460 }, { "epoch": 0.6724407749177055, "grad_norm": 1.1788212753411735, "learning_rate": 3.1796808059427608e-06, "loss": 0.509, "step": 12461 }, { "epoch": 0.6724947385462199, "grad_norm": 1.0467330988614503, "learning_rate": 3.179027182440064e-06, "loss": 0.4565, "step": 
12462 }, { "epoch": 0.6725487021747343, "grad_norm": 1.0543890039873764, "learning_rate": 3.1783736256444704e-06, "loss": 0.4176, "step": 12463 }, { "epoch": 0.6726026658032486, "grad_norm": 1.1339557311912754, "learning_rate": 3.1777201355747665e-06, "loss": 0.4962, "step": 12464 }, { "epoch": 0.672656629431763, "grad_norm": 1.1245693257956215, "learning_rate": 3.177066712249731e-06, "loss": 0.6303, "step": 12465 }, { "epoch": 0.6727105930602774, "grad_norm": 1.2754580674934812, "learning_rate": 3.176413355688145e-06, "loss": 0.6977, "step": 12466 }, { "epoch": 0.6727645566887918, "grad_norm": 0.8595240895090033, "learning_rate": 3.175760065908787e-06, "loss": 0.3781, "step": 12467 }, { "epoch": 0.6728185203173062, "grad_norm": 1.1461152308701572, "learning_rate": 3.1751068429304343e-06, "loss": 0.389, "step": 12468 }, { "epoch": 0.6728724839458206, "grad_norm": 1.163073050189621, "learning_rate": 3.174453686771859e-06, "loss": 0.4906, "step": 12469 }, { "epoch": 0.672926447574335, "grad_norm": 1.0860347525842324, "learning_rate": 3.173800597451835e-06, "loss": 0.5056, "step": 12470 }, { "epoch": 0.6729804112028492, "grad_norm": 1.078296414646916, "learning_rate": 3.1731475749891335e-06, "loss": 0.4139, "step": 12471 }, { "epoch": 0.6730343748313636, "grad_norm": 0.8107043007334677, "learning_rate": 3.1724946194025204e-06, "loss": 0.3567, "step": 12472 }, { "epoch": 0.673088338459878, "grad_norm": 0.897884304199088, "learning_rate": 3.1718417307107634e-06, "loss": 0.465, "step": 12473 }, { "epoch": 0.6731423020883924, "grad_norm": 1.0158651440998, "learning_rate": 3.1711889089326276e-06, "loss": 0.4817, "step": 12474 }, { "epoch": 0.6731962657169068, "grad_norm": 1.0549593084984992, "learning_rate": 3.1705361540868764e-06, "loss": 0.4879, "step": 12475 }, { "epoch": 0.6732502293454212, "grad_norm": 0.993195361368971, "learning_rate": 3.1698834661922683e-06, "loss": 0.4459, "step": 12476 }, { "epoch": 0.6733041929739356, "grad_norm": 0.8335979854030285, "learning_rate": 3.169230845267565e-06, "loss": 0.4411, "step": 12477 }, { "epoch": 0.6733581566024499, "grad_norm": 0.870199727372066, "learning_rate": 3.168578291331522e-06, "loss": 0.4315, "step": 12478 }, { "epoch": 0.6734121202309643, "grad_norm": 1.0512288834062855, "learning_rate": 3.1679258044028965e-06, "loss": 0.4696, "step": 12479 }, { "epoch": 0.6734660838594787, "grad_norm": 0.8923534038725774, "learning_rate": 3.167273384500438e-06, "loss": 0.3467, "step": 12480 }, { "epoch": 0.6735200474879931, "grad_norm": 1.1233162904969056, "learning_rate": 3.1666210316428993e-06, "loss": 0.6298, "step": 12481 }, { "epoch": 0.6735740111165075, "grad_norm": 0.9751493200313842, "learning_rate": 3.1659687458490297e-06, "loss": 0.5559, "step": 12482 }, { "epoch": 0.6736279747450219, "grad_norm": 1.157179883579112, "learning_rate": 3.1653165271375776e-06, "loss": 0.4943, "step": 12483 }, { "epoch": 0.6736819383735362, "grad_norm": 1.125575593575788, "learning_rate": 3.1646643755272865e-06, "loss": 0.4879, "step": 12484 }, { "epoch": 0.6737359020020506, "grad_norm": 1.0944689511411934, "learning_rate": 3.164012291036901e-06, "loss": 0.4394, "step": 12485 }, { "epoch": 0.673789865630565, "grad_norm": 0.9909343957952464, "learning_rate": 3.163360273685163e-06, "loss": 0.4952, "step": 12486 }, { "epoch": 0.6738438292590794, "grad_norm": 1.248966102323525, "learning_rate": 3.162708323490812e-06, "loss": 0.5298, "step": 12487 }, { "epoch": 0.6738977928875938, "grad_norm": 1.1199986622272946, "learning_rate": 3.1620564404725852e-06, "loss": 0.5807, 
"step": 12488 }, { "epoch": 0.6739517565161082, "grad_norm": 0.9444850163489719, "learning_rate": 3.1614046246492187e-06, "loss": 0.3991, "step": 12489 }, { "epoch": 0.6740057201446226, "grad_norm": 1.0321433874134422, "learning_rate": 3.1607528760394455e-06, "loss": 0.4975, "step": 12490 }, { "epoch": 0.6740596837731369, "grad_norm": 1.1727988344832683, "learning_rate": 3.1601011946620006e-06, "loss": 0.5668, "step": 12491 }, { "epoch": 0.6741136474016513, "grad_norm": 0.9172244776320095, "learning_rate": 3.1594495805356095e-06, "loss": 0.3187, "step": 12492 }, { "epoch": 0.6741676110301656, "grad_norm": 0.8357798770921838, "learning_rate": 3.1587980336790037e-06, "loss": 0.3116, "step": 12493 }, { "epoch": 0.67422157465868, "grad_norm": 1.3873139126779312, "learning_rate": 3.1581465541109067e-06, "loss": 0.5952, "step": 12494 }, { "epoch": 0.6742755382871944, "grad_norm": 0.9740829406838187, "learning_rate": 3.1574951418500445e-06, "loss": 0.3723, "step": 12495 }, { "epoch": 0.6743295019157088, "grad_norm": 0.7764021303769214, "learning_rate": 3.1568437969151395e-06, "loss": 0.3283, "step": 12496 }, { "epoch": 0.6743834655442232, "grad_norm": 1.20442334798481, "learning_rate": 3.1561925193249104e-06, "loss": 0.3889, "step": 12497 }, { "epoch": 0.6744374291727375, "grad_norm": 0.9956533322533199, "learning_rate": 3.1555413090980787e-06, "loss": 0.3711, "step": 12498 }, { "epoch": 0.6744913928012519, "grad_norm": 0.979760991540413, "learning_rate": 3.1548901662533558e-06, "loss": 0.426, "step": 12499 }, { "epoch": 0.6745453564297663, "grad_norm": 0.9452040631302806, "learning_rate": 3.1542390908094606e-06, "loss": 0.3867, "step": 12500 }, { "epoch": 0.6745453564297663, "eval_loss": 0.5368213653564453, "eval_runtime": 163.942, "eval_samples_per_second": 20.977, "eval_steps_per_second": 0.878, "step": 12500 }, { "epoch": 0.6745993200582807, "grad_norm": 0.9571429397203504, "learning_rate": 3.153588082785104e-06, "loss": 0.33, "step": 12501 }, { "epoch": 0.6746532836867951, "grad_norm": 1.1423131821487138, "learning_rate": 3.152937142198996e-06, "loss": 0.4871, "step": 12502 }, { "epoch": 0.6747072473153095, "grad_norm": 1.08110896798623, "learning_rate": 3.152286269069847e-06, "loss": 0.4805, "step": 12503 }, { "epoch": 0.6747612109438239, "grad_norm": 1.0894989170111733, "learning_rate": 3.1516354634163625e-06, "loss": 0.4522, "step": 12504 }, { "epoch": 0.6748151745723382, "grad_norm": 0.9350860762668982, "learning_rate": 3.150984725257249e-06, "loss": 0.3974, "step": 12505 }, { "epoch": 0.6748691382008526, "grad_norm": 0.9598369570975777, "learning_rate": 3.150334054611206e-06, "loss": 0.3548, "step": 12506 }, { "epoch": 0.674923101829367, "grad_norm": 0.8836477731598379, "learning_rate": 3.1496834514969368e-06, "loss": 0.3618, "step": 12507 }, { "epoch": 0.6749770654578814, "grad_norm": 1.225816907401292, "learning_rate": 3.14903291593314e-06, "loss": 0.479, "step": 12508 }, { "epoch": 0.6750310290863958, "grad_norm": 1.3303839739971224, "learning_rate": 3.1483824479385127e-06, "loss": 0.5311, "step": 12509 }, { "epoch": 0.6750849927149102, "grad_norm": 1.2909697991993074, "learning_rate": 3.1477320475317506e-06, "loss": 0.5832, "step": 12510 }, { "epoch": 0.6751389563434246, "grad_norm": 1.0475144942867909, "learning_rate": 3.1470817147315454e-06, "loss": 0.5019, "step": 12511 }, { "epoch": 0.6751929199719389, "grad_norm": 1.2939685275802708, "learning_rate": 3.1464314495565914e-06, "loss": 0.6392, "step": 12512 }, { "epoch": 0.6752468836004533, "grad_norm": 1.1392797448187244, 
"learning_rate": 3.145781252025574e-06, "loss": 0.5239, "step": 12513 }, { "epoch": 0.6753008472289677, "grad_norm": 0.96507513010418, "learning_rate": 3.145131122157181e-06, "loss": 0.4198, "step": 12514 }, { "epoch": 0.6753548108574821, "grad_norm": 0.9906796534579229, "learning_rate": 3.1444810599701e-06, "loss": 0.4341, "step": 12515 }, { "epoch": 0.6754087744859965, "grad_norm": 0.9930140483307325, "learning_rate": 3.143831065483014e-06, "loss": 0.5738, "step": 12516 }, { "epoch": 0.6754627381145109, "grad_norm": 0.8875725174695971, "learning_rate": 3.1431811387146025e-06, "loss": 0.4713, "step": 12517 }, { "epoch": 0.6755167017430252, "grad_norm": 0.8576127550501019, "learning_rate": 3.142531279683547e-06, "loss": 0.363, "step": 12518 }, { "epoch": 0.6755706653715395, "grad_norm": 0.890218055408893, "learning_rate": 3.141881488408527e-06, "loss": 0.424, "step": 12519 }, { "epoch": 0.6756246290000539, "grad_norm": 1.0451223104737808, "learning_rate": 3.1412317649082134e-06, "loss": 0.4112, "step": 12520 }, { "epoch": 0.6756785926285683, "grad_norm": 1.1477077281273154, "learning_rate": 3.1405821092012824e-06, "loss": 0.5033, "step": 12521 }, { "epoch": 0.6757325562570827, "grad_norm": 0.8520788070004383, "learning_rate": 3.139932521306405e-06, "loss": 0.3883, "step": 12522 }, { "epoch": 0.6757865198855971, "grad_norm": 0.9778952707083785, "learning_rate": 3.1392830012422525e-06, "loss": 0.4341, "step": 12523 }, { "epoch": 0.6758404835141115, "grad_norm": 1.0913159882219015, "learning_rate": 3.1386335490274917e-06, "loss": 0.472, "step": 12524 }, { "epoch": 0.6758944471426259, "grad_norm": 0.7403046814695414, "learning_rate": 3.1379841646807883e-06, "loss": 0.3477, "step": 12525 }, { "epoch": 0.6759484107711402, "grad_norm": 1.2280726501799701, "learning_rate": 3.137334848220809e-06, "loss": 0.7131, "step": 12526 }, { "epoch": 0.6760023743996546, "grad_norm": 1.01053090004462, "learning_rate": 3.1366855996662104e-06, "loss": 0.5446, "step": 12527 }, { "epoch": 0.676056338028169, "grad_norm": 0.8895239621192281, "learning_rate": 3.136036419035656e-06, "loss": 0.4047, "step": 12528 }, { "epoch": 0.6761103016566834, "grad_norm": 0.9421582641984142, "learning_rate": 3.135387306347805e-06, "loss": 0.4504, "step": 12529 }, { "epoch": 0.6761642652851978, "grad_norm": 0.9424334600471115, "learning_rate": 3.1347382616213114e-06, "loss": 0.4253, "step": 12530 }, { "epoch": 0.6762182289137122, "grad_norm": 1.0122859443487862, "learning_rate": 3.1340892848748295e-06, "loss": 0.3451, "step": 12531 }, { "epoch": 0.6762721925422266, "grad_norm": 1.081719914555442, "learning_rate": 3.133440376127012e-06, "loss": 0.4325, "step": 12532 }, { "epoch": 0.6763261561707409, "grad_norm": 1.060582630200462, "learning_rate": 3.13279153539651e-06, "loss": 0.3994, "step": 12533 }, { "epoch": 0.6763801197992553, "grad_norm": 0.7651549793746555, "learning_rate": 3.132142762701971e-06, "loss": 0.304, "step": 12534 }, { "epoch": 0.6764340834277697, "grad_norm": 0.9315051354136454, "learning_rate": 3.131494058062041e-06, "loss": 0.5469, "step": 12535 }, { "epoch": 0.6764880470562841, "grad_norm": 1.0124631383658982, "learning_rate": 3.1308454214953653e-06, "loss": 0.4943, "step": 12536 }, { "epoch": 0.6765420106847985, "grad_norm": 0.8894507696597683, "learning_rate": 3.1301968530205867e-06, "loss": 0.3459, "step": 12537 }, { "epoch": 0.6765959743133129, "grad_norm": 1.0980792733254192, "learning_rate": 3.1295483526563446e-06, "loss": 0.5039, "step": 12538 }, { "epoch": 0.6766499379418273, "grad_norm": 
0.9661265480596312, "learning_rate": 3.1288999204212777e-06, "loss": 0.3887, "step": 12539 }, { "epoch": 0.6767039015703415, "grad_norm": 1.3479687321021294, "learning_rate": 3.1282515563340247e-06, "loss": 0.5613, "step": 12540 }, { "epoch": 0.676757865198856, "grad_norm": 0.9341726786516737, "learning_rate": 3.1276032604132173e-06, "loss": 0.4838, "step": 12541 }, { "epoch": 0.6768118288273703, "grad_norm": 1.0667217369329645, "learning_rate": 3.126955032677489e-06, "loss": 0.5301, "step": 12542 }, { "epoch": 0.6768657924558847, "grad_norm": 1.073598153320754, "learning_rate": 3.1263068731454706e-06, "loss": 0.4384, "step": 12543 }, { "epoch": 0.6769197560843991, "grad_norm": 0.8456914375092188, "learning_rate": 3.125658781835791e-06, "loss": 0.3173, "step": 12544 }, { "epoch": 0.6769737197129135, "grad_norm": 0.9173480500794219, "learning_rate": 3.1250107587670777e-06, "loss": 0.3714, "step": 12545 }, { "epoch": 0.6770276833414279, "grad_norm": 0.8170093633735841, "learning_rate": 3.1243628039579544e-06, "loss": 0.3311, "step": 12546 }, { "epoch": 0.6770816469699422, "grad_norm": 1.1167131910980208, "learning_rate": 3.1237149174270465e-06, "loss": 0.4908, "step": 12547 }, { "epoch": 0.6771356105984566, "grad_norm": 0.9377850786378774, "learning_rate": 3.123067099192971e-06, "loss": 0.4023, "step": 12548 }, { "epoch": 0.677189574226971, "grad_norm": 0.8707626135572417, "learning_rate": 3.1224193492743495e-06, "loss": 0.324, "step": 12549 }, { "epoch": 0.6772435378554854, "grad_norm": 0.8143062644943083, "learning_rate": 3.1217716676897975e-06, "loss": 0.3997, "step": 12550 }, { "epoch": 0.6772975014839998, "grad_norm": 1.0519723280185604, "learning_rate": 3.1211240544579313e-06, "loss": 0.5422, "step": 12551 }, { "epoch": 0.6773514651125142, "grad_norm": 1.0639368694121791, "learning_rate": 3.1204765095973626e-06, "loss": 0.3847, "step": 12552 }, { "epoch": 0.6774054287410285, "grad_norm": 1.1941453382283562, "learning_rate": 3.1198290331267055e-06, "loss": 0.576, "step": 12553 }, { "epoch": 0.6774593923695429, "grad_norm": 1.0791123478090205, "learning_rate": 3.119181625064566e-06, "loss": 0.621, "step": 12554 }, { "epoch": 0.6775133559980573, "grad_norm": 0.9976154236149422, "learning_rate": 3.118534285429552e-06, "loss": 0.5007, "step": 12555 }, { "epoch": 0.6775673196265717, "grad_norm": 1.0366372341834067, "learning_rate": 3.1178870142402685e-06, "loss": 0.4185, "step": 12556 }, { "epoch": 0.6776212832550861, "grad_norm": 1.1880258547576286, "learning_rate": 3.1172398115153193e-06, "loss": 0.4925, "step": 12557 }, { "epoch": 0.6776752468836005, "grad_norm": 1.024137049595589, "learning_rate": 3.1165926772733053e-06, "loss": 0.4286, "step": 12558 }, { "epoch": 0.6777292105121149, "grad_norm": 1.0928563951031838, "learning_rate": 3.1159456115328257e-06, "loss": 0.5829, "step": 12559 }, { "epoch": 0.6777831741406292, "grad_norm": 1.0392567848576464, "learning_rate": 3.115298614312481e-06, "loss": 0.4742, "step": 12560 }, { "epoch": 0.6778371377691436, "grad_norm": 1.0552367840127157, "learning_rate": 3.114651685630861e-06, "loss": 0.5318, "step": 12561 }, { "epoch": 0.677891101397658, "grad_norm": 0.7747160033788929, "learning_rate": 3.114004825506562e-06, "loss": 0.2809, "step": 12562 }, { "epoch": 0.6779450650261724, "grad_norm": 0.9880987788889704, "learning_rate": 3.113358033958175e-06, "loss": 0.425, "step": 12563 }, { "epoch": 0.6779990286546868, "grad_norm": 1.0970920204374452, "learning_rate": 3.1127113110042893e-06, "loss": 0.6359, "step": 12564 }, { "epoch": 
0.6780529922832012, "grad_norm": 0.9371212775721022, "learning_rate": 3.112064656663494e-06, "loss": 0.4757, "step": 12565 }, { "epoch": 0.6781069559117155, "grad_norm": 0.9260124838912506, "learning_rate": 3.111418070954373e-06, "loss": 0.4819, "step": 12566 }, { "epoch": 0.6781609195402298, "grad_norm": 1.0386917603920927, "learning_rate": 3.1107715538955107e-06, "loss": 0.4319, "step": 12567 }, { "epoch": 0.6782148831687442, "grad_norm": 0.99242959717869, "learning_rate": 3.1101251055054877e-06, "loss": 0.4616, "step": 12568 }, { "epoch": 0.6782688467972586, "grad_norm": 1.248465738272871, "learning_rate": 3.1094787258028837e-06, "loss": 0.5013, "step": 12569 }, { "epoch": 0.678322810425773, "grad_norm": 1.1358692018014827, "learning_rate": 3.1088324148062753e-06, "loss": 0.4493, "step": 12570 }, { "epoch": 0.6783767740542874, "grad_norm": 1.0763794320356304, "learning_rate": 3.108186172534241e-06, "loss": 0.5234, "step": 12571 }, { "epoch": 0.6784307376828018, "grad_norm": 1.1138566523636388, "learning_rate": 3.1075399990053522e-06, "loss": 0.5141, "step": 12572 }, { "epoch": 0.6784847013113162, "grad_norm": 1.0418303039442804, "learning_rate": 3.106893894238181e-06, "loss": 0.4881, "step": 12573 }, { "epoch": 0.6785386649398305, "grad_norm": 0.9645644695915627, "learning_rate": 3.1062478582512988e-06, "loss": 0.3944, "step": 12574 }, { "epoch": 0.6785926285683449, "grad_norm": 0.9684710964307409, "learning_rate": 3.1056018910632705e-06, "loss": 0.4344, "step": 12575 }, { "epoch": 0.6786465921968593, "grad_norm": 1.0681043327414068, "learning_rate": 3.1049559926926633e-06, "loss": 0.4651, "step": 12576 }, { "epoch": 0.6787005558253737, "grad_norm": 0.8059890754065552, "learning_rate": 3.1043101631580413e-06, "loss": 0.311, "step": 12577 }, { "epoch": 0.6787545194538881, "grad_norm": 1.1422558084269914, "learning_rate": 3.1036644024779657e-06, "loss": 0.5071, "step": 12578 }, { "epoch": 0.6788084830824025, "grad_norm": 0.7248523190672969, "learning_rate": 3.1030187106709963e-06, "loss": 0.2697, "step": 12579 }, { "epoch": 0.6788624467109169, "grad_norm": 0.9508927483904276, "learning_rate": 3.1023730877556906e-06, "loss": 0.4491, "step": 12580 }, { "epoch": 0.6789164103394312, "grad_norm": 1.1222046245409567, "learning_rate": 3.101727533750606e-06, "loss": 0.61, "step": 12581 }, { "epoch": 0.6789703739679456, "grad_norm": 0.9919619243683457, "learning_rate": 3.1010820486742945e-06, "loss": 0.4105, "step": 12582 }, { "epoch": 0.67902433759646, "grad_norm": 0.9274774804854894, "learning_rate": 3.1004366325453095e-06, "loss": 0.3426, "step": 12583 }, { "epoch": 0.6790783012249744, "grad_norm": 0.8582554478788331, "learning_rate": 3.0997912853822e-06, "loss": 0.286, "step": 12584 }, { "epoch": 0.6791322648534888, "grad_norm": 1.1587799698121426, "learning_rate": 3.099146007203515e-06, "loss": 0.4292, "step": 12585 }, { "epoch": 0.6791862284820032, "grad_norm": 0.925321100601125, "learning_rate": 3.0985007980277994e-06, "loss": 0.4032, "step": 12586 }, { "epoch": 0.6792401921105176, "grad_norm": 1.2917848724780916, "learning_rate": 3.0978556578735963e-06, "loss": 0.5848, "step": 12587 }, { "epoch": 0.6792941557390318, "grad_norm": 0.9207796303396527, "learning_rate": 3.097210586759449e-06, "loss": 0.4291, "step": 12588 }, { "epoch": 0.6793481193675462, "grad_norm": 0.9492494260022888, "learning_rate": 3.0965655847038977e-06, "loss": 0.3693, "step": 12589 }, { "epoch": 0.6794020829960606, "grad_norm": 1.1477568449370277, "learning_rate": 3.0959206517254803e-06, "loss": 0.5119, "step": 12590 
}, { "epoch": 0.679456046624575, "grad_norm": 1.1582127077926427, "learning_rate": 3.095275787842732e-06, "loss": 0.5233, "step": 12591 }, { "epoch": 0.6795100102530894, "grad_norm": 1.1035162940918315, "learning_rate": 3.0946309930741875e-06, "loss": 0.4516, "step": 12592 }, { "epoch": 0.6795639738816038, "grad_norm": 0.8463918172694993, "learning_rate": 3.0939862674383787e-06, "loss": 0.3777, "step": 12593 }, { "epoch": 0.6796179375101182, "grad_norm": 0.9928689531126598, "learning_rate": 3.0933416109538354e-06, "loss": 0.391, "step": 12594 }, { "epoch": 0.6796719011386325, "grad_norm": 1.0813041886848058, "learning_rate": 3.0926970236390876e-06, "loss": 0.4321, "step": 12595 }, { "epoch": 0.6797258647671469, "grad_norm": 1.116584632274721, "learning_rate": 3.092052505512658e-06, "loss": 0.5157, "step": 12596 }, { "epoch": 0.6797798283956613, "grad_norm": 1.2576092010249982, "learning_rate": 3.091408056593073e-06, "loss": 0.44, "step": 12597 }, { "epoch": 0.6798337920241757, "grad_norm": 1.1953669672963545, "learning_rate": 3.090763676898854e-06, "loss": 0.464, "step": 12598 }, { "epoch": 0.6798877556526901, "grad_norm": 0.9471987328848365, "learning_rate": 3.0901193664485213e-06, "loss": 0.3399, "step": 12599 }, { "epoch": 0.6799417192812045, "grad_norm": 1.0273246911459386, "learning_rate": 3.0894751252605928e-06, "loss": 0.481, "step": 12600 }, { "epoch": 0.6799956829097189, "grad_norm": 1.1362921745098056, "learning_rate": 3.0888309533535853e-06, "loss": 0.6258, "step": 12601 }, { "epoch": 0.6800496465382332, "grad_norm": 0.9993341642174376, "learning_rate": 3.088186850746014e-06, "loss": 0.4631, "step": 12602 }, { "epoch": 0.6801036101667476, "grad_norm": 0.9266952240555277, "learning_rate": 3.087542817456387e-06, "loss": 0.3899, "step": 12603 }, { "epoch": 0.680157573795262, "grad_norm": 0.9894636206510858, "learning_rate": 3.086898853503218e-06, "loss": 0.4669, "step": 12604 }, { "epoch": 0.6802115374237764, "grad_norm": 0.8560918570141612, "learning_rate": 3.086254958905014e-06, "loss": 0.4453, "step": 12605 }, { "epoch": 0.6802655010522908, "grad_norm": 0.9357941289161964, "learning_rate": 3.085611133680282e-06, "loss": 0.4302, "step": 12606 }, { "epoch": 0.6803194646808052, "grad_norm": 1.0849546390734923, "learning_rate": 3.084967377847524e-06, "loss": 0.4785, "step": 12607 }, { "epoch": 0.6803734283093196, "grad_norm": 1.0720492753542559, "learning_rate": 3.084323691425245e-06, "loss": 0.5944, "step": 12608 }, { "epoch": 0.6804273919378339, "grad_norm": 0.8163589899177673, "learning_rate": 3.0836800744319454e-06, "loss": 0.3869, "step": 12609 }, { "epoch": 0.6804813555663483, "grad_norm": 1.008806291373415, "learning_rate": 3.0830365268861202e-06, "loss": 0.5182, "step": 12610 }, { "epoch": 0.6805353191948627, "grad_norm": 1.1409125918027845, "learning_rate": 3.082393048806267e-06, "loss": 0.5107, "step": 12611 }, { "epoch": 0.680589282823377, "grad_norm": 1.0611748008137616, "learning_rate": 3.0817496402108817e-06, "loss": 0.4543, "step": 12612 }, { "epoch": 0.6806432464518914, "grad_norm": 0.9054356115197474, "learning_rate": 3.081106301118454e-06, "loss": 0.3781, "step": 12613 }, { "epoch": 0.6806972100804058, "grad_norm": 0.924990267829609, "learning_rate": 3.0804630315474757e-06, "loss": 0.496, "step": 12614 }, { "epoch": 0.6807511737089202, "grad_norm": 1.130146282650754, "learning_rate": 3.0798198315164345e-06, "loss": 0.5075, "step": 12615 }, { "epoch": 0.6808051373374345, "grad_norm": 1.0059843116703926, "learning_rate": 3.079176701043819e-06, "loss": 0.4208, "step": 
12616 }, { "epoch": 0.6808591009659489, "grad_norm": 1.0375302024428765, "learning_rate": 3.0785336401481087e-06, "loss": 0.441, "step": 12617 }, { "epoch": 0.6809130645944633, "grad_norm": 1.1410768643435854, "learning_rate": 3.0778906488477882e-06, "loss": 0.4156, "step": 12618 }, { "epoch": 0.6809670282229777, "grad_norm": 1.093732200671502, "learning_rate": 3.077247727161339e-06, "loss": 0.4273, "step": 12619 }, { "epoch": 0.6810209918514921, "grad_norm": 1.1662700660195735, "learning_rate": 3.076604875107237e-06, "loss": 0.5387, "step": 12620 }, { "epoch": 0.6810749554800065, "grad_norm": 0.9265317908262547, "learning_rate": 3.07596209270396e-06, "loss": 0.4331, "step": 12621 }, { "epoch": 0.6811289191085208, "grad_norm": 0.9373421531566584, "learning_rate": 3.0753193799699833e-06, "loss": 0.5373, "step": 12622 }, { "epoch": 0.6811828827370352, "grad_norm": 0.8886804577128159, "learning_rate": 3.0746767369237757e-06, "loss": 0.3398, "step": 12623 }, { "epoch": 0.6812368463655496, "grad_norm": 1.0001045304277278, "learning_rate": 3.07403416358381e-06, "loss": 0.4254, "step": 12624 }, { "epoch": 0.681290809994064, "grad_norm": 0.9166374403919453, "learning_rate": 3.073391659968553e-06, "loss": 0.3886, "step": 12625 }, { "epoch": 0.6813447736225784, "grad_norm": 0.9780711736921542, "learning_rate": 3.072749226096472e-06, "loss": 0.4457, "step": 12626 }, { "epoch": 0.6813987372510928, "grad_norm": 1.1086685930467592, "learning_rate": 3.0721068619860306e-06, "loss": 0.6839, "step": 12627 }, { "epoch": 0.6814527008796072, "grad_norm": 1.0797882875135867, "learning_rate": 3.071464567655691e-06, "loss": 0.4892, "step": 12628 }, { "epoch": 0.6815066645081215, "grad_norm": 1.2974204110442549, "learning_rate": 3.0708223431239156e-06, "loss": 0.505, "step": 12629 }, { "epoch": 0.6815606281366359, "grad_norm": 1.0753471892170139, "learning_rate": 3.070180188409159e-06, "loss": 0.5295, "step": 12630 }, { "epoch": 0.6816145917651503, "grad_norm": 1.276063980898254, "learning_rate": 3.0695381035298787e-06, "loss": 0.5461, "step": 12631 }, { "epoch": 0.6816685553936647, "grad_norm": 0.9164829886378051, "learning_rate": 3.0688960885045294e-06, "loss": 0.4836, "step": 12632 }, { "epoch": 0.6817225190221791, "grad_norm": 1.0884192625062559, "learning_rate": 3.0682541433515624e-06, "loss": 0.4604, "step": 12633 }, { "epoch": 0.6817764826506935, "grad_norm": 0.9917088483247262, "learning_rate": 3.067612268089429e-06, "loss": 0.452, "step": 12634 }, { "epoch": 0.6818304462792079, "grad_norm": 0.9841445939811543, "learning_rate": 3.066970462736576e-06, "loss": 0.4974, "step": 12635 }, { "epoch": 0.6818844099077221, "grad_norm": 0.9542133163022625, "learning_rate": 3.066328727311451e-06, "loss": 0.43, "step": 12636 }, { "epoch": 0.6819383735362365, "grad_norm": 1.1015159427113865, "learning_rate": 3.0656870618324975e-06, "loss": 0.491, "step": 12637 }, { "epoch": 0.6819923371647509, "grad_norm": 1.082376686227805, "learning_rate": 3.065045466318157e-06, "loss": 0.4122, "step": 12638 }, { "epoch": 0.6820463007932653, "grad_norm": 0.8166482495697612, "learning_rate": 3.0644039407868705e-06, "loss": 0.2809, "step": 12639 }, { "epoch": 0.6821002644217797, "grad_norm": 0.8826774003998288, "learning_rate": 3.0637624852570757e-06, "loss": 0.3284, "step": 12640 }, { "epoch": 0.6821542280502941, "grad_norm": 1.0542326090732264, "learning_rate": 3.0631210997472084e-06, "loss": 0.4068, "step": 12641 }, { "epoch": 0.6822081916788085, "grad_norm": 1.0713893224789965, "learning_rate": 3.0624797842757027e-06, "loss": 
0.5491, "step": 12642 }, { "epoch": 0.6822621553073228, "grad_norm": 0.8857340182055169, "learning_rate": 3.061838538860993e-06, "loss": 0.4768, "step": 12643 }, { "epoch": 0.6823161189358372, "grad_norm": 1.078015850499123, "learning_rate": 3.0611973635215066e-06, "loss": 0.3582, "step": 12644 }, { "epoch": 0.6823700825643516, "grad_norm": 1.1238528898831581, "learning_rate": 3.060556258275671e-06, "loss": 0.6395, "step": 12645 }, { "epoch": 0.682424046192866, "grad_norm": 1.0445012733833214, "learning_rate": 3.059915223141914e-06, "loss": 0.4366, "step": 12646 }, { "epoch": 0.6824780098213804, "grad_norm": 0.8407677588637472, "learning_rate": 3.0592742581386585e-06, "loss": 0.3312, "step": 12647 }, { "epoch": 0.6825319734498948, "grad_norm": 1.071469680996547, "learning_rate": 3.0586333632843268e-06, "loss": 0.5237, "step": 12648 }, { "epoch": 0.6825859370784092, "grad_norm": 0.7134973318081653, "learning_rate": 3.0579925385973395e-06, "loss": 0.2769, "step": 12649 }, { "epoch": 0.6826399007069235, "grad_norm": 1.150841107917194, "learning_rate": 3.0573517840961164e-06, "loss": 0.4797, "step": 12650 }, { "epoch": 0.6826938643354379, "grad_norm": 0.8656777621485527, "learning_rate": 3.056711099799069e-06, "loss": 0.3484, "step": 12651 }, { "epoch": 0.6827478279639523, "grad_norm": 0.7323445557044926, "learning_rate": 3.056070485724613e-06, "loss": 0.2872, "step": 12652 }, { "epoch": 0.6828017915924667, "grad_norm": 0.7583763123707657, "learning_rate": 3.055429941891161e-06, "loss": 0.2827, "step": 12653 }, { "epoch": 0.6828557552209811, "grad_norm": 1.0863264419131908, "learning_rate": 3.054789468317122e-06, "loss": 0.4883, "step": 12654 }, { "epoch": 0.6829097188494955, "grad_norm": 0.9860521528548339, "learning_rate": 3.054149065020905e-06, "loss": 0.4903, "step": 12655 }, { "epoch": 0.6829636824780099, "grad_norm": 1.0952652856259264, "learning_rate": 3.0535087320209145e-06, "loss": 0.4318, "step": 12656 }, { "epoch": 0.6830176461065242, "grad_norm": 0.8921205113801666, "learning_rate": 3.0528684693355575e-06, "loss": 0.4638, "step": 12657 }, { "epoch": 0.6830716097350386, "grad_norm": 0.9382146530197142, "learning_rate": 3.052228276983231e-06, "loss": 0.4333, "step": 12658 }, { "epoch": 0.683125573363553, "grad_norm": 0.8342004687840429, "learning_rate": 3.0515881549823377e-06, "loss": 0.3667, "step": 12659 }, { "epoch": 0.6831795369920673, "grad_norm": 1.0564749684844854, "learning_rate": 3.0509481033512743e-06, "loss": 0.4453, "step": 12660 }, { "epoch": 0.6832335006205817, "grad_norm": 0.81822993690754, "learning_rate": 3.050308122108438e-06, "loss": 0.3635, "step": 12661 }, { "epoch": 0.6832874642490961, "grad_norm": 1.0354231330552925, "learning_rate": 3.049668211272221e-06, "loss": 0.3627, "step": 12662 }, { "epoch": 0.6833414278776105, "grad_norm": 0.923002398589932, "learning_rate": 3.0490283708610147e-06, "loss": 0.444, "step": 12663 }, { "epoch": 0.6833953915061248, "grad_norm": 1.1217331551716418, "learning_rate": 3.048388600893212e-06, "loss": 0.4573, "step": 12664 }, { "epoch": 0.6834493551346392, "grad_norm": 0.9476313230085514, "learning_rate": 3.0477489013871964e-06, "loss": 0.3555, "step": 12665 }, { "epoch": 0.6835033187631536, "grad_norm": 1.2581918717337999, "learning_rate": 3.047109272361355e-06, "loss": 0.5422, "step": 12666 }, { "epoch": 0.683557282391668, "grad_norm": 1.023918367088428, "learning_rate": 3.046469713834071e-06, "loss": 0.4558, "step": 12667 }, { "epoch": 0.6836112460201824, "grad_norm": 0.8633624241321597, "learning_rate": 3.0458302258237272e-06, 
"loss": 0.5295, "step": 12668 }, { "epoch": 0.6836652096486968, "grad_norm": 1.0806135865754323, "learning_rate": 3.0451908083487025e-06, "loss": 0.3605, "step": 12669 }, { "epoch": 0.6837191732772112, "grad_norm": 1.0268492895278352, "learning_rate": 3.0445514614273743e-06, "loss": 0.3971, "step": 12670 }, { "epoch": 0.6837731369057255, "grad_norm": 1.149274722702989, "learning_rate": 3.0439121850781196e-06, "loss": 0.5205, "step": 12671 }, { "epoch": 0.6838271005342399, "grad_norm": 1.2922088723939693, "learning_rate": 3.043272979319308e-06, "loss": 0.5147, "step": 12672 }, { "epoch": 0.6838810641627543, "grad_norm": 0.9523610297594303, "learning_rate": 3.042633844169315e-06, "loss": 0.373, "step": 12673 }, { "epoch": 0.6839350277912687, "grad_norm": 1.0806817246647884, "learning_rate": 3.0419947796465073e-06, "loss": 0.5046, "step": 12674 }, { "epoch": 0.6839889914197831, "grad_norm": 0.9086836699982206, "learning_rate": 3.041355785769253e-06, "loss": 0.3893, "step": 12675 }, { "epoch": 0.6840429550482975, "grad_norm": 1.1259510233450283, "learning_rate": 3.0407168625559187e-06, "loss": 0.4786, "step": 12676 }, { "epoch": 0.6840969186768119, "grad_norm": 1.0550378495994337, "learning_rate": 3.0400780100248663e-06, "loss": 0.405, "step": 12677 }, { "epoch": 0.6841508823053262, "grad_norm": 1.04313224152795, "learning_rate": 3.0394392281944585e-06, "loss": 0.4801, "step": 12678 }, { "epoch": 0.6842048459338406, "grad_norm": 0.9935208336225612, "learning_rate": 3.0388005170830525e-06, "loss": 0.4316, "step": 12679 }, { "epoch": 0.684258809562355, "grad_norm": 0.8536603550933983, "learning_rate": 3.038161876709006e-06, "loss": 0.5173, "step": 12680 }, { "epoch": 0.6843127731908694, "grad_norm": 1.093904279695257, "learning_rate": 3.037523307090676e-06, "loss": 0.5125, "step": 12681 }, { "epoch": 0.6843667368193838, "grad_norm": 1.077112926265179, "learning_rate": 3.0368848082464125e-06, "loss": 0.5851, "step": 12682 }, { "epoch": 0.6844207004478982, "grad_norm": 0.9086217160384871, "learning_rate": 3.0362463801945698e-06, "loss": 0.374, "step": 12683 }, { "epoch": 0.6844746640764126, "grad_norm": 0.9028834399483868, "learning_rate": 3.0356080229534947e-06, "loss": 0.4731, "step": 12684 }, { "epoch": 0.6845286277049268, "grad_norm": 1.0732844208954762, "learning_rate": 3.0349697365415353e-06, "loss": 0.4343, "step": 12685 }, { "epoch": 0.6845825913334412, "grad_norm": 0.8803259810964136, "learning_rate": 3.0343315209770363e-06, "loss": 0.4, "step": 12686 }, { "epoch": 0.6846365549619556, "grad_norm": 0.943562997615735, "learning_rate": 3.033693376278341e-06, "loss": 0.4353, "step": 12687 }, { "epoch": 0.68469051859047, "grad_norm": 1.201821833456644, "learning_rate": 3.03305530246379e-06, "loss": 0.6316, "step": 12688 }, { "epoch": 0.6847444822189844, "grad_norm": 0.9802262613915448, "learning_rate": 3.032417299551722e-06, "loss": 0.4613, "step": 12689 }, { "epoch": 0.6847984458474988, "grad_norm": 0.9562057463996573, "learning_rate": 3.0317793675604746e-06, "loss": 0.4435, "step": 12690 }, { "epoch": 0.6848524094760131, "grad_norm": 0.9431549856319231, "learning_rate": 3.0311415065083838e-06, "loss": 0.3428, "step": 12691 }, { "epoch": 0.6849063731045275, "grad_norm": 0.8324324480400573, "learning_rate": 3.0305037164137785e-06, "loss": 0.3019, "step": 12692 }, { "epoch": 0.6849603367330419, "grad_norm": 1.2571921263147945, "learning_rate": 3.0298659972949917e-06, "loss": 0.5086, "step": 12693 }, { "epoch": 0.6850143003615563, "grad_norm": 1.1571894716790918, "learning_rate": 
3.0292283491703527e-06, "loss": 0.4808, "step": 12694 }, { "epoch": 0.6850682639900707, "grad_norm": 1.0665843087650773, "learning_rate": 3.028590772058187e-06, "loss": 0.4488, "step": 12695 }, { "epoch": 0.6851222276185851, "grad_norm": 1.0327212709632838, "learning_rate": 3.0279532659768194e-06, "loss": 0.4271, "step": 12696 }, { "epoch": 0.6851761912470995, "grad_norm": 1.273466929140729, "learning_rate": 3.0273158309445737e-06, "loss": 0.5292, "step": 12697 }, { "epoch": 0.6852301548756138, "grad_norm": 0.9368044973170376, "learning_rate": 3.0266784669797704e-06, "loss": 0.4592, "step": 12698 }, { "epoch": 0.6852841185041282, "grad_norm": 0.9967815606875823, "learning_rate": 3.0260411741007258e-06, "loss": 0.4795, "step": 12699 }, { "epoch": 0.6853380821326426, "grad_norm": 0.880963010383675, "learning_rate": 3.0254039523257572e-06, "loss": 0.3831, "step": 12700 }, { "epoch": 0.685392045761157, "grad_norm": 1.3034087299681338, "learning_rate": 3.0247668016731803e-06, "loss": 0.5394, "step": 12701 }, { "epoch": 0.6854460093896714, "grad_norm": 0.70984561928335, "learning_rate": 3.024129722161305e-06, "loss": 0.2572, "step": 12702 }, { "epoch": 0.6854999730181858, "grad_norm": 1.2371971611257149, "learning_rate": 3.0234927138084443e-06, "loss": 0.5539, "step": 12703 }, { "epoch": 0.6855539366467002, "grad_norm": 1.053413426524323, "learning_rate": 3.0228557766329047e-06, "loss": 0.4626, "step": 12704 }, { "epoch": 0.6856079002752145, "grad_norm": 1.014991213952614, "learning_rate": 3.0222189106529953e-06, "loss": 0.3832, "step": 12705 }, { "epoch": 0.6856618639037289, "grad_norm": 1.3214078402524474, "learning_rate": 3.0215821158870163e-06, "loss": 0.5807, "step": 12706 }, { "epoch": 0.6857158275322432, "grad_norm": 1.0321501640300252, "learning_rate": 3.0209453923532704e-06, "loss": 0.5332, "step": 12707 }, { "epoch": 0.6857697911607576, "grad_norm": 1.0988265220574764, "learning_rate": 3.0203087400700603e-06, "loss": 0.4843, "step": 12708 }, { "epoch": 0.685823754789272, "grad_norm": 1.06169350525357, "learning_rate": 3.0196721590556822e-06, "loss": 0.4802, "step": 12709 }, { "epoch": 0.6858777184177864, "grad_norm": 0.8831078009059795, "learning_rate": 3.0190356493284322e-06, "loss": 0.4144, "step": 12710 }, { "epoch": 0.6859316820463008, "grad_norm": 1.0128377030253473, "learning_rate": 3.0183992109066053e-06, "loss": 0.5153, "step": 12711 }, { "epoch": 0.6859856456748151, "grad_norm": 1.2563031006959888, "learning_rate": 3.0177628438084927e-06, "loss": 0.5364, "step": 12712 }, { "epoch": 0.6860396093033295, "grad_norm": 1.058063619181959, "learning_rate": 3.0171265480523835e-06, "loss": 0.4769, "step": 12713 }, { "epoch": 0.6860935729318439, "grad_norm": 1.0278367782836342, "learning_rate": 3.0164903236565657e-06, "loss": 0.5072, "step": 12714 }, { "epoch": 0.6861475365603583, "grad_norm": 0.7616513751162358, "learning_rate": 3.015854170639325e-06, "loss": 0.2992, "step": 12715 }, { "epoch": 0.6862015001888727, "grad_norm": 1.049721391009651, "learning_rate": 3.0152180890189462e-06, "loss": 0.554, "step": 12716 }, { "epoch": 0.6862554638173871, "grad_norm": 0.9074011367355389, "learning_rate": 3.01458207881371e-06, "loss": 0.342, "step": 12717 }, { "epoch": 0.6863094274459015, "grad_norm": 0.9891598427279928, "learning_rate": 3.013946140041897e-06, "loss": 0.417, "step": 12718 }, { "epoch": 0.6863633910744158, "grad_norm": 0.9967838476854528, "learning_rate": 3.0133102727217843e-06, "loss": 0.4177, "step": 12719 }, { "epoch": 0.6864173547029302, "grad_norm": 1.2351139905032635, 
"learning_rate": 3.0126744768716453e-06, "loss": 0.4177, "step": 12720 }, { "epoch": 0.6864713183314446, "grad_norm": 1.021506460156865, "learning_rate": 3.0120387525097555e-06, "loss": 0.5684, "step": 12721 }, { "epoch": 0.686525281959959, "grad_norm": 0.9727627045952555, "learning_rate": 3.0114030996543863e-06, "loss": 0.5288, "step": 12722 }, { "epoch": 0.6865792455884734, "grad_norm": 0.8258285449158584, "learning_rate": 3.010767518323806e-06, "loss": 0.4009, "step": 12723 }, { "epoch": 0.6866332092169878, "grad_norm": 1.0101042920915568, "learning_rate": 3.0101320085362827e-06, "loss": 0.4204, "step": 12724 }, { "epoch": 0.6866871728455022, "grad_norm": 0.9144914437519924, "learning_rate": 3.0094965703100808e-06, "loss": 0.3762, "step": 12725 }, { "epoch": 0.6867411364740165, "grad_norm": 1.059062840193236, "learning_rate": 3.008861203663466e-06, "loss": 0.538, "step": 12726 }, { "epoch": 0.6867951001025309, "grad_norm": 1.1495860666088755, "learning_rate": 3.008225908614696e-06, "loss": 0.5385, "step": 12727 }, { "epoch": 0.6868490637310453, "grad_norm": 1.0272588474806184, "learning_rate": 3.00759068518203e-06, "loss": 0.4282, "step": 12728 }, { "epoch": 0.6869030273595597, "grad_norm": 0.9261986323023068, "learning_rate": 3.0069555333837277e-06, "loss": 0.4226, "step": 12729 }, { "epoch": 0.6869569909880741, "grad_norm": 0.9953545060654383, "learning_rate": 3.006320453238041e-06, "loss": 0.4551, "step": 12730 }, { "epoch": 0.6870109546165885, "grad_norm": 1.1869557342465598, "learning_rate": 3.0056854447632254e-06, "loss": 0.5484, "step": 12731 }, { "epoch": 0.6870649182451029, "grad_norm": 1.0152731760643607, "learning_rate": 3.0050505079775293e-06, "loss": 0.3455, "step": 12732 }, { "epoch": 0.6871188818736171, "grad_norm": 0.9403908082633645, "learning_rate": 3.004415642899203e-06, "loss": 0.4801, "step": 12733 }, { "epoch": 0.6871728455021315, "grad_norm": 1.131325497731077, "learning_rate": 3.003780849546495e-06, "loss": 0.475, "step": 12734 }, { "epoch": 0.6872268091306459, "grad_norm": 1.3420347490130498, "learning_rate": 3.003146127937645e-06, "loss": 0.6216, "step": 12735 }, { "epoch": 0.6872807727591603, "grad_norm": 0.934724226804627, "learning_rate": 3.002511478090898e-06, "loss": 0.4469, "step": 12736 }, { "epoch": 0.6873347363876747, "grad_norm": 0.8880204441674767, "learning_rate": 3.001876900024496e-06, "loss": 0.3355, "step": 12737 }, { "epoch": 0.6873887000161891, "grad_norm": 1.1570794064193979, "learning_rate": 3.001242393756675e-06, "loss": 0.5955, "step": 12738 }, { "epoch": 0.6874426636447035, "grad_norm": 0.9563138060989563, "learning_rate": 3.0006079593056725e-06, "loss": 0.4503, "step": 12739 }, { "epoch": 0.6874966272732178, "grad_norm": 0.9666227748296949, "learning_rate": 2.9999735966897233e-06, "loss": 0.4342, "step": 12740 }, { "epoch": 0.6875505909017322, "grad_norm": 1.0605296401292492, "learning_rate": 2.999339305927058e-06, "loss": 0.4453, "step": 12741 }, { "epoch": 0.6876045545302466, "grad_norm": 0.9008556272320122, "learning_rate": 2.9987050870359092e-06, "loss": 0.4026, "step": 12742 }, { "epoch": 0.687658518158761, "grad_norm": 1.1051085613182805, "learning_rate": 2.998070940034502e-06, "loss": 0.4353, "step": 12743 }, { "epoch": 0.6877124817872754, "grad_norm": 1.1206816398691255, "learning_rate": 2.9974368649410652e-06, "loss": 0.5611, "step": 12744 }, { "epoch": 0.6877664454157898, "grad_norm": 0.953748980346162, "learning_rate": 2.996802861773821e-06, "loss": 0.4819, "step": 12745 }, { "epoch": 0.6878204090443042, "grad_norm": 
1.073510749860126, "learning_rate": 2.9961689305509915e-06, "loss": 0.5311, "step": 12746 }, { "epoch": 0.6878743726728185, "grad_norm": 1.1155665183210635, "learning_rate": 2.9955350712907993e-06, "loss": 0.4529, "step": 12747 }, { "epoch": 0.6879283363013329, "grad_norm": 0.9748291945705497, "learning_rate": 2.9949012840114577e-06, "loss": 0.3618, "step": 12748 }, { "epoch": 0.6879822999298473, "grad_norm": 0.9507670320690967, "learning_rate": 2.9942675687311846e-06, "loss": 0.4869, "step": 12749 }, { "epoch": 0.6880362635583617, "grad_norm": 0.922976834234685, "learning_rate": 2.9936339254681934e-06, "loss": 0.4583, "step": 12750 }, { "epoch": 0.6880902271868761, "grad_norm": 0.671243853143658, "learning_rate": 2.993000354240696e-06, "loss": 0.2799, "step": 12751 }, { "epoch": 0.6881441908153905, "grad_norm": 0.889854971568285, "learning_rate": 2.9923668550669004e-06, "loss": 0.4982, "step": 12752 }, { "epoch": 0.6881981544439048, "grad_norm": 1.0175401041974248, "learning_rate": 2.9917334279650178e-06, "loss": 0.5227, "step": 12753 }, { "epoch": 0.6882521180724192, "grad_norm": 0.9826513006131506, "learning_rate": 2.991100072953249e-06, "loss": 0.4221, "step": 12754 }, { "epoch": 0.6883060817009335, "grad_norm": 1.1895500903057927, "learning_rate": 2.990466790049799e-06, "loss": 0.5329, "step": 12755 }, { "epoch": 0.688360045329448, "grad_norm": 1.2865643520609955, "learning_rate": 2.9898335792728696e-06, "loss": 0.63, "step": 12756 }, { "epoch": 0.6884140089579623, "grad_norm": 1.0989712702155578, "learning_rate": 2.989200440640659e-06, "loss": 0.4825, "step": 12757 }, { "epoch": 0.6884679725864767, "grad_norm": 0.8946290690389784, "learning_rate": 2.988567374171365e-06, "loss": 0.461, "step": 12758 }, { "epoch": 0.6885219362149911, "grad_norm": 0.9533144461807532, "learning_rate": 2.9879343798831823e-06, "loss": 0.436, "step": 12759 }, { "epoch": 0.6885758998435054, "grad_norm": 0.994147465782264, "learning_rate": 2.987301457794305e-06, "loss": 0.4058, "step": 12760 }, { "epoch": 0.6886298634720198, "grad_norm": 1.0067046624809628, "learning_rate": 2.986668607922921e-06, "loss": 0.5064, "step": 12761 }, { "epoch": 0.6886838271005342, "grad_norm": 1.0557670670104513, "learning_rate": 2.9860358302872217e-06, "loss": 0.4829, "step": 12762 }, { "epoch": 0.6887377907290486, "grad_norm": 1.0763808284635679, "learning_rate": 2.9854031249053915e-06, "loss": 0.5697, "step": 12763 }, { "epoch": 0.688791754357563, "grad_norm": 0.9685494515694384, "learning_rate": 2.9847704917956176e-06, "loss": 0.4759, "step": 12764 }, { "epoch": 0.6888457179860774, "grad_norm": 0.9617306247323356, "learning_rate": 2.984137930976081e-06, "loss": 0.4081, "step": 12765 }, { "epoch": 0.6888996816145918, "grad_norm": 1.274929988234217, "learning_rate": 2.9835054424649617e-06, "loss": 0.6508, "step": 12766 }, { "epoch": 0.6889536452431061, "grad_norm": 1.0463103203746498, "learning_rate": 2.982873026280441e-06, "loss": 0.5252, "step": 12767 }, { "epoch": 0.6890076088716205, "grad_norm": 1.0279205832770368, "learning_rate": 2.982240682440691e-06, "loss": 0.4898, "step": 12768 }, { "epoch": 0.6890615725001349, "grad_norm": 1.1543333868432266, "learning_rate": 2.9816084109638877e-06, "loss": 0.5339, "step": 12769 }, { "epoch": 0.6891155361286493, "grad_norm": 1.0280381767904392, "learning_rate": 2.980976211868203e-06, "loss": 0.4243, "step": 12770 }, { "epoch": 0.6891694997571637, "grad_norm": 1.27618783712121, "learning_rate": 2.980344085171808e-06, "loss": 0.523, "step": 12771 }, { "epoch": 0.6892234633856781, 
"grad_norm": 1.1861103466234624, "learning_rate": 2.979712030892869e-06, "loss": 0.6508, "step": 12772 }, { "epoch": 0.6892774270141925, "grad_norm": 1.0990688249428893, "learning_rate": 2.9790800490495535e-06, "loss": 0.5152, "step": 12773 }, { "epoch": 0.6893313906427068, "grad_norm": 1.041512158676093, "learning_rate": 2.9784481396600262e-06, "loss": 0.4349, "step": 12774 }, { "epoch": 0.6893853542712212, "grad_norm": 0.9946457718109984, "learning_rate": 2.9778163027424457e-06, "loss": 0.3723, "step": 12775 }, { "epoch": 0.6894393178997356, "grad_norm": 0.9053831850530457, "learning_rate": 2.9771845383149723e-06, "loss": 0.3668, "step": 12776 }, { "epoch": 0.68949328152825, "grad_norm": 1.20188578848208, "learning_rate": 2.976552846395765e-06, "loss": 0.5144, "step": 12777 }, { "epoch": 0.6895472451567644, "grad_norm": 1.0876389452420203, "learning_rate": 2.975921227002979e-06, "loss": 0.5104, "step": 12778 }, { "epoch": 0.6896012087852788, "grad_norm": 0.6735362549239753, "learning_rate": 2.9752896801547664e-06, "loss": 0.2535, "step": 12779 }, { "epoch": 0.6896551724137931, "grad_norm": 1.1012285456099549, "learning_rate": 2.9746582058692803e-06, "loss": 0.4243, "step": 12780 }, { "epoch": 0.6897091360423074, "grad_norm": 0.8280515276750501, "learning_rate": 2.9740268041646705e-06, "loss": 0.3878, "step": 12781 }, { "epoch": 0.6897630996708218, "grad_norm": 1.0428676099608645, "learning_rate": 2.9733954750590805e-06, "loss": 0.4638, "step": 12782 }, { "epoch": 0.6898170632993362, "grad_norm": 1.0360979456659163, "learning_rate": 2.9727642185706573e-06, "loss": 0.4009, "step": 12783 }, { "epoch": 0.6898710269278506, "grad_norm": 0.8974776707338593, "learning_rate": 2.972133034717545e-06, "loss": 0.434, "step": 12784 }, { "epoch": 0.689924990556365, "grad_norm": 1.1047652086884894, "learning_rate": 2.9715019235178833e-06, "loss": 0.463, "step": 12785 }, { "epoch": 0.6899789541848794, "grad_norm": 1.142281241198386, "learning_rate": 2.9708708849898106e-06, "loss": 0.51, "step": 12786 }, { "epoch": 0.6900329178133938, "grad_norm": 0.9306838414242946, "learning_rate": 2.9702399191514648e-06, "loss": 0.4318, "step": 12787 }, { "epoch": 0.6900868814419081, "grad_norm": 1.133975773637936, "learning_rate": 2.9696090260209787e-06, "loss": 0.5321, "step": 12788 }, { "epoch": 0.6901408450704225, "grad_norm": 1.1201717209616342, "learning_rate": 2.9689782056164874e-06, "loss": 0.605, "step": 12789 }, { "epoch": 0.6901948086989369, "grad_norm": 0.9994785030656648, "learning_rate": 2.9683474579561195e-06, "loss": 0.4958, "step": 12790 }, { "epoch": 0.6902487723274513, "grad_norm": 0.685438639854132, "learning_rate": 2.9677167830580035e-06, "loss": 0.2199, "step": 12791 }, { "epoch": 0.6903027359559657, "grad_norm": 1.0839501941598289, "learning_rate": 2.967086180940266e-06, "loss": 0.6265, "step": 12792 }, { "epoch": 0.6903566995844801, "grad_norm": 1.2373106329913193, "learning_rate": 2.9664556516210306e-06, "loss": 0.5534, "step": 12793 }, { "epoch": 0.6904106632129945, "grad_norm": 1.0137098475843154, "learning_rate": 2.9658251951184204e-06, "loss": 0.5928, "step": 12794 }, { "epoch": 0.6904646268415088, "grad_norm": 0.9098991973635244, "learning_rate": 2.965194811450556e-06, "loss": 0.3586, "step": 12795 }, { "epoch": 0.6905185904700232, "grad_norm": 1.1520693370503128, "learning_rate": 2.9645645006355516e-06, "loss": 0.5604, "step": 12796 }, { "epoch": 0.6905725540985376, "grad_norm": 1.0659510666442944, "learning_rate": 2.963934262691526e-06, "loss": 0.5049, "step": 12797 }, { "epoch": 
0.690626517727052, "grad_norm": 1.138469039816294, "learning_rate": 2.9633040976365923e-06, "loss": 0.6063, "step": 12798 }, { "epoch": 0.6906804813555664, "grad_norm": 0.764085544235665, "learning_rate": 2.962674005488862e-06, "loss": 0.3419, "step": 12799 }, { "epoch": 0.6907344449840808, "grad_norm": 0.9199802083116927, "learning_rate": 2.9620439862664442e-06, "loss": 0.4645, "step": 12800 }, { "epoch": 0.6907884086125952, "grad_norm": 1.0350934841007453, "learning_rate": 2.961414039987447e-06, "loss": 0.4662, "step": 12801 }, { "epoch": 0.6908423722411094, "grad_norm": 1.0788544293015672, "learning_rate": 2.9607841666699764e-06, "loss": 0.4953, "step": 12802 }, { "epoch": 0.6908963358696238, "grad_norm": 1.052727462178044, "learning_rate": 2.9601543663321342e-06, "loss": 0.4706, "step": 12803 }, { "epoch": 0.6909502994981382, "grad_norm": 1.0284677710985632, "learning_rate": 2.959524638992021e-06, "loss": 0.4818, "step": 12804 }, { "epoch": 0.6910042631266526, "grad_norm": 0.983814961542173, "learning_rate": 2.958894984667736e-06, "loss": 0.453, "step": 12805 }, { "epoch": 0.691058226755167, "grad_norm": 1.2077043201157578, "learning_rate": 2.9582654033773774e-06, "loss": 0.4542, "step": 12806 }, { "epoch": 0.6911121903836814, "grad_norm": 1.0514266767472853, "learning_rate": 2.9576358951390393e-06, "loss": 0.4371, "step": 12807 }, { "epoch": 0.6911661540121958, "grad_norm": 0.9613212468182926, "learning_rate": 2.957006459970815e-06, "loss": 0.3781, "step": 12808 }, { "epoch": 0.6912201176407101, "grad_norm": 0.9894138901574346, "learning_rate": 2.9563770978907945e-06, "loss": 0.4225, "step": 12809 }, { "epoch": 0.6912740812692245, "grad_norm": 1.0604461927076396, "learning_rate": 2.9557478089170666e-06, "loss": 0.3967, "step": 12810 }, { "epoch": 0.6913280448977389, "grad_norm": 1.142045932852909, "learning_rate": 2.9551185930677164e-06, "loss": 0.5271, "step": 12811 }, { "epoch": 0.6913820085262533, "grad_norm": 1.1036941784297516, "learning_rate": 2.9544894503608296e-06, "loss": 0.6816, "step": 12812 }, { "epoch": 0.6914359721547677, "grad_norm": 1.122021114108549, "learning_rate": 2.9538603808144883e-06, "loss": 0.5161, "step": 12813 }, { "epoch": 0.6914899357832821, "grad_norm": 1.213969808302271, "learning_rate": 2.953231384446772e-06, "loss": 0.4975, "step": 12814 }, { "epoch": 0.6915438994117965, "grad_norm": 0.9515384534676911, "learning_rate": 2.9526024612757586e-06, "loss": 0.4014, "step": 12815 }, { "epoch": 0.6915978630403108, "grad_norm": 0.8577377069896276, "learning_rate": 2.951973611319527e-06, "loss": 0.3528, "step": 12816 }, { "epoch": 0.6916518266688252, "grad_norm": 0.9292944792173938, "learning_rate": 2.951344834596146e-06, "loss": 0.447, "step": 12817 }, { "epoch": 0.6917057902973396, "grad_norm": 0.8346644194549784, "learning_rate": 2.95071613112369e-06, "loss": 0.3123, "step": 12818 }, { "epoch": 0.691759753925854, "grad_norm": 1.1390289959374753, "learning_rate": 2.950087500920228e-06, "loss": 0.4898, "step": 12819 }, { "epoch": 0.6918137175543684, "grad_norm": 1.0979177218669127, "learning_rate": 2.9494589440038278e-06, "loss": 0.3971, "step": 12820 }, { "epoch": 0.6918676811828828, "grad_norm": 1.1418788249689829, "learning_rate": 2.948830460392555e-06, "loss": 0.5967, "step": 12821 }, { "epoch": 0.6919216448113971, "grad_norm": 1.2444672462530921, "learning_rate": 2.948202050104474e-06, "loss": 0.551, "step": 12822 }, { "epoch": 0.6919756084399115, "grad_norm": 1.1496721954742772, "learning_rate": 2.947573713157643e-06, "loss": 0.606, "step": 12823 }, { 
"epoch": 0.6920295720684259, "grad_norm": 1.2679227411450105, "learning_rate": 2.9469454495701223e-06, "loss": 0.7106, "step": 12824 }, { "epoch": 0.6920835356969403, "grad_norm": 1.0431428752633156, "learning_rate": 2.9463172593599688e-06, "loss": 0.4116, "step": 12825 }, { "epoch": 0.6921374993254547, "grad_norm": 1.1172435362836337, "learning_rate": 2.9456891425452383e-06, "loss": 0.5489, "step": 12826 }, { "epoch": 0.692191462953969, "grad_norm": 0.9130826728260667, "learning_rate": 2.945061099143982e-06, "loss": 0.3221, "step": 12827 }, { "epoch": 0.6922454265824834, "grad_norm": 1.026841822949245, "learning_rate": 2.9444331291742524e-06, "loss": 0.477, "step": 12828 }, { "epoch": 0.6922993902109977, "grad_norm": 1.1550470888886188, "learning_rate": 2.943805232654097e-06, "loss": 0.3965, "step": 12829 }, { "epoch": 0.6923533538395121, "grad_norm": 0.971067926112307, "learning_rate": 2.9431774096015613e-06, "loss": 0.4463, "step": 12830 }, { "epoch": 0.6924073174680265, "grad_norm": 0.9876562016159565, "learning_rate": 2.94254966003469e-06, "loss": 0.428, "step": 12831 }, { "epoch": 0.6924612810965409, "grad_norm": 0.9138679242861155, "learning_rate": 2.9419219839715264e-06, "loss": 0.3441, "step": 12832 }, { "epoch": 0.6925152447250553, "grad_norm": 1.1251477284386113, "learning_rate": 2.941294381430109e-06, "loss": 0.4598, "step": 12833 }, { "epoch": 0.6925692083535697, "grad_norm": 1.1267706879465222, "learning_rate": 2.9406668524284764e-06, "loss": 0.5949, "step": 12834 }, { "epoch": 0.6926231719820841, "grad_norm": 1.0176819238250523, "learning_rate": 2.940039396984665e-06, "loss": 0.4134, "step": 12835 }, { "epoch": 0.6926771356105984, "grad_norm": 1.1822581646977657, "learning_rate": 2.939412015116707e-06, "loss": 0.5472, "step": 12836 }, { "epoch": 0.6927310992391128, "grad_norm": 0.861134468157677, "learning_rate": 2.9387847068426355e-06, "loss": 0.3796, "step": 12837 }, { "epoch": 0.6927850628676272, "grad_norm": 0.9688210676742628, "learning_rate": 2.9381574721804803e-06, "loss": 0.3774, "step": 12838 }, { "epoch": 0.6928390264961416, "grad_norm": 1.289014146593846, "learning_rate": 2.9375303111482674e-06, "loss": 0.6295, "step": 12839 }, { "epoch": 0.692892990124656, "grad_norm": 0.9856520429966804, "learning_rate": 2.936903223764024e-06, "loss": 0.3841, "step": 12840 }, { "epoch": 0.6929469537531704, "grad_norm": 1.2479226986039564, "learning_rate": 2.93627621004577e-06, "loss": 0.6013, "step": 12841 }, { "epoch": 0.6930009173816848, "grad_norm": 1.0624119384967545, "learning_rate": 2.935649270011528e-06, "loss": 0.4386, "step": 12842 }, { "epoch": 0.6930548810101991, "grad_norm": 1.1949556795403318, "learning_rate": 2.9350224036793174e-06, "loss": 0.4706, "step": 12843 }, { "epoch": 0.6931088446387135, "grad_norm": 0.8689130047078445, "learning_rate": 2.9343956110671546e-06, "loss": 0.4316, "step": 12844 }, { "epoch": 0.6931628082672279, "grad_norm": 1.0109999059991253, "learning_rate": 2.933768892193054e-06, "loss": 0.5558, "step": 12845 }, { "epoch": 0.6932167718957423, "grad_norm": 1.0778598863392521, "learning_rate": 2.9331422470750293e-06, "loss": 0.5348, "step": 12846 }, { "epoch": 0.6932707355242567, "grad_norm": 1.2042754343618705, "learning_rate": 2.932515675731089e-06, "loss": 0.4958, "step": 12847 }, { "epoch": 0.6933246991527711, "grad_norm": 1.1567067745555395, "learning_rate": 2.9318891781792434e-06, "loss": 0.5976, "step": 12848 }, { "epoch": 0.6933786627812855, "grad_norm": 0.8641205314285554, "learning_rate": 2.9312627544374967e-06, "loss": 0.3752, 
"step": 12849 }, { "epoch": 0.6934326264097997, "grad_norm": 0.9107358194982551, "learning_rate": 2.9306364045238563e-06, "loss": 0.5038, "step": 12850 }, { "epoch": 0.6934865900383141, "grad_norm": 0.905221303383921, "learning_rate": 2.9300101284563197e-06, "loss": 0.473, "step": 12851 }, { "epoch": 0.6935405536668285, "grad_norm": 1.2036915729803042, "learning_rate": 2.929383926252889e-06, "loss": 0.6014, "step": 12852 }, { "epoch": 0.6935945172953429, "grad_norm": 1.0060470117090254, "learning_rate": 2.9287577979315617e-06, "loss": 0.5597, "step": 12853 }, { "epoch": 0.6936484809238573, "grad_norm": 1.2393005424388008, "learning_rate": 2.928131743510334e-06, "loss": 0.6308, "step": 12854 }, { "epoch": 0.6937024445523717, "grad_norm": 1.1640071101929124, "learning_rate": 2.9275057630071974e-06, "loss": 0.6734, "step": 12855 }, { "epoch": 0.6937564081808861, "grad_norm": 0.9394419132303418, "learning_rate": 2.926879856440146e-06, "loss": 0.4284, "step": 12856 }, { "epoch": 0.6938103718094004, "grad_norm": 1.1560894485305147, "learning_rate": 2.9262540238271675e-06, "loss": 0.5605, "step": 12857 }, { "epoch": 0.6938643354379148, "grad_norm": 1.0748689251923167, "learning_rate": 2.9256282651862476e-06, "loss": 0.5326, "step": 12858 }, { "epoch": 0.6939182990664292, "grad_norm": 1.0911462700612236, "learning_rate": 2.925002580535373e-06, "loss": 0.4865, "step": 12859 }, { "epoch": 0.6939722626949436, "grad_norm": 1.0505641545358602, "learning_rate": 2.9243769698925263e-06, "loss": 0.4873, "step": 12860 }, { "epoch": 0.694026226323458, "grad_norm": 0.9451905969082554, "learning_rate": 2.923751433275688e-06, "loss": 0.4574, "step": 12861 }, { "epoch": 0.6940801899519724, "grad_norm": 0.8538714868563984, "learning_rate": 2.9231259707028357e-06, "loss": 0.3275, "step": 12862 }, { "epoch": 0.6941341535804868, "grad_norm": 0.9944239288036242, "learning_rate": 2.9225005821919476e-06, "loss": 0.4969, "step": 12863 }, { "epoch": 0.6941881172090011, "grad_norm": 1.0290944634440025, "learning_rate": 2.9218752677609986e-06, "loss": 0.4881, "step": 12864 }, { "epoch": 0.6942420808375155, "grad_norm": 0.8460546529779057, "learning_rate": 2.9212500274279574e-06, "loss": 0.4128, "step": 12865 }, { "epoch": 0.6942960444660299, "grad_norm": 0.7783539437856293, "learning_rate": 2.9206248612107968e-06, "loss": 0.3212, "step": 12866 }, { "epoch": 0.6943500080945443, "grad_norm": 0.9382656439671115, "learning_rate": 2.919999769127484e-06, "loss": 0.4214, "step": 12867 }, { "epoch": 0.6944039717230587, "grad_norm": 1.1143408475980336, "learning_rate": 2.9193747511959835e-06, "loss": 0.5387, "step": 12868 }, { "epoch": 0.6944579353515731, "grad_norm": 0.9477430055700514, "learning_rate": 2.918749807434261e-06, "loss": 0.3927, "step": 12869 }, { "epoch": 0.6945118989800875, "grad_norm": 0.9490471035171822, "learning_rate": 2.9181249378602774e-06, "loss": 0.4052, "step": 12870 }, { "epoch": 0.6945658626086018, "grad_norm": 1.2560340030337758, "learning_rate": 2.917500142491993e-06, "loss": 0.5212, "step": 12871 }, { "epoch": 0.6946198262371162, "grad_norm": 0.9932031827400105, "learning_rate": 2.9168754213473628e-06, "loss": 0.3797, "step": 12872 }, { "epoch": 0.6946737898656306, "grad_norm": 1.2444545396703854, "learning_rate": 2.9162507744443423e-06, "loss": 0.6569, "step": 12873 }, { "epoch": 0.694727753494145, "grad_norm": 1.2702249043633798, "learning_rate": 2.9156262018008863e-06, "loss": 0.5807, "step": 12874 }, { "epoch": 0.6947817171226593, "grad_norm": 0.9775047331302462, "learning_rate": 
2.915001703434944e-06, "loss": 0.3617, "step": 12875 }, { "epoch": 0.6948356807511737, "grad_norm": 1.2580869617076678, "learning_rate": 2.914377279364464e-06, "loss": 0.6335, "step": 12876 }, { "epoch": 0.6948896443796881, "grad_norm": 0.9490033050497612, "learning_rate": 2.9137529296073952e-06, "loss": 0.4407, "step": 12877 }, { "epoch": 0.6949436080082024, "grad_norm": 0.9676004391855759, "learning_rate": 2.9131286541816806e-06, "loss": 0.4324, "step": 12878 }, { "epoch": 0.6949975716367168, "grad_norm": 0.9564224047589261, "learning_rate": 2.9125044531052617e-06, "loss": 0.4345, "step": 12879 }, { "epoch": 0.6950515352652312, "grad_norm": 0.9928597824104464, "learning_rate": 2.9118803263960786e-06, "loss": 0.3463, "step": 12880 }, { "epoch": 0.6951054988937456, "grad_norm": 0.8143170528392323, "learning_rate": 2.91125627407207e-06, "loss": 0.2503, "step": 12881 }, { "epoch": 0.69515946252226, "grad_norm": 1.0427484278282806, "learning_rate": 2.9106322961511725e-06, "loss": 0.3909, "step": 12882 }, { "epoch": 0.6952134261507744, "grad_norm": 1.0885815789910442, "learning_rate": 2.910008392651319e-06, "loss": 0.416, "step": 12883 }, { "epoch": 0.6952673897792888, "grad_norm": 1.0272594747465376, "learning_rate": 2.9093845635904405e-06, "loss": 0.4533, "step": 12884 }, { "epoch": 0.6953213534078031, "grad_norm": 1.042567206667852, "learning_rate": 2.9087608089864695e-06, "loss": 0.4142, "step": 12885 }, { "epoch": 0.6953753170363175, "grad_norm": 1.0258770208503403, "learning_rate": 2.9081371288573292e-06, "loss": 0.449, "step": 12886 }, { "epoch": 0.6954292806648319, "grad_norm": 0.9594773490159131, "learning_rate": 2.9075135232209466e-06, "loss": 0.5071, "step": 12887 }, { "epoch": 0.6954832442933463, "grad_norm": 0.8854613153592871, "learning_rate": 2.9068899920952452e-06, "loss": 0.3507, "step": 12888 }, { "epoch": 0.6955372079218607, "grad_norm": 0.9203885460880191, "learning_rate": 2.9062665354981454e-06, "loss": 0.4746, "step": 12889 }, { "epoch": 0.6955911715503751, "grad_norm": 1.1651070118938545, "learning_rate": 2.9056431534475666e-06, "loss": 0.409, "step": 12890 }, { "epoch": 0.6956451351788894, "grad_norm": 1.175098657537262, "learning_rate": 2.9050198459614243e-06, "loss": 0.6136, "step": 12891 }, { "epoch": 0.6956990988074038, "grad_norm": 1.2086980687125146, "learning_rate": 2.904396613057634e-06, "loss": 0.5182, "step": 12892 }, { "epoch": 0.6957530624359182, "grad_norm": 1.1110022549618668, "learning_rate": 2.903773454754108e-06, "loss": 0.4285, "step": 12893 }, { "epoch": 0.6958070260644326, "grad_norm": 1.090840836097646, "learning_rate": 2.903150371068756e-06, "loss": 0.4506, "step": 12894 }, { "epoch": 0.695860989692947, "grad_norm": 0.9626698731139083, "learning_rate": 2.902527362019486e-06, "loss": 0.4799, "step": 12895 }, { "epoch": 0.6959149533214614, "grad_norm": 1.0098823480966108, "learning_rate": 2.9019044276242046e-06, "loss": 0.4269, "step": 12896 }, { "epoch": 0.6959689169499758, "grad_norm": 1.049464053740126, "learning_rate": 2.9012815679008155e-06, "loss": 0.4203, "step": 12897 }, { "epoch": 0.69602288057849, "grad_norm": 0.778048613037931, "learning_rate": 2.9006587828672205e-06, "loss": 0.3043, "step": 12898 }, { "epoch": 0.6960768442070044, "grad_norm": 0.9702105756186081, "learning_rate": 2.900036072541317e-06, "loss": 0.4419, "step": 12899 }, { "epoch": 0.6961308078355188, "grad_norm": 0.9078255756930769, "learning_rate": 2.8994134369410042e-06, "loss": 0.3893, "step": 12900 }, { "epoch": 0.6961847714640332, "grad_norm": 1.1077271708482619, 
"learning_rate": 2.8987908760841777e-06, "loss": 0.4663, "step": 12901 }, { "epoch": 0.6962387350925476, "grad_norm": 1.0710628809059515, "learning_rate": 2.898168389988728e-06, "loss": 0.4284, "step": 12902 }, { "epoch": 0.696292698721062, "grad_norm": 1.0203672494966032, "learning_rate": 2.897545978672549e-06, "loss": 0.5479, "step": 12903 }, { "epoch": 0.6963466623495764, "grad_norm": 1.1883344481235694, "learning_rate": 2.8969236421535287e-06, "loss": 0.4992, "step": 12904 }, { "epoch": 0.6964006259780907, "grad_norm": 1.0256511557360934, "learning_rate": 2.896301380449553e-06, "loss": 0.5288, "step": 12905 }, { "epoch": 0.6964545896066051, "grad_norm": 1.0588783767200949, "learning_rate": 2.895679193578506e-06, "loss": 0.3623, "step": 12906 }, { "epoch": 0.6965085532351195, "grad_norm": 0.9726017449109245, "learning_rate": 2.89505708155827e-06, "loss": 0.4796, "step": 12907 }, { "epoch": 0.6965625168636339, "grad_norm": 1.152626570949335, "learning_rate": 2.894435044406726e-06, "loss": 0.4958, "step": 12908 }, { "epoch": 0.6966164804921483, "grad_norm": 0.8107552941293773, "learning_rate": 2.893813082141751e-06, "loss": 0.3148, "step": 12909 }, { "epoch": 0.6966704441206627, "grad_norm": 0.9742327366538469, "learning_rate": 2.8931911947812224e-06, "loss": 0.3894, "step": 12910 }, { "epoch": 0.6967244077491771, "grad_norm": 1.0581285050418603, "learning_rate": 2.8925693823430117e-06, "loss": 0.5557, "step": 12911 }, { "epoch": 0.6967783713776914, "grad_norm": 1.0413657580950326, "learning_rate": 2.8919476448449944e-06, "loss": 0.4307, "step": 12912 }, { "epoch": 0.6968323350062058, "grad_norm": 1.005331417951079, "learning_rate": 2.8913259823050345e-06, "loss": 0.3737, "step": 12913 }, { "epoch": 0.6968862986347202, "grad_norm": 1.1016199062589507, "learning_rate": 2.890704394741002e-06, "loss": 0.5772, "step": 12914 }, { "epoch": 0.6969402622632346, "grad_norm": 0.9264686509540279, "learning_rate": 2.8900828821707614e-06, "loss": 0.4083, "step": 12915 }, { "epoch": 0.696994225891749, "grad_norm": 0.8943909375505588, "learning_rate": 2.889461444612176e-06, "loss": 0.34, "step": 12916 }, { "epoch": 0.6970481895202634, "grad_norm": 0.8079244924076091, "learning_rate": 2.8888400820831063e-06, "loss": 0.3487, "step": 12917 }, { "epoch": 0.6971021531487778, "grad_norm": 0.9553139771531703, "learning_rate": 2.888218794601412e-06, "loss": 0.3316, "step": 12918 }, { "epoch": 0.6971561167772921, "grad_norm": 0.881176082034555, "learning_rate": 2.887597582184949e-06, "loss": 0.3862, "step": 12919 }, { "epoch": 0.6972100804058065, "grad_norm": 0.9429409945433771, "learning_rate": 2.8869764448515702e-06, "loss": 0.3817, "step": 12920 }, { "epoch": 0.6972640440343209, "grad_norm": 1.1578707347011084, "learning_rate": 2.8863553826191283e-06, "loss": 0.5246, "step": 12921 }, { "epoch": 0.6973180076628352, "grad_norm": 0.8967520683780582, "learning_rate": 2.885734395505474e-06, "loss": 0.457, "step": 12922 }, { "epoch": 0.6973719712913496, "grad_norm": 0.9932738210975065, "learning_rate": 2.8851134835284546e-06, "loss": 0.4541, "step": 12923 }, { "epoch": 0.697425934919864, "grad_norm": 0.9854958833620842, "learning_rate": 2.8844926467059154e-06, "loss": 0.5381, "step": 12924 }, { "epoch": 0.6974798985483784, "grad_norm": 1.0387134169824275, "learning_rate": 2.8838718850557e-06, "loss": 0.3801, "step": 12925 }, { "epoch": 0.6975338621768927, "grad_norm": 1.05149151172047, "learning_rate": 2.8832511985956524e-06, "loss": 0.5175, "step": 12926 }, { "epoch": 0.6975878258054071, "grad_norm": 
1.2602958665132944, "learning_rate": 2.8826305873436076e-06, "loss": 0.4822, "step": 12927 }, { "epoch": 0.6976417894339215, "grad_norm": 1.0789958947609768, "learning_rate": 2.8820100513174042e-06, "loss": 0.6905, "step": 12928 }, { "epoch": 0.6976957530624359, "grad_norm": 0.9858086354837914, "learning_rate": 2.881389590534878e-06, "loss": 0.5798, "step": 12929 }, { "epoch": 0.6977497166909503, "grad_norm": 0.8868523476500845, "learning_rate": 2.8807692050138604e-06, "loss": 0.3894, "step": 12930 }, { "epoch": 0.6978036803194647, "grad_norm": 0.9294987412565143, "learning_rate": 2.8801488947721818e-06, "loss": 0.3437, "step": 12931 }, { "epoch": 0.6978576439479791, "grad_norm": 1.0108922310797102, "learning_rate": 2.879528659827672e-06, "loss": 0.4687, "step": 12932 }, { "epoch": 0.6979116075764934, "grad_norm": 0.9396824300570262, "learning_rate": 2.878908500198157e-06, "loss": 0.4303, "step": 12933 }, { "epoch": 0.6979655712050078, "grad_norm": 0.8925678922445839, "learning_rate": 2.878288415901459e-06, "loss": 0.4633, "step": 12934 }, { "epoch": 0.6980195348335222, "grad_norm": 0.9433177683589723, "learning_rate": 2.8776684069554006e-06, "loss": 0.3796, "step": 12935 }, { "epoch": 0.6980734984620366, "grad_norm": 1.2063324756923042, "learning_rate": 2.8770484733778027e-06, "loss": 0.5849, "step": 12936 }, { "epoch": 0.698127462090551, "grad_norm": 0.7331134858124366, "learning_rate": 2.8764286151864818e-06, "loss": 0.3961, "step": 12937 }, { "epoch": 0.6981814257190654, "grad_norm": 1.1619805067459676, "learning_rate": 2.8758088323992527e-06, "loss": 0.5107, "step": 12938 }, { "epoch": 0.6982353893475798, "grad_norm": 0.8865085221984703, "learning_rate": 2.87518912503393e-06, "loss": 0.3576, "step": 12939 }, { "epoch": 0.6982893529760941, "grad_norm": 1.069890110359377, "learning_rate": 2.8745694931083234e-06, "loss": 0.4692, "step": 12940 }, { "epoch": 0.6983433166046085, "grad_norm": 0.8036546186489738, "learning_rate": 2.873949936640243e-06, "loss": 0.3559, "step": 12941 }, { "epoch": 0.6983972802331229, "grad_norm": 1.1617986736347894, "learning_rate": 2.8733304556474946e-06, "loss": 0.4084, "step": 12942 }, { "epoch": 0.6984512438616373, "grad_norm": 0.9263656535667706, "learning_rate": 2.8727110501478827e-06, "loss": 0.3834, "step": 12943 }, { "epoch": 0.6985052074901517, "grad_norm": 0.9176318156307719, "learning_rate": 2.87209172015921e-06, "loss": 0.5834, "step": 12944 }, { "epoch": 0.6985591711186661, "grad_norm": 0.8373167529559262, "learning_rate": 2.8714724656992764e-06, "loss": 0.395, "step": 12945 }, { "epoch": 0.6986131347471805, "grad_norm": 0.9201747919475872, "learning_rate": 2.8708532867858823e-06, "loss": 0.3777, "step": 12946 }, { "epoch": 0.6986670983756947, "grad_norm": 1.1483053010574185, "learning_rate": 2.8702341834368196e-06, "loss": 0.419, "step": 12947 }, { "epoch": 0.6987210620042091, "grad_norm": 1.085991431174362, "learning_rate": 2.8696151556698837e-06, "loss": 0.5315, "step": 12948 }, { "epoch": 0.6987750256327235, "grad_norm": 1.0244724316698472, "learning_rate": 2.8689962035028655e-06, "loss": 0.5262, "step": 12949 }, { "epoch": 0.6988289892612379, "grad_norm": 1.1116852469469143, "learning_rate": 2.8683773269535554e-06, "loss": 0.5146, "step": 12950 }, { "epoch": 0.6988829528897523, "grad_norm": 0.7131104441066115, "learning_rate": 2.8677585260397394e-06, "loss": 0.2743, "step": 12951 }, { "epoch": 0.6989369165182667, "grad_norm": 1.0910564656413746, "learning_rate": 2.8671398007792034e-06, "loss": 0.4474, "step": 12952 }, { "epoch": 
0.6989908801467811, "grad_norm": 0.9544232239703293, "learning_rate": 2.8665211511897296e-06, "loss": 0.3873, "step": 12953 }, { "epoch": 0.6990448437752954, "grad_norm": 1.1432511577477051, "learning_rate": 2.8659025772891003e-06, "loss": 0.4477, "step": 12954 }, { "epoch": 0.6990988074038098, "grad_norm": 0.973986112396438, "learning_rate": 2.865284079095092e-06, "loss": 0.3751, "step": 12955 }, { "epoch": 0.6991527710323242, "grad_norm": 1.2231616023515723, "learning_rate": 2.8646656566254803e-06, "loss": 0.6086, "step": 12956 }, { "epoch": 0.6992067346608386, "grad_norm": 0.9556905245050342, "learning_rate": 2.8640473098980404e-06, "loss": 0.3298, "step": 12957 }, { "epoch": 0.699260698289353, "grad_norm": 1.0391009545463676, "learning_rate": 2.8634290389305454e-06, "loss": 0.4588, "step": 12958 }, { "epoch": 0.6993146619178674, "grad_norm": 1.1271598236666098, "learning_rate": 2.8628108437407633e-06, "loss": 0.5385, "step": 12959 }, { "epoch": 0.6993686255463817, "grad_norm": 1.0909593885095143, "learning_rate": 2.862192724346463e-06, "loss": 0.5398, "step": 12960 }, { "epoch": 0.6994225891748961, "grad_norm": 0.970489551654379, "learning_rate": 2.861574680765409e-06, "loss": 0.4929, "step": 12961 }, { "epoch": 0.6994765528034105, "grad_norm": 0.969708483849196, "learning_rate": 2.8609567130153633e-06, "loss": 0.465, "step": 12962 }, { "epoch": 0.6995305164319249, "grad_norm": 0.9941266954207261, "learning_rate": 2.8603388211140886e-06, "loss": 0.5499, "step": 12963 }, { "epoch": 0.6995844800604393, "grad_norm": 0.9613418576181274, "learning_rate": 2.8597210050793435e-06, "loss": 0.4431, "step": 12964 }, { "epoch": 0.6996384436889537, "grad_norm": 1.0984638216631157, "learning_rate": 2.8591032649288846e-06, "loss": 0.5586, "step": 12965 }, { "epoch": 0.6996924073174681, "grad_norm": 1.012950214666351, "learning_rate": 2.858485600680466e-06, "loss": 0.5465, "step": 12966 }, { "epoch": 0.6997463709459824, "grad_norm": 0.9387914768136617, "learning_rate": 2.857868012351842e-06, "loss": 0.3329, "step": 12967 }, { "epoch": 0.6998003345744968, "grad_norm": 1.175156450389991, "learning_rate": 2.8572504999607597e-06, "loss": 0.4165, "step": 12968 }, { "epoch": 0.6998542982030111, "grad_norm": 0.8894618814742211, "learning_rate": 2.8566330635249682e-06, "loss": 0.3284, "step": 12969 }, { "epoch": 0.6999082618315255, "grad_norm": 1.2161289781084283, "learning_rate": 2.856015703062214e-06, "loss": 0.4336, "step": 12970 }, { "epoch": 0.6999622254600399, "grad_norm": 1.001673379308925, "learning_rate": 2.855398418590239e-06, "loss": 0.4056, "step": 12971 }, { "epoch": 0.7000161890885543, "grad_norm": 1.059691907435687, "learning_rate": 2.8547812101267856e-06, "loss": 0.4667, "step": 12972 }, { "epoch": 0.7000701527170687, "grad_norm": 1.2703593986459272, "learning_rate": 2.8541640776895934e-06, "loss": 0.5237, "step": 12973 }, { "epoch": 0.700124116345583, "grad_norm": 1.0690454179495672, "learning_rate": 2.853547021296401e-06, "loss": 0.5773, "step": 12974 }, { "epoch": 0.7001780799740974, "grad_norm": 0.9152468059966911, "learning_rate": 2.8529300409649395e-06, "loss": 0.424, "step": 12975 }, { "epoch": 0.7002320436026118, "grad_norm": 1.1022082237926278, "learning_rate": 2.852313136712943e-06, "loss": 0.4881, "step": 12976 }, { "epoch": 0.7002860072311262, "grad_norm": 0.8970499536489263, "learning_rate": 2.851696308558142e-06, "loss": 0.4297, "step": 12977 }, { "epoch": 0.7003399708596406, "grad_norm": 0.9714205401431717, "learning_rate": 2.8510795565182656e-06, "loss": 0.4061, "step": 12978 
}, { "epoch": 0.700393934488155, "grad_norm": 0.8877727922961122, "learning_rate": 2.850462880611039e-06, "loss": 0.2947, "step": 12979 }, { "epoch": 0.7004478981166694, "grad_norm": 1.1022785289051134, "learning_rate": 2.8498462808541865e-06, "loss": 0.523, "step": 12980 }, { "epoch": 0.7005018617451837, "grad_norm": 0.9110190920895537, "learning_rate": 2.8492297572654305e-06, "loss": 0.3825, "step": 12981 }, { "epoch": 0.7005558253736981, "grad_norm": 1.0714977445417089, "learning_rate": 2.848613309862489e-06, "loss": 0.427, "step": 12982 }, { "epoch": 0.7006097890022125, "grad_norm": 0.7550204507933227, "learning_rate": 2.8479969386630796e-06, "loss": 0.3099, "step": 12983 }, { "epoch": 0.7006637526307269, "grad_norm": 0.8941615655717171, "learning_rate": 2.8473806436849183e-06, "loss": 0.4074, "step": 12984 }, { "epoch": 0.7007177162592413, "grad_norm": 0.9514481769718431, "learning_rate": 2.846764424945717e-06, "loss": 0.4134, "step": 12985 }, { "epoch": 0.7007716798877557, "grad_norm": 0.8784077279945546, "learning_rate": 2.8461482824631883e-06, "loss": 0.387, "step": 12986 }, { "epoch": 0.7008256435162701, "grad_norm": 1.2133829296431922, "learning_rate": 2.845532216255038e-06, "loss": 0.6764, "step": 12987 }, { "epoch": 0.7008796071447844, "grad_norm": 1.23391218345741, "learning_rate": 2.844916226338974e-06, "loss": 0.5535, "step": 12988 }, { "epoch": 0.7009335707732988, "grad_norm": 0.8220678919514699, "learning_rate": 2.844300312732701e-06, "loss": 0.3129, "step": 12989 }, { "epoch": 0.7009875344018132, "grad_norm": 0.890965656113664, "learning_rate": 2.8436844754539217e-06, "loss": 0.3615, "step": 12990 }, { "epoch": 0.7010414980303276, "grad_norm": 1.0985265313520398, "learning_rate": 2.843068714520333e-06, "loss": 0.5382, "step": 12991 }, { "epoch": 0.701095461658842, "grad_norm": 0.8440279723636199, "learning_rate": 2.8424530299496345e-06, "loss": 0.4153, "step": 12992 }, { "epoch": 0.7011494252873564, "grad_norm": 0.9791161487316945, "learning_rate": 2.841837421759521e-06, "loss": 0.4473, "step": 12993 }, { "epoch": 0.7012033889158708, "grad_norm": 1.0787858084713418, "learning_rate": 2.8412218899676857e-06, "loss": 0.5041, "step": 12994 }, { "epoch": 0.701257352544385, "grad_norm": 1.0700533089352169, "learning_rate": 2.84060643459182e-06, "loss": 0.491, "step": 12995 }, { "epoch": 0.7013113161728994, "grad_norm": 1.218940987239314, "learning_rate": 2.8399910556496126e-06, "loss": 0.6347, "step": 12996 }, { "epoch": 0.7013652798014138, "grad_norm": 0.7664033350885981, "learning_rate": 2.8393757531587495e-06, "loss": 0.3795, "step": 12997 }, { "epoch": 0.7014192434299282, "grad_norm": 1.098783766452608, "learning_rate": 2.8387605271369156e-06, "loss": 0.5272, "step": 12998 }, { "epoch": 0.7014732070584426, "grad_norm": 0.9158361199546222, "learning_rate": 2.838145377601793e-06, "loss": 0.4115, "step": 12999 }, { "epoch": 0.701527170686957, "grad_norm": 0.9986176229963939, "learning_rate": 2.837530304571063e-06, "loss": 0.6343, "step": 13000 }, { "epoch": 0.701527170686957, "eval_loss": 0.5355429649353027, "eval_runtime": 164.4209, "eval_samples_per_second": 20.916, "eval_steps_per_second": 0.876, "step": 13000 }, { "epoch": 0.7015811343154714, "grad_norm": 0.996595930805883, "learning_rate": 2.8369153080624013e-06, "loss": 0.3917, "step": 13001 }, { "epoch": 0.7016350979439857, "grad_norm": 1.1455880390495106, "learning_rate": 2.8363003880934866e-06, "loss": 0.5528, "step": 13002 }, { "epoch": 0.7016890615725001, "grad_norm": 0.9904917930105941, "learning_rate": 
2.8356855446819876e-06, "loss": 0.4379, "step": 13003 }, { "epoch": 0.7017430252010145, "grad_norm": 1.080466738903755, "learning_rate": 2.8350707778455794e-06, "loss": 0.5141, "step": 13004 }, { "epoch": 0.7017969888295289, "grad_norm": 1.1796887113654464, "learning_rate": 2.8344560876019293e-06, "loss": 0.4489, "step": 13005 }, { "epoch": 0.7018509524580433, "grad_norm": 0.8882446696757533, "learning_rate": 2.833841473968705e-06, "loss": 0.4007, "step": 13006 }, { "epoch": 0.7019049160865577, "grad_norm": 0.891459988599545, "learning_rate": 2.83322693696357e-06, "loss": 0.3809, "step": 13007 }, { "epoch": 0.7019588797150721, "grad_norm": 0.8916230932083137, "learning_rate": 2.8326124766041875e-06, "loss": 0.4695, "step": 13008 }, { "epoch": 0.7020128433435864, "grad_norm": 1.1527854167343397, "learning_rate": 2.8319980929082192e-06, "loss": 0.5072, "step": 13009 }, { "epoch": 0.7020668069721008, "grad_norm": 1.0398119058366753, "learning_rate": 2.8313837858933204e-06, "loss": 0.4467, "step": 13010 }, { "epoch": 0.7021207706006152, "grad_norm": 1.029893027929824, "learning_rate": 2.8307695555771473e-06, "loss": 0.4338, "step": 13011 }, { "epoch": 0.7021747342291296, "grad_norm": 0.8124298157360522, "learning_rate": 2.830155401977355e-06, "loss": 0.4514, "step": 13012 }, { "epoch": 0.702228697857644, "grad_norm": 1.0851632947123264, "learning_rate": 2.8295413251115933e-06, "loss": 0.5173, "step": 13013 }, { "epoch": 0.7022826614861584, "grad_norm": 0.7589029577566443, "learning_rate": 2.8289273249975123e-06, "loss": 0.3112, "step": 13014 }, { "epoch": 0.7023366251146728, "grad_norm": 0.9477779394614375, "learning_rate": 2.828313401652759e-06, "loss": 0.4566, "step": 13015 }, { "epoch": 0.702390588743187, "grad_norm": 1.201259040975867, "learning_rate": 2.8276995550949806e-06, "loss": 0.5038, "step": 13016 }, { "epoch": 0.7024445523717014, "grad_norm": 1.0455054158281298, "learning_rate": 2.8270857853418155e-06, "loss": 0.5357, "step": 13017 }, { "epoch": 0.7024985160002158, "grad_norm": 1.1041843674439185, "learning_rate": 2.8264720924109045e-06, "loss": 0.4489, "step": 13018 }, { "epoch": 0.7025524796287302, "grad_norm": 0.9089745667709747, "learning_rate": 2.825858476319888e-06, "loss": 0.5305, "step": 13019 }, { "epoch": 0.7026064432572446, "grad_norm": 0.9939081471060669, "learning_rate": 2.8252449370864e-06, "loss": 0.3485, "step": 13020 }, { "epoch": 0.702660406885759, "grad_norm": 1.1870215038016638, "learning_rate": 2.824631474728076e-06, "loss": 0.5953, "step": 13021 }, { "epoch": 0.7027143705142734, "grad_norm": 1.1182544652513735, "learning_rate": 2.824018089262546e-06, "loss": 0.5149, "step": 13022 }, { "epoch": 0.7027683341427877, "grad_norm": 1.0058210010240727, "learning_rate": 2.8234047807074423e-06, "loss": 0.5077, "step": 13023 }, { "epoch": 0.7028222977713021, "grad_norm": 1.1264014671905709, "learning_rate": 2.822791549080388e-06, "loss": 0.4612, "step": 13024 }, { "epoch": 0.7028762613998165, "grad_norm": 0.8990954914531564, "learning_rate": 2.822178394399009e-06, "loss": 0.4495, "step": 13025 }, { "epoch": 0.7029302250283309, "grad_norm": 1.0984104188104251, "learning_rate": 2.821565316680929e-06, "loss": 0.4928, "step": 13026 }, { "epoch": 0.7029841886568453, "grad_norm": 1.1126935262792854, "learning_rate": 2.8209523159437684e-06, "loss": 0.4631, "step": 13027 }, { "epoch": 0.7030381522853597, "grad_norm": 0.9999328647167958, "learning_rate": 2.820339392205145e-06, "loss": 0.4031, "step": 13028 }, { "epoch": 0.703092115913874, "grad_norm": 1.1306384270140757, 
"learning_rate": 2.8197265454826774e-06, "loss": 0.5018, "step": 13029 }, { "epoch": 0.7031460795423884, "grad_norm": 0.8735255399762555, "learning_rate": 2.8191137757939747e-06, "loss": 0.3792, "step": 13030 }, { "epoch": 0.7032000431709028, "grad_norm": 1.1949672833678369, "learning_rate": 2.8185010831566507e-06, "loss": 0.5102, "step": 13031 }, { "epoch": 0.7032540067994172, "grad_norm": 1.207977652785274, "learning_rate": 2.8178884675883156e-06, "loss": 0.5498, "step": 13032 }, { "epoch": 0.7033079704279316, "grad_norm": 0.9335739754608943, "learning_rate": 2.817275929106576e-06, "loss": 0.3556, "step": 13033 }, { "epoch": 0.703361934056446, "grad_norm": 0.9663375590565338, "learning_rate": 2.816663467729037e-06, "loss": 0.4838, "step": 13034 }, { "epoch": 0.7034158976849604, "grad_norm": 1.1939507687306725, "learning_rate": 2.8160510834733e-06, "loss": 0.5332, "step": 13035 }, { "epoch": 0.7034698613134747, "grad_norm": 0.8217518074155711, "learning_rate": 2.81543877635697e-06, "loss": 0.3563, "step": 13036 }, { "epoch": 0.7035238249419891, "grad_norm": 1.0986183688642583, "learning_rate": 2.8148265463976397e-06, "loss": 0.5324, "step": 13037 }, { "epoch": 0.7035777885705035, "grad_norm": 0.9195849219557485, "learning_rate": 2.8142143936129072e-06, "loss": 0.4031, "step": 13038 }, { "epoch": 0.7036317521990179, "grad_norm": 1.0471143540401262, "learning_rate": 2.813602318020369e-06, "loss": 0.4623, "step": 13039 }, { "epoch": 0.7036857158275323, "grad_norm": 1.1639418335222713, "learning_rate": 2.812990319637612e-06, "loss": 0.5683, "step": 13040 }, { "epoch": 0.7037396794560467, "grad_norm": 1.0878062013090393, "learning_rate": 2.8123783984822296e-06, "loss": 0.5384, "step": 13041 }, { "epoch": 0.703793643084561, "grad_norm": 1.1279163830832089, "learning_rate": 2.8117665545718065e-06, "loss": 0.446, "step": 13042 }, { "epoch": 0.7038476067130753, "grad_norm": 1.0897483618394614, "learning_rate": 2.8111547879239303e-06, "loss": 0.5023, "step": 13043 }, { "epoch": 0.7039015703415897, "grad_norm": 1.100359865641062, "learning_rate": 2.8105430985561816e-06, "loss": 0.5427, "step": 13044 }, { "epoch": 0.7039555339701041, "grad_norm": 1.0625241814938284, "learning_rate": 2.809931486486142e-06, "loss": 0.5393, "step": 13045 }, { "epoch": 0.7040094975986185, "grad_norm": 0.9675337380134282, "learning_rate": 2.8093199517313896e-06, "loss": 0.4158, "step": 13046 }, { "epoch": 0.7040634612271329, "grad_norm": 0.9838849220195337, "learning_rate": 2.8087084943095e-06, "loss": 0.4313, "step": 13047 }, { "epoch": 0.7041174248556473, "grad_norm": 0.8269455632601661, "learning_rate": 2.8080971142380486e-06, "loss": 0.4301, "step": 13048 }, { "epoch": 0.7041713884841617, "grad_norm": 0.8344177225360069, "learning_rate": 2.8074858115346054e-06, "loss": 0.3272, "step": 13049 }, { "epoch": 0.704225352112676, "grad_norm": 1.0845656158364825, "learning_rate": 2.8068745862167423e-06, "loss": 0.4704, "step": 13050 }, { "epoch": 0.7042793157411904, "grad_norm": 1.2446297392616135, "learning_rate": 2.806263438302024e-06, "loss": 0.5924, "step": 13051 }, { "epoch": 0.7043332793697048, "grad_norm": 0.9584484260979105, "learning_rate": 2.805652367808015e-06, "loss": 0.3035, "step": 13052 }, { "epoch": 0.7043872429982192, "grad_norm": 0.9800382490330949, "learning_rate": 2.80504137475228e-06, "loss": 0.4242, "step": 13053 }, { "epoch": 0.7044412066267336, "grad_norm": 1.0407842585281557, "learning_rate": 2.8044304591523785e-06, "loss": 0.3801, "step": 13054 }, { "epoch": 0.704495170255248, "grad_norm": 
0.9326690358173315, "learning_rate": 2.80381962102587e-06, "loss": 0.4358, "step": 13055 }, { "epoch": 0.7045491338837624, "grad_norm": 1.1958010477825447, "learning_rate": 2.8032088603903095e-06, "loss": 0.4473, "step": 13056 }, { "epoch": 0.7046030975122767, "grad_norm": 1.0487652739161568, "learning_rate": 2.802598177263253e-06, "loss": 0.5553, "step": 13057 }, { "epoch": 0.7046570611407911, "grad_norm": 0.9303126085314012, "learning_rate": 2.8019875716622484e-06, "loss": 0.3733, "step": 13058 }, { "epoch": 0.7047110247693055, "grad_norm": 1.1837198431229963, "learning_rate": 2.8013770436048483e-06, "loss": 0.5919, "step": 13059 }, { "epoch": 0.7047649883978199, "grad_norm": 0.8625004885750298, "learning_rate": 2.8007665931085974e-06, "loss": 0.3274, "step": 13060 }, { "epoch": 0.7048189520263343, "grad_norm": 1.0054512199218175, "learning_rate": 2.8001562201910422e-06, "loss": 0.5327, "step": 13061 }, { "epoch": 0.7048729156548487, "grad_norm": 0.9986648037043501, "learning_rate": 2.7995459248697256e-06, "loss": 0.463, "step": 13062 }, { "epoch": 0.7049268792833631, "grad_norm": 0.9521536341535253, "learning_rate": 2.7989357071621882e-06, "loss": 0.3776, "step": 13063 }, { "epoch": 0.7049808429118773, "grad_norm": 1.0524318374431594, "learning_rate": 2.7983255670859684e-06, "loss": 0.5458, "step": 13064 }, { "epoch": 0.7050348065403917, "grad_norm": 0.8403016335417227, "learning_rate": 2.7977155046586e-06, "loss": 0.3191, "step": 13065 }, { "epoch": 0.7050887701689061, "grad_norm": 1.0389740724256136, "learning_rate": 2.7971055198976195e-06, "loss": 0.5136, "step": 13066 }, { "epoch": 0.7051427337974205, "grad_norm": 1.1760540537392443, "learning_rate": 2.796495612820557e-06, "loss": 0.5008, "step": 13067 }, { "epoch": 0.7051966974259349, "grad_norm": 1.2807941817034658, "learning_rate": 2.7958857834449417e-06, "loss": 0.4062, "step": 13068 }, { "epoch": 0.7052506610544493, "grad_norm": 1.218844368223324, "learning_rate": 2.795276031788302e-06, "loss": 0.5431, "step": 13069 }, { "epoch": 0.7053046246829637, "grad_norm": 1.0058268001671038, "learning_rate": 2.794666357868161e-06, "loss": 0.6061, "step": 13070 }, { "epoch": 0.705358588311478, "grad_norm": 0.8122780290388153, "learning_rate": 2.7940567617020453e-06, "loss": 0.3161, "step": 13071 }, { "epoch": 0.7054125519399924, "grad_norm": 0.9530569512651462, "learning_rate": 2.79344724330747e-06, "loss": 0.327, "step": 13072 }, { "epoch": 0.7054665155685068, "grad_norm": 0.9260017557289202, "learning_rate": 2.7928378027019564e-06, "loss": 0.431, "step": 13073 }, { "epoch": 0.7055204791970212, "grad_norm": 0.9286129603474494, "learning_rate": 2.7922284399030198e-06, "loss": 0.4352, "step": 13074 }, { "epoch": 0.7055744428255356, "grad_norm": 1.0853673821383802, "learning_rate": 2.791619154928173e-06, "loss": 0.6228, "step": 13075 }, { "epoch": 0.70562840645405, "grad_norm": 0.9600055297110929, "learning_rate": 2.791009947794929e-06, "loss": 0.4342, "step": 13076 }, { "epoch": 0.7056823700825644, "grad_norm": 1.0943157555824032, "learning_rate": 2.7904008185207955e-06, "loss": 0.5069, "step": 13077 }, { "epoch": 0.7057363337110787, "grad_norm": 1.1970715513857342, "learning_rate": 2.7897917671232825e-06, "loss": 0.6761, "step": 13078 }, { "epoch": 0.7057902973395931, "grad_norm": 0.9945706429572404, "learning_rate": 2.7891827936198914e-06, "loss": 0.4484, "step": 13079 }, { "epoch": 0.7058442609681075, "grad_norm": 0.9780944096479152, "learning_rate": 2.788573898028125e-06, "loss": 0.4552, "step": 13080 }, { "epoch": 0.7058982245966219, 
"grad_norm": 1.132829691964871, "learning_rate": 2.7879650803654856e-06, "loss": 0.4845, "step": 13081 }, { "epoch": 0.7059521882251363, "grad_norm": 1.0797041445849078, "learning_rate": 2.7873563406494687e-06, "loss": 0.4318, "step": 13082 }, { "epoch": 0.7060061518536507, "grad_norm": 1.2193800457267148, "learning_rate": 2.7867476788975727e-06, "loss": 0.4477, "step": 13083 }, { "epoch": 0.7060601154821651, "grad_norm": 1.1652198594242897, "learning_rate": 2.7861390951272895e-06, "loss": 0.4304, "step": 13084 }, { "epoch": 0.7061140791106794, "grad_norm": 1.1546969970782421, "learning_rate": 2.7855305893561123e-06, "loss": 0.4276, "step": 13085 }, { "epoch": 0.7061680427391938, "grad_norm": 1.0083104303102652, "learning_rate": 2.7849221616015277e-06, "loss": 0.4745, "step": 13086 }, { "epoch": 0.7062220063677082, "grad_norm": 1.0719212171403862, "learning_rate": 2.784313811881023e-06, "loss": 0.4861, "step": 13087 }, { "epoch": 0.7062759699962226, "grad_norm": 1.0346907874861577, "learning_rate": 2.7837055402120834e-06, "loss": 0.5629, "step": 13088 }, { "epoch": 0.706329933624737, "grad_norm": 1.1782964167291268, "learning_rate": 2.7830973466121914e-06, "loss": 0.7236, "step": 13089 }, { "epoch": 0.7063838972532513, "grad_norm": 1.0755119645720623, "learning_rate": 2.782489231098826e-06, "loss": 0.4281, "step": 13090 }, { "epoch": 0.7064378608817657, "grad_norm": 0.7670640174505496, "learning_rate": 2.781881193689467e-06, "loss": 0.2882, "step": 13091 }, { "epoch": 0.70649182451028, "grad_norm": 1.1217362835961913, "learning_rate": 2.7812732344015885e-06, "loss": 0.4635, "step": 13092 }, { "epoch": 0.7065457881387944, "grad_norm": 0.8639041525163884, "learning_rate": 2.780665353252665e-06, "loss": 0.3782, "step": 13093 }, { "epoch": 0.7065997517673088, "grad_norm": 1.035864339908178, "learning_rate": 2.7800575502601664e-06, "loss": 0.338, "step": 13094 }, { "epoch": 0.7066537153958232, "grad_norm": 0.7941475987212196, "learning_rate": 2.7794498254415636e-06, "loss": 0.3186, "step": 13095 }, { "epoch": 0.7067076790243376, "grad_norm": 1.188104109316251, "learning_rate": 2.7788421788143204e-06, "loss": 0.5806, "step": 13096 }, { "epoch": 0.706761642652852, "grad_norm": 1.1260155836654815, "learning_rate": 2.778234610395902e-06, "loss": 0.589, "step": 13097 }, { "epoch": 0.7068156062813663, "grad_norm": 0.9264751450333825, "learning_rate": 2.777627120203771e-06, "loss": 0.403, "step": 13098 }, { "epoch": 0.7068695699098807, "grad_norm": 1.130933006404503, "learning_rate": 2.777019708255388e-06, "loss": 0.5611, "step": 13099 }, { "epoch": 0.7069235335383951, "grad_norm": 1.0464913644176592, "learning_rate": 2.7764123745682096e-06, "loss": 0.5156, "step": 13100 }, { "epoch": 0.7069774971669095, "grad_norm": 1.0263637991212162, "learning_rate": 2.7758051191596918e-06, "loss": 0.3585, "step": 13101 }, { "epoch": 0.7070314607954239, "grad_norm": 1.0153809608560864, "learning_rate": 2.7751979420472874e-06, "loss": 0.3887, "step": 13102 }, { "epoch": 0.7070854244239383, "grad_norm": 0.9950758524934253, "learning_rate": 2.774590843248447e-06, "loss": 0.4882, "step": 13103 }, { "epoch": 0.7071393880524527, "grad_norm": 1.1429176368978327, "learning_rate": 2.7739838227806196e-06, "loss": 0.447, "step": 13104 }, { "epoch": 0.707193351680967, "grad_norm": 1.038826650481438, "learning_rate": 2.7733768806612536e-06, "loss": 0.4092, "step": 13105 }, { "epoch": 0.7072473153094814, "grad_norm": 1.014656447513123, "learning_rate": 2.7727700169077885e-06, "loss": 0.4617, "step": 13106 }, { "epoch": 
0.7073012789379958, "grad_norm": 0.938494170913895, "learning_rate": 2.7721632315376703e-06, "loss": 0.3603, "step": 13107 }, { "epoch": 0.7073552425665102, "grad_norm": 1.0402742957516007, "learning_rate": 2.7715565245683364e-06, "loss": 0.3637, "step": 13108 }, { "epoch": 0.7074092061950246, "grad_norm": 0.867719340477086, "learning_rate": 2.7709498960172254e-06, "loss": 0.4548, "step": 13109 }, { "epoch": 0.707463169823539, "grad_norm": 1.132188636364043, "learning_rate": 2.7703433459017703e-06, "loss": 0.624, "step": 13110 }, { "epoch": 0.7075171334520534, "grad_norm": 1.0064525796031931, "learning_rate": 2.769736874239407e-06, "loss": 0.4635, "step": 13111 }, { "epoch": 0.7075710970805676, "grad_norm": 1.290136001042839, "learning_rate": 2.7691304810475662e-06, "loss": 0.6072, "step": 13112 }, { "epoch": 0.707625060709082, "grad_norm": 1.1306427880447638, "learning_rate": 2.7685241663436722e-06, "loss": 0.5081, "step": 13113 }, { "epoch": 0.7076790243375964, "grad_norm": 1.0252672085650592, "learning_rate": 2.767917930145154e-06, "loss": 0.5017, "step": 13114 }, { "epoch": 0.7077329879661108, "grad_norm": 0.9652394481129307, "learning_rate": 2.767311772469436e-06, "loss": 0.3537, "step": 13115 }, { "epoch": 0.7077869515946252, "grad_norm": 1.0607820044583118, "learning_rate": 2.7667056933339376e-06, "loss": 0.4746, "step": 13116 }, { "epoch": 0.7078409152231396, "grad_norm": 1.0565518759386583, "learning_rate": 2.76609969275608e-06, "loss": 0.5209, "step": 13117 }, { "epoch": 0.707894878851654, "grad_norm": 1.0857899010834557, "learning_rate": 2.7654937707532796e-06, "loss": 0.4381, "step": 13118 }, { "epoch": 0.7079488424801683, "grad_norm": 1.0950508502069594, "learning_rate": 2.764887927342953e-06, "loss": 0.5571, "step": 13119 }, { "epoch": 0.7080028061086827, "grad_norm": 1.1787340086910514, "learning_rate": 2.7642821625425088e-06, "loss": 0.4198, "step": 13120 }, { "epoch": 0.7080567697371971, "grad_norm": 0.9519026302125825, "learning_rate": 2.7636764763693603e-06, "loss": 0.3771, "step": 13121 }, { "epoch": 0.7081107333657115, "grad_norm": 1.008771328759152, "learning_rate": 2.763070868840914e-06, "loss": 0.469, "step": 13122 }, { "epoch": 0.7081646969942259, "grad_norm": 1.0920876976572427, "learning_rate": 2.762465339974576e-06, "loss": 0.4483, "step": 13123 }, { "epoch": 0.7082186606227403, "grad_norm": 0.9315602994409745, "learning_rate": 2.7618598897877515e-06, "loss": 0.4799, "step": 13124 }, { "epoch": 0.7082726242512547, "grad_norm": 1.1172879915176048, "learning_rate": 2.76125451829784e-06, "loss": 0.617, "step": 13125 }, { "epoch": 0.708326587879769, "grad_norm": 1.228137830106851, "learning_rate": 2.7606492255222416e-06, "loss": 0.5113, "step": 13126 }, { "epoch": 0.7083805515082834, "grad_norm": 1.0466773014911352, "learning_rate": 2.760044011478352e-06, "loss": 0.5105, "step": 13127 }, { "epoch": 0.7084345151367978, "grad_norm": 0.9213181581164044, "learning_rate": 2.7594388761835654e-06, "loss": 0.396, "step": 13128 }, { "epoch": 0.7084884787653122, "grad_norm": 0.9402498865698745, "learning_rate": 2.7588338196552746e-06, "loss": 0.3724, "step": 13129 }, { "epoch": 0.7085424423938266, "grad_norm": 0.948868159528535, "learning_rate": 2.75822884191087e-06, "loss": 0.5209, "step": 13130 }, { "epoch": 0.708596406022341, "grad_norm": 1.1827079020074853, "learning_rate": 2.757623942967739e-06, "loss": 0.5946, "step": 13131 }, { "epoch": 0.7086503696508554, "grad_norm": 0.9596532693623243, "learning_rate": 2.757019122843266e-06, "loss": 0.3987, "step": 13132 }, { 
"epoch": 0.7087043332793697, "grad_norm": 1.0506910640144465, "learning_rate": 2.756414381554837e-06, "loss": 0.5308, "step": 13133 }, { "epoch": 0.7087582969078841, "grad_norm": 0.8941855824360141, "learning_rate": 2.755809719119829e-06, "loss": 0.4266, "step": 13134 }, { "epoch": 0.7088122605363985, "grad_norm": 0.908570304879144, "learning_rate": 2.755205135555623e-06, "loss": 0.4105, "step": 13135 }, { "epoch": 0.7088662241649129, "grad_norm": 1.1165462413666525, "learning_rate": 2.754600630879595e-06, "loss": 0.4527, "step": 13136 }, { "epoch": 0.7089201877934272, "grad_norm": 0.9972159802373246, "learning_rate": 2.7539962051091185e-06, "loss": 0.4058, "step": 13137 }, { "epoch": 0.7089741514219416, "grad_norm": 0.9823415793683125, "learning_rate": 2.7533918582615656e-06, "loss": 0.4663, "step": 13138 }, { "epoch": 0.709028115050456, "grad_norm": 0.9226849699131008, "learning_rate": 2.7527875903543056e-06, "loss": 0.4869, "step": 13139 }, { "epoch": 0.7090820786789703, "grad_norm": 0.8184316444486526, "learning_rate": 2.7521834014047084e-06, "loss": 0.3948, "step": 13140 }, { "epoch": 0.7091360423074847, "grad_norm": 0.939832697571284, "learning_rate": 2.7515792914301348e-06, "loss": 0.4826, "step": 13141 }, { "epoch": 0.7091900059359991, "grad_norm": 0.8606581684069843, "learning_rate": 2.7509752604479488e-06, "loss": 0.4273, "step": 13142 }, { "epoch": 0.7092439695645135, "grad_norm": 0.9211513463204489, "learning_rate": 2.750371308475513e-06, "loss": 0.3476, "step": 13143 }, { "epoch": 0.7092979331930279, "grad_norm": 1.4164502755600485, "learning_rate": 2.749767435530183e-06, "loss": 0.4202, "step": 13144 }, { "epoch": 0.7093518968215423, "grad_norm": 1.0565958109696345, "learning_rate": 2.7491636416293167e-06, "loss": 0.4839, "step": 13145 }, { "epoch": 0.7094058604500567, "grad_norm": 0.8483346139147242, "learning_rate": 2.748559926790265e-06, "loss": 0.3286, "step": 13146 }, { "epoch": 0.709459824078571, "grad_norm": 1.3584298363804665, "learning_rate": 2.7479562910303824e-06, "loss": 0.659, "step": 13147 }, { "epoch": 0.7095137877070854, "grad_norm": 1.2129510573535576, "learning_rate": 2.7473527343670177e-06, "loss": 0.5291, "step": 13148 }, { "epoch": 0.7095677513355998, "grad_norm": 1.082046236715628, "learning_rate": 2.7467492568175154e-06, "loss": 0.4789, "step": 13149 }, { "epoch": 0.7096217149641142, "grad_norm": 0.9614485312838701, "learning_rate": 2.746145858399222e-06, "loss": 0.3866, "step": 13150 }, { "epoch": 0.7096756785926286, "grad_norm": 0.7744276445603744, "learning_rate": 2.7455425391294788e-06, "loss": 0.314, "step": 13151 }, { "epoch": 0.709729642221143, "grad_norm": 0.9736422069050968, "learning_rate": 2.7449392990256263e-06, "loss": 0.4035, "step": 13152 }, { "epoch": 0.7097836058496574, "grad_norm": 1.2184246301286172, "learning_rate": 2.744336138105002e-06, "loss": 0.5237, "step": 13153 }, { "epoch": 0.7098375694781717, "grad_norm": 1.033028820375112, "learning_rate": 2.743733056384944e-06, "loss": 0.4431, "step": 13154 }, { "epoch": 0.7098915331066861, "grad_norm": 1.1337068116553453, "learning_rate": 2.7431300538827804e-06, "loss": 0.5004, "step": 13155 }, { "epoch": 0.7099454967352005, "grad_norm": 0.9275733233124511, "learning_rate": 2.742527130615845e-06, "loss": 0.5036, "step": 13156 }, { "epoch": 0.7099994603637149, "grad_norm": 1.1560844004136976, "learning_rate": 2.741924286601465e-06, "loss": 0.6425, "step": 13157 }, { "epoch": 0.7100534239922293, "grad_norm": 0.9567361019887721, "learning_rate": 2.74132152185697e-06, "loss": 0.4875, "step": 
13158 }, { "epoch": 0.7101073876207437, "grad_norm": 0.8216279308959288, "learning_rate": 2.74071883639968e-06, "loss": 0.3134, "step": 13159 }, { "epoch": 0.710161351249258, "grad_norm": 0.9452424854796572, "learning_rate": 2.74011623024692e-06, "loss": 0.4339, "step": 13160 }, { "epoch": 0.7102153148777723, "grad_norm": 0.9288022523712728, "learning_rate": 2.7395137034160093e-06, "loss": 0.5035, "step": 13161 }, { "epoch": 0.7102692785062867, "grad_norm": 1.045709780918447, "learning_rate": 2.738911255924262e-06, "loss": 0.4372, "step": 13162 }, { "epoch": 0.7103232421348011, "grad_norm": 1.06511866770516, "learning_rate": 2.7383088877889958e-06, "loss": 0.53, "step": 13163 }, { "epoch": 0.7103772057633155, "grad_norm": 1.0216732842678784, "learning_rate": 2.737706599027522e-06, "loss": 0.4532, "step": 13164 }, { "epoch": 0.7104311693918299, "grad_norm": 1.0236943747870901, "learning_rate": 2.7371043896571516e-06, "loss": 0.4097, "step": 13165 }, { "epoch": 0.7104851330203443, "grad_norm": 0.9954614095793402, "learning_rate": 2.7365022596951936e-06, "loss": 0.4992, "step": 13166 }, { "epoch": 0.7105390966488586, "grad_norm": 0.8734019779904878, "learning_rate": 2.7359002091589535e-06, "loss": 0.3375, "step": 13167 }, { "epoch": 0.710593060277373, "grad_norm": 0.9031312112383583, "learning_rate": 2.735298238065733e-06, "loss": 0.4714, "step": 13168 }, { "epoch": 0.7106470239058874, "grad_norm": 1.0508096502280095, "learning_rate": 2.734696346432836e-06, "loss": 0.5285, "step": 13169 }, { "epoch": 0.7107009875344018, "grad_norm": 0.8967519229845292, "learning_rate": 2.734094534277559e-06, "loss": 0.3191, "step": 13170 }, { "epoch": 0.7107549511629162, "grad_norm": 1.1246549861554855, "learning_rate": 2.7334928016171997e-06, "loss": 0.5259, "step": 13171 }, { "epoch": 0.7108089147914306, "grad_norm": 1.2063027011437977, "learning_rate": 2.7328911484690527e-06, "loss": 0.5941, "step": 13172 }, { "epoch": 0.710862878419945, "grad_norm": 1.1729411236156362, "learning_rate": 2.73228957485041e-06, "loss": 0.5991, "step": 13173 }, { "epoch": 0.7109168420484593, "grad_norm": 0.9226914194662332, "learning_rate": 2.7316880807785635e-06, "loss": 0.3421, "step": 13174 }, { "epoch": 0.7109708056769737, "grad_norm": 1.1024220332327437, "learning_rate": 2.731086666270797e-06, "loss": 0.5276, "step": 13175 }, { "epoch": 0.7110247693054881, "grad_norm": 1.024129773770123, "learning_rate": 2.730485331344397e-06, "loss": 0.3344, "step": 13176 }, { "epoch": 0.7110787329340025, "grad_norm": 1.4343394273855226, "learning_rate": 2.7298840760166473e-06, "loss": 0.5688, "step": 13177 }, { "epoch": 0.7111326965625169, "grad_norm": 1.10963471045379, "learning_rate": 2.729282900304828e-06, "loss": 0.6018, "step": 13178 }, { "epoch": 0.7111866601910313, "grad_norm": 1.2385169566013434, "learning_rate": 2.7286818042262187e-06, "loss": 0.5205, "step": 13179 }, { "epoch": 0.7112406238195457, "grad_norm": 0.8931774110757966, "learning_rate": 2.7280807877980936e-06, "loss": 0.4131, "step": 13180 }, { "epoch": 0.71129458744806, "grad_norm": 0.9778783547179932, "learning_rate": 2.72747985103773e-06, "loss": 0.3661, "step": 13181 }, { "epoch": 0.7113485510765744, "grad_norm": 0.9010223076726723, "learning_rate": 2.726878993962394e-06, "loss": 0.4383, "step": 13182 }, { "epoch": 0.7114025147050888, "grad_norm": 0.8810799059699322, "learning_rate": 2.7262782165893586e-06, "loss": 0.3539, "step": 13183 }, { "epoch": 0.7114564783336031, "grad_norm": 0.9534954373416316, "learning_rate": 2.7256775189358897e-06, "loss": 0.3971, 
"step": 13184 }, { "epoch": 0.7115104419621175, "grad_norm": 1.0812430628566978, "learning_rate": 2.7250769010192526e-06, "loss": 0.4879, "step": 13185 }, { "epoch": 0.7115644055906319, "grad_norm": 1.1976230073098244, "learning_rate": 2.7244763628567085e-06, "loss": 0.5887, "step": 13186 }, { "epoch": 0.7116183692191463, "grad_norm": 1.2195072289533524, "learning_rate": 2.723875904465519e-06, "loss": 0.555, "step": 13187 }, { "epoch": 0.7116723328476606, "grad_norm": 0.8142273372093012, "learning_rate": 2.723275525862942e-06, "loss": 0.3068, "step": 13188 }, { "epoch": 0.711726296476175, "grad_norm": 1.021227290198614, "learning_rate": 2.7226752270662306e-06, "loss": 0.5165, "step": 13189 }, { "epoch": 0.7117802601046894, "grad_norm": 1.1626099098948026, "learning_rate": 2.7220750080926393e-06, "loss": 0.5572, "step": 13190 }, { "epoch": 0.7118342237332038, "grad_norm": 1.2086179796841363, "learning_rate": 2.7214748689594194e-06, "loss": 0.5259, "step": 13191 }, { "epoch": 0.7118881873617182, "grad_norm": 1.0826830374526055, "learning_rate": 2.7208748096838195e-06, "loss": 0.3927, "step": 13192 }, { "epoch": 0.7119421509902326, "grad_norm": 0.8508746325058004, "learning_rate": 2.720274830283085e-06, "loss": 0.372, "step": 13193 }, { "epoch": 0.711996114618747, "grad_norm": 1.1286694542653122, "learning_rate": 2.7196749307744607e-06, "loss": 0.4576, "step": 13194 }, { "epoch": 0.7120500782472613, "grad_norm": 0.8238344051080262, "learning_rate": 2.719075111175189e-06, "loss": 0.3493, "step": 13195 }, { "epoch": 0.7121040418757757, "grad_norm": 0.8489989396531308, "learning_rate": 2.7184753715025082e-06, "loss": 0.4733, "step": 13196 }, { "epoch": 0.7121580055042901, "grad_norm": 1.2966222862804193, "learning_rate": 2.7178757117736553e-06, "loss": 0.5777, "step": 13197 }, { "epoch": 0.7122119691328045, "grad_norm": 0.9836424656808218, "learning_rate": 2.717276132005866e-06, "loss": 0.5234, "step": 13198 }, { "epoch": 0.7122659327613189, "grad_norm": 1.217110315896518, "learning_rate": 2.7166766322163717e-06, "loss": 0.4991, "step": 13199 }, { "epoch": 0.7123198963898333, "grad_norm": 0.9566030069736929, "learning_rate": 2.7160772124224044e-06, "loss": 0.3986, "step": 13200 }, { "epoch": 0.7123738600183477, "grad_norm": 0.8557981701131225, "learning_rate": 2.715477872641192e-06, "loss": 0.3708, "step": 13201 }, { "epoch": 0.712427823646862, "grad_norm": 0.749710224553375, "learning_rate": 2.714878612889957e-06, "loss": 0.3783, "step": 13202 }, { "epoch": 0.7124817872753764, "grad_norm": 0.9413269959907998, "learning_rate": 2.7142794331859257e-06, "loss": 0.4261, "step": 13203 }, { "epoch": 0.7125357509038908, "grad_norm": 1.0033722914318448, "learning_rate": 2.713680333546318e-06, "loss": 0.4768, "step": 13204 }, { "epoch": 0.7125897145324052, "grad_norm": 0.7632303804658391, "learning_rate": 2.7130813139883526e-06, "loss": 0.3219, "step": 13205 }, { "epoch": 0.7126436781609196, "grad_norm": 1.1844773485858, "learning_rate": 2.712482374529247e-06, "loss": 0.4821, "step": 13206 }, { "epoch": 0.712697641789434, "grad_norm": 0.833220695642105, "learning_rate": 2.7118835151862137e-06, "loss": 0.3437, "step": 13207 }, { "epoch": 0.7127516054179484, "grad_norm": 1.0379164798607257, "learning_rate": 2.711284735976465e-06, "loss": 0.6064, "step": 13208 }, { "epoch": 0.7128055690464626, "grad_norm": 1.055102803815856, "learning_rate": 2.7106860369172133e-06, "loss": 0.4022, "step": 13209 }, { "epoch": 0.712859532674977, "grad_norm": 0.7933042505184966, "learning_rate": 2.710087418025662e-06, "loss": 
0.3497, "step": 13210 }, { "epoch": 0.7129134963034914, "grad_norm": 1.4548406895902832, "learning_rate": 2.709488879319016e-06, "loss": 0.6629, "step": 13211 }, { "epoch": 0.7129674599320058, "grad_norm": 1.0054688760578265, "learning_rate": 2.7088904208144793e-06, "loss": 0.5057, "step": 13212 }, { "epoch": 0.7130214235605202, "grad_norm": 1.2040285557074921, "learning_rate": 2.7082920425292534e-06, "loss": 0.4262, "step": 13213 }, { "epoch": 0.7130753871890346, "grad_norm": 1.2466780856408375, "learning_rate": 2.707693744480534e-06, "loss": 0.3894, "step": 13214 }, { "epoch": 0.713129350817549, "grad_norm": 0.8416414274029633, "learning_rate": 2.707095526685518e-06, "loss": 0.3926, "step": 13215 }, { "epoch": 0.7131833144460633, "grad_norm": 1.0381739717997371, "learning_rate": 2.7064973891614004e-06, "loss": 0.4493, "step": 13216 }, { "epoch": 0.7132372780745777, "grad_norm": 0.7904088485494563, "learning_rate": 2.705899331925369e-06, "loss": 0.3001, "step": 13217 }, { "epoch": 0.7132912417030921, "grad_norm": 1.0782919848152703, "learning_rate": 2.705301354994614e-06, "loss": 0.5077, "step": 13218 }, { "epoch": 0.7133452053316065, "grad_norm": 0.8766498142978533, "learning_rate": 2.7047034583863217e-06, "loss": 0.4246, "step": 13219 }, { "epoch": 0.7133991689601209, "grad_norm": 1.0510952382999337, "learning_rate": 2.704105642117677e-06, "loss": 0.5168, "step": 13220 }, { "epoch": 0.7134531325886353, "grad_norm": 0.9763845969440835, "learning_rate": 2.70350790620586e-06, "loss": 0.4663, "step": 13221 }, { "epoch": 0.7135070962171497, "grad_norm": 1.0595809261484235, "learning_rate": 2.702910250668052e-06, "loss": 0.6219, "step": 13222 }, { "epoch": 0.713561059845664, "grad_norm": 1.282392960561827, "learning_rate": 2.7023126755214312e-06, "loss": 0.5643, "step": 13223 }, { "epoch": 0.7136150234741784, "grad_norm": 1.0074172142399567, "learning_rate": 2.70171518078317e-06, "loss": 0.5517, "step": 13224 }, { "epoch": 0.7136689871026928, "grad_norm": 0.8116301615913486, "learning_rate": 2.7011177664704414e-06, "loss": 0.3993, "step": 13225 }, { "epoch": 0.7137229507312072, "grad_norm": 1.019622224102281, "learning_rate": 2.7005204326004166e-06, "loss": 0.6558, "step": 13226 }, { "epoch": 0.7137769143597216, "grad_norm": 0.9400987742206453, "learning_rate": 2.6999231791902623e-06, "loss": 0.4032, "step": 13227 }, { "epoch": 0.713830877988236, "grad_norm": 0.945807513515949, "learning_rate": 2.699326006257146e-06, "loss": 0.4073, "step": 13228 }, { "epoch": 0.7138848416167504, "grad_norm": 0.8547156582551306, "learning_rate": 2.69872891381823e-06, "loss": 0.3704, "step": 13229 }, { "epoch": 0.7139388052452647, "grad_norm": 1.0035580184531196, "learning_rate": 2.6981319018906772e-06, "loss": 0.5091, "step": 13230 }, { "epoch": 0.713992768873779, "grad_norm": 1.0324455148475713, "learning_rate": 2.697534970491642e-06, "loss": 0.5444, "step": 13231 }, { "epoch": 0.7140467325022934, "grad_norm": 1.2333248628014588, "learning_rate": 2.696938119638285e-06, "loss": 0.5209, "step": 13232 }, { "epoch": 0.7141006961308078, "grad_norm": 0.9664834641863045, "learning_rate": 2.6963413493477573e-06, "loss": 0.4183, "step": 13233 }, { "epoch": 0.7141546597593222, "grad_norm": 1.0841694859852071, "learning_rate": 2.695744659637213e-06, "loss": 0.4471, "step": 13234 }, { "epoch": 0.7142086233878366, "grad_norm": 0.9644334911967835, "learning_rate": 2.6951480505238004e-06, "loss": 0.4466, "step": 13235 }, { "epoch": 0.7142625870163509, "grad_norm": 0.8398925691314961, "learning_rate": 
2.6945515220246687e-06, "loss": 0.3561, "step": 13236 }, { "epoch": 0.7143165506448653, "grad_norm": 1.0424603529217031, "learning_rate": 2.6939550741569593e-06, "loss": 0.4136, "step": 13237 }, { "epoch": 0.7143705142733797, "grad_norm": 0.9987975578461854, "learning_rate": 2.6933587069378165e-06, "loss": 0.5871, "step": 13238 }, { "epoch": 0.7144244779018941, "grad_norm": 0.7938424329959459, "learning_rate": 2.6927624203843792e-06, "loss": 0.3353, "step": 13239 }, { "epoch": 0.7144784415304085, "grad_norm": 0.8787651742520503, "learning_rate": 2.6921662145137885e-06, "loss": 0.4455, "step": 13240 }, { "epoch": 0.7145324051589229, "grad_norm": 1.2430738006924706, "learning_rate": 2.6915700893431763e-06, "loss": 0.6142, "step": 13241 }, { "epoch": 0.7145863687874373, "grad_norm": 1.092518004786396, "learning_rate": 2.690974044889678e-06, "loss": 0.6072, "step": 13242 }, { "epoch": 0.7146403324159516, "grad_norm": 1.1952128044946557, "learning_rate": 2.690378081170424e-06, "loss": 0.5104, "step": 13243 }, { "epoch": 0.714694296044466, "grad_norm": 0.7334551820438912, "learning_rate": 2.689782198202543e-06, "loss": 0.2768, "step": 13244 }, { "epoch": 0.7147482596729804, "grad_norm": 1.119554756953071, "learning_rate": 2.6891863960031626e-06, "loss": 0.5603, "step": 13245 }, { "epoch": 0.7148022233014948, "grad_norm": 0.9232546776617528, "learning_rate": 2.688590674589403e-06, "loss": 0.3676, "step": 13246 }, { "epoch": 0.7148561869300092, "grad_norm": 0.9233913418182438, "learning_rate": 2.6879950339783887e-06, "loss": 0.3612, "step": 13247 }, { "epoch": 0.7149101505585236, "grad_norm": 1.042664294696345, "learning_rate": 2.687399474187239e-06, "loss": 0.4857, "step": 13248 }, { "epoch": 0.714964114187038, "grad_norm": 0.9424720040863117, "learning_rate": 2.686803995233069e-06, "loss": 0.4724, "step": 13249 }, { "epoch": 0.7150180778155523, "grad_norm": 1.062157845113935, "learning_rate": 2.6862085971329964e-06, "loss": 0.4559, "step": 13250 }, { "epoch": 0.7150720414440667, "grad_norm": 1.0760827156999582, "learning_rate": 2.6856132799041308e-06, "loss": 0.4295, "step": 13251 }, { "epoch": 0.7151260050725811, "grad_norm": 1.0173462309012151, "learning_rate": 2.6850180435635836e-06, "loss": 0.4043, "step": 13252 }, { "epoch": 0.7151799687010955, "grad_norm": 0.9570669349832445, "learning_rate": 2.6844228881284615e-06, "loss": 0.4028, "step": 13253 }, { "epoch": 0.7152339323296099, "grad_norm": 0.93167713720306, "learning_rate": 2.6838278136158713e-06, "loss": 0.5179, "step": 13254 }, { "epoch": 0.7152878959581243, "grad_norm": 0.9230214225488681, "learning_rate": 2.683232820042916e-06, "loss": 0.4395, "step": 13255 }, { "epoch": 0.7153418595866387, "grad_norm": 0.9740229453066266, "learning_rate": 2.682637907426694e-06, "loss": 0.4564, "step": 13256 }, { "epoch": 0.7153958232151529, "grad_norm": 0.888360043144517, "learning_rate": 2.6820430757843076e-06, "loss": 0.375, "step": 13257 }, { "epoch": 0.7154497868436673, "grad_norm": 0.9987812677875207, "learning_rate": 2.6814483251328488e-06, "loss": 0.4693, "step": 13258 }, { "epoch": 0.7155037504721817, "grad_norm": 0.9583611942579091, "learning_rate": 2.680853655489413e-06, "loss": 0.372, "step": 13259 }, { "epoch": 0.7155577141006961, "grad_norm": 1.1269935631987493, "learning_rate": 2.6802590668710916e-06, "loss": 0.5154, "step": 13260 }, { "epoch": 0.7156116777292105, "grad_norm": 1.088998456207822, "learning_rate": 2.6796645592949744e-06, "loss": 0.3569, "step": 13261 }, { "epoch": 0.7156656413577249, "grad_norm": 0.9945935809960993, 
"learning_rate": 2.6790701327781467e-06, "loss": 0.4329, "step": 13262 }, { "epoch": 0.7157196049862393, "grad_norm": 1.1989325584577946, "learning_rate": 2.6784757873376945e-06, "loss": 0.5132, "step": 13263 }, { "epoch": 0.7157735686147536, "grad_norm": 1.0428823232448752, "learning_rate": 2.6778815229906994e-06, "loss": 0.4258, "step": 13264 }, { "epoch": 0.715827532243268, "grad_norm": 0.9354301866831964, "learning_rate": 2.677287339754241e-06, "loss": 0.3328, "step": 13265 }, { "epoch": 0.7158814958717824, "grad_norm": 0.9172893725100263, "learning_rate": 2.676693237645395e-06, "loss": 0.402, "step": 13266 }, { "epoch": 0.7159354595002968, "grad_norm": 0.983889994770096, "learning_rate": 2.6760992166812396e-06, "loss": 0.4646, "step": 13267 }, { "epoch": 0.7159894231288112, "grad_norm": 1.1708155379853766, "learning_rate": 2.6755052768788447e-06, "loss": 0.5944, "step": 13268 }, { "epoch": 0.7160433867573256, "grad_norm": 0.8861047897532095, "learning_rate": 2.6749114182552827e-06, "loss": 0.4037, "step": 13269 }, { "epoch": 0.71609735038584, "grad_norm": 1.1690298429364787, "learning_rate": 2.6743176408276215e-06, "loss": 0.5783, "step": 13270 }, { "epoch": 0.7161513140143543, "grad_norm": 1.2228243930557834, "learning_rate": 2.673723944612927e-06, "loss": 0.5624, "step": 13271 }, { "epoch": 0.7162052776428687, "grad_norm": 0.8621574509821913, "learning_rate": 2.6731303296282614e-06, "loss": 0.355, "step": 13272 }, { "epoch": 0.7162592412713831, "grad_norm": 0.9567079843700925, "learning_rate": 2.672536795890687e-06, "loss": 0.5317, "step": 13273 }, { "epoch": 0.7163132048998975, "grad_norm": 0.9649198063741043, "learning_rate": 2.6719433434172603e-06, "loss": 0.4161, "step": 13274 }, { "epoch": 0.7163671685284119, "grad_norm": 1.0886696745002253, "learning_rate": 2.6713499722250403e-06, "loss": 0.3889, "step": 13275 }, { "epoch": 0.7164211321569263, "grad_norm": 1.113282136502745, "learning_rate": 2.6707566823310803e-06, "loss": 0.4996, "step": 13276 }, { "epoch": 0.7164750957854407, "grad_norm": 1.1070749729080978, "learning_rate": 2.6701634737524323e-06, "loss": 0.3575, "step": 13277 }, { "epoch": 0.716529059413955, "grad_norm": 1.3873544784332612, "learning_rate": 2.6695703465061463e-06, "loss": 0.5762, "step": 13278 }, { "epoch": 0.7165830230424693, "grad_norm": 1.1576047664751277, "learning_rate": 2.668977300609268e-06, "loss": 0.3888, "step": 13279 }, { "epoch": 0.7166369866709837, "grad_norm": 0.9660189638618495, "learning_rate": 2.6683843360788415e-06, "loss": 0.3229, "step": 13280 }, { "epoch": 0.7166909502994981, "grad_norm": 0.9700735634443828, "learning_rate": 2.6677914529319103e-06, "loss": 0.4643, "step": 13281 }, { "epoch": 0.7167449139280125, "grad_norm": 1.115046992482619, "learning_rate": 2.6671986511855154e-06, "loss": 0.4689, "step": 13282 }, { "epoch": 0.7167988775565269, "grad_norm": 1.0349221073257826, "learning_rate": 2.6666059308566926e-06, "loss": 0.4359, "step": 13283 }, { "epoch": 0.7168528411850413, "grad_norm": 0.8536206114152576, "learning_rate": 2.6660132919624793e-06, "loss": 0.4098, "step": 13284 }, { "epoch": 0.7169068048135556, "grad_norm": 1.0386831621262411, "learning_rate": 2.6654207345199084e-06, "loss": 0.4638, "step": 13285 }, { "epoch": 0.71696076844207, "grad_norm": 1.027545738874129, "learning_rate": 2.6648282585460086e-06, "loss": 0.4656, "step": 13286 }, { "epoch": 0.7170147320705844, "grad_norm": 0.9790351929886741, "learning_rate": 2.6642358640578086e-06, "loss": 0.3951, "step": 13287 }, { "epoch": 0.7170686956990988, "grad_norm": 
1.4776573244707627, "learning_rate": 2.6636435510723357e-06, "loss": 0.5339, "step": 13288 }, { "epoch": 0.7171226593276132, "grad_norm": 1.262035834028656, "learning_rate": 2.6630513196066133e-06, "loss": 0.6814, "step": 13289 }, { "epoch": 0.7171766229561276, "grad_norm": 1.1380460741563556, "learning_rate": 2.6624591696776623e-06, "loss": 0.5252, "step": 13290 }, { "epoch": 0.717230586584642, "grad_norm": 0.9421209954909444, "learning_rate": 2.661867101302502e-06, "loss": 0.3523, "step": 13291 }, { "epoch": 0.7172845502131563, "grad_norm": 0.7931646988618413, "learning_rate": 2.6612751144981502e-06, "loss": 0.3496, "step": 13292 }, { "epoch": 0.7173385138416707, "grad_norm": 1.0116450424103758, "learning_rate": 2.6606832092816183e-06, "loss": 0.5476, "step": 13293 }, { "epoch": 0.7173924774701851, "grad_norm": 1.1694923329584692, "learning_rate": 2.6600913856699206e-06, "loss": 0.5143, "step": 13294 }, { "epoch": 0.7174464410986995, "grad_norm": 1.0439990577497027, "learning_rate": 2.659499643680065e-06, "loss": 0.503, "step": 13295 }, { "epoch": 0.7175004047272139, "grad_norm": 1.1491498238907525, "learning_rate": 2.6589079833290608e-06, "loss": 0.5123, "step": 13296 }, { "epoch": 0.7175543683557283, "grad_norm": 0.8412888951603144, "learning_rate": 2.6583164046339112e-06, "loss": 0.3628, "step": 13297 }, { "epoch": 0.7176083319842427, "grad_norm": 1.046209302027235, "learning_rate": 2.657724907611619e-06, "loss": 0.4588, "step": 13298 }, { "epoch": 0.717662295612757, "grad_norm": 1.0942489871402077, "learning_rate": 2.6571334922791858e-06, "loss": 0.4354, "step": 13299 }, { "epoch": 0.7177162592412714, "grad_norm": 1.1244782105080735, "learning_rate": 2.6565421586536085e-06, "loss": 0.4198, "step": 13300 }, { "epoch": 0.7177702228697858, "grad_norm": 0.6990059782411363, "learning_rate": 2.655950906751882e-06, "loss": 0.2644, "step": 13301 }, { "epoch": 0.7178241864983002, "grad_norm": 1.2232957091094334, "learning_rate": 2.6553597365910004e-06, "loss": 0.4704, "step": 13302 }, { "epoch": 0.7178781501268146, "grad_norm": 1.0587331184659299, "learning_rate": 2.6547686481879542e-06, "loss": 0.4956, "step": 13303 }, { "epoch": 0.717932113755329, "grad_norm": 0.9264798800524818, "learning_rate": 2.654177641559732e-06, "loss": 0.4328, "step": 13304 }, { "epoch": 0.7179860773838432, "grad_norm": 0.9983079620042615, "learning_rate": 2.653586716723322e-06, "loss": 0.5073, "step": 13305 }, { "epoch": 0.7180400410123576, "grad_norm": 0.9712213318601072, "learning_rate": 2.652995873695704e-06, "loss": 0.5775, "step": 13306 }, { "epoch": 0.718094004640872, "grad_norm": 0.9722447385673195, "learning_rate": 2.652405112493861e-06, "loss": 0.4388, "step": 13307 }, { "epoch": 0.7181479682693864, "grad_norm": 1.046009711966584, "learning_rate": 2.651814433134773e-06, "loss": 0.5073, "step": 13308 }, { "epoch": 0.7182019318979008, "grad_norm": 0.6788485216298138, "learning_rate": 2.6512238356354148e-06, "loss": 0.2764, "step": 13309 }, { "epoch": 0.7182558955264152, "grad_norm": 1.1038209080376011, "learning_rate": 2.6506333200127633e-06, "loss": 0.579, "step": 13310 }, { "epoch": 0.7183098591549296, "grad_norm": 1.0355262471832192, "learning_rate": 2.6500428862837878e-06, "loss": 0.4646, "step": 13311 }, { "epoch": 0.7183638227834439, "grad_norm": 1.2367885021703338, "learning_rate": 2.649452534465462e-06, "loss": 0.4145, "step": 13312 }, { "epoch": 0.7184177864119583, "grad_norm": 1.0099756250747256, "learning_rate": 2.648862264574749e-06, "loss": 0.4183, "step": 13313 }, { "epoch": 0.7184717500404727, 
"grad_norm": 0.9347681737726682, "learning_rate": 2.648272076628616e-06, "loss": 0.5466, "step": 13314 }, { "epoch": 0.7185257136689871, "grad_norm": 1.0799355554489087, "learning_rate": 2.6476819706440242e-06, "loss": 0.5158, "step": 13315 }, { "epoch": 0.7185796772975015, "grad_norm": 1.28006892222168, "learning_rate": 2.6470919466379346e-06, "loss": 0.6678, "step": 13316 }, { "epoch": 0.7186336409260159, "grad_norm": 0.9103106999199244, "learning_rate": 2.646502004627305e-06, "loss": 0.3715, "step": 13317 }, { "epoch": 0.7186876045545303, "grad_norm": 1.0809759931941167, "learning_rate": 2.6459121446290913e-06, "loss": 0.4253, "step": 13318 }, { "epoch": 0.7187415681830446, "grad_norm": 0.850990093440804, "learning_rate": 2.6453223666602486e-06, "loss": 0.3361, "step": 13319 }, { "epoch": 0.718795531811559, "grad_norm": 0.9022283777896292, "learning_rate": 2.644732670737723e-06, "loss": 0.4412, "step": 13320 }, { "epoch": 0.7188494954400734, "grad_norm": 1.0324457958351054, "learning_rate": 2.644143056878466e-06, "loss": 0.5075, "step": 13321 }, { "epoch": 0.7189034590685878, "grad_norm": 1.0622441168485444, "learning_rate": 2.6435535250994225e-06, "loss": 0.4004, "step": 13322 }, { "epoch": 0.7189574226971022, "grad_norm": 1.2345112902168511, "learning_rate": 2.642964075417538e-06, "loss": 0.6082, "step": 13323 }, { "epoch": 0.7190113863256166, "grad_norm": 0.9033369072544559, "learning_rate": 2.642374707849752e-06, "loss": 0.3467, "step": 13324 }, { "epoch": 0.719065349954131, "grad_norm": 1.110861907016974, "learning_rate": 2.6417854224130045e-06, "loss": 0.5541, "step": 13325 }, { "epoch": 0.7191193135826452, "grad_norm": 1.054662168254958, "learning_rate": 2.6411962191242336e-06, "loss": 0.4239, "step": 13326 }, { "epoch": 0.7191732772111596, "grad_norm": 0.9245187832613796, "learning_rate": 2.6406070980003705e-06, "loss": 0.3075, "step": 13327 }, { "epoch": 0.719227240839674, "grad_norm": 1.3757893422438396, "learning_rate": 2.640018059058348e-06, "loss": 0.6885, "step": 13328 }, { "epoch": 0.7192812044681884, "grad_norm": 1.059567460082278, "learning_rate": 2.6394291023150975e-06, "loss": 0.5588, "step": 13329 }, { "epoch": 0.7193351680967028, "grad_norm": 0.9095210469799303, "learning_rate": 2.638840227787544e-06, "loss": 0.332, "step": 13330 }, { "epoch": 0.7193891317252172, "grad_norm": 1.0251321780202733, "learning_rate": 2.6382514354926135e-06, "loss": 0.4278, "step": 13331 }, { "epoch": 0.7194430953537316, "grad_norm": 0.9043921216248175, "learning_rate": 2.6376627254472287e-06, "loss": 0.3895, "step": 13332 }, { "epoch": 0.7194970589822459, "grad_norm": 0.8462545838853975, "learning_rate": 2.6370740976683108e-06, "loss": 0.5038, "step": 13333 }, { "epoch": 0.7195510226107603, "grad_norm": 0.8728069422776251, "learning_rate": 2.6364855521727737e-06, "loss": 0.3258, "step": 13334 }, { "epoch": 0.7196049862392747, "grad_norm": 1.143134116069217, "learning_rate": 2.635897088977536e-06, "loss": 0.6689, "step": 13335 }, { "epoch": 0.7196589498677891, "grad_norm": 1.1262566446839823, "learning_rate": 2.63530870809951e-06, "loss": 0.494, "step": 13336 }, { "epoch": 0.7197129134963035, "grad_norm": 1.044147974697176, "learning_rate": 2.634720409555606e-06, "loss": 0.5335, "step": 13337 }, { "epoch": 0.7197668771248179, "grad_norm": 1.222035072313866, "learning_rate": 2.6341321933627327e-06, "loss": 0.6227, "step": 13338 }, { "epoch": 0.7198208407533323, "grad_norm": 1.0395755569582927, "learning_rate": 2.6335440595377957e-06, "loss": 0.5965, "step": 13339 }, { "epoch": 
0.7198748043818466, "grad_norm": 0.8866248245066388, "learning_rate": 2.632956008097701e-06, "loss": 0.4333, "step": 13340 }, { "epoch": 0.719928768010361, "grad_norm": 1.1266294740935232, "learning_rate": 2.6323680390593454e-06, "loss": 0.4582, "step": 13341 }, { "epoch": 0.7199827316388754, "grad_norm": 1.0365631193763172, "learning_rate": 2.6317801524396304e-06, "loss": 0.4067, "step": 13342 }, { "epoch": 0.7200366952673898, "grad_norm": 1.2500611994750814, "learning_rate": 2.6311923482554513e-06, "loss": 0.5949, "step": 13343 }, { "epoch": 0.7200906588959042, "grad_norm": 1.1883191130209865, "learning_rate": 2.630604626523703e-06, "loss": 0.4652, "step": 13344 }, { "epoch": 0.7201446225244186, "grad_norm": 0.7908549345485597, "learning_rate": 2.6300169872612768e-06, "loss": 0.3869, "step": 13345 }, { "epoch": 0.720198586152933, "grad_norm": 0.8211297297374475, "learning_rate": 2.629429430485063e-06, "loss": 0.3035, "step": 13346 }, { "epoch": 0.7202525497814473, "grad_norm": 1.271109007667921, "learning_rate": 2.628841956211948e-06, "loss": 0.6146, "step": 13347 }, { "epoch": 0.7203065134099617, "grad_norm": 1.0856460892834938, "learning_rate": 2.628254564458815e-06, "loss": 0.4812, "step": 13348 }, { "epoch": 0.720360477038476, "grad_norm": 0.9051195223617481, "learning_rate": 2.6276672552425485e-06, "loss": 0.3836, "step": 13349 }, { "epoch": 0.7204144406669905, "grad_norm": 0.9196845608309803, "learning_rate": 2.6270800285800268e-06, "loss": 0.4353, "step": 13350 }, { "epoch": 0.7204684042955048, "grad_norm": 1.0186971290573599, "learning_rate": 2.62649288448813e-06, "loss": 0.4908, "step": 13351 }, { "epoch": 0.7205223679240192, "grad_norm": 1.0945365479637779, "learning_rate": 2.62590582298373e-06, "loss": 0.4769, "step": 13352 }, { "epoch": 0.7205763315525336, "grad_norm": 1.1863620372870605, "learning_rate": 2.6253188440837e-06, "loss": 0.5071, "step": 13353 }, { "epoch": 0.7206302951810479, "grad_norm": 1.0597391744809816, "learning_rate": 2.624731947804911e-06, "loss": 0.4467, "step": 13354 }, { "epoch": 0.7206842588095623, "grad_norm": 0.8131773599336587, "learning_rate": 2.6241451341642317e-06, "loss": 0.333, "step": 13355 }, { "epoch": 0.7207382224380767, "grad_norm": 1.1090944625871009, "learning_rate": 2.6235584031785268e-06, "loss": 0.5467, "step": 13356 }, { "epoch": 0.7207921860665911, "grad_norm": 1.0252940983541994, "learning_rate": 2.6229717548646592e-06, "loss": 0.4827, "step": 13357 }, { "epoch": 0.7208461496951055, "grad_norm": 0.7778224259606199, "learning_rate": 2.6223851892394914e-06, "loss": 0.3416, "step": 13358 }, { "epoch": 0.7209001133236199, "grad_norm": 1.3489244281892114, "learning_rate": 2.62179870631988e-06, "loss": 0.6663, "step": 13359 }, { "epoch": 0.7209540769521343, "grad_norm": 1.0365589194355815, "learning_rate": 2.6212123061226825e-06, "loss": 0.5552, "step": 13360 }, { "epoch": 0.7210080405806486, "grad_norm": 0.8988408443706264, "learning_rate": 2.6206259886647533e-06, "loss": 0.3817, "step": 13361 }, { "epoch": 0.721062004209163, "grad_norm": 1.067960249452255, "learning_rate": 2.6200397539629418e-06, "loss": 0.4182, "step": 13362 }, { "epoch": 0.7211159678376774, "grad_norm": 1.2370291401363136, "learning_rate": 2.619453602034097e-06, "loss": 0.7044, "step": 13363 }, { "epoch": 0.7211699314661918, "grad_norm": 1.025091635631566, "learning_rate": 2.6188675328950665e-06, "loss": 0.4706, "step": 13364 }, { "epoch": 0.7212238950947062, "grad_norm": 0.9636420401400061, "learning_rate": 2.6182815465626943e-06, "loss": 0.5533, "step": 13365 }, { 
"epoch": 0.7212778587232206, "grad_norm": 0.6893176321208639, "learning_rate": 2.6176956430538225e-06, "loss": 0.3112, "step": 13366 }, { "epoch": 0.721331822351735, "grad_norm": 1.0299483787681833, "learning_rate": 2.6171098223852896e-06, "loss": 0.4938, "step": 13367 }, { "epoch": 0.7213857859802493, "grad_norm": 0.9390610185474342, "learning_rate": 2.616524084573936e-06, "loss": 0.4909, "step": 13368 }, { "epoch": 0.7214397496087637, "grad_norm": 1.1209523498051224, "learning_rate": 2.6159384296365908e-06, "loss": 0.4912, "step": 13369 }, { "epoch": 0.7214937132372781, "grad_norm": 0.952356167557773, "learning_rate": 2.61535285759009e-06, "loss": 0.3578, "step": 13370 }, { "epoch": 0.7215476768657925, "grad_norm": 1.0612729591430898, "learning_rate": 2.6147673684512625e-06, "loss": 0.4151, "step": 13371 }, { "epoch": 0.7216016404943069, "grad_norm": 0.8696471916677225, "learning_rate": 2.614181962236937e-06, "loss": 0.3292, "step": 13372 }, { "epoch": 0.7216556041228213, "grad_norm": 0.9750245167594038, "learning_rate": 2.6135966389639368e-06, "loss": 0.4625, "step": 13373 }, { "epoch": 0.7217095677513355, "grad_norm": 1.4664200522554673, "learning_rate": 2.6130113986490876e-06, "loss": 0.5737, "step": 13374 }, { "epoch": 0.7217635313798499, "grad_norm": 1.0433685219271545, "learning_rate": 2.612426241309207e-06, "loss": 0.5445, "step": 13375 }, { "epoch": 0.7218174950083643, "grad_norm": 1.243240134938577, "learning_rate": 2.611841166961112e-06, "loss": 0.6417, "step": 13376 }, { "epoch": 0.7218714586368787, "grad_norm": 0.9297417461358365, "learning_rate": 2.611256175621622e-06, "loss": 0.4172, "step": 13377 }, { "epoch": 0.7219254222653931, "grad_norm": 0.9800100035186387, "learning_rate": 2.610671267307548e-06, "loss": 0.3942, "step": 13378 }, { "epoch": 0.7219793858939075, "grad_norm": 1.3858848249656541, "learning_rate": 2.6100864420357007e-06, "loss": 0.5304, "step": 13379 }, { "epoch": 0.7220333495224219, "grad_norm": 0.8999764526867546, "learning_rate": 2.6095016998228896e-06, "loss": 0.4209, "step": 13380 }, { "epoch": 0.7220873131509362, "grad_norm": 0.8346293751623973, "learning_rate": 2.6089170406859223e-06, "loss": 0.3763, "step": 13381 }, { "epoch": 0.7221412767794506, "grad_norm": 0.9039919118207718, "learning_rate": 2.6083324646415975e-06, "loss": 0.3612, "step": 13382 }, { "epoch": 0.722195240407965, "grad_norm": 1.1546840769463755, "learning_rate": 2.6077479717067208e-06, "loss": 0.5268, "step": 13383 }, { "epoch": 0.7222492040364794, "grad_norm": 1.077537886080401, "learning_rate": 2.6071635618980888e-06, "loss": 0.4941, "step": 13384 }, { "epoch": 0.7223031676649938, "grad_norm": 0.9066313291098543, "learning_rate": 2.606579235232499e-06, "loss": 0.476, "step": 13385 }, { "epoch": 0.7223571312935082, "grad_norm": 1.0968952459798218, "learning_rate": 2.605994991726747e-06, "loss": 0.3986, "step": 13386 }, { "epoch": 0.7224110949220226, "grad_norm": 1.2058814298839027, "learning_rate": 2.605410831397622e-06, "loss": 0.5272, "step": 13387 }, { "epoch": 0.7224650585505369, "grad_norm": 1.2445512966540968, "learning_rate": 2.604826754261916e-06, "loss": 0.4773, "step": 13388 }, { "epoch": 0.7225190221790513, "grad_norm": 1.2496243249704435, "learning_rate": 2.6042427603364128e-06, "loss": 0.544, "step": 13389 }, { "epoch": 0.7225729858075657, "grad_norm": 1.237062632110237, "learning_rate": 2.603658849637899e-06, "loss": 0.4594, "step": 13390 }, { "epoch": 0.7226269494360801, "grad_norm": 1.1233287926250792, "learning_rate": 2.603075022183157e-06, "loss": 0.5654, "step": 
13391 }, { "epoch": 0.7226809130645945, "grad_norm": 1.1428239001013867, "learning_rate": 2.6024912779889644e-06, "loss": 0.3955, "step": 13392 }, { "epoch": 0.7227348766931089, "grad_norm": 1.0847401353029984, "learning_rate": 2.6019076170721016e-06, "loss": 0.3706, "step": 13393 }, { "epoch": 0.7227888403216233, "grad_norm": 1.007130351347291, "learning_rate": 2.601324039449342e-06, "loss": 0.4198, "step": 13394 }, { "epoch": 0.7228428039501376, "grad_norm": 1.0234229307057372, "learning_rate": 2.600740545137459e-06, "loss": 0.4032, "step": 13395 }, { "epoch": 0.722896767578652, "grad_norm": 1.1563523393555486, "learning_rate": 2.6001571341532217e-06, "loss": 0.4983, "step": 13396 }, { "epoch": 0.7229507312071664, "grad_norm": 0.864507303855469, "learning_rate": 2.599573806513398e-06, "loss": 0.3891, "step": 13397 }, { "epoch": 0.7230046948356808, "grad_norm": 0.9588198198261277, "learning_rate": 2.5989905622347538e-06, "loss": 0.4734, "step": 13398 }, { "epoch": 0.7230586584641951, "grad_norm": 0.8226653585812616, "learning_rate": 2.598407401334052e-06, "loss": 0.2411, "step": 13399 }, { "epoch": 0.7231126220927095, "grad_norm": 1.0402009078820305, "learning_rate": 2.5978243238280532e-06, "loss": 0.5403, "step": 13400 }, { "epoch": 0.7231665857212239, "grad_norm": 0.9485042295359996, "learning_rate": 2.5972413297335163e-06, "loss": 0.4309, "step": 13401 }, { "epoch": 0.7232205493497382, "grad_norm": 0.9447265521248864, "learning_rate": 2.596658419067196e-06, "loss": 0.3598, "step": 13402 }, { "epoch": 0.7232745129782526, "grad_norm": 1.181815959235528, "learning_rate": 2.5960755918458468e-06, "loss": 0.438, "step": 13403 }, { "epoch": 0.723328476606767, "grad_norm": 1.1464676224211592, "learning_rate": 2.5954928480862198e-06, "loss": 0.5186, "step": 13404 }, { "epoch": 0.7233824402352814, "grad_norm": 0.8878367893950712, "learning_rate": 2.594910187805062e-06, "loss": 0.3149, "step": 13405 }, { "epoch": 0.7234364038637958, "grad_norm": 1.0687067475905487, "learning_rate": 2.594327611019121e-06, "loss": 0.5901, "step": 13406 }, { "epoch": 0.7234903674923102, "grad_norm": 1.0076934363885313, "learning_rate": 2.593745117745141e-06, "loss": 0.4113, "step": 13407 }, { "epoch": 0.7235443311208246, "grad_norm": 0.8418182694276831, "learning_rate": 2.593162707999862e-06, "loss": 0.2749, "step": 13408 }, { "epoch": 0.7235982947493389, "grad_norm": 0.9521206149220434, "learning_rate": 2.5925803818000267e-06, "loss": 0.349, "step": 13409 }, { "epoch": 0.7236522583778533, "grad_norm": 1.0032223209487299, "learning_rate": 2.5919981391623665e-06, "loss": 0.4346, "step": 13410 }, { "epoch": 0.7237062220063677, "grad_norm": 1.2622900497425407, "learning_rate": 2.5914159801036186e-06, "loss": 0.5471, "step": 13411 }, { "epoch": 0.7237601856348821, "grad_norm": 1.0024412227741937, "learning_rate": 2.590833904640514e-06, "loss": 0.4045, "step": 13412 }, { "epoch": 0.7238141492633965, "grad_norm": 1.0685179347140787, "learning_rate": 2.590251912789783e-06, "loss": 0.6033, "step": 13413 }, { "epoch": 0.7238681128919109, "grad_norm": 0.9030563653256031, "learning_rate": 2.589670004568151e-06, "loss": 0.3373, "step": 13414 }, { "epoch": 0.7239220765204253, "grad_norm": 1.2139581660842473, "learning_rate": 2.589088179992345e-06, "loss": 0.5355, "step": 13415 }, { "epoch": 0.7239760401489396, "grad_norm": 1.108809755600344, "learning_rate": 2.588506439079087e-06, "loss": 0.4288, "step": 13416 }, { "epoch": 0.724030003777454, "grad_norm": 1.086472818029685, "learning_rate": 2.5879247818450935e-06, "loss": 
0.5728, "step": 13417 }, { "epoch": 0.7240839674059684, "grad_norm": 1.0433047513856017, "learning_rate": 2.587343208307085e-06, "loss": 0.3749, "step": 13418 }, { "epoch": 0.7241379310344828, "grad_norm": 1.103855316603011, "learning_rate": 2.586761718481776e-06, "loss": 0.665, "step": 13419 }, { "epoch": 0.7241918946629972, "grad_norm": 1.150991632653524, "learning_rate": 2.586180312385878e-06, "loss": 0.5238, "step": 13420 }, { "epoch": 0.7242458582915116, "grad_norm": 0.8619040356298572, "learning_rate": 2.585598990036103e-06, "loss": 0.4174, "step": 13421 }, { "epoch": 0.724299821920026, "grad_norm": 1.0253954306758601, "learning_rate": 2.585017751449157e-06, "loss": 0.4326, "step": 13422 }, { "epoch": 0.7243537855485402, "grad_norm": 1.0438291757955294, "learning_rate": 2.5844365966417485e-06, "loss": 0.4519, "step": 13423 }, { "epoch": 0.7244077491770546, "grad_norm": 0.9144167466397051, "learning_rate": 2.5838555256305757e-06, "loss": 0.3725, "step": 13424 }, { "epoch": 0.724461712805569, "grad_norm": 1.0208303272009667, "learning_rate": 2.583274538432342e-06, "loss": 0.6227, "step": 13425 }, { "epoch": 0.7245156764340834, "grad_norm": 1.0057592403421007, "learning_rate": 2.5826936350637453e-06, "loss": 0.3794, "step": 13426 }, { "epoch": 0.7245696400625978, "grad_norm": 0.9345804932280786, "learning_rate": 2.582112815541481e-06, "loss": 0.4171, "step": 13427 }, { "epoch": 0.7246236036911122, "grad_norm": 1.2513109999716336, "learning_rate": 2.5815320798822435e-06, "loss": 0.5249, "step": 13428 }, { "epoch": 0.7246775673196266, "grad_norm": 0.8875130433898615, "learning_rate": 2.5809514281027226e-06, "loss": 0.5018, "step": 13429 }, { "epoch": 0.7247315309481409, "grad_norm": 0.7784890537793363, "learning_rate": 2.5803708602196078e-06, "loss": 0.4323, "step": 13430 }, { "epoch": 0.7247854945766553, "grad_norm": 1.0115762304953475, "learning_rate": 2.579790376249584e-06, "loss": 0.4737, "step": 13431 }, { "epoch": 0.7248394582051697, "grad_norm": 1.1113212079010701, "learning_rate": 2.579209976209335e-06, "loss": 0.5808, "step": 13432 }, { "epoch": 0.7248934218336841, "grad_norm": 0.9544041116536818, "learning_rate": 2.5786296601155427e-06, "loss": 0.3852, "step": 13433 }, { "epoch": 0.7249473854621985, "grad_norm": 1.0139030283395896, "learning_rate": 2.5780494279848854e-06, "loss": 0.4283, "step": 13434 }, { "epoch": 0.7250013490907129, "grad_norm": 0.9006901988034783, "learning_rate": 2.5774692798340407e-06, "loss": 0.447, "step": 13435 }, { "epoch": 0.7250553127192273, "grad_norm": 1.0434744400956788, "learning_rate": 2.5768892156796815e-06, "loss": 0.4215, "step": 13436 }, { "epoch": 0.7251092763477416, "grad_norm": 1.071165558705846, "learning_rate": 2.5763092355384812e-06, "loss": 0.5398, "step": 13437 }, { "epoch": 0.725163239976256, "grad_norm": 1.0606191089530117, "learning_rate": 2.575729339427106e-06, "loss": 0.4677, "step": 13438 }, { "epoch": 0.7252172036047704, "grad_norm": 1.1135351453426188, "learning_rate": 2.575149527362225e-06, "loss": 0.496, "step": 13439 }, { "epoch": 0.7252711672332848, "grad_norm": 1.065480867671898, "learning_rate": 2.574569799360501e-06, "loss": 0.4069, "step": 13440 }, { "epoch": 0.7253251308617992, "grad_norm": 1.1215321995261585, "learning_rate": 2.5739901554385977e-06, "loss": 0.5248, "step": 13441 }, { "epoch": 0.7253790944903136, "grad_norm": 1.142664167430887, "learning_rate": 2.5734105956131737e-06, "loss": 0.503, "step": 13442 }, { "epoch": 0.7254330581188279, "grad_norm": 0.8838402510335802, "learning_rate": 2.572831119900888e-06, 
"loss": 0.3703, "step": 13443 }, { "epoch": 0.7254870217473423, "grad_norm": 0.7440157655215867, "learning_rate": 2.5722517283183914e-06, "loss": 0.2576, "step": 13444 }, { "epoch": 0.7255409853758567, "grad_norm": 1.106035769709015, "learning_rate": 2.5716724208823383e-06, "loss": 0.4654, "step": 13445 }, { "epoch": 0.725594949004371, "grad_norm": 1.1140617198996015, "learning_rate": 2.5710931976093797e-06, "loss": 0.4646, "step": 13446 }, { "epoch": 0.7256489126328854, "grad_norm": 0.9160680846340389, "learning_rate": 2.5705140585161613e-06, "loss": 0.3731, "step": 13447 }, { "epoch": 0.7257028762613998, "grad_norm": 1.0429912849896734, "learning_rate": 2.5699350036193287e-06, "loss": 0.5195, "step": 13448 }, { "epoch": 0.7257568398899142, "grad_norm": 1.0244798290902768, "learning_rate": 2.569356032935525e-06, "loss": 0.3341, "step": 13449 }, { "epoch": 0.7258108035184285, "grad_norm": 0.9959707542058127, "learning_rate": 2.5687771464813906e-06, "loss": 0.4311, "step": 13450 }, { "epoch": 0.7258647671469429, "grad_norm": 1.1487090686999863, "learning_rate": 2.5681983442735626e-06, "loss": 0.4549, "step": 13451 }, { "epoch": 0.7259187307754573, "grad_norm": 0.9134372755459692, "learning_rate": 2.5676196263286755e-06, "loss": 0.3628, "step": 13452 }, { "epoch": 0.7259726944039717, "grad_norm": 1.0126358520911054, "learning_rate": 2.5670409926633642e-06, "loss": 0.439, "step": 13453 }, { "epoch": 0.7260266580324861, "grad_norm": 1.249538990912904, "learning_rate": 2.5664624432942585e-06, "loss": 0.5967, "step": 13454 }, { "epoch": 0.7260806216610005, "grad_norm": 1.0292338761775375, "learning_rate": 2.565883978237986e-06, "loss": 0.5033, "step": 13455 }, { "epoch": 0.7261345852895149, "grad_norm": 1.1682916736302917, "learning_rate": 2.565305597511173e-06, "loss": 0.5272, "step": 13456 }, { "epoch": 0.7261885489180292, "grad_norm": 0.770031822145691, "learning_rate": 2.5647273011304435e-06, "loss": 0.2736, "step": 13457 }, { "epoch": 0.7262425125465436, "grad_norm": 0.8966492766576014, "learning_rate": 2.5641490891124168e-06, "loss": 0.4594, "step": 13458 }, { "epoch": 0.726296476175058, "grad_norm": 1.0921978061560218, "learning_rate": 2.5635709614737103e-06, "loss": 0.5292, "step": 13459 }, { "epoch": 0.7263504398035724, "grad_norm": 1.0503412928697025, "learning_rate": 2.562992918230942e-06, "loss": 0.4296, "step": 13460 }, { "epoch": 0.7264044034320868, "grad_norm": 0.8969878652004784, "learning_rate": 2.562414959400725e-06, "loss": 0.3714, "step": 13461 }, { "epoch": 0.7264583670606012, "grad_norm": 1.1431141316852604, "learning_rate": 2.56183708499967e-06, "loss": 0.508, "step": 13462 }, { "epoch": 0.7265123306891156, "grad_norm": 1.106903686496617, "learning_rate": 2.561259295044386e-06, "loss": 0.6066, "step": 13463 }, { "epoch": 0.7265662943176299, "grad_norm": 0.8486312353684848, "learning_rate": 2.56068158955148e-06, "loss": 0.3854, "step": 13464 }, { "epoch": 0.7266202579461443, "grad_norm": 1.0310339183529158, "learning_rate": 2.5601039685375545e-06, "loss": 0.5855, "step": 13465 }, { "epoch": 0.7266742215746587, "grad_norm": 0.9602753127440643, "learning_rate": 2.5595264320192115e-06, "loss": 0.4226, "step": 13466 }, { "epoch": 0.7267281852031731, "grad_norm": 1.0829597269121054, "learning_rate": 2.5589489800130502e-06, "loss": 0.4118, "step": 13467 }, { "epoch": 0.7267821488316875, "grad_norm": 0.9558687924618091, "learning_rate": 2.558371612535666e-06, "loss": 0.442, "step": 13468 }, { "epoch": 0.7268361124602019, "grad_norm": 1.1711716406608983, "learning_rate": 
2.557794329603655e-06, "loss": 0.5272, "step": 13469 }, { "epoch": 0.7268900760887163, "grad_norm": 1.000493603825545, "learning_rate": 2.557217131233607e-06, "loss": 0.514, "step": 13470 }, { "epoch": 0.7269440397172305, "grad_norm": 1.0261834783316424, "learning_rate": 2.556640017442114e-06, "loss": 0.4245, "step": 13471 }, { "epoch": 0.7269980033457449, "grad_norm": 0.9762760330617286, "learning_rate": 2.55606298824576e-06, "loss": 0.4514, "step": 13472 }, { "epoch": 0.7270519669742593, "grad_norm": 1.0071282819837983, "learning_rate": 2.5554860436611293e-06, "loss": 0.4028, "step": 13473 }, { "epoch": 0.7271059306027737, "grad_norm": 1.0434496217640248, "learning_rate": 2.554909183704806e-06, "loss": 0.4258, "step": 13474 }, { "epoch": 0.7271598942312881, "grad_norm": 1.1341355863625404, "learning_rate": 2.5543324083933673e-06, "loss": 0.486, "step": 13475 }, { "epoch": 0.7272138578598025, "grad_norm": 0.9697054685378246, "learning_rate": 2.5537557177433923e-06, "loss": 0.414, "step": 13476 }, { "epoch": 0.7272678214883169, "grad_norm": 0.9446106254973385, "learning_rate": 2.553179111771455e-06, "loss": 0.4844, "step": 13477 }, { "epoch": 0.7273217851168312, "grad_norm": 1.0125567262800435, "learning_rate": 2.5526025904941287e-06, "loss": 0.4557, "step": 13478 }, { "epoch": 0.7273757487453456, "grad_norm": 1.1275198041556171, "learning_rate": 2.552026153927981e-06, "loss": 0.5271, "step": 13479 }, { "epoch": 0.72742971237386, "grad_norm": 1.3798050295767914, "learning_rate": 2.551449802089581e-06, "loss": 0.6914, "step": 13480 }, { "epoch": 0.7274836760023744, "grad_norm": 0.8693669713270226, "learning_rate": 2.5508735349954915e-06, "loss": 0.3983, "step": 13481 }, { "epoch": 0.7275376396308888, "grad_norm": 0.8918954738618847, "learning_rate": 2.5502973526622765e-06, "loss": 0.4955, "step": 13482 }, { "epoch": 0.7275916032594032, "grad_norm": 1.0883456788876844, "learning_rate": 2.5497212551064975e-06, "loss": 0.5499, "step": 13483 }, { "epoch": 0.7276455668879176, "grad_norm": 1.1435157567624432, "learning_rate": 2.5491452423447092e-06, "loss": 0.5367, "step": 13484 }, { "epoch": 0.7276995305164319, "grad_norm": 1.021574063750877, "learning_rate": 2.5485693143934704e-06, "loss": 0.4892, "step": 13485 }, { "epoch": 0.7277534941449463, "grad_norm": 1.1201109276828871, "learning_rate": 2.5479934712693304e-06, "loss": 0.3436, "step": 13486 }, { "epoch": 0.7278074577734607, "grad_norm": 0.859690391785482, "learning_rate": 2.5474177129888405e-06, "loss": 0.4377, "step": 13487 }, { "epoch": 0.7278614214019751, "grad_norm": 1.014996765384532, "learning_rate": 2.546842039568549e-06, "loss": 0.3723, "step": 13488 }, { "epoch": 0.7279153850304895, "grad_norm": 1.0336977387838089, "learning_rate": 2.546266451025001e-06, "loss": 0.3774, "step": 13489 }, { "epoch": 0.7279693486590039, "grad_norm": 1.0266008437310108, "learning_rate": 2.5456909473747404e-06, "loss": 0.4434, "step": 13490 }, { "epoch": 0.7280233122875183, "grad_norm": 1.2584558355916085, "learning_rate": 2.5451155286343065e-06, "loss": 0.6913, "step": 13491 }, { "epoch": 0.7280772759160326, "grad_norm": 0.954501971458651, "learning_rate": 2.54454019482024e-06, "loss": 0.379, "step": 13492 }, { "epoch": 0.728131239544547, "grad_norm": 1.0240603495366793, "learning_rate": 2.5439649459490724e-06, "loss": 0.3903, "step": 13493 }, { "epoch": 0.7281852031730613, "grad_norm": 1.1967391184068452, "learning_rate": 2.54338978203734e-06, "loss": 0.5204, "step": 13494 }, { "epoch": 0.7282391668015757, "grad_norm": 0.7955382193684571, 
"learning_rate": 2.5428147031015716e-06, "loss": 0.3488, "step": 13495 }, { "epoch": 0.7282931304300901, "grad_norm": 0.8121678564221708, "learning_rate": 2.5422397091582973e-06, "loss": 0.3459, "step": 13496 }, { "epoch": 0.7283470940586045, "grad_norm": 1.0608005243740393, "learning_rate": 2.541664800224042e-06, "loss": 0.3905, "step": 13497 }, { "epoch": 0.7284010576871189, "grad_norm": 0.8965358985749823, "learning_rate": 2.54108997631533e-06, "loss": 0.3919, "step": 13498 }, { "epoch": 0.7284550213156332, "grad_norm": 1.0057197680911782, "learning_rate": 2.540515237448681e-06, "loss": 0.4765, "step": 13499 }, { "epoch": 0.7285089849441476, "grad_norm": 0.7639110577649296, "learning_rate": 2.5399405836406165e-06, "loss": 0.3225, "step": 13500 }, { "epoch": 0.7285089849441476, "eval_loss": 0.5345942378044128, "eval_runtime": 166.1813, "eval_samples_per_second": 20.694, "eval_steps_per_second": 0.867, "step": 13500 }, { "epoch": 0.728562948572662, "grad_norm": 0.9832073302542869, "learning_rate": 2.5393660149076495e-06, "loss": 0.4193, "step": 13501 }, { "epoch": 0.7286169122011764, "grad_norm": 1.001608204073589, "learning_rate": 2.538791531266294e-06, "loss": 0.378, "step": 13502 }, { "epoch": 0.7286708758296908, "grad_norm": 1.1774002168067876, "learning_rate": 2.538217132733063e-06, "loss": 0.465, "step": 13503 }, { "epoch": 0.7287248394582052, "grad_norm": 1.1741578246835056, "learning_rate": 2.537642819324463e-06, "loss": 0.4887, "step": 13504 }, { "epoch": 0.7287788030867196, "grad_norm": 0.9866896679432774, "learning_rate": 2.5370685910570027e-06, "loss": 0.479, "step": 13505 }, { "epoch": 0.7288327667152339, "grad_norm": 1.143813844002504, "learning_rate": 2.5364944479471836e-06, "loss": 0.5776, "step": 13506 }, { "epoch": 0.7288867303437483, "grad_norm": 1.1072261202458875, "learning_rate": 2.5359203900115093e-06, "loss": 0.5148, "step": 13507 }, { "epoch": 0.7289406939722627, "grad_norm": 1.0148745440345333, "learning_rate": 2.5353464172664777e-06, "loss": 0.4708, "step": 13508 }, { "epoch": 0.7289946576007771, "grad_norm": 1.1720563683039176, "learning_rate": 2.5347725297285864e-06, "loss": 0.4011, "step": 13509 }, { "epoch": 0.7290486212292915, "grad_norm": 0.9104143572540124, "learning_rate": 2.5341987274143274e-06, "loss": 0.4954, "step": 13510 }, { "epoch": 0.7291025848578059, "grad_norm": 0.9017142091992483, "learning_rate": 2.5336250103401948e-06, "loss": 0.4026, "step": 13511 }, { "epoch": 0.7291565484863202, "grad_norm": 1.0736617218350282, "learning_rate": 2.5330513785226786e-06, "loss": 0.5487, "step": 13512 }, { "epoch": 0.7292105121148346, "grad_norm": 1.0644121129066744, "learning_rate": 2.5324778319782617e-06, "loss": 0.4671, "step": 13513 }, { "epoch": 0.729264475743349, "grad_norm": 1.1947133589908543, "learning_rate": 2.531904370723429e-06, "loss": 0.5197, "step": 13514 }, { "epoch": 0.7293184393718634, "grad_norm": 1.01309768018476, "learning_rate": 2.5313309947746656e-06, "loss": 0.5126, "step": 13515 }, { "epoch": 0.7293724030003778, "grad_norm": 0.8288609496775764, "learning_rate": 2.530757704148448e-06, "loss": 0.3009, "step": 13516 }, { "epoch": 0.7294263666288922, "grad_norm": 1.0208044554015725, "learning_rate": 2.5301844988612544e-06, "loss": 0.3674, "step": 13517 }, { "epoch": 0.7294803302574066, "grad_norm": 0.901744983702518, "learning_rate": 2.5296113789295597e-06, "loss": 0.4087, "step": 13518 }, { "epoch": 0.7295342938859208, "grad_norm": 0.9915973537370705, "learning_rate": 2.529038344369836e-06, "loss": 0.4455, "step": 13519 }, { "epoch": 
0.7295882575144352, "grad_norm": 1.1429009277003839, "learning_rate": 2.5284653951985513e-06, "loss": 0.5089, "step": 13520 }, { "epoch": 0.7296422211429496, "grad_norm": 1.202684908676763, "learning_rate": 2.527892531432173e-06, "loss": 0.64, "step": 13521 }, { "epoch": 0.729696184771464, "grad_norm": 1.1542696897555655, "learning_rate": 2.5273197530871664e-06, "loss": 0.516, "step": 13522 }, { "epoch": 0.7297501483999784, "grad_norm": 0.9751391069634686, "learning_rate": 2.526747060179995e-06, "loss": 0.4185, "step": 13523 }, { "epoch": 0.7298041120284928, "grad_norm": 1.0488335385185037, "learning_rate": 2.526174452727116e-06, "loss": 0.4039, "step": 13524 }, { "epoch": 0.7298580756570072, "grad_norm": 1.059894313143924, "learning_rate": 2.525601930744989e-06, "loss": 0.5263, "step": 13525 }, { "epoch": 0.7299120392855215, "grad_norm": 1.0019448435580318, "learning_rate": 2.5250294942500687e-06, "loss": 0.525, "step": 13526 }, { "epoch": 0.7299660029140359, "grad_norm": 0.9827383612756054, "learning_rate": 2.5244571432588048e-06, "loss": 0.3808, "step": 13527 }, { "epoch": 0.7300199665425503, "grad_norm": 1.0578705980188305, "learning_rate": 2.52388487778765e-06, "loss": 0.4301, "step": 13528 }, { "epoch": 0.7300739301710647, "grad_norm": 1.0158402282433983, "learning_rate": 2.523312697853051e-06, "loss": 0.4724, "step": 13529 }, { "epoch": 0.7301278937995791, "grad_norm": 1.1174990974858967, "learning_rate": 2.522740603471453e-06, "loss": 0.6041, "step": 13530 }, { "epoch": 0.7301818574280935, "grad_norm": 1.049559431345731, "learning_rate": 2.522168594659298e-06, "loss": 0.3712, "step": 13531 }, { "epoch": 0.7302358210566079, "grad_norm": 1.0261827957359164, "learning_rate": 2.5215966714330266e-06, "loss": 0.5127, "step": 13532 }, { "epoch": 0.7302897846851222, "grad_norm": 1.0254581952971413, "learning_rate": 2.521024833809077e-06, "loss": 0.4189, "step": 13533 }, { "epoch": 0.7303437483136366, "grad_norm": 0.9211364903514767, "learning_rate": 2.520453081803883e-06, "loss": 0.4084, "step": 13534 }, { "epoch": 0.730397711942151, "grad_norm": 0.991218799677247, "learning_rate": 2.519881415433878e-06, "loss": 0.4296, "step": 13535 }, { "epoch": 0.7304516755706654, "grad_norm": 0.8895299094397353, "learning_rate": 2.5193098347154926e-06, "loss": 0.3414, "step": 13536 }, { "epoch": 0.7305056391991798, "grad_norm": 1.0574901772512477, "learning_rate": 2.5187383396651538e-06, "loss": 0.5157, "step": 13537 }, { "epoch": 0.7305596028276942, "grad_norm": 1.24006147316401, "learning_rate": 2.5181669302992872e-06, "loss": 0.5984, "step": 13538 }, { "epoch": 0.7306135664562086, "grad_norm": 1.1038539916501577, "learning_rate": 2.5175956066343165e-06, "loss": 0.6318, "step": 13539 }, { "epoch": 0.7306675300847228, "grad_norm": 1.0898484799760983, "learning_rate": 2.517024368686663e-06, "loss": 0.5657, "step": 13540 }, { "epoch": 0.7307214937132372, "grad_norm": 1.5924613782108885, "learning_rate": 2.516453216472742e-06, "loss": 0.7013, "step": 13541 }, { "epoch": 0.7307754573417516, "grad_norm": 0.8855286969647098, "learning_rate": 2.515882150008969e-06, "loss": 0.3234, "step": 13542 }, { "epoch": 0.730829420970266, "grad_norm": 0.8072671750465032, "learning_rate": 2.51531116931176e-06, "loss": 0.386, "step": 13543 }, { "epoch": 0.7308833845987804, "grad_norm": 0.9840547638066884, "learning_rate": 2.5147402743975223e-06, "loss": 0.524, "step": 13544 }, { "epoch": 0.7309373482272948, "grad_norm": 0.7673897870816924, "learning_rate": 2.5141694652826664e-06, "loss": 0.2922, "step": 13545 }, { 
"epoch": 0.7309913118558092, "grad_norm": 0.9949154018146212, "learning_rate": 2.5135987419835966e-06, "loss": 0.4517, "step": 13546 }, { "epoch": 0.7310452754843235, "grad_norm": 1.2938239723651193, "learning_rate": 2.5130281045167183e-06, "loss": 0.4854, "step": 13547 }, { "epoch": 0.7310992391128379, "grad_norm": 0.9376110424973763, "learning_rate": 2.5124575528984286e-06, "loss": 0.3981, "step": 13548 }, { "epoch": 0.7311532027413523, "grad_norm": 1.0644833033560988, "learning_rate": 2.511887087145127e-06, "loss": 0.4767, "step": 13549 }, { "epoch": 0.7312071663698667, "grad_norm": 1.0740469115062115, "learning_rate": 2.511316707273211e-06, "loss": 0.4535, "step": 13550 }, { "epoch": 0.7312611299983811, "grad_norm": 0.8780480252466446, "learning_rate": 2.510746413299072e-06, "loss": 0.3376, "step": 13551 }, { "epoch": 0.7313150936268955, "grad_norm": 1.1841828827984417, "learning_rate": 2.5101762052391023e-06, "loss": 0.6054, "step": 13552 }, { "epoch": 0.7313690572554099, "grad_norm": 1.0696982444712746, "learning_rate": 2.509606083109688e-06, "loss": 0.4148, "step": 13553 }, { "epoch": 0.7314230208839242, "grad_norm": 1.6449616865206025, "learning_rate": 2.509036046927218e-06, "loss": 0.6017, "step": 13554 }, { "epoch": 0.7314769845124386, "grad_norm": 0.9176156445957037, "learning_rate": 2.508466096708073e-06, "loss": 0.4544, "step": 13555 }, { "epoch": 0.731530948140953, "grad_norm": 1.0569769097494108, "learning_rate": 2.507896232468635e-06, "loss": 0.6096, "step": 13556 }, { "epoch": 0.7315849117694674, "grad_norm": 1.0097783821285016, "learning_rate": 2.507326454225283e-06, "loss": 0.4378, "step": 13557 }, { "epoch": 0.7316388753979818, "grad_norm": 1.0055814594832413, "learning_rate": 2.5067567619943934e-06, "loss": 0.6154, "step": 13558 }, { "epoch": 0.7316928390264962, "grad_norm": 1.0235893226596933, "learning_rate": 2.506187155792338e-06, "loss": 0.5591, "step": 13559 }, { "epoch": 0.7317468026550106, "grad_norm": 1.1688391134108418, "learning_rate": 2.505617635635489e-06, "loss": 0.6448, "step": 13560 }, { "epoch": 0.7318007662835249, "grad_norm": 0.8983101615377616, "learning_rate": 2.505048201540216e-06, "loss": 0.3132, "step": 13561 }, { "epoch": 0.7318547299120393, "grad_norm": 0.8376998328789775, "learning_rate": 2.504478853522882e-06, "loss": 0.4014, "step": 13562 }, { "epoch": 0.7319086935405537, "grad_norm": 1.2400460269268394, "learning_rate": 2.5039095915998535e-06, "loss": 0.5787, "step": 13563 }, { "epoch": 0.731962657169068, "grad_norm": 1.05612058390643, "learning_rate": 2.5033404157874903e-06, "loss": 0.6273, "step": 13564 }, { "epoch": 0.7320166207975825, "grad_norm": 0.9434358015120581, "learning_rate": 2.5027713261021513e-06, "loss": 0.3476, "step": 13565 }, { "epoch": 0.7320705844260968, "grad_norm": 1.0927515238425687, "learning_rate": 2.5022023225601926e-06, "loss": 0.424, "step": 13566 }, { "epoch": 0.7321245480546112, "grad_norm": 1.091197315691455, "learning_rate": 2.501633405177969e-06, "loss": 0.5136, "step": 13567 }, { "epoch": 0.7321785116831255, "grad_norm": 0.8696997598052195, "learning_rate": 2.501064573971832e-06, "loss": 0.4966, "step": 13568 }, { "epoch": 0.7322324753116399, "grad_norm": 1.0215244184162315, "learning_rate": 2.500495828958127e-06, "loss": 0.379, "step": 13569 }, { "epoch": 0.7322864389401543, "grad_norm": 1.0416543829776885, "learning_rate": 2.499927170153204e-06, "loss": 0.5633, "step": 13570 }, { "epoch": 0.7323404025686687, "grad_norm": 0.9436452647503643, "learning_rate": 2.499358597573405e-06, "loss": 0.4487, "step": 
13571 }, { "epoch": 0.7323943661971831, "grad_norm": 1.0279375124071728, "learning_rate": 2.498790111235072e-06, "loss": 0.4935, "step": 13572 }, { "epoch": 0.7324483298256975, "grad_norm": 1.0926967528621099, "learning_rate": 2.4982217111545434e-06, "loss": 0.6538, "step": 13573 }, { "epoch": 0.7325022934542118, "grad_norm": 1.0496287782618587, "learning_rate": 2.4976533973481573e-06, "loss": 0.4201, "step": 13574 }, { "epoch": 0.7325562570827262, "grad_norm": 0.9466200707442465, "learning_rate": 2.497085169832245e-06, "loss": 0.3804, "step": 13575 }, { "epoch": 0.7326102207112406, "grad_norm": 1.0367277401205253, "learning_rate": 2.496517028623139e-06, "loss": 0.3724, "step": 13576 }, { "epoch": 0.732664184339755, "grad_norm": 0.9542886878599554, "learning_rate": 2.495948973737168e-06, "loss": 0.4414, "step": 13577 }, { "epoch": 0.7327181479682694, "grad_norm": 1.0080974744991127, "learning_rate": 2.49538100519066e-06, "loss": 0.4808, "step": 13578 }, { "epoch": 0.7327721115967838, "grad_norm": 1.0180316867987034, "learning_rate": 2.494813122999937e-06, "loss": 0.433, "step": 13579 }, { "epoch": 0.7328260752252982, "grad_norm": 1.1500089721177336, "learning_rate": 2.4942453271813214e-06, "loss": 0.5298, "step": 13580 }, { "epoch": 0.7328800388538125, "grad_norm": 0.9379713348556743, "learning_rate": 2.493677617751134e-06, "loss": 0.3808, "step": 13581 }, { "epoch": 0.7329340024823269, "grad_norm": 0.9380738813480819, "learning_rate": 2.493109994725688e-06, "loss": 0.5119, "step": 13582 }, { "epoch": 0.7329879661108413, "grad_norm": 1.0860798537628213, "learning_rate": 2.492542458121299e-06, "loss": 0.3805, "step": 13583 }, { "epoch": 0.7330419297393557, "grad_norm": 1.1372175037614276, "learning_rate": 2.491975007954278e-06, "loss": 0.4384, "step": 13584 }, { "epoch": 0.7330958933678701, "grad_norm": 1.0981269523554062, "learning_rate": 2.491407644240935e-06, "loss": 0.6351, "step": 13585 }, { "epoch": 0.7331498569963845, "grad_norm": 1.0645259090570434, "learning_rate": 2.490840366997576e-06, "loss": 0.5334, "step": 13586 }, { "epoch": 0.7332038206248989, "grad_norm": 1.0598964425689201, "learning_rate": 2.4902731762405063e-06, "loss": 0.5264, "step": 13587 }, { "epoch": 0.7332577842534131, "grad_norm": 0.9550600736919211, "learning_rate": 2.4897060719860274e-06, "loss": 0.4049, "step": 13588 }, { "epoch": 0.7333117478819275, "grad_norm": 0.9954754278515476, "learning_rate": 2.4891390542504363e-06, "loss": 0.4307, "step": 13589 }, { "epoch": 0.7333657115104419, "grad_norm": 0.9733028427572028, "learning_rate": 2.488572123050031e-06, "loss": 0.4574, "step": 13590 }, { "epoch": 0.7334196751389563, "grad_norm": 0.9176073096006121, "learning_rate": 2.488005278401106e-06, "loss": 0.4576, "step": 13591 }, { "epoch": 0.7334736387674707, "grad_norm": 1.0802478905146666, "learning_rate": 2.4874385203199524e-06, "loss": 0.5042, "step": 13592 }, { "epoch": 0.7335276023959851, "grad_norm": 1.0421548996171521, "learning_rate": 2.4868718488228597e-06, "loss": 0.4073, "step": 13593 }, { "epoch": 0.7335815660244995, "grad_norm": 1.0629989707680672, "learning_rate": 2.4863052639261144e-06, "loss": 0.498, "step": 13594 }, { "epoch": 0.7336355296530138, "grad_norm": 1.0240286857011502, "learning_rate": 2.485738765646003e-06, "loss": 0.4064, "step": 13595 }, { "epoch": 0.7336894932815282, "grad_norm": 0.9520207376926747, "learning_rate": 2.4851723539988034e-06, "loss": 0.3592, "step": 13596 }, { "epoch": 0.7337434569100426, "grad_norm": 1.04106787167802, "learning_rate": 2.4846060290007967e-06, "loss": 
0.5149, "step": 13597 }, { "epoch": 0.733797420538557, "grad_norm": 1.0146410992623738, "learning_rate": 2.484039790668259e-06, "loss": 0.4253, "step": 13598 }, { "epoch": 0.7338513841670714, "grad_norm": 1.1754319416356693, "learning_rate": 2.4834736390174656e-06, "loss": 0.5845, "step": 13599 }, { "epoch": 0.7339053477955858, "grad_norm": 0.9747670393679951, "learning_rate": 2.4829075740646876e-06, "loss": 0.5589, "step": 13600 }, { "epoch": 0.7339593114241002, "grad_norm": 1.1321315685064057, "learning_rate": 2.482341595826195e-06, "loss": 0.5668, "step": 13601 }, { "epoch": 0.7340132750526145, "grad_norm": 1.101005115242968, "learning_rate": 2.4817757043182536e-06, "loss": 0.5146, "step": 13602 }, { "epoch": 0.7340672386811289, "grad_norm": 1.1648727827301775, "learning_rate": 2.4812098995571284e-06, "loss": 0.733, "step": 13603 }, { "epoch": 0.7341212023096433, "grad_norm": 1.0850837100201067, "learning_rate": 2.480644181559081e-06, "loss": 0.5119, "step": 13604 }, { "epoch": 0.7341751659381577, "grad_norm": 0.8897772348148945, "learning_rate": 2.48007855034037e-06, "loss": 0.3671, "step": 13605 }, { "epoch": 0.7342291295666721, "grad_norm": 0.87418832366154, "learning_rate": 2.4795130059172547e-06, "loss": 0.3947, "step": 13606 }, { "epoch": 0.7342830931951865, "grad_norm": 0.9755676194728852, "learning_rate": 2.478947548305986e-06, "loss": 0.3876, "step": 13607 }, { "epoch": 0.7343370568237009, "grad_norm": 0.977952299064298, "learning_rate": 2.478382177522818e-06, "loss": 0.4769, "step": 13608 }, { "epoch": 0.7343910204522152, "grad_norm": 1.0739838906969983, "learning_rate": 2.4778168935839987e-06, "loss": 0.4822, "step": 13609 }, { "epoch": 0.7344449840807296, "grad_norm": 0.9898485374483439, "learning_rate": 2.4772516965057757e-06, "loss": 0.4725, "step": 13610 }, { "epoch": 0.734498947709244, "grad_norm": 1.0428449620106803, "learning_rate": 2.476686586304393e-06, "loss": 0.4219, "step": 13611 }, { "epoch": 0.7345529113377584, "grad_norm": 0.9856419712172332, "learning_rate": 2.476121562996093e-06, "loss": 0.4196, "step": 13612 }, { "epoch": 0.7346068749662727, "grad_norm": 1.1462230080751992, "learning_rate": 2.4755566265971147e-06, "loss": 0.5165, "step": 13613 }, { "epoch": 0.7346608385947871, "grad_norm": 1.0550535251954816, "learning_rate": 2.474991777123695e-06, "loss": 0.3833, "step": 13614 }, { "epoch": 0.7347148022233015, "grad_norm": 1.0074439880866302, "learning_rate": 2.4744270145920675e-06, "loss": 0.4751, "step": 13615 }, { "epoch": 0.7347687658518158, "grad_norm": 0.9468332717744762, "learning_rate": 2.4738623390184666e-06, "loss": 0.3485, "step": 13616 }, { "epoch": 0.7348227294803302, "grad_norm": 0.9635066569822315, "learning_rate": 2.4732977504191185e-06, "loss": 0.4575, "step": 13617 }, { "epoch": 0.7348766931088446, "grad_norm": 1.17262902412489, "learning_rate": 2.472733248810251e-06, "loss": 0.5926, "step": 13618 }, { "epoch": 0.734930656737359, "grad_norm": 1.2751822765755176, "learning_rate": 2.472168834208089e-06, "loss": 0.5341, "step": 13619 }, { "epoch": 0.7349846203658734, "grad_norm": 0.9303027016025562, "learning_rate": 2.471604506628854e-06, "loss": 0.4653, "step": 13620 }, { "epoch": 0.7350385839943878, "grad_norm": 1.3230079238450747, "learning_rate": 2.4710402660887655e-06, "loss": 0.5251, "step": 13621 }, { "epoch": 0.7350925476229022, "grad_norm": 0.9670266134905476, "learning_rate": 2.4704761126040404e-06, "loss": 0.4706, "step": 13622 }, { "epoch": 0.7351465112514165, "grad_norm": 1.1396093191413559, "learning_rate": 
2.469912046190894e-06, "loss": 0.4757, "step": 13623 }, { "epoch": 0.7352004748799309, "grad_norm": 0.9620759976206323, "learning_rate": 2.469348066865536e-06, "loss": 0.4038, "step": 13624 }, { "epoch": 0.7352544385084453, "grad_norm": 0.9298080851115904, "learning_rate": 2.4687841746441766e-06, "loss": 0.3902, "step": 13625 }, { "epoch": 0.7353084021369597, "grad_norm": 1.0178579390376412, "learning_rate": 2.468220369543023e-06, "loss": 0.4395, "step": 13626 }, { "epoch": 0.7353623657654741, "grad_norm": 0.9984670481600476, "learning_rate": 2.4676566515782792e-06, "loss": 0.4079, "step": 13627 }, { "epoch": 0.7354163293939885, "grad_norm": 1.0716192236181983, "learning_rate": 2.4670930207661474e-06, "loss": 0.459, "step": 13628 }, { "epoch": 0.7354702930225029, "grad_norm": 0.8730602459956811, "learning_rate": 2.4665294771228267e-06, "loss": 0.4428, "step": 13629 }, { "epoch": 0.7355242566510172, "grad_norm": 1.080405267970498, "learning_rate": 2.4659660206645153e-06, "loss": 0.4513, "step": 13630 }, { "epoch": 0.7355782202795316, "grad_norm": 1.1140804468789152, "learning_rate": 2.4654026514074043e-06, "loss": 0.5027, "step": 13631 }, { "epoch": 0.735632183908046, "grad_norm": 1.0587115137618324, "learning_rate": 2.464839369367688e-06, "loss": 0.429, "step": 13632 }, { "epoch": 0.7356861475365604, "grad_norm": 1.015505211807759, "learning_rate": 2.464276174561555e-06, "loss": 0.4797, "step": 13633 }, { "epoch": 0.7357401111650748, "grad_norm": 1.0489429798040013, "learning_rate": 2.4637130670051913e-06, "loss": 0.4782, "step": 13634 }, { "epoch": 0.7357940747935892, "grad_norm": 1.3163777288203045, "learning_rate": 2.463150046714782e-06, "loss": 0.603, "step": 13635 }, { "epoch": 0.7358480384221036, "grad_norm": 1.1186934562030635, "learning_rate": 2.46258711370651e-06, "loss": 0.5213, "step": 13636 }, { "epoch": 0.7359020020506178, "grad_norm": 1.078850872804275, "learning_rate": 2.462024267996554e-06, "loss": 0.5037, "step": 13637 }, { "epoch": 0.7359559656791322, "grad_norm": 1.0887400252370762, "learning_rate": 2.4614615096010894e-06, "loss": 0.4041, "step": 13638 }, { "epoch": 0.7360099293076466, "grad_norm": 0.8505007585673173, "learning_rate": 2.4608988385362907e-06, "loss": 0.4162, "step": 13639 }, { "epoch": 0.736063892936161, "grad_norm": 1.0884182926644816, "learning_rate": 2.460336254818331e-06, "loss": 0.6185, "step": 13640 }, { "epoch": 0.7361178565646754, "grad_norm": 1.0374396732770828, "learning_rate": 2.459773758463377e-06, "loss": 0.4999, "step": 13641 }, { "epoch": 0.7361718201931898, "grad_norm": 0.9805132244145264, "learning_rate": 2.459211349487599e-06, "loss": 0.5234, "step": 13642 }, { "epoch": 0.7362257838217041, "grad_norm": 1.0678965749028502, "learning_rate": 2.4586490279071597e-06, "loss": 0.4883, "step": 13643 }, { "epoch": 0.7362797474502185, "grad_norm": 0.9254355073156404, "learning_rate": 2.458086793738219e-06, "loss": 0.435, "step": 13644 }, { "epoch": 0.7363337110787329, "grad_norm": 0.9497452178753402, "learning_rate": 2.457524646996938e-06, "loss": 0.4346, "step": 13645 }, { "epoch": 0.7363876747072473, "grad_norm": 1.1111253448502625, "learning_rate": 2.456962587699472e-06, "loss": 0.4149, "step": 13646 }, { "epoch": 0.7364416383357617, "grad_norm": 1.11908850019378, "learning_rate": 2.4564006158619764e-06, "loss": 0.6062, "step": 13647 }, { "epoch": 0.7364956019642761, "grad_norm": 1.1899083835755424, "learning_rate": 2.4558387315006022e-06, "loss": 0.6, "step": 13648 }, { "epoch": 0.7365495655927905, "grad_norm": 0.983051466800003, 
"learning_rate": 2.455276934631499e-06, "loss": 0.402, "step": 13649 }, { "epoch": 0.7366035292213048, "grad_norm": 0.957602831588132, "learning_rate": 2.454715225270815e-06, "loss": 0.4131, "step": 13650 }, { "epoch": 0.7366574928498192, "grad_norm": 0.9957465363494457, "learning_rate": 2.4541536034346906e-06, "loss": 0.4844, "step": 13651 }, { "epoch": 0.7367114564783336, "grad_norm": 0.9593326257838737, "learning_rate": 2.4535920691392696e-06, "loss": 0.472, "step": 13652 }, { "epoch": 0.736765420106848, "grad_norm": 1.155382600831078, "learning_rate": 2.45303062240069e-06, "loss": 0.4199, "step": 13653 }, { "epoch": 0.7368193837353624, "grad_norm": 0.9215367143850469, "learning_rate": 2.4524692632350896e-06, "loss": 0.4403, "step": 13654 }, { "epoch": 0.7368733473638768, "grad_norm": 0.8759481814771279, "learning_rate": 2.451907991658602e-06, "loss": 0.3967, "step": 13655 }, { "epoch": 0.7369273109923912, "grad_norm": 1.02003606941435, "learning_rate": 2.4513468076873586e-06, "loss": 0.5502, "step": 13656 }, { "epoch": 0.7369812746209055, "grad_norm": 0.9020539164559835, "learning_rate": 2.450785711337488e-06, "loss": 0.3377, "step": 13657 }, { "epoch": 0.7370352382494199, "grad_norm": 1.2789257119461575, "learning_rate": 2.450224702625117e-06, "loss": 0.6103, "step": 13658 }, { "epoch": 0.7370892018779343, "grad_norm": 0.9467371461208075, "learning_rate": 2.4496637815663697e-06, "loss": 0.3928, "step": 13659 }, { "epoch": 0.7371431655064486, "grad_norm": 1.282642003345757, "learning_rate": 2.4491029481773675e-06, "loss": 0.6181, "step": 13660 }, { "epoch": 0.737197129134963, "grad_norm": 1.0226351416055555, "learning_rate": 2.448542202474229e-06, "loss": 0.4441, "step": 13661 }, { "epoch": 0.7372510927634774, "grad_norm": 1.0255037843302663, "learning_rate": 2.4479815444730714e-06, "loss": 0.49, "step": 13662 }, { "epoch": 0.7373050563919918, "grad_norm": 1.0827441597276284, "learning_rate": 2.447420974190008e-06, "loss": 0.5063, "step": 13663 }, { "epoch": 0.7373590200205061, "grad_norm": 1.1497380483289108, "learning_rate": 2.4468604916411516e-06, "loss": 0.5123, "step": 13664 }, { "epoch": 0.7374129836490205, "grad_norm": 0.9270781308245263, "learning_rate": 2.4463000968426077e-06, "loss": 0.4217, "step": 13665 }, { "epoch": 0.7374669472775349, "grad_norm": 1.1850070057491957, "learning_rate": 2.4457397898104856e-06, "loss": 0.4693, "step": 13666 }, { "epoch": 0.7375209109060493, "grad_norm": 0.9889364870837063, "learning_rate": 2.445179570560887e-06, "loss": 0.4101, "step": 13667 }, { "epoch": 0.7375748745345637, "grad_norm": 1.0182072520515841, "learning_rate": 2.4446194391099146e-06, "loss": 0.437, "step": 13668 }, { "epoch": 0.7376288381630781, "grad_norm": 1.3250955693155826, "learning_rate": 2.4440593954736666e-06, "loss": 0.4124, "step": 13669 }, { "epoch": 0.7376828017915925, "grad_norm": 1.0018573914160396, "learning_rate": 2.4434994396682397e-06, "loss": 0.6161, "step": 13670 }, { "epoch": 0.7377367654201068, "grad_norm": 0.8994548574169627, "learning_rate": 2.442939571709729e-06, "loss": 0.3547, "step": 13671 }, { "epoch": 0.7377907290486212, "grad_norm": 1.0749108245670753, "learning_rate": 2.442379791614222e-06, "loss": 0.505, "step": 13672 }, { "epoch": 0.7378446926771356, "grad_norm": 0.7342420213281813, "learning_rate": 2.441820099397809e-06, "loss": 0.3142, "step": 13673 }, { "epoch": 0.73789865630565, "grad_norm": 1.0702027751905223, "learning_rate": 2.441260495076577e-06, "loss": 0.5122, "step": 13674 }, { "epoch": 0.7379526199341644, "grad_norm": 
1.1150972268539592, "learning_rate": 2.4407009786666083e-06, "loss": 0.3647, "step": 13675 }, { "epoch": 0.7380065835626788, "grad_norm": 0.9424367369836135, "learning_rate": 2.4401415501839856e-06, "loss": 0.494, "step": 13676 }, { "epoch": 0.7380605471911932, "grad_norm": 1.1507105030112639, "learning_rate": 2.439582209644786e-06, "loss": 0.4721, "step": 13677 }, { "epoch": 0.7381145108197075, "grad_norm": 0.9715291468544067, "learning_rate": 2.4390229570650877e-06, "loss": 0.4561, "step": 13678 }, { "epoch": 0.7381684744482219, "grad_norm": 1.315805504367494, "learning_rate": 2.438463792460961e-06, "loss": 0.6337, "step": 13679 }, { "epoch": 0.7382224380767363, "grad_norm": 1.0276749016827882, "learning_rate": 2.4379047158484782e-06, "loss": 0.5278, "step": 13680 }, { "epoch": 0.7382764017052507, "grad_norm": 1.1476555799581736, "learning_rate": 2.4373457272437074e-06, "loss": 0.5078, "step": 13681 }, { "epoch": 0.7383303653337651, "grad_norm": 1.0758844904611882, "learning_rate": 2.4367868266627156e-06, "loss": 0.4978, "step": 13682 }, { "epoch": 0.7383843289622795, "grad_norm": 1.1040785181034605, "learning_rate": 2.4362280141215656e-06, "loss": 0.4797, "step": 13683 }, { "epoch": 0.7384382925907939, "grad_norm": 0.7164325344886534, "learning_rate": 2.435669289636318e-06, "loss": 0.2639, "step": 13684 }, { "epoch": 0.7384922562193081, "grad_norm": 1.150942715004878, "learning_rate": 2.4351106532230324e-06, "loss": 0.6395, "step": 13685 }, { "epoch": 0.7385462198478225, "grad_norm": 1.0671006193424986, "learning_rate": 2.434552104897763e-06, "loss": 0.5917, "step": 13686 }, { "epoch": 0.7386001834763369, "grad_norm": 1.122983313886147, "learning_rate": 2.4339936446765626e-06, "loss": 0.5119, "step": 13687 }, { "epoch": 0.7386541471048513, "grad_norm": 1.149519177163638, "learning_rate": 2.433435272575483e-06, "loss": 0.5393, "step": 13688 }, { "epoch": 0.7387081107333657, "grad_norm": 0.9418991323786952, "learning_rate": 2.4328769886105727e-06, "loss": 0.46, "step": 13689 }, { "epoch": 0.7387620743618801, "grad_norm": 1.0207255114982146, "learning_rate": 2.432318792797876e-06, "loss": 0.6218, "step": 13690 }, { "epoch": 0.7388160379903945, "grad_norm": 1.0603094091727008, "learning_rate": 2.431760685153438e-06, "loss": 0.4732, "step": 13691 }, { "epoch": 0.7388700016189088, "grad_norm": 1.0561993270447907, "learning_rate": 2.431202665693299e-06, "loss": 0.65, "step": 13692 }, { "epoch": 0.7389239652474232, "grad_norm": 1.234141060046457, "learning_rate": 2.4306447344334944e-06, "loss": 0.5043, "step": 13693 }, { "epoch": 0.7389779288759376, "grad_norm": 0.9253077232396937, "learning_rate": 2.4300868913900614e-06, "loss": 0.3343, "step": 13694 }, { "epoch": 0.739031892504452, "grad_norm": 0.9937966782348889, "learning_rate": 2.429529136579034e-06, "loss": 0.4951, "step": 13695 }, { "epoch": 0.7390858561329664, "grad_norm": 0.8713841586850773, "learning_rate": 2.428971470016441e-06, "loss": 0.3861, "step": 13696 }, { "epoch": 0.7391398197614808, "grad_norm": 0.9315616895734296, "learning_rate": 2.428413891718311e-06, "loss": 0.3671, "step": 13697 }, { "epoch": 0.7391937833899952, "grad_norm": 0.9724083157325145, "learning_rate": 2.4278564017006697e-06, "loss": 0.4183, "step": 13698 }, { "epoch": 0.7392477470185095, "grad_norm": 0.8592093201675339, "learning_rate": 2.427298999979541e-06, "loss": 0.4937, "step": 13699 }, { "epoch": 0.7393017106470239, "grad_norm": 0.8841585858935, "learning_rate": 2.426741686570942e-06, "loss": 0.5018, "step": 13700 }, { "epoch": 0.7393556742755383, 
"grad_norm": 1.2010347959847076, "learning_rate": 2.426184461490892e-06, "loss": 0.5115, "step": 13701 }, { "epoch": 0.7394096379040527, "grad_norm": 0.9595410183125147, "learning_rate": 2.425627324755407e-06, "loss": 0.3275, "step": 13702 }, { "epoch": 0.7394636015325671, "grad_norm": 1.0116580052975386, "learning_rate": 2.425070276380499e-06, "loss": 0.4731, "step": 13703 }, { "epoch": 0.7395175651610815, "grad_norm": 1.1331323914910418, "learning_rate": 2.4245133163821786e-06, "loss": 0.5286, "step": 13704 }, { "epoch": 0.7395715287895959, "grad_norm": 0.965721998091864, "learning_rate": 2.4239564447764526e-06, "loss": 0.4567, "step": 13705 }, { "epoch": 0.7396254924181102, "grad_norm": 1.0871088715804735, "learning_rate": 2.4233996615793264e-06, "loss": 0.4945, "step": 13706 }, { "epoch": 0.7396794560466246, "grad_norm": 0.8891263507732957, "learning_rate": 2.422842966806803e-06, "loss": 0.3681, "step": 13707 }, { "epoch": 0.739733419675139, "grad_norm": 1.0631152902708518, "learning_rate": 2.422286360474881e-06, "loss": 0.4676, "step": 13708 }, { "epoch": 0.7397873833036533, "grad_norm": 1.0740771947266015, "learning_rate": 2.42172984259956e-06, "loss": 0.4305, "step": 13709 }, { "epoch": 0.7398413469321677, "grad_norm": 1.366298892919677, "learning_rate": 2.4211734131968333e-06, "loss": 0.4217, "step": 13710 }, { "epoch": 0.7398953105606821, "grad_norm": 0.8213648237364805, "learning_rate": 2.4206170722826937e-06, "loss": 0.4172, "step": 13711 }, { "epoch": 0.7399492741891964, "grad_norm": 0.8316913464395386, "learning_rate": 2.420060819873132e-06, "loss": 0.4531, "step": 13712 }, { "epoch": 0.7400032378177108, "grad_norm": 1.3046650194159426, "learning_rate": 2.4195046559841328e-06, "loss": 0.5476, "step": 13713 }, { "epoch": 0.7400572014462252, "grad_norm": 0.9939936224287365, "learning_rate": 2.418948580631683e-06, "loss": 0.453, "step": 13714 }, { "epoch": 0.7401111650747396, "grad_norm": 1.0473066879246253, "learning_rate": 2.4183925938317636e-06, "loss": 0.4768, "step": 13715 }, { "epoch": 0.740165128703254, "grad_norm": 1.101800801714389, "learning_rate": 2.4178366956003555e-06, "loss": 0.5632, "step": 13716 }, { "epoch": 0.7402190923317684, "grad_norm": 1.151336232101916, "learning_rate": 2.417280885953435e-06, "loss": 0.5397, "step": 13717 }, { "epoch": 0.7402730559602828, "grad_norm": 1.1003132418092016, "learning_rate": 2.416725164906976e-06, "loss": 0.5815, "step": 13718 }, { "epoch": 0.7403270195887971, "grad_norm": 1.1196128294876557, "learning_rate": 2.416169532476953e-06, "loss": 0.4559, "step": 13719 }, { "epoch": 0.7403809832173115, "grad_norm": 1.2847161137848364, "learning_rate": 2.415613988679332e-06, "loss": 0.5314, "step": 13720 }, { "epoch": 0.7404349468458259, "grad_norm": 0.905725827509395, "learning_rate": 2.4150585335300823e-06, "loss": 0.3688, "step": 13721 }, { "epoch": 0.7404889104743403, "grad_norm": 0.9331445897672912, "learning_rate": 2.4145031670451664e-06, "loss": 0.418, "step": 13722 }, { "epoch": 0.7405428741028547, "grad_norm": 0.9678438992702885, "learning_rate": 2.413947889240548e-06, "loss": 0.3283, "step": 13723 }, { "epoch": 0.7405968377313691, "grad_norm": 0.9668414890025637, "learning_rate": 2.4133927001321844e-06, "loss": 0.3424, "step": 13724 }, { "epoch": 0.7406508013598835, "grad_norm": 1.1793042044889026, "learning_rate": 2.4128375997360345e-06, "loss": 0.6142, "step": 13725 }, { "epoch": 0.7407047649883978, "grad_norm": 1.0491357141544038, "learning_rate": 2.4122825880680524e-06, "loss": 0.6195, "step": 13726 }, { "epoch": 
0.7407587286169122, "grad_norm": 1.0182503156057756, "learning_rate": 2.411727665144187e-06, "loss": 0.4819, "step": 13727 }, { "epoch": 0.7408126922454266, "grad_norm": 0.8484597813744122, "learning_rate": 2.411172830980389e-06, "loss": 0.3027, "step": 13728 }, { "epoch": 0.740866655873941, "grad_norm": 0.8811491422541216, "learning_rate": 2.4106180855926047e-06, "loss": 0.5434, "step": 13729 }, { "epoch": 0.7409206195024554, "grad_norm": 1.1984682472476476, "learning_rate": 2.410063428996779e-06, "loss": 0.4817, "step": 13730 }, { "epoch": 0.7409745831309698, "grad_norm": 0.9503037022430559, "learning_rate": 2.409508861208852e-06, "loss": 0.4125, "step": 13731 }, { "epoch": 0.7410285467594842, "grad_norm": 1.0514323190152437, "learning_rate": 2.4089543822447625e-06, "loss": 0.5191, "step": 13732 }, { "epoch": 0.7410825103879984, "grad_norm": 0.9450518949259739, "learning_rate": 2.4083999921204494e-06, "loss": 0.3329, "step": 13733 }, { "epoch": 0.7411364740165128, "grad_norm": 1.1163883584995937, "learning_rate": 2.407845690851843e-06, "loss": 0.3758, "step": 13734 }, { "epoch": 0.7411904376450272, "grad_norm": 1.1852532065061756, "learning_rate": 2.4072914784548758e-06, "loss": 0.5194, "step": 13735 }, { "epoch": 0.7412444012735416, "grad_norm": 1.008302472986563, "learning_rate": 2.4067373549454763e-06, "loss": 0.4232, "step": 13736 }, { "epoch": 0.741298364902056, "grad_norm": 0.9919007032138214, "learning_rate": 2.406183320339571e-06, "loss": 0.5281, "step": 13737 }, { "epoch": 0.7413523285305704, "grad_norm": 1.0848160318087539, "learning_rate": 2.4056293746530827e-06, "loss": 0.4149, "step": 13738 }, { "epoch": 0.7414062921590848, "grad_norm": 1.2642470827812446, "learning_rate": 2.405075517901933e-06, "loss": 0.6762, "step": 13739 }, { "epoch": 0.7414602557875991, "grad_norm": 0.8860661897964235, "learning_rate": 2.404521750102041e-06, "loss": 0.375, "step": 13740 }, { "epoch": 0.7415142194161135, "grad_norm": 1.071061933988195, "learning_rate": 2.403968071269321e-06, "loss": 0.5575, "step": 13741 }, { "epoch": 0.7415681830446279, "grad_norm": 0.8810004739765295, "learning_rate": 2.403414481419687e-06, "loss": 0.4363, "step": 13742 }, { "epoch": 0.7416221466731423, "grad_norm": 0.8208026749574069, "learning_rate": 2.4028609805690497e-06, "loss": 0.4042, "step": 13743 }, { "epoch": 0.7416761103016567, "grad_norm": 1.0613778563980258, "learning_rate": 2.4023075687333164e-06, "loss": 0.5204, "step": 13744 }, { "epoch": 0.7417300739301711, "grad_norm": 1.103517846305272, "learning_rate": 2.4017542459283942e-06, "loss": 0.5989, "step": 13745 }, { "epoch": 0.7417840375586855, "grad_norm": 1.0642533134106034, "learning_rate": 2.4012010121701853e-06, "loss": 0.4206, "step": 13746 }, { "epoch": 0.7418380011871998, "grad_norm": 1.0412196996176735, "learning_rate": 2.400647867474592e-06, "loss": 0.4719, "step": 13747 }, { "epoch": 0.7418919648157142, "grad_norm": 1.032660266180819, "learning_rate": 2.4000948118575095e-06, "loss": 0.4689, "step": 13748 }, { "epoch": 0.7419459284442286, "grad_norm": 0.912428024473744, "learning_rate": 2.399541845334834e-06, "loss": 0.3993, "step": 13749 }, { "epoch": 0.741999892072743, "grad_norm": 1.1320335935144874, "learning_rate": 2.398988967922459e-06, "loss": 0.4421, "step": 13750 }, { "epoch": 0.7420538557012574, "grad_norm": 1.0017586660319573, "learning_rate": 2.3984361796362736e-06, "loss": 0.3701, "step": 13751 }, { "epoch": 0.7421078193297718, "grad_norm": 1.080069741561656, "learning_rate": 2.397883480492167e-06, "loss": 0.4945, "step": 13752 }, { 
"epoch": 0.7421617829582862, "grad_norm": 1.1308331905670144, "learning_rate": 2.397330870506023e-06, "loss": 0.3845, "step": 13753 }, { "epoch": 0.7422157465868005, "grad_norm": 1.2991499534930335, "learning_rate": 2.3967783496937257e-06, "loss": 0.5128, "step": 13754 }, { "epoch": 0.7422697102153148, "grad_norm": 1.0622439811957038, "learning_rate": 2.3962259180711535e-06, "loss": 0.4933, "step": 13755 }, { "epoch": 0.7423236738438292, "grad_norm": 0.9279697015938064, "learning_rate": 2.395673575654186e-06, "loss": 0.3394, "step": 13756 }, { "epoch": 0.7423776374723436, "grad_norm": 1.152018746229512, "learning_rate": 2.3951213224586952e-06, "loss": 0.5376, "step": 13757 }, { "epoch": 0.742431601100858, "grad_norm": 1.109857964289796, "learning_rate": 2.394569158500556e-06, "loss": 0.4527, "step": 13758 }, { "epoch": 0.7424855647293724, "grad_norm": 1.0246520040970153, "learning_rate": 2.3940170837956355e-06, "loss": 0.5165, "step": 13759 }, { "epoch": 0.7425395283578868, "grad_norm": 0.9894776757505382, "learning_rate": 2.393465098359803e-06, "loss": 0.4319, "step": 13760 }, { "epoch": 0.7425934919864011, "grad_norm": 0.9762314606003716, "learning_rate": 2.3929132022089226e-06, "loss": 0.3804, "step": 13761 }, { "epoch": 0.7426474556149155, "grad_norm": 1.012694306685135, "learning_rate": 2.3923613953588557e-06, "loss": 0.5295, "step": 13762 }, { "epoch": 0.7427014192434299, "grad_norm": 0.9671423106023049, "learning_rate": 2.391809677825463e-06, "loss": 0.4376, "step": 13763 }, { "epoch": 0.7427553828719443, "grad_norm": 0.9361471538045009, "learning_rate": 2.3912580496246005e-06, "loss": 0.3615, "step": 13764 }, { "epoch": 0.7428093465004587, "grad_norm": 0.9060281298727937, "learning_rate": 2.390706510772122e-06, "loss": 0.3711, "step": 13765 }, { "epoch": 0.7428633101289731, "grad_norm": 0.9913451955992864, "learning_rate": 2.3901550612838813e-06, "loss": 0.4483, "step": 13766 }, { "epoch": 0.7429172737574875, "grad_norm": 1.055559688541436, "learning_rate": 2.389603701175726e-06, "loss": 0.5486, "step": 13767 }, { "epoch": 0.7429712373860018, "grad_norm": 0.9993980484579671, "learning_rate": 2.3890524304635046e-06, "loss": 0.5141, "step": 13768 }, { "epoch": 0.7430252010145162, "grad_norm": 1.1904213081696895, "learning_rate": 2.3885012491630584e-06, "loss": 0.3744, "step": 13769 }, { "epoch": 0.7430791646430306, "grad_norm": 0.9682592631167551, "learning_rate": 2.3879501572902297e-06, "loss": 0.4878, "step": 13770 }, { "epoch": 0.743133128271545, "grad_norm": 1.015666374152435, "learning_rate": 2.3873991548608585e-06, "loss": 0.4673, "step": 13771 }, { "epoch": 0.7431870919000594, "grad_norm": 1.036437490523783, "learning_rate": 2.3868482418907802e-06, "loss": 0.4024, "step": 13772 }, { "epoch": 0.7432410555285738, "grad_norm": 0.9935265710147462, "learning_rate": 2.3862974183958296e-06, "loss": 0.4879, "step": 13773 }, { "epoch": 0.7432950191570882, "grad_norm": 1.1326060695396332, "learning_rate": 2.3857466843918373e-06, "loss": 0.4733, "step": 13774 }, { "epoch": 0.7433489827856025, "grad_norm": 1.310265642735977, "learning_rate": 2.385196039894633e-06, "loss": 0.628, "step": 13775 }, { "epoch": 0.7434029464141169, "grad_norm": 0.8669568076758674, "learning_rate": 2.3846454849200407e-06, "loss": 0.4615, "step": 13776 }, { "epoch": 0.7434569100426313, "grad_norm": 1.0805997239011107, "learning_rate": 2.384095019483885e-06, "loss": 0.5001, "step": 13777 }, { "epoch": 0.7435108736711457, "grad_norm": 1.096603561482585, "learning_rate": 2.3835446436019867e-06, "loss": 0.5918, 
"step": 13778 }, { "epoch": 0.74356483729966, "grad_norm": 1.2216267516814752, "learning_rate": 2.3829943572901647e-06, "loss": 0.5178, "step": 13779 }, { "epoch": 0.7436188009281745, "grad_norm": 1.1274079123242335, "learning_rate": 2.3824441605642336e-06, "loss": 0.4917, "step": 13780 }, { "epoch": 0.7436727645566887, "grad_norm": 1.0894475553323304, "learning_rate": 2.3818940534400094e-06, "loss": 0.4137, "step": 13781 }, { "epoch": 0.7437267281852031, "grad_norm": 1.5699155312577897, "learning_rate": 2.381344035933299e-06, "loss": 0.6076, "step": 13782 }, { "epoch": 0.7437806918137175, "grad_norm": 0.8814909667958198, "learning_rate": 2.3807941080599127e-06, "loss": 0.4378, "step": 13783 }, { "epoch": 0.7438346554422319, "grad_norm": 1.2751995379648908, "learning_rate": 2.380244269835655e-06, "loss": 0.5956, "step": 13784 }, { "epoch": 0.7438886190707463, "grad_norm": 1.036647707405406, "learning_rate": 2.379694521276329e-06, "loss": 0.5286, "step": 13785 }, { "epoch": 0.7439425826992607, "grad_norm": 0.9365790629900254, "learning_rate": 2.3791448623977355e-06, "loss": 0.5007, "step": 13786 }, { "epoch": 0.7439965463277751, "grad_norm": 0.894772996811788, "learning_rate": 2.378595293215672e-06, "loss": 0.3147, "step": 13787 }, { "epoch": 0.7440505099562894, "grad_norm": 1.053407350631881, "learning_rate": 2.3780458137459357e-06, "loss": 0.531, "step": 13788 }, { "epoch": 0.7441044735848038, "grad_norm": 1.0304153325450238, "learning_rate": 2.377496424004315e-06, "loss": 0.4198, "step": 13789 }, { "epoch": 0.7441584372133182, "grad_norm": 0.9101456239384091, "learning_rate": 2.376947124006603e-06, "loss": 0.3002, "step": 13790 }, { "epoch": 0.7442124008418326, "grad_norm": 1.1174036635410245, "learning_rate": 2.376397913768586e-06, "loss": 0.4323, "step": 13791 }, { "epoch": 0.744266364470347, "grad_norm": 1.1049241632795346, "learning_rate": 2.3758487933060487e-06, "loss": 0.4443, "step": 13792 }, { "epoch": 0.7443203280988614, "grad_norm": 1.2015833386334946, "learning_rate": 2.375299762634774e-06, "loss": 0.4555, "step": 13793 }, { "epoch": 0.7443742917273758, "grad_norm": 0.8594089736446202, "learning_rate": 2.3747508217705416e-06, "loss": 0.4406, "step": 13794 }, { "epoch": 0.7444282553558901, "grad_norm": 0.8742014033470046, "learning_rate": 2.3742019707291296e-06, "loss": 0.3099, "step": 13795 }, { "epoch": 0.7444822189844045, "grad_norm": 1.060963690080123, "learning_rate": 2.3736532095263105e-06, "loss": 0.4565, "step": 13796 }, { "epoch": 0.7445361826129189, "grad_norm": 0.8320523048708396, "learning_rate": 2.3731045381778557e-06, "loss": 0.3359, "step": 13797 }, { "epoch": 0.7445901462414333, "grad_norm": 0.8573467769601333, "learning_rate": 2.3725559566995373e-06, "loss": 0.4066, "step": 13798 }, { "epoch": 0.7446441098699477, "grad_norm": 1.0673434379914584, "learning_rate": 2.3720074651071202e-06, "loss": 0.37, "step": 13799 }, { "epoch": 0.7446980734984621, "grad_norm": 1.2937021763798593, "learning_rate": 2.3714590634163696e-06, "loss": 0.5532, "step": 13800 }, { "epoch": 0.7447520371269765, "grad_norm": 1.103289798143979, "learning_rate": 2.3709107516430464e-06, "loss": 0.4403, "step": 13801 }, { "epoch": 0.7448060007554907, "grad_norm": 1.1704785439797945, "learning_rate": 2.370362529802911e-06, "loss": 0.503, "step": 13802 }, { "epoch": 0.7448599643840051, "grad_norm": 1.038288956407734, "learning_rate": 2.3698143979117177e-06, "loss": 0.5855, "step": 13803 }, { "epoch": 0.7449139280125195, "grad_norm": 0.8857637613251428, "learning_rate": 2.3692663559852212e-06, 
"loss": 0.3766, "step": 13804 }, { "epoch": 0.7449678916410339, "grad_norm": 1.1413475335896897, "learning_rate": 2.3687184040391737e-06, "loss": 0.5588, "step": 13805 }, { "epoch": 0.7450218552695483, "grad_norm": 1.04276318580816, "learning_rate": 2.3681705420893227e-06, "loss": 0.4103, "step": 13806 }, { "epoch": 0.7450758188980627, "grad_norm": 1.2485561003632042, "learning_rate": 2.367622770151415e-06, "loss": 0.5486, "step": 13807 }, { "epoch": 0.7451297825265771, "grad_norm": 1.1290160104078468, "learning_rate": 2.3670750882411935e-06, "loss": 0.4975, "step": 13808 }, { "epoch": 0.7451837461550914, "grad_norm": 0.8360162742896027, "learning_rate": 2.3665274963743995e-06, "loss": 0.277, "step": 13809 }, { "epoch": 0.7452377097836058, "grad_norm": 0.8628724662214751, "learning_rate": 2.3659799945667717e-06, "loss": 0.3399, "step": 13810 }, { "epoch": 0.7452916734121202, "grad_norm": 0.8793707646838839, "learning_rate": 2.365432582834045e-06, "loss": 0.357, "step": 13811 }, { "epoch": 0.7453456370406346, "grad_norm": 0.9861971324201783, "learning_rate": 2.3648852611919544e-06, "loss": 0.4494, "step": 13812 }, { "epoch": 0.745399600669149, "grad_norm": 1.2487522244358646, "learning_rate": 2.3643380296562284e-06, "loss": 0.7373, "step": 13813 }, { "epoch": 0.7454535642976634, "grad_norm": 0.8530605760902752, "learning_rate": 2.3637908882425954e-06, "loss": 0.3561, "step": 13814 }, { "epoch": 0.7455075279261778, "grad_norm": 0.8584480902636806, "learning_rate": 2.3632438369667817e-06, "loss": 0.3312, "step": 13815 }, { "epoch": 0.7455614915546921, "grad_norm": 1.022913835148295, "learning_rate": 2.362696875844511e-06, "loss": 0.4936, "step": 13816 }, { "epoch": 0.7456154551832065, "grad_norm": 0.91513934472465, "learning_rate": 2.3621500048915004e-06, "loss": 0.4475, "step": 13817 }, { "epoch": 0.7456694188117209, "grad_norm": 1.073321145457848, "learning_rate": 2.3616032241234697e-06, "loss": 0.5151, "step": 13818 }, { "epoch": 0.7457233824402353, "grad_norm": 1.067086769307415, "learning_rate": 2.361056533556133e-06, "loss": 0.5296, "step": 13819 }, { "epoch": 0.7457773460687497, "grad_norm": 1.0799638784072596, "learning_rate": 2.360509933205203e-06, "loss": 0.4912, "step": 13820 }, { "epoch": 0.7458313096972641, "grad_norm": 1.0849559030965712, "learning_rate": 2.359963423086391e-06, "loss": 0.4463, "step": 13821 }, { "epoch": 0.7458852733257785, "grad_norm": 1.0863929223532487, "learning_rate": 2.359417003215402e-06, "loss": 0.4017, "step": 13822 }, { "epoch": 0.7459392369542928, "grad_norm": 0.9950650456064172, "learning_rate": 2.3588706736079424e-06, "loss": 0.4828, "step": 13823 }, { "epoch": 0.7459932005828072, "grad_norm": 0.7921325208881091, "learning_rate": 2.358324434279713e-06, "loss": 0.3795, "step": 13824 }, { "epoch": 0.7460471642113216, "grad_norm": 0.9676722532986726, "learning_rate": 2.357778285246414e-06, "loss": 0.4836, "step": 13825 }, { "epoch": 0.746101127839836, "grad_norm": 0.9558544113196051, "learning_rate": 2.357232226523742e-06, "loss": 0.4746, "step": 13826 }, { "epoch": 0.7461550914683504, "grad_norm": 1.0816936781493736, "learning_rate": 2.3566862581273905e-06, "loss": 0.5095, "step": 13827 }, { "epoch": 0.7462090550968647, "grad_norm": 1.0018724730405548, "learning_rate": 2.356140380073053e-06, "loss": 0.4796, "step": 13828 }, { "epoch": 0.7462630187253791, "grad_norm": 0.9495529639720932, "learning_rate": 2.3555945923764166e-06, "loss": 0.4459, "step": 13829 }, { "epoch": 0.7463169823538934, "grad_norm": 1.0916657440131894, "learning_rate": 
2.355048895053171e-06, "loss": 0.4417, "step": 13830 }, { "epoch": 0.7463709459824078, "grad_norm": 0.9756424395715568, "learning_rate": 2.354503288118997e-06, "loss": 0.5197, "step": 13831 }, { "epoch": 0.7464249096109222, "grad_norm": 0.946673421669155, "learning_rate": 2.353957771589576e-06, "loss": 0.3948, "step": 13832 }, { "epoch": 0.7464788732394366, "grad_norm": 1.1069637139217243, "learning_rate": 2.353412345480587e-06, "loss": 0.5882, "step": 13833 }, { "epoch": 0.746532836867951, "grad_norm": 1.0743254544185008, "learning_rate": 2.3528670098077074e-06, "loss": 0.517, "step": 13834 }, { "epoch": 0.7465868004964654, "grad_norm": 1.0178847687459385, "learning_rate": 2.3523217645866094e-06, "loss": 0.4745, "step": 13835 }, { "epoch": 0.7466407641249798, "grad_norm": 0.854359428661137, "learning_rate": 2.351776609832965e-06, "loss": 0.3678, "step": 13836 }, { "epoch": 0.7466947277534941, "grad_norm": 0.9761637169520345, "learning_rate": 2.3512315455624426e-06, "loss": 0.532, "step": 13837 }, { "epoch": 0.7467486913820085, "grad_norm": 0.954597902077252, "learning_rate": 2.3506865717907064e-06, "loss": 0.4454, "step": 13838 }, { "epoch": 0.7468026550105229, "grad_norm": 1.122086390046103, "learning_rate": 2.35014168853342e-06, "loss": 0.5337, "step": 13839 }, { "epoch": 0.7468566186390373, "grad_norm": 1.091415959701526, "learning_rate": 2.349596895806244e-06, "loss": 0.4682, "step": 13840 }, { "epoch": 0.7469105822675517, "grad_norm": 0.9600569014067727, "learning_rate": 2.3490521936248365e-06, "loss": 0.4434, "step": 13841 }, { "epoch": 0.7469645458960661, "grad_norm": 0.9319223279167853, "learning_rate": 2.348507582004853e-06, "loss": 0.4092, "step": 13842 }, { "epoch": 0.7470185095245805, "grad_norm": 1.1145067796071018, "learning_rate": 2.347963060961946e-06, "loss": 0.4092, "step": 13843 }, { "epoch": 0.7470724731530948, "grad_norm": 1.3630301926926642, "learning_rate": 2.347418630511767e-06, "loss": 0.5381, "step": 13844 }, { "epoch": 0.7471264367816092, "grad_norm": 0.8660056737015015, "learning_rate": 2.34687429066996e-06, "loss": 0.4007, "step": 13845 }, { "epoch": 0.7471804004101236, "grad_norm": 0.9374074367344646, "learning_rate": 2.3463300414521723e-06, "loss": 0.4047, "step": 13846 }, { "epoch": 0.747234364038638, "grad_norm": 1.2693323402838448, "learning_rate": 2.3457858828740456e-06, "loss": 0.5171, "step": 13847 }, { "epoch": 0.7472883276671524, "grad_norm": 1.0372571018608676, "learning_rate": 2.34524181495122e-06, "loss": 0.4351, "step": 13848 }, { "epoch": 0.7473422912956668, "grad_norm": 0.9907971465472057, "learning_rate": 2.344697837699332e-06, "loss": 0.4426, "step": 13849 }, { "epoch": 0.747396254924181, "grad_norm": 1.0399103208492853, "learning_rate": 2.3441539511340183e-06, "loss": 0.4998, "step": 13850 }, { "epoch": 0.7474502185526954, "grad_norm": 0.8835383897600424, "learning_rate": 2.3436101552709065e-06, "loss": 0.3218, "step": 13851 }, { "epoch": 0.7475041821812098, "grad_norm": 1.0836387793865145, "learning_rate": 2.3430664501256287e-06, "loss": 0.5031, "step": 13852 }, { "epoch": 0.7475581458097242, "grad_norm": 1.166993759618592, "learning_rate": 2.3425228357138107e-06, "loss": 0.3687, "step": 13853 }, { "epoch": 0.7476121094382386, "grad_norm": 1.1643009362728767, "learning_rate": 2.341979312051077e-06, "loss": 0.5458, "step": 13854 }, { "epoch": 0.747666073066753, "grad_norm": 0.9661020292141071, "learning_rate": 2.3414358791530487e-06, "loss": 0.3801, "step": 13855 }, { "epoch": 0.7477200366952674, "grad_norm": 0.7767369554945415, 
"learning_rate": 2.340892537035345e-06, "loss": 0.3052, "step": 13856 }, { "epoch": 0.7477740003237817, "grad_norm": 1.0478652922912999, "learning_rate": 2.340349285713582e-06, "loss": 0.4699, "step": 13857 }, { "epoch": 0.7478279639522961, "grad_norm": 0.8708834059660753, "learning_rate": 2.3398061252033723e-06, "loss": 0.3708, "step": 13858 }, { "epoch": 0.7478819275808105, "grad_norm": 1.1082664917936471, "learning_rate": 2.3392630555203282e-06, "loss": 0.5985, "step": 13859 }, { "epoch": 0.7479358912093249, "grad_norm": 0.9559940790709079, "learning_rate": 2.3387200766800575e-06, "loss": 0.5273, "step": 13860 }, { "epoch": 0.7479898548378393, "grad_norm": 0.8248772637152212, "learning_rate": 2.338177188698168e-06, "loss": 0.5071, "step": 13861 }, { "epoch": 0.7480438184663537, "grad_norm": 1.035049290168375, "learning_rate": 2.337634391590258e-06, "loss": 0.6254, "step": 13862 }, { "epoch": 0.7480977820948681, "grad_norm": 0.877827136765907, "learning_rate": 2.3370916853719324e-06, "loss": 0.3536, "step": 13863 }, { "epoch": 0.7481517457233824, "grad_norm": 0.9934921072485104, "learning_rate": 2.3365490700587874e-06, "loss": 0.5967, "step": 13864 }, { "epoch": 0.7482057093518968, "grad_norm": 1.0411642703379984, "learning_rate": 2.336006545666418e-06, "loss": 0.518, "step": 13865 }, { "epoch": 0.7482596729804112, "grad_norm": 0.9130278927698046, "learning_rate": 2.335464112210418e-06, "loss": 0.3416, "step": 13866 }, { "epoch": 0.7483136366089256, "grad_norm": 0.9027868798275885, "learning_rate": 2.3349217697063767e-06, "loss": 0.3194, "step": 13867 }, { "epoch": 0.74836760023744, "grad_norm": 1.077666019839338, "learning_rate": 2.334379518169882e-06, "loss": 0.5004, "step": 13868 }, { "epoch": 0.7484215638659544, "grad_norm": 1.0968449613076456, "learning_rate": 2.333837357616518e-06, "loss": 0.6516, "step": 13869 }, { "epoch": 0.7484755274944688, "grad_norm": 1.1021636979794451, "learning_rate": 2.3332952880618677e-06, "loss": 0.7228, "step": 13870 }, { "epoch": 0.7485294911229831, "grad_norm": 0.9240080669251083, "learning_rate": 2.332753309521512e-06, "loss": 0.4977, "step": 13871 }, { "epoch": 0.7485834547514975, "grad_norm": 1.0128425219380541, "learning_rate": 2.332211422011026e-06, "loss": 0.3663, "step": 13872 }, { "epoch": 0.7486374183800119, "grad_norm": 1.0126395479367398, "learning_rate": 2.3316696255459838e-06, "loss": 0.5, "step": 13873 }, { "epoch": 0.7486913820085263, "grad_norm": 1.1642802066840596, "learning_rate": 2.3311279201419577e-06, "loss": 0.6003, "step": 13874 }, { "epoch": 0.7487453456370406, "grad_norm": 1.0172385822036754, "learning_rate": 2.3305863058145174e-06, "loss": 0.3729, "step": 13875 }, { "epoch": 0.748799309265555, "grad_norm": 0.7739083816895926, "learning_rate": 2.330044782579229e-06, "loss": 0.2766, "step": 13876 }, { "epoch": 0.7488532728940694, "grad_norm": 0.9493346939129942, "learning_rate": 2.329503350451656e-06, "loss": 0.3901, "step": 13877 }, { "epoch": 0.7489072365225837, "grad_norm": 0.9022201748842198, "learning_rate": 2.3289620094473624e-06, "loss": 0.3594, "step": 13878 }, { "epoch": 0.7489612001510981, "grad_norm": 0.9434494011742915, "learning_rate": 2.3284207595819028e-06, "loss": 0.3685, "step": 13879 }, { "epoch": 0.7490151637796125, "grad_norm": 0.9806707446549446, "learning_rate": 2.3278796008708357e-06, "loss": 0.3912, "step": 13880 }, { "epoch": 0.7490691274081269, "grad_norm": 0.7843899307359191, "learning_rate": 2.327338533329714e-06, "loss": 0.2971, "step": 13881 }, { "epoch": 0.7491230910366413, "grad_norm": 
1.2347059656236767, "learning_rate": 2.326797556974089e-06, "loss": 0.4525, "step": 13882 }, { "epoch": 0.7491770546651557, "grad_norm": 1.03413736900149, "learning_rate": 2.3262566718195075e-06, "loss": 0.5572, "step": 13883 }, { "epoch": 0.7492310182936701, "grad_norm": 0.8866682133445495, "learning_rate": 2.325715877881517e-06, "loss": 0.444, "step": 13884 }, { "epoch": 0.7492849819221844, "grad_norm": 1.0034744194851308, "learning_rate": 2.3251751751756603e-06, "loss": 0.5889, "step": 13885 }, { "epoch": 0.7493389455506988, "grad_norm": 1.014114210736665, "learning_rate": 2.324634563717476e-06, "loss": 0.4455, "step": 13886 }, { "epoch": 0.7493929091792132, "grad_norm": 1.2251327276293535, "learning_rate": 2.3240940435225024e-06, "loss": 0.6765, "step": 13887 }, { "epoch": 0.7494468728077276, "grad_norm": 1.0085081107874403, "learning_rate": 2.3235536146062754e-06, "loss": 0.4723, "step": 13888 }, { "epoch": 0.749500836436242, "grad_norm": 0.9809147178811293, "learning_rate": 2.3230132769843267e-06, "loss": 0.4686, "step": 13889 }, { "epoch": 0.7495548000647564, "grad_norm": 1.0375204586339257, "learning_rate": 2.322473030672187e-06, "loss": 0.4763, "step": 13890 }, { "epoch": 0.7496087636932708, "grad_norm": 0.9002632225183947, "learning_rate": 2.321932875685383e-06, "loss": 0.4673, "step": 13891 }, { "epoch": 0.7496627273217851, "grad_norm": 0.7864064003814552, "learning_rate": 2.321392812039441e-06, "loss": 0.3506, "step": 13892 }, { "epoch": 0.7497166909502995, "grad_norm": 1.063057447801405, "learning_rate": 2.3208528397498793e-06, "loss": 0.4692, "step": 13893 }, { "epoch": 0.7497706545788139, "grad_norm": 1.0867849989947265, "learning_rate": 2.3203129588322194e-06, "loss": 0.5268, "step": 13894 }, { "epoch": 0.7498246182073283, "grad_norm": 0.7389927052721725, "learning_rate": 2.319773169301978e-06, "loss": 0.3311, "step": 13895 }, { "epoch": 0.7498785818358427, "grad_norm": 1.0303906003743488, "learning_rate": 2.319233471174669e-06, "loss": 0.4148, "step": 13896 }, { "epoch": 0.7499325454643571, "grad_norm": 0.7769908444070815, "learning_rate": 2.3186938644658043e-06, "loss": 0.3539, "step": 13897 }, { "epoch": 0.7499865090928715, "grad_norm": 1.3711606945478032, "learning_rate": 2.318154349190892e-06, "loss": 0.6757, "step": 13898 }, { "epoch": 0.7500404727213857, "grad_norm": 1.2609983664713675, "learning_rate": 2.317614925365441e-06, "loss": 0.4842, "step": 13899 }, { "epoch": 0.7500944363499001, "grad_norm": 0.941628279472951, "learning_rate": 2.3170755930049497e-06, "loss": 0.4943, "step": 13900 }, { "epoch": 0.7501483999784145, "grad_norm": 0.8392853439945963, "learning_rate": 2.3165363521249233e-06, "loss": 0.3131, "step": 13901 }, { "epoch": 0.7502023636069289, "grad_norm": 1.059485293529084, "learning_rate": 2.3159972027408584e-06, "loss": 0.5012, "step": 13902 }, { "epoch": 0.7502563272354433, "grad_norm": 0.9626318778083127, "learning_rate": 2.315458144868251e-06, "loss": 0.5125, "step": 13903 }, { "epoch": 0.7503102908639577, "grad_norm": 0.9336132967557704, "learning_rate": 2.314919178522594e-06, "loss": 0.4478, "step": 13904 }, { "epoch": 0.7503642544924721, "grad_norm": 1.1196736041258337, "learning_rate": 2.3143803037193783e-06, "loss": 0.5109, "step": 13905 }, { "epoch": 0.7504182181209864, "grad_norm": 1.3531374178677031, "learning_rate": 2.313841520474093e-06, "loss": 0.4355, "step": 13906 }, { "epoch": 0.7504721817495008, "grad_norm": 1.060131367256263, "learning_rate": 2.3133028288022203e-06, "loss": 0.4916, "step": 13907 }, { "epoch": 0.7505261453780152, 
"grad_norm": 0.9513431821987536, "learning_rate": 2.3127642287192444e-06, "loss": 0.4265, "step": 13908 }, { "epoch": 0.7505801090065296, "grad_norm": 1.1389279373359416, "learning_rate": 2.3122257202406444e-06, "loss": 0.4818, "step": 13909 }, { "epoch": 0.750634072635044, "grad_norm": 0.9989919462580521, "learning_rate": 2.311687303381899e-06, "loss": 0.383, "step": 13910 }, { "epoch": 0.7506880362635584, "grad_norm": 1.079402584020198, "learning_rate": 2.311148978158482e-06, "loss": 0.4942, "step": 13911 }, { "epoch": 0.7507419998920728, "grad_norm": 1.1041052828361313, "learning_rate": 2.3106107445858647e-06, "loss": 0.449, "step": 13912 }, { "epoch": 0.7507959635205871, "grad_norm": 0.9842580119143084, "learning_rate": 2.3100726026795183e-06, "loss": 0.5134, "step": 13913 }, { "epoch": 0.7508499271491015, "grad_norm": 0.9450788564796143, "learning_rate": 2.3095345524549076e-06, "loss": 0.3966, "step": 13914 }, { "epoch": 0.7509038907776159, "grad_norm": 0.8458323762100993, "learning_rate": 2.308996593927498e-06, "loss": 0.2922, "step": 13915 }, { "epoch": 0.7509578544061303, "grad_norm": 0.9558156328933171, "learning_rate": 2.3084587271127506e-06, "loss": 0.409, "step": 13916 }, { "epoch": 0.7510118180346447, "grad_norm": 1.0482624850213207, "learning_rate": 2.307920952026124e-06, "loss": 0.5502, "step": 13917 }, { "epoch": 0.7510657816631591, "grad_norm": 1.0425250159021064, "learning_rate": 2.3073832686830745e-06, "loss": 0.392, "step": 13918 }, { "epoch": 0.7511197452916734, "grad_norm": 0.9988277249060221, "learning_rate": 2.3068456770990576e-06, "loss": 0.4381, "step": 13919 }, { "epoch": 0.7511737089201878, "grad_norm": 1.153643635431112, "learning_rate": 2.3063081772895203e-06, "loss": 0.4842, "step": 13920 }, { "epoch": 0.7512276725487022, "grad_norm": 0.9670721308238392, "learning_rate": 2.305770769269914e-06, "loss": 0.3651, "step": 13921 }, { "epoch": 0.7512816361772165, "grad_norm": 1.0155191184004237, "learning_rate": 2.3052334530556823e-06, "loss": 0.4204, "step": 13922 }, { "epoch": 0.751335599805731, "grad_norm": 1.053197898745572, "learning_rate": 2.3046962286622697e-06, "loss": 0.4481, "step": 13923 }, { "epoch": 0.7513895634342453, "grad_norm": 1.059930182915908, "learning_rate": 2.3041590961051165e-06, "loss": 0.4725, "step": 13924 }, { "epoch": 0.7514435270627597, "grad_norm": 1.0728715593452358, "learning_rate": 2.3036220553996593e-06, "loss": 0.6513, "step": 13925 }, { "epoch": 0.751497490691274, "grad_norm": 1.0641860683521378, "learning_rate": 2.303085106561336e-06, "loss": 0.472, "step": 13926 }, { "epoch": 0.7515514543197884, "grad_norm": 1.0284422588963145, "learning_rate": 2.302548249605575e-06, "loss": 0.4558, "step": 13927 }, { "epoch": 0.7516054179483028, "grad_norm": 0.9302938241294749, "learning_rate": 2.3020114845478087e-06, "loss": 0.4778, "step": 13928 }, { "epoch": 0.7516593815768172, "grad_norm": 0.8223284883197628, "learning_rate": 2.301474811403463e-06, "loss": 0.4057, "step": 13929 }, { "epoch": 0.7517133452053316, "grad_norm": 0.9859580312894238, "learning_rate": 2.3009382301879636e-06, "loss": 0.5278, "step": 13930 }, { "epoch": 0.751767308833846, "grad_norm": 0.9645528171438593, "learning_rate": 2.3004017409167315e-06, "loss": 0.4694, "step": 13931 }, { "epoch": 0.7518212724623604, "grad_norm": 0.9750089893922079, "learning_rate": 2.2998653436051864e-06, "loss": 0.4163, "step": 13932 }, { "epoch": 0.7518752360908747, "grad_norm": 1.0411375184947833, "learning_rate": 2.299329038268746e-06, "loss": 0.5314, "step": 13933 }, { "epoch": 
0.7519291997193891, "grad_norm": 1.1383386471466754, "learning_rate": 2.2987928249228226e-06, "loss": 0.6169, "step": 13934 }, { "epoch": 0.7519831633479035, "grad_norm": 0.9546003616574823, "learning_rate": 2.298256703582827e-06, "loss": 0.4001, "step": 13935 }, { "epoch": 0.7520371269764179, "grad_norm": 0.8354379060924966, "learning_rate": 2.297720674264169e-06, "loss": 0.3944, "step": 13936 }, { "epoch": 0.7520910906049323, "grad_norm": 0.8688536682157534, "learning_rate": 2.297184736982255e-06, "loss": 0.4041, "step": 13937 }, { "epoch": 0.7521450542334467, "grad_norm": 0.9673040290306656, "learning_rate": 2.2966488917524877e-06, "loss": 0.4497, "step": 13938 }, { "epoch": 0.7521990178619611, "grad_norm": 1.046302118952138, "learning_rate": 2.296113138590268e-06, "loss": 0.4914, "step": 13939 }, { "epoch": 0.7522529814904754, "grad_norm": 0.8064320777360616, "learning_rate": 2.2955774775109955e-06, "loss": 0.3208, "step": 13940 }, { "epoch": 0.7523069451189898, "grad_norm": 1.049437797134903, "learning_rate": 2.2950419085300627e-06, "loss": 0.4554, "step": 13941 }, { "epoch": 0.7523609087475042, "grad_norm": 0.963044979443334, "learning_rate": 2.2945064316628634e-06, "loss": 0.4605, "step": 13942 }, { "epoch": 0.7524148723760186, "grad_norm": 0.9277369030402229, "learning_rate": 2.2939710469247886e-06, "loss": 0.3891, "step": 13943 }, { "epoch": 0.752468836004533, "grad_norm": 1.038255899529012, "learning_rate": 2.2934357543312253e-06, "loss": 0.4647, "step": 13944 }, { "epoch": 0.7525227996330474, "grad_norm": 0.8308779826793878, "learning_rate": 2.292900553897558e-06, "loss": 0.2524, "step": 13945 }, { "epoch": 0.7525767632615618, "grad_norm": 1.1901001166279568, "learning_rate": 2.2923654456391696e-06, "loss": 0.5329, "step": 13946 }, { "epoch": 0.752630726890076, "grad_norm": 0.9153584119867314, "learning_rate": 2.291830429571441e-06, "loss": 0.391, "step": 13947 }, { "epoch": 0.7526846905185904, "grad_norm": 1.0398886500903242, "learning_rate": 2.291295505709746e-06, "loss": 0.3989, "step": 13948 }, { "epoch": 0.7527386541471048, "grad_norm": 0.8813885605314811, "learning_rate": 2.29076067406946e-06, "loss": 0.3499, "step": 13949 }, { "epoch": 0.7527926177756192, "grad_norm": 1.0567013872588795, "learning_rate": 2.2902259346659555e-06, "loss": 0.3763, "step": 13950 }, { "epoch": 0.7528465814041336, "grad_norm": 1.0239319776396805, "learning_rate": 2.2896912875145997e-06, "loss": 0.4325, "step": 13951 }, { "epoch": 0.752900545032648, "grad_norm": 1.043169568219525, "learning_rate": 2.2891567326307615e-06, "loss": 0.6055, "step": 13952 }, { "epoch": 0.7529545086611624, "grad_norm": 1.315158268320146, "learning_rate": 2.288622270029802e-06, "loss": 0.6208, "step": 13953 }, { "epoch": 0.7530084722896767, "grad_norm": 0.8877865196588159, "learning_rate": 2.2880878997270855e-06, "loss": 0.356, "step": 13954 }, { "epoch": 0.7530624359181911, "grad_norm": 0.9634296170178792, "learning_rate": 2.2875536217379666e-06, "loss": 0.4345, "step": 13955 }, { "epoch": 0.7531163995467055, "grad_norm": 0.8824796326431276, "learning_rate": 2.2870194360778026e-06, "loss": 0.3775, "step": 13956 }, { "epoch": 0.7531703631752199, "grad_norm": 1.1490704949596644, "learning_rate": 2.2864853427619464e-06, "loss": 0.4787, "step": 13957 }, { "epoch": 0.7532243268037343, "grad_norm": 1.0472843810317451, "learning_rate": 2.285951341805748e-06, "loss": 0.4538, "step": 13958 }, { "epoch": 0.7532782904322487, "grad_norm": 1.1603455904862483, "learning_rate": 2.2854174332245563e-06, "loss": 0.5279, "step": 13959 }, 
{ "epoch": 0.7533322540607631, "grad_norm": 0.9812901696258612, "learning_rate": 2.2848836170337154e-06, "loss": 0.5151, "step": 13960 }, { "epoch": 0.7533862176892774, "grad_norm": 1.0601332914699295, "learning_rate": 2.2843498932485677e-06, "loss": 0.473, "step": 13961 }, { "epoch": 0.7534401813177918, "grad_norm": 0.8731307163576292, "learning_rate": 2.2838162618844532e-06, "loss": 0.3811, "step": 13962 }, { "epoch": 0.7534941449463062, "grad_norm": 0.8598279330023306, "learning_rate": 2.2832827229567098e-06, "loss": 0.322, "step": 13963 }, { "epoch": 0.7535481085748206, "grad_norm": 0.9160434513497467, "learning_rate": 2.2827492764806707e-06, "loss": 0.417, "step": 13964 }, { "epoch": 0.753602072203335, "grad_norm": 1.2753003756815688, "learning_rate": 2.282215922471668e-06, "loss": 0.4938, "step": 13965 }, { "epoch": 0.7536560358318494, "grad_norm": 1.0413033390431994, "learning_rate": 2.281682660945031e-06, "loss": 0.4525, "step": 13966 }, { "epoch": 0.7537099994603638, "grad_norm": 1.055866639858768, "learning_rate": 2.2811494919160883e-06, "loss": 0.4635, "step": 13967 }, { "epoch": 0.753763963088878, "grad_norm": 1.0670306556065037, "learning_rate": 2.2806164154001597e-06, "loss": 0.4695, "step": 13968 }, { "epoch": 0.7538179267173925, "grad_norm": 0.8832360134891668, "learning_rate": 2.2800834314125687e-06, "loss": 0.404, "step": 13969 }, { "epoch": 0.7538718903459068, "grad_norm": 1.0154232844028097, "learning_rate": 2.279550539968633e-06, "loss": 0.5647, "step": 13970 }, { "epoch": 0.7539258539744212, "grad_norm": 1.06918318546569, "learning_rate": 2.2790177410836686e-06, "loss": 0.5077, "step": 13971 }, { "epoch": 0.7539798176029356, "grad_norm": 1.0394868545589164, "learning_rate": 2.278485034772989e-06, "loss": 0.4387, "step": 13972 }, { "epoch": 0.75403378123145, "grad_norm": 1.0190498775526138, "learning_rate": 2.2779524210519046e-06, "loss": 0.4276, "step": 13973 }, { "epoch": 0.7540877448599644, "grad_norm": 1.0477537681727538, "learning_rate": 2.277419899935724e-06, "loss": 0.4977, "step": 13974 }, { "epoch": 0.7541417084884787, "grad_norm": 1.038687709598905, "learning_rate": 2.276887471439752e-06, "loss": 0.4205, "step": 13975 }, { "epoch": 0.7541956721169931, "grad_norm": 0.8550754999882894, "learning_rate": 2.27635513557929e-06, "loss": 0.3732, "step": 13976 }, { "epoch": 0.7542496357455075, "grad_norm": 1.002579757208517, "learning_rate": 2.2758228923696383e-06, "loss": 0.3779, "step": 13977 }, { "epoch": 0.7543035993740219, "grad_norm": 0.8757684609903668, "learning_rate": 2.2752907418260945e-06, "loss": 0.3862, "step": 13978 }, { "epoch": 0.7543575630025363, "grad_norm": 1.039766692050184, "learning_rate": 2.2747586839639535e-06, "loss": 0.455, "step": 13979 }, { "epoch": 0.7544115266310507, "grad_norm": 1.269311961145948, "learning_rate": 2.2742267187985063e-06, "loss": 0.4942, "step": 13980 }, { "epoch": 0.7544654902595651, "grad_norm": 0.9116306124318664, "learning_rate": 2.2736948463450428e-06, "loss": 0.4422, "step": 13981 }, { "epoch": 0.7545194538880794, "grad_norm": 1.4209363205709982, "learning_rate": 2.2731630666188513e-06, "loss": 0.5185, "step": 13982 }, { "epoch": 0.7545734175165938, "grad_norm": 1.091474460441169, "learning_rate": 2.2726313796352113e-06, "loss": 0.3717, "step": 13983 }, { "epoch": 0.7546273811451082, "grad_norm": 0.9502580375185803, "learning_rate": 2.2720997854094072e-06, "loss": 0.5635, "step": 13984 }, { "epoch": 0.7546813447736226, "grad_norm": 1.097727131566793, "learning_rate": 2.2715682839567167e-06, "loss": 0.5103, "step": 
13985 }, { "epoch": 0.754735308402137, "grad_norm": 0.87476582051389, "learning_rate": 2.2710368752924157e-06, "loss": 0.3137, "step": 13986 }, { "epoch": 0.7547892720306514, "grad_norm": 1.0824908030959848, "learning_rate": 2.2705055594317776e-06, "loss": 0.507, "step": 13987 }, { "epoch": 0.7548432356591657, "grad_norm": 1.2412557048661557, "learning_rate": 2.2699743363900743e-06, "loss": 0.62, "step": 13988 }, { "epoch": 0.7548971992876801, "grad_norm": 0.7619064252134992, "learning_rate": 2.269443206182571e-06, "loss": 0.2743, "step": 13989 }, { "epoch": 0.7549511629161945, "grad_norm": 1.0291794920616641, "learning_rate": 2.2689121688245337e-06, "loss": 0.4238, "step": 13990 }, { "epoch": 0.7550051265447089, "grad_norm": 0.7634420652607231, "learning_rate": 2.2683812243312257e-06, "loss": 0.2759, "step": 13991 }, { "epoch": 0.7550590901732233, "grad_norm": 0.888660662533418, "learning_rate": 2.267850372717907e-06, "loss": 0.3637, "step": 13992 }, { "epoch": 0.7551130538017377, "grad_norm": 1.0938418505343273, "learning_rate": 2.267319613999834e-06, "loss": 0.5777, "step": 13993 }, { "epoch": 0.755167017430252, "grad_norm": 1.281393687038122, "learning_rate": 2.266788948192262e-06, "loss": 0.5981, "step": 13994 }, { "epoch": 0.7552209810587663, "grad_norm": 0.9156048909418717, "learning_rate": 2.2662583753104436e-06, "loss": 0.397, "step": 13995 }, { "epoch": 0.7552749446872807, "grad_norm": 1.3000762897722957, "learning_rate": 2.265727895369626e-06, "loss": 0.4909, "step": 13996 }, { "epoch": 0.7553289083157951, "grad_norm": 0.8873700642481425, "learning_rate": 2.265197508385056e-06, "loss": 0.4425, "step": 13997 }, { "epoch": 0.7553828719443095, "grad_norm": 1.2342881044884315, "learning_rate": 2.2646672143719787e-06, "loss": 0.5763, "step": 13998 }, { "epoch": 0.7554368355728239, "grad_norm": 1.404536929865371, "learning_rate": 2.2641370133456347e-06, "loss": 0.6666, "step": 13999 }, { "epoch": 0.7554907992013383, "grad_norm": 1.068235239302593, "learning_rate": 2.2636069053212624e-06, "loss": 0.4564, "step": 14000 }, { "epoch": 0.7554907992013383, "eval_loss": 0.533542811870575, "eval_runtime": 164.5859, "eval_samples_per_second": 20.895, "eval_steps_per_second": 0.875, "step": 14000 }, { "epoch": 0.7555447628298527, "grad_norm": 0.9354936789278996, "learning_rate": 2.263076890314098e-06, "loss": 0.4387, "step": 14001 }, { "epoch": 0.755598726458367, "grad_norm": 0.9931809275498044, "learning_rate": 2.262546968339375e-06, "loss": 0.4557, "step": 14002 }, { "epoch": 0.7556526900868814, "grad_norm": 1.0022497036178668, "learning_rate": 2.262017139412323e-06, "loss": 0.4709, "step": 14003 }, { "epoch": 0.7557066537153958, "grad_norm": 1.0504671486400556, "learning_rate": 2.2614874035481697e-06, "loss": 0.5235, "step": 14004 }, { "epoch": 0.7557606173439102, "grad_norm": 0.903311764010699, "learning_rate": 2.2609577607621405e-06, "loss": 0.3745, "step": 14005 }, { "epoch": 0.7558145809724246, "grad_norm": 1.0634134743913592, "learning_rate": 2.2604282110694588e-06, "loss": 0.5042, "step": 14006 }, { "epoch": 0.755868544600939, "grad_norm": 1.09724279283852, "learning_rate": 2.2598987544853428e-06, "loss": 0.6339, "step": 14007 }, { "epoch": 0.7559225082294534, "grad_norm": 0.9340576035854213, "learning_rate": 2.2593693910250107e-06, "loss": 0.414, "step": 14008 }, { "epoch": 0.7559764718579677, "grad_norm": 1.1319018665907776, "learning_rate": 2.258840120703677e-06, "loss": 0.5375, "step": 14009 }, { "epoch": 0.7560304354864821, "grad_norm": 0.9371574397537727, "learning_rate": 
2.2583109435365535e-06, "loss": 0.4329, "step": 14010 }, { "epoch": 0.7560843991149965, "grad_norm": 0.8465797703266068, "learning_rate": 2.257781859538849e-06, "loss": 0.3663, "step": 14011 }, { "epoch": 0.7561383627435109, "grad_norm": 0.9799546732909562, "learning_rate": 2.2572528687257697e-06, "loss": 0.3774, "step": 14012 }, { "epoch": 0.7561923263720253, "grad_norm": 1.0731783137962008, "learning_rate": 2.256723971112519e-06, "loss": 0.4569, "step": 14013 }, { "epoch": 0.7562462900005397, "grad_norm": 0.8695403505905999, "learning_rate": 2.256195166714299e-06, "loss": 0.3188, "step": 14014 }, { "epoch": 0.7563002536290541, "grad_norm": 0.943825112091173, "learning_rate": 2.255666455546307e-06, "loss": 0.4951, "step": 14015 }, { "epoch": 0.7563542172575684, "grad_norm": 1.2331045022486962, "learning_rate": 2.2551378376237395e-06, "loss": 0.6782, "step": 14016 }, { "epoch": 0.7564081808860827, "grad_norm": 1.4798032267275172, "learning_rate": 2.2546093129617893e-06, "loss": 0.5434, "step": 14017 }, { "epoch": 0.7564621445145971, "grad_norm": 0.9938028535199084, "learning_rate": 2.2540808815756467e-06, "loss": 0.3908, "step": 14018 }, { "epoch": 0.7565161081431115, "grad_norm": 1.0779670982988565, "learning_rate": 2.2535525434804987e-06, "loss": 0.4575, "step": 14019 }, { "epoch": 0.7565700717716259, "grad_norm": 0.9610341649051867, "learning_rate": 2.2530242986915314e-06, "loss": 0.4521, "step": 14020 }, { "epoch": 0.7566240354001403, "grad_norm": 1.0548144970262483, "learning_rate": 2.2524961472239264e-06, "loss": 0.5371, "step": 14021 }, { "epoch": 0.7566779990286547, "grad_norm": 1.0359540052835028, "learning_rate": 2.251968089092864e-06, "loss": 0.4842, "step": 14022 }, { "epoch": 0.756731962657169, "grad_norm": 1.1252138429991847, "learning_rate": 2.2514401243135216e-06, "loss": 0.5575, "step": 14023 }, { "epoch": 0.7567859262856834, "grad_norm": 1.0030169820521018, "learning_rate": 2.250912252901071e-06, "loss": 0.4537, "step": 14024 }, { "epoch": 0.7568398899141978, "grad_norm": 1.1314355028232312, "learning_rate": 2.2503844748706854e-06, "loss": 0.4044, "step": 14025 }, { "epoch": 0.7568938535427122, "grad_norm": 1.1767965295077552, "learning_rate": 2.2498567902375335e-06, "loss": 0.4921, "step": 14026 }, { "epoch": 0.7569478171712266, "grad_norm": 0.8726495729706913, "learning_rate": 2.2493291990167814e-06, "loss": 0.355, "step": 14027 }, { "epoch": 0.757001780799741, "grad_norm": 1.051174877906105, "learning_rate": 2.2488017012235922e-06, "loss": 0.5024, "step": 14028 }, { "epoch": 0.7570557444282554, "grad_norm": 1.2255619159247264, "learning_rate": 2.2482742968731276e-06, "loss": 0.6038, "step": 14029 }, { "epoch": 0.7571097080567697, "grad_norm": 1.0387737497554268, "learning_rate": 2.247746985980547e-06, "loss": 0.5265, "step": 14030 }, { "epoch": 0.7571636716852841, "grad_norm": 1.1051019382834246, "learning_rate": 2.2472197685610026e-06, "loss": 0.5472, "step": 14031 }, { "epoch": 0.7572176353137985, "grad_norm": 1.1298131798795752, "learning_rate": 2.2466926446296487e-06, "loss": 0.578, "step": 14032 }, { "epoch": 0.7572715989423129, "grad_norm": 0.8247354359424252, "learning_rate": 2.246165614201636e-06, "loss": 0.3487, "step": 14033 }, { "epoch": 0.7573255625708273, "grad_norm": 0.9147531285338606, "learning_rate": 2.245638677292111e-06, "loss": 0.4712, "step": 14034 }, { "epoch": 0.7573795261993417, "grad_norm": 0.9962392298538044, "learning_rate": 2.245111833916218e-06, "loss": 0.482, "step": 14035 }, { "epoch": 0.7574334898278561, "grad_norm": 1.2219595437358248, 
"learning_rate": 2.2445850840891e-06, "loss": 0.4479, "step": 14036 }, { "epoch": 0.7574874534563704, "grad_norm": 0.9829072754572074, "learning_rate": 2.244058427825898e-06, "loss": 0.5882, "step": 14037 }, { "epoch": 0.7575414170848848, "grad_norm": 1.0715133363125136, "learning_rate": 2.2435318651417443e-06, "loss": 0.3446, "step": 14038 }, { "epoch": 0.7575953807133992, "grad_norm": 1.0532794651427677, "learning_rate": 2.243005396051776e-06, "loss": 0.3683, "step": 14039 }, { "epoch": 0.7576493443419136, "grad_norm": 1.2657654250446775, "learning_rate": 2.2424790205711232e-06, "loss": 0.6074, "step": 14040 }, { "epoch": 0.757703307970428, "grad_norm": 0.8303571842159004, "learning_rate": 2.2419527387149155e-06, "loss": 0.3366, "step": 14041 }, { "epoch": 0.7577572715989424, "grad_norm": 1.1109347240107132, "learning_rate": 2.2414265504982775e-06, "loss": 0.4992, "step": 14042 }, { "epoch": 0.7578112352274567, "grad_norm": 0.8947093340476518, "learning_rate": 2.2409004559363336e-06, "loss": 0.3795, "step": 14043 }, { "epoch": 0.757865198855971, "grad_norm": 1.2874307539552958, "learning_rate": 2.240374455044204e-06, "loss": 0.5297, "step": 14044 }, { "epoch": 0.7579191624844854, "grad_norm": 1.1151955526239983, "learning_rate": 2.239848547837005e-06, "loss": 0.4746, "step": 14045 }, { "epoch": 0.7579731261129998, "grad_norm": 0.9757719585476565, "learning_rate": 2.239322734329854e-06, "loss": 0.4732, "step": 14046 }, { "epoch": 0.7580270897415142, "grad_norm": 0.960906462502796, "learning_rate": 2.2387970145378613e-06, "loss": 0.4091, "step": 14047 }, { "epoch": 0.7580810533700286, "grad_norm": 0.8644604168307908, "learning_rate": 2.238271388476138e-06, "loss": 0.3521, "step": 14048 }, { "epoch": 0.758135016998543, "grad_norm": 0.8754311658601478, "learning_rate": 2.2377458561597903e-06, "loss": 0.3968, "step": 14049 }, { "epoch": 0.7581889806270574, "grad_norm": 1.022753319384135, "learning_rate": 2.2372204176039237e-06, "loss": 0.4365, "step": 14050 }, { "epoch": 0.7582429442555717, "grad_norm": 0.7037696112676267, "learning_rate": 2.2366950728236396e-06, "loss": 0.2856, "step": 14051 }, { "epoch": 0.7582969078840861, "grad_norm": 1.1603634650448553, "learning_rate": 2.2361698218340356e-06, "loss": 0.6064, "step": 14052 }, { "epoch": 0.7583508715126005, "grad_norm": 0.8946276753020095, "learning_rate": 2.235644664650209e-06, "loss": 0.4482, "step": 14053 }, { "epoch": 0.7584048351411149, "grad_norm": 1.0900952725093074, "learning_rate": 2.2351196012872524e-06, "loss": 0.4911, "step": 14054 }, { "epoch": 0.7584587987696293, "grad_norm": 1.0434579186318522, "learning_rate": 2.2345946317602577e-06, "loss": 0.5979, "step": 14055 }, { "epoch": 0.7585127623981437, "grad_norm": 1.2318179300551424, "learning_rate": 2.2340697560843127e-06, "loss": 0.5244, "step": 14056 }, { "epoch": 0.758566726026658, "grad_norm": 0.9363785292792274, "learning_rate": 2.233544974274504e-06, "loss": 0.432, "step": 14057 }, { "epoch": 0.7586206896551724, "grad_norm": 1.0351200631672817, "learning_rate": 2.2330202863459123e-06, "loss": 0.4116, "step": 14058 }, { "epoch": 0.7586746532836868, "grad_norm": 1.0772944519371075, "learning_rate": 2.232495692313619e-06, "loss": 0.4812, "step": 14059 }, { "epoch": 0.7587286169122012, "grad_norm": 0.9345690297230624, "learning_rate": 2.2319711921927005e-06, "loss": 0.3545, "step": 14060 }, { "epoch": 0.7587825805407156, "grad_norm": 1.0437092098766032, "learning_rate": 2.231446785998232e-06, "loss": 0.6237, "step": 14061 }, { "epoch": 0.75883654416923, "grad_norm": 
1.058723526718693, "learning_rate": 2.230922473745286e-06, "loss": 0.4297, "step": 14062 }, { "epoch": 0.7588905077977444, "grad_norm": 1.0184821947065592, "learning_rate": 2.230398255448931e-06, "loss": 0.5125, "step": 14063 }, { "epoch": 0.7589444714262586, "grad_norm": 1.1870944654756497, "learning_rate": 2.2298741311242334e-06, "loss": 0.5936, "step": 14064 }, { "epoch": 0.758998435054773, "grad_norm": 1.0288548404742364, "learning_rate": 2.2293501007862584e-06, "loss": 0.459, "step": 14065 }, { "epoch": 0.7590523986832874, "grad_norm": 1.0000619082019335, "learning_rate": 2.2288261644500652e-06, "loss": 0.4297, "step": 14066 }, { "epoch": 0.7591063623118018, "grad_norm": 0.8352114368200882, "learning_rate": 2.2283023221307138e-06, "loss": 0.3187, "step": 14067 }, { "epoch": 0.7591603259403162, "grad_norm": 1.0119472095432958, "learning_rate": 2.2277785738432596e-06, "loss": 0.4523, "step": 14068 }, { "epoch": 0.7592142895688306, "grad_norm": 0.9683223852011174, "learning_rate": 2.2272549196027553e-06, "loss": 0.4315, "step": 14069 }, { "epoch": 0.759268253197345, "grad_norm": 0.9026347161140674, "learning_rate": 2.226731359424252e-06, "loss": 0.3789, "step": 14070 }, { "epoch": 0.7593222168258593, "grad_norm": 1.008241487806419, "learning_rate": 2.2262078933227976e-06, "loss": 0.4114, "step": 14071 }, { "epoch": 0.7593761804543737, "grad_norm": 0.9643104761945684, "learning_rate": 2.225684521313435e-06, "loss": 0.3934, "step": 14072 }, { "epoch": 0.7594301440828881, "grad_norm": 0.9044016698067384, "learning_rate": 2.225161243411208e-06, "loss": 0.4271, "step": 14073 }, { "epoch": 0.7594841077114025, "grad_norm": 1.1884586731218172, "learning_rate": 2.2246380596311555e-06, "loss": 0.598, "step": 14074 }, { "epoch": 0.7595380713399169, "grad_norm": 0.9864132670642436, "learning_rate": 2.224114969988315e-06, "loss": 0.463, "step": 14075 }, { "epoch": 0.7595920349684313, "grad_norm": 1.3309934198088655, "learning_rate": 2.22359197449772e-06, "loss": 0.6471, "step": 14076 }, { "epoch": 0.7596459985969457, "grad_norm": 0.7669020242829246, "learning_rate": 2.2230690731744027e-06, "loss": 0.3698, "step": 14077 }, { "epoch": 0.75969996222546, "grad_norm": 0.9011193220538629, "learning_rate": 2.222546266033392e-06, "loss": 0.4986, "step": 14078 }, { "epoch": 0.7597539258539744, "grad_norm": 0.8253816512140858, "learning_rate": 2.222023553089712e-06, "loss": 0.3449, "step": 14079 }, { "epoch": 0.7598078894824888, "grad_norm": 0.9593549382720731, "learning_rate": 2.2215009343583877e-06, "loss": 0.6023, "step": 14080 }, { "epoch": 0.7598618531110032, "grad_norm": 0.9206240189425108, "learning_rate": 2.220978409854439e-06, "loss": 0.4004, "step": 14081 }, { "epoch": 0.7599158167395176, "grad_norm": 1.034067088604144, "learning_rate": 2.220455979592884e-06, "loss": 0.5054, "step": 14082 }, { "epoch": 0.759969780368032, "grad_norm": 1.0525801116777458, "learning_rate": 2.219933643588738e-06, "loss": 0.4722, "step": 14083 }, { "epoch": 0.7600237439965464, "grad_norm": 1.0905118359563968, "learning_rate": 2.2194114018570136e-06, "loss": 0.4881, "step": 14084 }, { "epoch": 0.7600777076250607, "grad_norm": 0.9572745724599465, "learning_rate": 2.2188892544127213e-06, "loss": 0.3695, "step": 14085 }, { "epoch": 0.7601316712535751, "grad_norm": 1.1559502840868627, "learning_rate": 2.2183672012708656e-06, "loss": 0.4817, "step": 14086 }, { "epoch": 0.7601856348820895, "grad_norm": 1.0847997119539958, "learning_rate": 2.2178452424464526e-06, "loss": 0.5115, "step": 14087 }, { "epoch": 0.7602395985106039, 
"grad_norm": 0.9553736328831977, "learning_rate": 2.2173233779544832e-06, "loss": 0.5437, "step": 14088 }, { "epoch": 0.7602935621391183, "grad_norm": 1.2411575101739827, "learning_rate": 2.2168016078099576e-06, "loss": 0.6462, "step": 14089 }, { "epoch": 0.7603475257676326, "grad_norm": 1.1008434186157212, "learning_rate": 2.2162799320278707e-06, "loss": 0.4988, "step": 14090 }, { "epoch": 0.760401489396147, "grad_norm": 0.9544268175890872, "learning_rate": 2.2157583506232167e-06, "loss": 0.3743, "step": 14091 }, { "epoch": 0.7604554530246613, "grad_norm": 1.0623833826343994, "learning_rate": 2.2152368636109877e-06, "loss": 0.5235, "step": 14092 }, { "epoch": 0.7605094166531757, "grad_norm": 1.095349650283702, "learning_rate": 2.2147154710061686e-06, "loss": 0.4706, "step": 14093 }, { "epoch": 0.7605633802816901, "grad_norm": 1.079017033617189, "learning_rate": 2.2141941728237467e-06, "loss": 0.4716, "step": 14094 }, { "epoch": 0.7606173439102045, "grad_norm": 1.0994110985912298, "learning_rate": 2.213672969078704e-06, "loss": 0.4631, "step": 14095 }, { "epoch": 0.7606713075387189, "grad_norm": 0.9312769563651165, "learning_rate": 2.2131518597860214e-06, "loss": 0.3883, "step": 14096 }, { "epoch": 0.7607252711672333, "grad_norm": 1.050316798886754, "learning_rate": 2.212630844960675e-06, "loss": 0.4083, "step": 14097 }, { "epoch": 0.7607792347957477, "grad_norm": 1.1134487024014956, "learning_rate": 2.21210992461764e-06, "loss": 0.4578, "step": 14098 }, { "epoch": 0.760833198424262, "grad_norm": 1.0960197421337934, "learning_rate": 2.211589098771889e-06, "loss": 0.5087, "step": 14099 }, { "epoch": 0.7608871620527764, "grad_norm": 0.8576646686766642, "learning_rate": 2.2110683674383887e-06, "loss": 0.3286, "step": 14100 }, { "epoch": 0.7609411256812908, "grad_norm": 0.8975381305868854, "learning_rate": 2.2105477306321068e-06, "loss": 0.4361, "step": 14101 }, { "epoch": 0.7609950893098052, "grad_norm": 0.9608841246615539, "learning_rate": 2.210027188368006e-06, "loss": 0.3888, "step": 14102 }, { "epoch": 0.7610490529383196, "grad_norm": 1.0148271247897842, "learning_rate": 2.2095067406610496e-06, "loss": 0.5752, "step": 14103 }, { "epoch": 0.761103016566834, "grad_norm": 0.7432483846045046, "learning_rate": 2.2089863875261936e-06, "loss": 0.266, "step": 14104 }, { "epoch": 0.7611569801953484, "grad_norm": 1.3547244580104505, "learning_rate": 2.2084661289783943e-06, "loss": 0.4121, "step": 14105 }, { "epoch": 0.7612109438238627, "grad_norm": 0.9899404515292248, "learning_rate": 2.2079459650326055e-06, "loss": 0.5122, "step": 14106 }, { "epoch": 0.7612649074523771, "grad_norm": 1.0719810476322928, "learning_rate": 2.207425895703774e-06, "loss": 0.5035, "step": 14107 }, { "epoch": 0.7613188710808915, "grad_norm": 1.2057477895575381, "learning_rate": 2.2069059210068506e-06, "loss": 0.5813, "step": 14108 }, { "epoch": 0.7613728347094059, "grad_norm": 1.3077407342789313, "learning_rate": 2.206386040956778e-06, "loss": 0.5667, "step": 14109 }, { "epoch": 0.7614267983379203, "grad_norm": 1.0891011486178523, "learning_rate": 2.2058662555684976e-06, "loss": 0.4929, "step": 14110 }, { "epoch": 0.7614807619664347, "grad_norm": 0.876300571343487, "learning_rate": 2.20534656485695e-06, "loss": 0.4158, "step": 14111 }, { "epoch": 0.7615347255949491, "grad_norm": 1.2609000277855016, "learning_rate": 2.2048269688370715e-06, "loss": 0.5348, "step": 14112 }, { "epoch": 0.7615886892234633, "grad_norm": 1.255577216504524, "learning_rate": 2.2043074675237953e-06, "loss": 0.5882, "step": 14113 }, { "epoch": 
0.7616426528519777, "grad_norm": 0.9810583477098861, "learning_rate": 2.203788060932053e-06, "loss": 0.4572, "step": 14114 }, { "epoch": 0.7616966164804921, "grad_norm": 1.015088715850209, "learning_rate": 2.2032687490767714e-06, "loss": 0.5748, "step": 14115 }, { "epoch": 0.7617505801090065, "grad_norm": 1.1294025485613624, "learning_rate": 2.2027495319728774e-06, "loss": 0.4734, "step": 14116 }, { "epoch": 0.7618045437375209, "grad_norm": 0.9008141026401497, "learning_rate": 2.202230409635295e-06, "loss": 0.398, "step": 14117 }, { "epoch": 0.7618585073660353, "grad_norm": 0.9815570918188072, "learning_rate": 2.2017113820789414e-06, "loss": 0.5603, "step": 14118 }, { "epoch": 0.7619124709945497, "grad_norm": 1.0641470744699095, "learning_rate": 2.2011924493187353e-06, "loss": 0.5121, "step": 14119 }, { "epoch": 0.761966434623064, "grad_norm": 0.8850332303789288, "learning_rate": 2.2006736113695914e-06, "loss": 0.4148, "step": 14120 }, { "epoch": 0.7620203982515784, "grad_norm": 1.0720982611199925, "learning_rate": 2.2001548682464213e-06, "loss": 0.5362, "step": 14121 }, { "epoch": 0.7620743618800928, "grad_norm": 1.1485372621498815, "learning_rate": 2.199636219964135e-06, "loss": 0.5404, "step": 14122 }, { "epoch": 0.7621283255086072, "grad_norm": 0.9201928139685264, "learning_rate": 2.199117666537639e-06, "loss": 0.3676, "step": 14123 }, { "epoch": 0.7621822891371216, "grad_norm": 0.7476943928426587, "learning_rate": 2.198599207981836e-06, "loss": 0.3376, "step": 14124 }, { "epoch": 0.762236252765636, "grad_norm": 0.9774180712911992, "learning_rate": 2.1980808443116274e-06, "loss": 0.452, "step": 14125 }, { "epoch": 0.7622902163941503, "grad_norm": 1.0079382991202668, "learning_rate": 2.1975625755419133e-06, "loss": 0.4358, "step": 14126 }, { "epoch": 0.7623441800226647, "grad_norm": 0.9349152087273793, "learning_rate": 2.1970444016875857e-06, "loss": 0.4344, "step": 14127 }, { "epoch": 0.7623981436511791, "grad_norm": 1.169278198242889, "learning_rate": 2.19652632276354e-06, "loss": 0.6267, "step": 14128 }, { "epoch": 0.7624521072796935, "grad_norm": 1.1551902957713174, "learning_rate": 2.196008338784666e-06, "loss": 0.6579, "step": 14129 }, { "epoch": 0.7625060709082079, "grad_norm": 1.2287926249260928, "learning_rate": 2.19549044976585e-06, "loss": 0.6022, "step": 14130 }, { "epoch": 0.7625600345367223, "grad_norm": 1.1457493789188624, "learning_rate": 2.194972655721978e-06, "loss": 0.6283, "step": 14131 }, { "epoch": 0.7626139981652367, "grad_norm": 1.226954303229493, "learning_rate": 2.194454956667931e-06, "loss": 0.5536, "step": 14132 }, { "epoch": 0.762667961793751, "grad_norm": 0.9980505997504556, "learning_rate": 2.19393735261859e-06, "loss": 0.3807, "step": 14133 }, { "epoch": 0.7627219254222654, "grad_norm": 1.1613050201561097, "learning_rate": 2.1934198435888286e-06, "loss": 0.635, "step": 14134 }, { "epoch": 0.7627758890507798, "grad_norm": 0.719146258817388, "learning_rate": 2.1929024295935224e-06, "loss": 0.3052, "step": 14135 }, { "epoch": 0.7628298526792942, "grad_norm": 1.1269172232683708, "learning_rate": 2.192385110647542e-06, "loss": 0.5619, "step": 14136 }, { "epoch": 0.7628838163078085, "grad_norm": 1.358585481082049, "learning_rate": 2.191867886765755e-06, "loss": 0.6047, "step": 14137 }, { "epoch": 0.762937779936323, "grad_norm": 0.9031446240978861, "learning_rate": 2.1913507579630285e-06, "loss": 0.3885, "step": 14138 }, { "epoch": 0.7629917435648373, "grad_norm": 1.35876267265125, "learning_rate": 2.1908337242542227e-06, "loss": 0.6019, "step": 14139 }, { 
"epoch": 0.7630457071933516, "grad_norm": 1.139517603283508, "learning_rate": 2.190316785654202e-06, "loss": 0.5293, "step": 14140 }, { "epoch": 0.763099670821866, "grad_norm": 0.8268150960788346, "learning_rate": 2.1897999421778195e-06, "loss": 0.3497, "step": 14141 }, { "epoch": 0.7631536344503804, "grad_norm": 1.1243207609895531, "learning_rate": 2.1892831938399313e-06, "loss": 0.5638, "step": 14142 }, { "epoch": 0.7632075980788948, "grad_norm": 1.0337416102456323, "learning_rate": 2.1887665406553895e-06, "loss": 0.5032, "step": 14143 }, { "epoch": 0.7632615617074092, "grad_norm": 1.1711393254163078, "learning_rate": 2.188249982639043e-06, "loss": 0.5308, "step": 14144 }, { "epoch": 0.7633155253359236, "grad_norm": 1.0842393658176575, "learning_rate": 2.187733519805738e-06, "loss": 0.4788, "step": 14145 }, { "epoch": 0.763369488964438, "grad_norm": 1.1263597740909923, "learning_rate": 2.187217152170319e-06, "loss": 0.4335, "step": 14146 }, { "epoch": 0.7634234525929523, "grad_norm": 1.1549469661226268, "learning_rate": 2.1867008797476265e-06, "loss": 0.4775, "step": 14147 }, { "epoch": 0.7634774162214667, "grad_norm": 1.3166143354409474, "learning_rate": 2.1861847025524985e-06, "loss": 0.6455, "step": 14148 }, { "epoch": 0.7635313798499811, "grad_norm": 1.0538583863379278, "learning_rate": 2.1856686205997702e-06, "loss": 0.4138, "step": 14149 }, { "epoch": 0.7635853434784955, "grad_norm": 1.0515666686156713, "learning_rate": 2.1851526339042745e-06, "loss": 0.5078, "step": 14150 }, { "epoch": 0.7636393071070099, "grad_norm": 0.9308575701809098, "learning_rate": 2.1846367424808414e-06, "loss": 0.4475, "step": 14151 }, { "epoch": 0.7636932707355243, "grad_norm": 0.8918375461148187, "learning_rate": 2.1841209463442985e-06, "loss": 0.2741, "step": 14152 }, { "epoch": 0.7637472343640387, "grad_norm": 1.3268228984779902, "learning_rate": 2.1836052455094696e-06, "loss": 0.6183, "step": 14153 }, { "epoch": 0.763801197992553, "grad_norm": 1.0515211657722692, "learning_rate": 2.183089639991178e-06, "loss": 0.426, "step": 14154 }, { "epoch": 0.7638551616210674, "grad_norm": 0.88292273492877, "learning_rate": 2.1825741298042414e-06, "loss": 0.4358, "step": 14155 }, { "epoch": 0.7639091252495818, "grad_norm": 1.0911168238694267, "learning_rate": 2.182058714963476e-06, "loss": 0.4283, "step": 14156 }, { "epoch": 0.7639630888780962, "grad_norm": 0.8911752224151559, "learning_rate": 2.181543395483695e-06, "loss": 0.399, "step": 14157 }, { "epoch": 0.7640170525066106, "grad_norm": 0.7167491549474045, "learning_rate": 2.1810281713797107e-06, "loss": 0.2991, "step": 14158 }, { "epoch": 0.764071016135125, "grad_norm": 1.0635658019508292, "learning_rate": 2.1805130426663297e-06, "loss": 0.4326, "step": 14159 }, { "epoch": 0.7641249797636394, "grad_norm": 1.0288199523801118, "learning_rate": 2.1799980093583582e-06, "loss": 0.4989, "step": 14160 }, { "epoch": 0.7641789433921536, "grad_norm": 0.9558932197269022, "learning_rate": 2.1794830714706e-06, "loss": 0.4825, "step": 14161 }, { "epoch": 0.764232907020668, "grad_norm": 0.9454661459190418, "learning_rate": 2.178968229017852e-06, "loss": 0.426, "step": 14162 }, { "epoch": 0.7642868706491824, "grad_norm": 1.2708702160228194, "learning_rate": 2.178453482014913e-06, "loss": 0.5117, "step": 14163 }, { "epoch": 0.7643408342776968, "grad_norm": 0.9654540568971489, "learning_rate": 2.177938830476577e-06, "loss": 0.4161, "step": 14164 }, { "epoch": 0.7643947979062112, "grad_norm": 1.1887865943075255, "learning_rate": 2.1774242744176354e-06, "loss": 0.527, "step": 
14165 }, { "epoch": 0.7644487615347256, "grad_norm": 0.8727706057012192, "learning_rate": 2.1769098138528772e-06, "loss": 0.3616, "step": 14166 }, { "epoch": 0.76450272516324, "grad_norm": 0.8895798914935643, "learning_rate": 2.1763954487970894e-06, "loss": 0.3446, "step": 14167 }, { "epoch": 0.7645566887917543, "grad_norm": 0.989852874801464, "learning_rate": 2.175881179265054e-06, "loss": 0.4102, "step": 14168 }, { "epoch": 0.7646106524202687, "grad_norm": 1.064101463955328, "learning_rate": 2.175367005271553e-06, "loss": 0.6475, "step": 14169 }, { "epoch": 0.7646646160487831, "grad_norm": 0.9220361048747693, "learning_rate": 2.1748529268313627e-06, "loss": 0.4591, "step": 14170 }, { "epoch": 0.7647185796772975, "grad_norm": 1.2082277028305115, "learning_rate": 2.1743389439592593e-06, "loss": 0.5257, "step": 14171 }, { "epoch": 0.7647725433058119, "grad_norm": 0.9730970180234755, "learning_rate": 2.1738250566700154e-06, "loss": 0.5218, "step": 14172 }, { "epoch": 0.7648265069343263, "grad_norm": 1.1175320014981787, "learning_rate": 2.1733112649784e-06, "loss": 0.5523, "step": 14173 }, { "epoch": 0.7648804705628407, "grad_norm": 1.141492548903176, "learning_rate": 2.1727975688991804e-06, "loss": 0.5721, "step": 14174 }, { "epoch": 0.764934434191355, "grad_norm": 1.061004409578363, "learning_rate": 2.172283968447122e-06, "loss": 0.4685, "step": 14175 }, { "epoch": 0.7649883978198694, "grad_norm": 1.0558149381490756, "learning_rate": 2.1717704636369826e-06, "loss": 0.5687, "step": 14176 }, { "epoch": 0.7650423614483838, "grad_norm": 0.9978846676182903, "learning_rate": 2.171257054483524e-06, "loss": 0.3818, "step": 14177 }, { "epoch": 0.7650963250768982, "grad_norm": 1.075598428910838, "learning_rate": 2.170743741001501e-06, "loss": 0.4331, "step": 14178 }, { "epoch": 0.7651502887054126, "grad_norm": 1.0637113607872113, "learning_rate": 2.1702305232056662e-06, "loss": 0.4469, "step": 14179 }, { "epoch": 0.765204252333927, "grad_norm": 1.0536867574106419, "learning_rate": 2.1697174011107712e-06, "loss": 0.4384, "step": 14180 }, { "epoch": 0.7652582159624414, "grad_norm": 1.0466022976721183, "learning_rate": 2.1692043747315628e-06, "loss": 0.4576, "step": 14181 }, { "epoch": 0.7653121795909557, "grad_norm": 0.975279154385196, "learning_rate": 2.1686914440827877e-06, "loss": 0.3916, "step": 14182 }, { "epoch": 0.76536614321947, "grad_norm": 1.050325752292318, "learning_rate": 2.168178609179185e-06, "loss": 0.4562, "step": 14183 }, { "epoch": 0.7654201068479844, "grad_norm": 1.3308253921321456, "learning_rate": 2.167665870035496e-06, "loss": 0.5308, "step": 14184 }, { "epoch": 0.7654740704764988, "grad_norm": 1.1650281575672303, "learning_rate": 2.1671532266664562e-06, "loss": 0.6941, "step": 14185 }, { "epoch": 0.7655280341050132, "grad_norm": 1.0050826948381852, "learning_rate": 2.166640679086801e-06, "loss": 0.3894, "step": 14186 }, { "epoch": 0.7655819977335276, "grad_norm": 1.0356948220090099, "learning_rate": 2.1661282273112604e-06, "loss": 0.4736, "step": 14187 }, { "epoch": 0.765635961362042, "grad_norm": 0.9600368350789804, "learning_rate": 2.1656158713545635e-06, "loss": 0.401, "step": 14188 }, { "epoch": 0.7656899249905563, "grad_norm": 1.0799072749872123, "learning_rate": 2.165103611231437e-06, "loss": 0.4584, "step": 14189 }, { "epoch": 0.7657438886190707, "grad_norm": 1.3311947456817206, "learning_rate": 2.1645914469566006e-06, "loss": 0.5596, "step": 14190 }, { "epoch": 0.7657978522475851, "grad_norm": 0.9389451810837417, "learning_rate": 2.1640793785447766e-06, "loss": 0.4406, 
"step": 14191 }, { "epoch": 0.7658518158760995, "grad_norm": 0.887318733688726, "learning_rate": 2.1635674060106816e-06, "loss": 0.3229, "step": 14192 }, { "epoch": 0.7659057795046139, "grad_norm": 1.2113104262460652, "learning_rate": 2.1630555293690307e-06, "loss": 0.7512, "step": 14193 }, { "epoch": 0.7659597431331283, "grad_norm": 0.8703942727321965, "learning_rate": 2.1625437486345356e-06, "loss": 0.399, "step": 14194 }, { "epoch": 0.7660137067616426, "grad_norm": 1.2510142177818817, "learning_rate": 2.162032063821907e-06, "loss": 0.5029, "step": 14195 }, { "epoch": 0.766067670390157, "grad_norm": 1.1203145092525542, "learning_rate": 2.1615204749458486e-06, "loss": 0.4734, "step": 14196 }, { "epoch": 0.7661216340186714, "grad_norm": 1.197828515284275, "learning_rate": 2.1610089820210643e-06, "loss": 0.5636, "step": 14197 }, { "epoch": 0.7661755976471858, "grad_norm": 1.096667298324891, "learning_rate": 2.160497585062256e-06, "loss": 0.4598, "step": 14198 }, { "epoch": 0.7662295612757002, "grad_norm": 1.0165928435783191, "learning_rate": 2.1599862840841213e-06, "loss": 0.4308, "step": 14199 }, { "epoch": 0.7662835249042146, "grad_norm": 0.9981656631074851, "learning_rate": 2.1594750791013557e-06, "loss": 0.5132, "step": 14200 }, { "epoch": 0.766337488532729, "grad_norm": 1.1228662329921573, "learning_rate": 2.158963970128652e-06, "loss": 0.4867, "step": 14201 }, { "epoch": 0.7663914521612433, "grad_norm": 0.9518765215134315, "learning_rate": 2.1584529571807006e-06, "loss": 0.5246, "step": 14202 }, { "epoch": 0.7664454157897577, "grad_norm": 0.8835602761469125, "learning_rate": 2.157942040272186e-06, "loss": 0.3645, "step": 14203 }, { "epoch": 0.7664993794182721, "grad_norm": 0.7841990020267345, "learning_rate": 2.1574312194177945e-06, "loss": 0.3968, "step": 14204 }, { "epoch": 0.7665533430467865, "grad_norm": 1.0091223711030786, "learning_rate": 2.1569204946322072e-06, "loss": 0.5128, "step": 14205 }, { "epoch": 0.7666073066753009, "grad_norm": 0.8440343387684025, "learning_rate": 2.156409865930103e-06, "loss": 0.2964, "step": 14206 }, { "epoch": 0.7666612703038153, "grad_norm": 1.1542418756099928, "learning_rate": 2.1558993333261572e-06, "loss": 0.5922, "step": 14207 }, { "epoch": 0.7667152339323297, "grad_norm": 0.9867860427489499, "learning_rate": 2.1553888968350437e-06, "loss": 0.4621, "step": 14208 }, { "epoch": 0.7667691975608439, "grad_norm": 0.9787632574096162, "learning_rate": 2.1548785564714344e-06, "loss": 0.4274, "step": 14209 }, { "epoch": 0.7668231611893583, "grad_norm": 0.9783912784166661, "learning_rate": 2.154368312249994e-06, "loss": 0.5039, "step": 14210 }, { "epoch": 0.7668771248178727, "grad_norm": 1.0302440891054205, "learning_rate": 2.1538581641853885e-06, "loss": 0.4108, "step": 14211 }, { "epoch": 0.7669310884463871, "grad_norm": 1.3470618513672132, "learning_rate": 2.1533481122922804e-06, "loss": 0.595, "step": 14212 }, { "epoch": 0.7669850520749015, "grad_norm": 0.9578399120610472, "learning_rate": 2.1528381565853297e-06, "loss": 0.4495, "step": 14213 }, { "epoch": 0.7670390157034159, "grad_norm": 0.8629556371905172, "learning_rate": 2.1523282970791914e-06, "loss": 0.3305, "step": 14214 }, { "epoch": 0.7670929793319303, "grad_norm": 1.0093581037235846, "learning_rate": 2.1518185337885213e-06, "loss": 0.4366, "step": 14215 }, { "epoch": 0.7671469429604446, "grad_norm": 0.8955786521217854, "learning_rate": 2.1513088667279696e-06, "loss": 0.3984, "step": 14216 }, { "epoch": 0.767200906588959, "grad_norm": 0.9767114827414054, "learning_rate": 
2.150799295912184e-06, "loss": 0.5098, "step": 14217 }, { "epoch": 0.7672548702174734, "grad_norm": 1.1021459671390357, "learning_rate": 2.1502898213558117e-06, "loss": 0.4459, "step": 14218 }, { "epoch": 0.7673088338459878, "grad_norm": 1.1719967600501304, "learning_rate": 2.1497804430734937e-06, "loss": 0.6049, "step": 14219 }, { "epoch": 0.7673627974745022, "grad_norm": 1.0085223728680084, "learning_rate": 2.149271161079871e-06, "loss": 0.4297, "step": 14220 }, { "epoch": 0.7674167611030166, "grad_norm": 0.8556113453301768, "learning_rate": 2.1487619753895817e-06, "loss": 0.4203, "step": 14221 }, { "epoch": 0.767470724731531, "grad_norm": 1.030769886544714, "learning_rate": 2.1482528860172585e-06, "loss": 0.4516, "step": 14222 }, { "epoch": 0.7675246883600453, "grad_norm": 1.080861895240368, "learning_rate": 2.147743892977536e-06, "loss": 0.356, "step": 14223 }, { "epoch": 0.7675786519885597, "grad_norm": 1.0128758835186016, "learning_rate": 2.147234996285039e-06, "loss": 0.4541, "step": 14224 }, { "epoch": 0.7676326156170741, "grad_norm": 0.929413556309162, "learning_rate": 2.146726195954397e-06, "loss": 0.3513, "step": 14225 }, { "epoch": 0.7676865792455885, "grad_norm": 0.9619793196371735, "learning_rate": 2.1462174920002315e-06, "loss": 0.4065, "step": 14226 }, { "epoch": 0.7677405428741029, "grad_norm": 1.0191993280076568, "learning_rate": 2.1457088844371644e-06, "loss": 0.6887, "step": 14227 }, { "epoch": 0.7677945065026173, "grad_norm": 0.9106749451473213, "learning_rate": 2.1452003732798133e-06, "loss": 0.3279, "step": 14228 }, { "epoch": 0.7678484701311317, "grad_norm": 1.0253190116067716, "learning_rate": 2.144691958542793e-06, "loss": 0.4967, "step": 14229 }, { "epoch": 0.767902433759646, "grad_norm": 1.1024949043569654, "learning_rate": 2.1441836402407175e-06, "loss": 0.4994, "step": 14230 }, { "epoch": 0.7679563973881604, "grad_norm": 1.121303841238638, "learning_rate": 2.1436754183881934e-06, "loss": 0.4662, "step": 14231 }, { "epoch": 0.7680103610166747, "grad_norm": 1.0286104271549372, "learning_rate": 2.1431672929998297e-06, "loss": 0.4208, "step": 14232 }, { "epoch": 0.7680643246451891, "grad_norm": 1.0944492165295847, "learning_rate": 2.142659264090229e-06, "loss": 0.4691, "step": 14233 }, { "epoch": 0.7681182882737035, "grad_norm": 1.1564859558490694, "learning_rate": 2.1421513316739945e-06, "loss": 0.4118, "step": 14234 }, { "epoch": 0.7681722519022179, "grad_norm": 1.0483422017477237, "learning_rate": 2.1416434957657227e-06, "loss": 0.4086, "step": 14235 }, { "epoch": 0.7682262155307323, "grad_norm": 0.9536361341713236, "learning_rate": 2.1411357563800106e-06, "loss": 0.4085, "step": 14236 }, { "epoch": 0.7682801791592466, "grad_norm": 0.9689108930763332, "learning_rate": 2.1406281135314523e-06, "loss": 0.3889, "step": 14237 }, { "epoch": 0.768334142787761, "grad_norm": 1.0006025609323963, "learning_rate": 2.1401205672346347e-06, "loss": 0.3848, "step": 14238 }, { "epoch": 0.7683881064162754, "grad_norm": 1.306741109114828, "learning_rate": 2.1396131175041467e-06, "loss": 0.671, "step": 14239 }, { "epoch": 0.7684420700447898, "grad_norm": 1.0370096571887635, "learning_rate": 2.1391057643545736e-06, "loss": 0.5284, "step": 14240 }, { "epoch": 0.7684960336733042, "grad_norm": 0.9081107053821491, "learning_rate": 2.1385985078004965e-06, "loss": 0.4168, "step": 14241 }, { "epoch": 0.7685499973018186, "grad_norm": 1.0336358240987997, "learning_rate": 2.138091347856495e-06, "loss": 0.3939, "step": 14242 }, { "epoch": 0.768603960930333, "grad_norm": 0.9168266007324878, 
"learning_rate": 2.137584284537145e-06, "loss": 0.4293, "step": 14243 }, { "epoch": 0.7686579245588473, "grad_norm": 0.7869110937545207, "learning_rate": 2.1370773178570216e-06, "loss": 0.3932, "step": 14244 }, { "epoch": 0.7687118881873617, "grad_norm": 1.1559919219411525, "learning_rate": 2.136570447830692e-06, "loss": 0.5533, "step": 14245 }, { "epoch": 0.7687658518158761, "grad_norm": 0.9963454711940187, "learning_rate": 2.136063674472727e-06, "loss": 0.4934, "step": 14246 }, { "epoch": 0.7688198154443905, "grad_norm": 0.8039381481472839, "learning_rate": 2.135556997797691e-06, "loss": 0.3326, "step": 14247 }, { "epoch": 0.7688737790729049, "grad_norm": 0.896315153703183, "learning_rate": 2.135050417820146e-06, "loss": 0.3246, "step": 14248 }, { "epoch": 0.7689277427014193, "grad_norm": 0.9294680108089669, "learning_rate": 2.1345439345546524e-06, "loss": 0.4219, "step": 14249 }, { "epoch": 0.7689817063299337, "grad_norm": 1.040677759763582, "learning_rate": 2.1340375480157667e-06, "loss": 0.4349, "step": 14250 }, { "epoch": 0.769035669958448, "grad_norm": 1.2527667249799772, "learning_rate": 2.1335312582180433e-06, "loss": 0.7291, "step": 14251 }, { "epoch": 0.7690896335869624, "grad_norm": 1.38159144242021, "learning_rate": 2.1330250651760333e-06, "loss": 0.5498, "step": 14252 }, { "epoch": 0.7691435972154768, "grad_norm": 0.9098150124832253, "learning_rate": 2.1325189689042836e-06, "loss": 0.3961, "step": 14253 }, { "epoch": 0.7691975608439912, "grad_norm": 0.9004359139429029, "learning_rate": 2.132012969417342e-06, "loss": 0.4398, "step": 14254 }, { "epoch": 0.7692515244725056, "grad_norm": 0.9673873151897715, "learning_rate": 2.131507066729751e-06, "loss": 0.4435, "step": 14255 }, { "epoch": 0.76930548810102, "grad_norm": 0.9783466807608096, "learning_rate": 2.13100126085605e-06, "loss": 0.3887, "step": 14256 }, { "epoch": 0.7693594517295343, "grad_norm": 1.2669337579433886, "learning_rate": 2.1304955518107772e-06, "loss": 0.5495, "step": 14257 }, { "epoch": 0.7694134153580486, "grad_norm": 0.9665587734335788, "learning_rate": 2.129989939608468e-06, "loss": 0.4791, "step": 14258 }, { "epoch": 0.769467378986563, "grad_norm": 0.9343944605128095, "learning_rate": 2.129484424263652e-06, "loss": 0.4635, "step": 14259 }, { "epoch": 0.7695213426150774, "grad_norm": 1.102448937766551, "learning_rate": 2.1289790057908592e-06, "loss": 0.4225, "step": 14260 }, { "epoch": 0.7695753062435918, "grad_norm": 1.1233919853113272, "learning_rate": 2.128473684204616e-06, "loss": 0.5577, "step": 14261 }, { "epoch": 0.7696292698721062, "grad_norm": 1.073993698625925, "learning_rate": 2.1279684595194453e-06, "loss": 0.4473, "step": 14262 }, { "epoch": 0.7696832335006206, "grad_norm": 1.1432879563612997, "learning_rate": 2.127463331749869e-06, "loss": 0.4986, "step": 14263 }, { "epoch": 0.7697371971291349, "grad_norm": 0.9694714796635561, "learning_rate": 2.1269583009104038e-06, "loss": 0.4473, "step": 14264 }, { "epoch": 0.7697911607576493, "grad_norm": 1.0396245757403264, "learning_rate": 2.1264533670155657e-06, "loss": 0.507, "step": 14265 }, { "epoch": 0.7698451243861637, "grad_norm": 1.0396770391116028, "learning_rate": 2.1259485300798675e-06, "loss": 0.4667, "step": 14266 }, { "epoch": 0.7698990880146781, "grad_norm": 0.8489437717516952, "learning_rate": 2.125443790117816e-06, "loss": 0.3607, "step": 14267 }, { "epoch": 0.7699530516431925, "grad_norm": 0.691182175608516, "learning_rate": 2.1249391471439206e-06, "loss": 0.2762, "step": 14268 }, { "epoch": 0.7700070152717069, "grad_norm": 
1.079948133310876, "learning_rate": 2.124434601172684e-06, "loss": 0.5946, "step": 14269 }, { "epoch": 0.7700609789002213, "grad_norm": 1.115839397569839, "learning_rate": 2.123930152218607e-06, "loss": 0.4393, "step": 14270 }, { "epoch": 0.7701149425287356, "grad_norm": 0.9530309149603373, "learning_rate": 2.12342580029619e-06, "loss": 0.51, "step": 14271 }, { "epoch": 0.77016890615725, "grad_norm": 0.844352729393124, "learning_rate": 2.1229215454199265e-06, "loss": 0.3533, "step": 14272 }, { "epoch": 0.7702228697857644, "grad_norm": 0.9673428916122069, "learning_rate": 2.1224173876043104e-06, "loss": 0.4245, "step": 14273 }, { "epoch": 0.7702768334142788, "grad_norm": 1.0104721342411072, "learning_rate": 2.1219133268638307e-06, "loss": 0.4977, "step": 14274 }, { "epoch": 0.7703307970427932, "grad_norm": 1.2013452055612124, "learning_rate": 2.121409363212976e-06, "loss": 0.6165, "step": 14275 }, { "epoch": 0.7703847606713076, "grad_norm": 0.971873228303481, "learning_rate": 2.1209054966662303e-06, "loss": 0.4357, "step": 14276 }, { "epoch": 0.770438724299822, "grad_norm": 1.0110038025001873, "learning_rate": 2.1204017272380744e-06, "loss": 0.439, "step": 14277 }, { "epoch": 0.7704926879283363, "grad_norm": 1.066893556705141, "learning_rate": 2.119898054942989e-06, "loss": 0.4634, "step": 14278 }, { "epoch": 0.7705466515568506, "grad_norm": 0.8869175953016482, "learning_rate": 2.1193944797954473e-06, "loss": 0.4061, "step": 14279 }, { "epoch": 0.770600615185365, "grad_norm": 0.9208767090581513, "learning_rate": 2.1188910018099245e-06, "loss": 0.3859, "step": 14280 }, { "epoch": 0.7706545788138794, "grad_norm": 0.9867387224768924, "learning_rate": 2.118387621000891e-06, "loss": 0.4175, "step": 14281 }, { "epoch": 0.7707085424423938, "grad_norm": 0.9658982801667303, "learning_rate": 2.1178843373828135e-06, "loss": 0.4617, "step": 14282 }, { "epoch": 0.7707625060709082, "grad_norm": 1.0681469412889457, "learning_rate": 2.1173811509701575e-06, "loss": 0.4099, "step": 14283 }, { "epoch": 0.7708164696994226, "grad_norm": 1.1178953577981978, "learning_rate": 2.1168780617773856e-06, "loss": 0.4394, "step": 14284 }, { "epoch": 0.7708704333279369, "grad_norm": 0.8958802081916195, "learning_rate": 2.116375069818957e-06, "loss": 0.4143, "step": 14285 }, { "epoch": 0.7709243969564513, "grad_norm": 1.1924630492388575, "learning_rate": 2.1158721751093277e-06, "loss": 0.5219, "step": 14286 }, { "epoch": 0.7709783605849657, "grad_norm": 1.1741886962966726, "learning_rate": 2.115369377662951e-06, "loss": 0.5439, "step": 14287 }, { "epoch": 0.7710323242134801, "grad_norm": 1.0216582429965113, "learning_rate": 2.1148666774942785e-06, "loss": 0.3303, "step": 14288 }, { "epoch": 0.7710862878419945, "grad_norm": 1.236522005946748, "learning_rate": 2.1143640746177576e-06, "loss": 0.5081, "step": 14289 }, { "epoch": 0.7711402514705089, "grad_norm": 0.7961106974665461, "learning_rate": 2.113861569047834e-06, "loss": 0.3337, "step": 14290 }, { "epoch": 0.7711942150990233, "grad_norm": 1.0452259180502994, "learning_rate": 2.113359160798951e-06, "loss": 0.615, "step": 14291 }, { "epoch": 0.7712481787275376, "grad_norm": 0.9544171447792474, "learning_rate": 2.112856849885549e-06, "loss": 0.3842, "step": 14292 }, { "epoch": 0.771302142356052, "grad_norm": 0.9542611324505222, "learning_rate": 2.112354636322062e-06, "loss": 0.5281, "step": 14293 }, { "epoch": 0.7713561059845664, "grad_norm": 1.19495098040227, "learning_rate": 2.1118525201229254e-06, "loss": 0.7226, "step": 14294 }, { "epoch": 0.7714100696130808, 
"grad_norm": 0.9837780138903477, "learning_rate": 2.1113505013025713e-06, "loss": 0.4383, "step": 14295 }, { "epoch": 0.7714640332415952, "grad_norm": 1.0560853040044587, "learning_rate": 2.1108485798754275e-06, "loss": 0.5338, "step": 14296 }, { "epoch": 0.7715179968701096, "grad_norm": 1.1480644460917355, "learning_rate": 2.1103467558559203e-06, "loss": 0.6061, "step": 14297 }, { "epoch": 0.771571960498624, "grad_norm": 1.0340674054706762, "learning_rate": 2.109845029258472e-06, "loss": 0.4852, "step": 14298 }, { "epoch": 0.7716259241271383, "grad_norm": 0.9653277987224296, "learning_rate": 2.1093434000975047e-06, "loss": 0.3485, "step": 14299 }, { "epoch": 0.7716798877556527, "grad_norm": 1.0325467132311257, "learning_rate": 2.1088418683874327e-06, "loss": 0.4124, "step": 14300 }, { "epoch": 0.7717338513841671, "grad_norm": 1.0821064158381222, "learning_rate": 2.108340434142672e-06, "loss": 0.4119, "step": 14301 }, { "epoch": 0.7717878150126815, "grad_norm": 1.0884120212675803, "learning_rate": 2.1078390973776338e-06, "loss": 0.4339, "step": 14302 }, { "epoch": 0.7718417786411959, "grad_norm": 1.0849884438670336, "learning_rate": 2.107337858106728e-06, "loss": 0.4853, "step": 14303 }, { "epoch": 0.7718957422697102, "grad_norm": 1.1094765971279958, "learning_rate": 2.1068367163443596e-06, "loss": 0.5364, "step": 14304 }, { "epoch": 0.7719497058982246, "grad_norm": 1.1314439579594944, "learning_rate": 2.1063356721049333e-06, "loss": 0.5238, "step": 14305 }, { "epoch": 0.7720036695267389, "grad_norm": 0.8591360438066771, "learning_rate": 2.105834725402849e-06, "loss": 0.4131, "step": 14306 }, { "epoch": 0.7720576331552533, "grad_norm": 1.2300666986408573, "learning_rate": 2.1053338762525044e-06, "loss": 0.4682, "step": 14307 }, { "epoch": 0.7721115967837677, "grad_norm": 1.1913549131930028, "learning_rate": 2.104833124668293e-06, "loss": 0.5309, "step": 14308 }, { "epoch": 0.7721655604122821, "grad_norm": 0.9755711569933762, "learning_rate": 2.104332470664609e-06, "loss": 0.4183, "step": 14309 }, { "epoch": 0.7722195240407965, "grad_norm": 0.7273154489952732, "learning_rate": 2.1038319142558404e-06, "loss": 0.3599, "step": 14310 }, { "epoch": 0.7722734876693109, "grad_norm": 0.934213678997547, "learning_rate": 2.1033314554563743e-06, "loss": 0.3613, "step": 14311 }, { "epoch": 0.7723274512978253, "grad_norm": 0.9869528944148045, "learning_rate": 2.1028310942805944e-06, "loss": 0.4968, "step": 14312 }, { "epoch": 0.7723814149263396, "grad_norm": 1.1493784909207156, "learning_rate": 2.1023308307428826e-06, "loss": 0.5266, "step": 14313 }, { "epoch": 0.772435378554854, "grad_norm": 0.9387360642326492, "learning_rate": 2.1018306648576143e-06, "loss": 0.3935, "step": 14314 }, { "epoch": 0.7724893421833684, "grad_norm": 1.062510820733369, "learning_rate": 2.1013305966391665e-06, "loss": 0.4395, "step": 14315 }, { "epoch": 0.7725433058118828, "grad_norm": 0.8262267227732525, "learning_rate": 2.100830626101912e-06, "loss": 0.3324, "step": 14316 }, { "epoch": 0.7725972694403972, "grad_norm": 1.2435848776817182, "learning_rate": 2.100330753260219e-06, "loss": 0.5007, "step": 14317 }, { "epoch": 0.7726512330689116, "grad_norm": 0.9338430535118151, "learning_rate": 2.0998309781284552e-06, "loss": 0.3908, "step": 14318 }, { "epoch": 0.772705196697426, "grad_norm": 1.1663077577933865, "learning_rate": 2.0993313007209855e-06, "loss": 0.5217, "step": 14319 }, { "epoch": 0.7727591603259403, "grad_norm": 1.3257649393710185, "learning_rate": 2.09883172105217e-06, "loss": 0.5353, "step": 14320 }, { "epoch": 
0.7728131239544547, "grad_norm": 1.0987590163461898, "learning_rate": 2.0983322391363677e-06, "loss": 0.5294, "step": 14321 }, { "epoch": 0.7728670875829691, "grad_norm": 1.2388604233043665, "learning_rate": 2.0978328549879335e-06, "loss": 0.5069, "step": 14322 }, { "epoch": 0.7729210512114835, "grad_norm": 0.9706719423805242, "learning_rate": 2.0973335686212208e-06, "loss": 0.4075, "step": 14323 }, { "epoch": 0.7729750148399979, "grad_norm": 1.058463769787896, "learning_rate": 2.09683438005058e-06, "loss": 0.4506, "step": 14324 }, { "epoch": 0.7730289784685123, "grad_norm": 1.0994357133679753, "learning_rate": 2.096335289290357e-06, "loss": 0.5042, "step": 14325 }, { "epoch": 0.7730829420970265, "grad_norm": 1.0013080357516266, "learning_rate": 2.095836296354899e-06, "loss": 0.5048, "step": 14326 }, { "epoch": 0.773136905725541, "grad_norm": 0.987420812848804, "learning_rate": 2.095337401258544e-06, "loss": 0.3857, "step": 14327 }, { "epoch": 0.7731908693540553, "grad_norm": 0.836910579882509, "learning_rate": 2.0948386040156317e-06, "loss": 0.3548, "step": 14328 }, { "epoch": 0.7732448329825697, "grad_norm": 0.8372340179199624, "learning_rate": 2.0943399046404987e-06, "loss": 0.4051, "step": 14329 }, { "epoch": 0.7732987966110841, "grad_norm": 1.1063531799983626, "learning_rate": 2.093841303147479e-06, "loss": 0.5324, "step": 14330 }, { "epoch": 0.7733527602395985, "grad_norm": 1.1825221131395094, "learning_rate": 2.0933427995509008e-06, "loss": 0.5282, "step": 14331 }, { "epoch": 0.7734067238681129, "grad_norm": 0.8861686125751678, "learning_rate": 2.0928443938650926e-06, "loss": 0.3157, "step": 14332 }, { "epoch": 0.7734606874966272, "grad_norm": 1.079579262303941, "learning_rate": 2.092346086104381e-06, "loss": 0.5429, "step": 14333 }, { "epoch": 0.7735146511251416, "grad_norm": 1.0186039028542024, "learning_rate": 2.0918478762830852e-06, "loss": 0.4188, "step": 14334 }, { "epoch": 0.773568614753656, "grad_norm": 1.0202251089662961, "learning_rate": 2.091349764415524e-06, "loss": 0.3858, "step": 14335 }, { "epoch": 0.7736225783821704, "grad_norm": 1.0658978603771039, "learning_rate": 2.0908517505160157e-06, "loss": 0.5141, "step": 14336 }, { "epoch": 0.7736765420106848, "grad_norm": 1.1478683205385043, "learning_rate": 2.0903538345988726e-06, "loss": 0.4743, "step": 14337 }, { "epoch": 0.7737305056391992, "grad_norm": 0.9234933914379745, "learning_rate": 2.0898560166784054e-06, "loss": 0.4121, "step": 14338 }, { "epoch": 0.7737844692677136, "grad_norm": 0.7353720006650354, "learning_rate": 2.089358296768922e-06, "loss": 0.3007, "step": 14339 }, { "epoch": 0.7738384328962279, "grad_norm": 1.160203013691209, "learning_rate": 2.088860674884729e-06, "loss": 0.4961, "step": 14340 }, { "epoch": 0.7738923965247423, "grad_norm": 1.1901919205743785, "learning_rate": 2.088363151040125e-06, "loss": 0.5558, "step": 14341 }, { "epoch": 0.7739463601532567, "grad_norm": 1.0757500728856886, "learning_rate": 2.087865725249411e-06, "loss": 0.5013, "step": 14342 }, { "epoch": 0.7740003237817711, "grad_norm": 1.2253694724614346, "learning_rate": 2.0873683975268847e-06, "loss": 0.3725, "step": 14343 }, { "epoch": 0.7740542874102855, "grad_norm": 1.028363710697949, "learning_rate": 2.086871167886839e-06, "loss": 0.3879, "step": 14344 }, { "epoch": 0.7741082510387999, "grad_norm": 1.142734041857299, "learning_rate": 2.0863740363435632e-06, "loss": 0.4855, "step": 14345 }, { "epoch": 0.7741622146673143, "grad_norm": 1.2561496969639887, "learning_rate": 2.0858770029113478e-06, "loss": 0.4657, "step": 14346 }, 
{ "epoch": 0.7742161782958286, "grad_norm": 0.9144290021262295, "learning_rate": 2.0853800676044785e-06, "loss": 0.3505, "step": 14347 }, { "epoch": 0.774270141924343, "grad_norm": 1.0579784536567547, "learning_rate": 2.0848832304372347e-06, "loss": 0.5187, "step": 14348 }, { "epoch": 0.7743241055528574, "grad_norm": 1.054784929562731, "learning_rate": 2.0843864914238978e-06, "loss": 0.5176, "step": 14349 }, { "epoch": 0.7743780691813718, "grad_norm": 0.9262335345117498, "learning_rate": 2.083889850578744e-06, "loss": 0.4082, "step": 14350 }, { "epoch": 0.7744320328098862, "grad_norm": 0.9470698706197943, "learning_rate": 2.0833933079160473e-06, "loss": 0.3342, "step": 14351 }, { "epoch": 0.7744859964384005, "grad_norm": 1.1861652098478488, "learning_rate": 2.08289686345008e-06, "loss": 0.438, "step": 14352 }, { "epoch": 0.774539960066915, "grad_norm": 1.1219133485829607, "learning_rate": 2.082400517195109e-06, "loss": 0.4311, "step": 14353 }, { "epoch": 0.7745939236954292, "grad_norm": 1.0150602469393966, "learning_rate": 2.0819042691654023e-06, "loss": 0.4004, "step": 14354 }, { "epoch": 0.7746478873239436, "grad_norm": 1.3054476800728159, "learning_rate": 2.081408119375219e-06, "loss": 0.3654, "step": 14355 }, { "epoch": 0.774701850952458, "grad_norm": 0.9796761624209479, "learning_rate": 2.0809120678388202e-06, "loss": 0.555, "step": 14356 }, { "epoch": 0.7747558145809724, "grad_norm": 1.0953217420802197, "learning_rate": 2.080416114570464e-06, "loss": 0.4251, "step": 14357 }, { "epoch": 0.7748097782094868, "grad_norm": 1.0333740031127727, "learning_rate": 2.0799202595844033e-06, "loss": 0.3472, "step": 14358 }, { "epoch": 0.7748637418380012, "grad_norm": 1.0158728755176933, "learning_rate": 2.0794245028948904e-06, "loss": 0.355, "step": 14359 }, { "epoch": 0.7749177054665156, "grad_norm": 0.8097227111131159, "learning_rate": 2.0789288445161736e-06, "loss": 0.3196, "step": 14360 }, { "epoch": 0.7749716690950299, "grad_norm": 1.020862447306454, "learning_rate": 2.0784332844624994e-06, "loss": 0.396, "step": 14361 }, { "epoch": 0.7750256327235443, "grad_norm": 1.0765236816529666, "learning_rate": 2.077937822748109e-06, "loss": 0.4822, "step": 14362 }, { "epoch": 0.7750795963520587, "grad_norm": 1.1305948413338578, "learning_rate": 2.077442459387243e-06, "loss": 0.5108, "step": 14363 }, { "epoch": 0.7751335599805731, "grad_norm": 1.0551246459773433, "learning_rate": 2.076947194394139e-06, "loss": 0.5606, "step": 14364 }, { "epoch": 0.7751875236090875, "grad_norm": 1.1311752063643037, "learning_rate": 2.0764520277830312e-06, "loss": 0.6118, "step": 14365 }, { "epoch": 0.7752414872376019, "grad_norm": 1.0499145864352535, "learning_rate": 2.075956959568152e-06, "loss": 0.4381, "step": 14366 }, { "epoch": 0.7752954508661163, "grad_norm": 0.9496719552405049, "learning_rate": 2.075461989763729e-06, "loss": 0.3996, "step": 14367 }, { "epoch": 0.7753494144946306, "grad_norm": 1.276578970790503, "learning_rate": 2.074967118383989e-06, "loss": 0.5822, "step": 14368 }, { "epoch": 0.775403378123145, "grad_norm": 0.8687332110688493, "learning_rate": 2.0744723454431547e-06, "loss": 0.362, "step": 14369 }, { "epoch": 0.7754573417516594, "grad_norm": 1.0007149470769077, "learning_rate": 2.073977670955446e-06, "loss": 0.4037, "step": 14370 }, { "epoch": 0.7755113053801738, "grad_norm": 1.0360504846959904, "learning_rate": 2.073483094935081e-06, "loss": 0.3986, "step": 14371 }, { "epoch": 0.7755652690086882, "grad_norm": 0.9598494425256857, "learning_rate": 2.072988617396276e-06, "loss": 0.4312, "step": 
14372 }, { "epoch": 0.7756192326372026, "grad_norm": 1.0283033312013563, "learning_rate": 2.0724942383532387e-06, "loss": 0.4248, "step": 14373 }, { "epoch": 0.775673196265717, "grad_norm": 1.0725744464930982, "learning_rate": 2.0719999578201803e-06, "loss": 0.5608, "step": 14374 }, { "epoch": 0.7757271598942312, "grad_norm": 0.8964152460758551, "learning_rate": 2.071505775811308e-06, "loss": 0.313, "step": 14375 }, { "epoch": 0.7757811235227456, "grad_norm": 0.8387030913175174, "learning_rate": 2.0710116923408226e-06, "loss": 0.2842, "step": 14376 }, { "epoch": 0.77583508715126, "grad_norm": 1.0972072867608762, "learning_rate": 2.0705177074229264e-06, "loss": 0.5419, "step": 14377 }, { "epoch": 0.7758890507797744, "grad_norm": 0.8053923197315072, "learning_rate": 2.0700238210718165e-06, "loss": 0.4089, "step": 14378 }, { "epoch": 0.7759430144082888, "grad_norm": 1.1671393645957278, "learning_rate": 2.069530033301688e-06, "loss": 0.4847, "step": 14379 }, { "epoch": 0.7759969780368032, "grad_norm": 0.9685300367127977, "learning_rate": 2.0690363441267324e-06, "loss": 0.4278, "step": 14380 }, { "epoch": 0.7760509416653176, "grad_norm": 1.05425181815556, "learning_rate": 2.0685427535611387e-06, "loss": 0.4749, "step": 14381 }, { "epoch": 0.7761049052938319, "grad_norm": 1.0113027903349576, "learning_rate": 2.068049261619095e-06, "loss": 0.5165, "step": 14382 }, { "epoch": 0.7761588689223463, "grad_norm": 1.1259911360388273, "learning_rate": 2.0675558683147823e-06, "loss": 0.5087, "step": 14383 }, { "epoch": 0.7762128325508607, "grad_norm": 0.7211926499314123, "learning_rate": 2.0670625736623816e-06, "loss": 0.2655, "step": 14384 }, { "epoch": 0.7762667961793751, "grad_norm": 0.9835995575578486, "learning_rate": 2.0665693776760717e-06, "loss": 0.3718, "step": 14385 }, { "epoch": 0.7763207598078895, "grad_norm": 0.8223119645906154, "learning_rate": 2.0660762803700267e-06, "loss": 0.3696, "step": 14386 }, { "epoch": 0.7763747234364039, "grad_norm": 1.0760840311062059, "learning_rate": 2.0655832817584193e-06, "loss": 0.5231, "step": 14387 }, { "epoch": 0.7764286870649183, "grad_norm": 1.0057243395012716, "learning_rate": 2.0650903818554187e-06, "loss": 0.4305, "step": 14388 }, { "epoch": 0.7764826506934326, "grad_norm": 1.0719124799698891, "learning_rate": 2.0645975806751926e-06, "loss": 0.4625, "step": 14389 }, { "epoch": 0.776536614321947, "grad_norm": 1.0455960047342394, "learning_rate": 2.064104878231902e-06, "loss": 0.4216, "step": 14390 }, { "epoch": 0.7765905779504614, "grad_norm": 0.9128095241420763, "learning_rate": 2.063612274539709e-06, "loss": 0.4446, "step": 14391 }, { "epoch": 0.7766445415789758, "grad_norm": 0.7802035930995922, "learning_rate": 2.063119769612771e-06, "loss": 0.3446, "step": 14392 }, { "epoch": 0.7766985052074902, "grad_norm": 1.161108802392436, "learning_rate": 2.0626273634652437e-06, "loss": 0.5247, "step": 14393 }, { "epoch": 0.7767524688360046, "grad_norm": 0.9272204876071786, "learning_rate": 2.062135056111279e-06, "loss": 0.3808, "step": 14394 }, { "epoch": 0.7768064324645189, "grad_norm": 1.2133573149954733, "learning_rate": 2.061642847565028e-06, "loss": 0.4893, "step": 14395 }, { "epoch": 0.7768603960930333, "grad_norm": 1.2200092919907302, "learning_rate": 2.0611507378406346e-06, "loss": 0.5801, "step": 14396 }, { "epoch": 0.7769143597215477, "grad_norm": 1.0109356966178489, "learning_rate": 2.0606587269522438e-06, "loss": 0.466, "step": 14397 }, { "epoch": 0.776968323350062, "grad_norm": 1.113021699656678, "learning_rate": 2.0601668149139963e-06, "loss": 
0.5077, "step": 14398 }, { "epoch": 0.7770222869785764, "grad_norm": 0.9414631942392963, "learning_rate": 2.05967500174003e-06, "loss": 0.4067, "step": 14399 }, { "epoch": 0.7770762506070908, "grad_norm": 1.1966292087506343, "learning_rate": 2.0591832874444803e-06, "loss": 0.3936, "step": 14400 }, { "epoch": 0.7771302142356052, "grad_norm": 1.0229313821570796, "learning_rate": 2.05869167204148e-06, "loss": 0.4955, "step": 14401 }, { "epoch": 0.7771841778641195, "grad_norm": 1.0884067483616644, "learning_rate": 2.0582001555451596e-06, "loss": 0.4968, "step": 14402 }, { "epoch": 0.7772381414926339, "grad_norm": 1.1496113895405562, "learning_rate": 2.0577087379696427e-06, "loss": 0.4997, "step": 14403 }, { "epoch": 0.7772921051211483, "grad_norm": 1.0316091585506433, "learning_rate": 2.057217419329055e-06, "loss": 0.4616, "step": 14404 }, { "epoch": 0.7773460687496627, "grad_norm": 1.0188820428988987, "learning_rate": 2.0567261996375177e-06, "loss": 0.4651, "step": 14405 }, { "epoch": 0.7774000323781771, "grad_norm": 1.0805457812576844, "learning_rate": 2.0562350789091474e-06, "loss": 0.5871, "step": 14406 }, { "epoch": 0.7774539960066915, "grad_norm": 1.119655554710967, "learning_rate": 2.0557440571580615e-06, "loss": 0.4714, "step": 14407 }, { "epoch": 0.7775079596352059, "grad_norm": 1.1749782463613263, "learning_rate": 2.0552531343983715e-06, "loss": 0.5361, "step": 14408 }, { "epoch": 0.7775619232637202, "grad_norm": 0.9616241526148085, "learning_rate": 2.054762310644189e-06, "loss": 0.3761, "step": 14409 }, { "epoch": 0.7776158868922346, "grad_norm": 1.176193877243342, "learning_rate": 2.054271585909617e-06, "loss": 0.5797, "step": 14410 }, { "epoch": 0.777669850520749, "grad_norm": 1.119055597181627, "learning_rate": 2.0537809602087607e-06, "loss": 0.6086, "step": 14411 }, { "epoch": 0.7777238141492634, "grad_norm": 0.9609628723473139, "learning_rate": 2.0532904335557225e-06, "loss": 0.4962, "step": 14412 }, { "epoch": 0.7777777777777778, "grad_norm": 0.965922113810176, "learning_rate": 2.0528000059646e-06, "loss": 0.4794, "step": 14413 }, { "epoch": 0.7778317414062922, "grad_norm": 1.033505263402845, "learning_rate": 2.052309677449488e-06, "loss": 0.5029, "step": 14414 }, { "epoch": 0.7778857050348066, "grad_norm": 0.927512485467369, "learning_rate": 2.0518194480244795e-06, "loss": 0.4097, "step": 14415 }, { "epoch": 0.7779396686633209, "grad_norm": 0.8795300774054674, "learning_rate": 2.051329317703665e-06, "loss": 0.3341, "step": 14416 }, { "epoch": 0.7779936322918353, "grad_norm": 0.9623697370881142, "learning_rate": 2.0508392865011304e-06, "loss": 0.3764, "step": 14417 }, { "epoch": 0.7780475959203497, "grad_norm": 0.9902206413756315, "learning_rate": 2.050349354430959e-06, "loss": 0.4537, "step": 14418 }, { "epoch": 0.7781015595488641, "grad_norm": 0.7042370685625253, "learning_rate": 2.049859521507233e-06, "loss": 0.3146, "step": 14419 }, { "epoch": 0.7781555231773785, "grad_norm": 1.1992730750468843, "learning_rate": 2.049369787744031e-06, "loss": 0.5277, "step": 14420 }, { "epoch": 0.7782094868058929, "grad_norm": 1.2285729890173887, "learning_rate": 2.0488801531554275e-06, "loss": 0.6661, "step": 14421 }, { "epoch": 0.7782634504344073, "grad_norm": 0.8964973857904519, "learning_rate": 2.0483906177554953e-06, "loss": 0.3293, "step": 14422 }, { "epoch": 0.7783174140629215, "grad_norm": 0.8173687043236592, "learning_rate": 2.0479011815583043e-06, "loss": 0.3657, "step": 14423 }, { "epoch": 0.7783713776914359, "grad_norm": 1.1450500831117785, "learning_rate": 
2.047411844577922e-06, "loss": 0.5061, "step": 14424 }, { "epoch": 0.7784253413199503, "grad_norm": 1.1443693699499344, "learning_rate": 2.0469226068284117e-06, "loss": 0.4878, "step": 14425 }, { "epoch": 0.7784793049484647, "grad_norm": 1.228838390917046, "learning_rate": 2.046433468323834e-06, "loss": 0.5364, "step": 14426 }, { "epoch": 0.7785332685769791, "grad_norm": 1.1320499738938357, "learning_rate": 2.0459444290782488e-06, "loss": 0.615, "step": 14427 }, { "epoch": 0.7785872322054935, "grad_norm": 0.7859517898059896, "learning_rate": 2.04545548910571e-06, "loss": 0.335, "step": 14428 }, { "epoch": 0.7786411958340079, "grad_norm": 0.9314475316010032, "learning_rate": 2.044966648420272e-06, "loss": 0.5007, "step": 14429 }, { "epoch": 0.7786951594625222, "grad_norm": 1.022743549693376, "learning_rate": 2.044477907035984e-06, "loss": 0.3695, "step": 14430 }, { "epoch": 0.7787491230910366, "grad_norm": 0.9557945164575815, "learning_rate": 2.0439892649668918e-06, "loss": 0.4189, "step": 14431 }, { "epoch": 0.778803086719551, "grad_norm": 0.9278745865160652, "learning_rate": 2.04350072222704e-06, "loss": 0.3587, "step": 14432 }, { "epoch": 0.7788570503480654, "grad_norm": 1.140461045450529, "learning_rate": 2.04301227883047e-06, "loss": 0.4414, "step": 14433 }, { "epoch": 0.7789110139765798, "grad_norm": 1.090949650275269, "learning_rate": 2.04252393479122e-06, "loss": 0.6113, "step": 14434 }, { "epoch": 0.7789649776050942, "grad_norm": 0.934314375940731, "learning_rate": 2.0420356901233256e-06, "loss": 0.4277, "step": 14435 }, { "epoch": 0.7790189412336086, "grad_norm": 0.9272732000360229, "learning_rate": 2.0415475448408195e-06, "loss": 0.3367, "step": 14436 }, { "epoch": 0.7790729048621229, "grad_norm": 0.9427236644770652, "learning_rate": 2.041059498957733e-06, "loss": 0.4637, "step": 14437 }, { "epoch": 0.7791268684906373, "grad_norm": 1.0290089913095755, "learning_rate": 2.04057155248809e-06, "loss": 0.433, "step": 14438 }, { "epoch": 0.7791808321191517, "grad_norm": 1.0104285536623612, "learning_rate": 2.0400837054459165e-06, "loss": 0.5414, "step": 14439 }, { "epoch": 0.7792347957476661, "grad_norm": 1.2058858741274943, "learning_rate": 2.039595957845233e-06, "loss": 0.5443, "step": 14440 }, { "epoch": 0.7792887593761805, "grad_norm": 0.8988980148026456, "learning_rate": 2.039108309700058e-06, "loss": 0.2988, "step": 14441 }, { "epoch": 0.7793427230046949, "grad_norm": 1.2661650798940882, "learning_rate": 2.0386207610244073e-06, "loss": 0.6166, "step": 14442 }, { "epoch": 0.7793966866332093, "grad_norm": 1.0909141755802485, "learning_rate": 2.038133311832294e-06, "loss": 0.4535, "step": 14443 }, { "epoch": 0.7794506502617236, "grad_norm": 1.0209353091442916, "learning_rate": 2.0376459621377274e-06, "loss": 0.5598, "step": 14444 }, { "epoch": 0.779504613890238, "grad_norm": 0.8932556600163734, "learning_rate": 2.0371587119547133e-06, "loss": 0.3668, "step": 14445 }, { "epoch": 0.7795585775187523, "grad_norm": 1.2737370946830107, "learning_rate": 2.036671561297257e-06, "loss": 0.414, "step": 14446 }, { "epoch": 0.7796125411472667, "grad_norm": 1.037955347250833, "learning_rate": 2.0361845101793597e-06, "loss": 0.4399, "step": 14447 }, { "epoch": 0.7796665047757811, "grad_norm": 1.2044028187848255, "learning_rate": 2.035697558615019e-06, "loss": 0.5112, "step": 14448 }, { "epoch": 0.7797204684042955, "grad_norm": 1.2188368998085295, "learning_rate": 2.0352107066182312e-06, "loss": 0.4468, "step": 14449 }, { "epoch": 0.7797744320328099, "grad_norm": 1.0109406777428043, 
"learning_rate": 2.034723954202989e-06, "loss": 0.4905, "step": 14450 }, { "epoch": 0.7798283956613242, "grad_norm": 0.9923325755110624, "learning_rate": 2.034237301383283e-06, "loss": 0.3956, "step": 14451 }, { "epoch": 0.7798823592898386, "grad_norm": 1.0201644367358738, "learning_rate": 2.0337507481730962e-06, "loss": 0.4434, "step": 14452 }, { "epoch": 0.779936322918353, "grad_norm": 1.2982836971463227, "learning_rate": 2.0332642945864165e-06, "loss": 0.6354, "step": 14453 }, { "epoch": 0.7799902865468674, "grad_norm": 1.0749164710417765, "learning_rate": 2.032777940637224e-06, "loss": 0.4394, "step": 14454 }, { "epoch": 0.7800442501753818, "grad_norm": 0.9809456651532976, "learning_rate": 2.0322916863394964e-06, "loss": 0.4937, "step": 14455 }, { "epoch": 0.7800982138038962, "grad_norm": 1.0395330957157807, "learning_rate": 2.0318055317072094e-06, "loss": 0.4315, "step": 14456 }, { "epoch": 0.7801521774324106, "grad_norm": 0.9248108032665019, "learning_rate": 2.031319476754336e-06, "loss": 0.3659, "step": 14457 }, { "epoch": 0.7802061410609249, "grad_norm": 1.0012895896883862, "learning_rate": 2.0308335214948477e-06, "loss": 0.4294, "step": 14458 }, { "epoch": 0.7802601046894393, "grad_norm": 1.098053149539852, "learning_rate": 2.0303476659427073e-06, "loss": 0.5854, "step": 14459 }, { "epoch": 0.7803140683179537, "grad_norm": 0.9780917670674242, "learning_rate": 2.0298619101118807e-06, "loss": 0.4355, "step": 14460 }, { "epoch": 0.7803680319464681, "grad_norm": 0.8427677745087346, "learning_rate": 2.029376254016329e-06, "loss": 0.384, "step": 14461 }, { "epoch": 0.7804219955749825, "grad_norm": 1.1406418680394432, "learning_rate": 2.0288906976700113e-06, "loss": 0.5202, "step": 14462 }, { "epoch": 0.7804759592034969, "grad_norm": 1.254188639345077, "learning_rate": 2.028405241086882e-06, "loss": 0.4134, "step": 14463 }, { "epoch": 0.7805299228320112, "grad_norm": 1.0492251735850429, "learning_rate": 2.0279198842808946e-06, "loss": 0.4175, "step": 14464 }, { "epoch": 0.7805838864605256, "grad_norm": 1.0579110904317792, "learning_rate": 2.0274346272659973e-06, "loss": 0.5302, "step": 14465 }, { "epoch": 0.78063785008904, "grad_norm": 1.1737391589162267, "learning_rate": 2.0269494700561372e-06, "loss": 0.5993, "step": 14466 }, { "epoch": 0.7806918137175544, "grad_norm": 1.2485248934360795, "learning_rate": 2.026464412665259e-06, "loss": 0.6083, "step": 14467 }, { "epoch": 0.7807457773460688, "grad_norm": 1.1304342612532223, "learning_rate": 2.0259794551073033e-06, "loss": 0.544, "step": 14468 }, { "epoch": 0.7807997409745832, "grad_norm": 0.8648598273865524, "learning_rate": 2.0254945973962074e-06, "loss": 0.4178, "step": 14469 }, { "epoch": 0.7808537046030976, "grad_norm": 1.2575831402439106, "learning_rate": 2.025009839545909e-06, "loss": 0.5421, "step": 14470 }, { "epoch": 0.7809076682316118, "grad_norm": 0.8857441769585577, "learning_rate": 2.024525181570338e-06, "loss": 0.3173, "step": 14471 }, { "epoch": 0.7809616318601262, "grad_norm": 1.0066849779756508, "learning_rate": 2.0240406234834255e-06, "loss": 0.4779, "step": 14472 }, { "epoch": 0.7810155954886406, "grad_norm": 1.2275309832958314, "learning_rate": 2.023556165299097e-06, "loss": 0.5706, "step": 14473 }, { "epoch": 0.781069559117155, "grad_norm": 0.9900239791453298, "learning_rate": 2.0230718070312776e-06, "loss": 0.4283, "step": 14474 }, { "epoch": 0.7811235227456694, "grad_norm": 0.9687414298778189, "learning_rate": 2.0225875486938873e-06, "loss": 0.4308, "step": 14475 }, { "epoch": 0.7811774863741838, "grad_norm": 
1.0549694628033899, "learning_rate": 2.022103390300845e-06, "loss": 0.5082, "step": 14476 }, { "epoch": 0.7812314500026982, "grad_norm": 1.1071721553353007, "learning_rate": 2.0216193318660647e-06, "loss": 0.5297, "step": 14477 }, { "epoch": 0.7812854136312125, "grad_norm": 1.1786026297534808, "learning_rate": 2.0211353734034616e-06, "loss": 0.5893, "step": 14478 }, { "epoch": 0.7813393772597269, "grad_norm": 0.9907743011426239, "learning_rate": 2.020651514926941e-06, "loss": 0.4203, "step": 14479 }, { "epoch": 0.7813933408882413, "grad_norm": 0.9411050437879911, "learning_rate": 2.020167756450412e-06, "loss": 0.4515, "step": 14480 }, { "epoch": 0.7814473045167557, "grad_norm": 0.8430810236880563, "learning_rate": 2.0196840979877773e-06, "loss": 0.3761, "step": 14481 }, { "epoch": 0.7815012681452701, "grad_norm": 1.1588162596435063, "learning_rate": 2.0192005395529385e-06, "loss": 0.6014, "step": 14482 }, { "epoch": 0.7815552317737845, "grad_norm": 0.9977850186804282, "learning_rate": 2.0187170811597926e-06, "loss": 0.4551, "step": 14483 }, { "epoch": 0.7816091954022989, "grad_norm": 1.008223282880729, "learning_rate": 2.0182337228222366e-06, "loss": 0.4626, "step": 14484 }, { "epoch": 0.7816631590308132, "grad_norm": 1.1051713262692409, "learning_rate": 2.0177504645541623e-06, "loss": 0.4952, "step": 14485 }, { "epoch": 0.7817171226593276, "grad_norm": 1.1868618035459073, "learning_rate": 2.0172673063694566e-06, "loss": 0.5616, "step": 14486 }, { "epoch": 0.781771086287842, "grad_norm": 1.1805049111733419, "learning_rate": 2.0167842482820077e-06, "loss": 0.4974, "step": 14487 }, { "epoch": 0.7818250499163564, "grad_norm": 1.1702871891226332, "learning_rate": 2.0163012903056987e-06, "loss": 0.5857, "step": 14488 }, { "epoch": 0.7818790135448708, "grad_norm": 1.2479803201837485, "learning_rate": 2.0158184324544118e-06, "loss": 0.6273, "step": 14489 }, { "epoch": 0.7819329771733852, "grad_norm": 1.02857235519036, "learning_rate": 2.0153356747420225e-06, "loss": 0.498, "step": 14490 }, { "epoch": 0.7819869408018996, "grad_norm": 1.1579185071793177, "learning_rate": 2.014853017182408e-06, "loss": 0.4698, "step": 14491 }, { "epoch": 0.7820409044304139, "grad_norm": 0.871069709967407, "learning_rate": 2.0143704597894388e-06, "loss": 0.4174, "step": 14492 }, { "epoch": 0.7820948680589282, "grad_norm": 1.3203712565476016, "learning_rate": 2.0138880025769847e-06, "loss": 0.5711, "step": 14493 }, { "epoch": 0.7821488316874426, "grad_norm": 0.86232776291382, "learning_rate": 2.0134056455589114e-06, "loss": 0.3138, "step": 14494 }, { "epoch": 0.782202795315957, "grad_norm": 0.9088384766482491, "learning_rate": 2.012923388749083e-06, "loss": 0.3711, "step": 14495 }, { "epoch": 0.7822567589444714, "grad_norm": 0.9011836768532737, "learning_rate": 2.0124412321613594e-06, "loss": 0.3593, "step": 14496 }, { "epoch": 0.7823107225729858, "grad_norm": 0.9488262373048938, "learning_rate": 2.011959175809599e-06, "loss": 0.4802, "step": 14497 }, { "epoch": 0.7823646862015002, "grad_norm": 1.0321900076323258, "learning_rate": 2.0114772197076564e-06, "loss": 0.4377, "step": 14498 }, { "epoch": 0.7824186498300145, "grad_norm": 1.1728024797736873, "learning_rate": 2.010995363869385e-06, "loss": 0.4675, "step": 14499 }, { "epoch": 0.7824726134585289, "grad_norm": 0.945161989143589, "learning_rate": 2.0105136083086303e-06, "loss": 0.4364, "step": 14500 }, { "epoch": 0.7824726134585289, "eval_loss": 0.5324375629425049, "eval_runtime": 164.3441, "eval_samples_per_second": 20.926, "eval_steps_per_second": 0.876, "step": 
14500 }, { "epoch": 0.7825265770870433, "grad_norm": 1.0353846129642752, "learning_rate": 2.010031953039241e-06, "loss": 0.4253, "step": 14501 }, { "epoch": 0.7825805407155577, "grad_norm": 0.9617501953433107, "learning_rate": 2.0095503980750586e-06, "loss": 0.4071, "step": 14502 }, { "epoch": 0.7826345043440721, "grad_norm": 1.1316488692146285, "learning_rate": 2.0090689434299256e-06, "loss": 0.4627, "step": 14503 }, { "epoch": 0.7826884679725865, "grad_norm": 1.1922138238286948, "learning_rate": 2.008587589117678e-06, "loss": 0.5853, "step": 14504 }, { "epoch": 0.7827424316011009, "grad_norm": 1.1350677370168967, "learning_rate": 2.0081063351521518e-06, "loss": 0.5289, "step": 14505 }, { "epoch": 0.7827963952296152, "grad_norm": 0.9102313093006867, "learning_rate": 2.007625181547178e-06, "loss": 0.3898, "step": 14506 }, { "epoch": 0.7828503588581296, "grad_norm": 0.8304204735228492, "learning_rate": 2.0071441283165847e-06, "loss": 0.3726, "step": 14507 }, { "epoch": 0.782904322486644, "grad_norm": 0.9821913928769301, "learning_rate": 2.0066631754741976e-06, "loss": 0.45, "step": 14508 }, { "epoch": 0.7829582861151584, "grad_norm": 0.9426820772123301, "learning_rate": 2.0061823230338414e-06, "loss": 0.4777, "step": 14509 }, { "epoch": 0.7830122497436728, "grad_norm": 1.0131095534742025, "learning_rate": 2.005701571009335e-06, "loss": 0.4378, "step": 14510 }, { "epoch": 0.7830662133721872, "grad_norm": 1.1223231170420371, "learning_rate": 2.005220919414497e-06, "loss": 0.4285, "step": 14511 }, { "epoch": 0.7831201770007016, "grad_norm": 0.9923094342799185, "learning_rate": 2.0047403682631405e-06, "loss": 0.3785, "step": 14512 }, { "epoch": 0.7831741406292159, "grad_norm": 1.1368355777241224, "learning_rate": 2.0042599175690784e-06, "loss": 0.3905, "step": 14513 }, { "epoch": 0.7832281042577303, "grad_norm": 0.876358998378452, "learning_rate": 2.003779567346118e-06, "loss": 0.3619, "step": 14514 }, { "epoch": 0.7832820678862447, "grad_norm": 0.8944103544261888, "learning_rate": 2.0032993176080645e-06, "loss": 0.3212, "step": 14515 }, { "epoch": 0.7833360315147591, "grad_norm": 0.9263869719107248, "learning_rate": 2.002819168368722e-06, "loss": 0.4233, "step": 14516 }, { "epoch": 0.7833899951432735, "grad_norm": 1.0650469051883267, "learning_rate": 2.0023391196418905e-06, "loss": 0.4235, "step": 14517 }, { "epoch": 0.7834439587717879, "grad_norm": 1.0336447862200027, "learning_rate": 2.001859171441367e-06, "loss": 0.5651, "step": 14518 }, { "epoch": 0.7834979224003022, "grad_norm": 1.2057059050511176, "learning_rate": 2.0013793237809454e-06, "loss": 0.4926, "step": 14519 }, { "epoch": 0.7835518860288165, "grad_norm": 0.9365287480066077, "learning_rate": 2.0008995766744173e-06, "loss": 0.3103, "step": 14520 }, { "epoch": 0.7836058496573309, "grad_norm": 1.0475193906281244, "learning_rate": 2.000419930135572e-06, "loss": 0.4171, "step": 14521 }, { "epoch": 0.7836598132858453, "grad_norm": 1.013503977196661, "learning_rate": 1.9999403841781925e-06, "loss": 0.494, "step": 14522 }, { "epoch": 0.7837137769143597, "grad_norm": 0.8250730115690996, "learning_rate": 1.999460938816063e-06, "loss": 0.3494, "step": 14523 }, { "epoch": 0.7837677405428741, "grad_norm": 1.0981165292556543, "learning_rate": 1.9989815940629635e-06, "loss": 0.4657, "step": 14524 }, { "epoch": 0.7838217041713885, "grad_norm": 0.9767573672555151, "learning_rate": 1.9985023499326705e-06, "loss": 0.434, "step": 14525 }, { "epoch": 0.7838756677999029, "grad_norm": 1.0417134922571631, "learning_rate": 1.9980232064389578e-06, "loss": 
0.4772, "step": 14526 }, { "epoch": 0.7839296314284172, "grad_norm": 0.9349967649175002, "learning_rate": 1.9975441635955965e-06, "loss": 0.3744, "step": 14527 }, { "epoch": 0.7839835950569316, "grad_norm": 0.9041636491919524, "learning_rate": 1.9970652214163547e-06, "loss": 0.392, "step": 14528 }, { "epoch": 0.784037558685446, "grad_norm": 0.8995478858751499, "learning_rate": 1.9965863799149988e-06, "loss": 0.3891, "step": 14529 }, { "epoch": 0.7840915223139604, "grad_norm": 1.0090148456458585, "learning_rate": 1.9961076391052893e-06, "loss": 0.4327, "step": 14530 }, { "epoch": 0.7841454859424748, "grad_norm": 1.0011835593994496, "learning_rate": 1.995628999000987e-06, "loss": 0.4694, "step": 14531 }, { "epoch": 0.7841994495709892, "grad_norm": 1.0241981781107876, "learning_rate": 1.9951504596158484e-06, "loss": 0.4115, "step": 14532 }, { "epoch": 0.7842534131995035, "grad_norm": 0.8217138392426513, "learning_rate": 1.994672020963628e-06, "loss": 0.3208, "step": 14533 }, { "epoch": 0.7843073768280179, "grad_norm": 1.1553323282329495, "learning_rate": 1.9941936830580747e-06, "loss": 0.5156, "step": 14534 }, { "epoch": 0.7843613404565323, "grad_norm": 0.8293158794750655, "learning_rate": 1.9937154459129364e-06, "loss": 0.3394, "step": 14535 }, { "epoch": 0.7844153040850467, "grad_norm": 0.8342557247820733, "learning_rate": 1.99323730954196e-06, "loss": 0.292, "step": 14536 }, { "epoch": 0.7844692677135611, "grad_norm": 0.9156244096674335, "learning_rate": 1.9927592739588864e-06, "loss": 0.3896, "step": 14537 }, { "epoch": 0.7845232313420755, "grad_norm": 0.9287009469797889, "learning_rate": 1.992281339177455e-06, "loss": 0.436, "step": 14538 }, { "epoch": 0.7845771949705899, "grad_norm": 0.9713892905296607, "learning_rate": 1.991803505211402e-06, "loss": 0.511, "step": 14539 }, { "epoch": 0.7846311585991042, "grad_norm": 0.8738529260398595, "learning_rate": 1.991325772074463e-06, "loss": 0.4812, "step": 14540 }, { "epoch": 0.7846851222276185, "grad_norm": 1.069978642742901, "learning_rate": 1.990848139780364e-06, "loss": 0.7341, "step": 14541 }, { "epoch": 0.784739085856133, "grad_norm": 0.9230346084709934, "learning_rate": 1.9903706083428363e-06, "loss": 0.409, "step": 14542 }, { "epoch": 0.7847930494846473, "grad_norm": 0.8421839116379571, "learning_rate": 1.989893177775603e-06, "loss": 0.3569, "step": 14543 }, { "epoch": 0.7848470131131617, "grad_norm": 1.0410140526870304, "learning_rate": 1.989415848092387e-06, "loss": 0.4455, "step": 14544 }, { "epoch": 0.7849009767416761, "grad_norm": 0.8727437842261191, "learning_rate": 1.9889386193069064e-06, "loss": 0.4315, "step": 14545 }, { "epoch": 0.7849549403701905, "grad_norm": 0.9326318032478996, "learning_rate": 1.988461491432877e-06, "loss": 0.3895, "step": 14546 }, { "epoch": 0.7850089039987048, "grad_norm": 1.0320452812984966, "learning_rate": 1.9879844644840148e-06, "loss": 0.4316, "step": 14547 }, { "epoch": 0.7850628676272192, "grad_norm": 1.1123391946361239, "learning_rate": 1.9875075384740255e-06, "loss": 0.5044, "step": 14548 }, { "epoch": 0.7851168312557336, "grad_norm": 1.0418362989425352, "learning_rate": 1.9870307134166196e-06, "loss": 0.4159, "step": 14549 }, { "epoch": 0.785170794884248, "grad_norm": 0.9244888386789647, "learning_rate": 1.9865539893255e-06, "loss": 0.4579, "step": 14550 }, { "epoch": 0.7852247585127624, "grad_norm": 1.1458244696798083, "learning_rate": 1.986077366214369e-06, "loss": 0.4527, "step": 14551 }, { "epoch": 0.7852787221412768, "grad_norm": 0.9912633749272167, "learning_rate": 
1.9856008440969253e-06, "loss": 0.3148, "step": 14552 }, { "epoch": 0.7853326857697912, "grad_norm": 0.9247364275283552, "learning_rate": 1.985124422986864e-06, "loss": 0.3358, "step": 14553 }, { "epoch": 0.7853866493983055, "grad_norm": 0.9927487479966528, "learning_rate": 1.9846481028978793e-06, "loss": 0.4986, "step": 14554 }, { "epoch": 0.7854406130268199, "grad_norm": 0.9813022766760525, "learning_rate": 1.98417188384366e-06, "loss": 0.4218, "step": 14555 }, { "epoch": 0.7854945766553343, "grad_norm": 0.9118419843640746, "learning_rate": 1.9836957658378917e-06, "loss": 0.3339, "step": 14556 }, { "epoch": 0.7855485402838487, "grad_norm": 0.9595747141682858, "learning_rate": 1.9832197488942614e-06, "loss": 0.4361, "step": 14557 }, { "epoch": 0.7856025039123631, "grad_norm": 1.0400634196970147, "learning_rate": 1.9827438330264475e-06, "loss": 0.4812, "step": 14558 }, { "epoch": 0.7856564675408775, "grad_norm": 0.9053376960437989, "learning_rate": 1.9822680182481306e-06, "loss": 0.3813, "step": 14559 }, { "epoch": 0.7857104311693919, "grad_norm": 0.9242120045080967, "learning_rate": 1.981792304572986e-06, "loss": 0.4672, "step": 14560 }, { "epoch": 0.7857643947979062, "grad_norm": 1.0026425104287817, "learning_rate": 1.9813166920146854e-06, "loss": 0.3997, "step": 14561 }, { "epoch": 0.7858183584264206, "grad_norm": 1.0164268815817612, "learning_rate": 1.980841180586898e-06, "loss": 0.5048, "step": 14562 }, { "epoch": 0.785872322054935, "grad_norm": 0.8824254190975734, "learning_rate": 1.980365770303291e-06, "loss": 0.3829, "step": 14563 }, { "epoch": 0.7859262856834494, "grad_norm": 1.1090674621178995, "learning_rate": 1.979890461177528e-06, "loss": 0.5719, "step": 14564 }, { "epoch": 0.7859802493119638, "grad_norm": 1.0277712093655211, "learning_rate": 1.9794152532232696e-06, "loss": 0.4221, "step": 14565 }, { "epoch": 0.7860342129404781, "grad_norm": 1.109636482810379, "learning_rate": 1.978940146454174e-06, "loss": 0.4797, "step": 14566 }, { "epoch": 0.7860881765689925, "grad_norm": 0.8719448094763242, "learning_rate": 1.9784651408838966e-06, "loss": 0.3701, "step": 14567 }, { "epoch": 0.7861421401975068, "grad_norm": 0.6917187417779371, "learning_rate": 1.9779902365260906e-06, "loss": 0.29, "step": 14568 }, { "epoch": 0.7861961038260212, "grad_norm": 1.1010768429165132, "learning_rate": 1.9775154333944024e-06, "loss": 0.4108, "step": 14569 }, { "epoch": 0.7862500674545356, "grad_norm": 1.177161036320019, "learning_rate": 1.977040731502481e-06, "loss": 0.5081, "step": 14570 }, { "epoch": 0.78630403108305, "grad_norm": 1.2740437649420884, "learning_rate": 1.9765661308639677e-06, "loss": 0.5229, "step": 14571 }, { "epoch": 0.7863579947115644, "grad_norm": 1.0715639090120188, "learning_rate": 1.9760916314925038e-06, "loss": 0.4769, "step": 14572 }, { "epoch": 0.7864119583400788, "grad_norm": 1.105726045225631, "learning_rate": 1.9756172334017273e-06, "loss": 0.5088, "step": 14573 }, { "epoch": 0.7864659219685932, "grad_norm": 1.0647358363333272, "learning_rate": 1.9751429366052726e-06, "loss": 0.389, "step": 14574 }, { "epoch": 0.7865198855971075, "grad_norm": 1.1353298651037742, "learning_rate": 1.9746687411167715e-06, "loss": 0.5701, "step": 14575 }, { "epoch": 0.7865738492256219, "grad_norm": 1.0274057492681332, "learning_rate": 1.974194646949853e-06, "loss": 0.4412, "step": 14576 }, { "epoch": 0.7866278128541363, "grad_norm": 1.0453580205384694, "learning_rate": 1.973720654118143e-06, "loss": 0.5148, "step": 14577 }, { "epoch": 0.7866817764826507, "grad_norm": 0.8991452673025677, 
"learning_rate": 1.9732467626352646e-06, "loss": 0.3408, "step": 14578 }, { "epoch": 0.7867357401111651, "grad_norm": 1.1501329993109375, "learning_rate": 1.972772972514837e-06, "loss": 0.427, "step": 14579 }, { "epoch": 0.7867897037396795, "grad_norm": 0.9872379786277704, "learning_rate": 1.972299283770479e-06, "loss": 0.4763, "step": 14580 }, { "epoch": 0.7868436673681939, "grad_norm": 1.1753246199241705, "learning_rate": 1.9718256964158033e-06, "loss": 0.5492, "step": 14581 }, { "epoch": 0.7868976309967082, "grad_norm": 0.9336711373710453, "learning_rate": 1.971352210464424e-06, "loss": 0.3532, "step": 14582 }, { "epoch": 0.7869515946252226, "grad_norm": 1.1399169765568866, "learning_rate": 1.9708788259299463e-06, "loss": 0.4481, "step": 14583 }, { "epoch": 0.787005558253737, "grad_norm": 1.0486413266759491, "learning_rate": 1.9704055428259763e-06, "loss": 0.437, "step": 14584 }, { "epoch": 0.7870595218822514, "grad_norm": 0.9555552968870438, "learning_rate": 1.969932361166118e-06, "loss": 0.4435, "step": 14585 }, { "epoch": 0.7871134855107658, "grad_norm": 1.001808400411889, "learning_rate": 1.9694592809639705e-06, "loss": 0.3734, "step": 14586 }, { "epoch": 0.7871674491392802, "grad_norm": 0.7375081586672189, "learning_rate": 1.9689863022331308e-06, "loss": 0.4185, "step": 14587 }, { "epoch": 0.7872214127677946, "grad_norm": 0.9213188442921666, "learning_rate": 1.968513424987192e-06, "loss": 0.4506, "step": 14588 }, { "epoch": 0.7872753763963088, "grad_norm": 1.0559839526868657, "learning_rate": 1.9680406492397476e-06, "loss": 0.505, "step": 14589 }, { "epoch": 0.7873293400248232, "grad_norm": 1.0419308833432812, "learning_rate": 1.9675679750043817e-06, "loss": 0.4701, "step": 14590 }, { "epoch": 0.7873833036533376, "grad_norm": 0.9616046469989242, "learning_rate": 1.9670954022946826e-06, "loss": 0.3328, "step": 14591 }, { "epoch": 0.787437267281852, "grad_norm": 0.9040819571329246, "learning_rate": 1.9666229311242303e-06, "loss": 0.3662, "step": 14592 }, { "epoch": 0.7874912309103664, "grad_norm": 1.1345561409696694, "learning_rate": 1.9661505615066058e-06, "loss": 0.5443, "step": 14593 }, { "epoch": 0.7875451945388808, "grad_norm": 1.1657201297365287, "learning_rate": 1.965678293455385e-06, "loss": 0.6008, "step": 14594 }, { "epoch": 0.7875991581673952, "grad_norm": 1.1236248936800637, "learning_rate": 1.96520612698414e-06, "loss": 0.4303, "step": 14595 }, { "epoch": 0.7876531217959095, "grad_norm": 1.1887382871394314, "learning_rate": 1.9647340621064455e-06, "loss": 0.5023, "step": 14596 }, { "epoch": 0.7877070854244239, "grad_norm": 1.1167323224158185, "learning_rate": 1.9642620988358635e-06, "loss": 0.51, "step": 14597 }, { "epoch": 0.7877610490529383, "grad_norm": 0.9725598658298157, "learning_rate": 1.963790237185962e-06, "loss": 0.4917, "step": 14598 }, { "epoch": 0.7878150126814527, "grad_norm": 1.1766063516941194, "learning_rate": 1.9633184771703022e-06, "loss": 0.4994, "step": 14599 }, { "epoch": 0.7878689763099671, "grad_norm": 0.9588054871739451, "learning_rate": 1.962846818802443e-06, "loss": 0.463, "step": 14600 }, { "epoch": 0.7879229399384815, "grad_norm": 1.053901011648955, "learning_rate": 1.96237526209594e-06, "loss": 0.5106, "step": 14601 }, { "epoch": 0.7879769035669958, "grad_norm": 1.1504303191946863, "learning_rate": 1.9619038070643476e-06, "loss": 0.4879, "step": 14602 }, { "epoch": 0.7880308671955102, "grad_norm": 1.0669827823038642, "learning_rate": 1.961432453721214e-06, "loss": 0.5072, "step": 14603 }, { "epoch": 0.7880848308240246, "grad_norm": 
0.9782645001759421, "learning_rate": 1.9609612020800866e-06, "loss": 0.4439, "step": 14604 }, { "epoch": 0.788138794452539, "grad_norm": 1.359828050116991, "learning_rate": 1.9604900521545107e-06, "loss": 0.6595, "step": 14605 }, { "epoch": 0.7881927580810534, "grad_norm": 1.22670663652383, "learning_rate": 1.9600190039580267e-06, "loss": 0.5579, "step": 14606 }, { "epoch": 0.7882467217095678, "grad_norm": 0.9832801412819628, "learning_rate": 1.9595480575041737e-06, "loss": 0.4679, "step": 14607 }, { "epoch": 0.7883006853380822, "grad_norm": 1.063455423744853, "learning_rate": 1.959077212806487e-06, "loss": 0.521, "step": 14608 }, { "epoch": 0.7883546489665965, "grad_norm": 1.0184793325192882, "learning_rate": 1.9586064698785e-06, "loss": 0.3785, "step": 14609 }, { "epoch": 0.7884086125951109, "grad_norm": 0.9612397042067697, "learning_rate": 1.9581358287337405e-06, "loss": 0.5349, "step": 14610 }, { "epoch": 0.7884625762236253, "grad_norm": 1.0876314603954356, "learning_rate": 1.9576652893857355e-06, "loss": 0.6497, "step": 14611 }, { "epoch": 0.7885165398521397, "grad_norm": 0.8442428382866128, "learning_rate": 1.95719485184801e-06, "loss": 0.4138, "step": 14612 }, { "epoch": 0.788570503480654, "grad_norm": 1.0370312246105564, "learning_rate": 1.9567245161340834e-06, "loss": 0.4297, "step": 14613 }, { "epoch": 0.7886244671091684, "grad_norm": 0.9831280330808988, "learning_rate": 1.956254282257475e-06, "loss": 0.4283, "step": 14614 }, { "epoch": 0.7886784307376828, "grad_norm": 1.0810431846276434, "learning_rate": 1.955784150231699e-06, "loss": 0.5893, "step": 14615 }, { "epoch": 0.7887323943661971, "grad_norm": 1.1802345429070025, "learning_rate": 1.955314120070269e-06, "loss": 0.547, "step": 14616 }, { "epoch": 0.7887863579947115, "grad_norm": 1.069473762120721, "learning_rate": 1.954844191786692e-06, "loss": 0.3753, "step": 14617 }, { "epoch": 0.7888403216232259, "grad_norm": 1.0239668779978723, "learning_rate": 1.954374365394474e-06, "loss": 0.3447, "step": 14618 }, { "epoch": 0.7888942852517403, "grad_norm": 1.0831535143537088, "learning_rate": 1.9539046409071206e-06, "loss": 0.4251, "step": 14619 }, { "epoch": 0.7889482488802547, "grad_norm": 1.1492900822974768, "learning_rate": 1.95343501833813e-06, "loss": 0.5577, "step": 14620 }, { "epoch": 0.7890022125087691, "grad_norm": 0.7583297713124629, "learning_rate": 1.9529654977010005e-06, "loss": 0.323, "step": 14621 }, { "epoch": 0.7890561761372835, "grad_norm": 0.9096020058222228, "learning_rate": 1.9524960790092274e-06, "loss": 0.3826, "step": 14622 }, { "epoch": 0.7891101397657978, "grad_norm": 0.9778620177521072, "learning_rate": 1.9520267622763013e-06, "loss": 0.4296, "step": 14623 }, { "epoch": 0.7891641033943122, "grad_norm": 1.0741544767826348, "learning_rate": 1.9515575475157104e-06, "loss": 0.4401, "step": 14624 }, { "epoch": 0.7892180670228266, "grad_norm": 0.9765940157495852, "learning_rate": 1.9510884347409412e-06, "loss": 0.5735, "step": 14625 }, { "epoch": 0.789272030651341, "grad_norm": 1.2053500147056275, "learning_rate": 1.9506194239654764e-06, "loss": 0.4886, "step": 14626 }, { "epoch": 0.7893259942798554, "grad_norm": 1.057664835411804, "learning_rate": 1.950150515202797e-06, "loss": 0.5093, "step": 14627 }, { "epoch": 0.7893799579083698, "grad_norm": 0.9053075275254887, "learning_rate": 1.949681708466378e-06, "loss": 0.4229, "step": 14628 }, { "epoch": 0.7894339215368842, "grad_norm": 1.4660527909542647, "learning_rate": 1.949213003769693e-06, "loss": 0.5433, "step": 14629 }, { "epoch": 0.7894878851653985, 
"grad_norm": 0.8426924354439126, "learning_rate": 1.9487444011262148e-06, "loss": 0.3952, "step": 14630 }, { "epoch": 0.7895418487939129, "grad_norm": 0.8901930112317316, "learning_rate": 1.9482759005494104e-06, "loss": 0.3724, "step": 14631 }, { "epoch": 0.7895958124224273, "grad_norm": 1.015796746177723, "learning_rate": 1.9478075020527453e-06, "loss": 0.3919, "step": 14632 }, { "epoch": 0.7896497760509417, "grad_norm": 1.0053810531367111, "learning_rate": 1.9473392056496825e-06, "loss": 0.5391, "step": 14633 }, { "epoch": 0.7897037396794561, "grad_norm": 0.879850645077835, "learning_rate": 1.9468710113536802e-06, "loss": 0.3311, "step": 14634 }, { "epoch": 0.7897577033079705, "grad_norm": 1.1052677512858058, "learning_rate": 1.9464029191781948e-06, "loss": 0.446, "step": 14635 }, { "epoch": 0.7898116669364849, "grad_norm": 1.0155677497942495, "learning_rate": 1.9459349291366805e-06, "loss": 0.6197, "step": 14636 }, { "epoch": 0.7898656305649991, "grad_norm": 1.2018586687388273, "learning_rate": 1.945467041242589e-06, "loss": 0.4817, "step": 14637 }, { "epoch": 0.7899195941935135, "grad_norm": 1.0869570579438435, "learning_rate": 1.9449992555093645e-06, "loss": 0.5304, "step": 14638 }, { "epoch": 0.7899735578220279, "grad_norm": 0.9497907662467563, "learning_rate": 1.9445315719504536e-06, "loss": 0.4894, "step": 14639 }, { "epoch": 0.7900275214505423, "grad_norm": 1.0053172531095134, "learning_rate": 1.9440639905792983e-06, "loss": 0.4993, "step": 14640 }, { "epoch": 0.7900814850790567, "grad_norm": 1.0519826044040437, "learning_rate": 1.9435965114093364e-06, "loss": 0.4603, "step": 14641 }, { "epoch": 0.7901354487075711, "grad_norm": 1.121994285303685, "learning_rate": 1.9431291344540047e-06, "loss": 0.4758, "step": 14642 }, { "epoch": 0.7901894123360855, "grad_norm": 0.8946920116199533, "learning_rate": 1.9426618597267355e-06, "loss": 0.2618, "step": 14643 }, { "epoch": 0.7902433759645998, "grad_norm": 1.113220607357293, "learning_rate": 1.94219468724096e-06, "loss": 0.4654, "step": 14644 }, { "epoch": 0.7902973395931142, "grad_norm": 1.0580874674336034, "learning_rate": 1.941727617010103e-06, "loss": 0.4891, "step": 14645 }, { "epoch": 0.7903513032216286, "grad_norm": 1.0364647805909184, "learning_rate": 1.94126064904759e-06, "loss": 0.45, "step": 14646 }, { "epoch": 0.790405266850143, "grad_norm": 1.0490587399810778, "learning_rate": 1.9407937833668408e-06, "loss": 0.4759, "step": 14647 }, { "epoch": 0.7904592304786574, "grad_norm": 0.9432544641532278, "learning_rate": 1.940327019981275e-06, "loss": 0.3557, "step": 14648 }, { "epoch": 0.7905131941071718, "grad_norm": 1.1337849867019563, "learning_rate": 1.9398603589043074e-06, "loss": 0.4166, "step": 14649 }, { "epoch": 0.7905671577356862, "grad_norm": 0.9628411686784104, "learning_rate": 1.9393938001493514e-06, "loss": 0.4034, "step": 14650 }, { "epoch": 0.7906211213642005, "grad_norm": 1.111355875500459, "learning_rate": 1.938927343729815e-06, "loss": 0.4596, "step": 14651 }, { "epoch": 0.7906750849927149, "grad_norm": 0.9017676529933735, "learning_rate": 1.9384609896591043e-06, "loss": 0.4594, "step": 14652 }, { "epoch": 0.7907290486212293, "grad_norm": 0.8261720474104377, "learning_rate": 1.937994737950624e-06, "loss": 0.3802, "step": 14653 }, { "epoch": 0.7907830122497437, "grad_norm": 1.072229210797679, "learning_rate": 1.9375285886177738e-06, "loss": 0.52, "step": 14654 }, { "epoch": 0.7908369758782581, "grad_norm": 1.0910878308133074, "learning_rate": 1.9370625416739507e-06, "loss": 0.4756, "step": 14655 }, { "epoch": 
0.7908909395067725, "grad_norm": 0.8586268695833523, "learning_rate": 1.9365965971325514e-06, "loss": 0.3749, "step": 14656 }, { "epoch": 0.7909449031352869, "grad_norm": 1.0057634185191764, "learning_rate": 1.936130755006966e-06, "loss": 0.4992, "step": 14657 }, { "epoch": 0.7909988667638012, "grad_norm": 1.6150398588663673, "learning_rate": 1.9356650153105847e-06, "loss": 0.4619, "step": 14658 }, { "epoch": 0.7910528303923156, "grad_norm": 1.0251777035348544, "learning_rate": 1.9351993780567912e-06, "loss": 0.4286, "step": 14659 }, { "epoch": 0.79110679402083, "grad_norm": 1.0231706273543215, "learning_rate": 1.9347338432589697e-06, "loss": 0.3845, "step": 14660 }, { "epoch": 0.7911607576493443, "grad_norm": 0.9315099261473602, "learning_rate": 1.9342684109305e-06, "loss": 0.4123, "step": 14661 }, { "epoch": 0.7912147212778587, "grad_norm": 1.0407359457055043, "learning_rate": 1.9338030810847593e-06, "loss": 0.6077, "step": 14662 }, { "epoch": 0.7912686849063731, "grad_norm": 0.9916866429737331, "learning_rate": 1.9333378537351206e-06, "loss": 0.3936, "step": 14663 }, { "epoch": 0.7913226485348875, "grad_norm": 0.888361047527817, "learning_rate": 1.9328727288949567e-06, "loss": 0.4563, "step": 14664 }, { "epoch": 0.7913766121634018, "grad_norm": 1.0274301388597396, "learning_rate": 1.9324077065776355e-06, "loss": 0.5094, "step": 14665 }, { "epoch": 0.7914305757919162, "grad_norm": 0.921856580276973, "learning_rate": 1.931942786796521e-06, "loss": 0.3809, "step": 14666 }, { "epoch": 0.7914845394204306, "grad_norm": 1.034561716415534, "learning_rate": 1.931477969564976e-06, "loss": 0.4305, "step": 14667 }, { "epoch": 0.791538503048945, "grad_norm": 0.9502745333496354, "learning_rate": 1.9310132548963593e-06, "loss": 0.3065, "step": 14668 }, { "epoch": 0.7915924666774594, "grad_norm": 1.1840242400956, "learning_rate": 1.9305486428040283e-06, "loss": 0.4666, "step": 14669 }, { "epoch": 0.7916464303059738, "grad_norm": 1.1169356083965438, "learning_rate": 1.930084133301336e-06, "loss": 0.427, "step": 14670 }, { "epoch": 0.7917003939344881, "grad_norm": 1.023123042642945, "learning_rate": 1.9296197264016337e-06, "loss": 0.3979, "step": 14671 }, { "epoch": 0.7917543575630025, "grad_norm": 1.0554826563828439, "learning_rate": 1.929155422118267e-06, "loss": 0.5007, "step": 14672 }, { "epoch": 0.7918083211915169, "grad_norm": 0.9737572176380674, "learning_rate": 1.9286912204645812e-06, "loss": 0.3973, "step": 14673 }, { "epoch": 0.7918622848200313, "grad_norm": 1.1623836894968198, "learning_rate": 1.928227121453919e-06, "loss": 0.4656, "step": 14674 }, { "epoch": 0.7919162484485457, "grad_norm": 0.9376393575868741, "learning_rate": 1.9277631250996182e-06, "loss": 0.3689, "step": 14675 }, { "epoch": 0.7919702120770601, "grad_norm": 1.04390223492826, "learning_rate": 1.9272992314150144e-06, "loss": 0.402, "step": 14676 }, { "epoch": 0.7920241757055745, "grad_norm": 1.0322256347024008, "learning_rate": 1.9268354404134407e-06, "loss": 0.4102, "step": 14677 }, { "epoch": 0.7920781393340888, "grad_norm": 1.0449109987877059, "learning_rate": 1.926371752108227e-06, "loss": 0.4433, "step": 14678 }, { "epoch": 0.7921321029626032, "grad_norm": 1.0368468534175435, "learning_rate": 1.9259081665127e-06, "loss": 0.5125, "step": 14679 }, { "epoch": 0.7921860665911176, "grad_norm": 1.0797972209112312, "learning_rate": 1.9254446836401835e-06, "loss": 0.3963, "step": 14680 }, { "epoch": 0.792240030219632, "grad_norm": 1.0909159040676193, "learning_rate": 1.9249813035039984e-06, "loss": 0.5694, "step": 14681 }, { 
"epoch": 0.7922939938481464, "grad_norm": 1.1645583006570448, "learning_rate": 1.9245180261174635e-06, "loss": 0.5176, "step": 14682 }, { "epoch": 0.7923479574766608, "grad_norm": 1.2057847053649595, "learning_rate": 1.9240548514938928e-06, "loss": 0.6152, "step": 14683 }, { "epoch": 0.7924019211051752, "grad_norm": 0.9080371031529645, "learning_rate": 1.923591779646599e-06, "loss": 0.3399, "step": 14684 }, { "epoch": 0.7924558847336894, "grad_norm": 1.1587000176992215, "learning_rate": 1.923128810588893e-06, "loss": 0.4928, "step": 14685 }, { "epoch": 0.7925098483622038, "grad_norm": 1.1940806224629577, "learning_rate": 1.9226659443340774e-06, "loss": 0.5321, "step": 14686 }, { "epoch": 0.7925638119907182, "grad_norm": 0.9433538820088282, "learning_rate": 1.922203180895457e-06, "loss": 0.5364, "step": 14687 }, { "epoch": 0.7926177756192326, "grad_norm": 0.9562633786115002, "learning_rate": 1.9217405202863325e-06, "loss": 0.3673, "step": 14688 }, { "epoch": 0.792671739247747, "grad_norm": 0.9207957651230827, "learning_rate": 1.9212779625200008e-06, "loss": 0.3171, "step": 14689 }, { "epoch": 0.7927257028762614, "grad_norm": 1.0041870101883708, "learning_rate": 1.920815507609757e-06, "loss": 0.4341, "step": 14690 }, { "epoch": 0.7927796665047758, "grad_norm": 1.0255983190137221, "learning_rate": 1.9203531555688913e-06, "loss": 0.5445, "step": 14691 }, { "epoch": 0.7928336301332901, "grad_norm": 1.1145455360977843, "learning_rate": 1.919890906410695e-06, "loss": 0.5018, "step": 14692 }, { "epoch": 0.7928875937618045, "grad_norm": 1.053575793297646, "learning_rate": 1.9194287601484496e-06, "loss": 0.6112, "step": 14693 }, { "epoch": 0.7929415573903189, "grad_norm": 1.129890802658404, "learning_rate": 1.9189667167954395e-06, "loss": 0.4163, "step": 14694 }, { "epoch": 0.7929955210188333, "grad_norm": 0.9971143942702119, "learning_rate": 1.918504776364945e-06, "loss": 0.4133, "step": 14695 }, { "epoch": 0.7930494846473477, "grad_norm": 0.828246794843553, "learning_rate": 1.9180429388702414e-06, "loss": 0.3685, "step": 14696 }, { "epoch": 0.7931034482758621, "grad_norm": 0.8479513596844016, "learning_rate": 1.9175812043246034e-06, "loss": 0.3271, "step": 14697 }, { "epoch": 0.7931574119043765, "grad_norm": 1.1243099086835642, "learning_rate": 1.917119572741301e-06, "loss": 0.5523, "step": 14698 }, { "epoch": 0.7932113755328908, "grad_norm": 0.9875972402353539, "learning_rate": 1.9166580441336043e-06, "loss": 0.5131, "step": 14699 }, { "epoch": 0.7932653391614052, "grad_norm": 1.0048136692105383, "learning_rate": 1.916196618514774e-06, "loss": 0.3896, "step": 14700 }, { "epoch": 0.7933193027899196, "grad_norm": 0.9156991604281818, "learning_rate": 1.9157352958980743e-06, "loss": 0.3503, "step": 14701 }, { "epoch": 0.793373266418434, "grad_norm": 0.9290362434969981, "learning_rate": 1.9152740762967636e-06, "loss": 0.4249, "step": 14702 }, { "epoch": 0.7934272300469484, "grad_norm": 0.8883630918949069, "learning_rate": 1.9148129597240984e-06, "loss": 0.3974, "step": 14703 }, { "epoch": 0.7934811936754628, "grad_norm": 1.0265765300504754, "learning_rate": 1.9143519461933313e-06, "loss": 0.5065, "step": 14704 }, { "epoch": 0.7935351573039772, "grad_norm": 0.9646740803721207, "learning_rate": 1.913891035717712e-06, "loss": 0.4677, "step": 14705 }, { "epoch": 0.7935891209324915, "grad_norm": 1.1173312613779238, "learning_rate": 1.9134302283104894e-06, "loss": 0.5185, "step": 14706 }, { "epoch": 0.7936430845610059, "grad_norm": 1.0480630540340863, "learning_rate": 1.9129695239849047e-06, "loss": 0.4234, 
"step": 14707 }, { "epoch": 0.7936970481895202, "grad_norm": 1.3204896184984523, "learning_rate": 1.9125089227542005e-06, "loss": 0.5089, "step": 14708 }, { "epoch": 0.7937510118180346, "grad_norm": 0.9922520433747511, "learning_rate": 1.9120484246316147e-06, "loss": 0.4104, "step": 14709 }, { "epoch": 0.793804975446549, "grad_norm": 1.1014923012566726, "learning_rate": 1.911588029630382e-06, "loss": 0.4784, "step": 14710 }, { "epoch": 0.7938589390750634, "grad_norm": 1.0634956849871278, "learning_rate": 1.9111277377637357e-06, "loss": 0.5374, "step": 14711 }, { "epoch": 0.7939129027035778, "grad_norm": 1.048762337801115, "learning_rate": 1.9106675490449043e-06, "loss": 0.3949, "step": 14712 }, { "epoch": 0.7939668663320921, "grad_norm": 1.3857732550325788, "learning_rate": 1.910207463487116e-06, "loss": 0.8371, "step": 14713 }, { "epoch": 0.7940208299606065, "grad_norm": 1.0843348717776715, "learning_rate": 1.9097474811035903e-06, "loss": 0.5509, "step": 14714 }, { "epoch": 0.7940747935891209, "grad_norm": 0.9699804361405676, "learning_rate": 1.90928760190755e-06, "loss": 0.4629, "step": 14715 }, { "epoch": 0.7941287572176353, "grad_norm": 1.409965799386098, "learning_rate": 1.908827825912213e-06, "loss": 0.482, "step": 14716 }, { "epoch": 0.7941827208461497, "grad_norm": 0.947238591271362, "learning_rate": 1.908368153130792e-06, "loss": 0.353, "step": 14717 }, { "epoch": 0.7942366844746641, "grad_norm": 1.36389969610373, "learning_rate": 1.9079085835764994e-06, "loss": 0.6026, "step": 14718 }, { "epoch": 0.7942906481031785, "grad_norm": 1.1519277611316663, "learning_rate": 1.9074491172625437e-06, "loss": 0.5681, "step": 14719 }, { "epoch": 0.7943446117316928, "grad_norm": 1.1076512005633572, "learning_rate": 1.9069897542021317e-06, "loss": 0.4524, "step": 14720 }, { "epoch": 0.7943985753602072, "grad_norm": 1.017848421325392, "learning_rate": 1.9065304944084634e-06, "loss": 0.5098, "step": 14721 }, { "epoch": 0.7944525389887216, "grad_norm": 0.937765737512193, "learning_rate": 1.90607133789474e-06, "loss": 0.3411, "step": 14722 }, { "epoch": 0.794506502617236, "grad_norm": 1.073202808900377, "learning_rate": 1.9056122846741577e-06, "loss": 0.5777, "step": 14723 }, { "epoch": 0.7945604662457504, "grad_norm": 1.1296825422326133, "learning_rate": 1.9051533347599094e-06, "loss": 0.402, "step": 14724 }, { "epoch": 0.7946144298742648, "grad_norm": 0.8173768052235465, "learning_rate": 1.9046944881651874e-06, "loss": 0.2975, "step": 14725 }, { "epoch": 0.7946683935027792, "grad_norm": 1.1794373399455296, "learning_rate": 1.9042357449031785e-06, "loss": 0.6981, "step": 14726 }, { "epoch": 0.7947223571312935, "grad_norm": 1.1123641890988822, "learning_rate": 1.9037771049870668e-06, "loss": 0.5596, "step": 14727 }, { "epoch": 0.7947763207598079, "grad_norm": 1.2357123113849133, "learning_rate": 1.903318568430036e-06, "loss": 0.4851, "step": 14728 }, { "epoch": 0.7948302843883223, "grad_norm": 1.1441593422059406, "learning_rate": 1.9028601352452628e-06, "loss": 0.5436, "step": 14729 }, { "epoch": 0.7948842480168367, "grad_norm": 0.9781521793624168, "learning_rate": 1.9024018054459246e-06, "loss": 0.4038, "step": 14730 }, { "epoch": 0.7949382116453511, "grad_norm": 1.2311397131124744, "learning_rate": 1.901943579045193e-06, "loss": 0.6452, "step": 14731 }, { "epoch": 0.7949921752738655, "grad_norm": 0.9010140984652273, "learning_rate": 1.901485456056239e-06, "loss": 0.391, "step": 14732 }, { "epoch": 0.7950461389023799, "grad_norm": 0.9463505220214544, "learning_rate": 1.9010274364922298e-06, "loss": 
0.4638, "step": 14733 }, { "epoch": 0.7951001025308941, "grad_norm": 0.9936048123130422, "learning_rate": 1.9005695203663276e-06, "loss": 0.3912, "step": 14734 }, { "epoch": 0.7951540661594085, "grad_norm": 1.33041783630449, "learning_rate": 1.9001117076916941e-06, "loss": 0.6522, "step": 14735 }, { "epoch": 0.7952080297879229, "grad_norm": 1.0508493125225786, "learning_rate": 1.8996539984814882e-06, "loss": 0.4, "step": 14736 }, { "epoch": 0.7952619934164373, "grad_norm": 1.0494029222890209, "learning_rate": 1.8991963927488635e-06, "loss": 0.4771, "step": 14737 }, { "epoch": 0.7953159570449517, "grad_norm": 1.0545694244403985, "learning_rate": 1.8987388905069731e-06, "loss": 0.5372, "step": 14738 }, { "epoch": 0.7953699206734661, "grad_norm": 0.9671201359390122, "learning_rate": 1.8982814917689664e-06, "loss": 0.4741, "step": 14739 }, { "epoch": 0.7954238843019804, "grad_norm": 0.8776667955522179, "learning_rate": 1.8978241965479894e-06, "loss": 0.374, "step": 14740 }, { "epoch": 0.7954778479304948, "grad_norm": 1.1490784023313267, "learning_rate": 1.8973670048571836e-06, "loss": 0.4851, "step": 14741 }, { "epoch": 0.7955318115590092, "grad_norm": 0.954857012397209, "learning_rate": 1.89690991670969e-06, "loss": 0.4751, "step": 14742 }, { "epoch": 0.7955857751875236, "grad_norm": 0.9630626942063284, "learning_rate": 1.8964529321186464e-06, "loss": 0.3477, "step": 14743 }, { "epoch": 0.795639738816038, "grad_norm": 1.0151186509726797, "learning_rate": 1.895996051097187e-06, "loss": 0.4953, "step": 14744 }, { "epoch": 0.7956937024445524, "grad_norm": 0.8379192698790362, "learning_rate": 1.8955392736584425e-06, "loss": 0.3317, "step": 14745 }, { "epoch": 0.7957476660730668, "grad_norm": 1.0851161359368682, "learning_rate": 1.895082599815541e-06, "loss": 0.5159, "step": 14746 }, { "epoch": 0.7958016297015811, "grad_norm": 1.2478104662804392, "learning_rate": 1.8946260295816094e-06, "loss": 0.4625, "step": 14747 }, { "epoch": 0.7958555933300955, "grad_norm": 0.7742887885056116, "learning_rate": 1.8941695629697673e-06, "loss": 0.3163, "step": 14748 }, { "epoch": 0.7959095569586099, "grad_norm": 1.1622273708228705, "learning_rate": 1.8937131999931356e-06, "loss": 0.5126, "step": 14749 }, { "epoch": 0.7959635205871243, "grad_norm": 0.9005113732268086, "learning_rate": 1.8932569406648305e-06, "loss": 0.4271, "step": 14750 }, { "epoch": 0.7960174842156387, "grad_norm": 0.9804885453601415, "learning_rate": 1.8928007849979655e-06, "loss": 0.4355, "step": 14751 }, { "epoch": 0.7960714478441531, "grad_norm": 1.0436313548469127, "learning_rate": 1.8923447330056502e-06, "loss": 0.4373, "step": 14752 }, { "epoch": 0.7961254114726675, "grad_norm": 0.9109588063388007, "learning_rate": 1.891888784700993e-06, "loss": 0.426, "step": 14753 }, { "epoch": 0.7961793751011818, "grad_norm": 1.0762700531991114, "learning_rate": 1.8914329400970988e-06, "loss": 0.4699, "step": 14754 }, { "epoch": 0.7962333387296961, "grad_norm": 0.8861200813844414, "learning_rate": 1.890977199207067e-06, "loss": 0.3702, "step": 14755 }, { "epoch": 0.7962873023582105, "grad_norm": 0.9524228528688526, "learning_rate": 1.8905215620439966e-06, "loss": 0.4173, "step": 14756 }, { "epoch": 0.7963412659867249, "grad_norm": 1.003601682473795, "learning_rate": 1.8900660286209843e-06, "loss": 0.3874, "step": 14757 }, { "epoch": 0.7963952296152393, "grad_norm": 0.9770116603968483, "learning_rate": 1.8896105989511214e-06, "loss": 0.3487, "step": 14758 }, { "epoch": 0.7964491932437537, "grad_norm": 1.0921888857640392, "learning_rate": 
1.8891552730474986e-06, "loss": 0.5435, "step": 14759 }, { "epoch": 0.7965031568722681, "grad_norm": 0.8985530295698759, "learning_rate": 1.888700050923201e-06, "loss": 0.3803, "step": 14760 }, { "epoch": 0.7965571205007824, "grad_norm": 1.0272766593325036, "learning_rate": 1.8882449325913149e-06, "loss": 0.4374, "step": 14761 }, { "epoch": 0.7966110841292968, "grad_norm": 1.196360621861719, "learning_rate": 1.8877899180649168e-06, "loss": 0.6302, "step": 14762 }, { "epoch": 0.7966650477578112, "grad_norm": 0.9966199547257808, "learning_rate": 1.8873350073570868e-06, "loss": 0.3572, "step": 14763 }, { "epoch": 0.7967190113863256, "grad_norm": 1.1335156988378492, "learning_rate": 1.886880200480899e-06, "loss": 0.4459, "step": 14764 }, { "epoch": 0.79677297501484, "grad_norm": 1.1490114188716736, "learning_rate": 1.886425497449425e-06, "loss": 0.4226, "step": 14765 }, { "epoch": 0.7968269386433544, "grad_norm": 1.0059514956825795, "learning_rate": 1.8859708982757335e-06, "loss": 0.4157, "step": 14766 }, { "epoch": 0.7968809022718688, "grad_norm": 0.7956385282690904, "learning_rate": 1.8855164029728902e-06, "loss": 0.4154, "step": 14767 }, { "epoch": 0.7969348659003831, "grad_norm": 1.027229258560275, "learning_rate": 1.8850620115539586e-06, "loss": 0.4882, "step": 14768 }, { "epoch": 0.7969888295288975, "grad_norm": 0.9200074034127403, "learning_rate": 1.884607724031996e-06, "loss": 0.3864, "step": 14769 }, { "epoch": 0.7970427931574119, "grad_norm": 0.887075731931406, "learning_rate": 1.8841535404200605e-06, "loss": 0.3955, "step": 14770 }, { "epoch": 0.7970967567859263, "grad_norm": 1.2438792107003147, "learning_rate": 1.8836994607312065e-06, "loss": 0.5332, "step": 14771 }, { "epoch": 0.7971507204144407, "grad_norm": 1.0307681574932173, "learning_rate": 1.8832454849784836e-06, "loss": 0.4218, "step": 14772 }, { "epoch": 0.7972046840429551, "grad_norm": 0.9959415972932533, "learning_rate": 1.8827916131749396e-06, "loss": 0.4544, "step": 14773 }, { "epoch": 0.7972586476714695, "grad_norm": 1.1420035912272635, "learning_rate": 1.88233784533362e-06, "loss": 0.5161, "step": 14774 }, { "epoch": 0.7973126112999838, "grad_norm": 0.9139977786354526, "learning_rate": 1.8818841814675658e-06, "loss": 0.384, "step": 14775 }, { "epoch": 0.7973665749284982, "grad_norm": 1.1253187578119446, "learning_rate": 1.8814306215898165e-06, "loss": 0.4422, "step": 14776 }, { "epoch": 0.7974205385570126, "grad_norm": 0.8713162778267493, "learning_rate": 1.8809771657134086e-06, "loss": 0.5021, "step": 14777 }, { "epoch": 0.797474502185527, "grad_norm": 1.1005253140300735, "learning_rate": 1.880523813851372e-06, "loss": 0.5531, "step": 14778 }, { "epoch": 0.7975284658140414, "grad_norm": 1.1386980644003926, "learning_rate": 1.8800705660167384e-06, "loss": 0.5642, "step": 14779 }, { "epoch": 0.7975824294425558, "grad_norm": 1.1828925739550857, "learning_rate": 1.8796174222225345e-06, "loss": 0.5738, "step": 14780 }, { "epoch": 0.7976363930710701, "grad_norm": 0.9292486094603751, "learning_rate": 1.879164382481784e-06, "loss": 0.3698, "step": 14781 }, { "epoch": 0.7976903566995844, "grad_norm": 1.0984978322483399, "learning_rate": 1.878711446807508e-06, "loss": 0.4803, "step": 14782 }, { "epoch": 0.7977443203280988, "grad_norm": 1.0808667381546153, "learning_rate": 1.8782586152127236e-06, "loss": 0.5648, "step": 14783 }, { "epoch": 0.7977982839566132, "grad_norm": 1.0103599267923162, "learning_rate": 1.877805887710446e-06, "loss": 0.4267, "step": 14784 }, { "epoch": 0.7978522475851276, "grad_norm": 1.127591519733452, 
"learning_rate": 1.8773532643136877e-06, "loss": 0.5792, "step": 14785 }, { "epoch": 0.797906211213642, "grad_norm": 0.9309712047612532, "learning_rate": 1.876900745035457e-06, "loss": 0.5485, "step": 14786 }, { "epoch": 0.7979601748421564, "grad_norm": 1.128324876273577, "learning_rate": 1.8764483298887593e-06, "loss": 0.5225, "step": 14787 }, { "epoch": 0.7980141384706708, "grad_norm": 0.884910801788974, "learning_rate": 1.8759960188865984e-06, "loss": 0.4142, "step": 14788 }, { "epoch": 0.7980681020991851, "grad_norm": 0.9471320499228437, "learning_rate": 1.8755438120419745e-06, "loss": 0.4434, "step": 14789 }, { "epoch": 0.7981220657276995, "grad_norm": 0.8134948222090379, "learning_rate": 1.8750917093678824e-06, "loss": 0.3054, "step": 14790 }, { "epoch": 0.7981760293562139, "grad_norm": 0.9682750170772254, "learning_rate": 1.8746397108773178e-06, "loss": 0.4417, "step": 14791 }, { "epoch": 0.7982299929847283, "grad_norm": 1.036577158250916, "learning_rate": 1.8741878165832712e-06, "loss": 0.5362, "step": 14792 }, { "epoch": 0.7982839566132427, "grad_norm": 1.163347762033495, "learning_rate": 1.8737360264987292e-06, "loss": 0.5545, "step": 14793 }, { "epoch": 0.7983379202417571, "grad_norm": 1.0995744107310832, "learning_rate": 1.8732843406366792e-06, "loss": 0.4869, "step": 14794 }, { "epoch": 0.7983918838702715, "grad_norm": 1.0229166415700088, "learning_rate": 1.8728327590101009e-06, "loss": 0.4744, "step": 14795 }, { "epoch": 0.7984458474987858, "grad_norm": 0.9607900755609097, "learning_rate": 1.8723812816319753e-06, "loss": 0.3749, "step": 14796 }, { "epoch": 0.7984998111273002, "grad_norm": 1.1734748183851609, "learning_rate": 1.8719299085152767e-06, "loss": 0.4903, "step": 14797 }, { "epoch": 0.7985537747558146, "grad_norm": 1.02817619512855, "learning_rate": 1.8714786396729775e-06, "loss": 0.3206, "step": 14798 }, { "epoch": 0.798607738384329, "grad_norm": 1.0437421904178203, "learning_rate": 1.871027475118049e-06, "loss": 0.4057, "step": 14799 }, { "epoch": 0.7986617020128434, "grad_norm": 0.9650162500973755, "learning_rate": 1.8705764148634581e-06, "loss": 0.3601, "step": 14800 }, { "epoch": 0.7987156656413578, "grad_norm": 0.8539200665972264, "learning_rate": 1.8701254589221674e-06, "loss": 0.3216, "step": 14801 }, { "epoch": 0.7987696292698722, "grad_norm": 0.9478621133524248, "learning_rate": 1.8696746073071394e-06, "loss": 0.4205, "step": 14802 }, { "epoch": 0.7988235928983864, "grad_norm": 1.2313558330083039, "learning_rate": 1.8692238600313324e-06, "loss": 0.4798, "step": 14803 }, { "epoch": 0.7988775565269008, "grad_norm": 0.9485377490832243, "learning_rate": 1.868773217107699e-06, "loss": 0.4027, "step": 14804 }, { "epoch": 0.7989315201554152, "grad_norm": 1.022802011819007, "learning_rate": 1.8683226785491923e-06, "loss": 0.5217, "step": 14805 }, { "epoch": 0.7989854837839296, "grad_norm": 1.1219222287987456, "learning_rate": 1.867872244368762e-06, "loss": 0.4711, "step": 14806 }, { "epoch": 0.799039447412444, "grad_norm": 0.7372547253334664, "learning_rate": 1.8674219145793525e-06, "loss": 0.3157, "step": 14807 }, { "epoch": 0.7990934110409584, "grad_norm": 1.0320274121862387, "learning_rate": 1.8669716891939083e-06, "loss": 0.4678, "step": 14808 }, { "epoch": 0.7991473746694727, "grad_norm": 1.2294591615519743, "learning_rate": 1.8665215682253696e-06, "loss": 0.5302, "step": 14809 }, { "epoch": 0.7992013382979871, "grad_norm": 0.9773987588766254, "learning_rate": 1.8660715516866706e-06, "loss": 0.613, "step": 14810 }, { "epoch": 0.7992553019265015, "grad_norm": 
1.2147389764750902, "learning_rate": 1.865621639590748e-06, "loss": 0.5757, "step": 14811 }, { "epoch": 0.7993092655550159, "grad_norm": 0.9401454724725238, "learning_rate": 1.8651718319505312e-06, "loss": 0.4048, "step": 14812 }, { "epoch": 0.7993632291835303, "grad_norm": 0.9517836672760154, "learning_rate": 1.8647221287789488e-06, "loss": 0.5221, "step": 14813 }, { "epoch": 0.7994171928120447, "grad_norm": 1.110762375303299, "learning_rate": 1.864272530088926e-06, "loss": 0.6151, "step": 14814 }, { "epoch": 0.7994711564405591, "grad_norm": 1.0193248049048989, "learning_rate": 1.863823035893384e-06, "loss": 0.6308, "step": 14815 }, { "epoch": 0.7995251200690734, "grad_norm": 1.05604216479492, "learning_rate": 1.8633736462052432e-06, "loss": 0.4483, "step": 14816 }, { "epoch": 0.7995790836975878, "grad_norm": 1.074631013728031, "learning_rate": 1.862924361037417e-06, "loss": 0.4908, "step": 14817 }, { "epoch": 0.7996330473261022, "grad_norm": 1.1984826118264553, "learning_rate": 1.8624751804028196e-06, "loss": 0.4117, "step": 14818 }, { "epoch": 0.7996870109546166, "grad_norm": 0.9798695348898983, "learning_rate": 1.8620261043143616e-06, "loss": 0.4278, "step": 14819 }, { "epoch": 0.799740974583131, "grad_norm": 1.0197562633385497, "learning_rate": 1.8615771327849486e-06, "loss": 0.4843, "step": 14820 }, { "epoch": 0.7997949382116454, "grad_norm": 1.360094125839444, "learning_rate": 1.8611282658274853e-06, "loss": 0.7185, "step": 14821 }, { "epoch": 0.7998489018401598, "grad_norm": 1.1174267692521802, "learning_rate": 1.8606795034548727e-06, "loss": 0.5623, "step": 14822 }, { "epoch": 0.7999028654686741, "grad_norm": 1.0881951461208044, "learning_rate": 1.8602308456800095e-06, "loss": 0.542, "step": 14823 }, { "epoch": 0.7999568290971885, "grad_norm": 0.9855754390166537, "learning_rate": 1.8597822925157883e-06, "loss": 0.555, "step": 14824 }, { "epoch": 0.8000107927257029, "grad_norm": 1.1022811016941738, "learning_rate": 1.8593338439751022e-06, "loss": 0.4787, "step": 14825 }, { "epoch": 0.8000647563542173, "grad_norm": 1.245202377260087, "learning_rate": 1.8588855000708397e-06, "loss": 0.505, "step": 14826 }, { "epoch": 0.8001187199827317, "grad_norm": 1.0709447889774986, "learning_rate": 1.8584372608158875e-06, "loss": 0.4763, "step": 14827 }, { "epoch": 0.800172683611246, "grad_norm": 1.1800619612185912, "learning_rate": 1.8579891262231275e-06, "loss": 0.5308, "step": 14828 }, { "epoch": 0.8002266472397604, "grad_norm": 0.9204724358070794, "learning_rate": 1.8575410963054407e-06, "loss": 0.2741, "step": 14829 }, { "epoch": 0.8002806108682747, "grad_norm": 0.994022041402739, "learning_rate": 1.8570931710757028e-06, "loss": 0.518, "step": 14830 }, { "epoch": 0.8003345744967891, "grad_norm": 0.9422201697703397, "learning_rate": 1.856645350546788e-06, "loss": 0.4082, "step": 14831 }, { "epoch": 0.8003885381253035, "grad_norm": 0.982440631638376, "learning_rate": 1.8561976347315675e-06, "loss": 0.429, "step": 14832 }, { "epoch": 0.8004425017538179, "grad_norm": 1.0557796318425101, "learning_rate": 1.8557500236429086e-06, "loss": 0.4192, "step": 14833 }, { "epoch": 0.8004964653823323, "grad_norm": 0.9481502418370241, "learning_rate": 1.8553025172936762e-06, "loss": 0.3895, "step": 14834 }, { "epoch": 0.8005504290108467, "grad_norm": 1.0170468570929492, "learning_rate": 1.8548551156967318e-06, "loss": 0.4005, "step": 14835 }, { "epoch": 0.8006043926393611, "grad_norm": 1.1047325988975143, "learning_rate": 1.854407818864935e-06, "loss": 0.5188, "step": 14836 }, { "epoch": 0.8006583562678754, 
"grad_norm": 1.031524217695675, "learning_rate": 1.853960626811142e-06, "loss": 0.485, "step": 14837 }, { "epoch": 0.8007123198963898, "grad_norm": 1.162140224190474, "learning_rate": 1.8535135395482038e-06, "loss": 0.4989, "step": 14838 }, { "epoch": 0.8007662835249042, "grad_norm": 1.2349089712846228, "learning_rate": 1.8530665570889704e-06, "loss": 0.4955, "step": 14839 }, { "epoch": 0.8008202471534186, "grad_norm": 1.0767333385297704, "learning_rate": 1.8526196794462898e-06, "loss": 0.5216, "step": 14840 }, { "epoch": 0.800874210781933, "grad_norm": 1.0481536421062496, "learning_rate": 1.8521729066330047e-06, "loss": 0.4008, "step": 14841 }, { "epoch": 0.8009281744104474, "grad_norm": 0.9619177211874119, "learning_rate": 1.8517262386619561e-06, "loss": 0.395, "step": 14842 }, { "epoch": 0.8009821380389618, "grad_norm": 1.1224281469708115, "learning_rate": 1.8512796755459816e-06, "loss": 0.4083, "step": 14843 }, { "epoch": 0.8010361016674761, "grad_norm": 0.9164417573962617, "learning_rate": 1.8508332172979172e-06, "loss": 0.4138, "step": 14844 }, { "epoch": 0.8010900652959905, "grad_norm": 1.1474931795160652, "learning_rate": 1.850386863930592e-06, "loss": 0.3693, "step": 14845 }, { "epoch": 0.8011440289245049, "grad_norm": 1.083078399936675, "learning_rate": 1.8499406154568361e-06, "loss": 0.5743, "step": 14846 }, { "epoch": 0.8011979925530193, "grad_norm": 1.0840862623104552, "learning_rate": 1.8494944718894756e-06, "loss": 0.4105, "step": 14847 }, { "epoch": 0.8012519561815337, "grad_norm": 0.6091667664591182, "learning_rate": 1.8490484332413322e-06, "loss": 0.1826, "step": 14848 }, { "epoch": 0.8013059198100481, "grad_norm": 1.0085393137402148, "learning_rate": 1.848602499525226e-06, "loss": 0.5533, "step": 14849 }, { "epoch": 0.8013598834385625, "grad_norm": 1.2522589257185284, "learning_rate": 1.8481566707539727e-06, "loss": 0.6378, "step": 14850 }, { "epoch": 0.8014138470670767, "grad_norm": 1.0408550182451914, "learning_rate": 1.847710946940388e-06, "loss": 0.3927, "step": 14851 }, { "epoch": 0.8014678106955911, "grad_norm": 0.984302905834938, "learning_rate": 1.8472653280972808e-06, "loss": 0.4355, "step": 14852 }, { "epoch": 0.8015217743241055, "grad_norm": 1.0100942959360883, "learning_rate": 1.846819814237458e-06, "loss": 0.443, "step": 14853 }, { "epoch": 0.8015757379526199, "grad_norm": 0.946854540615051, "learning_rate": 1.8463744053737254e-06, "loss": 0.387, "step": 14854 }, { "epoch": 0.8016297015811343, "grad_norm": 0.8768283991857462, "learning_rate": 1.8459291015188841e-06, "loss": 0.4245, "step": 14855 }, { "epoch": 0.8016836652096487, "grad_norm": 0.8946967481900465, "learning_rate": 1.845483902685732e-06, "loss": 0.3883, "step": 14856 }, { "epoch": 0.8017376288381631, "grad_norm": 0.940535041229304, "learning_rate": 1.8450388088870657e-06, "loss": 0.3987, "step": 14857 }, { "epoch": 0.8017915924666774, "grad_norm": 1.221502934085716, "learning_rate": 1.8445938201356783e-06, "loss": 0.4223, "step": 14858 }, { "epoch": 0.8018455560951918, "grad_norm": 1.2224243487987394, "learning_rate": 1.8441489364443568e-06, "loss": 0.7295, "step": 14859 }, { "epoch": 0.8018995197237062, "grad_norm": 1.0806682871089455, "learning_rate": 1.843704157825889e-06, "loss": 0.4475, "step": 14860 }, { "epoch": 0.8019534833522206, "grad_norm": 1.0541236266539329, "learning_rate": 1.843259484293058e-06, "loss": 0.4467, "step": 14861 }, { "epoch": 0.802007446980735, "grad_norm": 1.6573794761381317, "learning_rate": 1.8428149158586438e-06, "loss": 0.4755, "step": 14862 }, { "epoch": 
0.8020614106092494, "grad_norm": 0.964174586843505, "learning_rate": 1.8423704525354247e-06, "loss": 0.3688, "step": 14863 }, { "epoch": 0.8021153742377638, "grad_norm": 1.1134043635819921, "learning_rate": 1.8419260943361743e-06, "loss": 0.5964, "step": 14864 }, { "epoch": 0.8021693378662781, "grad_norm": 1.1345024966063415, "learning_rate": 1.8414818412736657e-06, "loss": 0.5014, "step": 14865 }, { "epoch": 0.8022233014947925, "grad_norm": 1.136672695267758, "learning_rate": 1.8410376933606644e-06, "loss": 0.6105, "step": 14866 }, { "epoch": 0.8022772651233069, "grad_norm": 1.0081075047634147, "learning_rate": 1.840593650609937e-06, "loss": 0.4465, "step": 14867 }, { "epoch": 0.8023312287518213, "grad_norm": 1.0258975073860266, "learning_rate": 1.840149713034245e-06, "loss": 0.5233, "step": 14868 }, { "epoch": 0.8023851923803357, "grad_norm": 1.0874742521148508, "learning_rate": 1.8397058806463491e-06, "loss": 0.4121, "step": 14869 }, { "epoch": 0.8024391560088501, "grad_norm": 0.8342244516417907, "learning_rate": 1.839262153459004e-06, "loss": 0.3514, "step": 14870 }, { "epoch": 0.8024931196373645, "grad_norm": 0.9905578183520143, "learning_rate": 1.838818531484964e-06, "loss": 0.4699, "step": 14871 }, { "epoch": 0.8025470832658788, "grad_norm": 1.3025108718167793, "learning_rate": 1.8383750147369795e-06, "loss": 0.5032, "step": 14872 }, { "epoch": 0.8026010468943932, "grad_norm": 0.9442464478431325, "learning_rate": 1.8379316032277955e-06, "loss": 0.3419, "step": 14873 }, { "epoch": 0.8026550105229076, "grad_norm": 0.8532057014131399, "learning_rate": 1.837488296970158e-06, "loss": 0.3165, "step": 14874 }, { "epoch": 0.802708974151422, "grad_norm": 1.087983332882838, "learning_rate": 1.8370450959768077e-06, "loss": 0.5392, "step": 14875 }, { "epoch": 0.8027629377799363, "grad_norm": 1.0358739416827556, "learning_rate": 1.8366020002604823e-06, "loss": 0.4967, "step": 14876 }, { "epoch": 0.8028169014084507, "grad_norm": 1.0539420297924937, "learning_rate": 1.8361590098339168e-06, "loss": 0.448, "step": 14877 }, { "epoch": 0.802870865036965, "grad_norm": 0.9705497081071237, "learning_rate": 1.8357161247098431e-06, "loss": 0.3624, "step": 14878 }, { "epoch": 0.8029248286654794, "grad_norm": 1.073161273398357, "learning_rate": 1.8352733449009914e-06, "loss": 0.6208, "step": 14879 }, { "epoch": 0.8029787922939938, "grad_norm": 1.055156864659904, "learning_rate": 1.8348306704200859e-06, "loss": 0.4476, "step": 14880 }, { "epoch": 0.8030327559225082, "grad_norm": 0.7822144268295095, "learning_rate": 1.834388101279851e-06, "loss": 0.2875, "step": 14881 }, { "epoch": 0.8030867195510226, "grad_norm": 1.1108834775176897, "learning_rate": 1.8339456374930053e-06, "loss": 0.5091, "step": 14882 }, { "epoch": 0.803140683179537, "grad_norm": 1.0132494450251692, "learning_rate": 1.8335032790722674e-06, "loss": 0.4036, "step": 14883 }, { "epoch": 0.8031946468080514, "grad_norm": 1.0494940086681137, "learning_rate": 1.833061026030349e-06, "loss": 0.4833, "step": 14884 }, { "epoch": 0.8032486104365657, "grad_norm": 0.9409484376686271, "learning_rate": 1.832618878379962e-06, "loss": 0.3543, "step": 14885 }, { "epoch": 0.8033025740650801, "grad_norm": 0.9287032919470498, "learning_rate": 1.8321768361338139e-06, "loss": 0.4069, "step": 14886 }, { "epoch": 0.8033565376935945, "grad_norm": 0.8634983491242433, "learning_rate": 1.8317348993046094e-06, "loss": 0.3427, "step": 14887 }, { "epoch": 0.8034105013221089, "grad_norm": 0.7291706127039025, "learning_rate": 1.8312930679050512e-06, "loss": 0.3481, "step": 
14888 }, { "epoch": 0.8034644649506233, "grad_norm": 0.990692591551886, "learning_rate": 1.8308513419478363e-06, "loss": 0.3881, "step": 14889 }, { "epoch": 0.8035184285791377, "grad_norm": 1.0030693153396586, "learning_rate": 1.8304097214456618e-06, "loss": 0.4915, "step": 14890 }, { "epoch": 0.8035723922076521, "grad_norm": 0.7767291989627656, "learning_rate": 1.8299682064112196e-06, "loss": 0.2906, "step": 14891 }, { "epoch": 0.8036263558361664, "grad_norm": 1.094021916396228, "learning_rate": 1.8295267968572005e-06, "loss": 0.5207, "step": 14892 }, { "epoch": 0.8036803194646808, "grad_norm": 1.1749131854970998, "learning_rate": 1.8290854927962886e-06, "loss": 0.4895, "step": 14893 }, { "epoch": 0.8037342830931952, "grad_norm": 1.0607811994489298, "learning_rate": 1.8286442942411692e-06, "loss": 0.3938, "step": 14894 }, { "epoch": 0.8037882467217096, "grad_norm": 0.9117354168175209, "learning_rate": 1.8282032012045222e-06, "loss": 0.4272, "step": 14895 }, { "epoch": 0.803842210350224, "grad_norm": 0.9857441065327186, "learning_rate": 1.8277622136990253e-06, "loss": 0.4238, "step": 14896 }, { "epoch": 0.8038961739787384, "grad_norm": 1.014995242077085, "learning_rate": 1.8273213317373534e-06, "loss": 0.4147, "step": 14897 }, { "epoch": 0.8039501376072528, "grad_norm": 0.9367949136322224, "learning_rate": 1.8268805553321766e-06, "loss": 0.3837, "step": 14898 }, { "epoch": 0.804004101235767, "grad_norm": 0.9906727856071421, "learning_rate": 1.8264398844961657e-06, "loss": 0.4906, "step": 14899 }, { "epoch": 0.8040580648642814, "grad_norm": 0.8062355015692951, "learning_rate": 1.8259993192419833e-06, "loss": 0.3752, "step": 14900 }, { "epoch": 0.8041120284927958, "grad_norm": 1.1894756184352309, "learning_rate": 1.8255588595822927e-06, "loss": 0.5302, "step": 14901 }, { "epoch": 0.8041659921213102, "grad_norm": 0.9758674926882615, "learning_rate": 1.8251185055297533e-06, "loss": 0.3437, "step": 14902 }, { "epoch": 0.8042199557498246, "grad_norm": 0.9876948716458978, "learning_rate": 1.8246782570970207e-06, "loss": 0.4206, "step": 14903 }, { "epoch": 0.804273919378339, "grad_norm": 0.9577555278383736, "learning_rate": 1.8242381142967491e-06, "loss": 0.4024, "step": 14904 }, { "epoch": 0.8043278830068534, "grad_norm": 0.903248763719223, "learning_rate": 1.823798077141588e-06, "loss": 0.349, "step": 14905 }, { "epoch": 0.8043818466353677, "grad_norm": 1.1046407512277232, "learning_rate": 1.8233581456441862e-06, "loss": 0.3762, "step": 14906 }, { "epoch": 0.8044358102638821, "grad_norm": 0.6580234371148095, "learning_rate": 1.8229183198171846e-06, "loss": 0.2546, "step": 14907 }, { "epoch": 0.8044897738923965, "grad_norm": 1.0806330916845162, "learning_rate": 1.8224785996732263e-06, "loss": 0.5294, "step": 14908 }, { "epoch": 0.8045437375209109, "grad_norm": 0.9263223811457929, "learning_rate": 1.8220389852249486e-06, "loss": 0.3744, "step": 14909 }, { "epoch": 0.8045977011494253, "grad_norm": 0.8455803283602408, "learning_rate": 1.821599476484987e-06, "loss": 0.3316, "step": 14910 }, { "epoch": 0.8046516647779397, "grad_norm": 1.0439169384952034, "learning_rate": 1.8211600734659735e-06, "loss": 0.4248, "step": 14911 }, { "epoch": 0.8047056284064541, "grad_norm": 0.9087322470945891, "learning_rate": 1.8207207761805361e-06, "loss": 0.4269, "step": 14912 }, { "epoch": 0.8047595920349684, "grad_norm": 0.7926332198596833, "learning_rate": 1.8202815846413025e-06, "loss": 0.3373, "step": 14913 }, { "epoch": 0.8048135556634828, "grad_norm": 1.1330138133778485, "learning_rate": 1.819842498860893e-06, 
"loss": 0.4328, "step": 14914 }, { "epoch": 0.8048675192919972, "grad_norm": 0.9237203115154883, "learning_rate": 1.8194035188519294e-06, "loss": 0.3359, "step": 14915 }, { "epoch": 0.8049214829205116, "grad_norm": 0.8752440739375261, "learning_rate": 1.8189646446270265e-06, "loss": 0.3275, "step": 14916 }, { "epoch": 0.804975446549026, "grad_norm": 1.1421747025087434, "learning_rate": 1.8185258761988e-06, "loss": 0.5606, "step": 14917 }, { "epoch": 0.8050294101775404, "grad_norm": 1.0333628821566332, "learning_rate": 1.8180872135798594e-06, "loss": 0.4535, "step": 14918 }, { "epoch": 0.8050833738060548, "grad_norm": 1.0470871013490217, "learning_rate": 1.8176486567828127e-06, "loss": 0.5128, "step": 14919 }, { "epoch": 0.8051373374345691, "grad_norm": 0.8426683055029965, "learning_rate": 1.817210205820265e-06, "loss": 0.4155, "step": 14920 }, { "epoch": 0.8051913010630835, "grad_norm": 0.9124348915522433, "learning_rate": 1.8167718607048164e-06, "loss": 0.4121, "step": 14921 }, { "epoch": 0.8052452646915979, "grad_norm": 1.155530921667954, "learning_rate": 1.8163336214490668e-06, "loss": 0.5767, "step": 14922 }, { "epoch": 0.8052992283201122, "grad_norm": 0.971090482523904, "learning_rate": 1.8158954880656099e-06, "loss": 0.4347, "step": 14923 }, { "epoch": 0.8053531919486266, "grad_norm": 0.8543214083354823, "learning_rate": 1.81545746056704e-06, "loss": 0.3602, "step": 14924 }, { "epoch": 0.805407155577141, "grad_norm": 1.2350446268928017, "learning_rate": 1.8150195389659446e-06, "loss": 0.4601, "step": 14925 }, { "epoch": 0.8054611192056554, "grad_norm": 1.1650666900499977, "learning_rate": 1.8145817232749118e-06, "loss": 0.6227, "step": 14926 }, { "epoch": 0.8055150828341697, "grad_norm": 1.1028149537478762, "learning_rate": 1.8141440135065253e-06, "loss": 0.5892, "step": 14927 }, { "epoch": 0.8055690464626841, "grad_norm": 1.127625915231473, "learning_rate": 1.8137064096733628e-06, "loss": 0.5826, "step": 14928 }, { "epoch": 0.8056230100911985, "grad_norm": 0.9528063076572508, "learning_rate": 1.8132689117880029e-06, "loss": 0.4848, "step": 14929 }, { "epoch": 0.8056769737197129, "grad_norm": 0.9479943547418739, "learning_rate": 1.8128315198630195e-06, "loss": 0.4662, "step": 14930 }, { "epoch": 0.8057309373482273, "grad_norm": 1.1839034240923088, "learning_rate": 1.812394233910984e-06, "loss": 0.5281, "step": 14931 }, { "epoch": 0.8057849009767417, "grad_norm": 1.1277863995063973, "learning_rate": 1.8119570539444641e-06, "loss": 0.5561, "step": 14932 }, { "epoch": 0.8058388646052561, "grad_norm": 0.8745497663551298, "learning_rate": 1.8115199799760251e-06, "loss": 0.4066, "step": 14933 }, { "epoch": 0.8058928282337704, "grad_norm": 1.1514325675933397, "learning_rate": 1.8110830120182287e-06, "loss": 0.5129, "step": 14934 }, { "epoch": 0.8059467918622848, "grad_norm": 1.0778910881744626, "learning_rate": 1.8106461500836345e-06, "loss": 0.4657, "step": 14935 }, { "epoch": 0.8060007554907992, "grad_norm": 1.1042558576558912, "learning_rate": 1.810209394184797e-06, "loss": 0.5772, "step": 14936 }, { "epoch": 0.8060547191193136, "grad_norm": 1.1568424155069672, "learning_rate": 1.8097727443342702e-06, "loss": 0.5108, "step": 14937 }, { "epoch": 0.806108682747828, "grad_norm": 0.837572558348581, "learning_rate": 1.8093362005446032e-06, "loss": 0.2649, "step": 14938 }, { "epoch": 0.8061626463763424, "grad_norm": 0.9650766217331049, "learning_rate": 1.8088997628283429e-06, "loss": 0.4303, "step": 14939 }, { "epoch": 0.8062166100048568, "grad_norm": 1.0683719638628948, "learning_rate": 
1.808463431198033e-06, "loss": 0.4666, "step": 14940 }, { "epoch": 0.8062705736333711, "grad_norm": 1.0256846334417231, "learning_rate": 1.8080272056662158e-06, "loss": 0.5534, "step": 14941 }, { "epoch": 0.8063245372618855, "grad_norm": 1.1823339246448672, "learning_rate": 1.8075910862454254e-06, "loss": 0.442, "step": 14942 }, { "epoch": 0.8063785008903999, "grad_norm": 0.8986031410947927, "learning_rate": 1.8071550729481984e-06, "loss": 0.3387, "step": 14943 }, { "epoch": 0.8064324645189143, "grad_norm": 0.9340110763787425, "learning_rate": 1.806719165787066e-06, "loss": 0.4021, "step": 14944 }, { "epoch": 0.8064864281474287, "grad_norm": 1.085852510115472, "learning_rate": 1.806283364774556e-06, "loss": 0.3275, "step": 14945 }, { "epoch": 0.806540391775943, "grad_norm": 1.1364319764806416, "learning_rate": 1.8058476699231947e-06, "loss": 0.5651, "step": 14946 }, { "epoch": 0.8065943554044573, "grad_norm": 1.2764468224019285, "learning_rate": 1.8054120812455053e-06, "loss": 0.525, "step": 14947 }, { "epoch": 0.8066483190329717, "grad_norm": 1.159503606183495, "learning_rate": 1.8049765987540047e-06, "loss": 0.4545, "step": 14948 }, { "epoch": 0.8067022826614861, "grad_norm": 1.1395060064264508, "learning_rate": 1.80454122246121e-06, "loss": 0.5564, "step": 14949 }, { "epoch": 0.8067562462900005, "grad_norm": 0.8336616875565236, "learning_rate": 1.8041059523796345e-06, "loss": 0.3829, "step": 14950 }, { "epoch": 0.8068102099185149, "grad_norm": 1.0539589910938114, "learning_rate": 1.803670788521789e-06, "loss": 0.5007, "step": 14951 }, { "epoch": 0.8068641735470293, "grad_norm": 0.914580639616152, "learning_rate": 1.8032357309001791e-06, "loss": 0.3904, "step": 14952 }, { "epoch": 0.8069181371755437, "grad_norm": 1.009529856483741, "learning_rate": 1.80280077952731e-06, "loss": 0.398, "step": 14953 }, { "epoch": 0.806972100804058, "grad_norm": 1.104746471422327, "learning_rate": 1.8023659344156833e-06, "loss": 0.4922, "step": 14954 }, { "epoch": 0.8070260644325724, "grad_norm": 1.267548476021106, "learning_rate": 1.8019311955777942e-06, "loss": 0.5947, "step": 14955 }, { "epoch": 0.8070800280610868, "grad_norm": 1.0439939128513471, "learning_rate": 1.8014965630261396e-06, "loss": 0.4267, "step": 14956 }, { "epoch": 0.8071339916896012, "grad_norm": 0.8372970192184802, "learning_rate": 1.801062036773211e-06, "loss": 0.3953, "step": 14957 }, { "epoch": 0.8071879553181156, "grad_norm": 0.9190879973606361, "learning_rate": 1.8006276168314969e-06, "loss": 0.4926, "step": 14958 }, { "epoch": 0.80724191894663, "grad_norm": 1.0292497193473313, "learning_rate": 1.8001933032134827e-06, "loss": 0.385, "step": 14959 }, { "epoch": 0.8072958825751444, "grad_norm": 0.9801563690338797, "learning_rate": 1.7997590959316516e-06, "loss": 0.47, "step": 14960 }, { "epoch": 0.8073498462036587, "grad_norm": 0.9321822240731279, "learning_rate": 1.7993249949984842e-06, "loss": 0.3759, "step": 14961 }, { "epoch": 0.8074038098321731, "grad_norm": 0.9563053320242623, "learning_rate": 1.7988910004264542e-06, "loss": 0.3856, "step": 14962 }, { "epoch": 0.8074577734606875, "grad_norm": 0.8571348686757044, "learning_rate": 1.7984571122280372e-06, "loss": 0.4418, "step": 14963 }, { "epoch": 0.8075117370892019, "grad_norm": 1.070332704067573, "learning_rate": 1.7980233304157025e-06, "loss": 0.4729, "step": 14964 }, { "epoch": 0.8075657007177163, "grad_norm": 0.9184247681591252, "learning_rate": 1.7975896550019173e-06, "loss": 0.457, "step": 14965 }, { "epoch": 0.8076196643462307, "grad_norm": 1.1165326065132974, 
"learning_rate": 1.7971560859991471e-06, "loss": 0.5484, "step": 14966 }, { "epoch": 0.8076736279747451, "grad_norm": 0.9863118032775459, "learning_rate": 1.7967226234198525e-06, "loss": 0.4191, "step": 14967 }, { "epoch": 0.8077275916032594, "grad_norm": 1.1860739047909694, "learning_rate": 1.7962892672764925e-06, "loss": 0.4851, "step": 14968 }, { "epoch": 0.8077815552317738, "grad_norm": 0.7894256296856095, "learning_rate": 1.7958560175815204e-06, "loss": 0.3412, "step": 14969 }, { "epoch": 0.8078355188602881, "grad_norm": 1.0568125051268067, "learning_rate": 1.795422874347389e-06, "loss": 0.3956, "step": 14970 }, { "epoch": 0.8078894824888025, "grad_norm": 0.9599838317133688, "learning_rate": 1.7949898375865473e-06, "loss": 0.4401, "step": 14971 }, { "epoch": 0.8079434461173169, "grad_norm": 0.8130173749173386, "learning_rate": 1.794556907311441e-06, "loss": 0.2779, "step": 14972 }, { "epoch": 0.8079974097458313, "grad_norm": 0.9078919781123607, "learning_rate": 1.794124083534514e-06, "loss": 0.3555, "step": 14973 }, { "epoch": 0.8080513733743457, "grad_norm": 0.9084122922611785, "learning_rate": 1.7936913662682046e-06, "loss": 0.3522, "step": 14974 }, { "epoch": 0.80810533700286, "grad_norm": 0.887351414631382, "learning_rate": 1.7932587555249517e-06, "loss": 0.3329, "step": 14975 }, { "epoch": 0.8081593006313744, "grad_norm": 1.0560674036483217, "learning_rate": 1.7928262513171868e-06, "loss": 0.4639, "step": 14976 }, { "epoch": 0.8082132642598888, "grad_norm": 1.0109169675477818, "learning_rate": 1.7923938536573408e-06, "loss": 0.4551, "step": 14977 }, { "epoch": 0.8082672278884032, "grad_norm": 1.1793991819063474, "learning_rate": 1.7919615625578418e-06, "loss": 0.5174, "step": 14978 }, { "epoch": 0.8083211915169176, "grad_norm": 1.0014114535798888, "learning_rate": 1.7915293780311145e-06, "loss": 0.4813, "step": 14979 }, { "epoch": 0.808375155145432, "grad_norm": 1.0124623551860468, "learning_rate": 1.7910973000895793e-06, "loss": 0.4352, "step": 14980 }, { "epoch": 0.8084291187739464, "grad_norm": 1.0090925628776648, "learning_rate": 1.790665328745656e-06, "loss": 0.5218, "step": 14981 }, { "epoch": 0.8084830824024607, "grad_norm": 1.2327965910475613, "learning_rate": 1.7902334640117586e-06, "loss": 0.5195, "step": 14982 }, { "epoch": 0.8085370460309751, "grad_norm": 1.1547113245905833, "learning_rate": 1.7898017059003002e-06, "loss": 0.4874, "step": 14983 }, { "epoch": 0.8085910096594895, "grad_norm": 0.985503494722898, "learning_rate": 1.7893700544236897e-06, "loss": 0.4217, "step": 14984 }, { "epoch": 0.8086449732880039, "grad_norm": 1.2340858512506225, "learning_rate": 1.788938509594333e-06, "loss": 0.4677, "step": 14985 }, { "epoch": 0.8086989369165183, "grad_norm": 1.1056450129954292, "learning_rate": 1.7885070714246333e-06, "loss": 0.577, "step": 14986 }, { "epoch": 0.8087529005450327, "grad_norm": 1.0647169751318186, "learning_rate": 1.788075739926991e-06, "loss": 0.5555, "step": 14987 }, { "epoch": 0.8088068641735471, "grad_norm": 1.117389251365727, "learning_rate": 1.7876445151138034e-06, "loss": 0.7261, "step": 14988 }, { "epoch": 0.8088608278020614, "grad_norm": 1.1345824698241858, "learning_rate": 1.787213396997462e-06, "loss": 0.4991, "step": 14989 }, { "epoch": 0.8089147914305758, "grad_norm": 1.1242228676654655, "learning_rate": 1.7867823855903592e-06, "loss": 0.4845, "step": 14990 }, { "epoch": 0.8089687550590902, "grad_norm": 0.9204631404080857, "learning_rate": 1.7863514809048827e-06, "loss": 0.4009, "step": 14991 }, { "epoch": 0.8090227186876046, "grad_norm": 
1.2561591540542034, "learning_rate": 1.7859206829534175e-06, "loss": 0.5299, "step": 14992 }, { "epoch": 0.809076682316119, "grad_norm": 1.1683708825450252, "learning_rate": 1.7854899917483438e-06, "loss": 0.4639, "step": 14993 }, { "epoch": 0.8091306459446334, "grad_norm": 1.0376969752541425, "learning_rate": 1.785059407302041e-06, "loss": 0.4499, "step": 14994 }, { "epoch": 0.8091846095731478, "grad_norm": 1.0871378881220306, "learning_rate": 1.7846289296268853e-06, "loss": 0.4923, "step": 14995 }, { "epoch": 0.809238573201662, "grad_norm": 0.976759349907928, "learning_rate": 1.7841985587352492e-06, "loss": 0.4123, "step": 14996 }, { "epoch": 0.8092925368301764, "grad_norm": 1.0629330365357155, "learning_rate": 1.7837682946394997e-06, "loss": 0.5319, "step": 14997 }, { "epoch": 0.8093465004586908, "grad_norm": 0.9205182449534607, "learning_rate": 1.7833381373520041e-06, "loss": 0.3385, "step": 14998 }, { "epoch": 0.8094004640872052, "grad_norm": 1.2154714236721322, "learning_rate": 1.7829080868851262e-06, "loss": 0.5878, "step": 14999 }, { "epoch": 0.8094544277157196, "grad_norm": 1.2976292256622846, "learning_rate": 1.7824781432512256e-06, "loss": 0.6482, "step": 15000 }, { "epoch": 0.8094544277157196, "eval_loss": 0.5320729613304138, "eval_runtime": 163.8564, "eval_samples_per_second": 20.988, "eval_steps_per_second": 0.879, "step": 15000 }, { "epoch": 0.809508391344234, "grad_norm": 1.1863441200170959, "learning_rate": 1.7820483064626597e-06, "loss": 0.4728, "step": 15001 }, { "epoch": 0.8095623549727484, "grad_norm": 1.0080234605331135, "learning_rate": 1.7816185765317822e-06, "loss": 0.4781, "step": 15002 }, { "epoch": 0.8096163186012627, "grad_norm": 0.8850413627677761, "learning_rate": 1.7811889534709449e-06, "loss": 0.4378, "step": 15003 }, { "epoch": 0.8096702822297771, "grad_norm": 0.8345954079871095, "learning_rate": 1.780759437292493e-06, "loss": 0.295, "step": 15004 }, { "epoch": 0.8097242458582915, "grad_norm": 1.1139046844990743, "learning_rate": 1.7803300280087738e-06, "loss": 0.5285, "step": 15005 }, { "epoch": 0.8097782094868059, "grad_norm": 1.0593706783806836, "learning_rate": 1.7799007256321276e-06, "loss": 0.4337, "step": 15006 }, { "epoch": 0.8098321731153203, "grad_norm": 0.9804437252946457, "learning_rate": 1.7794715301748936e-06, "loss": 0.4022, "step": 15007 }, { "epoch": 0.8098861367438347, "grad_norm": 1.1375057083100792, "learning_rate": 1.7790424416494074e-06, "loss": 0.5933, "step": 15008 }, { "epoch": 0.8099401003723491, "grad_norm": 1.1636874970256401, "learning_rate": 1.7786134600680001e-06, "loss": 0.5059, "step": 15009 }, { "epoch": 0.8099940640008634, "grad_norm": 1.1135987378545398, "learning_rate": 1.778184585443004e-06, "loss": 0.5402, "step": 15010 }, { "epoch": 0.8100480276293778, "grad_norm": 1.0000052746056893, "learning_rate": 1.7777558177867427e-06, "loss": 0.4939, "step": 15011 }, { "epoch": 0.8101019912578922, "grad_norm": 0.991326883153044, "learning_rate": 1.7773271571115403e-06, "loss": 0.4456, "step": 15012 }, { "epoch": 0.8101559548864066, "grad_norm": 1.05906384632891, "learning_rate": 1.776898603429716e-06, "loss": 0.4838, "step": 15013 }, { "epoch": 0.810209918514921, "grad_norm": 1.002298963206294, "learning_rate": 1.7764701567535885e-06, "loss": 0.3233, "step": 15014 }, { "epoch": 0.8102638821434354, "grad_norm": 1.3738403079161232, "learning_rate": 1.7760418170954708e-06, "loss": 0.6579, "step": 15015 }, { "epoch": 0.8103178457719497, "grad_norm": 1.6598733687139255, "learning_rate": 1.7756135844676752e-06, "loss": 0.422, 
"step": 15016 }, { "epoch": 0.810371809400464, "grad_norm": 1.1336587353622936, "learning_rate": 1.7751854588825075e-06, "loss": 0.5138, "step": 15017 }, { "epoch": 0.8104257730289784, "grad_norm": 1.0175921252993692, "learning_rate": 1.7747574403522733e-06, "loss": 0.4678, "step": 15018 }, { "epoch": 0.8104797366574928, "grad_norm": 1.1661461083938378, "learning_rate": 1.7743295288892743e-06, "loss": 0.5124, "step": 15019 }, { "epoch": 0.8105337002860072, "grad_norm": 0.860716818273619, "learning_rate": 1.7739017245058089e-06, "loss": 0.3894, "step": 15020 }, { "epoch": 0.8105876639145216, "grad_norm": 0.8168400528370259, "learning_rate": 1.773474027214173e-06, "loss": 0.3716, "step": 15021 }, { "epoch": 0.810641627543036, "grad_norm": 1.1685792593057673, "learning_rate": 1.7730464370266587e-06, "loss": 0.563, "step": 15022 }, { "epoch": 0.8106955911715503, "grad_norm": 1.3472048182129714, "learning_rate": 1.7726189539555572e-06, "loss": 0.6815, "step": 15023 }, { "epoch": 0.8107495548000647, "grad_norm": 1.0736375926752653, "learning_rate": 1.7721915780131522e-06, "loss": 0.5605, "step": 15024 }, { "epoch": 0.8108035184285791, "grad_norm": 1.0211496883191755, "learning_rate": 1.771764309211727e-06, "loss": 0.3775, "step": 15025 }, { "epoch": 0.8108574820570935, "grad_norm": 0.9090755112037462, "learning_rate": 1.7713371475635637e-06, "loss": 0.4407, "step": 15026 }, { "epoch": 0.8109114456856079, "grad_norm": 1.050819920008424, "learning_rate": 1.7709100930809382e-06, "loss": 0.4721, "step": 15027 }, { "epoch": 0.8109654093141223, "grad_norm": 1.0183332833978376, "learning_rate": 1.770483145776124e-06, "loss": 0.5717, "step": 15028 }, { "epoch": 0.8110193729426367, "grad_norm": 1.2949718287047092, "learning_rate": 1.7700563056613928e-06, "loss": 0.5345, "step": 15029 }, { "epoch": 0.811073336571151, "grad_norm": 0.9807674701731001, "learning_rate": 1.7696295727490127e-06, "loss": 0.465, "step": 15030 }, { "epoch": 0.8111273001996654, "grad_norm": 0.7975593296569758, "learning_rate": 1.7692029470512473e-06, "loss": 0.4181, "step": 15031 }, { "epoch": 0.8111812638281798, "grad_norm": 0.9554080022033713, "learning_rate": 1.7687764285803602e-06, "loss": 0.3528, "step": 15032 }, { "epoch": 0.8112352274566942, "grad_norm": 1.0494824058899817, "learning_rate": 1.768350017348608e-06, "loss": 0.5595, "step": 15033 }, { "epoch": 0.8112891910852086, "grad_norm": 1.031737000930814, "learning_rate": 1.7679237133682465e-06, "loss": 0.458, "step": 15034 }, { "epoch": 0.811343154713723, "grad_norm": 0.8947440835027869, "learning_rate": 1.767497516651528e-06, "loss": 0.3653, "step": 15035 }, { "epoch": 0.8113971183422374, "grad_norm": 1.4364257981975208, "learning_rate": 1.767071427210703e-06, "loss": 0.5575, "step": 15036 }, { "epoch": 0.8114510819707517, "grad_norm": 1.11092716619213, "learning_rate": 1.7666454450580166e-06, "loss": 0.5065, "step": 15037 }, { "epoch": 0.8115050455992661, "grad_norm": 1.0225252214628613, "learning_rate": 1.7662195702057127e-06, "loss": 0.4453, "step": 15038 }, { "epoch": 0.8115590092277805, "grad_norm": 0.842351008972849, "learning_rate": 1.7657938026660308e-06, "loss": 0.4553, "step": 15039 }, { "epoch": 0.8116129728562949, "grad_norm": 0.8657925350717867, "learning_rate": 1.7653681424512086e-06, "loss": 0.3252, "step": 15040 }, { "epoch": 0.8116669364848093, "grad_norm": 1.1338157187208147, "learning_rate": 1.764942589573479e-06, "loss": 0.5433, "step": 15041 }, { "epoch": 0.8117209001133237, "grad_norm": 0.9501993780123866, "learning_rate": 1.7645171440450737e-06, 
"loss": 0.4986, "step": 15042 }, { "epoch": 0.811774863741838, "grad_norm": 0.9946240155934433, "learning_rate": 1.7640918058782196e-06, "loss": 0.4144, "step": 15043 }, { "epoch": 0.8118288273703523, "grad_norm": 1.0747504347797308, "learning_rate": 1.7636665750851435e-06, "loss": 0.4966, "step": 15044 }, { "epoch": 0.8118827909988667, "grad_norm": 1.0718500224286507, "learning_rate": 1.763241451678064e-06, "loss": 0.5328, "step": 15045 }, { "epoch": 0.8119367546273811, "grad_norm": 0.9745615218813367, "learning_rate": 1.7628164356692012e-06, "loss": 0.5111, "step": 15046 }, { "epoch": 0.8119907182558955, "grad_norm": 1.0201795589662783, "learning_rate": 1.7623915270707704e-06, "loss": 0.3848, "step": 15047 }, { "epoch": 0.8120446818844099, "grad_norm": 0.9797688631806711, "learning_rate": 1.761966725894983e-06, "loss": 0.4517, "step": 15048 }, { "epoch": 0.8120986455129243, "grad_norm": 1.220546859588374, "learning_rate": 1.7615420321540495e-06, "loss": 0.6075, "step": 15049 }, { "epoch": 0.8121526091414387, "grad_norm": 0.9286313305979726, "learning_rate": 1.761117445860175e-06, "loss": 0.3389, "step": 15050 }, { "epoch": 0.812206572769953, "grad_norm": 1.0906033186195883, "learning_rate": 1.760692967025564e-06, "loss": 0.4641, "step": 15051 }, { "epoch": 0.8122605363984674, "grad_norm": 0.9281618600735212, "learning_rate": 1.7602685956624143e-06, "loss": 0.4128, "step": 15052 }, { "epoch": 0.8123145000269818, "grad_norm": 0.9228913279007626, "learning_rate": 1.7598443317829238e-06, "loss": 0.3918, "step": 15053 }, { "epoch": 0.8123684636554962, "grad_norm": 0.9726166968848565, "learning_rate": 1.7594201753992867e-06, "loss": 0.467, "step": 15054 }, { "epoch": 0.8124224272840106, "grad_norm": 0.9505287269078845, "learning_rate": 1.7589961265236933e-06, "loss": 0.4632, "step": 15055 }, { "epoch": 0.812476390912525, "grad_norm": 1.017786292321823, "learning_rate": 1.7585721851683315e-06, "loss": 0.4069, "step": 15056 }, { "epoch": 0.8125303545410394, "grad_norm": 1.161272788386706, "learning_rate": 1.758148351345385e-06, "loss": 0.504, "step": 15057 }, { "epoch": 0.8125843181695537, "grad_norm": 1.0816243987592278, "learning_rate": 1.7577246250670371e-06, "loss": 0.3928, "step": 15058 }, { "epoch": 0.8126382817980681, "grad_norm": 1.0700417249672256, "learning_rate": 1.7573010063454638e-06, "loss": 0.5, "step": 15059 }, { "epoch": 0.8126922454265825, "grad_norm": 0.9613288741890027, "learning_rate": 1.7568774951928409e-06, "loss": 0.5336, "step": 15060 }, { "epoch": 0.8127462090550969, "grad_norm": 0.8590534834836154, "learning_rate": 1.7564540916213415e-06, "loss": 0.2727, "step": 15061 }, { "epoch": 0.8128001726836113, "grad_norm": 1.1449316477622389, "learning_rate": 1.756030795643134e-06, "loss": 0.5823, "step": 15062 }, { "epoch": 0.8128541363121257, "grad_norm": 0.8688042572141161, "learning_rate": 1.7556076072703836e-06, "loss": 0.3848, "step": 15063 }, { "epoch": 0.8129080999406401, "grad_norm": 1.1398900047288822, "learning_rate": 1.7551845265152551e-06, "loss": 0.5134, "step": 15064 }, { "epoch": 0.8129620635691543, "grad_norm": 1.2288621092842575, "learning_rate": 1.7547615533899076e-06, "loss": 0.4351, "step": 15065 }, { "epoch": 0.8130160271976687, "grad_norm": 1.1285164601993736, "learning_rate": 1.7543386879064967e-06, "loss": 0.4712, "step": 15066 }, { "epoch": 0.8130699908261831, "grad_norm": 0.8491767051728157, "learning_rate": 1.753915930077176e-06, "loss": 0.3256, "step": 15067 }, { "epoch": 0.8131239544546975, "grad_norm": 0.867821730795633, "learning_rate": 
1.7534932799140975e-06, "loss": 0.3285, "step": 15068 }, { "epoch": 0.8131779180832119, "grad_norm": 0.8942739222155119, "learning_rate": 1.7530707374294068e-06, "loss": 0.5627, "step": 15069 }, { "epoch": 0.8132318817117263, "grad_norm": 1.142258686153707, "learning_rate": 1.7526483026352498e-06, "loss": 0.5152, "step": 15070 }, { "epoch": 0.8132858453402407, "grad_norm": 1.2945810026361777, "learning_rate": 1.7522259755437666e-06, "loss": 0.5458, "step": 15071 }, { "epoch": 0.813339808968755, "grad_norm": 1.177462380169229, "learning_rate": 1.7518037561670967e-06, "loss": 0.4323, "step": 15072 }, { "epoch": 0.8133937725972694, "grad_norm": 1.032273033352269, "learning_rate": 1.7513816445173734e-06, "loss": 0.5061, "step": 15073 }, { "epoch": 0.8134477362257838, "grad_norm": 1.1759257729250066, "learning_rate": 1.7509596406067286e-06, "loss": 0.4707, "step": 15074 }, { "epoch": 0.8135016998542982, "grad_norm": 1.0687775208740045, "learning_rate": 1.7505377444472926e-06, "loss": 0.427, "step": 15075 }, { "epoch": 0.8135556634828126, "grad_norm": 0.9950248208213496, "learning_rate": 1.7501159560511905e-06, "loss": 0.5003, "step": 15076 }, { "epoch": 0.813609627111327, "grad_norm": 1.0381643494009056, "learning_rate": 1.7496942754305441e-06, "loss": 0.5011, "step": 15077 }, { "epoch": 0.8136635907398413, "grad_norm": 0.9622982546521305, "learning_rate": 1.749272702597475e-06, "loss": 0.4457, "step": 15078 }, { "epoch": 0.8137175543683557, "grad_norm": 0.9257103328895527, "learning_rate": 1.7488512375640976e-06, "loss": 0.3438, "step": 15079 }, { "epoch": 0.8137715179968701, "grad_norm": 1.0628779641462953, "learning_rate": 1.7484298803425253e-06, "loss": 0.4762, "step": 15080 }, { "epoch": 0.8138254816253845, "grad_norm": 1.0903607370443784, "learning_rate": 1.748008630944869e-06, "loss": 0.5062, "step": 15081 }, { "epoch": 0.8138794452538989, "grad_norm": 1.0372253890873118, "learning_rate": 1.7475874893832363e-06, "loss": 0.4499, "step": 15082 }, { "epoch": 0.8139334088824133, "grad_norm": 1.0732225787618093, "learning_rate": 1.7471664556697305e-06, "loss": 0.3982, "step": 15083 }, { "epoch": 0.8139873725109277, "grad_norm": 0.9649086873186808, "learning_rate": 1.7467455298164525e-06, "loss": 0.451, "step": 15084 }, { "epoch": 0.814041336139442, "grad_norm": 1.0065573090965891, "learning_rate": 1.7463247118355009e-06, "loss": 0.4062, "step": 15085 }, { "epoch": 0.8140952997679564, "grad_norm": 0.9683149991183063, "learning_rate": 1.745904001738969e-06, "loss": 0.4351, "step": 15086 }, { "epoch": 0.8141492633964708, "grad_norm": 0.9664827107852813, "learning_rate": 1.7454833995389498e-06, "loss": 0.358, "step": 15087 }, { "epoch": 0.8142032270249852, "grad_norm": 1.0322280720681658, "learning_rate": 1.7450629052475312e-06, "loss": 0.5193, "step": 15088 }, { "epoch": 0.8142571906534996, "grad_norm": 0.905768647720355, "learning_rate": 1.744642518876799e-06, "loss": 0.3446, "step": 15089 }, { "epoch": 0.814311154282014, "grad_norm": 1.0122542823513263, "learning_rate": 1.744222240438835e-06, "loss": 0.3496, "step": 15090 }, { "epoch": 0.8143651179105283, "grad_norm": 0.9526966313163798, "learning_rate": 1.7438020699457192e-06, "loss": 0.4415, "step": 15091 }, { "epoch": 0.8144190815390426, "grad_norm": 1.1722786517777881, "learning_rate": 1.7433820074095281e-06, "loss": 0.5076, "step": 15092 }, { "epoch": 0.814473045167557, "grad_norm": 0.8690401099128786, "learning_rate": 1.742962052842333e-06, "loss": 0.252, "step": 15093 }, { "epoch": 0.8145270087960714, "grad_norm": 0.9669428287104388, 
"learning_rate": 1.7425422062562042e-06, "loss": 0.48, "step": 15094 }, { "epoch": 0.8145809724245858, "grad_norm": 1.3425039882915293, "learning_rate": 1.7421224676632097e-06, "loss": 0.4968, "step": 15095 }, { "epoch": 0.8146349360531002, "grad_norm": 1.051796953321871, "learning_rate": 1.7417028370754124e-06, "loss": 0.5064, "step": 15096 }, { "epoch": 0.8146888996816146, "grad_norm": 1.111424443600791, "learning_rate": 1.7412833145048725e-06, "loss": 0.4134, "step": 15097 }, { "epoch": 0.814742863310129, "grad_norm": 1.4429364988121711, "learning_rate": 1.7408638999636481e-06, "loss": 0.5257, "step": 15098 }, { "epoch": 0.8147968269386433, "grad_norm": 0.8955639375008229, "learning_rate": 1.7404445934637954e-06, "loss": 0.3548, "step": 15099 }, { "epoch": 0.8148507905671577, "grad_norm": 1.1712014707507226, "learning_rate": 1.7400253950173618e-06, "loss": 0.5068, "step": 15100 }, { "epoch": 0.8149047541956721, "grad_norm": 1.110871267303176, "learning_rate": 1.7396063046363981e-06, "loss": 0.4628, "step": 15101 }, { "epoch": 0.8149587178241865, "grad_norm": 1.1708196856809225, "learning_rate": 1.7391873223329491e-06, "loss": 0.3986, "step": 15102 }, { "epoch": 0.8150126814527009, "grad_norm": 1.1820238998553476, "learning_rate": 1.7387684481190559e-06, "loss": 0.6195, "step": 15103 }, { "epoch": 0.8150666450812153, "grad_norm": 1.0028754147914025, "learning_rate": 1.738349682006758e-06, "loss": 0.386, "step": 15104 }, { "epoch": 0.8151206087097297, "grad_norm": 0.9976206099272383, "learning_rate": 1.7379310240080913e-06, "loss": 0.4719, "step": 15105 }, { "epoch": 0.815174572338244, "grad_norm": 0.8360013072997218, "learning_rate": 1.7375124741350897e-06, "loss": 0.3464, "step": 15106 }, { "epoch": 0.8152285359667584, "grad_norm": 1.0523357493428456, "learning_rate": 1.7370940323997794e-06, "loss": 0.4834, "step": 15107 }, { "epoch": 0.8152824995952728, "grad_norm": 0.8712314369171777, "learning_rate": 1.7366756988141892e-06, "loss": 0.3176, "step": 15108 }, { "epoch": 0.8153364632237872, "grad_norm": 1.1199600100351657, "learning_rate": 1.736257473390342e-06, "loss": 0.4858, "step": 15109 }, { "epoch": 0.8153904268523016, "grad_norm": 1.0674403693648564, "learning_rate": 1.735839356140258e-06, "loss": 0.4886, "step": 15110 }, { "epoch": 0.815444390480816, "grad_norm": 1.0901726533090381, "learning_rate": 1.7354213470759543e-06, "loss": 0.5467, "step": 15111 }, { "epoch": 0.8154983541093304, "grad_norm": 1.0339069268329082, "learning_rate": 1.7350034462094451e-06, "loss": 0.4451, "step": 15112 }, { "epoch": 0.8155523177378446, "grad_norm": 1.0057614378727346, "learning_rate": 1.7345856535527413e-06, "loss": 0.4198, "step": 15113 }, { "epoch": 0.815606281366359, "grad_norm": 1.0159768008715466, "learning_rate": 1.7341679691178502e-06, "loss": 0.5239, "step": 15114 }, { "epoch": 0.8156602449948734, "grad_norm": 1.0455589995478431, "learning_rate": 1.7337503929167764e-06, "loss": 0.4742, "step": 15115 }, { "epoch": 0.8157142086233878, "grad_norm": 1.0614386793007953, "learning_rate": 1.7333329249615228e-06, "loss": 0.4726, "step": 15116 }, { "epoch": 0.8157681722519022, "grad_norm": 1.0767425261300112, "learning_rate": 1.732915565264086e-06, "loss": 0.4747, "step": 15117 }, { "epoch": 0.8158221358804166, "grad_norm": 1.1241505074808311, "learning_rate": 1.732498313836462e-06, "loss": 0.6552, "step": 15118 }, { "epoch": 0.815876099508931, "grad_norm": 0.9334109126824673, "learning_rate": 1.732081170690644e-06, "loss": 0.3467, "step": 15119 }, { "epoch": 0.8159300631374453, "grad_norm": 
1.244613518243582, "learning_rate": 1.731664135838621e-06, "loss": 0.6001, "step": 15120 }, { "epoch": 0.8159840267659597, "grad_norm": 1.2344020015749921, "learning_rate": 1.731247209292377e-06, "loss": 0.6271, "step": 15121 }, { "epoch": 0.8160379903944741, "grad_norm": 1.1818382982229965, "learning_rate": 1.7308303910638968e-06, "loss": 0.4584, "step": 15122 }, { "epoch": 0.8160919540229885, "grad_norm": 0.921933653289056, "learning_rate": 1.7304136811651595e-06, "loss": 0.4343, "step": 15123 }, { "epoch": 0.8161459176515029, "grad_norm": 1.1762653204705922, "learning_rate": 1.7299970796081418e-06, "loss": 0.5568, "step": 15124 }, { "epoch": 0.8161998812800173, "grad_norm": 0.9152830710848486, "learning_rate": 1.7295805864048172e-06, "loss": 0.3551, "step": 15125 }, { "epoch": 0.8162538449085317, "grad_norm": 0.8106504220108, "learning_rate": 1.729164201567156e-06, "loss": 0.2962, "step": 15126 }, { "epoch": 0.816307808537046, "grad_norm": 0.9651932021039273, "learning_rate": 1.7287479251071276e-06, "loss": 0.3988, "step": 15127 }, { "epoch": 0.8163617721655604, "grad_norm": 1.0344336122073787, "learning_rate": 1.7283317570366932e-06, "loss": 0.5532, "step": 15128 }, { "epoch": 0.8164157357940748, "grad_norm": 0.9841066214194347, "learning_rate": 1.7279156973678146e-06, "loss": 0.3836, "step": 15129 }, { "epoch": 0.8164696994225892, "grad_norm": 1.1979796659556876, "learning_rate": 1.7274997461124505e-06, "loss": 0.5245, "step": 15130 }, { "epoch": 0.8165236630511036, "grad_norm": 1.2160194836749223, "learning_rate": 1.727083903282556e-06, "loss": 0.5113, "step": 15131 }, { "epoch": 0.816577626679618, "grad_norm": 1.0834107492549452, "learning_rate": 1.7266681688900818e-06, "loss": 0.4522, "step": 15132 }, { "epoch": 0.8166315903081324, "grad_norm": 1.2704657925601976, "learning_rate": 1.7262525429469772e-06, "loss": 0.6654, "step": 15133 }, { "epoch": 0.8166855539366467, "grad_norm": 0.8886308246887907, "learning_rate": 1.7258370254651878e-06, "loss": 0.346, "step": 15134 }, { "epoch": 0.816739517565161, "grad_norm": 1.2057503006807524, "learning_rate": 1.7254216164566557e-06, "loss": 0.546, "step": 15135 }, { "epoch": 0.8167934811936755, "grad_norm": 0.9872574723305986, "learning_rate": 1.72500631593332e-06, "loss": 0.4473, "step": 15136 }, { "epoch": 0.8168474448221898, "grad_norm": 0.9550430535742108, "learning_rate": 1.7245911239071173e-06, "loss": 0.4153, "step": 15137 }, { "epoch": 0.8169014084507042, "grad_norm": 1.0637829910401888, "learning_rate": 1.724176040389982e-06, "loss": 0.4444, "step": 15138 }, { "epoch": 0.8169553720792186, "grad_norm": 0.9615300470904677, "learning_rate": 1.723761065393841e-06, "loss": 0.4073, "step": 15139 }, { "epoch": 0.817009335707733, "grad_norm": 0.8760107586559983, "learning_rate": 1.7233461989306228e-06, "loss": 0.3442, "step": 15140 }, { "epoch": 0.8170632993362473, "grad_norm": 1.0898336738932737, "learning_rate": 1.7229314410122505e-06, "loss": 0.3841, "step": 15141 }, { "epoch": 0.8171172629647617, "grad_norm": 0.9843084497830475, "learning_rate": 1.7225167916506452e-06, "loss": 0.501, "step": 15142 }, { "epoch": 0.8171712265932761, "grad_norm": 1.2199929395560487, "learning_rate": 1.7221022508577245e-06, "loss": 0.4557, "step": 15143 }, { "epoch": 0.8172251902217905, "grad_norm": 1.0547251240118944, "learning_rate": 1.7216878186454019e-06, "loss": 0.446, "step": 15144 }, { "epoch": 0.8172791538503049, "grad_norm": 0.9961280188763848, "learning_rate": 1.7212734950255891e-06, "loss": 0.4058, "step": 15145 }, { "epoch": 0.8173331174788193, 
"grad_norm": 1.1509943920364634, "learning_rate": 1.7208592800101947e-06, "loss": 0.5363, "step": 15146 }, { "epoch": 0.8173870811073336, "grad_norm": 1.0177544506846612, "learning_rate": 1.7204451736111238e-06, "loss": 0.3991, "step": 15147 }, { "epoch": 0.817441044735848, "grad_norm": 0.844767114487663, "learning_rate": 1.7200311758402765e-06, "loss": 0.401, "step": 15148 }, { "epoch": 0.8174950083643624, "grad_norm": 0.9407685433048384, "learning_rate": 1.7196172867095528e-06, "loss": 0.4434, "step": 15149 }, { "epoch": 0.8175489719928768, "grad_norm": 1.065244587996238, "learning_rate": 1.7192035062308476e-06, "loss": 0.4303, "step": 15150 }, { "epoch": 0.8176029356213912, "grad_norm": 0.9311825906199078, "learning_rate": 1.7187898344160547e-06, "loss": 0.374, "step": 15151 }, { "epoch": 0.8176568992499056, "grad_norm": 1.149769920024768, "learning_rate": 1.7183762712770619e-06, "loss": 0.484, "step": 15152 }, { "epoch": 0.81771086287842, "grad_norm": 1.1495573386857607, "learning_rate": 1.7179628168257567e-06, "loss": 0.3943, "step": 15153 }, { "epoch": 0.8177648265069343, "grad_norm": 1.0451360011956505, "learning_rate": 1.7175494710740223e-06, "loss": 0.4849, "step": 15154 }, { "epoch": 0.8178187901354487, "grad_norm": 1.057213380359949, "learning_rate": 1.7171362340337367e-06, "loss": 0.6289, "step": 15155 }, { "epoch": 0.8178727537639631, "grad_norm": 0.9364574570580809, "learning_rate": 1.7167231057167782e-06, "loss": 0.4733, "step": 15156 }, { "epoch": 0.8179267173924775, "grad_norm": 1.2795891799871753, "learning_rate": 1.7163100861350208e-06, "loss": 0.5759, "step": 15157 }, { "epoch": 0.8179806810209919, "grad_norm": 1.1747363932518176, "learning_rate": 1.715897175300334e-06, "loss": 0.5677, "step": 15158 }, { "epoch": 0.8180346446495063, "grad_norm": 0.8931459010240307, "learning_rate": 1.7154843732245862e-06, "loss": 0.4422, "step": 15159 }, { "epoch": 0.8180886082780207, "grad_norm": 1.0055573861401688, "learning_rate": 1.7150716799196414e-06, "loss": 0.5087, "step": 15160 }, { "epoch": 0.8181425719065349, "grad_norm": 1.2377504555005403, "learning_rate": 1.7146590953973625e-06, "loss": 0.5498, "step": 15161 }, { "epoch": 0.8181965355350493, "grad_norm": 1.361495944776977, "learning_rate": 1.7142466196696044e-06, "loss": 0.651, "step": 15162 }, { "epoch": 0.8182504991635637, "grad_norm": 1.1331645703832274, "learning_rate": 1.7138342527482238e-06, "loss": 0.4738, "step": 15163 }, { "epoch": 0.8183044627920781, "grad_norm": 1.0351599573079375, "learning_rate": 1.7134219946450725e-06, "loss": 0.3814, "step": 15164 }, { "epoch": 0.8183584264205925, "grad_norm": 1.360444761976536, "learning_rate": 1.713009845371999e-06, "loss": 0.5723, "step": 15165 }, { "epoch": 0.8184123900491069, "grad_norm": 1.102710551027252, "learning_rate": 1.7125978049408486e-06, "loss": 0.4076, "step": 15166 }, { "epoch": 0.8184663536776213, "grad_norm": 0.9755193654078085, "learning_rate": 1.712185873363465e-06, "loss": 0.4893, "step": 15167 }, { "epoch": 0.8185203173061356, "grad_norm": 1.0520917216630519, "learning_rate": 1.7117740506516874e-06, "loss": 0.4777, "step": 15168 }, { "epoch": 0.81857428093465, "grad_norm": 0.997824178128833, "learning_rate": 1.7113623368173504e-06, "loss": 0.3743, "step": 15169 }, { "epoch": 0.8186282445631644, "grad_norm": 0.9263484601998064, "learning_rate": 1.710950731872288e-06, "loss": 0.4078, "step": 15170 }, { "epoch": 0.8186822081916788, "grad_norm": 1.120297231608471, "learning_rate": 1.71053923582833e-06, "loss": 0.5405, "step": 15171 }, { "epoch": 
0.8187361718201932, "grad_norm": 1.0129089694326077, "learning_rate": 1.7101278486973033e-06, "loss": 0.4254, "step": 15172 }, { "epoch": 0.8187901354487076, "grad_norm": 0.9514323351833918, "learning_rate": 1.7097165704910319e-06, "loss": 0.3767, "step": 15173 }, { "epoch": 0.818844099077222, "grad_norm": 0.9661659430917452, "learning_rate": 1.7093054012213355e-06, "loss": 0.5089, "step": 15174 }, { "epoch": 0.8188980627057363, "grad_norm": 0.9268128258056492, "learning_rate": 1.7088943409000336e-06, "loss": 0.3478, "step": 15175 }, { "epoch": 0.8189520263342507, "grad_norm": 1.0937691238798186, "learning_rate": 1.7084833895389374e-06, "loss": 0.5081, "step": 15176 }, { "epoch": 0.8190059899627651, "grad_norm": 1.0125312335778345, "learning_rate": 1.7080725471498603e-06, "loss": 0.3877, "step": 15177 }, { "epoch": 0.8190599535912795, "grad_norm": 0.8845595572174, "learning_rate": 1.707661813744609e-06, "loss": 0.4497, "step": 15178 }, { "epoch": 0.8191139172197939, "grad_norm": 1.0211368833174785, "learning_rate": 1.707251189334989e-06, "loss": 0.468, "step": 15179 }, { "epoch": 0.8191678808483083, "grad_norm": 0.9534858323104747, "learning_rate": 1.706840673932803e-06, "loss": 0.3681, "step": 15180 }, { "epoch": 0.8192218444768227, "grad_norm": 0.9160056799040834, "learning_rate": 1.7064302675498481e-06, "loss": 0.4413, "step": 15181 }, { "epoch": 0.819275808105337, "grad_norm": 1.0341806283417736, "learning_rate": 1.7060199701979218e-06, "loss": 0.5375, "step": 15182 }, { "epoch": 0.8193297717338514, "grad_norm": 1.0673690645939127, "learning_rate": 1.705609781888814e-06, "loss": 0.4299, "step": 15183 }, { "epoch": 0.8193837353623658, "grad_norm": 1.1681129524617253, "learning_rate": 1.7051997026343148e-06, "loss": 0.5788, "step": 15184 }, { "epoch": 0.8194376989908801, "grad_norm": 0.8467781199960405, "learning_rate": 1.7047897324462104e-06, "loss": 0.3111, "step": 15185 }, { "epoch": 0.8194916626193945, "grad_norm": 1.0322381942757666, "learning_rate": 1.7043798713362842e-06, "loss": 0.5266, "step": 15186 }, { "epoch": 0.8195456262479089, "grad_norm": 1.33939831830161, "learning_rate": 1.7039701193163156e-06, "loss": 0.5874, "step": 15187 }, { "epoch": 0.8195995898764233, "grad_norm": 1.1074320809595735, "learning_rate": 1.7035604763980812e-06, "loss": 0.5073, "step": 15188 }, { "epoch": 0.8196535535049376, "grad_norm": 1.1102030340292126, "learning_rate": 1.7031509425933546e-06, "loss": 0.457, "step": 15189 }, { "epoch": 0.819707517133452, "grad_norm": 0.9555393779954622, "learning_rate": 1.7027415179139068e-06, "loss": 0.4502, "step": 15190 }, { "epoch": 0.8197614807619664, "grad_norm": 1.1164453235548275, "learning_rate": 1.7023322023715048e-06, "loss": 0.4891, "step": 15191 }, { "epoch": 0.8198154443904808, "grad_norm": 1.072119526866778, "learning_rate": 1.701922995977912e-06, "loss": 0.4004, "step": 15192 }, { "epoch": 0.8198694080189952, "grad_norm": 0.96430374649677, "learning_rate": 1.70151389874489e-06, "loss": 0.4469, "step": 15193 }, { "epoch": 0.8199233716475096, "grad_norm": 0.9580832223783925, "learning_rate": 1.701104910684197e-06, "loss": 0.6075, "step": 15194 }, { "epoch": 0.819977335276024, "grad_norm": 1.0872272543927284, "learning_rate": 1.7006960318075872e-06, "loss": 0.4832, "step": 15195 }, { "epoch": 0.8200312989045383, "grad_norm": 1.1923898264720125, "learning_rate": 1.7002872621268135e-06, "loss": 0.3911, "step": 15196 }, { "epoch": 0.8200852625330527, "grad_norm": 1.0283255162914302, "learning_rate": 1.6998786016536223e-06, "loss": 0.3966, "step": 15197 }, 
{ "epoch": 0.8201392261615671, "grad_norm": 1.1185873154251138, "learning_rate": 1.699470050399759e-06, "loss": 0.497, "step": 15198 }, { "epoch": 0.8201931897900815, "grad_norm": 1.030085881861573, "learning_rate": 1.6990616083769678e-06, "loss": 0.4303, "step": 15199 }, { "epoch": 0.8202471534185959, "grad_norm": 0.8852793080058973, "learning_rate": 1.6986532755969854e-06, "loss": 0.3803, "step": 15200 }, { "epoch": 0.8203011170471103, "grad_norm": 0.9893078256397938, "learning_rate": 1.69824505207155e-06, "loss": 0.3654, "step": 15201 }, { "epoch": 0.8203550806756247, "grad_norm": 0.9727138521177023, "learning_rate": 1.6978369378123922e-06, "loss": 0.3476, "step": 15202 }, { "epoch": 0.820409044304139, "grad_norm": 0.7612902500712936, "learning_rate": 1.697428932831244e-06, "loss": 0.3039, "step": 15203 }, { "epoch": 0.8204630079326534, "grad_norm": 0.9597916191809853, "learning_rate": 1.6970210371398299e-06, "loss": 0.4491, "step": 15204 }, { "epoch": 0.8205169715611678, "grad_norm": 0.9444731911654156, "learning_rate": 1.6966132507498734e-06, "loss": 0.395, "step": 15205 }, { "epoch": 0.8205709351896822, "grad_norm": 0.7937933132303412, "learning_rate": 1.6962055736730949e-06, "loss": 0.3291, "step": 15206 }, { "epoch": 0.8206248988181966, "grad_norm": 1.0115446334117435, "learning_rate": 1.6957980059212114e-06, "loss": 0.6179, "step": 15207 }, { "epoch": 0.820678862446711, "grad_norm": 1.0002018211489654, "learning_rate": 1.6953905475059379e-06, "loss": 0.4277, "step": 15208 }, { "epoch": 0.8207328260752254, "grad_norm": 1.1708166516554661, "learning_rate": 1.6949831984389843e-06, "loss": 0.5028, "step": 15209 }, { "epoch": 0.8207867897037396, "grad_norm": 1.0714092071168877, "learning_rate": 1.6945759587320587e-06, "loss": 0.5435, "step": 15210 }, { "epoch": 0.820840753332254, "grad_norm": 0.9961441919936915, "learning_rate": 1.6941688283968644e-06, "loss": 0.3604, "step": 15211 }, { "epoch": 0.8208947169607684, "grad_norm": 1.0255774522573817, "learning_rate": 1.6937618074451034e-06, "loss": 0.5432, "step": 15212 }, { "epoch": 0.8209486805892828, "grad_norm": 1.0831078464151398, "learning_rate": 1.6933548958884744e-06, "loss": 0.3171, "step": 15213 }, { "epoch": 0.8210026442177972, "grad_norm": 1.0068235602847235, "learning_rate": 1.6929480937386716e-06, "loss": 0.4286, "step": 15214 }, { "epoch": 0.8210566078463116, "grad_norm": 1.0575133589707755, "learning_rate": 1.6925414010073876e-06, "loss": 0.4495, "step": 15215 }, { "epoch": 0.8211105714748259, "grad_norm": 0.7346566803368823, "learning_rate": 1.6921348177063116e-06, "loss": 0.2661, "step": 15216 }, { "epoch": 0.8211645351033403, "grad_norm": 1.2708810179683256, "learning_rate": 1.6917283438471276e-06, "loss": 0.5395, "step": 15217 }, { "epoch": 0.8212184987318547, "grad_norm": 1.0041273748397244, "learning_rate": 1.6913219794415191e-06, "loss": 0.4293, "step": 15218 }, { "epoch": 0.8212724623603691, "grad_norm": 1.1481636173389942, "learning_rate": 1.6909157245011653e-06, "loss": 0.5606, "step": 15219 }, { "epoch": 0.8213264259888835, "grad_norm": 1.0486392581911248, "learning_rate": 1.6905095790377424e-06, "loss": 0.5641, "step": 15220 }, { "epoch": 0.8213803896173979, "grad_norm": 0.9368245880370383, "learning_rate": 1.6901035430629229e-06, "loss": 0.41, "step": 15221 }, { "epoch": 0.8214343532459123, "grad_norm": 1.1266425989379656, "learning_rate": 1.6896976165883778e-06, "loss": 0.5366, "step": 15222 }, { "epoch": 0.8214883168744266, "grad_norm": 0.9727424893779304, "learning_rate": 1.689291799625774e-06, "loss": 
0.3264, "step": 15223 }, { "epoch": 0.821542280502941, "grad_norm": 1.040179566534727, "learning_rate": 1.6888860921867733e-06, "loss": 0.4803, "step": 15224 }, { "epoch": 0.8215962441314554, "grad_norm": 1.1701016359833818, "learning_rate": 1.6884804942830373e-06, "loss": 0.4633, "step": 15225 }, { "epoch": 0.8216502077599698, "grad_norm": 0.7011096013236991, "learning_rate": 1.688075005926223e-06, "loss": 0.2525, "step": 15226 }, { "epoch": 0.8217041713884842, "grad_norm": 0.9905039077018664, "learning_rate": 1.6876696271279846e-06, "loss": 0.5482, "step": 15227 }, { "epoch": 0.8217581350169986, "grad_norm": 1.0803946995998157, "learning_rate": 1.6872643578999737e-06, "loss": 0.4277, "step": 15228 }, { "epoch": 0.821812098645513, "grad_norm": 1.0400495994480834, "learning_rate": 1.686859198253837e-06, "loss": 0.5455, "step": 15229 }, { "epoch": 0.8218660622740273, "grad_norm": 1.010128671237372, "learning_rate": 1.6864541482012215e-06, "loss": 0.4466, "step": 15230 }, { "epoch": 0.8219200259025417, "grad_norm": 0.9575118245129701, "learning_rate": 1.6860492077537654e-06, "loss": 0.363, "step": 15231 }, { "epoch": 0.821973989531056, "grad_norm": 1.0234621837132485, "learning_rate": 1.685644376923109e-06, "loss": 0.4032, "step": 15232 }, { "epoch": 0.8220279531595704, "grad_norm": 1.2004785287443844, "learning_rate": 1.6852396557208879e-06, "loss": 0.5484, "step": 15233 }, { "epoch": 0.8220819167880848, "grad_norm": 1.078887260609283, "learning_rate": 1.684835044158733e-06, "loss": 0.4227, "step": 15234 }, { "epoch": 0.8221358804165992, "grad_norm": 1.4590680628441808, "learning_rate": 1.6844305422482738e-06, "loss": 0.691, "step": 15235 }, { "epoch": 0.8221898440451136, "grad_norm": 1.2480920491415, "learning_rate": 1.6840261500011362e-06, "loss": 0.4859, "step": 15236 }, { "epoch": 0.8222438076736279, "grad_norm": 1.0346299656002698, "learning_rate": 1.6836218674289435e-06, "loss": 0.4868, "step": 15237 }, { "epoch": 0.8222977713021423, "grad_norm": 1.0065199045088573, "learning_rate": 1.6832176945433137e-06, "loss": 0.3191, "step": 15238 }, { "epoch": 0.8223517349306567, "grad_norm": 0.9489358030933921, "learning_rate": 1.6828136313558646e-06, "loss": 0.4306, "step": 15239 }, { "epoch": 0.8224056985591711, "grad_norm": 0.792785621453142, "learning_rate": 1.6824096778782082e-06, "loss": 0.2892, "step": 15240 }, { "epoch": 0.8224596621876855, "grad_norm": 0.9661409010971205, "learning_rate": 1.6820058341219553e-06, "loss": 0.4272, "step": 15241 }, { "epoch": 0.8225136258161999, "grad_norm": 0.8564701058663273, "learning_rate": 1.681602100098712e-06, "loss": 0.4342, "step": 15242 }, { "epoch": 0.8225675894447143, "grad_norm": 1.022451030645694, "learning_rate": 1.6811984758200833e-06, "loss": 0.4564, "step": 15243 }, { "epoch": 0.8226215530732286, "grad_norm": 1.049153988759497, "learning_rate": 1.68079496129767e-06, "loss": 0.4529, "step": 15244 }, { "epoch": 0.822675516701743, "grad_norm": 1.1292566559492871, "learning_rate": 1.6803915565430672e-06, "loss": 0.5754, "step": 15245 }, { "epoch": 0.8227294803302574, "grad_norm": 0.9079465948219066, "learning_rate": 1.6799882615678703e-06, "loss": 0.4485, "step": 15246 }, { "epoch": 0.8227834439587718, "grad_norm": 1.0155934404026505, "learning_rate": 1.6795850763836708e-06, "loss": 0.5628, "step": 15247 }, { "epoch": 0.8228374075872862, "grad_norm": 1.1764315046248373, "learning_rate": 1.6791820010020568e-06, "loss": 0.5022, "step": 15248 }, { "epoch": 0.8228913712158006, "grad_norm": 1.100738548215173, "learning_rate": 
1.678779035434612e-06, "loss": 0.5808, "step": 15249 }, { "epoch": 0.822945334844315, "grad_norm": 0.9849390506516633, "learning_rate": 1.6783761796929193e-06, "loss": 0.4148, "step": 15250 }, { "epoch": 0.8229992984728293, "grad_norm": 0.8745954548936484, "learning_rate": 1.6779734337885578e-06, "loss": 0.4044, "step": 15251 }, { "epoch": 0.8230532621013437, "grad_norm": 0.736396353759655, "learning_rate": 1.6775707977331008e-06, "loss": 0.2608, "step": 15252 }, { "epoch": 0.8231072257298581, "grad_norm": 0.920702597703036, "learning_rate": 1.677168271538121e-06, "loss": 0.4324, "step": 15253 }, { "epoch": 0.8231611893583725, "grad_norm": 1.0488014199500135, "learning_rate": 1.6767658552151878e-06, "loss": 0.4942, "step": 15254 }, { "epoch": 0.8232151529868869, "grad_norm": 0.9593326466663169, "learning_rate": 1.6763635487758668e-06, "loss": 0.4176, "step": 15255 }, { "epoch": 0.8232691166154013, "grad_norm": 1.0052775996662797, "learning_rate": 1.6759613522317214e-06, "loss": 0.3579, "step": 15256 }, { "epoch": 0.8233230802439157, "grad_norm": 0.8962863776418115, "learning_rate": 1.6755592655943103e-06, "loss": 0.3879, "step": 15257 }, { "epoch": 0.8233770438724299, "grad_norm": 1.158062090981049, "learning_rate": 1.675157288875191e-06, "loss": 0.5486, "step": 15258 }, { "epoch": 0.8234310075009443, "grad_norm": 0.8762405266793526, "learning_rate": 1.6747554220859153e-06, "loss": 0.3529, "step": 15259 }, { "epoch": 0.8234849711294587, "grad_norm": 1.1217545633979755, "learning_rate": 1.6743536652380344e-06, "loss": 0.4672, "step": 15260 }, { "epoch": 0.8235389347579731, "grad_norm": 0.9562218278969031, "learning_rate": 1.6739520183430934e-06, "loss": 0.4804, "step": 15261 }, { "epoch": 0.8235928983864875, "grad_norm": 1.1282345637610869, "learning_rate": 1.6735504814126385e-06, "loss": 0.482, "step": 15262 }, { "epoch": 0.8236468620150019, "grad_norm": 1.0658878244195737, "learning_rate": 1.6731490544582086e-06, "loss": 0.4978, "step": 15263 }, { "epoch": 0.8237008256435163, "grad_norm": 1.0209844104943446, "learning_rate": 1.6727477374913415e-06, "loss": 0.6033, "step": 15264 }, { "epoch": 0.8237547892720306, "grad_norm": 1.0675909764184786, "learning_rate": 1.6723465305235727e-06, "loss": 0.4, "step": 15265 }, { "epoch": 0.823808752900545, "grad_norm": 1.0741853297900583, "learning_rate": 1.6719454335664315e-06, "loss": 0.4067, "step": 15266 }, { "epoch": 0.8238627165290594, "grad_norm": 0.9782178042780708, "learning_rate": 1.6715444466314462e-06, "loss": 0.4295, "step": 15267 }, { "epoch": 0.8239166801575738, "grad_norm": 1.0612408994113318, "learning_rate": 1.671143569730142e-06, "loss": 0.3816, "step": 15268 }, { "epoch": 0.8239706437860882, "grad_norm": 1.1400180863833498, "learning_rate": 1.6707428028740402e-06, "loss": 0.64, "step": 15269 }, { "epoch": 0.8240246074146026, "grad_norm": 0.919419911835329, "learning_rate": 1.67034214607466e-06, "loss": 0.3435, "step": 15270 }, { "epoch": 0.824078571043117, "grad_norm": 1.054872539270685, "learning_rate": 1.6699415993435154e-06, "loss": 0.4764, "step": 15271 }, { "epoch": 0.8241325346716313, "grad_norm": 1.0658727204629213, "learning_rate": 1.6695411626921205e-06, "loss": 0.3804, "step": 15272 }, { "epoch": 0.8241864983001457, "grad_norm": 0.9353168687161953, "learning_rate": 1.6691408361319823e-06, "loss": 0.3439, "step": 15273 }, { "epoch": 0.8242404619286601, "grad_norm": 1.3723189725860312, "learning_rate": 1.6687406196746063e-06, "loss": 0.5516, "step": 15274 }, { "epoch": 0.8242944255571745, "grad_norm": 0.9791749248950546, 
"learning_rate": 1.668340513331497e-06, "loss": 0.4388, "step": 15275 }, { "epoch": 0.8243483891856889, "grad_norm": 1.134389714060165, "learning_rate": 1.6679405171141528e-06, "loss": 0.6094, "step": 15276 }, { "epoch": 0.8244023528142033, "grad_norm": 1.2409679470973485, "learning_rate": 1.6675406310340698e-06, "loss": 0.4468, "step": 15277 }, { "epoch": 0.8244563164427177, "grad_norm": 0.877506017831225, "learning_rate": 1.6671408551027419e-06, "loss": 0.396, "step": 15278 }, { "epoch": 0.824510280071232, "grad_norm": 0.8884766588142108, "learning_rate": 1.6667411893316592e-06, "loss": 0.4363, "step": 15279 }, { "epoch": 0.8245642436997463, "grad_norm": 1.1552380995314846, "learning_rate": 1.6663416337323074e-06, "loss": 0.5106, "step": 15280 }, { "epoch": 0.8246182073282607, "grad_norm": 0.945257535211169, "learning_rate": 1.6659421883161709e-06, "loss": 0.3614, "step": 15281 }, { "epoch": 0.8246721709567751, "grad_norm": 0.9657387074278247, "learning_rate": 1.6655428530947293e-06, "loss": 0.3992, "step": 15282 }, { "epoch": 0.8247261345852895, "grad_norm": 1.1451314236179333, "learning_rate": 1.6651436280794606e-06, "loss": 0.5964, "step": 15283 }, { "epoch": 0.8247800982138039, "grad_norm": 0.9482422709385145, "learning_rate": 1.6647445132818393e-06, "loss": 0.4665, "step": 15284 }, { "epoch": 0.8248340618423182, "grad_norm": 0.8469977240633774, "learning_rate": 1.6643455087133354e-06, "loss": 0.3531, "step": 15285 }, { "epoch": 0.8248880254708326, "grad_norm": 0.8494720351294899, "learning_rate": 1.6639466143854177e-06, "loss": 0.344, "step": 15286 }, { "epoch": 0.824941989099347, "grad_norm": 1.0006468357966578, "learning_rate": 1.6635478303095508e-06, "loss": 0.3963, "step": 15287 }, { "epoch": 0.8249959527278614, "grad_norm": 1.1825062703229994, "learning_rate": 1.6631491564971954e-06, "loss": 0.5821, "step": 15288 }, { "epoch": 0.8250499163563758, "grad_norm": 0.9703370374855572, "learning_rate": 1.6627505929598098e-06, "loss": 0.4698, "step": 15289 }, { "epoch": 0.8251038799848902, "grad_norm": 0.7704826166576337, "learning_rate": 1.6623521397088494e-06, "loss": 0.309, "step": 15290 }, { "epoch": 0.8251578436134046, "grad_norm": 0.7475413397779399, "learning_rate": 1.6619537967557658e-06, "loss": 0.3164, "step": 15291 }, { "epoch": 0.8252118072419189, "grad_norm": 0.9815406048652813, "learning_rate": 1.6615555641120087e-06, "loss": 0.4722, "step": 15292 }, { "epoch": 0.8252657708704333, "grad_norm": 1.1830777588943997, "learning_rate": 1.661157441789023e-06, "loss": 0.4208, "step": 15293 }, { "epoch": 0.8253197344989477, "grad_norm": 1.002791344793153, "learning_rate": 1.660759429798251e-06, "loss": 0.4328, "step": 15294 }, { "epoch": 0.8253736981274621, "grad_norm": 0.9963884093504602, "learning_rate": 1.6603615281511327e-06, "loss": 0.3678, "step": 15295 }, { "epoch": 0.8254276617559765, "grad_norm": 1.0621158113636961, "learning_rate": 1.6599637368591031e-06, "loss": 0.4961, "step": 15296 }, { "epoch": 0.8254816253844909, "grad_norm": 1.1883653775029228, "learning_rate": 1.659566055933597e-06, "loss": 0.5059, "step": 15297 }, { "epoch": 0.8255355890130053, "grad_norm": 0.9616357470863179, "learning_rate": 1.6591684853860419e-06, "loss": 0.4451, "step": 15298 }, { "epoch": 0.8255895526415196, "grad_norm": 0.954410842348908, "learning_rate": 1.6587710252278666e-06, "loss": 0.3717, "step": 15299 }, { "epoch": 0.825643516270034, "grad_norm": 1.0226124432135435, "learning_rate": 1.658373675470492e-06, "loss": 0.5448, "step": 15300 }, { "epoch": 0.8256974798985484, "grad_norm": 
1.051332942469408, "learning_rate": 1.6579764361253402e-06, "loss": 0.4587, "step": 15301 }, { "epoch": 0.8257514435270628, "grad_norm": 1.0064378918233974, "learning_rate": 1.6575793072038277e-06, "loss": 0.5064, "step": 15302 }, { "epoch": 0.8258054071555772, "grad_norm": 0.8280355004178112, "learning_rate": 1.6571822887173682e-06, "loss": 0.3153, "step": 15303 }, { "epoch": 0.8258593707840916, "grad_norm": 0.9433685809385743, "learning_rate": 1.6567853806773725e-06, "loss": 0.4151, "step": 15304 }, { "epoch": 0.825913334412606, "grad_norm": 1.0278959797644276, "learning_rate": 1.6563885830952481e-06, "loss": 0.4695, "step": 15305 }, { "epoch": 0.8259672980411202, "grad_norm": 1.0951052825092307, "learning_rate": 1.6559918959824007e-06, "loss": 0.6042, "step": 15306 }, { "epoch": 0.8260212616696346, "grad_norm": 1.1560765268794617, "learning_rate": 1.6555953193502294e-06, "loss": 0.4197, "step": 15307 }, { "epoch": 0.826075225298149, "grad_norm": 0.9341647133161034, "learning_rate": 1.6551988532101334e-06, "loss": 0.358, "step": 15308 }, { "epoch": 0.8261291889266634, "grad_norm": 0.9596059882092927, "learning_rate": 1.6548024975735068e-06, "loss": 0.4001, "step": 15309 }, { "epoch": 0.8261831525551778, "grad_norm": 0.9496388432571761, "learning_rate": 1.6544062524517417e-06, "loss": 0.4992, "step": 15310 }, { "epoch": 0.8262371161836922, "grad_norm": 1.0679020293879193, "learning_rate": 1.6540101178562268e-06, "loss": 0.4602, "step": 15311 }, { "epoch": 0.8262910798122066, "grad_norm": 1.0117689512059747, "learning_rate": 1.6536140937983469e-06, "loss": 0.5547, "step": 15312 }, { "epoch": 0.8263450434407209, "grad_norm": 0.9387888661809785, "learning_rate": 1.6532181802894858e-06, "loss": 0.3941, "step": 15313 }, { "epoch": 0.8263990070692353, "grad_norm": 1.038077720108746, "learning_rate": 1.6528223773410202e-06, "loss": 0.4535, "step": 15314 }, { "epoch": 0.8264529706977497, "grad_norm": 0.8492798883185232, "learning_rate": 1.6524266849643263e-06, "loss": 0.4195, "step": 15315 }, { "epoch": 0.8265069343262641, "grad_norm": 0.984466346648735, "learning_rate": 1.6520311031707776e-06, "loss": 0.4196, "step": 15316 }, { "epoch": 0.8265608979547785, "grad_norm": 0.9525329023144323, "learning_rate": 1.6516356319717424e-06, "loss": 0.3348, "step": 15317 }, { "epoch": 0.8266148615832929, "grad_norm": 1.0401583310194717, "learning_rate": 1.651240271378588e-06, "loss": 0.3512, "step": 15318 }, { "epoch": 0.8266688252118073, "grad_norm": 1.121124164012634, "learning_rate": 1.6508450214026779e-06, "loss": 0.643, "step": 15319 }, { "epoch": 0.8267227888403216, "grad_norm": 1.3295100307235266, "learning_rate": 1.6504498820553708e-06, "loss": 0.5374, "step": 15320 }, { "epoch": 0.826776752468836, "grad_norm": 1.022600458689407, "learning_rate": 1.6500548533480237e-06, "loss": 0.3431, "step": 15321 }, { "epoch": 0.8268307160973504, "grad_norm": 0.952025877056846, "learning_rate": 1.6496599352919895e-06, "loss": 0.3692, "step": 15322 }, { "epoch": 0.8268846797258648, "grad_norm": 0.9850382153017488, "learning_rate": 1.6492651278986199e-06, "loss": 0.4874, "step": 15323 }, { "epoch": 0.8269386433543792, "grad_norm": 0.9754257827735962, "learning_rate": 1.6488704311792614e-06, "loss": 0.4086, "step": 15324 }, { "epoch": 0.8269926069828936, "grad_norm": 1.0875429196686848, "learning_rate": 1.6484758451452576e-06, "loss": 0.5221, "step": 15325 }, { "epoch": 0.827046570611408, "grad_norm": 0.9844649431257165, "learning_rate": 1.64808136980795e-06, "loss": 0.3816, "step": 15326 }, { "epoch": 
0.8271005342399222, "grad_norm": 0.9131930073443923, "learning_rate": 1.647687005178677e-06, "loss": 0.4081, "step": 15327 }, { "epoch": 0.8271544978684366, "grad_norm": 0.9712443854969908, "learning_rate": 1.6472927512687703e-06, "loss": 0.4141, "step": 15328 }, { "epoch": 0.827208461496951, "grad_norm": 1.0355973072206333, "learning_rate": 1.6468986080895632e-06, "loss": 0.3999, "step": 15329 }, { "epoch": 0.8272624251254654, "grad_norm": 0.9727168487786289, "learning_rate": 1.6465045756523835e-06, "loss": 0.4082, "step": 15330 }, { "epoch": 0.8273163887539798, "grad_norm": 1.0137916292130777, "learning_rate": 1.6461106539685557e-06, "loss": 0.4452, "step": 15331 }, { "epoch": 0.8273703523824942, "grad_norm": 0.9848647473256945, "learning_rate": 1.645716843049402e-06, "loss": 0.3723, "step": 15332 }, { "epoch": 0.8274243160110086, "grad_norm": 1.0315841005455213, "learning_rate": 1.6453231429062406e-06, "loss": 0.5252, "step": 15333 }, { "epoch": 0.8274782796395229, "grad_norm": 1.115264253986196, "learning_rate": 1.6449295535503879e-06, "loss": 0.5367, "step": 15334 }, { "epoch": 0.8275322432680373, "grad_norm": 1.1932735938232533, "learning_rate": 1.644536074993154e-06, "loss": 0.5656, "step": 15335 }, { "epoch": 0.8275862068965517, "grad_norm": 1.1770821675523802, "learning_rate": 1.6441427072458493e-06, "loss": 0.5771, "step": 15336 }, { "epoch": 0.8276401705250661, "grad_norm": 1.1470761731590955, "learning_rate": 1.6437494503197793e-06, "loss": 0.4973, "step": 15337 }, { "epoch": 0.8276941341535805, "grad_norm": 1.112590310917873, "learning_rate": 1.6433563042262462e-06, "loss": 0.4466, "step": 15338 }, { "epoch": 0.8277480977820949, "grad_norm": 1.179742942604993, "learning_rate": 1.6429632689765495e-06, "loss": 0.7439, "step": 15339 }, { "epoch": 0.8278020614106093, "grad_norm": 0.9418536515824265, "learning_rate": 1.6425703445819868e-06, "loss": 0.5878, "step": 15340 }, { "epoch": 0.8278560250391236, "grad_norm": 1.1606521531608371, "learning_rate": 1.6421775310538491e-06, "loss": 0.5657, "step": 15341 }, { "epoch": 0.827909988667638, "grad_norm": 0.8111427005460786, "learning_rate": 1.6417848284034277e-06, "loss": 0.3825, "step": 15342 }, { "epoch": 0.8279639522961524, "grad_norm": 0.82409662291311, "learning_rate": 1.6413922366420083e-06, "loss": 0.2925, "step": 15343 }, { "epoch": 0.8280179159246668, "grad_norm": 1.0134838940954902, "learning_rate": 1.640999755780875e-06, "loss": 0.4218, "step": 15344 }, { "epoch": 0.8280718795531812, "grad_norm": 1.359957018304399, "learning_rate": 1.6406073858313082e-06, "loss": 0.492, "step": 15345 }, { "epoch": 0.8281258431816956, "grad_norm": 0.9848664247455619, "learning_rate": 1.6402151268045845e-06, "loss": 0.46, "step": 15346 }, { "epoch": 0.82817980681021, "grad_norm": 0.9882096098383427, "learning_rate": 1.6398229787119783e-06, "loss": 0.5366, "step": 15347 }, { "epoch": 0.8282337704387243, "grad_norm": 1.1467199926370104, "learning_rate": 1.6394309415647607e-06, "loss": 0.6507, "step": 15348 }, { "epoch": 0.8282877340672387, "grad_norm": 1.1089960433376567, "learning_rate": 1.639039015374198e-06, "loss": 0.405, "step": 15349 }, { "epoch": 0.828341697695753, "grad_norm": 0.9034260353095049, "learning_rate": 1.6386472001515551e-06, "loss": 0.4843, "step": 15350 }, { "epoch": 0.8283956613242675, "grad_norm": 0.9372743728434907, "learning_rate": 1.6382554959080935e-06, "loss": 0.326, "step": 15351 }, { "epoch": 0.8284496249527818, "grad_norm": 0.9203627778104434, "learning_rate": 1.6378639026550709e-06, "loss": 0.476, "step": 15352 }, 
{ "epoch": 0.8285035885812962, "grad_norm": 0.9919834063939926, "learning_rate": 1.6374724204037424e-06, "loss": 0.3753, "step": 15353 }, { "epoch": 0.8285575522098105, "grad_norm": 1.0934076519936318, "learning_rate": 1.6370810491653598e-06, "loss": 0.5406, "step": 15354 }, { "epoch": 0.8286115158383249, "grad_norm": 1.0013347753243231, "learning_rate": 1.63668978895117e-06, "loss": 0.3907, "step": 15355 }, { "epoch": 0.8286654794668393, "grad_norm": 1.1236995563846215, "learning_rate": 1.6362986397724195e-06, "loss": 0.4294, "step": 15356 }, { "epoch": 0.8287194430953537, "grad_norm": 0.993775266249401, "learning_rate": 1.6359076016403502e-06, "loss": 0.3901, "step": 15357 }, { "epoch": 0.8287734067238681, "grad_norm": 1.2536948145051494, "learning_rate": 1.6355166745662004e-06, "loss": 0.5583, "step": 15358 }, { "epoch": 0.8288273703523825, "grad_norm": 0.9206707188964893, "learning_rate": 1.6351258585612066e-06, "loss": 0.3841, "step": 15359 }, { "epoch": 0.8288813339808969, "grad_norm": 0.9173560508696751, "learning_rate": 1.6347351536366007e-06, "loss": 0.4087, "step": 15360 }, { "epoch": 0.8289352976094112, "grad_norm": 1.1409267254422841, "learning_rate": 1.634344559803613e-06, "loss": 0.4314, "step": 15361 }, { "epoch": 0.8289892612379256, "grad_norm": 1.4403809437476502, "learning_rate": 1.6339540770734675e-06, "loss": 0.588, "step": 15362 }, { "epoch": 0.82904322486644, "grad_norm": 1.1553253568377821, "learning_rate": 1.6335637054573885e-06, "loss": 0.5501, "step": 15363 }, { "epoch": 0.8290971884949544, "grad_norm": 1.279456055629867, "learning_rate": 1.6331734449665951e-06, "loss": 0.657, "step": 15364 }, { "epoch": 0.8291511521234688, "grad_norm": 1.0347977504619774, "learning_rate": 1.6327832956123042e-06, "loss": 0.4935, "step": 15365 }, { "epoch": 0.8292051157519832, "grad_norm": 1.1320971733316116, "learning_rate": 1.6323932574057286e-06, "loss": 0.4609, "step": 15366 }, { "epoch": 0.8292590793804976, "grad_norm": 1.1737440614733097, "learning_rate": 1.6320033303580787e-06, "loss": 0.5883, "step": 15367 }, { "epoch": 0.8293130430090119, "grad_norm": 1.0241309139919301, "learning_rate": 1.6316135144805627e-06, "loss": 0.4134, "step": 15368 }, { "epoch": 0.8293670066375263, "grad_norm": 1.052990435201778, "learning_rate": 1.631223809784382e-06, "loss": 0.5443, "step": 15369 }, { "epoch": 0.8294209702660407, "grad_norm": 0.9953715784078616, "learning_rate": 1.6308342162807378e-06, "loss": 0.5802, "step": 15370 }, { "epoch": 0.8294749338945551, "grad_norm": 1.1573935995493407, "learning_rate": 1.630444733980828e-06, "loss": 0.5258, "step": 15371 }, { "epoch": 0.8295288975230695, "grad_norm": 0.9760629770655556, "learning_rate": 1.6300553628958465e-06, "loss": 0.4057, "step": 15372 }, { "epoch": 0.8295828611515839, "grad_norm": 0.9405748451846002, "learning_rate": 1.6296661030369842e-06, "loss": 0.4053, "step": 15373 }, { "epoch": 0.8296368247800983, "grad_norm": 1.1409267966358656, "learning_rate": 1.6292769544154286e-06, "loss": 0.4725, "step": 15374 }, { "epoch": 0.8296907884086125, "grad_norm": 1.3473920011749605, "learning_rate": 1.6288879170423655e-06, "loss": 0.3913, "step": 15375 }, { "epoch": 0.8297447520371269, "grad_norm": 1.0210178350199894, "learning_rate": 1.6284989909289742e-06, "loss": 0.5578, "step": 15376 }, { "epoch": 0.8297987156656413, "grad_norm": 1.3074107239874468, "learning_rate": 1.6281101760864331e-06, "loss": 0.5884, "step": 15377 }, { "epoch": 0.8298526792941557, "grad_norm": 1.2864957659619, "learning_rate": 1.6277214725259186e-06, "loss": 
0.5754, "step": 15378 }, { "epoch": 0.8299066429226701, "grad_norm": 1.0087017551091262, "learning_rate": 1.6273328802586014e-06, "loss": 0.44, "step": 15379 }, { "epoch": 0.8299606065511845, "grad_norm": 1.058579090990101, "learning_rate": 1.6269443992956506e-06, "loss": 0.4641, "step": 15380 }, { "epoch": 0.8300145701796989, "grad_norm": 1.0959609816684277, "learning_rate": 1.6265560296482308e-06, "loss": 0.6074, "step": 15381 }, { "epoch": 0.8300685338082132, "grad_norm": 0.9473560366849721, "learning_rate": 1.6261677713275056e-06, "loss": 0.3645, "step": 15382 }, { "epoch": 0.8301224974367276, "grad_norm": 0.9885028322314426, "learning_rate": 1.6257796243446316e-06, "loss": 0.4401, "step": 15383 }, { "epoch": 0.830176461065242, "grad_norm": 0.9124421877386184, "learning_rate": 1.6253915887107662e-06, "loss": 0.4123, "step": 15384 }, { "epoch": 0.8302304246937564, "grad_norm": 0.974782845582678, "learning_rate": 1.6250036644370614e-06, "loss": 0.4333, "step": 15385 }, { "epoch": 0.8302843883222708, "grad_norm": 0.8827787107749782, "learning_rate": 1.6246158515346666e-06, "loss": 0.4255, "step": 15386 }, { "epoch": 0.8303383519507852, "grad_norm": 1.1217396031088451, "learning_rate": 1.6242281500147283e-06, "loss": 0.5426, "step": 15387 }, { "epoch": 0.8303923155792996, "grad_norm": 0.942461516956828, "learning_rate": 1.6238405598883892e-06, "loss": 0.3866, "step": 15388 }, { "epoch": 0.8304462792078139, "grad_norm": 1.0525126029461804, "learning_rate": 1.6234530811667888e-06, "loss": 0.4491, "step": 15389 }, { "epoch": 0.8305002428363283, "grad_norm": 1.007521273389118, "learning_rate": 1.6230657138610636e-06, "loss": 0.4761, "step": 15390 }, { "epoch": 0.8305542064648427, "grad_norm": 1.0782207584626167, "learning_rate": 1.6226784579823473e-06, "loss": 0.576, "step": 15391 }, { "epoch": 0.8306081700933571, "grad_norm": 1.2713924843577549, "learning_rate": 1.62229131354177e-06, "loss": 0.4287, "step": 15392 }, { "epoch": 0.8306621337218715, "grad_norm": 1.3201762229068055, "learning_rate": 1.6219042805504591e-06, "loss": 0.4662, "step": 15393 }, { "epoch": 0.8307160973503859, "grad_norm": 1.0997816194577656, "learning_rate": 1.6215173590195375e-06, "loss": 0.7125, "step": 15394 }, { "epoch": 0.8307700609789003, "grad_norm": 1.4264876999674316, "learning_rate": 1.621130548960125e-06, "loss": 0.5558, "step": 15395 }, { "epoch": 0.8308240246074146, "grad_norm": 0.9044083506550118, "learning_rate": 1.6207438503833403e-06, "loss": 0.3904, "step": 15396 }, { "epoch": 0.830877988235929, "grad_norm": 0.9593840809195331, "learning_rate": 1.6203572633002967e-06, "loss": 0.448, "step": 15397 }, { "epoch": 0.8309319518644434, "grad_norm": 0.8372278402376571, "learning_rate": 1.619970787722106e-06, "loss": 0.3658, "step": 15398 }, { "epoch": 0.8309859154929577, "grad_norm": 1.2818194586584315, "learning_rate": 1.619584423659875e-06, "loss": 0.5185, "step": 15399 }, { "epoch": 0.8310398791214721, "grad_norm": 0.9128565011562656, "learning_rate": 1.6191981711247088e-06, "loss": 0.3617, "step": 15400 }, { "epoch": 0.8310938427499865, "grad_norm": 1.111651066693923, "learning_rate": 1.6188120301277077e-06, "loss": 0.5754, "step": 15401 }, { "epoch": 0.8311478063785009, "grad_norm": 0.8489233010440381, "learning_rate": 1.6184260006799713e-06, "loss": 0.3246, "step": 15402 }, { "epoch": 0.8312017700070152, "grad_norm": 0.9727107313473871, "learning_rate": 1.6180400827925947e-06, "loss": 0.4512, "step": 15403 }, { "epoch": 0.8312557336355296, "grad_norm": 0.8534793087467182, "learning_rate": 
1.617654276476667e-06, "loss": 0.4352, "step": 15404 }, { "epoch": 0.831309697264044, "grad_norm": 1.193872356677087, "learning_rate": 1.6172685817432786e-06, "loss": 0.6557, "step": 15405 }, { "epoch": 0.8313636608925584, "grad_norm": 1.0002804448382285, "learning_rate": 1.6168829986035142e-06, "loss": 0.413, "step": 15406 }, { "epoch": 0.8314176245210728, "grad_norm": 1.1382512632168367, "learning_rate": 1.6164975270684563e-06, "loss": 0.4019, "step": 15407 }, { "epoch": 0.8314715881495872, "grad_norm": 1.5034149998405386, "learning_rate": 1.6161121671491833e-06, "loss": 0.4728, "step": 15408 }, { "epoch": 0.8315255517781016, "grad_norm": 0.8020719572717433, "learning_rate": 1.6157269188567707e-06, "loss": 0.2878, "step": 15409 }, { "epoch": 0.8315795154066159, "grad_norm": 1.1982361298988031, "learning_rate": 1.6153417822022924e-06, "loss": 0.5711, "step": 15410 }, { "epoch": 0.8316334790351303, "grad_norm": 0.8731803604544135, "learning_rate": 1.6149567571968155e-06, "loss": 0.4187, "step": 15411 }, { "epoch": 0.8316874426636447, "grad_norm": 0.8721333860861324, "learning_rate": 1.6145718438514072e-06, "loss": 0.3787, "step": 15412 }, { "epoch": 0.8317414062921591, "grad_norm": 1.2286064509049628, "learning_rate": 1.61418704217713e-06, "loss": 0.495, "step": 15413 }, { "epoch": 0.8317953699206735, "grad_norm": 1.0517934024118158, "learning_rate": 1.6138023521850432e-06, "loss": 0.4482, "step": 15414 }, { "epoch": 0.8318493335491879, "grad_norm": 1.2375432854548964, "learning_rate": 1.6134177738862039e-06, "loss": 0.738, "step": 15415 }, { "epoch": 0.8319032971777023, "grad_norm": 1.1225499681073126, "learning_rate": 1.6130333072916646e-06, "loss": 0.4777, "step": 15416 }, { "epoch": 0.8319572608062166, "grad_norm": 0.8425856452675937, "learning_rate": 1.6126489524124764e-06, "loss": 0.3074, "step": 15417 }, { "epoch": 0.832011224434731, "grad_norm": 1.2060189078267347, "learning_rate": 1.612264709259684e-06, "loss": 0.5855, "step": 15418 }, { "epoch": 0.8320651880632454, "grad_norm": 0.8777255808701291, "learning_rate": 1.6118805778443327e-06, "loss": 0.3978, "step": 15419 }, { "epoch": 0.8321191516917598, "grad_norm": 0.8415611672750207, "learning_rate": 1.6114965581774618e-06, "loss": 0.378, "step": 15420 }, { "epoch": 0.8321731153202742, "grad_norm": 0.8998017275692519, "learning_rate": 1.6111126502701093e-06, "loss": 0.4069, "step": 15421 }, { "epoch": 0.8322270789487886, "grad_norm": 1.0829321595191217, "learning_rate": 1.6107288541333087e-06, "loss": 0.4262, "step": 15422 }, { "epoch": 0.8322810425773028, "grad_norm": 1.0371350334094551, "learning_rate": 1.6103451697780914e-06, "loss": 0.5284, "step": 15423 }, { "epoch": 0.8323350062058172, "grad_norm": 1.0613850534229425, "learning_rate": 1.6099615972154832e-06, "loss": 0.5963, "step": 15424 }, { "epoch": 0.8323889698343316, "grad_norm": 1.2120816671785168, "learning_rate": 1.6095781364565094e-06, "loss": 0.6594, "step": 15425 }, { "epoch": 0.832442933462846, "grad_norm": 1.3435391936281014, "learning_rate": 1.6091947875121905e-06, "loss": 0.5859, "step": 15426 }, { "epoch": 0.8324968970913604, "grad_norm": 1.0089868674008657, "learning_rate": 1.6088115503935454e-06, "loss": 0.5091, "step": 15427 }, { "epoch": 0.8325508607198748, "grad_norm": 1.1006106154910875, "learning_rate": 1.6084284251115877e-06, "loss": 0.4729, "step": 15428 }, { "epoch": 0.8326048243483892, "grad_norm": 1.251822668941307, "learning_rate": 1.6080454116773292e-06, "loss": 0.5016, "step": 15429 }, { "epoch": 0.8326587879769035, "grad_norm": 
1.093031540117436, "learning_rate": 1.6076625101017793e-06, "loss": 0.4558, "step": 15430 }, { "epoch": 0.8327127516054179, "grad_norm": 0.8084222569184688, "learning_rate": 1.6072797203959402e-06, "loss": 0.28, "step": 15431 }, { "epoch": 0.8327667152339323, "grad_norm": 1.0632321543272978, "learning_rate": 1.6068970425708164e-06, "loss": 0.4485, "step": 15432 }, { "epoch": 0.8328206788624467, "grad_norm": 0.9949950780979813, "learning_rate": 1.6065144766374041e-06, "loss": 0.4578, "step": 15433 }, { "epoch": 0.8328746424909611, "grad_norm": 1.0680871728637895, "learning_rate": 1.6061320226067003e-06, "loss": 0.5031, "step": 15434 }, { "epoch": 0.8329286061194755, "grad_norm": 1.0682046669520722, "learning_rate": 1.6057496804896972e-06, "loss": 0.4953, "step": 15435 }, { "epoch": 0.8329825697479899, "grad_norm": 1.0756701641920334, "learning_rate": 1.6053674502973823e-06, "loss": 0.6223, "step": 15436 }, { "epoch": 0.8330365333765042, "grad_norm": 0.9629766957992251, "learning_rate": 1.6049853320407432e-06, "loss": 0.3974, "step": 15437 }, { "epoch": 0.8330904970050186, "grad_norm": 1.06396352588827, "learning_rate": 1.6046033257307605e-06, "loss": 0.3937, "step": 15438 }, { "epoch": 0.833144460633533, "grad_norm": 0.8147411131856522, "learning_rate": 1.6042214313784147e-06, "loss": 0.3568, "step": 15439 }, { "epoch": 0.8331984242620474, "grad_norm": 0.9533864023835867, "learning_rate": 1.6038396489946806e-06, "loss": 0.3532, "step": 15440 }, { "epoch": 0.8332523878905618, "grad_norm": 1.0302196846677762, "learning_rate": 1.603457978590532e-06, "loss": 0.402, "step": 15441 }, { "epoch": 0.8333063515190762, "grad_norm": 0.9758146464841674, "learning_rate": 1.6030764201769384e-06, "loss": 0.549, "step": 15442 }, { "epoch": 0.8333603151475906, "grad_norm": 0.8020670586479, "learning_rate": 1.6026949737648657e-06, "loss": 0.5788, "step": 15443 }, { "epoch": 0.8334142787761049, "grad_norm": 0.9608324653599793, "learning_rate": 1.6023136393652778e-06, "loss": 0.4775, "step": 15444 }, { "epoch": 0.8334682424046193, "grad_norm": 1.0236748245535792, "learning_rate": 1.601932416989134e-06, "loss": 0.4332, "step": 15445 }, { "epoch": 0.8335222060331337, "grad_norm": 1.0793062751377, "learning_rate": 1.6015513066473915e-06, "loss": 0.5708, "step": 15446 }, { "epoch": 0.833576169661648, "grad_norm": 1.2646977882591044, "learning_rate": 1.6011703083510036e-06, "loss": 0.492, "step": 15447 }, { "epoch": 0.8336301332901624, "grad_norm": 1.0332386509432254, "learning_rate": 1.6007894221109202e-06, "loss": 0.4787, "step": 15448 }, { "epoch": 0.8336840969186768, "grad_norm": 0.9728259660034616, "learning_rate": 1.6004086479380884e-06, "loss": 0.4202, "step": 15449 }, { "epoch": 0.8337380605471912, "grad_norm": 1.1584952033457256, "learning_rate": 1.6000279858434529e-06, "loss": 0.4996, "step": 15450 }, { "epoch": 0.8337920241757055, "grad_norm": 1.2649563185585226, "learning_rate": 1.5996474358379546e-06, "loss": 0.667, "step": 15451 }, { "epoch": 0.8338459878042199, "grad_norm": 0.9069482119480039, "learning_rate": 1.5992669979325288e-06, "loss": 0.4403, "step": 15452 }, { "epoch": 0.8338999514327343, "grad_norm": 0.9398816949568085, "learning_rate": 1.5988866721381108e-06, "loss": 0.3711, "step": 15453 }, { "epoch": 0.8339539150612487, "grad_norm": 0.9286961469888042, "learning_rate": 1.5985064584656322e-06, "loss": 0.3879, "step": 15454 }, { "epoch": 0.8340078786897631, "grad_norm": 1.0675522780680875, "learning_rate": 1.5981263569260195e-06, "loss": 0.4197, "step": 15455 }, { "epoch": 
0.8340618423182775, "grad_norm": 1.102339013243039, "learning_rate": 1.5977463675301979e-06, "loss": 0.4749, "step": 15456 }, { "epoch": 0.8341158059467919, "grad_norm": 0.9948520773399109, "learning_rate": 1.5973664902890889e-06, "loss": 0.4192, "step": 15457 }, { "epoch": 0.8341697695753062, "grad_norm": 1.0397151584757678, "learning_rate": 1.5969867252136113e-06, "loss": 0.3953, "step": 15458 }, { "epoch": 0.8342237332038206, "grad_norm": 1.0861944609042549, "learning_rate": 1.5966070723146782e-06, "loss": 0.3692, "step": 15459 }, { "epoch": 0.834277696832335, "grad_norm": 1.1243146014513827, "learning_rate": 1.5962275316032015e-06, "loss": 0.4827, "step": 15460 }, { "epoch": 0.8343316604608494, "grad_norm": 0.9593071977186706, "learning_rate": 1.5958481030900901e-06, "loss": 0.5513, "step": 15461 }, { "epoch": 0.8343856240893638, "grad_norm": 0.9268842176998455, "learning_rate": 1.5954687867862489e-06, "loss": 0.4535, "step": 15462 }, { "epoch": 0.8344395877178782, "grad_norm": 1.0673988750977188, "learning_rate": 1.5950895827025803e-06, "loss": 0.4033, "step": 15463 }, { "epoch": 0.8344935513463926, "grad_norm": 0.8715056957113748, "learning_rate": 1.5947104908499824e-06, "loss": 0.4577, "step": 15464 }, { "epoch": 0.8345475149749069, "grad_norm": 0.8987602501928993, "learning_rate": 1.5943315112393526e-06, "loss": 0.4029, "step": 15465 }, { "epoch": 0.8346014786034213, "grad_norm": 0.8773751497525236, "learning_rate": 1.59395264388158e-06, "loss": 0.3383, "step": 15466 }, { "epoch": 0.8346554422319357, "grad_norm": 1.0843570612023368, "learning_rate": 1.5935738887875554e-06, "loss": 0.5083, "step": 15467 }, { "epoch": 0.8347094058604501, "grad_norm": 1.134885321277727, "learning_rate": 1.5931952459681642e-06, "loss": 0.4528, "step": 15468 }, { "epoch": 0.8347633694889645, "grad_norm": 1.1872079318245, "learning_rate": 1.5928167154342897e-06, "loss": 0.4426, "step": 15469 }, { "epoch": 0.8348173331174789, "grad_norm": 1.0479680987545812, "learning_rate": 1.59243829719681e-06, "loss": 0.4619, "step": 15470 }, { "epoch": 0.8348712967459933, "grad_norm": 1.0781794782543832, "learning_rate": 1.5920599912666023e-06, "loss": 0.554, "step": 15471 }, { "epoch": 0.8349252603745075, "grad_norm": 0.8385822015470974, "learning_rate": 1.5916817976545401e-06, "loss": 0.313, "step": 15472 }, { "epoch": 0.8349792240030219, "grad_norm": 1.0637144790626056, "learning_rate": 1.5913037163714918e-06, "loss": 0.4704, "step": 15473 }, { "epoch": 0.8350331876315363, "grad_norm": 1.0384855461031282, "learning_rate": 1.590925747428323e-06, "loss": 0.4625, "step": 15474 }, { "epoch": 0.8350871512600507, "grad_norm": 1.0344086942224813, "learning_rate": 1.5905478908358986e-06, "loss": 0.3915, "step": 15475 }, { "epoch": 0.8351411148885651, "grad_norm": 0.9382600541660834, "learning_rate": 1.590170146605078e-06, "loss": 0.4082, "step": 15476 }, { "epoch": 0.8351950785170795, "grad_norm": 0.9435627897058326, "learning_rate": 1.589792514746718e-06, "loss": 0.3756, "step": 15477 }, { "epoch": 0.8352490421455939, "grad_norm": 0.815861921340752, "learning_rate": 1.589414995271672e-06, "loss": 0.3624, "step": 15478 }, { "epoch": 0.8353030057741082, "grad_norm": 1.0895511276126646, "learning_rate": 1.5890375881907913e-06, "loss": 0.537, "step": 15479 }, { "epoch": 0.8353569694026226, "grad_norm": 0.9628337526826546, "learning_rate": 1.5886602935149213e-06, "loss": 0.4138, "step": 15480 }, { "epoch": 0.835410933031137, "grad_norm": 1.1839720253833979, "learning_rate": 1.5882831112549062e-06, "loss": 0.4764, "step": 15481 
}, { "epoch": 0.8354648966596514, "grad_norm": 0.9446969237386693, "learning_rate": 1.5879060414215872e-06, "loss": 0.4724, "step": 15482 }, { "epoch": 0.8355188602881658, "grad_norm": 1.0523577150402004, "learning_rate": 1.5875290840258018e-06, "loss": 0.4539, "step": 15483 }, { "epoch": 0.8355728239166802, "grad_norm": 1.1426080841347372, "learning_rate": 1.5871522390783837e-06, "loss": 0.4716, "step": 15484 }, { "epoch": 0.8356267875451946, "grad_norm": 1.0680833212343255, "learning_rate": 1.5867755065901633e-06, "loss": 0.514, "step": 15485 }, { "epoch": 0.8356807511737089, "grad_norm": 1.0718875817183453, "learning_rate": 1.5863988865719702e-06, "loss": 0.5649, "step": 15486 }, { "epoch": 0.8357347148022233, "grad_norm": 1.1958154353870627, "learning_rate": 1.5860223790346268e-06, "loss": 0.5051, "step": 15487 }, { "epoch": 0.8357886784307377, "grad_norm": 1.0346655311914728, "learning_rate": 1.585645983988955e-06, "loss": 0.4995, "step": 15488 }, { "epoch": 0.8358426420592521, "grad_norm": 0.9746658368748649, "learning_rate": 1.585269701445772e-06, "loss": 0.4792, "step": 15489 }, { "epoch": 0.8358966056877665, "grad_norm": 0.9860016438021469, "learning_rate": 1.5848935314158942e-06, "loss": 0.3884, "step": 15490 }, { "epoch": 0.8359505693162809, "grad_norm": 1.0554058493433476, "learning_rate": 1.5845174739101318e-06, "loss": 0.4706, "step": 15491 }, { "epoch": 0.8360045329447952, "grad_norm": 1.091638406478731, "learning_rate": 1.5841415289392936e-06, "loss": 0.5771, "step": 15492 }, { "epoch": 0.8360584965733096, "grad_norm": 1.0463120367282213, "learning_rate": 1.5837656965141846e-06, "loss": 0.4753, "step": 15493 }, { "epoch": 0.836112460201824, "grad_norm": 1.078644891273764, "learning_rate": 1.5833899766456068e-06, "loss": 0.4695, "step": 15494 }, { "epoch": 0.8361664238303383, "grad_norm": 1.1321392919911106, "learning_rate": 1.5830143693443581e-06, "loss": 0.6149, "step": 15495 }, { "epoch": 0.8362203874588527, "grad_norm": 1.0958851221375978, "learning_rate": 1.582638874621234e-06, "loss": 0.4649, "step": 15496 }, { "epoch": 0.8362743510873671, "grad_norm": 1.0387402235829108, "learning_rate": 1.5822634924870272e-06, "loss": 0.425, "step": 15497 }, { "epoch": 0.8363283147158815, "grad_norm": 1.14659320874304, "learning_rate": 1.5818882229525268e-06, "loss": 0.4939, "step": 15498 }, { "epoch": 0.8363822783443958, "grad_norm": 0.96507869949211, "learning_rate": 1.581513066028518e-06, "loss": 0.6267, "step": 15499 }, { "epoch": 0.8364362419729102, "grad_norm": 1.2469349457589842, "learning_rate": 1.5811380217257825e-06, "loss": 0.5586, "step": 15500 }, { "epoch": 0.8364362419729102, "eval_loss": 0.5311984419822693, "eval_runtime": 164.6487, "eval_samples_per_second": 20.887, "eval_steps_per_second": 0.875, "step": 15500 }, { "epoch": 0.8364902056014246, "grad_norm": 1.0208297743669947, "learning_rate": 1.5807630900550996e-06, "loss": 0.4772, "step": 15501 }, { "epoch": 0.836544169229939, "grad_norm": 1.0485464961239297, "learning_rate": 1.5803882710272455e-06, "loss": 0.4318, "step": 15502 }, { "epoch": 0.8365981328584534, "grad_norm": 1.0674857903449697, "learning_rate": 1.5800135646529931e-06, "loss": 0.4156, "step": 15503 }, { "epoch": 0.8366520964869678, "grad_norm": 1.302290883065623, "learning_rate": 1.5796389709431123e-06, "loss": 0.636, "step": 15504 }, { "epoch": 0.8367060601154822, "grad_norm": 1.0727640844906052, "learning_rate": 1.5792644899083682e-06, "loss": 0.5362, "step": 15505 }, { "epoch": 0.8367600237439965, "grad_norm": 0.8858143947268418, "learning_rate": 
1.578890121559525e-06, "loss": 0.3987, "step": 15506 }, { "epoch": 0.8368139873725109, "grad_norm": 0.9847345018805407, "learning_rate": 1.5785158659073411e-06, "loss": 0.4161, "step": 15507 }, { "epoch": 0.8368679510010253, "grad_norm": 0.7122367733409533, "learning_rate": 1.5781417229625739e-06, "loss": 0.2901, "step": 15508 }, { "epoch": 0.8369219146295397, "grad_norm": 1.0069260467648573, "learning_rate": 1.5777676927359756e-06, "loss": 0.5402, "step": 15509 }, { "epoch": 0.8369758782580541, "grad_norm": 0.9837379156901922, "learning_rate": 1.5773937752382974e-06, "loss": 0.5058, "step": 15510 }, { "epoch": 0.8370298418865685, "grad_norm": 1.3462639398938983, "learning_rate": 1.5770199704802858e-06, "loss": 0.5068, "step": 15511 }, { "epoch": 0.8370838055150829, "grad_norm": 1.1479964795810278, "learning_rate": 1.5766462784726838e-06, "loss": 0.5063, "step": 15512 }, { "epoch": 0.8371377691435972, "grad_norm": 1.0948436488742808, "learning_rate": 1.5762726992262326e-06, "loss": 0.5249, "step": 15513 }, { "epoch": 0.8371917327721116, "grad_norm": 1.331084626382694, "learning_rate": 1.575899232751668e-06, "loss": 0.5525, "step": 15514 }, { "epoch": 0.837245696400626, "grad_norm": 0.9861529514681407, "learning_rate": 1.5755258790597247e-06, "loss": 0.4615, "step": 15515 }, { "epoch": 0.8372996600291404, "grad_norm": 0.9108610317477531, "learning_rate": 1.575152638161133e-06, "loss": 0.3566, "step": 15516 }, { "epoch": 0.8373536236576548, "grad_norm": 1.0563884223890743, "learning_rate": 1.57477951006662e-06, "loss": 0.5625, "step": 15517 }, { "epoch": 0.8374075872861692, "grad_norm": 1.1190767850282246, "learning_rate": 1.5744064947869105e-06, "loss": 0.4973, "step": 15518 }, { "epoch": 0.8374615509146836, "grad_norm": 1.0483703009262524, "learning_rate": 1.574033592332724e-06, "loss": 0.5296, "step": 15519 }, { "epoch": 0.8375155145431978, "grad_norm": 0.7750788972529037, "learning_rate": 1.5736608027147804e-06, "loss": 0.3216, "step": 15520 }, { "epoch": 0.8375694781717122, "grad_norm": 0.9981656318019938, "learning_rate": 1.5732881259437921e-06, "loss": 0.5326, "step": 15521 }, { "epoch": 0.8376234418002266, "grad_norm": 1.0788586415213026, "learning_rate": 1.57291556203047e-06, "loss": 0.4626, "step": 15522 }, { "epoch": 0.837677405428741, "grad_norm": 1.1277969752216248, "learning_rate": 1.5725431109855233e-06, "loss": 0.6338, "step": 15523 }, { "epoch": 0.8377313690572554, "grad_norm": 0.8806320956964613, "learning_rate": 1.5721707728196554e-06, "loss": 0.4228, "step": 15524 }, { "epoch": 0.8377853326857698, "grad_norm": 0.9024850767853645, "learning_rate": 1.5717985475435684e-06, "loss": 0.5006, "step": 15525 }, { "epoch": 0.8378392963142842, "grad_norm": 0.9576024766338505, "learning_rate": 1.5714264351679607e-06, "loss": 0.4977, "step": 15526 }, { "epoch": 0.8378932599427985, "grad_norm": 1.0040510660789583, "learning_rate": 1.5710544357035279e-06, "loss": 0.4607, "step": 15527 }, { "epoch": 0.8379472235713129, "grad_norm": 1.121756012829598, "learning_rate": 1.5706825491609587e-06, "loss": 0.4487, "step": 15528 }, { "epoch": 0.8380011871998273, "grad_norm": 1.0666597297888356, "learning_rate": 1.570310775550944e-06, "loss": 0.4233, "step": 15529 }, { "epoch": 0.8380551508283417, "grad_norm": 1.4548882030090533, "learning_rate": 1.5699391148841677e-06, "loss": 0.7751, "step": 15530 }, { "epoch": 0.8381091144568561, "grad_norm": 1.1710269364680927, "learning_rate": 1.5695675671713132e-06, "loss": 0.6118, "step": 15531 }, { "epoch": 0.8381630780853705, "grad_norm": 
0.9908887257628076, "learning_rate": 1.5691961324230575e-06, "loss": 0.4487, "step": 15532 }, { "epoch": 0.8382170417138849, "grad_norm": 1.1444201620574257, "learning_rate": 1.568824810650077e-06, "loss": 0.4457, "step": 15533 }, { "epoch": 0.8382710053423992, "grad_norm": 1.1356439758538068, "learning_rate": 1.5684536018630442e-06, "loss": 0.6472, "step": 15534 }, { "epoch": 0.8383249689709136, "grad_norm": 1.0396377427167394, "learning_rate": 1.5680825060726269e-06, "loss": 0.4528, "step": 15535 }, { "epoch": 0.838378932599428, "grad_norm": 1.0178773801284349, "learning_rate": 1.567711523289491e-06, "loss": 0.4244, "step": 15536 }, { "epoch": 0.8384328962279424, "grad_norm": 1.1168994781879635, "learning_rate": 1.5673406535242992e-06, "loss": 0.4732, "step": 15537 }, { "epoch": 0.8384868598564568, "grad_norm": 1.3991284872716359, "learning_rate": 1.5669698967877108e-06, "loss": 0.5227, "step": 15538 }, { "epoch": 0.8385408234849712, "grad_norm": 1.125999648831656, "learning_rate": 1.5665992530903821e-06, "loss": 0.521, "step": 15539 }, { "epoch": 0.8385947871134856, "grad_norm": 0.7754519326792063, "learning_rate": 1.5662287224429645e-06, "loss": 0.3976, "step": 15540 }, { "epoch": 0.8386487507419998, "grad_norm": 0.9693054231220607, "learning_rate": 1.5658583048561085e-06, "loss": 0.5432, "step": 15541 }, { "epoch": 0.8387027143705142, "grad_norm": 1.1667820808394238, "learning_rate": 1.56548800034046e-06, "loss": 0.5246, "step": 15542 }, { "epoch": 0.8387566779990286, "grad_norm": 0.9784687575834556, "learning_rate": 1.5651178089066627e-06, "loss": 0.3657, "step": 15543 }, { "epoch": 0.838810641627543, "grad_norm": 1.1177386700991623, "learning_rate": 1.5647477305653546e-06, "loss": 0.5693, "step": 15544 }, { "epoch": 0.8388646052560574, "grad_norm": 0.9100597312971741, "learning_rate": 1.5643777653271734e-06, "loss": 0.4045, "step": 15545 }, { "epoch": 0.8389185688845718, "grad_norm": 0.8614887604229023, "learning_rate": 1.564007913202751e-06, "loss": 0.3508, "step": 15546 }, { "epoch": 0.8389725325130862, "grad_norm": 1.1168961458309108, "learning_rate": 1.5636381742027188e-06, "loss": 0.5634, "step": 15547 }, { "epoch": 0.8390264961416005, "grad_norm": 1.1020084523379883, "learning_rate": 1.5632685483377031e-06, "loss": 0.4267, "step": 15548 }, { "epoch": 0.8390804597701149, "grad_norm": 1.1048948092208573, "learning_rate": 1.5628990356183267e-06, "loss": 0.5287, "step": 15549 }, { "epoch": 0.8391344233986293, "grad_norm": 1.1453961644181934, "learning_rate": 1.56252963605521e-06, "loss": 0.4221, "step": 15550 }, { "epoch": 0.8391883870271437, "grad_norm": 1.3050703741966283, "learning_rate": 1.5621603496589708e-06, "loss": 0.4957, "step": 15551 }, { "epoch": 0.8392423506556581, "grad_norm": 1.0183456901240044, "learning_rate": 1.5617911764402212e-06, "loss": 0.5258, "step": 15552 }, { "epoch": 0.8392963142841725, "grad_norm": 1.0992568929611217, "learning_rate": 1.5614221164095732e-06, "loss": 0.5173, "step": 15553 }, { "epoch": 0.8393502779126869, "grad_norm": 1.0238101910879376, "learning_rate": 1.5610531695776327e-06, "loss": 0.3312, "step": 15554 }, { "epoch": 0.8394042415412012, "grad_norm": 1.16937732421084, "learning_rate": 1.5606843359550053e-06, "loss": 0.4706, "step": 15555 }, { "epoch": 0.8394582051697156, "grad_norm": 0.939287629188896, "learning_rate": 1.5603156155522895e-06, "loss": 0.4166, "step": 15556 }, { "epoch": 0.83951216879823, "grad_norm": 0.9834324787031573, "learning_rate": 1.5599470083800837e-06, "loss": 0.3255, "step": 15557 }, { "epoch": 
0.8395661324267444, "grad_norm": 0.9443240760881272, "learning_rate": 1.559578514448982e-06, "loss": 0.3834, "step": 15558 }, { "epoch": 0.8396200960552588, "grad_norm": 1.231388934292451, "learning_rate": 1.559210133769575e-06, "loss": 0.3705, "step": 15559 }, { "epoch": 0.8396740596837732, "grad_norm": 0.9077461754332709, "learning_rate": 1.5588418663524513e-06, "loss": 0.4103, "step": 15560 }, { "epoch": 0.8397280233122875, "grad_norm": 0.8792249230134237, "learning_rate": 1.558473712208195e-06, "loss": 0.3633, "step": 15561 }, { "epoch": 0.8397819869408019, "grad_norm": 1.087648120561072, "learning_rate": 1.558105671347386e-06, "loss": 0.4301, "step": 15562 }, { "epoch": 0.8398359505693163, "grad_norm": 1.2110174302942933, "learning_rate": 1.5577377437806033e-06, "loss": 0.5583, "step": 15563 }, { "epoch": 0.8398899141978307, "grad_norm": 0.8581004118813542, "learning_rate": 1.5573699295184206e-06, "loss": 0.4359, "step": 15564 }, { "epoch": 0.839943877826345, "grad_norm": 1.067440705344865, "learning_rate": 1.55700222857141e-06, "loss": 0.4869, "step": 15565 }, { "epoch": 0.8399978414548595, "grad_norm": 1.03293553982358, "learning_rate": 1.5566346409501399e-06, "loss": 0.4807, "step": 15566 }, { "epoch": 0.8400518050833738, "grad_norm": 1.0428222011764312, "learning_rate": 1.5562671666651747e-06, "loss": 0.5519, "step": 15567 }, { "epoch": 0.8401057687118881, "grad_norm": 1.0986706673808653, "learning_rate": 1.5558998057270766e-06, "loss": 0.4343, "step": 15568 }, { "epoch": 0.8401597323404025, "grad_norm": 1.0237842225787237, "learning_rate": 1.5555325581464024e-06, "loss": 0.5529, "step": 15569 }, { "epoch": 0.8402136959689169, "grad_norm": 1.0644490306371286, "learning_rate": 1.5551654239337083e-06, "loss": 0.3911, "step": 15570 }, { "epoch": 0.8402676595974313, "grad_norm": 0.9083267249104359, "learning_rate": 1.5547984030995452e-06, "loss": 0.3876, "step": 15571 }, { "epoch": 0.8403216232259457, "grad_norm": 0.9575762132747923, "learning_rate": 1.554431495654463e-06, "loss": 0.5125, "step": 15572 }, { "epoch": 0.8403755868544601, "grad_norm": 0.8523796270181165, "learning_rate": 1.5540647016090066e-06, "loss": 0.3148, "step": 15573 }, { "epoch": 0.8404295504829745, "grad_norm": 0.9978047047219434, "learning_rate": 1.5536980209737172e-06, "loss": 0.3877, "step": 15574 }, { "epoch": 0.8404835141114888, "grad_norm": 1.0426434581066748, "learning_rate": 1.5533314537591352e-06, "loss": 0.4041, "step": 15575 }, { "epoch": 0.8405374777400032, "grad_norm": 1.1240320302166988, "learning_rate": 1.552964999975794e-06, "loss": 0.4652, "step": 15576 }, { "epoch": 0.8405914413685176, "grad_norm": 1.0664260973418827, "learning_rate": 1.5525986596342268e-06, "loss": 0.5393, "step": 15577 }, { "epoch": 0.840645404997032, "grad_norm": 0.7787820743046387, "learning_rate": 1.5522324327449628e-06, "loss": 0.3669, "step": 15578 }, { "epoch": 0.8406993686255464, "grad_norm": 0.9804192928563061, "learning_rate": 1.551866319318528e-06, "loss": 0.4494, "step": 15579 }, { "epoch": 0.8407533322540608, "grad_norm": 1.0194823777864181, "learning_rate": 1.5515003193654438e-06, "loss": 0.5125, "step": 15580 }, { "epoch": 0.8408072958825752, "grad_norm": 0.9308877682468607, "learning_rate": 1.5511344328962301e-06, "loss": 0.4123, "step": 15581 }, { "epoch": 0.8408612595110895, "grad_norm": 1.0647955140508276, "learning_rate": 1.5507686599214047e-06, "loss": 0.32, "step": 15582 }, { "epoch": 0.8409152231396039, "grad_norm": 0.9359130234146371, "learning_rate": 1.5504030004514767e-06, "loss": 0.5042, "step": 
15583 }, { "epoch": 0.8409691867681183, "grad_norm": 1.1931017853893984, "learning_rate": 1.550037454496957e-06, "loss": 0.6229, "step": 15584 }, { "epoch": 0.8410231503966327, "grad_norm": 1.0537813193171577, "learning_rate": 1.549672022068353e-06, "loss": 0.5197, "step": 15585 }, { "epoch": 0.8410771140251471, "grad_norm": 1.1506779838849222, "learning_rate": 1.5493067031761657e-06, "loss": 0.5619, "step": 15586 }, { "epoch": 0.8411310776536615, "grad_norm": 1.1455182848332406, "learning_rate": 1.548941497830896e-06, "loss": 0.4871, "step": 15587 }, { "epoch": 0.8411850412821759, "grad_norm": 0.9543181418540896, "learning_rate": 1.54857640604304e-06, "loss": 0.3547, "step": 15588 }, { "epoch": 0.8412390049106901, "grad_norm": 1.0538407370586427, "learning_rate": 1.5482114278230915e-06, "loss": 0.5522, "step": 15589 }, { "epoch": 0.8412929685392045, "grad_norm": 1.0582181067276633, "learning_rate": 1.5478465631815387e-06, "loss": 0.676, "step": 15590 }, { "epoch": 0.8413469321677189, "grad_norm": 0.747398100069921, "learning_rate": 1.5474818121288687e-06, "loss": 0.2231, "step": 15591 }, { "epoch": 0.8414008957962333, "grad_norm": 0.778502224727566, "learning_rate": 1.5471171746755653e-06, "loss": 0.2815, "step": 15592 }, { "epoch": 0.8414548594247477, "grad_norm": 0.793909066386534, "learning_rate": 1.5467526508321088e-06, "loss": 0.3181, "step": 15593 }, { "epoch": 0.8415088230532621, "grad_norm": 0.8397800854947285, "learning_rate": 1.5463882406089752e-06, "loss": 0.4084, "step": 15594 }, { "epoch": 0.8415627866817765, "grad_norm": 0.9170500887745147, "learning_rate": 1.5460239440166386e-06, "loss": 0.3721, "step": 15595 }, { "epoch": 0.8416167503102908, "grad_norm": 1.0216364230396604, "learning_rate": 1.5456597610655692e-06, "loss": 0.4339, "step": 15596 }, { "epoch": 0.8416707139388052, "grad_norm": 1.258023987744892, "learning_rate": 1.5452956917662332e-06, "loss": 0.4564, "step": 15597 }, { "epoch": 0.8417246775673196, "grad_norm": 1.3739793071366757, "learning_rate": 1.5449317361290956e-06, "loss": 0.6747, "step": 15598 }, { "epoch": 0.841778641195834, "grad_norm": 1.0992868643478324, "learning_rate": 1.5445678941646156e-06, "loss": 0.5635, "step": 15599 }, { "epoch": 0.8418326048243484, "grad_norm": 1.0542431107578454, "learning_rate": 1.5442041658832516e-06, "loss": 0.4258, "step": 15600 }, { "epoch": 0.8418865684528628, "grad_norm": 0.9675619511544512, "learning_rate": 1.5438405512954565e-06, "loss": 0.3489, "step": 15601 }, { "epoch": 0.8419405320813772, "grad_norm": 1.052713208143923, "learning_rate": 1.5434770504116814e-06, "loss": 0.5039, "step": 15602 }, { "epoch": 0.8419944957098915, "grad_norm": 1.1854999968485487, "learning_rate": 1.5431136632423749e-06, "loss": 0.3994, "step": 15603 }, { "epoch": 0.8420484593384059, "grad_norm": 1.2654173408310079, "learning_rate": 1.5427503897979787e-06, "loss": 0.6131, "step": 15604 }, { "epoch": 0.8421024229669203, "grad_norm": 1.090355424192914, "learning_rate": 1.5423872300889347e-06, "loss": 0.4192, "step": 15605 }, { "epoch": 0.8421563865954347, "grad_norm": 1.1128558673473303, "learning_rate": 1.542024184125681e-06, "loss": 0.4157, "step": 15606 }, { "epoch": 0.8422103502239491, "grad_norm": 1.0147074341448536, "learning_rate": 1.5416612519186508e-06, "loss": 0.4434, "step": 15607 }, { "epoch": 0.8422643138524635, "grad_norm": 0.7364606036601585, "learning_rate": 1.5412984334782762e-06, "loss": 0.2912, "step": 15608 }, { "epoch": 0.8423182774809779, "grad_norm": 1.0401434329791017, "learning_rate": 1.5409357288149846e-06, 
"loss": 0.4572, "step": 15609 }, { "epoch": 0.8423722411094922, "grad_norm": 1.2336517186647886, "learning_rate": 1.540573137939202e-06, "loss": 0.5057, "step": 15610 }, { "epoch": 0.8424262047380066, "grad_norm": 0.954521521727013, "learning_rate": 1.5402106608613463e-06, "loss": 0.3518, "step": 15611 }, { "epoch": 0.842480168366521, "grad_norm": 1.1020829093871387, "learning_rate": 1.5398482975918378e-06, "loss": 0.5578, "step": 15612 }, { "epoch": 0.8425341319950354, "grad_norm": 1.0749108977759225, "learning_rate": 1.5394860481410903e-06, "loss": 0.4575, "step": 15613 }, { "epoch": 0.8425880956235497, "grad_norm": 1.2015204421810688, "learning_rate": 1.5391239125195157e-06, "loss": 0.5051, "step": 15614 }, { "epoch": 0.8426420592520641, "grad_norm": 0.9140882933900291, "learning_rate": 1.538761890737522e-06, "loss": 0.3763, "step": 15615 }, { "epoch": 0.8426960228805785, "grad_norm": 0.8307259138850285, "learning_rate": 1.5383999828055142e-06, "loss": 0.2464, "step": 15616 }, { "epoch": 0.8427499865090928, "grad_norm": 0.6738548208830848, "learning_rate": 1.5380381887338951e-06, "loss": 0.2718, "step": 15617 }, { "epoch": 0.8428039501376072, "grad_norm": 1.0215386898135, "learning_rate": 1.5376765085330603e-06, "loss": 0.4021, "step": 15618 }, { "epoch": 0.8428579137661216, "grad_norm": 0.9302651269309149, "learning_rate": 1.5373149422134064e-06, "loss": 0.43, "step": 15619 }, { "epoch": 0.842911877394636, "grad_norm": 1.0094367776411532, "learning_rate": 1.5369534897853256e-06, "loss": 0.4959, "step": 15620 }, { "epoch": 0.8429658410231504, "grad_norm": 0.8003082844027756, "learning_rate": 1.5365921512592053e-06, "loss": 0.2892, "step": 15621 }, { "epoch": 0.8430198046516648, "grad_norm": 0.7779880128013374, "learning_rate": 1.5362309266454308e-06, "loss": 0.2617, "step": 15622 }, { "epoch": 0.8430737682801792, "grad_norm": 0.8994193036741248, "learning_rate": 1.5358698159543853e-06, "loss": 0.3638, "step": 15623 }, { "epoch": 0.8431277319086935, "grad_norm": 1.233392741986116, "learning_rate": 1.5355088191964473e-06, "loss": 0.5541, "step": 15624 }, { "epoch": 0.8431816955372079, "grad_norm": 0.8539974526849857, "learning_rate": 1.535147936381991e-06, "loss": 0.3277, "step": 15625 }, { "epoch": 0.8432356591657223, "grad_norm": 1.0076607624096026, "learning_rate": 1.5347871675213893e-06, "loss": 0.4668, "step": 15626 }, { "epoch": 0.8432896227942367, "grad_norm": 1.0347599262852203, "learning_rate": 1.5344265126250107e-06, "loss": 0.4307, "step": 15627 }, { "epoch": 0.8433435864227511, "grad_norm": 1.1866362871254612, "learning_rate": 1.5340659717032211e-06, "loss": 0.5849, "step": 15628 }, { "epoch": 0.8433975500512655, "grad_norm": 0.9744844127451343, "learning_rate": 1.533705544766383e-06, "loss": 0.51, "step": 15629 }, { "epoch": 0.8434515136797798, "grad_norm": 0.8072184838957053, "learning_rate": 1.533345231824856e-06, "loss": 0.3813, "step": 15630 }, { "epoch": 0.8435054773082942, "grad_norm": 1.011812807273064, "learning_rate": 1.5329850328889936e-06, "loss": 0.4298, "step": 15631 }, { "epoch": 0.8435594409368086, "grad_norm": 1.0198565800579311, "learning_rate": 1.5326249479691502e-06, "loss": 0.51, "step": 15632 }, { "epoch": 0.843613404565323, "grad_norm": 0.972540294264195, "learning_rate": 1.5322649770756746e-06, "loss": 0.3957, "step": 15633 }, { "epoch": 0.8436673681938374, "grad_norm": 1.0752082361849231, "learning_rate": 1.5319051202189125e-06, "loss": 0.4533, "step": 15634 }, { "epoch": 0.8437213318223518, "grad_norm": 1.0356576727901199, "learning_rate": 
1.5315453774092065e-06, "loss": 0.5183, "step": 15635 }, { "epoch": 0.8437752954508662, "grad_norm": 1.1379719120174503, "learning_rate": 1.5311857486568967e-06, "loss": 0.4075, "step": 15636 }, { "epoch": 0.8438292590793804, "grad_norm": 1.0051130526428098, "learning_rate": 1.5308262339723193e-06, "loss": 0.4177, "step": 15637 }, { "epoch": 0.8438832227078948, "grad_norm": 1.1734405175079952, "learning_rate": 1.5304668333658056e-06, "loss": 0.5285, "step": 15638 }, { "epoch": 0.8439371863364092, "grad_norm": 0.9116643477268707, "learning_rate": 1.5301075468476862e-06, "loss": 0.3737, "step": 15639 }, { "epoch": 0.8439911499649236, "grad_norm": 1.109970703551109, "learning_rate": 1.5297483744282873e-06, "loss": 0.4999, "step": 15640 }, { "epoch": 0.844045113593438, "grad_norm": 1.004180089464532, "learning_rate": 1.5293893161179313e-06, "loss": 0.3884, "step": 15641 }, { "epoch": 0.8440990772219524, "grad_norm": 1.1681651758296154, "learning_rate": 1.5290303719269383e-06, "loss": 0.5603, "step": 15642 }, { "epoch": 0.8441530408504668, "grad_norm": 1.0991786872799718, "learning_rate": 1.528671541865625e-06, "loss": 0.5582, "step": 15643 }, { "epoch": 0.8442070044789811, "grad_norm": 1.1946474945385264, "learning_rate": 1.5283128259443048e-06, "loss": 0.4571, "step": 15644 }, { "epoch": 0.8442609681074955, "grad_norm": 1.002998397826846, "learning_rate": 1.5279542241732864e-06, "loss": 0.3947, "step": 15645 }, { "epoch": 0.8443149317360099, "grad_norm": 0.955698653651919, "learning_rate": 1.5275957365628773e-06, "loss": 0.43, "step": 15646 }, { "epoch": 0.8443688953645243, "grad_norm": 1.0831566663886352, "learning_rate": 1.5272373631233806e-06, "loss": 0.6253, "step": 15647 }, { "epoch": 0.8444228589930387, "grad_norm": 1.151361359194399, "learning_rate": 1.5268791038650975e-06, "loss": 0.5058, "step": 15648 }, { "epoch": 0.8444768226215531, "grad_norm": 0.8796930606746819, "learning_rate": 1.5265209587983221e-06, "loss": 0.3226, "step": 15649 }, { "epoch": 0.8445307862500675, "grad_norm": 1.172956063308295, "learning_rate": 1.5261629279333492e-06, "loss": 0.6813, "step": 15650 }, { "epoch": 0.8445847498785818, "grad_norm": 1.206714564346388, "learning_rate": 1.5258050112804693e-06, "loss": 0.5728, "step": 15651 }, { "epoch": 0.8446387135070962, "grad_norm": 0.9416206385126298, "learning_rate": 1.5254472088499685e-06, "loss": 0.3645, "step": 15652 }, { "epoch": 0.8446926771356106, "grad_norm": 1.0452365084521218, "learning_rate": 1.5250895206521316e-06, "loss": 0.4241, "step": 15653 }, { "epoch": 0.844746640764125, "grad_norm": 0.8368733906809237, "learning_rate": 1.524731946697238e-06, "loss": 0.3788, "step": 15654 }, { "epoch": 0.8448006043926394, "grad_norm": 1.130356665915359, "learning_rate": 1.5243744869955648e-06, "loss": 0.4596, "step": 15655 }, { "epoch": 0.8448545680211538, "grad_norm": 1.0542698490689562, "learning_rate": 1.5240171415573862e-06, "loss": 0.5717, "step": 15656 }, { "epoch": 0.8449085316496682, "grad_norm": 1.4176149069543755, "learning_rate": 1.5236599103929722e-06, "loss": 0.7412, "step": 15657 }, { "epoch": 0.8449624952781825, "grad_norm": 1.0819664043252837, "learning_rate": 1.523302793512591e-06, "loss": 0.57, "step": 15658 }, { "epoch": 0.8450164589066969, "grad_norm": 1.2290749183877465, "learning_rate": 1.5229457909265053e-06, "loss": 0.6637, "step": 15659 }, { "epoch": 0.8450704225352113, "grad_norm": 1.0489037396459566, "learning_rate": 1.5225889026449754e-06, "loss": 0.4026, "step": 15660 }, { "epoch": 0.8451243861637256, "grad_norm": 1.0948028629880235, 
"learning_rate": 1.5222321286782594e-06, "loss": 0.4641, "step": 15661 }, { "epoch": 0.84517834979224, "grad_norm": 0.8551910326132586, "learning_rate": 1.5218754690366114e-06, "loss": 0.4152, "step": 15662 }, { "epoch": 0.8452323134207544, "grad_norm": 1.0170543971328168, "learning_rate": 1.521518923730282e-06, "loss": 0.504, "step": 15663 }, { "epoch": 0.8452862770492688, "grad_norm": 1.135491872139825, "learning_rate": 1.521162492769519e-06, "loss": 0.4609, "step": 15664 }, { "epoch": 0.8453402406777831, "grad_norm": 1.169891679399081, "learning_rate": 1.5208061761645671e-06, "loss": 0.5186, "step": 15665 }, { "epoch": 0.8453942043062975, "grad_norm": 1.1450771696472846, "learning_rate": 1.5204499739256653e-06, "loss": 0.4862, "step": 15666 }, { "epoch": 0.8454481679348119, "grad_norm": 1.0244272046879965, "learning_rate": 1.520093886063052e-06, "loss": 0.3807, "step": 15667 }, { "epoch": 0.8455021315633263, "grad_norm": 1.0959603534024962, "learning_rate": 1.5197379125869624e-06, "loss": 0.518, "step": 15668 }, { "epoch": 0.8455560951918407, "grad_norm": 1.0867438939061882, "learning_rate": 1.5193820535076264e-06, "loss": 0.5516, "step": 15669 }, { "epoch": 0.8456100588203551, "grad_norm": 1.102048936742892, "learning_rate": 1.5190263088352727e-06, "loss": 0.5641, "step": 15670 }, { "epoch": 0.8456640224488695, "grad_norm": 1.0895360331259327, "learning_rate": 1.5186706785801254e-06, "loss": 0.5802, "step": 15671 }, { "epoch": 0.8457179860773838, "grad_norm": 0.9451019320585996, "learning_rate": 1.5183151627524065e-06, "loss": 0.4569, "step": 15672 }, { "epoch": 0.8457719497058982, "grad_norm": 0.9978216294560042, "learning_rate": 1.517959761362332e-06, "loss": 0.4188, "step": 15673 }, { "epoch": 0.8458259133344126, "grad_norm": 1.1198097111779806, "learning_rate": 1.5176044744201173e-06, "loss": 0.4682, "step": 15674 }, { "epoch": 0.845879876962927, "grad_norm": 1.255605327038385, "learning_rate": 1.517249301935974e-06, "loss": 0.4846, "step": 15675 }, { "epoch": 0.8459338405914414, "grad_norm": 1.0436531186155327, "learning_rate": 1.5168942439201107e-06, "loss": 0.5202, "step": 15676 }, { "epoch": 0.8459878042199558, "grad_norm": 0.9232969061135506, "learning_rate": 1.5165393003827308e-06, "loss": 0.4021, "step": 15677 }, { "epoch": 0.8460417678484702, "grad_norm": 0.8147570932043685, "learning_rate": 1.5161844713340364e-06, "loss": 0.3094, "step": 15678 }, { "epoch": 0.8460957314769845, "grad_norm": 1.2627248711227055, "learning_rate": 1.5158297567842266e-06, "loss": 0.5915, "step": 15679 }, { "epoch": 0.8461496951054989, "grad_norm": 1.1886283842150887, "learning_rate": 1.5154751567434944e-06, "loss": 0.532, "step": 15680 }, { "epoch": 0.8462036587340133, "grad_norm": 1.0283885610952799, "learning_rate": 1.5151206712220326e-06, "loss": 0.4946, "step": 15681 }, { "epoch": 0.8462576223625277, "grad_norm": 1.0107088324610436, "learning_rate": 1.514766300230029e-06, "loss": 0.3893, "step": 15682 }, { "epoch": 0.8463115859910421, "grad_norm": 1.301034999949981, "learning_rate": 1.5144120437776686e-06, "loss": 0.5841, "step": 15683 }, { "epoch": 0.8463655496195565, "grad_norm": 1.072419932641718, "learning_rate": 1.5140579018751327e-06, "loss": 0.4495, "step": 15684 }, { "epoch": 0.8464195132480709, "grad_norm": 1.0478313501813261, "learning_rate": 1.513703874532601e-06, "loss": 0.4316, "step": 15685 }, { "epoch": 0.8464734768765851, "grad_norm": 0.9397128301918715, "learning_rate": 1.5133499617602488e-06, "loss": 0.4092, "step": 15686 }, { "epoch": 0.8465274405050995, "grad_norm": 
1.02324865240044, "learning_rate": 1.5129961635682455e-06, "loss": 0.4742, "step": 15687 }, { "epoch": 0.8465814041336139, "grad_norm": 0.7205241479806055, "learning_rate": 1.5126424799667608e-06, "loss": 0.2645, "step": 15688 }, { "epoch": 0.8466353677621283, "grad_norm": 1.3436038829895383, "learning_rate": 1.5122889109659605e-06, "loss": 0.4738, "step": 15689 }, { "epoch": 0.8466893313906427, "grad_norm": 1.1203097929474553, "learning_rate": 1.511935456576006e-06, "loss": 0.4884, "step": 15690 }, { "epoch": 0.8467432950191571, "grad_norm": 0.849693838477334, "learning_rate": 1.5115821168070566e-06, "loss": 0.3563, "step": 15691 }, { "epoch": 0.8467972586476715, "grad_norm": 0.9942231877881641, "learning_rate": 1.511228891669267e-06, "loss": 0.4779, "step": 15692 }, { "epoch": 0.8468512222761858, "grad_norm": 0.9646936269492785, "learning_rate": 1.5108757811727904e-06, "loss": 0.3296, "step": 15693 }, { "epoch": 0.8469051859047002, "grad_norm": 0.9863326215000598, "learning_rate": 1.510522785327773e-06, "loss": 0.3582, "step": 15694 }, { "epoch": 0.8469591495332146, "grad_norm": 1.2541564055945007, "learning_rate": 1.5101699041443626e-06, "loss": 0.5427, "step": 15695 }, { "epoch": 0.847013113161729, "grad_norm": 1.067874565456146, "learning_rate": 1.5098171376327e-06, "loss": 0.434, "step": 15696 }, { "epoch": 0.8470670767902434, "grad_norm": 0.8676080622440123, "learning_rate": 1.5094644858029251e-06, "loss": 0.311, "step": 15697 }, { "epoch": 0.8471210404187578, "grad_norm": 1.1110851080837238, "learning_rate": 1.5091119486651729e-06, "loss": 0.4165, "step": 15698 }, { "epoch": 0.8471750040472721, "grad_norm": 1.082143923214254, "learning_rate": 1.508759526229576e-06, "loss": 0.4826, "step": 15699 }, { "epoch": 0.8472289676757865, "grad_norm": 1.2370947067573443, "learning_rate": 1.5084072185062627e-06, "loss": 0.5745, "step": 15700 }, { "epoch": 0.8472829313043009, "grad_norm": 0.9899907439625864, "learning_rate": 1.5080550255053594e-06, "loss": 0.4032, "step": 15701 }, { "epoch": 0.8473368949328153, "grad_norm": 0.8673386970844211, "learning_rate": 1.5077029472369884e-06, "loss": 0.3717, "step": 15702 }, { "epoch": 0.8473908585613297, "grad_norm": 0.9120663057235976, "learning_rate": 1.507350983711268e-06, "loss": 0.5253, "step": 15703 }, { "epoch": 0.8474448221898441, "grad_norm": 1.0678633393656072, "learning_rate": 1.5069991349383151e-06, "loss": 0.4357, "step": 15704 }, { "epoch": 0.8474987858183585, "grad_norm": 0.9813157967987356, "learning_rate": 1.5066474009282417e-06, "loss": 0.4855, "step": 15705 }, { "epoch": 0.8475527494468728, "grad_norm": 1.0290988232134095, "learning_rate": 1.506295781691158e-06, "loss": 0.5004, "step": 15706 }, { "epoch": 0.8476067130753872, "grad_norm": 0.8550078135398885, "learning_rate": 1.505944277237168e-06, "loss": 0.3387, "step": 15707 }, { "epoch": 0.8476606767039015, "grad_norm": 0.9119805372373545, "learning_rate": 1.5055928875763745e-06, "loss": 0.3789, "step": 15708 }, { "epoch": 0.847714640332416, "grad_norm": 1.1350851933650363, "learning_rate": 1.5052416127188777e-06, "loss": 0.488, "step": 15709 }, { "epoch": 0.8477686039609303, "grad_norm": 1.1240290847970673, "learning_rate": 1.5048904526747738e-06, "loss": 0.4973, "step": 15710 }, { "epoch": 0.8478225675894447, "grad_norm": 1.0365015847659047, "learning_rate": 1.5045394074541548e-06, "loss": 0.401, "step": 15711 }, { "epoch": 0.8478765312179591, "grad_norm": 1.0373086829482396, "learning_rate": 1.5041884770671106e-06, "loss": 0.5639, "step": 15712 }, { "epoch": 0.8479304948464734, 
"grad_norm": 1.0822040456419593, "learning_rate": 1.5038376615237275e-06, "loss": 0.4455, "step": 15713 }, { "epoch": 0.8479844584749878, "grad_norm": 1.215595558886416, "learning_rate": 1.5034869608340873e-06, "loss": 0.5225, "step": 15714 }, { "epoch": 0.8480384221035022, "grad_norm": 0.9455240806719459, "learning_rate": 1.5031363750082698e-06, "loss": 0.4571, "step": 15715 }, { "epoch": 0.8480923857320166, "grad_norm": 1.0189232957429506, "learning_rate": 1.5027859040563513e-06, "loss": 0.4905, "step": 15716 }, { "epoch": 0.848146349360531, "grad_norm": 1.0045969371783228, "learning_rate": 1.5024355479884052e-06, "loss": 0.4549, "step": 15717 }, { "epoch": 0.8482003129890454, "grad_norm": 1.0064237149291162, "learning_rate": 1.5020853068145004e-06, "loss": 0.4408, "step": 15718 }, { "epoch": 0.8482542766175598, "grad_norm": 0.9655046603832823, "learning_rate": 1.5017351805447033e-06, "loss": 0.4198, "step": 15719 }, { "epoch": 0.8483082402460741, "grad_norm": 1.1765629565948794, "learning_rate": 1.5013851691890787e-06, "loss": 0.56, "step": 15720 }, { "epoch": 0.8483622038745885, "grad_norm": 0.9662417779794124, "learning_rate": 1.5010352727576832e-06, "loss": 0.4166, "step": 15721 }, { "epoch": 0.8484161675031029, "grad_norm": 1.2365817854568666, "learning_rate": 1.5006854912605745e-06, "loss": 0.4678, "step": 15722 }, { "epoch": 0.8484701311316173, "grad_norm": 0.9288138813109479, "learning_rate": 1.5003358247078061e-06, "loss": 0.4048, "step": 15723 }, { "epoch": 0.8485240947601317, "grad_norm": 1.1818950142331557, "learning_rate": 1.4999862731094272e-06, "loss": 0.3427, "step": 15724 }, { "epoch": 0.8485780583886461, "grad_norm": 1.012070299554473, "learning_rate": 1.499636836475485e-06, "loss": 0.438, "step": 15725 }, { "epoch": 0.8486320220171605, "grad_norm": 1.022620678299921, "learning_rate": 1.4992875148160216e-06, "loss": 0.3855, "step": 15726 }, { "epoch": 0.8486859856456748, "grad_norm": 0.9592909300998097, "learning_rate": 1.498938308141079e-06, "loss": 0.5367, "step": 15727 }, { "epoch": 0.8487399492741892, "grad_norm": 1.0212417101416968, "learning_rate": 1.4985892164606908e-06, "loss": 0.6372, "step": 15728 }, { "epoch": 0.8487939129027036, "grad_norm": 0.9225495333572487, "learning_rate": 1.498240239784892e-06, "loss": 0.3465, "step": 15729 }, { "epoch": 0.848847876531218, "grad_norm": 1.0116591351076443, "learning_rate": 1.4978913781237116e-06, "loss": 0.3832, "step": 15730 }, { "epoch": 0.8489018401597324, "grad_norm": 0.895027797929562, "learning_rate": 1.4975426314871774e-06, "loss": 0.4381, "step": 15731 }, { "epoch": 0.8489558037882468, "grad_norm": 0.9383857222527567, "learning_rate": 1.4971939998853118e-06, "loss": 0.4793, "step": 15732 }, { "epoch": 0.8490097674167612, "grad_norm": 1.0135467489670569, "learning_rate": 1.4968454833281354e-06, "loss": 0.422, "step": 15733 }, { "epoch": 0.8490637310452754, "grad_norm": 0.9789793481090988, "learning_rate": 1.4964970818256658e-06, "loss": 0.432, "step": 15734 }, { "epoch": 0.8491176946737898, "grad_norm": 0.9623969228342123, "learning_rate": 1.4961487953879142e-06, "loss": 0.4182, "step": 15735 }, { "epoch": 0.8491716583023042, "grad_norm": 0.9402201300082402, "learning_rate": 1.4958006240248918e-06, "loss": 0.3837, "step": 15736 }, { "epoch": 0.8492256219308186, "grad_norm": 1.2675933983595005, "learning_rate": 1.495452567746605e-06, "loss": 0.4639, "step": 15737 }, { "epoch": 0.849279585559333, "grad_norm": 1.20562386002614, "learning_rate": 1.4951046265630583e-06, "loss": 0.7426, "step": 15738 }, { "epoch": 
0.8493335491878474, "grad_norm": 1.3534321249055234, "learning_rate": 1.4947568004842511e-06, "loss": 0.475, "step": 15739 }, { "epoch": 0.8493875128163618, "grad_norm": 0.9662891615368887, "learning_rate": 1.4944090895201802e-06, "loss": 0.5155, "step": 15740 }, { "epoch": 0.8494414764448761, "grad_norm": 1.0366847106062607, "learning_rate": 1.4940614936808406e-06, "loss": 0.3993, "step": 15741 }, { "epoch": 0.8494954400733905, "grad_norm": 0.9707253952123226, "learning_rate": 1.4937140129762207e-06, "loss": 0.538, "step": 15742 }, { "epoch": 0.8495494037019049, "grad_norm": 1.202387720064826, "learning_rate": 1.4933666474163078e-06, "loss": 0.5635, "step": 15743 }, { "epoch": 0.8496033673304193, "grad_norm": 1.0800994648502338, "learning_rate": 1.4930193970110859e-06, "loss": 0.4975, "step": 15744 }, { "epoch": 0.8496573309589337, "grad_norm": 0.8932254080114047, "learning_rate": 1.4926722617705353e-06, "loss": 0.3613, "step": 15745 }, { "epoch": 0.8497112945874481, "grad_norm": 1.319133754716954, "learning_rate": 1.492325241704633e-06, "loss": 0.4482, "step": 15746 }, { "epoch": 0.8497652582159625, "grad_norm": 1.299527876943168, "learning_rate": 1.4919783368233525e-06, "loss": 0.418, "step": 15747 }, { "epoch": 0.8498192218444768, "grad_norm": 1.0885212037106737, "learning_rate": 1.491631547136665e-06, "loss": 0.5217, "step": 15748 }, { "epoch": 0.8498731854729912, "grad_norm": 1.3232458407812677, "learning_rate": 1.4912848726545364e-06, "loss": 0.6393, "step": 15749 }, { "epoch": 0.8499271491015056, "grad_norm": 1.0639296034871653, "learning_rate": 1.4909383133869312e-06, "loss": 0.3847, "step": 15750 }, { "epoch": 0.84998111273002, "grad_norm": 0.9865725281776033, "learning_rate": 1.4905918693438098e-06, "loss": 0.5118, "step": 15751 }, { "epoch": 0.8500350763585344, "grad_norm": 1.0107564927626724, "learning_rate": 1.4902455405351295e-06, "loss": 0.4243, "step": 15752 }, { "epoch": 0.8500890399870488, "grad_norm": 0.9784188129070368, "learning_rate": 1.489899326970844e-06, "loss": 0.4338, "step": 15753 }, { "epoch": 0.8501430036155632, "grad_norm": 0.9414754023800073, "learning_rate": 1.489553228660904e-06, "loss": 0.41, "step": 15754 }, { "epoch": 0.8501969672440775, "grad_norm": 0.8985769242794818, "learning_rate": 1.489207245615256e-06, "loss": 0.4013, "step": 15755 }, { "epoch": 0.8502509308725918, "grad_norm": 0.8765765661657081, "learning_rate": 1.4888613778438443e-06, "loss": 0.3748, "step": 15756 }, { "epoch": 0.8503048945011062, "grad_norm": 0.9125343515198978, "learning_rate": 1.4885156253566096e-06, "loss": 0.3874, "step": 15757 }, { "epoch": 0.8503588581296206, "grad_norm": 0.9756591840293476, "learning_rate": 1.4881699881634892e-06, "loss": 0.514, "step": 15758 }, { "epoch": 0.850412821758135, "grad_norm": 1.1546873648066518, "learning_rate": 1.4878244662744169e-06, "loss": 0.4839, "step": 15759 }, { "epoch": 0.8504667853866494, "grad_norm": 1.023417048054535, "learning_rate": 1.4874790596993236e-06, "loss": 0.4756, "step": 15760 }, { "epoch": 0.8505207490151638, "grad_norm": 0.9578463887343279, "learning_rate": 1.4871337684481366e-06, "loss": 0.4548, "step": 15761 }, { "epoch": 0.8505747126436781, "grad_norm": 1.0469285446925227, "learning_rate": 1.48678859253078e-06, "loss": 0.4421, "step": 15762 }, { "epoch": 0.8506286762721925, "grad_norm": 0.9946958864502419, "learning_rate": 1.4864435319571742e-06, "loss": 0.4689, "step": 15763 }, { "epoch": 0.8506826399007069, "grad_norm": 1.1488126702329948, "learning_rate": 1.4860985867372365e-06, "loss": 0.4742, "step": 15764 
}, { "epoch": 0.8507366035292213, "grad_norm": 0.8039318657227673, "learning_rate": 1.4857537568808808e-06, "loss": 0.3507, "step": 15765 }, { "epoch": 0.8507905671577357, "grad_norm": 0.9224411999445356, "learning_rate": 1.4854090423980186e-06, "loss": 0.4048, "step": 15766 }, { "epoch": 0.8508445307862501, "grad_norm": 0.9134814432106474, "learning_rate": 1.4850644432985567e-06, "loss": 0.4494, "step": 15767 }, { "epoch": 0.8508984944147644, "grad_norm": 1.121736983361893, "learning_rate": 1.4847199595924005e-06, "loss": 0.5818, "step": 15768 }, { "epoch": 0.8509524580432788, "grad_norm": 1.1620112856097646, "learning_rate": 1.484375591289449e-06, "loss": 0.5362, "step": 15769 }, { "epoch": 0.8510064216717932, "grad_norm": 1.1494533341107662, "learning_rate": 1.4840313383996001e-06, "loss": 0.5495, "step": 15770 }, { "epoch": 0.8510603853003076, "grad_norm": 1.0686185199518534, "learning_rate": 1.4836872009327491e-06, "loss": 0.4825, "step": 15771 }, { "epoch": 0.851114348928822, "grad_norm": 1.0419026466347046, "learning_rate": 1.4833431788987854e-06, "loss": 0.382, "step": 15772 }, { "epoch": 0.8511683125573364, "grad_norm": 1.0381524929024746, "learning_rate": 1.4829992723075976e-06, "loss": 0.4721, "step": 15773 }, { "epoch": 0.8512222761858508, "grad_norm": 1.0249330002717119, "learning_rate": 1.4826554811690697e-06, "loss": 0.3721, "step": 15774 }, { "epoch": 0.8512762398143651, "grad_norm": 0.8526058899312333, "learning_rate": 1.4823118054930834e-06, "loss": 0.498, "step": 15775 }, { "epoch": 0.8513302034428795, "grad_norm": 1.2195580877410226, "learning_rate": 1.4819682452895142e-06, "loss": 0.5318, "step": 15776 }, { "epoch": 0.8513841670713939, "grad_norm": 1.006026111709622, "learning_rate": 1.4816248005682379e-06, "loss": 0.4567, "step": 15777 }, { "epoch": 0.8514381306999083, "grad_norm": 1.133734495278117, "learning_rate": 1.4812814713391252e-06, "loss": 0.4928, "step": 15778 }, { "epoch": 0.8514920943284227, "grad_norm": 1.0579972772339776, "learning_rate": 1.480938257612043e-06, "loss": 0.4726, "step": 15779 }, { "epoch": 0.851546057956937, "grad_norm": 0.9204732811600153, "learning_rate": 1.4805951593968571e-06, "loss": 0.4825, "step": 15780 }, { "epoch": 0.8516000215854514, "grad_norm": 1.2094388911244773, "learning_rate": 1.480252176703427e-06, "loss": 0.6022, "step": 15781 }, { "epoch": 0.8516539852139657, "grad_norm": 0.8147993491552107, "learning_rate": 1.4799093095416117e-06, "loss": 0.24, "step": 15782 }, { "epoch": 0.8517079488424801, "grad_norm": 1.2079275173860349, "learning_rate": 1.4795665579212645e-06, "loss": 0.5638, "step": 15783 }, { "epoch": 0.8517619124709945, "grad_norm": 1.3134812046221398, "learning_rate": 1.4792239218522367e-06, "loss": 0.4358, "step": 15784 }, { "epoch": 0.8518158760995089, "grad_norm": 0.9413218946320672, "learning_rate": 1.4788814013443758e-06, "loss": 0.4035, "step": 15785 }, { "epoch": 0.8518698397280233, "grad_norm": 1.0440111736259523, "learning_rate": 1.4785389964075267e-06, "loss": 0.4921, "step": 15786 }, { "epoch": 0.8519238033565377, "grad_norm": 0.8433954335303583, "learning_rate": 1.4781967070515302e-06, "loss": 0.3445, "step": 15787 }, { "epoch": 0.8519777669850521, "grad_norm": 0.8984090171328069, "learning_rate": 1.477854533286224e-06, "loss": 0.3537, "step": 15788 }, { "epoch": 0.8520317306135664, "grad_norm": 0.7747507281965911, "learning_rate": 1.4775124751214436e-06, "loss": 0.4312, "step": 15789 }, { "epoch": 0.8520856942420808, "grad_norm": 1.080136267408936, "learning_rate": 1.4771705325670183e-06, "loss": 
0.5117, "step": 15790 }, { "epoch": 0.8521396578705952, "grad_norm": 1.0170026921178028, "learning_rate": 1.4768287056327762e-06, "loss": 0.4868, "step": 15791 }, { "epoch": 0.8521936214991096, "grad_norm": 0.9500726582741089, "learning_rate": 1.4764869943285425e-06, "loss": 0.5448, "step": 15792 }, { "epoch": 0.852247585127624, "grad_norm": 1.0665257048718844, "learning_rate": 1.4761453986641383e-06, "loss": 0.4186, "step": 15793 }, { "epoch": 0.8523015487561384, "grad_norm": 1.1803345585722, "learning_rate": 1.475803918649381e-06, "loss": 0.5873, "step": 15794 }, { "epoch": 0.8523555123846528, "grad_norm": 1.0021283205644853, "learning_rate": 1.4754625542940848e-06, "loss": 0.405, "step": 15795 }, { "epoch": 0.8524094760131671, "grad_norm": 0.8136987213904351, "learning_rate": 1.475121305608062e-06, "loss": 0.2955, "step": 15796 }, { "epoch": 0.8524634396416815, "grad_norm": 1.0079844761549812, "learning_rate": 1.474780172601119e-06, "loss": 0.4548, "step": 15797 }, { "epoch": 0.8525174032701959, "grad_norm": 0.8728727798334589, "learning_rate": 1.4744391552830626e-06, "loss": 0.3893, "step": 15798 }, { "epoch": 0.8525713668987103, "grad_norm": 1.0942580555514418, "learning_rate": 1.474098253663691e-06, "loss": 0.5079, "step": 15799 }, { "epoch": 0.8526253305272247, "grad_norm": 0.960035837727071, "learning_rate": 1.4737574677528034e-06, "loss": 0.4878, "step": 15800 }, { "epoch": 0.8526792941557391, "grad_norm": 0.9338648183653308, "learning_rate": 1.4734167975601948e-06, "loss": 0.3326, "step": 15801 }, { "epoch": 0.8527332577842535, "grad_norm": 0.8973901080261126, "learning_rate": 1.4730762430956557e-06, "loss": 0.4335, "step": 15802 }, { "epoch": 0.8527872214127677, "grad_norm": 0.9691510010864917, "learning_rate": 1.4727358043689747e-06, "loss": 0.4065, "step": 15803 }, { "epoch": 0.8528411850412821, "grad_norm": 1.1289848961700548, "learning_rate": 1.4723954813899357e-06, "loss": 0.5244, "step": 15804 }, { "epoch": 0.8528951486697965, "grad_norm": 1.020518667326881, "learning_rate": 1.4720552741683203e-06, "loss": 0.4372, "step": 15805 }, { "epoch": 0.8529491122983109, "grad_norm": 0.8680844314276434, "learning_rate": 1.4717151827139059e-06, "loss": 0.3244, "step": 15806 }, { "epoch": 0.8530030759268253, "grad_norm": 1.0001988550738208, "learning_rate": 1.471375207036468e-06, "loss": 0.441, "step": 15807 }, { "epoch": 0.8530570395553397, "grad_norm": 1.0503053979060033, "learning_rate": 1.4710353471457767e-06, "loss": 0.3961, "step": 15808 }, { "epoch": 0.8531110031838541, "grad_norm": 1.0903334805389977, "learning_rate": 1.4706956030516004e-06, "loss": 0.4714, "step": 15809 }, { "epoch": 0.8531649668123684, "grad_norm": 1.037032592063396, "learning_rate": 1.4703559747637053e-06, "loss": 0.4099, "step": 15810 }, { "epoch": 0.8532189304408828, "grad_norm": 0.8901861589672014, "learning_rate": 1.47001646229185e-06, "loss": 0.3164, "step": 15811 }, { "epoch": 0.8532728940693972, "grad_norm": 1.1260546210390316, "learning_rate": 1.4696770656457934e-06, "loss": 0.4738, "step": 15812 }, { "epoch": 0.8533268576979116, "grad_norm": 1.1083632891910054, "learning_rate": 1.4693377848352902e-06, "loss": 0.6003, "step": 15813 }, { "epoch": 0.853380821326426, "grad_norm": 1.011466981322459, "learning_rate": 1.4689986198700924e-06, "loss": 0.5891, "step": 15814 }, { "epoch": 0.8534347849549404, "grad_norm": 0.9080254174991468, "learning_rate": 1.4686595707599466e-06, "loss": 0.3369, "step": 15815 }, { "epoch": 0.8534887485834548, "grad_norm": 1.156074163260614, "learning_rate": 
1.4683206375145983e-06, "loss": 0.4933, "step": 15816 }, { "epoch": 0.8535427122119691, "grad_norm": 1.1892608864246066, "learning_rate": 1.4679818201437894e-06, "loss": 0.4603, "step": 15817 }, { "epoch": 0.8535966758404835, "grad_norm": 0.918170454751779, "learning_rate": 1.4676431186572561e-06, "loss": 0.4888, "step": 15818 }, { "epoch": 0.8536506394689979, "grad_norm": 1.1629390981585814, "learning_rate": 1.4673045330647344e-06, "loss": 0.5214, "step": 15819 }, { "epoch": 0.8537046030975123, "grad_norm": 1.0721371460333542, "learning_rate": 1.4669660633759553e-06, "loss": 0.4362, "step": 15820 }, { "epoch": 0.8537585667260267, "grad_norm": 0.9601621311010247, "learning_rate": 1.466627709600646e-06, "loss": 0.3606, "step": 15821 }, { "epoch": 0.8538125303545411, "grad_norm": 1.0065214315108326, "learning_rate": 1.4662894717485318e-06, "loss": 0.4377, "step": 15822 }, { "epoch": 0.8538664939830555, "grad_norm": 0.8390647145513496, "learning_rate": 1.465951349829334e-06, "loss": 0.332, "step": 15823 }, { "epoch": 0.8539204576115698, "grad_norm": 0.837788779839645, "learning_rate": 1.4656133438527716e-06, "loss": 0.323, "step": 15824 }, { "epoch": 0.8539744212400842, "grad_norm": 1.1952772635498945, "learning_rate": 1.4652754538285574e-06, "loss": 0.5159, "step": 15825 }, { "epoch": 0.8540283848685986, "grad_norm": 1.0857620036651507, "learning_rate": 1.4649376797664034e-06, "loss": 0.4396, "step": 15826 }, { "epoch": 0.854082348497113, "grad_norm": 1.1796186870998873, "learning_rate": 1.4646000216760173e-06, "loss": 0.6277, "step": 15827 }, { "epoch": 0.8541363121256274, "grad_norm": 0.9863713433548841, "learning_rate": 1.4642624795671043e-06, "loss": 0.5365, "step": 15828 }, { "epoch": 0.8541902757541417, "grad_norm": 1.153378483911638, "learning_rate": 1.4639250534493654e-06, "loss": 0.4688, "step": 15829 }, { "epoch": 0.854244239382656, "grad_norm": 0.9370545292126401, "learning_rate": 1.4635877433324994e-06, "loss": 0.4454, "step": 15830 }, { "epoch": 0.8542982030111704, "grad_norm": 0.9733987150656904, "learning_rate": 1.463250549226199e-06, "loss": 0.5954, "step": 15831 }, { "epoch": 0.8543521666396848, "grad_norm": 0.9480551003777602, "learning_rate": 1.4629134711401571e-06, "loss": 0.4911, "step": 15832 }, { "epoch": 0.8544061302681992, "grad_norm": 0.8342078927967626, "learning_rate": 1.4625765090840612e-06, "loss": 0.3804, "step": 15833 }, { "epoch": 0.8544600938967136, "grad_norm": 1.0866599174190996, "learning_rate": 1.462239663067596e-06, "loss": 0.4724, "step": 15834 }, { "epoch": 0.854514057525228, "grad_norm": 0.9327010145077209, "learning_rate": 1.4619029331004423e-06, "loss": 0.4429, "step": 15835 }, { "epoch": 0.8545680211537424, "grad_norm": 1.2182815759290246, "learning_rate": 1.4615663191922788e-06, "loss": 0.4346, "step": 15836 }, { "epoch": 0.8546219847822567, "grad_norm": 1.1245294249652618, "learning_rate": 1.4612298213527807e-06, "loss": 0.4539, "step": 15837 }, { "epoch": 0.8546759484107711, "grad_norm": 0.8170143521439835, "learning_rate": 1.4608934395916174e-06, "loss": 0.339, "step": 15838 }, { "epoch": 0.8547299120392855, "grad_norm": 1.0323241354808055, "learning_rate": 1.4605571739184582e-06, "loss": 0.6143, "step": 15839 }, { "epoch": 0.8547838756677999, "grad_norm": 0.9540524375503299, "learning_rate": 1.460221024342967e-06, "loss": 0.3567, "step": 15840 }, { "epoch": 0.8548378392963143, "grad_norm": 1.2224515217428633, "learning_rate": 1.4598849908748059e-06, "loss": 0.5034, "step": 15841 }, { "epoch": 0.8548918029248287, "grad_norm": 
1.1314576200969684, "learning_rate": 1.459549073523632e-06, "loss": 0.3654, "step": 15842 }, { "epoch": 0.8549457665533431, "grad_norm": 0.9541611977051758, "learning_rate": 1.4592132722991007e-06, "loss": 0.3485, "step": 15843 }, { "epoch": 0.8549997301818574, "grad_norm": 1.0771320473297965, "learning_rate": 1.4588775872108634e-06, "loss": 0.4654, "step": 15844 }, { "epoch": 0.8550536938103718, "grad_norm": 1.1030949693992358, "learning_rate": 1.4585420182685669e-06, "loss": 0.4748, "step": 15845 }, { "epoch": 0.8551076574388862, "grad_norm": 1.4379408497840662, "learning_rate": 1.4582065654818561e-06, "loss": 0.6368, "step": 15846 }, { "epoch": 0.8551616210674006, "grad_norm": 1.255951601812409, "learning_rate": 1.4578712288603727e-06, "loss": 0.4439, "step": 15847 }, { "epoch": 0.855215584695915, "grad_norm": 1.081231307300269, "learning_rate": 1.4575360084137542e-06, "loss": 0.6327, "step": 15848 }, { "epoch": 0.8552695483244294, "grad_norm": 0.8629403049634136, "learning_rate": 1.4572009041516357e-06, "loss": 0.3976, "step": 15849 }, { "epoch": 0.8553235119529438, "grad_norm": 0.9276008678244068, "learning_rate": 1.456865916083648e-06, "loss": 0.3878, "step": 15850 }, { "epoch": 0.855377475581458, "grad_norm": 1.047688622786715, "learning_rate": 1.4565310442194193e-06, "loss": 0.52, "step": 15851 }, { "epoch": 0.8554314392099724, "grad_norm": 0.9749084213510643, "learning_rate": 1.456196288568574e-06, "loss": 0.4235, "step": 15852 }, { "epoch": 0.8554854028384868, "grad_norm": 0.8992123251891002, "learning_rate": 1.4558616491407335e-06, "loss": 0.3438, "step": 15853 }, { "epoch": 0.8555393664670012, "grad_norm": 1.0667766303489499, "learning_rate": 1.455527125945515e-06, "loss": 0.3558, "step": 15854 }, { "epoch": 0.8555933300955156, "grad_norm": 0.854030670602688, "learning_rate": 1.455192718992534e-06, "loss": 0.3772, "step": 15855 }, { "epoch": 0.85564729372403, "grad_norm": 0.9802884654391336, "learning_rate": 1.4548584282914014e-06, "loss": 0.5382, "step": 15856 }, { "epoch": 0.8557012573525444, "grad_norm": 0.8981881561280495, "learning_rate": 1.454524253851724e-06, "loss": 0.5086, "step": 15857 }, { "epoch": 0.8557552209810587, "grad_norm": 0.9353584485033251, "learning_rate": 1.4541901956831089e-06, "loss": 0.3782, "step": 15858 }, { "epoch": 0.8558091846095731, "grad_norm": 1.1259992995224728, "learning_rate": 1.4538562537951545e-06, "loss": 0.4391, "step": 15859 }, { "epoch": 0.8558631482380875, "grad_norm": 1.181240710942633, "learning_rate": 1.4535224281974597e-06, "loss": 0.5302, "step": 15860 }, { "epoch": 0.8559171118666019, "grad_norm": 1.0176253549943677, "learning_rate": 1.4531887188996187e-06, "loss": 0.3315, "step": 15861 }, { "epoch": 0.8559710754951163, "grad_norm": 1.08460360291916, "learning_rate": 1.4528551259112233e-06, "loss": 0.4559, "step": 15862 }, { "epoch": 0.8560250391236307, "grad_norm": 0.9684425954219358, "learning_rate": 1.4525216492418607e-06, "loss": 0.3626, "step": 15863 }, { "epoch": 0.8560790027521451, "grad_norm": 0.7642836153530456, "learning_rate": 1.4521882889011157e-06, "loss": 0.3131, "step": 15864 }, { "epoch": 0.8561329663806594, "grad_norm": 0.829008815355002, "learning_rate": 1.4518550448985702e-06, "loss": 0.3249, "step": 15865 }, { "epoch": 0.8561869300091738, "grad_norm": 1.0349547029256545, "learning_rate": 1.4515219172438002e-06, "loss": 0.4868, "step": 15866 }, { "epoch": 0.8562408936376882, "grad_norm": 1.0132757867236892, "learning_rate": 1.4511889059463807e-06, "loss": 0.5031, "step": 15867 }, { "epoch": 0.8562948572662026, 
"grad_norm": 1.0921997922649795, "learning_rate": 1.450856011015883e-06, "loss": 0.5425, "step": 15868 }, { "epoch": 0.856348820894717, "grad_norm": 1.050629999246534, "learning_rate": 1.4505232324618758e-06, "loss": 0.4621, "step": 15869 }, { "epoch": 0.8564027845232314, "grad_norm": 1.0932315655026048, "learning_rate": 1.4501905702939215e-06, "loss": 0.5528, "step": 15870 }, { "epoch": 0.8564567481517458, "grad_norm": 1.0359276172153218, "learning_rate": 1.4498580245215828e-06, "loss": 0.4534, "step": 15871 }, { "epoch": 0.8565107117802601, "grad_norm": 1.0163036056099466, "learning_rate": 1.4495255951544178e-06, "loss": 0.3357, "step": 15872 }, { "epoch": 0.8565646754087745, "grad_norm": 1.0669617782809329, "learning_rate": 1.4491932822019786e-06, "loss": 0.4171, "step": 15873 }, { "epoch": 0.8566186390372889, "grad_norm": 0.8592681192155163, "learning_rate": 1.448861085673818e-06, "loss": 0.3333, "step": 15874 }, { "epoch": 0.8566726026658033, "grad_norm": 1.1252787145913525, "learning_rate": 1.4485290055794832e-06, "loss": 0.5327, "step": 15875 }, { "epoch": 0.8567265662943176, "grad_norm": 0.8725678219792498, "learning_rate": 1.4481970419285179e-06, "loss": 0.4063, "step": 15876 }, { "epoch": 0.856780529922832, "grad_norm": 1.109102577345782, "learning_rate": 1.447865194730464e-06, "loss": 0.5195, "step": 15877 }, { "epoch": 0.8568344935513464, "grad_norm": 1.0604682812137793, "learning_rate": 1.4475334639948587e-06, "loss": 0.4866, "step": 15878 }, { "epoch": 0.8568884571798607, "grad_norm": 1.1563847223817985, "learning_rate": 1.4472018497312373e-06, "loss": 0.4441, "step": 15879 }, { "epoch": 0.8569424208083751, "grad_norm": 1.0909792206969944, "learning_rate": 1.446870351949129e-06, "loss": 0.5326, "step": 15880 }, { "epoch": 0.8569963844368895, "grad_norm": 1.2026644556031447, "learning_rate": 1.4465389706580624e-06, "loss": 0.5225, "step": 15881 }, { "epoch": 0.8570503480654039, "grad_norm": 0.997671093408529, "learning_rate": 1.446207705867561e-06, "loss": 0.3887, "step": 15882 }, { "epoch": 0.8571043116939183, "grad_norm": 0.9384232153161337, "learning_rate": 1.4458765575871465e-06, "loss": 0.5017, "step": 15883 }, { "epoch": 0.8571582753224327, "grad_norm": 0.9342080544699762, "learning_rate": 1.445545525826336e-06, "loss": 0.418, "step": 15884 }, { "epoch": 0.8572122389509471, "grad_norm": 1.0689499785096086, "learning_rate": 1.4452146105946438e-06, "loss": 0.4425, "step": 15885 }, { "epoch": 0.8572662025794614, "grad_norm": 1.1703820126289595, "learning_rate": 1.4448838119015814e-06, "loss": 0.5747, "step": 15886 }, { "epoch": 0.8573201662079758, "grad_norm": 1.024961206266867, "learning_rate": 1.444553129756655e-06, "loss": 0.4594, "step": 15887 }, { "epoch": 0.8573741298364902, "grad_norm": 1.0893436851946077, "learning_rate": 1.4442225641693697e-06, "loss": 0.4369, "step": 15888 }, { "epoch": 0.8574280934650046, "grad_norm": 1.0949898289595388, "learning_rate": 1.4438921151492258e-06, "loss": 0.3972, "step": 15889 }, { "epoch": 0.857482057093519, "grad_norm": 0.8469801304065431, "learning_rate": 1.4435617827057204e-06, "loss": 0.3808, "step": 15890 }, { "epoch": 0.8575360207220334, "grad_norm": 1.0605278101920335, "learning_rate": 1.4432315668483488e-06, "loss": 0.4227, "step": 15891 }, { "epoch": 0.8575899843505478, "grad_norm": 1.2457192297941235, "learning_rate": 1.4429014675866007e-06, "loss": 0.6217, "step": 15892 }, { "epoch": 0.8576439479790621, "grad_norm": 1.025495169455793, "learning_rate": 1.442571484929965e-06, "loss": 0.3927, "step": 15893 }, { "epoch": 
0.8576979116075765, "grad_norm": 0.955404595589251, "learning_rate": 1.442241618887923e-06, "loss": 0.3838, "step": 15894 }, { "epoch": 0.8577518752360909, "grad_norm": 1.1734594831744913, "learning_rate": 1.4419118694699575e-06, "loss": 0.4931, "step": 15895 }, { "epoch": 0.8578058388646053, "grad_norm": 1.1938011189629854, "learning_rate": 1.4415822366855453e-06, "loss": 0.6036, "step": 15896 }, { "epoch": 0.8578598024931197, "grad_norm": 1.152551952517387, "learning_rate": 1.44125272054416e-06, "loss": 0.5432, "step": 15897 }, { "epoch": 0.8579137661216341, "grad_norm": 1.06589876494821, "learning_rate": 1.4409233210552728e-06, "loss": 0.4612, "step": 15898 }, { "epoch": 0.8579677297501483, "grad_norm": 0.9825946619655737, "learning_rate": 1.4405940382283505e-06, "loss": 0.4093, "step": 15899 }, { "epoch": 0.8580216933786627, "grad_norm": 1.097304330679007, "learning_rate": 1.4402648720728574e-06, "loss": 0.5552, "step": 15900 }, { "epoch": 0.8580756570071771, "grad_norm": 0.9962777717849847, "learning_rate": 1.439935822598254e-06, "loss": 0.3911, "step": 15901 }, { "epoch": 0.8581296206356915, "grad_norm": 0.9406688914542373, "learning_rate": 1.4396068898139979e-06, "loss": 0.3552, "step": 15902 }, { "epoch": 0.8581835842642059, "grad_norm": 1.2208059315184703, "learning_rate": 1.4392780737295418e-06, "loss": 0.5402, "step": 15903 }, { "epoch": 0.8582375478927203, "grad_norm": 1.1359782079116498, "learning_rate": 1.4389493743543379e-06, "loss": 0.4671, "step": 15904 }, { "epoch": 0.8582915115212347, "grad_norm": 1.009861463843404, "learning_rate": 1.4386207916978317e-06, "loss": 0.4471, "step": 15905 }, { "epoch": 0.858345475149749, "grad_norm": 0.9056339857402622, "learning_rate": 1.4382923257694679e-06, "loss": 0.39, "step": 15906 }, { "epoch": 0.8583994387782634, "grad_norm": 0.9386697005872333, "learning_rate": 1.4379639765786866e-06, "loss": 0.3542, "step": 15907 }, { "epoch": 0.8584534024067778, "grad_norm": 0.994111870969634, "learning_rate": 1.4376357441349248e-06, "loss": 0.3975, "step": 15908 }, { "epoch": 0.8585073660352922, "grad_norm": 1.0369749692462495, "learning_rate": 1.4373076284476164e-06, "loss": 0.3246, "step": 15909 }, { "epoch": 0.8585613296638066, "grad_norm": 0.984069845666785, "learning_rate": 1.4369796295261918e-06, "loss": 0.4683, "step": 15910 }, { "epoch": 0.858615293292321, "grad_norm": 0.9208427184139549, "learning_rate": 1.4366517473800782e-06, "loss": 0.3794, "step": 15911 }, { "epoch": 0.8586692569208354, "grad_norm": 0.9870178500037841, "learning_rate": 1.4363239820186985e-06, "loss": 0.3571, "step": 15912 }, { "epoch": 0.8587232205493497, "grad_norm": 1.1506313222705482, "learning_rate": 1.4359963334514754e-06, "loss": 0.4619, "step": 15913 }, { "epoch": 0.8587771841778641, "grad_norm": 0.9169992119429607, "learning_rate": 1.4356688016878226e-06, "loss": 0.3198, "step": 15914 }, { "epoch": 0.8588311478063785, "grad_norm": 1.1369237681324578, "learning_rate": 1.4353413867371557e-06, "loss": 0.5378, "step": 15915 }, { "epoch": 0.8588851114348929, "grad_norm": 0.9847893228974146, "learning_rate": 1.4350140886088836e-06, "loss": 0.4116, "step": 15916 }, { "epoch": 0.8589390750634073, "grad_norm": 1.1364652917023306, "learning_rate": 1.4346869073124148e-06, "loss": 0.5708, "step": 15917 }, { "epoch": 0.8589930386919217, "grad_norm": 0.8638220438489722, "learning_rate": 1.4343598428571514e-06, "loss": 0.3193, "step": 15918 }, { "epoch": 0.8590470023204361, "grad_norm": 0.9476155888701241, "learning_rate": 1.434032895252494e-06, "loss": 0.4945, "step": 
15919 }, { "epoch": 0.8591009659489504, "grad_norm": 1.1576573645855195, "learning_rate": 1.433706064507841e-06, "loss": 0.4854, "step": 15920 }, { "epoch": 0.8591549295774648, "grad_norm": 0.9565109131368867, "learning_rate": 1.4333793506325832e-06, "loss": 0.3604, "step": 15921 }, { "epoch": 0.8592088932059792, "grad_norm": 0.9728394462776149, "learning_rate": 1.4330527536361121e-06, "loss": 0.4729, "step": 15922 }, { "epoch": 0.8592628568344935, "grad_norm": 1.0614510301418998, "learning_rate": 1.4327262735278142e-06, "loss": 0.5868, "step": 15923 }, { "epoch": 0.859316820463008, "grad_norm": 1.0824139467197467, "learning_rate": 1.432399910317072e-06, "loss": 0.3664, "step": 15924 }, { "epoch": 0.8593707840915223, "grad_norm": 1.0083672502427832, "learning_rate": 1.4320736640132673e-06, "loss": 0.3831, "step": 15925 }, { "epoch": 0.8594247477200367, "grad_norm": 0.9864891861566125, "learning_rate": 1.4317475346257755e-06, "loss": 0.4938, "step": 15926 }, { "epoch": 0.859478711348551, "grad_norm": 1.447390308701108, "learning_rate": 1.4314215221639715e-06, "loss": 0.5771, "step": 15927 }, { "epoch": 0.8595326749770654, "grad_norm": 0.9279244200635454, "learning_rate": 1.4310956266372228e-06, "loss": 0.347, "step": 15928 }, { "epoch": 0.8595866386055798, "grad_norm": 1.081968419002428, "learning_rate": 1.430769848054897e-06, "loss": 0.42, "step": 15929 }, { "epoch": 0.8596406022340942, "grad_norm": 0.76770282858641, "learning_rate": 1.430444186426358e-06, "loss": 0.3927, "step": 15930 }, { "epoch": 0.8596945658626086, "grad_norm": 0.9855381777597689, "learning_rate": 1.4301186417609645e-06, "loss": 0.4205, "step": 15931 }, { "epoch": 0.859748529491123, "grad_norm": 1.0318940910107028, "learning_rate": 1.429793214068073e-06, "loss": 0.4803, "step": 15932 }, { "epoch": 0.8598024931196374, "grad_norm": 0.9699503935014682, "learning_rate": 1.4294679033570383e-06, "loss": 0.4068, "step": 15933 }, { "epoch": 0.8598564567481517, "grad_norm": 1.2192412971609141, "learning_rate": 1.4291427096372096e-06, "loss": 0.5045, "step": 15934 }, { "epoch": 0.8599104203766661, "grad_norm": 1.0672920101549561, "learning_rate": 1.4288176329179315e-06, "loss": 0.4336, "step": 15935 }, { "epoch": 0.8599643840051805, "grad_norm": 1.0343209846855994, "learning_rate": 1.4284926732085486e-06, "loss": 0.46, "step": 15936 }, { "epoch": 0.8600183476336949, "grad_norm": 1.058777102408671, "learning_rate": 1.4281678305184002e-06, "loss": 0.4421, "step": 15937 }, { "epoch": 0.8600723112622093, "grad_norm": 0.9647120422009714, "learning_rate": 1.4278431048568225e-06, "loss": 0.4214, "step": 15938 }, { "epoch": 0.8601262748907237, "grad_norm": 1.4331258408884076, "learning_rate": 1.4275184962331486e-06, "loss": 0.6528, "step": 15939 }, { "epoch": 0.8601802385192381, "grad_norm": 1.121417448114517, "learning_rate": 1.427194004656708e-06, "loss": 0.4845, "step": 15940 }, { "epoch": 0.8602342021477524, "grad_norm": 0.9651501684627006, "learning_rate": 1.4268696301368278e-06, "loss": 0.5314, "step": 15941 }, { "epoch": 0.8602881657762668, "grad_norm": 1.145268699570462, "learning_rate": 1.4265453726828293e-06, "loss": 0.4915, "step": 15942 }, { "epoch": 0.8603421294047812, "grad_norm": 0.9791759234874867, "learning_rate": 1.4262212323040325e-06, "loss": 0.3752, "step": 15943 }, { "epoch": 0.8603960930332956, "grad_norm": 0.9051315390867263, "learning_rate": 1.4258972090097541e-06, "loss": 0.3417, "step": 15944 }, { "epoch": 0.86045005666181, "grad_norm": 1.0471410376825796, "learning_rate": 1.4255733028093066e-06, "loss": 
0.4429, "step": 15945 }, { "epoch": 0.8605040202903244, "grad_norm": 1.1850548336388953, "learning_rate": 1.425249513711999e-06, "loss": 0.4425, "step": 15946 }, { "epoch": 0.8605579839188388, "grad_norm": 1.074197136046372, "learning_rate": 1.4249258417271373e-06, "loss": 0.5196, "step": 15947 }, { "epoch": 0.860611947547353, "grad_norm": 1.0162868048902367, "learning_rate": 1.4246022868640258e-06, "loss": 0.4103, "step": 15948 }, { "epoch": 0.8606659111758674, "grad_norm": 0.9348489496610543, "learning_rate": 1.4242788491319615e-06, "loss": 0.4252, "step": 15949 }, { "epoch": 0.8607198748043818, "grad_norm": 1.2017771273442426, "learning_rate": 1.4239555285402409e-06, "loss": 0.5225, "step": 15950 }, { "epoch": 0.8607738384328962, "grad_norm": 1.0507939921817726, "learning_rate": 1.4236323250981576e-06, "loss": 0.4735, "step": 15951 }, { "epoch": 0.8608278020614106, "grad_norm": 0.984977330016026, "learning_rate": 1.4233092388149997e-06, "loss": 0.4511, "step": 15952 }, { "epoch": 0.860881765689925, "grad_norm": 1.2144885822169362, "learning_rate": 1.4229862697000535e-06, "loss": 0.4916, "step": 15953 }, { "epoch": 0.8609357293184394, "grad_norm": 1.0438250539544476, "learning_rate": 1.4226634177626017e-06, "loss": 0.3664, "step": 15954 }, { "epoch": 0.8609896929469537, "grad_norm": 1.0744890804873033, "learning_rate": 1.4223406830119226e-06, "loss": 0.4929, "step": 15955 }, { "epoch": 0.8610436565754681, "grad_norm": 0.9204804239178237, "learning_rate": 1.4220180654572933e-06, "loss": 0.3916, "step": 15956 }, { "epoch": 0.8610976202039825, "grad_norm": 1.2693343315533419, "learning_rate": 1.4216955651079844e-06, "loss": 0.4356, "step": 15957 }, { "epoch": 0.8611515838324969, "grad_norm": 1.033125623539115, "learning_rate": 1.4213731819732661e-06, "loss": 0.5017, "step": 15958 }, { "epoch": 0.8612055474610113, "grad_norm": 1.129121797181992, "learning_rate": 1.4210509160624042e-06, "loss": 0.5605, "step": 15959 }, { "epoch": 0.8612595110895257, "grad_norm": 1.2563254114625597, "learning_rate": 1.4207287673846598e-06, "loss": 0.6091, "step": 15960 }, { "epoch": 0.8613134747180401, "grad_norm": 1.2560325420972729, "learning_rate": 1.4204067359492929e-06, "loss": 0.6504, "step": 15961 }, { "epoch": 0.8613674383465544, "grad_norm": 1.0869344249827126, "learning_rate": 1.4200848217655595e-06, "loss": 0.5154, "step": 15962 }, { "epoch": 0.8614214019750688, "grad_norm": 1.0487511700831937, "learning_rate": 1.4197630248427091e-06, "loss": 0.5328, "step": 15963 }, { "epoch": 0.8614753656035832, "grad_norm": 1.1312728803623147, "learning_rate": 1.4194413451899927e-06, "loss": 0.4699, "step": 15964 }, { "epoch": 0.8615293292320976, "grad_norm": 1.045843582259091, "learning_rate": 1.4191197828166548e-06, "loss": 0.4534, "step": 15965 }, { "epoch": 0.861583292860612, "grad_norm": 0.9374846363756233, "learning_rate": 1.4187983377319384e-06, "loss": 0.4319, "step": 15966 }, { "epoch": 0.8616372564891264, "grad_norm": 0.9224466198078187, "learning_rate": 1.4184770099450812e-06, "loss": 0.3596, "step": 15967 }, { "epoch": 0.8616912201176407, "grad_norm": 0.7949052251616, "learning_rate": 1.4181557994653191e-06, "loss": 0.3517, "step": 15968 }, { "epoch": 0.861745183746155, "grad_norm": 0.8976535165002293, "learning_rate": 1.4178347063018837e-06, "loss": 0.3541, "step": 15969 }, { "epoch": 0.8617991473746694, "grad_norm": 1.059221410318111, "learning_rate": 1.4175137304640032e-06, "loss": 0.4538, "step": 15970 }, { "epoch": 0.8618531110031838, "grad_norm": 1.1811230115637241, "learning_rate": 
1.4171928719609034e-06, "loss": 0.599, "step": 15971 }, { "epoch": 0.8619070746316982, "grad_norm": 0.8752475460601022, "learning_rate": 1.4168721308018055e-06, "loss": 0.3394, "step": 15972 }, { "epoch": 0.8619610382602126, "grad_norm": 1.2339807609986468, "learning_rate": 1.4165515069959284e-06, "loss": 0.6023, "step": 15973 }, { "epoch": 0.862015001888727, "grad_norm": 1.2078786419694487, "learning_rate": 1.416231000552487e-06, "loss": 0.5201, "step": 15974 }, { "epoch": 0.8620689655172413, "grad_norm": 1.0615114035869715, "learning_rate": 1.4159106114806943e-06, "loss": 0.4179, "step": 15975 }, { "epoch": 0.8621229291457557, "grad_norm": 1.0768319746046544, "learning_rate": 1.4155903397897558e-06, "loss": 0.3841, "step": 15976 }, { "epoch": 0.8621768927742701, "grad_norm": 0.9069604757752442, "learning_rate": 1.4152701854888782e-06, "loss": 0.3718, "step": 15977 }, { "epoch": 0.8622308564027845, "grad_norm": 1.2947096656716894, "learning_rate": 1.4149501485872628e-06, "loss": 0.5341, "step": 15978 }, { "epoch": 0.8622848200312989, "grad_norm": 1.0154531319501023, "learning_rate": 1.4146302290941075e-06, "loss": 0.4093, "step": 15979 }, { "epoch": 0.8623387836598133, "grad_norm": 0.846573145614561, "learning_rate": 1.4143104270186076e-06, "loss": 0.4138, "step": 15980 }, { "epoch": 0.8623927472883277, "grad_norm": 0.961360587893776, "learning_rate": 1.413990742369954e-06, "loss": 0.4016, "step": 15981 }, { "epoch": 0.862446710916842, "grad_norm": 1.123183681422894, "learning_rate": 1.4136711751573362e-06, "loss": 0.4834, "step": 15982 }, { "epoch": 0.8625006745453564, "grad_norm": 1.0260146284690133, "learning_rate": 1.4133517253899373e-06, "loss": 0.377, "step": 15983 }, { "epoch": 0.8625546381738708, "grad_norm": 0.8427135377080797, "learning_rate": 1.4130323930769383e-06, "loss": 0.3091, "step": 15984 }, { "epoch": 0.8626086018023852, "grad_norm": 1.1817841602821046, "learning_rate": 1.4127131782275188e-06, "loss": 0.444, "step": 15985 }, { "epoch": 0.8626625654308996, "grad_norm": 1.247391221336531, "learning_rate": 1.4123940808508517e-06, "loss": 0.6012, "step": 15986 }, { "epoch": 0.862716529059414, "grad_norm": 0.9923787974961044, "learning_rate": 1.4120751009561095e-06, "loss": 0.4112, "step": 15987 }, { "epoch": 0.8627704926879284, "grad_norm": 1.066839641643829, "learning_rate": 1.4117562385524589e-06, "loss": 0.513, "step": 15988 }, { "epoch": 0.8628244563164427, "grad_norm": 0.9755266380696296, "learning_rate": 1.411437493649066e-06, "loss": 0.4549, "step": 15989 }, { "epoch": 0.8628784199449571, "grad_norm": 0.9984450423966413, "learning_rate": 1.4111188662550899e-06, "loss": 0.5047, "step": 15990 }, { "epoch": 0.8629323835734715, "grad_norm": 0.731918722567138, "learning_rate": 1.4108003563796888e-06, "loss": 0.2404, "step": 15991 }, { "epoch": 0.8629863472019859, "grad_norm": 1.018612702173676, "learning_rate": 1.4104819640320175e-06, "loss": 0.421, "step": 15992 }, { "epoch": 0.8630403108305003, "grad_norm": 0.8102219016189661, "learning_rate": 1.410163689221227e-06, "loss": 0.2974, "step": 15993 }, { "epoch": 0.8630942744590147, "grad_norm": 0.9865223773135915, "learning_rate": 1.409845531956464e-06, "loss": 0.4049, "step": 15994 }, { "epoch": 0.863148238087529, "grad_norm": 1.1540373982572925, "learning_rate": 1.4095274922468735e-06, "loss": 0.6225, "step": 15995 }, { "epoch": 0.8632022017160433, "grad_norm": 1.131291556117137, "learning_rate": 1.4092095701015967e-06, "loss": 0.4926, "step": 15996 }, { "epoch": 0.8632561653445577, "grad_norm": 0.9767646726005362, 
"learning_rate": 1.4088917655297698e-06, "loss": 0.5098, "step": 15997 }, { "epoch": 0.8633101289730721, "grad_norm": 1.2586348700649965, "learning_rate": 1.4085740785405272e-06, "loss": 0.4799, "step": 15998 }, { "epoch": 0.8633640926015865, "grad_norm": 1.203479650304201, "learning_rate": 1.4082565091429995e-06, "loss": 0.6744, "step": 15999 }, { "epoch": 0.8634180562301009, "grad_norm": 0.8379793734539012, "learning_rate": 1.407939057346314e-06, "loss": 0.33, "step": 16000 }, { "epoch": 0.8634180562301009, "eval_loss": 0.530173659324646, "eval_runtime": 159.5405, "eval_samples_per_second": 21.556, "eval_steps_per_second": 0.903, "step": 16000 }, { "epoch": 0.8634720198586153, "grad_norm": 0.887453276793935, "learning_rate": 1.4076217231595952e-06, "loss": 0.3317, "step": 16001 }, { "epoch": 0.8635259834871297, "grad_norm": 1.0613328518457446, "learning_rate": 1.4073045065919626e-06, "loss": 0.4116, "step": 16002 }, { "epoch": 0.863579947115644, "grad_norm": 0.94971006500543, "learning_rate": 1.4069874076525345e-06, "loss": 0.474, "step": 16003 }, { "epoch": 0.8636339107441584, "grad_norm": 1.0642828222655067, "learning_rate": 1.4066704263504236e-06, "loss": 0.5063, "step": 16004 }, { "epoch": 0.8636878743726728, "grad_norm": 0.968726643612459, "learning_rate": 1.4063535626947405e-06, "loss": 0.487, "step": 16005 }, { "epoch": 0.8637418380011872, "grad_norm": 0.8758106860713164, "learning_rate": 1.4060368166945925e-06, "loss": 0.5536, "step": 16006 }, { "epoch": 0.8637958016297016, "grad_norm": 1.0617495382858972, "learning_rate": 1.4057201883590832e-06, "loss": 0.4976, "step": 16007 }, { "epoch": 0.863849765258216, "grad_norm": 1.040591413242043, "learning_rate": 1.4054036776973123e-06, "loss": 0.5468, "step": 16008 }, { "epoch": 0.8639037288867304, "grad_norm": 1.084507076859897, "learning_rate": 1.4050872847183773e-06, "loss": 0.4374, "step": 16009 }, { "epoch": 0.8639576925152447, "grad_norm": 1.0125501252786353, "learning_rate": 1.4047710094313721e-06, "loss": 0.4792, "step": 16010 }, { "epoch": 0.8640116561437591, "grad_norm": 1.1550284623050406, "learning_rate": 1.404454851845385e-06, "loss": 0.5422, "step": 16011 }, { "epoch": 0.8640656197722735, "grad_norm": 1.0653542174368869, "learning_rate": 1.4041388119695034e-06, "loss": 0.4778, "step": 16012 }, { "epoch": 0.8641195834007879, "grad_norm": 1.0171155384734847, "learning_rate": 1.4038228898128115e-06, "loss": 0.4549, "step": 16013 }, { "epoch": 0.8641735470293023, "grad_norm": 0.9157559560410936, "learning_rate": 1.4035070853843886e-06, "loss": 0.342, "step": 16014 }, { "epoch": 0.8642275106578167, "grad_norm": 0.9719171588895529, "learning_rate": 1.4031913986933112e-06, "loss": 0.4759, "step": 16015 }, { "epoch": 0.8642814742863311, "grad_norm": 0.771018276778794, "learning_rate": 1.4028758297486524e-06, "loss": 0.2712, "step": 16016 }, { "epoch": 0.8643354379148454, "grad_norm": 1.0295528936070981, "learning_rate": 1.4025603785594826e-06, "loss": 0.4352, "step": 16017 }, { "epoch": 0.8643894015433597, "grad_norm": 0.819469310851545, "learning_rate": 1.4022450451348679e-06, "loss": 0.3663, "step": 16018 }, { "epoch": 0.8644433651718741, "grad_norm": 0.92170869806711, "learning_rate": 1.40192982948387e-06, "loss": 0.4241, "step": 16019 }, { "epoch": 0.8644973288003885, "grad_norm": 1.007724800725122, "learning_rate": 1.40161473161555e-06, "loss": 0.4388, "step": 16020 }, { "epoch": 0.8645512924289029, "grad_norm": 1.086398251259194, "learning_rate": 1.401299751538964e-06, "loss": 0.6251, "step": 16021 }, { "epoch": 
0.8646052560574173, "grad_norm": 0.9887508220337071, "learning_rate": 1.400984889263164e-06, "loss": 0.4632, "step": 16022 }, { "epoch": 0.8646592196859317, "grad_norm": 1.1253776939820697, "learning_rate": 1.4006701447972004e-06, "loss": 0.4686, "step": 16023 }, { "epoch": 0.864713183314446, "grad_norm": 0.8844839621485916, "learning_rate": 1.40035551815012e-06, "loss": 0.3742, "step": 16024 }, { "epoch": 0.8647671469429604, "grad_norm": 1.0639229825836636, "learning_rate": 1.4000410093309632e-06, "loss": 0.4578, "step": 16025 }, { "epoch": 0.8648211105714748, "grad_norm": 1.0013419126193146, "learning_rate": 1.3997266183487704e-06, "loss": 0.3754, "step": 16026 }, { "epoch": 0.8648750741999892, "grad_norm": 1.290661101417018, "learning_rate": 1.399412345212578e-06, "loss": 0.567, "step": 16027 }, { "epoch": 0.8649290378285036, "grad_norm": 0.9474516112739214, "learning_rate": 1.399098189931418e-06, "loss": 0.4514, "step": 16028 }, { "epoch": 0.864983001457018, "grad_norm": 0.955052883263688, "learning_rate": 1.3987841525143192e-06, "loss": 0.4875, "step": 16029 }, { "epoch": 0.8650369650855324, "grad_norm": 1.079703992629002, "learning_rate": 1.398470232970308e-06, "loss": 0.6401, "step": 16030 }, { "epoch": 0.8650909287140467, "grad_norm": 0.993149832037558, "learning_rate": 1.398156431308408e-06, "loss": 0.4345, "step": 16031 }, { "epoch": 0.8651448923425611, "grad_norm": 1.1694787954590027, "learning_rate": 1.3978427475376355e-06, "loss": 0.5396, "step": 16032 }, { "epoch": 0.8651988559710755, "grad_norm": 0.8604624256722553, "learning_rate": 1.3975291816670078e-06, "loss": 0.3088, "step": 16033 }, { "epoch": 0.8652528195995899, "grad_norm": 1.0829544121259367, "learning_rate": 1.3972157337055365e-06, "loss": 0.6836, "step": 16034 }, { "epoch": 0.8653067832281043, "grad_norm": 1.1240740643668043, "learning_rate": 1.3969024036622303e-06, "loss": 0.4721, "step": 16035 }, { "epoch": 0.8653607468566187, "grad_norm": 1.0640674801608045, "learning_rate": 1.3965891915460946e-06, "loss": 0.5535, "step": 16036 }, { "epoch": 0.865414710485133, "grad_norm": 0.9907022767114316, "learning_rate": 1.3962760973661333e-06, "loss": 0.4608, "step": 16037 }, { "epoch": 0.8654686741136474, "grad_norm": 1.2247088910889385, "learning_rate": 1.395963121131342e-06, "loss": 0.4413, "step": 16038 }, { "epoch": 0.8655226377421618, "grad_norm": 1.009051020017518, "learning_rate": 1.395650262850718e-06, "loss": 0.4979, "step": 16039 }, { "epoch": 0.8655766013706762, "grad_norm": 1.1772209284283381, "learning_rate": 1.3953375225332519e-06, "loss": 0.5238, "step": 16040 }, { "epoch": 0.8656305649991906, "grad_norm": 1.1297710078642407, "learning_rate": 1.3950249001879334e-06, "loss": 0.505, "step": 16041 }, { "epoch": 0.865684528627705, "grad_norm": 1.1385790350210376, "learning_rate": 1.3947123958237469e-06, "loss": 0.5596, "step": 16042 }, { "epoch": 0.8657384922562193, "grad_norm": 0.9890371350841766, "learning_rate": 1.394400009449674e-06, "loss": 0.4273, "step": 16043 }, { "epoch": 0.8657924558847336, "grad_norm": 0.9737856023223564, "learning_rate": 1.3940877410746948e-06, "loss": 0.4287, "step": 16044 }, { "epoch": 0.865846419513248, "grad_norm": 0.9781804679410638, "learning_rate": 1.393775590707781e-06, "loss": 0.4705, "step": 16045 }, { "epoch": 0.8659003831417624, "grad_norm": 1.0263800575123965, "learning_rate": 1.3934635583579062e-06, "loss": 0.4649, "step": 16046 }, { "epoch": 0.8659543467702768, "grad_norm": 1.0996046726559146, "learning_rate": 1.3931516440340382e-06, "loss": 0.4406, "step": 16047 }, 
{ "epoch": 0.8660083103987912, "grad_norm": 1.1046343841556652, "learning_rate": 1.392839847745141e-06, "loss": 0.4062, "step": 16048 }, { "epoch": 0.8660622740273056, "grad_norm": 0.8652012530827887, "learning_rate": 1.3925281695001772e-06, "loss": 0.3848, "step": 16049 }, { "epoch": 0.86611623765582, "grad_norm": 0.9605319806712386, "learning_rate": 1.392216609308104e-06, "loss": 0.4347, "step": 16050 }, { "epoch": 0.8661702012843343, "grad_norm": 1.1102586264780712, "learning_rate": 1.3919051671778758e-06, "loss": 0.6059, "step": 16051 }, { "epoch": 0.8662241649128487, "grad_norm": 0.8940590130358088, "learning_rate": 1.3915938431184442e-06, "loss": 0.4109, "step": 16052 }, { "epoch": 0.8662781285413631, "grad_norm": 1.1791815894272986, "learning_rate": 1.3912826371387575e-06, "loss": 0.5774, "step": 16053 }, { "epoch": 0.8663320921698775, "grad_norm": 1.0620188208551782, "learning_rate": 1.3909715492477588e-06, "loss": 0.4613, "step": 16054 }, { "epoch": 0.8663860557983919, "grad_norm": 1.1043702610923958, "learning_rate": 1.390660579454389e-06, "loss": 0.4283, "step": 16055 }, { "epoch": 0.8664400194269063, "grad_norm": 1.0162210616030949, "learning_rate": 1.390349727767587e-06, "loss": 0.6004, "step": 16056 }, { "epoch": 0.8664939830554207, "grad_norm": 0.997451131207885, "learning_rate": 1.3900389941962861e-06, "loss": 0.4671, "step": 16057 }, { "epoch": 0.866547946683935, "grad_norm": 1.0554052076574567, "learning_rate": 1.389728378749417e-06, "loss": 0.4872, "step": 16058 }, { "epoch": 0.8666019103124494, "grad_norm": 1.0251757289552526, "learning_rate": 1.3894178814359077e-06, "loss": 0.4916, "step": 16059 }, { "epoch": 0.8666558739409638, "grad_norm": 1.046077585448734, "learning_rate": 1.3891075022646822e-06, "loss": 0.431, "step": 16060 }, { "epoch": 0.8667098375694782, "grad_norm": 1.31230653298192, "learning_rate": 1.3887972412446608e-06, "loss": 0.6478, "step": 16061 }, { "epoch": 0.8667638011979926, "grad_norm": 1.0632964279311767, "learning_rate": 1.3884870983847604e-06, "loss": 0.4656, "step": 16062 }, { "epoch": 0.866817764826507, "grad_norm": 0.8854448388034084, "learning_rate": 1.3881770736938954e-06, "loss": 0.419, "step": 16063 }, { "epoch": 0.8668717284550214, "grad_norm": 1.0009328867494571, "learning_rate": 1.3878671671809759e-06, "loss": 0.436, "step": 16064 }, { "epoch": 0.8669256920835356, "grad_norm": 0.9921178278969827, "learning_rate": 1.3875573788549095e-06, "loss": 0.4074, "step": 16065 }, { "epoch": 0.86697965571205, "grad_norm": 1.156789898969116, "learning_rate": 1.387247708724599e-06, "loss": 0.4307, "step": 16066 }, { "epoch": 0.8670336193405644, "grad_norm": 0.860507172211298, "learning_rate": 1.386938156798945e-06, "loss": 0.4075, "step": 16067 }, { "epoch": 0.8670875829690788, "grad_norm": 1.1254649436321356, "learning_rate": 1.3866287230868445e-06, "loss": 0.5214, "step": 16068 }, { "epoch": 0.8671415465975932, "grad_norm": 1.118931643320334, "learning_rate": 1.3863194075971904e-06, "loss": 0.3571, "step": 16069 }, { "epoch": 0.8671955102261076, "grad_norm": 0.8595429709281893, "learning_rate": 1.3860102103388728e-06, "loss": 0.3689, "step": 16070 }, { "epoch": 0.867249473854622, "grad_norm": 0.9526729450411437, "learning_rate": 1.3857011313207791e-06, "loss": 0.3684, "step": 16071 }, { "epoch": 0.8673034374831363, "grad_norm": 1.0246077664936666, "learning_rate": 1.385392170551793e-06, "loss": 0.4082, "step": 16072 }, { "epoch": 0.8673574011116507, "grad_norm": 1.0617800257989989, "learning_rate": 1.3850833280407924e-06, "loss": 0.574, "step": 
16073 }, { "epoch": 0.8674113647401651, "grad_norm": 0.9404674090415638, "learning_rate": 1.3847746037966541e-06, "loss": 0.4172, "step": 16074 }, { "epoch": 0.8674653283686795, "grad_norm": 1.124740125721108, "learning_rate": 1.384465997828253e-06, "loss": 0.5465, "step": 16075 }, { "epoch": 0.8675192919971939, "grad_norm": 1.136932845887837, "learning_rate": 1.384157510144457e-06, "loss": 0.5971, "step": 16076 }, { "epoch": 0.8675732556257083, "grad_norm": 0.9855516888648608, "learning_rate": 1.3838491407541329e-06, "loss": 0.4069, "step": 16077 }, { "epoch": 0.8676272192542227, "grad_norm": 0.9351424469033709, "learning_rate": 1.3835408896661437e-06, "loss": 0.321, "step": 16078 }, { "epoch": 0.867681182882737, "grad_norm": 1.0285498158706627, "learning_rate": 1.3832327568893496e-06, "loss": 0.4238, "step": 16079 }, { "epoch": 0.8677351465112514, "grad_norm": 1.0379522360000892, "learning_rate": 1.3829247424326044e-06, "loss": 0.4773, "step": 16080 }, { "epoch": 0.8677891101397658, "grad_norm": 0.939613253615824, "learning_rate": 1.382616846304763e-06, "loss": 0.388, "step": 16081 }, { "epoch": 0.8678430737682802, "grad_norm": 0.9539592372622561, "learning_rate": 1.3823090685146728e-06, "loss": 0.3699, "step": 16082 }, { "epoch": 0.8678970373967946, "grad_norm": 1.2394409833549738, "learning_rate": 1.3820014090711808e-06, "loss": 0.5191, "step": 16083 }, { "epoch": 0.867951001025309, "grad_norm": 0.9562219117111755, "learning_rate": 1.3816938679831299e-06, "loss": 0.4508, "step": 16084 }, { "epoch": 0.8680049646538234, "grad_norm": 0.9186425441389877, "learning_rate": 1.3813864452593578e-06, "loss": 0.4134, "step": 16085 }, { "epoch": 0.8680589282823377, "grad_norm": 1.065583652826002, "learning_rate": 1.3810791409087022e-06, "loss": 0.4189, "step": 16086 }, { "epoch": 0.8681128919108521, "grad_norm": 0.9330314109104152, "learning_rate": 1.3807719549399926e-06, "loss": 0.4813, "step": 16087 }, { "epoch": 0.8681668555393665, "grad_norm": 1.0810382849207258, "learning_rate": 1.3804648873620597e-06, "loss": 0.5205, "step": 16088 }, { "epoch": 0.8682208191678809, "grad_norm": 0.9933615974959051, "learning_rate": 1.380157938183728e-06, "loss": 0.4406, "step": 16089 }, { "epoch": 0.8682747827963953, "grad_norm": 1.0261553517428226, "learning_rate": 1.3798511074138204e-06, "loss": 0.4614, "step": 16090 }, { "epoch": 0.8683287464249096, "grad_norm": 1.236979341473934, "learning_rate": 1.3795443950611548e-06, "loss": 0.4813, "step": 16091 }, { "epoch": 0.868382710053424, "grad_norm": 0.9875497459273016, "learning_rate": 1.3792378011345464e-06, "loss": 0.3791, "step": 16092 }, { "epoch": 0.8684366736819383, "grad_norm": 0.98501396240504, "learning_rate": 1.3789313256428083e-06, "loss": 0.3687, "step": 16093 }, { "epoch": 0.8684906373104527, "grad_norm": 1.1451115166223478, "learning_rate": 1.3786249685947473e-06, "loss": 0.4908, "step": 16094 }, { "epoch": 0.8685446009389671, "grad_norm": 0.9149378750060622, "learning_rate": 1.378318729999169e-06, "loss": 0.3245, "step": 16095 }, { "epoch": 0.8685985645674815, "grad_norm": 1.0195229136598205, "learning_rate": 1.378012609864875e-06, "loss": 0.3878, "step": 16096 }, { "epoch": 0.8686525281959959, "grad_norm": 0.8680100036010613, "learning_rate": 1.3777066082006638e-06, "loss": 0.3232, "step": 16097 }, { "epoch": 0.8687064918245103, "grad_norm": 0.9386649720105785, "learning_rate": 1.3774007250153293e-06, "loss": 0.5095, "step": 16098 }, { "epoch": 0.8687604554530247, "grad_norm": 0.9952900314270993, "learning_rate": 1.3770949603176643e-06, "loss": 
0.5587, "step": 16099 }, { "epoch": 0.868814419081539, "grad_norm": 0.9876989212897406, "learning_rate": 1.3767893141164567e-06, "loss": 0.4281, "step": 16100 }, { "epoch": 0.8688683827100534, "grad_norm": 1.2985161861331027, "learning_rate": 1.376483786420489e-06, "loss": 0.8057, "step": 16101 }, { "epoch": 0.8689223463385678, "grad_norm": 1.1783287776719449, "learning_rate": 1.3761783772385445e-06, "loss": 0.4225, "step": 16102 }, { "epoch": 0.8689763099670822, "grad_norm": 1.1411742002104799, "learning_rate": 1.3758730865794002e-06, "loss": 0.4815, "step": 16103 }, { "epoch": 0.8690302735955966, "grad_norm": 1.0747917860418272, "learning_rate": 1.37556791445183e-06, "loss": 0.5573, "step": 16104 }, { "epoch": 0.869084237224111, "grad_norm": 0.9664008391731783, "learning_rate": 1.3752628608646056e-06, "loss": 0.5101, "step": 16105 }, { "epoch": 0.8691382008526253, "grad_norm": 0.8700667458863904, "learning_rate": 1.3749579258264946e-06, "loss": 0.4118, "step": 16106 }, { "epoch": 0.8691921644811397, "grad_norm": 1.0715963139976257, "learning_rate": 1.3746531093462607e-06, "loss": 0.5085, "step": 16107 }, { "epoch": 0.8692461281096541, "grad_norm": 0.9753353159673824, "learning_rate": 1.3743484114326643e-06, "loss": 0.4375, "step": 16108 }, { "epoch": 0.8693000917381685, "grad_norm": 0.9021106636994022, "learning_rate": 1.3740438320944637e-06, "loss": 0.3694, "step": 16109 }, { "epoch": 0.8693540553666829, "grad_norm": 1.105276085436314, "learning_rate": 1.3737393713404127e-06, "loss": 0.438, "step": 16110 }, { "epoch": 0.8694080189951973, "grad_norm": 1.1391061817507306, "learning_rate": 1.3734350291792611e-06, "loss": 0.6086, "step": 16111 }, { "epoch": 0.8694619826237117, "grad_norm": 0.9773434045830147, "learning_rate": 1.3731308056197559e-06, "loss": 0.4328, "step": 16112 }, { "epoch": 0.869515946252226, "grad_norm": 1.1565451872062975, "learning_rate": 1.372826700670643e-06, "loss": 0.6308, "step": 16113 }, { "epoch": 0.8695699098807403, "grad_norm": 0.8256899847413528, "learning_rate": 1.3725227143406595e-06, "loss": 0.3108, "step": 16114 }, { "epoch": 0.8696238735092547, "grad_norm": 0.9562243797391092, "learning_rate": 1.372218846638544e-06, "loss": 0.4535, "step": 16115 }, { "epoch": 0.8696778371377691, "grad_norm": 1.0428583380074818, "learning_rate": 1.3719150975730294e-06, "loss": 0.5863, "step": 16116 }, { "epoch": 0.8697318007662835, "grad_norm": 1.531613523366415, "learning_rate": 1.3716114671528457e-06, "loss": 0.6786, "step": 16117 }, { "epoch": 0.8697857643947979, "grad_norm": 0.8599125551370869, "learning_rate": 1.3713079553867204e-06, "loss": 0.3009, "step": 16118 }, { "epoch": 0.8698397280233123, "grad_norm": 1.2156802473526944, "learning_rate": 1.3710045622833763e-06, "loss": 0.4883, "step": 16119 }, { "epoch": 0.8698936916518266, "grad_norm": 1.082416925571627, "learning_rate": 1.3707012878515336e-06, "loss": 0.522, "step": 16120 }, { "epoch": 0.869947655280341, "grad_norm": 0.9109854946275128, "learning_rate": 1.3703981320999072e-06, "loss": 0.3727, "step": 16121 }, { "epoch": 0.8700016189088554, "grad_norm": 1.1797095393090407, "learning_rate": 1.3700950950372113e-06, "loss": 0.4824, "step": 16122 }, { "epoch": 0.8700555825373698, "grad_norm": 0.8815374069717248, "learning_rate": 1.3697921766721556e-06, "loss": 0.3721, "step": 16123 }, { "epoch": 0.8701095461658842, "grad_norm": 1.003291726951096, "learning_rate": 1.3694893770134459e-06, "loss": 0.4137, "step": 16124 }, { "epoch": 0.8701635097943986, "grad_norm": 1.1663913342693344, "learning_rate": 
1.3691866960697853e-06, "loss": 0.4814, "step": 16125 }, { "epoch": 0.870217473422913, "grad_norm": 1.1046820591635083, "learning_rate": 1.3688841338498727e-06, "loss": 0.4396, "step": 16126 }, { "epoch": 0.8702714370514273, "grad_norm": 0.9521805725281192, "learning_rate": 1.368581690362405e-06, "loss": 0.5982, "step": 16127 }, { "epoch": 0.8703254006799417, "grad_norm": 1.267862301747905, "learning_rate": 1.3682793656160734e-06, "loss": 0.541, "step": 16128 }, { "epoch": 0.8703793643084561, "grad_norm": 1.0201033485188375, "learning_rate": 1.3679771596195672e-06, "loss": 0.4457, "step": 16129 }, { "epoch": 0.8704333279369705, "grad_norm": 1.043246794389693, "learning_rate": 1.367675072381573e-06, "loss": 0.4474, "step": 16130 }, { "epoch": 0.8704872915654849, "grad_norm": 1.0799382703429783, "learning_rate": 1.3673731039107726e-06, "loss": 0.5305, "step": 16131 }, { "epoch": 0.8705412551939993, "grad_norm": 1.1577623643432198, "learning_rate": 1.367071254215845e-06, "loss": 0.4998, "step": 16132 }, { "epoch": 0.8705952188225137, "grad_norm": 0.990625915361084, "learning_rate": 1.3667695233054651e-06, "loss": 0.3781, "step": 16133 }, { "epoch": 0.870649182451028, "grad_norm": 0.9940018695270442, "learning_rate": 1.3664679111883068e-06, "loss": 0.3528, "step": 16134 }, { "epoch": 0.8707031460795424, "grad_norm": 1.0279633049485066, "learning_rate": 1.366166417873036e-06, "loss": 0.4739, "step": 16135 }, { "epoch": 0.8707571097080568, "grad_norm": 1.0916213917274067, "learning_rate": 1.3658650433683198e-06, "loss": 0.5409, "step": 16136 }, { "epoch": 0.8708110733365712, "grad_norm": 1.1360261833068148, "learning_rate": 1.3655637876828192e-06, "loss": 0.4516, "step": 16137 }, { "epoch": 0.8708650369650855, "grad_norm": 1.111808575701815, "learning_rate": 1.365262650825193e-06, "loss": 0.5902, "step": 16138 }, { "epoch": 0.8709190005936, "grad_norm": 1.2311601385314508, "learning_rate": 1.364961632804096e-06, "loss": 0.4378, "step": 16139 }, { "epoch": 0.8709729642221143, "grad_norm": 0.8985519086167008, "learning_rate": 1.3646607336281798e-06, "loss": 0.4333, "step": 16140 }, { "epoch": 0.8710269278506286, "grad_norm": 0.7758171060322469, "learning_rate": 1.3643599533060933e-06, "loss": 0.4019, "step": 16141 }, { "epoch": 0.871080891479143, "grad_norm": 0.9402180790839404, "learning_rate": 1.3640592918464799e-06, "loss": 0.3713, "step": 16142 }, { "epoch": 0.8711348551076574, "grad_norm": 1.0241980903106382, "learning_rate": 1.3637587492579812e-06, "loss": 0.4631, "step": 16143 }, { "epoch": 0.8711888187361718, "grad_norm": 0.8692352588090932, "learning_rate": 1.3634583255492357e-06, "loss": 0.3084, "step": 16144 }, { "epoch": 0.8712427823646862, "grad_norm": 0.9208504216967156, "learning_rate": 1.3631580207288777e-06, "loss": 0.4352, "step": 16145 }, { "epoch": 0.8712967459932006, "grad_norm": 1.1170767535255977, "learning_rate": 1.3628578348055377e-06, "loss": 0.5356, "step": 16146 }, { "epoch": 0.871350709621715, "grad_norm": 1.0298898966486347, "learning_rate": 1.3625577677878443e-06, "loss": 0.531, "step": 16147 }, { "epoch": 0.8714046732502293, "grad_norm": 0.9957206196275015, "learning_rate": 1.3622578196844215e-06, "loss": 0.4559, "step": 16148 }, { "epoch": 0.8714586368787437, "grad_norm": 1.0992135603182238, "learning_rate": 1.3619579905038892e-06, "loss": 0.4101, "step": 16149 }, { "epoch": 0.8715126005072581, "grad_norm": 1.0894893584160243, "learning_rate": 1.3616582802548662e-06, "loss": 0.3852, "step": 16150 }, { "epoch": 0.8715665641357725, "grad_norm": 0.9914274292165268, 
"learning_rate": 1.3613586889459646e-06, "loss": 0.4456, "step": 16151 }, { "epoch": 0.8716205277642869, "grad_norm": 0.9345336537946539, "learning_rate": 1.361059216585797e-06, "loss": 0.4005, "step": 16152 }, { "epoch": 0.8716744913928013, "grad_norm": 1.1244778765597754, "learning_rate": 1.3607598631829688e-06, "loss": 0.4472, "step": 16153 }, { "epoch": 0.8717284550213157, "grad_norm": 0.9721936954906572, "learning_rate": 1.3604606287460853e-06, "loss": 0.4495, "step": 16154 }, { "epoch": 0.87178241864983, "grad_norm": 0.9135220477467266, "learning_rate": 1.3601615132837454e-06, "loss": 0.3482, "step": 16155 }, { "epoch": 0.8718363822783444, "grad_norm": 1.200500867553237, "learning_rate": 1.3598625168045467e-06, "loss": 0.6387, "step": 16156 }, { "epoch": 0.8718903459068588, "grad_norm": 1.086063182182612, "learning_rate": 1.3595636393170828e-06, "loss": 0.4782, "step": 16157 }, { "epoch": 0.8719443095353732, "grad_norm": 0.8378772527067964, "learning_rate": 1.3592648808299433e-06, "loss": 0.3562, "step": 16158 }, { "epoch": 0.8719982731638876, "grad_norm": 0.9614197833527857, "learning_rate": 1.3589662413517163e-06, "loss": 0.42, "step": 16159 }, { "epoch": 0.872052236792402, "grad_norm": 0.9465480557806304, "learning_rate": 1.3586677208909825e-06, "loss": 0.4747, "step": 16160 }, { "epoch": 0.8721062004209164, "grad_norm": 1.3492633965183105, "learning_rate": 1.358369319456323e-06, "loss": 0.5267, "step": 16161 }, { "epoch": 0.8721601640494306, "grad_norm": 1.1737112770384424, "learning_rate": 1.3580710370563142e-06, "loss": 0.4838, "step": 16162 }, { "epoch": 0.872214127677945, "grad_norm": 0.9885937936727277, "learning_rate": 1.3577728736995288e-06, "loss": 0.4485, "step": 16163 }, { "epoch": 0.8722680913064594, "grad_norm": 0.9841119724552323, "learning_rate": 1.357474829394536e-06, "loss": 0.4902, "step": 16164 }, { "epoch": 0.8723220549349738, "grad_norm": 0.8847402583752003, "learning_rate": 1.3571769041499034e-06, "loss": 0.3662, "step": 16165 }, { "epoch": 0.8723760185634882, "grad_norm": 1.1051869415893845, "learning_rate": 1.3568790979741914e-06, "loss": 0.4421, "step": 16166 }, { "epoch": 0.8724299821920026, "grad_norm": 1.0235224503654698, "learning_rate": 1.3565814108759612e-06, "loss": 0.4538, "step": 16167 }, { "epoch": 0.872483945820517, "grad_norm": 0.8767621757428213, "learning_rate": 1.3562838428637679e-06, "loss": 0.4266, "step": 16168 }, { "epoch": 0.8725379094490313, "grad_norm": 0.9631959669288327, "learning_rate": 1.3559863939461643e-06, "loss": 0.4322, "step": 16169 }, { "epoch": 0.8725918730775457, "grad_norm": 1.019213285185678, "learning_rate": 1.3556890641316983e-06, "loss": 0.6214, "step": 16170 }, { "epoch": 0.8726458367060601, "grad_norm": 1.3264550709130312, "learning_rate": 1.3553918534289163e-06, "loss": 0.539, "step": 16171 }, { "epoch": 0.8726998003345745, "grad_norm": 1.127972437255977, "learning_rate": 1.35509476184636e-06, "loss": 0.4962, "step": 16172 }, { "epoch": 0.8727537639630889, "grad_norm": 0.9157499315450833, "learning_rate": 1.3547977893925685e-06, "loss": 0.5323, "step": 16173 }, { "epoch": 0.8728077275916033, "grad_norm": 0.9339540478830759, "learning_rate": 1.3545009360760766e-06, "loss": 0.3551, "step": 16174 }, { "epoch": 0.8728616912201176, "grad_norm": 1.003224520860702, "learning_rate": 1.3542042019054174e-06, "loss": 0.3907, "step": 16175 }, { "epoch": 0.872915654848632, "grad_norm": 0.9016734753049422, "learning_rate": 1.353907586889118e-06, "loss": 0.5232, "step": 16176 }, { "epoch": 0.8729696184771464, "grad_norm": 
0.9589432607051979, "learning_rate": 1.3536110910357039e-06, "loss": 0.3985, "step": 16177 }, { "epoch": 0.8730235821056608, "grad_norm": 0.9519870917865427, "learning_rate": 1.3533147143536964e-06, "loss": 0.4161, "step": 16178 }, { "epoch": 0.8730775457341752, "grad_norm": 1.1021329574623013, "learning_rate": 1.3530184568516142e-06, "loss": 0.3881, "step": 16179 }, { "epoch": 0.8731315093626896, "grad_norm": 1.115403530729816, "learning_rate": 1.3527223185379714e-06, "loss": 0.4815, "step": 16180 }, { "epoch": 0.873185472991204, "grad_norm": 0.9078095454632906, "learning_rate": 1.35242629942128e-06, "loss": 0.3796, "step": 16181 }, { "epoch": 0.8732394366197183, "grad_norm": 1.0095614864348466, "learning_rate": 1.3521303995100479e-06, "loss": 0.3688, "step": 16182 }, { "epoch": 0.8732934002482327, "grad_norm": 1.0237823280447695, "learning_rate": 1.3518346188127786e-06, "loss": 0.3799, "step": 16183 }, { "epoch": 0.873347363876747, "grad_norm": 1.25877722748606, "learning_rate": 1.3515389573379745e-06, "loss": 0.6, "step": 16184 }, { "epoch": 0.8734013275052614, "grad_norm": 0.9682231216834499, "learning_rate": 1.3512434150941317e-06, "loss": 0.5315, "step": 16185 }, { "epoch": 0.8734552911337758, "grad_norm": 0.9429742196446539, "learning_rate": 1.3509479920897456e-06, "loss": 0.4265, "step": 16186 }, { "epoch": 0.8735092547622902, "grad_norm": 0.9554849076351533, "learning_rate": 1.3506526883333064e-06, "loss": 0.3468, "step": 16187 }, { "epoch": 0.8735632183908046, "grad_norm": 0.9366176276202473, "learning_rate": 1.3503575038333012e-06, "loss": 0.3759, "step": 16188 }, { "epoch": 0.8736171820193189, "grad_norm": 0.9202095223270981, "learning_rate": 1.3500624385982152e-06, "loss": 0.3038, "step": 16189 }, { "epoch": 0.8736711456478333, "grad_norm": 1.0565379545044808, "learning_rate": 1.349767492636527e-06, "loss": 0.403, "step": 16190 }, { "epoch": 0.8737251092763477, "grad_norm": 0.9597680428527434, "learning_rate": 1.3494726659567148e-06, "loss": 0.3364, "step": 16191 }, { "epoch": 0.8737790729048621, "grad_norm": 1.0424276035307118, "learning_rate": 1.349177958567252e-06, "loss": 0.3094, "step": 16192 }, { "epoch": 0.8738330365333765, "grad_norm": 1.184409482562362, "learning_rate": 1.3488833704766089e-06, "loss": 0.728, "step": 16193 }, { "epoch": 0.8738870001618909, "grad_norm": 0.9233311877398541, "learning_rate": 1.3485889016932522e-06, "loss": 0.3386, "step": 16194 }, { "epoch": 0.8739409637904053, "grad_norm": 1.145729624079814, "learning_rate": 1.3482945522256447e-06, "loss": 0.5515, "step": 16195 }, { "epoch": 0.8739949274189196, "grad_norm": 1.0944963804644126, "learning_rate": 1.3480003220822477e-06, "loss": 0.5841, "step": 16196 }, { "epoch": 0.874048891047434, "grad_norm": 1.0295699846959496, "learning_rate": 1.3477062112715162e-06, "loss": 0.4498, "step": 16197 }, { "epoch": 0.8741028546759484, "grad_norm": 0.9087576121256383, "learning_rate": 1.3474122198019038e-06, "loss": 0.462, "step": 16198 }, { "epoch": 0.8741568183044628, "grad_norm": 1.230293339728579, "learning_rate": 1.3471183476818598e-06, "loss": 0.558, "step": 16199 }, { "epoch": 0.8742107819329772, "grad_norm": 1.2086249145093795, "learning_rate": 1.3468245949198308e-06, "loss": 0.5747, "step": 16200 }, { "epoch": 0.8742647455614916, "grad_norm": 1.0783169231626186, "learning_rate": 1.3465309615242589e-06, "loss": 0.4034, "step": 16201 }, { "epoch": 0.874318709190006, "grad_norm": 1.0019962311657518, "learning_rate": 1.3462374475035845e-06, "loss": 0.3985, "step": 16202 }, { "epoch": 0.8743726728185203, 
"grad_norm": 1.093208681886094, "learning_rate": 1.3459440528662441e-06, "loss": 0.4938, "step": 16203 }, { "epoch": 0.8744266364470347, "grad_norm": 1.2193558521799472, "learning_rate": 1.3456507776206674e-06, "loss": 0.4969, "step": 16204 }, { "epoch": 0.8744806000755491, "grad_norm": 1.071824311393677, "learning_rate": 1.3453576217752854e-06, "loss": 0.493, "step": 16205 }, { "epoch": 0.8745345637040635, "grad_norm": 1.090456470598965, "learning_rate": 1.3450645853385235e-06, "loss": 0.5096, "step": 16206 }, { "epoch": 0.8745885273325779, "grad_norm": 1.1684221708857356, "learning_rate": 1.3447716683188032e-06, "loss": 0.5251, "step": 16207 }, { "epoch": 0.8746424909610923, "grad_norm": 0.976421832261836, "learning_rate": 1.344478870724544e-06, "loss": 0.4241, "step": 16208 }, { "epoch": 0.8746964545896067, "grad_norm": 0.8095598871256602, "learning_rate": 1.3441861925641609e-06, "loss": 0.2806, "step": 16209 }, { "epoch": 0.8747504182181209, "grad_norm": 0.828570874100992, "learning_rate": 1.3438936338460653e-06, "loss": 0.3215, "step": 16210 }, { "epoch": 0.8748043818466353, "grad_norm": 1.0357945673540385, "learning_rate": 1.3436011945786665e-06, "loss": 0.492, "step": 16211 }, { "epoch": 0.8748583454751497, "grad_norm": 0.971975235569279, "learning_rate": 1.343308874770369e-06, "loss": 0.4122, "step": 16212 }, { "epoch": 0.8749123091036641, "grad_norm": 0.8686189719093521, "learning_rate": 1.3430166744295753e-06, "loss": 0.2868, "step": 16213 }, { "epoch": 0.8749662727321785, "grad_norm": 0.9193858394409322, "learning_rate": 1.3427245935646813e-06, "loss": 0.5665, "step": 16214 }, { "epoch": 0.8750202363606929, "grad_norm": 1.0279637718014387, "learning_rate": 1.3424326321840842e-06, "loss": 0.4225, "step": 16215 }, { "epoch": 0.8750741999892073, "grad_norm": 1.2095369099495306, "learning_rate": 1.3421407902961733e-06, "loss": 0.631, "step": 16216 }, { "epoch": 0.8751281636177216, "grad_norm": 1.1051721292706589, "learning_rate": 1.3418490679093388e-06, "loss": 0.6027, "step": 16217 }, { "epoch": 0.875182127246236, "grad_norm": 1.3806581313165702, "learning_rate": 1.3415574650319628e-06, "loss": 0.5689, "step": 16218 }, { "epoch": 0.8752360908747504, "grad_norm": 1.018399059119866, "learning_rate": 1.3412659816724266e-06, "loss": 0.4072, "step": 16219 }, { "epoch": 0.8752900545032648, "grad_norm": 1.1273643154385393, "learning_rate": 1.3409746178391084e-06, "loss": 0.4681, "step": 16220 }, { "epoch": 0.8753440181317792, "grad_norm": 1.1111287148987132, "learning_rate": 1.3406833735403819e-06, "loss": 0.503, "step": 16221 }, { "epoch": 0.8753979817602936, "grad_norm": 1.1810402487094827, "learning_rate": 1.340392248784618e-06, "loss": 0.2904, "step": 16222 }, { "epoch": 0.875451945388808, "grad_norm": 0.825549075306673, "learning_rate": 1.3401012435801837e-06, "loss": 0.43, "step": 16223 }, { "epoch": 0.8755059090173223, "grad_norm": 0.989321325589098, "learning_rate": 1.3398103579354444e-06, "loss": 0.412, "step": 16224 }, { "epoch": 0.8755598726458367, "grad_norm": 0.9825084829744649, "learning_rate": 1.3395195918587574e-06, "loss": 0.4834, "step": 16225 }, { "epoch": 0.8756138362743511, "grad_norm": 1.1180643560626067, "learning_rate": 1.3392289453584816e-06, "loss": 0.5126, "step": 16226 }, { "epoch": 0.8756677999028655, "grad_norm": 1.0700545515820141, "learning_rate": 1.33893841844297e-06, "loss": 0.5359, "step": 16227 }, { "epoch": 0.8757217635313799, "grad_norm": 0.8962622859253799, "learning_rate": 1.338648011120573e-06, "loss": 0.3817, "step": 16228 }, { "epoch": 
0.8757757271598943, "grad_norm": 1.0494403783418564, "learning_rate": 1.3383577233996365e-06, "loss": 0.4043, "step": 16229 }, { "epoch": 0.8758296907884087, "grad_norm": 0.9641242982344483, "learning_rate": 1.338067555288504e-06, "loss": 0.4663, "step": 16230 }, { "epoch": 0.875883654416923, "grad_norm": 1.0591959959224793, "learning_rate": 1.3377775067955158e-06, "loss": 0.4235, "step": 16231 }, { "epoch": 0.8759376180454373, "grad_norm": 0.8492782775488025, "learning_rate": 1.3374875779290076e-06, "loss": 0.3581, "step": 16232 }, { "epoch": 0.8759915816739517, "grad_norm": 0.8455419438000532, "learning_rate": 1.337197768697312e-06, "loss": 0.3399, "step": 16233 }, { "epoch": 0.8760455453024661, "grad_norm": 0.8886929873726982, "learning_rate": 1.336908079108759e-06, "loss": 0.4403, "step": 16234 }, { "epoch": 0.8760995089309805, "grad_norm": 0.9418267707651176, "learning_rate": 1.3366185091716738e-06, "loss": 0.372, "step": 16235 }, { "epoch": 0.8761534725594949, "grad_norm": 1.0365418339388308, "learning_rate": 1.33632905889438e-06, "loss": 0.4273, "step": 16236 }, { "epoch": 0.8762074361880093, "grad_norm": 1.1473859308375958, "learning_rate": 1.336039728285196e-06, "loss": 0.3898, "step": 16237 }, { "epoch": 0.8762613998165236, "grad_norm": 1.1139524906931684, "learning_rate": 1.3357505173524379e-06, "loss": 0.552, "step": 16238 }, { "epoch": 0.876315363445038, "grad_norm": 1.0690433025245032, "learning_rate": 1.3354614261044171e-06, "loss": 0.4938, "step": 16239 }, { "epoch": 0.8763693270735524, "grad_norm": 1.0361623322611004, "learning_rate": 1.335172454549443e-06, "loss": 0.3914, "step": 16240 }, { "epoch": 0.8764232907020668, "grad_norm": 1.324977538579406, "learning_rate": 1.3348836026958206e-06, "loss": 0.5913, "step": 16241 }, { "epoch": 0.8764772543305812, "grad_norm": 1.139345839799654, "learning_rate": 1.3345948705518518e-06, "loss": 0.5611, "step": 16242 }, { "epoch": 0.8765312179590956, "grad_norm": 0.8520748806038332, "learning_rate": 1.3343062581258356e-06, "loss": 0.3411, "step": 16243 }, { "epoch": 0.8765851815876099, "grad_norm": 1.0429905459336324, "learning_rate": 1.3340177654260671e-06, "loss": 0.4028, "step": 16244 }, { "epoch": 0.8766391452161243, "grad_norm": 1.0024855965241013, "learning_rate": 1.3337293924608375e-06, "loss": 0.4284, "step": 16245 }, { "epoch": 0.8766931088446387, "grad_norm": 0.954267327529957, "learning_rate": 1.333441139238434e-06, "loss": 0.4437, "step": 16246 }, { "epoch": 0.8767470724731531, "grad_norm": 0.9909663586897963, "learning_rate": 1.3331530057671426e-06, "loss": 0.4253, "step": 16247 }, { "epoch": 0.8768010361016675, "grad_norm": 1.056526542700705, "learning_rate": 1.3328649920552442e-06, "loss": 0.4754, "step": 16248 }, { "epoch": 0.8768549997301819, "grad_norm": 1.228122553137251, "learning_rate": 1.332577098111016e-06, "loss": 0.5173, "step": 16249 }, { "epoch": 0.8769089633586963, "grad_norm": 1.1959084745564956, "learning_rate": 1.3322893239427331e-06, "loss": 0.4654, "step": 16250 }, { "epoch": 0.8769629269872106, "grad_norm": 0.8081133166010039, "learning_rate": 1.3320016695586673e-06, "loss": 0.2631, "step": 16251 }, { "epoch": 0.877016890615725, "grad_norm": 1.2128518778678408, "learning_rate": 1.3317141349670837e-06, "loss": 0.4838, "step": 16252 }, { "epoch": 0.8770708542442394, "grad_norm": 1.0326010209938243, "learning_rate": 1.331426720176248e-06, "loss": 0.4467, "step": 16253 }, { "epoch": 0.8771248178727538, "grad_norm": 0.9669595460962193, "learning_rate": 1.3311394251944201e-06, "loss": 0.4061, "step": 16254 
}, { "epoch": 0.8771787815012682, "grad_norm": 1.151576126519565, "learning_rate": 1.330852250029858e-06, "loss": 0.5281, "step": 16255 }, { "epoch": 0.8772327451297826, "grad_norm": 1.4582218738483015, "learning_rate": 1.330565194690814e-06, "loss": 0.4123, "step": 16256 }, { "epoch": 0.877286708758297, "grad_norm": 0.7612206730464174, "learning_rate": 1.3302782591855396e-06, "loss": 0.2508, "step": 16257 }, { "epoch": 0.8773406723868112, "grad_norm": 1.1365948333274234, "learning_rate": 1.3299914435222815e-06, "loss": 0.5228, "step": 16258 }, { "epoch": 0.8773946360153256, "grad_norm": 1.0988728260710177, "learning_rate": 1.3297047477092826e-06, "loss": 0.4832, "step": 16259 }, { "epoch": 0.87744859964384, "grad_norm": 0.9161579948645353, "learning_rate": 1.3294181717547827e-06, "loss": 0.3357, "step": 16260 }, { "epoch": 0.8775025632723544, "grad_norm": 1.044317377943981, "learning_rate": 1.3291317156670192e-06, "loss": 0.6142, "step": 16261 }, { "epoch": 0.8775565269008688, "grad_norm": 1.0141679052160875, "learning_rate": 1.3288453794542244e-06, "loss": 0.3848, "step": 16262 }, { "epoch": 0.8776104905293832, "grad_norm": 1.1473738622462137, "learning_rate": 1.3285591631246278e-06, "loss": 0.4666, "step": 16263 }, { "epoch": 0.8776644541578976, "grad_norm": 0.8929392251063433, "learning_rate": 1.3282730666864558e-06, "loss": 0.4781, "step": 16264 }, { "epoch": 0.8777184177864119, "grad_norm": 1.0910401444815376, "learning_rate": 1.327987090147932e-06, "loss": 0.5229, "step": 16265 }, { "epoch": 0.8777723814149263, "grad_norm": 1.1430890082677725, "learning_rate": 1.327701233517274e-06, "loss": 0.5055, "step": 16266 }, { "epoch": 0.8778263450434407, "grad_norm": 0.8423276584927167, "learning_rate": 1.3274154968026978e-06, "loss": 0.3907, "step": 16267 }, { "epoch": 0.8778803086719551, "grad_norm": 0.8797936187027491, "learning_rate": 1.3271298800124172e-06, "loss": 0.4005, "step": 16268 }, { "epoch": 0.8779342723004695, "grad_norm": 0.9952257154881801, "learning_rate": 1.32684438315464e-06, "loss": 0.49, "step": 16269 }, { "epoch": 0.8779882359289839, "grad_norm": 1.1519047074517272, "learning_rate": 1.3265590062375712e-06, "loss": 0.4759, "step": 16270 }, { "epoch": 0.8780421995574983, "grad_norm": 0.9495959789456008, "learning_rate": 1.3262737492694144e-06, "loss": 0.4568, "step": 16271 }, { "epoch": 0.8780961631860126, "grad_norm": 1.2718010313498125, "learning_rate": 1.3259886122583674e-06, "loss": 0.6703, "step": 16272 }, { "epoch": 0.878150126814527, "grad_norm": 1.1281112484178857, "learning_rate": 1.3257035952126246e-06, "loss": 0.4407, "step": 16273 }, { "epoch": 0.8782040904430414, "grad_norm": 1.163730690891375, "learning_rate": 1.3254186981403789e-06, "loss": 0.5563, "step": 16274 }, { "epoch": 0.8782580540715558, "grad_norm": 0.8861846476819842, "learning_rate": 1.3251339210498171e-06, "loss": 0.3662, "step": 16275 }, { "epoch": 0.8783120177000702, "grad_norm": 1.1940297873957384, "learning_rate": 1.3248492639491249e-06, "loss": 0.5453, "step": 16276 }, { "epoch": 0.8783659813285846, "grad_norm": 1.0393743275512835, "learning_rate": 1.324564726846484e-06, "loss": 0.4336, "step": 16277 }, { "epoch": 0.878419944957099, "grad_norm": 0.9736588115283616, "learning_rate": 1.3242803097500714e-06, "loss": 0.5693, "step": 16278 }, { "epoch": 0.8784739085856133, "grad_norm": 1.054150887677765, "learning_rate": 1.3239960126680628e-06, "loss": 0.4937, "step": 16279 }, { "epoch": 0.8785278722141276, "grad_norm": 1.0605386710162192, "learning_rate": 1.3237118356086277e-06, "loss": 
0.5145, "step": 16280 }, { "epoch": 0.878581835842642, "grad_norm": 1.084493336948569, "learning_rate": 1.3234277785799343e-06, "loss": 0.458, "step": 16281 }, { "epoch": 0.8786357994711564, "grad_norm": 1.1238115004553337, "learning_rate": 1.323143841590147e-06, "loss": 0.4468, "step": 16282 }, { "epoch": 0.8786897630996708, "grad_norm": 1.005988046506106, "learning_rate": 1.3228600246474255e-06, "loss": 0.4485, "step": 16283 }, { "epoch": 0.8787437267281852, "grad_norm": 0.9884198148694481, "learning_rate": 1.3225763277599277e-06, "loss": 0.402, "step": 16284 }, { "epoch": 0.8787976903566996, "grad_norm": 1.2182579825488973, "learning_rate": 1.3222927509358075e-06, "loss": 0.5584, "step": 16285 }, { "epoch": 0.8788516539852139, "grad_norm": 0.9642691254251994, "learning_rate": 1.3220092941832154e-06, "loss": 0.4548, "step": 16286 }, { "epoch": 0.8789056176137283, "grad_norm": 1.0118242796238452, "learning_rate": 1.321725957510297e-06, "loss": 0.5285, "step": 16287 }, { "epoch": 0.8789595812422427, "grad_norm": 0.9921622604962324, "learning_rate": 1.3214427409251965e-06, "loss": 0.4938, "step": 16288 }, { "epoch": 0.8790135448707571, "grad_norm": 1.162160560333159, "learning_rate": 1.3211596444360535e-06, "loss": 0.5148, "step": 16289 }, { "epoch": 0.8790675084992715, "grad_norm": 1.0728172272259302, "learning_rate": 1.320876668051005e-06, "loss": 0.4141, "step": 16290 }, { "epoch": 0.8791214721277859, "grad_norm": 1.002430518001186, "learning_rate": 1.3205938117781836e-06, "loss": 0.431, "step": 16291 }, { "epoch": 0.8791754357563003, "grad_norm": 1.020604290787138, "learning_rate": 1.3203110756257193e-06, "loss": 0.4525, "step": 16292 }, { "epoch": 0.8792293993848146, "grad_norm": 1.2584930814628903, "learning_rate": 1.3200284596017384e-06, "loss": 0.3627, "step": 16293 }, { "epoch": 0.879283363013329, "grad_norm": 0.8189246848053718, "learning_rate": 1.3197459637143622e-06, "loss": 0.3195, "step": 16294 }, { "epoch": 0.8793373266418434, "grad_norm": 0.8684090660133488, "learning_rate": 1.3194635879717115e-06, "loss": 0.3873, "step": 16295 }, { "epoch": 0.8793912902703578, "grad_norm": 0.9943214435637469, "learning_rate": 1.3191813323819012e-06, "loss": 0.4067, "step": 16296 }, { "epoch": 0.8794452538988722, "grad_norm": 1.0003310440870399, "learning_rate": 1.3188991969530434e-06, "loss": 0.3536, "step": 16297 }, { "epoch": 0.8794992175273866, "grad_norm": 1.1062223503083537, "learning_rate": 1.3186171816932477e-06, "loss": 0.4716, "step": 16298 }, { "epoch": 0.879553181155901, "grad_norm": 1.051498998880181, "learning_rate": 1.318335286610619e-06, "loss": 0.4848, "step": 16299 }, { "epoch": 0.8796071447844153, "grad_norm": 1.1142429870573873, "learning_rate": 1.3180535117132606e-06, "loss": 0.4206, "step": 16300 }, { "epoch": 0.8796611084129297, "grad_norm": 1.092564932281193, "learning_rate": 1.3177718570092684e-06, "loss": 0.4857, "step": 16301 }, { "epoch": 0.8797150720414441, "grad_norm": 1.0662420595925992, "learning_rate": 1.3174903225067395e-06, "loss": 0.4736, "step": 16302 }, { "epoch": 0.8797690356699585, "grad_norm": 1.0900640418566727, "learning_rate": 1.3172089082137646e-06, "loss": 0.3477, "step": 16303 }, { "epoch": 0.8798229992984729, "grad_norm": 1.0214498841325603, "learning_rate": 1.3169276141384323e-06, "loss": 0.4155, "step": 16304 }, { "epoch": 0.8798769629269872, "grad_norm": 1.0322377284065911, "learning_rate": 1.3166464402888265e-06, "loss": 0.4335, "step": 16305 }, { "epoch": 0.8799309265555016, "grad_norm": 0.8937511083639723, "learning_rate": 
1.3163653866730295e-06, "loss": 0.3172, "step": 16306 }, { "epoch": 0.8799848901840159, "grad_norm": 0.9090745380658347, "learning_rate": 1.3160844532991185e-06, "loss": 0.3075, "step": 16307 }, { "epoch": 0.8800388538125303, "grad_norm": 0.9560315617941141, "learning_rate": 1.3158036401751675e-06, "loss": 0.425, "step": 16308 }, { "epoch": 0.8800928174410447, "grad_norm": 1.0274506953536002, "learning_rate": 1.3155229473092484e-06, "loss": 0.3914, "step": 16309 }, { "epoch": 0.8801467810695591, "grad_norm": 0.9764322133841534, "learning_rate": 1.3152423747094275e-06, "loss": 0.4817, "step": 16310 }, { "epoch": 0.8802007446980735, "grad_norm": 1.2006463471050075, "learning_rate": 1.3149619223837689e-06, "loss": 0.5441, "step": 16311 }, { "epoch": 0.8802547083265879, "grad_norm": 0.8727541896273092, "learning_rate": 1.3146815903403336e-06, "loss": 0.4283, "step": 16312 }, { "epoch": 0.8803086719551022, "grad_norm": 1.1975838672454306, "learning_rate": 1.3144013785871785e-06, "loss": 0.4389, "step": 16313 }, { "epoch": 0.8803626355836166, "grad_norm": 1.1387391331738848, "learning_rate": 1.3141212871323564e-06, "loss": 0.53, "step": 16314 }, { "epoch": 0.880416599212131, "grad_norm": 1.2963482341722576, "learning_rate": 1.3138413159839186e-06, "loss": 0.5771, "step": 16315 }, { "epoch": 0.8804705628406454, "grad_norm": 1.2321489481052343, "learning_rate": 1.313561465149911e-06, "loss": 0.5682, "step": 16316 }, { "epoch": 0.8805245264691598, "grad_norm": 1.0115514473672362, "learning_rate": 1.3132817346383768e-06, "loss": 0.4291, "step": 16317 }, { "epoch": 0.8805784900976742, "grad_norm": 0.9695907867074682, "learning_rate": 1.313002124457356e-06, "loss": 0.4329, "step": 16318 }, { "epoch": 0.8806324537261886, "grad_norm": 0.9451894949924334, "learning_rate": 1.3127226346148846e-06, "loss": 0.4107, "step": 16319 }, { "epoch": 0.8806864173547029, "grad_norm": 0.8122714306373786, "learning_rate": 1.312443265118997e-06, "loss": 0.3706, "step": 16320 }, { "epoch": 0.8807403809832173, "grad_norm": 0.9802070734311316, "learning_rate": 1.3121640159777197e-06, "loss": 0.5194, "step": 16321 }, { "epoch": 0.8807943446117317, "grad_norm": 1.0021207485052832, "learning_rate": 1.3118848871990805e-06, "loss": 0.4204, "step": 16322 }, { "epoch": 0.8808483082402461, "grad_norm": 0.7861167473018752, "learning_rate": 1.311605878791101e-06, "loss": 0.2756, "step": 16323 }, { "epoch": 0.8809022718687605, "grad_norm": 1.1705889369574873, "learning_rate": 1.3113269907618012e-06, "loss": 0.4848, "step": 16324 }, { "epoch": 0.8809562354972749, "grad_norm": 1.0345147934271375, "learning_rate": 1.3110482231191957e-06, "loss": 0.4206, "step": 16325 }, { "epoch": 0.8810101991257893, "grad_norm": 1.2129802832634726, "learning_rate": 1.3107695758712971e-06, "loss": 0.57, "step": 16326 }, { "epoch": 0.8810641627543035, "grad_norm": 1.038242858607382, "learning_rate": 1.3104910490261145e-06, "loss": 0.412, "step": 16327 }, { "epoch": 0.881118126382818, "grad_norm": 1.0011833385531665, "learning_rate": 1.3102126425916513e-06, "loss": 0.4001, "step": 16328 }, { "epoch": 0.8811720900113323, "grad_norm": 1.0154417372411588, "learning_rate": 1.30993435657591e-06, "loss": 0.5307, "step": 16329 }, { "epoch": 0.8812260536398467, "grad_norm": 1.118090838711728, "learning_rate": 1.30965619098689e-06, "loss": 0.5883, "step": 16330 }, { "epoch": 0.8812800172683611, "grad_norm": 1.0056359936343076, "learning_rate": 1.3093781458325844e-06, "loss": 0.4588, "step": 16331 }, { "epoch": 0.8813339808968755, "grad_norm": 0.9452585500044672, 
"learning_rate": 1.3091002211209853e-06, "loss": 0.4124, "step": 16332 }, { "epoch": 0.8813879445253899, "grad_norm": 1.0415532879888756, "learning_rate": 1.3088224168600805e-06, "loss": 0.4028, "step": 16333 }, { "epoch": 0.8814419081539042, "grad_norm": 0.9465635993855074, "learning_rate": 1.3085447330578551e-06, "loss": 0.3763, "step": 16334 }, { "epoch": 0.8814958717824186, "grad_norm": 0.8494108898611828, "learning_rate": 1.3082671697222888e-06, "loss": 0.4193, "step": 16335 }, { "epoch": 0.881549835410933, "grad_norm": 1.0805863944042722, "learning_rate": 1.3079897268613586e-06, "loss": 0.5026, "step": 16336 }, { "epoch": 0.8816037990394474, "grad_norm": 1.410091983415008, "learning_rate": 1.3077124044830403e-06, "loss": 0.6464, "step": 16337 }, { "epoch": 0.8816577626679618, "grad_norm": 1.1653730334290522, "learning_rate": 1.307435202595303e-06, "loss": 0.6247, "step": 16338 }, { "epoch": 0.8817117262964762, "grad_norm": 1.2598140051114024, "learning_rate": 1.307158121206114e-06, "loss": 0.5201, "step": 16339 }, { "epoch": 0.8817656899249906, "grad_norm": 0.7430563731218666, "learning_rate": 1.306881160323438e-06, "loss": 0.3296, "step": 16340 }, { "epoch": 0.8818196535535049, "grad_norm": 0.9912882769534633, "learning_rate": 1.3066043199552344e-06, "loss": 0.4126, "step": 16341 }, { "epoch": 0.8818736171820193, "grad_norm": 1.1267548079484635, "learning_rate": 1.3063276001094594e-06, "loss": 0.423, "step": 16342 }, { "epoch": 0.8819275808105337, "grad_norm": 1.2567577484779262, "learning_rate": 1.3060510007940659e-06, "loss": 0.5822, "step": 16343 }, { "epoch": 0.8819815444390481, "grad_norm": 0.8549632879371133, "learning_rate": 1.3057745220170048e-06, "loss": 0.3933, "step": 16344 }, { "epoch": 0.8820355080675625, "grad_norm": 1.1284381277882378, "learning_rate": 1.3054981637862215e-06, "loss": 0.4522, "step": 16345 }, { "epoch": 0.8820894716960769, "grad_norm": 1.134526738087843, "learning_rate": 1.3052219261096602e-06, "loss": 0.4045, "step": 16346 }, { "epoch": 0.8821434353245913, "grad_norm": 0.8303747636044022, "learning_rate": 1.3049458089952583e-06, "loss": 0.4033, "step": 16347 }, { "epoch": 0.8821973989531056, "grad_norm": 1.0653920681971525, "learning_rate": 1.3046698124509535e-06, "loss": 0.4707, "step": 16348 }, { "epoch": 0.88225136258162, "grad_norm": 0.9851862929910917, "learning_rate": 1.3043939364846773e-06, "loss": 0.4299, "step": 16349 }, { "epoch": 0.8823053262101344, "grad_norm": 1.0319375814748237, "learning_rate": 1.304118181104358e-06, "loss": 0.4195, "step": 16350 }, { "epoch": 0.8823592898386488, "grad_norm": 0.9504788332112963, "learning_rate": 1.3038425463179218e-06, "loss": 0.443, "step": 16351 }, { "epoch": 0.8824132534671632, "grad_norm": 1.0747054378420613, "learning_rate": 1.303567032133291e-06, "loss": 0.5255, "step": 16352 }, { "epoch": 0.8824672170956775, "grad_norm": 0.9986198894569697, "learning_rate": 1.3032916385583836e-06, "loss": 0.4467, "step": 16353 }, { "epoch": 0.882521180724192, "grad_norm": 0.9617094428782382, "learning_rate": 1.3030163656011156e-06, "loss": 0.3755, "step": 16354 }, { "epoch": 0.8825751443527062, "grad_norm": 1.0550446644592442, "learning_rate": 1.3027412132693981e-06, "loss": 0.4058, "step": 16355 }, { "epoch": 0.8826291079812206, "grad_norm": 1.2047229261507832, "learning_rate": 1.3024661815711387e-06, "loss": 0.3166, "step": 16356 }, { "epoch": 0.882683071609735, "grad_norm": 0.9911468637815285, "learning_rate": 1.3021912705142423e-06, "loss": 0.3818, "step": 16357 }, { "epoch": 0.8827370352382494, "grad_norm": 
1.0625514569716301, "learning_rate": 1.3019164801066108e-06, "loss": 0.6496, "step": 16358 }, { "epoch": 0.8827909988667638, "grad_norm": 1.178329684062857, "learning_rate": 1.3016418103561411e-06, "loss": 0.4474, "step": 16359 }, { "epoch": 0.8828449624952782, "grad_norm": 1.1620133142654654, "learning_rate": 1.3013672612707281e-06, "loss": 0.6303, "step": 16360 }, { "epoch": 0.8828989261237926, "grad_norm": 0.8902551163234473, "learning_rate": 1.3010928328582629e-06, "loss": 0.4184, "step": 16361 }, { "epoch": 0.8829528897523069, "grad_norm": 0.9698666511251441, "learning_rate": 1.3008185251266315e-06, "loss": 0.5415, "step": 16362 }, { "epoch": 0.8830068533808213, "grad_norm": 0.9136388490165804, "learning_rate": 1.3005443380837188e-06, "loss": 0.3938, "step": 16363 }, { "epoch": 0.8830608170093357, "grad_norm": 1.0263546395930037, "learning_rate": 1.3002702717374054e-06, "loss": 0.5587, "step": 16364 }, { "epoch": 0.8831147806378501, "grad_norm": 0.8966050535143485, "learning_rate": 1.2999963260955678e-06, "loss": 0.3742, "step": 16365 }, { "epoch": 0.8831687442663645, "grad_norm": 0.8946996138187174, "learning_rate": 1.2997225011660795e-06, "loss": 0.3398, "step": 16366 }, { "epoch": 0.8832227078948789, "grad_norm": 1.112348334987563, "learning_rate": 1.299448796956811e-06, "loss": 0.4108, "step": 16367 }, { "epoch": 0.8832766715233933, "grad_norm": 1.2343550785587607, "learning_rate": 1.2991752134756278e-06, "loss": 0.5867, "step": 16368 }, { "epoch": 0.8833306351519076, "grad_norm": 0.9195066623619099, "learning_rate": 1.2989017507303944e-06, "loss": 0.3703, "step": 16369 }, { "epoch": 0.883384598780422, "grad_norm": 1.1365859584556606, "learning_rate": 1.2986284087289691e-06, "loss": 0.6064, "step": 16370 }, { "epoch": 0.8834385624089364, "grad_norm": 1.1635733551956753, "learning_rate": 1.2983551874792086e-06, "loss": 0.555, "step": 16371 }, { "epoch": 0.8834925260374508, "grad_norm": 1.0521671185156922, "learning_rate": 1.2980820869889651e-06, "loss": 0.3635, "step": 16372 }, { "epoch": 0.8835464896659652, "grad_norm": 0.9348341114302332, "learning_rate": 1.2978091072660884e-06, "loss": 0.417, "step": 16373 }, { "epoch": 0.8836004532944796, "grad_norm": 0.8995182468958178, "learning_rate": 1.2975362483184239e-06, "loss": 0.4144, "step": 16374 }, { "epoch": 0.883654416922994, "grad_norm": 1.1337785856229525, "learning_rate": 1.2972635101538142e-06, "loss": 0.4758, "step": 16375 }, { "epoch": 0.8837083805515082, "grad_norm": 0.8951134363350084, "learning_rate": 1.296990892780098e-06, "loss": 0.4646, "step": 16376 }, { "epoch": 0.8837623441800226, "grad_norm": 0.9840971984922269, "learning_rate": 1.2967183962051097e-06, "loss": 0.5684, "step": 16377 }, { "epoch": 0.883816307808537, "grad_norm": 0.8809038442374075, "learning_rate": 1.296446020436682e-06, "loss": 0.3909, "step": 16378 }, { "epoch": 0.8838702714370514, "grad_norm": 1.088991111222965, "learning_rate": 1.2961737654826432e-06, "loss": 0.3985, "step": 16379 }, { "epoch": 0.8839242350655658, "grad_norm": 0.8516374219043824, "learning_rate": 1.2959016313508178e-06, "loss": 0.4043, "step": 16380 }, { "epoch": 0.8839781986940802, "grad_norm": 0.7360247716777377, "learning_rate": 1.2956296180490278e-06, "loss": 0.2913, "step": 16381 }, { "epoch": 0.8840321623225945, "grad_norm": 1.0017739607996576, "learning_rate": 1.2953577255850907e-06, "loss": 0.4822, "step": 16382 }, { "epoch": 0.8840861259511089, "grad_norm": 0.831955594760269, "learning_rate": 1.2950859539668208e-06, "loss": 0.2627, "step": 16383 }, { "epoch": 
0.8841400895796233, "grad_norm": 1.4616939205791322, "learning_rate": 1.2948143032020296e-06, "loss": 0.6098, "step": 16384 }, { "epoch": 0.8841940532081377, "grad_norm": 1.0555503936414146, "learning_rate": 1.2945427732985242e-06, "loss": 0.4732, "step": 16385 }, { "epoch": 0.8842480168366521, "grad_norm": 1.0962561779684534, "learning_rate": 1.2942713642641086e-06, "loss": 0.4363, "step": 16386 }, { "epoch": 0.8843019804651665, "grad_norm": 1.0287623960233445, "learning_rate": 1.2940000761065837e-06, "loss": 0.3768, "step": 16387 }, { "epoch": 0.8843559440936809, "grad_norm": 0.8100250947921226, "learning_rate": 1.2937289088337464e-06, "loss": 0.2968, "step": 16388 }, { "epoch": 0.8844099077221952, "grad_norm": 0.9334458354411618, "learning_rate": 1.2934578624533913e-06, "loss": 0.4371, "step": 16389 }, { "epoch": 0.8844638713507096, "grad_norm": 1.038967261868459, "learning_rate": 1.2931869369733065e-06, "loss": 0.5252, "step": 16390 }, { "epoch": 0.884517834979224, "grad_norm": 1.079875836918772, "learning_rate": 1.2929161324012804e-06, "loss": 0.4523, "step": 16391 }, { "epoch": 0.8845717986077384, "grad_norm": 1.0253542262590924, "learning_rate": 1.2926454487450953e-06, "loss": 0.3709, "step": 16392 }, { "epoch": 0.8846257622362528, "grad_norm": 0.9320960900673885, "learning_rate": 1.2923748860125313e-06, "loss": 0.3165, "step": 16393 }, { "epoch": 0.8846797258647672, "grad_norm": 1.2605177479763339, "learning_rate": 1.2921044442113647e-06, "loss": 0.5414, "step": 16394 }, { "epoch": 0.8847336894932816, "grad_norm": 1.081987345831397, "learning_rate": 1.2918341233493675e-06, "loss": 0.4982, "step": 16395 }, { "epoch": 0.8847876531217959, "grad_norm": 0.9654210339201181, "learning_rate": 1.291563923434311e-06, "loss": 0.3622, "step": 16396 }, { "epoch": 0.8848416167503103, "grad_norm": 1.1419374959863091, "learning_rate": 1.2912938444739587e-06, "loss": 0.6342, "step": 16397 }, { "epoch": 0.8848955803788247, "grad_norm": 0.8093493129835441, "learning_rate": 1.291023886476074e-06, "loss": 0.2975, "step": 16398 }, { "epoch": 0.884949544007339, "grad_norm": 1.20709070793282, "learning_rate": 1.2907540494484155e-06, "loss": 0.5039, "step": 16399 }, { "epoch": 0.8850035076358534, "grad_norm": 1.166165220879286, "learning_rate": 1.290484333398739e-06, "loss": 0.4084, "step": 16400 }, { "epoch": 0.8850574712643678, "grad_norm": 0.8835260819370534, "learning_rate": 1.290214738334796e-06, "loss": 0.4651, "step": 16401 }, { "epoch": 0.8851114348928822, "grad_norm": 0.7861476195240932, "learning_rate": 1.289945264264335e-06, "loss": 0.3078, "step": 16402 }, { "epoch": 0.8851653985213965, "grad_norm": 1.0378189422021185, "learning_rate": 1.2896759111951018e-06, "loss": 0.4216, "step": 16403 }, { "epoch": 0.8852193621499109, "grad_norm": 1.0287793796246105, "learning_rate": 1.2894066791348365e-06, "loss": 0.4273, "step": 16404 }, { "epoch": 0.8852733257784253, "grad_norm": 0.8964218092818278, "learning_rate": 1.2891375680912773e-06, "loss": 0.3087, "step": 16405 }, { "epoch": 0.8853272894069397, "grad_norm": 1.1856203571411077, "learning_rate": 1.2888685780721602e-06, "loss": 0.4933, "step": 16406 }, { "epoch": 0.8853812530354541, "grad_norm": 1.1615505767755219, "learning_rate": 1.2885997090852146e-06, "loss": 0.5066, "step": 16407 }, { "epoch": 0.8854352166639685, "grad_norm": 1.073095644857882, "learning_rate": 1.2883309611381686e-06, "loss": 0.3942, "step": 16408 }, { "epoch": 0.8854891802924829, "grad_norm": 0.92725053438267, "learning_rate": 1.2880623342387461e-06, "loss": 0.3924, "step": 
16409 }, { "epoch": 0.8855431439209972, "grad_norm": 1.1362806845785176, "learning_rate": 1.2877938283946685e-06, "loss": 0.5555, "step": 16410 }, { "epoch": 0.8855971075495116, "grad_norm": 1.0100703540242997, "learning_rate": 1.2875254436136523e-06, "loss": 0.4284, "step": 16411 }, { "epoch": 0.885651071178026, "grad_norm": 1.107278794918129, "learning_rate": 1.287257179903411e-06, "loss": 0.4864, "step": 16412 }, { "epoch": 0.8857050348065404, "grad_norm": 1.2299812254296707, "learning_rate": 1.2869890372716554e-06, "loss": 0.5976, "step": 16413 }, { "epoch": 0.8857589984350548, "grad_norm": 1.2670854198039954, "learning_rate": 1.2867210157260917e-06, "loss": 0.5084, "step": 16414 }, { "epoch": 0.8858129620635692, "grad_norm": 0.7505876168584866, "learning_rate": 1.2864531152744232e-06, "loss": 0.2983, "step": 16415 }, { "epoch": 0.8858669256920836, "grad_norm": 0.9863252537314471, "learning_rate": 1.2861853359243492e-06, "loss": 0.3659, "step": 16416 }, { "epoch": 0.8859208893205979, "grad_norm": 0.9810064654805469, "learning_rate": 1.2859176776835671e-06, "loss": 0.4477, "step": 16417 }, { "epoch": 0.8859748529491123, "grad_norm": 1.041505178014188, "learning_rate": 1.2856501405597683e-06, "loss": 0.5491, "step": 16418 }, { "epoch": 0.8860288165776267, "grad_norm": 0.9557228680251226, "learning_rate": 1.2853827245606435e-06, "loss": 0.5254, "step": 16419 }, { "epoch": 0.8860827802061411, "grad_norm": 1.2908854821482454, "learning_rate": 1.285115429693877e-06, "loss": 0.5596, "step": 16420 }, { "epoch": 0.8861367438346555, "grad_norm": 1.2914466202607622, "learning_rate": 1.2848482559671522e-06, "loss": 0.6656, "step": 16421 }, { "epoch": 0.8861907074631699, "grad_norm": 1.104565974705371, "learning_rate": 1.2845812033881478e-06, "loss": 0.3877, "step": 16422 }, { "epoch": 0.8862446710916843, "grad_norm": 1.0328896805146894, "learning_rate": 1.2843142719645381e-06, "loss": 0.4138, "step": 16423 }, { "epoch": 0.8862986347201985, "grad_norm": 0.8151965530529152, "learning_rate": 1.2840474617039977e-06, "loss": 0.3329, "step": 16424 }, { "epoch": 0.8863525983487129, "grad_norm": 1.0709161453888374, "learning_rate": 1.2837807726141919e-06, "loss": 0.4615, "step": 16425 }, { "epoch": 0.8864065619772273, "grad_norm": 1.060571317968483, "learning_rate": 1.2835142047027868e-06, "loss": 0.5468, "step": 16426 }, { "epoch": 0.8864605256057417, "grad_norm": 1.0674326013600595, "learning_rate": 1.283247757977444e-06, "loss": 0.525, "step": 16427 }, { "epoch": 0.8865144892342561, "grad_norm": 0.9698898416388564, "learning_rate": 1.2829814324458214e-06, "loss": 0.4013, "step": 16428 }, { "epoch": 0.8865684528627705, "grad_norm": 1.0769489003217798, "learning_rate": 1.2827152281155736e-06, "loss": 0.3654, "step": 16429 }, { "epoch": 0.8866224164912849, "grad_norm": 1.0490606327606042, "learning_rate": 1.2824491449943512e-06, "loss": 0.5048, "step": 16430 }, { "epoch": 0.8866763801197992, "grad_norm": 1.0899988682707076, "learning_rate": 1.2821831830898023e-06, "loss": 0.4952, "step": 16431 }, { "epoch": 0.8867303437483136, "grad_norm": 0.8887075321321516, "learning_rate": 1.2819173424095705e-06, "loss": 0.4443, "step": 16432 }, { "epoch": 0.886784307376828, "grad_norm": 1.2079843910976504, "learning_rate": 1.281651622961296e-06, "loss": 0.6927, "step": 16433 }, { "epoch": 0.8868382710053424, "grad_norm": 0.8972898506224976, "learning_rate": 1.2813860247526163e-06, "loss": 0.5287, "step": 16434 }, { "epoch": 0.8868922346338568, "grad_norm": 1.0506842897137345, "learning_rate": 1.2811205477911648e-06, 
"loss": 0.4441, "step": 16435 }, { "epoch": 0.8869461982623712, "grad_norm": 1.002325114802535, "learning_rate": 1.2808551920845713e-06, "loss": 0.4567, "step": 16436 }, { "epoch": 0.8870001618908856, "grad_norm": 0.9860623277686188, "learning_rate": 1.280589957640463e-06, "loss": 0.4985, "step": 16437 }, { "epoch": 0.8870541255193999, "grad_norm": 1.0565656584899619, "learning_rate": 1.2803248444664631e-06, "loss": 0.4944, "step": 16438 }, { "epoch": 0.8871080891479143, "grad_norm": 1.0295451897352184, "learning_rate": 1.2800598525701905e-06, "loss": 0.407, "step": 16439 }, { "epoch": 0.8871620527764287, "grad_norm": 1.004366283270853, "learning_rate": 1.2797949819592618e-06, "loss": 0.4054, "step": 16440 }, { "epoch": 0.8872160164049431, "grad_norm": 1.0213623081127725, "learning_rate": 1.2795302326412892e-06, "loss": 0.4755, "step": 16441 }, { "epoch": 0.8872699800334575, "grad_norm": 1.1541480890476314, "learning_rate": 1.2792656046238823e-06, "loss": 0.5604, "step": 16442 }, { "epoch": 0.8873239436619719, "grad_norm": 0.8456243326759948, "learning_rate": 1.2790010979146467e-06, "loss": 0.3824, "step": 16443 }, { "epoch": 0.8873779072904863, "grad_norm": 1.240206881450869, "learning_rate": 1.2787367125211844e-06, "loss": 0.4309, "step": 16444 }, { "epoch": 0.8874318709190006, "grad_norm": 0.8075517665117974, "learning_rate": 1.2784724484510952e-06, "loss": 0.333, "step": 16445 }, { "epoch": 0.887485834547515, "grad_norm": 0.9864276806200334, "learning_rate": 1.2782083057119723e-06, "loss": 0.4129, "step": 16446 }, { "epoch": 0.8875397981760293, "grad_norm": 0.9795681691921394, "learning_rate": 1.2779442843114087e-06, "loss": 0.4754, "step": 16447 }, { "epoch": 0.8875937618045437, "grad_norm": 1.290433594121106, "learning_rate": 1.2776803842569924e-06, "loss": 0.5115, "step": 16448 }, { "epoch": 0.8876477254330581, "grad_norm": 0.9854478626227076, "learning_rate": 1.2774166055563084e-06, "loss": 0.537, "step": 16449 }, { "epoch": 0.8877016890615725, "grad_norm": 0.7989539072069418, "learning_rate": 1.2771529482169379e-06, "loss": 0.3242, "step": 16450 }, { "epoch": 0.8877556526900868, "grad_norm": 0.9835767919956414, "learning_rate": 1.2768894122464588e-06, "loss": 0.4498, "step": 16451 }, { "epoch": 0.8878096163186012, "grad_norm": 0.8525160141914656, "learning_rate": 1.2766259976524444e-06, "loss": 0.3453, "step": 16452 }, { "epoch": 0.8878635799471156, "grad_norm": 0.8189167659545582, "learning_rate": 1.2763627044424665e-06, "loss": 0.3584, "step": 16453 }, { "epoch": 0.88791754357563, "grad_norm": 0.9919440461034471, "learning_rate": 1.2760995326240923e-06, "loss": 0.457, "step": 16454 }, { "epoch": 0.8879715072041444, "grad_norm": 0.9519144645747675, "learning_rate": 1.2758364822048855e-06, "loss": 0.416, "step": 16455 }, { "epoch": 0.8880254708326588, "grad_norm": 1.0663090938568633, "learning_rate": 1.2755735531924063e-06, "loss": 0.4505, "step": 16456 }, { "epoch": 0.8880794344611732, "grad_norm": 1.051992675941245, "learning_rate": 1.275310745594211e-06, "loss": 0.5079, "step": 16457 }, { "epoch": 0.8881333980896875, "grad_norm": 0.9160025602100328, "learning_rate": 1.275048059417855e-06, "loss": 0.4837, "step": 16458 }, { "epoch": 0.8881873617182019, "grad_norm": 1.0354868373294255, "learning_rate": 1.274785494670886e-06, "loss": 0.5487, "step": 16459 }, { "epoch": 0.8882413253467163, "grad_norm": 0.8962256058130614, "learning_rate": 1.2745230513608512e-06, "loss": 0.3515, "step": 16460 }, { "epoch": 0.8882952889752307, "grad_norm": 1.1315919440814448, "learning_rate": 
1.2742607294952936e-06, "loss": 0.5272, "step": 16461 }, { "epoch": 0.8883492526037451, "grad_norm": 0.9438513699188169, "learning_rate": 1.2739985290817519e-06, "loss": 0.3895, "step": 16462 }, { "epoch": 0.8884032162322595, "grad_norm": 1.1674968190840456, "learning_rate": 1.2737364501277627e-06, "loss": 0.623, "step": 16463 }, { "epoch": 0.8884571798607739, "grad_norm": 1.0396420498317855, "learning_rate": 1.2734744926408585e-06, "loss": 0.5119, "step": 16464 }, { "epoch": 0.8885111434892882, "grad_norm": 1.1078827166579142, "learning_rate": 1.2732126566285676e-06, "loss": 0.4782, "step": 16465 }, { "epoch": 0.8885651071178026, "grad_norm": 1.0856388166542545, "learning_rate": 1.272950942098416e-06, "loss": 0.5306, "step": 16466 }, { "epoch": 0.888619070746317, "grad_norm": 0.8935620009613143, "learning_rate": 1.2726893490579253e-06, "loss": 0.3611, "step": 16467 }, { "epoch": 0.8886730343748314, "grad_norm": 0.8170034191561761, "learning_rate": 1.2724278775146143e-06, "loss": 0.2925, "step": 16468 }, { "epoch": 0.8887269980033458, "grad_norm": 0.8609069150892673, "learning_rate": 1.2721665274759979e-06, "loss": 0.3795, "step": 16469 }, { "epoch": 0.8887809616318602, "grad_norm": 1.009447510559249, "learning_rate": 1.271905298949587e-06, "loss": 0.4837, "step": 16470 }, { "epoch": 0.8888349252603746, "grad_norm": 0.7341122017535853, "learning_rate": 1.2716441919428903e-06, "loss": 0.3017, "step": 16471 }, { "epoch": 0.8888888888888888, "grad_norm": 1.1207995725947584, "learning_rate": 1.2713832064634127e-06, "loss": 0.4981, "step": 16472 }, { "epoch": 0.8889428525174032, "grad_norm": 1.0568906055695508, "learning_rate": 1.2711223425186539e-06, "loss": 0.4212, "step": 16473 }, { "epoch": 0.8889968161459176, "grad_norm": 0.7845839562116749, "learning_rate": 1.270861600116111e-06, "loss": 0.3198, "step": 16474 }, { "epoch": 0.889050779774432, "grad_norm": 0.7979085036851493, "learning_rate": 1.2706009792632799e-06, "loss": 0.3025, "step": 16475 }, { "epoch": 0.8891047434029464, "grad_norm": 1.0221419998354604, "learning_rate": 1.2703404799676497e-06, "loss": 0.4756, "step": 16476 }, { "epoch": 0.8891587070314608, "grad_norm": 1.2026524847805997, "learning_rate": 1.270080102236708e-06, "loss": 0.5554, "step": 16477 }, { "epoch": 0.8892126706599752, "grad_norm": 0.9589015940299753, "learning_rate": 1.2698198460779388e-06, "loss": 0.4472, "step": 16478 }, { "epoch": 0.8892666342884895, "grad_norm": 0.9712733035366702, "learning_rate": 1.2695597114988213e-06, "loss": 0.4188, "step": 16479 }, { "epoch": 0.8893205979170039, "grad_norm": 1.0225060043070002, "learning_rate": 1.2692996985068317e-06, "loss": 0.4887, "step": 16480 }, { "epoch": 0.8893745615455183, "grad_norm": 1.0844642834987794, "learning_rate": 1.269039807109444e-06, "loss": 0.4168, "step": 16481 }, { "epoch": 0.8894285251740327, "grad_norm": 0.7601127380542876, "learning_rate": 1.2687800373141272e-06, "loss": 0.2665, "step": 16482 }, { "epoch": 0.8894824888025471, "grad_norm": 1.2674125123268303, "learning_rate": 1.2685203891283473e-06, "loss": 0.5738, "step": 16483 }, { "epoch": 0.8895364524310615, "grad_norm": 0.8785650203446898, "learning_rate": 1.2682608625595668e-06, "loss": 0.316, "step": 16484 }, { "epoch": 0.8895904160595759, "grad_norm": 0.7980527998176535, "learning_rate": 1.2680014576152452e-06, "loss": 0.2798, "step": 16485 }, { "epoch": 0.8896443796880902, "grad_norm": 1.011726549358425, "learning_rate": 1.2677421743028385e-06, "loss": 0.5324, "step": 16486 }, { "epoch": 0.8896983433166046, "grad_norm": 
1.1770644150639689, "learning_rate": 1.2674830126297973e-06, "loss": 0.6624, "step": 16487 }, { "epoch": 0.889752306945119, "grad_norm": 0.9616074781754076, "learning_rate": 1.2672239726035712e-06, "loss": 0.3578, "step": 16488 }, { "epoch": 0.8898062705736334, "grad_norm": 1.0117915401309545, "learning_rate": 1.2669650542316048e-06, "loss": 0.4478, "step": 16489 }, { "epoch": 0.8898602342021478, "grad_norm": 1.1999455862455493, "learning_rate": 1.2667062575213403e-06, "loss": 0.4966, "step": 16490 }, { "epoch": 0.8899141978306622, "grad_norm": 1.0242588006543893, "learning_rate": 1.2664475824802152e-06, "loss": 0.3755, "step": 16491 }, { "epoch": 0.8899681614591766, "grad_norm": 1.105176615139589, "learning_rate": 1.266189029115664e-06, "loss": 0.5054, "step": 16492 }, { "epoch": 0.8900221250876909, "grad_norm": 0.8907792860475334, "learning_rate": 1.265930597435119e-06, "loss": 0.3603, "step": 16493 }, { "epoch": 0.8900760887162052, "grad_norm": 1.0198933135094712, "learning_rate": 1.2656722874460062e-06, "loss": 0.497, "step": 16494 }, { "epoch": 0.8901300523447196, "grad_norm": 1.044316266920108, "learning_rate": 1.26541409915575e-06, "loss": 0.4672, "step": 16495 }, { "epoch": 0.890184015973234, "grad_norm": 0.8473874034473863, "learning_rate": 1.2651560325717717e-06, "loss": 0.2748, "step": 16496 }, { "epoch": 0.8902379796017484, "grad_norm": 0.9419617667005832, "learning_rate": 1.264898087701488e-06, "loss": 0.3286, "step": 16497 }, { "epoch": 0.8902919432302628, "grad_norm": 1.308104805888339, "learning_rate": 1.2646402645523124e-06, "loss": 0.5435, "step": 16498 }, { "epoch": 0.8903459068587772, "grad_norm": 0.9358306876718675, "learning_rate": 1.264382563131655e-06, "loss": 0.4782, "step": 16499 }, { "epoch": 0.8903998704872915, "grad_norm": 1.084478990800832, "learning_rate": 1.2641249834469236e-06, "loss": 0.5039, "step": 16500 }, { "epoch": 0.8903998704872915, "eval_loss": 0.5293381810188293, "eval_runtime": 163.501, "eval_samples_per_second": 21.034, "eval_steps_per_second": 0.881, "step": 16500 }, { "epoch": 0.8904538341158059, "grad_norm": 1.0217530888648618, "learning_rate": 1.263867525505519e-06, "loss": 0.3723, "step": 16501 }, { "epoch": 0.8905077977443203, "grad_norm": 0.8472186345214957, "learning_rate": 1.2636101893148422e-06, "loss": 0.3893, "step": 16502 }, { "epoch": 0.8905617613728347, "grad_norm": 1.2526475131335866, "learning_rate": 1.2633529748822894e-06, "loss": 0.6571, "step": 16503 }, { "epoch": 0.8906157250013491, "grad_norm": 1.302715439291715, "learning_rate": 1.2630958822152526e-06, "loss": 0.5258, "step": 16504 }, { "epoch": 0.8906696886298635, "grad_norm": 0.9318817368330088, "learning_rate": 1.2628389113211214e-06, "loss": 0.4626, "step": 16505 }, { "epoch": 0.8907236522583779, "grad_norm": 0.974352124059338, "learning_rate": 1.2625820622072808e-06, "loss": 0.3899, "step": 16506 }, { "epoch": 0.8907776158868922, "grad_norm": 1.0275919221901608, "learning_rate": 1.2623253348811143e-06, "loss": 0.5347, "step": 16507 }, { "epoch": 0.8908315795154066, "grad_norm": 0.9639482680189808, "learning_rate": 1.2620687293499992e-06, "loss": 0.4032, "step": 16508 }, { "epoch": 0.890885543143921, "grad_norm": 1.1985002830238212, "learning_rate": 1.2618122456213105e-06, "loss": 0.5555, "step": 16509 }, { "epoch": 0.8909395067724354, "grad_norm": 0.9135884283006279, "learning_rate": 1.2615558837024204e-06, "loss": 0.4077, "step": 16510 }, { "epoch": 0.8909934704009498, "grad_norm": 1.0172487447063896, "learning_rate": 1.2612996436006969e-06, "loss": 0.4663, "step": 
16511 }, { "epoch": 0.8910474340294642, "grad_norm": 1.0748884954276077, "learning_rate": 1.2610435253235042e-06, "loss": 0.4069, "step": 16512 }, { "epoch": 0.8911013976579786, "grad_norm": 1.1684402971671777, "learning_rate": 1.2607875288782043e-06, "loss": 0.5033, "step": 16513 }, { "epoch": 0.8911553612864929, "grad_norm": 0.966950269708176, "learning_rate": 1.2605316542721541e-06, "loss": 0.3671, "step": 16514 }, { "epoch": 0.8912093249150073, "grad_norm": 1.3241290628388032, "learning_rate": 1.2602759015127075e-06, "loss": 0.5862, "step": 16515 }, { "epoch": 0.8912632885435217, "grad_norm": 1.1703286419328354, "learning_rate": 1.2600202706072157e-06, "loss": 0.6544, "step": 16516 }, { "epoch": 0.8913172521720361, "grad_norm": 1.138413146711598, "learning_rate": 1.259764761563025e-06, "loss": 0.4253, "step": 16517 }, { "epoch": 0.8913712158005505, "grad_norm": 1.0627597472587622, "learning_rate": 1.2595093743874804e-06, "loss": 0.398, "step": 16518 }, { "epoch": 0.8914251794290649, "grad_norm": 0.9108117835236497, "learning_rate": 1.2592541090879204e-06, "loss": 0.4787, "step": 16519 }, { "epoch": 0.8914791430575791, "grad_norm": 1.200334765692928, "learning_rate": 1.2589989656716828e-06, "loss": 0.6281, "step": 16520 }, { "epoch": 0.8915331066860935, "grad_norm": 0.994082617632326, "learning_rate": 1.2587439441460997e-06, "loss": 0.421, "step": 16521 }, { "epoch": 0.8915870703146079, "grad_norm": 1.1961546795934537, "learning_rate": 1.2584890445185017e-06, "loss": 0.6503, "step": 16522 }, { "epoch": 0.8916410339431223, "grad_norm": 1.0001549706647135, "learning_rate": 1.2582342667962136e-06, "loss": 0.4951, "step": 16523 }, { "epoch": 0.8916949975716367, "grad_norm": 1.0615005433632017, "learning_rate": 1.2579796109865586e-06, "loss": 0.4716, "step": 16524 }, { "epoch": 0.8917489612001511, "grad_norm": 0.8826479559599854, "learning_rate": 1.2577250770968564e-06, "loss": 0.3994, "step": 16525 }, { "epoch": 0.8918029248286655, "grad_norm": 1.215901497263955, "learning_rate": 1.2574706651344218e-06, "loss": 0.5102, "step": 16526 }, { "epoch": 0.8918568884571798, "grad_norm": 0.9888895200018744, "learning_rate": 1.2572163751065671e-06, "loss": 0.4634, "step": 16527 }, { "epoch": 0.8919108520856942, "grad_norm": 1.118666577379255, "learning_rate": 1.2569622070206005e-06, "loss": 0.4976, "step": 16528 }, { "epoch": 0.8919648157142086, "grad_norm": 1.1091448515902338, "learning_rate": 1.256708160883828e-06, "loss": 0.449, "step": 16529 }, { "epoch": 0.892018779342723, "grad_norm": 1.108640379548791, "learning_rate": 1.2564542367035502e-06, "loss": 0.4259, "step": 16530 }, { "epoch": 0.8920727429712374, "grad_norm": 1.1091838449648266, "learning_rate": 1.256200434487065e-06, "loss": 0.3998, "step": 16531 }, { "epoch": 0.8921267065997518, "grad_norm": 1.0176849206478824, "learning_rate": 1.2559467542416673e-06, "loss": 0.5137, "step": 16532 }, { "epoch": 0.8921806702282662, "grad_norm": 1.203340161308398, "learning_rate": 1.2556931959746485e-06, "loss": 0.4936, "step": 16533 }, { "epoch": 0.8922346338567805, "grad_norm": 0.8327366562798605, "learning_rate": 1.2554397596932963e-06, "loss": 0.3888, "step": 16534 }, { "epoch": 0.8922885974852949, "grad_norm": 4.393572328472059, "learning_rate": 1.2551864454048935e-06, "loss": 0.3703, "step": 16535 }, { "epoch": 0.8923425611138093, "grad_norm": 0.8959613009695632, "learning_rate": 1.2549332531167216e-06, "loss": 0.409, "step": 16536 }, { "epoch": 0.8923965247423237, "grad_norm": 0.9148845356521775, "learning_rate": 1.2546801828360572e-06, "loss": 
0.4549, "step": 16537 }, { "epoch": 0.8924504883708381, "grad_norm": 1.0401684755893603, "learning_rate": 1.2544272345701742e-06, "loss": 0.5105, "step": 16538 }, { "epoch": 0.8925044519993525, "grad_norm": 1.055691724688839, "learning_rate": 1.2541744083263422e-06, "loss": 0.5184, "step": 16539 }, { "epoch": 0.8925584156278669, "grad_norm": 1.108073633361213, "learning_rate": 1.2539217041118275e-06, "loss": 0.6506, "step": 16540 }, { "epoch": 0.8926123792563811, "grad_norm": 1.1510604963061541, "learning_rate": 1.253669121933894e-06, "loss": 0.5981, "step": 16541 }, { "epoch": 0.8926663428848955, "grad_norm": 1.1748632438433584, "learning_rate": 1.2534166617998004e-06, "loss": 0.4788, "step": 16542 }, { "epoch": 0.89272030651341, "grad_norm": 0.9486444859560507, "learning_rate": 1.253164323716802e-06, "loss": 0.4692, "step": 16543 }, { "epoch": 0.8927742701419243, "grad_norm": 1.060085343321271, "learning_rate": 1.2529121076921526e-06, "loss": 0.565, "step": 16544 }, { "epoch": 0.8928282337704387, "grad_norm": 1.107667743922369, "learning_rate": 1.2526600137331007e-06, "loss": 0.5324, "step": 16545 }, { "epoch": 0.8928821973989531, "grad_norm": 1.0208831219249432, "learning_rate": 1.2524080418468914e-06, "loss": 0.4214, "step": 16546 }, { "epoch": 0.8929361610274675, "grad_norm": 1.0174501093969073, "learning_rate": 1.252156192040767e-06, "loss": 0.5105, "step": 16547 }, { "epoch": 0.8929901246559818, "grad_norm": 0.9869706570967811, "learning_rate": 1.2519044643219662e-06, "loss": 0.4753, "step": 16548 }, { "epoch": 0.8930440882844962, "grad_norm": 0.8215668508517365, "learning_rate": 1.2516528586977232e-06, "loss": 0.3511, "step": 16549 }, { "epoch": 0.8930980519130106, "grad_norm": 1.3135463816799575, "learning_rate": 1.2514013751752695e-06, "loss": 0.6137, "step": 16550 }, { "epoch": 0.893152015541525, "grad_norm": 0.9959825424413834, "learning_rate": 1.2511500137618329e-06, "loss": 0.4249, "step": 16551 }, { "epoch": 0.8932059791700394, "grad_norm": 1.0272823321431281, "learning_rate": 1.2508987744646384e-06, "loss": 0.4823, "step": 16552 }, { "epoch": 0.8932599427985538, "grad_norm": 0.9258018896104172, "learning_rate": 1.2506476572909062e-06, "loss": 0.3954, "step": 16553 }, { "epoch": 0.8933139064270682, "grad_norm": 0.7578424444688344, "learning_rate": 1.2503966622478542e-06, "loss": 0.322, "step": 16554 }, { "epoch": 0.8933678700555825, "grad_norm": 1.0080123328317325, "learning_rate": 1.250145789342697e-06, "loss": 0.4534, "step": 16555 }, { "epoch": 0.8934218336840969, "grad_norm": 0.9650576438984382, "learning_rate": 1.2498950385826427e-06, "loss": 0.4054, "step": 16556 }, { "epoch": 0.8934757973126113, "grad_norm": 0.8483603364968832, "learning_rate": 1.2496444099748997e-06, "loss": 0.3958, "step": 16557 }, { "epoch": 0.8935297609411257, "grad_norm": 0.8181061791071963, "learning_rate": 1.2493939035266712e-06, "loss": 0.3266, "step": 16558 }, { "epoch": 0.8935837245696401, "grad_norm": 1.0075346426934482, "learning_rate": 1.249143519245157e-06, "loss": 0.6376, "step": 16559 }, { "epoch": 0.8936376881981545, "grad_norm": 1.036891000265129, "learning_rate": 1.2488932571375527e-06, "loss": 0.6937, "step": 16560 }, { "epoch": 0.8936916518266689, "grad_norm": 0.9519098701554323, "learning_rate": 1.2486431172110516e-06, "loss": 0.4522, "step": 16561 }, { "epoch": 0.8937456154551832, "grad_norm": 1.170704134399399, "learning_rate": 1.248393099472843e-06, "loss": 0.5588, "step": 16562 }, { "epoch": 0.8937995790836976, "grad_norm": 0.9444481580915027, "learning_rate": 
1.2481432039301134e-06, "loss": 0.3761, "step": 16563 }, { "epoch": 0.893853542712212, "grad_norm": 1.2190771469882187, "learning_rate": 1.2478934305900445e-06, "loss": 0.5295, "step": 16564 }, { "epoch": 0.8939075063407264, "grad_norm": 1.179284402695383, "learning_rate": 1.247643779459814e-06, "loss": 0.35, "step": 16565 }, { "epoch": 0.8939614699692408, "grad_norm": 0.9252540307693538, "learning_rate": 1.2473942505465978e-06, "loss": 0.4644, "step": 16566 }, { "epoch": 0.8940154335977551, "grad_norm": 1.0768920642088355, "learning_rate": 1.2471448438575682e-06, "loss": 0.4941, "step": 16567 }, { "epoch": 0.8940693972262695, "grad_norm": 1.0970095163635307, "learning_rate": 1.246895559399893e-06, "loss": 0.4218, "step": 16568 }, { "epoch": 0.8941233608547838, "grad_norm": 1.0444501113468312, "learning_rate": 1.2466463971807368e-06, "loss": 0.4468, "step": 16569 }, { "epoch": 0.8941773244832982, "grad_norm": 1.2698907470415834, "learning_rate": 1.2463973572072604e-06, "loss": 0.5248, "step": 16570 }, { "epoch": 0.8942312881118126, "grad_norm": 1.1076601204940808, "learning_rate": 1.2461484394866224e-06, "loss": 0.5472, "step": 16571 }, { "epoch": 0.894285251740327, "grad_norm": 1.0560689343137104, "learning_rate": 1.2458996440259766e-06, "loss": 0.4427, "step": 16572 }, { "epoch": 0.8943392153688414, "grad_norm": 1.0054769613027383, "learning_rate": 1.2456509708324733e-06, "loss": 0.3454, "step": 16573 }, { "epoch": 0.8943931789973558, "grad_norm": 1.359604787302321, "learning_rate": 1.2454024199132594e-06, "loss": 0.4874, "step": 16574 }, { "epoch": 0.8944471426258702, "grad_norm": 0.834195320309321, "learning_rate": 1.2451539912754795e-06, "loss": 0.282, "step": 16575 }, { "epoch": 0.8945011062543845, "grad_norm": 1.0379056513969505, "learning_rate": 1.2449056849262735e-06, "loss": 0.4906, "step": 16576 }, { "epoch": 0.8945550698828989, "grad_norm": 1.0311886052535417, "learning_rate": 1.2446575008727765e-06, "loss": 0.4416, "step": 16577 }, { "epoch": 0.8946090335114133, "grad_norm": 0.9215855354907386, "learning_rate": 1.2444094391221235e-06, "loss": 0.3505, "step": 16578 }, { "epoch": 0.8946629971399277, "grad_norm": 1.17542271939718, "learning_rate": 1.2441614996814425e-06, "loss": 0.5333, "step": 16579 }, { "epoch": 0.8947169607684421, "grad_norm": 0.8544448824884102, "learning_rate": 1.2439136825578605e-06, "loss": 0.3386, "step": 16580 }, { "epoch": 0.8947709243969565, "grad_norm": 1.1827145759065303, "learning_rate": 1.2436659877584998e-06, "loss": 0.4428, "step": 16581 }, { "epoch": 0.8948248880254709, "grad_norm": 0.8889807992430249, "learning_rate": 1.2434184152904792e-06, "loss": 0.3553, "step": 16582 }, { "epoch": 0.8948788516539852, "grad_norm": 0.910967540231939, "learning_rate": 1.2431709651609147e-06, "loss": 0.3701, "step": 16583 }, { "epoch": 0.8949328152824996, "grad_norm": 1.2303874609595469, "learning_rate": 1.2429236373769175e-06, "loss": 0.5456, "step": 16584 }, { "epoch": 0.894986778911014, "grad_norm": 0.9799532161018254, "learning_rate": 1.2426764319455964e-06, "loss": 0.4361, "step": 16585 }, { "epoch": 0.8950407425395284, "grad_norm": 1.1075065324683198, "learning_rate": 1.2424293488740564e-06, "loss": 0.4961, "step": 16586 }, { "epoch": 0.8950947061680428, "grad_norm": 1.0057056562968245, "learning_rate": 1.2421823881693988e-06, "loss": 0.4948, "step": 16587 }, { "epoch": 0.8951486697965572, "grad_norm": 1.0257909555267728, "learning_rate": 1.2419355498387215e-06, "loss": 0.5134, "step": 16588 }, { "epoch": 0.8952026334250714, "grad_norm": 
1.110669033244554, "learning_rate": 1.2416888338891198e-06, "loss": 0.518, "step": 16589 }, { "epoch": 0.8952565970535858, "grad_norm": 1.0501869037450313, "learning_rate": 1.241442240327683e-06, "loss": 0.4213, "step": 16590 }, { "epoch": 0.8953105606821002, "grad_norm": 0.9824561878084013, "learning_rate": 1.241195769161499e-06, "loss": 0.4979, "step": 16591 }, { "epoch": 0.8953645243106146, "grad_norm": 1.2188297950057785, "learning_rate": 1.2409494203976523e-06, "loss": 0.5692, "step": 16592 }, { "epoch": 0.895418487939129, "grad_norm": 1.2422629455908751, "learning_rate": 1.2407031940432222e-06, "loss": 0.5979, "step": 16593 }, { "epoch": 0.8954724515676434, "grad_norm": 1.0726866733247764, "learning_rate": 1.240457090105286e-06, "loss": 0.4447, "step": 16594 }, { "epoch": 0.8955264151961578, "grad_norm": 0.9095784964048158, "learning_rate": 1.2402111085909171e-06, "loss": 0.3662, "step": 16595 }, { "epoch": 0.8955803788246721, "grad_norm": 1.193822793723948, "learning_rate": 1.239965249507186e-06, "loss": 0.5798, "step": 16596 }, { "epoch": 0.8956343424531865, "grad_norm": 0.8376405544396237, "learning_rate": 1.2397195128611577e-06, "loss": 0.4392, "step": 16597 }, { "epoch": 0.8956883060817009, "grad_norm": 0.8780492847457859, "learning_rate": 1.2394738986598947e-06, "loss": 0.3965, "step": 16598 }, { "epoch": 0.8957422697102153, "grad_norm": 1.1744904278538202, "learning_rate": 1.2392284069104567e-06, "loss": 0.5452, "step": 16599 }, { "epoch": 0.8957962333387297, "grad_norm": 1.0342346237573024, "learning_rate": 1.2389830376199001e-06, "loss": 0.3535, "step": 16600 }, { "epoch": 0.8958501969672441, "grad_norm": 1.079316604871361, "learning_rate": 1.2387377907952758e-06, "loss": 0.3567, "step": 16601 }, { "epoch": 0.8959041605957585, "grad_norm": 1.060413161250887, "learning_rate": 1.2384926664436337e-06, "loss": 0.3661, "step": 16602 }, { "epoch": 0.8959581242242728, "grad_norm": 0.9125578785050198, "learning_rate": 1.2382476645720182e-06, "loss": 0.3678, "step": 16603 }, { "epoch": 0.8960120878527872, "grad_norm": 1.0493319708316957, "learning_rate": 1.2380027851874711e-06, "loss": 0.5027, "step": 16604 }, { "epoch": 0.8960660514813016, "grad_norm": 0.9335126376802196, "learning_rate": 1.2377580282970301e-06, "loss": 0.3236, "step": 16605 }, { "epoch": 0.896120015109816, "grad_norm": 0.8194309305720052, "learning_rate": 1.2375133939077303e-06, "loss": 0.2602, "step": 16606 }, { "epoch": 0.8961739787383304, "grad_norm": 1.246286250743248, "learning_rate": 1.237268882026602e-06, "loss": 0.4087, "step": 16607 }, { "epoch": 0.8962279423668448, "grad_norm": 1.1922214335501442, "learning_rate": 1.237024492660673e-06, "loss": 0.4764, "step": 16608 }, { "epoch": 0.8962819059953592, "grad_norm": 0.9287961995002572, "learning_rate": 1.2367802258169683e-06, "loss": 0.3628, "step": 16609 }, { "epoch": 0.8963358696238735, "grad_norm": 1.4348745743586708, "learning_rate": 1.236536081502508e-06, "loss": 0.554, "step": 16610 }, { "epoch": 0.8963898332523879, "grad_norm": 0.9921380084129724, "learning_rate": 1.2362920597243074e-06, "loss": 0.4572, "step": 16611 }, { "epoch": 0.8964437968809023, "grad_norm": 1.0439618257091634, "learning_rate": 1.236048160489382e-06, "loss": 0.5197, "step": 16612 }, { "epoch": 0.8964977605094167, "grad_norm": 0.8907244918413592, "learning_rate": 1.2358043838047404e-06, "loss": 0.3505, "step": 16613 }, { "epoch": 0.896551724137931, "grad_norm": 1.0465177807830133, "learning_rate": 1.2355607296773896e-06, "loss": 0.535, "step": 16614 }, { "epoch": 
0.8966056877664454, "grad_norm": 1.1278392209618169, "learning_rate": 1.235317198114332e-06, "loss": 0.4382, "step": 16615 }, { "epoch": 0.8966596513949598, "grad_norm": 1.0936893214142978, "learning_rate": 1.2350737891225675e-06, "loss": 0.4891, "step": 16616 }, { "epoch": 0.8967136150234741, "grad_norm": 0.9670248450091921, "learning_rate": 1.2348305027090923e-06, "loss": 0.4992, "step": 16617 }, { "epoch": 0.8967675786519885, "grad_norm": 0.8009880066409537, "learning_rate": 1.2345873388808971e-06, "loss": 0.2583, "step": 16618 }, { "epoch": 0.8968215422805029, "grad_norm": 1.0752712582113488, "learning_rate": 1.2343442976449724e-06, "loss": 0.4609, "step": 16619 }, { "epoch": 0.8968755059090173, "grad_norm": 1.056995790472711, "learning_rate": 1.234101379008302e-06, "loss": 0.4896, "step": 16620 }, { "epoch": 0.8969294695375317, "grad_norm": 1.1023990704721909, "learning_rate": 1.2338585829778687e-06, "loss": 0.5226, "step": 16621 }, { "epoch": 0.8969834331660461, "grad_norm": 1.025448883346239, "learning_rate": 1.2336159095606506e-06, "loss": 0.464, "step": 16622 }, { "epoch": 0.8970373967945605, "grad_norm": 1.3397719380793285, "learning_rate": 1.2333733587636218e-06, "loss": 0.723, "step": 16623 }, { "epoch": 0.8970913604230748, "grad_norm": 1.1292982508486942, "learning_rate": 1.2331309305937547e-06, "loss": 0.5367, "step": 16624 }, { "epoch": 0.8971453240515892, "grad_norm": 1.074915879893168, "learning_rate": 1.2328886250580154e-06, "loss": 0.5696, "step": 16625 }, { "epoch": 0.8971992876801036, "grad_norm": 0.8564645218629254, "learning_rate": 1.2326464421633683e-06, "loss": 0.3229, "step": 16626 }, { "epoch": 0.897253251308618, "grad_norm": 0.9520230570401239, "learning_rate": 1.2324043819167748e-06, "loss": 0.3929, "step": 16627 }, { "epoch": 0.8973072149371324, "grad_norm": 1.0420041235147786, "learning_rate": 1.232162444325191e-06, "loss": 0.4885, "step": 16628 }, { "epoch": 0.8973611785656468, "grad_norm": 0.7140381366790686, "learning_rate": 1.231920629395571e-06, "loss": 0.3258, "step": 16629 }, { "epoch": 0.8974151421941612, "grad_norm": 1.0047477850701425, "learning_rate": 1.2316789371348647e-06, "loss": 0.4146, "step": 16630 }, { "epoch": 0.8974691058226755, "grad_norm": 1.008172866486972, "learning_rate": 1.231437367550019e-06, "loss": 0.4609, "step": 16631 }, { "epoch": 0.8975230694511899, "grad_norm": 0.9191653075451771, "learning_rate": 1.2311959206479757e-06, "loss": 0.4168, "step": 16632 }, { "epoch": 0.8975770330797043, "grad_norm": 1.1092036903888478, "learning_rate": 1.2309545964356754e-06, "loss": 0.4981, "step": 16633 }, { "epoch": 0.8976309967082187, "grad_norm": 1.052883919466102, "learning_rate": 1.2307133949200533e-06, "loss": 0.4477, "step": 16634 }, { "epoch": 0.8976849603367331, "grad_norm": 1.020870035153527, "learning_rate": 1.230472316108042e-06, "loss": 0.5646, "step": 16635 }, { "epoch": 0.8977389239652475, "grad_norm": 1.277904283487938, "learning_rate": 1.230231360006571e-06, "loss": 0.6889, "step": 16636 }, { "epoch": 0.8977928875937619, "grad_norm": 1.0136179356200992, "learning_rate": 1.2299905266225642e-06, "loss": 0.4926, "step": 16637 }, { "epoch": 0.8978468512222761, "grad_norm": 0.9633269651853454, "learning_rate": 1.2297498159629454e-06, "loss": 0.4382, "step": 16638 }, { "epoch": 0.8979008148507905, "grad_norm": 0.9344173242507018, "learning_rate": 1.2295092280346304e-06, "loss": 0.3608, "step": 16639 }, { "epoch": 0.8979547784793049, "grad_norm": 0.893180567705878, "learning_rate": 1.2292687628445357e-06, "loss": 0.3469, "step": 
16640 }, { "epoch": 0.8980087421078193, "grad_norm": 1.1845004225780589, "learning_rate": 1.2290284203995719e-06, "loss": 0.4748, "step": 16641 }, { "epoch": 0.8980627057363337, "grad_norm": 1.0018513288286304, "learning_rate": 1.228788200706647e-06, "loss": 0.5446, "step": 16642 }, { "epoch": 0.8981166693648481, "grad_norm": 0.9201558595620329, "learning_rate": 1.2285481037726644e-06, "loss": 0.3716, "step": 16643 }, { "epoch": 0.8981706329933625, "grad_norm": 1.1679579687150983, "learning_rate": 1.2283081296045256e-06, "loss": 0.5931, "step": 16644 }, { "epoch": 0.8982245966218768, "grad_norm": 1.0080828075109014, "learning_rate": 1.228068278209128e-06, "loss": 0.5802, "step": 16645 }, { "epoch": 0.8982785602503912, "grad_norm": 1.1866440329471075, "learning_rate": 1.2278285495933642e-06, "loss": 0.4731, "step": 16646 }, { "epoch": 0.8983325238789056, "grad_norm": 1.2356251508686347, "learning_rate": 1.227588943764124e-06, "loss": 0.5503, "step": 16647 }, { "epoch": 0.89838648750742, "grad_norm": 1.0169657124305187, "learning_rate": 1.227349460728295e-06, "loss": 0.4662, "step": 16648 }, { "epoch": 0.8984404511359344, "grad_norm": 1.1524975939463453, "learning_rate": 1.2271101004927594e-06, "loss": 0.4256, "step": 16649 }, { "epoch": 0.8984944147644488, "grad_norm": 1.036860463792441, "learning_rate": 1.226870863064397e-06, "loss": 0.4402, "step": 16650 }, { "epoch": 0.8985483783929631, "grad_norm": 0.9283860236441164, "learning_rate": 1.2266317484500847e-06, "loss": 0.4741, "step": 16651 }, { "epoch": 0.8986023420214775, "grad_norm": 1.037705740790301, "learning_rate": 1.2263927566566928e-06, "loss": 0.603, "step": 16652 }, { "epoch": 0.8986563056499919, "grad_norm": 1.3095954646334673, "learning_rate": 1.2261538876910913e-06, "loss": 0.5896, "step": 16653 }, { "epoch": 0.8987102692785063, "grad_norm": 1.0579842250108549, "learning_rate": 1.2259151415601454e-06, "loss": 0.5089, "step": 16654 }, { "epoch": 0.8987642329070207, "grad_norm": 1.153059921399229, "learning_rate": 1.2256765182707174e-06, "loss": 0.4582, "step": 16655 }, { "epoch": 0.8988181965355351, "grad_norm": 0.8833540060816297, "learning_rate": 1.2254380178296649e-06, "loss": 0.4287, "step": 16656 }, { "epoch": 0.8988721601640495, "grad_norm": 0.988157051319922, "learning_rate": 1.2251996402438427e-06, "loss": 0.4747, "step": 16657 }, { "epoch": 0.8989261237925638, "grad_norm": 0.9082953327580486, "learning_rate": 1.2249613855201033e-06, "loss": 0.3843, "step": 16658 }, { "epoch": 0.8989800874210782, "grad_norm": 0.9647390001788155, "learning_rate": 1.2247232536652924e-06, "loss": 0.4294, "step": 16659 }, { "epoch": 0.8990340510495926, "grad_norm": 1.1255155547550646, "learning_rate": 1.2244852446862548e-06, "loss": 0.4375, "step": 16660 }, { "epoch": 0.899088014678107, "grad_norm": 1.0764517322127314, "learning_rate": 1.2242473585898317e-06, "loss": 0.4759, "step": 16661 }, { "epoch": 0.8991419783066213, "grad_norm": 1.1629322726063878, "learning_rate": 1.2240095953828596e-06, "loss": 0.5314, "step": 16662 }, { "epoch": 0.8991959419351357, "grad_norm": 0.9282711611115064, "learning_rate": 1.2237719550721723e-06, "loss": 0.4152, "step": 16663 }, { "epoch": 0.8992499055636501, "grad_norm": 1.197274846773252, "learning_rate": 1.2235344376646006e-06, "loss": 0.5891, "step": 16664 }, { "epoch": 0.8993038691921644, "grad_norm": 1.1396567600240226, "learning_rate": 1.2232970431669692e-06, "loss": 0.5419, "step": 16665 }, { "epoch": 0.8993578328206788, "grad_norm": 1.0602268543486988, "learning_rate": 1.2230597715861022e-06, 
"loss": 0.4613, "step": 16666 }, { "epoch": 0.8994117964491932, "grad_norm": 0.9809160172553654, "learning_rate": 1.2228226229288198e-06, "loss": 0.4152, "step": 16667 }, { "epoch": 0.8994657600777076, "grad_norm": 1.1156612719428147, "learning_rate": 1.222585597201936e-06, "loss": 0.5685, "step": 16668 }, { "epoch": 0.899519723706222, "grad_norm": 1.334358576441543, "learning_rate": 1.2223486944122644e-06, "loss": 0.5226, "step": 16669 }, { "epoch": 0.8995736873347364, "grad_norm": 1.0222748285677334, "learning_rate": 1.2221119145666142e-06, "loss": 0.3418, "step": 16670 }, { "epoch": 0.8996276509632508, "grad_norm": 1.2810471455307793, "learning_rate": 1.2218752576717897e-06, "loss": 0.5283, "step": 16671 }, { "epoch": 0.8996816145917651, "grad_norm": 1.0409551710609215, "learning_rate": 1.221638723734593e-06, "loss": 0.4881, "step": 16672 }, { "epoch": 0.8997355782202795, "grad_norm": 1.2198758224019, "learning_rate": 1.2214023127618223e-06, "loss": 0.4445, "step": 16673 }, { "epoch": 0.8997895418487939, "grad_norm": 0.9752782855696729, "learning_rate": 1.2211660247602718e-06, "loss": 0.4459, "step": 16674 }, { "epoch": 0.8998435054773083, "grad_norm": 1.2029567147971472, "learning_rate": 1.2209298597367336e-06, "loss": 0.551, "step": 16675 }, { "epoch": 0.8998974691058227, "grad_norm": 0.9236222277284928, "learning_rate": 1.2206938176979953e-06, "loss": 0.4224, "step": 16676 }, { "epoch": 0.8999514327343371, "grad_norm": 1.043343136207993, "learning_rate": 1.2204578986508404e-06, "loss": 0.4164, "step": 16677 }, { "epoch": 0.9000053963628515, "grad_norm": 0.9348803678505607, "learning_rate": 1.2202221026020495e-06, "loss": 0.3914, "step": 16678 }, { "epoch": 0.9000593599913658, "grad_norm": 0.8943326144959638, "learning_rate": 1.2199864295584e-06, "loss": 0.4186, "step": 16679 }, { "epoch": 0.9001133236198802, "grad_norm": 0.9481628039620029, "learning_rate": 1.219750879526665e-06, "loss": 0.4207, "step": 16680 }, { "epoch": 0.9001672872483946, "grad_norm": 0.9327812003408481, "learning_rate": 1.2195154525136146e-06, "loss": 0.3761, "step": 16681 }, { "epoch": 0.900221250876909, "grad_norm": 0.937116890290192, "learning_rate": 1.2192801485260152e-06, "loss": 0.3913, "step": 16682 }, { "epoch": 0.9002752145054234, "grad_norm": 0.9716758981647424, "learning_rate": 1.2190449675706296e-06, "loss": 0.5011, "step": 16683 }, { "epoch": 0.9003291781339378, "grad_norm": 0.9829481874556457, "learning_rate": 1.2188099096542175e-06, "loss": 0.3641, "step": 16684 }, { "epoch": 0.9003831417624522, "grad_norm": 1.029847414136919, "learning_rate": 1.2185749747835338e-06, "loss": 0.3964, "step": 16685 }, { "epoch": 0.9004371053909664, "grad_norm": 1.2573203983310157, "learning_rate": 1.2183401629653324e-06, "loss": 0.4872, "step": 16686 }, { "epoch": 0.9004910690194808, "grad_norm": 1.1200858806749747, "learning_rate": 1.2181054742063603e-06, "loss": 0.6089, "step": 16687 }, { "epoch": 0.9005450326479952, "grad_norm": 1.087561655776066, "learning_rate": 1.2178709085133635e-06, "loss": 0.5319, "step": 16688 }, { "epoch": 0.9005989962765096, "grad_norm": 0.9477195107024905, "learning_rate": 1.2176364658930836e-06, "loss": 0.338, "step": 16689 }, { "epoch": 0.900652959905024, "grad_norm": 1.0095256887997281, "learning_rate": 1.217402146352259e-06, "loss": 0.4237, "step": 16690 }, { "epoch": 0.9007069235335384, "grad_norm": 1.0804802081211164, "learning_rate": 1.2171679498976231e-06, "loss": 0.5543, "step": 16691 }, { "epoch": 0.9007608871620528, "grad_norm": 1.1574384358259182, "learning_rate": 
1.2169338765359087e-06, "loss": 0.515, "step": 16692 }, { "epoch": 0.9008148507905671, "grad_norm": 0.8851573826354238, "learning_rate": 1.2166999262738422e-06, "loss": 0.3283, "step": 16693 }, { "epoch": 0.9008688144190815, "grad_norm": 1.148451787502722, "learning_rate": 1.2164660991181478e-06, "loss": 0.402, "step": 16694 }, { "epoch": 0.9009227780475959, "grad_norm": 1.474635778767568, "learning_rate": 1.2162323950755458e-06, "loss": 0.6205, "step": 16695 }, { "epoch": 0.9009767416761103, "grad_norm": 1.3058334604106872, "learning_rate": 1.2159988141527529e-06, "loss": 0.5471, "step": 16696 }, { "epoch": 0.9010307053046247, "grad_norm": 0.9146713661810555, "learning_rate": 1.2157653563564831e-06, "loss": 0.4147, "step": 16697 }, { "epoch": 0.9010846689331391, "grad_norm": 1.1984015135542683, "learning_rate": 1.2155320216934454e-06, "loss": 0.5521, "step": 16698 }, { "epoch": 0.9011386325616535, "grad_norm": 0.9906711173075657, "learning_rate": 1.215298810170347e-06, "loss": 0.4888, "step": 16699 }, { "epoch": 0.9011925961901678, "grad_norm": 0.9786313126934569, "learning_rate": 1.2150657217938908e-06, "loss": 0.5349, "step": 16700 }, { "epoch": 0.9012465598186822, "grad_norm": 1.240289660254431, "learning_rate": 1.2148327565707742e-06, "loss": 0.5487, "step": 16701 }, { "epoch": 0.9013005234471966, "grad_norm": 1.1775805606210468, "learning_rate": 1.2145999145076945e-06, "loss": 0.5929, "step": 16702 }, { "epoch": 0.901354487075711, "grad_norm": 1.0084077787543544, "learning_rate": 1.2143671956113434e-06, "loss": 0.5914, "step": 16703 }, { "epoch": 0.9014084507042254, "grad_norm": 0.8310169807351301, "learning_rate": 1.2141345998884092e-06, "loss": 0.2939, "step": 16704 }, { "epoch": 0.9014624143327398, "grad_norm": 1.2796960252357983, "learning_rate": 1.2139021273455773e-06, "loss": 0.597, "step": 16705 }, { "epoch": 0.9015163779612542, "grad_norm": 0.9635007877309409, "learning_rate": 1.213669777989529e-06, "loss": 0.5354, "step": 16706 }, { "epoch": 0.9015703415897685, "grad_norm": 1.052478715631544, "learning_rate": 1.2134375518269429e-06, "loss": 0.4678, "step": 16707 }, { "epoch": 0.9016243052182829, "grad_norm": 1.0066414619194994, "learning_rate": 1.213205448864492e-06, "loss": 0.5239, "step": 16708 }, { "epoch": 0.9016782688467972, "grad_norm": 1.1865257244174428, "learning_rate": 1.2129734691088483e-06, "loss": 0.6521, "step": 16709 }, { "epoch": 0.9017322324753116, "grad_norm": 0.9675514312992051, "learning_rate": 1.2127416125666786e-06, "loss": 0.3917, "step": 16710 }, { "epoch": 0.901786196103826, "grad_norm": 1.247112586659996, "learning_rate": 1.2125098792446468e-06, "loss": 0.702, "step": 16711 }, { "epoch": 0.9018401597323404, "grad_norm": 1.000971853570305, "learning_rate": 1.2122782691494137e-06, "loss": 0.4259, "step": 16712 }, { "epoch": 0.9018941233608548, "grad_norm": 1.0897955521408966, "learning_rate": 1.2120467822876353e-06, "loss": 0.5454, "step": 16713 }, { "epoch": 0.9019480869893691, "grad_norm": 0.9607833471185613, "learning_rate": 1.2118154186659653e-06, "loss": 0.3562, "step": 16714 }, { "epoch": 0.9020020506178835, "grad_norm": 1.1886920083458952, "learning_rate": 1.211584178291053e-06, "loss": 0.4838, "step": 16715 }, { "epoch": 0.9020560142463979, "grad_norm": 0.8666227219347404, "learning_rate": 1.2113530611695443e-06, "loss": 0.4211, "step": 16716 }, { "epoch": 0.9021099778749123, "grad_norm": 1.0676656350972396, "learning_rate": 1.211122067308082e-06, "loss": 0.4639, "step": 16717 }, { "epoch": 0.9021639415034267, "grad_norm": 1.0420219671031778, 
"learning_rate": 1.2108911967133052e-06, "loss": 0.4714, "step": 16718 }, { "epoch": 0.9022179051319411, "grad_norm": 0.9793702142375112, "learning_rate": 1.210660449391849e-06, "loss": 0.4423, "step": 16719 }, { "epoch": 0.9022718687604554, "grad_norm": 0.9971024162901069, "learning_rate": 1.2104298253503461e-06, "loss": 0.3833, "step": 16720 }, { "epoch": 0.9023258323889698, "grad_norm": 1.100232205796732, "learning_rate": 1.2101993245954235e-06, "loss": 0.5271, "step": 16721 }, { "epoch": 0.9023797960174842, "grad_norm": 1.0717112133471145, "learning_rate": 1.2099689471337077e-06, "loss": 0.4019, "step": 16722 }, { "epoch": 0.9024337596459986, "grad_norm": 1.0975203890941791, "learning_rate": 1.2097386929718185e-06, "loss": 0.5274, "step": 16723 }, { "epoch": 0.902487723274513, "grad_norm": 1.1832403109950573, "learning_rate": 1.2095085621163747e-06, "loss": 0.6526, "step": 16724 }, { "epoch": 0.9025416869030274, "grad_norm": 0.8650952522058243, "learning_rate": 1.2092785545739893e-06, "loss": 0.3577, "step": 16725 }, { "epoch": 0.9025956505315418, "grad_norm": 0.9758424292926259, "learning_rate": 1.209048670351274e-06, "loss": 0.392, "step": 16726 }, { "epoch": 0.9026496141600561, "grad_norm": 0.8790984968824037, "learning_rate": 1.2088189094548366e-06, "loss": 0.3699, "step": 16727 }, { "epoch": 0.9027035777885705, "grad_norm": 1.1532803703071761, "learning_rate": 1.2085892718912789e-06, "loss": 0.4486, "step": 16728 }, { "epoch": 0.9027575414170849, "grad_norm": 1.1126263273588575, "learning_rate": 1.2083597576672019e-06, "loss": 0.5473, "step": 16729 }, { "epoch": 0.9028115050455993, "grad_norm": 1.0358348190920725, "learning_rate": 1.2081303667892013e-06, "loss": 0.4508, "step": 16730 }, { "epoch": 0.9028654686741137, "grad_norm": 1.1368768306323236, "learning_rate": 1.2079010992638713e-06, "loss": 0.5042, "step": 16731 }, { "epoch": 0.9029194323026281, "grad_norm": 1.1796211875784193, "learning_rate": 1.2076719550977998e-06, "loss": 0.406, "step": 16732 }, { "epoch": 0.9029733959311425, "grad_norm": 1.0256604864613124, "learning_rate": 1.2074429342975737e-06, "loss": 0.4966, "step": 16733 }, { "epoch": 0.9030273595596567, "grad_norm": 1.0112261493181969, "learning_rate": 1.2072140368697757e-06, "loss": 0.509, "step": 16734 }, { "epoch": 0.9030813231881711, "grad_norm": 1.0453950935088143, "learning_rate": 1.2069852628209832e-06, "loss": 0.4298, "step": 16735 }, { "epoch": 0.9031352868166855, "grad_norm": 0.9053106866789684, "learning_rate": 1.2067566121577715e-06, "loss": 0.3341, "step": 16736 }, { "epoch": 0.9031892504451999, "grad_norm": 1.0596651281974154, "learning_rate": 1.2065280848867133e-06, "loss": 0.5446, "step": 16737 }, { "epoch": 0.9032432140737143, "grad_norm": 0.9527883659564679, "learning_rate": 1.2062996810143763e-06, "loss": 0.4473, "step": 16738 }, { "epoch": 0.9032971777022287, "grad_norm": 1.1710613258362486, "learning_rate": 1.2060714005473247e-06, "loss": 0.4834, "step": 16739 }, { "epoch": 0.9033511413307431, "grad_norm": 0.9219973479180453, "learning_rate": 1.2058432434921196e-06, "loss": 0.3612, "step": 16740 }, { "epoch": 0.9034051049592574, "grad_norm": 0.8073586153969352, "learning_rate": 1.2056152098553194e-06, "loss": 0.3163, "step": 16741 }, { "epoch": 0.9034590685877718, "grad_norm": 1.0377857267467192, "learning_rate": 1.2053872996434769e-06, "loss": 0.3817, "step": 16742 }, { "epoch": 0.9035130322162862, "grad_norm": 0.859846617526315, "learning_rate": 1.2051595128631425e-06, "loss": 0.4129, "step": 16743 }, { "epoch": 0.9035669958448006, 
"grad_norm": 0.9955365089079901, "learning_rate": 1.2049318495208633e-06, "loss": 0.4412, "step": 16744 }, { "epoch": 0.903620959473315, "grad_norm": 1.0889744374520482, "learning_rate": 1.2047043096231826e-06, "loss": 0.4781, "step": 16745 }, { "epoch": 0.9036749231018294, "grad_norm": 1.1119264924734953, "learning_rate": 1.2044768931766396e-06, "loss": 0.451, "step": 16746 }, { "epoch": 0.9037288867303438, "grad_norm": 1.0173120815631487, "learning_rate": 1.204249600187772e-06, "loss": 0.55, "step": 16747 }, { "epoch": 0.9037828503588581, "grad_norm": 1.10126709178847, "learning_rate": 1.2040224306631115e-06, "loss": 0.3977, "step": 16748 }, { "epoch": 0.9038368139873725, "grad_norm": 0.9398889817332698, "learning_rate": 1.2037953846091863e-06, "loss": 0.377, "step": 16749 }, { "epoch": 0.9038907776158869, "grad_norm": 1.0322759814475428, "learning_rate": 1.2035684620325235e-06, "loss": 0.3974, "step": 16750 }, { "epoch": 0.9039447412444013, "grad_norm": 1.212783386839925, "learning_rate": 1.2033416629396438e-06, "loss": 0.3724, "step": 16751 }, { "epoch": 0.9039987048729157, "grad_norm": 1.0616762878345487, "learning_rate": 1.2031149873370662e-06, "loss": 0.4393, "step": 16752 }, { "epoch": 0.9040526685014301, "grad_norm": 1.1173407057526572, "learning_rate": 1.2028884352313052e-06, "loss": 0.4926, "step": 16753 }, { "epoch": 0.9041066321299445, "grad_norm": 1.0621731804999577, "learning_rate": 1.202662006628873e-06, "loss": 0.4506, "step": 16754 }, { "epoch": 0.9041605957584588, "grad_norm": 1.0907736599534423, "learning_rate": 1.2024357015362775e-06, "loss": 0.4617, "step": 16755 }, { "epoch": 0.9042145593869731, "grad_norm": 1.0969531438374895, "learning_rate": 1.2022095199600212e-06, "loss": 0.5341, "step": 16756 }, { "epoch": 0.9042685230154875, "grad_norm": 1.0061623871393737, "learning_rate": 1.2019834619066064e-06, "loss": 0.4644, "step": 16757 }, { "epoch": 0.9043224866440019, "grad_norm": 1.275793234882058, "learning_rate": 1.2017575273825297e-06, "loss": 0.5038, "step": 16758 }, { "epoch": 0.9043764502725163, "grad_norm": 1.1151777687604314, "learning_rate": 1.2015317163942842e-06, "loss": 0.6202, "step": 16759 }, { "epoch": 0.9044304139010307, "grad_norm": 0.9917883550645258, "learning_rate": 1.201306028948361e-06, "loss": 0.4203, "step": 16760 }, { "epoch": 0.9044843775295451, "grad_norm": 0.898235738604466, "learning_rate": 1.201080465051246e-06, "loss": 0.3784, "step": 16761 }, { "epoch": 0.9045383411580594, "grad_norm": 0.8981332052897137, "learning_rate": 1.2008550247094224e-06, "loss": 0.3393, "step": 16762 }, { "epoch": 0.9045923047865738, "grad_norm": 0.9579351308154923, "learning_rate": 1.2006297079293695e-06, "loss": 0.4044, "step": 16763 }, { "epoch": 0.9046462684150882, "grad_norm": 0.9976494898412256, "learning_rate": 1.2004045147175623e-06, "loss": 0.4348, "step": 16764 }, { "epoch": 0.9047002320436026, "grad_norm": 0.9964891272790907, "learning_rate": 1.2001794450804739e-06, "loss": 0.5737, "step": 16765 }, { "epoch": 0.904754195672117, "grad_norm": 1.0379170980123198, "learning_rate": 1.199954499024573e-06, "loss": 0.5533, "step": 16766 }, { "epoch": 0.9048081593006314, "grad_norm": 1.2664982818860355, "learning_rate": 1.1997296765563243e-06, "loss": 0.6408, "step": 16767 }, { "epoch": 0.9048621229291458, "grad_norm": 1.1331017074317575, "learning_rate": 1.1995049776821907e-06, "loss": 0.4041, "step": 16768 }, { "epoch": 0.9049160865576601, "grad_norm": 0.9183550394211663, "learning_rate": 1.1992804024086288e-06, "loss": 0.4577, "step": 16769 }, { "epoch": 
0.9049700501861745, "grad_norm": 1.108108244799261, "learning_rate": 1.1990559507420934e-06, "loss": 0.4312, "step": 16770 }, { "epoch": 0.9050240138146889, "grad_norm": 0.7854628099704745, "learning_rate": 1.198831622689036e-06, "loss": 0.3901, "step": 16771 }, { "epoch": 0.9050779774432033, "grad_norm": 0.9512897334411671, "learning_rate": 1.1986074182559038e-06, "loss": 0.4343, "step": 16772 }, { "epoch": 0.9051319410717177, "grad_norm": 1.1171915817025302, "learning_rate": 1.198383337449141e-06, "loss": 0.4723, "step": 16773 }, { "epoch": 0.9051859047002321, "grad_norm": 0.8471324655079376, "learning_rate": 1.1981593802751869e-06, "loss": 0.3513, "step": 16774 }, { "epoch": 0.9052398683287465, "grad_norm": 1.1764630277106005, "learning_rate": 1.1979355467404794e-06, "loss": 0.3686, "step": 16775 }, { "epoch": 0.9052938319572608, "grad_norm": 0.8863528402103076, "learning_rate": 1.1977118368514519e-06, "loss": 0.36, "step": 16776 }, { "epoch": 0.9053477955857752, "grad_norm": 1.0642932794287812, "learning_rate": 1.1974882506145325e-06, "loss": 0.4999, "step": 16777 }, { "epoch": 0.9054017592142896, "grad_norm": 0.9346699779231484, "learning_rate": 1.197264788036148e-06, "loss": 0.3671, "step": 16778 }, { "epoch": 0.905455722842804, "grad_norm": 1.059855853203582, "learning_rate": 1.1970414491227214e-06, "loss": 0.4463, "step": 16779 }, { "epoch": 0.9055096864713184, "grad_norm": 1.342374004052335, "learning_rate": 1.1968182338806713e-06, "loss": 0.4931, "step": 16780 }, { "epoch": 0.9055636500998328, "grad_norm": 1.1131122888168414, "learning_rate": 1.1965951423164128e-06, "loss": 0.444, "step": 16781 }, { "epoch": 0.9056176137283471, "grad_norm": 1.0104393039296322, "learning_rate": 1.1963721744363587e-06, "loss": 0.4099, "step": 16782 }, { "epoch": 0.9056715773568614, "grad_norm": 0.9736517005836665, "learning_rate": 1.1961493302469172e-06, "loss": 0.4052, "step": 16783 }, { "epoch": 0.9057255409853758, "grad_norm": 0.9619747454262176, "learning_rate": 1.195926609754492e-06, "loss": 0.4523, "step": 16784 }, { "epoch": 0.9057795046138902, "grad_norm": 1.0270582073863601, "learning_rate": 1.1957040129654852e-06, "loss": 0.5369, "step": 16785 }, { "epoch": 0.9058334682424046, "grad_norm": 1.0933902127379427, "learning_rate": 1.1954815398862936e-06, "loss": 0.4323, "step": 16786 }, { "epoch": 0.905887431870919, "grad_norm": 0.8653221044850875, "learning_rate": 1.1952591905233126e-06, "loss": 0.3149, "step": 16787 }, { "epoch": 0.9059413954994334, "grad_norm": 1.1813095963737361, "learning_rate": 1.1950369648829324e-06, "loss": 0.4578, "step": 16788 }, { "epoch": 0.9059953591279477, "grad_norm": 0.9529122289285663, "learning_rate": 1.1948148629715393e-06, "loss": 0.3778, "step": 16789 }, { "epoch": 0.9060493227564621, "grad_norm": 1.0443828436900413, "learning_rate": 1.1945928847955174e-06, "loss": 0.5184, "step": 16790 }, { "epoch": 0.9061032863849765, "grad_norm": 1.0880913420430582, "learning_rate": 1.1943710303612459e-06, "loss": 0.5902, "step": 16791 }, { "epoch": 0.9061572500134909, "grad_norm": 1.085663411744397, "learning_rate": 1.1941492996751015e-06, "loss": 0.3788, "step": 16792 }, { "epoch": 0.9062112136420053, "grad_norm": 0.8516716658693528, "learning_rate": 1.1939276927434567e-06, "loss": 0.3165, "step": 16793 }, { "epoch": 0.9062651772705197, "grad_norm": 1.2044128902721896, "learning_rate": 1.1937062095726818e-06, "loss": 0.5981, "step": 16794 }, { "epoch": 0.9063191408990341, "grad_norm": 1.0289571109223823, "learning_rate": 1.193484850169141e-06, "loss": 0.4018, "step": 
16795 }, { "epoch": 0.9063731045275484, "grad_norm": 0.9033119282200087, "learning_rate": 1.1932636145391977e-06, "loss": 0.4905, "step": 16796 }, { "epoch": 0.9064270681560628, "grad_norm": 0.9417746886274232, "learning_rate": 1.1930425026892092e-06, "loss": 0.3652, "step": 16797 }, { "epoch": 0.9064810317845772, "grad_norm": 0.9618644943975591, "learning_rate": 1.192821514625531e-06, "loss": 0.5013, "step": 16798 }, { "epoch": 0.9065349954130916, "grad_norm": 0.8289480810942114, "learning_rate": 1.1926006503545147e-06, "loss": 0.3175, "step": 16799 }, { "epoch": 0.906588959041606, "grad_norm": 1.0260893544658707, "learning_rate": 1.1923799098825076e-06, "loss": 0.523, "step": 16800 }, { "epoch": 0.9066429226701204, "grad_norm": 1.0530980186913874, "learning_rate": 1.192159293215855e-06, "loss": 0.4332, "step": 16801 }, { "epoch": 0.9066968862986348, "grad_norm": 1.1206235478683007, "learning_rate": 1.1919388003608965e-06, "loss": 0.5926, "step": 16802 }, { "epoch": 0.906750849927149, "grad_norm": 1.0761510979215079, "learning_rate": 1.1917184313239706e-06, "loss": 0.4463, "step": 16803 }, { "epoch": 0.9068048135556634, "grad_norm": 1.4373580615063613, "learning_rate": 1.19149818611141e-06, "loss": 0.6423, "step": 16804 }, { "epoch": 0.9068587771841778, "grad_norm": 0.8710218384759507, "learning_rate": 1.1912780647295444e-06, "loss": 0.3943, "step": 16805 }, { "epoch": 0.9069127408126922, "grad_norm": 0.8941716909841638, "learning_rate": 1.1910580671847014e-06, "loss": 0.3958, "step": 16806 }, { "epoch": 0.9069667044412066, "grad_norm": 1.0866830078220886, "learning_rate": 1.1908381934832031e-06, "loss": 0.5705, "step": 16807 }, { "epoch": 0.907020668069721, "grad_norm": 1.1046454934114835, "learning_rate": 1.1906184436313694e-06, "loss": 0.4645, "step": 16808 }, { "epoch": 0.9070746316982354, "grad_norm": 1.0912570530602865, "learning_rate": 1.1903988176355156e-06, "loss": 0.4093, "step": 16809 }, { "epoch": 0.9071285953267497, "grad_norm": 0.9762157369650749, "learning_rate": 1.1901793155019554e-06, "loss": 0.3816, "step": 16810 }, { "epoch": 0.9071825589552641, "grad_norm": 0.8887553835459238, "learning_rate": 1.1899599372369952e-06, "loss": 0.3764, "step": 16811 }, { "epoch": 0.9072365225837785, "grad_norm": 0.8205506671709637, "learning_rate": 1.1897406828469417e-06, "loss": 0.3849, "step": 16812 }, { "epoch": 0.9072904862122929, "grad_norm": 0.7259338815642064, "learning_rate": 1.1895215523380962e-06, "loss": 0.2773, "step": 16813 }, { "epoch": 0.9073444498408073, "grad_norm": 0.9871838753333931, "learning_rate": 1.1893025457167567e-06, "loss": 0.4494, "step": 16814 }, { "epoch": 0.9073984134693217, "grad_norm": 1.0315884644247137, "learning_rate": 1.1890836629892178e-06, "loss": 0.5172, "step": 16815 }, { "epoch": 0.9074523770978361, "grad_norm": 0.9084785595970151, "learning_rate": 1.18886490416177e-06, "loss": 0.3586, "step": 16816 }, { "epoch": 0.9075063407263504, "grad_norm": 0.9975832576333824, "learning_rate": 1.1886462692407015e-06, "loss": 0.5283, "step": 16817 }, { "epoch": 0.9075603043548648, "grad_norm": 1.2246729654002966, "learning_rate": 1.188427758232295e-06, "loss": 0.6909, "step": 16818 }, { "epoch": 0.9076142679833792, "grad_norm": 0.9187406787658606, "learning_rate": 1.1882093711428317e-06, "loss": 0.3021, "step": 16819 }, { "epoch": 0.9076682316118936, "grad_norm": 0.9050591895090271, "learning_rate": 1.187991107978588e-06, "loss": 0.4473, "step": 16820 }, { "epoch": 0.907722195240408, "grad_norm": 0.874098640939496, "learning_rate": 1.1877729687458365e-06, 
"loss": 0.3799, "step": 16821 }, { "epoch": 0.9077761588689224, "grad_norm": 0.9597996847441804, "learning_rate": 1.1875549534508469e-06, "loss": 0.4021, "step": 16822 }, { "epoch": 0.9078301224974368, "grad_norm": 1.1547451714294281, "learning_rate": 1.1873370620998853e-06, "loss": 0.5515, "step": 16823 }, { "epoch": 0.9078840861259511, "grad_norm": 1.083522615765843, "learning_rate": 1.1871192946992142e-06, "loss": 0.4589, "step": 16824 }, { "epoch": 0.9079380497544655, "grad_norm": 1.1088767500850647, "learning_rate": 1.1869016512550922e-06, "loss": 0.511, "step": 16825 }, { "epoch": 0.9079920133829799, "grad_norm": 1.0033789473205046, "learning_rate": 1.1866841317737753e-06, "loss": 0.5229, "step": 16826 }, { "epoch": 0.9080459770114943, "grad_norm": 1.1136461941909117, "learning_rate": 1.1864667362615143e-06, "loss": 0.4669, "step": 16827 }, { "epoch": 0.9080999406400087, "grad_norm": 0.990556255757785, "learning_rate": 1.186249464724558e-06, "loss": 0.4876, "step": 16828 }, { "epoch": 0.908153904268523, "grad_norm": 0.9729339178517875, "learning_rate": 1.186032317169151e-06, "loss": 0.5033, "step": 16829 }, { "epoch": 0.9082078678970374, "grad_norm": 1.1091273458290871, "learning_rate": 1.1858152936015336e-06, "loss": 0.4183, "step": 16830 }, { "epoch": 0.9082618315255517, "grad_norm": 1.403796230703734, "learning_rate": 1.1855983940279447e-06, "loss": 0.5139, "step": 16831 }, { "epoch": 0.9083157951540661, "grad_norm": 1.1266710870467602, "learning_rate": 1.185381618454617e-06, "loss": 0.5037, "step": 16832 }, { "epoch": 0.9083697587825805, "grad_norm": 1.0638481160418614, "learning_rate": 1.1851649668877812e-06, "loss": 0.4482, "step": 16833 }, { "epoch": 0.9084237224110949, "grad_norm": 0.8032821170037179, "learning_rate": 1.1849484393336636e-06, "loss": 0.3429, "step": 16834 }, { "epoch": 0.9084776860396093, "grad_norm": 1.1617278888622353, "learning_rate": 1.1847320357984881e-06, "loss": 0.4578, "step": 16835 }, { "epoch": 0.9085316496681237, "grad_norm": 1.2370079234726632, "learning_rate": 1.1845157562884743e-06, "loss": 0.4756, "step": 16836 }, { "epoch": 0.9085856132966381, "grad_norm": 1.0364067968823278, "learning_rate": 1.184299600809838e-06, "loss": 0.2732, "step": 16837 }, { "epoch": 0.9086395769251524, "grad_norm": 0.8736544656034154, "learning_rate": 1.1840835693687927e-06, "loss": 0.3894, "step": 16838 }, { "epoch": 0.9086935405536668, "grad_norm": 1.0567370029865544, "learning_rate": 1.1838676619715458e-06, "loss": 0.4961, "step": 16839 }, { "epoch": 0.9087475041821812, "grad_norm": 1.050688817949281, "learning_rate": 1.1836518786243034e-06, "loss": 0.3375, "step": 16840 }, { "epoch": 0.9088014678106956, "grad_norm": 1.1075523722116583, "learning_rate": 1.1834362193332676e-06, "loss": 0.5446, "step": 16841 }, { "epoch": 0.90885543143921, "grad_norm": 1.3545956915177442, "learning_rate": 1.183220684104636e-06, "loss": 0.4044, "step": 16842 }, { "epoch": 0.9089093950677244, "grad_norm": 1.1250805211705208, "learning_rate": 1.183005272944604e-06, "loss": 0.4315, "step": 16843 }, { "epoch": 0.9089633586962388, "grad_norm": 0.8979991980791777, "learning_rate": 1.1827899858593625e-06, "loss": 0.3624, "step": 16844 }, { "epoch": 0.9090173223247531, "grad_norm": 0.9611009031380302, "learning_rate": 1.1825748228550995e-06, "loss": 0.3663, "step": 16845 }, { "epoch": 0.9090712859532675, "grad_norm": 0.9806338837117654, "learning_rate": 1.182359783937998e-06, "loss": 0.4423, "step": 16846 }, { "epoch": 0.9091252495817819, "grad_norm": 1.195829841147055, "learning_rate": 
1.1821448691142388e-06, "loss": 0.6147, "step": 16847 }, { "epoch": 0.9091792132102963, "grad_norm": 0.9858585830501105, "learning_rate": 1.1819300783899989e-06, "loss": 0.4594, "step": 16848 }, { "epoch": 0.9092331768388107, "grad_norm": 1.3454615683381237, "learning_rate": 1.181715411771452e-06, "loss": 0.5912, "step": 16849 }, { "epoch": 0.9092871404673251, "grad_norm": 1.0998985644713724, "learning_rate": 1.1815008692647672e-06, "loss": 0.4943, "step": 16850 }, { "epoch": 0.9093411040958395, "grad_norm": 0.9919698531777323, "learning_rate": 1.181286450876111e-06, "loss": 0.366, "step": 16851 }, { "epoch": 0.9093950677243537, "grad_norm": 1.1263026359526167, "learning_rate": 1.1810721566116465e-06, "loss": 0.6189, "step": 16852 }, { "epoch": 0.9094490313528681, "grad_norm": 1.1300459543833647, "learning_rate": 1.1808579864775318e-06, "loss": 0.5346, "step": 16853 }, { "epoch": 0.9095029949813825, "grad_norm": 0.9666786823740185, "learning_rate": 1.1806439404799224e-06, "loss": 0.4578, "step": 16854 }, { "epoch": 0.9095569586098969, "grad_norm": 1.0499863825496538, "learning_rate": 1.1804300186249706e-06, "loss": 0.4679, "step": 16855 }, { "epoch": 0.9096109222384113, "grad_norm": 1.1824737839625958, "learning_rate": 1.1802162209188248e-06, "loss": 0.5694, "step": 16856 }, { "epoch": 0.9096648858669257, "grad_norm": 1.134878762070186, "learning_rate": 1.1800025473676296e-06, "loss": 0.4552, "step": 16857 }, { "epoch": 0.90971884949544, "grad_norm": 0.9043992787089347, "learning_rate": 1.179788997977527e-06, "loss": 0.4184, "step": 16858 }, { "epoch": 0.9097728131239544, "grad_norm": 0.7583470576267999, "learning_rate": 1.1795755727546528e-06, "loss": 0.3305, "step": 16859 }, { "epoch": 0.9098267767524688, "grad_norm": 1.0433820775080942, "learning_rate": 1.1793622717051426e-06, "loss": 0.4983, "step": 16860 }, { "epoch": 0.9098807403809832, "grad_norm": 1.0227055987259839, "learning_rate": 1.179149094835126e-06, "loss": 0.3968, "step": 16861 }, { "epoch": 0.9099347040094976, "grad_norm": 1.080768203351878, "learning_rate": 1.1789360421507305e-06, "loss": 0.407, "step": 16862 }, { "epoch": 0.909988667638012, "grad_norm": 1.0179116995892183, "learning_rate": 1.1787231136580793e-06, "loss": 0.4314, "step": 16863 }, { "epoch": 0.9100426312665264, "grad_norm": 1.051107876174947, "learning_rate": 1.178510309363292e-06, "loss": 0.4054, "step": 16864 }, { "epoch": 0.9100965948950407, "grad_norm": 0.928022803155768, "learning_rate": 1.1782976292724858e-06, "loss": 0.464, "step": 16865 }, { "epoch": 0.9101505585235551, "grad_norm": 1.108490965968445, "learning_rate": 1.178085073391772e-06, "loss": 0.5364, "step": 16866 }, { "epoch": 0.9102045221520695, "grad_norm": 1.3798449481006136, "learning_rate": 1.1778726417272599e-06, "loss": 0.4796, "step": 16867 }, { "epoch": 0.9102584857805839, "grad_norm": 0.9918125094895963, "learning_rate": 1.177660334285055e-06, "loss": 0.3887, "step": 16868 }, { "epoch": 0.9103124494090983, "grad_norm": 0.951484698639378, "learning_rate": 1.17744815107126e-06, "loss": 0.3107, "step": 16869 }, { "epoch": 0.9103664130376127, "grad_norm": 1.1719765456659375, "learning_rate": 1.1772360920919725e-06, "loss": 0.494, "step": 16870 }, { "epoch": 0.9104203766661271, "grad_norm": 1.427314154120136, "learning_rate": 1.1770241573532875e-06, "loss": 0.6771, "step": 16871 }, { "epoch": 0.9104743402946414, "grad_norm": 1.059229018404341, "learning_rate": 1.1768123468612968e-06, "loss": 0.4515, "step": 16872 }, { "epoch": 0.9105283039231558, "grad_norm": 1.0925142997059742, 
"learning_rate": 1.1766006606220866e-06, "loss": 0.4737, "step": 16873 }, { "epoch": 0.9105822675516702, "grad_norm": 1.2346115217102818, "learning_rate": 1.1763890986417426e-06, "loss": 0.6227, "step": 16874 }, { "epoch": 0.9106362311801846, "grad_norm": 1.267881158392703, "learning_rate": 1.1761776609263446e-06, "loss": 0.6349, "step": 16875 }, { "epoch": 0.910690194808699, "grad_norm": 1.0405024478839842, "learning_rate": 1.1759663474819688e-06, "loss": 0.4499, "step": 16876 }, { "epoch": 0.9107441584372133, "grad_norm": 0.992217987948118, "learning_rate": 1.1757551583146897e-06, "loss": 0.4335, "step": 16877 }, { "epoch": 0.9107981220657277, "grad_norm": 1.001423784072231, "learning_rate": 1.175544093430577e-06, "loss": 0.5019, "step": 16878 }, { "epoch": 0.910852085694242, "grad_norm": 1.0363973222331415, "learning_rate": 1.175333152835697e-06, "loss": 0.4376, "step": 16879 }, { "epoch": 0.9109060493227564, "grad_norm": 0.8688669417645254, "learning_rate": 1.1751223365361114e-06, "loss": 0.4468, "step": 16880 }, { "epoch": 0.9109600129512708, "grad_norm": 0.8773068439552222, "learning_rate": 1.1749116445378798e-06, "loss": 0.3512, "step": 16881 }, { "epoch": 0.9110139765797852, "grad_norm": 1.0555960470178207, "learning_rate": 1.1747010768470576e-06, "loss": 0.4125, "step": 16882 }, { "epoch": 0.9110679402082996, "grad_norm": 1.0025470799065912, "learning_rate": 1.1744906334696969e-06, "loss": 0.4808, "step": 16883 }, { "epoch": 0.911121903836814, "grad_norm": 0.824373075472008, "learning_rate": 1.174280314411846e-06, "loss": 0.3676, "step": 16884 }, { "epoch": 0.9111758674653284, "grad_norm": 1.1058322650940604, "learning_rate": 1.1740701196795496e-06, "loss": 0.4872, "step": 16885 }, { "epoch": 0.9112298310938427, "grad_norm": 1.1404207331336609, "learning_rate": 1.1738600492788495e-06, "loss": 0.3994, "step": 16886 }, { "epoch": 0.9112837947223571, "grad_norm": 1.0734189328576162, "learning_rate": 1.1736501032157827e-06, "loss": 0.4858, "step": 16887 }, { "epoch": 0.9113377583508715, "grad_norm": 1.0183101278272144, "learning_rate": 1.173440281496383e-06, "loss": 0.5195, "step": 16888 }, { "epoch": 0.9113917219793859, "grad_norm": 1.0626484909225495, "learning_rate": 1.1732305841266814e-06, "loss": 0.4371, "step": 16889 }, { "epoch": 0.9114456856079003, "grad_norm": 0.9478137928140227, "learning_rate": 1.1730210111127046e-06, "loss": 0.3613, "step": 16890 }, { "epoch": 0.9114996492364147, "grad_norm": 1.2298279653042206, "learning_rate": 1.1728115624604762e-06, "loss": 0.5203, "step": 16891 }, { "epoch": 0.9115536128649291, "grad_norm": 1.183539691896239, "learning_rate": 1.1726022381760159e-06, "loss": 0.4561, "step": 16892 }, { "epoch": 0.9116075764934434, "grad_norm": 0.9791766270297672, "learning_rate": 1.1723930382653398e-06, "loss": 0.3574, "step": 16893 }, { "epoch": 0.9116615401219578, "grad_norm": 0.9051945698672417, "learning_rate": 1.1721839627344606e-06, "loss": 0.547, "step": 16894 }, { "epoch": 0.9117155037504722, "grad_norm": 0.9626661709113797, "learning_rate": 1.1719750115893865e-06, "loss": 0.4983, "step": 16895 }, { "epoch": 0.9117694673789866, "grad_norm": 1.105431017892257, "learning_rate": 1.1717661848361245e-06, "loss": 0.5486, "step": 16896 }, { "epoch": 0.911823431007501, "grad_norm": 0.9936592463666849, "learning_rate": 1.1715574824806756e-06, "loss": 0.4417, "step": 16897 }, { "epoch": 0.9118773946360154, "grad_norm": 1.1674816246072603, "learning_rate": 1.1713489045290378e-06, "loss": 0.5095, "step": 16898 }, { "epoch": 0.9119313582645298, "grad_norm": 
0.8689335628480996, "learning_rate": 1.1711404509872062e-06, "loss": 0.4287, "step": 16899 }, { "epoch": 0.911985321893044, "grad_norm": 0.9941597777952049, "learning_rate": 1.1709321218611731e-06, "loss": 0.4762, "step": 16900 }, { "epoch": 0.9120392855215584, "grad_norm": 0.9988806628324017, "learning_rate": 1.1707239171569242e-06, "loss": 0.3618, "step": 16901 }, { "epoch": 0.9120932491500728, "grad_norm": 1.0780730264934164, "learning_rate": 1.1705158368804445e-06, "loss": 0.5722, "step": 16902 }, { "epoch": 0.9121472127785872, "grad_norm": 1.0872730174832148, "learning_rate": 1.170307881037714e-06, "loss": 0.3882, "step": 16903 }, { "epoch": 0.9122011764071016, "grad_norm": 1.0922398138690594, "learning_rate": 1.1701000496347098e-06, "loss": 0.4929, "step": 16904 }, { "epoch": 0.912255140035616, "grad_norm": 0.9070075270983222, "learning_rate": 1.1698923426774057e-06, "loss": 0.3486, "step": 16905 }, { "epoch": 0.9123091036641304, "grad_norm": 1.0097538847201213, "learning_rate": 1.1696847601717704e-06, "loss": 0.454, "step": 16906 }, { "epoch": 0.9123630672926447, "grad_norm": 0.9577960226751314, "learning_rate": 1.1694773021237714e-06, "loss": 0.4537, "step": 16907 }, { "epoch": 0.9124170309211591, "grad_norm": 1.0530423808167795, "learning_rate": 1.16926996853937e-06, "loss": 0.4209, "step": 16908 }, { "epoch": 0.9124709945496735, "grad_norm": 1.2101800871826536, "learning_rate": 1.169062759424525e-06, "loss": 0.4484, "step": 16909 }, { "epoch": 0.9125249581781879, "grad_norm": 0.8898463287825683, "learning_rate": 1.168855674785193e-06, "loss": 0.4085, "step": 16910 }, { "epoch": 0.9125789218067023, "grad_norm": 0.98996862264736, "learning_rate": 1.1686487146273251e-06, "loss": 0.3847, "step": 16911 }, { "epoch": 0.9126328854352167, "grad_norm": 1.1031345405815276, "learning_rate": 1.1684418789568692e-06, "loss": 0.5635, "step": 16912 }, { "epoch": 0.9126868490637311, "grad_norm": 1.0128103338518741, "learning_rate": 1.1682351677797708e-06, "loss": 0.4614, "step": 16913 }, { "epoch": 0.9127408126922454, "grad_norm": 0.9485855556713889, "learning_rate": 1.1680285811019712e-06, "loss": 0.4142, "step": 16914 }, { "epoch": 0.9127947763207598, "grad_norm": 0.7421410704731143, "learning_rate": 1.1678221189294065e-06, "loss": 0.3003, "step": 16915 }, { "epoch": 0.9128487399492742, "grad_norm": 1.132683247003801, "learning_rate": 1.167615781268012e-06, "loss": 0.5233, "step": 16916 }, { "epoch": 0.9129027035777886, "grad_norm": 1.0033156577917535, "learning_rate": 1.167409568123717e-06, "loss": 0.4335, "step": 16917 }, { "epoch": 0.912956667206303, "grad_norm": 0.8197734071933604, "learning_rate": 1.1672034795024486e-06, "loss": 0.3627, "step": 16918 }, { "epoch": 0.9130106308348174, "grad_norm": 1.0228482283353368, "learning_rate": 1.1669975154101306e-06, "loss": 0.4696, "step": 16919 }, { "epoch": 0.9130645944633318, "grad_norm": 1.1195546609838904, "learning_rate": 1.1667916758526818e-06, "loss": 0.4858, "step": 16920 }, { "epoch": 0.913118558091846, "grad_norm": 1.0239018892206044, "learning_rate": 1.166585960836019e-06, "loss": 0.5144, "step": 16921 }, { "epoch": 0.9131725217203605, "grad_norm": 1.014630920873624, "learning_rate": 1.1663803703660543e-06, "loss": 0.4369, "step": 16922 }, { "epoch": 0.9132264853488749, "grad_norm": 0.8826738320412871, "learning_rate": 1.1661749044486962e-06, "loss": 0.3426, "step": 16923 }, { "epoch": 0.9132804489773892, "grad_norm": 1.0057867419034845, "learning_rate": 1.1659695630898507e-06, "loss": 0.4401, "step": 16924 }, { "epoch": 
0.9133344126059036, "grad_norm": 1.0506411927342532, "learning_rate": 1.1657643462954194e-06, "loss": 0.5208, "step": 16925 }, { "epoch": 0.913388376234418, "grad_norm": 0.8986404685200945, "learning_rate": 1.1655592540712997e-06, "loss": 0.3333, "step": 16926 }, { "epoch": 0.9134423398629323, "grad_norm": 0.8947295748724384, "learning_rate": 1.165354286423387e-06, "loss": 0.3834, "step": 16927 }, { "epoch": 0.9134963034914467, "grad_norm": 1.0086139544807173, "learning_rate": 1.1651494433575717e-06, "loss": 0.5561, "step": 16928 }, { "epoch": 0.9135502671199611, "grad_norm": 1.0077866524111596, "learning_rate": 1.1649447248797416e-06, "loss": 0.3764, "step": 16929 }, { "epoch": 0.9136042307484755, "grad_norm": 1.0155951022679206, "learning_rate": 1.1647401309957805e-06, "loss": 0.3978, "step": 16930 }, { "epoch": 0.9136581943769899, "grad_norm": 1.1438370125493493, "learning_rate": 1.164535661711568e-06, "loss": 0.5294, "step": 16931 }, { "epoch": 0.9137121580055043, "grad_norm": 1.1129915099421646, "learning_rate": 1.1643313170329818e-06, "loss": 0.442, "step": 16932 }, { "epoch": 0.9137661216340187, "grad_norm": 1.3597479594018367, "learning_rate": 1.1641270969658942e-06, "loss": 0.5389, "step": 16933 }, { "epoch": 0.913820085262533, "grad_norm": 1.069723178584762, "learning_rate": 1.163923001516175e-06, "loss": 0.5008, "step": 16934 }, { "epoch": 0.9138740488910474, "grad_norm": 1.2416942829925761, "learning_rate": 1.16371903068969e-06, "loss": 0.6111, "step": 16935 }, { "epoch": 0.9139280125195618, "grad_norm": 0.8305094500166785, "learning_rate": 1.1635151844923017e-06, "loss": 0.3738, "step": 16936 }, { "epoch": 0.9139819761480762, "grad_norm": 1.0306578846344712, "learning_rate": 1.1633114629298681e-06, "loss": 0.408, "step": 16937 }, { "epoch": 0.9140359397765906, "grad_norm": 0.9951584248706308, "learning_rate": 1.1631078660082453e-06, "loss": 0.3763, "step": 16938 }, { "epoch": 0.914089903405105, "grad_norm": 1.1548323295505134, "learning_rate": 1.1629043937332843e-06, "loss": 0.6563, "step": 16939 }, { "epoch": 0.9141438670336194, "grad_norm": 1.3088069572689804, "learning_rate": 1.162701046110834e-06, "loss": 0.6121, "step": 16940 }, { "epoch": 0.9141978306621337, "grad_norm": 0.8539940645028278, "learning_rate": 1.162497823146738e-06, "loss": 0.3781, "step": 16941 }, { "epoch": 0.9142517942906481, "grad_norm": 1.284262987567295, "learning_rate": 1.1622947248468369e-06, "loss": 0.6883, "step": 16942 }, { "epoch": 0.9143057579191625, "grad_norm": 1.0759572582854429, "learning_rate": 1.1620917512169689e-06, "loss": 0.4421, "step": 16943 }, { "epoch": 0.9143597215476769, "grad_norm": 1.111741988068475, "learning_rate": 1.1618889022629663e-06, "loss": 0.4417, "step": 16944 }, { "epoch": 0.9144136851761913, "grad_norm": 0.9582464901250636, "learning_rate": 1.1616861779906603e-06, "loss": 0.3493, "step": 16945 }, { "epoch": 0.9144676488047057, "grad_norm": 0.8084475950723385, "learning_rate": 1.1614835784058776e-06, "loss": 0.2846, "step": 16946 }, { "epoch": 0.91452161243322, "grad_norm": 1.0554539092608264, "learning_rate": 1.16128110351444e-06, "loss": 0.4522, "step": 16947 }, { "epoch": 0.9145755760617343, "grad_norm": 1.011038529604387, "learning_rate": 1.1610787533221678e-06, "loss": 0.4876, "step": 16948 }, { "epoch": 0.9146295396902487, "grad_norm": 1.007069694024192, "learning_rate": 1.1608765278348766e-06, "loss": 0.4317, "step": 16949 }, { "epoch": 0.9146835033187631, "grad_norm": 1.136726347428227, "learning_rate": 1.1606744270583776e-06, "loss": 0.5333, "step": 16950 }, 
{ "epoch": 0.9147374669472775, "grad_norm": 1.3134431504355117, "learning_rate": 1.1604724509984805e-06, "loss": 0.5045, "step": 16951 }, { "epoch": 0.9147914305757919, "grad_norm": 0.9654851806094548, "learning_rate": 1.1602705996609897e-06, "loss": 0.3679, "step": 16952 }, { "epoch": 0.9148453942043063, "grad_norm": 1.042880331932488, "learning_rate": 1.1600688730517075e-06, "loss": 0.4848, "step": 16953 }, { "epoch": 0.9148993578328207, "grad_norm": 0.8517091179157101, "learning_rate": 1.1598672711764303e-06, "loss": 0.3885, "step": 16954 }, { "epoch": 0.914953321461335, "grad_norm": 1.1279226438269596, "learning_rate": 1.159665794040954e-06, "loss": 0.4635, "step": 16955 }, { "epoch": 0.9150072850898494, "grad_norm": 0.7872390429726648, "learning_rate": 1.1594644416510676e-06, "loss": 0.3994, "step": 16956 }, { "epoch": 0.9150612487183638, "grad_norm": 1.028743709888183, "learning_rate": 1.1592632140125594e-06, "loss": 0.4185, "step": 16957 }, { "epoch": 0.9151152123468782, "grad_norm": 1.0723119635976255, "learning_rate": 1.159062111131212e-06, "loss": 0.4565, "step": 16958 }, { "epoch": 0.9151691759753926, "grad_norm": 1.073772788781618, "learning_rate": 1.1588611330128058e-06, "loss": 0.459, "step": 16959 }, { "epoch": 0.915223139603907, "grad_norm": 0.9126417226068954, "learning_rate": 1.158660279663117e-06, "loss": 0.3961, "step": 16960 }, { "epoch": 0.9152771032324214, "grad_norm": 1.046765441052405, "learning_rate": 1.1584595510879186e-06, "loss": 0.474, "step": 16961 }, { "epoch": 0.9153310668609357, "grad_norm": 0.8221019719035789, "learning_rate": 1.1582589472929798e-06, "loss": 0.3279, "step": 16962 }, { "epoch": 0.9153850304894501, "grad_norm": 1.1057198289963421, "learning_rate": 1.1580584682840653e-06, "loss": 0.5864, "step": 16963 }, { "epoch": 0.9154389941179645, "grad_norm": 0.9057140436914779, "learning_rate": 1.157858114066938e-06, "loss": 0.4203, "step": 16964 }, { "epoch": 0.9154929577464789, "grad_norm": 0.9928056462712203, "learning_rate": 1.1576578846473558e-06, "loss": 0.4263, "step": 16965 }, { "epoch": 0.9155469213749933, "grad_norm": 0.8288688400917101, "learning_rate": 1.1574577800310733e-06, "loss": 0.3815, "step": 16966 }, { "epoch": 0.9156008850035077, "grad_norm": 1.0859338307608108, "learning_rate": 1.1572578002238424e-06, "loss": 0.5694, "step": 16967 }, { "epoch": 0.9156548486320221, "grad_norm": 0.900635605981182, "learning_rate": 1.1570579452314102e-06, "loss": 0.3727, "step": 16968 }, { "epoch": 0.9157088122605364, "grad_norm": 1.212689092470864, "learning_rate": 1.1568582150595213e-06, "loss": 0.5387, "step": 16969 }, { "epoch": 0.9157627758890508, "grad_norm": 0.7491935600034412, "learning_rate": 1.156658609713915e-06, "loss": 0.2649, "step": 16970 }, { "epoch": 0.9158167395175651, "grad_norm": 1.1541771907842062, "learning_rate": 1.1564591292003296e-06, "loss": 0.5503, "step": 16971 }, { "epoch": 0.9158707031460795, "grad_norm": 1.1319563736164848, "learning_rate": 1.156259773524497e-06, "loss": 0.4652, "step": 16972 }, { "epoch": 0.9159246667745939, "grad_norm": 1.0150001952676329, "learning_rate": 1.1560605426921475e-06, "loss": 0.4932, "step": 16973 }, { "epoch": 0.9159786304031083, "grad_norm": 0.9571521568337467, "learning_rate": 1.1558614367090078e-06, "loss": 0.4205, "step": 16974 }, { "epoch": 0.9160325940316227, "grad_norm": 1.1493457450394349, "learning_rate": 1.1556624555807993e-06, "loss": 0.5325, "step": 16975 }, { "epoch": 0.916086557660137, "grad_norm": 0.9845167299502717, "learning_rate": 1.1554635993132419e-06, "loss": 0.4315, 
"step": 16976 }, { "epoch": 0.9161405212886514, "grad_norm": 1.2323099922116445, "learning_rate": 1.15526486791205e-06, "loss": 0.4658, "step": 16977 }, { "epoch": 0.9161944849171658, "grad_norm": 1.1113009236894305, "learning_rate": 1.1550662613829365e-06, "loss": 0.4177, "step": 16978 }, { "epoch": 0.9162484485456802, "grad_norm": 0.9418218512729671, "learning_rate": 1.1548677797316085e-06, "loss": 0.5034, "step": 16979 }, { "epoch": 0.9163024121741946, "grad_norm": 1.0920404228604519, "learning_rate": 1.1546694229637704e-06, "loss": 0.4276, "step": 16980 }, { "epoch": 0.916356375802709, "grad_norm": 0.9558595666841655, "learning_rate": 1.1544711910851244e-06, "loss": 0.4113, "step": 16981 }, { "epoch": 0.9164103394312234, "grad_norm": 1.0236719175302933, "learning_rate": 1.1542730841013669e-06, "loss": 0.3792, "step": 16982 }, { "epoch": 0.9164643030597377, "grad_norm": 1.0805072352798952, "learning_rate": 1.1540751020181928e-06, "loss": 0.4178, "step": 16983 }, { "epoch": 0.9165182666882521, "grad_norm": 1.0251806620167006, "learning_rate": 1.1538772448412908e-06, "loss": 0.3497, "step": 16984 }, { "epoch": 0.9165722303167665, "grad_norm": 1.2894571927182308, "learning_rate": 1.1536795125763484e-06, "loss": 0.5691, "step": 16985 }, { "epoch": 0.9166261939452809, "grad_norm": 0.7723369556549816, "learning_rate": 1.1534819052290485e-06, "loss": 0.3295, "step": 16986 }, { "epoch": 0.9166801575737953, "grad_norm": 0.975575689707718, "learning_rate": 1.1532844228050704e-06, "loss": 0.4602, "step": 16987 }, { "epoch": 0.9167341212023097, "grad_norm": 1.1892590155214395, "learning_rate": 1.1530870653100899e-06, "loss": 0.4315, "step": 16988 }, { "epoch": 0.9167880848308241, "grad_norm": 1.073504029566406, "learning_rate": 1.152889832749779e-06, "loss": 0.4778, "step": 16989 }, { "epoch": 0.9168420484593384, "grad_norm": 1.0081862369836172, "learning_rate": 1.1526927251298076e-06, "loss": 0.3521, "step": 16990 }, { "epoch": 0.9168960120878528, "grad_norm": 0.9333604770467135, "learning_rate": 1.1524957424558397e-06, "loss": 0.3443, "step": 16991 }, { "epoch": 0.9169499757163672, "grad_norm": 0.9450674538494087, "learning_rate": 1.1522988847335364e-06, "loss": 0.5058, "step": 16992 }, { "epoch": 0.9170039393448816, "grad_norm": 1.106172691271883, "learning_rate": 1.1521021519685568e-06, "loss": 0.5654, "step": 16993 }, { "epoch": 0.917057902973396, "grad_norm": 0.8745514117081873, "learning_rate": 1.1519055441665543e-06, "loss": 0.3819, "step": 16994 }, { "epoch": 0.9171118666019104, "grad_norm": 1.0609949368110423, "learning_rate": 1.15170906133318e-06, "loss": 0.5089, "step": 16995 }, { "epoch": 0.9171658302304246, "grad_norm": 1.2138329950584097, "learning_rate": 1.151512703474081e-06, "loss": 0.4594, "step": 16996 }, { "epoch": 0.917219793858939, "grad_norm": 1.083636295949006, "learning_rate": 1.1513164705949006e-06, "loss": 0.5259, "step": 16997 }, { "epoch": 0.9172737574874534, "grad_norm": 0.9425070332663033, "learning_rate": 1.151120362701279e-06, "loss": 0.4298, "step": 16998 }, { "epoch": 0.9173277211159678, "grad_norm": 0.9038492433593869, "learning_rate": 1.1509243797988523e-06, "loss": 0.3843, "step": 16999 }, { "epoch": 0.9173816847444822, "grad_norm": 1.1866900096261452, "learning_rate": 1.1507285218932529e-06, "loss": 0.4999, "step": 17000 }, { "epoch": 0.9173816847444822, "eval_loss": 0.5285828709602356, "eval_runtime": 164.3598, "eval_samples_per_second": 20.924, "eval_steps_per_second": 0.876, "step": 17000 }, { "epoch": 0.9174356483729966, "grad_norm": 1.1155783507408839, 
"learning_rate": 1.150532788990111e-06, "loss": 0.4426, "step": 17001 }, { "epoch": 0.917489612001511, "grad_norm": 1.088237415312259, "learning_rate": 1.1503371810950514e-06, "loss": 0.5544, "step": 17002 }, { "epoch": 0.9175435756300253, "grad_norm": 0.8358327920475628, "learning_rate": 1.1501416982136967e-06, "loss": 0.3636, "step": 17003 }, { "epoch": 0.9175975392585397, "grad_norm": 0.9647551009148343, "learning_rate": 1.149946340351664e-06, "loss": 0.4291, "step": 17004 }, { "epoch": 0.9176515028870541, "grad_norm": 1.1061946708540777, "learning_rate": 1.1497511075145689e-06, "loss": 0.4437, "step": 17005 }, { "epoch": 0.9177054665155685, "grad_norm": 1.1745516579705224, "learning_rate": 1.1495559997080231e-06, "loss": 0.4748, "step": 17006 }, { "epoch": 0.9177594301440829, "grad_norm": 1.098433588168236, "learning_rate": 1.149361016937633e-06, "loss": 0.4418, "step": 17007 }, { "epoch": 0.9178133937725973, "grad_norm": 1.164364168280332, "learning_rate": 1.149166159209004e-06, "loss": 0.4921, "step": 17008 }, { "epoch": 0.9178673574011117, "grad_norm": 0.8363004858697384, "learning_rate": 1.1489714265277352e-06, "loss": 0.4179, "step": 17009 }, { "epoch": 0.917921321029626, "grad_norm": 1.0548350388803647, "learning_rate": 1.1487768188994248e-06, "loss": 0.4554, "step": 17010 }, { "epoch": 0.9179752846581404, "grad_norm": 1.116210290767277, "learning_rate": 1.1485823363296646e-06, "loss": 0.5774, "step": 17011 }, { "epoch": 0.9180292482866548, "grad_norm": 0.8716425216474286, "learning_rate": 1.148387978824045e-06, "loss": 0.4093, "step": 17012 }, { "epoch": 0.9180832119151692, "grad_norm": 1.1240306117838947, "learning_rate": 1.1481937463881514e-06, "loss": 0.4718, "step": 17013 }, { "epoch": 0.9181371755436836, "grad_norm": 0.990497087661862, "learning_rate": 1.147999639027567e-06, "loss": 0.4847, "step": 17014 }, { "epoch": 0.918191139172198, "grad_norm": 0.9867570265340146, "learning_rate": 1.1478056567478708e-06, "loss": 0.454, "step": 17015 }, { "epoch": 0.9182451028007124, "grad_norm": 0.9120879093433375, "learning_rate": 1.1476117995546371e-06, "loss": 0.3977, "step": 17016 }, { "epoch": 0.9182990664292267, "grad_norm": 1.1273761401346343, "learning_rate": 1.1474180674534384e-06, "loss": 0.4822, "step": 17017 }, { "epoch": 0.918353030057741, "grad_norm": 1.0352979098880144, "learning_rate": 1.1472244604498425e-06, "loss": 0.5565, "step": 17018 }, { "epoch": 0.9184069936862554, "grad_norm": 1.151319556675141, "learning_rate": 1.147030978549413e-06, "loss": 0.4297, "step": 17019 }, { "epoch": 0.9184609573147698, "grad_norm": 1.1265850355796767, "learning_rate": 1.1468376217577124e-06, "loss": 0.5055, "step": 17020 }, { "epoch": 0.9185149209432842, "grad_norm": 0.9721762345578367, "learning_rate": 1.1466443900802964e-06, "loss": 0.3163, "step": 17021 }, { "epoch": 0.9185688845717986, "grad_norm": 0.900237386840704, "learning_rate": 1.1464512835227194e-06, "loss": 0.4876, "step": 17022 }, { "epoch": 0.918622848200313, "grad_norm": 1.362122660503254, "learning_rate": 1.146258302090532e-06, "loss": 0.4905, "step": 17023 }, { "epoch": 0.9186768118288273, "grad_norm": 0.9162471402076313, "learning_rate": 1.1460654457892797e-06, "loss": 0.4241, "step": 17024 }, { "epoch": 0.9187307754573417, "grad_norm": 1.1224590258572404, "learning_rate": 1.1458727146245058e-06, "loss": 0.473, "step": 17025 }, { "epoch": 0.9187847390858561, "grad_norm": 0.8916376445884127, "learning_rate": 1.14568010860175e-06, "loss": 0.4442, "step": 17026 }, { "epoch": 0.9188387027143705, "grad_norm": 
1.0374183930381475, "learning_rate": 1.1454876277265474e-06, "loss": 0.5648, "step": 17027 }, { "epoch": 0.9188926663428849, "grad_norm": 0.9886527990337742, "learning_rate": 1.1452952720044303e-06, "loss": 0.3942, "step": 17028 }, { "epoch": 0.9189466299713993, "grad_norm": 1.1326640341159453, "learning_rate": 1.1451030414409266e-06, "loss": 0.5084, "step": 17029 }, { "epoch": 0.9190005935999137, "grad_norm": 0.8828127212879949, "learning_rate": 1.1449109360415625e-06, "loss": 0.3348, "step": 17030 }, { "epoch": 0.919054557228428, "grad_norm": 1.1157326869438646, "learning_rate": 1.1447189558118591e-06, "loss": 0.3628, "step": 17031 }, { "epoch": 0.9191085208569424, "grad_norm": 1.1238648269189224, "learning_rate": 1.1445271007573329e-06, "loss": 0.536, "step": 17032 }, { "epoch": 0.9191624844854568, "grad_norm": 1.028174013329178, "learning_rate": 1.1443353708834983e-06, "loss": 0.5468, "step": 17033 }, { "epoch": 0.9192164481139712, "grad_norm": 1.1044161399820003, "learning_rate": 1.1441437661958668e-06, "loss": 0.5068, "step": 17034 }, { "epoch": 0.9192704117424856, "grad_norm": 1.0219623345979358, "learning_rate": 1.1439522866999443e-06, "loss": 0.4524, "step": 17035 }, { "epoch": 0.919324375371, "grad_norm": 1.0541482944309646, "learning_rate": 1.143760932401235e-06, "loss": 0.4213, "step": 17036 }, { "epoch": 0.9193783389995144, "grad_norm": 1.1580814771599297, "learning_rate": 1.1435697033052381e-06, "loss": 0.5353, "step": 17037 }, { "epoch": 0.9194323026280287, "grad_norm": 1.1243527196951757, "learning_rate": 1.1433785994174501e-06, "loss": 0.5016, "step": 17038 }, { "epoch": 0.9194862662565431, "grad_norm": 1.0893624226320795, "learning_rate": 1.1431876207433628e-06, "loss": 0.5734, "step": 17039 }, { "epoch": 0.9195402298850575, "grad_norm": 1.0862059095158552, "learning_rate": 1.1429967672884653e-06, "loss": 0.4973, "step": 17040 }, { "epoch": 0.9195941935135719, "grad_norm": 0.886309627725469, "learning_rate": 1.1428060390582433e-06, "loss": 0.3487, "step": 17041 }, { "epoch": 0.9196481571420863, "grad_norm": 1.0585815538239522, "learning_rate": 1.1426154360581782e-06, "loss": 0.393, "step": 17042 }, { "epoch": 0.9197021207706007, "grad_norm": 1.1782601818750436, "learning_rate": 1.1424249582937485e-06, "loss": 0.4978, "step": 17043 }, { "epoch": 0.919756084399115, "grad_norm": 1.055006352915145, "learning_rate": 1.1422346057704287e-06, "loss": 0.3948, "step": 17044 }, { "epoch": 0.9198100480276293, "grad_norm": 1.2846705883695575, "learning_rate": 1.1420443784936894e-06, "loss": 0.6511, "step": 17045 }, { "epoch": 0.9198640116561437, "grad_norm": 1.0814506287307002, "learning_rate": 1.141854276468998e-06, "loss": 0.6203, "step": 17046 }, { "epoch": 0.9199179752846581, "grad_norm": 1.0968791816291783, "learning_rate": 1.1416642997018185e-06, "loss": 0.4518, "step": 17047 }, { "epoch": 0.9199719389131725, "grad_norm": 1.0274509489929953, "learning_rate": 1.1414744481976105e-06, "loss": 0.4897, "step": 17048 }, { "epoch": 0.9200259025416869, "grad_norm": 1.1295071116784903, "learning_rate": 1.1412847219618304e-06, "loss": 0.5577, "step": 17049 }, { "epoch": 0.9200798661702013, "grad_norm": 0.9170240695275826, "learning_rate": 1.1410951209999322e-06, "loss": 0.4143, "step": 17050 }, { "epoch": 0.9201338297987157, "grad_norm": 1.0957323082543307, "learning_rate": 1.1409056453173644e-06, "loss": 0.4437, "step": 17051 }, { "epoch": 0.92018779342723, "grad_norm": 1.210417880116114, "learning_rate": 1.1407162949195732e-06, "loss": 0.4704, "step": 17052 }, { "epoch": 
0.9202417570557444, "grad_norm": 1.1573002554251823, "learning_rate": 1.1405270698120003e-06, "loss": 0.5054, "step": 17053 }, { "epoch": 0.9202957206842588, "grad_norm": 1.0398608465687487, "learning_rate": 1.140337970000084e-06, "loss": 0.4437, "step": 17054 }, { "epoch": 0.9203496843127732, "grad_norm": 0.8815851983788495, "learning_rate": 1.14014899548926e-06, "loss": 0.39, "step": 17055 }, { "epoch": 0.9204036479412876, "grad_norm": 1.0235498842215274, "learning_rate": 1.1399601462849583e-06, "loss": 0.4799, "step": 17056 }, { "epoch": 0.920457611569802, "grad_norm": 1.031980673044282, "learning_rate": 1.1397714223926082e-06, "loss": 0.4805, "step": 17057 }, { "epoch": 0.9205115751983164, "grad_norm": 1.0818361430716121, "learning_rate": 1.139582823817633e-06, "loss": 0.4017, "step": 17058 }, { "epoch": 0.9205655388268307, "grad_norm": 1.2076673289197333, "learning_rate": 1.1393943505654539e-06, "loss": 0.5072, "step": 17059 }, { "epoch": 0.9206195024553451, "grad_norm": 1.04528249771486, "learning_rate": 1.1392060026414866e-06, "loss": 0.388, "step": 17060 }, { "epoch": 0.9206734660838595, "grad_norm": 1.1233164887601286, "learning_rate": 1.1390177800511448e-06, "loss": 0.4477, "step": 17061 }, { "epoch": 0.9207274297123739, "grad_norm": 1.1271513990039128, "learning_rate": 1.1388296827998387e-06, "loss": 0.4982, "step": 17062 }, { "epoch": 0.9207813933408883, "grad_norm": 0.8524433233965182, "learning_rate": 1.1386417108929744e-06, "loss": 0.2946, "step": 17063 }, { "epoch": 0.9208353569694027, "grad_norm": 1.0545686855009162, "learning_rate": 1.1384538643359541e-06, "loss": 0.5361, "step": 17064 }, { "epoch": 0.920889320597917, "grad_norm": 1.1580989174403744, "learning_rate": 1.1382661431341769e-06, "loss": 0.4756, "step": 17065 }, { "epoch": 0.9209432842264313, "grad_norm": 1.1106304842621835, "learning_rate": 1.1380785472930378e-06, "loss": 0.5145, "step": 17066 }, { "epoch": 0.9209972478549457, "grad_norm": 1.1791707125633641, "learning_rate": 1.1378910768179284e-06, "loss": 0.4882, "step": 17067 }, { "epoch": 0.9210512114834601, "grad_norm": 0.9534236996505927, "learning_rate": 1.137703731714237e-06, "loss": 0.4203, "step": 17068 }, { "epoch": 0.9211051751119745, "grad_norm": 1.0999607342994862, "learning_rate": 1.1375165119873482e-06, "loss": 0.406, "step": 17069 }, { "epoch": 0.9211591387404889, "grad_norm": 1.0135776779418948, "learning_rate": 1.1373294176426432e-06, "loss": 0.5796, "step": 17070 }, { "epoch": 0.9212131023690033, "grad_norm": 1.070523103915182, "learning_rate": 1.1371424486854982e-06, "loss": 0.4645, "step": 17071 }, { "epoch": 0.9212670659975176, "grad_norm": 0.9756657946035469, "learning_rate": 1.1369556051212882e-06, "loss": 0.404, "step": 17072 }, { "epoch": 0.921321029626032, "grad_norm": 1.040922304839918, "learning_rate": 1.1367688869553823e-06, "loss": 0.4798, "step": 17073 }, { "epoch": 0.9213749932545464, "grad_norm": 0.7890646642457021, "learning_rate": 1.136582294193148e-06, "loss": 0.2575, "step": 17074 }, { "epoch": 0.9214289568830608, "grad_norm": 1.075064477087498, "learning_rate": 1.136395826839947e-06, "loss": 0.4548, "step": 17075 }, { "epoch": 0.9214829205115752, "grad_norm": 1.1018460358345108, "learning_rate": 1.1362094849011386e-06, "loss": 0.4652, "step": 17076 }, { "epoch": 0.9215368841400896, "grad_norm": 1.0956946113548025, "learning_rate": 1.1360232683820795e-06, "loss": 0.5099, "step": 17077 }, { "epoch": 0.921590847768604, "grad_norm": 1.1813705198965359, "learning_rate": 1.1358371772881204e-06, "loss": 0.4539, "step": 17078 }, 
{ "epoch": 0.9216448113971183, "grad_norm": 0.953211387826852, "learning_rate": 1.1356512116246111e-06, "loss": 0.4959, "step": 17079 }, { "epoch": 0.9216987750256327, "grad_norm": 1.0781003571580279, "learning_rate": 1.1354653713968955e-06, "loss": 0.3908, "step": 17080 }, { "epoch": 0.9217527386541471, "grad_norm": 1.1235241210100717, "learning_rate": 1.1352796566103155e-06, "loss": 0.5921, "step": 17081 }, { "epoch": 0.9218067022826615, "grad_norm": 0.7780656021408747, "learning_rate": 1.1350940672702085e-06, "loss": 0.2707, "step": 17082 }, { "epoch": 0.9218606659111759, "grad_norm": 0.9503794878089104, "learning_rate": 1.134908603381908e-06, "loss": 0.4029, "step": 17083 }, { "epoch": 0.9219146295396903, "grad_norm": 1.0417896858242564, "learning_rate": 1.1347232649507452e-06, "loss": 0.4403, "step": 17084 }, { "epoch": 0.9219685931682047, "grad_norm": 1.1441889718527973, "learning_rate": 1.1345380519820464e-06, "loss": 0.5491, "step": 17085 }, { "epoch": 0.922022556796719, "grad_norm": 1.1955788790678965, "learning_rate": 1.134352964481135e-06, "loss": 0.5337, "step": 17086 }, { "epoch": 0.9220765204252334, "grad_norm": 0.9766053204211788, "learning_rate": 1.1341680024533308e-06, "loss": 0.3935, "step": 17087 }, { "epoch": 0.9221304840537478, "grad_norm": 0.9936432766759872, "learning_rate": 1.133983165903949e-06, "loss": 0.5345, "step": 17088 }, { "epoch": 0.9221844476822622, "grad_norm": 1.303900751500316, "learning_rate": 1.1337984548383027e-06, "loss": 0.4372, "step": 17089 }, { "epoch": 0.9222384113107766, "grad_norm": 0.8973116089762633, "learning_rate": 1.1336138692617008e-06, "loss": 0.4089, "step": 17090 }, { "epoch": 0.922292374939291, "grad_norm": 1.1169234211151209, "learning_rate": 1.133429409179448e-06, "loss": 0.4716, "step": 17091 }, { "epoch": 0.9223463385678053, "grad_norm": 1.3783416456332866, "learning_rate": 1.133245074596846e-06, "loss": 0.4538, "step": 17092 }, { "epoch": 0.9224003021963196, "grad_norm": 1.1102316359296656, "learning_rate": 1.1330608655191933e-06, "loss": 0.4622, "step": 17093 }, { "epoch": 0.922454265824834, "grad_norm": 1.1289029400290056, "learning_rate": 1.1328767819517835e-06, "loss": 0.3707, "step": 17094 }, { "epoch": 0.9225082294533484, "grad_norm": 0.7536033001466376, "learning_rate": 1.1326928238999076e-06, "loss": 0.3129, "step": 17095 }, { "epoch": 0.9225621930818628, "grad_norm": 1.0422039236377312, "learning_rate": 1.1325089913688528e-06, "loss": 0.4999, "step": 17096 }, { "epoch": 0.9226161567103772, "grad_norm": 1.109389978565776, "learning_rate": 1.1323252843639027e-06, "loss": 0.434, "step": 17097 }, { "epoch": 0.9226701203388916, "grad_norm": 1.1391214952837603, "learning_rate": 1.1321417028903373e-06, "loss": 0.4822, "step": 17098 }, { "epoch": 0.922724083967406, "grad_norm": 0.9926647855838342, "learning_rate": 1.1319582469534324e-06, "loss": 0.3999, "step": 17099 }, { "epoch": 0.9227780475959203, "grad_norm": 1.092255994618334, "learning_rate": 1.1317749165584615e-06, "loss": 0.5098, "step": 17100 }, { "epoch": 0.9228320112244347, "grad_norm": 0.9890223491671929, "learning_rate": 1.1315917117106926e-06, "loss": 0.4589, "step": 17101 }, { "epoch": 0.9228859748529491, "grad_norm": 0.9204321965445273, "learning_rate": 1.1314086324153922e-06, "loss": 0.3734, "step": 17102 }, { "epoch": 0.9229399384814635, "grad_norm": 0.9501815354081277, "learning_rate": 1.1312256786778219e-06, "loss": 0.3095, "step": 17103 }, { "epoch": 0.9229939021099779, "grad_norm": 0.8888529357759046, "learning_rate": 1.1310428505032397e-06, "loss": 
0.3913, "step": 17104 }, { "epoch": 0.9230478657384923, "grad_norm": 1.0211170407469718, "learning_rate": 1.1308601478969007e-06, "loss": 0.4369, "step": 17105 }, { "epoch": 0.9231018293670067, "grad_norm": 1.0756297481705168, "learning_rate": 1.130677570864056e-06, "loss": 0.5124, "step": 17106 }, { "epoch": 0.923155792995521, "grad_norm": 1.0432492250141208, "learning_rate": 1.1304951194099527e-06, "loss": 0.4023, "step": 17107 }, { "epoch": 0.9232097566240354, "grad_norm": 1.0995340398068094, "learning_rate": 1.1303127935398346e-06, "loss": 0.4011, "step": 17108 }, { "epoch": 0.9232637202525498, "grad_norm": 0.819933435158619, "learning_rate": 1.1301305932589419e-06, "loss": 0.4285, "step": 17109 }, { "epoch": 0.9233176838810642, "grad_norm": 1.1604469709711045, "learning_rate": 1.1299485185725115e-06, "loss": 0.43, "step": 17110 }, { "epoch": 0.9233716475095786, "grad_norm": 1.1537034770087642, "learning_rate": 1.1297665694857763e-06, "loss": 0.6285, "step": 17111 }, { "epoch": 0.923425611138093, "grad_norm": 0.8693944776839003, "learning_rate": 1.1295847460039655e-06, "loss": 0.4008, "step": 17112 }, { "epoch": 0.9234795747666074, "grad_norm": 1.2508552647148314, "learning_rate": 1.129403048132305e-06, "loss": 0.6882, "step": 17113 }, { "epoch": 0.9235335383951216, "grad_norm": 1.0056142815107392, "learning_rate": 1.1292214758760178e-06, "loss": 0.4088, "step": 17114 }, { "epoch": 0.923587502023636, "grad_norm": 1.1335008756648373, "learning_rate": 1.1290400292403214e-06, "loss": 0.3617, "step": 17115 }, { "epoch": 0.9236414656521504, "grad_norm": 1.048711645470727, "learning_rate": 1.1288587082304307e-06, "loss": 0.4579, "step": 17116 }, { "epoch": 0.9236954292806648, "grad_norm": 1.1200453210847177, "learning_rate": 1.1286775128515576e-06, "loss": 0.4916, "step": 17117 }, { "epoch": 0.9237493929091792, "grad_norm": 1.3141451218483213, "learning_rate": 1.12849644310891e-06, "loss": 0.4754, "step": 17118 }, { "epoch": 0.9238033565376936, "grad_norm": 1.0849959869446433, "learning_rate": 1.1283154990076911e-06, "loss": 0.458, "step": 17119 }, { "epoch": 0.923857320166208, "grad_norm": 1.1316401129916418, "learning_rate": 1.1281346805531024e-06, "loss": 0.417, "step": 17120 }, { "epoch": 0.9239112837947223, "grad_norm": 1.0808390937688435, "learning_rate": 1.1279539877503408e-06, "loss": 0.4629, "step": 17121 }, { "epoch": 0.9239652474232367, "grad_norm": 1.1527773689519094, "learning_rate": 1.127773420604599e-06, "loss": 0.5622, "step": 17122 }, { "epoch": 0.9240192110517511, "grad_norm": 1.1893621151238225, "learning_rate": 1.1275929791210665e-06, "loss": 0.5139, "step": 17123 }, { "epoch": 0.9240731746802655, "grad_norm": 0.9200976762749007, "learning_rate": 1.12741266330493e-06, "loss": 0.4983, "step": 17124 }, { "epoch": 0.9241271383087799, "grad_norm": 1.164780516796796, "learning_rate": 1.1272324731613716e-06, "loss": 0.4464, "step": 17125 }, { "epoch": 0.9241811019372943, "grad_norm": 1.1784610414245527, "learning_rate": 1.1270524086955707e-06, "loss": 0.3972, "step": 17126 }, { "epoch": 0.9242350655658087, "grad_norm": 0.9127131635431531, "learning_rate": 1.1268724699127019e-06, "loss": 0.4663, "step": 17127 }, { "epoch": 0.924289029194323, "grad_norm": 1.0582830518339936, "learning_rate": 1.1266926568179365e-06, "loss": 0.3845, "step": 17128 }, { "epoch": 0.9243429928228374, "grad_norm": 1.02019230198679, "learning_rate": 1.1265129694164438e-06, "loss": 0.3848, "step": 17129 }, { "epoch": 0.9243969564513518, "grad_norm": 1.027008304472066, "learning_rate": 
1.126333407713387e-06, "loss": 0.4289, "step": 17130 }, { "epoch": 0.9244509200798662, "grad_norm": 1.1693731058055012, "learning_rate": 1.1261539717139276e-06, "loss": 0.4516, "step": 17131 }, { "epoch": 0.9245048837083806, "grad_norm": 1.1814510619410472, "learning_rate": 1.125974661423222e-06, "loss": 0.5784, "step": 17132 }, { "epoch": 0.924558847336895, "grad_norm": 1.0294309539058584, "learning_rate": 1.1257954768464248e-06, "loss": 0.4895, "step": 17133 }, { "epoch": 0.9246128109654093, "grad_norm": 0.9731056968971341, "learning_rate": 1.1256164179886857e-06, "loss": 0.3443, "step": 17134 }, { "epoch": 0.9246667745939237, "grad_norm": 0.9876508021307268, "learning_rate": 1.1254374848551504e-06, "loss": 0.474, "step": 17135 }, { "epoch": 0.924720738222438, "grad_norm": 0.8075787376420062, "learning_rate": 1.1252586774509616e-06, "loss": 0.3051, "step": 17136 }, { "epoch": 0.9247747018509525, "grad_norm": 1.1927649976299126, "learning_rate": 1.125079995781259e-06, "loss": 0.5053, "step": 17137 }, { "epoch": 0.9248286654794668, "grad_norm": 1.2369831410687326, "learning_rate": 1.124901439851178e-06, "loss": 0.4847, "step": 17138 }, { "epoch": 0.9248826291079812, "grad_norm": 0.953134832801891, "learning_rate": 1.1247230096658498e-06, "loss": 0.4206, "step": 17139 }, { "epoch": 0.9249365927364956, "grad_norm": 1.0282754225114121, "learning_rate": 1.124544705230404e-06, "loss": 0.5549, "step": 17140 }, { "epoch": 0.9249905563650099, "grad_norm": 1.488968903156188, "learning_rate": 1.1243665265499646e-06, "loss": 0.6487, "step": 17141 }, { "epoch": 0.9250445199935243, "grad_norm": 0.9810361591947774, "learning_rate": 1.1241884736296519e-06, "loss": 0.5267, "step": 17142 }, { "epoch": 0.9250984836220387, "grad_norm": 0.865661985282122, "learning_rate": 1.1240105464745838e-06, "loss": 0.3648, "step": 17143 }, { "epoch": 0.9251524472505531, "grad_norm": 1.0552733968127936, "learning_rate": 1.123832745089874e-06, "loss": 0.4474, "step": 17144 }, { "epoch": 0.9252064108790675, "grad_norm": 1.21209292144695, "learning_rate": 1.123655069480633e-06, "loss": 0.5531, "step": 17145 }, { "epoch": 0.9252603745075819, "grad_norm": 1.1514588514187536, "learning_rate": 1.1234775196519678e-06, "loss": 0.5145, "step": 17146 }, { "epoch": 0.9253143381360963, "grad_norm": 1.0010429097115787, "learning_rate": 1.1233000956089802e-06, "loss": 0.4279, "step": 17147 }, { "epoch": 0.9253683017646106, "grad_norm": 0.873765198824895, "learning_rate": 1.1231227973567712e-06, "loss": 0.4622, "step": 17148 }, { "epoch": 0.925422265393125, "grad_norm": 1.2663105465946654, "learning_rate": 1.1229456249004352e-06, "loss": 0.5332, "step": 17149 }, { "epoch": 0.9254762290216394, "grad_norm": 1.0499196363951542, "learning_rate": 1.1227685782450638e-06, "loss": 0.2898, "step": 17150 }, { "epoch": 0.9255301926501538, "grad_norm": 0.9006006524404785, "learning_rate": 1.122591657395747e-06, "loss": 0.418, "step": 17151 }, { "epoch": 0.9255841562786682, "grad_norm": 0.9961550392107617, "learning_rate": 1.1224148623575688e-06, "loss": 0.4381, "step": 17152 }, { "epoch": 0.9256381199071826, "grad_norm": 0.9373773081017521, "learning_rate": 1.1222381931356106e-06, "loss": 0.4319, "step": 17153 }, { "epoch": 0.925692083535697, "grad_norm": 0.9093651047061327, "learning_rate": 1.12206164973495e-06, "loss": 0.3954, "step": 17154 }, { "epoch": 0.9257460471642113, "grad_norm": 1.0945158871032434, "learning_rate": 1.121885232160662e-06, "loss": 0.5049, "step": 17155 }, { "epoch": 0.9258000107927257, "grad_norm": 0.9699883734010841, 
"learning_rate": 1.1217089404178158e-06, "loss": 0.394, "step": 17156 }, { "epoch": 0.9258539744212401, "grad_norm": 1.0137439667609587, "learning_rate": 1.1215327745114784e-06, "loss": 0.5073, "step": 17157 }, { "epoch": 0.9259079380497545, "grad_norm": 1.0170979649907703, "learning_rate": 1.121356734446713e-06, "loss": 0.3884, "step": 17158 }, { "epoch": 0.9259619016782689, "grad_norm": 1.1075305414261314, "learning_rate": 1.1211808202285797e-06, "loss": 0.4916, "step": 17159 }, { "epoch": 0.9260158653067833, "grad_norm": 1.1193242084217927, "learning_rate": 1.121005031862134e-06, "loss": 0.5069, "step": 17160 }, { "epoch": 0.9260698289352977, "grad_norm": 0.9520306816081967, "learning_rate": 1.1208293693524282e-06, "loss": 0.3347, "step": 17161 }, { "epoch": 0.9261237925638119, "grad_norm": 1.1324635660416942, "learning_rate": 1.1206538327045116e-06, "loss": 0.5558, "step": 17162 }, { "epoch": 0.9261777561923263, "grad_norm": 0.8891962068242001, "learning_rate": 1.1204784219234282e-06, "loss": 0.4262, "step": 17163 }, { "epoch": 0.9262317198208407, "grad_norm": 0.8889060953592636, "learning_rate": 1.1203031370142204e-06, "loss": 0.3264, "step": 17164 }, { "epoch": 0.9262856834493551, "grad_norm": 1.1808920056980299, "learning_rate": 1.1201279779819257e-06, "loss": 0.5799, "step": 17165 }, { "epoch": 0.9263396470778695, "grad_norm": 1.0079423902035387, "learning_rate": 1.1199529448315787e-06, "loss": 0.3691, "step": 17166 }, { "epoch": 0.9263936107063839, "grad_norm": 1.0049527634231838, "learning_rate": 1.1197780375682093e-06, "loss": 0.493, "step": 17167 }, { "epoch": 0.9264475743348983, "grad_norm": 1.0249945730504453, "learning_rate": 1.1196032561968456e-06, "loss": 0.4408, "step": 17168 }, { "epoch": 0.9265015379634126, "grad_norm": 1.1837771105176669, "learning_rate": 1.1194286007225102e-06, "loss": 0.5013, "step": 17169 }, { "epoch": 0.926555501591927, "grad_norm": 0.9991441585249675, "learning_rate": 1.1192540711502234e-06, "loss": 0.4524, "step": 17170 }, { "epoch": 0.9266094652204414, "grad_norm": 0.8473605159145541, "learning_rate": 1.1190796674850003e-06, "loss": 0.3729, "step": 17171 }, { "epoch": 0.9266634288489558, "grad_norm": 1.0261411970709997, "learning_rate": 1.1189053897318546e-06, "loss": 0.3716, "step": 17172 }, { "epoch": 0.9267173924774702, "grad_norm": 1.0400818934533709, "learning_rate": 1.1187312378957944e-06, "loss": 0.3879, "step": 17173 }, { "epoch": 0.9267713561059846, "grad_norm": 1.1701124127562916, "learning_rate": 1.1185572119818256e-06, "loss": 0.4509, "step": 17174 }, { "epoch": 0.926825319734499, "grad_norm": 0.9201409078863747, "learning_rate": 1.1183833119949495e-06, "loss": 0.4702, "step": 17175 }, { "epoch": 0.9268792833630133, "grad_norm": 1.1238238754939784, "learning_rate": 1.1182095379401648e-06, "loss": 0.4871, "step": 17176 }, { "epoch": 0.9269332469915277, "grad_norm": 1.1297655963330888, "learning_rate": 1.1180358898224647e-06, "loss": 0.5699, "step": 17177 }, { "epoch": 0.9269872106200421, "grad_norm": 1.0475347489763582, "learning_rate": 1.1178623676468415e-06, "loss": 0.5164, "step": 17178 }, { "epoch": 0.9270411742485565, "grad_norm": 0.7877484435589853, "learning_rate": 1.1176889714182814e-06, "loss": 0.3532, "step": 17179 }, { "epoch": 0.9270951378770709, "grad_norm": 1.1040034175356985, "learning_rate": 1.1175157011417683e-06, "loss": 0.5007, "step": 17180 }, { "epoch": 0.9271491015055853, "grad_norm": 0.8964377489021044, "learning_rate": 1.1173425568222823e-06, "loss": 0.4924, "step": 17181 }, { "epoch": 0.9272030651340997, 
"grad_norm": 1.0617332127500285, "learning_rate": 1.1171695384647993e-06, "loss": 0.4225, "step": 17182 }, { "epoch": 0.927257028762614, "grad_norm": 1.278284611133064, "learning_rate": 1.116996646074292e-06, "loss": 0.4289, "step": 17183 }, { "epoch": 0.9273109923911284, "grad_norm": 1.0113130488371063, "learning_rate": 1.1168238796557303e-06, "loss": 0.4381, "step": 17184 }, { "epoch": 0.9273649560196427, "grad_norm": 1.1715697308905562, "learning_rate": 1.1166512392140788e-06, "loss": 0.4783, "step": 17185 }, { "epoch": 0.9274189196481571, "grad_norm": 0.8541843181844087, "learning_rate": 1.1164787247543003e-06, "loss": 0.3183, "step": 17186 }, { "epoch": 0.9274728832766715, "grad_norm": 1.2893666194743563, "learning_rate": 1.1163063362813516e-06, "loss": 0.4971, "step": 17187 }, { "epoch": 0.9275268469051859, "grad_norm": 0.9762654116425453, "learning_rate": 1.1161340738001887e-06, "loss": 0.4342, "step": 17188 }, { "epoch": 0.9275808105337003, "grad_norm": 1.148166195753637, "learning_rate": 1.1159619373157623e-06, "loss": 0.5335, "step": 17189 }, { "epoch": 0.9276347741622146, "grad_norm": 1.1248471771807793, "learning_rate": 1.11578992683302e-06, "loss": 0.5505, "step": 17190 }, { "epoch": 0.927688737790729, "grad_norm": 0.9591595305276363, "learning_rate": 1.115618042356904e-06, "loss": 0.3476, "step": 17191 }, { "epoch": 0.9277427014192434, "grad_norm": 1.2464198608963695, "learning_rate": 1.115446283892356e-06, "loss": 0.5416, "step": 17192 }, { "epoch": 0.9277966650477578, "grad_norm": 1.5266093741284787, "learning_rate": 1.1152746514443122e-06, "loss": 0.4077, "step": 17193 }, { "epoch": 0.9278506286762722, "grad_norm": 1.10609014632624, "learning_rate": 1.1151031450177055e-06, "loss": 0.4715, "step": 17194 }, { "epoch": 0.9279045923047866, "grad_norm": 0.9409276284781632, "learning_rate": 1.114931764617465e-06, "loss": 0.4105, "step": 17195 }, { "epoch": 0.927958555933301, "grad_norm": 0.8944432616540952, "learning_rate": 1.1147605102485155e-06, "loss": 0.372, "step": 17196 }, { "epoch": 0.9280125195618153, "grad_norm": 1.0588476469608041, "learning_rate": 1.1145893819157811e-06, "loss": 0.4149, "step": 17197 }, { "epoch": 0.9280664831903297, "grad_norm": 1.1755485684601388, "learning_rate": 1.1144183796241783e-06, "loss": 0.4254, "step": 17198 }, { "epoch": 0.9281204468188441, "grad_norm": 0.9801181404880135, "learning_rate": 1.114247503378623e-06, "loss": 0.4816, "step": 17199 }, { "epoch": 0.9281744104473585, "grad_norm": 1.1849025667434465, "learning_rate": 1.1140767531840254e-06, "loss": 0.4675, "step": 17200 }, { "epoch": 0.9282283740758729, "grad_norm": 0.9715956901709254, "learning_rate": 1.1139061290452942e-06, "loss": 0.5519, "step": 17201 }, { "epoch": 0.9282823377043873, "grad_norm": 0.950570690411513, "learning_rate": 1.1137356309673323e-06, "loss": 0.3672, "step": 17202 }, { "epoch": 0.9283363013329016, "grad_norm": 0.8179084170153493, "learning_rate": 1.1135652589550406e-06, "loss": 0.3813, "step": 17203 }, { "epoch": 0.928390264961416, "grad_norm": 1.0349329214833236, "learning_rate": 1.113395013013315e-06, "loss": 0.4487, "step": 17204 }, { "epoch": 0.9284442285899304, "grad_norm": 1.038789373933383, "learning_rate": 1.1132248931470496e-06, "loss": 0.5069, "step": 17205 }, { "epoch": 0.9284981922184448, "grad_norm": 1.0655276848745483, "learning_rate": 1.113054899361133e-06, "loss": 0.4858, "step": 17206 }, { "epoch": 0.9285521558469592, "grad_norm": 1.199973074897376, "learning_rate": 1.1128850316604515e-06, "loss": 0.6595, "step": 17207 }, { "epoch": 
0.9286061194754736, "grad_norm": 0.9801633869212708, "learning_rate": 1.1127152900498868e-06, "loss": 0.4323, "step": 17208 }, { "epoch": 0.928660083103988, "grad_norm": 1.0758294277047462, "learning_rate": 1.1125456745343175e-06, "loss": 0.5692, "step": 17209 }, { "epoch": 0.9287140467325022, "grad_norm": 1.1626022374927458, "learning_rate": 1.1123761851186196e-06, "loss": 0.5068, "step": 17210 }, { "epoch": 0.9287680103610166, "grad_norm": 0.8895867803767645, "learning_rate": 1.1122068218076626e-06, "loss": 0.4129, "step": 17211 }, { "epoch": 0.928821973989531, "grad_norm": 1.0401002039998533, "learning_rate": 1.1120375846063158e-06, "loss": 0.4531, "step": 17212 }, { "epoch": 0.9288759376180454, "grad_norm": 0.9702461234855705, "learning_rate": 1.1118684735194417e-06, "loss": 0.3607, "step": 17213 }, { "epoch": 0.9289299012465598, "grad_norm": 0.92075711915728, "learning_rate": 1.111699488551902e-06, "loss": 0.379, "step": 17214 }, { "epoch": 0.9289838648750742, "grad_norm": 1.0518613529888734, "learning_rate": 1.1115306297085531e-06, "loss": 0.5308, "step": 17215 }, { "epoch": 0.9290378285035886, "grad_norm": 0.8659809856294727, "learning_rate": 1.111361896994248e-06, "loss": 0.3412, "step": 17216 }, { "epoch": 0.9290917921321029, "grad_norm": 1.2886770368604843, "learning_rate": 1.1111932904138366e-06, "loss": 0.5891, "step": 17217 }, { "epoch": 0.9291457557606173, "grad_norm": 0.9892016191962122, "learning_rate": 1.1110248099721646e-06, "loss": 0.4154, "step": 17218 }, { "epoch": 0.9291997193891317, "grad_norm": 0.9051110145093129, "learning_rate": 1.1108564556740738e-06, "loss": 0.3513, "step": 17219 }, { "epoch": 0.9292536830176461, "grad_norm": 1.1760371053648504, "learning_rate": 1.1106882275244036e-06, "loss": 0.422, "step": 17220 }, { "epoch": 0.9293076466461605, "grad_norm": 0.9312030733706808, "learning_rate": 1.1105201255279894e-06, "loss": 0.4449, "step": 17221 }, { "epoch": 0.9293616102746749, "grad_norm": 1.014785417066404, "learning_rate": 1.110352149689661e-06, "loss": 0.5429, "step": 17222 }, { "epoch": 0.9294155739031893, "grad_norm": 0.9143504590684891, "learning_rate": 1.1101843000142477e-06, "loss": 0.4212, "step": 17223 }, { "epoch": 0.9294695375317036, "grad_norm": 1.3478028252809429, "learning_rate": 1.1100165765065737e-06, "loss": 0.5319, "step": 17224 }, { "epoch": 0.929523501160218, "grad_norm": 1.1185421020861572, "learning_rate": 1.1098489791714586e-06, "loss": 0.5507, "step": 17225 }, { "epoch": 0.9295774647887324, "grad_norm": 1.1111966993969193, "learning_rate": 1.1096815080137196e-06, "loss": 0.4859, "step": 17226 }, { "epoch": 0.9296314284172468, "grad_norm": 1.1063374142569322, "learning_rate": 1.1095141630381709e-06, "loss": 0.5138, "step": 17227 }, { "epoch": 0.9296853920457612, "grad_norm": 1.0927822915494732, "learning_rate": 1.1093469442496203e-06, "loss": 0.4989, "step": 17228 }, { "epoch": 0.9297393556742756, "grad_norm": 1.0416881321912916, "learning_rate": 1.109179851652876e-06, "loss": 0.5016, "step": 17229 }, { "epoch": 0.92979331930279, "grad_norm": 1.0677881337606276, "learning_rate": 1.1090128852527387e-06, "loss": 0.4817, "step": 17230 }, { "epoch": 0.9298472829313043, "grad_norm": 0.9101789279210969, "learning_rate": 1.1088460450540076e-06, "loss": 0.3948, "step": 17231 }, { "epoch": 0.9299012465598187, "grad_norm": 0.8861957591524564, "learning_rate": 1.1086793310614787e-06, "loss": 0.3342, "step": 17232 }, { "epoch": 0.929955210188333, "grad_norm": 1.2229258747245766, "learning_rate": 1.1085127432799427e-06, "loss": 0.423, "step": 
17233 }, { "epoch": 0.9300091738168474, "grad_norm": 0.9137827059361836, "learning_rate": 1.1083462817141878e-06, "loss": 0.3629, "step": 17234 }, { "epoch": 0.9300631374453618, "grad_norm": 0.9656139237967916, "learning_rate": 1.1081799463689982e-06, "loss": 0.427, "step": 17235 }, { "epoch": 0.9301171010738762, "grad_norm": 1.1771460207850526, "learning_rate": 1.1080137372491545e-06, "loss": 0.4559, "step": 17236 }, { "epoch": 0.9301710647023906, "grad_norm": 0.9388769313970659, "learning_rate": 1.1078476543594336e-06, "loss": 0.4278, "step": 17237 }, { "epoch": 0.9302250283309049, "grad_norm": 0.9166087878782561, "learning_rate": 1.1076816977046096e-06, "loss": 0.4373, "step": 17238 }, { "epoch": 0.9302789919594193, "grad_norm": 1.0703586235736726, "learning_rate": 1.1075158672894513e-06, "loss": 0.4228, "step": 17239 }, { "epoch": 0.9303329555879337, "grad_norm": 0.8864505086993925, "learning_rate": 1.1073501631187256e-06, "loss": 0.3836, "step": 17240 }, { "epoch": 0.9303869192164481, "grad_norm": 0.8553034091142702, "learning_rate": 1.1071845851971944e-06, "loss": 0.325, "step": 17241 }, { "epoch": 0.9304408828449625, "grad_norm": 1.0495765790130687, "learning_rate": 1.1070191335296169e-06, "loss": 0.6224, "step": 17242 }, { "epoch": 0.9304948464734769, "grad_norm": 1.1961312798202317, "learning_rate": 1.1068538081207478e-06, "loss": 0.5098, "step": 17243 }, { "epoch": 0.9305488101019913, "grad_norm": 0.883967226041481, "learning_rate": 1.1066886089753397e-06, "loss": 0.341, "step": 17244 }, { "epoch": 0.9306027737305056, "grad_norm": 0.9600200395820533, "learning_rate": 1.1065235360981404e-06, "loss": 0.586, "step": 17245 }, { "epoch": 0.93065673735902, "grad_norm": 0.7652902755684388, "learning_rate": 1.1063585894938936e-06, "loss": 0.3368, "step": 17246 }, { "epoch": 0.9307107009875344, "grad_norm": 1.2378360102053838, "learning_rate": 1.1061937691673407e-06, "loss": 0.5139, "step": 17247 }, { "epoch": 0.9307646646160488, "grad_norm": 1.2221357051678798, "learning_rate": 1.1060290751232177e-06, "loss": 0.6267, "step": 17248 }, { "epoch": 0.9308186282445632, "grad_norm": 0.8719454965965641, "learning_rate": 1.1058645073662593e-06, "loss": 0.4509, "step": 17249 }, { "epoch": 0.9308725918730776, "grad_norm": 0.8855958700325937, "learning_rate": 1.1057000659011952e-06, "loss": 0.3575, "step": 17250 }, { "epoch": 0.930926555501592, "grad_norm": 0.9822031219976025, "learning_rate": 1.105535750732751e-06, "loss": 0.4387, "step": 17251 }, { "epoch": 0.9309805191301063, "grad_norm": 0.987807586379645, "learning_rate": 1.1053715618656498e-06, "loss": 0.4537, "step": 17252 }, { "epoch": 0.9310344827586207, "grad_norm": 1.0068578910740493, "learning_rate": 1.1052074993046102e-06, "loss": 0.4794, "step": 17253 }, { "epoch": 0.9310884463871351, "grad_norm": 1.0121036778970915, "learning_rate": 1.105043563054348e-06, "loss": 0.3514, "step": 17254 }, { "epoch": 0.9311424100156495, "grad_norm": 1.0679524111160856, "learning_rate": 1.104879753119574e-06, "loss": 0.4076, "step": 17255 }, { "epoch": 0.9311963736441639, "grad_norm": 1.1864636830274964, "learning_rate": 1.1047160695049974e-06, "loss": 0.6043, "step": 17256 }, { "epoch": 0.9312503372726783, "grad_norm": 0.8092753965597969, "learning_rate": 1.1045525122153214e-06, "loss": 0.2931, "step": 17257 }, { "epoch": 0.9313043009011926, "grad_norm": 1.3347551052221265, "learning_rate": 1.1043890812552477e-06, "loss": 0.472, "step": 17258 }, { "epoch": 0.9313582645297069, "grad_norm": 1.1094679335527144, "learning_rate": 1.1042257766294743e-06, 
"loss": 0.4596, "step": 17259 }, { "epoch": 0.9314122281582213, "grad_norm": 0.9046028373275663, "learning_rate": 1.1040625983426924e-06, "loss": 0.3391, "step": 17260 }, { "epoch": 0.9314661917867357, "grad_norm": 1.1355250577646598, "learning_rate": 1.1038995463995935e-06, "loss": 0.5188, "step": 17261 }, { "epoch": 0.9315201554152501, "grad_norm": 1.290877329588031, "learning_rate": 1.1037366208048636e-06, "loss": 0.6492, "step": 17262 }, { "epoch": 0.9315741190437645, "grad_norm": 1.0693484767055255, "learning_rate": 1.1035738215631856e-06, "loss": 0.4421, "step": 17263 }, { "epoch": 0.9316280826722789, "grad_norm": 1.0979597898597038, "learning_rate": 1.103411148679238e-06, "loss": 0.4442, "step": 17264 }, { "epoch": 0.9316820463007933, "grad_norm": 1.0761266028692755, "learning_rate": 1.1032486021576961e-06, "loss": 0.4086, "step": 17265 }, { "epoch": 0.9317360099293076, "grad_norm": 0.9574249927458519, "learning_rate": 1.1030861820032328e-06, "loss": 0.4927, "step": 17266 }, { "epoch": 0.931789973557822, "grad_norm": 1.0212729419615736, "learning_rate": 1.102923888220515e-06, "loss": 0.3841, "step": 17267 }, { "epoch": 0.9318439371863364, "grad_norm": 1.13896800575113, "learning_rate": 1.1027617208142076e-06, "loss": 0.5671, "step": 17268 }, { "epoch": 0.9318979008148508, "grad_norm": 0.9932309306353466, "learning_rate": 1.1025996797889712e-06, "loss": 0.3995, "step": 17269 }, { "epoch": 0.9319518644433652, "grad_norm": 0.9138083109924741, "learning_rate": 1.102437765149463e-06, "loss": 0.3358, "step": 17270 }, { "epoch": 0.9320058280718796, "grad_norm": 1.1906258384324042, "learning_rate": 1.1022759769003373e-06, "loss": 0.499, "step": 17271 }, { "epoch": 0.9320597917003939, "grad_norm": 0.9155554400136157, "learning_rate": 1.102114315046244e-06, "loss": 0.4681, "step": 17272 }, { "epoch": 0.9321137553289083, "grad_norm": 1.0279245675060757, "learning_rate": 1.1019527795918284e-06, "loss": 0.5581, "step": 17273 }, { "epoch": 0.9321677189574227, "grad_norm": 1.0497827393025896, "learning_rate": 1.1017913705417337e-06, "loss": 0.4492, "step": 17274 }, { "epoch": 0.9322216825859371, "grad_norm": 1.1367575641384589, "learning_rate": 1.1016300879005993e-06, "loss": 0.5066, "step": 17275 }, { "epoch": 0.9322756462144515, "grad_norm": 1.2069170255904667, "learning_rate": 1.1014689316730604e-06, "loss": 0.5807, "step": 17276 }, { "epoch": 0.9323296098429659, "grad_norm": 1.042358674046549, "learning_rate": 1.1013079018637486e-06, "loss": 0.3813, "step": 17277 }, { "epoch": 0.9323835734714803, "grad_norm": 0.8998528440625521, "learning_rate": 1.1011469984772924e-06, "loss": 0.3756, "step": 17278 }, { "epoch": 0.9324375370999946, "grad_norm": 1.2383650627914182, "learning_rate": 1.1009862215183162e-06, "loss": 0.4473, "step": 17279 }, { "epoch": 0.932491500728509, "grad_norm": 1.2485503192537872, "learning_rate": 1.1008255709914414e-06, "loss": 0.5904, "step": 17280 }, { "epoch": 0.9325454643570233, "grad_norm": 1.0999215789066727, "learning_rate": 1.100665046901284e-06, "loss": 0.3905, "step": 17281 }, { "epoch": 0.9325994279855377, "grad_norm": 1.0830867755534237, "learning_rate": 1.1005046492524587e-06, "loss": 0.5057, "step": 17282 }, { "epoch": 0.9326533916140521, "grad_norm": 0.8455875049785918, "learning_rate": 1.1003443780495748e-06, "loss": 0.4082, "step": 17283 }, { "epoch": 0.9327073552425665, "grad_norm": 1.004147302318887, "learning_rate": 1.1001842332972393e-06, "loss": 0.4817, "step": 17284 }, { "epoch": 0.9327613188710809, "grad_norm": 1.0621554192860336, "learning_rate": 
1.1000242150000543e-06, "loss": 0.4361, "step": 17285 }, { "epoch": 0.9328152824995952, "grad_norm": 1.0840429396551567, "learning_rate": 1.0998643231626199e-06, "loss": 0.5123, "step": 17286 }, { "epoch": 0.9328692461281096, "grad_norm": 0.9924910142864513, "learning_rate": 1.0997045577895306e-06, "loss": 0.4585, "step": 17287 }, { "epoch": 0.932923209756624, "grad_norm": 1.1427535912544042, "learning_rate": 1.0995449188853782e-06, "loss": 0.5234, "step": 17288 }, { "epoch": 0.9329771733851384, "grad_norm": 0.9100148467988263, "learning_rate": 1.099385406454751e-06, "loss": 0.6417, "step": 17289 }, { "epoch": 0.9330311370136528, "grad_norm": 1.0354326402288565, "learning_rate": 1.0992260205022344e-06, "loss": 0.5027, "step": 17290 }, { "epoch": 0.9330851006421672, "grad_norm": 0.9710623637983505, "learning_rate": 1.0990667610324081e-06, "loss": 0.4556, "step": 17291 }, { "epoch": 0.9331390642706816, "grad_norm": 0.7993467081693396, "learning_rate": 1.0989076280498497e-06, "loss": 0.3276, "step": 17292 }, { "epoch": 0.9331930278991959, "grad_norm": 0.8287510056045735, "learning_rate": 1.0987486215591341e-06, "loss": 0.3535, "step": 17293 }, { "epoch": 0.9332469915277103, "grad_norm": 1.257693218501587, "learning_rate": 1.0985897415648292e-06, "loss": 0.4823, "step": 17294 }, { "epoch": 0.9333009551562247, "grad_norm": 1.121306031977901, "learning_rate": 1.0984309880715032e-06, "loss": 0.4786, "step": 17295 }, { "epoch": 0.9333549187847391, "grad_norm": 0.9252553705902091, "learning_rate": 1.0982723610837175e-06, "loss": 0.3902, "step": 17296 }, { "epoch": 0.9334088824132535, "grad_norm": 0.839664696630705, "learning_rate": 1.0981138606060317e-06, "loss": 0.347, "step": 17297 }, { "epoch": 0.9334628460417679, "grad_norm": 0.8604812978540517, "learning_rate": 1.0979554866430013e-06, "loss": 0.3627, "step": 17298 }, { "epoch": 0.9335168096702823, "grad_norm": 0.9975137402880859, "learning_rate": 1.0977972391991785e-06, "loss": 0.4892, "step": 17299 }, { "epoch": 0.9335707732987966, "grad_norm": 1.0943248339858238, "learning_rate": 1.0976391182791117e-06, "loss": 0.4674, "step": 17300 }, { "epoch": 0.933624736927311, "grad_norm": 0.8991630330645857, "learning_rate": 1.097481123887344e-06, "loss": 0.2761, "step": 17301 }, { "epoch": 0.9336787005558254, "grad_norm": 1.0757337931288524, "learning_rate": 1.0973232560284177e-06, "loss": 0.4579, "step": 17302 }, { "epoch": 0.9337326641843398, "grad_norm": 1.0692363710818704, "learning_rate": 1.0971655147068688e-06, "loss": 0.4828, "step": 17303 }, { "epoch": 0.9337866278128542, "grad_norm": 0.9320224961587416, "learning_rate": 1.0970078999272326e-06, "loss": 0.4052, "step": 17304 }, { "epoch": 0.9338405914413686, "grad_norm": 1.1708782271518505, "learning_rate": 1.096850411694038e-06, "loss": 0.5298, "step": 17305 }, { "epoch": 0.933894555069883, "grad_norm": 1.0385046496227677, "learning_rate": 1.0966930500118112e-06, "loss": 0.4281, "step": 17306 }, { "epoch": 0.9339485186983972, "grad_norm": 1.023965315750552, "learning_rate": 1.0965358148850765e-06, "loss": 0.5053, "step": 17307 }, { "epoch": 0.9340024823269116, "grad_norm": 0.8956742176122396, "learning_rate": 1.0963787063183508e-06, "loss": 0.3806, "step": 17308 }, { "epoch": 0.934056445955426, "grad_norm": 1.028431903032215, "learning_rate": 1.0962217243161508e-06, "loss": 0.5742, "step": 17309 }, { "epoch": 0.9341104095839404, "grad_norm": 0.9880966949317165, "learning_rate": 1.0960648688829882e-06, "loss": 0.4137, "step": 17310 }, { "epoch": 0.9341643732124548, "grad_norm": 
1.318045546494377, "learning_rate": 1.0959081400233712e-06, "loss": 0.2976, "step": 17311 }, { "epoch": 0.9342183368409692, "grad_norm": 1.09621462969618, "learning_rate": 1.0957515377418044e-06, "loss": 0.4035, "step": 17312 }, { "epoch": 0.9342723004694836, "grad_norm": 1.2078827649447292, "learning_rate": 1.0955950620427883e-06, "loss": 0.5537, "step": 17313 }, { "epoch": 0.9343262640979979, "grad_norm": 1.0515224366861584, "learning_rate": 1.0954387129308208e-06, "loss": 0.4372, "step": 17314 }, { "epoch": 0.9343802277265123, "grad_norm": 1.0967027208612905, "learning_rate": 1.0952824904103948e-06, "loss": 0.5038, "step": 17315 }, { "epoch": 0.9344341913550267, "grad_norm": 0.8598139248525903, "learning_rate": 1.095126394486001e-06, "loss": 0.3313, "step": 17316 }, { "epoch": 0.9344881549835411, "grad_norm": 1.005043325931441, "learning_rate": 1.0949704251621254e-06, "loss": 0.3897, "step": 17317 }, { "epoch": 0.9345421186120555, "grad_norm": 0.9646680502668209, "learning_rate": 1.0948145824432503e-06, "loss": 0.4251, "step": 17318 }, { "epoch": 0.9345960822405699, "grad_norm": 1.276188620379973, "learning_rate": 1.0946588663338552e-06, "loss": 0.564, "step": 17319 }, { "epoch": 0.9346500458690843, "grad_norm": 1.0213304326100074, "learning_rate": 1.094503276838416e-06, "loss": 0.5081, "step": 17320 }, { "epoch": 0.9347040094975986, "grad_norm": 1.0102188045359872, "learning_rate": 1.0943478139614042e-06, "loss": 0.5079, "step": 17321 }, { "epoch": 0.934757973126113, "grad_norm": 1.032358349859664, "learning_rate": 1.0941924777072875e-06, "loss": 0.3739, "step": 17322 }, { "epoch": 0.9348119367546274, "grad_norm": 0.9352808195075534, "learning_rate": 1.0940372680805305e-06, "loss": 0.4339, "step": 17323 }, { "epoch": 0.9348659003831418, "grad_norm": 0.9761123854245445, "learning_rate": 1.0938821850855947e-06, "loss": 0.4471, "step": 17324 }, { "epoch": 0.9349198640116562, "grad_norm": 0.8462229878271017, "learning_rate": 1.0937272287269367e-06, "loss": 0.3372, "step": 17325 }, { "epoch": 0.9349738276401706, "grad_norm": 1.0407690450066263, "learning_rate": 1.0935723990090105e-06, "loss": 0.4216, "step": 17326 }, { "epoch": 0.935027791268685, "grad_norm": 1.2065963866030907, "learning_rate": 1.0934176959362659e-06, "loss": 0.5568, "step": 17327 }, { "epoch": 0.9350817548971992, "grad_norm": 1.1235905854120753, "learning_rate": 1.093263119513149e-06, "loss": 0.501, "step": 17328 }, { "epoch": 0.9351357185257136, "grad_norm": 1.0296468876434302, "learning_rate": 1.0931086697441032e-06, "loss": 0.4273, "step": 17329 }, { "epoch": 0.935189682154228, "grad_norm": 1.1959373754682798, "learning_rate": 1.092954346633567e-06, "loss": 0.3959, "step": 17330 }, { "epoch": 0.9352436457827424, "grad_norm": 0.9781533465024763, "learning_rate": 1.0928001501859757e-06, "loss": 0.5428, "step": 17331 }, { "epoch": 0.9352976094112568, "grad_norm": 0.8024007871169889, "learning_rate": 1.0926460804057614e-06, "loss": 0.4676, "step": 17332 }, { "epoch": 0.9353515730397712, "grad_norm": 1.083358847962274, "learning_rate": 1.0924921372973516e-06, "loss": 0.3748, "step": 17333 }, { "epoch": 0.9354055366682856, "grad_norm": 0.8859466537156724, "learning_rate": 1.0923383208651713e-06, "loss": 0.3174, "step": 17334 }, { "epoch": 0.9354595002967999, "grad_norm": 0.967545067686146, "learning_rate": 1.0921846311136417e-06, "loss": 0.3952, "step": 17335 }, { "epoch": 0.9355134639253143, "grad_norm": 1.326186446056577, "learning_rate": 1.0920310680471792e-06, "loss": 0.5043, "step": 17336 }, { "epoch": 
0.9355674275538287, "grad_norm": 1.0353649065096666, "learning_rate": 1.0918776316701978e-06, "loss": 0.3937, "step": 17337 }, { "epoch": 0.9356213911823431, "grad_norm": 0.8816947652763565, "learning_rate": 1.0917243219871077e-06, "loss": 0.418, "step": 17338 }, { "epoch": 0.9356753548108575, "grad_norm": 1.0487251307851215, "learning_rate": 1.0915711390023146e-06, "loss": 0.5768, "step": 17339 }, { "epoch": 0.9357293184393719, "grad_norm": 1.1638287634432303, "learning_rate": 1.091418082720221e-06, "loss": 0.3479, "step": 17340 }, { "epoch": 0.9357832820678862, "grad_norm": 1.206250616631468, "learning_rate": 1.091265153145227e-06, "loss": 0.5249, "step": 17341 }, { "epoch": 0.9358372456964006, "grad_norm": 1.1562606649507132, "learning_rate": 1.0911123502817267e-06, "loss": 0.3847, "step": 17342 }, { "epoch": 0.935891209324915, "grad_norm": 0.9991782959530328, "learning_rate": 1.0909596741341127e-06, "loss": 0.3968, "step": 17343 }, { "epoch": 0.9359451729534294, "grad_norm": 1.0016780230208167, "learning_rate": 1.090807124706772e-06, "loss": 0.3587, "step": 17344 }, { "epoch": 0.9359991365819438, "grad_norm": 0.9058851754954083, "learning_rate": 1.0906547020040901e-06, "loss": 0.4171, "step": 17345 }, { "epoch": 0.9360531002104582, "grad_norm": 0.9369029717872934, "learning_rate": 1.0905024060304475e-06, "loss": 0.365, "step": 17346 }, { "epoch": 0.9361070638389726, "grad_norm": 0.8084239191607253, "learning_rate": 1.0903502367902208e-06, "loss": 0.2306, "step": 17347 }, { "epoch": 0.9361610274674869, "grad_norm": 1.2338940830837064, "learning_rate": 1.090198194287785e-06, "loss": 0.5438, "step": 17348 }, { "epoch": 0.9362149910960013, "grad_norm": 0.9805079648038706, "learning_rate": 1.0900462785275079e-06, "loss": 0.3722, "step": 17349 }, { "epoch": 0.9362689547245157, "grad_norm": 1.1464342544552608, "learning_rate": 1.0898944895137568e-06, "loss": 0.5976, "step": 17350 }, { "epoch": 0.93632291835303, "grad_norm": 1.1088231383231355, "learning_rate": 1.0897428272508939e-06, "loss": 0.5493, "step": 17351 }, { "epoch": 0.9363768819815445, "grad_norm": 1.035419427688752, "learning_rate": 1.0895912917432788e-06, "loss": 0.4553, "step": 17352 }, { "epoch": 0.9364308456100588, "grad_norm": 1.1094929735141852, "learning_rate": 1.0894398829952662e-06, "loss": 0.4774, "step": 17353 }, { "epoch": 0.9364848092385732, "grad_norm": 0.9466578367412182, "learning_rate": 1.0892886010112077e-06, "loss": 0.4297, "step": 17354 }, { "epoch": 0.9365387728670875, "grad_norm": 1.733687342556291, "learning_rate": 1.089137445795452e-06, "loss": 0.4569, "step": 17355 }, { "epoch": 0.9365927364956019, "grad_norm": 0.8837718947727119, "learning_rate": 1.088986417352343e-06, "loss": 0.3582, "step": 17356 }, { "epoch": 0.9366467001241163, "grad_norm": 1.0545065604719959, "learning_rate": 1.0888355156862212e-06, "loss": 0.4401, "step": 17357 }, { "epoch": 0.9367006637526307, "grad_norm": 1.1683909058766477, "learning_rate": 1.0886847408014236e-06, "loss": 0.4023, "step": 17358 }, { "epoch": 0.9367546273811451, "grad_norm": 0.9822943197379215, "learning_rate": 1.0885340927022842e-06, "loss": 0.4913, "step": 17359 }, { "epoch": 0.9368085910096595, "grad_norm": 0.9519505516386172, "learning_rate": 1.0883835713931318e-06, "loss": 0.4369, "step": 17360 }, { "epoch": 0.9368625546381739, "grad_norm": 1.031649618768374, "learning_rate": 1.088233176878294e-06, "loss": 0.498, "step": 17361 }, { "epoch": 0.9369165182666882, "grad_norm": 1.0280464851993039, "learning_rate": 1.0880829091620922e-06, "loss": 0.359, "step": 
17362 }, { "epoch": 0.9369704818952026, "grad_norm": 0.9537800848247804, "learning_rate": 1.0879327682488452e-06, "loss": 0.3983, "step": 17363 }, { "epoch": 0.937024445523717, "grad_norm": 0.9550657718994277, "learning_rate": 1.087782754142869e-06, "loss": 0.3932, "step": 17364 }, { "epoch": 0.9370784091522314, "grad_norm": 1.0088667958978885, "learning_rate": 1.0876328668484741e-06, "loss": 0.5102, "step": 17365 }, { "epoch": 0.9371323727807458, "grad_norm": 0.9771555942378689, "learning_rate": 1.0874831063699695e-06, "loss": 0.4715, "step": 17366 }, { "epoch": 0.9371863364092602, "grad_norm": 1.2294456029194378, "learning_rate": 1.0873334727116585e-06, "loss": 0.4784, "step": 17367 }, { "epoch": 0.9372403000377746, "grad_norm": 1.1435225492865144, "learning_rate": 1.0871839658778424e-06, "loss": 0.5601, "step": 17368 }, { "epoch": 0.9372942636662889, "grad_norm": 0.9851466094571982, "learning_rate": 1.0870345858728182e-06, "loss": 0.4334, "step": 17369 }, { "epoch": 0.9373482272948033, "grad_norm": 1.0388403914319213, "learning_rate": 1.0868853327008788e-06, "loss": 0.4107, "step": 17370 }, { "epoch": 0.9374021909233177, "grad_norm": 0.7694108463261368, "learning_rate": 1.0867362063663137e-06, "loss": 0.2978, "step": 17371 }, { "epoch": 0.9374561545518321, "grad_norm": 0.9117317975729406, "learning_rate": 1.0865872068734092e-06, "loss": 0.4295, "step": 17372 }, { "epoch": 0.9375101181803465, "grad_norm": 1.2237130483140148, "learning_rate": 1.0864383342264484e-06, "loss": 0.5778, "step": 17373 }, { "epoch": 0.9375640818088609, "grad_norm": 1.163291274840639, "learning_rate": 1.0862895884297091e-06, "loss": 0.4932, "step": 17374 }, { "epoch": 0.9376180454373753, "grad_norm": 1.0003931965906736, "learning_rate": 1.0861409694874668e-06, "loss": 0.3709, "step": 17375 }, { "epoch": 0.9376720090658895, "grad_norm": 0.9892953070017084, "learning_rate": 1.0859924774039934e-06, "loss": 0.4821, "step": 17376 }, { "epoch": 0.9377259726944039, "grad_norm": 0.9181756315618775, "learning_rate": 1.0858441121835556e-06, "loss": 0.3834, "step": 17377 }, { "epoch": 0.9377799363229183, "grad_norm": 0.946810878306497, "learning_rate": 1.0856958738304186e-06, "loss": 0.3552, "step": 17378 }, { "epoch": 0.9378338999514327, "grad_norm": 0.9464000715673325, "learning_rate": 1.0855477623488423e-06, "loss": 0.3773, "step": 17379 }, { "epoch": 0.9378878635799471, "grad_norm": 1.227134518858664, "learning_rate": 1.0853997777430837e-06, "loss": 0.4586, "step": 17380 }, { "epoch": 0.9379418272084615, "grad_norm": 0.9330748839347619, "learning_rate": 1.085251920017396e-06, "loss": 0.3408, "step": 17381 }, { "epoch": 0.9379957908369759, "grad_norm": 1.0366177623876756, "learning_rate": 1.0851041891760292e-06, "loss": 0.3988, "step": 17382 }, { "epoch": 0.9380497544654902, "grad_norm": 1.148629041603677, "learning_rate": 1.084956585223229e-06, "loss": 0.4445, "step": 17383 }, { "epoch": 0.9381037180940046, "grad_norm": 1.0574689889780948, "learning_rate": 1.0848091081632372e-06, "loss": 0.4593, "step": 17384 }, { "epoch": 0.938157681722519, "grad_norm": 1.0753221454234463, "learning_rate": 1.084661758000293e-06, "loss": 0.5109, "step": 17385 }, { "epoch": 0.9382116453510334, "grad_norm": 1.1332287640763128, "learning_rate": 1.0845145347386313e-06, "loss": 0.5417, "step": 17386 }, { "epoch": 0.9382656089795478, "grad_norm": 1.0825640839128843, "learning_rate": 1.0843674383824836e-06, "loss": 0.3649, "step": 17387 }, { "epoch": 0.9383195726080622, "grad_norm": 1.1759936865884797, "learning_rate": 1.0842204689360773e-06, 
"loss": 0.6493, "step": 17388 }, { "epoch": 0.9383735362365766, "grad_norm": 1.053742650757147, "learning_rate": 1.0840736264036362e-06, "loss": 0.4967, "step": 17389 }, { "epoch": 0.9384274998650909, "grad_norm": 1.113705928508162, "learning_rate": 1.0839269107893815e-06, "loss": 0.4884, "step": 17390 }, { "epoch": 0.9384814634936053, "grad_norm": 0.906716693258534, "learning_rate": 1.0837803220975296e-06, "loss": 0.4423, "step": 17391 }, { "epoch": 0.9385354271221197, "grad_norm": 1.062830331852171, "learning_rate": 1.0836338603322932e-06, "loss": 0.3925, "step": 17392 }, { "epoch": 0.9385893907506341, "grad_norm": 1.0624860195009338, "learning_rate": 1.0834875254978822e-06, "loss": 0.4163, "step": 17393 }, { "epoch": 0.9386433543791485, "grad_norm": 1.2574376112007684, "learning_rate": 1.083341317598502e-06, "loss": 0.5203, "step": 17394 }, { "epoch": 0.9386973180076629, "grad_norm": 1.1541638713473246, "learning_rate": 1.0831952366383549e-06, "loss": 0.4568, "step": 17395 }, { "epoch": 0.9387512816361773, "grad_norm": 0.997749482003211, "learning_rate": 1.0830492826216405e-06, "loss": 0.4047, "step": 17396 }, { "epoch": 0.9388052452646916, "grad_norm": 1.1039128185265104, "learning_rate": 1.0829034555525522e-06, "loss": 0.5303, "step": 17397 }, { "epoch": 0.938859208893206, "grad_norm": 1.0535255162626413, "learning_rate": 1.0827577554352818e-06, "loss": 0.5072, "step": 17398 }, { "epoch": 0.9389131725217204, "grad_norm": 1.1991173584109622, "learning_rate": 1.0826121822740162e-06, "loss": 0.5938, "step": 17399 }, { "epoch": 0.9389671361502347, "grad_norm": 1.085107974641101, "learning_rate": 1.0824667360729408e-06, "loss": 0.5229, "step": 17400 }, { "epoch": 0.9390210997787491, "grad_norm": 1.1486466398284558, "learning_rate": 1.0823214168362343e-06, "loss": 0.5757, "step": 17401 }, { "epoch": 0.9390750634072635, "grad_norm": 1.0984857625046873, "learning_rate": 1.0821762245680744e-06, "loss": 0.6785, "step": 17402 }, { "epoch": 0.9391290270357778, "grad_norm": 0.9585735502272028, "learning_rate": 1.0820311592726339e-06, "loss": 0.3766, "step": 17403 }, { "epoch": 0.9391829906642922, "grad_norm": 1.1284454027913027, "learning_rate": 1.0818862209540813e-06, "loss": 0.5033, "step": 17404 }, { "epoch": 0.9392369542928066, "grad_norm": 1.2766011041950835, "learning_rate": 1.0817414096165832e-06, "loss": 0.5345, "step": 17405 }, { "epoch": 0.939290917921321, "grad_norm": 0.8707958888255534, "learning_rate": 1.081596725264301e-06, "loss": 0.3901, "step": 17406 }, { "epoch": 0.9393448815498354, "grad_norm": 0.9647120504237828, "learning_rate": 1.081452167901394e-06, "loss": 0.358, "step": 17407 }, { "epoch": 0.9393988451783498, "grad_norm": 0.8844395353060923, "learning_rate": 1.0813077375320158e-06, "loss": 0.4195, "step": 17408 }, { "epoch": 0.9394528088068642, "grad_norm": 0.9293176741023896, "learning_rate": 1.0811634341603184e-06, "loss": 0.394, "step": 17409 }, { "epoch": 0.9395067724353785, "grad_norm": 1.3134197692893612, "learning_rate": 1.0810192577904486e-06, "loss": 0.5357, "step": 17410 }, { "epoch": 0.9395607360638929, "grad_norm": 0.9076212295919399, "learning_rate": 1.0808752084265505e-06, "loss": 0.4133, "step": 17411 }, { "epoch": 0.9396146996924073, "grad_norm": 1.2650666090173655, "learning_rate": 1.0807312860727636e-06, "loss": 0.6318, "step": 17412 }, { "epoch": 0.9396686633209217, "grad_norm": 1.1461707646831374, "learning_rate": 1.0805874907332256e-06, "loss": 0.5255, "step": 17413 }, { "epoch": 0.9397226269494361, "grad_norm": 0.9052485505109944, "learning_rate": 
1.0804438224120684e-06, "loss": 0.3503, "step": 17414 }, { "epoch": 0.9397765905779505, "grad_norm": 0.8831609976170246, "learning_rate": 1.080300281113421e-06, "loss": 0.3446, "step": 17415 }, { "epoch": 0.9398305542064649, "grad_norm": 1.2358757085573848, "learning_rate": 1.0801568668414096e-06, "loss": 0.4093, "step": 17416 }, { "epoch": 0.9398845178349792, "grad_norm": 0.9145291171311667, "learning_rate": 1.080013579600156e-06, "loss": 0.3794, "step": 17417 }, { "epoch": 0.9399384814634936, "grad_norm": 1.464378316901581, "learning_rate": 1.079870419393778e-06, "loss": 0.6542, "step": 17418 }, { "epoch": 0.939992445092008, "grad_norm": 0.7797195896780417, "learning_rate": 1.0797273862263903e-06, "loss": 0.4027, "step": 17419 }, { "epoch": 0.9400464087205224, "grad_norm": 0.9222089823528181, "learning_rate": 1.0795844801021037e-06, "loss": 0.3671, "step": 17420 }, { "epoch": 0.9401003723490368, "grad_norm": 1.0458920747579148, "learning_rate": 1.0794417010250258e-06, "loss": 0.4104, "step": 17421 }, { "epoch": 0.9401543359775512, "grad_norm": 0.9876519761980403, "learning_rate": 1.07929904899926e-06, "loss": 0.3686, "step": 17422 }, { "epoch": 0.9402082996060656, "grad_norm": 1.1939810701902476, "learning_rate": 1.0791565240289063e-06, "loss": 0.5895, "step": 17423 }, { "epoch": 0.9402622632345798, "grad_norm": 0.8325523521679353, "learning_rate": 1.0790141261180615e-06, "loss": 0.3962, "step": 17424 }, { "epoch": 0.9403162268630942, "grad_norm": 1.052476896210643, "learning_rate": 1.0788718552708172e-06, "loss": 0.4419, "step": 17425 }, { "epoch": 0.9403701904916086, "grad_norm": 1.1959243148688423, "learning_rate": 1.0787297114912635e-06, "loss": 0.583, "step": 17426 }, { "epoch": 0.940424154120123, "grad_norm": 0.9013798225756159, "learning_rate": 1.0785876947834847e-06, "loss": 0.3217, "step": 17427 }, { "epoch": 0.9404781177486374, "grad_norm": 0.9509687207712175, "learning_rate": 1.0784458051515634e-06, "loss": 0.4444, "step": 17428 }, { "epoch": 0.9405320813771518, "grad_norm": 1.3817594736621224, "learning_rate": 1.078304042599577e-06, "loss": 0.5556, "step": 17429 }, { "epoch": 0.9405860450056662, "grad_norm": 1.1637515427403333, "learning_rate": 1.0781624071316006e-06, "loss": 0.4102, "step": 17430 }, { "epoch": 0.9406400086341805, "grad_norm": 1.0019392158731615, "learning_rate": 1.0780208987517041e-06, "loss": 0.4848, "step": 17431 }, { "epoch": 0.9406939722626949, "grad_norm": 1.176316262226128, "learning_rate": 1.0778795174639555e-06, "loss": 0.5086, "step": 17432 }, { "epoch": 0.9407479358912093, "grad_norm": 1.0184269740411365, "learning_rate": 1.0777382632724181e-06, "loss": 0.418, "step": 17433 }, { "epoch": 0.9408018995197237, "grad_norm": 1.1241779303072872, "learning_rate": 1.077597136181151e-06, "loss": 0.3524, "step": 17434 }, { "epoch": 0.9408558631482381, "grad_norm": 1.100817884753176, "learning_rate": 1.0774561361942102e-06, "loss": 0.5107, "step": 17435 }, { "epoch": 0.9409098267767525, "grad_norm": 1.1181688777828696, "learning_rate": 1.0773152633156498e-06, "loss": 0.4124, "step": 17436 }, { "epoch": 0.9409637904052669, "grad_norm": 0.917681286226624, "learning_rate": 1.0771745175495167e-06, "loss": 0.492, "step": 17437 }, { "epoch": 0.9410177540337812, "grad_norm": 1.2540739393980092, "learning_rate": 1.077033898899857e-06, "loss": 0.4314, "step": 17438 }, { "epoch": 0.9410717176622956, "grad_norm": 1.2570786254911834, "learning_rate": 1.0768934073707124e-06, "loss": 0.4354, "step": 17439 }, { "epoch": 0.94112568129081, "grad_norm": 1.4124252878057995, 
"learning_rate": 1.0767530429661204e-06, "loss": 0.6175, "step": 17440 }, { "epoch": 0.9411796449193244, "grad_norm": 0.8793254917124304, "learning_rate": 1.0766128056901152e-06, "loss": 0.2811, "step": 17441 }, { "epoch": 0.9412336085478388, "grad_norm": 0.9981789171755551, "learning_rate": 1.0764726955467273e-06, "loss": 0.3626, "step": 17442 }, { "epoch": 0.9412875721763532, "grad_norm": 0.970804315546047, "learning_rate": 1.0763327125399842e-06, "loss": 0.4832, "step": 17443 }, { "epoch": 0.9413415358048676, "grad_norm": 1.006026357254184, "learning_rate": 1.0761928566739082e-06, "loss": 0.3932, "step": 17444 }, { "epoch": 0.9413954994333819, "grad_norm": 1.2446846373686593, "learning_rate": 1.0760531279525202e-06, "loss": 0.5419, "step": 17445 }, { "epoch": 0.9414494630618963, "grad_norm": 0.8979209940790261, "learning_rate": 1.0759135263798349e-06, "loss": 0.3624, "step": 17446 }, { "epoch": 0.9415034266904106, "grad_norm": 1.14922461168997, "learning_rate": 1.075774051959865e-06, "loss": 0.5505, "step": 17447 }, { "epoch": 0.941557390318925, "grad_norm": 1.2232568171282814, "learning_rate": 1.0756347046966192e-06, "loss": 0.5831, "step": 17448 }, { "epoch": 0.9416113539474394, "grad_norm": 1.0817623423195535, "learning_rate": 1.0754954845941026e-06, "loss": 0.4528, "step": 17449 }, { "epoch": 0.9416653175759538, "grad_norm": 0.9853619957234634, "learning_rate": 1.0753563916563165e-06, "loss": 0.2971, "step": 17450 }, { "epoch": 0.9417192812044682, "grad_norm": 1.206265962956966, "learning_rate": 1.0752174258872582e-06, "loss": 0.4867, "step": 17451 }, { "epoch": 0.9417732448329825, "grad_norm": 0.9770687171476103, "learning_rate": 1.0750785872909226e-06, "loss": 0.4861, "step": 17452 }, { "epoch": 0.9418272084614969, "grad_norm": 0.9213204478559972, "learning_rate": 1.0749398758712987e-06, "loss": 0.3959, "step": 17453 }, { "epoch": 0.9418811720900113, "grad_norm": 1.261965679402492, "learning_rate": 1.0748012916323747e-06, "loss": 0.4515, "step": 17454 }, { "epoch": 0.9419351357185257, "grad_norm": 1.233065448617164, "learning_rate": 1.0746628345781323e-06, "loss": 0.5819, "step": 17455 }, { "epoch": 0.9419890993470401, "grad_norm": 1.3106406699152944, "learning_rate": 1.0745245047125516e-06, "loss": 0.5164, "step": 17456 }, { "epoch": 0.9420430629755545, "grad_norm": 0.8455112173572287, "learning_rate": 1.0743863020396084e-06, "loss": 0.2847, "step": 17457 }, { "epoch": 0.9420970266040689, "grad_norm": 1.0244272919907735, "learning_rate": 1.0742482265632744e-06, "loss": 0.4901, "step": 17458 }, { "epoch": 0.9421509902325832, "grad_norm": 1.136795616190668, "learning_rate": 1.074110278287519e-06, "loss": 0.5852, "step": 17459 }, { "epoch": 0.9422049538610976, "grad_norm": 1.1420590831941488, "learning_rate": 1.0739724572163057e-06, "loss": 0.5316, "step": 17460 }, { "epoch": 0.942258917489612, "grad_norm": 0.9874084782198072, "learning_rate": 1.0738347633535958e-06, "loss": 0.4379, "step": 17461 }, { "epoch": 0.9423128811181264, "grad_norm": 0.9298717739239738, "learning_rate": 1.0736971967033476e-06, "loss": 0.3578, "step": 17462 }, { "epoch": 0.9423668447466408, "grad_norm": 0.9786934391686091, "learning_rate": 1.0735597572695142e-06, "loss": 0.507, "step": 17463 }, { "epoch": 0.9424208083751552, "grad_norm": 0.9922910674994573, "learning_rate": 1.073422445056046e-06, "loss": 0.4539, "step": 17464 }, { "epoch": 0.9424747720036696, "grad_norm": 1.2788390673274184, "learning_rate": 1.0732852600668896e-06, "loss": 0.6293, "step": 17465 }, { "epoch": 0.9425287356321839, "grad_norm": 
1.2944126625171304, "learning_rate": 1.073148202305988e-06, "loss": 0.4715, "step": 17466 }, { "epoch": 0.9425826992606983, "grad_norm": 1.2385237715333102, "learning_rate": 1.0730112717772798e-06, "loss": 0.6687, "step": 17467 }, { "epoch": 0.9426366628892127, "grad_norm": 1.165117535254885, "learning_rate": 1.0728744684847007e-06, "loss": 0.4253, "step": 17468 }, { "epoch": 0.9426906265177271, "grad_norm": 1.1222126617819985, "learning_rate": 1.0727377924321825e-06, "loss": 0.4945, "step": 17469 }, { "epoch": 0.9427445901462415, "grad_norm": 0.9810341291433643, "learning_rate": 1.0726012436236537e-06, "loss": 0.4917, "step": 17470 }, { "epoch": 0.9427985537747559, "grad_norm": 0.9984896125891163, "learning_rate": 1.0724648220630387e-06, "loss": 0.5047, "step": 17471 }, { "epoch": 0.9428525174032701, "grad_norm": 1.0607805272165745, "learning_rate": 1.0723285277542586e-06, "loss": 0.485, "step": 17472 }, { "epoch": 0.9429064810317845, "grad_norm": 1.102093979612336, "learning_rate": 1.0721923607012299e-06, "loss": 0.4888, "step": 17473 }, { "epoch": 0.9429604446602989, "grad_norm": 1.1346747988337693, "learning_rate": 1.072056320907867e-06, "loss": 0.5951, "step": 17474 }, { "epoch": 0.9430144082888133, "grad_norm": 1.1635474516759199, "learning_rate": 1.07192040837808e-06, "loss": 0.4826, "step": 17475 }, { "epoch": 0.9430683719173277, "grad_norm": 1.1072672605258773, "learning_rate": 1.0717846231157744e-06, "loss": 0.4378, "step": 17476 }, { "epoch": 0.9431223355458421, "grad_norm": 0.9348939813565327, "learning_rate": 1.0716489651248528e-06, "loss": 0.3991, "step": 17477 }, { "epoch": 0.9431762991743565, "grad_norm": 1.046904466672987, "learning_rate": 1.071513434409215e-06, "loss": 0.445, "step": 17478 }, { "epoch": 0.9432302628028708, "grad_norm": 1.0959023916400938, "learning_rate": 1.0713780309727559e-06, "loss": 0.4541, "step": 17479 }, { "epoch": 0.9432842264313852, "grad_norm": 0.8030607571122733, "learning_rate": 1.0712427548193662e-06, "loss": 0.2875, "step": 17480 }, { "epoch": 0.9433381900598996, "grad_norm": 0.895934003857164, "learning_rate": 1.0711076059529353e-06, "loss": 0.317, "step": 17481 }, { "epoch": 0.943392153688414, "grad_norm": 0.8558935227871727, "learning_rate": 1.0709725843773467e-06, "loss": 0.3083, "step": 17482 }, { "epoch": 0.9434461173169284, "grad_norm": 0.988726291921667, "learning_rate": 1.0708376900964812e-06, "loss": 0.4394, "step": 17483 }, { "epoch": 0.9435000809454428, "grad_norm": 1.0163670888177232, "learning_rate": 1.0707029231142153e-06, "loss": 0.5626, "step": 17484 }, { "epoch": 0.9435540445739572, "grad_norm": 1.2212543412395898, "learning_rate": 1.0705682834344235e-06, "loss": 0.5302, "step": 17485 }, { "epoch": 0.9436080082024715, "grad_norm": 1.179022527255604, "learning_rate": 1.0704337710609745e-06, "loss": 0.4327, "step": 17486 }, { "epoch": 0.9436619718309859, "grad_norm": 1.352685876292664, "learning_rate": 1.070299385997735e-06, "loss": 0.5746, "step": 17487 }, { "epoch": 0.9437159354595003, "grad_norm": 0.8445142232159811, "learning_rate": 1.070165128248567e-06, "loss": 0.3344, "step": 17488 }, { "epoch": 0.9437698990880147, "grad_norm": 1.3041571846027975, "learning_rate": 1.070030997817329e-06, "loss": 0.3908, "step": 17489 }, { "epoch": 0.9438238627165291, "grad_norm": 1.163952804179614, "learning_rate": 1.0698969947078763e-06, "loss": 0.5228, "step": 17490 }, { "epoch": 0.9438778263450435, "grad_norm": 0.7919936556816627, "learning_rate": 1.0697631189240606e-06, "loss": 0.2409, "step": 17491 }, { "epoch": 0.9439317899735579, 
"grad_norm": 0.9680736874425601, "learning_rate": 1.0696293704697288e-06, "loss": 0.4938, "step": 17492 }, { "epoch": 0.9439857536020722, "grad_norm": 0.8938878562788537, "learning_rate": 1.0694957493487258e-06, "loss": 0.3746, "step": 17493 }, { "epoch": 0.9440397172305866, "grad_norm": 1.1889658585241398, "learning_rate": 1.0693622555648918e-06, "loss": 0.4858, "step": 17494 }, { "epoch": 0.944093680859101, "grad_norm": 1.1671254435836316, "learning_rate": 1.0692288891220629e-06, "loss": 0.6597, "step": 17495 }, { "epoch": 0.9441476444876153, "grad_norm": 0.8851486100435733, "learning_rate": 1.0690956500240733e-06, "loss": 0.3159, "step": 17496 }, { "epoch": 0.9442016081161297, "grad_norm": 1.0063725542891657, "learning_rate": 1.0689625382747511e-06, "loss": 0.414, "step": 17497 }, { "epoch": 0.9442555717446441, "grad_norm": 0.9328836909399898, "learning_rate": 1.0688295538779236e-06, "loss": 0.3431, "step": 17498 }, { "epoch": 0.9443095353731585, "grad_norm": 1.0453397583896495, "learning_rate": 1.0686966968374116e-06, "loss": 0.6067, "step": 17499 }, { "epoch": 0.9443634990016728, "grad_norm": 1.1653692126429538, "learning_rate": 1.0685639671570345e-06, "loss": 0.5373, "step": 17500 }, { "epoch": 0.9443634990016728, "eval_loss": 0.5278103947639465, "eval_runtime": 165.3254, "eval_samples_per_second": 20.801, "eval_steps_per_second": 0.871, "step": 17500 }, { "epoch": 0.9444174626301872, "grad_norm": 0.919340152874765, "learning_rate": 1.0684313648406057e-06, "loss": 0.5487, "step": 17501 }, { "epoch": 0.9444714262587016, "grad_norm": 0.9150036836390414, "learning_rate": 1.068298889891938e-06, "loss": 0.4556, "step": 17502 }, { "epoch": 0.944525389887216, "grad_norm": 1.155778563432956, "learning_rate": 1.0681665423148379e-06, "loss": 0.5486, "step": 17503 }, { "epoch": 0.9445793535157304, "grad_norm": 1.0435667597772245, "learning_rate": 1.0680343221131096e-06, "loss": 0.5244, "step": 17504 }, { "epoch": 0.9446333171442448, "grad_norm": 0.9736620268372393, "learning_rate": 1.0679022292905525e-06, "loss": 0.4842, "step": 17505 }, { "epoch": 0.9446872807727592, "grad_norm": 1.0361002247771984, "learning_rate": 1.067770263850964e-06, "loss": 0.4402, "step": 17506 }, { "epoch": 0.9447412444012735, "grad_norm": 1.028144567229061, "learning_rate": 1.0676384257981372e-06, "loss": 0.4014, "step": 17507 }, { "epoch": 0.9447952080297879, "grad_norm": 1.1525582286743679, "learning_rate": 1.0675067151358601e-06, "loss": 0.4911, "step": 17508 }, { "epoch": 0.9448491716583023, "grad_norm": 1.0147048166925388, "learning_rate": 1.0673751318679185e-06, "loss": 0.4119, "step": 17509 }, { "epoch": 0.9449031352868167, "grad_norm": 0.8644611896719882, "learning_rate": 1.0672436759980946e-06, "loss": 0.3423, "step": 17510 }, { "epoch": 0.9449570989153311, "grad_norm": 1.0256899491736409, "learning_rate": 1.0671123475301659e-06, "loss": 0.5553, "step": 17511 }, { "epoch": 0.9450110625438455, "grad_norm": 1.1390558002464215, "learning_rate": 1.066981146467908e-06, "loss": 0.543, "step": 17512 }, { "epoch": 0.9450650261723599, "grad_norm": 0.9660885293403647, "learning_rate": 1.0668500728150914e-06, "loss": 0.3982, "step": 17513 }, { "epoch": 0.9451189898008742, "grad_norm": 0.7934473405406189, "learning_rate": 1.0667191265754829e-06, "loss": 0.3691, "step": 17514 }, { "epoch": 0.9451729534293886, "grad_norm": 0.9574450685398503, "learning_rate": 1.0665883077528458e-06, "loss": 0.4128, "step": 17515 }, { "epoch": 0.945226917057903, "grad_norm": 0.8788458067952307, "learning_rate": 1.0664576163509409e-06, 
"loss": 0.3994, "step": 17516 }, { "epoch": 0.9452808806864174, "grad_norm": 0.9432924275034762, "learning_rate": 1.0663270523735237e-06, "loss": 0.4187, "step": 17517 }, { "epoch": 0.9453348443149318, "grad_norm": 1.1377993465063772, "learning_rate": 1.0661966158243464e-06, "loss": 0.4816, "step": 17518 }, { "epoch": 0.9453888079434462, "grad_norm": 0.933365555393075, "learning_rate": 1.0660663067071592e-06, "loss": 0.4845, "step": 17519 }, { "epoch": 0.9454427715719605, "grad_norm": 0.9528193697523573, "learning_rate": 1.065936125025706e-06, "loss": 0.3789, "step": 17520 }, { "epoch": 0.9454967352004748, "grad_norm": 0.9847312305441132, "learning_rate": 1.0658060707837291e-06, "loss": 0.4155, "step": 17521 }, { "epoch": 0.9455506988289892, "grad_norm": 0.9716974289218435, "learning_rate": 1.0656761439849665e-06, "loss": 0.2857, "step": 17522 }, { "epoch": 0.9456046624575036, "grad_norm": 0.9876711267398114, "learning_rate": 1.0655463446331512e-06, "loss": 0.4961, "step": 17523 }, { "epoch": 0.945658626086018, "grad_norm": 0.9008783420411685, "learning_rate": 1.0654166727320155e-06, "loss": 0.393, "step": 17524 }, { "epoch": 0.9457125897145324, "grad_norm": 1.0488967404853808, "learning_rate": 1.0652871282852848e-06, "loss": 0.4367, "step": 17525 }, { "epoch": 0.9457665533430468, "grad_norm": 1.0189066070846666, "learning_rate": 1.065157711296683e-06, "loss": 0.4212, "step": 17526 }, { "epoch": 0.9458205169715612, "grad_norm": 0.6336356854142714, "learning_rate": 1.06502842176993e-06, "loss": 0.2625, "step": 17527 }, { "epoch": 0.9458744806000755, "grad_norm": 0.983526769121989, "learning_rate": 1.0648992597087414e-06, "loss": 0.4035, "step": 17528 }, { "epoch": 0.9459284442285899, "grad_norm": 0.9823036754445538, "learning_rate": 1.0647702251168294e-06, "loss": 0.4695, "step": 17529 }, { "epoch": 0.9459824078571043, "grad_norm": 0.9601582275869454, "learning_rate": 1.0646413179979024e-06, "loss": 0.4189, "step": 17530 }, { "epoch": 0.9460363714856187, "grad_norm": 1.162472408342118, "learning_rate": 1.0645125383556658e-06, "loss": 0.6454, "step": 17531 }, { "epoch": 0.9460903351141331, "grad_norm": 1.0278878371578082, "learning_rate": 1.06438388619382e-06, "loss": 0.5125, "step": 17532 }, { "epoch": 0.9461442987426475, "grad_norm": 1.1352965342578698, "learning_rate": 1.0642553615160634e-06, "loss": 0.45, "step": 17533 }, { "epoch": 0.9461982623711619, "grad_norm": 1.0802149010826647, "learning_rate": 1.0641269643260898e-06, "loss": 0.5747, "step": 17534 }, { "epoch": 0.9462522259996762, "grad_norm": 1.144835587920776, "learning_rate": 1.0639986946275896e-06, "loss": 0.3966, "step": 17535 }, { "epoch": 0.9463061896281906, "grad_norm": 0.863545944178848, "learning_rate": 1.0638705524242487e-06, "loss": 0.3088, "step": 17536 }, { "epoch": 0.946360153256705, "grad_norm": 1.0342548441944406, "learning_rate": 1.063742537719751e-06, "loss": 0.4181, "step": 17537 }, { "epoch": 0.9464141168852194, "grad_norm": 1.161319569984265, "learning_rate": 1.063614650517775e-06, "loss": 0.4243, "step": 17538 }, { "epoch": 0.9464680805137338, "grad_norm": 0.947729188106141, "learning_rate": 1.063486890821997e-06, "loss": 0.3993, "step": 17539 }, { "epoch": 0.9465220441422482, "grad_norm": 0.9318621259595681, "learning_rate": 1.0633592586360877e-06, "loss": 0.3539, "step": 17540 }, { "epoch": 0.9465760077707625, "grad_norm": 1.0512478474523645, "learning_rate": 1.063231753963717e-06, "loss": 0.4573, "step": 17541 }, { "epoch": 0.9466299713992768, "grad_norm": 1.2036629277734847, "learning_rate": 
1.0631043768085485e-06, "loss": 0.6013, "step": 17542 }, { "epoch": 0.9466839350277912, "grad_norm": 0.9416817520148757, "learning_rate": 1.0629771271742433e-06, "loss": 0.3835, "step": 17543 }, { "epoch": 0.9467378986563056, "grad_norm": 1.0986199190501218, "learning_rate": 1.0628500050644586e-06, "loss": 0.47, "step": 17544 }, { "epoch": 0.94679186228482, "grad_norm": 0.9808269933649015, "learning_rate": 1.0627230104828484e-06, "loss": 0.4299, "step": 17545 }, { "epoch": 0.9468458259133344, "grad_norm": 1.0006281936131571, "learning_rate": 1.0625961434330622e-06, "loss": 0.4489, "step": 17546 }, { "epoch": 0.9468997895418488, "grad_norm": 1.099572255639449, "learning_rate": 1.0624694039187464e-06, "loss": 0.4566, "step": 17547 }, { "epoch": 0.9469537531703631, "grad_norm": 0.9223443952339713, "learning_rate": 1.0623427919435442e-06, "loss": 0.4004, "step": 17548 }, { "epoch": 0.9470077167988775, "grad_norm": 0.86231144845256, "learning_rate": 1.062216307511094e-06, "loss": 0.3584, "step": 17549 }, { "epoch": 0.9470616804273919, "grad_norm": 1.093046457625342, "learning_rate": 1.0620899506250306e-06, "loss": 0.4756, "step": 17550 }, { "epoch": 0.9471156440559063, "grad_norm": 1.1680058494490955, "learning_rate": 1.0619637212889868e-06, "loss": 0.4151, "step": 17551 }, { "epoch": 0.9471696076844207, "grad_norm": 0.8517988101754712, "learning_rate": 1.0618376195065896e-06, "loss": 0.3505, "step": 17552 }, { "epoch": 0.9472235713129351, "grad_norm": 1.0393381640380404, "learning_rate": 1.0617116452814639e-06, "loss": 0.4632, "step": 17553 }, { "epoch": 0.9472775349414495, "grad_norm": 1.064498866760536, "learning_rate": 1.0615857986172298e-06, "loss": 0.4316, "step": 17554 }, { "epoch": 0.9473314985699638, "grad_norm": 1.1094018618912822, "learning_rate": 1.0614600795175052e-06, "loss": 0.4617, "step": 17555 }, { "epoch": 0.9473854621984782, "grad_norm": 0.9247202620547346, "learning_rate": 1.0613344879859023e-06, "loss": 0.3885, "step": 17556 }, { "epoch": 0.9474394258269926, "grad_norm": 0.9865073565869363, "learning_rate": 1.0612090240260308e-06, "loss": 0.4632, "step": 17557 }, { "epoch": 0.947493389455507, "grad_norm": 1.0499795225951762, "learning_rate": 1.0610836876414973e-06, "loss": 0.4581, "step": 17558 }, { "epoch": 0.9475473530840214, "grad_norm": 1.1342490858315624, "learning_rate": 1.060958478835904e-06, "loss": 0.4816, "step": 17559 }, { "epoch": 0.9476013167125358, "grad_norm": 1.117976623299202, "learning_rate": 1.060833397612849e-06, "loss": 0.5111, "step": 17560 }, { "epoch": 0.9476552803410502, "grad_norm": 0.8930671083412475, "learning_rate": 1.0607084439759282e-06, "loss": 0.3523, "step": 17561 }, { "epoch": 0.9477092439695645, "grad_norm": 0.972350348590994, "learning_rate": 1.060583617928732e-06, "loss": 0.4514, "step": 17562 }, { "epoch": 0.9477632075980789, "grad_norm": 0.8689446205790831, "learning_rate": 1.0604589194748482e-06, "loss": 0.4961, "step": 17563 }, { "epoch": 0.9478171712265933, "grad_norm": 1.1688850461171667, "learning_rate": 1.060334348617861e-06, "loss": 0.4595, "step": 17564 }, { "epoch": 0.9478711348551077, "grad_norm": 1.0500546007362237, "learning_rate": 1.06020990536135e-06, "loss": 0.3544, "step": 17565 }, { "epoch": 0.947925098483622, "grad_norm": 1.093478386678579, "learning_rate": 1.0600855897088933e-06, "loss": 0.7618, "step": 17566 }, { "epoch": 0.9479790621121365, "grad_norm": 1.0472726264363874, "learning_rate": 1.059961401664062e-06, "loss": 0.5343, "step": 17567 }, { "epoch": 0.9480330257406508, "grad_norm": 1.0174380401161265, 
"learning_rate": 1.0598373412304269e-06, "loss": 0.3745, "step": 17568 }, { "epoch": 0.9480869893691651, "grad_norm": 0.9701479959311213, "learning_rate": 1.0597134084115535e-06, "loss": 0.3435, "step": 17569 }, { "epoch": 0.9481409529976795, "grad_norm": 1.0758037337031652, "learning_rate": 1.0595896032110028e-06, "loss": 0.4351, "step": 17570 }, { "epoch": 0.9481949166261939, "grad_norm": 1.3877638457066415, "learning_rate": 1.0594659256323337e-06, "loss": 0.5476, "step": 17571 }, { "epoch": 0.9482488802547083, "grad_norm": 0.9599622371717922, "learning_rate": 1.0593423756791007e-06, "loss": 0.4404, "step": 17572 }, { "epoch": 0.9483028438832227, "grad_norm": 0.971526332635944, "learning_rate": 1.059218953354855e-06, "loss": 0.4025, "step": 17573 }, { "epoch": 0.9483568075117371, "grad_norm": 1.193566931817505, "learning_rate": 1.0590956586631432e-06, "loss": 0.4367, "step": 17574 }, { "epoch": 0.9484107711402515, "grad_norm": 1.1450846100184533, "learning_rate": 1.0589724916075096e-06, "loss": 0.6168, "step": 17575 }, { "epoch": 0.9484647347687658, "grad_norm": 0.9571375163285176, "learning_rate": 1.0588494521914944e-06, "loss": 0.3829, "step": 17576 }, { "epoch": 0.9485186983972802, "grad_norm": 1.011596998126651, "learning_rate": 1.0587265404186327e-06, "loss": 0.4433, "step": 17577 }, { "epoch": 0.9485726620257946, "grad_norm": 1.4301156770818677, "learning_rate": 1.058603756292458e-06, "loss": 0.6002, "step": 17578 }, { "epoch": 0.948626625654309, "grad_norm": 1.0528021604406168, "learning_rate": 1.0584810998164994e-06, "loss": 0.4737, "step": 17579 }, { "epoch": 0.9486805892828234, "grad_norm": 1.1104296407052574, "learning_rate": 1.0583585709942815e-06, "loss": 0.524, "step": 17580 }, { "epoch": 0.9487345529113378, "grad_norm": 0.8814833968636292, "learning_rate": 1.0582361698293258e-06, "loss": 0.3353, "step": 17581 }, { "epoch": 0.9487885165398522, "grad_norm": 1.1161010211198492, "learning_rate": 1.058113896325151e-06, "loss": 0.5981, "step": 17582 }, { "epoch": 0.9488424801683665, "grad_norm": 1.2077814905368793, "learning_rate": 1.0579917504852715e-06, "loss": 0.5307, "step": 17583 }, { "epoch": 0.9488964437968809, "grad_norm": 1.2096194317661646, "learning_rate": 1.057869732313197e-06, "loss": 0.5494, "step": 17584 }, { "epoch": 0.9489504074253953, "grad_norm": 0.9647337152071195, "learning_rate": 1.057747841812435e-06, "loss": 0.3488, "step": 17585 }, { "epoch": 0.9490043710539097, "grad_norm": 1.1277414698847856, "learning_rate": 1.0576260789864887e-06, "loss": 0.5059, "step": 17586 }, { "epoch": 0.9490583346824241, "grad_norm": 1.2171344550142191, "learning_rate": 1.0575044438388571e-06, "loss": 0.5606, "step": 17587 }, { "epoch": 0.9491122983109385, "grad_norm": 1.1839853163796918, "learning_rate": 1.0573829363730367e-06, "loss": 0.6986, "step": 17588 }, { "epoch": 0.9491662619394529, "grad_norm": 1.1057264440704324, "learning_rate": 1.05726155659252e-06, "loss": 0.4344, "step": 17589 }, { "epoch": 0.9492202255679671, "grad_norm": 1.0053517225601263, "learning_rate": 1.0571403045007948e-06, "loss": 0.4158, "step": 17590 }, { "epoch": 0.9492741891964815, "grad_norm": 0.9632841273201004, "learning_rate": 1.0570191801013466e-06, "loss": 0.5073, "step": 17591 }, { "epoch": 0.9493281528249959, "grad_norm": 0.8528989255233609, "learning_rate": 1.0568981833976566e-06, "loss": 0.3174, "step": 17592 }, { "epoch": 0.9493821164535103, "grad_norm": 1.0992333611789769, "learning_rate": 1.0567773143932019e-06, "loss": 0.5261, "step": 17593 }, { "epoch": 0.9494360800820247, 
"grad_norm": 0.9180169721633795, "learning_rate": 1.0566565730914572e-06, "loss": 0.3734, "step": 17594 }, { "epoch": 0.9494900437105391, "grad_norm": 1.05186056575915, "learning_rate": 1.0565359594958921e-06, "loss": 0.4177, "step": 17595 }, { "epoch": 0.9495440073390535, "grad_norm": 1.0898070464731402, "learning_rate": 1.0564154736099733e-06, "loss": 0.4927, "step": 17596 }, { "epoch": 0.9495979709675678, "grad_norm": 0.8980480215430352, "learning_rate": 1.0562951154371636e-06, "loss": 0.3933, "step": 17597 }, { "epoch": 0.9496519345960822, "grad_norm": 1.0357125731852825, "learning_rate": 1.0561748849809227e-06, "loss": 0.4825, "step": 17598 }, { "epoch": 0.9497058982245966, "grad_norm": 1.3026461077253615, "learning_rate": 1.0560547822447053e-06, "loss": 0.6274, "step": 17599 }, { "epoch": 0.949759861853111, "grad_norm": 1.060659815126623, "learning_rate": 1.0559348072319639e-06, "loss": 0.4234, "step": 17600 }, { "epoch": 0.9498138254816254, "grad_norm": 0.9206066763629969, "learning_rate": 1.0558149599461462e-06, "loss": 0.4403, "step": 17601 }, { "epoch": 0.9498677891101398, "grad_norm": 0.9553472412905634, "learning_rate": 1.0556952403906978e-06, "loss": 0.3699, "step": 17602 }, { "epoch": 0.9499217527386542, "grad_norm": 1.011732583421941, "learning_rate": 1.055575648569058e-06, "loss": 0.3778, "step": 17603 }, { "epoch": 0.9499757163671685, "grad_norm": 0.9875932311424257, "learning_rate": 1.0554561844846657e-06, "loss": 0.3969, "step": 17604 }, { "epoch": 0.9500296799956829, "grad_norm": 1.03024449031879, "learning_rate": 1.0553368481409528e-06, "loss": 0.3203, "step": 17605 }, { "epoch": 0.9500836436241973, "grad_norm": 1.1260197738062632, "learning_rate": 1.0552176395413502e-06, "loss": 0.4982, "step": 17606 }, { "epoch": 0.9501376072527117, "grad_norm": 1.0769001657135813, "learning_rate": 1.0550985586892838e-06, "loss": 0.5221, "step": 17607 }, { "epoch": 0.9501915708812261, "grad_norm": 1.0292782108295273, "learning_rate": 1.054979605588176e-06, "loss": 0.3786, "step": 17608 }, { "epoch": 0.9502455345097405, "grad_norm": 1.0263272246545248, "learning_rate": 1.0548607802414458e-06, "loss": 0.4202, "step": 17609 }, { "epoch": 0.9502994981382548, "grad_norm": 1.0199168901497793, "learning_rate": 1.0547420826525085e-06, "loss": 0.4217, "step": 17610 }, { "epoch": 0.9503534617667692, "grad_norm": 1.0903832026451736, "learning_rate": 1.0546235128247748e-06, "loss": 0.4922, "step": 17611 }, { "epoch": 0.9504074253952836, "grad_norm": 1.250600295550576, "learning_rate": 1.054505070761653e-06, "loss": 0.568, "step": 17612 }, { "epoch": 0.950461389023798, "grad_norm": 0.7999368929545015, "learning_rate": 1.054386756466548e-06, "loss": 0.2575, "step": 17613 }, { "epoch": 0.9505153526523124, "grad_norm": 1.0003526289890181, "learning_rate": 1.0542685699428593e-06, "loss": 0.3298, "step": 17614 }, { "epoch": 0.9505693162808267, "grad_norm": 1.2882564006627486, "learning_rate": 1.0541505111939832e-06, "loss": 0.6094, "step": 17615 }, { "epoch": 0.9506232799093411, "grad_norm": 0.8942514990971155, "learning_rate": 1.0540325802233143e-06, "loss": 0.3076, "step": 17616 }, { "epoch": 0.9506772435378554, "grad_norm": 0.8664887793453793, "learning_rate": 1.0539147770342416e-06, "loss": 0.4171, "step": 17617 }, { "epoch": 0.9507312071663698, "grad_norm": 1.0380235114048717, "learning_rate": 1.0537971016301507e-06, "loss": 0.3839, "step": 17618 }, { "epoch": 0.9507851707948842, "grad_norm": 1.1934669675639038, "learning_rate": 1.0536795540144232e-06, "loss": 0.6089, "step": 17619 }, { "epoch": 
0.9508391344233986, "grad_norm": 1.119083941449759, "learning_rate": 1.053562134190438e-06, "loss": 0.5283, "step": 17620 }, { "epoch": 0.950893098051913, "grad_norm": 1.0166359927264608, "learning_rate": 1.0534448421615702e-06, "loss": 0.4177, "step": 17621 }, { "epoch": 0.9509470616804274, "grad_norm": 1.121585250181695, "learning_rate": 1.0533276779311906e-06, "loss": 0.4544, "step": 17622 }, { "epoch": 0.9510010253089418, "grad_norm": 1.1036883059044593, "learning_rate": 1.053210641502666e-06, "loss": 0.5292, "step": 17623 }, { "epoch": 0.9510549889374561, "grad_norm": 0.9160297024000854, "learning_rate": 1.0530937328793615e-06, "loss": 0.3979, "step": 17624 }, { "epoch": 0.9511089525659705, "grad_norm": 1.1817338968566953, "learning_rate": 1.052976952064636e-06, "loss": 0.5171, "step": 17625 }, { "epoch": 0.9511629161944849, "grad_norm": 1.008804934363091, "learning_rate": 1.0528602990618459e-06, "loss": 0.5387, "step": 17626 }, { "epoch": 0.9512168798229993, "grad_norm": 1.0010144745255987, "learning_rate": 1.0527437738743444e-06, "loss": 0.4229, "step": 17627 }, { "epoch": 0.9512708434515137, "grad_norm": 0.9309813156841535, "learning_rate": 1.052627376505481e-06, "loss": 0.3715, "step": 17628 }, { "epoch": 0.9513248070800281, "grad_norm": 1.0531797435876542, "learning_rate": 1.0525111069586002e-06, "loss": 0.6202, "step": 17629 }, { "epoch": 0.9513787707085425, "grad_norm": 0.889345664946669, "learning_rate": 1.052394965237044e-06, "loss": 0.3591, "step": 17630 }, { "epoch": 0.9514327343370568, "grad_norm": 1.1066690737170983, "learning_rate": 1.0522789513441508e-06, "loss": 0.3865, "step": 17631 }, { "epoch": 0.9514866979655712, "grad_norm": 1.107699108375548, "learning_rate": 1.0521630652832542e-06, "loss": 0.554, "step": 17632 }, { "epoch": 0.9515406615940856, "grad_norm": 1.0309008815380227, "learning_rate": 1.0520473070576857e-06, "loss": 0.4469, "step": 17633 }, { "epoch": 0.9515946252226, "grad_norm": 1.1362166527863702, "learning_rate": 1.051931676670772e-06, "loss": 0.5731, "step": 17634 }, { "epoch": 0.9516485888511144, "grad_norm": 1.2212576989212656, "learning_rate": 1.051816174125836e-06, "loss": 0.5999, "step": 17635 }, { "epoch": 0.9517025524796288, "grad_norm": 1.1601667896209962, "learning_rate": 1.0517007994261974e-06, "loss": 0.5036, "step": 17636 }, { "epoch": 0.9517565161081432, "grad_norm": 0.8389435394866919, "learning_rate": 1.051585552575173e-06, "loss": 0.3647, "step": 17637 }, { "epoch": 0.9518104797366574, "grad_norm": 1.1036808014179826, "learning_rate": 1.0514704335760745e-06, "loss": 0.5584, "step": 17638 }, { "epoch": 0.9518644433651718, "grad_norm": 0.9016261404037019, "learning_rate": 1.0513554424322108e-06, "loss": 0.3668, "step": 17639 }, { "epoch": 0.9519184069936862, "grad_norm": 0.8962507866122025, "learning_rate": 1.0512405791468866e-06, "loss": 0.3721, "step": 17640 }, { "epoch": 0.9519723706222006, "grad_norm": 1.0335624806950634, "learning_rate": 1.051125843723403e-06, "loss": 0.4562, "step": 17641 }, { "epoch": 0.952026334250715, "grad_norm": 0.843583024126452, "learning_rate": 1.0510112361650584e-06, "loss": 0.3341, "step": 17642 }, { "epoch": 0.9520802978792294, "grad_norm": 1.0770652278479815, "learning_rate": 1.0508967564751457e-06, "loss": 0.3958, "step": 17643 }, { "epoch": 0.9521342615077438, "grad_norm": 0.9708368305725501, "learning_rate": 1.0507824046569563e-06, "loss": 0.3391, "step": 17644 }, { "epoch": 0.9521882251362581, "grad_norm": 0.9851122262730378, "learning_rate": 1.0506681807137762e-06, "loss": 0.4206, "step": 17645 
}, { "epoch": 0.9522421887647725, "grad_norm": 0.9297507241168641, "learning_rate": 1.0505540846488883e-06, "loss": 0.4296, "step": 17646 }, { "epoch": 0.9522961523932869, "grad_norm": 1.0457631285836326, "learning_rate": 1.050440116465571e-06, "loss": 0.4153, "step": 17647 }, { "epoch": 0.9523501160218013, "grad_norm": 1.0753969675962822, "learning_rate": 1.0503262761671015e-06, "loss": 0.3985, "step": 17648 }, { "epoch": 0.9524040796503157, "grad_norm": 1.3357804711696608, "learning_rate": 1.0502125637567507e-06, "loss": 0.4159, "step": 17649 }, { "epoch": 0.9524580432788301, "grad_norm": 0.9555765359466383, "learning_rate": 1.0500989792377868e-06, "loss": 0.3315, "step": 17650 }, { "epoch": 0.9525120069073445, "grad_norm": 1.010547543904692, "learning_rate": 1.049985522613475e-06, "loss": 0.4438, "step": 17651 }, { "epoch": 0.9525659705358588, "grad_norm": 1.2242796264990534, "learning_rate": 1.0498721938870757e-06, "loss": 0.4773, "step": 17652 }, { "epoch": 0.9526199341643732, "grad_norm": 0.8741964857863869, "learning_rate": 1.0497589930618456e-06, "loss": 0.3859, "step": 17653 }, { "epoch": 0.9526738977928876, "grad_norm": 1.0402375372208164, "learning_rate": 1.049645920141039e-06, "loss": 0.4422, "step": 17654 }, { "epoch": 0.952727861421402, "grad_norm": 1.1431143254444935, "learning_rate": 1.0495329751279049e-06, "loss": 0.4153, "step": 17655 }, { "epoch": 0.9527818250499164, "grad_norm": 0.9032345684703901, "learning_rate": 1.0494201580256909e-06, "loss": 0.3631, "step": 17656 }, { "epoch": 0.9528357886784308, "grad_norm": 1.1045862970631781, "learning_rate": 1.0493074688376377e-06, "loss": 0.5038, "step": 17657 }, { "epoch": 0.9528897523069452, "grad_norm": 0.9820575398578418, "learning_rate": 1.049194907566985e-06, "loss": 0.5346, "step": 17658 }, { "epoch": 0.9529437159354595, "grad_norm": 1.3080387519678802, "learning_rate": 1.0490824742169681e-06, "loss": 0.6324, "step": 17659 }, { "epoch": 0.9529976795639739, "grad_norm": 1.165268554844112, "learning_rate": 1.0489701687908186e-06, "loss": 0.5677, "step": 17660 }, { "epoch": 0.9530516431924883, "grad_norm": 1.0662527186166062, "learning_rate": 1.0488579912917633e-06, "loss": 0.4551, "step": 17661 }, { "epoch": 0.9531056068210026, "grad_norm": 1.1187684012649728, "learning_rate": 1.0487459417230272e-06, "loss": 0.5619, "step": 17662 }, { "epoch": 0.953159570449517, "grad_norm": 1.2088529727759703, "learning_rate": 1.04863402008783e-06, "loss": 0.4408, "step": 17663 }, { "epoch": 0.9532135340780314, "grad_norm": 1.177533437145635, "learning_rate": 1.048522226389389e-06, "loss": 0.4022, "step": 17664 }, { "epoch": 0.9532674977065458, "grad_norm": 1.234571508720051, "learning_rate": 1.0484105606309173e-06, "loss": 0.5027, "step": 17665 }, { "epoch": 0.9533214613350601, "grad_norm": 1.091445740940015, "learning_rate": 1.0482990228156244e-06, "loss": 0.4347, "step": 17666 }, { "epoch": 0.9533754249635745, "grad_norm": 1.2553942688133366, "learning_rate": 1.0481876129467155e-06, "loss": 0.5557, "step": 17667 }, { "epoch": 0.9534293885920889, "grad_norm": 1.3443417619220825, "learning_rate": 1.0480763310273926e-06, "loss": 0.6185, "step": 17668 }, { "epoch": 0.9534833522206033, "grad_norm": 0.9731637448298938, "learning_rate": 1.0479651770608542e-06, "loss": 0.4857, "step": 17669 }, { "epoch": 0.9535373158491177, "grad_norm": 1.0115749730655552, "learning_rate": 1.0478541510502949e-06, "loss": 0.54, "step": 17670 }, { "epoch": 0.9535912794776321, "grad_norm": 1.0083989685724766, "learning_rate": 1.0477432529989063e-06, "loss": 
0.3684, "step": 17671 }, { "epoch": 0.9536452431061465, "grad_norm": 0.990603171907023, "learning_rate": 1.0476324829098752e-06, "loss": 0.5231, "step": 17672 }, { "epoch": 0.9536992067346608, "grad_norm": 1.085726614527059, "learning_rate": 1.0475218407863856e-06, "loss": 0.43, "step": 17673 }, { "epoch": 0.9537531703631752, "grad_norm": 0.9610723731221705, "learning_rate": 1.0474113266316165e-06, "loss": 0.4765, "step": 17674 }, { "epoch": 0.9538071339916896, "grad_norm": 1.1290952107149033, "learning_rate": 1.0473009404487457e-06, "loss": 0.4295, "step": 17675 }, { "epoch": 0.953861097620204, "grad_norm": 0.9215067393297959, "learning_rate": 1.0471906822409442e-06, "loss": 0.4157, "step": 17676 }, { "epoch": 0.9539150612487184, "grad_norm": 1.0470756764388338, "learning_rate": 1.0470805520113819e-06, "loss": 0.4045, "step": 17677 }, { "epoch": 0.9539690248772328, "grad_norm": 1.0307174850827003, "learning_rate": 1.046970549763224e-06, "loss": 0.4437, "step": 17678 }, { "epoch": 0.9540229885057471, "grad_norm": 1.11061759362116, "learning_rate": 1.0468606754996326e-06, "loss": 0.4742, "step": 17679 }, { "epoch": 0.9540769521342615, "grad_norm": 1.114854173150465, "learning_rate": 1.046750929223764e-06, "loss": 0.6119, "step": 17680 }, { "epoch": 0.9541309157627759, "grad_norm": 0.9847414804928707, "learning_rate": 1.046641310938774e-06, "loss": 0.4232, "step": 17681 }, { "epoch": 0.9541848793912903, "grad_norm": 1.1109607203669796, "learning_rate": 1.0465318206478124e-06, "loss": 0.4372, "step": 17682 }, { "epoch": 0.9542388430198047, "grad_norm": 0.9582056349965231, "learning_rate": 1.0464224583540259e-06, "loss": 0.3866, "step": 17683 }, { "epoch": 0.9542928066483191, "grad_norm": 1.1201153569349884, "learning_rate": 1.0463132240605577e-06, "loss": 0.5326, "step": 17684 }, { "epoch": 0.9543467702768335, "grad_norm": 1.2324118633377332, "learning_rate": 1.0462041177705482e-06, "loss": 0.6902, "step": 17685 }, { "epoch": 0.9544007339053477, "grad_norm": 1.0798541694354742, "learning_rate": 1.0460951394871321e-06, "loss": 0.3508, "step": 17686 }, { "epoch": 0.9544546975338621, "grad_norm": 0.9359774790878951, "learning_rate": 1.0459862892134425e-06, "loss": 0.4451, "step": 17687 }, { "epoch": 0.9545086611623765, "grad_norm": 0.718418275040109, "learning_rate": 1.0458775669526073e-06, "loss": 0.2675, "step": 17688 }, { "epoch": 0.9545626247908909, "grad_norm": 0.9827792135680532, "learning_rate": 1.045768972707751e-06, "loss": 0.4183, "step": 17689 }, { "epoch": 0.9546165884194053, "grad_norm": 0.9902427001287208, "learning_rate": 1.0456605064819953e-06, "loss": 0.425, "step": 17690 }, { "epoch": 0.9546705520479197, "grad_norm": 1.0348500837253471, "learning_rate": 1.045552168278458e-06, "loss": 0.4558, "step": 17691 }, { "epoch": 0.9547245156764341, "grad_norm": 0.8575155310673214, "learning_rate": 1.0454439581002516e-06, "loss": 0.397, "step": 17692 }, { "epoch": 0.9547784793049484, "grad_norm": 1.4360496251926085, "learning_rate": 1.045335875950487e-06, "loss": 0.5174, "step": 17693 }, { "epoch": 0.9548324429334628, "grad_norm": 1.024975834218003, "learning_rate": 1.0452279218322704e-06, "loss": 0.4369, "step": 17694 }, { "epoch": 0.9548864065619772, "grad_norm": 1.092512806303529, "learning_rate": 1.0451200957487044e-06, "loss": 0.4264, "step": 17695 }, { "epoch": 0.9549403701904916, "grad_norm": 1.0611428554073434, "learning_rate": 1.0450123977028886e-06, "loss": 0.5135, "step": 17696 }, { "epoch": 0.954994333819006, "grad_norm": 0.9980648977585351, "learning_rate": 
1.0449048276979174e-06, "loss": 0.3864, "step": 17697 }, { "epoch": 0.9550482974475204, "grad_norm": 0.9461468803910928, "learning_rate": 1.0447973857368834e-06, "loss": 0.3863, "step": 17698 }, { "epoch": 0.9551022610760348, "grad_norm": 1.0978084771973087, "learning_rate": 1.0446900718228743e-06, "loss": 0.483, "step": 17699 }, { "epoch": 0.9551562247045491, "grad_norm": 0.8978731375611855, "learning_rate": 1.0445828859589743e-06, "loss": 0.5017, "step": 17700 }, { "epoch": 0.9552101883330635, "grad_norm": 1.0979394112677254, "learning_rate": 1.0444758281482637e-06, "loss": 0.5262, "step": 17701 }, { "epoch": 0.9552641519615779, "grad_norm": 1.0616813898680568, "learning_rate": 1.0443688983938199e-06, "loss": 0.4212, "step": 17702 }, { "epoch": 0.9553181155900923, "grad_norm": 0.9891963158647209, "learning_rate": 1.044262096698716e-06, "loss": 0.508, "step": 17703 }, { "epoch": 0.9553720792186067, "grad_norm": 1.0020101319230459, "learning_rate": 1.0441554230660214e-06, "loss": 0.3942, "step": 17704 }, { "epoch": 0.9554260428471211, "grad_norm": 1.1869993088750936, "learning_rate": 1.0440488774988025e-06, "loss": 0.5137, "step": 17705 }, { "epoch": 0.9554800064756355, "grad_norm": 1.0442410427488011, "learning_rate": 1.0439424600001213e-06, "loss": 0.4338, "step": 17706 }, { "epoch": 0.9555339701041498, "grad_norm": 1.3410162127961305, "learning_rate": 1.0438361705730366e-06, "loss": 0.6366, "step": 17707 }, { "epoch": 0.9555879337326642, "grad_norm": 1.1380491493760103, "learning_rate": 1.0437300092206027e-06, "loss": 0.5098, "step": 17708 }, { "epoch": 0.9556418973611785, "grad_norm": 1.2099188914345962, "learning_rate": 1.0436239759458706e-06, "loss": 0.5391, "step": 17709 }, { "epoch": 0.955695860989693, "grad_norm": 1.030921659759116, "learning_rate": 1.0435180707518893e-06, "loss": 0.3893, "step": 17710 }, { "epoch": 0.9557498246182073, "grad_norm": 1.028493883766659, "learning_rate": 1.0434122936417008e-06, "loss": 0.4023, "step": 17711 }, { "epoch": 0.9558037882467217, "grad_norm": 1.139975423688594, "learning_rate": 1.0433066446183461e-06, "loss": 0.4774, "step": 17712 }, { "epoch": 0.9558577518752361, "grad_norm": 0.9959768846473165, "learning_rate": 1.0432011236848617e-06, "loss": 0.4092, "step": 17713 }, { "epoch": 0.9559117155037504, "grad_norm": 1.086792776682233, "learning_rate": 1.0430957308442803e-06, "loss": 0.413, "step": 17714 }, { "epoch": 0.9559656791322648, "grad_norm": 0.9659619943331393, "learning_rate": 1.0429904660996313e-06, "loss": 0.5156, "step": 17715 }, { "epoch": 0.9560196427607792, "grad_norm": 1.1205599090875018, "learning_rate": 1.0428853294539394e-06, "loss": 0.6102, "step": 17716 }, { "epoch": 0.9560736063892936, "grad_norm": 1.1462236769240362, "learning_rate": 1.0427803209102264e-06, "loss": 0.5216, "step": 17717 }, { "epoch": 0.956127570017808, "grad_norm": 0.9592046640094596, "learning_rate": 1.042675440471511e-06, "loss": 0.4165, "step": 17718 }, { "epoch": 0.9561815336463224, "grad_norm": 0.9212851544249544, "learning_rate": 1.0425706881408077e-06, "loss": 0.4389, "step": 17719 }, { "epoch": 0.9562354972748368, "grad_norm": 1.3534285352857323, "learning_rate": 1.042466063921126e-06, "loss": 0.5107, "step": 17720 }, { "epoch": 0.9562894609033511, "grad_norm": 0.8444942617126312, "learning_rate": 1.0423615678154742e-06, "loss": 0.4232, "step": 17721 }, { "epoch": 0.9563434245318655, "grad_norm": 0.978843321585457, "learning_rate": 1.042257199826855e-06, "loss": 0.4703, "step": 17722 }, { "epoch": 0.9563973881603799, "grad_norm": 
1.0406275146846216, "learning_rate": 1.0421529599582675e-06, "loss": 0.433, "step": 17723 }, { "epoch": 0.9564513517888943, "grad_norm": 1.077547345639178, "learning_rate": 1.0420488482127087e-06, "loss": 0.4628, "step": 17724 }, { "epoch": 0.9565053154174087, "grad_norm": 1.0770319302788138, "learning_rate": 1.0419448645931704e-06, "loss": 0.4472, "step": 17725 }, { "epoch": 0.9565592790459231, "grad_norm": 1.2328458454538282, "learning_rate": 1.0418410091026415e-06, "loss": 0.5473, "step": 17726 }, { "epoch": 0.9566132426744375, "grad_norm": 0.8348238701452746, "learning_rate": 1.0417372817441059e-06, "loss": 0.3618, "step": 17727 }, { "epoch": 0.9566672063029518, "grad_norm": 1.6358276578772126, "learning_rate": 1.0416336825205465e-06, "loss": 0.4475, "step": 17728 }, { "epoch": 0.9567211699314662, "grad_norm": 0.8473469149249803, "learning_rate": 1.0415302114349395e-06, "loss": 0.4059, "step": 17729 }, { "epoch": 0.9567751335599806, "grad_norm": 1.0164194582845456, "learning_rate": 1.0414268684902588e-06, "loss": 0.4518, "step": 17730 }, { "epoch": 0.956829097188495, "grad_norm": 1.015227693201472, "learning_rate": 1.0413236536894755e-06, "loss": 0.3668, "step": 17731 }, { "epoch": 0.9568830608170094, "grad_norm": 0.8883798306481594, "learning_rate": 1.0412205670355555e-06, "loss": 0.4853, "step": 17732 }, { "epoch": 0.9569370244455238, "grad_norm": 0.929913013414162, "learning_rate": 1.041117608531461e-06, "loss": 0.3863, "step": 17733 }, { "epoch": 0.9569909880740382, "grad_norm": 0.9269784530881331, "learning_rate": 1.0410147781801526e-06, "loss": 0.3679, "step": 17734 }, { "epoch": 0.9570449517025524, "grad_norm": 1.0222152220900902, "learning_rate": 1.0409120759845847e-06, "loss": 0.5078, "step": 17735 }, { "epoch": 0.9570989153310668, "grad_norm": 0.896020226354129, "learning_rate": 1.0408095019477093e-06, "loss": 0.405, "step": 17736 }, { "epoch": 0.9571528789595812, "grad_norm": 1.1147115781505392, "learning_rate": 1.0407070560724743e-06, "loss": 0.5496, "step": 17737 }, { "epoch": 0.9572068425880956, "grad_norm": 1.249905312818438, "learning_rate": 1.0406047383618248e-06, "loss": 0.4694, "step": 17738 }, { "epoch": 0.95726080621661, "grad_norm": 1.2587793953450306, "learning_rate": 1.0405025488187003e-06, "loss": 0.4567, "step": 17739 }, { "epoch": 0.9573147698451244, "grad_norm": 0.95219212579897, "learning_rate": 1.040400487446039e-06, "loss": 0.3957, "step": 17740 }, { "epoch": 0.9573687334736388, "grad_norm": 1.0338858317263628, "learning_rate": 1.0402985542467738e-06, "loss": 0.4931, "step": 17741 }, { "epoch": 0.9574226971021531, "grad_norm": 0.975737406563452, "learning_rate": 1.040196749223834e-06, "loss": 0.5264, "step": 17742 }, { "epoch": 0.9574766607306675, "grad_norm": 0.9314578463781079, "learning_rate": 1.0400950723801464e-06, "loss": 0.3643, "step": 17743 }, { "epoch": 0.9575306243591819, "grad_norm": 1.2187869295547722, "learning_rate": 1.0399935237186322e-06, "loss": 0.7999, "step": 17744 }, { "epoch": 0.9575845879876963, "grad_norm": 0.970237359790173, "learning_rate": 1.039892103242211e-06, "loss": 0.3741, "step": 17745 }, { "epoch": 0.9576385516162107, "grad_norm": 1.0040419361998687, "learning_rate": 1.0397908109537977e-06, "loss": 0.4514, "step": 17746 }, { "epoch": 0.9576925152447251, "grad_norm": 1.072397400898371, "learning_rate": 1.0396896468563026e-06, "loss": 0.4617, "step": 17747 }, { "epoch": 0.9577464788732394, "grad_norm": 1.1114026267859385, "learning_rate": 1.0395886109526346e-06, "loss": 0.4701, "step": 17748 }, { "epoch": 
0.9578004425017538, "grad_norm": 0.86983322205349, "learning_rate": 1.0394877032456962e-06, "loss": 0.3734, "step": 17749 }, { "epoch": 0.9578544061302682, "grad_norm": 0.9360601557914175, "learning_rate": 1.0393869237383885e-06, "loss": 0.4767, "step": 17750 }, { "epoch": 0.9579083697587826, "grad_norm": 1.104363051885875, "learning_rate": 1.0392862724336077e-06, "loss": 0.5442, "step": 17751 }, { "epoch": 0.957962333387297, "grad_norm": 1.2150470593995297, "learning_rate": 1.0391857493342466e-06, "loss": 0.5096, "step": 17752 }, { "epoch": 0.9580162970158114, "grad_norm": 1.2235229531774363, "learning_rate": 1.0390853544431943e-06, "loss": 0.5891, "step": 17753 }, { "epoch": 0.9580702606443258, "grad_norm": 1.0840329855731063, "learning_rate": 1.0389850877633363e-06, "loss": 0.4794, "step": 17754 }, { "epoch": 0.95812422427284, "grad_norm": 0.9712401167125208, "learning_rate": 1.0388849492975549e-06, "loss": 0.5553, "step": 17755 }, { "epoch": 0.9581781879013545, "grad_norm": 0.8535542663321446, "learning_rate": 1.0387849390487275e-06, "loss": 0.333, "step": 17756 }, { "epoch": 0.9582321515298688, "grad_norm": 1.1196008375013198, "learning_rate": 1.0386850570197283e-06, "loss": 0.3848, "step": 17757 }, { "epoch": 0.9582861151583832, "grad_norm": 1.0148381817806675, "learning_rate": 1.0385853032134286e-06, "loss": 0.5283, "step": 17758 }, { "epoch": 0.9583400787868976, "grad_norm": 1.4057983999056316, "learning_rate": 1.0384856776326954e-06, "loss": 0.3568, "step": 17759 }, { "epoch": 0.958394042415412, "grad_norm": 1.0817293548021443, "learning_rate": 1.0383861802803913e-06, "loss": 0.408, "step": 17760 }, { "epoch": 0.9584480060439264, "grad_norm": 1.0885837367144853, "learning_rate": 1.0382868111593773e-06, "loss": 0.5009, "step": 17761 }, { "epoch": 0.9585019696724407, "grad_norm": 0.9030553297122709, "learning_rate": 1.0381875702725084e-06, "loss": 0.3447, "step": 17762 }, { "epoch": 0.9585559333009551, "grad_norm": 1.0400000936401956, "learning_rate": 1.0380884576226366e-06, "loss": 0.5068, "step": 17763 }, { "epoch": 0.9586098969294695, "grad_norm": 0.8792210321934569, "learning_rate": 1.037989473212611e-06, "loss": 0.3478, "step": 17764 }, { "epoch": 0.9586638605579839, "grad_norm": 1.2571053756366484, "learning_rate": 1.0378906170452769e-06, "loss": 0.5545, "step": 17765 }, { "epoch": 0.9587178241864983, "grad_norm": 0.9786951632352334, "learning_rate": 1.0377918891234745e-06, "loss": 0.4642, "step": 17766 }, { "epoch": 0.9587717878150127, "grad_norm": 0.9267528113855232, "learning_rate": 1.0376932894500425e-06, "loss": 0.3882, "step": 17767 }, { "epoch": 0.9588257514435271, "grad_norm": 1.0571608441531606, "learning_rate": 1.0375948180278139e-06, "loss": 0.4726, "step": 17768 }, { "epoch": 0.9588797150720414, "grad_norm": 1.0253784562121409, "learning_rate": 1.0374964748596194e-06, "loss": 0.4982, "step": 17769 }, { "epoch": 0.9589336787005558, "grad_norm": 1.138370982693256, "learning_rate": 1.037398259948285e-06, "loss": 0.4369, "step": 17770 }, { "epoch": 0.9589876423290702, "grad_norm": 1.2362092049183677, "learning_rate": 1.0373001732966336e-06, "loss": 0.5716, "step": 17771 }, { "epoch": 0.9590416059575846, "grad_norm": 0.9131615057677653, "learning_rate": 1.037202214907485e-06, "loss": 0.3085, "step": 17772 }, { "epoch": 0.959095569586099, "grad_norm": 1.2377854372747885, "learning_rate": 1.0371043847836535e-06, "loss": 0.4293, "step": 17773 }, { "epoch": 0.9591495332146134, "grad_norm": 0.8502394748359379, "learning_rate": 1.0370066829279518e-06, "loss": 0.4171, "step": 
17774 }, { "epoch": 0.9592034968431278, "grad_norm": 1.1054012230251269, "learning_rate": 1.0369091093431873e-06, "loss": 0.5485, "step": 17775 }, { "epoch": 0.9592574604716421, "grad_norm": 1.0173493943618388, "learning_rate": 1.036811664032165e-06, "loss": 0.4111, "step": 17776 }, { "epoch": 0.9593114241001565, "grad_norm": 0.9556756036768218, "learning_rate": 1.036714346997685e-06, "loss": 0.5103, "step": 17777 }, { "epoch": 0.9593653877286709, "grad_norm": 1.1293090632549063, "learning_rate": 1.0366171582425444e-06, "loss": 0.5426, "step": 17778 }, { "epoch": 0.9594193513571853, "grad_norm": 0.9558760919447268, "learning_rate": 1.0365200977695365e-06, "loss": 0.3854, "step": 17779 }, { "epoch": 0.9594733149856997, "grad_norm": 1.3165901407414835, "learning_rate": 1.0364231655814513e-06, "loss": 0.5533, "step": 17780 }, { "epoch": 0.959527278614214, "grad_norm": 1.0784371606843117, "learning_rate": 1.0363263616810742e-06, "loss": 0.3897, "step": 17781 }, { "epoch": 0.9595812422427284, "grad_norm": 0.9843913026337026, "learning_rate": 1.0362296860711879e-06, "loss": 0.3492, "step": 17782 }, { "epoch": 0.9596352058712427, "grad_norm": 0.961559767168133, "learning_rate": 1.0361331387545707e-06, "loss": 0.3924, "step": 17783 }, { "epoch": 0.9596891694997571, "grad_norm": 1.1283605172314912, "learning_rate": 1.036036719733997e-06, "loss": 0.3911, "step": 17784 }, { "epoch": 0.9597431331282715, "grad_norm": 1.1232262417522745, "learning_rate": 1.0359404290122391e-06, "loss": 0.6024, "step": 17785 }, { "epoch": 0.9597970967567859, "grad_norm": 0.936757241373038, "learning_rate": 1.0358442665920637e-06, "loss": 0.3737, "step": 17786 }, { "epoch": 0.9598510603853003, "grad_norm": 0.9288426980696525, "learning_rate": 1.0357482324762347e-06, "loss": 0.4834, "step": 17787 }, { "epoch": 0.9599050240138147, "grad_norm": 0.9108370126880697, "learning_rate": 1.0356523266675119e-06, "loss": 0.4213, "step": 17788 }, { "epoch": 0.9599589876423291, "grad_norm": 0.9650317172733208, "learning_rate": 1.0355565491686526e-06, "loss": 0.4192, "step": 17789 }, { "epoch": 0.9600129512708434, "grad_norm": 1.2219971105843666, "learning_rate": 1.0354608999824092e-06, "loss": 0.6119, "step": 17790 }, { "epoch": 0.9600669148993578, "grad_norm": 1.0789889265466652, "learning_rate": 1.0353653791115303e-06, "loss": 0.5469, "step": 17791 }, { "epoch": 0.9601208785278722, "grad_norm": 0.8035186573370074, "learning_rate": 1.0352699865587618e-06, "loss": 0.4639, "step": 17792 }, { "epoch": 0.9601748421563866, "grad_norm": 0.9584016640655257, "learning_rate": 1.0351747223268449e-06, "loss": 0.4382, "step": 17793 }, { "epoch": 0.960228805784901, "grad_norm": 1.0085559776137847, "learning_rate": 1.035079586418518e-06, "loss": 0.3836, "step": 17794 }, { "epoch": 0.9602827694134154, "grad_norm": 1.13162926891348, "learning_rate": 1.0349845788365153e-06, "loss": 0.439, "step": 17795 }, { "epoch": 0.9603367330419298, "grad_norm": 1.0277144240689935, "learning_rate": 1.0348896995835676e-06, "loss": 0.3953, "step": 17796 }, { "epoch": 0.9603906966704441, "grad_norm": 0.9725216861478407, "learning_rate": 1.0347949486624017e-06, "loss": 0.5384, "step": 17797 }, { "epoch": 0.9604446602989585, "grad_norm": 1.1687313052463928, "learning_rate": 1.03470032607574e-06, "loss": 0.5086, "step": 17798 }, { "epoch": 0.9604986239274729, "grad_norm": 0.9307316422153171, "learning_rate": 1.0346058318263034e-06, "loss": 0.4179, "step": 17799 }, { "epoch": 0.9605525875559873, "grad_norm": 0.8951515470042894, "learning_rate": 1.0345114659168068e-06, 
"loss": 0.3346, "step": 17800 }, { "epoch": 0.9606065511845017, "grad_norm": 0.8456784465567858, "learning_rate": 1.034417228349963e-06, "loss": 0.4632, "step": 17801 }, { "epoch": 0.9606605148130161, "grad_norm": 1.0938200510937397, "learning_rate": 1.0343231191284798e-06, "loss": 0.4015, "step": 17802 }, { "epoch": 0.9607144784415305, "grad_norm": 1.1273739224444321, "learning_rate": 1.0342291382550626e-06, "loss": 0.5294, "step": 17803 }, { "epoch": 0.9607684420700447, "grad_norm": 0.9337108271471135, "learning_rate": 1.0341352857324128e-06, "loss": 0.3409, "step": 17804 }, { "epoch": 0.9608224056985591, "grad_norm": 1.1945625392753083, "learning_rate": 1.0340415615632266e-06, "loss": 0.4398, "step": 17805 }, { "epoch": 0.9608763693270735, "grad_norm": 1.0412908117891129, "learning_rate": 1.0339479657501987e-06, "loss": 0.4362, "step": 17806 }, { "epoch": 0.9609303329555879, "grad_norm": 1.2821620271787078, "learning_rate": 1.0338544982960188e-06, "loss": 0.4057, "step": 17807 }, { "epoch": 0.9609842965841023, "grad_norm": 1.1469856592848473, "learning_rate": 1.0337611592033732e-06, "loss": 0.5388, "step": 17808 }, { "epoch": 0.9610382602126167, "grad_norm": 0.9771851589078671, "learning_rate": 1.033667948474945e-06, "loss": 0.4011, "step": 17809 }, { "epoch": 0.9610922238411311, "grad_norm": 1.1093452589481345, "learning_rate": 1.0335748661134129e-06, "loss": 0.4837, "step": 17810 }, { "epoch": 0.9611461874696454, "grad_norm": 1.1499146791759003, "learning_rate": 1.0334819121214524e-06, "loss": 0.4578, "step": 17811 }, { "epoch": 0.9612001510981598, "grad_norm": 0.9174664721278442, "learning_rate": 1.033389086501734e-06, "loss": 0.3225, "step": 17812 }, { "epoch": 0.9612541147266742, "grad_norm": 1.148450469967518, "learning_rate": 1.033296389256927e-06, "loss": 0.5432, "step": 17813 }, { "epoch": 0.9613080783551886, "grad_norm": 1.1224472497300408, "learning_rate": 1.0332038203896949e-06, "loss": 0.4559, "step": 17814 }, { "epoch": 0.961362041983703, "grad_norm": 1.032105816573149, "learning_rate": 1.0331113799026982e-06, "loss": 0.4058, "step": 17815 }, { "epoch": 0.9614160056122174, "grad_norm": 1.17900329558276, "learning_rate": 1.033019067798594e-06, "loss": 0.5411, "step": 17816 }, { "epoch": 0.9614699692407317, "grad_norm": 1.462501033840754, "learning_rate": 1.0329268840800361e-06, "loss": 0.5191, "step": 17817 }, { "epoch": 0.9615239328692461, "grad_norm": 1.0432674034473248, "learning_rate": 1.032834828749672e-06, "loss": 0.6402, "step": 17818 }, { "epoch": 0.9615778964977605, "grad_norm": 1.0687312478938558, "learning_rate": 1.0327429018101495e-06, "loss": 0.4453, "step": 17819 }, { "epoch": 0.9616318601262749, "grad_norm": 0.8943684571218878, "learning_rate": 1.0326511032641094e-06, "loss": 0.4535, "step": 17820 }, { "epoch": 0.9616858237547893, "grad_norm": 0.8181611872106016, "learning_rate": 1.0325594331141902e-06, "loss": 0.3733, "step": 17821 }, { "epoch": 0.9617397873833037, "grad_norm": 1.0347529754775624, "learning_rate": 1.0324678913630275e-06, "loss": 0.2954, "step": 17822 }, { "epoch": 0.9617937510118181, "grad_norm": 0.8041383947890971, "learning_rate": 1.0323764780132517e-06, "loss": 0.3668, "step": 17823 }, { "epoch": 0.9618477146403324, "grad_norm": 1.2354958407611445, "learning_rate": 1.0322851930674903e-06, "loss": 0.6399, "step": 17824 }, { "epoch": 0.9619016782688468, "grad_norm": 0.9428310295754792, "learning_rate": 1.0321940365283662e-06, "loss": 0.4399, "step": 17825 }, { "epoch": 0.9619556418973612, "grad_norm": 0.9852785550982763, "learning_rate": 
1.0321030083985004e-06, "loss": 0.4506, "step": 17826 }, { "epoch": 0.9620096055258756, "grad_norm": 1.018632599472547, "learning_rate": 1.0320121086805083e-06, "loss": 0.4259, "step": 17827 }, { "epoch": 0.96206356915439, "grad_norm": 0.8843087021489069, "learning_rate": 1.0319213373770025e-06, "loss": 0.3149, "step": 17828 }, { "epoch": 0.9621175327829043, "grad_norm": 1.0436087781426167, "learning_rate": 1.0318306944905923e-06, "loss": 0.5354, "step": 17829 }, { "epoch": 0.9621714964114187, "grad_norm": 1.2437877744021688, "learning_rate": 1.031740180023883e-06, "loss": 0.5098, "step": 17830 }, { "epoch": 0.962225460039933, "grad_norm": 1.1825161284636074, "learning_rate": 1.031649793979476e-06, "loss": 0.4955, "step": 17831 }, { "epoch": 0.9622794236684474, "grad_norm": 0.9987501100771807, "learning_rate": 1.0315595363599682e-06, "loss": 0.4037, "step": 17832 }, { "epoch": 0.9623333872969618, "grad_norm": 1.1868750833476218, "learning_rate": 1.0314694071679546e-06, "loss": 0.525, "step": 17833 }, { "epoch": 0.9623873509254762, "grad_norm": 0.9129871513173932, "learning_rate": 1.0313794064060253e-06, "loss": 0.3429, "step": 17834 }, { "epoch": 0.9624413145539906, "grad_norm": 0.8597746158332128, "learning_rate": 1.0312895340767674e-06, "loss": 0.398, "step": 17835 }, { "epoch": 0.962495278182505, "grad_norm": 1.190475073824763, "learning_rate": 1.031199790182763e-06, "loss": 0.4711, "step": 17836 }, { "epoch": 0.9625492418110194, "grad_norm": 0.9272317329151514, "learning_rate": 1.0311101747265927e-06, "loss": 0.3592, "step": 17837 }, { "epoch": 0.9626032054395337, "grad_norm": 0.8753608076286427, "learning_rate": 1.0310206877108308e-06, "loss": 0.3308, "step": 17838 }, { "epoch": 0.9626571690680481, "grad_norm": 0.9922325850964324, "learning_rate": 1.0309313291380507e-06, "loss": 0.4727, "step": 17839 }, { "epoch": 0.9627111326965625, "grad_norm": 1.1794026089140381, "learning_rate": 1.0308420990108194e-06, "loss": 0.6268, "step": 17840 }, { "epoch": 0.9627650963250769, "grad_norm": 1.0426036482956351, "learning_rate": 1.0307529973317021e-06, "loss": 0.3908, "step": 17841 }, { "epoch": 0.9628190599535913, "grad_norm": 0.9439085166138109, "learning_rate": 1.0306640241032591e-06, "loss": 0.4145, "step": 17842 }, { "epoch": 0.9628730235821057, "grad_norm": 1.002191734151182, "learning_rate": 1.030575179328048e-06, "loss": 0.5489, "step": 17843 }, { "epoch": 0.9629269872106201, "grad_norm": 0.8320335958375719, "learning_rate": 1.0304864630086228e-06, "loss": 0.2349, "step": 17844 }, { "epoch": 0.9629809508391344, "grad_norm": 0.956815211118935, "learning_rate": 1.0303978751475323e-06, "loss": 0.4539, "step": 17845 }, { "epoch": 0.9630349144676488, "grad_norm": 1.0717500178554453, "learning_rate": 1.030309415747323e-06, "loss": 0.3612, "step": 17846 }, { "epoch": 0.9630888780961632, "grad_norm": 0.8863490072070994, "learning_rate": 1.0302210848105376e-06, "loss": 0.2537, "step": 17847 }, { "epoch": 0.9631428417246776, "grad_norm": 1.0623019564729872, "learning_rate": 1.0301328823397146e-06, "loss": 0.7749, "step": 17848 }, { "epoch": 0.963196805353192, "grad_norm": 0.8674116118255991, "learning_rate": 1.0300448083373893e-06, "loss": 0.3717, "step": 17849 }, { "epoch": 0.9632507689817064, "grad_norm": 0.7822389675961525, "learning_rate": 1.0299568628060923e-06, "loss": 0.3227, "step": 17850 }, { "epoch": 0.9633047326102208, "grad_norm": 1.1865948109257531, "learning_rate": 1.0298690457483518e-06, "loss": 0.4439, "step": 17851 }, { "epoch": 0.963358696238735, "grad_norm": 0.9856340931394383, 
"learning_rate": 1.029781357166692e-06, "loss": 0.4523, "step": 17852 }, { "epoch": 0.9634126598672494, "grad_norm": 0.8895006942207564, "learning_rate": 1.0296937970636328e-06, "loss": 0.2738, "step": 17853 }, { "epoch": 0.9634666234957638, "grad_norm": 1.0641206274440027, "learning_rate": 1.0296063654416904e-06, "loss": 0.4292, "step": 17854 }, { "epoch": 0.9635205871242782, "grad_norm": 0.9297197545576139, "learning_rate": 1.0295190623033784e-06, "loss": 0.4333, "step": 17855 }, { "epoch": 0.9635745507527926, "grad_norm": 1.085500276263674, "learning_rate": 1.0294318876512055e-06, "loss": 0.5555, "step": 17856 }, { "epoch": 0.963628514381307, "grad_norm": 1.1678778031458226, "learning_rate": 1.0293448414876773e-06, "loss": 0.4371, "step": 17857 }, { "epoch": 0.9636824780098214, "grad_norm": 1.0212188194120606, "learning_rate": 1.0292579238152952e-06, "loss": 0.3865, "step": 17858 }, { "epoch": 0.9637364416383357, "grad_norm": 1.0924321840487399, "learning_rate": 1.0291711346365585e-06, "loss": 0.3798, "step": 17859 }, { "epoch": 0.9637904052668501, "grad_norm": 1.164620020855037, "learning_rate": 1.0290844739539606e-06, "loss": 0.4551, "step": 17860 }, { "epoch": 0.9638443688953645, "grad_norm": 1.0276925261270873, "learning_rate": 1.0289979417699924e-06, "loss": 0.3858, "step": 17861 }, { "epoch": 0.9638983325238789, "grad_norm": 0.9567818566884306, "learning_rate": 1.028911538087141e-06, "loss": 0.4068, "step": 17862 }, { "epoch": 0.9639522961523933, "grad_norm": 1.038829230738043, "learning_rate": 1.0288252629078897e-06, "loss": 0.3733, "step": 17863 }, { "epoch": 0.9640062597809077, "grad_norm": 1.0274048190455871, "learning_rate": 1.0287391162347177e-06, "loss": 0.4476, "step": 17864 }, { "epoch": 0.9640602234094221, "grad_norm": 1.1132675243969998, "learning_rate": 1.0286530980701018e-06, "loss": 0.5052, "step": 17865 }, { "epoch": 0.9641141870379364, "grad_norm": 1.0015813628045236, "learning_rate": 1.028567208416514e-06, "loss": 0.3708, "step": 17866 }, { "epoch": 0.9641681506664508, "grad_norm": 1.1448434152323455, "learning_rate": 1.0284814472764225e-06, "loss": 0.4733, "step": 17867 }, { "epoch": 0.9642221142949652, "grad_norm": 0.8336423099155317, "learning_rate": 1.0283958146522925e-06, "loss": 0.3679, "step": 17868 }, { "epoch": 0.9642760779234796, "grad_norm": 0.9625165106633883, "learning_rate": 1.0283103105465847e-06, "loss": 0.4152, "step": 17869 }, { "epoch": 0.964330041551994, "grad_norm": 1.0651891545157988, "learning_rate": 1.028224934961757e-06, "loss": 0.5732, "step": 17870 }, { "epoch": 0.9643840051805084, "grad_norm": 0.9171351378608484, "learning_rate": 1.028139687900263e-06, "loss": 0.4724, "step": 17871 }, { "epoch": 0.9644379688090228, "grad_norm": 1.0078628063634751, "learning_rate": 1.0280545693645528e-06, "loss": 0.5491, "step": 17872 }, { "epoch": 0.9644919324375371, "grad_norm": 0.8899626469964094, "learning_rate": 1.0279695793570731e-06, "loss": 0.297, "step": 17873 }, { "epoch": 0.9645458960660515, "grad_norm": 1.2555265010507757, "learning_rate": 1.0278847178802664e-06, "loss": 0.4736, "step": 17874 }, { "epoch": 0.9645998596945659, "grad_norm": 1.0857085744545776, "learning_rate": 1.0277999849365714e-06, "loss": 0.3695, "step": 17875 }, { "epoch": 0.9646538233230803, "grad_norm": 0.929586095032255, "learning_rate": 1.0277153805284236e-06, "loss": 0.3793, "step": 17876 }, { "epoch": 0.9647077869515946, "grad_norm": 0.8241015551057623, "learning_rate": 1.0276309046582545e-06, "loss": 0.375, "step": 17877 }, { "epoch": 0.964761750580109, "grad_norm": 
0.7523499404912642, "learning_rate": 1.0275465573284928e-06, "loss": 0.2889, "step": 17878 }, { "epoch": 0.9648157142086234, "grad_norm": 0.9765269084361601, "learning_rate": 1.0274623385415616e-06, "loss": 0.4251, "step": 17879 }, { "epoch": 0.9648696778371377, "grad_norm": 1.0172661525891928, "learning_rate": 1.0273782482998827e-06, "loss": 0.4331, "step": 17880 }, { "epoch": 0.9649236414656521, "grad_norm": 1.0108747618632523, "learning_rate": 1.0272942866058714e-06, "loss": 0.4178, "step": 17881 }, { "epoch": 0.9649776050941665, "grad_norm": 0.9897210263063176, "learning_rate": 1.0272104534619421e-06, "loss": 0.3518, "step": 17882 }, { "epoch": 0.9650315687226809, "grad_norm": 1.2570710169809765, "learning_rate": 1.0271267488705033e-06, "loss": 0.6787, "step": 17883 }, { "epoch": 0.9650855323511953, "grad_norm": 0.996014547114282, "learning_rate": 1.0270431728339615e-06, "loss": 0.4036, "step": 17884 }, { "epoch": 0.9651394959797097, "grad_norm": 1.0025569354056054, "learning_rate": 1.026959725354719e-06, "loss": 0.3969, "step": 17885 }, { "epoch": 0.965193459608224, "grad_norm": 1.0031363834946845, "learning_rate": 1.0268764064351732e-06, "loss": 0.4562, "step": 17886 }, { "epoch": 0.9652474232367384, "grad_norm": 0.7549133103591573, "learning_rate": 1.0267932160777195e-06, "loss": 0.343, "step": 17887 }, { "epoch": 0.9653013868652528, "grad_norm": 1.125111672093527, "learning_rate": 1.0267101542847482e-06, "loss": 0.4846, "step": 17888 }, { "epoch": 0.9653553504937672, "grad_norm": 1.295898205205464, "learning_rate": 1.0266272210586473e-06, "loss": 0.655, "step": 17889 }, { "epoch": 0.9654093141222816, "grad_norm": 0.9620376330144821, "learning_rate": 1.0265444164017998e-06, "loss": 0.4134, "step": 17890 }, { "epoch": 0.965463277750796, "grad_norm": 1.0820406277605394, "learning_rate": 1.026461740316586e-06, "loss": 0.7075, "step": 17891 }, { "epoch": 0.9655172413793104, "grad_norm": 1.116645817768908, "learning_rate": 1.026379192805382e-06, "loss": 0.5177, "step": 17892 }, { "epoch": 0.9655712050078247, "grad_norm": 0.9204012021783984, "learning_rate": 1.02629677387056e-06, "loss": 0.4117, "step": 17893 }, { "epoch": 0.9656251686363391, "grad_norm": 1.0818601627594364, "learning_rate": 1.0262144835144897e-06, "loss": 0.5074, "step": 17894 }, { "epoch": 0.9656791322648535, "grad_norm": 0.9690285914804339, "learning_rate": 1.0261323217395352e-06, "loss": 0.4375, "step": 17895 }, { "epoch": 0.9657330958933679, "grad_norm": 0.9430185168624731, "learning_rate": 1.026050288548058e-06, "loss": 0.4065, "step": 17896 }, { "epoch": 0.9657870595218823, "grad_norm": 1.122293553442814, "learning_rate": 1.0259683839424167e-06, "loss": 0.6528, "step": 17897 }, { "epoch": 0.9658410231503967, "grad_norm": 0.855262542982004, "learning_rate": 1.025886607924964e-06, "loss": 0.3905, "step": 17898 }, { "epoch": 0.9658949867789111, "grad_norm": 0.8098971092793059, "learning_rate": 1.0258049604980515e-06, "loss": 0.3398, "step": 17899 }, { "epoch": 0.9659489504074253, "grad_norm": 0.9911239813641783, "learning_rate": 1.0257234416640255e-06, "loss": 0.3477, "step": 17900 }, { "epoch": 0.9660029140359397, "grad_norm": 1.2693430116626974, "learning_rate": 1.0256420514252285e-06, "loss": 0.4855, "step": 17901 }, { "epoch": 0.9660568776644541, "grad_norm": 0.9471655070882822, "learning_rate": 1.0255607897839994e-06, "loss": 0.3178, "step": 17902 }, { "epoch": 0.9661108412929685, "grad_norm": 1.0471143560756995, "learning_rate": 1.0254796567426745e-06, "loss": 0.4504, "step": 17903 }, { "epoch": 
0.9661648049214829, "grad_norm": 1.2698459158725, "learning_rate": 1.0253986523035857e-06, "loss": 0.4673, "step": 17904 }, { "epoch": 0.9662187685499973, "grad_norm": 0.9068773711643514, "learning_rate": 1.0253177764690613e-06, "loss": 0.3652, "step": 17905 }, { "epoch": 0.9662727321785117, "grad_norm": 1.0738579287715162, "learning_rate": 1.0252370292414244e-06, "loss": 0.4006, "step": 17906 }, { "epoch": 0.966326695807026, "grad_norm": 0.9741741110029672, "learning_rate": 1.0251564106229974e-06, "loss": 0.4628, "step": 17907 }, { "epoch": 0.9663806594355404, "grad_norm": 1.2059434844354031, "learning_rate": 1.0250759206160964e-06, "loss": 0.6102, "step": 17908 }, { "epoch": 0.9664346230640548, "grad_norm": 1.082691631896521, "learning_rate": 1.0249955592230354e-06, "loss": 0.462, "step": 17909 }, { "epoch": 0.9664885866925692, "grad_norm": 1.1360780866970472, "learning_rate": 1.0249153264461232e-06, "loss": 0.5068, "step": 17910 }, { "epoch": 0.9665425503210836, "grad_norm": 0.9105956607980784, "learning_rate": 1.0248352222876661e-06, "loss": 0.3663, "step": 17911 }, { "epoch": 0.966596513949598, "grad_norm": 1.00702825364352, "learning_rate": 1.024755246749967e-06, "loss": 0.3843, "step": 17912 }, { "epoch": 0.9666504775781124, "grad_norm": 1.0104475076986612, "learning_rate": 1.0246753998353237e-06, "loss": 0.3216, "step": 17913 }, { "epoch": 0.9667044412066267, "grad_norm": 1.071184757683129, "learning_rate": 1.024595681546032e-06, "loss": 0.4045, "step": 17914 }, { "epoch": 0.9667584048351411, "grad_norm": 0.9994208332500781, "learning_rate": 1.0245160918843817e-06, "loss": 0.5676, "step": 17915 }, { "epoch": 0.9668123684636555, "grad_norm": 0.916376068978545, "learning_rate": 1.0244366308526619e-06, "loss": 0.4003, "step": 17916 }, { "epoch": 0.9668663320921699, "grad_norm": 1.0373828504978186, "learning_rate": 1.0243572984531548e-06, "loss": 0.4601, "step": 17917 }, { "epoch": 0.9669202957206843, "grad_norm": 1.130847378079657, "learning_rate": 1.0242780946881417e-06, "loss": 0.6079, "step": 17918 }, { "epoch": 0.9669742593491987, "grad_norm": 1.0822500759583193, "learning_rate": 1.0241990195598981e-06, "loss": 0.5366, "step": 17919 }, { "epoch": 0.9670282229777131, "grad_norm": 0.9734413231881124, "learning_rate": 1.0241200730706973e-06, "loss": 0.3896, "step": 17920 }, { "epoch": 0.9670821866062274, "grad_norm": 0.9734939348230095, "learning_rate": 1.0240412552228085e-06, "loss": 0.433, "step": 17921 }, { "epoch": 0.9671361502347418, "grad_norm": 0.8697652291976674, "learning_rate": 1.0239625660184965e-06, "loss": 0.277, "step": 17922 }, { "epoch": 0.9671901138632562, "grad_norm": 1.0339336759384465, "learning_rate": 1.023884005460023e-06, "loss": 0.4507, "step": 17923 }, { "epoch": 0.9672440774917705, "grad_norm": 0.8494610379040256, "learning_rate": 1.0238055735496454e-06, "loss": 0.3624, "step": 17924 }, { "epoch": 0.967298041120285, "grad_norm": 0.9096978146570516, "learning_rate": 1.0237272702896188e-06, "loss": 0.4068, "step": 17925 }, { "epoch": 0.9673520047487993, "grad_norm": 0.8960002173067981, "learning_rate": 1.023649095682194e-06, "loss": 0.3817, "step": 17926 }, { "epoch": 0.9674059683773137, "grad_norm": 1.1087403570936085, "learning_rate": 1.023571049729617e-06, "loss": 0.5568, "step": 17927 }, { "epoch": 0.967459932005828, "grad_norm": 1.1425657631016721, "learning_rate": 1.0234931324341307e-06, "loss": 0.59, "step": 17928 }, { "epoch": 0.9675138956343424, "grad_norm": 1.2249175710668825, "learning_rate": 1.0234153437979752e-06, "loss": 0.5461, "step": 17929 }, 
{ "epoch": 0.9675678592628568, "grad_norm": 1.1516156467956786, "learning_rate": 1.0233376838233861e-06, "loss": 0.4433, "step": 17930 }, { "epoch": 0.9676218228913712, "grad_norm": 1.0688404454392078, "learning_rate": 1.0232601525125952e-06, "loss": 0.4762, "step": 17931 }, { "epoch": 0.9676757865198856, "grad_norm": 0.8945699694856074, "learning_rate": 1.0231827498678306e-06, "loss": 0.3463, "step": 17932 }, { "epoch": 0.9677297501484, "grad_norm": 0.9381772251488613, "learning_rate": 1.0231054758913176e-06, "loss": 0.3875, "step": 17933 }, { "epoch": 0.9677837137769144, "grad_norm": 1.153159367677306, "learning_rate": 1.0230283305852767e-06, "loss": 0.4666, "step": 17934 }, { "epoch": 0.9678376774054287, "grad_norm": 0.9790019482812711, "learning_rate": 1.0229513139519254e-06, "loss": 0.3567, "step": 17935 }, { "epoch": 0.9678916410339431, "grad_norm": 1.097073553141336, "learning_rate": 1.0228744259934772e-06, "loss": 0.5562, "step": 17936 }, { "epoch": 0.9679456046624575, "grad_norm": 1.1877005452087197, "learning_rate": 1.0227976667121417e-06, "loss": 0.4755, "step": 17937 }, { "epoch": 0.9679995682909719, "grad_norm": 0.983369509608898, "learning_rate": 1.0227210361101247e-06, "loss": 0.5646, "step": 17938 }, { "epoch": 0.9680535319194863, "grad_norm": 1.127318474731123, "learning_rate": 1.0226445341896294e-06, "loss": 0.4433, "step": 17939 }, { "epoch": 0.9681074955480007, "grad_norm": 1.003917119121436, "learning_rate": 1.022568160952854e-06, "loss": 0.3939, "step": 17940 }, { "epoch": 0.9681614591765151, "grad_norm": 1.0721522673342416, "learning_rate": 1.0224919164019943e-06, "loss": 0.432, "step": 17941 }, { "epoch": 0.9682154228050294, "grad_norm": 1.0532234847367967, "learning_rate": 1.0224158005392405e-06, "loss": 0.4268, "step": 17942 }, { "epoch": 0.9682693864335438, "grad_norm": 1.0337799555045897, "learning_rate": 1.0223398133667812e-06, "loss": 0.4303, "step": 17943 }, { "epoch": 0.9683233500620582, "grad_norm": 1.0335658553614406, "learning_rate": 1.0222639548868001e-06, "loss": 0.3635, "step": 17944 }, { "epoch": 0.9683773136905726, "grad_norm": 1.0770778407190655, "learning_rate": 1.022188225101477e-06, "loss": 0.4892, "step": 17945 }, { "epoch": 0.968431277319087, "grad_norm": 1.1709460009929626, "learning_rate": 1.022112624012989e-06, "loss": 0.7669, "step": 17946 }, { "epoch": 0.9684852409476014, "grad_norm": 1.0091662442482074, "learning_rate": 1.0220371516235089e-06, "loss": 0.465, "step": 17947 }, { "epoch": 0.9685392045761158, "grad_norm": 1.0740569857368223, "learning_rate": 1.0219618079352057e-06, "loss": 0.5232, "step": 17948 }, { "epoch": 0.96859316820463, "grad_norm": 1.3056445853555478, "learning_rate": 1.021886592950245e-06, "loss": 0.4805, "step": 17949 }, { "epoch": 0.9686471318331444, "grad_norm": 0.7248177777846264, "learning_rate": 1.0218115066707878e-06, "loss": 0.2545, "step": 17950 }, { "epoch": 0.9687010954616588, "grad_norm": 0.9658544320808473, "learning_rate": 1.0217365490989932e-06, "loss": 0.3123, "step": 17951 }, { "epoch": 0.9687550590901732, "grad_norm": 1.0362310464996607, "learning_rate": 1.021661720237015e-06, "loss": 0.3614, "step": 17952 }, { "epoch": 0.9688090227186876, "grad_norm": 1.108763890757111, "learning_rate": 1.0215870200870043e-06, "loss": 0.4466, "step": 17953 }, { "epoch": 0.968862986347202, "grad_norm": 1.0859257408716558, "learning_rate": 1.0215124486511076e-06, "loss": 0.5687, "step": 17954 }, { "epoch": 0.9689169499757163, "grad_norm": 0.9638020329819141, "learning_rate": 1.0214380059314684e-06, "loss": 0.3274, 
"step": 17955 }, { "epoch": 0.9689709136042307, "grad_norm": 0.9648403456141517, "learning_rate": 1.0213636919302257e-06, "loss": 0.3536, "step": 17956 }, { "epoch": 0.9690248772327451, "grad_norm": 1.1370965429815654, "learning_rate": 1.0212895066495162e-06, "loss": 0.5114, "step": 17957 }, { "epoch": 0.9690788408612595, "grad_norm": 0.9892726633311988, "learning_rate": 1.021215450091472e-06, "loss": 0.4854, "step": 17958 }, { "epoch": 0.9691328044897739, "grad_norm": 0.9537775241528009, "learning_rate": 1.0211415222582202e-06, "loss": 0.446, "step": 17959 }, { "epoch": 0.9691867681182883, "grad_norm": 1.3388648731217332, "learning_rate": 1.0210677231518878e-06, "loss": 0.6358, "step": 17960 }, { "epoch": 0.9692407317468027, "grad_norm": 1.321525863893946, "learning_rate": 1.0209940527745937e-06, "loss": 0.4884, "step": 17961 }, { "epoch": 0.969294695375317, "grad_norm": 1.3675563393319181, "learning_rate": 1.0209205111284568e-06, "loss": 0.5611, "step": 17962 }, { "epoch": 0.9693486590038314, "grad_norm": 1.1503612726313361, "learning_rate": 1.02084709821559e-06, "loss": 0.614, "step": 17963 }, { "epoch": 0.9694026226323458, "grad_norm": 1.0008190163529656, "learning_rate": 1.0207738140381037e-06, "loss": 0.3521, "step": 17964 }, { "epoch": 0.9694565862608602, "grad_norm": 0.8840560622903401, "learning_rate": 1.0207006585981038e-06, "loss": 0.3702, "step": 17965 }, { "epoch": 0.9695105498893746, "grad_norm": 1.0086970738993442, "learning_rate": 1.0206276318976927e-06, "loss": 0.4817, "step": 17966 }, { "epoch": 0.969564513517889, "grad_norm": 0.9723861008800753, "learning_rate": 1.0205547339389697e-06, "loss": 0.4418, "step": 17967 }, { "epoch": 0.9696184771464034, "grad_norm": 0.9263613503185468, "learning_rate": 1.02048196472403e-06, "loss": 0.3829, "step": 17968 }, { "epoch": 0.9696724407749177, "grad_norm": 0.9502685549357336, "learning_rate": 1.0204093242549645e-06, "loss": 0.3532, "step": 17969 }, { "epoch": 0.969726404403432, "grad_norm": 0.9661400662419387, "learning_rate": 1.0203368125338615e-06, "loss": 0.4812, "step": 17970 }, { "epoch": 0.9697803680319464, "grad_norm": 0.8962112155940124, "learning_rate": 1.020264429562805e-06, "loss": 0.4371, "step": 17971 }, { "epoch": 0.9698343316604608, "grad_norm": 0.9795120444014361, "learning_rate": 1.0201921753438751e-06, "loss": 0.4647, "step": 17972 }, { "epoch": 0.9698882952889752, "grad_norm": 0.9320153696267194, "learning_rate": 1.0201200498791488e-06, "loss": 0.3535, "step": 17973 }, { "epoch": 0.9699422589174896, "grad_norm": 1.074744220607773, "learning_rate": 1.0200480531706982e-06, "loss": 0.4855, "step": 17974 }, { "epoch": 0.969996222546004, "grad_norm": 1.0572480481681181, "learning_rate": 1.0199761852205937e-06, "loss": 0.3882, "step": 17975 }, { "epoch": 0.9700501861745183, "grad_norm": 1.2906687263699386, "learning_rate": 1.0199044460309006e-06, "loss": 0.4311, "step": 17976 }, { "epoch": 0.9701041498030327, "grad_norm": 1.111316494394137, "learning_rate": 1.0198328356036805e-06, "loss": 0.4808, "step": 17977 }, { "epoch": 0.9701581134315471, "grad_norm": 1.0641438294794052, "learning_rate": 1.019761353940991e-06, "loss": 0.4324, "step": 17978 }, { "epoch": 0.9702120770600615, "grad_norm": 1.198288584808613, "learning_rate": 1.0196900010448879e-06, "loss": 0.6303, "step": 17979 }, { "epoch": 0.9702660406885759, "grad_norm": 1.260344827559928, "learning_rate": 1.0196187769174206e-06, "loss": 0.4536, "step": 17980 }, { "epoch": 0.9703200043170903, "grad_norm": 1.0202265780362707, "learning_rate": 1.019547681560637e-06, 
"loss": 0.4242, "step": 17981 }, { "epoch": 0.9703739679456047, "grad_norm": 1.0393088977398341, "learning_rate": 1.01947671497658e-06, "loss": 0.5378, "step": 17982 }, { "epoch": 0.970427931574119, "grad_norm": 0.9038681792198786, "learning_rate": 1.0194058771672896e-06, "loss": 0.3819, "step": 17983 }, { "epoch": 0.9704818952026334, "grad_norm": 0.9490614708162891, "learning_rate": 1.019335168134802e-06, "loss": 0.3325, "step": 17984 }, { "epoch": 0.9705358588311478, "grad_norm": 1.175677515510177, "learning_rate": 1.0192645878811484e-06, "loss": 0.621, "step": 17985 }, { "epoch": 0.9705898224596622, "grad_norm": 0.9742705421471248, "learning_rate": 1.0191941364083582e-06, "loss": 0.4162, "step": 17986 }, { "epoch": 0.9706437860881766, "grad_norm": 1.158591201346969, "learning_rate": 1.019123813718456e-06, "loss": 0.4924, "step": 17987 }, { "epoch": 0.970697749716691, "grad_norm": 0.9398136893635369, "learning_rate": 1.019053619813463e-06, "loss": 0.385, "step": 17988 }, { "epoch": 0.9707517133452054, "grad_norm": 0.969448654866024, "learning_rate": 1.0189835546953966e-06, "loss": 0.5117, "step": 17989 }, { "epoch": 0.9708056769737197, "grad_norm": 1.0354562893644046, "learning_rate": 1.0189136183662708e-06, "loss": 0.4802, "step": 17990 }, { "epoch": 0.9708596406022341, "grad_norm": 0.9628997418989812, "learning_rate": 1.0188438108280952e-06, "loss": 0.3718, "step": 17991 }, { "epoch": 0.9709136042307485, "grad_norm": 1.0655703729613528, "learning_rate": 1.0187741320828765e-06, "loss": 0.4613, "step": 17992 }, { "epoch": 0.9709675678592629, "grad_norm": 0.6971511371982585, "learning_rate": 1.018704582132617e-06, "loss": 0.3221, "step": 17993 }, { "epoch": 0.9710215314877773, "grad_norm": 0.9287856752858177, "learning_rate": 1.0186351609793161e-06, "loss": 0.3847, "step": 17994 }, { "epoch": 0.9710754951162917, "grad_norm": 1.097259970205671, "learning_rate": 1.0185658686249688e-06, "loss": 0.4001, "step": 17995 }, { "epoch": 0.971129458744806, "grad_norm": 1.2089053302622532, "learning_rate": 1.0184967050715663e-06, "loss": 0.5217, "step": 17996 }, { "epoch": 0.9711834223733203, "grad_norm": 0.9899671896659042, "learning_rate": 1.0184276703210967e-06, "loss": 0.3994, "step": 17997 }, { "epoch": 0.9712373860018347, "grad_norm": 1.1178972957769615, "learning_rate": 1.0183587643755443e-06, "loss": 0.5384, "step": 17998 }, { "epoch": 0.9712913496303491, "grad_norm": 1.0975141207731514, "learning_rate": 1.0182899872368895e-06, "loss": 0.4725, "step": 17999 }, { "epoch": 0.9713453132588635, "grad_norm": 0.9627333429157267, "learning_rate": 1.018221338907109e-06, "loss": 0.3884, "step": 18000 }, { "epoch": 0.9713453132588635, "eval_loss": 0.5274714231491089, "eval_runtime": 161.4197, "eval_samples_per_second": 21.305, "eval_steps_per_second": 0.892, "step": 18000 }, { "epoch": 0.9713992768873779, "grad_norm": 1.1328763531926356, "learning_rate": 1.0181528193881752e-06, "loss": 0.429, "step": 18001 }, { "epoch": 0.9714532405158923, "grad_norm": 1.0306978235519206, "learning_rate": 1.0180844286820581e-06, "loss": 0.6274, "step": 18002 }, { "epoch": 0.9715072041444067, "grad_norm": 1.002171745650088, "learning_rate": 1.0180161667907233e-06, "loss": 0.4979, "step": 18003 }, { "epoch": 0.971561167772921, "grad_norm": 1.0997233807343583, "learning_rate": 1.017948033716133e-06, "loss": 0.4954, "step": 18004 }, { "epoch": 0.9716151314014354, "grad_norm": 1.0212534442859866, "learning_rate": 1.0178800294602446e-06, "loss": 0.4144, "step": 18005 }, { "epoch": 0.9716690950299498, "grad_norm": 
1.171328013613981, "learning_rate": 1.0178121540250127e-06, "loss": 0.4489, "step": 18006 }, { "epoch": 0.9717230586584642, "grad_norm": 1.0803249901108014, "learning_rate": 1.017744407412389e-06, "loss": 0.502, "step": 18007 }, { "epoch": 0.9717770222869786, "grad_norm": 0.944312005227451, "learning_rate": 1.0176767896243198e-06, "loss": 0.4296, "step": 18008 }, { "epoch": 0.971830985915493, "grad_norm": 1.023631639050111, "learning_rate": 1.0176093006627485e-06, "loss": 0.4585, "step": 18009 }, { "epoch": 0.9718849495440074, "grad_norm": 0.8655145733764561, "learning_rate": 1.017541940529615e-06, "loss": 0.3885, "step": 18010 }, { "epoch": 0.9719389131725217, "grad_norm": 1.0188007506358656, "learning_rate": 1.0174747092268556e-06, "loss": 0.4793, "step": 18011 }, { "epoch": 0.9719928768010361, "grad_norm": 0.8262735025143435, "learning_rate": 1.0174076067564024e-06, "loss": 0.3157, "step": 18012 }, { "epoch": 0.9720468404295505, "grad_norm": 0.9385289914734662, "learning_rate": 1.0173406331201837e-06, "loss": 0.3985, "step": 18013 }, { "epoch": 0.9721008040580649, "grad_norm": 1.0448583862785652, "learning_rate": 1.0172737883201244e-06, "loss": 0.449, "step": 18014 }, { "epoch": 0.9721547676865793, "grad_norm": 0.8104506879596775, "learning_rate": 1.017207072358146e-06, "loss": 0.3265, "step": 18015 }, { "epoch": 0.9722087313150937, "grad_norm": 1.0848780167041987, "learning_rate": 1.0171404852361663e-06, "loss": 0.4303, "step": 18016 }, { "epoch": 0.9722626949436081, "grad_norm": 0.9300776913010911, "learning_rate": 1.0170740269560982e-06, "loss": 0.5227, "step": 18017 }, { "epoch": 0.9723166585721223, "grad_norm": 0.9759178786310342, "learning_rate": 1.0170076975198525e-06, "loss": 0.3182, "step": 18018 }, { "epoch": 0.9723706222006367, "grad_norm": 0.9517121234564407, "learning_rate": 1.0169414969293352e-06, "loss": 0.4019, "step": 18019 }, { "epoch": 0.9724245858291511, "grad_norm": 0.9223500733878252, "learning_rate": 1.0168754251864487e-06, "loss": 0.4654, "step": 18020 }, { "epoch": 0.9724785494576655, "grad_norm": 1.248658007734825, "learning_rate": 1.016809482293093e-06, "loss": 0.5129, "step": 18021 }, { "epoch": 0.9725325130861799, "grad_norm": 0.762163682543234, "learning_rate": 1.0167436682511622e-06, "loss": 0.2941, "step": 18022 }, { "epoch": 0.9725864767146943, "grad_norm": 1.1364166920600156, "learning_rate": 1.016677983062549e-06, "loss": 0.4145, "step": 18023 }, { "epoch": 0.9726404403432086, "grad_norm": 0.9499605204605104, "learning_rate": 1.0166124267291403e-06, "loss": 0.4228, "step": 18024 }, { "epoch": 0.972694403971723, "grad_norm": 0.993330907392355, "learning_rate": 1.0165469992528203e-06, "loss": 0.3718, "step": 18025 }, { "epoch": 0.9727483676002374, "grad_norm": 1.10620652430404, "learning_rate": 1.0164817006354702e-06, "loss": 0.4252, "step": 18026 }, { "epoch": 0.9728023312287518, "grad_norm": 1.02636794987454, "learning_rate": 1.0164165308789658e-06, "loss": 0.4001, "step": 18027 }, { "epoch": 0.9728562948572662, "grad_norm": 1.0145050539979432, "learning_rate": 1.0163514899851807e-06, "loss": 0.4238, "step": 18028 }, { "epoch": 0.9729102584857806, "grad_norm": 1.2074874907431528, "learning_rate": 1.0162865779559843e-06, "loss": 0.4629, "step": 18029 }, { "epoch": 0.972964222114295, "grad_norm": 1.0052872753058684, "learning_rate": 1.0162217947932425e-06, "loss": 0.467, "step": 18030 }, { "epoch": 0.9730181857428093, "grad_norm": 1.0006345896175435, "learning_rate": 1.0161571404988163e-06, "loss": 0.3675, "step": 18031 }, { "epoch": 0.9730721493713237, 
"grad_norm": 0.8213445728258764, "learning_rate": 1.016092615074565e-06, "loss": 0.3485, "step": 18032 }, { "epoch": 0.9731261129998381, "grad_norm": 1.0402457711805533, "learning_rate": 1.0160282185223419e-06, "loss": 0.4131, "step": 18033 }, { "epoch": 0.9731800766283525, "grad_norm": 0.9428716444633622, "learning_rate": 1.0159639508439993e-06, "loss": 0.4261, "step": 18034 }, { "epoch": 0.9732340402568669, "grad_norm": 1.1118724429153695, "learning_rate": 1.0158998120413827e-06, "loss": 0.394, "step": 18035 }, { "epoch": 0.9732880038853813, "grad_norm": 1.273067003670178, "learning_rate": 1.0158358021163367e-06, "loss": 0.5248, "step": 18036 }, { "epoch": 0.9733419675138957, "grad_norm": 1.1554296508776936, "learning_rate": 1.0157719210707012e-06, "loss": 0.4668, "step": 18037 }, { "epoch": 0.97339593114241, "grad_norm": 1.0345303950012612, "learning_rate": 1.015708168906311e-06, "loss": 0.3635, "step": 18038 }, { "epoch": 0.9734498947709244, "grad_norm": 1.0598998491873557, "learning_rate": 1.0156445456249995e-06, "loss": 0.4431, "step": 18039 }, { "epoch": 0.9735038583994388, "grad_norm": 1.0576053209848826, "learning_rate": 1.0155810512285945e-06, "loss": 0.477, "step": 18040 }, { "epoch": 0.9735578220279532, "grad_norm": 0.9469843734794028, "learning_rate": 1.0155176857189217e-06, "loss": 0.4268, "step": 18041 }, { "epoch": 0.9736117856564676, "grad_norm": 0.790758614085866, "learning_rate": 1.015454449097801e-06, "loss": 0.3501, "step": 18042 }, { "epoch": 0.973665749284982, "grad_norm": 1.2214685586476977, "learning_rate": 1.0153913413670514e-06, "loss": 0.4719, "step": 18043 }, { "epoch": 0.9737197129134963, "grad_norm": 1.0245133507821669, "learning_rate": 1.0153283625284863e-06, "loss": 0.362, "step": 18044 }, { "epoch": 0.9737736765420106, "grad_norm": 1.1514262785520362, "learning_rate": 1.015265512583915e-06, "loss": 0.5175, "step": 18045 }, { "epoch": 0.973827640170525, "grad_norm": 0.845297602103749, "learning_rate": 1.0152027915351443e-06, "loss": 0.285, "step": 18046 }, { "epoch": 0.9738816037990394, "grad_norm": 0.9991348460688204, "learning_rate": 1.0151401993839772e-06, "loss": 0.4199, "step": 18047 }, { "epoch": 0.9739355674275538, "grad_norm": 1.2644439135386834, "learning_rate": 1.0150777361322123e-06, "loss": 0.417, "step": 18048 }, { "epoch": 0.9739895310560682, "grad_norm": 0.9687601733782427, "learning_rate": 1.015015401781645e-06, "loss": 0.4322, "step": 18049 }, { "epoch": 0.9740434946845826, "grad_norm": 0.9745143892758288, "learning_rate": 1.0149531963340665e-06, "loss": 0.4595, "step": 18050 }, { "epoch": 0.974097458313097, "grad_norm": 0.9088534825989094, "learning_rate": 1.0148911197912654e-06, "loss": 0.3922, "step": 18051 }, { "epoch": 0.9741514219416113, "grad_norm": 1.0698219269637987, "learning_rate": 1.0148291721550254e-06, "loss": 0.4231, "step": 18052 }, { "epoch": 0.9742053855701257, "grad_norm": 0.8973189162744587, "learning_rate": 1.0147673534271266e-06, "loss": 0.4007, "step": 18053 }, { "epoch": 0.9742593491986401, "grad_norm": 0.919528507832268, "learning_rate": 1.014705663609346e-06, "loss": 0.3579, "step": 18054 }, { "epoch": 0.9743133128271545, "grad_norm": 1.059930800017334, "learning_rate": 1.014644102703457e-06, "loss": 0.5876, "step": 18055 }, { "epoch": 0.9743672764556689, "grad_norm": 1.0878077362574954, "learning_rate": 1.0145826707112284e-06, "loss": 0.4552, "step": 18056 }, { "epoch": 0.9744212400841833, "grad_norm": 0.9780916765598362, "learning_rate": 1.014521367634426e-06, "loss": 0.3991, "step": 18057 }, { "epoch": 
0.9744752037126977, "grad_norm": 1.0687809565633837, "learning_rate": 1.0144601934748123e-06, "loss": 0.4888, "step": 18058 }, { "epoch": 0.974529167341212, "grad_norm": 0.8776835530088282, "learning_rate": 1.0143991482341441e-06, "loss": 0.3376, "step": 18059 }, { "epoch": 0.9745831309697264, "grad_norm": 1.2798073666925707, "learning_rate": 1.0143382319141775e-06, "loss": 0.6058, "step": 18060 }, { "epoch": 0.9746370945982408, "grad_norm": 0.8937949449702395, "learning_rate": 1.0142774445166618e-06, "loss": 0.347, "step": 18061 }, { "epoch": 0.9746910582267552, "grad_norm": 0.8820387598596883, "learning_rate": 1.0142167860433455e-06, "loss": 0.3725, "step": 18062 }, { "epoch": 0.9747450218552696, "grad_norm": 1.0144998420830902, "learning_rate": 1.0141562564959707e-06, "loss": 0.3642, "step": 18063 }, { "epoch": 0.974798985483784, "grad_norm": 1.0271247759111297, "learning_rate": 1.0140958558762783e-06, "loss": 0.5319, "step": 18064 }, { "epoch": 0.9748529491122984, "grad_norm": 1.1095181607857743, "learning_rate": 1.0140355841860032e-06, "loss": 0.483, "step": 18065 }, { "epoch": 0.9749069127408126, "grad_norm": 1.2714455606123745, "learning_rate": 1.0139754414268787e-06, "loss": 0.5139, "step": 18066 }, { "epoch": 0.974960876369327, "grad_norm": 0.9277221124613251, "learning_rate": 1.0139154276006322e-06, "loss": 0.4348, "step": 18067 }, { "epoch": 0.9750148399978414, "grad_norm": 1.061710294978301, "learning_rate": 1.0138555427089897e-06, "loss": 0.4173, "step": 18068 }, { "epoch": 0.9750688036263558, "grad_norm": 0.9949897615254458, "learning_rate": 1.0137957867536714e-06, "loss": 0.3916, "step": 18069 }, { "epoch": 0.9751227672548702, "grad_norm": 0.8878688591420227, "learning_rate": 1.013736159736395e-06, "loss": 0.3277, "step": 18070 }, { "epoch": 0.9751767308833846, "grad_norm": 1.0522244172311517, "learning_rate": 1.0136766616588748e-06, "loss": 0.445, "step": 18071 }, { "epoch": 0.975230694511899, "grad_norm": 1.0713606697345743, "learning_rate": 1.0136172925228205e-06, "loss": 0.5239, "step": 18072 }, { "epoch": 0.9752846581404133, "grad_norm": 0.9948468703010666, "learning_rate": 1.0135580523299384e-06, "loss": 0.3708, "step": 18073 }, { "epoch": 0.9753386217689277, "grad_norm": 0.9076397304546973, "learning_rate": 1.013498941081931e-06, "loss": 0.4023, "step": 18074 }, { "epoch": 0.9753925853974421, "grad_norm": 1.076820001182323, "learning_rate": 1.0134399587804975e-06, "loss": 0.4614, "step": 18075 }, { "epoch": 0.9754465490259565, "grad_norm": 0.9155163002381717, "learning_rate": 1.0133811054273324e-06, "loss": 0.397, "step": 18076 }, { "epoch": 0.9755005126544709, "grad_norm": 1.1775540998061875, "learning_rate": 1.0133223810241278e-06, "loss": 0.5366, "step": 18077 }, { "epoch": 0.9755544762829853, "grad_norm": 0.8871661975453764, "learning_rate": 1.013263785572572e-06, "loss": 0.2946, "step": 18078 }, { "epoch": 0.9756084399114997, "grad_norm": 1.0631971254903316, "learning_rate": 1.0132053190743481e-06, "loss": 0.4143, "step": 18079 }, { "epoch": 0.975662403540014, "grad_norm": 1.1122853943000228, "learning_rate": 1.013146981531137e-06, "loss": 0.484, "step": 18080 }, { "epoch": 0.9757163671685284, "grad_norm": 1.1166792031664092, "learning_rate": 1.0130887729446152e-06, "loss": 0.5109, "step": 18081 }, { "epoch": 0.9757703307970428, "grad_norm": 0.8902107840888837, "learning_rate": 1.0130306933164557e-06, "loss": 0.3799, "step": 18082 }, { "epoch": 0.9758242944255572, "grad_norm": 1.021386503212034, "learning_rate": 1.012972742648328e-06, "loss": 0.3747, "step": 
18083 }, { "epoch": 0.9758782580540716, "grad_norm": 1.1694817312060117, "learning_rate": 1.0129149209418972e-06, "loss": 0.3697, "step": 18084 }, { "epoch": 0.975932221682586, "grad_norm": 1.128239919081581, "learning_rate": 1.0128572281988259e-06, "loss": 0.4284, "step": 18085 }, { "epoch": 0.9759861853111004, "grad_norm": 0.8845508203752075, "learning_rate": 1.0127996644207711e-06, "loss": 0.49, "step": 18086 }, { "epoch": 0.9760401489396147, "grad_norm": 1.1007899491637163, "learning_rate": 1.0127422296093888e-06, "loss": 0.5224, "step": 18087 }, { "epoch": 0.9760941125681291, "grad_norm": 1.030130818003272, "learning_rate": 1.0126849237663284e-06, "loss": 0.4841, "step": 18088 }, { "epoch": 0.9761480761966435, "grad_norm": 1.1261481412751173, "learning_rate": 1.0126277468932372e-06, "loss": 0.4721, "step": 18089 }, { "epoch": 0.9762020398251579, "grad_norm": 0.9570717482173775, "learning_rate": 1.012570698991759e-06, "loss": 0.352, "step": 18090 }, { "epoch": 0.9762560034536722, "grad_norm": 1.2416516377528521, "learning_rate": 1.0125137800635332e-06, "loss": 0.5088, "step": 18091 }, { "epoch": 0.9763099670821866, "grad_norm": 1.365636903424236, "learning_rate": 1.0124569901101955e-06, "loss": 0.4724, "step": 18092 }, { "epoch": 0.9763639307107009, "grad_norm": 1.047732205441396, "learning_rate": 1.012400329133378e-06, "loss": 0.4506, "step": 18093 }, { "epoch": 0.9764178943392153, "grad_norm": 1.0665553929723155, "learning_rate": 1.0123437971347098e-06, "loss": 0.4633, "step": 18094 }, { "epoch": 0.9764718579677297, "grad_norm": 0.9598739415894162, "learning_rate": 1.0122873941158153e-06, "loss": 0.463, "step": 18095 }, { "epoch": 0.9765258215962441, "grad_norm": 1.220803164805238, "learning_rate": 1.0122311200783154e-06, "loss": 0.6544, "step": 18096 }, { "epoch": 0.9765797852247585, "grad_norm": 1.042986223605916, "learning_rate": 1.0121749750238279e-06, "loss": 0.4811, "step": 18097 }, { "epoch": 0.9766337488532729, "grad_norm": 0.9266844776555484, "learning_rate": 1.0121189589539662e-06, "loss": 0.4392, "step": 18098 }, { "epoch": 0.9766877124817873, "grad_norm": 0.9544992510563778, "learning_rate": 1.0120630718703397e-06, "loss": 0.3253, "step": 18099 }, { "epoch": 0.9767416761103016, "grad_norm": 1.3082673723914955, "learning_rate": 1.0120073137745557e-06, "loss": 0.6495, "step": 18100 }, { "epoch": 0.976795639738816, "grad_norm": 0.8977290577675325, "learning_rate": 1.0119516846682165e-06, "loss": 0.3739, "step": 18101 }, { "epoch": 0.9768496033673304, "grad_norm": 0.932588049512579, "learning_rate": 1.0118961845529204e-06, "loss": 0.4511, "step": 18102 }, { "epoch": 0.9769035669958448, "grad_norm": 1.2099227209616008, "learning_rate": 1.011840813430263e-06, "loss": 0.5139, "step": 18103 }, { "epoch": 0.9769575306243592, "grad_norm": 0.9918164606383799, "learning_rate": 1.0117855713018356e-06, "loss": 0.3853, "step": 18104 }, { "epoch": 0.9770114942528736, "grad_norm": 1.13099420030521, "learning_rate": 1.0117304581692261e-06, "loss": 0.5049, "step": 18105 }, { "epoch": 0.977065457881388, "grad_norm": 0.7893086738490819, "learning_rate": 1.0116754740340176e-06, "loss": 0.3166, "step": 18106 }, { "epoch": 0.9771194215099023, "grad_norm": 0.9897265959083399, "learning_rate": 1.0116206188977918e-06, "loss": 0.4042, "step": 18107 }, { "epoch": 0.9771733851384167, "grad_norm": 1.109337742688425, "learning_rate": 1.0115658927621242e-06, "loss": 0.5344, "step": 18108 }, { "epoch": 0.9772273487669311, "grad_norm": 1.395479482543543, "learning_rate": 1.0115112956285886e-06, "loss": 
0.6934, "step": 18109 }, { "epoch": 0.9772813123954455, "grad_norm": 1.0004340051323202, "learning_rate": 1.011456827498753e-06, "loss": 0.4164, "step": 18110 }, { "epoch": 0.9773352760239599, "grad_norm": 0.9757732982036992, "learning_rate": 1.0114024883741835e-06, "loss": 0.5241, "step": 18111 }, { "epoch": 0.9773892396524743, "grad_norm": 1.077248122988553, "learning_rate": 1.0113482782564424e-06, "loss": 0.4202, "step": 18112 }, { "epoch": 0.9774432032809887, "grad_norm": 1.10720238150688, "learning_rate": 1.011294197147087e-06, "loss": 0.4188, "step": 18113 }, { "epoch": 0.977497166909503, "grad_norm": 0.8616868165464051, "learning_rate": 1.011240245047672e-06, "loss": 0.3447, "step": 18114 }, { "epoch": 0.9775511305380173, "grad_norm": 1.0005921722472457, "learning_rate": 1.011186421959748e-06, "loss": 0.5084, "step": 18115 }, { "epoch": 0.9776050941665317, "grad_norm": 0.9450088196074476, "learning_rate": 1.0111327278848616e-06, "loss": 0.4369, "step": 18116 }, { "epoch": 0.9776590577950461, "grad_norm": 1.0212645045982813, "learning_rate": 1.0110791628245562e-06, "loss": 0.3825, "step": 18117 }, { "epoch": 0.9777130214235605, "grad_norm": 1.0038556670448877, "learning_rate": 1.0110257267803713e-06, "loss": 0.4267, "step": 18118 }, { "epoch": 0.9777669850520749, "grad_norm": 0.8301868517497739, "learning_rate": 1.010972419753843e-06, "loss": 0.3331, "step": 18119 }, { "epoch": 0.9778209486805893, "grad_norm": 0.862951565202753, "learning_rate": 1.0109192417465032e-06, "loss": 0.3455, "step": 18120 }, { "epoch": 0.9778749123091036, "grad_norm": 1.1781202287271402, "learning_rate": 1.0108661927598801e-06, "loss": 0.5539, "step": 18121 }, { "epoch": 0.977928875937618, "grad_norm": 0.9337509440004241, "learning_rate": 1.0108132727954992e-06, "loss": 0.4734, "step": 18122 }, { "epoch": 0.9779828395661324, "grad_norm": 1.0152066625072462, "learning_rate": 1.0107604818548803e-06, "loss": 0.4454, "step": 18123 }, { "epoch": 0.9780368031946468, "grad_norm": 1.097816332292557, "learning_rate": 1.0107078199395412e-06, "loss": 0.4234, "step": 18124 }, { "epoch": 0.9780907668231612, "grad_norm": 1.2156524692412445, "learning_rate": 1.0106552870509954e-06, "loss": 0.4834, "step": 18125 }, { "epoch": 0.9781447304516756, "grad_norm": 0.8971273857009229, "learning_rate": 1.0106028831907528e-06, "loss": 0.4471, "step": 18126 }, { "epoch": 0.97819869408019, "grad_norm": 1.2180385526090078, "learning_rate": 1.0105506083603194e-06, "loss": 0.5007, "step": 18127 }, { "epoch": 0.9782526577087043, "grad_norm": 0.980561212891799, "learning_rate": 1.0104984625611983e-06, "loss": 0.3225, "step": 18128 }, { "epoch": 0.9783066213372187, "grad_norm": 1.1222499466707594, "learning_rate": 1.0104464457948874e-06, "loss": 0.5624, "step": 18129 }, { "epoch": 0.9783605849657331, "grad_norm": 0.827916510612417, "learning_rate": 1.0103945580628819e-06, "loss": 0.3301, "step": 18130 }, { "epoch": 0.9784145485942475, "grad_norm": 0.9870376600276756, "learning_rate": 1.0103427993666734e-06, "loss": 0.3735, "step": 18131 }, { "epoch": 0.9784685122227619, "grad_norm": 1.0546495304772863, "learning_rate": 1.0102911697077493e-06, "loss": 0.4328, "step": 18132 }, { "epoch": 0.9785224758512763, "grad_norm": 0.8936157840489036, "learning_rate": 1.0102396690875933e-06, "loss": 0.396, "step": 18133 }, { "epoch": 0.9785764394797907, "grad_norm": 0.9965196863527791, "learning_rate": 1.0101882975076858e-06, "loss": 0.5175, "step": 18134 }, { "epoch": 0.978630403108305, "grad_norm": 0.7855980035152029, "learning_rate": 
1.0101370549695036e-06, "loss": 0.3472, "step": 18135 }, { "epoch": 0.9786843667368194, "grad_norm": 0.9879028949683455, "learning_rate": 1.010085941474519e-06, "loss": 0.4519, "step": 18136 }, { "epoch": 0.9787383303653338, "grad_norm": 1.1303004022196244, "learning_rate": 1.010034957024201e-06, "loss": 0.4345, "step": 18137 }, { "epoch": 0.9787922939938482, "grad_norm": 1.1326582539831522, "learning_rate": 1.009984101620015e-06, "loss": 0.4768, "step": 18138 }, { "epoch": 0.9788462576223625, "grad_norm": 1.1087613203064681, "learning_rate": 1.009933375263423e-06, "loss": 0.484, "step": 18139 }, { "epoch": 0.978900221250877, "grad_norm": 1.049135708799523, "learning_rate": 1.0098827779558828e-06, "loss": 0.4053, "step": 18140 }, { "epoch": 0.9789541848793913, "grad_norm": 1.2797077019584613, "learning_rate": 1.0098323096988482e-06, "loss": 0.5732, "step": 18141 }, { "epoch": 0.9790081485079056, "grad_norm": 1.013127326389871, "learning_rate": 1.00978197049377e-06, "loss": 0.3665, "step": 18142 }, { "epoch": 0.97906211213642, "grad_norm": 1.2609954697528132, "learning_rate": 1.009731760342095e-06, "loss": 0.6353, "step": 18143 }, { "epoch": 0.9791160757649344, "grad_norm": 0.809306903059424, "learning_rate": 1.0096816792452664e-06, "loss": 0.4416, "step": 18144 }, { "epoch": 0.9791700393934488, "grad_norm": 0.9907922030743701, "learning_rate": 1.0096317272047235e-06, "loss": 0.4811, "step": 18145 }, { "epoch": 0.9792240030219632, "grad_norm": 1.163130820144943, "learning_rate": 1.0095819042219021e-06, "loss": 0.4712, "step": 18146 }, { "epoch": 0.9792779666504776, "grad_norm": 0.8367245820436914, "learning_rate": 1.0095322102982335e-06, "loss": 0.3716, "step": 18147 }, { "epoch": 0.979331930278992, "grad_norm": 1.0872510078203723, "learning_rate": 1.0094826454351467e-06, "loss": 0.5669, "step": 18148 }, { "epoch": 0.9793858939075063, "grad_norm": 1.0113968467370829, "learning_rate": 1.0094332096340665e-06, "loss": 0.3827, "step": 18149 }, { "epoch": 0.9794398575360207, "grad_norm": 0.9757532364728224, "learning_rate": 1.0093839028964127e-06, "loss": 0.4252, "step": 18150 }, { "epoch": 0.9794938211645351, "grad_norm": 1.017550642637091, "learning_rate": 1.0093347252236032e-06, "loss": 0.5442, "step": 18151 }, { "epoch": 0.9795477847930495, "grad_norm": 1.1081291747849564, "learning_rate": 1.009285676617051e-06, "loss": 0.5742, "step": 18152 }, { "epoch": 0.9796017484215639, "grad_norm": 1.016243254478458, "learning_rate": 1.0092367570781662e-06, "loss": 0.4543, "step": 18153 }, { "epoch": 0.9796557120500783, "grad_norm": 1.042707370596398, "learning_rate": 1.0091879666083543e-06, "loss": 0.4029, "step": 18154 }, { "epoch": 0.9797096756785926, "grad_norm": 1.0885245434916806, "learning_rate": 1.0091393052090178e-06, "loss": 0.4568, "step": 18155 }, { "epoch": 0.979763639307107, "grad_norm": 1.096945357734605, "learning_rate": 1.009090772881556e-06, "loss": 0.4954, "step": 18156 }, { "epoch": 0.9798176029356214, "grad_norm": 1.4061241342917765, "learning_rate": 1.0090423696273622e-06, "loss": 0.675, "step": 18157 }, { "epoch": 0.9798715665641358, "grad_norm": 1.6067755764102338, "learning_rate": 1.0089940954478294e-06, "loss": 0.5401, "step": 18158 }, { "epoch": 0.9799255301926502, "grad_norm": 0.9744164225233323, "learning_rate": 1.0089459503443435e-06, "loss": 0.434, "step": 18159 }, { "epoch": 0.9799794938211646, "grad_norm": 1.2286964899415982, "learning_rate": 1.0088979343182893e-06, "loss": 0.5718, "step": 18160 }, { "epoch": 0.980033457449679, "grad_norm": 1.0881118896369042, 
"learning_rate": 1.008850047371046e-06, "loss": 0.446, "step": 18161 }, { "epoch": 0.9800874210781932, "grad_norm": 1.0466462775271386, "learning_rate": 1.0088022895039907e-06, "loss": 0.5322, "step": 18162 }, { "epoch": 0.9801413847067076, "grad_norm": 0.8850562603511756, "learning_rate": 1.0087546607184951e-06, "loss": 0.3247, "step": 18163 }, { "epoch": 0.980195348335222, "grad_norm": 1.1048349582346668, "learning_rate": 1.0087071610159289e-06, "loss": 0.5944, "step": 18164 }, { "epoch": 0.9802493119637364, "grad_norm": 1.0553682201993737, "learning_rate": 1.0086597903976574e-06, "loss": 0.4352, "step": 18165 }, { "epoch": 0.9803032755922508, "grad_norm": 1.0109869448420752, "learning_rate": 1.008612548865041e-06, "loss": 0.319, "step": 18166 }, { "epoch": 0.9803572392207652, "grad_norm": 1.2179284930988414, "learning_rate": 1.0085654364194384e-06, "loss": 0.5053, "step": 18167 }, { "epoch": 0.9804112028492796, "grad_norm": 1.124397495186284, "learning_rate": 1.0085184530622036e-06, "loss": 0.4701, "step": 18168 }, { "epoch": 0.9804651664777939, "grad_norm": 0.9691286676573289, "learning_rate": 1.0084715987946868e-06, "loss": 0.3604, "step": 18169 }, { "epoch": 0.9805191301063083, "grad_norm": 1.148808657024611, "learning_rate": 1.0084248736182348e-06, "loss": 0.4632, "step": 18170 }, { "epoch": 0.9805730937348227, "grad_norm": 1.1482145516739721, "learning_rate": 1.0083782775341896e-06, "loss": 0.5492, "step": 18171 }, { "epoch": 0.9806270573633371, "grad_norm": 1.04684841000243, "learning_rate": 1.0083318105438916e-06, "loss": 0.6575, "step": 18172 }, { "epoch": 0.9806810209918515, "grad_norm": 0.8746654764198996, "learning_rate": 1.0082854726486762e-06, "loss": 0.3835, "step": 18173 }, { "epoch": 0.9807349846203659, "grad_norm": 0.9826383417690083, "learning_rate": 1.0082392638498743e-06, "loss": 0.479, "step": 18174 }, { "epoch": 0.9807889482488803, "grad_norm": 1.1799744077112881, "learning_rate": 1.008193184148815e-06, "loss": 0.6455, "step": 18175 }, { "epoch": 0.9808429118773946, "grad_norm": 1.183458635674134, "learning_rate": 1.008147233546822e-06, "loss": 0.5509, "step": 18176 }, { "epoch": 0.980896875505909, "grad_norm": 1.1632075306345713, "learning_rate": 1.0081014120452159e-06, "loss": 0.4979, "step": 18177 }, { "epoch": 0.9809508391344234, "grad_norm": 1.1836925945347285, "learning_rate": 1.0080557196453143e-06, "loss": 0.4678, "step": 18178 }, { "epoch": 0.9810048027629378, "grad_norm": 1.0398366517841071, "learning_rate": 1.0080101563484299e-06, "loss": 0.4987, "step": 18179 }, { "epoch": 0.9810587663914522, "grad_norm": 0.9729466164429071, "learning_rate": 1.0079647221558725e-06, "loss": 0.4997, "step": 18180 }, { "epoch": 0.9811127300199666, "grad_norm": 0.9685842662128966, "learning_rate": 1.0079194170689482e-06, "loss": 0.3667, "step": 18181 }, { "epoch": 0.981166693648481, "grad_norm": 0.966241600667927, "learning_rate": 1.0078742410889583e-06, "loss": 0.4646, "step": 18182 }, { "epoch": 0.9812206572769953, "grad_norm": 1.14052648188633, "learning_rate": 1.0078291942172018e-06, "loss": 0.4961, "step": 18183 }, { "epoch": 0.9812746209055097, "grad_norm": 0.950189912568297, "learning_rate": 1.0077842764549738e-06, "loss": 0.4096, "step": 18184 }, { "epoch": 0.981328584534024, "grad_norm": 0.8953338701019746, "learning_rate": 1.0077394878035642e-06, "loss": 0.4281, "step": 18185 }, { "epoch": 0.9813825481625384, "grad_norm": 1.0997180637953392, "learning_rate": 1.007694828264261e-06, "loss": 0.5758, "step": 18186 }, { "epoch": 0.9814365117910528, "grad_norm": 
1.0227271502629796, "learning_rate": 1.0076502978383474e-06, "loss": 0.418, "step": 18187 }, { "epoch": 0.9814904754195672, "grad_norm": 0.9192749042914306, "learning_rate": 1.007605896527103e-06, "loss": 0.4067, "step": 18188 }, { "epoch": 0.9815444390480816, "grad_norm": 0.7734473257320773, "learning_rate": 1.0075616243318053e-06, "loss": 0.376, "step": 18189 }, { "epoch": 0.9815984026765959, "grad_norm": 0.9018667239248379, "learning_rate": 1.0075174812537254e-06, "loss": 0.3639, "step": 18190 }, { "epoch": 0.9816523663051103, "grad_norm": 1.2452280963652373, "learning_rate": 1.0074734672941325e-06, "loss": 0.6006, "step": 18191 }, { "epoch": 0.9817063299336247, "grad_norm": 0.8761715411188457, "learning_rate": 1.0074295824542913e-06, "loss": 0.449, "step": 18192 }, { "epoch": 0.9817602935621391, "grad_norm": 1.1489912186007136, "learning_rate": 1.0073858267354632e-06, "loss": 0.4977, "step": 18193 }, { "epoch": 0.9818142571906535, "grad_norm": 1.0717418091020081, "learning_rate": 1.0073422001389063e-06, "loss": 0.4049, "step": 18194 }, { "epoch": 0.9818682208191679, "grad_norm": 1.1470621841439548, "learning_rate": 1.0072987026658738e-06, "loss": 0.5002, "step": 18195 }, { "epoch": 0.9819221844476823, "grad_norm": 0.9233279024462852, "learning_rate": 1.0072553343176158e-06, "loss": 0.4683, "step": 18196 }, { "epoch": 0.9819761480761966, "grad_norm": 1.2870965510194652, "learning_rate": 1.0072120950953795e-06, "loss": 0.6111, "step": 18197 }, { "epoch": 0.982030111704711, "grad_norm": 1.1447051199033667, "learning_rate": 1.0071689850004073e-06, "loss": 0.5625, "step": 18198 }, { "epoch": 0.9820840753332254, "grad_norm": 1.1349408350999848, "learning_rate": 1.0071260040339378e-06, "loss": 0.3781, "step": 18199 }, { "epoch": 0.9821380389617398, "grad_norm": 1.0449355200693438, "learning_rate": 1.0070831521972072e-06, "loss": 0.5821, "step": 18200 }, { "epoch": 0.9821920025902542, "grad_norm": 1.329689120131221, "learning_rate": 1.0070404294914463e-06, "loss": 0.5435, "step": 18201 }, { "epoch": 0.9822459662187686, "grad_norm": 1.0965281736248673, "learning_rate": 1.0069978359178832e-06, "loss": 0.4836, "step": 18202 }, { "epoch": 0.982299929847283, "grad_norm": 1.0176554257984893, "learning_rate": 1.0069553714777423e-06, "loss": 0.4513, "step": 18203 }, { "epoch": 0.9823538934757973, "grad_norm": 1.040582656031757, "learning_rate": 1.0069130361722436e-06, "loss": 0.5312, "step": 18204 }, { "epoch": 0.9824078571043117, "grad_norm": 1.0010686329789062, "learning_rate": 1.0068708300026043e-06, "loss": 0.4399, "step": 18205 }, { "epoch": 0.9824618207328261, "grad_norm": 0.9041465211844968, "learning_rate": 1.0068287529700377e-06, "loss": 0.425, "step": 18206 }, { "epoch": 0.9825157843613405, "grad_norm": 0.9932138029374277, "learning_rate": 1.006786805075752e-06, "loss": 0.45, "step": 18207 }, { "epoch": 0.9825697479898549, "grad_norm": 1.2245529047345556, "learning_rate": 1.006744986320954e-06, "loss": 0.6005, "step": 18208 }, { "epoch": 0.9826237116183693, "grad_norm": 0.9026483170159882, "learning_rate": 1.0067032967068455e-06, "loss": 0.3546, "step": 18209 }, { "epoch": 0.9826776752468837, "grad_norm": 1.1038519195075662, "learning_rate": 1.0066617362346238e-06, "loss": 0.4307, "step": 18210 }, { "epoch": 0.9827316388753979, "grad_norm": 0.9008630562636736, "learning_rate": 1.0066203049054843e-06, "loss": 0.4698, "step": 18211 }, { "epoch": 0.9827856025039123, "grad_norm": 1.0777629658208496, "learning_rate": 1.0065790027206176e-06, "loss": 0.4676, "step": 18212 }, { "epoch": 
0.9828395661324267, "grad_norm": 1.1308371782448063, "learning_rate": 1.006537829681211e-06, "loss": 0.4926, "step": 18213 }, { "epoch": 0.9828935297609411, "grad_norm": 1.0564011279118206, "learning_rate": 1.0064967857884463e-06, "loss": 0.4305, "step": 18214 }, { "epoch": 0.9829474933894555, "grad_norm": 1.0803578712037736, "learning_rate": 1.0064558710435052e-06, "loss": 0.4842, "step": 18215 }, { "epoch": 0.9830014570179699, "grad_norm": 0.8734324500360199, "learning_rate": 1.0064150854475627e-06, "loss": 0.2905, "step": 18216 }, { "epoch": 0.9830554206464843, "grad_norm": 1.0700042510862018, "learning_rate": 1.0063744290017907e-06, "loss": 0.4753, "step": 18217 }, { "epoch": 0.9831093842749986, "grad_norm": 1.0044028723119616, "learning_rate": 1.0063339017073588e-06, "loss": 0.3678, "step": 18218 }, { "epoch": 0.983163347903513, "grad_norm": 0.8365898435909265, "learning_rate": 1.0062935035654305e-06, "loss": 0.3748, "step": 18219 }, { "epoch": 0.9832173115320274, "grad_norm": 1.1914293477419051, "learning_rate": 1.0062532345771678e-06, "loss": 0.4886, "step": 18220 }, { "epoch": 0.9832712751605418, "grad_norm": 1.0647671882819592, "learning_rate": 1.0062130947437272e-06, "loss": 0.4334, "step": 18221 }, { "epoch": 0.9833252387890562, "grad_norm": 1.034577191696908, "learning_rate": 1.0061730840662634e-06, "loss": 0.4641, "step": 18222 }, { "epoch": 0.9833792024175706, "grad_norm": 1.2306640220021285, "learning_rate": 1.0061332025459254e-06, "loss": 0.4458, "step": 18223 }, { "epoch": 0.9834331660460849, "grad_norm": 1.112177325256744, "learning_rate": 1.00609345018386e-06, "loss": 0.5474, "step": 18224 }, { "epoch": 0.9834871296745993, "grad_norm": 1.0437642462191972, "learning_rate": 1.0060538269812102e-06, "loss": 0.378, "step": 18225 }, { "epoch": 0.9835410933031137, "grad_norm": 1.2021338265143033, "learning_rate": 1.0060143329391136e-06, "loss": 0.466, "step": 18226 }, { "epoch": 0.9835950569316281, "grad_norm": 1.0398276269324418, "learning_rate": 1.0059749680587062e-06, "loss": 0.4721, "step": 18227 }, { "epoch": 0.9836490205601425, "grad_norm": 0.9692963770283574, "learning_rate": 1.0059357323411188e-06, "loss": 0.4055, "step": 18228 }, { "epoch": 0.9837029841886569, "grad_norm": 1.2000271330654384, "learning_rate": 1.0058966257874795e-06, "loss": 0.3847, "step": 18229 }, { "epoch": 0.9837569478171713, "grad_norm": 1.0512583845130368, "learning_rate": 1.0058576483989122e-06, "loss": 0.3482, "step": 18230 }, { "epoch": 0.9838109114456856, "grad_norm": 1.1243446046343173, "learning_rate": 1.005818800176537e-06, "loss": 0.4107, "step": 18231 }, { "epoch": 0.9838648750742, "grad_norm": 1.1610840379514549, "learning_rate": 1.0057800811214707e-06, "loss": 0.4495, "step": 18232 }, { "epoch": 0.9839188387027143, "grad_norm": 0.9316836874987512, "learning_rate": 1.005741491234826e-06, "loss": 0.4049, "step": 18233 }, { "epoch": 0.9839728023312287, "grad_norm": 1.1134203056649714, "learning_rate": 1.0057030305177118e-06, "loss": 0.4329, "step": 18234 }, { "epoch": 0.9840267659597431, "grad_norm": 1.1743277057931736, "learning_rate": 1.0056646989712334e-06, "loss": 0.5086, "step": 18235 }, { "epoch": 0.9840807295882575, "grad_norm": 0.9216401843672322, "learning_rate": 1.0056264965964932e-06, "loss": 0.3802, "step": 18236 }, { "epoch": 0.9841346932167719, "grad_norm": 0.8592541274796697, "learning_rate": 1.0055884233945886e-06, "loss": 0.3381, "step": 18237 }, { "epoch": 0.9841886568452862, "grad_norm": 0.8829869270783117, "learning_rate": 1.0055504793666139e-06, "loss": 0.3898, "step": 
18238 }, { "epoch": 0.9842426204738006, "grad_norm": 0.8629232867351901, "learning_rate": 1.0055126645136596e-06, "loss": 0.3301, "step": 18239 }, { "epoch": 0.984296584102315, "grad_norm": 0.9050872495089389, "learning_rate": 1.0054749788368129e-06, "loss": 0.3133, "step": 18240 }, { "epoch": 0.9843505477308294, "grad_norm": 0.814666072193666, "learning_rate": 1.0054374223371565e-06, "loss": 0.3693, "step": 18241 }, { "epoch": 0.9844045113593438, "grad_norm": 0.9396071443650622, "learning_rate": 1.00539999501577e-06, "loss": 0.3867, "step": 18242 }, { "epoch": 0.9844584749878582, "grad_norm": 1.0118339830823415, "learning_rate": 1.0053626968737292e-06, "loss": 0.4123, "step": 18243 }, { "epoch": 0.9845124386163726, "grad_norm": 1.0145481664941927, "learning_rate": 1.005325527912106e-06, "loss": 0.5768, "step": 18244 }, { "epoch": 0.9845664022448869, "grad_norm": 0.8696590983676783, "learning_rate": 1.005288488131969e-06, "loss": 0.3358, "step": 18245 }, { "epoch": 0.9846203658734013, "grad_norm": 0.8980830216114041, "learning_rate": 1.0052515775343822e-06, "loss": 0.3976, "step": 18246 }, { "epoch": 0.9846743295019157, "grad_norm": 1.0478407376333694, "learning_rate": 1.0052147961204062e-06, "loss": 0.349, "step": 18247 }, { "epoch": 0.9847282931304301, "grad_norm": 1.1066506661839204, "learning_rate": 1.005178143891099e-06, "loss": 0.5298, "step": 18248 }, { "epoch": 0.9847822567589445, "grad_norm": 1.093684639579365, "learning_rate": 1.0051416208475135e-06, "loss": 0.5162, "step": 18249 }, { "epoch": 0.9848362203874589, "grad_norm": 1.2256587780017763, "learning_rate": 1.0051052269906997e-06, "loss": 0.6069, "step": 18250 }, { "epoch": 0.9848901840159733, "grad_norm": 0.9593915962190848, "learning_rate": 1.0050689623217035e-06, "loss": 0.4657, "step": 18251 }, { "epoch": 0.9849441476444876, "grad_norm": 1.0962049463225507, "learning_rate": 1.005032826841567e-06, "loss": 0.5599, "step": 18252 }, { "epoch": 0.984998111273002, "grad_norm": 1.0583406537142928, "learning_rate": 1.004996820551329e-06, "loss": 0.5445, "step": 18253 }, { "epoch": 0.9850520749015164, "grad_norm": 1.0345727799297262, "learning_rate": 1.004960943452024e-06, "loss": 0.3963, "step": 18254 }, { "epoch": 0.9851060385300308, "grad_norm": 1.308932196241908, "learning_rate": 1.0049251955446834e-06, "loss": 0.6512, "step": 18255 }, { "epoch": 0.9851600021585452, "grad_norm": 1.1085152381252514, "learning_rate": 1.004889576830335e-06, "loss": 0.5703, "step": 18256 }, { "epoch": 0.9852139657870596, "grad_norm": 1.2414541017410305, "learning_rate": 1.0048540873100019e-06, "loss": 0.6488, "step": 18257 }, { "epoch": 0.985267929415574, "grad_norm": 1.1522782318195677, "learning_rate": 1.004818726984704e-06, "loss": 0.5326, "step": 18258 }, { "epoch": 0.9853218930440882, "grad_norm": 1.164448385879329, "learning_rate": 1.0047834958554586e-06, "loss": 0.4495, "step": 18259 }, { "epoch": 0.9853758566726026, "grad_norm": 1.0209892377338907, "learning_rate": 1.0047483939232771e-06, "loss": 0.3622, "step": 18260 }, { "epoch": 0.985429820301117, "grad_norm": 1.230433249042254, "learning_rate": 1.0047134211891691e-06, "loss": 0.5905, "step": 18261 }, { "epoch": 0.9854837839296314, "grad_norm": 0.9850723713908215, "learning_rate": 1.0046785776541392e-06, "loss": 0.5285, "step": 18262 }, { "epoch": 0.9855377475581458, "grad_norm": 1.1918676048872154, "learning_rate": 1.0046438633191896e-06, "loss": 0.4449, "step": 18263 }, { "epoch": 0.9855917111866602, "grad_norm": 1.0949680205912922, "learning_rate": 1.0046092781853173e-06, "loss": 
0.4855, "step": 18264 }, { "epoch": 0.9856456748151746, "grad_norm": 1.1016646318917303, "learning_rate": 1.004574822253517e-06, "loss": 0.6209, "step": 18265 }, { "epoch": 0.9856996384436889, "grad_norm": 1.0480242424525588, "learning_rate": 1.0045404955247787e-06, "loss": 0.4905, "step": 18266 }, { "epoch": 0.9857536020722033, "grad_norm": 0.9649795113407106, "learning_rate": 1.004506298000088e-06, "loss": 0.4235, "step": 18267 }, { "epoch": 0.9858075657007177, "grad_norm": 1.0961290326494726, "learning_rate": 1.0044722296804299e-06, "loss": 0.509, "step": 18268 }, { "epoch": 0.9858615293292321, "grad_norm": 0.9931244635726947, "learning_rate": 1.0044382905667814e-06, "loss": 0.4623, "step": 18269 }, { "epoch": 0.9859154929577465, "grad_norm": 0.8983103031769314, "learning_rate": 1.0044044806601188e-06, "loss": 0.3958, "step": 18270 }, { "epoch": 0.9859694565862609, "grad_norm": 0.9824877709717669, "learning_rate": 1.0043707999614145e-06, "loss": 0.3334, "step": 18271 }, { "epoch": 0.9860234202147753, "grad_norm": 1.1206280975716212, "learning_rate": 1.0043372484716354e-06, "loss": 0.5302, "step": 18272 }, { "epoch": 0.9860773838432896, "grad_norm": 0.850834284070151, "learning_rate": 1.0043038261917468e-06, "loss": 0.3346, "step": 18273 }, { "epoch": 0.986131347471804, "grad_norm": 0.9432469932889882, "learning_rate": 1.0042705331227082e-06, "loss": 0.4346, "step": 18274 }, { "epoch": 0.9861853111003184, "grad_norm": 1.1029279909154361, "learning_rate": 1.0042373692654773e-06, "loss": 0.4031, "step": 18275 }, { "epoch": 0.9862392747288328, "grad_norm": 0.9813679784605401, "learning_rate": 1.0042043346210074e-06, "loss": 0.4045, "step": 18276 }, { "epoch": 0.9862932383573472, "grad_norm": 1.1138866055146197, "learning_rate": 1.004171429190247e-06, "loss": 0.5222, "step": 18277 }, { "epoch": 0.9863472019858616, "grad_norm": 0.9178131726245456, "learning_rate": 1.0041386529741428e-06, "loss": 0.2554, "step": 18278 }, { "epoch": 0.986401165614376, "grad_norm": 1.2169431313211292, "learning_rate": 1.0041060059736363e-06, "loss": 0.5749, "step": 18279 }, { "epoch": 0.9864551292428902, "grad_norm": 0.9078935693463321, "learning_rate": 1.0040734881896664e-06, "loss": 0.3543, "step": 18280 }, { "epoch": 0.9865090928714046, "grad_norm": 1.1620783499335867, "learning_rate": 1.0040410996231672e-06, "loss": 0.4035, "step": 18281 }, { "epoch": 0.986563056499919, "grad_norm": 1.014865994007913, "learning_rate": 1.004008840275069e-06, "loss": 0.4407, "step": 18282 }, { "epoch": 0.9866170201284334, "grad_norm": 0.9784484360173267, "learning_rate": 1.0039767101462998e-06, "loss": 0.3863, "step": 18283 }, { "epoch": 0.9866709837569478, "grad_norm": 1.06214586436164, "learning_rate": 1.0039447092377832e-06, "loss": 0.4745, "step": 18284 }, { "epoch": 0.9867249473854622, "grad_norm": 1.103956061930172, "learning_rate": 1.0039128375504383e-06, "loss": 0.4326, "step": 18285 }, { "epoch": 0.9867789110139766, "grad_norm": 1.1560465020155366, "learning_rate": 1.0038810950851816e-06, "loss": 0.5157, "step": 18286 }, { "epoch": 0.9868328746424909, "grad_norm": 0.8174886292650656, "learning_rate": 1.003849481842925e-06, "loss": 0.3161, "step": 18287 }, { "epoch": 0.9868868382710053, "grad_norm": 0.9765321252278635, "learning_rate": 1.0038179978245777e-06, "loss": 0.446, "step": 18288 }, { "epoch": 0.9869408018995197, "grad_norm": 1.0605564076666492, "learning_rate": 1.0037866430310437e-06, "loss": 0.5177, "step": 18289 }, { "epoch": 0.9869947655280341, "grad_norm": 1.0395547023376426, "learning_rate": 
1.0037554174632251e-06, "loss": 0.4179, "step": 18290 }, { "epoch": 0.9870487291565485, "grad_norm": 1.0823578565099994, "learning_rate": 1.0037243211220187e-06, "loss": 0.4319, "step": 18291 }, { "epoch": 0.9871026927850629, "grad_norm": 0.9009497451485586, "learning_rate": 1.0036933540083183e-06, "loss": 0.3799, "step": 18292 }, { "epoch": 0.9871566564135772, "grad_norm": 1.1257290424317652, "learning_rate": 1.0036625161230146e-06, "loss": 0.5548, "step": 18293 }, { "epoch": 0.9872106200420916, "grad_norm": 1.0618977617744, "learning_rate": 1.003631807466993e-06, "loss": 0.4447, "step": 18294 }, { "epoch": 0.987264583670606, "grad_norm": 1.0919200086610794, "learning_rate": 1.0036012280411366e-06, "loss": 0.4259, "step": 18295 }, { "epoch": 0.9873185472991204, "grad_norm": 1.0753906018115966, "learning_rate": 1.0035707778463244e-06, "loss": 0.4556, "step": 18296 }, { "epoch": 0.9873725109276348, "grad_norm": 1.267899617075028, "learning_rate": 1.0035404568834308e-06, "loss": 0.4891, "step": 18297 }, { "epoch": 0.9874264745561492, "grad_norm": 0.8983139915447558, "learning_rate": 1.0035102651533285e-06, "loss": 0.4394, "step": 18298 }, { "epoch": 0.9874804381846636, "grad_norm": 1.3557085263618613, "learning_rate": 1.0034802026568842e-06, "loss": 0.5345, "step": 18299 }, { "epoch": 0.9875344018131779, "grad_norm": 1.199900587713111, "learning_rate": 1.003450269394962e-06, "loss": 0.5055, "step": 18300 }, { "epoch": 0.9875883654416923, "grad_norm": 1.0543647407175287, "learning_rate": 1.003420465368423e-06, "loss": 0.4389, "step": 18301 }, { "epoch": 0.9876423290702067, "grad_norm": 0.9726110282002045, "learning_rate": 1.003390790578123e-06, "loss": 0.3642, "step": 18302 }, { "epoch": 0.9876962926987211, "grad_norm": 1.1788532230638067, "learning_rate": 1.0033612450249152e-06, "loss": 0.4348, "step": 18303 }, { "epoch": 0.9877502563272355, "grad_norm": 1.026343981519351, "learning_rate": 1.0033318287096487e-06, "loss": 0.4196, "step": 18304 }, { "epoch": 0.9878042199557499, "grad_norm": 0.9041360507158261, "learning_rate": 1.0033025416331692e-06, "loss": 0.4252, "step": 18305 }, { "epoch": 0.9878581835842642, "grad_norm": 0.7729073542744118, "learning_rate": 1.0032733837963182e-06, "loss": 0.3015, "step": 18306 }, { "epoch": 0.9879121472127785, "grad_norm": 0.9851343341420472, "learning_rate": 1.0032443551999335e-06, "loss": 0.4195, "step": 18307 }, { "epoch": 0.9879661108412929, "grad_norm": 0.9849982239090818, "learning_rate": 1.0032154558448502e-06, "loss": 0.4183, "step": 18308 }, { "epoch": 0.9880200744698073, "grad_norm": 1.1344501365313695, "learning_rate": 1.0031866857318975e-06, "loss": 0.5231, "step": 18309 }, { "epoch": 0.9880740380983217, "grad_norm": 0.9729465678746738, "learning_rate": 1.003158044861904e-06, "loss": 0.3778, "step": 18310 }, { "epoch": 0.9881280017268361, "grad_norm": 1.1666556995258823, "learning_rate": 1.0031295332356914e-06, "loss": 0.4016, "step": 18311 }, { "epoch": 0.9881819653553505, "grad_norm": 1.0216438191854136, "learning_rate": 1.00310115085408e-06, "loss": 0.4335, "step": 18312 }, { "epoch": 0.9882359289838649, "grad_norm": 1.2932886889274218, "learning_rate": 1.0030728977178854e-06, "loss": 0.7647, "step": 18313 }, { "epoch": 0.9882898926123792, "grad_norm": 0.9643036938347801, "learning_rate": 1.0030447738279195e-06, "loss": 0.5105, "step": 18314 }, { "epoch": 0.9883438562408936, "grad_norm": 0.8920087782880494, "learning_rate": 1.0030167791849907e-06, "loss": 0.3923, "step": 18315 }, { "epoch": 0.988397819869408, "grad_norm": 
1.3145218527508202, "learning_rate": 1.0029889137899034e-06, "loss": 0.5274, "step": 18316 }, { "epoch": 0.9884517834979224, "grad_norm": 1.0744809346904274, "learning_rate": 1.0029611776434588e-06, "loss": 0.4969, "step": 18317 }, { "epoch": 0.9885057471264368, "grad_norm": 0.9080886806558113, "learning_rate": 1.002933570746454e-06, "loss": 0.3739, "step": 18318 }, { "epoch": 0.9885597107549512, "grad_norm": 1.0392965399430079, "learning_rate": 1.002906093099682e-06, "loss": 0.4986, "step": 18319 }, { "epoch": 0.9886136743834656, "grad_norm": 1.2253051490616929, "learning_rate": 1.0028787447039328e-06, "loss": 0.5424, "step": 18320 }, { "epoch": 0.9886676380119799, "grad_norm": 1.0781878166388663, "learning_rate": 1.0028515255599927e-06, "loss": 0.5953, "step": 18321 }, { "epoch": 0.9887216016404943, "grad_norm": 0.9586334071939676, "learning_rate": 1.0028244356686435e-06, "loss": 0.3556, "step": 18322 }, { "epoch": 0.9887755652690087, "grad_norm": 1.143543690527548, "learning_rate": 1.0027974750306643e-06, "loss": 0.5412, "step": 18323 }, { "epoch": 0.9888295288975231, "grad_norm": 1.2168848366629923, "learning_rate": 1.0027706436468302e-06, "loss": 0.3527, "step": 18324 }, { "epoch": 0.9888834925260375, "grad_norm": 0.8038001885478543, "learning_rate": 1.002743941517911e-06, "loss": 0.3098, "step": 18325 }, { "epoch": 0.9889374561545519, "grad_norm": 1.1501976199462125, "learning_rate": 1.002717368644676e-06, "loss": 0.5583, "step": 18326 }, { "epoch": 0.9889914197830663, "grad_norm": 1.0458674390584841, "learning_rate": 1.0026909250278875e-06, "loss": 0.3512, "step": 18327 }, { "epoch": 0.9890453834115805, "grad_norm": 0.9215082740320958, "learning_rate": 1.0026646106683061e-06, "loss": 0.5135, "step": 18328 }, { "epoch": 0.989099347040095, "grad_norm": 0.9944481128050141, "learning_rate": 1.0026384255666876e-06, "loss": 0.3901, "step": 18329 }, { "epoch": 0.9891533106686093, "grad_norm": 1.0945692553158084, "learning_rate": 1.0026123697237854e-06, "loss": 0.4885, "step": 18330 }, { "epoch": 0.9892072742971237, "grad_norm": 1.0166013518014263, "learning_rate": 1.0025864431403483e-06, "loss": 0.5264, "step": 18331 }, { "epoch": 0.9892612379256381, "grad_norm": 1.184214513389542, "learning_rate": 1.0025606458171206e-06, "loss": 0.5963, "step": 18332 }, { "epoch": 0.9893152015541525, "grad_norm": 1.139553714869688, "learning_rate": 1.0025349777548445e-06, "loss": 0.515, "step": 18333 }, { "epoch": 0.9893691651826669, "grad_norm": 1.0193654634684444, "learning_rate": 1.0025094389542579e-06, "loss": 0.5054, "step": 18334 }, { "epoch": 0.9894231288111812, "grad_norm": 1.1947542964153786, "learning_rate": 1.0024840294160936e-06, "loss": 0.5128, "step": 18335 }, { "epoch": 0.9894770924396956, "grad_norm": 0.8995876039764699, "learning_rate": 1.0024587491410834e-06, "loss": 0.3897, "step": 18336 }, { "epoch": 0.98953105606821, "grad_norm": 0.8578067122970012, "learning_rate": 1.0024335981299525e-06, "loss": 0.4387, "step": 18337 }, { "epoch": 0.9895850196967244, "grad_norm": 1.1837956999771444, "learning_rate": 1.0024085763834248e-06, "loss": 0.6932, "step": 18338 }, { "epoch": 0.9896389833252388, "grad_norm": 1.1535021712522362, "learning_rate": 1.002383683902219e-06, "loss": 0.5652, "step": 18339 }, { "epoch": 0.9896929469537532, "grad_norm": 1.2514453156549379, "learning_rate": 1.002358920687051e-06, "loss": 0.6481, "step": 18340 }, { "epoch": 0.9897469105822676, "grad_norm": 1.2411429186322565, "learning_rate": 1.002334286738632e-06, "loss": 0.525, "step": 18341 }, { "epoch": 
0.9898008742107819, "grad_norm": 1.0740097857084852, "learning_rate": 1.00230978205767e-06, "loss": 0.5156, "step": 18342 }, { "epoch": 0.9898548378392963, "grad_norm": 1.103589238652413, "learning_rate": 1.0022854066448694e-06, "loss": 0.5069, "step": 18343 }, { "epoch": 0.9899088014678107, "grad_norm": 0.9663477784777028, "learning_rate": 1.0022611605009309e-06, "loss": 0.3375, "step": 18344 }, { "epoch": 0.9899627650963251, "grad_norm": 0.8746129084834625, "learning_rate": 1.0022370436265513e-06, "loss": 0.2935, "step": 18345 }, { "epoch": 0.9900167287248395, "grad_norm": 0.9755198597388265, "learning_rate": 1.0022130560224238e-06, "loss": 0.4055, "step": 18346 }, { "epoch": 0.9900706923533539, "grad_norm": 0.8641563753235636, "learning_rate": 1.0021891976892377e-06, "loss": 0.3442, "step": 18347 }, { "epoch": 0.9901246559818683, "grad_norm": 1.1223832691252933, "learning_rate": 1.0021654686276785e-06, "loss": 0.4355, "step": 18348 }, { "epoch": 0.9901786196103826, "grad_norm": 1.0096865343119064, "learning_rate": 1.0021418688384287e-06, "loss": 0.3916, "step": 18349 }, { "epoch": 0.990232583238897, "grad_norm": 0.9938323241284259, "learning_rate": 1.0021183983221662e-06, "loss": 0.4249, "step": 18350 }, { "epoch": 0.9902865468674114, "grad_norm": 0.9770875401195024, "learning_rate": 1.002095057079566e-06, "loss": 0.3578, "step": 18351 }, { "epoch": 0.9903405104959258, "grad_norm": 1.0376236197147999, "learning_rate": 1.0020718451112984e-06, "loss": 0.4475, "step": 18352 }, { "epoch": 0.9903944741244401, "grad_norm": 0.901504593003279, "learning_rate": 1.002048762418031e-06, "loss": 0.4348, "step": 18353 }, { "epoch": 0.9904484377529545, "grad_norm": 1.1533133356232634, "learning_rate": 1.002025809000427e-06, "loss": 0.5228, "step": 18354 }, { "epoch": 0.9905024013814689, "grad_norm": 1.2358395099757273, "learning_rate": 1.002002984859146e-06, "loss": 0.6421, "step": 18355 }, { "epoch": 0.9905563650099832, "grad_norm": 1.0244787176520822, "learning_rate": 1.0019802899948443e-06, "loss": 0.502, "step": 18356 }, { "epoch": 0.9906103286384976, "grad_norm": 1.23294297739529, "learning_rate": 1.0019577244081736e-06, "loss": 0.6117, "step": 18357 }, { "epoch": 0.990664292267012, "grad_norm": 0.8909486683820619, "learning_rate": 1.0019352880997832e-06, "loss": 0.3499, "step": 18358 }, { "epoch": 0.9907182558955264, "grad_norm": 1.0525581346518027, "learning_rate": 1.0019129810703171e-06, "loss": 0.4516, "step": 18359 }, { "epoch": 0.9907722195240408, "grad_norm": 0.9040450482318629, "learning_rate": 1.0018908033204178e-06, "loss": 0.3149, "step": 18360 }, { "epoch": 0.9908261831525552, "grad_norm": 1.023266638012293, "learning_rate": 1.0018687548507212e-06, "loss": 0.4593, "step": 18361 }, { "epoch": 0.9908801467810695, "grad_norm": 0.9783588268619413, "learning_rate": 1.0018468356618616e-06, "loss": 0.4835, "step": 18362 }, { "epoch": 0.9909341104095839, "grad_norm": 1.0783952239946148, "learning_rate": 1.0018250457544692e-06, "loss": 0.4296, "step": 18363 }, { "epoch": 0.9909880740380983, "grad_norm": 0.7904294734284498, "learning_rate": 1.00180338512917e-06, "loss": 0.3907, "step": 18364 }, { "epoch": 0.9910420376666127, "grad_norm": 0.9480792552743722, "learning_rate": 1.0017818537865866e-06, "loss": 0.3578, "step": 18365 }, { "epoch": 0.9910960012951271, "grad_norm": 1.1259146636400907, "learning_rate": 1.0017604517273378e-06, "loss": 0.5216, "step": 18366 }, { "epoch": 0.9911499649236415, "grad_norm": 1.127263832397505, "learning_rate": 1.0017391789520385e-06, "loss": 0.4962, "step": 
18367 }, { "epoch": 0.9912039285521559, "grad_norm": 1.0390230515977825, "learning_rate": 1.0017180354613007e-06, "loss": 0.483, "step": 18368 }, { "epoch": 0.9912578921806702, "grad_norm": 0.8836978686143901, "learning_rate": 1.0016970212557315e-06, "loss": 0.3925, "step": 18369 }, { "epoch": 0.9913118558091846, "grad_norm": 1.1148368559451598, "learning_rate": 1.0016761363359355e-06, "loss": 0.5086, "step": 18370 }, { "epoch": 0.991365819437699, "grad_norm": 0.8930625394979353, "learning_rate": 1.0016553807025126e-06, "loss": 0.3637, "step": 18371 }, { "epoch": 0.9914197830662134, "grad_norm": 0.95530829572851, "learning_rate": 1.0016347543560587e-06, "loss": 0.4168, "step": 18372 }, { "epoch": 0.9914737466947278, "grad_norm": 0.9848685547687274, "learning_rate": 1.0016142572971676e-06, "loss": 0.3664, "step": 18373 }, { "epoch": 0.9915277103232422, "grad_norm": 1.1034275370258138, "learning_rate": 1.001593889526428e-06, "loss": 0.4282, "step": 18374 }, { "epoch": 0.9915816739517566, "grad_norm": 1.0762335724900403, "learning_rate": 1.001573651044425e-06, "loss": 0.5589, "step": 18375 }, { "epoch": 0.9916356375802708, "grad_norm": 1.0832882694564596, "learning_rate": 1.0015535418517412e-06, "loss": 0.4344, "step": 18376 }, { "epoch": 0.9916896012087852, "grad_norm": 0.9741162958993312, "learning_rate": 1.001533561948954e-06, "loss": 0.4243, "step": 18377 }, { "epoch": 0.9917435648372996, "grad_norm": 1.0538435157374961, "learning_rate": 1.001513711336637e-06, "loss": 0.3754, "step": 18378 }, { "epoch": 0.991797528465814, "grad_norm": 0.9393132399666979, "learning_rate": 1.0014939900153614e-06, "loss": 0.4748, "step": 18379 }, { "epoch": 0.9918514920943284, "grad_norm": 1.0165722228879244, "learning_rate": 1.0014743979856944e-06, "loss": 0.4148, "step": 18380 }, { "epoch": 0.9919054557228428, "grad_norm": 0.8212015058230271, "learning_rate": 1.0014549352481982e-06, "loss": 0.3125, "step": 18381 }, { "epoch": 0.9919594193513572, "grad_norm": 0.9577020112030686, "learning_rate": 1.0014356018034331e-06, "loss": 0.3572, "step": 18382 }, { "epoch": 0.9920133829798715, "grad_norm": 1.1998388303556253, "learning_rate": 1.001416397651954e-06, "loss": 0.5905, "step": 18383 }, { "epoch": 0.9920673466083859, "grad_norm": 0.7629371555310218, "learning_rate": 1.001397322794313e-06, "loss": 0.3084, "step": 18384 }, { "epoch": 0.9921213102369003, "grad_norm": 1.1516287777766407, "learning_rate": 1.0013783772310584e-06, "loss": 0.5435, "step": 18385 }, { "epoch": 0.9921752738654147, "grad_norm": 0.9510711013953445, "learning_rate": 1.001359560962735e-06, "loss": 0.337, "step": 18386 }, { "epoch": 0.9922292374939291, "grad_norm": 0.9517043076354024, "learning_rate": 1.0013408739898833e-06, "loss": 0.379, "step": 18387 }, { "epoch": 0.9922832011224435, "grad_norm": 1.2029351126352912, "learning_rate": 1.0013223163130402e-06, "loss": 0.6458, "step": 18388 }, { "epoch": 0.9923371647509579, "grad_norm": 1.0955888460614083, "learning_rate": 1.0013038879327394e-06, "loss": 0.4326, "step": 18389 }, { "epoch": 0.9923911283794722, "grad_norm": 1.0751470288464444, "learning_rate": 1.0012855888495108e-06, "loss": 0.4891, "step": 18390 }, { "epoch": 0.9924450920079866, "grad_norm": 0.9190426947517256, "learning_rate": 1.0012674190638795e-06, "loss": 0.3628, "step": 18391 }, { "epoch": 0.992499055636501, "grad_norm": 0.95073847634167, "learning_rate": 1.0012493785763689e-06, "loss": 0.5119, "step": 18392 }, { "epoch": 0.9925530192650154, "grad_norm": 0.930747869874686, "learning_rate": 1.0012314673874961e-06, "loss": 
0.3933, "step": 18393 }, { "epoch": 0.9926069828935298, "grad_norm": 0.9499963741104693, "learning_rate": 1.0012136854977772e-06, "loss": 0.3633, "step": 18394 }, { "epoch": 0.9926609465220442, "grad_norm": 1.0858446387932625, "learning_rate": 1.0011960329077223e-06, "loss": 0.5875, "step": 18395 }, { "epoch": 0.9927149101505586, "grad_norm": 1.1518466083744512, "learning_rate": 1.0011785096178393e-06, "loss": 0.5438, "step": 18396 }, { "epoch": 0.9927688737790729, "grad_norm": 1.0811681321936708, "learning_rate": 1.0011611156286311e-06, "loss": 0.4842, "step": 18397 }, { "epoch": 0.9928228374075873, "grad_norm": 0.8843053358299039, "learning_rate": 1.001143850940599e-06, "loss": 0.4117, "step": 18398 }, { "epoch": 0.9928768010361017, "grad_norm": 1.2024688789809779, "learning_rate": 1.0011267155542377e-06, "loss": 0.4346, "step": 18399 }, { "epoch": 0.992930764664616, "grad_norm": 1.2967011364989678, "learning_rate": 1.0011097094700408e-06, "loss": 0.5817, "step": 18400 }, { "epoch": 0.9929847282931304, "grad_norm": 0.7955485266642397, "learning_rate": 1.0010928326884966e-06, "loss": 0.3022, "step": 18401 }, { "epoch": 0.9930386919216448, "grad_norm": 0.923764783683336, "learning_rate": 1.00107608521009e-06, "loss": 0.3615, "step": 18402 }, { "epoch": 0.9930926555501592, "grad_norm": 0.9749643380748793, "learning_rate": 1.0010594670353027e-06, "loss": 0.4051, "step": 18403 }, { "epoch": 0.9931466191786735, "grad_norm": 1.035658140594875, "learning_rate": 1.001042978164612e-06, "loss": 0.5249, "step": 18404 }, { "epoch": 0.9932005828071879, "grad_norm": 1.0568138016088358, "learning_rate": 1.0010266185984921e-06, "loss": 0.4302, "step": 18405 }, { "epoch": 0.9932545464357023, "grad_norm": 1.0740356356009348, "learning_rate": 1.001010388337413e-06, "loss": 0.527, "step": 18406 }, { "epoch": 0.9933085100642167, "grad_norm": 0.9737360580959202, "learning_rate": 1.0009942873818416e-06, "loss": 0.6706, "step": 18407 }, { "epoch": 0.9933624736927311, "grad_norm": 0.6775708065987449, "learning_rate": 1.0009783157322397e-06, "loss": 0.3566, "step": 18408 }, { "epoch": 0.9934164373212455, "grad_norm": 0.9959302803493967, "learning_rate": 1.0009624733890674e-06, "loss": 0.3142, "step": 18409 }, { "epoch": 0.9934704009497599, "grad_norm": 1.0393653993614935, "learning_rate": 1.0009467603527793e-06, "loss": 0.4812, "step": 18410 }, { "epoch": 0.9935243645782742, "grad_norm": 1.1407391525944253, "learning_rate": 1.0009311766238277e-06, "loss": 0.4533, "step": 18411 }, { "epoch": 0.9935783282067886, "grad_norm": 1.1092502471514785, "learning_rate": 1.00091572220266e-06, "loss": 0.5114, "step": 18412 }, { "epoch": 0.993632291835303, "grad_norm": 0.7989078186076607, "learning_rate": 1.0009003970897197e-06, "loss": 0.3002, "step": 18413 }, { "epoch": 0.9936862554638174, "grad_norm": 1.167664944283381, "learning_rate": 1.0008852012854482e-06, "loss": 0.605, "step": 18414 }, { "epoch": 0.9937402190923318, "grad_norm": 1.1305384630048814, "learning_rate": 1.0008701347902827e-06, "loss": 0.5035, "step": 18415 }, { "epoch": 0.9937941827208462, "grad_norm": 0.8674929221047492, "learning_rate": 1.000855197604655e-06, "loss": 0.4271, "step": 18416 }, { "epoch": 0.9938481463493606, "grad_norm": 1.1583306752921834, "learning_rate": 1.0008403897289952e-06, "loss": 0.5807, "step": 18417 }, { "epoch": 0.9939021099778749, "grad_norm": 1.230555165269435, "learning_rate": 1.0008257111637285e-06, "loss": 0.5731, "step": 18418 }, { "epoch": 0.9939560736063893, "grad_norm": 1.0581796193135984, "learning_rate": 
1.0008111619092773e-06, "loss": 0.459, "step": 18419 }, { "epoch": 0.9940100372349037, "grad_norm": 0.7622152453154231, "learning_rate": 1.0007967419660592e-06, "loss": 0.3016, "step": 18420 }, { "epoch": 0.9940640008634181, "grad_norm": 1.0353011308919744, "learning_rate": 1.0007824513344886e-06, "loss": 0.3731, "step": 18421 }, { "epoch": 0.9941179644919325, "grad_norm": 1.3480856936603853, "learning_rate": 1.0007682900149766e-06, "loss": 0.5996, "step": 18422 }, { "epoch": 0.9941719281204469, "grad_norm": 1.1174011637319903, "learning_rate": 1.0007542580079302e-06, "loss": 0.5645, "step": 18423 }, { "epoch": 0.9942258917489613, "grad_norm": 0.8257801964247194, "learning_rate": 1.0007403553137524e-06, "loss": 0.2721, "step": 18424 }, { "epoch": 0.9942798553774755, "grad_norm": 1.0408760875791672, "learning_rate": 1.0007265819328437e-06, "loss": 0.3688, "step": 18425 }, { "epoch": 0.9943338190059899, "grad_norm": 1.1270854376399546, "learning_rate": 1.000712937865598e-06, "loss": 0.4614, "step": 18426 }, { "epoch": 0.9943877826345043, "grad_norm": 1.0175077814531839, "learning_rate": 1.0006994231124095e-06, "loss": 0.5991, "step": 18427 }, { "epoch": 0.9944417462630187, "grad_norm": 1.0840588258282462, "learning_rate": 1.0006860376736655e-06, "loss": 0.5761, "step": 18428 }, { "epoch": 0.9944957098915331, "grad_norm": 1.081991988769724, "learning_rate": 1.0006727815497511e-06, "loss": 0.4691, "step": 18429 }, { "epoch": 0.9945496735200475, "grad_norm": 0.9791711874572566, "learning_rate": 1.0006596547410475e-06, "loss": 0.5432, "step": 18430 }, { "epoch": 0.9946036371485618, "grad_norm": 0.934016518513832, "learning_rate": 1.0006466572479311e-06, "loss": 0.4284, "step": 18431 }, { "epoch": 0.9946576007770762, "grad_norm": 1.0471205434015671, "learning_rate": 1.0006337890707764e-06, "loss": 0.4074, "step": 18432 }, { "epoch": 0.9947115644055906, "grad_norm": 0.9709949281442777, "learning_rate": 1.0006210502099527e-06, "loss": 0.439, "step": 18433 }, { "epoch": 0.994765528034105, "grad_norm": 1.0319603788403233, "learning_rate": 1.0006084406658263e-06, "loss": 0.3731, "step": 18434 }, { "epoch": 0.9948194916626194, "grad_norm": 1.1243795166210273, "learning_rate": 1.00059596043876e-06, "loss": 0.3781, "step": 18435 }, { "epoch": 0.9948734552911338, "grad_norm": 0.9512007943250874, "learning_rate": 1.0005836095291116e-06, "loss": 0.3594, "step": 18436 }, { "epoch": 0.9949274189196482, "grad_norm": 1.0383897528421595, "learning_rate": 1.000571387937237e-06, "loss": 0.3928, "step": 18437 }, { "epoch": 0.9949813825481625, "grad_norm": 1.0940583949837202, "learning_rate": 1.0005592956634867e-06, "loss": 0.4174, "step": 18438 }, { "epoch": 0.9950353461766769, "grad_norm": 1.0568149430203366, "learning_rate": 1.0005473327082089e-06, "loss": 0.5858, "step": 18439 }, { "epoch": 0.9950893098051913, "grad_norm": 0.8174669010508476, "learning_rate": 1.0005354990717472e-06, "loss": 0.3311, "step": 18440 }, { "epoch": 0.9951432734337057, "grad_norm": 1.2467594726548978, "learning_rate": 1.0005237947544411e-06, "loss": 0.4504, "step": 18441 }, { "epoch": 0.9951972370622201, "grad_norm": 1.0650601044891532, "learning_rate": 1.000512219756628e-06, "loss": 0.4366, "step": 18442 }, { "epoch": 0.9952512006907345, "grad_norm": 1.2101391844157494, "learning_rate": 1.0005007740786396e-06, "loss": 0.4282, "step": 18443 }, { "epoch": 0.9953051643192489, "grad_norm": 1.0443617491936916, "learning_rate": 1.0004894577208057e-06, "loss": 0.4815, "step": 18444 }, { "epoch": 0.9953591279477632, "grad_norm": 
1.1946815732524485, "learning_rate": 1.0004782706834513e-06, "loss": 0.5652, "step": 18445 }, { "epoch": 0.9954130915762776, "grad_norm": 1.1880164362836878, "learning_rate": 1.000467212966898e-06, "loss": 0.4606, "step": 18446 }, { "epoch": 0.995467055204792, "grad_norm": 1.1558755508925032, "learning_rate": 1.0004562845714629e-06, "loss": 0.458, "step": 18447 }, { "epoch": 0.9955210188333063, "grad_norm": 1.0736900782497427, "learning_rate": 1.0004454854974607e-06, "loss": 0.4606, "step": 18448 }, { "epoch": 0.9955749824618207, "grad_norm": 1.2182351023462827, "learning_rate": 1.000434815745202e-06, "loss": 0.3983, "step": 18449 }, { "epoch": 0.9956289460903351, "grad_norm": 1.0677978256505927, "learning_rate": 1.0004242753149934e-06, "loss": 0.4469, "step": 18450 }, { "epoch": 0.9956829097188495, "grad_norm": 0.6395682212083621, "learning_rate": 1.0004138642071371e-06, "loss": 0.2482, "step": 18451 }, { "epoch": 0.9957368733473638, "grad_norm": 0.8296826586991661, "learning_rate": 1.0004035824219334e-06, "loss": 0.2612, "step": 18452 }, { "epoch": 0.9957908369758782, "grad_norm": 1.0048259610996664, "learning_rate": 1.0003934299596769e-06, "loss": 0.4729, "step": 18453 }, { "epoch": 0.9958448006043926, "grad_norm": 0.9542158817059453, "learning_rate": 1.0003834068206597e-06, "loss": 0.4195, "step": 18454 }, { "epoch": 0.995898764232907, "grad_norm": 1.2992264810648746, "learning_rate": 1.0003735130051698e-06, "loss": 0.5161, "step": 18455 }, { "epoch": 0.9959527278614214, "grad_norm": 0.949937494940201, "learning_rate": 1.0003637485134921e-06, "loss": 0.5017, "step": 18456 }, { "epoch": 0.9960066914899358, "grad_norm": 1.0234515795574237, "learning_rate": 1.0003541133459063e-06, "loss": 0.4782, "step": 18457 }, { "epoch": 0.9960606551184502, "grad_norm": 1.0725541641268932, "learning_rate": 1.0003446075026902e-06, "loss": 0.3464, "step": 18458 }, { "epoch": 0.9961146187469645, "grad_norm": 1.2342650459286182, "learning_rate": 1.0003352309841166e-06, "loss": 0.4981, "step": 18459 }, { "epoch": 0.9961685823754789, "grad_norm": 0.8611438178691448, "learning_rate": 1.0003259837904555e-06, "loss": 0.3856, "step": 18460 }, { "epoch": 0.9962225460039933, "grad_norm": 1.0097774627651561, "learning_rate": 1.0003168659219717e-06, "loss": 0.3344, "step": 18461 }, { "epoch": 0.9962765096325077, "grad_norm": 0.95818852402117, "learning_rate": 1.0003078773789279e-06, "loss": 0.446, "step": 18462 }, { "epoch": 0.9963304732610221, "grad_norm": 0.9284324794492886, "learning_rate": 1.0002990181615821e-06, "loss": 0.4524, "step": 18463 }, { "epoch": 0.9963844368895365, "grad_norm": 1.05683498049531, "learning_rate": 1.0002902882701888e-06, "loss": 0.3809, "step": 18464 }, { "epoch": 0.9964384005180509, "grad_norm": 1.0848425931906238, "learning_rate": 1.0002816877049999e-06, "loss": 0.4483, "step": 18465 }, { "epoch": 0.9964923641465652, "grad_norm": 1.2403069683654322, "learning_rate": 1.0002732164662618e-06, "loss": 0.5565, "step": 18466 }, { "epoch": 0.9965463277750796, "grad_norm": 0.9236353907334149, "learning_rate": 1.0002648745542181e-06, "loss": 0.3768, "step": 18467 }, { "epoch": 0.996600291403594, "grad_norm": 0.8173897229028756, "learning_rate": 1.0002566619691082e-06, "loss": 0.3635, "step": 18468 }, { "epoch": 0.9966542550321084, "grad_norm": 1.153126541301487, "learning_rate": 1.0002485787111687e-06, "loss": 0.3295, "step": 18469 }, { "epoch": 0.9967082186606228, "grad_norm": 1.1644339498191476, "learning_rate": 1.0002406247806319e-06, "loss": 0.5345, "step": 18470 }, { "epoch": 
0.9967621822891372, "grad_norm": 1.0862242848214658, "learning_rate": 1.0002328001777263e-06, "loss": 0.4913, "step": 18471 }, { "epoch": 0.9968161459176516, "grad_norm": 1.1026482639617785, "learning_rate": 1.0002251049026762e-06, "loss": 0.445, "step": 18472 }, { "epoch": 0.9968701095461658, "grad_norm": 1.0018764522824024, "learning_rate": 1.0002175389557037e-06, "loss": 0.4788, "step": 18473 }, { "epoch": 0.9969240731746802, "grad_norm": 1.0102257144856674, "learning_rate": 1.000210102337026e-06, "loss": 0.4552, "step": 18474 }, { "epoch": 0.9969780368031946, "grad_norm": 1.3933708877797142, "learning_rate": 1.0002027950468564e-06, "loss": 0.6766, "step": 18475 }, { "epoch": 0.997032000431709, "grad_norm": 0.9302169750457506, "learning_rate": 1.0001956170854051e-06, "loss": 0.3355, "step": 18476 }, { "epoch": 0.9970859640602234, "grad_norm": 1.1552161736911235, "learning_rate": 1.0001885684528785e-06, "loss": 0.4796, "step": 18477 }, { "epoch": 0.9971399276887378, "grad_norm": 0.9029034204146128, "learning_rate": 1.0001816491494793e-06, "loss": 0.511, "step": 18478 }, { "epoch": 0.9971938913172522, "grad_norm": 0.8282888823759261, "learning_rate": 1.0001748591754058e-06, "loss": 0.3374, "step": 18479 }, { "epoch": 0.9972478549457665, "grad_norm": 1.2440616319322944, "learning_rate": 1.000168198530854e-06, "loss": 0.701, "step": 18480 }, { "epoch": 0.9973018185742809, "grad_norm": 1.218837811911342, "learning_rate": 1.000161667216015e-06, "loss": 0.4887, "step": 18481 }, { "epoch": 0.9973557822027953, "grad_norm": 0.9913560782834503, "learning_rate": 1.0001552652310763e-06, "loss": 0.3704, "step": 18482 }, { "epoch": 0.9974097458313097, "grad_norm": 1.083338640168778, "learning_rate": 1.0001489925762223e-06, "loss": 0.4631, "step": 18483 }, { "epoch": 0.9974637094598241, "grad_norm": 1.205246622145469, "learning_rate": 1.0001428492516326e-06, "loss": 0.6625, "step": 18484 }, { "epoch": 0.9975176730883385, "grad_norm": 0.8565959092290395, "learning_rate": 1.0001368352574845e-06, "loss": 0.3393, "step": 18485 }, { "epoch": 0.9975716367168529, "grad_norm": 1.0300081874188178, "learning_rate": 1.00013095059395e-06, "loss": 0.5637, "step": 18486 }, { "epoch": 0.9976256003453672, "grad_norm": 0.9834691041930705, "learning_rate": 1.0001251952611991e-06, "loss": 0.414, "step": 18487 }, { "epoch": 0.9976795639738816, "grad_norm": 1.1629181261064778, "learning_rate": 1.0001195692593972e-06, "loss": 0.4638, "step": 18488 }, { "epoch": 0.997733527602396, "grad_norm": 0.9788066965044775, "learning_rate": 1.0001140725887052e-06, "loss": 0.4518, "step": 18489 }, { "epoch": 0.9977874912309104, "grad_norm": 1.2282858037990427, "learning_rate": 1.0001087052492817e-06, "loss": 0.6476, "step": 18490 }, { "epoch": 0.9978414548594248, "grad_norm": 1.16931709141275, "learning_rate": 1.0001034672412809e-06, "loss": 0.511, "step": 18491 }, { "epoch": 0.9978954184879392, "grad_norm": 1.0003444301489588, "learning_rate": 1.0000983585648536e-06, "loss": 0.3795, "step": 18492 }, { "epoch": 0.9979493821164536, "grad_norm": 0.9238154243232823, "learning_rate": 1.000093379220146e-06, "loss": 0.497, "step": 18493 }, { "epoch": 0.9980033457449679, "grad_norm": 1.0165961505919756, "learning_rate": 1.0000885292073011e-06, "loss": 0.42, "step": 18494 }, { "epoch": 0.9980573093734822, "grad_norm": 1.2355485625727491, "learning_rate": 1.0000838085264594e-06, "loss": 0.4648, "step": 18495 }, { "epoch": 0.9981112730019966, "grad_norm": 1.074809869867822, "learning_rate": 1.0000792171777552e-06, "loss": 0.536, "step": 18496 }, 
{ "epoch": 0.998165236630511, "grad_norm": 1.026334321632097, "learning_rate": 1.0000747551613218e-06, "loss": 0.406, "step": 18497 }, { "epoch": 0.9982192002590254, "grad_norm": 1.1312012954783295, "learning_rate": 1.0000704224772864e-06, "loss": 0.5423, "step": 18498 }, { "epoch": 0.9982731638875398, "grad_norm": 0.9747373496655249, "learning_rate": 1.000066219125774e-06, "loss": 0.4517, "step": 18499 }, { "epoch": 0.9983271275160541, "grad_norm": 1.1569161390039195, "learning_rate": 1.000062145106905e-06, "loss": 0.4424, "step": 18500 }, { "epoch": 0.9983271275160541, "eval_loss": 0.526906430721283, "eval_runtime": 163.1155, "eval_samples_per_second": 21.083, "eval_steps_per_second": 0.883, "step": 18500 }, { "epoch": 0.9983810911445685, "grad_norm": 1.163719449613361, "learning_rate": 1.0000582004207968e-06, "loss": 0.4895, "step": 18501 }, { "epoch": 0.9984350547730829, "grad_norm": 1.149728121926013, "learning_rate": 1.0000543850675636e-06, "loss": 0.5625, "step": 18502 }, { "epoch": 0.9984890184015973, "grad_norm": 1.0144566652765594, "learning_rate": 1.0000506990473135e-06, "loss": 0.4175, "step": 18503 }, { "epoch": 0.9985429820301117, "grad_norm": 1.1109847492181069, "learning_rate": 1.0000471423601532e-06, "loss": 0.445, "step": 18504 }, { "epoch": 0.9985969456586261, "grad_norm": 0.9688401126550942, "learning_rate": 1.000043715006185e-06, "loss": 0.4086, "step": 18505 }, { "epoch": 0.9986509092871405, "grad_norm": 0.9361366216141924, "learning_rate": 1.0000404169855076e-06, "loss": 0.4652, "step": 18506 }, { "epoch": 0.9987048729156548, "grad_norm": 1.1167180563340167, "learning_rate": 1.000037248298215e-06, "loss": 0.4973, "step": 18507 }, { "epoch": 0.9987588365441692, "grad_norm": 1.0932232512383238, "learning_rate": 1.0000342089443993e-06, "loss": 0.4951, "step": 18508 }, { "epoch": 0.9988128001726836, "grad_norm": 1.2186056130783893, "learning_rate": 1.000031298924147e-06, "loss": 0.3838, "step": 18509 }, { "epoch": 0.998866763801198, "grad_norm": 1.1255029218921468, "learning_rate": 1.0000285182375422e-06, "loss": 0.5443, "step": 18510 }, { "epoch": 0.9989207274297124, "grad_norm": 0.974934430479258, "learning_rate": 1.0000258668846643e-06, "loss": 0.3418, "step": 18511 }, { "epoch": 0.9989746910582268, "grad_norm": 1.0549676270814177, "learning_rate": 1.0000233448655903e-06, "loss": 0.3935, "step": 18512 }, { "epoch": 0.9990286546867412, "grad_norm": 1.0916380697085497, "learning_rate": 1.0000209521803917e-06, "loss": 0.4953, "step": 18513 }, { "epoch": 0.9990826183152555, "grad_norm": 1.1054779283956757, "learning_rate": 1.0000186888291385e-06, "loss": 0.4542, "step": 18514 }, { "epoch": 0.9991365819437699, "grad_norm": 0.8938320323817286, "learning_rate": 1.000016554811895e-06, "loss": 0.3199, "step": 18515 }, { "epoch": 0.9991905455722843, "grad_norm": 1.0392140901034475, "learning_rate": 1.0000145501287225e-06, "loss": 0.4311, "step": 18516 }, { "epoch": 0.9992445092007987, "grad_norm": 1.0965112771103944, "learning_rate": 1.0000126747796783e-06, "loss": 0.4336, "step": 18517 }, { "epoch": 0.9992984728293131, "grad_norm": 1.0130596924882802, "learning_rate": 1.0000109287648174e-06, "loss": 0.4628, "step": 18518 }, { "epoch": 0.9993524364578275, "grad_norm": 0.9469850989468779, "learning_rate": 1.0000093120841888e-06, "loss": 0.5356, "step": 18519 }, { "epoch": 0.9994064000863419, "grad_norm": 0.7398953533968945, "learning_rate": 1.0000078247378398e-06, "loss": 0.3257, "step": 18520 }, { "epoch": 0.9994603637148561, "grad_norm": 0.9716800188101506, "learning_rate": 
1.0000064667258125e-06, "loss": 0.3607, "step": 18521 }, { "epoch": 0.9995143273433705, "grad_norm": 0.8158076879826773, "learning_rate": 1.000005238048147e-06, "loss": 0.2466, "step": 18522 }, { "epoch": 0.9995682909718849, "grad_norm": 0.9051159237310575, "learning_rate": 1.000004138704877e-06, "loss": 0.3083, "step": 18523 }, { "epoch": 0.9996222546003993, "grad_norm": 1.0624751290394137, "learning_rate": 1.0000031686960351e-06, "loss": 0.4858, "step": 18524 }, { "epoch": 0.9996762182289137, "grad_norm": 1.0178736989462844, "learning_rate": 1.0000023280216497e-06, "loss": 0.4919, "step": 18525 }, { "epoch": 0.9997301818574281, "grad_norm": 0.8231678679540493, "learning_rate": 1.0000016166817437e-06, "loss": 0.3226, "step": 18526 }, { "epoch": 0.9997841454859425, "grad_norm": 1.2681969720075554, "learning_rate": 1.0000010346763381e-06, "loss": 0.7385, "step": 18527 }, { "epoch": 0.9998381091144568, "grad_norm": 1.1563989415460567, "learning_rate": 1.00000058200545e-06, "loss": 0.5028, "step": 18528 }, { "epoch": 0.9998920727429712, "grad_norm": 1.0234185229700692, "learning_rate": 1.0000002586690923e-06, "loss": 0.4663, "step": 18529 }, { "epoch": 0.9999460363714856, "grad_norm": 0.9519100534808891, "learning_rate": 1.0000000646672737e-06, "loss": 0.392, "step": 18530 }, { "epoch": 1.0, "grad_norm": 0.8605762284142406, "learning_rate": 1.0000000000000002e-06, "loss": 0.294, "step": 18531 } ], "logging_steps": 1, "max_steps": 18531, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3167434831822848.0, "train_batch_size": 3, "trial_name": null, "trial_params": null }
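
A minimal sketch of how a file with this shape could be inspected after training, assuming it is a standard Hugging Face Trainer `trainer_state.json` (the file name, and the idea of summarizing it this way, are illustrative assumptions, not part of the original state file):

# summarize_trainer_state.py -- illustrative only
import json

# Assumed path; adjust to wherever the checkpoint's trainer_state.json lives.
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes training-step entries (with "loss") and eval entries (with "eval_loss").
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"logged training steps: {len(train_logs)} of max_steps={state['max_steps']}")
print(f"final train loss: {train_logs[-1]['loss']} at step {train_logs[-1]['step']}")

if eval_logs:
    # Lowest eval_loss seen across the logged evaluations (run every eval_steps steps).
    best = min(eval_logs, key=lambda e: e["eval_loss"])
    print(f"best eval loss: {best['eval_loss']} at step {best['step']}")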