diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,129533 +1,3 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.2090713025292465, - "eval_steps": 500, - "global_step": 18500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 6.535520554212144e-05, - "grad_norm": 4.307169437408447, - "learning_rate": 1.088139281828074e-08, - "loss": 0.7613, - "step": 1 - }, - { - "epoch": 0.00013071041108424287, - "grad_norm": 4.215761184692383, - "learning_rate": 2.176278563656148e-08, - "loss": 0.668, - "step": 2 - }, - { - "epoch": 0.0001960656166263643, - "grad_norm": 3.614027500152588, - "learning_rate": 3.264417845484222e-08, - "loss": 0.6272, - "step": 3 - }, - { - "epoch": 0.00026142082216848575, - "grad_norm": 3.7208406925201416, - "learning_rate": 4.352557127312296e-08, - "loss": 0.6818, - "step": 4 - }, - { - "epoch": 0.00032677602771060717, - "grad_norm": 3.8955652713775635, - "learning_rate": 5.44069640914037e-08, - "loss": 0.6793, - "step": 5 - }, - { - "epoch": 0.0003921312332527286, - "grad_norm": 3.875957727432251, - "learning_rate": 6.528835690968444e-08, - "loss": 0.6872, - "step": 6 - }, - { - "epoch": 0.00045748643879485, - "grad_norm": 3.9089701175689697, - "learning_rate": 7.616974972796518e-08, - "loss": 0.6867, - "step": 7 - }, - { - "epoch": 0.0005228416443369715, - "grad_norm": 3.8417911529541016, - "learning_rate": 8.705114254624592e-08, - "loss": 0.6714, - "step": 8 - }, - { - "epoch": 0.0005881968498790929, - "grad_norm": 4.269287109375, - "learning_rate": 9.793253536452667e-08, - "loss": 0.7382, - "step": 9 - }, - { - "epoch": 0.0006535520554212143, - "grad_norm": 4.398168087005615, - "learning_rate": 1.088139281828074e-07, - "loss": 0.7119, - "step": 10 - }, - { - "epoch": 0.0007189072609633358, - "grad_norm": 3.9808433055877686, - "learning_rate": 1.1969532100108813e-07, - "loss": 0.6287, - "step": 11 - }, - { - "epoch": 0.0007842624665054572, - "grad_norm": 3.8891844749450684, - "learning_rate": 1.305767138193689e-07, - "loss": 0.7767, - "step": 12 - }, - { - "epoch": 0.0008496176720475786, - "grad_norm": 3.9056179523468018, - "learning_rate": 1.4145810663764961e-07, - "loss": 0.7177, - "step": 13 - }, - { - "epoch": 0.0009149728775897, - "grad_norm": 3.930377960205078, - "learning_rate": 1.5233949945593037e-07, - "loss": 0.6845, - "step": 14 - }, - { - "epoch": 0.0009803280831318215, - "grad_norm": 3.7490644454956055, - "learning_rate": 1.6322089227421112e-07, - "loss": 0.6027, - "step": 15 - }, - { - "epoch": 0.001045683288673943, - "grad_norm": 4.5679473876953125, - "learning_rate": 1.7410228509249185e-07, - "loss": 0.7149, - "step": 16 - }, - { - "epoch": 0.0011110384942160643, - "grad_norm": 4.164201736450195, - "learning_rate": 1.8498367791077258e-07, - "loss": 0.7281, - "step": 17 - }, - { - "epoch": 0.0011763936997581858, - "grad_norm": 4.343791961669922, - "learning_rate": 1.9586507072905333e-07, - "loss": 0.7351, - "step": 18 - }, - { - "epoch": 0.0012417489053003071, - "grad_norm": 4.092403411865234, - "learning_rate": 2.0674646354733408e-07, - "loss": 0.6856, - "step": 19 - }, - { - "epoch": 0.0013071041108424287, - "grad_norm": 4.36989688873291, - "learning_rate": 2.176278563656148e-07, - "loss": 0.7318, - "step": 20 - }, - { - "epoch": 0.00137245931638455, - "grad_norm": 3.744257688522339, - "learning_rate": 2.2850924918389557e-07, - "loss": 0.6401, - "step": 21 - }, - { - "epoch": 0.0014378145219266715, - "grad_norm": 3.8534998893737793, - "learning_rate": 2.3939064200217627e-07, - "loss": 0.6965, - "step": 22 - }, - { - "epoch": 0.0015031697274687928, - "grad_norm": 3.3089194297790527, - "learning_rate": 2.502720348204571e-07, - "loss": 0.6115, - "step": 23 - }, - { - "epoch": 0.0015685249330109144, - "grad_norm": 3.912757158279419, - "learning_rate": 2.611534276387378e-07, - "loss": 0.7333, - "step": 24 - }, - { - "epoch": 0.0016338801385530357, - "grad_norm": 3.9144887924194336, - "learning_rate": 2.7203482045701853e-07, - "loss": 0.6414, - "step": 25 - }, - { - "epoch": 0.0016992353440951572, - "grad_norm": 3.714334011077881, - "learning_rate": 2.8291621327529923e-07, - "loss": 0.6527, - "step": 26 - }, - { - "epoch": 0.0017645905496372785, - "grad_norm": 4.35928201675415, - "learning_rate": 2.9379760609358004e-07, - "loss": 0.6958, - "step": 27 - }, - { - "epoch": 0.0018299457551794, - "grad_norm": 3.618541717529297, - "learning_rate": 3.0467899891186074e-07, - "loss": 0.6722, - "step": 28 - }, - { - "epoch": 0.0018953009607215214, - "grad_norm": 3.630235195159912, - "learning_rate": 3.155603917301415e-07, - "loss": 0.7036, - "step": 29 - }, - { - "epoch": 0.001960656166263643, - "grad_norm": 3.5564510822296143, - "learning_rate": 3.2644178454842224e-07, - "loss": 0.6338, - "step": 30 - }, - { - "epoch": 0.0020260113718057644, - "grad_norm": 3.5693376064300537, - "learning_rate": 3.3732317736670295e-07, - "loss": 0.7431, - "step": 31 - }, - { - "epoch": 0.002091366577347886, - "grad_norm": 3.2249109745025635, - "learning_rate": 3.482045701849837e-07, - "loss": 0.5889, - "step": 32 - }, - { - "epoch": 0.002156721782890007, - "grad_norm": 3.57615065574646, - "learning_rate": 3.590859630032645e-07, - "loss": 0.7309, - "step": 33 - }, - { - "epoch": 0.0022220769884321286, - "grad_norm": 3.233099937438965, - "learning_rate": 3.6996735582154515e-07, - "loss": 0.692, - "step": 34 - }, - { - "epoch": 0.00228743219397425, - "grad_norm": 3.3310513496398926, - "learning_rate": 3.8084874863982596e-07, - "loss": 0.738, - "step": 35 - }, - { - "epoch": 0.0023527873995163717, - "grad_norm": 3.067564010620117, - "learning_rate": 3.9173014145810666e-07, - "loss": 0.6536, - "step": 36 - }, - { - "epoch": 0.0024181426050584928, - "grad_norm": 3.765641689300537, - "learning_rate": 4.026115342763874e-07, - "loss": 0.6706, - "step": 37 - }, - { - "epoch": 0.0024834978106006143, - "grad_norm": 3.6483607292175293, - "learning_rate": 4.1349292709466817e-07, - "loss": 0.7605, - "step": 38 - }, - { - "epoch": 0.002548853016142736, - "grad_norm": 3.1373181343078613, - "learning_rate": 4.2437431991294887e-07, - "loss": 0.6633, - "step": 39 - }, - { - "epoch": 0.0026142082216848573, - "grad_norm": 2.975259304046631, - "learning_rate": 4.352557127312296e-07, - "loss": 0.7719, - "step": 40 - }, - { - "epoch": 0.0026795634272269784, - "grad_norm": 2.4532039165496826, - "learning_rate": 4.461371055495103e-07, - "loss": 0.6455, - "step": 41 - }, - { - "epoch": 0.0027449186327691, - "grad_norm": 2.310720682144165, - "learning_rate": 4.5701849836779113e-07, - "loss": 0.6816, - "step": 42 - }, - { - "epoch": 0.0028102738383112215, - "grad_norm": 2.1589982509613037, - "learning_rate": 4.678998911860719e-07, - "loss": 0.6649, - "step": 43 - }, - { - "epoch": 0.002875629043853343, - "grad_norm": 2.4565489292144775, - "learning_rate": 4.787812840043525e-07, - "loss": 0.7088, - "step": 44 - }, - { - "epoch": 0.002940984249395464, - "grad_norm": 2.1987106800079346, - "learning_rate": 4.896626768226333e-07, - "loss": 0.6978, - "step": 45 - }, - { - "epoch": 0.0030063394549375857, - "grad_norm": 1.9119884967803955, - "learning_rate": 5.005440696409141e-07, - "loss": 0.6952, - "step": 46 - }, - { - "epoch": 0.003071694660479707, - "grad_norm": 2.0680160522460938, - "learning_rate": 5.114254624591948e-07, - "loss": 0.6818, - "step": 47 - }, - { - "epoch": 0.0031370498660218287, - "grad_norm": 1.882829189300537, - "learning_rate": 5.223068552774755e-07, - "loss": 0.6843, - "step": 48 - }, - { - "epoch": 0.0032024050715639503, - "grad_norm": 1.6339185237884521, - "learning_rate": 5.331882480957563e-07, - "loss": 0.5162, - "step": 49 - }, - { - "epoch": 0.0032677602771060714, - "grad_norm": 1.7776230573654175, - "learning_rate": 5.440696409140371e-07, - "loss": 0.6381, - "step": 50 - }, - { - "epoch": 0.003333115482648193, - "grad_norm": 1.873404622077942, - "learning_rate": 5.549510337323178e-07, - "loss": 0.6518, - "step": 51 - }, - { - "epoch": 0.0033984706881903144, - "grad_norm": 1.8278203010559082, - "learning_rate": 5.658324265505985e-07, - "loss": 0.6048, - "step": 52 - }, - { - "epoch": 0.003463825893732436, - "grad_norm": 1.652664303779602, - "learning_rate": 5.767138193688793e-07, - "loss": 0.5727, - "step": 53 - }, - { - "epoch": 0.003529181099274557, - "grad_norm": 1.7577887773513794, - "learning_rate": 5.875952121871601e-07, - "loss": 0.6717, - "step": 54 - }, - { - "epoch": 0.0035945363048166786, - "grad_norm": 1.3795794248580933, - "learning_rate": 5.984766050054407e-07, - "loss": 0.6046, - "step": 55 - }, - { - "epoch": 0.0036598915103588, - "grad_norm": 1.725850224494934, - "learning_rate": 6.093579978237215e-07, - "loss": 0.6673, - "step": 56 - }, - { - "epoch": 0.0037252467159009216, - "grad_norm": 1.3847562074661255, - "learning_rate": 6.202393906420022e-07, - "loss": 0.6046, - "step": 57 - }, - { - "epoch": 0.0037906019214430427, - "grad_norm": 1.551095962524414, - "learning_rate": 6.31120783460283e-07, - "loss": 0.6481, - "step": 58 - }, - { - "epoch": 0.0038559571269851643, - "grad_norm": 1.5927350521087646, - "learning_rate": 6.420021762785637e-07, - "loss": 0.6342, - "step": 59 - }, - { - "epoch": 0.003921312332527286, - "grad_norm": 1.6861894130706787, - "learning_rate": 6.528835690968445e-07, - "loss": 0.7077, - "step": 60 - }, - { - "epoch": 0.003986667538069407, - "grad_norm": 1.5050122737884521, - "learning_rate": 6.637649619151251e-07, - "loss": 0.5497, - "step": 61 - }, - { - "epoch": 0.004052022743611529, - "grad_norm": 1.658370852470398, - "learning_rate": 6.746463547334059e-07, - "loss": 0.7689, - "step": 62 - }, - { - "epoch": 0.00411737794915365, - "grad_norm": 1.7540634870529175, - "learning_rate": 6.855277475516866e-07, - "loss": 0.6374, - "step": 63 - }, - { - "epoch": 0.004182733154695772, - "grad_norm": 1.380056381225586, - "learning_rate": 6.964091403699674e-07, - "loss": 0.5967, - "step": 64 - }, - { - "epoch": 0.004248088360237893, - "grad_norm": 1.303650975227356, - "learning_rate": 7.072905331882482e-07, - "loss": 0.6135, - "step": 65 - }, - { - "epoch": 0.004313443565780014, - "grad_norm": 1.3145781755447388, - "learning_rate": 7.18171926006529e-07, - "loss": 0.5947, - "step": 66 - }, - { - "epoch": 0.004378798771322136, - "grad_norm": 1.2271623611450195, - "learning_rate": 7.290533188248096e-07, - "loss": 0.562, - "step": 67 - }, - { - "epoch": 0.004444153976864257, - "grad_norm": 1.2204347848892212, - "learning_rate": 7.399347116430903e-07, - "loss": 0.6246, - "step": 68 - }, - { - "epoch": 0.004509509182406378, - "grad_norm": 1.298128604888916, - "learning_rate": 7.508161044613712e-07, - "loss": 0.6229, - "step": 69 - }, - { - "epoch": 0.0045748643879485, - "grad_norm": 1.0342358350753784, - "learning_rate": 7.616974972796519e-07, - "loss": 0.5551, - "step": 70 - }, - { - "epoch": 0.004640219593490621, - "grad_norm": 1.2292605638504028, - "learning_rate": 7.725788900979327e-07, - "loss": 0.5936, - "step": 71 - }, - { - "epoch": 0.004705574799032743, - "grad_norm": 1.0556893348693848, - "learning_rate": 7.834602829162133e-07, - "loss": 0.6333, - "step": 72 - }, - { - "epoch": 0.004770930004574864, - "grad_norm": 0.9866349697113037, - "learning_rate": 7.943416757344941e-07, - "loss": 0.6039, - "step": 73 - }, - { - "epoch": 0.0048362852101169855, - "grad_norm": 1.0882861614227295, - "learning_rate": 8.052230685527748e-07, - "loss": 0.5994, - "step": 74 - }, - { - "epoch": 0.0049016404156591075, - "grad_norm": 0.8924552202224731, - "learning_rate": 8.161044613710556e-07, - "loss": 0.5833, - "step": 75 - }, - { - "epoch": 0.004966995621201229, - "grad_norm": 1.0262629985809326, - "learning_rate": 8.269858541893363e-07, - "loss": 0.6139, - "step": 76 - }, - { - "epoch": 0.0050323508267433505, - "grad_norm": 0.7652501463890076, - "learning_rate": 8.37867247007617e-07, - "loss": 0.5224, - "step": 77 - }, - { - "epoch": 0.005097706032285472, - "grad_norm": 0.8548263311386108, - "learning_rate": 8.487486398258977e-07, - "loss": 0.5988, - "step": 78 - }, - { - "epoch": 0.005163061237827593, - "grad_norm": 0.8868620991706848, - "learning_rate": 8.596300326441785e-07, - "loss": 0.6644, - "step": 79 - }, - { - "epoch": 0.005228416443369715, - "grad_norm": 0.7825864553451538, - "learning_rate": 8.705114254624592e-07, - "loss": 0.5369, - "step": 80 - }, - { - "epoch": 0.005293771648911836, - "grad_norm": 0.8478065729141235, - "learning_rate": 8.8139281828074e-07, - "loss": 0.6551, - "step": 81 - }, - { - "epoch": 0.005359126854453957, - "grad_norm": 0.8154911994934082, - "learning_rate": 8.922742110990207e-07, - "loss": 0.6163, - "step": 82 - }, - { - "epoch": 0.005424482059996079, - "grad_norm": 0.8136587142944336, - "learning_rate": 9.031556039173014e-07, - "loss": 0.5972, - "step": 83 - }, - { - "epoch": 0.0054898372655382, - "grad_norm": 0.8032965064048767, - "learning_rate": 9.140369967355823e-07, - "loss": 0.6475, - "step": 84 - }, - { - "epoch": 0.005555192471080322, - "grad_norm": 0.7939515113830566, - "learning_rate": 9.24918389553863e-07, - "loss": 0.6002, - "step": 85 - }, - { - "epoch": 0.005620547676622443, - "grad_norm": 0.8008838295936584, - "learning_rate": 9.357997823721438e-07, - "loss": 0.5833, - "step": 86 - }, - { - "epoch": 0.005685902882164564, - "grad_norm": 0.6839145421981812, - "learning_rate": 9.466811751904245e-07, - "loss": 0.4914, - "step": 87 - }, - { - "epoch": 0.005751258087706686, - "grad_norm": 0.7718969583511353, - "learning_rate": 9.57562568008705e-07, - "loss": 0.5712, - "step": 88 - }, - { - "epoch": 0.005816613293248807, - "grad_norm": 0.6582275032997131, - "learning_rate": 9.68443960826986e-07, - "loss": 0.5437, - "step": 89 - }, - { - "epoch": 0.005881968498790928, - "grad_norm": 0.7192770838737488, - "learning_rate": 9.793253536452666e-07, - "loss": 0.5673, - "step": 90 - }, - { - "epoch": 0.00594732370433305, - "grad_norm": 0.707466185092926, - "learning_rate": 9.902067464635474e-07, - "loss": 0.6058, - "step": 91 - }, - { - "epoch": 0.006012678909875171, - "grad_norm": 0.6474989652633667, - "learning_rate": 1.0010881392818283e-06, - "loss": 0.5328, - "step": 92 - }, - { - "epoch": 0.006078034115417293, - "grad_norm": 0.6916255950927734, - "learning_rate": 1.011969532100109e-06, - "loss": 0.5966, - "step": 93 - }, - { - "epoch": 0.006143389320959414, - "grad_norm": 0.6603280305862427, - "learning_rate": 1.0228509249183896e-06, - "loss": 0.5382, - "step": 94 - }, - { - "epoch": 0.0062087445265015355, - "grad_norm": 0.6775929927825928, - "learning_rate": 1.0337323177366705e-06, - "loss": 0.513, - "step": 95 - }, - { - "epoch": 0.0062740997320436575, - "grad_norm": 0.6800976991653442, - "learning_rate": 1.044613710554951e-06, - "loss": 0.4817, - "step": 96 - }, - { - "epoch": 0.0063394549375857786, - "grad_norm": 0.7037233710289001, - "learning_rate": 1.055495103373232e-06, - "loss": 0.5381, - "step": 97 - }, - { - "epoch": 0.0064048101431279005, - "grad_norm": 0.6074303984642029, - "learning_rate": 1.0663764961915126e-06, - "loss": 0.5317, - "step": 98 - }, - { - "epoch": 0.006470165348670022, - "grad_norm": 0.6007310152053833, - "learning_rate": 1.0772578890097933e-06, - "loss": 0.5476, - "step": 99 - }, - { - "epoch": 0.006535520554212143, - "grad_norm": 0.6698499917984009, - "learning_rate": 1.0881392818280741e-06, - "loss": 0.5226, - "step": 100 - }, - { - "epoch": 0.006600875759754265, - "grad_norm": 0.5805163383483887, - "learning_rate": 1.0990206746463548e-06, - "loss": 0.4965, - "step": 101 - }, - { - "epoch": 0.006666230965296386, - "grad_norm": 0.6301014423370361, - "learning_rate": 1.1099020674646356e-06, - "loss": 0.5327, - "step": 102 - }, - { - "epoch": 0.006731586170838507, - "grad_norm": 0.6079239845275879, - "learning_rate": 1.1207834602829163e-06, - "loss": 0.4963, - "step": 103 - }, - { - "epoch": 0.006796941376380629, - "grad_norm": 0.5865227580070496, - "learning_rate": 1.131664853101197e-06, - "loss": 0.4875, - "step": 104 - }, - { - "epoch": 0.00686229658192275, - "grad_norm": 0.6349737644195557, - "learning_rate": 1.1425462459194778e-06, - "loss": 0.5909, - "step": 105 - }, - { - "epoch": 0.006927651787464872, - "grad_norm": 0.6744226813316345, - "learning_rate": 1.1534276387377586e-06, - "loss": 0.5864, - "step": 106 - }, - { - "epoch": 0.006993006993006993, - "grad_norm": 0.6469442844390869, - "learning_rate": 1.1643090315560393e-06, - "loss": 0.6008, - "step": 107 - }, - { - "epoch": 0.007058362198549114, - "grad_norm": 0.7091807126998901, - "learning_rate": 1.1751904243743201e-06, - "loss": 0.5771, - "step": 108 - }, - { - "epoch": 0.007123717404091236, - "grad_norm": 0.5648328065872192, - "learning_rate": 1.1860718171926008e-06, - "loss": 0.5274, - "step": 109 - }, - { - "epoch": 0.007189072609633357, - "grad_norm": 0.5575926899909973, - "learning_rate": 1.1969532100108814e-06, - "loss": 0.5327, - "step": 110 - }, - { - "epoch": 0.007254427815175479, - "grad_norm": 0.6258563995361328, - "learning_rate": 1.2078346028291623e-06, - "loss": 0.5631, - "step": 111 - }, - { - "epoch": 0.0073197830207176, - "grad_norm": 0.6843926310539246, - "learning_rate": 1.218715995647443e-06, - "loss": 0.5581, - "step": 112 - }, - { - "epoch": 0.007385138226259721, - "grad_norm": 0.5899907946586609, - "learning_rate": 1.2295973884657238e-06, - "loss": 0.5119, - "step": 113 - }, - { - "epoch": 0.007450493431801843, - "grad_norm": 0.5911238789558411, - "learning_rate": 1.2404787812840045e-06, - "loss": 0.5373, - "step": 114 - }, - { - "epoch": 0.007515848637343964, - "grad_norm": 0.5474612712860107, - "learning_rate": 1.251360174102285e-06, - "loss": 0.5233, - "step": 115 - }, - { - "epoch": 0.0075812038428860855, - "grad_norm": 0.551700234413147, - "learning_rate": 1.262241566920566e-06, - "loss": 0.5032, - "step": 116 - }, - { - "epoch": 0.0076465590484282074, - "grad_norm": 0.5410349369049072, - "learning_rate": 1.2731229597388466e-06, - "loss": 0.4826, - "step": 117 - }, - { - "epoch": 0.0077119142539703285, - "grad_norm": 0.5772070288658142, - "learning_rate": 1.2840043525571275e-06, - "loss": 0.5321, - "step": 118 - }, - { - "epoch": 0.0077772694595124505, - "grad_norm": 0.6064501404762268, - "learning_rate": 1.2948857453754083e-06, - "loss": 0.5671, - "step": 119 - }, - { - "epoch": 0.007842624665054572, - "grad_norm": 0.5898416042327881, - "learning_rate": 1.305767138193689e-06, - "loss": 0.5738, - "step": 120 - }, - { - "epoch": 0.007907979870596693, - "grad_norm": 0.5656580924987793, - "learning_rate": 1.3166485310119698e-06, - "loss": 0.479, - "step": 121 - }, - { - "epoch": 0.007973335076138814, - "grad_norm": 0.5723082423210144, - "learning_rate": 1.3275299238302503e-06, - "loss": 0.537, - "step": 122 - }, - { - "epoch": 0.008038690281680937, - "grad_norm": 0.5626292824745178, - "learning_rate": 1.338411316648531e-06, - "loss": 0.4921, - "step": 123 - }, - { - "epoch": 0.008104045487223058, - "grad_norm": 0.5675073862075806, - "learning_rate": 1.3492927094668118e-06, - "loss": 0.4907, - "step": 124 - }, - { - "epoch": 0.008169400692765179, - "grad_norm": 0.6061102151870728, - "learning_rate": 1.3601741022850926e-06, - "loss": 0.5864, - "step": 125 - }, - { - "epoch": 0.0082347558983073, - "grad_norm": 0.5727167725563049, - "learning_rate": 1.3710554951033733e-06, - "loss": 0.547, - "step": 126 - }, - { - "epoch": 0.008300111103849421, - "grad_norm": 0.6190813183784485, - "learning_rate": 1.3819368879216541e-06, - "loss": 0.5945, - "step": 127 - }, - { - "epoch": 0.008365466309391544, - "grad_norm": 0.6756262183189392, - "learning_rate": 1.3928182807399348e-06, - "loss": 0.6157, - "step": 128 - }, - { - "epoch": 0.008430821514933665, - "grad_norm": 0.5898597240447998, - "learning_rate": 1.4036996735582157e-06, - "loss": 0.5841, - "step": 129 - }, - { - "epoch": 0.008496176720475786, - "grad_norm": 0.5485091209411621, - "learning_rate": 1.4145810663764963e-06, - "loss": 0.4964, - "step": 130 - }, - { - "epoch": 0.008561531926017907, - "grad_norm": 0.5897760987281799, - "learning_rate": 1.4254624591947772e-06, - "loss": 0.472, - "step": 131 - }, - { - "epoch": 0.008626887131560028, - "grad_norm": 0.5610101222991943, - "learning_rate": 1.436343852013058e-06, - "loss": 0.5105, - "step": 132 - }, - { - "epoch": 0.00869224233710215, - "grad_norm": 0.6131514310836792, - "learning_rate": 1.4472252448313385e-06, - "loss": 0.5259, - "step": 133 - }, - { - "epoch": 0.008757597542644272, - "grad_norm": 0.520524799823761, - "learning_rate": 1.4581066376496191e-06, - "loss": 0.522, - "step": 134 - }, - { - "epoch": 0.008822952748186393, - "grad_norm": 0.5163785815238953, - "learning_rate": 1.4689880304679e-06, - "loss": 0.4493, - "step": 135 - }, - { - "epoch": 0.008888307953728514, - "grad_norm": 0.5651346445083618, - "learning_rate": 1.4798694232861806e-06, - "loss": 0.544, - "step": 136 - }, - { - "epoch": 0.008953663159270635, - "grad_norm": 0.5506546497344971, - "learning_rate": 1.4907508161044615e-06, - "loss": 0.5048, - "step": 137 - }, - { - "epoch": 0.009019018364812757, - "grad_norm": 0.6239061951637268, - "learning_rate": 1.5016322089227423e-06, - "loss": 0.5648, - "step": 138 - }, - { - "epoch": 0.00908437357035488, - "grad_norm": 0.5096587538719177, - "learning_rate": 1.512513601741023e-06, - "loss": 0.4776, - "step": 139 - }, - { - "epoch": 0.009149728775897, - "grad_norm": 0.646660327911377, - "learning_rate": 1.5233949945593038e-06, - "loss": 0.5995, - "step": 140 - }, - { - "epoch": 0.009215083981439122, - "grad_norm": 0.5526559948921204, - "learning_rate": 1.5342763873775845e-06, - "loss": 0.5316, - "step": 141 - }, - { - "epoch": 0.009280439186981243, - "grad_norm": 0.5419862270355225, - "learning_rate": 1.5451577801958654e-06, - "loss": 0.5316, - "step": 142 - }, - { - "epoch": 0.009345794392523364, - "grad_norm": 0.512829065322876, - "learning_rate": 1.5560391730141458e-06, - "loss": 0.4442, - "step": 143 - }, - { - "epoch": 0.009411149598065487, - "grad_norm": 0.48366427421569824, - "learning_rate": 1.5669205658324266e-06, - "loss": 0.4226, - "step": 144 - }, - { - "epoch": 0.009476504803607608, - "grad_norm": 0.5660455226898193, - "learning_rate": 1.5778019586507073e-06, - "loss": 0.4859, - "step": 145 - }, - { - "epoch": 0.009541860009149729, - "grad_norm": 0.5128727555274963, - "learning_rate": 1.5886833514689882e-06, - "loss": 0.4619, - "step": 146 - }, - { - "epoch": 0.00960721521469185, - "grad_norm": 0.5074844360351562, - "learning_rate": 1.5995647442872688e-06, - "loss": 0.4757, - "step": 147 - }, - { - "epoch": 0.009672570420233971, - "grad_norm": 0.4872952699661255, - "learning_rate": 1.6104461371055497e-06, - "loss": 0.4593, - "step": 148 - }, - { - "epoch": 0.009737925625776094, - "grad_norm": 0.5077556371688843, - "learning_rate": 1.6213275299238303e-06, - "loss": 0.445, - "step": 149 - }, - { - "epoch": 0.009803280831318215, - "grad_norm": 0.5790615081787109, - "learning_rate": 1.6322089227421112e-06, - "loss": 0.5307, - "step": 150 - }, - { - "epoch": 0.009868636036860336, - "grad_norm": 0.5457535982131958, - "learning_rate": 1.643090315560392e-06, - "loss": 0.5595, - "step": 151 - }, - { - "epoch": 0.009933991242402457, - "grad_norm": 0.5287249088287354, - "learning_rate": 1.6539717083786727e-06, - "loss": 0.4813, - "step": 152 - }, - { - "epoch": 0.009999346447944578, - "grad_norm": 0.6061781048774719, - "learning_rate": 1.6648531011969535e-06, - "loss": 0.5805, - "step": 153 - }, - { - "epoch": 0.010064701653486701, - "grad_norm": 0.572921633720398, - "learning_rate": 1.675734494015234e-06, - "loss": 0.5791, - "step": 154 - }, - { - "epoch": 0.010130056859028822, - "grad_norm": 0.5639303922653198, - "learning_rate": 1.6866158868335148e-06, - "loss": 0.5556, - "step": 155 - }, - { - "epoch": 0.010195412064570943, - "grad_norm": 0.5342715382575989, - "learning_rate": 1.6974972796517955e-06, - "loss": 0.471, - "step": 156 - }, - { - "epoch": 0.010260767270113064, - "grad_norm": 0.5192899107933044, - "learning_rate": 1.7083786724700763e-06, - "loss": 0.4722, - "step": 157 - }, - { - "epoch": 0.010326122475655185, - "grad_norm": 0.511927604675293, - "learning_rate": 1.719260065288357e-06, - "loss": 0.4848, - "step": 158 - }, - { - "epoch": 0.010391477681197307, - "grad_norm": 0.48781517148017883, - "learning_rate": 1.7301414581066378e-06, - "loss": 0.4455, - "step": 159 - }, - { - "epoch": 0.01045683288673943, - "grad_norm": 0.5133116245269775, - "learning_rate": 1.7410228509249185e-06, - "loss": 0.4575, - "step": 160 - }, - { - "epoch": 0.01052218809228155, - "grad_norm": 0.5270615220069885, - "learning_rate": 1.7519042437431994e-06, - "loss": 0.4897, - "step": 161 - }, - { - "epoch": 0.010587543297823672, - "grad_norm": 0.5281715393066406, - "learning_rate": 1.76278563656148e-06, - "loss": 0.4943, - "step": 162 - }, - { - "epoch": 0.010652898503365793, - "grad_norm": 0.550919771194458, - "learning_rate": 1.7736670293797609e-06, - "loss": 0.5126, - "step": 163 - }, - { - "epoch": 0.010718253708907914, - "grad_norm": 0.5056918859481812, - "learning_rate": 1.7845484221980413e-06, - "loss": 0.4466, - "step": 164 - }, - { - "epoch": 0.010783608914450037, - "grad_norm": 0.5478942394256592, - "learning_rate": 1.7954298150163222e-06, - "loss": 0.4994, - "step": 165 - }, - { - "epoch": 0.010848964119992158, - "grad_norm": 0.6065598130226135, - "learning_rate": 1.8063112078346028e-06, - "loss": 0.535, - "step": 166 - }, - { - "epoch": 0.010914319325534279, - "grad_norm": 0.5542285442352295, - "learning_rate": 1.8171926006528837e-06, - "loss": 0.5126, - "step": 167 - }, - { - "epoch": 0.0109796745310764, - "grad_norm": 0.5846586227416992, - "learning_rate": 1.8280739934711645e-06, - "loss": 0.5165, - "step": 168 - }, - { - "epoch": 0.011045029736618521, - "grad_norm": 0.5893979072570801, - "learning_rate": 1.8389553862894452e-06, - "loss": 0.5743, - "step": 169 - }, - { - "epoch": 0.011110384942160644, - "grad_norm": 0.5135499238967896, - "learning_rate": 1.849836779107726e-06, - "loss": 0.432, - "step": 170 - }, - { - "epoch": 0.011175740147702765, - "grad_norm": 0.5777080059051514, - "learning_rate": 1.8607181719260067e-06, - "loss": 0.5251, - "step": 171 - }, - { - "epoch": 0.011241095353244886, - "grad_norm": 0.5141600370407104, - "learning_rate": 1.8715995647442875e-06, - "loss": 0.4484, - "step": 172 - }, - { - "epoch": 0.011306450558787007, - "grad_norm": 0.5639011263847351, - "learning_rate": 1.8824809575625682e-06, - "loss": 0.519, - "step": 173 - }, - { - "epoch": 0.011371805764329128, - "grad_norm": 0.5748486518859863, - "learning_rate": 1.893362350380849e-06, - "loss": 0.5692, - "step": 174 - }, - { - "epoch": 0.011437160969871251, - "grad_norm": 0.5394601821899414, - "learning_rate": 1.9042437431991295e-06, - "loss": 0.5143, - "step": 175 - }, - { - "epoch": 0.011502516175413372, - "grad_norm": 0.5378672480583191, - "learning_rate": 1.91512513601741e-06, - "loss": 0.4956, - "step": 176 - }, - { - "epoch": 0.011567871380955493, - "grad_norm": 0.5230949521064758, - "learning_rate": 1.926006528835691e-06, - "loss": 0.4408, - "step": 177 - }, - { - "epoch": 0.011633226586497614, - "grad_norm": 0.5612627863883972, - "learning_rate": 1.936887921653972e-06, - "loss": 0.4842, - "step": 178 - }, - { - "epoch": 0.011698581792039735, - "grad_norm": 0.5498820543289185, - "learning_rate": 1.9477693144722527e-06, - "loss": 0.5395, - "step": 179 - }, - { - "epoch": 0.011763936997581857, - "grad_norm": 0.5984400510787964, - "learning_rate": 1.958650707290533e-06, - "loss": 0.4864, - "step": 180 - }, - { - "epoch": 0.01182929220312398, - "grad_norm": 0.5528789162635803, - "learning_rate": 1.969532100108814e-06, - "loss": 0.5236, - "step": 181 - }, - { - "epoch": 0.0118946474086661, - "grad_norm": 0.5016252994537354, - "learning_rate": 1.980413492927095e-06, - "loss": 0.4496, - "step": 182 - }, - { - "epoch": 0.011960002614208222, - "grad_norm": 0.5519850850105286, - "learning_rate": 1.9912948857453757e-06, - "loss": 0.5349, - "step": 183 - }, - { - "epoch": 0.012025357819750343, - "grad_norm": 0.5407207608222961, - "learning_rate": 2.0021762785636566e-06, - "loss": 0.5174, - "step": 184 - }, - { - "epoch": 0.012090713025292464, - "grad_norm": 0.4994608759880066, - "learning_rate": 2.013057671381937e-06, - "loss": 0.4631, - "step": 185 - }, - { - "epoch": 0.012156068230834587, - "grad_norm": 0.548252284526825, - "learning_rate": 2.023939064200218e-06, - "loss": 0.5058, - "step": 186 - }, - { - "epoch": 0.012221423436376708, - "grad_norm": 0.5238258838653564, - "learning_rate": 2.0348204570184983e-06, - "loss": 0.4947, - "step": 187 - }, - { - "epoch": 0.012286778641918829, - "grad_norm": 0.5684877634048462, - "learning_rate": 2.045701849836779e-06, - "loss": 0.5531, - "step": 188 - }, - { - "epoch": 0.01235213384746095, - "grad_norm": 0.5372764468193054, - "learning_rate": 2.05658324265506e-06, - "loss": 0.4918, - "step": 189 - }, - { - "epoch": 0.012417489053003071, - "grad_norm": 0.5252590775489807, - "learning_rate": 2.067464635473341e-06, - "loss": 0.4838, - "step": 190 - }, - { - "epoch": 0.012482844258545194, - "grad_norm": 0.5476294159889221, - "learning_rate": 2.0783460282916213e-06, - "loss": 0.5394, - "step": 191 - }, - { - "epoch": 0.012548199464087315, - "grad_norm": 0.5454583168029785, - "learning_rate": 2.089227421109902e-06, - "loss": 0.5217, - "step": 192 - }, - { - "epoch": 0.012613554669629436, - "grad_norm": 0.5394318103790283, - "learning_rate": 2.100108813928183e-06, - "loss": 0.4354, - "step": 193 - }, - { - "epoch": 0.012678909875171557, - "grad_norm": 0.5456311702728271, - "learning_rate": 2.110990206746464e-06, - "loss": 0.5058, - "step": 194 - }, - { - "epoch": 0.012744265080713678, - "grad_norm": 0.528677225112915, - "learning_rate": 2.1218715995647448e-06, - "loss": 0.4879, - "step": 195 - }, - { - "epoch": 0.012809620286255801, - "grad_norm": 0.5452241897583008, - "learning_rate": 2.1327529923830252e-06, - "loss": 0.522, - "step": 196 - }, - { - "epoch": 0.012874975491797922, - "grad_norm": 0.5905510187149048, - "learning_rate": 2.1436343852013056e-06, - "loss": 0.5218, - "step": 197 - }, - { - "epoch": 0.012940330697340043, - "grad_norm": 0.48563042283058167, - "learning_rate": 2.1545157780195865e-06, - "loss": 0.4231, - "step": 198 - }, - { - "epoch": 0.013005685902882164, - "grad_norm": 0.49927181005477905, - "learning_rate": 2.1653971708378674e-06, - "loss": 0.4182, - "step": 199 - }, - { - "epoch": 0.013071041108424285, - "grad_norm": 0.4863174259662628, - "learning_rate": 2.1762785636561482e-06, - "loss": 0.4262, - "step": 200 - }, - { - "epoch": 0.013136396313966408, - "grad_norm": 0.5405679941177368, - "learning_rate": 2.187159956474429e-06, - "loss": 0.5124, - "step": 201 - }, - { - "epoch": 0.01320175151950853, - "grad_norm": 0.5243266224861145, - "learning_rate": 2.1980413492927095e-06, - "loss": 0.4921, - "step": 202 - }, - { - "epoch": 0.01326710672505065, - "grad_norm": 0.5484087467193604, - "learning_rate": 2.2089227421109904e-06, - "loss": 0.5368, - "step": 203 - }, - { - "epoch": 0.013332461930592772, - "grad_norm": 0.5455852150917053, - "learning_rate": 2.2198041349292712e-06, - "loss": 0.4925, - "step": 204 - }, - { - "epoch": 0.013397817136134893, - "grad_norm": 0.5337474942207336, - "learning_rate": 2.230685527747552e-06, - "loss": 0.4396, - "step": 205 - }, - { - "epoch": 0.013463172341677014, - "grad_norm": 0.5302766561508179, - "learning_rate": 2.2415669205658325e-06, - "loss": 0.4705, - "step": 206 - }, - { - "epoch": 0.013528527547219137, - "grad_norm": 0.5174290537834167, - "learning_rate": 2.2524483133841134e-06, - "loss": 0.4584, - "step": 207 - }, - { - "epoch": 0.013593882752761258, - "grad_norm": 0.545461893081665, - "learning_rate": 2.263329706202394e-06, - "loss": 0.5213, - "step": 208 - }, - { - "epoch": 0.013659237958303379, - "grad_norm": 0.5689192414283752, - "learning_rate": 2.2742110990206747e-06, - "loss": 0.5755, - "step": 209 - }, - { - "epoch": 0.0137245931638455, - "grad_norm": 0.558050274848938, - "learning_rate": 2.2850924918389556e-06, - "loss": 0.4973, - "step": 210 - }, - { - "epoch": 0.013789948369387621, - "grad_norm": 0.5459199547767639, - "learning_rate": 2.2959738846572364e-06, - "loss": 0.4719, - "step": 211 - }, - { - "epoch": 0.013855303574929744, - "grad_norm": 0.5807853937149048, - "learning_rate": 2.3068552774755173e-06, - "loss": 0.5219, - "step": 212 - }, - { - "epoch": 0.013920658780471865, - "grad_norm": 0.5296688675880432, - "learning_rate": 2.3177366702937977e-06, - "loss": 0.4791, - "step": 213 - }, - { - "epoch": 0.013986013986013986, - "grad_norm": 0.5390053391456604, - "learning_rate": 2.3286180631120786e-06, - "loss": 0.4881, - "step": 214 - }, - { - "epoch": 0.014051369191556107, - "grad_norm": 0.5217953324317932, - "learning_rate": 2.3394994559303594e-06, - "loss": 0.4584, - "step": 215 - }, - { - "epoch": 0.014116724397098228, - "grad_norm": 0.5571444034576416, - "learning_rate": 2.3503808487486403e-06, - "loss": 0.5203, - "step": 216 - }, - { - "epoch": 0.014182079602640351, - "grad_norm": 0.49948057532310486, - "learning_rate": 2.3612622415669207e-06, - "loss": 0.4676, - "step": 217 - }, - { - "epoch": 0.014247434808182472, - "grad_norm": 0.5300005674362183, - "learning_rate": 2.3721436343852016e-06, - "loss": 0.4667, - "step": 218 - }, - { - "epoch": 0.014312790013724593, - "grad_norm": 0.5889579653739929, - "learning_rate": 2.383025027203482e-06, - "loss": 0.4937, - "step": 219 - }, - { - "epoch": 0.014378145219266714, - "grad_norm": 0.5454444885253906, - "learning_rate": 2.393906420021763e-06, - "loss": 0.4882, - "step": 220 - }, - { - "epoch": 0.014443500424808835, - "grad_norm": 0.540638267993927, - "learning_rate": 2.4047878128400437e-06, - "loss": 0.4539, - "step": 221 - }, - { - "epoch": 0.014508855630350958, - "grad_norm": 0.5287627577781677, - "learning_rate": 2.4156692056583246e-06, - "loss": 0.5135, - "step": 222 - }, - { - "epoch": 0.01457421083589308, - "grad_norm": 0.4872298836708069, - "learning_rate": 2.426550598476605e-06, - "loss": 0.4097, - "step": 223 - }, - { - "epoch": 0.0146395660414352, - "grad_norm": 0.5071539878845215, - "learning_rate": 2.437431991294886e-06, - "loss": 0.4748, - "step": 224 - }, - { - "epoch": 0.014704921246977322, - "grad_norm": 0.5522286891937256, - "learning_rate": 2.4483133841131668e-06, - "loss": 0.51, - "step": 225 - }, - { - "epoch": 0.014770276452519443, - "grad_norm": 0.50446617603302, - "learning_rate": 2.4591947769314476e-06, - "loss": 0.4605, - "step": 226 - }, - { - "epoch": 0.014835631658061564, - "grad_norm": 0.55495685338974, - "learning_rate": 2.4700761697497285e-06, - "loss": 0.5133, - "step": 227 - }, - { - "epoch": 0.014900986863603687, - "grad_norm": 0.621311366558075, - "learning_rate": 2.480957562568009e-06, - "loss": 0.5095, - "step": 228 - }, - { - "epoch": 0.014966342069145808, - "grad_norm": 0.4835759401321411, - "learning_rate": 2.4918389553862898e-06, - "loss": 0.3879, - "step": 229 - }, - { - "epoch": 0.015031697274687929, - "grad_norm": 0.5162561535835266, - "learning_rate": 2.50272034820457e-06, - "loss": 0.4907, - "step": 230 - }, - { - "epoch": 0.01509705248023005, - "grad_norm": 0.5197626948356628, - "learning_rate": 2.5136017410228515e-06, - "loss": 0.4758, - "step": 231 - }, - { - "epoch": 0.015162407685772171, - "grad_norm": 0.5136451125144958, - "learning_rate": 2.524483133841132e-06, - "loss": 0.4617, - "step": 232 - }, - { - "epoch": 0.015227762891314294, - "grad_norm": 0.5694302320480347, - "learning_rate": 2.5353645266594124e-06, - "loss": 0.5283, - "step": 233 - }, - { - "epoch": 0.015293118096856415, - "grad_norm": 0.588736355304718, - "learning_rate": 2.5462459194776932e-06, - "loss": 0.5085, - "step": 234 - }, - { - "epoch": 0.015358473302398536, - "grad_norm": 0.5073676109313965, - "learning_rate": 2.557127312295974e-06, - "loss": 0.4521, - "step": 235 - }, - { - "epoch": 0.015423828507940657, - "grad_norm": 0.5521355867385864, - "learning_rate": 2.568008705114255e-06, - "loss": 0.4846, - "step": 236 - }, - { - "epoch": 0.015489183713482778, - "grad_norm": 0.5055086016654968, - "learning_rate": 2.5788900979325354e-06, - "loss": 0.4133, - "step": 237 - }, - { - "epoch": 0.015554538919024901, - "grad_norm": 0.5076500773429871, - "learning_rate": 2.5897714907508167e-06, - "loss": 0.4314, - "step": 238 - }, - { - "epoch": 0.015619894124567022, - "grad_norm": 0.512229859828949, - "learning_rate": 2.600652883569097e-06, - "loss": 0.4631, - "step": 239 - }, - { - "epoch": 0.015685249330109143, - "grad_norm": 0.5498212575912476, - "learning_rate": 2.611534276387378e-06, - "loss": 0.4705, - "step": 240 - }, - { - "epoch": 0.015750604535651264, - "grad_norm": 0.5373813509941101, - "learning_rate": 2.6224156692056584e-06, - "loss": 0.508, - "step": 241 - }, - { - "epoch": 0.015815959741193385, - "grad_norm": 0.5663868188858032, - "learning_rate": 2.6332970620239397e-06, - "loss": 0.5417, - "step": 242 - }, - { - "epoch": 0.015881314946735507, - "grad_norm": 0.4976261556148529, - "learning_rate": 2.64417845484222e-06, - "loss": 0.3833, - "step": 243 - }, - { - "epoch": 0.015946670152277628, - "grad_norm": 0.516413152217865, - "learning_rate": 2.6550598476605005e-06, - "loss": 0.4281, - "step": 244 - }, - { - "epoch": 0.01601202535781975, - "grad_norm": 0.5108800530433655, - "learning_rate": 2.6659412404787814e-06, - "loss": 0.4669, - "step": 245 - }, - { - "epoch": 0.016077380563361873, - "grad_norm": 0.517380952835083, - "learning_rate": 2.676822633297062e-06, - "loss": 0.4966, - "step": 246 - }, - { - "epoch": 0.016142735768903994, - "grad_norm": 0.5903849601745605, - "learning_rate": 2.687704026115343e-06, - "loss": 0.5843, - "step": 247 - }, - { - "epoch": 0.016208090974446115, - "grad_norm": 0.5836236476898193, - "learning_rate": 2.6985854189336236e-06, - "loss": 0.5589, - "step": 248 - }, - { - "epoch": 0.016273446179988237, - "grad_norm": 0.5302822589874268, - "learning_rate": 2.7094668117519044e-06, - "loss": 0.4774, - "step": 249 - }, - { - "epoch": 0.016338801385530358, - "grad_norm": 0.48977068066596985, - "learning_rate": 2.7203482045701853e-06, - "loss": 0.4425, - "step": 250 - }, - { - "epoch": 0.01640415659107248, - "grad_norm": 0.4973001778125763, - "learning_rate": 2.731229597388466e-06, - "loss": 0.4404, - "step": 251 - }, - { - "epoch": 0.0164695117966146, - "grad_norm": 0.49313315749168396, - "learning_rate": 2.7421109902067466e-06, - "loss": 0.4513, - "step": 252 - }, - { - "epoch": 0.01653486700215672, - "grad_norm": 0.5417724847793579, - "learning_rate": 2.752992383025028e-06, - "loss": 0.5055, - "step": 253 - }, - { - "epoch": 0.016600222207698842, - "grad_norm": 0.4942086935043335, - "learning_rate": 2.7638737758433083e-06, - "loss": 0.4373, - "step": 254 - }, - { - "epoch": 0.016665577413240963, - "grad_norm": 0.548223078250885, - "learning_rate": 2.7747551686615887e-06, - "loss": 0.5172, - "step": 255 - }, - { - "epoch": 0.016730932618783088, - "grad_norm": 0.5284595489501953, - "learning_rate": 2.7856365614798696e-06, - "loss": 0.4853, - "step": 256 - }, - { - "epoch": 0.01679628782432521, - "grad_norm": 0.4953025281429291, - "learning_rate": 2.79651795429815e-06, - "loss": 0.4207, - "step": 257 - }, - { - "epoch": 0.01686164302986733, - "grad_norm": 0.553777813911438, - "learning_rate": 2.8073993471164313e-06, - "loss": 0.4984, - "step": 258 - }, - { - "epoch": 0.01692699823540945, - "grad_norm": 0.588208019733429, - "learning_rate": 2.8182807399347118e-06, - "loss": 0.5379, - "step": 259 - }, - { - "epoch": 0.016992353440951572, - "grad_norm": 0.509104311466217, - "learning_rate": 2.8291621327529926e-06, - "loss": 0.4606, - "step": 260 - }, - { - "epoch": 0.017057708646493693, - "grad_norm": 0.5887159109115601, - "learning_rate": 2.8400435255712735e-06, - "loss": 0.4976, - "step": 261 - }, - { - "epoch": 0.017123063852035814, - "grad_norm": 0.49038127064704895, - "learning_rate": 2.8509249183895543e-06, - "loss": 0.4299, - "step": 262 - }, - { - "epoch": 0.017188419057577935, - "grad_norm": 0.5132138133049011, - "learning_rate": 2.8618063112078348e-06, - "loss": 0.4689, - "step": 263 - }, - { - "epoch": 0.017253774263120056, - "grad_norm": 0.4953780472278595, - "learning_rate": 2.872687704026116e-06, - "loss": 0.456, - "step": 264 - }, - { - "epoch": 0.017319129468662178, - "grad_norm": 0.5370753407478333, - "learning_rate": 2.8835690968443965e-06, - "loss": 0.5012, - "step": 265 - }, - { - "epoch": 0.0173844846742043, - "grad_norm": 0.5061358213424683, - "learning_rate": 2.894450489662677e-06, - "loss": 0.4574, - "step": 266 - }, - { - "epoch": 0.017449839879746423, - "grad_norm": 0.5424850583076477, - "learning_rate": 2.9053318824809578e-06, - "loss": 0.5142, - "step": 267 - }, - { - "epoch": 0.017515195085288544, - "grad_norm": 0.5463417172431946, - "learning_rate": 2.9162132752992382e-06, - "loss": 0.5358, - "step": 268 - }, - { - "epoch": 0.017580550290830665, - "grad_norm": 0.4903034567832947, - "learning_rate": 2.9270946681175195e-06, - "loss": 0.433, - "step": 269 - }, - { - "epoch": 0.017645905496372787, - "grad_norm": 0.6892983913421631, - "learning_rate": 2.9379760609358e-06, - "loss": 0.5141, - "step": 270 - }, - { - "epoch": 0.017711260701914908, - "grad_norm": 0.5312873721122742, - "learning_rate": 2.948857453754081e-06, - "loss": 0.5043, - "step": 271 - }, - { - "epoch": 0.01777661590745703, - "grad_norm": 0.5391185879707336, - "learning_rate": 2.9597388465723612e-06, - "loss": 0.4861, - "step": 272 - }, - { - "epoch": 0.01784197111299915, - "grad_norm": 0.5108627676963806, - "learning_rate": 2.9706202393906425e-06, - "loss": 0.5066, - "step": 273 - }, - { - "epoch": 0.01790732631854127, - "grad_norm": 0.5569199323654175, - "learning_rate": 2.981501632208923e-06, - "loss": 0.5139, - "step": 274 - }, - { - "epoch": 0.017972681524083392, - "grad_norm": 0.5281971096992493, - "learning_rate": 2.9923830250272034e-06, - "loss": 0.4751, - "step": 275 - }, - { - "epoch": 0.018038036729625513, - "grad_norm": 0.538384735584259, - "learning_rate": 3.0032644178454847e-06, - "loss": 0.495, - "step": 276 - }, - { - "epoch": 0.018103391935167638, - "grad_norm": 0.5089460611343384, - "learning_rate": 3.014145810663765e-06, - "loss": 0.4497, - "step": 277 - }, - { - "epoch": 0.01816874714070976, - "grad_norm": 0.5131743550300598, - "learning_rate": 3.025027203482046e-06, - "loss": 0.3966, - "step": 278 - }, - { - "epoch": 0.01823410234625188, - "grad_norm": 0.5346877574920654, - "learning_rate": 3.0359085963003264e-06, - "loss": 0.4881, - "step": 279 - }, - { - "epoch": 0.018299457551794, - "grad_norm": 0.5207666158676147, - "learning_rate": 3.0467899891186077e-06, - "loss": 0.4874, - "step": 280 - }, - { - "epoch": 0.018364812757336122, - "grad_norm": 0.4716075658798218, - "learning_rate": 3.057671381936888e-06, - "loss": 0.3783, - "step": 281 - }, - { - "epoch": 0.018430167962878243, - "grad_norm": 0.5745481848716736, - "learning_rate": 3.068552774755169e-06, - "loss": 0.5241, - "step": 282 - }, - { - "epoch": 0.018495523168420364, - "grad_norm": 0.5004214644432068, - "learning_rate": 3.0794341675734494e-06, - "loss": 0.4256, - "step": 283 - }, - { - "epoch": 0.018560878373962485, - "grad_norm": 0.4955251216888428, - "learning_rate": 3.0903155603917307e-06, - "loss": 0.4184, - "step": 284 - }, - { - "epoch": 0.018626233579504606, - "grad_norm": 0.48548418283462524, - "learning_rate": 3.101196953210011e-06, - "loss": 0.4214, - "step": 285 - }, - { - "epoch": 0.018691588785046728, - "grad_norm": 0.5416601896286011, - "learning_rate": 3.1120783460282916e-06, - "loss": 0.5082, - "step": 286 - }, - { - "epoch": 0.01875694399058885, - "grad_norm": 0.6502732634544373, - "learning_rate": 3.122959738846573e-06, - "loss": 0.6006, - "step": 287 - }, - { - "epoch": 0.018822299196130973, - "grad_norm": 0.5318127870559692, - "learning_rate": 3.1338411316648533e-06, - "loss": 0.4629, - "step": 288 - }, - { - "epoch": 0.018887654401673094, - "grad_norm": 0.5368547439575195, - "learning_rate": 3.144722524483134e-06, - "loss": 0.4913, - "step": 289 - }, - { - "epoch": 0.018953009607215215, - "grad_norm": 0.5342006087303162, - "learning_rate": 3.1556039173014146e-06, - "loss": 0.5098, - "step": 290 - }, - { - "epoch": 0.019018364812757337, - "grad_norm": 0.5251221060752869, - "learning_rate": 3.166485310119696e-06, - "loss": 0.4688, - "step": 291 - }, - { - "epoch": 0.019083720018299458, - "grad_norm": 0.5649088621139526, - "learning_rate": 3.1773667029379763e-06, - "loss": 0.4969, - "step": 292 - }, - { - "epoch": 0.01914907522384158, - "grad_norm": 0.5344524383544922, - "learning_rate": 3.188248095756257e-06, - "loss": 0.5144, - "step": 293 - }, - { - "epoch": 0.0192144304293837, - "grad_norm": 0.5382475852966309, - "learning_rate": 3.1991294885745376e-06, - "loss": 0.4812, - "step": 294 - }, - { - "epoch": 0.01927978563492582, - "grad_norm": 0.5152425765991211, - "learning_rate": 3.210010881392819e-06, - "loss": 0.4366, - "step": 295 - }, - { - "epoch": 0.019345140840467942, - "grad_norm": 0.511518657207489, - "learning_rate": 3.2208922742110993e-06, - "loss": 0.45, - "step": 296 - }, - { - "epoch": 0.019410496046010063, - "grad_norm": 0.6026496291160583, - "learning_rate": 3.2317736670293798e-06, - "loss": 0.5255, - "step": 297 - }, - { - "epoch": 0.019475851251552188, - "grad_norm": 0.5703912973403931, - "learning_rate": 3.2426550598476606e-06, - "loss": 0.5176, - "step": 298 - }, - { - "epoch": 0.01954120645709431, - "grad_norm": 0.5228263735771179, - "learning_rate": 3.2535364526659415e-06, - "loss": 0.4564, - "step": 299 - }, - { - "epoch": 0.01960656166263643, - "grad_norm": 0.5298845171928406, - "learning_rate": 3.2644178454842223e-06, - "loss": 0.4319, - "step": 300 - }, - { - "epoch": 0.01967191686817855, - "grad_norm": 0.5105089545249939, - "learning_rate": 3.2752992383025028e-06, - "loss": 0.4404, - "step": 301 - }, - { - "epoch": 0.019737272073720672, - "grad_norm": 0.5126194357872009, - "learning_rate": 3.286180631120784e-06, - "loss": 0.4531, - "step": 302 - }, - { - "epoch": 0.019802627279262793, - "grad_norm": 0.5196699500083923, - "learning_rate": 3.2970620239390645e-06, - "loss": 0.4621, - "step": 303 - }, - { - "epoch": 0.019867982484804914, - "grad_norm": 0.5301274657249451, - "learning_rate": 3.3079434167573454e-06, - "loss": 0.4821, - "step": 304 - }, - { - "epoch": 0.019933337690347035, - "grad_norm": 0.5426238775253296, - "learning_rate": 3.318824809575626e-06, - "loss": 0.4118, - "step": 305 - }, - { - "epoch": 0.019998692895889156, - "grad_norm": 0.602854311466217, - "learning_rate": 3.329706202393907e-06, - "loss": 0.5386, - "step": 306 - }, - { - "epoch": 0.020064048101431278, - "grad_norm": 0.5767044425010681, - "learning_rate": 3.3405875952121875e-06, - "loss": 0.5179, - "step": 307 - }, - { - "epoch": 0.020129403306973402, - "grad_norm": 0.5042062997817993, - "learning_rate": 3.351468988030468e-06, - "loss": 0.4432, - "step": 308 - }, - { - "epoch": 0.020194758512515523, - "grad_norm": 0.524115800857544, - "learning_rate": 3.362350380848749e-06, - "loss": 0.4366, - "step": 309 - }, - { - "epoch": 0.020260113718057644, - "grad_norm": 0.5185021758079529, - "learning_rate": 3.3732317736670297e-06, - "loss": 0.4283, - "step": 310 - }, - { - "epoch": 0.020325468923599765, - "grad_norm": 0.5096868872642517, - "learning_rate": 3.3841131664853105e-06, - "loss": 0.456, - "step": 311 - }, - { - "epoch": 0.020390824129141887, - "grad_norm": 0.513860821723938, - "learning_rate": 3.394994559303591e-06, - "loss": 0.4377, - "step": 312 - }, - { - "epoch": 0.020456179334684008, - "grad_norm": 0.5447676777839661, - "learning_rate": 3.4058759521218722e-06, - "loss": 0.4969, - "step": 313 - }, - { - "epoch": 0.02052153454022613, - "grad_norm": 0.5675314664840698, - "learning_rate": 3.4167573449401527e-06, - "loss": 0.4798, - "step": 314 - }, - { - "epoch": 0.02058688974576825, - "grad_norm": 0.5476747155189514, - "learning_rate": 3.4276387377584335e-06, - "loss": 0.473, - "step": 315 - }, - { - "epoch": 0.02065224495131037, - "grad_norm": 0.5846548676490784, - "learning_rate": 3.438520130576714e-06, - "loss": 0.5253, - "step": 316 - }, - { - "epoch": 0.020717600156852492, - "grad_norm": 0.5542411804199219, - "learning_rate": 3.4494015233949944e-06, - "loss": 0.4818, - "step": 317 - }, - { - "epoch": 0.020782955362394613, - "grad_norm": 0.48111483454704285, - "learning_rate": 3.4602829162132757e-06, - "loss": 0.4281, - "step": 318 - }, - { - "epoch": 0.020848310567936738, - "grad_norm": 0.5389544367790222, - "learning_rate": 3.471164309031556e-06, - "loss": 0.4577, - "step": 319 - }, - { - "epoch": 0.02091366577347886, - "grad_norm": 0.5208144783973694, - "learning_rate": 3.482045701849837e-06, - "loss": 0.3836, - "step": 320 - }, - { - "epoch": 0.02097902097902098, - "grad_norm": 0.56230229139328, - "learning_rate": 3.4929270946681174e-06, - "loss": 0.4785, - "step": 321 - }, - { - "epoch": 0.0210443761845631, - "grad_norm": 0.5516102313995361, - "learning_rate": 3.5038084874863987e-06, - "loss": 0.4941, - "step": 322 - }, - { - "epoch": 0.021109731390105222, - "grad_norm": 0.561265766620636, - "learning_rate": 3.514689880304679e-06, - "loss": 0.4969, - "step": 323 - }, - { - "epoch": 0.021175086595647343, - "grad_norm": 0.5460501909255981, - "learning_rate": 3.52557127312296e-06, - "loss": 0.4945, - "step": 324 - }, - { - "epoch": 0.021240441801189464, - "grad_norm": 0.6526318192481995, - "learning_rate": 3.536452665941241e-06, - "loss": 0.49, - "step": 325 - }, - { - "epoch": 0.021305797006731585, - "grad_norm": 0.5364881157875061, - "learning_rate": 3.5473340587595217e-06, - "loss": 0.4776, - "step": 326 - }, - { - "epoch": 0.021371152212273706, - "grad_norm": 0.5304608941078186, - "learning_rate": 3.558215451577802e-06, - "loss": 0.4669, - "step": 327 - }, - { - "epoch": 0.021436507417815828, - "grad_norm": 0.5698348879814148, - "learning_rate": 3.5690968443960826e-06, - "loss": 0.4828, - "step": 328 - }, - { - "epoch": 0.021501862623357952, - "grad_norm": 0.5477334260940552, - "learning_rate": 3.579978237214364e-06, - "loss": 0.4854, - "step": 329 - }, - { - "epoch": 0.021567217828900073, - "grad_norm": 0.5482262969017029, - "learning_rate": 3.5908596300326443e-06, - "loss": 0.4708, - "step": 330 - }, - { - "epoch": 0.021632573034442194, - "grad_norm": 0.7397144436836243, - "learning_rate": 3.601741022850925e-06, - "loss": 0.5223, - "step": 331 - }, - { - "epoch": 0.021697928239984315, - "grad_norm": 0.5388163328170776, - "learning_rate": 3.6126224156692056e-06, - "loss": 0.4929, - "step": 332 - }, - { - "epoch": 0.021763283445526437, - "grad_norm": 0.511736273765564, - "learning_rate": 3.623503808487487e-06, - "loss": 0.4483, - "step": 333 - }, - { - "epoch": 0.021828638651068558, - "grad_norm": 0.5120736360549927, - "learning_rate": 3.6343852013057673e-06, - "loss": 0.46, - "step": 334 - }, - { - "epoch": 0.02189399385661068, - "grad_norm": 0.5173547863960266, - "learning_rate": 3.645266594124048e-06, - "loss": 0.4578, - "step": 335 - }, - { - "epoch": 0.0219593490621528, - "grad_norm": 0.5703136324882507, - "learning_rate": 3.656147986942329e-06, - "loss": 0.4723, - "step": 336 - }, - { - "epoch": 0.02202470426769492, - "grad_norm": 0.5174160003662109, - "learning_rate": 3.66702937976061e-06, - "loss": 0.4545, - "step": 337 - }, - { - "epoch": 0.022090059473237042, - "grad_norm": 0.5286480188369751, - "learning_rate": 3.6779107725788904e-06, - "loss": 0.4637, - "step": 338 - }, - { - "epoch": 0.022155414678779163, - "grad_norm": 0.5682716369628906, - "learning_rate": 3.688792165397171e-06, - "loss": 0.5242, - "step": 339 - }, - { - "epoch": 0.022220769884321288, - "grad_norm": 0.5390458106994629, - "learning_rate": 3.699673558215452e-06, - "loss": 0.4419, - "step": 340 - }, - { - "epoch": 0.02228612508986341, - "grad_norm": 0.541253387928009, - "learning_rate": 3.7105549510337325e-06, - "loss": 0.4825, - "step": 341 - }, - { - "epoch": 0.02235148029540553, - "grad_norm": 0.49162402749061584, - "learning_rate": 3.7214363438520134e-06, - "loss": 0.4254, - "step": 342 - }, - { - "epoch": 0.02241683550094765, - "grad_norm": 0.5324705839157104, - "learning_rate": 3.732317736670294e-06, - "loss": 0.5062, - "step": 343 - }, - { - "epoch": 0.022482190706489772, - "grad_norm": 0.5231584310531616, - "learning_rate": 3.743199129488575e-06, - "loss": 0.4707, - "step": 344 - }, - { - "epoch": 0.022547545912031893, - "grad_norm": 0.53677898645401, - "learning_rate": 3.7540805223068555e-06, - "loss": 0.4996, - "step": 345 - }, - { - "epoch": 0.022612901117574014, - "grad_norm": 0.5236653089523315, - "learning_rate": 3.7649619151251364e-06, - "loss": 0.4294, - "step": 346 - }, - { - "epoch": 0.022678256323116135, - "grad_norm": 0.5420531630516052, - "learning_rate": 3.775843307943417e-06, - "loss": 0.5134, - "step": 347 - }, - { - "epoch": 0.022743611528658256, - "grad_norm": 0.5473664999008179, - "learning_rate": 3.786724700761698e-06, - "loss": 0.4731, - "step": 348 - }, - { - "epoch": 0.022808966734200378, - "grad_norm": 0.5433962941169739, - "learning_rate": 3.7976060935799785e-06, - "loss": 0.5018, - "step": 349 - }, - { - "epoch": 0.022874321939742502, - "grad_norm": 0.5301963090896606, - "learning_rate": 3.808487486398259e-06, - "loss": 0.4671, - "step": 350 - }, - { - "epoch": 0.022939677145284623, - "grad_norm": 0.5100705623626709, - "learning_rate": 3.81936887921654e-06, - "loss": 0.4444, - "step": 351 - }, - { - "epoch": 0.023005032350826744, - "grad_norm": 0.5468143224716187, - "learning_rate": 3.83025027203482e-06, - "loss": 0.5, - "step": 352 - }, - { - "epoch": 0.023070387556368865, - "grad_norm": 0.5839720368385315, - "learning_rate": 3.841131664853102e-06, - "loss": 0.4728, - "step": 353 - }, - { - "epoch": 0.023135742761910986, - "grad_norm": 0.5293095707893372, - "learning_rate": 3.852013057671382e-06, - "loss": 0.47, - "step": 354 - }, - { - "epoch": 0.023201097967453108, - "grad_norm": 0.5732521414756775, - "learning_rate": 3.862894450489663e-06, - "loss": 0.5369, - "step": 355 - }, - { - "epoch": 0.02326645317299523, - "grad_norm": 0.5569798946380615, - "learning_rate": 3.873775843307944e-06, - "loss": 0.5178, - "step": 356 - }, - { - "epoch": 0.02333180837853735, - "grad_norm": 0.5082626938819885, - "learning_rate": 3.8846572361262246e-06, - "loss": 0.4672, - "step": 357 - }, - { - "epoch": 0.02339716358407947, - "grad_norm": 0.5630055665969849, - "learning_rate": 3.8955386289445054e-06, - "loss": 0.4553, - "step": 358 - }, - { - "epoch": 0.023462518789621592, - "grad_norm": 0.4950932562351227, - "learning_rate": 3.906420021762786e-06, - "loss": 0.4705, - "step": 359 - }, - { - "epoch": 0.023527873995163713, - "grad_norm": 0.5586927533149719, - "learning_rate": 3.917301414581066e-06, - "loss": 0.5097, - "step": 360 - }, - { - "epoch": 0.023593229200705838, - "grad_norm": 0.5062628388404846, - "learning_rate": 3.928182807399347e-06, - "loss": 0.4233, - "step": 361 - }, - { - "epoch": 0.02365858440624796, - "grad_norm": 0.5195222496986389, - "learning_rate": 3.939064200217628e-06, - "loss": 0.4026, - "step": 362 - }, - { - "epoch": 0.02372393961179008, - "grad_norm": 0.5128687620162964, - "learning_rate": 3.949945593035909e-06, - "loss": 0.4778, - "step": 363 - }, - { - "epoch": 0.0237892948173322, - "grad_norm": 0.5465303063392639, - "learning_rate": 3.96082698585419e-06, - "loss": 0.4323, - "step": 364 - }, - { - "epoch": 0.023854650022874322, - "grad_norm": 0.544765055179596, - "learning_rate": 3.971708378672471e-06, - "loss": 0.4914, - "step": 365 - }, - { - "epoch": 0.023920005228416443, - "grad_norm": 0.5569670796394348, - "learning_rate": 3.9825897714907515e-06, - "loss": 0.4991, - "step": 366 - }, - { - "epoch": 0.023985360433958564, - "grad_norm": 0.5170602798461914, - "learning_rate": 3.9934711643090315e-06, - "loss": 0.466, - "step": 367 - }, - { - "epoch": 0.024050715639500685, - "grad_norm": 0.7524043321609497, - "learning_rate": 4.004352557127313e-06, - "loss": 0.4417, - "step": 368 - }, - { - "epoch": 0.024116070845042806, - "grad_norm": 0.5501721501350403, - "learning_rate": 4.015233949945593e-06, - "loss": 0.4684, - "step": 369 - }, - { - "epoch": 0.024181426050584928, - "grad_norm": 0.566725492477417, - "learning_rate": 4.026115342763874e-06, - "loss": 0.552, - "step": 370 - }, - { - "epoch": 0.024246781256127052, - "grad_norm": 0.563490629196167, - "learning_rate": 4.036996735582155e-06, - "loss": 0.4769, - "step": 371 - }, - { - "epoch": 0.024312136461669173, - "grad_norm": 0.5314151048660278, - "learning_rate": 4.047878128400436e-06, - "loss": 0.4471, - "step": 372 - }, - { - "epoch": 0.024377491667211294, - "grad_norm": 0.5828354954719543, - "learning_rate": 4.058759521218717e-06, - "loss": 0.4987, - "step": 373 - }, - { - "epoch": 0.024442846872753415, - "grad_norm": 0.5326939225196838, - "learning_rate": 4.069640914036997e-06, - "loss": 0.4299, - "step": 374 - }, - { - "epoch": 0.024508202078295536, - "grad_norm": 0.5667662024497986, - "learning_rate": 4.080522306855278e-06, - "loss": 0.4939, - "step": 375 - }, - { - "epoch": 0.024573557283837658, - "grad_norm": 0.5117393136024475, - "learning_rate": 4.091403699673558e-06, - "loss": 0.441, - "step": 376 - }, - { - "epoch": 0.02463891248937978, - "grad_norm": 0.5950685143470764, - "learning_rate": 4.102285092491839e-06, - "loss": 0.5514, - "step": 377 - }, - { - "epoch": 0.0247042676949219, - "grad_norm": 0.5770097970962524, - "learning_rate": 4.11316648531012e-06, - "loss": 0.493, - "step": 378 - }, - { - "epoch": 0.02476962290046402, - "grad_norm": 0.5505401492118835, - "learning_rate": 4.124047878128401e-06, - "loss": 0.4858, - "step": 379 - }, - { - "epoch": 0.024834978106006142, - "grad_norm": 0.48124557733535767, - "learning_rate": 4.134929270946682e-06, - "loss": 0.3715, - "step": 380 - }, - { - "epoch": 0.024900333311548267, - "grad_norm": 0.5477302074432373, - "learning_rate": 4.145810663764962e-06, - "loss": 0.4675, - "step": 381 - }, - { - "epoch": 0.024965688517090388, - "grad_norm": 0.5690323710441589, - "learning_rate": 4.156692056583243e-06, - "loss": 0.5179, - "step": 382 - }, - { - "epoch": 0.02503104372263251, - "grad_norm": 0.5915126800537109, - "learning_rate": 4.1675734494015235e-06, - "loss": 0.5116, - "step": 383 - }, - { - "epoch": 0.02509639892817463, - "grad_norm": 0.5344145894050598, - "learning_rate": 4.178454842219804e-06, - "loss": 0.4738, - "step": 384 - }, - { - "epoch": 0.02516175413371675, - "grad_norm": 0.5669682025909424, - "learning_rate": 4.189336235038085e-06, - "loss": 0.521, - "step": 385 - }, - { - "epoch": 0.025227109339258872, - "grad_norm": 0.5806385278701782, - "learning_rate": 4.200217627856366e-06, - "loss": 0.4906, - "step": 386 - }, - { - "epoch": 0.025292464544800993, - "grad_norm": 0.5490126609802246, - "learning_rate": 4.211099020674647e-06, - "loss": 0.4858, - "step": 387 - }, - { - "epoch": 0.025357819750343114, - "grad_norm": 0.5345961451530457, - "learning_rate": 4.221980413492928e-06, - "loss": 0.4458, - "step": 388 - }, - { - "epoch": 0.025423174955885235, - "grad_norm": 0.5605209469795227, - "learning_rate": 4.232861806311208e-06, - "loss": 0.4524, - "step": 389 - }, - { - "epoch": 0.025488530161427356, - "grad_norm": 0.538071870803833, - "learning_rate": 4.2437431991294896e-06, - "loss": 0.4865, - "step": 390 - }, - { - "epoch": 0.025553885366969478, - "grad_norm": 0.5728001594543457, - "learning_rate": 4.2546245919477696e-06, - "loss": 0.483, - "step": 391 - }, - { - "epoch": 0.025619240572511602, - "grad_norm": 0.5253920555114746, - "learning_rate": 4.2655059847660504e-06, - "loss": 0.4832, - "step": 392 - }, - { - "epoch": 0.025684595778053723, - "grad_norm": 0.6155691146850586, - "learning_rate": 4.276387377584331e-06, - "loss": 0.5437, - "step": 393 - }, - { - "epoch": 0.025749950983595844, - "grad_norm": 0.5745170712471008, - "learning_rate": 4.287268770402611e-06, - "loss": 0.5444, - "step": 394 - }, - { - "epoch": 0.025815306189137965, - "grad_norm": 0.5143162608146667, - "learning_rate": 4.298150163220893e-06, - "loss": 0.3949, - "step": 395 - }, - { - "epoch": 0.025880661394680086, - "grad_norm": 0.5189085602760315, - "learning_rate": 4.309031556039173e-06, - "loss": 0.4346, - "step": 396 - }, - { - "epoch": 0.025946016600222208, - "grad_norm": 0.5619039535522461, - "learning_rate": 4.319912948857454e-06, - "loss": 0.4984, - "step": 397 - }, - { - "epoch": 0.02601137180576433, - "grad_norm": 0.5270082950592041, - "learning_rate": 4.330794341675735e-06, - "loss": 0.4672, - "step": 398 - }, - { - "epoch": 0.02607672701130645, - "grad_norm": 0.5133958458900452, - "learning_rate": 4.341675734494016e-06, - "loss": 0.4689, - "step": 399 - }, - { - "epoch": 0.02614208221684857, - "grad_norm": 0.5218203663825989, - "learning_rate": 4.3525571273122965e-06, - "loss": 0.4577, - "step": 400 - }, - { - "epoch": 0.026207437422390692, - "grad_norm": 0.5627186894416809, - "learning_rate": 4.363438520130577e-06, - "loss": 0.4454, - "step": 401 - }, - { - "epoch": 0.026272792627932817, - "grad_norm": 0.585961639881134, - "learning_rate": 4.374319912948858e-06, - "loss": 0.4478, - "step": 402 - }, - { - "epoch": 0.026338147833474938, - "grad_norm": 0.5784450769424438, - "learning_rate": 4.385201305767138e-06, - "loss": 0.4511, - "step": 403 - }, - { - "epoch": 0.02640350303901706, - "grad_norm": 0.6283280253410339, - "learning_rate": 4.396082698585419e-06, - "loss": 0.5295, - "step": 404 - }, - { - "epoch": 0.02646885824455918, - "grad_norm": 0.5436939001083374, - "learning_rate": 4.4069640914037e-06, - "loss": 0.4292, - "step": 405 - }, - { - "epoch": 0.0265342134501013, - "grad_norm": 0.5761511921882629, - "learning_rate": 4.417845484221981e-06, - "loss": 0.442, - "step": 406 - }, - { - "epoch": 0.026599568655643422, - "grad_norm": 0.5336189866065979, - "learning_rate": 4.428726877040262e-06, - "loss": 0.4625, - "step": 407 - }, - { - "epoch": 0.026664923861185543, - "grad_norm": 0.5750748515129089, - "learning_rate": 4.4396082698585425e-06, - "loss": 0.5365, - "step": 408 - }, - { - "epoch": 0.026730279066727664, - "grad_norm": 0.5452337265014648, - "learning_rate": 4.4504896626768225e-06, - "loss": 0.4606, - "step": 409 - }, - { - "epoch": 0.026795634272269785, - "grad_norm": 0.5602395534515381, - "learning_rate": 4.461371055495104e-06, - "loss": 0.4824, - "step": 410 - }, - { - "epoch": 0.026860989477811906, - "grad_norm": 0.5694287419319153, - "learning_rate": 4.472252448313384e-06, - "loss": 0.4904, - "step": 411 - }, - { - "epoch": 0.026926344683354028, - "grad_norm": 0.5007390975952148, - "learning_rate": 4.483133841131665e-06, - "loss": 0.4129, - "step": 412 - }, - { - "epoch": 0.026991699888896152, - "grad_norm": 0.4548834562301636, - "learning_rate": 4.494015233949946e-06, - "loss": 0.3564, - "step": 413 - }, - { - "epoch": 0.027057055094438273, - "grad_norm": 0.4879645109176636, - "learning_rate": 4.504896626768227e-06, - "loss": 0.4316, - "step": 414 - }, - { - "epoch": 0.027122410299980394, - "grad_norm": 0.5681940913200378, - "learning_rate": 4.515778019586508e-06, - "loss": 0.4761, - "step": 415 - }, - { - "epoch": 0.027187765505522515, - "grad_norm": 0.47913455963134766, - "learning_rate": 4.526659412404788e-06, - "loss": 0.3899, - "step": 416 - }, - { - "epoch": 0.027253120711064636, - "grad_norm": 0.5863394737243652, - "learning_rate": 4.537540805223069e-06, - "loss": 0.5088, - "step": 417 - }, - { - "epoch": 0.027318475916606758, - "grad_norm": 0.5107646584510803, - "learning_rate": 4.548422198041349e-06, - "loss": 0.3999, - "step": 418 - }, - { - "epoch": 0.02738383112214888, - "grad_norm": 0.5345116853713989, - "learning_rate": 4.55930359085963e-06, - "loss": 0.5046, - "step": 419 - }, - { - "epoch": 0.027449186327691, - "grad_norm": 0.5197203755378723, - "learning_rate": 4.570184983677911e-06, - "loss": 0.4445, - "step": 420 - }, - { - "epoch": 0.02751454153323312, - "grad_norm": 0.5551862716674805, - "learning_rate": 4.581066376496192e-06, - "loss": 0.4799, - "step": 421 - }, - { - "epoch": 0.027579896738775242, - "grad_norm": 0.553368330001831, - "learning_rate": 4.591947769314473e-06, - "loss": 0.4877, - "step": 422 - }, - { - "epoch": 0.027645251944317367, - "grad_norm": 0.5523605346679688, - "learning_rate": 4.602829162132753e-06, - "loss": 0.465, - "step": 423 - }, - { - "epoch": 0.027710607149859488, - "grad_norm": 1.0156335830688477, - "learning_rate": 4.6137105549510345e-06, - "loss": 0.4935, - "step": 424 - }, - { - "epoch": 0.02777596235540161, - "grad_norm": 0.5236708521842957, - "learning_rate": 4.6245919477693146e-06, - "loss": 0.4467, - "step": 425 - }, - { - "epoch": 0.02784131756094373, - "grad_norm": 0.5503356456756592, - "learning_rate": 4.635473340587595e-06, - "loss": 0.5303, - "step": 426 - }, - { - "epoch": 0.02790667276648585, - "grad_norm": 0.5287750363349915, - "learning_rate": 4.646354733405876e-06, - "loss": 0.5106, - "step": 427 - }, - { - "epoch": 0.027972027972027972, - "grad_norm": 0.5276519060134888, - "learning_rate": 4.657236126224157e-06, - "loss": 0.4274, - "step": 428 - }, - { - "epoch": 0.028037383177570093, - "grad_norm": 0.5391794443130493, - "learning_rate": 4.668117519042438e-06, - "loss": 0.4665, - "step": 429 - }, - { - "epoch": 0.028102738383112214, - "grad_norm": 0.5067090392112732, - "learning_rate": 4.678998911860719e-06, - "loss": 0.4665, - "step": 430 - }, - { - "epoch": 0.028168093588654335, - "grad_norm": 0.553927481174469, - "learning_rate": 4.689880304678999e-06, - "loss": 0.4922, - "step": 431 - }, - { - "epoch": 0.028233448794196456, - "grad_norm": 0.5183501839637756, - "learning_rate": 4.700761697497281e-06, - "loss": 0.4454, - "step": 432 - }, - { - "epoch": 0.028298803999738577, - "grad_norm": 0.576321005821228, - "learning_rate": 4.711643090315561e-06, - "loss": 0.5218, - "step": 433 - }, - { - "epoch": 0.028364159205280702, - "grad_norm": 0.4934738278388977, - "learning_rate": 4.7225244831338415e-06, - "loss": 0.4204, - "step": 434 - }, - { - "epoch": 0.028429514410822823, - "grad_norm": 0.5205022096633911, - "learning_rate": 4.733405875952122e-06, - "loss": 0.4427, - "step": 435 - }, - { - "epoch": 0.028494869616364944, - "grad_norm": 0.486479789018631, - "learning_rate": 4.744287268770403e-06, - "loss": 0.395, - "step": 436 - }, - { - "epoch": 0.028560224821907065, - "grad_norm": 0.5366991758346558, - "learning_rate": 4.755168661588684e-06, - "loss": 0.459, - "step": 437 - }, - { - "epoch": 0.028625580027449186, - "grad_norm": 0.5452165007591248, - "learning_rate": 4.766050054406964e-06, - "loss": 0.5097, - "step": 438 - }, - { - "epoch": 0.028690935232991308, - "grad_norm": 0.4924579858779907, - "learning_rate": 4.776931447225246e-06, - "loss": 0.4046, - "step": 439 - }, - { - "epoch": 0.02875629043853343, - "grad_norm": 0.5535774827003479, - "learning_rate": 4.787812840043526e-06, - "loss": 0.438, - "step": 440 - }, - { - "epoch": 0.02882164564407555, - "grad_norm": 0.5720954537391663, - "learning_rate": 4.798694232861807e-06, - "loss": 0.4355, - "step": 441 - }, - { - "epoch": 0.02888700084961767, - "grad_norm": 0.5581308603286743, - "learning_rate": 4.8095756256800875e-06, - "loss": 0.4972, - "step": 442 - }, - { - "epoch": 0.028952356055159792, - "grad_norm": 0.5132817625999451, - "learning_rate": 4.820457018498368e-06, - "loss": 0.4686, - "step": 443 - }, - { - "epoch": 0.029017711260701917, - "grad_norm": 0.5478911399841309, - "learning_rate": 4.831338411316649e-06, - "loss": 0.542, - "step": 444 - }, - { - "epoch": 0.029083066466244038, - "grad_norm": 0.5395066738128662, - "learning_rate": 4.842219804134929e-06, - "loss": 0.4493, - "step": 445 - }, - { - "epoch": 0.02914842167178616, - "grad_norm": 0.554943323135376, - "learning_rate": 4.85310119695321e-06, - "loss": 0.498, - "step": 446 - }, - { - "epoch": 0.02921377687732828, - "grad_norm": 0.5648518800735474, - "learning_rate": 4.863982589771491e-06, - "loss": 0.4738, - "step": 447 - }, - { - "epoch": 0.0292791320828704, - "grad_norm": 0.5569779276847839, - "learning_rate": 4.874863982589772e-06, - "loss": 0.5183, - "step": 448 - }, - { - "epoch": 0.029344487288412522, - "grad_norm": 0.5409430265426636, - "learning_rate": 4.885745375408053e-06, - "loss": 0.4605, - "step": 449 - }, - { - "epoch": 0.029409842493954643, - "grad_norm": 0.5412803292274475, - "learning_rate": 4.8966267682263335e-06, - "loss": 0.4824, - "step": 450 - }, - { - "epoch": 0.029475197699496764, - "grad_norm": 0.5730560421943665, - "learning_rate": 4.907508161044614e-06, - "loss": 0.5014, - "step": 451 - }, - { - "epoch": 0.029540552905038885, - "grad_norm": 0.5052904486656189, - "learning_rate": 4.918389553862895e-06, - "loss": 0.4111, - "step": 452 - }, - { - "epoch": 0.029605908110581006, - "grad_norm": 0.536467432975769, - "learning_rate": 4.929270946681175e-06, - "loss": 0.4557, - "step": 453 - }, - { - "epoch": 0.029671263316123127, - "grad_norm": 0.5028022527694702, - "learning_rate": 4.940152339499457e-06, - "loss": 0.4002, - "step": 454 - }, - { - "epoch": 0.029736618521665252, - "grad_norm": 0.5263887643814087, - "learning_rate": 4.951033732317737e-06, - "loss": 0.4389, - "step": 455 - }, - { - "epoch": 0.029801973727207373, - "grad_norm": 0.5358768701553345, - "learning_rate": 4.961915125136018e-06, - "loss": 0.4788, - "step": 456 - }, - { - "epoch": 0.029867328932749494, - "grad_norm": 0.5781591534614563, - "learning_rate": 4.972796517954299e-06, - "loss": 0.4495, - "step": 457 - }, - { - "epoch": 0.029932684138291615, - "grad_norm": 0.5463568568229675, - "learning_rate": 4.9836779107725795e-06, - "loss": 0.4652, - "step": 458 - }, - { - "epoch": 0.029998039343833736, - "grad_norm": 0.5033892393112183, - "learning_rate": 4.99455930359086e-06, - "loss": 0.4665, - "step": 459 - }, - { - "epoch": 0.030063394549375858, - "grad_norm": 0.5578131675720215, - "learning_rate": 5.00544069640914e-06, - "loss": 0.5104, - "step": 460 - }, - { - "epoch": 0.03012874975491798, - "grad_norm": 0.5375128388404846, - "learning_rate": 5.016322089227421e-06, - "loss": 0.4344, - "step": 461 - }, - { - "epoch": 0.0301941049604601, - "grad_norm": 0.5085914731025696, - "learning_rate": 5.027203482045703e-06, - "loss": 0.4199, - "step": 462 - }, - { - "epoch": 0.03025946016600222, - "grad_norm": 0.5424376726150513, - "learning_rate": 5.038084874863983e-06, - "loss": 0.4852, - "step": 463 - }, - { - "epoch": 0.030324815371544342, - "grad_norm": 0.5353248119354248, - "learning_rate": 5.048966267682264e-06, - "loss": 0.4683, - "step": 464 - }, - { - "epoch": 0.030390170577086466, - "grad_norm": 0.5041464567184448, - "learning_rate": 5.059847660500544e-06, - "loss": 0.4485, - "step": 465 - }, - { - "epoch": 0.030455525782628588, - "grad_norm": 0.47988349199295044, - "learning_rate": 5.070729053318825e-06, - "loss": 0.4278, - "step": 466 - }, - { - "epoch": 0.03052088098817071, - "grad_norm": 0.561044454574585, - "learning_rate": 5.0816104461371064e-06, - "loss": 0.4877, - "step": 467 - }, - { - "epoch": 0.03058623619371283, - "grad_norm": 0.5972671508789062, - "learning_rate": 5.0924918389553864e-06, - "loss": 0.5439, - "step": 468 - }, - { - "epoch": 0.03065159139925495, - "grad_norm": 0.5359597206115723, - "learning_rate": 5.103373231773667e-06, - "loss": 0.4675, - "step": 469 - }, - { - "epoch": 0.030716946604797072, - "grad_norm": 0.5351109504699707, - "learning_rate": 5.114254624591948e-06, - "loss": 0.4827, - "step": 470 - }, - { - "epoch": 0.030782301810339193, - "grad_norm": 48.03053283691406, - "learning_rate": 5.125136017410229e-06, - "loss": 0.3983, - "step": 471 - }, - { - "epoch": 0.030847657015881314, - "grad_norm": 0.5735889077186584, - "learning_rate": 5.13601741022851e-06, - "loss": 0.4391, - "step": 472 - }, - { - "epoch": 0.030913012221423435, - "grad_norm": 0.5440057516098022, - "learning_rate": 5.146898803046791e-06, - "loss": 0.5, - "step": 473 - }, - { - "epoch": 0.030978367426965556, - "grad_norm": 0.5572605729103088, - "learning_rate": 5.157780195865071e-06, - "loss": 0.4396, - "step": 474 - }, - { - "epoch": 0.03104372263250768, - "grad_norm": 0.5115422606468201, - "learning_rate": 5.1686615886833525e-06, - "loss": 0.4114, - "step": 475 - }, - { - "epoch": 0.031109077838049802, - "grad_norm": 0.4984517991542816, - "learning_rate": 5.179542981501633e-06, - "loss": 0.411, - "step": 476 - }, - { - "epoch": 0.031174433043591923, - "grad_norm": 0.5517117977142334, - "learning_rate": 5.190424374319913e-06, - "loss": 0.4754, - "step": 477 - }, - { - "epoch": 0.031239788249134044, - "grad_norm": 0.5398489236831665, - "learning_rate": 5.201305767138194e-06, - "loss": 0.4647, - "step": 478 - }, - { - "epoch": 0.031305143454676165, - "grad_norm": 0.5186377763748169, - "learning_rate": 5.212187159956474e-06, - "loss": 0.4236, - "step": 479 - }, - { - "epoch": 0.031370498660218286, - "grad_norm": 0.5774402022361755, - "learning_rate": 5.223068552774756e-06, - "loss": 0.5579, - "step": 480 - }, - { - "epoch": 0.03143585386576041, - "grad_norm": 0.5345587730407715, - "learning_rate": 5.233949945593037e-06, - "loss": 0.4513, - "step": 481 - }, - { - "epoch": 0.03150120907130253, - "grad_norm": 0.5191734433174133, - "learning_rate": 5.244831338411317e-06, - "loss": 0.4174, - "step": 482 - }, - { - "epoch": 0.03156656427684465, - "grad_norm": 0.5381778478622437, - "learning_rate": 5.255712731229598e-06, - "loss": 0.466, - "step": 483 - }, - { - "epoch": 0.03163191948238677, - "grad_norm": 0.5413488745689392, - "learning_rate": 5.266594124047879e-06, - "loss": 0.4677, - "step": 484 - }, - { - "epoch": 0.03169727468792889, - "grad_norm": 0.5327398777008057, - "learning_rate": 5.277475516866159e-06, - "loss": 0.4372, - "step": 485 - }, - { - "epoch": 0.03176262989347101, - "grad_norm": 0.539981484413147, - "learning_rate": 5.28835690968444e-06, - "loss": 0.4302, - "step": 486 - }, - { - "epoch": 0.031827985099013134, - "grad_norm": 0.5105658173561096, - "learning_rate": 5.29923830250272e-06, - "loss": 0.4502, - "step": 487 - }, - { - "epoch": 0.031893340304555255, - "grad_norm": 0.576172411441803, - "learning_rate": 5.310119695321001e-06, - "loss": 0.5053, - "step": 488 - }, - { - "epoch": 0.031958695510097376, - "grad_norm": 0.5153981447219849, - "learning_rate": 5.321001088139283e-06, - "loss": 0.4475, - "step": 489 - }, - { - "epoch": 0.0320240507156395, - "grad_norm": 0.5384749174118042, - "learning_rate": 5.331882480957563e-06, - "loss": 0.4564, - "step": 490 - }, - { - "epoch": 0.032089405921181625, - "grad_norm": 0.5373324155807495, - "learning_rate": 5.342763873775844e-06, - "loss": 0.4775, - "step": 491 - }, - { - "epoch": 0.03215476112672375, - "grad_norm": 0.4921714663505554, - "learning_rate": 5.353645266594124e-06, - "loss": 0.3957, - "step": 492 - }, - { - "epoch": 0.03222011633226587, - "grad_norm": 0.5208613872528076, - "learning_rate": 5.364526659412405e-06, - "loss": 0.4641, - "step": 493 - }, - { - "epoch": 0.03228547153780799, - "grad_norm": 0.5194923877716064, - "learning_rate": 5.375408052230686e-06, - "loss": 0.4431, - "step": 494 - }, - { - "epoch": 0.03235082674335011, - "grad_norm": 1.0010076761245728, - "learning_rate": 5.386289445048966e-06, - "loss": 0.5236, - "step": 495 - }, - { - "epoch": 0.03241618194889223, - "grad_norm": 0.49507319927215576, - "learning_rate": 5.397170837867247e-06, - "loss": 0.4033, - "step": 496 - }, - { - "epoch": 0.03248153715443435, - "grad_norm": 0.5528603792190552, - "learning_rate": 5.408052230685528e-06, - "loss": 0.4901, - "step": 497 - }, - { - "epoch": 0.03254689235997647, - "grad_norm": 0.5661624073982239, - "learning_rate": 5.418933623503809e-06, - "loss": 0.4757, - "step": 498 - }, - { - "epoch": 0.032612247565518594, - "grad_norm": 0.5334492921829224, - "learning_rate": 5.42981501632209e-06, - "loss": 0.4036, - "step": 499 - }, - { - "epoch": 0.032677602771060715, - "grad_norm": 0.5270481705665588, - "learning_rate": 5.4406964091403706e-06, - "loss": 0.4564, - "step": 500 - }, - { - "epoch": 0.032742957976602836, - "grad_norm": 0.5354252457618713, - "learning_rate": 5.451577801958651e-06, - "loss": 0.4485, - "step": 501 - }, - { - "epoch": 0.03280831318214496, - "grad_norm": 0.5266621112823486, - "learning_rate": 5.462459194776932e-06, - "loss": 0.4308, - "step": 502 - }, - { - "epoch": 0.03287366838768708, - "grad_norm": 0.5338118076324463, - "learning_rate": 5.473340587595213e-06, - "loss": 0.4572, - "step": 503 - }, - { - "epoch": 0.0329390235932292, - "grad_norm": 0.5976651310920715, - "learning_rate": 5.484221980413493e-06, - "loss": 0.4834, - "step": 504 - }, - { - "epoch": 0.03300437879877132, - "grad_norm": 0.49956831336021423, - "learning_rate": 5.495103373231774e-06, - "loss": 0.4135, - "step": 505 - }, - { - "epoch": 0.03306973400431344, - "grad_norm": 0.5595198273658752, - "learning_rate": 5.505984766050056e-06, - "loss": 0.4968, - "step": 506 - }, - { - "epoch": 0.03313508920985556, - "grad_norm": 0.491152286529541, - "learning_rate": 5.516866158868336e-06, - "loss": 0.4026, - "step": 507 - }, - { - "epoch": 0.033200444415397684, - "grad_norm": 0.5413976907730103, - "learning_rate": 5.527747551686617e-06, - "loss": 0.4449, - "step": 508 - }, - { - "epoch": 0.033265799620939805, - "grad_norm": 0.5298357605934143, - "learning_rate": 5.538628944504897e-06, - "loss": 0.4039, - "step": 509 - }, - { - "epoch": 0.033331154826481926, - "grad_norm": 0.5238986015319824, - "learning_rate": 5.5495103373231775e-06, - "loss": 0.4694, - "step": 510 - }, - { - "epoch": 0.03339651003202405, - "grad_norm": 0.5359320640563965, - "learning_rate": 5.560391730141459e-06, - "loss": 0.4702, - "step": 511 - }, - { - "epoch": 0.033461865237566175, - "grad_norm": 0.5686987638473511, - "learning_rate": 5.571273122959739e-06, - "loss": 0.4735, - "step": 512 - }, - { - "epoch": 0.033527220443108297, - "grad_norm": 0.5644851326942444, - "learning_rate": 5.58215451577802e-06, - "loss": 0.489, - "step": 513 - }, - { - "epoch": 0.03359257564865042, - "grad_norm": 0.5575340986251831, - "learning_rate": 5.5930359085963e-06, - "loss": 0.4434, - "step": 514 - }, - { - "epoch": 0.03365793085419254, - "grad_norm": 0.5376102924346924, - "learning_rate": 5.603917301414582e-06, - "loss": 0.4183, - "step": 515 - }, - { - "epoch": 0.03372328605973466, - "grad_norm": 0.5130487084388733, - "learning_rate": 5.614798694232863e-06, - "loss": 0.4094, - "step": 516 - }, - { - "epoch": 0.03378864126527678, - "grad_norm": 0.527062714099884, - "learning_rate": 5.625680087051143e-06, - "loss": 0.4263, - "step": 517 - }, - { - "epoch": 0.0338539964708189, - "grad_norm": 0.5213437676429749, - "learning_rate": 5.6365614798694235e-06, - "loss": 0.4224, - "step": 518 - }, - { - "epoch": 0.03391935167636102, - "grad_norm": 0.49539193511009216, - "learning_rate": 5.647442872687704e-06, - "loss": 0.4028, - "step": 519 - }, - { - "epoch": 0.033984706881903144, - "grad_norm": 0.5212374329566956, - "learning_rate": 5.658324265505985e-06, - "loss": 0.4455, - "step": 520 - }, - { - "epoch": 0.034050062087445265, - "grad_norm": 0.5280008316040039, - "learning_rate": 5.669205658324266e-06, - "loss": 0.4327, - "step": 521 - }, - { - "epoch": 0.034115417292987386, - "grad_norm": 0.4981239438056946, - "learning_rate": 5.680087051142547e-06, - "loss": 0.4224, - "step": 522 - }, - { - "epoch": 0.03418077249852951, - "grad_norm": 0.542140781879425, - "learning_rate": 5.690968443960827e-06, - "loss": 0.4818, - "step": 523 - }, - { - "epoch": 0.03424612770407163, - "grad_norm": 0.5307171940803528, - "learning_rate": 5.701849836779109e-06, - "loss": 0.3948, - "step": 524 - }, - { - "epoch": 0.03431148290961375, - "grad_norm": 0.5340259075164795, - "learning_rate": 5.7127312295973895e-06, - "loss": 0.4668, - "step": 525 - }, - { - "epoch": 0.03437683811515587, - "grad_norm": 0.5348265171051025, - "learning_rate": 5.7236126224156695e-06, - "loss": 0.4919, - "step": 526 - }, - { - "epoch": 0.03444219332069799, - "grad_norm": 0.5372627973556519, - "learning_rate": 5.73449401523395e-06, - "loss": 0.5255, - "step": 527 - }, - { - "epoch": 0.03450754852624011, - "grad_norm": 0.5648165345191956, - "learning_rate": 5.745375408052232e-06, - "loss": 0.4818, - "step": 528 - }, - { - "epoch": 0.034572903731782234, - "grad_norm": 0.5220668315887451, - "learning_rate": 5.756256800870512e-06, - "loss": 0.4706, - "step": 529 - }, - { - "epoch": 0.034638258937324355, - "grad_norm": 0.5265078544616699, - "learning_rate": 5.767138193688793e-06, - "loss": 0.4059, - "step": 530 - }, - { - "epoch": 0.034703614142866476, - "grad_norm": 0.5335327386856079, - "learning_rate": 5.778019586507073e-06, - "loss": 0.4158, - "step": 531 - }, - { - "epoch": 0.0347689693484086, - "grad_norm": 0.5514426827430725, - "learning_rate": 5.788900979325354e-06, - "loss": 0.4555, - "step": 532 - }, - { - "epoch": 0.034834324553950725, - "grad_norm": 0.5093376040458679, - "learning_rate": 5.7997823721436356e-06, - "loss": 0.4121, - "step": 533 - }, - { - "epoch": 0.034899679759492847, - "grad_norm": 0.5448405146598816, - "learning_rate": 5.8106637649619156e-06, - "loss": 0.5008, - "step": 534 - }, - { - "epoch": 0.03496503496503497, - "grad_norm": 0.5521230101585388, - "learning_rate": 5.8215451577801964e-06, - "loss": 0.4608, - "step": 535 - }, - { - "epoch": 0.03503039017057709, - "grad_norm": 0.5409108996391296, - "learning_rate": 5.8324265505984764e-06, - "loss": 0.4605, - "step": 536 - }, - { - "epoch": 0.03509574537611921, - "grad_norm": 0.5000602006912231, - "learning_rate": 5.843307943416758e-06, - "loss": 0.4197, - "step": 537 - }, - { - "epoch": 0.03516110058166133, - "grad_norm": 0.5224672555923462, - "learning_rate": 5.854189336235039e-06, - "loss": 0.4366, - "step": 538 - }, - { - "epoch": 0.03522645578720345, - "grad_norm": 0.5698318481445312, - "learning_rate": 5.865070729053319e-06, - "loss": 0.5485, - "step": 539 - }, - { - "epoch": 0.03529181099274557, - "grad_norm": 0.5366117358207703, - "learning_rate": 5.8759521218716e-06, - "loss": 0.4482, - "step": 540 - }, - { - "epoch": 0.035357166198287694, - "grad_norm": 0.54118412733078, - "learning_rate": 5.88683351468988e-06, - "loss": 0.4476, - "step": 541 - }, - { - "epoch": 0.035422521403829815, - "grad_norm": 0.5354444980621338, - "learning_rate": 5.897714907508162e-06, - "loss": 0.4827, - "step": 542 - }, - { - "epoch": 0.035487876609371936, - "grad_norm": 0.523681640625, - "learning_rate": 5.9085963003264425e-06, - "loss": 0.4486, - "step": 543 - }, - { - "epoch": 0.03555323181491406, - "grad_norm": 0.5297341346740723, - "learning_rate": 5.9194776931447225e-06, - "loss": 0.4525, - "step": 544 - }, - { - "epoch": 0.03561858702045618, - "grad_norm": 0.5035794973373413, - "learning_rate": 5.930359085963003e-06, - "loss": 0.4558, - "step": 545 - }, - { - "epoch": 0.0356839422259983, - "grad_norm": 0.5241169929504395, - "learning_rate": 5.941240478781285e-06, - "loss": 0.4654, - "step": 546 - }, - { - "epoch": 0.03574929743154042, - "grad_norm": 0.5188154578208923, - "learning_rate": 5.952121871599565e-06, - "loss": 0.454, - "step": 547 - }, - { - "epoch": 0.03581465263708254, - "grad_norm": 0.4866127073764801, - "learning_rate": 5.963003264417846e-06, - "loss": 0.411, - "step": 548 - }, - { - "epoch": 0.03588000784262466, - "grad_norm": 0.5161880254745483, - "learning_rate": 5.973884657236127e-06, - "loss": 0.4425, - "step": 549 - }, - { - "epoch": 0.035945363048166784, - "grad_norm": 0.5133089423179626, - "learning_rate": 5.984766050054407e-06, - "loss": 0.4554, - "step": 550 - }, - { - "epoch": 0.036010718253708905, - "grad_norm": 0.5831466913223267, - "learning_rate": 5.9956474428726885e-06, - "loss": 0.5481, - "step": 551 - }, - { - "epoch": 0.036076073459251026, - "grad_norm": 0.5680014491081238, - "learning_rate": 6.006528835690969e-06, - "loss": 0.4916, - "step": 552 - }, - { - "epoch": 0.03614142866479315, - "grad_norm": 0.5159114599227905, - "learning_rate": 6.017410228509249e-06, - "loss": 0.4435, - "step": 553 - }, - { - "epoch": 0.036206783870335275, - "grad_norm": 0.5152553915977478, - "learning_rate": 6.02829162132753e-06, - "loss": 0.418, - "step": 554 - }, - { - "epoch": 0.036272139075877396, - "grad_norm": 0.5299906134605408, - "learning_rate": 6.039173014145812e-06, - "loss": 0.4592, - "step": 555 - }, - { - "epoch": 0.03633749428141952, - "grad_norm": 0.506966233253479, - "learning_rate": 6.050054406964092e-06, - "loss": 0.4238, - "step": 556 - }, - { - "epoch": 0.03640284948696164, - "grad_norm": 0.5458035469055176, - "learning_rate": 6.060935799782373e-06, - "loss": 0.4889, - "step": 557 - }, - { - "epoch": 0.03646820469250376, - "grad_norm": 0.5626875758171082, - "learning_rate": 6.071817192600653e-06, - "loss": 0.4789, - "step": 558 - }, - { - "epoch": 0.03653355989804588, - "grad_norm": 0.589255690574646, - "learning_rate": 6.0826985854189345e-06, - "loss": 0.4852, - "step": 559 - }, - { - "epoch": 0.036598915103588, - "grad_norm": 0.5371728539466858, - "learning_rate": 6.093579978237215e-06, - "loss": 0.4736, - "step": 560 - }, - { - "epoch": 0.03666427030913012, - "grad_norm": 0.5571090579032898, - "learning_rate": 6.104461371055495e-06, - "loss": 0.4766, - "step": 561 - }, - { - "epoch": 0.036729625514672244, - "grad_norm": 0.5630400776863098, - "learning_rate": 6.115342763873776e-06, - "loss": 0.4499, - "step": 562 - }, - { - "epoch": 0.036794980720214365, - "grad_norm": 0.5382595062255859, - "learning_rate": 6.126224156692056e-06, - "loss": 0.4899, - "step": 563 - }, - { - "epoch": 0.036860335925756486, - "grad_norm": 0.5315620303153992, - "learning_rate": 6.137105549510338e-06, - "loss": 0.4521, - "step": 564 - }, - { - "epoch": 0.03692569113129861, - "grad_norm": 0.5756667852401733, - "learning_rate": 6.147986942328619e-06, - "loss": 0.4767, - "step": 565 - }, - { - "epoch": 0.03699104633684073, - "grad_norm": 0.509128987789154, - "learning_rate": 6.158868335146899e-06, - "loss": 0.3923, - "step": 566 - }, - { - "epoch": 0.03705640154238285, - "grad_norm": 0.5773864388465881, - "learning_rate": 6.16974972796518e-06, - "loss": 0.4425, - "step": 567 - }, - { - "epoch": 0.03712175674792497, - "grad_norm": 0.5730013251304626, - "learning_rate": 6.180631120783461e-06, - "loss": 0.4531, - "step": 568 - }, - { - "epoch": 0.03718711195346709, - "grad_norm": 0.5450667142868042, - "learning_rate": 6.191512513601741e-06, - "loss": 0.4685, - "step": 569 - }, - { - "epoch": 0.03725246715900921, - "grad_norm": 0.5366420745849609, - "learning_rate": 6.202393906420022e-06, - "loss": 0.4681, - "step": 570 - }, - { - "epoch": 0.037317822364551334, - "grad_norm": 0.5366652011871338, - "learning_rate": 6.213275299238303e-06, - "loss": 0.3952, - "step": 571 - }, - { - "epoch": 0.037383177570093455, - "grad_norm": 0.5294990539550781, - "learning_rate": 6.224156692056583e-06, - "loss": 0.4675, - "step": 572 - }, - { - "epoch": 0.037448532775635576, - "grad_norm": 0.5067282915115356, - "learning_rate": 6.235038084874865e-06, - "loss": 0.4105, - "step": 573 - }, - { - "epoch": 0.0375138879811777, - "grad_norm": 0.5480269193649292, - "learning_rate": 6.245919477693146e-06, - "loss": 0.4792, - "step": 574 - }, - { - "epoch": 0.037579243186719825, - "grad_norm": 0.5152676701545715, - "learning_rate": 6.256800870511426e-06, - "loss": 0.4149, - "step": 575 - }, - { - "epoch": 0.037644598392261946, - "grad_norm": 0.5218726992607117, - "learning_rate": 6.267682263329707e-06, - "loss": 0.4089, - "step": 576 - }, - { - "epoch": 0.03770995359780407, - "grad_norm": 0.5660258531570435, - "learning_rate": 6.278563656147988e-06, - "loss": 0.4605, - "step": 577 - }, - { - "epoch": 0.03777530880334619, - "grad_norm": 0.47741585969924927, - "learning_rate": 6.289445048966268e-06, - "loss": 0.3852, - "step": 578 - }, - { - "epoch": 0.03784066400888831, - "grad_norm": 0.545810341835022, - "learning_rate": 6.300326441784549e-06, - "loss": 0.4394, - "step": 579 - }, - { - "epoch": 0.03790601921443043, - "grad_norm": 0.5607401728630066, - "learning_rate": 6.311207834602829e-06, - "loss": 0.4964, - "step": 580 - }, - { - "epoch": 0.03797137441997255, - "grad_norm": 0.5499708652496338, - "learning_rate": 6.32208922742111e-06, - "loss": 0.4109, - "step": 581 - }, - { - "epoch": 0.03803672962551467, - "grad_norm": 0.5648245811462402, - "learning_rate": 6.332970620239392e-06, - "loss": 0.5523, - "step": 582 - }, - { - "epoch": 0.038102084831056794, - "grad_norm": 0.5284977555274963, - "learning_rate": 6.343852013057672e-06, - "loss": 0.4311, - "step": 583 - }, - { - "epoch": 0.038167440036598915, - "grad_norm": 0.5634139180183411, - "learning_rate": 6.354733405875953e-06, - "loss": 0.5076, - "step": 584 - }, - { - "epoch": 0.038232795242141036, - "grad_norm": 0.5088948607444763, - "learning_rate": 6.365614798694233e-06, - "loss": 0.4246, - "step": 585 - }, - { - "epoch": 0.03829815044768316, - "grad_norm": 0.5613812804222107, - "learning_rate": 6.376496191512514e-06, - "loss": 0.4443, - "step": 586 - }, - { - "epoch": 0.03836350565322528, - "grad_norm": 0.5567344427108765, - "learning_rate": 6.387377584330795e-06, - "loss": 0.5085, - "step": 587 - }, - { - "epoch": 0.0384288608587674, - "grad_norm": 0.49491986632347107, - "learning_rate": 6.398258977149075e-06, - "loss": 0.3923, - "step": 588 - }, - { - "epoch": 0.03849421606430952, - "grad_norm": 0.5684411525726318, - "learning_rate": 6.409140369967356e-06, - "loss": 0.4444, - "step": 589 - }, - { - "epoch": 0.03855957126985164, - "grad_norm": 0.5668994784355164, - "learning_rate": 6.420021762785638e-06, - "loss": 0.4558, - "step": 590 - }, - { - "epoch": 0.03862492647539376, - "grad_norm": 0.5855531096458435, - "learning_rate": 6.430903155603918e-06, - "loss": 0.4668, - "step": 591 - }, - { - "epoch": 0.038690281680935884, - "grad_norm": 0.5380173921585083, - "learning_rate": 6.441784548422199e-06, - "loss": 0.471, - "step": 592 - }, - { - "epoch": 0.038755636886478005, - "grad_norm": 0.5691683292388916, - "learning_rate": 6.452665941240479e-06, - "loss": 0.4829, - "step": 593 - }, - { - "epoch": 0.038820992092020126, - "grad_norm": 0.607672393321991, - "learning_rate": 6.4635473340587595e-06, - "loss": 0.4983, - "step": 594 - }, - { - "epoch": 0.038886347297562254, - "grad_norm": 0.5337852239608765, - "learning_rate": 6.474428726877041e-06, - "loss": 0.441, - "step": 595 - }, - { - "epoch": 0.038951702503104375, - "grad_norm": 0.5541263222694397, - "learning_rate": 6.485310119695321e-06, - "loss": 0.4492, - "step": 596 - }, - { - "epoch": 0.039017057708646496, - "grad_norm": 0.566577672958374, - "learning_rate": 6.496191512513602e-06, - "loss": 0.5324, - "step": 597 - }, - { - "epoch": 0.03908241291418862, - "grad_norm": 0.5563501715660095, - "learning_rate": 6.507072905331883e-06, - "loss": 0.4868, - "step": 598 - }, - { - "epoch": 0.03914776811973074, - "grad_norm": 0.5898980498313904, - "learning_rate": 6.517954298150164e-06, - "loss": 0.4476, - "step": 599 - }, - { - "epoch": 0.03921312332527286, - "grad_norm": 0.5672775506973267, - "learning_rate": 6.528835690968445e-06, - "loss": 0.4852, - "step": 600 - }, - { - "epoch": 0.03927847853081498, - "grad_norm": 0.5669152736663818, - "learning_rate": 6.5397170837867255e-06, - "loss": 0.4813, - "step": 601 - }, - { - "epoch": 0.0393438337363571, - "grad_norm": 0.5214797854423523, - "learning_rate": 6.5505984766050056e-06, - "loss": 0.4511, - "step": 602 - }, - { - "epoch": 0.03940918894189922, - "grad_norm": 0.519077479839325, - "learning_rate": 6.561479869423286e-06, - "loss": 0.4049, - "step": 603 - }, - { - "epoch": 0.039474544147441344, - "grad_norm": 0.5328835844993591, - "learning_rate": 6.572361262241568e-06, - "loss": 0.4301, - "step": 604 - }, - { - "epoch": 0.039539899352983465, - "grad_norm": 0.5590944290161133, - "learning_rate": 6.583242655059848e-06, - "loss": 0.4755, - "step": 605 - }, - { - "epoch": 0.039605254558525586, - "grad_norm": 0.5169242024421692, - "learning_rate": 6.594124047878129e-06, - "loss": 0.3647, - "step": 606 - }, - { - "epoch": 0.03967060976406771, - "grad_norm": 0.5104243159294128, - "learning_rate": 6.605005440696409e-06, - "loss": 0.4208, - "step": 607 - }, - { - "epoch": 0.03973596496960983, - "grad_norm": 0.4833294153213501, - "learning_rate": 6.615886833514691e-06, - "loss": 0.41, - "step": 608 - }, - { - "epoch": 0.03980132017515195, - "grad_norm": 0.5645463466644287, - "learning_rate": 6.6267682263329716e-06, - "loss": 0.4371, - "step": 609 - }, - { - "epoch": 0.03986667538069407, - "grad_norm": 0.4820958375930786, - "learning_rate": 6.637649619151252e-06, - "loss": 0.3911, - "step": 610 - }, - { - "epoch": 0.03993203058623619, - "grad_norm": 0.5233252644538879, - "learning_rate": 6.6485310119695324e-06, - "loss": 0.4235, - "step": 611 - }, - { - "epoch": 0.03999738579177831, - "grad_norm": 0.5318486094474792, - "learning_rate": 6.659412404787814e-06, - "loss": 0.4399, - "step": 612 - }, - { - "epoch": 0.040062740997320434, - "grad_norm": 0.5524714589118958, - "learning_rate": 6.670293797606094e-06, - "loss": 0.503, - "step": 613 - }, - { - "epoch": 0.040128096202862555, - "grad_norm": 0.48769381642341614, - "learning_rate": 6.681175190424375e-06, - "loss": 0.4067, - "step": 614 - }, - { - "epoch": 0.040193451408404676, - "grad_norm": 0.5265127420425415, - "learning_rate": 6.692056583242655e-06, - "loss": 0.4746, - "step": 615 - }, - { - "epoch": 0.040258806613946804, - "grad_norm": 0.49916163086891174, - "learning_rate": 6.702937976060936e-06, - "loss": 0.3907, - "step": 616 - }, - { - "epoch": 0.040324161819488925, - "grad_norm": 0.5203204154968262, - "learning_rate": 6.713819368879218e-06, - "loss": 0.4469, - "step": 617 - }, - { - "epoch": 0.040389517025031046, - "grad_norm": 0.5336718559265137, - "learning_rate": 6.724700761697498e-06, - "loss": 0.4842, - "step": 618 - }, - { - "epoch": 0.04045487223057317, - "grad_norm": 0.5118704438209534, - "learning_rate": 6.7355821545157785e-06, - "loss": 0.4224, - "step": 619 - }, - { - "epoch": 0.04052022743611529, - "grad_norm": 0.5074040293693542, - "learning_rate": 6.746463547334059e-06, - "loss": 0.4409, - "step": 620 - }, - { - "epoch": 0.04058558264165741, - "grad_norm": 0.5172423124313354, - "learning_rate": 6.75734494015234e-06, - "loss": 0.432, - "step": 621 - }, - { - "epoch": 0.04065093784719953, - "grad_norm": 0.5750128030776978, - "learning_rate": 6.768226332970621e-06, - "loss": 0.5288, - "step": 622 - }, - { - "epoch": 0.04071629305274165, - "grad_norm": 0.5487008094787598, - "learning_rate": 6.779107725788902e-06, - "loss": 0.4392, - "step": 623 - }, - { - "epoch": 0.04078164825828377, - "grad_norm": 0.5502815246582031, - "learning_rate": 6.789989118607182e-06, - "loss": 0.4358, - "step": 624 - }, - { - "epoch": 0.040847003463825894, - "grad_norm": 0.5159929394721985, - "learning_rate": 6.800870511425463e-06, - "loss": 0.4346, - "step": 625 - }, - { - "epoch": 0.040912358669368015, - "grad_norm": 0.5169581770896912, - "learning_rate": 6.8117519042437445e-06, - "loss": 0.4549, - "step": 626 - }, - { - "epoch": 0.040977713874910136, - "grad_norm": 0.5334294438362122, - "learning_rate": 6.8226332970620245e-06, - "loss": 0.4371, - "step": 627 - }, - { - "epoch": 0.04104306908045226, - "grad_norm": 0.5245968699455261, - "learning_rate": 6.833514689880305e-06, - "loss": 0.4437, - "step": 628 - }, - { - "epoch": 0.04110842428599438, - "grad_norm": 0.5483084917068481, - "learning_rate": 6.844396082698585e-06, - "loss": 0.438, - "step": 629 - }, - { - "epoch": 0.0411737794915365, - "grad_norm": 0.5519688725471497, - "learning_rate": 6.855277475516867e-06, - "loss": 0.4163, - "step": 630 - }, - { - "epoch": 0.04123913469707862, - "grad_norm": 0.531467080116272, - "learning_rate": 6.866158868335148e-06, - "loss": 0.4302, - "step": 631 - }, - { - "epoch": 0.04130448990262074, - "grad_norm": 0.542911171913147, - "learning_rate": 6.877040261153428e-06, - "loss": 0.4758, - "step": 632 - }, - { - "epoch": 0.04136984510816286, - "grad_norm": 0.514519214630127, - "learning_rate": 6.887921653971709e-06, - "loss": 0.4265, - "step": 633 - }, - { - "epoch": 0.041435200313704984, - "grad_norm": 0.5962108969688416, - "learning_rate": 6.898803046789989e-06, - "loss": 0.4733, - "step": 634 - }, - { - "epoch": 0.041500555519247105, - "grad_norm": 0.5529797077178955, - "learning_rate": 6.9096844396082705e-06, - "loss": 0.4656, - "step": 635 - }, - { - "epoch": 0.041565910724789226, - "grad_norm": 0.4788876175880432, - "learning_rate": 6.920565832426551e-06, - "loss": 0.3523, - "step": 636 - }, - { - "epoch": 0.041631265930331354, - "grad_norm": 0.5504601001739502, - "learning_rate": 6.931447225244831e-06, - "loss": 0.4427, - "step": 637 - }, - { - "epoch": 0.041696621135873475, - "grad_norm": 0.5427254438400269, - "learning_rate": 6.942328618063112e-06, - "loss": 0.4463, - "step": 638 - }, - { - "epoch": 0.041761976341415596, - "grad_norm": 0.47728046774864197, - "learning_rate": 6.953210010881394e-06, - "loss": 0.4082, - "step": 639 - }, - { - "epoch": 0.04182733154695772, - "grad_norm": 0.566616415977478, - "learning_rate": 6.964091403699674e-06, - "loss": 0.4286, - "step": 640 - }, - { - "epoch": 0.04189268675249984, - "grad_norm": 0.5331923365592957, - "learning_rate": 6.974972796517955e-06, - "loss": 0.4229, - "step": 641 - }, - { - "epoch": 0.04195804195804196, - "grad_norm": 0.4955950677394867, - "learning_rate": 6.985854189336235e-06, - "loss": 0.4257, - "step": 642 - }, - { - "epoch": 0.04202339716358408, - "grad_norm": 0.5213703513145447, - "learning_rate": 6.9967355821545166e-06, - "loss": 0.466, - "step": 643 - }, - { - "epoch": 0.0420887523691262, - "grad_norm": 0.5337159037590027, - "learning_rate": 7.0076169749727974e-06, - "loss": 0.4539, - "step": 644 - }, - { - "epoch": 0.04215410757466832, - "grad_norm": 0.5224979519844055, - "learning_rate": 7.0184983677910774e-06, - "loss": 0.4384, - "step": 645 - }, - { - "epoch": 0.042219462780210444, - "grad_norm": 0.5413781404495239, - "learning_rate": 7.029379760609358e-06, - "loss": 0.4687, - "step": 646 - }, - { - "epoch": 0.042284817985752565, - "grad_norm": 0.553650975227356, - "learning_rate": 7.040261153427639e-06, - "loss": 0.477, - "step": 647 - }, - { - "epoch": 0.042350173191294686, - "grad_norm": 0.5719106793403625, - "learning_rate": 7.05114254624592e-06, - "loss": 0.4478, - "step": 648 - }, - { - "epoch": 0.04241552839683681, - "grad_norm": 0.48158884048461914, - "learning_rate": 7.062023939064201e-06, - "loss": 0.3995, - "step": 649 - }, - { - "epoch": 0.04248088360237893, - "grad_norm": 0.5439937710762024, - "learning_rate": 7.072905331882482e-06, - "loss": 0.4142, - "step": 650 - }, - { - "epoch": 0.04254623880792105, - "grad_norm": 0.570808470249176, - "learning_rate": 7.083786724700762e-06, - "loss": 0.4679, - "step": 651 - }, - { - "epoch": 0.04261159401346317, - "grad_norm": 0.5186165571212769, - "learning_rate": 7.0946681175190435e-06, - "loss": 0.48, - "step": 652 - }, - { - "epoch": 0.04267694921900529, - "grad_norm": 0.5627509355545044, - "learning_rate": 7.105549510337324e-06, - "loss": 0.4676, - "step": 653 - }, - { - "epoch": 0.04274230442454741, - "grad_norm": 0.5147085785865784, - "learning_rate": 7.116430903155604e-06, - "loss": 0.4241, - "step": 654 - }, - { - "epoch": 0.042807659630089534, - "grad_norm": 0.5302978754043579, - "learning_rate": 7.127312295973885e-06, - "loss": 0.4572, - "step": 655 - }, - { - "epoch": 0.042873014835631655, - "grad_norm": 0.5276473164558411, - "learning_rate": 7.138193688792165e-06, - "loss": 0.4091, - "step": 656 - }, - { - "epoch": 0.042938370041173776, - "grad_norm": 0.598788321018219, - "learning_rate": 7.149075081610447e-06, - "loss": 0.4061, - "step": 657 - }, - { - "epoch": 0.043003725246715904, - "grad_norm": 0.6199572682380676, - "learning_rate": 7.159956474428728e-06, - "loss": 0.5387, - "step": 658 - }, - { - "epoch": 0.043069080452258025, - "grad_norm": 0.5637344121932983, - "learning_rate": 7.170837867247008e-06, - "loss": 0.4928, - "step": 659 - }, - { - "epoch": 0.043134435657800146, - "grad_norm": 0.5307586789131165, - "learning_rate": 7.181719260065289e-06, - "loss": 0.438, - "step": 660 - }, - { - "epoch": 0.04319979086334227, - "grad_norm": 0.5489634871482849, - "learning_rate": 7.19260065288357e-06, - "loss": 0.4022, - "step": 661 - }, - { - "epoch": 0.04326514606888439, - "grad_norm": 0.5737041234970093, - "learning_rate": 7.20348204570185e-06, - "loss": 0.4616, - "step": 662 - }, - { - "epoch": 0.04333050127442651, - "grad_norm": 0.5683348774909973, - "learning_rate": 7.214363438520131e-06, - "loss": 0.45, - "step": 663 - }, - { - "epoch": 0.04339585647996863, - "grad_norm": 0.5592344403266907, - "learning_rate": 7.225244831338411e-06, - "loss": 0.4015, - "step": 664 - }, - { - "epoch": 0.04346121168551075, - "grad_norm": 0.6093730330467224, - "learning_rate": 7.236126224156693e-06, - "loss": 0.4693, - "step": 665 - }, - { - "epoch": 0.04352656689105287, - "grad_norm": 0.5761213898658752, - "learning_rate": 7.247007616974974e-06, - "loss": 0.4408, - "step": 666 - }, - { - "epoch": 0.043591922096594994, - "grad_norm": 0.5475156307220459, - "learning_rate": 7.257889009793254e-06, - "loss": 0.481, - "step": 667 - }, - { - "epoch": 0.043657277302137115, - "grad_norm": 0.5181083083152771, - "learning_rate": 7.268770402611535e-06, - "loss": 0.4391, - "step": 668 - }, - { - "epoch": 0.043722632507679236, - "grad_norm": 0.574974000453949, - "learning_rate": 7.2796517954298155e-06, - "loss": 0.4821, - "step": 669 - }, - { - "epoch": 0.04378798771322136, - "grad_norm": 0.6030741930007935, - "learning_rate": 7.290533188248096e-06, - "loss": 0.457, - "step": 670 - }, - { - "epoch": 0.04385334291876348, - "grad_norm": 0.5326198935508728, - "learning_rate": 7.301414581066377e-06, - "loss": 0.4408, - "step": 671 - }, - { - "epoch": 0.0439186981243056, - "grad_norm": 0.5482437610626221, - "learning_rate": 7.312295973884658e-06, - "loss": 0.4234, - "step": 672 - }, - { - "epoch": 0.04398405332984772, - "grad_norm": 0.5907866954803467, - "learning_rate": 7.323177366702938e-06, - "loss": 0.4332, - "step": 673 - }, - { - "epoch": 0.04404940853538984, - "grad_norm": 0.5776271820068359, - "learning_rate": 7.33405875952122e-06, - "loss": 0.4587, - "step": 674 - }, - { - "epoch": 0.04411476374093196, - "grad_norm": 0.5511412620544434, - "learning_rate": 7.344940152339501e-06, - "loss": 0.451, - "step": 675 - }, - { - "epoch": 0.044180118946474084, - "grad_norm": 0.5105332136154175, - "learning_rate": 7.355821545157781e-06, - "loss": 0.4066, - "step": 676 - }, - { - "epoch": 0.044245474152016205, - "grad_norm": 0.5682520270347595, - "learning_rate": 7.3667029379760616e-06, - "loss": 0.4715, - "step": 677 - }, - { - "epoch": 0.044310829357558326, - "grad_norm": 0.6172433495521545, - "learning_rate": 7.377584330794342e-06, - "loss": 0.53, - "step": 678 - }, - { - "epoch": 0.044376184563100454, - "grad_norm": 0.5491930842399597, - "learning_rate": 7.388465723612623e-06, - "loss": 0.4559, - "step": 679 - }, - { - "epoch": 0.044441539768642575, - "grad_norm": 0.5460163950920105, - "learning_rate": 7.399347116430904e-06, - "loss": 0.4509, - "step": 680 - }, - { - "epoch": 0.044506894974184696, - "grad_norm": 0.5838077664375305, - "learning_rate": 7.410228509249184e-06, - "loss": 0.5506, - "step": 681 - }, - { - "epoch": 0.04457225017972682, - "grad_norm": 0.5743003487586975, - "learning_rate": 7.421109902067465e-06, - "loss": 0.4691, - "step": 682 - }, - { - "epoch": 0.04463760538526894, - "grad_norm": 0.507209837436676, - "learning_rate": 7.431991294885747e-06, - "loss": 0.3838, - "step": 683 - }, - { - "epoch": 0.04470296059081106, - "grad_norm": 0.5423256158828735, - "learning_rate": 7.442872687704027e-06, - "loss": 0.4069, - "step": 684 - }, - { - "epoch": 0.04476831579635318, - "grad_norm": 0.5516565442085266, - "learning_rate": 7.453754080522308e-06, - "loss": 0.4655, - "step": 685 - }, - { - "epoch": 0.0448336710018953, - "grad_norm": 0.5188184380531311, - "learning_rate": 7.464635473340588e-06, - "loss": 0.4598, - "step": 686 - }, - { - "epoch": 0.04489902620743742, - "grad_norm": 0.5910035967826843, - "learning_rate": 7.4755168661588685e-06, - "loss": 0.48, - "step": 687 - }, - { - "epoch": 0.044964381412979544, - "grad_norm": 0.5000728964805603, - "learning_rate": 7.48639825897715e-06, - "loss": 0.3985, - "step": 688 - }, - { - "epoch": 0.045029736618521665, - "grad_norm": 1.4668203592300415, - "learning_rate": 7.49727965179543e-06, - "loss": 0.5042, - "step": 689 - }, - { - "epoch": 0.045095091824063786, - "grad_norm": 0.5863484144210815, - "learning_rate": 7.508161044613711e-06, - "loss": 0.4646, - "step": 690 - }, - { - "epoch": 0.04516044702960591, - "grad_norm": 0.5586270093917847, - "learning_rate": 7.519042437431991e-06, - "loss": 0.3939, - "step": 691 - }, - { - "epoch": 0.04522580223514803, - "grad_norm": 0.5796319842338562, - "learning_rate": 7.529923830250273e-06, - "loss": 0.471, - "step": 692 - }, - { - "epoch": 0.04529115744069015, - "grad_norm": 0.5738232731819153, - "learning_rate": 7.540805223068554e-06, - "loss": 0.4838, - "step": 693 - }, - { - "epoch": 0.04535651264623227, - "grad_norm": 0.531330943107605, - "learning_rate": 7.551686615886834e-06, - "loss": 0.4456, - "step": 694 - }, - { - "epoch": 0.04542186785177439, - "grad_norm": 0.5848109126091003, - "learning_rate": 7.5625680087051145e-06, - "loss": 0.4741, - "step": 695 - }, - { - "epoch": 0.04548722305731651, - "grad_norm": 0.5010263919830322, - "learning_rate": 7.573449401523396e-06, - "loss": 0.459, - "step": 696 - }, - { - "epoch": 0.045552578262858634, - "grad_norm": 0.5514805316925049, - "learning_rate": 7.584330794341676e-06, - "loss": 0.4871, - "step": 697 - }, - { - "epoch": 0.045617933468400755, - "grad_norm": 0.5259074568748474, - "learning_rate": 7.595212187159957e-06, - "loss": 0.4667, - "step": 698 - }, - { - "epoch": 0.045683288673942876, - "grad_norm": 0.5424180030822754, - "learning_rate": 7.606093579978238e-06, - "loss": 0.457, - "step": 699 - }, - { - "epoch": 0.045748643879485004, - "grad_norm": 0.5486298203468323, - "learning_rate": 7.616974972796518e-06, - "loss": 0.5152, - "step": 700 - }, - { - "epoch": 0.045813999085027125, - "grad_norm": 0.5700769424438477, - "learning_rate": 7.6278563656148e-06, - "loss": 0.5096, - "step": 701 - }, - { - "epoch": 0.045879354290569246, - "grad_norm": 0.5538732409477234, - "learning_rate": 7.63873775843308e-06, - "loss": 0.4997, - "step": 702 - }, - { - "epoch": 0.04594470949611137, - "grad_norm": 0.5293434262275696, - "learning_rate": 7.64961915125136e-06, - "loss": 0.4286, - "step": 703 - }, - { - "epoch": 0.04601006470165349, - "grad_norm": 0.5087040662765503, - "learning_rate": 7.66050054406964e-06, - "loss": 0.4463, - "step": 704 - }, - { - "epoch": 0.04607541990719561, - "grad_norm": 0.5482538938522339, - "learning_rate": 7.671381936887922e-06, - "loss": 0.4553, - "step": 705 - }, - { - "epoch": 0.04614077511273773, - "grad_norm": 0.5640394687652588, - "learning_rate": 7.682263329706204e-06, - "loss": 0.4935, - "step": 706 - }, - { - "epoch": 0.04620613031827985, - "grad_norm": 0.49894604086875916, - "learning_rate": 7.693144722524484e-06, - "loss": 0.4091, - "step": 707 - }, - { - "epoch": 0.04627148552382197, - "grad_norm": 0.5695271492004395, - "learning_rate": 7.704026115342764e-06, - "loss": 0.4704, - "step": 708 - }, - { - "epoch": 0.046336840729364094, - "grad_norm": 0.5633416771888733, - "learning_rate": 7.714907508161044e-06, - "loss": 0.4555, - "step": 709 - }, - { - "epoch": 0.046402195934906215, - "grad_norm": 0.6127053499221802, - "learning_rate": 7.725788900979326e-06, - "loss": 0.4554, - "step": 710 - }, - { - "epoch": 0.046467551140448336, - "grad_norm": 0.4968777298927307, - "learning_rate": 7.736670293797607e-06, - "loss": 0.3906, - "step": 711 - }, - { - "epoch": 0.04653290634599046, - "grad_norm": 0.5375694632530212, - "learning_rate": 7.747551686615887e-06, - "loss": 0.4089, - "step": 712 - }, - { - "epoch": 0.04659826155153258, - "grad_norm": 0.6223379373550415, - "learning_rate": 7.758433079434167e-06, - "loss": 0.4875, - "step": 713 - }, - { - "epoch": 0.0466636167570747, - "grad_norm": 0.48704859614372253, - "learning_rate": 7.769314472252449e-06, - "loss": 0.3397, - "step": 714 - }, - { - "epoch": 0.04672897196261682, - "grad_norm": 0.5273993611335754, - "learning_rate": 7.780195865070729e-06, - "loss": 0.4646, - "step": 715 - }, - { - "epoch": 0.04679432716815894, - "grad_norm": 0.563279390335083, - "learning_rate": 7.791077257889011e-06, - "loss": 0.514, - "step": 716 - }, - { - "epoch": 0.04685968237370106, - "grad_norm": 0.5412565469741821, - "learning_rate": 7.801958650707291e-06, - "loss": 0.4696, - "step": 717 - }, - { - "epoch": 0.046925037579243184, - "grad_norm": 0.5288301706314087, - "learning_rate": 7.812840043525573e-06, - "loss": 0.4215, - "step": 718 - }, - { - "epoch": 0.046990392784785305, - "grad_norm": 0.4975229501724243, - "learning_rate": 7.823721436343853e-06, - "loss": 0.4005, - "step": 719 - }, - { - "epoch": 0.047055747990327426, - "grad_norm": 0.5747808814048767, - "learning_rate": 7.834602829162133e-06, - "loss": 0.469, - "step": 720 - }, - { - "epoch": 0.047121103195869554, - "grad_norm": 0.5444490909576416, - "learning_rate": 7.845484221980414e-06, - "loss": 0.489, - "step": 721 - }, - { - "epoch": 0.047186458401411675, - "grad_norm": 0.5794644951820374, - "learning_rate": 7.856365614798694e-06, - "loss": 0.4856, - "step": 722 - }, - { - "epoch": 0.047251813606953796, - "grad_norm": 0.5320571064949036, - "learning_rate": 7.867247007616976e-06, - "loss": 0.4433, - "step": 723 - }, - { - "epoch": 0.04731716881249592, - "grad_norm": 0.501530647277832, - "learning_rate": 7.878128400435256e-06, - "loss": 0.412, - "step": 724 - }, - { - "epoch": 0.04738252401803804, - "grad_norm": 0.5136852860450745, - "learning_rate": 7.889009793253538e-06, - "loss": 0.435, - "step": 725 - }, - { - "epoch": 0.04744787922358016, - "grad_norm": 0.5464115142822266, - "learning_rate": 7.899891186071818e-06, - "loss": 0.4377, - "step": 726 - }, - { - "epoch": 0.04751323442912228, - "grad_norm": 0.5368839502334595, - "learning_rate": 7.9107725788901e-06, - "loss": 0.4023, - "step": 727 - }, - { - "epoch": 0.0475785896346644, - "grad_norm": 0.5113937854766846, - "learning_rate": 7.92165397170838e-06, - "loss": 0.4257, - "step": 728 - }, - { - "epoch": 0.04764394484020652, - "grad_norm": 0.5719674229621887, - "learning_rate": 7.93253536452666e-06, - "loss": 0.4748, - "step": 729 - }, - { - "epoch": 0.047709300045748644, - "grad_norm": 0.5421920418739319, - "learning_rate": 7.943416757344941e-06, - "loss": 0.4914, - "step": 730 - }, - { - "epoch": 0.047774655251290765, - "grad_norm": 0.5723292231559753, - "learning_rate": 7.954298150163221e-06, - "loss": 0.4412, - "step": 731 - }, - { - "epoch": 0.047840010456832886, - "grad_norm": 0.5472431778907776, - "learning_rate": 7.965179542981503e-06, - "loss": 0.4433, - "step": 732 - }, - { - "epoch": 0.04790536566237501, - "grad_norm": 0.600566565990448, - "learning_rate": 7.976060935799783e-06, - "loss": 0.5535, - "step": 733 - }, - { - "epoch": 0.04797072086791713, - "grad_norm": 0.5550974011421204, - "learning_rate": 7.986942328618063e-06, - "loss": 0.4749, - "step": 734 - }, - { - "epoch": 0.04803607607345925, - "grad_norm": 0.5856187343597412, - "learning_rate": 7.997823721436345e-06, - "loss": 0.5042, - "step": 735 - }, - { - "epoch": 0.04810143127900137, - "grad_norm": 0.5473790168762207, - "learning_rate": 8.008705114254626e-06, - "loss": 0.487, - "step": 736 - }, - { - "epoch": 0.04816678648454349, - "grad_norm": 0.549591064453125, - "learning_rate": 8.019586507072906e-06, - "loss": 0.4657, - "step": 737 - }, - { - "epoch": 0.04823214169008561, - "grad_norm": 0.5372916460037231, - "learning_rate": 8.030467899891186e-06, - "loss": 0.4445, - "step": 738 - }, - { - "epoch": 0.048297496895627734, - "grad_norm": 0.5481677651405334, - "learning_rate": 8.041349292709466e-06, - "loss": 0.4527, - "step": 739 - }, - { - "epoch": 0.048362852101169855, - "grad_norm": 0.5766710638999939, - "learning_rate": 8.052230685527748e-06, - "loss": 0.4772, - "step": 740 - }, - { - "epoch": 0.048428207306711976, - "grad_norm": 0.5556133985519409, - "learning_rate": 8.06311207834603e-06, - "loss": 0.3829, - "step": 741 - }, - { - "epoch": 0.048493562512254104, - "grad_norm": 0.5223917365074158, - "learning_rate": 8.07399347116431e-06, - "loss": 0.4482, - "step": 742 - }, - { - "epoch": 0.048558917717796225, - "grad_norm": 0.5459646582603455, - "learning_rate": 8.08487486398259e-06, - "loss": 0.4289, - "step": 743 - }, - { - "epoch": 0.048624272923338346, - "grad_norm": 0.5293027758598328, - "learning_rate": 8.095756256800872e-06, - "loss": 0.4199, - "step": 744 - }, - { - "epoch": 0.04868962812888047, - "grad_norm": 0.5380204319953918, - "learning_rate": 8.106637649619152e-06, - "loss": 0.4665, - "step": 745 - }, - { - "epoch": 0.04875498333442259, - "grad_norm": 0.5344138145446777, - "learning_rate": 8.117519042437433e-06, - "loss": 0.4722, - "step": 746 - }, - { - "epoch": 0.04882033853996471, - "grad_norm": 0.518205463886261, - "learning_rate": 8.128400435255713e-06, - "loss": 0.4425, - "step": 747 - }, - { - "epoch": 0.04888569374550683, - "grad_norm": 0.5475545525550842, - "learning_rate": 8.139281828073993e-06, - "loss": 0.4778, - "step": 748 - }, - { - "epoch": 0.04895104895104895, - "grad_norm": 0.57927006483078, - "learning_rate": 8.150163220892275e-06, - "loss": 0.4909, - "step": 749 - }, - { - "epoch": 0.04901640415659107, - "grad_norm": 0.5073854923248291, - "learning_rate": 8.161044613710557e-06, - "loss": 0.4322, - "step": 750 - }, - { - "epoch": 0.049081759362133194, - "grad_norm": 0.5383774638175964, - "learning_rate": 8.171926006528837e-06, - "loss": 0.4194, - "step": 751 - }, - { - "epoch": 0.049147114567675315, - "grad_norm": 0.5598601698875427, - "learning_rate": 8.182807399347117e-06, - "loss": 0.4452, - "step": 752 - }, - { - "epoch": 0.049212469773217436, - "grad_norm": 0.5418949127197266, - "learning_rate": 8.193688792165397e-06, - "loss": 0.4533, - "step": 753 - }, - { - "epoch": 0.04927782497875956, - "grad_norm": 0.5240333080291748, - "learning_rate": 8.204570184983678e-06, - "loss": 0.4535, - "step": 754 - }, - { - "epoch": 0.04934318018430168, - "grad_norm": 0.6091196537017822, - "learning_rate": 8.21545157780196e-06, - "loss": 0.4184, - "step": 755 - }, - { - "epoch": 0.0494085353898438, - "grad_norm": 0.5421688556671143, - "learning_rate": 8.22633297062024e-06, - "loss": 0.4393, - "step": 756 - }, - { - "epoch": 0.04947389059538592, - "grad_norm": 0.5601967573165894, - "learning_rate": 8.23721436343852e-06, - "loss": 0.4746, - "step": 757 - }, - { - "epoch": 0.04953924580092804, - "grad_norm": 0.5984824895858765, - "learning_rate": 8.248095756256802e-06, - "loss": 0.5617, - "step": 758 - }, - { - "epoch": 0.04960460100647016, - "grad_norm": 0.5614069104194641, - "learning_rate": 8.258977149075082e-06, - "loss": 0.5006, - "step": 759 - }, - { - "epoch": 0.049669956212012284, - "grad_norm": 0.559607207775116, - "learning_rate": 8.269858541893364e-06, - "loss": 0.4674, - "step": 760 - }, - { - "epoch": 0.049735311417554405, - "grad_norm": 0.5390970706939697, - "learning_rate": 8.280739934711644e-06, - "loss": 0.4281, - "step": 761 - }, - { - "epoch": 0.04980066662309653, - "grad_norm": 0.5136409997940063, - "learning_rate": 8.291621327529924e-06, - "loss": 0.4185, - "step": 762 - }, - { - "epoch": 0.049866021828638654, - "grad_norm": 0.506865918636322, - "learning_rate": 8.302502720348205e-06, - "loss": 0.4401, - "step": 763 - }, - { - "epoch": 0.049931377034180775, - "grad_norm": 0.5017704367637634, - "learning_rate": 8.313384113166485e-06, - "loss": 0.3789, - "step": 764 - }, - { - "epoch": 0.049996732239722896, - "grad_norm": 0.5536909699440002, - "learning_rate": 8.324265505984767e-06, - "loss": 0.4543, - "step": 765 - }, - { - "epoch": 0.05006208744526502, - "grad_norm": 0.5310081839561462, - "learning_rate": 8.335146898803047e-06, - "loss": 0.4019, - "step": 766 - }, - { - "epoch": 0.05012744265080714, - "grad_norm": 0.5387538075447083, - "learning_rate": 8.346028291621329e-06, - "loss": 0.4151, - "step": 767 - }, - { - "epoch": 0.05019279785634926, - "grad_norm": 0.5525701642036438, - "learning_rate": 8.356909684439609e-06, - "loss": 0.4788, - "step": 768 - }, - { - "epoch": 0.05025815306189138, - "grad_norm": 0.557262659072876, - "learning_rate": 8.367791077257889e-06, - "loss": 0.447, - "step": 769 - }, - { - "epoch": 0.0503235082674335, - "grad_norm": 0.5567659735679626, - "learning_rate": 8.37867247007617e-06, - "loss": 0.4929, - "step": 770 - }, - { - "epoch": 0.05038886347297562, - "grad_norm": 0.5414775013923645, - "learning_rate": 8.389553862894452e-06, - "loss": 0.4282, - "step": 771 - }, - { - "epoch": 0.050454218678517744, - "grad_norm": 0.6173386573791504, - "learning_rate": 8.400435255712732e-06, - "loss": 0.5012, - "step": 772 - }, - { - "epoch": 0.050519573884059865, - "grad_norm": 0.5573265552520752, - "learning_rate": 8.411316648531012e-06, - "loss": 0.4603, - "step": 773 - }, - { - "epoch": 0.050584929089601986, - "grad_norm": 0.5525687336921692, - "learning_rate": 8.422198041349294e-06, - "loss": 0.4163, - "step": 774 - }, - { - "epoch": 0.05065028429514411, - "grad_norm": 0.5632034540176392, - "learning_rate": 8.433079434167574e-06, - "loss": 0.4471, - "step": 775 - }, - { - "epoch": 0.05071563950068623, - "grad_norm": 0.4934619665145874, - "learning_rate": 8.443960826985856e-06, - "loss": 0.3795, - "step": 776 - }, - { - "epoch": 0.05078099470622835, - "grad_norm": 0.5690104961395264, - "learning_rate": 8.454842219804136e-06, - "loss": 0.4735, - "step": 777 - }, - { - "epoch": 0.05084634991177047, - "grad_norm": 0.5554296970367432, - "learning_rate": 8.465723612622416e-06, - "loss": 0.474, - "step": 778 - }, - { - "epoch": 0.05091170511731259, - "grad_norm": 0.5207845568656921, - "learning_rate": 8.476605005440697e-06, - "loss": 0.3919, - "step": 779 - }, - { - "epoch": 0.05097706032285471, - "grad_norm": 0.5561384558677673, - "learning_rate": 8.487486398258979e-06, - "loss": 0.5127, - "step": 780 - }, - { - "epoch": 0.051042415528396834, - "grad_norm": 0.516481339931488, - "learning_rate": 8.498367791077259e-06, - "loss": 0.4004, - "step": 781 - }, - { - "epoch": 0.051107770733938955, - "grad_norm": 0.538422167301178, - "learning_rate": 8.509249183895539e-06, - "loss": 0.4719, - "step": 782 - }, - { - "epoch": 0.05117312593948108, - "grad_norm": 0.5367478132247925, - "learning_rate": 8.520130576713819e-06, - "loss": 0.4235, - "step": 783 - }, - { - "epoch": 0.051238481145023204, - "grad_norm": 0.5337499380111694, - "learning_rate": 8.531011969532101e-06, - "loss": 0.4185, - "step": 784 - }, - { - "epoch": 0.051303836350565325, - "grad_norm": 0.5809076428413391, - "learning_rate": 8.541893362350383e-06, - "loss": 0.5318, - "step": 785 - }, - { - "epoch": 0.051369191556107446, - "grad_norm": 0.5143489241600037, - "learning_rate": 8.552774755168663e-06, - "loss": 0.4697, - "step": 786 - }, - { - "epoch": 0.05143454676164957, - "grad_norm": 0.5569016337394714, - "learning_rate": 8.563656147986943e-06, - "loss": 0.4646, - "step": 787 - }, - { - "epoch": 0.05149990196719169, - "grad_norm": 0.5275742411613464, - "learning_rate": 8.574537540805223e-06, - "loss": 0.4293, - "step": 788 - }, - { - "epoch": 0.05156525717273381, - "grad_norm": 0.5588119029998779, - "learning_rate": 8.585418933623504e-06, - "loss": 0.3993, - "step": 789 - }, - { - "epoch": 0.05163061237827593, - "grad_norm": 0.5158429741859436, - "learning_rate": 8.596300326441786e-06, - "loss": 0.468, - "step": 790 - }, - { - "epoch": 0.05169596758381805, - "grad_norm": 0.5410779118537903, - "learning_rate": 8.607181719260066e-06, - "loss": 0.5098, - "step": 791 - }, - { - "epoch": 0.05176132278936017, - "grad_norm": 0.5506214499473572, - "learning_rate": 8.618063112078346e-06, - "loss": 0.4229, - "step": 792 - }, - { - "epoch": 0.051826677994902294, - "grad_norm": 0.5361695885658264, - "learning_rate": 8.628944504896628e-06, - "loss": 0.4313, - "step": 793 - }, - { - "epoch": 0.051892033200444415, - "grad_norm": 0.5345651507377625, - "learning_rate": 8.639825897714908e-06, - "loss": 0.4366, - "step": 794 - }, - { - "epoch": 0.051957388405986536, - "grad_norm": 0.5610102415084839, - "learning_rate": 8.65070729053319e-06, - "loss": 0.4524, - "step": 795 - }, - { - "epoch": 0.05202274361152866, - "grad_norm": 0.5412043333053589, - "learning_rate": 8.66158868335147e-06, - "loss": 0.427, - "step": 796 - }, - { - "epoch": 0.05208809881707078, - "grad_norm": 0.5883796811103821, - "learning_rate": 8.67247007616975e-06, - "loss": 0.4357, - "step": 797 - }, - { - "epoch": 0.0521534540226129, - "grad_norm": 0.527977705001831, - "learning_rate": 8.683351468988031e-06, - "loss": 0.4123, - "step": 798 - }, - { - "epoch": 0.05221880922815502, - "grad_norm": 0.5837652683258057, - "learning_rate": 8.694232861806313e-06, - "loss": 0.4355, - "step": 799 - }, - { - "epoch": 0.05228416443369714, - "grad_norm": 0.5441904664039612, - "learning_rate": 8.705114254624593e-06, - "loss": 0.4301, - "step": 800 - }, - { - "epoch": 0.05234951963923926, - "grad_norm": 0.5277956128120422, - "learning_rate": 8.715995647442873e-06, - "loss": 0.4592, - "step": 801 - }, - { - "epoch": 0.052414874844781384, - "grad_norm": 0.5981716513633728, - "learning_rate": 8.726877040261155e-06, - "loss": 0.4253, - "step": 802 - }, - { - "epoch": 0.052480230050323505, - "grad_norm": 0.5430685877799988, - "learning_rate": 8.737758433079435e-06, - "loss": 0.456, - "step": 803 - }, - { - "epoch": 0.05254558525586563, - "grad_norm": 0.5334436893463135, - "learning_rate": 8.748639825897716e-06, - "loss": 0.3857, - "step": 804 - }, - { - "epoch": 0.052610940461407754, - "grad_norm": 0.5099507570266724, - "learning_rate": 8.759521218715996e-06, - "loss": 0.4, - "step": 805 - }, - { - "epoch": 0.052676295666949875, - "grad_norm": 0.5122255086898804, - "learning_rate": 8.770402611534276e-06, - "loss": 0.4687, - "step": 806 - }, - { - "epoch": 0.052741650872491996, - "grad_norm": 0.5638749599456787, - "learning_rate": 8.781284004352558e-06, - "loss": 0.4455, - "step": 807 - }, - { - "epoch": 0.05280700607803412, - "grad_norm": 0.7972688674926758, - "learning_rate": 8.792165397170838e-06, - "loss": 0.4383, - "step": 808 - }, - { - "epoch": 0.05287236128357624, - "grad_norm": 0.49088436365127563, - "learning_rate": 8.80304678998912e-06, - "loss": 0.3938, - "step": 809 - }, - { - "epoch": 0.05293771648911836, - "grad_norm": 0.5083533525466919, - "learning_rate": 8.8139281828074e-06, - "loss": 0.3863, - "step": 810 - }, - { - "epoch": 0.05300307169466048, - "grad_norm": 0.5804579257965088, - "learning_rate": 8.824809575625682e-06, - "loss": 0.5243, - "step": 811 - }, - { - "epoch": 0.0530684269002026, - "grad_norm": 0.517282247543335, - "learning_rate": 8.835690968443962e-06, - "loss": 0.4416, - "step": 812 - }, - { - "epoch": 0.05313378210574472, - "grad_norm": 0.579800009727478, - "learning_rate": 8.846572361262242e-06, - "loss": 0.5161, - "step": 813 - }, - { - "epoch": 0.053199137311286844, - "grad_norm": 0.5247344970703125, - "learning_rate": 8.857453754080523e-06, - "loss": 0.4162, - "step": 814 - }, - { - "epoch": 0.053264492516828965, - "grad_norm": 0.5368344187736511, - "learning_rate": 8.868335146898803e-06, - "loss": 0.4507, - "step": 815 - }, - { - "epoch": 0.053329847722371086, - "grad_norm": 0.558159589767456, - "learning_rate": 8.879216539717085e-06, - "loss": 0.5208, - "step": 816 - }, - { - "epoch": 0.05339520292791321, - "grad_norm": 0.5349926948547363, - "learning_rate": 8.890097932535365e-06, - "loss": 0.4552, - "step": 817 - }, - { - "epoch": 0.05346055813345533, - "grad_norm": 0.5398790836334229, - "learning_rate": 8.900979325353645e-06, - "loss": 0.4633, - "step": 818 - }, - { - "epoch": 0.05352591333899745, - "grad_norm": 0.5353681445121765, - "learning_rate": 8.911860718171927e-06, - "loss": 0.4302, - "step": 819 - }, - { - "epoch": 0.05359126854453957, - "grad_norm": 0.5524716973304749, - "learning_rate": 8.922742110990208e-06, - "loss": 0.4916, - "step": 820 - }, - { - "epoch": 0.05365662375008169, - "grad_norm": 0.49374276399612427, - "learning_rate": 8.933623503808488e-06, - "loss": 0.3643, - "step": 821 - }, - { - "epoch": 0.05372197895562381, - "grad_norm": 0.5321981906890869, - "learning_rate": 8.944504896626768e-06, - "loss": 0.4435, - "step": 822 - }, - { - "epoch": 0.053787334161165934, - "grad_norm": 0.5989148020744324, - "learning_rate": 8.95538628944505e-06, - "loss": 0.4757, - "step": 823 - }, - { - "epoch": 0.053852689366708055, - "grad_norm": 0.5643342137336731, - "learning_rate": 8.96626768226333e-06, - "loss": 0.5149, - "step": 824 - }, - { - "epoch": 0.05391804457225018, - "grad_norm": 0.5305259227752686, - "learning_rate": 8.977149075081612e-06, - "loss": 0.4443, - "step": 825 - }, - { - "epoch": 0.053983399777792304, - "grad_norm": 0.5195196270942688, - "learning_rate": 8.988030467899892e-06, - "loss": 0.4115, - "step": 826 - }, - { - "epoch": 0.054048754983334425, - "grad_norm": 0.5913254618644714, - "learning_rate": 8.998911860718172e-06, - "loss": 0.4783, - "step": 827 - }, - { - "epoch": 0.054114110188876546, - "grad_norm": 0.553651750087738, - "learning_rate": 9.009793253536454e-06, - "loss": 0.4455, - "step": 828 - }, - { - "epoch": 0.05417946539441867, - "grad_norm": 0.5101826190948486, - "learning_rate": 9.020674646354735e-06, - "loss": 0.358, - "step": 829 - }, - { - "epoch": 0.05424482059996079, - "grad_norm": 0.5553346872329712, - "learning_rate": 9.031556039173015e-06, - "loss": 0.4284, - "step": 830 - }, - { - "epoch": 0.05431017580550291, - "grad_norm": 0.5712149739265442, - "learning_rate": 9.042437431991295e-06, - "loss": 0.485, - "step": 831 - }, - { - "epoch": 0.05437553101104503, - "grad_norm": 0.5390961170196533, - "learning_rate": 9.053318824809575e-06, - "loss": 0.4059, - "step": 832 - }, - { - "epoch": 0.05444088621658715, - "grad_norm": 0.5263479351997375, - "learning_rate": 9.064200217627857e-06, - "loss": 0.4044, - "step": 833 - }, - { - "epoch": 0.05450624142212927, - "grad_norm": 0.5573816895484924, - "learning_rate": 9.075081610446139e-06, - "loss": 0.475, - "step": 834 - }, - { - "epoch": 0.054571596627671394, - "grad_norm": 0.5833741426467896, - "learning_rate": 9.085963003264419e-06, - "loss": 0.4946, - "step": 835 - }, - { - "epoch": 0.054636951833213515, - "grad_norm": 0.5497047901153564, - "learning_rate": 9.096844396082699e-06, - "loss": 0.4481, - "step": 836 - }, - { - "epoch": 0.054702307038755636, - "grad_norm": 0.5038496255874634, - "learning_rate": 9.107725788900979e-06, - "loss": 0.4085, - "step": 837 - }, - { - "epoch": 0.05476766224429776, - "grad_norm": 0.5255208015441895, - "learning_rate": 9.11860718171926e-06, - "loss": 0.4543, - "step": 838 - }, - { - "epoch": 0.05483301744983988, - "grad_norm": 0.4954850673675537, - "learning_rate": 9.129488574537542e-06, - "loss": 0.3709, - "step": 839 - }, - { - "epoch": 0.054898372655382, - "grad_norm": 0.5355923175811768, - "learning_rate": 9.140369967355822e-06, - "loss": 0.4283, - "step": 840 - }, - { - "epoch": 0.05496372786092412, - "grad_norm": 0.6211481094360352, - "learning_rate": 9.151251360174102e-06, - "loss": 0.5203, - "step": 841 - }, - { - "epoch": 0.05502908306646624, - "grad_norm": 0.566294252872467, - "learning_rate": 9.162132752992384e-06, - "loss": 0.4664, - "step": 842 - }, - { - "epoch": 0.05509443827200836, - "grad_norm": 0.557698667049408, - "learning_rate": 9.173014145810664e-06, - "loss": 0.4644, - "step": 843 - }, - { - "epoch": 0.055159793477550484, - "grad_norm": 0.5852855443954468, - "learning_rate": 9.183895538628946e-06, - "loss": 0.4967, - "step": 844 - }, - { - "epoch": 0.055225148683092605, - "grad_norm": 0.5442548990249634, - "learning_rate": 9.194776931447226e-06, - "loss": 0.4765, - "step": 845 - }, - { - "epoch": 0.05529050388863473, - "grad_norm": 0.591444730758667, - "learning_rate": 9.205658324265506e-06, - "loss": 0.4683, - "step": 846 - }, - { - "epoch": 0.055355859094176854, - "grad_norm": 0.5280168652534485, - "learning_rate": 9.216539717083787e-06, - "loss": 0.396, - "step": 847 - }, - { - "epoch": 0.055421214299718975, - "grad_norm": 0.5102829337120056, - "learning_rate": 9.227421109902069e-06, - "loss": 0.4289, - "step": 848 - }, - { - "epoch": 0.055486569505261096, - "grad_norm": 0.581202507019043, - "learning_rate": 9.238302502720349e-06, - "loss": 0.4502, - "step": 849 - }, - { - "epoch": 0.05555192471080322, - "grad_norm": 0.5247049331665039, - "learning_rate": 9.249183895538629e-06, - "loss": 0.4348, - "step": 850 - }, - { - "epoch": 0.05561727991634534, - "grad_norm": 0.5914639234542847, - "learning_rate": 9.26006528835691e-06, - "loss": 0.5089, - "step": 851 - }, - { - "epoch": 0.05568263512188746, - "grad_norm": 0.532891035079956, - "learning_rate": 9.27094668117519e-06, - "loss": 0.4431, - "step": 852 - }, - { - "epoch": 0.05574799032742958, - "grad_norm": 0.5694881677627563, - "learning_rate": 9.281828073993473e-06, - "loss": 0.4512, - "step": 853 - }, - { - "epoch": 0.0558133455329717, - "grad_norm": 0.596929669380188, - "learning_rate": 9.292709466811753e-06, - "loss": 0.4639, - "step": 854 - }, - { - "epoch": 0.05587870073851382, - "grad_norm": 0.5148254632949829, - "learning_rate": 9.303590859630034e-06, - "loss": 0.4262, - "step": 855 - }, - { - "epoch": 0.055944055944055944, - "grad_norm": 0.5276803970336914, - "learning_rate": 9.314472252448314e-06, - "loss": 0.4571, - "step": 856 - }, - { - "epoch": 0.056009411149598065, - "grad_norm": 0.553871214389801, - "learning_rate": 9.325353645266594e-06, - "loss": 0.4855, - "step": 857 - }, - { - "epoch": 0.056074766355140186, - "grad_norm": 0.5498103499412537, - "learning_rate": 9.336235038084876e-06, - "loss": 0.4508, - "step": 858 - }, - { - "epoch": 0.05614012156068231, - "grad_norm": 0.571270763874054, - "learning_rate": 9.347116430903156e-06, - "loss": 0.4735, - "step": 859 - }, - { - "epoch": 0.05620547676622443, - "grad_norm": 0.48032382130622864, - "learning_rate": 9.357997823721438e-06, - "loss": 0.3663, - "step": 860 - }, - { - "epoch": 0.05627083197176655, - "grad_norm": 0.5407636761665344, - "learning_rate": 9.368879216539718e-06, - "loss": 0.4516, - "step": 861 - }, - { - "epoch": 0.05633618717730867, - "grad_norm": 0.5646923780441284, - "learning_rate": 9.379760609357998e-06, - "loss": 0.4501, - "step": 862 - }, - { - "epoch": 0.05640154238285079, - "grad_norm": 0.5731817483901978, - "learning_rate": 9.39064200217628e-06, - "loss": 0.4508, - "step": 863 - }, - { - "epoch": 0.05646689758839291, - "grad_norm": 0.6380143761634827, - "learning_rate": 9.401523394994561e-06, - "loss": 0.468, - "step": 864 - }, - { - "epoch": 0.056532252793935034, - "grad_norm": 0.5581998229026794, - "learning_rate": 9.412404787812841e-06, - "loss": 0.4683, - "step": 865 - }, - { - "epoch": 0.056597607999477155, - "grad_norm": 0.5660893321037292, - "learning_rate": 9.423286180631121e-06, - "loss": 0.4792, - "step": 866 - }, - { - "epoch": 0.05666296320501928, - "grad_norm": 0.46633392572402954, - "learning_rate": 9.434167573449401e-06, - "loss": 0.3853, - "step": 867 - }, - { - "epoch": 0.056728318410561404, - "grad_norm": 0.6074538230895996, - "learning_rate": 9.445048966267683e-06, - "loss": 0.5162, - "step": 868 - }, - { - "epoch": 0.056793673616103525, - "grad_norm": 0.5075995326042175, - "learning_rate": 9.455930359085965e-06, - "loss": 0.4245, - "step": 869 - }, - { - "epoch": 0.056859028821645646, - "grad_norm": 0.5416386723518372, - "learning_rate": 9.466811751904245e-06, - "loss": 0.4191, - "step": 870 - }, - { - "epoch": 0.05692438402718777, - "grad_norm": 0.5612762570381165, - "learning_rate": 9.477693144722525e-06, - "loss": 0.4899, - "step": 871 - }, - { - "epoch": 0.05698973923272989, - "grad_norm": 0.5579902529716492, - "learning_rate": 9.488574537540806e-06, - "loss": 0.4449, - "step": 872 - }, - { - "epoch": 0.05705509443827201, - "grad_norm": 0.6033218502998352, - "learning_rate": 9.499455930359086e-06, - "loss": 0.5391, - "step": 873 - }, - { - "epoch": 0.05712044964381413, - "grad_norm": 0.5937187075614929, - "learning_rate": 9.510337323177368e-06, - "loss": 0.4332, - "step": 874 - }, - { - "epoch": 0.05718580484935625, - "grad_norm": 0.5878888368606567, - "learning_rate": 9.521218715995648e-06, - "loss": 0.5223, - "step": 875 - }, - { - "epoch": 0.05725116005489837, - "grad_norm": 0.5516737103462219, - "learning_rate": 9.532100108813928e-06, - "loss": 0.4321, - "step": 876 - }, - { - "epoch": 0.057316515260440494, - "grad_norm": 0.5144123435020447, - "learning_rate": 9.54298150163221e-06, - "loss": 0.352, - "step": 877 - }, - { - "epoch": 0.057381870465982615, - "grad_norm": 0.5788304209709167, - "learning_rate": 9.553862894450491e-06, - "loss": 0.4312, - "step": 878 - }, - { - "epoch": 0.057447225671524736, - "grad_norm": 0.558205783367157, - "learning_rate": 9.564744287268772e-06, - "loss": 0.4623, - "step": 879 - }, - { - "epoch": 0.05751258087706686, - "grad_norm": 0.564784824848175, - "learning_rate": 9.575625680087052e-06, - "loss": 0.484, - "step": 880 - }, - { - "epoch": 0.05757793608260898, - "grad_norm": 0.5264977812767029, - "learning_rate": 9.586507072905332e-06, - "loss": 0.4197, - "step": 881 - }, - { - "epoch": 0.0576432912881511, - "grad_norm": 0.5363552570343018, - "learning_rate": 9.597388465723613e-06, - "loss": 0.4211, - "step": 882 - }, - { - "epoch": 0.05770864649369322, - "grad_norm": 0.5306923985481262, - "learning_rate": 9.608269858541895e-06, - "loss": 0.4587, - "step": 883 - }, - { - "epoch": 0.05777400169923534, - "grad_norm": 0.5312590003013611, - "learning_rate": 9.619151251360175e-06, - "loss": 0.4374, - "step": 884 - }, - { - "epoch": 0.05783935690477746, - "grad_norm": 0.5317679643630981, - "learning_rate": 9.630032644178455e-06, - "loss": 0.469, - "step": 885 - }, - { - "epoch": 0.057904712110319584, - "grad_norm": 0.5046166181564331, - "learning_rate": 9.640914036996737e-06, - "loss": 0.4133, - "step": 886 - }, - { - "epoch": 0.057970067315861705, - "grad_norm": 0.6376034021377563, - "learning_rate": 9.651795429815017e-06, - "loss": 0.4476, - "step": 887 - }, - { - "epoch": 0.05803542252140383, - "grad_norm": 0.5270693302154541, - "learning_rate": 9.662676822633298e-06, - "loss": 0.4211, - "step": 888 - }, - { - "epoch": 0.058100777726945954, - "grad_norm": 0.5376937389373779, - "learning_rate": 9.673558215451578e-06, - "loss": 0.4576, - "step": 889 - }, - { - "epoch": 0.058166132932488075, - "grad_norm": 0.5243609547615051, - "learning_rate": 9.684439608269858e-06, - "loss": 0.4567, - "step": 890 - }, - { - "epoch": 0.058231488138030196, - "grad_norm": 0.5273962020874023, - "learning_rate": 9.69532100108814e-06, - "loss": 0.4256, - "step": 891 - }, - { - "epoch": 0.05829684334357232, - "grad_norm": 0.5864324569702148, - "learning_rate": 9.70620239390642e-06, - "loss": 0.4907, - "step": 892 - }, - { - "epoch": 0.05836219854911444, - "grad_norm": 0.5216048359870911, - "learning_rate": 9.717083786724702e-06, - "loss": 0.4245, - "step": 893 - }, - { - "epoch": 0.05842755375465656, - "grad_norm": 0.48350924253463745, - "learning_rate": 9.727965179542982e-06, - "loss": 0.388, - "step": 894 - }, - { - "epoch": 0.05849290896019868, - "grad_norm": 0.5730065107345581, - "learning_rate": 9.738846572361264e-06, - "loss": 0.4364, - "step": 895 - }, - { - "epoch": 0.0585582641657408, - "grad_norm": 0.5184619426727295, - "learning_rate": 9.749727965179544e-06, - "loss": 0.4087, - "step": 896 - }, - { - "epoch": 0.05862361937128292, - "grad_norm": 0.5821964740753174, - "learning_rate": 9.760609357997825e-06, - "loss": 0.5004, - "step": 897 - }, - { - "epoch": 0.058688974576825044, - "grad_norm": 0.5208947062492371, - "learning_rate": 9.771490750816105e-06, - "loss": 0.3913, - "step": 898 - }, - { - "epoch": 0.058754329782367165, - "grad_norm": 0.5657762885093689, - "learning_rate": 9.782372143634385e-06, - "loss": 0.454, - "step": 899 - }, - { - "epoch": 0.058819684987909286, - "grad_norm": 0.5364660620689392, - "learning_rate": 9.793253536452667e-06, - "loss": 0.4305, - "step": 900 - }, - { - "epoch": 0.05888504019345141, - "grad_norm": 0.510415256023407, - "learning_rate": 9.804134929270947e-06, - "loss": 0.4463, - "step": 901 - }, - { - "epoch": 0.05895039539899353, - "grad_norm": 0.6170061826705933, - "learning_rate": 9.815016322089229e-06, - "loss": 0.4997, - "step": 902 - }, - { - "epoch": 0.05901575060453565, - "grad_norm": 0.6070553064346313, - "learning_rate": 9.825897714907509e-06, - "loss": 0.4979, - "step": 903 - }, - { - "epoch": 0.05908110581007777, - "grad_norm": 0.5229129791259766, - "learning_rate": 9.83677910772579e-06, - "loss": 0.3985, - "step": 904 - }, - { - "epoch": 0.05914646101561989, - "grad_norm": 0.5174643397331238, - "learning_rate": 9.84766050054407e-06, - "loss": 0.4483, - "step": 905 - }, - { - "epoch": 0.05921181622116201, - "grad_norm": 0.5256576538085938, - "learning_rate": 9.85854189336235e-06, - "loss": 0.3863, - "step": 906 - }, - { - "epoch": 0.059277171426704134, - "grad_norm": 0.5165674686431885, - "learning_rate": 9.869423286180632e-06, - "loss": 0.354, - "step": 907 - }, - { - "epoch": 0.059342526632246255, - "grad_norm": 0.5451213717460632, - "learning_rate": 9.880304678998914e-06, - "loss": 0.4456, - "step": 908 - }, - { - "epoch": 0.05940788183778838, - "grad_norm": 0.4831288158893585, - "learning_rate": 9.891186071817194e-06, - "loss": 0.3507, - "step": 909 - }, - { - "epoch": 0.059473237043330504, - "grad_norm": 0.510681688785553, - "learning_rate": 9.902067464635474e-06, - "loss": 0.3763, - "step": 910 - }, - { - "epoch": 0.059538592248872625, - "grad_norm": 0.6625702977180481, - "learning_rate": 9.912948857453754e-06, - "loss": 0.4688, - "step": 911 - }, - { - "epoch": 0.059603947454414746, - "grad_norm": 0.5980280637741089, - "learning_rate": 9.923830250272036e-06, - "loss": 0.4215, - "step": 912 - }, - { - "epoch": 0.05966930265995687, - "grad_norm": 0.6070137023925781, - "learning_rate": 9.934711643090317e-06, - "loss": 0.4185, - "step": 913 - }, - { - "epoch": 0.05973465786549899, - "grad_norm": 0.6734384298324585, - "learning_rate": 9.945593035908597e-06, - "loss": 0.5606, - "step": 914 - }, - { - "epoch": 0.05980001307104111, - "grad_norm": 0.574552595615387, - "learning_rate": 9.956474428726877e-06, - "loss": 0.5411, - "step": 915 - }, - { - "epoch": 0.05986536827658323, - "grad_norm": 0.5196554660797119, - "learning_rate": 9.967355821545159e-06, - "loss": 0.4218, - "step": 916 - }, - { - "epoch": 0.05993072348212535, - "grad_norm": 0.5171263813972473, - "learning_rate": 9.978237214363439e-06, - "loss": 0.4259, - "step": 917 - }, - { - "epoch": 0.05999607868766747, - "grad_norm": 0.6005182862281799, - "learning_rate": 9.98911860718172e-06, - "loss": 0.5567, - "step": 918 - }, - { - "epoch": 0.060061433893209594, - "grad_norm": 0.5370460748672485, - "learning_rate": 1e-05, - "loss": 0.4488, - "step": 919 - }, - { - "epoch": 0.060126789098751715, - "grad_norm": 0.5299703478813171, - "learning_rate": 9.999999987806635e-06, - "loss": 0.4235, - "step": 920 - }, - { - "epoch": 0.060192144304293836, - "grad_norm": 0.556443989276886, - "learning_rate": 9.999999951226536e-06, - "loss": 0.4769, - "step": 921 - }, - { - "epoch": 0.06025749950983596, - "grad_norm": 0.5349219441413879, - "learning_rate": 9.999999890259706e-06, - "loss": 0.4508, - "step": 922 - }, - { - "epoch": 0.06032285471537808, - "grad_norm": 0.548328161239624, - "learning_rate": 9.999999804906145e-06, - "loss": 0.5043, - "step": 923 - }, - { - "epoch": 0.0603882099209202, - "grad_norm": 0.5339406728744507, - "learning_rate": 9.999999695165852e-06, - "loss": 0.4141, - "step": 924 - }, - { - "epoch": 0.06045356512646232, - "grad_norm": 0.5387172698974609, - "learning_rate": 9.999999561038828e-06, - "loss": 0.4016, - "step": 925 - }, - { - "epoch": 0.06051892033200444, - "grad_norm": 0.5516391396522522, - "learning_rate": 9.999999402525074e-06, - "loss": 0.4303, - "step": 926 - }, - { - "epoch": 0.06058427553754656, - "grad_norm": 0.5340887308120728, - "learning_rate": 9.999999219624593e-06, - "loss": 0.4463, - "step": 927 - }, - { - "epoch": 0.060649630743088684, - "grad_norm": 0.5485219359397888, - "learning_rate": 9.99999901233738e-06, - "loss": 0.4164, - "step": 928 - }, - { - "epoch": 0.06071498594863081, - "grad_norm": 0.5041017532348633, - "learning_rate": 9.999998780663442e-06, - "loss": 0.3712, - "step": 929 - }, - { - "epoch": 0.06078034115417293, - "grad_norm": 0.577510416507721, - "learning_rate": 9.999998524602777e-06, - "loss": 0.4896, - "step": 930 - }, - { - "epoch": 0.060845696359715054, - "grad_norm": 0.5734471082687378, - "learning_rate": 9.999998244155387e-06, - "loss": 0.4917, - "step": 931 - }, - { - "epoch": 0.060911051565257175, - "grad_norm": 0.5420089960098267, - "learning_rate": 9.999997939321274e-06, - "loss": 0.456, - "step": 932 - }, - { - "epoch": 0.060976406770799296, - "grad_norm": 0.5438268184661865, - "learning_rate": 9.999997610100438e-06, - "loss": 0.4542, - "step": 933 - }, - { - "epoch": 0.06104176197634142, - "grad_norm": 0.5289359092712402, - "learning_rate": 9.999997256492882e-06, - "loss": 0.4261, - "step": 934 - }, - { - "epoch": 0.06110711718188354, - "grad_norm": 0.530704915523529, - "learning_rate": 9.999996878498607e-06, - "loss": 0.444, - "step": 935 - }, - { - "epoch": 0.06117247238742566, - "grad_norm": 0.5149810314178467, - "learning_rate": 9.999996476117614e-06, - "loss": 0.4656, - "step": 936 - }, - { - "epoch": 0.06123782759296778, - "grad_norm": 0.5710325837135315, - "learning_rate": 9.99999604934991e-06, - "loss": 0.4992, - "step": 937 - }, - { - "epoch": 0.0613031827985099, - "grad_norm": 0.5574349164962769, - "learning_rate": 9.99999559819549e-06, - "loss": 0.4507, - "step": 938 - }, - { - "epoch": 0.06136853800405202, - "grad_norm": 0.5291350483894348, - "learning_rate": 9.999995122654357e-06, - "loss": 0.4293, - "step": 939 - }, - { - "epoch": 0.061433893209594144, - "grad_norm": 0.5154761075973511, - "learning_rate": 9.99999462272652e-06, - "loss": 0.417, - "step": 940 - }, - { - "epoch": 0.061499248415136265, - "grad_norm": 0.5109313130378723, - "learning_rate": 9.999994098411975e-06, - "loss": 0.4051, - "step": 941 - }, - { - "epoch": 0.061564603620678386, - "grad_norm": 0.5403725504875183, - "learning_rate": 9.999993549710727e-06, - "loss": 0.4648, - "step": 942 - }, - { - "epoch": 0.06162995882622051, - "grad_norm": 0.5102765560150146, - "learning_rate": 9.999992976622778e-06, - "loss": 0.4253, - "step": 943 - }, - { - "epoch": 0.06169531403176263, - "grad_norm": 0.562269389629364, - "learning_rate": 9.999992379148131e-06, - "loss": 0.431, - "step": 944 - }, - { - "epoch": 0.06176066923730475, - "grad_norm": 0.5346286296844482, - "learning_rate": 9.99999175728679e-06, - "loss": 0.4257, - "step": 945 - }, - { - "epoch": 0.06182602444284687, - "grad_norm": 0.5671541094779968, - "learning_rate": 9.999991111038756e-06, - "loss": 0.4605, - "step": 946 - }, - { - "epoch": 0.06189137964838899, - "grad_norm": 0.5588353276252747, - "learning_rate": 9.999990440404034e-06, - "loss": 0.4266, - "step": 947 - }, - { - "epoch": 0.06195673485393111, - "grad_norm": 0.51191645860672, - "learning_rate": 9.999989745382626e-06, - "loss": 0.4281, - "step": 948 - }, - { - "epoch": 0.062022090059473234, - "grad_norm": 0.5754305124282837, - "learning_rate": 9.999989025974535e-06, - "loss": 0.4728, - "step": 949 - }, - { - "epoch": 0.06208744526501536, - "grad_norm": 0.5369451642036438, - "learning_rate": 9.999988282179766e-06, - "loss": 0.4651, - "step": 950 - }, - { - "epoch": 0.06215280047055748, - "grad_norm": 0.5306262969970703, - "learning_rate": 9.999987513998324e-06, - "loss": 0.4238, - "step": 951 - }, - { - "epoch": 0.062218155676099604, - "grad_norm": 0.5413644909858704, - "learning_rate": 9.999986721430208e-06, - "loss": 0.3978, - "step": 952 - }, - { - "epoch": 0.062283510881641725, - "grad_norm": 0.49585309624671936, - "learning_rate": 9.999985904475427e-06, - "loss": 0.4195, - "step": 953 - }, - { - "epoch": 0.062348866087183846, - "grad_norm": 0.5562087893486023, - "learning_rate": 9.99998506313398e-06, - "loss": 0.4443, - "step": 954 - }, - { - "epoch": 0.06241422129272597, - "grad_norm": 0.5618133544921875, - "learning_rate": 9.999984197405874e-06, - "loss": 0.4596, - "step": 955 - }, - { - "epoch": 0.06247957649826809, - "grad_norm": 0.5170841813087463, - "learning_rate": 9.999983307291115e-06, - "loss": 0.4443, - "step": 956 - }, - { - "epoch": 0.0625449317038102, - "grad_norm": 0.5080602169036865, - "learning_rate": 9.999982392789703e-06, - "loss": 0.409, - "step": 957 - }, - { - "epoch": 0.06261028690935233, - "grad_norm": 0.5565418601036072, - "learning_rate": 9.999981453901647e-06, - "loss": 0.4677, - "step": 958 - }, - { - "epoch": 0.06267564211489444, - "grad_norm": 0.5918434262275696, - "learning_rate": 9.999980490626948e-06, - "loss": 0.4442, - "step": 959 - }, - { - "epoch": 0.06274099732043657, - "grad_norm": 0.5190476775169373, - "learning_rate": 9.999979502965611e-06, - "loss": 0.462, - "step": 960 - }, - { - "epoch": 0.0628063525259787, - "grad_norm": 0.47892889380455017, - "learning_rate": 9.999978490917644e-06, - "loss": 0.3883, - "step": 961 - }, - { - "epoch": 0.06287170773152082, - "grad_norm": 0.579393208026886, - "learning_rate": 9.999977454483047e-06, - "loss": 0.5126, - "step": 962 - }, - { - "epoch": 0.06293706293706294, - "grad_norm": 0.58144211769104, - "learning_rate": 9.99997639366183e-06, - "loss": 0.4811, - "step": 963 - }, - { - "epoch": 0.06300241814260506, - "grad_norm": 0.5610965490341187, - "learning_rate": 9.999975308453996e-06, - "loss": 0.4737, - "step": 964 - }, - { - "epoch": 0.06306777334814719, - "grad_norm": 0.500460684299469, - "learning_rate": 9.999974198859548e-06, - "loss": 0.4236, - "step": 965 - }, - { - "epoch": 0.0631331285536893, - "grad_norm": 0.569346010684967, - "learning_rate": 9.999973064878496e-06, - "loss": 0.5006, - "step": 966 - }, - { - "epoch": 0.06319848375923143, - "grad_norm": 0.49624568223953247, - "learning_rate": 9.999971906510842e-06, - "loss": 0.3761, - "step": 967 - }, - { - "epoch": 0.06326383896477354, - "grad_norm": 0.5685256719589233, - "learning_rate": 9.999970723756594e-06, - "loss": 0.4643, - "step": 968 - }, - { - "epoch": 0.06332919417031567, - "grad_norm": 0.5329216122627258, - "learning_rate": 9.999969516615755e-06, - "loss": 0.4207, - "step": 969 - }, - { - "epoch": 0.06339454937585778, - "grad_norm": 0.5951482057571411, - "learning_rate": 9.999968285088332e-06, - "loss": 0.417, - "step": 970 - }, - { - "epoch": 0.06345990458139991, - "grad_norm": 0.5494505167007446, - "learning_rate": 9.999967029174334e-06, - "loss": 0.5063, - "step": 971 - }, - { - "epoch": 0.06352525978694203, - "grad_norm": 0.5310516953468323, - "learning_rate": 9.999965748873763e-06, - "loss": 0.4259, - "step": 972 - }, - { - "epoch": 0.06359061499248415, - "grad_norm": 0.5489716529846191, - "learning_rate": 9.999964444186628e-06, - "loss": 0.4494, - "step": 973 - }, - { - "epoch": 0.06365597019802627, - "grad_norm": 0.5342676639556885, - "learning_rate": 9.999963115112934e-06, - "loss": 0.4479, - "step": 974 - }, - { - "epoch": 0.0637213254035684, - "grad_norm": 0.4992792308330536, - "learning_rate": 9.999961761652688e-06, - "loss": 0.4007, - "step": 975 - }, - { - "epoch": 0.06378668060911051, - "grad_norm": 0.5222305059432983, - "learning_rate": 9.999960383805895e-06, - "loss": 0.3886, - "step": 976 - }, - { - "epoch": 0.06385203581465264, - "grad_norm": 0.5601535439491272, - "learning_rate": 9.999958981572565e-06, - "loss": 0.4434, - "step": 977 - }, - { - "epoch": 0.06391739102019475, - "grad_norm": 0.7607196569442749, - "learning_rate": 9.999957554952702e-06, - "loss": 0.4415, - "step": 978 - }, - { - "epoch": 0.06398274622573688, - "grad_norm": 0.5980501770973206, - "learning_rate": 9.999956103946313e-06, - "loss": 0.5057, - "step": 979 - }, - { - "epoch": 0.064048101431279, - "grad_norm": 0.5119470357894897, - "learning_rate": 9.999954628553406e-06, - "loss": 0.4124, - "step": 980 - }, - { - "epoch": 0.06411345663682112, - "grad_norm": 0.5557889938354492, - "learning_rate": 9.99995312877399e-06, - "loss": 0.4192, - "step": 981 - }, - { - "epoch": 0.06417881184236325, - "grad_norm": 0.6168763041496277, - "learning_rate": 9.999951604608067e-06, - "loss": 0.5102, - "step": 982 - }, - { - "epoch": 0.06424416704790537, - "grad_norm": 0.5247033834457397, - "learning_rate": 9.99995005605565e-06, - "loss": 0.427, - "step": 983 - }, - { - "epoch": 0.0643095222534475, - "grad_norm": 0.4901919960975647, - "learning_rate": 9.999948483116746e-06, - "loss": 0.391, - "step": 984 - }, - { - "epoch": 0.06437487745898961, - "grad_norm": 0.537260890007019, - "learning_rate": 9.999946885791359e-06, - "loss": 0.436, - "step": 985 - }, - { - "epoch": 0.06444023266453174, - "grad_norm": 0.5723612904548645, - "learning_rate": 9.9999452640795e-06, - "loss": 0.4787, - "step": 986 - }, - { - "epoch": 0.06450558787007385, - "grad_norm": 0.5198825001716614, - "learning_rate": 9.999943617981174e-06, - "loss": 0.4308, - "step": 987 - }, - { - "epoch": 0.06457094307561598, - "grad_norm": 0.519920289516449, - "learning_rate": 9.999941947496392e-06, - "loss": 0.4615, - "step": 988 - }, - { - "epoch": 0.06463629828115809, - "grad_norm": 0.5049760341644287, - "learning_rate": 9.99994025262516e-06, - "loss": 0.4347, - "step": 989 - }, - { - "epoch": 0.06470165348670022, - "grad_norm": 0.5089125633239746, - "learning_rate": 9.99993853336749e-06, - "loss": 0.4497, - "step": 990 - }, - { - "epoch": 0.06476700869224233, - "grad_norm": 0.537655234336853, - "learning_rate": 9.999936789723385e-06, - "loss": 0.4734, - "step": 991 - }, - { - "epoch": 0.06483236389778446, - "grad_norm": 0.5458921790122986, - "learning_rate": 9.999935021692857e-06, - "loss": 0.4165, - "step": 992 - }, - { - "epoch": 0.06489771910332658, - "grad_norm": 0.5852230787277222, - "learning_rate": 9.999933229275912e-06, - "loss": 0.5647, - "step": 993 - }, - { - "epoch": 0.0649630743088687, - "grad_norm": 0.5207486748695374, - "learning_rate": 9.999931412472564e-06, - "loss": 0.4477, - "step": 994 - }, - { - "epoch": 0.06502842951441082, - "grad_norm": 0.7490776181221008, - "learning_rate": 9.999929571282816e-06, - "loss": 0.4672, - "step": 995 - }, - { - "epoch": 0.06509378471995295, - "grad_norm": 0.5382635593414307, - "learning_rate": 9.99992770570668e-06, - "loss": 0.4679, - "step": 996 - }, - { - "epoch": 0.06515913992549506, - "grad_norm": 0.5522093772888184, - "learning_rate": 9.999925815744164e-06, - "loss": 0.4407, - "step": 997 - }, - { - "epoch": 0.06522449513103719, - "grad_norm": 0.5206857323646545, - "learning_rate": 9.999923901395278e-06, - "loss": 0.4224, - "step": 998 - }, - { - "epoch": 0.0652898503365793, - "grad_norm": 0.5740740299224854, - "learning_rate": 9.999921962660032e-06, - "loss": 0.5181, - "step": 999 - }, - { - "epoch": 0.06535520554212143, - "grad_norm": 0.5404792428016663, - "learning_rate": 9.999919999538433e-06, - "loss": 0.4804, - "step": 1000 - }, - { - "epoch": 0.06542056074766354, - "grad_norm": 0.5321018695831299, - "learning_rate": 9.999918012030493e-06, - "loss": 0.4637, - "step": 1001 - }, - { - "epoch": 0.06548591595320567, - "grad_norm": 0.49648821353912354, - "learning_rate": 9.999916000136221e-06, - "loss": 0.383, - "step": 1002 - }, - { - "epoch": 0.0655512711587478, - "grad_norm": 0.5232150554656982, - "learning_rate": 9.999913963855626e-06, - "loss": 0.4104, - "step": 1003 - }, - { - "epoch": 0.06561662636428992, - "grad_norm": 0.5995215177536011, - "learning_rate": 9.999911903188717e-06, - "loss": 0.5065, - "step": 1004 - }, - { - "epoch": 0.06568198156983204, - "grad_norm": 0.5301916003227234, - "learning_rate": 9.999909818135507e-06, - "loss": 0.4998, - "step": 1005 - }, - { - "epoch": 0.06574733677537416, - "grad_norm": 0.5459743738174438, - "learning_rate": 9.999907708696004e-06, - "loss": 0.4283, - "step": 1006 - }, - { - "epoch": 0.06581269198091629, - "grad_norm": 0.5282205939292908, - "learning_rate": 9.999905574870219e-06, - "loss": 0.4219, - "step": 1007 - }, - { - "epoch": 0.0658780471864584, - "grad_norm": 0.5390070080757141, - "learning_rate": 9.999903416658164e-06, - "loss": 0.4787, - "step": 1008 - }, - { - "epoch": 0.06594340239200053, - "grad_norm": 0.5368457436561584, - "learning_rate": 9.999901234059845e-06, - "loss": 0.4519, - "step": 1009 - }, - { - "epoch": 0.06600875759754264, - "grad_norm": 0.5622742176055908, - "learning_rate": 9.999899027075279e-06, - "loss": 0.4821, - "step": 1010 - }, - { - "epoch": 0.06607411280308477, - "grad_norm": 0.502387285232544, - "learning_rate": 9.999896795704471e-06, - "loss": 0.413, - "step": 1011 - }, - { - "epoch": 0.06613946800862688, - "grad_norm": 0.5299873948097229, - "learning_rate": 9.999894539947435e-06, - "loss": 0.4521, - "step": 1012 - }, - { - "epoch": 0.06620482321416901, - "grad_norm": 0.5077336430549622, - "learning_rate": 9.99989225980418e-06, - "loss": 0.4049, - "step": 1013 - }, - { - "epoch": 0.06627017841971113, - "grad_norm": 0.5375440716743469, - "learning_rate": 9.999889955274719e-06, - "loss": 0.4666, - "step": 1014 - }, - { - "epoch": 0.06633553362525325, - "grad_norm": 0.5278374552726746, - "learning_rate": 9.999887626359064e-06, - "loss": 0.4591, - "step": 1015 - }, - { - "epoch": 0.06640088883079537, - "grad_norm": 0.5720160603523254, - "learning_rate": 9.999885273057223e-06, - "loss": 0.4545, - "step": 1016 - }, - { - "epoch": 0.0664662440363375, - "grad_norm": 0.5435892939567566, - "learning_rate": 9.99988289536921e-06, - "loss": 0.4822, - "step": 1017 - }, - { - "epoch": 0.06653159924187961, - "grad_norm": 0.5654741525650024, - "learning_rate": 9.999880493295035e-06, - "loss": 0.4699, - "step": 1018 - }, - { - "epoch": 0.06659695444742174, - "grad_norm": 0.569327712059021, - "learning_rate": 9.999878066834713e-06, - "loss": 0.4985, - "step": 1019 - }, - { - "epoch": 0.06666230965296385, - "grad_norm": 0.5556838512420654, - "learning_rate": 9.999875615988252e-06, - "loss": 0.4365, - "step": 1020 - }, - { - "epoch": 0.06672766485850598, - "grad_norm": 0.5812886953353882, - "learning_rate": 9.999873140755666e-06, - "loss": 0.4512, - "step": 1021 - }, - { - "epoch": 0.0667930200640481, - "grad_norm": 0.5536554455757141, - "learning_rate": 9.999870641136966e-06, - "loss": 0.4766, - "step": 1022 - }, - { - "epoch": 0.06685837526959022, - "grad_norm": 0.5084673166275024, - "learning_rate": 9.999868117132166e-06, - "loss": 0.3906, - "step": 1023 - }, - { - "epoch": 0.06692373047513235, - "grad_norm": 0.5405479073524475, - "learning_rate": 9.999865568741275e-06, - "loss": 0.4192, - "step": 1024 - }, - { - "epoch": 0.06698908568067447, - "grad_norm": 0.5325936079025269, - "learning_rate": 9.99986299596431e-06, - "loss": 0.3922, - "step": 1025 - }, - { - "epoch": 0.06705444088621659, - "grad_norm": 0.522232174873352, - "learning_rate": 9.99986039880128e-06, - "loss": 0.4321, - "step": 1026 - }, - { - "epoch": 0.06711979609175871, - "grad_norm": 0.5085806846618652, - "learning_rate": 9.999857777252198e-06, - "loss": 0.4419, - "step": 1027 - }, - { - "epoch": 0.06718515129730084, - "grad_norm": 0.5148561596870422, - "learning_rate": 9.999855131317077e-06, - "loss": 0.4104, - "step": 1028 - }, - { - "epoch": 0.06725050650284295, - "grad_norm": 0.5366589426994324, - "learning_rate": 9.999852460995933e-06, - "loss": 0.413, - "step": 1029 - }, - { - "epoch": 0.06731586170838508, - "grad_norm": 0.5244190096855164, - "learning_rate": 9.999849766288774e-06, - "loss": 0.4195, - "step": 1030 - }, - { - "epoch": 0.06738121691392719, - "grad_norm": 0.4705215096473694, - "learning_rate": 9.999847047195616e-06, - "loss": 0.3667, - "step": 1031 - }, - { - "epoch": 0.06744657211946932, - "grad_norm": 0.5042105913162231, - "learning_rate": 9.999844303716473e-06, - "loss": 0.4562, - "step": 1032 - }, - { - "epoch": 0.06751192732501143, - "grad_norm": 0.5559691190719604, - "learning_rate": 9.999841535851356e-06, - "loss": 0.4836, - "step": 1033 - }, - { - "epoch": 0.06757728253055356, - "grad_norm": 0.5235845446586609, - "learning_rate": 9.999838743600281e-06, - "loss": 0.3812, - "step": 1034 - }, - { - "epoch": 0.06764263773609568, - "grad_norm": 0.5642328858375549, - "learning_rate": 9.99983592696326e-06, - "loss": 0.512, - "step": 1035 - }, - { - "epoch": 0.0677079929416378, - "grad_norm": 0.5955402851104736, - "learning_rate": 9.999833085940306e-06, - "loss": 0.4639, - "step": 1036 - }, - { - "epoch": 0.06777334814717992, - "grad_norm": 0.5364554524421692, - "learning_rate": 9.999830220531434e-06, - "loss": 0.5009, - "step": 1037 - }, - { - "epoch": 0.06783870335272205, - "grad_norm": 0.5376973748207092, - "learning_rate": 9.99982733073666e-06, - "loss": 0.4239, - "step": 1038 - }, - { - "epoch": 0.06790405855826416, - "grad_norm": 0.5236703157424927, - "learning_rate": 9.999824416555993e-06, - "loss": 0.4436, - "step": 1039 - }, - { - "epoch": 0.06796941376380629, - "grad_norm": 0.5033944249153137, - "learning_rate": 9.999821477989452e-06, - "loss": 0.4452, - "step": 1040 - }, - { - "epoch": 0.0680347689693484, - "grad_norm": 0.5482272505760193, - "learning_rate": 9.99981851503705e-06, - "loss": 0.4499, - "step": 1041 - }, - { - "epoch": 0.06810012417489053, - "grad_norm": 0.5539483428001404, - "learning_rate": 9.9998155276988e-06, - "loss": 0.4985, - "step": 1042 - }, - { - "epoch": 0.06816547938043264, - "grad_norm": 0.528601884841919, - "learning_rate": 9.999812515974717e-06, - "loss": 0.478, - "step": 1043 - }, - { - "epoch": 0.06823083458597477, - "grad_norm": 0.4756832718849182, - "learning_rate": 9.999809479864817e-06, - "loss": 0.3917, - "step": 1044 - }, - { - "epoch": 0.0682961897915169, - "grad_norm": 0.5553827881813049, - "learning_rate": 9.999806419369114e-06, - "loss": 0.4666, - "step": 1045 - }, - { - "epoch": 0.06836154499705901, - "grad_norm": 0.5052905082702637, - "learning_rate": 9.999803334487624e-06, - "loss": 0.3979, - "step": 1046 - }, - { - "epoch": 0.06842690020260114, - "grad_norm": 0.5354941487312317, - "learning_rate": 9.999800225220359e-06, - "loss": 0.4174, - "step": 1047 - }, - { - "epoch": 0.06849225540814326, - "grad_norm": 0.4951237142086029, - "learning_rate": 9.999797091567339e-06, - "loss": 0.4191, - "step": 1048 - }, - { - "epoch": 0.06855761061368539, - "grad_norm": 0.5031898021697998, - "learning_rate": 9.999793933528575e-06, - "loss": 0.3946, - "step": 1049 - }, - { - "epoch": 0.0686229658192275, - "grad_norm": 0.5494363307952881, - "learning_rate": 9.999790751104082e-06, - "loss": 0.4309, - "step": 1050 - }, - { - "epoch": 0.06868832102476963, - "grad_norm": 0.5712085962295532, - "learning_rate": 9.99978754429388e-06, - "loss": 0.5081, - "step": 1051 - }, - { - "epoch": 0.06875367623031174, - "grad_norm": 0.5110986232757568, - "learning_rate": 9.99978431309798e-06, - "loss": 0.4063, - "step": 1052 - }, - { - "epoch": 0.06881903143585387, - "grad_norm": 0.5322535037994385, - "learning_rate": 9.999781057516402e-06, - "loss": 0.3905, - "step": 1053 - }, - { - "epoch": 0.06888438664139598, - "grad_norm": 0.5490505695343018, - "learning_rate": 9.999777777549158e-06, - "loss": 0.4842, - "step": 1054 - }, - { - "epoch": 0.06894974184693811, - "grad_norm": 0.5415787100791931, - "learning_rate": 9.999774473196266e-06, - "loss": 0.4193, - "step": 1055 - }, - { - "epoch": 0.06901509705248023, - "grad_norm": 0.5278641581535339, - "learning_rate": 9.999771144457743e-06, - "loss": 0.466, - "step": 1056 - }, - { - "epoch": 0.06908045225802235, - "grad_norm": 0.5006137490272522, - "learning_rate": 9.999767791333604e-06, - "loss": 0.4058, - "step": 1057 - }, - { - "epoch": 0.06914580746356447, - "grad_norm": 0.5041103363037109, - "learning_rate": 9.999764413823864e-06, - "loss": 0.3976, - "step": 1058 - }, - { - "epoch": 0.0692111626691066, - "grad_norm": 0.5599545240402222, - "learning_rate": 9.999761011928542e-06, - "loss": 0.4106, - "step": 1059 - }, - { - "epoch": 0.06927651787464871, - "grad_norm": 0.529897928237915, - "learning_rate": 9.999757585647653e-06, - "loss": 0.4151, - "step": 1060 - }, - { - "epoch": 0.06934187308019084, - "grad_norm": 0.5626737475395203, - "learning_rate": 9.999754134981215e-06, - "loss": 0.349, - "step": 1061 - }, - { - "epoch": 0.06940722828573295, - "grad_norm": 0.5403541922569275, - "learning_rate": 9.999750659929241e-06, - "loss": 0.4523, - "step": 1062 - }, - { - "epoch": 0.06947258349127508, - "grad_norm": 0.5121879577636719, - "learning_rate": 9.999747160491754e-06, - "loss": 0.3941, - "step": 1063 - }, - { - "epoch": 0.0695379386968172, - "grad_norm": 0.5438024997711182, - "learning_rate": 9.999743636668767e-06, - "loss": 0.4034, - "step": 1064 - }, - { - "epoch": 0.06960329390235932, - "grad_norm": 0.5025306344032288, - "learning_rate": 9.999740088460299e-06, - "loss": 0.3851, - "step": 1065 - }, - { - "epoch": 0.06966864910790145, - "grad_norm": 0.5248860716819763, - "learning_rate": 9.999736515866365e-06, - "loss": 0.4613, - "step": 1066 - }, - { - "epoch": 0.06973400431344356, - "grad_norm": 0.5184089541435242, - "learning_rate": 9.999732918886985e-06, - "loss": 0.3957, - "step": 1067 - }, - { - "epoch": 0.06979935951898569, - "grad_norm": 0.5219573974609375, - "learning_rate": 9.999729297522176e-06, - "loss": 0.433, - "step": 1068 - }, - { - "epoch": 0.06986471472452781, - "grad_norm": 0.5599725842475891, - "learning_rate": 9.999725651771955e-06, - "loss": 0.4545, - "step": 1069 - }, - { - "epoch": 0.06993006993006994, - "grad_norm": 0.558887243270874, - "learning_rate": 9.99972198163634e-06, - "loss": 0.484, - "step": 1070 - }, - { - "epoch": 0.06999542513561205, - "grad_norm": 0.5470594167709351, - "learning_rate": 9.999718287115346e-06, - "loss": 0.5024, - "step": 1071 - }, - { - "epoch": 0.07006078034115418, - "grad_norm": 0.6109849214553833, - "learning_rate": 9.999714568208997e-06, - "loss": 0.4765, - "step": 1072 - }, - { - "epoch": 0.07012613554669629, - "grad_norm": 0.47927340865135193, - "learning_rate": 9.999710824917306e-06, - "loss": 0.399, - "step": 1073 - }, - { - "epoch": 0.07019149075223842, - "grad_norm": 0.5048418045043945, - "learning_rate": 9.999707057240294e-06, - "loss": 0.3745, - "step": 1074 - }, - { - "epoch": 0.07025684595778053, - "grad_norm": 0.5382951498031616, - "learning_rate": 9.999703265177979e-06, - "loss": 0.4251, - "step": 1075 - }, - { - "epoch": 0.07032220116332266, - "grad_norm": 0.4840947985649109, - "learning_rate": 9.99969944873038e-06, - "loss": 0.4107, - "step": 1076 - }, - { - "epoch": 0.07038755636886478, - "grad_norm": 0.5169443488121033, - "learning_rate": 9.999695607897513e-06, - "loss": 0.4057, - "step": 1077 - }, - { - "epoch": 0.0704529115744069, - "grad_norm": 0.6006619334220886, - "learning_rate": 9.999691742679398e-06, - "loss": 0.5285, - "step": 1078 - }, - { - "epoch": 0.07051826677994902, - "grad_norm": 0.5611095428466797, - "learning_rate": 9.999687853076056e-06, - "loss": 0.4978, - "step": 1079 - }, - { - "epoch": 0.07058362198549115, - "grad_norm": 0.538235604763031, - "learning_rate": 9.999683939087504e-06, - "loss": 0.4559, - "step": 1080 - }, - { - "epoch": 0.07064897719103326, - "grad_norm": 0.58662348985672, - "learning_rate": 9.999680000713761e-06, - "loss": 0.5676, - "step": 1081 - }, - { - "epoch": 0.07071433239657539, - "grad_norm": 0.5555466413497925, - "learning_rate": 9.999676037954846e-06, - "loss": 0.3845, - "step": 1082 - }, - { - "epoch": 0.0707796876021175, - "grad_norm": 0.4942343533039093, - "learning_rate": 9.999672050810781e-06, - "loss": 0.4048, - "step": 1083 - }, - { - "epoch": 0.07084504280765963, - "grad_norm": 0.5459291338920593, - "learning_rate": 9.99966803928158e-06, - "loss": 0.4352, - "step": 1084 - }, - { - "epoch": 0.07091039801320174, - "grad_norm": 0.5130406022071838, - "learning_rate": 9.99966400336727e-06, - "loss": 0.4111, - "step": 1085 - }, - { - "epoch": 0.07097575321874387, - "grad_norm": 0.4856773912906647, - "learning_rate": 9.999659943067864e-06, - "loss": 0.3968, - "step": 1086 - }, - { - "epoch": 0.071041108424286, - "grad_norm": 0.5157826542854309, - "learning_rate": 9.999655858383384e-06, - "loss": 0.4324, - "step": 1087 - }, - { - "epoch": 0.07110646362982811, - "grad_norm": 0.5272077322006226, - "learning_rate": 9.999651749313852e-06, - "loss": 0.4589, - "step": 1088 - }, - { - "epoch": 0.07117181883537024, - "grad_norm": 0.49379098415374756, - "learning_rate": 9.999647615859284e-06, - "loss": 0.4067, - "step": 1089 - }, - { - "epoch": 0.07123717404091236, - "grad_norm": 0.5427277088165283, - "learning_rate": 9.999643458019706e-06, - "loss": 0.4638, - "step": 1090 - }, - { - "epoch": 0.07130252924645449, - "grad_norm": 0.49519068002700806, - "learning_rate": 9.999639275795132e-06, - "loss": 0.4352, - "step": 1091 - }, - { - "epoch": 0.0713678844519966, - "grad_norm": 0.506629228591919, - "learning_rate": 9.999635069185587e-06, - "loss": 0.4753, - "step": 1092 - }, - { - "epoch": 0.07143323965753873, - "grad_norm": 0.46790847182273865, - "learning_rate": 9.999630838191087e-06, - "loss": 0.4048, - "step": 1093 - }, - { - "epoch": 0.07149859486308084, - "grad_norm": 0.581870436668396, - "learning_rate": 9.99962658281166e-06, - "loss": 0.4858, - "step": 1094 - }, - { - "epoch": 0.07156395006862297, - "grad_norm": 0.5799008011817932, - "learning_rate": 9.999622303047318e-06, - "loss": 0.5081, - "step": 1095 - }, - { - "epoch": 0.07162930527416508, - "grad_norm": 0.5335209369659424, - "learning_rate": 9.999617998898087e-06, - "loss": 0.4161, - "step": 1096 - }, - { - "epoch": 0.07169466047970721, - "grad_norm": 0.5431477427482605, - "learning_rate": 9.999613670363988e-06, - "loss": 0.4598, - "step": 1097 - }, - { - "epoch": 0.07176001568524933, - "grad_norm": 0.5139455795288086, - "learning_rate": 9.999609317445041e-06, - "loss": 0.4032, - "step": 1098 - }, - { - "epoch": 0.07182537089079145, - "grad_norm": 0.5462816953659058, - "learning_rate": 9.999604940141266e-06, - "loss": 0.4114, - "step": 1099 - }, - { - "epoch": 0.07189072609633357, - "grad_norm": 0.5116475820541382, - "learning_rate": 9.999600538452687e-06, - "loss": 0.425, - "step": 1100 - }, - { - "epoch": 0.0719560813018757, - "grad_norm": 0.552571713924408, - "learning_rate": 9.999596112379323e-06, - "loss": 0.4446, - "step": 1101 - }, - { - "epoch": 0.07202143650741781, - "grad_norm": 0.5241992473602295, - "learning_rate": 9.999591661921197e-06, - "loss": 0.438, - "step": 1102 - }, - { - "epoch": 0.07208679171295994, - "grad_norm": 0.5425822138786316, - "learning_rate": 9.99958718707833e-06, - "loss": 0.4437, - "step": 1103 - }, - { - "epoch": 0.07215214691850205, - "grad_norm": 0.5181475877761841, - "learning_rate": 9.999582687850746e-06, - "loss": 0.4483, - "step": 1104 - }, - { - "epoch": 0.07221750212404418, - "grad_norm": 0.49834105372428894, - "learning_rate": 9.999578164238463e-06, - "loss": 0.4055, - "step": 1105 - }, - { - "epoch": 0.0722828573295863, - "grad_norm": 0.5015610456466675, - "learning_rate": 9.999573616241509e-06, - "loss": 0.424, - "step": 1106 - }, - { - "epoch": 0.07234821253512842, - "grad_norm": 0.5378239154815674, - "learning_rate": 9.999569043859898e-06, - "loss": 0.4475, - "step": 1107 - }, - { - "epoch": 0.07241356774067055, - "grad_norm": 0.5474442839622498, - "learning_rate": 9.99956444709366e-06, - "loss": 0.4708, - "step": 1108 - }, - { - "epoch": 0.07247892294621266, - "grad_norm": 0.5020264387130737, - "learning_rate": 9.999559825942812e-06, - "loss": 0.4158, - "step": 1109 - }, - { - "epoch": 0.07254427815175479, - "grad_norm": 0.5356208086013794, - "learning_rate": 9.99955518040738e-06, - "loss": 0.383, - "step": 1110 - }, - { - "epoch": 0.07260963335729691, - "grad_norm": 0.5045859217643738, - "learning_rate": 9.999550510487385e-06, - "loss": 0.3824, - "step": 1111 - }, - { - "epoch": 0.07267498856283904, - "grad_norm": 0.596736490726471, - "learning_rate": 9.99954581618285e-06, - "loss": 0.4847, - "step": 1112 - }, - { - "epoch": 0.07274034376838115, - "grad_norm": 0.5553385615348816, - "learning_rate": 9.999541097493799e-06, - "loss": 0.4628, - "step": 1113 - }, - { - "epoch": 0.07280569897392328, - "grad_norm": 0.5719872713088989, - "learning_rate": 9.999536354420252e-06, - "loss": 0.4532, - "step": 1114 - }, - { - "epoch": 0.07287105417946539, - "grad_norm": 0.5502328872680664, - "learning_rate": 9.999531586962236e-06, - "loss": 0.4702, - "step": 1115 - }, - { - "epoch": 0.07293640938500752, - "grad_norm": 0.60325688123703, - "learning_rate": 9.999526795119771e-06, - "loss": 0.4916, - "step": 1116 - }, - { - "epoch": 0.07300176459054963, - "grad_norm": 0.5491304993629456, - "learning_rate": 9.999521978892882e-06, - "loss": 0.466, - "step": 1117 - }, - { - "epoch": 0.07306711979609176, - "grad_norm": 0.5431767702102661, - "learning_rate": 9.999517138281594e-06, - "loss": 0.4389, - "step": 1118 - }, - { - "epoch": 0.07313247500163388, - "grad_norm": 0.5361014008522034, - "learning_rate": 9.999512273285928e-06, - "loss": 0.4183, - "step": 1119 - }, - { - "epoch": 0.073197830207176, - "grad_norm": 0.5825538635253906, - "learning_rate": 9.999507383905908e-06, - "loss": 0.4194, - "step": 1120 - }, - { - "epoch": 0.07326318541271812, - "grad_norm": 0.5627398490905762, - "learning_rate": 9.99950247014156e-06, - "loss": 0.4284, - "step": 1121 - }, - { - "epoch": 0.07332854061826025, - "grad_norm": 0.547063946723938, - "learning_rate": 9.999497531992905e-06, - "loss": 0.4409, - "step": 1122 - }, - { - "epoch": 0.07339389582380236, - "grad_norm": 0.50401771068573, - "learning_rate": 9.99949256945997e-06, - "loss": 0.3774, - "step": 1123 - }, - { - "epoch": 0.07345925102934449, - "grad_norm": 0.6625709533691406, - "learning_rate": 9.999487582542777e-06, - "loss": 0.5254, - "step": 1124 - }, - { - "epoch": 0.0735246062348866, - "grad_norm": 0.5538679361343384, - "learning_rate": 9.99948257124135e-06, - "loss": 0.4445, - "step": 1125 - }, - { - "epoch": 0.07358996144042873, - "grad_norm": 0.5207232236862183, - "learning_rate": 9.999477535555716e-06, - "loss": 0.414, - "step": 1126 - }, - { - "epoch": 0.07365531664597084, - "grad_norm": 0.5595287084579468, - "learning_rate": 9.999472475485897e-06, - "loss": 0.5615, - "step": 1127 - }, - { - "epoch": 0.07372067185151297, - "grad_norm": 0.4857555031776428, - "learning_rate": 9.999467391031918e-06, - "loss": 0.4213, - "step": 1128 - }, - { - "epoch": 0.0737860270570551, - "grad_norm": 0.5126360654830933, - "learning_rate": 9.999462282193808e-06, - "loss": 0.4042, - "step": 1129 - }, - { - "epoch": 0.07385138226259721, - "grad_norm": 0.5549189448356628, - "learning_rate": 9.999457148971585e-06, - "loss": 0.4648, - "step": 1130 - }, - { - "epoch": 0.07391673746813934, - "grad_norm": 0.5788109302520752, - "learning_rate": 9.999451991365278e-06, - "loss": 0.458, - "step": 1131 - }, - { - "epoch": 0.07398209267368146, - "grad_norm": 0.580055296421051, - "learning_rate": 9.999446809374913e-06, - "loss": 0.4423, - "step": 1132 - }, - { - "epoch": 0.07404744787922359, - "grad_norm": 0.5349094867706299, - "learning_rate": 9.999441603000514e-06, - "loss": 0.4635, - "step": 1133 - }, - { - "epoch": 0.0741128030847657, - "grad_norm": 0.5247857570648193, - "learning_rate": 9.999436372242106e-06, - "loss": 0.4573, - "step": 1134 - }, - { - "epoch": 0.07417815829030783, - "grad_norm": 0.5215403437614441, - "learning_rate": 9.999431117099714e-06, - "loss": 0.4152, - "step": 1135 - }, - { - "epoch": 0.07424351349584994, - "grad_norm": 0.5917163491249084, - "learning_rate": 9.999425837573364e-06, - "loss": 0.4539, - "step": 1136 - }, - { - "epoch": 0.07430886870139207, - "grad_norm": 0.5871264934539795, - "learning_rate": 9.999420533663084e-06, - "loss": 0.5734, - "step": 1137 - }, - { - "epoch": 0.07437422390693418, - "grad_norm": 0.5448279976844788, - "learning_rate": 9.999415205368897e-06, - "loss": 0.4242, - "step": 1138 - }, - { - "epoch": 0.07443957911247631, - "grad_norm": 0.4966379404067993, - "learning_rate": 9.999409852690832e-06, - "loss": 0.3862, - "step": 1139 - }, - { - "epoch": 0.07450493431801843, - "grad_norm": 0.5236524343490601, - "learning_rate": 9.99940447562891e-06, - "loss": 0.4625, - "step": 1140 - }, - { - "epoch": 0.07457028952356055, - "grad_norm": 0.5394020080566406, - "learning_rate": 9.999399074183163e-06, - "loss": 0.4387, - "step": 1141 - }, - { - "epoch": 0.07463564472910267, - "grad_norm": 0.5543898940086365, - "learning_rate": 9.999393648353613e-06, - "loss": 0.4771, - "step": 1142 - }, - { - "epoch": 0.0747009999346448, - "grad_norm": 0.5211477279663086, - "learning_rate": 9.99938819814029e-06, - "loss": 0.3871, - "step": 1143 - }, - { - "epoch": 0.07476635514018691, - "grad_norm": 0.7291384935379028, - "learning_rate": 9.999382723543216e-06, - "loss": 0.3779, - "step": 1144 - }, - { - "epoch": 0.07483171034572904, - "grad_norm": 0.5049942135810852, - "learning_rate": 9.999377224562424e-06, - "loss": 0.4263, - "step": 1145 - }, - { - "epoch": 0.07489706555127115, - "grad_norm": 0.5275691151618958, - "learning_rate": 9.999371701197935e-06, - "loss": 0.4243, - "step": 1146 - }, - { - "epoch": 0.07496242075681328, - "grad_norm": 0.5381787419319153, - "learning_rate": 9.99936615344978e-06, - "loss": 0.4225, - "step": 1147 - }, - { - "epoch": 0.0750277759623554, - "grad_norm": 0.5490167140960693, - "learning_rate": 9.999360581317982e-06, - "loss": 0.4843, - "step": 1148 - }, - { - "epoch": 0.07509313116789752, - "grad_norm": 0.5006473660469055, - "learning_rate": 9.999354984802572e-06, - "loss": 0.4086, - "step": 1149 - }, - { - "epoch": 0.07515848637343965, - "grad_norm": 0.5244328379631042, - "learning_rate": 9.999349363903574e-06, - "loss": 0.4037, - "step": 1150 - }, - { - "epoch": 0.07522384157898176, - "grad_norm": 0.527870774269104, - "learning_rate": 9.99934371862102e-06, - "loss": 0.3991, - "step": 1151 - }, - { - "epoch": 0.07528919678452389, - "grad_norm": 0.5216260552406311, - "learning_rate": 9.999338048954933e-06, - "loss": 0.357, - "step": 1152 - }, - { - "epoch": 0.07535455199006601, - "grad_norm": 0.531283974647522, - "learning_rate": 9.999332354905343e-06, - "loss": 0.4682, - "step": 1153 - }, - { - "epoch": 0.07541990719560814, - "grad_norm": 0.5901212692260742, - "learning_rate": 9.999326636472278e-06, - "loss": 0.4902, - "step": 1154 - }, - { - "epoch": 0.07548526240115025, - "grad_norm": 0.5611442923545837, - "learning_rate": 9.999320893655762e-06, - "loss": 0.4659, - "step": 1155 - }, - { - "epoch": 0.07555061760669238, - "grad_norm": 0.5276128053665161, - "learning_rate": 9.99931512645583e-06, - "loss": 0.4918, - "step": 1156 - }, - { - "epoch": 0.07561597281223449, - "grad_norm": 0.5524461269378662, - "learning_rate": 9.999309334872503e-06, - "loss": 0.4832, - "step": 1157 - }, - { - "epoch": 0.07568132801777662, - "grad_norm": 0.5486389398574829, - "learning_rate": 9.999303518905815e-06, - "loss": 0.4823, - "step": 1158 - }, - { - "epoch": 0.07574668322331873, - "grad_norm": 0.562732994556427, - "learning_rate": 9.99929767855579e-06, - "loss": 0.4375, - "step": 1159 - }, - { - "epoch": 0.07581203842886086, - "grad_norm": 0.49552303552627563, - "learning_rate": 9.999291813822459e-06, - "loss": 0.3802, - "step": 1160 - }, - { - "epoch": 0.07587739363440298, - "grad_norm": 0.505366325378418, - "learning_rate": 9.99928592470585e-06, - "loss": 0.4133, - "step": 1161 - }, - { - "epoch": 0.0759427488399451, - "grad_norm": 0.4455549716949463, - "learning_rate": 9.999280011205991e-06, - "loss": 0.3705, - "step": 1162 - }, - { - "epoch": 0.07600810404548722, - "grad_norm": 0.4988388419151306, - "learning_rate": 9.99927407332291e-06, - "loss": 0.4121, - "step": 1163 - }, - { - "epoch": 0.07607345925102935, - "grad_norm": 0.5100632905960083, - "learning_rate": 9.999268111056641e-06, - "loss": 0.4365, - "step": 1164 - }, - { - "epoch": 0.07613881445657146, - "grad_norm": 0.5309730768203735, - "learning_rate": 9.999262124407207e-06, - "loss": 0.4792, - "step": 1165 - }, - { - "epoch": 0.07620416966211359, - "grad_norm": 0.5231695771217346, - "learning_rate": 9.99925611337464e-06, - "loss": 0.4703, - "step": 1166 - }, - { - "epoch": 0.0762695248676557, - "grad_norm": 0.5190871953964233, - "learning_rate": 9.99925007795897e-06, - "loss": 0.4615, - "step": 1167 - }, - { - "epoch": 0.07633488007319783, - "grad_norm": 0.5430537462234497, - "learning_rate": 9.999244018160225e-06, - "loss": 0.4867, - "step": 1168 - }, - { - "epoch": 0.07640023527873996, - "grad_norm": 0.5315860509872437, - "learning_rate": 9.999237933978437e-06, - "loss": 0.4364, - "step": 1169 - }, - { - "epoch": 0.07646559048428207, - "grad_norm": 0.553806722164154, - "learning_rate": 9.999231825413631e-06, - "loss": 0.4627, - "step": 1170 - }, - { - "epoch": 0.0765309456898242, - "grad_norm": 0.5221887826919556, - "learning_rate": 9.999225692465839e-06, - "loss": 0.3913, - "step": 1171 - }, - { - "epoch": 0.07659630089536631, - "grad_norm": 0.5462474226951599, - "learning_rate": 9.999219535135093e-06, - "loss": 0.4811, - "step": 1172 - }, - { - "epoch": 0.07666165610090844, - "grad_norm": 0.496337890625, - "learning_rate": 9.999213353421422e-06, - "loss": 0.3781, - "step": 1173 - }, - { - "epoch": 0.07672701130645056, - "grad_norm": 0.5239957571029663, - "learning_rate": 9.999207147324854e-06, - "loss": 0.4518, - "step": 1174 - }, - { - "epoch": 0.07679236651199269, - "grad_norm": 0.5142859816551208, - "learning_rate": 9.999200916845422e-06, - "loss": 0.446, - "step": 1175 - }, - { - "epoch": 0.0768577217175348, - "grad_norm": 0.5262611508369446, - "learning_rate": 9.999194661983154e-06, - "loss": 0.4095, - "step": 1176 - }, - { - "epoch": 0.07692307692307693, - "grad_norm": 0.5234584808349609, - "learning_rate": 9.999188382738083e-06, - "loss": 0.44, - "step": 1177 - }, - { - "epoch": 0.07698843212861904, - "grad_norm": 0.5316318869590759, - "learning_rate": 9.999182079110238e-06, - "loss": 0.3961, - "step": 1178 - }, - { - "epoch": 0.07705378733416117, - "grad_norm": 0.553196370601654, - "learning_rate": 9.99917575109965e-06, - "loss": 0.4897, - "step": 1179 - }, - { - "epoch": 0.07711914253970328, - "grad_norm": 0.6131114959716797, - "learning_rate": 9.99916939870635e-06, - "loss": 0.4655, - "step": 1180 - }, - { - "epoch": 0.07718449774524541, - "grad_norm": 0.5405822992324829, - "learning_rate": 9.999163021930369e-06, - "loss": 0.4634, - "step": 1181 - }, - { - "epoch": 0.07724985295078753, - "grad_norm": 0.4962118864059448, - "learning_rate": 9.999156620771736e-06, - "loss": 0.4538, - "step": 1182 - }, - { - "epoch": 0.07731520815632965, - "grad_norm": 0.5419692397117615, - "learning_rate": 9.999150195230487e-06, - "loss": 0.4292, - "step": 1183 - }, - { - "epoch": 0.07738056336187177, - "grad_norm": 0.5354037880897522, - "learning_rate": 9.99914374530665e-06, - "loss": 0.4915, - "step": 1184 - }, - { - "epoch": 0.0774459185674139, - "grad_norm": 0.6750656962394714, - "learning_rate": 9.999137271000258e-06, - "loss": 0.4394, - "step": 1185 - }, - { - "epoch": 0.07751127377295601, - "grad_norm": 0.6161524057388306, - "learning_rate": 9.99913077231134e-06, - "loss": 0.4917, - "step": 1186 - }, - { - "epoch": 0.07757662897849814, - "grad_norm": 0.5116806030273438, - "learning_rate": 9.99912424923993e-06, - "loss": 0.4292, - "step": 1187 - }, - { - "epoch": 0.07764198418404025, - "grad_norm": 0.5059065818786621, - "learning_rate": 9.999117701786059e-06, - "loss": 0.4539, - "step": 1188 - }, - { - "epoch": 0.07770733938958238, - "grad_norm": 0.548248291015625, - "learning_rate": 9.999111129949759e-06, - "loss": 0.4325, - "step": 1189 - }, - { - "epoch": 0.07777269459512451, - "grad_norm": 0.5650060176849365, - "learning_rate": 9.999104533731064e-06, - "loss": 0.4258, - "step": 1190 - }, - { - "epoch": 0.07783804980066662, - "grad_norm": 0.5023185014724731, - "learning_rate": 9.999097913130002e-06, - "loss": 0.455, - "step": 1191 - }, - { - "epoch": 0.07790340500620875, - "grad_norm": 0.5457863211631775, - "learning_rate": 9.999091268146608e-06, - "loss": 0.4893, - "step": 1192 - }, - { - "epoch": 0.07796876021175086, - "grad_norm": 0.5315849184989929, - "learning_rate": 9.999084598780914e-06, - "loss": 0.4525, - "step": 1193 - }, - { - "epoch": 0.07803411541729299, - "grad_norm": 0.513178288936615, - "learning_rate": 9.999077905032953e-06, - "loss": 0.4396, - "step": 1194 - }, - { - "epoch": 0.07809947062283511, - "grad_norm": 0.525798499584198, - "learning_rate": 9.999071186902758e-06, - "loss": 0.4627, - "step": 1195 - }, - { - "epoch": 0.07816482582837724, - "grad_norm": 0.5288268327713013, - "learning_rate": 9.999064444390361e-06, - "loss": 0.4314, - "step": 1196 - }, - { - "epoch": 0.07823018103391935, - "grad_norm": 0.48024097084999084, - "learning_rate": 9.999057677495794e-06, - "loss": 0.4085, - "step": 1197 - }, - { - "epoch": 0.07829553623946148, - "grad_norm": 0.5265238881111145, - "learning_rate": 9.99905088621909e-06, - "loss": 0.4385, - "step": 1198 - }, - { - "epoch": 0.07836089144500359, - "grad_norm": 0.5488736033439636, - "learning_rate": 9.999044070560285e-06, - "loss": 0.4245, - "step": 1199 - }, - { - "epoch": 0.07842624665054572, - "grad_norm": 0.5518878698348999, - "learning_rate": 9.999037230519408e-06, - "loss": 0.4259, - "step": 1200 - }, - { - "epoch": 0.07849160185608783, - "grad_norm": 0.49042201042175293, - "learning_rate": 9.999030366096495e-06, - "loss": 0.4178, - "step": 1201 - }, - { - "epoch": 0.07855695706162996, - "grad_norm": 0.5355582237243652, - "learning_rate": 9.99902347729158e-06, - "loss": 0.4266, - "step": 1202 - }, - { - "epoch": 0.07862231226717208, - "grad_norm": 0.6018792986869812, - "learning_rate": 9.999016564104696e-06, - "loss": 0.5454, - "step": 1203 - }, - { - "epoch": 0.0786876674727142, - "grad_norm": 0.504183828830719, - "learning_rate": 9.999009626535877e-06, - "loss": 0.398, - "step": 1204 - }, - { - "epoch": 0.07875302267825632, - "grad_norm": 0.5036664009094238, - "learning_rate": 9.999002664585153e-06, - "loss": 0.4108, - "step": 1205 - }, - { - "epoch": 0.07881837788379845, - "grad_norm": 0.4950101673603058, - "learning_rate": 9.998995678252564e-06, - "loss": 0.4001, - "step": 1206 - }, - { - "epoch": 0.07888373308934056, - "grad_norm": 0.5225205421447754, - "learning_rate": 9.99898866753814e-06, - "loss": 0.4438, - "step": 1207 - }, - { - "epoch": 0.07894908829488269, - "grad_norm": 0.5419541001319885, - "learning_rate": 9.998981632441917e-06, - "loss": 0.4618, - "step": 1208 - }, - { - "epoch": 0.0790144435004248, - "grad_norm": 0.47289782762527466, - "learning_rate": 9.998974572963929e-06, - "loss": 0.4279, - "step": 1209 - }, - { - "epoch": 0.07907979870596693, - "grad_norm": 0.4971350133419037, - "learning_rate": 9.99896748910421e-06, - "loss": 0.4121, - "step": 1210 - }, - { - "epoch": 0.07914515391150906, - "grad_norm": 0.48509129881858826, - "learning_rate": 9.998960380862794e-06, - "loss": 0.3895, - "step": 1211 - }, - { - "epoch": 0.07921050911705117, - "grad_norm": 0.5354259014129639, - "learning_rate": 9.998953248239717e-06, - "loss": 0.4433, - "step": 1212 - }, - { - "epoch": 0.0792758643225933, - "grad_norm": 0.49842292070388794, - "learning_rate": 9.998946091235014e-06, - "loss": 0.4251, - "step": 1213 - }, - { - "epoch": 0.07934121952813541, - "grad_norm": 0.5182493925094604, - "learning_rate": 9.998938909848718e-06, - "loss": 0.4526, - "step": 1214 - }, - { - "epoch": 0.07940657473367754, - "grad_norm": 0.48397088050842285, - "learning_rate": 9.998931704080867e-06, - "loss": 0.3854, - "step": 1215 - }, - { - "epoch": 0.07947192993921966, - "grad_norm": 0.5367823839187622, - "learning_rate": 9.998924473931493e-06, - "loss": 0.4529, - "step": 1216 - }, - { - "epoch": 0.07953728514476179, - "grad_norm": 0.5396056175231934, - "learning_rate": 9.998917219400632e-06, - "loss": 0.464, - "step": 1217 - }, - { - "epoch": 0.0796026403503039, - "grad_norm": 0.5314146280288696, - "learning_rate": 9.99890994048832e-06, - "loss": 0.441, - "step": 1218 - }, - { - "epoch": 0.07966799555584603, - "grad_norm": 0.50490403175354, - "learning_rate": 9.998902637194593e-06, - "loss": 0.4734, - "step": 1219 - }, - { - "epoch": 0.07973335076138814, - "grad_norm": 0.5450233817100525, - "learning_rate": 9.998895309519484e-06, - "loss": 0.4509, - "step": 1220 - }, - { - "epoch": 0.07979870596693027, - "grad_norm": 0.5138707756996155, - "learning_rate": 9.998887957463034e-06, - "loss": 0.4645, - "step": 1221 - }, - { - "epoch": 0.07986406117247238, - "grad_norm": 0.5474032163619995, - "learning_rate": 9.998880581025274e-06, - "loss": 0.4259, - "step": 1222 - }, - { - "epoch": 0.07992941637801451, - "grad_norm": 0.4801062345504761, - "learning_rate": 9.998873180206242e-06, - "loss": 0.4245, - "step": 1223 - }, - { - "epoch": 0.07999477158355663, - "grad_norm": 0.5042070746421814, - "learning_rate": 9.998865755005973e-06, - "loss": 0.4157, - "step": 1224 - }, - { - "epoch": 0.08006012678909875, - "grad_norm": 0.49243617057800293, - "learning_rate": 9.998858305424506e-06, - "loss": 0.4372, - "step": 1225 - }, - { - "epoch": 0.08012548199464087, - "grad_norm": 0.5416117906570435, - "learning_rate": 9.998850831461873e-06, - "loss": 0.4519, - "step": 1226 - }, - { - "epoch": 0.080190837200183, - "grad_norm": 0.528499186038971, - "learning_rate": 9.998843333118113e-06, - "loss": 0.4893, - "step": 1227 - }, - { - "epoch": 0.08025619240572511, - "grad_norm": 0.4788937270641327, - "learning_rate": 9.998835810393264e-06, - "loss": 0.4147, - "step": 1228 - }, - { - "epoch": 0.08032154761126724, - "grad_norm": 0.5041825771331787, - "learning_rate": 9.998828263287359e-06, - "loss": 0.4216, - "step": 1229 - }, - { - "epoch": 0.08038690281680935, - "grad_norm": 0.5103921890258789, - "learning_rate": 9.998820691800439e-06, - "loss": 0.4718, - "step": 1230 - }, - { - "epoch": 0.08045225802235148, - "grad_norm": 0.4930843114852905, - "learning_rate": 9.998813095932536e-06, - "loss": 0.4359, - "step": 1231 - }, - { - "epoch": 0.08051761322789361, - "grad_norm": 0.46574723720550537, - "learning_rate": 9.998805475683691e-06, - "loss": 0.3872, - "step": 1232 - }, - { - "epoch": 0.08058296843343572, - "grad_norm": 0.5203606486320496, - "learning_rate": 9.998797831053942e-06, - "loss": 0.4558, - "step": 1233 - }, - { - "epoch": 0.08064832363897785, - "grad_norm": 0.5298285484313965, - "learning_rate": 9.998790162043321e-06, - "loss": 0.4588, - "step": 1234 - }, - { - "epoch": 0.08071367884451996, - "grad_norm": 0.5079378485679626, - "learning_rate": 9.99878246865187e-06, - "loss": 0.4715, - "step": 1235 - }, - { - "epoch": 0.08077903405006209, - "grad_norm": 0.4804714620113373, - "learning_rate": 9.998774750879626e-06, - "loss": 0.3643, - "step": 1236 - }, - { - "epoch": 0.08084438925560421, - "grad_norm": 0.5205805897712708, - "learning_rate": 9.998767008726624e-06, - "loss": 0.3987, - "step": 1237 - }, - { - "epoch": 0.08090974446114634, - "grad_norm": 0.5701265931129456, - "learning_rate": 9.998759242192904e-06, - "loss": 0.4508, - "step": 1238 - }, - { - "epoch": 0.08097509966668845, - "grad_norm": 0.4982219934463501, - "learning_rate": 9.998751451278504e-06, - "loss": 0.4393, - "step": 1239 - }, - { - "epoch": 0.08104045487223058, - "grad_norm": 0.5158557891845703, - "learning_rate": 9.99874363598346e-06, - "loss": 0.3905, - "step": 1240 - }, - { - "epoch": 0.08110581007777269, - "grad_norm": 0.5369464755058289, - "learning_rate": 9.998735796307815e-06, - "loss": 0.4403, - "step": 1241 - }, - { - "epoch": 0.08117116528331482, - "grad_norm": 0.515998125076294, - "learning_rate": 9.998727932251602e-06, - "loss": 0.409, - "step": 1242 - }, - { - "epoch": 0.08123652048885693, - "grad_norm": 0.5507062673568726, - "learning_rate": 9.99872004381486e-06, - "loss": 0.4413, - "step": 1243 - }, - { - "epoch": 0.08130187569439906, - "grad_norm": 0.5182384848594666, - "learning_rate": 9.99871213099763e-06, - "loss": 0.4661, - "step": 1244 - }, - { - "epoch": 0.08136723089994118, - "grad_norm": 0.510947585105896, - "learning_rate": 9.998704193799948e-06, - "loss": 0.4406, - "step": 1245 - }, - { - "epoch": 0.0814325861054833, - "grad_norm": 0.5021692514419556, - "learning_rate": 9.998696232221854e-06, - "loss": 0.4215, - "step": 1246 - }, - { - "epoch": 0.08149794131102542, - "grad_norm": 0.49621596932411194, - "learning_rate": 9.998688246263388e-06, - "loss": 0.4088, - "step": 1247 - }, - { - "epoch": 0.08156329651656755, - "grad_norm": 0.528205931186676, - "learning_rate": 9.998680235924587e-06, - "loss": 0.4198, - "step": 1248 - }, - { - "epoch": 0.08162865172210966, - "grad_norm": 0.46504709124565125, - "learning_rate": 9.99867220120549e-06, - "loss": 0.3673, - "step": 1249 - }, - { - "epoch": 0.08169400692765179, - "grad_norm": 0.5134182572364807, - "learning_rate": 9.998664142106138e-06, - "loss": 0.4325, - "step": 1250 - }, - { - "epoch": 0.0817593621331939, - "grad_norm": 0.5338544845581055, - "learning_rate": 9.99865605862657e-06, - "loss": 0.4973, - "step": 1251 - }, - { - "epoch": 0.08182471733873603, - "grad_norm": 0.4876263439655304, - "learning_rate": 9.998647950766824e-06, - "loss": 0.4073, - "step": 1252 - }, - { - "epoch": 0.08189007254427816, - "grad_norm": 0.4959527850151062, - "learning_rate": 9.998639818526939e-06, - "loss": 0.4325, - "step": 1253 - }, - { - "epoch": 0.08195542774982027, - "grad_norm": 0.5380318760871887, - "learning_rate": 9.998631661906957e-06, - "loss": 0.447, - "step": 1254 - }, - { - "epoch": 0.0820207829553624, - "grad_norm": 0.5196751952171326, - "learning_rate": 9.998623480906917e-06, - "loss": 0.471, - "step": 1255 - }, - { - "epoch": 0.08208613816090451, - "grad_norm": 0.49254852533340454, - "learning_rate": 9.998615275526859e-06, - "loss": 0.4241, - "step": 1256 - }, - { - "epoch": 0.08215149336644664, - "grad_norm": 0.5189266800880432, - "learning_rate": 9.998607045766822e-06, - "loss": 0.4352, - "step": 1257 - }, - { - "epoch": 0.08221684857198876, - "grad_norm": 0.531071662902832, - "learning_rate": 9.998598791626846e-06, - "loss": 0.48, - "step": 1258 - }, - { - "epoch": 0.08228220377753089, - "grad_norm": 0.49283671379089355, - "learning_rate": 9.998590513106973e-06, - "loss": 0.3903, - "step": 1259 - }, - { - "epoch": 0.082347558983073, - "grad_norm": 0.47999781370162964, - "learning_rate": 9.998582210207242e-06, - "loss": 0.4123, - "step": 1260 - }, - { - "epoch": 0.08241291418861513, - "grad_norm": 0.477071613073349, - "learning_rate": 9.998573882927694e-06, - "loss": 0.3761, - "step": 1261 - }, - { - "epoch": 0.08247826939415724, - "grad_norm": 0.5222316980361938, - "learning_rate": 9.998565531268369e-06, - "loss": 0.4376, - "step": 1262 - }, - { - "epoch": 0.08254362459969937, - "grad_norm": 0.516411304473877, - "learning_rate": 9.998557155229308e-06, - "loss": 0.4561, - "step": 1263 - }, - { - "epoch": 0.08260897980524148, - "grad_norm": 0.5287901163101196, - "learning_rate": 9.998548754810553e-06, - "loss": 0.4359, - "step": 1264 - }, - { - "epoch": 0.08267433501078361, - "grad_norm": 0.5043032169342041, - "learning_rate": 9.998540330012143e-06, - "loss": 0.44, - "step": 1265 - }, - { - "epoch": 0.08273969021632573, - "grad_norm": 0.5158609747886658, - "learning_rate": 9.998531880834121e-06, - "loss": 0.4713, - "step": 1266 - }, - { - "epoch": 0.08280504542186785, - "grad_norm": 0.5348635315895081, - "learning_rate": 9.998523407276528e-06, - "loss": 0.4345, - "step": 1267 - }, - { - "epoch": 0.08287040062740997, - "grad_norm": 0.500033438205719, - "learning_rate": 9.998514909339404e-06, - "loss": 0.416, - "step": 1268 - }, - { - "epoch": 0.0829357558329521, - "grad_norm": 0.5261061191558838, - "learning_rate": 9.99850638702279e-06, - "loss": 0.4685, - "step": 1269 - }, - { - "epoch": 0.08300111103849421, - "grad_norm": 0.5408302545547485, - "learning_rate": 9.998497840326731e-06, - "loss": 0.4679, - "step": 1270 - }, - { - "epoch": 0.08306646624403634, - "grad_norm": 0.5662031173706055, - "learning_rate": 9.998489269251266e-06, - "loss": 0.4473, - "step": 1271 - }, - { - "epoch": 0.08313182144957845, - "grad_norm": 0.5220599174499512, - "learning_rate": 9.998480673796435e-06, - "loss": 0.4566, - "step": 1272 - }, - { - "epoch": 0.08319717665512058, - "grad_norm": 0.5670110583305359, - "learning_rate": 9.998472053962285e-06, - "loss": 0.5111, - "step": 1273 - }, - { - "epoch": 0.08326253186066271, - "grad_norm": 0.5320886373519897, - "learning_rate": 9.998463409748852e-06, - "loss": 0.4318, - "step": 1274 - }, - { - "epoch": 0.08332788706620482, - "grad_norm": 0.5130778551101685, - "learning_rate": 9.998454741156184e-06, - "loss": 0.3946, - "step": 1275 - }, - { - "epoch": 0.08339324227174695, - "grad_norm": 0.4906269311904907, - "learning_rate": 9.99844604818432e-06, - "loss": 0.414, - "step": 1276 - }, - { - "epoch": 0.08345859747728906, - "grad_norm": 0.5453106760978699, - "learning_rate": 9.998437330833302e-06, - "loss": 0.5047, - "step": 1277 - }, - { - "epoch": 0.08352395268283119, - "grad_norm": 0.5290608406066895, - "learning_rate": 9.998428589103174e-06, - "loss": 0.4424, - "step": 1278 - }, - { - "epoch": 0.08358930788837331, - "grad_norm": 0.5562750697135925, - "learning_rate": 9.998419822993979e-06, - "loss": 0.4446, - "step": 1279 - }, - { - "epoch": 0.08365466309391544, - "grad_norm": 0.47817593812942505, - "learning_rate": 9.998411032505758e-06, - "loss": 0.4054, - "step": 1280 - }, - { - "epoch": 0.08372001829945755, - "grad_norm": 0.5376811027526855, - "learning_rate": 9.998402217638554e-06, - "loss": 0.4594, - "step": 1281 - }, - { - "epoch": 0.08378537350499968, - "grad_norm": 0.5015135407447815, - "learning_rate": 9.998393378392413e-06, - "loss": 0.3934, - "step": 1282 - }, - { - "epoch": 0.08385072871054179, - "grad_norm": 0.5472721457481384, - "learning_rate": 9.998384514767374e-06, - "loss": 0.4683, - "step": 1283 - }, - { - "epoch": 0.08391608391608392, - "grad_norm": 0.47711455821990967, - "learning_rate": 9.998375626763482e-06, - "loss": 0.4335, - "step": 1284 - }, - { - "epoch": 0.08398143912162603, - "grad_norm": 0.5403627753257751, - "learning_rate": 9.99836671438078e-06, - "loss": 0.4436, - "step": 1285 - }, - { - "epoch": 0.08404679432716816, - "grad_norm": 0.47652876377105713, - "learning_rate": 9.998357777619314e-06, - "loss": 0.4103, - "step": 1286 - }, - { - "epoch": 0.08411214953271028, - "grad_norm": 0.5195986032485962, - "learning_rate": 9.998348816479124e-06, - "loss": 0.4508, - "step": 1287 - }, - { - "epoch": 0.0841775047382524, - "grad_norm": 0.4842926561832428, - "learning_rate": 9.998339830960257e-06, - "loss": 0.4039, - "step": 1288 - }, - { - "epoch": 0.08424285994379452, - "grad_norm": 0.5277332067489624, - "learning_rate": 9.998330821062754e-06, - "loss": 0.4339, - "step": 1289 - }, - { - "epoch": 0.08430821514933665, - "grad_norm": 0.5088818073272705, - "learning_rate": 9.99832178678666e-06, - "loss": 0.4918, - "step": 1290 - }, - { - "epoch": 0.08437357035487876, - "grad_norm": 0.5293411016464233, - "learning_rate": 9.998312728132019e-06, - "loss": 0.4452, - "step": 1291 - }, - { - "epoch": 0.08443892556042089, - "grad_norm": 0.4776824116706848, - "learning_rate": 9.998303645098875e-06, - "loss": 0.3591, - "step": 1292 - }, - { - "epoch": 0.084504280765963, - "grad_norm": 0.5139163136482239, - "learning_rate": 9.998294537687273e-06, - "loss": 0.4308, - "step": 1293 - }, - { - "epoch": 0.08456963597150513, - "grad_norm": 0.48579853773117065, - "learning_rate": 9.998285405897256e-06, - "loss": 0.3964, - "step": 1294 - }, - { - "epoch": 0.08463499117704726, - "grad_norm": 0.5369120836257935, - "learning_rate": 9.99827624972887e-06, - "loss": 0.4503, - "step": 1295 - }, - { - "epoch": 0.08470034638258937, - "grad_norm": 0.5286991000175476, - "learning_rate": 9.99826706918216e-06, - "loss": 0.4463, - "step": 1296 - }, - { - "epoch": 0.0847657015881315, - "grad_norm": 0.5808084607124329, - "learning_rate": 9.998257864257169e-06, - "loss": 0.4637, - "step": 1297 - }, - { - "epoch": 0.08483105679367361, - "grad_norm": 0.474880576133728, - "learning_rate": 9.998248634953942e-06, - "loss": 0.3975, - "step": 1298 - }, - { - "epoch": 0.08489641199921574, - "grad_norm": 0.5344276428222656, - "learning_rate": 9.998239381272527e-06, - "loss": 0.4606, - "step": 1299 - }, - { - "epoch": 0.08496176720475786, - "grad_norm": 0.5275298357009888, - "learning_rate": 9.998230103212966e-06, - "loss": 0.4647, - "step": 1300 - }, - { - "epoch": 0.08502712241029999, - "grad_norm": 0.5716614723205566, - "learning_rate": 9.998220800775304e-06, - "loss": 0.4029, - "step": 1301 - }, - { - "epoch": 0.0850924776158421, - "grad_norm": 0.5430360436439514, - "learning_rate": 9.99821147395959e-06, - "loss": 0.4885, - "step": 1302 - }, - { - "epoch": 0.08515783282138423, - "grad_norm": 0.5336328148841858, - "learning_rate": 9.998202122765866e-06, - "loss": 0.4458, - "step": 1303 - }, - { - "epoch": 0.08522318802692634, - "grad_norm": 0.5014939904212952, - "learning_rate": 9.998192747194178e-06, - "loss": 0.4085, - "step": 1304 - }, - { - "epoch": 0.08528854323246847, - "grad_norm": 0.49508243799209595, - "learning_rate": 9.998183347244574e-06, - "loss": 0.3786, - "step": 1305 - }, - { - "epoch": 0.08535389843801058, - "grad_norm": 0.5499253273010254, - "learning_rate": 9.998173922917096e-06, - "loss": 0.4776, - "step": 1306 - }, - { - "epoch": 0.08541925364355271, - "grad_norm": 0.5309222340583801, - "learning_rate": 9.998164474211794e-06, - "loss": 0.4511, - "step": 1307 - }, - { - "epoch": 0.08548460884909483, - "grad_norm": 0.5119657516479492, - "learning_rate": 9.998155001128713e-06, - "loss": 0.3875, - "step": 1308 - }, - { - "epoch": 0.08554996405463695, - "grad_norm": 0.5707727074623108, - "learning_rate": 9.998145503667896e-06, - "loss": 0.4268, - "step": 1309 - }, - { - "epoch": 0.08561531926017907, - "grad_norm": 0.5600884556770325, - "learning_rate": 9.998135981829393e-06, - "loss": 0.5066, - "step": 1310 - }, - { - "epoch": 0.0856806744657212, - "grad_norm": 0.5002754330635071, - "learning_rate": 9.99812643561325e-06, - "loss": 0.3898, - "step": 1311 - }, - { - "epoch": 0.08574602967126331, - "grad_norm": 0.5500643253326416, - "learning_rate": 9.998116865019513e-06, - "loss": 0.4632, - "step": 1312 - }, - { - "epoch": 0.08581138487680544, - "grad_norm": 0.4922448992729187, - "learning_rate": 9.998107270048228e-06, - "loss": 0.4229, - "step": 1313 - }, - { - "epoch": 0.08587674008234755, - "grad_norm": 0.5603806376457214, - "learning_rate": 9.998097650699441e-06, - "loss": 0.4493, - "step": 1314 - }, - { - "epoch": 0.08594209528788968, - "grad_norm": 0.604393720626831, - "learning_rate": 9.998088006973203e-06, - "loss": 0.3981, - "step": 1315 - }, - { - "epoch": 0.08600745049343181, - "grad_norm": 0.51221764087677, - "learning_rate": 9.998078338869557e-06, - "loss": 0.4158, - "step": 1316 - }, - { - "epoch": 0.08607280569897392, - "grad_norm": 0.49882960319519043, - "learning_rate": 9.998068646388551e-06, - "loss": 0.3689, - "step": 1317 - }, - { - "epoch": 0.08613816090451605, - "grad_norm": 0.48062098026275635, - "learning_rate": 9.998058929530233e-06, - "loss": 0.4114, - "step": 1318 - }, - { - "epoch": 0.08620351611005816, - "grad_norm": 0.5422084927558899, - "learning_rate": 9.998049188294649e-06, - "loss": 0.3996, - "step": 1319 - }, - { - "epoch": 0.08626887131560029, - "grad_norm": 0.46148210763931274, - "learning_rate": 9.99803942268185e-06, - "loss": 0.34, - "step": 1320 - }, - { - "epoch": 0.08633422652114241, - "grad_norm": 0.5104997754096985, - "learning_rate": 9.998029632691879e-06, - "loss": 0.4344, - "step": 1321 - }, - { - "epoch": 0.08639958172668454, - "grad_norm": 0.49040332436561584, - "learning_rate": 9.998019818324787e-06, - "loss": 0.4351, - "step": 1322 - }, - { - "epoch": 0.08646493693222665, - "grad_norm": 0.471892386674881, - "learning_rate": 9.998009979580621e-06, - "loss": 0.3806, - "step": 1323 - }, - { - "epoch": 0.08653029213776878, - "grad_norm": 0.5132998824119568, - "learning_rate": 9.998000116459429e-06, - "loss": 0.4145, - "step": 1324 - }, - { - "epoch": 0.08659564734331089, - "grad_norm": 0.7055545449256897, - "learning_rate": 9.997990228961258e-06, - "loss": 0.4662, - "step": 1325 - }, - { - "epoch": 0.08666100254885302, - "grad_norm": 0.49374476075172424, - "learning_rate": 9.997980317086157e-06, - "loss": 0.4424, - "step": 1326 - }, - { - "epoch": 0.08672635775439513, - "grad_norm": 0.4715639054775238, - "learning_rate": 9.997970380834176e-06, - "loss": 0.3937, - "step": 1327 - }, - { - "epoch": 0.08679171295993726, - "grad_norm": 0.4831608831882477, - "learning_rate": 9.997960420205361e-06, - "loss": 0.3895, - "step": 1328 - }, - { - "epoch": 0.08685706816547938, - "grad_norm": 0.5228745937347412, - "learning_rate": 9.997950435199763e-06, - "loss": 0.4129, - "step": 1329 - }, - { - "epoch": 0.0869224233710215, - "grad_norm": 0.5391446352005005, - "learning_rate": 9.997940425817427e-06, - "loss": 0.4552, - "step": 1330 - }, - { - "epoch": 0.08698777857656362, - "grad_norm": 0.4888913333415985, - "learning_rate": 9.997930392058405e-06, - "loss": 0.4266, - "step": 1331 - }, - { - "epoch": 0.08705313378210575, - "grad_norm": 0.4974735379219055, - "learning_rate": 9.997920333922745e-06, - "loss": 0.4104, - "step": 1332 - }, - { - "epoch": 0.08711848898764786, - "grad_norm": 0.5001447200775146, - "learning_rate": 9.997910251410499e-06, - "loss": 0.4628, - "step": 1333 - }, - { - "epoch": 0.08718384419318999, - "grad_norm": 0.5306075215339661, - "learning_rate": 9.99790014452171e-06, - "loss": 0.4202, - "step": 1334 - }, - { - "epoch": 0.0872491993987321, - "grad_norm": 0.4916905164718628, - "learning_rate": 9.99789001325643e-06, - "loss": 0.4299, - "step": 1335 - }, - { - "epoch": 0.08731455460427423, - "grad_norm": 0.5258871912956238, - "learning_rate": 9.99787985761471e-06, - "loss": 0.4328, - "step": 1336 - }, - { - "epoch": 0.08737990980981636, - "grad_norm": 0.4798186421394348, - "learning_rate": 9.9978696775966e-06, - "loss": 0.4385, - "step": 1337 - }, - { - "epoch": 0.08744526501535847, - "grad_norm": 0.48297154903411865, - "learning_rate": 9.997859473202146e-06, - "loss": 0.3989, - "step": 1338 - }, - { - "epoch": 0.0875106202209006, - "grad_norm": 0.501555860042572, - "learning_rate": 9.997849244431401e-06, - "loss": 0.4328, - "step": 1339 - }, - { - "epoch": 0.08757597542644271, - "grad_norm": 0.5189911723136902, - "learning_rate": 9.997838991284415e-06, - "loss": 0.4502, - "step": 1340 - }, - { - "epoch": 0.08764133063198484, - "grad_norm": 0.5261041522026062, - "learning_rate": 9.997828713761233e-06, - "loss": 0.4627, - "step": 1341 - }, - { - "epoch": 0.08770668583752696, - "grad_norm": 0.495754212141037, - "learning_rate": 9.997818411861912e-06, - "loss": 0.4418, - "step": 1342 - }, - { - "epoch": 0.08777204104306909, - "grad_norm": 0.4789591133594513, - "learning_rate": 9.997808085586499e-06, - "loss": 0.3847, - "step": 1343 - }, - { - "epoch": 0.0878373962486112, - "grad_norm": 0.48939236998558044, - "learning_rate": 9.997797734935045e-06, - "loss": 0.4056, - "step": 1344 - }, - { - "epoch": 0.08790275145415333, - "grad_norm": 0.4681905210018158, - "learning_rate": 9.997787359907598e-06, - "loss": 0.3849, - "step": 1345 - }, - { - "epoch": 0.08796810665969544, - "grad_norm": 0.5467929244041443, - "learning_rate": 9.997776960504212e-06, - "loss": 0.4675, - "step": 1346 - }, - { - "epoch": 0.08803346186523757, - "grad_norm": 0.5142377614974976, - "learning_rate": 9.997766536724936e-06, - "loss": 0.4462, - "step": 1347 - }, - { - "epoch": 0.08809881707077968, - "grad_norm": 0.5171924233436584, - "learning_rate": 9.997756088569821e-06, - "loss": 0.471, - "step": 1348 - }, - { - "epoch": 0.08816417227632181, - "grad_norm": 0.5064006447792053, - "learning_rate": 9.997745616038918e-06, - "loss": 0.4007, - "step": 1349 - }, - { - "epoch": 0.08822952748186393, - "grad_norm": 0.5326420068740845, - "learning_rate": 9.997735119132279e-06, - "loss": 0.5005, - "step": 1350 - }, - { - "epoch": 0.08829488268740605, - "grad_norm": 0.5336724519729614, - "learning_rate": 9.997724597849955e-06, - "loss": 0.4846, - "step": 1351 - }, - { - "epoch": 0.08836023789294817, - "grad_norm": 0.5144051909446716, - "learning_rate": 9.997714052191996e-06, - "loss": 0.4372, - "step": 1352 - }, - { - "epoch": 0.0884255930984903, - "grad_norm": 0.5164487957954407, - "learning_rate": 9.997703482158454e-06, - "loss": 0.4418, - "step": 1353 - }, - { - "epoch": 0.08849094830403241, - "grad_norm": 0.4768177270889282, - "learning_rate": 9.997692887749381e-06, - "loss": 0.4048, - "step": 1354 - }, - { - "epoch": 0.08855630350957454, - "grad_norm": 0.5341006517410278, - "learning_rate": 9.997682268964828e-06, - "loss": 0.4612, - "step": 1355 - }, - { - "epoch": 0.08862165871511665, - "grad_norm": 0.5181215405464172, - "learning_rate": 9.997671625804848e-06, - "loss": 0.4461, - "step": 1356 - }, - { - "epoch": 0.08868701392065878, - "grad_norm": 0.47202199697494507, - "learning_rate": 9.997660958269491e-06, - "loss": 0.3583, - "step": 1357 - }, - { - "epoch": 0.08875236912620091, - "grad_norm": 0.4560900926589966, - "learning_rate": 9.997650266358811e-06, - "loss": 0.3716, - "step": 1358 - }, - { - "epoch": 0.08881772433174302, - "grad_norm": 0.5311445593833923, - "learning_rate": 9.99763955007286e-06, - "loss": 0.4473, - "step": 1359 - }, - { - "epoch": 0.08888307953728515, - "grad_norm": 0.4800487756729126, - "learning_rate": 9.997628809411688e-06, - "loss": 0.4024, - "step": 1360 - }, - { - "epoch": 0.08894843474282726, - "grad_norm": 0.5415546894073486, - "learning_rate": 9.99761804437535e-06, - "loss": 0.4829, - "step": 1361 - }, - { - "epoch": 0.08901378994836939, - "grad_norm": 0.47608330845832825, - "learning_rate": 9.997607254963896e-06, - "loss": 0.3918, - "step": 1362 - }, - { - "epoch": 0.08907914515391151, - "grad_norm": 0.490690678358078, - "learning_rate": 9.997596441177381e-06, - "loss": 0.4053, - "step": 1363 - }, - { - "epoch": 0.08914450035945364, - "grad_norm": 0.4878924489021301, - "learning_rate": 9.997585603015858e-06, - "loss": 0.4084, - "step": 1364 - }, - { - "epoch": 0.08920985556499575, - "grad_norm": 0.5493990182876587, - "learning_rate": 9.997574740479377e-06, - "loss": 0.4568, - "step": 1365 - }, - { - "epoch": 0.08927521077053788, - "grad_norm": 0.5351925492286682, - "learning_rate": 9.997563853567994e-06, - "loss": 0.4328, - "step": 1366 - }, - { - "epoch": 0.08934056597607999, - "grad_norm": 0.4934317171573639, - "learning_rate": 9.997552942281759e-06, - "loss": 0.3862, - "step": 1367 - }, - { - "epoch": 0.08940592118162212, - "grad_norm": 0.5227459073066711, - "learning_rate": 9.997542006620728e-06, - "loss": 0.4412, - "step": 1368 - }, - { - "epoch": 0.08947127638716423, - "grad_norm": 0.5125664472579956, - "learning_rate": 9.997531046584954e-06, - "loss": 0.4295, - "step": 1369 - }, - { - "epoch": 0.08953663159270636, - "grad_norm": 0.5206894278526306, - "learning_rate": 9.99752006217449e-06, - "loss": 0.4631, - "step": 1370 - }, - { - "epoch": 0.08960198679824848, - "grad_norm": 0.5265061855316162, - "learning_rate": 9.997509053389386e-06, - "loss": 0.4301, - "step": 1371 - }, - { - "epoch": 0.0896673420037906, - "grad_norm": 0.4820745885372162, - "learning_rate": 9.997498020229703e-06, - "loss": 0.4047, - "step": 1372 - }, - { - "epoch": 0.08973269720933272, - "grad_norm": 0.4692222476005554, - "learning_rate": 9.99748696269549e-06, - "loss": 0.3754, - "step": 1373 - }, - { - "epoch": 0.08979805241487485, - "grad_norm": 0.5081862211227417, - "learning_rate": 9.9974758807868e-06, - "loss": 0.4253, - "step": 1374 - }, - { - "epoch": 0.08986340762041696, - "grad_norm": 0.5094119310379028, - "learning_rate": 9.997464774503691e-06, - "loss": 0.4216, - "step": 1375 - }, - { - "epoch": 0.08992876282595909, - "grad_norm": 0.5726935267448425, - "learning_rate": 9.997453643846213e-06, - "loss": 0.4377, - "step": 1376 - }, - { - "epoch": 0.0899941180315012, - "grad_norm": 0.5270561575889587, - "learning_rate": 9.997442488814423e-06, - "loss": 0.4702, - "step": 1377 - }, - { - "epoch": 0.09005947323704333, - "grad_norm": 0.5101881623268127, - "learning_rate": 9.997431309408376e-06, - "loss": 0.4717, - "step": 1378 - }, - { - "epoch": 0.09012482844258546, - "grad_norm": 0.5228449106216431, - "learning_rate": 9.997420105628124e-06, - "loss": 0.4525, - "step": 1379 - }, - { - "epoch": 0.09019018364812757, - "grad_norm": 0.5499043464660645, - "learning_rate": 9.997408877473724e-06, - "loss": 0.4226, - "step": 1380 - }, - { - "epoch": 0.0902555388536697, - "grad_norm": 0.5792656540870667, - "learning_rate": 9.997397624945229e-06, - "loss": 0.49, - "step": 1381 - }, - { - "epoch": 0.09032089405921181, - "grad_norm": 0.49885094165802, - "learning_rate": 9.997386348042694e-06, - "loss": 0.4134, - "step": 1382 - }, - { - "epoch": 0.09038624926475394, - "grad_norm": 0.5309830904006958, - "learning_rate": 9.997375046766175e-06, - "loss": 0.4073, - "step": 1383 - }, - { - "epoch": 0.09045160447029606, - "grad_norm": 0.4986577332019806, - "learning_rate": 9.997363721115725e-06, - "loss": 0.4107, - "step": 1384 - }, - { - "epoch": 0.09051695967583819, - "grad_norm": 0.4967551827430725, - "learning_rate": 9.997352371091403e-06, - "loss": 0.3932, - "step": 1385 - }, - { - "epoch": 0.0905823148813803, - "grad_norm": 0.4970942735671997, - "learning_rate": 9.997340996693262e-06, - "loss": 0.4114, - "step": 1386 - }, - { - "epoch": 0.09064767008692243, - "grad_norm": 0.4806903302669525, - "learning_rate": 9.997329597921356e-06, - "loss": 0.4308, - "step": 1387 - }, - { - "epoch": 0.09071302529246454, - "grad_norm": 0.47248023748397827, - "learning_rate": 9.997318174775746e-06, - "loss": 0.3902, - "step": 1388 - }, - { - "epoch": 0.09077838049800667, - "grad_norm": 0.5082437992095947, - "learning_rate": 9.997306727256481e-06, - "loss": 0.4391, - "step": 1389 - }, - { - "epoch": 0.09084373570354878, - "grad_norm": 0.4743526577949524, - "learning_rate": 9.99729525536362e-06, - "loss": 0.3933, - "step": 1390 - }, - { - "epoch": 0.09090909090909091, - "grad_norm": 0.5254323482513428, - "learning_rate": 9.997283759097219e-06, - "loss": 0.4292, - "step": 1391 - }, - { - "epoch": 0.09097444611463303, - "grad_norm": 0.5116239786148071, - "learning_rate": 9.997272238457334e-06, - "loss": 0.4309, - "step": 1392 - }, - { - "epoch": 0.09103980132017515, - "grad_norm": 0.5199630856513977, - "learning_rate": 9.997260693444023e-06, - "loss": 0.4408, - "step": 1393 - }, - { - "epoch": 0.09110515652571727, - "grad_norm": 0.533906102180481, - "learning_rate": 9.997249124057337e-06, - "loss": 0.5071, - "step": 1394 - }, - { - "epoch": 0.0911705117312594, - "grad_norm": 0.5672672986984253, - "learning_rate": 9.997237530297338e-06, - "loss": 0.4823, - "step": 1395 - }, - { - "epoch": 0.09123586693680151, - "grad_norm": 0.5255613327026367, - "learning_rate": 9.997225912164078e-06, - "loss": 0.428, - "step": 1396 - }, - { - "epoch": 0.09130122214234364, - "grad_norm": 0.536078155040741, - "learning_rate": 9.99721426965762e-06, - "loss": 0.4634, - "step": 1397 - }, - { - "epoch": 0.09136657734788575, - "grad_norm": 0.48807597160339355, - "learning_rate": 9.997202602778014e-06, - "loss": 0.3934, - "step": 1398 - }, - { - "epoch": 0.09143193255342788, - "grad_norm": 0.4788658320903778, - "learning_rate": 9.99719091152532e-06, - "loss": 0.4111, - "step": 1399 - }, - { - "epoch": 0.09149728775897001, - "grad_norm": 0.47326433658599854, - "learning_rate": 9.997179195899595e-06, - "loss": 0.3763, - "step": 1400 - }, - { - "epoch": 0.09156264296451212, - "grad_norm": 0.5255954265594482, - "learning_rate": 9.997167455900896e-06, - "loss": 0.4695, - "step": 1401 - }, - { - "epoch": 0.09162799817005425, - "grad_norm": 0.474807471036911, - "learning_rate": 9.99715569152928e-06, - "loss": 0.3514, - "step": 1402 - }, - { - "epoch": 0.09169335337559636, - "grad_norm": 0.5895363092422485, - "learning_rate": 9.997143902784805e-06, - "loss": 0.5219, - "step": 1403 - }, - { - "epoch": 0.09175870858113849, - "grad_norm": 0.4866838753223419, - "learning_rate": 9.997132089667527e-06, - "loss": 0.3969, - "step": 1404 - }, - { - "epoch": 0.09182406378668061, - "grad_norm": 0.8142039179801941, - "learning_rate": 9.997120252177507e-06, - "loss": 0.5273, - "step": 1405 - }, - { - "epoch": 0.09188941899222274, - "grad_norm": 0.49827301502227783, - "learning_rate": 9.997108390314798e-06, - "loss": 0.4436, - "step": 1406 - }, - { - "epoch": 0.09195477419776485, - "grad_norm": 0.46191340684890747, - "learning_rate": 9.99709650407946e-06, - "loss": 0.357, - "step": 1407 - }, - { - "epoch": 0.09202012940330698, - "grad_norm": 0.5786252021789551, - "learning_rate": 9.997084593471552e-06, - "loss": 0.5216, - "step": 1408 - }, - { - "epoch": 0.09208548460884909, - "grad_norm": 0.5046772360801697, - "learning_rate": 9.997072658491131e-06, - "loss": 0.4555, - "step": 1409 - }, - { - "epoch": 0.09215083981439122, - "grad_norm": 0.524642288684845, - "learning_rate": 9.997060699138255e-06, - "loss": 0.4381, - "step": 1410 - }, - { - "epoch": 0.09221619501993333, - "grad_norm": 0.4975273311138153, - "learning_rate": 9.997048715412984e-06, - "loss": 0.3946, - "step": 1411 - }, - { - "epoch": 0.09228155022547546, - "grad_norm": 0.5561621785163879, - "learning_rate": 9.997036707315375e-06, - "loss": 0.4494, - "step": 1412 - }, - { - "epoch": 0.09234690543101758, - "grad_norm": 0.48719069361686707, - "learning_rate": 9.997024674845488e-06, - "loss": 0.4129, - "step": 1413 - }, - { - "epoch": 0.0924122606365597, - "grad_norm": 0.5011395812034607, - "learning_rate": 9.997012618003379e-06, - "loss": 0.4148, - "step": 1414 - }, - { - "epoch": 0.09247761584210182, - "grad_norm": 0.5327781438827515, - "learning_rate": 9.997000536789108e-06, - "loss": 0.4556, - "step": 1415 - }, - { - "epoch": 0.09254297104764395, - "grad_norm": 0.475110799074173, - "learning_rate": 9.996988431202735e-06, - "loss": 0.3896, - "step": 1416 - }, - { - "epoch": 0.09260832625318606, - "grad_norm": 0.5412651300430298, - "learning_rate": 9.996976301244317e-06, - "loss": 0.4329, - "step": 1417 - }, - { - "epoch": 0.09267368145872819, - "grad_norm": 0.4974592924118042, - "learning_rate": 9.996964146913917e-06, - "loss": 0.3904, - "step": 1418 - }, - { - "epoch": 0.0927390366642703, - "grad_norm": 0.5360898375511169, - "learning_rate": 9.99695196821159e-06, - "loss": 0.4746, - "step": 1419 - }, - { - "epoch": 0.09280439186981243, - "grad_norm": 0.5064204335212708, - "learning_rate": 9.996939765137396e-06, - "loss": 0.3793, - "step": 1420 - }, - { - "epoch": 0.09286974707535456, - "grad_norm": 0.47146132588386536, - "learning_rate": 9.996927537691398e-06, - "loss": 0.3923, - "step": 1421 - }, - { - "epoch": 0.09293510228089667, - "grad_norm": 0.5135983824729919, - "learning_rate": 9.996915285873652e-06, - "loss": 0.4106, - "step": 1422 - }, - { - "epoch": 0.0930004574864388, - "grad_norm": 0.4887148439884186, - "learning_rate": 9.99690300968422e-06, - "loss": 0.4121, - "step": 1423 - }, - { - "epoch": 0.09306581269198091, - "grad_norm": 0.5050368905067444, - "learning_rate": 9.996890709123161e-06, - "loss": 0.459, - "step": 1424 - }, - { - "epoch": 0.09313116789752304, - "grad_norm": 0.494385689496994, - "learning_rate": 9.996878384190534e-06, - "loss": 0.3678, - "step": 1425 - }, - { - "epoch": 0.09319652310306516, - "grad_norm": 0.5028787851333618, - "learning_rate": 9.9968660348864e-06, - "loss": 0.4066, - "step": 1426 - }, - { - "epoch": 0.09326187830860729, - "grad_norm": 0.4854961633682251, - "learning_rate": 9.99685366121082e-06, - "loss": 0.4324, - "step": 1427 - }, - { - "epoch": 0.0933272335141494, - "grad_norm": 0.5298318266868591, - "learning_rate": 9.996841263163853e-06, - "loss": 0.4301, - "step": 1428 - }, - { - "epoch": 0.09339258871969153, - "grad_norm": 0.5543140769004822, - "learning_rate": 9.996828840745561e-06, - "loss": 0.5094, - "step": 1429 - }, - { - "epoch": 0.09345794392523364, - "grad_norm": 0.4847946763038635, - "learning_rate": 9.996816393956002e-06, - "loss": 0.4099, - "step": 1430 - }, - { - "epoch": 0.09352329913077577, - "grad_norm": 0.5146633386611938, - "learning_rate": 9.996803922795239e-06, - "loss": 0.4173, - "step": 1431 - }, - { - "epoch": 0.09358865433631788, - "grad_norm": 0.5044020414352417, - "learning_rate": 9.996791427263333e-06, - "loss": 0.4084, - "step": 1432 - }, - { - "epoch": 0.09365400954186001, - "grad_norm": 0.49075740575790405, - "learning_rate": 9.996778907360343e-06, - "loss": 0.3819, - "step": 1433 - }, - { - "epoch": 0.09371936474740213, - "grad_norm": 0.4910449683666229, - "learning_rate": 9.996766363086332e-06, - "loss": 0.3806, - "step": 1434 - }, - { - "epoch": 0.09378471995294425, - "grad_norm": 0.48591721057891846, - "learning_rate": 9.996753794441361e-06, - "loss": 0.4651, - "step": 1435 - }, - { - "epoch": 0.09385007515848637, - "grad_norm": 0.5201261639595032, - "learning_rate": 9.996741201425491e-06, - "loss": 0.4427, - "step": 1436 - }, - { - "epoch": 0.0939154303640285, - "grad_norm": 0.48716381192207336, - "learning_rate": 9.996728584038782e-06, - "loss": 0.3618, - "step": 1437 - }, - { - "epoch": 0.09398078556957061, - "grad_norm": 0.4862307906150818, - "learning_rate": 9.996715942281297e-06, - "loss": 0.4023, - "step": 1438 - }, - { - "epoch": 0.09404614077511274, - "grad_norm": 0.48926007747650146, - "learning_rate": 9.996703276153095e-06, - "loss": 0.3815, - "step": 1439 - }, - { - "epoch": 0.09411149598065485, - "grad_norm": 1.492929220199585, - "learning_rate": 9.996690585654243e-06, - "loss": 0.4893, - "step": 1440 - }, - { - "epoch": 0.09417685118619698, - "grad_norm": 0.5764843821525574, - "learning_rate": 9.996677870784799e-06, - "loss": 0.4688, - "step": 1441 - }, - { - "epoch": 0.09424220639173911, - "grad_norm": 0.6438168287277222, - "learning_rate": 9.996665131544828e-06, - "loss": 0.4721, - "step": 1442 - }, - { - "epoch": 0.09430756159728122, - "grad_norm": 0.5278419256210327, - "learning_rate": 9.996652367934388e-06, - "loss": 0.4687, - "step": 1443 - }, - { - "epoch": 0.09437291680282335, - "grad_norm": 0.5648612976074219, - "learning_rate": 9.996639579953542e-06, - "loss": 0.4598, - "step": 1444 - }, - { - "epoch": 0.09443827200836546, - "grad_norm": 0.5199219584465027, - "learning_rate": 9.996626767602356e-06, - "loss": 0.4317, - "step": 1445 - }, - { - "epoch": 0.09450362721390759, - "grad_norm": 0.4909183382987976, - "learning_rate": 9.99661393088089e-06, - "loss": 0.4075, - "step": 1446 - }, - { - "epoch": 0.09456898241944971, - "grad_norm": 0.5209808945655823, - "learning_rate": 9.996601069789207e-06, - "loss": 0.4602, - "step": 1447 - }, - { - "epoch": 0.09463433762499183, - "grad_norm": 0.5387190580368042, - "learning_rate": 9.996588184327369e-06, - "loss": 0.4483, - "step": 1448 - }, - { - "epoch": 0.09469969283053395, - "grad_norm": 0.5025069117546082, - "learning_rate": 9.99657527449544e-06, - "loss": 0.403, - "step": 1449 - }, - { - "epoch": 0.09476504803607608, - "grad_norm": 0.5117092132568359, - "learning_rate": 9.996562340293482e-06, - "loss": 0.4055, - "step": 1450 - }, - { - "epoch": 0.09483040324161819, - "grad_norm": 0.45879194140434265, - "learning_rate": 9.996549381721558e-06, - "loss": 0.3907, - "step": 1451 - }, - { - "epoch": 0.09489575844716032, - "grad_norm": 0.5125434994697571, - "learning_rate": 9.996536398779732e-06, - "loss": 0.4107, - "step": 1452 - }, - { - "epoch": 0.09496111365270243, - "grad_norm": 0.5189753770828247, - "learning_rate": 9.996523391468068e-06, - "loss": 0.4015, - "step": 1453 - }, - { - "epoch": 0.09502646885824456, - "grad_norm": 0.5442360043525696, - "learning_rate": 9.996510359786628e-06, - "loss": 0.4504, - "step": 1454 - }, - { - "epoch": 0.09509182406378668, - "grad_norm": 0.500811755657196, - "learning_rate": 9.996497303735474e-06, - "loss": 0.4213, - "step": 1455 - }, - { - "epoch": 0.0951571792693288, - "grad_norm": 0.5382258296012878, - "learning_rate": 9.996484223314676e-06, - "loss": 0.4811, - "step": 1456 - }, - { - "epoch": 0.09522253447487092, - "grad_norm": 0.48647505044937134, - "learning_rate": 9.996471118524291e-06, - "loss": 0.377, - "step": 1457 - }, - { - "epoch": 0.09528788968041305, - "grad_norm": 0.5107442140579224, - "learning_rate": 9.996457989364385e-06, - "loss": 0.4103, - "step": 1458 - }, - { - "epoch": 0.09535324488595516, - "grad_norm": 0.4937625229358673, - "learning_rate": 9.996444835835023e-06, - "loss": 0.3966, - "step": 1459 - }, - { - "epoch": 0.09541860009149729, - "grad_norm": 0.5010054111480713, - "learning_rate": 9.996431657936267e-06, - "loss": 0.3988, - "step": 1460 - }, - { - "epoch": 0.0954839552970394, - "grad_norm": 0.5201773643493652, - "learning_rate": 9.996418455668185e-06, - "loss": 0.4288, - "step": 1461 - }, - { - "epoch": 0.09554931050258153, - "grad_norm": 0.5089337229728699, - "learning_rate": 9.99640522903084e-06, - "loss": 0.3942, - "step": 1462 - }, - { - "epoch": 0.09561466570812366, - "grad_norm": 0.5113603472709656, - "learning_rate": 9.996391978024294e-06, - "loss": 0.4371, - "step": 1463 - }, - { - "epoch": 0.09568002091366577, - "grad_norm": 0.4873717129230499, - "learning_rate": 9.996378702648612e-06, - "loss": 0.4065, - "step": 1464 - }, - { - "epoch": 0.0957453761192079, - "grad_norm": 0.4917776584625244, - "learning_rate": 9.996365402903863e-06, - "loss": 0.4379, - "step": 1465 - }, - { - "epoch": 0.09581073132475001, - "grad_norm": 0.5205647945404053, - "learning_rate": 9.996352078790109e-06, - "loss": 0.4396, - "step": 1466 - }, - { - "epoch": 0.09587608653029214, - "grad_norm": 0.496063232421875, - "learning_rate": 9.996338730307413e-06, - "loss": 0.3942, - "step": 1467 - }, - { - "epoch": 0.09594144173583426, - "grad_norm": 0.5094549059867859, - "learning_rate": 9.996325357455843e-06, - "loss": 0.3956, - "step": 1468 - }, - { - "epoch": 0.09600679694137638, - "grad_norm": 0.5358335375785828, - "learning_rate": 9.996311960235463e-06, - "loss": 0.4488, - "step": 1469 - }, - { - "epoch": 0.0960721521469185, - "grad_norm": 0.5215104222297668, - "learning_rate": 9.99629853864634e-06, - "loss": 0.4823, - "step": 1470 - }, - { - "epoch": 0.09613750735246063, - "grad_norm": 0.5060777068138123, - "learning_rate": 9.996285092688537e-06, - "loss": 0.4378, - "step": 1471 - }, - { - "epoch": 0.09620286255800274, - "grad_norm": 0.5364307165145874, - "learning_rate": 9.99627162236212e-06, - "loss": 0.4764, - "step": 1472 - }, - { - "epoch": 0.09626821776354487, - "grad_norm": 0.5011062622070312, - "learning_rate": 9.996258127667158e-06, - "loss": 0.4395, - "step": 1473 - }, - { - "epoch": 0.09633357296908698, - "grad_norm": 0.5110766887664795, - "learning_rate": 9.996244608603711e-06, - "loss": 0.3971, - "step": 1474 - }, - { - "epoch": 0.09639892817462911, - "grad_norm": 0.5151036977767944, - "learning_rate": 9.99623106517185e-06, - "loss": 0.4513, - "step": 1475 - }, - { - "epoch": 0.09646428338017123, - "grad_norm": 0.5112361907958984, - "learning_rate": 9.996217497371639e-06, - "loss": 0.446, - "step": 1476 - }, - { - "epoch": 0.09652963858571335, - "grad_norm": 0.4761430323123932, - "learning_rate": 9.996203905203146e-06, - "loss": 0.3894, - "step": 1477 - }, - { - "epoch": 0.09659499379125547, - "grad_norm": 0.5030077695846558, - "learning_rate": 9.996190288666433e-06, - "loss": 0.4398, - "step": 1478 - }, - { - "epoch": 0.0966603489967976, - "grad_norm": 0.507631778717041, - "learning_rate": 9.99617664776157e-06, - "loss": 0.4237, - "step": 1479 - }, - { - "epoch": 0.09672570420233971, - "grad_norm": 0.518007755279541, - "learning_rate": 9.996162982488624e-06, - "loss": 0.4235, - "step": 1480 - }, - { - "epoch": 0.09679105940788184, - "grad_norm": 0.5226354002952576, - "learning_rate": 9.996149292847658e-06, - "loss": 0.466, - "step": 1481 - }, - { - "epoch": 0.09685641461342395, - "grad_norm": 0.5401211977005005, - "learning_rate": 9.996135578838743e-06, - "loss": 0.4778, - "step": 1482 - }, - { - "epoch": 0.09692176981896608, - "grad_norm": 0.5578910112380981, - "learning_rate": 9.996121840461943e-06, - "loss": 0.4761, - "step": 1483 - }, - { - "epoch": 0.09698712502450821, - "grad_norm": 0.5571273565292358, - "learning_rate": 9.996108077717325e-06, - "loss": 0.4708, - "step": 1484 - }, - { - "epoch": 0.09705248023005032, - "grad_norm": 0.5445072650909424, - "learning_rate": 9.99609429060496e-06, - "loss": 0.4494, - "step": 1485 - }, - { - "epoch": 0.09711783543559245, - "grad_norm": 0.5183945298194885, - "learning_rate": 9.99608047912491e-06, - "loss": 0.4311, - "step": 1486 - }, - { - "epoch": 0.09718319064113456, - "grad_norm": 0.5346569418907166, - "learning_rate": 9.996066643277245e-06, - "loss": 0.3996, - "step": 1487 - }, - { - "epoch": 0.09724854584667669, - "grad_norm": 0.5302724838256836, - "learning_rate": 9.996052783062034e-06, - "loss": 0.4387, - "step": 1488 - }, - { - "epoch": 0.0973139010522188, - "grad_norm": 0.489535927772522, - "learning_rate": 9.99603889847934e-06, - "loss": 0.3866, - "step": 1489 - }, - { - "epoch": 0.09737925625776093, - "grad_norm": 0.5626073479652405, - "learning_rate": 9.996024989529235e-06, - "loss": 0.4795, - "step": 1490 - }, - { - "epoch": 0.09744461146330305, - "grad_norm": 0.5010583400726318, - "learning_rate": 9.996011056211785e-06, - "loss": 0.4482, - "step": 1491 - }, - { - "epoch": 0.09750996666884518, - "grad_norm": 0.49008432030677795, - "learning_rate": 9.995997098527058e-06, - "loss": 0.416, - "step": 1492 - }, - { - "epoch": 0.09757532187438729, - "grad_norm": 0.5158562660217285, - "learning_rate": 9.995983116475123e-06, - "loss": 0.4335, - "step": 1493 - }, - { - "epoch": 0.09764067707992942, - "grad_norm": 0.5218483209609985, - "learning_rate": 9.995969110056047e-06, - "loss": 0.4127, - "step": 1494 - }, - { - "epoch": 0.09770603228547153, - "grad_norm": 0.5236784815788269, - "learning_rate": 9.9959550792699e-06, - "loss": 0.4047, - "step": 1495 - }, - { - "epoch": 0.09777138749101366, - "grad_norm": 0.5455120801925659, - "learning_rate": 9.995941024116748e-06, - "loss": 0.451, - "step": 1496 - }, - { - "epoch": 0.09783674269655578, - "grad_norm": 0.5234445929527283, - "learning_rate": 9.995926944596662e-06, - "loss": 0.4518, - "step": 1497 - }, - { - "epoch": 0.0979020979020979, - "grad_norm": 0.5312226414680481, - "learning_rate": 9.995912840709709e-06, - "loss": 0.4351, - "step": 1498 - }, - { - "epoch": 0.09796745310764002, - "grad_norm": 0.5053860545158386, - "learning_rate": 9.995898712455958e-06, - "loss": 0.4531, - "step": 1499 - }, - { - "epoch": 0.09803280831318215, - "grad_norm": 0.5232927203178406, - "learning_rate": 9.995884559835478e-06, - "loss": 0.4266, - "step": 1500 - }, - { - "epoch": 0.09809816351872426, - "grad_norm": 0.4847826063632965, - "learning_rate": 9.995870382848338e-06, - "loss": 0.419, - "step": 1501 - }, - { - "epoch": 0.09816351872426639, - "grad_norm": 0.5197350978851318, - "learning_rate": 9.99585618149461e-06, - "loss": 0.4592, - "step": 1502 - }, - { - "epoch": 0.09822887392980852, - "grad_norm": 0.5199298858642578, - "learning_rate": 9.995841955774358e-06, - "loss": 0.4474, - "step": 1503 - }, - { - "epoch": 0.09829422913535063, - "grad_norm": 0.5503425598144531, - "learning_rate": 9.995827705687655e-06, - "loss": 0.5241, - "step": 1504 - }, - { - "epoch": 0.09835958434089276, - "grad_norm": 0.48477134108543396, - "learning_rate": 9.995813431234569e-06, - "loss": 0.3931, - "step": 1505 - }, - { - "epoch": 0.09842493954643487, - "grad_norm": 0.516323983669281, - "learning_rate": 9.995799132415172e-06, - "loss": 0.4527, - "step": 1506 - }, - { - "epoch": 0.098490294751977, - "grad_norm": 0.46379178762435913, - "learning_rate": 9.99578480922953e-06, - "loss": 0.3489, - "step": 1507 - }, - { - "epoch": 0.09855564995751911, - "grad_norm": 0.4947770833969116, - "learning_rate": 9.995770461677715e-06, - "loss": 0.4138, - "step": 1508 - }, - { - "epoch": 0.09862100516306124, - "grad_norm": 0.5108930468559265, - "learning_rate": 9.995756089759797e-06, - "loss": 0.4641, - "step": 1509 - }, - { - "epoch": 0.09868636036860336, - "grad_norm": 0.469390332698822, - "learning_rate": 9.995741693475846e-06, - "loss": 0.3713, - "step": 1510 - }, - { - "epoch": 0.09875171557414548, - "grad_norm": 0.5141362547874451, - "learning_rate": 9.995727272825931e-06, - "loss": 0.4445, - "step": 1511 - }, - { - "epoch": 0.0988170707796876, - "grad_norm": 1.0825378894805908, - "learning_rate": 9.995712827810125e-06, - "loss": 0.4494, - "step": 1512 - }, - { - "epoch": 0.09888242598522973, - "grad_norm": 0.4801233410835266, - "learning_rate": 9.995698358428496e-06, - "loss": 0.3884, - "step": 1513 - }, - { - "epoch": 0.09894778119077184, - "grad_norm": 0.5004074573516846, - "learning_rate": 9.995683864681116e-06, - "loss": 0.4609, - "step": 1514 - }, - { - "epoch": 0.09901313639631397, - "grad_norm": 0.5093396306037903, - "learning_rate": 9.995669346568055e-06, - "loss": 0.4095, - "step": 1515 - }, - { - "epoch": 0.09907849160185608, - "grad_norm": 0.5307527780532837, - "learning_rate": 9.995654804089384e-06, - "loss": 0.4027, - "step": 1516 - }, - { - "epoch": 0.09914384680739821, - "grad_norm": 0.5089740753173828, - "learning_rate": 9.995640237245173e-06, - "loss": 0.4296, - "step": 1517 - }, - { - "epoch": 0.09920920201294033, - "grad_norm": 0.48025357723236084, - "learning_rate": 9.995625646035495e-06, - "loss": 0.3968, - "step": 1518 - }, - { - "epoch": 0.09927455721848245, - "grad_norm": 0.4784804582595825, - "learning_rate": 9.99561103046042e-06, - "loss": 0.3747, - "step": 1519 - }, - { - "epoch": 0.09933991242402457, - "grad_norm": 0.5449445247650146, - "learning_rate": 9.995596390520018e-06, - "loss": 0.4331, - "step": 1520 - }, - { - "epoch": 0.0994052676295667, - "grad_norm": 0.488118439912796, - "learning_rate": 9.995581726214362e-06, - "loss": 0.4057, - "step": 1521 - }, - { - "epoch": 0.09947062283510881, - "grad_norm": 0.5430318117141724, - "learning_rate": 9.995567037543525e-06, - "loss": 0.4705, - "step": 1522 - }, - { - "epoch": 0.09953597804065094, - "grad_norm": 0.6241530179977417, - "learning_rate": 9.995552324507575e-06, - "loss": 0.5303, - "step": 1523 - }, - { - "epoch": 0.09960133324619307, - "grad_norm": 0.4596569240093231, - "learning_rate": 9.995537587106587e-06, - "loss": 0.3809, - "step": 1524 - }, - { - "epoch": 0.09966668845173518, - "grad_norm": 0.5403998494148254, - "learning_rate": 9.995522825340631e-06, - "loss": 0.4972, - "step": 1525 - }, - { - "epoch": 0.09973204365727731, - "grad_norm": 0.5301600098609924, - "learning_rate": 9.99550803920978e-06, - "loss": 0.3813, - "step": 1526 - }, - { - "epoch": 0.09979739886281942, - "grad_norm": 0.5359787344932556, - "learning_rate": 9.995493228714105e-06, - "loss": 0.4376, - "step": 1527 - }, - { - "epoch": 0.09986275406836155, - "grad_norm": 0.5042316317558289, - "learning_rate": 9.995478393853679e-06, - "loss": 0.433, - "step": 1528 - }, - { - "epoch": 0.09992810927390366, - "grad_norm": 0.5169788599014282, - "learning_rate": 9.995463534628575e-06, - "loss": 0.4544, - "step": 1529 - }, - { - "epoch": 0.09999346447944579, - "grad_norm": 0.497503399848938, - "learning_rate": 9.995448651038865e-06, - "loss": 0.4213, - "step": 1530 - }, - { - "epoch": 0.1000588196849879, - "grad_norm": 0.47543859481811523, - "learning_rate": 9.99543374308462e-06, - "loss": 0.3667, - "step": 1531 - }, - { - "epoch": 0.10012417489053003, - "grad_norm": 0.5248241424560547, - "learning_rate": 9.995418810765915e-06, - "loss": 0.3839, - "step": 1532 - }, - { - "epoch": 0.10018953009607215, - "grad_norm": 0.5335806012153625, - "learning_rate": 9.995403854082822e-06, - "loss": 0.4861, - "step": 1533 - }, - { - "epoch": 0.10025488530161428, - "grad_norm": 0.5063391923904419, - "learning_rate": 9.995388873035415e-06, - "loss": 0.472, - "step": 1534 - }, - { - "epoch": 0.10032024050715639, - "grad_norm": 0.46077948808670044, - "learning_rate": 9.995373867623763e-06, - "loss": 0.3847, - "step": 1535 - }, - { - "epoch": 0.10038559571269852, - "grad_norm": 0.48244237899780273, - "learning_rate": 9.995358837847943e-06, - "loss": 0.4041, - "step": 1536 - }, - { - "epoch": 0.10045095091824063, - "grad_norm": 0.5086256861686707, - "learning_rate": 9.995343783708029e-06, - "loss": 0.4282, - "step": 1537 - }, - { - "epoch": 0.10051630612378276, - "grad_norm": 0.5021991729736328, - "learning_rate": 9.995328705204091e-06, - "loss": 0.3936, - "step": 1538 - }, - { - "epoch": 0.10058166132932488, - "grad_norm": 0.5289314389228821, - "learning_rate": 9.995313602336206e-06, - "loss": 0.4739, - "step": 1539 - }, - { - "epoch": 0.100647016534867, - "grad_norm": 0.5517817139625549, - "learning_rate": 9.995298475104443e-06, - "loss": 0.4827, - "step": 1540 - }, - { - "epoch": 0.10071237174040912, - "grad_norm": 0.5402710437774658, - "learning_rate": 9.995283323508882e-06, - "loss": 0.444, - "step": 1541 - }, - { - "epoch": 0.10077772694595125, - "grad_norm": 0.4955950081348419, - "learning_rate": 9.995268147549594e-06, - "loss": 0.4211, - "step": 1542 - }, - { - "epoch": 0.10084308215149336, - "grad_norm": 0.46556901931762695, - "learning_rate": 9.99525294722665e-06, - "loss": 0.3613, - "step": 1543 - }, - { - "epoch": 0.10090843735703549, - "grad_norm": 0.5250805020332336, - "learning_rate": 9.995237722540127e-06, - "loss": 0.4193, - "step": 1544 - }, - { - "epoch": 0.10097379256257762, - "grad_norm": 0.574885904788971, - "learning_rate": 9.9952224734901e-06, - "loss": 0.4805, - "step": 1545 - }, - { - "epoch": 0.10103914776811973, - "grad_norm": 0.5209019780158997, - "learning_rate": 9.995207200076642e-06, - "loss": 0.4788, - "step": 1546 - }, - { - "epoch": 0.10110450297366186, - "grad_norm": 0.5465680956840515, - "learning_rate": 9.99519190229983e-06, - "loss": 0.5027, - "step": 1547 - }, - { - "epoch": 0.10116985817920397, - "grad_norm": 0.5158794522285461, - "learning_rate": 9.995176580159735e-06, - "loss": 0.4429, - "step": 1548 - }, - { - "epoch": 0.1012352133847461, - "grad_norm": 0.5002278089523315, - "learning_rate": 9.995161233656434e-06, - "loss": 0.417, - "step": 1549 - }, - { - "epoch": 0.10130056859028821, - "grad_norm": 0.4885588586330414, - "learning_rate": 9.995145862790001e-06, - "loss": 0.4068, - "step": 1550 - }, - { - "epoch": 0.10136592379583034, - "grad_norm": 0.5092244148254395, - "learning_rate": 9.99513046756051e-06, - "loss": 0.429, - "step": 1551 - }, - { - "epoch": 0.10143127900137246, - "grad_norm": 0.49464020133018494, - "learning_rate": 9.995115047968038e-06, - "loss": 0.4077, - "step": 1552 - }, - { - "epoch": 0.10149663420691458, - "grad_norm": 0.488465279340744, - "learning_rate": 9.99509960401266e-06, - "loss": 0.389, - "step": 1553 - }, - { - "epoch": 0.1015619894124567, - "grad_norm": 0.514087975025177, - "learning_rate": 9.995084135694451e-06, - "loss": 0.4167, - "step": 1554 - }, - { - "epoch": 0.10162734461799883, - "grad_norm": 0.47458693385124207, - "learning_rate": 9.995068643013486e-06, - "loss": 0.4161, - "step": 1555 - }, - { - "epoch": 0.10169269982354094, - "grad_norm": 0.4821431636810303, - "learning_rate": 9.995053125969839e-06, - "loss": 0.3993, - "step": 1556 - }, - { - "epoch": 0.10175805502908307, - "grad_norm": 0.5431994795799255, - "learning_rate": 9.99503758456359e-06, - "loss": 0.4565, - "step": 1557 - }, - { - "epoch": 0.10182341023462518, - "grad_norm": 0.5592271685600281, - "learning_rate": 9.99502201879481e-06, - "loss": 0.5253, - "step": 1558 - }, - { - "epoch": 0.10188876544016731, - "grad_norm": 0.48341649770736694, - "learning_rate": 9.99500642866358e-06, - "loss": 0.3993, - "step": 1559 - }, - { - "epoch": 0.10195412064570943, - "grad_norm": 0.5405024886131287, - "learning_rate": 9.994990814169969e-06, - "loss": 0.4567, - "step": 1560 - }, - { - "epoch": 0.10201947585125155, - "grad_norm": 0.49808263778686523, - "learning_rate": 9.994975175314061e-06, - "loss": 0.4328, - "step": 1561 - }, - { - "epoch": 0.10208483105679367, - "grad_norm": 0.5058544278144836, - "learning_rate": 9.994959512095928e-06, - "loss": 0.4123, - "step": 1562 - }, - { - "epoch": 0.1021501862623358, - "grad_norm": 0.5445604920387268, - "learning_rate": 9.994943824515644e-06, - "loss": 0.4722, - "step": 1563 - }, - { - "epoch": 0.10221554146787791, - "grad_norm": 0.5007298588752747, - "learning_rate": 9.994928112573292e-06, - "loss": 0.4426, - "step": 1564 - }, - { - "epoch": 0.10228089667342004, - "grad_norm": 0.5044405460357666, - "learning_rate": 9.994912376268943e-06, - "loss": 0.442, - "step": 1565 - }, - { - "epoch": 0.10234625187896217, - "grad_norm": 0.4776204526424408, - "learning_rate": 9.994896615602679e-06, - "loss": 0.3454, - "step": 1566 - }, - { - "epoch": 0.10241160708450428, - "grad_norm": 0.4716126620769501, - "learning_rate": 9.994880830574572e-06, - "loss": 0.3728, - "step": 1567 - }, - { - "epoch": 0.10247696229004641, - "grad_norm": 0.5031254291534424, - "learning_rate": 9.994865021184701e-06, - "loss": 0.4132, - "step": 1568 - }, - { - "epoch": 0.10254231749558852, - "grad_norm": 0.4964865446090698, - "learning_rate": 9.994849187433142e-06, - "loss": 0.4052, - "step": 1569 - }, - { - "epoch": 0.10260767270113065, - "grad_norm": 0.5192766785621643, - "learning_rate": 9.994833329319972e-06, - "loss": 0.4655, - "step": 1570 - }, - { - "epoch": 0.10267302790667276, - "grad_norm": 0.5177010297775269, - "learning_rate": 9.994817446845273e-06, - "loss": 0.4839, - "step": 1571 - }, - { - "epoch": 0.10273838311221489, - "grad_norm": 0.5169263482093811, - "learning_rate": 9.994801540009117e-06, - "loss": 0.4489, - "step": 1572 - }, - { - "epoch": 0.102803738317757, - "grad_norm": 0.5630887746810913, - "learning_rate": 9.994785608811584e-06, - "loss": 0.4681, - "step": 1573 - }, - { - "epoch": 0.10286909352329913, - "grad_norm": 0.4649583399295807, - "learning_rate": 9.99476965325275e-06, - "loss": 0.3973, - "step": 1574 - }, - { - "epoch": 0.10293444872884125, - "grad_norm": 0.5167850852012634, - "learning_rate": 9.994753673332696e-06, - "loss": 0.4593, - "step": 1575 - }, - { - "epoch": 0.10299980393438338, - "grad_norm": 0.45438265800476074, - "learning_rate": 9.994737669051497e-06, - "loss": 0.3794, - "step": 1576 - }, - { - "epoch": 0.10306515913992549, - "grad_norm": 0.534008800983429, - "learning_rate": 9.994721640409231e-06, - "loss": 0.4541, - "step": 1577 - }, - { - "epoch": 0.10313051434546762, - "grad_norm": 0.5064383745193481, - "learning_rate": 9.99470558740598e-06, - "loss": 0.4248, - "step": 1578 - }, - { - "epoch": 0.10319586955100973, - "grad_norm": 0.46664220094680786, - "learning_rate": 9.994689510041817e-06, - "loss": 0.3905, - "step": 1579 - }, - { - "epoch": 0.10326122475655186, - "grad_norm": 0.5122666358947754, - "learning_rate": 9.994673408316824e-06, - "loss": 0.4329, - "step": 1580 - }, - { - "epoch": 0.10332657996209398, - "grad_norm": 0.5435193777084351, - "learning_rate": 9.994657282231077e-06, - "loss": 0.4561, - "step": 1581 - }, - { - "epoch": 0.1033919351676361, - "grad_norm": 0.4839650094509125, - "learning_rate": 9.994641131784657e-06, - "loss": 0.4042, - "step": 1582 - }, - { - "epoch": 0.10345729037317822, - "grad_norm": 0.5004949569702148, - "learning_rate": 9.994624956977644e-06, - "loss": 0.3976, - "step": 1583 - }, - { - "epoch": 0.10352264557872035, - "grad_norm": 0.5046452283859253, - "learning_rate": 9.994608757810111e-06, - "loss": 0.4224, - "step": 1584 - }, - { - "epoch": 0.10358800078426246, - "grad_norm": 0.45456764101982117, - "learning_rate": 9.994592534282144e-06, - "loss": 0.3623, - "step": 1585 - }, - { - "epoch": 0.10365335598980459, - "grad_norm": 0.5273374319076538, - "learning_rate": 9.994576286393817e-06, - "loss": 0.4477, - "step": 1586 - }, - { - "epoch": 0.10371871119534672, - "grad_norm": 0.5582617521286011, - "learning_rate": 9.994560014145211e-06, - "loss": 0.5149, - "step": 1587 - }, - { - "epoch": 0.10378406640088883, - "grad_norm": 0.4669838547706604, - "learning_rate": 9.994543717536407e-06, - "loss": 0.3804, - "step": 1588 - }, - { - "epoch": 0.10384942160643096, - "grad_norm": 0.5422541499137878, - "learning_rate": 9.994527396567483e-06, - "loss": 0.4509, - "step": 1589 - }, - { - "epoch": 0.10391477681197307, - "grad_norm": 0.5106235146522522, - "learning_rate": 9.994511051238518e-06, - "loss": 0.4067, - "step": 1590 - }, - { - "epoch": 0.1039801320175152, - "grad_norm": 0.5292513966560364, - "learning_rate": 9.994494681549592e-06, - "loss": 0.4103, - "step": 1591 - }, - { - "epoch": 0.10404548722305731, - "grad_norm": 0.5347157716751099, - "learning_rate": 9.994478287500786e-06, - "loss": 0.44, - "step": 1592 - }, - { - "epoch": 0.10411084242859944, - "grad_norm": 0.5292321443557739, - "learning_rate": 9.994461869092178e-06, - "loss": 0.4746, - "step": 1593 - }, - { - "epoch": 0.10417619763414156, - "grad_norm": 0.5306785106658936, - "learning_rate": 9.99444542632385e-06, - "loss": 0.4639, - "step": 1594 - }, - { - "epoch": 0.10424155283968368, - "grad_norm": 0.5565119385719299, - "learning_rate": 9.994428959195882e-06, - "loss": 0.4482, - "step": 1595 - }, - { - "epoch": 0.1043069080452258, - "grad_norm": 0.4725090265274048, - "learning_rate": 9.994412467708352e-06, - "loss": 0.3954, - "step": 1596 - }, - { - "epoch": 0.10437226325076793, - "grad_norm": 0.5077757835388184, - "learning_rate": 9.994395951861343e-06, - "loss": 0.443, - "step": 1597 - }, - { - "epoch": 0.10443761845631004, - "grad_norm": 0.6092401742935181, - "learning_rate": 9.994379411654936e-06, - "loss": 0.4611, - "step": 1598 - }, - { - "epoch": 0.10450297366185217, - "grad_norm": 0.5067799091339111, - "learning_rate": 9.994362847089207e-06, - "loss": 0.4469, - "step": 1599 - }, - { - "epoch": 0.10456832886739428, - "grad_norm": 0.5002574324607849, - "learning_rate": 9.994346258164244e-06, - "loss": 0.4265, - "step": 1600 - }, - { - "epoch": 0.10463368407293641, - "grad_norm": 0.5395341515541077, - "learning_rate": 9.994329644880121e-06, - "loss": 0.4755, - "step": 1601 - }, - { - "epoch": 0.10469903927847853, - "grad_norm": 0.47599154710769653, - "learning_rate": 9.994313007236925e-06, - "loss": 0.376, - "step": 1602 - }, - { - "epoch": 0.10476439448402065, - "grad_norm": 0.5089483857154846, - "learning_rate": 9.994296345234733e-06, - "loss": 0.4201, - "step": 1603 - }, - { - "epoch": 0.10482974968956277, - "grad_norm": 0.5276210904121399, - "learning_rate": 9.994279658873627e-06, - "loss": 0.4472, - "step": 1604 - }, - { - "epoch": 0.1048951048951049, - "grad_norm": 0.4988803565502167, - "learning_rate": 9.99426294815369e-06, - "loss": 0.4022, - "step": 1605 - }, - { - "epoch": 0.10496046010064701, - "grad_norm": 0.5025919079780579, - "learning_rate": 9.994246213075001e-06, - "loss": 0.4693, - "step": 1606 - }, - { - "epoch": 0.10502581530618914, - "grad_norm": 0.5430751442909241, - "learning_rate": 9.994229453637644e-06, - "loss": 0.4879, - "step": 1607 - }, - { - "epoch": 0.10509117051173127, - "grad_norm": 0.4761691689491272, - "learning_rate": 9.9942126698417e-06, - "loss": 0.3696, - "step": 1608 - }, - { - "epoch": 0.10515652571727338, - "grad_norm": 0.4970346689224243, - "learning_rate": 9.99419586168725e-06, - "loss": 0.4098, - "step": 1609 - }, - { - "epoch": 0.10522188092281551, - "grad_norm": 0.5387327075004578, - "learning_rate": 9.994179029174377e-06, - "loss": 0.3672, - "step": 1610 - }, - { - "epoch": 0.10528723612835762, - "grad_norm": 0.4932538568973541, - "learning_rate": 9.994162172303162e-06, - "loss": 0.3763, - "step": 1611 - }, - { - "epoch": 0.10535259133389975, - "grad_norm": 0.5018151998519897, - "learning_rate": 9.994145291073688e-06, - "loss": 0.477, - "step": 1612 - }, - { - "epoch": 0.10541794653944186, - "grad_norm": 0.5169281363487244, - "learning_rate": 9.994128385486039e-06, - "loss": 0.4181, - "step": 1613 - }, - { - "epoch": 0.10548330174498399, - "grad_norm": 0.5027089715003967, - "learning_rate": 9.994111455540294e-06, - "loss": 0.4196, - "step": 1614 - }, - { - "epoch": 0.1055486569505261, - "grad_norm": 0.5028634071350098, - "learning_rate": 9.994094501236537e-06, - "loss": 0.4098, - "step": 1615 - }, - { - "epoch": 0.10561401215606823, - "grad_norm": 0.5034271478652954, - "learning_rate": 9.994077522574853e-06, - "loss": 0.4586, - "step": 1616 - }, - { - "epoch": 0.10567936736161035, - "grad_norm": 0.5230409502983093, - "learning_rate": 9.994060519555323e-06, - "loss": 0.4776, - "step": 1617 - }, - { - "epoch": 0.10574472256715248, - "grad_norm": 0.5798277854919434, - "learning_rate": 9.994043492178027e-06, - "loss": 0.493, - "step": 1618 - }, - { - "epoch": 0.10581007777269459, - "grad_norm": 0.4700954556465149, - "learning_rate": 9.994026440443052e-06, - "loss": 0.3865, - "step": 1619 - }, - { - "epoch": 0.10587543297823672, - "grad_norm": 0.5281357765197754, - "learning_rate": 9.994009364350481e-06, - "loss": 0.4346, - "step": 1620 - }, - { - "epoch": 0.10594078818377883, - "grad_norm": 0.5067729353904724, - "learning_rate": 9.993992263900395e-06, - "loss": 0.4011, - "step": 1621 - }, - { - "epoch": 0.10600614338932096, - "grad_norm": 0.4662727117538452, - "learning_rate": 9.993975139092879e-06, - "loss": 0.3957, - "step": 1622 - }, - { - "epoch": 0.10607149859486308, - "grad_norm": 0.48641085624694824, - "learning_rate": 9.993957989928016e-06, - "loss": 0.4027, - "step": 1623 - }, - { - "epoch": 0.1061368538004052, - "grad_norm": 0.5185033679008484, - "learning_rate": 9.99394081640589e-06, - "loss": 0.4105, - "step": 1624 - }, - { - "epoch": 0.10620220900594732, - "grad_norm": 0.5151177048683167, - "learning_rate": 9.993923618526584e-06, - "loss": 0.4104, - "step": 1625 - }, - { - "epoch": 0.10626756421148945, - "grad_norm": 0.5204619765281677, - "learning_rate": 9.993906396290184e-06, - "loss": 0.43, - "step": 1626 - }, - { - "epoch": 0.10633291941703156, - "grad_norm": 0.5157848596572876, - "learning_rate": 9.993889149696772e-06, - "loss": 0.4142, - "step": 1627 - }, - { - "epoch": 0.10639827462257369, - "grad_norm": 0.5414742231369019, - "learning_rate": 9.993871878746432e-06, - "loss": 0.4357, - "step": 1628 - }, - { - "epoch": 0.10646362982811582, - "grad_norm": 0.5015614628791809, - "learning_rate": 9.993854583439249e-06, - "loss": 0.4337, - "step": 1629 - }, - { - "epoch": 0.10652898503365793, - "grad_norm": 0.5032528638839722, - "learning_rate": 9.993837263775306e-06, - "loss": 0.4211, - "step": 1630 - }, - { - "epoch": 0.10659434023920006, - "grad_norm": 0.5219725370407104, - "learning_rate": 9.993819919754691e-06, - "loss": 0.4959, - "step": 1631 - }, - { - "epoch": 0.10665969544474217, - "grad_norm": 0.4600250720977783, - "learning_rate": 9.993802551377484e-06, - "loss": 0.3833, - "step": 1632 - }, - { - "epoch": 0.1067250506502843, - "grad_norm": 0.4900052547454834, - "learning_rate": 9.993785158643774e-06, - "loss": 0.4029, - "step": 1633 - }, - { - "epoch": 0.10679040585582641, - "grad_norm": 0.48988422751426697, - "learning_rate": 9.99376774155364e-06, - "loss": 0.4248, - "step": 1634 - }, - { - "epoch": 0.10685576106136854, - "grad_norm": 0.4932857155799866, - "learning_rate": 9.993750300107174e-06, - "loss": 0.396, - "step": 1635 - }, - { - "epoch": 0.10692111626691066, - "grad_norm": 0.5180819630622864, - "learning_rate": 9.993732834304458e-06, - "loss": 0.4724, - "step": 1636 - }, - { - "epoch": 0.10698647147245278, - "grad_norm": 0.5097213983535767, - "learning_rate": 9.993715344145576e-06, - "loss": 0.4581, - "step": 1637 - }, - { - "epoch": 0.1070518266779949, - "grad_norm": 0.48215875029563904, - "learning_rate": 9.993697829630614e-06, - "loss": 0.4307, - "step": 1638 - }, - { - "epoch": 0.10711718188353703, - "grad_norm": 0.5152288675308228, - "learning_rate": 9.993680290759657e-06, - "loss": 0.4411, - "step": 1639 - }, - { - "epoch": 0.10718253708907914, - "grad_norm": 0.5181601643562317, - "learning_rate": 9.993662727532794e-06, - "loss": 0.4685, - "step": 1640 - }, - { - "epoch": 0.10724789229462127, - "grad_norm": 0.5137525200843811, - "learning_rate": 9.993645139950106e-06, - "loss": 0.4556, - "step": 1641 - }, - { - "epoch": 0.10731324750016338, - "grad_norm": 0.5311994552612305, - "learning_rate": 9.99362752801168e-06, - "loss": 0.4174, - "step": 1642 - }, - { - "epoch": 0.10737860270570551, - "grad_norm": 0.5230871438980103, - "learning_rate": 9.993609891717605e-06, - "loss": 0.4633, - "step": 1643 - }, - { - "epoch": 0.10744395791124763, - "grad_norm": 0.48344889283180237, - "learning_rate": 9.993592231067962e-06, - "loss": 0.3779, - "step": 1644 - }, - { - "epoch": 0.10750931311678975, - "grad_norm": 0.5213485360145569, - "learning_rate": 9.993574546062841e-06, - "loss": 0.4961, - "step": 1645 - }, - { - "epoch": 0.10757466832233187, - "grad_norm": 0.5260332822799683, - "learning_rate": 9.993556836702327e-06, - "loss": 0.4409, - "step": 1646 - }, - { - "epoch": 0.107640023527874, - "grad_norm": 0.5283243060112, - "learning_rate": 9.993539102986506e-06, - "loss": 0.4591, - "step": 1647 - }, - { - "epoch": 0.10770537873341611, - "grad_norm": 0.49721431732177734, - "learning_rate": 9.993521344915464e-06, - "loss": 0.4099, - "step": 1648 - }, - { - "epoch": 0.10777073393895824, - "grad_norm": 0.5064494013786316, - "learning_rate": 9.993503562489291e-06, - "loss": 0.3882, - "step": 1649 - }, - { - "epoch": 0.10783608914450037, - "grad_norm": 0.5435827970504761, - "learning_rate": 9.99348575570807e-06, - "loss": 0.4477, - "step": 1650 - }, - { - "epoch": 0.10790144435004248, - "grad_norm": 0.5121777057647705, - "learning_rate": 9.993467924571888e-06, - "loss": 0.4361, - "step": 1651 - }, - { - "epoch": 0.10796679955558461, - "grad_norm": 0.5017474889755249, - "learning_rate": 9.993450069080834e-06, - "loss": 0.4572, - "step": 1652 - }, - { - "epoch": 0.10803215476112672, - "grad_norm": 0.4521902799606323, - "learning_rate": 9.993432189234995e-06, - "loss": 0.3896, - "step": 1653 - }, - { - "epoch": 0.10809750996666885, - "grad_norm": 0.5386385321617126, - "learning_rate": 9.993414285034456e-06, - "loss": 0.4219, - "step": 1654 - }, - { - "epoch": 0.10816286517221096, - "grad_norm": 0.5199062824249268, - "learning_rate": 9.993396356479305e-06, - "loss": 0.4084, - "step": 1655 - }, - { - "epoch": 0.10822822037775309, - "grad_norm": 0.5277518033981323, - "learning_rate": 9.993378403569632e-06, - "loss": 0.4688, - "step": 1656 - }, - { - "epoch": 0.1082935755832952, - "grad_norm": 0.5194827914237976, - "learning_rate": 9.99336042630552e-06, - "loss": 0.4943, - "step": 1657 - }, - { - "epoch": 0.10835893078883733, - "grad_norm": 0.5181694626808167, - "learning_rate": 9.993342424687063e-06, - "loss": 0.4819, - "step": 1658 - }, - { - "epoch": 0.10842428599437945, - "grad_norm": 0.5547198057174683, - "learning_rate": 9.993324398714341e-06, - "loss": 0.4555, - "step": 1659 - }, - { - "epoch": 0.10848964119992158, - "grad_norm": 0.5099554657936096, - "learning_rate": 9.99330634838745e-06, - "loss": 0.4147, - "step": 1660 - }, - { - "epoch": 0.10855499640546369, - "grad_norm": 0.4664912223815918, - "learning_rate": 9.993288273706471e-06, - "loss": 0.3636, - "step": 1661 - }, - { - "epoch": 0.10862035161100582, - "grad_norm": 0.5194811224937439, - "learning_rate": 9.993270174671496e-06, - "loss": 0.4497, - "step": 1662 - }, - { - "epoch": 0.10868570681654793, - "grad_norm": 0.47649380564689636, - "learning_rate": 9.993252051282614e-06, - "loss": 0.395, - "step": 1663 - }, - { - "epoch": 0.10875106202209006, - "grad_norm": 0.5036858320236206, - "learning_rate": 9.993233903539913e-06, - "loss": 0.4341, - "step": 1664 - }, - { - "epoch": 0.10881641722763218, - "grad_norm": 0.5305114984512329, - "learning_rate": 9.993215731443476e-06, - "loss": 0.5104, - "step": 1665 - }, - { - "epoch": 0.1088817724331743, - "grad_norm": 0.4840755760669708, - "learning_rate": 9.9931975349934e-06, - "loss": 0.4561, - "step": 1666 - }, - { - "epoch": 0.10894712763871642, - "grad_norm": 0.5001997947692871, - "learning_rate": 9.993179314189767e-06, - "loss": 0.4431, - "step": 1667 - }, - { - "epoch": 0.10901248284425855, - "grad_norm": 0.5084608793258667, - "learning_rate": 9.993161069032671e-06, - "loss": 0.3941, - "step": 1668 - }, - { - "epoch": 0.10907783804980066, - "grad_norm": 0.5229108333587646, - "learning_rate": 9.993142799522198e-06, - "loss": 0.4406, - "step": 1669 - }, - { - "epoch": 0.10914319325534279, - "grad_norm": 0.5073447227478027, - "learning_rate": 9.993124505658435e-06, - "loss": 0.4173, - "step": 1670 - }, - { - "epoch": 0.10920854846088492, - "grad_norm": 0.5383942723274231, - "learning_rate": 9.993106187441477e-06, - "loss": 0.4033, - "step": 1671 - }, - { - "epoch": 0.10927390366642703, - "grad_norm": 0.5326029062271118, - "learning_rate": 9.99308784487141e-06, - "loss": 0.4214, - "step": 1672 - }, - { - "epoch": 0.10933925887196916, - "grad_norm": 0.5241849422454834, - "learning_rate": 9.993069477948325e-06, - "loss": 0.4317, - "step": 1673 - }, - { - "epoch": 0.10940461407751127, - "grad_norm": 0.5135782361030579, - "learning_rate": 9.993051086672308e-06, - "loss": 0.4172, - "step": 1674 - }, - { - "epoch": 0.1094699692830534, - "grad_norm": 0.4599984288215637, - "learning_rate": 9.993032671043451e-06, - "loss": 0.3514, - "step": 1675 - }, - { - "epoch": 0.10953532448859551, - "grad_norm": 0.4994356036186218, - "learning_rate": 9.993014231061846e-06, - "loss": 0.4263, - "step": 1676 - }, - { - "epoch": 0.10960067969413764, - "grad_norm": 0.5081284046173096, - "learning_rate": 9.99299576672758e-06, - "loss": 0.4016, - "step": 1677 - }, - { - "epoch": 0.10966603489967976, - "grad_norm": 0.5670870542526245, - "learning_rate": 9.992977278040743e-06, - "loss": 0.4764, - "step": 1678 - }, - { - "epoch": 0.10973139010522188, - "grad_norm": 0.5047267079353333, - "learning_rate": 9.992958765001427e-06, - "loss": 0.4452, - "step": 1679 - }, - { - "epoch": 0.109796745310764, - "grad_norm": 0.5062739849090576, - "learning_rate": 9.992940227609721e-06, - "loss": 0.4526, - "step": 1680 - }, - { - "epoch": 0.10986210051630613, - "grad_norm": 0.5351704955101013, - "learning_rate": 9.992921665865717e-06, - "loss": 0.5013, - "step": 1681 - }, - { - "epoch": 0.10992745572184824, - "grad_norm": 0.4982771575450897, - "learning_rate": 9.992903079769503e-06, - "loss": 0.4476, - "step": 1682 - }, - { - "epoch": 0.10999281092739037, - "grad_norm": 0.4960046410560608, - "learning_rate": 9.992884469321171e-06, - "loss": 0.4369, - "step": 1683 - }, - { - "epoch": 0.11005816613293248, - "grad_norm": 0.4791679084300995, - "learning_rate": 9.99286583452081e-06, - "loss": 0.391, - "step": 1684 - }, - { - "epoch": 0.11012352133847461, - "grad_norm": 0.5098387002944946, - "learning_rate": 9.992847175368516e-06, - "loss": 0.4256, - "step": 1685 - }, - { - "epoch": 0.11018887654401673, - "grad_norm": 0.5323256254196167, - "learning_rate": 9.992828491864375e-06, - "loss": 0.4628, - "step": 1686 - }, - { - "epoch": 0.11025423174955885, - "grad_norm": 0.48827052116394043, - "learning_rate": 9.99280978400848e-06, - "loss": 0.4276, - "step": 1687 - }, - { - "epoch": 0.11031958695510097, - "grad_norm": 0.5217145085334778, - "learning_rate": 9.992791051800922e-06, - "loss": 0.457, - "step": 1688 - }, - { - "epoch": 0.1103849421606431, - "grad_norm": 0.48917123675346375, - "learning_rate": 9.992772295241793e-06, - "loss": 0.3973, - "step": 1689 - }, - { - "epoch": 0.11045029736618521, - "grad_norm": 0.5238152742385864, - "learning_rate": 9.992753514331184e-06, - "loss": 0.475, - "step": 1690 - }, - { - "epoch": 0.11051565257172734, - "grad_norm": 0.5162878632545471, - "learning_rate": 9.992734709069184e-06, - "loss": 0.4535, - "step": 1691 - }, - { - "epoch": 0.11058100777726947, - "grad_norm": 0.5076547861099243, - "learning_rate": 9.99271587945589e-06, - "loss": 0.4339, - "step": 1692 - }, - { - "epoch": 0.11064636298281158, - "grad_norm": 0.53127521276474, - "learning_rate": 9.992697025491389e-06, - "loss": 0.4814, - "step": 1693 - }, - { - "epoch": 0.11071171818835371, - "grad_norm": 0.4735592305660248, - "learning_rate": 9.992678147175776e-06, - "loss": 0.3961, - "step": 1694 - }, - { - "epoch": 0.11077707339389582, - "grad_norm": 0.49765831232070923, - "learning_rate": 9.992659244509141e-06, - "loss": 0.4069, - "step": 1695 - }, - { - "epoch": 0.11084242859943795, - "grad_norm": 0.5660755038261414, - "learning_rate": 9.99264031749158e-06, - "loss": 0.499, - "step": 1696 - }, - { - "epoch": 0.11090778380498006, - "grad_norm": 0.5559900403022766, - "learning_rate": 9.99262136612318e-06, - "loss": 0.489, - "step": 1697 - }, - { - "epoch": 0.11097313901052219, - "grad_norm": 0.5337640047073364, - "learning_rate": 9.992602390404037e-06, - "loss": 0.4689, - "step": 1698 - }, - { - "epoch": 0.1110384942160643, - "grad_norm": 0.5071684122085571, - "learning_rate": 9.992583390334243e-06, - "loss": 0.4457, - "step": 1699 - }, - { - "epoch": 0.11110384942160643, - "grad_norm": 0.5411041975021362, - "learning_rate": 9.992564365913888e-06, - "loss": 0.5051, - "step": 1700 - }, - { - "epoch": 0.11116920462714855, - "grad_norm": 0.530849039554596, - "learning_rate": 9.99254531714307e-06, - "loss": 0.4164, - "step": 1701 - }, - { - "epoch": 0.11123455983269068, - "grad_norm": 0.5025213956832886, - "learning_rate": 9.992526244021877e-06, - "loss": 0.4171, - "step": 1702 - }, - { - "epoch": 0.11129991503823279, - "grad_norm": 0.5115631222724915, - "learning_rate": 9.992507146550404e-06, - "loss": 0.4399, - "step": 1703 - }, - { - "epoch": 0.11136527024377492, - "grad_norm": 0.5250508785247803, - "learning_rate": 9.992488024728744e-06, - "loss": 0.4049, - "step": 1704 - }, - { - "epoch": 0.11143062544931703, - "grad_norm": 0.4998297095298767, - "learning_rate": 9.992468878556992e-06, - "loss": 0.3713, - "step": 1705 - }, - { - "epoch": 0.11149598065485916, - "grad_norm": 0.5240525007247925, - "learning_rate": 9.992449708035237e-06, - "loss": 0.4612, - "step": 1706 - }, - { - "epoch": 0.11156133586040128, - "grad_norm": 0.538304328918457, - "learning_rate": 9.992430513163578e-06, - "loss": 0.4479, - "step": 1707 - }, - { - "epoch": 0.1116266910659434, - "grad_norm": 0.4934563934803009, - "learning_rate": 9.992411293942104e-06, - "loss": 0.3545, - "step": 1708 - }, - { - "epoch": 0.11169204627148552, - "grad_norm": 0.5011223554611206, - "learning_rate": 9.99239205037091e-06, - "loss": 0.3981, - "step": 1709 - }, - { - "epoch": 0.11175740147702765, - "grad_norm": 0.4800606071949005, - "learning_rate": 9.992372782450091e-06, - "loss": 0.3889, - "step": 1710 - }, - { - "epoch": 0.11182275668256976, - "grad_norm": 0.565067708492279, - "learning_rate": 9.992353490179741e-06, - "loss": 0.5346, - "step": 1711 - }, - { - "epoch": 0.11188811188811189, - "grad_norm": 0.5308020710945129, - "learning_rate": 9.992334173559955e-06, - "loss": 0.456, - "step": 1712 - }, - { - "epoch": 0.11195346709365402, - "grad_norm": 0.5349618196487427, - "learning_rate": 9.992314832590823e-06, - "loss": 0.4232, - "step": 1713 - }, - { - "epoch": 0.11201882229919613, - "grad_norm": 0.5136187076568604, - "learning_rate": 9.992295467272445e-06, - "loss": 0.3996, - "step": 1714 - }, - { - "epoch": 0.11208417750473826, - "grad_norm": 0.4795161485671997, - "learning_rate": 9.99227607760491e-06, - "loss": 0.404, - "step": 1715 - }, - { - "epoch": 0.11214953271028037, - "grad_norm": 0.5381472706794739, - "learning_rate": 9.992256663588315e-06, - "loss": 0.3986, - "step": 1716 - }, - { - "epoch": 0.1122148879158225, - "grad_norm": 0.5232036113739014, - "learning_rate": 9.992237225222756e-06, - "loss": 0.4077, - "step": 1717 - }, - { - "epoch": 0.11228024312136461, - "grad_norm": 0.5185948610305786, - "learning_rate": 9.992217762508324e-06, - "loss": 0.441, - "step": 1718 - }, - { - "epoch": 0.11234559832690674, - "grad_norm": 0.5535147190093994, - "learning_rate": 9.99219827544512e-06, - "loss": 0.3869, - "step": 1719 - }, - { - "epoch": 0.11241095353244886, - "grad_norm": 0.48375648260116577, - "learning_rate": 9.992178764033234e-06, - "loss": 0.3639, - "step": 1720 - }, - { - "epoch": 0.11247630873799098, - "grad_norm": 0.5208756923675537, - "learning_rate": 9.992159228272764e-06, - "loss": 0.394, - "step": 1721 - }, - { - "epoch": 0.1125416639435331, - "grad_norm": 0.5771656632423401, - "learning_rate": 9.992139668163803e-06, - "loss": 0.5047, - "step": 1722 - }, - { - "epoch": 0.11260701914907523, - "grad_norm": 0.5113723874092102, - "learning_rate": 9.992120083706447e-06, - "loss": 0.4335, - "step": 1723 - }, - { - "epoch": 0.11267237435461734, - "grad_norm": 0.530390202999115, - "learning_rate": 9.992100474900793e-06, - "loss": 0.4678, - "step": 1724 - }, - { - "epoch": 0.11273772956015947, - "grad_norm": 0.49649545550346375, - "learning_rate": 9.992080841746934e-06, - "loss": 0.4306, - "step": 1725 - }, - { - "epoch": 0.11280308476570158, - "grad_norm": 0.49351996183395386, - "learning_rate": 9.992061184244967e-06, - "loss": 0.401, - "step": 1726 - }, - { - "epoch": 0.11286843997124371, - "grad_norm": 0.501213788986206, - "learning_rate": 9.99204150239499e-06, - "loss": 0.4023, - "step": 1727 - }, - { - "epoch": 0.11293379517678583, - "grad_norm": 0.4906255304813385, - "learning_rate": 9.992021796197095e-06, - "loss": 0.3941, - "step": 1728 - }, - { - "epoch": 0.11299915038232795, - "grad_norm": 0.49137502908706665, - "learning_rate": 9.992002065651383e-06, - "loss": 0.4386, - "step": 1729 - }, - { - "epoch": 0.11306450558787007, - "grad_norm": 0.5452741980552673, - "learning_rate": 9.991982310757946e-06, - "loss": 0.4914, - "step": 1730 - }, - { - "epoch": 0.1131298607934122, - "grad_norm": 0.49915504455566406, - "learning_rate": 9.991962531516882e-06, - "loss": 0.4056, - "step": 1731 - }, - { - "epoch": 0.11319521599895431, - "grad_norm": 0.49487340450286865, - "learning_rate": 9.991942727928288e-06, - "loss": 0.4123, - "step": 1732 - }, - { - "epoch": 0.11326057120449644, - "grad_norm": 0.4953140616416931, - "learning_rate": 9.99192289999226e-06, - "loss": 0.4098, - "step": 1733 - }, - { - "epoch": 0.11332592641003857, - "grad_norm": 0.4806705117225647, - "learning_rate": 9.991903047708893e-06, - "loss": 0.4107, - "step": 1734 - }, - { - "epoch": 0.11339128161558068, - "grad_norm": 0.5293604135513306, - "learning_rate": 9.991883171078287e-06, - "loss": 0.4342, - "step": 1735 - }, - { - "epoch": 0.11345663682112281, - "grad_norm": 0.5292914509773254, - "learning_rate": 9.991863270100537e-06, - "loss": 0.4091, - "step": 1736 - }, - { - "epoch": 0.11352199202666492, - "grad_norm": 0.4674372375011444, - "learning_rate": 9.99184334477574e-06, - "loss": 0.3822, - "step": 1737 - }, - { - "epoch": 0.11358734723220705, - "grad_norm": 0.53412264585495, - "learning_rate": 9.991823395103995e-06, - "loss": 0.474, - "step": 1738 - }, - { - "epoch": 0.11365270243774916, - "grad_norm": 0.49650678038597107, - "learning_rate": 9.991803421085397e-06, - "loss": 0.3835, - "step": 1739 - }, - { - "epoch": 0.11371805764329129, - "grad_norm": 0.49791911244392395, - "learning_rate": 9.991783422720046e-06, - "loss": 0.4151, - "step": 1740 - }, - { - "epoch": 0.1137834128488334, - "grad_norm": 0.510292649269104, - "learning_rate": 9.991763400008035e-06, - "loss": 0.4251, - "step": 1741 - }, - { - "epoch": 0.11384876805437553, - "grad_norm": 0.5056973099708557, - "learning_rate": 9.991743352949466e-06, - "loss": 0.4595, - "step": 1742 - }, - { - "epoch": 0.11391412325991765, - "grad_norm": 0.5104454755783081, - "learning_rate": 9.991723281544433e-06, - "loss": 0.4174, - "step": 1743 - }, - { - "epoch": 0.11397947846545978, - "grad_norm": 0.5372150540351868, - "learning_rate": 9.991703185793041e-06, - "loss": 0.3844, - "step": 1744 - }, - { - "epoch": 0.11404483367100189, - "grad_norm": 0.5111712217330933, - "learning_rate": 9.99168306569538e-06, - "loss": 0.4219, - "step": 1745 - }, - { - "epoch": 0.11411018887654402, - "grad_norm": 0.4930039346218109, - "learning_rate": 9.991662921251552e-06, - "loss": 0.3793, - "step": 1746 - }, - { - "epoch": 0.11417554408208613, - "grad_norm": 0.49626004695892334, - "learning_rate": 9.991642752461657e-06, - "loss": 0.4392, - "step": 1747 - }, - { - "epoch": 0.11424089928762826, - "grad_norm": 0.5289170145988464, - "learning_rate": 9.991622559325787e-06, - "loss": 0.4399, - "step": 1748 - }, - { - "epoch": 0.11430625449317038, - "grad_norm": 0.5389708280563354, - "learning_rate": 9.991602341844047e-06, - "loss": 0.4415, - "step": 1749 - }, - { - "epoch": 0.1143716096987125, - "grad_norm": 0.46755653619766235, - "learning_rate": 9.991582100016532e-06, - "loss": 0.3867, - "step": 1750 - }, - { - "epoch": 0.11443696490425462, - "grad_norm": 0.4930843710899353, - "learning_rate": 9.991561833843344e-06, - "loss": 0.4016, - "step": 1751 - }, - { - "epoch": 0.11450232010979675, - "grad_norm": 0.4829428195953369, - "learning_rate": 9.991541543324578e-06, - "loss": 0.4276, - "step": 1752 - }, - { - "epoch": 0.11456767531533886, - "grad_norm": 0.4952201247215271, - "learning_rate": 9.991521228460334e-06, - "loss": 0.4201, - "step": 1753 - }, - { - "epoch": 0.11463303052088099, - "grad_norm": 0.5221691131591797, - "learning_rate": 9.991500889250713e-06, - "loss": 0.4356, - "step": 1754 - }, - { - "epoch": 0.11469838572642312, - "grad_norm": 0.5008527636528015, - "learning_rate": 9.991480525695813e-06, - "loss": 0.4374, - "step": 1755 - }, - { - "epoch": 0.11476374093196523, - "grad_norm": 0.5217990279197693, - "learning_rate": 9.991460137795733e-06, - "loss": 0.4545, - "step": 1756 - }, - { - "epoch": 0.11482909613750736, - "grad_norm": 0.5186170339584351, - "learning_rate": 9.991439725550571e-06, - "loss": 0.4272, - "step": 1757 - }, - { - "epoch": 0.11489445134304947, - "grad_norm": 0.4857713580131531, - "learning_rate": 9.99141928896043e-06, - "loss": 0.3829, - "step": 1758 - }, - { - "epoch": 0.1149598065485916, - "grad_norm": 0.5543569326400757, - "learning_rate": 9.991398828025408e-06, - "loss": 0.4596, - "step": 1759 - }, - { - "epoch": 0.11502516175413371, - "grad_norm": 0.5568326711654663, - "learning_rate": 9.991378342745604e-06, - "loss": 0.4903, - "step": 1760 - }, - { - "epoch": 0.11509051695967584, - "grad_norm": 0.472256064414978, - "learning_rate": 9.991357833121119e-06, - "loss": 0.4053, - "step": 1761 - }, - { - "epoch": 0.11515587216521796, - "grad_norm": 0.5433557629585266, - "learning_rate": 9.991337299152054e-06, - "loss": 0.4756, - "step": 1762 - }, - { - "epoch": 0.11522122737076008, - "grad_norm": 0.5174272060394287, - "learning_rate": 9.991316740838506e-06, - "loss": 0.3913, - "step": 1763 - }, - { - "epoch": 0.1152865825763022, - "grad_norm": 0.500368595123291, - "learning_rate": 9.991296158180577e-06, - "loss": 0.4013, - "step": 1764 - }, - { - "epoch": 0.11535193778184433, - "grad_norm": 0.5240638256072998, - "learning_rate": 9.991275551178368e-06, - "loss": 0.4406, - "step": 1765 - }, - { - "epoch": 0.11541729298738644, - "grad_norm": 0.5398024320602417, - "learning_rate": 9.99125491983198e-06, - "loss": 0.4668, - "step": 1766 - }, - { - "epoch": 0.11548264819292857, - "grad_norm": 0.5100404620170593, - "learning_rate": 9.991234264141512e-06, - "loss": 0.3996, - "step": 1767 - }, - { - "epoch": 0.11554800339847068, - "grad_norm": 0.5085362195968628, - "learning_rate": 9.991213584107065e-06, - "loss": 0.4481, - "step": 1768 - }, - { - "epoch": 0.11561335860401281, - "grad_norm": 0.619003176689148, - "learning_rate": 9.991192879728739e-06, - "loss": 0.4742, - "step": 1769 - }, - { - "epoch": 0.11567871380955493, - "grad_norm": 0.5342796444892883, - "learning_rate": 9.991172151006639e-06, - "loss": 0.4228, - "step": 1770 - }, - { - "epoch": 0.11574406901509705, - "grad_norm": 0.4943518340587616, - "learning_rate": 9.991151397940862e-06, - "loss": 0.3999, - "step": 1771 - }, - { - "epoch": 0.11580942422063917, - "grad_norm": 0.5110808610916138, - "learning_rate": 9.99113062053151e-06, - "loss": 0.4131, - "step": 1772 - }, - { - "epoch": 0.1158747794261813, - "grad_norm": 0.4850325286388397, - "learning_rate": 9.991109818778686e-06, - "loss": 0.3905, - "step": 1773 - }, - { - "epoch": 0.11594013463172341, - "grad_norm": 0.5520676374435425, - "learning_rate": 9.991088992682489e-06, - "loss": 0.4437, - "step": 1774 - }, - { - "epoch": 0.11600548983726554, - "grad_norm": 0.5156357288360596, - "learning_rate": 9.991068142243021e-06, - "loss": 0.4561, - "step": 1775 - }, - { - "epoch": 0.11607084504280767, - "grad_norm": 0.505234956741333, - "learning_rate": 9.991047267460387e-06, - "loss": 0.3864, - "step": 1776 - }, - { - "epoch": 0.11613620024834978, - "grad_norm": 0.5520933866500854, - "learning_rate": 9.991026368334685e-06, - "loss": 0.4962, - "step": 1777 - }, - { - "epoch": 0.11620155545389191, - "grad_norm": 0.5127909779548645, - "learning_rate": 9.991005444866019e-06, - "loss": 0.4412, - "step": 1778 - }, - { - "epoch": 0.11626691065943402, - "grad_norm": 0.47656968235969543, - "learning_rate": 9.99098449705449e-06, - "loss": 0.3944, - "step": 1779 - }, - { - "epoch": 0.11633226586497615, - "grad_norm": 0.4825184941291809, - "learning_rate": 9.990963524900202e-06, - "loss": 0.4123, - "step": 1780 - }, - { - "epoch": 0.11639762107051826, - "grad_norm": 0.4872205853462219, - "learning_rate": 9.990942528403253e-06, - "loss": 0.3892, - "step": 1781 - }, - { - "epoch": 0.11646297627606039, - "grad_norm": 0.644323468208313, - "learning_rate": 9.990921507563752e-06, - "loss": 0.4625, - "step": 1782 - }, - { - "epoch": 0.1165283314816025, - "grad_norm": 0.5240007042884827, - "learning_rate": 9.990900462381794e-06, - "loss": 0.4141, - "step": 1783 - }, - { - "epoch": 0.11659368668714463, - "grad_norm": 0.4936656951904297, - "learning_rate": 9.990879392857486e-06, - "loss": 0.4686, - "step": 1784 - }, - { - "epoch": 0.11665904189268675, - "grad_norm": 0.5088244080543518, - "learning_rate": 9.990858298990932e-06, - "loss": 0.4375, - "step": 1785 - }, - { - "epoch": 0.11672439709822888, - "grad_norm": 0.5279545187950134, - "learning_rate": 9.990837180782234e-06, - "loss": 0.4559, - "step": 1786 - }, - { - "epoch": 0.11678975230377099, - "grad_norm": 0.512206494808197, - "learning_rate": 9.99081603823149e-06, - "loss": 0.4568, - "step": 1787 - }, - { - "epoch": 0.11685510750931312, - "grad_norm": 0.4780323803424835, - "learning_rate": 9.990794871338811e-06, - "loss": 0.389, - "step": 1788 - }, - { - "epoch": 0.11692046271485523, - "grad_norm": 0.48555147647857666, - "learning_rate": 9.990773680104296e-06, - "loss": 0.4388, - "step": 1789 - }, - { - "epoch": 0.11698581792039736, - "grad_norm": 0.4882522225379944, - "learning_rate": 9.990752464528047e-06, - "loss": 0.4183, - "step": 1790 - }, - { - "epoch": 0.11705117312593948, - "grad_norm": 0.5026500225067139, - "learning_rate": 9.99073122461017e-06, - "loss": 0.4497, - "step": 1791 - }, - { - "epoch": 0.1171165283314816, - "grad_norm": 0.4988664984703064, - "learning_rate": 9.990709960350769e-06, - "loss": 0.415, - "step": 1792 - }, - { - "epoch": 0.11718188353702372, - "grad_norm": 0.581355094909668, - "learning_rate": 9.990688671749944e-06, - "loss": 0.3973, - "step": 1793 - }, - { - "epoch": 0.11724723874256585, - "grad_norm": 0.4597133696079254, - "learning_rate": 9.990667358807804e-06, - "loss": 0.4133, - "step": 1794 - }, - { - "epoch": 0.11731259394810796, - "grad_norm": 0.5237943530082703, - "learning_rate": 9.990646021524449e-06, - "loss": 0.4414, - "step": 1795 - }, - { - "epoch": 0.11737794915365009, - "grad_norm": 0.549954891204834, - "learning_rate": 9.990624659899986e-06, - "loss": 0.5089, - "step": 1796 - }, - { - "epoch": 0.11744330435919222, - "grad_norm": 0.45088574290275574, - "learning_rate": 9.990603273934516e-06, - "loss": 0.3577, - "step": 1797 - }, - { - "epoch": 0.11750865956473433, - "grad_norm": 0.4860559403896332, - "learning_rate": 9.990581863628144e-06, - "loss": 0.4324, - "step": 1798 - }, - { - "epoch": 0.11757401477027646, - "grad_norm": 0.5469778776168823, - "learning_rate": 9.990560428980977e-06, - "loss": 0.4381, - "step": 1799 - }, - { - "epoch": 0.11763936997581857, - "grad_norm": 0.49300289154052734, - "learning_rate": 9.990538969993118e-06, - "loss": 0.4192, - "step": 1800 - }, - { - "epoch": 0.1177047251813607, - "grad_norm": 0.5044761896133423, - "learning_rate": 9.99051748666467e-06, - "loss": 0.4707, - "step": 1801 - }, - { - "epoch": 0.11777008038690281, - "grad_norm": 0.4667385220527649, - "learning_rate": 9.99049597899574e-06, - "loss": 0.4064, - "step": 1802 - }, - { - "epoch": 0.11783543559244494, - "grad_norm": 0.4754604399204254, - "learning_rate": 9.990474446986433e-06, - "loss": 0.4128, - "step": 1803 - }, - { - "epoch": 0.11790079079798706, - "grad_norm": 0.4578009247779846, - "learning_rate": 9.990452890636856e-06, - "loss": 0.38, - "step": 1804 - }, - { - "epoch": 0.11796614600352918, - "grad_norm": 0.4580478370189667, - "learning_rate": 9.990431309947106e-06, - "loss": 0.3957, - "step": 1805 - }, - { - "epoch": 0.1180315012090713, - "grad_norm": 0.4998745620250702, - "learning_rate": 9.990409704917297e-06, - "loss": 0.4659, - "step": 1806 - }, - { - "epoch": 0.11809685641461343, - "grad_norm": 0.5275945663452148, - "learning_rate": 9.99038807554753e-06, - "loss": 0.488, - "step": 1807 - }, - { - "epoch": 0.11816221162015554, - "grad_norm": 0.4460282623767853, - "learning_rate": 9.990366421837912e-06, - "loss": 0.3598, - "step": 1808 - }, - { - "epoch": 0.11822756682569767, - "grad_norm": 0.5143075585365295, - "learning_rate": 9.990344743788547e-06, - "loss": 0.4938, - "step": 1809 - }, - { - "epoch": 0.11829292203123978, - "grad_norm": 0.5240001678466797, - "learning_rate": 9.990323041399543e-06, - "loss": 0.4981, - "step": 1810 - }, - { - "epoch": 0.11835827723678191, - "grad_norm": 0.5387542843818665, - "learning_rate": 9.990301314671003e-06, - "loss": 0.4879, - "step": 1811 - }, - { - "epoch": 0.11842363244232403, - "grad_norm": 0.4960077106952667, - "learning_rate": 9.990279563603035e-06, - "loss": 0.4133, - "step": 1812 - }, - { - "epoch": 0.11848898764786615, - "grad_norm": 0.5138953328132629, - "learning_rate": 9.990257788195747e-06, - "loss": 0.4514, - "step": 1813 - }, - { - "epoch": 0.11855434285340827, - "grad_norm": 0.4938754141330719, - "learning_rate": 9.990235988449242e-06, - "loss": 0.4345, - "step": 1814 - }, - { - "epoch": 0.1186196980589504, - "grad_norm": 0.4556517004966736, - "learning_rate": 9.990214164363628e-06, - "loss": 0.3922, - "step": 1815 - }, - { - "epoch": 0.11868505326449251, - "grad_norm": 0.5381069779396057, - "learning_rate": 9.99019231593901e-06, - "loss": 0.467, - "step": 1816 - }, - { - "epoch": 0.11875040847003464, - "grad_norm": 0.4847467243671417, - "learning_rate": 9.990170443175493e-06, - "loss": 0.4205, - "step": 1817 - }, - { - "epoch": 0.11881576367557677, - "grad_norm": 0.48896893858909607, - "learning_rate": 9.990148546073189e-06, - "loss": 0.394, - "step": 1818 - }, - { - "epoch": 0.11888111888111888, - "grad_norm": 0.5556392669677734, - "learning_rate": 9.990126624632201e-06, - "loss": 0.4897, - "step": 1819 - }, - { - "epoch": 0.11894647408666101, - "grad_norm": 0.555661678314209, - "learning_rate": 9.990104678852635e-06, - "loss": 0.4079, - "step": 1820 - }, - { - "epoch": 0.11901182929220312, - "grad_norm": 0.5040169954299927, - "learning_rate": 9.990082708734602e-06, - "loss": 0.4017, - "step": 1821 - }, - { - "epoch": 0.11907718449774525, - "grad_norm": 0.5507213473320007, - "learning_rate": 9.990060714278207e-06, - "loss": 0.5192, - "step": 1822 - }, - { - "epoch": 0.11914253970328736, - "grad_norm": 0.5001083612442017, - "learning_rate": 9.990038695483555e-06, - "loss": 0.459, - "step": 1823 - }, - { - "epoch": 0.11920789490882949, - "grad_norm": 0.4889615476131439, - "learning_rate": 9.990016652350759e-06, - "loss": 0.4397, - "step": 1824 - }, - { - "epoch": 0.1192732501143716, - "grad_norm": 0.47661325335502625, - "learning_rate": 9.989994584879919e-06, - "loss": 0.3961, - "step": 1825 - }, - { - "epoch": 0.11933860531991373, - "grad_norm": 0.49876272678375244, - "learning_rate": 9.98997249307115e-06, - "loss": 0.4079, - "step": 1826 - }, - { - "epoch": 0.11940396052545585, - "grad_norm": 0.46574166417121887, - "learning_rate": 9.989950376924555e-06, - "loss": 0.3667, - "step": 1827 - }, - { - "epoch": 0.11946931573099798, - "grad_norm": 0.5499573349952698, - "learning_rate": 9.989928236440242e-06, - "loss": 0.4348, - "step": 1828 - }, - { - "epoch": 0.11953467093654009, - "grad_norm": 0.5078558325767517, - "learning_rate": 9.989906071618323e-06, - "loss": 0.4414, - "step": 1829 - }, - { - "epoch": 0.11960002614208222, - "grad_norm": 0.47681233286857605, - "learning_rate": 9.989883882458902e-06, - "loss": 0.3838, - "step": 1830 - }, - { - "epoch": 0.11966538134762433, - "grad_norm": 0.4673955738544464, - "learning_rate": 9.989861668962089e-06, - "loss": 0.4117, - "step": 1831 - }, - { - "epoch": 0.11973073655316646, - "grad_norm": 0.4810030460357666, - "learning_rate": 9.989839431127992e-06, - "loss": 0.4078, - "step": 1832 - }, - { - "epoch": 0.11979609175870858, - "grad_norm": 0.4352864623069763, - "learning_rate": 9.989817168956719e-06, - "loss": 0.3383, - "step": 1833 - }, - { - "epoch": 0.1198614469642507, - "grad_norm": 0.5169110894203186, - "learning_rate": 9.989794882448378e-06, - "loss": 0.4659, - "step": 1834 - }, - { - "epoch": 0.11992680216979282, - "grad_norm": 0.5480664372444153, - "learning_rate": 9.98977257160308e-06, - "loss": 0.4267, - "step": 1835 - }, - { - "epoch": 0.11999215737533495, - "grad_norm": 0.48018908500671387, - "learning_rate": 9.989750236420933e-06, - "loss": 0.3706, - "step": 1836 - }, - { - "epoch": 0.12005751258087707, - "grad_norm": 0.5030679702758789, - "learning_rate": 9.989727876902044e-06, - "loss": 0.4464, - "step": 1837 - }, - { - "epoch": 0.12012286778641919, - "grad_norm": 0.566403329372406, - "learning_rate": 9.989705493046527e-06, - "loss": 0.4736, - "step": 1838 - }, - { - "epoch": 0.12018822299196132, - "grad_norm": 0.4993888735771179, - "learning_rate": 9.989683084854484e-06, - "loss": 0.4262, - "step": 1839 - }, - { - "epoch": 0.12025357819750343, - "grad_norm": 0.4965766966342926, - "learning_rate": 9.98966065232603e-06, - "loss": 0.4548, - "step": 1840 - }, - { - "epoch": 0.12031893340304556, - "grad_norm": 0.6620581746101379, - "learning_rate": 9.989638195461271e-06, - "loss": 0.4863, - "step": 1841 - }, - { - "epoch": 0.12038428860858767, - "grad_norm": 0.48163750767707825, - "learning_rate": 9.989615714260319e-06, - "loss": 0.3745, - "step": 1842 - }, - { - "epoch": 0.1204496438141298, - "grad_norm": 0.5864192843437195, - "learning_rate": 9.989593208723283e-06, - "loss": 0.5025, - "step": 1843 - }, - { - "epoch": 0.12051499901967191, - "grad_norm": 0.5082445740699768, - "learning_rate": 9.989570678850271e-06, - "loss": 0.41, - "step": 1844 - }, - { - "epoch": 0.12058035422521404, - "grad_norm": 0.47217845916748047, - "learning_rate": 9.989548124641396e-06, - "loss": 0.3884, - "step": 1845 - }, - { - "epoch": 0.12064570943075616, - "grad_norm": 0.5516869425773621, - "learning_rate": 9.989525546096762e-06, - "loss": 0.4281, - "step": 1846 - }, - { - "epoch": 0.12071106463629828, - "grad_norm": 0.5288026332855225, - "learning_rate": 9.989502943216488e-06, - "loss": 0.4794, - "step": 1847 - }, - { - "epoch": 0.1207764198418404, - "grad_norm": 0.5199207067489624, - "learning_rate": 9.989480316000678e-06, - "loss": 0.3937, - "step": 1848 - }, - { - "epoch": 0.12084177504738253, - "grad_norm": 0.5149030089378357, - "learning_rate": 9.989457664449445e-06, - "loss": 0.411, - "step": 1849 - }, - { - "epoch": 0.12090713025292464, - "grad_norm": 0.5265056490898132, - "learning_rate": 9.989434988562896e-06, - "loss": 0.4697, - "step": 1850 - }, - { - "epoch": 0.12097248545846677, - "grad_norm": 0.5422550439834595, - "learning_rate": 9.989412288341148e-06, - "loss": 0.4513, - "step": 1851 - }, - { - "epoch": 0.12103784066400888, - "grad_norm": 0.4541454613208771, - "learning_rate": 9.989389563784304e-06, - "loss": 0.3599, - "step": 1852 - }, - { - "epoch": 0.12110319586955101, - "grad_norm": 0.46409621834754944, - "learning_rate": 9.98936681489248e-06, - "loss": 0.3761, - "step": 1853 - }, - { - "epoch": 0.12116855107509313, - "grad_norm": 0.4885619878768921, - "learning_rate": 9.989344041665784e-06, - "loss": 0.4299, - "step": 1854 - }, - { - "epoch": 0.12123390628063525, - "grad_norm": 0.5073054432868958, - "learning_rate": 9.989321244104331e-06, - "loss": 0.3957, - "step": 1855 - }, - { - "epoch": 0.12129926148617737, - "grad_norm": 0.5251143574714661, - "learning_rate": 9.989298422208228e-06, - "loss": 0.4334, - "step": 1856 - }, - { - "epoch": 0.1213646166917195, - "grad_norm": 0.4727812707424164, - "learning_rate": 9.98927557597759e-06, - "loss": 0.3721, - "step": 1857 - }, - { - "epoch": 0.12142997189726162, - "grad_norm": 0.5348283052444458, - "learning_rate": 9.989252705412526e-06, - "loss": 0.4731, - "step": 1858 - }, - { - "epoch": 0.12149532710280374, - "grad_norm": 0.513326108455658, - "learning_rate": 9.989229810513147e-06, - "loss": 0.4111, - "step": 1859 - }, - { - "epoch": 0.12156068230834587, - "grad_norm": 0.4820597171783447, - "learning_rate": 9.989206891279569e-06, - "loss": 0.3684, - "step": 1860 - }, - { - "epoch": 0.12162603751388798, - "grad_norm": 0.5179558992385864, - "learning_rate": 9.989183947711898e-06, - "loss": 0.4668, - "step": 1861 - }, - { - "epoch": 0.12169139271943011, - "grad_norm": 0.4776163101196289, - "learning_rate": 9.989160979810247e-06, - "loss": 0.4098, - "step": 1862 - }, - { - "epoch": 0.12175674792497222, - "grad_norm": 0.5698267221450806, - "learning_rate": 9.989137987574731e-06, - "loss": 0.5449, - "step": 1863 - }, - { - "epoch": 0.12182210313051435, - "grad_norm": 0.5245393514633179, - "learning_rate": 9.98911497100546e-06, - "loss": 0.43, - "step": 1864 - }, - { - "epoch": 0.12188745833605646, - "grad_norm": 0.48276931047439575, - "learning_rate": 9.989091930102549e-06, - "loss": 0.3807, - "step": 1865 - }, - { - "epoch": 0.12195281354159859, - "grad_norm": 0.5482274889945984, - "learning_rate": 9.989068864866108e-06, - "loss": 0.4946, - "step": 1866 - }, - { - "epoch": 0.1220181687471407, - "grad_norm": 0.5497584939002991, - "learning_rate": 9.989045775296247e-06, - "loss": 0.5072, - "step": 1867 - }, - { - "epoch": 0.12208352395268283, - "grad_norm": 0.5020398497581482, - "learning_rate": 9.989022661393084e-06, - "loss": 0.4395, - "step": 1868 - }, - { - "epoch": 0.12214887915822495, - "grad_norm": 0.5637296438217163, - "learning_rate": 9.988999523156728e-06, - "loss": 0.4452, - "step": 1869 - }, - { - "epoch": 0.12221423436376708, - "grad_norm": 0.5545478463172913, - "learning_rate": 9.988976360587292e-06, - "loss": 0.4496, - "step": 1870 - }, - { - "epoch": 0.12227958956930919, - "grad_norm": 0.5133147835731506, - "learning_rate": 9.988953173684892e-06, - "loss": 0.4046, - "step": 1871 - }, - { - "epoch": 0.12234494477485132, - "grad_norm": 0.4993395507335663, - "learning_rate": 9.988929962449638e-06, - "loss": 0.4166, - "step": 1872 - }, - { - "epoch": 0.12241029998039343, - "grad_norm": 0.5099334120750427, - "learning_rate": 9.988906726881644e-06, - "loss": 0.4395, - "step": 1873 - }, - { - "epoch": 0.12247565518593556, - "grad_norm": 0.5003343224525452, - "learning_rate": 9.988883466981024e-06, - "loss": 0.4326, - "step": 1874 - }, - { - "epoch": 0.12254101039147768, - "grad_norm": 0.5290389060974121, - "learning_rate": 9.988860182747891e-06, - "loss": 0.4164, - "step": 1875 - }, - { - "epoch": 0.1226063655970198, - "grad_norm": 0.48433294892311096, - "learning_rate": 9.988836874182359e-06, - "loss": 0.406, - "step": 1876 - }, - { - "epoch": 0.12267172080256192, - "grad_norm": 0.4913772940635681, - "learning_rate": 9.98881354128454e-06, - "loss": 0.424, - "step": 1877 - }, - { - "epoch": 0.12273707600810405, - "grad_norm": 0.515835165977478, - "learning_rate": 9.988790184054551e-06, - "loss": 0.4607, - "step": 1878 - }, - { - "epoch": 0.12280243121364617, - "grad_norm": 0.5250944495201111, - "learning_rate": 9.988766802492503e-06, - "loss": 0.4528, - "step": 1879 - }, - { - "epoch": 0.12286778641918829, - "grad_norm": 0.5235453248023987, - "learning_rate": 9.988743396598511e-06, - "loss": 0.4573, - "step": 1880 - }, - { - "epoch": 0.12293314162473042, - "grad_norm": 0.5616452097892761, - "learning_rate": 9.988719966372688e-06, - "loss": 0.4835, - "step": 1881 - }, - { - "epoch": 0.12299849683027253, - "grad_norm": 0.534363865852356, - "learning_rate": 9.988696511815151e-06, - "loss": 0.4628, - "step": 1882 - }, - { - "epoch": 0.12306385203581466, - "grad_norm": 0.5168138146400452, - "learning_rate": 9.988673032926011e-06, - "loss": 0.4264, - "step": 1883 - }, - { - "epoch": 0.12312920724135677, - "grad_norm": 0.4910268187522888, - "learning_rate": 9.988649529705386e-06, - "loss": 0.4386, - "step": 1884 - }, - { - "epoch": 0.1231945624468989, - "grad_norm": 0.4870971441268921, - "learning_rate": 9.98862600215339e-06, - "loss": 0.3803, - "step": 1885 - }, - { - "epoch": 0.12325991765244101, - "grad_norm": 0.47671326994895935, - "learning_rate": 9.988602450270135e-06, - "loss": 0.4337, - "step": 1886 - }, - { - "epoch": 0.12332527285798314, - "grad_norm": 0.498994380235672, - "learning_rate": 9.988578874055739e-06, - "loss": 0.4714, - "step": 1887 - }, - { - "epoch": 0.12339062806352526, - "grad_norm": 0.5405935049057007, - "learning_rate": 9.988555273510315e-06, - "loss": 0.4635, - "step": 1888 - }, - { - "epoch": 0.12345598326906738, - "grad_norm": 0.49952763319015503, - "learning_rate": 9.988531648633976e-06, - "loss": 0.4473, - "step": 1889 - }, - { - "epoch": 0.1235213384746095, - "grad_norm": 0.5323501825332642, - "learning_rate": 9.988507999426845e-06, - "loss": 0.4491, - "step": 1890 - }, - { - "epoch": 0.12358669368015163, - "grad_norm": 0.4978667199611664, - "learning_rate": 9.988484325889029e-06, - "loss": 0.4162, - "step": 1891 - }, - { - "epoch": 0.12365204888569374, - "grad_norm": 0.4646618366241455, - "learning_rate": 9.988460628020646e-06, - "loss": 0.3946, - "step": 1892 - }, - { - "epoch": 0.12371740409123587, - "grad_norm": 0.4838745594024658, - "learning_rate": 9.988436905821814e-06, - "loss": 0.4222, - "step": 1893 - }, - { - "epoch": 0.12378275929677798, - "grad_norm": 0.478985458612442, - "learning_rate": 9.988413159292648e-06, - "loss": 0.3889, - "step": 1894 - }, - { - "epoch": 0.12384811450232011, - "grad_norm": 0.5021668076515198, - "learning_rate": 9.988389388433262e-06, - "loss": 0.4015, - "step": 1895 - }, - { - "epoch": 0.12391346970786223, - "grad_norm": 0.48870745301246643, - "learning_rate": 9.988365593243772e-06, - "loss": 0.4003, - "step": 1896 - }, - { - "epoch": 0.12397882491340435, - "grad_norm": 0.4673478305339813, - "learning_rate": 9.988341773724297e-06, - "loss": 0.3946, - "step": 1897 - }, - { - "epoch": 0.12404418011894647, - "grad_norm": 0.5073935985565186, - "learning_rate": 9.988317929874948e-06, - "loss": 0.4193, - "step": 1898 - }, - { - "epoch": 0.1241095353244886, - "grad_norm": 0.44659894704818726, - "learning_rate": 9.988294061695846e-06, - "loss": 0.3657, - "step": 1899 - }, - { - "epoch": 0.12417489053003072, - "grad_norm": 0.5482346415519714, - "learning_rate": 9.988270169187106e-06, - "loss": 0.5001, - "step": 1900 - }, - { - "epoch": 0.12424024573557284, - "grad_norm": 0.514901340007782, - "learning_rate": 9.988246252348843e-06, - "loss": 0.4342, - "step": 1901 - }, - { - "epoch": 0.12430560094111497, - "grad_norm": 0.4485880434513092, - "learning_rate": 9.988222311181177e-06, - "loss": 0.3543, - "step": 1902 - }, - { - "epoch": 0.12437095614665708, - "grad_norm": 0.5502244830131531, - "learning_rate": 9.988198345684222e-06, - "loss": 0.4872, - "step": 1903 - }, - { - "epoch": 0.12443631135219921, - "grad_norm": 0.5377838015556335, - "learning_rate": 9.988174355858093e-06, - "loss": 0.4766, - "step": 1904 - }, - { - "epoch": 0.12450166655774132, - "grad_norm": 0.520308792591095, - "learning_rate": 9.988150341702913e-06, - "loss": 0.4778, - "step": 1905 - }, - { - "epoch": 0.12456702176328345, - "grad_norm": 0.5440343618392944, - "learning_rate": 9.988126303218794e-06, - "loss": 0.4564, - "step": 1906 - }, - { - "epoch": 0.12463237696882556, - "grad_norm": 0.5795683264732361, - "learning_rate": 9.988102240405856e-06, - "loss": 0.4957, - "step": 1907 - }, - { - "epoch": 0.12469773217436769, - "grad_norm": 0.5030964016914368, - "learning_rate": 9.988078153264215e-06, - "loss": 0.4178, - "step": 1908 - }, - { - "epoch": 0.1247630873799098, - "grad_norm": 0.4966350197792053, - "learning_rate": 9.988054041793989e-06, - "loss": 0.4039, - "step": 1909 - }, - { - "epoch": 0.12482844258545193, - "grad_norm": 0.5520350933074951, - "learning_rate": 9.988029905995293e-06, - "loss": 0.4348, - "step": 1910 - }, - { - "epoch": 0.12489379779099405, - "grad_norm": 0.48109182715415955, - "learning_rate": 9.98800574586825e-06, - "loss": 0.3874, - "step": 1911 - }, - { - "epoch": 0.12495915299653618, - "grad_norm": 0.48294177651405334, - "learning_rate": 9.987981561412975e-06, - "loss": 0.4106, - "step": 1912 - }, - { - "epoch": 0.1250245082020783, - "grad_norm": 0.5936354994773865, - "learning_rate": 9.987957352629585e-06, - "loss": 0.4344, - "step": 1913 - }, - { - "epoch": 0.1250898634076204, - "grad_norm": 0.6715177893638611, - "learning_rate": 9.987933119518199e-06, - "loss": 0.4559, - "step": 1914 - }, - { - "epoch": 0.12515521861316253, - "grad_norm": 0.5224996209144592, - "learning_rate": 9.987908862078934e-06, - "loss": 0.4576, - "step": 1915 - }, - { - "epoch": 0.12522057381870466, - "grad_norm": 0.5017551779747009, - "learning_rate": 9.98788458031191e-06, - "loss": 0.3905, - "step": 1916 - }, - { - "epoch": 0.1252859290242468, - "grad_norm": 0.5825735926628113, - "learning_rate": 9.987860274217247e-06, - "loss": 0.4431, - "step": 1917 - }, - { - "epoch": 0.1253512842297889, - "grad_norm": 0.5097066760063171, - "learning_rate": 9.987835943795059e-06, - "loss": 0.4021, - "step": 1918 - }, - { - "epoch": 0.12541663943533102, - "grad_norm": 0.5344629883766174, - "learning_rate": 9.987811589045468e-06, - "loss": 0.4483, - "step": 1919 - }, - { - "epoch": 0.12548199464087315, - "grad_norm": 0.5319956541061401, - "learning_rate": 9.987787209968594e-06, - "loss": 0.4149, - "step": 1920 - }, - { - "epoch": 0.12554734984641527, - "grad_norm": 0.5144539475440979, - "learning_rate": 9.987762806564551e-06, - "loss": 0.4404, - "step": 1921 - }, - { - "epoch": 0.1256127050519574, - "grad_norm": 0.5102441310882568, - "learning_rate": 9.987738378833463e-06, - "loss": 0.4205, - "step": 1922 - }, - { - "epoch": 0.1256780602574995, - "grad_norm": 0.4958871304988861, - "learning_rate": 9.987713926775444e-06, - "loss": 0.3938, - "step": 1923 - }, - { - "epoch": 0.12574341546304163, - "grad_norm": 0.48275959491729736, - "learning_rate": 9.987689450390619e-06, - "loss": 0.3981, - "step": 1924 - }, - { - "epoch": 0.12580877066858376, - "grad_norm": 0.5233222246170044, - "learning_rate": 9.987664949679103e-06, - "loss": 0.4472, - "step": 1925 - }, - { - "epoch": 0.1258741258741259, - "grad_norm": 0.48806333541870117, - "learning_rate": 9.987640424641018e-06, - "loss": 0.3999, - "step": 1926 - }, - { - "epoch": 0.125939481079668, - "grad_norm": 0.5304328203201294, - "learning_rate": 9.987615875276483e-06, - "loss": 0.4717, - "step": 1927 - }, - { - "epoch": 0.12600483628521011, - "grad_norm": 0.5369510650634766, - "learning_rate": 9.987591301585618e-06, - "loss": 0.475, - "step": 1928 - }, - { - "epoch": 0.12607019149075224, - "grad_norm": 0.4829438626766205, - "learning_rate": 9.98756670356854e-06, - "loss": 0.413, - "step": 1929 - }, - { - "epoch": 0.12613554669629437, - "grad_norm": 0.4957874119281769, - "learning_rate": 9.987542081225374e-06, - "loss": 0.4514, - "step": 1930 - }, - { - "epoch": 0.12620090190183647, - "grad_norm": 0.49196264147758484, - "learning_rate": 9.987517434556237e-06, - "loss": 0.4087, - "step": 1931 - }, - { - "epoch": 0.1262662571073786, - "grad_norm": 0.4845946729183197, - "learning_rate": 9.987492763561249e-06, - "loss": 0.4006, - "step": 1932 - }, - { - "epoch": 0.12633161231292073, - "grad_norm": 0.5046796202659607, - "learning_rate": 9.98746806824053e-06, - "loss": 0.418, - "step": 1933 - }, - { - "epoch": 0.12639696751846285, - "grad_norm": 0.4948212504386902, - "learning_rate": 9.987443348594202e-06, - "loss": 0.4302, - "step": 1934 - }, - { - "epoch": 0.12646232272400496, - "grad_norm": 0.45483171939849854, - "learning_rate": 9.987418604622385e-06, - "loss": 0.3677, - "step": 1935 - }, - { - "epoch": 0.12652767792954708, - "grad_norm": 0.5139275789260864, - "learning_rate": 9.987393836325202e-06, - "loss": 0.4894, - "step": 1936 - }, - { - "epoch": 0.1265930331350892, - "grad_norm": 0.5322288274765015, - "learning_rate": 9.987369043702769e-06, - "loss": 0.4111, - "step": 1937 - }, - { - "epoch": 0.12665838834063134, - "grad_norm": 0.47072285413742065, - "learning_rate": 9.98734422675521e-06, - "loss": 0.3524, - "step": 1938 - }, - { - "epoch": 0.12672374354617344, - "grad_norm": 0.4729613959789276, - "learning_rate": 9.987319385482643e-06, - "loss": 0.4062, - "step": 1939 - }, - { - "epoch": 0.12678909875171557, - "grad_norm": 0.5595700740814209, - "learning_rate": 9.987294519885195e-06, - "loss": 0.4698, - "step": 1940 - }, - { - "epoch": 0.1268544539572577, - "grad_norm": 0.45723602175712585, - "learning_rate": 9.987269629962982e-06, - "loss": 0.3809, - "step": 1941 - }, - { - "epoch": 0.12691980916279982, - "grad_norm": 0.5328608155250549, - "learning_rate": 9.987244715716129e-06, - "loss": 0.4729, - "step": 1942 - }, - { - "epoch": 0.12698516436834195, - "grad_norm": 0.4701862633228302, - "learning_rate": 9.987219777144754e-06, - "loss": 0.3939, - "step": 1943 - }, - { - "epoch": 0.12705051957388405, - "grad_norm": 0.518118679523468, - "learning_rate": 9.987194814248981e-06, - "loss": 0.4676, - "step": 1944 - }, - { - "epoch": 0.12711587477942618, - "grad_norm": 0.5471842288970947, - "learning_rate": 9.987169827028931e-06, - "loss": 0.5107, - "step": 1945 - }, - { - "epoch": 0.1271812299849683, - "grad_norm": 0.4979475736618042, - "learning_rate": 9.987144815484726e-06, - "loss": 0.4176, - "step": 1946 - }, - { - "epoch": 0.12724658519051044, - "grad_norm": 0.4850045442581177, - "learning_rate": 9.987119779616489e-06, - "loss": 0.4047, - "step": 1947 - }, - { - "epoch": 0.12731194039605254, - "grad_norm": 0.4952496290206909, - "learning_rate": 9.98709471942434e-06, - "loss": 0.4438, - "step": 1948 - }, - { - "epoch": 0.12737729560159466, - "grad_norm": 0.5202100872993469, - "learning_rate": 9.987069634908402e-06, - "loss": 0.4774, - "step": 1949 - }, - { - "epoch": 0.1274426508071368, - "grad_norm": 0.5104197263717651, - "learning_rate": 9.987044526068799e-06, - "loss": 0.4328, - "step": 1950 - }, - { - "epoch": 0.12750800601267892, - "grad_norm": 0.473518431186676, - "learning_rate": 9.987019392905653e-06, - "loss": 0.3709, - "step": 1951 - }, - { - "epoch": 0.12757336121822102, - "grad_norm": 0.5406576991081238, - "learning_rate": 9.986994235419084e-06, - "loss": 0.4342, - "step": 1952 - }, - { - "epoch": 0.12763871642376315, - "grad_norm": 0.5023006200790405, - "learning_rate": 9.986969053609216e-06, - "loss": 0.4567, - "step": 1953 - }, - { - "epoch": 0.12770407162930528, - "grad_norm": 0.46537768840789795, - "learning_rate": 9.986943847476174e-06, - "loss": 0.3792, - "step": 1954 - }, - { - "epoch": 0.1277694268348474, - "grad_norm": 0.5310960412025452, - "learning_rate": 9.986918617020078e-06, - "loss": 0.4643, - "step": 1955 - }, - { - "epoch": 0.1278347820403895, - "grad_norm": 0.5471760034561157, - "learning_rate": 9.986893362241053e-06, - "loss": 0.5443, - "step": 1956 - }, - { - "epoch": 0.12790013724593163, - "grad_norm": 0.47290247678756714, - "learning_rate": 9.986868083139221e-06, - "loss": 0.3673, - "step": 1957 - }, - { - "epoch": 0.12796549245147376, - "grad_norm": 0.5044741034507751, - "learning_rate": 9.986842779714704e-06, - "loss": 0.4415, - "step": 1958 - }, - { - "epoch": 0.1280308476570159, - "grad_norm": 0.4717695415019989, - "learning_rate": 9.98681745196763e-06, - "loss": 0.3531, - "step": 1959 - }, - { - "epoch": 0.128096202862558, - "grad_norm": 0.49389272928237915, - "learning_rate": 9.98679209989812e-06, - "loss": 0.4117, - "step": 1960 - }, - { - "epoch": 0.12816155806810012, - "grad_norm": 0.5558046102523804, - "learning_rate": 9.986766723506295e-06, - "loss": 0.4613, - "step": 1961 - }, - { - "epoch": 0.12822691327364225, - "grad_norm": 0.4360634982585907, - "learning_rate": 9.986741322792282e-06, - "loss": 0.3352, - "step": 1962 - }, - { - "epoch": 0.12829226847918437, - "grad_norm": 0.5748230814933777, - "learning_rate": 9.986715897756206e-06, - "loss": 0.4892, - "step": 1963 - }, - { - "epoch": 0.1283576236847265, - "grad_norm": 0.46454548835754395, - "learning_rate": 9.986690448398185e-06, - "loss": 0.3994, - "step": 1964 - }, - { - "epoch": 0.1284229788902686, - "grad_norm": 0.5058770775794983, - "learning_rate": 9.98666497471835e-06, - "loss": 0.4254, - "step": 1965 - }, - { - "epoch": 0.12848833409581073, - "grad_norm": 0.5321208238601685, - "learning_rate": 9.986639476716821e-06, - "loss": 0.4693, - "step": 1966 - }, - { - "epoch": 0.12855368930135286, - "grad_norm": 0.5063179731369019, - "learning_rate": 9.986613954393725e-06, - "loss": 0.4256, - "step": 1967 - }, - { - "epoch": 0.128619044506895, - "grad_norm": 0.4850414991378784, - "learning_rate": 9.986588407749185e-06, - "loss": 0.3837, - "step": 1968 - }, - { - "epoch": 0.1286843997124371, - "grad_norm": 0.49293845891952515, - "learning_rate": 9.986562836783325e-06, - "loss": 0.4171, - "step": 1969 - }, - { - "epoch": 0.12874975491797921, - "grad_norm": 0.46116194128990173, - "learning_rate": 9.98653724149627e-06, - "loss": 0.3743, - "step": 1970 - }, - { - "epoch": 0.12881511012352134, - "grad_norm": 0.49085626006126404, - "learning_rate": 9.986511621888146e-06, - "loss": 0.3887, - "step": 1971 - }, - { - "epoch": 0.12888046532906347, - "grad_norm": 0.5477619171142578, - "learning_rate": 9.986485977959078e-06, - "loss": 0.4911, - "step": 1972 - }, - { - "epoch": 0.12894582053460557, - "grad_norm": 0.4762897193431854, - "learning_rate": 9.98646030970919e-06, - "loss": 0.3835, - "step": 1973 - }, - { - "epoch": 0.1290111757401477, - "grad_norm": 0.5051640868186951, - "learning_rate": 9.986434617138608e-06, - "loss": 0.3974, - "step": 1974 - }, - { - "epoch": 0.12907653094568983, - "grad_norm": 0.5479174256324768, - "learning_rate": 9.986408900247457e-06, - "loss": 0.4457, - "step": 1975 - }, - { - "epoch": 0.12914188615123195, - "grad_norm": 0.5653102993965149, - "learning_rate": 9.986383159035862e-06, - "loss": 0.491, - "step": 1976 - }, - { - "epoch": 0.12920724135677406, - "grad_norm": 0.4818381369113922, - "learning_rate": 9.986357393503947e-06, - "loss": 0.3849, - "step": 1977 - }, - { - "epoch": 0.12927259656231618, - "grad_norm": 0.48640090227127075, - "learning_rate": 9.986331603651843e-06, - "loss": 0.3895, - "step": 1978 - }, - { - "epoch": 0.1293379517678583, - "grad_norm": 0.5210935473442078, - "learning_rate": 9.986305789479669e-06, - "loss": 0.4264, - "step": 1979 - }, - { - "epoch": 0.12940330697340044, - "grad_norm": 0.5012984275817871, - "learning_rate": 9.986279950987556e-06, - "loss": 0.4618, - "step": 1980 - }, - { - "epoch": 0.12946866217894254, - "grad_norm": 0.504041314125061, - "learning_rate": 9.986254088175629e-06, - "loss": 0.4376, - "step": 1981 - }, - { - "epoch": 0.12953401738448467, - "grad_norm": 0.5589451789855957, - "learning_rate": 9.986228201044013e-06, - "loss": 0.4753, - "step": 1982 - }, - { - "epoch": 0.1295993725900268, - "grad_norm": 0.5148593187332153, - "learning_rate": 9.986202289592833e-06, - "loss": 0.4215, - "step": 1983 - }, - { - "epoch": 0.12966472779556892, - "grad_norm": 0.5945443511009216, - "learning_rate": 9.986176353822219e-06, - "loss": 0.4762, - "step": 1984 - }, - { - "epoch": 0.12973008300111105, - "grad_norm": 0.522632896900177, - "learning_rate": 9.986150393732294e-06, - "loss": 0.4608, - "step": 1985 - }, - { - "epoch": 0.12979543820665315, - "grad_norm": 0.5101238489151001, - "learning_rate": 9.986124409323188e-06, - "loss": 0.4402, - "step": 1986 - }, - { - "epoch": 0.12986079341219528, - "grad_norm": 0.5400654673576355, - "learning_rate": 9.986098400595024e-06, - "loss": 0.4693, - "step": 1987 - }, - { - "epoch": 0.1299261486177374, - "grad_norm": 0.5147498250007629, - "learning_rate": 9.986072367547932e-06, - "loss": 0.4267, - "step": 1988 - }, - { - "epoch": 0.12999150382327954, - "grad_norm": 0.5210046768188477, - "learning_rate": 9.986046310182037e-06, - "loss": 0.4559, - "step": 1989 - }, - { - "epoch": 0.13005685902882164, - "grad_norm": 0.488017737865448, - "learning_rate": 9.986020228497467e-06, - "loss": 0.3971, - "step": 1990 - }, - { - "epoch": 0.13012221423436376, - "grad_norm": 0.5373337864875793, - "learning_rate": 9.98599412249435e-06, - "loss": 0.4133, - "step": 1991 - }, - { - "epoch": 0.1301875694399059, - "grad_norm": 0.4798405170440674, - "learning_rate": 9.985967992172812e-06, - "loss": 0.388, - "step": 1992 - }, - { - "epoch": 0.13025292464544802, - "grad_norm": 0.5257364511489868, - "learning_rate": 9.985941837532979e-06, - "loss": 0.4119, - "step": 1993 - }, - { - "epoch": 0.13031827985099012, - "grad_norm": 0.47971484065055847, - "learning_rate": 9.985915658574982e-06, - "loss": 0.3939, - "step": 1994 - }, - { - "epoch": 0.13038363505653225, - "grad_norm": 0.5414712429046631, - "learning_rate": 9.985889455298948e-06, - "loss": 0.4333, - "step": 1995 - }, - { - "epoch": 0.13044899026207438, - "grad_norm": 0.46881482005119324, - "learning_rate": 9.985863227705002e-06, - "loss": 0.3941, - "step": 1996 - }, - { - "epoch": 0.1305143454676165, - "grad_norm": 0.5257583856582642, - "learning_rate": 9.985836975793272e-06, - "loss": 0.5, - "step": 1997 - }, - { - "epoch": 0.1305797006731586, - "grad_norm": 0.4880787134170532, - "learning_rate": 9.985810699563892e-06, - "loss": 0.418, - "step": 1998 - }, - { - "epoch": 0.13064505587870073, - "grad_norm": 0.45849093794822693, - "learning_rate": 9.985784399016984e-06, - "loss": 0.3674, - "step": 1999 - }, - { - "epoch": 0.13071041108424286, - "grad_norm": 0.5036157965660095, - "learning_rate": 9.985758074152678e-06, - "loss": 0.4192, - "step": 2000 - }, - { - "epoch": 0.130775766289785, - "grad_norm": 0.5034522414207458, - "learning_rate": 9.985731724971103e-06, - "loss": 0.4102, - "step": 2001 - }, - { - "epoch": 0.1308411214953271, - "grad_norm": 0.5690385103225708, - "learning_rate": 9.985705351472388e-06, - "loss": 0.4832, - "step": 2002 - }, - { - "epoch": 0.13090647670086922, - "grad_norm": 0.4804292619228363, - "learning_rate": 9.985678953656658e-06, - "loss": 0.4295, - "step": 2003 - }, - { - "epoch": 0.13097183190641135, - "grad_norm": 0.5212149620056152, - "learning_rate": 9.985652531524049e-06, - "loss": 0.4224, - "step": 2004 - }, - { - "epoch": 0.13103718711195347, - "grad_norm": 0.5116698145866394, - "learning_rate": 9.98562608507468e-06, - "loss": 0.4392, - "step": 2005 - }, - { - "epoch": 0.1311025423174956, - "grad_norm": 0.5497850775718689, - "learning_rate": 9.98559961430869e-06, - "loss": 0.4952, - "step": 2006 - }, - { - "epoch": 0.1311678975230377, - "grad_norm": 0.49011555314064026, - "learning_rate": 9.985573119226202e-06, - "loss": 0.429, - "step": 2007 - }, - { - "epoch": 0.13123325272857983, - "grad_norm": 0.5441197156906128, - "learning_rate": 9.985546599827346e-06, - "loss": 0.5136, - "step": 2008 - }, - { - "epoch": 0.13129860793412196, - "grad_norm": 0.4889719486236572, - "learning_rate": 9.985520056112252e-06, - "loss": 0.3924, - "step": 2009 - }, - { - "epoch": 0.13136396313966409, - "grad_norm": 0.7058687210083008, - "learning_rate": 9.98549348808105e-06, - "loss": 0.4088, - "step": 2010 - }, - { - "epoch": 0.1314293183452062, - "grad_norm": 0.48820960521698, - "learning_rate": 9.98546689573387e-06, - "loss": 0.4078, - "step": 2011 - }, - { - "epoch": 0.13149467355074831, - "grad_norm": 0.484792023897171, - "learning_rate": 9.98544027907084e-06, - "loss": 0.3738, - "step": 2012 - }, - { - "epoch": 0.13156002875629044, - "grad_norm": 0.44519925117492676, - "learning_rate": 9.98541363809209e-06, - "loss": 0.3907, - "step": 2013 - }, - { - "epoch": 0.13162538396183257, - "grad_norm": 0.4528064727783203, - "learning_rate": 9.98538697279775e-06, - "loss": 0.3685, - "step": 2014 - }, - { - "epoch": 0.13169073916737467, - "grad_norm": 0.5622209310531616, - "learning_rate": 9.985360283187954e-06, - "loss": 0.4623, - "step": 2015 - }, - { - "epoch": 0.1317560943729168, - "grad_norm": 0.5047494769096375, - "learning_rate": 9.985333569262827e-06, - "loss": 0.4528, - "step": 2016 - }, - { - "epoch": 0.13182144957845893, - "grad_norm": 0.4966709017753601, - "learning_rate": 9.9853068310225e-06, - "loss": 0.4212, - "step": 2017 - }, - { - "epoch": 0.13188680478400105, - "grad_norm": 0.46429166197776794, - "learning_rate": 9.985280068467104e-06, - "loss": 0.3791, - "step": 2018 - }, - { - "epoch": 0.13195215998954316, - "grad_norm": 0.4956360161304474, - "learning_rate": 9.985253281596773e-06, - "loss": 0.415, - "step": 2019 - }, - { - "epoch": 0.13201751519508528, - "grad_norm": 0.5155148506164551, - "learning_rate": 9.985226470411633e-06, - "loss": 0.4122, - "step": 2020 - }, - { - "epoch": 0.1320828704006274, - "grad_norm": 0.48214486241340637, - "learning_rate": 9.985199634911816e-06, - "loss": 0.3825, - "step": 2021 - }, - { - "epoch": 0.13214822560616954, - "grad_norm": 0.5534549951553345, - "learning_rate": 9.985172775097453e-06, - "loss": 0.4773, - "step": 2022 - }, - { - "epoch": 0.13221358081171164, - "grad_norm": 0.45360979437828064, - "learning_rate": 9.985145890968677e-06, - "loss": 0.3465, - "step": 2023 - }, - { - "epoch": 0.13227893601725377, - "grad_norm": 0.4827433228492737, - "learning_rate": 9.985118982525616e-06, - "loss": 0.4082, - "step": 2024 - }, - { - "epoch": 0.1323442912227959, - "grad_norm": 0.49237141013145447, - "learning_rate": 9.985092049768403e-06, - "loss": 0.4253, - "step": 2025 - }, - { - "epoch": 0.13240964642833802, - "grad_norm": 0.5407829284667969, - "learning_rate": 9.985065092697171e-06, - "loss": 0.4795, - "step": 2026 - }, - { - "epoch": 0.13247500163388015, - "grad_norm": 0.49884968996047974, - "learning_rate": 9.985038111312048e-06, - "loss": 0.4081, - "step": 2027 - }, - { - "epoch": 0.13254035683942225, - "grad_norm": 0.49501216411590576, - "learning_rate": 9.985011105613167e-06, - "loss": 0.3897, - "step": 2028 - }, - { - "epoch": 0.13260571204496438, - "grad_norm": 0.4731520712375641, - "learning_rate": 9.984984075600658e-06, - "loss": 0.3976, - "step": 2029 - }, - { - "epoch": 0.1326710672505065, - "grad_norm": 0.4776824414730072, - "learning_rate": 9.984957021274658e-06, - "loss": 0.3961, - "step": 2030 - }, - { - "epoch": 0.13273642245604864, - "grad_norm": 0.5426200032234192, - "learning_rate": 9.984929942635295e-06, - "loss": 0.4394, - "step": 2031 - }, - { - "epoch": 0.13280177766159074, - "grad_norm": 0.48932594060897827, - "learning_rate": 9.9849028396827e-06, - "loss": 0.4227, - "step": 2032 - }, - { - "epoch": 0.13286713286713286, - "grad_norm": 0.5132553577423096, - "learning_rate": 9.984875712417008e-06, - "loss": 0.4344, - "step": 2033 - }, - { - "epoch": 0.132932488072675, - "grad_norm": 0.4714146852493286, - "learning_rate": 9.984848560838352e-06, - "loss": 0.3729, - "step": 2034 - }, - { - "epoch": 0.13299784327821712, - "grad_norm": 0.4908793568611145, - "learning_rate": 9.98482138494686e-06, - "loss": 0.4192, - "step": 2035 - }, - { - "epoch": 0.13306319848375922, - "grad_norm": 0.5225798487663269, - "learning_rate": 9.984794184742668e-06, - "loss": 0.4606, - "step": 2036 - }, - { - "epoch": 0.13312855368930135, - "grad_norm": 0.5129513144493103, - "learning_rate": 9.984766960225907e-06, - "loss": 0.4287, - "step": 2037 - }, - { - "epoch": 0.13319390889484348, - "grad_norm": 0.45756110548973083, - "learning_rate": 9.984739711396712e-06, - "loss": 0.3792, - "step": 2038 - }, - { - "epoch": 0.1332592641003856, - "grad_norm": 0.48158565163612366, - "learning_rate": 9.984712438255213e-06, - "loss": 0.4197, - "step": 2039 - }, - { - "epoch": 0.1333246193059277, - "grad_norm": 0.5298853516578674, - "learning_rate": 9.984685140801547e-06, - "loss": 0.5092, - "step": 2040 - }, - { - "epoch": 0.13338997451146983, - "grad_norm": 0.5147441625595093, - "learning_rate": 9.984657819035844e-06, - "loss": 0.4191, - "step": 2041 - }, - { - "epoch": 0.13345532971701196, - "grad_norm": 0.4683409631252289, - "learning_rate": 9.984630472958237e-06, - "loss": 0.3968, - "step": 2042 - }, - { - "epoch": 0.1335206849225541, - "grad_norm": 0.4909045100212097, - "learning_rate": 9.98460310256886e-06, - "loss": 0.425, - "step": 2043 - }, - { - "epoch": 0.1335860401280962, - "grad_norm": 0.500486433506012, - "learning_rate": 9.984575707867847e-06, - "loss": 0.446, - "step": 2044 - }, - { - "epoch": 0.13365139533363832, - "grad_norm": 0.4790321886539459, - "learning_rate": 9.984548288855334e-06, - "loss": 0.4058, - "step": 2045 - }, - { - "epoch": 0.13371675053918045, - "grad_norm": 0.4697878360748291, - "learning_rate": 9.98452084553145e-06, - "loss": 0.3785, - "step": 2046 - }, - { - "epoch": 0.13378210574472257, - "grad_norm": 0.46032053232192993, - "learning_rate": 9.984493377896331e-06, - "loss": 0.3861, - "step": 2047 - }, - { - "epoch": 0.1338474609502647, - "grad_norm": 0.5933749675750732, - "learning_rate": 9.98446588595011e-06, - "loss": 0.4091, - "step": 2048 - }, - { - "epoch": 0.1339128161558068, - "grad_norm": 0.5029727816581726, - "learning_rate": 9.984438369692923e-06, - "loss": 0.4177, - "step": 2049 - }, - { - "epoch": 0.13397817136134893, - "grad_norm": 0.49849143624305725, - "learning_rate": 9.984410829124905e-06, - "loss": 0.3631, - "step": 2050 - }, - { - "epoch": 0.13404352656689106, - "grad_norm": 0.5268356204032898, - "learning_rate": 9.984383264246188e-06, - "loss": 0.4745, - "step": 2051 - }, - { - "epoch": 0.13410888177243319, - "grad_norm": 0.47841522097587585, - "learning_rate": 9.984355675056904e-06, - "loss": 0.4315, - "step": 2052 - }, - { - "epoch": 0.1341742369779753, - "grad_norm": 0.4980556070804596, - "learning_rate": 9.984328061557193e-06, - "loss": 0.4177, - "step": 2053 - }, - { - "epoch": 0.13423959218351741, - "grad_norm": 0.5080947279930115, - "learning_rate": 9.984300423747189e-06, - "loss": 0.3703, - "step": 2054 - }, - { - "epoch": 0.13430494738905954, - "grad_norm": 0.5016592741012573, - "learning_rate": 9.984272761627022e-06, - "loss": 0.4279, - "step": 2055 - }, - { - "epoch": 0.13437030259460167, - "grad_norm": 0.46037498116493225, - "learning_rate": 9.984245075196832e-06, - "loss": 0.3866, - "step": 2056 - }, - { - "epoch": 0.13443565780014377, - "grad_norm": 0.5036309957504272, - "learning_rate": 9.98421736445675e-06, - "loss": 0.438, - "step": 2057 - }, - { - "epoch": 0.1345010130056859, - "grad_norm": 0.485921710729599, - "learning_rate": 9.984189629406915e-06, - "loss": 0.4015, - "step": 2058 - }, - { - "epoch": 0.13456636821122803, - "grad_norm": 0.47186097502708435, - "learning_rate": 9.98416187004746e-06, - "loss": 0.4148, - "step": 2059 - }, - { - "epoch": 0.13463172341677015, - "grad_norm": 0.49796515703201294, - "learning_rate": 9.98413408637852e-06, - "loss": 0.3819, - "step": 2060 - }, - { - "epoch": 0.13469707862231226, - "grad_norm": 0.5647782683372498, - "learning_rate": 9.984106278400234e-06, - "loss": 0.5065, - "step": 2061 - }, - { - "epoch": 0.13476243382785438, - "grad_norm": 0.5064375996589661, - "learning_rate": 9.984078446112732e-06, - "loss": 0.4517, - "step": 2062 - }, - { - "epoch": 0.1348277890333965, - "grad_norm": 0.5200279951095581, - "learning_rate": 9.984050589516156e-06, - "loss": 0.4384, - "step": 2063 - }, - { - "epoch": 0.13489314423893864, - "grad_norm": 0.6002118587493896, - "learning_rate": 9.984022708610636e-06, - "loss": 0.5024, - "step": 2064 - }, - { - "epoch": 0.13495849944448074, - "grad_norm": 0.49059632420539856, - "learning_rate": 9.98399480339631e-06, - "loss": 0.444, - "step": 2065 - }, - { - "epoch": 0.13502385465002287, - "grad_norm": 0.5100719928741455, - "learning_rate": 9.983966873873317e-06, - "loss": 0.446, - "step": 2066 - }, - { - "epoch": 0.135089209855565, - "grad_norm": 0.4898199439048767, - "learning_rate": 9.983938920041792e-06, - "loss": 0.3841, - "step": 2067 - }, - { - "epoch": 0.13515456506110712, - "grad_norm": 0.5595495700836182, - "learning_rate": 9.983910941901867e-06, - "loss": 0.4472, - "step": 2068 - }, - { - "epoch": 0.13521992026664925, - "grad_norm": 0.5707384943962097, - "learning_rate": 9.983882939453683e-06, - "loss": 0.4348, - "step": 2069 - }, - { - "epoch": 0.13528527547219135, - "grad_norm": 0.5026779770851135, - "learning_rate": 9.983854912697374e-06, - "loss": 0.4351, - "step": 2070 - }, - { - "epoch": 0.13535063067773348, - "grad_norm": 0.498727023601532, - "learning_rate": 9.983826861633082e-06, - "loss": 0.4126, - "step": 2071 - }, - { - "epoch": 0.1354159858832756, - "grad_norm": 0.5695613026618958, - "learning_rate": 9.983798786260936e-06, - "loss": 0.4841, - "step": 2072 - }, - { - "epoch": 0.13548134108881774, - "grad_norm": 0.5176199674606323, - "learning_rate": 9.983770686581079e-06, - "loss": 0.4299, - "step": 2073 - }, - { - "epoch": 0.13554669629435984, - "grad_norm": 0.5157866477966309, - "learning_rate": 9.983742562593643e-06, - "loss": 0.4234, - "step": 2074 - }, - { - "epoch": 0.13561205149990196, - "grad_norm": 0.4823054373264313, - "learning_rate": 9.98371441429877e-06, - "loss": 0.3912, - "step": 2075 - }, - { - "epoch": 0.1356774067054441, - "grad_norm": 0.5139957070350647, - "learning_rate": 9.983686241696595e-06, - "loss": 0.465, - "step": 2076 - }, - { - "epoch": 0.13574276191098622, - "grad_norm": 0.5168617367744446, - "learning_rate": 9.983658044787257e-06, - "loss": 0.4429, - "step": 2077 - }, - { - "epoch": 0.13580811711652832, - "grad_norm": 0.5060180425643921, - "learning_rate": 9.98362982357089e-06, - "loss": 0.4591, - "step": 2078 - }, - { - "epoch": 0.13587347232207045, - "grad_norm": 0.5484333634376526, - "learning_rate": 9.983601578047636e-06, - "loss": 0.5163, - "step": 2079 - }, - { - "epoch": 0.13593882752761258, - "grad_norm": 0.5179669857025146, - "learning_rate": 9.983573308217629e-06, - "loss": 0.452, - "step": 2080 - }, - { - "epoch": 0.1360041827331547, - "grad_norm": 0.4726775884628296, - "learning_rate": 9.983545014081008e-06, - "loss": 0.4041, - "step": 2081 - }, - { - "epoch": 0.1360695379386968, - "grad_norm": 0.5156143307685852, - "learning_rate": 9.983516695637914e-06, - "loss": 0.4764, - "step": 2082 - }, - { - "epoch": 0.13613489314423893, - "grad_norm": 0.5028104186058044, - "learning_rate": 9.98348835288848e-06, - "loss": 0.393, - "step": 2083 - }, - { - "epoch": 0.13620024834978106, - "grad_norm": 0.48415282368659973, - "learning_rate": 9.983459985832848e-06, - "loss": 0.4175, - "step": 2084 - }, - { - "epoch": 0.1362656035553232, - "grad_norm": 0.5132015347480774, - "learning_rate": 9.983431594471156e-06, - "loss": 0.4379, - "step": 2085 - }, - { - "epoch": 0.1363309587608653, - "grad_norm": 0.47953587770462036, - "learning_rate": 9.98340317880354e-06, - "loss": 0.4583, - "step": 2086 - }, - { - "epoch": 0.13639631396640742, - "grad_norm": 0.4793354272842407, - "learning_rate": 9.983374738830142e-06, - "loss": 0.4065, - "step": 2087 - }, - { - "epoch": 0.13646166917194955, - "grad_norm": 0.516028106212616, - "learning_rate": 9.983346274551097e-06, - "loss": 0.4177, - "step": 2088 - }, - { - "epoch": 0.13652702437749167, - "grad_norm": 0.5440710783004761, - "learning_rate": 9.983317785966549e-06, - "loss": 0.5044, - "step": 2089 - }, - { - "epoch": 0.1365923795830338, - "grad_norm": 0.4665113389492035, - "learning_rate": 9.98328927307663e-06, - "loss": 0.3802, - "step": 2090 - }, - { - "epoch": 0.1366577347885759, - "grad_norm": 0.48358651995658875, - "learning_rate": 9.983260735881486e-06, - "loss": 0.3992, - "step": 2091 - }, - { - "epoch": 0.13672308999411803, - "grad_norm": 0.48970726132392883, - "learning_rate": 9.98323217438125e-06, - "loss": 0.3982, - "step": 2092 - }, - { - "epoch": 0.13678844519966016, - "grad_norm": 0.4874531924724579, - "learning_rate": 9.983203588576067e-06, - "loss": 0.4169, - "step": 2093 - }, - { - "epoch": 0.13685380040520229, - "grad_norm": 0.5156344771385193, - "learning_rate": 9.983174978466072e-06, - "loss": 0.3945, - "step": 2094 - }, - { - "epoch": 0.1369191556107444, - "grad_norm": 0.49989965558052063, - "learning_rate": 9.983146344051407e-06, - "loss": 0.42, - "step": 2095 - }, - { - "epoch": 0.13698451081628651, - "grad_norm": 0.5101418495178223, - "learning_rate": 9.98311768533221e-06, - "loss": 0.4517, - "step": 2096 - }, - { - "epoch": 0.13704986602182864, - "grad_norm": 0.5618047714233398, - "learning_rate": 9.983089002308623e-06, - "loss": 0.5002, - "step": 2097 - }, - { - "epoch": 0.13711522122737077, - "grad_norm": 0.44366690516471863, - "learning_rate": 9.983060294980786e-06, - "loss": 0.3677, - "step": 2098 - }, - { - "epoch": 0.13718057643291287, - "grad_norm": 0.488775372505188, - "learning_rate": 9.983031563348835e-06, - "loss": 0.4209, - "step": 2099 - }, - { - "epoch": 0.137245931638455, - "grad_norm": 0.4840698838233948, - "learning_rate": 9.983002807412913e-06, - "loss": 0.3831, - "step": 2100 - }, - { - "epoch": 0.13731128684399713, - "grad_norm": 0.5189481377601624, - "learning_rate": 9.98297402717316e-06, - "loss": 0.445, - "step": 2101 - }, - { - "epoch": 0.13737664204953925, - "grad_norm": 0.47477179765701294, - "learning_rate": 9.982945222629719e-06, - "loss": 0.373, - "step": 2102 - }, - { - "epoch": 0.13744199725508136, - "grad_norm": 0.5027723908424377, - "learning_rate": 9.982916393782725e-06, - "loss": 0.4158, - "step": 2103 - }, - { - "epoch": 0.13750735246062348, - "grad_norm": 0.507691502571106, - "learning_rate": 9.982887540632323e-06, - "loss": 0.3809, - "step": 2104 - }, - { - "epoch": 0.1375727076661656, - "grad_norm": 0.5458555221557617, - "learning_rate": 9.982858663178651e-06, - "loss": 0.4419, - "step": 2105 - }, - { - "epoch": 0.13763806287170774, - "grad_norm": 0.5059611201286316, - "learning_rate": 9.982829761421853e-06, - "loss": 0.4294, - "step": 2106 - }, - { - "epoch": 0.13770341807724984, - "grad_norm": 0.5017284750938416, - "learning_rate": 9.982800835362067e-06, - "loss": 0.3752, - "step": 2107 - }, - { - "epoch": 0.13776877328279197, - "grad_norm": 0.5887925624847412, - "learning_rate": 9.982771884999433e-06, - "loss": 0.5432, - "step": 2108 - }, - { - "epoch": 0.1378341284883341, - "grad_norm": 0.48969125747680664, - "learning_rate": 9.982742910334098e-06, - "loss": 0.4059, - "step": 2109 - }, - { - "epoch": 0.13789948369387622, - "grad_norm": 0.47747862339019775, - "learning_rate": 9.982713911366198e-06, - "loss": 0.4054, - "step": 2110 - }, - { - "epoch": 0.13796483889941835, - "grad_norm": 0.536247193813324, - "learning_rate": 9.982684888095874e-06, - "loss": 0.4363, - "step": 2111 - }, - { - "epoch": 0.13803019410496045, - "grad_norm": 0.5041215419769287, - "learning_rate": 9.982655840523272e-06, - "loss": 0.3947, - "step": 2112 - }, - { - "epoch": 0.13809554931050258, - "grad_norm": 0.5441122055053711, - "learning_rate": 9.982626768648533e-06, - "loss": 0.4576, - "step": 2113 - }, - { - "epoch": 0.1381609045160447, - "grad_norm": 0.5348301529884338, - "learning_rate": 9.982597672471795e-06, - "loss": 0.4644, - "step": 2114 - }, - { - "epoch": 0.13822625972158684, - "grad_norm": 0.49890631437301636, - "learning_rate": 9.982568551993202e-06, - "loss": 0.4001, - "step": 2115 - }, - { - "epoch": 0.13829161492712894, - "grad_norm": 0.5681653618812561, - "learning_rate": 9.982539407212895e-06, - "loss": 0.4812, - "step": 2116 - }, - { - "epoch": 0.13835697013267106, - "grad_norm": 0.5000870227813721, - "learning_rate": 9.982510238131018e-06, - "loss": 0.4108, - "step": 2117 - }, - { - "epoch": 0.1384223253382132, - "grad_norm": 0.49189293384552, - "learning_rate": 9.982481044747713e-06, - "loss": 0.4109, - "step": 2118 - }, - { - "epoch": 0.13848768054375532, - "grad_norm": 0.49221333861351013, - "learning_rate": 9.982451827063121e-06, - "loss": 0.415, - "step": 2119 - }, - { - "epoch": 0.13855303574929742, - "grad_norm": 0.5470876693725586, - "learning_rate": 9.982422585077386e-06, - "loss": 0.4894, - "step": 2120 - }, - { - "epoch": 0.13861839095483955, - "grad_norm": 0.4839327335357666, - "learning_rate": 9.98239331879065e-06, - "loss": 0.4405, - "step": 2121 - }, - { - "epoch": 0.13868374616038168, - "grad_norm": 0.5111157894134521, - "learning_rate": 9.982364028203056e-06, - "loss": 0.4661, - "step": 2122 - }, - { - "epoch": 0.1387491013659238, - "grad_norm": 0.5053145885467529, - "learning_rate": 9.982334713314748e-06, - "loss": 0.4127, - "step": 2123 - }, - { - "epoch": 0.1388144565714659, - "grad_norm": 0.47632038593292236, - "learning_rate": 9.982305374125865e-06, - "loss": 0.4067, - "step": 2124 - }, - { - "epoch": 0.13887981177700803, - "grad_norm": 0.48555120825767517, - "learning_rate": 9.982276010636554e-06, - "loss": 0.3996, - "step": 2125 - }, - { - "epoch": 0.13894516698255016, - "grad_norm": 0.4961962103843689, - "learning_rate": 9.982246622846955e-06, - "loss": 0.4187, - "step": 2126 - }, - { - "epoch": 0.1390105221880923, - "grad_norm": 0.48168522119522095, - "learning_rate": 9.982217210757214e-06, - "loss": 0.4124, - "step": 2127 - }, - { - "epoch": 0.1390758773936344, - "grad_norm": 0.48840004205703735, - "learning_rate": 9.982187774367475e-06, - "loss": 0.4133, - "step": 2128 - }, - { - "epoch": 0.13914123259917652, - "grad_norm": 0.45378541946411133, - "learning_rate": 9.982158313677879e-06, - "loss": 0.3904, - "step": 2129 - }, - { - "epoch": 0.13920658780471865, - "grad_norm": 0.608108401298523, - "learning_rate": 9.98212882868857e-06, - "loss": 0.4546, - "step": 2130 - }, - { - "epoch": 0.13927194301026077, - "grad_norm": 0.4957594573497772, - "learning_rate": 9.982099319399696e-06, - "loss": 0.4246, - "step": 2131 - }, - { - "epoch": 0.1393372982158029, - "grad_norm": 0.47023138403892517, - "learning_rate": 9.982069785811395e-06, - "loss": 0.3833, - "step": 2132 - }, - { - "epoch": 0.139402653421345, - "grad_norm": 0.48780372738838196, - "learning_rate": 9.982040227923815e-06, - "loss": 0.4143, - "step": 2133 - }, - { - "epoch": 0.13946800862688713, - "grad_norm": 0.5033027529716492, - "learning_rate": 9.982010645737098e-06, - "loss": 0.4193, - "step": 2134 - }, - { - "epoch": 0.13953336383242926, - "grad_norm": 0.5258437991142273, - "learning_rate": 9.98198103925139e-06, - "loss": 0.4729, - "step": 2135 - }, - { - "epoch": 0.13959871903797139, - "grad_norm": 0.5443706512451172, - "learning_rate": 9.981951408466834e-06, - "loss": 0.4389, - "step": 2136 - }, - { - "epoch": 0.1396640742435135, - "grad_norm": 0.5517335534095764, - "learning_rate": 9.981921753383574e-06, - "loss": 0.4902, - "step": 2137 - }, - { - "epoch": 0.13972942944905561, - "grad_norm": 0.4983333647251129, - "learning_rate": 9.981892074001758e-06, - "loss": 0.4121, - "step": 2138 - }, - { - "epoch": 0.13979478465459774, - "grad_norm": 0.49221929907798767, - "learning_rate": 9.981862370321527e-06, - "loss": 0.4307, - "step": 2139 - }, - { - "epoch": 0.13986013986013987, - "grad_norm": 0.4637519419193268, - "learning_rate": 9.981832642343027e-06, - "loss": 0.383, - "step": 2140 - }, - { - "epoch": 0.13992549506568197, - "grad_norm": 0.5155725479125977, - "learning_rate": 9.981802890066404e-06, - "loss": 0.4103, - "step": 2141 - }, - { - "epoch": 0.1399908502712241, - "grad_norm": 0.4840599000453949, - "learning_rate": 9.981773113491801e-06, - "loss": 0.388, - "step": 2142 - }, - { - "epoch": 0.14005620547676623, - "grad_norm": 0.4303397834300995, - "learning_rate": 9.981743312619367e-06, - "loss": 0.3225, - "step": 2143 - }, - { - "epoch": 0.14012156068230835, - "grad_norm": 0.5074151754379272, - "learning_rate": 9.981713487449243e-06, - "loss": 0.4638, - "step": 2144 - }, - { - "epoch": 0.14018691588785046, - "grad_norm": 0.4615415036678314, - "learning_rate": 9.981683637981579e-06, - "loss": 0.407, - "step": 2145 - }, - { - "epoch": 0.14025227109339258, - "grad_norm": 0.5153157114982605, - "learning_rate": 9.981653764216516e-06, - "loss": 0.4803, - "step": 2146 - }, - { - "epoch": 0.1403176262989347, - "grad_norm": 0.4582500159740448, - "learning_rate": 9.981623866154202e-06, - "loss": 0.3967, - "step": 2147 - }, - { - "epoch": 0.14038298150447684, - "grad_norm": 0.48393577337265015, - "learning_rate": 9.981593943794783e-06, - "loss": 0.3556, - "step": 2148 - }, - { - "epoch": 0.14044833671001894, - "grad_norm": 0.5076748132705688, - "learning_rate": 9.981563997138405e-06, - "loss": 0.422, - "step": 2149 - }, - { - "epoch": 0.14051369191556107, - "grad_norm": 0.5000056028366089, - "learning_rate": 9.981534026185213e-06, - "loss": 0.428, - "step": 2150 - }, - { - "epoch": 0.1405790471211032, - "grad_norm": 0.47516247630119324, - "learning_rate": 9.981504030935354e-06, - "loss": 0.3851, - "step": 2151 - }, - { - "epoch": 0.14064440232664532, - "grad_norm": 0.5118639469146729, - "learning_rate": 9.981474011388974e-06, - "loss": 0.4027, - "step": 2152 - }, - { - "epoch": 0.14070975753218745, - "grad_norm": 0.49031445384025574, - "learning_rate": 9.98144396754622e-06, - "loss": 0.4167, - "step": 2153 - }, - { - "epoch": 0.14077511273772955, - "grad_norm": 0.4741804897785187, - "learning_rate": 9.981413899407237e-06, - "loss": 0.386, - "step": 2154 - }, - { - "epoch": 0.14084046794327168, - "grad_norm": 0.5037586688995361, - "learning_rate": 9.981383806972174e-06, - "loss": 0.4392, - "step": 2155 - }, - { - "epoch": 0.1409058231488138, - "grad_norm": 0.4791088402271271, - "learning_rate": 9.981353690241174e-06, - "loss": 0.3811, - "step": 2156 - }, - { - "epoch": 0.14097117835435594, - "grad_norm": 0.446625292301178, - "learning_rate": 9.981323549214388e-06, - "loss": 0.3497, - "step": 2157 - }, - { - "epoch": 0.14103653355989804, - "grad_norm": 0.5049254298210144, - "learning_rate": 9.981293383891962e-06, - "loss": 0.4535, - "step": 2158 - }, - { - "epoch": 0.14110188876544016, - "grad_norm": 0.4784114360809326, - "learning_rate": 9.981263194274043e-06, - "loss": 0.423, - "step": 2159 - }, - { - "epoch": 0.1411672439709823, - "grad_norm": 0.5221757888793945, - "learning_rate": 9.981232980360776e-06, - "loss": 0.4802, - "step": 2160 - }, - { - "epoch": 0.14123259917652442, - "grad_norm": 0.5037352442741394, - "learning_rate": 9.981202742152309e-06, - "loss": 0.4063, - "step": 2161 - }, - { - "epoch": 0.14129795438206652, - "grad_norm": 0.4714972972869873, - "learning_rate": 9.981172479648793e-06, - "loss": 0.3518, - "step": 2162 - }, - { - "epoch": 0.14136330958760865, - "grad_norm": 0.48040875792503357, - "learning_rate": 9.981142192850373e-06, - "loss": 0.416, - "step": 2163 - }, - { - "epoch": 0.14142866479315078, - "grad_norm": 0.47911337018013, - "learning_rate": 9.981111881757195e-06, - "loss": 0.3878, - "step": 2164 - }, - { - "epoch": 0.1414940199986929, - "grad_norm": 0.5378538966178894, - "learning_rate": 9.98108154636941e-06, - "loss": 0.4468, - "step": 2165 - }, - { - "epoch": 0.141559375204235, - "grad_norm": 0.4630666971206665, - "learning_rate": 9.981051186687165e-06, - "loss": 0.3939, - "step": 2166 - }, - { - "epoch": 0.14162473040977713, - "grad_norm": 0.4693721532821655, - "learning_rate": 9.981020802710608e-06, - "loss": 0.4077, - "step": 2167 - }, - { - "epoch": 0.14169008561531926, - "grad_norm": 0.48182645440101624, - "learning_rate": 9.980990394439887e-06, - "loss": 0.405, - "step": 2168 - }, - { - "epoch": 0.1417554408208614, - "grad_norm": 0.5250624418258667, - "learning_rate": 9.980959961875149e-06, - "loss": 0.4425, - "step": 2169 - }, - { - "epoch": 0.1418207960264035, - "grad_norm": 0.5258338451385498, - "learning_rate": 9.980929505016544e-06, - "loss": 0.4352, - "step": 2170 - }, - { - "epoch": 0.14188615123194562, - "grad_norm": 0.49704474210739136, - "learning_rate": 9.980899023864222e-06, - "loss": 0.4166, - "step": 2171 - }, - { - "epoch": 0.14195150643748775, - "grad_norm": 0.5207509398460388, - "learning_rate": 9.980868518418327e-06, - "loss": 0.4443, - "step": 2172 - }, - { - "epoch": 0.14201686164302987, - "grad_norm": 0.44627878069877625, - "learning_rate": 9.980837988679013e-06, - "loss": 0.3656, - "step": 2173 - }, - { - "epoch": 0.142082216848572, - "grad_norm": 0.5200849771499634, - "learning_rate": 9.980807434646426e-06, - "loss": 0.4662, - "step": 2174 - }, - { - "epoch": 0.1421475720541141, - "grad_norm": 0.5185167193412781, - "learning_rate": 9.980776856320715e-06, - "loss": 0.4289, - "step": 2175 - }, - { - "epoch": 0.14221292725965623, - "grad_norm": 0.5304030179977417, - "learning_rate": 9.980746253702031e-06, - "loss": 0.4672, - "step": 2176 - }, - { - "epoch": 0.14227828246519836, - "grad_norm": 0.5466896295547485, - "learning_rate": 9.98071562679052e-06, - "loss": 0.477, - "step": 2177 - }, - { - "epoch": 0.14234363767074049, - "grad_norm": 0.5005002617835999, - "learning_rate": 9.980684975586335e-06, - "loss": 0.4571, - "step": 2178 - }, - { - "epoch": 0.1424089928762826, - "grad_norm": 0.4743422269821167, - "learning_rate": 9.980654300089624e-06, - "loss": 0.3921, - "step": 2179 - }, - { - "epoch": 0.14247434808182471, - "grad_norm": 0.5106698274612427, - "learning_rate": 9.980623600300536e-06, - "loss": 0.4562, - "step": 2180 - }, - { - "epoch": 0.14253970328736684, - "grad_norm": 0.4898502826690674, - "learning_rate": 9.980592876219221e-06, - "loss": 0.4064, - "step": 2181 - }, - { - "epoch": 0.14260505849290897, - "grad_norm": 0.4879530370235443, - "learning_rate": 9.98056212784583e-06, - "loss": 0.434, - "step": 2182 - }, - { - "epoch": 0.14267041369845107, - "grad_norm": 0.46781447529792786, - "learning_rate": 9.980531355180512e-06, - "loss": 0.3684, - "step": 2183 - }, - { - "epoch": 0.1427357689039932, - "grad_norm": 0.47800523042678833, - "learning_rate": 9.980500558223415e-06, - "loss": 0.4082, - "step": 2184 - }, - { - "epoch": 0.14280112410953533, - "grad_norm": 0.45998021960258484, - "learning_rate": 9.980469736974694e-06, - "loss": 0.3637, - "step": 2185 - }, - { - "epoch": 0.14286647931507745, - "grad_norm": 0.48784536123275757, - "learning_rate": 9.980438891434495e-06, - "loss": 0.3754, - "step": 2186 - }, - { - "epoch": 0.14293183452061956, - "grad_norm": 0.4959687292575836, - "learning_rate": 9.980408021602971e-06, - "loss": 0.4298, - "step": 2187 - }, - { - "epoch": 0.14299718972616168, - "grad_norm": 0.8836976289749146, - "learning_rate": 9.980377127480272e-06, - "loss": 0.4265, - "step": 2188 - }, - { - "epoch": 0.1430625449317038, - "grad_norm": 0.49875491857528687, - "learning_rate": 9.980346209066546e-06, - "loss": 0.4197, - "step": 2189 - }, - { - "epoch": 0.14312790013724594, - "grad_norm": 0.5402434468269348, - "learning_rate": 9.980315266361949e-06, - "loss": 0.3996, - "step": 2190 - }, - { - "epoch": 0.14319325534278804, - "grad_norm": 0.499809205532074, - "learning_rate": 9.980284299366629e-06, - "loss": 0.441, - "step": 2191 - }, - { - "epoch": 0.14325861054833017, - "grad_norm": 0.5115283727645874, - "learning_rate": 9.980253308080736e-06, - "loss": 0.4306, - "step": 2192 - }, - { - "epoch": 0.1433239657538723, - "grad_norm": 0.47517943382263184, - "learning_rate": 9.980222292504422e-06, - "loss": 0.3831, - "step": 2193 - }, - { - "epoch": 0.14338932095941442, - "grad_norm": 0.47964057326316833, - "learning_rate": 9.98019125263784e-06, - "loss": 0.3985, - "step": 2194 - }, - { - "epoch": 0.14345467616495655, - "grad_norm": 0.5241397023200989, - "learning_rate": 9.980160188481138e-06, - "loss": 0.4569, - "step": 2195 - }, - { - "epoch": 0.14352003137049865, - "grad_norm": 0.49302685260772705, - "learning_rate": 9.980129100034473e-06, - "loss": 0.4065, - "step": 2196 - }, - { - "epoch": 0.14358538657604078, - "grad_norm": 0.5190097689628601, - "learning_rate": 9.98009798729799e-06, - "loss": 0.4696, - "step": 2197 - }, - { - "epoch": 0.1436507417815829, - "grad_norm": 0.48809418082237244, - "learning_rate": 9.980066850271844e-06, - "loss": 0.4064, - "step": 2198 - }, - { - "epoch": 0.14371609698712504, - "grad_norm": 0.5275015830993652, - "learning_rate": 9.980035688956186e-06, - "loss": 0.446, - "step": 2199 - }, - { - "epoch": 0.14378145219266714, - "grad_norm": 0.4656153619289398, - "learning_rate": 9.98000450335117e-06, - "loss": 0.4146, - "step": 2200 - }, - { - "epoch": 0.14384680739820926, - "grad_norm": 0.5004518628120422, - "learning_rate": 9.979973293456949e-06, - "loss": 0.3737, - "step": 2201 - }, - { - "epoch": 0.1439121626037514, - "grad_norm": 0.5236954689025879, - "learning_rate": 9.979942059273669e-06, - "loss": 0.4274, - "step": 2202 - }, - { - "epoch": 0.14397751780929352, - "grad_norm": 0.5039278268814087, - "learning_rate": 9.979910800801489e-06, - "loss": 0.4355, - "step": 2203 - }, - { - "epoch": 0.14404287301483562, - "grad_norm": 0.4495544135570526, - "learning_rate": 9.979879518040558e-06, - "loss": 0.3711, - "step": 2204 - }, - { - "epoch": 0.14410822822037775, - "grad_norm": 0.4917999804019928, - "learning_rate": 9.979848210991028e-06, - "loss": 0.413, - "step": 2205 - }, - { - "epoch": 0.14417358342591988, - "grad_norm": 0.5010198354721069, - "learning_rate": 9.979816879653054e-06, - "loss": 0.4008, - "step": 2206 - }, - { - "epoch": 0.144238938631462, - "grad_norm": 0.5320481657981873, - "learning_rate": 9.97978552402679e-06, - "loss": 0.4524, - "step": 2207 - }, - { - "epoch": 0.1443042938370041, - "grad_norm": 0.4922701120376587, - "learning_rate": 9.979754144112386e-06, - "loss": 0.3814, - "step": 2208 - }, - { - "epoch": 0.14436964904254623, - "grad_norm": 0.4877546429634094, - "learning_rate": 9.979722739909994e-06, - "loss": 0.4526, - "step": 2209 - }, - { - "epoch": 0.14443500424808836, - "grad_norm": 0.5020317435264587, - "learning_rate": 9.97969131141977e-06, - "loss": 0.4018, - "step": 2210 - }, - { - "epoch": 0.1445003594536305, - "grad_norm": 0.5045342445373535, - "learning_rate": 9.979659858641866e-06, - "loss": 0.4447, - "step": 2211 - }, - { - "epoch": 0.1445657146591726, - "grad_norm": 0.5243743062019348, - "learning_rate": 9.979628381576437e-06, - "loss": 0.4698, - "step": 2212 - }, - { - "epoch": 0.14463106986471472, - "grad_norm": 0.5178640484809875, - "learning_rate": 9.979596880223634e-06, - "loss": 0.3934, - "step": 2213 - }, - { - "epoch": 0.14469642507025685, - "grad_norm": 0.49646425247192383, - "learning_rate": 9.979565354583612e-06, - "loss": 0.4379, - "step": 2214 - }, - { - "epoch": 0.14476178027579897, - "grad_norm": 0.4796517789363861, - "learning_rate": 9.979533804656526e-06, - "loss": 0.4287, - "step": 2215 - }, - { - "epoch": 0.1448271354813411, - "grad_norm": 0.5299208164215088, - "learning_rate": 9.979502230442527e-06, - "loss": 0.4678, - "step": 2216 - }, - { - "epoch": 0.1448924906868832, - "grad_norm": 0.49109676480293274, - "learning_rate": 9.979470631941773e-06, - "loss": 0.4413, - "step": 2217 - }, - { - "epoch": 0.14495784589242533, - "grad_norm": 0.5628079771995544, - "learning_rate": 9.979439009154412e-06, - "loss": 0.4514, - "step": 2218 - }, - { - "epoch": 0.14502320109796746, - "grad_norm": 0.48290780186653137, - "learning_rate": 9.979407362080604e-06, - "loss": 0.414, - "step": 2219 - }, - { - "epoch": 0.14508855630350959, - "grad_norm": 0.521253228187561, - "learning_rate": 9.979375690720501e-06, - "loss": 0.4535, - "step": 2220 - }, - { - "epoch": 0.1451539115090517, - "grad_norm": 0.48355838656425476, - "learning_rate": 9.979343995074258e-06, - "loss": 0.4312, - "step": 2221 - }, - { - "epoch": 0.14521926671459381, - "grad_norm": 0.4963189363479614, - "learning_rate": 9.97931227514203e-06, - "loss": 0.4179, - "step": 2222 - }, - { - "epoch": 0.14528462192013594, - "grad_norm": 0.44014713168144226, - "learning_rate": 9.97928053092397e-06, - "loss": 0.3873, - "step": 2223 - }, - { - "epoch": 0.14534997712567807, - "grad_norm": 0.5127825140953064, - "learning_rate": 9.979248762420233e-06, - "loss": 0.479, - "step": 2224 - }, - { - "epoch": 0.14541533233122017, - "grad_norm": 0.5252187848091125, - "learning_rate": 9.979216969630976e-06, - "loss": 0.4282, - "step": 2225 - }, - { - "epoch": 0.1454806875367623, - "grad_norm": 0.5021201372146606, - "learning_rate": 9.979185152556353e-06, - "loss": 0.4307, - "step": 2226 - }, - { - "epoch": 0.14554604274230443, - "grad_norm": 0.5003250241279602, - "learning_rate": 9.979153311196519e-06, - "loss": 0.4427, - "step": 2227 - }, - { - "epoch": 0.14561139794784655, - "grad_norm": 0.4868018627166748, - "learning_rate": 9.979121445551629e-06, - "loss": 0.3816, - "step": 2228 - }, - { - "epoch": 0.14567675315338866, - "grad_norm": 0.49736616015434265, - "learning_rate": 9.979089555621838e-06, - "loss": 0.4107, - "step": 2229 - }, - { - "epoch": 0.14574210835893078, - "grad_norm": 0.5106949210166931, - "learning_rate": 9.979057641407303e-06, - "loss": 0.4051, - "step": 2230 - }, - { - "epoch": 0.1458074635644729, - "grad_norm": 0.4665818214416504, - "learning_rate": 9.979025702908181e-06, - "loss": 0.3796, - "step": 2231 - }, - { - "epoch": 0.14587281877001504, - "grad_norm": 0.4829004406929016, - "learning_rate": 9.978993740124623e-06, - "loss": 0.4271, - "step": 2232 - }, - { - "epoch": 0.14593817397555714, - "grad_norm": 0.5315292477607727, - "learning_rate": 9.978961753056789e-06, - "loss": 0.4555, - "step": 2233 - }, - { - "epoch": 0.14600352918109927, - "grad_norm": 0.530169665813446, - "learning_rate": 9.978929741704832e-06, - "loss": 0.435, - "step": 2234 - }, - { - "epoch": 0.1460688843866414, - "grad_norm": 0.5203970670700073, - "learning_rate": 9.978897706068911e-06, - "loss": 0.4168, - "step": 2235 - }, - { - "epoch": 0.14613423959218352, - "grad_norm": 0.46462902426719666, - "learning_rate": 9.97886564614918e-06, - "loss": 0.3774, - "step": 2236 - }, - { - "epoch": 0.14619959479772565, - "grad_norm": 0.5217627286911011, - "learning_rate": 9.978833561945798e-06, - "loss": 0.4302, - "step": 2237 - }, - { - "epoch": 0.14626495000326775, - "grad_norm": 0.4850352704524994, - "learning_rate": 9.97880145345892e-06, - "loss": 0.3647, - "step": 2238 - }, - { - "epoch": 0.14633030520880988, - "grad_norm": 0.501953661441803, - "learning_rate": 9.978769320688702e-06, - "loss": 0.4255, - "step": 2239 - }, - { - "epoch": 0.146395660414352, - "grad_norm": 0.5132482647895813, - "learning_rate": 9.978737163635301e-06, - "loss": 0.4256, - "step": 2240 - }, - { - "epoch": 0.14646101561989414, - "grad_norm": 0.5114203691482544, - "learning_rate": 9.978704982298875e-06, - "loss": 0.4578, - "step": 2241 - }, - { - "epoch": 0.14652637082543624, - "grad_norm": 0.5146781206130981, - "learning_rate": 9.978672776679578e-06, - "loss": 0.4518, - "step": 2242 - }, - { - "epoch": 0.14659172603097836, - "grad_norm": 0.4847266972064972, - "learning_rate": 9.97864054677757e-06, - "loss": 0.4169, - "step": 2243 - }, - { - "epoch": 0.1466570812365205, - "grad_norm": 0.5032507181167603, - "learning_rate": 9.978608292593007e-06, - "loss": 0.428, - "step": 2244 - }, - { - "epoch": 0.14672243644206262, - "grad_norm": 0.481487900018692, - "learning_rate": 9.978576014126047e-06, - "loss": 0.4075, - "step": 2245 - }, - { - "epoch": 0.14678779164760472, - "grad_norm": 0.541334867477417, - "learning_rate": 9.978543711376847e-06, - "loss": 0.4248, - "step": 2246 - }, - { - "epoch": 0.14685314685314685, - "grad_norm": 0.4564792215824127, - "learning_rate": 9.978511384345565e-06, - "loss": 0.3525, - "step": 2247 - }, - { - "epoch": 0.14691850205868898, - "grad_norm": 0.5100907683372498, - "learning_rate": 9.978479033032357e-06, - "loss": 0.4246, - "step": 2248 - }, - { - "epoch": 0.1469838572642311, - "grad_norm": 0.5410794019699097, - "learning_rate": 9.978446657437383e-06, - "loss": 0.5128, - "step": 2249 - }, - { - "epoch": 0.1470492124697732, - "grad_norm": 0.5061261653900146, - "learning_rate": 9.9784142575608e-06, - "loss": 0.379, - "step": 2250 - }, - { - "epoch": 0.14711456767531533, - "grad_norm": 0.5119841694831848, - "learning_rate": 9.978381833402766e-06, - "loss": 0.4102, - "step": 2251 - }, - { - "epoch": 0.14717992288085746, - "grad_norm": 0.4679925739765167, - "learning_rate": 9.97834938496344e-06, - "loss": 0.3811, - "step": 2252 - }, - { - "epoch": 0.1472452780863996, - "grad_norm": 0.48318007588386536, - "learning_rate": 9.978316912242978e-06, - "loss": 0.4114, - "step": 2253 - }, - { - "epoch": 0.1473106332919417, - "grad_norm": 0.4982635974884033, - "learning_rate": 9.97828441524154e-06, - "loss": 0.4602, - "step": 2254 - }, - { - "epoch": 0.14737598849748382, - "grad_norm": 0.495976060628891, - "learning_rate": 9.978251893959286e-06, - "loss": 0.3616, - "step": 2255 - }, - { - "epoch": 0.14744134370302595, - "grad_norm": 0.48419514298439026, - "learning_rate": 9.97821934839637e-06, - "loss": 0.4388, - "step": 2256 - }, - { - "epoch": 0.14750669890856807, - "grad_norm": 0.4629019796848297, - "learning_rate": 9.978186778552955e-06, - "loss": 0.4112, - "step": 2257 - }, - { - "epoch": 0.1475720541141102, - "grad_norm": 0.5135029554367065, - "learning_rate": 9.978154184429198e-06, - "loss": 0.4388, - "step": 2258 - }, - { - "epoch": 0.1476374093196523, - "grad_norm": 0.46955379843711853, - "learning_rate": 9.97812156602526e-06, - "loss": 0.3721, - "step": 2259 - }, - { - "epoch": 0.14770276452519443, - "grad_norm": 0.5143701434135437, - "learning_rate": 9.978088923341296e-06, - "loss": 0.429, - "step": 2260 - }, - { - "epoch": 0.14776811973073656, - "grad_norm": 0.49218401312828064, - "learning_rate": 9.97805625637747e-06, - "loss": 0.4341, - "step": 2261 - }, - { - "epoch": 0.14783347493627869, - "grad_norm": 0.512100338935852, - "learning_rate": 9.978023565133938e-06, - "loss": 0.4368, - "step": 2262 - }, - { - "epoch": 0.1478988301418208, - "grad_norm": 0.5482844710350037, - "learning_rate": 9.977990849610861e-06, - "loss": 0.474, - "step": 2263 - }, - { - "epoch": 0.14796418534736291, - "grad_norm": 0.5389716029167175, - "learning_rate": 9.977958109808396e-06, - "loss": 0.4738, - "step": 2264 - }, - { - "epoch": 0.14802954055290504, - "grad_norm": 0.4722362756729126, - "learning_rate": 9.977925345726707e-06, - "loss": 0.3908, - "step": 2265 - }, - { - "epoch": 0.14809489575844717, - "grad_norm": 0.5035797953605652, - "learning_rate": 9.977892557365953e-06, - "loss": 0.4336, - "step": 2266 - }, - { - "epoch": 0.14816025096398927, - "grad_norm": 0.49283385276794434, - "learning_rate": 9.97785974472629e-06, - "loss": 0.4481, - "step": 2267 - }, - { - "epoch": 0.1482256061695314, - "grad_norm": 0.48502737283706665, - "learning_rate": 9.977826907807882e-06, - "loss": 0.4387, - "step": 2268 - }, - { - "epoch": 0.14829096137507353, - "grad_norm": 0.5310791730880737, - "learning_rate": 9.977794046610886e-06, - "loss": 0.4555, - "step": 2269 - }, - { - "epoch": 0.14835631658061565, - "grad_norm": 0.5248449444770813, - "learning_rate": 9.977761161135465e-06, - "loss": 0.4808, - "step": 2270 - }, - { - "epoch": 0.14842167178615776, - "grad_norm": 0.5024116039276123, - "learning_rate": 9.977728251381777e-06, - "loss": 0.4468, - "step": 2271 - }, - { - "epoch": 0.14848702699169988, - "grad_norm": 0.4562566876411438, - "learning_rate": 9.977695317349986e-06, - "loss": 0.3714, - "step": 2272 - }, - { - "epoch": 0.148552382197242, - "grad_norm": 0.6429773569107056, - "learning_rate": 9.97766235904025e-06, - "loss": 0.4526, - "step": 2273 - }, - { - "epoch": 0.14861773740278414, - "grad_norm": 0.5034480690956116, - "learning_rate": 9.97762937645273e-06, - "loss": 0.4759, - "step": 2274 - }, - { - "epoch": 0.14868309260832624, - "grad_norm": 0.479637086391449, - "learning_rate": 9.977596369587585e-06, - "loss": 0.3865, - "step": 2275 - }, - { - "epoch": 0.14874844781386837, - "grad_norm": 0.516732394695282, - "learning_rate": 9.977563338444982e-06, - "loss": 0.4363, - "step": 2276 - }, - { - "epoch": 0.1488138030194105, - "grad_norm": 0.4789265990257263, - "learning_rate": 9.977530283025076e-06, - "loss": 0.3908, - "step": 2277 - }, - { - "epoch": 0.14887915822495262, - "grad_norm": 0.5140530467033386, - "learning_rate": 9.97749720332803e-06, - "loss": 0.4376, - "step": 2278 - }, - { - "epoch": 0.14894451343049475, - "grad_norm": 0.473240464925766, - "learning_rate": 9.977464099354006e-06, - "loss": 0.357, - "step": 2279 - }, - { - "epoch": 0.14900986863603685, - "grad_norm": 0.5226386785507202, - "learning_rate": 9.977430971103166e-06, - "loss": 0.4535, - "step": 2280 - }, - { - "epoch": 0.14907522384157898, - "grad_norm": 0.4788806736469269, - "learning_rate": 9.97739781857567e-06, - "loss": 0.4008, - "step": 2281 - }, - { - "epoch": 0.1491405790471211, - "grad_norm": 0.49951809644699097, - "learning_rate": 9.97736464177168e-06, - "loss": 0.4339, - "step": 2282 - }, - { - "epoch": 0.14920593425266324, - "grad_norm": 0.5056042075157166, - "learning_rate": 9.977331440691361e-06, - "loss": 0.4592, - "step": 2283 - }, - { - "epoch": 0.14927128945820534, - "grad_norm": 0.5108303427696228, - "learning_rate": 9.97729821533487e-06, - "loss": 0.4575, - "step": 2284 - }, - { - "epoch": 0.14933664466374746, - "grad_norm": 0.4839963912963867, - "learning_rate": 9.977264965702372e-06, - "loss": 0.3743, - "step": 2285 - }, - { - "epoch": 0.1494019998692896, - "grad_norm": 0.463399738073349, - "learning_rate": 9.977231691794027e-06, - "loss": 0.3404, - "step": 2286 - }, - { - "epoch": 0.14946735507483172, - "grad_norm": 0.5017335414886475, - "learning_rate": 9.977198393610001e-06, - "loss": 0.4269, - "step": 2287 - }, - { - "epoch": 0.14953271028037382, - "grad_norm": 0.47880029678344727, - "learning_rate": 9.977165071150453e-06, - "loss": 0.4256, - "step": 2288 - }, - { - "epoch": 0.14959806548591595, - "grad_norm": 0.5520682334899902, - "learning_rate": 9.977131724415548e-06, - "loss": 0.4682, - "step": 2289 - }, - { - "epoch": 0.14966342069145808, - "grad_norm": 0.5078201293945312, - "learning_rate": 9.977098353405445e-06, - "loss": 0.4772, - "step": 2290 - }, - { - "epoch": 0.1497287758970002, - "grad_norm": 0.49950456619262695, - "learning_rate": 9.97706495812031e-06, - "loss": 0.4307, - "step": 2291 - }, - { - "epoch": 0.1497941311025423, - "grad_norm": 0.5226721167564392, - "learning_rate": 9.977031538560305e-06, - "loss": 0.4842, - "step": 2292 - }, - { - "epoch": 0.14985948630808443, - "grad_norm": 0.5184908509254456, - "learning_rate": 9.976998094725594e-06, - "loss": 0.4977, - "step": 2293 - }, - { - "epoch": 0.14992484151362656, - "grad_norm": 0.49716198444366455, - "learning_rate": 9.976964626616339e-06, - "loss": 0.457, - "step": 2294 - }, - { - "epoch": 0.1499901967191687, - "grad_norm": 0.48878955841064453, - "learning_rate": 9.976931134232702e-06, - "loss": 0.4397, - "step": 2295 - }, - { - "epoch": 0.1500555519247108, - "grad_norm": 1.3947206735610962, - "learning_rate": 9.976897617574849e-06, - "loss": 0.4807, - "step": 2296 - }, - { - "epoch": 0.15012090713025292, - "grad_norm": 0.4955795109272003, - "learning_rate": 9.97686407664294e-06, - "loss": 0.4134, - "step": 2297 - }, - { - "epoch": 0.15018626233579505, - "grad_norm": 0.5125658512115479, - "learning_rate": 9.976830511437143e-06, - "loss": 0.4344, - "step": 2298 - }, - { - "epoch": 0.15025161754133717, - "grad_norm": 0.5206575989723206, - "learning_rate": 9.976796921957619e-06, - "loss": 0.4545, - "step": 2299 - }, - { - "epoch": 0.1503169727468793, - "grad_norm": 0.518939197063446, - "learning_rate": 9.976763308204532e-06, - "loss": 0.4508, - "step": 2300 - }, - { - "epoch": 0.1503823279524214, - "grad_norm": 0.5151715278625488, - "learning_rate": 9.976729670178046e-06, - "loss": 0.4028, - "step": 2301 - }, - { - "epoch": 0.15044768315796353, - "grad_norm": 0.5385341048240662, - "learning_rate": 9.976696007878326e-06, - "loss": 0.4299, - "step": 2302 - }, - { - "epoch": 0.15051303836350566, - "grad_norm": 0.531333327293396, - "learning_rate": 9.976662321305535e-06, - "loss": 0.4172, - "step": 2303 - }, - { - "epoch": 0.15057839356904779, - "grad_norm": 0.48247775435447693, - "learning_rate": 9.976628610459837e-06, - "loss": 0.4387, - "step": 2304 - }, - { - "epoch": 0.1506437487745899, - "grad_norm": 0.5362417101860046, - "learning_rate": 9.976594875341399e-06, - "loss": 0.4274, - "step": 2305 - }, - { - "epoch": 0.15070910398013201, - "grad_norm": 0.5025076866149902, - "learning_rate": 9.976561115950383e-06, - "loss": 0.4143, - "step": 2306 - }, - { - "epoch": 0.15077445918567414, - "grad_norm": 0.475013792514801, - "learning_rate": 9.976527332286953e-06, - "loss": 0.4127, - "step": 2307 - }, - { - "epoch": 0.15083981439121627, - "grad_norm": 0.4805130958557129, - "learning_rate": 9.976493524351276e-06, - "loss": 0.3979, - "step": 2308 - }, - { - "epoch": 0.15090516959675837, - "grad_norm": 0.5044189691543579, - "learning_rate": 9.976459692143516e-06, - "loss": 0.4128, - "step": 2309 - }, - { - "epoch": 0.1509705248023005, - "grad_norm": 0.4822445809841156, - "learning_rate": 9.976425835663839e-06, - "loss": 0.3999, - "step": 2310 - }, - { - "epoch": 0.15103588000784263, - "grad_norm": 0.5312120914459229, - "learning_rate": 9.97639195491241e-06, - "loss": 0.4142, - "step": 2311 - }, - { - "epoch": 0.15110123521338475, - "grad_norm": 0.4583929181098938, - "learning_rate": 9.976358049889392e-06, - "loss": 0.3819, - "step": 2312 - }, - { - "epoch": 0.15116659041892685, - "grad_norm": 0.51595538854599, - "learning_rate": 9.97632412059495e-06, - "loss": 0.3849, - "step": 2313 - }, - { - "epoch": 0.15123194562446898, - "grad_norm": 0.5065730214118958, - "learning_rate": 9.976290167029255e-06, - "loss": 0.4276, - "step": 2314 - }, - { - "epoch": 0.1512973008300111, - "grad_norm": 0.482515424489975, - "learning_rate": 9.976256189192465e-06, - "loss": 0.4047, - "step": 2315 - }, - { - "epoch": 0.15136265603555324, - "grad_norm": 0.4882115423679352, - "learning_rate": 9.976222187084753e-06, - "loss": 0.4058, - "step": 2316 - }, - { - "epoch": 0.15142801124109534, - "grad_norm": 0.4734536111354828, - "learning_rate": 9.97618816070628e-06, - "loss": 0.4117, - "step": 2317 - }, - { - "epoch": 0.15149336644663747, - "grad_norm": 0.5062746405601501, - "learning_rate": 9.976154110057214e-06, - "loss": 0.3693, - "step": 2318 - }, - { - "epoch": 0.1515587216521796, - "grad_norm": 0.4745713174343109, - "learning_rate": 9.976120035137719e-06, - "loss": 0.427, - "step": 2319 - }, - { - "epoch": 0.15162407685772172, - "grad_norm": 0.5089232325553894, - "learning_rate": 9.976085935947966e-06, - "loss": 0.4183, - "step": 2320 - }, - { - "epoch": 0.15168943206326385, - "grad_norm": 0.5024188160896301, - "learning_rate": 9.976051812488115e-06, - "loss": 0.4173, - "step": 2321 - }, - { - "epoch": 0.15175478726880595, - "grad_norm": 0.5488912463188171, - "learning_rate": 9.976017664758335e-06, - "loss": 0.4848, - "step": 2322 - }, - { - "epoch": 0.15182014247434808, - "grad_norm": 0.49083489179611206, - "learning_rate": 9.975983492758796e-06, - "loss": 0.4546, - "step": 2323 - }, - { - "epoch": 0.1518854976798902, - "grad_norm": 0.4858939051628113, - "learning_rate": 9.97594929648966e-06, - "loss": 0.456, - "step": 2324 - }, - { - "epoch": 0.15195085288543234, - "grad_norm": 0.5023922920227051, - "learning_rate": 9.975915075951095e-06, - "loss": 0.4422, - "step": 2325 - }, - { - "epoch": 0.15201620809097444, - "grad_norm": 0.46519041061401367, - "learning_rate": 9.975880831143267e-06, - "loss": 0.3725, - "step": 2326 - }, - { - "epoch": 0.15208156329651656, - "grad_norm": 0.4633754789829254, - "learning_rate": 9.975846562066347e-06, - "loss": 0.3691, - "step": 2327 - }, - { - "epoch": 0.1521469185020587, - "grad_norm": 0.5417261719703674, - "learning_rate": 9.9758122687205e-06, - "loss": 0.4732, - "step": 2328 - }, - { - "epoch": 0.15221227370760082, - "grad_norm": 0.509924590587616, - "learning_rate": 9.975777951105889e-06, - "loss": 0.473, - "step": 2329 - }, - { - "epoch": 0.15227762891314292, - "grad_norm": 0.500942051410675, - "learning_rate": 9.975743609222689e-06, - "loss": 0.4007, - "step": 2330 - }, - { - "epoch": 0.15234298411868505, - "grad_norm": 0.49725082516670227, - "learning_rate": 9.97570924307106e-06, - "loss": 0.4541, - "step": 2331 - }, - { - "epoch": 0.15240833932422718, - "grad_norm": 0.48673009872436523, - "learning_rate": 9.975674852651177e-06, - "loss": 0.4245, - "step": 2332 - }, - { - "epoch": 0.1524736945297693, - "grad_norm": 0.48869961500167847, - "learning_rate": 9.975640437963203e-06, - "loss": 0.3738, - "step": 2333 - }, - { - "epoch": 0.1525390497353114, - "grad_norm": 0.508194088935852, - "learning_rate": 9.975605999007306e-06, - "loss": 0.4185, - "step": 2334 - }, - { - "epoch": 0.15260440494085353, - "grad_norm": 0.5041077136993408, - "learning_rate": 9.975571535783654e-06, - "loss": 0.3882, - "step": 2335 - }, - { - "epoch": 0.15266976014639566, - "grad_norm": 0.5077170133590698, - "learning_rate": 9.975537048292419e-06, - "loss": 0.4299, - "step": 2336 - }, - { - "epoch": 0.1527351153519378, - "grad_norm": 0.496986448764801, - "learning_rate": 9.975502536533762e-06, - "loss": 0.3915, - "step": 2337 - }, - { - "epoch": 0.15280047055747992, - "grad_norm": 0.5121133327484131, - "learning_rate": 9.97546800050786e-06, - "loss": 0.4435, - "step": 2338 - }, - { - "epoch": 0.15286582576302202, - "grad_norm": 0.48407822847366333, - "learning_rate": 9.975433440214874e-06, - "loss": 0.3995, - "step": 2339 - }, - { - "epoch": 0.15293118096856415, - "grad_norm": 0.5398458242416382, - "learning_rate": 9.975398855654976e-06, - "loss": 0.4322, - "step": 2340 - }, - { - "epoch": 0.15299653617410627, - "grad_norm": 0.43759775161743164, - "learning_rate": 9.975364246828334e-06, - "loss": 0.3736, - "step": 2341 - }, - { - "epoch": 0.1530618913796484, - "grad_norm": 0.5048732757568359, - "learning_rate": 9.975329613735117e-06, - "loss": 0.4444, - "step": 2342 - }, - { - "epoch": 0.1531272465851905, - "grad_norm": 0.453186571598053, - "learning_rate": 9.975294956375495e-06, - "loss": 0.3525, - "step": 2343 - }, - { - "epoch": 0.15319260179073263, - "grad_norm": 0.5275481939315796, - "learning_rate": 9.975260274749632e-06, - "loss": 0.4464, - "step": 2344 - }, - { - "epoch": 0.15325795699627476, - "grad_norm": 0.5292560458183289, - "learning_rate": 9.975225568857704e-06, - "loss": 0.4778, - "step": 2345 - }, - { - "epoch": 0.15332331220181689, - "grad_norm": 0.4985980689525604, - "learning_rate": 9.975190838699878e-06, - "loss": 0.4261, - "step": 2346 - }, - { - "epoch": 0.153388667407359, - "grad_norm": 0.4822975695133209, - "learning_rate": 9.975156084276323e-06, - "loss": 0.4166, - "step": 2347 - }, - { - "epoch": 0.15345402261290111, - "grad_norm": 0.5165229439735413, - "learning_rate": 9.975121305587205e-06, - "loss": 0.4462, - "step": 2348 - }, - { - "epoch": 0.15351937781844324, - "grad_norm": 0.49926477670669556, - "learning_rate": 9.9750865026327e-06, - "loss": 0.4324, - "step": 2349 - }, - { - "epoch": 0.15358473302398537, - "grad_norm": 0.5210674405097961, - "learning_rate": 9.975051675412974e-06, - "loss": 0.4622, - "step": 2350 - }, - { - "epoch": 0.15365008822952747, - "grad_norm": 0.5540010333061218, - "learning_rate": 9.975016823928196e-06, - "loss": 0.4629, - "step": 2351 - }, - { - "epoch": 0.1537154434350696, - "grad_norm": 0.5138182640075684, - "learning_rate": 9.974981948178539e-06, - "loss": 0.4563, - "step": 2352 - }, - { - "epoch": 0.15378079864061173, - "grad_norm": 0.47402670979499817, - "learning_rate": 9.97494704816417e-06, - "loss": 0.4038, - "step": 2353 - }, - { - "epoch": 0.15384615384615385, - "grad_norm": 0.49794459342956543, - "learning_rate": 9.974912123885263e-06, - "loss": 0.4349, - "step": 2354 - }, - { - "epoch": 0.15391150905169595, - "grad_norm": 0.5229313373565674, - "learning_rate": 9.974877175341984e-06, - "loss": 0.4185, - "step": 2355 - }, - { - "epoch": 0.15397686425723808, - "grad_norm": 0.5404794812202454, - "learning_rate": 9.974842202534507e-06, - "loss": 0.4551, - "step": 2356 - }, - { - "epoch": 0.1540422194627802, - "grad_norm": 0.5233075022697449, - "learning_rate": 9.974807205463e-06, - "loss": 0.4446, - "step": 2357 - }, - { - "epoch": 0.15410757466832234, - "grad_norm": 0.5049179196357727, - "learning_rate": 9.974772184127636e-06, - "loss": 0.4438, - "step": 2358 - }, - { - "epoch": 0.15417292987386447, - "grad_norm": 0.6475977897644043, - "learning_rate": 9.974737138528584e-06, - "loss": 0.4383, - "step": 2359 - }, - { - "epoch": 0.15423828507940657, - "grad_norm": 0.5115492343902588, - "learning_rate": 9.974702068666015e-06, - "loss": 0.4222, - "step": 2360 - }, - { - "epoch": 0.1543036402849487, - "grad_norm": 0.506045937538147, - "learning_rate": 9.974666974540101e-06, - "loss": 0.4222, - "step": 2361 - }, - { - "epoch": 0.15436899549049082, - "grad_norm": 0.5062224268913269, - "learning_rate": 9.974631856151014e-06, - "loss": 0.4477, - "step": 2362 - }, - { - "epoch": 0.15443435069603295, - "grad_norm": 0.5204629898071289, - "learning_rate": 9.974596713498921e-06, - "loss": 0.4422, - "step": 2363 - }, - { - "epoch": 0.15449970590157505, - "grad_norm": 0.4689907133579254, - "learning_rate": 9.974561546583999e-06, - "loss": 0.3657, - "step": 2364 - }, - { - "epoch": 0.15456506110711718, - "grad_norm": 0.5130204558372498, - "learning_rate": 9.974526355406417e-06, - "loss": 0.3866, - "step": 2365 - }, - { - "epoch": 0.1546304163126593, - "grad_norm": 0.5153915286064148, - "learning_rate": 9.974491139966346e-06, - "loss": 0.4234, - "step": 2366 - }, - { - "epoch": 0.15469577151820144, - "grad_norm": 0.49781179428100586, - "learning_rate": 9.974455900263958e-06, - "loss": 0.4006, - "step": 2367 - }, - { - "epoch": 0.15476112672374354, - "grad_norm": 0.5339179635047913, - "learning_rate": 9.974420636299427e-06, - "loss": 0.4446, - "step": 2368 - }, - { - "epoch": 0.15482648192928566, - "grad_norm": 0.5743839144706726, - "learning_rate": 9.97438534807292e-06, - "loss": 0.4957, - "step": 2369 - }, - { - "epoch": 0.1548918371348278, - "grad_norm": 0.5327855348587036, - "learning_rate": 9.974350035584615e-06, - "loss": 0.4267, - "step": 2370 - }, - { - "epoch": 0.15495719234036992, - "grad_norm": 0.5445069670677185, - "learning_rate": 9.974314698834682e-06, - "loss": 0.4871, - "step": 2371 - }, - { - "epoch": 0.15502254754591202, - "grad_norm": 0.5317440032958984, - "learning_rate": 9.97427933782329e-06, - "loss": 0.4293, - "step": 2372 - }, - { - "epoch": 0.15508790275145415, - "grad_norm": 0.514540433883667, - "learning_rate": 9.974243952550617e-06, - "loss": 0.382, - "step": 2373 - }, - { - "epoch": 0.15515325795699628, - "grad_norm": 0.46227580308914185, - "learning_rate": 9.974208543016833e-06, - "loss": 0.3745, - "step": 2374 - }, - { - "epoch": 0.1552186131625384, - "grad_norm": 0.48030710220336914, - "learning_rate": 9.97417310922211e-06, - "loss": 0.3821, - "step": 2375 - }, - { - "epoch": 0.1552839683680805, - "grad_norm": 0.5295577049255371, - "learning_rate": 9.97413765116662e-06, - "loss": 0.4289, - "step": 2376 - }, - { - "epoch": 0.15534932357362263, - "grad_norm": 0.49830979108810425, - "learning_rate": 9.97410216885054e-06, - "loss": 0.358, - "step": 2377 - }, - { - "epoch": 0.15541467877916476, - "grad_norm": 0.54817795753479, - "learning_rate": 9.974066662274038e-06, - "loss": 0.4759, - "step": 2378 - }, - { - "epoch": 0.1554800339847069, - "grad_norm": 0.47363972663879395, - "learning_rate": 9.974031131437292e-06, - "loss": 0.3983, - "step": 2379 - }, - { - "epoch": 0.15554538919024902, - "grad_norm": 0.5195755958557129, - "learning_rate": 9.973995576340471e-06, - "loss": 0.441, - "step": 2380 - }, - { - "epoch": 0.15561074439579112, - "grad_norm": 0.477298378944397, - "learning_rate": 9.973959996983753e-06, - "loss": 0.4067, - "step": 2381 - }, - { - "epoch": 0.15567609960133325, - "grad_norm": 0.5366641879081726, - "learning_rate": 9.973924393367307e-06, - "loss": 0.4643, - "step": 2382 - }, - { - "epoch": 0.15574145480687537, - "grad_norm": 0.5306047201156616, - "learning_rate": 9.97388876549131e-06, - "loss": 0.446, - "step": 2383 - }, - { - "epoch": 0.1558068100124175, - "grad_norm": 0.5169602036476135, - "learning_rate": 9.973853113355933e-06, - "loss": 0.4324, - "step": 2384 - }, - { - "epoch": 0.1558721652179596, - "grad_norm": 0.4701854884624481, - "learning_rate": 9.973817436961352e-06, - "loss": 0.396, - "step": 2385 - }, - { - "epoch": 0.15593752042350173, - "grad_norm": 0.5466340780258179, - "learning_rate": 9.973781736307739e-06, - "loss": 0.4145, - "step": 2386 - }, - { - "epoch": 0.15600287562904386, - "grad_norm": 0.4791712164878845, - "learning_rate": 9.973746011395271e-06, - "loss": 0.3664, - "step": 2387 - }, - { - "epoch": 0.15606823083458599, - "grad_norm": 0.5368568301200867, - "learning_rate": 9.97371026222412e-06, - "loss": 0.436, - "step": 2388 - }, - { - "epoch": 0.1561335860401281, - "grad_norm": 0.5561971664428711, - "learning_rate": 9.973674488794462e-06, - "loss": 0.4419, - "step": 2389 - }, - { - "epoch": 0.15619894124567021, - "grad_norm": 0.45606914162635803, - "learning_rate": 9.973638691106468e-06, - "loss": 0.3394, - "step": 2390 - }, - { - "epoch": 0.15626429645121234, - "grad_norm": 0.4836057126522064, - "learning_rate": 9.973602869160317e-06, - "loss": 0.4044, - "step": 2391 - }, - { - "epoch": 0.15632965165675447, - "grad_norm": 0.525128960609436, - "learning_rate": 9.97356702295618e-06, - "loss": 0.4277, - "step": 2392 - }, - { - "epoch": 0.15639500686229657, - "grad_norm": 0.5319295525550842, - "learning_rate": 9.973531152494237e-06, - "loss": 0.4373, - "step": 2393 - }, - { - "epoch": 0.1564603620678387, - "grad_norm": 0.5011122822761536, - "learning_rate": 9.973495257774657e-06, - "loss": 0.4233, - "step": 2394 - }, - { - "epoch": 0.15652571727338083, - "grad_norm": 0.5131736993789673, - "learning_rate": 9.973459338797619e-06, - "loss": 0.4591, - "step": 2395 - }, - { - "epoch": 0.15659107247892295, - "grad_norm": 0.495587021112442, - "learning_rate": 9.973423395563295e-06, - "loss": 0.4431, - "step": 2396 - }, - { - "epoch": 0.15665642768446505, - "grad_norm": 0.5457156896591187, - "learning_rate": 9.973387428071864e-06, - "loss": 0.461, - "step": 2397 - }, - { - "epoch": 0.15672178289000718, - "grad_norm": 0.49077412486076355, - "learning_rate": 9.973351436323498e-06, - "loss": 0.4373, - "step": 2398 - }, - { - "epoch": 0.1567871380955493, - "grad_norm": 0.4816787838935852, - "learning_rate": 9.973315420318375e-06, - "loss": 0.4248, - "step": 2399 - }, - { - "epoch": 0.15685249330109144, - "grad_norm": 0.468403160572052, - "learning_rate": 9.973279380056669e-06, - "loss": 0.3655, - "step": 2400 - }, - { - "epoch": 0.15691784850663357, - "grad_norm": 0.4903174340724945, - "learning_rate": 9.973243315538559e-06, - "loss": 0.3717, - "step": 2401 - }, - { - "epoch": 0.15698320371217567, - "grad_norm": 0.5325028300285339, - "learning_rate": 9.973207226764215e-06, - "loss": 0.4761, - "step": 2402 - }, - { - "epoch": 0.1570485589177178, - "grad_norm": 0.5138307213783264, - "learning_rate": 9.97317111373382e-06, - "loss": 0.4406, - "step": 2403 - }, - { - "epoch": 0.15711391412325992, - "grad_norm": 0.48092320561408997, - "learning_rate": 9.973134976447543e-06, - "loss": 0.4219, - "step": 2404 - }, - { - "epoch": 0.15717926932880205, - "grad_norm": 0.4826708436012268, - "learning_rate": 9.973098814905566e-06, - "loss": 0.4325, - "step": 2405 - }, - { - "epoch": 0.15724462453434415, - "grad_norm": 0.4795357584953308, - "learning_rate": 9.973062629108064e-06, - "loss": 0.3773, - "step": 2406 - }, - { - "epoch": 0.15730997973988628, - "grad_norm": 0.46927645802497864, - "learning_rate": 9.97302641905521e-06, - "loss": 0.4059, - "step": 2407 - }, - { - "epoch": 0.1573753349454284, - "grad_norm": 0.5348656177520752, - "learning_rate": 9.972990184747185e-06, - "loss": 0.4843, - "step": 2408 - }, - { - "epoch": 0.15744069015097054, - "grad_norm": 0.48582735657691956, - "learning_rate": 9.972953926184164e-06, - "loss": 0.4215, - "step": 2409 - }, - { - "epoch": 0.15750604535651264, - "grad_norm": 0.5237491130828857, - "learning_rate": 9.972917643366325e-06, - "loss": 0.478, - "step": 2410 - }, - { - "epoch": 0.15757140056205476, - "grad_norm": 0.47684186697006226, - "learning_rate": 9.97288133629384e-06, - "loss": 0.4309, - "step": 2411 - }, - { - "epoch": 0.1576367557675969, - "grad_norm": 0.4899986982345581, - "learning_rate": 9.972845004966895e-06, - "loss": 0.402, - "step": 2412 - }, - { - "epoch": 0.15770211097313902, - "grad_norm": 0.48216915130615234, - "learning_rate": 9.972808649385658e-06, - "loss": 0.4335, - "step": 2413 - }, - { - "epoch": 0.15776746617868112, - "grad_norm": 0.49452242255210876, - "learning_rate": 9.972772269550313e-06, - "loss": 0.4367, - "step": 2414 - }, - { - "epoch": 0.15783282138422325, - "grad_norm": 0.515508770942688, - "learning_rate": 9.972735865461034e-06, - "loss": 0.4591, - "step": 2415 - }, - { - "epoch": 0.15789817658976538, - "grad_norm": 0.48209694027900696, - "learning_rate": 9.972699437118e-06, - "loss": 0.4206, - "step": 2416 - }, - { - "epoch": 0.1579635317953075, - "grad_norm": 0.4766177535057068, - "learning_rate": 9.972662984521388e-06, - "loss": 0.3985, - "step": 2417 - }, - { - "epoch": 0.1580288870008496, - "grad_norm": 0.511594831943512, - "learning_rate": 9.972626507671375e-06, - "loss": 0.4561, - "step": 2418 - }, - { - "epoch": 0.15809424220639173, - "grad_norm": 0.4937410354614258, - "learning_rate": 9.972590006568142e-06, - "loss": 0.392, - "step": 2419 - }, - { - "epoch": 0.15815959741193386, - "grad_norm": 0.479414165019989, - "learning_rate": 9.972553481211862e-06, - "loss": 0.4053, - "step": 2420 - }, - { - "epoch": 0.158224952617476, - "grad_norm": 0.5139066576957703, - "learning_rate": 9.972516931602718e-06, - "loss": 0.4514, - "step": 2421 - }, - { - "epoch": 0.15829030782301812, - "grad_norm": 0.5182710886001587, - "learning_rate": 9.972480357740886e-06, - "loss": 0.4503, - "step": 2422 - }, - { - "epoch": 0.15835566302856022, - "grad_norm": 0.5157301425933838, - "learning_rate": 9.972443759626544e-06, - "loss": 0.4582, - "step": 2423 - }, - { - "epoch": 0.15842101823410235, - "grad_norm": 0.48315101861953735, - "learning_rate": 9.972407137259872e-06, - "loss": 0.3745, - "step": 2424 - }, - { - "epoch": 0.15848637343964447, - "grad_norm": 0.5033113956451416, - "learning_rate": 9.972370490641047e-06, - "loss": 0.3872, - "step": 2425 - }, - { - "epoch": 0.1585517286451866, - "grad_norm": 0.4903390407562256, - "learning_rate": 9.972333819770248e-06, - "loss": 0.394, - "step": 2426 - }, - { - "epoch": 0.1586170838507287, - "grad_norm": 0.4640309512615204, - "learning_rate": 9.972297124647657e-06, - "loss": 0.4091, - "step": 2427 - }, - { - "epoch": 0.15868243905627083, - "grad_norm": 0.48285433650016785, - "learning_rate": 9.972260405273447e-06, - "loss": 0.4245, - "step": 2428 - }, - { - "epoch": 0.15874779426181296, - "grad_norm": 0.5143033862113953, - "learning_rate": 9.972223661647802e-06, - "loss": 0.4609, - "step": 2429 - }, - { - "epoch": 0.15881314946735509, - "grad_norm": 0.47964897751808167, - "learning_rate": 9.972186893770901e-06, - "loss": 0.3741, - "step": 2430 - }, - { - "epoch": 0.1588785046728972, - "grad_norm": 0.5177162289619446, - "learning_rate": 9.97215010164292e-06, - "loss": 0.4548, - "step": 2431 - }, - { - "epoch": 0.15894385987843931, - "grad_norm": 0.501973032951355, - "learning_rate": 9.972113285264042e-06, - "loss": 0.4082, - "step": 2432 - }, - { - "epoch": 0.15900921508398144, - "grad_norm": 0.45796096324920654, - "learning_rate": 9.972076444634443e-06, - "loss": 0.3939, - "step": 2433 - }, - { - "epoch": 0.15907457028952357, - "grad_norm": 0.48177218437194824, - "learning_rate": 9.972039579754305e-06, - "loss": 0.3938, - "step": 2434 - }, - { - "epoch": 0.15913992549506567, - "grad_norm": 0.4820548892021179, - "learning_rate": 9.97200269062381e-06, - "loss": 0.4491, - "step": 2435 - }, - { - "epoch": 0.1592052807006078, - "grad_norm": 0.4811043441295624, - "learning_rate": 9.971965777243133e-06, - "loss": 0.3995, - "step": 2436 - }, - { - "epoch": 0.15927063590614993, - "grad_norm": 0.8044670820236206, - "learning_rate": 9.971928839612458e-06, - "loss": 0.4068, - "step": 2437 - }, - { - "epoch": 0.15933599111169205, - "grad_norm": 0.5364180207252502, - "learning_rate": 9.971891877731961e-06, - "loss": 0.4235, - "step": 2438 - }, - { - "epoch": 0.15940134631723415, - "grad_norm": 0.4931538999080658, - "learning_rate": 9.971854891601828e-06, - "loss": 0.3894, - "step": 2439 - }, - { - "epoch": 0.15946670152277628, - "grad_norm": 0.5401843786239624, - "learning_rate": 9.971817881222234e-06, - "loss": 0.4618, - "step": 2440 - }, - { - "epoch": 0.1595320567283184, - "grad_norm": 0.4642857015132904, - "learning_rate": 9.971780846593363e-06, - "loss": 0.3731, - "step": 2441 - }, - { - "epoch": 0.15959741193386054, - "grad_norm": 0.5497761964797974, - "learning_rate": 9.971743787715393e-06, - "loss": 0.4824, - "step": 2442 - }, - { - "epoch": 0.15966276713940267, - "grad_norm": 0.5208721160888672, - "learning_rate": 9.971706704588509e-06, - "loss": 0.4415, - "step": 2443 - }, - { - "epoch": 0.15972812234494477, - "grad_norm": 0.45102331042289734, - "learning_rate": 9.971669597212887e-06, - "loss": 0.3535, - "step": 2444 - }, - { - "epoch": 0.1597934775504869, - "grad_norm": 0.5041860342025757, - "learning_rate": 9.971632465588709e-06, - "loss": 0.4654, - "step": 2445 - }, - { - "epoch": 0.15985883275602902, - "grad_norm": 0.5482191443443298, - "learning_rate": 9.97159530971616e-06, - "loss": 0.4445, - "step": 2446 - }, - { - "epoch": 0.15992418796157115, - "grad_norm": 0.5334155559539795, - "learning_rate": 9.971558129595415e-06, - "loss": 0.4317, - "step": 2447 - }, - { - "epoch": 0.15998954316711325, - "grad_norm": 0.4832179546356201, - "learning_rate": 9.971520925226662e-06, - "loss": 0.4177, - "step": 2448 - }, - { - "epoch": 0.16005489837265538, - "grad_norm": 0.5031760931015015, - "learning_rate": 9.971483696610078e-06, - "loss": 0.4143, - "step": 2449 - }, - { - "epoch": 0.1601202535781975, - "grad_norm": 0.49227967858314514, - "learning_rate": 9.971446443745845e-06, - "loss": 0.4254, - "step": 2450 - }, - { - "epoch": 0.16018560878373964, - "grad_norm": 0.5087870359420776, - "learning_rate": 9.971409166634144e-06, - "loss": 0.4446, - "step": 2451 - }, - { - "epoch": 0.16025096398928174, - "grad_norm": 0.5398125648498535, - "learning_rate": 9.971371865275162e-06, - "loss": 0.4469, - "step": 2452 - }, - { - "epoch": 0.16031631919482386, - "grad_norm": 0.4663192629814148, - "learning_rate": 9.971334539669075e-06, - "loss": 0.3717, - "step": 2453 - }, - { - "epoch": 0.160381674400366, - "grad_norm": 0.4872797429561615, - "learning_rate": 9.971297189816068e-06, - "loss": 0.3952, - "step": 2454 - }, - { - "epoch": 0.16044702960590812, - "grad_norm": 0.4967391788959503, - "learning_rate": 9.971259815716322e-06, - "loss": 0.388, - "step": 2455 - }, - { - "epoch": 0.16051238481145022, - "grad_norm": 0.4867062568664551, - "learning_rate": 9.971222417370019e-06, - "loss": 0.3991, - "step": 2456 - }, - { - "epoch": 0.16057774001699235, - "grad_norm": 0.47665491700172424, - "learning_rate": 9.971184994777342e-06, - "loss": 0.3841, - "step": 2457 - }, - { - "epoch": 0.16064309522253448, - "grad_norm": 0.5055465698242188, - "learning_rate": 9.971147547938475e-06, - "loss": 0.3978, - "step": 2458 - }, - { - "epoch": 0.1607084504280766, - "grad_norm": 0.4906710386276245, - "learning_rate": 9.971110076853599e-06, - "loss": 0.3905, - "step": 2459 - }, - { - "epoch": 0.1607738056336187, - "grad_norm": 0.486117959022522, - "learning_rate": 9.971072581522894e-06, - "loss": 0.3927, - "step": 2460 - }, - { - "epoch": 0.16083916083916083, - "grad_norm": 0.4956723749637604, - "learning_rate": 9.971035061946549e-06, - "loss": 0.4235, - "step": 2461 - }, - { - "epoch": 0.16090451604470296, - "grad_norm": 0.4936509132385254, - "learning_rate": 9.970997518124743e-06, - "loss": 0.4027, - "step": 2462 - }, - { - "epoch": 0.1609698712502451, - "grad_norm": 0.47628098726272583, - "learning_rate": 9.97095995005766e-06, - "loss": 0.4145, - "step": 2463 - }, - { - "epoch": 0.16103522645578722, - "grad_norm": 0.5037341713905334, - "learning_rate": 9.970922357745483e-06, - "loss": 0.433, - "step": 2464 - }, - { - "epoch": 0.16110058166132932, - "grad_norm": 0.521263599395752, - "learning_rate": 9.970884741188397e-06, - "loss": 0.4394, - "step": 2465 - }, - { - "epoch": 0.16116593686687145, - "grad_norm": 0.4817979633808136, - "learning_rate": 9.970847100386582e-06, - "loss": 0.4414, - "step": 2466 - }, - { - "epoch": 0.16123129207241357, - "grad_norm": 0.4828839600086212, - "learning_rate": 9.970809435340226e-06, - "loss": 0.4241, - "step": 2467 - }, - { - "epoch": 0.1612966472779557, - "grad_norm": 0.5036912560462952, - "learning_rate": 9.970771746049508e-06, - "loss": 0.4186, - "step": 2468 - }, - { - "epoch": 0.1613620024834978, - "grad_norm": 0.5398948192596436, - "learning_rate": 9.970734032514616e-06, - "loss": 0.4558, - "step": 2469 - }, - { - "epoch": 0.16142735768903993, - "grad_norm": 0.5140867829322815, - "learning_rate": 9.97069629473573e-06, - "loss": 0.4256, - "step": 2470 - }, - { - "epoch": 0.16149271289458206, - "grad_norm": 0.5130822062492371, - "learning_rate": 9.970658532713038e-06, - "loss": 0.4516, - "step": 2471 - }, - { - "epoch": 0.16155806810012419, - "grad_norm": 0.4797975718975067, - "learning_rate": 9.970620746446721e-06, - "loss": 0.4297, - "step": 2472 - }, - { - "epoch": 0.16162342330566629, - "grad_norm": 0.4829849600791931, - "learning_rate": 9.970582935936966e-06, - "loss": 0.3745, - "step": 2473 - }, - { - "epoch": 0.16168877851120841, - "grad_norm": 0.5111587047576904, - "learning_rate": 9.970545101183956e-06, - "loss": 0.4004, - "step": 2474 - }, - { - "epoch": 0.16175413371675054, - "grad_norm": 0.5087136626243591, - "learning_rate": 9.970507242187873e-06, - "loss": 0.4819, - "step": 2475 - }, - { - "epoch": 0.16181948892229267, - "grad_norm": 0.5141459107398987, - "learning_rate": 9.970469358948906e-06, - "loss": 0.4173, - "step": 2476 - }, - { - "epoch": 0.16188484412783477, - "grad_norm": 0.46680575609207153, - "learning_rate": 9.97043145146724e-06, - "loss": 0.4384, - "step": 2477 - }, - { - "epoch": 0.1619501993333769, - "grad_norm": 0.4982910752296448, - "learning_rate": 9.970393519743055e-06, - "loss": 0.4524, - "step": 2478 - }, - { - "epoch": 0.16201555453891903, - "grad_norm": 0.48790621757507324, - "learning_rate": 9.97035556377654e-06, - "loss": 0.4261, - "step": 2479 - }, - { - "epoch": 0.16208090974446115, - "grad_norm": 0.5400761365890503, - "learning_rate": 9.970317583567879e-06, - "loss": 0.45, - "step": 2480 - }, - { - "epoch": 0.16214626495000325, - "grad_norm": 0.5190091133117676, - "learning_rate": 9.970279579117257e-06, - "loss": 0.4153, - "step": 2481 - }, - { - "epoch": 0.16221162015554538, - "grad_norm": 0.48533931374549866, - "learning_rate": 9.970241550424859e-06, - "loss": 0.4143, - "step": 2482 - }, - { - "epoch": 0.1622769753610875, - "grad_norm": 0.4670906662940979, - "learning_rate": 9.970203497490873e-06, - "loss": 0.383, - "step": 2483 - }, - { - "epoch": 0.16234233056662964, - "grad_norm": 0.49258172512054443, - "learning_rate": 9.970165420315481e-06, - "loss": 0.4109, - "step": 2484 - }, - { - "epoch": 0.16240768577217177, - "grad_norm": 0.5390580892562866, - "learning_rate": 9.97012731889887e-06, - "loss": 0.4761, - "step": 2485 - }, - { - "epoch": 0.16247304097771387, - "grad_norm": 0.46807584166526794, - "learning_rate": 9.970089193241229e-06, - "loss": 0.397, - "step": 2486 - }, - { - "epoch": 0.162538396183256, - "grad_norm": 0.5004175901412964, - "learning_rate": 9.97005104334274e-06, - "loss": 0.4167, - "step": 2487 - }, - { - "epoch": 0.16260375138879812, - "grad_norm": 0.4912553131580353, - "learning_rate": 9.97001286920359e-06, - "loss": 0.3806, - "step": 2488 - }, - { - "epoch": 0.16266910659434025, - "grad_norm": 0.5068600177764893, - "learning_rate": 9.969974670823963e-06, - "loss": 0.4448, - "step": 2489 - }, - { - "epoch": 0.16273446179988235, - "grad_norm": 0.5258041024208069, - "learning_rate": 9.969936448204051e-06, - "loss": 0.4242, - "step": 2490 - }, - { - "epoch": 0.16279981700542448, - "grad_norm": 0.5120575428009033, - "learning_rate": 9.969898201344037e-06, - "loss": 0.4393, - "step": 2491 - }, - { - "epoch": 0.1628651722109666, - "grad_norm": 0.5244059562683105, - "learning_rate": 9.969859930244106e-06, - "loss": 0.4359, - "step": 2492 - }, - { - "epoch": 0.16293052741650874, - "grad_norm": 0.5091240406036377, - "learning_rate": 9.969821634904447e-06, - "loss": 0.413, - "step": 2493 - }, - { - "epoch": 0.16299588262205084, - "grad_norm": 0.5374748706817627, - "learning_rate": 9.969783315325246e-06, - "loss": 0.4473, - "step": 2494 - }, - { - "epoch": 0.16306123782759296, - "grad_norm": 0.4869825839996338, - "learning_rate": 9.969744971506691e-06, - "loss": 0.3928, - "step": 2495 - }, - { - "epoch": 0.1631265930331351, - "grad_norm": 0.49428319931030273, - "learning_rate": 9.969706603448967e-06, - "loss": 0.402, - "step": 2496 - }, - { - "epoch": 0.16319194823867722, - "grad_norm": 0.43656015396118164, - "learning_rate": 9.969668211152264e-06, - "loss": 0.3381, - "step": 2497 - }, - { - "epoch": 0.16325730344421932, - "grad_norm": 0.4965074360370636, - "learning_rate": 9.969629794616766e-06, - "loss": 0.3864, - "step": 2498 - }, - { - "epoch": 0.16332265864976145, - "grad_norm": 0.5049904584884644, - "learning_rate": 9.969591353842663e-06, - "loss": 0.4281, - "step": 2499 - }, - { - "epoch": 0.16338801385530358, - "grad_norm": 0.523197591304779, - "learning_rate": 9.96955288883014e-06, - "loss": 0.4588, - "step": 2500 - }, - { - "epoch": 0.1634533690608457, - "grad_norm": 0.5229784250259399, - "learning_rate": 9.969514399579385e-06, - "loss": 0.4401, - "step": 2501 - }, - { - "epoch": 0.1635187242663878, - "grad_norm": 0.5095672011375427, - "learning_rate": 9.96947588609059e-06, - "loss": 0.4417, - "step": 2502 - }, - { - "epoch": 0.16358407947192993, - "grad_norm": 0.4908083975315094, - "learning_rate": 9.969437348363937e-06, - "loss": 0.434, - "step": 2503 - }, - { - "epoch": 0.16364943467747206, - "grad_norm": 0.5618243217468262, - "learning_rate": 9.969398786399616e-06, - "loss": 0.48, - "step": 2504 - }, - { - "epoch": 0.1637147898830142, - "grad_norm": 0.5383886694908142, - "learning_rate": 9.969360200197818e-06, - "loss": 0.4726, - "step": 2505 - }, - { - "epoch": 0.16378014508855632, - "grad_norm": 0.48987555503845215, - "learning_rate": 9.969321589758726e-06, - "loss": 0.4145, - "step": 2506 - }, - { - "epoch": 0.16384550029409842, - "grad_norm": 0.5051498413085938, - "learning_rate": 9.969282955082534e-06, - "loss": 0.436, - "step": 2507 - }, - { - "epoch": 0.16391085549964055, - "grad_norm": 0.4904925525188446, - "learning_rate": 9.969244296169425e-06, - "loss": 0.4305, - "step": 2508 - }, - { - "epoch": 0.16397621070518267, - "grad_norm": 0.44761115312576294, - "learning_rate": 9.969205613019592e-06, - "loss": 0.3561, - "step": 2509 - }, - { - "epoch": 0.1640415659107248, - "grad_norm": 0.5262337327003479, - "learning_rate": 9.969166905633219e-06, - "loss": 0.4472, - "step": 2510 - }, - { - "epoch": 0.1641069211162669, - "grad_norm": 0.4879244863986969, - "learning_rate": 9.9691281740105e-06, - "loss": 0.382, - "step": 2511 - }, - { - "epoch": 0.16417227632180903, - "grad_norm": 0.4646982252597809, - "learning_rate": 9.96908941815162e-06, - "loss": 0.3673, - "step": 2512 - }, - { - "epoch": 0.16423763152735116, - "grad_norm": 0.45479026436805725, - "learning_rate": 9.96905063805677e-06, - "loss": 0.3925, - "step": 2513 - }, - { - "epoch": 0.16430298673289329, - "grad_norm": 0.4475038945674896, - "learning_rate": 9.96901183372614e-06, - "loss": 0.3981, - "step": 2514 - }, - { - "epoch": 0.16436834193843539, - "grad_norm": 0.5348302721977234, - "learning_rate": 9.968973005159916e-06, - "loss": 0.4298, - "step": 2515 - }, - { - "epoch": 0.16443369714397751, - "grad_norm": 0.4796079695224762, - "learning_rate": 9.96893415235829e-06, - "loss": 0.3671, - "step": 2516 - }, - { - "epoch": 0.16449905234951964, - "grad_norm": 0.5769591927528381, - "learning_rate": 9.968895275321451e-06, - "loss": 0.5147, - "step": 2517 - }, - { - "epoch": 0.16456440755506177, - "grad_norm": 0.4583422839641571, - "learning_rate": 9.968856374049587e-06, - "loss": 0.3913, - "step": 2518 - }, - { - "epoch": 0.16462976276060387, - "grad_norm": 0.48605695366859436, - "learning_rate": 9.96881744854289e-06, - "loss": 0.3907, - "step": 2519 - }, - { - "epoch": 0.164695117966146, - "grad_norm": 0.4812755882740021, - "learning_rate": 9.96877849880155e-06, - "loss": 0.426, - "step": 2520 - }, - { - "epoch": 0.16476047317168813, - "grad_norm": 0.4877561032772064, - "learning_rate": 9.968739524825754e-06, - "loss": 0.4104, - "step": 2521 - }, - { - "epoch": 0.16482582837723025, - "grad_norm": 0.511563241481781, - "learning_rate": 9.968700526615696e-06, - "loss": 0.4241, - "step": 2522 - }, - { - "epoch": 0.16489118358277235, - "grad_norm": 0.4960847496986389, - "learning_rate": 9.968661504171562e-06, - "loss": 0.4505, - "step": 2523 - }, - { - "epoch": 0.16495653878831448, - "grad_norm": 0.5165857076644897, - "learning_rate": 9.968622457493547e-06, - "loss": 0.4381, - "step": 2524 - }, - { - "epoch": 0.1650218939938566, - "grad_norm": 0.4729250967502594, - "learning_rate": 9.968583386581836e-06, - "loss": 0.3937, - "step": 2525 - }, - { - "epoch": 0.16508724919939874, - "grad_norm": 0.4830740690231323, - "learning_rate": 9.968544291436625e-06, - "loss": 0.4158, - "step": 2526 - }, - { - "epoch": 0.16515260440494087, - "grad_norm": 0.5134458541870117, - "learning_rate": 9.9685051720581e-06, - "loss": 0.4746, - "step": 2527 - }, - { - "epoch": 0.16521795961048297, - "grad_norm": 0.48444420099258423, - "learning_rate": 9.968466028446456e-06, - "loss": 0.4025, - "step": 2528 - }, - { - "epoch": 0.1652833148160251, - "grad_norm": 0.4610399603843689, - "learning_rate": 9.968426860601882e-06, - "loss": 0.3912, - "step": 2529 - }, - { - "epoch": 0.16534867002156722, - "grad_norm": 0.4778654873371124, - "learning_rate": 9.968387668524569e-06, - "loss": 0.4188, - "step": 2530 - }, - { - "epoch": 0.16541402522710935, - "grad_norm": 0.49111485481262207, - "learning_rate": 9.968348452214708e-06, - "loss": 0.4031, - "step": 2531 - }, - { - "epoch": 0.16547938043265145, - "grad_norm": 0.501004159450531, - "learning_rate": 9.968309211672489e-06, - "loss": 0.4069, - "step": 2532 - }, - { - "epoch": 0.16554473563819358, - "grad_norm": 0.4897302985191345, - "learning_rate": 9.968269946898106e-06, - "loss": 0.4174, - "step": 2533 - }, - { - "epoch": 0.1656100908437357, - "grad_norm": 0.5068104267120361, - "learning_rate": 9.968230657891748e-06, - "loss": 0.4256, - "step": 2534 - }, - { - "epoch": 0.16567544604927784, - "grad_norm": 0.5204775929450989, - "learning_rate": 9.968191344653608e-06, - "loss": 0.4388, - "step": 2535 - }, - { - "epoch": 0.16574080125481994, - "grad_norm": 0.5054092407226562, - "learning_rate": 9.968152007183879e-06, - "loss": 0.4322, - "step": 2536 - }, - { - "epoch": 0.16580615646036206, - "grad_norm": 0.48208674788475037, - "learning_rate": 9.96811264548275e-06, - "loss": 0.3779, - "step": 2537 - }, - { - "epoch": 0.1658715116659042, - "grad_norm": 0.5330373048782349, - "learning_rate": 9.968073259550417e-06, - "loss": 0.417, - "step": 2538 - }, - { - "epoch": 0.16593686687144632, - "grad_norm": 0.4920862317085266, - "learning_rate": 9.968033849387067e-06, - "loss": 0.4066, - "step": 2539 - }, - { - "epoch": 0.16600222207698842, - "grad_norm": 0.5067939758300781, - "learning_rate": 9.967994414992897e-06, - "loss": 0.4024, - "step": 2540 - }, - { - "epoch": 0.16606757728253055, - "grad_norm": 0.47380581498146057, - "learning_rate": 9.967954956368095e-06, - "loss": 0.4062, - "step": 2541 - }, - { - "epoch": 0.16613293248807268, - "grad_norm": 0.48494410514831543, - "learning_rate": 9.967915473512857e-06, - "loss": 0.409, - "step": 2542 - }, - { - "epoch": 0.1661982876936148, - "grad_norm": 0.4796536862850189, - "learning_rate": 9.967875966427374e-06, - "loss": 0.4195, - "step": 2543 - }, - { - "epoch": 0.1662636428991569, - "grad_norm": 0.5089160203933716, - "learning_rate": 9.96783643511184e-06, - "loss": 0.4332, - "step": 2544 - }, - { - "epoch": 0.16632899810469903, - "grad_norm": 0.4698975384235382, - "learning_rate": 9.967796879566445e-06, - "loss": 0.3886, - "step": 2545 - }, - { - "epoch": 0.16639435331024116, - "grad_norm": 0.4938991665840149, - "learning_rate": 9.967757299791383e-06, - "loss": 0.4114, - "step": 2546 - }, - { - "epoch": 0.1664597085157833, - "grad_norm": 0.42095014452934265, - "learning_rate": 9.967717695786849e-06, - "loss": 0.3436, - "step": 2547 - }, - { - "epoch": 0.16652506372132542, - "grad_norm": 0.4695545732975006, - "learning_rate": 9.967678067553035e-06, - "loss": 0.4201, - "step": 2548 - }, - { - "epoch": 0.16659041892686752, - "grad_norm": 0.4768037796020508, - "learning_rate": 9.967638415090132e-06, - "loss": 0.3815, - "step": 2549 - }, - { - "epoch": 0.16665577413240965, - "grad_norm": 0.45915326476097107, - "learning_rate": 9.967598738398338e-06, - "loss": 0.3719, - "step": 2550 - }, - { - "epoch": 0.16672112933795177, - "grad_norm": 0.47874993085861206, - "learning_rate": 9.967559037477842e-06, - "loss": 0.3782, - "step": 2551 - }, - { - "epoch": 0.1667864845434939, - "grad_norm": 0.4765692949295044, - "learning_rate": 9.967519312328842e-06, - "loss": 0.4424, - "step": 2552 - }, - { - "epoch": 0.166851839749036, - "grad_norm": 0.4355213940143585, - "learning_rate": 9.967479562951527e-06, - "loss": 0.3788, - "step": 2553 - }, - { - "epoch": 0.16691719495457813, - "grad_norm": 0.44575202465057373, - "learning_rate": 9.967439789346096e-06, - "loss": 0.3618, - "step": 2554 - }, - { - "epoch": 0.16698255016012026, - "grad_norm": 0.4374285638332367, - "learning_rate": 9.967399991512739e-06, - "loss": 0.39, - "step": 2555 - }, - { - "epoch": 0.16704790536566239, - "grad_norm": 0.5247700214385986, - "learning_rate": 9.96736016945165e-06, - "loss": 0.4986, - "step": 2556 - }, - { - "epoch": 0.16711326057120449, - "grad_norm": 0.502058207988739, - "learning_rate": 9.967320323163025e-06, - "loss": 0.4027, - "step": 2557 - }, - { - "epoch": 0.16717861577674661, - "grad_norm": 0.45537254214286804, - "learning_rate": 9.967280452647059e-06, - "loss": 0.3588, - "step": 2558 - }, - { - "epoch": 0.16724397098228874, - "grad_norm": 0.4959736168384552, - "learning_rate": 9.967240557903946e-06, - "loss": 0.38, - "step": 2559 - }, - { - "epoch": 0.16730932618783087, - "grad_norm": 0.5058095455169678, - "learning_rate": 9.967200638933878e-06, - "loss": 0.436, - "step": 2560 - }, - { - "epoch": 0.16737468139337297, - "grad_norm": 0.47594496607780457, - "learning_rate": 9.967160695737053e-06, - "loss": 0.4214, - "step": 2561 - }, - { - "epoch": 0.1674400365989151, - "grad_norm": 0.5219940543174744, - "learning_rate": 9.967120728313664e-06, - "loss": 0.3942, - "step": 2562 - }, - { - "epoch": 0.16750539180445723, - "grad_norm": 0.4856316149234772, - "learning_rate": 9.967080736663907e-06, - "loss": 0.4215, - "step": 2563 - }, - { - "epoch": 0.16757074700999935, - "grad_norm": 0.49029284715652466, - "learning_rate": 9.967040720787976e-06, - "loss": 0.3627, - "step": 2564 - }, - { - "epoch": 0.16763610221554145, - "grad_norm": 0.43458911776542664, - "learning_rate": 9.967000680686067e-06, - "loss": 0.3616, - "step": 2565 - }, - { - "epoch": 0.16770145742108358, - "grad_norm": 0.43978604674339294, - "learning_rate": 9.966960616358374e-06, - "loss": 0.3626, - "step": 2566 - }, - { - "epoch": 0.1677668126266257, - "grad_norm": 0.4900888502597809, - "learning_rate": 9.966920527805095e-06, - "loss": 0.3947, - "step": 2567 - }, - { - "epoch": 0.16783216783216784, - "grad_norm": 0.5364375114440918, - "learning_rate": 9.966880415026423e-06, - "loss": 0.4354, - "step": 2568 - }, - { - "epoch": 0.16789752303770997, - "grad_norm": 0.508897066116333, - "learning_rate": 9.966840278022553e-06, - "loss": 0.4934, - "step": 2569 - }, - { - "epoch": 0.16796287824325207, - "grad_norm": 0.48830941319465637, - "learning_rate": 9.966800116793684e-06, - "loss": 0.4073, - "step": 2570 - }, - { - "epoch": 0.1680282334487942, - "grad_norm": 0.45657533407211304, - "learning_rate": 9.96675993134001e-06, - "loss": 0.4061, - "step": 2571 - }, - { - "epoch": 0.16809358865433632, - "grad_norm": 0.4951988160610199, - "learning_rate": 9.966719721661728e-06, - "loss": 0.4522, - "step": 2572 - }, - { - "epoch": 0.16815894385987845, - "grad_norm": 0.47685354948043823, - "learning_rate": 9.966679487759032e-06, - "loss": 0.4018, - "step": 2573 - }, - { - "epoch": 0.16822429906542055, - "grad_norm": 0.5142900943756104, - "learning_rate": 9.96663922963212e-06, - "loss": 0.4395, - "step": 2574 - }, - { - "epoch": 0.16828965427096268, - "grad_norm": 0.5248950719833374, - "learning_rate": 9.966598947281187e-06, - "loss": 0.4512, - "step": 2575 - }, - { - "epoch": 0.1683550094765048, - "grad_norm": 0.5207715034484863, - "learning_rate": 9.966558640706432e-06, - "loss": 0.4485, - "step": 2576 - }, - { - "epoch": 0.16842036468204694, - "grad_norm": 0.4778415560722351, - "learning_rate": 9.966518309908049e-06, - "loss": 0.3832, - "step": 2577 - }, - { - "epoch": 0.16848571988758904, - "grad_norm": 0.47981250286102295, - "learning_rate": 9.966477954886236e-06, - "loss": 0.4027, - "step": 2578 - }, - { - "epoch": 0.16855107509313116, - "grad_norm": 0.5018543004989624, - "learning_rate": 9.966437575641188e-06, - "loss": 0.4194, - "step": 2579 - }, - { - "epoch": 0.1686164302986733, - "grad_norm": 0.5558579564094543, - "learning_rate": 9.966397172173106e-06, - "loss": 0.4751, - "step": 2580 - }, - { - "epoch": 0.16868178550421542, - "grad_norm": 0.5145378112792969, - "learning_rate": 9.966356744482182e-06, - "loss": 0.4732, - "step": 2581 - }, - { - "epoch": 0.16874714070975752, - "grad_norm": 0.44169601798057556, - "learning_rate": 9.966316292568616e-06, - "loss": 0.3465, - "step": 2582 - }, - { - "epoch": 0.16881249591529965, - "grad_norm": 0.4938541054725647, - "learning_rate": 9.966275816432606e-06, - "loss": 0.4023, - "step": 2583 - }, - { - "epoch": 0.16887785112084178, - "grad_norm": 0.5080822706222534, - "learning_rate": 9.96623531607435e-06, - "loss": 0.4284, - "step": 2584 - }, - { - "epoch": 0.1689432063263839, - "grad_norm": 0.4997018575668335, - "learning_rate": 9.966194791494039e-06, - "loss": 0.3967, - "step": 2585 - }, - { - "epoch": 0.169008561531926, - "grad_norm": 0.45045390725135803, - "learning_rate": 9.966154242691879e-06, - "loss": 0.4077, - "step": 2586 - }, - { - "epoch": 0.16907391673746813, - "grad_norm": 0.4637119472026825, - "learning_rate": 9.966113669668063e-06, - "loss": 0.3846, - "step": 2587 - }, - { - "epoch": 0.16913927194301026, - "grad_norm": 0.5090743899345398, - "learning_rate": 9.966073072422791e-06, - "loss": 0.3832, - "step": 2588 - }, - { - "epoch": 0.1692046271485524, - "grad_norm": 0.5185267925262451, - "learning_rate": 9.96603245095626e-06, - "loss": 0.4207, - "step": 2589 - }, - { - "epoch": 0.16926998235409452, - "grad_norm": 0.4877210855484009, - "learning_rate": 9.96599180526867e-06, - "loss": 0.4343, - "step": 2590 - }, - { - "epoch": 0.16933533755963662, - "grad_norm": 0.4766295254230499, - "learning_rate": 9.965951135360214e-06, - "loss": 0.4267, - "step": 2591 - }, - { - "epoch": 0.16940069276517875, - "grad_norm": 0.5057688355445862, - "learning_rate": 9.965910441231097e-06, - "loss": 0.418, - "step": 2592 - }, - { - "epoch": 0.16946604797072087, - "grad_norm": 0.4822060763835907, - "learning_rate": 9.965869722881515e-06, - "loss": 0.3854, - "step": 2593 - }, - { - "epoch": 0.169531403176263, - "grad_norm": 0.49696168303489685, - "learning_rate": 9.965828980311662e-06, - "loss": 0.372, - "step": 2594 - }, - { - "epoch": 0.1695967583818051, - "grad_norm": 0.5186379551887512, - "learning_rate": 9.965788213521743e-06, - "loss": 0.4242, - "step": 2595 - }, - { - "epoch": 0.16966211358734723, - "grad_norm": 0.48256298899650574, - "learning_rate": 9.965747422511956e-06, - "loss": 0.4027, - "step": 2596 - }, - { - "epoch": 0.16972746879288936, - "grad_norm": 0.45609068870544434, - "learning_rate": 9.965706607282497e-06, - "loss": 0.3708, - "step": 2597 - }, - { - "epoch": 0.16979282399843149, - "grad_norm": 0.47587496042251587, - "learning_rate": 9.965665767833567e-06, - "loss": 0.4422, - "step": 2598 - }, - { - "epoch": 0.16985817920397359, - "grad_norm": 0.5188522934913635, - "learning_rate": 9.965624904165364e-06, - "loss": 0.4643, - "step": 2599 - }, - { - "epoch": 0.16992353440951571, - "grad_norm": 0.5248739719390869, - "learning_rate": 9.965584016278089e-06, - "loss": 0.4377, - "step": 2600 - }, - { - "epoch": 0.16998888961505784, - "grad_norm": 0.4977901577949524, - "learning_rate": 9.96554310417194e-06, - "loss": 0.4275, - "step": 2601 - }, - { - "epoch": 0.17005424482059997, - "grad_norm": 0.48617416620254517, - "learning_rate": 9.965502167847117e-06, - "loss": 0.402, - "step": 2602 - }, - { - "epoch": 0.17011960002614207, - "grad_norm": 0.50570148229599, - "learning_rate": 9.96546120730382e-06, - "loss": 0.4729, - "step": 2603 - }, - { - "epoch": 0.1701849552316842, - "grad_norm": 0.5073838233947754, - "learning_rate": 9.965420222542248e-06, - "loss": 0.4264, - "step": 2604 - }, - { - "epoch": 0.17025031043722633, - "grad_norm": 0.4870482385158539, - "learning_rate": 9.965379213562602e-06, - "loss": 0.4014, - "step": 2605 - }, - { - "epoch": 0.17031566564276845, - "grad_norm": 0.5134543180465698, - "learning_rate": 9.965338180365081e-06, - "loss": 0.4449, - "step": 2606 - }, - { - "epoch": 0.17038102084831055, - "grad_norm": 0.5562323927879333, - "learning_rate": 9.965297122949886e-06, - "loss": 0.4601, - "step": 2607 - }, - { - "epoch": 0.17044637605385268, - "grad_norm": 0.7858109474182129, - "learning_rate": 9.965256041317217e-06, - "loss": 0.4765, - "step": 2608 - }, - { - "epoch": 0.1705117312593948, - "grad_norm": 0.4887913465499878, - "learning_rate": 9.965214935467274e-06, - "loss": 0.4496, - "step": 2609 - }, - { - "epoch": 0.17057708646493694, - "grad_norm": 0.5138968825340271, - "learning_rate": 9.965173805400257e-06, - "loss": 0.4569, - "step": 2610 - }, - { - "epoch": 0.17064244167047907, - "grad_norm": 0.47897759079933167, - "learning_rate": 9.965132651116368e-06, - "loss": 0.4029, - "step": 2611 - }, - { - "epoch": 0.17070779687602117, - "grad_norm": 0.4786286950111389, - "learning_rate": 9.965091472615807e-06, - "loss": 0.4086, - "step": 2612 - }, - { - "epoch": 0.1707731520815633, - "grad_norm": 0.4858819544315338, - "learning_rate": 9.965050269898774e-06, - "loss": 0.3894, - "step": 2613 - }, - { - "epoch": 0.17083850728710542, - "grad_norm": 0.4524725079536438, - "learning_rate": 9.96500904296547e-06, - "loss": 0.3649, - "step": 2614 - }, - { - "epoch": 0.17090386249264755, - "grad_norm": 0.5363240838050842, - "learning_rate": 9.964967791816098e-06, - "loss": 0.4893, - "step": 2615 - }, - { - "epoch": 0.17096921769818965, - "grad_norm": 0.4693722128868103, - "learning_rate": 9.96492651645086e-06, - "loss": 0.3968, - "step": 2616 - }, - { - "epoch": 0.17103457290373178, - "grad_norm": 0.5352193713188171, - "learning_rate": 9.964885216869953e-06, - "loss": 0.4488, - "step": 2617 - }, - { - "epoch": 0.1710999281092739, - "grad_norm": 0.45310312509536743, - "learning_rate": 9.96484389307358e-06, - "loss": 0.3974, - "step": 2618 - }, - { - "epoch": 0.17116528331481604, - "grad_norm": 0.49187517166137695, - "learning_rate": 9.964802545061944e-06, - "loss": 0.4147, - "step": 2619 - }, - { - "epoch": 0.17123063852035814, - "grad_norm": 0.5644070506095886, - "learning_rate": 9.964761172835247e-06, - "loss": 0.4787, - "step": 2620 - }, - { - "epoch": 0.17129599372590026, - "grad_norm": 0.4508492648601532, - "learning_rate": 9.96471977639369e-06, - "loss": 0.3417, - "step": 2621 - }, - { - "epoch": 0.1713613489314424, - "grad_norm": 0.4854797124862671, - "learning_rate": 9.964678355737474e-06, - "loss": 0.3945, - "step": 2622 - }, - { - "epoch": 0.17142670413698452, - "grad_norm": 0.523059606552124, - "learning_rate": 9.964636910866802e-06, - "loss": 0.4428, - "step": 2623 - }, - { - "epoch": 0.17149205934252662, - "grad_norm": 0.5245983600616455, - "learning_rate": 9.964595441781874e-06, - "loss": 0.4341, - "step": 2624 - }, - { - "epoch": 0.17155741454806875, - "grad_norm": 0.47294411063194275, - "learning_rate": 9.964553948482897e-06, - "loss": 0.3847, - "step": 2625 - }, - { - "epoch": 0.17162276975361088, - "grad_norm": 0.48770037293434143, - "learning_rate": 9.964512430970071e-06, - "loss": 0.4038, - "step": 2626 - }, - { - "epoch": 0.171688124959153, - "grad_norm": 0.45670706033706665, - "learning_rate": 9.964470889243595e-06, - "loss": 0.3577, - "step": 2627 - }, - { - "epoch": 0.1717534801646951, - "grad_norm": 0.5225144028663635, - "learning_rate": 9.964429323303675e-06, - "loss": 0.442, - "step": 2628 - }, - { - "epoch": 0.17181883537023723, - "grad_norm": 0.5164922475814819, - "learning_rate": 9.964387733150516e-06, - "loss": 0.4187, - "step": 2629 - }, - { - "epoch": 0.17188419057577936, - "grad_norm": 0.4995775520801544, - "learning_rate": 9.964346118784315e-06, - "loss": 0.3911, - "step": 2630 - }, - { - "epoch": 0.1719495457813215, - "grad_norm": 0.48990604281425476, - "learning_rate": 9.96430448020528e-06, - "loss": 0.4182, - "step": 2631 - }, - { - "epoch": 0.17201490098686362, - "grad_norm": 0.48489493131637573, - "learning_rate": 9.964262817413613e-06, - "loss": 0.3983, - "step": 2632 - }, - { - "epoch": 0.17208025619240572, - "grad_norm": 0.5082417726516724, - "learning_rate": 9.964221130409515e-06, - "loss": 0.3983, - "step": 2633 - }, - { - "epoch": 0.17214561139794785, - "grad_norm": 0.503032386302948, - "learning_rate": 9.96417941919319e-06, - "loss": 0.4297, - "step": 2634 - }, - { - "epoch": 0.17221096660348997, - "grad_norm": 0.5166121125221252, - "learning_rate": 9.964137683764844e-06, - "loss": 0.4047, - "step": 2635 - }, - { - "epoch": 0.1722763218090321, - "grad_norm": 0.4831024706363678, - "learning_rate": 9.964095924124678e-06, - "loss": 0.4258, - "step": 2636 - }, - { - "epoch": 0.1723416770145742, - "grad_norm": 0.48391157388687134, - "learning_rate": 9.964054140272895e-06, - "loss": 0.4064, - "step": 2637 - }, - { - "epoch": 0.17240703222011633, - "grad_norm": 0.6155975461006165, - "learning_rate": 9.964012332209703e-06, - "loss": 0.4597, - "step": 2638 - }, - { - "epoch": 0.17247238742565846, - "grad_norm": 0.5119083523750305, - "learning_rate": 9.963970499935303e-06, - "loss": 0.4272, - "step": 2639 - }, - { - "epoch": 0.17253774263120059, - "grad_norm": 0.5019258260726929, - "learning_rate": 9.963928643449898e-06, - "loss": 0.4597, - "step": 2640 - }, - { - "epoch": 0.17260309783674269, - "grad_norm": 0.4831063151359558, - "learning_rate": 9.963886762753692e-06, - "loss": 0.3992, - "step": 2641 - }, - { - "epoch": 0.17266845304228481, - "grad_norm": 0.5370878577232361, - "learning_rate": 9.963844857846895e-06, - "loss": 0.4319, - "step": 2642 - }, - { - "epoch": 0.17273380824782694, - "grad_norm": 0.5012040138244629, - "learning_rate": 9.963802928729702e-06, - "loss": 0.4204, - "step": 2643 - }, - { - "epoch": 0.17279916345336907, - "grad_norm": 0.523209810256958, - "learning_rate": 9.963760975402325e-06, - "loss": 0.4792, - "step": 2644 - }, - { - "epoch": 0.17286451865891117, - "grad_norm": 0.45911481976509094, - "learning_rate": 9.963718997864967e-06, - "loss": 0.3701, - "step": 2645 - }, - { - "epoch": 0.1729298738644533, - "grad_norm": 0.557472288608551, - "learning_rate": 9.963676996117827e-06, - "loss": 0.4708, - "step": 2646 - }, - { - "epoch": 0.17299522906999543, - "grad_norm": 0.49956074357032776, - "learning_rate": 9.96363497016112e-06, - "loss": 0.4201, - "step": 2647 - }, - { - "epoch": 0.17306058427553755, - "grad_norm": 0.4768489897251129, - "learning_rate": 9.963592919995044e-06, - "loss": 0.3931, - "step": 2648 - }, - { - "epoch": 0.17312593948107965, - "grad_norm": 0.4986908733844757, - "learning_rate": 9.963550845619805e-06, - "loss": 0.4258, - "step": 2649 - }, - { - "epoch": 0.17319129468662178, - "grad_norm": 0.4949187636375427, - "learning_rate": 9.963508747035611e-06, - "loss": 0.4068, - "step": 2650 - }, - { - "epoch": 0.1732566498921639, - "grad_norm": 0.494733601808548, - "learning_rate": 9.963466624242664e-06, - "loss": 0.4559, - "step": 2651 - }, - { - "epoch": 0.17332200509770604, - "grad_norm": 0.49882426857948303, - "learning_rate": 9.96342447724117e-06, - "loss": 0.3474, - "step": 2652 - }, - { - "epoch": 0.17338736030324817, - "grad_norm": 0.5101995468139648, - "learning_rate": 9.963382306031336e-06, - "loss": 0.3964, - "step": 2653 - }, - { - "epoch": 0.17345271550879027, - "grad_norm": 0.4387817084789276, - "learning_rate": 9.963340110613368e-06, - "loss": 0.3374, - "step": 2654 - }, - { - "epoch": 0.1735180707143324, - "grad_norm": 0.4991458058357239, - "learning_rate": 9.963297890987469e-06, - "loss": 0.3834, - "step": 2655 - }, - { - "epoch": 0.17358342591987452, - "grad_norm": 0.53550124168396, - "learning_rate": 9.963255647153848e-06, - "loss": 0.4925, - "step": 2656 - }, - { - "epoch": 0.17364878112541665, - "grad_norm": 0.48368826508522034, - "learning_rate": 9.96321337911271e-06, - "loss": 0.422, - "step": 2657 - }, - { - "epoch": 0.17371413633095875, - "grad_norm": 0.5215728878974915, - "learning_rate": 9.963171086864261e-06, - "loss": 0.4427, - "step": 2658 - }, - { - "epoch": 0.17377949153650088, - "grad_norm": 0.5231907963752747, - "learning_rate": 9.963128770408705e-06, - "loss": 0.4695, - "step": 2659 - }, - { - "epoch": 0.173844846742043, - "grad_norm": 0.5197647213935852, - "learning_rate": 9.963086429746253e-06, - "loss": 0.4168, - "step": 2660 - }, - { - "epoch": 0.17391020194758514, - "grad_norm": 0.5657692551612854, - "learning_rate": 9.963044064877108e-06, - "loss": 0.5545, - "step": 2661 - }, - { - "epoch": 0.17397555715312724, - "grad_norm": 0.47776147723197937, - "learning_rate": 9.963001675801478e-06, - "loss": 0.4143, - "step": 2662 - }, - { - "epoch": 0.17404091235866936, - "grad_norm": 0.47372832894325256, - "learning_rate": 9.96295926251957e-06, - "loss": 0.4079, - "step": 2663 - }, - { - "epoch": 0.1741062675642115, - "grad_norm": 0.5402714610099792, - "learning_rate": 9.96291682503159e-06, - "loss": 0.4726, - "step": 2664 - }, - { - "epoch": 0.17417162276975362, - "grad_norm": 0.48774057626724243, - "learning_rate": 9.962874363337745e-06, - "loss": 0.4021, - "step": 2665 - }, - { - "epoch": 0.17423697797529572, - "grad_norm": 0.4426060914993286, - "learning_rate": 9.962831877438242e-06, - "loss": 0.3216, - "step": 2666 - }, - { - "epoch": 0.17430233318083785, - "grad_norm": 0.49408379197120667, - "learning_rate": 9.962789367333289e-06, - "loss": 0.4005, - "step": 2667 - }, - { - "epoch": 0.17436768838637998, - "grad_norm": 0.4574377238750458, - "learning_rate": 9.962746833023093e-06, - "loss": 0.3721, - "step": 2668 - }, - { - "epoch": 0.1744330435919221, - "grad_norm": 0.46665847301483154, - "learning_rate": 9.962704274507862e-06, - "loss": 0.3392, - "step": 2669 - }, - { - "epoch": 0.1744983987974642, - "grad_norm": 0.50373774766922, - "learning_rate": 9.962661691787802e-06, - "loss": 0.4304, - "step": 2670 - }, - { - "epoch": 0.17456375400300633, - "grad_norm": 0.5277829766273499, - "learning_rate": 9.962619084863124e-06, - "loss": 0.44, - "step": 2671 - }, - { - "epoch": 0.17462910920854846, - "grad_norm": 0.4681248366832733, - "learning_rate": 9.96257645373403e-06, - "loss": 0.3894, - "step": 2672 - }, - { - "epoch": 0.1746944644140906, - "grad_norm": 0.4964708983898163, - "learning_rate": 9.962533798400734e-06, - "loss": 0.3904, - "step": 2673 - }, - { - "epoch": 0.17475981961963272, - "grad_norm": 0.5258198976516724, - "learning_rate": 9.96249111886344e-06, - "loss": 0.486, - "step": 2674 - }, - { - "epoch": 0.17482517482517482, - "grad_norm": 0.45436540246009827, - "learning_rate": 9.96244841512236e-06, - "loss": 0.4057, - "step": 2675 - }, - { - "epoch": 0.17489053003071695, - "grad_norm": 0.4477421045303345, - "learning_rate": 9.962405687177696e-06, - "loss": 0.3382, - "step": 2676 - }, - { - "epoch": 0.17495588523625907, - "grad_norm": 0.5042634606361389, - "learning_rate": 9.962362935029664e-06, - "loss": 0.4325, - "step": 2677 - }, - { - "epoch": 0.1750212404418012, - "grad_norm": 0.46567997336387634, - "learning_rate": 9.962320158678467e-06, - "loss": 0.3978, - "step": 2678 - }, - { - "epoch": 0.1750865956473433, - "grad_norm": 0.49018993973731995, - "learning_rate": 9.962277358124315e-06, - "loss": 0.4545, - "step": 2679 - }, - { - "epoch": 0.17515195085288543, - "grad_norm": 0.4478982388973236, - "learning_rate": 9.962234533367418e-06, - "loss": 0.3594, - "step": 2680 - }, - { - "epoch": 0.17521730605842756, - "grad_norm": 0.4821394681930542, - "learning_rate": 9.962191684407986e-06, - "loss": 0.3919, - "step": 2681 - }, - { - "epoch": 0.17528266126396969, - "grad_norm": 0.48290926218032837, - "learning_rate": 9.962148811246224e-06, - "loss": 0.401, - "step": 2682 - }, - { - "epoch": 0.17534801646951179, - "grad_norm": 0.45694294571876526, - "learning_rate": 9.962105913882342e-06, - "loss": 0.378, - "step": 2683 - }, - { - "epoch": 0.17541337167505391, - "grad_norm": 0.4834173321723938, - "learning_rate": 9.962062992316553e-06, - "loss": 0.3825, - "step": 2684 - }, - { - "epoch": 0.17547872688059604, - "grad_norm": 0.47401756048202515, - "learning_rate": 9.962020046549063e-06, - "loss": 0.394, - "step": 2685 - }, - { - "epoch": 0.17554408208613817, - "grad_norm": 0.5378835201263428, - "learning_rate": 9.961977076580082e-06, - "loss": 0.4804, - "step": 2686 - }, - { - "epoch": 0.17560943729168027, - "grad_norm": 0.5090337991714478, - "learning_rate": 9.961934082409819e-06, - "loss": 0.4134, - "step": 2687 - }, - { - "epoch": 0.1756747924972224, - "grad_norm": 0.4982738196849823, - "learning_rate": 9.961891064038487e-06, - "loss": 0.4509, - "step": 2688 - }, - { - "epoch": 0.17574014770276453, - "grad_norm": 0.46464022994041443, - "learning_rate": 9.961848021466291e-06, - "loss": 0.362, - "step": 2689 - }, - { - "epoch": 0.17580550290830665, - "grad_norm": 0.5181958675384521, - "learning_rate": 9.961804954693443e-06, - "loss": 0.4662, - "step": 2690 - }, - { - "epoch": 0.17587085811384875, - "grad_norm": 0.4751102030277252, - "learning_rate": 9.961761863720154e-06, - "loss": 0.395, - "step": 2691 - }, - { - "epoch": 0.17593621331939088, - "grad_norm": 0.49805060029029846, - "learning_rate": 9.961718748546633e-06, - "loss": 0.4122, - "step": 2692 - }, - { - "epoch": 0.176001568524933, - "grad_norm": 0.4755553901195526, - "learning_rate": 9.961675609173092e-06, - "loss": 0.4316, - "step": 2693 - }, - { - "epoch": 0.17606692373047514, - "grad_norm": 0.4815382659435272, - "learning_rate": 9.961632445599741e-06, - "loss": 0.3892, - "step": 2694 - }, - { - "epoch": 0.17613227893601727, - "grad_norm": 0.5086824297904968, - "learning_rate": 9.961589257826786e-06, - "loss": 0.3946, - "step": 2695 - }, - { - "epoch": 0.17619763414155937, - "grad_norm": 0.5206469297409058, - "learning_rate": 9.961546045854444e-06, - "loss": 0.4368, - "step": 2696 - }, - { - "epoch": 0.1762629893471015, - "grad_norm": 0.4665102958679199, - "learning_rate": 9.961502809682924e-06, - "loss": 0.3791, - "step": 2697 - }, - { - "epoch": 0.17632834455264362, - "grad_norm": 0.43253853917121887, - "learning_rate": 9.961459549312434e-06, - "loss": 0.3437, - "step": 2698 - }, - { - "epoch": 0.17639369975818575, - "grad_norm": 0.4989438056945801, - "learning_rate": 9.961416264743188e-06, - "loss": 0.4552, - "step": 2699 - }, - { - "epoch": 0.17645905496372785, - "grad_norm": 0.5197848081588745, - "learning_rate": 9.961372955975395e-06, - "loss": 0.4618, - "step": 2700 - }, - { - "epoch": 0.17652441016926998, - "grad_norm": 0.44897815585136414, - "learning_rate": 9.961329623009268e-06, - "loss": 0.3801, - "step": 2701 - }, - { - "epoch": 0.1765897653748121, - "grad_norm": 0.49561241269111633, - "learning_rate": 9.961286265845018e-06, - "loss": 0.3879, - "step": 2702 - }, - { - "epoch": 0.17665512058035424, - "grad_norm": 0.5102766752243042, - "learning_rate": 9.961242884482856e-06, - "loss": 0.4175, - "step": 2703 - }, - { - "epoch": 0.17672047578589634, - "grad_norm": 0.5269936919212341, - "learning_rate": 9.961199478922994e-06, - "loss": 0.4487, - "step": 2704 - }, - { - "epoch": 0.17678583099143846, - "grad_norm": 0.44980356097221375, - "learning_rate": 9.961156049165641e-06, - "loss": 0.357, - "step": 2705 - }, - { - "epoch": 0.1768511861969806, - "grad_norm": 0.4681536853313446, - "learning_rate": 9.961112595211014e-06, - "loss": 0.3701, - "step": 2706 - }, - { - "epoch": 0.17691654140252272, - "grad_norm": 0.48139142990112305, - "learning_rate": 9.96106911705932e-06, - "loss": 0.3979, - "step": 2707 - }, - { - "epoch": 0.17698189660806482, - "grad_norm": 0.5184427499771118, - "learning_rate": 9.961025614710775e-06, - "loss": 0.4142, - "step": 2708 - }, - { - "epoch": 0.17704725181360695, - "grad_norm": 0.4597087800502777, - "learning_rate": 9.96098208816559e-06, - "loss": 0.3845, - "step": 2709 - }, - { - "epoch": 0.17711260701914908, - "grad_norm": 0.4684363901615143, - "learning_rate": 9.960938537423976e-06, - "loss": 0.416, - "step": 2710 - }, - { - "epoch": 0.1771779622246912, - "grad_norm": 0.48280373215675354, - "learning_rate": 9.960894962486145e-06, - "loss": 0.412, - "step": 2711 - }, - { - "epoch": 0.1772433174302333, - "grad_norm": 0.47760751843452454, - "learning_rate": 9.96085136335231e-06, - "loss": 0.3837, - "step": 2712 - }, - { - "epoch": 0.17730867263577543, - "grad_norm": 0.4962843656539917, - "learning_rate": 9.960807740022686e-06, - "loss": 0.4204, - "step": 2713 - }, - { - "epoch": 0.17737402784131756, - "grad_norm": 0.4643685817718506, - "learning_rate": 9.960764092497485e-06, - "loss": 0.4018, - "step": 2714 - }, - { - "epoch": 0.1774393830468597, - "grad_norm": 0.4813143014907837, - "learning_rate": 9.960720420776917e-06, - "loss": 0.4005, - "step": 2715 - }, - { - "epoch": 0.17750473825240182, - "grad_norm": 0.4844858944416046, - "learning_rate": 9.960676724861198e-06, - "loss": 0.4202, - "step": 2716 - }, - { - "epoch": 0.17757009345794392, - "grad_norm": 0.506568968296051, - "learning_rate": 9.96063300475054e-06, - "loss": 0.4362, - "step": 2717 - }, - { - "epoch": 0.17763544866348605, - "grad_norm": 0.48755761981010437, - "learning_rate": 9.960589260445156e-06, - "loss": 0.396, - "step": 2718 - }, - { - "epoch": 0.17770080386902817, - "grad_norm": 0.486409991979599, - "learning_rate": 9.960545491945259e-06, - "loss": 0.4419, - "step": 2719 - }, - { - "epoch": 0.1777661590745703, - "grad_norm": 0.4849031865596771, - "learning_rate": 9.960501699251063e-06, - "loss": 0.3919, - "step": 2720 - }, - { - "epoch": 0.1778315142801124, - "grad_norm": 0.49966585636138916, - "learning_rate": 9.960457882362784e-06, - "loss": 0.408, - "step": 2721 - }, - { - "epoch": 0.17789686948565453, - "grad_norm": 0.4932485520839691, - "learning_rate": 9.960414041280632e-06, - "loss": 0.4624, - "step": 2722 - }, - { - "epoch": 0.17796222469119666, - "grad_norm": 0.46550408005714417, - "learning_rate": 9.960370176004822e-06, - "loss": 0.3948, - "step": 2723 - }, - { - "epoch": 0.17802757989673879, - "grad_norm": 0.5008115768432617, - "learning_rate": 9.960326286535569e-06, - "loss": 0.4448, - "step": 2724 - }, - { - "epoch": 0.17809293510228089, - "grad_norm": 0.5356562733650208, - "learning_rate": 9.960282372873086e-06, - "loss": 0.516, - "step": 2725 - }, - { - "epoch": 0.17815829030782301, - "grad_norm": 0.5430569648742676, - "learning_rate": 9.960238435017586e-06, - "loss": 0.4586, - "step": 2726 - }, - { - "epoch": 0.17822364551336514, - "grad_norm": 0.48317262530326843, - "learning_rate": 9.960194472969286e-06, - "loss": 0.4023, - "step": 2727 - }, - { - "epoch": 0.17828900071890727, - "grad_norm": 0.5187653303146362, - "learning_rate": 9.9601504867284e-06, - "loss": 0.4474, - "step": 2728 - }, - { - "epoch": 0.17835435592444937, - "grad_norm": 0.4397761821746826, - "learning_rate": 9.960106476295142e-06, - "loss": 0.3827, - "step": 2729 - }, - { - "epoch": 0.1784197111299915, - "grad_norm": 0.44425103068351746, - "learning_rate": 9.960062441669724e-06, - "loss": 0.3933, - "step": 2730 - }, - { - "epoch": 0.17848506633553363, - "grad_norm": 0.47415691614151, - "learning_rate": 9.960018382852364e-06, - "loss": 0.3889, - "step": 2731 - }, - { - "epoch": 0.17855042154107575, - "grad_norm": 0.489446759223938, - "learning_rate": 9.959974299843277e-06, - "loss": 0.4232, - "step": 2732 - }, - { - "epoch": 0.17861577674661785, - "grad_norm": 0.4992932379245758, - "learning_rate": 9.959930192642676e-06, - "loss": 0.448, - "step": 2733 - }, - { - "epoch": 0.17868113195215998, - "grad_norm": 0.47179922461509705, - "learning_rate": 9.959886061250778e-06, - "loss": 0.3851, - "step": 2734 - }, - { - "epoch": 0.1787464871577021, - "grad_norm": 0.47768720984458923, - "learning_rate": 9.959841905667798e-06, - "loss": 0.4111, - "step": 2735 - }, - { - "epoch": 0.17881184236324424, - "grad_norm": 0.4497457444667816, - "learning_rate": 9.95979772589395e-06, - "loss": 0.3718, - "step": 2736 - }, - { - "epoch": 0.17887719756878637, - "grad_norm": 0.4446553885936737, - "learning_rate": 9.95975352192945e-06, - "loss": 0.3936, - "step": 2737 - }, - { - "epoch": 0.17894255277432847, - "grad_norm": 0.541711151599884, - "learning_rate": 9.959709293774513e-06, - "loss": 0.4346, - "step": 2738 - }, - { - "epoch": 0.1790079079798706, - "grad_norm": 0.4848293364048004, - "learning_rate": 9.959665041429355e-06, - "loss": 0.39, - "step": 2739 - }, - { - "epoch": 0.17907326318541272, - "grad_norm": 0.4830087721347809, - "learning_rate": 9.959620764894196e-06, - "loss": 0.4342, - "step": 2740 - }, - { - "epoch": 0.17913861839095485, - "grad_norm": 0.48588570952415466, - "learning_rate": 9.959576464169245e-06, - "loss": 0.3978, - "step": 2741 - }, - { - "epoch": 0.17920397359649695, - "grad_norm": 0.5507774353027344, - "learning_rate": 9.959532139254723e-06, - "loss": 0.4369, - "step": 2742 - }, - { - "epoch": 0.17926932880203908, - "grad_norm": 0.468826562166214, - "learning_rate": 9.959487790150844e-06, - "loss": 0.4161, - "step": 2743 - }, - { - "epoch": 0.1793346840075812, - "grad_norm": 0.45922598242759705, - "learning_rate": 9.959443416857827e-06, - "loss": 0.4035, - "step": 2744 - }, - { - "epoch": 0.17940003921312334, - "grad_norm": 0.5572922825813293, - "learning_rate": 9.959399019375884e-06, - "loss": 0.4935, - "step": 2745 - }, - { - "epoch": 0.17946539441866544, - "grad_norm": 0.5046947002410889, - "learning_rate": 9.959354597705232e-06, - "loss": 0.4202, - "step": 2746 - }, - { - "epoch": 0.17953074962420756, - "grad_norm": 0.5219874978065491, - "learning_rate": 9.959310151846092e-06, - "loss": 0.441, - "step": 2747 - }, - { - "epoch": 0.1795961048297497, - "grad_norm": 0.5299882888793945, - "learning_rate": 9.95926568179868e-06, - "loss": 0.4974, - "step": 2748 - }, - { - "epoch": 0.17966146003529182, - "grad_norm": 0.4867027997970581, - "learning_rate": 9.959221187563208e-06, - "loss": 0.4215, - "step": 2749 - }, - { - "epoch": 0.17972681524083392, - "grad_norm": 0.4667873978614807, - "learning_rate": 9.959176669139898e-06, - "loss": 0.4003, - "step": 2750 - }, - { - "epoch": 0.17979217044637605, - "grad_norm": 0.5644049048423767, - "learning_rate": 9.959132126528965e-06, - "loss": 0.4377, - "step": 2751 - }, - { - "epoch": 0.17985752565191818, - "grad_norm": 0.4796614944934845, - "learning_rate": 9.959087559730627e-06, - "loss": 0.4016, - "step": 2752 - }, - { - "epoch": 0.1799228808574603, - "grad_norm": 0.49181193113327026, - "learning_rate": 9.959042968745101e-06, - "loss": 0.3768, - "step": 2753 - }, - { - "epoch": 0.1799882360630024, - "grad_norm": 0.5339409112930298, - "learning_rate": 9.958998353572605e-06, - "loss": 0.4718, - "step": 2754 - }, - { - "epoch": 0.18005359126854453, - "grad_norm": 0.4908137619495392, - "learning_rate": 9.958953714213355e-06, - "loss": 0.4309, - "step": 2755 - }, - { - "epoch": 0.18011894647408666, - "grad_norm": 0.44965025782585144, - "learning_rate": 9.95890905066757e-06, - "loss": 0.3547, - "step": 2756 - }, - { - "epoch": 0.1801843016796288, - "grad_norm": 0.4824472963809967, - "learning_rate": 9.958864362935468e-06, - "loss": 0.438, - "step": 2757 - }, - { - "epoch": 0.18024965688517092, - "grad_norm": 0.5025871992111206, - "learning_rate": 9.958819651017266e-06, - "loss": 0.3762, - "step": 2758 - }, - { - "epoch": 0.18031501209071302, - "grad_norm": 0.5372518301010132, - "learning_rate": 9.958774914913183e-06, - "loss": 0.4312, - "step": 2759 - }, - { - "epoch": 0.18038036729625515, - "grad_norm": 0.46665894985198975, - "learning_rate": 9.958730154623436e-06, - "loss": 0.4116, - "step": 2760 - }, - { - "epoch": 0.18044572250179727, - "grad_norm": 0.4289593994617462, - "learning_rate": 9.958685370148244e-06, - "loss": 0.3363, - "step": 2761 - }, - { - "epoch": 0.1805110777073394, - "grad_norm": 0.5162213444709778, - "learning_rate": 9.958640561487826e-06, - "loss": 0.4618, - "step": 2762 - }, - { - "epoch": 0.1805764329128815, - "grad_norm": 0.5332930088043213, - "learning_rate": 9.958595728642401e-06, - "loss": 0.4222, - "step": 2763 - }, - { - "epoch": 0.18064178811842363, - "grad_norm": 0.4913944900035858, - "learning_rate": 9.958550871612186e-06, - "loss": 0.3846, - "step": 2764 - }, - { - "epoch": 0.18070714332396576, - "grad_norm": 0.451699823141098, - "learning_rate": 9.958505990397402e-06, - "loss": 0.3482, - "step": 2765 - }, - { - "epoch": 0.18077249852950789, - "grad_norm": 0.46061721444129944, - "learning_rate": 9.958461084998265e-06, - "loss": 0.357, - "step": 2766 - }, - { - "epoch": 0.18083785373504999, - "grad_norm": 0.54320228099823, - "learning_rate": 9.958416155414996e-06, - "loss": 0.5188, - "step": 2767 - }, - { - "epoch": 0.18090320894059211, - "grad_norm": 0.49299147725105286, - "learning_rate": 9.958371201647814e-06, - "loss": 0.3738, - "step": 2768 - }, - { - "epoch": 0.18096856414613424, - "grad_norm": 0.4782959818840027, - "learning_rate": 9.958326223696938e-06, - "loss": 0.4039, - "step": 2769 - }, - { - "epoch": 0.18103391935167637, - "grad_norm": 0.4619569480419159, - "learning_rate": 9.958281221562586e-06, - "loss": 0.397, - "step": 2770 - }, - { - "epoch": 0.18109927455721847, - "grad_norm": 0.501964271068573, - "learning_rate": 9.958236195244981e-06, - "loss": 0.4531, - "step": 2771 - }, - { - "epoch": 0.1811646297627606, - "grad_norm": 0.5149459838867188, - "learning_rate": 9.958191144744339e-06, - "loss": 0.4573, - "step": 2772 - }, - { - "epoch": 0.18122998496830273, - "grad_norm": 0.5695905089378357, - "learning_rate": 9.958146070060881e-06, - "loss": 0.486, - "step": 2773 - }, - { - "epoch": 0.18129534017384485, - "grad_norm": 0.5138512849807739, - "learning_rate": 9.958100971194827e-06, - "loss": 0.4179, - "step": 2774 - }, - { - "epoch": 0.18136069537938695, - "grad_norm": 0.533556342124939, - "learning_rate": 9.958055848146398e-06, - "loss": 0.4039, - "step": 2775 - }, - { - "epoch": 0.18142605058492908, - "grad_norm": 0.48465925455093384, - "learning_rate": 9.95801070091581e-06, - "loss": 0.3961, - "step": 2776 - }, - { - "epoch": 0.1814914057904712, - "grad_norm": 0.4893660843372345, - "learning_rate": 9.957965529503288e-06, - "loss": 0.4316, - "step": 2777 - }, - { - "epoch": 0.18155676099601334, - "grad_norm": 0.49492719769477844, - "learning_rate": 9.957920333909051e-06, - "loss": 0.4537, - "step": 2778 - }, - { - "epoch": 0.18162211620155547, - "grad_norm": 0.5087707042694092, - "learning_rate": 9.957875114133319e-06, - "loss": 0.438, - "step": 2779 - }, - { - "epoch": 0.18168747140709757, - "grad_norm": 0.43353673815727234, - "learning_rate": 9.957829870176312e-06, - "loss": 0.3576, - "step": 2780 - }, - { - "epoch": 0.1817528266126397, - "grad_norm": 0.4844864010810852, - "learning_rate": 9.957784602038252e-06, - "loss": 0.3669, - "step": 2781 - }, - { - "epoch": 0.18181818181818182, - "grad_norm": 0.5208877921104431, - "learning_rate": 9.957739309719357e-06, - "loss": 0.4753, - "step": 2782 - }, - { - "epoch": 0.18188353702372395, - "grad_norm": 0.4982951581478119, - "learning_rate": 9.957693993219852e-06, - "loss": 0.4126, - "step": 2783 - }, - { - "epoch": 0.18194889222926605, - "grad_norm": 0.48803335428237915, - "learning_rate": 9.957648652539955e-06, - "loss": 0.4386, - "step": 2784 - }, - { - "epoch": 0.18201424743480818, - "grad_norm": 0.44746482372283936, - "learning_rate": 9.957603287679888e-06, - "loss": 0.3802, - "step": 2785 - }, - { - "epoch": 0.1820796026403503, - "grad_norm": 0.5041863322257996, - "learning_rate": 9.957557898639871e-06, - "loss": 0.4748, - "step": 2786 - }, - { - "epoch": 0.18214495784589244, - "grad_norm": 0.49817386269569397, - "learning_rate": 9.957512485420129e-06, - "loss": 0.372, - "step": 2787 - }, - { - "epoch": 0.18221031305143454, - "grad_norm": 0.6099628210067749, - "learning_rate": 9.95746704802088e-06, - "loss": 0.4915, - "step": 2788 - }, - { - "epoch": 0.18227566825697666, - "grad_norm": 0.4703923463821411, - "learning_rate": 9.957421586442346e-06, - "loss": 0.3882, - "step": 2789 - }, - { - "epoch": 0.1823410234625188, - "grad_norm": 0.5085967779159546, - "learning_rate": 9.957376100684751e-06, - "loss": 0.4065, - "step": 2790 - }, - { - "epoch": 0.18240637866806092, - "grad_norm": 0.4918684959411621, - "learning_rate": 9.957330590748315e-06, - "loss": 0.3979, - "step": 2791 - }, - { - "epoch": 0.18247173387360302, - "grad_norm": 0.5107892751693726, - "learning_rate": 9.95728505663326e-06, - "loss": 0.4268, - "step": 2792 - }, - { - "epoch": 0.18253708907914515, - "grad_norm": 0.46673840284347534, - "learning_rate": 9.957239498339809e-06, - "loss": 0.3606, - "step": 2793 - }, - { - "epoch": 0.18260244428468728, - "grad_norm": 0.45932820439338684, - "learning_rate": 9.957193915868184e-06, - "loss": 0.3767, - "step": 2794 - }, - { - "epoch": 0.1826677994902294, - "grad_norm": 0.562187910079956, - "learning_rate": 9.957148309218605e-06, - "loss": 0.4199, - "step": 2795 - }, - { - "epoch": 0.1827331546957715, - "grad_norm": 0.4595358371734619, - "learning_rate": 9.957102678391297e-06, - "loss": 0.3737, - "step": 2796 - }, - { - "epoch": 0.18279850990131363, - "grad_norm": 0.5276442766189575, - "learning_rate": 9.957057023386482e-06, - "loss": 0.4423, - "step": 2797 - }, - { - "epoch": 0.18286386510685576, - "grad_norm": 0.49165117740631104, - "learning_rate": 9.957011344204384e-06, - "loss": 0.4345, - "step": 2798 - }, - { - "epoch": 0.1829292203123979, - "grad_norm": 0.4598459303379059, - "learning_rate": 9.956965640845223e-06, - "loss": 0.3825, - "step": 2799 - }, - { - "epoch": 0.18299457551794002, - "grad_norm": 0.4706825315952301, - "learning_rate": 9.956919913309225e-06, - "loss": 0.3574, - "step": 2800 - }, - { - "epoch": 0.18305993072348212, - "grad_norm": 0.4787357449531555, - "learning_rate": 9.956874161596609e-06, - "loss": 0.3828, - "step": 2801 - }, - { - "epoch": 0.18312528592902425, - "grad_norm": 0.5086193680763245, - "learning_rate": 9.9568283857076e-06, - "loss": 0.4072, - "step": 2802 - }, - { - "epoch": 0.18319064113456637, - "grad_norm": 0.5089016556739807, - "learning_rate": 9.956782585642424e-06, - "loss": 0.4356, - "step": 2803 - }, - { - "epoch": 0.1832559963401085, - "grad_norm": 0.4908629059791565, - "learning_rate": 9.956736761401302e-06, - "loss": 0.3937, - "step": 2804 - }, - { - "epoch": 0.1833213515456506, - "grad_norm": 0.5212690234184265, - "learning_rate": 9.956690912984457e-06, - "loss": 0.4683, - "step": 2805 - }, - { - "epoch": 0.18338670675119273, - "grad_norm": 0.5048547983169556, - "learning_rate": 9.956645040392111e-06, - "loss": 0.4163, - "step": 2806 - }, - { - "epoch": 0.18345206195673486, - "grad_norm": 0.5160130858421326, - "learning_rate": 9.956599143624495e-06, - "loss": 0.4377, - "step": 2807 - }, - { - "epoch": 0.18351741716227699, - "grad_norm": 0.47323620319366455, - "learning_rate": 9.956553222681825e-06, - "loss": 0.4034, - "step": 2808 - }, - { - "epoch": 0.18358277236781909, - "grad_norm": 0.4883844256401062, - "learning_rate": 9.956507277564328e-06, - "loss": 0.396, - "step": 2809 - }, - { - "epoch": 0.18364812757336121, - "grad_norm": 0.49567705392837524, - "learning_rate": 9.956461308272227e-06, - "loss": 0.3822, - "step": 2810 - }, - { - "epoch": 0.18371348277890334, - "grad_norm": 0.5009114146232605, - "learning_rate": 9.956415314805747e-06, - "loss": 0.429, - "step": 2811 - }, - { - "epoch": 0.18377883798444547, - "grad_norm": 0.4522407352924347, - "learning_rate": 9.956369297165113e-06, - "loss": 0.3981, - "step": 2812 - }, - { - "epoch": 0.18384419318998757, - "grad_norm": 0.46104896068573, - "learning_rate": 9.956323255350551e-06, - "loss": 0.3663, - "step": 2813 - }, - { - "epoch": 0.1839095483955297, - "grad_norm": 0.6915690898895264, - "learning_rate": 9.956277189362281e-06, - "loss": 0.4644, - "step": 2814 - }, - { - "epoch": 0.18397490360107183, - "grad_norm": 0.47293227910995483, - "learning_rate": 9.95623109920053e-06, - "loss": 0.4012, - "step": 2815 - }, - { - "epoch": 0.18404025880661395, - "grad_norm": 0.49006471037864685, - "learning_rate": 9.956184984865524e-06, - "loss": 0.4488, - "step": 2816 - }, - { - "epoch": 0.18410561401215605, - "grad_norm": 0.47831088304519653, - "learning_rate": 9.956138846357488e-06, - "loss": 0.4071, - "step": 2817 - }, - { - "epoch": 0.18417096921769818, - "grad_norm": 0.5056982040405273, - "learning_rate": 9.956092683676644e-06, - "loss": 0.4303, - "step": 2818 - }, - { - "epoch": 0.1842363244232403, - "grad_norm": 0.5214632153511047, - "learning_rate": 9.95604649682322e-06, - "loss": 0.3883, - "step": 2819 - }, - { - "epoch": 0.18430167962878244, - "grad_norm": 0.5154610276222229, - "learning_rate": 9.95600028579744e-06, - "loss": 0.4622, - "step": 2820 - }, - { - "epoch": 0.18436703483432457, - "grad_norm": 0.5146011710166931, - "learning_rate": 9.955954050599529e-06, - "loss": 0.477, - "step": 2821 - }, - { - "epoch": 0.18443239003986667, - "grad_norm": 0.5352776050567627, - "learning_rate": 9.955907791229713e-06, - "loss": 0.444, - "step": 2822 - }, - { - "epoch": 0.1844977452454088, - "grad_norm": 0.5003596544265747, - "learning_rate": 9.955861507688219e-06, - "loss": 0.422, - "step": 2823 - }, - { - "epoch": 0.18456310045095092, - "grad_norm": 0.5347896814346313, - "learning_rate": 9.955815199975271e-06, - "loss": 0.507, - "step": 2824 - }, - { - "epoch": 0.18462845565649305, - "grad_norm": 0.5119844675064087, - "learning_rate": 9.955768868091096e-06, - "loss": 0.4488, - "step": 2825 - }, - { - "epoch": 0.18469381086203515, - "grad_norm": 0.5100293159484863, - "learning_rate": 9.95572251203592e-06, - "loss": 0.4442, - "step": 2826 - }, - { - "epoch": 0.18475916606757728, - "grad_norm": 0.4475594460964203, - "learning_rate": 9.955676131809966e-06, - "loss": 0.3668, - "step": 2827 - }, - { - "epoch": 0.1848245212731194, - "grad_norm": 0.4683065116405487, - "learning_rate": 9.955629727413465e-06, - "loss": 0.4293, - "step": 2828 - }, - { - "epoch": 0.18488987647866154, - "grad_norm": 0.4584593176841736, - "learning_rate": 9.955583298846638e-06, - "loss": 0.3729, - "step": 2829 - }, - { - "epoch": 0.18495523168420364, - "grad_norm": 0.4597283899784088, - "learning_rate": 9.955536846109717e-06, - "loss": 0.3884, - "step": 2830 - }, - { - "epoch": 0.18502058688974576, - "grad_norm": 0.4883746802806854, - "learning_rate": 9.955490369202926e-06, - "loss": 0.425, - "step": 2831 - }, - { - "epoch": 0.1850859420952879, - "grad_norm": 0.4885231554508209, - "learning_rate": 9.95544386812649e-06, - "loss": 0.419, - "step": 2832 - }, - { - "epoch": 0.18515129730083002, - "grad_norm": 0.4709199368953705, - "learning_rate": 9.95539734288064e-06, - "loss": 0.4117, - "step": 2833 - }, - { - "epoch": 0.18521665250637212, - "grad_norm": 0.48245787620544434, - "learning_rate": 9.955350793465599e-06, - "loss": 0.4371, - "step": 2834 - }, - { - "epoch": 0.18528200771191425, - "grad_norm": 0.42119458317756653, - "learning_rate": 9.955304219881597e-06, - "loss": 0.3272, - "step": 2835 - }, - { - "epoch": 0.18534736291745638, - "grad_norm": 0.476041316986084, - "learning_rate": 9.955257622128857e-06, - "loss": 0.3919, - "step": 2836 - }, - { - "epoch": 0.1854127181229985, - "grad_norm": 0.48731857538223267, - "learning_rate": 9.95521100020761e-06, - "loss": 0.4009, - "step": 2837 - }, - { - "epoch": 0.1854780733285406, - "grad_norm": 0.5108567476272583, - "learning_rate": 9.955164354118082e-06, - "loss": 0.4774, - "step": 2838 - }, - { - "epoch": 0.18554342853408273, - "grad_norm": 0.47319793701171875, - "learning_rate": 9.955117683860502e-06, - "loss": 0.4241, - "step": 2839 - }, - { - "epoch": 0.18560878373962486, - "grad_norm": 0.4684969186782837, - "learning_rate": 9.955070989435097e-06, - "loss": 0.3701, - "step": 2840 - }, - { - "epoch": 0.185674138945167, - "grad_norm": 0.5024217367172241, - "learning_rate": 9.955024270842093e-06, - "loss": 0.4255, - "step": 2841 - }, - { - "epoch": 0.18573949415070912, - "grad_norm": 0.485853374004364, - "learning_rate": 9.954977528081718e-06, - "loss": 0.3623, - "step": 2842 - }, - { - "epoch": 0.18580484935625122, - "grad_norm": 0.49599698185920715, - "learning_rate": 9.954930761154202e-06, - "loss": 0.4565, - "step": 2843 - }, - { - "epoch": 0.18587020456179335, - "grad_norm": 0.49961188435554504, - "learning_rate": 9.954883970059774e-06, - "loss": 0.4354, - "step": 2844 - }, - { - "epoch": 0.18593555976733547, - "grad_norm": 0.4934704601764679, - "learning_rate": 9.954837154798657e-06, - "loss": 0.4318, - "step": 2845 - }, - { - "epoch": 0.1860009149728776, - "grad_norm": 0.47270822525024414, - "learning_rate": 9.954790315371084e-06, - "loss": 0.3625, - "step": 2846 - }, - { - "epoch": 0.1860662701784197, - "grad_norm": 0.4581604301929474, - "learning_rate": 9.954743451777283e-06, - "loss": 0.362, - "step": 2847 - }, - { - "epoch": 0.18613162538396183, - "grad_norm": 0.5095042586326599, - "learning_rate": 9.954696564017481e-06, - "loss": 0.4033, - "step": 2848 - }, - { - "epoch": 0.18619698058950396, - "grad_norm": 0.5206222534179688, - "learning_rate": 9.954649652091908e-06, - "loss": 0.4295, - "step": 2849 - }, - { - "epoch": 0.18626233579504609, - "grad_norm": 0.4605466425418854, - "learning_rate": 9.954602716000792e-06, - "loss": 0.3965, - "step": 2850 - }, - { - "epoch": 0.18632769100058819, - "grad_norm": 0.4901019334793091, - "learning_rate": 9.954555755744362e-06, - "loss": 0.4067, - "step": 2851 - }, - { - "epoch": 0.18639304620613031, - "grad_norm": 0.5255517959594727, - "learning_rate": 9.954508771322846e-06, - "loss": 0.4333, - "step": 2852 - }, - { - "epoch": 0.18645840141167244, - "grad_norm": 0.4537615180015564, - "learning_rate": 9.954461762736477e-06, - "loss": 0.369, - "step": 2853 - }, - { - "epoch": 0.18652375661721457, - "grad_norm": 0.4792485237121582, - "learning_rate": 9.954414729985478e-06, - "loss": 0.3713, - "step": 2854 - }, - { - "epoch": 0.18658911182275667, - "grad_norm": 0.4976639449596405, - "learning_rate": 9.954367673070085e-06, - "loss": 0.4186, - "step": 2855 - }, - { - "epoch": 0.1866544670282988, - "grad_norm": 0.49446138739585876, - "learning_rate": 9.954320591990522e-06, - "loss": 0.4391, - "step": 2856 - }, - { - "epoch": 0.18671982223384093, - "grad_norm": 0.5113913416862488, - "learning_rate": 9.954273486747022e-06, - "loss": 0.4553, - "step": 2857 - }, - { - "epoch": 0.18678517743938305, - "grad_norm": 0.480999231338501, - "learning_rate": 9.954226357339815e-06, - "loss": 0.4054, - "step": 2858 - }, - { - "epoch": 0.18685053264492515, - "grad_norm": 0.6251979470252991, - "learning_rate": 9.954179203769128e-06, - "loss": 0.3825, - "step": 2859 - }, - { - "epoch": 0.18691588785046728, - "grad_norm": 0.48471885919570923, - "learning_rate": 9.954132026035195e-06, - "loss": 0.4072, - "step": 2860 - }, - { - "epoch": 0.1869812430560094, - "grad_norm": 0.4829366207122803, - "learning_rate": 9.954084824138243e-06, - "loss": 0.3581, - "step": 2861 - }, - { - "epoch": 0.18704659826155154, - "grad_norm": 0.5017015933990479, - "learning_rate": 9.954037598078501e-06, - "loss": 0.4154, - "step": 2862 - }, - { - "epoch": 0.18711195346709367, - "grad_norm": 0.5041373372077942, - "learning_rate": 9.953990347856203e-06, - "loss": 0.417, - "step": 2863 - }, - { - "epoch": 0.18717730867263577, - "grad_norm": 0.5277778506278992, - "learning_rate": 9.95394307347158e-06, - "loss": 0.403, - "step": 2864 - }, - { - "epoch": 0.1872426638781779, - "grad_norm": 0.5407474637031555, - "learning_rate": 9.953895774924857e-06, - "loss": 0.4373, - "step": 2865 - }, - { - "epoch": 0.18730801908372002, - "grad_norm": 0.5035947561264038, - "learning_rate": 9.95384845221627e-06, - "loss": 0.3907, - "step": 2866 - }, - { - "epoch": 0.18737337428926215, - "grad_norm": 0.5016087889671326, - "learning_rate": 9.953801105346047e-06, - "loss": 0.4095, - "step": 2867 - }, - { - "epoch": 0.18743872949480425, - "grad_norm": 0.4977370798587799, - "learning_rate": 9.95375373431442e-06, - "loss": 0.3988, - "step": 2868 - }, - { - "epoch": 0.18750408470034638, - "grad_norm": 0.5564749836921692, - "learning_rate": 9.95370633912162e-06, - "loss": 0.4316, - "step": 2869 - }, - { - "epoch": 0.1875694399058885, - "grad_norm": 0.5515015125274658, - "learning_rate": 9.95365891976788e-06, - "loss": 0.438, - "step": 2870 - }, - { - "epoch": 0.18763479511143064, - "grad_norm": 0.5201045274734497, - "learning_rate": 9.953611476253427e-06, - "loss": 0.4856, - "step": 2871 - }, - { - "epoch": 0.18770015031697274, - "grad_norm": 0.5049932599067688, - "learning_rate": 9.953564008578494e-06, - "loss": 0.4252, - "step": 2872 - }, - { - "epoch": 0.18776550552251486, - "grad_norm": 0.5389835238456726, - "learning_rate": 9.953516516743316e-06, - "loss": 0.407, - "step": 2873 - }, - { - "epoch": 0.187830860728057, - "grad_norm": 0.5269778966903687, - "learning_rate": 9.953469000748119e-06, - "loss": 0.4253, - "step": 2874 - }, - { - "epoch": 0.18789621593359912, - "grad_norm": 0.5148095488548279, - "learning_rate": 9.95342146059314e-06, - "loss": 0.3965, - "step": 2875 - }, - { - "epoch": 0.18796157113914122, - "grad_norm": 0.49052777886390686, - "learning_rate": 9.953373896278606e-06, - "loss": 0.4362, - "step": 2876 - }, - { - "epoch": 0.18802692634468335, - "grad_norm": 0.5031803250312805, - "learning_rate": 9.953326307804754e-06, - "loss": 0.464, - "step": 2877 - }, - { - "epoch": 0.18809228155022548, - "grad_norm": 0.4811420142650604, - "learning_rate": 9.953278695171813e-06, - "loss": 0.3791, - "step": 2878 - }, - { - "epoch": 0.1881576367557676, - "grad_norm": 0.5202829241752625, - "learning_rate": 9.953231058380016e-06, - "loss": 0.4402, - "step": 2879 - }, - { - "epoch": 0.1882229919613097, - "grad_norm": 0.4753965437412262, - "learning_rate": 9.953183397429594e-06, - "loss": 0.4359, - "step": 2880 - }, - { - "epoch": 0.18828834716685183, - "grad_norm": 0.5079650282859802, - "learning_rate": 9.95313571232078e-06, - "loss": 0.4241, - "step": 2881 - }, - { - "epoch": 0.18835370237239396, - "grad_norm": 0.5138829946517944, - "learning_rate": 9.95308800305381e-06, - "loss": 0.4323, - "step": 2882 - }, - { - "epoch": 0.1884190575779361, - "grad_norm": 0.47650250792503357, - "learning_rate": 9.95304026962891e-06, - "loss": 0.3822, - "step": 2883 - }, - { - "epoch": 0.18848441278347822, - "grad_norm": 0.49669456481933594, - "learning_rate": 9.95299251204632e-06, - "loss": 0.4221, - "step": 2884 - }, - { - "epoch": 0.18854976798902032, - "grad_norm": 0.47730210423469543, - "learning_rate": 9.952944730306269e-06, - "loss": 0.4177, - "step": 2885 - }, - { - "epoch": 0.18861512319456245, - "grad_norm": 0.5034216046333313, - "learning_rate": 9.95289692440899e-06, - "loss": 0.4301, - "step": 2886 - }, - { - "epoch": 0.18868047840010457, - "grad_norm": 0.47410061955451965, - "learning_rate": 9.952849094354718e-06, - "loss": 0.4136, - "step": 2887 - }, - { - "epoch": 0.1887458336056467, - "grad_norm": 0.502212643623352, - "learning_rate": 9.952801240143683e-06, - "loss": 0.4182, - "step": 2888 - }, - { - "epoch": 0.1888111888111888, - "grad_norm": 0.47118884325027466, - "learning_rate": 9.952753361776122e-06, - "loss": 0.4227, - "step": 2889 - }, - { - "epoch": 0.18887654401673093, - "grad_norm": 0.49516400694847107, - "learning_rate": 9.952705459252268e-06, - "loss": 0.4109, - "step": 2890 - }, - { - "epoch": 0.18894189922227306, - "grad_norm": 0.4778333902359009, - "learning_rate": 9.952657532572351e-06, - "loss": 0.3907, - "step": 2891 - }, - { - "epoch": 0.18900725442781519, - "grad_norm": 0.5070626139640808, - "learning_rate": 9.95260958173661e-06, - "loss": 0.4215, - "step": 2892 - }, - { - "epoch": 0.18907260963335729, - "grad_norm": 0.4762381613254547, - "learning_rate": 9.952561606745276e-06, - "loss": 0.3843, - "step": 2893 - }, - { - "epoch": 0.18913796483889941, - "grad_norm": 0.5275992751121521, - "learning_rate": 9.952513607598582e-06, - "loss": 0.4819, - "step": 2894 - }, - { - "epoch": 0.18920332004444154, - "grad_norm": 0.47402840852737427, - "learning_rate": 9.952465584296764e-06, - "loss": 0.3812, - "step": 2895 - }, - { - "epoch": 0.18926867524998367, - "grad_norm": 0.4844459295272827, - "learning_rate": 9.952417536840056e-06, - "loss": 0.4263, - "step": 2896 - }, - { - "epoch": 0.18933403045552577, - "grad_norm": 0.4950529634952545, - "learning_rate": 9.952369465228692e-06, - "loss": 0.4427, - "step": 2897 - }, - { - "epoch": 0.1893993856610679, - "grad_norm": 0.4626738727092743, - "learning_rate": 9.952321369462906e-06, - "loss": 0.377, - "step": 2898 - }, - { - "epoch": 0.18946474086661003, - "grad_norm": 0.49446913599967957, - "learning_rate": 9.952273249542934e-06, - "loss": 0.4158, - "step": 2899 - }, - { - "epoch": 0.18953009607215215, - "grad_norm": 0.47827136516571045, - "learning_rate": 9.952225105469008e-06, - "loss": 0.4088, - "step": 2900 - }, - { - "epoch": 0.18959545127769425, - "grad_norm": 0.47055137157440186, - "learning_rate": 9.952176937241367e-06, - "loss": 0.3736, - "step": 2901 - }, - { - "epoch": 0.18966080648323638, - "grad_norm": 0.5122933983802795, - "learning_rate": 9.95212874486024e-06, - "loss": 0.4083, - "step": 2902 - }, - { - "epoch": 0.1897261616887785, - "grad_norm": 0.49091583490371704, - "learning_rate": 9.952080528325868e-06, - "loss": 0.4331, - "step": 2903 - }, - { - "epoch": 0.18979151689432064, - "grad_norm": 0.5090427994728088, - "learning_rate": 9.952032287638484e-06, - "loss": 0.3838, - "step": 2904 - }, - { - "epoch": 0.18985687209986277, - "grad_norm": 0.5443885326385498, - "learning_rate": 9.951984022798322e-06, - "loss": 0.47, - "step": 2905 - }, - { - "epoch": 0.18992222730540487, - "grad_norm": 0.4834836423397064, - "learning_rate": 9.95193573380562e-06, - "loss": 0.4003, - "step": 2906 - }, - { - "epoch": 0.189987582510947, - "grad_norm": 0.49716243147850037, - "learning_rate": 9.951887420660609e-06, - "loss": 0.3943, - "step": 2907 - }, - { - "epoch": 0.19005293771648912, - "grad_norm": 0.4701566994190216, - "learning_rate": 9.95183908336353e-06, - "loss": 0.3968, - "step": 2908 - }, - { - "epoch": 0.19011829292203125, - "grad_norm": 0.5198107957839966, - "learning_rate": 9.951790721914615e-06, - "loss": 0.484, - "step": 2909 - }, - { - "epoch": 0.19018364812757335, - "grad_norm": 0.5403050780296326, - "learning_rate": 9.951742336314101e-06, - "loss": 0.4833, - "step": 2910 - }, - { - "epoch": 0.19024900333311548, - "grad_norm": 0.9144891500473022, - "learning_rate": 9.951693926562225e-06, - "loss": 0.3682, - "step": 2911 - }, - { - "epoch": 0.1903143585386576, - "grad_norm": 0.5206704139709473, - "learning_rate": 9.951645492659222e-06, - "loss": 0.4376, - "step": 2912 - }, - { - "epoch": 0.19037971374419974, - "grad_norm": 0.48614439368247986, - "learning_rate": 9.95159703460533e-06, - "loss": 0.4016, - "step": 2913 - }, - { - "epoch": 0.19044506894974184, - "grad_norm": 0.526782214641571, - "learning_rate": 9.95154855240078e-06, - "loss": 0.4798, - "step": 2914 - }, - { - "epoch": 0.19051042415528396, - "grad_norm": 0.47169598937034607, - "learning_rate": 9.951500046045815e-06, - "loss": 0.3736, - "step": 2915 - }, - { - "epoch": 0.1905757793608261, - "grad_norm": 0.4730372428894043, - "learning_rate": 9.95145151554067e-06, - "loss": 0.4016, - "step": 2916 - }, - { - "epoch": 0.19064113456636822, - "grad_norm": 0.5132974982261658, - "learning_rate": 9.95140296088558e-06, - "loss": 0.4528, - "step": 2917 - }, - { - "epoch": 0.19070648977191032, - "grad_norm": 0.511885404586792, - "learning_rate": 9.95135438208078e-06, - "loss": 0.431, - "step": 2918 - }, - { - "epoch": 0.19077184497745245, - "grad_norm": 0.5020025968551636, - "learning_rate": 9.951305779126512e-06, - "loss": 0.421, - "step": 2919 - }, - { - "epoch": 0.19083720018299458, - "grad_norm": 0.4954475164413452, - "learning_rate": 9.951257152023008e-06, - "loss": 0.4234, - "step": 2920 - }, - { - "epoch": 0.1909025553885367, - "grad_norm": 0.4862518906593323, - "learning_rate": 9.951208500770509e-06, - "loss": 0.3815, - "step": 2921 - }, - { - "epoch": 0.1909679105940788, - "grad_norm": 0.4983312487602234, - "learning_rate": 9.951159825369251e-06, - "loss": 0.4354, - "step": 2922 - }, - { - "epoch": 0.19103326579962093, - "grad_norm": 0.5410768985748291, - "learning_rate": 9.951111125819472e-06, - "loss": 0.4781, - "step": 2923 - }, - { - "epoch": 0.19109862100516306, - "grad_norm": 0.46137362718582153, - "learning_rate": 9.951062402121407e-06, - "loss": 0.3955, - "step": 2924 - }, - { - "epoch": 0.1911639762107052, - "grad_norm": 0.4605845510959625, - "learning_rate": 9.951013654275297e-06, - "loss": 0.3869, - "step": 2925 - }, - { - "epoch": 0.19122933141624732, - "grad_norm": 0.4990957975387573, - "learning_rate": 9.950964882281378e-06, - "loss": 0.4599, - "step": 2926 - }, - { - "epoch": 0.19129468662178942, - "grad_norm": 0.46196988224983215, - "learning_rate": 9.950916086139888e-06, - "loss": 0.3958, - "step": 2927 - }, - { - "epoch": 0.19136004182733155, - "grad_norm": 0.5079265832901001, - "learning_rate": 9.950867265851065e-06, - "loss": 0.4342, - "step": 2928 - }, - { - "epoch": 0.19142539703287367, - "grad_norm": 0.49464908242225647, - "learning_rate": 9.950818421415146e-06, - "loss": 0.3949, - "step": 2929 - }, - { - "epoch": 0.1914907522384158, - "grad_norm": 0.491862028837204, - "learning_rate": 9.950769552832372e-06, - "loss": 0.429, - "step": 2930 - }, - { - "epoch": 0.1915561074439579, - "grad_norm": 0.4822671115398407, - "learning_rate": 9.95072066010298e-06, - "loss": 0.4328, - "step": 2931 - }, - { - "epoch": 0.19162146264950003, - "grad_norm": 0.49650049209594727, - "learning_rate": 9.950671743227206e-06, - "loss": 0.4325, - "step": 2932 - }, - { - "epoch": 0.19168681785504216, - "grad_norm": 0.4699820280075073, - "learning_rate": 9.950622802205295e-06, - "loss": 0.3996, - "step": 2933 - }, - { - "epoch": 0.19175217306058429, - "grad_norm": 0.48909181356430054, - "learning_rate": 9.950573837037478e-06, - "loss": 0.3859, - "step": 2934 - }, - { - "epoch": 0.19181752826612639, - "grad_norm": 0.4794492721557617, - "learning_rate": 9.950524847723997e-06, - "loss": 0.3664, - "step": 2935 - }, - { - "epoch": 0.19188288347166851, - "grad_norm": 0.5136559009552002, - "learning_rate": 9.950475834265093e-06, - "loss": 0.4798, - "step": 2936 - }, - { - "epoch": 0.19194823867721064, - "grad_norm": 0.4615060091018677, - "learning_rate": 9.950426796661004e-06, - "loss": 0.3884, - "step": 2937 - }, - { - "epoch": 0.19201359388275277, - "grad_norm": 0.47666165232658386, - "learning_rate": 9.950377734911966e-06, - "loss": 0.4247, - "step": 2938 - }, - { - "epoch": 0.19207894908829487, - "grad_norm": 0.4553219676017761, - "learning_rate": 9.950328649018223e-06, - "loss": 0.3678, - "step": 2939 - }, - { - "epoch": 0.192144304293837, - "grad_norm": 0.49629679322242737, - "learning_rate": 9.950279538980012e-06, - "loss": 0.4022, - "step": 2940 - }, - { - "epoch": 0.19220965949937913, - "grad_norm": 0.44633710384368896, - "learning_rate": 9.950230404797571e-06, - "loss": 0.3578, - "step": 2941 - }, - { - "epoch": 0.19227501470492125, - "grad_norm": 0.4926506280899048, - "learning_rate": 9.950181246471143e-06, - "loss": 0.4071, - "step": 2942 - }, - { - "epoch": 0.19234036991046335, - "grad_norm": 0.48583802580833435, - "learning_rate": 9.950132064000967e-06, - "loss": 0.4112, - "step": 2943 - }, - { - "epoch": 0.19240572511600548, - "grad_norm": 0.4745608866214752, - "learning_rate": 9.95008285738728e-06, - "loss": 0.3989, - "step": 2944 - }, - { - "epoch": 0.1924710803215476, - "grad_norm": 0.4737425744533539, - "learning_rate": 9.950033626630324e-06, - "loss": 0.4038, - "step": 2945 - }, - { - "epoch": 0.19253643552708974, - "grad_norm": 0.4979565143585205, - "learning_rate": 9.94998437173034e-06, - "loss": 0.3971, - "step": 2946 - }, - { - "epoch": 0.19260179073263187, - "grad_norm": 0.49709323048591614, - "learning_rate": 9.949935092687566e-06, - "loss": 0.4702, - "step": 2947 - }, - { - "epoch": 0.19266714593817397, - "grad_norm": 0.45512285828590393, - "learning_rate": 9.949885789502246e-06, - "loss": 0.3935, - "step": 2948 - }, - { - "epoch": 0.1927325011437161, - "grad_norm": 0.5323231220245361, - "learning_rate": 9.949836462174618e-06, - "loss": 0.4397, - "step": 2949 - }, - { - "epoch": 0.19279785634925822, - "grad_norm": 0.5058055520057678, - "learning_rate": 9.949787110704921e-06, - "loss": 0.4414, - "step": 2950 - }, - { - "epoch": 0.19286321155480035, - "grad_norm": 0.4774121344089508, - "learning_rate": 9.949737735093398e-06, - "loss": 0.3904, - "step": 2951 - }, - { - "epoch": 0.19292856676034245, - "grad_norm": 0.4506870210170746, - "learning_rate": 9.94968833534029e-06, - "loss": 0.3784, - "step": 2952 - }, - { - "epoch": 0.19299392196588458, - "grad_norm": 0.4744364321231842, - "learning_rate": 9.949638911445838e-06, - "loss": 0.3831, - "step": 2953 - }, - { - "epoch": 0.1930592771714267, - "grad_norm": 0.5002384781837463, - "learning_rate": 9.94958946341028e-06, - "loss": 0.4185, - "step": 2954 - }, - { - "epoch": 0.19312463237696884, - "grad_norm": 0.49564677476882935, - "learning_rate": 9.949539991233863e-06, - "loss": 0.4085, - "step": 2955 - }, - { - "epoch": 0.19318998758251094, - "grad_norm": 0.5562955737113953, - "learning_rate": 9.949490494916822e-06, - "loss": 0.4773, - "step": 2956 - }, - { - "epoch": 0.19325534278805306, - "grad_norm": 0.4735763967037201, - "learning_rate": 9.9494409744594e-06, - "loss": 0.3838, - "step": 2957 - }, - { - "epoch": 0.1933206979935952, - "grad_norm": 0.4815181791782379, - "learning_rate": 9.949391429861843e-06, - "loss": 0.4107, - "step": 2958 - }, - { - "epoch": 0.19338605319913732, - "grad_norm": 0.4927268624305725, - "learning_rate": 9.94934186112439e-06, - "loss": 0.4567, - "step": 2959 - }, - { - "epoch": 0.19345140840467942, - "grad_norm": 0.4868094325065613, - "learning_rate": 9.949292268247279e-06, - "loss": 0.4535, - "step": 2960 - }, - { - "epoch": 0.19351676361022155, - "grad_norm": 0.4772026836872101, - "learning_rate": 9.949242651230756e-06, - "loss": 0.4221, - "step": 2961 - }, - { - "epoch": 0.19358211881576368, - "grad_norm": 0.49434521794319153, - "learning_rate": 9.949193010075063e-06, - "loss": 0.439, - "step": 2962 - }, - { - "epoch": 0.1936474740213058, - "grad_norm": 0.4620160758495331, - "learning_rate": 9.949143344780438e-06, - "loss": 0.4054, - "step": 2963 - }, - { - "epoch": 0.1937128292268479, - "grad_norm": 0.46887150406837463, - "learning_rate": 9.949093655347128e-06, - "loss": 0.4319, - "step": 2964 - }, - { - "epoch": 0.19377818443239003, - "grad_norm": 0.5082137584686279, - "learning_rate": 9.949043941775376e-06, - "loss": 0.4468, - "step": 2965 - }, - { - "epoch": 0.19384353963793216, - "grad_norm": 0.5001013278961182, - "learning_rate": 9.94899420406542e-06, - "loss": 0.4206, - "step": 2966 - }, - { - "epoch": 0.1939088948434743, - "grad_norm": 0.4783688187599182, - "learning_rate": 9.948944442217505e-06, - "loss": 0.4112, - "step": 2967 - }, - { - "epoch": 0.19397425004901642, - "grad_norm": 0.4642834961414337, - "learning_rate": 9.948894656231873e-06, - "loss": 0.4158, - "step": 2968 - }, - { - "epoch": 0.19403960525455852, - "grad_norm": 0.49629834294319153, - "learning_rate": 9.948844846108769e-06, - "loss": 0.4633, - "step": 2969 - }, - { - "epoch": 0.19410496046010065, - "grad_norm": 0.5198444724082947, - "learning_rate": 9.948795011848434e-06, - "loss": 0.4758, - "step": 2970 - }, - { - "epoch": 0.19417031566564277, - "grad_norm": 0.4808283746242523, - "learning_rate": 9.94874515345111e-06, - "loss": 0.4345, - "step": 2971 - }, - { - "epoch": 0.1942356708711849, - "grad_norm": 0.4733369052410126, - "learning_rate": 9.948695270917042e-06, - "loss": 0.4091, - "step": 2972 - }, - { - "epoch": 0.194301026076727, - "grad_norm": 0.49000704288482666, - "learning_rate": 9.948645364246473e-06, - "loss": 0.4366, - "step": 2973 - }, - { - "epoch": 0.19436638128226913, - "grad_norm": 0.49197790026664734, - "learning_rate": 9.948595433439645e-06, - "loss": 0.445, - "step": 2974 - }, - { - "epoch": 0.19443173648781126, - "grad_norm": 0.5117666125297546, - "learning_rate": 9.948545478496804e-06, - "loss": 0.4724, - "step": 2975 - }, - { - "epoch": 0.19449709169335339, - "grad_norm": 0.5310428142547607, - "learning_rate": 9.94849549941819e-06, - "loss": 0.4818, - "step": 2976 - }, - { - "epoch": 0.19456244689889549, - "grad_norm": 0.5212787389755249, - "learning_rate": 9.948445496204053e-06, - "loss": 0.4772, - "step": 2977 - }, - { - "epoch": 0.1946278021044376, - "grad_norm": 0.48359397053718567, - "learning_rate": 9.948395468854631e-06, - "loss": 0.4264, - "step": 2978 - }, - { - "epoch": 0.19469315730997974, - "grad_norm": 0.45742395520210266, - "learning_rate": 9.948345417370171e-06, - "loss": 0.3659, - "step": 2979 - }, - { - "epoch": 0.19475851251552187, - "grad_norm": 0.48544666171073914, - "learning_rate": 9.948295341750915e-06, - "loss": 0.407, - "step": 2980 - }, - { - "epoch": 0.19482386772106397, - "grad_norm": 0.4681014120578766, - "learning_rate": 9.948245241997109e-06, - "loss": 0.3803, - "step": 2981 - }, - { - "epoch": 0.1948892229266061, - "grad_norm": 0.4730170965194702, - "learning_rate": 9.948195118108997e-06, - "loss": 0.4058, - "step": 2982 - }, - { - "epoch": 0.19495457813214823, - "grad_norm": 0.5104221105575562, - "learning_rate": 9.948144970086822e-06, - "loss": 0.4847, - "step": 2983 - }, - { - "epoch": 0.19501993333769035, - "grad_norm": 0.47281453013420105, - "learning_rate": 9.94809479793083e-06, - "loss": 0.4039, - "step": 2984 - }, - { - "epoch": 0.19508528854323245, - "grad_norm": 0.4699348509311676, - "learning_rate": 9.948044601641266e-06, - "loss": 0.3886, - "step": 2985 - }, - { - "epoch": 0.19515064374877458, - "grad_norm": 0.488786518573761, - "learning_rate": 9.947994381218373e-06, - "loss": 0.4694, - "step": 2986 - }, - { - "epoch": 0.1952159989543167, - "grad_norm": 0.5252128839492798, - "learning_rate": 9.9479441366624e-06, - "loss": 0.416, - "step": 2987 - }, - { - "epoch": 0.19528135415985884, - "grad_norm": 0.5052991509437561, - "learning_rate": 9.947893867973586e-06, - "loss": 0.4482, - "step": 2988 - }, - { - "epoch": 0.19534670936540097, - "grad_norm": 0.47287124395370483, - "learning_rate": 9.947843575152182e-06, - "loss": 0.3973, - "step": 2989 - }, - { - "epoch": 0.19541206457094307, - "grad_norm": 0.4809347987174988, - "learning_rate": 9.94779325819843e-06, - "loss": 0.3785, - "step": 2990 - }, - { - "epoch": 0.1954774197764852, - "grad_norm": 0.4892086982727051, - "learning_rate": 9.947742917112577e-06, - "loss": 0.4301, - "step": 2991 - }, - { - "epoch": 0.19554277498202732, - "grad_norm": 0.49457159638404846, - "learning_rate": 9.947692551894867e-06, - "loss": 0.4404, - "step": 2992 - }, - { - "epoch": 0.19560813018756945, - "grad_norm": 0.4926825761795044, - "learning_rate": 9.947642162545546e-06, - "loss": 0.3892, - "step": 2993 - }, - { - "epoch": 0.19567348539311155, - "grad_norm": 0.49925148487091064, - "learning_rate": 9.94759174906486e-06, - "loss": 0.4404, - "step": 2994 - }, - { - "epoch": 0.19573884059865368, - "grad_norm": 0.5001039505004883, - "learning_rate": 9.947541311453056e-06, - "loss": 0.4217, - "step": 2995 - }, - { - "epoch": 0.1958041958041958, - "grad_norm": 0.4720327854156494, - "learning_rate": 9.947490849710378e-06, - "loss": 0.4309, - "step": 2996 - }, - { - "epoch": 0.19586955100973794, - "grad_norm": 0.4850936532020569, - "learning_rate": 9.947440363837073e-06, - "loss": 0.3798, - "step": 2997 - }, - { - "epoch": 0.19593490621528004, - "grad_norm": 0.5073444247245789, - "learning_rate": 9.947389853833389e-06, - "loss": 0.4547, - "step": 2998 - }, - { - "epoch": 0.19600026142082216, - "grad_norm": 0.46684056520462036, - "learning_rate": 9.94733931969957e-06, - "loss": 0.402, - "step": 2999 - }, - { - "epoch": 0.1960656166263643, - "grad_norm": 0.5287966132164001, - "learning_rate": 9.947288761435863e-06, - "loss": 0.4397, - "step": 3000 - }, - { - "epoch": 0.19613097183190642, - "grad_norm": 0.47407302260398865, - "learning_rate": 9.947238179042515e-06, - "loss": 0.4266, - "step": 3001 - }, - { - "epoch": 0.19619632703744852, - "grad_norm": 0.5004463195800781, - "learning_rate": 9.947187572519772e-06, - "loss": 0.3904, - "step": 3002 - }, - { - "epoch": 0.19626168224299065, - "grad_norm": 0.4812561273574829, - "learning_rate": 9.947136941867881e-06, - "loss": 0.4051, - "step": 3003 - }, - { - "epoch": 0.19632703744853278, - "grad_norm": 0.47599342465400696, - "learning_rate": 9.947086287087091e-06, - "loss": 0.3976, - "step": 3004 - }, - { - "epoch": 0.1963923926540749, - "grad_norm": 0.4551607370376587, - "learning_rate": 9.947035608177646e-06, - "loss": 0.3557, - "step": 3005 - }, - { - "epoch": 0.19645774785961703, - "grad_norm": 0.4981020390987396, - "learning_rate": 9.946984905139793e-06, - "loss": 0.4282, - "step": 3006 - }, - { - "epoch": 0.19652310306515913, - "grad_norm": 0.48331186175346375, - "learning_rate": 9.946934177973783e-06, - "loss": 0.4099, - "step": 3007 - }, - { - "epoch": 0.19658845827070126, - "grad_norm": 0.47015613317489624, - "learning_rate": 9.946883426679862e-06, - "loss": 0.4361, - "step": 3008 - }, - { - "epoch": 0.1966538134762434, - "grad_norm": 0.45380699634552, - "learning_rate": 9.946832651258277e-06, - "loss": 0.4162, - "step": 3009 - }, - { - "epoch": 0.19671916868178552, - "grad_norm": 0.4749274253845215, - "learning_rate": 9.946781851709273e-06, - "loss": 0.3918, - "step": 3010 - }, - { - "epoch": 0.19678452388732762, - "grad_norm": 0.4943445920944214, - "learning_rate": 9.946731028033102e-06, - "loss": 0.4397, - "step": 3011 - }, - { - "epoch": 0.19684987909286975, - "grad_norm": 0.5177479982376099, - "learning_rate": 9.946680180230008e-06, - "loss": 0.441, - "step": 3012 - }, - { - "epoch": 0.19691523429841187, - "grad_norm": 0.43935540318489075, - "learning_rate": 9.946629308300242e-06, - "loss": 0.3378, - "step": 3013 - }, - { - "epoch": 0.196980589503954, - "grad_norm": 0.47517162561416626, - "learning_rate": 9.946578412244053e-06, - "loss": 0.4125, - "step": 3014 - }, - { - "epoch": 0.1970459447094961, - "grad_norm": 0.4808422327041626, - "learning_rate": 9.946527492061686e-06, - "loss": 0.4199, - "step": 3015 - }, - { - "epoch": 0.19711129991503823, - "grad_norm": 0.48982810974121094, - "learning_rate": 9.94647654775339e-06, - "loss": 0.4366, - "step": 3016 - }, - { - "epoch": 0.19717665512058036, - "grad_norm": 0.45494890213012695, - "learning_rate": 9.946425579319414e-06, - "loss": 0.377, - "step": 3017 - }, - { - "epoch": 0.19724201032612249, - "grad_norm": 0.46580860018730164, - "learning_rate": 9.946374586760008e-06, - "loss": 0.3914, - "step": 3018 - }, - { - "epoch": 0.19730736553166459, - "grad_norm": 0.48293864727020264, - "learning_rate": 9.94632357007542e-06, - "loss": 0.4335, - "step": 3019 - }, - { - "epoch": 0.1973727207372067, - "grad_norm": 0.48486408591270447, - "learning_rate": 9.946272529265898e-06, - "loss": 0.4141, - "step": 3020 - }, - { - "epoch": 0.19743807594274884, - "grad_norm": 0.4494650363922119, - "learning_rate": 9.946221464331692e-06, - "loss": 0.3404, - "step": 3021 - }, - { - "epoch": 0.19750343114829097, - "grad_norm": 0.48047712445259094, - "learning_rate": 9.946170375273047e-06, - "loss": 0.4371, - "step": 3022 - }, - { - "epoch": 0.19756878635383307, - "grad_norm": 0.5063717365264893, - "learning_rate": 9.946119262090218e-06, - "loss": 0.4458, - "step": 3023 - }, - { - "epoch": 0.1976341415593752, - "grad_norm": 0.46469295024871826, - "learning_rate": 9.946068124783454e-06, - "loss": 0.4195, - "step": 3024 - }, - { - "epoch": 0.19769949676491733, - "grad_norm": 0.49715572595596313, - "learning_rate": 9.946016963353e-06, - "loss": 0.4232, - "step": 3025 - }, - { - "epoch": 0.19776485197045945, - "grad_norm": 0.467678427696228, - "learning_rate": 9.945965777799107e-06, - "loss": 0.4288, - "step": 3026 - }, - { - "epoch": 0.19783020717600158, - "grad_norm": 0.5184245705604553, - "learning_rate": 9.945914568122026e-06, - "loss": 0.4764, - "step": 3027 - }, - { - "epoch": 0.19789556238154368, - "grad_norm": 0.48791640996932983, - "learning_rate": 9.945863334322008e-06, - "loss": 0.3664, - "step": 3028 - }, - { - "epoch": 0.1979609175870858, - "grad_norm": 0.4951387941837311, - "learning_rate": 9.945812076399299e-06, - "loss": 0.4006, - "step": 3029 - }, - { - "epoch": 0.19802627279262794, - "grad_norm": 0.49412912130355835, - "learning_rate": 9.945760794354152e-06, - "loss": 0.4226, - "step": 3030 - }, - { - "epoch": 0.19809162799817007, - "grad_norm": 0.43828338384628296, - "learning_rate": 9.945709488186817e-06, - "loss": 0.3588, - "step": 3031 - }, - { - "epoch": 0.19815698320371217, - "grad_norm": 0.46683600544929504, - "learning_rate": 9.945658157897542e-06, - "loss": 0.4145, - "step": 3032 - }, - { - "epoch": 0.1982223384092543, - "grad_norm": 0.47453755140304565, - "learning_rate": 9.945606803486578e-06, - "loss": 0.4173, - "step": 3033 - }, - { - "epoch": 0.19828769361479642, - "grad_norm": 0.4920898377895355, - "learning_rate": 9.945555424954179e-06, - "loss": 0.4405, - "step": 3034 - }, - { - "epoch": 0.19835304882033855, - "grad_norm": 0.4704682230949402, - "learning_rate": 9.945504022300591e-06, - "loss": 0.3836, - "step": 3035 - }, - { - "epoch": 0.19841840402588065, - "grad_norm": 0.48477762937545776, - "learning_rate": 9.945452595526067e-06, - "loss": 0.3873, - "step": 3036 - }, - { - "epoch": 0.19848375923142278, - "grad_norm": 0.48065242171287537, - "learning_rate": 9.945401144630858e-06, - "loss": 0.3985, - "step": 3037 - }, - { - "epoch": 0.1985491144369649, - "grad_norm": 0.49803870916366577, - "learning_rate": 9.945349669615214e-06, - "loss": 0.45, - "step": 3038 - }, - { - "epoch": 0.19861446964250704, - "grad_norm": 0.5130840539932251, - "learning_rate": 9.945298170479388e-06, - "loss": 0.4503, - "step": 3039 - }, - { - "epoch": 0.19867982484804914, - "grad_norm": 0.49603888392448425, - "learning_rate": 9.945246647223626e-06, - "loss": 0.4267, - "step": 3040 - }, - { - "epoch": 0.19874518005359126, - "grad_norm": 0.4903644323348999, - "learning_rate": 9.945195099848185e-06, - "loss": 0.4383, - "step": 3041 - }, - { - "epoch": 0.1988105352591334, - "grad_norm": 0.5150305032730103, - "learning_rate": 9.945143528353315e-06, - "loss": 0.4184, - "step": 3042 - }, - { - "epoch": 0.19887589046467552, - "grad_norm": 0.5166090130805969, - "learning_rate": 9.945091932739266e-06, - "loss": 0.3575, - "step": 3043 - }, - { - "epoch": 0.19894124567021762, - "grad_norm": 0.5110989809036255, - "learning_rate": 9.94504031300629e-06, - "loss": 0.4477, - "step": 3044 - }, - { - "epoch": 0.19900660087575975, - "grad_norm": 0.5148541927337646, - "learning_rate": 9.94498866915464e-06, - "loss": 0.4442, - "step": 3045 - }, - { - "epoch": 0.19907195608130188, - "grad_norm": 0.5030977725982666, - "learning_rate": 9.944937001184567e-06, - "loss": 0.4418, - "step": 3046 - }, - { - "epoch": 0.199137311286844, - "grad_norm": 0.5268440246582031, - "learning_rate": 9.944885309096323e-06, - "loss": 0.4133, - "step": 3047 - }, - { - "epoch": 0.19920266649238613, - "grad_norm": 0.49540185928344727, - "learning_rate": 9.94483359289016e-06, - "loss": 0.451, - "step": 3048 - }, - { - "epoch": 0.19926802169792823, - "grad_norm": 0.4891604483127594, - "learning_rate": 9.944781852566332e-06, - "loss": 0.4171, - "step": 3049 - }, - { - "epoch": 0.19933337690347036, - "grad_norm": 0.48614418506622314, - "learning_rate": 9.944730088125088e-06, - "loss": 0.3818, - "step": 3050 - }, - { - "epoch": 0.1993987321090125, - "grad_norm": 0.4653158485889435, - "learning_rate": 9.944678299566683e-06, - "loss": 0.4341, - "step": 3051 - }, - { - "epoch": 0.19946408731455462, - "grad_norm": 0.5147649645805359, - "learning_rate": 9.94462648689137e-06, - "loss": 0.4217, - "step": 3052 - }, - { - "epoch": 0.19952944252009672, - "grad_norm": 0.4894815981388092, - "learning_rate": 9.944574650099398e-06, - "loss": 0.4613, - "step": 3053 - }, - { - "epoch": 0.19959479772563885, - "grad_norm": 0.4685996472835541, - "learning_rate": 9.944522789191025e-06, - "loss": 0.4172, - "step": 3054 - }, - { - "epoch": 0.19966015293118097, - "grad_norm": 0.4641586244106293, - "learning_rate": 9.944470904166501e-06, - "loss": 0.3646, - "step": 3055 - }, - { - "epoch": 0.1997255081367231, - "grad_norm": 0.7271279096603394, - "learning_rate": 9.94441899502608e-06, - "loss": 0.4043, - "step": 3056 - }, - { - "epoch": 0.1997908633422652, - "grad_norm": 0.4952391982078552, - "learning_rate": 9.944367061770012e-06, - "loss": 0.4222, - "step": 3057 - }, - { - "epoch": 0.19985621854780733, - "grad_norm": 0.4307084381580353, - "learning_rate": 9.944315104398556e-06, - "loss": 0.3664, - "step": 3058 - }, - { - "epoch": 0.19992157375334946, - "grad_norm": 0.5306459665298462, - "learning_rate": 9.94426312291196e-06, - "loss": 0.4592, - "step": 3059 - }, - { - "epoch": 0.19998692895889159, - "grad_norm": 0.4990803301334381, - "learning_rate": 9.94421111731048e-06, - "loss": 0.4043, - "step": 3060 - }, - { - "epoch": 0.20005228416443369, - "grad_norm": 0.45972147583961487, - "learning_rate": 9.944159087594372e-06, - "loss": 0.3694, - "step": 3061 - }, - { - "epoch": 0.2001176393699758, - "grad_norm": 0.4808419942855835, - "learning_rate": 9.944107033763886e-06, - "loss": 0.4052, - "step": 3062 - }, - { - "epoch": 0.20018299457551794, - "grad_norm": 0.5073785781860352, - "learning_rate": 9.944054955819275e-06, - "loss": 0.4372, - "step": 3063 - }, - { - "epoch": 0.20024834978106007, - "grad_norm": 0.5173817873001099, - "learning_rate": 9.944002853760798e-06, - "loss": 0.4257, - "step": 3064 - }, - { - "epoch": 0.20031370498660217, - "grad_norm": 0.5190584063529968, - "learning_rate": 9.943950727588706e-06, - "loss": 0.4242, - "step": 3065 - }, - { - "epoch": 0.2003790601921443, - "grad_norm": 0.4993712306022644, - "learning_rate": 9.943898577303253e-06, - "loss": 0.3869, - "step": 3066 - }, - { - "epoch": 0.20044441539768643, - "grad_norm": 0.5062596201896667, - "learning_rate": 9.943846402904693e-06, - "loss": 0.4487, - "step": 3067 - }, - { - "epoch": 0.20050977060322855, - "grad_norm": 0.47708597779273987, - "learning_rate": 9.943794204393282e-06, - "loss": 0.424, - "step": 3068 - }, - { - "epoch": 0.20057512580877068, - "grad_norm": 0.470600426197052, - "learning_rate": 9.943741981769275e-06, - "loss": 0.4098, - "step": 3069 - }, - { - "epoch": 0.20064048101431278, - "grad_norm": 0.47591736912727356, - "learning_rate": 9.943689735032926e-06, - "loss": 0.3667, - "step": 3070 - }, - { - "epoch": 0.2007058362198549, - "grad_norm": 0.48058611154556274, - "learning_rate": 9.943637464184488e-06, - "loss": 0.3966, - "step": 3071 - }, - { - "epoch": 0.20077119142539704, - "grad_norm": 0.4728534519672394, - "learning_rate": 9.943585169224216e-06, - "loss": 0.4437, - "step": 3072 - }, - { - "epoch": 0.20083654663093917, - "grad_norm": 0.4847133457660675, - "learning_rate": 9.94353285015237e-06, - "loss": 0.4125, - "step": 3073 - }, - { - "epoch": 0.20090190183648127, - "grad_norm": 0.5271087288856506, - "learning_rate": 9.9434805069692e-06, - "loss": 0.4308, - "step": 3074 - }, - { - "epoch": 0.2009672570420234, - "grad_norm": 0.4755455553531647, - "learning_rate": 9.943428139674963e-06, - "loss": 0.3931, - "step": 3075 - }, - { - "epoch": 0.20103261224756552, - "grad_norm": 0.466139018535614, - "learning_rate": 9.943375748269914e-06, - "loss": 0.378, - "step": 3076 - }, - { - "epoch": 0.20109796745310765, - "grad_norm": 0.47393783926963806, - "learning_rate": 9.94332333275431e-06, - "loss": 0.4291, - "step": 3077 - }, - { - "epoch": 0.20116332265864975, - "grad_norm": 0.5035730004310608, - "learning_rate": 9.943270893128405e-06, - "loss": 0.4132, - "step": 3078 - }, - { - "epoch": 0.20122867786419188, - "grad_norm": 0.49975332617759705, - "learning_rate": 9.943218429392456e-06, - "loss": 0.3801, - "step": 3079 - }, - { - "epoch": 0.201294033069734, - "grad_norm": 0.523624837398529, - "learning_rate": 9.943165941546717e-06, - "loss": 0.4561, - "step": 3080 - }, - { - "epoch": 0.20135938827527614, - "grad_norm": 0.5602368712425232, - "learning_rate": 9.943113429591445e-06, - "loss": 0.4861, - "step": 3081 - }, - { - "epoch": 0.20142474348081824, - "grad_norm": 0.4771738052368164, - "learning_rate": 9.943060893526896e-06, - "loss": 0.4053, - "step": 3082 - }, - { - "epoch": 0.20149009868636036, - "grad_norm": 0.4846934676170349, - "learning_rate": 9.94300833335333e-06, - "loss": 0.4158, - "step": 3083 - }, - { - "epoch": 0.2015554538919025, - "grad_norm": 0.47020387649536133, - "learning_rate": 9.942955749070996e-06, - "loss": 0.3534, - "step": 3084 - }, - { - "epoch": 0.20162080909744462, - "grad_norm": 0.4568374454975128, - "learning_rate": 9.942903140680157e-06, - "loss": 0.3812, - "step": 3085 - }, - { - "epoch": 0.20168616430298672, - "grad_norm": 0.45790353417396545, - "learning_rate": 9.942850508181065e-06, - "loss": 0.3674, - "step": 3086 - }, - { - "epoch": 0.20175151950852885, - "grad_norm": 0.6400083303451538, - "learning_rate": 9.942797851573982e-06, - "loss": 0.3992, - "step": 3087 - }, - { - "epoch": 0.20181687471407098, - "grad_norm": 0.44926542043685913, - "learning_rate": 9.942745170859158e-06, - "loss": 0.4104, - "step": 3088 - }, - { - "epoch": 0.2018822299196131, - "grad_norm": 0.5154117345809937, - "learning_rate": 9.942692466036854e-06, - "loss": 0.4709, - "step": 3089 - }, - { - "epoch": 0.20194758512515523, - "grad_norm": 0.5078647136688232, - "learning_rate": 9.942639737107327e-06, - "loss": 0.4232, - "step": 3090 - }, - { - "epoch": 0.20201294033069733, - "grad_norm": 0.4890657961368561, - "learning_rate": 9.942586984070834e-06, - "loss": 0.4083, - "step": 3091 - }, - { - "epoch": 0.20207829553623946, - "grad_norm": 0.4696938991546631, - "learning_rate": 9.942534206927631e-06, - "loss": 0.3894, - "step": 3092 - }, - { - "epoch": 0.2021436507417816, - "grad_norm": 0.48734769225120544, - "learning_rate": 9.94248140567798e-06, - "loss": 0.4047, - "step": 3093 - }, - { - "epoch": 0.20220900594732372, - "grad_norm": 0.4499852955341339, - "learning_rate": 9.942428580322132e-06, - "loss": 0.3395, - "step": 3094 - }, - { - "epoch": 0.20227436115286582, - "grad_norm": 0.48252198100090027, - "learning_rate": 9.942375730860347e-06, - "loss": 0.4034, - "step": 3095 - }, - { - "epoch": 0.20233971635840795, - "grad_norm": 0.5231744050979614, - "learning_rate": 9.942322857292886e-06, - "loss": 0.4517, - "step": 3096 - }, - { - "epoch": 0.20240507156395007, - "grad_norm": 0.4870540201663971, - "learning_rate": 9.94226995962e-06, - "loss": 0.45, - "step": 3097 - }, - { - "epoch": 0.2024704267694922, - "grad_norm": 0.5164012908935547, - "learning_rate": 9.942217037841955e-06, - "loss": 0.4722, - "step": 3098 - }, - { - "epoch": 0.2025357819750343, - "grad_norm": 0.4839988350868225, - "learning_rate": 9.942164091959004e-06, - "loss": 0.4341, - "step": 3099 - }, - { - "epoch": 0.20260113718057643, - "grad_norm": 0.5070463418960571, - "learning_rate": 9.942111121971407e-06, - "loss": 0.4233, - "step": 3100 - }, - { - "epoch": 0.20266649238611856, - "grad_norm": 0.44807514548301697, - "learning_rate": 9.942058127879421e-06, - "loss": 0.3667, - "step": 3101 - }, - { - "epoch": 0.20273184759166069, - "grad_norm": 0.4492264688014984, - "learning_rate": 9.942005109683305e-06, - "loss": 0.3564, - "step": 3102 - }, - { - "epoch": 0.20279720279720279, - "grad_norm": 0.5319750308990479, - "learning_rate": 9.94195206738332e-06, - "loss": 0.4389, - "step": 3103 - }, - { - "epoch": 0.2028625580027449, - "grad_norm": 0.5499125719070435, - "learning_rate": 9.941899000979722e-06, - "loss": 0.4612, - "step": 3104 - }, - { - "epoch": 0.20292791320828704, - "grad_norm": 0.5554568767547607, - "learning_rate": 9.94184591047277e-06, - "loss": 0.5242, - "step": 3105 - }, - { - "epoch": 0.20299326841382917, - "grad_norm": 0.5147949457168579, - "learning_rate": 9.941792795862723e-06, - "loss": 0.4334, - "step": 3106 - }, - { - "epoch": 0.20305862361937127, - "grad_norm": 0.4976714551448822, - "learning_rate": 9.941739657149843e-06, - "loss": 0.4439, - "step": 3107 - }, - { - "epoch": 0.2031239788249134, - "grad_norm": 0.4922333359718323, - "learning_rate": 9.941686494334384e-06, - "loss": 0.4226, - "step": 3108 - }, - { - "epoch": 0.20318933403045553, - "grad_norm": 0.5006389021873474, - "learning_rate": 9.941633307416609e-06, - "loss": 0.4031, - "step": 3109 - }, - { - "epoch": 0.20325468923599765, - "grad_norm": 0.4451974630355835, - "learning_rate": 9.941580096396776e-06, - "loss": 0.3712, - "step": 3110 - }, - { - "epoch": 0.20332004444153978, - "grad_norm": 0.4745657444000244, - "learning_rate": 9.941526861275146e-06, - "loss": 0.4287, - "step": 3111 - }, - { - "epoch": 0.20338539964708188, - "grad_norm": 0.5245086550712585, - "learning_rate": 9.941473602051978e-06, - "loss": 0.4101, - "step": 3112 - }, - { - "epoch": 0.203450754852624, - "grad_norm": 0.44496220350265503, - "learning_rate": 9.94142031872753e-06, - "loss": 0.3706, - "step": 3113 - }, - { - "epoch": 0.20351611005816614, - "grad_norm": 0.505380392074585, - "learning_rate": 9.941367011302063e-06, - "loss": 0.3862, - "step": 3114 - }, - { - "epoch": 0.20358146526370827, - "grad_norm": 0.4921588599681854, - "learning_rate": 9.94131367977584e-06, - "loss": 0.4421, - "step": 3115 - }, - { - "epoch": 0.20364682046925037, - "grad_norm": 0.46233102679252625, - "learning_rate": 9.941260324149114e-06, - "loss": 0.3797, - "step": 3116 - }, - { - "epoch": 0.2037121756747925, - "grad_norm": 0.528011679649353, - "learning_rate": 9.941206944422153e-06, - "loss": 0.488, - "step": 3117 - }, - { - "epoch": 0.20377753088033462, - "grad_norm": 0.5147208571434021, - "learning_rate": 9.941153540595211e-06, - "loss": 0.4665, - "step": 3118 - }, - { - "epoch": 0.20384288608587675, - "grad_norm": 0.554547131061554, - "learning_rate": 9.941100112668554e-06, - "loss": 0.45, - "step": 3119 - }, - { - "epoch": 0.20390824129141885, - "grad_norm": 0.4568560719490051, - "learning_rate": 9.94104666064244e-06, - "loss": 0.3713, - "step": 3120 - }, - { - "epoch": 0.20397359649696098, - "grad_norm": 0.46647655963897705, - "learning_rate": 9.940993184517126e-06, - "loss": 0.4206, - "step": 3121 - }, - { - "epoch": 0.2040389517025031, - "grad_norm": 0.454797625541687, - "learning_rate": 9.940939684292881e-06, - "loss": 0.38, - "step": 3122 - }, - { - "epoch": 0.20410430690804524, - "grad_norm": 0.5423403978347778, - "learning_rate": 9.94088615996996e-06, - "loss": 0.4573, - "step": 3123 - }, - { - "epoch": 0.20416966211358734, - "grad_norm": 0.48902904987335205, - "learning_rate": 9.940832611548625e-06, - "loss": 0.4239, - "step": 3124 - }, - { - "epoch": 0.20423501731912946, - "grad_norm": 0.4641076326370239, - "learning_rate": 9.940779039029138e-06, - "loss": 0.3552, - "step": 3125 - }, - { - "epoch": 0.2043003725246716, - "grad_norm": 0.47573062777519226, - "learning_rate": 9.94072544241176e-06, - "loss": 0.4293, - "step": 3126 - }, - { - "epoch": 0.20436572773021372, - "grad_norm": 0.5493038892745972, - "learning_rate": 9.940671821696752e-06, - "loss": 0.4896, - "step": 3127 - }, - { - "epoch": 0.20443108293575582, - "grad_norm": 0.46159788966178894, - "learning_rate": 9.940618176884376e-06, - "loss": 0.3674, - "step": 3128 - }, - { - "epoch": 0.20449643814129795, - "grad_norm": 0.5037823915481567, - "learning_rate": 9.940564507974895e-06, - "loss": 0.4155, - "step": 3129 - }, - { - "epoch": 0.20456179334684008, - "grad_norm": 0.47430261969566345, - "learning_rate": 9.940510814968567e-06, - "loss": 0.4106, - "step": 3130 - }, - { - "epoch": 0.2046271485523822, - "grad_norm": 0.44317811727523804, - "learning_rate": 9.940457097865656e-06, - "loss": 0.3631, - "step": 3131 - }, - { - "epoch": 0.20469250375792433, - "grad_norm": 0.44111067056655884, - "learning_rate": 9.940403356666427e-06, - "loss": 0.3688, - "step": 3132 - }, - { - "epoch": 0.20475785896346643, - "grad_norm": 0.5026799440383911, - "learning_rate": 9.940349591371137e-06, - "loss": 0.5078, - "step": 3133 - }, - { - "epoch": 0.20482321416900856, - "grad_norm": 0.47906067967414856, - "learning_rate": 9.94029580198005e-06, - "loss": 0.4116, - "step": 3134 - }, - { - "epoch": 0.2048885693745507, - "grad_norm": 0.5190334916114807, - "learning_rate": 9.940241988493432e-06, - "loss": 0.4791, - "step": 3135 - }, - { - "epoch": 0.20495392458009282, - "grad_norm": 0.45592185854911804, - "learning_rate": 9.940188150911541e-06, - "loss": 0.3882, - "step": 3136 - }, - { - "epoch": 0.20501927978563492, - "grad_norm": 0.4982894957065582, - "learning_rate": 9.94013428923464e-06, - "loss": 0.4061, - "step": 3137 - }, - { - "epoch": 0.20508463499117704, - "grad_norm": 0.430267333984375, - "learning_rate": 9.940080403462993e-06, - "loss": 0.3177, - "step": 3138 - }, - { - "epoch": 0.20514999019671917, - "grad_norm": 0.48851391673088074, - "learning_rate": 9.940026493596863e-06, - "loss": 0.4162, - "step": 3139 - }, - { - "epoch": 0.2052153454022613, - "grad_norm": 0.4630681276321411, - "learning_rate": 9.939972559636511e-06, - "loss": 0.4115, - "step": 3140 - }, - { - "epoch": 0.2052807006078034, - "grad_norm": 0.44982555508613586, - "learning_rate": 9.939918601582203e-06, - "loss": 0.3826, - "step": 3141 - }, - { - "epoch": 0.20534605581334553, - "grad_norm": 0.4807620644569397, - "learning_rate": 9.939864619434201e-06, - "loss": 0.3713, - "step": 3142 - }, - { - "epoch": 0.20541141101888766, - "grad_norm": 0.4503217041492462, - "learning_rate": 9.939810613192766e-06, - "loss": 0.3403, - "step": 3143 - }, - { - "epoch": 0.20547676622442979, - "grad_norm": 0.5286787748336792, - "learning_rate": 9.939756582858164e-06, - "loss": 0.4696, - "step": 3144 - }, - { - "epoch": 0.20554212142997189, - "grad_norm": 0.5015026330947876, - "learning_rate": 9.939702528430658e-06, - "loss": 0.4455, - "step": 3145 - }, - { - "epoch": 0.205607476635514, - "grad_norm": 0.5203262567520142, - "learning_rate": 9.939648449910513e-06, - "loss": 0.4345, - "step": 3146 - }, - { - "epoch": 0.20567283184105614, - "grad_norm": 0.48993492126464844, - "learning_rate": 9.93959434729799e-06, - "loss": 0.3685, - "step": 3147 - }, - { - "epoch": 0.20573818704659827, - "grad_norm": 0.5506002902984619, - "learning_rate": 9.939540220593353e-06, - "loss": 0.4938, - "step": 3148 - }, - { - "epoch": 0.20580354225214037, - "grad_norm": 0.4672248959541321, - "learning_rate": 9.939486069796869e-06, - "loss": 0.3733, - "step": 3149 - }, - { - "epoch": 0.2058688974576825, - "grad_norm": 0.5117464661598206, - "learning_rate": 9.9394318949088e-06, - "loss": 0.4284, - "step": 3150 - }, - { - "epoch": 0.20593425266322463, - "grad_norm": 0.5215551853179932, - "learning_rate": 9.939377695929409e-06, - "loss": 0.471, - "step": 3151 - }, - { - "epoch": 0.20599960786876675, - "grad_norm": 0.5296312570571899, - "learning_rate": 9.939323472858963e-06, - "loss": 0.4314, - "step": 3152 - }, - { - "epoch": 0.20606496307430888, - "grad_norm": 0.5131133794784546, - "learning_rate": 9.939269225697726e-06, - "loss": 0.4579, - "step": 3153 - }, - { - "epoch": 0.20613031827985098, - "grad_norm": 0.5022668838500977, - "learning_rate": 9.939214954445959e-06, - "loss": 0.4262, - "step": 3154 - }, - { - "epoch": 0.2061956734853931, - "grad_norm": 0.49072396755218506, - "learning_rate": 9.939160659103933e-06, - "loss": 0.4047, - "step": 3155 - }, - { - "epoch": 0.20626102869093524, - "grad_norm": 0.5091725587844849, - "learning_rate": 9.939106339671907e-06, - "loss": 0.422, - "step": 3156 - }, - { - "epoch": 0.20632638389647737, - "grad_norm": 0.4800470173358917, - "learning_rate": 9.939051996150149e-06, - "loss": 0.4108, - "step": 3157 - }, - { - "epoch": 0.20639173910201947, - "grad_norm": 0.514090359210968, - "learning_rate": 9.938997628538924e-06, - "loss": 0.4693, - "step": 3158 - }, - { - "epoch": 0.2064570943075616, - "grad_norm": 0.48027849197387695, - "learning_rate": 9.938943236838496e-06, - "loss": 0.4376, - "step": 3159 - }, - { - "epoch": 0.20652244951310372, - "grad_norm": 0.4613378345966339, - "learning_rate": 9.93888882104913e-06, - "loss": 0.4015, - "step": 3160 - }, - { - "epoch": 0.20658780471864585, - "grad_norm": 0.44781532883644104, - "learning_rate": 9.938834381171093e-06, - "loss": 0.3736, - "step": 3161 - }, - { - "epoch": 0.20665315992418795, - "grad_norm": 0.4846932291984558, - "learning_rate": 9.93877991720465e-06, - "loss": 0.362, - "step": 3162 - }, - { - "epoch": 0.20671851512973008, - "grad_norm": 0.49717938899993896, - "learning_rate": 9.938725429150066e-06, - "loss": 0.443, - "step": 3163 - }, - { - "epoch": 0.2067838703352722, - "grad_norm": 0.5367235541343689, - "learning_rate": 9.938670917007606e-06, - "loss": 0.4483, - "step": 3164 - }, - { - "epoch": 0.20684922554081434, - "grad_norm": 0.4786202013492584, - "learning_rate": 9.93861638077754e-06, - "loss": 0.4262, - "step": 3165 - }, - { - "epoch": 0.20691458074635644, - "grad_norm": 0.5516684651374817, - "learning_rate": 9.938561820460128e-06, - "loss": 0.5142, - "step": 3166 - }, - { - "epoch": 0.20697993595189856, - "grad_norm": 0.4452001452445984, - "learning_rate": 9.938507236055642e-06, - "loss": 0.3254, - "step": 3167 - }, - { - "epoch": 0.2070452911574407, - "grad_norm": 0.5052228569984436, - "learning_rate": 9.938452627564344e-06, - "loss": 0.4298, - "step": 3168 - }, - { - "epoch": 0.20711064636298282, - "grad_norm": 0.5003474354743958, - "learning_rate": 9.938397994986501e-06, - "loss": 0.4518, - "step": 3169 - }, - { - "epoch": 0.20717600156852492, - "grad_norm": 0.48262444138526917, - "learning_rate": 9.938343338322381e-06, - "loss": 0.4085, - "step": 3170 - }, - { - "epoch": 0.20724135677406705, - "grad_norm": 0.5018842816352844, - "learning_rate": 9.938288657572248e-06, - "loss": 0.4547, - "step": 3171 - }, - { - "epoch": 0.20730671197960918, - "grad_norm": 0.47284314036369324, - "learning_rate": 9.938233952736372e-06, - "loss": 0.3755, - "step": 3172 - }, - { - "epoch": 0.2073720671851513, - "grad_norm": 0.49318069219589233, - "learning_rate": 9.938179223815019e-06, - "loss": 0.4385, - "step": 3173 - }, - { - "epoch": 0.20743742239069343, - "grad_norm": 0.4700956642627716, - "learning_rate": 9.938124470808454e-06, - "loss": 0.3693, - "step": 3174 - }, - { - "epoch": 0.20750277759623553, - "grad_norm": 0.5164834260940552, - "learning_rate": 9.938069693716945e-06, - "loss": 0.4665, - "step": 3175 - }, - { - "epoch": 0.20756813280177766, - "grad_norm": 0.4650100767612457, - "learning_rate": 9.93801489254076e-06, - "loss": 0.4092, - "step": 3176 - }, - { - "epoch": 0.2076334880073198, - "grad_norm": 0.43761709332466125, - "learning_rate": 9.937960067280165e-06, - "loss": 0.3421, - "step": 3177 - }, - { - "epoch": 0.20769884321286192, - "grad_norm": 0.48198753595352173, - "learning_rate": 9.937905217935428e-06, - "loss": 0.4009, - "step": 3178 - }, - { - "epoch": 0.20776419841840402, - "grad_norm": 0.45960405468940735, - "learning_rate": 9.93785034450682e-06, - "loss": 0.3799, - "step": 3179 - }, - { - "epoch": 0.20782955362394614, - "grad_norm": 0.49930331110954285, - "learning_rate": 9.9377954469946e-06, - "loss": 0.3879, - "step": 3180 - }, - { - "epoch": 0.20789490882948827, - "grad_norm": 0.4426723122596741, - "learning_rate": 9.937740525399044e-06, - "loss": 0.3593, - "step": 3181 - }, - { - "epoch": 0.2079602640350304, - "grad_norm": 0.4870409667491913, - "learning_rate": 9.937685579720415e-06, - "loss": 0.4495, - "step": 3182 - }, - { - "epoch": 0.2080256192405725, - "grad_norm": 0.4347936809062958, - "learning_rate": 9.937630609958986e-06, - "loss": 0.3739, - "step": 3183 - }, - { - "epoch": 0.20809097444611463, - "grad_norm": 0.5160869359970093, - "learning_rate": 9.93757561611502e-06, - "loss": 0.4429, - "step": 3184 - }, - { - "epoch": 0.20815632965165676, - "grad_norm": 0.4967484772205353, - "learning_rate": 9.937520598188786e-06, - "loss": 0.4306, - "step": 3185 - }, - { - "epoch": 0.20822168485719889, - "grad_norm": 0.4874371290206909, - "learning_rate": 9.937465556180555e-06, - "loss": 0.4307, - "step": 3186 - }, - { - "epoch": 0.20828704006274099, - "grad_norm": 0.49783626198768616, - "learning_rate": 9.937410490090593e-06, - "loss": 0.4579, - "step": 3187 - }, - { - "epoch": 0.2083523952682831, - "grad_norm": 0.5056105852127075, - "learning_rate": 9.93735539991917e-06, - "loss": 0.4494, - "step": 3188 - }, - { - "epoch": 0.20841775047382524, - "grad_norm": 0.49673497676849365, - "learning_rate": 9.937300285666556e-06, - "loss": 0.4389, - "step": 3189 - }, - { - "epoch": 0.20848310567936737, - "grad_norm": 0.4490938186645508, - "learning_rate": 9.937245147333016e-06, - "loss": 0.3822, - "step": 3190 - }, - { - "epoch": 0.20854846088490947, - "grad_norm": 0.5379023551940918, - "learning_rate": 9.937189984918822e-06, - "loss": 0.3911, - "step": 3191 - }, - { - "epoch": 0.2086138160904516, - "grad_norm": 0.5370519161224365, - "learning_rate": 9.937134798424242e-06, - "loss": 0.454, - "step": 3192 - }, - { - "epoch": 0.20867917129599373, - "grad_norm": 0.4845854938030243, - "learning_rate": 9.937079587849545e-06, - "loss": 0.3959, - "step": 3193 - }, - { - "epoch": 0.20874452650153585, - "grad_norm": 0.4893931448459625, - "learning_rate": 9.937024353195e-06, - "loss": 0.4306, - "step": 3194 - }, - { - "epoch": 0.20880988170707798, - "grad_norm": 0.5111810564994812, - "learning_rate": 9.936969094460877e-06, - "loss": 0.4809, - "step": 3195 - }, - { - "epoch": 0.20887523691262008, - "grad_norm": 0.4792070686817169, - "learning_rate": 9.936913811647446e-06, - "loss": 0.4234, - "step": 3196 - }, - { - "epoch": 0.2089405921181622, - "grad_norm": 0.4581071436405182, - "learning_rate": 9.936858504754976e-06, - "loss": 0.3631, - "step": 3197 - }, - { - "epoch": 0.20900594732370434, - "grad_norm": 0.508477509021759, - "learning_rate": 9.936803173783735e-06, - "loss": 0.445, - "step": 3198 - }, - { - "epoch": 0.20907130252924647, - "grad_norm": 0.48508119583129883, - "learning_rate": 9.936747818733996e-06, - "loss": 0.4139, - "step": 3199 - }, - { - "epoch": 0.20913665773478857, - "grad_norm": 0.5168250203132629, - "learning_rate": 9.936692439606028e-06, - "loss": 0.427, - "step": 3200 - }, - { - "epoch": 0.2092020129403307, - "grad_norm": 0.4804687201976776, - "learning_rate": 9.936637036400101e-06, - "loss": 0.3888, - "step": 3201 - }, - { - "epoch": 0.20926736814587282, - "grad_norm": 0.48574692010879517, - "learning_rate": 9.936581609116485e-06, - "loss": 0.4211, - "step": 3202 - }, - { - "epoch": 0.20933272335141495, - "grad_norm": 0.5198602080345154, - "learning_rate": 9.936526157755448e-06, - "loss": 0.4395, - "step": 3203 - }, - { - "epoch": 0.20939807855695705, - "grad_norm": 0.5564453601837158, - "learning_rate": 9.936470682317265e-06, - "loss": 0.5146, - "step": 3204 - }, - { - "epoch": 0.20946343376249918, - "grad_norm": 0.4566188454627991, - "learning_rate": 9.936415182802203e-06, - "loss": 0.3731, - "step": 3205 - }, - { - "epoch": 0.2095287889680413, - "grad_norm": 0.5720825791358948, - "learning_rate": 9.936359659210537e-06, - "loss": 0.4609, - "step": 3206 - }, - { - "epoch": 0.20959414417358344, - "grad_norm": 0.4590819776058197, - "learning_rate": 9.936304111542532e-06, - "loss": 0.4009, - "step": 3207 - }, - { - "epoch": 0.20965949937912554, - "grad_norm": 0.5057934522628784, - "learning_rate": 9.936248539798462e-06, - "loss": 0.4047, - "step": 3208 - }, - { - "epoch": 0.20972485458466766, - "grad_norm": 0.49229344725608826, - "learning_rate": 9.936192943978597e-06, - "loss": 0.394, - "step": 3209 - }, - { - "epoch": 0.2097902097902098, - "grad_norm": 0.50483638048172, - "learning_rate": 9.93613732408321e-06, - "loss": 0.3974, - "step": 3210 - }, - { - "epoch": 0.20985556499575192, - "grad_norm": 0.492837131023407, - "learning_rate": 9.936081680112573e-06, - "loss": 0.3986, - "step": 3211 - }, - { - "epoch": 0.20992092020129402, - "grad_norm": 0.457077294588089, - "learning_rate": 9.936026012066952e-06, - "loss": 0.3967, - "step": 3212 - }, - { - "epoch": 0.20998627540683615, - "grad_norm": 0.4983299970626831, - "learning_rate": 9.935970319946627e-06, - "loss": 0.4083, - "step": 3213 - }, - { - "epoch": 0.21005163061237828, - "grad_norm": 0.49250268936157227, - "learning_rate": 9.93591460375186e-06, - "loss": 0.3713, - "step": 3214 - }, - { - "epoch": 0.2101169858179204, - "grad_norm": 0.5435276031494141, - "learning_rate": 9.93585886348293e-06, - "loss": 0.4754, - "step": 3215 - }, - { - "epoch": 0.21018234102346253, - "grad_norm": 0.49676281213760376, - "learning_rate": 9.935803099140106e-06, - "loss": 0.4596, - "step": 3216 - }, - { - "epoch": 0.21024769622900463, - "grad_norm": 0.4617408812046051, - "learning_rate": 9.93574731072366e-06, - "loss": 0.3884, - "step": 3217 - }, - { - "epoch": 0.21031305143454676, - "grad_norm": 0.5948168039321899, - "learning_rate": 9.935691498233864e-06, - "loss": 0.508, - "step": 3218 - }, - { - "epoch": 0.2103784066400889, - "grad_norm": 0.49031031131744385, - "learning_rate": 9.935635661670992e-06, - "loss": 0.4222, - "step": 3219 - }, - { - "epoch": 0.21044376184563102, - "grad_norm": 0.482632040977478, - "learning_rate": 9.935579801035314e-06, - "loss": 0.413, - "step": 3220 - }, - { - "epoch": 0.21050911705117312, - "grad_norm": 0.5207369327545166, - "learning_rate": 9.935523916327103e-06, - "loss": 0.4704, - "step": 3221 - }, - { - "epoch": 0.21057447225671524, - "grad_norm": 0.5311384797096252, - "learning_rate": 9.935468007546634e-06, - "loss": 0.5367, - "step": 3222 - }, - { - "epoch": 0.21063982746225737, - "grad_norm": 0.5066277980804443, - "learning_rate": 9.935412074694175e-06, - "loss": 0.4196, - "step": 3223 - }, - { - "epoch": 0.2107051826677995, - "grad_norm": 0.4708802402019501, - "learning_rate": 9.935356117770003e-06, - "loss": 0.3943, - "step": 3224 - }, - { - "epoch": 0.2107705378733416, - "grad_norm": 0.48777833580970764, - "learning_rate": 9.935300136774389e-06, - "loss": 0.4278, - "step": 3225 - }, - { - "epoch": 0.21083589307888373, - "grad_norm": 0.4551773965358734, - "learning_rate": 9.935244131707607e-06, - "loss": 0.379, - "step": 3226 - }, - { - "epoch": 0.21090124828442586, - "grad_norm": 0.4466029405593872, - "learning_rate": 9.93518810256993e-06, - "loss": 0.3787, - "step": 3227 - }, - { - "epoch": 0.21096660348996799, - "grad_norm": 0.46648839116096497, - "learning_rate": 9.935132049361631e-06, - "loss": 0.402, - "step": 3228 - }, - { - "epoch": 0.21103195869551009, - "grad_norm": 0.5277845859527588, - "learning_rate": 9.935075972082982e-06, - "loss": 0.4619, - "step": 3229 - }, - { - "epoch": 0.2110973139010522, - "grad_norm": 0.476776659488678, - "learning_rate": 9.93501987073426e-06, - "loss": 0.3646, - "step": 3230 - }, - { - "epoch": 0.21116266910659434, - "grad_norm": 0.4704614579677582, - "learning_rate": 9.934963745315733e-06, - "loss": 0.391, - "step": 3231 - }, - { - "epoch": 0.21122802431213647, - "grad_norm": 0.47129425406455994, - "learning_rate": 9.934907595827681e-06, - "loss": 0.3842, - "step": 3232 - }, - { - "epoch": 0.21129337951767857, - "grad_norm": 0.4966982305049896, - "learning_rate": 9.934851422270374e-06, - "loss": 0.4296, - "step": 3233 - }, - { - "epoch": 0.2113587347232207, - "grad_norm": 0.464455246925354, - "learning_rate": 9.934795224644088e-06, - "loss": 0.3967, - "step": 3234 - }, - { - "epoch": 0.21142408992876283, - "grad_norm": 0.5066690444946289, - "learning_rate": 9.934739002949094e-06, - "loss": 0.4719, - "step": 3235 - }, - { - "epoch": 0.21148944513430495, - "grad_norm": 0.4529123604297638, - "learning_rate": 9.93468275718567e-06, - "loss": 0.3636, - "step": 3236 - }, - { - "epoch": 0.21155480033984708, - "grad_norm": 0.49601155519485474, - "learning_rate": 9.934626487354088e-06, - "loss": 0.3912, - "step": 3237 - }, - { - "epoch": 0.21162015554538918, - "grad_norm": 0.4670489728450775, - "learning_rate": 9.934570193454625e-06, - "loss": 0.3863, - "step": 3238 - }, - { - "epoch": 0.2116855107509313, - "grad_norm": 0.5014293193817139, - "learning_rate": 9.934513875487552e-06, - "loss": 0.4399, - "step": 3239 - }, - { - "epoch": 0.21175086595647344, - "grad_norm": 0.49930065870285034, - "learning_rate": 9.934457533453144e-06, - "loss": 0.4593, - "step": 3240 - }, - { - "epoch": 0.21181622116201557, - "grad_norm": 0.48156407475471497, - "learning_rate": 9.93440116735168e-06, - "loss": 0.3905, - "step": 3241 - }, - { - "epoch": 0.21188157636755767, - "grad_norm": 0.5224270820617676, - "learning_rate": 9.93434477718343e-06, - "loss": 0.4378, - "step": 3242 - }, - { - "epoch": 0.2119469315730998, - "grad_norm": 0.4587031900882721, - "learning_rate": 9.934288362948672e-06, - "loss": 0.3758, - "step": 3243 - }, - { - "epoch": 0.21201228677864192, - "grad_norm": 0.46535447239875793, - "learning_rate": 9.93423192464768e-06, - "loss": 0.3866, - "step": 3244 - }, - { - "epoch": 0.21207764198418405, - "grad_norm": 0.4809655547142029, - "learning_rate": 9.934175462280729e-06, - "loss": 0.4016, - "step": 3245 - }, - { - "epoch": 0.21214299718972615, - "grad_norm": 0.4861283302307129, - "learning_rate": 9.934118975848094e-06, - "loss": 0.3678, - "step": 3246 - }, - { - "epoch": 0.21220835239526828, - "grad_norm": 0.47646233439445496, - "learning_rate": 9.934062465350053e-06, - "loss": 0.4075, - "step": 3247 - }, - { - "epoch": 0.2122737076008104, - "grad_norm": 0.5074921250343323, - "learning_rate": 9.934005930786881e-06, - "loss": 0.441, - "step": 3248 - }, - { - "epoch": 0.21233906280635254, - "grad_norm": 0.4906167984008789, - "learning_rate": 9.933949372158852e-06, - "loss": 0.411, - "step": 3249 - }, - { - "epoch": 0.21240441801189464, - "grad_norm": 0.48985132575035095, - "learning_rate": 9.933892789466242e-06, - "loss": 0.4294, - "step": 3250 - }, - { - "epoch": 0.21246977321743676, - "grad_norm": 0.509037435054779, - "learning_rate": 9.933836182709328e-06, - "loss": 0.4287, - "step": 3251 - }, - { - "epoch": 0.2125351284229789, - "grad_norm": 0.48478612303733826, - "learning_rate": 9.933779551888385e-06, - "loss": 0.426, - "step": 3252 - }, - { - "epoch": 0.21260048362852102, - "grad_norm": 0.46264371275901794, - "learning_rate": 9.93372289700369e-06, - "loss": 0.3676, - "step": 3253 - }, - { - "epoch": 0.21266583883406312, - "grad_norm": 0.45310333371162415, - "learning_rate": 9.933666218055522e-06, - "loss": 0.3736, - "step": 3254 - }, - { - "epoch": 0.21273119403960525, - "grad_norm": 0.48330405354499817, - "learning_rate": 9.93360951504415e-06, - "loss": 0.4298, - "step": 3255 - }, - { - "epoch": 0.21279654924514738, - "grad_norm": 0.49773460626602173, - "learning_rate": 9.93355278796986e-06, - "loss": 0.4021, - "step": 3256 - }, - { - "epoch": 0.2128619044506895, - "grad_norm": 0.4893500506877899, - "learning_rate": 9.93349603683292e-06, - "loss": 0.4424, - "step": 3257 - }, - { - "epoch": 0.21292725965623163, - "grad_norm": 0.49103978276252747, - "learning_rate": 9.933439261633612e-06, - "loss": 0.4454, - "step": 3258 - }, - { - "epoch": 0.21299261486177373, - "grad_norm": 0.5382106900215149, - "learning_rate": 9.933382462372212e-06, - "loss": 0.4142, - "step": 3259 - }, - { - "epoch": 0.21305797006731586, - "grad_norm": 0.5229787826538086, - "learning_rate": 9.933325639048996e-06, - "loss": 0.4539, - "step": 3260 - }, - { - "epoch": 0.213123325272858, - "grad_norm": 0.4758698344230652, - "learning_rate": 9.933268791664243e-06, - "loss": 0.4277, - "step": 3261 - }, - { - "epoch": 0.21318868047840012, - "grad_norm": 0.5039137601852417, - "learning_rate": 9.933211920218229e-06, - "loss": 0.4301, - "step": 3262 - }, - { - "epoch": 0.21325403568394222, - "grad_norm": 0.49445581436157227, - "learning_rate": 9.93315502471123e-06, - "loss": 0.4329, - "step": 3263 - }, - { - "epoch": 0.21331939088948434, - "grad_norm": 0.4823456108570099, - "learning_rate": 9.933098105143526e-06, - "loss": 0.3474, - "step": 3264 - }, - { - "epoch": 0.21338474609502647, - "grad_norm": 0.47261980175971985, - "learning_rate": 9.933041161515393e-06, - "loss": 0.41, - "step": 3265 - }, - { - "epoch": 0.2134501013005686, - "grad_norm": 0.48997393250465393, - "learning_rate": 9.93298419382711e-06, - "loss": 0.4375, - "step": 3266 - }, - { - "epoch": 0.2135154565061107, - "grad_norm": 0.47008273005485535, - "learning_rate": 9.932927202078952e-06, - "loss": 0.3836, - "step": 3267 - }, - { - "epoch": 0.21358081171165283, - "grad_norm": 0.5084006786346436, - "learning_rate": 9.932870186271202e-06, - "loss": 0.3936, - "step": 3268 - }, - { - "epoch": 0.21364616691719496, - "grad_norm": 0.47581514716148376, - "learning_rate": 9.932813146404134e-06, - "loss": 0.3835, - "step": 3269 - }, - { - "epoch": 0.21371152212273709, - "grad_norm": 0.4586402177810669, - "learning_rate": 9.932756082478028e-06, - "loss": 0.3853, - "step": 3270 - }, - { - "epoch": 0.21377687732827919, - "grad_norm": 0.45872631669044495, - "learning_rate": 9.932698994493159e-06, - "loss": 0.3721, - "step": 3271 - }, - { - "epoch": 0.2138422325338213, - "grad_norm": 0.5588338375091553, - "learning_rate": 9.932641882449811e-06, - "loss": 0.4109, - "step": 3272 - }, - { - "epoch": 0.21390758773936344, - "grad_norm": 0.5490834712982178, - "learning_rate": 9.93258474634826e-06, - "loss": 0.3999, - "step": 3273 - }, - { - "epoch": 0.21397294294490557, - "grad_norm": 0.4640160799026489, - "learning_rate": 9.932527586188783e-06, - "loss": 0.403, - "step": 3274 - }, - { - "epoch": 0.21403829815044767, - "grad_norm": 0.458261102437973, - "learning_rate": 9.93247040197166e-06, - "loss": 0.3818, - "step": 3275 - }, - { - "epoch": 0.2141036533559898, - "grad_norm": 0.4928989112377167, - "learning_rate": 9.93241319369717e-06, - "loss": 0.4057, - "step": 3276 - }, - { - "epoch": 0.21416900856153193, - "grad_norm": 0.49751630425453186, - "learning_rate": 9.932355961365594e-06, - "loss": 0.4303, - "step": 3277 - }, - { - "epoch": 0.21423436376707405, - "grad_norm": 0.47694024443626404, - "learning_rate": 9.932298704977207e-06, - "loss": 0.3878, - "step": 3278 - }, - { - "epoch": 0.21429971897261618, - "grad_norm": 0.5090917348861694, - "learning_rate": 9.932241424532291e-06, - "loss": 0.4125, - "step": 3279 - }, - { - "epoch": 0.21436507417815828, - "grad_norm": 0.4575689435005188, - "learning_rate": 9.932184120031125e-06, - "loss": 0.4127, - "step": 3280 - }, - { - "epoch": 0.2144304293837004, - "grad_norm": 0.5061025619506836, - "learning_rate": 9.93212679147399e-06, - "loss": 0.4034, - "step": 3281 - }, - { - "epoch": 0.21449578458924254, - "grad_norm": 0.4857165813446045, - "learning_rate": 9.932069438861163e-06, - "loss": 0.4119, - "step": 3282 - }, - { - "epoch": 0.21456113979478467, - "grad_norm": 0.44739657640457153, - "learning_rate": 9.932012062192923e-06, - "loss": 0.3425, - "step": 3283 - }, - { - "epoch": 0.21462649500032677, - "grad_norm": 0.4993947148323059, - "learning_rate": 9.931954661469553e-06, - "loss": 0.4619, - "step": 3284 - }, - { - "epoch": 0.2146918502058689, - "grad_norm": 0.473056823015213, - "learning_rate": 9.931897236691332e-06, - "loss": 0.4243, - "step": 3285 - }, - { - "epoch": 0.21475720541141102, - "grad_norm": 0.5073739290237427, - "learning_rate": 9.93183978785854e-06, - "loss": 0.456, - "step": 3286 - }, - { - "epoch": 0.21482256061695315, - "grad_norm": 0.486345112323761, - "learning_rate": 9.931782314971454e-06, - "loss": 0.394, - "step": 3287 - }, - { - "epoch": 0.21488791582249525, - "grad_norm": 0.4326786696910858, - "learning_rate": 9.931724818030359e-06, - "loss": 0.3208, - "step": 3288 - }, - { - "epoch": 0.21495327102803738, - "grad_norm": 0.501753032207489, - "learning_rate": 9.931667297035535e-06, - "loss": 0.4218, - "step": 3289 - }, - { - "epoch": 0.2150186262335795, - "grad_norm": 0.5065110921859741, - "learning_rate": 9.93160975198726e-06, - "loss": 0.4332, - "step": 3290 - }, - { - "epoch": 0.21508398143912164, - "grad_norm": 0.4691832661628723, - "learning_rate": 9.931552182885815e-06, - "loss": 0.3712, - "step": 3291 - }, - { - "epoch": 0.21514933664466374, - "grad_norm": 0.47420334815979004, - "learning_rate": 9.93149458973148e-06, - "loss": 0.384, - "step": 3292 - }, - { - "epoch": 0.21521469185020586, - "grad_norm": 0.538435161113739, - "learning_rate": 9.931436972524539e-06, - "loss": 0.4659, - "step": 3293 - }, - { - "epoch": 0.215280047055748, - "grad_norm": 0.48547956347465515, - "learning_rate": 9.931379331265272e-06, - "loss": 0.4189, - "step": 3294 - }, - { - "epoch": 0.21534540226129012, - "grad_norm": 0.5160083174705505, - "learning_rate": 9.931321665953961e-06, - "loss": 0.4416, - "step": 3295 - }, - { - "epoch": 0.21541075746683222, - "grad_norm": 0.5340427160263062, - "learning_rate": 9.931263976590883e-06, - "loss": 0.4075, - "step": 3296 - }, - { - "epoch": 0.21547611267237435, - "grad_norm": 0.4734879434108734, - "learning_rate": 9.931206263176325e-06, - "loss": 0.4118, - "step": 3297 - }, - { - "epoch": 0.21554146787791648, - "grad_norm": 0.46744245290756226, - "learning_rate": 9.931148525710563e-06, - "loss": 0.405, - "step": 3298 - }, - { - "epoch": 0.2156068230834586, - "grad_norm": 0.46901634335517883, - "learning_rate": 9.931090764193882e-06, - "loss": 0.3863, - "step": 3299 - }, - { - "epoch": 0.21567217828900073, - "grad_norm": 0.4528055787086487, - "learning_rate": 9.931032978626564e-06, - "loss": 0.3505, - "step": 3300 - }, - { - "epoch": 0.21573753349454283, - "grad_norm": 0.46030697226524353, - "learning_rate": 9.930975169008889e-06, - "loss": 0.3723, - "step": 3301 - }, - { - "epoch": 0.21580288870008496, - "grad_norm": 0.5202280879020691, - "learning_rate": 9.930917335341142e-06, - "loss": 0.4326, - "step": 3302 - }, - { - "epoch": 0.2158682439056271, - "grad_norm": 0.49749305844306946, - "learning_rate": 9.930859477623601e-06, - "loss": 0.4446, - "step": 3303 - }, - { - "epoch": 0.21593359911116922, - "grad_norm": 0.5041854977607727, - "learning_rate": 9.93080159585655e-06, - "loss": 0.4553, - "step": 3304 - }, - { - "epoch": 0.21599895431671132, - "grad_norm": 0.45173487067222595, - "learning_rate": 9.930743690040272e-06, - "loss": 0.3483, - "step": 3305 - }, - { - "epoch": 0.21606430952225344, - "grad_norm": 0.4890424311161041, - "learning_rate": 9.93068576017505e-06, - "loss": 0.3785, - "step": 3306 - }, - { - "epoch": 0.21612966472779557, - "grad_norm": 0.5173264741897583, - "learning_rate": 9.930627806261162e-06, - "loss": 0.4447, - "step": 3307 - }, - { - "epoch": 0.2161950199333377, - "grad_norm": 0.46183329820632935, - "learning_rate": 9.930569828298897e-06, - "loss": 0.4042, - "step": 3308 - }, - { - "epoch": 0.2162603751388798, - "grad_norm": 0.47356879711151123, - "learning_rate": 9.930511826288534e-06, - "loss": 0.417, - "step": 3309 - }, - { - "epoch": 0.21632573034442193, - "grad_norm": 0.4511333405971527, - "learning_rate": 9.930453800230358e-06, - "loss": 0.4107, - "step": 3310 - }, - { - "epoch": 0.21639108554996406, - "grad_norm": 0.4873167872428894, - "learning_rate": 9.930395750124648e-06, - "loss": 0.4136, - "step": 3311 - }, - { - "epoch": 0.21645644075550619, - "grad_norm": 0.4364887773990631, - "learning_rate": 9.93033767597169e-06, - "loss": 0.3903, - "step": 3312 - }, - { - "epoch": 0.21652179596104829, - "grad_norm": 0.49707648158073425, - "learning_rate": 9.93027957777177e-06, - "loss": 0.4272, - "step": 3313 - }, - { - "epoch": 0.2165871511665904, - "grad_norm": 0.5045496821403503, - "learning_rate": 9.930221455525167e-06, - "loss": 0.4627, - "step": 3314 - }, - { - "epoch": 0.21665250637213254, - "grad_norm": 0.4444250762462616, - "learning_rate": 9.930163309232167e-06, - "loss": 0.399, - "step": 3315 - }, - { - "epoch": 0.21671786157767467, - "grad_norm": 0.497793585062027, - "learning_rate": 9.93010513889305e-06, - "loss": 0.4363, - "step": 3316 - }, - { - "epoch": 0.21678321678321677, - "grad_norm": 0.4658908247947693, - "learning_rate": 9.930046944508104e-06, - "loss": 0.4037, - "step": 3317 - }, - { - "epoch": 0.2168485719887589, - "grad_norm": 0.4523892402648926, - "learning_rate": 9.92998872607761e-06, - "loss": 0.3937, - "step": 3318 - }, - { - "epoch": 0.21691392719430103, - "grad_norm": 0.47562769055366516, - "learning_rate": 9.929930483601855e-06, - "loss": 0.4209, - "step": 3319 - }, - { - "epoch": 0.21697928239984315, - "grad_norm": 0.48166099190711975, - "learning_rate": 9.92987221708112e-06, - "loss": 0.4187, - "step": 3320 - }, - { - "epoch": 0.21704463760538528, - "grad_norm": 0.4934748709201813, - "learning_rate": 9.92981392651569e-06, - "loss": 0.4222, - "step": 3321 - }, - { - "epoch": 0.21710999281092738, - "grad_norm": 0.5159883499145508, - "learning_rate": 9.92975561190585e-06, - "loss": 0.4605, - "step": 3322 - }, - { - "epoch": 0.2171753480164695, - "grad_norm": 0.5119197964668274, - "learning_rate": 9.929697273251884e-06, - "loss": 0.4384, - "step": 3323 - }, - { - "epoch": 0.21724070322201164, - "grad_norm": 0.501603364944458, - "learning_rate": 9.929638910554077e-06, - "loss": 0.4107, - "step": 3324 - }, - { - "epoch": 0.21730605842755377, - "grad_norm": 0.48651987314224243, - "learning_rate": 9.929580523812712e-06, - "loss": 0.4142, - "step": 3325 - }, - { - "epoch": 0.21737141363309587, - "grad_norm": 0.4588828980922699, - "learning_rate": 9.929522113028075e-06, - "loss": 0.3878, - "step": 3326 - }, - { - "epoch": 0.217436768838638, - "grad_norm": 0.4741341173648834, - "learning_rate": 9.929463678200452e-06, - "loss": 0.3995, - "step": 3327 - }, - { - "epoch": 0.21750212404418012, - "grad_norm": 0.49042367935180664, - "learning_rate": 9.929405219330127e-06, - "loss": 0.4059, - "step": 3328 - }, - { - "epoch": 0.21756747924972225, - "grad_norm": 0.48561954498291016, - "learning_rate": 9.929346736417387e-06, - "loss": 0.4061, - "step": 3329 - }, - { - "epoch": 0.21763283445526435, - "grad_norm": 0.46217429637908936, - "learning_rate": 9.929288229462513e-06, - "loss": 0.368, - "step": 3330 - }, - { - "epoch": 0.21769818966080648, - "grad_norm": 0.5045156478881836, - "learning_rate": 9.929229698465792e-06, - "loss": 0.4699, - "step": 3331 - }, - { - "epoch": 0.2177635448663486, - "grad_norm": 0.4598250985145569, - "learning_rate": 9.92917114342751e-06, - "loss": 0.4179, - "step": 3332 - }, - { - "epoch": 0.21782890007189074, - "grad_norm": 0.4896177053451538, - "learning_rate": 9.929112564347953e-06, - "loss": 0.4419, - "step": 3333 - }, - { - "epoch": 0.21789425527743284, - "grad_norm": 0.4439111649990082, - "learning_rate": 9.929053961227408e-06, - "loss": 0.3774, - "step": 3334 - }, - { - "epoch": 0.21795961048297496, - "grad_norm": 0.482452392578125, - "learning_rate": 9.928995334066158e-06, - "loss": 0.4369, - "step": 3335 - }, - { - "epoch": 0.2180249656885171, - "grad_norm": 0.43713170289993286, - "learning_rate": 9.92893668286449e-06, - "loss": 0.3457, - "step": 3336 - }, - { - "epoch": 0.21809032089405922, - "grad_norm": 0.4810910224914551, - "learning_rate": 9.92887800762269e-06, - "loss": 0.4441, - "step": 3337 - }, - { - "epoch": 0.21815567609960132, - "grad_norm": 0.44690775871276855, - "learning_rate": 9.928819308341048e-06, - "loss": 0.3682, - "step": 3338 - }, - { - "epoch": 0.21822103130514345, - "grad_norm": 0.4413868486881256, - "learning_rate": 9.92876058501984e-06, - "loss": 0.3701, - "step": 3339 - }, - { - "epoch": 0.21828638651068558, - "grad_norm": 0.5194119811058044, - "learning_rate": 9.928701837659365e-06, - "loss": 0.4488, - "step": 3340 - }, - { - "epoch": 0.2183517417162277, - "grad_norm": 0.4844612777233124, - "learning_rate": 9.9286430662599e-06, - "loss": 0.4242, - "step": 3341 - }, - { - "epoch": 0.21841709692176983, - "grad_norm": 0.5143688321113586, - "learning_rate": 9.928584270821737e-06, - "loss": 0.4585, - "step": 3342 - }, - { - "epoch": 0.21848245212731193, - "grad_norm": 0.4815845787525177, - "learning_rate": 9.928525451345162e-06, - "loss": 0.4028, - "step": 3343 - }, - { - "epoch": 0.21854780733285406, - "grad_norm": 0.5016213059425354, - "learning_rate": 9.92846660783046e-06, - "loss": 0.4078, - "step": 3344 - }, - { - "epoch": 0.2186131625383962, - "grad_norm": 0.45264023542404175, - "learning_rate": 9.92840774027792e-06, - "loss": 0.3467, - "step": 3345 - }, - { - "epoch": 0.21867851774393832, - "grad_norm": 0.5195870995521545, - "learning_rate": 9.928348848687825e-06, - "loss": 0.4393, - "step": 3346 - }, - { - "epoch": 0.21874387294948042, - "grad_norm": 0.5074870586395264, - "learning_rate": 9.928289933060469e-06, - "loss": 0.4728, - "step": 3347 - }, - { - "epoch": 0.21880922815502254, - "grad_norm": 0.48438355326652527, - "learning_rate": 9.928230993396134e-06, - "loss": 0.3779, - "step": 3348 - }, - { - "epoch": 0.21887458336056467, - "grad_norm": 0.4741329252719879, - "learning_rate": 9.92817202969511e-06, - "loss": 0.3937, - "step": 3349 - }, - { - "epoch": 0.2189399385661068, - "grad_norm": 0.5010062456130981, - "learning_rate": 9.928113041957682e-06, - "loss": 0.4131, - "step": 3350 - }, - { - "epoch": 0.2190052937716489, - "grad_norm": 0.53528892993927, - "learning_rate": 9.92805403018414e-06, - "loss": 0.4795, - "step": 3351 - }, - { - "epoch": 0.21907064897719103, - "grad_norm": 0.46200308203697205, - "learning_rate": 9.927994994374771e-06, - "loss": 0.4124, - "step": 3352 - }, - { - "epoch": 0.21913600418273316, - "grad_norm": 0.4657052457332611, - "learning_rate": 9.927935934529864e-06, - "loss": 0.382, - "step": 3353 - }, - { - "epoch": 0.21920135938827529, - "grad_norm": 0.5091944336891174, - "learning_rate": 9.927876850649706e-06, - "loss": 0.4547, - "step": 3354 - }, - { - "epoch": 0.21926671459381739, - "grad_norm": 0.4988209307193756, - "learning_rate": 9.927817742734585e-06, - "loss": 0.4231, - "step": 3355 - }, - { - "epoch": 0.2193320697993595, - "grad_norm": 0.4642221927642822, - "learning_rate": 9.927758610784791e-06, - "loss": 0.3752, - "step": 3356 - }, - { - "epoch": 0.21939742500490164, - "grad_norm": 0.47392427921295166, - "learning_rate": 9.92769945480061e-06, - "loss": 0.3678, - "step": 3357 - }, - { - "epoch": 0.21946278021044377, - "grad_norm": 0.4886663854122162, - "learning_rate": 9.92764027478233e-06, - "loss": 0.4318, - "step": 3358 - }, - { - "epoch": 0.21952813541598587, - "grad_norm": 0.4901321232318878, - "learning_rate": 9.927581070730244e-06, - "loss": 0.3917, - "step": 3359 - }, - { - "epoch": 0.219593490621528, - "grad_norm": 0.4995364546775818, - "learning_rate": 9.927521842644637e-06, - "loss": 0.4202, - "step": 3360 - }, - { - "epoch": 0.21965884582707013, - "grad_norm": 0.5176507830619812, - "learning_rate": 9.927462590525801e-06, - "loss": 0.4486, - "step": 3361 - }, - { - "epoch": 0.21972420103261225, - "grad_norm": 0.4921971261501312, - "learning_rate": 9.92740331437402e-06, - "loss": 0.4425, - "step": 3362 - }, - { - "epoch": 0.21978955623815438, - "grad_norm": 0.45165786147117615, - "learning_rate": 9.927344014189587e-06, - "loss": 0.3702, - "step": 3363 - }, - { - "epoch": 0.21985491144369648, - "grad_norm": 0.520483136177063, - "learning_rate": 9.92728468997279e-06, - "loss": 0.4732, - "step": 3364 - }, - { - "epoch": 0.2199202666492386, - "grad_norm": 0.5097583532333374, - "learning_rate": 9.927225341723918e-06, - "loss": 0.4181, - "step": 3365 - }, - { - "epoch": 0.21998562185478074, - "grad_norm": 0.4923904240131378, - "learning_rate": 9.927165969443262e-06, - "loss": 0.4158, - "step": 3366 - }, - { - "epoch": 0.22005097706032287, - "grad_norm": 0.4719454050064087, - "learning_rate": 9.927106573131112e-06, - "loss": 0.4026, - "step": 3367 - }, - { - "epoch": 0.22011633226586497, - "grad_norm": 0.4841327965259552, - "learning_rate": 9.927047152787754e-06, - "loss": 0.4223, - "step": 3368 - }, - { - "epoch": 0.2201816874714071, - "grad_norm": 0.488750696182251, - "learning_rate": 9.92698770841348e-06, - "loss": 0.4718, - "step": 3369 - }, - { - "epoch": 0.22024704267694922, - "grad_norm": 0.501806378364563, - "learning_rate": 9.926928240008583e-06, - "loss": 0.4108, - "step": 3370 - }, - { - "epoch": 0.22031239788249135, - "grad_norm": 0.4870474636554718, - "learning_rate": 9.926868747573348e-06, - "loss": 0.4144, - "step": 3371 - }, - { - "epoch": 0.22037775308803345, - "grad_norm": 0.5012169480323792, - "learning_rate": 9.926809231108068e-06, - "loss": 0.4378, - "step": 3372 - }, - { - "epoch": 0.22044310829357558, - "grad_norm": 0.5123572945594788, - "learning_rate": 9.926749690613031e-06, - "loss": 0.4185, - "step": 3373 - }, - { - "epoch": 0.2205084634991177, - "grad_norm": 0.47672030329704285, - "learning_rate": 9.92669012608853e-06, - "loss": 0.3625, - "step": 3374 - }, - { - "epoch": 0.22057381870465984, - "grad_norm": 0.5566274523735046, - "learning_rate": 9.926630537534855e-06, - "loss": 0.4724, - "step": 3375 - }, - { - "epoch": 0.22063917391020194, - "grad_norm": 0.46616724133491516, - "learning_rate": 9.926570924952295e-06, - "loss": 0.3786, - "step": 3376 - }, - { - "epoch": 0.22070452911574406, - "grad_norm": 0.48081544041633606, - "learning_rate": 9.926511288341143e-06, - "loss": 0.3832, - "step": 3377 - }, - { - "epoch": 0.2207698843212862, - "grad_norm": 0.5178582072257996, - "learning_rate": 9.926451627701687e-06, - "loss": 0.4417, - "step": 3378 - }, - { - "epoch": 0.22083523952682832, - "grad_norm": 0.4961230456829071, - "learning_rate": 9.926391943034222e-06, - "loss": 0.4278, - "step": 3379 - }, - { - "epoch": 0.22090059473237042, - "grad_norm": 0.4684832692146301, - "learning_rate": 9.926332234339035e-06, - "loss": 0.3777, - "step": 3380 - }, - { - "epoch": 0.22096594993791255, - "grad_norm": 0.5126791000366211, - "learning_rate": 9.92627250161642e-06, - "loss": 0.476, - "step": 3381 - }, - { - "epoch": 0.22103130514345468, - "grad_norm": 0.453173965215683, - "learning_rate": 9.926212744866668e-06, - "loss": 0.3912, - "step": 3382 - }, - { - "epoch": 0.2210966603489968, - "grad_norm": 0.5246379971504211, - "learning_rate": 9.926152964090068e-06, - "loss": 0.4641, - "step": 3383 - }, - { - "epoch": 0.22116201555453893, - "grad_norm": 0.4921092987060547, - "learning_rate": 9.926093159286916e-06, - "loss": 0.4173, - "step": 3384 - }, - { - "epoch": 0.22122737076008103, - "grad_norm": 0.4708023965358734, - "learning_rate": 9.926033330457498e-06, - "loss": 0.4066, - "step": 3385 - }, - { - "epoch": 0.22129272596562316, - "grad_norm": 0.49583831429481506, - "learning_rate": 9.925973477602111e-06, - "loss": 0.4209, - "step": 3386 - }, - { - "epoch": 0.2213580811711653, - "grad_norm": 0.46903926134109497, - "learning_rate": 9.925913600721045e-06, - "loss": 0.3758, - "step": 3387 - }, - { - "epoch": 0.22142343637670742, - "grad_norm": 0.4628562331199646, - "learning_rate": 9.925853699814592e-06, - "loss": 0.3919, - "step": 3388 - }, - { - "epoch": 0.22148879158224952, - "grad_norm": 0.4656374454498291, - "learning_rate": 9.925793774883042e-06, - "loss": 0.3627, - "step": 3389 - }, - { - "epoch": 0.22155414678779164, - "grad_norm": 0.49630263447761536, - "learning_rate": 9.925733825926691e-06, - "loss": 0.4053, - "step": 3390 - }, - { - "epoch": 0.22161950199333377, - "grad_norm": 0.5168403387069702, - "learning_rate": 9.92567385294583e-06, - "loss": 0.3891, - "step": 3391 - }, - { - "epoch": 0.2216848571988759, - "grad_norm": 0.4836116135120392, - "learning_rate": 9.92561385594075e-06, - "loss": 0.4171, - "step": 3392 - }, - { - "epoch": 0.221750212404418, - "grad_norm": 0.4696779251098633, - "learning_rate": 9.925553834911745e-06, - "loss": 0.3903, - "step": 3393 - }, - { - "epoch": 0.22181556760996013, - "grad_norm": 0.46139299869537354, - "learning_rate": 9.925493789859107e-06, - "loss": 0.3961, - "step": 3394 - }, - { - "epoch": 0.22188092281550226, - "grad_norm": 0.4747457206249237, - "learning_rate": 9.92543372078313e-06, - "loss": 0.4106, - "step": 3395 - }, - { - "epoch": 0.22194627802104439, - "grad_norm": 0.4985063374042511, - "learning_rate": 9.925373627684107e-06, - "loss": 0.4419, - "step": 3396 - }, - { - "epoch": 0.22201163322658649, - "grad_norm": 0.5045239925384521, - "learning_rate": 9.925313510562331e-06, - "loss": 0.4206, - "step": 3397 - }, - { - "epoch": 0.2220769884321286, - "grad_norm": 0.5201502442359924, - "learning_rate": 9.925253369418093e-06, - "loss": 0.4159, - "step": 3398 - }, - { - "epoch": 0.22214234363767074, - "grad_norm": 0.5161673426628113, - "learning_rate": 9.925193204251689e-06, - "loss": 0.4384, - "step": 3399 - }, - { - "epoch": 0.22220769884321287, - "grad_norm": 0.48066210746765137, - "learning_rate": 9.925133015063412e-06, - "loss": 0.4421, - "step": 3400 - }, - { - "epoch": 0.22227305404875497, - "grad_norm": 0.4755731523036957, - "learning_rate": 9.925072801853554e-06, - "loss": 0.3963, - "step": 3401 - }, - { - "epoch": 0.2223384092542971, - "grad_norm": 0.48620015382766724, - "learning_rate": 9.92501256462241e-06, - "loss": 0.4184, - "step": 3402 - }, - { - "epoch": 0.22240376445983923, - "grad_norm": 0.49302372336387634, - "learning_rate": 9.924952303370274e-06, - "loss": 0.4506, - "step": 3403 - }, - { - "epoch": 0.22246911966538135, - "grad_norm": 0.4507593810558319, - "learning_rate": 9.92489201809744e-06, - "loss": 0.3732, - "step": 3404 - }, - { - "epoch": 0.22253447487092348, - "grad_norm": 0.4765528738498688, - "learning_rate": 9.9248317088042e-06, - "loss": 0.4214, - "step": 3405 - }, - { - "epoch": 0.22259983007646558, - "grad_norm": 0.5671145915985107, - "learning_rate": 9.92477137549085e-06, - "loss": 0.4977, - "step": 3406 - }, - { - "epoch": 0.2226651852820077, - "grad_norm": 0.48747357726097107, - "learning_rate": 9.924711018157684e-06, - "loss": 0.4504, - "step": 3407 - }, - { - "epoch": 0.22273054048754984, - "grad_norm": 0.48180848360061646, - "learning_rate": 9.924650636804997e-06, - "loss": 0.3956, - "step": 3408 - }, - { - "epoch": 0.22279589569309197, - "grad_norm": 0.4759974181652069, - "learning_rate": 9.924590231433082e-06, - "loss": 0.4175, - "step": 3409 - }, - { - "epoch": 0.22286125089863407, - "grad_norm": 0.4900985360145569, - "learning_rate": 9.924529802042236e-06, - "loss": 0.451, - "step": 3410 - }, - { - "epoch": 0.2229266061041762, - "grad_norm": 0.4859611690044403, - "learning_rate": 9.92446934863275e-06, - "loss": 0.4109, - "step": 3411 - }, - { - "epoch": 0.22299196130971832, - "grad_norm": 0.46664854884147644, - "learning_rate": 9.924408871204923e-06, - "loss": 0.4346, - "step": 3412 - }, - { - "epoch": 0.22305731651526045, - "grad_norm": 0.4617181420326233, - "learning_rate": 9.924348369759045e-06, - "loss": 0.3751, - "step": 3413 - }, - { - "epoch": 0.22312267172080255, - "grad_norm": 0.49750587344169617, - "learning_rate": 9.924287844295417e-06, - "loss": 0.4764, - "step": 3414 - }, - { - "epoch": 0.22318802692634468, - "grad_norm": 0.47653597593307495, - "learning_rate": 9.92422729481433e-06, - "loss": 0.4216, - "step": 3415 - }, - { - "epoch": 0.2232533821318868, - "grad_norm": 0.4832088053226471, - "learning_rate": 9.92416672131608e-06, - "loss": 0.4099, - "step": 3416 - }, - { - "epoch": 0.22331873733742894, - "grad_norm": 0.4652642607688904, - "learning_rate": 9.924106123800964e-06, - "loss": 0.3525, - "step": 3417 - }, - { - "epoch": 0.22338409254297104, - "grad_norm": 0.5141023993492126, - "learning_rate": 9.924045502269275e-06, - "loss": 0.463, - "step": 3418 - }, - { - "epoch": 0.22344944774851316, - "grad_norm": 0.4620446562767029, - "learning_rate": 9.923984856721312e-06, - "loss": 0.381, - "step": 3419 - }, - { - "epoch": 0.2235148029540553, - "grad_norm": 0.47818121314048767, - "learning_rate": 9.923924187157368e-06, - "loss": 0.42, - "step": 3420 - }, - { - "epoch": 0.22358015815959742, - "grad_norm": 0.4844329059123993, - "learning_rate": 9.92386349357774e-06, - "loss": 0.42, - "step": 3421 - }, - { - "epoch": 0.22364551336513952, - "grad_norm": 0.5048141479492188, - "learning_rate": 9.923802775982724e-06, - "loss": 0.4725, - "step": 3422 - }, - { - "epoch": 0.22371086857068165, - "grad_norm": 0.47144418954849243, - "learning_rate": 9.923742034372618e-06, - "loss": 0.4094, - "step": 3423 - }, - { - "epoch": 0.22377622377622378, - "grad_norm": 0.49419599771499634, - "learning_rate": 9.923681268747714e-06, - "loss": 0.4514, - "step": 3424 - }, - { - "epoch": 0.2238415789817659, - "grad_norm": 0.49246811866760254, - "learning_rate": 9.92362047910831e-06, - "loss": 0.4264, - "step": 3425 - }, - { - "epoch": 0.22390693418730803, - "grad_norm": 0.46921661496162415, - "learning_rate": 9.923559665454707e-06, - "loss": 0.4393, - "step": 3426 - }, - { - "epoch": 0.22397228939285013, - "grad_norm": 0.4811283051967621, - "learning_rate": 9.923498827787195e-06, - "loss": 0.3721, - "step": 3427 - }, - { - "epoch": 0.22403764459839226, - "grad_norm": 0.5040667653083801, - "learning_rate": 9.923437966106074e-06, - "loss": 0.4154, - "step": 3428 - }, - { - "epoch": 0.2241029998039344, - "grad_norm": 0.4838813543319702, - "learning_rate": 9.92337708041164e-06, - "loss": 0.4096, - "step": 3429 - }, - { - "epoch": 0.22416835500947652, - "grad_norm": 0.5002124309539795, - "learning_rate": 9.923316170704192e-06, - "loss": 0.4395, - "step": 3430 - }, - { - "epoch": 0.22423371021501862, - "grad_norm": 0.4668019413948059, - "learning_rate": 9.923255236984024e-06, - "loss": 0.3919, - "step": 3431 - }, - { - "epoch": 0.22429906542056074, - "grad_norm": 0.4683651328086853, - "learning_rate": 9.923194279251435e-06, - "loss": 0.3844, - "step": 3432 - }, - { - "epoch": 0.22436442062610287, - "grad_norm": 0.46975427865982056, - "learning_rate": 9.923133297506721e-06, - "loss": 0.4201, - "step": 3433 - }, - { - "epoch": 0.224429775831645, - "grad_norm": 0.48653876781463623, - "learning_rate": 9.923072291750182e-06, - "loss": 0.3965, - "step": 3434 - }, - { - "epoch": 0.2244951310371871, - "grad_norm": 0.5011444091796875, - "learning_rate": 9.923011261982113e-06, - "loss": 0.4005, - "step": 3435 - }, - { - "epoch": 0.22456048624272923, - "grad_norm": 0.4734165370464325, - "learning_rate": 9.922950208202812e-06, - "loss": 0.414, - "step": 3436 - }, - { - "epoch": 0.22462584144827136, - "grad_norm": 0.4409369230270386, - "learning_rate": 9.922889130412578e-06, - "loss": 0.3678, - "step": 3437 - }, - { - "epoch": 0.22469119665381349, - "grad_norm": 0.4692237973213196, - "learning_rate": 9.922828028611708e-06, - "loss": 0.3892, - "step": 3438 - }, - { - "epoch": 0.22475655185935559, - "grad_norm": 0.5060802102088928, - "learning_rate": 9.922766902800502e-06, - "loss": 0.3997, - "step": 3439 - }, - { - "epoch": 0.2248219070648977, - "grad_norm": 0.5373620390892029, - "learning_rate": 9.922705752979254e-06, - "loss": 0.4634, - "step": 3440 - }, - { - "epoch": 0.22488726227043984, - "grad_norm": 0.4730021357536316, - "learning_rate": 9.922644579148267e-06, - "loss": 0.4026, - "step": 3441 - }, - { - "epoch": 0.22495261747598197, - "grad_norm": 0.4861553907394409, - "learning_rate": 9.922583381307835e-06, - "loss": 0.4224, - "step": 3442 - }, - { - "epoch": 0.22501797268152407, - "grad_norm": 0.507655918598175, - "learning_rate": 9.922522159458259e-06, - "loss": 0.4462, - "step": 3443 - }, - { - "epoch": 0.2250833278870662, - "grad_norm": 0.47881460189819336, - "learning_rate": 9.922460913599838e-06, - "loss": 0.3974, - "step": 3444 - }, - { - "epoch": 0.22514868309260833, - "grad_norm": 0.49684351682662964, - "learning_rate": 9.922399643732867e-06, - "loss": 0.4459, - "step": 3445 - }, - { - "epoch": 0.22521403829815045, - "grad_norm": 0.4507707953453064, - "learning_rate": 9.92233834985765e-06, - "loss": 0.3473, - "step": 3446 - }, - { - "epoch": 0.22527939350369258, - "grad_norm": 0.4546440839767456, - "learning_rate": 9.922277031974484e-06, - "loss": 0.4452, - "step": 3447 - }, - { - "epoch": 0.22534474870923468, - "grad_norm": 0.47355177998542786, - "learning_rate": 9.922215690083667e-06, - "loss": 0.3942, - "step": 3448 - }, - { - "epoch": 0.2254101039147768, - "grad_norm": 0.46548977494239807, - "learning_rate": 9.9221543241855e-06, - "loss": 0.4144, - "step": 3449 - }, - { - "epoch": 0.22547545912031894, - "grad_norm": 0.4996720850467682, - "learning_rate": 9.92209293428028e-06, - "loss": 0.4264, - "step": 3450 - }, - { - "epoch": 0.22554081432586107, - "grad_norm": 0.44034871459007263, - "learning_rate": 9.922031520368307e-06, - "loss": 0.3552, - "step": 3451 - }, - { - "epoch": 0.22560616953140317, - "grad_norm": 0.43639031052589417, - "learning_rate": 9.921970082449881e-06, - "loss": 0.3225, - "step": 3452 - }, - { - "epoch": 0.2256715247369453, - "grad_norm": 0.502032458782196, - "learning_rate": 9.921908620525303e-06, - "loss": 0.4183, - "step": 3453 - }, - { - "epoch": 0.22573687994248742, - "grad_norm": 0.4731839895248413, - "learning_rate": 9.921847134594871e-06, - "loss": 0.4144, - "step": 3454 - }, - { - "epoch": 0.22580223514802955, - "grad_norm": 0.512500524520874, - "learning_rate": 9.921785624658887e-06, - "loss": 0.4391, - "step": 3455 - }, - { - "epoch": 0.22586759035357165, - "grad_norm": 0.5110539197921753, - "learning_rate": 9.921724090717646e-06, - "loss": 0.4066, - "step": 3456 - }, - { - "epoch": 0.22593294555911378, - "grad_norm": 0.478849858045578, - "learning_rate": 9.921662532771455e-06, - "loss": 0.419, - "step": 3457 - }, - { - "epoch": 0.2259983007646559, - "grad_norm": 0.5071877837181091, - "learning_rate": 9.92160095082061e-06, - "loss": 0.4284, - "step": 3458 - }, - { - "epoch": 0.22606365597019804, - "grad_norm": 0.5038055777549744, - "learning_rate": 9.92153934486541e-06, - "loss": 0.4439, - "step": 3459 - }, - { - "epoch": 0.22612901117574014, - "grad_norm": 0.5345181226730347, - "learning_rate": 9.921477714906158e-06, - "loss": 0.4447, - "step": 3460 - }, - { - "epoch": 0.22619436638128226, - "grad_norm": 0.4881491959095001, - "learning_rate": 9.921416060943157e-06, - "loss": 0.3998, - "step": 3461 - }, - { - "epoch": 0.2262597215868244, - "grad_norm": 0.4545268416404724, - "learning_rate": 9.921354382976703e-06, - "loss": 0.3934, - "step": 3462 - }, - { - "epoch": 0.22632507679236652, - "grad_norm": 0.47923189401626587, - "learning_rate": 9.9212926810071e-06, - "loss": 0.4024, - "step": 3463 - }, - { - "epoch": 0.22639043199790862, - "grad_norm": 0.5075094699859619, - "learning_rate": 9.921230955034645e-06, - "loss": 0.469, - "step": 3464 - }, - { - "epoch": 0.22645578720345075, - "grad_norm": 0.46883073449134827, - "learning_rate": 9.921169205059644e-06, - "loss": 0.4028, - "step": 3465 - }, - { - "epoch": 0.22652114240899288, - "grad_norm": 0.46435844898223877, - "learning_rate": 9.921107431082395e-06, - "loss": 0.4042, - "step": 3466 - }, - { - "epoch": 0.226586497614535, - "grad_norm": 0.44952312111854553, - "learning_rate": 9.921045633103201e-06, - "loss": 0.3556, - "step": 3467 - }, - { - "epoch": 0.22665185282007713, - "grad_norm": 0.44821831583976746, - "learning_rate": 9.920983811122363e-06, - "loss": 0.4041, - "step": 3468 - }, - { - "epoch": 0.22671720802561923, - "grad_norm": 0.46383920311927795, - "learning_rate": 9.92092196514018e-06, - "loss": 0.3687, - "step": 3469 - }, - { - "epoch": 0.22678256323116136, - "grad_norm": 0.4864318072795868, - "learning_rate": 9.920860095156956e-06, - "loss": 0.4704, - "step": 3470 - }, - { - "epoch": 0.2268479184367035, - "grad_norm": 0.46455636620521545, - "learning_rate": 9.920798201172996e-06, - "loss": 0.3975, - "step": 3471 - }, - { - "epoch": 0.22691327364224562, - "grad_norm": 0.47325679659843445, - "learning_rate": 9.920736283188596e-06, - "loss": 0.4323, - "step": 3472 - }, - { - "epoch": 0.22697862884778772, - "grad_norm": 0.49008190631866455, - "learning_rate": 9.92067434120406e-06, - "loss": 0.4475, - "step": 3473 - }, - { - "epoch": 0.22704398405332984, - "grad_norm": 0.45270171761512756, - "learning_rate": 9.92061237521969e-06, - "loss": 0.3751, - "step": 3474 - }, - { - "epoch": 0.22710933925887197, - "grad_norm": 0.4598439931869507, - "learning_rate": 9.920550385235791e-06, - "loss": 0.3957, - "step": 3475 - }, - { - "epoch": 0.2271746944644141, - "grad_norm": 0.5912983417510986, - "learning_rate": 9.920488371252662e-06, - "loss": 0.4098, - "step": 3476 - }, - { - "epoch": 0.2272400496699562, - "grad_norm": 0.49598702788352966, - "learning_rate": 9.920426333270607e-06, - "loss": 0.4417, - "step": 3477 - }, - { - "epoch": 0.22730540487549833, - "grad_norm": 0.4662719666957855, - "learning_rate": 9.920364271289929e-06, - "loss": 0.3967, - "step": 3478 - }, - { - "epoch": 0.22737076008104046, - "grad_norm": 0.4649987518787384, - "learning_rate": 9.920302185310928e-06, - "loss": 0.4069, - "step": 3479 - }, - { - "epoch": 0.22743611528658259, - "grad_norm": 0.4543205201625824, - "learning_rate": 9.920240075333909e-06, - "loss": 0.3966, - "step": 3480 - }, - { - "epoch": 0.22750147049212469, - "grad_norm": 0.47058290243148804, - "learning_rate": 9.920177941359174e-06, - "loss": 0.4021, - "step": 3481 - }, - { - "epoch": 0.2275668256976668, - "grad_norm": 0.5269588828086853, - "learning_rate": 9.920115783387028e-06, - "loss": 0.4509, - "step": 3482 - }, - { - "epoch": 0.22763218090320894, - "grad_norm": 0.48189884424209595, - "learning_rate": 9.920053601417773e-06, - "loss": 0.3597, - "step": 3483 - }, - { - "epoch": 0.22769753610875107, - "grad_norm": 0.49318572878837585, - "learning_rate": 9.919991395451713e-06, - "loss": 0.4451, - "step": 3484 - }, - { - "epoch": 0.22776289131429317, - "grad_norm": 0.44078201055526733, - "learning_rate": 9.919929165489149e-06, - "loss": 0.3647, - "step": 3485 - }, - { - "epoch": 0.2278282465198353, - "grad_norm": 0.4564656615257263, - "learning_rate": 9.919866911530386e-06, - "loss": 0.3636, - "step": 3486 - }, - { - "epoch": 0.22789360172537743, - "grad_norm": 0.5045684576034546, - "learning_rate": 9.919804633575727e-06, - "loss": 0.4093, - "step": 3487 - }, - { - "epoch": 0.22795895693091955, - "grad_norm": 0.48582541942596436, - "learning_rate": 9.919742331625477e-06, - "loss": 0.4026, - "step": 3488 - }, - { - "epoch": 0.22802431213646168, - "grad_norm": 0.49130311608314514, - "learning_rate": 9.91968000567994e-06, - "loss": 0.4286, - "step": 3489 - }, - { - "epoch": 0.22808966734200378, - "grad_norm": 0.4653776288032532, - "learning_rate": 9.91961765573942e-06, - "loss": 0.3151, - "step": 3490 - }, - { - "epoch": 0.2281550225475459, - "grad_norm": 0.49312624335289, - "learning_rate": 9.919555281804219e-06, - "loss": 0.4269, - "step": 3491 - }, - { - "epoch": 0.22822037775308804, - "grad_norm": 0.5089777708053589, - "learning_rate": 9.919492883874642e-06, - "loss": 0.4033, - "step": 3492 - }, - { - "epoch": 0.22828573295863017, - "grad_norm": 0.4697995185852051, - "learning_rate": 9.919430461950996e-06, - "loss": 0.357, - "step": 3493 - }, - { - "epoch": 0.22835108816417227, - "grad_norm": 0.4682181179523468, - "learning_rate": 9.919368016033581e-06, - "loss": 0.4047, - "step": 3494 - }, - { - "epoch": 0.2284164433697144, - "grad_norm": 0.48286086320877075, - "learning_rate": 9.919305546122704e-06, - "loss": 0.4092, - "step": 3495 - }, - { - "epoch": 0.22848179857525652, - "grad_norm": 0.4905316233634949, - "learning_rate": 9.919243052218672e-06, - "loss": 0.4078, - "step": 3496 - }, - { - "epoch": 0.22854715378079865, - "grad_norm": 0.5208161473274231, - "learning_rate": 9.919180534321787e-06, - "loss": 0.4205, - "step": 3497 - }, - { - "epoch": 0.22861250898634075, - "grad_norm": 0.4441189467906952, - "learning_rate": 9.919117992432352e-06, - "loss": 0.3484, - "step": 3498 - }, - { - "epoch": 0.22867786419188288, - "grad_norm": 0.4950271546840668, - "learning_rate": 9.919055426550676e-06, - "loss": 0.4509, - "step": 3499 - }, - { - "epoch": 0.228743219397425, - "grad_norm": 0.4683346748352051, - "learning_rate": 9.918992836677064e-06, - "loss": 0.4291, - "step": 3500 - }, - { - "epoch": 0.22880857460296714, - "grad_norm": 0.49860504269599915, - "learning_rate": 9.918930222811818e-06, - "loss": 0.4008, - "step": 3501 - }, - { - "epoch": 0.22887392980850924, - "grad_norm": 0.48979246616363525, - "learning_rate": 9.918867584955245e-06, - "loss": 0.4318, - "step": 3502 - }, - { - "epoch": 0.22893928501405136, - "grad_norm": 0.4749269485473633, - "learning_rate": 9.918804923107651e-06, - "loss": 0.3796, - "step": 3503 - }, - { - "epoch": 0.2290046402195935, - "grad_norm": 0.47051766514778137, - "learning_rate": 9.918742237269341e-06, - "loss": 0.374, - "step": 3504 - }, - { - "epoch": 0.22906999542513562, - "grad_norm": 0.4721180200576782, - "learning_rate": 9.918679527440623e-06, - "loss": 0.4035, - "step": 3505 - }, - { - "epoch": 0.22913535063067772, - "grad_norm": 0.5044035315513611, - "learning_rate": 9.9186167936218e-06, - "loss": 0.4212, - "step": 3506 - }, - { - "epoch": 0.22920070583621985, - "grad_norm": 0.5842257738113403, - "learning_rate": 9.918554035813177e-06, - "loss": 0.4856, - "step": 3507 - }, - { - "epoch": 0.22926606104176198, - "grad_norm": 0.5033165812492371, - "learning_rate": 9.918491254015064e-06, - "loss": 0.4001, - "step": 3508 - }, - { - "epoch": 0.2293314162473041, - "grad_norm": 0.5334236025810242, - "learning_rate": 9.918428448227767e-06, - "loss": 0.4575, - "step": 3509 - }, - { - "epoch": 0.22939677145284623, - "grad_norm": 0.48921439051628113, - "learning_rate": 9.918365618451586e-06, - "loss": 0.4554, - "step": 3510 - }, - { - "epoch": 0.22946212665838833, - "grad_norm": 0.4366236925125122, - "learning_rate": 9.918302764686835e-06, - "loss": 0.3555, - "step": 3511 - }, - { - "epoch": 0.22952748186393046, - "grad_norm": 0.4496869444847107, - "learning_rate": 9.918239886933818e-06, - "loss": 0.3394, - "step": 3512 - }, - { - "epoch": 0.2295928370694726, - "grad_norm": 0.46605080366134644, - "learning_rate": 9.91817698519284e-06, - "loss": 0.3875, - "step": 3513 - }, - { - "epoch": 0.22965819227501472, - "grad_norm": 0.48124197125434875, - "learning_rate": 9.918114059464209e-06, - "loss": 0.4025, - "step": 3514 - }, - { - "epoch": 0.22972354748055682, - "grad_norm": 0.47093167901039124, - "learning_rate": 9.918051109748233e-06, - "loss": 0.3923, - "step": 3515 - }, - { - "epoch": 0.22978890268609894, - "grad_norm": 0.4437168538570404, - "learning_rate": 9.917988136045215e-06, - "loss": 0.3532, - "step": 3516 - }, - { - "epoch": 0.22985425789164107, - "grad_norm": 0.5177018642425537, - "learning_rate": 9.917925138355468e-06, - "loss": 0.4273, - "step": 3517 - }, - { - "epoch": 0.2299196130971832, - "grad_norm": 0.48225924372673035, - "learning_rate": 9.917862116679295e-06, - "loss": 0.4301, - "step": 3518 - }, - { - "epoch": 0.2299849683027253, - "grad_norm": 0.4468449056148529, - "learning_rate": 9.917799071017007e-06, - "loss": 0.3567, - "step": 3519 - }, - { - "epoch": 0.23005032350826743, - "grad_norm": 0.48492759466171265, - "learning_rate": 9.917736001368907e-06, - "loss": 0.4383, - "step": 3520 - }, - { - "epoch": 0.23011567871380956, - "grad_norm": 0.489656001329422, - "learning_rate": 9.917672907735306e-06, - "loss": 0.3883, - "step": 3521 - }, - { - "epoch": 0.23018103391935169, - "grad_norm": 0.490484356880188, - "learning_rate": 9.917609790116508e-06, - "loss": 0.4052, - "step": 3522 - }, - { - "epoch": 0.23024638912489379, - "grad_norm": 0.48557600378990173, - "learning_rate": 9.917546648512826e-06, - "loss": 0.3701, - "step": 3523 - }, - { - "epoch": 0.2303117443304359, - "grad_norm": 0.44677576422691345, - "learning_rate": 9.917483482924566e-06, - "loss": 0.3621, - "step": 3524 - }, - { - "epoch": 0.23037709953597804, - "grad_norm": 0.45892393589019775, - "learning_rate": 9.917420293352034e-06, - "loss": 0.3976, - "step": 3525 - }, - { - "epoch": 0.23044245474152017, - "grad_norm": 0.440434992313385, - "learning_rate": 9.91735707979554e-06, - "loss": 0.3849, - "step": 3526 - }, - { - "epoch": 0.23050780994706227, - "grad_norm": 0.4849710762500763, - "learning_rate": 9.917293842255392e-06, - "loss": 0.4536, - "step": 3527 - }, - { - "epoch": 0.2305731651526044, - "grad_norm": 0.5004308819770813, - "learning_rate": 9.917230580731898e-06, - "loss": 0.4063, - "step": 3528 - }, - { - "epoch": 0.23063852035814653, - "grad_norm": 0.5044596195220947, - "learning_rate": 9.917167295225367e-06, - "loss": 0.4073, - "step": 3529 - }, - { - "epoch": 0.23070387556368865, - "grad_norm": 0.5104182958602905, - "learning_rate": 9.917103985736107e-06, - "loss": 0.4824, - "step": 3530 - }, - { - "epoch": 0.23076923076923078, - "grad_norm": 0.4673587381839752, - "learning_rate": 9.917040652264429e-06, - "loss": 0.3776, - "step": 3531 - }, - { - "epoch": 0.23083458597477288, - "grad_norm": 0.46703916788101196, - "learning_rate": 9.91697729481064e-06, - "loss": 0.399, - "step": 3532 - }, - { - "epoch": 0.230899941180315, - "grad_norm": 0.48946160078048706, - "learning_rate": 9.91691391337505e-06, - "loss": 0.4151, - "step": 3533 - }, - { - "epoch": 0.23096529638585714, - "grad_norm": 0.5334754586219788, - "learning_rate": 9.916850507957965e-06, - "loss": 0.4416, - "step": 3534 - }, - { - "epoch": 0.23103065159139927, - "grad_norm": 0.5059509873390198, - "learning_rate": 9.9167870785597e-06, - "loss": 0.4324, - "step": 3535 - }, - { - "epoch": 0.23109600679694137, - "grad_norm": 0.48675912618637085, - "learning_rate": 9.916723625180557e-06, - "loss": 0.3674, - "step": 3536 - }, - { - "epoch": 0.2311613620024835, - "grad_norm": 0.4450955092906952, - "learning_rate": 9.916660147820853e-06, - "loss": 0.3888, - "step": 3537 - }, - { - "epoch": 0.23122671720802562, - "grad_norm": 0.4775831699371338, - "learning_rate": 9.916596646480894e-06, - "loss": 0.4156, - "step": 3538 - }, - { - "epoch": 0.23129207241356775, - "grad_norm": 0.5021882653236389, - "learning_rate": 9.916533121160988e-06, - "loss": 0.4414, - "step": 3539 - }, - { - "epoch": 0.23135742761910985, - "grad_norm": 0.4575294852256775, - "learning_rate": 9.916469571861447e-06, - "loss": 0.4026, - "step": 3540 - }, - { - "epoch": 0.23142278282465198, - "grad_norm": 0.5298008322715759, - "learning_rate": 9.91640599858258e-06, - "loss": 0.4382, - "step": 3541 - }, - { - "epoch": 0.2314881380301941, - "grad_norm": 0.5370910167694092, - "learning_rate": 9.916342401324702e-06, - "loss": 0.4831, - "step": 3542 - }, - { - "epoch": 0.23155349323573624, - "grad_norm": 0.4930441975593567, - "learning_rate": 9.916278780088115e-06, - "loss": 0.4447, - "step": 3543 - }, - { - "epoch": 0.23161884844127834, - "grad_norm": 0.47559595108032227, - "learning_rate": 9.916215134873134e-06, - "loss": 0.4133, - "step": 3544 - }, - { - "epoch": 0.23168420364682046, - "grad_norm": 0.5217772126197815, - "learning_rate": 9.916151465680069e-06, - "loss": 0.407, - "step": 3545 - }, - { - "epoch": 0.2317495588523626, - "grad_norm": 0.5612869262695312, - "learning_rate": 9.91608777250923e-06, - "loss": 0.4943, - "step": 3546 - }, - { - "epoch": 0.23181491405790472, - "grad_norm": 0.4543122947216034, - "learning_rate": 9.916024055360928e-06, - "loss": 0.3515, - "step": 3547 - }, - { - "epoch": 0.23188026926344682, - "grad_norm": 0.44925716519355774, - "learning_rate": 9.915960314235473e-06, - "loss": 0.39, - "step": 3548 - }, - { - "epoch": 0.23194562446898895, - "grad_norm": 0.5313082933425903, - "learning_rate": 9.915896549133178e-06, - "loss": 0.4397, - "step": 3549 - }, - { - "epoch": 0.23201097967453108, - "grad_norm": 0.5094080567359924, - "learning_rate": 9.915832760054351e-06, - "loss": 0.4458, - "step": 3550 - }, - { - "epoch": 0.2320763348800732, - "grad_norm": 0.4623429477214813, - "learning_rate": 9.915768946999305e-06, - "loss": 0.4179, - "step": 3551 - }, - { - "epoch": 0.23214169008561533, - "grad_norm": 0.4832753539085388, - "learning_rate": 9.91570510996835e-06, - "loss": 0.3926, - "step": 3552 - }, - { - "epoch": 0.23220704529115743, - "grad_norm": 0.48376351594924927, - "learning_rate": 9.9156412489618e-06, - "loss": 0.4169, - "step": 3553 - }, - { - "epoch": 0.23227240049669956, - "grad_norm": 0.4905616343021393, - "learning_rate": 9.915577363979963e-06, - "loss": 0.4202, - "step": 3554 - }, - { - "epoch": 0.2323377557022417, - "grad_norm": 0.48949626088142395, - "learning_rate": 9.915513455023154e-06, - "loss": 0.4279, - "step": 3555 - }, - { - "epoch": 0.23240311090778382, - "grad_norm": 0.4484732449054718, - "learning_rate": 9.915449522091682e-06, - "loss": 0.3685, - "step": 3556 - }, - { - "epoch": 0.23246846611332592, - "grad_norm": 0.44788533449172974, - "learning_rate": 9.91538556518586e-06, - "loss": 0.3432, - "step": 3557 - }, - { - "epoch": 0.23253382131886804, - "grad_norm": 0.44627711176872253, - "learning_rate": 9.915321584306e-06, - "loss": 0.3753, - "step": 3558 - }, - { - "epoch": 0.23259917652441017, - "grad_norm": 0.4644292891025543, - "learning_rate": 9.915257579452412e-06, - "loss": 0.3836, - "step": 3559 - }, - { - "epoch": 0.2326645317299523, - "grad_norm": 0.5058528780937195, - "learning_rate": 9.915193550625411e-06, - "loss": 0.4646, - "step": 3560 - }, - { - "epoch": 0.2327298869354944, - "grad_norm": 0.4695664048194885, - "learning_rate": 9.915129497825309e-06, - "loss": 0.3794, - "step": 3561 - }, - { - "epoch": 0.23279524214103653, - "grad_norm": 0.45301932096481323, - "learning_rate": 9.915065421052418e-06, - "loss": 0.3863, - "step": 3562 - }, - { - "epoch": 0.23286059734657866, - "grad_norm": 0.5022915005683899, - "learning_rate": 9.915001320307049e-06, - "loss": 0.4001, - "step": 3563 - }, - { - "epoch": 0.23292595255212079, - "grad_norm": 0.468749076128006, - "learning_rate": 9.914937195589516e-06, - "loss": 0.3831, - "step": 3564 - }, - { - "epoch": 0.23299130775766289, - "grad_norm": 0.44168904423713684, - "learning_rate": 9.914873046900133e-06, - "loss": 0.3438, - "step": 3565 - }, - { - "epoch": 0.233056662963205, - "grad_norm": 0.4480777084827423, - "learning_rate": 9.91480887423921e-06, - "loss": 0.3792, - "step": 3566 - }, - { - "epoch": 0.23312201816874714, - "grad_norm": 0.4802215099334717, - "learning_rate": 9.914744677607063e-06, - "loss": 0.3791, - "step": 3567 - }, - { - "epoch": 0.23318737337428927, - "grad_norm": 0.523776650428772, - "learning_rate": 9.914680457004003e-06, - "loss": 0.4683, - "step": 3568 - }, - { - "epoch": 0.23325272857983137, - "grad_norm": 0.5157068967819214, - "learning_rate": 9.914616212430341e-06, - "loss": 0.3736, - "step": 3569 - }, - { - "epoch": 0.2333180837853735, - "grad_norm": 0.5125598311424255, - "learning_rate": 9.914551943886397e-06, - "loss": 0.4395, - "step": 3570 - }, - { - "epoch": 0.23338343899091563, - "grad_norm": 0.5070778131484985, - "learning_rate": 9.91448765137248e-06, - "loss": 0.4615, - "step": 3571 - }, - { - "epoch": 0.23344879419645775, - "grad_norm": 0.47507959604263306, - "learning_rate": 9.914423334888901e-06, - "loss": 0.4407, - "step": 3572 - }, - { - "epoch": 0.23351414940199988, - "grad_norm": 0.5215258002281189, - "learning_rate": 9.91435899443598e-06, - "loss": 0.4708, - "step": 3573 - }, - { - "epoch": 0.23357950460754198, - "grad_norm": 0.5039072036743164, - "learning_rate": 9.914294630014027e-06, - "loss": 0.4433, - "step": 3574 - }, - { - "epoch": 0.2336448598130841, - "grad_norm": 0.4702485203742981, - "learning_rate": 9.914230241623356e-06, - "loss": 0.3941, - "step": 3575 - }, - { - "epoch": 0.23371021501862624, - "grad_norm": 0.47556859254837036, - "learning_rate": 9.914165829264281e-06, - "loss": 0.4122, - "step": 3576 - }, - { - "epoch": 0.23377557022416837, - "grad_norm": 0.7105624675750732, - "learning_rate": 9.914101392937119e-06, - "loss": 0.4393, - "step": 3577 - }, - { - "epoch": 0.23384092542971047, - "grad_norm": 0.4980992078781128, - "learning_rate": 9.914036932642181e-06, - "loss": 0.3901, - "step": 3578 - }, - { - "epoch": 0.2339062806352526, - "grad_norm": 0.4689491391181946, - "learning_rate": 9.913972448379783e-06, - "loss": 0.388, - "step": 3579 - }, - { - "epoch": 0.23397163584079472, - "grad_norm": 0.5059848427772522, - "learning_rate": 9.913907940150238e-06, - "loss": 0.4531, - "step": 3580 - }, - { - "epoch": 0.23403699104633685, - "grad_norm": 0.47146686911582947, - "learning_rate": 9.913843407953862e-06, - "loss": 0.3656, - "step": 3581 - }, - { - "epoch": 0.23410234625187895, - "grad_norm": 0.5070616602897644, - "learning_rate": 9.91377885179097e-06, - "loss": 0.4375, - "step": 3582 - }, - { - "epoch": 0.23416770145742108, - "grad_norm": 0.5187751054763794, - "learning_rate": 9.913714271661875e-06, - "loss": 0.4443, - "step": 3583 - }, - { - "epoch": 0.2342330566629632, - "grad_norm": 0.4969579875469208, - "learning_rate": 9.913649667566893e-06, - "loss": 0.4393, - "step": 3584 - }, - { - "epoch": 0.23429841186850534, - "grad_norm": 0.45504826307296753, - "learning_rate": 9.913585039506342e-06, - "loss": 0.39, - "step": 3585 - }, - { - "epoch": 0.23436376707404744, - "grad_norm": 0.5259515643119812, - "learning_rate": 9.913520387480533e-06, - "loss": 0.4972, - "step": 3586 - }, - { - "epoch": 0.23442912227958956, - "grad_norm": 0.49079811573028564, - "learning_rate": 9.913455711489782e-06, - "loss": 0.407, - "step": 3587 - }, - { - "epoch": 0.2344944774851317, - "grad_norm": 0.4935847818851471, - "learning_rate": 9.913391011534406e-06, - "loss": 0.4423, - "step": 3588 - }, - { - "epoch": 0.23455983269067382, - "grad_norm": 0.45255184173583984, - "learning_rate": 9.91332628761472e-06, - "loss": 0.4047, - "step": 3589 - }, - { - "epoch": 0.23462518789621592, - "grad_norm": 0.5370975732803345, - "learning_rate": 9.91326153973104e-06, - "loss": 0.3959, - "step": 3590 - }, - { - "epoch": 0.23469054310175805, - "grad_norm": 0.534020721912384, - "learning_rate": 9.91319676788368e-06, - "loss": 0.4743, - "step": 3591 - }, - { - "epoch": 0.23475589830730018, - "grad_norm": 0.5184808969497681, - "learning_rate": 9.913131972072959e-06, - "loss": 0.4356, - "step": 3592 - }, - { - "epoch": 0.2348212535128423, - "grad_norm": 0.45330944657325745, - "learning_rate": 9.91306715229919e-06, - "loss": 0.3782, - "step": 3593 - }, - { - "epoch": 0.23488660871838443, - "grad_norm": 0.5136331915855408, - "learning_rate": 9.91300230856269e-06, - "loss": 0.4294, - "step": 3594 - }, - { - "epoch": 0.23495196392392653, - "grad_norm": 0.4611961543560028, - "learning_rate": 9.912937440863777e-06, - "loss": 0.3651, - "step": 3595 - }, - { - "epoch": 0.23501731912946866, - "grad_norm": 1.2540053129196167, - "learning_rate": 9.912872549202766e-06, - "loss": 0.4077, - "step": 3596 - }, - { - "epoch": 0.2350826743350108, - "grad_norm": 0.4653492867946625, - "learning_rate": 9.912807633579972e-06, - "loss": 0.3764, - "step": 3597 - }, - { - "epoch": 0.23514802954055292, - "grad_norm": 0.5431433320045471, - "learning_rate": 9.912742693995716e-06, - "loss": 0.4674, - "step": 3598 - }, - { - "epoch": 0.23521338474609502, - "grad_norm": 0.4963741898536682, - "learning_rate": 9.91267773045031e-06, - "loss": 0.4226, - "step": 3599 - }, - { - "epoch": 0.23527873995163714, - "grad_norm": 0.5268748998641968, - "learning_rate": 9.912612742944072e-06, - "loss": 0.4736, - "step": 3600 - }, - { - "epoch": 0.23534409515717927, - "grad_norm": 0.4762163758277893, - "learning_rate": 9.912547731477322e-06, - "loss": 0.4385, - "step": 3601 - }, - { - "epoch": 0.2354094503627214, - "grad_norm": 0.471513569355011, - "learning_rate": 9.912482696050374e-06, - "loss": 0.3856, - "step": 3602 - }, - { - "epoch": 0.2354748055682635, - "grad_norm": 0.5295483469963074, - "learning_rate": 9.912417636663545e-06, - "loss": 0.4437, - "step": 3603 - }, - { - "epoch": 0.23554016077380563, - "grad_norm": 0.5125187039375305, - "learning_rate": 9.912352553317155e-06, - "loss": 0.392, - "step": 3604 - }, - { - "epoch": 0.23560551597934776, - "grad_norm": 0.5781072378158569, - "learning_rate": 9.912287446011518e-06, - "loss": 0.3485, - "step": 3605 - }, - { - "epoch": 0.23567087118488989, - "grad_norm": 0.49361076951026917, - "learning_rate": 9.912222314746955e-06, - "loss": 0.4151, - "step": 3606 - }, - { - "epoch": 0.23573622639043199, - "grad_norm": 0.5029470324516296, - "learning_rate": 9.91215715952378e-06, - "loss": 0.4423, - "step": 3607 - }, - { - "epoch": 0.2358015815959741, - "grad_norm": 0.4984515309333801, - "learning_rate": 9.912091980342316e-06, - "loss": 0.4242, - "step": 3608 - }, - { - "epoch": 0.23586693680151624, - "grad_norm": 0.4875701069831848, - "learning_rate": 9.912026777202874e-06, - "loss": 0.3597, - "step": 3609 - }, - { - "epoch": 0.23593229200705837, - "grad_norm": 0.5226539969444275, - "learning_rate": 9.911961550105779e-06, - "loss": 0.4043, - "step": 3610 - }, - { - "epoch": 0.23599764721260047, - "grad_norm": 0.4634535014629364, - "learning_rate": 9.911896299051345e-06, - "loss": 0.3585, - "step": 3611 - }, - { - "epoch": 0.2360630024181426, - "grad_norm": 0.5316253900527954, - "learning_rate": 9.911831024039888e-06, - "loss": 0.4959, - "step": 3612 - }, - { - "epoch": 0.23612835762368473, - "grad_norm": 0.4336017966270447, - "learning_rate": 9.911765725071734e-06, - "loss": 0.3652, - "step": 3613 - }, - { - "epoch": 0.23619371282922685, - "grad_norm": 0.45477989315986633, - "learning_rate": 9.911700402147195e-06, - "loss": 0.4172, - "step": 3614 - }, - { - "epoch": 0.23625906803476898, - "grad_norm": 0.5167801976203918, - "learning_rate": 9.91163505526659e-06, - "loss": 0.4651, - "step": 3615 - }, - { - "epoch": 0.23632442324031108, - "grad_norm": 0.5102893710136414, - "learning_rate": 9.911569684430242e-06, - "loss": 0.4382, - "step": 3616 - }, - { - "epoch": 0.2363897784458532, - "grad_norm": 0.4945951998233795, - "learning_rate": 9.911504289638465e-06, - "loss": 0.4434, - "step": 3617 - }, - { - "epoch": 0.23645513365139534, - "grad_norm": 0.498888224363327, - "learning_rate": 9.91143887089158e-06, - "loss": 0.39, - "step": 3618 - }, - { - "epoch": 0.23652048885693747, - "grad_norm": 0.4623052477836609, - "learning_rate": 9.911373428189908e-06, - "loss": 0.4011, - "step": 3619 - }, - { - "epoch": 0.23658584406247957, - "grad_norm": 0.5997217297554016, - "learning_rate": 9.911307961533765e-06, - "loss": 0.447, - "step": 3620 - }, - { - "epoch": 0.2366511992680217, - "grad_norm": 0.49396345019340515, - "learning_rate": 9.911242470923472e-06, - "loss": 0.4589, - "step": 3621 - }, - { - "epoch": 0.23671655447356382, - "grad_norm": 0.5173277258872986, - "learning_rate": 9.91117695635935e-06, - "loss": 0.4422, - "step": 3622 - }, - { - "epoch": 0.23678190967910595, - "grad_norm": 0.502960205078125, - "learning_rate": 9.911111417841715e-06, - "loss": 0.4131, - "step": 3623 - }, - { - "epoch": 0.23684726488464805, - "grad_norm": 0.4695163071155548, - "learning_rate": 9.911045855370887e-06, - "loss": 0.3577, - "step": 3624 - }, - { - "epoch": 0.23691262009019018, - "grad_norm": 0.47594931721687317, - "learning_rate": 9.910980268947188e-06, - "loss": 0.3931, - "step": 3625 - }, - { - "epoch": 0.2369779752957323, - "grad_norm": 0.5421062707901001, - "learning_rate": 9.910914658570936e-06, - "loss": 0.4604, - "step": 3626 - }, - { - "epoch": 0.23704333050127444, - "grad_norm": 0.5341295003890991, - "learning_rate": 9.910849024242453e-06, - "loss": 0.4852, - "step": 3627 - }, - { - "epoch": 0.23710868570681654, - "grad_norm": 0.5298961400985718, - "learning_rate": 9.910783365962057e-06, - "loss": 0.4514, - "step": 3628 - }, - { - "epoch": 0.23717404091235866, - "grad_norm": 0.47505030035972595, - "learning_rate": 9.910717683730072e-06, - "loss": 0.4018, - "step": 3629 - }, - { - "epoch": 0.2372393961179008, - "grad_norm": 0.5131982564926147, - "learning_rate": 9.910651977546812e-06, - "loss": 0.4161, - "step": 3630 - }, - { - "epoch": 0.23730475132344292, - "grad_norm": 0.48219045996665955, - "learning_rate": 9.910586247412604e-06, - "loss": 0.3945, - "step": 3631 - }, - { - "epoch": 0.23737010652898502, - "grad_norm": 0.5161522626876831, - "learning_rate": 9.910520493327764e-06, - "loss": 0.3998, - "step": 3632 - }, - { - "epoch": 0.23743546173452715, - "grad_norm": 0.5477389097213745, - "learning_rate": 9.910454715292614e-06, - "loss": 0.4687, - "step": 3633 - }, - { - "epoch": 0.23750081694006928, - "grad_norm": 0.48694872856140137, - "learning_rate": 9.910388913307476e-06, - "loss": 0.4175, - "step": 3634 - }, - { - "epoch": 0.2375661721456114, - "grad_norm": 0.4663483798503876, - "learning_rate": 9.91032308737267e-06, - "loss": 0.3902, - "step": 3635 - }, - { - "epoch": 0.23763152735115353, - "grad_norm": 0.5143205523490906, - "learning_rate": 9.910257237488519e-06, - "loss": 0.377, - "step": 3636 - }, - { - "epoch": 0.23769688255669563, - "grad_norm": 0.5119413137435913, - "learning_rate": 9.91019136365534e-06, - "loss": 0.4236, - "step": 3637 - }, - { - "epoch": 0.23776223776223776, - "grad_norm": 0.43303024768829346, - "learning_rate": 9.910125465873458e-06, - "loss": 0.3411, - "step": 3638 - }, - { - "epoch": 0.2378275929677799, - "grad_norm": 0.48212507367134094, - "learning_rate": 9.910059544143193e-06, - "loss": 0.4249, - "step": 3639 - }, - { - "epoch": 0.23789294817332202, - "grad_norm": 0.5053635835647583, - "learning_rate": 9.909993598464865e-06, - "loss": 0.4061, - "step": 3640 - }, - { - "epoch": 0.23795830337886412, - "grad_norm": 0.48906877636909485, - "learning_rate": 9.909927628838799e-06, - "loss": 0.4569, - "step": 3641 - }, - { - "epoch": 0.23802365858440624, - "grad_norm": 0.48252391815185547, - "learning_rate": 9.909861635265315e-06, - "loss": 0.3862, - "step": 3642 - }, - { - "epoch": 0.23808901378994837, - "grad_norm": 0.5164976119995117, - "learning_rate": 9.909795617744733e-06, - "loss": 0.4697, - "step": 3643 - }, - { - "epoch": 0.2381543689954905, - "grad_norm": 0.4901650547981262, - "learning_rate": 9.909729576277379e-06, - "loss": 0.4316, - "step": 3644 - }, - { - "epoch": 0.2382197242010326, - "grad_norm": 0.4517502188682556, - "learning_rate": 9.909663510863571e-06, - "loss": 0.378, - "step": 3645 - }, - { - "epoch": 0.23828507940657473, - "grad_norm": 0.4997141361236572, - "learning_rate": 9.909597421503635e-06, - "loss": 0.432, - "step": 3646 - }, - { - "epoch": 0.23835043461211686, - "grad_norm": 0.5163469910621643, - "learning_rate": 9.90953130819789e-06, - "loss": 0.3907, - "step": 3647 - }, - { - "epoch": 0.23841578981765899, - "grad_norm": 0.5234763026237488, - "learning_rate": 9.909465170946661e-06, - "loss": 0.424, - "step": 3648 - }, - { - "epoch": 0.23848114502320109, - "grad_norm": 0.5328369736671448, - "learning_rate": 9.909399009750268e-06, - "loss": 0.4601, - "step": 3649 - }, - { - "epoch": 0.2385465002287432, - "grad_norm": 0.45241212844848633, - "learning_rate": 9.909332824609037e-06, - "loss": 0.3821, - "step": 3650 - }, - { - "epoch": 0.23861185543428534, - "grad_norm": 0.5118778944015503, - "learning_rate": 9.90926661552329e-06, - "loss": 0.4394, - "step": 3651 - }, - { - "epoch": 0.23867721063982747, - "grad_norm": 0.5193794369697571, - "learning_rate": 9.909200382493347e-06, - "loss": 0.4087, - "step": 3652 - }, - { - "epoch": 0.23874256584536957, - "grad_norm": 0.4448736310005188, - "learning_rate": 9.909134125519533e-06, - "loss": 0.351, - "step": 3653 - }, - { - "epoch": 0.2388079210509117, - "grad_norm": 0.4771903455257416, - "learning_rate": 9.909067844602172e-06, - "loss": 0.4167, - "step": 3654 - }, - { - "epoch": 0.23887327625645383, - "grad_norm": 0.5226938128471375, - "learning_rate": 9.909001539741587e-06, - "loss": 0.4411, - "step": 3655 - }, - { - "epoch": 0.23893863146199595, - "grad_norm": 0.48490509390830994, - "learning_rate": 9.9089352109381e-06, - "loss": 0.3758, - "step": 3656 - }, - { - "epoch": 0.23900398666753808, - "grad_norm": 0.47723546624183655, - "learning_rate": 9.908868858192036e-06, - "loss": 0.435, - "step": 3657 - }, - { - "epoch": 0.23906934187308018, - "grad_norm": 0.4633159041404724, - "learning_rate": 9.908802481503717e-06, - "loss": 0.3985, - "step": 3658 - }, - { - "epoch": 0.2391346970786223, - "grad_norm": 0.5509350895881653, - "learning_rate": 9.908736080873468e-06, - "loss": 0.4614, - "step": 3659 - }, - { - "epoch": 0.23920005228416444, - "grad_norm": 0.46527114510536194, - "learning_rate": 9.908669656301613e-06, - "loss": 0.3911, - "step": 3660 - }, - { - "epoch": 0.23926540748970657, - "grad_norm": 0.512252688407898, - "learning_rate": 9.908603207788475e-06, - "loss": 0.4333, - "step": 3661 - }, - { - "epoch": 0.23933076269524867, - "grad_norm": 0.4877110421657562, - "learning_rate": 9.908536735334379e-06, - "loss": 0.3963, - "step": 3662 - }, - { - "epoch": 0.2393961179007908, - "grad_norm": 0.4963582754135132, - "learning_rate": 9.908470238939649e-06, - "loss": 0.4148, - "step": 3663 - }, - { - "epoch": 0.23946147310633292, - "grad_norm": 0.4605066180229187, - "learning_rate": 9.908403718604609e-06, - "loss": 0.373, - "step": 3664 - }, - { - "epoch": 0.23952682831187505, - "grad_norm": 0.468755841255188, - "learning_rate": 9.908337174329583e-06, - "loss": 0.375, - "step": 3665 - }, - { - "epoch": 0.23959218351741715, - "grad_norm": 0.5060827732086182, - "learning_rate": 9.908270606114897e-06, - "loss": 0.4497, - "step": 3666 - }, - { - "epoch": 0.23965753872295928, - "grad_norm": 0.44706472754478455, - "learning_rate": 9.908204013960875e-06, - "loss": 0.3538, - "step": 3667 - }, - { - "epoch": 0.2397228939285014, - "grad_norm": 0.4768923819065094, - "learning_rate": 9.90813739786784e-06, - "loss": 0.39, - "step": 3668 - }, - { - "epoch": 0.23978824913404354, - "grad_norm": 0.4842081069946289, - "learning_rate": 9.908070757836121e-06, - "loss": 0.4624, - "step": 3669 - }, - { - "epoch": 0.23985360433958564, - "grad_norm": 0.47158530354499817, - "learning_rate": 9.90800409386604e-06, - "loss": 0.4236, - "step": 3670 - }, - { - "epoch": 0.23991895954512776, - "grad_norm": 0.5272809267044067, - "learning_rate": 9.907937405957921e-06, - "loss": 0.3945, - "step": 3671 - }, - { - "epoch": 0.2399843147506699, - "grad_norm": 0.4997832775115967, - "learning_rate": 9.907870694112092e-06, - "loss": 0.4343, - "step": 3672 - }, - { - "epoch": 0.24004966995621202, - "grad_norm": 0.4826876223087311, - "learning_rate": 9.907803958328879e-06, - "loss": 0.4162, - "step": 3673 - }, - { - "epoch": 0.24011502516175415, - "grad_norm": 0.5297667384147644, - "learning_rate": 9.907737198608604e-06, - "loss": 0.495, - "step": 3674 - }, - { - "epoch": 0.24018038036729625, - "grad_norm": 0.48894059658050537, - "learning_rate": 9.907670414951596e-06, - "loss": 0.4015, - "step": 3675 - }, - { - "epoch": 0.24024573557283838, - "grad_norm": 0.49508002400398254, - "learning_rate": 9.907603607358178e-06, - "loss": 0.4239, - "step": 3676 - }, - { - "epoch": 0.2403110907783805, - "grad_norm": 0.4823409914970398, - "learning_rate": 9.907536775828677e-06, - "loss": 0.4233, - "step": 3677 - }, - { - "epoch": 0.24037644598392263, - "grad_norm": 0.5320729613304138, - "learning_rate": 9.907469920363418e-06, - "loss": 0.4583, - "step": 3678 - }, - { - "epoch": 0.24044180118946473, - "grad_norm": 0.564666211605072, - "learning_rate": 9.90740304096273e-06, - "loss": 0.4155, - "step": 3679 - }, - { - "epoch": 0.24050715639500686, - "grad_norm": 0.46652790904045105, - "learning_rate": 9.907336137626937e-06, - "loss": 0.3988, - "step": 3680 - }, - { - "epoch": 0.240572511600549, - "grad_norm": 0.47065576910972595, - "learning_rate": 9.907269210356364e-06, - "loss": 0.3879, - "step": 3681 - }, - { - "epoch": 0.24063786680609112, - "grad_norm": 0.4547605514526367, - "learning_rate": 9.90720225915134e-06, - "loss": 0.3587, - "step": 3682 - }, - { - "epoch": 0.24070322201163322, - "grad_norm": 0.523067831993103, - "learning_rate": 9.907135284012191e-06, - "loss": 0.4263, - "step": 3683 - }, - { - "epoch": 0.24076857721717534, - "grad_norm": 0.4699363708496094, - "learning_rate": 9.907068284939244e-06, - "loss": 0.38, - "step": 3684 - }, - { - "epoch": 0.24083393242271747, - "grad_norm": 0.44570061564445496, - "learning_rate": 9.907001261932824e-06, - "loss": 0.3761, - "step": 3685 - }, - { - "epoch": 0.2408992876282596, - "grad_norm": 0.4833367168903351, - "learning_rate": 9.906934214993259e-06, - "loss": 0.3602, - "step": 3686 - }, - { - "epoch": 0.2409646428338017, - "grad_norm": 0.4648684561252594, - "learning_rate": 9.906867144120875e-06, - "loss": 0.3739, - "step": 3687 - }, - { - "epoch": 0.24102999803934383, - "grad_norm": 0.6127973198890686, - "learning_rate": 9.906800049316001e-06, - "loss": 0.396, - "step": 3688 - }, - { - "epoch": 0.24109535324488596, - "grad_norm": 0.4691463112831116, - "learning_rate": 9.906732930578963e-06, - "loss": 0.4036, - "step": 3689 - }, - { - "epoch": 0.24116070845042809, - "grad_norm": 0.4602457582950592, - "learning_rate": 9.906665787910089e-06, - "loss": 0.3991, - "step": 3690 - }, - { - "epoch": 0.24122606365597019, - "grad_norm": 0.486935555934906, - "learning_rate": 9.906598621309706e-06, - "loss": 0.4189, - "step": 3691 - }, - { - "epoch": 0.2412914188615123, - "grad_norm": 0.4824185073375702, - "learning_rate": 9.906531430778142e-06, - "loss": 0.4131, - "step": 3692 - }, - { - "epoch": 0.24135677406705444, - "grad_norm": 0.4648727774620056, - "learning_rate": 9.906464216315724e-06, - "loss": 0.3911, - "step": 3693 - }, - { - "epoch": 0.24142212927259657, - "grad_norm": 0.4880336821079254, - "learning_rate": 9.90639697792278e-06, - "loss": 0.4357, - "step": 3694 - }, - { - "epoch": 0.2414874844781387, - "grad_norm": 0.4606556296348572, - "learning_rate": 9.906329715599639e-06, - "loss": 0.3877, - "step": 3695 - }, - { - "epoch": 0.2415528396836808, - "grad_norm": 0.4391932189464569, - "learning_rate": 9.906262429346627e-06, - "loss": 0.3512, - "step": 3696 - }, - { - "epoch": 0.24161819488922293, - "grad_norm": 0.4812285602092743, - "learning_rate": 9.906195119164074e-06, - "loss": 0.4152, - "step": 3697 - }, - { - "epoch": 0.24168355009476505, - "grad_norm": 0.4891197979450226, - "learning_rate": 9.906127785052308e-06, - "loss": 0.4649, - "step": 3698 - }, - { - "epoch": 0.24174890530030718, - "grad_norm": 0.48923251032829285, - "learning_rate": 9.906060427011657e-06, - "loss": 0.4232, - "step": 3699 - }, - { - "epoch": 0.24181426050584928, - "grad_norm": 0.44078633189201355, - "learning_rate": 9.90599304504245e-06, - "loss": 0.3612, - "step": 3700 - }, - { - "epoch": 0.2418796157113914, - "grad_norm": 0.48194047808647156, - "learning_rate": 9.905925639145015e-06, - "loss": 0.3981, - "step": 3701 - }, - { - "epoch": 0.24194497091693354, - "grad_norm": 0.53200364112854, - "learning_rate": 9.905858209319681e-06, - "loss": 0.4444, - "step": 3702 - }, - { - "epoch": 0.24201032612247567, - "grad_norm": 0.5198573470115662, - "learning_rate": 9.905790755566777e-06, - "loss": 0.485, - "step": 3703 - }, - { - "epoch": 0.24207568132801777, - "grad_norm": 0.4784989655017853, - "learning_rate": 9.905723277886631e-06, - "loss": 0.4177, - "step": 3704 - }, - { - "epoch": 0.2421410365335599, - "grad_norm": 0.49317121505737305, - "learning_rate": 9.905655776279576e-06, - "loss": 0.4136, - "step": 3705 - }, - { - "epoch": 0.24220639173910202, - "grad_norm": 0.5199515223503113, - "learning_rate": 9.905588250745936e-06, - "loss": 0.4396, - "step": 3706 - }, - { - "epoch": 0.24227174694464415, - "grad_norm": 0.5406073927879333, - "learning_rate": 9.905520701286043e-06, - "loss": 0.429, - "step": 3707 - }, - { - "epoch": 0.24233710215018625, - "grad_norm": 0.4321300685405731, - "learning_rate": 9.905453127900227e-06, - "loss": 0.355, - "step": 3708 - }, - { - "epoch": 0.24240245735572838, - "grad_norm": 0.4865424335002899, - "learning_rate": 9.905385530588817e-06, - "loss": 0.3876, - "step": 3709 - }, - { - "epoch": 0.2424678125612705, - "grad_norm": 0.4845200479030609, - "learning_rate": 9.905317909352139e-06, - "loss": 0.4025, - "step": 3710 - }, - { - "epoch": 0.24253316776681264, - "grad_norm": 0.47324949502944946, - "learning_rate": 9.90525026419053e-06, - "loss": 0.3774, - "step": 3711 - }, - { - "epoch": 0.24259852297235474, - "grad_norm": 0.47342386841773987, - "learning_rate": 9.905182595104314e-06, - "loss": 0.4033, - "step": 3712 - }, - { - "epoch": 0.24266387817789686, - "grad_norm": 0.49976396560668945, - "learning_rate": 9.905114902093824e-06, - "loss": 0.4306, - "step": 3713 - }, - { - "epoch": 0.242729233383439, - "grad_norm": 0.4608675241470337, - "learning_rate": 9.905047185159389e-06, - "loss": 0.4018, - "step": 3714 - }, - { - "epoch": 0.24279458858898112, - "grad_norm": 0.4639568626880646, - "learning_rate": 9.90497944430134e-06, - "loss": 0.4036, - "step": 3715 - }, - { - "epoch": 0.24285994379452325, - "grad_norm": 0.4871489107608795, - "learning_rate": 9.904911679520006e-06, - "loss": 0.4131, - "step": 3716 - }, - { - "epoch": 0.24292529900006535, - "grad_norm": 0.4942607581615448, - "learning_rate": 9.90484389081572e-06, - "loss": 0.4076, - "step": 3717 - }, - { - "epoch": 0.24299065420560748, - "grad_norm": 0.5495970845222473, - "learning_rate": 9.90477607818881e-06, - "loss": 0.4217, - "step": 3718 - }, - { - "epoch": 0.2430560094111496, - "grad_norm": 0.4705306887626648, - "learning_rate": 9.904708241639606e-06, - "loss": 0.4024, - "step": 3719 - }, - { - "epoch": 0.24312136461669173, - "grad_norm": 0.44730666279792786, - "learning_rate": 9.904640381168444e-06, - "loss": 0.3443, - "step": 3720 - }, - { - "epoch": 0.24318671982223383, - "grad_norm": 0.47078898549079895, - "learning_rate": 9.90457249677565e-06, - "loss": 0.3724, - "step": 3721 - }, - { - "epoch": 0.24325207502777596, - "grad_norm": 0.4704471230506897, - "learning_rate": 9.904504588461558e-06, - "loss": 0.3869, - "step": 3722 - }, - { - "epoch": 0.2433174302333181, - "grad_norm": 0.46855428814888, - "learning_rate": 9.904436656226497e-06, - "loss": 0.4121, - "step": 3723 - }, - { - "epoch": 0.24338278543886022, - "grad_norm": 0.49998342990875244, - "learning_rate": 9.904368700070802e-06, - "loss": 0.4077, - "step": 3724 - }, - { - "epoch": 0.24344814064440232, - "grad_norm": 0.45472219586372375, - "learning_rate": 9.904300719994798e-06, - "loss": 0.3806, - "step": 3725 - }, - { - "epoch": 0.24351349584994444, - "grad_norm": 0.4763962924480438, - "learning_rate": 9.904232715998822e-06, - "loss": 0.4262, - "step": 3726 - }, - { - "epoch": 0.24357885105548657, - "grad_norm": 0.5250136852264404, - "learning_rate": 9.904164688083204e-06, - "loss": 0.4254, - "step": 3727 - }, - { - "epoch": 0.2436442062610287, - "grad_norm": 0.513821542263031, - "learning_rate": 9.904096636248278e-06, - "loss": 0.4359, - "step": 3728 - }, - { - "epoch": 0.2437095614665708, - "grad_norm": 0.47730183601379395, - "learning_rate": 9.90402856049437e-06, - "loss": 0.3834, - "step": 3729 - }, - { - "epoch": 0.24377491667211293, - "grad_norm": 0.4817773401737213, - "learning_rate": 9.903960460821818e-06, - "loss": 0.4112, - "step": 3730 - }, - { - "epoch": 0.24384027187765506, - "grad_norm": 0.4721023440361023, - "learning_rate": 9.903892337230952e-06, - "loss": 0.3987, - "step": 3731 - }, - { - "epoch": 0.24390562708319719, - "grad_norm": 0.4319049119949341, - "learning_rate": 9.903824189722103e-06, - "loss": 0.3409, - "step": 3732 - }, - { - "epoch": 0.24397098228873929, - "grad_norm": 0.5072712898254395, - "learning_rate": 9.903756018295605e-06, - "loss": 0.386, - "step": 3733 - }, - { - "epoch": 0.2440363374942814, - "grad_norm": 0.45842063426971436, - "learning_rate": 9.903687822951791e-06, - "loss": 0.3715, - "step": 3734 - }, - { - "epoch": 0.24410169269982354, - "grad_norm": 0.5203177332878113, - "learning_rate": 9.903619603690991e-06, - "loss": 0.4744, - "step": 3735 - }, - { - "epoch": 0.24416704790536567, - "grad_norm": 0.46919986605644226, - "learning_rate": 9.903551360513542e-06, - "loss": 0.4347, - "step": 3736 - }, - { - "epoch": 0.2442324031109078, - "grad_norm": 0.4473360776901245, - "learning_rate": 9.903483093419773e-06, - "loss": 0.358, - "step": 3737 - }, - { - "epoch": 0.2442977583164499, - "grad_norm": 0.4448186755180359, - "learning_rate": 9.903414802410016e-06, - "loss": 0.3769, - "step": 3738 - }, - { - "epoch": 0.24436311352199203, - "grad_norm": 0.4455445110797882, - "learning_rate": 9.903346487484608e-06, - "loss": 0.351, - "step": 3739 - }, - { - "epoch": 0.24442846872753415, - "grad_norm": 0.49022209644317627, - "learning_rate": 9.903278148643883e-06, - "loss": 0.4276, - "step": 3740 - }, - { - "epoch": 0.24449382393307628, - "grad_norm": 0.49154970049858093, - "learning_rate": 9.903209785888168e-06, - "loss": 0.4033, - "step": 3741 - }, - { - "epoch": 0.24455917913861838, - "grad_norm": 0.5160120129585266, - "learning_rate": 9.903141399217801e-06, - "loss": 0.4124, - "step": 3742 - }, - { - "epoch": 0.2446245343441605, - "grad_norm": 0.49152106046676636, - "learning_rate": 9.903072988633117e-06, - "loss": 0.3852, - "step": 3743 - }, - { - "epoch": 0.24468988954970264, - "grad_norm": 0.5074083209037781, - "learning_rate": 9.903004554134445e-06, - "loss": 0.431, - "step": 3744 - }, - { - "epoch": 0.24475524475524477, - "grad_norm": 0.4891955554485321, - "learning_rate": 9.902936095722123e-06, - "loss": 0.3834, - "step": 3745 - }, - { - "epoch": 0.24482059996078687, - "grad_norm": 0.4742303788661957, - "learning_rate": 9.902867613396482e-06, - "loss": 0.3664, - "step": 3746 - }, - { - "epoch": 0.244885955166329, - "grad_norm": 0.5135862827301025, - "learning_rate": 9.902799107157857e-06, - "loss": 0.4371, - "step": 3747 - }, - { - "epoch": 0.24495131037187112, - "grad_norm": 0.462126225233078, - "learning_rate": 9.902730577006583e-06, - "loss": 0.3749, - "step": 3748 - }, - { - "epoch": 0.24501666557741325, - "grad_norm": 0.5256122946739197, - "learning_rate": 9.902662022942994e-06, - "loss": 0.4601, - "step": 3749 - }, - { - "epoch": 0.24508202078295535, - "grad_norm": 0.4707671105861664, - "learning_rate": 9.902593444967424e-06, - "loss": 0.3879, - "step": 3750 - }, - { - "epoch": 0.24514737598849748, - "grad_norm": 0.4596821665763855, - "learning_rate": 9.902524843080206e-06, - "loss": 0.4088, - "step": 3751 - }, - { - "epoch": 0.2452127311940396, - "grad_norm": 0.4927043616771698, - "learning_rate": 9.902456217281674e-06, - "loss": 0.4366, - "step": 3752 - }, - { - "epoch": 0.24527808639958174, - "grad_norm": 0.48119106888771057, - "learning_rate": 9.90238756757217e-06, - "loss": 0.4108, - "step": 3753 - }, - { - "epoch": 0.24534344160512384, - "grad_norm": 0.48688697814941406, - "learning_rate": 9.90231889395202e-06, - "loss": 0.4673, - "step": 3754 - }, - { - "epoch": 0.24540879681066596, - "grad_norm": 0.4701494872570038, - "learning_rate": 9.902250196421562e-06, - "loss": 0.3807, - "step": 3755 - }, - { - "epoch": 0.2454741520162081, - "grad_norm": 0.44716522097587585, - "learning_rate": 9.902181474981133e-06, - "loss": 0.3892, - "step": 3756 - }, - { - "epoch": 0.24553950722175022, - "grad_norm": 0.4895716607570648, - "learning_rate": 9.902112729631066e-06, - "loss": 0.4221, - "step": 3757 - }, - { - "epoch": 0.24560486242729235, - "grad_norm": 0.49176111817359924, - "learning_rate": 9.902043960371697e-06, - "loss": 0.4293, - "step": 3758 - }, - { - "epoch": 0.24567021763283445, - "grad_norm": 0.4756181240081787, - "learning_rate": 9.901975167203361e-06, - "loss": 0.3844, - "step": 3759 - }, - { - "epoch": 0.24573557283837658, - "grad_norm": 0.48180314898490906, - "learning_rate": 9.901906350126395e-06, - "loss": 0.4009, - "step": 3760 - }, - { - "epoch": 0.2458009280439187, - "grad_norm": 0.48072636127471924, - "learning_rate": 9.901837509141132e-06, - "loss": 0.394, - "step": 3761 - }, - { - "epoch": 0.24586628324946083, - "grad_norm": 0.4648549258708954, - "learning_rate": 9.901768644247911e-06, - "loss": 0.3976, - "step": 3762 - }, - { - "epoch": 0.24593163845500293, - "grad_norm": 0.418720006942749, - "learning_rate": 9.901699755447065e-06, - "loss": 0.333, - "step": 3763 - }, - { - "epoch": 0.24599699366054506, - "grad_norm": 0.4856666624546051, - "learning_rate": 9.901630842738931e-06, - "loss": 0.4291, - "step": 3764 - }, - { - "epoch": 0.2460623488660872, - "grad_norm": 0.47633805871009827, - "learning_rate": 9.901561906123846e-06, - "loss": 0.4316, - "step": 3765 - }, - { - "epoch": 0.24612770407162932, - "grad_norm": 0.5254993438720703, - "learning_rate": 9.901492945602147e-06, - "loss": 0.4471, - "step": 3766 - }, - { - "epoch": 0.24619305927717142, - "grad_norm": 0.48775529861450195, - "learning_rate": 9.901423961174167e-06, - "loss": 0.3877, - "step": 3767 - }, - { - "epoch": 0.24625841448271354, - "grad_norm": 0.5022530555725098, - "learning_rate": 9.901354952840245e-06, - "loss": 0.4226, - "step": 3768 - }, - { - "epoch": 0.24632376968825567, - "grad_norm": 0.4446774125099182, - "learning_rate": 9.901285920600717e-06, - "loss": 0.3532, - "step": 3769 - }, - { - "epoch": 0.2463891248937978, - "grad_norm": 0.4788687527179718, - "learning_rate": 9.901216864455918e-06, - "loss": 0.3943, - "step": 3770 - }, - { - "epoch": 0.2464544800993399, - "grad_norm": 0.5055045485496521, - "learning_rate": 9.901147784406188e-06, - "loss": 0.4492, - "step": 3771 - }, - { - "epoch": 0.24651983530488203, - "grad_norm": 0.4874025881290436, - "learning_rate": 9.90107868045186e-06, - "loss": 0.409, - "step": 3772 - }, - { - "epoch": 0.24658519051042416, - "grad_norm": 0.5308887958526611, - "learning_rate": 9.901009552593277e-06, - "loss": 0.4231, - "step": 3773 - }, - { - "epoch": 0.24665054571596629, - "grad_norm": 0.48197951912879944, - "learning_rate": 9.900940400830771e-06, - "loss": 0.4134, - "step": 3774 - }, - { - "epoch": 0.24671590092150839, - "grad_norm": 0.5125496983528137, - "learning_rate": 9.90087122516468e-06, - "loss": 0.3976, - "step": 3775 - }, - { - "epoch": 0.2467812561270505, - "grad_norm": 0.4560488164424896, - "learning_rate": 9.900802025595342e-06, - "loss": 0.3593, - "step": 3776 - }, - { - "epoch": 0.24684661133259264, - "grad_norm": 0.4720732867717743, - "learning_rate": 9.900732802123097e-06, - "loss": 0.4183, - "step": 3777 - }, - { - "epoch": 0.24691196653813477, - "grad_norm": 0.5029764771461487, - "learning_rate": 9.900663554748278e-06, - "loss": 0.4845, - "step": 3778 - }, - { - "epoch": 0.2469773217436769, - "grad_norm": 0.46738043427467346, - "learning_rate": 9.900594283471226e-06, - "loss": 0.4029, - "step": 3779 - }, - { - "epoch": 0.247042676949219, - "grad_norm": 0.47529447078704834, - "learning_rate": 9.900524988292278e-06, - "loss": 0.395, - "step": 3780 - }, - { - "epoch": 0.24710803215476113, - "grad_norm": 0.49335628747940063, - "learning_rate": 9.900455669211773e-06, - "loss": 0.4791, - "step": 3781 - }, - { - "epoch": 0.24717338736030325, - "grad_norm": 0.4847749173641205, - "learning_rate": 9.900386326230046e-06, - "loss": 0.377, - "step": 3782 - }, - { - "epoch": 0.24723874256584538, - "grad_norm": 0.48539313673973083, - "learning_rate": 9.900316959347439e-06, - "loss": 0.4176, - "step": 3783 - }, - { - "epoch": 0.24730409777138748, - "grad_norm": 0.47560250759124756, - "learning_rate": 9.900247568564287e-06, - "loss": 0.4224, - "step": 3784 - }, - { - "epoch": 0.2473694529769296, - "grad_norm": 0.5065321922302246, - "learning_rate": 9.90017815388093e-06, - "loss": 0.4549, - "step": 3785 - }, - { - "epoch": 0.24743480818247174, - "grad_norm": 0.41895002126693726, - "learning_rate": 9.900108715297707e-06, - "loss": 0.3222, - "step": 3786 - }, - { - "epoch": 0.24750016338801387, - "grad_norm": 0.4707399308681488, - "learning_rate": 9.900039252814957e-06, - "loss": 0.4061, - "step": 3787 - }, - { - "epoch": 0.24756551859355597, - "grad_norm": 0.4642290771007538, - "learning_rate": 9.899969766433018e-06, - "loss": 0.3804, - "step": 3788 - }, - { - "epoch": 0.2476308737990981, - "grad_norm": 0.4792925715446472, - "learning_rate": 9.899900256152228e-06, - "loss": 0.36, - "step": 3789 - }, - { - "epoch": 0.24769622900464022, - "grad_norm": 0.48355647921562195, - "learning_rate": 9.899830721972927e-06, - "loss": 0.4061, - "step": 3790 - }, - { - "epoch": 0.24776158421018235, - "grad_norm": 0.479915976524353, - "learning_rate": 9.899761163895454e-06, - "loss": 0.3863, - "step": 3791 - }, - { - "epoch": 0.24782693941572445, - "grad_norm": 0.4555598199367523, - "learning_rate": 9.89969158192015e-06, - "loss": 0.4038, - "step": 3792 - }, - { - "epoch": 0.24789229462126658, - "grad_norm": 0.5053451061248779, - "learning_rate": 9.899621976047351e-06, - "loss": 0.423, - "step": 3793 - }, - { - "epoch": 0.2479576498268087, - "grad_norm": 0.503421425819397, - "learning_rate": 9.899552346277399e-06, - "loss": 0.4331, - "step": 3794 - }, - { - "epoch": 0.24802300503235084, - "grad_norm": 0.48323407769203186, - "learning_rate": 9.899482692610633e-06, - "loss": 0.4149, - "step": 3795 - }, - { - "epoch": 0.24808836023789294, - "grad_norm": 0.43359190225601196, - "learning_rate": 9.899413015047392e-06, - "loss": 0.3766, - "step": 3796 - }, - { - "epoch": 0.24815371544343506, - "grad_norm": 0.4996505081653595, - "learning_rate": 9.899343313588017e-06, - "loss": 0.3819, - "step": 3797 - }, - { - "epoch": 0.2482190706489772, - "grad_norm": 0.533610463142395, - "learning_rate": 9.899273588232847e-06, - "loss": 0.4787, - "step": 3798 - }, - { - "epoch": 0.24828442585451932, - "grad_norm": 0.5073145031929016, - "learning_rate": 9.899203838982221e-06, - "loss": 0.4339, - "step": 3799 - }, - { - "epoch": 0.24834978106006145, - "grad_norm": 0.4526595175266266, - "learning_rate": 9.899134065836482e-06, - "loss": 0.4048, - "step": 3800 - }, - { - "epoch": 0.24841513626560355, - "grad_norm": 0.4555380344390869, - "learning_rate": 9.89906426879597e-06, - "loss": 0.3982, - "step": 3801 - }, - { - "epoch": 0.24848049147114568, - "grad_norm": 0.47561123967170715, - "learning_rate": 9.898994447861024e-06, - "loss": 0.4033, - "step": 3802 - }, - { - "epoch": 0.2485458466766878, - "grad_norm": 0.49485746026039124, - "learning_rate": 9.898924603031983e-06, - "loss": 0.4475, - "step": 3803 - }, - { - "epoch": 0.24861120188222993, - "grad_norm": 0.44991278648376465, - "learning_rate": 9.898854734309191e-06, - "loss": 0.3678, - "step": 3804 - }, - { - "epoch": 0.24867655708777203, - "grad_norm": 0.45418450236320496, - "learning_rate": 9.898784841692988e-06, - "loss": 0.3994, - "step": 3805 - }, - { - "epoch": 0.24874191229331416, - "grad_norm": 0.50419682264328, - "learning_rate": 9.898714925183713e-06, - "loss": 0.4467, - "step": 3806 - }, - { - "epoch": 0.2488072674988563, - "grad_norm": 0.4924890995025635, - "learning_rate": 9.898644984781708e-06, - "loss": 0.4426, - "step": 3807 - }, - { - "epoch": 0.24887262270439842, - "grad_norm": 0.47436606884002686, - "learning_rate": 9.898575020487315e-06, - "loss": 0.4162, - "step": 3808 - }, - { - "epoch": 0.24893797790994052, - "grad_norm": 0.46907737851142883, - "learning_rate": 9.898505032300875e-06, - "loss": 0.4353, - "step": 3809 - }, - { - "epoch": 0.24900333311548264, - "grad_norm": 0.47675079107284546, - "learning_rate": 9.898435020222728e-06, - "loss": 0.3905, - "step": 3810 - }, - { - "epoch": 0.24906868832102477, - "grad_norm": 0.44763273000717163, - "learning_rate": 9.898364984253216e-06, - "loss": 0.378, - "step": 3811 - }, - { - "epoch": 0.2491340435265669, - "grad_norm": 0.447813481092453, - "learning_rate": 9.898294924392683e-06, - "loss": 0.366, - "step": 3812 - }, - { - "epoch": 0.249199398732109, - "grad_norm": 0.49202960729599, - "learning_rate": 9.898224840641469e-06, - "loss": 0.4146, - "step": 3813 - }, - { - "epoch": 0.24926475393765113, - "grad_norm": 0.4937524199485779, - "learning_rate": 9.898154732999912e-06, - "loss": 0.4352, - "step": 3814 - }, - { - "epoch": 0.24933010914319326, - "grad_norm": 0.4539417028427124, - "learning_rate": 9.89808460146836e-06, - "loss": 0.3948, - "step": 3815 - }, - { - "epoch": 0.24939546434873539, - "grad_norm": 0.48760756850242615, - "learning_rate": 9.898014446047153e-06, - "loss": 0.3868, - "step": 3816 - }, - { - "epoch": 0.24946081955427749, - "grad_norm": 0.5057722926139832, - "learning_rate": 9.897944266736632e-06, - "loss": 0.4052, - "step": 3817 - }, - { - "epoch": 0.2495261747598196, - "grad_norm": 0.4397360384464264, - "learning_rate": 9.89787406353714e-06, - "loss": 0.3392, - "step": 3818 - }, - { - "epoch": 0.24959152996536174, - "grad_norm": 0.4437639117240906, - "learning_rate": 9.897803836449018e-06, - "loss": 0.3629, - "step": 3819 - }, - { - "epoch": 0.24965688517090387, - "grad_norm": 0.4564015567302704, - "learning_rate": 9.897733585472612e-06, - "loss": 0.4098, - "step": 3820 - }, - { - "epoch": 0.249722240376446, - "grad_norm": 0.48790571093559265, - "learning_rate": 9.897663310608261e-06, - "loss": 0.3884, - "step": 3821 - }, - { - "epoch": 0.2497875955819881, - "grad_norm": 0.5272866487503052, - "learning_rate": 9.89759301185631e-06, - "loss": 0.4879, - "step": 3822 - }, - { - "epoch": 0.24985295078753023, - "grad_norm": 0.48300930857658386, - "learning_rate": 9.897522689217102e-06, - "loss": 0.4706, - "step": 3823 - }, - { - "epoch": 0.24991830599307235, - "grad_norm": 0.48800382018089294, - "learning_rate": 9.897452342690979e-06, - "loss": 0.4633, - "step": 3824 - }, - { - "epoch": 0.24998366119861448, - "grad_norm": 0.48852139711380005, - "learning_rate": 9.897381972278284e-06, - "loss": 0.4146, - "step": 3825 - }, - { - "epoch": 0.2500490164041566, - "grad_norm": 0.5131561160087585, - "learning_rate": 9.89731157797936e-06, - "loss": 0.4364, - "step": 3826 - }, - { - "epoch": 0.25011437160969874, - "grad_norm": 0.45051053166389465, - "learning_rate": 9.897241159794552e-06, - "loss": 0.3748, - "step": 3827 - }, - { - "epoch": 0.2501797268152408, - "grad_norm": 0.5114407539367676, - "learning_rate": 9.8971707177242e-06, - "loss": 0.4334, - "step": 3828 - }, - { - "epoch": 0.25024508202078294, - "grad_norm": 0.44549310207366943, - "learning_rate": 9.897100251768652e-06, - "loss": 0.3661, - "step": 3829 - }, - { - "epoch": 0.25031043722632507, - "grad_norm": 0.49115490913391113, - "learning_rate": 9.89702976192825e-06, - "loss": 0.3887, - "step": 3830 - }, - { - "epoch": 0.2503757924318672, - "grad_norm": 0.491936057806015, - "learning_rate": 9.896959248203335e-06, - "loss": 0.4038, - "step": 3831 - }, - { - "epoch": 0.2504411476374093, - "grad_norm": 0.5014188885688782, - "learning_rate": 9.896888710594255e-06, - "loss": 0.4619, - "step": 3832 - }, - { - "epoch": 0.25050650284295145, - "grad_norm": 0.4646052122116089, - "learning_rate": 9.896818149101352e-06, - "loss": 0.4399, - "step": 3833 - }, - { - "epoch": 0.2505718580484936, - "grad_norm": 0.45592236518859863, - "learning_rate": 9.89674756372497e-06, - "loss": 0.3611, - "step": 3834 - }, - { - "epoch": 0.2506372132540357, - "grad_norm": 0.47756922245025635, - "learning_rate": 9.896676954465454e-06, - "loss": 0.4089, - "step": 3835 - }, - { - "epoch": 0.2507025684595778, - "grad_norm": 0.49206778407096863, - "learning_rate": 9.896606321323147e-06, - "loss": 0.4206, - "step": 3836 - }, - { - "epoch": 0.2507679236651199, - "grad_norm": 0.48088929057121277, - "learning_rate": 9.896535664298396e-06, - "loss": 0.4064, - "step": 3837 - }, - { - "epoch": 0.25083327887066204, - "grad_norm": 0.4337596297264099, - "learning_rate": 9.896464983391544e-06, - "loss": 0.3596, - "step": 3838 - }, - { - "epoch": 0.25089863407620416, - "grad_norm": 0.45142289996147156, - "learning_rate": 9.896394278602937e-06, - "loss": 0.384, - "step": 3839 - }, - { - "epoch": 0.2509639892817463, - "grad_norm": 0.4437491297721863, - "learning_rate": 9.896323549932917e-06, - "loss": 0.357, - "step": 3840 - }, - { - "epoch": 0.2510293444872884, - "grad_norm": 0.46476033329963684, - "learning_rate": 9.896252797381832e-06, - "loss": 0.3721, - "step": 3841 - }, - { - "epoch": 0.25109469969283055, - "grad_norm": 0.5244317054748535, - "learning_rate": 9.896182020950026e-06, - "loss": 0.4606, - "step": 3842 - }, - { - "epoch": 0.2511600548983727, - "grad_norm": 0.46809300780296326, - "learning_rate": 9.896111220637843e-06, - "loss": 0.3968, - "step": 3843 - }, - { - "epoch": 0.2512254101039148, - "grad_norm": 0.44542941451072693, - "learning_rate": 9.89604039644563e-06, - "loss": 0.3994, - "step": 3844 - }, - { - "epoch": 0.2512907653094569, - "grad_norm": 0.4796372354030609, - "learning_rate": 9.895969548373731e-06, - "loss": 0.406, - "step": 3845 - }, - { - "epoch": 0.251356120514999, - "grad_norm": 0.4873756170272827, - "learning_rate": 9.895898676422494e-06, - "loss": 0.4229, - "step": 3846 - }, - { - "epoch": 0.25142147572054113, - "grad_norm": 0.483729749917984, - "learning_rate": 9.895827780592262e-06, - "loss": 0.4137, - "step": 3847 - }, - { - "epoch": 0.25148683092608326, - "grad_norm": 0.46675360202789307, - "learning_rate": 9.895756860883383e-06, - "loss": 0.3974, - "step": 3848 - }, - { - "epoch": 0.2515521861316254, - "grad_norm": 0.4536055028438568, - "learning_rate": 9.8956859172962e-06, - "loss": 0.4002, - "step": 3849 - }, - { - "epoch": 0.2516175413371675, - "grad_norm": 0.506420910358429, - "learning_rate": 9.895614949831063e-06, - "loss": 0.4112, - "step": 3850 - }, - { - "epoch": 0.25168289654270964, - "grad_norm": 0.5092169642448425, - "learning_rate": 9.895543958488314e-06, - "loss": 0.431, - "step": 3851 - }, - { - "epoch": 0.2517482517482518, - "grad_norm": 0.48873788118362427, - "learning_rate": 9.895472943268301e-06, - "loss": 0.4629, - "step": 3852 - }, - { - "epoch": 0.25181360695379384, - "grad_norm": 0.4860228896141052, - "learning_rate": 9.89540190417137e-06, - "loss": 0.3962, - "step": 3853 - }, - { - "epoch": 0.251878962159336, - "grad_norm": 0.5054576992988586, - "learning_rate": 9.89533084119787e-06, - "loss": 0.3795, - "step": 3854 - }, - { - "epoch": 0.2519443173648781, - "grad_norm": 0.49959778785705566, - "learning_rate": 9.895259754348145e-06, - "loss": 0.4733, - "step": 3855 - }, - { - "epoch": 0.25200967257042023, - "grad_norm": 0.5031869411468506, - "learning_rate": 9.895188643622542e-06, - "loss": 0.4629, - "step": 3856 - }, - { - "epoch": 0.25207502777596236, - "grad_norm": 0.47100287675857544, - "learning_rate": 9.895117509021408e-06, - "loss": 0.3989, - "step": 3857 - }, - { - "epoch": 0.2521403829815045, - "grad_norm": 0.5107840299606323, - "learning_rate": 9.89504635054509e-06, - "loss": 0.468, - "step": 3858 - }, - { - "epoch": 0.2522057381870466, - "grad_norm": 0.49201256036758423, - "learning_rate": 9.894975168193937e-06, - "loss": 0.4938, - "step": 3859 - }, - { - "epoch": 0.25227109339258874, - "grad_norm": 0.4625264108181, - "learning_rate": 9.894903961968292e-06, - "loss": 0.429, - "step": 3860 - }, - { - "epoch": 0.2523364485981308, - "grad_norm": 0.5019761323928833, - "learning_rate": 9.894832731868504e-06, - "loss": 0.4222, - "step": 3861 - }, - { - "epoch": 0.25240180380367294, - "grad_norm": 0.5039170980453491, - "learning_rate": 9.894761477894924e-06, - "loss": 0.4561, - "step": 3862 - }, - { - "epoch": 0.25246715900921507, - "grad_norm": 0.4441440999507904, - "learning_rate": 9.894690200047894e-06, - "loss": 0.368, - "step": 3863 - }, - { - "epoch": 0.2525325142147572, - "grad_norm": 0.473471999168396, - "learning_rate": 9.894618898327766e-06, - "loss": 0.3622, - "step": 3864 - }, - { - "epoch": 0.2525978694202993, - "grad_norm": 0.47707849740982056, - "learning_rate": 9.894547572734885e-06, - "loss": 0.4149, - "step": 3865 - }, - { - "epoch": 0.25266322462584145, - "grad_norm": 0.45889490842819214, - "learning_rate": 9.894476223269598e-06, - "loss": 0.3483, - "step": 3866 - }, - { - "epoch": 0.2527285798313836, - "grad_norm": 0.4801705777645111, - "learning_rate": 9.894404849932257e-06, - "loss": 0.4214, - "step": 3867 - }, - { - "epoch": 0.2527939350369257, - "grad_norm": 0.46492329239845276, - "learning_rate": 9.894333452723208e-06, - "loss": 0.4121, - "step": 3868 - }, - { - "epoch": 0.25285929024246784, - "grad_norm": 0.4617963135242462, - "learning_rate": 9.8942620316428e-06, - "loss": 0.3818, - "step": 3869 - }, - { - "epoch": 0.2529246454480099, - "grad_norm": 0.4506014585494995, - "learning_rate": 9.89419058669138e-06, - "loss": 0.3736, - "step": 3870 - }, - { - "epoch": 0.25299000065355204, - "grad_norm": 0.4737391769886017, - "learning_rate": 9.894119117869296e-06, - "loss": 0.4185, - "step": 3871 - }, - { - "epoch": 0.25305535585909417, - "grad_norm": 0.4927527606487274, - "learning_rate": 9.894047625176898e-06, - "loss": 0.4224, - "step": 3872 - }, - { - "epoch": 0.2531207110646363, - "grad_norm": 0.4633755087852478, - "learning_rate": 9.893976108614533e-06, - "loss": 0.4166, - "step": 3873 - }, - { - "epoch": 0.2531860662701784, - "grad_norm": 0.4423373341560364, - "learning_rate": 9.893904568182553e-06, - "loss": 0.4053, - "step": 3874 - }, - { - "epoch": 0.25325142147572055, - "grad_norm": 0.4814908504486084, - "learning_rate": 9.893833003881305e-06, - "loss": 0.4643, - "step": 3875 - }, - { - "epoch": 0.2533167766812627, - "grad_norm": 0.4767465591430664, - "learning_rate": 9.893761415711136e-06, - "loss": 0.4093, - "step": 3876 - }, - { - "epoch": 0.2533821318868048, - "grad_norm": 0.4405258297920227, - "learning_rate": 9.893689803672399e-06, - "loss": 0.3618, - "step": 3877 - }, - { - "epoch": 0.2534474870923469, - "grad_norm": 0.4864327907562256, - "learning_rate": 9.893618167765442e-06, - "loss": 0.4206, - "step": 3878 - }, - { - "epoch": 0.253512842297889, - "grad_norm": 0.46083828806877136, - "learning_rate": 9.893546507990612e-06, - "loss": 0.3794, - "step": 3879 - }, - { - "epoch": 0.25357819750343114, - "grad_norm": 0.46849796175956726, - "learning_rate": 9.893474824348261e-06, - "loss": 0.3849, - "step": 3880 - }, - { - "epoch": 0.25364355270897326, - "grad_norm": 0.5232370495796204, - "learning_rate": 9.89340311683874e-06, - "loss": 0.4427, - "step": 3881 - }, - { - "epoch": 0.2537089079145154, - "grad_norm": 0.46081361174583435, - "learning_rate": 9.893331385462394e-06, - "loss": 0.4173, - "step": 3882 - }, - { - "epoch": 0.2537742631200575, - "grad_norm": 0.4474320113658905, - "learning_rate": 9.893259630219579e-06, - "loss": 0.3701, - "step": 3883 - }, - { - "epoch": 0.25383961832559965, - "grad_norm": 0.454286128282547, - "learning_rate": 9.893187851110637e-06, - "loss": 0.3909, - "step": 3884 - }, - { - "epoch": 0.2539049735311418, - "grad_norm": 0.47931092977523804, - "learning_rate": 9.893116048135928e-06, - "loss": 0.4102, - "step": 3885 - }, - { - "epoch": 0.2539703287366839, - "grad_norm": 0.4402013421058655, - "learning_rate": 9.893044221295793e-06, - "loss": 0.3714, - "step": 3886 - }, - { - "epoch": 0.254035683942226, - "grad_norm": 0.44256025552749634, - "learning_rate": 9.892972370590586e-06, - "loss": 0.3918, - "step": 3887 - }, - { - "epoch": 0.2541010391477681, - "grad_norm": 0.48306509852409363, - "learning_rate": 9.892900496020659e-06, - "loss": 0.4127, - "step": 3888 - }, - { - "epoch": 0.25416639435331023, - "grad_norm": 0.4957873523235321, - "learning_rate": 9.892828597586362e-06, - "loss": 0.4352, - "step": 3889 - }, - { - "epoch": 0.25423174955885236, - "grad_norm": 0.4706031084060669, - "learning_rate": 9.892756675288043e-06, - "loss": 0.4346, - "step": 3890 - }, - { - "epoch": 0.2542971047643945, - "grad_norm": 0.4572066068649292, - "learning_rate": 9.892684729126056e-06, - "loss": 0.3892, - "step": 3891 - }, - { - "epoch": 0.2543624599699366, - "grad_norm": 0.5330418348312378, - "learning_rate": 9.89261275910075e-06, - "loss": 0.5388, - "step": 3892 - }, - { - "epoch": 0.25442781517547874, - "grad_norm": 0.49673694372177124, - "learning_rate": 9.892540765212477e-06, - "loss": 0.4226, - "step": 3893 - }, - { - "epoch": 0.2544931703810209, - "grad_norm": 0.46769097447395325, - "learning_rate": 9.892468747461588e-06, - "loss": 0.4398, - "step": 3894 - }, - { - "epoch": 0.25455852558656294, - "grad_norm": 0.46777594089508057, - "learning_rate": 9.892396705848433e-06, - "loss": 0.3878, - "step": 3895 - }, - { - "epoch": 0.2546238807921051, - "grad_norm": 0.46772271394729614, - "learning_rate": 9.892324640373365e-06, - "loss": 0.4045, - "step": 3896 - }, - { - "epoch": 0.2546892359976472, - "grad_norm": 0.4632786512374878, - "learning_rate": 9.892252551036735e-06, - "loss": 0.4049, - "step": 3897 - }, - { - "epoch": 0.25475459120318933, - "grad_norm": 0.4853006899356842, - "learning_rate": 9.892180437838895e-06, - "loss": 0.4005, - "step": 3898 - }, - { - "epoch": 0.25481994640873146, - "grad_norm": 0.46542122960090637, - "learning_rate": 9.892108300780195e-06, - "loss": 0.3882, - "step": 3899 - }, - { - "epoch": 0.2548853016142736, - "grad_norm": 0.4371604025363922, - "learning_rate": 9.892036139860987e-06, - "loss": 0.38, - "step": 3900 - }, - { - "epoch": 0.2549506568198157, - "grad_norm": 0.44974663853645325, - "learning_rate": 9.891963955081627e-06, - "loss": 0.372, - "step": 3901 - }, - { - "epoch": 0.25501601202535784, - "grad_norm": 0.4604519009590149, - "learning_rate": 9.891891746442462e-06, - "loss": 0.3589, - "step": 3902 - }, - { - "epoch": 0.2550813672308999, - "grad_norm": 0.5047156810760498, - "learning_rate": 9.891819513943847e-06, - "loss": 0.3929, - "step": 3903 - }, - { - "epoch": 0.25514672243644204, - "grad_norm": 0.4475330114364624, - "learning_rate": 9.891747257586134e-06, - "loss": 0.3784, - "step": 3904 - }, - { - "epoch": 0.25521207764198417, - "grad_norm": 0.4892031252384186, - "learning_rate": 9.891674977369674e-06, - "loss": 0.4405, - "step": 3905 - }, - { - "epoch": 0.2552774328475263, - "grad_norm": 0.49480122327804565, - "learning_rate": 9.89160267329482e-06, - "loss": 0.4084, - "step": 3906 - }, - { - "epoch": 0.2553427880530684, - "grad_norm": 0.4493216872215271, - "learning_rate": 9.891530345361927e-06, - "loss": 0.3819, - "step": 3907 - }, - { - "epoch": 0.25540814325861055, - "grad_norm": 0.5069324970245361, - "learning_rate": 9.891457993571345e-06, - "loss": 0.4356, - "step": 3908 - }, - { - "epoch": 0.2554734984641527, - "grad_norm": 0.5098019242286682, - "learning_rate": 9.891385617923427e-06, - "loss": 0.3603, - "step": 3909 - }, - { - "epoch": 0.2555388536696948, - "grad_norm": 0.4521212577819824, - "learning_rate": 9.891313218418528e-06, - "loss": 0.3858, - "step": 3910 - }, - { - "epoch": 0.25560420887523694, - "grad_norm": 0.49558156728744507, - "learning_rate": 9.891240795057e-06, - "loss": 0.4393, - "step": 3911 - }, - { - "epoch": 0.255669564080779, - "grad_norm": 0.4960598349571228, - "learning_rate": 9.891168347839194e-06, - "loss": 0.4176, - "step": 3912 - }, - { - "epoch": 0.25573491928632114, - "grad_norm": 0.4858163297176361, - "learning_rate": 9.891095876765468e-06, - "loss": 0.4654, - "step": 3913 - }, - { - "epoch": 0.25580027449186327, - "grad_norm": 0.45587536692619324, - "learning_rate": 9.891023381836171e-06, - "loss": 0.3675, - "step": 3914 - }, - { - "epoch": 0.2558656296974054, - "grad_norm": 0.437977135181427, - "learning_rate": 9.89095086305166e-06, - "loss": 0.3425, - "step": 3915 - }, - { - "epoch": 0.2559309849029475, - "grad_norm": 0.472843199968338, - "learning_rate": 9.890878320412288e-06, - "loss": 0.3945, - "step": 3916 - }, - { - "epoch": 0.25599634010848965, - "grad_norm": 0.4441700279712677, - "learning_rate": 9.890805753918406e-06, - "loss": 0.3089, - "step": 3917 - }, - { - "epoch": 0.2560616953140318, - "grad_norm": 0.44410449266433716, - "learning_rate": 9.890733163570372e-06, - "loss": 0.3577, - "step": 3918 - }, - { - "epoch": 0.2561270505195739, - "grad_norm": 0.4786568880081177, - "learning_rate": 9.890660549368536e-06, - "loss": 0.3797, - "step": 3919 - }, - { - "epoch": 0.256192405725116, - "grad_norm": 0.47811007499694824, - "learning_rate": 9.890587911313255e-06, - "loss": 0.3986, - "step": 3920 - }, - { - "epoch": 0.2562577609306581, - "grad_norm": 0.47644883394241333, - "learning_rate": 9.890515249404883e-06, - "loss": 0.429, - "step": 3921 - }, - { - "epoch": 0.25632311613620024, - "grad_norm": 0.4829001724720001, - "learning_rate": 9.890442563643774e-06, - "loss": 0.4139, - "step": 3922 - }, - { - "epoch": 0.25638847134174236, - "grad_norm": 0.4659916162490845, - "learning_rate": 9.890369854030281e-06, - "loss": 0.4007, - "step": 3923 - }, - { - "epoch": 0.2564538265472845, - "grad_norm": 0.4580618441104889, - "learning_rate": 9.890297120564761e-06, - "loss": 0.4054, - "step": 3924 - }, - { - "epoch": 0.2565191817528266, - "grad_norm": 0.490489661693573, - "learning_rate": 9.890224363247568e-06, - "loss": 0.3949, - "step": 3925 - }, - { - "epoch": 0.25658453695836875, - "grad_norm": 0.4814055263996124, - "learning_rate": 9.890151582079058e-06, - "loss": 0.436, - "step": 3926 - }, - { - "epoch": 0.2566498921639109, - "grad_norm": 0.5186078548431396, - "learning_rate": 9.890078777059581e-06, - "loss": 0.4993, - "step": 3927 - }, - { - "epoch": 0.256715247369453, - "grad_norm": 0.47267815470695496, - "learning_rate": 9.890005948189498e-06, - "loss": 0.3656, - "step": 3928 - }, - { - "epoch": 0.2567806025749951, - "grad_norm": 0.4519590735435486, - "learning_rate": 9.889933095469162e-06, - "loss": 0.3715, - "step": 3929 - }, - { - "epoch": 0.2568459577805372, - "grad_norm": 0.4927625060081482, - "learning_rate": 9.889860218898928e-06, - "loss": 0.4043, - "step": 3930 - }, - { - "epoch": 0.25691131298607933, - "grad_norm": 0.49229928851127625, - "learning_rate": 9.889787318479151e-06, - "loss": 0.3785, - "step": 3931 - }, - { - "epoch": 0.25697666819162146, - "grad_norm": 0.4878690242767334, - "learning_rate": 9.889714394210189e-06, - "loss": 0.4205, - "step": 3932 - }, - { - "epoch": 0.2570420233971636, - "grad_norm": 0.4788166582584381, - "learning_rate": 9.889641446092395e-06, - "loss": 0.4137, - "step": 3933 - }, - { - "epoch": 0.2571073786027057, - "grad_norm": 0.4857322871685028, - "learning_rate": 9.889568474126125e-06, - "loss": 0.444, - "step": 3934 - }, - { - "epoch": 0.25717273380824784, - "grad_norm": 0.500454306602478, - "learning_rate": 9.889495478311737e-06, - "loss": 0.4084, - "step": 3935 - }, - { - "epoch": 0.25723808901379, - "grad_norm": 0.43148401379585266, - "learning_rate": 9.889422458649585e-06, - "loss": 0.3234, - "step": 3936 - }, - { - "epoch": 0.25730344421933204, - "grad_norm": 0.5101863145828247, - "learning_rate": 9.889349415140025e-06, - "loss": 0.4184, - "step": 3937 - }, - { - "epoch": 0.2573687994248742, - "grad_norm": 0.47251182794570923, - "learning_rate": 9.889276347783413e-06, - "loss": 0.3775, - "step": 3938 - }, - { - "epoch": 0.2574341546304163, - "grad_norm": 0.518918514251709, - "learning_rate": 9.88920325658011e-06, - "loss": 0.4736, - "step": 3939 - }, - { - "epoch": 0.25749950983595843, - "grad_norm": 0.4914916455745697, - "learning_rate": 9.889130141530468e-06, - "loss": 0.409, - "step": 3940 - }, - { - "epoch": 0.25756486504150056, - "grad_norm": 0.48664769530296326, - "learning_rate": 9.889057002634844e-06, - "loss": 0.4299, - "step": 3941 - }, - { - "epoch": 0.2576302202470427, - "grad_norm": 0.5491824150085449, - "learning_rate": 9.888983839893593e-06, - "loss": 0.4988, - "step": 3942 - }, - { - "epoch": 0.2576955754525848, - "grad_norm": 0.48742780089378357, - "learning_rate": 9.888910653307078e-06, - "loss": 0.4271, - "step": 3943 - }, - { - "epoch": 0.25776093065812694, - "grad_norm": 0.47784173488616943, - "learning_rate": 9.88883744287565e-06, - "loss": 0.4035, - "step": 3944 - }, - { - "epoch": 0.257826285863669, - "grad_norm": 0.4807237684726715, - "learning_rate": 9.888764208599669e-06, - "loss": 0.3989, - "step": 3945 - }, - { - "epoch": 0.25789164106921114, - "grad_norm": 0.4672047197818756, - "learning_rate": 9.888690950479489e-06, - "loss": 0.3765, - "step": 3946 - }, - { - "epoch": 0.25795699627475327, - "grad_norm": 0.5563791394233704, - "learning_rate": 9.888617668515472e-06, - "loss": 0.4377, - "step": 3947 - }, - { - "epoch": 0.2580223514802954, - "grad_norm": 0.4825108051300049, - "learning_rate": 9.88854436270797e-06, - "loss": 0.445, - "step": 3948 - }, - { - "epoch": 0.2580877066858375, - "grad_norm": 0.438420832157135, - "learning_rate": 9.888471033057347e-06, - "loss": 0.3558, - "step": 3949 - }, - { - "epoch": 0.25815306189137965, - "grad_norm": 0.4145852327346802, - "learning_rate": 9.888397679563958e-06, - "loss": 0.3175, - "step": 3950 - }, - { - "epoch": 0.2582184170969218, - "grad_norm": 0.4814380407333374, - "learning_rate": 9.888324302228156e-06, - "loss": 0.3622, - "step": 3951 - }, - { - "epoch": 0.2582837723024639, - "grad_norm": 0.5345417261123657, - "learning_rate": 9.888250901050306e-06, - "loss": 0.4457, - "step": 3952 - }, - { - "epoch": 0.25834912750800604, - "grad_norm": 0.5023251175880432, - "learning_rate": 9.888177476030761e-06, - "loss": 0.4571, - "step": 3953 - }, - { - "epoch": 0.2584144827135481, - "grad_norm": 0.464595764875412, - "learning_rate": 9.888104027169883e-06, - "loss": 0.3636, - "step": 3954 - }, - { - "epoch": 0.25847983791909024, - "grad_norm": 0.48747649788856506, - "learning_rate": 9.888030554468026e-06, - "loss": 0.4364, - "step": 3955 - }, - { - "epoch": 0.25854519312463237, - "grad_norm": 0.5128535032272339, - "learning_rate": 9.887957057925552e-06, - "loss": 0.4106, - "step": 3956 - }, - { - "epoch": 0.2586105483301745, - "grad_norm": 0.49727413058280945, - "learning_rate": 9.887883537542818e-06, - "loss": 0.4135, - "step": 3957 - }, - { - "epoch": 0.2586759035357166, - "grad_norm": 0.46694761514663696, - "learning_rate": 9.887809993320182e-06, - "loss": 0.4462, - "step": 3958 - }, - { - "epoch": 0.25874125874125875, - "grad_norm": 0.4622994065284729, - "learning_rate": 9.887736425258006e-06, - "loss": 0.379, - "step": 3959 - }, - { - "epoch": 0.2588066139468009, - "grad_norm": 0.5834900736808777, - "learning_rate": 9.887662833356644e-06, - "loss": 0.4102, - "step": 3960 - }, - { - "epoch": 0.258871969152343, - "grad_norm": 0.4487383961677551, - "learning_rate": 9.887589217616455e-06, - "loss": 0.391, - "step": 3961 - }, - { - "epoch": 0.2589373243578851, - "grad_norm": 0.4785480499267578, - "learning_rate": 9.887515578037803e-06, - "loss": 0.3996, - "step": 3962 - }, - { - "epoch": 0.2590026795634272, - "grad_norm": 0.4960395097732544, - "learning_rate": 9.887441914621043e-06, - "loss": 0.3742, - "step": 3963 - }, - { - "epoch": 0.25906803476896934, - "grad_norm": 0.5078006386756897, - "learning_rate": 9.887368227366539e-06, - "loss": 0.4238, - "step": 3964 - }, - { - "epoch": 0.25913338997451146, - "grad_norm": 0.5127818584442139, - "learning_rate": 9.887294516274643e-06, - "loss": 0.4571, - "step": 3965 - }, - { - "epoch": 0.2591987451800536, - "grad_norm": 0.4530894458293915, - "learning_rate": 9.88722078134572e-06, - "loss": 0.3529, - "step": 3966 - }, - { - "epoch": 0.2592641003855957, - "grad_norm": 0.46037912368774414, - "learning_rate": 9.887147022580127e-06, - "loss": 0.4012, - "step": 3967 - }, - { - "epoch": 0.25932945559113785, - "grad_norm": 0.5028442144393921, - "learning_rate": 9.887073239978227e-06, - "loss": 0.4172, - "step": 3968 - }, - { - "epoch": 0.25939481079668, - "grad_norm": 0.5270559191703796, - "learning_rate": 9.886999433540376e-06, - "loss": 0.4431, - "step": 3969 - }, - { - "epoch": 0.2594601660022221, - "grad_norm": 0.522784411907196, - "learning_rate": 9.886925603266936e-06, - "loss": 0.401, - "step": 3970 - }, - { - "epoch": 0.2595255212077642, - "grad_norm": 0.46748587489128113, - "learning_rate": 9.886851749158268e-06, - "loss": 0.3861, - "step": 3971 - }, - { - "epoch": 0.2595908764133063, - "grad_norm": 0.5080457329750061, - "learning_rate": 9.88677787121473e-06, - "loss": 0.4376, - "step": 3972 - }, - { - "epoch": 0.25965623161884843, - "grad_norm": 0.5248509049415588, - "learning_rate": 9.886703969436684e-06, - "loss": 0.4379, - "step": 3973 - }, - { - "epoch": 0.25972158682439056, - "grad_norm": 0.4547731876373291, - "learning_rate": 9.88663004382449e-06, - "loss": 0.3392, - "step": 3974 - }, - { - "epoch": 0.2597869420299327, - "grad_norm": 0.4710044264793396, - "learning_rate": 9.886556094378507e-06, - "loss": 0.4111, - "step": 3975 - }, - { - "epoch": 0.2598522972354748, - "grad_norm": 0.4873550236225128, - "learning_rate": 9.8864821210991e-06, - "loss": 0.4262, - "step": 3976 - }, - { - "epoch": 0.25991765244101694, - "grad_norm": 0.47116461396217346, - "learning_rate": 9.886408123986624e-06, - "loss": 0.3706, - "step": 3977 - }, - { - "epoch": 0.2599830076465591, - "grad_norm": 0.506894052028656, - "learning_rate": 9.886334103041443e-06, - "loss": 0.4173, - "step": 3978 - }, - { - "epoch": 0.26004836285210114, - "grad_norm": 0.5134207010269165, - "learning_rate": 9.886260058263918e-06, - "loss": 0.4242, - "step": 3979 - }, - { - "epoch": 0.2601137180576433, - "grad_norm": 0.4570533037185669, - "learning_rate": 9.886185989654411e-06, - "loss": 0.401, - "step": 3980 - }, - { - "epoch": 0.2601790732631854, - "grad_norm": 0.46695584058761597, - "learning_rate": 9.886111897213282e-06, - "loss": 0.4072, - "step": 3981 - }, - { - "epoch": 0.26024442846872753, - "grad_norm": 0.5276580452919006, - "learning_rate": 9.886037780940892e-06, - "loss": 0.4822, - "step": 3982 - }, - { - "epoch": 0.26030978367426966, - "grad_norm": 0.47777944803237915, - "learning_rate": 9.885963640837601e-06, - "loss": 0.3822, - "step": 3983 - }, - { - "epoch": 0.2603751388798118, - "grad_norm": 0.5472515225410461, - "learning_rate": 9.885889476903776e-06, - "loss": 0.4825, - "step": 3984 - }, - { - "epoch": 0.2604404940853539, - "grad_norm": 0.47866106033325195, - "learning_rate": 9.885815289139774e-06, - "loss": 0.4205, - "step": 3985 - }, - { - "epoch": 0.26050584929089604, - "grad_norm": 0.6274198293685913, - "learning_rate": 9.885741077545958e-06, - "loss": 0.382, - "step": 3986 - }, - { - "epoch": 0.2605712044964381, - "grad_norm": 0.5079156756401062, - "learning_rate": 9.885666842122692e-06, - "loss": 0.4011, - "step": 3987 - }, - { - "epoch": 0.26063655970198024, - "grad_norm": 0.6067430973052979, - "learning_rate": 9.885592582870334e-06, - "loss": 0.4514, - "step": 3988 - }, - { - "epoch": 0.26070191490752237, - "grad_norm": 0.4657166302204132, - "learning_rate": 9.88551829978925e-06, - "loss": 0.4008, - "step": 3989 - }, - { - "epoch": 0.2607672701130645, - "grad_norm": 0.46768006682395935, - "learning_rate": 9.8854439928798e-06, - "loss": 0.3943, - "step": 3990 - }, - { - "epoch": 0.2608326253186066, - "grad_norm": 0.5387409329414368, - "learning_rate": 9.885369662142345e-06, - "loss": 0.4716, - "step": 3991 - }, - { - "epoch": 0.26089798052414875, - "grad_norm": 0.5316016674041748, - "learning_rate": 9.885295307577253e-06, - "loss": 0.4515, - "step": 3992 - }, - { - "epoch": 0.2609633357296909, - "grad_norm": 0.5189493894577026, - "learning_rate": 9.885220929184882e-06, - "loss": 0.4268, - "step": 3993 - }, - { - "epoch": 0.261028690935233, - "grad_norm": 0.4890076220035553, - "learning_rate": 9.885146526965597e-06, - "loss": 0.4038, - "step": 3994 - }, - { - "epoch": 0.26109404614077514, - "grad_norm": 0.4809994697570801, - "learning_rate": 9.885072100919759e-06, - "loss": 0.4277, - "step": 3995 - }, - { - "epoch": 0.2611594013463172, - "grad_norm": 0.5016999244689941, - "learning_rate": 9.884997651047732e-06, - "loss": 0.4215, - "step": 3996 - }, - { - "epoch": 0.26122475655185934, - "grad_norm": 0.4756239652633667, - "learning_rate": 9.88492317734988e-06, - "loss": 0.3985, - "step": 3997 - }, - { - "epoch": 0.26129011175740147, - "grad_norm": 0.52363520860672, - "learning_rate": 9.884848679826563e-06, - "loss": 0.4323, - "step": 3998 - }, - { - "epoch": 0.2613554669629436, - "grad_norm": 0.46867337822914124, - "learning_rate": 9.88477415847815e-06, - "loss": 0.3827, - "step": 3999 - }, - { - "epoch": 0.2614208221684857, - "grad_norm": 0.5153798460960388, - "learning_rate": 9.884699613305e-06, - "loss": 0.4539, - "step": 4000 - }, - { - "epoch": 0.26148617737402785, - "grad_norm": 0.486310750246048, - "learning_rate": 9.884625044307477e-06, - "loss": 0.3942, - "step": 4001 - }, - { - "epoch": 0.26155153257957, - "grad_norm": 0.5417717695236206, - "learning_rate": 9.884550451485945e-06, - "loss": 0.4121, - "step": 4002 - }, - { - "epoch": 0.2616168877851121, - "grad_norm": 0.46015897393226624, - "learning_rate": 9.88447583484077e-06, - "loss": 0.3829, - "step": 4003 - }, - { - "epoch": 0.2616822429906542, - "grad_norm": 0.5171236991882324, - "learning_rate": 9.884401194372316e-06, - "loss": 0.488, - "step": 4004 - }, - { - "epoch": 0.2617475981961963, - "grad_norm": 0.5257484912872314, - "learning_rate": 9.88432653008094e-06, - "loss": 0.458, - "step": 4005 - }, - { - "epoch": 0.26181295340173844, - "grad_norm": 0.5574596524238586, - "learning_rate": 9.884251841967015e-06, - "loss": 0.4217, - "step": 4006 - }, - { - "epoch": 0.26187830860728056, - "grad_norm": 0.4806183874607086, - "learning_rate": 9.8841771300309e-06, - "loss": 0.3959, - "step": 4007 - }, - { - "epoch": 0.2619436638128227, - "grad_norm": 0.49253955483436584, - "learning_rate": 9.884102394272962e-06, - "loss": 0.4402, - "step": 4008 - }, - { - "epoch": 0.2620090190183648, - "grad_norm": 0.47940564155578613, - "learning_rate": 9.884027634693566e-06, - "loss": 0.4011, - "step": 4009 - }, - { - "epoch": 0.26207437422390695, - "grad_norm": 0.47881099581718445, - "learning_rate": 9.883952851293073e-06, - "loss": 0.3793, - "step": 4010 - }, - { - "epoch": 0.2621397294294491, - "grad_norm": 0.48134446144104004, - "learning_rate": 9.883878044071851e-06, - "loss": 0.377, - "step": 4011 - }, - { - "epoch": 0.2622050846349912, - "grad_norm": 0.4901047348976135, - "learning_rate": 9.883803213030263e-06, - "loss": 0.4556, - "step": 4012 - }, - { - "epoch": 0.2622704398405333, - "grad_norm": 0.4936966001987457, - "learning_rate": 9.883728358168676e-06, - "loss": 0.4527, - "step": 4013 - }, - { - "epoch": 0.2623357950460754, - "grad_norm": 0.44317910075187683, - "learning_rate": 9.883653479487453e-06, - "loss": 0.3333, - "step": 4014 - }, - { - "epoch": 0.26240115025161753, - "grad_norm": 0.4643218219280243, - "learning_rate": 9.883578576986961e-06, - "loss": 0.4127, - "step": 4015 - }, - { - "epoch": 0.26246650545715966, - "grad_norm": 0.4814830422401428, - "learning_rate": 9.883503650667563e-06, - "loss": 0.4337, - "step": 4016 - }, - { - "epoch": 0.2625318606627018, - "grad_norm": 0.48062700033187866, - "learning_rate": 9.883428700529626e-06, - "loss": 0.3871, - "step": 4017 - }, - { - "epoch": 0.2625972158682439, - "grad_norm": 0.5002309679985046, - "learning_rate": 9.883353726573518e-06, - "loss": 0.4498, - "step": 4018 - }, - { - "epoch": 0.26266257107378604, - "grad_norm": 0.4648917615413666, - "learning_rate": 9.8832787287996e-06, - "loss": 0.379, - "step": 4019 - }, - { - "epoch": 0.26272792627932817, - "grad_norm": 0.5252535343170166, - "learning_rate": 9.88320370720824e-06, - "loss": 0.4755, - "step": 4020 - }, - { - "epoch": 0.26279328148487024, - "grad_norm": 0.4704096019268036, - "learning_rate": 9.883128661799805e-06, - "loss": 0.3981, - "step": 4021 - }, - { - "epoch": 0.2628586366904124, - "grad_norm": 0.47374221682548523, - "learning_rate": 9.883053592574658e-06, - "loss": 0.4171, - "step": 4022 - }, - { - "epoch": 0.2629239918959545, - "grad_norm": 0.4634070098400116, - "learning_rate": 9.88297849953317e-06, - "loss": 0.3805, - "step": 4023 - }, - { - "epoch": 0.26298934710149663, - "grad_norm": 0.5194708704948425, - "learning_rate": 9.8829033826757e-06, - "loss": 0.4813, - "step": 4024 - }, - { - "epoch": 0.26305470230703876, - "grad_norm": 0.4325157403945923, - "learning_rate": 9.882828242002622e-06, - "loss": 0.3485, - "step": 4025 - }, - { - "epoch": 0.2631200575125809, - "grad_norm": 0.45242777466773987, - "learning_rate": 9.882753077514298e-06, - "loss": 0.4173, - "step": 4026 - }, - { - "epoch": 0.263185412718123, - "grad_norm": 0.4679414629936218, - "learning_rate": 9.882677889211095e-06, - "loss": 0.4511, - "step": 4027 - }, - { - "epoch": 0.26325076792366514, - "grad_norm": 0.5232787132263184, - "learning_rate": 9.88260267709338e-06, - "loss": 0.4671, - "step": 4028 - }, - { - "epoch": 0.26331612312920727, - "grad_norm": 0.4798663854598999, - "learning_rate": 9.882527441161523e-06, - "loss": 0.4206, - "step": 4029 - }, - { - "epoch": 0.26338147833474934, - "grad_norm": 0.4608439803123474, - "learning_rate": 9.882452181415885e-06, - "loss": 0.4074, - "step": 4030 - }, - { - "epoch": 0.26344683354029147, - "grad_norm": 0.46279963850975037, - "learning_rate": 9.882376897856838e-06, - "loss": 0.3958, - "step": 4031 - }, - { - "epoch": 0.2635121887458336, - "grad_norm": 0.4729881286621094, - "learning_rate": 9.882301590484746e-06, - "loss": 0.4197, - "step": 4032 - }, - { - "epoch": 0.2635775439513757, - "grad_norm": 0.4474340081214905, - "learning_rate": 9.88222625929998e-06, - "loss": 0.3865, - "step": 4033 - }, - { - "epoch": 0.26364289915691785, - "grad_norm": 0.4575665593147278, - "learning_rate": 9.882150904302905e-06, - "loss": 0.3816, - "step": 4034 - }, - { - "epoch": 0.26370825436246, - "grad_norm": 0.4573649764060974, - "learning_rate": 9.882075525493885e-06, - "loss": 0.3803, - "step": 4035 - }, - { - "epoch": 0.2637736095680021, - "grad_norm": 0.5491724014282227, - "learning_rate": 9.882000122873296e-06, - "loss": 0.461, - "step": 4036 - }, - { - "epoch": 0.26383896477354424, - "grad_norm": 0.5086228251457214, - "learning_rate": 9.881924696441499e-06, - "loss": 0.3976, - "step": 4037 - }, - { - "epoch": 0.2639043199790863, - "grad_norm": 0.49077677726745605, - "learning_rate": 9.881849246198864e-06, - "loss": 0.4091, - "step": 4038 - }, - { - "epoch": 0.26396967518462844, - "grad_norm": 0.4744749367237091, - "learning_rate": 9.88177377214576e-06, - "loss": 0.3891, - "step": 4039 - }, - { - "epoch": 0.26403503039017057, - "grad_norm": 0.49219316244125366, - "learning_rate": 9.881698274282552e-06, - "loss": 0.4198, - "step": 4040 - }, - { - "epoch": 0.2641003855957127, - "grad_norm": 0.4904021918773651, - "learning_rate": 9.881622752609611e-06, - "loss": 0.4265, - "step": 4041 - }, - { - "epoch": 0.2641657408012548, - "grad_norm": 0.4952344596385956, - "learning_rate": 9.881547207127307e-06, - "loss": 0.4069, - "step": 4042 - }, - { - "epoch": 0.26423109600679695, - "grad_norm": 0.4888193905353546, - "learning_rate": 9.881471637836005e-06, - "loss": 0.3746, - "step": 4043 - }, - { - "epoch": 0.2642964512123391, - "grad_norm": 0.48952335119247437, - "learning_rate": 9.881396044736073e-06, - "loss": 0.4348, - "step": 4044 - }, - { - "epoch": 0.2643618064178812, - "grad_norm": 1.197820782661438, - "learning_rate": 9.881320427827883e-06, - "loss": 0.4015, - "step": 4045 - }, - { - "epoch": 0.2644271616234233, - "grad_norm": 0.4951687455177307, - "learning_rate": 9.881244787111802e-06, - "loss": 0.4245, - "step": 4046 - }, - { - "epoch": 0.2644925168289654, - "grad_norm": 0.4858238101005554, - "learning_rate": 9.881169122588198e-06, - "loss": 0.4021, - "step": 4047 - }, - { - "epoch": 0.26455787203450754, - "grad_norm": 0.5516292452812195, - "learning_rate": 9.881093434257443e-06, - "loss": 0.4342, - "step": 4048 - }, - { - "epoch": 0.26462322724004966, - "grad_norm": 0.4711117744445801, - "learning_rate": 9.881017722119903e-06, - "loss": 0.3943, - "step": 4049 - }, - { - "epoch": 0.2646885824455918, - "grad_norm": 0.5131600499153137, - "learning_rate": 9.880941986175948e-06, - "loss": 0.4188, - "step": 4050 - }, - { - "epoch": 0.2647539376511339, - "grad_norm": 0.46999862790107727, - "learning_rate": 9.88086622642595e-06, - "loss": 0.4066, - "step": 4051 - }, - { - "epoch": 0.26481929285667605, - "grad_norm": 0.5009545683860779, - "learning_rate": 9.880790442870277e-06, - "loss": 0.4341, - "step": 4052 - }, - { - "epoch": 0.2648846480622182, - "grad_norm": 0.5397742986679077, - "learning_rate": 9.880714635509295e-06, - "loss": 0.4512, - "step": 4053 - }, - { - "epoch": 0.2649500032677603, - "grad_norm": 0.45424947142601013, - "learning_rate": 9.880638804343378e-06, - "loss": 0.3894, - "step": 4054 - }, - { - "epoch": 0.2650153584733024, - "grad_norm": 0.48217862844467163, - "learning_rate": 9.880562949372895e-06, - "loss": 0.4123, - "step": 4055 - }, - { - "epoch": 0.2650807136788445, - "grad_norm": 0.49495089054107666, - "learning_rate": 9.880487070598217e-06, - "loss": 0.4691, - "step": 4056 - }, - { - "epoch": 0.26514606888438663, - "grad_norm": 0.4753105044364929, - "learning_rate": 9.880411168019713e-06, - "loss": 0.4241, - "step": 4057 - }, - { - "epoch": 0.26521142408992876, - "grad_norm": 0.47640448808670044, - "learning_rate": 9.880335241637751e-06, - "loss": 0.4326, - "step": 4058 - }, - { - "epoch": 0.2652767792954709, - "grad_norm": 0.4479662775993347, - "learning_rate": 9.880259291452704e-06, - "loss": 0.3333, - "step": 4059 - }, - { - "epoch": 0.265342134501013, - "grad_norm": 0.4611780345439911, - "learning_rate": 9.880183317464943e-06, - "loss": 0.4045, - "step": 4060 - }, - { - "epoch": 0.26540748970655514, - "grad_norm": 0.44803959131240845, - "learning_rate": 9.880107319674835e-06, - "loss": 0.3835, - "step": 4061 - }, - { - "epoch": 0.26547284491209727, - "grad_norm": 0.5119698643684387, - "learning_rate": 9.880031298082754e-06, - "loss": 0.4628, - "step": 4062 - }, - { - "epoch": 0.26553820011763934, - "grad_norm": 0.47646498680114746, - "learning_rate": 9.87995525268907e-06, - "loss": 0.3763, - "step": 4063 - }, - { - "epoch": 0.2656035553231815, - "grad_norm": 0.49275916814804077, - "learning_rate": 9.879879183494154e-06, - "loss": 0.4224, - "step": 4064 - }, - { - "epoch": 0.2656689105287236, - "grad_norm": 0.5169394016265869, - "learning_rate": 9.879803090498377e-06, - "loss": 0.4363, - "step": 4065 - }, - { - "epoch": 0.26573426573426573, - "grad_norm": 0.46649786829948425, - "learning_rate": 9.879726973702109e-06, - "loss": 0.4457, - "step": 4066 - }, - { - "epoch": 0.26579962093980786, - "grad_norm": 1.0375394821166992, - "learning_rate": 9.879650833105721e-06, - "loss": 0.4381, - "step": 4067 - }, - { - "epoch": 0.26586497614535, - "grad_norm": 0.5473775863647461, - "learning_rate": 9.879574668709588e-06, - "loss": 0.4868, - "step": 4068 - }, - { - "epoch": 0.2659303313508921, - "grad_norm": 0.4799162745475769, - "learning_rate": 9.879498480514077e-06, - "loss": 0.3871, - "step": 4069 - }, - { - "epoch": 0.26599568655643424, - "grad_norm": 0.4750443398952484, - "learning_rate": 9.879422268519562e-06, - "loss": 0.4327, - "step": 4070 - }, - { - "epoch": 0.26606104176197637, - "grad_norm": 0.46318313479423523, - "learning_rate": 9.879346032726413e-06, - "loss": 0.405, - "step": 4071 - }, - { - "epoch": 0.26612639696751844, - "grad_norm": 0.4690793752670288, - "learning_rate": 9.879269773135005e-06, - "loss": 0.3928, - "step": 4072 - }, - { - "epoch": 0.26619175217306057, - "grad_norm": 0.5178049206733704, - "learning_rate": 9.879193489745706e-06, - "loss": 0.4278, - "step": 4073 - }, - { - "epoch": 0.2662571073786027, - "grad_norm": 0.46420466899871826, - "learning_rate": 9.879117182558893e-06, - "loss": 0.3865, - "step": 4074 - }, - { - "epoch": 0.2663224625841448, - "grad_norm": 0.5093705058097839, - "learning_rate": 9.879040851574932e-06, - "loss": 0.391, - "step": 4075 - }, - { - "epoch": 0.26638781778968695, - "grad_norm": 0.44918498396873474, - "learning_rate": 9.878964496794202e-06, - "loss": 0.3983, - "step": 4076 - }, - { - "epoch": 0.2664531729952291, - "grad_norm": 0.5079885721206665, - "learning_rate": 9.87888811821707e-06, - "loss": 0.4132, - "step": 4077 - }, - { - "epoch": 0.2665185282007712, - "grad_norm": 0.4735715091228485, - "learning_rate": 9.878811715843908e-06, - "loss": 0.4042, - "step": 4078 - }, - { - "epoch": 0.26658388340631334, - "grad_norm": 0.46049559116363525, - "learning_rate": 9.878735289675095e-06, - "loss": 0.4054, - "step": 4079 - }, - { - "epoch": 0.2666492386118554, - "grad_norm": 0.46098044514656067, - "learning_rate": 9.878658839710997e-06, - "loss": 0.3684, - "step": 4080 - }, - { - "epoch": 0.26671459381739754, - "grad_norm": 0.4658360481262207, - "learning_rate": 9.87858236595199e-06, - "loss": 0.3914, - "step": 4081 - }, - { - "epoch": 0.26677994902293967, - "grad_norm": 0.48726463317871094, - "learning_rate": 9.87850586839845e-06, - "loss": 0.4097, - "step": 4082 - }, - { - "epoch": 0.2668453042284818, - "grad_norm": 0.4387027621269226, - "learning_rate": 9.878429347050743e-06, - "loss": 0.3648, - "step": 4083 - }, - { - "epoch": 0.2669106594340239, - "grad_norm": 0.4726237952709198, - "learning_rate": 9.878352801909248e-06, - "loss": 0.401, - "step": 4084 - }, - { - "epoch": 0.26697601463956605, - "grad_norm": 0.47512876987457275, - "learning_rate": 9.878276232974336e-06, - "loss": 0.3675, - "step": 4085 - }, - { - "epoch": 0.2670413698451082, - "grad_norm": 0.49428948760032654, - "learning_rate": 9.878199640246379e-06, - "loss": 0.4089, - "step": 4086 - }, - { - "epoch": 0.2671067250506503, - "grad_norm": 0.48950693011283875, - "learning_rate": 9.878123023725754e-06, - "loss": 0.423, - "step": 4087 - }, - { - "epoch": 0.2671720802561924, - "grad_norm": 0.5121861100196838, - "learning_rate": 9.878046383412833e-06, - "loss": 0.4184, - "step": 4088 - }, - { - "epoch": 0.2672374354617345, - "grad_norm": 0.4943595230579376, - "learning_rate": 9.87796971930799e-06, - "loss": 0.4107, - "step": 4089 - }, - { - "epoch": 0.26730279066727664, - "grad_norm": 0.506575882434845, - "learning_rate": 9.8778930314116e-06, - "loss": 0.3956, - "step": 4090 - }, - { - "epoch": 0.26736814587281876, - "grad_norm": 0.4697633981704712, - "learning_rate": 9.877816319724034e-06, - "loss": 0.3959, - "step": 4091 - }, - { - "epoch": 0.2674335010783609, - "grad_norm": 0.47665300965309143, - "learning_rate": 9.87773958424567e-06, - "loss": 0.3928, - "step": 4092 - }, - { - "epoch": 0.267498856283903, - "grad_norm": 0.5202171206474304, - "learning_rate": 9.877662824976876e-06, - "loss": 0.4358, - "step": 4093 - }, - { - "epoch": 0.26756421148944515, - "grad_norm": 0.44679561257362366, - "learning_rate": 9.877586041918034e-06, - "loss": 0.374, - "step": 4094 - }, - { - "epoch": 0.2676295666949873, - "grad_norm": 0.5135511755943298, - "learning_rate": 9.877509235069516e-06, - "loss": 0.4368, - "step": 4095 - }, - { - "epoch": 0.2676949219005294, - "grad_norm": 0.4886908531188965, - "learning_rate": 9.877432404431692e-06, - "loss": 0.4077, - "step": 4096 - }, - { - "epoch": 0.2677602771060715, - "grad_norm": 0.43191686272621155, - "learning_rate": 9.877355550004944e-06, - "loss": 0.3836, - "step": 4097 - }, - { - "epoch": 0.2678256323116136, - "grad_norm": 0.4852792024612427, - "learning_rate": 9.877278671789641e-06, - "loss": 0.4156, - "step": 4098 - }, - { - "epoch": 0.26789098751715573, - "grad_norm": 0.47688236832618713, - "learning_rate": 9.877201769786162e-06, - "loss": 0.3864, - "step": 4099 - }, - { - "epoch": 0.26795634272269786, - "grad_norm": 0.45649418234825134, - "learning_rate": 9.877124843994879e-06, - "loss": 0.3622, - "step": 4100 - }, - { - "epoch": 0.26802169792824, - "grad_norm": 0.47562023997306824, - "learning_rate": 9.87704789441617e-06, - "loss": 0.4457, - "step": 4101 - }, - { - "epoch": 0.2680870531337821, - "grad_norm": 0.48525452613830566, - "learning_rate": 9.876970921050406e-06, - "loss": 0.3903, - "step": 4102 - }, - { - "epoch": 0.26815240833932424, - "grad_norm": 0.45969781279563904, - "learning_rate": 9.87689392389797e-06, - "loss": 0.3931, - "step": 4103 - }, - { - "epoch": 0.26821776354486637, - "grad_norm": 0.4747847318649292, - "learning_rate": 9.876816902959228e-06, - "loss": 0.4442, - "step": 4104 - }, - { - "epoch": 0.26828311875040844, - "grad_norm": 0.5107711553573608, - "learning_rate": 9.876739858234563e-06, - "loss": 0.4395, - "step": 4105 - }, - { - "epoch": 0.2683484739559506, - "grad_norm": 0.5010309219360352, - "learning_rate": 9.876662789724347e-06, - "loss": 0.434, - "step": 4106 - }, - { - "epoch": 0.2684138291614927, - "grad_norm": 0.4896714389324188, - "learning_rate": 9.876585697428958e-06, - "loss": 0.4415, - "step": 4107 - }, - { - "epoch": 0.26847918436703483, - "grad_norm": 0.47342249751091003, - "learning_rate": 9.87650858134877e-06, - "loss": 0.4137, - "step": 4108 - }, - { - "epoch": 0.26854453957257696, - "grad_norm": 0.4698140323162079, - "learning_rate": 9.876431441484164e-06, - "loss": 0.396, - "step": 4109 - }, - { - "epoch": 0.2686098947781191, - "grad_norm": 0.4311375617980957, - "learning_rate": 9.876354277835509e-06, - "loss": 0.3754, - "step": 4110 - }, - { - "epoch": 0.2686752499836612, - "grad_norm": 0.47583404183387756, - "learning_rate": 9.876277090403185e-06, - "loss": 0.3759, - "step": 4111 - }, - { - "epoch": 0.26874060518920334, - "grad_norm": 0.5071882605552673, - "learning_rate": 9.87619987918757e-06, - "loss": 0.4031, - "step": 4112 - }, - { - "epoch": 0.26880596039474547, - "grad_norm": 0.43392661213874817, - "learning_rate": 9.876122644189036e-06, - "loss": 0.3539, - "step": 4113 - }, - { - "epoch": 0.26887131560028754, - "grad_norm": 0.4953223466873169, - "learning_rate": 9.876045385407966e-06, - "loss": 0.4125, - "step": 4114 - }, - { - "epoch": 0.26893667080582967, - "grad_norm": 0.4818981885910034, - "learning_rate": 9.875968102844732e-06, - "loss": 0.4236, - "step": 4115 - }, - { - "epoch": 0.2690020260113718, - "grad_norm": 0.5168367028236389, - "learning_rate": 9.875890796499711e-06, - "loss": 0.4124, - "step": 4116 - }, - { - "epoch": 0.2690673812169139, - "grad_norm": 0.505105197429657, - "learning_rate": 9.875813466373285e-06, - "loss": 0.4784, - "step": 4117 - }, - { - "epoch": 0.26913273642245605, - "grad_norm": 0.46311938762664795, - "learning_rate": 9.875736112465824e-06, - "loss": 0.3907, - "step": 4118 - }, - { - "epoch": 0.2691980916279982, - "grad_norm": 0.4399479627609253, - "learning_rate": 9.875658734777712e-06, - "loss": 0.3895, - "step": 4119 - }, - { - "epoch": 0.2692634468335403, - "grad_norm": 0.4627532660961151, - "learning_rate": 9.87558133330932e-06, - "loss": 0.3478, - "step": 4120 - }, - { - "epoch": 0.26932880203908244, - "grad_norm": 0.4844004213809967, - "learning_rate": 9.875503908061031e-06, - "loss": 0.3943, - "step": 4121 - }, - { - "epoch": 0.2693941572446245, - "grad_norm": 0.4906649589538574, - "learning_rate": 9.875426459033219e-06, - "loss": 0.4322, - "step": 4122 - }, - { - "epoch": 0.26945951245016664, - "grad_norm": 0.43836209177970886, - "learning_rate": 9.875348986226263e-06, - "loss": 0.3721, - "step": 4123 - }, - { - "epoch": 0.26952486765570877, - "grad_norm": 0.46436160802841187, - "learning_rate": 9.875271489640542e-06, - "loss": 0.4343, - "step": 4124 - }, - { - "epoch": 0.2695902228612509, - "grad_norm": 0.45985153317451477, - "learning_rate": 9.875193969276433e-06, - "loss": 0.4208, - "step": 4125 - }, - { - "epoch": 0.269655578066793, - "grad_norm": 0.5256823897361755, - "learning_rate": 9.875116425134313e-06, - "loss": 0.47, - "step": 4126 - }, - { - "epoch": 0.26972093327233515, - "grad_norm": 0.47644487023353577, - "learning_rate": 9.875038857214563e-06, - "loss": 0.4195, - "step": 4127 - }, - { - "epoch": 0.2697862884778773, - "grad_norm": 0.4636766314506531, - "learning_rate": 9.874961265517557e-06, - "loss": 0.4012, - "step": 4128 - }, - { - "epoch": 0.2698516436834194, - "grad_norm": 0.4734828770160675, - "learning_rate": 9.874883650043678e-06, - "loss": 0.4096, - "step": 4129 - }, - { - "epoch": 0.2699169988889615, - "grad_norm": 0.4910510778427124, - "learning_rate": 9.874806010793303e-06, - "loss": 0.4372, - "step": 4130 - }, - { - "epoch": 0.2699823540945036, - "grad_norm": 0.47214993834495544, - "learning_rate": 9.87472834776681e-06, - "loss": 0.4324, - "step": 4131 - }, - { - "epoch": 0.27004770930004574, - "grad_norm": 0.46082454919815063, - "learning_rate": 9.874650660964578e-06, - "loss": 0.4119, - "step": 4132 - }, - { - "epoch": 0.27011306450558786, - "grad_norm": 0.4675063490867615, - "learning_rate": 9.874572950386986e-06, - "loss": 0.4403, - "step": 4133 - }, - { - "epoch": 0.27017841971113, - "grad_norm": 0.4442681670188904, - "learning_rate": 9.874495216034413e-06, - "loss": 0.3491, - "step": 4134 - }, - { - "epoch": 0.2702437749166721, - "grad_norm": 0.46750369668006897, - "learning_rate": 9.874417457907237e-06, - "loss": 0.3811, - "step": 4135 - }, - { - "epoch": 0.27030913012221425, - "grad_norm": 0.4324701726436615, - "learning_rate": 9.87433967600584e-06, - "loss": 0.381, - "step": 4136 - }, - { - "epoch": 0.2703744853277564, - "grad_norm": 0.449904203414917, - "learning_rate": 9.8742618703306e-06, - "loss": 0.4025, - "step": 4137 - }, - { - "epoch": 0.2704398405332985, - "grad_norm": 0.4237454831600189, - "learning_rate": 9.874184040881893e-06, - "loss": 0.3427, - "step": 4138 - }, - { - "epoch": 0.2705051957388406, - "grad_norm": 0.4685540497303009, - "learning_rate": 9.874106187660106e-06, - "loss": 0.3583, - "step": 4139 - }, - { - "epoch": 0.2705705509443827, - "grad_norm": 0.4869711399078369, - "learning_rate": 9.874028310665612e-06, - "loss": 0.4449, - "step": 4140 - }, - { - "epoch": 0.27063590614992483, - "grad_norm": 0.4771939516067505, - "learning_rate": 9.873950409898793e-06, - "loss": 0.4557, - "step": 4141 - }, - { - "epoch": 0.27070126135546696, - "grad_norm": 0.4451706111431122, - "learning_rate": 9.873872485360032e-06, - "loss": 0.3941, - "step": 4142 - }, - { - "epoch": 0.2707666165610091, - "grad_norm": 0.4551406502723694, - "learning_rate": 9.873794537049704e-06, - "loss": 0.3592, - "step": 4143 - }, - { - "epoch": 0.2708319717665512, - "grad_norm": 0.5270684361457825, - "learning_rate": 9.873716564968193e-06, - "loss": 0.4924, - "step": 4144 - }, - { - "epoch": 0.27089732697209334, - "grad_norm": 0.5258855819702148, - "learning_rate": 9.873638569115878e-06, - "loss": 0.4887, - "step": 4145 - }, - { - "epoch": 0.27096268217763547, - "grad_norm": 0.4573988914489746, - "learning_rate": 9.873560549493138e-06, - "loss": 0.4094, - "step": 4146 - }, - { - "epoch": 0.27102803738317754, - "grad_norm": 0.42023995518684387, - "learning_rate": 9.873482506100355e-06, - "loss": 0.3675, - "step": 4147 - }, - { - "epoch": 0.2710933925887197, - "grad_norm": 0.4430708587169647, - "learning_rate": 9.87340443893791e-06, - "loss": 0.4083, - "step": 4148 - }, - { - "epoch": 0.2711587477942618, - "grad_norm": 0.47372132539749146, - "learning_rate": 9.873326348006185e-06, - "loss": 0.4293, - "step": 4149 - }, - { - "epoch": 0.27122410299980393, - "grad_norm": 0.5177667140960693, - "learning_rate": 9.873248233305558e-06, - "loss": 0.485, - "step": 4150 - }, - { - "epoch": 0.27128945820534606, - "grad_norm": 0.4773666560649872, - "learning_rate": 9.873170094836408e-06, - "loss": 0.4055, - "step": 4151 - }, - { - "epoch": 0.2713548134108882, - "grad_norm": 0.5047542452812195, - "learning_rate": 9.873091932599124e-06, - "loss": 0.4817, - "step": 4152 - }, - { - "epoch": 0.2714201686164303, - "grad_norm": 0.4635542035102844, - "learning_rate": 9.873013746594078e-06, - "loss": 0.441, - "step": 4153 - }, - { - "epoch": 0.27148552382197244, - "grad_norm": 0.4301548898220062, - "learning_rate": 9.87293553682166e-06, - "loss": 0.3518, - "step": 4154 - }, - { - "epoch": 0.27155087902751457, - "grad_norm": 0.46144917607307434, - "learning_rate": 9.872857303282245e-06, - "loss": 0.3923, - "step": 4155 - }, - { - "epoch": 0.27161623423305664, - "grad_norm": 0.5201066136360168, - "learning_rate": 9.872779045976215e-06, - "loss": 0.5005, - "step": 4156 - }, - { - "epoch": 0.27168158943859877, - "grad_norm": 0.4255172312259674, - "learning_rate": 9.872700764903958e-06, - "loss": 0.3595, - "step": 4157 - }, - { - "epoch": 0.2717469446441409, - "grad_norm": 0.4870705306529999, - "learning_rate": 9.872622460065848e-06, - "loss": 0.453, - "step": 4158 - }, - { - "epoch": 0.271812299849683, - "grad_norm": 0.4662631154060364, - "learning_rate": 9.87254413146227e-06, - "loss": 0.4058, - "step": 4159 - }, - { - "epoch": 0.27187765505522515, - "grad_norm": 0.4541662931442261, - "learning_rate": 9.872465779093607e-06, - "loss": 0.3861, - "step": 4160 - }, - { - "epoch": 0.2719430102607673, - "grad_norm": 0.44846227765083313, - "learning_rate": 9.872387402960241e-06, - "loss": 0.3581, - "step": 4161 - }, - { - "epoch": 0.2720083654663094, - "grad_norm": 0.47655147314071655, - "learning_rate": 9.872309003062554e-06, - "loss": 0.4104, - "step": 4162 - }, - { - "epoch": 0.27207372067185154, - "grad_norm": 0.46559497714042664, - "learning_rate": 9.872230579400928e-06, - "loss": 0.3903, - "step": 4163 - }, - { - "epoch": 0.2721390758773936, - "grad_norm": 0.4417462646961212, - "learning_rate": 9.872152131975745e-06, - "loss": 0.3891, - "step": 4164 - }, - { - "epoch": 0.27220443108293574, - "grad_norm": 0.4599807858467102, - "learning_rate": 9.872073660787388e-06, - "loss": 0.3899, - "step": 4165 - }, - { - "epoch": 0.27226978628847787, - "grad_norm": 0.4935910105705261, - "learning_rate": 9.87199516583624e-06, - "loss": 0.438, - "step": 4166 - }, - { - "epoch": 0.27233514149402, - "grad_norm": 0.4480958878993988, - "learning_rate": 9.871916647122684e-06, - "loss": 0.407, - "step": 4167 - }, - { - "epoch": 0.2724004966995621, - "grad_norm": 0.4256836771965027, - "learning_rate": 9.871838104647102e-06, - "loss": 0.3393, - "step": 4168 - }, - { - "epoch": 0.27246585190510425, - "grad_norm": 0.45450159907341003, - "learning_rate": 9.871759538409878e-06, - "loss": 0.4047, - "step": 4169 - }, - { - "epoch": 0.2725312071106464, - "grad_norm": 0.5286385416984558, - "learning_rate": 9.871680948411396e-06, - "loss": 0.4408, - "step": 4170 - }, - { - "epoch": 0.2725965623161885, - "grad_norm": 0.4764297604560852, - "learning_rate": 9.871602334652037e-06, - "loss": 0.4316, - "step": 4171 - }, - { - "epoch": 0.2726619175217306, - "grad_norm": 0.5015479326248169, - "learning_rate": 9.871523697132186e-06, - "loss": 0.4222, - "step": 4172 - }, - { - "epoch": 0.2727272727272727, - "grad_norm": 0.4523698389530182, - "learning_rate": 9.871445035852228e-06, - "loss": 0.3854, - "step": 4173 - }, - { - "epoch": 0.27279262793281484, - "grad_norm": 0.4936469495296478, - "learning_rate": 9.871366350812543e-06, - "loss": 0.4206, - "step": 4174 - }, - { - "epoch": 0.27285798313835696, - "grad_norm": 0.48123857378959656, - "learning_rate": 9.87128764201352e-06, - "loss": 0.3939, - "step": 4175 - }, - { - "epoch": 0.2729233383438991, - "grad_norm": 0.48141565918922424, - "learning_rate": 9.871208909455535e-06, - "loss": 0.4143, - "step": 4176 - }, - { - "epoch": 0.2729886935494412, - "grad_norm": 0.43874800205230713, - "learning_rate": 9.871130153138978e-06, - "loss": 0.3398, - "step": 4177 - }, - { - "epoch": 0.27305404875498335, - "grad_norm": 0.4822591245174408, - "learning_rate": 9.871051373064232e-06, - "loss": 0.4502, - "step": 4178 - }, - { - "epoch": 0.2731194039605255, - "grad_norm": 0.47263237833976746, - "learning_rate": 9.870972569231681e-06, - "loss": 0.3933, - "step": 4179 - }, - { - "epoch": 0.2731847591660676, - "grad_norm": 0.4788112938404083, - "learning_rate": 9.87089374164171e-06, - "loss": 0.4106, - "step": 4180 - }, - { - "epoch": 0.2732501143716097, - "grad_norm": 0.4840521812438965, - "learning_rate": 9.870814890294701e-06, - "loss": 0.4249, - "step": 4181 - }, - { - "epoch": 0.2733154695771518, - "grad_norm": 0.4387449622154236, - "learning_rate": 9.870736015191043e-06, - "loss": 0.3768, - "step": 4182 - }, - { - "epoch": 0.27338082478269393, - "grad_norm": 0.49455657601356506, - "learning_rate": 9.870657116331118e-06, - "loss": 0.4425, - "step": 4183 - }, - { - "epoch": 0.27344617998823606, - "grad_norm": 0.47682246565818787, - "learning_rate": 9.870578193715308e-06, - "loss": 0.411, - "step": 4184 - }, - { - "epoch": 0.2735115351937782, - "grad_norm": 0.5592783093452454, - "learning_rate": 9.870499247344004e-06, - "loss": 0.4408, - "step": 4185 - }, - { - "epoch": 0.2735768903993203, - "grad_norm": 0.4375693202018738, - "learning_rate": 9.870420277217584e-06, - "loss": 0.3513, - "step": 4186 - }, - { - "epoch": 0.27364224560486244, - "grad_norm": 0.5457838177680969, - "learning_rate": 9.870341283336439e-06, - "loss": 0.4368, - "step": 4187 - }, - { - "epoch": 0.27370760081040457, - "grad_norm": 0.48499536514282227, - "learning_rate": 9.870262265700954e-06, - "loss": 0.4413, - "step": 4188 - }, - { - "epoch": 0.27377295601594664, - "grad_norm": 0.49165967106819153, - "learning_rate": 9.870183224311512e-06, - "loss": 0.4388, - "step": 4189 - }, - { - "epoch": 0.2738383112214888, - "grad_norm": 0.47874099016189575, - "learning_rate": 9.870104159168497e-06, - "loss": 0.3902, - "step": 4190 - }, - { - "epoch": 0.2739036664270309, - "grad_norm": 0.49625450372695923, - "learning_rate": 9.870025070272298e-06, - "loss": 0.4004, - "step": 4191 - }, - { - "epoch": 0.27396902163257303, - "grad_norm": 0.46407896280288696, - "learning_rate": 9.869945957623302e-06, - "loss": 0.4058, - "step": 4192 - }, - { - "epoch": 0.27403437683811516, - "grad_norm": 0.474025160074234, - "learning_rate": 9.869866821221889e-06, - "loss": 0.3872, - "step": 4193 - }, - { - "epoch": 0.2740997320436573, - "grad_norm": 0.47441983222961426, - "learning_rate": 9.86978766106845e-06, - "loss": 0.4028, - "step": 4194 - }, - { - "epoch": 0.2741650872491994, - "grad_norm": 0.44902557134628296, - "learning_rate": 9.86970847716337e-06, - "loss": 0.3714, - "step": 4195 - }, - { - "epoch": 0.27423044245474154, - "grad_norm": 0.43087777495384216, - "learning_rate": 9.869629269507034e-06, - "loss": 0.3599, - "step": 4196 - }, - { - "epoch": 0.27429579766028367, - "grad_norm": 0.43613943457603455, - "learning_rate": 9.86955003809983e-06, - "loss": 0.3849, - "step": 4197 - }, - { - "epoch": 0.27436115286582574, - "grad_norm": 0.502096951007843, - "learning_rate": 9.86947078294214e-06, - "loss": 0.422, - "step": 4198 - }, - { - "epoch": 0.27442650807136787, - "grad_norm": 0.44126638770103455, - "learning_rate": 9.869391504034358e-06, - "loss": 0.3816, - "step": 4199 - }, - { - "epoch": 0.27449186327691, - "grad_norm": 0.485454261302948, - "learning_rate": 9.869312201376865e-06, - "loss": 0.4453, - "step": 4200 - }, - { - "epoch": 0.2745572184824521, - "grad_norm": 0.5053019523620605, - "learning_rate": 9.869232874970052e-06, - "loss": 0.5149, - "step": 4201 - }, - { - "epoch": 0.27462257368799425, - "grad_norm": 0.4587983191013336, - "learning_rate": 9.8691535248143e-06, - "loss": 0.4059, - "step": 4202 - }, - { - "epoch": 0.2746879288935364, - "grad_norm": 0.5347425937652588, - "learning_rate": 9.869074150910001e-06, - "loss": 0.4822, - "step": 4203 - }, - { - "epoch": 0.2747532840990785, - "grad_norm": 0.4826345443725586, - "learning_rate": 9.86899475325754e-06, - "loss": 0.4088, - "step": 4204 - }, - { - "epoch": 0.27481863930462064, - "grad_norm": 0.48696738481521606, - "learning_rate": 9.868915331857304e-06, - "loss": 0.4142, - "step": 4205 - }, - { - "epoch": 0.2748839945101627, - "grad_norm": 0.48205018043518066, - "learning_rate": 9.868835886709685e-06, - "loss": 0.4301, - "step": 4206 - }, - { - "epoch": 0.27494934971570484, - "grad_norm": 0.5135433077812195, - "learning_rate": 9.868756417815062e-06, - "loss": 0.4694, - "step": 4207 - }, - { - "epoch": 0.27501470492124697, - "grad_norm": 0.49597039818763733, - "learning_rate": 9.86867692517383e-06, - "loss": 0.4468, - "step": 4208 - }, - { - "epoch": 0.2750800601267891, - "grad_norm": 0.46526581048965454, - "learning_rate": 9.868597408786373e-06, - "loss": 0.3739, - "step": 4209 - }, - { - "epoch": 0.2751454153323312, - "grad_norm": 0.5034768581390381, - "learning_rate": 9.86851786865308e-06, - "loss": 0.4577, - "step": 4210 - }, - { - "epoch": 0.27521077053787335, - "grad_norm": 0.48159655928611755, - "learning_rate": 9.86843830477434e-06, - "loss": 0.4274, - "step": 4211 - }, - { - "epoch": 0.2752761257434155, - "grad_norm": 0.4275132119655609, - "learning_rate": 9.868358717150537e-06, - "loss": 0.3662, - "step": 4212 - }, - { - "epoch": 0.2753414809489576, - "grad_norm": 0.45612967014312744, - "learning_rate": 9.868279105782063e-06, - "loss": 0.4201, - "step": 4213 - }, - { - "epoch": 0.2754068361544997, - "grad_norm": 0.4631737172603607, - "learning_rate": 9.868199470669306e-06, - "loss": 0.3874, - "step": 4214 - }, - { - "epoch": 0.2754721913600418, - "grad_norm": 0.4741147756576538, - "learning_rate": 9.868119811812653e-06, - "loss": 0.4184, - "step": 4215 - }, - { - "epoch": 0.27553754656558394, - "grad_norm": 0.4729333221912384, - "learning_rate": 9.868040129212495e-06, - "loss": 0.4127, - "step": 4216 - }, - { - "epoch": 0.27560290177112606, - "grad_norm": 0.441802978515625, - "learning_rate": 9.867960422869217e-06, - "loss": 0.3622, - "step": 4217 - }, - { - "epoch": 0.2756682569766682, - "grad_norm": 0.47777706384658813, - "learning_rate": 9.867880692783209e-06, - "loss": 0.4121, - "step": 4218 - }, - { - "epoch": 0.2757336121822103, - "grad_norm": 0.483521044254303, - "learning_rate": 9.867800938954862e-06, - "loss": 0.4265, - "step": 4219 - }, - { - "epoch": 0.27579896738775245, - "grad_norm": 0.48338282108306885, - "learning_rate": 9.867721161384564e-06, - "loss": 0.4245, - "step": 4220 - }, - { - "epoch": 0.2758643225932946, - "grad_norm": 0.518380880355835, - "learning_rate": 9.867641360072702e-06, - "loss": 0.4834, - "step": 4221 - }, - { - "epoch": 0.2759296777988367, - "grad_norm": 0.4838882386684418, - "learning_rate": 9.867561535019667e-06, - "loss": 0.424, - "step": 4222 - }, - { - "epoch": 0.2759950330043788, - "grad_norm": 0.4742378890514374, - "learning_rate": 9.867481686225848e-06, - "loss": 0.4034, - "step": 4223 - }, - { - "epoch": 0.2760603882099209, - "grad_norm": 0.45290738344192505, - "learning_rate": 9.867401813691636e-06, - "loss": 0.3871, - "step": 4224 - }, - { - "epoch": 0.27612574341546303, - "grad_norm": 0.463894784450531, - "learning_rate": 9.867321917417418e-06, - "loss": 0.4015, - "step": 4225 - }, - { - "epoch": 0.27619109862100516, - "grad_norm": 0.660118043422699, - "learning_rate": 9.867241997403586e-06, - "loss": 0.3977, - "step": 4226 - }, - { - "epoch": 0.2762564538265473, - "grad_norm": 0.5125059485435486, - "learning_rate": 9.867162053650525e-06, - "loss": 0.4303, - "step": 4227 - }, - { - "epoch": 0.2763218090320894, - "grad_norm": 0.4675084948539734, - "learning_rate": 9.867082086158633e-06, - "loss": 0.3835, - "step": 4228 - }, - { - "epoch": 0.27638716423763154, - "grad_norm": 0.4916883111000061, - "learning_rate": 9.867002094928293e-06, - "loss": 0.4358, - "step": 4229 - }, - { - "epoch": 0.27645251944317367, - "grad_norm": 0.45190033316612244, - "learning_rate": 9.866922079959897e-06, - "loss": 0.3938, - "step": 4230 - }, - { - "epoch": 0.27651787464871574, - "grad_norm": 0.44110557436943054, - "learning_rate": 9.866842041253838e-06, - "loss": 0.3494, - "step": 4231 - }, - { - "epoch": 0.2765832298542579, - "grad_norm": 0.45761820673942566, - "learning_rate": 9.866761978810505e-06, - "loss": 0.3583, - "step": 4232 - }, - { - "epoch": 0.2766485850598, - "grad_norm": 0.5181942582130432, - "learning_rate": 9.866681892630286e-06, - "loss": 0.4689, - "step": 4233 - }, - { - "epoch": 0.27671394026534213, - "grad_norm": 0.4559297263622284, - "learning_rate": 9.866601782713572e-06, - "loss": 0.3863, - "step": 4234 - }, - { - "epoch": 0.27677929547088426, - "grad_norm": 0.4838227927684784, - "learning_rate": 9.866521649060758e-06, - "loss": 0.4521, - "step": 4235 - }, - { - "epoch": 0.2768446506764264, - "grad_norm": 0.5002492666244507, - "learning_rate": 9.86644149167223e-06, - "loss": 0.4604, - "step": 4236 - }, - { - "epoch": 0.2769100058819685, - "grad_norm": 0.4975980818271637, - "learning_rate": 9.866361310548383e-06, - "loss": 0.4169, - "step": 4237 - }, - { - "epoch": 0.27697536108751064, - "grad_norm": 0.4638250470161438, - "learning_rate": 9.866281105689605e-06, - "loss": 0.3938, - "step": 4238 - }, - { - "epoch": 0.27704071629305277, - "grad_norm": 0.491578608751297, - "learning_rate": 9.866200877096288e-06, - "loss": 0.4545, - "step": 4239 - }, - { - "epoch": 0.27710607149859484, - "grad_norm": 0.4563657343387604, - "learning_rate": 9.866120624768822e-06, - "loss": 0.3852, - "step": 4240 - }, - { - "epoch": 0.27717142670413697, - "grad_norm": 0.4132401645183563, - "learning_rate": 9.866040348707602e-06, - "loss": 0.327, - "step": 4241 - }, - { - "epoch": 0.2772367819096791, - "grad_norm": 0.5281173586845398, - "learning_rate": 9.865960048913018e-06, - "loss": 0.4479, - "step": 4242 - }, - { - "epoch": 0.2773021371152212, - "grad_norm": 0.43360963463783264, - "learning_rate": 9.86587972538546e-06, - "loss": 0.3497, - "step": 4243 - }, - { - "epoch": 0.27736749232076335, - "grad_norm": 0.49240660667419434, - "learning_rate": 9.86579937812532e-06, - "loss": 0.4213, - "step": 4244 - }, - { - "epoch": 0.2774328475263055, - "grad_norm": 0.49276968836784363, - "learning_rate": 9.865719007132993e-06, - "loss": 0.439, - "step": 4245 - }, - { - "epoch": 0.2774982027318476, - "grad_norm": 0.461213618516922, - "learning_rate": 9.865638612408868e-06, - "loss": 0.4273, - "step": 4246 - }, - { - "epoch": 0.27756355793738974, - "grad_norm": 0.4940822422504425, - "learning_rate": 9.865558193953336e-06, - "loss": 0.4213, - "step": 4247 - }, - { - "epoch": 0.2776289131429318, - "grad_norm": 0.46520429849624634, - "learning_rate": 9.865477751766792e-06, - "loss": 0.423, - "step": 4248 - }, - { - "epoch": 0.27769426834847394, - "grad_norm": 0.4513480067253113, - "learning_rate": 9.865397285849629e-06, - "loss": 0.3945, - "step": 4249 - }, - { - "epoch": 0.27775962355401607, - "grad_norm": 0.4619412422180176, - "learning_rate": 9.865316796202236e-06, - "loss": 0.3979, - "step": 4250 - }, - { - "epoch": 0.2778249787595582, - "grad_norm": 0.4449423849582672, - "learning_rate": 9.865236282825008e-06, - "loss": 0.3931, - "step": 4251 - }, - { - "epoch": 0.2778903339651003, - "grad_norm": 0.4934066832065582, - "learning_rate": 9.865155745718337e-06, - "loss": 0.4538, - "step": 4252 - }, - { - "epoch": 0.27795568917064245, - "grad_norm": 0.4296741783618927, - "learning_rate": 9.865075184882618e-06, - "loss": 0.3764, - "step": 4253 - }, - { - "epoch": 0.2780210443761846, - "grad_norm": 0.4742310345172882, - "learning_rate": 9.86499460031824e-06, - "loss": 0.4072, - "step": 4254 - }, - { - "epoch": 0.2780863995817267, - "grad_norm": 0.48595312237739563, - "learning_rate": 9.864913992025597e-06, - "loss": 0.4098, - "step": 4255 - }, - { - "epoch": 0.2781517547872688, - "grad_norm": 0.4828436076641083, - "learning_rate": 9.864833360005085e-06, - "loss": 0.4151, - "step": 4256 - }, - { - "epoch": 0.2782171099928109, - "grad_norm": 0.471900075674057, - "learning_rate": 9.864752704257095e-06, - "loss": 0.384, - "step": 4257 - }, - { - "epoch": 0.27828246519835304, - "grad_norm": 0.4691624939441681, - "learning_rate": 9.86467202478202e-06, - "loss": 0.3715, - "step": 4258 - }, - { - "epoch": 0.27834782040389516, - "grad_norm": 0.4548409581184387, - "learning_rate": 9.864591321580255e-06, - "loss": 0.4181, - "step": 4259 - }, - { - "epoch": 0.2784131756094373, - "grad_norm": 0.47972768545150757, - "learning_rate": 9.864510594652194e-06, - "loss": 0.4132, - "step": 4260 - }, - { - "epoch": 0.2784785308149794, - "grad_norm": 0.48893147706985474, - "learning_rate": 9.864429843998227e-06, - "loss": 0.4143, - "step": 4261 - }, - { - "epoch": 0.27854388602052155, - "grad_norm": 0.48815682530403137, - "learning_rate": 9.864349069618753e-06, - "loss": 0.416, - "step": 4262 - }, - { - "epoch": 0.2786092412260637, - "grad_norm": 0.5057495832443237, - "learning_rate": 9.864268271514162e-06, - "loss": 0.4514, - "step": 4263 - }, - { - "epoch": 0.2786745964316058, - "grad_norm": 0.5050725936889648, - "learning_rate": 9.864187449684849e-06, - "loss": 0.4184, - "step": 4264 - }, - { - "epoch": 0.2787399516371479, - "grad_norm": 0.4830845892429352, - "learning_rate": 9.864106604131209e-06, - "loss": 0.4709, - "step": 4265 - }, - { - "epoch": 0.27880530684269, - "grad_norm": 0.4551175534725189, - "learning_rate": 9.864025734853636e-06, - "loss": 0.3539, - "step": 4266 - }, - { - "epoch": 0.27887066204823213, - "grad_norm": 0.48389312624931335, - "learning_rate": 9.863944841852523e-06, - "loss": 0.4114, - "step": 4267 - }, - { - "epoch": 0.27893601725377426, - "grad_norm": 0.49458593130111694, - "learning_rate": 9.86386392512827e-06, - "loss": 0.4188, - "step": 4268 - }, - { - "epoch": 0.2790013724593164, - "grad_norm": 0.45328977704048157, - "learning_rate": 9.863782984681266e-06, - "loss": 0.4042, - "step": 4269 - }, - { - "epoch": 0.2790667276648585, - "grad_norm": 0.4529956877231598, - "learning_rate": 9.863702020511905e-06, - "loss": 0.4071, - "step": 4270 - }, - { - "epoch": 0.27913208287040064, - "grad_norm": 0.48014965653419495, - "learning_rate": 9.863621032620588e-06, - "loss": 0.406, - "step": 4271 - }, - { - "epoch": 0.27919743807594277, - "grad_norm": 0.5194218158721924, - "learning_rate": 9.863540021007702e-06, - "loss": 0.4139, - "step": 4272 - }, - { - "epoch": 0.27926279328148484, - "grad_norm": 0.4963468909263611, - "learning_rate": 9.86345898567365e-06, - "loss": 0.3846, - "step": 4273 - }, - { - "epoch": 0.279328148487027, - "grad_norm": 0.4484500586986542, - "learning_rate": 9.863377926618823e-06, - "loss": 0.3742, - "step": 4274 - }, - { - "epoch": 0.2793935036925691, - "grad_norm": 0.4989372193813324, - "learning_rate": 9.863296843843616e-06, - "loss": 0.4348, - "step": 4275 - }, - { - "epoch": 0.27945885889811123, - "grad_norm": 0.5235374569892883, - "learning_rate": 9.863215737348425e-06, - "loss": 0.4709, - "step": 4276 - }, - { - "epoch": 0.27952421410365336, - "grad_norm": 0.4583902657032013, - "learning_rate": 9.863134607133647e-06, - "loss": 0.368, - "step": 4277 - }, - { - "epoch": 0.2795895693091955, - "grad_norm": 0.4683593213558197, - "learning_rate": 9.863053453199676e-06, - "loss": 0.4128, - "step": 4278 - }, - { - "epoch": 0.2796549245147376, - "grad_norm": 0.49003276228904724, - "learning_rate": 9.862972275546911e-06, - "loss": 0.3988, - "step": 4279 - }, - { - "epoch": 0.27972027972027974, - "grad_norm": 0.4580129384994507, - "learning_rate": 9.862891074175743e-06, - "loss": 0.3855, - "step": 4280 - }, - { - "epoch": 0.27978563492582187, - "grad_norm": 0.4886868894100189, - "learning_rate": 9.862809849086571e-06, - "loss": 0.4067, - "step": 4281 - }, - { - "epoch": 0.27985099013136394, - "grad_norm": 0.47899141907691956, - "learning_rate": 9.862728600279791e-06, - "loss": 0.4376, - "step": 4282 - }, - { - "epoch": 0.27991634533690607, - "grad_norm": 0.5096350908279419, - "learning_rate": 9.8626473277558e-06, - "loss": 0.3962, - "step": 4283 - }, - { - "epoch": 0.2799817005424482, - "grad_norm": 0.48719924688339233, - "learning_rate": 9.862566031514992e-06, - "loss": 0.3973, - "step": 4284 - }, - { - "epoch": 0.2800470557479903, - "grad_norm": 0.4794047176837921, - "learning_rate": 9.862484711557765e-06, - "loss": 0.4489, - "step": 4285 - }, - { - "epoch": 0.28011241095353245, - "grad_norm": 0.44751083850860596, - "learning_rate": 9.862403367884517e-06, - "loss": 0.3554, - "step": 4286 - }, - { - "epoch": 0.2801777661590746, - "grad_norm": 0.4844992160797119, - "learning_rate": 9.862322000495642e-06, - "loss": 0.4344, - "step": 4287 - }, - { - "epoch": 0.2802431213646167, - "grad_norm": 0.5009134411811829, - "learning_rate": 9.862240609391538e-06, - "loss": 0.4331, - "step": 4288 - }, - { - "epoch": 0.28030847657015884, - "grad_norm": 0.49703457951545715, - "learning_rate": 9.862159194572602e-06, - "loss": 0.4175, - "step": 4289 - }, - { - "epoch": 0.2803738317757009, - "grad_norm": 0.4641529619693756, - "learning_rate": 9.862077756039232e-06, - "loss": 0.3843, - "step": 4290 - }, - { - "epoch": 0.28043918698124304, - "grad_norm": 0.490360289812088, - "learning_rate": 9.861996293791825e-06, - "loss": 0.4238, - "step": 4291 - }, - { - "epoch": 0.28050454218678517, - "grad_norm": 0.44240522384643555, - "learning_rate": 9.861914807830776e-06, - "loss": 0.3901, - "step": 4292 - }, - { - "epoch": 0.2805698973923273, - "grad_norm": 0.4521576166152954, - "learning_rate": 9.861833298156485e-06, - "loss": 0.3576, - "step": 4293 - }, - { - "epoch": 0.2806352525978694, - "grad_norm": 0.45959368348121643, - "learning_rate": 9.86175176476935e-06, - "loss": 0.3847, - "step": 4294 - }, - { - "epoch": 0.28070060780341155, - "grad_norm": 0.506125271320343, - "learning_rate": 9.861670207669765e-06, - "loss": 0.4687, - "step": 4295 - }, - { - "epoch": 0.2807659630089537, - "grad_norm": 0.47286534309387207, - "learning_rate": 9.861588626858131e-06, - "loss": 0.3955, - "step": 4296 - }, - { - "epoch": 0.2808313182144958, - "grad_norm": 0.4786333441734314, - "learning_rate": 9.861507022334845e-06, - "loss": 0.397, - "step": 4297 - }, - { - "epoch": 0.2808966734200379, - "grad_norm": 0.47633546590805054, - "learning_rate": 9.861425394100305e-06, - "loss": 0.3835, - "step": 4298 - }, - { - "epoch": 0.28096202862558, - "grad_norm": 0.4873596131801605, - "learning_rate": 9.86134374215491e-06, - "loss": 0.4445, - "step": 4299 - }, - { - "epoch": 0.28102738383112214, - "grad_norm": 0.45364174246788025, - "learning_rate": 9.861262066499058e-06, - "loss": 0.3648, - "step": 4300 - }, - { - "epoch": 0.28109273903666426, - "grad_norm": 0.5587484836578369, - "learning_rate": 9.861180367133144e-06, - "loss": 0.4146, - "step": 4301 - }, - { - "epoch": 0.2811580942422064, - "grad_norm": 0.47603994607925415, - "learning_rate": 9.861098644057572e-06, - "loss": 0.3902, - "step": 4302 - }, - { - "epoch": 0.2812234494477485, - "grad_norm": 0.5273681879043579, - "learning_rate": 9.861016897272738e-06, - "loss": 0.4315, - "step": 4303 - }, - { - "epoch": 0.28128880465329065, - "grad_norm": 0.4664003252983093, - "learning_rate": 9.86093512677904e-06, - "loss": 0.3831, - "step": 4304 - }, - { - "epoch": 0.2813541598588328, - "grad_norm": 0.4565950930118561, - "learning_rate": 9.860853332576876e-06, - "loss": 0.4142, - "step": 4305 - }, - { - "epoch": 0.2814195150643749, - "grad_norm": 0.5035836696624756, - "learning_rate": 9.860771514666646e-06, - "loss": 0.4552, - "step": 4306 - }, - { - "epoch": 0.281484870269917, - "grad_norm": 0.527451753616333, - "learning_rate": 9.860689673048751e-06, - "loss": 0.4876, - "step": 4307 - }, - { - "epoch": 0.2815502254754591, - "grad_norm": 0.4922797679901123, - "learning_rate": 9.860607807723587e-06, - "loss": 0.4235, - "step": 4308 - }, - { - "epoch": 0.28161558068100123, - "grad_norm": 0.42618000507354736, - "learning_rate": 9.860525918691557e-06, - "loss": 0.3471, - "step": 4309 - }, - { - "epoch": 0.28168093588654336, - "grad_norm": 0.5181910991668701, - "learning_rate": 9.860444005953058e-06, - "loss": 0.4926, - "step": 4310 - }, - { - "epoch": 0.2817462910920855, - "grad_norm": 0.5081905722618103, - "learning_rate": 9.860362069508488e-06, - "loss": 0.4827, - "step": 4311 - }, - { - "epoch": 0.2818116462976276, - "grad_norm": 0.49703356623649597, - "learning_rate": 9.860280109358248e-06, - "loss": 0.4451, - "step": 4312 - }, - { - "epoch": 0.28187700150316974, - "grad_norm": 0.4495966136455536, - "learning_rate": 9.86019812550274e-06, - "loss": 0.3759, - "step": 4313 - }, - { - "epoch": 0.28194235670871187, - "grad_norm": 0.5054322481155396, - "learning_rate": 9.860116117942363e-06, - "loss": 0.3962, - "step": 4314 - }, - { - "epoch": 0.28200771191425394, - "grad_norm": 0.5199347138404846, - "learning_rate": 9.860034086677515e-06, - "loss": 0.4643, - "step": 4315 - }, - { - "epoch": 0.2820730671197961, - "grad_norm": 0.45203331112861633, - "learning_rate": 9.859952031708595e-06, - "loss": 0.3936, - "step": 4316 - }, - { - "epoch": 0.2821384223253382, - "grad_norm": 0.4817637503147125, - "learning_rate": 9.859869953036007e-06, - "loss": 0.4314, - "step": 4317 - }, - { - "epoch": 0.28220377753088033, - "grad_norm": 0.4901078939437866, - "learning_rate": 9.85978785066015e-06, - "loss": 0.4126, - "step": 4318 - }, - { - "epoch": 0.28226913273642246, - "grad_norm": 0.46651607751846313, - "learning_rate": 9.859705724581423e-06, - "loss": 0.3995, - "step": 4319 - }, - { - "epoch": 0.2823344879419646, - "grad_norm": 0.4911923110485077, - "learning_rate": 9.859623574800228e-06, - "loss": 0.4519, - "step": 4320 - }, - { - "epoch": 0.2823998431475067, - "grad_norm": 0.46471184492111206, - "learning_rate": 9.859541401316965e-06, - "loss": 0.3877, - "step": 4321 - }, - { - "epoch": 0.28246519835304884, - "grad_norm": 0.4655369222164154, - "learning_rate": 9.859459204132037e-06, - "loss": 0.3872, - "step": 4322 - }, - { - "epoch": 0.28253055355859097, - "grad_norm": 0.48006582260131836, - "learning_rate": 9.85937698324584e-06, - "loss": 0.4473, - "step": 4323 - }, - { - "epoch": 0.28259590876413304, - "grad_norm": 0.48849478363990784, - "learning_rate": 9.85929473865878e-06, - "loss": 0.4537, - "step": 4324 - }, - { - "epoch": 0.28266126396967517, - "grad_norm": 0.4851961135864258, - "learning_rate": 9.859212470371256e-06, - "loss": 0.4223, - "step": 4325 - }, - { - "epoch": 0.2827266191752173, - "grad_norm": 0.44070538878440857, - "learning_rate": 9.859130178383669e-06, - "loss": 0.3319, - "step": 4326 - }, - { - "epoch": 0.2827919743807594, - "grad_norm": 0.45314934849739075, - "learning_rate": 9.859047862696421e-06, - "loss": 0.3728, - "step": 4327 - }, - { - "epoch": 0.28285732958630155, - "grad_norm": 0.45467445254325867, - "learning_rate": 9.858965523309914e-06, - "loss": 0.3492, - "step": 4328 - }, - { - "epoch": 0.2829226847918437, - "grad_norm": 0.4738505780696869, - "learning_rate": 9.858883160224547e-06, - "loss": 0.4278, - "step": 4329 - }, - { - "epoch": 0.2829880399973858, - "grad_norm": 0.49519234895706177, - "learning_rate": 9.858800773440724e-06, - "loss": 0.4361, - "step": 4330 - }, - { - "epoch": 0.28305339520292794, - "grad_norm": 0.48089075088500977, - "learning_rate": 9.858718362958848e-06, - "loss": 0.4303, - "step": 4331 - }, - { - "epoch": 0.28311875040847, - "grad_norm": 0.5376557111740112, - "learning_rate": 9.858635928779318e-06, - "loss": 0.4428, - "step": 4332 - }, - { - "epoch": 0.28318410561401214, - "grad_norm": 0.4594705402851105, - "learning_rate": 9.858553470902536e-06, - "loss": 0.4034, - "step": 4333 - }, - { - "epoch": 0.28324946081955427, - "grad_norm": 0.4774813950061798, - "learning_rate": 9.858470989328907e-06, - "loss": 0.4075, - "step": 4334 - }, - { - "epoch": 0.2833148160250964, - "grad_norm": 0.4887755513191223, - "learning_rate": 9.858388484058834e-06, - "loss": 0.3967, - "step": 4335 - }, - { - "epoch": 0.2833801712306385, - "grad_norm": 0.4676692485809326, - "learning_rate": 9.858305955092715e-06, - "loss": 0.3833, - "step": 4336 - }, - { - "epoch": 0.28344552643618065, - "grad_norm": 0.4158990681171417, - "learning_rate": 9.858223402430955e-06, - "loss": 0.3218, - "step": 4337 - }, - { - "epoch": 0.2835108816417228, - "grad_norm": 0.4769608974456787, - "learning_rate": 9.858140826073956e-06, - "loss": 0.4069, - "step": 4338 - }, - { - "epoch": 0.2835762368472649, - "grad_norm": 0.4703862965106964, - "learning_rate": 9.85805822602212e-06, - "loss": 0.4004, - "step": 4339 - }, - { - "epoch": 0.283641592052807, - "grad_norm": 0.4532063603401184, - "learning_rate": 9.857975602275853e-06, - "loss": 0.3712, - "step": 4340 - }, - { - "epoch": 0.2837069472583491, - "grad_norm": 0.5027262568473816, - "learning_rate": 9.857892954835558e-06, - "loss": 0.4135, - "step": 4341 - }, - { - "epoch": 0.28377230246389124, - "grad_norm": 0.4493769109249115, - "learning_rate": 9.857810283701632e-06, - "loss": 0.3551, - "step": 4342 - }, - { - "epoch": 0.28383765766943336, - "grad_norm": 0.4735272228717804, - "learning_rate": 9.857727588874484e-06, - "loss": 0.43, - "step": 4343 - }, - { - "epoch": 0.2839030128749755, - "grad_norm": 0.4992130696773529, - "learning_rate": 9.857644870354516e-06, - "loss": 0.4497, - "step": 4344 - }, - { - "epoch": 0.2839683680805176, - "grad_norm": 0.5045772194862366, - "learning_rate": 9.85756212814213e-06, - "loss": 0.4368, - "step": 4345 - }, - { - "epoch": 0.28403372328605975, - "grad_norm": 0.4504072666168213, - "learning_rate": 9.857479362237732e-06, - "loss": 0.3682, - "step": 4346 - }, - { - "epoch": 0.2840990784916019, - "grad_norm": 0.44460558891296387, - "learning_rate": 9.857396572641724e-06, - "loss": 0.3883, - "step": 4347 - }, - { - "epoch": 0.284164433697144, - "grad_norm": 0.4436810612678528, - "learning_rate": 9.85731375935451e-06, - "loss": 0.3719, - "step": 4348 - }, - { - "epoch": 0.2842297889026861, - "grad_norm": 0.47199153900146484, - "learning_rate": 9.857230922376496e-06, - "loss": 0.4315, - "step": 4349 - }, - { - "epoch": 0.2842951441082282, - "grad_norm": 0.4648796319961548, - "learning_rate": 9.857148061708082e-06, - "loss": 0.4245, - "step": 4350 - }, - { - "epoch": 0.28436049931377033, - "grad_norm": 0.5298538208007812, - "learning_rate": 9.857065177349673e-06, - "loss": 0.5075, - "step": 4351 - }, - { - "epoch": 0.28442585451931246, - "grad_norm": 0.47171550989151, - "learning_rate": 9.856982269301676e-06, - "loss": 0.409, - "step": 4352 - }, - { - "epoch": 0.2844912097248546, - "grad_norm": 0.4968525171279907, - "learning_rate": 9.856899337564494e-06, - "loss": 0.4702, - "step": 4353 - }, - { - "epoch": 0.2845565649303967, - "grad_norm": 0.4832991361618042, - "learning_rate": 9.85681638213853e-06, - "loss": 0.3866, - "step": 4354 - }, - { - "epoch": 0.28462192013593884, - "grad_norm": 0.47334814071655273, - "learning_rate": 9.856733403024192e-06, - "loss": 0.4389, - "step": 4355 - }, - { - "epoch": 0.28468727534148097, - "grad_norm": 0.4745638072490692, - "learning_rate": 9.856650400221882e-06, - "loss": 0.4068, - "step": 4356 - }, - { - "epoch": 0.28475263054702304, - "grad_norm": 0.4593832790851593, - "learning_rate": 9.856567373732005e-06, - "loss": 0.404, - "step": 4357 - }, - { - "epoch": 0.2848179857525652, - "grad_norm": 0.4992309510707855, - "learning_rate": 9.856484323554967e-06, - "loss": 0.4387, - "step": 4358 - }, - { - "epoch": 0.2848833409581073, - "grad_norm": 0.46862438321113586, - "learning_rate": 9.856401249691171e-06, - "loss": 0.4036, - "step": 4359 - }, - { - "epoch": 0.28494869616364943, - "grad_norm": 0.5406008362770081, - "learning_rate": 9.856318152141026e-06, - "loss": 0.4794, - "step": 4360 - }, - { - "epoch": 0.28501405136919156, - "grad_norm": 0.45046672224998474, - "learning_rate": 9.856235030904934e-06, - "loss": 0.3533, - "step": 4361 - }, - { - "epoch": 0.2850794065747337, - "grad_norm": 0.5082617998123169, - "learning_rate": 9.8561518859833e-06, - "loss": 0.4268, - "step": 4362 - }, - { - "epoch": 0.2851447617802758, - "grad_norm": 0.4527800977230072, - "learning_rate": 9.856068717376533e-06, - "loss": 0.3765, - "step": 4363 - }, - { - "epoch": 0.28521011698581794, - "grad_norm": 0.45166146755218506, - "learning_rate": 9.855985525085035e-06, - "loss": 0.3369, - "step": 4364 - }, - { - "epoch": 0.28527547219136007, - "grad_norm": 0.508328914642334, - "learning_rate": 9.855902309109214e-06, - "loss": 0.4233, - "step": 4365 - }, - { - "epoch": 0.28534082739690214, - "grad_norm": 0.47016677260398865, - "learning_rate": 9.855819069449475e-06, - "loss": 0.3705, - "step": 4366 - }, - { - "epoch": 0.28540618260244427, - "grad_norm": 0.4435829818248749, - "learning_rate": 9.855735806106226e-06, - "loss": 0.3818, - "step": 4367 - }, - { - "epoch": 0.2854715378079864, - "grad_norm": 0.4735620319843292, - "learning_rate": 9.855652519079867e-06, - "loss": 0.4362, - "step": 4368 - }, - { - "epoch": 0.2855368930135285, - "grad_norm": 0.4764656722545624, - "learning_rate": 9.855569208370813e-06, - "loss": 0.3775, - "step": 4369 - }, - { - "epoch": 0.28560224821907065, - "grad_norm": 0.4751443862915039, - "learning_rate": 9.855485873979464e-06, - "loss": 0.3796, - "step": 4370 - }, - { - "epoch": 0.2856676034246128, - "grad_norm": 0.5028964281082153, - "learning_rate": 9.855402515906229e-06, - "loss": 0.4524, - "step": 4371 - }, - { - "epoch": 0.2857329586301549, - "grad_norm": 0.4496062099933624, - "learning_rate": 9.855319134151514e-06, - "loss": 0.3677, - "step": 4372 - }, - { - "epoch": 0.28579831383569704, - "grad_norm": 0.47241446375846863, - "learning_rate": 9.855235728715723e-06, - "loss": 0.4358, - "step": 4373 - }, - { - "epoch": 0.2858636690412391, - "grad_norm": 0.44672802090644836, - "learning_rate": 9.855152299599267e-06, - "loss": 0.4042, - "step": 4374 - }, - { - "epoch": 0.28592902424678124, - "grad_norm": 0.6679685115814209, - "learning_rate": 9.855068846802552e-06, - "loss": 0.3636, - "step": 4375 - }, - { - "epoch": 0.28599437945232337, - "grad_norm": 0.4665829539299011, - "learning_rate": 9.854985370325983e-06, - "loss": 0.4069, - "step": 4376 - }, - { - "epoch": 0.2860597346578655, - "grad_norm": 0.5016047358512878, - "learning_rate": 9.854901870169968e-06, - "loss": 0.4619, - "step": 4377 - }, - { - "epoch": 0.2861250898634076, - "grad_norm": 0.536938488483429, - "learning_rate": 9.854818346334916e-06, - "loss": 0.4359, - "step": 4378 - }, - { - "epoch": 0.28619044506894975, - "grad_norm": 0.4891755282878876, - "learning_rate": 9.854734798821233e-06, - "loss": 0.3784, - "step": 4379 - }, - { - "epoch": 0.2862558002744919, - "grad_norm": 0.44296059012413025, - "learning_rate": 9.854651227629325e-06, - "loss": 0.3682, - "step": 4380 - }, - { - "epoch": 0.286321155480034, - "grad_norm": 0.5143328905105591, - "learning_rate": 9.854567632759604e-06, - "loss": 0.4594, - "step": 4381 - }, - { - "epoch": 0.2863865106855761, - "grad_norm": 0.48886820673942566, - "learning_rate": 9.854484014212472e-06, - "loss": 0.4223, - "step": 4382 - }, - { - "epoch": 0.2864518658911182, - "grad_norm": 0.49212801456451416, - "learning_rate": 9.854400371988342e-06, - "loss": 0.4465, - "step": 4383 - }, - { - "epoch": 0.28651722109666034, - "grad_norm": 0.49502620100975037, - "learning_rate": 9.854316706087619e-06, - "loss": 0.4315, - "step": 4384 - }, - { - "epoch": 0.28658257630220246, - "grad_norm": 0.5504209995269775, - "learning_rate": 9.854233016510712e-06, - "loss": 0.5186, - "step": 4385 - }, - { - "epoch": 0.2866479315077446, - "grad_norm": 0.49391207098960876, - "learning_rate": 9.854149303258027e-06, - "loss": 0.434, - "step": 4386 - }, - { - "epoch": 0.2867132867132867, - "grad_norm": 0.4450891613960266, - "learning_rate": 9.854065566329976e-06, - "loss": 0.3643, - "step": 4387 - }, - { - "epoch": 0.28677864191882885, - "grad_norm": 0.4998491406440735, - "learning_rate": 9.853981805726966e-06, - "loss": 0.443, - "step": 4388 - }, - { - "epoch": 0.286843997124371, - "grad_norm": 0.42358624935150146, - "learning_rate": 9.853898021449404e-06, - "loss": 0.3353, - "step": 4389 - }, - { - "epoch": 0.2869093523299131, - "grad_norm": 0.47219541668891907, - "learning_rate": 9.853814213497699e-06, - "loss": 0.4451, - "step": 4390 - }, - { - "epoch": 0.2869747075354552, - "grad_norm": 0.46333885192871094, - "learning_rate": 9.853730381872262e-06, - "loss": 0.4005, - "step": 4391 - }, - { - "epoch": 0.2870400627409973, - "grad_norm": 0.4804172217845917, - "learning_rate": 9.853646526573501e-06, - "loss": 0.3934, - "step": 4392 - }, - { - "epoch": 0.28710541794653943, - "grad_norm": 0.5372201800346375, - "learning_rate": 9.853562647601823e-06, - "loss": 0.5348, - "step": 4393 - }, - { - "epoch": 0.28717077315208156, - "grad_norm": 0.4997507631778717, - "learning_rate": 9.853478744957638e-06, - "loss": 0.4746, - "step": 4394 - }, - { - "epoch": 0.2872361283576237, - "grad_norm": 0.5150099992752075, - "learning_rate": 9.853394818641358e-06, - "loss": 0.4844, - "step": 4395 - }, - { - "epoch": 0.2873014835631658, - "grad_norm": 0.4945598840713501, - "learning_rate": 9.853310868653389e-06, - "loss": 0.4405, - "step": 4396 - }, - { - "epoch": 0.28736683876870794, - "grad_norm": 0.45849746465682983, - "learning_rate": 9.853226894994142e-06, - "loss": 0.3829, - "step": 4397 - }, - { - "epoch": 0.28743219397425007, - "grad_norm": 0.48079851269721985, - "learning_rate": 9.853142897664024e-06, - "loss": 0.4288, - "step": 4398 - }, - { - "epoch": 0.28749754917979214, - "grad_norm": 0.4650821089744568, - "learning_rate": 9.853058876663448e-06, - "loss": 0.371, - "step": 4399 - }, - { - "epoch": 0.2875629043853343, - "grad_norm": 0.49603012204170227, - "learning_rate": 9.852974831992823e-06, - "loss": 0.4216, - "step": 4400 - }, - { - "epoch": 0.2876282595908764, - "grad_norm": 0.45978420972824097, - "learning_rate": 9.852890763652558e-06, - "loss": 0.4004, - "step": 4401 - }, - { - "epoch": 0.28769361479641853, - "grad_norm": 0.41766291856765747, - "learning_rate": 9.852806671643064e-06, - "loss": 0.3755, - "step": 4402 - }, - { - "epoch": 0.28775897000196066, - "grad_norm": 0.4829985201358795, - "learning_rate": 9.85272255596475e-06, - "loss": 0.379, - "step": 4403 - }, - { - "epoch": 0.2878243252075028, - "grad_norm": 0.44976744055747986, - "learning_rate": 9.852638416618029e-06, - "loss": 0.392, - "step": 4404 - }, - { - "epoch": 0.2878896804130449, - "grad_norm": 0.49071407318115234, - "learning_rate": 9.852554253603308e-06, - "loss": 0.5037, - "step": 4405 - }, - { - "epoch": 0.28795503561858704, - "grad_norm": 0.522971510887146, - "learning_rate": 9.852470066920999e-06, - "loss": 0.4007, - "step": 4406 - }, - { - "epoch": 0.28802039082412917, - "grad_norm": 0.4969806969165802, - "learning_rate": 9.852385856571512e-06, - "loss": 0.4441, - "step": 4407 - }, - { - "epoch": 0.28808574602967124, - "grad_norm": 0.44576096534729004, - "learning_rate": 9.852301622555259e-06, - "loss": 0.3568, - "step": 4408 - }, - { - "epoch": 0.28815110123521337, - "grad_norm": 0.49539312720298767, - "learning_rate": 9.852217364872649e-06, - "loss": 0.4528, - "step": 4409 - }, - { - "epoch": 0.2882164564407555, - "grad_norm": 0.46742770075798035, - "learning_rate": 9.852133083524093e-06, - "loss": 0.3828, - "step": 4410 - }, - { - "epoch": 0.2882818116462976, - "grad_norm": 0.4757823348045349, - "learning_rate": 9.852048778510006e-06, - "loss": 0.3925, - "step": 4411 - }, - { - "epoch": 0.28834716685183975, - "grad_norm": 0.44201409816741943, - "learning_rate": 9.851964449830794e-06, - "loss": 0.3768, - "step": 4412 - }, - { - "epoch": 0.2884125220573819, - "grad_norm": 0.47092169523239136, - "learning_rate": 9.85188009748687e-06, - "loss": 0.4285, - "step": 4413 - }, - { - "epoch": 0.288477877262924, - "grad_norm": 0.4804210066795349, - "learning_rate": 9.851795721478647e-06, - "loss": 0.4168, - "step": 4414 - }, - { - "epoch": 0.28854323246846614, - "grad_norm": 0.43654242157936096, - "learning_rate": 9.851711321806537e-06, - "loss": 0.3518, - "step": 4415 - }, - { - "epoch": 0.2886085876740082, - "grad_norm": 0.4612194001674652, - "learning_rate": 9.851626898470948e-06, - "loss": 0.3957, - "step": 4416 - }, - { - "epoch": 0.28867394287955034, - "grad_norm": 0.46339964866638184, - "learning_rate": 9.851542451472293e-06, - "loss": 0.4083, - "step": 4417 - }, - { - "epoch": 0.28873929808509247, - "grad_norm": 0.4413035213947296, - "learning_rate": 9.851457980810988e-06, - "loss": 0.3679, - "step": 4418 - }, - { - "epoch": 0.2888046532906346, - "grad_norm": 0.4670655131340027, - "learning_rate": 9.851373486487439e-06, - "loss": 0.3959, - "step": 4419 - }, - { - "epoch": 0.2888700084961767, - "grad_norm": 0.5646473169326782, - "learning_rate": 9.85128896850206e-06, - "loss": 0.4123, - "step": 4420 - }, - { - "epoch": 0.28893536370171885, - "grad_norm": 0.49724411964416504, - "learning_rate": 9.851204426855267e-06, - "loss": 0.4474, - "step": 4421 - }, - { - "epoch": 0.289000718907261, - "grad_norm": 0.5162045955657959, - "learning_rate": 9.851119861547467e-06, - "loss": 0.4092, - "step": 4422 - }, - { - "epoch": 0.2890660741128031, - "grad_norm": 0.47607100009918213, - "learning_rate": 9.851035272579077e-06, - "loss": 0.4045, - "step": 4423 - }, - { - "epoch": 0.2891314293183452, - "grad_norm": 0.4476981461048126, - "learning_rate": 9.850950659950506e-06, - "loss": 0.375, - "step": 4424 - }, - { - "epoch": 0.2891967845238873, - "grad_norm": 0.4492538273334503, - "learning_rate": 9.850866023662168e-06, - "loss": 0.3872, - "step": 4425 - }, - { - "epoch": 0.28926213972942943, - "grad_norm": 0.45858004689216614, - "learning_rate": 9.850781363714477e-06, - "loss": 0.3755, - "step": 4426 - }, - { - "epoch": 0.28932749493497156, - "grad_norm": 0.4616815447807312, - "learning_rate": 9.850696680107844e-06, - "loss": 0.3768, - "step": 4427 - }, - { - "epoch": 0.2893928501405137, - "grad_norm": 0.42143869400024414, - "learning_rate": 9.850611972842682e-06, - "loss": 0.3498, - "step": 4428 - }, - { - "epoch": 0.2894582053460558, - "grad_norm": 0.4461817443370819, - "learning_rate": 9.850527241919405e-06, - "loss": 0.4022, - "step": 4429 - }, - { - "epoch": 0.28952356055159795, - "grad_norm": 0.48425930738449097, - "learning_rate": 9.850442487338427e-06, - "loss": 0.4367, - "step": 4430 - }, - { - "epoch": 0.2895889157571401, - "grad_norm": 0.4955015480518341, - "learning_rate": 9.850357709100163e-06, - "loss": 0.3992, - "step": 4431 - }, - { - "epoch": 0.2896542709626822, - "grad_norm": 0.47167977690696716, - "learning_rate": 9.85027290720502e-06, - "loss": 0.3774, - "step": 4432 - }, - { - "epoch": 0.2897196261682243, - "grad_norm": 0.4849473834037781, - "learning_rate": 9.850188081653419e-06, - "loss": 0.4454, - "step": 4433 - }, - { - "epoch": 0.2897849813737664, - "grad_norm": 0.5112374424934387, - "learning_rate": 9.850103232445769e-06, - "loss": 0.4332, - "step": 4434 - }, - { - "epoch": 0.28985033657930853, - "grad_norm": 0.507053792476654, - "learning_rate": 9.850018359582484e-06, - "loss": 0.3938, - "step": 4435 - }, - { - "epoch": 0.28991569178485066, - "grad_norm": 0.5383029580116272, - "learning_rate": 9.849933463063982e-06, - "loss": 0.4272, - "step": 4436 - }, - { - "epoch": 0.2899810469903928, - "grad_norm": 0.4883407652378082, - "learning_rate": 9.849848542890673e-06, - "loss": 0.4145, - "step": 4437 - }, - { - "epoch": 0.2900464021959349, - "grad_norm": 0.4537391662597656, - "learning_rate": 9.849763599062972e-06, - "loss": 0.3592, - "step": 4438 - }, - { - "epoch": 0.29011175740147704, - "grad_norm": 0.4909239113330841, - "learning_rate": 9.849678631581294e-06, - "loss": 0.398, - "step": 4439 - }, - { - "epoch": 0.29017711260701917, - "grad_norm": 0.4724974036216736, - "learning_rate": 9.849593640446054e-06, - "loss": 0.3663, - "step": 4440 - }, - { - "epoch": 0.29024246781256124, - "grad_norm": 0.5158087015151978, - "learning_rate": 9.849508625657666e-06, - "loss": 0.4682, - "step": 4441 - }, - { - "epoch": 0.2903078230181034, - "grad_norm": 0.4384050667285919, - "learning_rate": 9.849423587216543e-06, - "loss": 0.3911, - "step": 4442 - }, - { - "epoch": 0.2903731782236455, - "grad_norm": 0.4931519627571106, - "learning_rate": 9.849338525123102e-06, - "loss": 0.4265, - "step": 4443 - }, - { - "epoch": 0.29043853342918763, - "grad_norm": 0.4780293405056, - "learning_rate": 9.849253439377757e-06, - "loss": 0.4529, - "step": 4444 - }, - { - "epoch": 0.29050388863472976, - "grad_norm": 0.5410692095756531, - "learning_rate": 9.849168329980922e-06, - "loss": 0.489, - "step": 4445 - }, - { - "epoch": 0.2905692438402719, - "grad_norm": 0.5165309309959412, - "learning_rate": 9.849083196933014e-06, - "loss": 0.3583, - "step": 4446 - }, - { - "epoch": 0.290634599045814, - "grad_norm": 0.49044737219810486, - "learning_rate": 9.848998040234449e-06, - "loss": 0.4111, - "step": 4447 - }, - { - "epoch": 0.29069995425135614, - "grad_norm": 0.457153856754303, - "learning_rate": 9.848912859885638e-06, - "loss": 0.3878, - "step": 4448 - }, - { - "epoch": 0.29076530945689827, - "grad_norm": 0.4699672758579254, - "learning_rate": 9.848827655887002e-06, - "loss": 0.3797, - "step": 4449 - }, - { - "epoch": 0.29083066466244034, - "grad_norm": 0.4563548266887665, - "learning_rate": 9.848742428238951e-06, - "loss": 0.3935, - "step": 4450 - }, - { - "epoch": 0.29089601986798247, - "grad_norm": 0.4894541800022125, - "learning_rate": 9.848657176941905e-06, - "loss": 0.4398, - "step": 4451 - }, - { - "epoch": 0.2909613750735246, - "grad_norm": 0.4865151047706604, - "learning_rate": 9.848571901996278e-06, - "loss": 0.4178, - "step": 4452 - }, - { - "epoch": 0.2910267302790667, - "grad_norm": 0.45140793919563293, - "learning_rate": 9.848486603402484e-06, - "loss": 0.3697, - "step": 4453 - }, - { - "epoch": 0.29109208548460885, - "grad_norm": 0.4754602909088135, - "learning_rate": 9.848401281160943e-06, - "loss": 0.3992, - "step": 4454 - }, - { - "epoch": 0.291157440690151, - "grad_norm": 0.44573870301246643, - "learning_rate": 9.84831593527207e-06, - "loss": 0.4038, - "step": 4455 - }, - { - "epoch": 0.2912227958956931, - "grad_norm": 0.4916362464427948, - "learning_rate": 9.848230565736279e-06, - "loss": 0.4157, - "step": 4456 - }, - { - "epoch": 0.29128815110123524, - "grad_norm": 0.4606829881668091, - "learning_rate": 9.848145172553989e-06, - "loss": 0.4002, - "step": 4457 - }, - { - "epoch": 0.2913535063067773, - "grad_norm": 0.5081844329833984, - "learning_rate": 9.848059755725617e-06, - "loss": 0.4217, - "step": 4458 - }, - { - "epoch": 0.29141886151231944, - "grad_norm": 0.48040980100631714, - "learning_rate": 9.847974315251575e-06, - "loss": 0.4508, - "step": 4459 - }, - { - "epoch": 0.29148421671786157, - "grad_norm": 0.4761035740375519, - "learning_rate": 9.847888851132283e-06, - "loss": 0.3871, - "step": 4460 - }, - { - "epoch": 0.2915495719234037, - "grad_norm": 0.5370839238166809, - "learning_rate": 9.84780336336816e-06, - "loss": 0.4498, - "step": 4461 - }, - { - "epoch": 0.2916149271289458, - "grad_norm": 0.4452376961708069, - "learning_rate": 9.84771785195962e-06, - "loss": 0.4065, - "step": 4462 - }, - { - "epoch": 0.29168028233448795, - "grad_norm": 0.45047760009765625, - "learning_rate": 9.847632316907079e-06, - "loss": 0.3519, - "step": 4463 - }, - { - "epoch": 0.2917456375400301, - "grad_norm": 0.48897784948349, - "learning_rate": 9.847546758210956e-06, - "loss": 0.4454, - "step": 4464 - }, - { - "epoch": 0.2918109927455722, - "grad_norm": 0.49426940083503723, - "learning_rate": 9.847461175871669e-06, - "loss": 0.3911, - "step": 4465 - }, - { - "epoch": 0.2918763479511143, - "grad_norm": 0.4327999949455261, - "learning_rate": 9.847375569889635e-06, - "loss": 0.34, - "step": 4466 - }, - { - "epoch": 0.2919417031566564, - "grad_norm": 0.4581160545349121, - "learning_rate": 9.84728994026527e-06, - "loss": 0.3525, - "step": 4467 - }, - { - "epoch": 0.29200705836219853, - "grad_norm": 0.4722375273704529, - "learning_rate": 9.847204286998993e-06, - "loss": 0.3694, - "step": 4468 - }, - { - "epoch": 0.29207241356774066, - "grad_norm": 0.4604976773262024, - "learning_rate": 9.84711861009122e-06, - "loss": 0.3839, - "step": 4469 - }, - { - "epoch": 0.2921377687732828, - "grad_norm": 0.49679136276245117, - "learning_rate": 9.847032909542373e-06, - "loss": 0.4079, - "step": 4470 - }, - { - "epoch": 0.2922031239788249, - "grad_norm": 0.4918653964996338, - "learning_rate": 9.846947185352865e-06, - "loss": 0.4231, - "step": 4471 - }, - { - "epoch": 0.29226847918436705, - "grad_norm": 0.4945702850818634, - "learning_rate": 9.84686143752312e-06, - "loss": 0.4672, - "step": 4472 - }, - { - "epoch": 0.2923338343899092, - "grad_norm": 0.4729475975036621, - "learning_rate": 9.846775666053549e-06, - "loss": 0.4161, - "step": 4473 - }, - { - "epoch": 0.2923991895954513, - "grad_norm": 0.48046860098838806, - "learning_rate": 9.846689870944574e-06, - "loss": 0.4461, - "step": 4474 - }, - { - "epoch": 0.2924645448009934, - "grad_norm": 0.4358729422092438, - "learning_rate": 9.846604052196616e-06, - "loss": 0.3567, - "step": 4475 - }, - { - "epoch": 0.2925299000065355, - "grad_norm": 0.4827226400375366, - "learning_rate": 9.846518209810089e-06, - "loss": 0.4204, - "step": 4476 - }, - { - "epoch": 0.29259525521207763, - "grad_norm": 0.5996066927909851, - "learning_rate": 9.846432343785415e-06, - "loss": 0.4532, - "step": 4477 - }, - { - "epoch": 0.29266061041761976, - "grad_norm": 0.4725746512413025, - "learning_rate": 9.84634645412301e-06, - "loss": 0.4161, - "step": 4478 - }, - { - "epoch": 0.2927259656231619, - "grad_norm": 0.47878170013427734, - "learning_rate": 9.846260540823296e-06, - "loss": 0.4186, - "step": 4479 - }, - { - "epoch": 0.292791320828704, - "grad_norm": 0.49192699790000916, - "learning_rate": 9.84617460388669e-06, - "loss": 0.4375, - "step": 4480 - }, - { - "epoch": 0.29285667603424614, - "grad_norm": 0.48404741287231445, - "learning_rate": 9.84608864331361e-06, - "loss": 0.4356, - "step": 4481 - }, - { - "epoch": 0.29292203123978827, - "grad_norm": 0.4758375287055969, - "learning_rate": 9.846002659104479e-06, - "loss": 0.4377, - "step": 4482 - }, - { - "epoch": 0.29298738644533034, - "grad_norm": 0.4543372392654419, - "learning_rate": 9.845916651259713e-06, - "loss": 0.4154, - "step": 4483 - }, - { - "epoch": 0.2930527416508725, - "grad_norm": 0.48143458366394043, - "learning_rate": 9.845830619779732e-06, - "loss": 0.4141, - "step": 4484 - }, - { - "epoch": 0.2931180968564146, - "grad_norm": 0.4619728922843933, - "learning_rate": 9.845744564664958e-06, - "loss": 0.3974, - "step": 4485 - }, - { - "epoch": 0.29318345206195673, - "grad_norm": 0.4468969702720642, - "learning_rate": 9.845658485915808e-06, - "loss": 0.3851, - "step": 4486 - }, - { - "epoch": 0.29324880726749886, - "grad_norm": 0.4667202830314636, - "learning_rate": 9.845572383532703e-06, - "loss": 0.3952, - "step": 4487 - }, - { - "epoch": 0.293314162473041, - "grad_norm": 0.49085402488708496, - "learning_rate": 9.845486257516064e-06, - "loss": 0.446, - "step": 4488 - }, - { - "epoch": 0.2933795176785831, - "grad_norm": 0.48247113823890686, - "learning_rate": 9.845400107866307e-06, - "loss": 0.4061, - "step": 4489 - }, - { - "epoch": 0.29344487288412524, - "grad_norm": 0.5165753364562988, - "learning_rate": 9.845313934583858e-06, - "loss": 0.4231, - "step": 4490 - }, - { - "epoch": 0.29351022808966737, - "grad_norm": 0.4774547815322876, - "learning_rate": 9.845227737669134e-06, - "loss": 0.4169, - "step": 4491 - }, - { - "epoch": 0.29357558329520944, - "grad_norm": 0.4724103510379791, - "learning_rate": 9.845141517122554e-06, - "loss": 0.3999, - "step": 4492 - }, - { - "epoch": 0.29364093850075157, - "grad_norm": 0.468107670545578, - "learning_rate": 9.845055272944541e-06, - "loss": 0.3857, - "step": 4493 - }, - { - "epoch": 0.2937062937062937, - "grad_norm": 0.4065115451812744, - "learning_rate": 9.844969005135517e-06, - "loss": 0.3096, - "step": 4494 - }, - { - "epoch": 0.2937716489118358, - "grad_norm": 0.48373425006866455, - "learning_rate": 9.844882713695898e-06, - "loss": 0.4499, - "step": 4495 - }, - { - "epoch": 0.29383700411737795, - "grad_norm": 0.4965156316757202, - "learning_rate": 9.844796398626109e-06, - "loss": 0.4481, - "step": 4496 - }, - { - "epoch": 0.2939023593229201, - "grad_norm": 0.4595758020877838, - "learning_rate": 9.84471005992657e-06, - "loss": 0.3943, - "step": 4497 - }, - { - "epoch": 0.2939677145284622, - "grad_norm": 0.4767528474330902, - "learning_rate": 9.8446236975977e-06, - "loss": 0.3675, - "step": 4498 - }, - { - "epoch": 0.29403306973400434, - "grad_norm": 0.6278597712516785, - "learning_rate": 9.844537311639923e-06, - "loss": 0.429, - "step": 4499 - }, - { - "epoch": 0.2940984249395464, - "grad_norm": 0.4658062756061554, - "learning_rate": 9.84445090205366e-06, - "loss": 0.4168, - "step": 4500 - }, - { - "epoch": 0.29416378014508854, - "grad_norm": 0.45288029313087463, - "learning_rate": 9.84436446883933e-06, - "loss": 0.3918, - "step": 4501 - }, - { - "epoch": 0.29422913535063067, - "grad_norm": 0.5236877799034119, - "learning_rate": 9.844278011997357e-06, - "loss": 0.4589, - "step": 4502 - }, - { - "epoch": 0.2942944905561728, - "grad_norm": 0.47641849517822266, - "learning_rate": 9.844191531528162e-06, - "loss": 0.4035, - "step": 4503 - }, - { - "epoch": 0.2943598457617149, - "grad_norm": 0.47187209129333496, - "learning_rate": 9.844105027432166e-06, - "loss": 0.3838, - "step": 4504 - }, - { - "epoch": 0.29442520096725705, - "grad_norm": 0.5041341185569763, - "learning_rate": 9.844018499709793e-06, - "loss": 0.401, - "step": 4505 - }, - { - "epoch": 0.2944905561727992, - "grad_norm": 0.4318012297153473, - "learning_rate": 9.843931948361463e-06, - "loss": 0.3826, - "step": 4506 - }, - { - "epoch": 0.2945559113783413, - "grad_norm": 0.4853137135505676, - "learning_rate": 9.843845373387597e-06, - "loss": 0.3823, - "step": 4507 - }, - { - "epoch": 0.2946212665838834, - "grad_norm": 0.4941728711128235, - "learning_rate": 9.84375877478862e-06, - "loss": 0.3931, - "step": 4508 - }, - { - "epoch": 0.2946866217894255, - "grad_norm": 0.49251747131347656, - "learning_rate": 9.843672152564956e-06, - "loss": 0.4658, - "step": 4509 - }, - { - "epoch": 0.29475197699496763, - "grad_norm": 0.4598727226257324, - "learning_rate": 9.84358550671702e-06, - "loss": 0.4298, - "step": 4510 - }, - { - "epoch": 0.29481733220050976, - "grad_norm": 0.464657187461853, - "learning_rate": 9.843498837245244e-06, - "loss": 0.3822, - "step": 4511 - }, - { - "epoch": 0.2948826874060519, - "grad_norm": 0.4527546167373657, - "learning_rate": 9.843412144150045e-06, - "loss": 0.4042, - "step": 4512 - }, - { - "epoch": 0.294948042611594, - "grad_norm": 0.46141108870506287, - "learning_rate": 9.843325427431847e-06, - "loss": 0.4183, - "step": 4513 - }, - { - "epoch": 0.29501339781713615, - "grad_norm": 0.48721104860305786, - "learning_rate": 9.843238687091072e-06, - "loss": 0.4016, - "step": 4514 - }, - { - "epoch": 0.2950787530226783, - "grad_norm": 0.458452433347702, - "learning_rate": 9.843151923128146e-06, - "loss": 0.3895, - "step": 4515 - }, - { - "epoch": 0.2951441082282204, - "grad_norm": 0.46637964248657227, - "learning_rate": 9.843065135543488e-06, - "loss": 0.416, - "step": 4516 - }, - { - "epoch": 0.2952094634337625, - "grad_norm": 0.4475843012332916, - "learning_rate": 9.842978324337525e-06, - "loss": 0.3719, - "step": 4517 - }, - { - "epoch": 0.2952748186393046, - "grad_norm": 0.4849666357040405, - "learning_rate": 9.842891489510678e-06, - "loss": 0.4102, - "step": 4518 - }, - { - "epoch": 0.29534017384484673, - "grad_norm": 0.4784989655017853, - "learning_rate": 9.842804631063374e-06, - "loss": 0.3904, - "step": 4519 - }, - { - "epoch": 0.29540552905038886, - "grad_norm": 0.4642713665962219, - "learning_rate": 9.84271774899603e-06, - "loss": 0.3766, - "step": 4520 - }, - { - "epoch": 0.295470884255931, - "grad_norm": 0.466028094291687, - "learning_rate": 9.842630843309077e-06, - "loss": 0.3761, - "step": 4521 - }, - { - "epoch": 0.2955362394614731, - "grad_norm": 0.4432559907436371, - "learning_rate": 9.842543914002935e-06, - "loss": 0.3765, - "step": 4522 - }, - { - "epoch": 0.29560159466701524, - "grad_norm": 0.4879373610019684, - "learning_rate": 9.842456961078028e-06, - "loss": 0.4192, - "step": 4523 - }, - { - "epoch": 0.29566694987255737, - "grad_norm": 0.4954962134361267, - "learning_rate": 9.842369984534781e-06, - "loss": 0.3676, - "step": 4524 - }, - { - "epoch": 0.29573230507809944, - "grad_norm": 0.5037001967430115, - "learning_rate": 9.842282984373618e-06, - "loss": 0.456, - "step": 4525 - }, - { - "epoch": 0.2957976602836416, - "grad_norm": 0.4720968008041382, - "learning_rate": 9.842195960594965e-06, - "loss": 0.3599, - "step": 4526 - }, - { - "epoch": 0.2958630154891837, - "grad_norm": 0.47066187858581543, - "learning_rate": 9.842108913199241e-06, - "loss": 0.389, - "step": 4527 - }, - { - "epoch": 0.29592837069472583, - "grad_norm": 0.4656980335712433, - "learning_rate": 9.842021842186878e-06, - "loss": 0.4234, - "step": 4528 - }, - { - "epoch": 0.29599372590026796, - "grad_norm": 0.49611374735832214, - "learning_rate": 9.841934747558295e-06, - "loss": 0.4452, - "step": 4529 - }, - { - "epoch": 0.2960590811058101, - "grad_norm": 0.4645419716835022, - "learning_rate": 9.84184762931392e-06, - "loss": 0.369, - "step": 4530 - }, - { - "epoch": 0.2961244363113522, - "grad_norm": 0.4592958390712738, - "learning_rate": 9.841760487454176e-06, - "loss": 0.3984, - "step": 4531 - }, - { - "epoch": 0.29618979151689434, - "grad_norm": 0.47864311933517456, - "learning_rate": 9.84167332197949e-06, - "loss": 0.4141, - "step": 4532 - }, - { - "epoch": 0.29625514672243647, - "grad_norm": 0.44153285026550293, - "learning_rate": 9.841586132890285e-06, - "loss": 0.3648, - "step": 4533 - }, - { - "epoch": 0.29632050192797854, - "grad_norm": 0.44436541199684143, - "learning_rate": 9.841498920186987e-06, - "loss": 0.3942, - "step": 4534 - }, - { - "epoch": 0.29638585713352067, - "grad_norm": 0.4569571614265442, - "learning_rate": 9.841411683870021e-06, - "loss": 0.393, - "step": 4535 - }, - { - "epoch": 0.2964512123390628, - "grad_norm": 0.4499158263206482, - "learning_rate": 9.841324423939815e-06, - "loss": 0.3645, - "step": 4536 - }, - { - "epoch": 0.2965165675446049, - "grad_norm": 0.5056281685829163, - "learning_rate": 9.841237140396791e-06, - "loss": 0.4122, - "step": 4537 - }, - { - "epoch": 0.29658192275014705, - "grad_norm": 0.5346924662590027, - "learning_rate": 9.841149833241378e-06, - "loss": 0.4697, - "step": 4538 - }, - { - "epoch": 0.2966472779556892, - "grad_norm": 0.4869162440299988, - "learning_rate": 9.841062502473997e-06, - "loss": 0.4437, - "step": 4539 - }, - { - "epoch": 0.2967126331612313, - "grad_norm": 0.4643200635910034, - "learning_rate": 9.840975148095079e-06, - "loss": 0.4113, - "step": 4540 - }, - { - "epoch": 0.29677798836677344, - "grad_norm": 0.451917827129364, - "learning_rate": 9.840887770105048e-06, - "loss": 0.3693, - "step": 4541 - }, - { - "epoch": 0.2968433435723155, - "grad_norm": 0.5004224181175232, - "learning_rate": 9.840800368504329e-06, - "loss": 0.4307, - "step": 4542 - }, - { - "epoch": 0.29690869877785764, - "grad_norm": 0.4540042579174042, - "learning_rate": 9.840712943293351e-06, - "loss": 0.4094, - "step": 4543 - }, - { - "epoch": 0.29697405398339977, - "grad_norm": 0.4906425178050995, - "learning_rate": 9.840625494472539e-06, - "loss": 0.4418, - "step": 4544 - }, - { - "epoch": 0.2970394091889419, - "grad_norm": 0.45951879024505615, - "learning_rate": 9.84053802204232e-06, - "loss": 0.4282, - "step": 4545 - }, - { - "epoch": 0.297104764394484, - "grad_norm": 0.5001217126846313, - "learning_rate": 9.840450526003118e-06, - "loss": 0.4212, - "step": 4546 - }, - { - "epoch": 0.29717011960002615, - "grad_norm": 0.4755907952785492, - "learning_rate": 9.840363006355365e-06, - "loss": 0.4088, - "step": 4547 - }, - { - "epoch": 0.2972354748055683, - "grad_norm": 0.4836277961730957, - "learning_rate": 9.840275463099482e-06, - "loss": 0.3981, - "step": 4548 - }, - { - "epoch": 0.2973008300111104, - "grad_norm": 0.44367456436157227, - "learning_rate": 9.840187896235899e-06, - "loss": 0.3569, - "step": 4549 - }, - { - "epoch": 0.2973661852166525, - "grad_norm": 0.49634233117103577, - "learning_rate": 9.840100305765044e-06, - "loss": 0.405, - "step": 4550 - }, - { - "epoch": 0.2974315404221946, - "grad_norm": 0.4834412932395935, - "learning_rate": 9.840012691687344e-06, - "loss": 0.3799, - "step": 4551 - }, - { - "epoch": 0.29749689562773673, - "grad_norm": 0.46757590770721436, - "learning_rate": 9.839925054003223e-06, - "loss": 0.3779, - "step": 4552 - }, - { - "epoch": 0.29756225083327886, - "grad_norm": 0.42607083916664124, - "learning_rate": 9.839837392713112e-06, - "loss": 0.3244, - "step": 4553 - }, - { - "epoch": 0.297627606038821, - "grad_norm": 0.4805174469947815, - "learning_rate": 9.839749707817437e-06, - "loss": 0.4165, - "step": 4554 - }, - { - "epoch": 0.2976929612443631, - "grad_norm": 0.5129186511039734, - "learning_rate": 9.839661999316627e-06, - "loss": 0.4274, - "step": 4555 - }, - { - "epoch": 0.29775831644990525, - "grad_norm": 0.5060441493988037, - "learning_rate": 9.839574267211107e-06, - "loss": 0.4402, - "step": 4556 - }, - { - "epoch": 0.2978236716554474, - "grad_norm": 0.4707791209220886, - "learning_rate": 9.839486511501309e-06, - "loss": 0.3965, - "step": 4557 - }, - { - "epoch": 0.2978890268609895, - "grad_norm": 0.4697127342224121, - "learning_rate": 9.839398732187657e-06, - "loss": 0.3934, - "step": 4558 - }, - { - "epoch": 0.2979543820665316, - "grad_norm": 0.5240148901939392, - "learning_rate": 9.839310929270581e-06, - "loss": 0.4728, - "step": 4559 - }, - { - "epoch": 0.2980197372720737, - "grad_norm": 0.5014421939849854, - "learning_rate": 9.839223102750511e-06, - "loss": 0.4011, - "step": 4560 - }, - { - "epoch": 0.29808509247761583, - "grad_norm": 0.47523826360702515, - "learning_rate": 9.83913525262787e-06, - "loss": 0.4048, - "step": 4561 - }, - { - "epoch": 0.29815044768315796, - "grad_norm": 0.5312103033065796, - "learning_rate": 9.839047378903093e-06, - "loss": 0.5076, - "step": 4562 - }, - { - "epoch": 0.2982158028887001, - "grad_norm": 0.460380494594574, - "learning_rate": 9.838959481576602e-06, - "loss": 0.3707, - "step": 4563 - }, - { - "epoch": 0.2982811580942422, - "grad_norm": 0.47403544187545776, - "learning_rate": 9.838871560648833e-06, - "loss": 0.3929, - "step": 4564 - }, - { - "epoch": 0.29834651329978434, - "grad_norm": 0.45916664600372314, - "learning_rate": 9.838783616120209e-06, - "loss": 0.3943, - "step": 4565 - }, - { - "epoch": 0.29841186850532647, - "grad_norm": 0.45200034976005554, - "learning_rate": 9.838695647991163e-06, - "loss": 0.3804, - "step": 4566 - }, - { - "epoch": 0.29847722371086854, - "grad_norm": 0.4933592975139618, - "learning_rate": 9.83860765626212e-06, - "loss": 0.4077, - "step": 4567 - }, - { - "epoch": 0.2985425789164107, - "grad_norm": 0.5133188962936401, - "learning_rate": 9.838519640933512e-06, - "loss": 0.4201, - "step": 4568 - }, - { - "epoch": 0.2986079341219528, - "grad_norm": 0.47661587595939636, - "learning_rate": 9.838431602005767e-06, - "loss": 0.4297, - "step": 4569 - }, - { - "epoch": 0.29867328932749493, - "grad_norm": 0.4640342593193054, - "learning_rate": 9.838343539479316e-06, - "loss": 0.4357, - "step": 4570 - }, - { - "epoch": 0.29873864453303706, - "grad_norm": 0.47774818539619446, - "learning_rate": 9.838255453354585e-06, - "loss": 0.4113, - "step": 4571 - }, - { - "epoch": 0.2988039997385792, - "grad_norm": 0.47215497493743896, - "learning_rate": 9.838167343632008e-06, - "loss": 0.3907, - "step": 4572 - }, - { - "epoch": 0.2988693549441213, - "grad_norm": 0.5664907693862915, - "learning_rate": 9.838079210312012e-06, - "loss": 0.4052, - "step": 4573 - }, - { - "epoch": 0.29893471014966344, - "grad_norm": 0.4794827997684479, - "learning_rate": 9.83799105339503e-06, - "loss": 0.4169, - "step": 4574 - }, - { - "epoch": 0.29900006535520557, - "grad_norm": 0.48394709825515747, - "learning_rate": 9.837902872881486e-06, - "loss": 0.4334, - "step": 4575 - }, - { - "epoch": 0.29906542056074764, - "grad_norm": 0.45113661885261536, - "learning_rate": 9.837814668771815e-06, - "loss": 0.3719, - "step": 4576 - }, - { - "epoch": 0.29913077576628977, - "grad_norm": 0.47531017661094666, - "learning_rate": 9.837726441066446e-06, - "loss": 0.3827, - "step": 4577 - }, - { - "epoch": 0.2991961309718319, - "grad_norm": 0.4978850185871124, - "learning_rate": 9.83763818976581e-06, - "loss": 0.448, - "step": 4578 - }, - { - "epoch": 0.299261486177374, - "grad_norm": 0.5086488127708435, - "learning_rate": 9.837549914870336e-06, - "loss": 0.3996, - "step": 4579 - }, - { - "epoch": 0.29932684138291615, - "grad_norm": 0.5452041625976562, - "learning_rate": 9.837461616380455e-06, - "loss": 0.3795, - "step": 4580 - }, - { - "epoch": 0.2993921965884583, - "grad_norm": 0.46452298760414124, - "learning_rate": 9.837373294296598e-06, - "loss": 0.3824, - "step": 4581 - }, - { - "epoch": 0.2994575517940004, - "grad_norm": 0.4715021550655365, - "learning_rate": 9.837284948619195e-06, - "loss": 0.39, - "step": 4582 - }, - { - "epoch": 0.29952290699954254, - "grad_norm": 0.4850403666496277, - "learning_rate": 9.83719657934868e-06, - "loss": 0.4377, - "step": 4583 - }, - { - "epoch": 0.2995882622050846, - "grad_norm": 0.4575905203819275, - "learning_rate": 9.837108186485477e-06, - "loss": 0.3871, - "step": 4584 - }, - { - "epoch": 0.29965361741062674, - "grad_norm": 0.4533096253871918, - "learning_rate": 9.837019770030025e-06, - "loss": 0.4087, - "step": 4585 - }, - { - "epoch": 0.29971897261616887, - "grad_norm": 0.497069388628006, - "learning_rate": 9.836931329982752e-06, - "loss": 0.4166, - "step": 4586 - }, - { - "epoch": 0.299784327821711, - "grad_norm": 0.4475323557853699, - "learning_rate": 9.83684286634409e-06, - "loss": 0.3588, - "step": 4587 - }, - { - "epoch": 0.2998496830272531, - "grad_norm": 0.4541557729244232, - "learning_rate": 9.836754379114466e-06, - "loss": 0.3737, - "step": 4588 - }, - { - "epoch": 0.29991503823279525, - "grad_norm": 0.45018231868743896, - "learning_rate": 9.836665868294317e-06, - "loss": 0.3783, - "step": 4589 - }, - { - "epoch": 0.2999803934383374, - "grad_norm": 0.48943397402763367, - "learning_rate": 9.836577333884074e-06, - "loss": 0.3958, - "step": 4590 - }, - { - "epoch": 0.3000457486438795, - "grad_norm": 0.4738661050796509, - "learning_rate": 9.836488775884167e-06, - "loss": 0.4434, - "step": 4591 - }, - { - "epoch": 0.3001111038494216, - "grad_norm": 0.4493043124675751, - "learning_rate": 9.83640019429503e-06, - "loss": 0.383, - "step": 4592 - }, - { - "epoch": 0.3001764590549637, - "grad_norm": 0.4620586037635803, - "learning_rate": 9.836311589117091e-06, - "loss": 0.3707, - "step": 4593 - }, - { - "epoch": 0.30024181426050583, - "grad_norm": 0.4813469648361206, - "learning_rate": 9.836222960350788e-06, - "loss": 0.4225, - "step": 4594 - }, - { - "epoch": 0.30030716946604796, - "grad_norm": 0.48044130206108093, - "learning_rate": 9.836134307996548e-06, - "loss": 0.4355, - "step": 4595 - }, - { - "epoch": 0.3003725246715901, - "grad_norm": 0.47041261196136475, - "learning_rate": 9.836045632054806e-06, - "loss": 0.3756, - "step": 4596 - }, - { - "epoch": 0.3004378798771322, - "grad_norm": 0.47787100076675415, - "learning_rate": 9.835956932525993e-06, - "loss": 0.4207, - "step": 4597 - }, - { - "epoch": 0.30050323508267435, - "grad_norm": 0.4532429575920105, - "learning_rate": 9.835868209410547e-06, - "loss": 0.3889, - "step": 4598 - }, - { - "epoch": 0.3005685902882165, - "grad_norm": 0.46816444396972656, - "learning_rate": 9.835779462708892e-06, - "loss": 0.4109, - "step": 4599 - }, - { - "epoch": 0.3006339454937586, - "grad_norm": 0.4704676866531372, - "learning_rate": 9.835690692421466e-06, - "loss": 0.3553, - "step": 4600 - }, - { - "epoch": 0.3006993006993007, - "grad_norm": 0.5116649270057678, - "learning_rate": 9.835601898548704e-06, - "loss": 0.4238, - "step": 4601 - }, - { - "epoch": 0.3007646559048428, - "grad_norm": 0.47347110509872437, - "learning_rate": 9.835513081091034e-06, - "loss": 0.4105, - "step": 4602 - }, - { - "epoch": 0.30083001111038493, - "grad_norm": 0.44327715039253235, - "learning_rate": 9.835424240048891e-06, - "loss": 0.3709, - "step": 4603 - }, - { - "epoch": 0.30089536631592706, - "grad_norm": 0.4147056043148041, - "learning_rate": 9.83533537542271e-06, - "loss": 0.313, - "step": 4604 - }, - { - "epoch": 0.3009607215214692, - "grad_norm": 0.474161297082901, - "learning_rate": 9.835246487212924e-06, - "loss": 0.4169, - "step": 4605 - }, - { - "epoch": 0.3010260767270113, - "grad_norm": 0.4815036356449127, - "learning_rate": 9.835157575419965e-06, - "loss": 0.4072, - "step": 4606 - }, - { - "epoch": 0.30109143193255344, - "grad_norm": 0.49013909697532654, - "learning_rate": 9.835068640044266e-06, - "loss": 0.4277, - "step": 4607 - }, - { - "epoch": 0.30115678713809557, - "grad_norm": 0.4272536337375641, - "learning_rate": 9.834979681086265e-06, - "loss": 0.3614, - "step": 4608 - }, - { - "epoch": 0.30122214234363764, - "grad_norm": 0.45833322405815125, - "learning_rate": 9.834890698546392e-06, - "loss": 0.3711, - "step": 4609 - }, - { - "epoch": 0.3012874975491798, - "grad_norm": 0.4576624035835266, - "learning_rate": 9.83480169242508e-06, - "loss": 0.3751, - "step": 4610 - }, - { - "epoch": 0.3013528527547219, - "grad_norm": 0.5056855082511902, - "learning_rate": 9.834712662722768e-06, - "loss": 0.5115, - "step": 4611 - }, - { - "epoch": 0.30141820796026403, - "grad_norm": 0.42207542061805725, - "learning_rate": 9.834623609439886e-06, - "loss": 0.3673, - "step": 4612 - }, - { - "epoch": 0.30148356316580616, - "grad_norm": 0.504401445388794, - "learning_rate": 9.83453453257687e-06, - "loss": 0.4818, - "step": 4613 - }, - { - "epoch": 0.3015489183713483, - "grad_norm": 0.47074586153030396, - "learning_rate": 9.834445432134155e-06, - "loss": 0.3904, - "step": 4614 - }, - { - "epoch": 0.3016142735768904, - "grad_norm": 0.4869149327278137, - "learning_rate": 9.834356308112173e-06, - "loss": 0.436, - "step": 4615 - }, - { - "epoch": 0.30167962878243254, - "grad_norm": 0.4553590416908264, - "learning_rate": 9.834267160511361e-06, - "loss": 0.3819, - "step": 4616 - }, - { - "epoch": 0.30174498398797467, - "grad_norm": 0.4626240134239197, - "learning_rate": 9.834177989332155e-06, - "loss": 0.4362, - "step": 4617 - }, - { - "epoch": 0.30181033919351674, - "grad_norm": 0.47052204608917236, - "learning_rate": 9.834088794574986e-06, - "loss": 0.4109, - "step": 4618 - }, - { - "epoch": 0.30187569439905887, - "grad_norm": 0.4849553406238556, - "learning_rate": 9.833999576240293e-06, - "loss": 0.4515, - "step": 4619 - }, - { - "epoch": 0.301941049604601, - "grad_norm": 0.43929845094680786, - "learning_rate": 9.833910334328509e-06, - "loss": 0.3671, - "step": 4620 - }, - { - "epoch": 0.3020064048101431, - "grad_norm": 0.48614925146102905, - "learning_rate": 9.83382106884007e-06, - "loss": 0.4505, - "step": 4621 - }, - { - "epoch": 0.30207176001568525, - "grad_norm": 0.43567654490470886, - "learning_rate": 9.833731779775411e-06, - "loss": 0.3468, - "step": 4622 - }, - { - "epoch": 0.3021371152212274, - "grad_norm": 0.4525315463542938, - "learning_rate": 9.833642467134966e-06, - "loss": 0.403, - "step": 4623 - }, - { - "epoch": 0.3022024704267695, - "grad_norm": 0.5000825524330139, - "learning_rate": 9.833553130919174e-06, - "loss": 0.4383, - "step": 4624 - }, - { - "epoch": 0.30226782563231164, - "grad_norm": 0.5177122950553894, - "learning_rate": 9.833463771128467e-06, - "loss": 0.479, - "step": 4625 - }, - { - "epoch": 0.3023331808378537, - "grad_norm": 0.4748156666755676, - "learning_rate": 9.833374387763284e-06, - "loss": 0.4102, - "step": 4626 - }, - { - "epoch": 0.30239853604339584, - "grad_norm": 0.5039382576942444, - "learning_rate": 9.83328498082406e-06, - "loss": 0.4264, - "step": 4627 - }, - { - "epoch": 0.30246389124893797, - "grad_norm": 0.4976131319999695, - "learning_rate": 9.83319555031123e-06, - "loss": 0.4775, - "step": 4628 - }, - { - "epoch": 0.3025292464544801, - "grad_norm": 0.47715598344802856, - "learning_rate": 9.83310609622523e-06, - "loss": 0.4193, - "step": 4629 - }, - { - "epoch": 0.3025946016600222, - "grad_norm": 0.4935559332370758, - "learning_rate": 9.8330166185665e-06, - "loss": 0.3982, - "step": 4630 - }, - { - "epoch": 0.30265995686556435, - "grad_norm": 0.4852454662322998, - "learning_rate": 9.832927117335472e-06, - "loss": 0.4354, - "step": 4631 - }, - { - "epoch": 0.3027253120711065, - "grad_norm": 0.47433051466941833, - "learning_rate": 9.832837592532584e-06, - "loss": 0.3778, - "step": 4632 - }, - { - "epoch": 0.3027906672766486, - "grad_norm": 0.4809684753417969, - "learning_rate": 9.832748044158273e-06, - "loss": 0.4245, - "step": 4633 - }, - { - "epoch": 0.3028560224821907, - "grad_norm": 0.42704394459724426, - "learning_rate": 9.832658472212975e-06, - "loss": 0.3272, - "step": 4634 - }, - { - "epoch": 0.3029213776877328, - "grad_norm": 0.515950620174408, - "learning_rate": 9.832568876697129e-06, - "loss": 0.4492, - "step": 4635 - }, - { - "epoch": 0.30298673289327493, - "grad_norm": 0.46974262595176697, - "learning_rate": 9.83247925761117e-06, - "loss": 0.4386, - "step": 4636 - }, - { - "epoch": 0.30305208809881706, - "grad_norm": 0.48093926906585693, - "learning_rate": 9.832389614955533e-06, - "loss": 0.3841, - "step": 4637 - }, - { - "epoch": 0.3031174433043592, - "grad_norm": 0.49033021926879883, - "learning_rate": 9.83229994873066e-06, - "loss": 0.447, - "step": 4638 - }, - { - "epoch": 0.3031827985099013, - "grad_norm": 0.4828386902809143, - "learning_rate": 9.832210258936986e-06, - "loss": 0.4219, - "step": 4639 - }, - { - "epoch": 0.30324815371544345, - "grad_norm": 0.43323618173599243, - "learning_rate": 9.832120545574948e-06, - "loss": 0.3594, - "step": 4640 - }, - { - "epoch": 0.3033135089209856, - "grad_norm": 0.4587923288345337, - "learning_rate": 9.832030808644986e-06, - "loss": 0.3951, - "step": 4641 - }, - { - "epoch": 0.3033788641265277, - "grad_norm": 0.49970561265945435, - "learning_rate": 9.831941048147533e-06, - "loss": 0.4541, - "step": 4642 - }, - { - "epoch": 0.3034442193320698, - "grad_norm": 0.45958563685417175, - "learning_rate": 9.83185126408303e-06, - "loss": 0.3875, - "step": 4643 - }, - { - "epoch": 0.3035095745376119, - "grad_norm": 0.4726570248603821, - "learning_rate": 9.831761456451916e-06, - "loss": 0.4299, - "step": 4644 - }, - { - "epoch": 0.30357492974315403, - "grad_norm": 0.49867337942123413, - "learning_rate": 9.831671625254626e-06, - "loss": 0.4522, - "step": 4645 - }, - { - "epoch": 0.30364028494869616, - "grad_norm": 0.5065587162971497, - "learning_rate": 9.8315817704916e-06, - "loss": 0.4377, - "step": 4646 - }, - { - "epoch": 0.3037056401542383, - "grad_norm": 0.5036814212799072, - "learning_rate": 9.831491892163277e-06, - "loss": 0.3949, - "step": 4647 - }, - { - "epoch": 0.3037709953597804, - "grad_norm": 0.4690750241279602, - "learning_rate": 9.831401990270094e-06, - "loss": 0.4522, - "step": 4648 - }, - { - "epoch": 0.30383635056532254, - "grad_norm": 0.48045486211776733, - "learning_rate": 9.83131206481249e-06, - "loss": 0.3932, - "step": 4649 - }, - { - "epoch": 0.30390170577086467, - "grad_norm": 0.47589701414108276, - "learning_rate": 9.831222115790902e-06, - "loss": 0.3997, - "step": 4650 - }, - { - "epoch": 0.30396706097640674, - "grad_norm": 0.48952358961105347, - "learning_rate": 9.83113214320577e-06, - "loss": 0.4137, - "step": 4651 - }, - { - "epoch": 0.3040324161819489, - "grad_norm": 0.4988320469856262, - "learning_rate": 9.831042147057533e-06, - "loss": 0.4335, - "step": 4652 - }, - { - "epoch": 0.304097771387491, - "grad_norm": 0.4423309862613678, - "learning_rate": 9.830952127346631e-06, - "loss": 0.3441, - "step": 4653 - }, - { - "epoch": 0.30416312659303313, - "grad_norm": 0.4505099058151245, - "learning_rate": 9.8308620840735e-06, - "loss": 0.3588, - "step": 4654 - }, - { - "epoch": 0.30422848179857526, - "grad_norm": 0.48584893345832825, - "learning_rate": 9.830772017238583e-06, - "loss": 0.4113, - "step": 4655 - }, - { - "epoch": 0.3042938370041174, - "grad_norm": 0.4860285818576813, - "learning_rate": 9.830681926842316e-06, - "loss": 0.4269, - "step": 4656 - }, - { - "epoch": 0.3043591922096595, - "grad_norm": 0.45671284198760986, - "learning_rate": 9.830591812885142e-06, - "loss": 0.3842, - "step": 4657 - }, - { - "epoch": 0.30442454741520164, - "grad_norm": 0.49379563331604004, - "learning_rate": 9.830501675367497e-06, - "loss": 0.4139, - "step": 4658 - }, - { - "epoch": 0.30448990262074377, - "grad_norm": 0.5117769837379456, - "learning_rate": 9.83041151428982e-06, - "loss": 0.4672, - "step": 4659 - }, - { - "epoch": 0.30455525782628584, - "grad_norm": 0.4411393702030182, - "learning_rate": 9.830321329652555e-06, - "loss": 0.3989, - "step": 4660 - }, - { - "epoch": 0.30462061303182797, - "grad_norm": 0.47283071279525757, - "learning_rate": 9.830231121456137e-06, - "loss": 0.409, - "step": 4661 - }, - { - "epoch": 0.3046859682373701, - "grad_norm": 0.4507950246334076, - "learning_rate": 9.83014088970101e-06, - "loss": 0.3753, - "step": 4662 - }, - { - "epoch": 0.3047513234429122, - "grad_norm": 0.46355167031288147, - "learning_rate": 9.830050634387614e-06, - "loss": 0.3925, - "step": 4663 - }, - { - "epoch": 0.30481667864845435, - "grad_norm": 0.49535447359085083, - "learning_rate": 9.829960355516385e-06, - "loss": 0.4166, - "step": 4664 - }, - { - "epoch": 0.3048820338539965, - "grad_norm": 0.4795665740966797, - "learning_rate": 9.829870053087768e-06, - "loss": 0.4013, - "step": 4665 - }, - { - "epoch": 0.3049473890595386, - "grad_norm": 0.46602609753608704, - "learning_rate": 9.8297797271022e-06, - "loss": 0.3946, - "step": 4666 - }, - { - "epoch": 0.30501274426508074, - "grad_norm": 0.4819977879524231, - "learning_rate": 9.829689377560125e-06, - "loss": 0.4248, - "step": 4667 - }, - { - "epoch": 0.3050780994706228, - "grad_norm": 0.4841451048851013, - "learning_rate": 9.82959900446198e-06, - "loss": 0.4017, - "step": 4668 - }, - { - "epoch": 0.30514345467616494, - "grad_norm": 0.47611433267593384, - "learning_rate": 9.829508607808208e-06, - "loss": 0.4294, - "step": 4669 - }, - { - "epoch": 0.30520880988170707, - "grad_norm": 0.44730934500694275, - "learning_rate": 9.829418187599252e-06, - "loss": 0.378, - "step": 4670 - }, - { - "epoch": 0.3052741650872492, - "grad_norm": 0.46959617733955383, - "learning_rate": 9.829327743835547e-06, - "loss": 0.3983, - "step": 4671 - }, - { - "epoch": 0.3053395202927913, - "grad_norm": 0.585527777671814, - "learning_rate": 9.829237276517538e-06, - "loss": 0.4016, - "step": 4672 - }, - { - "epoch": 0.30540487549833345, - "grad_norm": 0.4545510411262512, - "learning_rate": 9.829146785645667e-06, - "loss": 0.3583, - "step": 4673 - }, - { - "epoch": 0.3054702307038756, - "grad_norm": 0.4610280394554138, - "learning_rate": 9.829056271220373e-06, - "loss": 0.3798, - "step": 4674 - }, - { - "epoch": 0.3055355859094177, - "grad_norm": 0.4837823212146759, - "learning_rate": 9.8289657332421e-06, - "loss": 0.3746, - "step": 4675 - }, - { - "epoch": 0.30560094111495983, - "grad_norm": 0.4811020493507385, - "learning_rate": 9.828875171711288e-06, - "loss": 0.4325, - "step": 4676 - }, - { - "epoch": 0.3056662963205019, - "grad_norm": 0.481588214635849, - "learning_rate": 9.828784586628378e-06, - "loss": 0.4401, - "step": 4677 - }, - { - "epoch": 0.30573165152604403, - "grad_norm": 0.4185912013053894, - "learning_rate": 9.828693977993813e-06, - "loss": 0.3133, - "step": 4678 - }, - { - "epoch": 0.30579700673158616, - "grad_norm": 0.4645090103149414, - "learning_rate": 9.828603345808034e-06, - "loss": 0.3459, - "step": 4679 - }, - { - "epoch": 0.3058623619371283, - "grad_norm": 0.5124992728233337, - "learning_rate": 9.828512690071485e-06, - "loss": 0.4257, - "step": 4680 - }, - { - "epoch": 0.3059277171426704, - "grad_norm": 0.5183594226837158, - "learning_rate": 9.828422010784606e-06, - "loss": 0.4466, - "step": 4681 - }, - { - "epoch": 0.30599307234821255, - "grad_norm": 0.48616012930870056, - "learning_rate": 9.82833130794784e-06, - "loss": 0.4588, - "step": 4682 - }, - { - "epoch": 0.3060584275537547, - "grad_norm": 0.4824143648147583, - "learning_rate": 9.82824058156163e-06, - "loss": 0.4333, - "step": 4683 - }, - { - "epoch": 0.3061237827592968, - "grad_norm": 0.4992258548736572, - "learning_rate": 9.828149831626418e-06, - "loss": 0.3931, - "step": 4684 - }, - { - "epoch": 0.3061891379648389, - "grad_norm": 0.5806849598884583, - "learning_rate": 9.828059058142647e-06, - "loss": 0.4993, - "step": 4685 - }, - { - "epoch": 0.306254493170381, - "grad_norm": 0.4938088059425354, - "learning_rate": 9.827968261110758e-06, - "loss": 0.3776, - "step": 4686 - }, - { - "epoch": 0.30631984837592313, - "grad_norm": 0.5305953025817871, - "learning_rate": 9.827877440531195e-06, - "loss": 0.4366, - "step": 4687 - }, - { - "epoch": 0.30638520358146526, - "grad_norm": 0.4520416557788849, - "learning_rate": 9.827786596404403e-06, - "loss": 0.3701, - "step": 4688 - }, - { - "epoch": 0.3064505587870074, - "grad_norm": 0.48587965965270996, - "learning_rate": 9.827695728730824e-06, - "loss": 0.4261, - "step": 4689 - }, - { - "epoch": 0.3065159139925495, - "grad_norm": 0.5094895958900452, - "learning_rate": 9.827604837510897e-06, - "loss": 0.4354, - "step": 4690 - }, - { - "epoch": 0.30658126919809164, - "grad_norm": 0.4881570041179657, - "learning_rate": 9.827513922745073e-06, - "loss": 0.4471, - "step": 4691 - }, - { - "epoch": 0.30664662440363377, - "grad_norm": 0.4493294060230255, - "learning_rate": 9.827422984433787e-06, - "loss": 0.3971, - "step": 4692 - }, - { - "epoch": 0.30671197960917584, - "grad_norm": 0.41316869854927063, - "learning_rate": 9.82733202257749e-06, - "loss": 0.3215, - "step": 4693 - }, - { - "epoch": 0.306777334814718, - "grad_norm": 0.4954182207584381, - "learning_rate": 9.82724103717662e-06, - "loss": 0.4057, - "step": 4694 - }, - { - "epoch": 0.3068426900202601, - "grad_norm": 0.45912855863571167, - "learning_rate": 9.827150028231625e-06, - "loss": 0.3981, - "step": 4695 - }, - { - "epoch": 0.30690804522580223, - "grad_norm": 0.46424421668052673, - "learning_rate": 9.827058995742947e-06, - "loss": 0.3586, - "step": 4696 - }, - { - "epoch": 0.30697340043134436, - "grad_norm": 0.4777919352054596, - "learning_rate": 9.826967939711029e-06, - "loss": 0.4328, - "step": 4697 - }, - { - "epoch": 0.3070387556368865, - "grad_norm": 0.4849260151386261, - "learning_rate": 9.826876860136317e-06, - "loss": 0.4256, - "step": 4698 - }, - { - "epoch": 0.3071041108424286, - "grad_norm": 0.4411482512950897, - "learning_rate": 9.826785757019253e-06, - "loss": 0.3589, - "step": 4699 - }, - { - "epoch": 0.30716946604797074, - "grad_norm": 0.45035502314567566, - "learning_rate": 9.826694630360283e-06, - "loss": 0.3917, - "step": 4700 - }, - { - "epoch": 0.30723482125351287, - "grad_norm": 0.48693060874938965, - "learning_rate": 9.826603480159852e-06, - "loss": 0.4292, - "step": 4701 - }, - { - "epoch": 0.30730017645905494, - "grad_norm": 0.4641180634498596, - "learning_rate": 9.826512306418403e-06, - "loss": 0.3931, - "step": 4702 - }, - { - "epoch": 0.30736553166459707, - "grad_norm": 0.4459921717643738, - "learning_rate": 9.82642110913638e-06, - "loss": 0.3673, - "step": 4703 - }, - { - "epoch": 0.3074308868701392, - "grad_norm": 0.4809280037879944, - "learning_rate": 9.826329888314233e-06, - "loss": 0.4166, - "step": 4704 - }, - { - "epoch": 0.3074962420756813, - "grad_norm": 0.4905911684036255, - "learning_rate": 9.826238643952402e-06, - "loss": 0.4126, - "step": 4705 - }, - { - "epoch": 0.30756159728122345, - "grad_norm": 0.47121360898017883, - "learning_rate": 9.826147376051332e-06, - "loss": 0.3723, - "step": 4706 - }, - { - "epoch": 0.3076269524867656, - "grad_norm": 0.7335187792778015, - "learning_rate": 9.82605608461147e-06, - "loss": 0.4497, - "step": 4707 - }, - { - "epoch": 0.3076923076923077, - "grad_norm": 0.45816537737846375, - "learning_rate": 9.825964769633259e-06, - "loss": 0.3828, - "step": 4708 - }, - { - "epoch": 0.30775766289784984, - "grad_norm": 0.46843087673187256, - "learning_rate": 9.825873431117147e-06, - "loss": 0.4246, - "step": 4709 - }, - { - "epoch": 0.3078230181033919, - "grad_norm": 0.4835190773010254, - "learning_rate": 9.82578206906358e-06, - "loss": 0.4053, - "step": 4710 - }, - { - "epoch": 0.30788837330893404, - "grad_norm": 0.4642230272293091, - "learning_rate": 9.825690683472998e-06, - "loss": 0.4198, - "step": 4711 - }, - { - "epoch": 0.30795372851447617, - "grad_norm": 0.5255396962165833, - "learning_rate": 9.825599274345856e-06, - "loss": 0.4735, - "step": 4712 - }, - { - "epoch": 0.3080190837200183, - "grad_norm": 0.4632430374622345, - "learning_rate": 9.825507841682591e-06, - "loss": 0.431, - "step": 4713 - }, - { - "epoch": 0.3080844389255604, - "grad_norm": 0.4993661344051361, - "learning_rate": 9.825416385483654e-06, - "loss": 0.3825, - "step": 4714 - }, - { - "epoch": 0.30814979413110255, - "grad_norm": 0.49783140420913696, - "learning_rate": 9.82532490574949e-06, - "loss": 0.4408, - "step": 4715 - }, - { - "epoch": 0.3082151493366447, - "grad_norm": 0.5053375959396362, - "learning_rate": 9.825233402480543e-06, - "loss": 0.4418, - "step": 4716 - }, - { - "epoch": 0.3082805045421868, - "grad_norm": 0.4538673162460327, - "learning_rate": 9.825141875677263e-06, - "loss": 0.3689, - "step": 4717 - }, - { - "epoch": 0.30834585974772893, - "grad_norm": 0.4795911908149719, - "learning_rate": 9.825050325340092e-06, - "loss": 0.4425, - "step": 4718 - }, - { - "epoch": 0.308411214953271, - "grad_norm": 0.4877456724643707, - "learning_rate": 9.82495875146948e-06, - "loss": 0.4171, - "step": 4719 - }, - { - "epoch": 0.30847657015881313, - "grad_norm": 0.4846137464046478, - "learning_rate": 9.824867154065874e-06, - "loss": 0.4039, - "step": 4720 - }, - { - "epoch": 0.30854192536435526, - "grad_norm": 0.4265865981578827, - "learning_rate": 9.824775533129718e-06, - "loss": 0.3286, - "step": 4721 - }, - { - "epoch": 0.3086072805698974, - "grad_norm": 0.4717889428138733, - "learning_rate": 9.824683888661462e-06, - "loss": 0.4207, - "step": 4722 - }, - { - "epoch": 0.3086726357754395, - "grad_norm": 0.4649326205253601, - "learning_rate": 9.824592220661548e-06, - "loss": 0.4077, - "step": 4723 - }, - { - "epoch": 0.30873799098098165, - "grad_norm": 0.4909663200378418, - "learning_rate": 9.824500529130429e-06, - "loss": 0.4668, - "step": 4724 - }, - { - "epoch": 0.3088033461865238, - "grad_norm": 0.4453728497028351, - "learning_rate": 9.824408814068548e-06, - "loss": 0.3678, - "step": 4725 - }, - { - "epoch": 0.3088687013920659, - "grad_norm": 0.5093079805374146, - "learning_rate": 9.824317075476356e-06, - "loss": 0.4338, - "step": 4726 - }, - { - "epoch": 0.308934056597608, - "grad_norm": 0.48250722885131836, - "learning_rate": 9.824225313354296e-06, - "loss": 0.4095, - "step": 4727 - }, - { - "epoch": 0.3089994118031501, - "grad_norm": 0.4546104967594147, - "learning_rate": 9.824133527702819e-06, - "loss": 0.4029, - "step": 4728 - }, - { - "epoch": 0.30906476700869223, - "grad_norm": 0.48363611102104187, - "learning_rate": 9.824041718522372e-06, - "loss": 0.4135, - "step": 4729 - }, - { - "epoch": 0.30913012221423436, - "grad_norm": 0.47212186455726624, - "learning_rate": 9.823949885813402e-06, - "loss": 0.3982, - "step": 4730 - }, - { - "epoch": 0.3091954774197765, - "grad_norm": 0.5019171237945557, - "learning_rate": 9.823858029576357e-06, - "loss": 0.4371, - "step": 4731 - }, - { - "epoch": 0.3092608326253186, - "grad_norm": 0.4634392559528351, - "learning_rate": 9.823766149811686e-06, - "loss": 0.381, - "step": 4732 - }, - { - "epoch": 0.30932618783086074, - "grad_norm": 0.4996906518936157, - "learning_rate": 9.823674246519835e-06, - "loss": 0.4128, - "step": 4733 - }, - { - "epoch": 0.30939154303640287, - "grad_norm": 0.4786403477191925, - "learning_rate": 9.823582319701255e-06, - "loss": 0.3792, - "step": 4734 - }, - { - "epoch": 0.30945689824194494, - "grad_norm": 0.4715338349342346, - "learning_rate": 9.823490369356392e-06, - "loss": 0.3604, - "step": 4735 - }, - { - "epoch": 0.3095222534474871, - "grad_norm": 0.5077422857284546, - "learning_rate": 9.823398395485696e-06, - "loss": 0.4201, - "step": 4736 - }, - { - "epoch": 0.3095876086530292, - "grad_norm": 0.4325348138809204, - "learning_rate": 9.823306398089615e-06, - "loss": 0.3879, - "step": 4737 - }, - { - "epoch": 0.30965296385857133, - "grad_norm": 0.47307687997817993, - "learning_rate": 9.823214377168597e-06, - "loss": 0.4156, - "step": 4738 - }, - { - "epoch": 0.30971831906411346, - "grad_norm": 0.5081484913825989, - "learning_rate": 9.823122332723091e-06, - "loss": 0.4202, - "step": 4739 - }, - { - "epoch": 0.3097836742696556, - "grad_norm": 0.4860573410987854, - "learning_rate": 9.823030264753549e-06, - "loss": 0.4427, - "step": 4740 - }, - { - "epoch": 0.3098490294751977, - "grad_norm": 0.5136462450027466, - "learning_rate": 9.822938173260416e-06, - "loss": 0.4832, - "step": 4741 - }, - { - "epoch": 0.30991438468073984, - "grad_norm": 0.49386855959892273, - "learning_rate": 9.822846058244141e-06, - "loss": 0.4266, - "step": 4742 - }, - { - "epoch": 0.30997973988628197, - "grad_norm": 0.4761289060115814, - "learning_rate": 9.822753919705179e-06, - "loss": 0.4064, - "step": 4743 - }, - { - "epoch": 0.31004509509182404, - "grad_norm": 0.4351711869239807, - "learning_rate": 9.822661757643971e-06, - "loss": 0.3351, - "step": 4744 - }, - { - "epoch": 0.31011045029736617, - "grad_norm": 0.4576933681964874, - "learning_rate": 9.822569572060975e-06, - "loss": 0.3785, - "step": 4745 - }, - { - "epoch": 0.3101758055029083, - "grad_norm": 0.48872002959251404, - "learning_rate": 9.822477362956635e-06, - "loss": 0.4176, - "step": 4746 - }, - { - "epoch": 0.3102411607084504, - "grad_norm": 0.5006043910980225, - "learning_rate": 9.822385130331401e-06, - "loss": 0.4231, - "step": 4747 - }, - { - "epoch": 0.31030651591399255, - "grad_norm": 0.4624032974243164, - "learning_rate": 9.822292874185726e-06, - "loss": 0.4088, - "step": 4748 - }, - { - "epoch": 0.3103718711195347, - "grad_norm": 0.43168407678604126, - "learning_rate": 9.822200594520055e-06, - "loss": 0.3781, - "step": 4749 - }, - { - "epoch": 0.3104372263250768, - "grad_norm": 0.5045598745346069, - "learning_rate": 9.822108291334845e-06, - "loss": 0.4475, - "step": 4750 - }, - { - "epoch": 0.31050258153061894, - "grad_norm": 0.45648258924484253, - "learning_rate": 9.822015964630539e-06, - "loss": 0.3695, - "step": 4751 - }, - { - "epoch": 0.310567936736161, - "grad_norm": 0.4925973415374756, - "learning_rate": 9.821923614407593e-06, - "loss": 0.4386, - "step": 4752 - }, - { - "epoch": 0.31063329194170314, - "grad_norm": 0.4795874059200287, - "learning_rate": 9.821831240666453e-06, - "loss": 0.3643, - "step": 4753 - }, - { - "epoch": 0.31069864714724527, - "grad_norm": 0.46666625142097473, - "learning_rate": 9.821738843407574e-06, - "loss": 0.4192, - "step": 4754 - }, - { - "epoch": 0.3107640023527874, - "grad_norm": 0.4786038100719452, - "learning_rate": 9.8216464226314e-06, - "loss": 0.4122, - "step": 4755 - }, - { - "epoch": 0.3108293575583295, - "grad_norm": 0.4647231698036194, - "learning_rate": 9.821553978338391e-06, - "loss": 0.3796, - "step": 4756 - }, - { - "epoch": 0.31089471276387165, - "grad_norm": 0.4673691987991333, - "learning_rate": 9.821461510528991e-06, - "loss": 0.4054, - "step": 4757 - }, - { - "epoch": 0.3109600679694138, - "grad_norm": 0.4552198648452759, - "learning_rate": 9.821369019203652e-06, - "loss": 0.362, - "step": 4758 - }, - { - "epoch": 0.3110254231749559, - "grad_norm": 0.42929479479789734, - "learning_rate": 9.821276504362827e-06, - "loss": 0.3518, - "step": 4759 - }, - { - "epoch": 0.31109077838049803, - "grad_norm": 0.45886924862861633, - "learning_rate": 9.821183966006967e-06, - "loss": 0.4119, - "step": 4760 - }, - { - "epoch": 0.3111561335860401, - "grad_norm": 0.485416442155838, - "learning_rate": 9.821091404136521e-06, - "loss": 0.4136, - "step": 4761 - }, - { - "epoch": 0.31122148879158223, - "grad_norm": 0.4691421091556549, - "learning_rate": 9.820998818751943e-06, - "loss": 0.4121, - "step": 4762 - }, - { - "epoch": 0.31128684399712436, - "grad_norm": 0.525909423828125, - "learning_rate": 9.820906209853682e-06, - "loss": 0.4613, - "step": 4763 - }, - { - "epoch": 0.3113521992026665, - "grad_norm": 0.5122382640838623, - "learning_rate": 9.820813577442192e-06, - "loss": 0.4174, - "step": 4764 - }, - { - "epoch": 0.3114175544082086, - "grad_norm": 0.46602943539619446, - "learning_rate": 9.820720921517925e-06, - "loss": 0.3721, - "step": 4765 - }, - { - "epoch": 0.31148290961375075, - "grad_norm": 0.5424133539199829, - "learning_rate": 9.820628242081332e-06, - "loss": 0.462, - "step": 4766 - }, - { - "epoch": 0.3115482648192929, - "grad_norm": 0.49611037969589233, - "learning_rate": 9.820535539132865e-06, - "loss": 0.4226, - "step": 4767 - }, - { - "epoch": 0.311613620024835, - "grad_norm": 0.4741397500038147, - "learning_rate": 9.820442812672974e-06, - "loss": 0.3786, - "step": 4768 - }, - { - "epoch": 0.3116789752303771, - "grad_norm": 0.5043719410896301, - "learning_rate": 9.820350062702117e-06, - "loss": 0.4327, - "step": 4769 - }, - { - "epoch": 0.3117443304359192, - "grad_norm": 0.47938835620880127, - "learning_rate": 9.82025728922074e-06, - "loss": 0.397, - "step": 4770 - }, - { - "epoch": 0.31180968564146133, - "grad_norm": 0.6575962901115417, - "learning_rate": 9.8201644922293e-06, - "loss": 0.4668, - "step": 4771 - }, - { - "epoch": 0.31187504084700346, - "grad_norm": 0.4484061598777771, - "learning_rate": 9.820071671728247e-06, - "loss": 0.3998, - "step": 4772 - }, - { - "epoch": 0.3119403960525456, - "grad_norm": 0.4660794734954834, - "learning_rate": 9.819978827718035e-06, - "loss": 0.3946, - "step": 4773 - }, - { - "epoch": 0.3120057512580877, - "grad_norm": 0.42437443137168884, - "learning_rate": 9.819885960199115e-06, - "loss": 0.3356, - "step": 4774 - }, - { - "epoch": 0.31207110646362984, - "grad_norm": 0.47052139043807983, - "learning_rate": 9.819793069171944e-06, - "loss": 0.3964, - "step": 4775 - }, - { - "epoch": 0.31213646166917197, - "grad_norm": 0.46570703387260437, - "learning_rate": 9.819700154636971e-06, - "loss": 0.4351, - "step": 4776 - }, - { - "epoch": 0.31220181687471404, - "grad_norm": 0.4962845742702484, - "learning_rate": 9.819607216594652e-06, - "loss": 0.4024, - "step": 4777 - }, - { - "epoch": 0.3122671720802562, - "grad_norm": 0.4872830808162689, - "learning_rate": 9.819514255045437e-06, - "loss": 0.4276, - "step": 4778 - }, - { - "epoch": 0.3123325272857983, - "grad_norm": 0.5150973200798035, - "learning_rate": 9.819421269989782e-06, - "loss": 0.4137, - "step": 4779 - }, - { - "epoch": 0.31239788249134043, - "grad_norm": 0.46941718459129333, - "learning_rate": 9.81932826142814e-06, - "loss": 0.3733, - "step": 4780 - }, - { - "epoch": 0.31246323769688256, - "grad_norm": 0.47068560123443604, - "learning_rate": 9.819235229360964e-06, - "loss": 0.3926, - "step": 4781 - }, - { - "epoch": 0.3125285929024247, - "grad_norm": 0.5268428325653076, - "learning_rate": 9.81914217378871e-06, - "loss": 0.4788, - "step": 4782 - }, - { - "epoch": 0.3125939481079668, - "grad_norm": 0.44260939955711365, - "learning_rate": 9.819049094711827e-06, - "loss": 0.3621, - "step": 4783 - }, - { - "epoch": 0.31265930331350894, - "grad_norm": 0.44305866956710815, - "learning_rate": 9.818955992130776e-06, - "loss": 0.3822, - "step": 4784 - }, - { - "epoch": 0.31272465851905107, - "grad_norm": 0.48154309391975403, - "learning_rate": 9.818862866046004e-06, - "loss": 0.4069, - "step": 4785 - }, - { - "epoch": 0.31279001372459314, - "grad_norm": 0.4962350130081177, - "learning_rate": 9.81876971645797e-06, - "loss": 0.4843, - "step": 4786 - }, - { - "epoch": 0.31285536893013527, - "grad_norm": 0.4804168939590454, - "learning_rate": 9.818676543367127e-06, - "loss": 0.4579, - "step": 4787 - }, - { - "epoch": 0.3129207241356774, - "grad_norm": 0.4673965275287628, - "learning_rate": 9.818583346773926e-06, - "loss": 0.4024, - "step": 4788 - }, - { - "epoch": 0.3129860793412195, - "grad_norm": 0.49661630392074585, - "learning_rate": 9.818490126678828e-06, - "loss": 0.4329, - "step": 4789 - }, - { - "epoch": 0.31305143454676165, - "grad_norm": 0.45884013175964355, - "learning_rate": 9.818396883082285e-06, - "loss": 0.3635, - "step": 4790 - }, - { - "epoch": 0.3131167897523038, - "grad_norm": 0.43619024753570557, - "learning_rate": 9.818303615984748e-06, - "loss": 0.3677, - "step": 4791 - }, - { - "epoch": 0.3131821449578459, - "grad_norm": 0.4732099771499634, - "learning_rate": 9.818210325386676e-06, - "loss": 0.4172, - "step": 4792 - }, - { - "epoch": 0.31324750016338804, - "grad_norm": 0.4724849462509155, - "learning_rate": 9.818117011288523e-06, - "loss": 0.4241, - "step": 4793 - }, - { - "epoch": 0.3133128553689301, - "grad_norm": 0.4618249237537384, - "learning_rate": 9.818023673690746e-06, - "loss": 0.3868, - "step": 4794 - }, - { - "epoch": 0.31337821057447224, - "grad_norm": 0.49833354353904724, - "learning_rate": 9.817930312593798e-06, - "loss": 0.4183, - "step": 4795 - }, - { - "epoch": 0.31344356578001437, - "grad_norm": 0.5455418229103088, - "learning_rate": 9.817836927998134e-06, - "loss": 0.4504, - "step": 4796 - }, - { - "epoch": 0.3135089209855565, - "grad_norm": 0.49523916840553284, - "learning_rate": 9.81774351990421e-06, - "loss": 0.4108, - "step": 4797 - }, - { - "epoch": 0.3135742761910986, - "grad_norm": 0.4911780059337616, - "learning_rate": 9.817650088312483e-06, - "loss": 0.3926, - "step": 4798 - }, - { - "epoch": 0.31363963139664075, - "grad_norm": 0.4689830243587494, - "learning_rate": 9.817556633223407e-06, - "loss": 0.3995, - "step": 4799 - }, - { - "epoch": 0.3137049866021829, - "grad_norm": 0.471271276473999, - "learning_rate": 9.817463154637436e-06, - "loss": 0.4288, - "step": 4800 - }, - { - "epoch": 0.313770341807725, - "grad_norm": 0.46512287855148315, - "learning_rate": 9.817369652555032e-06, - "loss": 0.404, - "step": 4801 - }, - { - "epoch": 0.31383569701326713, - "grad_norm": 0.44516071677207947, - "learning_rate": 9.817276126976645e-06, - "loss": 0.3895, - "step": 4802 - }, - { - "epoch": 0.3139010522188092, - "grad_norm": 0.4859333038330078, - "learning_rate": 9.817182577902733e-06, - "loss": 0.4241, - "step": 4803 - }, - { - "epoch": 0.31396640742435133, - "grad_norm": 0.44133713841438293, - "learning_rate": 9.817089005333754e-06, - "loss": 0.368, - "step": 4804 - }, - { - "epoch": 0.31403176262989346, - "grad_norm": 0.43846553564071655, - "learning_rate": 9.816995409270163e-06, - "loss": 0.3794, - "step": 4805 - }, - { - "epoch": 0.3140971178354356, - "grad_norm": 0.4645024538040161, - "learning_rate": 9.816901789712417e-06, - "loss": 0.3909, - "step": 4806 - }, - { - "epoch": 0.3141624730409777, - "grad_norm": 0.4686172902584076, - "learning_rate": 9.81680814666097e-06, - "loss": 0.3942, - "step": 4807 - }, - { - "epoch": 0.31422782824651985, - "grad_norm": 0.481277734041214, - "learning_rate": 9.816714480116284e-06, - "loss": 0.4025, - "step": 4808 - }, - { - "epoch": 0.314293183452062, - "grad_norm": 0.4675150513648987, - "learning_rate": 9.816620790078811e-06, - "loss": 0.3855, - "step": 4809 - }, - { - "epoch": 0.3143585386576041, - "grad_norm": 0.528398334980011, - "learning_rate": 9.81652707654901e-06, - "loss": 0.4621, - "step": 4810 - }, - { - "epoch": 0.3144238938631462, - "grad_norm": 0.4680522680282593, - "learning_rate": 9.816433339527338e-06, - "loss": 0.3746, - "step": 4811 - }, - { - "epoch": 0.3144892490686883, - "grad_norm": 0.4627244174480438, - "learning_rate": 9.816339579014253e-06, - "loss": 0.4109, - "step": 4812 - }, - { - "epoch": 0.31455460427423043, - "grad_norm": 0.43803465366363525, - "learning_rate": 9.81624579501021e-06, - "loss": 0.3649, - "step": 4813 - }, - { - "epoch": 0.31461995947977256, - "grad_norm": 0.49013015627861023, - "learning_rate": 9.816151987515669e-06, - "loss": 0.435, - "step": 4814 - }, - { - "epoch": 0.3146853146853147, - "grad_norm": 0.4839861989021301, - "learning_rate": 9.816058156531085e-06, - "loss": 0.4232, - "step": 4815 - }, - { - "epoch": 0.3147506698908568, - "grad_norm": 0.5070106387138367, - "learning_rate": 9.815964302056918e-06, - "loss": 0.4771, - "step": 4816 - }, - { - "epoch": 0.31481602509639894, - "grad_norm": 0.525214672088623, - "learning_rate": 9.815870424093623e-06, - "loss": 0.47, - "step": 4817 - }, - { - "epoch": 0.31488138030194107, - "grad_norm": 0.468043714761734, - "learning_rate": 9.81577652264166e-06, - "loss": 0.4163, - "step": 4818 - }, - { - "epoch": 0.31494673550748314, - "grad_norm": 0.4506476819515228, - "learning_rate": 9.815682597701488e-06, - "loss": 0.3898, - "step": 4819 - }, - { - "epoch": 0.3150120907130253, - "grad_norm": 0.45676279067993164, - "learning_rate": 9.815588649273565e-06, - "loss": 0.3912, - "step": 4820 - }, - { - "epoch": 0.3150774459185674, - "grad_norm": 0.4628327488899231, - "learning_rate": 9.815494677358344e-06, - "loss": 0.3812, - "step": 4821 - }, - { - "epoch": 0.31514280112410953, - "grad_norm": 0.4558892548084259, - "learning_rate": 9.815400681956288e-06, - "loss": 0.415, - "step": 4822 - }, - { - "epoch": 0.31520815632965166, - "grad_norm": 0.4446291923522949, - "learning_rate": 9.815306663067856e-06, - "loss": 0.3809, - "step": 4823 - }, - { - "epoch": 0.3152735115351938, - "grad_norm": 0.48152267932891846, - "learning_rate": 9.815212620693506e-06, - "loss": 0.4539, - "step": 4824 - }, - { - "epoch": 0.3153388667407359, - "grad_norm": 0.4603506326675415, - "learning_rate": 9.815118554833695e-06, - "loss": 0.3669, - "step": 4825 - }, - { - "epoch": 0.31540422194627804, - "grad_norm": 0.41335731744766235, - "learning_rate": 9.815024465488883e-06, - "loss": 0.3086, - "step": 4826 - }, - { - "epoch": 0.31546957715182017, - "grad_norm": 0.4538346827030182, - "learning_rate": 9.814930352659527e-06, - "loss": 0.4002, - "step": 4827 - }, - { - "epoch": 0.31553493235736224, - "grad_norm": 0.49037274718284607, - "learning_rate": 9.814836216346089e-06, - "loss": 0.4029, - "step": 4828 - }, - { - "epoch": 0.31560028756290437, - "grad_norm": 0.4771523177623749, - "learning_rate": 9.814742056549025e-06, - "loss": 0.3865, - "step": 4829 - }, - { - "epoch": 0.3156656427684465, - "grad_norm": 0.48241978883743286, - "learning_rate": 9.8146478732688e-06, - "loss": 0.4385, - "step": 4830 - }, - { - "epoch": 0.3157309979739886, - "grad_norm": 0.46045243740081787, - "learning_rate": 9.814553666505864e-06, - "loss": 0.4259, - "step": 4831 - }, - { - "epoch": 0.31579635317953075, - "grad_norm": 0.4759175777435303, - "learning_rate": 9.814459436260686e-06, - "loss": 0.4349, - "step": 4832 - }, - { - "epoch": 0.3158617083850729, - "grad_norm": 0.47514161467552185, - "learning_rate": 9.814365182533721e-06, - "loss": 0.4178, - "step": 4833 - }, - { - "epoch": 0.315927063590615, - "grad_norm": 0.44572609663009644, - "learning_rate": 9.814270905325428e-06, - "loss": 0.4036, - "step": 4834 - }, - { - "epoch": 0.31599241879615714, - "grad_norm": 0.44046252965927124, - "learning_rate": 9.814176604636268e-06, - "loss": 0.3579, - "step": 4835 - }, - { - "epoch": 0.3160577740016992, - "grad_norm": 0.5021373629570007, - "learning_rate": 9.8140822804667e-06, - "loss": 0.4373, - "step": 4836 - }, - { - "epoch": 0.31612312920724134, - "grad_norm": 0.46788740158081055, - "learning_rate": 9.813987932817185e-06, - "loss": 0.4139, - "step": 4837 - }, - { - "epoch": 0.31618848441278347, - "grad_norm": 0.4405035078525543, - "learning_rate": 9.813893561688186e-06, - "loss": 0.3801, - "step": 4838 - }, - { - "epoch": 0.3162538396183256, - "grad_norm": 0.4530717134475708, - "learning_rate": 9.813799167080157e-06, - "loss": 0.3852, - "step": 4839 - }, - { - "epoch": 0.3163191948238677, - "grad_norm": 0.43838727474212646, - "learning_rate": 9.813704748993564e-06, - "loss": 0.3637, - "step": 4840 - }, - { - "epoch": 0.31638455002940985, - "grad_norm": 0.46214601397514343, - "learning_rate": 9.813610307428866e-06, - "loss": 0.4053, - "step": 4841 - }, - { - "epoch": 0.316449905234952, - "grad_norm": 0.516463577747345, - "learning_rate": 9.81351584238652e-06, - "loss": 0.472, - "step": 4842 - }, - { - "epoch": 0.3165152604404941, - "grad_norm": 0.4470294117927551, - "learning_rate": 9.813421353866991e-06, - "loss": 0.3857, - "step": 4843 - }, - { - "epoch": 0.31658061564603623, - "grad_norm": 0.4604097902774811, - "learning_rate": 9.813326841870741e-06, - "loss": 0.3819, - "step": 4844 - }, - { - "epoch": 0.3166459708515783, - "grad_norm": 0.4638279378414154, - "learning_rate": 9.813232306398226e-06, - "loss": 0.3988, - "step": 4845 - }, - { - "epoch": 0.31671132605712043, - "grad_norm": 0.4969153106212616, - "learning_rate": 9.81313774744991e-06, - "loss": 0.4113, - "step": 4846 - }, - { - "epoch": 0.31677668126266256, - "grad_norm": 0.4891085922718048, - "learning_rate": 9.813043165026252e-06, - "loss": 0.3624, - "step": 4847 - }, - { - "epoch": 0.3168420364682047, - "grad_norm": 0.45764076709747314, - "learning_rate": 9.812948559127717e-06, - "loss": 0.3808, - "step": 4848 - }, - { - "epoch": 0.3169073916737468, - "grad_norm": 0.5100204348564148, - "learning_rate": 9.812853929754765e-06, - "loss": 0.4807, - "step": 4849 - }, - { - "epoch": 0.31697274687928895, - "grad_norm": 0.5149915814399719, - "learning_rate": 9.812759276907857e-06, - "loss": 0.4546, - "step": 4850 - }, - { - "epoch": 0.3170381020848311, - "grad_norm": 0.4715445339679718, - "learning_rate": 9.812664600587454e-06, - "loss": 0.4152, - "step": 4851 - }, - { - "epoch": 0.3171034572903732, - "grad_norm": 0.4651075303554535, - "learning_rate": 9.812569900794018e-06, - "loss": 0.3809, - "step": 4852 - }, - { - "epoch": 0.3171688124959153, - "grad_norm": 0.5031759142875671, - "learning_rate": 9.812475177528012e-06, - "loss": 0.4265, - "step": 4853 - }, - { - "epoch": 0.3172341677014574, - "grad_norm": 0.4432404637336731, - "learning_rate": 9.812380430789898e-06, - "loss": 0.402, - "step": 4854 - }, - { - "epoch": 0.31729952290699953, - "grad_norm": 0.4887496829032898, - "learning_rate": 9.812285660580136e-06, - "loss": 0.4299, - "step": 4855 - }, - { - "epoch": 0.31736487811254166, - "grad_norm": 0.5075775980949402, - "learning_rate": 9.81219086689919e-06, - "loss": 0.4039, - "step": 4856 - }, - { - "epoch": 0.3174302333180838, - "grad_norm": 0.4657699167728424, - "learning_rate": 9.812096049747524e-06, - "loss": 0.4107, - "step": 4857 - }, - { - "epoch": 0.3174955885236259, - "grad_norm": 0.4746883511543274, - "learning_rate": 9.812001209125597e-06, - "loss": 0.402, - "step": 4858 - }, - { - "epoch": 0.31756094372916804, - "grad_norm": 0.4992033541202545, - "learning_rate": 9.811906345033873e-06, - "loss": 0.4126, - "step": 4859 - }, - { - "epoch": 0.31762629893471017, - "grad_norm": 0.480905681848526, - "learning_rate": 9.811811457472813e-06, - "loss": 0.4132, - "step": 4860 - }, - { - "epoch": 0.31769165414025224, - "grad_norm": 0.4425283670425415, - "learning_rate": 9.811716546442884e-06, - "loss": 0.3699, - "step": 4861 - }, - { - "epoch": 0.3177570093457944, - "grad_norm": 0.46087726950645447, - "learning_rate": 9.811621611944547e-06, - "loss": 0.4009, - "step": 4862 - }, - { - "epoch": 0.3178223645513365, - "grad_norm": 0.4641392230987549, - "learning_rate": 9.811526653978262e-06, - "loss": 0.3651, - "step": 4863 - }, - { - "epoch": 0.31788771975687863, - "grad_norm": 0.49839305877685547, - "learning_rate": 9.811431672544496e-06, - "loss": 0.4535, - "step": 4864 - }, - { - "epoch": 0.31795307496242076, - "grad_norm": 0.4499477744102478, - "learning_rate": 9.811336667643711e-06, - "loss": 0.3932, - "step": 4865 - }, - { - "epoch": 0.3180184301679629, - "grad_norm": 0.4783080816268921, - "learning_rate": 9.81124163927637e-06, - "loss": 0.4182, - "step": 4866 - }, - { - "epoch": 0.318083785373505, - "grad_norm": 0.4732609689235687, - "learning_rate": 9.811146587442937e-06, - "loss": 0.3861, - "step": 4867 - }, - { - "epoch": 0.31814914057904714, - "grad_norm": 0.46453970670700073, - "learning_rate": 9.811051512143875e-06, - "loss": 0.3576, - "step": 4868 - }, - { - "epoch": 0.31821449578458927, - "grad_norm": 0.44703081250190735, - "learning_rate": 9.810956413379649e-06, - "loss": 0.349, - "step": 4869 - }, - { - "epoch": 0.31827985099013134, - "grad_norm": 0.4656713604927063, - "learning_rate": 9.81086129115072e-06, - "loss": 0.3806, - "step": 4870 - }, - { - "epoch": 0.31834520619567347, - "grad_norm": 0.46229737997055054, - "learning_rate": 9.810766145457556e-06, - "loss": 0.4023, - "step": 4871 - }, - { - "epoch": 0.3184105614012156, - "grad_norm": 0.4506048560142517, - "learning_rate": 9.810670976300618e-06, - "loss": 0.4009, - "step": 4872 - }, - { - "epoch": 0.3184759166067577, - "grad_norm": 0.5028387308120728, - "learning_rate": 9.810575783680369e-06, - "loss": 0.4266, - "step": 4873 - }, - { - "epoch": 0.31854127181229985, - "grad_norm": 0.47096943855285645, - "learning_rate": 9.810480567597278e-06, - "loss": 0.4004, - "step": 4874 - }, - { - "epoch": 0.318606627017842, - "grad_norm": 0.5066052079200745, - "learning_rate": 9.810385328051806e-06, - "loss": 0.3837, - "step": 4875 - }, - { - "epoch": 0.3186719822233841, - "grad_norm": 0.4741377830505371, - "learning_rate": 9.810290065044418e-06, - "loss": 0.3798, - "step": 4876 - }, - { - "epoch": 0.31873733742892624, - "grad_norm": 0.504368245601654, - "learning_rate": 9.81019477857558e-06, - "loss": 0.4111, - "step": 4877 - }, - { - "epoch": 0.3188026926344683, - "grad_norm": 0.44283339381217957, - "learning_rate": 9.810099468645756e-06, - "loss": 0.3882, - "step": 4878 - }, - { - "epoch": 0.31886804784001044, - "grad_norm": 0.44365501403808594, - "learning_rate": 9.810004135255409e-06, - "loss": 0.3829, - "step": 4879 - }, - { - "epoch": 0.31893340304555257, - "grad_norm": 0.4875386953353882, - "learning_rate": 9.809908778405006e-06, - "loss": 0.4594, - "step": 4880 - }, - { - "epoch": 0.3189987582510947, - "grad_norm": 0.49244803190231323, - "learning_rate": 9.80981339809501e-06, - "loss": 0.4057, - "step": 4881 - }, - { - "epoch": 0.3190641134566368, - "grad_norm": 0.4813857674598694, - "learning_rate": 9.80971799432589e-06, - "loss": 0.4603, - "step": 4882 - }, - { - "epoch": 0.31912946866217895, - "grad_norm": 0.47425293922424316, - "learning_rate": 9.809622567098108e-06, - "loss": 0.456, - "step": 4883 - }, - { - "epoch": 0.3191948238677211, - "grad_norm": 0.5302835702896118, - "learning_rate": 9.809527116412133e-06, - "loss": 0.4752, - "step": 4884 - }, - { - "epoch": 0.3192601790732632, - "grad_norm": 0.45904242992401123, - "learning_rate": 9.809431642268424e-06, - "loss": 0.4233, - "step": 4885 - }, - { - "epoch": 0.31932553427880533, - "grad_norm": 0.48194384574890137, - "learning_rate": 9.809336144667454e-06, - "loss": 0.4301, - "step": 4886 - }, - { - "epoch": 0.3193908894843474, - "grad_norm": 0.45269575715065, - "learning_rate": 9.809240623609683e-06, - "loss": 0.4144, - "step": 4887 - }, - { - "epoch": 0.31945624468988953, - "grad_norm": 0.43395453691482544, - "learning_rate": 9.809145079095581e-06, - "loss": 0.395, - "step": 4888 - }, - { - "epoch": 0.31952159989543166, - "grad_norm": 0.48187124729156494, - "learning_rate": 9.809049511125613e-06, - "loss": 0.4329, - "step": 4889 - }, - { - "epoch": 0.3195869551009738, - "grad_norm": 0.5337011218070984, - "learning_rate": 9.808953919700243e-06, - "loss": 0.4293, - "step": 4890 - }, - { - "epoch": 0.3196523103065159, - "grad_norm": 0.4339333176612854, - "learning_rate": 9.80885830481994e-06, - "loss": 0.3563, - "step": 4891 - }, - { - "epoch": 0.31971766551205805, - "grad_norm": 0.4830499291419983, - "learning_rate": 9.808762666485167e-06, - "loss": 0.4327, - "step": 4892 - }, - { - "epoch": 0.3197830207176002, - "grad_norm": 0.476817786693573, - "learning_rate": 9.808667004696394e-06, - "loss": 0.3727, - "step": 4893 - }, - { - "epoch": 0.3198483759231423, - "grad_norm": 0.5114434957504272, - "learning_rate": 9.808571319454085e-06, - "loss": 0.4815, - "step": 4894 - }, - { - "epoch": 0.3199137311286844, - "grad_norm": 0.46511292457580566, - "learning_rate": 9.80847561075871e-06, - "loss": 0.4011, - "step": 4895 - }, - { - "epoch": 0.3199790863342265, - "grad_norm": 0.49821165204048157, - "learning_rate": 9.808379878610732e-06, - "loss": 0.4239, - "step": 4896 - }, - { - "epoch": 0.32004444153976863, - "grad_norm": 0.4866395890712738, - "learning_rate": 9.80828412301062e-06, - "loss": 0.39, - "step": 4897 - }, - { - "epoch": 0.32010979674531076, - "grad_norm": 0.43412354588508606, - "learning_rate": 9.80818834395884e-06, - "loss": 0.3541, - "step": 4898 - }, - { - "epoch": 0.3201751519508529, - "grad_norm": 0.4595932960510254, - "learning_rate": 9.808092541455862e-06, - "loss": 0.4036, - "step": 4899 - }, - { - "epoch": 0.320240507156395, - "grad_norm": 0.46590059995651245, - "learning_rate": 9.807996715502148e-06, - "loss": 0.3805, - "step": 4900 - }, - { - "epoch": 0.32030586236193714, - "grad_norm": 0.5135805606842041, - "learning_rate": 9.80790086609817e-06, - "loss": 0.4093, - "step": 4901 - }, - { - "epoch": 0.32037121756747927, - "grad_norm": 0.4773502051830292, - "learning_rate": 9.807804993244394e-06, - "loss": 0.3762, - "step": 4902 - }, - { - "epoch": 0.32043657277302134, - "grad_norm": 0.44620808959007263, - "learning_rate": 9.807709096941287e-06, - "loss": 0.3836, - "step": 4903 - }, - { - "epoch": 0.3205019279785635, - "grad_norm": 0.4804670810699463, - "learning_rate": 9.807613177189318e-06, - "loss": 0.3777, - "step": 4904 - }, - { - "epoch": 0.3205672831841056, - "grad_norm": 0.44309714436531067, - "learning_rate": 9.807517233988952e-06, - "loss": 0.3586, - "step": 4905 - }, - { - "epoch": 0.32063263838964773, - "grad_norm": 0.5175981521606445, - "learning_rate": 9.80742126734066e-06, - "loss": 0.4161, - "step": 4906 - }, - { - "epoch": 0.32069799359518986, - "grad_norm": 0.514324426651001, - "learning_rate": 9.80732527724491e-06, - "loss": 0.5022, - "step": 4907 - }, - { - "epoch": 0.320763348800732, - "grad_norm": 0.4319520890712738, - "learning_rate": 9.807229263702169e-06, - "loss": 0.3607, - "step": 4908 - }, - { - "epoch": 0.3208287040062741, - "grad_norm": 0.5073283314704895, - "learning_rate": 9.807133226712905e-06, - "loss": 0.4502, - "step": 4909 - }, - { - "epoch": 0.32089405921181624, - "grad_norm": 0.4918001890182495, - "learning_rate": 9.807037166277586e-06, - "loss": 0.3678, - "step": 4910 - }, - { - "epoch": 0.32095941441735837, - "grad_norm": 0.4978146553039551, - "learning_rate": 9.806941082396683e-06, - "loss": 0.4138, - "step": 4911 - }, - { - "epoch": 0.32102476962290044, - "grad_norm": 0.47458863258361816, - "learning_rate": 9.806844975070662e-06, - "loss": 0.3617, - "step": 4912 - }, - { - "epoch": 0.32109012482844257, - "grad_norm": 0.5195441246032715, - "learning_rate": 9.806748844299994e-06, - "loss": 0.4032, - "step": 4913 - }, - { - "epoch": 0.3211554800339847, - "grad_norm": 0.4969855546951294, - "learning_rate": 9.806652690085146e-06, - "loss": 0.4334, - "step": 4914 - }, - { - "epoch": 0.3212208352395268, - "grad_norm": 0.48649662733078003, - "learning_rate": 9.806556512426586e-06, - "loss": 0.4287, - "step": 4915 - }, - { - "epoch": 0.32128619044506895, - "grad_norm": 0.49306175112724304, - "learning_rate": 9.806460311324787e-06, - "loss": 0.3836, - "step": 4916 - }, - { - "epoch": 0.3213515456506111, - "grad_norm": 0.5063831806182861, - "learning_rate": 9.806364086780216e-06, - "loss": 0.4104, - "step": 4917 - }, - { - "epoch": 0.3214169008561532, - "grad_norm": 0.4891044497489929, - "learning_rate": 9.80626783879334e-06, - "loss": 0.3936, - "step": 4918 - }, - { - "epoch": 0.32148225606169534, - "grad_norm": 0.5033397078514099, - "learning_rate": 9.806171567364633e-06, - "loss": 0.4287, - "step": 4919 - }, - { - "epoch": 0.3215476112672374, - "grad_norm": 0.5145201086997986, - "learning_rate": 9.806075272494562e-06, - "loss": 0.4038, - "step": 4920 - }, - { - "epoch": 0.32161296647277954, - "grad_norm": 0.4916360080242157, - "learning_rate": 9.805978954183595e-06, - "loss": 0.4277, - "step": 4921 - }, - { - "epoch": 0.32167832167832167, - "grad_norm": 0.5213127732276917, - "learning_rate": 9.805882612432205e-06, - "loss": 0.4565, - "step": 4922 - }, - { - "epoch": 0.3217436768838638, - "grad_norm": 0.508414626121521, - "learning_rate": 9.80578624724086e-06, - "loss": 0.3896, - "step": 4923 - }, - { - "epoch": 0.3218090320894059, - "grad_norm": 0.4595962464809418, - "learning_rate": 9.805689858610033e-06, - "loss": 0.4066, - "step": 4924 - }, - { - "epoch": 0.32187438729494805, - "grad_norm": 0.5002645254135132, - "learning_rate": 9.805593446540188e-06, - "loss": 0.4005, - "step": 4925 - }, - { - "epoch": 0.3219397425004902, - "grad_norm": 0.47116753458976746, - "learning_rate": 9.8054970110318e-06, - "loss": 0.3768, - "step": 4926 - }, - { - "epoch": 0.3220050977060323, - "grad_norm": 0.5051274299621582, - "learning_rate": 9.80540055208534e-06, - "loss": 0.4134, - "step": 4927 - }, - { - "epoch": 0.32207045291157443, - "grad_norm": 0.5116768479347229, - "learning_rate": 9.805304069701276e-06, - "loss": 0.4379, - "step": 4928 - }, - { - "epoch": 0.3221358081171165, - "grad_norm": 0.4674070477485657, - "learning_rate": 9.805207563880078e-06, - "loss": 0.4012, - "step": 4929 - }, - { - "epoch": 0.32220116332265863, - "grad_norm": 0.46312177181243896, - "learning_rate": 9.805111034622221e-06, - "loss": 0.4031, - "step": 4930 - }, - { - "epoch": 0.32226651852820076, - "grad_norm": 0.5625869035720825, - "learning_rate": 9.80501448192817e-06, - "loss": 0.4368, - "step": 4931 - }, - { - "epoch": 0.3223318737337429, - "grad_norm": 0.5150803923606873, - "learning_rate": 9.8049179057984e-06, - "loss": 0.4453, - "step": 4932 - }, - { - "epoch": 0.322397228939285, - "grad_norm": 0.49893876910209656, - "learning_rate": 9.80482130623338e-06, - "loss": 0.4186, - "step": 4933 - }, - { - "epoch": 0.32246258414482715, - "grad_norm": 0.5060268640518188, - "learning_rate": 9.804724683233584e-06, - "loss": 0.4821, - "step": 4934 - }, - { - "epoch": 0.3225279393503693, - "grad_norm": 0.4880949854850769, - "learning_rate": 9.80462803679948e-06, - "loss": 0.4248, - "step": 4935 - }, - { - "epoch": 0.3225932945559114, - "grad_norm": 0.445774644613266, - "learning_rate": 9.80453136693154e-06, - "loss": 0.3584, - "step": 4936 - }, - { - "epoch": 0.3226586497614535, - "grad_norm": 0.48850998282432556, - "learning_rate": 9.804434673630236e-06, - "loss": 0.4177, - "step": 4937 - }, - { - "epoch": 0.3227240049669956, - "grad_norm": 0.46730414032936096, - "learning_rate": 9.80433795689604e-06, - "loss": 0.4052, - "step": 4938 - }, - { - "epoch": 0.32278936017253773, - "grad_norm": 0.5012783408164978, - "learning_rate": 9.804241216729425e-06, - "loss": 0.4372, - "step": 4939 - }, - { - "epoch": 0.32285471537807986, - "grad_norm": 0.4370277523994446, - "learning_rate": 9.804144453130858e-06, - "loss": 0.3667, - "step": 4940 - }, - { - "epoch": 0.322920070583622, - "grad_norm": 0.521051287651062, - "learning_rate": 9.804047666100816e-06, - "loss": 0.4247, - "step": 4941 - }, - { - "epoch": 0.3229854257891641, - "grad_norm": 0.4966512620449066, - "learning_rate": 9.80395085563977e-06, - "loss": 0.3422, - "step": 4942 - }, - { - "epoch": 0.32305078099470624, - "grad_norm": 0.4762769639492035, - "learning_rate": 9.80385402174819e-06, - "loss": 0.4253, - "step": 4943 - }, - { - "epoch": 0.32311613620024837, - "grad_norm": 0.46476513147354126, - "learning_rate": 9.80375716442655e-06, - "loss": 0.3973, - "step": 4944 - }, - { - "epoch": 0.32318149140579044, - "grad_norm": 0.5150056481361389, - "learning_rate": 9.803660283675323e-06, - "loss": 0.4335, - "step": 4945 - }, - { - "epoch": 0.32324684661133257, - "grad_norm": 0.45104771852493286, - "learning_rate": 9.80356337949498e-06, - "loss": 0.3653, - "step": 4946 - }, - { - "epoch": 0.3233122018168747, - "grad_norm": 0.4779456555843353, - "learning_rate": 9.803466451885995e-06, - "loss": 0.3699, - "step": 4947 - }, - { - "epoch": 0.32337755702241683, - "grad_norm": 0.49230143427848816, - "learning_rate": 9.803369500848839e-06, - "loss": 0.4168, - "step": 4948 - }, - { - "epoch": 0.32344291222795896, - "grad_norm": 0.554871678352356, - "learning_rate": 9.803272526383985e-06, - "loss": 0.5157, - "step": 4949 - }, - { - "epoch": 0.3235082674335011, - "grad_norm": 0.45842504501342773, - "learning_rate": 9.803175528491909e-06, - "loss": 0.3702, - "step": 4950 - }, - { - "epoch": 0.3235736226390432, - "grad_norm": 0.4780445694923401, - "learning_rate": 9.80307850717308e-06, - "loss": 0.4227, - "step": 4951 - }, - { - "epoch": 0.32363897784458534, - "grad_norm": 0.4503099024295807, - "learning_rate": 9.802981462427975e-06, - "loss": 0.3619, - "step": 4952 - }, - { - "epoch": 0.32370433305012747, - "grad_norm": 0.970115602016449, - "learning_rate": 9.802884394257066e-06, - "loss": 0.3804, - "step": 4953 - }, - { - "epoch": 0.32376968825566954, - "grad_norm": 0.5114675760269165, - "learning_rate": 9.802787302660823e-06, - "loss": 0.3734, - "step": 4954 - }, - { - "epoch": 0.32383504346121167, - "grad_norm": 0.5145630836486816, - "learning_rate": 9.802690187639725e-06, - "loss": 0.434, - "step": 4955 - }, - { - "epoch": 0.3239003986667538, - "grad_norm": 0.46561309695243835, - "learning_rate": 9.802593049194243e-06, - "loss": 0.3827, - "step": 4956 - }, - { - "epoch": 0.3239657538722959, - "grad_norm": 0.5032932758331299, - "learning_rate": 9.80249588732485e-06, - "loss": 0.3679, - "step": 4957 - }, - { - "epoch": 0.32403110907783805, - "grad_norm": 0.4980396628379822, - "learning_rate": 9.80239870203202e-06, - "loss": 0.416, - "step": 4958 - }, - { - "epoch": 0.3240964642833802, - "grad_norm": 0.4916515052318573, - "learning_rate": 9.802301493316229e-06, - "loss": 0.3937, - "step": 4959 - }, - { - "epoch": 0.3241618194889223, - "grad_norm": 0.48291802406311035, - "learning_rate": 9.80220426117795e-06, - "loss": 0.4344, - "step": 4960 - }, - { - "epoch": 0.32422717469446444, - "grad_norm": 0.5286703705787659, - "learning_rate": 9.802107005617658e-06, - "loss": 0.4356, - "step": 4961 - }, - { - "epoch": 0.3242925299000065, - "grad_norm": 0.4947613775730133, - "learning_rate": 9.802009726635825e-06, - "loss": 0.4141, - "step": 4962 - }, - { - "epoch": 0.32435788510554864, - "grad_norm": 0.4657216966152191, - "learning_rate": 9.801912424232928e-06, - "loss": 0.4074, - "step": 4963 - }, - { - "epoch": 0.32442324031109077, - "grad_norm": 0.4662453830242157, - "learning_rate": 9.801815098409439e-06, - "loss": 0.3998, - "step": 4964 - }, - { - "epoch": 0.3244885955166329, - "grad_norm": 0.46382343769073486, - "learning_rate": 9.801717749165835e-06, - "loss": 0.371, - "step": 4965 - }, - { - "epoch": 0.324553950722175, - "grad_norm": 0.46157434582710266, - "learning_rate": 9.801620376502592e-06, - "loss": 0.3621, - "step": 4966 - }, - { - "epoch": 0.32461930592771715, - "grad_norm": 0.47322601079940796, - "learning_rate": 9.80152298042018e-06, - "loss": 0.4423, - "step": 4967 - }, - { - "epoch": 0.3246846611332593, - "grad_norm": 0.5009423494338989, - "learning_rate": 9.80142556091908e-06, - "loss": 0.4561, - "step": 4968 - }, - { - "epoch": 0.3247500163388014, - "grad_norm": 0.4587727189064026, - "learning_rate": 9.801328117999762e-06, - "loss": 0.3871, - "step": 4969 - }, - { - "epoch": 0.32481537154434353, - "grad_norm": 0.46929559111595154, - "learning_rate": 9.801230651662703e-06, - "loss": 0.4181, - "step": 4970 - }, - { - "epoch": 0.3248807267498856, - "grad_norm": 0.5121802687644958, - "learning_rate": 9.80113316190838e-06, - "loss": 0.4297, - "step": 4971 - }, - { - "epoch": 0.32494608195542773, - "grad_norm": 0.5123001933097839, - "learning_rate": 9.801035648737266e-06, - "loss": 0.4747, - "step": 4972 - }, - { - "epoch": 0.32501143716096986, - "grad_norm": 0.461913526058197, - "learning_rate": 9.80093811214984e-06, - "loss": 0.3989, - "step": 4973 - }, - { - "epoch": 0.325076792366512, - "grad_norm": 0.441133975982666, - "learning_rate": 9.800840552146576e-06, - "loss": 0.3403, - "step": 4974 - }, - { - "epoch": 0.3251421475720541, - "grad_norm": 0.4553835988044739, - "learning_rate": 9.800742968727947e-06, - "loss": 0.3905, - "step": 4975 - }, - { - "epoch": 0.32520750277759625, - "grad_norm": 0.4045025706291199, - "learning_rate": 9.800645361894432e-06, - "loss": 0.3317, - "step": 4976 - }, - { - "epoch": 0.3252728579831384, - "grad_norm": 0.4598342478275299, - "learning_rate": 9.800547731646508e-06, - "loss": 0.3968, - "step": 4977 - }, - { - "epoch": 0.3253382131886805, - "grad_norm": 0.45731550455093384, - "learning_rate": 9.800450077984648e-06, - "loss": 0.3762, - "step": 4978 - }, - { - "epoch": 0.3254035683942226, - "grad_norm": 0.504328727722168, - "learning_rate": 9.800352400909331e-06, - "loss": 0.4328, - "step": 4979 - }, - { - "epoch": 0.3254689235997647, - "grad_norm": 0.4817037582397461, - "learning_rate": 9.800254700421032e-06, - "loss": 0.4082, - "step": 4980 - }, - { - "epoch": 0.32553427880530683, - "grad_norm": 0.5107463002204895, - "learning_rate": 9.800156976520227e-06, - "loss": 0.4378, - "step": 4981 - }, - { - "epoch": 0.32559963401084896, - "grad_norm": 0.44809725880622864, - "learning_rate": 9.800059229207394e-06, - "loss": 0.3974, - "step": 4982 - }, - { - "epoch": 0.3256649892163911, - "grad_norm": 0.4267743229866028, - "learning_rate": 9.799961458483011e-06, - "loss": 0.3727, - "step": 4983 - }, - { - "epoch": 0.3257303444219332, - "grad_norm": 0.4678683876991272, - "learning_rate": 9.79986366434755e-06, - "loss": 0.41, - "step": 4984 - }, - { - "epoch": 0.32579569962747534, - "grad_norm": 0.4843780994415283, - "learning_rate": 9.799765846801494e-06, - "loss": 0.4251, - "step": 4985 - }, - { - "epoch": 0.32586105483301747, - "grad_norm": 0.4673929512500763, - "learning_rate": 9.799668005845315e-06, - "loss": 0.3929, - "step": 4986 - }, - { - "epoch": 0.32592641003855954, - "grad_norm": 0.47374188899993896, - "learning_rate": 9.799570141479493e-06, - "loss": 0.4159, - "step": 4987 - }, - { - "epoch": 0.32599176524410167, - "grad_norm": 0.443036288022995, - "learning_rate": 9.799472253704504e-06, - "loss": 0.3666, - "step": 4988 - }, - { - "epoch": 0.3260571204496438, - "grad_norm": 0.44358721375465393, - "learning_rate": 9.799374342520829e-06, - "loss": 0.3487, - "step": 4989 - }, - { - "epoch": 0.32612247565518593, - "grad_norm": 0.4454592764377594, - "learning_rate": 9.799276407928938e-06, - "loss": 0.3666, - "step": 4990 - }, - { - "epoch": 0.32618783086072806, - "grad_norm": 0.47238078713417053, - "learning_rate": 9.799178449929318e-06, - "loss": 0.3851, - "step": 4991 - }, - { - "epoch": 0.3262531860662702, - "grad_norm": 0.5328107476234436, - "learning_rate": 9.799080468522439e-06, - "loss": 0.4467, - "step": 4992 - }, - { - "epoch": 0.3263185412718123, - "grad_norm": 0.4789188504219055, - "learning_rate": 9.798982463708785e-06, - "loss": 0.4206, - "step": 4993 - }, - { - "epoch": 0.32638389647735444, - "grad_norm": 0.4428007900714874, - "learning_rate": 9.798884435488829e-06, - "loss": 0.3313, - "step": 4994 - }, - { - "epoch": 0.32644925168289657, - "grad_norm": 0.4746893346309662, - "learning_rate": 9.79878638386305e-06, - "loss": 0.4084, - "step": 4995 - }, - { - "epoch": 0.32651460688843864, - "grad_norm": 0.4389726221561432, - "learning_rate": 9.79868830883193e-06, - "loss": 0.314, - "step": 4996 - }, - { - "epoch": 0.32657996209398077, - "grad_norm": 0.45682990550994873, - "learning_rate": 9.798590210395943e-06, - "loss": 0.3771, - "step": 4997 - }, - { - "epoch": 0.3266453172995229, - "grad_norm": 0.48574742674827576, - "learning_rate": 9.79849208855557e-06, - "loss": 0.409, - "step": 4998 - }, - { - "epoch": 0.326710672505065, - "grad_norm": 0.7528083324432373, - "learning_rate": 9.798393943311286e-06, - "loss": 0.3808, - "step": 4999 - }, - { - "epoch": 0.32677602771060715, - "grad_norm": 0.5345405340194702, - "learning_rate": 9.798295774663576e-06, - "loss": 0.4426, - "step": 5000 - }, - { - "epoch": 0.3268413829161493, - "grad_norm": 0.5063375234603882, - "learning_rate": 9.798197582612914e-06, - "loss": 0.4368, - "step": 5001 - }, - { - "epoch": 0.3269067381216914, - "grad_norm": 0.4531327188014984, - "learning_rate": 9.79809936715978e-06, - "loss": 0.3938, - "step": 5002 - }, - { - "epoch": 0.32697209332723354, - "grad_norm": 0.4788791835308075, - "learning_rate": 9.798001128304652e-06, - "loss": 0.4262, - "step": 5003 - }, - { - "epoch": 0.3270374485327756, - "grad_norm": 0.4856942296028137, - "learning_rate": 9.79790286604801e-06, - "loss": 0.407, - "step": 5004 - }, - { - "epoch": 0.32710280373831774, - "grad_norm": 0.48944294452667236, - "learning_rate": 9.797804580390337e-06, - "loss": 0.3982, - "step": 5005 - }, - { - "epoch": 0.32716815894385987, - "grad_norm": 0.45362338423728943, - "learning_rate": 9.797706271332106e-06, - "loss": 0.3587, - "step": 5006 - }, - { - "epoch": 0.327233514149402, - "grad_norm": 0.4958278238773346, - "learning_rate": 9.7976079388738e-06, - "loss": 0.4072, - "step": 5007 - }, - { - "epoch": 0.3272988693549441, - "grad_norm": 0.48262861371040344, - "learning_rate": 9.797509583015898e-06, - "loss": 0.3975, - "step": 5008 - }, - { - "epoch": 0.32736422456048625, - "grad_norm": 0.48696309328079224, - "learning_rate": 9.79741120375888e-06, - "loss": 0.4068, - "step": 5009 - }, - { - "epoch": 0.3274295797660284, - "grad_norm": 0.44781172275543213, - "learning_rate": 9.797312801103227e-06, - "loss": 0.3859, - "step": 5010 - }, - { - "epoch": 0.3274949349715705, - "grad_norm": 0.4755784273147583, - "learning_rate": 9.797214375049416e-06, - "loss": 0.4057, - "step": 5011 - }, - { - "epoch": 0.32756029017711263, - "grad_norm": 0.4229740500450134, - "learning_rate": 9.797115925597929e-06, - "loss": 0.3708, - "step": 5012 - }, - { - "epoch": 0.3276256453826547, - "grad_norm": 0.4717099368572235, - "learning_rate": 9.797017452749245e-06, - "loss": 0.4248, - "step": 5013 - }, - { - "epoch": 0.32769100058819683, - "grad_norm": 0.5107245445251465, - "learning_rate": 9.796918956503845e-06, - "loss": 0.429, - "step": 5014 - }, - { - "epoch": 0.32775635579373896, - "grad_norm": 0.4377608299255371, - "learning_rate": 9.796820436862212e-06, - "loss": 0.363, - "step": 5015 - }, - { - "epoch": 0.3278217109992811, - "grad_norm": 0.45328426361083984, - "learning_rate": 9.79672189382482e-06, - "loss": 0.3815, - "step": 5016 - }, - { - "epoch": 0.3278870662048232, - "grad_norm": 0.47197818756103516, - "learning_rate": 9.796623327392156e-06, - "loss": 0.4267, - "step": 5017 - }, - { - "epoch": 0.32795242141036535, - "grad_norm": 0.5229854583740234, - "learning_rate": 9.796524737564697e-06, - "loss": 0.4421, - "step": 5018 - }, - { - "epoch": 0.3280177766159075, - "grad_norm": 0.522213876247406, - "learning_rate": 9.796426124342927e-06, - "loss": 0.4714, - "step": 5019 - }, - { - "epoch": 0.3280831318214496, - "grad_norm": 0.4774869680404663, - "learning_rate": 9.796327487727324e-06, - "loss": 0.4045, - "step": 5020 - }, - { - "epoch": 0.3281484870269917, - "grad_norm": 0.4742977023124695, - "learning_rate": 9.796228827718371e-06, - "loss": 0.4321, - "step": 5021 - }, - { - "epoch": 0.3282138422325338, - "grad_norm": 0.4220803678035736, - "learning_rate": 9.796130144316547e-06, - "loss": 0.3275, - "step": 5022 - }, - { - "epoch": 0.32827919743807593, - "grad_norm": 0.5076951384544373, - "learning_rate": 9.796031437522335e-06, - "loss": 0.4535, - "step": 5023 - }, - { - "epoch": 0.32834455264361806, - "grad_norm": 0.47955322265625, - "learning_rate": 9.795932707336218e-06, - "loss": 0.4473, - "step": 5024 - }, - { - "epoch": 0.3284099078491602, - "grad_norm": 0.4727722406387329, - "learning_rate": 9.795833953758674e-06, - "loss": 0.3898, - "step": 5025 - }, - { - "epoch": 0.3284752630547023, - "grad_norm": 1.5456161499023438, - "learning_rate": 9.795735176790187e-06, - "loss": 0.417, - "step": 5026 - }, - { - "epoch": 0.32854061826024444, - "grad_norm": 0.43937140703201294, - "learning_rate": 9.795636376431239e-06, - "loss": 0.3395, - "step": 5027 - }, - { - "epoch": 0.32860597346578657, - "grad_norm": 0.44527506828308105, - "learning_rate": 9.795537552682307e-06, - "loss": 0.3617, - "step": 5028 - }, - { - "epoch": 0.32867132867132864, - "grad_norm": 0.45572924613952637, - "learning_rate": 9.795438705543883e-06, - "loss": 0.3687, - "step": 5029 - }, - { - "epoch": 0.32873668387687077, - "grad_norm": 0.4578344523906708, - "learning_rate": 9.795339835016439e-06, - "loss": 0.3938, - "step": 5030 - }, - { - "epoch": 0.3288020390824129, - "grad_norm": 0.43646878004074097, - "learning_rate": 9.795240941100462e-06, - "loss": 0.356, - "step": 5031 - }, - { - "epoch": 0.32886739428795503, - "grad_norm": 0.4848819375038147, - "learning_rate": 9.795142023796434e-06, - "loss": 0.392, - "step": 5032 - }, - { - "epoch": 0.32893274949349716, - "grad_norm": 0.4681786298751831, - "learning_rate": 9.795043083104838e-06, - "loss": 0.3809, - "step": 5033 - }, - { - "epoch": 0.3289981046990393, - "grad_norm": 0.45695218443870544, - "learning_rate": 9.794944119026154e-06, - "loss": 0.3961, - "step": 5034 - }, - { - "epoch": 0.3290634599045814, - "grad_norm": 0.43824487924575806, - "learning_rate": 9.794845131560869e-06, - "loss": 0.3477, - "step": 5035 - }, - { - "epoch": 0.32912881511012354, - "grad_norm": 0.4756670594215393, - "learning_rate": 9.794746120709461e-06, - "loss": 0.4306, - "step": 5036 - }, - { - "epoch": 0.32919417031566567, - "grad_norm": 0.4912284016609192, - "learning_rate": 9.794647086472416e-06, - "loss": 0.4074, - "step": 5037 - }, - { - "epoch": 0.32925952552120774, - "grad_norm": 0.4648467004299164, - "learning_rate": 9.794548028850215e-06, - "loss": 0.3952, - "step": 5038 - }, - { - "epoch": 0.32932488072674987, - "grad_norm": 0.42651990056037903, - "learning_rate": 9.794448947843345e-06, - "loss": 0.3829, - "step": 5039 - }, - { - "epoch": 0.329390235932292, - "grad_norm": 0.42729347944259644, - "learning_rate": 9.794349843452284e-06, - "loss": 0.3613, - "step": 5040 - }, - { - "epoch": 0.3294555911378341, - "grad_norm": 0.4726579487323761, - "learning_rate": 9.794250715677518e-06, - "loss": 0.3726, - "step": 5041 - }, - { - "epoch": 0.32952094634337625, - "grad_norm": 0.45628806948661804, - "learning_rate": 9.794151564519532e-06, - "loss": 0.4173, - "step": 5042 - }, - { - "epoch": 0.3295863015489184, - "grad_norm": 0.43488210439682007, - "learning_rate": 9.794052389978806e-06, - "loss": 0.3653, - "step": 5043 - }, - { - "epoch": 0.3296516567544605, - "grad_norm": 0.4603349566459656, - "learning_rate": 9.793953192055826e-06, - "loss": 0.3667, - "step": 5044 - }, - { - "epoch": 0.32971701196000264, - "grad_norm": 0.4581151604652405, - "learning_rate": 9.793853970751077e-06, - "loss": 0.3619, - "step": 5045 - }, - { - "epoch": 0.3297823671655447, - "grad_norm": 0.4290536642074585, - "learning_rate": 9.793754726065042e-06, - "loss": 0.3468, - "step": 5046 - }, - { - "epoch": 0.32984772237108684, - "grad_norm": 0.4620216488838196, - "learning_rate": 9.793655457998202e-06, - "loss": 0.3679, - "step": 5047 - }, - { - "epoch": 0.32991307757662897, - "grad_norm": 0.4647311568260193, - "learning_rate": 9.793556166551045e-06, - "loss": 0.3801, - "step": 5048 - }, - { - "epoch": 0.3299784327821711, - "grad_norm": 0.4497399628162384, - "learning_rate": 9.793456851724053e-06, - "loss": 0.3709, - "step": 5049 - }, - { - "epoch": 0.3300437879877132, - "grad_norm": 0.4660644829273224, - "learning_rate": 9.793357513517711e-06, - "loss": 0.4166, - "step": 5050 - }, - { - "epoch": 0.33010914319325535, - "grad_norm": 0.4483477473258972, - "learning_rate": 9.793258151932505e-06, - "loss": 0.4081, - "step": 5051 - }, - { - "epoch": 0.3301744983987975, - "grad_norm": 0.4496282637119293, - "learning_rate": 9.79315876696892e-06, - "loss": 0.3476, - "step": 5052 - }, - { - "epoch": 0.3302398536043396, - "grad_norm": 0.45826923847198486, - "learning_rate": 9.793059358627437e-06, - "loss": 0.4207, - "step": 5053 - }, - { - "epoch": 0.33030520880988173, - "grad_norm": 0.47262704372406006, - "learning_rate": 9.792959926908543e-06, - "loss": 0.4522, - "step": 5054 - }, - { - "epoch": 0.3303705640154238, - "grad_norm": 0.4803999662399292, - "learning_rate": 9.792860471812723e-06, - "loss": 0.4425, - "step": 5055 - }, - { - "epoch": 0.33043591922096593, - "grad_norm": 0.5056366920471191, - "learning_rate": 9.792760993340463e-06, - "loss": 0.4312, - "step": 5056 - }, - { - "epoch": 0.33050127442650806, - "grad_norm": 0.480058491230011, - "learning_rate": 9.792661491492247e-06, - "loss": 0.4309, - "step": 5057 - }, - { - "epoch": 0.3305666296320502, - "grad_norm": 0.45647209882736206, - "learning_rate": 9.79256196626856e-06, - "loss": 0.4007, - "step": 5058 - }, - { - "epoch": 0.3306319848375923, - "grad_norm": 0.4690300524234772, - "learning_rate": 9.792462417669887e-06, - "loss": 0.4277, - "step": 5059 - }, - { - "epoch": 0.33069734004313445, - "grad_norm": 0.4632362127304077, - "learning_rate": 9.792362845696716e-06, - "loss": 0.3772, - "step": 5060 - }, - { - "epoch": 0.3307626952486766, - "grad_norm": 0.45691102743148804, - "learning_rate": 9.792263250349532e-06, - "loss": 0.4073, - "step": 5061 - }, - { - "epoch": 0.3308280504542187, - "grad_norm": 0.47819358110427856, - "learning_rate": 9.79216363162882e-06, - "loss": 0.4256, - "step": 5062 - }, - { - "epoch": 0.3308934056597608, - "grad_norm": 0.508348286151886, - "learning_rate": 9.792063989535064e-06, - "loss": 0.4182, - "step": 5063 - }, - { - "epoch": 0.3309587608653029, - "grad_norm": 0.48340311646461487, - "learning_rate": 9.791964324068753e-06, - "loss": 0.3991, - "step": 5064 - }, - { - "epoch": 0.33102411607084503, - "grad_norm": 0.49286407232284546, - "learning_rate": 9.791864635230372e-06, - "loss": 0.429, - "step": 5065 - }, - { - "epoch": 0.33108947127638716, - "grad_norm": 0.4674474895000458, - "learning_rate": 9.791764923020407e-06, - "loss": 0.3437, - "step": 5066 - }, - { - "epoch": 0.3311548264819293, - "grad_norm": 0.45047426223754883, - "learning_rate": 9.791665187439344e-06, - "loss": 0.3666, - "step": 5067 - }, - { - "epoch": 0.3312201816874714, - "grad_norm": 0.5228461027145386, - "learning_rate": 9.791565428487668e-06, - "loss": 0.4622, - "step": 5068 - }, - { - "epoch": 0.33128553689301354, - "grad_norm": 0.48304784297943115, - "learning_rate": 9.79146564616587e-06, - "loss": 0.4281, - "step": 5069 - }, - { - "epoch": 0.33135089209855567, - "grad_norm": 0.466929167509079, - "learning_rate": 9.791365840474434e-06, - "loss": 0.3524, - "step": 5070 - }, - { - "epoch": 0.33141624730409774, - "grad_norm": 0.5755605697631836, - "learning_rate": 9.791266011413846e-06, - "loss": 0.4478, - "step": 5071 - }, - { - "epoch": 0.33148160250963987, - "grad_norm": 0.5303314328193665, - "learning_rate": 9.791166158984593e-06, - "loss": 0.4259, - "step": 5072 - }, - { - "epoch": 0.331546957715182, - "grad_norm": 0.48098957538604736, - "learning_rate": 9.791066283187165e-06, - "loss": 0.3562, - "step": 5073 - }, - { - "epoch": 0.33161231292072413, - "grad_norm": 0.46910491585731506, - "learning_rate": 9.790966384022047e-06, - "loss": 0.3722, - "step": 5074 - }, - { - "epoch": 0.33167766812626626, - "grad_norm": 0.4961332380771637, - "learning_rate": 9.790866461489725e-06, - "loss": 0.3984, - "step": 5075 - }, - { - "epoch": 0.3317430233318084, - "grad_norm": 0.49478527903556824, - "learning_rate": 9.790766515590688e-06, - "loss": 0.4149, - "step": 5076 - }, - { - "epoch": 0.3318083785373505, - "grad_norm": 0.47687938809394836, - "learning_rate": 9.790666546325422e-06, - "loss": 0.4127, - "step": 5077 - }, - { - "epoch": 0.33187373374289264, - "grad_norm": 0.4761548340320587, - "learning_rate": 9.790566553694415e-06, - "loss": 0.3927, - "step": 5078 - }, - { - "epoch": 0.33193908894843477, - "grad_norm": 0.49564751982688904, - "learning_rate": 9.790466537698157e-06, - "loss": 0.4073, - "step": 5079 - }, - { - "epoch": 0.33200444415397684, - "grad_norm": 0.46131080389022827, - "learning_rate": 9.790366498337134e-06, - "loss": 0.368, - "step": 5080 - }, - { - "epoch": 0.33206979935951897, - "grad_norm": 0.46849748492240906, - "learning_rate": 9.790266435611835e-06, - "loss": 0.3266, - "step": 5081 - }, - { - "epoch": 0.3321351545650611, - "grad_norm": 0.45079654455184937, - "learning_rate": 9.790166349522745e-06, - "loss": 0.3825, - "step": 5082 - }, - { - "epoch": 0.3322005097706032, - "grad_norm": 0.49253472685813904, - "learning_rate": 9.790066240070355e-06, - "loss": 0.4122, - "step": 5083 - }, - { - "epoch": 0.33226586497614535, - "grad_norm": 0.45975279808044434, - "learning_rate": 9.789966107255154e-06, - "loss": 0.3643, - "step": 5084 - }, - { - "epoch": 0.3323312201816875, - "grad_norm": 0.48659080266952515, - "learning_rate": 9.789865951077626e-06, - "loss": 0.4521, - "step": 5085 - }, - { - "epoch": 0.3323965753872296, - "grad_norm": 0.5242331027984619, - "learning_rate": 9.789765771538264e-06, - "loss": 0.4395, - "step": 5086 - }, - { - "epoch": 0.33246193059277174, - "grad_norm": 0.5002502799034119, - "learning_rate": 9.789665568637556e-06, - "loss": 0.4354, - "step": 5087 - }, - { - "epoch": 0.3325272857983138, - "grad_norm": 0.4841715395450592, - "learning_rate": 9.789565342375989e-06, - "loss": 0.379, - "step": 5088 - }, - { - "epoch": 0.33259264100385594, - "grad_norm": 0.4892784357070923, - "learning_rate": 9.78946509275405e-06, - "loss": 0.4251, - "step": 5089 - }, - { - "epoch": 0.33265799620939807, - "grad_norm": 0.4734096825122833, - "learning_rate": 9.789364819772233e-06, - "loss": 0.4023, - "step": 5090 - }, - { - "epoch": 0.3327233514149402, - "grad_norm": 0.515963077545166, - "learning_rate": 9.789264523431026e-06, - "loss": 0.4709, - "step": 5091 - }, - { - "epoch": 0.3327887066204823, - "grad_norm": 0.48209792375564575, - "learning_rate": 9.789164203730915e-06, - "loss": 0.4056, - "step": 5092 - }, - { - "epoch": 0.33285406182602445, - "grad_norm": 0.5084713697433472, - "learning_rate": 9.78906386067239e-06, - "loss": 0.4286, - "step": 5093 - }, - { - "epoch": 0.3329194170315666, - "grad_norm": 0.4530640244483948, - "learning_rate": 9.788963494255943e-06, - "loss": 0.3542, - "step": 5094 - }, - { - "epoch": 0.3329847722371087, - "grad_norm": 0.4868583083152771, - "learning_rate": 9.788863104482062e-06, - "loss": 0.4345, - "step": 5095 - }, - { - "epoch": 0.33305012744265083, - "grad_norm": 0.45594459772109985, - "learning_rate": 9.788762691351235e-06, - "loss": 0.3618, - "step": 5096 - }, - { - "epoch": 0.3331154826481929, - "grad_norm": 0.4857982397079468, - "learning_rate": 9.788662254863955e-06, - "loss": 0.4055, - "step": 5097 - }, - { - "epoch": 0.33318083785373503, - "grad_norm": 0.4911488890647888, - "learning_rate": 9.78856179502071e-06, - "loss": 0.3894, - "step": 5098 - }, - { - "epoch": 0.33324619305927716, - "grad_norm": 0.5101039409637451, - "learning_rate": 9.788461311821989e-06, - "loss": 0.4088, - "step": 5099 - }, - { - "epoch": 0.3333115482648193, - "grad_norm": 0.4890715777873993, - "learning_rate": 9.788360805268285e-06, - "loss": 0.4507, - "step": 5100 - }, - { - "epoch": 0.3333769034703614, - "grad_norm": 0.4909462034702301, - "learning_rate": 9.788260275360087e-06, - "loss": 0.437, - "step": 5101 - }, - { - "epoch": 0.33344225867590355, - "grad_norm": 0.4736488163471222, - "learning_rate": 9.788159722097883e-06, - "loss": 0.4016, - "step": 5102 - }, - { - "epoch": 0.3335076138814457, - "grad_norm": 0.5087499618530273, - "learning_rate": 9.788059145482166e-06, - "loss": 0.454, - "step": 5103 - }, - { - "epoch": 0.3335729690869878, - "grad_norm": 0.4919663965702057, - "learning_rate": 9.787958545513425e-06, - "loss": 0.4448, - "step": 5104 - }, - { - "epoch": 0.3336383242925299, - "grad_norm": 0.49457287788391113, - "learning_rate": 9.787857922192151e-06, - "loss": 0.3984, - "step": 5105 - }, - { - "epoch": 0.333703679498072, - "grad_norm": 0.4914587736129761, - "learning_rate": 9.787757275518837e-06, - "loss": 0.4475, - "step": 5106 - }, - { - "epoch": 0.33376903470361413, - "grad_norm": 0.4439575672149658, - "learning_rate": 9.787656605493971e-06, - "loss": 0.3585, - "step": 5107 - }, - { - "epoch": 0.33383438990915626, - "grad_norm": 0.44091567397117615, - "learning_rate": 9.787555912118047e-06, - "loss": 0.3979, - "step": 5108 - }, - { - "epoch": 0.3338997451146984, - "grad_norm": 0.45893609523773193, - "learning_rate": 9.787455195391554e-06, - "loss": 0.3796, - "step": 5109 - }, - { - "epoch": 0.3339651003202405, - "grad_norm": 0.44441351294517517, - "learning_rate": 9.787354455314981e-06, - "loss": 0.3589, - "step": 5110 - }, - { - "epoch": 0.33403045552578264, - "grad_norm": 0.48110440373420715, - "learning_rate": 9.787253691888822e-06, - "loss": 0.3875, - "step": 5111 - }, - { - "epoch": 0.33409581073132477, - "grad_norm": 0.46581965684890747, - "learning_rate": 9.78715290511357e-06, - "loss": 0.4347, - "step": 5112 - }, - { - "epoch": 0.33416116593686684, - "grad_norm": 0.46184372901916504, - "learning_rate": 9.787052094989716e-06, - "loss": 0.3464, - "step": 5113 - }, - { - "epoch": 0.33422652114240897, - "grad_norm": 0.45507046580314636, - "learning_rate": 9.786951261517747e-06, - "loss": 0.3647, - "step": 5114 - }, - { - "epoch": 0.3342918763479511, - "grad_norm": 0.4871494174003601, - "learning_rate": 9.78685040469816e-06, - "loss": 0.3937, - "step": 5115 - }, - { - "epoch": 0.33435723155349323, - "grad_norm": 0.48447662591934204, - "learning_rate": 9.786749524531446e-06, - "loss": 0.379, - "step": 5116 - }, - { - "epoch": 0.33442258675903536, - "grad_norm": 0.5180823802947998, - "learning_rate": 9.786648621018096e-06, - "loss": 0.4582, - "step": 5117 - }, - { - "epoch": 0.3344879419645775, - "grad_norm": 0.46476200222969055, - "learning_rate": 9.786547694158602e-06, - "loss": 0.3645, - "step": 5118 - }, - { - "epoch": 0.3345532971701196, - "grad_norm": 0.46656325459480286, - "learning_rate": 9.786446743953457e-06, - "loss": 0.3696, - "step": 5119 - }, - { - "epoch": 0.33461865237566174, - "grad_norm": 0.44941627979278564, - "learning_rate": 9.786345770403153e-06, - "loss": 0.3945, - "step": 5120 - }, - { - "epoch": 0.33468400758120387, - "grad_norm": 0.483822762966156, - "learning_rate": 9.786244773508182e-06, - "loss": 0.4445, - "step": 5121 - }, - { - "epoch": 0.33474936278674594, - "grad_norm": 0.4862872362136841, - "learning_rate": 9.786143753269038e-06, - "loss": 0.3872, - "step": 5122 - }, - { - "epoch": 0.33481471799228807, - "grad_norm": 0.4959210753440857, - "learning_rate": 9.786042709686212e-06, - "loss": 0.4456, - "step": 5123 - }, - { - "epoch": 0.3348800731978302, - "grad_norm": 0.48525673151016235, - "learning_rate": 9.785941642760198e-06, - "loss": 0.4144, - "step": 5124 - }, - { - "epoch": 0.3349454284033723, - "grad_norm": 0.5273424386978149, - "learning_rate": 9.785840552491488e-06, - "loss": 0.4137, - "step": 5125 - }, - { - "epoch": 0.33501078360891445, - "grad_norm": 0.43980926275253296, - "learning_rate": 9.785739438880577e-06, - "loss": 0.3595, - "step": 5126 - }, - { - "epoch": 0.3350761388144566, - "grad_norm": 0.44354137778282166, - "learning_rate": 9.785638301927956e-06, - "loss": 0.3639, - "step": 5127 - }, - { - "epoch": 0.3351414940199987, - "grad_norm": 0.4843825101852417, - "learning_rate": 9.785537141634118e-06, - "loss": 0.3806, - "step": 5128 - }, - { - "epoch": 0.33520684922554084, - "grad_norm": 0.47106778621673584, - "learning_rate": 9.78543595799956e-06, - "loss": 0.4186, - "step": 5129 - }, - { - "epoch": 0.3352722044310829, - "grad_norm": 0.48503050208091736, - "learning_rate": 9.78533475102477e-06, - "loss": 0.3921, - "step": 5130 - }, - { - "epoch": 0.33533755963662504, - "grad_norm": 0.5354419946670532, - "learning_rate": 9.785233520710248e-06, - "loss": 0.5246, - "step": 5131 - }, - { - "epoch": 0.33540291484216717, - "grad_norm": 0.5101272463798523, - "learning_rate": 9.785132267056483e-06, - "loss": 0.44, - "step": 5132 - }, - { - "epoch": 0.3354682700477093, - "grad_norm": 0.44861114025115967, - "learning_rate": 9.785030990063968e-06, - "loss": 0.3906, - "step": 5133 - }, - { - "epoch": 0.3355336252532514, - "grad_norm": 0.47585704922676086, - "learning_rate": 9.784929689733202e-06, - "loss": 0.4409, - "step": 5134 - }, - { - "epoch": 0.33559898045879355, - "grad_norm": 0.4529799222946167, - "learning_rate": 9.784828366064677e-06, - "loss": 0.3843, - "step": 5135 - }, - { - "epoch": 0.3356643356643357, - "grad_norm": 0.427306592464447, - "learning_rate": 9.784727019058884e-06, - "loss": 0.3343, - "step": 5136 - }, - { - "epoch": 0.3357296908698778, - "grad_norm": 0.5161789655685425, - "learning_rate": 9.78462564871632e-06, - "loss": 0.4282, - "step": 5137 - }, - { - "epoch": 0.33579504607541993, - "grad_norm": 0.5111210346221924, - "learning_rate": 9.78452425503748e-06, - "loss": 0.4301, - "step": 5138 - }, - { - "epoch": 0.335860401280962, - "grad_norm": 0.4565163850784302, - "learning_rate": 9.784422838022855e-06, - "loss": 0.3852, - "step": 5139 - }, - { - "epoch": 0.33592575648650413, - "grad_norm": 0.46572020649909973, - "learning_rate": 9.784321397672947e-06, - "loss": 0.3753, - "step": 5140 - }, - { - "epoch": 0.33599111169204626, - "grad_norm": 0.5026994347572327, - "learning_rate": 9.784219933988242e-06, - "loss": 0.4427, - "step": 5141 - }, - { - "epoch": 0.3360564668975884, - "grad_norm": 0.4901333153247833, - "learning_rate": 9.784118446969241e-06, - "loss": 0.422, - "step": 5142 - }, - { - "epoch": 0.3361218221031305, - "grad_norm": 0.45889827609062195, - "learning_rate": 9.784016936616436e-06, - "loss": 0.4112, - "step": 5143 - }, - { - "epoch": 0.33618717730867265, - "grad_norm": 0.43094539642333984, - "learning_rate": 9.783915402930324e-06, - "loss": 0.3826, - "step": 5144 - }, - { - "epoch": 0.3362525325142148, - "grad_norm": 0.500842809677124, - "learning_rate": 9.7838138459114e-06, - "loss": 0.4607, - "step": 5145 - }, - { - "epoch": 0.3363178877197569, - "grad_norm": 0.5126469731330872, - "learning_rate": 9.783712265560157e-06, - "loss": 0.4148, - "step": 5146 - }, - { - "epoch": 0.336383242925299, - "grad_norm": 0.4601035714149475, - "learning_rate": 9.783610661877093e-06, - "loss": 0.4182, - "step": 5147 - }, - { - "epoch": 0.3364485981308411, - "grad_norm": 0.43518704175949097, - "learning_rate": 9.783509034862702e-06, - "loss": 0.3486, - "step": 5148 - }, - { - "epoch": 0.33651395333638323, - "grad_norm": 0.45159706473350525, - "learning_rate": 9.78340738451748e-06, - "loss": 0.3528, - "step": 5149 - }, - { - "epoch": 0.33657930854192536, - "grad_norm": 0.42190390825271606, - "learning_rate": 9.783305710841923e-06, - "loss": 0.3731, - "step": 5150 - }, - { - "epoch": 0.3366446637474675, - "grad_norm": 0.47011086344718933, - "learning_rate": 9.78320401383653e-06, - "loss": 0.4203, - "step": 5151 - }, - { - "epoch": 0.3367100189530096, - "grad_norm": 0.43599116802215576, - "learning_rate": 9.78310229350179e-06, - "loss": 0.336, - "step": 5152 - }, - { - "epoch": 0.33677537415855174, - "grad_norm": 0.4630354046821594, - "learning_rate": 9.783000549838205e-06, - "loss": 0.3942, - "step": 5153 - }, - { - "epoch": 0.33684072936409387, - "grad_norm": 0.4692355692386627, - "learning_rate": 9.782898782846268e-06, - "loss": 0.3973, - "step": 5154 - }, - { - "epoch": 0.33690608456963594, - "grad_norm": 0.45558837056159973, - "learning_rate": 9.782796992526478e-06, - "loss": 0.4409, - "step": 5155 - }, - { - "epoch": 0.33697143977517807, - "grad_norm": 0.4604448974132538, - "learning_rate": 9.782695178879328e-06, - "loss": 0.397, - "step": 5156 - }, - { - "epoch": 0.3370367949807202, - "grad_norm": 0.4689256548881531, - "learning_rate": 9.78259334190532e-06, - "loss": 0.4044, - "step": 5157 - }, - { - "epoch": 0.33710215018626233, - "grad_norm": 0.4754765033721924, - "learning_rate": 9.782491481604945e-06, - "loss": 0.4247, - "step": 5158 - }, - { - "epoch": 0.33716750539180446, - "grad_norm": 0.4838542342185974, - "learning_rate": 9.782389597978703e-06, - "loss": 0.4363, - "step": 5159 - }, - { - "epoch": 0.3372328605973466, - "grad_norm": 0.428602933883667, - "learning_rate": 9.782287691027092e-06, - "loss": 0.3475, - "step": 5160 - }, - { - "epoch": 0.3372982158028887, - "grad_norm": 0.4951384961605072, - "learning_rate": 9.782185760750605e-06, - "loss": 0.3946, - "step": 5161 - }, - { - "epoch": 0.33736357100843084, - "grad_norm": 0.47937989234924316, - "learning_rate": 9.782083807149741e-06, - "loss": 0.4453, - "step": 5162 - }, - { - "epoch": 0.33742892621397297, - "grad_norm": 0.44877153635025024, - "learning_rate": 9.781981830224998e-06, - "loss": 0.385, - "step": 5163 - }, - { - "epoch": 0.33749428141951504, - "grad_norm": 0.4709230065345764, - "learning_rate": 9.781879829976875e-06, - "loss": 0.3853, - "step": 5164 - }, - { - "epoch": 0.33755963662505717, - "grad_norm": 0.48274341225624084, - "learning_rate": 9.781777806405866e-06, - "loss": 0.4037, - "step": 5165 - }, - { - "epoch": 0.3376249918305993, - "grad_norm": 0.43445566296577454, - "learning_rate": 9.781675759512468e-06, - "loss": 0.362, - "step": 5166 - }, - { - "epoch": 0.3376903470361414, - "grad_norm": 0.4579179883003235, - "learning_rate": 9.781573689297183e-06, - "loss": 0.3854, - "step": 5167 - }, - { - "epoch": 0.33775570224168355, - "grad_norm": 0.4639510214328766, - "learning_rate": 9.781471595760507e-06, - "loss": 0.4206, - "step": 5168 - }, - { - "epoch": 0.3378210574472257, - "grad_norm": 0.4690952003002167, - "learning_rate": 9.781369478902936e-06, - "loss": 0.4138, - "step": 5169 - }, - { - "epoch": 0.3378864126527678, - "grad_norm": 0.45831334590911865, - "learning_rate": 9.781267338724971e-06, - "loss": 0.4038, - "step": 5170 - }, - { - "epoch": 0.33795176785830994, - "grad_norm": 0.46414628624916077, - "learning_rate": 9.781165175227108e-06, - "loss": 0.4025, - "step": 5171 - }, - { - "epoch": 0.338017123063852, - "grad_norm": 0.43977731466293335, - "learning_rate": 9.781062988409846e-06, - "loss": 0.374, - "step": 5172 - }, - { - "epoch": 0.33808247826939414, - "grad_norm": 0.46739593148231506, - "learning_rate": 9.780960778273685e-06, - "loss": 0.4351, - "step": 5173 - }, - { - "epoch": 0.33814783347493627, - "grad_norm": 0.4551723301410675, - "learning_rate": 9.78085854481912e-06, - "loss": 0.3894, - "step": 5174 - }, - { - "epoch": 0.3382131886804784, - "grad_norm": 0.4585324227809906, - "learning_rate": 9.780756288046653e-06, - "loss": 0.3566, - "step": 5175 - }, - { - "epoch": 0.3382785438860205, - "grad_norm": 0.4442490339279175, - "learning_rate": 9.78065400795678e-06, - "loss": 0.3983, - "step": 5176 - }, - { - "epoch": 0.33834389909156265, - "grad_norm": 0.4540204405784607, - "learning_rate": 9.780551704550003e-06, - "loss": 0.3662, - "step": 5177 - }, - { - "epoch": 0.3384092542971048, - "grad_norm": 0.5195617079734802, - "learning_rate": 9.780449377826818e-06, - "loss": 0.4317, - "step": 5178 - }, - { - "epoch": 0.3384746095026469, - "grad_norm": 0.4470427632331848, - "learning_rate": 9.780347027787726e-06, - "loss": 0.3786, - "step": 5179 - }, - { - "epoch": 0.33853996470818903, - "grad_norm": 0.469748854637146, - "learning_rate": 9.780244654433224e-06, - "loss": 0.3994, - "step": 5180 - }, - { - "epoch": 0.3386053199137311, - "grad_norm": 0.48107877373695374, - "learning_rate": 9.780142257763815e-06, - "loss": 0.436, - "step": 5181 - }, - { - "epoch": 0.33867067511927323, - "grad_norm": 0.4793350398540497, - "learning_rate": 9.780039837779994e-06, - "loss": 0.3486, - "step": 5182 - }, - { - "epoch": 0.33873603032481536, - "grad_norm": 0.4753890633583069, - "learning_rate": 9.779937394482263e-06, - "loss": 0.4229, - "step": 5183 - }, - { - "epoch": 0.3388013855303575, - "grad_norm": 0.47949421405792236, - "learning_rate": 9.779834927871124e-06, - "loss": 0.3871, - "step": 5184 - }, - { - "epoch": 0.3388667407358996, - "grad_norm": 0.4666721522808075, - "learning_rate": 9.779732437947072e-06, - "loss": 0.393, - "step": 5185 - }, - { - "epoch": 0.33893209594144175, - "grad_norm": 0.46229565143585205, - "learning_rate": 9.779629924710608e-06, - "loss": 0.4183, - "step": 5186 - }, - { - "epoch": 0.3389974511469839, - "grad_norm": 0.4590091407299042, - "learning_rate": 9.779527388162236e-06, - "loss": 0.3985, - "step": 5187 - }, - { - "epoch": 0.339062806352526, - "grad_norm": 0.4367179274559021, - "learning_rate": 9.77942482830245e-06, - "loss": 0.3498, - "step": 5188 - }, - { - "epoch": 0.3391281615580681, - "grad_norm": 0.4872339963912964, - "learning_rate": 9.779322245131755e-06, - "loss": 0.4351, - "step": 5189 - }, - { - "epoch": 0.3391935167636102, - "grad_norm": 0.43242597579956055, - "learning_rate": 9.77921963865065e-06, - "loss": 0.33, - "step": 5190 - }, - { - "epoch": 0.33925887196915233, - "grad_norm": 0.4527781009674072, - "learning_rate": 9.779117008859635e-06, - "loss": 0.3354, - "step": 5191 - }, - { - "epoch": 0.33932422717469446, - "grad_norm": 0.45935195684432983, - "learning_rate": 9.77901435575921e-06, - "loss": 0.397, - "step": 5192 - }, - { - "epoch": 0.3393895823802366, - "grad_norm": 0.4584974944591522, - "learning_rate": 9.778911679349877e-06, - "loss": 0.4017, - "step": 5193 - }, - { - "epoch": 0.3394549375857787, - "grad_norm": 0.4841710925102234, - "learning_rate": 9.778808979632136e-06, - "loss": 0.3804, - "step": 5194 - }, - { - "epoch": 0.33952029279132084, - "grad_norm": 0.46480992436408997, - "learning_rate": 9.778706256606488e-06, - "loss": 0.4033, - "step": 5195 - }, - { - "epoch": 0.33958564799686297, - "grad_norm": 0.4626947343349457, - "learning_rate": 9.778603510273435e-06, - "loss": 0.3508, - "step": 5196 - }, - { - "epoch": 0.33965100320240504, - "grad_norm": 0.48210132122039795, - "learning_rate": 9.778500740633476e-06, - "loss": 0.4048, - "step": 5197 - }, - { - "epoch": 0.33971635840794717, - "grad_norm": 0.4198559820652008, - "learning_rate": 9.778397947687114e-06, - "loss": 0.3492, - "step": 5198 - }, - { - "epoch": 0.3397817136134893, - "grad_norm": 0.4523397982120514, - "learning_rate": 9.77829513143485e-06, - "loss": 0.3888, - "step": 5199 - }, - { - "epoch": 0.33984706881903143, - "grad_norm": 0.4638594090938568, - "learning_rate": 9.778192291877185e-06, - "loss": 0.4205, - "step": 5200 - }, - { - "epoch": 0.33991242402457356, - "grad_norm": 0.436329185962677, - "learning_rate": 9.778089429014619e-06, - "loss": 0.3906, - "step": 5201 - }, - { - "epoch": 0.3399777792301157, - "grad_norm": 0.5121533274650574, - "learning_rate": 9.777986542847658e-06, - "loss": 0.3779, - "step": 5202 - }, - { - "epoch": 0.3400431344356578, - "grad_norm": 0.4427978992462158, - "learning_rate": 9.777883633376801e-06, - "loss": 0.3539, - "step": 5203 - }, - { - "epoch": 0.34010848964119994, - "grad_norm": 0.46456608176231384, - "learning_rate": 9.77778070060255e-06, - "loss": 0.4138, - "step": 5204 - }, - { - "epoch": 0.34017384484674207, - "grad_norm": 0.4669976234436035, - "learning_rate": 9.777677744525406e-06, - "loss": 0.395, - "step": 5205 - }, - { - "epoch": 0.34023920005228414, - "grad_norm": 0.47090378403663635, - "learning_rate": 9.777574765145874e-06, - "loss": 0.4263, - "step": 5206 - }, - { - "epoch": 0.34030455525782627, - "grad_norm": 0.49403876066207886, - "learning_rate": 9.777471762464456e-06, - "loss": 0.389, - "step": 5207 - }, - { - "epoch": 0.3403699104633684, - "grad_norm": 0.4523240625858307, - "learning_rate": 9.777368736481652e-06, - "loss": 0.3816, - "step": 5208 - }, - { - "epoch": 0.3404352656689105, - "grad_norm": 0.4438014328479767, - "learning_rate": 9.777265687197965e-06, - "loss": 0.4136, - "step": 5209 - }, - { - "epoch": 0.34050062087445265, - "grad_norm": 0.45609503984451294, - "learning_rate": 9.777162614613898e-06, - "loss": 0.4134, - "step": 5210 - }, - { - "epoch": 0.3405659760799948, - "grad_norm": 0.44377031922340393, - "learning_rate": 9.777059518729954e-06, - "loss": 0.3619, - "step": 5211 - }, - { - "epoch": 0.3406313312855369, - "grad_norm": 0.4724767804145813, - "learning_rate": 9.776956399546638e-06, - "loss": 0.4506, - "step": 5212 - }, - { - "epoch": 0.34069668649107904, - "grad_norm": 0.45906752347946167, - "learning_rate": 9.77685325706445e-06, - "loss": 0.4049, - "step": 5213 - }, - { - "epoch": 0.3407620416966211, - "grad_norm": 0.455089271068573, - "learning_rate": 9.776750091283891e-06, - "loss": 0.4018, - "step": 5214 - }, - { - "epoch": 0.34082739690216324, - "grad_norm": 0.43956664204597473, - "learning_rate": 9.77664690220547e-06, - "loss": 0.4037, - "step": 5215 - }, - { - "epoch": 0.34089275210770537, - "grad_norm": 0.4752846956253052, - "learning_rate": 9.776543689829685e-06, - "loss": 0.413, - "step": 5216 - }, - { - "epoch": 0.3409581073132475, - "grad_norm": 0.48933643102645874, - "learning_rate": 9.776440454157043e-06, - "loss": 0.4334, - "step": 5217 - }, - { - "epoch": 0.3410234625187896, - "grad_norm": 0.4655178487300873, - "learning_rate": 9.776337195188046e-06, - "loss": 0.4199, - "step": 5218 - }, - { - "epoch": 0.34108881772433175, - "grad_norm": 0.45571649074554443, - "learning_rate": 9.776233912923198e-06, - "loss": 0.4059, - "step": 5219 - }, - { - "epoch": 0.3411541729298739, - "grad_norm": 0.503077507019043, - "learning_rate": 9.776130607363003e-06, - "loss": 0.4645, - "step": 5220 - }, - { - "epoch": 0.341219528135416, - "grad_norm": 0.4741753339767456, - "learning_rate": 9.776027278507963e-06, - "loss": 0.4037, - "step": 5221 - }, - { - "epoch": 0.34128488334095813, - "grad_norm": 0.43636104464530945, - "learning_rate": 9.775923926358584e-06, - "loss": 0.3667, - "step": 5222 - }, - { - "epoch": 0.3413502385465002, - "grad_norm": 0.5163292288780212, - "learning_rate": 9.77582055091537e-06, - "loss": 0.4511, - "step": 5223 - }, - { - "epoch": 0.34141559375204233, - "grad_norm": 0.4682936370372772, - "learning_rate": 9.775717152178825e-06, - "loss": 0.4061, - "step": 5224 - }, - { - "epoch": 0.34148094895758446, - "grad_norm": 0.449651300907135, - "learning_rate": 9.775613730149452e-06, - "loss": 0.3349, - "step": 5225 - }, - { - "epoch": 0.3415463041631266, - "grad_norm": 0.45347052812576294, - "learning_rate": 9.775510284827756e-06, - "loss": 0.3614, - "step": 5226 - }, - { - "epoch": 0.3416116593686687, - "grad_norm": 0.4409697949886322, - "learning_rate": 9.775406816214244e-06, - "loss": 0.3522, - "step": 5227 - }, - { - "epoch": 0.34167701457421085, - "grad_norm": 0.45578670501708984, - "learning_rate": 9.775303324309416e-06, - "loss": 0.4011, - "step": 5228 - }, - { - "epoch": 0.341742369779753, - "grad_norm": 0.5071733593940735, - "learning_rate": 9.77519980911378e-06, - "loss": 0.3959, - "step": 5229 - }, - { - "epoch": 0.3418077249852951, - "grad_norm": 0.5738910436630249, - "learning_rate": 9.775096270627841e-06, - "loss": 0.4307, - "step": 5230 - }, - { - "epoch": 0.3418730801908372, - "grad_norm": 0.44033265113830566, - "learning_rate": 9.774992708852104e-06, - "loss": 0.3795, - "step": 5231 - }, - { - "epoch": 0.3419384353963793, - "grad_norm": 0.47738906741142273, - "learning_rate": 9.774889123787072e-06, - "loss": 0.4089, - "step": 5232 - }, - { - "epoch": 0.34200379060192143, - "grad_norm": 0.4668895900249481, - "learning_rate": 9.774785515433252e-06, - "loss": 0.3823, - "step": 5233 - }, - { - "epoch": 0.34206914580746356, - "grad_norm": 0.4882426857948303, - "learning_rate": 9.77468188379115e-06, - "loss": 0.4395, - "step": 5234 - }, - { - "epoch": 0.3421345010130057, - "grad_norm": 0.44981008768081665, - "learning_rate": 9.77457822886127e-06, - "loss": 0.3734, - "step": 5235 - }, - { - "epoch": 0.3421998562185478, - "grad_norm": 0.43056395649909973, - "learning_rate": 9.774474550644115e-06, - "loss": 0.3545, - "step": 5236 - }, - { - "epoch": 0.34226521142408994, - "grad_norm": 0.4812091886997223, - "learning_rate": 9.774370849140196e-06, - "loss": 0.4662, - "step": 5237 - }, - { - "epoch": 0.34233056662963207, - "grad_norm": 0.48207148909568787, - "learning_rate": 9.774267124350017e-06, - "loss": 0.4371, - "step": 5238 - }, - { - "epoch": 0.34239592183517414, - "grad_norm": 0.47329193353652954, - "learning_rate": 9.774163376274083e-06, - "loss": 0.3981, - "step": 5239 - }, - { - "epoch": 0.34246127704071627, - "grad_norm": 0.45506712794303894, - "learning_rate": 9.774059604912899e-06, - "loss": 0.3911, - "step": 5240 - }, - { - "epoch": 0.3425266322462584, - "grad_norm": 0.47248575091362, - "learning_rate": 9.773955810266972e-06, - "loss": 0.3569, - "step": 5241 - }, - { - "epoch": 0.34259198745180053, - "grad_norm": 0.443486750125885, - "learning_rate": 9.773851992336812e-06, - "loss": 0.3458, - "step": 5242 - }, - { - "epoch": 0.34265734265734266, - "grad_norm": 0.47293639183044434, - "learning_rate": 9.773748151122918e-06, - "loss": 0.3702, - "step": 5243 - }, - { - "epoch": 0.3427226978628848, - "grad_norm": 0.492477148771286, - "learning_rate": 9.773644286625803e-06, - "loss": 0.4305, - "step": 5244 - }, - { - "epoch": 0.3427880530684269, - "grad_norm": 0.5646010637283325, - "learning_rate": 9.77354039884597e-06, - "loss": 0.5537, - "step": 5245 - }, - { - "epoch": 0.34285340827396904, - "grad_norm": 0.5110243558883667, - "learning_rate": 9.773436487783927e-06, - "loss": 0.4172, - "step": 5246 - }, - { - "epoch": 0.34291876347951117, - "grad_norm": 0.4851347506046295, - "learning_rate": 9.77333255344018e-06, - "loss": 0.4543, - "step": 5247 - }, - { - "epoch": 0.34298411868505324, - "grad_norm": 0.5463439226150513, - "learning_rate": 9.773228595815238e-06, - "loss": 0.4234, - "step": 5248 - }, - { - "epoch": 0.34304947389059537, - "grad_norm": 0.46826621890068054, - "learning_rate": 9.773124614909606e-06, - "loss": 0.4571, - "step": 5249 - }, - { - "epoch": 0.3431148290961375, - "grad_norm": 0.48085686564445496, - "learning_rate": 9.773020610723792e-06, - "loss": 0.4218, - "step": 5250 - }, - { - "epoch": 0.3431801843016796, - "grad_norm": 0.5734224915504456, - "learning_rate": 9.7729165832583e-06, - "loss": 0.4724, - "step": 5251 - }, - { - "epoch": 0.34324553950722175, - "grad_norm": 0.44442424178123474, - "learning_rate": 9.772812532513644e-06, - "loss": 0.3399, - "step": 5252 - }, - { - "epoch": 0.3433108947127639, - "grad_norm": 0.4710421562194824, - "learning_rate": 9.772708458490326e-06, - "loss": 0.4409, - "step": 5253 - }, - { - "epoch": 0.343376249918306, - "grad_norm": 0.49244454503059387, - "learning_rate": 9.772604361188855e-06, - "loss": 0.3878, - "step": 5254 - }, - { - "epoch": 0.34344160512384814, - "grad_norm": 0.5072375535964966, - "learning_rate": 9.772500240609741e-06, - "loss": 0.4529, - "step": 5255 - }, - { - "epoch": 0.3435069603293902, - "grad_norm": 0.4409193694591522, - "learning_rate": 9.77239609675349e-06, - "loss": 0.3615, - "step": 5256 - }, - { - "epoch": 0.34357231553493234, - "grad_norm": 0.45375481247901917, - "learning_rate": 9.772291929620608e-06, - "loss": 0.3863, - "step": 5257 - }, - { - "epoch": 0.34363767074047447, - "grad_norm": 0.46864810585975647, - "learning_rate": 9.772187739211607e-06, - "loss": 0.4379, - "step": 5258 - }, - { - "epoch": 0.3437030259460166, - "grad_norm": 0.46199578046798706, - "learning_rate": 9.77208352552699e-06, - "loss": 0.4082, - "step": 5259 - }, - { - "epoch": 0.3437683811515587, - "grad_norm": 0.46045181155204773, - "learning_rate": 9.77197928856727e-06, - "loss": 0.381, - "step": 5260 - }, - { - "epoch": 0.34383373635710085, - "grad_norm": 0.4706612825393677, - "learning_rate": 9.771875028332956e-06, - "loss": 0.3901, - "step": 5261 - }, - { - "epoch": 0.343899091562643, - "grad_norm": 0.48839184641838074, - "learning_rate": 9.771770744824553e-06, - "loss": 0.424, - "step": 5262 - }, - { - "epoch": 0.3439644467681851, - "grad_norm": 0.46977683901786804, - "learning_rate": 9.77166643804257e-06, - "loss": 0.3891, - "step": 5263 - }, - { - "epoch": 0.34402980197372723, - "grad_norm": 0.46044379472732544, - "learning_rate": 9.771562107987518e-06, - "loss": 0.401, - "step": 5264 - }, - { - "epoch": 0.3440951571792693, - "grad_norm": 0.4603383541107178, - "learning_rate": 9.771457754659903e-06, - "loss": 0.3716, - "step": 5265 - }, - { - "epoch": 0.34416051238481143, - "grad_norm": 0.43309587240219116, - "learning_rate": 9.771353378060236e-06, - "loss": 0.3743, - "step": 5266 - }, - { - "epoch": 0.34422586759035356, - "grad_norm": 0.44623294472694397, - "learning_rate": 9.771248978189027e-06, - "loss": 0.3862, - "step": 5267 - }, - { - "epoch": 0.3442912227958957, - "grad_norm": 0.4425095021724701, - "learning_rate": 9.771144555046783e-06, - "loss": 0.3765, - "step": 5268 - }, - { - "epoch": 0.3443565780014378, - "grad_norm": 0.48969170451164246, - "learning_rate": 9.771040108634013e-06, - "loss": 0.4295, - "step": 5269 - }, - { - "epoch": 0.34442193320697995, - "grad_norm": 0.4806336760520935, - "learning_rate": 9.770935638951229e-06, - "loss": 0.4105, - "step": 5270 - }, - { - "epoch": 0.3444872884125221, - "grad_norm": 0.517914891242981, - "learning_rate": 9.770831145998938e-06, - "loss": 0.4211, - "step": 5271 - }, - { - "epoch": 0.3445526436180642, - "grad_norm": 0.4635848104953766, - "learning_rate": 9.770726629777652e-06, - "loss": 0.4277, - "step": 5272 - }, - { - "epoch": 0.3446179988236063, - "grad_norm": 0.47082462906837463, - "learning_rate": 9.770622090287879e-06, - "loss": 0.4485, - "step": 5273 - }, - { - "epoch": 0.3446833540291484, - "grad_norm": 0.5020426511764526, - "learning_rate": 9.77051752753013e-06, - "loss": 0.4301, - "step": 5274 - }, - { - "epoch": 0.34474870923469053, - "grad_norm": 0.4663870334625244, - "learning_rate": 9.770412941504912e-06, - "loss": 0.3832, - "step": 5275 - }, - { - "epoch": 0.34481406444023266, - "grad_norm": 0.49421462416648865, - "learning_rate": 9.77030833221274e-06, - "loss": 0.437, - "step": 5276 - }, - { - "epoch": 0.3448794196457748, - "grad_norm": 0.49273785948753357, - "learning_rate": 9.77020369965412e-06, - "loss": 0.3942, - "step": 5277 - }, - { - "epoch": 0.3449447748513169, - "grad_norm": 0.4762526750564575, - "learning_rate": 9.770099043829564e-06, - "loss": 0.4323, - "step": 5278 - }, - { - "epoch": 0.34501013005685904, - "grad_norm": 0.4772988259792328, - "learning_rate": 9.769994364739585e-06, - "loss": 0.3905, - "step": 5279 - }, - { - "epoch": 0.34507548526240117, - "grad_norm": 0.4762629568576813, - "learning_rate": 9.769889662384689e-06, - "loss": 0.3995, - "step": 5280 - }, - { - "epoch": 0.34514084046794324, - "grad_norm": 0.4549536406993866, - "learning_rate": 9.769784936765389e-06, - "loss": 0.4048, - "step": 5281 - }, - { - "epoch": 0.34520619567348537, - "grad_norm": 0.511263906955719, - "learning_rate": 9.769680187882195e-06, - "loss": 0.4277, - "step": 5282 - }, - { - "epoch": 0.3452715508790275, - "grad_norm": 0.47359809279441833, - "learning_rate": 9.769575415735618e-06, - "loss": 0.403, - "step": 5283 - }, - { - "epoch": 0.34533690608456963, - "grad_norm": 0.455730676651001, - "learning_rate": 9.76947062032617e-06, - "loss": 0.3845, - "step": 5284 - }, - { - "epoch": 0.34540226129011176, - "grad_norm": 0.5127042531967163, - "learning_rate": 9.76936580165436e-06, - "loss": 0.4843, - "step": 5285 - }, - { - "epoch": 0.3454676164956539, - "grad_norm": 0.4402686059474945, - "learning_rate": 9.769260959720703e-06, - "loss": 0.3592, - "step": 5286 - }, - { - "epoch": 0.345532971701196, - "grad_norm": 0.45730164647102356, - "learning_rate": 9.769156094525708e-06, - "loss": 0.3927, - "step": 5287 - }, - { - "epoch": 0.34559832690673814, - "grad_norm": 0.4772600829601288, - "learning_rate": 9.769051206069886e-06, - "loss": 0.4324, - "step": 5288 - }, - { - "epoch": 0.34566368211228027, - "grad_norm": 0.4552757143974304, - "learning_rate": 9.768946294353749e-06, - "loss": 0.3365, - "step": 5289 - }, - { - "epoch": 0.34572903731782234, - "grad_norm": 0.4652750492095947, - "learning_rate": 9.768841359377808e-06, - "loss": 0.3756, - "step": 5290 - }, - { - "epoch": 0.34579439252336447, - "grad_norm": 0.4607808589935303, - "learning_rate": 9.768736401142576e-06, - "loss": 0.3864, - "step": 5291 - }, - { - "epoch": 0.3458597477289066, - "grad_norm": 0.4790264368057251, - "learning_rate": 9.768631419648565e-06, - "loss": 0.4045, - "step": 5292 - }, - { - "epoch": 0.3459251029344487, - "grad_norm": 0.46356624364852905, - "learning_rate": 9.768526414896286e-06, - "loss": 0.4037, - "step": 5293 - }, - { - "epoch": 0.34599045813999085, - "grad_norm": 0.46210983395576477, - "learning_rate": 9.768421386886253e-06, - "loss": 0.4008, - "step": 5294 - }, - { - "epoch": 0.346055813345533, - "grad_norm": 0.4501861035823822, - "learning_rate": 9.768316335618975e-06, - "loss": 0.3902, - "step": 5295 - }, - { - "epoch": 0.3461211685510751, - "grad_norm": 0.4635646641254425, - "learning_rate": 9.76821126109497e-06, - "loss": 0.4008, - "step": 5296 - }, - { - "epoch": 0.34618652375661724, - "grad_norm": 0.4980321228504181, - "learning_rate": 9.768106163314742e-06, - "loss": 0.407, - "step": 5297 - }, - { - "epoch": 0.3462518789621593, - "grad_norm": 0.4468640089035034, - "learning_rate": 9.76800104227881e-06, - "loss": 0.3635, - "step": 5298 - }, - { - "epoch": 0.34631723416770144, - "grad_norm": 0.4522833526134491, - "learning_rate": 9.767895897987688e-06, - "loss": 0.376, - "step": 5299 - }, - { - "epoch": 0.34638258937324357, - "grad_norm": 0.43752583861351013, - "learning_rate": 9.767790730441882e-06, - "loss": 0.3781, - "step": 5300 - }, - { - "epoch": 0.3464479445787857, - "grad_norm": 0.4958682954311371, - "learning_rate": 9.767685539641911e-06, - "loss": 0.4468, - "step": 5301 - }, - { - "epoch": 0.3465132997843278, - "grad_norm": 0.4759429693222046, - "learning_rate": 9.767580325588286e-06, - "loss": 0.4078, - "step": 5302 - }, - { - "epoch": 0.34657865498986995, - "grad_norm": 0.45632949471473694, - "learning_rate": 9.76747508828152e-06, - "loss": 0.3854, - "step": 5303 - }, - { - "epoch": 0.3466440101954121, - "grad_norm": 0.41211169958114624, - "learning_rate": 9.767369827722123e-06, - "loss": 0.325, - "step": 5304 - }, - { - "epoch": 0.3467093654009542, - "grad_norm": 0.4712884724140167, - "learning_rate": 9.767264543910617e-06, - "loss": 0.3689, - "step": 5305 - }, - { - "epoch": 0.34677472060649633, - "grad_norm": 0.44414106011390686, - "learning_rate": 9.767159236847508e-06, - "loss": 0.3944, - "step": 5306 - }, - { - "epoch": 0.3468400758120384, - "grad_norm": 0.49052873253822327, - "learning_rate": 9.767053906533312e-06, - "loss": 0.4559, - "step": 5307 - }, - { - "epoch": 0.34690543101758053, - "grad_norm": 0.45353513956069946, - "learning_rate": 9.766948552968542e-06, - "loss": 0.4019, - "step": 5308 - }, - { - "epoch": 0.34697078622312266, - "grad_norm": 0.49267831444740295, - "learning_rate": 9.766843176153714e-06, - "loss": 0.3805, - "step": 5309 - }, - { - "epoch": 0.3470361414286648, - "grad_norm": 0.468070924282074, - "learning_rate": 9.766737776089339e-06, - "loss": 0.3812, - "step": 5310 - }, - { - "epoch": 0.3471014966342069, - "grad_norm": 0.491280198097229, - "learning_rate": 9.766632352775932e-06, - "loss": 0.4057, - "step": 5311 - }, - { - "epoch": 0.34716685183974905, - "grad_norm": 0.4380781352519989, - "learning_rate": 9.76652690621401e-06, - "loss": 0.3849, - "step": 5312 - }, - { - "epoch": 0.3472322070452912, - "grad_norm": 0.441520094871521, - "learning_rate": 9.766421436404083e-06, - "loss": 0.336, - "step": 5313 - }, - { - "epoch": 0.3472975622508333, - "grad_norm": 0.45357683300971985, - "learning_rate": 9.766315943346668e-06, - "loss": 0.3855, - "step": 5314 - }, - { - "epoch": 0.3473629174563754, - "grad_norm": 0.482016384601593, - "learning_rate": 9.76621042704228e-06, - "loss": 0.4172, - "step": 5315 - }, - { - "epoch": 0.3474282726619175, - "grad_norm": 0.5012636184692383, - "learning_rate": 9.76610488749143e-06, - "loss": 0.4854, - "step": 5316 - }, - { - "epoch": 0.34749362786745963, - "grad_norm": 0.4567621648311615, - "learning_rate": 9.765999324694637e-06, - "loss": 0.3955, - "step": 5317 - }, - { - "epoch": 0.34755898307300176, - "grad_norm": 0.4705146551132202, - "learning_rate": 9.765893738652415e-06, - "loss": 0.3946, - "step": 5318 - }, - { - "epoch": 0.3476243382785439, - "grad_norm": 0.44210195541381836, - "learning_rate": 9.765788129365276e-06, - "loss": 0.3675, - "step": 5319 - }, - { - "epoch": 0.347689693484086, - "grad_norm": 0.4626901149749756, - "learning_rate": 9.76568249683374e-06, - "loss": 0.4241, - "step": 5320 - }, - { - "epoch": 0.34775504868962814, - "grad_norm": 0.45457345247268677, - "learning_rate": 9.765576841058317e-06, - "loss": 0.3814, - "step": 5321 - }, - { - "epoch": 0.34782040389517027, - "grad_norm": 0.466986745595932, - "learning_rate": 9.765471162039526e-06, - "loss": 0.4255, - "step": 5322 - }, - { - "epoch": 0.3478857591007124, - "grad_norm": 0.4808909595012665, - "learning_rate": 9.76536545977788e-06, - "loss": 0.4366, - "step": 5323 - }, - { - "epoch": 0.34795111430625447, - "grad_norm": 0.49690648913383484, - "learning_rate": 9.765259734273898e-06, - "loss": 0.4808, - "step": 5324 - }, - { - "epoch": 0.3480164695117966, - "grad_norm": 0.45402097702026367, - "learning_rate": 9.765153985528092e-06, - "loss": 0.3989, - "step": 5325 - }, - { - "epoch": 0.34808182471733873, - "grad_norm": 0.4688388407230377, - "learning_rate": 9.76504821354098e-06, - "loss": 0.3746, - "step": 5326 - }, - { - "epoch": 0.34814717992288086, - "grad_norm": 0.4402535557746887, - "learning_rate": 9.764942418313074e-06, - "loss": 0.3504, - "step": 5327 - }, - { - "epoch": 0.348212535128423, - "grad_norm": 0.46410199999809265, - "learning_rate": 9.764836599844896e-06, - "loss": 0.3876, - "step": 5328 - }, - { - "epoch": 0.3482778903339651, - "grad_norm": 0.45752081274986267, - "learning_rate": 9.76473075813696e-06, - "loss": 0.3709, - "step": 5329 - }, - { - "epoch": 0.34834324553950724, - "grad_norm": 0.5043820142745972, - "learning_rate": 9.76462489318978e-06, - "loss": 0.4149, - "step": 5330 - }, - { - "epoch": 0.34840860074504937, - "grad_norm": 0.44153255224227905, - "learning_rate": 9.764519005003874e-06, - "loss": 0.3739, - "step": 5331 - }, - { - "epoch": 0.34847395595059144, - "grad_norm": 0.4349711835384369, - "learning_rate": 9.76441309357976e-06, - "loss": 0.3354, - "step": 5332 - }, - { - "epoch": 0.34853931115613357, - "grad_norm": 0.5045499801635742, - "learning_rate": 9.76430715891795e-06, - "loss": 0.4174, - "step": 5333 - }, - { - "epoch": 0.3486046663616757, - "grad_norm": 0.46213701367378235, - "learning_rate": 9.764201201018963e-06, - "loss": 0.373, - "step": 5334 - }, - { - "epoch": 0.3486700215672178, - "grad_norm": 0.4964313507080078, - "learning_rate": 9.764095219883319e-06, - "loss": 0.4172, - "step": 5335 - }, - { - "epoch": 0.34873537677275995, - "grad_norm": 0.4488334655761719, - "learning_rate": 9.76398921551153e-06, - "loss": 0.3682, - "step": 5336 - }, - { - "epoch": 0.3488007319783021, - "grad_norm": 0.49103713035583496, - "learning_rate": 9.763883187904117e-06, - "loss": 0.4111, - "step": 5337 - }, - { - "epoch": 0.3488660871838442, - "grad_norm": 0.441040962934494, - "learning_rate": 9.763777137061595e-06, - "loss": 0.39, - "step": 5338 - }, - { - "epoch": 0.34893144238938634, - "grad_norm": 0.48513248562812805, - "learning_rate": 9.76367106298448e-06, - "loss": 0.3817, - "step": 5339 - }, - { - "epoch": 0.3489967975949284, - "grad_norm": 0.44116392731666565, - "learning_rate": 9.763564965673292e-06, - "loss": 0.3998, - "step": 5340 - }, - { - "epoch": 0.34906215280047054, - "grad_norm": 0.4690583050251007, - "learning_rate": 9.763458845128547e-06, - "loss": 0.393, - "step": 5341 - }, - { - "epoch": 0.34912750800601267, - "grad_norm": 0.4888540804386139, - "learning_rate": 9.763352701350764e-06, - "loss": 0.4198, - "step": 5342 - }, - { - "epoch": 0.3491928632115548, - "grad_norm": 0.501666784286499, - "learning_rate": 9.763246534340458e-06, - "loss": 0.3886, - "step": 5343 - }, - { - "epoch": 0.3492582184170969, - "grad_norm": 0.4757966995239258, - "learning_rate": 9.76314034409815e-06, - "loss": 0.3674, - "step": 5344 - }, - { - "epoch": 0.34932357362263905, - "grad_norm": 0.4631199538707733, - "learning_rate": 9.763034130624355e-06, - "loss": 0.3988, - "step": 5345 - }, - { - "epoch": 0.3493889288281812, - "grad_norm": 0.4730125963687897, - "learning_rate": 9.762927893919595e-06, - "loss": 0.4347, - "step": 5346 - }, - { - "epoch": 0.3494542840337233, - "grad_norm": 0.5159075260162354, - "learning_rate": 9.762821633984383e-06, - "loss": 0.4224, - "step": 5347 - }, - { - "epoch": 0.34951963923926543, - "grad_norm": 0.43075433373451233, - "learning_rate": 9.762715350819242e-06, - "loss": 0.3216, - "step": 5348 - }, - { - "epoch": 0.3495849944448075, - "grad_norm": 0.47610026597976685, - "learning_rate": 9.762609044424688e-06, - "loss": 0.3888, - "step": 5349 - }, - { - "epoch": 0.34965034965034963, - "grad_norm": 0.49334654211997986, - "learning_rate": 9.762502714801239e-06, - "loss": 0.4107, - "step": 5350 - }, - { - "epoch": 0.34971570485589176, - "grad_norm": 0.5176300406455994, - "learning_rate": 9.762396361949414e-06, - "loss": 0.4084, - "step": 5351 - }, - { - "epoch": 0.3497810600614339, - "grad_norm": 0.48287269473075867, - "learning_rate": 9.762289985869731e-06, - "loss": 0.4043, - "step": 5352 - }, - { - "epoch": 0.349846415266976, - "grad_norm": 0.4483534097671509, - "learning_rate": 9.762183586562713e-06, - "loss": 0.3574, - "step": 5353 - }, - { - "epoch": 0.34991177047251815, - "grad_norm": 0.48141026496887207, - "learning_rate": 9.762077164028874e-06, - "loss": 0.4202, - "step": 5354 - }, - { - "epoch": 0.3499771256780603, - "grad_norm": 0.47812286019325256, - "learning_rate": 9.761970718268734e-06, - "loss": 0.4009, - "step": 5355 - }, - { - "epoch": 0.3500424808836024, - "grad_norm": 0.4493675231933594, - "learning_rate": 9.761864249282815e-06, - "loss": 0.3916, - "step": 5356 - }, - { - "epoch": 0.3501078360891445, - "grad_norm": 0.4531344771385193, - "learning_rate": 9.761757757071632e-06, - "loss": 0.3684, - "step": 5357 - }, - { - "epoch": 0.3501731912946866, - "grad_norm": 0.4760850667953491, - "learning_rate": 9.76165124163571e-06, - "loss": 0.3981, - "step": 5358 - }, - { - "epoch": 0.35023854650022873, - "grad_norm": 0.48776134848594666, - "learning_rate": 9.761544702975562e-06, - "loss": 0.4452, - "step": 5359 - }, - { - "epoch": 0.35030390170577086, - "grad_norm": 0.4582652449607849, - "learning_rate": 9.761438141091715e-06, - "loss": 0.4244, - "step": 5360 - }, - { - "epoch": 0.350369256911313, - "grad_norm": 0.4685945212841034, - "learning_rate": 9.76133155598468e-06, - "loss": 0.3681, - "step": 5361 - }, - { - "epoch": 0.3504346121168551, - "grad_norm": 0.4862159788608551, - "learning_rate": 9.761224947654986e-06, - "loss": 0.4116, - "step": 5362 - }, - { - "epoch": 0.35049996732239724, - "grad_norm": 0.48712801933288574, - "learning_rate": 9.761118316103146e-06, - "loss": 0.4026, - "step": 5363 - }, - { - "epoch": 0.35056532252793937, - "grad_norm": 0.4633937180042267, - "learning_rate": 9.761011661329683e-06, - "loss": 0.3921, - "step": 5364 - }, - { - "epoch": 0.3506306777334815, - "grad_norm": 0.47881579399108887, - "learning_rate": 9.760904983335117e-06, - "loss": 0.4227, - "step": 5365 - }, - { - "epoch": 0.35069603293902357, - "grad_norm": 0.4811265170574188, - "learning_rate": 9.760798282119967e-06, - "loss": 0.4271, - "step": 5366 - }, - { - "epoch": 0.3507613881445657, - "grad_norm": 0.46356168389320374, - "learning_rate": 9.760691557684756e-06, - "loss": 0.4003, - "step": 5367 - }, - { - "epoch": 0.35082674335010783, - "grad_norm": 0.48348528146743774, - "learning_rate": 9.760584810030002e-06, - "loss": 0.3982, - "step": 5368 - }, - { - "epoch": 0.35089209855564996, - "grad_norm": 0.4457794427871704, - "learning_rate": 9.760478039156226e-06, - "loss": 0.3609, - "step": 5369 - }, - { - "epoch": 0.3509574537611921, - "grad_norm": 0.48174190521240234, - "learning_rate": 9.760371245063951e-06, - "loss": 0.4377, - "step": 5370 - }, - { - "epoch": 0.3510228089667342, - "grad_norm": 0.4791134297847748, - "learning_rate": 9.760264427753695e-06, - "loss": 0.4164, - "step": 5371 - }, - { - "epoch": 0.35108816417227634, - "grad_norm": 0.4756622910499573, - "learning_rate": 9.760157587225981e-06, - "loss": 0.3839, - "step": 5372 - }, - { - "epoch": 0.35115351937781847, - "grad_norm": 0.4487653374671936, - "learning_rate": 9.76005072348133e-06, - "loss": 0.3948, - "step": 5373 - }, - { - "epoch": 0.35121887458336054, - "grad_norm": 0.4537963569164276, - "learning_rate": 9.759943836520261e-06, - "loss": 0.3767, - "step": 5374 - }, - { - "epoch": 0.35128422978890267, - "grad_norm": 0.49372246861457825, - "learning_rate": 9.759836926343298e-06, - "loss": 0.3957, - "step": 5375 - }, - { - "epoch": 0.3513495849944448, - "grad_norm": 0.471619188785553, - "learning_rate": 9.75972999295096e-06, - "loss": 0.4235, - "step": 5376 - }, - { - "epoch": 0.3514149401999869, - "grad_norm": 0.48206186294555664, - "learning_rate": 9.759623036343772e-06, - "loss": 0.4246, - "step": 5377 - }, - { - "epoch": 0.35148029540552905, - "grad_norm": 0.45793044567108154, - "learning_rate": 9.759516056522252e-06, - "loss": 0.3924, - "step": 5378 - }, - { - "epoch": 0.3515456506110712, - "grad_norm": 0.5243789553642273, - "learning_rate": 9.759409053486923e-06, - "loss": 0.4208, - "step": 5379 - }, - { - "epoch": 0.3516110058166133, - "grad_norm": 0.4804753065109253, - "learning_rate": 9.759302027238308e-06, - "loss": 0.4052, - "step": 5380 - }, - { - "epoch": 0.35167636102215544, - "grad_norm": 0.45715075731277466, - "learning_rate": 9.75919497777693e-06, - "loss": 0.3764, - "step": 5381 - }, - { - "epoch": 0.3517417162276975, - "grad_norm": 0.4755937457084656, - "learning_rate": 9.759087905103307e-06, - "loss": 0.4235, - "step": 5382 - }, - { - "epoch": 0.35180707143323964, - "grad_norm": 0.45948106050491333, - "learning_rate": 9.758980809217964e-06, - "loss": 0.3657, - "step": 5383 - }, - { - "epoch": 0.35187242663878177, - "grad_norm": 0.47409531474113464, - "learning_rate": 9.758873690121424e-06, - "loss": 0.426, - "step": 5384 - }, - { - "epoch": 0.3519377818443239, - "grad_norm": 0.5137777924537659, - "learning_rate": 9.758766547814207e-06, - "loss": 0.4567, - "step": 5385 - }, - { - "epoch": 0.352003137049866, - "grad_norm": 0.4825906455516815, - "learning_rate": 9.758659382296838e-06, - "loss": 0.4238, - "step": 5386 - }, - { - "epoch": 0.35206849225540815, - "grad_norm": 0.4490877687931061, - "learning_rate": 9.758552193569838e-06, - "loss": 0.3907, - "step": 5387 - }, - { - "epoch": 0.3521338474609503, - "grad_norm": 0.4570304751396179, - "learning_rate": 9.75844498163373e-06, - "loss": 0.3823, - "step": 5388 - }, - { - "epoch": 0.3521992026664924, - "grad_norm": 0.46208950877189636, - "learning_rate": 9.758337746489038e-06, - "loss": 0.3856, - "step": 5389 - }, - { - "epoch": 0.35226455787203453, - "grad_norm": 0.4878591299057007, - "learning_rate": 9.758230488136285e-06, - "loss": 0.4141, - "step": 5390 - }, - { - "epoch": 0.3523299130775766, - "grad_norm": 0.44514963030815125, - "learning_rate": 9.758123206575993e-06, - "loss": 0.3846, - "step": 5391 - }, - { - "epoch": 0.35239526828311873, - "grad_norm": 0.43946024775505066, - "learning_rate": 9.758015901808684e-06, - "loss": 0.3655, - "step": 5392 - }, - { - "epoch": 0.35246062348866086, - "grad_norm": 0.4391081631183624, - "learning_rate": 9.757908573834886e-06, - "loss": 0.4043, - "step": 5393 - }, - { - "epoch": 0.352525978694203, - "grad_norm": 0.47638437151908875, - "learning_rate": 9.757801222655119e-06, - "loss": 0.4361, - "step": 5394 - }, - { - "epoch": 0.3525913338997451, - "grad_norm": 0.4875653088092804, - "learning_rate": 9.757693848269904e-06, - "loss": 0.4101, - "step": 5395 - }, - { - "epoch": 0.35265668910528725, - "grad_norm": 0.46153193712234497, - "learning_rate": 9.757586450679771e-06, - "loss": 0.3962, - "step": 5396 - }, - { - "epoch": 0.3527220443108294, - "grad_norm": 0.4558506906032562, - "learning_rate": 9.75747902988524e-06, - "loss": 0.41, - "step": 5397 - }, - { - "epoch": 0.3527873995163715, - "grad_norm": 0.4575936198234558, - "learning_rate": 9.757371585886836e-06, - "loss": 0.3452, - "step": 5398 - }, - { - "epoch": 0.3528527547219136, - "grad_norm": 0.4510190486907959, - "learning_rate": 9.757264118685081e-06, - "loss": 0.3873, - "step": 5399 - }, - { - "epoch": 0.3529181099274557, - "grad_norm": 0.4614260196685791, - "learning_rate": 9.757156628280504e-06, - "loss": 0.4343, - "step": 5400 - }, - { - "epoch": 0.35298346513299783, - "grad_norm": 0.4935864806175232, - "learning_rate": 9.757049114673623e-06, - "loss": 0.4269, - "step": 5401 - }, - { - "epoch": 0.35304882033853996, - "grad_norm": 0.47003424167633057, - "learning_rate": 9.756941577864967e-06, - "loss": 0.4262, - "step": 5402 - }, - { - "epoch": 0.3531141755440821, - "grad_norm": 0.46406206488609314, - "learning_rate": 9.756834017855059e-06, - "loss": 0.4061, - "step": 5403 - }, - { - "epoch": 0.3531795307496242, - "grad_norm": 0.47702664136886597, - "learning_rate": 9.756726434644424e-06, - "loss": 0.3961, - "step": 5404 - }, - { - "epoch": 0.35324488595516634, - "grad_norm": 0.47764095664024353, - "learning_rate": 9.756618828233585e-06, - "loss": 0.4145, - "step": 5405 - }, - { - "epoch": 0.35331024116070847, - "grad_norm": 0.45300671458244324, - "learning_rate": 9.756511198623067e-06, - "loss": 0.3881, - "step": 5406 - }, - { - "epoch": 0.3533755963662506, - "grad_norm": 0.46042224764823914, - "learning_rate": 9.756403545813398e-06, - "loss": 0.3944, - "step": 5407 - }, - { - "epoch": 0.35344095157179267, - "grad_norm": 0.5612374544143677, - "learning_rate": 9.7562958698051e-06, - "loss": 0.3907, - "step": 5408 - }, - { - "epoch": 0.3535063067773348, - "grad_norm": 0.4778275489807129, - "learning_rate": 9.756188170598702e-06, - "loss": 0.4056, - "step": 5409 - }, - { - "epoch": 0.35357166198287693, - "grad_norm": 0.4500736892223358, - "learning_rate": 9.756080448194724e-06, - "loss": 0.3736, - "step": 5410 - }, - { - "epoch": 0.35363701718841906, - "grad_norm": 0.46924859285354614, - "learning_rate": 9.755972702593695e-06, - "loss": 0.3956, - "step": 5411 - }, - { - "epoch": 0.3537023723939612, - "grad_norm": 0.4467761814594269, - "learning_rate": 9.755864933796139e-06, - "loss": 0.3717, - "step": 5412 - }, - { - "epoch": 0.3537677275995033, - "grad_norm": 0.4989909529685974, - "learning_rate": 9.755757141802582e-06, - "loss": 0.451, - "step": 5413 - }, - { - "epoch": 0.35383308280504544, - "grad_norm": 0.48000356554985046, - "learning_rate": 9.75564932661355e-06, - "loss": 0.409, - "step": 5414 - }, - { - "epoch": 0.35389843801058757, - "grad_norm": 0.5046958923339844, - "learning_rate": 9.75554148822957e-06, - "loss": 0.424, - "step": 5415 - }, - { - "epoch": 0.35396379321612964, - "grad_norm": 0.5241200923919678, - "learning_rate": 9.755433626651165e-06, - "loss": 0.46, - "step": 5416 - }, - { - "epoch": 0.35402914842167177, - "grad_norm": 0.46465495228767395, - "learning_rate": 9.755325741878863e-06, - "loss": 0.3875, - "step": 5417 - }, - { - "epoch": 0.3540945036272139, - "grad_norm": 0.4638899564743042, - "learning_rate": 9.75521783391319e-06, - "loss": 0.4184, - "step": 5418 - }, - { - "epoch": 0.354159858832756, - "grad_norm": 0.464154452085495, - "learning_rate": 9.755109902754673e-06, - "loss": 0.4188, - "step": 5419 - }, - { - "epoch": 0.35422521403829815, - "grad_norm": 0.4229236841201782, - "learning_rate": 9.755001948403838e-06, - "loss": 0.3127, - "step": 5420 - }, - { - "epoch": 0.3542905692438403, - "grad_norm": 0.5362170934677124, - "learning_rate": 9.754893970861208e-06, - "loss": 0.3835, - "step": 5421 - }, - { - "epoch": 0.3543559244493824, - "grad_norm": 0.48871511220932007, - "learning_rate": 9.754785970127317e-06, - "loss": 0.4176, - "step": 5422 - }, - { - "epoch": 0.35442127965492454, - "grad_norm": 0.46417954564094543, - "learning_rate": 9.754677946202686e-06, - "loss": 0.3755, - "step": 5423 - }, - { - "epoch": 0.3544866348604666, - "grad_norm": 0.46701881289482117, - "learning_rate": 9.754569899087843e-06, - "loss": 0.3702, - "step": 5424 - }, - { - "epoch": 0.35455199006600874, - "grad_norm": 0.5148953199386597, - "learning_rate": 9.754461828783315e-06, - "loss": 0.4643, - "step": 5425 - }, - { - "epoch": 0.35461734527155087, - "grad_norm": 0.47312691807746887, - "learning_rate": 9.75435373528963e-06, - "loss": 0.3995, - "step": 5426 - }, - { - "epoch": 0.354682700477093, - "grad_norm": 0.5017833709716797, - "learning_rate": 9.754245618607317e-06, - "loss": 0.4244, - "step": 5427 - }, - { - "epoch": 0.3547480556826351, - "grad_norm": 0.49053823947906494, - "learning_rate": 9.754137478736898e-06, - "loss": 0.4149, - "step": 5428 - }, - { - "epoch": 0.35481341088817725, - "grad_norm": 0.47810912132263184, - "learning_rate": 9.754029315678906e-06, - "loss": 0.3963, - "step": 5429 - }, - { - "epoch": 0.3548787660937194, - "grad_norm": 0.4808669984340668, - "learning_rate": 9.753921129433864e-06, - "loss": 0.3927, - "step": 5430 - }, - { - "epoch": 0.3549441212992615, - "grad_norm": 0.455217182636261, - "learning_rate": 9.753812920002302e-06, - "loss": 0.3875, - "step": 5431 - }, - { - "epoch": 0.35500947650480363, - "grad_norm": 0.4422004818916321, - "learning_rate": 9.753704687384749e-06, - "loss": 0.3604, - "step": 5432 - }, - { - "epoch": 0.3550748317103457, - "grad_norm": 0.42850714921951294, - "learning_rate": 9.75359643158173e-06, - "loss": 0.3491, - "step": 5433 - }, - { - "epoch": 0.35514018691588783, - "grad_norm": 0.5053018927574158, - "learning_rate": 9.753488152593774e-06, - "loss": 0.4384, - "step": 5434 - }, - { - "epoch": 0.35520554212142996, - "grad_norm": 0.42704612016677856, - "learning_rate": 9.753379850421412e-06, - "loss": 0.3278, - "step": 5435 - }, - { - "epoch": 0.3552708973269721, - "grad_norm": 0.4832640588283539, - "learning_rate": 9.753271525065166e-06, - "loss": 0.3469, - "step": 5436 - }, - { - "epoch": 0.3553362525325142, - "grad_norm": 0.46148040890693665, - "learning_rate": 9.753163176525572e-06, - "loss": 0.3929, - "step": 5437 - }, - { - "epoch": 0.35540160773805635, - "grad_norm": 0.46563684940338135, - "learning_rate": 9.753054804803153e-06, - "loss": 0.4351, - "step": 5438 - }, - { - "epoch": 0.3554669629435985, - "grad_norm": 0.44625213742256165, - "learning_rate": 9.752946409898439e-06, - "loss": 0.3957, - "step": 5439 - }, - { - "epoch": 0.3555323181491406, - "grad_norm": 0.4298880398273468, - "learning_rate": 9.752837991811958e-06, - "loss": 0.3268, - "step": 5440 - }, - { - "epoch": 0.3555976733546827, - "grad_norm": 0.464630663394928, - "learning_rate": 9.75272955054424e-06, - "loss": 0.4075, - "step": 5441 - }, - { - "epoch": 0.3556630285602248, - "grad_norm": 0.44898244738578796, - "learning_rate": 9.752621086095813e-06, - "loss": 0.3573, - "step": 5442 - }, - { - "epoch": 0.35572838376576693, - "grad_norm": 0.4824317693710327, - "learning_rate": 9.752512598467207e-06, - "loss": 0.4329, - "step": 5443 - }, - { - "epoch": 0.35579373897130906, - "grad_norm": 0.465991348028183, - "learning_rate": 9.752404087658951e-06, - "loss": 0.3885, - "step": 5444 - }, - { - "epoch": 0.3558590941768512, - "grad_norm": 0.4964909255504608, - "learning_rate": 9.752295553671574e-06, - "loss": 0.4342, - "step": 5445 - }, - { - "epoch": 0.3559244493823933, - "grad_norm": 0.5367789268493652, - "learning_rate": 9.752186996505605e-06, - "loss": 0.454, - "step": 5446 - }, - { - "epoch": 0.35598980458793544, - "grad_norm": 0.4750783145427704, - "learning_rate": 9.752078416161574e-06, - "loss": 0.4286, - "step": 5447 - }, - { - "epoch": 0.35605515979347757, - "grad_norm": 0.4420899748802185, - "learning_rate": 9.751969812640009e-06, - "loss": 0.3954, - "step": 5448 - }, - { - "epoch": 0.3561205149990197, - "grad_norm": 0.48597240447998047, - "learning_rate": 9.751861185941442e-06, - "loss": 0.4412, - "step": 5449 - }, - { - "epoch": 0.35618587020456177, - "grad_norm": 0.4420667588710785, - "learning_rate": 9.7517525360664e-06, - "loss": 0.3875, - "step": 5450 - }, - { - "epoch": 0.3562512254101039, - "grad_norm": 0.47928571701049805, - "learning_rate": 9.751643863015418e-06, - "loss": 0.423, - "step": 5451 - }, - { - "epoch": 0.35631658061564603, - "grad_norm": 0.5068503022193909, - "learning_rate": 9.751535166789021e-06, - "loss": 0.3496, - "step": 5452 - }, - { - "epoch": 0.35638193582118816, - "grad_norm": 0.4657083749771118, - "learning_rate": 9.751426447387741e-06, - "loss": 0.4095, - "step": 5453 - }, - { - "epoch": 0.3564472910267303, - "grad_norm": 0.5119538903236389, - "learning_rate": 9.751317704812108e-06, - "loss": 0.438, - "step": 5454 - }, - { - "epoch": 0.3565126462322724, - "grad_norm": 0.4512006938457489, - "learning_rate": 9.751208939062653e-06, - "loss": 0.3885, - "step": 5455 - }, - { - "epoch": 0.35657800143781454, - "grad_norm": 0.49278318881988525, - "learning_rate": 9.751100150139906e-06, - "loss": 0.4138, - "step": 5456 - }, - { - "epoch": 0.35664335664335667, - "grad_norm": 0.49377885460853577, - "learning_rate": 9.750991338044397e-06, - "loss": 0.4183, - "step": 5457 - }, - { - "epoch": 0.35670871184889874, - "grad_norm": 0.4811665415763855, - "learning_rate": 9.750882502776658e-06, - "loss": 0.4146, - "step": 5458 - }, - { - "epoch": 0.35677406705444087, - "grad_norm": 0.5551828742027283, - "learning_rate": 9.750773644337219e-06, - "loss": 0.4836, - "step": 5459 - }, - { - "epoch": 0.356839422259983, - "grad_norm": 0.45828860998153687, - "learning_rate": 9.750664762726612e-06, - "loss": 0.4045, - "step": 5460 - }, - { - "epoch": 0.3569047774655251, - "grad_norm": 0.44688984751701355, - "learning_rate": 9.750555857945366e-06, - "loss": 0.3596, - "step": 5461 - }, - { - "epoch": 0.35697013267106725, - "grad_norm": 0.5028802156448364, - "learning_rate": 9.750446929994014e-06, - "loss": 0.4572, - "step": 5462 - }, - { - "epoch": 0.3570354878766094, - "grad_norm": 0.4700569808483124, - "learning_rate": 9.750337978873085e-06, - "loss": 0.3513, - "step": 5463 - }, - { - "epoch": 0.3571008430821515, - "grad_norm": 0.46906787157058716, - "learning_rate": 9.750229004583112e-06, - "loss": 0.3789, - "step": 5464 - }, - { - "epoch": 0.35716619828769364, - "grad_norm": 0.47062036395072937, - "learning_rate": 9.750120007124628e-06, - "loss": 0.4085, - "step": 5465 - }, - { - "epoch": 0.3572315534932357, - "grad_norm": 0.4533155858516693, - "learning_rate": 9.750010986498164e-06, - "loss": 0.4235, - "step": 5466 - }, - { - "epoch": 0.35729690869877784, - "grad_norm": 0.47060179710388184, - "learning_rate": 9.74990194270425e-06, - "loss": 0.4232, - "step": 5467 - }, - { - "epoch": 0.35736226390431997, - "grad_norm": 0.5088547468185425, - "learning_rate": 9.749792875743418e-06, - "loss": 0.4294, - "step": 5468 - }, - { - "epoch": 0.3574276191098621, - "grad_norm": 0.4410618245601654, - "learning_rate": 9.7496837856162e-06, - "loss": 0.3867, - "step": 5469 - }, - { - "epoch": 0.3574929743154042, - "grad_norm": 0.4552725851535797, - "learning_rate": 9.74957467232313e-06, - "loss": 0.3738, - "step": 5470 - }, - { - "epoch": 0.35755832952094635, - "grad_norm": 0.4843587875366211, - "learning_rate": 9.749465535864738e-06, - "loss": 0.4186, - "step": 5471 - }, - { - "epoch": 0.3576236847264885, - "grad_norm": 0.49985405802726746, - "learning_rate": 9.749356376241559e-06, - "loss": 0.475, - "step": 5472 - }, - { - "epoch": 0.3576890399320306, - "grad_norm": 0.47680795192718506, - "learning_rate": 9.74924719345412e-06, - "loss": 0.39, - "step": 5473 - }, - { - "epoch": 0.35775439513757273, - "grad_norm": 0.4933762848377228, - "learning_rate": 9.74913798750296e-06, - "loss": 0.3891, - "step": 5474 - }, - { - "epoch": 0.3578197503431148, - "grad_norm": 0.47685137391090393, - "learning_rate": 9.74902875838861e-06, - "loss": 0.3745, - "step": 5475 - }, - { - "epoch": 0.35788510554865693, - "grad_norm": 0.507700502872467, - "learning_rate": 9.748919506111601e-06, - "loss": 0.4134, - "step": 5476 - }, - { - "epoch": 0.35795046075419906, - "grad_norm": 0.45000192523002625, - "learning_rate": 9.748810230672463e-06, - "loss": 0.3845, - "step": 5477 - }, - { - "epoch": 0.3580158159597412, - "grad_norm": 0.4318159222602844, - "learning_rate": 9.748700932071735e-06, - "loss": 0.3561, - "step": 5478 - }, - { - "epoch": 0.3580811711652833, - "grad_norm": 0.436652272939682, - "learning_rate": 9.748591610309948e-06, - "loss": 0.3441, - "step": 5479 - }, - { - "epoch": 0.35814652637082545, - "grad_norm": 0.5244379043579102, - "learning_rate": 9.748482265387634e-06, - "loss": 0.4799, - "step": 5480 - }, - { - "epoch": 0.3582118815763676, - "grad_norm": 0.4660002291202545, - "learning_rate": 9.748372897305327e-06, - "loss": 0.3899, - "step": 5481 - }, - { - "epoch": 0.3582772367819097, - "grad_norm": 0.4497160315513611, - "learning_rate": 9.74826350606356e-06, - "loss": 0.3476, - "step": 5482 - }, - { - "epoch": 0.3583425919874518, - "grad_norm": 0.45765113830566406, - "learning_rate": 9.748154091662867e-06, - "loss": 0.4087, - "step": 5483 - }, - { - "epoch": 0.3584079471929939, - "grad_norm": 0.5139644145965576, - "learning_rate": 9.748044654103781e-06, - "loss": 0.4456, - "step": 5484 - }, - { - "epoch": 0.35847330239853603, - "grad_norm": 0.4682544767856598, - "learning_rate": 9.747935193386837e-06, - "loss": 0.3922, - "step": 5485 - }, - { - "epoch": 0.35853865760407816, - "grad_norm": 0.47969430685043335, - "learning_rate": 9.747825709512568e-06, - "loss": 0.4239, - "step": 5486 - }, - { - "epoch": 0.3586040128096203, - "grad_norm": 0.4690496325492859, - "learning_rate": 9.747716202481507e-06, - "loss": 0.3948, - "step": 5487 - }, - { - "epoch": 0.3586693680151624, - "grad_norm": 0.4625760614871979, - "learning_rate": 9.747606672294192e-06, - "loss": 0.3936, - "step": 5488 - }, - { - "epoch": 0.35873472322070454, - "grad_norm": 0.4752259850502014, - "learning_rate": 9.747497118951152e-06, - "loss": 0.4128, - "step": 5489 - }, - { - "epoch": 0.35880007842624667, - "grad_norm": 0.4447445273399353, - "learning_rate": 9.747387542452927e-06, - "loss": 0.3358, - "step": 5490 - }, - { - "epoch": 0.3588654336317888, - "grad_norm": 0.45358070731163025, - "learning_rate": 9.747277942800045e-06, - "loss": 0.388, - "step": 5491 - }, - { - "epoch": 0.35893078883733087, - "grad_norm": 0.46292173862457275, - "learning_rate": 9.747168319993045e-06, - "loss": 0.3976, - "step": 5492 - }, - { - "epoch": 0.358996144042873, - "grad_norm": 0.47038424015045166, - "learning_rate": 9.747058674032462e-06, - "loss": 0.4258, - "step": 5493 - }, - { - "epoch": 0.35906149924841513, - "grad_norm": 0.4775792062282562, - "learning_rate": 9.746949004918826e-06, - "loss": 0.4294, - "step": 5494 - }, - { - "epoch": 0.35912685445395726, - "grad_norm": 0.48537901043891907, - "learning_rate": 9.746839312652678e-06, - "loss": 0.4168, - "step": 5495 - }, - { - "epoch": 0.3591922096594994, - "grad_norm": 0.4761759638786316, - "learning_rate": 9.74672959723455e-06, - "loss": 0.3888, - "step": 5496 - }, - { - "epoch": 0.3592575648650415, - "grad_norm": 0.4897666573524475, - "learning_rate": 9.746619858664976e-06, - "loss": 0.4147, - "step": 5497 - }, - { - "epoch": 0.35932292007058364, - "grad_norm": 0.4796993136405945, - "learning_rate": 9.746510096944494e-06, - "loss": 0.4033, - "step": 5498 - }, - { - "epoch": 0.35938827527612577, - "grad_norm": 0.4589497148990631, - "learning_rate": 9.746400312073637e-06, - "loss": 0.3749, - "step": 5499 - }, - { - "epoch": 0.35945363048166784, - "grad_norm": 0.46367713809013367, - "learning_rate": 9.746290504052942e-06, - "loss": 0.404, - "step": 5500 - }, - { - "epoch": 0.35951898568720997, - "grad_norm": 0.5216031074523926, - "learning_rate": 9.746180672882943e-06, - "loss": 0.4589, - "step": 5501 - }, - { - "epoch": 0.3595843408927521, - "grad_norm": 0.467579185962677, - "learning_rate": 9.746070818564178e-06, - "loss": 0.3964, - "step": 5502 - }, - { - "epoch": 0.3596496960982942, - "grad_norm": 0.44784092903137207, - "learning_rate": 9.74596094109718e-06, - "loss": 0.3994, - "step": 5503 - }, - { - "epoch": 0.35971505130383635, - "grad_norm": 0.45274531841278076, - "learning_rate": 9.745851040482486e-06, - "loss": 0.3899, - "step": 5504 - }, - { - "epoch": 0.3597804065093785, - "grad_norm": 0.504065990447998, - "learning_rate": 9.745741116720635e-06, - "loss": 0.3982, - "step": 5505 - }, - { - "epoch": 0.3598457617149206, - "grad_norm": 0.4520778954029083, - "learning_rate": 9.745631169812157e-06, - "loss": 0.3813, - "step": 5506 - }, - { - "epoch": 0.35991111692046274, - "grad_norm": 0.47651755809783936, - "learning_rate": 9.745521199757595e-06, - "loss": 0.418, - "step": 5507 - }, - { - "epoch": 0.3599764721260048, - "grad_norm": 0.4437931180000305, - "learning_rate": 9.74541120655748e-06, - "loss": 0.3728, - "step": 5508 - }, - { - "epoch": 0.36004182733154694, - "grad_norm": 0.5083013772964478, - "learning_rate": 9.74530119021235e-06, - "loss": 0.4607, - "step": 5509 - }, - { - "epoch": 0.36010718253708907, - "grad_norm": 0.49100443720817566, - "learning_rate": 9.745191150722745e-06, - "loss": 0.4, - "step": 5510 - }, - { - "epoch": 0.3601725377426312, - "grad_norm": 0.46294522285461426, - "learning_rate": 9.745081088089196e-06, - "loss": 0.3774, - "step": 5511 - }, - { - "epoch": 0.3602378929481733, - "grad_norm": 0.446426659822464, - "learning_rate": 9.744971002312244e-06, - "loss": 0.4067, - "step": 5512 - }, - { - "epoch": 0.36030324815371545, - "grad_norm": 0.466602623462677, - "learning_rate": 9.744860893392425e-06, - "loss": 0.4036, - "step": 5513 - }, - { - "epoch": 0.3603686033592576, - "grad_norm": 0.5313547849655151, - "learning_rate": 9.744750761330276e-06, - "loss": 0.4251, - "step": 5514 - }, - { - "epoch": 0.3604339585647997, - "grad_norm": 0.45921388268470764, - "learning_rate": 9.744640606126332e-06, - "loss": 0.4073, - "step": 5515 - }, - { - "epoch": 0.36049931377034183, - "grad_norm": 0.4541753828525543, - "learning_rate": 9.744530427781134e-06, - "loss": 0.4036, - "step": 5516 - }, - { - "epoch": 0.3605646689758839, - "grad_norm": 0.4802130460739136, - "learning_rate": 9.744420226295215e-06, - "loss": 0.3744, - "step": 5517 - }, - { - "epoch": 0.36063002418142603, - "grad_norm": 0.475797563791275, - "learning_rate": 9.744310001669117e-06, - "loss": 0.3781, - "step": 5518 - }, - { - "epoch": 0.36069537938696816, - "grad_norm": 0.4912639260292053, - "learning_rate": 9.744199753903375e-06, - "loss": 0.3703, - "step": 5519 - }, - { - "epoch": 0.3607607345925103, - "grad_norm": 0.45177358388900757, - "learning_rate": 9.744089482998526e-06, - "loss": 0.3866, - "step": 5520 - }, - { - "epoch": 0.3608260897980524, - "grad_norm": 0.49249595403671265, - "learning_rate": 9.743979188955111e-06, - "loss": 0.4488, - "step": 5521 - }, - { - "epoch": 0.36089144500359455, - "grad_norm": 0.49944764375686646, - "learning_rate": 9.743868871773666e-06, - "loss": 0.4194, - "step": 5522 - }, - { - "epoch": 0.3609568002091367, - "grad_norm": 0.49870505928993225, - "learning_rate": 9.743758531454727e-06, - "loss": 0.4651, - "step": 5523 - }, - { - "epoch": 0.3610221554146788, - "grad_norm": 0.4502068758010864, - "learning_rate": 9.743648167998837e-06, - "loss": 0.3812, - "step": 5524 - }, - { - "epoch": 0.3610875106202209, - "grad_norm": 0.42723122239112854, - "learning_rate": 9.743537781406529e-06, - "loss": 0.3463, - "step": 5525 - }, - { - "epoch": 0.361152865825763, - "grad_norm": 0.4512379765510559, - "learning_rate": 9.743427371678346e-06, - "loss": 0.3532, - "step": 5526 - }, - { - "epoch": 0.36121822103130513, - "grad_norm": 0.46591150760650635, - "learning_rate": 9.743316938814824e-06, - "loss": 0.3973, - "step": 5527 - }, - { - "epoch": 0.36128357623684726, - "grad_norm": 0.4805774688720703, - "learning_rate": 9.743206482816501e-06, - "loss": 0.4118, - "step": 5528 - }, - { - "epoch": 0.3613489314423894, - "grad_norm": 0.4822915196418762, - "learning_rate": 9.743096003683918e-06, - "loss": 0.4368, - "step": 5529 - }, - { - "epoch": 0.3614142866479315, - "grad_norm": 0.45295917987823486, - "learning_rate": 9.742985501417611e-06, - "loss": 0.4003, - "step": 5530 - }, - { - "epoch": 0.36147964185347364, - "grad_norm": 0.46429935097694397, - "learning_rate": 9.742874976018122e-06, - "loss": 0.3672, - "step": 5531 - }, - { - "epoch": 0.36154499705901577, - "grad_norm": 0.5184620022773743, - "learning_rate": 9.742764427485988e-06, - "loss": 0.4481, - "step": 5532 - }, - { - "epoch": 0.3616103522645579, - "grad_norm": 0.47342073917388916, - "learning_rate": 9.742653855821748e-06, - "loss": 0.3731, - "step": 5533 - }, - { - "epoch": 0.36167570747009997, - "grad_norm": 0.4863404929637909, - "learning_rate": 9.742543261025943e-06, - "loss": 0.411, - "step": 5534 - }, - { - "epoch": 0.3617410626756421, - "grad_norm": 0.47418177127838135, - "learning_rate": 9.742432643099112e-06, - "loss": 0.3984, - "step": 5535 - }, - { - "epoch": 0.36180641788118423, - "grad_norm": 0.504815936088562, - "learning_rate": 9.742322002041793e-06, - "loss": 0.4734, - "step": 5536 - }, - { - "epoch": 0.36187177308672636, - "grad_norm": 0.49866777658462524, - "learning_rate": 9.742211337854529e-06, - "loss": 0.4398, - "step": 5537 - }, - { - "epoch": 0.3619371282922685, - "grad_norm": 0.50166255235672, - "learning_rate": 9.742100650537856e-06, - "loss": 0.4316, - "step": 5538 - }, - { - "epoch": 0.3620024834978106, - "grad_norm": 0.4519596993923187, - "learning_rate": 9.741989940092314e-06, - "loss": 0.3626, - "step": 5539 - }, - { - "epoch": 0.36206783870335274, - "grad_norm": 0.46182096004486084, - "learning_rate": 9.741879206518447e-06, - "loss": 0.3627, - "step": 5540 - }, - { - "epoch": 0.36213319390889487, - "grad_norm": 0.4782857894897461, - "learning_rate": 9.74176844981679e-06, - "loss": 0.3856, - "step": 5541 - }, - { - "epoch": 0.36219854911443694, - "grad_norm": 0.46283137798309326, - "learning_rate": 9.741657669987887e-06, - "loss": 0.3823, - "step": 5542 - }, - { - "epoch": 0.36226390431997907, - "grad_norm": 0.48076778650283813, - "learning_rate": 9.741546867032277e-06, - "loss": 0.4086, - "step": 5543 - }, - { - "epoch": 0.3623292595255212, - "grad_norm": 0.44915562868118286, - "learning_rate": 9.741436040950499e-06, - "loss": 0.4086, - "step": 5544 - }, - { - "epoch": 0.3623946147310633, - "grad_norm": 0.47054797410964966, - "learning_rate": 9.741325191743093e-06, - "loss": 0.3952, - "step": 5545 - }, - { - "epoch": 0.36245996993660545, - "grad_norm": 0.488459050655365, - "learning_rate": 9.741214319410606e-06, - "loss": 0.4387, - "step": 5546 - }, - { - "epoch": 0.3625253251421476, - "grad_norm": 0.4568168520927429, - "learning_rate": 9.741103423953572e-06, - "loss": 0.3785, - "step": 5547 - }, - { - "epoch": 0.3625906803476897, - "grad_norm": 0.49804478883743286, - "learning_rate": 9.740992505372533e-06, - "loss": 0.41, - "step": 5548 - }, - { - "epoch": 0.36265603555323184, - "grad_norm": 0.49567046761512756, - "learning_rate": 9.740881563668032e-06, - "loss": 0.4289, - "step": 5549 - }, - { - "epoch": 0.3627213907587739, - "grad_norm": 0.4165554642677307, - "learning_rate": 9.740770598840611e-06, - "loss": 0.3466, - "step": 5550 - }, - { - "epoch": 0.36278674596431604, - "grad_norm": 0.5371134877204895, - "learning_rate": 9.740659610890808e-06, - "loss": 0.3951, - "step": 5551 - }, - { - "epoch": 0.36285210116985817, - "grad_norm": 0.46541106700897217, - "learning_rate": 9.740548599819166e-06, - "loss": 0.4122, - "step": 5552 - }, - { - "epoch": 0.3629174563754003, - "grad_norm": 0.48032405972480774, - "learning_rate": 9.740437565626226e-06, - "loss": 0.4446, - "step": 5553 - }, - { - "epoch": 0.3629828115809424, - "grad_norm": 0.447642058134079, - "learning_rate": 9.74032650831253e-06, - "loss": 0.3726, - "step": 5554 - }, - { - "epoch": 0.36304816678648455, - "grad_norm": 0.4682174324989319, - "learning_rate": 9.740215427878618e-06, - "loss": 0.4127, - "step": 5555 - }, - { - "epoch": 0.3631135219920267, - "grad_norm": 0.46120691299438477, - "learning_rate": 9.740104324325035e-06, - "loss": 0.4178, - "step": 5556 - }, - { - "epoch": 0.3631788771975688, - "grad_norm": 0.4727381765842438, - "learning_rate": 9.73999319765232e-06, - "loss": 0.3631, - "step": 5557 - }, - { - "epoch": 0.36324423240311093, - "grad_norm": 0.4543185532093048, - "learning_rate": 9.739882047861016e-06, - "loss": 0.4067, - "step": 5558 - }, - { - "epoch": 0.363309587608653, - "grad_norm": 0.4852119982242584, - "learning_rate": 9.739770874951666e-06, - "loss": 0.4622, - "step": 5559 - }, - { - "epoch": 0.36337494281419513, - "grad_norm": 0.5047418475151062, - "learning_rate": 9.73965967892481e-06, - "loss": 0.4463, - "step": 5560 - }, - { - "epoch": 0.36344029801973726, - "grad_norm": 0.48737582564353943, - "learning_rate": 9.739548459780993e-06, - "loss": 0.3986, - "step": 5561 - }, - { - "epoch": 0.3635056532252794, - "grad_norm": 0.4619516432285309, - "learning_rate": 9.739437217520758e-06, - "loss": 0.3787, - "step": 5562 - }, - { - "epoch": 0.3635710084308215, - "grad_norm": 0.47795623540878296, - "learning_rate": 9.739325952144644e-06, - "loss": 0.4186, - "step": 5563 - }, - { - "epoch": 0.36363636363636365, - "grad_norm": 0.47678953409194946, - "learning_rate": 9.739214663653196e-06, - "loss": 0.4466, - "step": 5564 - }, - { - "epoch": 0.3637017188419058, - "grad_norm": 0.47488972544670105, - "learning_rate": 9.739103352046955e-06, - "loss": 0.4109, - "step": 5565 - }, - { - "epoch": 0.3637670740474479, - "grad_norm": 0.4641948640346527, - "learning_rate": 9.738992017326465e-06, - "loss": 0.435, - "step": 5566 - }, - { - "epoch": 0.36383242925299, - "grad_norm": 0.48926520347595215, - "learning_rate": 9.73888065949227e-06, - "loss": 0.4337, - "step": 5567 - }, - { - "epoch": 0.3638977844585321, - "grad_norm": 0.4352390468120575, - "learning_rate": 9.738769278544914e-06, - "loss": 0.328, - "step": 5568 - }, - { - "epoch": 0.36396313966407423, - "grad_norm": 0.5158409476280212, - "learning_rate": 9.738657874484936e-06, - "loss": 0.4929, - "step": 5569 - }, - { - "epoch": 0.36402849486961636, - "grad_norm": 0.44906100630760193, - "learning_rate": 9.738546447312883e-06, - "loss": 0.3907, - "step": 5570 - }, - { - "epoch": 0.3640938500751585, - "grad_norm": 0.47618475556373596, - "learning_rate": 9.738434997029295e-06, - "loss": 0.4269, - "step": 5571 - }, - { - "epoch": 0.3641592052807006, - "grad_norm": 0.4537811577320099, - "learning_rate": 9.738323523634722e-06, - "loss": 0.3738, - "step": 5572 - }, - { - "epoch": 0.36422456048624274, - "grad_norm": 0.46539798378944397, - "learning_rate": 9.7382120271297e-06, - "loss": 0.3697, - "step": 5573 - }, - { - "epoch": 0.36428991569178487, - "grad_norm": 0.4666674733161926, - "learning_rate": 9.738100507514779e-06, - "loss": 0.3902, - "step": 5574 - }, - { - "epoch": 0.364355270897327, - "grad_norm": 0.45224064588546753, - "learning_rate": 9.7379889647905e-06, - "loss": 0.3936, - "step": 5575 - }, - { - "epoch": 0.36442062610286907, - "grad_norm": 0.48884791135787964, - "learning_rate": 9.737877398957406e-06, - "loss": 0.4348, - "step": 5576 - }, - { - "epoch": 0.3644859813084112, - "grad_norm": 0.7112709879875183, - "learning_rate": 9.737765810016045e-06, - "loss": 0.4619, - "step": 5577 - }, - { - "epoch": 0.36455133651395333, - "grad_norm": 0.4430217444896698, - "learning_rate": 9.737654197966957e-06, - "loss": 0.3548, - "step": 5578 - }, - { - "epoch": 0.36461669171949546, - "grad_norm": 0.45775002241134644, - "learning_rate": 9.737542562810689e-06, - "loss": 0.3367, - "step": 5579 - }, - { - "epoch": 0.3646820469250376, - "grad_norm": 0.48870351910591125, - "learning_rate": 9.737430904547785e-06, - "loss": 0.413, - "step": 5580 - }, - { - "epoch": 0.3647474021305797, - "grad_norm": 0.469472199678421, - "learning_rate": 9.737319223178788e-06, - "loss": 0.4088, - "step": 5581 - }, - { - "epoch": 0.36481275733612184, - "grad_norm": 0.4777522683143616, - "learning_rate": 9.737207518704245e-06, - "loss": 0.4041, - "step": 5582 - }, - { - "epoch": 0.36487811254166397, - "grad_norm": 0.5005249977111816, - "learning_rate": 9.7370957911247e-06, - "loss": 0.42, - "step": 5583 - }, - { - "epoch": 0.36494346774720604, - "grad_norm": 0.47446170449256897, - "learning_rate": 9.736984040440695e-06, - "loss": 0.3897, - "step": 5584 - }, - { - "epoch": 0.36500882295274817, - "grad_norm": 0.4743311107158661, - "learning_rate": 9.736872266652782e-06, - "loss": 0.386, - "step": 5585 - }, - { - "epoch": 0.3650741781582903, - "grad_norm": 0.4345497488975525, - "learning_rate": 9.7367604697615e-06, - "loss": 0.3466, - "step": 5586 - }, - { - "epoch": 0.3651395333638324, - "grad_norm": 0.43958693742752075, - "learning_rate": 9.736648649767396e-06, - "loss": 0.3466, - "step": 5587 - }, - { - "epoch": 0.36520488856937455, - "grad_norm": 0.48933878540992737, - "learning_rate": 9.736536806671015e-06, - "loss": 0.4191, - "step": 5588 - }, - { - "epoch": 0.3652702437749167, - "grad_norm": 0.4951789677143097, - "learning_rate": 9.736424940472903e-06, - "loss": 0.4231, - "step": 5589 - }, - { - "epoch": 0.3653355989804588, - "grad_norm": 0.5030261278152466, - "learning_rate": 9.736313051173606e-06, - "loss": 0.3794, - "step": 5590 - }, - { - "epoch": 0.36540095418600094, - "grad_norm": 0.5088279843330383, - "learning_rate": 9.736201138773671e-06, - "loss": 0.4746, - "step": 5591 - }, - { - "epoch": 0.365466309391543, - "grad_norm": 0.45412909984588623, - "learning_rate": 9.736089203273641e-06, - "loss": 0.3731, - "step": 5592 - }, - { - "epoch": 0.36553166459708514, - "grad_norm": 0.47598782181739807, - "learning_rate": 9.735977244674064e-06, - "loss": 0.4199, - "step": 5593 - }, - { - "epoch": 0.36559701980262727, - "grad_norm": 0.47414252161979675, - "learning_rate": 9.735865262975485e-06, - "loss": 0.4206, - "step": 5594 - }, - { - "epoch": 0.3656623750081694, - "grad_norm": 0.47325989603996277, - "learning_rate": 9.73575325817845e-06, - "loss": 0.3965, - "step": 5595 - }, - { - "epoch": 0.3657277302137115, - "grad_norm": 0.49983224272727966, - "learning_rate": 9.735641230283506e-06, - "loss": 0.4014, - "step": 5596 - }, - { - "epoch": 0.36579308541925365, - "grad_norm": 0.4910130202770233, - "learning_rate": 9.7355291792912e-06, - "loss": 0.4269, - "step": 5597 - }, - { - "epoch": 0.3658584406247958, - "grad_norm": 0.49281221628189087, - "learning_rate": 9.735417105202076e-06, - "loss": 0.4208, - "step": 5598 - }, - { - "epoch": 0.3659237958303379, - "grad_norm": 0.45060423016548157, - "learning_rate": 9.735305008016682e-06, - "loss": 0.3841, - "step": 5599 - }, - { - "epoch": 0.36598915103588003, - "grad_norm": 0.477408230304718, - "learning_rate": 9.735192887735566e-06, - "loss": 0.4059, - "step": 5600 - }, - { - "epoch": 0.3660545062414221, - "grad_norm": 0.4904358983039856, - "learning_rate": 9.735080744359274e-06, - "loss": 0.447, - "step": 5601 - }, - { - "epoch": 0.36611986144696423, - "grad_norm": 0.4667161703109741, - "learning_rate": 9.734968577888354e-06, - "loss": 0.4022, - "step": 5602 - }, - { - "epoch": 0.36618521665250636, - "grad_norm": 0.4091729521751404, - "learning_rate": 9.73485638832335e-06, - "loss": 0.3389, - "step": 5603 - }, - { - "epoch": 0.3662505718580485, - "grad_norm": 0.4527824819087982, - "learning_rate": 9.734744175664812e-06, - "loss": 0.3562, - "step": 5604 - }, - { - "epoch": 0.3663159270635906, - "grad_norm": 0.46282854676246643, - "learning_rate": 9.734631939913284e-06, - "loss": 0.3935, - "step": 5605 - }, - { - "epoch": 0.36638128226913275, - "grad_norm": 0.4634261131286621, - "learning_rate": 9.734519681069318e-06, - "loss": 0.3451, - "step": 5606 - }, - { - "epoch": 0.3664466374746749, - "grad_norm": 0.41418299078941345, - "learning_rate": 9.73440739913346e-06, - "loss": 0.312, - "step": 5607 - }, - { - "epoch": 0.366511992680217, - "grad_norm": 0.47577157616615295, - "learning_rate": 9.734295094106257e-06, - "loss": 0.4029, - "step": 5608 - }, - { - "epoch": 0.3665773478857591, - "grad_norm": 0.4775184094905853, - "learning_rate": 9.734182765988255e-06, - "loss": 0.4235, - "step": 5609 - }, - { - "epoch": 0.3666427030913012, - "grad_norm": 0.4890030324459076, - "learning_rate": 9.734070414780003e-06, - "loss": 0.4324, - "step": 5610 - }, - { - "epoch": 0.36670805829684333, - "grad_norm": 0.4508154094219208, - "learning_rate": 9.733958040482053e-06, - "loss": 0.3926, - "step": 5611 - }, - { - "epoch": 0.36677341350238546, - "grad_norm": 0.48513153195381165, - "learning_rate": 9.733845643094947e-06, - "loss": 0.3973, - "step": 5612 - }, - { - "epoch": 0.3668387687079276, - "grad_norm": 0.4305776357650757, - "learning_rate": 9.733733222619235e-06, - "loss": 0.3661, - "step": 5613 - }, - { - "epoch": 0.3669041239134697, - "grad_norm": 0.45274618268013, - "learning_rate": 9.733620779055467e-06, - "loss": 0.3762, - "step": 5614 - }, - { - "epoch": 0.36696947911901184, - "grad_norm": 0.4698070287704468, - "learning_rate": 9.733508312404192e-06, - "loss": 0.3982, - "step": 5615 - }, - { - "epoch": 0.36703483432455397, - "grad_norm": 0.487503319978714, - "learning_rate": 9.733395822665956e-06, - "loss": 0.4062, - "step": 5616 - }, - { - "epoch": 0.3671001895300961, - "grad_norm": 0.43929579854011536, - "learning_rate": 9.73328330984131e-06, - "loss": 0.3857, - "step": 5617 - }, - { - "epoch": 0.36716554473563817, - "grad_norm": 0.4575706720352173, - "learning_rate": 9.7331707739308e-06, - "loss": 0.4024, - "step": 5618 - }, - { - "epoch": 0.3672308999411803, - "grad_norm": 0.5243099927902222, - "learning_rate": 9.733058214934976e-06, - "loss": 0.4088, - "step": 5619 - }, - { - "epoch": 0.36729625514672243, - "grad_norm": 0.45200109481811523, - "learning_rate": 9.73294563285439e-06, - "loss": 0.4076, - "step": 5620 - }, - { - "epoch": 0.36736161035226456, - "grad_norm": 0.45660802721977234, - "learning_rate": 9.732833027689586e-06, - "loss": 0.3723, - "step": 5621 - }, - { - "epoch": 0.3674269655578067, - "grad_norm": 0.530707061290741, - "learning_rate": 9.732720399441116e-06, - "loss": 0.5005, - "step": 5622 - }, - { - "epoch": 0.3674923207633488, - "grad_norm": 0.4938926696777344, - "learning_rate": 9.732607748109531e-06, - "loss": 0.4579, - "step": 5623 - }, - { - "epoch": 0.36755767596889094, - "grad_norm": 0.46209609508514404, - "learning_rate": 9.732495073695377e-06, - "loss": 0.3743, - "step": 5624 - }, - { - "epoch": 0.36762303117443307, - "grad_norm": 0.47291889786720276, - "learning_rate": 9.732382376199205e-06, - "loss": 0.372, - "step": 5625 - }, - { - "epoch": 0.36768838637997514, - "grad_norm": 0.46801483631134033, - "learning_rate": 9.732269655621565e-06, - "loss": 0.415, - "step": 5626 - }, - { - "epoch": 0.36775374158551727, - "grad_norm": 0.4820306897163391, - "learning_rate": 9.732156911963006e-06, - "loss": 0.4191, - "step": 5627 - }, - { - "epoch": 0.3678190967910594, - "grad_norm": 0.5146209597587585, - "learning_rate": 9.732044145224078e-06, - "loss": 0.4556, - "step": 5628 - }, - { - "epoch": 0.3678844519966015, - "grad_norm": 0.42632946372032166, - "learning_rate": 9.731931355405334e-06, - "loss": 0.3393, - "step": 5629 - }, - { - "epoch": 0.36794980720214365, - "grad_norm": 0.498308926820755, - "learning_rate": 9.731818542507318e-06, - "loss": 0.3939, - "step": 5630 - }, - { - "epoch": 0.3680151624076858, - "grad_norm": 0.4742498993873596, - "learning_rate": 9.731705706530585e-06, - "loss": 0.3908, - "step": 5631 - }, - { - "epoch": 0.3680805176132279, - "grad_norm": 0.4501262307167053, - "learning_rate": 9.731592847475685e-06, - "loss": 0.3827, - "step": 5632 - }, - { - "epoch": 0.36814587281877004, - "grad_norm": 0.44378212094306946, - "learning_rate": 9.731479965343166e-06, - "loss": 0.3771, - "step": 5633 - }, - { - "epoch": 0.3682112280243121, - "grad_norm": 0.4440082013607025, - "learning_rate": 9.73136706013358e-06, - "loss": 0.3379, - "step": 5634 - }, - { - "epoch": 0.36827658322985424, - "grad_norm": 0.44855231046676636, - "learning_rate": 9.73125413184748e-06, - "loss": 0.3703, - "step": 5635 - }, - { - "epoch": 0.36834193843539637, - "grad_norm": 0.45003512501716614, - "learning_rate": 9.731141180485412e-06, - "loss": 0.3888, - "step": 5636 - }, - { - "epoch": 0.3684072936409385, - "grad_norm": 0.49998369812965393, - "learning_rate": 9.73102820604793e-06, - "loss": 0.4369, - "step": 5637 - }, - { - "epoch": 0.3684726488464806, - "grad_norm": 0.4833516478538513, - "learning_rate": 9.730915208535584e-06, - "loss": 0.4015, - "step": 5638 - }, - { - "epoch": 0.36853800405202275, - "grad_norm": 0.5183923244476318, - "learning_rate": 9.730802187948929e-06, - "loss": 0.4831, - "step": 5639 - }, - { - "epoch": 0.3686033592575649, - "grad_norm": 0.44433853030204773, - "learning_rate": 9.73068914428851e-06, - "loss": 0.3593, - "step": 5640 - }, - { - "epoch": 0.368668714463107, - "grad_norm": 0.4593752920627594, - "learning_rate": 9.730576077554881e-06, - "loss": 0.3753, - "step": 5641 - }, - { - "epoch": 0.36873406966864913, - "grad_norm": 0.4683303236961365, - "learning_rate": 9.730462987748595e-06, - "loss": 0.4123, - "step": 5642 - }, - { - "epoch": 0.3687994248741912, - "grad_norm": 0.45201343297958374, - "learning_rate": 9.7303498748702e-06, - "loss": 0.3621, - "step": 5643 - }, - { - "epoch": 0.36886478007973333, - "grad_norm": 0.4989899694919586, - "learning_rate": 9.730236738920253e-06, - "loss": 0.422, - "step": 5644 - }, - { - "epoch": 0.36893013528527546, - "grad_norm": 0.4664297103881836, - "learning_rate": 9.7301235798993e-06, - "loss": 0.3796, - "step": 5645 - }, - { - "epoch": 0.3689954904908176, - "grad_norm": 0.45861926674842834, - "learning_rate": 9.730010397807896e-06, - "loss": 0.357, - "step": 5646 - }, - { - "epoch": 0.3690608456963597, - "grad_norm": 0.4439290761947632, - "learning_rate": 9.729897192646593e-06, - "loss": 0.3652, - "step": 5647 - }, - { - "epoch": 0.36912620090190185, - "grad_norm": 0.46171101927757263, - "learning_rate": 9.729783964415943e-06, - "loss": 0.3981, - "step": 5648 - }, - { - "epoch": 0.369191556107444, - "grad_norm": 0.47597968578338623, - "learning_rate": 9.729670713116499e-06, - "loss": 0.4216, - "step": 5649 - }, - { - "epoch": 0.3692569113129861, - "grad_norm": 0.4658285975456238, - "learning_rate": 9.729557438748813e-06, - "loss": 0.407, - "step": 5650 - }, - { - "epoch": 0.3693222665185282, - "grad_norm": 0.470427006483078, - "learning_rate": 9.729444141313435e-06, - "loss": 0.3899, - "step": 5651 - }, - { - "epoch": 0.3693876217240703, - "grad_norm": 0.4995580315589905, - "learning_rate": 9.729330820810919e-06, - "loss": 0.4577, - "step": 5652 - }, - { - "epoch": 0.36945297692961243, - "grad_norm": 0.4420444071292877, - "learning_rate": 9.729217477241818e-06, - "loss": 0.3681, - "step": 5653 - }, - { - "epoch": 0.36951833213515456, - "grad_norm": 0.4462299644947052, - "learning_rate": 9.729104110606688e-06, - "loss": 0.3972, - "step": 5654 - }, - { - "epoch": 0.3695836873406967, - "grad_norm": 0.4327520728111267, - "learning_rate": 9.728990720906078e-06, - "loss": 0.3553, - "step": 5655 - }, - { - "epoch": 0.3696490425462388, - "grad_norm": 0.5445707440376282, - "learning_rate": 9.72887730814054e-06, - "loss": 0.464, - "step": 5656 - }, - { - "epoch": 0.36971439775178094, - "grad_norm": 0.4488747715950012, - "learning_rate": 9.728763872310631e-06, - "loss": 0.3879, - "step": 5657 - }, - { - "epoch": 0.36977975295732307, - "grad_norm": 0.4353260397911072, - "learning_rate": 9.7286504134169e-06, - "loss": 0.3943, - "step": 5658 - }, - { - "epoch": 0.3698451081628652, - "grad_norm": 0.4729407727718353, - "learning_rate": 9.728536931459906e-06, - "loss": 0.4071, - "step": 5659 - }, - { - "epoch": 0.36991046336840727, - "grad_norm": 0.48383960127830505, - "learning_rate": 9.728423426440197e-06, - "loss": 0.4353, - "step": 5660 - }, - { - "epoch": 0.3699758185739494, - "grad_norm": 0.5584110021591187, - "learning_rate": 9.728309898358329e-06, - "loss": 0.4949, - "step": 5661 - }, - { - "epoch": 0.37004117377949153, - "grad_norm": 0.4433134198188782, - "learning_rate": 9.728196347214857e-06, - "loss": 0.3992, - "step": 5662 - }, - { - "epoch": 0.37010652898503366, - "grad_norm": 0.46349090337753296, - "learning_rate": 9.728082773010331e-06, - "loss": 0.4185, - "step": 5663 - }, - { - "epoch": 0.3701718841905758, - "grad_norm": 0.4926023781299591, - "learning_rate": 9.72796917574531e-06, - "loss": 0.4228, - "step": 5664 - }, - { - "epoch": 0.3702372393961179, - "grad_norm": 0.4178747534751892, - "learning_rate": 9.727855555420345e-06, - "loss": 0.3268, - "step": 5665 - }, - { - "epoch": 0.37030259460166004, - "grad_norm": 0.4787285327911377, - "learning_rate": 9.72774191203599e-06, - "loss": 0.4176, - "step": 5666 - }, - { - "epoch": 0.37036794980720217, - "grad_norm": 0.4780355393886566, - "learning_rate": 9.727628245592798e-06, - "loss": 0.4169, - "step": 5667 - }, - { - "epoch": 0.37043330501274424, - "grad_norm": 0.45276856422424316, - "learning_rate": 9.727514556091327e-06, - "loss": 0.376, - "step": 5668 - }, - { - "epoch": 0.37049866021828637, - "grad_norm": 0.4688391089439392, - "learning_rate": 9.72740084353213e-06, - "loss": 0.4083, - "step": 5669 - }, - { - "epoch": 0.3705640154238285, - "grad_norm": 0.4540936052799225, - "learning_rate": 9.727287107915762e-06, - "loss": 0.3592, - "step": 5670 - }, - { - "epoch": 0.3706293706293706, - "grad_norm": 0.47923359274864197, - "learning_rate": 9.727173349242775e-06, - "loss": 0.4295, - "step": 5671 - }, - { - "epoch": 0.37069472583491275, - "grad_norm": 0.4697933495044708, - "learning_rate": 9.727059567513726e-06, - "loss": 0.4117, - "step": 5672 - }, - { - "epoch": 0.3707600810404549, - "grad_norm": 0.42383813858032227, - "learning_rate": 9.726945762729172e-06, - "loss": 0.3304, - "step": 5673 - }, - { - "epoch": 0.370825436245997, - "grad_norm": 0.44558316469192505, - "learning_rate": 9.726831934889664e-06, - "loss": 0.3653, - "step": 5674 - }, - { - "epoch": 0.37089079145153914, - "grad_norm": 0.462527334690094, - "learning_rate": 9.72671808399576e-06, - "loss": 0.3771, - "step": 5675 - }, - { - "epoch": 0.3709561466570812, - "grad_norm": 0.4809938967227936, - "learning_rate": 9.726604210048015e-06, - "loss": 0.4431, - "step": 5676 - }, - { - "epoch": 0.37102150186262334, - "grad_norm": 0.43497100472450256, - "learning_rate": 9.726490313046984e-06, - "loss": 0.3566, - "step": 5677 - }, - { - "epoch": 0.37108685706816547, - "grad_norm": 0.41215407848358154, - "learning_rate": 9.726376392993221e-06, - "loss": 0.3405, - "step": 5678 - }, - { - "epoch": 0.3711522122737076, - "grad_norm": 0.4581126868724823, - "learning_rate": 9.726262449887282e-06, - "loss": 0.378, - "step": 5679 - }, - { - "epoch": 0.3712175674792497, - "grad_norm": 0.4768775403499603, - "learning_rate": 9.726148483729728e-06, - "loss": 0.4278, - "step": 5680 - }, - { - "epoch": 0.37128292268479185, - "grad_norm": 0.45675817131996155, - "learning_rate": 9.726034494521105e-06, - "loss": 0.4159, - "step": 5681 - }, - { - "epoch": 0.371348277890334, - "grad_norm": 0.46914592385292053, - "learning_rate": 9.725920482261979e-06, - "loss": 0.4281, - "step": 5682 - }, - { - "epoch": 0.3714136330958761, - "grad_norm": 0.4493812620639801, - "learning_rate": 9.725806446952899e-06, - "loss": 0.3658, - "step": 5683 - }, - { - "epoch": 0.37147898830141823, - "grad_norm": 0.7395375967025757, - "learning_rate": 9.725692388594426e-06, - "loss": 0.3804, - "step": 5684 - }, - { - "epoch": 0.3715443435069603, - "grad_norm": 0.45588287711143494, - "learning_rate": 9.725578307187112e-06, - "loss": 0.3539, - "step": 5685 - }, - { - "epoch": 0.37160969871250243, - "grad_norm": 0.4964325726032257, - "learning_rate": 9.725464202731515e-06, - "loss": 0.3911, - "step": 5686 - }, - { - "epoch": 0.37167505391804456, - "grad_norm": 0.5292208194732666, - "learning_rate": 9.725350075228194e-06, - "loss": 0.4343, - "step": 5687 - }, - { - "epoch": 0.3717404091235867, - "grad_norm": 0.4464513063430786, - "learning_rate": 9.725235924677703e-06, - "loss": 0.3828, - "step": 5688 - }, - { - "epoch": 0.3718057643291288, - "grad_norm": 0.47259700298309326, - "learning_rate": 9.7251217510806e-06, - "loss": 0.4177, - "step": 5689 - }, - { - "epoch": 0.37187111953467095, - "grad_norm": 0.4382416903972626, - "learning_rate": 9.72500755443744e-06, - "loss": 0.3304, - "step": 5690 - }, - { - "epoch": 0.3719364747402131, - "grad_norm": 0.4824185371398926, - "learning_rate": 9.724893334748781e-06, - "loss": 0.3786, - "step": 5691 - }, - { - "epoch": 0.3720018299457552, - "grad_norm": 0.47357267141342163, - "learning_rate": 9.724779092015183e-06, - "loss": 0.3813, - "step": 5692 - }, - { - "epoch": 0.3720671851512973, - "grad_norm": 0.4315038323402405, - "learning_rate": 9.724664826237198e-06, - "loss": 0.3573, - "step": 5693 - }, - { - "epoch": 0.3721325403568394, - "grad_norm": 0.44466453790664673, - "learning_rate": 9.724550537415386e-06, - "loss": 0.3895, - "step": 5694 - }, - { - "epoch": 0.37219789556238153, - "grad_norm": 0.42833951115608215, - "learning_rate": 9.724436225550305e-06, - "loss": 0.336, - "step": 5695 - }, - { - "epoch": 0.37226325076792366, - "grad_norm": 0.4936835467815399, - "learning_rate": 9.724321890642512e-06, - "loss": 0.3872, - "step": 5696 - }, - { - "epoch": 0.3723286059734658, - "grad_norm": 0.4252793788909912, - "learning_rate": 9.724207532692563e-06, - "loss": 0.331, - "step": 5697 - }, - { - "epoch": 0.3723939611790079, - "grad_norm": 0.48302435874938965, - "learning_rate": 9.724093151701019e-06, - "loss": 0.4133, - "step": 5698 - }, - { - "epoch": 0.37245931638455004, - "grad_norm": 0.44952890276908875, - "learning_rate": 9.723978747668436e-06, - "loss": 0.3816, - "step": 5699 - }, - { - "epoch": 0.37252467159009217, - "grad_norm": 0.46215274930000305, - "learning_rate": 9.72386432059537e-06, - "loss": 0.4048, - "step": 5700 - }, - { - "epoch": 0.3725900267956343, - "grad_norm": 0.4442006051540375, - "learning_rate": 9.723749870482384e-06, - "loss": 0.3711, - "step": 5701 - }, - { - "epoch": 0.37265538200117637, - "grad_norm": 0.4747190475463867, - "learning_rate": 9.723635397330032e-06, - "loss": 0.3532, - "step": 5702 - }, - { - "epoch": 0.3727207372067185, - "grad_norm": 0.509239673614502, - "learning_rate": 9.723520901138875e-06, - "loss": 0.435, - "step": 5703 - }, - { - "epoch": 0.37278609241226063, - "grad_norm": 0.4344327449798584, - "learning_rate": 9.723406381909469e-06, - "loss": 0.3907, - "step": 5704 - }, - { - "epoch": 0.37285144761780276, - "grad_norm": 0.41855117678642273, - "learning_rate": 9.723291839642372e-06, - "loss": 0.3361, - "step": 5705 - }, - { - "epoch": 0.3729168028233449, - "grad_norm": 0.4844302833080292, - "learning_rate": 9.723177274338146e-06, - "loss": 0.4348, - "step": 5706 - }, - { - "epoch": 0.372982158028887, - "grad_norm": 0.4601530432701111, - "learning_rate": 9.72306268599735e-06, - "loss": 0.3793, - "step": 5707 - }, - { - "epoch": 0.37304751323442914, - "grad_norm": 0.5064185261726379, - "learning_rate": 9.722948074620539e-06, - "loss": 0.4081, - "step": 5708 - }, - { - "epoch": 0.37311286843997127, - "grad_norm": 0.4733896851539612, - "learning_rate": 9.722833440208274e-06, - "loss": 0.4084, - "step": 5709 - }, - { - "epoch": 0.37317822364551334, - "grad_norm": 0.4767979085445404, - "learning_rate": 9.722718782761116e-06, - "loss": 0.4331, - "step": 5710 - }, - { - "epoch": 0.37324357885105547, - "grad_norm": 0.47276976704597473, - "learning_rate": 9.72260410227962e-06, - "loss": 0.4029, - "step": 5711 - }, - { - "epoch": 0.3733089340565976, - "grad_norm": 0.4697682857513428, - "learning_rate": 9.72248939876435e-06, - "loss": 0.4054, - "step": 5712 - }, - { - "epoch": 0.3733742892621397, - "grad_norm": 0.4399544596672058, - "learning_rate": 9.722374672215861e-06, - "loss": 0.369, - "step": 5713 - }, - { - "epoch": 0.37343964446768185, - "grad_norm": 0.4537452161312103, - "learning_rate": 9.722259922634717e-06, - "loss": 0.3864, - "step": 5714 - }, - { - "epoch": 0.373504999673224, - "grad_norm": 0.4806780517101288, - "learning_rate": 9.722145150021474e-06, - "loss": 0.4301, - "step": 5715 - }, - { - "epoch": 0.3735703548787661, - "grad_norm": 0.459480881690979, - "learning_rate": 9.722030354376693e-06, - "loss": 0.3682, - "step": 5716 - }, - { - "epoch": 0.37363571008430824, - "grad_norm": 0.45941513776779175, - "learning_rate": 9.721915535700934e-06, - "loss": 0.3576, - "step": 5717 - }, - { - "epoch": 0.3737010652898503, - "grad_norm": 0.4493623375892639, - "learning_rate": 9.721800693994759e-06, - "loss": 0.3914, - "step": 5718 - }, - { - "epoch": 0.37376642049539244, - "grad_norm": 0.47806409001350403, - "learning_rate": 9.721685829258724e-06, - "loss": 0.3855, - "step": 5719 - }, - { - "epoch": 0.37383177570093457, - "grad_norm": 0.4922315180301666, - "learning_rate": 9.721570941493392e-06, - "loss": 0.4044, - "step": 5720 - }, - { - "epoch": 0.3738971309064767, - "grad_norm": 0.4188794493675232, - "learning_rate": 9.721456030699323e-06, - "loss": 0.3548, - "step": 5721 - }, - { - "epoch": 0.3739624861120188, - "grad_norm": 0.4768242835998535, - "learning_rate": 9.721341096877078e-06, - "loss": 0.3684, - "step": 5722 - }, - { - "epoch": 0.37402784131756095, - "grad_norm": 0.4828105568885803, - "learning_rate": 9.721226140027215e-06, - "loss": 0.3814, - "step": 5723 - }, - { - "epoch": 0.3740931965231031, - "grad_norm": 0.41793200373649597, - "learning_rate": 9.721111160150299e-06, - "loss": 0.3481, - "step": 5724 - }, - { - "epoch": 0.3741585517286452, - "grad_norm": 0.45060980319976807, - "learning_rate": 9.720996157246887e-06, - "loss": 0.4173, - "step": 5725 - }, - { - "epoch": 0.37422390693418733, - "grad_norm": 0.4805615544319153, - "learning_rate": 9.72088113131754e-06, - "loss": 0.4074, - "step": 5726 - }, - { - "epoch": 0.3742892621397294, - "grad_norm": 0.4488745629787445, - "learning_rate": 9.720766082362823e-06, - "loss": 0.3878, - "step": 5727 - }, - { - "epoch": 0.37435461734527153, - "grad_norm": 0.4357205331325531, - "learning_rate": 9.720651010383293e-06, - "loss": 0.3577, - "step": 5728 - }, - { - "epoch": 0.37441997255081366, - "grad_norm": 0.47308945655822754, - "learning_rate": 9.720535915379513e-06, - "loss": 0.4411, - "step": 5729 - }, - { - "epoch": 0.3744853277563558, - "grad_norm": 0.44247686862945557, - "learning_rate": 9.720420797352041e-06, - "loss": 0.3704, - "step": 5730 - }, - { - "epoch": 0.3745506829618979, - "grad_norm": 0.447162389755249, - "learning_rate": 9.720305656301446e-06, - "loss": 0.3855, - "step": 5731 - }, - { - "epoch": 0.37461603816744005, - "grad_norm": 0.4559944272041321, - "learning_rate": 9.720190492228283e-06, - "loss": 0.3663, - "step": 5732 - }, - { - "epoch": 0.3746813933729822, - "grad_norm": 0.5380062460899353, - "learning_rate": 9.720075305133115e-06, - "loss": 0.5212, - "step": 5733 - }, - { - "epoch": 0.3747467485785243, - "grad_norm": 0.45457741618156433, - "learning_rate": 9.719960095016506e-06, - "loss": 0.3365, - "step": 5734 - }, - { - "epoch": 0.3748121037840664, - "grad_norm": 0.43439793586730957, - "learning_rate": 9.719844861879016e-06, - "loss": 0.3446, - "step": 5735 - }, - { - "epoch": 0.3748774589896085, - "grad_norm": 0.43436864018440247, - "learning_rate": 9.719729605721206e-06, - "loss": 0.3738, - "step": 5736 - }, - { - "epoch": 0.37494281419515063, - "grad_norm": 0.4463585615158081, - "learning_rate": 9.719614326543642e-06, - "loss": 0.3721, - "step": 5737 - }, - { - "epoch": 0.37500816940069276, - "grad_norm": 0.5004968047142029, - "learning_rate": 9.719499024346883e-06, - "loss": 0.4225, - "step": 5738 - }, - { - "epoch": 0.3750735246062349, - "grad_norm": 0.4942430853843689, - "learning_rate": 9.71938369913149e-06, - "loss": 0.4333, - "step": 5739 - }, - { - "epoch": 0.375138879811777, - "grad_norm": 0.48074761033058167, - "learning_rate": 9.719268350898031e-06, - "loss": 0.3774, - "step": 5740 - }, - { - "epoch": 0.37520423501731914, - "grad_norm": 0.47752365469932556, - "learning_rate": 9.719152979647064e-06, - "loss": 0.4025, - "step": 5741 - }, - { - "epoch": 0.37526959022286127, - "grad_norm": 0.5058532953262329, - "learning_rate": 9.719037585379155e-06, - "loss": 0.4587, - "step": 5742 - }, - { - "epoch": 0.3753349454284034, - "grad_norm": 0.5277442336082458, - "learning_rate": 9.718922168094862e-06, - "loss": 0.4512, - "step": 5743 - }, - { - "epoch": 0.37540030063394547, - "grad_norm": 0.4554592967033386, - "learning_rate": 9.718806727794751e-06, - "loss": 0.3809, - "step": 5744 - }, - { - "epoch": 0.3754656558394876, - "grad_norm": 0.4330997169017792, - "learning_rate": 9.718691264479386e-06, - "loss": 0.335, - "step": 5745 - }, - { - "epoch": 0.37553101104502973, - "grad_norm": 0.49338728189468384, - "learning_rate": 9.718575778149328e-06, - "loss": 0.3968, - "step": 5746 - }, - { - "epoch": 0.37559636625057186, - "grad_norm": 0.4403141438961029, - "learning_rate": 9.718460268805143e-06, - "loss": 0.3625, - "step": 5747 - }, - { - "epoch": 0.375661721456114, - "grad_norm": 0.45258715748786926, - "learning_rate": 9.718344736447392e-06, - "loss": 0.3745, - "step": 5748 - }, - { - "epoch": 0.3757270766616561, - "grad_norm": 0.4778488576412201, - "learning_rate": 9.718229181076639e-06, - "loss": 0.3956, - "step": 5749 - }, - { - "epoch": 0.37579243186719824, - "grad_norm": 0.46462589502334595, - "learning_rate": 9.718113602693446e-06, - "loss": 0.3873, - "step": 5750 - }, - { - "epoch": 0.37585778707274037, - "grad_norm": 0.4922333359718323, - "learning_rate": 9.717998001298382e-06, - "loss": 0.4461, - "step": 5751 - }, - { - "epoch": 0.37592314227828244, - "grad_norm": 0.49379634857177734, - "learning_rate": 9.717882376892002e-06, - "loss": 0.4088, - "step": 5752 - }, - { - "epoch": 0.37598849748382457, - "grad_norm": 0.502359926700592, - "learning_rate": 9.71776672947488e-06, - "loss": 0.47, - "step": 5753 - }, - { - "epoch": 0.3760538526893667, - "grad_norm": 0.479813814163208, - "learning_rate": 9.717651059047574e-06, - "loss": 0.3779, - "step": 5754 - }, - { - "epoch": 0.3761192078949088, - "grad_norm": 0.4545009732246399, - "learning_rate": 9.717535365610649e-06, - "loss": 0.3494, - "step": 5755 - }, - { - "epoch": 0.37618456310045095, - "grad_norm": 0.43695205450057983, - "learning_rate": 9.71741964916467e-06, - "loss": 0.3387, - "step": 5756 - }, - { - "epoch": 0.3762499183059931, - "grad_norm": 0.4959096610546112, - "learning_rate": 9.717303909710201e-06, - "loss": 0.3893, - "step": 5757 - }, - { - "epoch": 0.3763152735115352, - "grad_norm": 0.49351975321769714, - "learning_rate": 9.717188147247806e-06, - "loss": 0.4322, - "step": 5758 - }, - { - "epoch": 0.37638062871707734, - "grad_norm": 0.49044859409332275, - "learning_rate": 9.71707236177805e-06, - "loss": 0.427, - "step": 5759 - }, - { - "epoch": 0.3764459839226194, - "grad_norm": 0.4676279127597809, - "learning_rate": 9.7169565533015e-06, - "loss": 0.3888, - "step": 5760 - }, - { - "epoch": 0.37651133912816154, - "grad_norm": 0.46494054794311523, - "learning_rate": 9.716840721818717e-06, - "loss": 0.3148, - "step": 5761 - }, - { - "epoch": 0.37657669433370367, - "grad_norm": 0.4796944260597229, - "learning_rate": 9.716724867330268e-06, - "loss": 0.4077, - "step": 5762 - }, - { - "epoch": 0.3766420495392458, - "grad_norm": 0.44766202569007874, - "learning_rate": 9.716608989836718e-06, - "loss": 0.3447, - "step": 5763 - }, - { - "epoch": 0.3767074047447879, - "grad_norm": 0.4754466712474823, - "learning_rate": 9.716493089338632e-06, - "loss": 0.417, - "step": 5764 - }, - { - "epoch": 0.37677275995033005, - "grad_norm": 0.5241832137107849, - "learning_rate": 9.716377165836575e-06, - "loss": 0.3804, - "step": 5765 - }, - { - "epoch": 0.3768381151558722, - "grad_norm": 0.5270892977714539, - "learning_rate": 9.716261219331113e-06, - "loss": 0.4039, - "step": 5766 - }, - { - "epoch": 0.3769034703614143, - "grad_norm": 0.4481326639652252, - "learning_rate": 9.71614524982281e-06, - "loss": 0.3254, - "step": 5767 - }, - { - "epoch": 0.37696882556695643, - "grad_norm": 0.44411876797676086, - "learning_rate": 9.716029257312234e-06, - "loss": 0.3497, - "step": 5768 - }, - { - "epoch": 0.3770341807724985, - "grad_norm": 0.4580141603946686, - "learning_rate": 9.71591324179995e-06, - "loss": 0.3828, - "step": 5769 - }, - { - "epoch": 0.37709953597804063, - "grad_norm": 0.7634884119033813, - "learning_rate": 9.715797203286523e-06, - "loss": 0.4167, - "step": 5770 - }, - { - "epoch": 0.37716489118358276, - "grad_norm": 0.4594669044017792, - "learning_rate": 9.71568114177252e-06, - "loss": 0.4285, - "step": 5771 - }, - { - "epoch": 0.3772302463891249, - "grad_norm": 0.4424137771129608, - "learning_rate": 9.715565057258506e-06, - "loss": 0.3743, - "step": 5772 - }, - { - "epoch": 0.377295601594667, - "grad_norm": 0.4911845624446869, - "learning_rate": 9.715448949745045e-06, - "loss": 0.391, - "step": 5773 - }, - { - "epoch": 0.37736095680020915, - "grad_norm": 0.4715944528579712, - "learning_rate": 9.715332819232708e-06, - "loss": 0.3792, - "step": 5774 - }, - { - "epoch": 0.3774263120057513, - "grad_norm": 0.46876052021980286, - "learning_rate": 9.715216665722059e-06, - "loss": 0.4158, - "step": 5775 - }, - { - "epoch": 0.3774916672112934, - "grad_norm": 0.5218589901924133, - "learning_rate": 9.715100489213665e-06, - "loss": 0.4765, - "step": 5776 - }, - { - "epoch": 0.3775570224168355, - "grad_norm": 0.45977842807769775, - "learning_rate": 9.714984289708093e-06, - "loss": 0.381, - "step": 5777 - }, - { - "epoch": 0.3776223776223776, - "grad_norm": 0.45912104845046997, - "learning_rate": 9.714868067205908e-06, - "loss": 0.3881, - "step": 5778 - }, - { - "epoch": 0.37768773282791973, - "grad_norm": 0.47479283809661865, - "learning_rate": 9.714751821707678e-06, - "loss": 0.4224, - "step": 5779 - }, - { - "epoch": 0.37775308803346186, - "grad_norm": 0.4962746202945709, - "learning_rate": 9.714635553213971e-06, - "loss": 0.4039, - "step": 5780 - }, - { - "epoch": 0.377818443239004, - "grad_norm": 0.4834185242652893, - "learning_rate": 9.714519261725354e-06, - "loss": 0.3381, - "step": 5781 - }, - { - "epoch": 0.3778837984445461, - "grad_norm": 0.4590103030204773, - "learning_rate": 9.714402947242392e-06, - "loss": 0.3527, - "step": 5782 - }, - { - "epoch": 0.37794915365008824, - "grad_norm": 0.5560005903244019, - "learning_rate": 9.714286609765654e-06, - "loss": 0.4437, - "step": 5783 - }, - { - "epoch": 0.37801450885563037, - "grad_norm": 0.47062599658966064, - "learning_rate": 9.714170249295705e-06, - "loss": 0.3903, - "step": 5784 - }, - { - "epoch": 0.3780798640611725, - "grad_norm": 0.47350549697875977, - "learning_rate": 9.714053865833117e-06, - "loss": 0.3844, - "step": 5785 - }, - { - "epoch": 0.37814521926671457, - "grad_norm": 0.46465545892715454, - "learning_rate": 9.713937459378456e-06, - "loss": 0.3819, - "step": 5786 - }, - { - "epoch": 0.3782105744722567, - "grad_norm": 0.4582565128803253, - "learning_rate": 9.713821029932287e-06, - "loss": 0.3651, - "step": 5787 - }, - { - "epoch": 0.37827592967779883, - "grad_norm": 0.48874345421791077, - "learning_rate": 9.71370457749518e-06, - "loss": 0.4033, - "step": 5788 - }, - { - "epoch": 0.37834128488334096, - "grad_norm": 0.4774669110774994, - "learning_rate": 9.7135881020677e-06, - "loss": 0.4221, - "step": 5789 - }, - { - "epoch": 0.3784066400888831, - "grad_norm": 0.45412692427635193, - "learning_rate": 9.713471603650422e-06, - "loss": 0.3775, - "step": 5790 - }, - { - "epoch": 0.3784719952944252, - "grad_norm": 0.4761379361152649, - "learning_rate": 9.713355082243909e-06, - "loss": 0.4267, - "step": 5791 - }, - { - "epoch": 0.37853735049996734, - "grad_norm": 0.536064624786377, - "learning_rate": 9.713238537848731e-06, - "loss": 0.3554, - "step": 5792 - }, - { - "epoch": 0.37860270570550947, - "grad_norm": 0.43408045172691345, - "learning_rate": 9.713121970465456e-06, - "loss": 0.393, - "step": 5793 - }, - { - "epoch": 0.37866806091105154, - "grad_norm": 0.4712388813495636, - "learning_rate": 9.713005380094651e-06, - "loss": 0.3941, - "step": 5794 - }, - { - "epoch": 0.37873341611659367, - "grad_norm": 0.5197617411613464, - "learning_rate": 9.712888766736887e-06, - "loss": 0.3907, - "step": 5795 - }, - { - "epoch": 0.3787987713221358, - "grad_norm": 0.4356193542480469, - "learning_rate": 9.712772130392731e-06, - "loss": 0.3731, - "step": 5796 - }, - { - "epoch": 0.3788641265276779, - "grad_norm": 0.4431528151035309, - "learning_rate": 9.712655471062753e-06, - "loss": 0.3671, - "step": 5797 - }, - { - "epoch": 0.37892948173322005, - "grad_norm": 0.41992664337158203, - "learning_rate": 9.712538788747522e-06, - "loss": 0.3212, - "step": 5798 - }, - { - "epoch": 0.3789948369387622, - "grad_norm": 0.5286357402801514, - "learning_rate": 9.712422083447606e-06, - "loss": 0.4632, - "step": 5799 - }, - { - "epoch": 0.3790601921443043, - "grad_norm": 0.4489547312259674, - "learning_rate": 9.712305355163577e-06, - "loss": 0.3681, - "step": 5800 - }, - { - "epoch": 0.37912554734984644, - "grad_norm": 0.43811309337615967, - "learning_rate": 9.712188603896e-06, - "loss": 0.3575, - "step": 5801 - }, - { - "epoch": 0.3791909025553885, - "grad_norm": 0.506718099117279, - "learning_rate": 9.712071829645447e-06, - "loss": 0.3645, - "step": 5802 - }, - { - "epoch": 0.37925625776093064, - "grad_norm": 0.4482395052909851, - "learning_rate": 9.711955032412488e-06, - "loss": 0.3644, - "step": 5803 - }, - { - "epoch": 0.37932161296647277, - "grad_norm": 0.46370434761047363, - "learning_rate": 9.71183821219769e-06, - "loss": 0.4268, - "step": 5804 - }, - { - "epoch": 0.3793869681720149, - "grad_norm": 0.45522060990333557, - "learning_rate": 9.711721369001628e-06, - "loss": 0.4008, - "step": 5805 - }, - { - "epoch": 0.379452323377557, - "grad_norm": 0.4726666808128357, - "learning_rate": 9.711604502824866e-06, - "loss": 0.4255, - "step": 5806 - }, - { - "epoch": 0.37951767858309915, - "grad_norm": 0.48169249296188354, - "learning_rate": 9.711487613667979e-06, - "loss": 0.4176, - "step": 5807 - }, - { - "epoch": 0.3795830337886413, - "grad_norm": 0.4644019901752472, - "learning_rate": 9.711370701531531e-06, - "loss": 0.442, - "step": 5808 - }, - { - "epoch": 0.3796483889941834, - "grad_norm": 0.5224023461341858, - "learning_rate": 9.711253766416098e-06, - "loss": 0.43, - "step": 5809 - }, - { - "epoch": 0.37971374419972553, - "grad_norm": 0.4404146373271942, - "learning_rate": 9.711136808322248e-06, - "loss": 0.3619, - "step": 5810 - }, - { - "epoch": 0.3797790994052676, - "grad_norm": 0.42197537422180176, - "learning_rate": 9.711019827250553e-06, - "loss": 0.3293, - "step": 5811 - }, - { - "epoch": 0.37984445461080973, - "grad_norm": 0.4823967218399048, - "learning_rate": 9.710902823201581e-06, - "loss": 0.429, - "step": 5812 - }, - { - "epoch": 0.37990980981635186, - "grad_norm": 0.46075764298439026, - "learning_rate": 9.710785796175904e-06, - "loss": 0.4204, - "step": 5813 - }, - { - "epoch": 0.379975165021894, - "grad_norm": 0.49664467573165894, - "learning_rate": 9.710668746174094e-06, - "loss": 0.4367, - "step": 5814 - }, - { - "epoch": 0.3800405202274361, - "grad_norm": 0.4818166196346283, - "learning_rate": 9.710551673196718e-06, - "loss": 0.4268, - "step": 5815 - }, - { - "epoch": 0.38010587543297825, - "grad_norm": 0.47597020864486694, - "learning_rate": 9.710434577244352e-06, - "loss": 0.3874, - "step": 5816 - }, - { - "epoch": 0.3801712306385204, - "grad_norm": 0.48701581358909607, - "learning_rate": 9.710317458317563e-06, - "loss": 0.4458, - "step": 5817 - }, - { - "epoch": 0.3802365858440625, - "grad_norm": 0.46156200766563416, - "learning_rate": 9.710200316416925e-06, - "loss": 0.3805, - "step": 5818 - }, - { - "epoch": 0.3803019410496046, - "grad_norm": 0.46452781558036804, - "learning_rate": 9.710083151543009e-06, - "loss": 0.3927, - "step": 5819 - }, - { - "epoch": 0.3803672962551467, - "grad_norm": 0.47801366448402405, - "learning_rate": 9.709965963696384e-06, - "loss": 0.4331, - "step": 5820 - }, - { - "epoch": 0.38043265146068883, - "grad_norm": 0.4413713216781616, - "learning_rate": 9.709848752877625e-06, - "loss": 0.3852, - "step": 5821 - }, - { - "epoch": 0.38049800666623096, - "grad_norm": 0.4716586470603943, - "learning_rate": 9.7097315190873e-06, - "loss": 0.4046, - "step": 5822 - }, - { - "epoch": 0.3805633618717731, - "grad_norm": 0.488387793302536, - "learning_rate": 9.709614262325984e-06, - "loss": 0.3903, - "step": 5823 - }, - { - "epoch": 0.3806287170773152, - "grad_norm": 0.4786669611930847, - "learning_rate": 9.709496982594248e-06, - "loss": 0.4106, - "step": 5824 - }, - { - "epoch": 0.38069407228285734, - "grad_norm": 0.49281588196754456, - "learning_rate": 9.709379679892664e-06, - "loss": 0.4434, - "step": 5825 - }, - { - "epoch": 0.38075942748839947, - "grad_norm": 0.44772833585739136, - "learning_rate": 9.709262354221802e-06, - "loss": 0.3613, - "step": 5826 - }, - { - "epoch": 0.3808247826939416, - "grad_norm": 0.4543074667453766, - "learning_rate": 9.709145005582236e-06, - "loss": 0.3803, - "step": 5827 - }, - { - "epoch": 0.38089013789948367, - "grad_norm": 0.4856089949607849, - "learning_rate": 9.709027633974539e-06, - "loss": 0.4341, - "step": 5828 - }, - { - "epoch": 0.3809554931050258, - "grad_norm": 0.46512290835380554, - "learning_rate": 9.708910239399285e-06, - "loss": 0.4293, - "step": 5829 - }, - { - "epoch": 0.38102084831056793, - "grad_norm": 0.48706063628196716, - "learning_rate": 9.708792821857043e-06, - "loss": 0.4709, - "step": 5830 - }, - { - "epoch": 0.38108620351611006, - "grad_norm": 0.5061370134353638, - "learning_rate": 9.708675381348386e-06, - "loss": 0.4216, - "step": 5831 - }, - { - "epoch": 0.3811515587216522, - "grad_norm": 0.4879632592201233, - "learning_rate": 9.708557917873888e-06, - "loss": 0.4117, - "step": 5832 - }, - { - "epoch": 0.3812169139271943, - "grad_norm": 0.44429755210876465, - "learning_rate": 9.708440431434124e-06, - "loss": 0.3531, - "step": 5833 - }, - { - "epoch": 0.38128226913273644, - "grad_norm": 0.45839354395866394, - "learning_rate": 9.708322922029663e-06, - "loss": 0.4112, - "step": 5834 - }, - { - "epoch": 0.38134762433827857, - "grad_norm": 0.4795777499675751, - "learning_rate": 9.70820538966108e-06, - "loss": 0.3855, - "step": 5835 - }, - { - "epoch": 0.38141297954382064, - "grad_norm": 0.48305612802505493, - "learning_rate": 9.70808783432895e-06, - "loss": 0.3883, - "step": 5836 - }, - { - "epoch": 0.38147833474936277, - "grad_norm": 0.48505690693855286, - "learning_rate": 9.707970256033842e-06, - "loss": 0.4406, - "step": 5837 - }, - { - "epoch": 0.3815436899549049, - "grad_norm": 0.42778703570365906, - "learning_rate": 9.707852654776334e-06, - "loss": 0.3501, - "step": 5838 - }, - { - "epoch": 0.381609045160447, - "grad_norm": 0.5006610155105591, - "learning_rate": 9.707735030556997e-06, - "loss": 0.425, - "step": 5839 - }, - { - "epoch": 0.38167440036598915, - "grad_norm": 0.46174129843711853, - "learning_rate": 9.707617383376405e-06, - "loss": 0.383, - "step": 5840 - }, - { - "epoch": 0.3817397555715313, - "grad_norm": 0.47170525789260864, - "learning_rate": 9.707499713235134e-06, - "loss": 0.373, - "step": 5841 - }, - { - "epoch": 0.3818051107770734, - "grad_norm": 0.4463726878166199, - "learning_rate": 9.707382020133753e-06, - "loss": 0.3583, - "step": 5842 - }, - { - "epoch": 0.38187046598261554, - "grad_norm": 0.49965202808380127, - "learning_rate": 9.707264304072842e-06, - "loss": 0.3849, - "step": 5843 - }, - { - "epoch": 0.3819358211881576, - "grad_norm": 0.4620155096054077, - "learning_rate": 9.70714656505297e-06, - "loss": 0.4252, - "step": 5844 - }, - { - "epoch": 0.38200117639369974, - "grad_norm": 0.478447288274765, - "learning_rate": 9.707028803074714e-06, - "loss": 0.4037, - "step": 5845 - }, - { - "epoch": 0.38206653159924187, - "grad_norm": 0.4635656177997589, - "learning_rate": 9.706911018138648e-06, - "loss": 0.4044, - "step": 5846 - }, - { - "epoch": 0.382131886804784, - "grad_norm": 0.46208301186561584, - "learning_rate": 9.706793210245347e-06, - "loss": 0.3903, - "step": 5847 - }, - { - "epoch": 0.3821972420103261, - "grad_norm": 0.4922182261943817, - "learning_rate": 9.706675379395384e-06, - "loss": 0.4278, - "step": 5848 - }, - { - "epoch": 0.38226259721586825, - "grad_norm": 0.4474487006664276, - "learning_rate": 9.706557525589335e-06, - "loss": 0.3734, - "step": 5849 - }, - { - "epoch": 0.3823279524214104, - "grad_norm": 0.47487691044807434, - "learning_rate": 9.706439648827774e-06, - "loss": 0.4198, - "step": 5850 - }, - { - "epoch": 0.3823933076269525, - "grad_norm": 0.4491572380065918, - "learning_rate": 9.706321749111278e-06, - "loss": 0.3854, - "step": 5851 - }, - { - "epoch": 0.38245866283249463, - "grad_norm": 0.45293572545051575, - "learning_rate": 9.70620382644042e-06, - "loss": 0.3901, - "step": 5852 - }, - { - "epoch": 0.3825240180380367, - "grad_norm": 0.48248592019081116, - "learning_rate": 9.706085880815775e-06, - "loss": 0.3819, - "step": 5853 - }, - { - "epoch": 0.38258937324357883, - "grad_norm": 0.4660474956035614, - "learning_rate": 9.705967912237918e-06, - "loss": 0.39, - "step": 5854 - }, - { - "epoch": 0.38265472844912096, - "grad_norm": 0.44777587056159973, - "learning_rate": 9.705849920707426e-06, - "loss": 0.4059, - "step": 5855 - }, - { - "epoch": 0.3827200836546631, - "grad_norm": 0.47871822118759155, - "learning_rate": 9.705731906224874e-06, - "loss": 0.3948, - "step": 5856 - }, - { - "epoch": 0.3827854388602052, - "grad_norm": 0.47725963592529297, - "learning_rate": 9.705613868790836e-06, - "loss": 0.4298, - "step": 5857 - }, - { - "epoch": 0.38285079406574735, - "grad_norm": 0.4882162809371948, - "learning_rate": 9.70549580840589e-06, - "loss": 0.4004, - "step": 5858 - }, - { - "epoch": 0.3829161492712895, - "grad_norm": 0.43429046869277954, - "learning_rate": 9.70537772507061e-06, - "loss": 0.3604, - "step": 5859 - }, - { - "epoch": 0.3829815044768316, - "grad_norm": 0.5152312517166138, - "learning_rate": 9.705259618785574e-06, - "loss": 0.4511, - "step": 5860 - }, - { - "epoch": 0.3830468596823737, - "grad_norm": 0.4840807318687439, - "learning_rate": 9.705141489551355e-06, - "loss": 0.4271, - "step": 5861 - }, - { - "epoch": 0.3831122148879158, - "grad_norm": 0.4814096987247467, - "learning_rate": 9.705023337368533e-06, - "loss": 0.3728, - "step": 5862 - }, - { - "epoch": 0.38317757009345793, - "grad_norm": 0.4504605531692505, - "learning_rate": 9.704905162237682e-06, - "loss": 0.3762, - "step": 5863 - }, - { - "epoch": 0.38324292529900006, - "grad_norm": 0.48239997029304504, - "learning_rate": 9.704786964159378e-06, - "loss": 0.3957, - "step": 5864 - }, - { - "epoch": 0.3833082805045422, - "grad_norm": 0.47395145893096924, - "learning_rate": 9.704668743134198e-06, - "loss": 0.369, - "step": 5865 - }, - { - "epoch": 0.3833736357100843, - "grad_norm": 0.4495809078216553, - "learning_rate": 9.704550499162718e-06, - "loss": 0.3869, - "step": 5866 - }, - { - "epoch": 0.38343899091562644, - "grad_norm": 0.4741559326648712, - "learning_rate": 9.704432232245515e-06, - "loss": 0.4328, - "step": 5867 - }, - { - "epoch": 0.38350434612116857, - "grad_norm": 0.4773492217063904, - "learning_rate": 9.704313942383168e-06, - "loss": 0.3766, - "step": 5868 - }, - { - "epoch": 0.3835697013267107, - "grad_norm": 0.5167033076286316, - "learning_rate": 9.704195629576251e-06, - "loss": 0.441, - "step": 5869 - }, - { - "epoch": 0.38363505653225277, - "grad_norm": 0.42016810178756714, - "learning_rate": 9.704077293825344e-06, - "loss": 0.3633, - "step": 5870 - }, - { - "epoch": 0.3837004117377949, - "grad_norm": 0.4640307128429413, - "learning_rate": 9.70395893513102e-06, - "loss": 0.3903, - "step": 5871 - }, - { - "epoch": 0.38376576694333703, - "grad_norm": 0.4658341109752655, - "learning_rate": 9.70384055349386e-06, - "loss": 0.3917, - "step": 5872 - }, - { - "epoch": 0.38383112214887916, - "grad_norm": 0.4318199157714844, - "learning_rate": 9.70372214891444e-06, - "loss": 0.3425, - "step": 5873 - }, - { - "epoch": 0.3838964773544213, - "grad_norm": 0.4524925649166107, - "learning_rate": 9.703603721393338e-06, - "loss": 0.3607, - "step": 5874 - }, - { - "epoch": 0.3839618325599634, - "grad_norm": 0.44121888279914856, - "learning_rate": 9.70348527093113e-06, - "loss": 0.3294, - "step": 5875 - }, - { - "epoch": 0.38402718776550554, - "grad_norm": 0.46296006441116333, - "learning_rate": 9.703366797528396e-06, - "loss": 0.3842, - "step": 5876 - }, - { - "epoch": 0.38409254297104767, - "grad_norm": 0.44925475120544434, - "learning_rate": 9.703248301185712e-06, - "loss": 0.3972, - "step": 5877 - }, - { - "epoch": 0.38415789817658974, - "grad_norm": 0.44322964549064636, - "learning_rate": 9.703129781903657e-06, - "loss": 0.3795, - "step": 5878 - }, - { - "epoch": 0.38422325338213187, - "grad_norm": 0.5163261294364929, - "learning_rate": 9.703011239682808e-06, - "loss": 0.492, - "step": 5879 - }, - { - "epoch": 0.384288608587674, - "grad_norm": 0.485990047454834, - "learning_rate": 9.702892674523744e-06, - "loss": 0.4458, - "step": 5880 - }, - { - "epoch": 0.3843539637932161, - "grad_norm": 0.4604759216308594, - "learning_rate": 9.702774086427044e-06, - "loss": 0.3971, - "step": 5881 - }, - { - "epoch": 0.38441931899875825, - "grad_norm": 0.46555641293525696, - "learning_rate": 9.702655475393286e-06, - "loss": 0.4059, - "step": 5882 - }, - { - "epoch": 0.3844846742043004, - "grad_norm": 0.6112158298492432, - "learning_rate": 9.702536841423047e-06, - "loss": 0.3652, - "step": 5883 - }, - { - "epoch": 0.3845500294098425, - "grad_norm": 0.4829863905906677, - "learning_rate": 9.702418184516906e-06, - "loss": 0.4305, - "step": 5884 - }, - { - "epoch": 0.38461538461538464, - "grad_norm": 0.4427691400051117, - "learning_rate": 9.702299504675443e-06, - "loss": 0.402, - "step": 5885 - }, - { - "epoch": 0.3846807398209267, - "grad_norm": 0.44634321331977844, - "learning_rate": 9.702180801899237e-06, - "loss": 0.3748, - "step": 5886 - }, - { - "epoch": 0.38474609502646884, - "grad_norm": 0.46292808651924133, - "learning_rate": 9.702062076188866e-06, - "loss": 0.4121, - "step": 5887 - }, - { - "epoch": 0.38481145023201097, - "grad_norm": 0.47690489888191223, - "learning_rate": 9.701943327544909e-06, - "loss": 0.3898, - "step": 5888 - }, - { - "epoch": 0.3848768054375531, - "grad_norm": 0.468337744474411, - "learning_rate": 9.701824555967947e-06, - "loss": 0.387, - "step": 5889 - }, - { - "epoch": 0.3849421606430952, - "grad_norm": 0.4708351194858551, - "learning_rate": 9.701705761458555e-06, - "loss": 0.4185, - "step": 5890 - }, - { - "epoch": 0.38500751584863735, - "grad_norm": 0.4758375883102417, - "learning_rate": 9.701586944017317e-06, - "loss": 0.3729, - "step": 5891 - }, - { - "epoch": 0.3850728710541795, - "grad_norm": 0.485735148191452, - "learning_rate": 9.70146810364481e-06, - "loss": 0.434, - "step": 5892 - }, - { - "epoch": 0.3851382262597216, - "grad_norm": 0.4785767197608948, - "learning_rate": 9.701349240341615e-06, - "loss": 0.3815, - "step": 5893 - }, - { - "epoch": 0.38520358146526373, - "grad_norm": 0.4771265685558319, - "learning_rate": 9.70123035410831e-06, - "loss": 0.4088, - "step": 5894 - }, - { - "epoch": 0.3852689366708058, - "grad_norm": 0.48006612062454224, - "learning_rate": 9.701111444945478e-06, - "loss": 0.4555, - "step": 5895 - }, - { - "epoch": 0.38533429187634793, - "grad_norm": 0.4835205376148224, - "learning_rate": 9.700992512853695e-06, - "loss": 0.4414, - "step": 5896 - }, - { - "epoch": 0.38539964708189006, - "grad_norm": 0.4276254177093506, - "learning_rate": 9.700873557833543e-06, - "loss": 0.3414, - "step": 5897 - }, - { - "epoch": 0.3854650022874322, - "grad_norm": 0.4507283568382263, - "learning_rate": 9.700754579885603e-06, - "loss": 0.3464, - "step": 5898 - }, - { - "epoch": 0.3855303574929743, - "grad_norm": 0.4980127513408661, - "learning_rate": 9.700635579010454e-06, - "loss": 0.447, - "step": 5899 - }, - { - "epoch": 0.38559571269851645, - "grad_norm": 0.4952795207500458, - "learning_rate": 9.700516555208677e-06, - "loss": 0.4639, - "step": 5900 - }, - { - "epoch": 0.3856610679040586, - "grad_norm": 0.5392407774925232, - "learning_rate": 9.700397508480852e-06, - "loss": 0.4082, - "step": 5901 - }, - { - "epoch": 0.3857264231096007, - "grad_norm": 0.5083760619163513, - "learning_rate": 9.70027843882756e-06, - "loss": 0.4325, - "step": 5902 - }, - { - "epoch": 0.3857917783151428, - "grad_norm": 0.45893141627311707, - "learning_rate": 9.700159346249383e-06, - "loss": 0.4033, - "step": 5903 - }, - { - "epoch": 0.3858571335206849, - "grad_norm": 0.4481857717037201, - "learning_rate": 9.7000402307469e-06, - "loss": 0.3696, - "step": 5904 - }, - { - "epoch": 0.38592248872622703, - "grad_norm": 0.5332913994789124, - "learning_rate": 9.69992109232069e-06, - "loss": 0.4616, - "step": 5905 - }, - { - "epoch": 0.38598784393176916, - "grad_norm": 0.474316269159317, - "learning_rate": 9.69980193097134e-06, - "loss": 0.3921, - "step": 5906 - }, - { - "epoch": 0.3860531991373113, - "grad_norm": 0.43687090277671814, - "learning_rate": 9.699682746699425e-06, - "loss": 0.3547, - "step": 5907 - }, - { - "epoch": 0.3861185543428534, - "grad_norm": 0.47035229206085205, - "learning_rate": 9.699563539505532e-06, - "loss": 0.4036, - "step": 5908 - }, - { - "epoch": 0.38618390954839554, - "grad_norm": 0.4641532599925995, - "learning_rate": 9.699444309390237e-06, - "loss": 0.3854, - "step": 5909 - }, - { - "epoch": 0.38624926475393767, - "grad_norm": 0.44381463527679443, - "learning_rate": 9.699325056354126e-06, - "loss": 0.3714, - "step": 5910 - }, - { - "epoch": 0.3863146199594798, - "grad_norm": 0.4531979262828827, - "learning_rate": 9.699205780397777e-06, - "loss": 0.3808, - "step": 5911 - }, - { - "epoch": 0.38637997516502187, - "grad_norm": 0.49356502294540405, - "learning_rate": 9.699086481521774e-06, - "loss": 0.3875, - "step": 5912 - }, - { - "epoch": 0.386445330370564, - "grad_norm": 0.42907342314720154, - "learning_rate": 9.698967159726698e-06, - "loss": 0.3766, - "step": 5913 - }, - { - "epoch": 0.3865106855761061, - "grad_norm": 0.43039849400520325, - "learning_rate": 9.698847815013133e-06, - "loss": 0.3299, - "step": 5914 - }, - { - "epoch": 0.38657604078164826, - "grad_norm": 0.4388934075832367, - "learning_rate": 9.698728447381658e-06, - "loss": 0.3504, - "step": 5915 - }, - { - "epoch": 0.3866413959871904, - "grad_norm": 0.47367024421691895, - "learning_rate": 9.698609056832857e-06, - "loss": 0.4229, - "step": 5916 - }, - { - "epoch": 0.3867067511927325, - "grad_norm": 0.46311619877815247, - "learning_rate": 9.698489643367311e-06, - "loss": 0.4141, - "step": 5917 - }, - { - "epoch": 0.38677210639827464, - "grad_norm": 0.46769559383392334, - "learning_rate": 9.698370206985604e-06, - "loss": 0.3497, - "step": 5918 - }, - { - "epoch": 0.38683746160381677, - "grad_norm": 0.4645219147205353, - "learning_rate": 9.698250747688319e-06, - "loss": 0.3759, - "step": 5919 - }, - { - "epoch": 0.38690281680935884, - "grad_norm": 0.4617730975151062, - "learning_rate": 9.698131265476036e-06, - "loss": 0.3543, - "step": 5920 - }, - { - "epoch": 0.38696817201490097, - "grad_norm": 0.4533134400844574, - "learning_rate": 9.69801176034934e-06, - "loss": 0.3967, - "step": 5921 - }, - { - "epoch": 0.3870335272204431, - "grad_norm": 0.512360692024231, - "learning_rate": 9.697892232308812e-06, - "loss": 0.4733, - "step": 5922 - }, - { - "epoch": 0.3870988824259852, - "grad_norm": 0.5169496536254883, - "learning_rate": 9.697772681355035e-06, - "loss": 0.4406, - "step": 5923 - }, - { - "epoch": 0.38716423763152735, - "grad_norm": 0.4784247875213623, - "learning_rate": 9.697653107488596e-06, - "loss": 0.3734, - "step": 5924 - }, - { - "epoch": 0.3872295928370695, - "grad_norm": 0.429458886384964, - "learning_rate": 9.697533510710074e-06, - "loss": 0.3416, - "step": 5925 - }, - { - "epoch": 0.3872949480426116, - "grad_norm": 0.4785601794719696, - "learning_rate": 9.697413891020053e-06, - "loss": 0.4273, - "step": 5926 - }, - { - "epoch": 0.38736030324815374, - "grad_norm": 0.46648111939430237, - "learning_rate": 9.697294248419118e-06, - "loss": 0.398, - "step": 5927 - }, - { - "epoch": 0.3874256584536958, - "grad_norm": 0.4867631196975708, - "learning_rate": 9.697174582907854e-06, - "loss": 0.4216, - "step": 5928 - }, - { - "epoch": 0.38749101365923794, - "grad_norm": 0.4751908481121063, - "learning_rate": 9.69705489448684e-06, - "loss": 0.3818, - "step": 5929 - }, - { - "epoch": 0.38755636886478007, - "grad_norm": 0.49188607931137085, - "learning_rate": 9.696935183156661e-06, - "loss": 0.3607, - "step": 5930 - }, - { - "epoch": 0.3876217240703222, - "grad_norm": 0.48401615023612976, - "learning_rate": 9.696815448917902e-06, - "loss": 0.3898, - "step": 5931 - }, - { - "epoch": 0.3876870792758643, - "grad_norm": 0.4528600871562958, - "learning_rate": 9.696695691771149e-06, - "loss": 0.4031, - "step": 5932 - }, - { - "epoch": 0.38775243448140645, - "grad_norm": 0.49441906809806824, - "learning_rate": 9.696575911716982e-06, - "loss": 0.4348, - "step": 5933 - }, - { - "epoch": 0.3878177896869486, - "grad_norm": 0.4886414706707001, - "learning_rate": 9.696456108755989e-06, - "loss": 0.423, - "step": 5934 - }, - { - "epoch": 0.3878831448924907, - "grad_norm": 0.48988276720046997, - "learning_rate": 9.696336282888751e-06, - "loss": 0.4128, - "step": 5935 - }, - { - "epoch": 0.38794850009803283, - "grad_norm": 0.4868811368942261, - "learning_rate": 9.696216434115855e-06, - "loss": 0.3689, - "step": 5936 - }, - { - "epoch": 0.3880138553035749, - "grad_norm": 0.5110340118408203, - "learning_rate": 9.696096562437884e-06, - "loss": 0.4753, - "step": 5937 - }, - { - "epoch": 0.38807921050911703, - "grad_norm": 0.4750353991985321, - "learning_rate": 9.695976667855424e-06, - "loss": 0.4023, - "step": 5938 - }, - { - "epoch": 0.38814456571465916, - "grad_norm": 0.46796661615371704, - "learning_rate": 9.695856750369057e-06, - "loss": 0.3937, - "step": 5939 - }, - { - "epoch": 0.3882099209202013, - "grad_norm": 0.5175364017486572, - "learning_rate": 9.695736809979372e-06, - "loss": 0.4367, - "step": 5940 - }, - { - "epoch": 0.3882752761257434, - "grad_norm": 0.4603165090084076, - "learning_rate": 9.69561684668695e-06, - "loss": 0.3692, - "step": 5941 - }, - { - "epoch": 0.38834063133128555, - "grad_norm": 0.4634871184825897, - "learning_rate": 9.69549686049238e-06, - "loss": 0.4013, - "step": 5942 - }, - { - "epoch": 0.3884059865368277, - "grad_norm": 0.45909276604652405, - "learning_rate": 9.695376851396242e-06, - "loss": 0.3399, - "step": 5943 - }, - { - "epoch": 0.3884713417423698, - "grad_norm": 0.43440115451812744, - "learning_rate": 9.695256819399127e-06, - "loss": 0.3666, - "step": 5944 - }, - { - "epoch": 0.3885366969479119, - "grad_norm": 0.46124815940856934, - "learning_rate": 9.695136764501618e-06, - "loss": 0.3866, - "step": 5945 - }, - { - "epoch": 0.388602052153454, - "grad_norm": 0.4922986924648285, - "learning_rate": 9.6950166867043e-06, - "loss": 0.4598, - "step": 5946 - }, - { - "epoch": 0.38866740735899613, - "grad_norm": 0.4456659257411957, - "learning_rate": 9.694896586007759e-06, - "loss": 0.3983, - "step": 5947 - }, - { - "epoch": 0.38873276256453826, - "grad_norm": 0.4693983793258667, - "learning_rate": 9.69477646241258e-06, - "loss": 0.3757, - "step": 5948 - }, - { - "epoch": 0.3887981177700804, - "grad_norm": 0.4834117591381073, - "learning_rate": 9.69465631591935e-06, - "loss": 0.3837, - "step": 5949 - }, - { - "epoch": 0.3888634729756225, - "grad_norm": 0.47310110926628113, - "learning_rate": 9.694536146528657e-06, - "loss": 0.3852, - "step": 5950 - }, - { - "epoch": 0.38892882818116464, - "grad_norm": 0.48990702629089355, - "learning_rate": 9.694415954241083e-06, - "loss": 0.3872, - "step": 5951 - }, - { - "epoch": 0.38899418338670677, - "grad_norm": 0.5139848589897156, - "learning_rate": 9.694295739057215e-06, - "loss": 0.4552, - "step": 5952 - }, - { - "epoch": 0.3890595385922489, - "grad_norm": 0.45801395177841187, - "learning_rate": 9.694175500977643e-06, - "loss": 0.3868, - "step": 5953 - }, - { - "epoch": 0.38912489379779097, - "grad_norm": 0.4636388421058655, - "learning_rate": 9.69405524000295e-06, - "loss": 0.374, - "step": 5954 - }, - { - "epoch": 0.3891902490033331, - "grad_norm": 0.43106546998023987, - "learning_rate": 9.693934956133723e-06, - "loss": 0.3507, - "step": 5955 - }, - { - "epoch": 0.3892556042088752, - "grad_norm": 0.43061211705207825, - "learning_rate": 9.693814649370548e-06, - "loss": 0.3327, - "step": 5956 - }, - { - "epoch": 0.38932095941441736, - "grad_norm": 0.45796898007392883, - "learning_rate": 9.693694319714016e-06, - "loss": 0.372, - "step": 5957 - }, - { - "epoch": 0.3893863146199595, - "grad_norm": 0.4221617579460144, - "learning_rate": 9.693573967164707e-06, - "loss": 0.3127, - "step": 5958 - }, - { - "epoch": 0.3894516698255016, - "grad_norm": 0.4860340356826782, - "learning_rate": 9.693453591723215e-06, - "loss": 0.4414, - "step": 5959 - }, - { - "epoch": 0.38951702503104374, - "grad_norm": 0.49887949228286743, - "learning_rate": 9.69333319339012e-06, - "loss": 0.4547, - "step": 5960 - }, - { - "epoch": 0.38958238023658587, - "grad_norm": 0.44215089082717896, - "learning_rate": 9.693212772166016e-06, - "loss": 0.3971, - "step": 5961 - }, - { - "epoch": 0.38964773544212794, - "grad_norm": 0.4157714247703552, - "learning_rate": 9.693092328051488e-06, - "loss": 0.3221, - "step": 5962 - }, - { - "epoch": 0.38971309064767007, - "grad_norm": 0.45220786333084106, - "learning_rate": 9.692971861047122e-06, - "loss": 0.3862, - "step": 5963 - }, - { - "epoch": 0.3897784458532122, - "grad_norm": 0.4925874173641205, - "learning_rate": 9.692851371153504e-06, - "loss": 0.4128, - "step": 5964 - }, - { - "epoch": 0.3898438010587543, - "grad_norm": 0.458907812833786, - "learning_rate": 9.692730858371227e-06, - "loss": 0.3982, - "step": 5965 - }, - { - "epoch": 0.38990915626429645, - "grad_norm": 0.4813711941242218, - "learning_rate": 9.692610322700874e-06, - "loss": 0.4372, - "step": 5966 - }, - { - "epoch": 0.3899745114698386, - "grad_norm": 0.42877089977264404, - "learning_rate": 9.692489764143039e-06, - "loss": 0.329, - "step": 5967 - }, - { - "epoch": 0.3900398666753807, - "grad_norm": 0.4834284782409668, - "learning_rate": 9.6923691826983e-06, - "loss": 0.423, - "step": 5968 - }, - { - "epoch": 0.39010522188092284, - "grad_norm": 0.5371841788291931, - "learning_rate": 9.692248578367256e-06, - "loss": 0.4302, - "step": 5969 - }, - { - "epoch": 0.3901705770864649, - "grad_norm": 0.48016226291656494, - "learning_rate": 9.692127951150487e-06, - "loss": 0.4248, - "step": 5970 - }, - { - "epoch": 0.39023593229200704, - "grad_norm": 0.4683961272239685, - "learning_rate": 9.692007301048585e-06, - "loss": 0.3935, - "step": 5971 - }, - { - "epoch": 0.39030128749754917, - "grad_norm": 0.4606408178806305, - "learning_rate": 9.691886628062136e-06, - "loss": 0.3907, - "step": 5972 - }, - { - "epoch": 0.3903666427030913, - "grad_norm": 0.4929756820201874, - "learning_rate": 9.691765932191734e-06, - "loss": 0.4469, - "step": 5973 - }, - { - "epoch": 0.3904319979086334, - "grad_norm": 0.5043468475341797, - "learning_rate": 9.691645213437963e-06, - "loss": 0.4196, - "step": 5974 - }, - { - "epoch": 0.39049735311417555, - "grad_norm": 0.4898582100868225, - "learning_rate": 9.691524471801412e-06, - "loss": 0.4567, - "step": 5975 - }, - { - "epoch": 0.3905627083197177, - "grad_norm": 0.4886986315250397, - "learning_rate": 9.691403707282672e-06, - "loss": 0.3763, - "step": 5976 - }, - { - "epoch": 0.3906280635252598, - "grad_norm": 0.49640101194381714, - "learning_rate": 9.691282919882332e-06, - "loss": 0.4357, - "step": 5977 - }, - { - "epoch": 0.39069341873080193, - "grad_norm": 0.505385160446167, - "learning_rate": 9.691162109600978e-06, - "loss": 0.41, - "step": 5978 - }, - { - "epoch": 0.390758773936344, - "grad_norm": 0.49238911271095276, - "learning_rate": 9.691041276439202e-06, - "loss": 0.4404, - "step": 5979 - }, - { - "epoch": 0.39082412914188613, - "grad_norm": 0.4722394049167633, - "learning_rate": 9.690920420397592e-06, - "loss": 0.4149, - "step": 5980 - }, - { - "epoch": 0.39088948434742826, - "grad_norm": 0.460263729095459, - "learning_rate": 9.690799541476738e-06, - "loss": 0.4246, - "step": 5981 - }, - { - "epoch": 0.3909548395529704, - "grad_norm": 0.47054922580718994, - "learning_rate": 9.690678639677229e-06, - "loss": 0.4027, - "step": 5982 - }, - { - "epoch": 0.3910201947585125, - "grad_norm": 0.47182849049568176, - "learning_rate": 9.690557714999656e-06, - "loss": 0.3996, - "step": 5983 - }, - { - "epoch": 0.39108554996405465, - "grad_norm": 0.4657513201236725, - "learning_rate": 9.690436767444608e-06, - "loss": 0.4072, - "step": 5984 - }, - { - "epoch": 0.3911509051695968, - "grad_norm": 0.44540852308273315, - "learning_rate": 9.690315797012676e-06, - "loss": 0.3894, - "step": 5985 - }, - { - "epoch": 0.3912162603751389, - "grad_norm": 0.5249620079994202, - "learning_rate": 9.690194803704447e-06, - "loss": 0.4839, - "step": 5986 - }, - { - "epoch": 0.391281615580681, - "grad_norm": 0.4964336156845093, - "learning_rate": 9.690073787520516e-06, - "loss": 0.3777, - "step": 5987 - }, - { - "epoch": 0.3913469707862231, - "grad_norm": 0.4647408723831177, - "learning_rate": 9.689952748461466e-06, - "loss": 0.4157, - "step": 5988 - }, - { - "epoch": 0.39141232599176523, - "grad_norm": 0.4532619118690491, - "learning_rate": 9.689831686527895e-06, - "loss": 0.3737, - "step": 5989 - }, - { - "epoch": 0.39147768119730736, - "grad_norm": 0.47622814774513245, - "learning_rate": 9.68971060172039e-06, - "loss": 0.3808, - "step": 5990 - }, - { - "epoch": 0.3915430364028495, - "grad_norm": 0.44839900732040405, - "learning_rate": 9.68958949403954e-06, - "loss": 0.3873, - "step": 5991 - }, - { - "epoch": 0.3916083916083916, - "grad_norm": 0.519347608089447, - "learning_rate": 9.68946836348594e-06, - "loss": 0.4935, - "step": 5992 - }, - { - "epoch": 0.39167374681393374, - "grad_norm": 0.5313746929168701, - "learning_rate": 9.689347210060175e-06, - "loss": 0.4754, - "step": 5993 - }, - { - "epoch": 0.39173910201947587, - "grad_norm": 0.4559462368488312, - "learning_rate": 9.689226033762841e-06, - "loss": 0.354, - "step": 5994 - }, - { - "epoch": 0.391804457225018, - "grad_norm": 0.4504825174808502, - "learning_rate": 9.689104834594527e-06, - "loss": 0.3945, - "step": 5995 - }, - { - "epoch": 0.39186981243056007, - "grad_norm": 0.5034160017967224, - "learning_rate": 9.688983612555825e-06, - "loss": 0.4352, - "step": 5996 - }, - { - "epoch": 0.3919351676361022, - "grad_norm": 0.5011710524559021, - "learning_rate": 9.688862367647325e-06, - "loss": 0.3854, - "step": 5997 - }, - { - "epoch": 0.3920005228416443, - "grad_norm": 0.48244917392730713, - "learning_rate": 9.68874109986962e-06, - "loss": 0.3953, - "step": 5998 - }, - { - "epoch": 0.39206587804718646, - "grad_norm": 0.4704824686050415, - "learning_rate": 9.688619809223297e-06, - "loss": 0.3756, - "step": 5999 - }, - { - "epoch": 0.3921312332527286, - "grad_norm": 0.42899468541145325, - "learning_rate": 9.688498495708953e-06, - "loss": 0.3553, - "step": 6000 - }, - { - "epoch": 0.3921965884582707, - "grad_norm": 0.4486721158027649, - "learning_rate": 9.688377159327178e-06, - "loss": 0.3659, - "step": 6001 - }, - { - "epoch": 0.39226194366381284, - "grad_norm": 0.47961145639419556, - "learning_rate": 9.688255800078562e-06, - "loss": 0.3606, - "step": 6002 - }, - { - "epoch": 0.39232729886935497, - "grad_norm": 0.4899479150772095, - "learning_rate": 9.6881344179637e-06, - "loss": 0.3824, - "step": 6003 - }, - { - "epoch": 0.39239265407489704, - "grad_norm": 0.4607725441455841, - "learning_rate": 9.68801301298318e-06, - "loss": 0.4032, - "step": 6004 - }, - { - "epoch": 0.39245800928043917, - "grad_norm": 0.5207237005233765, - "learning_rate": 9.687891585137598e-06, - "loss": 0.4236, - "step": 6005 - }, - { - "epoch": 0.3925233644859813, - "grad_norm": 0.4831946790218353, - "learning_rate": 9.687770134427544e-06, - "loss": 0.4222, - "step": 6006 - }, - { - "epoch": 0.3925887196915234, - "grad_norm": 0.45149165391921997, - "learning_rate": 9.687648660853613e-06, - "loss": 0.38, - "step": 6007 - }, - { - "epoch": 0.39265407489706555, - "grad_norm": 0.4608800411224365, - "learning_rate": 9.687527164416392e-06, - "loss": 0.4043, - "step": 6008 - }, - { - "epoch": 0.3927194301026077, - "grad_norm": 0.48860853910446167, - "learning_rate": 9.687405645116481e-06, - "loss": 0.3856, - "step": 6009 - }, - { - "epoch": 0.3927847853081498, - "grad_norm": 0.46203213930130005, - "learning_rate": 9.687284102954467e-06, - "loss": 0.3805, - "step": 6010 - }, - { - "epoch": 0.39285014051369194, - "grad_norm": 0.4599906802177429, - "learning_rate": 9.687162537930944e-06, - "loss": 0.3983, - "step": 6011 - }, - { - "epoch": 0.39291549571923406, - "grad_norm": 0.48271965980529785, - "learning_rate": 9.687040950046506e-06, - "loss": 0.4349, - "step": 6012 - }, - { - "epoch": 0.39298085092477614, - "grad_norm": 0.483013391494751, - "learning_rate": 9.686919339301747e-06, - "loss": 0.4362, - "step": 6013 - }, - { - "epoch": 0.39304620613031827, - "grad_norm": 0.43602317571640015, - "learning_rate": 9.686797705697255e-06, - "loss": 0.3632, - "step": 6014 - }, - { - "epoch": 0.3931115613358604, - "grad_norm": 0.5030431747436523, - "learning_rate": 9.68667604923363e-06, - "loss": 0.48, - "step": 6015 - }, - { - "epoch": 0.3931769165414025, - "grad_norm": 0.43381696939468384, - "learning_rate": 9.686554369911462e-06, - "loss": 0.3573, - "step": 6016 - }, - { - "epoch": 0.39324227174694465, - "grad_norm": 0.46310433745384216, - "learning_rate": 9.686432667731344e-06, - "loss": 0.3957, - "step": 6017 - }, - { - "epoch": 0.3933076269524868, - "grad_norm": 0.4989355504512787, - "learning_rate": 9.68631094269387e-06, - "loss": 0.4141, - "step": 6018 - }, - { - "epoch": 0.3933729821580289, - "grad_norm": 0.49543890357017517, - "learning_rate": 9.686189194799635e-06, - "loss": 0.4506, - "step": 6019 - }, - { - "epoch": 0.39343833736357103, - "grad_norm": 0.4531422555446625, - "learning_rate": 9.686067424049232e-06, - "loss": 0.3995, - "step": 6020 - }, - { - "epoch": 0.3935036925691131, - "grad_norm": 0.4357486069202423, - "learning_rate": 9.685945630443254e-06, - "loss": 0.3293, - "step": 6021 - }, - { - "epoch": 0.39356904777465523, - "grad_norm": 0.49692434072494507, - "learning_rate": 9.685823813982295e-06, - "loss": 0.4287, - "step": 6022 - }, - { - "epoch": 0.39363440298019736, - "grad_norm": 0.49291113018989563, - "learning_rate": 9.685701974666952e-06, - "loss": 0.4617, - "step": 6023 - }, - { - "epoch": 0.3936997581857395, - "grad_norm": 0.4602315425872803, - "learning_rate": 9.685580112497816e-06, - "loss": 0.3932, - "step": 6024 - }, - { - "epoch": 0.3937651133912816, - "grad_norm": 0.4561045467853546, - "learning_rate": 9.685458227475483e-06, - "loss": 0.3948, - "step": 6025 - }, - { - "epoch": 0.39383046859682375, - "grad_norm": 0.48737287521362305, - "learning_rate": 9.685336319600548e-06, - "loss": 0.3936, - "step": 6026 - }, - { - "epoch": 0.3938958238023659, - "grad_norm": 0.6739256381988525, - "learning_rate": 9.685214388873602e-06, - "loss": 0.4216, - "step": 6027 - }, - { - "epoch": 0.393961179007908, - "grad_norm": 0.4541867971420288, - "learning_rate": 9.685092435295244e-06, - "loss": 0.3767, - "step": 6028 - }, - { - "epoch": 0.3940265342134501, - "grad_norm": 0.4627193808555603, - "learning_rate": 9.684970458866066e-06, - "loss": 0.3892, - "step": 6029 - }, - { - "epoch": 0.3940918894189922, - "grad_norm": 0.46207600831985474, - "learning_rate": 9.684848459586666e-06, - "loss": 0.3508, - "step": 6030 - }, - { - "epoch": 0.39415724462453433, - "grad_norm": 0.4354219436645508, - "learning_rate": 9.684726437457635e-06, - "loss": 0.38, - "step": 6031 - }, - { - "epoch": 0.39422259983007646, - "grad_norm": 0.47896140813827515, - "learning_rate": 9.68460439247957e-06, - "loss": 0.4224, - "step": 6032 - }, - { - "epoch": 0.3942879550356186, - "grad_norm": 0.4910140931606293, - "learning_rate": 9.68448232465307e-06, - "loss": 0.3653, - "step": 6033 - }, - { - "epoch": 0.3943533102411607, - "grad_norm": 0.4564701020717621, - "learning_rate": 9.684360233978724e-06, - "loss": 0.4004, - "step": 6034 - }, - { - "epoch": 0.39441866544670284, - "grad_norm": 0.698441207408905, - "learning_rate": 9.68423812045713e-06, - "loss": 0.3669, - "step": 6035 - }, - { - "epoch": 0.39448402065224497, - "grad_norm": 0.4709613621234894, - "learning_rate": 9.684115984088884e-06, - "loss": 0.3879, - "step": 6036 - }, - { - "epoch": 0.3945493758577871, - "grad_norm": 0.44479063153266907, - "learning_rate": 9.683993824874584e-06, - "loss": 0.3785, - "step": 6037 - }, - { - "epoch": 0.39461473106332917, - "grad_norm": 0.4567413330078125, - "learning_rate": 9.683871642814821e-06, - "loss": 0.4103, - "step": 6038 - }, - { - "epoch": 0.3946800862688713, - "grad_norm": 0.47958114743232727, - "learning_rate": 9.683749437910195e-06, - "loss": 0.3866, - "step": 6039 - }, - { - "epoch": 0.3947454414744134, - "grad_norm": 0.5066713690757751, - "learning_rate": 9.683627210161299e-06, - "loss": 0.4795, - "step": 6040 - }, - { - "epoch": 0.39481079667995556, - "grad_norm": 0.4703404903411865, - "learning_rate": 9.68350495956873e-06, - "loss": 0.3812, - "step": 6041 - }, - { - "epoch": 0.3948761518854977, - "grad_norm": 0.4703965187072754, - "learning_rate": 9.683382686133086e-06, - "loss": 0.4418, - "step": 6042 - }, - { - "epoch": 0.3949415070910398, - "grad_norm": 0.47360336780548096, - "learning_rate": 9.683260389854962e-06, - "loss": 0.3947, - "step": 6043 - }, - { - "epoch": 0.39500686229658194, - "grad_norm": 0.44658640027046204, - "learning_rate": 9.683138070734953e-06, - "loss": 0.3537, - "step": 6044 - }, - { - "epoch": 0.39507221750212407, - "grad_norm": 0.45639845728874207, - "learning_rate": 9.683015728773661e-06, - "loss": 0.3795, - "step": 6045 - }, - { - "epoch": 0.39513757270766614, - "grad_norm": 0.4557878077030182, - "learning_rate": 9.682893363971678e-06, - "loss": 0.3839, - "step": 6046 - }, - { - "epoch": 0.39520292791320827, - "grad_norm": 0.4923136532306671, - "learning_rate": 9.682770976329599e-06, - "loss": 0.4318, - "step": 6047 - }, - { - "epoch": 0.3952682831187504, - "grad_norm": 0.4797445833683014, - "learning_rate": 9.682648565848025e-06, - "loss": 0.4008, - "step": 6048 - }, - { - "epoch": 0.3953336383242925, - "grad_norm": 0.44140514731407166, - "learning_rate": 9.682526132527553e-06, - "loss": 0.3682, - "step": 6049 - }, - { - "epoch": 0.39539899352983465, - "grad_norm": 0.47167378664016724, - "learning_rate": 9.682403676368777e-06, - "loss": 0.4334, - "step": 6050 - }, - { - "epoch": 0.3954643487353768, - "grad_norm": 0.49207475781440735, - "learning_rate": 9.682281197372297e-06, - "loss": 0.4322, - "step": 6051 - }, - { - "epoch": 0.3955297039409189, - "grad_norm": 0.5148537755012512, - "learning_rate": 9.68215869553871e-06, - "loss": 0.4675, - "step": 6052 - }, - { - "epoch": 0.39559505914646104, - "grad_norm": 0.4530176520347595, - "learning_rate": 9.682036170868612e-06, - "loss": 0.4124, - "step": 6053 - }, - { - "epoch": 0.39566041435200316, - "grad_norm": 0.4727493226528168, - "learning_rate": 9.681913623362602e-06, - "loss": 0.4255, - "step": 6054 - }, - { - "epoch": 0.39572576955754524, - "grad_norm": 0.46910151839256287, - "learning_rate": 9.681791053021277e-06, - "loss": 0.3803, - "step": 6055 - }, - { - "epoch": 0.39579112476308737, - "grad_norm": 0.4994412958621979, - "learning_rate": 9.681668459845236e-06, - "loss": 0.4584, - "step": 6056 - }, - { - "epoch": 0.3958564799686295, - "grad_norm": 0.4252305328845978, - "learning_rate": 9.681545843835074e-06, - "loss": 0.3308, - "step": 6057 - }, - { - "epoch": 0.3959218351741716, - "grad_norm": 0.4465518295764923, - "learning_rate": 9.681423204991394e-06, - "loss": 0.3798, - "step": 6058 - }, - { - "epoch": 0.39598719037971375, - "grad_norm": 0.4711715579032898, - "learning_rate": 9.68130054331479e-06, - "loss": 0.4227, - "step": 6059 - }, - { - "epoch": 0.3960525455852559, - "grad_norm": 0.4443996846675873, - "learning_rate": 9.681177858805858e-06, - "loss": 0.3306, - "step": 6060 - }, - { - "epoch": 0.396117900790798, - "grad_norm": 0.4859817624092102, - "learning_rate": 9.681055151465205e-06, - "loss": 0.3916, - "step": 6061 - }, - { - "epoch": 0.39618325599634013, - "grad_norm": 0.4672118127346039, - "learning_rate": 9.680932421293424e-06, - "loss": 0.4478, - "step": 6062 - }, - { - "epoch": 0.3962486112018822, - "grad_norm": 0.48797398805618286, - "learning_rate": 9.680809668291111e-06, - "loss": 0.4316, - "step": 6063 - }, - { - "epoch": 0.39631396640742433, - "grad_norm": 0.45298853516578674, - "learning_rate": 9.680686892458869e-06, - "loss": 0.3621, - "step": 6064 - }, - { - "epoch": 0.39637932161296646, - "grad_norm": 0.4821392595767975, - "learning_rate": 9.680564093797296e-06, - "loss": 0.4776, - "step": 6065 - }, - { - "epoch": 0.3964446768185086, - "grad_norm": 0.44541066884994507, - "learning_rate": 9.68044127230699e-06, - "loss": 0.3821, - "step": 6066 - }, - { - "epoch": 0.3965100320240507, - "grad_norm": 0.43608272075653076, - "learning_rate": 9.68031842798855e-06, - "loss": 0.3553, - "step": 6067 - }, - { - "epoch": 0.39657538722959285, - "grad_norm": 0.41650286316871643, - "learning_rate": 9.680195560842575e-06, - "loss": 0.3473, - "step": 6068 - }, - { - "epoch": 0.396640742435135, - "grad_norm": 0.4567144513130188, - "learning_rate": 9.680072670869667e-06, - "loss": 0.3902, - "step": 6069 - }, - { - "epoch": 0.3967060976406771, - "grad_norm": 0.4912777543067932, - "learning_rate": 9.679949758070421e-06, - "loss": 0.4242, - "step": 6070 - }, - { - "epoch": 0.3967714528462192, - "grad_norm": 0.4811214506626129, - "learning_rate": 9.67982682244544e-06, - "loss": 0.3831, - "step": 6071 - }, - { - "epoch": 0.3968368080517613, - "grad_norm": 0.45678627490997314, - "learning_rate": 9.679703863995322e-06, - "loss": 0.4118, - "step": 6072 - }, - { - "epoch": 0.39690216325730343, - "grad_norm": 0.4407769441604614, - "learning_rate": 9.679580882720668e-06, - "loss": 0.3817, - "step": 6073 - }, - { - "epoch": 0.39696751846284556, - "grad_norm": 0.4498608708381653, - "learning_rate": 9.679457878622076e-06, - "loss": 0.3685, - "step": 6074 - }, - { - "epoch": 0.3970328736683877, - "grad_norm": 0.47542741894721985, - "learning_rate": 9.679334851700147e-06, - "loss": 0.4311, - "step": 6075 - }, - { - "epoch": 0.3970982288739298, - "grad_norm": 0.45421308279037476, - "learning_rate": 9.679211801955482e-06, - "loss": 0.3979, - "step": 6076 - }, - { - "epoch": 0.39716358407947194, - "grad_norm": 0.44924843311309814, - "learning_rate": 9.679088729388677e-06, - "loss": 0.3691, - "step": 6077 - }, - { - "epoch": 0.39722893928501407, - "grad_norm": 0.4603331983089447, - "learning_rate": 9.678965634000338e-06, - "loss": 0.3961, - "step": 6078 - }, - { - "epoch": 0.3972942944905562, - "grad_norm": 0.4692233204841614, - "learning_rate": 9.678842515791062e-06, - "loss": 0.4175, - "step": 6079 - }, - { - "epoch": 0.39735964969609827, - "grad_norm": 0.4724222719669342, - "learning_rate": 9.67871937476145e-06, - "loss": 0.4059, - "step": 6080 - }, - { - "epoch": 0.3974250049016404, - "grad_norm": 0.476258248090744, - "learning_rate": 9.678596210912102e-06, - "loss": 0.4137, - "step": 6081 - }, - { - "epoch": 0.3974903601071825, - "grad_norm": 0.45830920338630676, - "learning_rate": 9.67847302424362e-06, - "loss": 0.3781, - "step": 6082 - }, - { - "epoch": 0.39755571531272466, - "grad_norm": 0.46827584505081177, - "learning_rate": 9.678349814756605e-06, - "loss": 0.4389, - "step": 6083 - }, - { - "epoch": 0.3976210705182668, - "grad_norm": 0.40678760409355164, - "learning_rate": 9.678226582451655e-06, - "loss": 0.3175, - "step": 6084 - }, - { - "epoch": 0.3976864257238089, - "grad_norm": 0.417957067489624, - "learning_rate": 9.678103327329375e-06, - "loss": 0.3262, - "step": 6085 - }, - { - "epoch": 0.39775178092935104, - "grad_norm": 0.45792457461357117, - "learning_rate": 9.677980049390366e-06, - "loss": 0.3649, - "step": 6086 - }, - { - "epoch": 0.39781713613489317, - "grad_norm": 0.4505128860473633, - "learning_rate": 9.677856748635224e-06, - "loss": 0.3688, - "step": 6087 - }, - { - "epoch": 0.39788249134043524, - "grad_norm": 0.4464775621891022, - "learning_rate": 9.677733425064558e-06, - "loss": 0.3526, - "step": 6088 - }, - { - "epoch": 0.39794784654597737, - "grad_norm": 0.4564993381500244, - "learning_rate": 9.677610078678964e-06, - "loss": 0.4147, - "step": 6089 - }, - { - "epoch": 0.3980132017515195, - "grad_norm": 0.45127493143081665, - "learning_rate": 9.677486709479042e-06, - "loss": 0.3747, - "step": 6090 - }, - { - "epoch": 0.3980785569570616, - "grad_norm": 0.4899660646915436, - "learning_rate": 9.677363317465401e-06, - "loss": 0.4295, - "step": 6091 - }, - { - "epoch": 0.39814391216260375, - "grad_norm": 0.44159093499183655, - "learning_rate": 9.677239902638637e-06, - "loss": 0.3664, - "step": 6092 - }, - { - "epoch": 0.3982092673681459, - "grad_norm": 0.43933340907096863, - "learning_rate": 9.677116464999355e-06, - "loss": 0.3427, - "step": 6093 - }, - { - "epoch": 0.398274622573688, - "grad_norm": 0.48111939430236816, - "learning_rate": 9.676993004548153e-06, - "loss": 0.3958, - "step": 6094 - }, - { - "epoch": 0.39833997777923014, - "grad_norm": 0.45526638627052307, - "learning_rate": 9.676869521285638e-06, - "loss": 0.4179, - "step": 6095 - }, - { - "epoch": 0.39840533298477226, - "grad_norm": 0.42688971757888794, - "learning_rate": 9.676746015212411e-06, - "loss": 0.3652, - "step": 6096 - }, - { - "epoch": 0.39847068819031434, - "grad_norm": 0.48857513070106506, - "learning_rate": 9.676622486329071e-06, - "loss": 0.3724, - "step": 6097 - }, - { - "epoch": 0.39853604339585647, - "grad_norm": 0.5034164190292358, - "learning_rate": 9.676498934636224e-06, - "loss": 0.4495, - "step": 6098 - }, - { - "epoch": 0.3986013986013986, - "grad_norm": 0.4916623830795288, - "learning_rate": 9.676375360134471e-06, - "loss": 0.4278, - "step": 6099 - }, - { - "epoch": 0.3986667538069407, - "grad_norm": 0.4503425657749176, - "learning_rate": 9.676251762824416e-06, - "loss": 0.4003, - "step": 6100 - }, - { - "epoch": 0.39873210901248285, - "grad_norm": 0.47600290179252625, - "learning_rate": 9.676128142706663e-06, - "loss": 0.3832, - "step": 6101 - }, - { - "epoch": 0.398797464218025, - "grad_norm": 0.5034260153770447, - "learning_rate": 9.67600449978181e-06, - "loss": 0.4491, - "step": 6102 - }, - { - "epoch": 0.3988628194235671, - "grad_norm": 0.4781205654144287, - "learning_rate": 9.675880834050465e-06, - "loss": 0.4199, - "step": 6103 - }, - { - "epoch": 0.39892817462910923, - "grad_norm": 0.4337165057659149, - "learning_rate": 9.675757145513229e-06, - "loss": 0.3518, - "step": 6104 - }, - { - "epoch": 0.3989935298346513, - "grad_norm": 0.45678630471229553, - "learning_rate": 9.675633434170704e-06, - "loss": 0.3712, - "step": 6105 - }, - { - "epoch": 0.39905888504019343, - "grad_norm": 0.47965630888938904, - "learning_rate": 9.675509700023498e-06, - "loss": 0.4467, - "step": 6106 - }, - { - "epoch": 0.39912424024573556, - "grad_norm": 0.42392995953559875, - "learning_rate": 9.675385943072209e-06, - "loss": 0.37, - "step": 6107 - }, - { - "epoch": 0.3991895954512777, - "grad_norm": 0.48698365688323975, - "learning_rate": 9.675262163317442e-06, - "loss": 0.4305, - "step": 6108 - }, - { - "epoch": 0.3992549506568198, - "grad_norm": 0.5198854207992554, - "learning_rate": 9.675138360759805e-06, - "loss": 0.457, - "step": 6109 - }, - { - "epoch": 0.39932030586236195, - "grad_norm": 0.5073537826538086, - "learning_rate": 9.675014535399897e-06, - "loss": 0.408, - "step": 6110 - }, - { - "epoch": 0.3993856610679041, - "grad_norm": 0.476301908493042, - "learning_rate": 9.674890687238324e-06, - "loss": 0.4619, - "step": 6111 - }, - { - "epoch": 0.3994510162734462, - "grad_norm": 0.4359187185764313, - "learning_rate": 9.67476681627569e-06, - "loss": 0.3556, - "step": 6112 - }, - { - "epoch": 0.3995163714789883, - "grad_norm": 0.42895931005477905, - "learning_rate": 9.674642922512596e-06, - "loss": 0.343, - "step": 6113 - }, - { - "epoch": 0.3995817266845304, - "grad_norm": 0.48938706517219543, - "learning_rate": 9.674519005949652e-06, - "loss": 0.3933, - "step": 6114 - }, - { - "epoch": 0.39964708189007253, - "grad_norm": 0.42167866230010986, - "learning_rate": 9.674395066587457e-06, - "loss": 0.3351, - "step": 6115 - }, - { - "epoch": 0.39971243709561466, - "grad_norm": 0.4954562187194824, - "learning_rate": 9.67427110442662e-06, - "loss": 0.4437, - "step": 6116 - }, - { - "epoch": 0.3997777923011568, - "grad_norm": 0.4689842760562897, - "learning_rate": 9.674147119467742e-06, - "loss": 0.4131, - "step": 6117 - }, - { - "epoch": 0.3998431475066989, - "grad_norm": 0.44719985127449036, - "learning_rate": 9.67402311171143e-06, - "loss": 0.3869, - "step": 6118 - }, - { - "epoch": 0.39990850271224104, - "grad_norm": 0.4831448495388031, - "learning_rate": 9.673899081158289e-06, - "loss": 0.4166, - "step": 6119 - }, - { - "epoch": 0.39997385791778317, - "grad_norm": 0.4672890901565552, - "learning_rate": 9.673775027808922e-06, - "loss": 0.371, - "step": 6120 - }, - { - "epoch": 0.4000392131233253, - "grad_norm": 0.47074273228645325, - "learning_rate": 9.673650951663934e-06, - "loss": 0.3713, - "step": 6121 - }, - { - "epoch": 0.40010456832886737, - "grad_norm": 0.43529555201530457, - "learning_rate": 9.673526852723934e-06, - "loss": 0.3629, - "step": 6122 - }, - { - "epoch": 0.4001699235344095, - "grad_norm": 0.45171019434928894, - "learning_rate": 9.67340273098952e-06, - "loss": 0.3958, - "step": 6123 - }, - { - "epoch": 0.4002352787399516, - "grad_norm": 0.4475041925907135, - "learning_rate": 9.673278586461305e-06, - "loss": 0.3904, - "step": 6124 - }, - { - "epoch": 0.40030063394549376, - "grad_norm": 0.45028144121170044, - "learning_rate": 9.673154419139892e-06, - "loss": 0.374, - "step": 6125 - }, - { - "epoch": 0.4003659891510359, - "grad_norm": 0.46634340286254883, - "learning_rate": 9.673030229025885e-06, - "loss": 0.4349, - "step": 6126 - }, - { - "epoch": 0.400431344356578, - "grad_norm": 0.4717932641506195, - "learning_rate": 9.672906016119889e-06, - "loss": 0.4063, - "step": 6127 - }, - { - "epoch": 0.40049669956212014, - "grad_norm": 0.4801435172557831, - "learning_rate": 9.672781780422515e-06, - "loss": 0.459, - "step": 6128 - }, - { - "epoch": 0.40056205476766227, - "grad_norm": 0.4910016357898712, - "learning_rate": 9.672657521934364e-06, - "loss": 0.3214, - "step": 6129 - }, - { - "epoch": 0.40062740997320434, - "grad_norm": 0.45259609818458557, - "learning_rate": 9.672533240656041e-06, - "loss": 0.3868, - "step": 6130 - }, - { - "epoch": 0.40069276517874647, - "grad_norm": 0.4314712882041931, - "learning_rate": 9.672408936588158e-06, - "loss": 0.3731, - "step": 6131 - }, - { - "epoch": 0.4007581203842886, - "grad_norm": 0.4598412811756134, - "learning_rate": 9.672284609731316e-06, - "loss": 0.3494, - "step": 6132 - }, - { - "epoch": 0.4008234755898307, - "grad_norm": 0.44612693786621094, - "learning_rate": 9.672160260086124e-06, - "loss": 0.4143, - "step": 6133 - }, - { - "epoch": 0.40088883079537285, - "grad_norm": 0.440768301486969, - "learning_rate": 9.672035887653189e-06, - "loss": 0.3627, - "step": 6134 - }, - { - "epoch": 0.400954186000915, - "grad_norm": 0.4287469685077667, - "learning_rate": 9.671911492433114e-06, - "loss": 0.3514, - "step": 6135 - }, - { - "epoch": 0.4010195412064571, - "grad_norm": 0.45346057415008545, - "learning_rate": 9.671787074426509e-06, - "loss": 0.4192, - "step": 6136 - }, - { - "epoch": 0.40108489641199924, - "grad_norm": 0.46000832319259644, - "learning_rate": 9.67166263363398e-06, - "loss": 0.3943, - "step": 6137 - }, - { - "epoch": 0.40115025161754136, - "grad_norm": 0.45692506432533264, - "learning_rate": 9.671538170056134e-06, - "loss": 0.3913, - "step": 6138 - }, - { - "epoch": 0.40121560682308344, - "grad_norm": 0.47234079241752625, - "learning_rate": 9.67141368369358e-06, - "loss": 0.4193, - "step": 6139 - }, - { - "epoch": 0.40128096202862557, - "grad_norm": 0.4784364402294159, - "learning_rate": 9.67128917454692e-06, - "loss": 0.4285, - "step": 6140 - }, - { - "epoch": 0.4013463172341677, - "grad_norm": 0.44267991185188293, - "learning_rate": 9.671164642616766e-06, - "loss": 0.3749, - "step": 6141 - }, - { - "epoch": 0.4014116724397098, - "grad_norm": 0.4557999074459076, - "learning_rate": 9.671040087903723e-06, - "loss": 0.4004, - "step": 6142 - }, - { - "epoch": 0.40147702764525195, - "grad_norm": 0.4551524817943573, - "learning_rate": 9.6709155104084e-06, - "loss": 0.3863, - "step": 6143 - }, - { - "epoch": 0.4015423828507941, - "grad_norm": 0.47498413920402527, - "learning_rate": 9.670790910131405e-06, - "loss": 0.3952, - "step": 6144 - }, - { - "epoch": 0.4016077380563362, - "grad_norm": 0.4864051342010498, - "learning_rate": 9.670666287073343e-06, - "loss": 0.4735, - "step": 6145 - }, - { - "epoch": 0.40167309326187833, - "grad_norm": 0.4366544187068939, - "learning_rate": 9.670541641234823e-06, - "loss": 0.3455, - "step": 6146 - }, - { - "epoch": 0.4017384484674204, - "grad_norm": 0.4424472749233246, - "learning_rate": 9.670416972616454e-06, - "loss": 0.38, - "step": 6147 - }, - { - "epoch": 0.40180380367296253, - "grad_norm": 0.4703499674797058, - "learning_rate": 9.670292281218844e-06, - "loss": 0.3753, - "step": 6148 - }, - { - "epoch": 0.40186915887850466, - "grad_norm": 0.42187562584877014, - "learning_rate": 9.6701675670426e-06, - "loss": 0.3769, - "step": 6149 - }, - { - "epoch": 0.4019345140840468, - "grad_norm": 0.44826239347457886, - "learning_rate": 9.670042830088331e-06, - "loss": 0.3742, - "step": 6150 - }, - { - "epoch": 0.4019998692895889, - "grad_norm": 0.47213315963745117, - "learning_rate": 9.669918070356645e-06, - "loss": 0.406, - "step": 6151 - }, - { - "epoch": 0.40206522449513105, - "grad_norm": 0.456643670797348, - "learning_rate": 9.669793287848151e-06, - "loss": 0.3881, - "step": 6152 - }, - { - "epoch": 0.4021305797006732, - "grad_norm": 0.46143490076065063, - "learning_rate": 9.669668482563456e-06, - "loss": 0.4419, - "step": 6153 - }, - { - "epoch": 0.4021959349062153, - "grad_norm": 0.5147846341133118, - "learning_rate": 9.669543654503174e-06, - "loss": 0.4547, - "step": 6154 - }, - { - "epoch": 0.4022612901117574, - "grad_norm": 0.5235462188720703, - "learning_rate": 9.669418803667906e-06, - "loss": 0.4945, - "step": 6155 - }, - { - "epoch": 0.4023266453172995, - "grad_norm": 0.4780176281929016, - "learning_rate": 9.669293930058266e-06, - "loss": 0.4002, - "step": 6156 - }, - { - "epoch": 0.40239200052284163, - "grad_norm": 0.4642355144023895, - "learning_rate": 9.669169033674863e-06, - "loss": 0.377, - "step": 6157 - }, - { - "epoch": 0.40245735572838376, - "grad_norm": 0.4488992691040039, - "learning_rate": 9.669044114518304e-06, - "loss": 0.3831, - "step": 6158 - }, - { - "epoch": 0.4025227109339259, - "grad_norm": 0.4399069845676422, - "learning_rate": 9.668919172589199e-06, - "loss": 0.3759, - "step": 6159 - }, - { - "epoch": 0.402588066139468, - "grad_norm": 0.4538023769855499, - "learning_rate": 9.668794207888158e-06, - "loss": 0.3739, - "step": 6160 - }, - { - "epoch": 0.40265342134501014, - "grad_norm": 0.47913607954978943, - "learning_rate": 9.668669220415792e-06, - "loss": 0.4149, - "step": 6161 - }, - { - "epoch": 0.40271877655055227, - "grad_norm": 0.47564640641212463, - "learning_rate": 9.668544210172707e-06, - "loss": 0.4273, - "step": 6162 - }, - { - "epoch": 0.4027841317560944, - "grad_norm": 0.4367576539516449, - "learning_rate": 9.668419177159515e-06, - "loss": 0.3508, - "step": 6163 - }, - { - "epoch": 0.40284948696163647, - "grad_norm": 0.4587484300136566, - "learning_rate": 9.668294121376825e-06, - "loss": 0.3809, - "step": 6164 - }, - { - "epoch": 0.4029148421671786, - "grad_norm": 0.47635382413864136, - "learning_rate": 9.668169042825249e-06, - "loss": 0.3786, - "step": 6165 - }, - { - "epoch": 0.4029801973727207, - "grad_norm": 0.48471590876579285, - "learning_rate": 9.668043941505392e-06, - "loss": 0.4324, - "step": 6166 - }, - { - "epoch": 0.40304555257826286, - "grad_norm": 0.5001281499862671, - "learning_rate": 9.66791881741787e-06, - "loss": 0.4585, - "step": 6167 - }, - { - "epoch": 0.403110907783805, - "grad_norm": 0.47650250792503357, - "learning_rate": 9.66779367056329e-06, - "loss": 0.4205, - "step": 6168 - }, - { - "epoch": 0.4031762629893471, - "grad_norm": 0.44792792201042175, - "learning_rate": 9.667668500942264e-06, - "loss": 0.368, - "step": 6169 - }, - { - "epoch": 0.40324161819488924, - "grad_norm": 0.4794873893260956, - "learning_rate": 9.6675433085554e-06, - "loss": 0.3765, - "step": 6170 - }, - { - "epoch": 0.40330697340043137, - "grad_norm": 0.4200485646724701, - "learning_rate": 9.667418093403312e-06, - "loss": 0.3524, - "step": 6171 - }, - { - "epoch": 0.40337232860597344, - "grad_norm": 0.48350051045417786, - "learning_rate": 9.66729285548661e-06, - "loss": 0.4464, - "step": 6172 - }, - { - "epoch": 0.40343768381151557, - "grad_norm": 0.48739489912986755, - "learning_rate": 9.667167594805901e-06, - "loss": 0.4104, - "step": 6173 - }, - { - "epoch": 0.4035030390170577, - "grad_norm": 0.5147889852523804, - "learning_rate": 9.6670423113618e-06, - "loss": 0.3991, - "step": 6174 - }, - { - "epoch": 0.4035683942225998, - "grad_norm": 0.5043358206748962, - "learning_rate": 9.666917005154918e-06, - "loss": 0.473, - "step": 6175 - }, - { - "epoch": 0.40363374942814195, - "grad_norm": 0.4084410071372986, - "learning_rate": 9.666791676185862e-06, - "loss": 0.339, - "step": 6176 - }, - { - "epoch": 0.4036991046336841, - "grad_norm": 0.4801981449127197, - "learning_rate": 9.66666632445525e-06, - "loss": 0.392, - "step": 6177 - }, - { - "epoch": 0.4037644598392262, - "grad_norm": 0.46107038855552673, - "learning_rate": 9.666540949963687e-06, - "loss": 0.3739, - "step": 6178 - }, - { - "epoch": 0.40382981504476834, - "grad_norm": 0.4498555064201355, - "learning_rate": 9.666415552711789e-06, - "loss": 0.3855, - "step": 6179 - }, - { - "epoch": 0.40389517025031046, - "grad_norm": 0.4480822682380676, - "learning_rate": 9.666290132700165e-06, - "loss": 0.3804, - "step": 6180 - }, - { - "epoch": 0.40396052545585254, - "grad_norm": 0.4630272388458252, - "learning_rate": 9.666164689929427e-06, - "loss": 0.4211, - "step": 6181 - }, - { - "epoch": 0.40402588066139467, - "grad_norm": 0.4566306173801422, - "learning_rate": 9.666039224400187e-06, - "loss": 0.4363, - "step": 6182 - }, - { - "epoch": 0.4040912358669368, - "grad_norm": 0.450600802898407, - "learning_rate": 9.665913736113057e-06, - "loss": 0.3984, - "step": 6183 - }, - { - "epoch": 0.4041565910724789, - "grad_norm": 0.449144184589386, - "learning_rate": 9.66578822506865e-06, - "loss": 0.3577, - "step": 6184 - }, - { - "epoch": 0.40422194627802105, - "grad_norm": 0.45714282989501953, - "learning_rate": 9.665662691267578e-06, - "loss": 0.4253, - "step": 6185 - }, - { - "epoch": 0.4042873014835632, - "grad_norm": 0.44167929887771606, - "learning_rate": 9.665537134710452e-06, - "loss": 0.3617, - "step": 6186 - }, - { - "epoch": 0.4043526566891053, - "grad_norm": 0.4349885880947113, - "learning_rate": 9.665411555397885e-06, - "loss": 0.3551, - "step": 6187 - }, - { - "epoch": 0.40441801189464743, - "grad_norm": 0.4807620942592621, - "learning_rate": 9.66528595333049e-06, - "loss": 0.3952, - "step": 6188 - }, - { - "epoch": 0.4044833671001895, - "grad_norm": 0.45490214228630066, - "learning_rate": 9.66516032850888e-06, - "loss": 0.4011, - "step": 6189 - }, - { - "epoch": 0.40454872230573163, - "grad_norm": 0.504051148891449, - "learning_rate": 9.665034680933665e-06, - "loss": 0.4593, - "step": 6190 - }, - { - "epoch": 0.40461407751127376, - "grad_norm": 0.4414256513118744, - "learning_rate": 9.664909010605462e-06, - "loss": 0.436, - "step": 6191 - }, - { - "epoch": 0.4046794327168159, - "grad_norm": 0.49999725818634033, - "learning_rate": 9.664783317524879e-06, - "loss": 0.4606, - "step": 6192 - }, - { - "epoch": 0.404744787922358, - "grad_norm": 0.44729292392730713, - "learning_rate": 9.664657601692532e-06, - "loss": 0.3958, - "step": 6193 - }, - { - "epoch": 0.40481014312790015, - "grad_norm": 0.45030468702316284, - "learning_rate": 9.664531863109036e-06, - "loss": 0.3377, - "step": 6194 - }, - { - "epoch": 0.4048754983334423, - "grad_norm": 0.4917182922363281, - "learning_rate": 9.664406101775001e-06, - "loss": 0.4469, - "step": 6195 - }, - { - "epoch": 0.4049408535389844, - "grad_norm": 0.4703455865383148, - "learning_rate": 9.664280317691042e-06, - "loss": 0.3949, - "step": 6196 - }, - { - "epoch": 0.4050062087445265, - "grad_norm": 0.4861055016517639, - "learning_rate": 9.664154510857772e-06, - "loss": 0.4385, - "step": 6197 - }, - { - "epoch": 0.4050715639500686, - "grad_norm": 0.47776445746421814, - "learning_rate": 9.664028681275804e-06, - "loss": 0.3932, - "step": 6198 - }, - { - "epoch": 0.40513691915561073, - "grad_norm": 0.5069558620452881, - "learning_rate": 9.663902828945753e-06, - "loss": 0.4466, - "step": 6199 - }, - { - "epoch": 0.40520227436115286, - "grad_norm": 0.4434361457824707, - "learning_rate": 9.663776953868232e-06, - "loss": 0.3606, - "step": 6200 - }, - { - "epoch": 0.405267629566695, - "grad_norm": 0.4312838912010193, - "learning_rate": 9.663651056043855e-06, - "loss": 0.3551, - "step": 6201 - }, - { - "epoch": 0.4053329847722371, - "grad_norm": 0.4512668251991272, - "learning_rate": 9.663525135473235e-06, - "loss": 0.4085, - "step": 6202 - }, - { - "epoch": 0.40539833997777924, - "grad_norm": 0.4664841294288635, - "learning_rate": 9.663399192156988e-06, - "loss": 0.3828, - "step": 6203 - }, - { - "epoch": 0.40546369518332137, - "grad_norm": 0.5049656629562378, - "learning_rate": 9.663273226095729e-06, - "loss": 0.444, - "step": 6204 - }, - { - "epoch": 0.4055290503888635, - "grad_norm": 0.51225346326828, - "learning_rate": 9.663147237290069e-06, - "loss": 0.4186, - "step": 6205 - }, - { - "epoch": 0.40559440559440557, - "grad_norm": 0.4588335156440735, - "learning_rate": 9.663021225740626e-06, - "loss": 0.361, - "step": 6206 - }, - { - "epoch": 0.4056597607999477, - "grad_norm": 0.46261194348335266, - "learning_rate": 9.662895191448013e-06, - "loss": 0.3684, - "step": 6207 - }, - { - "epoch": 0.4057251160054898, - "grad_norm": 0.4598039984703064, - "learning_rate": 9.662769134412843e-06, - "loss": 0.3666, - "step": 6208 - }, - { - "epoch": 0.40579047121103196, - "grad_norm": 0.5030199885368347, - "learning_rate": 9.662643054635735e-06, - "loss": 0.4112, - "step": 6209 - }, - { - "epoch": 0.4058558264165741, - "grad_norm": 0.46446874737739563, - "learning_rate": 9.6625169521173e-06, - "loss": 0.4004, - "step": 6210 - }, - { - "epoch": 0.4059211816221162, - "grad_norm": 0.5079129338264465, - "learning_rate": 9.662390826858154e-06, - "loss": 0.4407, - "step": 6211 - }, - { - "epoch": 0.40598653682765834, - "grad_norm": 0.4871063828468323, - "learning_rate": 9.662264678858915e-06, - "loss": 0.3826, - "step": 6212 - }, - { - "epoch": 0.40605189203320047, - "grad_norm": 0.5497263073921204, - "learning_rate": 9.662138508120195e-06, - "loss": 0.5118, - "step": 6213 - }, - { - "epoch": 0.40611724723874254, - "grad_norm": 0.48818713426589966, - "learning_rate": 9.662012314642609e-06, - "loss": 0.4587, - "step": 6214 - }, - { - "epoch": 0.40618260244428467, - "grad_norm": 0.44175398349761963, - "learning_rate": 9.661886098426777e-06, - "loss": 0.3615, - "step": 6215 - }, - { - "epoch": 0.4062479576498268, - "grad_norm": 0.49936994910240173, - "learning_rate": 9.661759859473307e-06, - "loss": 0.448, - "step": 6216 - }, - { - "epoch": 0.4063133128553689, - "grad_norm": 0.4806167185306549, - "learning_rate": 9.661633597782823e-06, - "loss": 0.3578, - "step": 6217 - }, - { - "epoch": 0.40637866806091105, - "grad_norm": 0.4918518662452698, - "learning_rate": 9.661507313355935e-06, - "loss": 0.398, - "step": 6218 - }, - { - "epoch": 0.4064440232664532, - "grad_norm": 0.4765973389148712, - "learning_rate": 9.661381006193261e-06, - "loss": 0.4001, - "step": 6219 - }, - { - "epoch": 0.4065093784719953, - "grad_norm": 0.47209399938583374, - "learning_rate": 9.661254676295418e-06, - "loss": 0.358, - "step": 6220 - }, - { - "epoch": 0.40657473367753744, - "grad_norm": 0.4761291444301605, - "learning_rate": 9.66112832366302e-06, - "loss": 0.4053, - "step": 6221 - }, - { - "epoch": 0.40664008888307956, - "grad_norm": 0.4611034691333771, - "learning_rate": 9.661001948296685e-06, - "loss": 0.3908, - "step": 6222 - }, - { - "epoch": 0.40670544408862164, - "grad_norm": 0.46636754274368286, - "learning_rate": 9.660875550197028e-06, - "loss": 0.4007, - "step": 6223 - }, - { - "epoch": 0.40677079929416377, - "grad_norm": 0.45153120160102844, - "learning_rate": 9.660749129364668e-06, - "loss": 0.3634, - "step": 6224 - }, - { - "epoch": 0.4068361544997059, - "grad_norm": 0.4640316069126129, - "learning_rate": 9.660622685800218e-06, - "loss": 0.4311, - "step": 6225 - }, - { - "epoch": 0.406901509705248, - "grad_norm": 0.48421111702919006, - "learning_rate": 9.660496219504298e-06, - "loss": 0.3934, - "step": 6226 - }, - { - "epoch": 0.40696686491079015, - "grad_norm": 0.44161295890808105, - "learning_rate": 9.660369730477524e-06, - "loss": 0.3405, - "step": 6227 - }, - { - "epoch": 0.4070322201163323, - "grad_norm": 0.5003371238708496, - "learning_rate": 9.660243218720511e-06, - "loss": 0.4521, - "step": 6228 - }, - { - "epoch": 0.4070975753218744, - "grad_norm": 0.413867712020874, - "learning_rate": 9.660116684233877e-06, - "loss": 0.3357, - "step": 6229 - }, - { - "epoch": 0.40716293052741653, - "grad_norm": 0.4682891368865967, - "learning_rate": 9.65999012701824e-06, - "loss": 0.3931, - "step": 6230 - }, - { - "epoch": 0.4072282857329586, - "grad_norm": 0.4446837902069092, - "learning_rate": 9.659863547074218e-06, - "loss": 0.3493, - "step": 6231 - }, - { - "epoch": 0.40729364093850073, - "grad_norm": 0.4651505947113037, - "learning_rate": 9.659736944402424e-06, - "loss": 0.3864, - "step": 6232 - }, - { - "epoch": 0.40735899614404286, - "grad_norm": 0.4548189342021942, - "learning_rate": 9.659610319003481e-06, - "loss": 0.398, - "step": 6233 - }, - { - "epoch": 0.407424351349585, - "grad_norm": 0.4308524429798126, - "learning_rate": 9.659483670878004e-06, - "loss": 0.3717, - "step": 6234 - }, - { - "epoch": 0.4074897065551271, - "grad_norm": 0.498067170381546, - "learning_rate": 9.65935700002661e-06, - "loss": 0.4998, - "step": 6235 - }, - { - "epoch": 0.40755506176066925, - "grad_norm": 0.47328585386276245, - "learning_rate": 9.65923030644992e-06, - "loss": 0.4105, - "step": 6236 - }, - { - "epoch": 0.4076204169662114, - "grad_norm": 0.48229318857192993, - "learning_rate": 9.659103590148548e-06, - "loss": 0.3991, - "step": 6237 - }, - { - "epoch": 0.4076857721717535, - "grad_norm": 0.4877176880836487, - "learning_rate": 9.658976851123113e-06, - "loss": 0.427, - "step": 6238 - }, - { - "epoch": 0.4077511273772956, - "grad_norm": 0.4455759823322296, - "learning_rate": 9.658850089374234e-06, - "loss": 0.3682, - "step": 6239 - }, - { - "epoch": 0.4078164825828377, - "grad_norm": 0.4522955119609833, - "learning_rate": 9.65872330490253e-06, - "loss": 0.4046, - "step": 6240 - }, - { - "epoch": 0.40788183778837983, - "grad_norm": 0.4727816879749298, - "learning_rate": 9.658596497708618e-06, - "loss": 0.3414, - "step": 6241 - }, - { - "epoch": 0.40794719299392196, - "grad_norm": 0.4037397503852844, - "learning_rate": 9.65846966779312e-06, - "loss": 0.317, - "step": 6242 - }, - { - "epoch": 0.4080125481994641, - "grad_norm": 0.45989301800727844, - "learning_rate": 9.658342815156646e-06, - "loss": 0.4004, - "step": 6243 - }, - { - "epoch": 0.4080779034050062, - "grad_norm": 0.5048174262046814, - "learning_rate": 9.658215939799824e-06, - "loss": 0.4635, - "step": 6244 - }, - { - "epoch": 0.40814325861054834, - "grad_norm": 0.4990854561328888, - "learning_rate": 9.658089041723269e-06, - "loss": 0.4536, - "step": 6245 - }, - { - "epoch": 0.40820861381609047, - "grad_norm": 0.43396371603012085, - "learning_rate": 9.6579621209276e-06, - "loss": 0.3521, - "step": 6246 - }, - { - "epoch": 0.4082739690216326, - "grad_norm": 0.43475762009620667, - "learning_rate": 9.657835177413434e-06, - "loss": 0.3556, - "step": 6247 - }, - { - "epoch": 0.40833932422717467, - "grad_norm": 0.48515042662620544, - "learning_rate": 9.657708211181395e-06, - "loss": 0.4212, - "step": 6248 - }, - { - "epoch": 0.4084046794327168, - "grad_norm": 0.47048693895339966, - "learning_rate": 9.657581222232098e-06, - "loss": 0.3557, - "step": 6249 - }, - { - "epoch": 0.4084700346382589, - "grad_norm": 0.4376108944416046, - "learning_rate": 9.657454210566164e-06, - "loss": 0.3954, - "step": 6250 - }, - { - "epoch": 0.40853538984380106, - "grad_norm": 0.47545501589775085, - "learning_rate": 9.657327176184212e-06, - "loss": 0.4319, - "step": 6251 - }, - { - "epoch": 0.4086007450493432, - "grad_norm": 0.4782899022102356, - "learning_rate": 9.657200119086862e-06, - "loss": 0.3818, - "step": 6252 - }, - { - "epoch": 0.4086661002548853, - "grad_norm": 0.44529226422309875, - "learning_rate": 9.657073039274733e-06, - "loss": 0.397, - "step": 6253 - }, - { - "epoch": 0.40873145546042744, - "grad_norm": 0.4592170715332031, - "learning_rate": 9.656945936748448e-06, - "loss": 0.39, - "step": 6254 - }, - { - "epoch": 0.40879681066596957, - "grad_norm": 0.449066698551178, - "learning_rate": 9.65681881150862e-06, - "loss": 0.3809, - "step": 6255 - }, - { - "epoch": 0.40886216587151164, - "grad_norm": 0.4741554856300354, - "learning_rate": 9.656691663555877e-06, - "loss": 0.4109, - "step": 6256 - }, - { - "epoch": 0.40892752107705377, - "grad_norm": 0.48184025287628174, - "learning_rate": 9.656564492890835e-06, - "loss": 0.4077, - "step": 6257 - }, - { - "epoch": 0.4089928762825959, - "grad_norm": 0.4373905062675476, - "learning_rate": 9.656437299514114e-06, - "loss": 0.3539, - "step": 6258 - }, - { - "epoch": 0.409058231488138, - "grad_norm": 0.4374079406261444, - "learning_rate": 9.656310083426337e-06, - "loss": 0.3625, - "step": 6259 - }, - { - "epoch": 0.40912358669368015, - "grad_norm": 0.41604936122894287, - "learning_rate": 9.65618284462812e-06, - "loss": 0.3324, - "step": 6260 - }, - { - "epoch": 0.4091889418992223, - "grad_norm": 0.4871421456336975, - "learning_rate": 9.656055583120087e-06, - "loss": 0.3604, - "step": 6261 - }, - { - "epoch": 0.4092542971047644, - "grad_norm": 0.5023635625839233, - "learning_rate": 9.655928298902858e-06, - "loss": 0.4469, - "step": 6262 - }, - { - "epoch": 0.40931965231030654, - "grad_norm": 0.5003955960273743, - "learning_rate": 9.655800991977054e-06, - "loss": 0.3828, - "step": 6263 - }, - { - "epoch": 0.40938500751584866, - "grad_norm": 0.4711710810661316, - "learning_rate": 9.655673662343296e-06, - "loss": 0.405, - "step": 6264 - }, - { - "epoch": 0.40945036272139074, - "grad_norm": 0.46596553921699524, - "learning_rate": 9.655546310002204e-06, - "loss": 0.3941, - "step": 6265 - }, - { - "epoch": 0.40951571792693287, - "grad_norm": 0.48585209250450134, - "learning_rate": 9.6554189349544e-06, - "loss": 0.4594, - "step": 6266 - }, - { - "epoch": 0.409581073132475, - "grad_norm": 0.4733290672302246, - "learning_rate": 9.655291537200505e-06, - "loss": 0.3973, - "step": 6267 - }, - { - "epoch": 0.4096464283380171, - "grad_norm": 0.4435085952281952, - "learning_rate": 9.65516411674114e-06, - "loss": 0.371, - "step": 6268 - }, - { - "epoch": 0.40971178354355925, - "grad_norm": 0.5237358212471008, - "learning_rate": 9.655036673576927e-06, - "loss": 0.4664, - "step": 6269 - }, - { - "epoch": 0.4097771387491014, - "grad_norm": 0.4727523922920227, - "learning_rate": 9.654909207708489e-06, - "loss": 0.4214, - "step": 6270 - }, - { - "epoch": 0.4098424939546435, - "grad_norm": 0.4445841908454895, - "learning_rate": 9.654781719136445e-06, - "loss": 0.3532, - "step": 6271 - }, - { - "epoch": 0.40990784916018563, - "grad_norm": 0.470441609621048, - "learning_rate": 9.654654207861418e-06, - "loss": 0.3739, - "step": 6272 - }, - { - "epoch": 0.4099732043657277, - "grad_norm": 0.4501136541366577, - "learning_rate": 9.65452667388403e-06, - "loss": 0.3555, - "step": 6273 - }, - { - "epoch": 0.41003855957126983, - "grad_norm": 0.49914395809173584, - "learning_rate": 9.654399117204902e-06, - "loss": 0.4225, - "step": 6274 - }, - { - "epoch": 0.41010391477681196, - "grad_norm": 0.4688049256801605, - "learning_rate": 9.654271537824658e-06, - "loss": 0.4027, - "step": 6275 - }, - { - "epoch": 0.4101692699823541, - "grad_norm": 0.4889581799507141, - "learning_rate": 9.65414393574392e-06, - "loss": 0.4532, - "step": 6276 - }, - { - "epoch": 0.4102346251878962, - "grad_norm": 0.470244437456131, - "learning_rate": 9.654016310963308e-06, - "loss": 0.3591, - "step": 6277 - }, - { - "epoch": 0.41029998039343835, - "grad_norm": 0.48283493518829346, - "learning_rate": 9.653888663483448e-06, - "loss": 0.4036, - "step": 6278 - }, - { - "epoch": 0.4103653355989805, - "grad_norm": 0.49872222542762756, - "learning_rate": 9.653760993304959e-06, - "loss": 0.4432, - "step": 6279 - }, - { - "epoch": 0.4104306908045226, - "grad_norm": 0.5243247151374817, - "learning_rate": 9.653633300428467e-06, - "loss": 0.4611, - "step": 6280 - }, - { - "epoch": 0.4104960460100647, - "grad_norm": 0.4761289060115814, - "learning_rate": 9.653505584854592e-06, - "loss": 0.439, - "step": 6281 - }, - { - "epoch": 0.4105614012156068, - "grad_norm": 0.4796181619167328, - "learning_rate": 9.653377846583957e-06, - "loss": 0.4189, - "step": 6282 - }, - { - "epoch": 0.41062675642114893, - "grad_norm": 0.5055959820747375, - "learning_rate": 9.65325008561719e-06, - "loss": 0.4411, - "step": 6283 - }, - { - "epoch": 0.41069211162669106, - "grad_norm": 0.49860087037086487, - "learning_rate": 9.653122301954907e-06, - "loss": 0.4269, - "step": 6284 - }, - { - "epoch": 0.4107574668322332, - "grad_norm": 0.4254683554172516, - "learning_rate": 9.652994495597737e-06, - "loss": 0.3599, - "step": 6285 - }, - { - "epoch": 0.4108228220377753, - "grad_norm": 0.4287961721420288, - "learning_rate": 9.652866666546298e-06, - "loss": 0.2996, - "step": 6286 - }, - { - "epoch": 0.41088817724331744, - "grad_norm": 0.4624471962451935, - "learning_rate": 9.652738814801218e-06, - "loss": 0.397, - "step": 6287 - }, - { - "epoch": 0.41095353244885957, - "grad_norm": 0.45612502098083496, - "learning_rate": 9.652610940363119e-06, - "loss": 0.375, - "step": 6288 - }, - { - "epoch": 0.4110188876544017, - "grad_norm": 0.4525100290775299, - "learning_rate": 9.652483043232624e-06, - "loss": 0.4226, - "step": 6289 - }, - { - "epoch": 0.41108424285994377, - "grad_norm": 0.5095105767250061, - "learning_rate": 9.652355123410357e-06, - "loss": 0.4664, - "step": 6290 - }, - { - "epoch": 0.4111495980654859, - "grad_norm": 0.46207407116889954, - "learning_rate": 9.652227180896943e-06, - "loss": 0.3348, - "step": 6291 - }, - { - "epoch": 0.411214953271028, - "grad_norm": 0.4590258300304413, - "learning_rate": 9.652099215693006e-06, - "loss": 0.3997, - "step": 6292 - }, - { - "epoch": 0.41128030847657016, - "grad_norm": 0.4889128804206848, - "learning_rate": 9.651971227799168e-06, - "loss": 0.4472, - "step": 6293 - }, - { - "epoch": 0.4113456636821123, - "grad_norm": 0.45457106828689575, - "learning_rate": 9.651843217216057e-06, - "loss": 0.3805, - "step": 6294 - }, - { - "epoch": 0.4114110188876544, - "grad_norm": 0.4759896993637085, - "learning_rate": 9.651715183944294e-06, - "loss": 0.3926, - "step": 6295 - }, - { - "epoch": 0.41147637409319654, - "grad_norm": 0.4567562937736511, - "learning_rate": 9.651587127984503e-06, - "loss": 0.338, - "step": 6296 - }, - { - "epoch": 0.41154172929873867, - "grad_norm": 0.49032914638519287, - "learning_rate": 9.651459049337313e-06, - "loss": 0.4234, - "step": 6297 - }, - { - "epoch": 0.41160708450428074, - "grad_norm": 0.4713863432407379, - "learning_rate": 9.651330948003343e-06, - "loss": 0.4053, - "step": 6298 - }, - { - "epoch": 0.41167243970982287, - "grad_norm": 0.46855491399765015, - "learning_rate": 9.651202823983223e-06, - "loss": 0.3953, - "step": 6299 - }, - { - "epoch": 0.411737794915365, - "grad_norm": 0.5075733065605164, - "learning_rate": 9.651074677277574e-06, - "loss": 0.4222, - "step": 6300 - }, - { - "epoch": 0.4118031501209071, - "grad_norm": 0.4374162256717682, - "learning_rate": 9.650946507887022e-06, - "loss": 0.3527, - "step": 6301 - }, - { - "epoch": 0.41186850532644925, - "grad_norm": 0.4779459238052368, - "learning_rate": 9.650818315812194e-06, - "loss": 0.4286, - "step": 6302 - }, - { - "epoch": 0.4119338605319914, - "grad_norm": 0.4661043882369995, - "learning_rate": 9.650690101053712e-06, - "loss": 0.3923, - "step": 6303 - }, - { - "epoch": 0.4119992157375335, - "grad_norm": 0.436037540435791, - "learning_rate": 9.650561863612206e-06, - "loss": 0.3697, - "step": 6304 - }, - { - "epoch": 0.41206457094307564, - "grad_norm": 0.5179614424705505, - "learning_rate": 9.650433603488297e-06, - "loss": 0.4282, - "step": 6305 - }, - { - "epoch": 0.41212992614861776, - "grad_norm": 0.49485376477241516, - "learning_rate": 9.650305320682612e-06, - "loss": 0.4296, - "step": 6306 - }, - { - "epoch": 0.41219528135415984, - "grad_norm": 0.4761447310447693, - "learning_rate": 9.650177015195778e-06, - "loss": 0.4351, - "step": 6307 - }, - { - "epoch": 0.41226063655970197, - "grad_norm": 0.44707223773002625, - "learning_rate": 9.650048687028419e-06, - "loss": 0.3594, - "step": 6308 - }, - { - "epoch": 0.4123259917652441, - "grad_norm": 0.46764659881591797, - "learning_rate": 9.649920336181162e-06, - "loss": 0.3742, - "step": 6309 - }, - { - "epoch": 0.4123913469707862, - "grad_norm": 0.46206626296043396, - "learning_rate": 9.649791962654631e-06, - "loss": 0.3575, - "step": 6310 - }, - { - "epoch": 0.41245670217632835, - "grad_norm": 0.46091535687446594, - "learning_rate": 9.649663566449457e-06, - "loss": 0.3992, - "step": 6311 - }, - { - "epoch": 0.4125220573818705, - "grad_norm": 0.5275874137878418, - "learning_rate": 9.64953514756626e-06, - "loss": 0.3964, - "step": 6312 - }, - { - "epoch": 0.4125874125874126, - "grad_norm": 0.45352184772491455, - "learning_rate": 9.64940670600567e-06, - "loss": 0.4023, - "step": 6313 - }, - { - "epoch": 0.41265276779295473, - "grad_norm": 0.41809654235839844, - "learning_rate": 9.649278241768313e-06, - "loss": 0.3301, - "step": 6314 - }, - { - "epoch": 0.4127181229984968, - "grad_norm": 0.44547584652900696, - "learning_rate": 9.649149754854817e-06, - "loss": 0.4001, - "step": 6315 - }, - { - "epoch": 0.41278347820403893, - "grad_norm": 0.4872359037399292, - "learning_rate": 9.649021245265804e-06, - "loss": 0.4315, - "step": 6316 - }, - { - "epoch": 0.41284883340958106, - "grad_norm": 0.4649243652820587, - "learning_rate": 9.648892713001906e-06, - "loss": 0.4069, - "step": 6317 - }, - { - "epoch": 0.4129141886151232, - "grad_norm": 0.44608205556869507, - "learning_rate": 9.648764158063747e-06, - "loss": 0.3972, - "step": 6318 - }, - { - "epoch": 0.4129795438206653, - "grad_norm": 0.4536326229572296, - "learning_rate": 9.648635580451955e-06, - "loss": 0.3966, - "step": 6319 - }, - { - "epoch": 0.41304489902620745, - "grad_norm": 0.4427248239517212, - "learning_rate": 9.648506980167156e-06, - "loss": 0.3812, - "step": 6320 - }, - { - "epoch": 0.4131102542317496, - "grad_norm": 0.45658716559410095, - "learning_rate": 9.64837835720998e-06, - "loss": 0.3798, - "step": 6321 - }, - { - "epoch": 0.4131756094372917, - "grad_norm": 0.4524311125278473, - "learning_rate": 9.64824971158105e-06, - "loss": 0.374, - "step": 6322 - }, - { - "epoch": 0.4132409646428338, - "grad_norm": 0.4253866970539093, - "learning_rate": 9.648121043280997e-06, - "loss": 0.3699, - "step": 6323 - }, - { - "epoch": 0.4133063198483759, - "grad_norm": 0.4903950095176697, - "learning_rate": 9.647992352310449e-06, - "loss": 0.4332, - "step": 6324 - }, - { - "epoch": 0.41337167505391803, - "grad_norm": 0.477671355009079, - "learning_rate": 9.64786363867003e-06, - "loss": 0.43, - "step": 6325 - }, - { - "epoch": 0.41343703025946016, - "grad_norm": 0.48784369230270386, - "learning_rate": 9.647734902360369e-06, - "loss": 0.4405, - "step": 6326 - }, - { - "epoch": 0.4135023854650023, - "grad_norm": 0.5050990581512451, - "learning_rate": 9.647606143382097e-06, - "loss": 0.4562, - "step": 6327 - }, - { - "epoch": 0.4135677406705444, - "grad_norm": 0.4481131434440613, - "learning_rate": 9.64747736173584e-06, - "loss": 0.3883, - "step": 6328 - }, - { - "epoch": 0.41363309587608654, - "grad_norm": 0.43018585443496704, - "learning_rate": 9.647348557422223e-06, - "loss": 0.3423, - "step": 6329 - }, - { - "epoch": 0.41369845108162867, - "grad_norm": 0.4817647337913513, - "learning_rate": 9.64721973044188e-06, - "loss": 0.4058, - "step": 6330 - }, - { - "epoch": 0.4137638062871708, - "grad_norm": 0.46122077107429504, - "learning_rate": 9.647090880795434e-06, - "loss": 0.4009, - "step": 6331 - }, - { - "epoch": 0.41382916149271287, - "grad_norm": 0.4703610837459564, - "learning_rate": 9.646962008483518e-06, - "loss": 0.4012, - "step": 6332 - }, - { - "epoch": 0.413894516698255, - "grad_norm": 0.4957987666130066, - "learning_rate": 9.646833113506758e-06, - "loss": 0.4215, - "step": 6333 - }, - { - "epoch": 0.4139598719037971, - "grad_norm": 0.4563731849193573, - "learning_rate": 9.64670419586578e-06, - "loss": 0.3844, - "step": 6334 - }, - { - "epoch": 0.41402522710933926, - "grad_norm": 0.4593028426170349, - "learning_rate": 9.64657525556122e-06, - "loss": 0.3897, - "step": 6335 - }, - { - "epoch": 0.4140905823148814, - "grad_norm": 0.47703373432159424, - "learning_rate": 9.6464462925937e-06, - "loss": 0.4261, - "step": 6336 - }, - { - "epoch": 0.4141559375204235, - "grad_norm": 0.45836660265922546, - "learning_rate": 9.646317306963853e-06, - "loss": 0.4094, - "step": 6337 - }, - { - "epoch": 0.41422129272596564, - "grad_norm": 0.47570812702178955, - "learning_rate": 9.646188298672308e-06, - "loss": 0.4156, - "step": 6338 - }, - { - "epoch": 0.41428664793150777, - "grad_norm": 0.5125575661659241, - "learning_rate": 9.646059267719691e-06, - "loss": 0.3903, - "step": 6339 - }, - { - "epoch": 0.41435200313704984, - "grad_norm": 0.42409124970436096, - "learning_rate": 9.645930214106635e-06, - "loss": 0.3551, - "step": 6340 - }, - { - "epoch": 0.41441735834259197, - "grad_norm": 0.48722681403160095, - "learning_rate": 9.645801137833766e-06, - "loss": 0.4434, - "step": 6341 - }, - { - "epoch": 0.4144827135481341, - "grad_norm": 0.45247524976730347, - "learning_rate": 9.645672038901716e-06, - "loss": 0.3257, - "step": 6342 - }, - { - "epoch": 0.4145480687536762, - "grad_norm": 0.4561573565006256, - "learning_rate": 9.645542917311116e-06, - "loss": 0.3802, - "step": 6343 - }, - { - "epoch": 0.41461342395921835, - "grad_norm": 0.4621433615684509, - "learning_rate": 9.645413773062593e-06, - "loss": 0.3769, - "step": 6344 - }, - { - "epoch": 0.4146787791647605, - "grad_norm": 0.44296616315841675, - "learning_rate": 9.645284606156776e-06, - "loss": 0.3653, - "step": 6345 - }, - { - "epoch": 0.4147441343703026, - "grad_norm": 0.46599406003952026, - "learning_rate": 9.645155416594299e-06, - "loss": 0.3809, - "step": 6346 - }, - { - "epoch": 0.41480948957584474, - "grad_norm": 0.4901500344276428, - "learning_rate": 9.645026204375787e-06, - "loss": 0.4152, - "step": 6347 - }, - { - "epoch": 0.41487484478138686, - "grad_norm": 0.5185682773590088, - "learning_rate": 9.644896969501876e-06, - "loss": 0.4723, - "step": 6348 - }, - { - "epoch": 0.41494019998692894, - "grad_norm": 0.469554603099823, - "learning_rate": 9.64476771197319e-06, - "loss": 0.3674, - "step": 6349 - }, - { - "epoch": 0.41500555519247107, - "grad_norm": 0.41888344287872314, - "learning_rate": 9.644638431790366e-06, - "loss": 0.3479, - "step": 6350 - }, - { - "epoch": 0.4150709103980132, - "grad_norm": 0.4631621837615967, - "learning_rate": 9.644509128954029e-06, - "loss": 0.3965, - "step": 6351 - }, - { - "epoch": 0.4151362656035553, - "grad_norm": 0.43988871574401855, - "learning_rate": 9.644379803464814e-06, - "loss": 0.3656, - "step": 6352 - }, - { - "epoch": 0.41520162080909745, - "grad_norm": 0.4931758642196655, - "learning_rate": 9.644250455323349e-06, - "loss": 0.4208, - "step": 6353 - }, - { - "epoch": 0.4152669760146396, - "grad_norm": 0.42915892601013184, - "learning_rate": 9.644121084530265e-06, - "loss": 0.3935, - "step": 6354 - }, - { - "epoch": 0.4153323312201817, - "grad_norm": 0.4514559209346771, - "learning_rate": 9.643991691086194e-06, - "loss": 0.3745, - "step": 6355 - }, - { - "epoch": 0.41539768642572383, - "grad_norm": 0.4412904381752014, - "learning_rate": 9.643862274991766e-06, - "loss": 0.4059, - "step": 6356 - }, - { - "epoch": 0.4154630416312659, - "grad_norm": 0.5144051313400269, - "learning_rate": 9.643732836247614e-06, - "loss": 0.4785, - "step": 6357 - }, - { - "epoch": 0.41552839683680803, - "grad_norm": 0.45030677318573, - "learning_rate": 9.643603374854367e-06, - "loss": 0.4072, - "step": 6358 - }, - { - "epoch": 0.41559375204235016, - "grad_norm": 0.4990215301513672, - "learning_rate": 9.643473890812658e-06, - "loss": 0.3493, - "step": 6359 - }, - { - "epoch": 0.4156591072478923, - "grad_norm": 0.45090121030807495, - "learning_rate": 9.64334438412312e-06, - "loss": 0.3531, - "step": 6360 - }, - { - "epoch": 0.4157244624534344, - "grad_norm": 0.4247357249259949, - "learning_rate": 9.64321485478638e-06, - "loss": 0.3396, - "step": 6361 - }, - { - "epoch": 0.41578981765897655, - "grad_norm": 0.4885372817516327, - "learning_rate": 9.643085302803074e-06, - "loss": 0.3966, - "step": 6362 - }, - { - "epoch": 0.4158551728645187, - "grad_norm": 0.4586491584777832, - "learning_rate": 9.642955728173833e-06, - "loss": 0.3746, - "step": 6363 - }, - { - "epoch": 0.4159205280700608, - "grad_norm": 0.47208213806152344, - "learning_rate": 9.642826130899287e-06, - "loss": 0.4177, - "step": 6364 - }, - { - "epoch": 0.4159858832756029, - "grad_norm": 0.4739512503147125, - "learning_rate": 9.642696510980072e-06, - "loss": 0.4394, - "step": 6365 - }, - { - "epoch": 0.416051238481145, - "grad_norm": 0.4629870653152466, - "learning_rate": 9.642566868416814e-06, - "loss": 0.4089, - "step": 6366 - }, - { - "epoch": 0.41611659368668713, - "grad_norm": 0.4545626938343048, - "learning_rate": 9.642437203210154e-06, - "loss": 0.4, - "step": 6367 - }, - { - "epoch": 0.41618194889222926, - "grad_norm": 0.4711894989013672, - "learning_rate": 9.642307515360715e-06, - "loss": 0.4555, - "step": 6368 - }, - { - "epoch": 0.4162473040977714, - "grad_norm": 0.45401862263679504, - "learning_rate": 9.642177804869136e-06, - "loss": 0.4058, - "step": 6369 - }, - { - "epoch": 0.4163126593033135, - "grad_norm": 0.4288789927959442, - "learning_rate": 9.642048071736047e-06, - "loss": 0.3452, - "step": 6370 - }, - { - "epoch": 0.41637801450885564, - "grad_norm": 0.4517304599285126, - "learning_rate": 9.641918315962082e-06, - "loss": 0.3917, - "step": 6371 - }, - { - "epoch": 0.41644336971439777, - "grad_norm": 0.4933716058731079, - "learning_rate": 9.641788537547873e-06, - "loss": 0.4526, - "step": 6372 - }, - { - "epoch": 0.4165087249199399, - "grad_norm": 0.47293055057525635, - "learning_rate": 9.641658736494053e-06, - "loss": 0.3568, - "step": 6373 - }, - { - "epoch": 0.41657408012548197, - "grad_norm": 0.46282824873924255, - "learning_rate": 9.641528912801255e-06, - "loss": 0.3822, - "step": 6374 - }, - { - "epoch": 0.4166394353310241, - "grad_norm": 0.4570055305957794, - "learning_rate": 9.641399066470112e-06, - "loss": 0.4075, - "step": 6375 - }, - { - "epoch": 0.4167047905365662, - "grad_norm": 0.43674564361572266, - "learning_rate": 9.64126919750126e-06, - "loss": 0.3788, - "step": 6376 - }, - { - "epoch": 0.41677014574210836, - "grad_norm": 0.5012200474739075, - "learning_rate": 9.641139305895329e-06, - "loss": 0.4448, - "step": 6377 - }, - { - "epoch": 0.4168355009476505, - "grad_norm": 0.476524293422699, - "learning_rate": 9.641009391652954e-06, - "loss": 0.3513, - "step": 6378 - }, - { - "epoch": 0.4169008561531926, - "grad_norm": 0.47984185814857483, - "learning_rate": 9.640879454774768e-06, - "loss": 0.3993, - "step": 6379 - }, - { - "epoch": 0.41696621135873474, - "grad_norm": 0.4569160044193268, - "learning_rate": 9.640749495261406e-06, - "loss": 0.3524, - "step": 6380 - }, - { - "epoch": 0.41703156656427687, - "grad_norm": 0.4451634883880615, - "learning_rate": 9.640619513113499e-06, - "loss": 0.384, - "step": 6381 - }, - { - "epoch": 0.41709692176981894, - "grad_norm": 0.47604092955589294, - "learning_rate": 9.640489508331684e-06, - "loss": 0.4264, - "step": 6382 - }, - { - "epoch": 0.41716227697536107, - "grad_norm": 0.45808592438697815, - "learning_rate": 9.640359480916594e-06, - "loss": 0.3728, - "step": 6383 - }, - { - "epoch": 0.4172276321809032, - "grad_norm": 0.4437313675880432, - "learning_rate": 9.640229430868864e-06, - "loss": 0.3991, - "step": 6384 - }, - { - "epoch": 0.4172929873864453, - "grad_norm": 0.4536198377609253, - "learning_rate": 9.640099358189127e-06, - "loss": 0.3717, - "step": 6385 - }, - { - "epoch": 0.41735834259198745, - "grad_norm": 0.46030333638191223, - "learning_rate": 9.639969262878018e-06, - "loss": 0.388, - "step": 6386 - }, - { - "epoch": 0.4174236977975296, - "grad_norm": 0.45528873801231384, - "learning_rate": 9.639839144936172e-06, - "loss": 0.4102, - "step": 6387 - }, - { - "epoch": 0.4174890530030717, - "grad_norm": 0.43292397260665894, - "learning_rate": 9.639709004364222e-06, - "loss": 0.3313, - "step": 6388 - }, - { - "epoch": 0.41755440820861384, - "grad_norm": 0.4031912684440613, - "learning_rate": 9.639578841162804e-06, - "loss": 0.3074, - "step": 6389 - }, - { - "epoch": 0.41761976341415596, - "grad_norm": 0.44478580355644226, - "learning_rate": 9.639448655332553e-06, - "loss": 0.4039, - "step": 6390 - }, - { - "epoch": 0.41768511861969804, - "grad_norm": 0.500061571598053, - "learning_rate": 9.639318446874104e-06, - "loss": 0.4263, - "step": 6391 - }, - { - "epoch": 0.41775047382524017, - "grad_norm": 0.4445696175098419, - "learning_rate": 9.639188215788092e-06, - "loss": 0.3838, - "step": 6392 - }, - { - "epoch": 0.4178158290307823, - "grad_norm": 0.47839704155921936, - "learning_rate": 9.63905796207515e-06, - "loss": 0.3883, - "step": 6393 - }, - { - "epoch": 0.4178811842363244, - "grad_norm": 0.4655381739139557, - "learning_rate": 9.638927685735918e-06, - "loss": 0.3777, - "step": 6394 - }, - { - "epoch": 0.41794653944186655, - "grad_norm": 0.4355512261390686, - "learning_rate": 9.638797386771029e-06, - "loss": 0.3431, - "step": 6395 - }, - { - "epoch": 0.4180118946474087, - "grad_norm": 0.4772740304470062, - "learning_rate": 9.638667065181116e-06, - "loss": 0.3893, - "step": 6396 - }, - { - "epoch": 0.4180772498529508, - "grad_norm": 0.440621942281723, - "learning_rate": 9.638536720966816e-06, - "loss": 0.3854, - "step": 6397 - }, - { - "epoch": 0.41814260505849293, - "grad_norm": 0.46311086416244507, - "learning_rate": 9.638406354128767e-06, - "loss": 0.4199, - "step": 6398 - }, - { - "epoch": 0.418207960264035, - "grad_norm": 0.4810176193714142, - "learning_rate": 9.638275964667603e-06, - "loss": 0.4053, - "step": 6399 - }, - { - "epoch": 0.41827331546957713, - "grad_norm": 0.48047590255737305, - "learning_rate": 9.638145552583959e-06, - "loss": 0.4594, - "step": 6400 - }, - { - "epoch": 0.41833867067511926, - "grad_norm": 0.4568217098712921, - "learning_rate": 9.638015117878474e-06, - "loss": 0.3654, - "step": 6401 - }, - { - "epoch": 0.4184040258806614, - "grad_norm": 0.46990150213241577, - "learning_rate": 9.637884660551782e-06, - "loss": 0.4181, - "step": 6402 - }, - { - "epoch": 0.4184693810862035, - "grad_norm": 0.4413808584213257, - "learning_rate": 9.63775418060452e-06, - "loss": 0.3778, - "step": 6403 - }, - { - "epoch": 0.41853473629174565, - "grad_norm": 0.44534632563591003, - "learning_rate": 9.637623678037323e-06, - "loss": 0.383, - "step": 6404 - }, - { - "epoch": 0.4186000914972878, - "grad_norm": 0.4657782018184662, - "learning_rate": 9.63749315285083e-06, - "loss": 0.3808, - "step": 6405 - }, - { - "epoch": 0.4186654467028299, - "grad_norm": 0.4660731554031372, - "learning_rate": 9.637362605045675e-06, - "loss": 0.4353, - "step": 6406 - }, - { - "epoch": 0.418730801908372, - "grad_norm": 0.42939215898513794, - "learning_rate": 9.637232034622496e-06, - "loss": 0.3732, - "step": 6407 - }, - { - "epoch": 0.4187961571139141, - "grad_norm": 0.48954418301582336, - "learning_rate": 9.637101441581934e-06, - "loss": 0.4305, - "step": 6408 - }, - { - "epoch": 0.41886151231945623, - "grad_norm": 0.43653449416160583, - "learning_rate": 9.636970825924616e-06, - "loss": 0.3532, - "step": 6409 - }, - { - "epoch": 0.41892686752499836, - "grad_norm": 0.822792649269104, - "learning_rate": 9.636840187651188e-06, - "loss": 0.4229, - "step": 6410 - }, - { - "epoch": 0.4189922227305405, - "grad_norm": 0.4509654939174652, - "learning_rate": 9.636709526762285e-06, - "loss": 0.3586, - "step": 6411 - }, - { - "epoch": 0.4190575779360826, - "grad_norm": 0.4449145793914795, - "learning_rate": 9.636578843258541e-06, - "loss": 0.3807, - "step": 6412 - }, - { - "epoch": 0.41912293314162474, - "grad_norm": 0.4820125102996826, - "learning_rate": 9.636448137140597e-06, - "loss": 0.4073, - "step": 6413 - }, - { - "epoch": 0.41918828834716687, - "grad_norm": 0.4795423746109009, - "learning_rate": 9.636317408409088e-06, - "loss": 0.4163, - "step": 6414 - }, - { - "epoch": 0.419253643552709, - "grad_norm": 0.5062985420227051, - "learning_rate": 9.636186657064654e-06, - "loss": 0.4213, - "step": 6415 - }, - { - "epoch": 0.41931899875825107, - "grad_norm": 0.4475199580192566, - "learning_rate": 9.63605588310793e-06, - "loss": 0.3542, - "step": 6416 - }, - { - "epoch": 0.4193843539637932, - "grad_norm": 0.4813227355480194, - "learning_rate": 9.635925086539558e-06, - "loss": 0.4107, - "step": 6417 - }, - { - "epoch": 0.4194497091693353, - "grad_norm": 0.4664250910282135, - "learning_rate": 9.635794267360172e-06, - "loss": 0.3884, - "step": 6418 - }, - { - "epoch": 0.41951506437487746, - "grad_norm": 0.4789632558822632, - "learning_rate": 9.635663425570412e-06, - "loss": 0.4021, - "step": 6419 - }, - { - "epoch": 0.4195804195804196, - "grad_norm": 0.5104120373725891, - "learning_rate": 9.635532561170914e-06, - "loss": 0.4383, - "step": 6420 - }, - { - "epoch": 0.4196457747859617, - "grad_norm": 0.47397351264953613, - "learning_rate": 9.635401674162319e-06, - "loss": 0.3893, - "step": 6421 - }, - { - "epoch": 0.41971112999150384, - "grad_norm": 0.46960514783859253, - "learning_rate": 9.635270764545264e-06, - "loss": 0.3942, - "step": 6422 - }, - { - "epoch": 0.41977648519704597, - "grad_norm": 0.46536019444465637, - "learning_rate": 9.635139832320387e-06, - "loss": 0.403, - "step": 6423 - }, - { - "epoch": 0.41984184040258804, - "grad_norm": 0.45115309953689575, - "learning_rate": 9.635008877488327e-06, - "loss": 0.3534, - "step": 6424 - }, - { - "epoch": 0.41990719560813017, - "grad_norm": 0.44649505615234375, - "learning_rate": 9.634877900049724e-06, - "loss": 0.3768, - "step": 6425 - }, - { - "epoch": 0.4199725508136723, - "grad_norm": 0.4636431336402893, - "learning_rate": 9.634746900005216e-06, - "loss": 0.4057, - "step": 6426 - }, - { - "epoch": 0.4200379060192144, - "grad_norm": 0.47141778469085693, - "learning_rate": 9.634615877355441e-06, - "loss": 0.4041, - "step": 6427 - }, - { - "epoch": 0.42010326122475655, - "grad_norm": 0.4881165027618408, - "learning_rate": 9.634484832101039e-06, - "loss": 0.4366, - "step": 6428 - }, - { - "epoch": 0.4201686164302987, - "grad_norm": 0.4366433024406433, - "learning_rate": 9.63435376424265e-06, - "loss": 0.3677, - "step": 6429 - }, - { - "epoch": 0.4202339716358408, - "grad_norm": 0.47002291679382324, - "learning_rate": 9.63422267378091e-06, - "loss": 0.429, - "step": 6430 - }, - { - "epoch": 0.42029932684138294, - "grad_norm": 0.4882446527481079, - "learning_rate": 9.634091560716462e-06, - "loss": 0.4305, - "step": 6431 - }, - { - "epoch": 0.42036468204692506, - "grad_norm": 0.46278485655784607, - "learning_rate": 9.633960425049944e-06, - "loss": 0.4103, - "step": 6432 - }, - { - "epoch": 0.42043003725246714, - "grad_norm": 0.4388725459575653, - "learning_rate": 9.633829266781995e-06, - "loss": 0.3563, - "step": 6433 - }, - { - "epoch": 0.42049539245800927, - "grad_norm": 0.42902904748916626, - "learning_rate": 9.633698085913256e-06, - "loss": 0.3515, - "step": 6434 - }, - { - "epoch": 0.4205607476635514, - "grad_norm": 0.4717724621295929, - "learning_rate": 9.633566882444365e-06, - "loss": 0.3895, - "step": 6435 - }, - { - "epoch": 0.4206261028690935, - "grad_norm": 0.4701840579509735, - "learning_rate": 9.633435656375964e-06, - "loss": 0.4316, - "step": 6436 - }, - { - "epoch": 0.42069145807463565, - "grad_norm": 0.4383528530597687, - "learning_rate": 9.633304407708693e-06, - "loss": 0.4148, - "step": 6437 - }, - { - "epoch": 0.4207568132801778, - "grad_norm": 0.46183767914772034, - "learning_rate": 9.63317313644319e-06, - "loss": 0.4315, - "step": 6438 - }, - { - "epoch": 0.4208221684857199, - "grad_norm": 0.41978150606155396, - "learning_rate": 9.633041842580098e-06, - "loss": 0.3545, - "step": 6439 - }, - { - "epoch": 0.42088752369126203, - "grad_norm": 0.4599344730377197, - "learning_rate": 9.632910526120054e-06, - "loss": 0.3699, - "step": 6440 - }, - { - "epoch": 0.4209528788968041, - "grad_norm": 0.4426717162132263, - "learning_rate": 9.6327791870637e-06, - "loss": 0.3995, - "step": 6441 - }, - { - "epoch": 0.42101823410234623, - "grad_norm": 0.4519651234149933, - "learning_rate": 9.63264782541168e-06, - "loss": 0.4049, - "step": 6442 - }, - { - "epoch": 0.42108358930788836, - "grad_norm": 0.4410443603992462, - "learning_rate": 9.632516441164629e-06, - "loss": 0.3757, - "step": 6443 - }, - { - "epoch": 0.4211489445134305, - "grad_norm": 0.4512501060962677, - "learning_rate": 9.632385034323191e-06, - "loss": 0.3869, - "step": 6444 - }, - { - "epoch": 0.4212142997189726, - "grad_norm": 0.47190433740615845, - "learning_rate": 9.632253604888007e-06, - "loss": 0.4084, - "step": 6445 - }, - { - "epoch": 0.42127965492451475, - "grad_norm": 0.5133803486824036, - "learning_rate": 9.632122152859717e-06, - "loss": 0.4533, - "step": 6446 - }, - { - "epoch": 0.4213450101300569, - "grad_norm": 0.9352949261665344, - "learning_rate": 9.631990678238962e-06, - "loss": 0.408, - "step": 6447 - }, - { - "epoch": 0.421410365335599, - "grad_norm": 0.4503645896911621, - "learning_rate": 9.631859181026385e-06, - "loss": 0.3935, - "step": 6448 - }, - { - "epoch": 0.4214757205411411, - "grad_norm": 0.44001659750938416, - "learning_rate": 9.631727661222625e-06, - "loss": 0.354, - "step": 6449 - }, - { - "epoch": 0.4215410757466832, - "grad_norm": 0.4678772985935211, - "learning_rate": 9.631596118828326e-06, - "loss": 0.3696, - "step": 6450 - }, - { - "epoch": 0.42160643095222533, - "grad_norm": 0.4532861113548279, - "learning_rate": 9.631464553844128e-06, - "loss": 0.3874, - "step": 6451 - }, - { - "epoch": 0.42167178615776746, - "grad_norm": 0.4305182099342346, - "learning_rate": 9.631332966270671e-06, - "loss": 0.3309, - "step": 6452 - }, - { - "epoch": 0.4217371413633096, - "grad_norm": 0.44606882333755493, - "learning_rate": 9.6312013561086e-06, - "loss": 0.3164, - "step": 6453 - }, - { - "epoch": 0.4218024965688517, - "grad_norm": 0.4718857407569885, - "learning_rate": 9.631069723358555e-06, - "loss": 0.4263, - "step": 6454 - }, - { - "epoch": 0.42186785177439384, - "grad_norm": 0.48390716314315796, - "learning_rate": 9.630938068021181e-06, - "loss": 0.4291, - "step": 6455 - }, - { - "epoch": 0.42193320697993597, - "grad_norm": 0.42989227175712585, - "learning_rate": 9.630806390097113e-06, - "loss": 0.3709, - "step": 6456 - }, - { - "epoch": 0.4219985621854781, - "grad_norm": 0.4639935791492462, - "learning_rate": 9.630674689587003e-06, - "loss": 0.3991, - "step": 6457 - }, - { - "epoch": 0.42206391739102017, - "grad_norm": 0.472952663898468, - "learning_rate": 9.630542966491485e-06, - "loss": 0.4536, - "step": 6458 - }, - { - "epoch": 0.4221292725965623, - "grad_norm": 0.45817291736602783, - "learning_rate": 9.630411220811207e-06, - "loss": 0.3825, - "step": 6459 - }, - { - "epoch": 0.4221946278021044, - "grad_norm": 0.4815440773963928, - "learning_rate": 9.630279452546808e-06, - "loss": 0.43, - "step": 6460 - }, - { - "epoch": 0.42225998300764656, - "grad_norm": 0.45036017894744873, - "learning_rate": 9.630147661698931e-06, - "loss": 0.3647, - "step": 6461 - }, - { - "epoch": 0.4223253382131887, - "grad_norm": 0.4516957700252533, - "learning_rate": 9.630015848268221e-06, - "loss": 0.388, - "step": 6462 - }, - { - "epoch": 0.4223906934187308, - "grad_norm": 0.44138821959495544, - "learning_rate": 9.62988401225532e-06, - "loss": 0.384, - "step": 6463 - }, - { - "epoch": 0.42245604862427294, - "grad_norm": 0.4491787850856781, - "learning_rate": 9.629752153660871e-06, - "loss": 0.3638, - "step": 6464 - }, - { - "epoch": 0.42252140382981507, - "grad_norm": 0.48579147458076477, - "learning_rate": 9.629620272485517e-06, - "loss": 0.3969, - "step": 6465 - }, - { - "epoch": 0.42258675903535714, - "grad_norm": 0.4487808346748352, - "learning_rate": 9.6294883687299e-06, - "loss": 0.3842, - "step": 6466 - }, - { - "epoch": 0.42265211424089927, - "grad_norm": 0.4325678050518036, - "learning_rate": 9.629356442394666e-06, - "loss": 0.352, - "step": 6467 - }, - { - "epoch": 0.4227174694464414, - "grad_norm": 0.41130340099334717, - "learning_rate": 9.629224493480455e-06, - "loss": 0.3225, - "step": 6468 - }, - { - "epoch": 0.4227828246519835, - "grad_norm": 0.4503832757472992, - "learning_rate": 9.629092521987913e-06, - "loss": 0.3908, - "step": 6469 - }, - { - "epoch": 0.42284817985752565, - "grad_norm": 0.45339593291282654, - "learning_rate": 9.628960527917683e-06, - "loss": 0.3658, - "step": 6470 - }, - { - "epoch": 0.4229135350630678, - "grad_norm": 0.4623308777809143, - "learning_rate": 9.62882851127041e-06, - "loss": 0.4133, - "step": 6471 - }, - { - "epoch": 0.4229788902686099, - "grad_norm": 0.44871044158935547, - "learning_rate": 9.628696472046734e-06, - "loss": 0.3715, - "step": 6472 - }, - { - "epoch": 0.42304424547415204, - "grad_norm": 0.4574100077152252, - "learning_rate": 9.628564410247306e-06, - "loss": 0.4189, - "step": 6473 - }, - { - "epoch": 0.42310960067969416, - "grad_norm": 0.48398715257644653, - "learning_rate": 9.628432325872764e-06, - "loss": 0.406, - "step": 6474 - }, - { - "epoch": 0.42317495588523624, - "grad_norm": 0.44641536474227905, - "learning_rate": 9.628300218923752e-06, - "loss": 0.3963, - "step": 6475 - }, - { - "epoch": 0.42324031109077836, - "grad_norm": 0.47431057691574097, - "learning_rate": 9.628168089400917e-06, - "loss": 0.3926, - "step": 6476 - }, - { - "epoch": 0.4233056662963205, - "grad_norm": 0.48836711049079895, - "learning_rate": 9.628035937304905e-06, - "loss": 0.3975, - "step": 6477 - }, - { - "epoch": 0.4233710215018626, - "grad_norm": 0.46272969245910645, - "learning_rate": 9.627903762636358e-06, - "loss": 0.4067, - "step": 6478 - }, - { - "epoch": 0.42343637670740475, - "grad_norm": 0.47614696621894836, - "learning_rate": 9.62777156539592e-06, - "loss": 0.4382, - "step": 6479 - }, - { - "epoch": 0.4235017319129469, - "grad_norm": 0.4734412133693695, - "learning_rate": 9.627639345584236e-06, - "loss": 0.4084, - "step": 6480 - }, - { - "epoch": 0.423567087118489, - "grad_norm": 0.4440581500530243, - "learning_rate": 9.627507103201954e-06, - "loss": 0.3692, - "step": 6481 - }, - { - "epoch": 0.42363244232403113, - "grad_norm": 0.4822497069835663, - "learning_rate": 9.627374838249716e-06, - "loss": 0.4218, - "step": 6482 - }, - { - "epoch": 0.4236977975295732, - "grad_norm": 0.42719003558158875, - "learning_rate": 9.627242550728167e-06, - "loss": 0.3567, - "step": 6483 - }, - { - "epoch": 0.42376315273511533, - "grad_norm": 0.4925616979598999, - "learning_rate": 9.627110240637954e-06, - "loss": 0.4256, - "step": 6484 - }, - { - "epoch": 0.42382850794065746, - "grad_norm": 0.49054938554763794, - "learning_rate": 9.626977907979722e-06, - "loss": 0.4291, - "step": 6485 - }, - { - "epoch": 0.4238938631461996, - "grad_norm": 0.4381217062473297, - "learning_rate": 9.626845552754113e-06, - "loss": 0.3521, - "step": 6486 - }, - { - "epoch": 0.4239592183517417, - "grad_norm": 0.4951472282409668, - "learning_rate": 9.62671317496178e-06, - "loss": 0.4498, - "step": 6487 - }, - { - "epoch": 0.42402457355728385, - "grad_norm": 0.45642176270484924, - "learning_rate": 9.62658077460336e-06, - "loss": 0.3895, - "step": 6488 - }, - { - "epoch": 0.424089928762826, - "grad_norm": 0.4530963599681854, - "learning_rate": 9.626448351679504e-06, - "loss": 0.3609, - "step": 6489 - }, - { - "epoch": 0.4241552839683681, - "grad_norm": 0.4927248954772949, - "learning_rate": 9.626315906190856e-06, - "loss": 0.4507, - "step": 6490 - }, - { - "epoch": 0.4242206391739102, - "grad_norm": 0.45638951659202576, - "learning_rate": 9.626183438138064e-06, - "loss": 0.386, - "step": 6491 - }, - { - "epoch": 0.4242859943794523, - "grad_norm": 0.4506241977214813, - "learning_rate": 9.626050947521772e-06, - "loss": 0.4048, - "step": 6492 - }, - { - "epoch": 0.42435134958499443, - "grad_norm": 0.4654195010662079, - "learning_rate": 9.625918434342627e-06, - "loss": 0.3924, - "step": 6493 - }, - { - "epoch": 0.42441670479053656, - "grad_norm": 0.4722681939601898, - "learning_rate": 9.625785898601274e-06, - "loss": 0.3935, - "step": 6494 - }, - { - "epoch": 0.4244820599960787, - "grad_norm": 0.47420647740364075, - "learning_rate": 9.625653340298363e-06, - "loss": 0.4008, - "step": 6495 - }, - { - "epoch": 0.4245474152016208, - "grad_norm": 0.44712671637535095, - "learning_rate": 9.625520759434537e-06, - "loss": 0.3915, - "step": 6496 - }, - { - "epoch": 0.42461277040716294, - "grad_norm": 0.5193232297897339, - "learning_rate": 9.625388156010443e-06, - "loss": 0.4885, - "step": 6497 - }, - { - "epoch": 0.42467812561270507, - "grad_norm": 0.4673554301261902, - "learning_rate": 9.62525553002673e-06, - "loss": 0.3768, - "step": 6498 - }, - { - "epoch": 0.4247434808182472, - "grad_norm": 0.462789922952652, - "learning_rate": 9.625122881484041e-06, - "loss": 0.4034, - "step": 6499 - }, - { - "epoch": 0.42480883602378927, - "grad_norm": 0.4847203493118286, - "learning_rate": 9.624990210383027e-06, - "loss": 0.387, - "step": 6500 - }, - { - "epoch": 0.4248741912293314, - "grad_norm": 0.44709813594818115, - "learning_rate": 9.624857516724336e-06, - "loss": 0.3402, - "step": 6501 - }, - { - "epoch": 0.4249395464348735, - "grad_norm": 0.48443934321403503, - "learning_rate": 9.624724800508609e-06, - "loss": 0.4141, - "step": 6502 - }, - { - "epoch": 0.42500490164041566, - "grad_norm": 0.48059728741645813, - "learning_rate": 9.624592061736499e-06, - "loss": 0.4162, - "step": 6503 - }, - { - "epoch": 0.4250702568459578, - "grad_norm": 0.43630126118659973, - "learning_rate": 9.624459300408651e-06, - "loss": 0.4015, - "step": 6504 - }, - { - "epoch": 0.4251356120514999, - "grad_norm": 0.44657135009765625, - "learning_rate": 9.624326516525712e-06, - "loss": 0.3795, - "step": 6505 - }, - { - "epoch": 0.42520096725704204, - "grad_norm": 0.42472267150878906, - "learning_rate": 9.62419371008833e-06, - "loss": 0.3547, - "step": 6506 - }, - { - "epoch": 0.42526632246258417, - "grad_norm": 0.4591834843158722, - "learning_rate": 9.624060881097155e-06, - "loss": 0.3791, - "step": 6507 - }, - { - "epoch": 0.42533167766812624, - "grad_norm": 0.4807187020778656, - "learning_rate": 9.623928029552833e-06, - "loss": 0.4424, - "step": 6508 - }, - { - "epoch": 0.42539703287366837, - "grad_norm": 0.4762464463710785, - "learning_rate": 9.623795155456013e-06, - "loss": 0.398, - "step": 6509 - }, - { - "epoch": 0.4254623880792105, - "grad_norm": 0.4546484053134918, - "learning_rate": 9.623662258807341e-06, - "loss": 0.4143, - "step": 6510 - }, - { - "epoch": 0.4255277432847526, - "grad_norm": 0.49070531129837036, - "learning_rate": 9.623529339607465e-06, - "loss": 0.4133, - "step": 6511 - }, - { - "epoch": 0.42559309849029475, - "grad_norm": 0.4635466933250427, - "learning_rate": 9.623396397857036e-06, - "loss": 0.3814, - "step": 6512 - }, - { - "epoch": 0.4256584536958369, - "grad_norm": 0.476260781288147, - "learning_rate": 9.623263433556701e-06, - "loss": 0.393, - "step": 6513 - }, - { - "epoch": 0.425723808901379, - "grad_norm": 0.4596383571624756, - "learning_rate": 9.623130446707109e-06, - "loss": 0.3824, - "step": 6514 - }, - { - "epoch": 0.42578916410692114, - "grad_norm": 0.5997282266616821, - "learning_rate": 9.622997437308907e-06, - "loss": 0.3895, - "step": 6515 - }, - { - "epoch": 0.42585451931246326, - "grad_norm": 0.4949215352535248, - "learning_rate": 9.622864405362745e-06, - "loss": 0.4091, - "step": 6516 - }, - { - "epoch": 0.42591987451800534, - "grad_norm": 0.45569026470184326, - "learning_rate": 9.622731350869275e-06, - "loss": 0.4015, - "step": 6517 - }, - { - "epoch": 0.42598522972354746, - "grad_norm": 0.48326602578163147, - "learning_rate": 9.62259827382914e-06, - "loss": 0.4152, - "step": 6518 - }, - { - "epoch": 0.4260505849290896, - "grad_norm": 0.48029467463493347, - "learning_rate": 9.622465174242992e-06, - "loss": 0.42, - "step": 6519 - }, - { - "epoch": 0.4261159401346317, - "grad_norm": 0.4485394060611725, - "learning_rate": 9.62233205211148e-06, - "loss": 0.3772, - "step": 6520 - }, - { - "epoch": 0.42618129534017385, - "grad_norm": 0.4546124041080475, - "learning_rate": 9.622198907435253e-06, - "loss": 0.3682, - "step": 6521 - }, - { - "epoch": 0.426246650545716, - "grad_norm": 0.45446571707725525, - "learning_rate": 9.62206574021496e-06, - "loss": 0.4168, - "step": 6522 - }, - { - "epoch": 0.4263120057512581, - "grad_norm": 0.4523228704929352, - "learning_rate": 9.621932550451253e-06, - "loss": 0.3528, - "step": 6523 - }, - { - "epoch": 0.42637736095680023, - "grad_norm": 0.48179081082344055, - "learning_rate": 9.621799338144779e-06, - "loss": 0.4355, - "step": 6524 - }, - { - "epoch": 0.4264427161623423, - "grad_norm": 0.48248958587646484, - "learning_rate": 9.621666103296188e-06, - "loss": 0.4306, - "step": 6525 - }, - { - "epoch": 0.42650807136788443, - "grad_norm": 0.48483365774154663, - "learning_rate": 9.621532845906133e-06, - "loss": 0.3844, - "step": 6526 - }, - { - "epoch": 0.42657342657342656, - "grad_norm": 0.46387338638305664, - "learning_rate": 9.621399565975258e-06, - "loss": 0.405, - "step": 6527 - }, - { - "epoch": 0.4266387817789687, - "grad_norm": 0.4772443473339081, - "learning_rate": 9.621266263504216e-06, - "loss": 0.3901, - "step": 6528 - }, - { - "epoch": 0.4267041369845108, - "grad_norm": 0.4691782295703888, - "learning_rate": 9.621132938493658e-06, - "loss": 0.4061, - "step": 6529 - }, - { - "epoch": 0.42676949219005295, - "grad_norm": 0.4504014551639557, - "learning_rate": 9.620999590944235e-06, - "loss": 0.3875, - "step": 6530 - }, - { - "epoch": 0.4268348473955951, - "grad_norm": 0.5112310647964478, - "learning_rate": 9.620866220856595e-06, - "loss": 0.4634, - "step": 6531 - }, - { - "epoch": 0.4269002026011372, - "grad_norm": 0.49836134910583496, - "learning_rate": 9.620732828231391e-06, - "loss": 0.4101, - "step": 6532 - }, - { - "epoch": 0.4269655578066793, - "grad_norm": 0.446916788816452, - "learning_rate": 9.620599413069272e-06, - "loss": 0.4071, - "step": 6533 - }, - { - "epoch": 0.4270309130122214, - "grad_norm": 0.4787539541721344, - "learning_rate": 9.620465975370888e-06, - "loss": 0.4323, - "step": 6534 - }, - { - "epoch": 0.42709626821776353, - "grad_norm": 0.4808615744113922, - "learning_rate": 9.620332515136893e-06, - "loss": 0.4084, - "step": 6535 - }, - { - "epoch": 0.42716162342330566, - "grad_norm": 0.4856660068035126, - "learning_rate": 9.620199032367931e-06, - "loss": 0.4295, - "step": 6536 - }, - { - "epoch": 0.4272269786288478, - "grad_norm": 0.44857537746429443, - "learning_rate": 9.620065527064661e-06, - "loss": 0.3565, - "step": 6537 - }, - { - "epoch": 0.4272923338343899, - "grad_norm": 0.4612455368041992, - "learning_rate": 9.619931999227731e-06, - "loss": 0.4084, - "step": 6538 - }, - { - "epoch": 0.42735768903993204, - "grad_norm": 0.4732638895511627, - "learning_rate": 9.61979844885779e-06, - "loss": 0.4076, - "step": 6539 - }, - { - "epoch": 0.42742304424547417, - "grad_norm": 0.4432867169380188, - "learning_rate": 9.619664875955494e-06, - "loss": 0.3668, - "step": 6540 - }, - { - "epoch": 0.4274883994510163, - "grad_norm": 0.46726810932159424, - "learning_rate": 9.619531280521493e-06, - "loss": 0.3699, - "step": 6541 - }, - { - "epoch": 0.42755375465655837, - "grad_norm": 0.47225064039230347, - "learning_rate": 9.619397662556434e-06, - "loss": 0.3946, - "step": 6542 - }, - { - "epoch": 0.4276191098621005, - "grad_norm": 0.44583478569984436, - "learning_rate": 9.619264022060974e-06, - "loss": 0.396, - "step": 6543 - }, - { - "epoch": 0.4276844650676426, - "grad_norm": 0.48455506563186646, - "learning_rate": 9.619130359035765e-06, - "loss": 0.4289, - "step": 6544 - }, - { - "epoch": 0.42774982027318476, - "grad_norm": 0.4884521961212158, - "learning_rate": 9.618996673481453e-06, - "loss": 0.3857, - "step": 6545 - }, - { - "epoch": 0.4278151754787269, - "grad_norm": 0.5418428778648376, - "learning_rate": 9.618862965398696e-06, - "loss": 0.4713, - "step": 6546 - }, - { - "epoch": 0.427880530684269, - "grad_norm": 0.46160823106765747, - "learning_rate": 9.618729234788144e-06, - "loss": 0.3885, - "step": 6547 - }, - { - "epoch": 0.42794588588981114, - "grad_norm": 0.45779356360435486, - "learning_rate": 9.61859548165045e-06, - "loss": 0.3828, - "step": 6548 - }, - { - "epoch": 0.42801124109535327, - "grad_norm": 0.4436955749988556, - "learning_rate": 9.618461705986265e-06, - "loss": 0.3879, - "step": 6549 - }, - { - "epoch": 0.42807659630089534, - "grad_norm": 0.48656827211380005, - "learning_rate": 9.618327907796244e-06, - "loss": 0.3839, - "step": 6550 - }, - { - "epoch": 0.42814195150643747, - "grad_norm": 0.4393969476222992, - "learning_rate": 9.618194087081037e-06, - "loss": 0.3826, - "step": 6551 - }, - { - "epoch": 0.4282073067119796, - "grad_norm": 0.4598098695278168, - "learning_rate": 9.618060243841298e-06, - "loss": 0.4324, - "step": 6552 - }, - { - "epoch": 0.4282726619175217, - "grad_norm": 0.45974496006965637, - "learning_rate": 9.617926378077678e-06, - "loss": 0.3863, - "step": 6553 - }, - { - "epoch": 0.42833801712306385, - "grad_norm": 0.5933836102485657, - "learning_rate": 9.617792489790831e-06, - "loss": 0.3986, - "step": 6554 - }, - { - "epoch": 0.428403372328606, - "grad_norm": 0.473839670419693, - "learning_rate": 9.617658578981412e-06, - "loss": 0.4184, - "step": 6555 - }, - { - "epoch": 0.4284687275341481, - "grad_norm": 0.47087037563323975, - "learning_rate": 9.617524645650071e-06, - "loss": 0.4181, - "step": 6556 - }, - { - "epoch": 0.42853408273969024, - "grad_norm": 0.4623969793319702, - "learning_rate": 9.617390689797464e-06, - "loss": 0.3599, - "step": 6557 - }, - { - "epoch": 0.42859943794523236, - "grad_norm": 0.4589996933937073, - "learning_rate": 9.617256711424241e-06, - "loss": 0.3902, - "step": 6558 - }, - { - "epoch": 0.42866479315077444, - "grad_norm": 0.5123886466026306, - "learning_rate": 9.61712271053106e-06, - "loss": 0.4765, - "step": 6559 - }, - { - "epoch": 0.42873014835631656, - "grad_norm": 0.4635045528411865, - "learning_rate": 9.61698868711857e-06, - "loss": 0.3764, - "step": 6560 - }, - { - "epoch": 0.4287955035618587, - "grad_norm": 0.441690593957901, - "learning_rate": 9.616854641187426e-06, - "loss": 0.3511, - "step": 6561 - }, - { - "epoch": 0.4288608587674008, - "grad_norm": 0.44375911355018616, - "learning_rate": 9.616720572738285e-06, - "loss": 0.3672, - "step": 6562 - }, - { - "epoch": 0.42892621397294295, - "grad_norm": 0.46911972761154175, - "learning_rate": 9.616586481771797e-06, - "loss": 0.4233, - "step": 6563 - }, - { - "epoch": 0.4289915691784851, - "grad_norm": 0.4201802611351013, - "learning_rate": 9.616452368288616e-06, - "loss": 0.3447, - "step": 6564 - }, - { - "epoch": 0.4290569243840272, - "grad_norm": 0.4446719288825989, - "learning_rate": 9.6163182322894e-06, - "loss": 0.3708, - "step": 6565 - }, - { - "epoch": 0.42912227958956933, - "grad_norm": 0.4703514575958252, - "learning_rate": 9.616184073774798e-06, - "loss": 0.4014, - "step": 6566 - }, - { - "epoch": 0.4291876347951114, - "grad_norm": 0.5087060332298279, - "learning_rate": 9.61604989274547e-06, - "loss": 0.4018, - "step": 6567 - }, - { - "epoch": 0.42925299000065353, - "grad_norm": 0.4773683547973633, - "learning_rate": 9.615915689202066e-06, - "loss": 0.4229, - "step": 6568 - }, - { - "epoch": 0.42931834520619566, - "grad_norm": 0.48420435190200806, - "learning_rate": 9.615781463145244e-06, - "loss": 0.4031, - "step": 6569 - }, - { - "epoch": 0.4293837004117378, - "grad_norm": 0.4794633090496063, - "learning_rate": 9.615647214575655e-06, - "loss": 0.3943, - "step": 6570 - }, - { - "epoch": 0.4294490556172799, - "grad_norm": 0.4491889178752899, - "learning_rate": 9.615512943493955e-06, - "loss": 0.3662, - "step": 6571 - }, - { - "epoch": 0.42951441082282205, - "grad_norm": 0.4783543348312378, - "learning_rate": 9.6153786499008e-06, - "loss": 0.4304, - "step": 6572 - }, - { - "epoch": 0.4295797660283642, - "grad_norm": 0.43879467248916626, - "learning_rate": 9.615244333796844e-06, - "loss": 0.3744, - "step": 6573 - }, - { - "epoch": 0.4296451212339063, - "grad_norm": 0.44893524050712585, - "learning_rate": 9.615109995182744e-06, - "loss": 0.3888, - "step": 6574 - }, - { - "epoch": 0.4297104764394484, - "grad_norm": 0.4799850285053253, - "learning_rate": 9.614975634059152e-06, - "loss": 0.4486, - "step": 6575 - }, - { - "epoch": 0.4297758316449905, - "grad_norm": 0.4692569077014923, - "learning_rate": 9.614841250426726e-06, - "loss": 0.4038, - "step": 6576 - }, - { - "epoch": 0.42984118685053263, - "grad_norm": 0.4473058879375458, - "learning_rate": 9.614706844286122e-06, - "loss": 0.3885, - "step": 6577 - }, - { - "epoch": 0.42990654205607476, - "grad_norm": 0.469270259141922, - "learning_rate": 9.614572415637991e-06, - "loss": 0.4171, - "step": 6578 - }, - { - "epoch": 0.4299718972616169, - "grad_norm": 0.43836092948913574, - "learning_rate": 9.614437964482993e-06, - "loss": 0.3768, - "step": 6579 - }, - { - "epoch": 0.430037252467159, - "grad_norm": 0.4554644525051117, - "learning_rate": 9.614303490821783e-06, - "loss": 0.352, - "step": 6580 - }, - { - "epoch": 0.43010260767270114, - "grad_norm": 0.4886828064918518, - "learning_rate": 9.614168994655016e-06, - "loss": 0.4283, - "step": 6581 - }, - { - "epoch": 0.43016796287824327, - "grad_norm": 0.46111127734184265, - "learning_rate": 9.614034475983347e-06, - "loss": 0.3735, - "step": 6582 - }, - { - "epoch": 0.4302333180837854, - "grad_norm": 0.4684242904186249, - "learning_rate": 9.613899934807435e-06, - "loss": 0.4032, - "step": 6583 - }, - { - "epoch": 0.43029867328932747, - "grad_norm": 0.47861090302467346, - "learning_rate": 9.613765371127935e-06, - "loss": 0.3815, - "step": 6584 - }, - { - "epoch": 0.4303640284948696, - "grad_norm": 0.4744029939174652, - "learning_rate": 9.613630784945501e-06, - "loss": 0.3965, - "step": 6585 - }, - { - "epoch": 0.4304293837004117, - "grad_norm": 0.5068906545639038, - "learning_rate": 9.613496176260793e-06, - "loss": 0.4415, - "step": 6586 - }, - { - "epoch": 0.43049473890595386, - "grad_norm": 0.449878066778183, - "learning_rate": 9.613361545074465e-06, - "loss": 0.3999, - "step": 6587 - }, - { - "epoch": 0.430560094111496, - "grad_norm": 0.44311073422431946, - "learning_rate": 9.613226891387174e-06, - "loss": 0.384, - "step": 6588 - }, - { - "epoch": 0.4306254493170381, - "grad_norm": 0.44952529668807983, - "learning_rate": 9.613092215199577e-06, - "loss": 0.3478, - "step": 6589 - }, - { - "epoch": 0.43069080452258024, - "grad_norm": 0.454725056886673, - "learning_rate": 9.612957516512333e-06, - "loss": 0.3628, - "step": 6590 - }, - { - "epoch": 0.43075615972812237, - "grad_norm": 0.4151926338672638, - "learning_rate": 9.612822795326096e-06, - "loss": 0.3249, - "step": 6591 - }, - { - "epoch": 0.43082151493366444, - "grad_norm": 0.4689428210258484, - "learning_rate": 9.612688051641526e-06, - "loss": 0.4147, - "step": 6592 - }, - { - "epoch": 0.43088687013920657, - "grad_norm": 0.48327550292015076, - "learning_rate": 9.612553285459276e-06, - "loss": 0.3895, - "step": 6593 - }, - { - "epoch": 0.4309522253447487, - "grad_norm": 0.4606989622116089, - "learning_rate": 9.612418496780008e-06, - "loss": 0.4329, - "step": 6594 - }, - { - "epoch": 0.4310175805502908, - "grad_norm": 0.4373370409011841, - "learning_rate": 9.612283685604374e-06, - "loss": 0.3607, - "step": 6595 - }, - { - "epoch": 0.43108293575583295, - "grad_norm": 0.4986790716648102, - "learning_rate": 9.612148851933037e-06, - "loss": 0.467, - "step": 6596 - }, - { - "epoch": 0.4311482909613751, - "grad_norm": 0.45467954874038696, - "learning_rate": 9.612013995766652e-06, - "loss": 0.3826, - "step": 6597 - }, - { - "epoch": 0.4312136461669172, - "grad_norm": 0.4827488958835602, - "learning_rate": 9.611879117105876e-06, - "loss": 0.4219, - "step": 6598 - }, - { - "epoch": 0.43127900137245934, - "grad_norm": 0.4785784184932709, - "learning_rate": 9.611744215951369e-06, - "loss": 0.4181, - "step": 6599 - }, - { - "epoch": 0.43134435657800146, - "grad_norm": 0.4393242597579956, - "learning_rate": 9.611609292303787e-06, - "loss": 0.3263, - "step": 6600 - }, - { - "epoch": 0.43140971178354354, - "grad_norm": 0.45603376626968384, - "learning_rate": 9.611474346163788e-06, - "loss": 0.4086, - "step": 6601 - }, - { - "epoch": 0.43147506698908566, - "grad_norm": 0.4390104413032532, - "learning_rate": 9.611339377532031e-06, - "loss": 0.3695, - "step": 6602 - }, - { - "epoch": 0.4315404221946278, - "grad_norm": 0.41211044788360596, - "learning_rate": 9.611204386409176e-06, - "loss": 0.3459, - "step": 6603 - }, - { - "epoch": 0.4316057774001699, - "grad_norm": 0.4459226429462433, - "learning_rate": 9.61106937279588e-06, - "loss": 0.3845, - "step": 6604 - }, - { - "epoch": 0.43167113260571205, - "grad_norm": 0.45860961079597473, - "learning_rate": 9.610934336692799e-06, - "loss": 0.4194, - "step": 6605 - }, - { - "epoch": 0.4317364878112542, - "grad_norm": 0.47973817586898804, - "learning_rate": 9.610799278100595e-06, - "loss": 0.4071, - "step": 6606 - }, - { - "epoch": 0.4318018430167963, - "grad_norm": 0.5148317217826843, - "learning_rate": 9.610664197019927e-06, - "loss": 0.4215, - "step": 6607 - }, - { - "epoch": 0.43186719822233843, - "grad_norm": 0.4817494750022888, - "learning_rate": 9.610529093451451e-06, - "loss": 0.368, - "step": 6608 - }, - { - "epoch": 0.4319325534278805, - "grad_norm": 0.5141207575798035, - "learning_rate": 9.610393967395827e-06, - "loss": 0.4433, - "step": 6609 - }, - { - "epoch": 0.43199790863342263, - "grad_norm": 0.4598926305770874, - "learning_rate": 9.610258818853716e-06, - "loss": 0.4034, - "step": 6610 - }, - { - "epoch": 0.43206326383896476, - "grad_norm": 0.48298901319503784, - "learning_rate": 9.610123647825775e-06, - "loss": 0.4176, - "step": 6611 - }, - { - "epoch": 0.4321286190445069, - "grad_norm": 0.4944208860397339, - "learning_rate": 9.609988454312664e-06, - "loss": 0.4159, - "step": 6612 - }, - { - "epoch": 0.432193974250049, - "grad_norm": 0.5217379331588745, - "learning_rate": 9.609853238315041e-06, - "loss": 0.4539, - "step": 6613 - }, - { - "epoch": 0.43225932945559115, - "grad_norm": 0.4784821569919586, - "learning_rate": 9.609717999833568e-06, - "loss": 0.4028, - "step": 6614 - }, - { - "epoch": 0.4323246846611333, - "grad_norm": 0.49810606241226196, - "learning_rate": 9.609582738868903e-06, - "loss": 0.4603, - "step": 6615 - }, - { - "epoch": 0.4323900398666754, - "grad_norm": 0.49869388341903687, - "learning_rate": 9.609447455421706e-06, - "loss": 0.4198, - "step": 6616 - }, - { - "epoch": 0.4324553950722175, - "grad_norm": 0.4355716407299042, - "learning_rate": 9.609312149492636e-06, - "loss": 0.3627, - "step": 6617 - }, - { - "epoch": 0.4325207502777596, - "grad_norm": 0.434512734413147, - "learning_rate": 9.609176821082354e-06, - "loss": 0.3362, - "step": 6618 - }, - { - "epoch": 0.43258610548330173, - "grad_norm": 0.46171969175338745, - "learning_rate": 9.60904147019152e-06, - "loss": 0.3941, - "step": 6619 - }, - { - "epoch": 0.43265146068884386, - "grad_norm": 0.45991650223731995, - "learning_rate": 9.608906096820796e-06, - "loss": 0.3781, - "step": 6620 - }, - { - "epoch": 0.432716815894386, - "grad_norm": 0.9240472912788391, - "learning_rate": 9.608770700970838e-06, - "loss": 0.4062, - "step": 6621 - }, - { - "epoch": 0.4327821710999281, - "grad_norm": 0.48443368077278137, - "learning_rate": 9.60863528264231e-06, - "loss": 0.4296, - "step": 6622 - }, - { - "epoch": 0.43284752630547024, - "grad_norm": 0.48755085468292236, - "learning_rate": 9.60849984183587e-06, - "loss": 0.3904, - "step": 6623 - }, - { - "epoch": 0.43291288151101237, - "grad_norm": 0.47976285219192505, - "learning_rate": 9.608364378552181e-06, - "loss": 0.4182, - "step": 6624 - }, - { - "epoch": 0.4329782367165545, - "grad_norm": 0.4211183786392212, - "learning_rate": 9.608228892791902e-06, - "loss": 0.3518, - "step": 6625 - }, - { - "epoch": 0.43304359192209657, - "grad_norm": 0.45069074630737305, - "learning_rate": 9.608093384555695e-06, - "loss": 0.3975, - "step": 6626 - }, - { - "epoch": 0.4331089471276387, - "grad_norm": 0.5069361329078674, - "learning_rate": 9.607957853844218e-06, - "loss": 0.4385, - "step": 6627 - }, - { - "epoch": 0.4331743023331808, - "grad_norm": 0.5243623852729797, - "learning_rate": 9.607822300658136e-06, - "loss": 0.4152, - "step": 6628 - }, - { - "epoch": 0.43323965753872296, - "grad_norm": 0.456946462392807, - "learning_rate": 9.607686724998106e-06, - "loss": 0.4219, - "step": 6629 - }, - { - "epoch": 0.4333050127442651, - "grad_norm": 0.4868513345718384, - "learning_rate": 9.607551126864794e-06, - "loss": 0.4491, - "step": 6630 - }, - { - "epoch": 0.4333703679498072, - "grad_norm": 0.4848618805408478, - "learning_rate": 9.60741550625886e-06, - "loss": 0.4436, - "step": 6631 - }, - { - "epoch": 0.43343572315534934, - "grad_norm": 0.4648330807685852, - "learning_rate": 9.60727986318096e-06, - "loss": 0.4258, - "step": 6632 - }, - { - "epoch": 0.43350107836089147, - "grad_norm": 0.4540043771266937, - "learning_rate": 9.607144197631764e-06, - "loss": 0.3961, - "step": 6633 - }, - { - "epoch": 0.43356643356643354, - "grad_norm": 0.46273645758628845, - "learning_rate": 9.607008509611928e-06, - "loss": 0.3933, - "step": 6634 - }, - { - "epoch": 0.43363178877197567, - "grad_norm": 0.499675989151001, - "learning_rate": 9.606872799122115e-06, - "loss": 0.4195, - "step": 6635 - }, - { - "epoch": 0.4336971439775178, - "grad_norm": 0.45402762293815613, - "learning_rate": 9.60673706616299e-06, - "loss": 0.3823, - "step": 6636 - }, - { - "epoch": 0.4337624991830599, - "grad_norm": 0.45103034377098083, - "learning_rate": 9.60660131073521e-06, - "loss": 0.3487, - "step": 6637 - }, - { - "epoch": 0.43382785438860205, - "grad_norm": 0.4304608702659607, - "learning_rate": 9.60646553283944e-06, - "loss": 0.3297, - "step": 6638 - }, - { - "epoch": 0.4338932095941442, - "grad_norm": 0.46604615449905396, - "learning_rate": 9.606329732476343e-06, - "loss": 0.4074, - "step": 6639 - }, - { - "epoch": 0.4339585647996863, - "grad_norm": 0.42506149411201477, - "learning_rate": 9.606193909646579e-06, - "loss": 0.3117, - "step": 6640 - }, - { - "epoch": 0.43402392000522844, - "grad_norm": 0.473295122385025, - "learning_rate": 9.60605806435081e-06, - "loss": 0.4023, - "step": 6641 - }, - { - "epoch": 0.43408927521077056, - "grad_norm": 0.4828851521015167, - "learning_rate": 9.605922196589704e-06, - "loss": 0.4079, - "step": 6642 - }, - { - "epoch": 0.43415463041631264, - "grad_norm": 0.4616178572177887, - "learning_rate": 9.605786306363916e-06, - "loss": 0.3928, - "step": 6643 - }, - { - "epoch": 0.43421998562185476, - "grad_norm": 0.4404052197933197, - "learning_rate": 9.605650393674114e-06, - "loss": 0.381, - "step": 6644 - }, - { - "epoch": 0.4342853408273969, - "grad_norm": 0.4153364896774292, - "learning_rate": 9.605514458520959e-06, - "loss": 0.347, - "step": 6645 - }, - { - "epoch": 0.434350696032939, - "grad_norm": 0.4468926787376404, - "learning_rate": 9.605378500905116e-06, - "loss": 0.3646, - "step": 6646 - }, - { - "epoch": 0.43441605123848115, - "grad_norm": 0.48296794295310974, - "learning_rate": 9.605242520827245e-06, - "loss": 0.4171, - "step": 6647 - }, - { - "epoch": 0.4344814064440233, - "grad_norm": 0.4960406422615051, - "learning_rate": 9.605106518288012e-06, - "loss": 0.4635, - "step": 6648 - }, - { - "epoch": 0.4345467616495654, - "grad_norm": 0.45715218782424927, - "learning_rate": 9.604970493288078e-06, - "loss": 0.4043, - "step": 6649 - }, - { - "epoch": 0.43461211685510753, - "grad_norm": 0.47133275866508484, - "learning_rate": 9.604834445828109e-06, - "loss": 0.3652, - "step": 6650 - }, - { - "epoch": 0.4346774720606496, - "grad_norm": 0.43846094608306885, - "learning_rate": 9.604698375908766e-06, - "loss": 0.3651, - "step": 6651 - }, - { - "epoch": 0.43474282726619173, - "grad_norm": 0.4737315773963928, - "learning_rate": 9.604562283530714e-06, - "loss": 0.3809, - "step": 6652 - }, - { - "epoch": 0.43480818247173386, - "grad_norm": 0.4771938920021057, - "learning_rate": 9.604426168694618e-06, - "loss": 0.4058, - "step": 6653 - }, - { - "epoch": 0.434873537677276, - "grad_norm": 0.48013123869895935, - "learning_rate": 9.604290031401137e-06, - "loss": 0.4381, - "step": 6654 - }, - { - "epoch": 0.4349388928828181, - "grad_norm": 0.4409782886505127, - "learning_rate": 9.604153871650942e-06, - "loss": 0.3935, - "step": 6655 - }, - { - "epoch": 0.43500424808836025, - "grad_norm": 0.4736935496330261, - "learning_rate": 9.604017689444691e-06, - "loss": 0.3832, - "step": 6656 - }, - { - "epoch": 0.4350696032939024, - "grad_norm": 0.451788067817688, - "learning_rate": 9.603881484783054e-06, - "loss": 0.3746, - "step": 6657 - }, - { - "epoch": 0.4351349584994445, - "grad_norm": 0.45443475246429443, - "learning_rate": 9.60374525766669e-06, - "loss": 0.4336, - "step": 6658 - }, - { - "epoch": 0.43520031370498663, - "grad_norm": 0.44025322794914246, - "learning_rate": 9.603609008096265e-06, - "loss": 0.3863, - "step": 6659 - }, - { - "epoch": 0.4352656689105287, - "grad_norm": 0.5253258943557739, - "learning_rate": 9.603472736072443e-06, - "loss": 0.4156, - "step": 6660 - }, - { - "epoch": 0.43533102411607083, - "grad_norm": 0.4346178472042084, - "learning_rate": 9.603336441595892e-06, - "loss": 0.3726, - "step": 6661 - }, - { - "epoch": 0.43539637932161296, - "grad_norm": 0.48458564281463623, - "learning_rate": 9.603200124667273e-06, - "loss": 0.4368, - "step": 6662 - }, - { - "epoch": 0.4354617345271551, - "grad_norm": 0.4136316180229187, - "learning_rate": 9.603063785287252e-06, - "loss": 0.3479, - "step": 6663 - }, - { - "epoch": 0.4355270897326972, - "grad_norm": 0.4404667615890503, - "learning_rate": 9.602927423456497e-06, - "loss": 0.3883, - "step": 6664 - }, - { - "epoch": 0.43559244493823934, - "grad_norm": 0.44658559560775757, - "learning_rate": 9.602791039175668e-06, - "loss": 0.377, - "step": 6665 - }, - { - "epoch": 0.43565780014378147, - "grad_norm": 0.4379778802394867, - "learning_rate": 9.602654632445434e-06, - "loss": 0.3947, - "step": 6666 - }, - { - "epoch": 0.4357231553493236, - "grad_norm": 0.4784342348575592, - "learning_rate": 9.602518203266456e-06, - "loss": 0.4134, - "step": 6667 - }, - { - "epoch": 0.43578851055486567, - "grad_norm": 0.40214741230010986, - "learning_rate": 9.602381751639405e-06, - "loss": 0.3257, - "step": 6668 - }, - { - "epoch": 0.4358538657604078, - "grad_norm": 0.47082242369651794, - "learning_rate": 9.602245277564944e-06, - "loss": 0.4101, - "step": 6669 - }, - { - "epoch": 0.4359192209659499, - "grad_norm": 0.47592031955718994, - "learning_rate": 9.602108781043735e-06, - "loss": 0.4164, - "step": 6670 - }, - { - "epoch": 0.43598457617149206, - "grad_norm": 0.43255478143692017, - "learning_rate": 9.601972262076452e-06, - "loss": 0.3636, - "step": 6671 - }, - { - "epoch": 0.4360499313770342, - "grad_norm": 0.43682658672332764, - "learning_rate": 9.601835720663752e-06, - "loss": 0.3748, - "step": 6672 - }, - { - "epoch": 0.4361152865825763, - "grad_norm": 0.43344199657440186, - "learning_rate": 9.601699156806306e-06, - "loss": 0.3813, - "step": 6673 - }, - { - "epoch": 0.43618064178811844, - "grad_norm": 0.4494476318359375, - "learning_rate": 9.60156257050478e-06, - "loss": 0.372, - "step": 6674 - }, - { - "epoch": 0.43624599699366057, - "grad_norm": 0.44902560114860535, - "learning_rate": 9.601425961759837e-06, - "loss": 0.3584, - "step": 6675 - }, - { - "epoch": 0.43631135219920264, - "grad_norm": 0.4559073746204376, - "learning_rate": 9.601289330572149e-06, - "loss": 0.3785, - "step": 6676 - }, - { - "epoch": 0.43637670740474477, - "grad_norm": 0.4348633289337158, - "learning_rate": 9.601152676942376e-06, - "loss": 0.3499, - "step": 6677 - }, - { - "epoch": 0.4364420626102869, - "grad_norm": 0.4531196355819702, - "learning_rate": 9.601016000871189e-06, - "loss": 0.4095, - "step": 6678 - }, - { - "epoch": 0.436507417815829, - "grad_norm": 0.4794025719165802, - "learning_rate": 9.600879302359253e-06, - "loss": 0.4136, - "step": 6679 - }, - { - "epoch": 0.43657277302137115, - "grad_norm": 0.46024563908576965, - "learning_rate": 9.600742581407234e-06, - "loss": 0.3687, - "step": 6680 - }, - { - "epoch": 0.4366381282269133, - "grad_norm": 0.4755650758743286, - "learning_rate": 9.6006058380158e-06, - "loss": 0.4086, - "step": 6681 - }, - { - "epoch": 0.4367034834324554, - "grad_norm": 0.4605053663253784, - "learning_rate": 9.600469072185616e-06, - "loss": 0.3909, - "step": 6682 - }, - { - "epoch": 0.43676883863799754, - "grad_norm": 0.4819033145904541, - "learning_rate": 9.600332283917352e-06, - "loss": 0.4149, - "step": 6683 - }, - { - "epoch": 0.43683419384353966, - "grad_norm": 0.42655113339424133, - "learning_rate": 9.600195473211676e-06, - "loss": 0.3483, - "step": 6684 - }, - { - "epoch": 0.43689954904908174, - "grad_norm": 0.4536789059638977, - "learning_rate": 9.600058640069249e-06, - "loss": 0.4081, - "step": 6685 - }, - { - "epoch": 0.43696490425462386, - "grad_norm": 0.48175111413002014, - "learning_rate": 9.599921784490745e-06, - "loss": 0.4044, - "step": 6686 - }, - { - "epoch": 0.437030259460166, - "grad_norm": 0.4394722878932953, - "learning_rate": 9.599784906476827e-06, - "loss": 0.369, - "step": 6687 - }, - { - "epoch": 0.4370956146657081, - "grad_norm": 0.45626410841941833, - "learning_rate": 9.599648006028166e-06, - "loss": 0.3925, - "step": 6688 - }, - { - "epoch": 0.43716096987125025, - "grad_norm": 0.47347065806388855, - "learning_rate": 9.599511083145427e-06, - "loss": 0.4162, - "step": 6689 - }, - { - "epoch": 0.4372263250767924, - "grad_norm": 0.4870966076850891, - "learning_rate": 9.59937413782928e-06, - "loss": 0.4259, - "step": 6690 - }, - { - "epoch": 0.4372916802823345, - "grad_norm": 0.4528791308403015, - "learning_rate": 9.599237170080391e-06, - "loss": 0.3816, - "step": 6691 - }, - { - "epoch": 0.43735703548787663, - "grad_norm": 0.4730829894542694, - "learning_rate": 9.59910017989943e-06, - "loss": 0.4202, - "step": 6692 - }, - { - "epoch": 0.4374223906934187, - "grad_norm": 0.4537069797515869, - "learning_rate": 9.598963167287064e-06, - "loss": 0.4044, - "step": 6693 - }, - { - "epoch": 0.43748774589896083, - "grad_norm": 0.4376910626888275, - "learning_rate": 9.59882613224396e-06, - "loss": 0.3798, - "step": 6694 - }, - { - "epoch": 0.43755310110450296, - "grad_norm": 0.4318070113658905, - "learning_rate": 9.59868907477079e-06, - "loss": 0.3898, - "step": 6695 - }, - { - "epoch": 0.4376184563100451, - "grad_norm": 0.43683746457099915, - "learning_rate": 9.598551994868219e-06, - "loss": 0.3551, - "step": 6696 - }, - { - "epoch": 0.4376838115155872, - "grad_norm": 0.4378679692745209, - "learning_rate": 9.598414892536917e-06, - "loss": 0.391, - "step": 6697 - }, - { - "epoch": 0.43774916672112935, - "grad_norm": 0.46658703684806824, - "learning_rate": 9.598277767777553e-06, - "loss": 0.4143, - "step": 6698 - }, - { - "epoch": 0.4378145219266715, - "grad_norm": 0.4765806198120117, - "learning_rate": 9.598140620590794e-06, - "loss": 0.3998, - "step": 6699 - }, - { - "epoch": 0.4378798771322136, - "grad_norm": 0.48689937591552734, - "learning_rate": 9.59800345097731e-06, - "loss": 0.4645, - "step": 6700 - }, - { - "epoch": 0.43794523233775573, - "grad_norm": 0.43289056420326233, - "learning_rate": 9.597866258937774e-06, - "loss": 0.3401, - "step": 6701 - }, - { - "epoch": 0.4380105875432978, - "grad_norm": 0.4396793842315674, - "learning_rate": 9.597729044472847e-06, - "loss": 0.3612, - "step": 6702 - }, - { - "epoch": 0.43807594274883993, - "grad_norm": 0.4856289327144623, - "learning_rate": 9.597591807583206e-06, - "loss": 0.4165, - "step": 6703 - }, - { - "epoch": 0.43814129795438206, - "grad_norm": 0.4623776376247406, - "learning_rate": 9.597454548269514e-06, - "loss": 0.3631, - "step": 6704 - }, - { - "epoch": 0.4382066531599242, - "grad_norm": 0.44471532106399536, - "learning_rate": 9.597317266532446e-06, - "loss": 0.3867, - "step": 6705 - }, - { - "epoch": 0.4382720083654663, - "grad_norm": 0.47742483019828796, - "learning_rate": 9.597179962372668e-06, - "loss": 0.3764, - "step": 6706 - }, - { - "epoch": 0.43833736357100844, - "grad_norm": 0.4707384407520294, - "learning_rate": 9.59704263579085e-06, - "loss": 0.4285, - "step": 6707 - }, - { - "epoch": 0.43840271877655057, - "grad_norm": 0.4595271348953247, - "learning_rate": 9.596905286787663e-06, - "loss": 0.4254, - "step": 6708 - }, - { - "epoch": 0.4384680739820927, - "grad_norm": 0.4742881953716278, - "learning_rate": 9.596767915363777e-06, - "loss": 0.45, - "step": 6709 - }, - { - "epoch": 0.43853342918763477, - "grad_norm": 0.42788028717041016, - "learning_rate": 9.59663052151986e-06, - "loss": 0.3547, - "step": 6710 - }, - { - "epoch": 0.4385987843931769, - "grad_norm": 0.4922092854976654, - "learning_rate": 9.596493105256584e-06, - "loss": 0.4076, - "step": 6711 - }, - { - "epoch": 0.438664139598719, - "grad_norm": 0.480979859828949, - "learning_rate": 9.59635566657462e-06, - "loss": 0.4404, - "step": 6712 - }, - { - "epoch": 0.43872949480426116, - "grad_norm": 0.5063999891281128, - "learning_rate": 9.596218205474637e-06, - "loss": 0.4622, - "step": 6713 - }, - { - "epoch": 0.4387948500098033, - "grad_norm": 0.42097312211990356, - "learning_rate": 9.596080721957304e-06, - "loss": 0.3682, - "step": 6714 - }, - { - "epoch": 0.4388602052153454, - "grad_norm": 0.4473300278186798, - "learning_rate": 9.595943216023293e-06, - "loss": 0.3855, - "step": 6715 - }, - { - "epoch": 0.43892556042088754, - "grad_norm": 0.4737445116043091, - "learning_rate": 9.595805687673276e-06, - "loss": 0.387, - "step": 6716 - }, - { - "epoch": 0.43899091562642967, - "grad_norm": 0.4513935148715973, - "learning_rate": 9.595668136907924e-06, - "loss": 0.3955, - "step": 6717 - }, - { - "epoch": 0.43905627083197174, - "grad_norm": 0.4501236379146576, - "learning_rate": 9.595530563727904e-06, - "loss": 0.3791, - "step": 6718 - }, - { - "epoch": 0.43912162603751387, - "grad_norm": 0.46202975511550903, - "learning_rate": 9.59539296813389e-06, - "loss": 0.3753, - "step": 6719 - }, - { - "epoch": 0.439186981243056, - "grad_norm": 0.44744008779525757, - "learning_rate": 9.59525535012655e-06, - "loss": 0.3958, - "step": 6720 - }, - { - "epoch": 0.4392523364485981, - "grad_norm": 0.4539359211921692, - "learning_rate": 9.595117709706562e-06, - "loss": 0.3914, - "step": 6721 - }, - { - "epoch": 0.43931769165414025, - "grad_norm": 0.48395809531211853, - "learning_rate": 9.59498004687459e-06, - "loss": 0.4611, - "step": 6722 - }, - { - "epoch": 0.4393830468596824, - "grad_norm": 0.4520648419857025, - "learning_rate": 9.59484236163131e-06, - "loss": 0.3771, - "step": 6723 - }, - { - "epoch": 0.4394484020652245, - "grad_norm": 0.42805686593055725, - "learning_rate": 9.594704653977392e-06, - "loss": 0.3749, - "step": 6724 - }, - { - "epoch": 0.43951375727076664, - "grad_norm": 0.48044294118881226, - "learning_rate": 9.594566923913506e-06, - "loss": 0.4229, - "step": 6725 - }, - { - "epoch": 0.43957911247630876, - "grad_norm": 0.43216997385025024, - "learning_rate": 9.594429171440328e-06, - "loss": 0.3678, - "step": 6726 - }, - { - "epoch": 0.43964446768185084, - "grad_norm": 0.44116532802581787, - "learning_rate": 9.594291396558526e-06, - "loss": 0.3945, - "step": 6727 - }, - { - "epoch": 0.43970982288739296, - "grad_norm": 0.4396519362926483, - "learning_rate": 9.594153599268773e-06, - "loss": 0.3574, - "step": 6728 - }, - { - "epoch": 0.4397751780929351, - "grad_norm": 0.4698348641395569, - "learning_rate": 9.594015779571741e-06, - "loss": 0.4113, - "step": 6729 - }, - { - "epoch": 0.4398405332984772, - "grad_norm": 0.44042375683784485, - "learning_rate": 9.593877937468104e-06, - "loss": 0.3623, - "step": 6730 - }, - { - "epoch": 0.43990588850401935, - "grad_norm": 0.4320286810398102, - "learning_rate": 9.593740072958531e-06, - "loss": 0.3746, - "step": 6731 - }, - { - "epoch": 0.4399712437095615, - "grad_norm": 0.463545560836792, - "learning_rate": 9.593602186043698e-06, - "loss": 0.412, - "step": 6732 - }, - { - "epoch": 0.4400365989151036, - "grad_norm": 0.42265942692756653, - "learning_rate": 9.593464276724273e-06, - "loss": 0.3622, - "step": 6733 - }, - { - "epoch": 0.44010195412064573, - "grad_norm": 0.4521913230419159, - "learning_rate": 9.593326345000935e-06, - "loss": 0.3623, - "step": 6734 - }, - { - "epoch": 0.4401673093261878, - "grad_norm": 0.4236926734447479, - "learning_rate": 9.59318839087435e-06, - "loss": 0.3446, - "step": 6735 - }, - { - "epoch": 0.44023266453172993, - "grad_norm": 0.4713384509086609, - "learning_rate": 9.593050414345197e-06, - "loss": 0.4078, - "step": 6736 - }, - { - "epoch": 0.44029801973727206, - "grad_norm": 0.43656012415885925, - "learning_rate": 9.592912415414145e-06, - "loss": 0.3451, - "step": 6737 - }, - { - "epoch": 0.4403633749428142, - "grad_norm": 0.44159042835235596, - "learning_rate": 9.592774394081867e-06, - "loss": 0.374, - "step": 6738 - }, - { - "epoch": 0.4404287301483563, - "grad_norm": 0.47376754879951477, - "learning_rate": 9.592636350349036e-06, - "loss": 0.4055, - "step": 6739 - }, - { - "epoch": 0.44049408535389845, - "grad_norm": 0.48617663979530334, - "learning_rate": 9.592498284216328e-06, - "loss": 0.4265, - "step": 6740 - }, - { - "epoch": 0.4405594405594406, - "grad_norm": 0.47439900040626526, - "learning_rate": 9.592360195684417e-06, - "loss": 0.4048, - "step": 6741 - }, - { - "epoch": 0.4406247957649827, - "grad_norm": 0.4841964542865753, - "learning_rate": 9.59222208475397e-06, - "loss": 0.4097, - "step": 6742 - }, - { - "epoch": 0.44069015097052483, - "grad_norm": 0.4714210033416748, - "learning_rate": 9.592083951425668e-06, - "loss": 0.4126, - "step": 6743 - }, - { - "epoch": 0.4407555061760669, - "grad_norm": 0.46400946378707886, - "learning_rate": 9.591945795700181e-06, - "loss": 0.3896, - "step": 6744 - }, - { - "epoch": 0.44082086138160903, - "grad_norm": 0.47542211413383484, - "learning_rate": 9.591807617578184e-06, - "loss": 0.3663, - "step": 6745 - }, - { - "epoch": 0.44088621658715116, - "grad_norm": 0.4832114577293396, - "learning_rate": 9.59166941706035e-06, - "loss": 0.3592, - "step": 6746 - }, - { - "epoch": 0.4409515717926933, - "grad_norm": 0.4695150852203369, - "learning_rate": 9.591531194147352e-06, - "loss": 0.3695, - "step": 6747 - }, - { - "epoch": 0.4410169269982354, - "grad_norm": 0.4999988377094269, - "learning_rate": 9.591392948839867e-06, - "loss": 0.4373, - "step": 6748 - }, - { - "epoch": 0.44108228220377754, - "grad_norm": 0.45000162720680237, - "learning_rate": 9.591254681138568e-06, - "loss": 0.365, - "step": 6749 - }, - { - "epoch": 0.44114763740931967, - "grad_norm": 0.46372920274734497, - "learning_rate": 9.591116391044131e-06, - "loss": 0.3743, - "step": 6750 - }, - { - "epoch": 0.4412129926148618, - "grad_norm": 0.46051591634750366, - "learning_rate": 9.590978078557227e-06, - "loss": 0.3858, - "step": 6751 - }, - { - "epoch": 0.44127834782040387, - "grad_norm": 0.4539625942707062, - "learning_rate": 9.590839743678532e-06, - "loss": 0.3847, - "step": 6752 - }, - { - "epoch": 0.441343703025946, - "grad_norm": 0.4396856725215912, - "learning_rate": 9.590701386408723e-06, - "loss": 0.3595, - "step": 6753 - }, - { - "epoch": 0.4414090582314881, - "grad_norm": 0.4734814465045929, - "learning_rate": 9.590563006748472e-06, - "loss": 0.4586, - "step": 6754 - }, - { - "epoch": 0.44147441343703026, - "grad_norm": 0.4724050760269165, - "learning_rate": 9.590424604698455e-06, - "loss": 0.4052, - "step": 6755 - }, - { - "epoch": 0.4415397686425724, - "grad_norm": 0.44638800621032715, - "learning_rate": 9.590286180259347e-06, - "loss": 0.3669, - "step": 6756 - }, - { - "epoch": 0.4416051238481145, - "grad_norm": 0.44825056195259094, - "learning_rate": 9.590147733431823e-06, - "loss": 0.3405, - "step": 6757 - }, - { - "epoch": 0.44167047905365664, - "grad_norm": 0.46173352003097534, - "learning_rate": 9.590009264216557e-06, - "loss": 0.4144, - "step": 6758 - }, - { - "epoch": 0.44173583425919877, - "grad_norm": 0.4688032567501068, - "learning_rate": 9.589870772614227e-06, - "loss": 0.3929, - "step": 6759 - }, - { - "epoch": 0.44180118946474084, - "grad_norm": 0.4624279737472534, - "learning_rate": 9.589732258625508e-06, - "loss": 0.4148, - "step": 6760 - }, - { - "epoch": 0.44186654467028297, - "grad_norm": 0.5510392189025879, - "learning_rate": 9.589593722251074e-06, - "loss": 0.4373, - "step": 6761 - }, - { - "epoch": 0.4419318998758251, - "grad_norm": 0.46463918685913086, - "learning_rate": 9.589455163491601e-06, - "loss": 0.3678, - "step": 6762 - }, - { - "epoch": 0.4419972550813672, - "grad_norm": 0.47901713848114014, - "learning_rate": 9.589316582347766e-06, - "loss": 0.3773, - "step": 6763 - }, - { - "epoch": 0.44206261028690935, - "grad_norm": 0.43875572085380554, - "learning_rate": 9.589177978820244e-06, - "loss": 0.3904, - "step": 6764 - }, - { - "epoch": 0.4421279654924515, - "grad_norm": 0.4612356424331665, - "learning_rate": 9.58903935290971e-06, - "loss": 0.4024, - "step": 6765 - }, - { - "epoch": 0.4421933206979936, - "grad_norm": 0.4210243821144104, - "learning_rate": 9.588900704616841e-06, - "loss": 0.3601, - "step": 6766 - }, - { - "epoch": 0.44225867590353574, - "grad_norm": 0.46239984035491943, - "learning_rate": 9.588762033942316e-06, - "loss": 0.384, - "step": 6767 - }, - { - "epoch": 0.44232403110907786, - "grad_norm": 0.48965543508529663, - "learning_rate": 9.588623340886807e-06, - "loss": 0.4171, - "step": 6768 - }, - { - "epoch": 0.44238938631461994, - "grad_norm": 0.4603058099746704, - "learning_rate": 9.588484625450993e-06, - "loss": 0.4124, - "step": 6769 - }, - { - "epoch": 0.44245474152016206, - "grad_norm": 0.47310999035835266, - "learning_rate": 9.588345887635549e-06, - "loss": 0.4351, - "step": 6770 - }, - { - "epoch": 0.4425200967257042, - "grad_norm": 0.4399702847003937, - "learning_rate": 9.588207127441153e-06, - "loss": 0.342, - "step": 6771 - }, - { - "epoch": 0.4425854519312463, - "grad_norm": 0.4237600266933441, - "learning_rate": 9.588068344868482e-06, - "loss": 0.3306, - "step": 6772 - }, - { - "epoch": 0.44265080713678845, - "grad_norm": 0.4667479693889618, - "learning_rate": 9.587929539918212e-06, - "loss": 0.4316, - "step": 6773 - }, - { - "epoch": 0.4427161623423306, - "grad_norm": 0.42612531781196594, - "learning_rate": 9.587790712591018e-06, - "loss": 0.3685, - "step": 6774 - }, - { - "epoch": 0.4427815175478727, - "grad_norm": 0.46188637614250183, - "learning_rate": 9.587651862887582e-06, - "loss": 0.3896, - "step": 6775 - }, - { - "epoch": 0.44284687275341483, - "grad_norm": 0.43306100368499756, - "learning_rate": 9.587512990808578e-06, - "loss": 0.3615, - "step": 6776 - }, - { - "epoch": 0.4429122279589569, - "grad_norm": 0.4422586262226105, - "learning_rate": 9.587374096354685e-06, - "loss": 0.3931, - "step": 6777 - }, - { - "epoch": 0.44297758316449903, - "grad_norm": 0.47793295979499817, - "learning_rate": 9.587235179526578e-06, - "loss": 0.3986, - "step": 6778 - }, - { - "epoch": 0.44304293837004116, - "grad_norm": 0.4292960464954376, - "learning_rate": 9.587096240324935e-06, - "loss": 0.3387, - "step": 6779 - }, - { - "epoch": 0.4431082935755833, - "grad_norm": 0.4719371795654297, - "learning_rate": 9.586957278750436e-06, - "loss": 0.3838, - "step": 6780 - }, - { - "epoch": 0.4431736487811254, - "grad_norm": 0.45420241355895996, - "learning_rate": 9.586818294803756e-06, - "loss": 0.3659, - "step": 6781 - }, - { - "epoch": 0.44323900398666755, - "grad_norm": 0.42631033062934875, - "learning_rate": 9.586679288485575e-06, - "loss": 0.3518, - "step": 6782 - }, - { - "epoch": 0.4433043591922097, - "grad_norm": 0.5003683567047119, - "learning_rate": 9.58654025979657e-06, - "loss": 0.4471, - "step": 6783 - }, - { - "epoch": 0.4433697143977518, - "grad_norm": 0.4721042811870575, - "learning_rate": 9.586401208737419e-06, - "loss": 0.3909, - "step": 6784 - }, - { - "epoch": 0.44343506960329393, - "grad_norm": 0.4883996844291687, - "learning_rate": 9.5862621353088e-06, - "loss": 0.4348, - "step": 6785 - }, - { - "epoch": 0.443500424808836, - "grad_norm": 0.4878160059452057, - "learning_rate": 9.586123039511393e-06, - "loss": 0.437, - "step": 6786 - }, - { - "epoch": 0.44356578001437813, - "grad_norm": 0.47435104846954346, - "learning_rate": 9.585983921345875e-06, - "loss": 0.3554, - "step": 6787 - }, - { - "epoch": 0.44363113521992026, - "grad_norm": 0.4903028905391693, - "learning_rate": 9.585844780812922e-06, - "loss": 0.4556, - "step": 6788 - }, - { - "epoch": 0.4436964904254624, - "grad_norm": 0.44504037499427795, - "learning_rate": 9.58570561791322e-06, - "loss": 0.3913, - "step": 6789 - }, - { - "epoch": 0.4437618456310045, - "grad_norm": 0.4510614275932312, - "learning_rate": 9.58556643264744e-06, - "loss": 0.3747, - "step": 6790 - }, - { - "epoch": 0.44382720083654664, - "grad_norm": 0.48686614632606506, - "learning_rate": 9.585427225016264e-06, - "loss": 0.4173, - "step": 6791 - }, - { - "epoch": 0.44389255604208877, - "grad_norm": 0.4253038167953491, - "learning_rate": 9.585287995020371e-06, - "loss": 0.3401, - "step": 6792 - }, - { - "epoch": 0.4439579112476309, - "grad_norm": 0.4288231432437897, - "learning_rate": 9.58514874266044e-06, - "loss": 0.3617, - "step": 6793 - }, - { - "epoch": 0.44402326645317297, - "grad_norm": 0.4962444603443146, - "learning_rate": 9.58500946793715e-06, - "loss": 0.4344, - "step": 6794 - }, - { - "epoch": 0.4440886216587151, - "grad_norm": 0.3991558849811554, - "learning_rate": 9.584870170851182e-06, - "loss": 0.333, - "step": 6795 - }, - { - "epoch": 0.4441539768642572, - "grad_norm": 0.4364485740661621, - "learning_rate": 9.584730851403212e-06, - "loss": 0.3452, - "step": 6796 - }, - { - "epoch": 0.44421933206979936, - "grad_norm": 0.46209946274757385, - "learning_rate": 9.584591509593922e-06, - "loss": 0.4015, - "step": 6797 - }, - { - "epoch": 0.4442846872753415, - "grad_norm": 0.48259955644607544, - "learning_rate": 9.58445214542399e-06, - "loss": 0.3962, - "step": 6798 - }, - { - "epoch": 0.4443500424808836, - "grad_norm": 0.4641874432563782, - "learning_rate": 9.584312758894099e-06, - "loss": 0.3965, - "step": 6799 - }, - { - "epoch": 0.44441539768642574, - "grad_norm": 0.45316869020462036, - "learning_rate": 9.584173350004924e-06, - "loss": 0.4277, - "step": 6800 - }, - { - "epoch": 0.44448075289196787, - "grad_norm": 0.5300980806350708, - "learning_rate": 9.58403391875715e-06, - "loss": 0.4546, - "step": 6801 - }, - { - "epoch": 0.44454610809750994, - "grad_norm": 0.4747301936149597, - "learning_rate": 9.583894465151452e-06, - "loss": 0.3843, - "step": 6802 - }, - { - "epoch": 0.44461146330305207, - "grad_norm": 0.45713356137275696, - "learning_rate": 9.583754989188514e-06, - "loss": 0.3652, - "step": 6803 - }, - { - "epoch": 0.4446768185085942, - "grad_norm": 0.442258358001709, - "learning_rate": 9.583615490869014e-06, - "loss": 0.3867, - "step": 6804 - }, - { - "epoch": 0.4447421737141363, - "grad_norm": 0.488505095243454, - "learning_rate": 9.583475970193634e-06, - "loss": 0.434, - "step": 6805 - }, - { - "epoch": 0.44480752891967845, - "grad_norm": 0.4674488604068756, - "learning_rate": 9.583336427163054e-06, - "loss": 0.3893, - "step": 6806 - }, - { - "epoch": 0.4448728841252206, - "grad_norm": 0.4637359082698822, - "learning_rate": 9.583196861777955e-06, - "loss": 0.3814, - "step": 6807 - }, - { - "epoch": 0.4449382393307627, - "grad_norm": 0.42842769622802734, - "learning_rate": 9.583057274039016e-06, - "loss": 0.3652, - "step": 6808 - }, - { - "epoch": 0.44500359453630484, - "grad_norm": 0.4319152235984802, - "learning_rate": 9.58291766394692e-06, - "loss": 0.3533, - "step": 6809 - }, - { - "epoch": 0.44506894974184696, - "grad_norm": 0.530834436416626, - "learning_rate": 9.582778031502347e-06, - "loss": 0.4572, - "step": 6810 - }, - { - "epoch": 0.44513430494738904, - "grad_norm": 0.4620935022830963, - "learning_rate": 9.582638376705976e-06, - "loss": 0.3921, - "step": 6811 - }, - { - "epoch": 0.44519966015293116, - "grad_norm": 0.43870776891708374, - "learning_rate": 9.582498699558492e-06, - "loss": 0.3411, - "step": 6812 - }, - { - "epoch": 0.4452650153584733, - "grad_norm": 0.46446239948272705, - "learning_rate": 9.582359000060572e-06, - "loss": 0.4065, - "step": 6813 - }, - { - "epoch": 0.4453303705640154, - "grad_norm": 0.46167463064193726, - "learning_rate": 9.582219278212903e-06, - "loss": 0.4311, - "step": 6814 - }, - { - "epoch": 0.44539572576955755, - "grad_norm": 0.4565676152706146, - "learning_rate": 9.58207953401616e-06, - "loss": 0.4029, - "step": 6815 - }, - { - "epoch": 0.4454610809750997, - "grad_norm": 0.4476493000984192, - "learning_rate": 9.58193976747103e-06, - "loss": 0.402, - "step": 6816 - }, - { - "epoch": 0.4455264361806418, - "grad_norm": 0.4766305983066559, - "learning_rate": 9.581799978578191e-06, - "loss": 0.4243, - "step": 6817 - }, - { - "epoch": 0.44559179138618393, - "grad_norm": 0.5118014216423035, - "learning_rate": 9.581660167338327e-06, - "loss": 0.4625, - "step": 6818 - }, - { - "epoch": 0.445657146591726, - "grad_norm": 0.4302530884742737, - "learning_rate": 9.581520333752119e-06, - "loss": 0.3676, - "step": 6819 - }, - { - "epoch": 0.44572250179726813, - "grad_norm": 0.4763137698173523, - "learning_rate": 9.581380477820249e-06, - "loss": 0.4104, - "step": 6820 - }, - { - "epoch": 0.44578785700281026, - "grad_norm": 0.43492022156715393, - "learning_rate": 9.581240599543398e-06, - "loss": 0.3454, - "step": 6821 - }, - { - "epoch": 0.4458532122083524, - "grad_norm": 0.44110429286956787, - "learning_rate": 9.581100698922252e-06, - "loss": 0.3757, - "step": 6822 - }, - { - "epoch": 0.4459185674138945, - "grad_norm": 0.44938743114471436, - "learning_rate": 9.58096077595749e-06, - "loss": 0.3557, - "step": 6823 - }, - { - "epoch": 0.44598392261943665, - "grad_norm": 0.4719734489917755, - "learning_rate": 9.580820830649795e-06, - "loss": 0.4106, - "step": 6824 - }, - { - "epoch": 0.4460492778249788, - "grad_norm": 0.4498710334300995, - "learning_rate": 9.580680862999849e-06, - "loss": 0.4167, - "step": 6825 - }, - { - "epoch": 0.4461146330305209, - "grad_norm": 0.4458502531051636, - "learning_rate": 9.580540873008338e-06, - "loss": 0.3593, - "step": 6826 - }, - { - "epoch": 0.44617998823606303, - "grad_norm": 0.41527310013771057, - "learning_rate": 9.58040086067594e-06, - "loss": 0.3222, - "step": 6827 - }, - { - "epoch": 0.4462453434416051, - "grad_norm": 0.44922083616256714, - "learning_rate": 9.580260826003341e-06, - "loss": 0.3871, - "step": 6828 - }, - { - "epoch": 0.44631069864714723, - "grad_norm": 0.46734818816185, - "learning_rate": 9.58012076899122e-06, - "loss": 0.3843, - "step": 6829 - }, - { - "epoch": 0.44637605385268936, - "grad_norm": 0.4546533226966858, - "learning_rate": 9.579980689640268e-06, - "loss": 0.3803, - "step": 6830 - }, - { - "epoch": 0.4464414090582315, - "grad_norm": 0.4769834578037262, - "learning_rate": 9.579840587951161e-06, - "loss": 0.4091, - "step": 6831 - }, - { - "epoch": 0.4465067642637736, - "grad_norm": 0.4315880537033081, - "learning_rate": 9.579700463924586e-06, - "loss": 0.3267, - "step": 6832 - }, - { - "epoch": 0.44657211946931574, - "grad_norm": 0.44697728753089905, - "learning_rate": 9.579560317561225e-06, - "loss": 0.3975, - "step": 6833 - }, - { - "epoch": 0.44663747467485787, - "grad_norm": 0.45439666509628296, - "learning_rate": 9.579420148861763e-06, - "loss": 0.4024, - "step": 6834 - }, - { - "epoch": 0.4467028298804, - "grad_norm": 0.4275607168674469, - "learning_rate": 9.579279957826882e-06, - "loss": 0.3422, - "step": 6835 - }, - { - "epoch": 0.44676818508594207, - "grad_norm": 0.437131404876709, - "learning_rate": 9.579139744457264e-06, - "loss": 0.3626, - "step": 6836 - }, - { - "epoch": 0.4468335402914842, - "grad_norm": 0.42850175499916077, - "learning_rate": 9.578999508753597e-06, - "loss": 0.3785, - "step": 6837 - }, - { - "epoch": 0.4468988954970263, - "grad_norm": 0.47736141085624695, - "learning_rate": 9.578859250716562e-06, - "loss": 0.4251, - "step": 6838 - }, - { - "epoch": 0.44696425070256846, - "grad_norm": 0.41900646686553955, - "learning_rate": 9.578718970346846e-06, - "loss": 0.3303, - "step": 6839 - }, - { - "epoch": 0.4470296059081106, - "grad_norm": 0.4415460526943207, - "learning_rate": 9.578578667645128e-06, - "loss": 0.3121, - "step": 6840 - }, - { - "epoch": 0.4470949611136527, - "grad_norm": 0.44675517082214355, - "learning_rate": 9.5784383426121e-06, - "loss": 0.3917, - "step": 6841 - }, - { - "epoch": 0.44716031631919484, - "grad_norm": 0.43669185042381287, - "learning_rate": 9.57829799524844e-06, - "loss": 0.3253, - "step": 6842 - }, - { - "epoch": 0.44722567152473697, - "grad_norm": 0.45259734988212585, - "learning_rate": 9.578157625554833e-06, - "loss": 0.3789, - "step": 6843 - }, - { - "epoch": 0.44729102673027904, - "grad_norm": 0.44392073154449463, - "learning_rate": 9.578017233531968e-06, - "loss": 0.3581, - "step": 6844 - }, - { - "epoch": 0.44735638193582117, - "grad_norm": 0.49673157930374146, - "learning_rate": 9.577876819180525e-06, - "loss": 0.4376, - "step": 6845 - }, - { - "epoch": 0.4474217371413633, - "grad_norm": 0.43708527088165283, - "learning_rate": 9.577736382501192e-06, - "loss": 0.3545, - "step": 6846 - }, - { - "epoch": 0.4474870923469054, - "grad_norm": 0.45776787400245667, - "learning_rate": 9.577595923494651e-06, - "loss": 0.4155, - "step": 6847 - }, - { - "epoch": 0.44755244755244755, - "grad_norm": 0.47815778851509094, - "learning_rate": 9.577455442161591e-06, - "loss": 0.4418, - "step": 6848 - }, - { - "epoch": 0.4476178027579897, - "grad_norm": 0.45162200927734375, - "learning_rate": 9.577314938502696e-06, - "loss": 0.3803, - "step": 6849 - }, - { - "epoch": 0.4476831579635318, - "grad_norm": 0.4717872738838196, - "learning_rate": 9.577174412518648e-06, - "loss": 0.4355, - "step": 6850 - }, - { - "epoch": 0.44774851316907394, - "grad_norm": 0.45953986048698425, - "learning_rate": 9.577033864210135e-06, - "loss": 0.4222, - "step": 6851 - }, - { - "epoch": 0.44781386837461606, - "grad_norm": 0.4623877704143524, - "learning_rate": 9.576893293577842e-06, - "loss": 0.3616, - "step": 6852 - }, - { - "epoch": 0.44787922358015814, - "grad_norm": 0.5005372762680054, - "learning_rate": 9.576752700622455e-06, - "loss": 0.4262, - "step": 6853 - }, - { - "epoch": 0.44794457878570026, - "grad_norm": 0.4469951093196869, - "learning_rate": 9.57661208534466e-06, - "loss": 0.369, - "step": 6854 - }, - { - "epoch": 0.4480099339912424, - "grad_norm": 0.5558889508247375, - "learning_rate": 9.57647144774514e-06, - "loss": 0.503, - "step": 6855 - }, - { - "epoch": 0.4480752891967845, - "grad_norm": 0.47243532538414, - "learning_rate": 9.576330787824588e-06, - "loss": 0.4, - "step": 6856 - }, - { - "epoch": 0.44814064440232665, - "grad_norm": 0.4681210219860077, - "learning_rate": 9.576190105583683e-06, - "loss": 0.4047, - "step": 6857 - }, - { - "epoch": 0.4482059996078688, - "grad_norm": 0.48239651322364807, - "learning_rate": 9.576049401023112e-06, - "loss": 0.4473, - "step": 6858 - }, - { - "epoch": 0.4482713548134109, - "grad_norm": 0.44161128997802734, - "learning_rate": 9.575908674143564e-06, - "loss": 0.3642, - "step": 6859 - }, - { - "epoch": 0.44833671001895303, - "grad_norm": 0.5230162739753723, - "learning_rate": 9.575767924945725e-06, - "loss": 0.4412, - "step": 6860 - }, - { - "epoch": 0.4484020652244951, - "grad_norm": 0.4514910876750946, - "learning_rate": 9.57562715343028e-06, - "loss": 0.3841, - "step": 6861 - }, - { - "epoch": 0.44846742043003723, - "grad_norm": 0.44265827536582947, - "learning_rate": 9.575486359597916e-06, - "loss": 0.3373, - "step": 6862 - }, - { - "epoch": 0.44853277563557936, - "grad_norm": 0.4110846519470215, - "learning_rate": 9.57534554344932e-06, - "loss": 0.3182, - "step": 6863 - }, - { - "epoch": 0.4485981308411215, - "grad_norm": 0.43720105290412903, - "learning_rate": 9.575204704985178e-06, - "loss": 0.3838, - "step": 6864 - }, - { - "epoch": 0.4486634860466636, - "grad_norm": 0.44153308868408203, - "learning_rate": 9.57506384420618e-06, - "loss": 0.3737, - "step": 6865 - }, - { - "epoch": 0.44872884125220575, - "grad_norm": 0.44844532012939453, - "learning_rate": 9.574922961113009e-06, - "loss": 0.3858, - "step": 6866 - }, - { - "epoch": 0.4487941964577479, - "grad_norm": 0.4577677547931671, - "learning_rate": 9.574782055706353e-06, - "loss": 0.4042, - "step": 6867 - }, - { - "epoch": 0.44885955166329, - "grad_norm": 0.4581975042819977, - "learning_rate": 9.5746411279869e-06, - "loss": 0.3989, - "step": 6868 - }, - { - "epoch": 0.44892490686883213, - "grad_norm": 0.515884280204773, - "learning_rate": 9.574500177955338e-06, - "loss": 0.4982, - "step": 6869 - }, - { - "epoch": 0.4489902620743742, - "grad_norm": 0.43185150623321533, - "learning_rate": 9.574359205612356e-06, - "loss": 0.3413, - "step": 6870 - }, - { - "epoch": 0.44905561727991633, - "grad_norm": 0.4548414945602417, - "learning_rate": 9.574218210958638e-06, - "loss": 0.3875, - "step": 6871 - }, - { - "epoch": 0.44912097248545846, - "grad_norm": 0.45214909315109253, - "learning_rate": 9.574077193994873e-06, - "loss": 0.3822, - "step": 6872 - }, - { - "epoch": 0.4491863276910006, - "grad_norm": 0.481594443321228, - "learning_rate": 9.573936154721749e-06, - "loss": 0.3916, - "step": 6873 - }, - { - "epoch": 0.4492516828965427, - "grad_norm": 0.4700080156326294, - "learning_rate": 9.573795093139952e-06, - "loss": 0.4066, - "step": 6874 - }, - { - "epoch": 0.44931703810208484, - "grad_norm": 0.4320732057094574, - "learning_rate": 9.573654009250174e-06, - "loss": 0.3726, - "step": 6875 - }, - { - "epoch": 0.44938239330762697, - "grad_norm": 0.4457520544528961, - "learning_rate": 9.573512903053101e-06, - "loss": 0.407, - "step": 6876 - }, - { - "epoch": 0.4494477485131691, - "grad_norm": 0.531183660030365, - "learning_rate": 9.57337177454942e-06, - "loss": 0.4639, - "step": 6877 - }, - { - "epoch": 0.44951310371871117, - "grad_norm": 0.4703523814678192, - "learning_rate": 9.573230623739821e-06, - "loss": 0.3691, - "step": 6878 - }, - { - "epoch": 0.4495784589242533, - "grad_norm": 0.47748491168022156, - "learning_rate": 9.573089450624992e-06, - "loss": 0.4312, - "step": 6879 - }, - { - "epoch": 0.4496438141297954, - "grad_norm": 0.4636233150959015, - "learning_rate": 9.57294825520562e-06, - "loss": 0.3896, - "step": 6880 - }, - { - "epoch": 0.44970916933533756, - "grad_norm": 0.46522000432014465, - "learning_rate": 9.572807037482397e-06, - "loss": 0.4053, - "step": 6881 - }, - { - "epoch": 0.4497745245408797, - "grad_norm": 0.4766260087490082, - "learning_rate": 9.57266579745601e-06, - "loss": 0.4767, - "step": 6882 - }, - { - "epoch": 0.4498398797464218, - "grad_norm": 0.47308024764060974, - "learning_rate": 9.572524535127148e-06, - "loss": 0.4555, - "step": 6883 - }, - { - "epoch": 0.44990523495196394, - "grad_norm": 0.475248783826828, - "learning_rate": 9.572383250496498e-06, - "loss": 0.411, - "step": 6884 - }, - { - "epoch": 0.44997059015750607, - "grad_norm": 0.45154842734336853, - "learning_rate": 9.572241943564752e-06, - "loss": 0.3848, - "step": 6885 - }, - { - "epoch": 0.45003594536304814, - "grad_norm": 0.45776087045669556, - "learning_rate": 9.572100614332598e-06, - "loss": 0.4069, - "step": 6886 - }, - { - "epoch": 0.45010130056859027, - "grad_norm": 0.4290597140789032, - "learning_rate": 9.571959262800725e-06, - "loss": 0.3443, - "step": 6887 - }, - { - "epoch": 0.4501666557741324, - "grad_norm": 0.4693318009376526, - "learning_rate": 9.571817888969823e-06, - "loss": 0.4365, - "step": 6888 - }, - { - "epoch": 0.4502320109796745, - "grad_norm": 0.46669575572013855, - "learning_rate": 9.571676492840582e-06, - "loss": 0.3688, - "step": 6889 - }, - { - "epoch": 0.45029736618521665, - "grad_norm": 0.4279143214225769, - "learning_rate": 9.57153507441369e-06, - "loss": 0.3747, - "step": 6890 - }, - { - "epoch": 0.4503627213907588, - "grad_norm": 0.46470198035240173, - "learning_rate": 9.571393633689838e-06, - "loss": 0.3674, - "step": 6891 - }, - { - "epoch": 0.4504280765963009, - "grad_norm": 0.44927316904067993, - "learning_rate": 9.571252170669715e-06, - "loss": 0.3816, - "step": 6892 - }, - { - "epoch": 0.45049343180184304, - "grad_norm": 0.4965997636318207, - "learning_rate": 9.571110685354012e-06, - "loss": 0.3928, - "step": 6893 - }, - { - "epoch": 0.45055878700738516, - "grad_norm": 0.4474985897541046, - "learning_rate": 9.570969177743419e-06, - "loss": 0.3898, - "step": 6894 - }, - { - "epoch": 0.45062414221292724, - "grad_norm": 0.4462169110774994, - "learning_rate": 9.570827647838625e-06, - "loss": 0.3908, - "step": 6895 - }, - { - "epoch": 0.45068949741846936, - "grad_norm": 0.47301992774009705, - "learning_rate": 9.570686095640323e-06, - "loss": 0.4078, - "step": 6896 - }, - { - "epoch": 0.4507548526240115, - "grad_norm": 0.45238858461380005, - "learning_rate": 9.570544521149199e-06, - "loss": 0.3705, - "step": 6897 - }, - { - "epoch": 0.4508202078295536, - "grad_norm": 0.5131879448890686, - "learning_rate": 9.570402924365949e-06, - "loss": 0.4599, - "step": 6898 - }, - { - "epoch": 0.45088556303509575, - "grad_norm": 0.46553248167037964, - "learning_rate": 9.570261305291258e-06, - "loss": 0.4098, - "step": 6899 - }, - { - "epoch": 0.4509509182406379, - "grad_norm": 0.43932557106018066, - "learning_rate": 9.570119663925819e-06, - "loss": 0.3626, - "step": 6900 - }, - { - "epoch": 0.45101627344618, - "grad_norm": 0.45153963565826416, - "learning_rate": 9.569978000270325e-06, - "loss": 0.3823, - "step": 6901 - }, - { - "epoch": 0.45108162865172213, - "grad_norm": 1.0369046926498413, - "learning_rate": 9.569836314325463e-06, - "loss": 0.4856, - "step": 6902 - }, - { - "epoch": 0.4511469838572642, - "grad_norm": 0.4859563708305359, - "learning_rate": 9.569694606091928e-06, - "loss": 0.4273, - "step": 6903 - }, - { - "epoch": 0.45121233906280633, - "grad_norm": 0.41873952746391296, - "learning_rate": 9.56955287557041e-06, - "loss": 0.3501, - "step": 6904 - }, - { - "epoch": 0.45127769426834846, - "grad_norm": 0.42056483030319214, - "learning_rate": 9.569411122761597e-06, - "loss": 0.3593, - "step": 6905 - }, - { - "epoch": 0.4513430494738906, - "grad_norm": 0.45343002676963806, - "learning_rate": 9.569269347666185e-06, - "loss": 0.3415, - "step": 6906 - }, - { - "epoch": 0.4514084046794327, - "grad_norm": 0.49917858839035034, - "learning_rate": 9.569127550284863e-06, - "loss": 0.4187, - "step": 6907 - }, - { - "epoch": 0.45147375988497485, - "grad_norm": 0.43981149792671204, - "learning_rate": 9.568985730618321e-06, - "loss": 0.3795, - "step": 6908 - }, - { - "epoch": 0.451539115090517, - "grad_norm": 0.5186979174613953, - "learning_rate": 9.568843888667256e-06, - "loss": 0.3623, - "step": 6909 - }, - { - "epoch": 0.4516044702960591, - "grad_norm": 0.43448546528816223, - "learning_rate": 9.568702024432355e-06, - "loss": 0.36, - "step": 6910 - }, - { - "epoch": 0.45166982550160123, - "grad_norm": 0.45746171474456787, - "learning_rate": 9.568560137914312e-06, - "loss": 0.3569, - "step": 6911 - }, - { - "epoch": 0.4517351807071433, - "grad_norm": 0.5021383762359619, - "learning_rate": 9.568418229113816e-06, - "loss": 0.4158, - "step": 6912 - }, - { - "epoch": 0.45180053591268543, - "grad_norm": 0.4388960599899292, - "learning_rate": 9.568276298031565e-06, - "loss": 0.361, - "step": 6913 - }, - { - "epoch": 0.45186589111822756, - "grad_norm": 0.48843470215797424, - "learning_rate": 9.568134344668245e-06, - "loss": 0.3759, - "step": 6914 - }, - { - "epoch": 0.4519312463237697, - "grad_norm": 0.443772554397583, - "learning_rate": 9.567992369024554e-06, - "loss": 0.3917, - "step": 6915 - }, - { - "epoch": 0.4519966015293118, - "grad_norm": 0.48165953159332275, - "learning_rate": 9.56785037110118e-06, - "loss": 0.395, - "step": 6916 - }, - { - "epoch": 0.45206195673485394, - "grad_norm": 0.5079911351203918, - "learning_rate": 9.56770835089882e-06, - "loss": 0.4319, - "step": 6917 - }, - { - "epoch": 0.45212731194039607, - "grad_norm": 0.46010375022888184, - "learning_rate": 9.567566308418159e-06, - "loss": 0.4077, - "step": 6918 - }, - { - "epoch": 0.4521926671459382, - "grad_norm": 0.4574672281742096, - "learning_rate": 9.567424243659898e-06, - "loss": 0.4111, - "step": 6919 - }, - { - "epoch": 0.45225802235148027, - "grad_norm": 0.4674014449119568, - "learning_rate": 9.567282156624727e-06, - "loss": 0.3923, - "step": 6920 - }, - { - "epoch": 0.4523233775570224, - "grad_norm": 0.48392459750175476, - "learning_rate": 9.567140047313337e-06, - "loss": 0.446, - "step": 6921 - }, - { - "epoch": 0.4523887327625645, - "grad_norm": 0.4962601959705353, - "learning_rate": 9.566997915726423e-06, - "loss": 0.4486, - "step": 6922 - }, - { - "epoch": 0.45245408796810666, - "grad_norm": 0.4572274088859558, - "learning_rate": 9.566855761864679e-06, - "loss": 0.4068, - "step": 6923 - }, - { - "epoch": 0.4525194431736488, - "grad_norm": 0.4558427333831787, - "learning_rate": 9.566713585728797e-06, - "loss": 0.3974, - "step": 6924 - }, - { - "epoch": 0.4525847983791909, - "grad_norm": 0.47658196091651917, - "learning_rate": 9.566571387319473e-06, - "loss": 0.4158, - "step": 6925 - }, - { - "epoch": 0.45265015358473304, - "grad_norm": 0.47146621346473694, - "learning_rate": 9.566429166637395e-06, - "loss": 0.3965, - "step": 6926 - }, - { - "epoch": 0.45271550879027517, - "grad_norm": 0.44216328859329224, - "learning_rate": 9.566286923683261e-06, - "loss": 0.3713, - "step": 6927 - }, - { - "epoch": 0.45278086399581724, - "grad_norm": 0.4713079631328583, - "learning_rate": 9.566144658457763e-06, - "loss": 0.4071, - "step": 6928 - }, - { - "epoch": 0.45284621920135937, - "grad_norm": 0.489364355802536, - "learning_rate": 9.566002370961596e-06, - "loss": 0.4098, - "step": 6929 - }, - { - "epoch": 0.4529115744069015, - "grad_norm": 0.445444256067276, - "learning_rate": 9.565860061195455e-06, - "loss": 0.3858, - "step": 6930 - }, - { - "epoch": 0.4529769296124436, - "grad_norm": 0.46047458052635193, - "learning_rate": 9.56571772916003e-06, - "loss": 0.3969, - "step": 6931 - }, - { - "epoch": 0.45304228481798575, - "grad_norm": 0.4774382412433624, - "learning_rate": 9.565575374856022e-06, - "loss": 0.4024, - "step": 6932 - }, - { - "epoch": 0.4531076400235279, - "grad_norm": 0.463263601064682, - "learning_rate": 9.565432998284118e-06, - "loss": 0.4047, - "step": 6933 - }, - { - "epoch": 0.45317299522907, - "grad_norm": 0.46228039264678955, - "learning_rate": 9.565290599445016e-06, - "loss": 0.4126, - "step": 6934 - }, - { - "epoch": 0.45323835043461214, - "grad_norm": 0.476277232170105, - "learning_rate": 9.565148178339411e-06, - "loss": 0.4198, - "step": 6935 - }, - { - "epoch": 0.45330370564015426, - "grad_norm": 0.43697965145111084, - "learning_rate": 9.565005734967997e-06, - "loss": 0.3851, - "step": 6936 - }, - { - "epoch": 0.45336906084569634, - "grad_norm": 0.48467156291007996, - "learning_rate": 9.564863269331469e-06, - "loss": 0.4153, - "step": 6937 - }, - { - "epoch": 0.45343441605123846, - "grad_norm": 0.4783487021923065, - "learning_rate": 9.56472078143052e-06, - "loss": 0.3704, - "step": 6938 - }, - { - "epoch": 0.4534997712567806, - "grad_norm": 0.4710427224636078, - "learning_rate": 9.564578271265847e-06, - "loss": 0.4071, - "step": 6939 - }, - { - "epoch": 0.4535651264623227, - "grad_norm": 0.458793580532074, - "learning_rate": 9.564435738838144e-06, - "loss": 0.4282, - "step": 6940 - }, - { - "epoch": 0.45363048166786485, - "grad_norm": 0.4751000702381134, - "learning_rate": 9.564293184148109e-06, - "loss": 0.4182, - "step": 6941 - }, - { - "epoch": 0.453695836873407, - "grad_norm": 0.5245192646980286, - "learning_rate": 9.564150607196431e-06, - "loss": 0.4666, - "step": 6942 - }, - { - "epoch": 0.4537611920789491, - "grad_norm": 0.44270089268684387, - "learning_rate": 9.564008007983811e-06, - "loss": 0.3558, - "step": 6943 - }, - { - "epoch": 0.45382654728449123, - "grad_norm": 0.5179519057273865, - "learning_rate": 9.563865386510943e-06, - "loss": 0.4648, - "step": 6944 - }, - { - "epoch": 0.4538919024900333, - "grad_norm": 0.4951685070991516, - "learning_rate": 9.563722742778523e-06, - "loss": 0.4332, - "step": 6945 - }, - { - "epoch": 0.45395725769557543, - "grad_norm": 0.5189263224601746, - "learning_rate": 9.563580076787246e-06, - "loss": 0.4989, - "step": 6946 - }, - { - "epoch": 0.45402261290111756, - "grad_norm": 0.46859562397003174, - "learning_rate": 9.563437388537808e-06, - "loss": 0.4128, - "step": 6947 - }, - { - "epoch": 0.4540879681066597, - "grad_norm": 0.4332079589366913, - "learning_rate": 9.563294678030903e-06, - "loss": 0.3603, - "step": 6948 - }, - { - "epoch": 0.4541533233122018, - "grad_norm": 0.46565601229667664, - "learning_rate": 9.563151945267232e-06, - "loss": 0.4352, - "step": 6949 - }, - { - "epoch": 0.45421867851774395, - "grad_norm": 0.48951128125190735, - "learning_rate": 9.563009190247487e-06, - "loss": 0.4578, - "step": 6950 - }, - { - "epoch": 0.4542840337232861, - "grad_norm": 0.47698163986206055, - "learning_rate": 9.562866412972365e-06, - "loss": 0.3924, - "step": 6951 - }, - { - "epoch": 0.4543493889288282, - "grad_norm": 0.46763136982917786, - "learning_rate": 9.562723613442562e-06, - "loss": 0.4206, - "step": 6952 - }, - { - "epoch": 0.45441474413437033, - "grad_norm": 0.43223679065704346, - "learning_rate": 9.562580791658776e-06, - "loss": 0.372, - "step": 6953 - }, - { - "epoch": 0.4544800993399124, - "grad_norm": 0.4109695255756378, - "learning_rate": 9.562437947621703e-06, - "loss": 0.3387, - "step": 6954 - }, - { - "epoch": 0.45454545454545453, - "grad_norm": 0.4674420654773712, - "learning_rate": 9.56229508133204e-06, - "loss": 0.4166, - "step": 6955 - }, - { - "epoch": 0.45461080975099666, - "grad_norm": 0.4164307713508606, - "learning_rate": 9.562152192790482e-06, - "loss": 0.3337, - "step": 6956 - }, - { - "epoch": 0.4546761649565388, - "grad_norm": 0.45005887746810913, - "learning_rate": 9.562009281997728e-06, - "loss": 0.368, - "step": 6957 - }, - { - "epoch": 0.4547415201620809, - "grad_norm": 0.47211381793022156, - "learning_rate": 9.561866348954477e-06, - "loss": 0.3919, - "step": 6958 - }, - { - "epoch": 0.45480687536762304, - "grad_norm": 0.449897438287735, - "learning_rate": 9.56172339366142e-06, - "loss": 0.3737, - "step": 6959 - }, - { - "epoch": 0.45487223057316517, - "grad_norm": 0.46587154269218445, - "learning_rate": 9.561580416119259e-06, - "loss": 0.4175, - "step": 6960 - }, - { - "epoch": 0.4549375857787073, - "grad_norm": 0.479175865650177, - "learning_rate": 9.561437416328688e-06, - "loss": 0.4255, - "step": 6961 - }, - { - "epoch": 0.45500294098424937, - "grad_norm": 0.4773583710193634, - "learning_rate": 9.561294394290408e-06, - "loss": 0.3959, - "step": 6962 - }, - { - "epoch": 0.4550682961897915, - "grad_norm": 0.5353477001190186, - "learning_rate": 9.561151350005115e-06, - "loss": 0.507, - "step": 6963 - }, - { - "epoch": 0.4551336513953336, - "grad_norm": 0.4761268198490143, - "learning_rate": 9.561008283473507e-06, - "loss": 0.4126, - "step": 6964 - }, - { - "epoch": 0.45519900660087576, - "grad_norm": 0.43995970487594604, - "learning_rate": 9.560865194696282e-06, - "loss": 0.3706, - "step": 6965 - }, - { - "epoch": 0.4552643618064179, - "grad_norm": 0.49021244049072266, - "learning_rate": 9.560722083674136e-06, - "loss": 0.4448, - "step": 6966 - }, - { - "epoch": 0.45532971701196, - "grad_norm": 0.5180768370628357, - "learning_rate": 9.56057895040777e-06, - "loss": 0.4792, - "step": 6967 - }, - { - "epoch": 0.45539507221750214, - "grad_norm": 0.4723127484321594, - "learning_rate": 9.56043579489788e-06, - "loss": 0.4008, - "step": 6968 - }, - { - "epoch": 0.45546042742304427, - "grad_norm": 0.4562893509864807, - "learning_rate": 9.560292617145163e-06, - "loss": 0.4033, - "step": 6969 - }, - { - "epoch": 0.45552578262858634, - "grad_norm": 0.48076966404914856, - "learning_rate": 9.560149417150322e-06, - "loss": 0.4177, - "step": 6970 - }, - { - "epoch": 0.45559113783412847, - "grad_norm": 0.4708404541015625, - "learning_rate": 9.560006194914051e-06, - "loss": 0.4152, - "step": 6971 - }, - { - "epoch": 0.4556564930396706, - "grad_norm": 0.481901615858078, - "learning_rate": 9.55986295043705e-06, - "loss": 0.4467, - "step": 6972 - }, - { - "epoch": 0.4557218482452127, - "grad_norm": 0.47024330496788025, - "learning_rate": 9.559719683720017e-06, - "loss": 0.3817, - "step": 6973 - }, - { - "epoch": 0.45578720345075485, - "grad_norm": 0.46320924162864685, - "learning_rate": 9.559576394763652e-06, - "loss": 0.3843, - "step": 6974 - }, - { - "epoch": 0.455852558656297, - "grad_norm": 0.5211080312728882, - "learning_rate": 9.559433083568654e-06, - "loss": 0.4468, - "step": 6975 - }, - { - "epoch": 0.4559179138618391, - "grad_norm": 0.4338943660259247, - "learning_rate": 9.55928975013572e-06, - "loss": 0.3722, - "step": 6976 - }, - { - "epoch": 0.45598326906738124, - "grad_norm": 0.44695672392845154, - "learning_rate": 9.559146394465553e-06, - "loss": 0.3984, - "step": 6977 - }, - { - "epoch": 0.45604862427292336, - "grad_norm": 0.44805628061294556, - "learning_rate": 9.559003016558848e-06, - "loss": 0.3668, - "step": 6978 - }, - { - "epoch": 0.45611397947846544, - "grad_norm": 0.45200228691101074, - "learning_rate": 9.558859616416305e-06, - "loss": 0.3769, - "step": 6979 - }, - { - "epoch": 0.45617933468400756, - "grad_norm": 0.4438546299934387, - "learning_rate": 9.558716194038625e-06, - "loss": 0.387, - "step": 6980 - }, - { - "epoch": 0.4562446898895497, - "grad_norm": 0.4782806932926178, - "learning_rate": 9.558572749426507e-06, - "loss": 0.405, - "step": 6981 - }, - { - "epoch": 0.4563100450950918, - "grad_norm": 0.5054789185523987, - "learning_rate": 9.55842928258065e-06, - "loss": 0.4615, - "step": 6982 - }, - { - "epoch": 0.45637540030063395, - "grad_norm": 0.4915611147880554, - "learning_rate": 9.558285793501756e-06, - "loss": 0.4339, - "step": 6983 - }, - { - "epoch": 0.4564407555061761, - "grad_norm": 0.5091811418533325, - "learning_rate": 9.558142282190521e-06, - "loss": 0.4851, - "step": 6984 - }, - { - "epoch": 0.4565061107117182, - "grad_norm": 0.48367637395858765, - "learning_rate": 9.557998748647648e-06, - "loss": 0.3858, - "step": 6985 - }, - { - "epoch": 0.45657146591726033, - "grad_norm": 0.4622894525527954, - "learning_rate": 9.557855192873834e-06, - "loss": 0.4186, - "step": 6986 - }, - { - "epoch": 0.4566368211228024, - "grad_norm": 0.5128580331802368, - "learning_rate": 9.557711614869785e-06, - "loss": 0.4223, - "step": 6987 - }, - { - "epoch": 0.45670217632834453, - "grad_norm": 0.4722349941730499, - "learning_rate": 9.557568014636195e-06, - "loss": 0.4225, - "step": 6988 - }, - { - "epoch": 0.45676753153388666, - "grad_norm": 0.4965941905975342, - "learning_rate": 9.557424392173766e-06, - "loss": 0.3799, - "step": 6989 - }, - { - "epoch": 0.4568328867394288, - "grad_norm": 0.5437185168266296, - "learning_rate": 9.557280747483202e-06, - "loss": 0.3664, - "step": 6990 - }, - { - "epoch": 0.4568982419449709, - "grad_norm": 0.45322853326797485, - "learning_rate": 9.5571370805652e-06, - "loss": 0.4068, - "step": 6991 - }, - { - "epoch": 0.45696359715051305, - "grad_norm": 0.46104517579078674, - "learning_rate": 9.556993391420462e-06, - "loss": 0.3672, - "step": 6992 - }, - { - "epoch": 0.4570289523560552, - "grad_norm": 0.4782438278198242, - "learning_rate": 9.556849680049687e-06, - "loss": 0.3992, - "step": 6993 - }, - { - "epoch": 0.4570943075615973, - "grad_norm": 0.4755147397518158, - "learning_rate": 9.556705946453578e-06, - "loss": 0.3868, - "step": 6994 - }, - { - "epoch": 0.45715966276713943, - "grad_norm": 0.47630441188812256, - "learning_rate": 9.556562190632837e-06, - "loss": 0.4016, - "step": 6995 - }, - { - "epoch": 0.4572250179726815, - "grad_norm": 0.4549620747566223, - "learning_rate": 9.556418412588163e-06, - "loss": 0.378, - "step": 6996 - }, - { - "epoch": 0.45729037317822363, - "grad_norm": 0.4581489861011505, - "learning_rate": 9.556274612320257e-06, - "loss": 0.3912, - "step": 6997 - }, - { - "epoch": 0.45735572838376576, - "grad_norm": 0.4601094424724579, - "learning_rate": 9.556130789829821e-06, - "loss": 0.4044, - "step": 6998 - }, - { - "epoch": 0.4574210835893079, - "grad_norm": 0.448824942111969, - "learning_rate": 9.555986945117558e-06, - "loss": 0.3811, - "step": 6999 - }, - { - "epoch": 0.45748643879485, - "grad_norm": 0.484931617975235, - "learning_rate": 9.555843078184169e-06, - "loss": 0.3791, - "step": 7000 - }, - { - "epoch": 0.45755179400039214, - "grad_norm": 0.41543468832969666, - "learning_rate": 9.555699189030354e-06, - "loss": 0.3233, - "step": 7001 - }, - { - "epoch": 0.45761714920593427, - "grad_norm": 0.47216349840164185, - "learning_rate": 9.555555277656815e-06, - "loss": 0.4125, - "step": 7002 - }, - { - "epoch": 0.4576825044114764, - "grad_norm": 0.45830056071281433, - "learning_rate": 9.555411344064255e-06, - "loss": 0.3899, - "step": 7003 - }, - { - "epoch": 0.45774785961701847, - "grad_norm": 0.45786866545677185, - "learning_rate": 9.555267388253375e-06, - "loss": 0.3367, - "step": 7004 - }, - { - "epoch": 0.4578132148225606, - "grad_norm": 0.4865111708641052, - "learning_rate": 9.55512341022488e-06, - "loss": 0.3759, - "step": 7005 - }, - { - "epoch": 0.4578785700281027, - "grad_norm": 0.47519540786743164, - "learning_rate": 9.55497940997947e-06, - "loss": 0.3779, - "step": 7006 - }, - { - "epoch": 0.45794392523364486, - "grad_norm": 0.45519840717315674, - "learning_rate": 9.554835387517844e-06, - "loss": 0.4354, - "step": 7007 - }, - { - "epoch": 0.458009280439187, - "grad_norm": 0.47062450647354126, - "learning_rate": 9.554691342840711e-06, - "loss": 0.4141, - "step": 7008 - }, - { - "epoch": 0.4580746356447291, - "grad_norm": 0.4759996235370636, - "learning_rate": 9.554547275948772e-06, - "loss": 0.4266, - "step": 7009 - }, - { - "epoch": 0.45813999085027124, - "grad_norm": 0.4584622383117676, - "learning_rate": 9.554403186842725e-06, - "loss": 0.4054, - "step": 7010 - }, - { - "epoch": 0.45820534605581337, - "grad_norm": 0.46390944719314575, - "learning_rate": 9.554259075523276e-06, - "loss": 0.4432, - "step": 7011 - }, - { - "epoch": 0.45827070126135544, - "grad_norm": 0.4170962870121002, - "learning_rate": 9.554114941991128e-06, - "loss": 0.3316, - "step": 7012 - }, - { - "epoch": 0.45833605646689757, - "grad_norm": 0.4917965531349182, - "learning_rate": 9.553970786246985e-06, - "loss": 0.422, - "step": 7013 - }, - { - "epoch": 0.4584014116724397, - "grad_norm": 0.47754913568496704, - "learning_rate": 9.553826608291547e-06, - "loss": 0.3928, - "step": 7014 - }, - { - "epoch": 0.4584667668779818, - "grad_norm": 0.45876345038414, - "learning_rate": 9.553682408125521e-06, - "loss": 0.4065, - "step": 7015 - }, - { - "epoch": 0.45853212208352395, - "grad_norm": 0.47021669149398804, - "learning_rate": 9.553538185749607e-06, - "loss": 0.4149, - "step": 7016 - }, - { - "epoch": 0.4585974772890661, - "grad_norm": 0.47361770272254944, - "learning_rate": 9.55339394116451e-06, - "loss": 0.4048, - "step": 7017 - }, - { - "epoch": 0.4586628324946082, - "grad_norm": 0.4924563467502594, - "learning_rate": 9.553249674370935e-06, - "loss": 0.457, - "step": 7018 - }, - { - "epoch": 0.45872818770015034, - "grad_norm": 0.47624871134757996, - "learning_rate": 9.553105385369581e-06, - "loss": 0.4158, - "step": 7019 - }, - { - "epoch": 0.45879354290569246, - "grad_norm": 0.4325328469276428, - "learning_rate": 9.552961074161156e-06, - "loss": 0.3604, - "step": 7020 - }, - { - "epoch": 0.45885889811123454, - "grad_norm": 0.4883722960948944, - "learning_rate": 9.552816740746363e-06, - "loss": 0.4353, - "step": 7021 - }, - { - "epoch": 0.45892425331677666, - "grad_norm": 0.45907896757125854, - "learning_rate": 9.552672385125906e-06, - "loss": 0.3444, - "step": 7022 - }, - { - "epoch": 0.4589896085223188, - "grad_norm": 0.4705089330673218, - "learning_rate": 9.552528007300488e-06, - "loss": 0.4002, - "step": 7023 - }, - { - "epoch": 0.4590549637278609, - "grad_norm": 0.47056421637535095, - "learning_rate": 9.552383607270812e-06, - "loss": 0.3944, - "step": 7024 - }, - { - "epoch": 0.45912031893340305, - "grad_norm": 0.44806599617004395, - "learning_rate": 9.552239185037586e-06, - "loss": 0.3808, - "step": 7025 - }, - { - "epoch": 0.4591856741389452, - "grad_norm": 0.4405684769153595, - "learning_rate": 9.552094740601512e-06, - "loss": 0.3718, - "step": 7026 - }, - { - "epoch": 0.4592510293444873, - "grad_norm": 0.4540782570838928, - "learning_rate": 9.551950273963296e-06, - "loss": 0.3708, - "step": 7027 - }, - { - "epoch": 0.45931638455002943, - "grad_norm": 0.47009095549583435, - "learning_rate": 9.55180578512364e-06, - "loss": 0.4093, - "step": 7028 - }, - { - "epoch": 0.4593817397555715, - "grad_norm": 0.48366907238960266, - "learning_rate": 9.55166127408325e-06, - "loss": 0.3835, - "step": 7029 - }, - { - "epoch": 0.45944709496111363, - "grad_norm": 0.46774783730506897, - "learning_rate": 9.551516740842833e-06, - "loss": 0.3985, - "step": 7030 - }, - { - "epoch": 0.45951245016665576, - "grad_norm": 0.45185360312461853, - "learning_rate": 9.551372185403091e-06, - "loss": 0.3879, - "step": 7031 - }, - { - "epoch": 0.4595778053721979, - "grad_norm": 0.4647712707519531, - "learning_rate": 9.551227607764728e-06, - "loss": 0.4021, - "step": 7032 - }, - { - "epoch": 0.45964316057774, - "grad_norm": 0.4509907066822052, - "learning_rate": 9.551083007928455e-06, - "loss": 0.3703, - "step": 7033 - }, - { - "epoch": 0.45970851578328215, - "grad_norm": 0.4928545653820038, - "learning_rate": 9.550938385894973e-06, - "loss": 0.4177, - "step": 7034 - }, - { - "epoch": 0.4597738709888243, - "grad_norm": 0.6041052937507629, - "learning_rate": 9.550793741664985e-06, - "loss": 0.4794, - "step": 7035 - }, - { - "epoch": 0.4598392261943664, - "grad_norm": 0.4401216506958008, - "learning_rate": 9.550649075239203e-06, - "loss": 0.3415, - "step": 7036 - }, - { - "epoch": 0.45990458139990853, - "grad_norm": 0.47015464305877686, - "learning_rate": 9.550504386618326e-06, - "loss": 0.382, - "step": 7037 - }, - { - "epoch": 0.4599699366054506, - "grad_norm": 0.5325417518615723, - "learning_rate": 9.550359675803064e-06, - "loss": 0.4573, - "step": 7038 - }, - { - "epoch": 0.46003529181099273, - "grad_norm": 0.4384169578552246, - "learning_rate": 9.55021494279412e-06, - "loss": 0.3669, - "step": 7039 - }, - { - "epoch": 0.46010064701653486, - "grad_norm": 0.4498749077320099, - "learning_rate": 9.550070187592204e-06, - "loss": 0.3737, - "step": 7040 - }, - { - "epoch": 0.460166002222077, - "grad_norm": 0.49636998772621155, - "learning_rate": 9.549925410198017e-06, - "loss": 0.4166, - "step": 7041 - }, - { - "epoch": 0.4602313574276191, - "grad_norm": 0.4357587397098541, - "learning_rate": 9.549780610612269e-06, - "loss": 0.3362, - "step": 7042 - }, - { - "epoch": 0.46029671263316124, - "grad_norm": 0.4485979676246643, - "learning_rate": 9.549635788835665e-06, - "loss": 0.2888, - "step": 7043 - }, - { - "epoch": 0.46036206783870337, - "grad_norm": 0.4663568139076233, - "learning_rate": 9.549490944868908e-06, - "loss": 0.4285, - "step": 7044 - }, - { - "epoch": 0.4604274230442455, - "grad_norm": 0.4595623016357422, - "learning_rate": 9.549346078712711e-06, - "loss": 0.3949, - "step": 7045 - }, - { - "epoch": 0.46049277824978757, - "grad_norm": 0.45535972714424133, - "learning_rate": 9.549201190367776e-06, - "loss": 0.4061, - "step": 7046 - }, - { - "epoch": 0.4605581334553297, - "grad_norm": 0.4724358320236206, - "learning_rate": 9.549056279834811e-06, - "loss": 0.415, - "step": 7047 - }, - { - "epoch": 0.4606234886608718, - "grad_norm": 0.47900664806365967, - "learning_rate": 9.548911347114523e-06, - "loss": 0.416, - "step": 7048 - }, - { - "epoch": 0.46068884386641396, - "grad_norm": 0.45628440380096436, - "learning_rate": 9.548766392207618e-06, - "loss": 0.428, - "step": 7049 - }, - { - "epoch": 0.4607541990719561, - "grad_norm": 0.4388725459575653, - "learning_rate": 9.548621415114802e-06, - "loss": 0.3626, - "step": 7050 - }, - { - "epoch": 0.4608195542774982, - "grad_norm": 0.4458193778991699, - "learning_rate": 9.548476415836788e-06, - "loss": 0.3527, - "step": 7051 - }, - { - "epoch": 0.46088490948304034, - "grad_norm": 0.4481854736804962, - "learning_rate": 9.548331394374276e-06, - "loss": 0.3717, - "step": 7052 - }, - { - "epoch": 0.46095026468858247, - "grad_norm": 0.4708429276943207, - "learning_rate": 9.548186350727974e-06, - "loss": 0.3935, - "step": 7053 - }, - { - "epoch": 0.46101561989412454, - "grad_norm": 0.4286001920700073, - "learning_rate": 9.548041284898595e-06, - "loss": 0.3441, - "step": 7054 - }, - { - "epoch": 0.46108097509966667, - "grad_norm": 0.48979654908180237, - "learning_rate": 9.54789619688684e-06, - "loss": 0.419, - "step": 7055 - }, - { - "epoch": 0.4611463303052088, - "grad_norm": 0.4655849039554596, - "learning_rate": 9.547751086693422e-06, - "loss": 0.4043, - "step": 7056 - }, - { - "epoch": 0.4612116855107509, - "grad_norm": 0.4343613088130951, - "learning_rate": 9.547605954319045e-06, - "loss": 0.3885, - "step": 7057 - }, - { - "epoch": 0.46127704071629305, - "grad_norm": 0.46460336446762085, - "learning_rate": 9.547460799764418e-06, - "loss": 0.4341, - "step": 7058 - }, - { - "epoch": 0.4613423959218352, - "grad_norm": 0.451212078332901, - "learning_rate": 9.547315623030251e-06, - "loss": 0.3995, - "step": 7059 - }, - { - "epoch": 0.4614077511273773, - "grad_norm": 0.4431312382221222, - "learning_rate": 9.547170424117249e-06, - "loss": 0.3942, - "step": 7060 - }, - { - "epoch": 0.46147310633291944, - "grad_norm": 0.4502315819263458, - "learning_rate": 9.547025203026122e-06, - "loss": 0.4037, - "step": 7061 - }, - { - "epoch": 0.46153846153846156, - "grad_norm": 0.47694632411003113, - "learning_rate": 9.546879959757578e-06, - "loss": 0.3787, - "step": 7062 - }, - { - "epoch": 0.46160381674400364, - "grad_norm": 0.44212377071380615, - "learning_rate": 9.546734694312325e-06, - "loss": 0.4185, - "step": 7063 - }, - { - "epoch": 0.46166917194954576, - "grad_norm": 0.4314897656440735, - "learning_rate": 9.546589406691073e-06, - "loss": 0.3314, - "step": 7064 - }, - { - "epoch": 0.4617345271550879, - "grad_norm": 0.4295647442340851, - "learning_rate": 9.546444096894527e-06, - "loss": 0.396, - "step": 7065 - }, - { - "epoch": 0.46179988236063, - "grad_norm": 0.44813117384910583, - "learning_rate": 9.546298764923401e-06, - "loss": 0.3634, - "step": 7066 - }, - { - "epoch": 0.46186523756617215, - "grad_norm": 0.46085724234580994, - "learning_rate": 9.546153410778397e-06, - "loss": 0.393, - "step": 7067 - }, - { - "epoch": 0.4619305927717143, - "grad_norm": 0.4404080808162689, - "learning_rate": 9.546008034460233e-06, - "loss": 0.393, - "step": 7068 - }, - { - "epoch": 0.4619959479772564, - "grad_norm": 0.4515850841999054, - "learning_rate": 9.54586263596961e-06, - "loss": 0.3802, - "step": 7069 - }, - { - "epoch": 0.46206130318279853, - "grad_norm": 0.48453107476234436, - "learning_rate": 9.54571721530724e-06, - "loss": 0.4598, - "step": 7070 - }, - { - "epoch": 0.4621266583883406, - "grad_norm": 0.44939786195755005, - "learning_rate": 9.545571772473832e-06, - "loss": 0.4033, - "step": 7071 - }, - { - "epoch": 0.46219201359388273, - "grad_norm": 0.46518874168395996, - "learning_rate": 9.545426307470096e-06, - "loss": 0.4157, - "step": 7072 - }, - { - "epoch": 0.46225736879942486, - "grad_norm": 0.48767927289009094, - "learning_rate": 9.545280820296742e-06, - "loss": 0.4307, - "step": 7073 - }, - { - "epoch": 0.462322724004967, - "grad_norm": 0.4950529932975769, - "learning_rate": 9.545135310954479e-06, - "loss": 0.4363, - "step": 7074 - }, - { - "epoch": 0.4623880792105091, - "grad_norm": 0.45484399795532227, - "learning_rate": 9.544989779444017e-06, - "loss": 0.4088, - "step": 7075 - }, - { - "epoch": 0.46245343441605125, - "grad_norm": 0.4709393382072449, - "learning_rate": 9.544844225766064e-06, - "loss": 0.4355, - "step": 7076 - }, - { - "epoch": 0.4625187896215934, - "grad_norm": 0.46647417545318604, - "learning_rate": 9.54469864992133e-06, - "loss": 0.3886, - "step": 7077 - }, - { - "epoch": 0.4625841448271355, - "grad_norm": 0.4877423048019409, - "learning_rate": 9.544553051910527e-06, - "loss": 0.4058, - "step": 7078 - }, - { - "epoch": 0.46264950003267763, - "grad_norm": 0.44288796186447144, - "learning_rate": 9.544407431734366e-06, - "loss": 0.3706, - "step": 7079 - }, - { - "epoch": 0.4627148552382197, - "grad_norm": 0.41956257820129395, - "learning_rate": 9.544261789393554e-06, - "loss": 0.3172, - "step": 7080 - }, - { - "epoch": 0.46278021044376183, - "grad_norm": 0.46104365587234497, - "learning_rate": 9.544116124888806e-06, - "loss": 0.4235, - "step": 7081 - }, - { - "epoch": 0.46284556564930396, - "grad_norm": 0.4715181589126587, - "learning_rate": 9.543970438220825e-06, - "loss": 0.418, - "step": 7082 - }, - { - "epoch": 0.4629109208548461, - "grad_norm": 0.47676488757133484, - "learning_rate": 9.543824729390329e-06, - "loss": 0.3958, - "step": 7083 - }, - { - "epoch": 0.4629762760603882, - "grad_norm": 0.4482547640800476, - "learning_rate": 9.543678998398024e-06, - "loss": 0.4033, - "step": 7084 - }, - { - "epoch": 0.46304163126593034, - "grad_norm": 0.4512160122394562, - "learning_rate": 9.543533245244624e-06, - "loss": 0.4038, - "step": 7085 - }, - { - "epoch": 0.46310698647147247, - "grad_norm": 0.4391566514968872, - "learning_rate": 9.543387469930835e-06, - "loss": 0.3776, - "step": 7086 - }, - { - "epoch": 0.4631723416770146, - "grad_norm": 0.43100181221961975, - "learning_rate": 9.543241672457376e-06, - "loss": 0.3627, - "step": 7087 - }, - { - "epoch": 0.46323769688255667, - "grad_norm": 0.48517122864723206, - "learning_rate": 9.54309585282495e-06, - "loss": 0.4069, - "step": 7088 - }, - { - "epoch": 0.4633030520880988, - "grad_norm": 0.5059811472892761, - "learning_rate": 9.542950011034273e-06, - "loss": 0.4317, - "step": 7089 - }, - { - "epoch": 0.4633684072936409, - "grad_norm": 0.4587441384792328, - "learning_rate": 9.542804147086055e-06, - "loss": 0.4202, - "step": 7090 - }, - { - "epoch": 0.46343376249918306, - "grad_norm": 0.4466225802898407, - "learning_rate": 9.542658260981008e-06, - "loss": 0.4004, - "step": 7091 - }, - { - "epoch": 0.4634991177047252, - "grad_norm": 0.4705326557159424, - "learning_rate": 9.542512352719842e-06, - "loss": 0.3731, - "step": 7092 - }, - { - "epoch": 0.4635644729102673, - "grad_norm": 0.4367637634277344, - "learning_rate": 9.542366422303269e-06, - "loss": 0.3374, - "step": 7093 - }, - { - "epoch": 0.46362982811580944, - "grad_norm": 0.4710226058959961, - "learning_rate": 9.542220469732004e-06, - "loss": 0.4031, - "step": 7094 - }, - { - "epoch": 0.46369518332135157, - "grad_norm": 0.4866265654563904, - "learning_rate": 9.542074495006754e-06, - "loss": 0.4594, - "step": 7095 - }, - { - "epoch": 0.46376053852689364, - "grad_norm": 0.47064992785453796, - "learning_rate": 9.541928498128232e-06, - "loss": 0.3811, - "step": 7096 - }, - { - "epoch": 0.46382589373243577, - "grad_norm": 0.4414912462234497, - "learning_rate": 9.541782479097152e-06, - "loss": 0.3543, - "step": 7097 - }, - { - "epoch": 0.4638912489379779, - "grad_norm": 0.4506942331790924, - "learning_rate": 9.541636437914228e-06, - "loss": 0.3733, - "step": 7098 - }, - { - "epoch": 0.46395660414352, - "grad_norm": 0.5158799886703491, - "learning_rate": 9.541490374580166e-06, - "loss": 0.4275, - "step": 7099 - }, - { - "epoch": 0.46402195934906215, - "grad_norm": 0.48787835240364075, - "learning_rate": 9.541344289095686e-06, - "loss": 0.4212, - "step": 7100 - }, - { - "epoch": 0.4640873145546043, - "grad_norm": 0.43957990407943726, - "learning_rate": 9.541198181461493e-06, - "loss": 0.405, - "step": 7101 - }, - { - "epoch": 0.4641526697601464, - "grad_norm": 0.48659685254096985, - "learning_rate": 9.541052051678306e-06, - "loss": 0.4616, - "step": 7102 - }, - { - "epoch": 0.46421802496568854, - "grad_norm": 0.5065358877182007, - "learning_rate": 9.540905899746832e-06, - "loss": 0.4145, - "step": 7103 - }, - { - "epoch": 0.46428338017123066, - "grad_norm": 0.45447084307670593, - "learning_rate": 9.54075972566779e-06, - "loss": 0.3754, - "step": 7104 - }, - { - "epoch": 0.46434873537677274, - "grad_norm": 0.46564382314682007, - "learning_rate": 9.54061352944189e-06, - "loss": 0.3861, - "step": 7105 - }, - { - "epoch": 0.46441409058231486, - "grad_norm": 0.47878503799438477, - "learning_rate": 9.54046731106984e-06, - "loss": 0.3877, - "step": 7106 - }, - { - "epoch": 0.464479445787857, - "grad_norm": 0.4690788984298706, - "learning_rate": 9.540321070552362e-06, - "loss": 0.404, - "step": 7107 - }, - { - "epoch": 0.4645448009933991, - "grad_norm": 0.4505467712879181, - "learning_rate": 9.540174807890165e-06, - "loss": 0.3374, - "step": 7108 - }, - { - "epoch": 0.46461015619894125, - "grad_norm": 0.5293868184089661, - "learning_rate": 9.540028523083962e-06, - "loss": 0.3617, - "step": 7109 - }, - { - "epoch": 0.4646755114044834, - "grad_norm": 0.4439355432987213, - "learning_rate": 9.539882216134467e-06, - "loss": 0.3704, - "step": 7110 - }, - { - "epoch": 0.4647408666100255, - "grad_norm": 0.47333186864852905, - "learning_rate": 9.539735887042395e-06, - "loss": 0.398, - "step": 7111 - }, - { - "epoch": 0.46480622181556763, - "grad_norm": 0.4669288396835327, - "learning_rate": 9.539589535808456e-06, - "loss": 0.4264, - "step": 7112 - }, - { - "epoch": 0.4648715770211097, - "grad_norm": 0.4460848271846771, - "learning_rate": 9.539443162433367e-06, - "loss": 0.385, - "step": 7113 - }, - { - "epoch": 0.46493693222665183, - "grad_norm": 0.4974602460861206, - "learning_rate": 9.539296766917841e-06, - "loss": 0.4145, - "step": 7114 - }, - { - "epoch": 0.46500228743219396, - "grad_norm": 0.4577859044075012, - "learning_rate": 9.539150349262592e-06, - "loss": 0.4027, - "step": 7115 - }, - { - "epoch": 0.4650676426377361, - "grad_norm": 0.48755016922950745, - "learning_rate": 9.539003909468335e-06, - "loss": 0.4269, - "step": 7116 - }, - { - "epoch": 0.4651329978432782, - "grad_norm": 0.4554654657840729, - "learning_rate": 9.538857447535784e-06, - "loss": 0.3876, - "step": 7117 - }, - { - "epoch": 0.46519835304882035, - "grad_norm": 0.49087807536125183, - "learning_rate": 9.538710963465652e-06, - "loss": 0.4259, - "step": 7118 - }, - { - "epoch": 0.4652637082543625, - "grad_norm": 0.4510446786880493, - "learning_rate": 9.538564457258653e-06, - "loss": 0.3821, - "step": 7119 - }, - { - "epoch": 0.4653290634599046, - "grad_norm": 0.5753329992294312, - "learning_rate": 9.538417928915504e-06, - "loss": 0.4437, - "step": 7120 - }, - { - "epoch": 0.46539441866544673, - "grad_norm": 0.4266151487827301, - "learning_rate": 9.538271378436918e-06, - "loss": 0.3637, - "step": 7121 - }, - { - "epoch": 0.4654597738709888, - "grad_norm": 0.44344064593315125, - "learning_rate": 9.538124805823612e-06, - "loss": 0.3848, - "step": 7122 - }, - { - "epoch": 0.46552512907653093, - "grad_norm": 0.46759217977523804, - "learning_rate": 9.537978211076298e-06, - "loss": 0.4182, - "step": 7123 - }, - { - "epoch": 0.46559048428207306, - "grad_norm": 0.44066500663757324, - "learning_rate": 9.537831594195693e-06, - "loss": 0.3965, - "step": 7124 - }, - { - "epoch": 0.4656558394876152, - "grad_norm": 0.48578962683677673, - "learning_rate": 9.537684955182508e-06, - "loss": 0.3876, - "step": 7125 - }, - { - "epoch": 0.4657211946931573, - "grad_norm": 0.47319337725639343, - "learning_rate": 9.537538294037464e-06, - "loss": 0.404, - "step": 7126 - }, - { - "epoch": 0.46578654989869944, - "grad_norm": 0.4367903470993042, - "learning_rate": 9.537391610761275e-06, - "loss": 0.3791, - "step": 7127 - }, - { - "epoch": 0.46585190510424157, - "grad_norm": 0.4693010151386261, - "learning_rate": 9.537244905354655e-06, - "loss": 0.4292, - "step": 7128 - }, - { - "epoch": 0.4659172603097837, - "grad_norm": 0.44172173738479614, - "learning_rate": 9.537098177818318e-06, - "loss": 0.3718, - "step": 7129 - }, - { - "epoch": 0.46598261551532577, - "grad_norm": 0.47351235151290894, - "learning_rate": 9.53695142815298e-06, - "loss": 0.4046, - "step": 7130 - }, - { - "epoch": 0.4660479707208679, - "grad_norm": 0.4396744966506958, - "learning_rate": 9.536804656359362e-06, - "loss": 0.3659, - "step": 7131 - }, - { - "epoch": 0.46611332592641, - "grad_norm": 0.44549560546875, - "learning_rate": 9.536657862438173e-06, - "loss": 0.3389, - "step": 7132 - }, - { - "epoch": 0.46617868113195216, - "grad_norm": 0.4338420033454895, - "learning_rate": 9.536511046390134e-06, - "loss": 0.3673, - "step": 7133 - }, - { - "epoch": 0.4662440363374943, - "grad_norm": 0.46013087034225464, - "learning_rate": 9.536364208215957e-06, - "loss": 0.3877, - "step": 7134 - }, - { - "epoch": 0.4663093915430364, - "grad_norm": 0.47175872325897217, - "learning_rate": 9.536217347916362e-06, - "loss": 0.4194, - "step": 7135 - }, - { - "epoch": 0.46637474674857854, - "grad_norm": 0.4527713358402252, - "learning_rate": 9.536070465492062e-06, - "loss": 0.3947, - "step": 7136 - }, - { - "epoch": 0.46644010195412067, - "grad_norm": 0.44613954424858093, - "learning_rate": 9.535923560943776e-06, - "loss": 0.3834, - "step": 7137 - }, - { - "epoch": 0.46650545715966274, - "grad_norm": 0.4772361218929291, - "learning_rate": 9.53577663427222e-06, - "loss": 0.3975, - "step": 7138 - }, - { - "epoch": 0.46657081236520487, - "grad_norm": 0.5101822018623352, - "learning_rate": 9.53562968547811e-06, - "loss": 0.4177, - "step": 7139 - }, - { - "epoch": 0.466636167570747, - "grad_norm": 0.4716101586818695, - "learning_rate": 9.53548271456216e-06, - "loss": 0.3683, - "step": 7140 - }, - { - "epoch": 0.4667015227762891, - "grad_norm": 0.4602915346622467, - "learning_rate": 9.535335721525091e-06, - "loss": 0.38, - "step": 7141 - }, - { - "epoch": 0.46676687798183125, - "grad_norm": 0.5293527841567993, - "learning_rate": 9.53518870636762e-06, - "loss": 0.4298, - "step": 7142 - }, - { - "epoch": 0.4668322331873734, - "grad_norm": 0.4298951029777527, - "learning_rate": 9.535041669090461e-06, - "loss": 0.3779, - "step": 7143 - }, - { - "epoch": 0.4668975883929155, - "grad_norm": 0.47229844331741333, - "learning_rate": 9.534894609694333e-06, - "loss": 0.4069, - "step": 7144 - }, - { - "epoch": 0.46696294359845764, - "grad_norm": 0.4897076189517975, - "learning_rate": 9.534747528179953e-06, - "loss": 0.4297, - "step": 7145 - }, - { - "epoch": 0.46702829880399976, - "grad_norm": 0.4641999900341034, - "learning_rate": 9.53460042454804e-06, - "loss": 0.3812, - "step": 7146 - }, - { - "epoch": 0.46709365400954184, - "grad_norm": 0.45778489112854004, - "learning_rate": 9.534453298799307e-06, - "loss": 0.387, - "step": 7147 - }, - { - "epoch": 0.46715900921508396, - "grad_norm": 0.45132094621658325, - "learning_rate": 9.534306150934476e-06, - "loss": 0.4103, - "step": 7148 - }, - { - "epoch": 0.4672243644206261, - "grad_norm": 0.5260260701179504, - "learning_rate": 9.534158980954263e-06, - "loss": 0.403, - "step": 7149 - }, - { - "epoch": 0.4672897196261682, - "grad_norm": 0.46466195583343506, - "learning_rate": 9.534011788859386e-06, - "loss": 0.3899, - "step": 7150 - }, - { - "epoch": 0.46735507483171035, - "grad_norm": 0.4723871648311615, - "learning_rate": 9.53386457465056e-06, - "loss": 0.4237, - "step": 7151 - }, - { - "epoch": 0.4674204300372525, - "grad_norm": 0.42926061153411865, - "learning_rate": 9.533717338328508e-06, - "loss": 0.3933, - "step": 7152 - }, - { - "epoch": 0.4674857852427946, - "grad_norm": 0.4415152370929718, - "learning_rate": 9.533570079893946e-06, - "loss": 0.3496, - "step": 7153 - }, - { - "epoch": 0.46755114044833673, - "grad_norm": 0.5533658862113953, - "learning_rate": 9.533422799347594e-06, - "loss": 0.3923, - "step": 7154 - }, - { - "epoch": 0.4676164956538788, - "grad_norm": 0.4387933611869812, - "learning_rate": 9.533275496690165e-06, - "loss": 0.3481, - "step": 7155 - }, - { - "epoch": 0.46768185085942093, - "grad_norm": 0.4454059302806854, - "learning_rate": 9.533128171922384e-06, - "loss": 0.378, - "step": 7156 - }, - { - "epoch": 0.46774720606496306, - "grad_norm": 0.45458248257637024, - "learning_rate": 9.532980825044963e-06, - "loss": 0.4273, - "step": 7157 - }, - { - "epoch": 0.4678125612705052, - "grad_norm": 0.4645242393016815, - "learning_rate": 9.532833456058627e-06, - "loss": 0.4408, - "step": 7158 - }, - { - "epoch": 0.4678779164760473, - "grad_norm": 0.5080562829971313, - "learning_rate": 9.532686064964093e-06, - "loss": 0.4961, - "step": 7159 - }, - { - "epoch": 0.46794327168158945, - "grad_norm": 0.45649388432502747, - "learning_rate": 9.532538651762076e-06, - "loss": 0.3736, - "step": 7160 - }, - { - "epoch": 0.4680086268871316, - "grad_norm": 0.4337565302848816, - "learning_rate": 9.532391216453299e-06, - "loss": 0.3504, - "step": 7161 - }, - { - "epoch": 0.4680739820926737, - "grad_norm": 0.4585168659687042, - "learning_rate": 9.532243759038478e-06, - "loss": 0.3586, - "step": 7162 - }, - { - "epoch": 0.46813933729821583, - "grad_norm": 0.4669223725795746, - "learning_rate": 9.532096279518335e-06, - "loss": 0.4139, - "step": 7163 - }, - { - "epoch": 0.4682046925037579, - "grad_norm": 0.4383951723575592, - "learning_rate": 9.531948777893589e-06, - "loss": 0.3546, - "step": 7164 - }, - { - "epoch": 0.46827004770930003, - "grad_norm": 0.4296281635761261, - "learning_rate": 9.531801254164958e-06, - "loss": 0.3568, - "step": 7165 - }, - { - "epoch": 0.46833540291484216, - "grad_norm": 0.4933410882949829, - "learning_rate": 9.531653708333164e-06, - "loss": 0.4256, - "step": 7166 - }, - { - "epoch": 0.4684007581203843, - "grad_norm": 0.441978394985199, - "learning_rate": 9.531506140398925e-06, - "loss": 0.3638, - "step": 7167 - }, - { - "epoch": 0.4684661133259264, - "grad_norm": 0.4891020655632019, - "learning_rate": 9.53135855036296e-06, - "loss": 0.4353, - "step": 7168 - }, - { - "epoch": 0.46853146853146854, - "grad_norm": 0.44048675894737244, - "learning_rate": 9.531210938225988e-06, - "loss": 0.3646, - "step": 7169 - }, - { - "epoch": 0.46859682373701067, - "grad_norm": 0.4505417048931122, - "learning_rate": 9.531063303988732e-06, - "loss": 0.3837, - "step": 7170 - }, - { - "epoch": 0.4686621789425528, - "grad_norm": 0.42214491963386536, - "learning_rate": 9.53091564765191e-06, - "loss": 0.3387, - "step": 7171 - }, - { - "epoch": 0.46872753414809487, - "grad_norm": 0.456562876701355, - "learning_rate": 9.530767969216244e-06, - "loss": 0.3912, - "step": 7172 - }, - { - "epoch": 0.468792889353637, - "grad_norm": 0.43945470452308655, - "learning_rate": 9.53062026868245e-06, - "loss": 0.3756, - "step": 7173 - }, - { - "epoch": 0.4688582445591791, - "grad_norm": 0.44226452708244324, - "learning_rate": 9.530472546051255e-06, - "loss": 0.3674, - "step": 7174 - }, - { - "epoch": 0.46892359976472126, - "grad_norm": 0.4294166564941406, - "learning_rate": 9.530324801323375e-06, - "loss": 0.3487, - "step": 7175 - }, - { - "epoch": 0.4689889549702634, - "grad_norm": 0.4668973982334137, - "learning_rate": 9.53017703449953e-06, - "loss": 0.3798, - "step": 7176 - }, - { - "epoch": 0.4690543101758055, - "grad_norm": 0.5044274926185608, - "learning_rate": 9.530029245580442e-06, - "loss": 0.4084, - "step": 7177 - }, - { - "epoch": 0.46911966538134764, - "grad_norm": 0.4264613687992096, - "learning_rate": 9.529881434566833e-06, - "loss": 0.3728, - "step": 7178 - }, - { - "epoch": 0.46918502058688977, - "grad_norm": 0.4415418803691864, - "learning_rate": 9.529733601459424e-06, - "loss": 0.3912, - "step": 7179 - }, - { - "epoch": 0.46925037579243184, - "grad_norm": 0.4888932406902313, - "learning_rate": 9.529585746258934e-06, - "loss": 0.4292, - "step": 7180 - }, - { - "epoch": 0.46931573099797397, - "grad_norm": 0.4589439332485199, - "learning_rate": 9.529437868966085e-06, - "loss": 0.4056, - "step": 7181 - }, - { - "epoch": 0.4693810862035161, - "grad_norm": 0.47460922598838806, - "learning_rate": 9.529289969581596e-06, - "loss": 0.4395, - "step": 7182 - }, - { - "epoch": 0.4694464414090582, - "grad_norm": 0.43631982803344727, - "learning_rate": 9.529142048106194e-06, - "loss": 0.3347, - "step": 7183 - }, - { - "epoch": 0.46951179661460035, - "grad_norm": 0.44571515917778015, - "learning_rate": 9.528994104540596e-06, - "loss": 0.379, - "step": 7184 - }, - { - "epoch": 0.4695771518201425, - "grad_norm": 0.5230749249458313, - "learning_rate": 9.528846138885526e-06, - "loss": 0.493, - "step": 7185 - }, - { - "epoch": 0.4696425070256846, - "grad_norm": 0.5007441639900208, - "learning_rate": 9.528698151141702e-06, - "loss": 0.4655, - "step": 7186 - }, - { - "epoch": 0.46970786223122674, - "grad_norm": 0.4592949450016022, - "learning_rate": 9.52855014130985e-06, - "loss": 0.4223, - "step": 7187 - }, - { - "epoch": 0.46977321743676886, - "grad_norm": 0.40913125872612, - "learning_rate": 9.52840210939069e-06, - "loss": 0.32, - "step": 7188 - }, - { - "epoch": 0.46983857264231094, - "grad_norm": 0.46526312828063965, - "learning_rate": 9.528254055384944e-06, - "loss": 0.3993, - "step": 7189 - }, - { - "epoch": 0.46990392784785306, - "grad_norm": 0.4625626802444458, - "learning_rate": 9.528105979293334e-06, - "loss": 0.4317, - "step": 7190 - }, - { - "epoch": 0.4699692830533952, - "grad_norm": 0.46927690505981445, - "learning_rate": 9.527957881116582e-06, - "loss": 0.4157, - "step": 7191 - }, - { - "epoch": 0.4700346382589373, - "grad_norm": 0.4961572289466858, - "learning_rate": 9.527809760855412e-06, - "loss": 0.4453, - "step": 7192 - }, - { - "epoch": 0.47009999346447945, - "grad_norm": 0.43030011653900146, - "learning_rate": 9.527661618510545e-06, - "loss": 0.3681, - "step": 7193 - }, - { - "epoch": 0.4701653486700216, - "grad_norm": 0.461275190114975, - "learning_rate": 9.527513454082705e-06, - "loss": 0.3916, - "step": 7194 - }, - { - "epoch": 0.4702307038755637, - "grad_norm": 0.48548248410224915, - "learning_rate": 9.52736526757261e-06, - "loss": 0.4444, - "step": 7195 - }, - { - "epoch": 0.47029605908110583, - "grad_norm": 0.4657756984233856, - "learning_rate": 9.527217058980989e-06, - "loss": 0.4206, - "step": 7196 - }, - { - "epoch": 0.4703614142866479, - "grad_norm": 0.48155540227890015, - "learning_rate": 9.52706882830856e-06, - "loss": 0.421, - "step": 7197 - }, - { - "epoch": 0.47042676949219003, - "grad_norm": 0.4589233696460724, - "learning_rate": 9.526920575556048e-06, - "loss": 0.362, - "step": 7198 - }, - { - "epoch": 0.47049212469773216, - "grad_norm": 0.4520863890647888, - "learning_rate": 9.52677230072418e-06, - "loss": 0.4167, - "step": 7199 - }, - { - "epoch": 0.4705574799032743, - "grad_norm": 0.46362555027008057, - "learning_rate": 9.526624003813671e-06, - "loss": 0.3834, - "step": 7200 - }, - { - "epoch": 0.4706228351088164, - "grad_norm": 0.4626002907752991, - "learning_rate": 9.52647568482525e-06, - "loss": 0.4045, - "step": 7201 - }, - { - "epoch": 0.47068819031435855, - "grad_norm": 0.4694306254386902, - "learning_rate": 9.526327343759639e-06, - "loss": 0.4315, - "step": 7202 - }, - { - "epoch": 0.4707535455199007, - "grad_norm": 0.49735140800476074, - "learning_rate": 9.52617898061756e-06, - "loss": 0.4278, - "step": 7203 - }, - { - "epoch": 0.4708189007254428, - "grad_norm": 0.5209844708442688, - "learning_rate": 9.52603059539974e-06, - "loss": 0.484, - "step": 7204 - }, - { - "epoch": 0.47088425593098493, - "grad_norm": 0.4144379794597626, - "learning_rate": 9.525882188106899e-06, - "loss": 0.3293, - "step": 7205 - }, - { - "epoch": 0.470949611136527, - "grad_norm": 0.4277053475379944, - "learning_rate": 9.525733758739766e-06, - "loss": 0.3742, - "step": 7206 - }, - { - "epoch": 0.47101496634206913, - "grad_norm": 0.42117348313331604, - "learning_rate": 9.525585307299059e-06, - "loss": 0.3091, - "step": 7207 - }, - { - "epoch": 0.47108032154761126, - "grad_norm": 0.4527733325958252, - "learning_rate": 9.525436833785505e-06, - "loss": 0.3608, - "step": 7208 - }, - { - "epoch": 0.4711456767531534, - "grad_norm": 0.45344555377960205, - "learning_rate": 9.525288338199828e-06, - "loss": 0.3909, - "step": 7209 - }, - { - "epoch": 0.4712110319586955, - "grad_norm": 0.4333653151988983, - "learning_rate": 9.52513982054275e-06, - "loss": 0.3615, - "step": 7210 - }, - { - "epoch": 0.47127638716423764, - "grad_norm": 0.43686145544052124, - "learning_rate": 9.524991280815e-06, - "loss": 0.3294, - "step": 7211 - }, - { - "epoch": 0.47134174236977977, - "grad_norm": 0.46286964416503906, - "learning_rate": 9.5248427190173e-06, - "loss": 0.3977, - "step": 7212 - }, - { - "epoch": 0.4714070975753219, - "grad_norm": 0.5068244934082031, - "learning_rate": 9.524694135150374e-06, - "loss": 0.3957, - "step": 7213 - }, - { - "epoch": 0.47147245278086397, - "grad_norm": 0.5215017199516296, - "learning_rate": 9.524545529214945e-06, - "loss": 0.4687, - "step": 7214 - }, - { - "epoch": 0.4715378079864061, - "grad_norm": 0.4662645161151886, - "learning_rate": 9.524396901211743e-06, - "loss": 0.4009, - "step": 7215 - }, - { - "epoch": 0.4716031631919482, - "grad_norm": 0.4635929763317108, - "learning_rate": 9.524248251141488e-06, - "loss": 0.4474, - "step": 7216 - }, - { - "epoch": 0.47166851839749036, - "grad_norm": 0.45604032278060913, - "learning_rate": 9.524099579004909e-06, - "loss": 0.3905, - "step": 7217 - }, - { - "epoch": 0.4717338736030325, - "grad_norm": 0.4709457755088806, - "learning_rate": 9.523950884802728e-06, - "loss": 0.3947, - "step": 7218 - }, - { - "epoch": 0.4717992288085746, - "grad_norm": 0.4669475853443146, - "learning_rate": 9.523802168535673e-06, - "loss": 0.4133, - "step": 7219 - }, - { - "epoch": 0.47186458401411674, - "grad_norm": 0.4406854510307312, - "learning_rate": 9.523653430204464e-06, - "loss": 0.397, - "step": 7220 - }, - { - "epoch": 0.47192993921965887, - "grad_norm": 0.4447558522224426, - "learning_rate": 9.523504669809832e-06, - "loss": 0.361, - "step": 7221 - }, - { - "epoch": 0.47199529442520094, - "grad_norm": 0.43887776136398315, - "learning_rate": 9.523355887352501e-06, - "loss": 0.3803, - "step": 7222 - }, - { - "epoch": 0.47206064963074307, - "grad_norm": 0.427418053150177, - "learning_rate": 9.523207082833195e-06, - "loss": 0.3869, - "step": 7223 - }, - { - "epoch": 0.4721260048362852, - "grad_norm": 0.48189249634742737, - "learning_rate": 9.523058256252643e-06, - "loss": 0.4352, - "step": 7224 - }, - { - "epoch": 0.4721913600418273, - "grad_norm": 0.4351761043071747, - "learning_rate": 9.522909407611566e-06, - "loss": 0.3769, - "step": 7225 - }, - { - "epoch": 0.47225671524736945, - "grad_norm": 0.44468432664871216, - "learning_rate": 9.522760536910696e-06, - "loss": 0.3805, - "step": 7226 - }, - { - "epoch": 0.4723220704529116, - "grad_norm": 0.48574814200401306, - "learning_rate": 9.522611644150754e-06, - "loss": 0.4481, - "step": 7227 - }, - { - "epoch": 0.4723874256584537, - "grad_norm": 0.4702390730381012, - "learning_rate": 9.52246272933247e-06, - "loss": 0.3978, - "step": 7228 - }, - { - "epoch": 0.47245278086399584, - "grad_norm": 0.4346328377723694, - "learning_rate": 9.522313792456567e-06, - "loss": 0.3601, - "step": 7229 - }, - { - "epoch": 0.47251813606953796, - "grad_norm": 0.454008013010025, - "learning_rate": 9.522164833523775e-06, - "loss": 0.394, - "step": 7230 - }, - { - "epoch": 0.47258349127508004, - "grad_norm": 0.42442142963409424, - "learning_rate": 9.522015852534817e-06, - "loss": 0.303, - "step": 7231 - }, - { - "epoch": 0.47264884648062216, - "grad_norm": 0.4440680146217346, - "learning_rate": 9.52186684949042e-06, - "loss": 0.4029, - "step": 7232 - }, - { - "epoch": 0.4727142016861643, - "grad_norm": 0.42702242732048035, - "learning_rate": 9.521717824391312e-06, - "loss": 0.3779, - "step": 7233 - }, - { - "epoch": 0.4727795568917064, - "grad_norm": 0.4719691872596741, - "learning_rate": 9.521568777238221e-06, - "loss": 0.4281, - "step": 7234 - }, - { - "epoch": 0.47284491209724855, - "grad_norm": 0.47126439213752747, - "learning_rate": 9.521419708031873e-06, - "loss": 0.4204, - "step": 7235 - }, - { - "epoch": 0.4729102673027907, - "grad_norm": 0.43783560395240784, - "learning_rate": 9.521270616772993e-06, - "loss": 0.3696, - "step": 7236 - }, - { - "epoch": 0.4729756225083328, - "grad_norm": 0.43278399109840393, - "learning_rate": 9.521121503462311e-06, - "loss": 0.3602, - "step": 7237 - }, - { - "epoch": 0.47304097771387493, - "grad_norm": 0.47101151943206787, - "learning_rate": 9.520972368100554e-06, - "loss": 0.4192, - "step": 7238 - }, - { - "epoch": 0.473106332919417, - "grad_norm": 0.5181102752685547, - "learning_rate": 9.520823210688446e-06, - "loss": 0.4488, - "step": 7239 - }, - { - "epoch": 0.47317168812495913, - "grad_norm": 0.4515284299850464, - "learning_rate": 9.520674031226719e-06, - "loss": 0.388, - "step": 7240 - }, - { - "epoch": 0.47323704333050126, - "grad_norm": 0.4675734043121338, - "learning_rate": 9.5205248297161e-06, - "loss": 0.4155, - "step": 7241 - }, - { - "epoch": 0.4733023985360434, - "grad_norm": 0.46302253007888794, - "learning_rate": 9.520375606157312e-06, - "loss": 0.4102, - "step": 7242 - }, - { - "epoch": 0.4733677537415855, - "grad_norm": 0.4747518301010132, - "learning_rate": 9.520226360551089e-06, - "loss": 0.4373, - "step": 7243 - }, - { - "epoch": 0.47343310894712765, - "grad_norm": 0.4323144555091858, - "learning_rate": 9.520077092898155e-06, - "loss": 0.3076, - "step": 7244 - }, - { - "epoch": 0.4734984641526698, - "grad_norm": 0.44021075963974, - "learning_rate": 9.519927803199239e-06, - "loss": 0.334, - "step": 7245 - }, - { - "epoch": 0.4735638193582119, - "grad_norm": 0.4736710786819458, - "learning_rate": 9.519778491455068e-06, - "loss": 0.4281, - "step": 7246 - }, - { - "epoch": 0.47362917456375403, - "grad_norm": 0.48081645369529724, - "learning_rate": 9.519629157666373e-06, - "loss": 0.3862, - "step": 7247 - }, - { - "epoch": 0.4736945297692961, - "grad_norm": 0.4680396020412445, - "learning_rate": 9.51947980183388e-06, - "loss": 0.3978, - "step": 7248 - }, - { - "epoch": 0.47375988497483823, - "grad_norm": 0.4743956923484802, - "learning_rate": 9.519330423958319e-06, - "loss": 0.4567, - "step": 7249 - }, - { - "epoch": 0.47382524018038036, - "grad_norm": 0.46481868624687195, - "learning_rate": 9.519181024040418e-06, - "loss": 0.3635, - "step": 7250 - }, - { - "epoch": 0.4738905953859225, - "grad_norm": 0.4730256199836731, - "learning_rate": 9.519031602080905e-06, - "loss": 0.4023, - "step": 7251 - }, - { - "epoch": 0.4739559505914646, - "grad_norm": 0.46956583857536316, - "learning_rate": 9.51888215808051e-06, - "loss": 0.4263, - "step": 7252 - }, - { - "epoch": 0.47402130579700674, - "grad_norm": 0.4671914875507355, - "learning_rate": 9.518732692039962e-06, - "loss": 0.4106, - "step": 7253 - }, - { - "epoch": 0.47408666100254887, - "grad_norm": 0.4920426309108734, - "learning_rate": 9.518583203959988e-06, - "loss": 0.3802, - "step": 7254 - }, - { - "epoch": 0.474152016208091, - "grad_norm": 0.45804592967033386, - "learning_rate": 9.518433693841318e-06, - "loss": 0.405, - "step": 7255 - }, - { - "epoch": 0.47421737141363307, - "grad_norm": 0.4789954125881195, - "learning_rate": 9.518284161684681e-06, - "loss": 0.4155, - "step": 7256 - }, - { - "epoch": 0.4742827266191752, - "grad_norm": 0.4323576092720032, - "learning_rate": 9.518134607490808e-06, - "loss": 0.3587, - "step": 7257 - }, - { - "epoch": 0.4743480818247173, - "grad_norm": 0.4509740173816681, - "learning_rate": 9.517985031260429e-06, - "loss": 0.3557, - "step": 7258 - }, - { - "epoch": 0.47441343703025945, - "grad_norm": 0.4612359404563904, - "learning_rate": 9.517835432994269e-06, - "loss": 0.4052, - "step": 7259 - }, - { - "epoch": 0.4744787922358016, - "grad_norm": 0.4386858344078064, - "learning_rate": 9.51768581269306e-06, - "loss": 0.3728, - "step": 7260 - }, - { - "epoch": 0.4745441474413437, - "grad_norm": 0.47079646587371826, - "learning_rate": 9.517536170357535e-06, - "loss": 0.4406, - "step": 7261 - }, - { - "epoch": 0.47460950264688584, - "grad_norm": 0.46120864152908325, - "learning_rate": 9.517386505988419e-06, - "loss": 0.3856, - "step": 7262 - }, - { - "epoch": 0.47467485785242797, - "grad_norm": 0.4666607975959778, - "learning_rate": 9.517236819586445e-06, - "loss": 0.4161, - "step": 7263 - }, - { - "epoch": 0.47474021305797004, - "grad_norm": 0.4508492946624756, - "learning_rate": 9.51708711115234e-06, - "loss": 0.3842, - "step": 7264 - }, - { - "epoch": 0.47480556826351217, - "grad_norm": 0.4076070189476013, - "learning_rate": 9.516937380686839e-06, - "loss": 0.3313, - "step": 7265 - }, - { - "epoch": 0.4748709234690543, - "grad_norm": 0.4719148874282837, - "learning_rate": 9.516787628190668e-06, - "loss": 0.4112, - "step": 7266 - }, - { - "epoch": 0.4749362786745964, - "grad_norm": 0.41744285821914673, - "learning_rate": 9.51663785366456e-06, - "loss": 0.3318, - "step": 7267 - }, - { - "epoch": 0.47500163388013855, - "grad_norm": 0.44334912300109863, - "learning_rate": 9.516488057109245e-06, - "loss": 0.3724, - "step": 7268 - }, - { - "epoch": 0.4750669890856807, - "grad_norm": 0.4568454921245575, - "learning_rate": 9.516338238525453e-06, - "loss": 0.3515, - "step": 7269 - }, - { - "epoch": 0.4751323442912228, - "grad_norm": 0.4562966227531433, - "learning_rate": 9.516188397913913e-06, - "loss": 0.3878, - "step": 7270 - }, - { - "epoch": 0.47519769949676494, - "grad_norm": 0.4447886049747467, - "learning_rate": 9.51603853527536e-06, - "loss": 0.4111, - "step": 7271 - }, - { - "epoch": 0.47526305470230706, - "grad_norm": 0.47418299317359924, - "learning_rate": 9.51588865061052e-06, - "loss": 0.4049, - "step": 7272 - }, - { - "epoch": 0.47532840990784914, - "grad_norm": 0.44725048542022705, - "learning_rate": 9.515738743920128e-06, - "loss": 0.3943, - "step": 7273 - }, - { - "epoch": 0.47539376511339126, - "grad_norm": 0.4517868161201477, - "learning_rate": 9.515588815204913e-06, - "loss": 0.4052, - "step": 7274 - }, - { - "epoch": 0.4754591203189334, - "grad_norm": 0.4453171491622925, - "learning_rate": 9.515438864465608e-06, - "loss": 0.3633, - "step": 7275 - }, - { - "epoch": 0.4755244755244755, - "grad_norm": 0.48039841651916504, - "learning_rate": 9.515288891702943e-06, - "loss": 0.441, - "step": 7276 - }, - { - "epoch": 0.47558983073001765, - "grad_norm": 0.43807274103164673, - "learning_rate": 9.51513889691765e-06, - "loss": 0.3898, - "step": 7277 - }, - { - "epoch": 0.4756551859355598, - "grad_norm": 0.4547906816005707, - "learning_rate": 9.514988880110461e-06, - "loss": 0.4063, - "step": 7278 - }, - { - "epoch": 0.4757205411411019, - "grad_norm": 0.48315906524658203, - "learning_rate": 9.514838841282107e-06, - "loss": 0.4325, - "step": 7279 - }, - { - "epoch": 0.47578589634664403, - "grad_norm": 0.46735313534736633, - "learning_rate": 9.514688780433316e-06, - "loss": 0.4105, - "step": 7280 - }, - { - "epoch": 0.4758512515521861, - "grad_norm": 0.4949561059474945, - "learning_rate": 9.514538697564828e-06, - "loss": 0.4434, - "step": 7281 - }, - { - "epoch": 0.47591660675772823, - "grad_norm": 0.44944673776626587, - "learning_rate": 9.51438859267737e-06, - "loss": 0.3767, - "step": 7282 - }, - { - "epoch": 0.47598196196327036, - "grad_norm": 0.46788227558135986, - "learning_rate": 9.514238465771675e-06, - "loss": 0.4028, - "step": 7283 - }, - { - "epoch": 0.4760473171688125, - "grad_norm": 0.4889834523200989, - "learning_rate": 9.514088316848474e-06, - "loss": 0.4382, - "step": 7284 - }, - { - "epoch": 0.4761126723743546, - "grad_norm": 0.4904220998287201, - "learning_rate": 9.5139381459085e-06, - "loss": 0.4392, - "step": 7285 - }, - { - "epoch": 0.47617802757989675, - "grad_norm": 0.45664700865745544, - "learning_rate": 9.51378795295249e-06, - "loss": 0.4045, - "step": 7286 - }, - { - "epoch": 0.4762433827854389, - "grad_norm": 0.5146574378013611, - "learning_rate": 9.513637737981168e-06, - "loss": 0.4579, - "step": 7287 - }, - { - "epoch": 0.476308737990981, - "grad_norm": 0.4887774586677551, - "learning_rate": 9.513487500995273e-06, - "loss": 0.4011, - "step": 7288 - }, - { - "epoch": 0.47637409319652313, - "grad_norm": 0.46555158495903015, - "learning_rate": 9.513337241995535e-06, - "loss": 0.3898, - "step": 7289 - }, - { - "epoch": 0.4764394484020652, - "grad_norm": 0.4263874292373657, - "learning_rate": 9.513186960982688e-06, - "loss": 0.3445, - "step": 7290 - }, - { - "epoch": 0.47650480360760733, - "grad_norm": 0.4768146276473999, - "learning_rate": 9.513036657957464e-06, - "loss": 0.4141, - "step": 7291 - }, - { - "epoch": 0.47657015881314946, - "grad_norm": 0.5001056790351868, - "learning_rate": 9.512886332920598e-06, - "loss": 0.4119, - "step": 7292 - }, - { - "epoch": 0.4766355140186916, - "grad_norm": 0.47101029753685, - "learning_rate": 9.51273598587282e-06, - "loss": 0.3835, - "step": 7293 - }, - { - "epoch": 0.4767008692242337, - "grad_norm": 0.4715198874473572, - "learning_rate": 9.512585616814869e-06, - "loss": 0.4164, - "step": 7294 - }, - { - "epoch": 0.47676622442977584, - "grad_norm": 0.4667539596557617, - "learning_rate": 9.512435225747472e-06, - "loss": 0.3791, - "step": 7295 - }, - { - "epoch": 0.47683157963531797, - "grad_norm": 0.4642941355705261, - "learning_rate": 9.512284812671365e-06, - "loss": 0.3675, - "step": 7296 - }, - { - "epoch": 0.4768969348408601, - "grad_norm": 0.4901065230369568, - "learning_rate": 9.512134377587282e-06, - "loss": 0.3923, - "step": 7297 - }, - { - "epoch": 0.47696229004640217, - "grad_norm": 0.4683338701725006, - "learning_rate": 9.511983920495957e-06, - "loss": 0.3923, - "step": 7298 - }, - { - "epoch": 0.4770276452519443, - "grad_norm": 0.43025875091552734, - "learning_rate": 9.511833441398123e-06, - "loss": 0.3393, - "step": 7299 - }, - { - "epoch": 0.4770930004574864, - "grad_norm": 0.5275658369064331, - "learning_rate": 9.511682940294514e-06, - "loss": 0.4693, - "step": 7300 - }, - { - "epoch": 0.47715835566302855, - "grad_norm": 0.5425550937652588, - "learning_rate": 9.511532417185864e-06, - "loss": 0.4165, - "step": 7301 - }, - { - "epoch": 0.4772237108685707, - "grad_norm": 0.4689454138278961, - "learning_rate": 9.51138187207291e-06, - "loss": 0.395, - "step": 7302 - }, - { - "epoch": 0.4772890660741128, - "grad_norm": 0.4754444360733032, - "learning_rate": 9.51123130495638e-06, - "loss": 0.393, - "step": 7303 - }, - { - "epoch": 0.47735442127965494, - "grad_norm": 0.447358638048172, - "learning_rate": 9.511080715837015e-06, - "loss": 0.3535, - "step": 7304 - }, - { - "epoch": 0.47741977648519707, - "grad_norm": 0.4476067125797272, - "learning_rate": 9.510930104715545e-06, - "loss": 0.3602, - "step": 7305 - }, - { - "epoch": 0.47748513169073914, - "grad_norm": 0.48552852869033813, - "learning_rate": 9.510779471592706e-06, - "loss": 0.448, - "step": 7306 - }, - { - "epoch": 0.47755048689628127, - "grad_norm": 0.4522158205509186, - "learning_rate": 9.510628816469234e-06, - "loss": 0.3395, - "step": 7307 - }, - { - "epoch": 0.4776158421018234, - "grad_norm": 0.49904000759124756, - "learning_rate": 9.510478139345862e-06, - "loss": 0.4331, - "step": 7308 - }, - { - "epoch": 0.4776811973073655, - "grad_norm": 0.448441743850708, - "learning_rate": 9.510327440223324e-06, - "loss": 0.3785, - "step": 7309 - }, - { - "epoch": 0.47774655251290765, - "grad_norm": 0.4391007125377655, - "learning_rate": 9.51017671910236e-06, - "loss": 0.3522, - "step": 7310 - }, - { - "epoch": 0.4778119077184498, - "grad_norm": 0.5057657957077026, - "learning_rate": 9.510025975983699e-06, - "loss": 0.4436, - "step": 7311 - }, - { - "epoch": 0.4778772629239919, - "grad_norm": 0.43368351459503174, - "learning_rate": 9.50987521086808e-06, - "loss": 0.36, - "step": 7312 - }, - { - "epoch": 0.47794261812953404, - "grad_norm": 0.4711674451828003, - "learning_rate": 9.509724423756237e-06, - "loss": 0.4364, - "step": 7313 - }, - { - "epoch": 0.47800797333507616, - "grad_norm": 0.4643467664718628, - "learning_rate": 9.509573614648905e-06, - "loss": 0.4411, - "step": 7314 - }, - { - "epoch": 0.47807332854061824, - "grad_norm": 0.4575525224208832, - "learning_rate": 9.50942278354682e-06, - "loss": 0.3946, - "step": 7315 - }, - { - "epoch": 0.47813868374616036, - "grad_norm": 0.4377342462539673, - "learning_rate": 9.509271930450718e-06, - "loss": 0.3586, - "step": 7316 - }, - { - "epoch": 0.4782040389517025, - "grad_norm": 0.4354041814804077, - "learning_rate": 9.509121055361337e-06, - "loss": 0.3639, - "step": 7317 - }, - { - "epoch": 0.4782693941572446, - "grad_norm": 0.5191472172737122, - "learning_rate": 9.508970158279409e-06, - "loss": 0.4548, - "step": 7318 - }, - { - "epoch": 0.47833474936278675, - "grad_norm": 0.4496753215789795, - "learning_rate": 9.508819239205672e-06, - "loss": 0.383, - "step": 7319 - }, - { - "epoch": 0.4784001045683289, - "grad_norm": 0.4612496793270111, - "learning_rate": 9.508668298140859e-06, - "loss": 0.4039, - "step": 7320 - }, - { - "epoch": 0.478465459773871, - "grad_norm": 0.4505055546760559, - "learning_rate": 9.50851733508571e-06, - "loss": 0.3934, - "step": 7321 - }, - { - "epoch": 0.47853081497941313, - "grad_norm": 0.43406152725219727, - "learning_rate": 9.50836635004096e-06, - "loss": 0.3405, - "step": 7322 - }, - { - "epoch": 0.4785961701849552, - "grad_norm": 0.4261730909347534, - "learning_rate": 9.508215343007348e-06, - "loss": 0.3902, - "step": 7323 - }, - { - "epoch": 0.47866152539049733, - "grad_norm": 0.43612366914749146, - "learning_rate": 9.508064313985606e-06, - "loss": 0.3778, - "step": 7324 - }, - { - "epoch": 0.47872688059603946, - "grad_norm": 0.4492633640766144, - "learning_rate": 9.507913262976472e-06, - "loss": 0.3798, - "step": 7325 - }, - { - "epoch": 0.4787922358015816, - "grad_norm": 0.4606937766075134, - "learning_rate": 9.507762189980684e-06, - "loss": 0.3851, - "step": 7326 - }, - { - "epoch": 0.4788575910071237, - "grad_norm": 0.44337230920791626, - "learning_rate": 9.507611094998979e-06, - "loss": 0.3714, - "step": 7327 - }, - { - "epoch": 0.47892294621266585, - "grad_norm": 0.4208022654056549, - "learning_rate": 9.507459978032093e-06, - "loss": 0.3463, - "step": 7328 - }, - { - "epoch": 0.478988301418208, - "grad_norm": 0.4231928288936615, - "learning_rate": 9.507308839080761e-06, - "loss": 0.3846, - "step": 7329 - }, - { - "epoch": 0.4790536566237501, - "grad_norm": 0.46564728021621704, - "learning_rate": 9.507157678145725e-06, - "loss": 0.3758, - "step": 7330 - }, - { - "epoch": 0.47911901182929223, - "grad_norm": 0.45815080404281616, - "learning_rate": 9.507006495227718e-06, - "loss": 0.4189, - "step": 7331 - }, - { - "epoch": 0.4791843670348343, - "grad_norm": 0.4404975473880768, - "learning_rate": 9.506855290327479e-06, - "loss": 0.3809, - "step": 7332 - }, - { - "epoch": 0.47924972224037643, - "grad_norm": 0.4769819378852844, - "learning_rate": 9.506704063445743e-06, - "loss": 0.4282, - "step": 7333 - }, - { - "epoch": 0.47931507744591856, - "grad_norm": 0.4443832039833069, - "learning_rate": 9.506552814583254e-06, - "loss": 0.371, - "step": 7334 - }, - { - "epoch": 0.4793804326514607, - "grad_norm": 0.46927258372306824, - "learning_rate": 9.506401543740746e-06, - "loss": 0.3837, - "step": 7335 - }, - { - "epoch": 0.4794457878570028, - "grad_norm": 0.4402283728122711, - "learning_rate": 9.506250250918954e-06, - "loss": 0.3758, - "step": 7336 - }, - { - "epoch": 0.47951114306254494, - "grad_norm": 0.4432384669780731, - "learning_rate": 9.50609893611862e-06, - "loss": 0.3726, - "step": 7337 - }, - { - "epoch": 0.47957649826808707, - "grad_norm": 0.4765337109565735, - "learning_rate": 9.50594759934048e-06, - "loss": 0.3942, - "step": 7338 - }, - { - "epoch": 0.4796418534736292, - "grad_norm": 0.4251444935798645, - "learning_rate": 9.50579624058527e-06, - "loss": 0.3684, - "step": 7339 - }, - { - "epoch": 0.47970720867917127, - "grad_norm": 0.4497198462486267, - "learning_rate": 9.505644859853734e-06, - "loss": 0.3869, - "step": 7340 - }, - { - "epoch": 0.4797725638847134, - "grad_norm": 0.4948360025882721, - "learning_rate": 9.505493457146605e-06, - "loss": 0.4207, - "step": 7341 - }, - { - "epoch": 0.4798379190902555, - "grad_norm": 0.45769166946411133, - "learning_rate": 9.505342032464627e-06, - "loss": 0.3838, - "step": 7342 - }, - { - "epoch": 0.47990327429579765, - "grad_norm": 0.48405545949935913, - "learning_rate": 9.505190585808533e-06, - "loss": 0.456, - "step": 7343 - }, - { - "epoch": 0.4799686295013398, - "grad_norm": 0.49579980969429016, - "learning_rate": 9.505039117179063e-06, - "loss": 0.4385, - "step": 7344 - }, - { - "epoch": 0.4800339847068819, - "grad_norm": 0.6010561585426331, - "learning_rate": 9.504887626576958e-06, - "loss": 0.4404, - "step": 7345 - }, - { - "epoch": 0.48009933991242404, - "grad_norm": 0.4512021839618683, - "learning_rate": 9.504736114002953e-06, - "loss": 0.3693, - "step": 7346 - }, - { - "epoch": 0.48016469511796617, - "grad_norm": 0.4892708361148834, - "learning_rate": 9.50458457945779e-06, - "loss": 0.4398, - "step": 7347 - }, - { - "epoch": 0.4802300503235083, - "grad_norm": 0.494975209236145, - "learning_rate": 9.50443302294221e-06, - "loss": 0.3536, - "step": 7348 - }, - { - "epoch": 0.48029540552905037, - "grad_norm": 0.4584241509437561, - "learning_rate": 9.504281444456947e-06, - "loss": 0.4125, - "step": 7349 - }, - { - "epoch": 0.4803607607345925, - "grad_norm": 0.4221155345439911, - "learning_rate": 9.504129844002745e-06, - "loss": 0.3738, - "step": 7350 - }, - { - "epoch": 0.4804261159401346, - "grad_norm": 0.48600223660469055, - "learning_rate": 9.503978221580338e-06, - "loss": 0.4373, - "step": 7351 - }, - { - "epoch": 0.48049147114567675, - "grad_norm": 0.41237521171569824, - "learning_rate": 9.503826577190473e-06, - "loss": 0.3448, - "step": 7352 - }, - { - "epoch": 0.4805568263512189, - "grad_norm": 0.46173515915870667, - "learning_rate": 9.503674910833884e-06, - "loss": 0.3958, - "step": 7353 - }, - { - "epoch": 0.480622181556761, - "grad_norm": 0.47616085410118103, - "learning_rate": 9.503523222511311e-06, - "loss": 0.4173, - "step": 7354 - }, - { - "epoch": 0.48068753676230314, - "grad_norm": 0.4457026422023773, - "learning_rate": 9.503371512223497e-06, - "loss": 0.3822, - "step": 7355 - }, - { - "epoch": 0.48075289196784526, - "grad_norm": 0.46840551495552063, - "learning_rate": 9.503219779971178e-06, - "loss": 0.3957, - "step": 7356 - }, - { - "epoch": 0.48081824717338734, - "grad_norm": 0.43921956419944763, - "learning_rate": 9.503068025755099e-06, - "loss": 0.3762, - "step": 7357 - }, - { - "epoch": 0.48088360237892946, - "grad_norm": 0.47099873423576355, - "learning_rate": 9.502916249575993e-06, - "loss": 0.419, - "step": 7358 - }, - { - "epoch": 0.4809489575844716, - "grad_norm": 0.4435424506664276, - "learning_rate": 9.502764451434608e-06, - "loss": 0.3604, - "step": 7359 - }, - { - "epoch": 0.4810143127900137, - "grad_norm": 0.463450163602829, - "learning_rate": 9.50261263133168e-06, - "loss": 0.3797, - "step": 7360 - }, - { - "epoch": 0.48107966799555585, - "grad_norm": 0.5260501503944397, - "learning_rate": 9.50246078926795e-06, - "loss": 0.4759, - "step": 7361 - }, - { - "epoch": 0.481145023201098, - "grad_norm": 0.4505915641784668, - "learning_rate": 9.502308925244159e-06, - "loss": 0.4012, - "step": 7362 - }, - { - "epoch": 0.4812103784066401, - "grad_norm": 0.4749990403652191, - "learning_rate": 9.502157039261047e-06, - "loss": 0.3791, - "step": 7363 - }, - { - "epoch": 0.48127573361218223, - "grad_norm": 0.44820836186408997, - "learning_rate": 9.502005131319357e-06, - "loss": 0.4018, - "step": 7364 - }, - { - "epoch": 0.4813410888177243, - "grad_norm": 0.44692501425743103, - "learning_rate": 9.501853201419826e-06, - "loss": 0.3693, - "step": 7365 - }, - { - "epoch": 0.48140644402326643, - "grad_norm": 0.4645141065120697, - "learning_rate": 9.5017012495632e-06, - "loss": 0.3833, - "step": 7366 - }, - { - "epoch": 0.48147179922880856, - "grad_norm": 0.7762148380279541, - "learning_rate": 9.501549275750215e-06, - "loss": 0.3834, - "step": 7367 - }, - { - "epoch": 0.4815371544343507, - "grad_norm": 0.48271432518959045, - "learning_rate": 9.501397279981616e-06, - "loss": 0.4235, - "step": 7368 - }, - { - "epoch": 0.4816025096398928, - "grad_norm": 0.4508444666862488, - "learning_rate": 9.501245262258142e-06, - "loss": 0.3982, - "step": 7369 - }, - { - "epoch": 0.48166786484543495, - "grad_norm": 0.48299744725227356, - "learning_rate": 9.501093222580537e-06, - "loss": 0.394, - "step": 7370 - }, - { - "epoch": 0.4817332200509771, - "grad_norm": 0.43675726652145386, - "learning_rate": 9.500941160949541e-06, - "loss": 0.3428, - "step": 7371 - }, - { - "epoch": 0.4817985752565192, - "grad_norm": 0.44021663069725037, - "learning_rate": 9.500789077365894e-06, - "loss": 0.4024, - "step": 7372 - }, - { - "epoch": 0.48186393046206133, - "grad_norm": 0.4571131467819214, - "learning_rate": 9.500636971830342e-06, - "loss": 0.3693, - "step": 7373 - }, - { - "epoch": 0.4819292856676034, - "grad_norm": 0.44831281900405884, - "learning_rate": 9.500484844343622e-06, - "loss": 0.3586, - "step": 7374 - }, - { - "epoch": 0.48199464087314553, - "grad_norm": 0.4352133572101593, - "learning_rate": 9.50033269490648e-06, - "loss": 0.3445, - "step": 7375 - }, - { - "epoch": 0.48205999607868766, - "grad_norm": 0.4469340443611145, - "learning_rate": 9.500180523519655e-06, - "loss": 0.3669, - "step": 7376 - }, - { - "epoch": 0.4821253512842298, - "grad_norm": 0.4346008598804474, - "learning_rate": 9.500028330183892e-06, - "loss": 0.369, - "step": 7377 - }, - { - "epoch": 0.4821907064897719, - "grad_norm": 0.4379720091819763, - "learning_rate": 9.49987611489993e-06, - "loss": 0.3932, - "step": 7378 - }, - { - "epoch": 0.48225606169531404, - "grad_norm": 0.43308043479919434, - "learning_rate": 9.499723877668514e-06, - "loss": 0.3591, - "step": 7379 - }, - { - "epoch": 0.48232141690085617, - "grad_norm": 0.4782882630825043, - "learning_rate": 9.499571618490387e-06, - "loss": 0.4213, - "step": 7380 - }, - { - "epoch": 0.4823867721063983, - "grad_norm": 0.47381147742271423, - "learning_rate": 9.499419337366289e-06, - "loss": 0.4241, - "step": 7381 - }, - { - "epoch": 0.48245212731194037, - "grad_norm": 0.44306692481040955, - "learning_rate": 9.499267034296966e-06, - "loss": 0.3411, - "step": 7382 - }, - { - "epoch": 0.4825174825174825, - "grad_norm": 0.44983986020088196, - "learning_rate": 9.499114709283157e-06, - "loss": 0.3958, - "step": 7383 - }, - { - "epoch": 0.4825828377230246, - "grad_norm": 0.4825235605239868, - "learning_rate": 9.498962362325608e-06, - "loss": 0.4092, - "step": 7384 - }, - { - "epoch": 0.48264819292856675, - "grad_norm": 0.42523130774497986, - "learning_rate": 9.49880999342506e-06, - "loss": 0.3681, - "step": 7385 - }, - { - "epoch": 0.4827135481341089, - "grad_norm": 0.4949938654899597, - "learning_rate": 9.498657602582258e-06, - "loss": 0.4341, - "step": 7386 - }, - { - "epoch": 0.482778903339651, - "grad_norm": 0.49087655544281006, - "learning_rate": 9.498505189797945e-06, - "loss": 0.4445, - "step": 7387 - }, - { - "epoch": 0.48284425854519314, - "grad_norm": 0.4897131621837616, - "learning_rate": 9.498352755072861e-06, - "loss": 0.4489, - "step": 7388 - }, - { - "epoch": 0.48290961375073527, - "grad_norm": 0.47331148386001587, - "learning_rate": 9.498200298407754e-06, - "loss": 0.3556, - "step": 7389 - }, - { - "epoch": 0.4829749689562774, - "grad_norm": 0.47523513436317444, - "learning_rate": 9.498047819803367e-06, - "loss": 0.375, - "step": 7390 - }, - { - "epoch": 0.48304032416181947, - "grad_norm": 0.5040974020957947, - "learning_rate": 9.497895319260439e-06, - "loss": 0.4224, - "step": 7391 - }, - { - "epoch": 0.4831056793673616, - "grad_norm": 0.4312072694301605, - "learning_rate": 9.497742796779721e-06, - "loss": 0.3427, - "step": 7392 - }, - { - "epoch": 0.4831710345729037, - "grad_norm": 0.469063401222229, - "learning_rate": 9.497590252361952e-06, - "loss": 0.3891, - "step": 7393 - }, - { - "epoch": 0.48323638977844585, - "grad_norm": 0.4873962104320526, - "learning_rate": 9.497437686007877e-06, - "loss": 0.4093, - "step": 7394 - }, - { - "epoch": 0.483301744983988, - "grad_norm": 0.4571136236190796, - "learning_rate": 9.497285097718241e-06, - "loss": 0.3556, - "step": 7395 - }, - { - "epoch": 0.4833671001895301, - "grad_norm": 0.43147221207618713, - "learning_rate": 9.497132487493785e-06, - "loss": 0.3768, - "step": 7396 - }, - { - "epoch": 0.48343245539507224, - "grad_norm": 0.43853816390037537, - "learning_rate": 9.49697985533526e-06, - "loss": 0.3771, - "step": 7397 - }, - { - "epoch": 0.48349781060061436, - "grad_norm": 0.49010199308395386, - "learning_rate": 9.496827201243404e-06, - "loss": 0.4498, - "step": 7398 - }, - { - "epoch": 0.48356316580615644, - "grad_norm": 0.4329914450645447, - "learning_rate": 9.496674525218963e-06, - "loss": 0.3719, - "step": 7399 - }, - { - "epoch": 0.48362852101169856, - "grad_norm": 0.4928269386291504, - "learning_rate": 9.496521827262684e-06, - "loss": 0.4307, - "step": 7400 - }, - { - "epoch": 0.4836938762172407, - "grad_norm": 0.5182434320449829, - "learning_rate": 9.49636910737531e-06, - "loss": 0.3826, - "step": 7401 - }, - { - "epoch": 0.4837592314227828, - "grad_norm": 0.46465063095092773, - "learning_rate": 9.496216365557586e-06, - "loss": 0.393, - "step": 7402 - }, - { - "epoch": 0.48382458662832495, - "grad_norm": 0.4669272005558014, - "learning_rate": 9.496063601810257e-06, - "loss": 0.3953, - "step": 7403 - }, - { - "epoch": 0.4838899418338671, - "grad_norm": 0.4677024483680725, - "learning_rate": 9.495910816134069e-06, - "loss": 0.3773, - "step": 7404 - }, - { - "epoch": 0.4839552970394092, - "grad_norm": 0.4466164708137512, - "learning_rate": 9.495758008529766e-06, - "loss": 0.376, - "step": 7405 - }, - { - "epoch": 0.48402065224495133, - "grad_norm": 0.48743775486946106, - "learning_rate": 9.495605178998094e-06, - "loss": 0.3743, - "step": 7406 - }, - { - "epoch": 0.4840860074504934, - "grad_norm": 0.4799644351005554, - "learning_rate": 9.495452327539797e-06, - "loss": 0.4082, - "step": 7407 - }, - { - "epoch": 0.48415136265603553, - "grad_norm": 0.4941820800304413, - "learning_rate": 9.495299454155621e-06, - "loss": 0.4203, - "step": 7408 - }, - { - "epoch": 0.48421671786157766, - "grad_norm": 0.5379561185836792, - "learning_rate": 9.495146558846315e-06, - "loss": 0.3769, - "step": 7409 - }, - { - "epoch": 0.4842820730671198, - "grad_norm": 0.4449411630630493, - "learning_rate": 9.49499364161262e-06, - "loss": 0.3813, - "step": 7410 - }, - { - "epoch": 0.4843474282726619, - "grad_norm": 0.46515390276908875, - "learning_rate": 9.494840702455284e-06, - "loss": 0.393, - "step": 7411 - }, - { - "epoch": 0.48441278347820405, - "grad_norm": 0.5021597146987915, - "learning_rate": 9.494687741375054e-06, - "loss": 0.4439, - "step": 7412 - }, - { - "epoch": 0.4844781386837462, - "grad_norm": 0.5706249475479126, - "learning_rate": 9.494534758372674e-06, - "loss": 0.5165, - "step": 7413 - }, - { - "epoch": 0.4845434938892883, - "grad_norm": 0.46963274478912354, - "learning_rate": 9.494381753448891e-06, - "loss": 0.4729, - "step": 7414 - }, - { - "epoch": 0.48460884909483043, - "grad_norm": 0.4755740761756897, - "learning_rate": 9.49422872660445e-06, - "loss": 0.3955, - "step": 7415 - }, - { - "epoch": 0.4846742043003725, - "grad_norm": 0.4645135700702667, - "learning_rate": 9.4940756778401e-06, - "loss": 0.3823, - "step": 7416 - }, - { - "epoch": 0.48473955950591463, - "grad_norm": 0.4323805868625641, - "learning_rate": 9.493922607156585e-06, - "loss": 0.335, - "step": 7417 - }, - { - "epoch": 0.48480491471145676, - "grad_norm": 0.42189720273017883, - "learning_rate": 9.493769514554654e-06, - "loss": 0.3291, - "step": 7418 - }, - { - "epoch": 0.4848702699169989, - "grad_norm": 0.48483920097351074, - "learning_rate": 9.49361640003505e-06, - "loss": 0.4195, - "step": 7419 - }, - { - "epoch": 0.484935625122541, - "grad_norm": 0.4638504087924957, - "learning_rate": 9.493463263598525e-06, - "loss": 0.3907, - "step": 7420 - }, - { - "epoch": 0.48500098032808314, - "grad_norm": 0.5052520036697388, - "learning_rate": 9.49331010524582e-06, - "loss": 0.4968, - "step": 7421 - }, - { - "epoch": 0.48506633553362527, - "grad_norm": 0.4519069194793701, - "learning_rate": 9.493156924977689e-06, - "loss": 0.3484, - "step": 7422 - }, - { - "epoch": 0.4851316907391674, - "grad_norm": 0.4787712097167969, - "learning_rate": 9.493003722794871e-06, - "loss": 0.4144, - "step": 7423 - }, - { - "epoch": 0.48519704594470947, - "grad_norm": 0.45541858673095703, - "learning_rate": 9.492850498698119e-06, - "loss": 0.3542, - "step": 7424 - }, - { - "epoch": 0.4852624011502516, - "grad_norm": 0.4757027328014374, - "learning_rate": 9.492697252688178e-06, - "loss": 0.4123, - "step": 7425 - }, - { - "epoch": 0.4853277563557937, - "grad_norm": 0.5296993255615234, - "learning_rate": 9.492543984765797e-06, - "loss": 0.4714, - "step": 7426 - }, - { - "epoch": 0.48539311156133585, - "grad_norm": 0.43457597494125366, - "learning_rate": 9.492390694931722e-06, - "loss": 0.3275, - "step": 7427 - }, - { - "epoch": 0.485458466766878, - "grad_norm": 0.49319249391555786, - "learning_rate": 9.492237383186702e-06, - "loss": 0.4424, - "step": 7428 - }, - { - "epoch": 0.4855238219724201, - "grad_norm": 0.45508813858032227, - "learning_rate": 9.492084049531483e-06, - "loss": 0.3829, - "step": 7429 - }, - { - "epoch": 0.48558917717796224, - "grad_norm": 0.5218712687492371, - "learning_rate": 9.491930693966816e-06, - "loss": 0.4896, - "step": 7430 - }, - { - "epoch": 0.48565453238350437, - "grad_norm": 0.43735891580581665, - "learning_rate": 9.491777316493444e-06, - "loss": 0.3774, - "step": 7431 - }, - { - "epoch": 0.4857198875890465, - "grad_norm": 0.4714779257774353, - "learning_rate": 9.49162391711212e-06, - "loss": 0.3966, - "step": 7432 - }, - { - "epoch": 0.48578524279458857, - "grad_norm": 0.4681144654750824, - "learning_rate": 9.491470495823589e-06, - "loss": 0.417, - "step": 7433 - }, - { - "epoch": 0.4858505980001307, - "grad_norm": 0.439555823802948, - "learning_rate": 9.491317052628601e-06, - "loss": 0.372, - "step": 7434 - }, - { - "epoch": 0.4859159532056728, - "grad_norm": 0.46620187163352966, - "learning_rate": 9.491163587527902e-06, - "loss": 0.381, - "step": 7435 - }, - { - "epoch": 0.48598130841121495, - "grad_norm": 0.4731757342815399, - "learning_rate": 9.491010100522245e-06, - "loss": 0.4169, - "step": 7436 - }, - { - "epoch": 0.4860466636167571, - "grad_norm": 0.4192809462547302, - "learning_rate": 9.490856591612373e-06, - "loss": 0.3447, - "step": 7437 - }, - { - "epoch": 0.4861120188222992, - "grad_norm": 0.41696199774742126, - "learning_rate": 9.490703060799041e-06, - "loss": 0.3449, - "step": 7438 - }, - { - "epoch": 0.48617737402784134, - "grad_norm": 0.4259806275367737, - "learning_rate": 9.490549508082994e-06, - "loss": 0.3837, - "step": 7439 - }, - { - "epoch": 0.48624272923338346, - "grad_norm": 0.4262341856956482, - "learning_rate": 9.490395933464978e-06, - "loss": 0.3328, - "step": 7440 - }, - { - "epoch": 0.48630808443892554, - "grad_norm": 0.44839173555374146, - "learning_rate": 9.490242336945748e-06, - "loss": 0.4062, - "step": 7441 - }, - { - "epoch": 0.48637343964446766, - "grad_norm": 0.45787325501441956, - "learning_rate": 9.49008871852605e-06, - "loss": 0.4065, - "step": 7442 - }, - { - "epoch": 0.4864387948500098, - "grad_norm": 0.46419623494148254, - "learning_rate": 9.489935078206634e-06, - "loss": 0.4104, - "step": 7443 - }, - { - "epoch": 0.4865041500555519, - "grad_norm": 0.4577346742153168, - "learning_rate": 9.48978141598825e-06, - "loss": 0.3915, - "step": 7444 - }, - { - "epoch": 0.48656950526109405, - "grad_norm": 0.4654706120491028, - "learning_rate": 9.489627731871644e-06, - "loss": 0.3935, - "step": 7445 - }, - { - "epoch": 0.4866348604666362, - "grad_norm": 0.45058754086494446, - "learning_rate": 9.48947402585757e-06, - "loss": 0.3947, - "step": 7446 - }, - { - "epoch": 0.4867002156721783, - "grad_norm": 0.4263327419757843, - "learning_rate": 9.489320297946777e-06, - "loss": 0.3359, - "step": 7447 - }, - { - "epoch": 0.48676557087772043, - "grad_norm": 0.4461744427680969, - "learning_rate": 9.489166548140012e-06, - "loss": 0.3928, - "step": 7448 - }, - { - "epoch": 0.4868309260832625, - "grad_norm": 0.4722278416156769, - "learning_rate": 9.489012776438028e-06, - "loss": 0.4061, - "step": 7449 - }, - { - "epoch": 0.48689628128880463, - "grad_norm": 0.4542923867702484, - "learning_rate": 9.488858982841572e-06, - "loss": 0.3547, - "step": 7450 - }, - { - "epoch": 0.48696163649434676, - "grad_norm": 0.4683986008167267, - "learning_rate": 9.488705167351396e-06, - "loss": 0.4105, - "step": 7451 - }, - { - "epoch": 0.4870269916998889, - "grad_norm": 0.44002366065979004, - "learning_rate": 9.48855132996825e-06, - "loss": 0.3653, - "step": 7452 - }, - { - "epoch": 0.487092346905431, - "grad_norm": 0.500486433506012, - "learning_rate": 9.488397470692884e-06, - "loss": 0.4377, - "step": 7453 - }, - { - "epoch": 0.48715770211097315, - "grad_norm": 0.4667614996433258, - "learning_rate": 9.488243589526049e-06, - "loss": 0.3596, - "step": 7454 - }, - { - "epoch": 0.4872230573165153, - "grad_norm": 0.4702526032924652, - "learning_rate": 9.488089686468494e-06, - "loss": 0.412, - "step": 7455 - }, - { - "epoch": 0.4872884125220574, - "grad_norm": 0.4730609059333801, - "learning_rate": 9.487935761520972e-06, - "loss": 0.4101, - "step": 7456 - }, - { - "epoch": 0.48735376772759953, - "grad_norm": 0.4924863278865814, - "learning_rate": 9.487781814684233e-06, - "loss": 0.4295, - "step": 7457 - }, - { - "epoch": 0.4874191229331416, - "grad_norm": 0.4361676573753357, - "learning_rate": 9.487627845959027e-06, - "loss": 0.3684, - "step": 7458 - }, - { - "epoch": 0.48748447813868373, - "grad_norm": 0.4205648601055145, - "learning_rate": 9.487473855346104e-06, - "loss": 0.3522, - "step": 7459 - }, - { - "epoch": 0.48754983334422586, - "grad_norm": 0.4551865756511688, - "learning_rate": 9.487319842846218e-06, - "loss": 0.3701, - "step": 7460 - }, - { - "epoch": 0.487615188549768, - "grad_norm": 0.4413236379623413, - "learning_rate": 9.487165808460118e-06, - "loss": 0.3592, - "step": 7461 - }, - { - "epoch": 0.4876805437553101, - "grad_norm": 0.4950585961341858, - "learning_rate": 9.487011752188555e-06, - "loss": 0.4188, - "step": 7462 - }, - { - "epoch": 0.48774589896085224, - "grad_norm": 0.46799200773239136, - "learning_rate": 9.486857674032281e-06, - "loss": 0.411, - "step": 7463 - }, - { - "epoch": 0.48781125416639437, - "grad_norm": 0.49813807010650635, - "learning_rate": 9.48670357399205e-06, - "loss": 0.4423, - "step": 7464 - }, - { - "epoch": 0.4878766093719365, - "grad_norm": 0.42745304107666016, - "learning_rate": 9.48654945206861e-06, - "loss": 0.3688, - "step": 7465 - }, - { - "epoch": 0.48794196457747857, - "grad_norm": 0.4313168227672577, - "learning_rate": 9.486395308262714e-06, - "loss": 0.3619, - "step": 7466 - }, - { - "epoch": 0.4880073197830207, - "grad_norm": 0.45661038160324097, - "learning_rate": 9.486241142575114e-06, - "loss": 0.4127, - "step": 7467 - }, - { - "epoch": 0.4880726749885628, - "grad_norm": 0.46303310990333557, - "learning_rate": 9.486086955006562e-06, - "loss": 0.3923, - "step": 7468 - }, - { - "epoch": 0.48813803019410495, - "grad_norm": 0.4744683504104614, - "learning_rate": 9.485932745557808e-06, - "loss": 0.3959, - "step": 7469 - }, - { - "epoch": 0.4882033853996471, - "grad_norm": 0.43088585138320923, - "learning_rate": 9.485778514229609e-06, - "loss": 0.3495, - "step": 7470 - }, - { - "epoch": 0.4882687406051892, - "grad_norm": 0.4546554684638977, - "learning_rate": 9.485624261022712e-06, - "loss": 0.3717, - "step": 7471 - }, - { - "epoch": 0.48833409581073134, - "grad_norm": 0.4435812532901764, - "learning_rate": 9.485469985937871e-06, - "loss": 0.3625, - "step": 7472 - }, - { - "epoch": 0.48839945101627347, - "grad_norm": 0.4912225902080536, - "learning_rate": 9.485315688975842e-06, - "loss": 0.4572, - "step": 7473 - }, - { - "epoch": 0.4884648062218156, - "grad_norm": 0.5011767745018005, - "learning_rate": 9.485161370137372e-06, - "loss": 0.4345, - "step": 7474 - }, - { - "epoch": 0.48853016142735767, - "grad_norm": 0.49139299988746643, - "learning_rate": 9.485007029423216e-06, - "loss": 0.3975, - "step": 7475 - }, - { - "epoch": 0.4885955166328998, - "grad_norm": 0.47588053345680237, - "learning_rate": 9.484852666834128e-06, - "loss": 0.4218, - "step": 7476 - }, - { - "epoch": 0.4886608718384419, - "grad_norm": 0.43201300501823425, - "learning_rate": 9.48469828237086e-06, - "loss": 0.3376, - "step": 7477 - }, - { - "epoch": 0.48872622704398405, - "grad_norm": 0.48201489448547363, - "learning_rate": 9.484543876034164e-06, - "loss": 0.4284, - "step": 7478 - }, - { - "epoch": 0.4887915822495262, - "grad_norm": 0.45606234669685364, - "learning_rate": 9.484389447824795e-06, - "loss": 0.4013, - "step": 7479 - }, - { - "epoch": 0.4888569374550683, - "grad_norm": 0.4488910138607025, - "learning_rate": 9.484234997743505e-06, - "loss": 0.4096, - "step": 7480 - }, - { - "epoch": 0.48892229266061044, - "grad_norm": 0.48581674695014954, - "learning_rate": 9.484080525791047e-06, - "loss": 0.421, - "step": 7481 - }, - { - "epoch": 0.48898764786615256, - "grad_norm": 0.4653454124927521, - "learning_rate": 9.483926031968174e-06, - "loss": 0.3806, - "step": 7482 - }, - { - "epoch": 0.48905300307169464, - "grad_norm": 0.4340418577194214, - "learning_rate": 9.48377151627564e-06, - "loss": 0.3481, - "step": 7483 - }, - { - "epoch": 0.48911835827723676, - "grad_norm": 0.47066178917884827, - "learning_rate": 9.483616978714201e-06, - "loss": 0.4243, - "step": 7484 - }, - { - "epoch": 0.4891837134827789, - "grad_norm": 0.5573233962059021, - "learning_rate": 9.483462419284607e-06, - "loss": 0.5169, - "step": 7485 - }, - { - "epoch": 0.489249068688321, - "grad_norm": 0.4547271430492401, - "learning_rate": 9.483307837987615e-06, - "loss": 0.3952, - "step": 7486 - }, - { - "epoch": 0.48931442389386315, - "grad_norm": 0.4757891893386841, - "learning_rate": 9.483153234823976e-06, - "loss": 0.3764, - "step": 7487 - }, - { - "epoch": 0.4893797790994053, - "grad_norm": 0.4782652258872986, - "learning_rate": 9.482998609794447e-06, - "loss": 0.4111, - "step": 7488 - }, - { - "epoch": 0.4894451343049474, - "grad_norm": 0.4628230035305023, - "learning_rate": 9.48284396289978e-06, - "loss": 0.3851, - "step": 7489 - }, - { - "epoch": 0.48951048951048953, - "grad_norm": 0.4211656153202057, - "learning_rate": 9.48268929414073e-06, - "loss": 0.3503, - "step": 7490 - }, - { - "epoch": 0.4895758447160316, - "grad_norm": 0.4701022505760193, - "learning_rate": 9.482534603518052e-06, - "loss": 0.4122, - "step": 7491 - }, - { - "epoch": 0.48964119992157373, - "grad_norm": 0.45558977127075195, - "learning_rate": 9.482379891032499e-06, - "loss": 0.377, - "step": 7492 - }, - { - "epoch": 0.48970655512711586, - "grad_norm": 0.46655386686325073, - "learning_rate": 9.482225156684826e-06, - "loss": 0.4386, - "step": 7493 - }, - { - "epoch": 0.489771910332658, - "grad_norm": 0.4944005310535431, - "learning_rate": 9.48207040047579e-06, - "loss": 0.4416, - "step": 7494 - }, - { - "epoch": 0.4898372655382001, - "grad_norm": 0.44188159704208374, - "learning_rate": 9.481915622406141e-06, - "loss": 0.3857, - "step": 7495 - }, - { - "epoch": 0.48990262074374225, - "grad_norm": 0.42931339144706726, - "learning_rate": 9.48176082247664e-06, - "loss": 0.3474, - "step": 7496 - }, - { - "epoch": 0.4899679759492844, - "grad_norm": 0.44169536232948303, - "learning_rate": 9.481606000688037e-06, - "loss": 0.3868, - "step": 7497 - }, - { - "epoch": 0.4900333311548265, - "grad_norm": 0.4467918276786804, - "learning_rate": 9.481451157041089e-06, - "loss": 0.4069, - "step": 7498 - }, - { - "epoch": 0.49009868636036863, - "grad_norm": 0.44880470633506775, - "learning_rate": 9.481296291536553e-06, - "loss": 0.3887, - "step": 7499 - }, - { - "epoch": 0.4901640415659107, - "grad_norm": 0.4240676760673523, - "learning_rate": 9.48114140417518e-06, - "loss": 0.3818, - "step": 7500 - }, - { - "epoch": 0.49022939677145283, - "grad_norm": 0.4574185311794281, - "learning_rate": 9.480986494957729e-06, - "loss": 0.4024, - "step": 7501 - }, - { - "epoch": 0.49029475197699496, - "grad_norm": 0.4692107141017914, - "learning_rate": 9.480831563884955e-06, - "loss": 0.4067, - "step": 7502 - }, - { - "epoch": 0.4903601071825371, - "grad_norm": 0.4708959758281708, - "learning_rate": 9.480676610957612e-06, - "loss": 0.4044, - "step": 7503 - }, - { - "epoch": 0.4904254623880792, - "grad_norm": 0.4837050437927246, - "learning_rate": 9.48052163617646e-06, - "loss": 0.4194, - "step": 7504 - }, - { - "epoch": 0.49049081759362134, - "grad_norm": 0.44029760360717773, - "learning_rate": 9.480366639542247e-06, - "loss": 0.3344, - "step": 7505 - }, - { - "epoch": 0.49055617279916347, - "grad_norm": 0.46228793263435364, - "learning_rate": 9.480211621055737e-06, - "loss": 0.3911, - "step": 7506 - }, - { - "epoch": 0.4906215280047056, - "grad_norm": 0.47261396050453186, - "learning_rate": 9.480056580717681e-06, - "loss": 0.4063, - "step": 7507 - }, - { - "epoch": 0.49068688321024767, - "grad_norm": 0.47378870844841003, - "learning_rate": 9.479901518528839e-06, - "loss": 0.4487, - "step": 7508 - }, - { - "epoch": 0.4907522384157898, - "grad_norm": 0.4447380602359772, - "learning_rate": 9.479746434489964e-06, - "loss": 0.3941, - "step": 7509 - }, - { - "epoch": 0.4908175936213319, - "grad_norm": 0.4341123700141907, - "learning_rate": 9.479591328601814e-06, - "loss": 0.3261, - "step": 7510 - }, - { - "epoch": 0.49088294882687405, - "grad_norm": 0.47509121894836426, - "learning_rate": 9.479436200865144e-06, - "loss": 0.3712, - "step": 7511 - }, - { - "epoch": 0.4909483040324162, - "grad_norm": 0.4571378827095032, - "learning_rate": 9.479281051280713e-06, - "loss": 0.3586, - "step": 7512 - }, - { - "epoch": 0.4910136592379583, - "grad_norm": 0.4923804998397827, - "learning_rate": 9.479125879849278e-06, - "loss": 0.4517, - "step": 7513 - }, - { - "epoch": 0.49107901444350044, - "grad_norm": 0.4679352939128876, - "learning_rate": 9.478970686571593e-06, - "loss": 0.4071, - "step": 7514 - }, - { - "epoch": 0.49114436964904257, - "grad_norm": 0.4228973686695099, - "learning_rate": 9.478815471448416e-06, - "loss": 0.3366, - "step": 7515 - }, - { - "epoch": 0.4912097248545847, - "grad_norm": 0.4530769884586334, - "learning_rate": 9.478660234480504e-06, - "loss": 0.3564, - "step": 7516 - }, - { - "epoch": 0.49127508006012677, - "grad_norm": 0.5609104037284851, - "learning_rate": 9.478504975668616e-06, - "loss": 0.4835, - "step": 7517 - }, - { - "epoch": 0.4913404352656689, - "grad_norm": 0.46037769317626953, - "learning_rate": 9.478349695013506e-06, - "loss": 0.3851, - "step": 7518 - }, - { - "epoch": 0.491405790471211, - "grad_norm": 0.4529840350151062, - "learning_rate": 9.478194392515934e-06, - "loss": 0.385, - "step": 7519 - }, - { - "epoch": 0.49147114567675315, - "grad_norm": 0.4782061278820038, - "learning_rate": 9.478039068176657e-06, - "loss": 0.4092, - "step": 7520 - }, - { - "epoch": 0.4915365008822953, - "grad_norm": 0.4545944929122925, - "learning_rate": 9.47788372199643e-06, - "loss": 0.402, - "step": 7521 - }, - { - "epoch": 0.4916018560878374, - "grad_norm": 0.4744200110435486, - "learning_rate": 9.477728353976015e-06, - "loss": 0.4138, - "step": 7522 - }, - { - "epoch": 0.49166721129337954, - "grad_norm": 0.4526703357696533, - "learning_rate": 9.477572964116166e-06, - "loss": 0.3796, - "step": 7523 - }, - { - "epoch": 0.49173256649892166, - "grad_norm": 0.4858260154724121, - "learning_rate": 9.477417552417644e-06, - "loss": 0.4297, - "step": 7524 - }, - { - "epoch": 0.49179792170446374, - "grad_norm": 0.4929284453392029, - "learning_rate": 9.477262118881206e-06, - "loss": 0.455, - "step": 7525 - }, - { - "epoch": 0.49186327691000586, - "grad_norm": 0.4835423231124878, - "learning_rate": 9.477106663507607e-06, - "loss": 0.4435, - "step": 7526 - }, - { - "epoch": 0.491928632115548, - "grad_norm": 0.4589439630508423, - "learning_rate": 9.476951186297609e-06, - "loss": 0.3727, - "step": 7527 - }, - { - "epoch": 0.4919939873210901, - "grad_norm": 0.43981215357780457, - "learning_rate": 9.47679568725197e-06, - "loss": 0.3579, - "step": 7528 - }, - { - "epoch": 0.49205934252663225, - "grad_norm": 0.4801551103591919, - "learning_rate": 9.476640166371446e-06, - "loss": 0.4125, - "step": 7529 - }, - { - "epoch": 0.4921246977321744, - "grad_norm": 0.4690263271331787, - "learning_rate": 9.476484623656799e-06, - "loss": 0.4297, - "step": 7530 - }, - { - "epoch": 0.4921900529377165, - "grad_norm": 0.4995599687099457, - "learning_rate": 9.476329059108783e-06, - "loss": 0.431, - "step": 7531 - }, - { - "epoch": 0.49225540814325863, - "grad_norm": 0.4623638093471527, - "learning_rate": 9.476173472728163e-06, - "loss": 0.3899, - "step": 7532 - }, - { - "epoch": 0.4923207633488007, - "grad_norm": 0.5120333433151245, - "learning_rate": 9.476017864515692e-06, - "loss": 0.4663, - "step": 7533 - }, - { - "epoch": 0.49238611855434283, - "grad_norm": 0.47190403938293457, - "learning_rate": 9.475862234472131e-06, - "loss": 0.4761, - "step": 7534 - }, - { - "epoch": 0.49245147375988496, - "grad_norm": 0.4381028711795807, - "learning_rate": 9.475706582598241e-06, - "loss": 0.3473, - "step": 7535 - }, - { - "epoch": 0.4925168289654271, - "grad_norm": 0.4686459004878998, - "learning_rate": 9.475550908894777e-06, - "loss": 0.413, - "step": 7536 - }, - { - "epoch": 0.4925821841709692, - "grad_norm": 0.4188326597213745, - "learning_rate": 9.475395213362502e-06, - "loss": 0.3327, - "step": 7537 - }, - { - "epoch": 0.49264753937651135, - "grad_norm": 0.43074822425842285, - "learning_rate": 9.475239496002174e-06, - "loss": 0.3377, - "step": 7538 - }, - { - "epoch": 0.4927128945820535, - "grad_norm": 0.460660457611084, - "learning_rate": 9.475083756814554e-06, - "loss": 0.3871, - "step": 7539 - }, - { - "epoch": 0.4927782497875956, - "grad_norm": 0.4765304923057556, - "learning_rate": 9.474927995800398e-06, - "loss": 0.449, - "step": 7540 - }, - { - "epoch": 0.49284360499313773, - "grad_norm": 0.4728533625602722, - "learning_rate": 9.47477221296047e-06, - "loss": 0.3679, - "step": 7541 - }, - { - "epoch": 0.4929089601986798, - "grad_norm": 0.41927409172058105, - "learning_rate": 9.474616408295525e-06, - "loss": 0.3451, - "step": 7542 - }, - { - "epoch": 0.49297431540422193, - "grad_norm": 0.4750388264656067, - "learning_rate": 9.474460581806328e-06, - "loss": 0.4336, - "step": 7543 - }, - { - "epoch": 0.49303967060976406, - "grad_norm": 0.46211883425712585, - "learning_rate": 9.474304733493635e-06, - "loss": 0.3752, - "step": 7544 - }, - { - "epoch": 0.4931050258153062, - "grad_norm": 0.427681028842926, - "learning_rate": 9.474148863358209e-06, - "loss": 0.3273, - "step": 7545 - }, - { - "epoch": 0.4931703810208483, - "grad_norm": 0.48589053750038147, - "learning_rate": 9.473992971400809e-06, - "loss": 0.4333, - "step": 7546 - }, - { - "epoch": 0.49323573622639044, - "grad_norm": 0.459023118019104, - "learning_rate": 9.473837057622195e-06, - "loss": 0.4447, - "step": 7547 - }, - { - "epoch": 0.49330109143193257, - "grad_norm": 0.44562047719955444, - "learning_rate": 9.473681122023128e-06, - "loss": 0.3549, - "step": 7548 - }, - { - "epoch": 0.4933664466374747, - "grad_norm": 0.45815688371658325, - "learning_rate": 9.473525164604367e-06, - "loss": 0.3903, - "step": 7549 - }, - { - "epoch": 0.49343180184301677, - "grad_norm": 0.4764108955860138, - "learning_rate": 9.473369185366676e-06, - "loss": 0.4393, - "step": 7550 - }, - { - "epoch": 0.4934971570485589, - "grad_norm": 0.46385669708251953, - "learning_rate": 9.473213184310812e-06, - "loss": 0.3712, - "step": 7551 - }, - { - "epoch": 0.493562512254101, - "grad_norm": 0.4341394603252411, - "learning_rate": 9.473057161437539e-06, - "loss": 0.3423, - "step": 7552 - }, - { - "epoch": 0.49362786745964315, - "grad_norm": 0.4430205821990967, - "learning_rate": 9.472901116747616e-06, - "loss": 0.3706, - "step": 7553 - }, - { - "epoch": 0.4936932226651853, - "grad_norm": 0.4221726655960083, - "learning_rate": 9.472745050241805e-06, - "loss": 0.3316, - "step": 7554 - }, - { - "epoch": 0.4937585778707274, - "grad_norm": 0.45707279443740845, - "learning_rate": 9.472588961920866e-06, - "loss": 0.3918, - "step": 7555 - }, - { - "epoch": 0.49382393307626954, - "grad_norm": 0.43674278259277344, - "learning_rate": 9.472432851785563e-06, - "loss": 0.3556, - "step": 7556 - }, - { - "epoch": 0.49388928828181167, - "grad_norm": 0.4903871417045593, - "learning_rate": 9.472276719836652e-06, - "loss": 0.4391, - "step": 7557 - }, - { - "epoch": 0.4939546434873538, - "grad_norm": 0.4739069640636444, - "learning_rate": 9.472120566074902e-06, - "loss": 0.3787, - "step": 7558 - }, - { - "epoch": 0.49401999869289587, - "grad_norm": 0.39869147539138794, - "learning_rate": 9.471964390501069e-06, - "loss": 0.3024, - "step": 7559 - }, - { - "epoch": 0.494085353898438, - "grad_norm": 0.45095956325531006, - "learning_rate": 9.471808193115917e-06, - "loss": 0.3789, - "step": 7560 - }, - { - "epoch": 0.4941507091039801, - "grad_norm": 0.41862669587135315, - "learning_rate": 9.471651973920206e-06, - "loss": 0.3523, - "step": 7561 - }, - { - "epoch": 0.49421606430952225, - "grad_norm": 0.46903014183044434, - "learning_rate": 9.471495732914699e-06, - "loss": 0.4093, - "step": 7562 - }, - { - "epoch": 0.4942814195150644, - "grad_norm": 0.4792992174625397, - "learning_rate": 9.47133947010016e-06, - "loss": 0.4428, - "step": 7563 - }, - { - "epoch": 0.4943467747206065, - "grad_norm": 0.46021440625190735, - "learning_rate": 9.47118318547735e-06, - "loss": 0.3637, - "step": 7564 - }, - { - "epoch": 0.49441212992614864, - "grad_norm": 0.4644688069820404, - "learning_rate": 9.47102687904703e-06, - "loss": 0.4203, - "step": 7565 - }, - { - "epoch": 0.49447748513169076, - "grad_norm": 0.42194512486457825, - "learning_rate": 9.470870550809961e-06, - "loss": 0.314, - "step": 7566 - }, - { - "epoch": 0.49454284033723284, - "grad_norm": 0.41313961148262024, - "learning_rate": 9.47071420076691e-06, - "loss": 0.3436, - "step": 7567 - }, - { - "epoch": 0.49460819554277496, - "grad_norm": 0.4607747495174408, - "learning_rate": 9.470557828918634e-06, - "loss": 0.4399, - "step": 7568 - }, - { - "epoch": 0.4946735507483171, - "grad_norm": 0.39260971546173096, - "learning_rate": 9.4704014352659e-06, - "loss": 0.3153, - "step": 7569 - }, - { - "epoch": 0.4947389059538592, - "grad_norm": 0.442340612411499, - "learning_rate": 9.47024501980947e-06, - "loss": 0.3602, - "step": 7570 - }, - { - "epoch": 0.49480426115940135, - "grad_norm": 0.43838855624198914, - "learning_rate": 9.470088582550108e-06, - "loss": 0.3537, - "step": 7571 - }, - { - "epoch": 0.4948696163649435, - "grad_norm": 0.41797998547554016, - "learning_rate": 9.469932123488574e-06, - "loss": 0.3481, - "step": 7572 - }, - { - "epoch": 0.4949349715704856, - "grad_norm": 0.45995160937309265, - "learning_rate": 9.469775642625632e-06, - "loss": 0.3902, - "step": 7573 - }, - { - "epoch": 0.49500032677602773, - "grad_norm": 0.4653254449367523, - "learning_rate": 9.469619139962046e-06, - "loss": 0.4177, - "step": 7574 - }, - { - "epoch": 0.4950656819815698, - "grad_norm": 0.41050922870635986, - "learning_rate": 9.469462615498579e-06, - "loss": 0.3026, - "step": 7575 - }, - { - "epoch": 0.49513103718711193, - "grad_norm": 0.46875399351119995, - "learning_rate": 9.469306069235994e-06, - "loss": 0.4102, - "step": 7576 - }, - { - "epoch": 0.49519639239265406, - "grad_norm": 0.4898216128349304, - "learning_rate": 9.469149501175056e-06, - "loss": 0.4231, - "step": 7577 - }, - { - "epoch": 0.4952617475981962, - "grad_norm": 0.46499302983283997, - "learning_rate": 9.468992911316527e-06, - "loss": 0.4477, - "step": 7578 - }, - { - "epoch": 0.4953271028037383, - "grad_norm": 0.44518306851387024, - "learning_rate": 9.468836299661171e-06, - "loss": 0.4132, - "step": 7579 - }, - { - "epoch": 0.49539245800928045, - "grad_norm": 0.4644605219364166, - "learning_rate": 9.468679666209752e-06, - "loss": 0.4504, - "step": 7580 - }, - { - "epoch": 0.4954578132148226, - "grad_norm": 0.4470239281654358, - "learning_rate": 9.468523010963036e-06, - "loss": 0.3617, - "step": 7581 - }, - { - "epoch": 0.4955231684203647, - "grad_norm": 0.43484559655189514, - "learning_rate": 9.468366333921783e-06, - "loss": 0.3422, - "step": 7582 - }, - { - "epoch": 0.49558852362590683, - "grad_norm": 0.4375055134296417, - "learning_rate": 9.468209635086762e-06, - "loss": 0.3793, - "step": 7583 - }, - { - "epoch": 0.4956538788314489, - "grad_norm": 0.4536997079849243, - "learning_rate": 9.468052914458732e-06, - "loss": 0.4377, - "step": 7584 - }, - { - "epoch": 0.49571923403699103, - "grad_norm": 0.4456861615180969, - "learning_rate": 9.467896172038462e-06, - "loss": 0.3526, - "step": 7585 - }, - { - "epoch": 0.49578458924253316, - "grad_norm": 0.4149416387081146, - "learning_rate": 9.467739407826714e-06, - "loss": 0.3493, - "step": 7586 - }, - { - "epoch": 0.4958499444480753, - "grad_norm": 0.47360849380493164, - "learning_rate": 9.467582621824252e-06, - "loss": 0.447, - "step": 7587 - }, - { - "epoch": 0.4959152996536174, - "grad_norm": 0.44779253005981445, - "learning_rate": 9.467425814031843e-06, - "loss": 0.3444, - "step": 7588 - }, - { - "epoch": 0.49598065485915954, - "grad_norm": 0.46606341004371643, - "learning_rate": 9.46726898445025e-06, - "loss": 0.3895, - "step": 7589 - }, - { - "epoch": 0.49604601006470167, - "grad_norm": 0.49216240644454956, - "learning_rate": 9.467112133080239e-06, - "loss": 0.4247, - "step": 7590 - }, - { - "epoch": 0.4961113652702438, - "grad_norm": 0.4630853831768036, - "learning_rate": 9.466955259922574e-06, - "loss": 0.3872, - "step": 7591 - }, - { - "epoch": 0.49617672047578587, - "grad_norm": 0.465962290763855, - "learning_rate": 9.466798364978023e-06, - "loss": 0.4334, - "step": 7592 - }, - { - "epoch": 0.496242075681328, - "grad_norm": 0.4391988515853882, - "learning_rate": 9.466641448247346e-06, - "loss": 0.3757, - "step": 7593 - }, - { - "epoch": 0.4963074308868701, - "grad_norm": 0.4161827564239502, - "learning_rate": 9.466484509731313e-06, - "loss": 0.3297, - "step": 7594 - }, - { - "epoch": 0.49637278609241225, - "grad_norm": 0.4143718481063843, - "learning_rate": 9.466327549430688e-06, - "loss": 0.3426, - "step": 7595 - }, - { - "epoch": 0.4964381412979544, - "grad_norm": 0.5041487216949463, - "learning_rate": 9.466170567346233e-06, - "loss": 0.4603, - "step": 7596 - }, - { - "epoch": 0.4965034965034965, - "grad_norm": 0.44594258069992065, - "learning_rate": 9.46601356347872e-06, - "loss": 0.3618, - "step": 7597 - }, - { - "epoch": 0.49656885170903864, - "grad_norm": 0.6060497760772705, - "learning_rate": 9.465856537828911e-06, - "loss": 0.3925, - "step": 7598 - }, - { - "epoch": 0.49663420691458077, - "grad_norm": 0.4989464282989502, - "learning_rate": 9.465699490397572e-06, - "loss": 0.4287, - "step": 7599 - }, - { - "epoch": 0.4966995621201229, - "grad_norm": 0.4288352131843567, - "learning_rate": 9.46554242118547e-06, - "loss": 0.3779, - "step": 7600 - }, - { - "epoch": 0.49676491732566497, - "grad_norm": 0.4628756046295166, - "learning_rate": 9.46538533019337e-06, - "loss": 0.3878, - "step": 7601 - }, - { - "epoch": 0.4968302725312071, - "grad_norm": 0.5000662803649902, - "learning_rate": 9.465228217422042e-06, - "loss": 0.4319, - "step": 7602 - }, - { - "epoch": 0.4968956277367492, - "grad_norm": 0.4614022672176361, - "learning_rate": 9.465071082872246e-06, - "loss": 0.3907, - "step": 7603 - }, - { - "epoch": 0.49696098294229135, - "grad_norm": 0.4405010938644409, - "learning_rate": 9.46491392654475e-06, - "loss": 0.3847, - "step": 7604 - }, - { - "epoch": 0.4970263381478335, - "grad_norm": 0.45698612928390503, - "learning_rate": 9.464756748440324e-06, - "loss": 0.3885, - "step": 7605 - }, - { - "epoch": 0.4970916933533756, - "grad_norm": 0.4648208022117615, - "learning_rate": 9.464599548559734e-06, - "loss": 0.4246, - "step": 7606 - }, - { - "epoch": 0.49715704855891774, - "grad_norm": 0.49123719334602356, - "learning_rate": 9.464442326903745e-06, - "loss": 0.4749, - "step": 7607 - }, - { - "epoch": 0.49722240376445986, - "grad_norm": 0.5127215385437012, - "learning_rate": 9.46428508347312e-06, - "loss": 0.4446, - "step": 7608 - }, - { - "epoch": 0.49728775897000194, - "grad_norm": 0.42929407954216003, - "learning_rate": 9.464127818268635e-06, - "loss": 0.3494, - "step": 7609 - }, - { - "epoch": 0.49735311417554406, - "grad_norm": 0.476552277803421, - "learning_rate": 9.46397053129105e-06, - "loss": 0.4076, - "step": 7610 - }, - { - "epoch": 0.4974184693810862, - "grad_norm": 0.41644465923309326, - "learning_rate": 9.463813222541134e-06, - "loss": 0.3119, - "step": 7611 - }, - { - "epoch": 0.4974838245866283, - "grad_norm": 0.4688579738140106, - "learning_rate": 9.463655892019656e-06, - "loss": 0.4709, - "step": 7612 - }, - { - "epoch": 0.49754917979217045, - "grad_norm": 0.4837631583213806, - "learning_rate": 9.463498539727381e-06, - "loss": 0.4388, - "step": 7613 - }, - { - "epoch": 0.4976145349977126, - "grad_norm": 0.44618090987205505, - "learning_rate": 9.463341165665076e-06, - "loss": 0.3478, - "step": 7614 - }, - { - "epoch": 0.4976798902032547, - "grad_norm": 0.4475180208683014, - "learning_rate": 9.46318376983351e-06, - "loss": 0.3427, - "step": 7615 - }, - { - "epoch": 0.49774524540879683, - "grad_norm": 0.45974820852279663, - "learning_rate": 9.463026352233454e-06, - "loss": 0.3651, - "step": 7616 - }, - { - "epoch": 0.4978106006143389, - "grad_norm": 0.4433004558086395, - "learning_rate": 9.462868912865669e-06, - "loss": 0.3682, - "step": 7617 - }, - { - "epoch": 0.49787595581988103, - "grad_norm": 0.4583650529384613, - "learning_rate": 9.462711451730926e-06, - "loss": 0.4335, - "step": 7618 - }, - { - "epoch": 0.49794131102542316, - "grad_norm": 0.48022961616516113, - "learning_rate": 9.462553968829995e-06, - "loss": 0.4164, - "step": 7619 - }, - { - "epoch": 0.4980066662309653, - "grad_norm": 0.440767765045166, - "learning_rate": 9.462396464163642e-06, - "loss": 0.3813, - "step": 7620 - }, - { - "epoch": 0.4980720214365074, - "grad_norm": 0.5964440703392029, - "learning_rate": 9.462238937732635e-06, - "loss": 0.3971, - "step": 7621 - }, - { - "epoch": 0.49813737664204955, - "grad_norm": 0.43554821610450745, - "learning_rate": 9.462081389537742e-06, - "loss": 0.3623, - "step": 7622 - }, - { - "epoch": 0.4982027318475917, - "grad_norm": 0.8842023611068726, - "learning_rate": 9.461923819579733e-06, - "loss": 0.3747, - "step": 7623 - }, - { - "epoch": 0.4982680870531338, - "grad_norm": 0.4726372957229614, - "learning_rate": 9.461766227859376e-06, - "loss": 0.3909, - "step": 7624 - }, - { - "epoch": 0.49833344225867593, - "grad_norm": 0.47453370690345764, - "learning_rate": 9.46160861437744e-06, - "loss": 0.4305, - "step": 7625 - }, - { - "epoch": 0.498398797464218, - "grad_norm": 0.45982107520103455, - "learning_rate": 9.461450979134692e-06, - "loss": 0.388, - "step": 7626 - }, - { - "epoch": 0.49846415266976013, - "grad_norm": 0.4556705355644226, - "learning_rate": 9.461293322131903e-06, - "loss": 0.3979, - "step": 7627 - }, - { - "epoch": 0.49852950787530226, - "grad_norm": 0.4408996105194092, - "learning_rate": 9.46113564336984e-06, - "loss": 0.3409, - "step": 7628 - }, - { - "epoch": 0.4985948630808444, - "grad_norm": 0.41892609000205994, - "learning_rate": 9.460977942849274e-06, - "loss": 0.3577, - "step": 7629 - }, - { - "epoch": 0.4986602182863865, - "grad_norm": 0.4436472952365875, - "learning_rate": 9.460820220570972e-06, - "loss": 0.35, - "step": 7630 - }, - { - "epoch": 0.49872557349192864, - "grad_norm": 0.4511732757091522, - "learning_rate": 9.460662476535707e-06, - "loss": 0.3634, - "step": 7631 - }, - { - "epoch": 0.49879092869747077, - "grad_norm": 0.44922956824302673, - "learning_rate": 9.460504710744243e-06, - "loss": 0.3675, - "step": 7632 - }, - { - "epoch": 0.4988562839030129, - "grad_norm": 0.4979376196861267, - "learning_rate": 9.460346923197353e-06, - "loss": 0.4271, - "step": 7633 - }, - { - "epoch": 0.49892163910855497, - "grad_norm": 0.45745736360549927, - "learning_rate": 9.460189113895805e-06, - "loss": 0.3868, - "step": 7634 - }, - { - "epoch": 0.4989869943140971, - "grad_norm": 0.4833010733127594, - "learning_rate": 9.46003128284037e-06, - "loss": 0.4454, - "step": 7635 - }, - { - "epoch": 0.4990523495196392, - "grad_norm": 0.4739595353603363, - "learning_rate": 9.459873430031819e-06, - "loss": 0.3982, - "step": 7636 - }, - { - "epoch": 0.49911770472518135, - "grad_norm": 0.4343498945236206, - "learning_rate": 9.459715555470918e-06, - "loss": 0.3629, - "step": 7637 - }, - { - "epoch": 0.4991830599307235, - "grad_norm": 0.41716471314430237, - "learning_rate": 9.45955765915844e-06, - "loss": 0.325, - "step": 7638 - }, - { - "epoch": 0.4992484151362656, - "grad_norm": 0.4806312620639801, - "learning_rate": 9.459399741095155e-06, - "loss": 0.4212, - "step": 7639 - }, - { - "epoch": 0.49931377034180774, - "grad_norm": 0.42395907640457153, - "learning_rate": 9.459241801281833e-06, - "loss": 0.3453, - "step": 7640 - }, - { - "epoch": 0.49937912554734987, - "grad_norm": 0.43754011392593384, - "learning_rate": 9.459083839719244e-06, - "loss": 0.4266, - "step": 7641 - }, - { - "epoch": 0.499444480752892, - "grad_norm": 0.440309077501297, - "learning_rate": 9.458925856408157e-06, - "loss": 0.3651, - "step": 7642 - }, - { - "epoch": 0.49950983595843407, - "grad_norm": 0.45112180709838867, - "learning_rate": 9.458767851349344e-06, - "loss": 0.4001, - "step": 7643 - }, - { - "epoch": 0.4995751911639762, - "grad_norm": 0.4809283912181854, - "learning_rate": 9.458609824543575e-06, - "loss": 0.4402, - "step": 7644 - }, - { - "epoch": 0.4996405463695183, - "grad_norm": 0.4792030155658722, - "learning_rate": 9.458451775991622e-06, - "loss": 0.4109, - "step": 7645 - }, - { - "epoch": 0.49970590157506045, - "grad_norm": 0.4835467040538788, - "learning_rate": 9.458293705694255e-06, - "loss": 0.4157, - "step": 7646 - }, - { - "epoch": 0.4997712567806026, - "grad_norm": 0.4652770757675171, - "learning_rate": 9.458135613652245e-06, - "loss": 0.4242, - "step": 7647 - }, - { - "epoch": 0.4998366119861447, - "grad_norm": 0.46720170974731445, - "learning_rate": 9.457977499866364e-06, - "loss": 0.4125, - "step": 7648 - }, - { - "epoch": 0.49990196719168684, - "grad_norm": 0.4799783229827881, - "learning_rate": 9.457819364337382e-06, - "loss": 0.4267, - "step": 7649 - }, - { - "epoch": 0.49996732239722896, - "grad_norm": 0.436085045337677, - "learning_rate": 9.45766120706607e-06, - "loss": 0.3551, - "step": 7650 - }, - { - "epoch": 0.500032677602771, - "grad_norm": 0.4366012215614319, - "learning_rate": 9.457503028053201e-06, - "loss": 0.3679, - "step": 7651 - }, - { - "epoch": 0.5000980328083132, - "grad_norm": 0.4659341275691986, - "learning_rate": 9.457344827299543e-06, - "loss": 0.3962, - "step": 7652 - }, - { - "epoch": 0.5001633880138553, - "grad_norm": 0.44960251450538635, - "learning_rate": 9.457186604805873e-06, - "loss": 0.3708, - "step": 7653 - }, - { - "epoch": 0.5002287432193975, - "grad_norm": 0.45768725872039795, - "learning_rate": 9.45702836057296e-06, - "loss": 0.3933, - "step": 7654 - }, - { - "epoch": 0.5002940984249395, - "grad_norm": 0.4472002685070038, - "learning_rate": 9.456870094601573e-06, - "loss": 0.4221, - "step": 7655 - }, - { - "epoch": 0.5003594536304816, - "grad_norm": 0.4778624475002289, - "learning_rate": 9.456711806892488e-06, - "loss": 0.3788, - "step": 7656 - }, - { - "epoch": 0.5004248088360238, - "grad_norm": 0.49460941553115845, - "learning_rate": 9.456553497446477e-06, - "loss": 0.402, - "step": 7657 - }, - { - "epoch": 0.5004901640415659, - "grad_norm": 0.4877833127975464, - "learning_rate": 9.45639516626431e-06, - "loss": 0.4218, - "step": 7658 - }, - { - "epoch": 0.5005555192471081, - "grad_norm": 0.6446693539619446, - "learning_rate": 9.456236813346758e-06, - "loss": 0.4076, - "step": 7659 - }, - { - "epoch": 0.5006208744526501, - "grad_norm": 0.4596126973628998, - "learning_rate": 9.456078438694597e-06, - "loss": 0.3796, - "step": 7660 - }, - { - "epoch": 0.5006862296581923, - "grad_norm": 0.483416885137558, - "learning_rate": 9.455920042308598e-06, - "loss": 0.4063, - "step": 7661 - }, - { - "epoch": 0.5007515848637344, - "grad_norm": 0.4501649737358093, - "learning_rate": 9.455761624189531e-06, - "loss": 0.3991, - "step": 7662 - }, - { - "epoch": 0.5008169400692766, - "grad_norm": 0.42372453212738037, - "learning_rate": 9.455603184338174e-06, - "loss": 0.3454, - "step": 7663 - }, - { - "epoch": 0.5008822952748186, - "grad_norm": 0.45814022421836853, - "learning_rate": 9.455444722755294e-06, - "loss": 0.3552, - "step": 7664 - }, - { - "epoch": 0.5009476504803607, - "grad_norm": 0.46859997510910034, - "learning_rate": 9.455286239441668e-06, - "loss": 0.3987, - "step": 7665 - }, - { - "epoch": 0.5010130056859029, - "grad_norm": 0.44942134618759155, - "learning_rate": 9.455127734398066e-06, - "loss": 0.3784, - "step": 7666 - }, - { - "epoch": 0.501078360891445, - "grad_norm": 0.5670586824417114, - "learning_rate": 9.454969207625264e-06, - "loss": 0.4325, - "step": 7667 - }, - { - "epoch": 0.5011437160969872, - "grad_norm": 0.4901164174079895, - "learning_rate": 9.454810659124035e-06, - "loss": 0.4114, - "step": 7668 - }, - { - "epoch": 0.5012090713025292, - "grad_norm": 0.4789154827594757, - "learning_rate": 9.454652088895149e-06, - "loss": 0.4034, - "step": 7669 - }, - { - "epoch": 0.5012744265080714, - "grad_norm": 0.5097324252128601, - "learning_rate": 9.454493496939383e-06, - "loss": 0.4225, - "step": 7670 - }, - { - "epoch": 0.5013397817136135, - "grad_norm": 0.4476543664932251, - "learning_rate": 9.454334883257507e-06, - "loss": 0.3139, - "step": 7671 - }, - { - "epoch": 0.5014051369191556, - "grad_norm": 0.5303950309753418, - "learning_rate": 9.4541762478503e-06, - "loss": 0.3982, - "step": 7672 - }, - { - "epoch": 0.5014704921246977, - "grad_norm": 0.5110664963722229, - "learning_rate": 9.454017590718529e-06, - "loss": 0.4341, - "step": 7673 - }, - { - "epoch": 0.5015358473302398, - "grad_norm": 0.4471319317817688, - "learning_rate": 9.453858911862972e-06, - "loss": 0.3898, - "step": 7674 - }, - { - "epoch": 0.501601202535782, - "grad_norm": 0.4259728789329529, - "learning_rate": 9.453700211284404e-06, - "loss": 0.3544, - "step": 7675 - }, - { - "epoch": 0.5016665577413241, - "grad_norm": 0.4339010417461395, - "learning_rate": 9.453541488983595e-06, - "loss": 0.3714, - "step": 7676 - }, - { - "epoch": 0.5017319129468663, - "grad_norm": 0.4736887216567993, - "learning_rate": 9.453382744961322e-06, - "loss": 0.3923, - "step": 7677 - }, - { - "epoch": 0.5017972681524083, - "grad_norm": 0.45846840739250183, - "learning_rate": 9.453223979218359e-06, - "loss": 0.4146, - "step": 7678 - }, - { - "epoch": 0.5018626233579505, - "grad_norm": 0.4797497093677521, - "learning_rate": 9.45306519175548e-06, - "loss": 0.4006, - "step": 7679 - }, - { - "epoch": 0.5019279785634926, - "grad_norm": 0.4832633435726166, - "learning_rate": 9.45290638257346e-06, - "loss": 0.4162, - "step": 7680 - }, - { - "epoch": 0.5019933337690347, - "grad_norm": 0.4564540982246399, - "learning_rate": 9.452747551673072e-06, - "loss": 0.3874, - "step": 7681 - }, - { - "epoch": 0.5020586889745768, - "grad_norm": 0.4771483838558197, - "learning_rate": 9.452588699055089e-06, - "loss": 0.3998, - "step": 7682 - }, - { - "epoch": 0.5021240441801189, - "grad_norm": 0.4464082419872284, - "learning_rate": 9.452429824720292e-06, - "loss": 0.3977, - "step": 7683 - }, - { - "epoch": 0.5021893993856611, - "grad_norm": 0.4652882516384125, - "learning_rate": 9.452270928669451e-06, - "loss": 0.3685, - "step": 7684 - }, - { - "epoch": 0.5022547545912032, - "grad_norm": 0.5194174647331238, - "learning_rate": 9.452112010903342e-06, - "loss": 0.4626, - "step": 7685 - }, - { - "epoch": 0.5023201097967454, - "grad_norm": 0.43637266755104065, - "learning_rate": 9.451953071422741e-06, - "loss": 0.3608, - "step": 7686 - }, - { - "epoch": 0.5023854650022874, - "grad_norm": 0.45275160670280457, - "learning_rate": 9.451794110228423e-06, - "loss": 0.3952, - "step": 7687 - }, - { - "epoch": 0.5024508202078296, - "grad_norm": 0.4362596869468689, - "learning_rate": 9.451635127321161e-06, - "loss": 0.3654, - "step": 7688 - }, - { - "epoch": 0.5025161754133717, - "grad_norm": 0.452396035194397, - "learning_rate": 9.451476122701735e-06, - "loss": 0.3529, - "step": 7689 - }, - { - "epoch": 0.5025815306189138, - "grad_norm": 0.4514504075050354, - "learning_rate": 9.451317096370916e-06, - "loss": 0.3657, - "step": 7690 - }, - { - "epoch": 0.5026468858244559, - "grad_norm": 0.48859599232673645, - "learning_rate": 9.451158048329483e-06, - "loss": 0.4624, - "step": 7691 - }, - { - "epoch": 0.502712241029998, - "grad_norm": 0.4188653528690338, - "learning_rate": 9.450998978578207e-06, - "loss": 0.3429, - "step": 7692 - }, - { - "epoch": 0.5027775962355402, - "grad_norm": 0.47802087664604187, - "learning_rate": 9.450839887117871e-06, - "loss": 0.4537, - "step": 7693 - }, - { - "epoch": 0.5028429514410823, - "grad_norm": 0.4708169996738434, - "learning_rate": 9.450680773949243e-06, - "loss": 0.4179, - "step": 7694 - }, - { - "epoch": 0.5029083066466244, - "grad_norm": 0.46108609437942505, - "learning_rate": 9.450521639073106e-06, - "loss": 0.3941, - "step": 7695 - }, - { - "epoch": 0.5029736618521665, - "grad_norm": 0.46358931064605713, - "learning_rate": 9.450362482490232e-06, - "loss": 0.4126, - "step": 7696 - }, - { - "epoch": 0.5030390170577086, - "grad_norm": 0.47584667801856995, - "learning_rate": 9.450203304201398e-06, - "loss": 0.3975, - "step": 7697 - }, - { - "epoch": 0.5031043722632508, - "grad_norm": 0.45680591464042664, - "learning_rate": 9.450044104207382e-06, - "loss": 0.3725, - "step": 7698 - }, - { - "epoch": 0.5031697274687928, - "grad_norm": 0.527522087097168, - "learning_rate": 9.44988488250896e-06, - "loss": 0.478, - "step": 7699 - }, - { - "epoch": 0.503235082674335, - "grad_norm": 0.4551538825035095, - "learning_rate": 9.449725639106905e-06, - "loss": 0.3588, - "step": 7700 - }, - { - "epoch": 0.5033004378798771, - "grad_norm": 0.47754600644111633, - "learning_rate": 9.449566374001998e-06, - "loss": 0.43, - "step": 7701 - }, - { - "epoch": 0.5033657930854193, - "grad_norm": 0.4657004475593567, - "learning_rate": 9.449407087195014e-06, - "loss": 0.43, - "step": 7702 - }, - { - "epoch": 0.5034311482909614, - "grad_norm": 0.4533071219921112, - "learning_rate": 9.449247778686729e-06, - "loss": 0.4116, - "step": 7703 - }, - { - "epoch": 0.5034965034965035, - "grad_norm": 0.4522230923175812, - "learning_rate": 9.449088448477924e-06, - "loss": 0.3815, - "step": 7704 - }, - { - "epoch": 0.5035618587020456, - "grad_norm": 0.451035737991333, - "learning_rate": 9.448929096569372e-06, - "loss": 0.3808, - "step": 7705 - }, - { - "epoch": 0.5036272139075877, - "grad_norm": 0.4907190799713135, - "learning_rate": 9.448769722961852e-06, - "loss": 0.4145, - "step": 7706 - }, - { - "epoch": 0.5036925691131299, - "grad_norm": 0.4380427300930023, - "learning_rate": 9.448610327656141e-06, - "loss": 0.3452, - "step": 7707 - }, - { - "epoch": 0.503757924318672, - "grad_norm": 0.45540523529052734, - "learning_rate": 9.448450910653015e-06, - "loss": 0.392, - "step": 7708 - }, - { - "epoch": 0.5038232795242141, - "grad_norm": 0.4351827800273895, - "learning_rate": 9.448291471953252e-06, - "loss": 0.3518, - "step": 7709 - }, - { - "epoch": 0.5038886347297562, - "grad_norm": 0.4677373468875885, - "learning_rate": 9.448132011557634e-06, - "loss": 0.4016, - "step": 7710 - }, - { - "epoch": 0.5039539899352984, - "grad_norm": 0.463039755821228, - "learning_rate": 9.447972529466933e-06, - "loss": 0.3771, - "step": 7711 - }, - { - "epoch": 0.5040193451408405, - "grad_norm": 0.4625132381916046, - "learning_rate": 9.447813025681929e-06, - "loss": 0.4114, - "step": 7712 - }, - { - "epoch": 0.5040847003463826, - "grad_norm": 0.4245222210884094, - "learning_rate": 9.4476535002034e-06, - "loss": 0.3465, - "step": 7713 - }, - { - "epoch": 0.5041500555519247, - "grad_norm": 0.4942336976528168, - "learning_rate": 9.447493953032124e-06, - "loss": 0.3807, - "step": 7714 - }, - { - "epoch": 0.5042154107574668, - "grad_norm": 0.4409201443195343, - "learning_rate": 9.447334384168881e-06, - "loss": 0.3909, - "step": 7715 - }, - { - "epoch": 0.504280765963009, - "grad_norm": 0.4462164640426636, - "learning_rate": 9.447174793614444e-06, - "loss": 0.3674, - "step": 7716 - }, - { - "epoch": 0.504346121168551, - "grad_norm": 0.4723726809024811, - "learning_rate": 9.447015181369597e-06, - "loss": 0.4121, - "step": 7717 - }, - { - "epoch": 0.5044114763740932, - "grad_norm": 0.46016383171081543, - "learning_rate": 9.446855547435117e-06, - "loss": 0.3857, - "step": 7718 - }, - { - "epoch": 0.5044768315796353, - "grad_norm": 0.4830379784107208, - "learning_rate": 9.446695891811781e-06, - "loss": 0.4014, - "step": 7719 - }, - { - "epoch": 0.5045421867851775, - "grad_norm": 0.4311649203300476, - "learning_rate": 9.446536214500367e-06, - "loss": 0.3768, - "step": 7720 - }, - { - "epoch": 0.5046075419907196, - "grad_norm": 0.4620884954929352, - "learning_rate": 9.446376515501659e-06, - "loss": 0.4095, - "step": 7721 - }, - { - "epoch": 0.5046728971962616, - "grad_norm": 0.45689043402671814, - "learning_rate": 9.44621679481643e-06, - "loss": 0.3834, - "step": 7722 - }, - { - "epoch": 0.5047382524018038, - "grad_norm": 0.4588378071784973, - "learning_rate": 9.446057052445463e-06, - "loss": 0.3915, - "step": 7723 - }, - { - "epoch": 0.5048036076073459, - "grad_norm": 0.5513094067573547, - "learning_rate": 9.445897288389533e-06, - "loss": 0.3808, - "step": 7724 - }, - { - "epoch": 0.5048689628128881, - "grad_norm": 0.4702783226966858, - "learning_rate": 9.445737502649425e-06, - "loss": 0.3584, - "step": 7725 - }, - { - "epoch": 0.5049343180184301, - "grad_norm": 0.4639442265033722, - "learning_rate": 9.445577695225914e-06, - "loss": 0.4167, - "step": 7726 - }, - { - "epoch": 0.5049996732239723, - "grad_norm": 0.4771186113357544, - "learning_rate": 9.445417866119779e-06, - "loss": 0.4129, - "step": 7727 - }, - { - "epoch": 0.5050650284295144, - "grad_norm": 0.4485853314399719, - "learning_rate": 9.445258015331802e-06, - "loss": 0.4147, - "step": 7728 - }, - { - "epoch": 0.5051303836350566, - "grad_norm": 0.47932490706443787, - "learning_rate": 9.44509814286276e-06, - "loss": 0.3547, - "step": 7729 - }, - { - "epoch": 0.5051957388405987, - "grad_norm": 0.4371171295642853, - "learning_rate": 9.444938248713436e-06, - "loss": 0.3471, - "step": 7730 - }, - { - "epoch": 0.5052610940461407, - "grad_norm": 0.45352810621261597, - "learning_rate": 9.44477833288461e-06, - "loss": 0.4164, - "step": 7731 - }, - { - "epoch": 0.5053264492516829, - "grad_norm": 0.43929749727249146, - "learning_rate": 9.444618395377057e-06, - "loss": 0.3732, - "step": 7732 - }, - { - "epoch": 0.505391804457225, - "grad_norm": 0.49312737584114075, - "learning_rate": 9.444458436191562e-06, - "loss": 0.4635, - "step": 7733 - }, - { - "epoch": 0.5054571596627672, - "grad_norm": 0.5206209421157837, - "learning_rate": 9.444298455328903e-06, - "loss": 0.3835, - "step": 7734 - }, - { - "epoch": 0.5055225148683092, - "grad_norm": 0.5052789449691772, - "learning_rate": 9.444138452789862e-06, - "loss": 0.4572, - "step": 7735 - }, - { - "epoch": 0.5055878700738514, - "grad_norm": 0.4622563123703003, - "learning_rate": 9.443978428575216e-06, - "loss": 0.3999, - "step": 7736 - }, - { - "epoch": 0.5056532252793935, - "grad_norm": 0.4477774202823639, - "learning_rate": 9.44381838268575e-06, - "loss": 0.3646, - "step": 7737 - }, - { - "epoch": 0.5057185804849357, - "grad_norm": 0.4669139087200165, - "learning_rate": 9.44365831512224e-06, - "loss": 0.3797, - "step": 7738 - }, - { - "epoch": 0.5057839356904777, - "grad_norm": 0.42422449588775635, - "learning_rate": 9.44349822588547e-06, - "loss": 0.3642, - "step": 7739 - }, - { - "epoch": 0.5058492908960198, - "grad_norm": 0.431505024433136, - "learning_rate": 9.443338114976222e-06, - "loss": 0.3287, - "step": 7740 - }, - { - "epoch": 0.505914646101562, - "grad_norm": 0.4315491318702698, - "learning_rate": 9.443177982395272e-06, - "loss": 0.3306, - "step": 7741 - }, - { - "epoch": 0.5059800013071041, - "grad_norm": 0.41238635778427124, - "learning_rate": 9.443017828143403e-06, - "loss": 0.3391, - "step": 7742 - }, - { - "epoch": 0.5060453565126463, - "grad_norm": 0.4434930682182312, - "learning_rate": 9.442857652221398e-06, - "loss": 0.3726, - "step": 7743 - }, - { - "epoch": 0.5061107117181883, - "grad_norm": 0.4321887791156769, - "learning_rate": 9.442697454630039e-06, - "loss": 0.3517, - "step": 7744 - }, - { - "epoch": 0.5061760669237305, - "grad_norm": 0.4747454524040222, - "learning_rate": 9.442537235370103e-06, - "loss": 0.4571, - "step": 7745 - }, - { - "epoch": 0.5062414221292726, - "grad_norm": 0.4539855420589447, - "learning_rate": 9.442376994442375e-06, - "loss": 0.4042, - "step": 7746 - }, - { - "epoch": 0.5063067773348148, - "grad_norm": 0.45672607421875, - "learning_rate": 9.442216731847635e-06, - "loss": 0.4225, - "step": 7747 - }, - { - "epoch": 0.5063721325403568, - "grad_norm": 0.5286438465118408, - "learning_rate": 9.442056447586665e-06, - "loss": 0.3663, - "step": 7748 - }, - { - "epoch": 0.5064374877458989, - "grad_norm": 0.42539289593696594, - "learning_rate": 9.441896141660246e-06, - "loss": 0.3283, - "step": 7749 - }, - { - "epoch": 0.5065028429514411, - "grad_norm": 0.47858306765556335, - "learning_rate": 9.441735814069161e-06, - "loss": 0.4016, - "step": 7750 - }, - { - "epoch": 0.5065681981569832, - "grad_norm": 0.4335874915122986, - "learning_rate": 9.441575464814193e-06, - "loss": 0.373, - "step": 7751 - }, - { - "epoch": 0.5066335533625254, - "grad_norm": 0.45669421553611755, - "learning_rate": 9.441415093896123e-06, - "loss": 0.4007, - "step": 7752 - }, - { - "epoch": 0.5066989085680674, - "grad_norm": 0.4713069498538971, - "learning_rate": 9.441254701315731e-06, - "loss": 0.3703, - "step": 7753 - }, - { - "epoch": 0.5067642637736096, - "grad_norm": 0.4867474138736725, - "learning_rate": 9.441094287073801e-06, - "loss": 0.4422, - "step": 7754 - }, - { - "epoch": 0.5068296189791517, - "grad_norm": 0.45350489020347595, - "learning_rate": 9.440933851171117e-06, - "loss": 0.3916, - "step": 7755 - }, - { - "epoch": 0.5068949741846938, - "grad_norm": 0.443925142288208, - "learning_rate": 9.44077339360846e-06, - "loss": 0.4058, - "step": 7756 - }, - { - "epoch": 0.5069603293902359, - "grad_norm": 0.45000678300857544, - "learning_rate": 9.440612914386614e-06, - "loss": 0.4003, - "step": 7757 - }, - { - "epoch": 0.507025684595778, - "grad_norm": 0.46410927176475525, - "learning_rate": 9.440452413506358e-06, - "loss": 0.4028, - "step": 7758 - }, - { - "epoch": 0.5070910398013202, - "grad_norm": 0.470266729593277, - "learning_rate": 9.440291890968479e-06, - "loss": 0.403, - "step": 7759 - }, - { - "epoch": 0.5071563950068623, - "grad_norm": 0.8602852821350098, - "learning_rate": 9.440131346773757e-06, - "loss": 0.4246, - "step": 7760 - }, - { - "epoch": 0.5072217502124045, - "grad_norm": 0.515372633934021, - "learning_rate": 9.439970780922975e-06, - "loss": 0.4407, - "step": 7761 - }, - { - "epoch": 0.5072871054179465, - "grad_norm": 0.43363937735557556, - "learning_rate": 9.43981019341692e-06, - "loss": 0.3447, - "step": 7762 - }, - { - "epoch": 0.5073524606234887, - "grad_norm": 0.4343889653682709, - "learning_rate": 9.439649584256372e-06, - "loss": 0.3647, - "step": 7763 - }, - { - "epoch": 0.5074178158290308, - "grad_norm": 0.43482160568237305, - "learning_rate": 9.439488953442114e-06, - "loss": 0.3586, - "step": 7764 - }, - { - "epoch": 0.5074831710345729, - "grad_norm": 0.43836653232574463, - "learning_rate": 9.43932830097493e-06, - "loss": 0.365, - "step": 7765 - }, - { - "epoch": 0.507548526240115, - "grad_norm": 0.49678322672843933, - "learning_rate": 9.439167626855605e-06, - "loss": 0.429, - "step": 7766 - }, - { - "epoch": 0.5076138814456571, - "grad_norm": 0.47993841767311096, - "learning_rate": 9.439006931084921e-06, - "loss": 0.4042, - "step": 7767 - }, - { - "epoch": 0.5076792366511993, - "grad_norm": 0.4363028407096863, - "learning_rate": 9.438846213663664e-06, - "loss": 0.3648, - "step": 7768 - }, - { - "epoch": 0.5077445918567414, - "grad_norm": 0.48934856057167053, - "learning_rate": 9.438685474592615e-06, - "loss": 0.4221, - "step": 7769 - }, - { - "epoch": 0.5078099470622836, - "grad_norm": 0.5310774445533752, - "learning_rate": 9.43852471387256e-06, - "loss": 0.4706, - "step": 7770 - }, - { - "epoch": 0.5078753022678256, - "grad_norm": 0.41704484820365906, - "learning_rate": 9.438363931504282e-06, - "loss": 0.3225, - "step": 7771 - }, - { - "epoch": 0.5079406574733678, - "grad_norm": 0.4534618854522705, - "learning_rate": 9.438203127488564e-06, - "loss": 0.372, - "step": 7772 - }, - { - "epoch": 0.5080060126789099, - "grad_norm": 0.43370527029037476, - "learning_rate": 9.438042301826193e-06, - "loss": 0.3825, - "step": 7773 - }, - { - "epoch": 0.508071367884452, - "grad_norm": 0.4758480489253998, - "learning_rate": 9.437881454517952e-06, - "loss": 0.4192, - "step": 7774 - }, - { - "epoch": 0.5081367230899941, - "grad_norm": 0.46820536255836487, - "learning_rate": 9.437720585564628e-06, - "loss": 0.4079, - "step": 7775 - }, - { - "epoch": 0.5082020782955362, - "grad_norm": 0.43157297372817993, - "learning_rate": 9.437559694967e-06, - "loss": 0.3535, - "step": 7776 - }, - { - "epoch": 0.5082674335010784, - "grad_norm": 0.4799809157848358, - "learning_rate": 9.437398782725857e-06, - "loss": 0.3967, - "step": 7777 - }, - { - "epoch": 0.5083327887066205, - "grad_norm": 0.5004230737686157, - "learning_rate": 9.437237848841982e-06, - "loss": 0.3799, - "step": 7778 - }, - { - "epoch": 0.5083981439121626, - "grad_norm": 0.47232672572135925, - "learning_rate": 9.437076893316163e-06, - "loss": 0.3857, - "step": 7779 - }, - { - "epoch": 0.5084634991177047, - "grad_norm": 0.4624641239643097, - "learning_rate": 9.436915916149181e-06, - "loss": 0.3518, - "step": 7780 - }, - { - "epoch": 0.5085288543232468, - "grad_norm": 0.44828665256500244, - "learning_rate": 9.436754917341823e-06, - "loss": 0.3246, - "step": 7781 - }, - { - "epoch": 0.508594209528789, - "grad_norm": 0.46594393253326416, - "learning_rate": 9.436593896894876e-06, - "loss": 0.3773, - "step": 7782 - }, - { - "epoch": 0.508659564734331, - "grad_norm": 0.5000684261322021, - "learning_rate": 9.436432854809124e-06, - "loss": 0.4605, - "step": 7783 - }, - { - "epoch": 0.5087249199398732, - "grad_norm": 0.4356588125228882, - "learning_rate": 9.43627179108535e-06, - "loss": 0.3663, - "step": 7784 - }, - { - "epoch": 0.5087902751454153, - "grad_norm": 0.4482481777667999, - "learning_rate": 9.436110705724341e-06, - "loss": 0.3675, - "step": 7785 - }, - { - "epoch": 0.5088556303509575, - "grad_norm": 0.4329874813556671, - "learning_rate": 9.435949598726885e-06, - "loss": 0.356, - "step": 7786 - }, - { - "epoch": 0.5089209855564996, - "grad_norm": 0.45425376296043396, - "learning_rate": 9.435788470093766e-06, - "loss": 0.3541, - "step": 7787 - }, - { - "epoch": 0.5089863407620417, - "grad_norm": 0.47848591208457947, - "learning_rate": 9.435627319825769e-06, - "loss": 0.4598, - "step": 7788 - }, - { - "epoch": 0.5090516959675838, - "grad_norm": 0.4244730472564697, - "learning_rate": 9.435466147923682e-06, - "loss": 0.342, - "step": 7789 - }, - { - "epoch": 0.5091170511731259, - "grad_norm": 0.48533114790916443, - "learning_rate": 9.435304954388288e-06, - "loss": 0.4343, - "step": 7790 - }, - { - "epoch": 0.5091824063786681, - "grad_norm": 0.46364644169807434, - "learning_rate": 9.435143739220378e-06, - "loss": 0.371, - "step": 7791 - }, - { - "epoch": 0.5092477615842101, - "grad_norm": 0.4621589481830597, - "learning_rate": 9.434982502420733e-06, - "loss": 0.3861, - "step": 7792 - }, - { - "epoch": 0.5093131167897523, - "grad_norm": 0.4631290137767792, - "learning_rate": 9.434821243990145e-06, - "loss": 0.3889, - "step": 7793 - }, - { - "epoch": 0.5093784719952944, - "grad_norm": 0.47467970848083496, - "learning_rate": 9.434659963929394e-06, - "loss": 0.4032, - "step": 7794 - }, - { - "epoch": 0.5094438272008366, - "grad_norm": 0.4567209482192993, - "learning_rate": 9.434498662239271e-06, - "loss": 0.4168, - "step": 7795 - }, - { - "epoch": 0.5095091824063787, - "grad_norm": 0.45567426085472107, - "learning_rate": 9.434337338920562e-06, - "loss": 0.3438, - "step": 7796 - }, - { - "epoch": 0.5095745376119208, - "grad_norm": 0.4654828608036041, - "learning_rate": 9.434175993974055e-06, - "loss": 0.3891, - "step": 7797 - }, - { - "epoch": 0.5096398928174629, - "grad_norm": 0.449202299118042, - "learning_rate": 9.434014627400534e-06, - "loss": 0.3865, - "step": 7798 - }, - { - "epoch": 0.509705248023005, - "grad_norm": 0.4640216827392578, - "learning_rate": 9.433853239200787e-06, - "loss": 0.4083, - "step": 7799 - }, - { - "epoch": 0.5097706032285472, - "grad_norm": 0.4358929395675659, - "learning_rate": 9.433691829375605e-06, - "loss": 0.3395, - "step": 7800 - }, - { - "epoch": 0.5098359584340892, - "grad_norm": 0.48115190863609314, - "learning_rate": 9.433530397925768e-06, - "loss": 0.3927, - "step": 7801 - }, - { - "epoch": 0.5099013136396314, - "grad_norm": 0.4378347396850586, - "learning_rate": 9.433368944852069e-06, - "loss": 0.3703, - "step": 7802 - }, - { - "epoch": 0.5099666688451735, - "grad_norm": 0.5631244778633118, - "learning_rate": 9.433207470155294e-06, - "loss": 0.3516, - "step": 7803 - }, - { - "epoch": 0.5100320240507157, - "grad_norm": 0.4904472827911377, - "learning_rate": 9.43304597383623e-06, - "loss": 0.4362, - "step": 7804 - }, - { - "epoch": 0.5100973792562578, - "grad_norm": 0.45113232731819153, - "learning_rate": 9.432884455895665e-06, - "loss": 0.3536, - "step": 7805 - }, - { - "epoch": 0.5101627344617998, - "grad_norm": 0.4851573705673218, - "learning_rate": 9.432722916334387e-06, - "loss": 0.4351, - "step": 7806 - }, - { - "epoch": 0.510228089667342, - "grad_norm": 0.4441041350364685, - "learning_rate": 9.432561355153183e-06, - "loss": 0.3571, - "step": 7807 - }, - { - "epoch": 0.5102934448728841, - "grad_norm": 0.4777660369873047, - "learning_rate": 9.432399772352843e-06, - "loss": 0.4167, - "step": 7808 - }, - { - "epoch": 0.5103588000784263, - "grad_norm": 0.5215006470680237, - "learning_rate": 9.432238167934153e-06, - "loss": 0.4379, - "step": 7809 - }, - { - "epoch": 0.5104241552839683, - "grad_norm": 0.45751526951789856, - "learning_rate": 9.432076541897902e-06, - "loss": 0.406, - "step": 7810 - }, - { - "epoch": 0.5104895104895105, - "grad_norm": 0.49232354760169983, - "learning_rate": 9.431914894244878e-06, - "loss": 0.445, - "step": 7811 - }, - { - "epoch": 0.5105548656950526, - "grad_norm": 0.4416586458683014, - "learning_rate": 9.43175322497587e-06, - "loss": 0.3829, - "step": 7812 - }, - { - "epoch": 0.5106202209005948, - "grad_norm": 0.46103307604789734, - "learning_rate": 9.431591534091666e-06, - "loss": 0.3629, - "step": 7813 - }, - { - "epoch": 0.5106855761061369, - "grad_norm": 0.4570455551147461, - "learning_rate": 9.431429821593055e-06, - "loss": 0.4046, - "step": 7814 - }, - { - "epoch": 0.5107509313116789, - "grad_norm": 0.4625372886657715, - "learning_rate": 9.431268087480826e-06, - "loss": 0.4266, - "step": 7815 - }, - { - "epoch": 0.5108162865172211, - "grad_norm": 0.44160646200180054, - "learning_rate": 9.431106331755766e-06, - "loss": 0.3812, - "step": 7816 - }, - { - "epoch": 0.5108816417227632, - "grad_norm": 0.4473101496696472, - "learning_rate": 9.430944554418668e-06, - "loss": 0.3827, - "step": 7817 - }, - { - "epoch": 0.5109469969283054, - "grad_norm": 0.458060085773468, - "learning_rate": 9.430782755470316e-06, - "loss": 0.3877, - "step": 7818 - }, - { - "epoch": 0.5110123521338474, - "grad_norm": 0.4500366151332855, - "learning_rate": 9.430620934911503e-06, - "loss": 0.3948, - "step": 7819 - }, - { - "epoch": 0.5110777073393896, - "grad_norm": 0.45969077944755554, - "learning_rate": 9.430459092743016e-06, - "loss": 0.3902, - "step": 7820 - }, - { - "epoch": 0.5111430625449317, - "grad_norm": 0.43781375885009766, - "learning_rate": 9.430297228965645e-06, - "loss": 0.3538, - "step": 7821 - }, - { - "epoch": 0.5112084177504739, - "grad_norm": 0.4211559593677521, - "learning_rate": 9.430135343580181e-06, - "loss": 0.354, - "step": 7822 - }, - { - "epoch": 0.511273772956016, - "grad_norm": 0.4532836079597473, - "learning_rate": 9.429973436587409e-06, - "loss": 0.3727, - "step": 7823 - }, - { - "epoch": 0.511339128161558, - "grad_norm": 0.484272837638855, - "learning_rate": 9.429811507988124e-06, - "loss": 0.4122, - "step": 7824 - }, - { - "epoch": 0.5114044833671002, - "grad_norm": 0.5284202098846436, - "learning_rate": 9.429649557783115e-06, - "loss": 0.4599, - "step": 7825 - }, - { - "epoch": 0.5114698385726423, - "grad_norm": 0.43804940581321716, - "learning_rate": 9.429487585973167e-06, - "loss": 0.3355, - "step": 7826 - }, - { - "epoch": 0.5115351937781845, - "grad_norm": 0.4836186468601227, - "learning_rate": 9.429325592559077e-06, - "loss": 0.444, - "step": 7827 - }, - { - "epoch": 0.5116005489837265, - "grad_norm": 0.43310973048210144, - "learning_rate": 9.42916357754163e-06, - "loss": 0.3764, - "step": 7828 - }, - { - "epoch": 0.5116659041892687, - "grad_norm": 0.43801456689834595, - "learning_rate": 9.429001540921618e-06, - "loss": 0.4043, - "step": 7829 - }, - { - "epoch": 0.5117312593948108, - "grad_norm": 0.43409159779548645, - "learning_rate": 9.428839482699831e-06, - "loss": 0.3789, - "step": 7830 - }, - { - "epoch": 0.511796614600353, - "grad_norm": 0.45470452308654785, - "learning_rate": 9.42867740287706e-06, - "loss": 0.4111, - "step": 7831 - }, - { - "epoch": 0.511861969805895, - "grad_norm": 0.44637343287467957, - "learning_rate": 9.428515301454095e-06, - "loss": 0.3717, - "step": 7832 - }, - { - "epoch": 0.5119273250114371, - "grad_norm": 0.45838499069213867, - "learning_rate": 9.428353178431726e-06, - "loss": 0.4231, - "step": 7833 - }, - { - "epoch": 0.5119926802169793, - "grad_norm": 0.485531747341156, - "learning_rate": 9.428191033810746e-06, - "loss": 0.4097, - "step": 7834 - }, - { - "epoch": 0.5120580354225214, - "grad_norm": 0.4642377197742462, - "learning_rate": 9.428028867591943e-06, - "loss": 0.3873, - "step": 7835 - }, - { - "epoch": 0.5121233906280636, - "grad_norm": 0.46806132793426514, - "learning_rate": 9.427866679776109e-06, - "loss": 0.3881, - "step": 7836 - }, - { - "epoch": 0.5121887458336056, - "grad_norm": 0.4533544182777405, - "learning_rate": 9.427704470364035e-06, - "loss": 0.3792, - "step": 7837 - }, - { - "epoch": 0.5122541010391478, - "grad_norm": 0.4343569278717041, - "learning_rate": 9.427542239356512e-06, - "loss": 0.3568, - "step": 7838 - }, - { - "epoch": 0.5123194562446899, - "grad_norm": 0.45173031091690063, - "learning_rate": 9.427379986754333e-06, - "loss": 0.3966, - "step": 7839 - }, - { - "epoch": 0.512384811450232, - "grad_norm": 0.45075392723083496, - "learning_rate": 9.427217712558288e-06, - "loss": 0.3993, - "step": 7840 - }, - { - "epoch": 0.5124501666557741, - "grad_norm": 0.45193302631378174, - "learning_rate": 9.427055416769169e-06, - "loss": 0.4206, - "step": 7841 - }, - { - "epoch": 0.5125155218613162, - "grad_norm": 0.4493101239204407, - "learning_rate": 9.426893099387767e-06, - "loss": 0.3855, - "step": 7842 - }, - { - "epoch": 0.5125808770668584, - "grad_norm": 0.44628816843032837, - "learning_rate": 9.426730760414871e-06, - "loss": 0.387, - "step": 7843 - }, - { - "epoch": 0.5126462322724005, - "grad_norm": 0.45157426595687866, - "learning_rate": 9.426568399851277e-06, - "loss": 0.4002, - "step": 7844 - }, - { - "epoch": 0.5127115874779427, - "grad_norm": 0.4979647994041443, - "learning_rate": 9.426406017697777e-06, - "loss": 0.4369, - "step": 7845 - }, - { - "epoch": 0.5127769426834847, - "grad_norm": 0.4294925034046173, - "learning_rate": 9.42624361395516e-06, - "loss": 0.3506, - "step": 7846 - }, - { - "epoch": 0.5128422978890269, - "grad_norm": 0.4421248137950897, - "learning_rate": 9.42608118862422e-06, - "loss": 0.3887, - "step": 7847 - }, - { - "epoch": 0.512907653094569, - "grad_norm": 0.4316932260990143, - "learning_rate": 9.425918741705749e-06, - "loss": 0.3528, - "step": 7848 - }, - { - "epoch": 0.512973008300111, - "grad_norm": 0.45609912276268005, - "learning_rate": 9.425756273200536e-06, - "loss": 0.4086, - "step": 7849 - }, - { - "epoch": 0.5130383635056532, - "grad_norm": 0.44079044461250305, - "learning_rate": 9.42559378310938e-06, - "loss": 0.3338, - "step": 7850 - }, - { - "epoch": 0.5131037187111953, - "grad_norm": 0.4636673033237457, - "learning_rate": 9.42543127143307e-06, - "loss": 0.3685, - "step": 7851 - }, - { - "epoch": 0.5131690739167375, - "grad_norm": 0.46439874172210693, - "learning_rate": 9.425268738172397e-06, - "loss": 0.4018, - "step": 7852 - }, - { - "epoch": 0.5132344291222796, - "grad_norm": 0.48195880651474, - "learning_rate": 9.425106183328156e-06, - "loss": 0.4504, - "step": 7853 - }, - { - "epoch": 0.5132997843278218, - "grad_norm": 0.4582245349884033, - "learning_rate": 9.424943606901137e-06, - "loss": 0.3982, - "step": 7854 - }, - { - "epoch": 0.5133651395333638, - "grad_norm": 0.4763210415840149, - "learning_rate": 9.424781008892138e-06, - "loss": 0.4384, - "step": 7855 - }, - { - "epoch": 0.513430494738906, - "grad_norm": 0.959374189376831, - "learning_rate": 9.424618389301947e-06, - "loss": 0.4676, - "step": 7856 - }, - { - "epoch": 0.5134958499444481, - "grad_norm": 0.41970404982566833, - "learning_rate": 9.424455748131362e-06, - "loss": 0.3329, - "step": 7857 - }, - { - "epoch": 0.5135612051499902, - "grad_norm": 0.49007922410964966, - "learning_rate": 9.424293085381172e-06, - "loss": 0.4307, - "step": 7858 - }, - { - "epoch": 0.5136265603555323, - "grad_norm": 0.4442076086997986, - "learning_rate": 9.424130401052171e-06, - "loss": 0.352, - "step": 7859 - }, - { - "epoch": 0.5136919155610744, - "grad_norm": 0.4454544484615326, - "learning_rate": 9.423967695145154e-06, - "loss": 0.3823, - "step": 7860 - }, - { - "epoch": 0.5137572707666166, - "grad_norm": 0.490543931722641, - "learning_rate": 9.423804967660914e-06, - "loss": 0.4189, - "step": 7861 - }, - { - "epoch": 0.5138226259721587, - "grad_norm": 0.45555487275123596, - "learning_rate": 9.423642218600244e-06, - "loss": 0.3853, - "step": 7862 - }, - { - "epoch": 0.5138879811777008, - "grad_norm": 0.4604410231113434, - "learning_rate": 9.42347944796394e-06, - "loss": 0.381, - "step": 7863 - }, - { - "epoch": 0.5139533363832429, - "grad_norm": 0.4191436171531677, - "learning_rate": 9.423316655752793e-06, - "loss": 0.3562, - "step": 7864 - }, - { - "epoch": 0.514018691588785, - "grad_norm": 0.482517272233963, - "learning_rate": 9.423153841967598e-06, - "loss": 0.4471, - "step": 7865 - }, - { - "epoch": 0.5140840467943272, - "grad_norm": 0.42155736684799194, - "learning_rate": 9.42299100660915e-06, - "loss": 0.3321, - "step": 7866 - }, - { - "epoch": 0.5141494019998692, - "grad_norm": 0.4597262442111969, - "learning_rate": 9.422828149678244e-06, - "loss": 0.3689, - "step": 7867 - }, - { - "epoch": 0.5142147572054114, - "grad_norm": 0.4536152184009552, - "learning_rate": 9.42266527117567e-06, - "loss": 0.3856, - "step": 7868 - }, - { - "epoch": 0.5142801124109535, - "grad_norm": 0.46638286113739014, - "learning_rate": 9.422502371102228e-06, - "loss": 0.4212, - "step": 7869 - }, - { - "epoch": 0.5143454676164957, - "grad_norm": 0.48029616475105286, - "learning_rate": 9.422339449458709e-06, - "loss": 0.4453, - "step": 7870 - }, - { - "epoch": 0.5144108228220378, - "grad_norm": 0.4586377739906311, - "learning_rate": 9.422176506245908e-06, - "loss": 0.3542, - "step": 7871 - }, - { - "epoch": 0.51447617802758, - "grad_norm": 0.4356926381587982, - "learning_rate": 9.42201354146462e-06, - "loss": 0.3269, - "step": 7872 - }, - { - "epoch": 0.514541533233122, - "grad_norm": 0.46058180928230286, - "learning_rate": 9.421850555115641e-06, - "loss": 0.419, - "step": 7873 - }, - { - "epoch": 0.5146068884386641, - "grad_norm": 0.46969467401504517, - "learning_rate": 9.421687547199763e-06, - "loss": 0.414, - "step": 7874 - }, - { - "epoch": 0.5146722436442063, - "grad_norm": 0.5013342499732971, - "learning_rate": 9.421524517717784e-06, - "loss": 0.463, - "step": 7875 - }, - { - "epoch": 0.5147375988497483, - "grad_norm": 0.4446825087070465, - "learning_rate": 9.4213614666705e-06, - "loss": 0.3951, - "step": 7876 - }, - { - "epoch": 0.5148029540552905, - "grad_norm": 0.4600411355495453, - "learning_rate": 9.421198394058702e-06, - "loss": 0.4068, - "step": 7877 - }, - { - "epoch": 0.5148683092608326, - "grad_norm": 0.4615230858325958, - "learning_rate": 9.421035299883188e-06, - "loss": 0.3538, - "step": 7878 - }, - { - "epoch": 0.5149336644663748, - "grad_norm": 0.4540770351886749, - "learning_rate": 9.420872184144754e-06, - "loss": 0.3812, - "step": 7879 - }, - { - "epoch": 0.5149990196719169, - "grad_norm": 0.46806600689888, - "learning_rate": 9.420709046844196e-06, - "loss": 0.4016, - "step": 7880 - }, - { - "epoch": 0.515064374877459, - "grad_norm": 0.44293394684791565, - "learning_rate": 9.420545887982307e-06, - "loss": 0.3761, - "step": 7881 - }, - { - "epoch": 0.5151297300830011, - "grad_norm": 0.44064861536026, - "learning_rate": 9.420382707559885e-06, - "loss": 0.3823, - "step": 7882 - }, - { - "epoch": 0.5151950852885432, - "grad_norm": 0.46114489436149597, - "learning_rate": 9.420219505577724e-06, - "loss": 0.3906, - "step": 7883 - }, - { - "epoch": 0.5152604404940854, - "grad_norm": 0.45865270495414734, - "learning_rate": 9.420056282036623e-06, - "loss": 0.416, - "step": 7884 - }, - { - "epoch": 0.5153257956996274, - "grad_norm": 0.43503662943840027, - "learning_rate": 9.419893036937374e-06, - "loss": 0.3688, - "step": 7885 - }, - { - "epoch": 0.5153911509051696, - "grad_norm": 0.46031102538108826, - "learning_rate": 9.419729770280776e-06, - "loss": 0.4123, - "step": 7886 - }, - { - "epoch": 0.5154565061107117, - "grad_norm": 0.4093955159187317, - "learning_rate": 9.419566482067625e-06, - "loss": 0.3158, - "step": 7887 - }, - { - "epoch": 0.5155218613162539, - "grad_norm": 0.4443757236003876, - "learning_rate": 9.419403172298719e-06, - "loss": 0.3815, - "step": 7888 - }, - { - "epoch": 0.515587216521796, - "grad_norm": 0.45519816875457764, - "learning_rate": 9.41923984097485e-06, - "loss": 0.4094, - "step": 7889 - }, - { - "epoch": 0.515652571727338, - "grad_norm": 0.4669530689716339, - "learning_rate": 9.419076488096819e-06, - "loss": 0.3936, - "step": 7890 - }, - { - "epoch": 0.5157179269328802, - "grad_norm": 0.4384254217147827, - "learning_rate": 9.41891311366542e-06, - "loss": 0.359, - "step": 7891 - }, - { - "epoch": 0.5157832821384223, - "grad_norm": 0.45849543809890747, - "learning_rate": 9.418749717681452e-06, - "loss": 0.3967, - "step": 7892 - }, - { - "epoch": 0.5158486373439645, - "grad_norm": 0.4708442986011505, - "learning_rate": 9.418586300145711e-06, - "loss": 0.4218, - "step": 7893 - }, - { - "epoch": 0.5159139925495065, - "grad_norm": 0.45116758346557617, - "learning_rate": 9.418422861058993e-06, - "loss": 0.3908, - "step": 7894 - }, - { - "epoch": 0.5159793477550487, - "grad_norm": 0.45099860429763794, - "learning_rate": 9.418259400422095e-06, - "loss": 0.3619, - "step": 7895 - }, - { - "epoch": 0.5160447029605908, - "grad_norm": 0.836210310459137, - "learning_rate": 9.418095918235818e-06, - "loss": 0.3915, - "step": 7896 - }, - { - "epoch": 0.516110058166133, - "grad_norm": 0.4576779305934906, - "learning_rate": 9.417932414500954e-06, - "loss": 0.4526, - "step": 7897 - }, - { - "epoch": 0.516175413371675, - "grad_norm": 0.43634334206581116, - "learning_rate": 9.417768889218306e-06, - "loss": 0.3891, - "step": 7898 - }, - { - "epoch": 0.5162407685772171, - "grad_norm": 0.5078834891319275, - "learning_rate": 9.417605342388666e-06, - "loss": 0.4677, - "step": 7899 - }, - { - "epoch": 0.5163061237827593, - "grad_norm": 0.4795784056186676, - "learning_rate": 9.417441774012835e-06, - "loss": 0.409, - "step": 7900 - }, - { - "epoch": 0.5163714789883014, - "grad_norm": 0.46211862564086914, - "learning_rate": 9.41727818409161e-06, - "loss": 0.3788, - "step": 7901 - }, - { - "epoch": 0.5164368341938436, - "grad_norm": 0.45512014627456665, - "learning_rate": 9.417114572625789e-06, - "loss": 0.4262, - "step": 7902 - }, - { - "epoch": 0.5165021893993856, - "grad_norm": 0.46899402141571045, - "learning_rate": 9.416950939616172e-06, - "loss": 0.4267, - "step": 7903 - }, - { - "epoch": 0.5165675446049278, - "grad_norm": 0.4571188688278198, - "learning_rate": 9.416787285063553e-06, - "loss": 0.3428, - "step": 7904 - }, - { - "epoch": 0.5166328998104699, - "grad_norm": 0.45445504784584045, - "learning_rate": 9.416623608968732e-06, - "loss": 0.3989, - "step": 7905 - }, - { - "epoch": 0.5166982550160121, - "grad_norm": 0.4811440706253052, - "learning_rate": 9.416459911332509e-06, - "loss": 0.3949, - "step": 7906 - }, - { - "epoch": 0.5167636102215541, - "grad_norm": 0.45945218205451965, - "learning_rate": 9.416296192155681e-06, - "loss": 0.3999, - "step": 7907 - }, - { - "epoch": 0.5168289654270962, - "grad_norm": 0.4790816307067871, - "learning_rate": 9.416132451439046e-06, - "loss": 0.4115, - "step": 7908 - }, - { - "epoch": 0.5168943206326384, - "grad_norm": 0.5333146452903748, - "learning_rate": 9.4159686891834e-06, - "loss": 0.4436, - "step": 7909 - }, - { - "epoch": 0.5169596758381805, - "grad_norm": 0.4660441279411316, - "learning_rate": 9.41580490538955e-06, - "loss": 0.3939, - "step": 7910 - }, - { - "epoch": 0.5170250310437227, - "grad_norm": 0.4453851878643036, - "learning_rate": 9.415641100058287e-06, - "loss": 0.4342, - "step": 7911 - }, - { - "epoch": 0.5170903862492647, - "grad_norm": 0.47244173288345337, - "learning_rate": 9.415477273190415e-06, - "loss": 0.3913, - "step": 7912 - }, - { - "epoch": 0.5171557414548069, - "grad_norm": 0.4236837327480316, - "learning_rate": 9.415313424786727e-06, - "loss": 0.3579, - "step": 7913 - }, - { - "epoch": 0.517221096660349, - "grad_norm": 0.4299204349517822, - "learning_rate": 9.415149554848029e-06, - "loss": 0.3561, - "step": 7914 - }, - { - "epoch": 0.5172864518658912, - "grad_norm": 0.4726879894733429, - "learning_rate": 9.414985663375114e-06, - "loss": 0.4011, - "step": 7915 - }, - { - "epoch": 0.5173518070714332, - "grad_norm": 0.4675091803073883, - "learning_rate": 9.414821750368786e-06, - "loss": 0.3765, - "step": 7916 - }, - { - "epoch": 0.5174171622769753, - "grad_norm": 0.4623364210128784, - "learning_rate": 9.414657815829845e-06, - "loss": 0.3961, - "step": 7917 - }, - { - "epoch": 0.5174825174825175, - "grad_norm": 0.4778570532798767, - "learning_rate": 9.414493859759086e-06, - "loss": 0.419, - "step": 7918 - }, - { - "epoch": 0.5175478726880596, - "grad_norm": 0.4654449224472046, - "learning_rate": 9.414329882157311e-06, - "loss": 0.3996, - "step": 7919 - }, - { - "epoch": 0.5176132278936018, - "grad_norm": 0.48707547783851624, - "learning_rate": 9.41416588302532e-06, - "loss": 0.4329, - "step": 7920 - }, - { - "epoch": 0.5176785830991438, - "grad_norm": 0.4768628478050232, - "learning_rate": 9.414001862363913e-06, - "loss": 0.428, - "step": 7921 - }, - { - "epoch": 0.517743938304686, - "grad_norm": 0.4502464830875397, - "learning_rate": 9.41383782017389e-06, - "loss": 0.397, - "step": 7922 - }, - { - "epoch": 0.5178092935102281, - "grad_norm": 0.4830716550350189, - "learning_rate": 9.413673756456052e-06, - "loss": 0.4565, - "step": 7923 - }, - { - "epoch": 0.5178746487157702, - "grad_norm": 0.4762672483921051, - "learning_rate": 9.413509671211196e-06, - "loss": 0.4157, - "step": 7924 - }, - { - "epoch": 0.5179400039213123, - "grad_norm": 0.4664156138896942, - "learning_rate": 9.413345564440127e-06, - "loss": 0.3971, - "step": 7925 - }, - { - "epoch": 0.5180053591268544, - "grad_norm": 0.44348761439323425, - "learning_rate": 9.413181436143639e-06, - "loss": 0.3612, - "step": 7926 - }, - { - "epoch": 0.5180707143323966, - "grad_norm": 0.5411385297775269, - "learning_rate": 9.413017286322538e-06, - "loss": 0.4534, - "step": 7927 - }, - { - "epoch": 0.5181360695379387, - "grad_norm": 0.4380732476711273, - "learning_rate": 9.412853114977625e-06, - "loss": 0.3546, - "step": 7928 - }, - { - "epoch": 0.5182014247434809, - "grad_norm": 0.48528042435646057, - "learning_rate": 9.412688922109697e-06, - "loss": 0.4118, - "step": 7929 - }, - { - "epoch": 0.5182667799490229, - "grad_norm": 0.49747562408447266, - "learning_rate": 9.412524707719555e-06, - "loss": 0.4028, - "step": 7930 - }, - { - "epoch": 0.5183321351545651, - "grad_norm": 0.42012205719947815, - "learning_rate": 9.412360471808003e-06, - "loss": 0.3336, - "step": 7931 - }, - { - "epoch": 0.5183974903601072, - "grad_norm": 0.44400691986083984, - "learning_rate": 9.41219621437584e-06, - "loss": 0.3805, - "step": 7932 - }, - { - "epoch": 0.5184628455656493, - "grad_norm": 0.46124425530433655, - "learning_rate": 9.412031935423866e-06, - "loss": 0.3947, - "step": 7933 - }, - { - "epoch": 0.5185282007711914, - "grad_norm": 0.4640866816043854, - "learning_rate": 9.411867634952886e-06, - "loss": 0.3785, - "step": 7934 - }, - { - "epoch": 0.5185935559767335, - "grad_norm": 0.4519132077693939, - "learning_rate": 9.411703312963698e-06, - "loss": 0.3854, - "step": 7935 - }, - { - "epoch": 0.5186589111822757, - "grad_norm": 0.4467296600341797, - "learning_rate": 9.411538969457106e-06, - "loss": 0.3821, - "step": 7936 - }, - { - "epoch": 0.5187242663878178, - "grad_norm": 0.5134698748588562, - "learning_rate": 9.411374604433909e-06, - "loss": 0.4588, - "step": 7937 - }, - { - "epoch": 0.51878962159336, - "grad_norm": 0.460938960313797, - "learning_rate": 9.411210217894909e-06, - "loss": 0.408, - "step": 7938 - }, - { - "epoch": 0.518854976798902, - "grad_norm": 0.4559634327888489, - "learning_rate": 9.41104580984091e-06, - "loss": 0.3571, - "step": 7939 - }, - { - "epoch": 0.5189203320044442, - "grad_norm": 0.46090608835220337, - "learning_rate": 9.410881380272712e-06, - "loss": 0.4042, - "step": 7940 - }, - { - "epoch": 0.5189856872099863, - "grad_norm": 0.43631237745285034, - "learning_rate": 9.410716929191116e-06, - "loss": 0.4032, - "step": 7941 - }, - { - "epoch": 0.5190510424155284, - "grad_norm": 0.45229873061180115, - "learning_rate": 9.410552456596928e-06, - "loss": 0.3882, - "step": 7942 - }, - { - "epoch": 0.5191163976210705, - "grad_norm": 0.42981332540512085, - "learning_rate": 9.410387962490946e-06, - "loss": 0.3639, - "step": 7943 - }, - { - "epoch": 0.5191817528266126, - "grad_norm": 0.4539669156074524, - "learning_rate": 9.410223446873974e-06, - "loss": 0.3791, - "step": 7944 - }, - { - "epoch": 0.5192471080321548, - "grad_norm": 0.46513253450393677, - "learning_rate": 9.410058909746816e-06, - "loss": 0.4007, - "step": 7945 - }, - { - "epoch": 0.5193124632376969, - "grad_norm": 0.4235931932926178, - "learning_rate": 9.40989435111027e-06, - "loss": 0.3416, - "step": 7946 - }, - { - "epoch": 0.519377818443239, - "grad_norm": 0.5087857246398926, - "learning_rate": 9.409729770965145e-06, - "loss": 0.4563, - "step": 7947 - }, - { - "epoch": 0.5194431736487811, - "grad_norm": 0.4316421151161194, - "learning_rate": 9.40956516931224e-06, - "loss": 0.3859, - "step": 7948 - }, - { - "epoch": 0.5195085288543232, - "grad_norm": 0.5023858547210693, - "learning_rate": 9.409400546152357e-06, - "loss": 0.4635, - "step": 7949 - }, - { - "epoch": 0.5195738840598654, - "grad_norm": 0.45761409401893616, - "learning_rate": 9.409235901486297e-06, - "loss": 0.4269, - "step": 7950 - }, - { - "epoch": 0.5196392392654074, - "grad_norm": 0.4780981242656708, - "learning_rate": 9.40907123531487e-06, - "loss": 0.39, - "step": 7951 - }, - { - "epoch": 0.5197045944709496, - "grad_norm": 0.47008877992630005, - "learning_rate": 9.408906547638875e-06, - "loss": 0.4001, - "step": 7952 - }, - { - "epoch": 0.5197699496764917, - "grad_norm": 0.4629535973072052, - "learning_rate": 9.408741838459113e-06, - "loss": 0.4072, - "step": 7953 - }, - { - "epoch": 0.5198353048820339, - "grad_norm": 0.5037320852279663, - "learning_rate": 9.408577107776391e-06, - "loss": 0.465, - "step": 7954 - }, - { - "epoch": 0.519900660087576, - "grad_norm": 0.43817412853240967, - "learning_rate": 9.408412355591512e-06, - "loss": 0.3561, - "step": 7955 - }, - { - "epoch": 0.5199660152931181, - "grad_norm": 0.506892204284668, - "learning_rate": 9.408247581905276e-06, - "loss": 0.4385, - "step": 7956 - }, - { - "epoch": 0.5200313704986602, - "grad_norm": 0.4520317614078522, - "learning_rate": 9.40808278671849e-06, - "loss": 0.3834, - "step": 7957 - }, - { - "epoch": 0.5200967257042023, - "grad_norm": 0.5231791734695435, - "learning_rate": 9.40791797003196e-06, - "loss": 0.4623, - "step": 7958 - }, - { - "epoch": 0.5201620809097445, - "grad_norm": 0.45876771211624146, - "learning_rate": 9.407753131846485e-06, - "loss": 0.3952, - "step": 7959 - }, - { - "epoch": 0.5202274361152865, - "grad_norm": 0.4685318171977997, - "learning_rate": 9.40758827216287e-06, - "loss": 0.4043, - "step": 7960 - }, - { - "epoch": 0.5202927913208287, - "grad_norm": 0.4444715082645416, - "learning_rate": 9.407423390981922e-06, - "loss": 0.3723, - "step": 7961 - }, - { - "epoch": 0.5203581465263708, - "grad_norm": 0.5097283124923706, - "learning_rate": 9.407258488304444e-06, - "loss": 0.4508, - "step": 7962 - }, - { - "epoch": 0.520423501731913, - "grad_norm": 0.41760626435279846, - "learning_rate": 9.407093564131238e-06, - "loss": 0.3569, - "step": 7963 - }, - { - "epoch": 0.5204888569374551, - "grad_norm": 0.4615936279296875, - "learning_rate": 9.406928618463108e-06, - "loss": 0.3916, - "step": 7964 - }, - { - "epoch": 0.5205542121429972, - "grad_norm": 0.43561121821403503, - "learning_rate": 9.406763651300863e-06, - "loss": 0.3186, - "step": 7965 - }, - { - "epoch": 0.5206195673485393, - "grad_norm": 0.492966890335083, - "learning_rate": 9.406598662645305e-06, - "loss": 0.3873, - "step": 7966 - }, - { - "epoch": 0.5206849225540814, - "grad_norm": 0.4438392221927643, - "learning_rate": 9.406433652497239e-06, - "loss": 0.3748, - "step": 7967 - }, - { - "epoch": 0.5207502777596236, - "grad_norm": 0.44614288210868835, - "learning_rate": 9.406268620857469e-06, - "loss": 0.3828, - "step": 7968 - }, - { - "epoch": 0.5208156329651656, - "grad_norm": 0.5281800627708435, - "learning_rate": 9.4061035677268e-06, - "loss": 0.4166, - "step": 7969 - }, - { - "epoch": 0.5208809881707078, - "grad_norm": 0.4884756803512573, - "learning_rate": 9.405938493106037e-06, - "loss": 0.464, - "step": 7970 - }, - { - "epoch": 0.5209463433762499, - "grad_norm": 0.48060768842697144, - "learning_rate": 9.405773396995987e-06, - "loss": 0.3952, - "step": 7971 - }, - { - "epoch": 0.5210116985817921, - "grad_norm": 0.4539071023464203, - "learning_rate": 9.405608279397454e-06, - "loss": 0.3619, - "step": 7972 - }, - { - "epoch": 0.5210770537873342, - "grad_norm": 0.46185436844825745, - "learning_rate": 9.40544314031124e-06, - "loss": 0.4034, - "step": 7973 - }, - { - "epoch": 0.5211424089928762, - "grad_norm": 0.4426986277103424, - "learning_rate": 9.405277979738156e-06, - "loss": 0.3442, - "step": 7974 - }, - { - "epoch": 0.5212077641984184, - "grad_norm": 0.49941545724868774, - "learning_rate": 9.405112797679006e-06, - "loss": 0.4346, - "step": 7975 - }, - { - "epoch": 0.5212731194039605, - "grad_norm": 0.49342066049575806, - "learning_rate": 9.404947594134595e-06, - "loss": 0.4008, - "step": 7976 - }, - { - "epoch": 0.5213384746095027, - "grad_norm": 0.4608862102031708, - "learning_rate": 9.404782369105728e-06, - "loss": 0.4065, - "step": 7977 - }, - { - "epoch": 0.5214038298150447, - "grad_norm": 0.45691150426864624, - "learning_rate": 9.404617122593209e-06, - "loss": 0.3841, - "step": 7978 - }, - { - "epoch": 0.5214691850205869, - "grad_norm": 0.4427545964717865, - "learning_rate": 9.40445185459785e-06, - "loss": 0.3374, - "step": 7979 - }, - { - "epoch": 0.521534540226129, - "grad_norm": 0.47346892952919006, - "learning_rate": 9.40428656512045e-06, - "loss": 0.3938, - "step": 7980 - }, - { - "epoch": 0.5215998954316712, - "grad_norm": 0.4658379852771759, - "learning_rate": 9.40412125416182e-06, - "loss": 0.3964, - "step": 7981 - }, - { - "epoch": 0.5216652506372133, - "grad_norm": 0.4694722294807434, - "learning_rate": 9.403955921722766e-06, - "loss": 0.4014, - "step": 7982 - }, - { - "epoch": 0.5217306058427553, - "grad_norm": 0.4675805866718292, - "learning_rate": 9.403790567804092e-06, - "loss": 0.3909, - "step": 7983 - }, - { - "epoch": 0.5217959610482975, - "grad_norm": 0.4329233467578888, - "learning_rate": 9.403625192406606e-06, - "loss": 0.3259, - "step": 7984 - }, - { - "epoch": 0.5218613162538396, - "grad_norm": 0.4623168706893921, - "learning_rate": 9.403459795531117e-06, - "loss": 0.3807, - "step": 7985 - }, - { - "epoch": 0.5219266714593818, - "grad_norm": 0.4252798855304718, - "learning_rate": 9.403294377178425e-06, - "loss": 0.3704, - "step": 7986 - }, - { - "epoch": 0.5219920266649238, - "grad_norm": 0.44873565435409546, - "learning_rate": 9.403128937349344e-06, - "loss": 0.3797, - "step": 7987 - }, - { - "epoch": 0.522057381870466, - "grad_norm": 0.46728047728538513, - "learning_rate": 9.402963476044675e-06, - "loss": 0.3972, - "step": 7988 - }, - { - "epoch": 0.5221227370760081, - "grad_norm": 0.4808270037174225, - "learning_rate": 9.40279799326523e-06, - "loss": 0.3908, - "step": 7989 - }, - { - "epoch": 0.5221880922815503, - "grad_norm": 0.44836002588272095, - "learning_rate": 9.402632489011814e-06, - "loss": 0.3486, - "step": 7990 - }, - { - "epoch": 0.5222534474870923, - "grad_norm": 0.45154696702957153, - "learning_rate": 9.402466963285233e-06, - "loss": 0.3857, - "step": 7991 - }, - { - "epoch": 0.5223188026926344, - "grad_norm": 0.43766096234321594, - "learning_rate": 9.402301416086295e-06, - "loss": 0.4051, - "step": 7992 - }, - { - "epoch": 0.5223841578981766, - "grad_norm": 0.47397372126579285, - "learning_rate": 9.402135847415808e-06, - "loss": 0.4441, - "step": 7993 - }, - { - "epoch": 0.5224495131037187, - "grad_norm": 0.47660985589027405, - "learning_rate": 9.401970257274581e-06, - "loss": 0.4099, - "step": 7994 - }, - { - "epoch": 0.5225148683092609, - "grad_norm": 0.4678085446357727, - "learning_rate": 9.40180464566342e-06, - "loss": 0.4004, - "step": 7995 - }, - { - "epoch": 0.5225802235148029, - "grad_norm": 0.41487210988998413, - "learning_rate": 9.40163901258313e-06, - "loss": 0.3407, - "step": 7996 - }, - { - "epoch": 0.5226455787203451, - "grad_norm": 0.44676473736763, - "learning_rate": 9.401473358034526e-06, - "loss": 0.3804, - "step": 7997 - }, - { - "epoch": 0.5227109339258872, - "grad_norm": 0.4447825253009796, - "learning_rate": 9.401307682018408e-06, - "loss": 0.3553, - "step": 7998 - }, - { - "epoch": 0.5227762891314294, - "grad_norm": 0.46299442648887634, - "learning_rate": 9.40114198453559e-06, - "loss": 0.3994, - "step": 7999 - }, - { - "epoch": 0.5228416443369714, - "grad_norm": 0.488105446100235, - "learning_rate": 9.400976265586875e-06, - "loss": 0.3883, - "step": 8000 - }, - { - "epoch": 0.5229069995425135, - "grad_norm": 0.4538838267326355, - "learning_rate": 9.400810525173076e-06, - "loss": 0.3945, - "step": 8001 - }, - { - "epoch": 0.5229723547480557, - "grad_norm": 0.43405792117118835, - "learning_rate": 9.400644763294999e-06, - "loss": 0.3575, - "step": 8002 - }, - { - "epoch": 0.5230377099535978, - "grad_norm": 0.4692228138446808, - "learning_rate": 9.400478979953454e-06, - "loss": 0.4311, - "step": 8003 - }, - { - "epoch": 0.52310306515914, - "grad_norm": 0.4506242871284485, - "learning_rate": 9.400313175149247e-06, - "loss": 0.3687, - "step": 8004 - }, - { - "epoch": 0.523168420364682, - "grad_norm": 0.45200034976005554, - "learning_rate": 9.40014734888319e-06, - "loss": 0.3768, - "step": 8005 - }, - { - "epoch": 0.5232337755702242, - "grad_norm": 0.41679516434669495, - "learning_rate": 9.399981501156087e-06, - "loss": 0.3496, - "step": 8006 - }, - { - "epoch": 0.5232991307757663, - "grad_norm": 0.5029006004333496, - "learning_rate": 9.399815631968753e-06, - "loss": 0.43, - "step": 8007 - }, - { - "epoch": 0.5233644859813084, - "grad_norm": 0.548403799533844, - "learning_rate": 9.399649741321993e-06, - "loss": 0.4606, - "step": 8008 - }, - { - "epoch": 0.5234298411868505, - "grad_norm": 0.4558612108230591, - "learning_rate": 9.399483829216618e-06, - "loss": 0.4198, - "step": 8009 - }, - { - "epoch": 0.5234951963923926, - "grad_norm": 0.4317144751548767, - "learning_rate": 9.399317895653434e-06, - "loss": 0.3945, - "step": 8010 - }, - { - "epoch": 0.5235605515979348, - "grad_norm": 0.4775659143924713, - "learning_rate": 9.399151940633255e-06, - "loss": 0.4085, - "step": 8011 - }, - { - "epoch": 0.5236259068034769, - "grad_norm": 0.45099619030952454, - "learning_rate": 9.398985964156885e-06, - "loss": 0.3799, - "step": 8012 - }, - { - "epoch": 0.523691262009019, - "grad_norm": 0.4405990242958069, - "learning_rate": 9.39881996622514e-06, - "loss": 0.3953, - "step": 8013 - }, - { - "epoch": 0.5237566172145611, - "grad_norm": 0.43898147344589233, - "learning_rate": 9.398653946838824e-06, - "loss": 0.3527, - "step": 8014 - }, - { - "epoch": 0.5238219724201033, - "grad_norm": 0.43351277709007263, - "learning_rate": 9.398487905998749e-06, - "loss": 0.3583, - "step": 8015 - }, - { - "epoch": 0.5238873276256454, - "grad_norm": 0.43379583954811096, - "learning_rate": 9.398321843705724e-06, - "loss": 0.3586, - "step": 8016 - }, - { - "epoch": 0.5239526828311875, - "grad_norm": 0.5770723223686218, - "learning_rate": 9.398155759960562e-06, - "loss": 0.4713, - "step": 8017 - }, - { - "epoch": 0.5240180380367296, - "grad_norm": 0.4691585898399353, - "learning_rate": 9.397989654764068e-06, - "loss": 0.396, - "step": 8018 - }, - { - "epoch": 0.5240833932422717, - "grad_norm": 0.47056636214256287, - "learning_rate": 9.397823528117056e-06, - "loss": 0.3272, - "step": 8019 - }, - { - "epoch": 0.5241487484478139, - "grad_norm": 0.46248659491539, - "learning_rate": 9.397657380020335e-06, - "loss": 0.3404, - "step": 8020 - }, - { - "epoch": 0.524214103653356, - "grad_norm": 0.4465576708316803, - "learning_rate": 9.397491210474716e-06, - "loss": 0.378, - "step": 8021 - }, - { - "epoch": 0.5242794588588982, - "grad_norm": 0.43117213249206543, - "learning_rate": 9.39732501948101e-06, - "loss": 0.3901, - "step": 8022 - }, - { - "epoch": 0.5243448140644402, - "grad_norm": 0.4837399125099182, - "learning_rate": 9.397158807040023e-06, - "loss": 0.4328, - "step": 8023 - }, - { - "epoch": 0.5244101692699824, - "grad_norm": 0.4933948218822479, - "learning_rate": 9.396992573152572e-06, - "loss": 0.4332, - "step": 8024 - }, - { - "epoch": 0.5244755244755245, - "grad_norm": 0.4807804822921753, - "learning_rate": 9.396826317819465e-06, - "loss": 0.3978, - "step": 8025 - }, - { - "epoch": 0.5245408796810666, - "grad_norm": 0.44339725375175476, - "learning_rate": 9.396660041041511e-06, - "loss": 0.3732, - "step": 8026 - }, - { - "epoch": 0.5246062348866087, - "grad_norm": 0.46488943696022034, - "learning_rate": 9.396493742819524e-06, - "loss": 0.4205, - "step": 8027 - }, - { - "epoch": 0.5246715900921508, - "grad_norm": 0.461679607629776, - "learning_rate": 9.396327423154315e-06, - "loss": 0.3635, - "step": 8028 - }, - { - "epoch": 0.524736945297693, - "grad_norm": 0.4390772879123688, - "learning_rate": 9.396161082046693e-06, - "loss": 0.3654, - "step": 8029 - }, - { - "epoch": 0.5248023005032351, - "grad_norm": 0.5087174773216248, - "learning_rate": 9.39599471949747e-06, - "loss": 0.4039, - "step": 8030 - }, - { - "epoch": 0.5248676557087772, - "grad_norm": 0.49739664793014526, - "learning_rate": 9.395828335507457e-06, - "loss": 0.49, - "step": 8031 - }, - { - "epoch": 0.5249330109143193, - "grad_norm": 0.475120484828949, - "learning_rate": 9.395661930077469e-06, - "loss": 0.4052, - "step": 8032 - }, - { - "epoch": 0.5249983661198614, - "grad_norm": 0.4692741930484772, - "learning_rate": 9.395495503208311e-06, - "loss": 0.3939, - "step": 8033 - }, - { - "epoch": 0.5250637213254036, - "grad_norm": 0.5037876963615417, - "learning_rate": 9.395329054900802e-06, - "loss": 0.389, - "step": 8034 - }, - { - "epoch": 0.5251290765309456, - "grad_norm": 0.45203882455825806, - "learning_rate": 9.395162585155751e-06, - "loss": 0.3557, - "step": 8035 - }, - { - "epoch": 0.5251944317364878, - "grad_norm": 0.4454691708087921, - "learning_rate": 9.394996093973967e-06, - "loss": 0.3501, - "step": 8036 - }, - { - "epoch": 0.5252597869420299, - "grad_norm": 0.4618183672428131, - "learning_rate": 9.394829581356267e-06, - "loss": 0.4097, - "step": 8037 - }, - { - "epoch": 0.5253251421475721, - "grad_norm": 0.4642794728279114, - "learning_rate": 9.394663047303458e-06, - "loss": 0.4295, - "step": 8038 - }, - { - "epoch": 0.5253904973531142, - "grad_norm": 0.4464781582355499, - "learning_rate": 9.394496491816356e-06, - "loss": 0.3716, - "step": 8039 - }, - { - "epoch": 0.5254558525586563, - "grad_norm": 0.47627368569374084, - "learning_rate": 9.394329914895772e-06, - "loss": 0.4116, - "step": 8040 - }, - { - "epoch": 0.5255212077641984, - "grad_norm": 0.4596094787120819, - "learning_rate": 9.394163316542519e-06, - "loss": 0.4002, - "step": 8041 - }, - { - "epoch": 0.5255865629697405, - "grad_norm": 0.4332751929759979, - "learning_rate": 9.393996696757411e-06, - "loss": 0.3552, - "step": 8042 - }, - { - "epoch": 0.5256519181752827, - "grad_norm": 0.4469105303287506, - "learning_rate": 9.393830055541255e-06, - "loss": 0.35, - "step": 8043 - }, - { - "epoch": 0.5257172733808247, - "grad_norm": 0.43737125396728516, - "learning_rate": 9.39366339289487e-06, - "loss": 0.3871, - "step": 8044 - }, - { - "epoch": 0.5257826285863669, - "grad_norm": 0.46573638916015625, - "learning_rate": 9.393496708819066e-06, - "loss": 0.3775, - "step": 8045 - }, - { - "epoch": 0.525847983791909, - "grad_norm": 0.4459984600543976, - "learning_rate": 9.393330003314658e-06, - "loss": 0.3727, - "step": 8046 - }, - { - "epoch": 0.5259133389974512, - "grad_norm": 0.42989417910575867, - "learning_rate": 9.393163276382455e-06, - "loss": 0.332, - "step": 8047 - }, - { - "epoch": 0.5259786942029933, - "grad_norm": 0.444750040769577, - "learning_rate": 9.392996528023275e-06, - "loss": 0.3936, - "step": 8048 - }, - { - "epoch": 0.5260440494085354, - "grad_norm": 0.4220130443572998, - "learning_rate": 9.392829758237928e-06, - "loss": 0.3357, - "step": 8049 - }, - { - "epoch": 0.5261094046140775, - "grad_norm": 0.5116159319877625, - "learning_rate": 9.392662967027228e-06, - "loss": 0.3774, - "step": 8050 - }, - { - "epoch": 0.5261747598196196, - "grad_norm": 0.4552266299724579, - "learning_rate": 9.39249615439199e-06, - "loss": 0.3766, - "step": 8051 - }, - { - "epoch": 0.5262401150251618, - "grad_norm": 0.4574126899242401, - "learning_rate": 9.392329320333027e-06, - "loss": 0.3709, - "step": 8052 - }, - { - "epoch": 0.5263054702307038, - "grad_norm": 0.47054770588874817, - "learning_rate": 9.392162464851152e-06, - "loss": 0.4175, - "step": 8053 - }, - { - "epoch": 0.526370825436246, - "grad_norm": 0.48097535967826843, - "learning_rate": 9.391995587947175e-06, - "loss": 0.4163, - "step": 8054 - }, - { - "epoch": 0.5264361806417881, - "grad_norm": 0.49322423338890076, - "learning_rate": 9.391828689621918e-06, - "loss": 0.417, - "step": 8055 - }, - { - "epoch": 0.5265015358473303, - "grad_norm": 0.47926315665245056, - "learning_rate": 9.391661769876191e-06, - "loss": 0.4539, - "step": 8056 - }, - { - "epoch": 0.5265668910528724, - "grad_norm": 0.44520673155784607, - "learning_rate": 9.391494828710808e-06, - "loss": 0.3855, - "step": 8057 - }, - { - "epoch": 0.5266322462584145, - "grad_norm": 0.4670080840587616, - "learning_rate": 9.391327866126583e-06, - "loss": 0.3967, - "step": 8058 - }, - { - "epoch": 0.5266976014639566, - "grad_norm": 0.453642338514328, - "learning_rate": 9.391160882124331e-06, - "loss": 0.3721, - "step": 8059 - }, - { - "epoch": 0.5267629566694987, - "grad_norm": 0.44446754455566406, - "learning_rate": 9.390993876704865e-06, - "loss": 0.4194, - "step": 8060 - }, - { - "epoch": 0.5268283118750409, - "grad_norm": 0.4614487886428833, - "learning_rate": 9.390826849869001e-06, - "loss": 0.4017, - "step": 8061 - }, - { - "epoch": 0.5268936670805829, - "grad_norm": 0.48017358779907227, - "learning_rate": 9.390659801617554e-06, - "loss": 0.4488, - "step": 8062 - }, - { - "epoch": 0.5269590222861251, - "grad_norm": 0.4257350564002991, - "learning_rate": 9.390492731951337e-06, - "loss": 0.3588, - "step": 8063 - }, - { - "epoch": 0.5270243774916672, - "grad_norm": 0.5147051811218262, - "learning_rate": 9.390325640871168e-06, - "loss": 0.395, - "step": 8064 - }, - { - "epoch": 0.5270897326972094, - "grad_norm": 0.4848407804965973, - "learning_rate": 9.390158528377857e-06, - "loss": 0.4287, - "step": 8065 - }, - { - "epoch": 0.5271550879027515, - "grad_norm": 0.48413023352622986, - "learning_rate": 9.389991394472224e-06, - "loss": 0.4434, - "step": 8066 - }, - { - "epoch": 0.5272204431082935, - "grad_norm": 0.4852887988090515, - "learning_rate": 9.389824239155084e-06, - "loss": 0.4065, - "step": 8067 - }, - { - "epoch": 0.5272857983138357, - "grad_norm": 0.5066052079200745, - "learning_rate": 9.389657062427246e-06, - "loss": 0.3988, - "step": 8068 - }, - { - "epoch": 0.5273511535193778, - "grad_norm": 0.42358142137527466, - "learning_rate": 9.389489864289533e-06, - "loss": 0.3144, - "step": 8069 - }, - { - "epoch": 0.52741650872492, - "grad_norm": 0.44607630372047424, - "learning_rate": 9.389322644742755e-06, - "loss": 0.3655, - "step": 8070 - }, - { - "epoch": 0.527481863930462, - "grad_norm": 0.4566013514995575, - "learning_rate": 9.389155403787731e-06, - "loss": 0.369, - "step": 8071 - }, - { - "epoch": 0.5275472191360042, - "grad_norm": 0.4796139597892761, - "learning_rate": 9.388988141425276e-06, - "loss": 0.3988, - "step": 8072 - }, - { - "epoch": 0.5276125743415463, - "grad_norm": 0.48625293374061584, - "learning_rate": 9.388820857656204e-06, - "loss": 0.4024, - "step": 8073 - }, - { - "epoch": 0.5276779295470885, - "grad_norm": 0.47770261764526367, - "learning_rate": 9.388653552481335e-06, - "loss": 0.4033, - "step": 8074 - }, - { - "epoch": 0.5277432847526305, - "grad_norm": 0.4452119469642639, - "learning_rate": 9.388486225901478e-06, - "loss": 0.3679, - "step": 8075 - }, - { - "epoch": 0.5278086399581726, - "grad_norm": 0.4728561043739319, - "learning_rate": 9.388318877917458e-06, - "loss": 0.3997, - "step": 8076 - }, - { - "epoch": 0.5278739951637148, - "grad_norm": 0.4540014863014221, - "learning_rate": 9.388151508530083e-06, - "loss": 0.387, - "step": 8077 - }, - { - "epoch": 0.5279393503692569, - "grad_norm": 0.45731332898139954, - "learning_rate": 9.387984117740173e-06, - "loss": 0.36, - "step": 8078 - }, - { - "epoch": 0.5280047055747991, - "grad_norm": 0.45137470960617065, - "learning_rate": 9.387816705548547e-06, - "loss": 0.3594, - "step": 8079 - }, - { - "epoch": 0.5280700607803411, - "grad_norm": 0.4522440433502197, - "learning_rate": 9.387649271956017e-06, - "loss": 0.3452, - "step": 8080 - }, - { - "epoch": 0.5281354159858833, - "grad_norm": 0.7184723019599915, - "learning_rate": 9.387481816963402e-06, - "loss": 0.4073, - "step": 8081 - }, - { - "epoch": 0.5282007711914254, - "grad_norm": 0.42924800515174866, - "learning_rate": 9.387314340571518e-06, - "loss": 0.3835, - "step": 8082 - }, - { - "epoch": 0.5282661263969676, - "grad_norm": 0.4880116581916809, - "learning_rate": 9.387146842781184e-06, - "loss": 0.4272, - "step": 8083 - }, - { - "epoch": 0.5283314816025096, - "grad_norm": 0.4179949462413788, - "learning_rate": 9.386979323593212e-06, - "loss": 0.3205, - "step": 8084 - }, - { - "epoch": 0.5283968368080517, - "grad_norm": 0.4548265039920807, - "learning_rate": 9.386811783008423e-06, - "loss": 0.3805, - "step": 8085 - }, - { - "epoch": 0.5284621920135939, - "grad_norm": 0.4920929968357086, - "learning_rate": 9.386644221027633e-06, - "loss": 0.3991, - "step": 8086 - }, - { - "epoch": 0.528527547219136, - "grad_norm": 0.4421020746231079, - "learning_rate": 9.386476637651661e-06, - "loss": 0.3675, - "step": 8087 - }, - { - "epoch": 0.5285929024246782, - "grad_norm": 0.45332884788513184, - "learning_rate": 9.38630903288132e-06, - "loss": 0.3875, - "step": 8088 - }, - { - "epoch": 0.5286582576302202, - "grad_norm": 0.42376795411109924, - "learning_rate": 9.386141406717432e-06, - "loss": 0.3548, - "step": 8089 - }, - { - "epoch": 0.5287236128357624, - "grad_norm": 0.40692609548568726, - "learning_rate": 9.385973759160813e-06, - "loss": 0.3424, - "step": 8090 - }, - { - "epoch": 0.5287889680413045, - "grad_norm": 0.498879611492157, - "learning_rate": 9.38580609021228e-06, - "loss": 0.3947, - "step": 8091 - }, - { - "epoch": 0.5288543232468466, - "grad_norm": 0.4672560691833496, - "learning_rate": 9.385638399872651e-06, - "loss": 0.3551, - "step": 8092 - }, - { - "epoch": 0.5289196784523887, - "grad_norm": 0.4360695481300354, - "learning_rate": 9.385470688142746e-06, - "loss": 0.3975, - "step": 8093 - }, - { - "epoch": 0.5289850336579308, - "grad_norm": 0.4511704742908478, - "learning_rate": 9.385302955023379e-06, - "loss": 0.376, - "step": 8094 - }, - { - "epoch": 0.529050388863473, - "grad_norm": 0.48188722133636475, - "learning_rate": 9.385135200515372e-06, - "loss": 0.3924, - "step": 8095 - }, - { - "epoch": 0.5291157440690151, - "grad_norm": 0.4877548813819885, - "learning_rate": 9.38496742461954e-06, - "loss": 0.4149, - "step": 8096 - }, - { - "epoch": 0.5291810992745573, - "grad_norm": 0.4314827024936676, - "learning_rate": 9.384799627336703e-06, - "loss": 0.3474, - "step": 8097 - }, - { - "epoch": 0.5292464544800993, - "grad_norm": 0.44380849599838257, - "learning_rate": 9.38463180866768e-06, - "loss": 0.365, - "step": 8098 - }, - { - "epoch": 0.5293118096856415, - "grad_norm": 0.4292486011981964, - "learning_rate": 9.384463968613289e-06, - "loss": 0.3236, - "step": 8099 - }, - { - "epoch": 0.5293771648911836, - "grad_norm": 0.4576188027858734, - "learning_rate": 9.384296107174347e-06, - "loss": 0.3888, - "step": 8100 - }, - { - "epoch": 0.5294425200967257, - "grad_norm": 0.4467967748641968, - "learning_rate": 9.384128224351676e-06, - "loss": 0.4106, - "step": 8101 - }, - { - "epoch": 0.5295078753022678, - "grad_norm": 0.4577098786830902, - "learning_rate": 9.38396032014609e-06, - "loss": 0.3942, - "step": 8102 - }, - { - "epoch": 0.5295732305078099, - "grad_norm": 0.48310619592666626, - "learning_rate": 9.383792394558412e-06, - "loss": 0.4446, - "step": 8103 - }, - { - "epoch": 0.5296385857133521, - "grad_norm": 0.4269142746925354, - "learning_rate": 9.383624447589462e-06, - "loss": 0.3499, - "step": 8104 - }, - { - "epoch": 0.5297039409188942, - "grad_norm": 0.4764975905418396, - "learning_rate": 9.383456479240054e-06, - "loss": 0.4144, - "step": 8105 - }, - { - "epoch": 0.5297692961244364, - "grad_norm": 0.45994073152542114, - "learning_rate": 9.38328848951101e-06, - "loss": 0.3947, - "step": 8106 - }, - { - "epoch": 0.5298346513299784, - "grad_norm": 0.42793282866477966, - "learning_rate": 9.383120478403151e-06, - "loss": 0.3367, - "step": 8107 - }, - { - "epoch": 0.5299000065355206, - "grad_norm": 0.45754751563072205, - "learning_rate": 9.382952445917295e-06, - "loss": 0.4171, - "step": 8108 - }, - { - "epoch": 0.5299653617410627, - "grad_norm": 0.47818121314048767, - "learning_rate": 9.382784392054262e-06, - "loss": 0.4069, - "step": 8109 - }, - { - "epoch": 0.5300307169466048, - "grad_norm": 0.43848422169685364, - "learning_rate": 9.38261631681487e-06, - "loss": 0.3821, - "step": 8110 - }, - { - "epoch": 0.5300960721521469, - "grad_norm": 0.4554431736469269, - "learning_rate": 9.38244822019994e-06, - "loss": 0.37, - "step": 8111 - }, - { - "epoch": 0.530161427357689, - "grad_norm": 0.47809508442878723, - "learning_rate": 9.382280102210292e-06, - "loss": 0.3948, - "step": 8112 - }, - { - "epoch": 0.5302267825632312, - "grad_norm": 0.4500177502632141, - "learning_rate": 9.382111962846745e-06, - "loss": 0.3757, - "step": 8113 - }, - { - "epoch": 0.5302921377687733, - "grad_norm": 0.45078301429748535, - "learning_rate": 9.381943802110121e-06, - "loss": 0.3622, - "step": 8114 - }, - { - "epoch": 0.5303574929743154, - "grad_norm": 0.4388760030269623, - "learning_rate": 9.381775620001238e-06, - "loss": 0.3573, - "step": 8115 - }, - { - "epoch": 0.5304228481798575, - "grad_norm": 0.444981187582016, - "learning_rate": 9.38160741652092e-06, - "loss": 0.3788, - "step": 8116 - }, - { - "epoch": 0.5304882033853996, - "grad_norm": 0.39920949935913086, - "learning_rate": 9.381439191669983e-06, - "loss": 0.3128, - "step": 8117 - }, - { - "epoch": 0.5305535585909418, - "grad_norm": 0.4420296251773834, - "learning_rate": 9.381270945449247e-06, - "loss": 0.3753, - "step": 8118 - }, - { - "epoch": 0.5306189137964838, - "grad_norm": 0.5094230771064758, - "learning_rate": 9.381102677859537e-06, - "loss": 0.4332, - "step": 8119 - }, - { - "epoch": 0.530684269002026, - "grad_norm": 0.42383527755737305, - "learning_rate": 9.380934388901673e-06, - "loss": 0.3833, - "step": 8120 - }, - { - "epoch": 0.5307496242075681, - "grad_norm": 0.4477185606956482, - "learning_rate": 9.38076607857647e-06, - "loss": 0.4222, - "step": 8121 - }, - { - "epoch": 0.5308149794131103, - "grad_norm": 0.48360368609428406, - "learning_rate": 9.380597746884757e-06, - "loss": 0.4133, - "step": 8122 - }, - { - "epoch": 0.5308803346186524, - "grad_norm": 0.4472695589065552, - "learning_rate": 9.38042939382735e-06, - "loss": 0.368, - "step": 8123 - }, - { - "epoch": 0.5309456898241945, - "grad_norm": 0.44982022047042847, - "learning_rate": 9.380261019405071e-06, - "loss": 0.3726, - "step": 8124 - }, - { - "epoch": 0.5310110450297366, - "grad_norm": 0.4698355793952942, - "learning_rate": 9.380092623618743e-06, - "loss": 0.4508, - "step": 8125 - }, - { - "epoch": 0.5310764002352787, - "grad_norm": 0.4377356469631195, - "learning_rate": 9.379924206469184e-06, - "loss": 0.3549, - "step": 8126 - }, - { - "epoch": 0.5311417554408209, - "grad_norm": 0.4417353868484497, - "learning_rate": 9.379755767957217e-06, - "loss": 0.3952, - "step": 8127 - }, - { - "epoch": 0.531207110646363, - "grad_norm": 0.4596727192401886, - "learning_rate": 9.379587308083666e-06, - "loss": 0.3701, - "step": 8128 - }, - { - "epoch": 0.5312724658519051, - "grad_norm": 0.4739995300769806, - "learning_rate": 9.37941882684935e-06, - "loss": 0.42, - "step": 8129 - }, - { - "epoch": 0.5313378210574472, - "grad_norm": 0.437948077917099, - "learning_rate": 9.37925032425509e-06, - "loss": 0.38, - "step": 8130 - }, - { - "epoch": 0.5314031762629894, - "grad_norm": 0.45688772201538086, - "learning_rate": 9.379081800301709e-06, - "loss": 0.3973, - "step": 8131 - }, - { - "epoch": 0.5314685314685315, - "grad_norm": 0.45127663016319275, - "learning_rate": 9.37891325499003e-06, - "loss": 0.3685, - "step": 8132 - }, - { - "epoch": 0.5315338866740736, - "grad_norm": 0.4748782515525818, - "learning_rate": 9.378744688320871e-06, - "loss": 0.424, - "step": 8133 - }, - { - "epoch": 0.5315992418796157, - "grad_norm": 0.43217262625694275, - "learning_rate": 9.37857610029506e-06, - "loss": 0.3562, - "step": 8134 - }, - { - "epoch": 0.5316645970851578, - "grad_norm": 0.4683869481086731, - "learning_rate": 9.378407490913417e-06, - "loss": 0.4006, - "step": 8135 - }, - { - "epoch": 0.5317299522907, - "grad_norm": 0.4753198027610779, - "learning_rate": 9.378238860176762e-06, - "loss": 0.3981, - "step": 8136 - }, - { - "epoch": 0.531795307496242, - "grad_norm": 0.44580692052841187, - "learning_rate": 9.378070208085921e-06, - "loss": 0.368, - "step": 8137 - }, - { - "epoch": 0.5318606627017842, - "grad_norm": 0.42868709564208984, - "learning_rate": 9.377901534641714e-06, - "loss": 0.3439, - "step": 8138 - }, - { - "epoch": 0.5319260179073263, - "grad_norm": 0.5087711811065674, - "learning_rate": 9.377732839844966e-06, - "loss": 0.4215, - "step": 8139 - }, - { - "epoch": 0.5319913731128685, - "grad_norm": 0.46497300267219543, - "learning_rate": 9.377564123696497e-06, - "loss": 0.3777, - "step": 8140 - }, - { - "epoch": 0.5320567283184106, - "grad_norm": 0.4794805347919464, - "learning_rate": 9.37739538619713e-06, - "loss": 0.3762, - "step": 8141 - }, - { - "epoch": 0.5321220835239527, - "grad_norm": 0.5074891448020935, - "learning_rate": 9.377226627347692e-06, - "loss": 0.4516, - "step": 8142 - }, - { - "epoch": 0.5321874387294948, - "grad_norm": 0.41453054547309875, - "learning_rate": 9.377057847149002e-06, - "loss": 0.3397, - "step": 8143 - }, - { - "epoch": 0.5322527939350369, - "grad_norm": 0.4522910714149475, - "learning_rate": 9.376889045601885e-06, - "loss": 0.3591, - "step": 8144 - }, - { - "epoch": 0.5323181491405791, - "grad_norm": 0.46257084608078003, - "learning_rate": 9.376720222707163e-06, - "loss": 0.338, - "step": 8145 - }, - { - "epoch": 0.5323835043461211, - "grad_norm": 0.46426162123680115, - "learning_rate": 9.37655137846566e-06, - "loss": 0.4139, - "step": 8146 - }, - { - "epoch": 0.5324488595516633, - "grad_norm": 0.449640691280365, - "learning_rate": 9.3763825128782e-06, - "loss": 0.3962, - "step": 8147 - }, - { - "epoch": 0.5325142147572054, - "grad_norm": 0.44668322801589966, - "learning_rate": 9.376213625945607e-06, - "loss": 0.4036, - "step": 8148 - }, - { - "epoch": 0.5325795699627476, - "grad_norm": 0.48974403738975525, - "learning_rate": 9.376044717668704e-06, - "loss": 0.3797, - "step": 8149 - }, - { - "epoch": 0.5326449251682897, - "grad_norm": 0.4639590084552765, - "learning_rate": 9.375875788048315e-06, - "loss": 0.351, - "step": 8150 - }, - { - "epoch": 0.5327102803738317, - "grad_norm": 0.4961114823818207, - "learning_rate": 9.375706837085262e-06, - "loss": 0.4348, - "step": 8151 - }, - { - "epoch": 0.5327756355793739, - "grad_norm": 0.4123740494251251, - "learning_rate": 9.375537864780373e-06, - "loss": 0.3225, - "step": 8152 - }, - { - "epoch": 0.532840990784916, - "grad_norm": 0.4464639723300934, - "learning_rate": 9.37536887113447e-06, - "loss": 0.3851, - "step": 8153 - }, - { - "epoch": 0.5329063459904582, - "grad_norm": 0.4487189054489136, - "learning_rate": 9.375199856148376e-06, - "loss": 0.3333, - "step": 8154 - }, - { - "epoch": 0.5329717011960002, - "grad_norm": 0.4613710641860962, - "learning_rate": 9.375030819822916e-06, - "loss": 0.3953, - "step": 8155 - }, - { - "epoch": 0.5330370564015424, - "grad_norm": 0.4397062659263611, - "learning_rate": 9.374861762158917e-06, - "loss": 0.3552, - "step": 8156 - }, - { - "epoch": 0.5331024116070845, - "grad_norm": 0.47692734003067017, - "learning_rate": 9.3746926831572e-06, - "loss": 0.3797, - "step": 8157 - }, - { - "epoch": 0.5331677668126267, - "grad_norm": 0.535108745098114, - "learning_rate": 9.374523582818591e-06, - "loss": 0.4581, - "step": 8158 - }, - { - "epoch": 0.5332331220181687, - "grad_norm": 0.4900972247123718, - "learning_rate": 9.374354461143917e-06, - "loss": 0.4235, - "step": 8159 - }, - { - "epoch": 0.5332984772237108, - "grad_norm": 0.4511905908584595, - "learning_rate": 9.374185318134e-06, - "loss": 0.3701, - "step": 8160 - }, - { - "epoch": 0.533363832429253, - "grad_norm": 0.4689152240753174, - "learning_rate": 9.374016153789666e-06, - "loss": 0.339, - "step": 8161 - }, - { - "epoch": 0.5334291876347951, - "grad_norm": 0.46662044525146484, - "learning_rate": 9.373846968111739e-06, - "loss": 0.3868, - "step": 8162 - }, - { - "epoch": 0.5334945428403373, - "grad_norm": 0.4650476574897766, - "learning_rate": 9.373677761101045e-06, - "loss": 0.4015, - "step": 8163 - }, - { - "epoch": 0.5335598980458793, - "grad_norm": 0.45298993587493896, - "learning_rate": 9.37350853275841e-06, - "loss": 0.3488, - "step": 8164 - }, - { - "epoch": 0.5336252532514215, - "grad_norm": 0.4997517764568329, - "learning_rate": 9.37333928308466e-06, - "loss": 0.4216, - "step": 8165 - }, - { - "epoch": 0.5336906084569636, - "grad_norm": 0.4577353894710541, - "learning_rate": 9.373170012080618e-06, - "loss": 0.3896, - "step": 8166 - }, - { - "epoch": 0.5337559636625058, - "grad_norm": 0.45616233348846436, - "learning_rate": 9.37300071974711e-06, - "loss": 0.3477, - "step": 8167 - }, - { - "epoch": 0.5338213188680478, - "grad_norm": 0.5024468898773193, - "learning_rate": 9.372831406084965e-06, - "loss": 0.4235, - "step": 8168 - }, - { - "epoch": 0.5338866740735899, - "grad_norm": 0.4371213912963867, - "learning_rate": 9.372662071095004e-06, - "loss": 0.3692, - "step": 8169 - }, - { - "epoch": 0.5339520292791321, - "grad_norm": 0.4440755248069763, - "learning_rate": 9.372492714778057e-06, - "loss": 0.3826, - "step": 8170 - }, - { - "epoch": 0.5340173844846742, - "grad_norm": 0.47860926389694214, - "learning_rate": 9.372323337134947e-06, - "loss": 0.3847, - "step": 8171 - }, - { - "epoch": 0.5340827396902164, - "grad_norm": 0.4336561858654022, - "learning_rate": 9.372153938166502e-06, - "loss": 0.3769, - "step": 8172 - }, - { - "epoch": 0.5341480948957584, - "grad_norm": 0.4629087448120117, - "learning_rate": 9.371984517873547e-06, - "loss": 0.3691, - "step": 8173 - }, - { - "epoch": 0.5342134501013006, - "grad_norm": 0.4819185435771942, - "learning_rate": 9.37181507625691e-06, - "loss": 0.3814, - "step": 8174 - }, - { - "epoch": 0.5342788053068427, - "grad_norm": 0.47867029905319214, - "learning_rate": 9.371645613317414e-06, - "loss": 0.4223, - "step": 8175 - }, - { - "epoch": 0.5343441605123848, - "grad_norm": 0.47211819887161255, - "learning_rate": 9.371476129055889e-06, - "loss": 0.3672, - "step": 8176 - }, - { - "epoch": 0.5344095157179269, - "grad_norm": 0.5237996578216553, - "learning_rate": 9.37130662347316e-06, - "loss": 0.4479, - "step": 8177 - }, - { - "epoch": 0.534474870923469, - "grad_norm": 0.453158438205719, - "learning_rate": 9.371137096570056e-06, - "loss": 0.3836, - "step": 8178 - }, - { - "epoch": 0.5345402261290112, - "grad_norm": 0.4489046335220337, - "learning_rate": 9.3709675483474e-06, - "loss": 0.3997, - "step": 8179 - }, - { - "epoch": 0.5346055813345533, - "grad_norm": 0.451736718416214, - "learning_rate": 9.370797978806024e-06, - "loss": 0.3501, - "step": 8180 - }, - { - "epoch": 0.5346709365400955, - "grad_norm": 0.4901093542575836, - "learning_rate": 9.37062838794675e-06, - "loss": 0.3646, - "step": 8181 - }, - { - "epoch": 0.5347362917456375, - "grad_norm": 0.4496528208255768, - "learning_rate": 9.370458775770406e-06, - "loss": 0.3418, - "step": 8182 - }, - { - "epoch": 0.5348016469511797, - "grad_norm": 0.47359248995780945, - "learning_rate": 9.370289142277822e-06, - "loss": 0.3506, - "step": 8183 - }, - { - "epoch": 0.5348670021567218, - "grad_norm": 0.4595816135406494, - "learning_rate": 9.370119487469825e-06, - "loss": 0.3715, - "step": 8184 - }, - { - "epoch": 0.5349323573622639, - "grad_norm": 0.4757009744644165, - "learning_rate": 9.36994981134724e-06, - "loss": 0.4078, - "step": 8185 - }, - { - "epoch": 0.534997712567806, - "grad_norm": 0.49687087535858154, - "learning_rate": 9.369780113910897e-06, - "loss": 0.4572, - "step": 8186 - }, - { - "epoch": 0.5350630677733481, - "grad_norm": 0.4593808054924011, - "learning_rate": 9.36961039516162e-06, - "loss": 0.3805, - "step": 8187 - }, - { - "epoch": 0.5351284229788903, - "grad_norm": 0.5112813711166382, - "learning_rate": 9.369440655100241e-06, - "loss": 0.4195, - "step": 8188 - }, - { - "epoch": 0.5351937781844324, - "grad_norm": 0.4520493745803833, - "learning_rate": 9.369270893727586e-06, - "loss": 0.386, - "step": 8189 - }, - { - "epoch": 0.5352591333899746, - "grad_norm": 0.48702430725097656, - "learning_rate": 9.369101111044484e-06, - "loss": 0.4211, - "step": 8190 - }, - { - "epoch": 0.5353244885955166, - "grad_norm": 0.45532673597335815, - "learning_rate": 9.36893130705176e-06, - "loss": 0.3717, - "step": 8191 - }, - { - "epoch": 0.5353898438010588, - "grad_norm": 0.44316256046295166, - "learning_rate": 9.368761481750245e-06, - "loss": 0.3524, - "step": 8192 - }, - { - "epoch": 0.5354551990066009, - "grad_norm": 0.4726574420928955, - "learning_rate": 9.36859163514077e-06, - "loss": 0.4144, - "step": 8193 - }, - { - "epoch": 0.535520554212143, - "grad_norm": 0.44861721992492676, - "learning_rate": 9.368421767224157e-06, - "loss": 0.3529, - "step": 8194 - }, - { - "epoch": 0.5355859094176851, - "grad_norm": 0.44170206785202026, - "learning_rate": 9.368251878001238e-06, - "loss": 0.362, - "step": 8195 - }, - { - "epoch": 0.5356512646232272, - "grad_norm": 0.42073050141334534, - "learning_rate": 9.368081967472842e-06, - "loss": 0.3255, - "step": 8196 - }, - { - "epoch": 0.5357166198287694, - "grad_norm": 0.6058770418167114, - "learning_rate": 9.367912035639797e-06, - "loss": 0.541, - "step": 8197 - }, - { - "epoch": 0.5357819750343115, - "grad_norm": 0.451310932636261, - "learning_rate": 9.367742082502932e-06, - "loss": 0.3696, - "step": 8198 - }, - { - "epoch": 0.5358473302398536, - "grad_norm": 0.5183236598968506, - "learning_rate": 9.367572108063076e-06, - "loss": 0.4422, - "step": 8199 - }, - { - "epoch": 0.5359126854453957, - "grad_norm": 0.45446789264678955, - "learning_rate": 9.367402112321056e-06, - "loss": 0.3738, - "step": 8200 - }, - { - "epoch": 0.5359780406509378, - "grad_norm": 0.45923709869384766, - "learning_rate": 9.367232095277705e-06, - "loss": 0.4003, - "step": 8201 - }, - { - "epoch": 0.53604339585648, - "grad_norm": 0.4609838128089905, - "learning_rate": 9.36706205693385e-06, - "loss": 0.3745, - "step": 8202 - }, - { - "epoch": 0.536108751062022, - "grad_norm": 0.506086528301239, - "learning_rate": 9.366891997290318e-06, - "loss": 0.4049, - "step": 8203 - }, - { - "epoch": 0.5361741062675642, - "grad_norm": 0.4835215210914612, - "learning_rate": 9.366721916347942e-06, - "loss": 0.4267, - "step": 8204 - }, - { - "epoch": 0.5362394614731063, - "grad_norm": 0.526479184627533, - "learning_rate": 9.366551814107552e-06, - "loss": 0.419, - "step": 8205 - }, - { - "epoch": 0.5363048166786485, - "grad_norm": 0.5380675792694092, - "learning_rate": 9.366381690569974e-06, - "loss": 0.4852, - "step": 8206 - }, - { - "epoch": 0.5363701718841906, - "grad_norm": 0.48616766929626465, - "learning_rate": 9.366211545736042e-06, - "loss": 0.4343, - "step": 8207 - }, - { - "epoch": 0.5364355270897327, - "grad_norm": 0.41571882367134094, - "learning_rate": 9.366041379606582e-06, - "loss": 0.3588, - "step": 8208 - }, - { - "epoch": 0.5365008822952748, - "grad_norm": 0.5063007473945618, - "learning_rate": 9.365871192182428e-06, - "loss": 0.4522, - "step": 8209 - }, - { - "epoch": 0.5365662375008169, - "grad_norm": 0.4544987380504608, - "learning_rate": 9.365700983464406e-06, - "loss": 0.3496, - "step": 8210 - }, - { - "epoch": 0.5366315927063591, - "grad_norm": 0.4544180929660797, - "learning_rate": 9.365530753453346e-06, - "loss": 0.351, - "step": 8211 - }, - { - "epoch": 0.5366969479119011, - "grad_norm": 0.4394286572933197, - "learning_rate": 9.365360502150083e-06, - "loss": 0.3874, - "step": 8212 - }, - { - "epoch": 0.5367623031174433, - "grad_norm": 0.47277411818504333, - "learning_rate": 9.365190229555443e-06, - "loss": 0.3853, - "step": 8213 - }, - { - "epoch": 0.5368276583229854, - "grad_norm": 0.4341241419315338, - "learning_rate": 9.365019935670259e-06, - "loss": 0.3141, - "step": 8214 - }, - { - "epoch": 0.5368930135285276, - "grad_norm": 0.4787592589855194, - "learning_rate": 9.36484962049536e-06, - "loss": 0.3888, - "step": 8215 - }, - { - "epoch": 0.5369583687340697, - "grad_norm": 0.48868072032928467, - "learning_rate": 9.364679284031577e-06, - "loss": 0.4195, - "step": 8216 - }, - { - "epoch": 0.5370237239396118, - "grad_norm": 0.4858606457710266, - "learning_rate": 9.36450892627974e-06, - "loss": 0.4051, - "step": 8217 - }, - { - "epoch": 0.5370890791451539, - "grad_norm": 0.46490201354026794, - "learning_rate": 9.364338547240684e-06, - "loss": 0.4149, - "step": 8218 - }, - { - "epoch": 0.537154434350696, - "grad_norm": 0.49066856503486633, - "learning_rate": 9.364168146915234e-06, - "loss": 0.4001, - "step": 8219 - }, - { - "epoch": 0.5372197895562382, - "grad_norm": 0.48547351360321045, - "learning_rate": 9.363997725304225e-06, - "loss": 0.422, - "step": 8220 - }, - { - "epoch": 0.5372851447617802, - "grad_norm": 0.4581473767757416, - "learning_rate": 9.363827282408488e-06, - "loss": 0.361, - "step": 8221 - }, - { - "epoch": 0.5373504999673224, - "grad_norm": 0.44778013229370117, - "learning_rate": 9.363656818228853e-06, - "loss": 0.3722, - "step": 8222 - }, - { - "epoch": 0.5374158551728645, - "grad_norm": 0.4640384316444397, - "learning_rate": 9.363486332766153e-06, - "loss": 0.392, - "step": 8223 - }, - { - "epoch": 0.5374812103784067, - "grad_norm": 0.4378077983856201, - "learning_rate": 9.363315826021218e-06, - "loss": 0.3851, - "step": 8224 - }, - { - "epoch": 0.5375465655839488, - "grad_norm": 0.47820448875427246, - "learning_rate": 9.36314529799488e-06, - "loss": 0.4294, - "step": 8225 - }, - { - "epoch": 0.5376119207894909, - "grad_norm": 0.5370205044746399, - "learning_rate": 9.362974748687969e-06, - "loss": 0.4164, - "step": 8226 - }, - { - "epoch": 0.537677275995033, - "grad_norm": 0.47790366411209106, - "learning_rate": 9.36280417810132e-06, - "loss": 0.4198, - "step": 8227 - }, - { - "epoch": 0.5377426312005751, - "grad_norm": 0.49692991375923157, - "learning_rate": 9.362633586235766e-06, - "loss": 0.4289, - "step": 8228 - }, - { - "epoch": 0.5378079864061173, - "grad_norm": 0.4387872815132141, - "learning_rate": 9.362462973092134e-06, - "loss": 0.3348, - "step": 8229 - }, - { - "epoch": 0.5378733416116593, - "grad_norm": 0.47035783529281616, - "learning_rate": 9.36229233867126e-06, - "loss": 0.3845, - "step": 8230 - }, - { - "epoch": 0.5379386968172015, - "grad_norm": 0.46790122985839844, - "learning_rate": 9.362121682973972e-06, - "loss": 0.4011, - "step": 8231 - }, - { - "epoch": 0.5380040520227436, - "grad_norm": 0.4822106659412384, - "learning_rate": 9.361951006001109e-06, - "loss": 0.4041, - "step": 8232 - }, - { - "epoch": 0.5380694072282858, - "grad_norm": 0.5083541870117188, - "learning_rate": 9.361780307753498e-06, - "loss": 0.4704, - "step": 8233 - }, - { - "epoch": 0.5381347624338279, - "grad_norm": 0.48938223719596863, - "learning_rate": 9.361609588231974e-06, - "loss": 0.416, - "step": 8234 - }, - { - "epoch": 0.5382001176393699, - "grad_norm": 0.45437952876091003, - "learning_rate": 9.36143884743737e-06, - "loss": 0.3507, - "step": 8235 - }, - { - "epoch": 0.5382654728449121, - "grad_norm": 0.4548284411430359, - "learning_rate": 9.361268085370517e-06, - "loss": 0.3796, - "step": 8236 - }, - { - "epoch": 0.5383308280504542, - "grad_norm": 0.4518842399120331, - "learning_rate": 9.361097302032246e-06, - "loss": 0.3668, - "step": 8237 - }, - { - "epoch": 0.5383961832559964, - "grad_norm": 0.44761666655540466, - "learning_rate": 9.360926497423397e-06, - "loss": 0.3683, - "step": 8238 - }, - { - "epoch": 0.5384615384615384, - "grad_norm": 0.4721292555332184, - "learning_rate": 9.360755671544797e-06, - "loss": 0.3791, - "step": 8239 - }, - { - "epoch": 0.5385268936670806, - "grad_norm": 0.46845686435699463, - "learning_rate": 9.36058482439728e-06, - "loss": 0.3935, - "step": 8240 - }, - { - "epoch": 0.5385922488726227, - "grad_norm": 0.44099998474121094, - "learning_rate": 9.360413955981679e-06, - "loss": 0.3935, - "step": 8241 - }, - { - "epoch": 0.5386576040781649, - "grad_norm": 0.4769476056098938, - "learning_rate": 9.36024306629883e-06, - "loss": 0.4344, - "step": 8242 - }, - { - "epoch": 0.538722959283707, - "grad_norm": 0.45490676164627075, - "learning_rate": 9.360072155349567e-06, - "loss": 0.4212, - "step": 8243 - }, - { - "epoch": 0.538788314489249, - "grad_norm": 0.4358070492744446, - "learning_rate": 9.35990122313472e-06, - "loss": 0.3922, - "step": 8244 - }, - { - "epoch": 0.5388536696947912, - "grad_norm": 0.4774872064590454, - "learning_rate": 9.359730269655124e-06, - "loss": 0.416, - "step": 8245 - }, - { - "epoch": 0.5389190249003333, - "grad_norm": 0.4705999791622162, - "learning_rate": 9.359559294911613e-06, - "loss": 0.4095, - "step": 8246 - }, - { - "epoch": 0.5389843801058755, - "grad_norm": 0.4116984009742737, - "learning_rate": 9.359388298905023e-06, - "loss": 0.3215, - "step": 8247 - }, - { - "epoch": 0.5390497353114175, - "grad_norm": 0.4325437843799591, - "learning_rate": 9.359217281636183e-06, - "loss": 0.3766, - "step": 8248 - }, - { - "epoch": 0.5391150905169597, - "grad_norm": 0.4551779329776764, - "learning_rate": 9.359046243105932e-06, - "loss": 0.4135, - "step": 8249 - }, - { - "epoch": 0.5391804457225018, - "grad_norm": 0.4630483090877533, - "learning_rate": 9.358875183315102e-06, - "loss": 0.4103, - "step": 8250 - }, - { - "epoch": 0.539245800928044, - "grad_norm": 0.46130189299583435, - "learning_rate": 9.358704102264527e-06, - "loss": 0.3982, - "step": 8251 - }, - { - "epoch": 0.539311156133586, - "grad_norm": 0.4228488802909851, - "learning_rate": 9.358532999955043e-06, - "loss": 0.3311, - "step": 8252 - }, - { - "epoch": 0.5393765113391281, - "grad_norm": 0.4424111247062683, - "learning_rate": 9.358361876387482e-06, - "loss": 0.3562, - "step": 8253 - }, - { - "epoch": 0.5394418665446703, - "grad_norm": 0.48746955394744873, - "learning_rate": 9.358190731562682e-06, - "loss": 0.4175, - "step": 8254 - }, - { - "epoch": 0.5395072217502124, - "grad_norm": 0.43694406747817993, - "learning_rate": 9.358019565481477e-06, - "loss": 0.3566, - "step": 8255 - }, - { - "epoch": 0.5395725769557546, - "grad_norm": 0.44341006875038147, - "learning_rate": 9.357848378144698e-06, - "loss": 0.3726, - "step": 8256 - }, - { - "epoch": 0.5396379321612966, - "grad_norm": 0.47627320885658264, - "learning_rate": 9.357677169553186e-06, - "loss": 0.4182, - "step": 8257 - }, - { - "epoch": 0.5397032873668388, - "grad_norm": 0.4702316224575043, - "learning_rate": 9.357505939707769e-06, - "loss": 0.4434, - "step": 8258 - }, - { - "epoch": 0.5397686425723809, - "grad_norm": 0.45931556820869446, - "learning_rate": 9.35733468860929e-06, - "loss": 0.3788, - "step": 8259 - }, - { - "epoch": 0.539833997777923, - "grad_norm": 0.44867175817489624, - "learning_rate": 9.357163416258577e-06, - "loss": 0.382, - "step": 8260 - }, - { - "epoch": 0.5398993529834651, - "grad_norm": 0.4833263158798218, - "learning_rate": 9.35699212265647e-06, - "loss": 0.4326, - "step": 8261 - }, - { - "epoch": 0.5399647081890072, - "grad_norm": 0.45278486609458923, - "learning_rate": 9.356820807803802e-06, - "loss": 0.3846, - "step": 8262 - }, - { - "epoch": 0.5400300633945494, - "grad_norm": 0.5050371289253235, - "learning_rate": 9.35664947170141e-06, - "loss": 0.4079, - "step": 8263 - }, - { - "epoch": 0.5400954186000915, - "grad_norm": 0.4680522382259369, - "learning_rate": 9.35647811435013e-06, - "loss": 0.4307, - "step": 8264 - }, - { - "epoch": 0.5401607738056337, - "grad_norm": 0.4409938156604767, - "learning_rate": 9.356306735750796e-06, - "loss": 0.3881, - "step": 8265 - }, - { - "epoch": 0.5402261290111757, - "grad_norm": 0.450053870677948, - "learning_rate": 9.356135335904247e-06, - "loss": 0.4038, - "step": 8266 - }, - { - "epoch": 0.5402914842167179, - "grad_norm": 0.46919873356819153, - "learning_rate": 9.355963914811313e-06, - "loss": 0.419, - "step": 8267 - }, - { - "epoch": 0.54035683942226, - "grad_norm": 0.4970439672470093, - "learning_rate": 9.355792472472836e-06, - "loss": 0.436, - "step": 8268 - }, - { - "epoch": 0.540422194627802, - "grad_norm": 0.468803346157074, - "learning_rate": 9.355621008889651e-06, - "loss": 0.3586, - "step": 8269 - }, - { - "epoch": 0.5404875498333442, - "grad_norm": 0.5252040028572083, - "learning_rate": 9.355449524062592e-06, - "loss": 0.4647, - "step": 8270 - }, - { - "epoch": 0.5405529050388863, - "grad_norm": 0.46350598335266113, - "learning_rate": 9.355278017992498e-06, - "loss": 0.4013, - "step": 8271 - }, - { - "epoch": 0.5406182602444285, - "grad_norm": 0.44724878668785095, - "learning_rate": 9.355106490680204e-06, - "loss": 0.3655, - "step": 8272 - }, - { - "epoch": 0.5406836154499706, - "grad_norm": 0.43497711420059204, - "learning_rate": 9.354934942126545e-06, - "loss": 0.3421, - "step": 8273 - }, - { - "epoch": 0.5407489706555128, - "grad_norm": 0.46079549193382263, - "learning_rate": 9.354763372332362e-06, - "loss": 0.3899, - "step": 8274 - }, - { - "epoch": 0.5408143258610548, - "grad_norm": 0.4535283148288727, - "learning_rate": 9.35459178129849e-06, - "loss": 0.4033, - "step": 8275 - }, - { - "epoch": 0.540879681066597, - "grad_norm": 0.450179785490036, - "learning_rate": 9.354420169025763e-06, - "loss": 0.4097, - "step": 8276 - }, - { - "epoch": 0.5409450362721391, - "grad_norm": 0.4596855640411377, - "learning_rate": 9.354248535515021e-06, - "loss": 0.4309, - "step": 8277 - }, - { - "epoch": 0.5410103914776812, - "grad_norm": 0.41704466938972473, - "learning_rate": 9.354076880767102e-06, - "loss": 0.3716, - "step": 8278 - }, - { - "epoch": 0.5410757466832233, - "grad_norm": 0.44406241178512573, - "learning_rate": 9.35390520478284e-06, - "loss": 0.4148, - "step": 8279 - }, - { - "epoch": 0.5411411018887654, - "grad_norm": 0.461429238319397, - "learning_rate": 9.353733507563074e-06, - "loss": 0.4248, - "step": 8280 - }, - { - "epoch": 0.5412064570943076, - "grad_norm": 0.4740643799304962, - "learning_rate": 9.353561789108641e-06, - "loss": 0.3924, - "step": 8281 - }, - { - "epoch": 0.5412718122998497, - "grad_norm": 0.4778648018836975, - "learning_rate": 9.35339004942038e-06, - "loss": 0.4251, - "step": 8282 - }, - { - "epoch": 0.5413371675053918, - "grad_norm": 0.45684945583343506, - "learning_rate": 9.353218288499127e-06, - "loss": 0.3904, - "step": 8283 - }, - { - "epoch": 0.5414025227109339, - "grad_norm": 0.4378284811973572, - "learning_rate": 9.35304650634572e-06, - "loss": 0.3769, - "step": 8284 - }, - { - "epoch": 0.541467877916476, - "grad_norm": 0.4128952920436859, - "learning_rate": 9.352874702960998e-06, - "loss": 0.3363, - "step": 8285 - }, - { - "epoch": 0.5415332331220182, - "grad_norm": 0.4351802170276642, - "learning_rate": 9.352702878345799e-06, - "loss": 0.384, - "step": 8286 - }, - { - "epoch": 0.5415985883275602, - "grad_norm": 0.4509058892726898, - "learning_rate": 9.352531032500958e-06, - "loss": 0.3828, - "step": 8287 - }, - { - "epoch": 0.5416639435331024, - "grad_norm": 0.44013240933418274, - "learning_rate": 9.352359165427317e-06, - "loss": 0.3556, - "step": 8288 - }, - { - "epoch": 0.5417292987386445, - "grad_norm": 0.4528917074203491, - "learning_rate": 9.35218727712571e-06, - "loss": 0.3944, - "step": 8289 - }, - { - "epoch": 0.5417946539441867, - "grad_norm": 0.43234896659851074, - "learning_rate": 9.35201536759698e-06, - "loss": 0.3683, - "step": 8290 - }, - { - "epoch": 0.5418600091497288, - "grad_norm": 0.5005369186401367, - "learning_rate": 9.351843436841964e-06, - "loss": 0.4255, - "step": 8291 - }, - { - "epoch": 0.5419253643552709, - "grad_norm": 0.45198288559913635, - "learning_rate": 9.3516714848615e-06, - "loss": 0.3764, - "step": 8292 - }, - { - "epoch": 0.541990719560813, - "grad_norm": 0.45872727036476135, - "learning_rate": 9.351499511656424e-06, - "loss": 0.3912, - "step": 8293 - }, - { - "epoch": 0.5420560747663551, - "grad_norm": 0.48523542284965515, - "learning_rate": 9.35132751722758e-06, - "loss": 0.4227, - "step": 8294 - }, - { - "epoch": 0.5421214299718973, - "grad_norm": 0.4680887460708618, - "learning_rate": 9.351155501575803e-06, - "loss": 0.4333, - "step": 8295 - }, - { - "epoch": 0.5421867851774393, - "grad_norm": 0.45950746536254883, - "learning_rate": 9.350983464701932e-06, - "loss": 0.3704, - "step": 8296 - }, - { - "epoch": 0.5422521403829815, - "grad_norm": 0.4689929187297821, - "learning_rate": 9.35081140660681e-06, - "loss": 0.4157, - "step": 8297 - }, - { - "epoch": 0.5423174955885236, - "grad_norm": 0.4797610640525818, - "learning_rate": 9.35063932729127e-06, - "loss": 0.4177, - "step": 8298 - }, - { - "epoch": 0.5423828507940658, - "grad_norm": 0.4746837317943573, - "learning_rate": 9.350467226756159e-06, - "loss": 0.4244, - "step": 8299 - }, - { - "epoch": 0.5424482059996079, - "grad_norm": 0.4472707211971283, - "learning_rate": 9.350295105002311e-06, - "loss": 0.3941, - "step": 8300 - }, - { - "epoch": 0.54251356120515, - "grad_norm": 0.41988304257392883, - "learning_rate": 9.350122962030566e-06, - "loss": 0.3321, - "step": 8301 - }, - { - "epoch": 0.5425789164106921, - "grad_norm": 0.4819179177284241, - "learning_rate": 9.349950797841763e-06, - "loss": 0.4132, - "step": 8302 - }, - { - "epoch": 0.5426442716162342, - "grad_norm": 0.41595616936683655, - "learning_rate": 9.349778612436743e-06, - "loss": 0.3283, - "step": 8303 - }, - { - "epoch": 0.5427096268217764, - "grad_norm": 0.4350241720676422, - "learning_rate": 9.349606405816349e-06, - "loss": 0.3859, - "step": 8304 - }, - { - "epoch": 0.5427749820273184, - "grad_norm": 0.448354035615921, - "learning_rate": 9.349434177981416e-06, - "loss": 0.3946, - "step": 8305 - }, - { - "epoch": 0.5428403372328606, - "grad_norm": 0.4604875147342682, - "learning_rate": 9.349261928932785e-06, - "loss": 0.3571, - "step": 8306 - }, - { - "epoch": 0.5429056924384027, - "grad_norm": 0.47523099184036255, - "learning_rate": 9.349089658671297e-06, - "loss": 0.3528, - "step": 8307 - }, - { - "epoch": 0.5429710476439449, - "grad_norm": 0.43087831139564514, - "learning_rate": 9.348917367197791e-06, - "loss": 0.3273, - "step": 8308 - }, - { - "epoch": 0.543036402849487, - "grad_norm": 0.48309123516082764, - "learning_rate": 9.348745054513112e-06, - "loss": 0.407, - "step": 8309 - }, - { - "epoch": 0.5431017580550291, - "grad_norm": 0.432910293340683, - "learning_rate": 9.348572720618095e-06, - "loss": 0.354, - "step": 8310 - }, - { - "epoch": 0.5431671132605712, - "grad_norm": 0.4773752689361572, - "learning_rate": 9.348400365513582e-06, - "loss": 0.3865, - "step": 8311 - }, - { - "epoch": 0.5432324684661133, - "grad_norm": 0.48800089955329895, - "learning_rate": 9.348227989200413e-06, - "loss": 0.4368, - "step": 8312 - }, - { - "epoch": 0.5432978236716555, - "grad_norm": 0.45648813247680664, - "learning_rate": 9.34805559167943e-06, - "loss": 0.3918, - "step": 8313 - }, - { - "epoch": 0.5433631788771975, - "grad_norm": 0.4994303286075592, - "learning_rate": 9.347883172951474e-06, - "loss": 0.4546, - "step": 8314 - }, - { - "epoch": 0.5434285340827397, - "grad_norm": 0.46923038363456726, - "learning_rate": 9.347710733017386e-06, - "loss": 0.374, - "step": 8315 - }, - { - "epoch": 0.5434938892882818, - "grad_norm": 0.4745754301548004, - "learning_rate": 9.347538271878007e-06, - "loss": 0.3916, - "step": 8316 - }, - { - "epoch": 0.543559244493824, - "grad_norm": 0.43778276443481445, - "learning_rate": 9.347365789534176e-06, - "loss": 0.3438, - "step": 8317 - }, - { - "epoch": 0.543624599699366, - "grad_norm": 0.473345011472702, - "learning_rate": 9.347193285986738e-06, - "loss": 0.434, - "step": 8318 - }, - { - "epoch": 0.5436899549049081, - "grad_norm": 0.41520264744758606, - "learning_rate": 9.347020761236531e-06, - "loss": 0.3155, - "step": 8319 - }, - { - "epoch": 0.5437553101104503, - "grad_norm": 0.4335134029388428, - "learning_rate": 9.346848215284397e-06, - "loss": 0.3402, - "step": 8320 - }, - { - "epoch": 0.5438206653159924, - "grad_norm": 0.46292465925216675, - "learning_rate": 9.346675648131181e-06, - "loss": 0.3918, - "step": 8321 - }, - { - "epoch": 0.5438860205215346, - "grad_norm": 0.46981081366539, - "learning_rate": 9.34650305977772e-06, - "loss": 0.3766, - "step": 8322 - }, - { - "epoch": 0.5439513757270766, - "grad_norm": 0.47351184487342834, - "learning_rate": 9.346330450224858e-06, - "loss": 0.4171, - "step": 8323 - }, - { - "epoch": 0.5440167309326188, - "grad_norm": 0.44288018345832825, - "learning_rate": 9.346157819473437e-06, - "loss": 0.4062, - "step": 8324 - }, - { - "epoch": 0.5440820861381609, - "grad_norm": 0.5060564875602722, - "learning_rate": 9.345985167524298e-06, - "loss": 0.4093, - "step": 8325 - }, - { - "epoch": 0.5441474413437031, - "grad_norm": 0.492012619972229, - "learning_rate": 9.345812494378285e-06, - "loss": 0.3613, - "step": 8326 - }, - { - "epoch": 0.5442127965492451, - "grad_norm": 0.45168179273605347, - "learning_rate": 9.345639800036238e-06, - "loss": 0.3714, - "step": 8327 - }, - { - "epoch": 0.5442781517547872, - "grad_norm": 0.48553574085235596, - "learning_rate": 9.345467084499e-06, - "loss": 0.4131, - "step": 8328 - }, - { - "epoch": 0.5443435069603294, - "grad_norm": 0.4387553334236145, - "learning_rate": 9.345294347767415e-06, - "loss": 0.3659, - "step": 8329 - }, - { - "epoch": 0.5444088621658715, - "grad_norm": 0.419382244348526, - "learning_rate": 9.345121589842323e-06, - "loss": 0.3591, - "step": 8330 - }, - { - "epoch": 0.5444742173714137, - "grad_norm": 0.47618135809898376, - "learning_rate": 9.344948810724567e-06, - "loss": 0.3983, - "step": 8331 - }, - { - "epoch": 0.5445395725769557, - "grad_norm": 0.48393312096595764, - "learning_rate": 9.344776010414994e-06, - "loss": 0.4282, - "step": 8332 - }, - { - "epoch": 0.5446049277824979, - "grad_norm": 0.4687272906303406, - "learning_rate": 9.344603188914438e-06, - "loss": 0.4076, - "step": 8333 - }, - { - "epoch": 0.54467028298804, - "grad_norm": 0.4399765729904175, - "learning_rate": 9.34443034622375e-06, - "loss": 0.3748, - "step": 8334 - }, - { - "epoch": 0.5447356381935822, - "grad_norm": 0.4569462239742279, - "learning_rate": 9.344257482343771e-06, - "loss": 0.4102, - "step": 8335 - }, - { - "epoch": 0.5448009933991242, - "grad_norm": 0.4702807664871216, - "learning_rate": 9.34408459727534e-06, - "loss": 0.4229, - "step": 8336 - }, - { - "epoch": 0.5448663486046663, - "grad_norm": 0.4702181816101074, - "learning_rate": 9.343911691019308e-06, - "loss": 0.406, - "step": 8337 - }, - { - "epoch": 0.5449317038102085, - "grad_norm": 0.44575101137161255, - "learning_rate": 9.343738763576511e-06, - "loss": 0.4027, - "step": 8338 - }, - { - "epoch": 0.5449970590157506, - "grad_norm": 0.469691663980484, - "learning_rate": 9.343565814947796e-06, - "loss": 0.388, - "step": 8339 - }, - { - "epoch": 0.5450624142212928, - "grad_norm": 0.425841748714447, - "learning_rate": 9.343392845134005e-06, - "loss": 0.3468, - "step": 8340 - }, - { - "epoch": 0.5451277694268348, - "grad_norm": 0.43938112258911133, - "learning_rate": 9.343219854135984e-06, - "loss": 0.3291, - "step": 8341 - }, - { - "epoch": 0.545193124632377, - "grad_norm": 0.44285666942596436, - "learning_rate": 9.343046841954572e-06, - "loss": 0.3704, - "step": 8342 - }, - { - "epoch": 0.5452584798379191, - "grad_norm": 0.4263397455215454, - "learning_rate": 9.342873808590617e-06, - "loss": 0.3635, - "step": 8343 - }, - { - "epoch": 0.5453238350434612, - "grad_norm": 0.4233444929122925, - "learning_rate": 9.342700754044965e-06, - "loss": 0.3191, - "step": 8344 - }, - { - "epoch": 0.5453891902490033, - "grad_norm": 0.4461345374584198, - "learning_rate": 9.342527678318454e-06, - "loss": 0.3777, - "step": 8345 - }, - { - "epoch": 0.5454545454545454, - "grad_norm": 0.4585961699485779, - "learning_rate": 9.342354581411932e-06, - "loss": 0.4125, - "step": 8346 - }, - { - "epoch": 0.5455199006600876, - "grad_norm": 0.5012494921684265, - "learning_rate": 9.342181463326243e-06, - "loss": 0.4523, - "step": 8347 - }, - { - "epoch": 0.5455852558656297, - "grad_norm": 0.4398760199546814, - "learning_rate": 9.342008324062229e-06, - "loss": 0.3686, - "step": 8348 - }, - { - "epoch": 0.5456506110711719, - "grad_norm": 0.4209686815738678, - "learning_rate": 9.341835163620738e-06, - "loss": 0.3687, - "step": 8349 - }, - { - "epoch": 0.5457159662767139, - "grad_norm": 0.5082706212997437, - "learning_rate": 9.341661982002612e-06, - "loss": 0.4654, - "step": 8350 - }, - { - "epoch": 0.5457813214822561, - "grad_norm": 0.43471962213516235, - "learning_rate": 9.341488779208696e-06, - "loss": 0.3444, - "step": 8351 - }, - { - "epoch": 0.5458466766877982, - "grad_norm": 0.45494237542152405, - "learning_rate": 9.341315555239835e-06, - "loss": 0.4041, - "step": 8352 - }, - { - "epoch": 0.5459120318933403, - "grad_norm": 0.46279793977737427, - "learning_rate": 9.341142310096876e-06, - "loss": 0.3932, - "step": 8353 - }, - { - "epoch": 0.5459773870988824, - "grad_norm": 0.45075204968452454, - "learning_rate": 9.34096904378066e-06, - "loss": 0.3802, - "step": 8354 - }, - { - "epoch": 0.5460427423044245, - "grad_norm": 0.4650687277317047, - "learning_rate": 9.340795756292036e-06, - "loss": 0.3837, - "step": 8355 - }, - { - "epoch": 0.5461080975099667, - "grad_norm": 0.46025142073631287, - "learning_rate": 9.340622447631844e-06, - "loss": 0.3972, - "step": 8356 - }, - { - "epoch": 0.5461734527155088, - "grad_norm": 0.46367308497428894, - "learning_rate": 9.340449117800936e-06, - "loss": 0.3789, - "step": 8357 - }, - { - "epoch": 0.546238807921051, - "grad_norm": 0.4615289270877838, - "learning_rate": 9.340275766800153e-06, - "loss": 0.3768, - "step": 8358 - }, - { - "epoch": 0.546304163126593, - "grad_norm": 0.4335293471813202, - "learning_rate": 9.34010239463034e-06, - "loss": 0.365, - "step": 8359 - }, - { - "epoch": 0.5463695183321352, - "grad_norm": 0.4475209414958954, - "learning_rate": 9.339929001292345e-06, - "loss": 0.3904, - "step": 8360 - }, - { - "epoch": 0.5464348735376773, - "grad_norm": 0.45525529980659485, - "learning_rate": 9.339755586787014e-06, - "loss": 0.3787, - "step": 8361 - }, - { - "epoch": 0.5465002287432194, - "grad_norm": 0.47592693567276, - "learning_rate": 9.33958215111519e-06, - "loss": 0.4347, - "step": 8362 - }, - { - "epoch": 0.5465655839487615, - "grad_norm": 0.4509865939617157, - "learning_rate": 9.33940869427772e-06, - "loss": 0.4164, - "step": 8363 - }, - { - "epoch": 0.5466309391543036, - "grad_norm": 0.49032536149024963, - "learning_rate": 9.339235216275453e-06, - "loss": 0.4464, - "step": 8364 - }, - { - "epoch": 0.5466962943598458, - "grad_norm": 0.46070197224617004, - "learning_rate": 9.33906171710923e-06, - "loss": 0.3816, - "step": 8365 - }, - { - "epoch": 0.5467616495653879, - "grad_norm": 0.4691459536552429, - "learning_rate": 9.338888196779901e-06, - "loss": 0.4051, - "step": 8366 - }, - { - "epoch": 0.54682700477093, - "grad_norm": 0.4831182062625885, - "learning_rate": 9.338714655288311e-06, - "loss": 0.4048, - "step": 8367 - }, - { - "epoch": 0.5468923599764721, - "grad_norm": 0.4534238576889038, - "learning_rate": 9.338541092635307e-06, - "loss": 0.3946, - "step": 8368 - }, - { - "epoch": 0.5469577151820142, - "grad_norm": 0.448739230632782, - "learning_rate": 9.338367508821734e-06, - "loss": 0.3623, - "step": 8369 - }, - { - "epoch": 0.5470230703875564, - "grad_norm": 0.4333237409591675, - "learning_rate": 9.33819390384844e-06, - "loss": 0.3204, - "step": 8370 - }, - { - "epoch": 0.5470884255930984, - "grad_norm": 0.40983936190605164, - "learning_rate": 9.338020277716273e-06, - "loss": 0.3431, - "step": 8371 - }, - { - "epoch": 0.5471537807986406, - "grad_norm": 0.4308077394962311, - "learning_rate": 9.337846630426077e-06, - "loss": 0.3358, - "step": 8372 - }, - { - "epoch": 0.5472191360041827, - "grad_norm": 0.4654064178466797, - "learning_rate": 9.3376729619787e-06, - "loss": 0.3808, - "step": 8373 - }, - { - "epoch": 0.5472844912097249, - "grad_norm": 0.46433669328689575, - "learning_rate": 9.33749927237499e-06, - "loss": 0.4165, - "step": 8374 - }, - { - "epoch": 0.547349846415267, - "grad_norm": 0.4505382180213928, - "learning_rate": 9.337325561615793e-06, - "loss": 0.3827, - "step": 8375 - }, - { - "epoch": 0.5474152016208091, - "grad_norm": 0.4646647870540619, - "learning_rate": 9.337151829701955e-06, - "loss": 0.385, - "step": 8376 - }, - { - "epoch": 0.5474805568263512, - "grad_norm": 0.47459107637405396, - "learning_rate": 9.336978076634327e-06, - "loss": 0.4016, - "step": 8377 - }, - { - "epoch": 0.5475459120318933, - "grad_norm": 0.4663126468658447, - "learning_rate": 9.336804302413755e-06, - "loss": 0.4476, - "step": 8378 - }, - { - "epoch": 0.5476112672374355, - "grad_norm": 0.44067710638046265, - "learning_rate": 9.336630507041085e-06, - "loss": 0.3719, - "step": 8379 - }, - { - "epoch": 0.5476766224429775, - "grad_norm": 0.4350115954875946, - "learning_rate": 9.336456690517165e-06, - "loss": 0.3466, - "step": 8380 - }, - { - "epoch": 0.5477419776485197, - "grad_norm": 0.42475584149360657, - "learning_rate": 9.336282852842844e-06, - "loss": 0.3543, - "step": 8381 - }, - { - "epoch": 0.5478073328540618, - "grad_norm": 0.459639310836792, - "learning_rate": 9.33610899401897e-06, - "loss": 0.3779, - "step": 8382 - }, - { - "epoch": 0.547872688059604, - "grad_norm": 0.5365619659423828, - "learning_rate": 9.335935114046389e-06, - "loss": 0.405, - "step": 8383 - }, - { - "epoch": 0.5479380432651461, - "grad_norm": 0.4446386992931366, - "learning_rate": 9.335761212925951e-06, - "loss": 0.4018, - "step": 8384 - }, - { - "epoch": 0.5480033984706882, - "grad_norm": 0.4449726641178131, - "learning_rate": 9.335587290658504e-06, - "loss": 0.3521, - "step": 8385 - }, - { - "epoch": 0.5480687536762303, - "grad_norm": 0.4633252024650574, - "learning_rate": 9.335413347244895e-06, - "loss": 0.3893, - "step": 8386 - }, - { - "epoch": 0.5481341088817724, - "grad_norm": 0.4171968400478363, - "learning_rate": 9.335239382685974e-06, - "loss": 0.3183, - "step": 8387 - }, - { - "epoch": 0.5481994640873146, - "grad_norm": 0.43910449743270874, - "learning_rate": 9.335065396982588e-06, - "loss": 0.3865, - "step": 8388 - }, - { - "epoch": 0.5482648192928566, - "grad_norm": 0.4020332098007202, - "learning_rate": 9.334891390135586e-06, - "loss": 0.3498, - "step": 8389 - }, - { - "epoch": 0.5483301744983988, - "grad_norm": 0.4654817283153534, - "learning_rate": 9.334717362145818e-06, - "loss": 0.4165, - "step": 8390 - }, - { - "epoch": 0.5483955297039409, - "grad_norm": 0.4696250557899475, - "learning_rate": 9.33454331301413e-06, - "loss": 0.3939, - "step": 8391 - }, - { - "epoch": 0.5484608849094831, - "grad_norm": 0.5158215761184692, - "learning_rate": 9.334369242741374e-06, - "loss": 0.4496, - "step": 8392 - }, - { - "epoch": 0.5485262401150252, - "grad_norm": 0.4201121926307678, - "learning_rate": 9.334195151328398e-06, - "loss": 0.3564, - "step": 8393 - }, - { - "epoch": 0.5485915953205673, - "grad_norm": 0.4531320035457611, - "learning_rate": 9.334021038776048e-06, - "loss": 0.4175, - "step": 8394 - }, - { - "epoch": 0.5486569505261094, - "grad_norm": 0.42785894870758057, - "learning_rate": 9.33384690508518e-06, - "loss": 0.3401, - "step": 8395 - }, - { - "epoch": 0.5487223057316515, - "grad_norm": 0.4672718942165375, - "learning_rate": 9.333672750256636e-06, - "loss": 0.4006, - "step": 8396 - }, - { - "epoch": 0.5487876609371937, - "grad_norm": 0.455115407705307, - "learning_rate": 9.333498574291272e-06, - "loss": 0.3861, - "step": 8397 - }, - { - "epoch": 0.5488530161427357, - "grad_norm": 0.4673006534576416, - "learning_rate": 9.333324377189931e-06, - "loss": 0.4229, - "step": 8398 - }, - { - "epoch": 0.5489183713482779, - "grad_norm": 0.4351649880409241, - "learning_rate": 9.333150158953467e-06, - "loss": 0.3819, - "step": 8399 - }, - { - "epoch": 0.54898372655382, - "grad_norm": 0.44421637058258057, - "learning_rate": 9.332975919582727e-06, - "loss": 0.3912, - "step": 8400 - }, - { - "epoch": 0.5490490817593622, - "grad_norm": 0.4454924166202545, - "learning_rate": 9.332801659078565e-06, - "loss": 0.3946, - "step": 8401 - }, - { - "epoch": 0.5491144369649043, - "grad_norm": 0.47957855463027954, - "learning_rate": 9.332627377441827e-06, - "loss": 0.4334, - "step": 8402 - }, - { - "epoch": 0.5491797921704463, - "grad_norm": 0.48958173394203186, - "learning_rate": 9.332453074673365e-06, - "loss": 0.4617, - "step": 8403 - }, - { - "epoch": 0.5492451473759885, - "grad_norm": 0.46007731556892395, - "learning_rate": 9.332278750774026e-06, - "loss": 0.3761, - "step": 8404 - }, - { - "epoch": 0.5493105025815306, - "grad_norm": 0.48850181698799133, - "learning_rate": 9.332104405744666e-06, - "loss": 0.4474, - "step": 8405 - }, - { - "epoch": 0.5493758577870728, - "grad_norm": 0.4466870129108429, - "learning_rate": 9.33193003958613e-06, - "loss": 0.3646, - "step": 8406 - }, - { - "epoch": 0.5494412129926148, - "grad_norm": 0.49112334847450256, - "learning_rate": 9.331755652299271e-06, - "loss": 0.4152, - "step": 8407 - }, - { - "epoch": 0.549506568198157, - "grad_norm": 0.4598344564437866, - "learning_rate": 9.33158124388494e-06, - "loss": 0.3989, - "step": 8408 - }, - { - "epoch": 0.5495719234036991, - "grad_norm": 0.42260119318962097, - "learning_rate": 9.331406814343986e-06, - "loss": 0.3463, - "step": 8409 - }, - { - "epoch": 0.5496372786092413, - "grad_norm": 0.48907238245010376, - "learning_rate": 9.331232363677259e-06, - "loss": 0.427, - "step": 8410 - }, - { - "epoch": 0.5497026338147833, - "grad_norm": 0.4614992141723633, - "learning_rate": 9.331057891885614e-06, - "loss": 0.373, - "step": 8411 - }, - { - "epoch": 0.5497679890203254, - "grad_norm": 0.44425952434539795, - "learning_rate": 9.330883398969897e-06, - "loss": 0.3683, - "step": 8412 - }, - { - "epoch": 0.5498333442258676, - "grad_norm": 0.48977503180503845, - "learning_rate": 9.330708884930962e-06, - "loss": 0.4347, - "step": 8413 - }, - { - "epoch": 0.5498986994314097, - "grad_norm": 0.46309882402420044, - "learning_rate": 9.330534349769658e-06, - "loss": 0.3942, - "step": 8414 - }, - { - "epoch": 0.5499640546369519, - "grad_norm": 0.4486205577850342, - "learning_rate": 9.330359793486839e-06, - "loss": 0.3779, - "step": 8415 - }, - { - "epoch": 0.5500294098424939, - "grad_norm": 0.4534038007259369, - "learning_rate": 9.330185216083356e-06, - "loss": 0.3812, - "step": 8416 - }, - { - "epoch": 0.5500947650480361, - "grad_norm": 0.4830993413925171, - "learning_rate": 9.33001061756006e-06, - "loss": 0.4427, - "step": 8417 - }, - { - "epoch": 0.5501601202535782, - "grad_norm": 0.49837592244148254, - "learning_rate": 9.3298359979178e-06, - "loss": 0.4591, - "step": 8418 - }, - { - "epoch": 0.5502254754591204, - "grad_norm": 0.43504467606544495, - "learning_rate": 9.32966135715743e-06, - "loss": 0.3704, - "step": 8419 - }, - { - "epoch": 0.5502908306646624, - "grad_norm": 0.45898109674453735, - "learning_rate": 9.329486695279803e-06, - "loss": 0.378, - "step": 8420 - }, - { - "epoch": 0.5503561858702045, - "grad_norm": 0.42473548650741577, - "learning_rate": 9.32931201228577e-06, - "loss": 0.3357, - "step": 8421 - }, - { - "epoch": 0.5504215410757467, - "grad_norm": 0.43120864033699036, - "learning_rate": 9.32913730817618e-06, - "loss": 0.3832, - "step": 8422 - }, - { - "epoch": 0.5504868962812888, - "grad_norm": 0.4985445439815521, - "learning_rate": 9.328962582951889e-06, - "loss": 0.4308, - "step": 8423 - }, - { - "epoch": 0.550552251486831, - "grad_norm": 0.45426177978515625, - "learning_rate": 9.328787836613748e-06, - "loss": 0.4016, - "step": 8424 - }, - { - "epoch": 0.550617606692373, - "grad_norm": 0.440967321395874, - "learning_rate": 9.328613069162608e-06, - "loss": 0.3838, - "step": 8425 - }, - { - "epoch": 0.5506829618979152, - "grad_norm": 0.45935264229774475, - "learning_rate": 9.328438280599326e-06, - "loss": 0.4127, - "step": 8426 - }, - { - "epoch": 0.5507483171034573, - "grad_norm": 0.43026798963546753, - "learning_rate": 9.328263470924747e-06, - "loss": 0.3232, - "step": 8427 - }, - { - "epoch": 0.5508136723089994, - "grad_norm": 0.463697224855423, - "learning_rate": 9.328088640139729e-06, - "loss": 0.3898, - "step": 8428 - }, - { - "epoch": 0.5508790275145415, - "grad_norm": 0.46323880553245544, - "learning_rate": 9.327913788245125e-06, - "loss": 0.4116, - "step": 8429 - }, - { - "epoch": 0.5509443827200836, - "grad_norm": 0.4392174780368805, - "learning_rate": 9.327738915241782e-06, - "loss": 0.3827, - "step": 8430 - }, - { - "epoch": 0.5510097379256258, - "grad_norm": 0.44167813658714294, - "learning_rate": 9.32756402113056e-06, - "loss": 0.3553, - "step": 8431 - }, - { - "epoch": 0.5510750931311679, - "grad_norm": 0.4294022023677826, - "learning_rate": 9.327389105912308e-06, - "loss": 0.3603, - "step": 8432 - }, - { - "epoch": 0.55114044833671, - "grad_norm": 0.4425641596317291, - "learning_rate": 9.327214169587881e-06, - "loss": 0.3888, - "step": 8433 - }, - { - "epoch": 0.5512058035422521, - "grad_norm": 0.43455198407173157, - "learning_rate": 9.32703921215813e-06, - "loss": 0.3916, - "step": 8434 - }, - { - "epoch": 0.5512711587477943, - "grad_norm": 0.4327811598777771, - "learning_rate": 9.326864233623912e-06, - "loss": 0.3907, - "step": 8435 - }, - { - "epoch": 0.5513365139533364, - "grad_norm": 0.427016019821167, - "learning_rate": 9.326689233986077e-06, - "loss": 0.3403, - "step": 8436 - }, - { - "epoch": 0.5514018691588785, - "grad_norm": 0.48279014229774475, - "learning_rate": 9.32651421324548e-06, - "loss": 0.3987, - "step": 8437 - }, - { - "epoch": 0.5514672243644206, - "grad_norm": 0.46045464277267456, - "learning_rate": 9.326339171402974e-06, - "loss": 0.4103, - "step": 8438 - }, - { - "epoch": 0.5515325795699627, - "grad_norm": 0.5111485123634338, - "learning_rate": 9.326164108459413e-06, - "loss": 0.4348, - "step": 8439 - }, - { - "epoch": 0.5515979347755049, - "grad_norm": 0.4589383602142334, - "learning_rate": 9.325989024415652e-06, - "loss": 0.402, - "step": 8440 - }, - { - "epoch": 0.551663289981047, - "grad_norm": 0.4229415953159332, - "learning_rate": 9.325813919272542e-06, - "loss": 0.3224, - "step": 8441 - }, - { - "epoch": 0.5517286451865892, - "grad_norm": 0.4934961497783661, - "learning_rate": 9.32563879303094e-06, - "loss": 0.4481, - "step": 8442 - }, - { - "epoch": 0.5517940003921312, - "grad_norm": 0.45548489689826965, - "learning_rate": 9.325463645691699e-06, - "loss": 0.3794, - "step": 8443 - }, - { - "epoch": 0.5518593555976734, - "grad_norm": 0.42007023096084595, - "learning_rate": 9.325288477255673e-06, - "loss": 0.34, - "step": 8444 - }, - { - "epoch": 0.5519247108032155, - "grad_norm": 0.4694366753101349, - "learning_rate": 9.325113287723718e-06, - "loss": 0.3862, - "step": 8445 - }, - { - "epoch": 0.5519900660087576, - "grad_norm": 0.4611228108406067, - "learning_rate": 9.324938077096685e-06, - "loss": 0.414, - "step": 8446 - }, - { - "epoch": 0.5520554212142997, - "grad_norm": 0.48163169622421265, - "learning_rate": 9.324762845375433e-06, - "loss": 0.3617, - "step": 8447 - }, - { - "epoch": 0.5521207764198418, - "grad_norm": 0.42597824335098267, - "learning_rate": 9.324587592560812e-06, - "loss": 0.3365, - "step": 8448 - }, - { - "epoch": 0.552186131625384, - "grad_norm": 0.4741002023220062, - "learning_rate": 9.324412318653679e-06, - "loss": 0.4429, - "step": 8449 - }, - { - "epoch": 0.5522514868309261, - "grad_norm": 0.48227596282958984, - "learning_rate": 9.324237023654892e-06, - "loss": 0.4309, - "step": 8450 - }, - { - "epoch": 0.5523168420364682, - "grad_norm": 0.49901261925697327, - "learning_rate": 9.324061707565299e-06, - "loss": 0.3886, - "step": 8451 - }, - { - "epoch": 0.5523821972420103, - "grad_norm": 0.4953928589820862, - "learning_rate": 9.323886370385762e-06, - "loss": 0.4208, - "step": 8452 - }, - { - "epoch": 0.5524475524475524, - "grad_norm": 0.45888665318489075, - "learning_rate": 9.323711012117134e-06, - "loss": 0.3819, - "step": 8453 - }, - { - "epoch": 0.5525129076530946, - "grad_norm": 0.4151547849178314, - "learning_rate": 9.323535632760267e-06, - "loss": 0.3553, - "step": 8454 - }, - { - "epoch": 0.5525782628586366, - "grad_norm": 0.4288499057292938, - "learning_rate": 9.32336023231602e-06, - "loss": 0.3603, - "step": 8455 - }, - { - "epoch": 0.5526436180641788, - "grad_norm": 0.4717220962047577, - "learning_rate": 9.323184810785247e-06, - "loss": 0.4287, - "step": 8456 - }, - { - "epoch": 0.5527089732697209, - "grad_norm": 0.47539812326431274, - "learning_rate": 9.323009368168805e-06, - "loss": 0.4089, - "step": 8457 - }, - { - "epoch": 0.5527743284752631, - "grad_norm": 0.4591185748577118, - "learning_rate": 9.322833904467548e-06, - "loss": 0.3922, - "step": 8458 - }, - { - "epoch": 0.5528396836808052, - "grad_norm": 0.45324063301086426, - "learning_rate": 9.322658419682331e-06, - "loss": 0.4169, - "step": 8459 - }, - { - "epoch": 0.5529050388863473, - "grad_norm": 0.4990595281124115, - "learning_rate": 9.322482913814012e-06, - "loss": 0.4469, - "step": 8460 - }, - { - "epoch": 0.5529703940918894, - "grad_norm": 0.4342723786830902, - "learning_rate": 9.322307386863448e-06, - "loss": 0.3744, - "step": 8461 - }, - { - "epoch": 0.5530357492974315, - "grad_norm": 0.44127944111824036, - "learning_rate": 9.322131838831493e-06, - "loss": 0.3821, - "step": 8462 - }, - { - "epoch": 0.5531011045029737, - "grad_norm": 0.479885995388031, - "learning_rate": 9.321956269719003e-06, - "loss": 0.3973, - "step": 8463 - }, - { - "epoch": 0.5531664597085157, - "grad_norm": 0.4902941584587097, - "learning_rate": 9.321780679526835e-06, - "loss": 0.4142, - "step": 8464 - }, - { - "epoch": 0.5532318149140579, - "grad_norm": 0.5010184645652771, - "learning_rate": 9.321605068255845e-06, - "loss": 0.4483, - "step": 8465 - }, - { - "epoch": 0.5532971701196, - "grad_norm": 0.4486129879951477, - "learning_rate": 9.321429435906893e-06, - "loss": 0.3705, - "step": 8466 - }, - { - "epoch": 0.5533625253251422, - "grad_norm": 0.4655238389968872, - "learning_rate": 9.321253782480829e-06, - "loss": 0.4214, - "step": 8467 - }, - { - "epoch": 0.5534278805306843, - "grad_norm": 0.5725885033607483, - "learning_rate": 9.321078107978514e-06, - "loss": 0.3714, - "step": 8468 - }, - { - "epoch": 0.5534932357362264, - "grad_norm": 0.4592706263065338, - "learning_rate": 9.320902412400806e-06, - "loss": 0.417, - "step": 8469 - }, - { - "epoch": 0.5535585909417685, - "grad_norm": 0.4308174252510071, - "learning_rate": 9.320726695748558e-06, - "loss": 0.3735, - "step": 8470 - }, - { - "epoch": 0.5536239461473106, - "grad_norm": 0.45350342988967896, - "learning_rate": 9.320550958022629e-06, - "loss": 0.4151, - "step": 8471 - }, - { - "epoch": 0.5536893013528528, - "grad_norm": 0.43352949619293213, - "learning_rate": 9.320375199223877e-06, - "loss": 0.3383, - "step": 8472 - }, - { - "epoch": 0.5537546565583948, - "grad_norm": 0.48943135142326355, - "learning_rate": 9.32019941935316e-06, - "loss": 0.4346, - "step": 8473 - }, - { - "epoch": 0.553820011763937, - "grad_norm": 0.42777833342552185, - "learning_rate": 9.320023618411332e-06, - "loss": 0.3468, - "step": 8474 - }, - { - "epoch": 0.5538853669694791, - "grad_norm": 0.44771796464920044, - "learning_rate": 9.319847796399251e-06, - "loss": 0.3561, - "step": 8475 - }, - { - "epoch": 0.5539507221750213, - "grad_norm": 0.5173906087875366, - "learning_rate": 9.319671953317776e-06, - "loss": 0.4371, - "step": 8476 - }, - { - "epoch": 0.5540160773805634, - "grad_norm": 0.5046651363372803, - "learning_rate": 9.319496089167767e-06, - "loss": 0.3692, - "step": 8477 - }, - { - "epoch": 0.5540814325861055, - "grad_norm": 0.4478219151496887, - "learning_rate": 9.319320203950077e-06, - "loss": 0.3511, - "step": 8478 - }, - { - "epoch": 0.5541467877916476, - "grad_norm": 0.4522320032119751, - "learning_rate": 9.319144297665566e-06, - "loss": 0.4043, - "step": 8479 - }, - { - "epoch": 0.5542121429971897, - "grad_norm": 0.4755884110927582, - "learning_rate": 9.318968370315094e-06, - "loss": 0.397, - "step": 8480 - }, - { - "epoch": 0.5542774982027319, - "grad_norm": 0.4711938500404358, - "learning_rate": 9.318792421899516e-06, - "loss": 0.4588, - "step": 8481 - }, - { - "epoch": 0.5543428534082739, - "grad_norm": 0.4489021301269531, - "learning_rate": 9.31861645241969e-06, - "loss": 0.3607, - "step": 8482 - }, - { - "epoch": 0.5544082086138161, - "grad_norm": 0.49408623576164246, - "learning_rate": 9.318440461876476e-06, - "loss": 0.4324, - "step": 8483 - }, - { - "epoch": 0.5544735638193582, - "grad_norm": 0.5169716477394104, - "learning_rate": 9.318264450270733e-06, - "loss": 0.4674, - "step": 8484 - }, - { - "epoch": 0.5545389190249004, - "grad_norm": 0.4246198534965515, - "learning_rate": 9.318088417603317e-06, - "loss": 0.3611, - "step": 8485 - }, - { - "epoch": 0.5546042742304425, - "grad_norm": 0.4757101535797119, - "learning_rate": 9.317912363875089e-06, - "loss": 0.4551, - "step": 8486 - }, - { - "epoch": 0.5546696294359845, - "grad_norm": 0.4473075866699219, - "learning_rate": 9.317736289086904e-06, - "loss": 0.4081, - "step": 8487 - }, - { - "epoch": 0.5547349846415267, - "grad_norm": 0.45982101559638977, - "learning_rate": 9.317560193239626e-06, - "loss": 0.379, - "step": 8488 - }, - { - "epoch": 0.5548003398470688, - "grad_norm": 0.45575153827667236, - "learning_rate": 9.317384076334109e-06, - "loss": 0.3517, - "step": 8489 - }, - { - "epoch": 0.554865695052611, - "grad_norm": 0.46721091866493225, - "learning_rate": 9.317207938371216e-06, - "loss": 0.4049, - "step": 8490 - }, - { - "epoch": 0.554931050258153, - "grad_norm": 0.46285346150398254, - "learning_rate": 9.317031779351803e-06, - "loss": 0.3927, - "step": 8491 - }, - { - "epoch": 0.5549964054636952, - "grad_norm": 0.4649997055530548, - "learning_rate": 9.316855599276733e-06, - "loss": 0.4246, - "step": 8492 - }, - { - "epoch": 0.5550617606692373, - "grad_norm": 0.44084203243255615, - "learning_rate": 9.316679398146859e-06, - "loss": 0.351, - "step": 8493 - }, - { - "epoch": 0.5551271158747795, - "grad_norm": 0.477680504322052, - "learning_rate": 9.316503175963048e-06, - "loss": 0.3938, - "step": 8494 - }, - { - "epoch": 0.5551924710803215, - "grad_norm": 0.4778280258178711, - "learning_rate": 9.316326932726151e-06, - "loss": 0.422, - "step": 8495 - }, - { - "epoch": 0.5552578262858636, - "grad_norm": 0.4426637291908264, - "learning_rate": 9.316150668437037e-06, - "loss": 0.3715, - "step": 8496 - }, - { - "epoch": 0.5553231814914058, - "grad_norm": 0.42743223905563354, - "learning_rate": 9.315974383096557e-06, - "loss": 0.3376, - "step": 8497 - }, - { - "epoch": 0.5553885366969479, - "grad_norm": 0.45268023014068604, - "learning_rate": 9.315798076705578e-06, - "loss": 0.3829, - "step": 8498 - }, - { - "epoch": 0.5554538919024901, - "grad_norm": 0.4755485951900482, - "learning_rate": 9.315621749264956e-06, - "loss": 0.4254, - "step": 8499 - }, - { - "epoch": 0.5555192471080321, - "grad_norm": 0.4836467206478119, - "learning_rate": 9.315445400775549e-06, - "loss": 0.3897, - "step": 8500 - }, - { - "epoch": 0.5555846023135743, - "grad_norm": 0.4236466586589813, - "learning_rate": 9.315269031238222e-06, - "loss": 0.3356, - "step": 8501 - }, - { - "epoch": 0.5556499575191164, - "grad_norm": 0.4499092996120453, - "learning_rate": 9.315092640653834e-06, - "loss": 0.401, - "step": 8502 - }, - { - "epoch": 0.5557153127246586, - "grad_norm": 0.4628172516822815, - "learning_rate": 9.314916229023242e-06, - "loss": 0.382, - "step": 8503 - }, - { - "epoch": 0.5557806679302006, - "grad_norm": 0.47890061140060425, - "learning_rate": 9.31473979634731e-06, - "loss": 0.4077, - "step": 8504 - }, - { - "epoch": 0.5558460231357427, - "grad_norm": 0.479810506105423, - "learning_rate": 9.314563342626897e-06, - "loss": 0.4386, - "step": 8505 - }, - { - "epoch": 0.5559113783412849, - "grad_norm": 0.4570308327674866, - "learning_rate": 9.314386867862863e-06, - "loss": 0.3808, - "step": 8506 - }, - { - "epoch": 0.555976733546827, - "grad_norm": 0.4701350927352905, - "learning_rate": 9.31421037205607e-06, - "loss": 0.3972, - "step": 8507 - }, - { - "epoch": 0.5560420887523692, - "grad_norm": 0.44427141547203064, - "learning_rate": 9.314033855207379e-06, - "loss": 0.3435, - "step": 8508 - }, - { - "epoch": 0.5561074439579112, - "grad_norm": 0.4655913710594177, - "learning_rate": 9.31385731731765e-06, - "loss": 0.3739, - "step": 8509 - }, - { - "epoch": 0.5561727991634534, - "grad_norm": 0.45355671644210815, - "learning_rate": 9.313680758387745e-06, - "loss": 0.3977, - "step": 8510 - }, - { - "epoch": 0.5562381543689955, - "grad_norm": 0.43664395809173584, - "learning_rate": 9.313504178418524e-06, - "loss": 0.3566, - "step": 8511 - }, - { - "epoch": 0.5563035095745376, - "grad_norm": 0.42067351937294006, - "learning_rate": 9.313327577410849e-06, - "loss": 0.3508, - "step": 8512 - }, - { - "epoch": 0.5563688647800797, - "grad_norm": 0.48400741815567017, - "learning_rate": 9.31315095536558e-06, - "loss": 0.4489, - "step": 8513 - }, - { - "epoch": 0.5564342199856218, - "grad_norm": 0.4556126296520233, - "learning_rate": 9.31297431228358e-06, - "loss": 0.4071, - "step": 8514 - }, - { - "epoch": 0.556499575191164, - "grad_norm": 0.4724879264831543, - "learning_rate": 9.312797648165712e-06, - "loss": 0.3754, - "step": 8515 - }, - { - "epoch": 0.5565649303967061, - "grad_norm": 0.43408671021461487, - "learning_rate": 9.312620963012833e-06, - "loss": 0.3568, - "step": 8516 - }, - { - "epoch": 0.5566302856022483, - "grad_norm": 0.4010029435157776, - "learning_rate": 9.31244425682581e-06, - "loss": 0.3112, - "step": 8517 - }, - { - "epoch": 0.5566956408077903, - "grad_norm": 0.4599505662918091, - "learning_rate": 9.3122675296055e-06, - "loss": 0.356, - "step": 8518 - }, - { - "epoch": 0.5567609960133325, - "grad_norm": 0.41994336247444153, - "learning_rate": 9.31209078135277e-06, - "loss": 0.3447, - "step": 8519 - }, - { - "epoch": 0.5568263512188746, - "grad_norm": 0.46921485662460327, - "learning_rate": 9.311914012068478e-06, - "loss": 0.3964, - "step": 8520 - }, - { - "epoch": 0.5568917064244167, - "grad_norm": 0.4661003351211548, - "learning_rate": 9.311737221753487e-06, - "loss": 0.3939, - "step": 8521 - }, - { - "epoch": 0.5569570616299588, - "grad_norm": 0.4298548698425293, - "learning_rate": 9.31156041040866e-06, - "loss": 0.3406, - "step": 8522 - }, - { - "epoch": 0.5570224168355009, - "grad_norm": 0.47248151898384094, - "learning_rate": 9.31138357803486e-06, - "loss": 0.3952, - "step": 8523 - }, - { - "epoch": 0.5570877720410431, - "grad_norm": 0.4456464350223541, - "learning_rate": 9.311206724632949e-06, - "loss": 0.3926, - "step": 8524 - }, - { - "epoch": 0.5571531272465852, - "grad_norm": 0.4399384558200836, - "learning_rate": 9.311029850203788e-06, - "loss": 0.3757, - "step": 8525 - }, - { - "epoch": 0.5572184824521274, - "grad_norm": 0.4460543394088745, - "learning_rate": 9.310852954748241e-06, - "loss": 0.3574, - "step": 8526 - }, - { - "epoch": 0.5572838376576694, - "grad_norm": 0.4757439196109772, - "learning_rate": 9.310676038267173e-06, - "loss": 0.4155, - "step": 8527 - }, - { - "epoch": 0.5573491928632116, - "grad_norm": 0.45683062076568604, - "learning_rate": 9.310499100761443e-06, - "loss": 0.3955, - "step": 8528 - }, - { - "epoch": 0.5574145480687537, - "grad_norm": 0.43982434272766113, - "learning_rate": 9.310322142231916e-06, - "loss": 0.3825, - "step": 8529 - }, - { - "epoch": 0.5574799032742958, - "grad_norm": 0.4997251629829407, - "learning_rate": 9.310145162679454e-06, - "loss": 0.455, - "step": 8530 - }, - { - "epoch": 0.5575452584798379, - "grad_norm": 0.4151521325111389, - "learning_rate": 9.309968162104921e-06, - "loss": 0.3374, - "step": 8531 - }, - { - "epoch": 0.55761061368538, - "grad_norm": 0.4819811284542084, - "learning_rate": 9.309791140509178e-06, - "loss": 0.4262, - "step": 8532 - }, - { - "epoch": 0.5576759688909222, - "grad_norm": 0.4586372375488281, - "learning_rate": 9.309614097893093e-06, - "loss": 0.4032, - "step": 8533 - }, - { - "epoch": 0.5577413240964643, - "grad_norm": 0.43472665548324585, - "learning_rate": 9.309437034257526e-06, - "loss": 0.3699, - "step": 8534 - }, - { - "epoch": 0.5578066793020064, - "grad_norm": 0.45355939865112305, - "learning_rate": 9.309259949603344e-06, - "loss": 0.4229, - "step": 8535 - }, - { - "epoch": 0.5578720345075485, - "grad_norm": 0.48780471086502075, - "learning_rate": 9.309082843931407e-06, - "loss": 0.4228, - "step": 8536 - }, - { - "epoch": 0.5579373897130906, - "grad_norm": 0.49163946509361267, - "learning_rate": 9.30890571724258e-06, - "loss": 0.4377, - "step": 8537 - }, - { - "epoch": 0.5580027449186328, - "grad_norm": 0.4605172276496887, - "learning_rate": 9.308728569537728e-06, - "loss": 0.4274, - "step": 8538 - }, - { - "epoch": 0.5580681001241748, - "grad_norm": 0.45451197028160095, - "learning_rate": 9.308551400817712e-06, - "loss": 0.4312, - "step": 8539 - }, - { - "epoch": 0.558133455329717, - "grad_norm": 0.44905582070350647, - "learning_rate": 9.3083742110834e-06, - "loss": 0.376, - "step": 8540 - }, - { - "epoch": 0.5581988105352591, - "grad_norm": 0.43554186820983887, - "learning_rate": 9.308197000335652e-06, - "loss": 0.3728, - "step": 8541 - }, - { - "epoch": 0.5582641657408013, - "grad_norm": 0.4635400176048279, - "learning_rate": 9.308019768575338e-06, - "loss": 0.4027, - "step": 8542 - }, - { - "epoch": 0.5583295209463434, - "grad_norm": 0.4505763649940491, - "learning_rate": 9.307842515803318e-06, - "loss": 0.3809, - "step": 8543 - }, - { - "epoch": 0.5583948761518855, - "grad_norm": 0.46907296776771545, - "learning_rate": 9.307665242020456e-06, - "loss": 0.4243, - "step": 8544 - }, - { - "epoch": 0.5584602313574276, - "grad_norm": 0.4555659592151642, - "learning_rate": 9.30748794722762e-06, - "loss": 0.3883, - "step": 8545 - }, - { - "epoch": 0.5585255865629697, - "grad_norm": 0.4730190932750702, - "learning_rate": 9.307310631425673e-06, - "loss": 0.4317, - "step": 8546 - }, - { - "epoch": 0.5585909417685119, - "grad_norm": 0.42633357644081116, - "learning_rate": 9.307133294615479e-06, - "loss": 0.3149, - "step": 8547 - }, - { - "epoch": 0.558656296974054, - "grad_norm": 0.4453902542591095, - "learning_rate": 9.306955936797904e-06, - "loss": 0.387, - "step": 8548 - }, - { - "epoch": 0.5587216521795961, - "grad_norm": 0.4490743577480316, - "learning_rate": 9.306778557973813e-06, - "loss": 0.3792, - "step": 8549 - }, - { - "epoch": 0.5587870073851382, - "grad_norm": 0.40580257773399353, - "learning_rate": 9.306601158144071e-06, - "loss": 0.2995, - "step": 8550 - }, - { - "epoch": 0.5588523625906804, - "grad_norm": 0.4768867492675781, - "learning_rate": 9.306423737309544e-06, - "loss": 0.4078, - "step": 8551 - }, - { - "epoch": 0.5589177177962225, - "grad_norm": 0.5181326866149902, - "learning_rate": 9.306246295471096e-06, - "loss": 0.4095, - "step": 8552 - }, - { - "epoch": 0.5589830730017646, - "grad_norm": 0.4448120594024658, - "learning_rate": 9.30606883262959e-06, - "loss": 0.3416, - "step": 8553 - }, - { - "epoch": 0.5590484282073067, - "grad_norm": 0.4278298020362854, - "learning_rate": 9.305891348785898e-06, - "loss": 0.3481, - "step": 8554 - }, - { - "epoch": 0.5591137834128488, - "grad_norm": 0.48393508791923523, - "learning_rate": 9.305713843940883e-06, - "loss": 0.4162, - "step": 8555 - }, - { - "epoch": 0.559179138618391, - "grad_norm": 0.48160406947135925, - "learning_rate": 9.305536318095408e-06, - "loss": 0.3878, - "step": 8556 - }, - { - "epoch": 0.559244493823933, - "grad_norm": 0.510643482208252, - "learning_rate": 9.305358771250342e-06, - "loss": 0.4474, - "step": 8557 - }, - { - "epoch": 0.5593098490294752, - "grad_norm": 0.4668619930744171, - "learning_rate": 9.305181203406548e-06, - "loss": 0.3687, - "step": 8558 - }, - { - "epoch": 0.5593752042350173, - "grad_norm": 0.44012463092803955, - "learning_rate": 9.305003614564895e-06, - "loss": 0.3548, - "step": 8559 - }, - { - "epoch": 0.5594405594405595, - "grad_norm": 0.4808661937713623, - "learning_rate": 9.304826004726248e-06, - "loss": 0.4233, - "step": 8560 - }, - { - "epoch": 0.5595059146461016, - "grad_norm": 0.4720933139324188, - "learning_rate": 9.304648373891472e-06, - "loss": 0.3555, - "step": 8561 - }, - { - "epoch": 0.5595712698516437, - "grad_norm": 0.44952312111854553, - "learning_rate": 9.304470722061437e-06, - "loss": 0.3903, - "step": 8562 - }, - { - "epoch": 0.5596366250571858, - "grad_norm": 0.4470025300979614, - "learning_rate": 9.304293049237005e-06, - "loss": 0.368, - "step": 8563 - }, - { - "epoch": 0.5597019802627279, - "grad_norm": 0.46034061908721924, - "learning_rate": 9.304115355419045e-06, - "loss": 0.3909, - "step": 8564 - }, - { - "epoch": 0.5597673354682701, - "grad_norm": 0.48923662304878235, - "learning_rate": 9.303937640608423e-06, - "loss": 0.4617, - "step": 8565 - }, - { - "epoch": 0.5598326906738121, - "grad_norm": 0.4368457794189453, - "learning_rate": 9.303759904806007e-06, - "loss": 0.3402, - "step": 8566 - }, - { - "epoch": 0.5598980458793543, - "grad_norm": 0.5113939046859741, - "learning_rate": 9.303582148012663e-06, - "loss": 0.4111, - "step": 8567 - }, - { - "epoch": 0.5599634010848964, - "grad_norm": 0.43663060665130615, - "learning_rate": 9.303404370229257e-06, - "loss": 0.3611, - "step": 8568 - }, - { - "epoch": 0.5600287562904386, - "grad_norm": 0.4688543379306793, - "learning_rate": 9.303226571456658e-06, - "loss": 0.4179, - "step": 8569 - }, - { - "epoch": 0.5600941114959807, - "grad_norm": 0.47538647055625916, - "learning_rate": 9.303048751695732e-06, - "loss": 0.3674, - "step": 8570 - }, - { - "epoch": 0.5601594667015227, - "grad_norm": 0.48249363899230957, - "learning_rate": 9.302870910947346e-06, - "loss": 0.4418, - "step": 8571 - }, - { - "epoch": 0.5602248219070649, - "grad_norm": 0.4834454655647278, - "learning_rate": 9.30269304921237e-06, - "loss": 0.4054, - "step": 8572 - }, - { - "epoch": 0.560290177112607, - "grad_norm": 0.4889773726463318, - "learning_rate": 9.302515166491667e-06, - "loss": 0.4349, - "step": 8573 - }, - { - "epoch": 0.5603555323181492, - "grad_norm": 0.6230728030204773, - "learning_rate": 9.302337262786107e-06, - "loss": 0.4034, - "step": 8574 - }, - { - "epoch": 0.5604208875236912, - "grad_norm": 0.43193289637565613, - "learning_rate": 9.302159338096559e-06, - "loss": 0.3391, - "step": 8575 - }, - { - "epoch": 0.5604862427292334, - "grad_norm": 0.45328015089035034, - "learning_rate": 9.30198139242389e-06, - "loss": 0.3668, - "step": 8576 - }, - { - "epoch": 0.5605515979347755, - "grad_norm": 0.46770498156547546, - "learning_rate": 9.301803425768964e-06, - "loss": 0.3915, - "step": 8577 - }, - { - "epoch": 0.5606169531403177, - "grad_norm": 0.4672142267227173, - "learning_rate": 9.301625438132655e-06, - "loss": 0.3989, - "step": 8578 - }, - { - "epoch": 0.5606823083458597, - "grad_norm": 0.427051842212677, - "learning_rate": 9.301447429515829e-06, - "loss": 0.3246, - "step": 8579 - }, - { - "epoch": 0.5607476635514018, - "grad_norm": 0.41258934140205383, - "learning_rate": 9.301269399919352e-06, - "loss": 0.3339, - "step": 8580 - }, - { - "epoch": 0.560813018756944, - "grad_norm": 0.45607173442840576, - "learning_rate": 9.301091349344096e-06, - "loss": 0.372, - "step": 8581 - }, - { - "epoch": 0.5608783739624861, - "grad_norm": 0.4394467771053314, - "learning_rate": 9.300913277790926e-06, - "loss": 0.3935, - "step": 8582 - }, - { - "epoch": 0.5609437291680283, - "grad_norm": 0.42783913016319275, - "learning_rate": 9.300735185260713e-06, - "loss": 0.327, - "step": 8583 - }, - { - "epoch": 0.5610090843735703, - "grad_norm": 0.46981081366539, - "learning_rate": 9.300557071754324e-06, - "loss": 0.4184, - "step": 8584 - }, - { - "epoch": 0.5610744395791125, - "grad_norm": 0.4887408912181854, - "learning_rate": 9.300378937272629e-06, - "loss": 0.4318, - "step": 8585 - }, - { - "epoch": 0.5611397947846546, - "grad_norm": 0.4351556897163391, - "learning_rate": 9.300200781816495e-06, - "loss": 0.3603, - "step": 8586 - }, - { - "epoch": 0.5612051499901968, - "grad_norm": 0.42420145869255066, - "learning_rate": 9.300022605386793e-06, - "loss": 0.3405, - "step": 8587 - }, - { - "epoch": 0.5612705051957388, - "grad_norm": 0.4372050166130066, - "learning_rate": 9.29984440798439e-06, - "loss": 0.39, - "step": 8588 - }, - { - "epoch": 0.5613358604012809, - "grad_norm": 0.46907809376716614, - "learning_rate": 9.299666189610157e-06, - "loss": 0.3912, - "step": 8589 - }, - { - "epoch": 0.5614012156068231, - "grad_norm": 0.4684206545352936, - "learning_rate": 9.299487950264963e-06, - "loss": 0.3737, - "step": 8590 - }, - { - "epoch": 0.5614665708123652, - "grad_norm": 0.4382263422012329, - "learning_rate": 9.299309689949676e-06, - "loss": 0.3957, - "step": 8591 - }, - { - "epoch": 0.5615319260179074, - "grad_norm": 0.43257221579551697, - "learning_rate": 9.299131408665166e-06, - "loss": 0.3683, - "step": 8592 - }, - { - "epoch": 0.5615972812234494, - "grad_norm": 0.4597841203212738, - "learning_rate": 9.298953106412304e-06, - "loss": 0.4233, - "step": 8593 - }, - { - "epoch": 0.5616626364289916, - "grad_norm": 0.4499904215335846, - "learning_rate": 9.298774783191956e-06, - "loss": 0.3917, - "step": 8594 - }, - { - "epoch": 0.5617279916345337, - "grad_norm": 0.44387346506118774, - "learning_rate": 9.298596439004996e-06, - "loss": 0.354, - "step": 8595 - }, - { - "epoch": 0.5617933468400758, - "grad_norm": 0.4785110354423523, - "learning_rate": 9.298418073852291e-06, - "loss": 0.3891, - "step": 8596 - }, - { - "epoch": 0.5618587020456179, - "grad_norm": 0.4568953216075897, - "learning_rate": 9.298239687734712e-06, - "loss": 0.3957, - "step": 8597 - }, - { - "epoch": 0.56192405725116, - "grad_norm": 0.4183341860771179, - "learning_rate": 9.29806128065313e-06, - "loss": 0.3388, - "step": 8598 - }, - { - "epoch": 0.5619894124567022, - "grad_norm": 0.413400799036026, - "learning_rate": 9.297882852608413e-06, - "loss": 0.3108, - "step": 8599 - }, - { - "epoch": 0.5620547676622443, - "grad_norm": 0.5093491077423096, - "learning_rate": 9.297704403601433e-06, - "loss": 0.4676, - "step": 8600 - }, - { - "epoch": 0.5621201228677865, - "grad_norm": 0.47162389755249023, - "learning_rate": 9.297525933633061e-06, - "loss": 0.4524, - "step": 8601 - }, - { - "epoch": 0.5621854780733285, - "grad_norm": 0.4399264454841614, - "learning_rate": 9.297347442704164e-06, - "loss": 0.358, - "step": 8602 - }, - { - "epoch": 0.5622508332788707, - "grad_norm": 0.49042192101478577, - "learning_rate": 9.297168930815616e-06, - "loss": 0.4067, - "step": 8603 - }, - { - "epoch": 0.5623161884844128, - "grad_norm": 0.44997796416282654, - "learning_rate": 9.296990397968286e-06, - "loss": 0.3824, - "step": 8604 - }, - { - "epoch": 0.5623815436899549, - "grad_norm": 0.45137375593185425, - "learning_rate": 9.296811844163046e-06, - "loss": 0.3753, - "step": 8605 - }, - { - "epoch": 0.562446898895497, - "grad_norm": 0.46417152881622314, - "learning_rate": 9.296633269400766e-06, - "loss": 0.3899, - "step": 8606 - }, - { - "epoch": 0.5625122541010391, - "grad_norm": 0.44375890493392944, - "learning_rate": 9.296454673682316e-06, - "loss": 0.3849, - "step": 8607 - }, - { - "epoch": 0.5625776093065813, - "grad_norm": 0.46177980303764343, - "learning_rate": 9.296276057008569e-06, - "loss": 0.3794, - "step": 8608 - }, - { - "epoch": 0.5626429645121234, - "grad_norm": 0.4350912570953369, - "learning_rate": 9.296097419380394e-06, - "loss": 0.3384, - "step": 8609 - }, - { - "epoch": 0.5627083197176656, - "grad_norm": 0.4928731918334961, - "learning_rate": 9.295918760798665e-06, - "loss": 0.401, - "step": 8610 - }, - { - "epoch": 0.5627736749232076, - "grad_norm": 0.46854060888290405, - "learning_rate": 9.295740081264252e-06, - "loss": 0.3889, - "step": 8611 - }, - { - "epoch": 0.5628390301287498, - "grad_norm": 0.4220615327358246, - "learning_rate": 9.295561380778025e-06, - "loss": 0.3453, - "step": 8612 - }, - { - "epoch": 0.5629043853342919, - "grad_norm": 0.4408263564109802, - "learning_rate": 9.29538265934086e-06, - "loss": 0.3374, - "step": 8613 - }, - { - "epoch": 0.562969740539834, - "grad_norm": 0.4586383104324341, - "learning_rate": 9.295203916953622e-06, - "loss": 0.3845, - "step": 8614 - }, - { - "epoch": 0.5630350957453761, - "grad_norm": 0.47495511174201965, - "learning_rate": 9.295025153617189e-06, - "loss": 0.3718, - "step": 8615 - }, - { - "epoch": 0.5631004509509182, - "grad_norm": 0.46429243683815, - "learning_rate": 9.294846369332429e-06, - "loss": 0.3959, - "step": 8616 - }, - { - "epoch": 0.5631658061564604, - "grad_norm": 0.43938446044921875, - "learning_rate": 9.294667564100216e-06, - "loss": 0.3659, - "step": 8617 - }, - { - "epoch": 0.5632311613620025, - "grad_norm": 0.4337911307811737, - "learning_rate": 9.294488737921422e-06, - "loss": 0.3944, - "step": 8618 - }, - { - "epoch": 0.5632965165675446, - "grad_norm": 0.5173372030258179, - "learning_rate": 9.294309890796918e-06, - "loss": 0.4011, - "step": 8619 - }, - { - "epoch": 0.5633618717730867, - "grad_norm": 0.4294881820678711, - "learning_rate": 9.294131022727576e-06, - "loss": 0.3556, - "step": 8620 - }, - { - "epoch": 0.5634272269786288, - "grad_norm": 0.44056859612464905, - "learning_rate": 9.293952133714271e-06, - "loss": 0.3869, - "step": 8621 - }, - { - "epoch": 0.563492582184171, - "grad_norm": 0.43963128328323364, - "learning_rate": 9.293773223757873e-06, - "loss": 0.3359, - "step": 8622 - }, - { - "epoch": 0.563557937389713, - "grad_norm": 0.45700833201408386, - "learning_rate": 9.293594292859257e-06, - "loss": 0.4142, - "step": 8623 - }, - { - "epoch": 0.5636232925952552, - "grad_norm": 0.4210696518421173, - "learning_rate": 9.293415341019292e-06, - "loss": 0.3292, - "step": 8624 - }, - { - "epoch": 0.5636886478007973, - "grad_norm": 0.463018000125885, - "learning_rate": 9.293236368238854e-06, - "loss": 0.424, - "step": 8625 - }, - { - "epoch": 0.5637540030063395, - "grad_norm": 0.44253072142601013, - "learning_rate": 9.293057374518814e-06, - "loss": 0.4123, - "step": 8626 - }, - { - "epoch": 0.5638193582118816, - "grad_norm": 0.5666806697845459, - "learning_rate": 9.292878359860047e-06, - "loss": 0.4112, - "step": 8627 - }, - { - "epoch": 0.5638847134174237, - "grad_norm": 0.47362110018730164, - "learning_rate": 9.292699324263425e-06, - "loss": 0.4513, - "step": 8628 - }, - { - "epoch": 0.5639500686229658, - "grad_norm": 0.43033018708229065, - "learning_rate": 9.292520267729821e-06, - "loss": 0.3745, - "step": 8629 - }, - { - "epoch": 0.5640154238285079, - "grad_norm": 0.47930434346199036, - "learning_rate": 9.292341190260108e-06, - "loss": 0.4228, - "step": 8630 - }, - { - "epoch": 0.5640807790340501, - "grad_norm": 0.48313361406326294, - "learning_rate": 9.292162091855161e-06, - "loss": 0.3644, - "step": 8631 - }, - { - "epoch": 0.5641461342395921, - "grad_norm": 0.4595695734024048, - "learning_rate": 9.291982972515853e-06, - "loss": 0.4022, - "step": 8632 - }, - { - "epoch": 0.5642114894451343, - "grad_norm": 0.41800472140312195, - "learning_rate": 9.291803832243055e-06, - "loss": 0.3411, - "step": 8633 - }, - { - "epoch": 0.5642768446506764, - "grad_norm": 0.4657498896121979, - "learning_rate": 9.291624671037646e-06, - "loss": 0.3902, - "step": 8634 - }, - { - "epoch": 0.5643421998562186, - "grad_norm": 0.4660916328430176, - "learning_rate": 9.291445488900494e-06, - "loss": 0.3701, - "step": 8635 - }, - { - "epoch": 0.5644075550617607, - "grad_norm": 0.41811466217041016, - "learning_rate": 9.291266285832477e-06, - "loss": 0.3809, - "step": 8636 - }, - { - "epoch": 0.5644729102673028, - "grad_norm": 0.4972060024738312, - "learning_rate": 9.291087061834467e-06, - "loss": 0.4304, - "step": 8637 - }, - { - "epoch": 0.5645382654728449, - "grad_norm": 0.46764495968818665, - "learning_rate": 9.29090781690734e-06, - "loss": 0.4293, - "step": 8638 - }, - { - "epoch": 0.564603620678387, - "grad_norm": 0.4698627293109894, - "learning_rate": 9.290728551051969e-06, - "loss": 0.3943, - "step": 8639 - }, - { - "epoch": 0.5646689758839292, - "grad_norm": 0.4544629752635956, - "learning_rate": 9.290549264269227e-06, - "loss": 0.3561, - "step": 8640 - }, - { - "epoch": 0.5647343310894712, - "grad_norm": 0.468403697013855, - "learning_rate": 9.29036995655999e-06, - "loss": 0.3841, - "step": 8641 - }, - { - "epoch": 0.5647996862950134, - "grad_norm": 0.4759049713611603, - "learning_rate": 9.290190627925133e-06, - "loss": 0.4015, - "step": 8642 - }, - { - "epoch": 0.5648650415005555, - "grad_norm": 0.48529309034347534, - "learning_rate": 9.290011278365529e-06, - "loss": 0.409, - "step": 8643 - }, - { - "epoch": 0.5649303967060977, - "grad_norm": 0.5249614715576172, - "learning_rate": 9.289831907882055e-06, - "loss": 0.5118, - "step": 8644 - }, - { - "epoch": 0.5649957519116398, - "grad_norm": 0.4921436905860901, - "learning_rate": 9.289652516475584e-06, - "loss": 0.4659, - "step": 8645 - }, - { - "epoch": 0.5650611071171819, - "grad_norm": 0.45720160007476807, - "learning_rate": 9.289473104146993e-06, - "loss": 0.3783, - "step": 8646 - }, - { - "epoch": 0.565126462322724, - "grad_norm": 0.4505588114261627, - "learning_rate": 9.289293670897156e-06, - "loss": 0.3748, - "step": 8647 - }, - { - "epoch": 0.5651918175282661, - "grad_norm": 0.495913028717041, - "learning_rate": 9.289114216726944e-06, - "loss": 0.4725, - "step": 8648 - }, - { - "epoch": 0.5652571727338083, - "grad_norm": 0.4198303520679474, - "learning_rate": 9.288934741637239e-06, - "loss": 0.3522, - "step": 8649 - }, - { - "epoch": 0.5653225279393503, - "grad_norm": 0.43457698822021484, - "learning_rate": 9.288755245628913e-06, - "loss": 0.3483, - "step": 8650 - }, - { - "epoch": 0.5653878831448925, - "grad_norm": 0.47232887148857117, - "learning_rate": 9.288575728702842e-06, - "loss": 0.4, - "step": 8651 - }, - { - "epoch": 0.5654532383504346, - "grad_norm": 0.4795187711715698, - "learning_rate": 9.288396190859901e-06, - "loss": 0.4176, - "step": 8652 - }, - { - "epoch": 0.5655185935559768, - "grad_norm": 0.43719935417175293, - "learning_rate": 9.288216632100965e-06, - "loss": 0.3512, - "step": 8653 - }, - { - "epoch": 0.5655839487615189, - "grad_norm": 0.49101826548576355, - "learning_rate": 9.288037052426914e-06, - "loss": 0.4094, - "step": 8654 - }, - { - "epoch": 0.5656493039670609, - "grad_norm": 0.4694311022758484, - "learning_rate": 9.287857451838618e-06, - "loss": 0.4255, - "step": 8655 - }, - { - "epoch": 0.5657146591726031, - "grad_norm": 0.4479016363620758, - "learning_rate": 9.287677830336958e-06, - "loss": 0.3751, - "step": 8656 - }, - { - "epoch": 0.5657800143781452, - "grad_norm": 0.4267028272151947, - "learning_rate": 9.287498187922806e-06, - "loss": 0.3211, - "step": 8657 - }, - { - "epoch": 0.5658453695836874, - "grad_norm": 0.4701300263404846, - "learning_rate": 9.287318524597041e-06, - "loss": 0.3711, - "step": 8658 - }, - { - "epoch": 0.5659107247892294, - "grad_norm": 0.49085545539855957, - "learning_rate": 9.287138840360539e-06, - "loss": 0.444, - "step": 8659 - }, - { - "epoch": 0.5659760799947716, - "grad_norm": 0.4548317492008209, - "learning_rate": 9.286959135214174e-06, - "loss": 0.3932, - "step": 8660 - }, - { - "epoch": 0.5660414352003137, - "grad_norm": 0.5431153774261475, - "learning_rate": 9.286779409158825e-06, - "loss": 0.4172, - "step": 8661 - }, - { - "epoch": 0.5661067904058559, - "grad_norm": 0.7028505206108093, - "learning_rate": 9.286599662195366e-06, - "loss": 0.3475, - "step": 8662 - }, - { - "epoch": 0.566172145611398, - "grad_norm": 0.4354560375213623, - "learning_rate": 9.286419894324679e-06, - "loss": 0.3459, - "step": 8663 - }, - { - "epoch": 0.56623750081694, - "grad_norm": 0.47522449493408203, - "learning_rate": 9.286240105547634e-06, - "loss": 0.4194, - "step": 8664 - }, - { - "epoch": 0.5663028560224822, - "grad_norm": 0.4407506585121155, - "learning_rate": 9.286060295865113e-06, - "loss": 0.3603, - "step": 8665 - }, - { - "epoch": 0.5663682112280243, - "grad_norm": 0.4717441201210022, - "learning_rate": 9.28588046527799e-06, - "loss": 0.3719, - "step": 8666 - }, - { - "epoch": 0.5664335664335665, - "grad_norm": 0.4331178069114685, - "learning_rate": 9.285700613787143e-06, - "loss": 0.366, - "step": 8667 - }, - { - "epoch": 0.5664989216391085, - "grad_norm": 0.4383242428302765, - "learning_rate": 9.28552074139345e-06, - "loss": 0.3901, - "step": 8668 - }, - { - "epoch": 0.5665642768446507, - "grad_norm": 0.4694553315639496, - "learning_rate": 9.285340848097788e-06, - "loss": 0.4356, - "step": 8669 - }, - { - "epoch": 0.5666296320501928, - "grad_norm": 0.49563634395599365, - "learning_rate": 9.285160933901034e-06, - "loss": 0.4208, - "step": 8670 - }, - { - "epoch": 0.566694987255735, - "grad_norm": 0.4513710141181946, - "learning_rate": 9.284980998804065e-06, - "loss": 0.3626, - "step": 8671 - }, - { - "epoch": 0.566760342461277, - "grad_norm": 0.47239089012145996, - "learning_rate": 9.28480104280776e-06, - "loss": 0.4108, - "step": 8672 - }, - { - "epoch": 0.5668256976668191, - "grad_norm": 0.42150744795799255, - "learning_rate": 9.284621065912995e-06, - "loss": 0.3659, - "step": 8673 - }, - { - "epoch": 0.5668910528723613, - "grad_norm": 0.46837830543518066, - "learning_rate": 9.284441068120649e-06, - "loss": 0.43, - "step": 8674 - }, - { - "epoch": 0.5669564080779034, - "grad_norm": 0.45562320947647095, - "learning_rate": 9.2842610494316e-06, - "loss": 0.3655, - "step": 8675 - }, - { - "epoch": 0.5670217632834456, - "grad_norm": 0.5412616729736328, - "learning_rate": 9.284081009846725e-06, - "loss": 0.3798, - "step": 8676 - }, - { - "epoch": 0.5670871184889876, - "grad_norm": 0.45065397024154663, - "learning_rate": 9.283900949366903e-06, - "loss": 0.3324, - "step": 8677 - }, - { - "epoch": 0.5671524736945298, - "grad_norm": 0.4478610157966614, - "learning_rate": 9.283720867993013e-06, - "loss": 0.3807, - "step": 8678 - }, - { - "epoch": 0.5672178289000719, - "grad_norm": 0.48112455010414124, - "learning_rate": 9.28354076572593e-06, - "loss": 0.4082, - "step": 8679 - }, - { - "epoch": 0.567283184105614, - "grad_norm": 0.4559597074985504, - "learning_rate": 9.28336064256654e-06, - "loss": 0.3602, - "step": 8680 - }, - { - "epoch": 0.5673485393111561, - "grad_norm": 0.4585297703742981, - "learning_rate": 9.283180498515711e-06, - "loss": 0.3839, - "step": 8681 - }, - { - "epoch": 0.5674138945166982, - "grad_norm": 0.4685385823249817, - "learning_rate": 9.283000333574328e-06, - "loss": 0.3856, - "step": 8682 - }, - { - "epoch": 0.5674792497222404, - "grad_norm": 0.45225974917411804, - "learning_rate": 9.28282014774327e-06, - "loss": 0.394, - "step": 8683 - }, - { - "epoch": 0.5675446049277825, - "grad_norm": 0.4868801236152649, - "learning_rate": 9.282639941023413e-06, - "loss": 0.4484, - "step": 8684 - }, - { - "epoch": 0.5676099601333247, - "grad_norm": 0.43020132184028625, - "learning_rate": 9.282459713415637e-06, - "loss": 0.3681, - "step": 8685 - }, - { - "epoch": 0.5676753153388667, - "grad_norm": 0.4511564075946808, - "learning_rate": 9.282279464920823e-06, - "loss": 0.4038, - "step": 8686 - }, - { - "epoch": 0.5677406705444089, - "grad_norm": 0.40919962525367737, - "learning_rate": 9.282099195539847e-06, - "loss": 0.3372, - "step": 8687 - }, - { - "epoch": 0.567806025749951, - "grad_norm": 0.4809260666370392, - "learning_rate": 9.28191890527359e-06, - "loss": 0.4269, - "step": 8688 - }, - { - "epoch": 0.567871380955493, - "grad_norm": 0.4630745053291321, - "learning_rate": 9.281738594122933e-06, - "loss": 0.3846, - "step": 8689 - }, - { - "epoch": 0.5679367361610352, - "grad_norm": 0.4574970304965973, - "learning_rate": 9.281558262088753e-06, - "loss": 0.381, - "step": 8690 - }, - { - "epoch": 0.5680020913665773, - "grad_norm": 0.457764208316803, - "learning_rate": 9.281377909171927e-06, - "loss": 0.3929, - "step": 8691 - }, - { - "epoch": 0.5680674465721195, - "grad_norm": 0.39306744933128357, - "learning_rate": 9.281197535373341e-06, - "loss": 0.2826, - "step": 8692 - }, - { - "epoch": 0.5681328017776616, - "grad_norm": 0.4456539750099182, - "learning_rate": 9.28101714069387e-06, - "loss": 0.3812, - "step": 8693 - }, - { - "epoch": 0.5681981569832038, - "grad_norm": 0.4824658930301666, - "learning_rate": 9.280836725134396e-06, - "loss": 0.4175, - "step": 8694 - }, - { - "epoch": 0.5682635121887458, - "grad_norm": 0.43890756368637085, - "learning_rate": 9.280656288695797e-06, - "loss": 0.357, - "step": 8695 - }, - { - "epoch": 0.568328867394288, - "grad_norm": 0.4513535797595978, - "learning_rate": 9.280475831378957e-06, - "loss": 0.3599, - "step": 8696 - }, - { - "epoch": 0.5683942225998301, - "grad_norm": 0.4885607063770294, - "learning_rate": 9.280295353184751e-06, - "loss": 0.414, - "step": 8697 - }, - { - "epoch": 0.5684595778053722, - "grad_norm": 0.4631774127483368, - "learning_rate": 9.280114854114062e-06, - "loss": 0.3978, - "step": 8698 - }, - { - "epoch": 0.5685249330109143, - "grad_norm": 0.4900452494621277, - "learning_rate": 9.279934334167771e-06, - "loss": 0.4221, - "step": 8699 - }, - { - "epoch": 0.5685902882164564, - "grad_norm": 0.47625797986984253, - "learning_rate": 9.279753793346758e-06, - "loss": 0.4086, - "step": 8700 - }, - { - "epoch": 0.5686556434219986, - "grad_norm": 0.46046555042266846, - "learning_rate": 9.279573231651901e-06, - "loss": 0.3931, - "step": 8701 - }, - { - "epoch": 0.5687209986275407, - "grad_norm": 0.4615381956100464, - "learning_rate": 9.279392649084084e-06, - "loss": 0.4088, - "step": 8702 - }, - { - "epoch": 0.5687863538330828, - "grad_norm": 0.4618852138519287, - "learning_rate": 9.279212045644185e-06, - "loss": 0.4472, - "step": 8703 - }, - { - "epoch": 0.5688517090386249, - "grad_norm": 0.45024463534355164, - "learning_rate": 9.279031421333089e-06, - "loss": 0.3928, - "step": 8704 - }, - { - "epoch": 0.5689170642441671, - "grad_norm": 0.45709168910980225, - "learning_rate": 9.278850776151673e-06, - "loss": 0.4237, - "step": 8705 - }, - { - "epoch": 0.5689824194497092, - "grad_norm": 0.44051453471183777, - "learning_rate": 9.27867011010082e-06, - "loss": 0.3821, - "step": 8706 - }, - { - "epoch": 0.5690477746552512, - "grad_norm": 0.4611460566520691, - "learning_rate": 9.278489423181412e-06, - "loss": 0.3872, - "step": 8707 - }, - { - "epoch": 0.5691131298607934, - "grad_norm": 0.44791755080223083, - "learning_rate": 9.278308715394326e-06, - "loss": 0.3552, - "step": 8708 - }, - { - "epoch": 0.5691784850663355, - "grad_norm": 0.4372178018093109, - "learning_rate": 9.278127986740448e-06, - "loss": 0.3553, - "step": 8709 - }, - { - "epoch": 0.5692438402718777, - "grad_norm": 0.4763945937156677, - "learning_rate": 9.277947237220657e-06, - "loss": 0.4066, - "step": 8710 - }, - { - "epoch": 0.5693091954774198, - "grad_norm": 0.4812767207622528, - "learning_rate": 9.277766466835836e-06, - "loss": 0.3918, - "step": 8711 - }, - { - "epoch": 0.5693745506829619, - "grad_norm": 0.44954633712768555, - "learning_rate": 9.277585675586865e-06, - "loss": 0.3622, - "step": 8712 - }, - { - "epoch": 0.569439905888504, - "grad_norm": 0.465044766664505, - "learning_rate": 9.277404863474625e-06, - "loss": 0.3895, - "step": 8713 - }, - { - "epoch": 0.5695052610940461, - "grad_norm": 0.469230979681015, - "learning_rate": 9.277224030500004e-06, - "loss": 0.4119, - "step": 8714 - }, - { - "epoch": 0.5695706162995883, - "grad_norm": 0.4770100712776184, - "learning_rate": 9.277043176663877e-06, - "loss": 0.4144, - "step": 8715 - }, - { - "epoch": 0.5696359715051303, - "grad_norm": 0.42255279421806335, - "learning_rate": 9.276862301967129e-06, - "loss": 0.3563, - "step": 8716 - }, - { - "epoch": 0.5697013267106725, - "grad_norm": 0.45624223351478577, - "learning_rate": 9.276681406410642e-06, - "loss": 0.4024, - "step": 8717 - }, - { - "epoch": 0.5697666819162146, - "grad_norm": 0.4329855442047119, - "learning_rate": 9.276500489995299e-06, - "loss": 0.3916, - "step": 8718 - }, - { - "epoch": 0.5698320371217568, - "grad_norm": 0.4552854299545288, - "learning_rate": 9.27631955272198e-06, - "loss": 0.3883, - "step": 8719 - }, - { - "epoch": 0.5698973923272989, - "grad_norm": 0.4230673313140869, - "learning_rate": 9.27613859459157e-06, - "loss": 0.3505, - "step": 8720 - }, - { - "epoch": 0.569962747532841, - "grad_norm": 0.4523487091064453, - "learning_rate": 9.275957615604952e-06, - "loss": 0.4033, - "step": 8721 - }, - { - "epoch": 0.5700281027383831, - "grad_norm": 0.4278339743614197, - "learning_rate": 9.275776615763004e-06, - "loss": 0.3723, - "step": 8722 - }, - { - "epoch": 0.5700934579439252, - "grad_norm": 0.44875356554985046, - "learning_rate": 9.275595595066615e-06, - "loss": 0.3686, - "step": 8723 - }, - { - "epoch": 0.5701588131494674, - "grad_norm": 0.44797366857528687, - "learning_rate": 9.275414553516664e-06, - "loss": 0.3539, - "step": 8724 - }, - { - "epoch": 0.5702241683550094, - "grad_norm": 0.456897109746933, - "learning_rate": 9.275233491114035e-06, - "loss": 0.3608, - "step": 8725 - }, - { - "epoch": 0.5702895235605516, - "grad_norm": 0.4676462411880493, - "learning_rate": 9.275052407859612e-06, - "loss": 0.4194, - "step": 8726 - }, - { - "epoch": 0.5703548787660937, - "grad_norm": 0.4540182948112488, - "learning_rate": 9.274871303754277e-06, - "loss": 0.3996, - "step": 8727 - }, - { - "epoch": 0.5704202339716359, - "grad_norm": 0.45552679896354675, - "learning_rate": 9.274690178798913e-06, - "loss": 0.4044, - "step": 8728 - }, - { - "epoch": 0.570485589177178, - "grad_norm": 0.42875367403030396, - "learning_rate": 9.274509032994405e-06, - "loss": 0.355, - "step": 8729 - }, - { - "epoch": 0.5705509443827201, - "grad_norm": 0.47671782970428467, - "learning_rate": 9.274327866341635e-06, - "loss": 0.423, - "step": 8730 - }, - { - "epoch": 0.5706162995882622, - "grad_norm": 0.4635762870311737, - "learning_rate": 9.274146678841488e-06, - "loss": 0.4069, - "step": 8731 - }, - { - "epoch": 0.5706816547938043, - "grad_norm": 0.4347746670246124, - "learning_rate": 9.273965470494846e-06, - "loss": 0.3863, - "step": 8732 - }, - { - "epoch": 0.5707470099993465, - "grad_norm": 0.44302892684936523, - "learning_rate": 9.273784241302594e-06, - "loss": 0.3877, - "step": 8733 - }, - { - "epoch": 0.5708123652048885, - "grad_norm": 0.4646381139755249, - "learning_rate": 9.273602991265617e-06, - "loss": 0.386, - "step": 8734 - }, - { - "epoch": 0.5708777204104307, - "grad_norm": 0.490407258272171, - "learning_rate": 9.273421720384797e-06, - "loss": 0.4026, - "step": 8735 - }, - { - "epoch": 0.5709430756159728, - "grad_norm": 0.4498681128025055, - "learning_rate": 9.273240428661017e-06, - "loss": 0.3773, - "step": 8736 - }, - { - "epoch": 0.571008430821515, - "grad_norm": 0.48290586471557617, - "learning_rate": 9.273059116095165e-06, - "loss": 0.409, - "step": 8737 - }, - { - "epoch": 0.571073786027057, - "grad_norm": 0.47024261951446533, - "learning_rate": 9.272877782688123e-06, - "loss": 0.4237, - "step": 8738 - }, - { - "epoch": 0.5711391412325991, - "grad_norm": 0.4955271780490875, - "learning_rate": 9.272696428440775e-06, - "loss": 0.4309, - "step": 8739 - }, - { - "epoch": 0.5712044964381413, - "grad_norm": 0.4528626501560211, - "learning_rate": 9.272515053354008e-06, - "loss": 0.3896, - "step": 8740 - }, - { - "epoch": 0.5712698516436834, - "grad_norm": 0.4436284005641937, - "learning_rate": 9.272333657428703e-06, - "loss": 0.377, - "step": 8741 - }, - { - "epoch": 0.5713352068492256, - "grad_norm": 0.45342323184013367, - "learning_rate": 9.272152240665748e-06, - "loss": 0.3724, - "step": 8742 - }, - { - "epoch": 0.5714005620547676, - "grad_norm": 0.47489410638809204, - "learning_rate": 9.271970803066027e-06, - "loss": 0.3683, - "step": 8743 - }, - { - "epoch": 0.5714659172603098, - "grad_norm": 0.4638662040233612, - "learning_rate": 9.271789344630421e-06, - "loss": 0.4109, - "step": 8744 - }, - { - "epoch": 0.5715312724658519, - "grad_norm": 0.4163479208946228, - "learning_rate": 9.271607865359822e-06, - "loss": 0.3541, - "step": 8745 - }, - { - "epoch": 0.5715966276713941, - "grad_norm": 0.4930134117603302, - "learning_rate": 9.27142636525511e-06, - "loss": 0.4605, - "step": 8746 - }, - { - "epoch": 0.5716619828769361, - "grad_norm": 0.4300340712070465, - "learning_rate": 9.271244844317174e-06, - "loss": 0.3688, - "step": 8747 - }, - { - "epoch": 0.5717273380824782, - "grad_norm": 0.43568021059036255, - "learning_rate": 9.271063302546895e-06, - "loss": 0.3679, - "step": 8748 - }, - { - "epoch": 0.5717926932880204, - "grad_norm": 0.46499136090278625, - "learning_rate": 9.270881739945161e-06, - "loss": 0.4136, - "step": 8749 - }, - { - "epoch": 0.5718580484935625, - "grad_norm": 0.4746057987213135, - "learning_rate": 9.270700156512857e-06, - "loss": 0.4093, - "step": 8750 - }, - { - "epoch": 0.5719234036991047, - "grad_norm": 0.418390691280365, - "learning_rate": 9.270518552250868e-06, - "loss": 0.348, - "step": 8751 - }, - { - "epoch": 0.5719887589046467, - "grad_norm": 0.46858885884284973, - "learning_rate": 9.270336927160084e-06, - "loss": 0.3771, - "step": 8752 - }, - { - "epoch": 0.5720541141101889, - "grad_norm": 0.44084256887435913, - "learning_rate": 9.270155281241385e-06, - "loss": 0.3901, - "step": 8753 - }, - { - "epoch": 0.572119469315731, - "grad_norm": 0.41746649146080017, - "learning_rate": 9.26997361449566e-06, - "loss": 0.3739, - "step": 8754 - }, - { - "epoch": 0.5721848245212732, - "grad_norm": 0.4664878249168396, - "learning_rate": 9.269791926923793e-06, - "loss": 0.4085, - "step": 8755 - }, - { - "epoch": 0.5722501797268152, - "grad_norm": 0.4303506016731262, - "learning_rate": 9.269610218526673e-06, - "loss": 0.3734, - "step": 8756 - }, - { - "epoch": 0.5723155349323573, - "grad_norm": 0.4895287752151489, - "learning_rate": 9.269428489305185e-06, - "loss": 0.4059, - "step": 8757 - }, - { - "epoch": 0.5723808901378995, - "grad_norm": 0.45096322894096375, - "learning_rate": 9.269246739260214e-06, - "loss": 0.3922, - "step": 8758 - }, - { - "epoch": 0.5724462453434416, - "grad_norm": 0.44489356875419617, - "learning_rate": 9.269064968392649e-06, - "loss": 0.3794, - "step": 8759 - }, - { - "epoch": 0.5725116005489838, - "grad_norm": 0.42755281925201416, - "learning_rate": 9.268883176703374e-06, - "loss": 0.3288, - "step": 8760 - }, - { - "epoch": 0.5725769557545258, - "grad_norm": 0.47941091656684875, - "learning_rate": 9.268701364193277e-06, - "loss": 0.3859, - "step": 8761 - }, - { - "epoch": 0.572642310960068, - "grad_norm": 0.464324414730072, - "learning_rate": 9.268519530863244e-06, - "loss": 0.3824, - "step": 8762 - }, - { - "epoch": 0.5727076661656101, - "grad_norm": 0.4192202389240265, - "learning_rate": 9.268337676714165e-06, - "loss": 0.3234, - "step": 8763 - }, - { - "epoch": 0.5727730213711522, - "grad_norm": 0.4357430934906006, - "learning_rate": 9.268155801746923e-06, - "loss": 0.3375, - "step": 8764 - }, - { - "epoch": 0.5728383765766943, - "grad_norm": 0.5227752923965454, - "learning_rate": 9.267973905962406e-06, - "loss": 0.4523, - "step": 8765 - }, - { - "epoch": 0.5729037317822364, - "grad_norm": 0.44509416818618774, - "learning_rate": 9.267791989361501e-06, - "loss": 0.3794, - "step": 8766 - }, - { - "epoch": 0.5729690869877786, - "grad_norm": 0.4422967731952667, - "learning_rate": 9.267610051945097e-06, - "loss": 0.3316, - "step": 8767 - }, - { - "epoch": 0.5730344421933207, - "grad_norm": 0.452650785446167, - "learning_rate": 9.26742809371408e-06, - "loss": 0.3588, - "step": 8768 - }, - { - "epoch": 0.5730997973988629, - "grad_norm": 0.4376973807811737, - "learning_rate": 9.26724611466934e-06, - "loss": 0.3946, - "step": 8769 - }, - { - "epoch": 0.5731651526044049, - "grad_norm": 0.4528738558292389, - "learning_rate": 9.26706411481176e-06, - "loss": 0.4099, - "step": 8770 - }, - { - "epoch": 0.5732305078099471, - "grad_norm": 0.4566257894039154, - "learning_rate": 9.266882094142232e-06, - "loss": 0.3689, - "step": 8771 - }, - { - "epoch": 0.5732958630154892, - "grad_norm": 0.49401339888572693, - "learning_rate": 9.26670005266164e-06, - "loss": 0.4464, - "step": 8772 - }, - { - "epoch": 0.5733612182210313, - "grad_norm": 0.4841618537902832, - "learning_rate": 9.266517990370873e-06, - "loss": 0.4405, - "step": 8773 - }, - { - "epoch": 0.5734265734265734, - "grad_norm": 0.42835187911987305, - "learning_rate": 9.26633590727082e-06, - "loss": 0.341, - "step": 8774 - }, - { - "epoch": 0.5734919286321155, - "grad_norm": 0.42060476541519165, - "learning_rate": 9.26615380336237e-06, - "loss": 0.3694, - "step": 8775 - }, - { - "epoch": 0.5735572838376577, - "grad_norm": 0.44720593094825745, - "learning_rate": 9.26597167864641e-06, - "loss": 0.3982, - "step": 8776 - }, - { - "epoch": 0.5736226390431998, - "grad_norm": 0.4220096468925476, - "learning_rate": 9.265789533123828e-06, - "loss": 0.3518, - "step": 8777 - }, - { - "epoch": 0.573687994248742, - "grad_norm": 0.5278857350349426, - "learning_rate": 9.265607366795513e-06, - "loss": 0.4637, - "step": 8778 - }, - { - "epoch": 0.573753349454284, - "grad_norm": 0.4478168487548828, - "learning_rate": 9.265425179662354e-06, - "loss": 0.3902, - "step": 8779 - }, - { - "epoch": 0.5738187046598262, - "grad_norm": 0.44495293498039246, - "learning_rate": 9.265242971725235e-06, - "loss": 0.3646, - "step": 8780 - }, - { - "epoch": 0.5738840598653683, - "grad_norm": 0.45817679166793823, - "learning_rate": 9.265060742985052e-06, - "loss": 0.4093, - "step": 8781 - }, - { - "epoch": 0.5739494150709104, - "grad_norm": 0.45591941475868225, - "learning_rate": 9.264878493442689e-06, - "loss": 0.3696, - "step": 8782 - }, - { - "epoch": 0.5740147702764525, - "grad_norm": 0.4401930570602417, - "learning_rate": 9.264696223099036e-06, - "loss": 0.3476, - "step": 8783 - }, - { - "epoch": 0.5740801254819946, - "grad_norm": 0.44114094972610474, - "learning_rate": 9.264513931954981e-06, - "loss": 0.3577, - "step": 8784 - }, - { - "epoch": 0.5741454806875368, - "grad_norm": 0.4722091555595398, - "learning_rate": 9.264331620011416e-06, - "loss": 0.3775, - "step": 8785 - }, - { - "epoch": 0.5742108358930789, - "grad_norm": 0.4401685297489166, - "learning_rate": 9.264149287269229e-06, - "loss": 0.3753, - "step": 8786 - }, - { - "epoch": 0.574276191098621, - "grad_norm": 0.47649145126342773, - "learning_rate": 9.263966933729307e-06, - "loss": 0.4598, - "step": 8787 - }, - { - "epoch": 0.5743415463041631, - "grad_norm": 0.5930619239807129, - "learning_rate": 9.263784559392541e-06, - "loss": 0.4158, - "step": 8788 - }, - { - "epoch": 0.5744069015097053, - "grad_norm": 0.5872689485549927, - "learning_rate": 9.263602164259822e-06, - "loss": 0.3999, - "step": 8789 - }, - { - "epoch": 0.5744722567152474, - "grad_norm": 0.43598517775535583, - "learning_rate": 9.263419748332037e-06, - "loss": 0.3516, - "step": 8790 - }, - { - "epoch": 0.5745376119207894, - "grad_norm": 0.4877925217151642, - "learning_rate": 9.263237311610076e-06, - "loss": 0.4097, - "step": 8791 - }, - { - "epoch": 0.5746029671263316, - "grad_norm": 0.4351055920124054, - "learning_rate": 9.26305485409483e-06, - "loss": 0.3431, - "step": 8792 - }, - { - "epoch": 0.5746683223318737, - "grad_norm": 0.48612111806869507, - "learning_rate": 9.262872375787191e-06, - "loss": 0.4377, - "step": 8793 - }, - { - "epoch": 0.5747336775374159, - "grad_norm": 0.4468783140182495, - "learning_rate": 9.262689876688045e-06, - "loss": 0.407, - "step": 8794 - }, - { - "epoch": 0.574799032742958, - "grad_norm": 0.465574711561203, - "learning_rate": 9.262507356798284e-06, - "loss": 0.4172, - "step": 8795 - }, - { - "epoch": 0.5748643879485001, - "grad_norm": 0.44357535243034363, - "learning_rate": 9.262324816118798e-06, - "loss": 0.3849, - "step": 8796 - }, - { - "epoch": 0.5749297431540422, - "grad_norm": 0.46302565932273865, - "learning_rate": 9.262142254650476e-06, - "loss": 0.3913, - "step": 8797 - }, - { - "epoch": 0.5749950983595843, - "grad_norm": 0.44863948225975037, - "learning_rate": 9.261959672394212e-06, - "loss": 0.3917, - "step": 8798 - }, - { - "epoch": 0.5750604535651265, - "grad_norm": 0.4717997908592224, - "learning_rate": 9.261777069350892e-06, - "loss": 0.3964, - "step": 8799 - }, - { - "epoch": 0.5751258087706685, - "grad_norm": 0.4403414726257324, - "learning_rate": 9.26159444552141e-06, - "loss": 0.347, - "step": 8800 - }, - { - "epoch": 0.5751911639762107, - "grad_norm": 0.4735489785671234, - "learning_rate": 9.261411800906656e-06, - "loss": 0.3702, - "step": 8801 - }, - { - "epoch": 0.5752565191817528, - "grad_norm": 0.468436062335968, - "learning_rate": 9.261229135507519e-06, - "loss": 0.3949, - "step": 8802 - }, - { - "epoch": 0.575321874387295, - "grad_norm": 0.4591672122478485, - "learning_rate": 9.261046449324892e-06, - "loss": 0.3921, - "step": 8803 - }, - { - "epoch": 0.5753872295928371, - "grad_norm": 0.5436349511146545, - "learning_rate": 9.260863742359665e-06, - "loss": 0.3809, - "step": 8804 - }, - { - "epoch": 0.5754525847983792, - "grad_norm": 0.45340773463249207, - "learning_rate": 9.260681014612728e-06, - "loss": 0.4302, - "step": 8805 - }, - { - "epoch": 0.5755179400039213, - "grad_norm": 0.47672516107559204, - "learning_rate": 9.260498266084977e-06, - "loss": 0.4347, - "step": 8806 - }, - { - "epoch": 0.5755832952094634, - "grad_norm": 0.4370521605014801, - "learning_rate": 9.260315496777298e-06, - "loss": 0.3618, - "step": 8807 - }, - { - "epoch": 0.5756486504150056, - "grad_norm": 0.4761723279953003, - "learning_rate": 9.260132706690586e-06, - "loss": 0.3582, - "step": 8808 - }, - { - "epoch": 0.5757140056205476, - "grad_norm": 0.44726479053497314, - "learning_rate": 9.259949895825728e-06, - "loss": 0.3629, - "step": 8809 - }, - { - "epoch": 0.5757793608260898, - "grad_norm": 0.4513629972934723, - "learning_rate": 9.25976706418362e-06, - "loss": 0.3572, - "step": 8810 - }, - { - "epoch": 0.5758447160316319, - "grad_norm": 0.4882752299308777, - "learning_rate": 9.259584211765153e-06, - "loss": 0.4025, - "step": 8811 - }, - { - "epoch": 0.5759100712371741, - "grad_norm": 0.5161880254745483, - "learning_rate": 9.259401338571216e-06, - "loss": 0.4447, - "step": 8812 - }, - { - "epoch": 0.5759754264427162, - "grad_norm": 0.46421363949775696, - "learning_rate": 9.259218444602707e-06, - "loss": 0.3547, - "step": 8813 - }, - { - "epoch": 0.5760407816482583, - "grad_norm": 0.44217050075531006, - "learning_rate": 9.259035529860511e-06, - "loss": 0.3898, - "step": 8814 - }, - { - "epoch": 0.5761061368538004, - "grad_norm": 0.44459763169288635, - "learning_rate": 9.258852594345524e-06, - "loss": 0.3863, - "step": 8815 - }, - { - "epoch": 0.5761714920593425, - "grad_norm": 0.4126439392566681, - "learning_rate": 9.258669638058637e-06, - "loss": 0.3076, - "step": 8816 - }, - { - "epoch": 0.5762368472648847, - "grad_norm": 0.45681625604629517, - "learning_rate": 9.258486661000744e-06, - "loss": 0.3973, - "step": 8817 - }, - { - "epoch": 0.5763022024704267, - "grad_norm": 0.4271981418132782, - "learning_rate": 9.258303663172736e-06, - "loss": 0.3545, - "step": 8818 - }, - { - "epoch": 0.5763675576759689, - "grad_norm": 0.451896071434021, - "learning_rate": 9.258120644575505e-06, - "loss": 0.3967, - "step": 8819 - }, - { - "epoch": 0.576432912881511, - "grad_norm": 0.4758460223674774, - "learning_rate": 9.257937605209946e-06, - "loss": 0.4104, - "step": 8820 - }, - { - "epoch": 0.5764982680870532, - "grad_norm": 0.47071176767349243, - "learning_rate": 9.257754545076947e-06, - "loss": 0.3605, - "step": 8821 - }, - { - "epoch": 0.5765636232925953, - "grad_norm": 0.525622546672821, - "learning_rate": 9.257571464177408e-06, - "loss": 0.3436, - "step": 8822 - }, - { - "epoch": 0.5766289784981373, - "grad_norm": 0.5055347681045532, - "learning_rate": 9.257388362512215e-06, - "loss": 0.4054, - "step": 8823 - }, - { - "epoch": 0.5766943337036795, - "grad_norm": 0.46902206540107727, - "learning_rate": 9.257205240082265e-06, - "loss": 0.3875, - "step": 8824 - }, - { - "epoch": 0.5767596889092216, - "grad_norm": 0.44883987307548523, - "learning_rate": 9.257022096888451e-06, - "loss": 0.3924, - "step": 8825 - }, - { - "epoch": 0.5768250441147638, - "grad_norm": 0.46128007769584656, - "learning_rate": 9.256838932931664e-06, - "loss": 0.4266, - "step": 8826 - }, - { - "epoch": 0.5768903993203058, - "grad_norm": 0.4396490156650543, - "learning_rate": 9.2566557482128e-06, - "loss": 0.3625, - "step": 8827 - }, - { - "epoch": 0.576955754525848, - "grad_norm": 0.44678547978401184, - "learning_rate": 9.256472542732751e-06, - "loss": 0.3454, - "step": 8828 - }, - { - "epoch": 0.5770211097313901, - "grad_norm": 0.4691782295703888, - "learning_rate": 9.25628931649241e-06, - "loss": 0.3752, - "step": 8829 - }, - { - "epoch": 0.5770864649369323, - "grad_norm": 0.45704370737075806, - "learning_rate": 9.256106069492674e-06, - "loss": 0.4151, - "step": 8830 - }, - { - "epoch": 0.5771518201424743, - "grad_norm": 0.44608741998672485, - "learning_rate": 9.255922801734432e-06, - "loss": 0.3936, - "step": 8831 - }, - { - "epoch": 0.5772171753480164, - "grad_norm": 0.450510710477829, - "learning_rate": 9.255739513218581e-06, - "loss": 0.426, - "step": 8832 - }, - { - "epoch": 0.5772825305535586, - "grad_norm": 0.4668146073818207, - "learning_rate": 9.255556203946013e-06, - "loss": 0.4163, - "step": 8833 - }, - { - "epoch": 0.5773478857591007, - "grad_norm": 0.4742022752761841, - "learning_rate": 9.255372873917624e-06, - "loss": 0.405, - "step": 8834 - }, - { - "epoch": 0.5774132409646429, - "grad_norm": 0.41478219628334045, - "learning_rate": 9.255189523134307e-06, - "loss": 0.3562, - "step": 8835 - }, - { - "epoch": 0.5774785961701849, - "grad_norm": 0.45639121532440186, - "learning_rate": 9.255006151596956e-06, - "loss": 0.4122, - "step": 8836 - }, - { - "epoch": 0.5775439513757271, - "grad_norm": 0.48286187648773193, - "learning_rate": 9.254822759306468e-06, - "loss": 0.4057, - "step": 8837 - }, - { - "epoch": 0.5776093065812692, - "grad_norm": 0.4757295548915863, - "learning_rate": 9.254639346263734e-06, - "loss": 0.3853, - "step": 8838 - }, - { - "epoch": 0.5776746617868114, - "grad_norm": 0.45264384150505066, - "learning_rate": 9.25445591246965e-06, - "loss": 0.3896, - "step": 8839 - }, - { - "epoch": 0.5777400169923534, - "grad_norm": 0.44126197695732117, - "learning_rate": 9.25427245792511e-06, - "loss": 0.3699, - "step": 8840 - }, - { - "epoch": 0.5778053721978955, - "grad_norm": 0.4655766785144806, - "learning_rate": 9.25408898263101e-06, - "loss": 0.383, - "step": 8841 - }, - { - "epoch": 0.5778707274034377, - "grad_norm": 0.4766494631767273, - "learning_rate": 9.253905486588244e-06, - "loss": 0.4421, - "step": 8842 - }, - { - "epoch": 0.5779360826089798, - "grad_norm": 0.4508941173553467, - "learning_rate": 9.253721969797708e-06, - "loss": 0.3814, - "step": 8843 - }, - { - "epoch": 0.578001437814522, - "grad_norm": 0.4765235483646393, - "learning_rate": 9.253538432260296e-06, - "loss": 0.3952, - "step": 8844 - }, - { - "epoch": 0.578066793020064, - "grad_norm": 0.4527175724506378, - "learning_rate": 9.253354873976904e-06, - "loss": 0.3758, - "step": 8845 - }, - { - "epoch": 0.5781321482256062, - "grad_norm": 0.4632437527179718, - "learning_rate": 9.253171294948425e-06, - "loss": 0.3519, - "step": 8846 - }, - { - "epoch": 0.5781975034311483, - "grad_norm": 0.43092381954193115, - "learning_rate": 9.252987695175756e-06, - "loss": 0.3487, - "step": 8847 - }, - { - "epoch": 0.5782628586366904, - "grad_norm": 0.4423348903656006, - "learning_rate": 9.252804074659796e-06, - "loss": 0.4189, - "step": 8848 - }, - { - "epoch": 0.5783282138422325, - "grad_norm": 0.4612898826599121, - "learning_rate": 9.252620433401434e-06, - "loss": 0.4038, - "step": 8849 - }, - { - "epoch": 0.5783935690477746, - "grad_norm": 0.45391908288002014, - "learning_rate": 9.25243677140157e-06, - "loss": 0.3802, - "step": 8850 - }, - { - "epoch": 0.5784589242533168, - "grad_norm": 0.49826616048812866, - "learning_rate": 9.252253088661098e-06, - "loss": 0.4735, - "step": 8851 - }, - { - "epoch": 0.5785242794588589, - "grad_norm": 0.44494590163230896, - "learning_rate": 9.252069385180917e-06, - "loss": 0.3881, - "step": 8852 - }, - { - "epoch": 0.578589634664401, - "grad_norm": 0.40570759773254395, - "learning_rate": 9.251885660961917e-06, - "loss": 0.3431, - "step": 8853 - }, - { - "epoch": 0.5786549898699431, - "grad_norm": 0.4479600787162781, - "learning_rate": 9.251701916005e-06, - "loss": 0.3886, - "step": 8854 - }, - { - "epoch": 0.5787203450754853, - "grad_norm": 0.44045430421829224, - "learning_rate": 9.251518150311059e-06, - "loss": 0.3906, - "step": 8855 - }, - { - "epoch": 0.5787857002810274, - "grad_norm": 0.46660858392715454, - "learning_rate": 9.251334363880992e-06, - "loss": 0.3719, - "step": 8856 - }, - { - "epoch": 0.5788510554865695, - "grad_norm": 0.49139517545700073, - "learning_rate": 9.251150556715693e-06, - "loss": 0.3944, - "step": 8857 - }, - { - "epoch": 0.5789164106921116, - "grad_norm": 0.44801798462867737, - "learning_rate": 9.250966728816062e-06, - "loss": 0.3677, - "step": 8858 - }, - { - "epoch": 0.5789817658976537, - "grad_norm": 0.48235008120536804, - "learning_rate": 9.250782880182991e-06, - "loss": 0.4145, - "step": 8859 - }, - { - "epoch": 0.5790471211031959, - "grad_norm": 0.4536688029766083, - "learning_rate": 9.250599010817381e-06, - "loss": 0.4246, - "step": 8860 - }, - { - "epoch": 0.579112476308738, - "grad_norm": 0.4723179042339325, - "learning_rate": 9.250415120720128e-06, - "loss": 0.4068, - "step": 8861 - }, - { - "epoch": 0.5791778315142802, - "grad_norm": 0.44327279925346375, - "learning_rate": 9.250231209892126e-06, - "loss": 0.3807, - "step": 8862 - }, - { - "epoch": 0.5792431867198222, - "grad_norm": 0.45332780480384827, - "learning_rate": 9.250047278334276e-06, - "loss": 0.3733, - "step": 8863 - }, - { - "epoch": 0.5793085419253644, - "grad_norm": 0.44817081093788147, - "learning_rate": 9.24986332604747e-06, - "loss": 0.3986, - "step": 8864 - }, - { - "epoch": 0.5793738971309065, - "grad_norm": 0.4547162652015686, - "learning_rate": 9.24967935303261e-06, - "loss": 0.3977, - "step": 8865 - }, - { - "epoch": 0.5794392523364486, - "grad_norm": 0.48845168948173523, - "learning_rate": 9.249495359290592e-06, - "loss": 0.5043, - "step": 8866 - }, - { - "epoch": 0.5795046075419907, - "grad_norm": 0.4500035345554352, - "learning_rate": 9.249311344822313e-06, - "loss": 0.3944, - "step": 8867 - }, - { - "epoch": 0.5795699627475328, - "grad_norm": 0.47348421812057495, - "learning_rate": 9.24912730962867e-06, - "loss": 0.4127, - "step": 8868 - }, - { - "epoch": 0.579635317953075, - "grad_norm": 0.45496344566345215, - "learning_rate": 9.248943253710562e-06, - "loss": 0.4082, - "step": 8869 - }, - { - "epoch": 0.5797006731586171, - "grad_norm": 0.4660952091217041, - "learning_rate": 9.248759177068885e-06, - "loss": 0.3981, - "step": 8870 - }, - { - "epoch": 0.5797660283641592, - "grad_norm": 0.4442198872566223, - "learning_rate": 9.248575079704536e-06, - "loss": 0.38, - "step": 8871 - }, - { - "epoch": 0.5798313835697013, - "grad_norm": 0.46022218465805054, - "learning_rate": 9.248390961618416e-06, - "loss": 0.3583, - "step": 8872 - }, - { - "epoch": 0.5798967387752435, - "grad_norm": 0.5202801823616028, - "learning_rate": 9.248206822811422e-06, - "loss": 0.478, - "step": 8873 - }, - { - "epoch": 0.5799620939807856, - "grad_norm": 0.48663321137428284, - "learning_rate": 9.24802266328445e-06, - "loss": 0.4302, - "step": 8874 - }, - { - "epoch": 0.5800274491863276, - "grad_norm": 0.46237891912460327, - "learning_rate": 9.247838483038403e-06, - "loss": 0.4005, - "step": 8875 - }, - { - "epoch": 0.5800928043918698, - "grad_norm": 0.4653884470462799, - "learning_rate": 9.247654282074173e-06, - "loss": 0.3756, - "step": 8876 - }, - { - "epoch": 0.5801581595974119, - "grad_norm": 0.4650568962097168, - "learning_rate": 9.247470060392662e-06, - "loss": 0.4372, - "step": 8877 - }, - { - "epoch": 0.5802235148029541, - "grad_norm": 0.43710896372795105, - "learning_rate": 9.247285817994768e-06, - "loss": 0.3749, - "step": 8878 - }, - { - "epoch": 0.5802888700084962, - "grad_norm": 0.4592326581478119, - "learning_rate": 9.247101554881393e-06, - "loss": 0.4226, - "step": 8879 - }, - { - "epoch": 0.5803542252140383, - "grad_norm": 0.4464440643787384, - "learning_rate": 9.246917271053427e-06, - "loss": 0.3805, - "step": 8880 - }, - { - "epoch": 0.5804195804195804, - "grad_norm": 0.4540195167064667, - "learning_rate": 9.246732966511778e-06, - "loss": 0.3794, - "step": 8881 - }, - { - "epoch": 0.5804849356251225, - "grad_norm": 0.4573756754398346, - "learning_rate": 9.24654864125734e-06, - "loss": 0.3881, - "step": 8882 - }, - { - "epoch": 0.5805502908306647, - "grad_norm": 0.4530811309814453, - "learning_rate": 9.246364295291013e-06, - "loss": 0.3868, - "step": 8883 - }, - { - "epoch": 0.5806156460362067, - "grad_norm": 0.4787803888320923, - "learning_rate": 9.246179928613696e-06, - "loss": 0.3761, - "step": 8884 - }, - { - "epoch": 0.5806810012417489, - "grad_norm": 0.4541168510913849, - "learning_rate": 9.24599554122629e-06, - "loss": 0.3444, - "step": 8885 - }, - { - "epoch": 0.580746356447291, - "grad_norm": 0.4370727241039276, - "learning_rate": 9.245811133129692e-06, - "loss": 0.3431, - "step": 8886 - }, - { - "epoch": 0.5808117116528332, - "grad_norm": 0.4194653034210205, - "learning_rate": 9.245626704324802e-06, - "loss": 0.3466, - "step": 8887 - }, - { - "epoch": 0.5808770668583753, - "grad_norm": 0.4648381769657135, - "learning_rate": 9.24544225481252e-06, - "loss": 0.3866, - "step": 8888 - }, - { - "epoch": 0.5809424220639174, - "grad_norm": 0.4517500400543213, - "learning_rate": 9.245257784593744e-06, - "loss": 0.3616, - "step": 8889 - }, - { - "epoch": 0.5810077772694595, - "grad_norm": 0.45734766125679016, - "learning_rate": 9.245073293669376e-06, - "loss": 0.3746, - "step": 8890 - }, - { - "epoch": 0.5810731324750016, - "grad_norm": 0.4588703215122223, - "learning_rate": 9.244888782040315e-06, - "loss": 0.4092, - "step": 8891 - }, - { - "epoch": 0.5811384876805438, - "grad_norm": 0.46255338191986084, - "learning_rate": 9.24470424970746e-06, - "loss": 0.4092, - "step": 8892 - }, - { - "epoch": 0.5812038428860858, - "grad_norm": 0.47326725721359253, - "learning_rate": 9.244519696671712e-06, - "loss": 0.4156, - "step": 8893 - }, - { - "epoch": 0.581269198091628, - "grad_norm": 0.4809836745262146, - "learning_rate": 9.244335122933972e-06, - "loss": 0.4302, - "step": 8894 - }, - { - "epoch": 0.5813345532971701, - "grad_norm": 0.4386519491672516, - "learning_rate": 9.244150528495139e-06, - "loss": 0.3686, - "step": 8895 - }, - { - "epoch": 0.5813999085027123, - "grad_norm": 0.48370644450187683, - "learning_rate": 9.243965913356114e-06, - "loss": 0.4589, - "step": 8896 - }, - { - "epoch": 0.5814652637082544, - "grad_norm": 0.48877575993537903, - "learning_rate": 9.243781277517796e-06, - "loss": 0.4587, - "step": 8897 - }, - { - "epoch": 0.5815306189137965, - "grad_norm": 0.4940846860408783, - "learning_rate": 9.243596620981089e-06, - "loss": 0.4141, - "step": 8898 - }, - { - "epoch": 0.5815959741193386, - "grad_norm": 0.47367990016937256, - "learning_rate": 9.243411943746887e-06, - "loss": 0.4045, - "step": 8899 - }, - { - "epoch": 0.5816613293248807, - "grad_norm": 0.45567116141319275, - "learning_rate": 9.243227245816097e-06, - "loss": 0.3817, - "step": 8900 - }, - { - "epoch": 0.5817266845304229, - "grad_norm": 0.4673498272895813, - "learning_rate": 9.243042527189618e-06, - "loss": 0.4207, - "step": 8901 - }, - { - "epoch": 0.5817920397359649, - "grad_norm": 0.46828028559684753, - "learning_rate": 9.24285778786835e-06, - "loss": 0.3906, - "step": 8902 - }, - { - "epoch": 0.5818573949415071, - "grad_norm": 0.4705347716808319, - "learning_rate": 9.242673027853194e-06, - "loss": 0.3665, - "step": 8903 - }, - { - "epoch": 0.5819227501470492, - "grad_norm": 0.4391564428806305, - "learning_rate": 9.242488247145055e-06, - "loss": 0.3892, - "step": 8904 - }, - { - "epoch": 0.5819881053525914, - "grad_norm": 0.4890103340148926, - "learning_rate": 9.242303445744828e-06, - "loss": 0.4675, - "step": 8905 - }, - { - "epoch": 0.5820534605581335, - "grad_norm": 0.4659532606601715, - "learning_rate": 9.242118623653418e-06, - "loss": 0.4274, - "step": 8906 - }, - { - "epoch": 0.5821188157636755, - "grad_norm": 0.4810665547847748, - "learning_rate": 9.241933780871726e-06, - "loss": 0.3743, - "step": 8907 - }, - { - "epoch": 0.5821841709692177, - "grad_norm": 0.4475446343421936, - "learning_rate": 9.241748917400654e-06, - "loss": 0.3464, - "step": 8908 - }, - { - "epoch": 0.5822495261747598, - "grad_norm": 0.4875513017177582, - "learning_rate": 9.241564033241105e-06, - "loss": 0.3873, - "step": 8909 - }, - { - "epoch": 0.582314881380302, - "grad_norm": 0.4478124976158142, - "learning_rate": 9.241379128393974e-06, - "loss": 0.3583, - "step": 8910 - }, - { - "epoch": 0.582380236585844, - "grad_norm": 0.4584704041481018, - "learning_rate": 9.24119420286017e-06, - "loss": 0.3975, - "step": 8911 - }, - { - "epoch": 0.5824455917913862, - "grad_norm": 0.42169299721717834, - "learning_rate": 9.241009256640595e-06, - "loss": 0.345, - "step": 8912 - }, - { - "epoch": 0.5825109469969283, - "grad_norm": 0.4419157803058624, - "learning_rate": 9.240824289736146e-06, - "loss": 0.3586, - "step": 8913 - }, - { - "epoch": 0.5825763022024705, - "grad_norm": 0.45704102516174316, - "learning_rate": 9.240639302147727e-06, - "loss": 0.3507, - "step": 8914 - }, - { - "epoch": 0.5826416574080125, - "grad_norm": 0.41148611903190613, - "learning_rate": 9.240454293876243e-06, - "loss": 0.3338, - "step": 8915 - }, - { - "epoch": 0.5827070126135546, - "grad_norm": 0.4673536717891693, - "learning_rate": 9.240269264922592e-06, - "loss": 0.3775, - "step": 8916 - }, - { - "epoch": 0.5827723678190968, - "grad_norm": 0.4489016532897949, - "learning_rate": 9.240084215287682e-06, - "loss": 0.3975, - "step": 8917 - }, - { - "epoch": 0.5828377230246389, - "grad_norm": 0.453816682100296, - "learning_rate": 9.23989914497241e-06, - "loss": 0.4052, - "step": 8918 - }, - { - "epoch": 0.5829030782301811, - "grad_norm": 0.43146708607673645, - "learning_rate": 9.239714053977682e-06, - "loss": 0.331, - "step": 8919 - }, - { - "epoch": 0.5829684334357231, - "grad_norm": 0.45014113187789917, - "learning_rate": 9.2395289423044e-06, - "loss": 0.3412, - "step": 8920 - }, - { - "epoch": 0.5830337886412653, - "grad_norm": 0.4720291495323181, - "learning_rate": 9.239343809953464e-06, - "loss": 0.364, - "step": 8921 - }, - { - "epoch": 0.5830991438468074, - "grad_norm": 0.45932865142822266, - "learning_rate": 9.239158656925784e-06, - "loss": 0.3985, - "step": 8922 - }, - { - "epoch": 0.5831644990523496, - "grad_norm": 0.4266372323036194, - "learning_rate": 9.238973483222256e-06, - "loss": 0.3164, - "step": 8923 - }, - { - "epoch": 0.5832298542578916, - "grad_norm": 0.43437832593917847, - "learning_rate": 9.238788288843786e-06, - "loss": 0.3351, - "step": 8924 - }, - { - "epoch": 0.5832952094634337, - "grad_norm": 0.4799064099788666, - "learning_rate": 9.238603073791277e-06, - "loss": 0.3891, - "step": 8925 - }, - { - "epoch": 0.5833605646689759, - "grad_norm": 0.47381865978240967, - "learning_rate": 9.238417838065632e-06, - "loss": 0.3977, - "step": 8926 - }, - { - "epoch": 0.583425919874518, - "grad_norm": 0.45300525426864624, - "learning_rate": 9.238232581667757e-06, - "loss": 0.3913, - "step": 8927 - }, - { - "epoch": 0.5834912750800602, - "grad_norm": 0.4447530210018158, - "learning_rate": 9.23804730459855e-06, - "loss": 0.3714, - "step": 8928 - }, - { - "epoch": 0.5835566302856022, - "grad_norm": 0.452631413936615, - "learning_rate": 9.237862006858922e-06, - "loss": 0.4009, - "step": 8929 - }, - { - "epoch": 0.5836219854911444, - "grad_norm": 0.49760764837265015, - "learning_rate": 9.237676688449771e-06, - "loss": 0.3932, - "step": 8930 - }, - { - "epoch": 0.5836873406966865, - "grad_norm": 0.4606693387031555, - "learning_rate": 9.237491349372002e-06, - "loss": 0.3827, - "step": 8931 - }, - { - "epoch": 0.5837526959022286, - "grad_norm": 0.4380769729614258, - "learning_rate": 9.23730598962652e-06, - "loss": 0.3647, - "step": 8932 - }, - { - "epoch": 0.5838180511077707, - "grad_norm": 0.5160754919052124, - "learning_rate": 9.237120609214229e-06, - "loss": 0.4451, - "step": 8933 - }, - { - "epoch": 0.5838834063133128, - "grad_norm": 0.4520348310470581, - "learning_rate": 9.236935208136033e-06, - "loss": 0.4205, - "step": 8934 - }, - { - "epoch": 0.583948761518855, - "grad_norm": 0.452730268239975, - "learning_rate": 9.236749786392835e-06, - "loss": 0.4088, - "step": 8935 - }, - { - "epoch": 0.5840141167243971, - "grad_norm": 0.43783777952194214, - "learning_rate": 9.236564343985544e-06, - "loss": 0.3854, - "step": 8936 - }, - { - "epoch": 0.5840794719299393, - "grad_norm": 0.5229910016059875, - "learning_rate": 9.236378880915058e-06, - "loss": 0.4549, - "step": 8937 - }, - { - "epoch": 0.5841448271354813, - "grad_norm": 0.47226646542549133, - "learning_rate": 9.236193397182284e-06, - "loss": 0.3156, - "step": 8938 - }, - { - "epoch": 0.5842101823410235, - "grad_norm": 0.46665552258491516, - "learning_rate": 9.236007892788129e-06, - "loss": 0.3517, - "step": 8939 - }, - { - "epoch": 0.5842755375465656, - "grad_norm": 0.44889697432518005, - "learning_rate": 9.235822367733495e-06, - "loss": 0.3474, - "step": 8940 - }, - { - "epoch": 0.5843408927521077, - "grad_norm": 0.46805232763290405, - "learning_rate": 9.235636822019289e-06, - "loss": 0.4556, - "step": 8941 - }, - { - "epoch": 0.5844062479576498, - "grad_norm": 0.457375168800354, - "learning_rate": 9.235451255646415e-06, - "loss": 0.4091, - "step": 8942 - }, - { - "epoch": 0.5844716031631919, - "grad_norm": 0.4257332980632782, - "learning_rate": 9.235265668615778e-06, - "loss": 0.3381, - "step": 8943 - }, - { - "epoch": 0.5845369583687341, - "grad_norm": 0.4659012258052826, - "learning_rate": 9.23508006092828e-06, - "loss": 0.3906, - "step": 8944 - }, - { - "epoch": 0.5846023135742762, - "grad_norm": 0.432725727558136, - "learning_rate": 9.234894432584833e-06, - "loss": 0.3374, - "step": 8945 - }, - { - "epoch": 0.5846676687798184, - "grad_norm": 0.4783468246459961, - "learning_rate": 9.234708783586337e-06, - "loss": 0.4302, - "step": 8946 - }, - { - "epoch": 0.5847330239853604, - "grad_norm": 0.45998015999794006, - "learning_rate": 9.2345231139337e-06, - "loss": 0.4149, - "step": 8947 - }, - { - "epoch": 0.5847983791909026, - "grad_norm": 0.4069378674030304, - "learning_rate": 9.234337423627826e-06, - "loss": 0.302, - "step": 8948 - }, - { - "epoch": 0.5848637343964447, - "grad_norm": 0.438897043466568, - "learning_rate": 9.234151712669622e-06, - "loss": 0.3593, - "step": 8949 - }, - { - "epoch": 0.5849290896019868, - "grad_norm": 0.4755827784538269, - "learning_rate": 9.233965981059993e-06, - "loss": 0.4013, - "step": 8950 - }, - { - "epoch": 0.5849944448075289, - "grad_norm": 0.42014962434768677, - "learning_rate": 9.233780228799847e-06, - "loss": 0.3524, - "step": 8951 - }, - { - "epoch": 0.585059800013071, - "grad_norm": 0.4726681113243103, - "learning_rate": 9.233594455890084e-06, - "loss": 0.4245, - "step": 8952 - }, - { - "epoch": 0.5851251552186132, - "grad_norm": 0.442443311214447, - "learning_rate": 9.233408662331618e-06, - "loss": 0.3687, - "step": 8953 - }, - { - "epoch": 0.5851905104241553, - "grad_norm": 0.4709050953388214, - "learning_rate": 9.233222848125349e-06, - "loss": 0.3857, - "step": 8954 - }, - { - "epoch": 0.5852558656296974, - "grad_norm": 0.5213170051574707, - "learning_rate": 9.233037013272189e-06, - "loss": 0.4629, - "step": 8955 - }, - { - "epoch": 0.5853212208352395, - "grad_norm": 0.44747892022132874, - "learning_rate": 9.232851157773038e-06, - "loss": 0.3957, - "step": 8956 - }, - { - "epoch": 0.5853865760407817, - "grad_norm": 0.44545143842697144, - "learning_rate": 9.232665281628805e-06, - "loss": 0.4221, - "step": 8957 - }, - { - "epoch": 0.5854519312463238, - "grad_norm": 0.49116387963294983, - "learning_rate": 9.232479384840398e-06, - "loss": 0.4635, - "step": 8958 - }, - { - "epoch": 0.5855172864518658, - "grad_norm": 0.4828069806098938, - "learning_rate": 9.232293467408724e-06, - "loss": 0.3756, - "step": 8959 - }, - { - "epoch": 0.585582641657408, - "grad_norm": 0.48043420910835266, - "learning_rate": 9.232107529334687e-06, - "loss": 0.4416, - "step": 8960 - }, - { - "epoch": 0.5856479968629501, - "grad_norm": 0.43332362174987793, - "learning_rate": 9.231921570619196e-06, - "loss": 0.3349, - "step": 8961 - }, - { - "epoch": 0.5857133520684923, - "grad_norm": 0.48748546838760376, - "learning_rate": 9.231735591263157e-06, - "loss": 0.4007, - "step": 8962 - }, - { - "epoch": 0.5857787072740344, - "grad_norm": 0.54531329870224, - "learning_rate": 9.231549591267478e-06, - "loss": 0.4761, - "step": 8963 - }, - { - "epoch": 0.5858440624795765, - "grad_norm": 0.47210320830345154, - "learning_rate": 9.231363570633066e-06, - "loss": 0.4098, - "step": 8964 - }, - { - "epoch": 0.5859094176851186, - "grad_norm": 0.44633516669273376, - "learning_rate": 9.231177529360826e-06, - "loss": 0.3824, - "step": 8965 - }, - { - "epoch": 0.5859747728906607, - "grad_norm": 0.4741937816143036, - "learning_rate": 9.23099146745167e-06, - "loss": 0.3827, - "step": 8966 - }, - { - "epoch": 0.5860401280962029, - "grad_norm": 0.4759604036808014, - "learning_rate": 9.230805384906501e-06, - "loss": 0.3943, - "step": 8967 - }, - { - "epoch": 0.586105483301745, - "grad_norm": 0.4685611128807068, - "learning_rate": 9.23061928172623e-06, - "loss": 0.4034, - "step": 8968 - }, - { - "epoch": 0.5861708385072871, - "grad_norm": 0.46542221307754517, - "learning_rate": 9.230433157911762e-06, - "loss": 0.3717, - "step": 8969 - }, - { - "epoch": 0.5862361937128292, - "grad_norm": 0.4728912115097046, - "learning_rate": 9.230247013464006e-06, - "loss": 0.4212, - "step": 8970 - }, - { - "epoch": 0.5863015489183714, - "grad_norm": 0.4357195198535919, - "learning_rate": 9.23006084838387e-06, - "loss": 0.3659, - "step": 8971 - }, - { - "epoch": 0.5863669041239135, - "grad_norm": 0.62079918384552, - "learning_rate": 9.229874662672262e-06, - "loss": 0.3539, - "step": 8972 - }, - { - "epoch": 0.5864322593294556, - "grad_norm": 0.4491276144981384, - "learning_rate": 9.22968845633009e-06, - "loss": 0.392, - "step": 8973 - }, - { - "epoch": 0.5864976145349977, - "grad_norm": 0.4587726294994354, - "learning_rate": 9.229502229358263e-06, - "loss": 0.3987, - "step": 8974 - }, - { - "epoch": 0.5865629697405398, - "grad_norm": 0.4528464674949646, - "learning_rate": 9.229315981757686e-06, - "loss": 0.3658, - "step": 8975 - }, - { - "epoch": 0.586628324946082, - "grad_norm": 0.4658888876438141, - "learning_rate": 9.229129713529271e-06, - "loss": 0.3783, - "step": 8976 - }, - { - "epoch": 0.586693680151624, - "grad_norm": 0.467284619808197, - "learning_rate": 9.228943424673925e-06, - "loss": 0.3779, - "step": 8977 - }, - { - "epoch": 0.5867590353571662, - "grad_norm": 0.4674723148345947, - "learning_rate": 9.228757115192559e-06, - "loss": 0.397, - "step": 8978 - }, - { - "epoch": 0.5868243905627083, - "grad_norm": 0.48817887902259827, - "learning_rate": 9.228570785086077e-06, - "loss": 0.4502, - "step": 8979 - }, - { - "epoch": 0.5868897457682505, - "grad_norm": 0.4686819612979889, - "learning_rate": 9.228384434355391e-06, - "loss": 0.3895, - "step": 8980 - }, - { - "epoch": 0.5869551009737926, - "grad_norm": 0.45721468329429626, - "learning_rate": 9.22819806300141e-06, - "loss": 0.4018, - "step": 8981 - }, - { - "epoch": 0.5870204561793347, - "grad_norm": 0.46017321944236755, - "learning_rate": 9.228011671025041e-06, - "loss": 0.3517, - "step": 8982 - }, - { - "epoch": 0.5870858113848768, - "grad_norm": 0.49878695607185364, - "learning_rate": 9.227825258427194e-06, - "loss": 0.4122, - "step": 8983 - }, - { - "epoch": 0.5871511665904189, - "grad_norm": 0.4482685327529907, - "learning_rate": 9.22763882520878e-06, - "loss": 0.3812, - "step": 8984 - }, - { - "epoch": 0.5872165217959611, - "grad_norm": 0.4409409761428833, - "learning_rate": 9.227452371370706e-06, - "loss": 0.3757, - "step": 8985 - }, - { - "epoch": 0.5872818770015031, - "grad_norm": 0.46814683079719543, - "learning_rate": 9.227265896913884e-06, - "loss": 0.3781, - "step": 8986 - }, - { - "epoch": 0.5873472322070453, - "grad_norm": 0.44779160618782043, - "learning_rate": 9.22707940183922e-06, - "loss": 0.375, - "step": 8987 - }, - { - "epoch": 0.5874125874125874, - "grad_norm": 0.4393285810947418, - "learning_rate": 9.226892886147625e-06, - "loss": 0.3697, - "step": 8988 - }, - { - "epoch": 0.5874779426181296, - "grad_norm": 0.4500323235988617, - "learning_rate": 9.22670634984001e-06, - "loss": 0.3948, - "step": 8989 - }, - { - "epoch": 0.5875432978236717, - "grad_norm": 0.4728008806705475, - "learning_rate": 9.226519792917284e-06, - "loss": 0.3592, - "step": 8990 - }, - { - "epoch": 0.5876086530292137, - "grad_norm": 0.45040658116340637, - "learning_rate": 9.226333215380357e-06, - "loss": 0.3843, - "step": 8991 - }, - { - "epoch": 0.5876740082347559, - "grad_norm": 0.47039368748664856, - "learning_rate": 9.226146617230138e-06, - "loss": 0.3772, - "step": 8992 - }, - { - "epoch": 0.587739363440298, - "grad_norm": 0.4496113359928131, - "learning_rate": 9.225959998467538e-06, - "loss": 0.3652, - "step": 8993 - }, - { - "epoch": 0.5878047186458402, - "grad_norm": 0.45141497254371643, - "learning_rate": 9.225773359093467e-06, - "loss": 0.3887, - "step": 8994 - }, - { - "epoch": 0.5878700738513822, - "grad_norm": 0.4537845253944397, - "learning_rate": 9.225586699108835e-06, - "loss": 0.3885, - "step": 8995 - }, - { - "epoch": 0.5879354290569244, - "grad_norm": 0.46830037236213684, - "learning_rate": 9.225400018514554e-06, - "loss": 0.3788, - "step": 8996 - }, - { - "epoch": 0.5880007842624665, - "grad_norm": 0.4767850339412689, - "learning_rate": 9.225213317311532e-06, - "loss": 0.4265, - "step": 8997 - }, - { - "epoch": 0.5880661394680087, - "grad_norm": 0.4945718050003052, - "learning_rate": 9.225026595500683e-06, - "loss": 0.4166, - "step": 8998 - }, - { - "epoch": 0.5881314946735507, - "grad_norm": 0.4451162815093994, - "learning_rate": 9.224839853082912e-06, - "loss": 0.379, - "step": 8999 - }, - { - "epoch": 0.5881968498790928, - "grad_norm": 0.37969881296157837, - "learning_rate": 9.224653090059136e-06, - "loss": 0.2887, - "step": 9000 - }, - { - "epoch": 0.588262205084635, - "grad_norm": 0.42612314224243164, - "learning_rate": 9.224466306430264e-06, - "loss": 0.4071, - "step": 9001 - }, - { - "epoch": 0.5883275602901771, - "grad_norm": 0.47960203886032104, - "learning_rate": 9.224279502197205e-06, - "loss": 0.3894, - "step": 9002 - }, - { - "epoch": 0.5883929154957193, - "grad_norm": 0.4567774832248688, - "learning_rate": 9.224092677360872e-06, - "loss": 0.3386, - "step": 9003 - }, - { - "epoch": 0.5884582707012613, - "grad_norm": 0.4322126805782318, - "learning_rate": 9.223905831922174e-06, - "loss": 0.3557, - "step": 9004 - }, - { - "epoch": 0.5885236259068035, - "grad_norm": 0.4723544418811798, - "learning_rate": 9.223718965882026e-06, - "loss": 0.4089, - "step": 9005 - }, - { - "epoch": 0.5885889811123456, - "grad_norm": 0.4543313980102539, - "learning_rate": 9.223532079241336e-06, - "loss": 0.4024, - "step": 9006 - }, - { - "epoch": 0.5886543363178878, - "grad_norm": 0.41359829902648926, - "learning_rate": 9.223345172001018e-06, - "loss": 0.3174, - "step": 9007 - }, - { - "epoch": 0.5887196915234298, - "grad_norm": 0.4719434678554535, - "learning_rate": 9.223158244161982e-06, - "loss": 0.4227, - "step": 9008 - }, - { - "epoch": 0.5887850467289719, - "grad_norm": 0.47162926197052, - "learning_rate": 9.22297129572514e-06, - "loss": 0.4045, - "step": 9009 - }, - { - "epoch": 0.5888504019345141, - "grad_norm": 0.4442574977874756, - "learning_rate": 9.222784326691404e-06, - "loss": 0.3603, - "step": 9010 - }, - { - "epoch": 0.5889157571400562, - "grad_norm": 0.45001548528671265, - "learning_rate": 9.222597337061686e-06, - "loss": 0.3363, - "step": 9011 - }, - { - "epoch": 0.5889811123455984, - "grad_norm": 0.48516085743904114, - "learning_rate": 9.2224103268369e-06, - "loss": 0.4837, - "step": 9012 - }, - { - "epoch": 0.5890464675511404, - "grad_norm": 0.4140465557575226, - "learning_rate": 9.222223296017953e-06, - "loss": 0.3525, - "step": 9013 - }, - { - "epoch": 0.5891118227566826, - "grad_norm": 0.4737408459186554, - "learning_rate": 9.22203624460576e-06, - "loss": 0.4215, - "step": 9014 - }, - { - "epoch": 0.5891771779622247, - "grad_norm": 0.46026068925857544, - "learning_rate": 9.221849172601236e-06, - "loss": 0.4183, - "step": 9015 - }, - { - "epoch": 0.5892425331677668, - "grad_norm": 0.4173218309879303, - "learning_rate": 9.22166208000529e-06, - "loss": 0.3561, - "step": 9016 - }, - { - "epoch": 0.5893078883733089, - "grad_norm": 0.4410687983036041, - "learning_rate": 9.221474966818836e-06, - "loss": 0.3888, - "step": 9017 - }, - { - "epoch": 0.589373243578851, - "grad_norm": 0.45688557624816895, - "learning_rate": 9.221287833042784e-06, - "loss": 0.4263, - "step": 9018 - }, - { - "epoch": 0.5894385987843932, - "grad_norm": 0.4256860017776489, - "learning_rate": 9.221100678678051e-06, - "loss": 0.3654, - "step": 9019 - }, - { - "epoch": 0.5895039539899353, - "grad_norm": 0.4474024772644043, - "learning_rate": 9.220913503725548e-06, - "loss": 0.367, - "step": 9020 - }, - { - "epoch": 0.5895693091954775, - "grad_norm": 0.46031689643859863, - "learning_rate": 9.220726308186186e-06, - "loss": 0.3868, - "step": 9021 - }, - { - "epoch": 0.5896346644010195, - "grad_norm": 0.4893234074115753, - "learning_rate": 9.220539092060881e-06, - "loss": 0.3974, - "step": 9022 - }, - { - "epoch": 0.5897000196065617, - "grad_norm": 0.44660866260528564, - "learning_rate": 9.220351855350543e-06, - "loss": 0.3825, - "step": 9023 - }, - { - "epoch": 0.5897653748121038, - "grad_norm": 0.5068235397338867, - "learning_rate": 9.220164598056088e-06, - "loss": 0.4405, - "step": 9024 - }, - { - "epoch": 0.5898307300176459, - "grad_norm": 0.45615455508232117, - "learning_rate": 9.219977320178429e-06, - "loss": 0.3599, - "step": 9025 - }, - { - "epoch": 0.589896085223188, - "grad_norm": 0.4334595501422882, - "learning_rate": 9.219790021718477e-06, - "loss": 0.3544, - "step": 9026 - }, - { - "epoch": 0.5899614404287301, - "grad_norm": 0.5093353986740112, - "learning_rate": 9.219602702677148e-06, - "loss": 0.4572, - "step": 9027 - }, - { - "epoch": 0.5900267956342723, - "grad_norm": 0.4460398256778717, - "learning_rate": 9.219415363055355e-06, - "loss": 0.3717, - "step": 9028 - }, - { - "epoch": 0.5900921508398144, - "grad_norm": 0.44923314452171326, - "learning_rate": 9.219228002854011e-06, - "loss": 0.3922, - "step": 9029 - }, - { - "epoch": 0.5901575060453566, - "grad_norm": 0.45911285281181335, - "learning_rate": 9.219040622074031e-06, - "loss": 0.3819, - "step": 9030 - }, - { - "epoch": 0.5902228612508986, - "grad_norm": 0.44375184178352356, - "learning_rate": 9.218853220716329e-06, - "loss": 0.3863, - "step": 9031 - }, - { - "epoch": 0.5902882164564408, - "grad_norm": 0.4464426338672638, - "learning_rate": 9.218665798781817e-06, - "loss": 0.3765, - "step": 9032 - }, - { - "epoch": 0.5903535716619829, - "grad_norm": 0.4511531889438629, - "learning_rate": 9.21847835627141e-06, - "loss": 0.3684, - "step": 9033 - }, - { - "epoch": 0.590418926867525, - "grad_norm": 0.44682687520980835, - "learning_rate": 9.218290893186023e-06, - "loss": 0.4265, - "step": 9034 - }, - { - "epoch": 0.5904842820730671, - "grad_norm": 0.46610260009765625, - "learning_rate": 9.218103409526571e-06, - "loss": 0.4444, - "step": 9035 - }, - { - "epoch": 0.5905496372786092, - "grad_norm": 0.46726521849632263, - "learning_rate": 9.217915905293965e-06, - "loss": 0.3995, - "step": 9036 - }, - { - "epoch": 0.5906149924841514, - "grad_norm": 0.46023795008659363, - "learning_rate": 9.217728380489124e-06, - "loss": 0.3712, - "step": 9037 - }, - { - "epoch": 0.5906803476896935, - "grad_norm": 0.4558008909225464, - "learning_rate": 9.217540835112961e-06, - "loss": 0.3887, - "step": 9038 - }, - { - "epoch": 0.5907457028952356, - "grad_norm": 0.4319905936717987, - "learning_rate": 9.217353269166388e-06, - "loss": 0.342, - "step": 9039 - }, - { - "epoch": 0.5908110581007777, - "grad_norm": 0.488342821598053, - "learning_rate": 9.217165682650323e-06, - "loss": 0.3755, - "step": 9040 - }, - { - "epoch": 0.5908764133063199, - "grad_norm": 0.4749290943145752, - "learning_rate": 9.216978075565681e-06, - "loss": 0.4095, - "step": 9041 - }, - { - "epoch": 0.590941768511862, - "grad_norm": 0.44274985790252686, - "learning_rate": 9.216790447913376e-06, - "loss": 0.3559, - "step": 9042 - }, - { - "epoch": 0.591007123717404, - "grad_norm": 0.4593304693698883, - "learning_rate": 9.216602799694324e-06, - "loss": 0.3815, - "step": 9043 - }, - { - "epoch": 0.5910724789229462, - "grad_norm": 0.4413754642009735, - "learning_rate": 9.216415130909438e-06, - "loss": 0.3791, - "step": 9044 - }, - { - "epoch": 0.5911378341284883, - "grad_norm": 0.5065827369689941, - "learning_rate": 9.216227441559633e-06, - "loss": 0.4888, - "step": 9045 - }, - { - "epoch": 0.5912031893340305, - "grad_norm": 0.4619041085243225, - "learning_rate": 9.216039731645828e-06, - "loss": 0.3981, - "step": 9046 - }, - { - "epoch": 0.5912685445395726, - "grad_norm": 0.48635900020599365, - "learning_rate": 9.215852001168937e-06, - "loss": 0.4214, - "step": 9047 - }, - { - "epoch": 0.5913338997451147, - "grad_norm": 0.40824180841445923, - "learning_rate": 9.215664250129875e-06, - "loss": 0.3628, - "step": 9048 - }, - { - "epoch": 0.5913992549506568, - "grad_norm": 0.44060084223747253, - "learning_rate": 9.215476478529557e-06, - "loss": 0.3846, - "step": 9049 - }, - { - "epoch": 0.5914646101561989, - "grad_norm": 0.4784317910671234, - "learning_rate": 9.2152886863689e-06, - "loss": 0.4068, - "step": 9050 - }, - { - "epoch": 0.5915299653617411, - "grad_norm": 0.43177342414855957, - "learning_rate": 9.21510087364882e-06, - "loss": 0.3494, - "step": 9051 - }, - { - "epoch": 0.5915953205672831, - "grad_norm": 0.44881895184516907, - "learning_rate": 9.214913040370233e-06, - "loss": 0.3568, - "step": 9052 - }, - { - "epoch": 0.5916606757728253, - "grad_norm": 0.45579829812049866, - "learning_rate": 9.214725186534057e-06, - "loss": 0.4396, - "step": 9053 - }, - { - "epoch": 0.5917260309783674, - "grad_norm": 0.4880259335041046, - "learning_rate": 9.214537312141203e-06, - "loss": 0.4365, - "step": 9054 - }, - { - "epoch": 0.5917913861839096, - "grad_norm": 0.4429958760738373, - "learning_rate": 9.214349417192592e-06, - "loss": 0.3688, - "step": 9055 - }, - { - "epoch": 0.5918567413894517, - "grad_norm": 0.4387759566307068, - "learning_rate": 9.214161501689138e-06, - "loss": 0.3733, - "step": 9056 - }, - { - "epoch": 0.5919220965949938, - "grad_norm": 0.4535773694515228, - "learning_rate": 9.21397356563176e-06, - "loss": 0.4017, - "step": 9057 - }, - { - "epoch": 0.5919874518005359, - "grad_norm": 0.49486440420150757, - "learning_rate": 9.21378560902137e-06, - "loss": 0.4568, - "step": 9058 - }, - { - "epoch": 0.592052807006078, - "grad_norm": 0.46412143111228943, - "learning_rate": 9.21359763185889e-06, - "loss": 0.399, - "step": 9059 - }, - { - "epoch": 0.5921181622116202, - "grad_norm": 0.46160778403282166, - "learning_rate": 9.213409634145236e-06, - "loss": 0.4126, - "step": 9060 - }, - { - "epoch": 0.5921835174171622, - "grad_norm": 0.4399988055229187, - "learning_rate": 9.213221615881321e-06, - "loss": 0.3995, - "step": 9061 - }, - { - "epoch": 0.5922488726227044, - "grad_norm": 0.44348669052124023, - "learning_rate": 9.213033577068065e-06, - "loss": 0.3928, - "step": 9062 - }, - { - "epoch": 0.5923142278282465, - "grad_norm": 0.46584224700927734, - "learning_rate": 9.212845517706386e-06, - "loss": 0.4136, - "step": 9063 - }, - { - "epoch": 0.5923795830337887, - "grad_norm": 0.439632385969162, - "learning_rate": 9.212657437797198e-06, - "loss": 0.3576, - "step": 9064 - }, - { - "epoch": 0.5924449382393308, - "grad_norm": 0.4497891366481781, - "learning_rate": 9.212469337341422e-06, - "loss": 0.3729, - "step": 9065 - }, - { - "epoch": 0.5925102934448729, - "grad_norm": 0.43726828694343567, - "learning_rate": 9.212281216339975e-06, - "loss": 0.3618, - "step": 9066 - }, - { - "epoch": 0.592575648650415, - "grad_norm": 0.5010649561882019, - "learning_rate": 9.21209307479377e-06, - "loss": 0.4298, - "step": 9067 - }, - { - "epoch": 0.5926410038559571, - "grad_norm": 0.4569692015647888, - "learning_rate": 9.21190491270373e-06, - "loss": 0.3834, - "step": 9068 - }, - { - "epoch": 0.5927063590614993, - "grad_norm": 0.42680060863494873, - "learning_rate": 9.21171673007077e-06, - "loss": 0.3463, - "step": 9069 - }, - { - "epoch": 0.5927717142670413, - "grad_norm": 0.4854176938533783, - "learning_rate": 9.211528526895808e-06, - "loss": 0.4097, - "step": 9070 - }, - { - "epoch": 0.5928370694725835, - "grad_norm": 0.4481373727321625, - "learning_rate": 9.211340303179764e-06, - "loss": 0.3551, - "step": 9071 - }, - { - "epoch": 0.5929024246781256, - "grad_norm": 0.44821399450302124, - "learning_rate": 9.211152058923552e-06, - "loss": 0.3415, - "step": 9072 - }, - { - "epoch": 0.5929677798836678, - "grad_norm": 0.4925774037837982, - "learning_rate": 9.210963794128094e-06, - "loss": 0.4952, - "step": 9073 - }, - { - "epoch": 0.5930331350892099, - "grad_norm": 0.43282485008239746, - "learning_rate": 9.210775508794306e-06, - "loss": 0.3667, - "step": 9074 - }, - { - "epoch": 0.5930984902947519, - "grad_norm": 0.43615296483039856, - "learning_rate": 9.210587202923106e-06, - "loss": 0.3415, - "step": 9075 - }, - { - "epoch": 0.5931638455002941, - "grad_norm": 0.4541088342666626, - "learning_rate": 9.210398876515417e-06, - "loss": 0.4235, - "step": 9076 - }, - { - "epoch": 0.5932292007058362, - "grad_norm": 0.47272148728370667, - "learning_rate": 9.21021052957215e-06, - "loss": 0.4112, - "step": 9077 - }, - { - "epoch": 0.5932945559113784, - "grad_norm": 0.4851337671279907, - "learning_rate": 9.21002216209423e-06, - "loss": 0.4457, - "step": 9078 - }, - { - "epoch": 0.5933599111169204, - "grad_norm": 0.4812110960483551, - "learning_rate": 9.209833774082573e-06, - "loss": 0.4372, - "step": 9079 - }, - { - "epoch": 0.5934252663224626, - "grad_norm": 0.43391725420951843, - "learning_rate": 9.209645365538099e-06, - "loss": 0.3508, - "step": 9080 - }, - { - "epoch": 0.5934906215280047, - "grad_norm": 0.4428863823413849, - "learning_rate": 9.209456936461725e-06, - "loss": 0.3487, - "step": 9081 - }, - { - "epoch": 0.5935559767335469, - "grad_norm": 0.4442507326602936, - "learning_rate": 9.209268486854373e-06, - "loss": 0.3763, - "step": 9082 - }, - { - "epoch": 0.593621331939089, - "grad_norm": 0.41742801666259766, - "learning_rate": 9.209080016716957e-06, - "loss": 0.3612, - "step": 9083 - }, - { - "epoch": 0.593686687144631, - "grad_norm": 0.4596056640148163, - "learning_rate": 9.208891526050403e-06, - "loss": 0.4116, - "step": 9084 - }, - { - "epoch": 0.5937520423501732, - "grad_norm": 0.4446125030517578, - "learning_rate": 9.208703014855627e-06, - "loss": 0.3938, - "step": 9085 - }, - { - "epoch": 0.5938173975557153, - "grad_norm": 0.43721136450767517, - "learning_rate": 9.208514483133546e-06, - "loss": 0.3712, - "step": 9086 - }, - { - "epoch": 0.5938827527612575, - "grad_norm": 0.4259456396102905, - "learning_rate": 9.208325930885082e-06, - "loss": 0.3404, - "step": 9087 - }, - { - "epoch": 0.5939481079667995, - "grad_norm": 0.4259801506996155, - "learning_rate": 9.208137358111156e-06, - "loss": 0.3758, - "step": 9088 - }, - { - "epoch": 0.5940134631723417, - "grad_norm": 0.46871837973594666, - "learning_rate": 9.207948764812686e-06, - "loss": 0.3545, - "step": 9089 - }, - { - "epoch": 0.5940788183778838, - "grad_norm": 0.4888988733291626, - "learning_rate": 9.207760150990593e-06, - "loss": 0.4493, - "step": 9090 - }, - { - "epoch": 0.594144173583426, - "grad_norm": 0.4476315975189209, - "learning_rate": 9.207571516645795e-06, - "loss": 0.3781, - "step": 9091 - }, - { - "epoch": 0.594209528788968, - "grad_norm": 0.4323274493217468, - "learning_rate": 9.207382861779213e-06, - "loss": 0.3454, - "step": 9092 - }, - { - "epoch": 0.5942748839945101, - "grad_norm": 0.45750895142555237, - "learning_rate": 9.207194186391766e-06, - "loss": 0.3713, - "step": 9093 - }, - { - "epoch": 0.5943402392000523, - "grad_norm": 0.5198811292648315, - "learning_rate": 9.207005490484376e-06, - "loss": 0.3899, - "step": 9094 - }, - { - "epoch": 0.5944055944055944, - "grad_norm": 0.5023952126502991, - "learning_rate": 9.206816774057964e-06, - "loss": 0.4233, - "step": 9095 - }, - { - "epoch": 0.5944709496111366, - "grad_norm": 0.4601942002773285, - "learning_rate": 9.206628037113447e-06, - "loss": 0.3839, - "step": 9096 - }, - { - "epoch": 0.5945363048166786, - "grad_norm": 0.5169584155082703, - "learning_rate": 9.206439279651752e-06, - "loss": 0.4461, - "step": 9097 - }, - { - "epoch": 0.5946016600222208, - "grad_norm": 0.466145783662796, - "learning_rate": 9.206250501673791e-06, - "loss": 0.3887, - "step": 9098 - }, - { - "epoch": 0.5946670152277629, - "grad_norm": 0.44113680720329285, - "learning_rate": 9.206061703180491e-06, - "loss": 0.3796, - "step": 9099 - }, - { - "epoch": 0.594732370433305, - "grad_norm": 0.5136611461639404, - "learning_rate": 9.20587288417277e-06, - "loss": 0.4524, - "step": 9100 - }, - { - "epoch": 0.5947977256388471, - "grad_norm": 0.4596930742263794, - "learning_rate": 9.205684044651552e-06, - "loss": 0.3715, - "step": 9101 - }, - { - "epoch": 0.5948630808443892, - "grad_norm": 0.4330142140388489, - "learning_rate": 9.205495184617754e-06, - "loss": 0.3459, - "step": 9102 - }, - { - "epoch": 0.5949284360499314, - "grad_norm": 0.4458671808242798, - "learning_rate": 9.2053063040723e-06, - "loss": 0.3339, - "step": 9103 - }, - { - "epoch": 0.5949937912554735, - "grad_norm": 0.44250407814979553, - "learning_rate": 9.20511740301611e-06, - "loss": 0.3793, - "step": 9104 - }, - { - "epoch": 0.5950591464610157, - "grad_norm": 0.42624783515930176, - "learning_rate": 9.204928481450106e-06, - "loss": 0.3605, - "step": 9105 - }, - { - "epoch": 0.5951245016665577, - "grad_norm": 0.4709337055683136, - "learning_rate": 9.204739539375207e-06, - "loss": 0.4033, - "step": 9106 - }, - { - "epoch": 0.5951898568720999, - "grad_norm": 0.4625800549983978, - "learning_rate": 9.204550576792339e-06, - "loss": 0.3996, - "step": 9107 - }, - { - "epoch": 0.595255212077642, - "grad_norm": 0.46261706948280334, - "learning_rate": 9.204361593702421e-06, - "loss": 0.4234, - "step": 9108 - }, - { - "epoch": 0.595320567283184, - "grad_norm": 0.4749184250831604, - "learning_rate": 9.204172590106374e-06, - "loss": 0.4274, - "step": 9109 - }, - { - "epoch": 0.5953859224887262, - "grad_norm": 0.46007803082466125, - "learning_rate": 9.20398356600512e-06, - "loss": 0.4046, - "step": 9110 - }, - { - "epoch": 0.5954512776942683, - "grad_norm": 0.4391649663448334, - "learning_rate": 9.203794521399584e-06, - "loss": 0.3892, - "step": 9111 - }, - { - "epoch": 0.5955166328998105, - "grad_norm": 0.46174532175064087, - "learning_rate": 9.203605456290685e-06, - "loss": 0.421, - "step": 9112 - }, - { - "epoch": 0.5955819881053526, - "grad_norm": 0.43722209334373474, - "learning_rate": 9.203416370679346e-06, - "loss": 0.4242, - "step": 9113 - }, - { - "epoch": 0.5956473433108948, - "grad_norm": 0.4329133629798889, - "learning_rate": 9.20322726456649e-06, - "loss": 0.3767, - "step": 9114 - }, - { - "epoch": 0.5957126985164368, - "grad_norm": 0.6488149166107178, - "learning_rate": 9.203038137953036e-06, - "loss": 0.3931, - "step": 9115 - }, - { - "epoch": 0.595778053721979, - "grad_norm": 0.4795227646827698, - "learning_rate": 9.20284899083991e-06, - "loss": 0.3944, - "step": 9116 - }, - { - "epoch": 0.5958434089275211, - "grad_norm": 0.43959277868270874, - "learning_rate": 9.202659823228035e-06, - "loss": 0.3887, - "step": 9117 - }, - { - "epoch": 0.5959087641330632, - "grad_norm": 0.4309938848018646, - "learning_rate": 9.20247063511833e-06, - "loss": 0.357, - "step": 9118 - }, - { - "epoch": 0.5959741193386053, - "grad_norm": 0.46862471103668213, - "learning_rate": 9.20228142651172e-06, - "loss": 0.4304, - "step": 9119 - }, - { - "epoch": 0.5960394745441474, - "grad_norm": 0.42521026730537415, - "learning_rate": 9.202092197409129e-06, - "loss": 0.3472, - "step": 9120 - }, - { - "epoch": 0.5961048297496896, - "grad_norm": 0.46872183680534363, - "learning_rate": 9.201902947811478e-06, - "loss": 0.4417, - "step": 9121 - }, - { - "epoch": 0.5961701849552317, - "grad_norm": 0.4616164565086365, - "learning_rate": 9.201713677719692e-06, - "loss": 0.373, - "step": 9122 - }, - { - "epoch": 0.5962355401607738, - "grad_norm": 0.4247811734676361, - "learning_rate": 9.20152438713469e-06, - "loss": 0.3607, - "step": 9123 - }, - { - "epoch": 0.5963008953663159, - "grad_norm": 0.42037004232406616, - "learning_rate": 9.201335076057401e-06, - "loss": 0.3131, - "step": 9124 - }, - { - "epoch": 0.5963662505718581, - "grad_norm": 0.47550836205482483, - "learning_rate": 9.201145744488744e-06, - "loss": 0.4208, - "step": 9125 - }, - { - "epoch": 0.5964316057774002, - "grad_norm": 0.4449000358581543, - "learning_rate": 9.200956392429643e-06, - "loss": 0.3642, - "step": 9126 - }, - { - "epoch": 0.5964969609829422, - "grad_norm": 0.4218422472476959, - "learning_rate": 9.200767019881023e-06, - "loss": 0.3638, - "step": 9127 - }, - { - "epoch": 0.5965623161884844, - "grad_norm": 0.47300466895103455, - "learning_rate": 9.200577626843807e-06, - "loss": 0.4264, - "step": 9128 - }, - { - "epoch": 0.5966276713940265, - "grad_norm": 0.4390457570552826, - "learning_rate": 9.200388213318918e-06, - "loss": 0.3791, - "step": 9129 - }, - { - "epoch": 0.5966930265995687, - "grad_norm": 0.45548853278160095, - "learning_rate": 9.200198779307281e-06, - "loss": 0.4235, - "step": 9130 - }, - { - "epoch": 0.5967583818051108, - "grad_norm": 0.41298967599868774, - "learning_rate": 9.20000932480982e-06, - "loss": 0.355, - "step": 9131 - }, - { - "epoch": 0.5968237370106529, - "grad_norm": 0.46050941944122314, - "learning_rate": 9.199819849827458e-06, - "loss": 0.4065, - "step": 9132 - }, - { - "epoch": 0.596889092216195, - "grad_norm": 0.4615412950515747, - "learning_rate": 9.19963035436112e-06, - "loss": 0.4242, - "step": 9133 - }, - { - "epoch": 0.5969544474217371, - "grad_norm": 0.423781156539917, - "learning_rate": 9.199440838411729e-06, - "loss": 0.3362, - "step": 9134 - }, - { - "epoch": 0.5970198026272793, - "grad_norm": 0.48159605264663696, - "learning_rate": 9.19925130198021e-06, - "loss": 0.3747, - "step": 9135 - }, - { - "epoch": 0.5970851578328213, - "grad_norm": 0.43601229786872864, - "learning_rate": 9.199061745067488e-06, - "loss": 0.3831, - "step": 9136 - }, - { - "epoch": 0.5971505130383635, - "grad_norm": 0.44950616359710693, - "learning_rate": 9.198872167674488e-06, - "loss": 0.3983, - "step": 9137 - }, - { - "epoch": 0.5972158682439056, - "grad_norm": 0.4608835279941559, - "learning_rate": 9.198682569802135e-06, - "loss": 0.4323, - "step": 9138 - }, - { - "epoch": 0.5972812234494478, - "grad_norm": 0.4185160994529724, - "learning_rate": 9.19849295145135e-06, - "loss": 0.3578, - "step": 9139 - }, - { - "epoch": 0.5973465786549899, - "grad_norm": 0.4930746555328369, - "learning_rate": 9.198303312623062e-06, - "loss": 0.4701, - "step": 9140 - }, - { - "epoch": 0.597411933860532, - "grad_norm": 0.46320006251335144, - "learning_rate": 9.198113653318193e-06, - "loss": 0.3934, - "step": 9141 - }, - { - "epoch": 0.5974772890660741, - "grad_norm": 0.43301257491111755, - "learning_rate": 9.19792397353767e-06, - "loss": 0.3805, - "step": 9142 - }, - { - "epoch": 0.5975426442716162, - "grad_norm": 0.4329968988895416, - "learning_rate": 9.197734273282417e-06, - "loss": 0.3671, - "step": 9143 - }, - { - "epoch": 0.5976079994771584, - "grad_norm": 0.44462713599205017, - "learning_rate": 9.197544552553361e-06, - "loss": 0.3635, - "step": 9144 - }, - { - "epoch": 0.5976733546827004, - "grad_norm": 0.48472943902015686, - "learning_rate": 9.197354811351424e-06, - "loss": 0.4656, - "step": 9145 - }, - { - "epoch": 0.5977387098882426, - "grad_norm": 0.4545370936393738, - "learning_rate": 9.197165049677535e-06, - "loss": 0.4047, - "step": 9146 - }, - { - "epoch": 0.5978040650937847, - "grad_norm": 0.4666481018066406, - "learning_rate": 9.196975267532617e-06, - "loss": 0.4337, - "step": 9147 - }, - { - "epoch": 0.5978694202993269, - "grad_norm": 0.46113815903663635, - "learning_rate": 9.1967854649176e-06, - "loss": 0.3764, - "step": 9148 - }, - { - "epoch": 0.597934775504869, - "grad_norm": 0.4551337659358978, - "learning_rate": 9.196595641833402e-06, - "loss": 0.3893, - "step": 9149 - }, - { - "epoch": 0.5980001307104111, - "grad_norm": 0.44840535521507263, - "learning_rate": 9.196405798280956e-06, - "loss": 0.3531, - "step": 9150 - }, - { - "epoch": 0.5980654859159532, - "grad_norm": 0.42207643389701843, - "learning_rate": 9.196215934261184e-06, - "loss": 0.3542, - "step": 9151 - }, - { - "epoch": 0.5981308411214953, - "grad_norm": 0.45021456480026245, - "learning_rate": 9.196026049775013e-06, - "loss": 0.3805, - "step": 9152 - }, - { - "epoch": 0.5981961963270375, - "grad_norm": 0.43388471007347107, - "learning_rate": 9.195836144823368e-06, - "loss": 0.3322, - "step": 9153 - }, - { - "epoch": 0.5982615515325795, - "grad_norm": 0.49034833908081055, - "learning_rate": 9.19564621940718e-06, - "loss": 0.4043, - "step": 9154 - }, - { - "epoch": 0.5983269067381217, - "grad_norm": 0.44993117451667786, - "learning_rate": 9.195456273527369e-06, - "loss": 0.3794, - "step": 9155 - }, - { - "epoch": 0.5983922619436638, - "grad_norm": 0.4515838325023651, - "learning_rate": 9.195266307184866e-06, - "loss": 0.3834, - "step": 9156 - }, - { - "epoch": 0.598457617149206, - "grad_norm": 0.4347997307777405, - "learning_rate": 9.195076320380596e-06, - "loss": 0.3463, - "step": 9157 - }, - { - "epoch": 0.598522972354748, - "grad_norm": 0.4673284888267517, - "learning_rate": 9.194886313115482e-06, - "loss": 0.3996, - "step": 9158 - }, - { - "epoch": 0.5985883275602901, - "grad_norm": 0.4702996611595154, - "learning_rate": 9.194696285390458e-06, - "loss": 0.3775, - "step": 9159 - }, - { - "epoch": 0.5986536827658323, - "grad_norm": 0.4738173186779022, - "learning_rate": 9.194506237206447e-06, - "loss": 0.3839, - "step": 9160 - }, - { - "epoch": 0.5987190379713744, - "grad_norm": 0.4467931389808655, - "learning_rate": 9.194316168564374e-06, - "loss": 0.377, - "step": 9161 - }, - { - "epoch": 0.5987843931769166, - "grad_norm": 0.43088576197624207, - "learning_rate": 9.194126079465169e-06, - "loss": 0.354, - "step": 9162 - }, - { - "epoch": 0.5988497483824586, - "grad_norm": 0.4529799222946167, - "learning_rate": 9.193935969909758e-06, - "loss": 0.4335, - "step": 9163 - }, - { - "epoch": 0.5989151035880008, - "grad_norm": 0.44353675842285156, - "learning_rate": 9.19374583989907e-06, - "loss": 0.3328, - "step": 9164 - }, - { - "epoch": 0.5989804587935429, - "grad_norm": 0.4639052450656891, - "learning_rate": 9.193555689434026e-06, - "loss": 0.4115, - "step": 9165 - }, - { - "epoch": 0.5990458139990851, - "grad_norm": 0.4736430048942566, - "learning_rate": 9.193365518515562e-06, - "loss": 0.4268, - "step": 9166 - }, - { - "epoch": 0.5991111692046271, - "grad_norm": 0.44214147329330444, - "learning_rate": 9.1931753271446e-06, - "loss": 0.3593, - "step": 9167 - }, - { - "epoch": 0.5991765244101692, - "grad_norm": 0.4463392496109009, - "learning_rate": 9.192985115322071e-06, - "loss": 0.3657, - "step": 9168 - }, - { - "epoch": 0.5992418796157114, - "grad_norm": 0.45829901099205017, - "learning_rate": 9.1927948830489e-06, - "loss": 0.3923, - "step": 9169 - }, - { - "epoch": 0.5993072348212535, - "grad_norm": 0.4527924954891205, - "learning_rate": 9.192604630326017e-06, - "loss": 0.3811, - "step": 9170 - }, - { - "epoch": 0.5993725900267957, - "grad_norm": 0.43915653228759766, - "learning_rate": 9.192414357154346e-06, - "loss": 0.3959, - "step": 9171 - }, - { - "epoch": 0.5994379452323377, - "grad_norm": 0.4366587698459625, - "learning_rate": 9.19222406353482e-06, - "loss": 0.3524, - "step": 9172 - }, - { - "epoch": 0.5995033004378799, - "grad_norm": 0.4628661870956421, - "learning_rate": 9.192033749468365e-06, - "loss": 0.3899, - "step": 9173 - }, - { - "epoch": 0.599568655643422, - "grad_norm": 0.46450647711753845, - "learning_rate": 9.191843414955908e-06, - "loss": 0.3521, - "step": 9174 - }, - { - "epoch": 0.5996340108489642, - "grad_norm": 0.40289822220802307, - "learning_rate": 9.191653059998378e-06, - "loss": 0.3255, - "step": 9175 - }, - { - "epoch": 0.5996993660545062, - "grad_norm": 0.48823636770248413, - "learning_rate": 9.191462684596707e-06, - "loss": 0.4141, - "step": 9176 - }, - { - "epoch": 0.5997647212600483, - "grad_norm": 0.47811248898506165, - "learning_rate": 9.191272288751817e-06, - "loss": 0.4024, - "step": 9177 - }, - { - "epoch": 0.5998300764655905, - "grad_norm": 0.4462015926837921, - "learning_rate": 9.191081872464641e-06, - "loss": 0.4219, - "step": 9178 - }, - { - "epoch": 0.5998954316711326, - "grad_norm": 0.47095420956611633, - "learning_rate": 9.190891435736107e-06, - "loss": 0.4276, - "step": 9179 - }, - { - "epoch": 0.5999607868766748, - "grad_norm": 0.46351829171180725, - "learning_rate": 9.190700978567144e-06, - "loss": 0.3848, - "step": 9180 - }, - { - "epoch": 0.6000261420822168, - "grad_norm": 0.4518841803073883, - "learning_rate": 9.19051050095868e-06, - "loss": 0.4166, - "step": 9181 - }, - { - "epoch": 0.600091497287759, - "grad_norm": 0.4417521059513092, - "learning_rate": 9.190320002911644e-06, - "loss": 0.3937, - "step": 9182 - }, - { - "epoch": 0.6001568524933011, - "grad_norm": 0.43405234813690186, - "learning_rate": 9.190129484426967e-06, - "loss": 0.3838, - "step": 9183 - }, - { - "epoch": 0.6002222076988432, - "grad_norm": 0.4737446904182434, - "learning_rate": 9.189938945505576e-06, - "loss": 0.4147, - "step": 9184 - }, - { - "epoch": 0.6002875629043853, - "grad_norm": 0.428546279668808, - "learning_rate": 9.189748386148403e-06, - "loss": 0.3824, - "step": 9185 - }, - { - "epoch": 0.6003529181099274, - "grad_norm": 0.449856698513031, - "learning_rate": 9.189557806356374e-06, - "loss": 0.3733, - "step": 9186 - }, - { - "epoch": 0.6004182733154696, - "grad_norm": 0.41608351469039917, - "learning_rate": 9.18936720613042e-06, - "loss": 0.3484, - "step": 9187 - }, - { - "epoch": 0.6004836285210117, - "grad_norm": 0.4258476793766022, - "learning_rate": 9.189176585471471e-06, - "loss": 0.3671, - "step": 9188 - }, - { - "epoch": 0.6005489837265539, - "grad_norm": 0.46372899413108826, - "learning_rate": 9.188985944380457e-06, - "loss": 0.3733, - "step": 9189 - }, - { - "epoch": 0.6006143389320959, - "grad_norm": 0.45023807883262634, - "learning_rate": 9.188795282858307e-06, - "loss": 0.4154, - "step": 9190 - }, - { - "epoch": 0.6006796941376381, - "grad_norm": 0.39405637979507446, - "learning_rate": 9.188604600905952e-06, - "loss": 0.2964, - "step": 9191 - }, - { - "epoch": 0.6007450493431802, - "grad_norm": 0.49776163697242737, - "learning_rate": 9.18841389852432e-06, - "loss": 0.4051, - "step": 9192 - }, - { - "epoch": 0.6008104045487223, - "grad_norm": 0.4413900673389435, - "learning_rate": 9.188223175714343e-06, - "loss": 0.3814, - "step": 9193 - }, - { - "epoch": 0.6008757597542644, - "grad_norm": 0.45196112990379333, - "learning_rate": 9.18803243247695e-06, - "loss": 0.3959, - "step": 9194 - }, - { - "epoch": 0.6009411149598065, - "grad_norm": 0.44850677251815796, - "learning_rate": 9.187841668813074e-06, - "loss": 0.4009, - "step": 9195 - }, - { - "epoch": 0.6010064701653487, - "grad_norm": 0.4772990345954895, - "learning_rate": 9.187650884723642e-06, - "loss": 0.4528, - "step": 9196 - }, - { - "epoch": 0.6010718253708908, - "grad_norm": 0.43468621373176575, - "learning_rate": 9.187460080209585e-06, - "loss": 0.3953, - "step": 9197 - }, - { - "epoch": 0.601137180576433, - "grad_norm": 0.48254331946372986, - "learning_rate": 9.187269255271835e-06, - "loss": 0.4459, - "step": 9198 - }, - { - "epoch": 0.601202535781975, - "grad_norm": 0.45564985275268555, - "learning_rate": 9.187078409911322e-06, - "loss": 0.3997, - "step": 9199 - }, - { - "epoch": 0.6012678909875172, - "grad_norm": 0.4532981514930725, - "learning_rate": 9.18688754412898e-06, - "loss": 0.4067, - "step": 9200 - }, - { - "epoch": 0.6013332461930593, - "grad_norm": 0.430982381105423, - "learning_rate": 9.186696657925734e-06, - "loss": 0.3572, - "step": 9201 - }, - { - "epoch": 0.6013986013986014, - "grad_norm": 0.4672890603542328, - "learning_rate": 9.18650575130252e-06, - "loss": 0.4089, - "step": 9202 - }, - { - "epoch": 0.6014639566041435, - "grad_norm": 0.4574647545814514, - "learning_rate": 9.186314824260265e-06, - "loss": 0.3526, - "step": 9203 - }, - { - "epoch": 0.6015293118096856, - "grad_norm": 0.4146009683609009, - "learning_rate": 9.186123876799902e-06, - "loss": 0.3522, - "step": 9204 - }, - { - "epoch": 0.6015946670152278, - "grad_norm": 0.47112053632736206, - "learning_rate": 9.185932908922364e-06, - "loss": 0.3958, - "step": 9205 - }, - { - "epoch": 0.6016600222207699, - "grad_norm": 0.49419665336608887, - "learning_rate": 9.185741920628582e-06, - "loss": 0.4321, - "step": 9206 - }, - { - "epoch": 0.601725377426312, - "grad_norm": 0.44286206364631653, - "learning_rate": 9.185550911919485e-06, - "loss": 0.4233, - "step": 9207 - }, - { - "epoch": 0.6017907326318541, - "grad_norm": 0.4228411614894867, - "learning_rate": 9.185359882796006e-06, - "loss": 0.3486, - "step": 9208 - }, - { - "epoch": 0.6018560878373963, - "grad_norm": 0.4780553877353668, - "learning_rate": 9.185168833259077e-06, - "loss": 0.4282, - "step": 9209 - }, - { - "epoch": 0.6019214430429384, - "grad_norm": 0.4292789399623871, - "learning_rate": 9.18497776330963e-06, - "loss": 0.3883, - "step": 9210 - }, - { - "epoch": 0.6019867982484804, - "grad_norm": 0.4652314782142639, - "learning_rate": 9.184786672948599e-06, - "loss": 0.382, - "step": 9211 - }, - { - "epoch": 0.6020521534540226, - "grad_norm": 0.41927310824394226, - "learning_rate": 9.18459556217691e-06, - "loss": 0.3811, - "step": 9212 - }, - { - "epoch": 0.6021175086595647, - "grad_norm": 0.42894890904426575, - "learning_rate": 9.184404430995499e-06, - "loss": 0.3664, - "step": 9213 - }, - { - "epoch": 0.6021828638651069, - "grad_norm": 0.4503488540649414, - "learning_rate": 9.184213279405302e-06, - "loss": 0.3721, - "step": 9214 - }, - { - "epoch": 0.602248219070649, - "grad_norm": 0.4546520709991455, - "learning_rate": 9.184022107407243e-06, - "loss": 0.402, - "step": 9215 - }, - { - "epoch": 0.6023135742761911, - "grad_norm": 0.4758225381374359, - "learning_rate": 9.183830915002261e-06, - "loss": 0.411, - "step": 9216 - }, - { - "epoch": 0.6023789294817332, - "grad_norm": 0.4261079728603363, - "learning_rate": 9.183639702191285e-06, - "loss": 0.3331, - "step": 9217 - }, - { - "epoch": 0.6024442846872753, - "grad_norm": 0.4577682316303253, - "learning_rate": 9.183448468975248e-06, - "loss": 0.3785, - "step": 9218 - }, - { - "epoch": 0.6025096398928175, - "grad_norm": 0.49699866771698, - "learning_rate": 9.183257215355086e-06, - "loss": 0.4658, - "step": 9219 - }, - { - "epoch": 0.6025749950983595, - "grad_norm": 0.48968544602394104, - "learning_rate": 9.183065941331729e-06, - "loss": 0.3937, - "step": 9220 - }, - { - "epoch": 0.6026403503039017, - "grad_norm": 0.45355671644210815, - "learning_rate": 9.182874646906108e-06, - "loss": 0.4025, - "step": 9221 - }, - { - "epoch": 0.6027057055094438, - "grad_norm": 0.4412393271923065, - "learning_rate": 9.182683332079158e-06, - "loss": 0.3605, - "step": 9222 - }, - { - "epoch": 0.602771060714986, - "grad_norm": 0.45830315351486206, - "learning_rate": 9.182491996851816e-06, - "loss": 0.4112, - "step": 9223 - }, - { - "epoch": 0.6028364159205281, - "grad_norm": 0.46280384063720703, - "learning_rate": 9.182300641225009e-06, - "loss": 0.3903, - "step": 9224 - }, - { - "epoch": 0.6029017711260702, - "grad_norm": 0.47330477833747864, - "learning_rate": 9.182109265199674e-06, - "loss": 0.3751, - "step": 9225 - }, - { - "epoch": 0.6029671263316123, - "grad_norm": 0.41610467433929443, - "learning_rate": 9.181917868776741e-06, - "loss": 0.3447, - "step": 9226 - }, - { - "epoch": 0.6030324815371544, - "grad_norm": 0.4560438096523285, - "learning_rate": 9.18172645195715e-06, - "loss": 0.3833, - "step": 9227 - }, - { - "epoch": 0.6030978367426966, - "grad_norm": 0.5108293294906616, - "learning_rate": 9.181535014741827e-06, - "loss": 0.3996, - "step": 9228 - }, - { - "epoch": 0.6031631919482386, - "grad_norm": 0.4822651147842407, - "learning_rate": 9.18134355713171e-06, - "loss": 0.4373, - "step": 9229 - }, - { - "epoch": 0.6032285471537808, - "grad_norm": 0.43790221214294434, - "learning_rate": 9.18115207912773e-06, - "loss": 0.3831, - "step": 9230 - }, - { - "epoch": 0.6032939023593229, - "grad_norm": 0.4580710530281067, - "learning_rate": 9.180960580730826e-06, - "loss": 0.43, - "step": 9231 - }, - { - "epoch": 0.6033592575648651, - "grad_norm": 0.48753198981285095, - "learning_rate": 9.180769061941927e-06, - "loss": 0.4282, - "step": 9232 - }, - { - "epoch": 0.6034246127704072, - "grad_norm": 0.466320276260376, - "learning_rate": 9.18057752276197e-06, - "loss": 0.4073, - "step": 9233 - }, - { - "epoch": 0.6034899679759493, - "grad_norm": 0.45421427488327026, - "learning_rate": 9.180385963191888e-06, - "loss": 0.3648, - "step": 9234 - }, - { - "epoch": 0.6035553231814914, - "grad_norm": 0.4825940430164337, - "learning_rate": 9.180194383232614e-06, - "loss": 0.4729, - "step": 9235 - }, - { - "epoch": 0.6036206783870335, - "grad_norm": 0.4419664442539215, - "learning_rate": 9.180002782885086e-06, - "loss": 0.3844, - "step": 9236 - }, - { - "epoch": 0.6036860335925757, - "grad_norm": 0.4473826289176941, - "learning_rate": 9.179811162150234e-06, - "loss": 0.3653, - "step": 9237 - }, - { - "epoch": 0.6037513887981177, - "grad_norm": 0.46292537450790405, - "learning_rate": 9.179619521028997e-06, - "loss": 0.4106, - "step": 9238 - }, - { - "epoch": 0.6038167440036599, - "grad_norm": 0.4575139582157135, - "learning_rate": 9.179427859522307e-06, - "loss": 0.4048, - "step": 9239 - }, - { - "epoch": 0.603882099209202, - "grad_norm": 0.5099268555641174, - "learning_rate": 9.179236177631098e-06, - "loss": 0.4383, - "step": 9240 - }, - { - "epoch": 0.6039474544147442, - "grad_norm": 0.4492853879928589, - "learning_rate": 9.17904447535631e-06, - "loss": 0.4265, - "step": 9241 - }, - { - "epoch": 0.6040128096202863, - "grad_norm": 0.4846152365207672, - "learning_rate": 9.17885275269887e-06, - "loss": 0.3551, - "step": 9242 - }, - { - "epoch": 0.6040781648258283, - "grad_norm": 0.4736369550228119, - "learning_rate": 9.17866100965972e-06, - "loss": 0.3912, - "step": 9243 - }, - { - "epoch": 0.6041435200313705, - "grad_norm": 0.4351763129234314, - "learning_rate": 9.178469246239792e-06, - "loss": 0.3586, - "step": 9244 - }, - { - "epoch": 0.6042088752369126, - "grad_norm": 0.4278549253940582, - "learning_rate": 9.178277462440021e-06, - "loss": 0.3663, - "step": 9245 - }, - { - "epoch": 0.6042742304424548, - "grad_norm": 0.463532954454422, - "learning_rate": 9.178085658261345e-06, - "loss": 0.3654, - "step": 9246 - }, - { - "epoch": 0.6043395856479968, - "grad_norm": 0.48848146200180054, - "learning_rate": 9.177893833704697e-06, - "loss": 0.4193, - "step": 9247 - }, - { - "epoch": 0.604404940853539, - "grad_norm": 0.49999526143074036, - "learning_rate": 9.177701988771014e-06, - "loss": 0.4546, - "step": 9248 - }, - { - "epoch": 0.6044702960590811, - "grad_norm": 0.4398805499076843, - "learning_rate": 9.17751012346123e-06, - "loss": 0.3845, - "step": 9249 - }, - { - "epoch": 0.6045356512646233, - "grad_norm": 0.4630897045135498, - "learning_rate": 9.177318237776282e-06, - "loss": 0.3946, - "step": 9250 - }, - { - "epoch": 0.6046010064701653, - "grad_norm": 0.45941224694252014, - "learning_rate": 9.177126331717108e-06, - "loss": 0.3547, - "step": 9251 - }, - { - "epoch": 0.6046663616757074, - "grad_norm": 0.45796695351600647, - "learning_rate": 9.176934405284638e-06, - "loss": 0.4086, - "step": 9252 - }, - { - "epoch": 0.6047317168812496, - "grad_norm": 0.45740631222724915, - "learning_rate": 9.176742458479815e-06, - "loss": 0.379, - "step": 9253 - }, - { - "epoch": 0.6047970720867917, - "grad_norm": 0.45659109950065613, - "learning_rate": 9.176550491303571e-06, - "loss": 0.3924, - "step": 9254 - }, - { - "epoch": 0.6048624272923339, - "grad_norm": 0.4223993420600891, - "learning_rate": 9.176358503756844e-06, - "loss": 0.3316, - "step": 9255 - }, - { - "epoch": 0.6049277824978759, - "grad_norm": 0.47225451469421387, - "learning_rate": 9.176166495840569e-06, - "loss": 0.4003, - "step": 9256 - }, - { - "epoch": 0.6049931377034181, - "grad_norm": 0.4650261402130127, - "learning_rate": 9.175974467555682e-06, - "loss": 0.38, - "step": 9257 - }, - { - "epoch": 0.6050584929089602, - "grad_norm": 0.4409800171852112, - "learning_rate": 9.175782418903122e-06, - "loss": 0.3384, - "step": 9258 - }, - { - "epoch": 0.6051238481145024, - "grad_norm": 0.47074276208877563, - "learning_rate": 9.175590349883825e-06, - "loss": 0.4261, - "step": 9259 - }, - { - "epoch": 0.6051892033200444, - "grad_norm": 0.4849025309085846, - "learning_rate": 9.175398260498728e-06, - "loss": 0.4337, - "step": 9260 - }, - { - "epoch": 0.6052545585255865, - "grad_norm": 0.47750529646873474, - "learning_rate": 9.175206150748766e-06, - "loss": 0.4175, - "step": 9261 - }, - { - "epoch": 0.6053199137311287, - "grad_norm": 0.43006545305252075, - "learning_rate": 9.175014020634877e-06, - "loss": 0.3488, - "step": 9262 - }, - { - "epoch": 0.6053852689366708, - "grad_norm": 0.5229668617248535, - "learning_rate": 9.174821870158e-06, - "loss": 0.4438, - "step": 9263 - }, - { - "epoch": 0.605450624142213, - "grad_norm": 0.4007505476474762, - "learning_rate": 9.174629699319068e-06, - "loss": 0.3662, - "step": 9264 - }, - { - "epoch": 0.605515979347755, - "grad_norm": 0.4569067060947418, - "learning_rate": 9.174437508119022e-06, - "loss": 0.399, - "step": 9265 - }, - { - "epoch": 0.6055813345532972, - "grad_norm": 0.5018656849861145, - "learning_rate": 9.174245296558797e-06, - "loss": 0.4844, - "step": 9266 - }, - { - "epoch": 0.6056466897588393, - "grad_norm": 0.4632817506790161, - "learning_rate": 9.174053064639333e-06, - "loss": 0.4058, - "step": 9267 - }, - { - "epoch": 0.6057120449643814, - "grad_norm": 0.4557321071624756, - "learning_rate": 9.173860812361565e-06, - "loss": 0.3833, - "step": 9268 - }, - { - "epoch": 0.6057774001699235, - "grad_norm": 0.4242776930332184, - "learning_rate": 9.173668539726432e-06, - "loss": 0.3411, - "step": 9269 - }, - { - "epoch": 0.6058427553754656, - "grad_norm": 0.4838681221008301, - "learning_rate": 9.173476246734874e-06, - "loss": 0.4205, - "step": 9270 - }, - { - "epoch": 0.6059081105810078, - "grad_norm": 0.46381068229675293, - "learning_rate": 9.173283933387825e-06, - "loss": 0.3974, - "step": 9271 - }, - { - "epoch": 0.6059734657865499, - "grad_norm": 0.45779410004615784, - "learning_rate": 9.173091599686224e-06, - "loss": 0.397, - "step": 9272 - }, - { - "epoch": 0.606038820992092, - "grad_norm": 0.43464165925979614, - "learning_rate": 9.172899245631009e-06, - "loss": 0.3662, - "step": 9273 - }, - { - "epoch": 0.6061041761976341, - "grad_norm": 0.44983839988708496, - "learning_rate": 9.172706871223118e-06, - "loss": 0.3824, - "step": 9274 - }, - { - "epoch": 0.6061695314031763, - "grad_norm": 0.44361603260040283, - "learning_rate": 9.172514476463492e-06, - "loss": 0.3846, - "step": 9275 - }, - { - "epoch": 0.6062348866087184, - "grad_norm": 0.4228716790676117, - "learning_rate": 9.172322061353067e-06, - "loss": 0.3398, - "step": 9276 - }, - { - "epoch": 0.6063002418142605, - "grad_norm": 0.4249505400657654, - "learning_rate": 9.172129625892783e-06, - "loss": 0.3482, - "step": 9277 - }, - { - "epoch": 0.6063655970198026, - "grad_norm": 0.4705303907394409, - "learning_rate": 9.171937170083576e-06, - "loss": 0.3762, - "step": 9278 - }, - { - "epoch": 0.6064309522253447, - "grad_norm": 0.47127071022987366, - "learning_rate": 9.171744693926385e-06, - "loss": 0.4079, - "step": 9279 - }, - { - "epoch": 0.6064963074308869, - "grad_norm": 0.471608966588974, - "learning_rate": 9.171552197422152e-06, - "loss": 0.4513, - "step": 9280 - }, - { - "epoch": 0.606561662636429, - "grad_norm": 0.49204134941101074, - "learning_rate": 9.171359680571813e-06, - "loss": 0.4552, - "step": 9281 - }, - { - "epoch": 0.6066270178419712, - "grad_norm": 0.44507670402526855, - "learning_rate": 9.171167143376307e-06, - "loss": 0.3592, - "step": 9282 - }, - { - "epoch": 0.6066923730475132, - "grad_norm": 0.43678945302963257, - "learning_rate": 9.170974585836575e-06, - "loss": 0.3695, - "step": 9283 - }, - { - "epoch": 0.6067577282530554, - "grad_norm": 0.44916483759880066, - "learning_rate": 9.170782007953554e-06, - "loss": 0.399, - "step": 9284 - }, - { - "epoch": 0.6068230834585975, - "grad_norm": 0.4740779995918274, - "learning_rate": 9.170589409728185e-06, - "loss": 0.4306, - "step": 9285 - }, - { - "epoch": 0.6068884386641396, - "grad_norm": 0.481114000082016, - "learning_rate": 9.170396791161407e-06, - "loss": 0.4212, - "step": 9286 - }, - { - "epoch": 0.6069537938696817, - "grad_norm": 0.46107128262519836, - "learning_rate": 9.170204152254159e-06, - "loss": 0.4126, - "step": 9287 - }, - { - "epoch": 0.6070191490752238, - "grad_norm": 0.39378905296325684, - "learning_rate": 9.170011493007379e-06, - "loss": 0.3048, - "step": 9288 - }, - { - "epoch": 0.607084504280766, - "grad_norm": 0.5063765645027161, - "learning_rate": 9.16981881342201e-06, - "loss": 0.4835, - "step": 9289 - }, - { - "epoch": 0.6071498594863081, - "grad_norm": 0.45550718903541565, - "learning_rate": 9.16962611349899e-06, - "loss": 0.3817, - "step": 9290 - }, - { - "epoch": 0.6072152146918502, - "grad_norm": 0.5076361894607544, - "learning_rate": 9.169433393239258e-06, - "loss": 0.4454, - "step": 9291 - }, - { - "epoch": 0.6072805698973923, - "grad_norm": 0.43833643198013306, - "learning_rate": 9.169240652643756e-06, - "loss": 0.3643, - "step": 9292 - }, - { - "epoch": 0.6073459251029345, - "grad_norm": 0.40341854095458984, - "learning_rate": 9.169047891713422e-06, - "loss": 0.3335, - "step": 9293 - }, - { - "epoch": 0.6074112803084766, - "grad_norm": 0.4650060832500458, - "learning_rate": 9.168855110449198e-06, - "loss": 0.375, - "step": 9294 - }, - { - "epoch": 0.6074766355140186, - "grad_norm": 0.47409045696258545, - "learning_rate": 9.168662308852021e-06, - "loss": 0.3964, - "step": 9295 - }, - { - "epoch": 0.6075419907195608, - "grad_norm": 0.4652184247970581, - "learning_rate": 9.168469486922838e-06, - "loss": 0.3611, - "step": 9296 - }, - { - "epoch": 0.6076073459251029, - "grad_norm": 0.4325246810913086, - "learning_rate": 9.168276644662581e-06, - "loss": 0.3591, - "step": 9297 - }, - { - "epoch": 0.6076727011306451, - "grad_norm": 0.4370783567428589, - "learning_rate": 9.168083782072196e-06, - "loss": 0.334, - "step": 9298 - }, - { - "epoch": 0.6077380563361872, - "grad_norm": 0.46329495310783386, - "learning_rate": 9.167890899152624e-06, - "loss": 0.4051, - "step": 9299 - }, - { - "epoch": 0.6078034115417293, - "grad_norm": 0.43963465094566345, - "learning_rate": 9.167697995904802e-06, - "loss": 0.3603, - "step": 9300 - }, - { - "epoch": 0.6078687667472714, - "grad_norm": 0.4647124707698822, - "learning_rate": 9.167505072329677e-06, - "loss": 0.3787, - "step": 9301 - }, - { - "epoch": 0.6079341219528135, - "grad_norm": 0.5020391345024109, - "learning_rate": 9.167312128428181e-06, - "loss": 0.4252, - "step": 9302 - }, - { - "epoch": 0.6079994771583557, - "grad_norm": 0.45752403140068054, - "learning_rate": 9.167119164201263e-06, - "loss": 0.4102, - "step": 9303 - }, - { - "epoch": 0.6080648323638977, - "grad_norm": 0.44788506627082825, - "learning_rate": 9.16692617964986e-06, - "loss": 0.3645, - "step": 9304 - }, - { - "epoch": 0.6081301875694399, - "grad_norm": 0.42570140957832336, - "learning_rate": 9.166733174774915e-06, - "loss": 0.3683, - "step": 9305 - }, - { - "epoch": 0.608195542774982, - "grad_norm": 0.4321891963481903, - "learning_rate": 9.166540149577369e-06, - "loss": 0.3538, - "step": 9306 - }, - { - "epoch": 0.6082608979805242, - "grad_norm": 0.4512529671192169, - "learning_rate": 9.166347104058164e-06, - "loss": 0.3583, - "step": 9307 - }, - { - "epoch": 0.6083262531860663, - "grad_norm": 0.4835556149482727, - "learning_rate": 9.16615403821824e-06, - "loss": 0.378, - "step": 9308 - }, - { - "epoch": 0.6083916083916084, - "grad_norm": 0.48107215762138367, - "learning_rate": 9.165960952058538e-06, - "loss": 0.4056, - "step": 9309 - }, - { - "epoch": 0.6084569635971505, - "grad_norm": 0.48607125878334045, - "learning_rate": 9.165767845580004e-06, - "loss": 0.4121, - "step": 9310 - }, - { - "epoch": 0.6085223188026926, - "grad_norm": 0.453477144241333, - "learning_rate": 9.165574718783575e-06, - "loss": 0.4037, - "step": 9311 - }, - { - "epoch": 0.6085876740082348, - "grad_norm": 0.4463033676147461, - "learning_rate": 9.165381571670195e-06, - "loss": 0.3856, - "step": 9312 - }, - { - "epoch": 0.6086530292137768, - "grad_norm": 0.4055895209312439, - "learning_rate": 9.165188404240808e-06, - "loss": 0.3262, - "step": 9313 - }, - { - "epoch": 0.608718384419319, - "grad_norm": 0.4310225546360016, - "learning_rate": 9.164995216496354e-06, - "loss": 0.3695, - "step": 9314 - }, - { - "epoch": 0.6087837396248611, - "grad_norm": 0.48405686020851135, - "learning_rate": 9.164802008437772e-06, - "loss": 0.3959, - "step": 9315 - }, - { - "epoch": 0.6088490948304033, - "grad_norm": 0.4695260524749756, - "learning_rate": 9.164608780066011e-06, - "loss": 0.4043, - "step": 9316 - }, - { - "epoch": 0.6089144500359454, - "grad_norm": 0.4271462559700012, - "learning_rate": 9.164415531382009e-06, - "loss": 0.3703, - "step": 9317 - }, - { - "epoch": 0.6089798052414875, - "grad_norm": 0.4622548520565033, - "learning_rate": 9.16422226238671e-06, - "loss": 0.3775, - "step": 9318 - }, - { - "epoch": 0.6090451604470296, - "grad_norm": 0.4441933035850525, - "learning_rate": 9.164028973081057e-06, - "loss": 0.3782, - "step": 9319 - }, - { - "epoch": 0.6091105156525717, - "grad_norm": 0.45878997445106506, - "learning_rate": 9.163835663465992e-06, - "loss": 0.3747, - "step": 9320 - }, - { - "epoch": 0.6091758708581139, - "grad_norm": 0.45647621154785156, - "learning_rate": 9.163642333542457e-06, - "loss": 0.3813, - "step": 9321 - }, - { - "epoch": 0.6092412260636559, - "grad_norm": 0.43723830580711365, - "learning_rate": 9.163448983311396e-06, - "loss": 0.38, - "step": 9322 - }, - { - "epoch": 0.6093065812691981, - "grad_norm": 0.44617873430252075, - "learning_rate": 9.163255612773752e-06, - "loss": 0.3714, - "step": 9323 - }, - { - "epoch": 0.6093719364747402, - "grad_norm": 0.42884573340415955, - "learning_rate": 9.16306222193047e-06, - "loss": 0.3488, - "step": 9324 - }, - { - "epoch": 0.6094372916802824, - "grad_norm": 0.4828496277332306, - "learning_rate": 9.16286881078249e-06, - "loss": 0.4345, - "step": 9325 - }, - { - "epoch": 0.6095026468858245, - "grad_norm": 0.46828553080558777, - "learning_rate": 9.162675379330757e-06, - "loss": 0.4007, - "step": 9326 - }, - { - "epoch": 0.6095680020913665, - "grad_norm": 0.4563262164592743, - "learning_rate": 9.162481927576213e-06, - "loss": 0.3796, - "step": 9327 - }, - { - "epoch": 0.6096333572969087, - "grad_norm": 0.408500075340271, - "learning_rate": 9.162288455519803e-06, - "loss": 0.3276, - "step": 9328 - }, - { - "epoch": 0.6096987125024508, - "grad_norm": 0.5024822354316711, - "learning_rate": 9.16209496316247e-06, - "loss": 0.337, - "step": 9329 - }, - { - "epoch": 0.609764067707993, - "grad_norm": 0.47403833270072937, - "learning_rate": 9.16190145050516e-06, - "loss": 0.3705, - "step": 9330 - }, - { - "epoch": 0.609829422913535, - "grad_norm": 0.45196640491485596, - "learning_rate": 9.161707917548813e-06, - "loss": 0.385, - "step": 9331 - }, - { - "epoch": 0.6098947781190772, - "grad_norm": 0.47436442971229553, - "learning_rate": 9.161514364294373e-06, - "loss": 0.4057, - "step": 9332 - }, - { - "epoch": 0.6099601333246193, - "grad_norm": 0.4377812445163727, - "learning_rate": 9.16132079074279e-06, - "loss": 0.3866, - "step": 9333 - }, - { - "epoch": 0.6100254885301615, - "grad_norm": 0.45032066106796265, - "learning_rate": 9.161127196895e-06, - "loss": 0.401, - "step": 9334 - }, - { - "epoch": 0.6100908437357035, - "grad_norm": 0.46344923973083496, - "learning_rate": 9.160933582751953e-06, - "loss": 0.3959, - "step": 9335 - }, - { - "epoch": 0.6101561989412456, - "grad_norm": 0.4364248812198639, - "learning_rate": 9.160739948314591e-06, - "loss": 0.3634, - "step": 9336 - }, - { - "epoch": 0.6102215541467878, - "grad_norm": 0.46375197172164917, - "learning_rate": 9.160546293583858e-06, - "loss": 0.391, - "step": 9337 - }, - { - "epoch": 0.6102869093523299, - "grad_norm": 0.4995439946651459, - "learning_rate": 9.160352618560702e-06, - "loss": 0.4723, - "step": 9338 - }, - { - "epoch": 0.6103522645578721, - "grad_norm": 0.4590868651866913, - "learning_rate": 9.160158923246062e-06, - "loss": 0.3908, - "step": 9339 - }, - { - "epoch": 0.6104176197634141, - "grad_norm": 0.46573808789253235, - "learning_rate": 9.159965207640889e-06, - "loss": 0.3823, - "step": 9340 - }, - { - "epoch": 0.6104829749689563, - "grad_norm": 0.425341933965683, - "learning_rate": 9.159771471746122e-06, - "loss": 0.359, - "step": 9341 - }, - { - "epoch": 0.6105483301744984, - "grad_norm": 0.42857423424720764, - "learning_rate": 9.159577715562709e-06, - "loss": 0.374, - "step": 9342 - }, - { - "epoch": 0.6106136853800406, - "grad_norm": 0.41146010160446167, - "learning_rate": 9.159383939091594e-06, - "loss": 0.3417, - "step": 9343 - }, - { - "epoch": 0.6106790405855826, - "grad_norm": 0.4427056610584259, - "learning_rate": 9.159190142333724e-06, - "loss": 0.3579, - "step": 9344 - }, - { - "epoch": 0.6107443957911247, - "grad_norm": 0.38539740443229675, - "learning_rate": 9.158996325290041e-06, - "loss": 0.2726, - "step": 9345 - }, - { - "epoch": 0.6108097509966669, - "grad_norm": 0.44148820638656616, - "learning_rate": 9.158802487961493e-06, - "loss": 0.3861, - "step": 9346 - }, - { - "epoch": 0.610875106202209, - "grad_norm": 0.458450585603714, - "learning_rate": 9.158608630349025e-06, - "loss": 0.3775, - "step": 9347 - }, - { - "epoch": 0.6109404614077512, - "grad_norm": 0.4560156464576721, - "learning_rate": 9.158414752453582e-06, - "loss": 0.4279, - "step": 9348 - }, - { - "epoch": 0.6110058166132932, - "grad_norm": 0.44887226819992065, - "learning_rate": 9.158220854276108e-06, - "loss": 0.3703, - "step": 9349 - }, - { - "epoch": 0.6110711718188354, - "grad_norm": 0.4397490918636322, - "learning_rate": 9.158026935817552e-06, - "loss": 0.3885, - "step": 9350 - }, - { - "epoch": 0.6111365270243775, - "grad_norm": 0.4468318819999695, - "learning_rate": 9.157832997078859e-06, - "loss": 0.3997, - "step": 9351 - }, - { - "epoch": 0.6112018822299197, - "grad_norm": 0.4876280128955841, - "learning_rate": 9.157639038060972e-06, - "loss": 0.4132, - "step": 9352 - }, - { - "epoch": 0.6112672374354617, - "grad_norm": 0.4092002213001251, - "learning_rate": 9.15744505876484e-06, - "loss": 0.3486, - "step": 9353 - }, - { - "epoch": 0.6113325926410038, - "grad_norm": 0.4698246717453003, - "learning_rate": 9.157251059191408e-06, - "loss": 0.4555, - "step": 9354 - }, - { - "epoch": 0.611397947846546, - "grad_norm": 0.45148321986198425, - "learning_rate": 9.15705703934162e-06, - "loss": 0.4153, - "step": 9355 - }, - { - "epoch": 0.6114633030520881, - "grad_norm": 0.4464689791202545, - "learning_rate": 9.15686299921643e-06, - "loss": 0.4065, - "step": 9356 - }, - { - "epoch": 0.6115286582576303, - "grad_norm": 0.4503811001777649, - "learning_rate": 9.156668938816776e-06, - "loss": 0.4099, - "step": 9357 - }, - { - "epoch": 0.6115940134631723, - "grad_norm": 0.4475444555282593, - "learning_rate": 9.156474858143607e-06, - "loss": 0.3891, - "step": 9358 - }, - { - "epoch": 0.6116593686687145, - "grad_norm": 0.4001803398132324, - "learning_rate": 9.15628075719787e-06, - "loss": 0.2989, - "step": 9359 - }, - { - "epoch": 0.6117247238742566, - "grad_norm": 0.4261928200721741, - "learning_rate": 9.156086635980515e-06, - "loss": 0.3362, - "step": 9360 - }, - { - "epoch": 0.6117900790797987, - "grad_norm": 0.46899622678756714, - "learning_rate": 9.155892494492483e-06, - "loss": 0.3836, - "step": 9361 - }, - { - "epoch": 0.6118554342853408, - "grad_norm": 0.4666925370693207, - "learning_rate": 9.155698332734724e-06, - "loss": 0.3707, - "step": 9362 - }, - { - "epoch": 0.6119207894908829, - "grad_norm": 0.44752249121665955, - "learning_rate": 9.155504150708183e-06, - "loss": 0.3341, - "step": 9363 - }, - { - "epoch": 0.6119861446964251, - "grad_norm": 0.4533834159374237, - "learning_rate": 9.15530994841381e-06, - "loss": 0.3728, - "step": 9364 - }, - { - "epoch": 0.6120514999019672, - "grad_norm": 0.45615315437316895, - "learning_rate": 9.155115725852552e-06, - "loss": 0.4314, - "step": 9365 - }, - { - "epoch": 0.6121168551075094, - "grad_norm": 0.448549747467041, - "learning_rate": 9.154921483025355e-06, - "loss": 0.375, - "step": 9366 - }, - { - "epoch": 0.6121822103130514, - "grad_norm": 0.4391409158706665, - "learning_rate": 9.154727219933165e-06, - "loss": 0.3772, - "step": 9367 - }, - { - "epoch": 0.6122475655185936, - "grad_norm": 0.457964152097702, - "learning_rate": 9.154532936576931e-06, - "loss": 0.4139, - "step": 9368 - }, - { - "epoch": 0.6123129207241357, - "grad_norm": 0.4666399657726288, - "learning_rate": 9.154338632957603e-06, - "loss": 0.4676, - "step": 9369 - }, - { - "epoch": 0.6123782759296778, - "grad_norm": 0.4332391023635864, - "learning_rate": 9.154144309076124e-06, - "loss": 0.3582, - "step": 9370 - }, - { - "epoch": 0.6124436311352199, - "grad_norm": 0.450079083442688, - "learning_rate": 9.153949964933445e-06, - "loss": 0.3465, - "step": 9371 - }, - { - "epoch": 0.612508986340762, - "grad_norm": 0.46680015325546265, - "learning_rate": 9.153755600530512e-06, - "loss": 0.3781, - "step": 9372 - }, - { - "epoch": 0.6125743415463042, - "grad_norm": 0.5027528405189514, - "learning_rate": 9.153561215868274e-06, - "loss": 0.4332, - "step": 9373 - }, - { - "epoch": 0.6126396967518463, - "grad_norm": 0.4723973274230957, - "learning_rate": 9.15336681094768e-06, - "loss": 0.4151, - "step": 9374 - }, - { - "epoch": 0.6127050519573884, - "grad_norm": 0.5401503443717957, - "learning_rate": 9.153172385769678e-06, - "loss": 0.5215, - "step": 9375 - }, - { - "epoch": 0.6127704071629305, - "grad_norm": 0.46017375588417053, - "learning_rate": 9.152977940335213e-06, - "loss": 0.3931, - "step": 9376 - }, - { - "epoch": 0.6128357623684727, - "grad_norm": 0.46815547347068787, - "learning_rate": 9.152783474645237e-06, - "loss": 0.3997, - "step": 9377 - }, - { - "epoch": 0.6129011175740148, - "grad_norm": 0.4949282109737396, - "learning_rate": 9.152588988700697e-06, - "loss": 0.4594, - "step": 9378 - }, - { - "epoch": 0.6129664727795568, - "grad_norm": 0.4952649772167206, - "learning_rate": 9.152394482502543e-06, - "loss": 0.4142, - "step": 9379 - }, - { - "epoch": 0.613031827985099, - "grad_norm": 0.4407411217689514, - "learning_rate": 9.152199956051721e-06, - "loss": 0.3609, - "step": 9380 - }, - { - "epoch": 0.6130971831906411, - "grad_norm": 0.4774309992790222, - "learning_rate": 9.152005409349182e-06, - "loss": 0.4013, - "step": 9381 - }, - { - "epoch": 0.6131625383961833, - "grad_norm": 0.44647300243377686, - "learning_rate": 9.151810842395876e-06, - "loss": 0.3817, - "step": 9382 - }, - { - "epoch": 0.6132278936017254, - "grad_norm": 0.4410271644592285, - "learning_rate": 9.151616255192749e-06, - "loss": 0.3861, - "step": 9383 - }, - { - "epoch": 0.6132932488072675, - "grad_norm": 0.46410071849823, - "learning_rate": 9.151421647740751e-06, - "loss": 0.3684, - "step": 9384 - }, - { - "epoch": 0.6133586040128096, - "grad_norm": 0.4376247525215149, - "learning_rate": 9.151227020040832e-06, - "loss": 0.3688, - "step": 9385 - }, - { - "epoch": 0.6134239592183517, - "grad_norm": 0.4442978501319885, - "learning_rate": 9.15103237209394e-06, - "loss": 0.386, - "step": 9386 - }, - { - "epoch": 0.6134893144238939, - "grad_norm": 0.560441792011261, - "learning_rate": 9.150837703901025e-06, - "loss": 0.4521, - "step": 9387 - }, - { - "epoch": 0.613554669629436, - "grad_norm": 0.4365333616733551, - "learning_rate": 9.150643015463036e-06, - "loss": 0.3634, - "step": 9388 - }, - { - "epoch": 0.6136200248349781, - "grad_norm": 0.4355616867542267, - "learning_rate": 9.150448306780923e-06, - "loss": 0.3559, - "step": 9389 - }, - { - "epoch": 0.6136853800405202, - "grad_norm": 0.4717211127281189, - "learning_rate": 9.150253577855637e-06, - "loss": 0.426, - "step": 9390 - }, - { - "epoch": 0.6137507352460624, - "grad_norm": 0.451812207698822, - "learning_rate": 9.150058828688127e-06, - "loss": 0.395, - "step": 9391 - }, - { - "epoch": 0.6138160904516045, - "grad_norm": 0.4561154842376709, - "learning_rate": 9.14986405927934e-06, - "loss": 0.396, - "step": 9392 - }, - { - "epoch": 0.6138814456571466, - "grad_norm": 0.4951837956905365, - "learning_rate": 9.14966926963023e-06, - "loss": 0.455, - "step": 9393 - }, - { - "epoch": 0.6139468008626887, - "grad_norm": 0.4486224949359894, - "learning_rate": 9.149474459741747e-06, - "loss": 0.3446, - "step": 9394 - }, - { - "epoch": 0.6140121560682308, - "grad_norm": 0.4535962641239166, - "learning_rate": 9.149279629614836e-06, - "loss": 0.4157, - "step": 9395 - }, - { - "epoch": 0.614077511273773, - "grad_norm": 0.5602654814720154, - "learning_rate": 9.149084779250453e-06, - "loss": 0.3312, - "step": 9396 - }, - { - "epoch": 0.614142866479315, - "grad_norm": 0.4514555335044861, - "learning_rate": 9.148889908649546e-06, - "loss": 0.349, - "step": 9397 - }, - { - "epoch": 0.6142082216848572, - "grad_norm": 0.4832721948623657, - "learning_rate": 9.148695017813065e-06, - "loss": 0.4652, - "step": 9398 - }, - { - "epoch": 0.6142735768903993, - "grad_norm": 0.4278484284877777, - "learning_rate": 9.148500106741963e-06, - "loss": 0.3439, - "step": 9399 - }, - { - "epoch": 0.6143389320959415, - "grad_norm": 0.7185001969337463, - "learning_rate": 9.148305175437187e-06, - "loss": 0.4237, - "step": 9400 - }, - { - "epoch": 0.6144042873014836, - "grad_norm": 0.48224517703056335, - "learning_rate": 9.148110223899689e-06, - "loss": 0.4252, - "step": 9401 - }, - { - "epoch": 0.6144696425070257, - "grad_norm": 0.47076553106307983, - "learning_rate": 9.147915252130421e-06, - "loss": 0.3946, - "step": 9402 - }, - { - "epoch": 0.6145349977125678, - "grad_norm": 0.4633639454841614, - "learning_rate": 9.147720260130332e-06, - "loss": 0.3961, - "step": 9403 - }, - { - "epoch": 0.6146003529181099, - "grad_norm": 0.43805554509162903, - "learning_rate": 9.147525247900377e-06, - "loss": 0.3479, - "step": 9404 - }, - { - "epoch": 0.6146657081236521, - "grad_norm": 0.4406869411468506, - "learning_rate": 9.147330215441504e-06, - "loss": 0.3766, - "step": 9405 - }, - { - "epoch": 0.6147310633291941, - "grad_norm": 0.44196370244026184, - "learning_rate": 9.147135162754663e-06, - "loss": 0.3773, - "step": 9406 - }, - { - "epoch": 0.6147964185347363, - "grad_norm": 0.4745652675628662, - "learning_rate": 9.146940089840809e-06, - "loss": 0.353, - "step": 9407 - }, - { - "epoch": 0.6148617737402784, - "grad_norm": 0.41446709632873535, - "learning_rate": 9.146744996700891e-06, - "loss": 0.3251, - "step": 9408 - }, - { - "epoch": 0.6149271289458206, - "grad_norm": 0.45017939805984497, - "learning_rate": 9.14654988333586e-06, - "loss": 0.4431, - "step": 9409 - }, - { - "epoch": 0.6149924841513627, - "grad_norm": 0.45253807306289673, - "learning_rate": 9.146354749746672e-06, - "loss": 0.3608, - "step": 9410 - }, - { - "epoch": 0.6150578393569047, - "grad_norm": 0.4453875422477722, - "learning_rate": 9.146159595934272e-06, - "loss": 0.378, - "step": 9411 - }, - { - "epoch": 0.6151231945624469, - "grad_norm": 0.4556255638599396, - "learning_rate": 9.145964421899617e-06, - "loss": 0.3905, - "step": 9412 - }, - { - "epoch": 0.615188549767989, - "grad_norm": 0.4538557529449463, - "learning_rate": 9.145769227643655e-06, - "loss": 0.4013, - "step": 9413 - }, - { - "epoch": 0.6152539049735312, - "grad_norm": 0.43004491925239563, - "learning_rate": 9.145574013167342e-06, - "loss": 0.3288, - "step": 9414 - }, - { - "epoch": 0.6153192601790732, - "grad_norm": 0.4410993754863739, - "learning_rate": 9.14537877847163e-06, - "loss": 0.3944, - "step": 9415 - }, - { - "epoch": 0.6153846153846154, - "grad_norm": 0.4260723888874054, - "learning_rate": 9.145183523557465e-06, - "loss": 0.3568, - "step": 9416 - }, - { - "epoch": 0.6154499705901575, - "grad_norm": 0.478514164686203, - "learning_rate": 9.144988248425807e-06, - "loss": 0.4101, - "step": 9417 - }, - { - "epoch": 0.6155153257956997, - "grad_norm": 0.4262210428714752, - "learning_rate": 9.144792953077605e-06, - "loss": 0.3751, - "step": 9418 - }, - { - "epoch": 0.6155806810012417, - "grad_norm": 0.4362979829311371, - "learning_rate": 9.144597637513814e-06, - "loss": 0.3664, - "step": 9419 - }, - { - "epoch": 0.6156460362067838, - "grad_norm": 0.41937923431396484, - "learning_rate": 9.144402301735383e-06, - "loss": 0.3305, - "step": 9420 - }, - { - "epoch": 0.615711391412326, - "grad_norm": 0.4549425542354584, - "learning_rate": 9.144206945743264e-06, - "loss": 0.3817, - "step": 9421 - }, - { - "epoch": 0.6157767466178681, - "grad_norm": 0.43009620904922485, - "learning_rate": 9.144011569538414e-06, - "loss": 0.3465, - "step": 9422 - }, - { - "epoch": 0.6158421018234103, - "grad_norm": 0.5174602270126343, - "learning_rate": 9.143816173121785e-06, - "loss": 0.4855, - "step": 9423 - }, - { - "epoch": 0.6159074570289523, - "grad_norm": 0.44756612181663513, - "learning_rate": 9.143620756494327e-06, - "loss": 0.4159, - "step": 9424 - }, - { - "epoch": 0.6159728122344945, - "grad_norm": 0.47683805227279663, - "learning_rate": 9.143425319656995e-06, - "loss": 0.4467, - "step": 9425 - }, - { - "epoch": 0.6160381674400366, - "grad_norm": 0.4673953652381897, - "learning_rate": 9.143229862610742e-06, - "loss": 0.4009, - "step": 9426 - }, - { - "epoch": 0.6161035226455788, - "grad_norm": 0.45060044527053833, - "learning_rate": 9.143034385356525e-06, - "loss": 0.3755, - "step": 9427 - }, - { - "epoch": 0.6161688778511208, - "grad_norm": 0.4504525065422058, - "learning_rate": 9.14283888789529e-06, - "loss": 0.349, - "step": 9428 - }, - { - "epoch": 0.6162342330566629, - "grad_norm": 0.44463473558425903, - "learning_rate": 9.142643370227997e-06, - "loss": 0.3961, - "step": 9429 - }, - { - "epoch": 0.6162995882622051, - "grad_norm": 0.45240074396133423, - "learning_rate": 9.142447832355595e-06, - "loss": 0.4041, - "step": 9430 - }, - { - "epoch": 0.6163649434677472, - "grad_norm": 0.44422590732574463, - "learning_rate": 9.142252274279042e-06, - "loss": 0.3887, - "step": 9431 - }, - { - "epoch": 0.6164302986732894, - "grad_norm": 0.4349067211151123, - "learning_rate": 9.142056695999288e-06, - "loss": 0.3611, - "step": 9432 - }, - { - "epoch": 0.6164956538788314, - "grad_norm": 0.430692195892334, - "learning_rate": 9.14186109751729e-06, - "loss": 0.404, - "step": 9433 - }, - { - "epoch": 0.6165610090843736, - "grad_norm": 0.44676336646080017, - "learning_rate": 9.141665478834e-06, - "loss": 0.3599, - "step": 9434 - }, - { - "epoch": 0.6166263642899157, - "grad_norm": 0.4970923364162445, - "learning_rate": 9.141469839950372e-06, - "loss": 0.4883, - "step": 9435 - }, - { - "epoch": 0.6166917194954579, - "grad_norm": 0.4341626465320587, - "learning_rate": 9.141274180867361e-06, - "loss": 0.3818, - "step": 9436 - }, - { - "epoch": 0.6167570747009999, - "grad_norm": 0.4820190966129303, - "learning_rate": 9.141078501585921e-06, - "loss": 0.4028, - "step": 9437 - }, - { - "epoch": 0.616822429906542, - "grad_norm": 0.5133317112922668, - "learning_rate": 9.140882802107007e-06, - "loss": 0.447, - "step": 9438 - }, - { - "epoch": 0.6168877851120842, - "grad_norm": 0.42152732610702515, - "learning_rate": 9.140687082431574e-06, - "loss": 0.3504, - "step": 9439 - }, - { - "epoch": 0.6169531403176263, - "grad_norm": 0.4819971024990082, - "learning_rate": 9.140491342560575e-06, - "loss": 0.4251, - "step": 9440 - }, - { - "epoch": 0.6170184955231685, - "grad_norm": 0.4538039267063141, - "learning_rate": 9.140295582494965e-06, - "loss": 0.4082, - "step": 9441 - }, - { - "epoch": 0.6170838507287105, - "grad_norm": 0.42919978499412537, - "learning_rate": 9.140099802235699e-06, - "loss": 0.3378, - "step": 9442 - }, - { - "epoch": 0.6171492059342527, - "grad_norm": 0.4664906859397888, - "learning_rate": 9.139904001783732e-06, - "loss": 0.3922, - "step": 9443 - }, - { - "epoch": 0.6172145611397948, - "grad_norm": 0.47128826379776, - "learning_rate": 9.139708181140019e-06, - "loss": 0.3906, - "step": 9444 - }, - { - "epoch": 0.6172799163453369, - "grad_norm": 0.45355427265167236, - "learning_rate": 9.139512340305516e-06, - "loss": 0.4102, - "step": 9445 - }, - { - "epoch": 0.617345271550879, - "grad_norm": 0.4564480185508728, - "learning_rate": 9.139316479281175e-06, - "loss": 0.3898, - "step": 9446 - }, - { - "epoch": 0.6174106267564211, - "grad_norm": 0.4544818699359894, - "learning_rate": 9.139120598067955e-06, - "loss": 0.3651, - "step": 9447 - }, - { - "epoch": 0.6174759819619633, - "grad_norm": 0.41902658343315125, - "learning_rate": 9.13892469666681e-06, - "loss": 0.3753, - "step": 9448 - }, - { - "epoch": 0.6175413371675054, - "grad_norm": 0.4443846046924591, - "learning_rate": 9.138728775078695e-06, - "loss": 0.3739, - "step": 9449 - }, - { - "epoch": 0.6176066923730475, - "grad_norm": 0.42077818512916565, - "learning_rate": 9.138532833304567e-06, - "loss": 0.3499, - "step": 9450 - }, - { - "epoch": 0.6176720475785896, - "grad_norm": 0.4572470188140869, - "learning_rate": 9.13833687134538e-06, - "loss": 0.3819, - "step": 9451 - }, - { - "epoch": 0.6177374027841318, - "grad_norm": 0.5040394067764282, - "learning_rate": 9.138140889202089e-06, - "loss": 0.4459, - "step": 9452 - }, - { - "epoch": 0.6178027579896739, - "grad_norm": 0.44595399498939514, - "learning_rate": 9.137944886875654e-06, - "loss": 0.3474, - "step": 9453 - }, - { - "epoch": 0.617868113195216, - "grad_norm": 0.4859713315963745, - "learning_rate": 9.137748864367026e-06, - "loss": 0.3867, - "step": 9454 - }, - { - "epoch": 0.6179334684007581, - "grad_norm": 0.4658839702606201, - "learning_rate": 9.137552821677164e-06, - "loss": 0.3491, - "step": 9455 - }, - { - "epoch": 0.6179988236063002, - "grad_norm": 0.4738259017467499, - "learning_rate": 9.137356758807025e-06, - "loss": 0.4353, - "step": 9456 - }, - { - "epoch": 0.6180641788118424, - "grad_norm": 0.43269017338752747, - "learning_rate": 9.137160675757561e-06, - "loss": 0.3731, - "step": 9457 - }, - { - "epoch": 0.6181295340173845, - "grad_norm": 0.5084611177444458, - "learning_rate": 9.136964572529734e-06, - "loss": 0.4506, - "step": 9458 - }, - { - "epoch": 0.6181948892229266, - "grad_norm": 0.46275830268859863, - "learning_rate": 9.136768449124495e-06, - "loss": 0.4012, - "step": 9459 - }, - { - "epoch": 0.6182602444284687, - "grad_norm": 0.45646172761917114, - "learning_rate": 9.136572305542806e-06, - "loss": 0.4114, - "step": 9460 - }, - { - "epoch": 0.6183255996340109, - "grad_norm": 0.42141151428222656, - "learning_rate": 9.13637614178562e-06, - "loss": 0.3213, - "step": 9461 - }, - { - "epoch": 0.618390954839553, - "grad_norm": 0.4637605845928192, - "learning_rate": 9.136179957853893e-06, - "loss": 0.3814, - "step": 9462 - }, - { - "epoch": 0.618456310045095, - "grad_norm": 0.449079304933548, - "learning_rate": 9.135983753748582e-06, - "loss": 0.3794, - "step": 9463 - }, - { - "epoch": 0.6185216652506372, - "grad_norm": 0.4622590243816376, - "learning_rate": 9.135787529470649e-06, - "loss": 0.4105, - "step": 9464 - }, - { - "epoch": 0.6185870204561793, - "grad_norm": 0.43435537815093994, - "learning_rate": 9.135591285021045e-06, - "loss": 0.3599, - "step": 9465 - }, - { - "epoch": 0.6186523756617215, - "grad_norm": 0.47140899300575256, - "learning_rate": 9.135395020400733e-06, - "loss": 0.4003, - "step": 9466 - }, - { - "epoch": 0.6187177308672636, - "grad_norm": 0.4565064013004303, - "learning_rate": 9.135198735610664e-06, - "loss": 0.37, - "step": 9467 - }, - { - "epoch": 0.6187830860728057, - "grad_norm": 0.4229309558868408, - "learning_rate": 9.135002430651798e-06, - "loss": 0.3407, - "step": 9468 - }, - { - "epoch": 0.6188484412783478, - "grad_norm": 0.44590333104133606, - "learning_rate": 9.134806105525093e-06, - "loss": 0.3773, - "step": 9469 - }, - { - "epoch": 0.6189137964838899, - "grad_norm": 0.46590927243232727, - "learning_rate": 9.134609760231506e-06, - "loss": 0.3617, - "step": 9470 - }, - { - "epoch": 0.6189791516894321, - "grad_norm": 0.4947170913219452, - "learning_rate": 9.134413394771996e-06, - "loss": 0.4397, - "step": 9471 - }, - { - "epoch": 0.6190445068949741, - "grad_norm": 0.48017892241477966, - "learning_rate": 9.134217009147518e-06, - "loss": 0.3935, - "step": 9472 - }, - { - "epoch": 0.6191098621005163, - "grad_norm": 0.37933260202407837, - "learning_rate": 9.134020603359033e-06, - "loss": 0.287, - "step": 9473 - }, - { - "epoch": 0.6191752173060584, - "grad_norm": 0.4307333528995514, - "learning_rate": 9.133824177407496e-06, - "loss": 0.3453, - "step": 9474 - }, - { - "epoch": 0.6192405725116006, - "grad_norm": 0.4457012712955475, - "learning_rate": 9.133627731293868e-06, - "loss": 0.4216, - "step": 9475 - }, - { - "epoch": 0.6193059277171427, - "grad_norm": 0.4871172308921814, - "learning_rate": 9.133431265019106e-06, - "loss": 0.3958, - "step": 9476 - }, - { - "epoch": 0.6193712829226848, - "grad_norm": 0.43701526522636414, - "learning_rate": 9.133234778584166e-06, - "loss": 0.4003, - "step": 9477 - }, - { - "epoch": 0.6194366381282269, - "grad_norm": 0.4886641204357147, - "learning_rate": 9.133038271990007e-06, - "loss": 0.4187, - "step": 9478 - }, - { - "epoch": 0.619501993333769, - "grad_norm": 0.4610545039176941, - "learning_rate": 9.13284174523759e-06, - "loss": 0.4227, - "step": 9479 - }, - { - "epoch": 0.6195673485393112, - "grad_norm": 0.42697811126708984, - "learning_rate": 9.132645198327871e-06, - "loss": 0.3507, - "step": 9480 - }, - { - "epoch": 0.6196327037448532, - "grad_norm": 0.42546194791793823, - "learning_rate": 9.132448631261813e-06, - "loss": 0.3554, - "step": 9481 - }, - { - "epoch": 0.6196980589503954, - "grad_norm": 0.45359355211257935, - "learning_rate": 9.132252044040368e-06, - "loss": 0.4129, - "step": 9482 - }, - { - "epoch": 0.6197634141559375, - "grad_norm": 0.48987266421318054, - "learning_rate": 9.132055436664499e-06, - "loss": 0.4749, - "step": 9483 - }, - { - "epoch": 0.6198287693614797, - "grad_norm": 0.44682779908180237, - "learning_rate": 9.131858809135165e-06, - "loss": 0.3918, - "step": 9484 - }, - { - "epoch": 0.6198941245670218, - "grad_norm": 0.44950276613235474, - "learning_rate": 9.131662161453325e-06, - "loss": 0.393, - "step": 9485 - }, - { - "epoch": 0.6199594797725639, - "grad_norm": 0.4727022349834442, - "learning_rate": 9.131465493619936e-06, - "loss": 0.3808, - "step": 9486 - }, - { - "epoch": 0.620024834978106, - "grad_norm": 0.4463884234428406, - "learning_rate": 9.131268805635958e-06, - "loss": 0.4073, - "step": 9487 - }, - { - "epoch": 0.6200901901836481, - "grad_norm": 0.47201770544052124, - "learning_rate": 9.131072097502352e-06, - "loss": 0.4237, - "step": 9488 - }, - { - "epoch": 0.6201555453891903, - "grad_norm": 0.47684377431869507, - "learning_rate": 9.130875369220074e-06, - "loss": 0.4357, - "step": 9489 - }, - { - "epoch": 0.6202209005947323, - "grad_norm": 0.4322279095649719, - "learning_rate": 9.130678620790088e-06, - "loss": 0.3504, - "step": 9490 - }, - { - "epoch": 0.6202862558002745, - "grad_norm": 0.5034162998199463, - "learning_rate": 9.130481852213351e-06, - "loss": 0.4291, - "step": 9491 - }, - { - "epoch": 0.6203516110058166, - "grad_norm": 0.426683634519577, - "learning_rate": 9.130285063490822e-06, - "loss": 0.3221, - "step": 9492 - }, - { - "epoch": 0.6204169662113588, - "grad_norm": 0.4612686336040497, - "learning_rate": 9.130088254623462e-06, - "loss": 0.3957, - "step": 9493 - }, - { - "epoch": 0.6204823214169009, - "grad_norm": 0.4370593726634979, - "learning_rate": 9.129891425612232e-06, - "loss": 0.3698, - "step": 9494 - }, - { - "epoch": 0.6205476766224429, - "grad_norm": 0.43835359811782837, - "learning_rate": 9.12969457645809e-06, - "loss": 0.3693, - "step": 9495 - }, - { - "epoch": 0.6206130318279851, - "grad_norm": 0.4201563596725464, - "learning_rate": 9.129497707161998e-06, - "loss": 0.3282, - "step": 9496 - }, - { - "epoch": 0.6206783870335272, - "grad_norm": 0.48881956934928894, - "learning_rate": 9.129300817724914e-06, - "loss": 0.4248, - "step": 9497 - }, - { - "epoch": 0.6207437422390694, - "grad_norm": 0.46462857723236084, - "learning_rate": 9.129103908147798e-06, - "loss": 0.4152, - "step": 9498 - }, - { - "epoch": 0.6208090974446114, - "grad_norm": 0.39168354868888855, - "learning_rate": 9.128906978431615e-06, - "loss": 0.3081, - "step": 9499 - }, - { - "epoch": 0.6208744526501536, - "grad_norm": 0.4305526912212372, - "learning_rate": 9.12871002857732e-06, - "loss": 0.3577, - "step": 9500 - }, - { - "epoch": 0.6209398078556957, - "grad_norm": 0.49615317583084106, - "learning_rate": 9.128513058585877e-06, - "loss": 0.466, - "step": 9501 - }, - { - "epoch": 0.6210051630612379, - "grad_norm": 0.41772010922431946, - "learning_rate": 9.128316068458245e-06, - "loss": 0.3446, - "step": 9502 - }, - { - "epoch": 0.62107051826678, - "grad_norm": 0.455108106136322, - "learning_rate": 9.128119058195385e-06, - "loss": 0.3907, - "step": 9503 - }, - { - "epoch": 0.621135873472322, - "grad_norm": 0.4205496907234192, - "learning_rate": 9.127922027798259e-06, - "loss": 0.3337, - "step": 9504 - }, - { - "epoch": 0.6212012286778642, - "grad_norm": 0.45498114824295044, - "learning_rate": 9.127724977267827e-06, - "loss": 0.4106, - "step": 9505 - }, - { - "epoch": 0.6212665838834063, - "grad_norm": 0.43127432465553284, - "learning_rate": 9.12752790660505e-06, - "loss": 0.3621, - "step": 9506 - }, - { - "epoch": 0.6213319390889485, - "grad_norm": 0.42468681931495667, - "learning_rate": 9.127330815810888e-06, - "loss": 0.3383, - "step": 9507 - }, - { - "epoch": 0.6213972942944905, - "grad_norm": 0.41988471150398254, - "learning_rate": 9.127133704886307e-06, - "loss": 0.3574, - "step": 9508 - }, - { - "epoch": 0.6214626495000327, - "grad_norm": 0.4805356562137604, - "learning_rate": 9.126936573832264e-06, - "loss": 0.3758, - "step": 9509 - }, - { - "epoch": 0.6215280047055748, - "grad_norm": 0.4301708936691284, - "learning_rate": 9.12673942264972e-06, - "loss": 0.3779, - "step": 9510 - }, - { - "epoch": 0.621593359911117, - "grad_norm": 0.4447043836116791, - "learning_rate": 9.126542251339639e-06, - "loss": 0.364, - "step": 9511 - }, - { - "epoch": 0.621658715116659, - "grad_norm": 0.4358770549297333, - "learning_rate": 9.126345059902984e-06, - "loss": 0.3749, - "step": 9512 - }, - { - "epoch": 0.6217240703222011, - "grad_norm": 0.4476655125617981, - "learning_rate": 9.126147848340711e-06, - "loss": 0.3622, - "step": 9513 - }, - { - "epoch": 0.6217894255277433, - "grad_norm": 0.4278465807437897, - "learning_rate": 9.125950616653787e-06, - "loss": 0.3388, - "step": 9514 - }, - { - "epoch": 0.6218547807332854, - "grad_norm": 0.46168437600135803, - "learning_rate": 9.125753364843174e-06, - "loss": 0.3715, - "step": 9515 - }, - { - "epoch": 0.6219201359388276, - "grad_norm": 0.46428367495536804, - "learning_rate": 9.12555609290983e-06, - "loss": 0.3864, - "step": 9516 - }, - { - "epoch": 0.6219854911443696, - "grad_norm": 0.47808322310447693, - "learning_rate": 9.125358800854723e-06, - "loss": 0.3845, - "step": 9517 - }, - { - "epoch": 0.6220508463499118, - "grad_norm": 0.41962409019470215, - "learning_rate": 9.12516148867881e-06, - "loss": 0.3196, - "step": 9518 - }, - { - "epoch": 0.6221162015554539, - "grad_norm": 0.4695814549922943, - "learning_rate": 9.124964156383054e-06, - "loss": 0.4069, - "step": 9519 - }, - { - "epoch": 0.6221815567609961, - "grad_norm": 0.4293384850025177, - "learning_rate": 9.124766803968421e-06, - "loss": 0.3695, - "step": 9520 - }, - { - "epoch": 0.6222469119665381, - "grad_norm": 0.4725792109966278, - "learning_rate": 9.12456943143587e-06, - "loss": 0.4653, - "step": 9521 - }, - { - "epoch": 0.6223122671720802, - "grad_norm": 0.4451233446598053, - "learning_rate": 9.124372038786366e-06, - "loss": 0.36, - "step": 9522 - }, - { - "epoch": 0.6223776223776224, - "grad_norm": 0.46407151222229004, - "learning_rate": 9.124174626020869e-06, - "loss": 0.3954, - "step": 9523 - }, - { - "epoch": 0.6224429775831645, - "grad_norm": 0.4411452114582062, - "learning_rate": 9.123977193140346e-06, - "loss": 0.362, - "step": 9524 - }, - { - "epoch": 0.6225083327887067, - "grad_norm": 0.47463229298591614, - "learning_rate": 9.123779740145758e-06, - "loss": 0.3988, - "step": 9525 - }, - { - "epoch": 0.6225736879942487, - "grad_norm": 0.44011837244033813, - "learning_rate": 9.123582267038064e-06, - "loss": 0.3771, - "step": 9526 - }, - { - "epoch": 0.6226390431997909, - "grad_norm": 0.4925641119480133, - "learning_rate": 9.123384773818234e-06, - "loss": 0.3735, - "step": 9527 - }, - { - "epoch": 0.622704398405333, - "grad_norm": 0.4725680351257324, - "learning_rate": 9.123187260487226e-06, - "loss": 0.4013, - "step": 9528 - }, - { - "epoch": 0.622769753610875, - "grad_norm": 0.4580378532409668, - "learning_rate": 9.122989727046006e-06, - "loss": 0.4369, - "step": 9529 - }, - { - "epoch": 0.6228351088164172, - "grad_norm": 0.5389646291732788, - "learning_rate": 9.122792173495536e-06, - "loss": 0.3723, - "step": 9530 - }, - { - "epoch": 0.6229004640219593, - "grad_norm": 0.42884403467178345, - "learning_rate": 9.122594599836783e-06, - "loss": 0.3548, - "step": 9531 - }, - { - "epoch": 0.6229658192275015, - "grad_norm": 0.4398644268512726, - "learning_rate": 9.122397006070705e-06, - "loss": 0.3535, - "step": 9532 - }, - { - "epoch": 0.6230311744330436, - "grad_norm": 0.45770788192749023, - "learning_rate": 9.12219939219827e-06, - "loss": 0.3725, - "step": 9533 - }, - { - "epoch": 0.6230965296385857, - "grad_norm": 0.43530353903770447, - "learning_rate": 9.12200175822044e-06, - "loss": 0.3573, - "step": 9534 - }, - { - "epoch": 0.6231618848441278, - "grad_norm": 0.44287198781967163, - "learning_rate": 9.121804104138178e-06, - "loss": 0.3484, - "step": 9535 - }, - { - "epoch": 0.62322724004967, - "grad_norm": 0.43603357672691345, - "learning_rate": 9.121606429952453e-06, - "loss": 0.365, - "step": 9536 - }, - { - "epoch": 0.6232925952552121, - "grad_norm": 0.4030922055244446, - "learning_rate": 9.121408735664223e-06, - "loss": 0.3405, - "step": 9537 - }, - { - "epoch": 0.6233579504607542, - "grad_norm": 0.45667538046836853, - "learning_rate": 9.121211021274456e-06, - "loss": 0.3942, - "step": 9538 - }, - { - "epoch": 0.6234233056662963, - "grad_norm": 0.4265606999397278, - "learning_rate": 9.121013286784114e-06, - "loss": 0.372, - "step": 9539 - }, - { - "epoch": 0.6234886608718384, - "grad_norm": 0.4281396269798279, - "learning_rate": 9.120815532194162e-06, - "loss": 0.3541, - "step": 9540 - }, - { - "epoch": 0.6235540160773806, - "grad_norm": 0.439257949590683, - "learning_rate": 9.120617757505568e-06, - "loss": 0.3594, - "step": 9541 - }, - { - "epoch": 0.6236193712829227, - "grad_norm": 0.4113325774669647, - "learning_rate": 9.120419962719291e-06, - "loss": 0.3595, - "step": 9542 - }, - { - "epoch": 0.6236847264884648, - "grad_norm": 0.4289771616458893, - "learning_rate": 9.120222147836299e-06, - "loss": 0.36, - "step": 9543 - }, - { - "epoch": 0.6237500816940069, - "grad_norm": 0.47416773438453674, - "learning_rate": 9.120024312857557e-06, - "loss": 0.3907, - "step": 9544 - }, - { - "epoch": 0.6238154368995491, - "grad_norm": 0.42772915959358215, - "learning_rate": 9.119826457784028e-06, - "loss": 0.3318, - "step": 9545 - }, - { - "epoch": 0.6238807921050912, - "grad_norm": 0.46171560883522034, - "learning_rate": 9.11962858261668e-06, - "loss": 0.4351, - "step": 9546 - }, - { - "epoch": 0.6239461473106332, - "grad_norm": 0.4971867501735687, - "learning_rate": 9.119430687356474e-06, - "loss": 0.4852, - "step": 9547 - }, - { - "epoch": 0.6240115025161754, - "grad_norm": 0.46011096239089966, - "learning_rate": 9.119232772004378e-06, - "loss": 0.3876, - "step": 9548 - }, - { - "epoch": 0.6240768577217175, - "grad_norm": 0.4509351849555969, - "learning_rate": 9.119034836561358e-06, - "loss": 0.3718, - "step": 9549 - }, - { - "epoch": 0.6241422129272597, - "grad_norm": 0.43959373235702515, - "learning_rate": 9.118836881028377e-06, - "loss": 0.3949, - "step": 9550 - }, - { - "epoch": 0.6242075681328018, - "grad_norm": 0.43528345227241516, - "learning_rate": 9.118638905406402e-06, - "loss": 0.3672, - "step": 9551 - }, - { - "epoch": 0.6242729233383439, - "grad_norm": 0.5049495100975037, - "learning_rate": 9.118440909696397e-06, - "loss": 0.4554, - "step": 9552 - }, - { - "epoch": 0.624338278543886, - "grad_norm": 0.5229260325431824, - "learning_rate": 9.118242893899331e-06, - "loss": 0.4922, - "step": 9553 - }, - { - "epoch": 0.6244036337494281, - "grad_norm": 0.44667717814445496, - "learning_rate": 9.118044858016166e-06, - "loss": 0.3649, - "step": 9554 - }, - { - "epoch": 0.6244689889549703, - "grad_norm": 0.4446372091770172, - "learning_rate": 9.117846802047871e-06, - "loss": 0.3869, - "step": 9555 - }, - { - "epoch": 0.6245343441605123, - "grad_norm": 0.45658573508262634, - "learning_rate": 9.117648725995409e-06, - "loss": 0.3918, - "step": 9556 - }, - { - "epoch": 0.6245996993660545, - "grad_norm": 0.48933708667755127, - "learning_rate": 9.11745062985975e-06, - "loss": 0.4334, - "step": 9557 - }, - { - "epoch": 0.6246650545715966, - "grad_norm": 0.48574692010879517, - "learning_rate": 9.117252513641855e-06, - "loss": 0.4377, - "step": 9558 - }, - { - "epoch": 0.6247304097771388, - "grad_norm": 0.5007457733154297, - "learning_rate": 9.117054377342695e-06, - "loss": 0.4446, - "step": 9559 - }, - { - "epoch": 0.6247957649826809, - "grad_norm": 0.4362432658672333, - "learning_rate": 9.116856220963236e-06, - "loss": 0.3517, - "step": 9560 - }, - { - "epoch": 0.624861120188223, - "grad_norm": 0.4492630362510681, - "learning_rate": 9.11665804450444e-06, - "loss": 0.3935, - "step": 9561 - }, - { - "epoch": 0.6249264753937651, - "grad_norm": 0.45661497116088867, - "learning_rate": 9.116459847967276e-06, - "loss": 0.4001, - "step": 9562 - }, - { - "epoch": 0.6249918305993072, - "grad_norm": 0.44248563051223755, - "learning_rate": 9.116261631352714e-06, - "loss": 0.3819, - "step": 9563 - }, - { - "epoch": 0.6250571858048494, - "grad_norm": 0.4681680500507355, - "learning_rate": 9.116063394661716e-06, - "loss": 0.4456, - "step": 9564 - }, - { - "epoch": 0.6251225410103914, - "grad_norm": 0.45694154500961304, - "learning_rate": 9.115865137895252e-06, - "loss": 0.3714, - "step": 9565 - }, - { - "epoch": 0.6251878962159336, - "grad_norm": 0.46180787682533264, - "learning_rate": 9.115666861054289e-06, - "loss": 0.3567, - "step": 9566 - }, - { - "epoch": 0.6252532514214757, - "grad_norm": 0.4309927821159363, - "learning_rate": 9.115468564139791e-06, - "loss": 0.3564, - "step": 9567 - }, - { - "epoch": 0.6253186066270179, - "grad_norm": 0.48116418719291687, - "learning_rate": 9.115270247152728e-06, - "loss": 0.4245, - "step": 9568 - }, - { - "epoch": 0.62538396183256, - "grad_norm": 0.4717462956905365, - "learning_rate": 9.115071910094065e-06, - "loss": 0.4339, - "step": 9569 - }, - { - "epoch": 0.6254493170381021, - "grad_norm": 0.47250643372535706, - "learning_rate": 9.114873552964771e-06, - "loss": 0.3938, - "step": 9570 - }, - { - "epoch": 0.6255146722436442, - "grad_norm": 0.4773271977901459, - "learning_rate": 9.114675175765814e-06, - "loss": 0.3937, - "step": 9571 - }, - { - "epoch": 0.6255800274491863, - "grad_norm": 0.4597764313220978, - "learning_rate": 9.114476778498161e-06, - "loss": 0.4021, - "step": 9572 - }, - { - "epoch": 0.6256453826547285, - "grad_norm": 0.47342589497566223, - "learning_rate": 9.114278361162778e-06, - "loss": 0.4336, - "step": 9573 - }, - { - "epoch": 0.6257107378602705, - "grad_norm": 0.4563991129398346, - "learning_rate": 9.114079923760636e-06, - "loss": 0.3922, - "step": 9574 - }, - { - "epoch": 0.6257760930658127, - "grad_norm": 0.4252253472805023, - "learning_rate": 9.1138814662927e-06, - "loss": 0.3307, - "step": 9575 - }, - { - "epoch": 0.6258414482713548, - "grad_norm": 0.4634683430194855, - "learning_rate": 9.11368298875994e-06, - "loss": 0.3882, - "step": 9576 - }, - { - "epoch": 0.625906803476897, - "grad_norm": 0.4656892716884613, - "learning_rate": 9.11348449116332e-06, - "loss": 0.4308, - "step": 9577 - }, - { - "epoch": 0.625972158682439, - "grad_norm": 0.45494982600212097, - "learning_rate": 9.113285973503813e-06, - "loss": 0.4255, - "step": 9578 - }, - { - "epoch": 0.6260375138879811, - "grad_norm": 0.45242008566856384, - "learning_rate": 9.113087435782387e-06, - "loss": 0.3886, - "step": 9579 - }, - { - "epoch": 0.6261028690935233, - "grad_norm": 0.46740055084228516, - "learning_rate": 9.112888878000005e-06, - "loss": 0.3931, - "step": 9580 - }, - { - "epoch": 0.6261682242990654, - "grad_norm": 0.4570982754230499, - "learning_rate": 9.112690300157642e-06, - "loss": 0.4083, - "step": 9581 - }, - { - "epoch": 0.6262335795046076, - "grad_norm": 0.43806904554367065, - "learning_rate": 9.112491702256262e-06, - "loss": 0.3553, - "step": 9582 - }, - { - "epoch": 0.6262989347101496, - "grad_norm": 0.46506205201148987, - "learning_rate": 9.112293084296836e-06, - "loss": 0.3455, - "step": 9583 - }, - { - "epoch": 0.6263642899156918, - "grad_norm": 0.4456247389316559, - "learning_rate": 9.112094446280332e-06, - "loss": 0.3385, - "step": 9584 - }, - { - "epoch": 0.6264296451212339, - "grad_norm": 0.4501979649066925, - "learning_rate": 9.111895788207718e-06, - "loss": 0.3652, - "step": 9585 - }, - { - "epoch": 0.6264950003267761, - "grad_norm": 0.45896342396736145, - "learning_rate": 9.111697110079964e-06, - "loss": 0.3807, - "step": 9586 - }, - { - "epoch": 0.6265603555323181, - "grad_norm": 0.4563565254211426, - "learning_rate": 9.11149841189804e-06, - "loss": 0.3342, - "step": 9587 - }, - { - "epoch": 0.6266257107378602, - "grad_norm": 0.4744052588939667, - "learning_rate": 9.111299693662913e-06, - "loss": 0.3988, - "step": 9588 - }, - { - "epoch": 0.6266910659434024, - "grad_norm": 0.4680745303630829, - "learning_rate": 9.111100955375554e-06, - "loss": 0.4467, - "step": 9589 - }, - { - "epoch": 0.6267564211489445, - "grad_norm": 0.5005598068237305, - "learning_rate": 9.110902197036931e-06, - "loss": 0.4595, - "step": 9590 - }, - { - "epoch": 0.6268217763544867, - "grad_norm": 0.4681922197341919, - "learning_rate": 9.110703418648012e-06, - "loss": 0.4319, - "step": 9591 - }, - { - "epoch": 0.6268871315600287, - "grad_norm": 0.47896018624305725, - "learning_rate": 9.11050462020977e-06, - "loss": 0.4208, - "step": 9592 - }, - { - "epoch": 0.6269524867655709, - "grad_norm": 0.4624868333339691, - "learning_rate": 9.110305801723173e-06, - "loss": 0.3888, - "step": 9593 - }, - { - "epoch": 0.627017841971113, - "grad_norm": 0.4650159478187561, - "learning_rate": 9.11010696318919e-06, - "loss": 0.3707, - "step": 9594 - }, - { - "epoch": 0.6270831971766552, - "grad_norm": 0.4010483920574188, - "learning_rate": 9.109908104608792e-06, - "loss": 0.3541, - "step": 9595 - }, - { - "epoch": 0.6271485523821972, - "grad_norm": 0.44370007514953613, - "learning_rate": 9.109709225982947e-06, - "loss": 0.3813, - "step": 9596 - }, - { - "epoch": 0.6272139075877393, - "grad_norm": 0.4745508134365082, - "learning_rate": 9.109510327312628e-06, - "loss": 0.4053, - "step": 9597 - }, - { - "epoch": 0.6272792627932815, - "grad_norm": 0.4936065971851349, - "learning_rate": 9.109311408598805e-06, - "loss": 0.418, - "step": 9598 - }, - { - "epoch": 0.6273446179988236, - "grad_norm": 0.4746595621109009, - "learning_rate": 9.109112469842442e-06, - "loss": 0.4025, - "step": 9599 - }, - { - "epoch": 0.6274099732043658, - "grad_norm": 0.4503948986530304, - "learning_rate": 9.108913511044519e-06, - "loss": 0.3995, - "step": 9600 - }, - { - "epoch": 0.6274753284099078, - "grad_norm": 0.4455104470252991, - "learning_rate": 9.108714532205998e-06, - "loss": 0.358, - "step": 9601 - }, - { - "epoch": 0.62754068361545, - "grad_norm": 0.43602004647254944, - "learning_rate": 9.108515533327855e-06, - "loss": 0.3617, - "step": 9602 - }, - { - "epoch": 0.6276060388209921, - "grad_norm": 0.43457356095314026, - "learning_rate": 9.108316514411057e-06, - "loss": 0.3284, - "step": 9603 - }, - { - "epoch": 0.6276713940265343, - "grad_norm": 0.5141921639442444, - "learning_rate": 9.108117475456575e-06, - "loss": 0.3951, - "step": 9604 - }, - { - "epoch": 0.6277367492320763, - "grad_norm": 0.4688136577606201, - "learning_rate": 9.107918416465382e-06, - "loss": 0.3644, - "step": 9605 - }, - { - "epoch": 0.6278021044376184, - "grad_norm": 0.4773247241973877, - "learning_rate": 9.107719337438449e-06, - "loss": 0.4137, - "step": 9606 - }, - { - "epoch": 0.6278674596431606, - "grad_norm": 0.4520193934440613, - "learning_rate": 9.107520238376745e-06, - "loss": 0.402, - "step": 9607 - }, - { - "epoch": 0.6279328148487027, - "grad_norm": 0.4773165285587311, - "learning_rate": 9.10732111928124e-06, - "loss": 0.4258, - "step": 9608 - }, - { - "epoch": 0.6279981700542449, - "grad_norm": 0.47521254420280457, - "learning_rate": 9.107121980152908e-06, - "loss": 0.4111, - "step": 9609 - }, - { - "epoch": 0.6280635252597869, - "grad_norm": 0.5069689154624939, - "learning_rate": 9.106922820992721e-06, - "loss": 0.4194, - "step": 9610 - }, - { - "epoch": 0.6281288804653291, - "grad_norm": 0.460077166557312, - "learning_rate": 9.106723641801648e-06, - "loss": 0.3834, - "step": 9611 - }, - { - "epoch": 0.6281942356708712, - "grad_norm": 0.46278902888298035, - "learning_rate": 9.10652444258066e-06, - "loss": 0.4018, - "step": 9612 - }, - { - "epoch": 0.6282595908764133, - "grad_norm": 0.42567697167396545, - "learning_rate": 9.10632522333073e-06, - "loss": 0.3718, - "step": 9613 - }, - { - "epoch": 0.6283249460819554, - "grad_norm": 0.4435652494430542, - "learning_rate": 9.10612598405283e-06, - "loss": 0.3729, - "step": 9614 - }, - { - "epoch": 0.6283903012874975, - "grad_norm": 0.43816253542900085, - "learning_rate": 9.10592672474793e-06, - "loss": 0.3615, - "step": 9615 - }, - { - "epoch": 0.6284556564930397, - "grad_norm": 0.47340625524520874, - "learning_rate": 9.105727445417002e-06, - "loss": 0.4025, - "step": 9616 - }, - { - "epoch": 0.6285210116985818, - "grad_norm": 0.4708377420902252, - "learning_rate": 9.105528146061023e-06, - "loss": 0.3429, - "step": 9617 - }, - { - "epoch": 0.628586366904124, - "grad_norm": 0.48139289021492004, - "learning_rate": 9.105328826680957e-06, - "loss": 0.4622, - "step": 9618 - }, - { - "epoch": 0.628651722109666, - "grad_norm": 0.44649291038513184, - "learning_rate": 9.105129487277781e-06, - "loss": 0.4029, - "step": 9619 - }, - { - "epoch": 0.6287170773152082, - "grad_norm": 0.4421707093715668, - "learning_rate": 9.104930127852468e-06, - "loss": 0.355, - "step": 9620 - }, - { - "epoch": 0.6287824325207503, - "grad_norm": 0.46626222133636475, - "learning_rate": 9.104730748405988e-06, - "loss": 0.3937, - "step": 9621 - }, - { - "epoch": 0.6288477877262924, - "grad_norm": 0.479063481092453, - "learning_rate": 9.104531348939313e-06, - "loss": 0.4117, - "step": 9622 - }, - { - "epoch": 0.6289131429318345, - "grad_norm": 0.44687584042549133, - "learning_rate": 9.104331929453417e-06, - "loss": 0.4084, - "step": 9623 - }, - { - "epoch": 0.6289784981373766, - "grad_norm": 0.49097737669944763, - "learning_rate": 9.104132489949272e-06, - "loss": 0.3949, - "step": 9624 - }, - { - "epoch": 0.6290438533429188, - "grad_norm": 0.4492281973361969, - "learning_rate": 9.103933030427852e-06, - "loss": 0.4013, - "step": 9625 - }, - { - "epoch": 0.6291092085484609, - "grad_norm": 0.44036224484443665, - "learning_rate": 9.103733550890128e-06, - "loss": 0.3528, - "step": 9626 - }, - { - "epoch": 0.629174563754003, - "grad_norm": 0.41973841190338135, - "learning_rate": 9.103534051337074e-06, - "loss": 0.3228, - "step": 9627 - }, - { - "epoch": 0.6292399189595451, - "grad_norm": 0.46852245926856995, - "learning_rate": 9.103334531769664e-06, - "loss": 0.405, - "step": 9628 - }, - { - "epoch": 0.6293052741650873, - "grad_norm": 0.4788178503513336, - "learning_rate": 9.103134992188869e-06, - "loss": 0.3918, - "step": 9629 - }, - { - "epoch": 0.6293706293706294, - "grad_norm": 0.45014268159866333, - "learning_rate": 9.102935432595664e-06, - "loss": 0.4112, - "step": 9630 - }, - { - "epoch": 0.6294359845761714, - "grad_norm": 0.4370774030685425, - "learning_rate": 9.102735852991019e-06, - "loss": 0.3529, - "step": 9631 - }, - { - "epoch": 0.6295013397817136, - "grad_norm": 0.42277172207832336, - "learning_rate": 9.102536253375913e-06, - "loss": 0.3499, - "step": 9632 - }, - { - "epoch": 0.6295666949872557, - "grad_norm": 0.5183576941490173, - "learning_rate": 9.102336633751314e-06, - "loss": 0.4955, - "step": 9633 - }, - { - "epoch": 0.6296320501927979, - "grad_norm": 0.45432087779045105, - "learning_rate": 9.1021369941182e-06, - "loss": 0.3891, - "step": 9634 - }, - { - "epoch": 0.62969740539834, - "grad_norm": 0.4378865659236908, - "learning_rate": 9.101937334477542e-06, - "loss": 0.3496, - "step": 9635 - }, - { - "epoch": 0.6297627606038821, - "grad_norm": 0.4116170108318329, - "learning_rate": 9.101737654830313e-06, - "loss": 0.3077, - "step": 9636 - }, - { - "epoch": 0.6298281158094242, - "grad_norm": 0.5482897162437439, - "learning_rate": 9.101537955177491e-06, - "loss": 0.3867, - "step": 9637 - }, - { - "epoch": 0.6298934710149663, - "grad_norm": 0.4400225877761841, - "learning_rate": 9.101338235520046e-06, - "loss": 0.377, - "step": 9638 - }, - { - "epoch": 0.6299588262205085, - "grad_norm": 0.41989219188690186, - "learning_rate": 9.101138495858954e-06, - "loss": 0.3508, - "step": 9639 - }, - { - "epoch": 0.6300241814260505, - "grad_norm": 0.446665495634079, - "learning_rate": 9.100938736195188e-06, - "loss": 0.3776, - "step": 9640 - }, - { - "epoch": 0.6300895366315927, - "grad_norm": 0.47875264286994934, - "learning_rate": 9.100738956529724e-06, - "loss": 0.4348, - "step": 9641 - }, - { - "epoch": 0.6301548918371348, - "grad_norm": 0.4916093349456787, - "learning_rate": 9.100539156863536e-06, - "loss": 0.4538, - "step": 9642 - }, - { - "epoch": 0.630220247042677, - "grad_norm": 0.49365484714508057, - "learning_rate": 9.100339337197597e-06, - "loss": 0.421, - "step": 9643 - }, - { - "epoch": 0.6302856022482191, - "grad_norm": 0.41142141819000244, - "learning_rate": 9.100139497532882e-06, - "loss": 0.2936, - "step": 9644 - }, - { - "epoch": 0.6303509574537612, - "grad_norm": 0.5032975673675537, - "learning_rate": 9.099939637870369e-06, - "loss": 0.4358, - "step": 9645 - }, - { - "epoch": 0.6304163126593033, - "grad_norm": 0.4381714463233948, - "learning_rate": 9.099739758211028e-06, - "loss": 0.351, - "step": 9646 - }, - { - "epoch": 0.6304816678648454, - "grad_norm": 0.49288493394851685, - "learning_rate": 9.099539858555836e-06, - "loss": 0.4392, - "step": 9647 - }, - { - "epoch": 0.6305470230703876, - "grad_norm": 0.4541897177696228, - "learning_rate": 9.099339938905767e-06, - "loss": 0.3679, - "step": 9648 - }, - { - "epoch": 0.6306123782759296, - "grad_norm": 0.6171749830245972, - "learning_rate": 9.099139999261799e-06, - "loss": 0.3491, - "step": 9649 - }, - { - "epoch": 0.6306777334814718, - "grad_norm": 0.41822025179862976, - "learning_rate": 9.098940039624904e-06, - "loss": 0.3692, - "step": 9650 - }, - { - "epoch": 0.6307430886870139, - "grad_norm": 0.4923498034477234, - "learning_rate": 9.098740059996058e-06, - "loss": 0.4255, - "step": 9651 - }, - { - "epoch": 0.6308084438925561, - "grad_norm": 0.48193684220314026, - "learning_rate": 9.098540060376238e-06, - "loss": 0.3788, - "step": 9652 - }, - { - "epoch": 0.6308737990980982, - "grad_norm": 0.46530207991600037, - "learning_rate": 9.098340040766417e-06, - "loss": 0.3639, - "step": 9653 - }, - { - "epoch": 0.6309391543036403, - "grad_norm": 0.4175734221935272, - "learning_rate": 9.098140001167572e-06, - "loss": 0.3546, - "step": 9654 - }, - { - "epoch": 0.6310045095091824, - "grad_norm": 0.4892028570175171, - "learning_rate": 9.097939941580679e-06, - "loss": 0.3784, - "step": 9655 - }, - { - "epoch": 0.6310698647147245, - "grad_norm": 0.48101481795310974, - "learning_rate": 9.097739862006714e-06, - "loss": 0.4351, - "step": 9656 - }, - { - "epoch": 0.6311352199202667, - "grad_norm": 0.43300995230674744, - "learning_rate": 9.09753976244665e-06, - "loss": 0.3522, - "step": 9657 - }, - { - "epoch": 0.6312005751258087, - "grad_norm": 0.4683457612991333, - "learning_rate": 9.097339642901466e-06, - "loss": 0.3994, - "step": 9658 - }, - { - "epoch": 0.6312659303313509, - "grad_norm": 0.47194018959999084, - "learning_rate": 9.097139503372138e-06, - "loss": 0.3942, - "step": 9659 - }, - { - "epoch": 0.631331285536893, - "grad_norm": 0.4986540377140045, - "learning_rate": 9.096939343859641e-06, - "loss": 0.3847, - "step": 9660 - }, - { - "epoch": 0.6313966407424352, - "grad_norm": 0.6394871473312378, - "learning_rate": 9.09673916436495e-06, - "loss": 0.3852, - "step": 9661 - }, - { - "epoch": 0.6314619959479773, - "grad_norm": 0.44969215989112854, - "learning_rate": 9.096538964889043e-06, - "loss": 0.3612, - "step": 9662 - }, - { - "epoch": 0.6315273511535193, - "grad_norm": 0.4730345904827118, - "learning_rate": 9.096338745432899e-06, - "loss": 0.3715, - "step": 9663 - }, - { - "epoch": 0.6315927063590615, - "grad_norm": 0.456365168094635, - "learning_rate": 9.096138505997489e-06, - "loss": 0.4057, - "step": 9664 - }, - { - "epoch": 0.6316580615646036, - "grad_norm": 0.440775066614151, - "learning_rate": 9.095938246583796e-06, - "loss": 0.3604, - "step": 9665 - }, - { - "epoch": 0.6317234167701458, - "grad_norm": 0.4914442002773285, - "learning_rate": 9.09573796719279e-06, - "loss": 0.3982, - "step": 9666 - }, - { - "epoch": 0.6317887719756878, - "grad_norm": 0.49754732847213745, - "learning_rate": 9.095537667825452e-06, - "loss": 0.4219, - "step": 9667 - }, - { - "epoch": 0.63185412718123, - "grad_norm": 0.42917394638061523, - "learning_rate": 9.095337348482757e-06, - "loss": 0.3613, - "step": 9668 - }, - { - "epoch": 0.6319194823867721, - "grad_norm": 0.4514814615249634, - "learning_rate": 9.095137009165682e-06, - "loss": 0.4145, - "step": 9669 - }, - { - "epoch": 0.6319848375923143, - "grad_norm": 0.4623293876647949, - "learning_rate": 9.094936649875207e-06, - "loss": 0.3888, - "step": 9670 - }, - { - "epoch": 0.6320501927978563, - "grad_norm": 0.45264896750450134, - "learning_rate": 9.094736270612308e-06, - "loss": 0.3523, - "step": 9671 - }, - { - "epoch": 0.6321155480033984, - "grad_norm": 0.6229252815246582, - "learning_rate": 9.094535871377961e-06, - "loss": 0.4706, - "step": 9672 - }, - { - "epoch": 0.6321809032089406, - "grad_norm": 0.48615366220474243, - "learning_rate": 9.094335452173144e-06, - "loss": 0.4286, - "step": 9673 - }, - { - "epoch": 0.6322462584144827, - "grad_norm": 0.46924880146980286, - "learning_rate": 9.094135012998834e-06, - "loss": 0.4273, - "step": 9674 - }, - { - "epoch": 0.6323116136200249, - "grad_norm": 0.46026837825775146, - "learning_rate": 9.09393455385601e-06, - "loss": 0.3809, - "step": 9675 - }, - { - "epoch": 0.6323769688255669, - "grad_norm": 0.5779929161071777, - "learning_rate": 9.093734074745649e-06, - "loss": 0.3373, - "step": 9676 - }, - { - "epoch": 0.6324423240311091, - "grad_norm": 0.47199392318725586, - "learning_rate": 9.093533575668728e-06, - "loss": 0.3852, - "step": 9677 - }, - { - "epoch": 0.6325076792366512, - "grad_norm": 0.4600350856781006, - "learning_rate": 9.093333056626226e-06, - "loss": 0.3555, - "step": 9678 - }, - { - "epoch": 0.6325730344421934, - "grad_norm": 0.4776606857776642, - "learning_rate": 9.09313251761912e-06, - "loss": 0.4381, - "step": 9679 - }, - { - "epoch": 0.6326383896477354, - "grad_norm": 0.47482728958129883, - "learning_rate": 9.09293195864839e-06, - "loss": 0.3769, - "step": 9680 - }, - { - "epoch": 0.6327037448532775, - "grad_norm": 0.4603939354419708, - "learning_rate": 9.092731379715012e-06, - "loss": 0.3632, - "step": 9681 - }, - { - "epoch": 0.6327691000588197, - "grad_norm": 0.42856353521347046, - "learning_rate": 9.092530780819965e-06, - "loss": 0.3697, - "step": 9682 - }, - { - "epoch": 0.6328344552643618, - "grad_norm": 0.44450482726097107, - "learning_rate": 9.092330161964229e-06, - "loss": 0.3556, - "step": 9683 - }, - { - "epoch": 0.632899810469904, - "grad_norm": 0.4556718170642853, - "learning_rate": 9.09212952314878e-06, - "loss": 0.3703, - "step": 9684 - }, - { - "epoch": 0.632965165675446, - "grad_norm": 0.4377409815788269, - "learning_rate": 9.091928864374597e-06, - "loss": 0.3618, - "step": 9685 - }, - { - "epoch": 0.6330305208809882, - "grad_norm": 0.47488972544670105, - "learning_rate": 9.09172818564266e-06, - "loss": 0.401, - "step": 9686 - }, - { - "epoch": 0.6330958760865303, - "grad_norm": 0.4416325092315674, - "learning_rate": 9.091527486953947e-06, - "loss": 0.422, - "step": 9687 - }, - { - "epoch": 0.6331612312920725, - "grad_norm": 0.48478466272354126, - "learning_rate": 9.091326768309437e-06, - "loss": 0.4016, - "step": 9688 - }, - { - "epoch": 0.6332265864976145, - "grad_norm": 0.45282062888145447, - "learning_rate": 9.091126029710109e-06, - "loss": 0.4032, - "step": 9689 - }, - { - "epoch": 0.6332919417031566, - "grad_norm": 0.4405279755592346, - "learning_rate": 9.090925271156944e-06, - "loss": 0.3871, - "step": 9690 - }, - { - "epoch": 0.6333572969086988, - "grad_norm": 0.42671340703964233, - "learning_rate": 9.090724492650915e-06, - "loss": 0.3695, - "step": 9691 - }, - { - "epoch": 0.6334226521142409, - "grad_norm": 0.43295741081237793, - "learning_rate": 9.09052369419301e-06, - "loss": 0.3602, - "step": 9692 - }, - { - "epoch": 0.633488007319783, - "grad_norm": 0.47926750779151917, - "learning_rate": 9.090322875784202e-06, - "loss": 0.4064, - "step": 9693 - }, - { - "epoch": 0.6335533625253251, - "grad_norm": 0.40310317277908325, - "learning_rate": 9.090122037425471e-06, - "loss": 0.3284, - "step": 9694 - }, - { - "epoch": 0.6336187177308673, - "grad_norm": 0.46224668622016907, - "learning_rate": 9.089921179117798e-06, - "loss": 0.4115, - "step": 9695 - }, - { - "epoch": 0.6336840729364094, - "grad_norm": 0.4418547749519348, - "learning_rate": 9.089720300862164e-06, - "loss": 0.3853, - "step": 9696 - }, - { - "epoch": 0.6337494281419515, - "grad_norm": 0.4346838593482971, - "learning_rate": 9.089519402659548e-06, - "loss": 0.3575, - "step": 9697 - }, - { - "epoch": 0.6338147833474936, - "grad_norm": 0.4280930757522583, - "learning_rate": 9.089318484510927e-06, - "loss": 0.3947, - "step": 9698 - }, - { - "epoch": 0.6338801385530357, - "grad_norm": 0.4797299802303314, - "learning_rate": 9.089117546417284e-06, - "loss": 0.4431, - "step": 9699 - }, - { - "epoch": 0.6339454937585779, - "grad_norm": 0.4703907072544098, - "learning_rate": 9.088916588379598e-06, - "loss": 0.4059, - "step": 9700 - }, - { - "epoch": 0.63401084896412, - "grad_norm": 0.40756452083587646, - "learning_rate": 9.08871561039885e-06, - "loss": 0.3371, - "step": 9701 - }, - { - "epoch": 0.6340762041696621, - "grad_norm": 0.48780328035354614, - "learning_rate": 9.088514612476018e-06, - "loss": 0.4488, - "step": 9702 - }, - { - "epoch": 0.6341415593752042, - "grad_norm": 0.4277057945728302, - "learning_rate": 9.088313594612085e-06, - "loss": 0.3516, - "step": 9703 - }, - { - "epoch": 0.6342069145807464, - "grad_norm": 0.47396546602249146, - "learning_rate": 9.08811255680803e-06, - "loss": 0.4141, - "step": 9704 - }, - { - "epoch": 0.6342722697862885, - "grad_norm": 0.4338968098163605, - "learning_rate": 9.087911499064835e-06, - "loss": 0.3885, - "step": 9705 - }, - { - "epoch": 0.6343376249918306, - "grad_norm": 0.446594774723053, - "learning_rate": 9.087710421383477e-06, - "loss": 0.3842, - "step": 9706 - }, - { - "epoch": 0.6344029801973727, - "grad_norm": 0.4124334454536438, - "learning_rate": 9.087509323764941e-06, - "loss": 0.3193, - "step": 9707 - }, - { - "epoch": 0.6344683354029148, - "grad_norm": 0.45303478837013245, - "learning_rate": 9.087308206210204e-06, - "loss": 0.3565, - "step": 9708 - }, - { - "epoch": 0.634533690608457, - "grad_norm": 0.4626130759716034, - "learning_rate": 9.087107068720251e-06, - "loss": 0.3577, - "step": 9709 - }, - { - "epoch": 0.6345990458139991, - "grad_norm": 0.4484632611274719, - "learning_rate": 9.08690591129606e-06, - "loss": 0.3799, - "step": 9710 - }, - { - "epoch": 0.6346644010195412, - "grad_norm": 0.4217261075973511, - "learning_rate": 9.086704733938612e-06, - "loss": 0.3402, - "step": 9711 - }, - { - "epoch": 0.6347297562250833, - "grad_norm": 0.46096986532211304, - "learning_rate": 9.086503536648891e-06, - "loss": 0.4022, - "step": 9712 - }, - { - "epoch": 0.6347951114306255, - "grad_norm": 0.4396562874317169, - "learning_rate": 9.086302319427875e-06, - "loss": 0.3887, - "step": 9713 - }, - { - "epoch": 0.6348604666361676, - "grad_norm": 0.4368191063404083, - "learning_rate": 9.086101082276549e-06, - "loss": 0.4044, - "step": 9714 - }, - { - "epoch": 0.6349258218417096, - "grad_norm": 0.46362486481666565, - "learning_rate": 9.085899825195892e-06, - "loss": 0.3935, - "step": 9715 - }, - { - "epoch": 0.6349911770472518, - "grad_norm": 0.4828219413757324, - "learning_rate": 9.085698548186885e-06, - "loss": 0.4429, - "step": 9716 - }, - { - "epoch": 0.6350565322527939, - "grad_norm": 0.4345232844352722, - "learning_rate": 9.08549725125051e-06, - "loss": 0.3542, - "step": 9717 - }, - { - "epoch": 0.6351218874583361, - "grad_norm": 0.507585883140564, - "learning_rate": 9.085295934387752e-06, - "loss": 0.4885, - "step": 9718 - }, - { - "epoch": 0.6351872426638782, - "grad_norm": 0.4546085000038147, - "learning_rate": 9.085094597599589e-06, - "loss": 0.3786, - "step": 9719 - }, - { - "epoch": 0.6352525978694203, - "grad_norm": 0.43173351883888245, - "learning_rate": 9.084893240887005e-06, - "loss": 0.3803, - "step": 9720 - }, - { - "epoch": 0.6353179530749624, - "grad_norm": 0.4759143888950348, - "learning_rate": 9.08469186425098e-06, - "loss": 0.4048, - "step": 9721 - }, - { - "epoch": 0.6353833082805045, - "grad_norm": 0.45320361852645874, - "learning_rate": 9.0844904676925e-06, - "loss": 0.384, - "step": 9722 - }, - { - "epoch": 0.6354486634860467, - "grad_norm": 0.4534291923046112, - "learning_rate": 9.084289051212544e-06, - "loss": 0.3764, - "step": 9723 - }, - { - "epoch": 0.6355140186915887, - "grad_norm": 0.45015692710876465, - "learning_rate": 9.084087614812093e-06, - "loss": 0.3885, - "step": 9724 - }, - { - "epoch": 0.6355793738971309, - "grad_norm": 0.43396201729774475, - "learning_rate": 9.083886158492136e-06, - "loss": 0.3536, - "step": 9725 - }, - { - "epoch": 0.635644729102673, - "grad_norm": 0.4757612347602844, - "learning_rate": 9.08368468225365e-06, - "loss": 0.3941, - "step": 9726 - }, - { - "epoch": 0.6357100843082152, - "grad_norm": 0.4937000870704651, - "learning_rate": 9.083483186097616e-06, - "loss": 0.3941, - "step": 9727 - }, - { - "epoch": 0.6357754395137573, - "grad_norm": 0.47819656133651733, - "learning_rate": 9.083281670025024e-06, - "loss": 0.4073, - "step": 9728 - }, - { - "epoch": 0.6358407947192994, - "grad_norm": 0.4278254508972168, - "learning_rate": 9.083080134036851e-06, - "loss": 0.3591, - "step": 9729 - }, - { - "epoch": 0.6359061499248415, - "grad_norm": 0.4464288651943207, - "learning_rate": 9.082878578134082e-06, - "loss": 0.3979, - "step": 9730 - }, - { - "epoch": 0.6359715051303836, - "grad_norm": 0.45361876487731934, - "learning_rate": 9.0826770023177e-06, - "loss": 0.4091, - "step": 9731 - }, - { - "epoch": 0.6360368603359258, - "grad_norm": 0.4655058681964874, - "learning_rate": 9.082475406588686e-06, - "loss": 0.3877, - "step": 9732 - }, - { - "epoch": 0.6361022155414678, - "grad_norm": 0.44808417558670044, - "learning_rate": 9.082273790948027e-06, - "loss": 0.3874, - "step": 9733 - }, - { - "epoch": 0.63616757074701, - "grad_norm": 0.41832366585731506, - "learning_rate": 9.082072155396704e-06, - "loss": 0.3386, - "step": 9734 - }, - { - "epoch": 0.6362329259525521, - "grad_norm": 0.4112207889556885, - "learning_rate": 9.081870499935701e-06, - "loss": 0.3353, - "step": 9735 - }, - { - "epoch": 0.6362982811580943, - "grad_norm": 0.423401802778244, - "learning_rate": 9.081668824566002e-06, - "loss": 0.3663, - "step": 9736 - }, - { - "epoch": 0.6363636363636364, - "grad_norm": 0.46037980914115906, - "learning_rate": 9.081467129288589e-06, - "loss": 0.3953, - "step": 9737 - }, - { - "epoch": 0.6364289915691785, - "grad_norm": 0.43599933385849, - "learning_rate": 9.081265414104448e-06, - "loss": 0.3629, - "step": 9738 - }, - { - "epoch": 0.6364943467747206, - "grad_norm": 0.429604172706604, - "learning_rate": 9.08106367901456e-06, - "loss": 0.3367, - "step": 9739 - }, - { - "epoch": 0.6365597019802627, - "grad_norm": 0.42948323488235474, - "learning_rate": 9.080861924019912e-06, - "loss": 0.3685, - "step": 9740 - }, - { - "epoch": 0.6366250571858049, - "grad_norm": 0.4661257266998291, - "learning_rate": 9.080660149121487e-06, - "loss": 0.4667, - "step": 9741 - }, - { - "epoch": 0.6366904123913469, - "grad_norm": 0.4274749755859375, - "learning_rate": 9.080458354320267e-06, - "loss": 0.3483, - "step": 9742 - }, - { - "epoch": 0.6367557675968891, - "grad_norm": 0.46764200925827026, - "learning_rate": 9.08025653961724e-06, - "loss": 0.4217, - "step": 9743 - }, - { - "epoch": 0.6368211228024312, - "grad_norm": 0.4310452938079834, - "learning_rate": 9.080054705013387e-06, - "loss": 0.3702, - "step": 9744 - }, - { - "epoch": 0.6368864780079734, - "grad_norm": 0.4607413113117218, - "learning_rate": 9.079852850509694e-06, - "loss": 0.3794, - "step": 9745 - }, - { - "epoch": 0.6369518332135155, - "grad_norm": 0.46990257501602173, - "learning_rate": 9.079650976107147e-06, - "loss": 0.4215, - "step": 9746 - }, - { - "epoch": 0.6370171884190575, - "grad_norm": 0.46286189556121826, - "learning_rate": 9.079449081806726e-06, - "loss": 0.3654, - "step": 9747 - }, - { - "epoch": 0.6370825436245997, - "grad_norm": 0.41074416041374207, - "learning_rate": 9.079247167609419e-06, - "loss": 0.3235, - "step": 9748 - }, - { - "epoch": 0.6371478988301418, - "grad_norm": 0.4421272575855255, - "learning_rate": 9.079045233516213e-06, - "loss": 0.3509, - "step": 9749 - }, - { - "epoch": 0.637213254035684, - "grad_norm": 0.44864943623542786, - "learning_rate": 9.078843279528087e-06, - "loss": 0.3534, - "step": 9750 - }, - { - "epoch": 0.637278609241226, - "grad_norm": 0.45992445945739746, - "learning_rate": 9.078641305646032e-06, - "loss": 0.3895, - "step": 9751 - }, - { - "epoch": 0.6373439644467682, - "grad_norm": 0.432346373796463, - "learning_rate": 9.078439311871029e-06, - "loss": 0.3408, - "step": 9752 - }, - { - "epoch": 0.6374093196523103, - "grad_norm": 0.46255505084991455, - "learning_rate": 9.078237298204065e-06, - "loss": 0.3708, - "step": 9753 - }, - { - "epoch": 0.6374746748578525, - "grad_norm": 0.4515262246131897, - "learning_rate": 9.078035264646123e-06, - "loss": 0.383, - "step": 9754 - }, - { - "epoch": 0.6375400300633945, - "grad_norm": 0.4577791392803192, - "learning_rate": 9.077833211198192e-06, - "loss": 0.3924, - "step": 9755 - }, - { - "epoch": 0.6376053852689366, - "grad_norm": 0.4261878430843353, - "learning_rate": 9.077631137861255e-06, - "loss": 0.3724, - "step": 9756 - }, - { - "epoch": 0.6376707404744788, - "grad_norm": 0.4302404522895813, - "learning_rate": 9.0774290446363e-06, - "loss": 0.3362, - "step": 9757 - }, - { - "epoch": 0.6377360956800209, - "grad_norm": 0.47554245591163635, - "learning_rate": 9.07722693152431e-06, - "loss": 0.4322, - "step": 9758 - }, - { - "epoch": 0.6378014508855631, - "grad_norm": 0.4756726324558258, - "learning_rate": 9.077024798526273e-06, - "loss": 0.4354, - "step": 9759 - }, - { - "epoch": 0.6378668060911051, - "grad_norm": 0.44856563210487366, - "learning_rate": 9.07682264564317e-06, - "loss": 0.3674, - "step": 9760 - }, - { - "epoch": 0.6379321612966473, - "grad_norm": 0.4309118688106537, - "learning_rate": 9.076620472875994e-06, - "loss": 0.3612, - "step": 9761 - }, - { - "epoch": 0.6379975165021894, - "grad_norm": 0.4074327349662781, - "learning_rate": 9.076418280225727e-06, - "loss": 0.3181, - "step": 9762 - }, - { - "epoch": 0.6380628717077316, - "grad_norm": 0.46789079904556274, - "learning_rate": 9.076216067693355e-06, - "loss": 0.3931, - "step": 9763 - }, - { - "epoch": 0.6381282269132736, - "grad_norm": 0.423562616109848, - "learning_rate": 9.076013835279865e-06, - "loss": 0.3655, - "step": 9764 - }, - { - "epoch": 0.6381935821188157, - "grad_norm": 0.43467196822166443, - "learning_rate": 9.075811582986244e-06, - "loss": 0.3929, - "step": 9765 - }, - { - "epoch": 0.6382589373243579, - "grad_norm": 0.47196051478385925, - "learning_rate": 9.075609310813478e-06, - "loss": 0.3943, - "step": 9766 - }, - { - "epoch": 0.6383242925299, - "grad_norm": 0.4757579565048218, - "learning_rate": 9.075407018762554e-06, - "loss": 0.4466, - "step": 9767 - }, - { - "epoch": 0.6383896477354422, - "grad_norm": 0.42987725138664246, - "learning_rate": 9.075204706834458e-06, - "loss": 0.3845, - "step": 9768 - }, - { - "epoch": 0.6384550029409842, - "grad_norm": 0.4808485209941864, - "learning_rate": 9.075002375030176e-06, - "loss": 0.46, - "step": 9769 - }, - { - "epoch": 0.6385203581465264, - "grad_norm": 0.4296873211860657, - "learning_rate": 9.074800023350696e-06, - "loss": 0.3636, - "step": 9770 - }, - { - "epoch": 0.6385857133520685, - "grad_norm": 0.45422402024269104, - "learning_rate": 9.074597651797004e-06, - "loss": 0.3951, - "step": 9771 - }, - { - "epoch": 0.6386510685576107, - "grad_norm": 0.4010392725467682, - "learning_rate": 9.074395260370088e-06, - "loss": 0.3225, - "step": 9772 - }, - { - "epoch": 0.6387164237631527, - "grad_norm": 0.4246975779533386, - "learning_rate": 9.074192849070936e-06, - "loss": 0.3692, - "step": 9773 - }, - { - "epoch": 0.6387817789686948, - "grad_norm": 0.45356330275535583, - "learning_rate": 9.073990417900533e-06, - "loss": 0.3873, - "step": 9774 - }, - { - "epoch": 0.638847134174237, - "grad_norm": 0.45848318934440613, - "learning_rate": 9.073787966859866e-06, - "loss": 0.4449, - "step": 9775 - }, - { - "epoch": 0.6389124893797791, - "grad_norm": 0.43433430790901184, - "learning_rate": 9.073585495949927e-06, - "loss": 0.3788, - "step": 9776 - }, - { - "epoch": 0.6389778445853213, - "grad_norm": 0.47940051555633545, - "learning_rate": 9.073383005171699e-06, - "loss": 0.4517, - "step": 9777 - }, - { - "epoch": 0.6390431997908633, - "grad_norm": 0.416909784078598, - "learning_rate": 9.07318049452617e-06, - "loss": 0.3309, - "step": 9778 - }, - { - "epoch": 0.6391085549964055, - "grad_norm": 0.4129504859447479, - "learning_rate": 9.07297796401433e-06, - "loss": 0.3248, - "step": 9779 - }, - { - "epoch": 0.6391739102019476, - "grad_norm": 0.4612945318222046, - "learning_rate": 9.072775413637163e-06, - "loss": 0.3767, - "step": 9780 - }, - { - "epoch": 0.6392392654074897, - "grad_norm": 0.46480366587638855, - "learning_rate": 9.072572843395661e-06, - "loss": 0.4069, - "step": 9781 - }, - { - "epoch": 0.6393046206130318, - "grad_norm": 0.4225768744945526, - "learning_rate": 9.072370253290813e-06, - "loss": 0.3796, - "step": 9782 - }, - { - "epoch": 0.6393699758185739, - "grad_norm": 0.4254634380340576, - "learning_rate": 9.0721676433236e-06, - "loss": 0.3772, - "step": 9783 - }, - { - "epoch": 0.6394353310241161, - "grad_norm": 0.4734971225261688, - "learning_rate": 9.071965013495017e-06, - "loss": 0.3798, - "step": 9784 - }, - { - "epoch": 0.6395006862296582, - "grad_norm": 0.4534674286842346, - "learning_rate": 9.07176236380605e-06, - "loss": 0.4017, - "step": 9785 - }, - { - "epoch": 0.6395660414352003, - "grad_norm": 0.4039697051048279, - "learning_rate": 9.071559694257686e-06, - "loss": 0.3289, - "step": 9786 - }, - { - "epoch": 0.6396313966407424, - "grad_norm": 0.45211726427078247, - "learning_rate": 9.071357004850915e-06, - "loss": 0.3866, - "step": 9787 - }, - { - "epoch": 0.6396967518462846, - "grad_norm": 0.44150295853614807, - "learning_rate": 9.071154295586727e-06, - "loss": 0.3445, - "step": 9788 - }, - { - "epoch": 0.6397621070518267, - "grad_norm": 0.4525618553161621, - "learning_rate": 9.070951566466109e-06, - "loss": 0.3692, - "step": 9789 - }, - { - "epoch": 0.6398274622573688, - "grad_norm": 0.4716513156890869, - "learning_rate": 9.07074881749005e-06, - "loss": 0.387, - "step": 9790 - }, - { - "epoch": 0.6398928174629109, - "grad_norm": 0.45230627059936523, - "learning_rate": 9.070546048659537e-06, - "loss": 0.4024, - "step": 9791 - }, - { - "epoch": 0.639958172668453, - "grad_norm": 0.4527648687362671, - "learning_rate": 9.07034325997556e-06, - "loss": 0.3378, - "step": 9792 - }, - { - "epoch": 0.6400235278739952, - "grad_norm": 0.43456801772117615, - "learning_rate": 9.07014045143911e-06, - "loss": 0.3922, - "step": 9793 - }, - { - "epoch": 0.6400888830795373, - "grad_norm": 0.4388381242752075, - "learning_rate": 9.069937623051177e-06, - "loss": 0.3462, - "step": 9794 - }, - { - "epoch": 0.6401542382850794, - "grad_norm": 0.44833528995513916, - "learning_rate": 9.069734774812747e-06, - "loss": 0.3753, - "step": 9795 - }, - { - "epoch": 0.6402195934906215, - "grad_norm": 0.44104278087615967, - "learning_rate": 9.06953190672481e-06, - "loss": 0.3365, - "step": 9796 - }, - { - "epoch": 0.6402849486961637, - "grad_norm": 0.4822522699832916, - "learning_rate": 9.069329018788357e-06, - "loss": 0.4566, - "step": 9797 - }, - { - "epoch": 0.6403503039017058, - "grad_norm": 0.4667324125766754, - "learning_rate": 9.069126111004376e-06, - "loss": 0.4038, - "step": 9798 - }, - { - "epoch": 0.6404156591072478, - "grad_norm": 0.471744179725647, - "learning_rate": 9.068923183373856e-06, - "loss": 0.3891, - "step": 9799 - }, - { - "epoch": 0.64048101431279, - "grad_norm": 0.46452444791793823, - "learning_rate": 9.06872023589779e-06, - "loss": 0.3823, - "step": 9800 - }, - { - "epoch": 0.6405463695183321, - "grad_norm": 0.39168450236320496, - "learning_rate": 9.068517268577166e-06, - "loss": 0.2793, - "step": 9801 - }, - { - "epoch": 0.6406117247238743, - "grad_norm": 0.40817102789878845, - "learning_rate": 9.068314281412974e-06, - "loss": 0.3181, - "step": 9802 - }, - { - "epoch": 0.6406770799294164, - "grad_norm": 0.4927980601787567, - "learning_rate": 9.068111274406202e-06, - "loss": 0.4012, - "step": 9803 - }, - { - "epoch": 0.6407424351349585, - "grad_norm": 0.4919649660587311, - "learning_rate": 9.067908247557842e-06, - "loss": 0.443, - "step": 9804 - }, - { - "epoch": 0.6408077903405006, - "grad_norm": 0.4291025996208191, - "learning_rate": 9.067705200868886e-06, - "loss": 0.3336, - "step": 9805 - }, - { - "epoch": 0.6408731455460427, - "grad_norm": 0.46074026823043823, - "learning_rate": 9.067502134340321e-06, - "loss": 0.4087, - "step": 9806 - }, - { - "epoch": 0.6409385007515849, - "grad_norm": 0.45968097448349, - "learning_rate": 9.06729904797314e-06, - "loss": 0.4073, - "step": 9807 - }, - { - "epoch": 0.641003855957127, - "grad_norm": 0.4455585777759552, - "learning_rate": 9.067095941768332e-06, - "loss": 0.3662, - "step": 9808 - }, - { - "epoch": 0.6410692111626691, - "grad_norm": 0.442227303981781, - "learning_rate": 9.066892815726888e-06, - "loss": 0.3402, - "step": 9809 - }, - { - "epoch": 0.6411345663682112, - "grad_norm": 0.4429609179496765, - "learning_rate": 9.0666896698498e-06, - "loss": 0.3847, - "step": 9810 - }, - { - "epoch": 0.6411999215737534, - "grad_norm": 0.4293217957019806, - "learning_rate": 9.066486504138056e-06, - "loss": 0.3697, - "step": 9811 - }, - { - "epoch": 0.6412652767792955, - "grad_norm": 0.4350472092628479, - "learning_rate": 9.06628331859265e-06, - "loss": 0.372, - "step": 9812 - }, - { - "epoch": 0.6413306319848376, - "grad_norm": 0.4853617548942566, - "learning_rate": 9.066080113214571e-06, - "loss": 0.4415, - "step": 9813 - }, - { - "epoch": 0.6413959871903797, - "grad_norm": 0.4751304090023041, - "learning_rate": 9.06587688800481e-06, - "loss": 0.3688, - "step": 9814 - }, - { - "epoch": 0.6414613423959218, - "grad_norm": 0.44478118419647217, - "learning_rate": 9.065673642964358e-06, - "loss": 0.3377, - "step": 9815 - }, - { - "epoch": 0.641526697601464, - "grad_norm": 0.4865337312221527, - "learning_rate": 9.06547037809421e-06, - "loss": 0.4008, - "step": 9816 - }, - { - "epoch": 0.641592052807006, - "grad_norm": 0.4339168667793274, - "learning_rate": 9.065267093395353e-06, - "loss": 0.402, - "step": 9817 - }, - { - "epoch": 0.6416574080125482, - "grad_norm": 0.43428879976272583, - "learning_rate": 9.06506378886878e-06, - "loss": 0.3741, - "step": 9818 - }, - { - "epoch": 0.6417227632180903, - "grad_norm": 0.4526311159133911, - "learning_rate": 9.064860464515481e-06, - "loss": 0.4022, - "step": 9819 - }, - { - "epoch": 0.6417881184236325, - "grad_norm": 0.45775577425956726, - "learning_rate": 9.064657120336452e-06, - "loss": 0.3982, - "step": 9820 - }, - { - "epoch": 0.6418534736291746, - "grad_norm": 0.44582659006118774, - "learning_rate": 9.06445375633268e-06, - "loss": 0.3815, - "step": 9821 - }, - { - "epoch": 0.6419188288347167, - "grad_norm": 0.44306325912475586, - "learning_rate": 9.064250372505162e-06, - "loss": 0.3701, - "step": 9822 - }, - { - "epoch": 0.6419841840402588, - "grad_norm": 0.40925756096839905, - "learning_rate": 9.064046968854885e-06, - "loss": 0.3188, - "step": 9823 - }, - { - "epoch": 0.6420495392458009, - "grad_norm": 0.4516238570213318, - "learning_rate": 9.063843545382841e-06, - "loss": 0.4295, - "step": 9824 - }, - { - "epoch": 0.6421148944513431, - "grad_norm": 0.503147542476654, - "learning_rate": 9.063640102090029e-06, - "loss": 0.3328, - "step": 9825 - }, - { - "epoch": 0.6421802496568851, - "grad_norm": 0.4248664081096649, - "learning_rate": 9.063436638977432e-06, - "loss": 0.3851, - "step": 9826 - }, - { - "epoch": 0.6422456048624273, - "grad_norm": 0.46630939841270447, - "learning_rate": 9.06323315604605e-06, - "loss": 0.4135, - "step": 9827 - }, - { - "epoch": 0.6423109600679694, - "grad_norm": 0.4635048508644104, - "learning_rate": 9.063029653296868e-06, - "loss": 0.3847, - "step": 9828 - }, - { - "epoch": 0.6423763152735116, - "grad_norm": 0.4125978350639343, - "learning_rate": 9.062826130730886e-06, - "loss": 0.3321, - "step": 9829 - }, - { - "epoch": 0.6424416704790537, - "grad_norm": 0.4719914197921753, - "learning_rate": 9.062622588349094e-06, - "loss": 0.3317, - "step": 9830 - }, - { - "epoch": 0.6425070256845957, - "grad_norm": 0.4191725254058838, - "learning_rate": 9.062419026152483e-06, - "loss": 0.3445, - "step": 9831 - }, - { - "epoch": 0.6425723808901379, - "grad_norm": 0.4677099287509918, - "learning_rate": 9.062215444142047e-06, - "loss": 0.41, - "step": 9832 - }, - { - "epoch": 0.64263773609568, - "grad_norm": 0.4397822320461273, - "learning_rate": 9.06201184231878e-06, - "loss": 0.3687, - "step": 9833 - }, - { - "epoch": 0.6427030913012222, - "grad_norm": 0.4193592369556427, - "learning_rate": 9.061808220683672e-06, - "loss": 0.3517, - "step": 9834 - }, - { - "epoch": 0.6427684465067642, - "grad_norm": 0.4300253093242645, - "learning_rate": 9.06160457923772e-06, - "loss": 0.3616, - "step": 9835 - }, - { - "epoch": 0.6428338017123064, - "grad_norm": 0.42338287830352783, - "learning_rate": 9.061400917981915e-06, - "loss": 0.366, - "step": 9836 - }, - { - "epoch": 0.6428991569178485, - "grad_norm": 0.45557650923728943, - "learning_rate": 9.06119723691725e-06, - "loss": 0.3821, - "step": 9837 - }, - { - "epoch": 0.6429645121233907, - "grad_norm": 0.451945424079895, - "learning_rate": 9.06099353604472e-06, - "loss": 0.4168, - "step": 9838 - }, - { - "epoch": 0.6430298673289327, - "grad_norm": 0.4171278774738312, - "learning_rate": 9.060789815365317e-06, - "loss": 0.3309, - "step": 9839 - }, - { - "epoch": 0.6430952225344748, - "grad_norm": 0.45155543088912964, - "learning_rate": 9.060586074880036e-06, - "loss": 0.4161, - "step": 9840 - }, - { - "epoch": 0.643160577740017, - "grad_norm": 0.43141868710517883, - "learning_rate": 9.060382314589871e-06, - "loss": 0.3604, - "step": 9841 - }, - { - "epoch": 0.6432259329455591, - "grad_norm": 0.40812331438064575, - "learning_rate": 9.060178534495811e-06, - "loss": 0.3135, - "step": 9842 - }, - { - "epoch": 0.6432912881511013, - "grad_norm": 0.4390416145324707, - "learning_rate": 9.059974734598858e-06, - "loss": 0.3702, - "step": 9843 - }, - { - "epoch": 0.6433566433566433, - "grad_norm": 0.4729102849960327, - "learning_rate": 9.059770914899999e-06, - "loss": 0.3938, - "step": 9844 - }, - { - "epoch": 0.6434219985621855, - "grad_norm": 0.4624210596084595, - "learning_rate": 9.059567075400232e-06, - "loss": 0.434, - "step": 9845 - }, - { - "epoch": 0.6434873537677276, - "grad_norm": 0.42629072070121765, - "learning_rate": 9.05936321610055e-06, - "loss": 0.3571, - "step": 9846 - }, - { - "epoch": 0.6435527089732698, - "grad_norm": 0.46381500363349915, - "learning_rate": 9.059159337001945e-06, - "loss": 0.4149, - "step": 9847 - }, - { - "epoch": 0.6436180641788118, - "grad_norm": 0.46328607201576233, - "learning_rate": 9.058955438105416e-06, - "loss": 0.378, - "step": 9848 - }, - { - "epoch": 0.6436834193843539, - "grad_norm": 0.45271798968315125, - "learning_rate": 9.058751519411957e-06, - "loss": 0.4004, - "step": 9849 - }, - { - "epoch": 0.6437487745898961, - "grad_norm": 0.4705389738082886, - "learning_rate": 9.058547580922556e-06, - "loss": 0.4381, - "step": 9850 - }, - { - "epoch": 0.6438141297954382, - "grad_norm": 0.4463994801044464, - "learning_rate": 9.058343622638218e-06, - "loss": 0.3849, - "step": 9851 - }, - { - "epoch": 0.6438794850009804, - "grad_norm": 0.45492780208587646, - "learning_rate": 9.058139644559929e-06, - "loss": 0.4019, - "step": 9852 - }, - { - "epoch": 0.6439448402065224, - "grad_norm": 0.4657062590122223, - "learning_rate": 9.057935646688685e-06, - "loss": 0.3998, - "step": 9853 - }, - { - "epoch": 0.6440101954120646, - "grad_norm": 0.4456426799297333, - "learning_rate": 9.057731629025485e-06, - "loss": 0.426, - "step": 9854 - }, - { - "epoch": 0.6440755506176067, - "grad_norm": 0.44574275612831116, - "learning_rate": 9.057527591571325e-06, - "loss": 0.3418, - "step": 9855 - }, - { - "epoch": 0.6441409058231489, - "grad_norm": 0.4843095541000366, - "learning_rate": 9.057323534327194e-06, - "loss": 0.4494, - "step": 9856 - }, - { - "epoch": 0.6442062610286909, - "grad_norm": 0.45727428793907166, - "learning_rate": 9.05711945729409e-06, - "loss": 0.3891, - "step": 9857 - }, - { - "epoch": 0.644271616234233, - "grad_norm": 0.45042911171913147, - "learning_rate": 9.056915360473011e-06, - "loss": 0.3648, - "step": 9858 - }, - { - "epoch": 0.6443369714397752, - "grad_norm": 0.44673705101013184, - "learning_rate": 9.056711243864949e-06, - "loss": 0.3898, - "step": 9859 - }, - { - "epoch": 0.6444023266453173, - "grad_norm": 0.44384297728538513, - "learning_rate": 9.056507107470901e-06, - "loss": 0.3464, - "step": 9860 - }, - { - "epoch": 0.6444676818508595, - "grad_norm": 0.4594115912914276, - "learning_rate": 9.056302951291863e-06, - "loss": 0.3769, - "step": 9861 - }, - { - "epoch": 0.6445330370564015, - "grad_norm": 0.4539910554885864, - "learning_rate": 9.056098775328829e-06, - "loss": 0.3618, - "step": 9862 - }, - { - "epoch": 0.6445983922619437, - "grad_norm": 0.4160584509372711, - "learning_rate": 9.055894579582798e-06, - "loss": 0.2929, - "step": 9863 - }, - { - "epoch": 0.6446637474674858, - "grad_norm": 0.46673911809921265, - "learning_rate": 9.055690364054764e-06, - "loss": 0.3894, - "step": 9864 - }, - { - "epoch": 0.6447291026730279, - "grad_norm": 0.4321063756942749, - "learning_rate": 9.055486128745723e-06, - "loss": 0.3678, - "step": 9865 - }, - { - "epoch": 0.64479445787857, - "grad_norm": 0.446098268032074, - "learning_rate": 9.05528187365667e-06, - "loss": 0.336, - "step": 9866 - }, - { - "epoch": 0.6448598130841121, - "grad_norm": 0.43968868255615234, - "learning_rate": 9.055077598788603e-06, - "loss": 0.4202, - "step": 9867 - }, - { - "epoch": 0.6449251682896543, - "grad_norm": 0.47653335332870483, - "learning_rate": 9.054873304142518e-06, - "loss": 0.404, - "step": 9868 - }, - { - "epoch": 0.6449905234951964, - "grad_norm": 0.4718948006629944, - "learning_rate": 9.05466898971941e-06, - "loss": 0.4296, - "step": 9869 - }, - { - "epoch": 0.6450558787007385, - "grad_norm": 0.4796658754348755, - "learning_rate": 9.054464655520278e-06, - "loss": 0.451, - "step": 9870 - }, - { - "epoch": 0.6451212339062806, - "grad_norm": 0.45507287979125977, - "learning_rate": 9.054260301546116e-06, - "loss": 0.4176, - "step": 9871 - }, - { - "epoch": 0.6451865891118228, - "grad_norm": 0.4399968683719635, - "learning_rate": 9.054055927797924e-06, - "loss": 0.3779, - "step": 9872 - }, - { - "epoch": 0.6452519443173649, - "grad_norm": 0.4842342436313629, - "learning_rate": 9.053851534276695e-06, - "loss": 0.4452, - "step": 9873 - }, - { - "epoch": 0.645317299522907, - "grad_norm": 0.4472092092037201, - "learning_rate": 9.053647120983428e-06, - "loss": 0.3702, - "step": 9874 - }, - { - "epoch": 0.6453826547284491, - "grad_norm": 0.4460185170173645, - "learning_rate": 9.053442687919121e-06, - "loss": 0.4037, - "step": 9875 - }, - { - "epoch": 0.6454480099339912, - "grad_norm": 0.4718061685562134, - "learning_rate": 9.053238235084768e-06, - "loss": 0.4239, - "step": 9876 - }, - { - "epoch": 0.6455133651395334, - "grad_norm": 0.45335057377815247, - "learning_rate": 9.05303376248137e-06, - "loss": 0.4205, - "step": 9877 - }, - { - "epoch": 0.6455787203450755, - "grad_norm": 0.4372648596763611, - "learning_rate": 9.05282927010992e-06, - "loss": 0.3722, - "step": 9878 - }, - { - "epoch": 0.6456440755506176, - "grad_norm": 0.44156113266944885, - "learning_rate": 9.052624757971418e-06, - "loss": 0.3832, - "step": 9879 - }, - { - "epoch": 0.6457094307561597, - "grad_norm": 0.44538334012031555, - "learning_rate": 9.05242022606686e-06, - "loss": 0.3445, - "step": 9880 - }, - { - "epoch": 0.6457747859617019, - "grad_norm": 0.44533470273017883, - "learning_rate": 9.052215674397249e-06, - "loss": 0.3613, - "step": 9881 - }, - { - "epoch": 0.645840141167244, - "grad_norm": 0.41601648926734924, - "learning_rate": 9.052011102963574e-06, - "loss": 0.3331, - "step": 9882 - }, - { - "epoch": 0.645905496372786, - "grad_norm": 0.4672471284866333, - "learning_rate": 9.051806511766839e-06, - "loss": 0.3815, - "step": 9883 - }, - { - "epoch": 0.6459708515783282, - "grad_norm": 0.4555833637714386, - "learning_rate": 9.051601900808041e-06, - "loss": 0.414, - "step": 9884 - }, - { - "epoch": 0.6460362067838703, - "grad_norm": 0.4334162175655365, - "learning_rate": 9.051397270088174e-06, - "loss": 0.3493, - "step": 9885 - }, - { - "epoch": 0.6461015619894125, - "grad_norm": 0.4448656141757965, - "learning_rate": 9.05119261960824e-06, - "loss": 0.3652, - "step": 9886 - }, - { - "epoch": 0.6461669171949546, - "grad_norm": 0.495453804731369, - "learning_rate": 9.050987949369237e-06, - "loss": 0.4441, - "step": 9887 - }, - { - "epoch": 0.6462322724004967, - "grad_norm": 0.43579965829849243, - "learning_rate": 9.050783259372163e-06, - "loss": 0.3957, - "step": 9888 - }, - { - "epoch": 0.6462976276060388, - "grad_norm": 0.4534274637699127, - "learning_rate": 9.050578549618015e-06, - "loss": 0.4208, - "step": 9889 - }, - { - "epoch": 0.6463629828115809, - "grad_norm": 0.4602999687194824, - "learning_rate": 9.050373820107791e-06, - "loss": 0.395, - "step": 9890 - }, - { - "epoch": 0.6464283380171231, - "grad_norm": 0.4523613452911377, - "learning_rate": 9.050169070842492e-06, - "loss": 0.3936, - "step": 9891 - }, - { - "epoch": 0.6464936932226651, - "grad_norm": 0.4505181908607483, - "learning_rate": 9.049964301823114e-06, - "loss": 0.3851, - "step": 9892 - }, - { - "epoch": 0.6465590484282073, - "grad_norm": 0.47814926505088806, - "learning_rate": 9.049759513050657e-06, - "loss": 0.4121, - "step": 9893 - }, - { - "epoch": 0.6466244036337494, - "grad_norm": 0.43603309988975525, - "learning_rate": 9.049554704526122e-06, - "loss": 0.3961, - "step": 9894 - }, - { - "epoch": 0.6466897588392916, - "grad_norm": 0.4300692677497864, - "learning_rate": 9.049349876250506e-06, - "loss": 0.3836, - "step": 9895 - }, - { - "epoch": 0.6467551140448337, - "grad_norm": 0.4427059292793274, - "learning_rate": 9.049145028224806e-06, - "loss": 0.377, - "step": 9896 - }, - { - "epoch": 0.6468204692503758, - "grad_norm": 0.506506085395813, - "learning_rate": 9.048940160450023e-06, - "loss": 0.3877, - "step": 9897 - }, - { - "epoch": 0.6468858244559179, - "grad_norm": 0.45513319969177246, - "learning_rate": 9.048735272927156e-06, - "loss": 0.4026, - "step": 9898 - }, - { - "epoch": 0.64695117966146, - "grad_norm": 0.42491936683654785, - "learning_rate": 9.048530365657205e-06, - "loss": 0.3396, - "step": 9899 - }, - { - "epoch": 0.6470165348670022, - "grad_norm": 0.46710488200187683, - "learning_rate": 9.04832543864117e-06, - "loss": 0.3938, - "step": 9900 - }, - { - "epoch": 0.6470818900725442, - "grad_norm": 0.5009361505508423, - "learning_rate": 9.048120491880047e-06, - "loss": 0.5027, - "step": 9901 - }, - { - "epoch": 0.6471472452780864, - "grad_norm": 0.4301183521747589, - "learning_rate": 9.04791552537484e-06, - "loss": 0.3785, - "step": 9902 - }, - { - "epoch": 0.6472126004836285, - "grad_norm": 0.43310508131980896, - "learning_rate": 9.047710539126546e-06, - "loss": 0.3588, - "step": 9903 - }, - { - "epoch": 0.6472779556891707, - "grad_norm": 0.439488023519516, - "learning_rate": 9.047505533136165e-06, - "loss": 0.3706, - "step": 9904 - }, - { - "epoch": 0.6473433108947128, - "grad_norm": 0.43672558665275574, - "learning_rate": 9.047300507404698e-06, - "loss": 0.4033, - "step": 9905 - }, - { - "epoch": 0.6474086661002549, - "grad_norm": 0.4547431468963623, - "learning_rate": 9.047095461933145e-06, - "loss": 0.4193, - "step": 9906 - }, - { - "epoch": 0.647474021305797, - "grad_norm": 0.4387088716030121, - "learning_rate": 9.046890396722503e-06, - "loss": 0.3858, - "step": 9907 - }, - { - "epoch": 0.6475393765113391, - "grad_norm": 0.45879948139190674, - "learning_rate": 9.046685311773775e-06, - "loss": 0.3902, - "step": 9908 - }, - { - "epoch": 0.6476047317168813, - "grad_norm": 0.4278230369091034, - "learning_rate": 9.046480207087962e-06, - "loss": 0.349, - "step": 9909 - }, - { - "epoch": 0.6476700869224233, - "grad_norm": 0.46290960907936096, - "learning_rate": 9.046275082666064e-06, - "loss": 0.3722, - "step": 9910 - }, - { - "epoch": 0.6477354421279655, - "grad_norm": 0.4628424048423767, - "learning_rate": 9.046069938509078e-06, - "loss": 0.4117, - "step": 9911 - }, - { - "epoch": 0.6478007973335076, - "grad_norm": 0.44287049770355225, - "learning_rate": 9.04586477461801e-06, - "loss": 0.3894, - "step": 9912 - }, - { - "epoch": 0.6478661525390498, - "grad_norm": 0.4193098843097687, - "learning_rate": 9.045659590993856e-06, - "loss": 0.3577, - "step": 9913 - }, - { - "epoch": 0.6479315077445919, - "grad_norm": 0.4594117999076843, - "learning_rate": 9.04545438763762e-06, - "loss": 0.4194, - "step": 9914 - }, - { - "epoch": 0.6479968629501339, - "grad_norm": 0.4377639889717102, - "learning_rate": 9.0452491645503e-06, - "loss": 0.3881, - "step": 9915 - }, - { - "epoch": 0.6480622181556761, - "grad_norm": 0.4739892780780792, - "learning_rate": 9.0450439217329e-06, - "loss": 0.3911, - "step": 9916 - }, - { - "epoch": 0.6481275733612182, - "grad_norm": 0.4282979667186737, - "learning_rate": 9.044838659186417e-06, - "loss": 0.3675, - "step": 9917 - }, - { - "epoch": 0.6481929285667604, - "grad_norm": 0.4598449468612671, - "learning_rate": 9.044633376911857e-06, - "loss": 0.3787, - "step": 9918 - }, - { - "epoch": 0.6482582837723024, - "grad_norm": 0.4358254075050354, - "learning_rate": 9.04442807491022e-06, - "loss": 0.3695, - "step": 9919 - }, - { - "epoch": 0.6483236389778446, - "grad_norm": 0.4686878025531769, - "learning_rate": 9.044222753182502e-06, - "loss": 0.4344, - "step": 9920 - }, - { - "epoch": 0.6483889941833867, - "grad_norm": 0.4279601573944092, - "learning_rate": 9.04401741172971e-06, - "loss": 0.3576, - "step": 9921 - }, - { - "epoch": 0.6484543493889289, - "grad_norm": 0.4866029918193817, - "learning_rate": 9.043812050552847e-06, - "loss": 0.403, - "step": 9922 - }, - { - "epoch": 0.648519704594471, - "grad_norm": 0.4783462584018707, - "learning_rate": 9.043606669652909e-06, - "loss": 0.4194, - "step": 9923 - }, - { - "epoch": 0.648585059800013, - "grad_norm": 0.456325888633728, - "learning_rate": 9.0434012690309e-06, - "loss": 0.3844, - "step": 9924 - }, - { - "epoch": 0.6486504150055552, - "grad_norm": 0.4528539180755615, - "learning_rate": 9.043195848687824e-06, - "loss": 0.38, - "step": 9925 - }, - { - "epoch": 0.6487157702110973, - "grad_norm": 0.5295087695121765, - "learning_rate": 9.04299040862468e-06, - "loss": 0.4731, - "step": 9926 - }, - { - "epoch": 0.6487811254166395, - "grad_norm": 0.4058385193347931, - "learning_rate": 9.042784948842471e-06, - "loss": 0.3166, - "step": 9927 - }, - { - "epoch": 0.6488464806221815, - "grad_norm": 0.4409596920013428, - "learning_rate": 9.042579469342201e-06, - "loss": 0.3637, - "step": 9928 - }, - { - "epoch": 0.6489118358277237, - "grad_norm": 0.44786572456359863, - "learning_rate": 9.042373970124869e-06, - "loss": 0.3934, - "step": 9929 - }, - { - "epoch": 0.6489771910332658, - "grad_norm": 0.4519539177417755, - "learning_rate": 9.042168451191478e-06, - "loss": 0.3635, - "step": 9930 - }, - { - "epoch": 0.649042546238808, - "grad_norm": 0.42357999086380005, - "learning_rate": 9.041962912543033e-06, - "loss": 0.3735, - "step": 9931 - }, - { - "epoch": 0.64910790144435, - "grad_norm": 0.44480448961257935, - "learning_rate": 9.041757354180533e-06, - "loss": 0.3543, - "step": 9932 - }, - { - "epoch": 0.6491732566498921, - "grad_norm": 0.43090957403182983, - "learning_rate": 9.041551776104982e-06, - "loss": 0.3414, - "step": 9933 - }, - { - "epoch": 0.6492386118554343, - "grad_norm": 0.4563465714454651, - "learning_rate": 9.041346178317385e-06, - "loss": 0.4193, - "step": 9934 - }, - { - "epoch": 0.6493039670609764, - "grad_norm": 0.43380168080329895, - "learning_rate": 9.041140560818742e-06, - "loss": 0.3844, - "step": 9935 - }, - { - "epoch": 0.6493693222665186, - "grad_norm": 0.4181695878505707, - "learning_rate": 9.040934923610055e-06, - "loss": 0.3412, - "step": 9936 - }, - { - "epoch": 0.6494346774720606, - "grad_norm": 0.4257018566131592, - "learning_rate": 9.040729266692329e-06, - "loss": 0.3695, - "step": 9937 - }, - { - "epoch": 0.6495000326776028, - "grad_norm": 0.4463997185230255, - "learning_rate": 9.040523590066567e-06, - "loss": 0.3977, - "step": 9938 - }, - { - "epoch": 0.6495653878831449, - "grad_norm": 0.4287792444229126, - "learning_rate": 9.040317893733772e-06, - "loss": 0.3249, - "step": 9939 - }, - { - "epoch": 0.6496307430886871, - "grad_norm": 0.44113147258758545, - "learning_rate": 9.040112177694947e-06, - "loss": 0.3643, - "step": 9940 - }, - { - "epoch": 0.6496960982942291, - "grad_norm": 0.4458673596382141, - "learning_rate": 9.039906441951095e-06, - "loss": 0.3844, - "step": 9941 - }, - { - "epoch": 0.6497614534997712, - "grad_norm": 0.5157591700553894, - "learning_rate": 9.039700686503218e-06, - "loss": 0.4501, - "step": 9942 - }, - { - "epoch": 0.6498268087053134, - "grad_norm": 0.42506077885627747, - "learning_rate": 9.039494911352324e-06, - "loss": 0.372, - "step": 9943 - }, - { - "epoch": 0.6498921639108555, - "grad_norm": 0.44462844729423523, - "learning_rate": 9.039289116499412e-06, - "loss": 0.4034, - "step": 9944 - }, - { - "epoch": 0.6499575191163977, - "grad_norm": 0.4465774893760681, - "learning_rate": 9.039083301945489e-06, - "loss": 0.3769, - "step": 9945 - }, - { - "epoch": 0.6500228743219397, - "grad_norm": 0.5269215703010559, - "learning_rate": 9.038877467691555e-06, - "loss": 0.489, - "step": 9946 - }, - { - "epoch": 0.6500882295274819, - "grad_norm": 0.4372009336948395, - "learning_rate": 9.03867161373862e-06, - "loss": 0.3708, - "step": 9947 - }, - { - "epoch": 0.650153584733024, - "grad_norm": 0.40767350792884827, - "learning_rate": 9.038465740087683e-06, - "loss": 0.3304, - "step": 9948 - }, - { - "epoch": 0.650218939938566, - "grad_norm": 0.4977891445159912, - "learning_rate": 9.038259846739748e-06, - "loss": 0.3841, - "step": 9949 - }, - { - "epoch": 0.6502842951441082, - "grad_norm": 0.4250839650630951, - "learning_rate": 9.038053933695823e-06, - "loss": 0.3424, - "step": 9950 - }, - { - "epoch": 0.6503496503496503, - "grad_norm": 0.4356164038181305, - "learning_rate": 9.037848000956908e-06, - "loss": 0.3512, - "step": 9951 - }, - { - "epoch": 0.6504150055551925, - "grad_norm": 0.46105310320854187, - "learning_rate": 9.03764204852401e-06, - "loss": 0.4104, - "step": 9952 - }, - { - "epoch": 0.6504803607607346, - "grad_norm": 0.4970625340938568, - "learning_rate": 9.037436076398134e-06, - "loss": 0.4359, - "step": 9953 - }, - { - "epoch": 0.6505457159662767, - "grad_norm": 0.47230228781700134, - "learning_rate": 9.037230084580281e-06, - "loss": 0.3773, - "step": 9954 - }, - { - "epoch": 0.6506110711718188, - "grad_norm": 0.46225905418395996, - "learning_rate": 9.037024073071461e-06, - "loss": 0.405, - "step": 9955 - }, - { - "epoch": 0.650676426377361, - "grad_norm": 0.462067186832428, - "learning_rate": 9.036818041872674e-06, - "loss": 0.413, - "step": 9956 - }, - { - "epoch": 0.6507417815829031, - "grad_norm": 0.45248153805732727, - "learning_rate": 9.036611990984929e-06, - "loss": 0.4196, - "step": 9957 - }, - { - "epoch": 0.6508071367884452, - "grad_norm": 0.4767220914363861, - "learning_rate": 9.036405920409229e-06, - "loss": 0.3788, - "step": 9958 - }, - { - "epoch": 0.6508724919939873, - "grad_norm": 0.4543706178665161, - "learning_rate": 9.036199830146577e-06, - "loss": 0.3815, - "step": 9959 - }, - { - "epoch": 0.6509378471995294, - "grad_norm": 0.46349817514419556, - "learning_rate": 9.035993720197982e-06, - "loss": 0.3651, - "step": 9960 - }, - { - "epoch": 0.6510032024050716, - "grad_norm": 0.44254183769226074, - "learning_rate": 9.035787590564446e-06, - "loss": 0.3244, - "step": 9961 - }, - { - "epoch": 0.6510685576106137, - "grad_norm": 0.47205328941345215, - "learning_rate": 9.035581441246977e-06, - "loss": 0.3976, - "step": 9962 - }, - { - "epoch": 0.6511339128161558, - "grad_norm": 0.48458147048950195, - "learning_rate": 9.035375272246579e-06, - "loss": 0.4239, - "step": 9963 - }, - { - "epoch": 0.6511992680216979, - "grad_norm": 0.45239412784576416, - "learning_rate": 9.035169083564257e-06, - "loss": 0.3671, - "step": 9964 - }, - { - "epoch": 0.6512646232272401, - "grad_norm": 0.46970847249031067, - "learning_rate": 9.034962875201016e-06, - "loss": 0.3501, - "step": 9965 - }, - { - "epoch": 0.6513299784327822, - "grad_norm": 0.46872833371162415, - "learning_rate": 9.034756647157864e-06, - "loss": 0.3421, - "step": 9966 - }, - { - "epoch": 0.6513953336383242, - "grad_norm": 0.46653464436531067, - "learning_rate": 9.034550399435808e-06, - "loss": 0.3739, - "step": 9967 - }, - { - "epoch": 0.6514606888438664, - "grad_norm": 0.4715349078178406, - "learning_rate": 9.034344132035853e-06, - "loss": 0.4085, - "step": 9968 - }, - { - "epoch": 0.6515260440494085, - "grad_norm": 0.4499291479587555, - "learning_rate": 9.034137844959e-06, - "loss": 0.3884, - "step": 9969 - }, - { - "epoch": 0.6515913992549507, - "grad_norm": 0.49279195070266724, - "learning_rate": 9.033931538206263e-06, - "loss": 0.3394, - "step": 9970 - }, - { - "epoch": 0.6516567544604928, - "grad_norm": 0.4347681999206543, - "learning_rate": 9.033725211778641e-06, - "loss": 0.3516, - "step": 9971 - }, - { - "epoch": 0.6517221096660349, - "grad_norm": 0.4902048110961914, - "learning_rate": 9.033518865677147e-06, - "loss": 0.3936, - "step": 9972 - }, - { - "epoch": 0.651787464871577, - "grad_norm": 0.4415493607521057, - "learning_rate": 9.033312499902782e-06, - "loss": 0.3792, - "step": 9973 - }, - { - "epoch": 0.6518528200771191, - "grad_norm": 0.4481008052825928, - "learning_rate": 9.033106114456555e-06, - "loss": 0.342, - "step": 9974 - }, - { - "epoch": 0.6519181752826613, - "grad_norm": 0.42084819078445435, - "learning_rate": 9.032899709339473e-06, - "loss": 0.3781, - "step": 9975 - }, - { - "epoch": 0.6519835304882033, - "grad_norm": 0.4919080436229706, - "learning_rate": 9.032693284552541e-06, - "loss": 0.4113, - "step": 9976 - }, - { - "epoch": 0.6520488856937455, - "grad_norm": 0.5064375996589661, - "learning_rate": 9.032486840096768e-06, - "loss": 0.4421, - "step": 9977 - }, - { - "epoch": 0.6521142408992876, - "grad_norm": 0.48175880312919617, - "learning_rate": 9.03228037597316e-06, - "loss": 0.4486, - "step": 9978 - }, - { - "epoch": 0.6521795961048298, - "grad_norm": 0.42637887597084045, - "learning_rate": 9.032073892182721e-06, - "loss": 0.3972, - "step": 9979 - }, - { - "epoch": 0.6522449513103719, - "grad_norm": 0.44986969232559204, - "learning_rate": 9.031867388726463e-06, - "loss": 0.4007, - "step": 9980 - }, - { - "epoch": 0.652310306515914, - "grad_norm": 0.420212984085083, - "learning_rate": 9.031660865605389e-06, - "loss": 0.3474, - "step": 9981 - }, - { - "epoch": 0.6523756617214561, - "grad_norm": 0.4912068247795105, - "learning_rate": 9.031454322820511e-06, - "loss": 0.4405, - "step": 9982 - }, - { - "epoch": 0.6524410169269982, - "grad_norm": 0.46444404125213623, - "learning_rate": 9.031247760372831e-06, - "loss": 0.3987, - "step": 9983 - }, - { - "epoch": 0.6525063721325404, - "grad_norm": 0.4622892141342163, - "learning_rate": 9.031041178263362e-06, - "loss": 0.4178, - "step": 9984 - }, - { - "epoch": 0.6525717273380824, - "grad_norm": 0.4760500192642212, - "learning_rate": 9.030834576493105e-06, - "loss": 0.4224, - "step": 9985 - }, - { - "epoch": 0.6526370825436246, - "grad_norm": 0.44501349329948425, - "learning_rate": 9.030627955063075e-06, - "loss": 0.3686, - "step": 9986 - }, - { - "epoch": 0.6527024377491667, - "grad_norm": 0.4655097723007202, - "learning_rate": 9.030421313974275e-06, - "loss": 0.4026, - "step": 9987 - }, - { - "epoch": 0.6527677929547089, - "grad_norm": 0.5385804176330566, - "learning_rate": 9.030214653227713e-06, - "loss": 0.4118, - "step": 9988 - }, - { - "epoch": 0.652833148160251, - "grad_norm": 0.4324919581413269, - "learning_rate": 9.0300079728244e-06, - "loss": 0.3532, - "step": 9989 - }, - { - "epoch": 0.6528985033657931, - "grad_norm": 0.4297036826610565, - "learning_rate": 9.02980127276534e-06, - "loss": 0.3586, - "step": 9990 - }, - { - "epoch": 0.6529638585713352, - "grad_norm": 0.436282753944397, - "learning_rate": 9.029594553051543e-06, - "loss": 0.3716, - "step": 9991 - }, - { - "epoch": 0.6530292137768773, - "grad_norm": 0.46413522958755493, - "learning_rate": 9.029387813684018e-06, - "loss": 0.4013, - "step": 9992 - }, - { - "epoch": 0.6530945689824195, - "grad_norm": 0.48544174432754517, - "learning_rate": 9.029181054663772e-06, - "loss": 0.3904, - "step": 9993 - }, - { - "epoch": 0.6531599241879615, - "grad_norm": 0.43784505128860474, - "learning_rate": 9.028974275991815e-06, - "loss": 0.3558, - "step": 9994 - }, - { - "epoch": 0.6532252793935037, - "grad_norm": 0.4579785168170929, - "learning_rate": 9.028767477669156e-06, - "loss": 0.4014, - "step": 9995 - }, - { - "epoch": 0.6532906345990458, - "grad_norm": 0.5092059969902039, - "learning_rate": 9.0285606596968e-06, - "loss": 0.501, - "step": 9996 - }, - { - "epoch": 0.653355989804588, - "grad_norm": 0.4619104266166687, - "learning_rate": 9.02835382207576e-06, - "loss": 0.417, - "step": 9997 - }, - { - "epoch": 0.65342134501013, - "grad_norm": 0.4432462155818939, - "learning_rate": 9.02814696480704e-06, - "loss": 0.4044, - "step": 9998 - }, - { - "epoch": 0.6534867002156722, - "grad_norm": 0.45185190439224243, - "learning_rate": 9.027940087891655e-06, - "loss": 0.4071, - "step": 9999 - }, - { - "epoch": 0.6535520554212143, - "grad_norm": 0.42490679025650024, - "learning_rate": 9.02773319133061e-06, - "loss": 0.3441, - "step": 10000 - }, - { - "epoch": 0.6536174106267564, - "grad_norm": 0.4673260748386383, - "learning_rate": 9.027526275124913e-06, - "loss": 0.4092, - "step": 10001 - }, - { - "epoch": 0.6536827658322986, - "grad_norm": 0.4620700478553772, - "learning_rate": 9.027319339275577e-06, - "loss": 0.3714, - "step": 10002 - }, - { - "epoch": 0.6537481210378406, - "grad_norm": 0.45150327682495117, - "learning_rate": 9.027112383783608e-06, - "loss": 0.3714, - "step": 10003 - }, - { - "epoch": 0.6538134762433828, - "grad_norm": 0.47253942489624023, - "learning_rate": 9.026905408650017e-06, - "loss": 0.4131, - "step": 10004 - }, - { - "epoch": 0.6538788314489249, - "grad_norm": 0.47828763723373413, - "learning_rate": 9.026698413875815e-06, - "loss": 0.3979, - "step": 10005 - }, - { - "epoch": 0.6539441866544671, - "grad_norm": 0.4524450898170471, - "learning_rate": 9.026491399462008e-06, - "loss": 0.3672, - "step": 10006 - }, - { - "epoch": 0.6540095418600091, - "grad_norm": 0.5243582725524902, - "learning_rate": 9.026284365409608e-06, - "loss": 0.4326, - "step": 10007 - }, - { - "epoch": 0.6540748970655512, - "grad_norm": 0.44284766912460327, - "learning_rate": 9.026077311719622e-06, - "loss": 0.3493, - "step": 10008 - }, - { - "epoch": 0.6541402522710934, - "grad_norm": 0.4645557403564453, - "learning_rate": 9.025870238393067e-06, - "loss": 0.3862, - "step": 10009 - }, - { - "epoch": 0.6542056074766355, - "grad_norm": 0.4484542906284332, - "learning_rate": 9.025663145430945e-06, - "loss": 0.3679, - "step": 10010 - }, - { - "epoch": 0.6542709626821777, - "grad_norm": 0.47134894132614136, - "learning_rate": 9.02545603283427e-06, - "loss": 0.4028, - "step": 10011 - }, - { - "epoch": 0.6543363178877197, - "grad_norm": 0.46165215969085693, - "learning_rate": 9.025248900604052e-06, - "loss": 0.3893, - "step": 10012 - }, - { - "epoch": 0.6544016730932619, - "grad_norm": 0.4478997588157654, - "learning_rate": 9.0250417487413e-06, - "loss": 0.378, - "step": 10013 - }, - { - "epoch": 0.654467028298804, - "grad_norm": 0.4455048143863678, - "learning_rate": 9.024834577247024e-06, - "loss": 0.3596, - "step": 10014 - }, - { - "epoch": 0.6545323835043462, - "grad_norm": 0.45794814825057983, - "learning_rate": 9.024627386122238e-06, - "loss": 0.4211, - "step": 10015 - }, - { - "epoch": 0.6545977387098882, - "grad_norm": 0.543355405330658, - "learning_rate": 9.024420175367947e-06, - "loss": 0.3924, - "step": 10016 - }, - { - "epoch": 0.6546630939154303, - "grad_norm": 0.4654608368873596, - "learning_rate": 9.024212944985167e-06, - "loss": 0.4013, - "step": 10017 - }, - { - "epoch": 0.6547284491209725, - "grad_norm": 0.4858318865299225, - "learning_rate": 9.024005694974904e-06, - "loss": 0.416, - "step": 10018 - }, - { - "epoch": 0.6547938043265146, - "grad_norm": 0.4304412603378296, - "learning_rate": 9.023798425338173e-06, - "loss": 0.3342, - "step": 10019 - }, - { - "epoch": 0.6548591595320568, - "grad_norm": 0.4354506731033325, - "learning_rate": 9.023591136075982e-06, - "loss": 0.3341, - "step": 10020 - }, - { - "epoch": 0.6549245147375988, - "grad_norm": 0.4382390081882477, - "learning_rate": 9.023383827189345e-06, - "loss": 0.3692, - "step": 10021 - }, - { - "epoch": 0.654989869943141, - "grad_norm": 0.48136866092681885, - "learning_rate": 9.02317649867927e-06, - "loss": 0.4207, - "step": 10022 - }, - { - "epoch": 0.6550552251486831, - "grad_norm": 0.47718822956085205, - "learning_rate": 9.022969150546769e-06, - "loss": 0.3914, - "step": 10023 - }, - { - "epoch": 0.6551205803542253, - "grad_norm": 0.45839494466781616, - "learning_rate": 9.022761782792855e-06, - "loss": 0.388, - "step": 10024 - }, - { - "epoch": 0.6551859355597673, - "grad_norm": 0.40892085433006287, - "learning_rate": 9.022554395418537e-06, - "loss": 0.3322, - "step": 10025 - }, - { - "epoch": 0.6552512907653094, - "grad_norm": 0.4357730448246002, - "learning_rate": 9.022346988424827e-06, - "loss": 0.3414, - "step": 10026 - }, - { - "epoch": 0.6553166459708516, - "grad_norm": 0.41670113801956177, - "learning_rate": 9.02213956181274e-06, - "loss": 0.3221, - "step": 10027 - }, - { - "epoch": 0.6553820011763937, - "grad_norm": 0.4133478105068207, - "learning_rate": 9.021932115583282e-06, - "loss": 0.342, - "step": 10028 - }, - { - "epoch": 0.6554473563819359, - "grad_norm": 0.49278882145881653, - "learning_rate": 9.021724649737469e-06, - "loss": 0.3871, - "step": 10029 - }, - { - "epoch": 0.6555127115874779, - "grad_norm": 0.482332706451416, - "learning_rate": 9.021517164276312e-06, - "loss": 0.4091, - "step": 10030 - }, - { - "epoch": 0.6555780667930201, - "grad_norm": 0.448758602142334, - "learning_rate": 9.021309659200822e-06, - "loss": 0.3432, - "step": 10031 - }, - { - "epoch": 0.6556434219985622, - "grad_norm": 0.44994425773620605, - "learning_rate": 9.021102134512011e-06, - "loss": 0.3957, - "step": 10032 - }, - { - "epoch": 0.6557087772041043, - "grad_norm": 0.4392760097980499, - "learning_rate": 9.020894590210893e-06, - "loss": 0.3801, - "step": 10033 - }, - { - "epoch": 0.6557741324096464, - "grad_norm": 0.48466914892196655, - "learning_rate": 9.020687026298478e-06, - "loss": 0.441, - "step": 10034 - }, - { - "epoch": 0.6558394876151885, - "grad_norm": 0.4930497407913208, - "learning_rate": 9.02047944277578e-06, - "loss": 0.3783, - "step": 10035 - }, - { - "epoch": 0.6559048428207307, - "grad_norm": 0.4536300003528595, - "learning_rate": 9.020271839643813e-06, - "loss": 0.3825, - "step": 10036 - }, - { - "epoch": 0.6559701980262728, - "grad_norm": 0.44621482491493225, - "learning_rate": 9.020064216903586e-06, - "loss": 0.3624, - "step": 10037 - }, - { - "epoch": 0.656035553231815, - "grad_norm": 0.4715002477169037, - "learning_rate": 9.019856574556112e-06, - "loss": 0.4179, - "step": 10038 - }, - { - "epoch": 0.656100908437357, - "grad_norm": 0.4448879361152649, - "learning_rate": 9.019648912602405e-06, - "loss": 0.3903, - "step": 10039 - }, - { - "epoch": 0.6561662636428992, - "grad_norm": 0.43843865394592285, - "learning_rate": 9.01944123104348e-06, - "loss": 0.3967, - "step": 10040 - }, - { - "epoch": 0.6562316188484413, - "grad_norm": 0.4688895642757416, - "learning_rate": 9.019233529880346e-06, - "loss": 0.4029, - "step": 10041 - }, - { - "epoch": 0.6562969740539834, - "grad_norm": 0.433026522397995, - "learning_rate": 9.019025809114018e-06, - "loss": 0.3715, - "step": 10042 - }, - { - "epoch": 0.6563623292595255, - "grad_norm": 0.5355081558227539, - "learning_rate": 9.018818068745507e-06, - "loss": 0.4969, - "step": 10043 - }, - { - "epoch": 0.6564276844650676, - "grad_norm": 0.42214125394821167, - "learning_rate": 9.01861030877583e-06, - "loss": 0.3387, - "step": 10044 - }, - { - "epoch": 0.6564930396706098, - "grad_norm": 0.4066252112388611, - "learning_rate": 9.018402529205998e-06, - "loss": 0.3542, - "step": 10045 - }, - { - "epoch": 0.6565583948761519, - "grad_norm": 0.4637167453765869, - "learning_rate": 9.018194730037024e-06, - "loss": 0.4214, - "step": 10046 - }, - { - "epoch": 0.656623750081694, - "grad_norm": 0.4537128806114197, - "learning_rate": 9.017986911269924e-06, - "loss": 0.4039, - "step": 10047 - }, - { - "epoch": 0.6566891052872361, - "grad_norm": 0.468189001083374, - "learning_rate": 9.01777907290571e-06, - "loss": 0.4015, - "step": 10048 - }, - { - "epoch": 0.6567544604927783, - "grad_norm": 0.4477204382419586, - "learning_rate": 9.017571214945394e-06, - "loss": 0.3523, - "step": 10049 - }, - { - "epoch": 0.6568198156983204, - "grad_norm": 0.46103546023368835, - "learning_rate": 9.01736333738999e-06, - "loss": 0.4216, - "step": 10050 - }, - { - "epoch": 0.6568851709038624, - "grad_norm": 0.5244162082672119, - "learning_rate": 9.017155440240517e-06, - "loss": 0.4784, - "step": 10051 - }, - { - "epoch": 0.6569505261094046, - "grad_norm": 0.4482291340827942, - "learning_rate": 9.016947523497983e-06, - "loss": 0.3992, - "step": 10052 - }, - { - "epoch": 0.6570158813149467, - "grad_norm": 0.4626525342464447, - "learning_rate": 9.016739587163403e-06, - "loss": 0.3678, - "step": 10053 - }, - { - "epoch": 0.6570812365204889, - "grad_norm": 0.428783655166626, - "learning_rate": 9.016531631237794e-06, - "loss": 0.3786, - "step": 10054 - }, - { - "epoch": 0.657146591726031, - "grad_norm": 0.4373444616794586, - "learning_rate": 9.01632365572217e-06, - "loss": 0.3669, - "step": 10055 - }, - { - "epoch": 0.6572119469315731, - "grad_norm": 0.44966745376586914, - "learning_rate": 9.016115660617543e-06, - "loss": 0.3896, - "step": 10056 - }, - { - "epoch": 0.6572773021371152, - "grad_norm": 0.45996198058128357, - "learning_rate": 9.015907645924929e-06, - "loss": 0.4169, - "step": 10057 - }, - { - "epoch": 0.6573426573426573, - "grad_norm": 0.44895103573799133, - "learning_rate": 9.01569961164534e-06, - "loss": 0.3689, - "step": 10058 - }, - { - "epoch": 0.6574080125481995, - "grad_norm": 0.4172862470149994, - "learning_rate": 9.015491557779796e-06, - "loss": 0.3591, - "step": 10059 - }, - { - "epoch": 0.6574733677537415, - "grad_norm": 0.4612584710121155, - "learning_rate": 9.015283484329307e-06, - "loss": 0.4059, - "step": 10060 - }, - { - "epoch": 0.6575387229592837, - "grad_norm": 0.43840399384498596, - "learning_rate": 9.015075391294889e-06, - "loss": 0.3864, - "step": 10061 - }, - { - "epoch": 0.6576040781648258, - "grad_norm": 0.47902926802635193, - "learning_rate": 9.014867278677559e-06, - "loss": 0.431, - "step": 10062 - }, - { - "epoch": 0.657669433370368, - "grad_norm": 0.41883060336112976, - "learning_rate": 9.014659146478329e-06, - "loss": 0.3382, - "step": 10063 - }, - { - "epoch": 0.6577347885759101, - "grad_norm": 0.4539177715778351, - "learning_rate": 9.014450994698217e-06, - "loss": 0.3757, - "step": 10064 - }, - { - "epoch": 0.6578001437814522, - "grad_norm": 0.41955089569091797, - "learning_rate": 9.014242823338235e-06, - "loss": 0.3736, - "step": 10065 - }, - { - "epoch": 0.6578654989869943, - "grad_norm": 0.4466819167137146, - "learning_rate": 9.0140346323994e-06, - "loss": 0.3787, - "step": 10066 - }, - { - "epoch": 0.6579308541925364, - "grad_norm": 0.46580857038497925, - "learning_rate": 9.01382642188273e-06, - "loss": 0.4382, - "step": 10067 - }, - { - "epoch": 0.6579962093980786, - "grad_norm": 0.46903690695762634, - "learning_rate": 9.013618191789236e-06, - "loss": 0.4091, - "step": 10068 - }, - { - "epoch": 0.6580615646036206, - "grad_norm": 0.42461681365966797, - "learning_rate": 9.013409942119935e-06, - "loss": 0.3415, - "step": 10069 - }, - { - "epoch": 0.6581269198091628, - "grad_norm": 0.45153144001960754, - "learning_rate": 9.013201672875844e-06, - "loss": 0.3655, - "step": 10070 - }, - { - "epoch": 0.6581922750147049, - "grad_norm": 0.44758397340774536, - "learning_rate": 9.012993384057978e-06, - "loss": 0.4027, - "step": 10071 - }, - { - "epoch": 0.6582576302202471, - "grad_norm": 0.4283483624458313, - "learning_rate": 9.012785075667354e-06, - "loss": 0.356, - "step": 10072 - }, - { - "epoch": 0.6583229854257892, - "grad_norm": 0.4353366494178772, - "learning_rate": 9.012576747704987e-06, - "loss": 0.3689, - "step": 10073 - }, - { - "epoch": 0.6583883406313313, - "grad_norm": 0.5016412734985352, - "learning_rate": 9.012368400171891e-06, - "loss": 0.5056, - "step": 10074 - }, - { - "epoch": 0.6584536958368734, - "grad_norm": 0.44737544655799866, - "learning_rate": 9.012160033069087e-06, - "loss": 0.3622, - "step": 10075 - }, - { - "epoch": 0.6585190510424155, - "grad_norm": 0.440403014421463, - "learning_rate": 9.011951646397587e-06, - "loss": 0.3641, - "step": 10076 - }, - { - "epoch": 0.6585844062479577, - "grad_norm": 0.474662184715271, - "learning_rate": 9.01174324015841e-06, - "loss": 0.4777, - "step": 10077 - }, - { - "epoch": 0.6586497614534997, - "grad_norm": 0.45909667015075684, - "learning_rate": 9.01153481435257e-06, - "loss": 0.4237, - "step": 10078 - }, - { - "epoch": 0.6587151166590419, - "grad_norm": 0.4570311903953552, - "learning_rate": 9.011326368981086e-06, - "loss": 0.4152, - "step": 10079 - }, - { - "epoch": 0.658780471864584, - "grad_norm": 0.46616530418395996, - "learning_rate": 9.011117904044972e-06, - "loss": 0.4215, - "step": 10080 - }, - { - "epoch": 0.6588458270701262, - "grad_norm": 0.4463542103767395, - "learning_rate": 9.010909419545248e-06, - "loss": 0.3888, - "step": 10081 - }, - { - "epoch": 0.6589111822756683, - "grad_norm": 0.466397225856781, - "learning_rate": 9.01070091548293e-06, - "loss": 0.4124, - "step": 10082 - }, - { - "epoch": 0.6589765374812104, - "grad_norm": 0.4393870532512665, - "learning_rate": 9.010492391859033e-06, - "loss": 0.3644, - "step": 10083 - }, - { - "epoch": 0.6590418926867525, - "grad_norm": 0.5008548498153687, - "learning_rate": 9.010283848674574e-06, - "loss": 0.4393, - "step": 10084 - }, - { - "epoch": 0.6591072478922946, - "grad_norm": 0.4108535349369049, - "learning_rate": 9.010075285930574e-06, - "loss": 0.3062, - "step": 10085 - }, - { - "epoch": 0.6591726030978368, - "grad_norm": 0.46413442492485046, - "learning_rate": 9.009866703628047e-06, - "loss": 0.4246, - "step": 10086 - }, - { - "epoch": 0.6592379583033788, - "grad_norm": 0.45351120829582214, - "learning_rate": 9.009658101768011e-06, - "loss": 0.4035, - "step": 10087 - }, - { - "epoch": 0.659303313508921, - "grad_norm": 0.44074538350105286, - "learning_rate": 9.009449480351483e-06, - "loss": 0.3542, - "step": 10088 - }, - { - "epoch": 0.6593686687144631, - "grad_norm": 0.45235756039619446, - "learning_rate": 9.009240839379479e-06, - "loss": 0.3827, - "step": 10089 - }, - { - "epoch": 0.6594340239200053, - "grad_norm": 0.4972812831401825, - "learning_rate": 9.00903217885302e-06, - "loss": 0.4378, - "step": 10090 - }, - { - "epoch": 0.6594993791255473, - "grad_norm": 0.42305776476860046, - "learning_rate": 9.008823498773122e-06, - "loss": 0.3952, - "step": 10091 - }, - { - "epoch": 0.6595647343310894, - "grad_norm": 0.4234190583229065, - "learning_rate": 9.008614799140804e-06, - "loss": 0.3445, - "step": 10092 - }, - { - "epoch": 0.6596300895366316, - "grad_norm": 0.46396926045417786, - "learning_rate": 9.008406079957081e-06, - "loss": 0.3906, - "step": 10093 - }, - { - "epoch": 0.6596954447421737, - "grad_norm": 0.4174894392490387, - "learning_rate": 9.008197341222975e-06, - "loss": 0.3444, - "step": 10094 - }, - { - "epoch": 0.6597607999477159, - "grad_norm": 0.45253807306289673, - "learning_rate": 9.007988582939502e-06, - "loss": 0.4106, - "step": 10095 - }, - { - "epoch": 0.6598261551532579, - "grad_norm": 0.425199031829834, - "learning_rate": 9.007779805107679e-06, - "loss": 0.3645, - "step": 10096 - }, - { - "epoch": 0.6598915103588001, - "grad_norm": 0.45950135588645935, - "learning_rate": 9.007571007728526e-06, - "loss": 0.3749, - "step": 10097 - }, - { - "epoch": 0.6599568655643422, - "grad_norm": 0.4661332666873932, - "learning_rate": 9.007362190803059e-06, - "loss": 0.4126, - "step": 10098 - }, - { - "epoch": 0.6600222207698844, - "grad_norm": 0.427520215511322, - "learning_rate": 9.0071533543323e-06, - "loss": 0.3579, - "step": 10099 - }, - { - "epoch": 0.6600875759754264, - "grad_norm": 0.45677629113197327, - "learning_rate": 9.006944498317268e-06, - "loss": 0.3946, - "step": 10100 - }, - { - "epoch": 0.6601529311809685, - "grad_norm": 0.4137193262577057, - "learning_rate": 9.006735622758976e-06, - "loss": 0.3316, - "step": 10101 - }, - { - "epoch": 0.6602182863865107, - "grad_norm": 0.408157080411911, - "learning_rate": 9.006526727658446e-06, - "loss": 0.3489, - "step": 10102 - }, - { - "epoch": 0.6602836415920528, - "grad_norm": 0.4726017713546753, - "learning_rate": 9.0063178130167e-06, - "loss": 0.4008, - "step": 10103 - }, - { - "epoch": 0.660348996797595, - "grad_norm": 0.4968073070049286, - "learning_rate": 9.006108878834752e-06, - "loss": 0.4466, - "step": 10104 - }, - { - "epoch": 0.660414352003137, - "grad_norm": 0.5128054618835449, - "learning_rate": 9.005899925113625e-06, - "loss": 0.4228, - "step": 10105 - }, - { - "epoch": 0.6604797072086792, - "grad_norm": 0.45394113659858704, - "learning_rate": 9.005690951854335e-06, - "loss": 0.3922, - "step": 10106 - }, - { - "epoch": 0.6605450624142213, - "grad_norm": 0.4606708884239197, - "learning_rate": 9.005481959057903e-06, - "loss": 0.3878, - "step": 10107 - }, - { - "epoch": 0.6606104176197635, - "grad_norm": 0.4643462896347046, - "learning_rate": 9.005272946725347e-06, - "loss": 0.3779, - "step": 10108 - }, - { - "epoch": 0.6606757728253055, - "grad_norm": 0.4563404619693756, - "learning_rate": 9.00506391485769e-06, - "loss": 0.4075, - "step": 10109 - }, - { - "epoch": 0.6607411280308476, - "grad_norm": 0.41870981454849243, - "learning_rate": 9.004854863455945e-06, - "loss": 0.3354, - "step": 10110 - }, - { - "epoch": 0.6608064832363898, - "grad_norm": 0.43378958106040955, - "learning_rate": 9.004645792521139e-06, - "loss": 0.3719, - "step": 10111 - }, - { - "epoch": 0.6608718384419319, - "grad_norm": 0.45352861285209656, - "learning_rate": 9.004436702054285e-06, - "loss": 0.3937, - "step": 10112 - }, - { - "epoch": 0.660937193647474, - "grad_norm": 0.4332524538040161, - "learning_rate": 9.004227592056408e-06, - "loss": 0.3672, - "step": 10113 - }, - { - "epoch": 0.6610025488530161, - "grad_norm": 0.4357057511806488, - "learning_rate": 9.004018462528524e-06, - "loss": 0.363, - "step": 10114 - }, - { - "epoch": 0.6610679040585583, - "grad_norm": 0.44081050157546997, - "learning_rate": 9.003809313471657e-06, - "loss": 0.3888, - "step": 10115 - }, - { - "epoch": 0.6611332592641004, - "grad_norm": 0.4499940574169159, - "learning_rate": 9.003600144886823e-06, - "loss": 0.3809, - "step": 10116 - }, - { - "epoch": 0.6611986144696425, - "grad_norm": 0.5249917507171631, - "learning_rate": 9.003390956775046e-06, - "loss": 0.4402, - "step": 10117 - }, - { - "epoch": 0.6612639696751846, - "grad_norm": 0.4136514663696289, - "learning_rate": 9.003181749137342e-06, - "loss": 0.3356, - "step": 10118 - }, - { - "epoch": 0.6613293248807267, - "grad_norm": 0.44662079215049744, - "learning_rate": 9.002972521974735e-06, - "loss": 0.3668, - "step": 10119 - }, - { - "epoch": 0.6613946800862689, - "grad_norm": 0.45356494188308716, - "learning_rate": 9.002763275288244e-06, - "loss": 0.4114, - "step": 10120 - }, - { - "epoch": 0.661460035291811, - "grad_norm": 0.46098124980926514, - "learning_rate": 9.00255400907889e-06, - "loss": 0.4092, - "step": 10121 - }, - { - "epoch": 0.6615253904973531, - "grad_norm": 0.4753960371017456, - "learning_rate": 9.002344723347694e-06, - "loss": 0.415, - "step": 10122 - }, - { - "epoch": 0.6615907457028952, - "grad_norm": 0.46595531702041626, - "learning_rate": 9.002135418095677e-06, - "loss": 0.4204, - "step": 10123 - }, - { - "epoch": 0.6616561009084374, - "grad_norm": 0.4618552029132843, - "learning_rate": 9.001926093323858e-06, - "loss": 0.3964, - "step": 10124 - }, - { - "epoch": 0.6617214561139795, - "grad_norm": 0.4507905840873718, - "learning_rate": 9.001716749033259e-06, - "loss": 0.3689, - "step": 10125 - }, - { - "epoch": 0.6617868113195216, - "grad_norm": 0.4708883762359619, - "learning_rate": 9.001507385224902e-06, - "loss": 0.4309, - "step": 10126 - }, - { - "epoch": 0.6618521665250637, - "grad_norm": 0.4288370907306671, - "learning_rate": 9.001298001899806e-06, - "loss": 0.3147, - "step": 10127 - }, - { - "epoch": 0.6619175217306058, - "grad_norm": 0.4522435963153839, - "learning_rate": 9.001088599058993e-06, - "loss": 0.3859, - "step": 10128 - }, - { - "epoch": 0.661982876936148, - "grad_norm": 0.46277713775634766, - "learning_rate": 9.000879176703485e-06, - "loss": 0.4061, - "step": 10129 - }, - { - "epoch": 0.6620482321416901, - "grad_norm": 0.43465983867645264, - "learning_rate": 9.000669734834304e-06, - "loss": 0.343, - "step": 10130 - }, - { - "epoch": 0.6621135873472322, - "grad_norm": 0.44178035855293274, - "learning_rate": 9.000460273452471e-06, - "loss": 0.3689, - "step": 10131 - }, - { - "epoch": 0.6621789425527743, - "grad_norm": 0.4584408104419708, - "learning_rate": 9.000250792559007e-06, - "loss": 0.3767, - "step": 10132 - }, - { - "epoch": 0.6622442977583165, - "grad_norm": 0.43404266238212585, - "learning_rate": 9.000041292154934e-06, - "loss": 0.3772, - "step": 10133 - }, - { - "epoch": 0.6623096529638586, - "grad_norm": 0.4522802233695984, - "learning_rate": 8.999831772241274e-06, - "loss": 0.4029, - "step": 10134 - }, - { - "epoch": 0.6623750081694006, - "grad_norm": 0.43835920095443726, - "learning_rate": 8.999622232819048e-06, - "loss": 0.3624, - "step": 10135 - }, - { - "epoch": 0.6624403633749428, - "grad_norm": 0.45403867959976196, - "learning_rate": 8.99941267388928e-06, - "loss": 0.3808, - "step": 10136 - }, - { - "epoch": 0.6625057185804849, - "grad_norm": 0.46891212463378906, - "learning_rate": 8.999203095452992e-06, - "loss": 0.399, - "step": 10137 - }, - { - "epoch": 0.6625710737860271, - "grad_norm": 0.4498675465583801, - "learning_rate": 8.9989934975112e-06, - "loss": 0.3691, - "step": 10138 - }, - { - "epoch": 0.6626364289915692, - "grad_norm": 0.460763156414032, - "learning_rate": 8.998783880064936e-06, - "loss": 0.383, - "step": 10139 - }, - { - "epoch": 0.6627017841971113, - "grad_norm": 0.4195670783519745, - "learning_rate": 8.998574243115216e-06, - "loss": 0.3422, - "step": 10140 - }, - { - "epoch": 0.6627671394026534, - "grad_norm": 0.46796733140945435, - "learning_rate": 8.998364586663064e-06, - "loss": 0.3729, - "step": 10141 - }, - { - "epoch": 0.6628324946081955, - "grad_norm": 0.4123980700969696, - "learning_rate": 8.998154910709505e-06, - "loss": 0.3425, - "step": 10142 - }, - { - "epoch": 0.6628978498137377, - "grad_norm": 0.4784967005252838, - "learning_rate": 8.997945215255557e-06, - "loss": 0.4061, - "step": 10143 - }, - { - "epoch": 0.6629632050192797, - "grad_norm": 0.4615115821361542, - "learning_rate": 8.997735500302246e-06, - "loss": 0.3443, - "step": 10144 - }, - { - "epoch": 0.6630285602248219, - "grad_norm": 0.44597235321998596, - "learning_rate": 8.997525765850594e-06, - "loss": 0.4011, - "step": 10145 - }, - { - "epoch": 0.663093915430364, - "grad_norm": 0.44559240341186523, - "learning_rate": 8.997316011901624e-06, - "loss": 0.3581, - "step": 10146 - }, - { - "epoch": 0.6631592706359062, - "grad_norm": 0.44751402735710144, - "learning_rate": 8.997106238456358e-06, - "loss": 0.3693, - "step": 10147 - }, - { - "epoch": 0.6632246258414483, - "grad_norm": 0.42394378781318665, - "learning_rate": 8.996896445515821e-06, - "loss": 0.3066, - "step": 10148 - }, - { - "epoch": 0.6632899810469904, - "grad_norm": 0.47000759840011597, - "learning_rate": 8.996686633081036e-06, - "loss": 0.4365, - "step": 10149 - }, - { - "epoch": 0.6633553362525325, - "grad_norm": 0.4312097132205963, - "learning_rate": 8.996476801153025e-06, - "loss": 0.35, - "step": 10150 - }, - { - "epoch": 0.6634206914580746, - "grad_norm": 0.48415398597717285, - "learning_rate": 8.996266949732811e-06, - "loss": 0.4425, - "step": 10151 - }, - { - "epoch": 0.6634860466636168, - "grad_norm": 0.4553367793560028, - "learning_rate": 8.996057078821421e-06, - "loss": 0.3917, - "step": 10152 - }, - { - "epoch": 0.6635514018691588, - "grad_norm": 0.4832029938697815, - "learning_rate": 8.995847188419875e-06, - "loss": 0.3904, - "step": 10153 - }, - { - "epoch": 0.663616757074701, - "grad_norm": 0.4952784776687622, - "learning_rate": 8.995637278529197e-06, - "loss": 0.402, - "step": 10154 - }, - { - "epoch": 0.6636821122802431, - "grad_norm": 0.4514484405517578, - "learning_rate": 8.995427349150414e-06, - "loss": 0.4033, - "step": 10155 - }, - { - "epoch": 0.6637474674857853, - "grad_norm": 0.4274147152900696, - "learning_rate": 8.995217400284547e-06, - "loss": 0.3255, - "step": 10156 - }, - { - "epoch": 0.6638128226913274, - "grad_norm": 0.4665510356426239, - "learning_rate": 8.995007431932619e-06, - "loss": 0.3793, - "step": 10157 - }, - { - "epoch": 0.6638781778968695, - "grad_norm": 0.46595051884651184, - "learning_rate": 8.994797444095658e-06, - "loss": 0.3842, - "step": 10158 - }, - { - "epoch": 0.6639435331024116, - "grad_norm": 0.464510977268219, - "learning_rate": 8.994587436774684e-06, - "loss": 0.3673, - "step": 10159 - }, - { - "epoch": 0.6640088883079537, - "grad_norm": 0.4403513967990875, - "learning_rate": 8.994377409970723e-06, - "loss": 0.4024, - "step": 10160 - }, - { - "epoch": 0.6640742435134959, - "grad_norm": 0.49048399925231934, - "learning_rate": 8.994167363684803e-06, - "loss": 0.4465, - "step": 10161 - }, - { - "epoch": 0.6641395987190379, - "grad_norm": 0.44556862115859985, - "learning_rate": 8.993957297917942e-06, - "loss": 0.3659, - "step": 10162 - }, - { - "epoch": 0.6642049539245801, - "grad_norm": 0.44027361273765564, - "learning_rate": 8.993747212671167e-06, - "loss": 0.3457, - "step": 10163 - }, - { - "epoch": 0.6642703091301222, - "grad_norm": 0.46174749732017517, - "learning_rate": 8.993537107945505e-06, - "loss": 0.3912, - "step": 10164 - }, - { - "epoch": 0.6643356643356644, - "grad_norm": 0.43804579973220825, - "learning_rate": 8.99332698374198e-06, - "loss": 0.3369, - "step": 10165 - }, - { - "epoch": 0.6644010195412065, - "grad_norm": 0.42869675159454346, - "learning_rate": 8.993116840061613e-06, - "loss": 0.3668, - "step": 10166 - }, - { - "epoch": 0.6644663747467486, - "grad_norm": 0.43275701999664307, - "learning_rate": 8.992906676905432e-06, - "loss": 0.3489, - "step": 10167 - }, - { - "epoch": 0.6645317299522907, - "grad_norm": 0.46868565678596497, - "learning_rate": 8.992696494274464e-06, - "loss": 0.4167, - "step": 10168 - }, - { - "epoch": 0.6645970851578328, - "grad_norm": 0.45260968804359436, - "learning_rate": 8.992486292169732e-06, - "loss": 0.3629, - "step": 10169 - }, - { - "epoch": 0.664662440363375, - "grad_norm": 0.43209195137023926, - "learning_rate": 8.99227607059226e-06, - "loss": 0.3508, - "step": 10170 - }, - { - "epoch": 0.664727795568917, - "grad_norm": 0.4369298815727234, - "learning_rate": 8.992065829543075e-06, - "loss": 0.3673, - "step": 10171 - }, - { - "epoch": 0.6647931507744592, - "grad_norm": 0.4446268379688263, - "learning_rate": 8.991855569023203e-06, - "loss": 0.3434, - "step": 10172 - }, - { - "epoch": 0.6648585059800013, - "grad_norm": 0.4940401613712311, - "learning_rate": 8.991645289033666e-06, - "loss": 0.4306, - "step": 10173 - }, - { - "epoch": 0.6649238611855435, - "grad_norm": 0.45349887013435364, - "learning_rate": 8.991434989575493e-06, - "loss": 0.368, - "step": 10174 - }, - { - "epoch": 0.6649892163910855, - "grad_norm": 0.46481719613075256, - "learning_rate": 8.99122467064971e-06, - "loss": 0.4212, - "step": 10175 - }, - { - "epoch": 0.6650545715966276, - "grad_norm": 0.46498775482177734, - "learning_rate": 8.991014332257341e-06, - "loss": 0.401, - "step": 10176 - }, - { - "epoch": 0.6651199268021698, - "grad_norm": 0.42424118518829346, - "learning_rate": 8.990803974399413e-06, - "loss": 0.3369, - "step": 10177 - }, - { - "epoch": 0.6651852820077119, - "grad_norm": 0.4934605658054352, - "learning_rate": 8.99059359707695e-06, - "loss": 0.4262, - "step": 10178 - }, - { - "epoch": 0.6652506372132541, - "grad_norm": 0.46958664059638977, - "learning_rate": 8.99038320029098e-06, - "loss": 0.4016, - "step": 10179 - }, - { - "epoch": 0.6653159924187961, - "grad_norm": 0.49012336134910583, - "learning_rate": 8.99017278404253e-06, - "loss": 0.3984, - "step": 10180 - }, - { - "epoch": 0.6653813476243383, - "grad_norm": 0.47108718752861023, - "learning_rate": 8.989962348332624e-06, - "loss": 0.3778, - "step": 10181 - }, - { - "epoch": 0.6654467028298804, - "grad_norm": 0.44810351729393005, - "learning_rate": 8.98975189316229e-06, - "loss": 0.3931, - "step": 10182 - }, - { - "epoch": 0.6655120580354226, - "grad_norm": 0.4136602580547333, - "learning_rate": 8.989541418532552e-06, - "loss": 0.3269, - "step": 10183 - }, - { - "epoch": 0.6655774132409646, - "grad_norm": 0.4662415385246277, - "learning_rate": 8.989330924444441e-06, - "loss": 0.3793, - "step": 10184 - }, - { - "epoch": 0.6656427684465067, - "grad_norm": 0.4687197804450989, - "learning_rate": 8.989120410898979e-06, - "loss": 0.3868, - "step": 10185 - }, - { - "epoch": 0.6657081236520489, - "grad_norm": 0.45381027460098267, - "learning_rate": 8.988909877897196e-06, - "loss": 0.385, - "step": 10186 - }, - { - "epoch": 0.665773478857591, - "grad_norm": 0.44065558910369873, - "learning_rate": 8.988699325440117e-06, - "loss": 0.3872, - "step": 10187 - }, - { - "epoch": 0.6658388340631332, - "grad_norm": 0.43239185214042664, - "learning_rate": 8.98848875352877e-06, - "loss": 0.3974, - "step": 10188 - }, - { - "epoch": 0.6659041892686752, - "grad_norm": 0.4161105453968048, - "learning_rate": 8.988278162164181e-06, - "loss": 0.3458, - "step": 10189 - }, - { - "epoch": 0.6659695444742174, - "grad_norm": 0.46066081523895264, - "learning_rate": 8.988067551347378e-06, - "loss": 0.4135, - "step": 10190 - }, - { - "epoch": 0.6660348996797595, - "grad_norm": 0.42983028292655945, - "learning_rate": 8.987856921079387e-06, - "loss": 0.3636, - "step": 10191 - }, - { - "epoch": 0.6661002548853017, - "grad_norm": 0.45788678526878357, - "learning_rate": 8.98764627136124e-06, - "loss": 0.3747, - "step": 10192 - }, - { - "epoch": 0.6661656100908437, - "grad_norm": 0.4494451582431793, - "learning_rate": 8.987435602193956e-06, - "loss": 0.324, - "step": 10193 - }, - { - "epoch": 0.6662309652963858, - "grad_norm": 0.4337490499019623, - "learning_rate": 8.98722491357857e-06, - "loss": 0.38, - "step": 10194 - }, - { - "epoch": 0.666296320501928, - "grad_norm": 0.47315219044685364, - "learning_rate": 8.987014205516104e-06, - "loss": 0.3972, - "step": 10195 - }, - { - "epoch": 0.6663616757074701, - "grad_norm": 0.4512397348880768, - "learning_rate": 8.98680347800759e-06, - "loss": 0.4027, - "step": 10196 - }, - { - "epoch": 0.6664270309130123, - "grad_norm": 0.42925357818603516, - "learning_rate": 8.986592731054056e-06, - "loss": 0.3546, - "step": 10197 - }, - { - "epoch": 0.6664923861185543, - "grad_norm": 0.4276587963104248, - "learning_rate": 8.986381964656527e-06, - "loss": 0.3737, - "step": 10198 - }, - { - "epoch": 0.6665577413240965, - "grad_norm": 0.4193640649318695, - "learning_rate": 8.986171178816032e-06, - "loss": 0.3719, - "step": 10199 - }, - { - "epoch": 0.6666230965296386, - "grad_norm": 0.451722115278244, - "learning_rate": 8.9859603735336e-06, - "loss": 0.4072, - "step": 10200 - }, - { - "epoch": 0.6666884517351807, - "grad_norm": 0.4299343228340149, - "learning_rate": 8.985749548810256e-06, - "loss": 0.3142, - "step": 10201 - }, - { - "epoch": 0.6667538069407228, - "grad_norm": 0.4440017640590668, - "learning_rate": 8.985538704647034e-06, - "loss": 0.3948, - "step": 10202 - }, - { - "epoch": 0.6668191621462649, - "grad_norm": 0.479810893535614, - "learning_rate": 8.985327841044957e-06, - "loss": 0.4336, - "step": 10203 - }, - { - "epoch": 0.6668845173518071, - "grad_norm": 0.4192984402179718, - "learning_rate": 8.985116958005056e-06, - "loss": 0.3201, - "step": 10204 - }, - { - "epoch": 0.6669498725573492, - "grad_norm": 0.4778785705566406, - "learning_rate": 8.984906055528357e-06, - "loss": 0.3669, - "step": 10205 - }, - { - "epoch": 0.6670152277628913, - "grad_norm": 0.45121195912361145, - "learning_rate": 8.984695133615893e-06, - "loss": 0.3842, - "step": 10206 - }, - { - "epoch": 0.6670805829684334, - "grad_norm": 0.4319092631340027, - "learning_rate": 8.98448419226869e-06, - "loss": 0.3804, - "step": 10207 - }, - { - "epoch": 0.6671459381739756, - "grad_norm": 0.4295366108417511, - "learning_rate": 8.984273231487776e-06, - "loss": 0.3643, - "step": 10208 - }, - { - "epoch": 0.6672112933795177, - "grad_norm": 0.4476512670516968, - "learning_rate": 8.984062251274184e-06, - "loss": 0.3509, - "step": 10209 - }, - { - "epoch": 0.6672766485850598, - "grad_norm": 0.4452911615371704, - "learning_rate": 8.98385125162894e-06, - "loss": 0.363, - "step": 10210 - }, - { - "epoch": 0.6673420037906019, - "grad_norm": 0.498222678899765, - "learning_rate": 8.983640232553071e-06, - "loss": 0.4138, - "step": 10211 - }, - { - "epoch": 0.667407358996144, - "grad_norm": 0.4494498670101166, - "learning_rate": 8.983429194047608e-06, - "loss": 0.3839, - "step": 10212 - }, - { - "epoch": 0.6674727142016862, - "grad_norm": 0.47864049673080444, - "learning_rate": 8.983218136113583e-06, - "loss": 0.3595, - "step": 10213 - }, - { - "epoch": 0.6675380694072283, - "grad_norm": 0.4909796714782715, - "learning_rate": 8.98300705875202e-06, - "loss": 0.4401, - "step": 10214 - }, - { - "epoch": 0.6676034246127704, - "grad_norm": 0.46578124165534973, - "learning_rate": 8.982795961963956e-06, - "loss": 0.3778, - "step": 10215 - }, - { - "epoch": 0.6676687798183125, - "grad_norm": 0.45613938570022583, - "learning_rate": 8.982584845750415e-06, - "loss": 0.4082, - "step": 10216 - }, - { - "epoch": 0.6677341350238547, - "grad_norm": 0.46102020144462585, - "learning_rate": 8.982373710112426e-06, - "loss": 0.3641, - "step": 10217 - }, - { - "epoch": 0.6677994902293968, - "grad_norm": 0.4523315131664276, - "learning_rate": 8.982162555051024e-06, - "loss": 0.4057, - "step": 10218 - }, - { - "epoch": 0.6678648454349388, - "grad_norm": 0.44271978735923767, - "learning_rate": 8.981951380567233e-06, - "loss": 0.3704, - "step": 10219 - }, - { - "epoch": 0.667930200640481, - "grad_norm": 0.44290855526924133, - "learning_rate": 8.981740186662087e-06, - "loss": 0.3823, - "step": 10220 - }, - { - "epoch": 0.6679955558460231, - "grad_norm": 0.4370899200439453, - "learning_rate": 8.981528973336614e-06, - "loss": 0.3811, - "step": 10221 - }, - { - "epoch": 0.6680609110515653, - "grad_norm": 0.4476775825023651, - "learning_rate": 8.981317740591844e-06, - "loss": 0.3949, - "step": 10222 - }, - { - "epoch": 0.6681262662571074, - "grad_norm": 0.4386078715324402, - "learning_rate": 8.981106488428809e-06, - "loss": 0.3702, - "step": 10223 - }, - { - "epoch": 0.6681916214626495, - "grad_norm": 0.44177818298339844, - "learning_rate": 8.98089521684854e-06, - "loss": 0.3688, - "step": 10224 - }, - { - "epoch": 0.6682569766681916, - "grad_norm": 0.43435531854629517, - "learning_rate": 8.980683925852062e-06, - "loss": 0.3591, - "step": 10225 - }, - { - "epoch": 0.6683223318737337, - "grad_norm": 0.471281498670578, - "learning_rate": 8.980472615440412e-06, - "loss": 0.4274, - "step": 10226 - }, - { - "epoch": 0.6683876870792759, - "grad_norm": 0.44629737734794617, - "learning_rate": 8.98026128561462e-06, - "loss": 0.3701, - "step": 10227 - }, - { - "epoch": 0.6684530422848179, - "grad_norm": 0.43073129653930664, - "learning_rate": 8.980049936375712e-06, - "loss": 0.3695, - "step": 10228 - }, - { - "epoch": 0.6685183974903601, - "grad_norm": 0.43494707345962524, - "learning_rate": 8.979838567724723e-06, - "loss": 0.401, - "step": 10229 - }, - { - "epoch": 0.6685837526959022, - "grad_norm": 0.4401400685310364, - "learning_rate": 8.979627179662683e-06, - "loss": 0.3818, - "step": 10230 - }, - { - "epoch": 0.6686491079014444, - "grad_norm": 0.4596883952617645, - "learning_rate": 8.97941577219062e-06, - "loss": 0.3776, - "step": 10231 - }, - { - "epoch": 0.6687144631069865, - "grad_norm": 0.4445232152938843, - "learning_rate": 8.97920434530957e-06, - "loss": 0.369, - "step": 10232 - }, - { - "epoch": 0.6687798183125286, - "grad_norm": 0.48138898611068726, - "learning_rate": 8.978992899020561e-06, - "loss": 0.4236, - "step": 10233 - }, - { - "epoch": 0.6688451735180707, - "grad_norm": 0.4211460053920746, - "learning_rate": 8.978781433324626e-06, - "loss": 0.3362, - "step": 10234 - }, - { - "epoch": 0.6689105287236128, - "grad_norm": 0.4986729621887207, - "learning_rate": 8.978569948222796e-06, - "loss": 0.4129, - "step": 10235 - }, - { - "epoch": 0.668975883929155, - "grad_norm": 0.42038753628730774, - "learning_rate": 8.978358443716099e-06, - "loss": 0.3466, - "step": 10236 - }, - { - "epoch": 0.669041239134697, - "grad_norm": 0.43704989552497864, - "learning_rate": 8.978146919805573e-06, - "loss": 0.371, - "step": 10237 - }, - { - "epoch": 0.6691065943402392, - "grad_norm": 0.46122992038726807, - "learning_rate": 8.977935376492244e-06, - "loss": 0.4032, - "step": 10238 - }, - { - "epoch": 0.6691719495457813, - "grad_norm": 0.43309664726257324, - "learning_rate": 8.977723813777148e-06, - "loss": 0.3422, - "step": 10239 - }, - { - "epoch": 0.6692373047513235, - "grad_norm": 0.4502905309200287, - "learning_rate": 8.977512231661313e-06, - "loss": 0.3827, - "step": 10240 - }, - { - "epoch": 0.6693026599568656, - "grad_norm": 0.5038543343544006, - "learning_rate": 8.977300630145773e-06, - "loss": 0.4451, - "step": 10241 - }, - { - "epoch": 0.6693680151624077, - "grad_norm": 0.45643964409828186, - "learning_rate": 8.97708900923156e-06, - "loss": 0.376, - "step": 10242 - }, - { - "epoch": 0.6694333703679498, - "grad_norm": 0.43658164143562317, - "learning_rate": 8.976877368919709e-06, - "loss": 0.3633, - "step": 10243 - }, - { - "epoch": 0.6694987255734919, - "grad_norm": 0.6266259551048279, - "learning_rate": 8.976665709211248e-06, - "loss": 0.3804, - "step": 10244 - }, - { - "epoch": 0.6695640807790341, - "grad_norm": 0.4313041865825653, - "learning_rate": 8.976454030107209e-06, - "loss": 0.3749, - "step": 10245 - }, - { - "epoch": 0.6696294359845761, - "grad_norm": 0.4237399101257324, - "learning_rate": 8.976242331608627e-06, - "loss": 0.3571, - "step": 10246 - }, - { - "epoch": 0.6696947911901183, - "grad_norm": 0.4492327868938446, - "learning_rate": 8.976030613716533e-06, - "loss": 0.3879, - "step": 10247 - }, - { - "epoch": 0.6697601463956604, - "grad_norm": 0.4370712637901306, - "learning_rate": 8.97581887643196e-06, - "loss": 0.3185, - "step": 10248 - }, - { - "epoch": 0.6698255016012026, - "grad_norm": 0.4478670060634613, - "learning_rate": 8.975607119755944e-06, - "loss": 0.366, - "step": 10249 - }, - { - "epoch": 0.6698908568067447, - "grad_norm": 0.4454340636730194, - "learning_rate": 8.975395343689512e-06, - "loss": 0.3365, - "step": 10250 - }, - { - "epoch": 0.6699562120122868, - "grad_norm": 0.47780612111091614, - "learning_rate": 8.975183548233702e-06, - "loss": 0.4427, - "step": 10251 - }, - { - "epoch": 0.6700215672178289, - "grad_norm": 0.45170411467552185, - "learning_rate": 8.974971733389542e-06, - "loss": 0.3855, - "step": 10252 - }, - { - "epoch": 0.670086922423371, - "grad_norm": 0.4412384629249573, - "learning_rate": 8.97475989915807e-06, - "loss": 0.3835, - "step": 10253 - }, - { - "epoch": 0.6701522776289132, - "grad_norm": 0.44694584608078003, - "learning_rate": 8.974548045540315e-06, - "loss": 0.3484, - "step": 10254 - }, - { - "epoch": 0.6702176328344552, - "grad_norm": 0.43447887897491455, - "learning_rate": 8.974336172537313e-06, - "loss": 0.394, - "step": 10255 - }, - { - "epoch": 0.6702829880399974, - "grad_norm": 0.43939846754074097, - "learning_rate": 8.974124280150098e-06, - "loss": 0.3757, - "step": 10256 - }, - { - "epoch": 0.6703483432455395, - "grad_norm": 0.4591800570487976, - "learning_rate": 8.973912368379701e-06, - "loss": 0.3886, - "step": 10257 - }, - { - "epoch": 0.6704136984510817, - "grad_norm": 0.44207674264907837, - "learning_rate": 8.973700437227156e-06, - "loss": 0.3891, - "step": 10258 - }, - { - "epoch": 0.6704790536566237, - "grad_norm": 0.5039398074150085, - "learning_rate": 8.973488486693499e-06, - "loss": 0.4825, - "step": 10259 - }, - { - "epoch": 0.6705444088621658, - "grad_norm": 0.4677008390426636, - "learning_rate": 8.97327651677976e-06, - "loss": 0.4448, - "step": 10260 - }, - { - "epoch": 0.670609764067708, - "grad_norm": 0.45407986640930176, - "learning_rate": 8.973064527486976e-06, - "loss": 0.3861, - "step": 10261 - }, - { - "epoch": 0.6706751192732501, - "grad_norm": 0.43893203139305115, - "learning_rate": 8.97285251881618e-06, - "loss": 0.3145, - "step": 10262 - }, - { - "epoch": 0.6707404744787923, - "grad_norm": 0.4304412305355072, - "learning_rate": 8.972640490768407e-06, - "loss": 0.3881, - "step": 10263 - }, - { - "epoch": 0.6708058296843343, - "grad_norm": 0.4039929211139679, - "learning_rate": 8.972428443344687e-06, - "loss": 0.3233, - "step": 10264 - }, - { - "epoch": 0.6708711848898765, - "grad_norm": 0.4385417103767395, - "learning_rate": 8.972216376546061e-06, - "loss": 0.3776, - "step": 10265 - }, - { - "epoch": 0.6709365400954186, - "grad_norm": 0.46117353439331055, - "learning_rate": 8.972004290373558e-06, - "loss": 0.3903, - "step": 10266 - }, - { - "epoch": 0.6710018953009608, - "grad_norm": 0.432743102312088, - "learning_rate": 8.971792184828213e-06, - "loss": 0.3508, - "step": 10267 - }, - { - "epoch": 0.6710672505065028, - "grad_norm": 0.8923166990280151, - "learning_rate": 8.971580059911063e-06, - "loss": 0.4817, - "step": 10268 - }, - { - "epoch": 0.6711326057120449, - "grad_norm": 0.45550790429115295, - "learning_rate": 8.971367915623141e-06, - "loss": 0.3702, - "step": 10269 - }, - { - "epoch": 0.6711979609175871, - "grad_norm": 0.45742085576057434, - "learning_rate": 8.971155751965481e-06, - "loss": 0.3985, - "step": 10270 - }, - { - "epoch": 0.6712633161231292, - "grad_norm": 0.4713760316371918, - "learning_rate": 8.970943568939119e-06, - "loss": 0.3894, - "step": 10271 - }, - { - "epoch": 0.6713286713286714, - "grad_norm": 0.43509918451309204, - "learning_rate": 8.970731366545089e-06, - "loss": 0.3609, - "step": 10272 - }, - { - "epoch": 0.6713940265342134, - "grad_norm": 0.45157599449157715, - "learning_rate": 8.970519144784428e-06, - "loss": 0.3755, - "step": 10273 - }, - { - "epoch": 0.6714593817397556, - "grad_norm": 0.45870983600616455, - "learning_rate": 8.97030690365817e-06, - "loss": 0.3668, - "step": 10274 - }, - { - "epoch": 0.6715247369452977, - "grad_norm": 0.4608015716075897, - "learning_rate": 8.970094643167347e-06, - "loss": 0.3638, - "step": 10275 - }, - { - "epoch": 0.6715900921508399, - "grad_norm": 0.48547616600990295, - "learning_rate": 8.969882363313e-06, - "loss": 0.4158, - "step": 10276 - }, - { - "epoch": 0.6716554473563819, - "grad_norm": 0.4456138610839844, - "learning_rate": 8.969670064096158e-06, - "loss": 0.3733, - "step": 10277 - }, - { - "epoch": 0.671720802561924, - "grad_norm": 0.4796470105648041, - "learning_rate": 8.969457745517863e-06, - "loss": 0.3957, - "step": 10278 - }, - { - "epoch": 0.6717861577674662, - "grad_norm": 0.43504852056503296, - "learning_rate": 8.969245407579147e-06, - "loss": 0.4011, - "step": 10279 - }, - { - "epoch": 0.6718515129730083, - "grad_norm": 0.41327905654907227, - "learning_rate": 8.969033050281045e-06, - "loss": 0.3547, - "step": 10280 - }, - { - "epoch": 0.6719168681785505, - "grad_norm": 0.4182872176170349, - "learning_rate": 8.968820673624594e-06, - "loss": 0.3735, - "step": 10281 - }, - { - "epoch": 0.6719822233840925, - "grad_norm": 0.44985899329185486, - "learning_rate": 8.968608277610831e-06, - "loss": 0.3691, - "step": 10282 - }, - { - "epoch": 0.6720475785896347, - "grad_norm": 0.43563640117645264, - "learning_rate": 8.968395862240789e-06, - "loss": 0.3708, - "step": 10283 - }, - { - "epoch": 0.6721129337951768, - "grad_norm": 0.4224267899990082, - "learning_rate": 8.968183427515506e-06, - "loss": 0.376, - "step": 10284 - }, - { - "epoch": 0.6721782890007189, - "grad_norm": 0.4384387135505676, - "learning_rate": 8.967970973436017e-06, - "loss": 0.3929, - "step": 10285 - }, - { - "epoch": 0.672243644206261, - "grad_norm": 0.42206332087516785, - "learning_rate": 8.96775850000336e-06, - "loss": 0.3545, - "step": 10286 - }, - { - "epoch": 0.6723089994118031, - "grad_norm": 0.42261624336242676, - "learning_rate": 8.96754600721857e-06, - "loss": 0.3598, - "step": 10287 - }, - { - "epoch": 0.6723743546173453, - "grad_norm": 0.39704015851020813, - "learning_rate": 8.967333495082684e-06, - "loss": 0.3231, - "step": 10288 - }, - { - "epoch": 0.6724397098228874, - "grad_norm": 0.4982104003429413, - "learning_rate": 8.967120963596738e-06, - "loss": 0.4331, - "step": 10289 - }, - { - "epoch": 0.6725050650284295, - "grad_norm": 0.4854123294353485, - "learning_rate": 8.966908412761768e-06, - "loss": 0.3927, - "step": 10290 - }, - { - "epoch": 0.6725704202339716, - "grad_norm": 0.44044816493988037, - "learning_rate": 8.966695842578812e-06, - "loss": 0.38, - "step": 10291 - }, - { - "epoch": 0.6726357754395138, - "grad_norm": 0.4597327411174774, - "learning_rate": 8.966483253048906e-06, - "loss": 0.4095, - "step": 10292 - }, - { - "epoch": 0.6727011306450559, - "grad_norm": 0.4429006278514862, - "learning_rate": 8.966270644173087e-06, - "loss": 0.3505, - "step": 10293 - }, - { - "epoch": 0.672766485850598, - "grad_norm": 0.4318757653236389, - "learning_rate": 8.966058015952392e-06, - "loss": 0.3857, - "step": 10294 - }, - { - "epoch": 0.6728318410561401, - "grad_norm": 0.4408669173717499, - "learning_rate": 8.965845368387859e-06, - "loss": 0.392, - "step": 10295 - }, - { - "epoch": 0.6728971962616822, - "grad_norm": 0.42073020339012146, - "learning_rate": 8.965632701480524e-06, - "loss": 0.3516, - "step": 10296 - }, - { - "epoch": 0.6729625514672244, - "grad_norm": 0.4711887538433075, - "learning_rate": 8.965420015231423e-06, - "loss": 0.4263, - "step": 10297 - }, - { - "epoch": 0.6730279066727665, - "grad_norm": 0.4669957756996155, - "learning_rate": 8.965207309641596e-06, - "loss": 0.4359, - "step": 10298 - }, - { - "epoch": 0.6730932618783086, - "grad_norm": 0.4311840236186981, - "learning_rate": 8.964994584712078e-06, - "loss": 0.3683, - "step": 10299 - }, - { - "epoch": 0.6731586170838507, - "grad_norm": 0.4037550985813141, - "learning_rate": 8.96478184044391e-06, - "loss": 0.3208, - "step": 10300 - }, - { - "epoch": 0.6732239722893929, - "grad_norm": 0.4380420744419098, - "learning_rate": 8.964569076838125e-06, - "loss": 0.3864, - "step": 10301 - }, - { - "epoch": 0.673289327494935, - "grad_norm": 0.45357048511505127, - "learning_rate": 8.964356293895765e-06, - "loss": 0.3947, - "step": 10302 - }, - { - "epoch": 0.673354682700477, - "grad_norm": 0.46027740836143494, - "learning_rate": 8.964143491617865e-06, - "loss": 0.3709, - "step": 10303 - }, - { - "epoch": 0.6734200379060192, - "grad_norm": 0.4550207555294037, - "learning_rate": 8.963930670005465e-06, - "loss": 0.3782, - "step": 10304 - }, - { - "epoch": 0.6734853931115613, - "grad_norm": 0.44852039217948914, - "learning_rate": 8.963717829059601e-06, - "loss": 0.4004, - "step": 10305 - }, - { - "epoch": 0.6735507483171035, - "grad_norm": 0.43903085589408875, - "learning_rate": 8.963504968781312e-06, - "loss": 0.3443, - "step": 10306 - }, - { - "epoch": 0.6736161035226456, - "grad_norm": 0.4645479619503021, - "learning_rate": 8.963292089171637e-06, - "loss": 0.3752, - "step": 10307 - }, - { - "epoch": 0.6736814587281877, - "grad_norm": 0.44583413004875183, - "learning_rate": 8.963079190231611e-06, - "loss": 0.3689, - "step": 10308 - }, - { - "epoch": 0.6737468139337298, - "grad_norm": 0.4629260003566742, - "learning_rate": 8.962866271962278e-06, - "loss": 0.3696, - "step": 10309 - }, - { - "epoch": 0.6738121691392719, - "grad_norm": 0.449081689119339, - "learning_rate": 8.962653334364671e-06, - "loss": 0.3862, - "step": 10310 - }, - { - "epoch": 0.6738775243448141, - "grad_norm": 0.4531875550746918, - "learning_rate": 8.962440377439833e-06, - "loss": 0.3647, - "step": 10311 - }, - { - "epoch": 0.6739428795503561, - "grad_norm": 0.46704763174057007, - "learning_rate": 8.9622274011888e-06, - "loss": 0.4292, - "step": 10312 - }, - { - "epoch": 0.6740082347558983, - "grad_norm": 0.4282761812210083, - "learning_rate": 8.96201440561261e-06, - "loss": 0.3708, - "step": 10313 - }, - { - "epoch": 0.6740735899614404, - "grad_norm": 0.48998647928237915, - "learning_rate": 8.961801390712304e-06, - "loss": 0.3607, - "step": 10314 - }, - { - "epoch": 0.6741389451669826, - "grad_norm": 0.45318475365638733, - "learning_rate": 8.96158835648892e-06, - "loss": 0.3827, - "step": 10315 - }, - { - "epoch": 0.6742043003725247, - "grad_norm": 0.49559691548347473, - "learning_rate": 8.961375302943497e-06, - "loss": 0.4332, - "step": 10316 - }, - { - "epoch": 0.6742696555780668, - "grad_norm": 0.43280029296875, - "learning_rate": 8.961162230077073e-06, - "loss": 0.361, - "step": 10317 - }, - { - "epoch": 0.6743350107836089, - "grad_norm": 0.41879433393478394, - "learning_rate": 8.96094913789069e-06, - "loss": 0.3611, - "step": 10318 - }, - { - "epoch": 0.674400365989151, - "grad_norm": 0.4279939830303192, - "learning_rate": 8.960736026385387e-06, - "loss": 0.3486, - "step": 10319 - }, - { - "epoch": 0.6744657211946932, - "grad_norm": 0.4643529951572418, - "learning_rate": 8.960522895562201e-06, - "loss": 0.417, - "step": 10320 - }, - { - "epoch": 0.6745310764002352, - "grad_norm": 0.43617352843284607, - "learning_rate": 8.960309745422173e-06, - "loss": 0.4038, - "step": 10321 - }, - { - "epoch": 0.6745964316057774, - "grad_norm": 0.4094836413860321, - "learning_rate": 8.960096575966341e-06, - "loss": 0.3373, - "step": 10322 - }, - { - "epoch": 0.6746617868113195, - "grad_norm": 0.4339154064655304, - "learning_rate": 8.959883387195749e-06, - "loss": 0.3267, - "step": 10323 - }, - { - "epoch": 0.6747271420168617, - "grad_norm": 0.45063844323158264, - "learning_rate": 8.959670179111433e-06, - "loss": 0.4068, - "step": 10324 - }, - { - "epoch": 0.6747924972224038, - "grad_norm": 0.43268445134162903, - "learning_rate": 8.959456951714431e-06, - "loss": 0.3635, - "step": 10325 - }, - { - "epoch": 0.6748578524279459, - "grad_norm": 0.41768670082092285, - "learning_rate": 8.95924370500579e-06, - "loss": 0.3458, - "step": 10326 - }, - { - "epoch": 0.674923207633488, - "grad_norm": 0.4855414628982544, - "learning_rate": 8.959030438986542e-06, - "loss": 0.4194, - "step": 10327 - }, - { - "epoch": 0.6749885628390301, - "grad_norm": 0.4134969711303711, - "learning_rate": 8.958817153657732e-06, - "loss": 0.3513, - "step": 10328 - }, - { - "epoch": 0.6750539180445723, - "grad_norm": 0.4402543008327484, - "learning_rate": 8.9586038490204e-06, - "loss": 0.3819, - "step": 10329 - }, - { - "epoch": 0.6751192732501143, - "grad_norm": 0.4257301986217499, - "learning_rate": 8.958390525075587e-06, - "loss": 0.348, - "step": 10330 - }, - { - "epoch": 0.6751846284556565, - "grad_norm": 0.44497087597846985, - "learning_rate": 8.95817718182433e-06, - "loss": 0.356, - "step": 10331 - }, - { - "epoch": 0.6752499836611986, - "grad_norm": 0.8429772257804871, - "learning_rate": 8.957963819267671e-06, - "loss": 0.4179, - "step": 10332 - }, - { - "epoch": 0.6753153388667408, - "grad_norm": 0.4714336097240448, - "learning_rate": 8.957750437406654e-06, - "loss": 0.4201, - "step": 10333 - }, - { - "epoch": 0.6753806940722829, - "grad_norm": 0.4567609429359436, - "learning_rate": 8.957537036242315e-06, - "loss": 0.383, - "step": 10334 - }, - { - "epoch": 0.675446049277825, - "grad_norm": 0.459941565990448, - "learning_rate": 8.957323615775698e-06, - "loss": 0.4143, - "step": 10335 - }, - { - "epoch": 0.6755114044833671, - "grad_norm": 0.4573533236980438, - "learning_rate": 8.95711017600784e-06, - "loss": 0.3875, - "step": 10336 - }, - { - "epoch": 0.6755767596889092, - "grad_norm": 0.49541574716567993, - "learning_rate": 8.956896716939789e-06, - "loss": 0.4481, - "step": 10337 - }, - { - "epoch": 0.6756421148944514, - "grad_norm": 0.43781739473342896, - "learning_rate": 8.95668323857258e-06, - "loss": 0.3754, - "step": 10338 - }, - { - "epoch": 0.6757074700999934, - "grad_norm": 0.4408060312271118, - "learning_rate": 8.956469740907256e-06, - "loss": 0.4003, - "step": 10339 - }, - { - "epoch": 0.6757728253055356, - "grad_norm": 0.4413546025753021, - "learning_rate": 8.95625622394486e-06, - "loss": 0.3688, - "step": 10340 - }, - { - "epoch": 0.6758381805110777, - "grad_norm": 0.43218860030174255, - "learning_rate": 8.956042687686428e-06, - "loss": 0.3789, - "step": 10341 - }, - { - "epoch": 0.6759035357166199, - "grad_norm": 0.44783079624176025, - "learning_rate": 8.955829132133009e-06, - "loss": 0.4132, - "step": 10342 - }, - { - "epoch": 0.675968890922162, - "grad_norm": 0.4339519143104553, - "learning_rate": 8.955615557285638e-06, - "loss": 0.389, - "step": 10343 - }, - { - "epoch": 0.676034246127704, - "grad_norm": 0.4588874280452728, - "learning_rate": 8.955401963145362e-06, - "loss": 0.3935, - "step": 10344 - }, - { - "epoch": 0.6760996013332462, - "grad_norm": 0.43356090784072876, - "learning_rate": 8.95518834971322e-06, - "loss": 0.3769, - "step": 10345 - }, - { - "epoch": 0.6761649565387883, - "grad_norm": 0.4621839225292206, - "learning_rate": 8.954974716990253e-06, - "loss": 0.3769, - "step": 10346 - }, - { - "epoch": 0.6762303117443305, - "grad_norm": 0.4320388436317444, - "learning_rate": 8.954761064977504e-06, - "loss": 0.3976, - "step": 10347 - }, - { - "epoch": 0.6762956669498725, - "grad_norm": 0.4683499038219452, - "learning_rate": 8.954547393676017e-06, - "loss": 0.3769, - "step": 10348 - }, - { - "epoch": 0.6763610221554147, - "grad_norm": 0.4357524812221527, - "learning_rate": 8.95433370308683e-06, - "loss": 0.3384, - "step": 10349 - }, - { - "epoch": 0.6764263773609568, - "grad_norm": 0.4497321546077728, - "learning_rate": 8.95411999321099e-06, - "loss": 0.4055, - "step": 10350 - }, - { - "epoch": 0.676491732566499, - "grad_norm": 0.45245251059532166, - "learning_rate": 8.953906264049537e-06, - "loss": 0.4032, - "step": 10351 - }, - { - "epoch": 0.676557087772041, - "grad_norm": 0.469921737909317, - "learning_rate": 8.953692515603512e-06, - "loss": 0.3844, - "step": 10352 - }, - { - "epoch": 0.6766224429775831, - "grad_norm": 0.4377079904079437, - "learning_rate": 8.953478747873958e-06, - "loss": 0.3858, - "step": 10353 - }, - { - "epoch": 0.6766877981831253, - "grad_norm": 0.4820059835910797, - "learning_rate": 8.95326496086192e-06, - "loss": 0.4289, - "step": 10354 - }, - { - "epoch": 0.6767531533886674, - "grad_norm": 0.4293309152126312, - "learning_rate": 8.953051154568439e-06, - "loss": 0.3383, - "step": 10355 - }, - { - "epoch": 0.6768185085942096, - "grad_norm": 0.48739153146743774, - "learning_rate": 8.952837328994557e-06, - "loss": 0.4239, - "step": 10356 - }, - { - "epoch": 0.6768838637997516, - "grad_norm": 0.45735102891921997, - "learning_rate": 8.952623484141321e-06, - "loss": 0.3719, - "step": 10357 - }, - { - "epoch": 0.6769492190052938, - "grad_norm": 0.4400736093521118, - "learning_rate": 8.952409620009768e-06, - "loss": 0.3507, - "step": 10358 - }, - { - "epoch": 0.6770145742108359, - "grad_norm": 0.45328009128570557, - "learning_rate": 8.952195736600946e-06, - "loss": 0.4131, - "step": 10359 - }, - { - "epoch": 0.6770799294163781, - "grad_norm": 0.49128541350364685, - "learning_rate": 8.951981833915895e-06, - "loss": 0.4185, - "step": 10360 - }, - { - "epoch": 0.6771452846219201, - "grad_norm": 0.49053069949150085, - "learning_rate": 8.951767911955659e-06, - "loss": 0.405, - "step": 10361 - }, - { - "epoch": 0.6772106398274622, - "grad_norm": 0.496187299489975, - "learning_rate": 8.951553970721283e-06, - "loss": 0.39, - "step": 10362 - }, - { - "epoch": 0.6772759950330044, - "grad_norm": 0.43509534001350403, - "learning_rate": 8.951340010213807e-06, - "loss": 0.3868, - "step": 10363 - }, - { - "epoch": 0.6773413502385465, - "grad_norm": 0.47046181559562683, - "learning_rate": 8.951126030434281e-06, - "loss": 0.3827, - "step": 10364 - }, - { - "epoch": 0.6774067054440887, - "grad_norm": 0.4119715094566345, - "learning_rate": 8.950912031383742e-06, - "loss": 0.3062, - "step": 10365 - }, - { - "epoch": 0.6774720606496307, - "grad_norm": 0.42869341373443604, - "learning_rate": 8.950698013063237e-06, - "loss": 0.3603, - "step": 10366 - }, - { - "epoch": 0.6775374158551729, - "grad_norm": 0.5217639207839966, - "learning_rate": 8.950483975473808e-06, - "loss": 0.4298, - "step": 10367 - }, - { - "epoch": 0.677602771060715, - "grad_norm": 0.5211995840072632, - "learning_rate": 8.950269918616501e-06, - "loss": 0.4159, - "step": 10368 - }, - { - "epoch": 0.677668126266257, - "grad_norm": 0.4355018436908722, - "learning_rate": 8.950055842492359e-06, - "loss": 0.3571, - "step": 10369 - }, - { - "epoch": 0.6777334814717992, - "grad_norm": 0.4326300621032715, - "learning_rate": 8.949841747102425e-06, - "loss": 0.3681, - "step": 10370 - }, - { - "epoch": 0.6777988366773413, - "grad_norm": 0.4292009174823761, - "learning_rate": 8.949627632447747e-06, - "loss": 0.3575, - "step": 10371 - }, - { - "epoch": 0.6778641918828835, - "grad_norm": 0.42942360043525696, - "learning_rate": 8.949413498529364e-06, - "loss": 0.3786, - "step": 10372 - }, - { - "epoch": 0.6779295470884256, - "grad_norm": 0.4630257189273834, - "learning_rate": 8.949199345348326e-06, - "loss": 0.3831, - "step": 10373 - }, - { - "epoch": 0.6779949022939677, - "grad_norm": 0.45308834314346313, - "learning_rate": 8.948985172905673e-06, - "loss": 0.3735, - "step": 10374 - }, - { - "epoch": 0.6780602574995098, - "grad_norm": 0.46515166759490967, - "learning_rate": 8.94877098120245e-06, - "loss": 0.4155, - "step": 10375 - }, - { - "epoch": 0.678125612705052, - "grad_norm": 0.42242324352264404, - "learning_rate": 8.948556770239706e-06, - "loss": 0.3537, - "step": 10376 - }, - { - "epoch": 0.6781909679105941, - "grad_norm": 0.4392739236354828, - "learning_rate": 8.948342540018482e-06, - "loss": 0.3618, - "step": 10377 - }, - { - "epoch": 0.6782563231161362, - "grad_norm": 0.459957093000412, - "learning_rate": 8.94812829053982e-06, - "loss": 0.3923, - "step": 10378 - }, - { - "epoch": 0.6783216783216783, - "grad_norm": 0.42192912101745605, - "learning_rate": 8.947914021804774e-06, - "loss": 0.3502, - "step": 10379 - }, - { - "epoch": 0.6783870335272204, - "grad_norm": 0.45790332555770874, - "learning_rate": 8.94769973381438e-06, - "loss": 0.372, - "step": 10380 - }, - { - "epoch": 0.6784523887327626, - "grad_norm": 0.47560402750968933, - "learning_rate": 8.947485426569688e-06, - "loss": 0.3781, - "step": 10381 - }, - { - "epoch": 0.6785177439383047, - "grad_norm": 0.42340466380119324, - "learning_rate": 8.94727110007174e-06, - "loss": 0.3238, - "step": 10382 - }, - { - "epoch": 0.6785830991438468, - "grad_norm": 0.46564173698425293, - "learning_rate": 8.947056754321585e-06, - "loss": 0.4187, - "step": 10383 - }, - { - "epoch": 0.6786484543493889, - "grad_norm": 0.4427543580532074, - "learning_rate": 8.946842389320267e-06, - "loss": 0.3505, - "step": 10384 - }, - { - "epoch": 0.6787138095549311, - "grad_norm": 0.45892271399497986, - "learning_rate": 8.946628005068831e-06, - "loss": 0.3271, - "step": 10385 - }, - { - "epoch": 0.6787791647604732, - "grad_norm": 0.4201567769050598, - "learning_rate": 8.946413601568325e-06, - "loss": 0.3062, - "step": 10386 - }, - { - "epoch": 0.6788445199660152, - "grad_norm": 0.4735974371433258, - "learning_rate": 8.94619917881979e-06, - "loss": 0.4426, - "step": 10387 - }, - { - "epoch": 0.6789098751715574, - "grad_norm": 0.4726061224937439, - "learning_rate": 8.945984736824276e-06, - "loss": 0.4126, - "step": 10388 - }, - { - "epoch": 0.6789752303770995, - "grad_norm": 0.4313628375530243, - "learning_rate": 8.945770275582826e-06, - "loss": 0.3585, - "step": 10389 - }, - { - "epoch": 0.6790405855826417, - "grad_norm": 0.45539984107017517, - "learning_rate": 8.94555579509649e-06, - "loss": 0.3647, - "step": 10390 - }, - { - "epoch": 0.6791059407881838, - "grad_norm": 0.4311884045600891, - "learning_rate": 8.945341295366309e-06, - "loss": 0.3227, - "step": 10391 - }, - { - "epoch": 0.6791712959937259, - "grad_norm": 0.40638798475265503, - "learning_rate": 8.945126776393333e-06, - "loss": 0.3343, - "step": 10392 - }, - { - "epoch": 0.679236651199268, - "grad_norm": 0.427643358707428, - "learning_rate": 8.944912238178606e-06, - "loss": 0.3345, - "step": 10393 - }, - { - "epoch": 0.6793020064048101, - "grad_norm": 0.4728054702281952, - "learning_rate": 8.944697680723176e-06, - "loss": 0.4271, - "step": 10394 - }, - { - "epoch": 0.6793673616103523, - "grad_norm": 0.4511755108833313, - "learning_rate": 8.944483104028088e-06, - "loss": 0.3649, - "step": 10395 - }, - { - "epoch": 0.6794327168158943, - "grad_norm": 0.4559018909931183, - "learning_rate": 8.94426850809439e-06, - "loss": 0.3862, - "step": 10396 - }, - { - "epoch": 0.6794980720214365, - "grad_norm": 0.4411986470222473, - "learning_rate": 8.944053892923128e-06, - "loss": 0.3485, - "step": 10397 - }, - { - "epoch": 0.6795634272269786, - "grad_norm": 0.44940850138664246, - "learning_rate": 8.94383925851535e-06, - "loss": 0.3835, - "step": 10398 - }, - { - "epoch": 0.6796287824325208, - "grad_norm": 0.44346609711647034, - "learning_rate": 8.9436246048721e-06, - "loss": 0.3672, - "step": 10399 - }, - { - "epoch": 0.6796941376380629, - "grad_norm": 0.4554852843284607, - "learning_rate": 8.943409931994427e-06, - "loss": 0.3846, - "step": 10400 - }, - { - "epoch": 0.679759492843605, - "grad_norm": 0.4254249036312103, - "learning_rate": 8.943195239883377e-06, - "loss": 0.3456, - "step": 10401 - }, - { - "epoch": 0.6798248480491471, - "grad_norm": 0.4523789584636688, - "learning_rate": 8.94298052854e-06, - "loss": 0.3877, - "step": 10402 - }, - { - "epoch": 0.6798902032546892, - "grad_norm": 0.4845171570777893, - "learning_rate": 8.94276579796534e-06, - "loss": 0.4727, - "step": 10403 - }, - { - "epoch": 0.6799555584602314, - "grad_norm": 0.46508994698524475, - "learning_rate": 8.942551048160444e-06, - "loss": 0.3713, - "step": 10404 - }, - { - "epoch": 0.6800209136657734, - "grad_norm": 0.4565008580684662, - "learning_rate": 8.942336279126363e-06, - "loss": 0.38, - "step": 10405 - }, - { - "epoch": 0.6800862688713156, - "grad_norm": 0.4528326392173767, - "learning_rate": 8.942121490864139e-06, - "loss": 0.3936, - "step": 10406 - }, - { - "epoch": 0.6801516240768577, - "grad_norm": 0.4814760386943817, - "learning_rate": 8.941906683374826e-06, - "loss": 0.4181, - "step": 10407 - }, - { - "epoch": 0.6802169792823999, - "grad_norm": 0.4270453155040741, - "learning_rate": 8.941691856659466e-06, - "loss": 0.3847, - "step": 10408 - }, - { - "epoch": 0.680282334487942, - "grad_norm": 0.46762701869010925, - "learning_rate": 8.94147701071911e-06, - "loss": 0.387, - "step": 10409 - }, - { - "epoch": 0.6803476896934841, - "grad_norm": 0.4895259141921997, - "learning_rate": 8.941262145554807e-06, - "loss": 0.327, - "step": 10410 - }, - { - "epoch": 0.6804130448990262, - "grad_norm": 0.45179054141044617, - "learning_rate": 8.941047261167601e-06, - "loss": 0.3695, - "step": 10411 - }, - { - "epoch": 0.6804784001045683, - "grad_norm": 0.4389207661151886, - "learning_rate": 8.940832357558543e-06, - "loss": 0.3548, - "step": 10412 - }, - { - "epoch": 0.6805437553101105, - "grad_norm": 0.44351649284362793, - "learning_rate": 8.94061743472868e-06, - "loss": 0.3724, - "step": 10413 - }, - { - "epoch": 0.6806091105156525, - "grad_norm": 0.4505099654197693, - "learning_rate": 8.94040249267906e-06, - "loss": 0.3614, - "step": 10414 - }, - { - "epoch": 0.6806744657211947, - "grad_norm": 0.4455447494983673, - "learning_rate": 8.940187531410735e-06, - "loss": 0.3888, - "step": 10415 - }, - { - "epoch": 0.6807398209267368, - "grad_norm": 0.437529981136322, - "learning_rate": 8.939972550924746e-06, - "loss": 0.3555, - "step": 10416 - }, - { - "epoch": 0.680805176132279, - "grad_norm": 0.43726852536201477, - "learning_rate": 8.93975755122215e-06, - "loss": 0.3418, - "step": 10417 - }, - { - "epoch": 0.680870531337821, - "grad_norm": 0.45562511682510376, - "learning_rate": 8.93954253230399e-06, - "loss": 0.4094, - "step": 10418 - }, - { - "epoch": 0.6809358865433632, - "grad_norm": 0.42359310388565063, - "learning_rate": 8.939327494171315e-06, - "loss": 0.4072, - "step": 10419 - }, - { - "epoch": 0.6810012417489053, - "grad_norm": 0.46157050132751465, - "learning_rate": 8.939112436825177e-06, - "loss": 0.4001, - "step": 10420 - }, - { - "epoch": 0.6810665969544474, - "grad_norm": 0.5074307322502136, - "learning_rate": 8.938897360266621e-06, - "loss": 0.4772, - "step": 10421 - }, - { - "epoch": 0.6811319521599896, - "grad_norm": 0.44071605801582336, - "learning_rate": 8.938682264496699e-06, - "loss": 0.3764, - "step": 10422 - }, - { - "epoch": 0.6811973073655316, - "grad_norm": 0.6126153469085693, - "learning_rate": 8.938467149516459e-06, - "loss": 0.339, - "step": 10423 - }, - { - "epoch": 0.6812626625710738, - "grad_norm": 0.44011783599853516, - "learning_rate": 8.938252015326952e-06, - "loss": 0.3559, - "step": 10424 - }, - { - "epoch": 0.6813280177766159, - "grad_norm": 0.4675701856613159, - "learning_rate": 8.938036861929223e-06, - "loss": 0.3983, - "step": 10425 - }, - { - "epoch": 0.6813933729821581, - "grad_norm": 0.42904967069625854, - "learning_rate": 8.937821689324325e-06, - "loss": 0.3597, - "step": 10426 - }, - { - "epoch": 0.6814587281877001, - "grad_norm": 0.4446243643760681, - "learning_rate": 8.937606497513308e-06, - "loss": 0.3716, - "step": 10427 - }, - { - "epoch": 0.6815240833932422, - "grad_norm": 0.4483039081096649, - "learning_rate": 8.937391286497216e-06, - "loss": 0.3966, - "step": 10428 - }, - { - "epoch": 0.6815894385987844, - "grad_norm": 0.46698901057243347, - "learning_rate": 8.937176056277105e-06, - "loss": 0.4258, - "step": 10429 - }, - { - "epoch": 0.6816547938043265, - "grad_norm": 0.47981882095336914, - "learning_rate": 8.936960806854024e-06, - "loss": 0.3886, - "step": 10430 - }, - { - "epoch": 0.6817201490098687, - "grad_norm": 0.4871785044670105, - "learning_rate": 8.93674553822902e-06, - "loss": 0.4052, - "step": 10431 - }, - { - "epoch": 0.6817855042154107, - "grad_norm": 0.46038419008255005, - "learning_rate": 8.936530250403143e-06, - "loss": 0.392, - "step": 10432 - }, - { - "epoch": 0.6818508594209529, - "grad_norm": 0.47744181752204895, - "learning_rate": 8.936314943377447e-06, - "loss": 0.37, - "step": 10433 - }, - { - "epoch": 0.681916214626495, - "grad_norm": 0.4498889446258545, - "learning_rate": 8.936099617152977e-06, - "loss": 0.4048, - "step": 10434 - }, - { - "epoch": 0.6819815698320372, - "grad_norm": 0.4357393682003021, - "learning_rate": 8.935884271730787e-06, - "loss": 0.3686, - "step": 10435 - }, - { - "epoch": 0.6820469250375792, - "grad_norm": 0.44549882411956787, - "learning_rate": 8.935668907111923e-06, - "loss": 0.3881, - "step": 10436 - }, - { - "epoch": 0.6821122802431213, - "grad_norm": 0.44550469517707825, - "learning_rate": 8.935453523297442e-06, - "loss": 0.3674, - "step": 10437 - }, - { - "epoch": 0.6821776354486635, - "grad_norm": 0.4659557044506073, - "learning_rate": 8.935238120288388e-06, - "loss": 0.4223, - "step": 10438 - }, - { - "epoch": 0.6822429906542056, - "grad_norm": 0.443538099527359, - "learning_rate": 8.935022698085815e-06, - "loss": 0.3767, - "step": 10439 - }, - { - "epoch": 0.6823083458597478, - "grad_norm": 0.4619375169277191, - "learning_rate": 8.934807256690774e-06, - "loss": 0.402, - "step": 10440 - }, - { - "epoch": 0.6823737010652898, - "grad_norm": 0.4323079288005829, - "learning_rate": 8.934591796104315e-06, - "loss": 0.3486, - "step": 10441 - }, - { - "epoch": 0.682439056270832, - "grad_norm": 0.4612639844417572, - "learning_rate": 8.934376316327486e-06, - "loss": 0.4015, - "step": 10442 - }, - { - "epoch": 0.6825044114763741, - "grad_norm": 0.45452800393104553, - "learning_rate": 8.934160817361345e-06, - "loss": 0.4516, - "step": 10443 - }, - { - "epoch": 0.6825697666819163, - "grad_norm": 0.4550628960132599, - "learning_rate": 8.933945299206937e-06, - "loss": 0.4084, - "step": 10444 - }, - { - "epoch": 0.6826351218874583, - "grad_norm": 0.43161723017692566, - "learning_rate": 8.933729761865312e-06, - "loss": 0.3897, - "step": 10445 - }, - { - "epoch": 0.6827004770930004, - "grad_norm": 0.49983176589012146, - "learning_rate": 8.933514205337527e-06, - "loss": 0.3792, - "step": 10446 - }, - { - "epoch": 0.6827658322985426, - "grad_norm": 0.4445848762989044, - "learning_rate": 8.933298629624632e-06, - "loss": 0.3764, - "step": 10447 - }, - { - "epoch": 0.6828311875040847, - "grad_norm": 0.43100154399871826, - "learning_rate": 8.933083034727674e-06, - "loss": 0.3877, - "step": 10448 - }, - { - "epoch": 0.6828965427096269, - "grad_norm": 0.442611426115036, - "learning_rate": 8.932867420647709e-06, - "loss": 0.359, - "step": 10449 - }, - { - "epoch": 0.6829618979151689, - "grad_norm": 0.45714277029037476, - "learning_rate": 8.932651787385786e-06, - "loss": 0.3883, - "step": 10450 - }, - { - "epoch": 0.6830272531207111, - "grad_norm": 0.427101194858551, - "learning_rate": 8.93243613494296e-06, - "loss": 0.3387, - "step": 10451 - }, - { - "epoch": 0.6830926083262532, - "grad_norm": 0.4358592927455902, - "learning_rate": 8.932220463320278e-06, - "loss": 0.3739, - "step": 10452 - }, - { - "epoch": 0.6831579635317953, - "grad_norm": 0.4458143413066864, - "learning_rate": 8.932004772518796e-06, - "loss": 0.4092, - "step": 10453 - }, - { - "epoch": 0.6832233187373374, - "grad_norm": 0.47435426712036133, - "learning_rate": 8.931789062539566e-06, - "loss": 0.4197, - "step": 10454 - }, - { - "epoch": 0.6832886739428795, - "grad_norm": 0.4375659227371216, - "learning_rate": 8.931573333383636e-06, - "loss": 0.4025, - "step": 10455 - }, - { - "epoch": 0.6833540291484217, - "grad_norm": 0.42265719175338745, - "learning_rate": 8.931357585052063e-06, - "loss": 0.3746, - "step": 10456 - }, - { - "epoch": 0.6834193843539638, - "grad_norm": 0.44514164328575134, - "learning_rate": 8.931141817545896e-06, - "loss": 0.3928, - "step": 10457 - }, - { - "epoch": 0.683484739559506, - "grad_norm": 0.4276603162288666, - "learning_rate": 8.930926030866188e-06, - "loss": 0.3629, - "step": 10458 - }, - { - "epoch": 0.683550094765048, - "grad_norm": 0.4545811712741852, - "learning_rate": 8.930710225013992e-06, - "loss": 0.3797, - "step": 10459 - }, - { - "epoch": 0.6836154499705902, - "grad_norm": 0.4180017113685608, - "learning_rate": 8.930494399990361e-06, - "loss": 0.3488, - "step": 10460 - }, - { - "epoch": 0.6836808051761323, - "grad_norm": 0.43427959084510803, - "learning_rate": 8.930278555796347e-06, - "loss": 0.3388, - "step": 10461 - }, - { - "epoch": 0.6837461603816744, - "grad_norm": 0.4505934715270996, - "learning_rate": 8.930062692433004e-06, - "loss": 0.3623, - "step": 10462 - }, - { - "epoch": 0.6838115155872165, - "grad_norm": 0.4257461726665497, - "learning_rate": 8.92984680990138e-06, - "loss": 0.3729, - "step": 10463 - }, - { - "epoch": 0.6838768707927586, - "grad_norm": 0.457512766122818, - "learning_rate": 8.929630908202535e-06, - "loss": 0.3984, - "step": 10464 - }, - { - "epoch": 0.6839422259983008, - "grad_norm": 0.45155709981918335, - "learning_rate": 8.929414987337519e-06, - "loss": 0.3871, - "step": 10465 - }, - { - "epoch": 0.6840075812038429, - "grad_norm": 0.45669347047805786, - "learning_rate": 8.929199047307384e-06, - "loss": 0.3828, - "step": 10466 - }, - { - "epoch": 0.684072936409385, - "grad_norm": 0.4452977180480957, - "learning_rate": 8.928983088113184e-06, - "loss": 0.3853, - "step": 10467 - }, - { - "epoch": 0.6841382916149271, - "grad_norm": 0.46452596783638, - "learning_rate": 8.928767109755971e-06, - "loss": 0.4117, - "step": 10468 - }, - { - "epoch": 0.6842036468204693, - "grad_norm": 0.4296901524066925, - "learning_rate": 8.928551112236803e-06, - "loss": 0.3625, - "step": 10469 - }, - { - "epoch": 0.6842690020260114, - "grad_norm": 0.4388759732246399, - "learning_rate": 8.928335095556727e-06, - "loss": 0.3551, - "step": 10470 - }, - { - "epoch": 0.6843343572315534, - "grad_norm": 0.44955843687057495, - "learning_rate": 8.928119059716802e-06, - "loss": 0.3911, - "step": 10471 - }, - { - "epoch": 0.6843997124370956, - "grad_norm": 0.47844985127449036, - "learning_rate": 8.927903004718078e-06, - "loss": 0.4492, - "step": 10472 - }, - { - "epoch": 0.6844650676426377, - "grad_norm": 0.4134272336959839, - "learning_rate": 8.927686930561612e-06, - "loss": 0.3117, - "step": 10473 - }, - { - "epoch": 0.6845304228481799, - "grad_norm": 0.43954187631607056, - "learning_rate": 8.927470837248455e-06, - "loss": 0.3459, - "step": 10474 - }, - { - "epoch": 0.684595778053722, - "grad_norm": 0.5005001425743103, - "learning_rate": 8.927254724779661e-06, - "loss": 0.4935, - "step": 10475 - }, - { - "epoch": 0.6846611332592641, - "grad_norm": 0.42396217584609985, - "learning_rate": 8.927038593156287e-06, - "loss": 0.3381, - "step": 10476 - }, - { - "epoch": 0.6847264884648062, - "grad_norm": 0.46023210883140564, - "learning_rate": 8.926822442379383e-06, - "loss": 0.3611, - "step": 10477 - }, - { - "epoch": 0.6847918436703483, - "grad_norm": 0.4212297201156616, - "learning_rate": 8.92660627245001e-06, - "loss": 0.3165, - "step": 10478 - }, - { - "epoch": 0.6848571988758905, - "grad_norm": 0.6105642318725586, - "learning_rate": 8.926390083369214e-06, - "loss": 0.4126, - "step": 10479 - }, - { - "epoch": 0.6849225540814325, - "grad_norm": 0.38947272300720215, - "learning_rate": 8.926173875138053e-06, - "loss": 0.3039, - "step": 10480 - }, - { - "epoch": 0.6849879092869747, - "grad_norm": 0.4735604524612427, - "learning_rate": 8.925957647757584e-06, - "loss": 0.4305, - "step": 10481 - }, - { - "epoch": 0.6850532644925168, - "grad_norm": 0.4116586148738861, - "learning_rate": 8.92574140122886e-06, - "loss": 0.3562, - "step": 10482 - }, - { - "epoch": 0.685118619698059, - "grad_norm": 0.4315769374370575, - "learning_rate": 8.925525135552932e-06, - "loss": 0.373, - "step": 10483 - }, - { - "epoch": 0.6851839749036011, - "grad_norm": 0.4615154266357422, - "learning_rate": 8.925308850730862e-06, - "loss": 0.404, - "step": 10484 - }, - { - "epoch": 0.6852493301091432, - "grad_norm": 0.4887750446796417, - "learning_rate": 8.925092546763698e-06, - "loss": 0.3828, - "step": 10485 - }, - { - "epoch": 0.6853146853146853, - "grad_norm": 0.4732937812805176, - "learning_rate": 8.9248762236525e-06, - "loss": 0.4293, - "step": 10486 - }, - { - "epoch": 0.6853800405202274, - "grad_norm": 0.44955384731292725, - "learning_rate": 8.924659881398318e-06, - "loss": 0.4109, - "step": 10487 - }, - { - "epoch": 0.6854453957257696, - "grad_norm": 0.5000100135803223, - "learning_rate": 8.924443520002213e-06, - "loss": 0.4199, - "step": 10488 - }, - { - "epoch": 0.6855107509313116, - "grad_norm": 0.4586191475391388, - "learning_rate": 8.924227139465236e-06, - "loss": 0.4165, - "step": 10489 - }, - { - "epoch": 0.6855761061368538, - "grad_norm": 0.43072134256362915, - "learning_rate": 8.924010739788444e-06, - "loss": 0.3507, - "step": 10490 - }, - { - "epoch": 0.6856414613423959, - "grad_norm": 0.4799150824546814, - "learning_rate": 8.923794320972892e-06, - "loss": 0.4124, - "step": 10491 - }, - { - "epoch": 0.6857068165479381, - "grad_norm": 0.40612852573394775, - "learning_rate": 8.923577883019636e-06, - "loss": 0.3159, - "step": 10492 - }, - { - "epoch": 0.6857721717534802, - "grad_norm": 0.4481818675994873, - "learning_rate": 8.923361425929731e-06, - "loss": 0.3713, - "step": 10493 - }, - { - "epoch": 0.6858375269590223, - "grad_norm": 0.43309223651885986, - "learning_rate": 8.923144949704233e-06, - "loss": 0.3704, - "step": 10494 - }, - { - "epoch": 0.6859028821645644, - "grad_norm": 0.4391562342643738, - "learning_rate": 8.9229284543442e-06, - "loss": 0.375, - "step": 10495 - }, - { - "epoch": 0.6859682373701065, - "grad_norm": 0.4453137218952179, - "learning_rate": 8.922711939850684e-06, - "loss": 0.3949, - "step": 10496 - }, - { - "epoch": 0.6860335925756487, - "grad_norm": 0.4381144642829895, - "learning_rate": 8.922495406224743e-06, - "loss": 0.3782, - "step": 10497 - }, - { - "epoch": 0.6860989477811907, - "grad_norm": 0.45431220531463623, - "learning_rate": 8.922278853467432e-06, - "loss": 0.3895, - "step": 10498 - }, - { - "epoch": 0.6861643029867329, - "grad_norm": 0.4399212598800659, - "learning_rate": 8.922062281579811e-06, - "loss": 0.3682, - "step": 10499 - }, - { - "epoch": 0.686229658192275, - "grad_norm": 0.4298236072063446, - "learning_rate": 8.92184569056293e-06, - "loss": 0.3811, - "step": 10500 - }, - { - "epoch": 0.6862950133978172, - "grad_norm": 0.4866264760494232, - "learning_rate": 8.921629080417852e-06, - "loss": 0.4035, - "step": 10501 - }, - { - "epoch": 0.6863603686033592, - "grad_norm": 0.4533084034919739, - "learning_rate": 8.92141245114563e-06, - "loss": 0.3838, - "step": 10502 - }, - { - "epoch": 0.6864257238089014, - "grad_norm": 0.4726482331752777, - "learning_rate": 8.92119580274732e-06, - "loss": 0.4461, - "step": 10503 - }, - { - "epoch": 0.6864910790144435, - "grad_norm": 0.4625649154186249, - "learning_rate": 8.92097913522398e-06, - "loss": 0.3746, - "step": 10504 - }, - { - "epoch": 0.6865564342199856, - "grad_norm": 0.4844793975353241, - "learning_rate": 8.920762448576665e-06, - "loss": 0.414, - "step": 10505 - }, - { - "epoch": 0.6866217894255278, - "grad_norm": 0.4288451075553894, - "learning_rate": 8.920545742806436e-06, - "loss": 0.3974, - "step": 10506 - }, - { - "epoch": 0.6866871446310698, - "grad_norm": 0.43219995498657227, - "learning_rate": 8.920329017914345e-06, - "loss": 0.3614, - "step": 10507 - }, - { - "epoch": 0.686752499836612, - "grad_norm": 0.46927735209465027, - "learning_rate": 8.920112273901452e-06, - "loss": 0.3791, - "step": 10508 - }, - { - "epoch": 0.6868178550421541, - "grad_norm": 0.48191893100738525, - "learning_rate": 8.919895510768814e-06, - "loss": 0.426, - "step": 10509 - }, - { - "epoch": 0.6868832102476963, - "grad_norm": 0.4131309390068054, - "learning_rate": 8.919678728517487e-06, - "loss": 0.3243, - "step": 10510 - }, - { - "epoch": 0.6869485654532383, - "grad_norm": 0.4218220114707947, - "learning_rate": 8.91946192714853e-06, - "loss": 0.2949, - "step": 10511 - }, - { - "epoch": 0.6870139206587804, - "grad_norm": 0.4162497818470001, - "learning_rate": 8.919245106662997e-06, - "loss": 0.3278, - "step": 10512 - }, - { - "epoch": 0.6870792758643226, - "grad_norm": 0.425112247467041, - "learning_rate": 8.919028267061948e-06, - "loss": 0.3554, - "step": 10513 - }, - { - "epoch": 0.6871446310698647, - "grad_norm": 0.4841451942920685, - "learning_rate": 8.918811408346442e-06, - "loss": 0.4142, - "step": 10514 - }, - { - "epoch": 0.6872099862754069, - "grad_norm": 0.47394266724586487, - "learning_rate": 8.918594530517536e-06, - "loss": 0.4179, - "step": 10515 - }, - { - "epoch": 0.6872753414809489, - "grad_norm": 0.4626839756965637, - "learning_rate": 8.918377633576285e-06, - "loss": 0.4002, - "step": 10516 - }, - { - "epoch": 0.6873406966864911, - "grad_norm": 0.42641302943229675, - "learning_rate": 8.91816071752375e-06, - "loss": 0.3356, - "step": 10517 - }, - { - "epoch": 0.6874060518920332, - "grad_norm": 0.4462212920188904, - "learning_rate": 8.917943782360986e-06, - "loss": 0.3899, - "step": 10518 - }, - { - "epoch": 0.6874714070975754, - "grad_norm": 0.46592387557029724, - "learning_rate": 8.917726828089054e-06, - "loss": 0.3808, - "step": 10519 - }, - { - "epoch": 0.6875367623031174, - "grad_norm": 0.46447572112083435, - "learning_rate": 8.917509854709012e-06, - "loss": 0.4059, - "step": 10520 - }, - { - "epoch": 0.6876021175086595, - "grad_norm": 0.4563732445240021, - "learning_rate": 8.917292862221918e-06, - "loss": 0.365, - "step": 10521 - }, - { - "epoch": 0.6876674727142017, - "grad_norm": 0.4624747931957245, - "learning_rate": 8.917075850628827e-06, - "loss": 0.3966, - "step": 10522 - }, - { - "epoch": 0.6877328279197438, - "grad_norm": 0.48354753851890564, - "learning_rate": 8.916858819930801e-06, - "loss": 0.3962, - "step": 10523 - }, - { - "epoch": 0.687798183125286, - "grad_norm": 0.43341320753097534, - "learning_rate": 8.916641770128899e-06, - "loss": 0.3535, - "step": 10524 - }, - { - "epoch": 0.687863538330828, - "grad_norm": 0.3981284201145172, - "learning_rate": 8.916424701224176e-06, - "loss": 0.3221, - "step": 10525 - }, - { - "epoch": 0.6879288935363702, - "grad_norm": 0.45291438698768616, - "learning_rate": 8.916207613217695e-06, - "loss": 0.3689, - "step": 10526 - }, - { - "epoch": 0.6879942487419123, - "grad_norm": 0.4812171161174774, - "learning_rate": 8.91599050611051e-06, - "loss": 0.4138, - "step": 10527 - }, - { - "epoch": 0.6880596039474545, - "grad_norm": 0.42696821689605713, - "learning_rate": 8.915773379903685e-06, - "loss": 0.3286, - "step": 10528 - }, - { - "epoch": 0.6881249591529965, - "grad_norm": 0.46794548630714417, - "learning_rate": 8.915556234598276e-06, - "loss": 0.4165, - "step": 10529 - }, - { - "epoch": 0.6881903143585386, - "grad_norm": 0.46507856249809265, - "learning_rate": 8.915339070195344e-06, - "loss": 0.3388, - "step": 10530 - }, - { - "epoch": 0.6882556695640808, - "grad_norm": 0.434635728597641, - "learning_rate": 8.915121886695946e-06, - "loss": 0.385, - "step": 10531 - }, - { - "epoch": 0.6883210247696229, - "grad_norm": 0.4649295508861542, - "learning_rate": 8.914904684101143e-06, - "loss": 0.4223, - "step": 10532 - }, - { - "epoch": 0.688386379975165, - "grad_norm": 0.4520520567893982, - "learning_rate": 8.91468746241199e-06, - "loss": 0.3779, - "step": 10533 - }, - { - "epoch": 0.6884517351807071, - "grad_norm": 0.463011234998703, - "learning_rate": 8.914470221629554e-06, - "loss": 0.3743, - "step": 10534 - }, - { - "epoch": 0.6885170903862493, - "grad_norm": 0.4743647873401642, - "learning_rate": 8.91425296175489e-06, - "loss": 0.4088, - "step": 10535 - }, - { - "epoch": 0.6885824455917914, - "grad_norm": 0.42367467284202576, - "learning_rate": 8.914035682789058e-06, - "loss": 0.3493, - "step": 10536 - }, - { - "epoch": 0.6886478007973335, - "grad_norm": 0.48695218563079834, - "learning_rate": 8.913818384733117e-06, - "loss": 0.4029, - "step": 10537 - }, - { - "epoch": 0.6887131560028756, - "grad_norm": 0.4481523334980011, - "learning_rate": 8.913601067588128e-06, - "loss": 0.3826, - "step": 10538 - }, - { - "epoch": 0.6887785112084177, - "grad_norm": 0.48517176508903503, - "learning_rate": 8.913383731355152e-06, - "loss": 0.402, - "step": 10539 - }, - { - "epoch": 0.6888438664139599, - "grad_norm": 0.4523976445198059, - "learning_rate": 8.913166376035247e-06, - "loss": 0.403, - "step": 10540 - }, - { - "epoch": 0.688909221619502, - "grad_norm": 0.44856175780296326, - "learning_rate": 8.912949001629474e-06, - "loss": 0.3922, - "step": 10541 - }, - { - "epoch": 0.6889745768250441, - "grad_norm": 0.44000181555747986, - "learning_rate": 8.912731608138894e-06, - "loss": 0.3479, - "step": 10542 - }, - { - "epoch": 0.6890399320305862, - "grad_norm": 0.4488789439201355, - "learning_rate": 8.912514195564566e-06, - "loss": 0.3213, - "step": 10543 - }, - { - "epoch": 0.6891052872361284, - "grad_norm": 0.4425378739833832, - "learning_rate": 8.912296763907548e-06, - "loss": 0.3203, - "step": 10544 - }, - { - "epoch": 0.6891706424416705, - "grad_norm": 0.46558132767677307, - "learning_rate": 8.912079313168907e-06, - "loss": 0.4087, - "step": 10545 - }, - { - "epoch": 0.6892359976472126, - "grad_norm": 0.437294602394104, - "learning_rate": 8.9118618433497e-06, - "loss": 0.3789, - "step": 10546 - }, - { - "epoch": 0.6893013528527547, - "grad_norm": 0.4735149145126343, - "learning_rate": 8.911644354450986e-06, - "loss": 0.366, - "step": 10547 - }, - { - "epoch": 0.6893667080582968, - "grad_norm": 0.48468002676963806, - "learning_rate": 8.911426846473828e-06, - "loss": 0.4307, - "step": 10548 - }, - { - "epoch": 0.689432063263839, - "grad_norm": 0.425825297832489, - "learning_rate": 8.911209319419285e-06, - "loss": 0.3594, - "step": 10549 - }, - { - "epoch": 0.6894974184693811, - "grad_norm": 0.45564308762550354, - "learning_rate": 8.91099177328842e-06, - "loss": 0.3853, - "step": 10550 - }, - { - "epoch": 0.6895627736749232, - "grad_norm": 0.44014772772789, - "learning_rate": 8.910774208082293e-06, - "loss": 0.3767, - "step": 10551 - }, - { - "epoch": 0.6896281288804653, - "grad_norm": 0.47790876030921936, - "learning_rate": 8.910556623801966e-06, - "loss": 0.4091, - "step": 10552 - }, - { - "epoch": 0.6896934840860075, - "grad_norm": 0.4382498562335968, - "learning_rate": 8.9103390204485e-06, - "loss": 0.3735, - "step": 10553 - }, - { - "epoch": 0.6897588392915496, - "grad_norm": 0.4551611542701721, - "learning_rate": 8.910121398022956e-06, - "loss": 0.4214, - "step": 10554 - }, - { - "epoch": 0.6898241944970916, - "grad_norm": 0.4938529133796692, - "learning_rate": 8.909903756526393e-06, - "loss": 0.4592, - "step": 10555 - }, - { - "epoch": 0.6898895497026338, - "grad_norm": 0.44927355647087097, - "learning_rate": 8.909686095959877e-06, - "loss": 0.3901, - "step": 10556 - }, - { - "epoch": 0.6899549049081759, - "grad_norm": 0.47456619143486023, - "learning_rate": 8.909468416324467e-06, - "loss": 0.4062, - "step": 10557 - }, - { - "epoch": 0.6900202601137181, - "grad_norm": 0.44300612807273865, - "learning_rate": 8.909250717621225e-06, - "loss": 0.4017, - "step": 10558 - }, - { - "epoch": 0.6900856153192602, - "grad_norm": 0.45416632294654846, - "learning_rate": 8.909032999851213e-06, - "loss": 0.3796, - "step": 10559 - }, - { - "epoch": 0.6901509705248023, - "grad_norm": 0.4438626766204834, - "learning_rate": 8.908815263015492e-06, - "loss": 0.4059, - "step": 10560 - }, - { - "epoch": 0.6902163257303444, - "grad_norm": 0.41670164465904236, - "learning_rate": 8.908597507115127e-06, - "loss": 0.3566, - "step": 10561 - }, - { - "epoch": 0.6902816809358865, - "grad_norm": 0.47049680352211, - "learning_rate": 8.908379732151175e-06, - "loss": 0.3999, - "step": 10562 - }, - { - "epoch": 0.6903470361414287, - "grad_norm": 0.4610985219478607, - "learning_rate": 8.908161938124704e-06, - "loss": 0.3784, - "step": 10563 - }, - { - "epoch": 0.6904123913469707, - "grad_norm": 0.4252164363861084, - "learning_rate": 8.907944125036771e-06, - "loss": 0.3729, - "step": 10564 - }, - { - "epoch": 0.6904777465525129, - "grad_norm": 0.4724777340888977, - "learning_rate": 8.90772629288844e-06, - "loss": 0.3858, - "step": 10565 - }, - { - "epoch": 0.690543101758055, - "grad_norm": 0.44275957345962524, - "learning_rate": 8.907508441680778e-06, - "loss": 0.3927, - "step": 10566 - }, - { - "epoch": 0.6906084569635972, - "grad_norm": 0.4292038679122925, - "learning_rate": 8.907290571414841e-06, - "loss": 0.3836, - "step": 10567 - }, - { - "epoch": 0.6906738121691393, - "grad_norm": 0.4941498637199402, - "learning_rate": 8.907072682091695e-06, - "loss": 0.3412, - "step": 10568 - }, - { - "epoch": 0.6907391673746814, - "grad_norm": 0.4277919828891754, - "learning_rate": 8.906854773712402e-06, - "loss": 0.3665, - "step": 10569 - }, - { - "epoch": 0.6908045225802235, - "grad_norm": 0.4419439733028412, - "learning_rate": 8.906636846278023e-06, - "loss": 0.4015, - "step": 10570 - }, - { - "epoch": 0.6908698777857656, - "grad_norm": 0.39750900864601135, - "learning_rate": 8.906418899789624e-06, - "loss": 0.3012, - "step": 10571 - }, - { - "epoch": 0.6909352329913078, - "grad_norm": 0.5598298907279968, - "learning_rate": 8.906200934248267e-06, - "loss": 0.4767, - "step": 10572 - }, - { - "epoch": 0.6910005881968498, - "grad_norm": 0.4465446472167969, - "learning_rate": 8.905982949655014e-06, - "loss": 0.3541, - "step": 10573 - }, - { - "epoch": 0.691065943402392, - "grad_norm": 0.4126950204372406, - "learning_rate": 8.905764946010931e-06, - "loss": 0.3185, - "step": 10574 - }, - { - "epoch": 0.6911312986079341, - "grad_norm": 0.4510418772697449, - "learning_rate": 8.905546923317077e-06, - "loss": 0.3961, - "step": 10575 - }, - { - "epoch": 0.6911966538134763, - "grad_norm": 0.43746069073677063, - "learning_rate": 8.90532888157452e-06, - "loss": 0.404, - "step": 10576 - }, - { - "epoch": 0.6912620090190184, - "grad_norm": 0.49543002247810364, - "learning_rate": 8.905110820784319e-06, - "loss": 0.4365, - "step": 10577 - }, - { - "epoch": 0.6913273642245605, - "grad_norm": 0.48675885796546936, - "learning_rate": 8.904892740947539e-06, - "loss": 0.4306, - "step": 10578 - }, - { - "epoch": 0.6913927194301026, - "grad_norm": 0.4443257749080658, - "learning_rate": 8.904674642065247e-06, - "loss": 0.3956, - "step": 10579 - }, - { - "epoch": 0.6914580746356447, - "grad_norm": 0.4747105836868286, - "learning_rate": 8.904456524138503e-06, - "loss": 0.3717, - "step": 10580 - }, - { - "epoch": 0.6915234298411869, - "grad_norm": 0.48398444056510925, - "learning_rate": 8.904238387168371e-06, - "loss": 0.4199, - "step": 10581 - }, - { - "epoch": 0.6915887850467289, - "grad_norm": 0.46877458691596985, - "learning_rate": 8.904020231155916e-06, - "loss": 0.4041, - "step": 10582 - }, - { - "epoch": 0.6916541402522711, - "grad_norm": 0.4279765188694, - "learning_rate": 8.903802056102202e-06, - "loss": 0.3479, - "step": 10583 - }, - { - "epoch": 0.6917194954578132, - "grad_norm": 0.4481217563152313, - "learning_rate": 8.903583862008294e-06, - "loss": 0.3472, - "step": 10584 - }, - { - "epoch": 0.6917848506633554, - "grad_norm": 0.43060222268104553, - "learning_rate": 8.903365648875254e-06, - "loss": 0.3549, - "step": 10585 - }, - { - "epoch": 0.6918502058688974, - "grad_norm": 0.44700443744659424, - "learning_rate": 8.903147416704148e-06, - "loss": 0.4061, - "step": 10586 - }, - { - "epoch": 0.6919155610744396, - "grad_norm": 0.4763336777687073, - "learning_rate": 8.90292916549604e-06, - "loss": 0.4159, - "step": 10587 - }, - { - "epoch": 0.6919809162799817, - "grad_norm": 0.44735389947891235, - "learning_rate": 8.902710895251993e-06, - "loss": 0.383, - "step": 10588 - }, - { - "epoch": 0.6920462714855238, - "grad_norm": 0.4600410759449005, - "learning_rate": 8.902492605973074e-06, - "loss": 0.4224, - "step": 10589 - }, - { - "epoch": 0.692111626691066, - "grad_norm": 0.4634614586830139, - "learning_rate": 8.902274297660347e-06, - "loss": 0.3581, - "step": 10590 - }, - { - "epoch": 0.692176981896608, - "grad_norm": 0.4893110990524292, - "learning_rate": 8.902055970314875e-06, - "loss": 0.4458, - "step": 10591 - }, - { - "epoch": 0.6922423371021502, - "grad_norm": 0.4451541304588318, - "learning_rate": 8.901837623937726e-06, - "loss": 0.3626, - "step": 10592 - }, - { - "epoch": 0.6923076923076923, - "grad_norm": 0.45439523458480835, - "learning_rate": 8.901619258529963e-06, - "loss": 0.3615, - "step": 10593 - }, - { - "epoch": 0.6923730475132345, - "grad_norm": 0.4473332464694977, - "learning_rate": 8.90140087409265e-06, - "loss": 0.3609, - "step": 10594 - }, - { - "epoch": 0.6924384027187765, - "grad_norm": 0.4544123113155365, - "learning_rate": 8.901182470626855e-06, - "loss": 0.4165, - "step": 10595 - }, - { - "epoch": 0.6925037579243186, - "grad_norm": 0.4130313992500305, - "learning_rate": 8.90096404813364e-06, - "loss": 0.3646, - "step": 10596 - }, - { - "epoch": 0.6925691131298608, - "grad_norm": 0.4376696050167084, - "learning_rate": 8.900745606614072e-06, - "loss": 0.3931, - "step": 10597 - }, - { - "epoch": 0.6926344683354029, - "grad_norm": 0.4156436324119568, - "learning_rate": 8.900527146069218e-06, - "loss": 0.3595, - "step": 10598 - }, - { - "epoch": 0.6926998235409451, - "grad_norm": 0.4557853937149048, - "learning_rate": 8.90030866650014e-06, - "loss": 0.418, - "step": 10599 - }, - { - "epoch": 0.6927651787464871, - "grad_norm": 0.4538869559764862, - "learning_rate": 8.900090167907906e-06, - "loss": 0.3923, - "step": 10600 - }, - { - "epoch": 0.6928305339520293, - "grad_norm": 0.43243837356567383, - "learning_rate": 8.89987165029358e-06, - "loss": 0.369, - "step": 10601 - }, - { - "epoch": 0.6928958891575714, - "grad_norm": 0.4296075701713562, - "learning_rate": 8.899653113658232e-06, - "loss": 0.3524, - "step": 10602 - }, - { - "epoch": 0.6929612443631136, - "grad_norm": 0.4318280518054962, - "learning_rate": 8.899434558002923e-06, - "loss": 0.3608, - "step": 10603 - }, - { - "epoch": 0.6930265995686556, - "grad_norm": 0.46237117052078247, - "learning_rate": 8.899215983328721e-06, - "loss": 0.3822, - "step": 10604 - }, - { - "epoch": 0.6930919547741977, - "grad_norm": 0.46481773257255554, - "learning_rate": 8.898997389636691e-06, - "loss": 0.3839, - "step": 10605 - }, - { - "epoch": 0.6931573099797399, - "grad_norm": 0.4405609965324402, - "learning_rate": 8.898778776927901e-06, - "loss": 0.3792, - "step": 10606 - }, - { - "epoch": 0.693222665185282, - "grad_norm": 0.4056107997894287, - "learning_rate": 8.898560145203416e-06, - "loss": 0.3491, - "step": 10607 - }, - { - "epoch": 0.6932880203908242, - "grad_norm": 0.46750712394714355, - "learning_rate": 8.898341494464302e-06, - "loss": 0.4107, - "step": 10608 - }, - { - "epoch": 0.6933533755963662, - "grad_norm": 0.4560029208660126, - "learning_rate": 8.898122824711626e-06, - "loss": 0.3831, - "step": 10609 - }, - { - "epoch": 0.6934187308019084, - "grad_norm": 0.4878546893596649, - "learning_rate": 8.897904135946456e-06, - "loss": 0.445, - "step": 10610 - }, - { - "epoch": 0.6934840860074505, - "grad_norm": 0.4243931174278259, - "learning_rate": 8.897685428169856e-06, - "loss": 0.3499, - "step": 10611 - }, - { - "epoch": 0.6935494412129927, - "grad_norm": 0.4623061716556549, - "learning_rate": 8.897466701382894e-06, - "loss": 0.3608, - "step": 10612 - }, - { - "epoch": 0.6936147964185347, - "grad_norm": 0.4468550682067871, - "learning_rate": 8.897247955586637e-06, - "loss": 0.3618, - "step": 10613 - }, - { - "epoch": 0.6936801516240768, - "grad_norm": 0.49978509545326233, - "learning_rate": 8.89702919078215e-06, - "loss": 0.4836, - "step": 10614 - }, - { - "epoch": 0.693745506829619, - "grad_norm": 0.4840392768383026, - "learning_rate": 8.896810406970503e-06, - "loss": 0.4928, - "step": 10615 - }, - { - "epoch": 0.6938108620351611, - "grad_norm": 0.46464452147483826, - "learning_rate": 8.89659160415276e-06, - "loss": 0.3697, - "step": 10616 - }, - { - "epoch": 0.6938762172407033, - "grad_norm": 0.4350418448448181, - "learning_rate": 8.896372782329993e-06, - "loss": 0.3573, - "step": 10617 - }, - { - "epoch": 0.6939415724462453, - "grad_norm": 0.423578679561615, - "learning_rate": 8.896153941503265e-06, - "loss": 0.3606, - "step": 10618 - }, - { - "epoch": 0.6940069276517875, - "grad_norm": 0.407560259103775, - "learning_rate": 8.895935081673644e-06, - "loss": 0.3463, - "step": 10619 - }, - { - "epoch": 0.6940722828573296, - "grad_norm": 0.4485580325126648, - "learning_rate": 8.895716202842198e-06, - "loss": 0.3953, - "step": 10620 - }, - { - "epoch": 0.6941376380628717, - "grad_norm": 0.49691659212112427, - "learning_rate": 8.895497305009993e-06, - "loss": 0.3891, - "step": 10621 - }, - { - "epoch": 0.6942029932684138, - "grad_norm": 0.43800756335258484, - "learning_rate": 8.895278388178099e-06, - "loss": 0.4062, - "step": 10622 - }, - { - "epoch": 0.6942683484739559, - "grad_norm": 0.4745366871356964, - "learning_rate": 8.895059452347583e-06, - "loss": 0.4419, - "step": 10623 - }, - { - "epoch": 0.6943337036794981, - "grad_norm": 0.46037518978118896, - "learning_rate": 8.894840497519514e-06, - "loss": 0.3688, - "step": 10624 - }, - { - "epoch": 0.6943990588850402, - "grad_norm": 0.45265913009643555, - "learning_rate": 8.894621523694957e-06, - "loss": 0.4068, - "step": 10625 - }, - { - "epoch": 0.6944644140905823, - "grad_norm": 0.44792941212654114, - "learning_rate": 8.894402530874982e-06, - "loss": 0.3871, - "step": 10626 - }, - { - "epoch": 0.6945297692961244, - "grad_norm": 0.4481916129589081, - "learning_rate": 8.894183519060657e-06, - "loss": 0.3744, - "step": 10627 - }, - { - "epoch": 0.6945951245016666, - "grad_norm": 0.47108253836631775, - "learning_rate": 8.89396448825305e-06, - "loss": 0.4096, - "step": 10628 - }, - { - "epoch": 0.6946604797072087, - "grad_norm": 0.47146904468536377, - "learning_rate": 8.89374543845323e-06, - "loss": 0.3721, - "step": 10629 - }, - { - "epoch": 0.6947258349127508, - "grad_norm": 0.44914835691452026, - "learning_rate": 8.893526369662263e-06, - "loss": 0.3933, - "step": 10630 - }, - { - "epoch": 0.6947911901182929, - "grad_norm": 0.4471861720085144, - "learning_rate": 8.893307281881219e-06, - "loss": 0.3742, - "step": 10631 - }, - { - "epoch": 0.694856545323835, - "grad_norm": 0.4269276261329651, - "learning_rate": 8.893088175111167e-06, - "loss": 0.3649, - "step": 10632 - }, - { - "epoch": 0.6949219005293772, - "grad_norm": 0.45189711451530457, - "learning_rate": 8.892869049353175e-06, - "loss": 0.3628, - "step": 10633 - }, - { - "epoch": 0.6949872557349193, - "grad_norm": 0.45149579644203186, - "learning_rate": 8.892649904608312e-06, - "loss": 0.3919, - "step": 10634 - }, - { - "epoch": 0.6950526109404614, - "grad_norm": 0.4281890392303467, - "learning_rate": 8.892430740877649e-06, - "loss": 0.3673, - "step": 10635 - }, - { - "epoch": 0.6951179661460035, - "grad_norm": 0.42137566208839417, - "learning_rate": 8.89221155816225e-06, - "loss": 0.3714, - "step": 10636 - }, - { - "epoch": 0.6951833213515457, - "grad_norm": 0.43112459778785706, - "learning_rate": 8.891992356463188e-06, - "loss": 0.3598, - "step": 10637 - }, - { - "epoch": 0.6952486765570878, - "grad_norm": 0.4220387041568756, - "learning_rate": 8.891773135781533e-06, - "loss": 0.3343, - "step": 10638 - }, - { - "epoch": 0.6953140317626298, - "grad_norm": 0.4726293385028839, - "learning_rate": 8.891553896118348e-06, - "loss": 0.416, - "step": 10639 - }, - { - "epoch": 0.695379386968172, - "grad_norm": 0.4971306025981903, - "learning_rate": 8.89133463747471e-06, - "loss": 0.4577, - "step": 10640 - }, - { - "epoch": 0.6954447421737141, - "grad_norm": 0.4709372818470001, - "learning_rate": 8.891115359851683e-06, - "loss": 0.4255, - "step": 10641 - }, - { - "epoch": 0.6955100973792563, - "grad_norm": 0.4117278456687927, - "learning_rate": 8.890896063250338e-06, - "loss": 0.3245, - "step": 10642 - }, - { - "epoch": 0.6955754525847984, - "grad_norm": 0.4369412064552307, - "learning_rate": 8.890676747671746e-06, - "loss": 0.3713, - "step": 10643 - }, - { - "epoch": 0.6956408077903405, - "grad_norm": 0.4706905484199524, - "learning_rate": 8.890457413116976e-06, - "loss": 0.3993, - "step": 10644 - }, - { - "epoch": 0.6957061629958826, - "grad_norm": 0.4339410662651062, - "learning_rate": 8.890238059587096e-06, - "loss": 0.3546, - "step": 10645 - }, - { - "epoch": 0.6957715182014248, - "grad_norm": 0.44695916771888733, - "learning_rate": 8.890018687083178e-06, - "loss": 0.3152, - "step": 10646 - }, - { - "epoch": 0.6958368734069669, - "grad_norm": 0.4529878497123718, - "learning_rate": 8.889799295606292e-06, - "loss": 0.3884, - "step": 10647 - }, - { - "epoch": 0.6959022286125089, - "grad_norm": 0.4456702470779419, - "learning_rate": 8.889579885157507e-06, - "loss": 0.3849, - "step": 10648 - }, - { - "epoch": 0.6959675838180511, - "grad_norm": 0.4578702449798584, - "learning_rate": 8.889360455737892e-06, - "loss": 0.3821, - "step": 10649 - }, - { - "epoch": 0.6960329390235932, - "grad_norm": 0.44034576416015625, - "learning_rate": 8.88914100734852e-06, - "loss": 0.3437, - "step": 10650 - }, - { - "epoch": 0.6960982942291354, - "grad_norm": 0.4882797598838806, - "learning_rate": 8.888921539990461e-06, - "loss": 0.3948, - "step": 10651 - }, - { - "epoch": 0.6961636494346775, - "grad_norm": 0.44824862480163574, - "learning_rate": 8.888702053664781e-06, - "loss": 0.3996, - "step": 10652 - }, - { - "epoch": 0.6962290046402196, - "grad_norm": 0.46059808135032654, - "learning_rate": 8.888482548372556e-06, - "loss": 0.3877, - "step": 10653 - }, - { - "epoch": 0.6962943598457617, - "grad_norm": 0.41871094703674316, - "learning_rate": 8.888263024114857e-06, - "loss": 0.3669, - "step": 10654 - }, - { - "epoch": 0.6963597150513038, - "grad_norm": 0.4514888823032379, - "learning_rate": 8.888043480892749e-06, - "loss": 0.3934, - "step": 10655 - }, - { - "epoch": 0.696425070256846, - "grad_norm": 0.45794615149497986, - "learning_rate": 8.887823918707306e-06, - "loss": 0.3726, - "step": 10656 - }, - { - "epoch": 0.696490425462388, - "grad_norm": 0.4819653332233429, - "learning_rate": 8.8876043375596e-06, - "loss": 0.4225, - "step": 10657 - }, - { - "epoch": 0.6965557806679302, - "grad_norm": 0.4350774586200714, - "learning_rate": 8.887384737450701e-06, - "loss": 0.3375, - "step": 10658 - }, - { - "epoch": 0.6966211358734723, - "grad_norm": 0.5108208060264587, - "learning_rate": 8.887165118381678e-06, - "loss": 0.459, - "step": 10659 - }, - { - "epoch": 0.6966864910790145, - "grad_norm": 0.4641295075416565, - "learning_rate": 8.886945480353605e-06, - "loss": 0.4061, - "step": 10660 - }, - { - "epoch": 0.6967518462845566, - "grad_norm": 0.44970735907554626, - "learning_rate": 8.886725823367554e-06, - "loss": 0.3566, - "step": 10661 - }, - { - "epoch": 0.6968172014900987, - "grad_norm": 0.4390512704849243, - "learning_rate": 8.886506147424594e-06, - "loss": 0.3557, - "step": 10662 - }, - { - "epoch": 0.6968825566956408, - "grad_norm": 0.4361428916454315, - "learning_rate": 8.886286452525797e-06, - "loss": 0.3534, - "step": 10663 - }, - { - "epoch": 0.6969479119011829, - "grad_norm": 0.44793328642845154, - "learning_rate": 8.886066738672234e-06, - "loss": 0.3942, - "step": 10664 - }, - { - "epoch": 0.6970132671067251, - "grad_norm": 0.4622913599014282, - "learning_rate": 8.885847005864975e-06, - "loss": 0.4141, - "step": 10665 - }, - { - "epoch": 0.6970786223122671, - "grad_norm": 0.4830019772052765, - "learning_rate": 8.885627254105097e-06, - "loss": 0.4287, - "step": 10666 - }, - { - "epoch": 0.6971439775178093, - "grad_norm": 0.4581737816333771, - "learning_rate": 8.885407483393669e-06, - "loss": 0.3737, - "step": 10667 - }, - { - "epoch": 0.6972093327233514, - "grad_norm": 0.43531617522239685, - "learning_rate": 8.885187693731763e-06, - "loss": 0.3911, - "step": 10668 - }, - { - "epoch": 0.6972746879288936, - "grad_norm": 0.4650973081588745, - "learning_rate": 8.884967885120448e-06, - "loss": 0.388, - "step": 10669 - }, - { - "epoch": 0.6973400431344356, - "grad_norm": 1.0223784446716309, - "learning_rate": 8.884748057560801e-06, - "loss": 0.4184, - "step": 10670 - }, - { - "epoch": 0.6974053983399778, - "grad_norm": 0.47753241658210754, - "learning_rate": 8.884528211053891e-06, - "loss": 0.4055, - "step": 10671 - }, - { - "epoch": 0.6974707535455199, - "grad_norm": 0.4437274634838104, - "learning_rate": 8.884308345600792e-06, - "loss": 0.3933, - "step": 10672 - }, - { - "epoch": 0.697536108751062, - "grad_norm": 0.46149584650993347, - "learning_rate": 8.884088461202574e-06, - "loss": 0.3898, - "step": 10673 - }, - { - "epoch": 0.6976014639566042, - "grad_norm": 0.4530940353870392, - "learning_rate": 8.883868557860313e-06, - "loss": 0.4197, - "step": 10674 - }, - { - "epoch": 0.6976668191621462, - "grad_norm": 0.4554557502269745, - "learning_rate": 8.883648635575077e-06, - "loss": 0.3742, - "step": 10675 - }, - { - "epoch": 0.6977321743676884, - "grad_norm": 0.4334377348423004, - "learning_rate": 8.883428694347944e-06, - "loss": 0.3337, - "step": 10676 - }, - { - "epoch": 0.6977975295732305, - "grad_norm": 0.4683746099472046, - "learning_rate": 8.883208734179981e-06, - "loss": 0.4032, - "step": 10677 - }, - { - "epoch": 0.6978628847787727, - "grad_norm": 0.38879600167274475, - "learning_rate": 8.882988755072266e-06, - "loss": 0.3101, - "step": 10678 - }, - { - "epoch": 0.6979282399843147, - "grad_norm": 0.5270335674285889, - "learning_rate": 8.88276875702587e-06, - "loss": 0.4585, - "step": 10679 - }, - { - "epoch": 0.6979935951898568, - "grad_norm": 0.4615952968597412, - "learning_rate": 8.882548740041862e-06, - "loss": 0.3749, - "step": 10680 - }, - { - "epoch": 0.698058950395399, - "grad_norm": 0.43616750836372375, - "learning_rate": 8.882328704121324e-06, - "loss": 0.3485, - "step": 10681 - }, - { - "epoch": 0.6981243056009411, - "grad_norm": 0.44347694516181946, - "learning_rate": 8.882108649265319e-06, - "loss": 0.3613, - "step": 10682 - }, - { - "epoch": 0.6981896608064833, - "grad_norm": 0.4720376133918762, - "learning_rate": 8.881888575474929e-06, - "loss": 0.4117, - "step": 10683 - }, - { - "epoch": 0.6982550160120253, - "grad_norm": 0.43622103333473206, - "learning_rate": 8.881668482751221e-06, - "loss": 0.3714, - "step": 10684 - }, - { - "epoch": 0.6983203712175675, - "grad_norm": 0.41271963715553284, - "learning_rate": 8.881448371095272e-06, - "loss": 0.3646, - "step": 10685 - }, - { - "epoch": 0.6983857264231096, - "grad_norm": 0.4676041603088379, - "learning_rate": 8.881228240508155e-06, - "loss": 0.4016, - "step": 10686 - }, - { - "epoch": 0.6984510816286518, - "grad_norm": 0.4412485361099243, - "learning_rate": 8.881008090990944e-06, - "loss": 0.3905, - "step": 10687 - }, - { - "epoch": 0.6985164368341938, - "grad_norm": 0.459614098072052, - "learning_rate": 8.88078792254471e-06, - "loss": 0.3864, - "step": 10688 - }, - { - "epoch": 0.6985817920397359, - "grad_norm": 0.41537952423095703, - "learning_rate": 8.880567735170531e-06, - "loss": 0.3406, - "step": 10689 - }, - { - "epoch": 0.6986471472452781, - "grad_norm": 0.4189286231994629, - "learning_rate": 8.880347528869477e-06, - "loss": 0.3227, - "step": 10690 - }, - { - "epoch": 0.6987125024508202, - "grad_norm": 0.4343949258327484, - "learning_rate": 8.880127303642625e-06, - "loss": 0.371, - "step": 10691 - }, - { - "epoch": 0.6987778576563624, - "grad_norm": 0.43783894181251526, - "learning_rate": 8.879907059491048e-06, - "loss": 0.3517, - "step": 10692 - }, - { - "epoch": 0.6988432128619044, - "grad_norm": 0.4129463732242584, - "learning_rate": 8.879686796415818e-06, - "loss": 0.3378, - "step": 10693 - }, - { - "epoch": 0.6989085680674466, - "grad_norm": 0.45865529775619507, - "learning_rate": 8.879466514418014e-06, - "loss": 0.3631, - "step": 10694 - }, - { - "epoch": 0.6989739232729887, - "grad_norm": 0.4360487163066864, - "learning_rate": 8.879246213498707e-06, - "loss": 0.3628, - "step": 10695 - }, - { - "epoch": 0.6990392784785309, - "grad_norm": 0.46427857875823975, - "learning_rate": 8.879025893658973e-06, - "loss": 0.3751, - "step": 10696 - }, - { - "epoch": 0.6991046336840729, - "grad_norm": 0.5423113703727722, - "learning_rate": 8.878805554899885e-06, - "loss": 0.4671, - "step": 10697 - }, - { - "epoch": 0.699169988889615, - "grad_norm": 0.47656580805778503, - "learning_rate": 8.878585197222519e-06, - "loss": 0.4071, - "step": 10698 - }, - { - "epoch": 0.6992353440951572, - "grad_norm": 0.41309502720832825, - "learning_rate": 8.878364820627948e-06, - "loss": 0.3586, - "step": 10699 - }, - { - "epoch": 0.6993006993006993, - "grad_norm": 0.43951088190078735, - "learning_rate": 8.87814442511725e-06, - "loss": 0.3331, - "step": 10700 - }, - { - "epoch": 0.6993660545062415, - "grad_norm": 0.45432737469673157, - "learning_rate": 8.877924010691496e-06, - "loss": 0.3692, - "step": 10701 - }, - { - "epoch": 0.6994314097117835, - "grad_norm": 0.4661157727241516, - "learning_rate": 8.877703577351766e-06, - "loss": 0.4104, - "step": 10702 - }, - { - "epoch": 0.6994967649173257, - "grad_norm": 0.4366321861743927, - "learning_rate": 8.87748312509913e-06, - "loss": 0.369, - "step": 10703 - }, - { - "epoch": 0.6995621201228678, - "grad_norm": 0.45107367634773254, - "learning_rate": 8.877262653934667e-06, - "loss": 0.3703, - "step": 10704 - }, - { - "epoch": 0.6996274753284099, - "grad_norm": 0.41421547532081604, - "learning_rate": 8.87704216385945e-06, - "loss": 0.3259, - "step": 10705 - }, - { - "epoch": 0.699692830533952, - "grad_norm": 0.5069754719734192, - "learning_rate": 8.876821654874555e-06, - "loss": 0.4366, - "step": 10706 - }, - { - "epoch": 0.6997581857394941, - "grad_norm": 0.46444544196128845, - "learning_rate": 8.876601126981059e-06, - "loss": 0.4085, - "step": 10707 - }, - { - "epoch": 0.6998235409450363, - "grad_norm": 0.44071781635284424, - "learning_rate": 8.876380580180034e-06, - "loss": 0.3671, - "step": 10708 - }, - { - "epoch": 0.6998888961505784, - "grad_norm": 0.4483624994754791, - "learning_rate": 8.87616001447256e-06, - "loss": 0.4174, - "step": 10709 - }, - { - "epoch": 0.6999542513561205, - "grad_norm": 0.4800783693790436, - "learning_rate": 8.87593942985971e-06, - "loss": 0.3825, - "step": 10710 - }, - { - "epoch": 0.7000196065616626, - "grad_norm": 0.4607797861099243, - "learning_rate": 8.875718826342561e-06, - "loss": 0.3604, - "step": 10711 - }, - { - "epoch": 0.7000849617672048, - "grad_norm": 0.4988437294960022, - "learning_rate": 8.875498203922189e-06, - "loss": 0.4535, - "step": 10712 - }, - { - "epoch": 0.7001503169727469, - "grad_norm": 0.49454623460769653, - "learning_rate": 8.875277562599668e-06, - "loss": 0.4754, - "step": 10713 - }, - { - "epoch": 0.700215672178289, - "grad_norm": 0.4355059266090393, - "learning_rate": 8.875056902376076e-06, - "loss": 0.398, - "step": 10714 - }, - { - "epoch": 0.7002810273838311, - "grad_norm": 0.4247625470161438, - "learning_rate": 8.874836223252491e-06, - "loss": 0.3552, - "step": 10715 - }, - { - "epoch": 0.7003463825893732, - "grad_norm": 0.45553550124168396, - "learning_rate": 8.874615525229986e-06, - "loss": 0.4185, - "step": 10716 - }, - { - "epoch": 0.7004117377949154, - "grad_norm": 0.4440927505493164, - "learning_rate": 8.874394808309638e-06, - "loss": 0.3783, - "step": 10717 - }, - { - "epoch": 0.7004770930004575, - "grad_norm": 0.47648704051971436, - "learning_rate": 8.874174072492528e-06, - "loss": 0.4023, - "step": 10718 - }, - { - "epoch": 0.7005424482059996, - "grad_norm": 0.4460158944129944, - "learning_rate": 8.873953317779724e-06, - "loss": 0.3869, - "step": 10719 - }, - { - "epoch": 0.7006078034115417, - "grad_norm": 0.4699411690235138, - "learning_rate": 8.87373254417231e-06, - "loss": 0.4209, - "step": 10720 - }, - { - "epoch": 0.7006731586170839, - "grad_norm": 0.5043027997016907, - "learning_rate": 8.873511751671361e-06, - "loss": 0.3493, - "step": 10721 - }, - { - "epoch": 0.700738513822626, - "grad_norm": 0.46910256147384644, - "learning_rate": 8.873290940277952e-06, - "loss": 0.4264, - "step": 10722 - }, - { - "epoch": 0.700803869028168, - "grad_norm": 0.43721339106559753, - "learning_rate": 8.873070109993162e-06, - "loss": 0.351, - "step": 10723 - }, - { - "epoch": 0.7008692242337102, - "grad_norm": 0.45217421650886536, - "learning_rate": 8.872849260818066e-06, - "loss": 0.3642, - "step": 10724 - }, - { - "epoch": 0.7009345794392523, - "grad_norm": 0.42875149846076965, - "learning_rate": 8.872628392753747e-06, - "loss": 0.3758, - "step": 10725 - }, - { - "epoch": 0.7009999346447945, - "grad_norm": 0.45782139897346497, - "learning_rate": 8.872407505801273e-06, - "loss": 0.4035, - "step": 10726 - }, - { - "epoch": 0.7010652898503366, - "grad_norm": 0.45796889066696167, - "learning_rate": 8.872186599961727e-06, - "loss": 0.3799, - "step": 10727 - }, - { - "epoch": 0.7011306450558787, - "grad_norm": 0.4108940660953522, - "learning_rate": 8.871965675236185e-06, - "loss": 0.3738, - "step": 10728 - }, - { - "epoch": 0.7011960002614208, - "grad_norm": 0.46935567259788513, - "learning_rate": 8.871744731625727e-06, - "loss": 0.3723, - "step": 10729 - }, - { - "epoch": 0.701261355466963, - "grad_norm": 0.44690999388694763, - "learning_rate": 8.871523769131426e-06, - "loss": 0.393, - "step": 10730 - }, - { - "epoch": 0.7013267106725051, - "grad_norm": 0.46299657225608826, - "learning_rate": 8.871302787754364e-06, - "loss": 0.3847, - "step": 10731 - }, - { - "epoch": 0.7013920658780471, - "grad_norm": 0.44792985916137695, - "learning_rate": 8.871081787495617e-06, - "loss": 0.361, - "step": 10732 - }, - { - "epoch": 0.7014574210835893, - "grad_norm": 0.4198303520679474, - "learning_rate": 8.870860768356264e-06, - "loss": 0.3334, - "step": 10733 - }, - { - "epoch": 0.7015227762891314, - "grad_norm": 0.45148947834968567, - "learning_rate": 8.87063973033738e-06, - "loss": 0.3715, - "step": 10734 - }, - { - "epoch": 0.7015881314946736, - "grad_norm": 0.44864290952682495, - "learning_rate": 8.870418673440048e-06, - "loss": 0.3615, - "step": 10735 - }, - { - "epoch": 0.7016534867002157, - "grad_norm": 0.4621596932411194, - "learning_rate": 8.870197597665342e-06, - "loss": 0.4117, - "step": 10736 - }, - { - "epoch": 0.7017188419057578, - "grad_norm": 0.49969062209129333, - "learning_rate": 8.86997650301434e-06, - "loss": 0.4514, - "step": 10737 - }, - { - "epoch": 0.7017841971112999, - "grad_norm": 0.4572676420211792, - "learning_rate": 8.869755389488122e-06, - "loss": 0.3911, - "step": 10738 - }, - { - "epoch": 0.701849552316842, - "grad_norm": 0.4414505362510681, - "learning_rate": 8.86953425708777e-06, - "loss": 0.4044, - "step": 10739 - }, - { - "epoch": 0.7019149075223842, - "grad_norm": 0.4709620177745819, - "learning_rate": 8.869313105814355e-06, - "loss": 0.4538, - "step": 10740 - }, - { - "epoch": 0.7019802627279262, - "grad_norm": 0.4614220857620239, - "learning_rate": 8.86909193566896e-06, - "loss": 0.4106, - "step": 10741 - }, - { - "epoch": 0.7020456179334684, - "grad_norm": 0.4344942569732666, - "learning_rate": 8.868870746652664e-06, - "loss": 0.3755, - "step": 10742 - }, - { - "epoch": 0.7021109731390105, - "grad_norm": 0.44991180300712585, - "learning_rate": 8.868649538766545e-06, - "loss": 0.3968, - "step": 10743 - }, - { - "epoch": 0.7021763283445527, - "grad_norm": 0.5085800886154175, - "learning_rate": 8.868428312011684e-06, - "loss": 0.4734, - "step": 10744 - }, - { - "epoch": 0.7022416835500948, - "grad_norm": 0.4519469439983368, - "learning_rate": 8.868207066389153e-06, - "loss": 0.4017, - "step": 10745 - }, - { - "epoch": 0.7023070387556369, - "grad_norm": 0.4577236771583557, - "learning_rate": 8.867985801900041e-06, - "loss": 0.3542, - "step": 10746 - }, - { - "epoch": 0.702372393961179, - "grad_norm": 0.47221171855926514, - "learning_rate": 8.86776451854542e-06, - "loss": 0.4036, - "step": 10747 - }, - { - "epoch": 0.7024377491667211, - "grad_norm": 0.4412024915218353, - "learning_rate": 8.86754321632637e-06, - "loss": 0.3753, - "step": 10748 - }, - { - "epoch": 0.7025031043722633, - "grad_norm": 0.43691593408584595, - "learning_rate": 8.867321895243974e-06, - "loss": 0.328, - "step": 10749 - }, - { - "epoch": 0.7025684595778053, - "grad_norm": 0.47266703844070435, - "learning_rate": 8.86710055529931e-06, - "loss": 0.4345, - "step": 10750 - }, - { - "epoch": 0.7026338147833475, - "grad_norm": 0.43213972449302673, - "learning_rate": 8.866879196493457e-06, - "loss": 0.3555, - "step": 10751 - }, - { - "epoch": 0.7026991699888896, - "grad_norm": 0.4508489668369293, - "learning_rate": 8.866657818827494e-06, - "loss": 0.4101, - "step": 10752 - }, - { - "epoch": 0.7027645251944318, - "grad_norm": 0.4328119456768036, - "learning_rate": 8.8664364223025e-06, - "loss": 0.3716, - "step": 10753 - }, - { - "epoch": 0.7028298803999738, - "grad_norm": 0.45387157797813416, - "learning_rate": 8.866215006919557e-06, - "loss": 0.3587, - "step": 10754 - }, - { - "epoch": 0.702895235605516, - "grad_norm": 0.4766331613063812, - "learning_rate": 8.865993572679743e-06, - "loss": 0.4045, - "step": 10755 - }, - { - "epoch": 0.7029605908110581, - "grad_norm": 0.4131259620189667, - "learning_rate": 8.865772119584141e-06, - "loss": 0.3334, - "step": 10756 - }, - { - "epoch": 0.7030259460166002, - "grad_norm": 0.41277629137039185, - "learning_rate": 8.865550647633828e-06, - "loss": 0.3157, - "step": 10757 - }, - { - "epoch": 0.7030913012221424, - "grad_norm": 0.4751417636871338, - "learning_rate": 8.865329156829886e-06, - "loss": 0.3785, - "step": 10758 - }, - { - "epoch": 0.7031566564276844, - "grad_norm": 0.469399094581604, - "learning_rate": 8.865107647173392e-06, - "loss": 0.4298, - "step": 10759 - }, - { - "epoch": 0.7032220116332266, - "grad_norm": 0.4965677261352539, - "learning_rate": 8.864886118665432e-06, - "loss": 0.3875, - "step": 10760 - }, - { - "epoch": 0.7032873668387687, - "grad_norm": 0.4735341966152191, - "learning_rate": 8.864664571307082e-06, - "loss": 0.3821, - "step": 10761 - }, - { - "epoch": 0.7033527220443109, - "grad_norm": 0.4623195230960846, - "learning_rate": 8.864443005099425e-06, - "loss": 0.3912, - "step": 10762 - }, - { - "epoch": 0.703418077249853, - "grad_norm": 0.46555569767951965, - "learning_rate": 8.86422142004354e-06, - "loss": 0.4143, - "step": 10763 - }, - { - "epoch": 0.703483432455395, - "grad_norm": 0.49839991331100464, - "learning_rate": 8.86399981614051e-06, - "loss": 0.4137, - "step": 10764 - }, - { - "epoch": 0.7035487876609372, - "grad_norm": 0.5141803622245789, - "learning_rate": 8.863778193391413e-06, - "loss": 0.4206, - "step": 10765 - }, - { - "epoch": 0.7036141428664793, - "grad_norm": 0.45348691940307617, - "learning_rate": 8.86355655179733e-06, - "loss": 0.3676, - "step": 10766 - }, - { - "epoch": 0.7036794980720215, - "grad_norm": 0.4878885746002197, - "learning_rate": 8.863334891359345e-06, - "loss": 0.4447, - "step": 10767 - }, - { - "epoch": 0.7037448532775635, - "grad_norm": 0.44455888867378235, - "learning_rate": 8.863113212078536e-06, - "loss": 0.3908, - "step": 10768 - }, - { - "epoch": 0.7038102084831057, - "grad_norm": 0.47392991185188293, - "learning_rate": 8.862891513955987e-06, - "loss": 0.3833, - "step": 10769 - }, - { - "epoch": 0.7038755636886478, - "grad_norm": 0.4671472907066345, - "learning_rate": 8.862669796992776e-06, - "loss": 0.377, - "step": 10770 - }, - { - "epoch": 0.70394091889419, - "grad_norm": 0.4605567455291748, - "learning_rate": 8.862448061189988e-06, - "loss": 0.4077, - "step": 10771 - }, - { - "epoch": 0.704006274099732, - "grad_norm": 0.426021009683609, - "learning_rate": 8.862226306548702e-06, - "loss": 0.336, - "step": 10772 - }, - { - "epoch": 0.7040716293052741, - "grad_norm": 0.47108277678489685, - "learning_rate": 8.86200453307e-06, - "loss": 0.4316, - "step": 10773 - }, - { - "epoch": 0.7041369845108163, - "grad_norm": 0.45935899019241333, - "learning_rate": 8.861782740754966e-06, - "loss": 0.3574, - "step": 10774 - }, - { - "epoch": 0.7042023397163584, - "grad_norm": 0.4728596806526184, - "learning_rate": 8.861560929604677e-06, - "loss": 0.3752, - "step": 10775 - }, - { - "epoch": 0.7042676949219006, - "grad_norm": 0.4555656313896179, - "learning_rate": 8.861339099620219e-06, - "loss": 0.3752, - "step": 10776 - }, - { - "epoch": 0.7043330501274426, - "grad_norm": 0.4627569913864136, - "learning_rate": 8.861117250802672e-06, - "loss": 0.435, - "step": 10777 - }, - { - "epoch": 0.7043984053329848, - "grad_norm": 0.44748517870903015, - "learning_rate": 8.860895383153119e-06, - "loss": 0.366, - "step": 10778 - }, - { - "epoch": 0.7044637605385269, - "grad_norm": 0.42327216267585754, - "learning_rate": 8.860673496672642e-06, - "loss": 0.3302, - "step": 10779 - }, - { - "epoch": 0.7045291157440691, - "grad_norm": 0.4513353109359741, - "learning_rate": 8.860451591362322e-06, - "loss": 0.4067, - "step": 10780 - }, - { - "epoch": 0.7045944709496111, - "grad_norm": 0.41613486409187317, - "learning_rate": 8.860229667223243e-06, - "loss": 0.3562, - "step": 10781 - }, - { - "epoch": 0.7046598261551532, - "grad_norm": 0.5639072060585022, - "learning_rate": 8.860007724256484e-06, - "loss": 0.4195, - "step": 10782 - }, - { - "epoch": 0.7047251813606954, - "grad_norm": 0.45426589250564575, - "learning_rate": 8.859785762463133e-06, - "loss": 0.3876, - "step": 10783 - }, - { - "epoch": 0.7047905365662375, - "grad_norm": 0.4279865324497223, - "learning_rate": 8.85956378184427e-06, - "loss": 0.3242, - "step": 10784 - }, - { - "epoch": 0.7048558917717797, - "grad_norm": 0.4605855345726013, - "learning_rate": 8.859341782400976e-06, - "loss": 0.3947, - "step": 10785 - }, - { - "epoch": 0.7049212469773217, - "grad_norm": 0.4505729675292969, - "learning_rate": 8.859119764134336e-06, - "loss": 0.4176, - "step": 10786 - }, - { - "epoch": 0.7049866021828639, - "grad_norm": 0.47500374913215637, - "learning_rate": 8.85889772704543e-06, - "loss": 0.4212, - "step": 10787 - }, - { - "epoch": 0.705051957388406, - "grad_norm": 0.41787174344062805, - "learning_rate": 8.858675671135345e-06, - "loss": 0.339, - "step": 10788 - }, - { - "epoch": 0.705117312593948, - "grad_norm": 0.4474586546421051, - "learning_rate": 8.858453596405161e-06, - "loss": 0.3584, - "step": 10789 - }, - { - "epoch": 0.7051826677994902, - "grad_norm": 0.44654515385627747, - "learning_rate": 8.858231502855964e-06, - "loss": 0.4001, - "step": 10790 - }, - { - "epoch": 0.7052480230050323, - "grad_norm": 0.45959019660949707, - "learning_rate": 8.858009390488833e-06, - "loss": 0.3826, - "step": 10791 - }, - { - "epoch": 0.7053133782105745, - "grad_norm": 0.47081857919692993, - "learning_rate": 8.857787259304854e-06, - "loss": 0.3235, - "step": 10792 - }, - { - "epoch": 0.7053787334161166, - "grad_norm": 0.4579770267009735, - "learning_rate": 8.857565109305112e-06, - "loss": 0.338, - "step": 10793 - }, - { - "epoch": 0.7054440886216587, - "grad_norm": 0.4581635594367981, - "learning_rate": 8.857342940490686e-06, - "loss": 0.3985, - "step": 10794 - }, - { - "epoch": 0.7055094438272008, - "grad_norm": 0.4913211166858673, - "learning_rate": 8.857120752862662e-06, - "loss": 0.4794, - "step": 10795 - }, - { - "epoch": 0.705574799032743, - "grad_norm": 0.4294610917568207, - "learning_rate": 8.856898546422126e-06, - "loss": 0.3445, - "step": 10796 - }, - { - "epoch": 0.7056401542382851, - "grad_norm": 0.459158331155777, - "learning_rate": 8.856676321170159e-06, - "loss": 0.3882, - "step": 10797 - }, - { - "epoch": 0.7057055094438272, - "grad_norm": 0.44121789932250977, - "learning_rate": 8.856454077107845e-06, - "loss": 0.3523, - "step": 10798 - }, - { - "epoch": 0.7057708646493693, - "grad_norm": 0.4617336392402649, - "learning_rate": 8.856231814236268e-06, - "loss": 0.4026, - "step": 10799 - }, - { - "epoch": 0.7058362198549114, - "grad_norm": 0.46041619777679443, - "learning_rate": 8.856009532556513e-06, - "loss": 0.3947, - "step": 10800 - }, - { - "epoch": 0.7059015750604536, - "grad_norm": 0.42607614398002625, - "learning_rate": 8.855787232069664e-06, - "loss": 0.373, - "step": 10801 - }, - { - "epoch": 0.7059669302659957, - "grad_norm": 0.42943528294563293, - "learning_rate": 8.855564912776806e-06, - "loss": 0.3641, - "step": 10802 - }, - { - "epoch": 0.7060322854715378, - "grad_norm": 0.479065477848053, - "learning_rate": 8.85534257467902e-06, - "loss": 0.4223, - "step": 10803 - }, - { - "epoch": 0.7060976406770799, - "grad_norm": 0.43014097213745117, - "learning_rate": 8.855120217777393e-06, - "loss": 0.3647, - "step": 10804 - }, - { - "epoch": 0.7061629958826221, - "grad_norm": 0.43675094842910767, - "learning_rate": 8.854897842073011e-06, - "loss": 0.3742, - "step": 10805 - }, - { - "epoch": 0.7062283510881642, - "grad_norm": 0.46332669258117676, - "learning_rate": 8.854675447566954e-06, - "loss": 0.3773, - "step": 10806 - }, - { - "epoch": 0.7062937062937062, - "grad_norm": 0.43585094809532166, - "learning_rate": 8.854453034260312e-06, - "loss": 0.3562, - "step": 10807 - }, - { - "epoch": 0.7063590614992484, - "grad_norm": 0.4546062648296356, - "learning_rate": 8.854230602154166e-06, - "loss": 0.4249, - "step": 10808 - }, - { - "epoch": 0.7064244167047905, - "grad_norm": 0.42176294326782227, - "learning_rate": 8.854008151249602e-06, - "loss": 0.3444, - "step": 10809 - }, - { - "epoch": 0.7064897719103327, - "grad_norm": 0.4762844443321228, - "learning_rate": 8.853785681547706e-06, - "loss": 0.4243, - "step": 10810 - }, - { - "epoch": 0.7065551271158748, - "grad_norm": 0.4549350142478943, - "learning_rate": 8.85356319304956e-06, - "loss": 0.3679, - "step": 10811 - }, - { - "epoch": 0.7066204823214169, - "grad_norm": 0.44891974329948425, - "learning_rate": 8.853340685756254e-06, - "loss": 0.4075, - "step": 10812 - }, - { - "epoch": 0.706685837526959, - "grad_norm": 0.45329487323760986, - "learning_rate": 8.85311815966887e-06, - "loss": 0.3636, - "step": 10813 - }, - { - "epoch": 0.7067511927325012, - "grad_norm": 0.44027382135391235, - "learning_rate": 8.852895614788493e-06, - "loss": 0.3424, - "step": 10814 - }, - { - "epoch": 0.7068165479380433, - "grad_norm": 0.4336678683757782, - "learning_rate": 8.85267305111621e-06, - "loss": 0.3408, - "step": 10815 - }, - { - "epoch": 0.7068819031435853, - "grad_norm": 0.4381355345249176, - "learning_rate": 8.852450468653105e-06, - "loss": 0.4034, - "step": 10816 - }, - { - "epoch": 0.7069472583491275, - "grad_norm": 0.4556027948856354, - "learning_rate": 8.852227867400265e-06, - "loss": 0.4002, - "step": 10817 - }, - { - "epoch": 0.7070126135546696, - "grad_norm": 0.4505718946456909, - "learning_rate": 8.852005247358775e-06, - "loss": 0.3931, - "step": 10818 - }, - { - "epoch": 0.7070779687602118, - "grad_norm": 0.4421040117740631, - "learning_rate": 8.851782608529721e-06, - "loss": 0.3873, - "step": 10819 - }, - { - "epoch": 0.7071433239657539, - "grad_norm": 0.4468221068382263, - "learning_rate": 8.851559950914189e-06, - "loss": 0.4217, - "step": 10820 - }, - { - "epoch": 0.707208679171296, - "grad_norm": 0.4986788332462311, - "learning_rate": 8.851337274513265e-06, - "loss": 0.3857, - "step": 10821 - }, - { - "epoch": 0.7072740343768381, - "grad_norm": 0.4252099096775055, - "learning_rate": 8.851114579328034e-06, - "loss": 0.3546, - "step": 10822 - }, - { - "epoch": 0.7073393895823802, - "grad_norm": 0.4833270311355591, - "learning_rate": 8.850891865359583e-06, - "loss": 0.4502, - "step": 10823 - }, - { - "epoch": 0.7074047447879224, - "grad_norm": 0.42057788372039795, - "learning_rate": 8.850669132609e-06, - "loss": 0.3267, - "step": 10824 - }, - { - "epoch": 0.7074700999934644, - "grad_norm": 0.45054686069488525, - "learning_rate": 8.850446381077368e-06, - "loss": 0.4101, - "step": 10825 - }, - { - "epoch": 0.7075354551990066, - "grad_norm": 0.4439249634742737, - "learning_rate": 8.850223610765777e-06, - "loss": 0.3795, - "step": 10826 - }, - { - "epoch": 0.7076008104045487, - "grad_norm": 0.45448774099349976, - "learning_rate": 8.850000821675308e-06, - "loss": 0.3883, - "step": 10827 - }, - { - "epoch": 0.7076661656100909, - "grad_norm": 0.45995283126831055, - "learning_rate": 8.849778013807053e-06, - "loss": 0.396, - "step": 10828 - }, - { - "epoch": 0.707731520815633, - "grad_norm": 0.4646743834018707, - "learning_rate": 8.849555187162099e-06, - "loss": 0.4227, - "step": 10829 - }, - { - "epoch": 0.7077968760211751, - "grad_norm": 0.4693724811077118, - "learning_rate": 8.849332341741529e-06, - "loss": 0.406, - "step": 10830 - }, - { - "epoch": 0.7078622312267172, - "grad_norm": 0.4972003102302551, - "learning_rate": 8.849109477546431e-06, - "loss": 0.4384, - "step": 10831 - }, - { - "epoch": 0.7079275864322593, - "grad_norm": 0.44735318422317505, - "learning_rate": 8.848886594577894e-06, - "loss": 0.4097, - "step": 10832 - }, - { - "epoch": 0.7079929416378015, - "grad_norm": 0.41161873936653137, - "learning_rate": 8.848663692837002e-06, - "loss": 0.3219, - "step": 10833 - }, - { - "epoch": 0.7080582968433435, - "grad_norm": 0.4374428391456604, - "learning_rate": 8.848440772324846e-06, - "loss": 0.3727, - "step": 10834 - }, - { - "epoch": 0.7081236520488857, - "grad_norm": 0.45904046297073364, - "learning_rate": 8.84821783304251e-06, - "loss": 0.3954, - "step": 10835 - }, - { - "epoch": 0.7081890072544278, - "grad_norm": 0.4530632793903351, - "learning_rate": 8.847994874991082e-06, - "loss": 0.3746, - "step": 10836 - }, - { - "epoch": 0.70825436245997, - "grad_norm": 0.4787178635597229, - "learning_rate": 8.847771898171651e-06, - "loss": 0.3831, - "step": 10837 - }, - { - "epoch": 0.708319717665512, - "grad_norm": 0.4304639995098114, - "learning_rate": 8.847548902585304e-06, - "loss": 0.361, - "step": 10838 - }, - { - "epoch": 0.7083850728710542, - "grad_norm": 0.46742579340934753, - "learning_rate": 8.847325888233125e-06, - "loss": 0.3653, - "step": 10839 - }, - { - "epoch": 0.7084504280765963, - "grad_norm": 0.46168291568756104, - "learning_rate": 8.847102855116207e-06, - "loss": 0.3687, - "step": 10840 - }, - { - "epoch": 0.7085157832821384, - "grad_norm": 0.4948406517505646, - "learning_rate": 8.846879803235636e-06, - "loss": 0.4293, - "step": 10841 - }, - { - "epoch": 0.7085811384876806, - "grad_norm": 0.46802011132240295, - "learning_rate": 8.846656732592498e-06, - "loss": 0.4366, - "step": 10842 - }, - { - "epoch": 0.7086464936932226, - "grad_norm": 0.4439346492290497, - "learning_rate": 8.846433643187884e-06, - "loss": 0.3525, - "step": 10843 - }, - { - "epoch": 0.7087118488987648, - "grad_norm": 0.43533918261528015, - "learning_rate": 8.846210535022878e-06, - "loss": 0.348, - "step": 10844 - }, - { - "epoch": 0.7087772041043069, - "grad_norm": 0.43249449133872986, - "learning_rate": 8.845987408098574e-06, - "loss": 0.3338, - "step": 10845 - }, - { - "epoch": 0.7088425593098491, - "grad_norm": 0.4447890520095825, - "learning_rate": 8.845764262416056e-06, - "loss": 0.3561, - "step": 10846 - }, - { - "epoch": 0.7089079145153911, - "grad_norm": 0.46714460849761963, - "learning_rate": 8.845541097976414e-06, - "loss": 0.4183, - "step": 10847 - }, - { - "epoch": 0.7089732697209332, - "grad_norm": 0.4190499782562256, - "learning_rate": 8.845317914780734e-06, - "loss": 0.3403, - "step": 10848 - }, - { - "epoch": 0.7090386249264754, - "grad_norm": 0.45065832138061523, - "learning_rate": 8.845094712830107e-06, - "loss": 0.3823, - "step": 10849 - }, - { - "epoch": 0.7091039801320175, - "grad_norm": 0.4573337137699127, - "learning_rate": 8.844871492125622e-06, - "loss": 0.4012, - "step": 10850 - }, - { - "epoch": 0.7091693353375597, - "grad_norm": 0.4844611585140228, - "learning_rate": 8.844648252668366e-06, - "loss": 0.3647, - "step": 10851 - }, - { - "epoch": 0.7092346905431017, - "grad_norm": 0.41030028462409973, - "learning_rate": 8.844424994459428e-06, - "loss": 0.3293, - "step": 10852 - }, - { - "epoch": 0.7093000457486439, - "grad_norm": 0.4440174996852875, - "learning_rate": 8.8442017174999e-06, - "loss": 0.3452, - "step": 10853 - }, - { - "epoch": 0.709365400954186, - "grad_norm": 0.4625392258167267, - "learning_rate": 8.843978421790866e-06, - "loss": 0.4057, - "step": 10854 - }, - { - "epoch": 0.7094307561597282, - "grad_norm": 0.4622591733932495, - "learning_rate": 8.843755107333418e-06, - "loss": 0.4069, - "step": 10855 - }, - { - "epoch": 0.7094961113652702, - "grad_norm": 0.4473128616809845, - "learning_rate": 8.843531774128646e-06, - "loss": 0.371, - "step": 10856 - }, - { - "epoch": 0.7095614665708123, - "grad_norm": 0.43539655208587646, - "learning_rate": 8.843308422177637e-06, - "loss": 0.342, - "step": 10857 - }, - { - "epoch": 0.7096268217763545, - "grad_norm": 0.48266685009002686, - "learning_rate": 8.84308505148148e-06, - "loss": 0.3869, - "step": 10858 - }, - { - "epoch": 0.7096921769818966, - "grad_norm": 0.43436679244041443, - "learning_rate": 8.842861662041268e-06, - "loss": 0.3357, - "step": 10859 - }, - { - "epoch": 0.7097575321874388, - "grad_norm": 0.4314224421977997, - "learning_rate": 8.842638253858086e-06, - "loss": 0.3478, - "step": 10860 - }, - { - "epoch": 0.7098228873929808, - "grad_norm": 0.478385329246521, - "learning_rate": 8.842414826933028e-06, - "loss": 0.4209, - "step": 10861 - }, - { - "epoch": 0.709888242598523, - "grad_norm": 0.42900168895721436, - "learning_rate": 8.842191381267182e-06, - "loss": 0.3746, - "step": 10862 - }, - { - "epoch": 0.7099535978040651, - "grad_norm": 0.47261881828308105, - "learning_rate": 8.841967916861636e-06, - "loss": 0.41, - "step": 10863 - }, - { - "epoch": 0.7100189530096073, - "grad_norm": 0.45555639266967773, - "learning_rate": 8.841744433717484e-06, - "loss": 0.3696, - "step": 10864 - }, - { - "epoch": 0.7100843082151493, - "grad_norm": 0.41827529668807983, - "learning_rate": 8.841520931835812e-06, - "loss": 0.3498, - "step": 10865 - }, - { - "epoch": 0.7101496634206914, - "grad_norm": 0.4690840542316437, - "learning_rate": 8.84129741121771e-06, - "loss": 0.4432, - "step": 10866 - }, - { - "epoch": 0.7102150186262336, - "grad_norm": 0.45258966088294983, - "learning_rate": 8.841073871864272e-06, - "loss": 0.3969, - "step": 10867 - }, - { - "epoch": 0.7102803738317757, - "grad_norm": 0.4109076261520386, - "learning_rate": 8.840850313776586e-06, - "loss": 0.3324, - "step": 10868 - }, - { - "epoch": 0.7103457290373179, - "grad_norm": 0.430026113986969, - "learning_rate": 8.84062673695574e-06, - "loss": 0.3603, - "step": 10869 - }, - { - "epoch": 0.7104110842428599, - "grad_norm": 0.4078415036201477, - "learning_rate": 8.840403141402829e-06, - "loss": 0.3253, - "step": 10870 - }, - { - "epoch": 0.7104764394484021, - "grad_norm": 0.43680548667907715, - "learning_rate": 8.840179527118942e-06, - "loss": 0.3311, - "step": 10871 - }, - { - "epoch": 0.7105417946539442, - "grad_norm": 0.5105580687522888, - "learning_rate": 8.839955894105167e-06, - "loss": 0.4159, - "step": 10872 - }, - { - "epoch": 0.7106071498594863, - "grad_norm": 0.5044821500778198, - "learning_rate": 8.839732242362598e-06, - "loss": 0.4195, - "step": 10873 - }, - { - "epoch": 0.7106725050650284, - "grad_norm": 0.4528474807739258, - "learning_rate": 8.839508571892325e-06, - "loss": 0.3536, - "step": 10874 - }, - { - "epoch": 0.7107378602705705, - "grad_norm": 0.40835288166999817, - "learning_rate": 8.839284882695438e-06, - "loss": 0.3191, - "step": 10875 - }, - { - "epoch": 0.7108032154761127, - "grad_norm": 0.5332349538803101, - "learning_rate": 8.839061174773029e-06, - "loss": 0.4065, - "step": 10876 - }, - { - "epoch": 0.7108685706816548, - "grad_norm": 0.44312018156051636, - "learning_rate": 8.838837448126188e-06, - "loss": 0.3668, - "step": 10877 - }, - { - "epoch": 0.710933925887197, - "grad_norm": 0.48014044761657715, - "learning_rate": 8.838613702756007e-06, - "loss": 0.409, - "step": 10878 - }, - { - "epoch": 0.710999281092739, - "grad_norm": 0.46533316373825073, - "learning_rate": 8.838389938663577e-06, - "loss": 0.3773, - "step": 10879 - }, - { - "epoch": 0.7110646362982812, - "grad_norm": 0.42617470026016235, - "learning_rate": 8.83816615584999e-06, - "loss": 0.3422, - "step": 10880 - }, - { - "epoch": 0.7111299915038233, - "grad_norm": 0.42829635739326477, - "learning_rate": 8.837942354316339e-06, - "loss": 0.3169, - "step": 10881 - }, - { - "epoch": 0.7111953467093654, - "grad_norm": 0.4599711298942566, - "learning_rate": 8.83771853406371e-06, - "loss": 0.3753, - "step": 10882 - }, - { - "epoch": 0.7112607019149075, - "grad_norm": 0.4736286997795105, - "learning_rate": 8.837494695093199e-06, - "loss": 0.4014, - "step": 10883 - }, - { - "epoch": 0.7113260571204496, - "grad_norm": 0.4122152328491211, - "learning_rate": 8.837270837405898e-06, - "loss": 0.3421, - "step": 10884 - }, - { - "epoch": 0.7113914123259918, - "grad_norm": 0.4581123888492584, - "learning_rate": 8.837046961002897e-06, - "loss": 0.3795, - "step": 10885 - }, - { - "epoch": 0.7114567675315339, - "grad_norm": 0.45057132840156555, - "learning_rate": 8.83682306588529e-06, - "loss": 0.4064, - "step": 10886 - }, - { - "epoch": 0.711522122737076, - "grad_norm": 0.4579734802246094, - "learning_rate": 8.836599152054166e-06, - "loss": 0.3461, - "step": 10887 - }, - { - "epoch": 0.7115874779426181, - "grad_norm": 0.4159832000732422, - "learning_rate": 8.836375219510618e-06, - "loss": 0.3296, - "step": 10888 - }, - { - "epoch": 0.7116528331481603, - "grad_norm": 0.4370971620082855, - "learning_rate": 8.836151268255742e-06, - "loss": 0.3648, - "step": 10889 - }, - { - "epoch": 0.7117181883537024, - "grad_norm": 0.45852354168891907, - "learning_rate": 8.835927298290625e-06, - "loss": 0.4075, - "step": 10890 - }, - { - "epoch": 0.7117835435592444, - "grad_norm": 0.4565734565258026, - "learning_rate": 8.835703309616364e-06, - "loss": 0.3773, - "step": 10891 - }, - { - "epoch": 0.7118488987647866, - "grad_norm": 0.43849247694015503, - "learning_rate": 8.835479302234047e-06, - "loss": 0.3948, - "step": 10892 - }, - { - "epoch": 0.7119142539703287, - "grad_norm": 0.4963937997817993, - "learning_rate": 8.835255276144769e-06, - "loss": 0.4058, - "step": 10893 - }, - { - "epoch": 0.7119796091758709, - "grad_norm": 0.4483307898044586, - "learning_rate": 8.835031231349622e-06, - "loss": 0.3813, - "step": 10894 - }, - { - "epoch": 0.712044964381413, - "grad_norm": 0.4734781086444855, - "learning_rate": 8.8348071678497e-06, - "loss": 0.394, - "step": 10895 - }, - { - "epoch": 0.7121103195869551, - "grad_norm": 0.4316871464252472, - "learning_rate": 8.834583085646095e-06, - "loss": 0.3194, - "step": 10896 - }, - { - "epoch": 0.7121756747924972, - "grad_norm": 0.48908916115760803, - "learning_rate": 8.8343589847399e-06, - "loss": 0.4083, - "step": 10897 - }, - { - "epoch": 0.7122410299980394, - "grad_norm": 0.4344671368598938, - "learning_rate": 8.834134865132207e-06, - "loss": 0.3855, - "step": 10898 - }, - { - "epoch": 0.7123063852035815, - "grad_norm": 0.4473215341567993, - "learning_rate": 8.833910726824111e-06, - "loss": 0.3448, - "step": 10899 - }, - { - "epoch": 0.7123717404091235, - "grad_norm": 0.4628905653953552, - "learning_rate": 8.833686569816702e-06, - "loss": 0.4, - "step": 10900 - }, - { - "epoch": 0.7124370956146657, - "grad_norm": 0.48413145542144775, - "learning_rate": 8.833462394111078e-06, - "loss": 0.3781, - "step": 10901 - }, - { - "epoch": 0.7125024508202078, - "grad_norm": 0.45187827944755554, - "learning_rate": 8.83323819970833e-06, - "loss": 0.3468, - "step": 10902 - }, - { - "epoch": 0.71256780602575, - "grad_norm": 0.44939520955085754, - "learning_rate": 8.833013986609549e-06, - "loss": 0.3865, - "step": 10903 - }, - { - "epoch": 0.7126331612312921, - "grad_norm": 0.4265718460083008, - "learning_rate": 8.832789754815834e-06, - "loss": 0.3789, - "step": 10904 - }, - { - "epoch": 0.7126985164368342, - "grad_norm": 0.4289838373661041, - "learning_rate": 8.832565504328274e-06, - "loss": 0.351, - "step": 10905 - }, - { - "epoch": 0.7127638716423763, - "grad_norm": 0.46329349279403687, - "learning_rate": 8.832341235147963e-06, - "loss": 0.4032, - "step": 10906 - }, - { - "epoch": 0.7128292268479184, - "grad_norm": 0.48146799206733704, - "learning_rate": 8.832116947275997e-06, - "loss": 0.3834, - "step": 10907 - }, - { - "epoch": 0.7128945820534606, - "grad_norm": 0.45707768201828003, - "learning_rate": 8.831892640713469e-06, - "loss": 0.3914, - "step": 10908 - }, - { - "epoch": 0.7129599372590026, - "grad_norm": 0.48288625478744507, - "learning_rate": 8.831668315461475e-06, - "loss": 0.4414, - "step": 10909 - }, - { - "epoch": 0.7130252924645448, - "grad_norm": 0.44983968138694763, - "learning_rate": 8.831443971521106e-06, - "loss": 0.4292, - "step": 10910 - }, - { - "epoch": 0.7130906476700869, - "grad_norm": 0.44615569710731506, - "learning_rate": 8.831219608893456e-06, - "loss": 0.3608, - "step": 10911 - }, - { - "epoch": 0.7131560028756291, - "grad_norm": 0.46797874569892883, - "learning_rate": 8.830995227579622e-06, - "loss": 0.4129, - "step": 10912 - }, - { - "epoch": 0.7132213580811712, - "grad_norm": 0.4359447956085205, - "learning_rate": 8.830770827580697e-06, - "loss": 0.3618, - "step": 10913 - }, - { - "epoch": 0.7132867132867133, - "grad_norm": 0.44202476739883423, - "learning_rate": 8.830546408897775e-06, - "loss": 0.3496, - "step": 10914 - }, - { - "epoch": 0.7133520684922554, - "grad_norm": 0.4439420998096466, - "learning_rate": 8.830321971531952e-06, - "loss": 0.3795, - "step": 10915 - }, - { - "epoch": 0.7134174236977975, - "grad_norm": 0.47986286878585815, - "learning_rate": 8.830097515484322e-06, - "loss": 0.3911, - "step": 10916 - }, - { - "epoch": 0.7134827789033397, - "grad_norm": 0.6670001149177551, - "learning_rate": 8.829873040755979e-06, - "loss": 0.4094, - "step": 10917 - }, - { - "epoch": 0.7135481341088817, - "grad_norm": 0.4551219642162323, - "learning_rate": 8.829648547348017e-06, - "loss": 0.3808, - "step": 10918 - }, - { - "epoch": 0.7136134893144239, - "grad_norm": 0.4279273748397827, - "learning_rate": 8.829424035261534e-06, - "loss": 0.349, - "step": 10919 - }, - { - "epoch": 0.713678844519966, - "grad_norm": 0.4488070011138916, - "learning_rate": 8.829199504497623e-06, - "loss": 0.3762, - "step": 10920 - }, - { - "epoch": 0.7137441997255082, - "grad_norm": 0.46613001823425293, - "learning_rate": 8.828974955057378e-06, - "loss": 0.3977, - "step": 10921 - }, - { - "epoch": 0.7138095549310502, - "grad_norm": 0.5064405202865601, - "learning_rate": 8.828750386941898e-06, - "loss": 0.4216, - "step": 10922 - }, - { - "epoch": 0.7138749101365924, - "grad_norm": 0.4371580183506012, - "learning_rate": 8.828525800152276e-06, - "loss": 0.3806, - "step": 10923 - }, - { - "epoch": 0.7139402653421345, - "grad_norm": 0.4593411684036255, - "learning_rate": 8.828301194689606e-06, - "loss": 0.3727, - "step": 10924 - }, - { - "epoch": 0.7140056205476766, - "grad_norm": 0.4481455087661743, - "learning_rate": 8.828076570554985e-06, - "loss": 0.4087, - "step": 10925 - }, - { - "epoch": 0.7140709757532188, - "grad_norm": 0.44991838932037354, - "learning_rate": 8.827851927749509e-06, - "loss": 0.3683, - "step": 10926 - }, - { - "epoch": 0.7141363309587608, - "grad_norm": 0.43994560837745667, - "learning_rate": 8.827627266274272e-06, - "loss": 0.377, - "step": 10927 - }, - { - "epoch": 0.714201686164303, - "grad_norm": 0.4755774438381195, - "learning_rate": 8.827402586130372e-06, - "loss": 0.3778, - "step": 10928 - }, - { - "epoch": 0.7142670413698451, - "grad_norm": 0.4460085332393646, - "learning_rate": 8.827177887318903e-06, - "loss": 0.4226, - "step": 10929 - }, - { - "epoch": 0.7143323965753873, - "grad_norm": 0.464880108833313, - "learning_rate": 8.82695316984096e-06, - "loss": 0.4037, - "step": 10930 - }, - { - "epoch": 0.7143977517809293, - "grad_norm": 0.4421575963497162, - "learning_rate": 8.826728433697643e-06, - "loss": 0.3343, - "step": 10931 - }, - { - "epoch": 0.7144631069864714, - "grad_norm": 0.4808433949947357, - "learning_rate": 8.826503678890045e-06, - "loss": 0.3898, - "step": 10932 - }, - { - "epoch": 0.7145284621920136, - "grad_norm": 0.42213189601898193, - "learning_rate": 8.826278905419264e-06, - "loss": 0.3359, - "step": 10933 - }, - { - "epoch": 0.7145938173975557, - "grad_norm": 0.4662804901599884, - "learning_rate": 8.826054113286394e-06, - "loss": 0.4158, - "step": 10934 - }, - { - "epoch": 0.7146591726030979, - "grad_norm": 0.4736062288284302, - "learning_rate": 8.825829302492532e-06, - "loss": 0.4586, - "step": 10935 - }, - { - "epoch": 0.7147245278086399, - "grad_norm": 0.4594738483428955, - "learning_rate": 8.825604473038777e-06, - "loss": 0.4074, - "step": 10936 - }, - { - "epoch": 0.7147898830141821, - "grad_norm": 0.4078019857406616, - "learning_rate": 8.825379624926222e-06, - "loss": 0.3007, - "step": 10937 - }, - { - "epoch": 0.7148552382197242, - "grad_norm": 0.5145253539085388, - "learning_rate": 8.825154758155964e-06, - "loss": 0.3368, - "step": 10938 - }, - { - "epoch": 0.7149205934252664, - "grad_norm": 0.4444692134857178, - "learning_rate": 8.824929872729105e-06, - "loss": 0.3356, - "step": 10939 - }, - { - "epoch": 0.7149859486308084, - "grad_norm": 0.4701055586338043, - "learning_rate": 8.824704968646736e-06, - "loss": 0.4099, - "step": 10940 - }, - { - "epoch": 0.7150513038363505, - "grad_norm": 0.6250319480895996, - "learning_rate": 8.824480045909955e-06, - "loss": 0.3978, - "step": 10941 - }, - { - "epoch": 0.7151166590418927, - "grad_norm": 0.42224377393722534, - "learning_rate": 8.82425510451986e-06, - "loss": 0.3229, - "step": 10942 - }, - { - "epoch": 0.7151820142474348, - "grad_norm": 0.44007593393325806, - "learning_rate": 8.824030144477548e-06, - "loss": 0.3425, - "step": 10943 - }, - { - "epoch": 0.715247369452977, - "grad_norm": 0.48423925042152405, - "learning_rate": 8.823805165784118e-06, - "loss": 0.449, - "step": 10944 - }, - { - "epoch": 0.715312724658519, - "grad_norm": 0.49116432666778564, - "learning_rate": 8.823580168440664e-06, - "loss": 0.439, - "step": 10945 - }, - { - "epoch": 0.7153780798640612, - "grad_norm": 0.4677906334400177, - "learning_rate": 8.823355152448285e-06, - "loss": 0.4068, - "step": 10946 - }, - { - "epoch": 0.7154434350696033, - "grad_norm": 0.4655870199203491, - "learning_rate": 8.823130117808079e-06, - "loss": 0.4314, - "step": 10947 - }, - { - "epoch": 0.7155087902751455, - "grad_norm": 0.4523985981941223, - "learning_rate": 8.822905064521143e-06, - "loss": 0.3642, - "step": 10948 - }, - { - "epoch": 0.7155741454806875, - "grad_norm": 0.4735022187232971, - "learning_rate": 8.822679992588575e-06, - "loss": 0.3843, - "step": 10949 - }, - { - "epoch": 0.7156395006862296, - "grad_norm": 0.4337882399559021, - "learning_rate": 8.82245490201147e-06, - "loss": 0.3373, - "step": 10950 - }, - { - "epoch": 0.7157048558917718, - "grad_norm": 0.4603765308856964, - "learning_rate": 8.82222979279093e-06, - "loss": 0.4014, - "step": 10951 - }, - { - "epoch": 0.7157702110973139, - "grad_norm": 0.421307772397995, - "learning_rate": 8.82200466492805e-06, - "loss": 0.3556, - "step": 10952 - }, - { - "epoch": 0.715835566302856, - "grad_norm": 0.44478708505630493, - "learning_rate": 8.821779518423932e-06, - "loss": 0.3485, - "step": 10953 - }, - { - "epoch": 0.7159009215083981, - "grad_norm": 0.44603121280670166, - "learning_rate": 8.82155435327967e-06, - "loss": 0.381, - "step": 10954 - }, - { - "epoch": 0.7159662767139403, - "grad_norm": 0.43115556240081787, - "learning_rate": 8.821329169496362e-06, - "loss": 0.3724, - "step": 10955 - }, - { - "epoch": 0.7160316319194824, - "grad_norm": 0.4249560832977295, - "learning_rate": 8.821103967075108e-06, - "loss": 0.3255, - "step": 10956 - }, - { - "epoch": 0.7160969871250245, - "grad_norm": 0.4381152093410492, - "learning_rate": 8.820878746017008e-06, - "loss": 0.3402, - "step": 10957 - }, - { - "epoch": 0.7161623423305666, - "grad_norm": 0.46246418356895447, - "learning_rate": 8.820653506323156e-06, - "loss": 0.3846, - "step": 10958 - }, - { - "epoch": 0.7162276975361087, - "grad_norm": 0.49482643604278564, - "learning_rate": 8.820428247994656e-06, - "loss": 0.3852, - "step": 10959 - }, - { - "epoch": 0.7162930527416509, - "grad_norm": 0.4686237573623657, - "learning_rate": 8.820202971032604e-06, - "loss": 0.3347, - "step": 10960 - }, - { - "epoch": 0.716358407947193, - "grad_norm": 0.4484345316886902, - "learning_rate": 8.819977675438096e-06, - "loss": 0.3881, - "step": 10961 - }, - { - "epoch": 0.7164237631527351, - "grad_norm": 0.43936142325401306, - "learning_rate": 8.819752361212235e-06, - "loss": 0.3689, - "step": 10962 - }, - { - "epoch": 0.7164891183582772, - "grad_norm": 0.4533930718898773, - "learning_rate": 8.819527028356118e-06, - "loss": 0.3805, - "step": 10963 - }, - { - "epoch": 0.7165544735638194, - "grad_norm": 0.43360522389411926, - "learning_rate": 8.819301676870847e-06, - "loss": 0.3692, - "step": 10964 - }, - { - "epoch": 0.7166198287693615, - "grad_norm": 0.4650508165359497, - "learning_rate": 8.819076306757514e-06, - "loss": 0.3679, - "step": 10965 - }, - { - "epoch": 0.7166851839749036, - "grad_norm": 0.4472239315509796, - "learning_rate": 8.818850918017225e-06, - "loss": 0.3899, - "step": 10966 - }, - { - "epoch": 0.7167505391804457, - "grad_norm": 0.4909222424030304, - "learning_rate": 8.818625510651077e-06, - "loss": 0.397, - "step": 10967 - }, - { - "epoch": 0.7168158943859878, - "grad_norm": 0.4445730149745941, - "learning_rate": 8.81840008466017e-06, - "loss": 0.3811, - "step": 10968 - }, - { - "epoch": 0.71688124959153, - "grad_norm": 0.4273037612438202, - "learning_rate": 8.818174640045605e-06, - "loss": 0.3553, - "step": 10969 - }, - { - "epoch": 0.7169466047970721, - "grad_norm": 0.4292222261428833, - "learning_rate": 8.817949176808476e-06, - "loss": 0.3503, - "step": 10970 - }, - { - "epoch": 0.7170119600026142, - "grad_norm": 0.45991745591163635, - "learning_rate": 8.817723694949887e-06, - "loss": 0.3717, - "step": 10971 - }, - { - "epoch": 0.7170773152081563, - "grad_norm": 0.44813650846481323, - "learning_rate": 8.81749819447094e-06, - "loss": 0.3981, - "step": 10972 - }, - { - "epoch": 0.7171426704136985, - "grad_norm": 0.49451524019241333, - "learning_rate": 8.817272675372728e-06, - "loss": 0.4176, - "step": 10973 - }, - { - "epoch": 0.7172080256192406, - "grad_norm": 0.47646835446357727, - "learning_rate": 8.817047137656356e-06, - "loss": 0.426, - "step": 10974 - }, - { - "epoch": 0.7172733808247826, - "grad_norm": 0.4439930021762848, - "learning_rate": 8.816821581322922e-06, - "loss": 0.3521, - "step": 10975 - }, - { - "epoch": 0.7173387360303248, - "grad_norm": 0.43444332480430603, - "learning_rate": 8.816596006373529e-06, - "loss": 0.3492, - "step": 10976 - }, - { - "epoch": 0.7174040912358669, - "grad_norm": 0.4854743778705597, - "learning_rate": 8.816370412809273e-06, - "loss": 0.4523, - "step": 10977 - }, - { - "epoch": 0.7174694464414091, - "grad_norm": 0.4733608663082123, - "learning_rate": 8.816144800631256e-06, - "loss": 0.4003, - "step": 10978 - }, - { - "epoch": 0.7175348016469512, - "grad_norm": 0.43836793303489685, - "learning_rate": 8.81591916984058e-06, - "loss": 0.3709, - "step": 10979 - }, - { - "epoch": 0.7176001568524933, - "grad_norm": 0.45413318276405334, - "learning_rate": 8.815693520438346e-06, - "loss": 0.3645, - "step": 10980 - }, - { - "epoch": 0.7176655120580354, - "grad_norm": 0.45094919204711914, - "learning_rate": 8.81546785242565e-06, - "loss": 0.3656, - "step": 10981 - }, - { - "epoch": 0.7177308672635776, - "grad_norm": 0.5256023406982422, - "learning_rate": 8.815242165803597e-06, - "loss": 0.4817, - "step": 10982 - }, - { - "epoch": 0.7177962224691197, - "grad_norm": 0.4483894407749176, - "learning_rate": 8.815016460573284e-06, - "loss": 0.3846, - "step": 10983 - }, - { - "epoch": 0.7178615776746617, - "grad_norm": 0.5038370490074158, - "learning_rate": 8.814790736735817e-06, - "loss": 0.4298, - "step": 10984 - }, - { - "epoch": 0.7179269328802039, - "grad_norm": 0.44803082942962646, - "learning_rate": 8.814564994292293e-06, - "loss": 0.3324, - "step": 10985 - }, - { - "epoch": 0.717992288085746, - "grad_norm": 0.4388437867164612, - "learning_rate": 8.814339233243813e-06, - "loss": 0.3726, - "step": 10986 - }, - { - "epoch": 0.7180576432912882, - "grad_norm": 0.46978771686553955, - "learning_rate": 8.81411345359148e-06, - "loss": 0.4075, - "step": 10987 - }, - { - "epoch": 0.7181229984968303, - "grad_norm": 0.429244726896286, - "learning_rate": 8.813887655336394e-06, - "loss": 0.3628, - "step": 10988 - }, - { - "epoch": 0.7181883537023724, - "grad_norm": 0.43495580554008484, - "learning_rate": 8.813661838479658e-06, - "loss": 0.3758, - "step": 10989 - }, - { - "epoch": 0.7182537089079145, - "grad_norm": 0.48604434728622437, - "learning_rate": 8.81343600302237e-06, - "loss": 0.3737, - "step": 10990 - }, - { - "epoch": 0.7183190641134566, - "grad_norm": 0.4476868510246277, - "learning_rate": 8.813210148965634e-06, - "loss": 0.3622, - "step": 10991 - }, - { - "epoch": 0.7183844193189988, - "grad_norm": 0.45974016189575195, - "learning_rate": 8.812984276310551e-06, - "loss": 0.3763, - "step": 10992 - }, - { - "epoch": 0.7184497745245408, - "grad_norm": 0.428249329328537, - "learning_rate": 8.812758385058225e-06, - "loss": 0.3667, - "step": 10993 - }, - { - "epoch": 0.718515129730083, - "grad_norm": 0.43195784091949463, - "learning_rate": 8.812532475209754e-06, - "loss": 0.3286, - "step": 10994 - }, - { - "epoch": 0.7185804849356251, - "grad_norm": 0.450755774974823, - "learning_rate": 8.81230654676624e-06, - "loss": 0.3568, - "step": 10995 - }, - { - "epoch": 0.7186458401411673, - "grad_norm": 0.47830522060394287, - "learning_rate": 8.812080599728787e-06, - "loss": 0.3891, - "step": 10996 - }, - { - "epoch": 0.7187111953467094, - "grad_norm": 0.42130833864212036, - "learning_rate": 8.811854634098497e-06, - "loss": 0.3639, - "step": 10997 - }, - { - "epoch": 0.7187765505522515, - "grad_norm": 0.4895265996456146, - "learning_rate": 8.811628649876471e-06, - "loss": 0.4119, - "step": 10998 - }, - { - "epoch": 0.7188419057577936, - "grad_norm": 0.5014936923980713, - "learning_rate": 8.811402647063812e-06, - "loss": 0.3907, - "step": 10999 - }, - { - "epoch": 0.7189072609633357, - "grad_norm": 0.4365863502025604, - "learning_rate": 8.811176625661622e-06, - "loss": 0.3865, - "step": 11000 - }, - { - "epoch": 0.7189726161688779, - "grad_norm": 0.4857371747493744, - "learning_rate": 8.810950585671003e-06, - "loss": 0.4793, - "step": 11001 - }, - { - "epoch": 0.7190379713744199, - "grad_norm": 0.4356517791748047, - "learning_rate": 8.810724527093057e-06, - "loss": 0.3624, - "step": 11002 - }, - { - "epoch": 0.7191033265799621, - "grad_norm": 0.4523765444755554, - "learning_rate": 8.810498449928888e-06, - "loss": 0.4028, - "step": 11003 - }, - { - "epoch": 0.7191686817855042, - "grad_norm": 0.47722652554512024, - "learning_rate": 8.810272354179598e-06, - "loss": 0.4262, - "step": 11004 - }, - { - "epoch": 0.7192340369910464, - "grad_norm": 0.43598616123199463, - "learning_rate": 8.81004623984629e-06, - "loss": 0.3193, - "step": 11005 - }, - { - "epoch": 0.7192993921965884, - "grad_norm": 0.4548710286617279, - "learning_rate": 8.809820106930066e-06, - "loss": 0.3794, - "step": 11006 - }, - { - "epoch": 0.7193647474021306, - "grad_norm": 0.4733636677265167, - "learning_rate": 8.80959395543203e-06, - "loss": 0.4245, - "step": 11007 - }, - { - "epoch": 0.7194301026076727, - "grad_norm": 0.46189406514167786, - "learning_rate": 8.809367785353284e-06, - "loss": 0.4062, - "step": 11008 - }, - { - "epoch": 0.7194954578132148, - "grad_norm": 0.48950931429862976, - "learning_rate": 8.809141596694932e-06, - "loss": 0.4353, - "step": 11009 - }, - { - "epoch": 0.719560813018757, - "grad_norm": 0.45719993114471436, - "learning_rate": 8.808915389458076e-06, - "loss": 0.3878, - "step": 11010 - }, - { - "epoch": 0.719626168224299, - "grad_norm": 0.42908379435539246, - "learning_rate": 8.80868916364382e-06, - "loss": 0.3694, - "step": 11011 - }, - { - "epoch": 0.7196915234298412, - "grad_norm": 0.4180840849876404, - "learning_rate": 8.808462919253268e-06, - "loss": 0.3811, - "step": 11012 - }, - { - "epoch": 0.7197568786353833, - "grad_norm": 0.4631643295288086, - "learning_rate": 8.808236656287523e-06, - "loss": 0.3788, - "step": 11013 - }, - { - "epoch": 0.7198222338409255, - "grad_norm": 0.5063216090202332, - "learning_rate": 8.808010374747688e-06, - "loss": 0.4949, - "step": 11014 - }, - { - "epoch": 0.7198875890464675, - "grad_norm": 0.47948136925697327, - "learning_rate": 8.807784074634868e-06, - "loss": 0.4339, - "step": 11015 - }, - { - "epoch": 0.7199529442520096, - "grad_norm": 0.45391443371772766, - "learning_rate": 8.807557755950167e-06, - "loss": 0.3784, - "step": 11016 - }, - { - "epoch": 0.7200182994575518, - "grad_norm": 0.43957698345184326, - "learning_rate": 8.807331418694687e-06, - "loss": 0.3866, - "step": 11017 - }, - { - "epoch": 0.7200836546630939, - "grad_norm": 0.4413336217403412, - "learning_rate": 8.807105062869533e-06, - "loss": 0.3682, - "step": 11018 - }, - { - "epoch": 0.7201490098686361, - "grad_norm": 0.4598570466041565, - "learning_rate": 8.806878688475806e-06, - "loss": 0.4059, - "step": 11019 - }, - { - "epoch": 0.7202143650741781, - "grad_norm": 0.4452202320098877, - "learning_rate": 8.806652295514615e-06, - "loss": 0.3558, - "step": 11020 - }, - { - "epoch": 0.7202797202797203, - "grad_norm": 0.4020130932331085, - "learning_rate": 8.806425883987063e-06, - "loss": 0.2888, - "step": 11021 - }, - { - "epoch": 0.7203450754852624, - "grad_norm": 0.4242806136608124, - "learning_rate": 8.806199453894251e-06, - "loss": 0.3682, - "step": 11022 - }, - { - "epoch": 0.7204104306908046, - "grad_norm": 0.4291136562824249, - "learning_rate": 8.805973005237287e-06, - "loss": 0.3582, - "step": 11023 - }, - { - "epoch": 0.7204757858963466, - "grad_norm": 0.455759733915329, - "learning_rate": 8.805746538017275e-06, - "loss": 0.3962, - "step": 11024 - }, - { - "epoch": 0.7205411411018887, - "grad_norm": 0.4600485563278198, - "learning_rate": 8.805520052235316e-06, - "loss": 0.4046, - "step": 11025 - }, - { - "epoch": 0.7206064963074309, - "grad_norm": 0.45278605818748474, - "learning_rate": 8.80529354789252e-06, - "loss": 0.377, - "step": 11026 - }, - { - "epoch": 0.720671851512973, - "grad_norm": 0.4613986313343048, - "learning_rate": 8.805067024989989e-06, - "loss": 0.44, - "step": 11027 - }, - { - "epoch": 0.7207372067185152, - "grad_norm": 0.4480716586112976, - "learning_rate": 8.804840483528824e-06, - "loss": 0.3674, - "step": 11028 - }, - { - "epoch": 0.7208025619240572, - "grad_norm": 0.43719980120658875, - "learning_rate": 8.804613923510138e-06, - "loss": 0.3594, - "step": 11029 - }, - { - "epoch": 0.7208679171295994, - "grad_norm": 0.4525455832481384, - "learning_rate": 8.804387344935031e-06, - "loss": 0.3968, - "step": 11030 - }, - { - "epoch": 0.7209332723351415, - "grad_norm": 0.43577420711517334, - "learning_rate": 8.804160747804608e-06, - "loss": 0.3375, - "step": 11031 - }, - { - "epoch": 0.7209986275406837, - "grad_norm": 0.4039052128791809, - "learning_rate": 8.803934132119976e-06, - "loss": 0.3682, - "step": 11032 - }, - { - "epoch": 0.7210639827462257, - "grad_norm": 0.4610409736633301, - "learning_rate": 8.803707497882239e-06, - "loss": 0.3992, - "step": 11033 - }, - { - "epoch": 0.7211293379517678, - "grad_norm": 0.5072036981582642, - "learning_rate": 8.803480845092503e-06, - "loss": 0.4154, - "step": 11034 - }, - { - "epoch": 0.72119469315731, - "grad_norm": 0.44230931997299194, - "learning_rate": 8.803254173751874e-06, - "loss": 0.3602, - "step": 11035 - }, - { - "epoch": 0.7212600483628521, - "grad_norm": 0.43132802844047546, - "learning_rate": 8.803027483861455e-06, - "loss": 0.3406, - "step": 11036 - }, - { - "epoch": 0.7213254035683943, - "grad_norm": 0.4197312593460083, - "learning_rate": 8.802800775422354e-06, - "loss": 0.3151, - "step": 11037 - }, - { - "epoch": 0.7213907587739363, - "grad_norm": 0.43612998723983765, - "learning_rate": 8.802574048435677e-06, - "loss": 0.3549, - "step": 11038 - }, - { - "epoch": 0.7214561139794785, - "grad_norm": 0.450834721326828, - "learning_rate": 8.802347302902528e-06, - "loss": 0.3917, - "step": 11039 - }, - { - "epoch": 0.7215214691850206, - "grad_norm": 0.44693103432655334, - "learning_rate": 8.802120538824015e-06, - "loss": 0.3489, - "step": 11040 - }, - { - "epoch": 0.7215868243905627, - "grad_norm": 0.43119072914123535, - "learning_rate": 8.801893756201242e-06, - "loss": 0.3489, - "step": 11041 - }, - { - "epoch": 0.7216521795961048, - "grad_norm": 0.4801838994026184, - "learning_rate": 8.801666955035317e-06, - "loss": 0.352, - "step": 11042 - }, - { - "epoch": 0.7217175348016469, - "grad_norm": 0.42381563782691956, - "learning_rate": 8.801440135327347e-06, - "loss": 0.3085, - "step": 11043 - }, - { - "epoch": 0.7217828900071891, - "grad_norm": 0.4279800355434418, - "learning_rate": 8.801213297078433e-06, - "loss": 0.2879, - "step": 11044 - }, - { - "epoch": 0.7218482452127312, - "grad_norm": 0.4466213285923004, - "learning_rate": 8.800986440289685e-06, - "loss": 0.4095, - "step": 11045 - }, - { - "epoch": 0.7219136004182733, - "grad_norm": 0.43376627564430237, - "learning_rate": 8.80075956496221e-06, - "loss": 0.3627, - "step": 11046 - }, - { - "epoch": 0.7219789556238154, - "grad_norm": 0.43545806407928467, - "learning_rate": 8.800532671097117e-06, - "loss": 0.3329, - "step": 11047 - }, - { - "epoch": 0.7220443108293576, - "grad_norm": 0.4581034481525421, - "learning_rate": 8.800305758695507e-06, - "loss": 0.395, - "step": 11048 - }, - { - "epoch": 0.7221096660348997, - "grad_norm": 0.44845613837242126, - "learning_rate": 8.80007882775849e-06, - "loss": 0.4128, - "step": 11049 - }, - { - "epoch": 0.7221750212404418, - "grad_norm": 0.4453403949737549, - "learning_rate": 8.79985187828717e-06, - "loss": 0.3796, - "step": 11050 - }, - { - "epoch": 0.7222403764459839, - "grad_norm": 0.42938685417175293, - "learning_rate": 8.799624910282658e-06, - "loss": 0.3521, - "step": 11051 - }, - { - "epoch": 0.722305731651526, - "grad_norm": 0.46365949511528015, - "learning_rate": 8.799397923746057e-06, - "loss": 0.4298, - "step": 11052 - }, - { - "epoch": 0.7223710868570682, - "grad_norm": 0.420899897813797, - "learning_rate": 8.799170918678479e-06, - "loss": 0.3483, - "step": 11053 - }, - { - "epoch": 0.7224364420626103, - "grad_norm": 0.4637686312198639, - "learning_rate": 8.798943895081026e-06, - "loss": 0.425, - "step": 11054 - }, - { - "epoch": 0.7225017972681524, - "grad_norm": 0.4429585635662079, - "learning_rate": 8.798716852954807e-06, - "loss": 0.3456, - "step": 11055 - }, - { - "epoch": 0.7225671524736945, - "grad_norm": 0.45941469073295593, - "learning_rate": 8.798489792300933e-06, - "loss": 0.3958, - "step": 11056 - }, - { - "epoch": 0.7226325076792367, - "grad_norm": 0.48170894384384155, - "learning_rate": 8.798262713120506e-06, - "loss": 0.4519, - "step": 11057 - }, - { - "epoch": 0.7226978628847788, - "grad_norm": 0.4276023209095001, - "learning_rate": 8.798035615414636e-06, - "loss": 0.3398, - "step": 11058 - }, - { - "epoch": 0.7227632180903208, - "grad_norm": 0.4634799361228943, - "learning_rate": 8.797808499184432e-06, - "loss": 0.3692, - "step": 11059 - }, - { - "epoch": 0.722828573295863, - "grad_norm": 0.4455621540546417, - "learning_rate": 8.797581364430997e-06, - "loss": 0.356, - "step": 11060 - }, - { - "epoch": 0.7228939285014051, - "grad_norm": 0.4130583107471466, - "learning_rate": 8.797354211155445e-06, - "loss": 0.3258, - "step": 11061 - }, - { - "epoch": 0.7229592837069473, - "grad_norm": 0.4679669737815857, - "learning_rate": 8.797127039358881e-06, - "loss": 0.3873, - "step": 11062 - }, - { - "epoch": 0.7230246389124894, - "grad_norm": 0.4282200038433075, - "learning_rate": 8.796899849042414e-06, - "loss": 0.3655, - "step": 11063 - }, - { - "epoch": 0.7230899941180315, - "grad_norm": 0.4634385108947754, - "learning_rate": 8.796672640207148e-06, - "loss": 0.3765, - "step": 11064 - }, - { - "epoch": 0.7231553493235736, - "grad_norm": 0.4974842667579651, - "learning_rate": 8.796445412854195e-06, - "loss": 0.4121, - "step": 11065 - }, - { - "epoch": 0.7232207045291158, - "grad_norm": 0.42820289731025696, - "learning_rate": 8.796218166984663e-06, - "loss": 0.3593, - "step": 11066 - }, - { - "epoch": 0.7232860597346579, - "grad_norm": 0.42884114384651184, - "learning_rate": 8.79599090259966e-06, - "loss": 0.3554, - "step": 11067 - }, - { - "epoch": 0.7233514149401999, - "grad_norm": 0.46795573830604553, - "learning_rate": 8.795763619700295e-06, - "loss": 0.409, - "step": 11068 - }, - { - "epoch": 0.7234167701457421, - "grad_norm": 0.48327428102493286, - "learning_rate": 8.795536318287674e-06, - "loss": 0.4499, - "step": 11069 - }, - { - "epoch": 0.7234821253512842, - "grad_norm": 0.4357799291610718, - "learning_rate": 8.795308998362909e-06, - "loss": 0.3592, - "step": 11070 - }, - { - "epoch": 0.7235474805568264, - "grad_norm": 0.45475080609321594, - "learning_rate": 8.795081659927108e-06, - "loss": 0.3872, - "step": 11071 - }, - { - "epoch": 0.7236128357623685, - "grad_norm": 0.43749937415122986, - "learning_rate": 8.794854302981376e-06, - "loss": 0.3966, - "step": 11072 - }, - { - "epoch": 0.7236781909679106, - "grad_norm": 0.4310551881790161, - "learning_rate": 8.794626927526829e-06, - "loss": 0.3256, - "step": 11073 - }, - { - "epoch": 0.7237435461734527, - "grad_norm": 0.42545050382614136, - "learning_rate": 8.794399533564569e-06, - "loss": 0.3265, - "step": 11074 - }, - { - "epoch": 0.7238089013789948, - "grad_norm": 0.48232147097587585, - "learning_rate": 8.794172121095708e-06, - "loss": 0.4247, - "step": 11075 - }, - { - "epoch": 0.723874256584537, - "grad_norm": 0.4450146555900574, - "learning_rate": 8.793944690121355e-06, - "loss": 0.3738, - "step": 11076 - }, - { - "epoch": 0.723939611790079, - "grad_norm": 0.4868414103984833, - "learning_rate": 8.793717240642621e-06, - "loss": 0.4165, - "step": 11077 - }, - { - "epoch": 0.7240049669956212, - "grad_norm": 0.4174163341522217, - "learning_rate": 8.793489772660613e-06, - "loss": 0.2987, - "step": 11078 - }, - { - "epoch": 0.7240703222011633, - "grad_norm": 0.4786052703857422, - "learning_rate": 8.793262286176441e-06, - "loss": 0.376, - "step": 11079 - }, - { - "epoch": 0.7241356774067055, - "grad_norm": 0.4679298996925354, - "learning_rate": 8.793034781191215e-06, - "loss": 0.3844, - "step": 11080 - }, - { - "epoch": 0.7242010326122476, - "grad_norm": 0.42517128586769104, - "learning_rate": 8.792807257706043e-06, - "loss": 0.3547, - "step": 11081 - }, - { - "epoch": 0.7242663878177897, - "grad_norm": 0.4264257848262787, - "learning_rate": 8.792579715722038e-06, - "loss": 0.3504, - "step": 11082 - }, - { - "epoch": 0.7243317430233318, - "grad_norm": 0.44104328751564026, - "learning_rate": 8.792352155240307e-06, - "loss": 0.3861, - "step": 11083 - }, - { - "epoch": 0.7243970982288739, - "grad_norm": 0.46385055780410767, - "learning_rate": 8.79212457626196e-06, - "loss": 0.387, - "step": 11084 - }, - { - "epoch": 0.7244624534344161, - "grad_norm": 0.4661731421947479, - "learning_rate": 8.791896978788108e-06, - "loss": 0.3996, - "step": 11085 - }, - { - "epoch": 0.7245278086399581, - "grad_norm": 0.4316199719905853, - "learning_rate": 8.79166936281986e-06, - "loss": 0.3463, - "step": 11086 - }, - { - "epoch": 0.7245931638455003, - "grad_norm": 0.43848204612731934, - "learning_rate": 8.79144172835833e-06, - "loss": 0.3674, - "step": 11087 - }, - { - "epoch": 0.7246585190510424, - "grad_norm": 0.4132622182369232, - "learning_rate": 8.791214075404623e-06, - "loss": 0.377, - "step": 11088 - }, - { - "epoch": 0.7247238742565846, - "grad_norm": 0.44539204239845276, - "learning_rate": 8.790986403959851e-06, - "loss": 0.3695, - "step": 11089 - }, - { - "epoch": 0.7247892294621266, - "grad_norm": 0.4502166211605072, - "learning_rate": 8.790758714025128e-06, - "loss": 0.3776, - "step": 11090 - }, - { - "epoch": 0.7248545846676688, - "grad_norm": 0.4646752178668976, - "learning_rate": 8.790531005601559e-06, - "loss": 0.4114, - "step": 11091 - }, - { - "epoch": 0.7249199398732109, - "grad_norm": 0.46941882371902466, - "learning_rate": 8.790303278690258e-06, - "loss": 0.4163, - "step": 11092 - }, - { - "epoch": 0.724985295078753, - "grad_norm": 0.45404377579689026, - "learning_rate": 8.790075533292332e-06, - "loss": 0.402, - "step": 11093 - }, - { - "epoch": 0.7250506502842952, - "grad_norm": 0.5291372537612915, - "learning_rate": 8.789847769408898e-06, - "loss": 0.4224, - "step": 11094 - }, - { - "epoch": 0.7251160054898372, - "grad_norm": 0.43661803007125854, - "learning_rate": 8.789619987041063e-06, - "loss": 0.3662, - "step": 11095 - }, - { - "epoch": 0.7251813606953794, - "grad_norm": 0.43203383684158325, - "learning_rate": 8.789392186189938e-06, - "loss": 0.3522, - "step": 11096 - }, - { - "epoch": 0.7252467159009215, - "grad_norm": 0.45868775248527527, - "learning_rate": 8.789164366856634e-06, - "loss": 0.3845, - "step": 11097 - }, - { - "epoch": 0.7253120711064637, - "grad_norm": 0.445512980222702, - "learning_rate": 8.788936529042264e-06, - "loss": 0.349, - "step": 11098 - }, - { - "epoch": 0.7253774263120057, - "grad_norm": 0.41102826595306396, - "learning_rate": 8.788708672747937e-06, - "loss": 0.3229, - "step": 11099 - }, - { - "epoch": 0.7254427815175478, - "grad_norm": 0.47712117433547974, - "learning_rate": 8.788480797974765e-06, - "loss": 0.383, - "step": 11100 - }, - { - "epoch": 0.72550813672309, - "grad_norm": 0.4242284297943115, - "learning_rate": 8.78825290472386e-06, - "loss": 0.3781, - "step": 11101 - }, - { - "epoch": 0.7255734919286321, - "grad_norm": 0.47264206409454346, - "learning_rate": 8.788024992996333e-06, - "loss": 0.4067, - "step": 11102 - }, - { - "epoch": 0.7256388471341743, - "grad_norm": 0.44981124997138977, - "learning_rate": 8.787797062793298e-06, - "loss": 0.3509, - "step": 11103 - }, - { - "epoch": 0.7257042023397163, - "grad_norm": 0.47995010018348694, - "learning_rate": 8.787569114115862e-06, - "loss": 0.4037, - "step": 11104 - }, - { - "epoch": 0.7257695575452585, - "grad_norm": 0.433910995721817, - "learning_rate": 8.78734114696514e-06, - "loss": 0.3509, - "step": 11105 - }, - { - "epoch": 0.7258349127508006, - "grad_norm": 0.44411203265190125, - "learning_rate": 8.787113161342243e-06, - "loss": 0.3583, - "step": 11106 - }, - { - "epoch": 0.7259002679563428, - "grad_norm": 0.45076310634613037, - "learning_rate": 8.786885157248281e-06, - "loss": 0.4022, - "step": 11107 - }, - { - "epoch": 0.7259656231618848, - "grad_norm": 0.48805731534957886, - "learning_rate": 8.78665713468437e-06, - "loss": 0.4192, - "step": 11108 - }, - { - "epoch": 0.7260309783674269, - "grad_norm": 0.4489264190196991, - "learning_rate": 8.786429093651622e-06, - "loss": 0.3725, - "step": 11109 - }, - { - "epoch": 0.7260963335729691, - "grad_norm": 0.46248388290405273, - "learning_rate": 8.786201034151147e-06, - "loss": 0.4336, - "step": 11110 - }, - { - "epoch": 0.7261616887785112, - "grad_norm": 0.45819878578186035, - "learning_rate": 8.785972956184056e-06, - "loss": 0.4106, - "step": 11111 - }, - { - "epoch": 0.7262270439840534, - "grad_norm": 0.4444325566291809, - "learning_rate": 8.785744859751465e-06, - "loss": 0.3858, - "step": 11112 - }, - { - "epoch": 0.7262923991895954, - "grad_norm": 0.4306245744228363, - "learning_rate": 8.785516744854485e-06, - "loss": 0.321, - "step": 11113 - }, - { - "epoch": 0.7263577543951376, - "grad_norm": 0.44880321621894836, - "learning_rate": 8.785288611494227e-06, - "loss": 0.3481, - "step": 11114 - }, - { - "epoch": 0.7264231096006797, - "grad_norm": 0.44093531370162964, - "learning_rate": 8.785060459671806e-06, - "loss": 0.3624, - "step": 11115 - }, - { - "epoch": 0.7264884648062219, - "grad_norm": 0.43183866143226624, - "learning_rate": 8.784832289388334e-06, - "loss": 0.356, - "step": 11116 - }, - { - "epoch": 0.7265538200117639, - "grad_norm": 0.4692407548427582, - "learning_rate": 8.784604100644922e-06, - "loss": 0.3844, - "step": 11117 - }, - { - "epoch": 0.726619175217306, - "grad_norm": 0.420350044965744, - "learning_rate": 8.784375893442687e-06, - "loss": 0.3425, - "step": 11118 - }, - { - "epoch": 0.7266845304228482, - "grad_norm": 0.44382867217063904, - "learning_rate": 8.784147667782739e-06, - "loss": 0.3661, - "step": 11119 - }, - { - "epoch": 0.7267498856283903, - "grad_norm": 0.45902663469314575, - "learning_rate": 8.783919423666191e-06, - "loss": 0.381, - "step": 11120 - }, - { - "epoch": 0.7268152408339325, - "grad_norm": 0.42323535680770874, - "learning_rate": 8.78369116109416e-06, - "loss": 0.3179, - "step": 11121 - }, - { - "epoch": 0.7268805960394745, - "grad_norm": 0.43687260150909424, - "learning_rate": 8.783462880067753e-06, - "loss": 0.386, - "step": 11122 - }, - { - "epoch": 0.7269459512450167, - "grad_norm": 0.4666963815689087, - "learning_rate": 8.783234580588089e-06, - "loss": 0.4866, - "step": 11123 - }, - { - "epoch": 0.7270113064505588, - "grad_norm": 0.4805375635623932, - "learning_rate": 8.783006262656277e-06, - "loss": 0.3845, - "step": 11124 - }, - { - "epoch": 0.7270766616561009, - "grad_norm": 0.444644033908844, - "learning_rate": 8.782777926273434e-06, - "loss": 0.4082, - "step": 11125 - }, - { - "epoch": 0.727142016861643, - "grad_norm": 0.4642644226551056, - "learning_rate": 8.782549571440673e-06, - "loss": 0.4384, - "step": 11126 - }, - { - "epoch": 0.7272073720671851, - "grad_norm": 0.4263255298137665, - "learning_rate": 8.782321198159107e-06, - "loss": 0.3425, - "step": 11127 - }, - { - "epoch": 0.7272727272727273, - "grad_norm": 0.41924771666526794, - "learning_rate": 8.78209280642985e-06, - "loss": 0.3759, - "step": 11128 - }, - { - "epoch": 0.7273380824782694, - "grad_norm": 0.46761152148246765, - "learning_rate": 8.781864396254016e-06, - "loss": 0.3966, - "step": 11129 - }, - { - "epoch": 0.7274034376838115, - "grad_norm": 0.4359188377857208, - "learning_rate": 8.78163596763272e-06, - "loss": 0.3722, - "step": 11130 - }, - { - "epoch": 0.7274687928893536, - "grad_norm": 0.4260074496269226, - "learning_rate": 8.781407520567076e-06, - "loss": 0.3709, - "step": 11131 - }, - { - "epoch": 0.7275341480948958, - "grad_norm": 0.43425846099853516, - "learning_rate": 8.781179055058196e-06, - "loss": 0.3514, - "step": 11132 - }, - { - "epoch": 0.7275995033004379, - "grad_norm": 0.45331141352653503, - "learning_rate": 8.780950571107197e-06, - "loss": 0.3464, - "step": 11133 - }, - { - "epoch": 0.72766485850598, - "grad_norm": 0.45970654487609863, - "learning_rate": 8.780722068715191e-06, - "loss": 0.3853, - "step": 11134 - }, - { - "epoch": 0.7277302137115221, - "grad_norm": 0.46538349986076355, - "learning_rate": 8.780493547883293e-06, - "loss": 0.4237, - "step": 11135 - }, - { - "epoch": 0.7277955689170642, - "grad_norm": 0.5452269911766052, - "learning_rate": 8.780265008612621e-06, - "loss": 0.3933, - "step": 11136 - }, - { - "epoch": 0.7278609241226064, - "grad_norm": 0.42780470848083496, - "learning_rate": 8.780036450904285e-06, - "loss": 0.3543, - "step": 11137 - }, - { - "epoch": 0.7279262793281485, - "grad_norm": 0.4398547410964966, - "learning_rate": 8.779807874759403e-06, - "loss": 0.3589, - "step": 11138 - }, - { - "epoch": 0.7279916345336906, - "grad_norm": 0.43431010842323303, - "learning_rate": 8.779579280179087e-06, - "loss": 0.3569, - "step": 11139 - }, - { - "epoch": 0.7280569897392327, - "grad_norm": 0.4678983986377716, - "learning_rate": 8.779350667164453e-06, - "loss": 0.4278, - "step": 11140 - }, - { - "epoch": 0.7281223449447749, - "grad_norm": 0.41606494784355164, - "learning_rate": 8.779122035716619e-06, - "loss": 0.3293, - "step": 11141 - }, - { - "epoch": 0.728187700150317, - "grad_norm": 0.437587708234787, - "learning_rate": 8.778893385836695e-06, - "loss": 0.3377, - "step": 11142 - }, - { - "epoch": 0.728253055355859, - "grad_norm": 0.4146372973918915, - "learning_rate": 8.778664717525802e-06, - "loss": 0.3291, - "step": 11143 - }, - { - "epoch": 0.7283184105614012, - "grad_norm": 0.4122779965400696, - "learning_rate": 8.778436030785049e-06, - "loss": 0.3035, - "step": 11144 - }, - { - "epoch": 0.7283837657669433, - "grad_norm": 0.4226057231426239, - "learning_rate": 8.778207325615556e-06, - "loss": 0.3606, - "step": 11145 - }, - { - "epoch": 0.7284491209724855, - "grad_norm": 0.46493902802467346, - "learning_rate": 8.777978602018436e-06, - "loss": 0.4174, - "step": 11146 - }, - { - "epoch": 0.7285144761780276, - "grad_norm": 0.4412344992160797, - "learning_rate": 8.777749859994806e-06, - "loss": 0.3742, - "step": 11147 - }, - { - "epoch": 0.7285798313835697, - "grad_norm": 0.4301016330718994, - "learning_rate": 8.777521099545783e-06, - "loss": 0.3946, - "step": 11148 - }, - { - "epoch": 0.7286451865891118, - "grad_norm": 0.43630465865135193, - "learning_rate": 8.777292320672479e-06, - "loss": 0.3496, - "step": 11149 - }, - { - "epoch": 0.728710541794654, - "grad_norm": 0.46967384219169617, - "learning_rate": 8.777063523376012e-06, - "loss": 0.4097, - "step": 11150 - }, - { - "epoch": 0.7287758970001961, - "grad_norm": 0.46150636672973633, - "learning_rate": 8.776834707657498e-06, - "loss": 0.4559, - "step": 11151 - }, - { - "epoch": 0.7288412522057381, - "grad_norm": 0.4512837529182434, - "learning_rate": 8.776605873518052e-06, - "loss": 0.3979, - "step": 11152 - }, - { - "epoch": 0.7289066074112803, - "grad_norm": 0.4495929777622223, - "learning_rate": 8.77637702095879e-06, - "loss": 0.3379, - "step": 11153 - }, - { - "epoch": 0.7289719626168224, - "grad_norm": 0.4882166087627411, - "learning_rate": 8.776148149980833e-06, - "loss": 0.4665, - "step": 11154 - }, - { - "epoch": 0.7290373178223646, - "grad_norm": 0.4099055230617523, - "learning_rate": 8.775919260585289e-06, - "loss": 0.3413, - "step": 11155 - }, - { - "epoch": 0.7291026730279067, - "grad_norm": 0.465614914894104, - "learning_rate": 8.77569035277328e-06, - "loss": 0.4076, - "step": 11156 - }, - { - "epoch": 0.7291680282334488, - "grad_norm": 0.42142704129219055, - "learning_rate": 8.775461426545922e-06, - "loss": 0.3596, - "step": 11157 - }, - { - "epoch": 0.7292333834389909, - "grad_norm": 0.4787689447402954, - "learning_rate": 8.77523248190433e-06, - "loss": 0.4263, - "step": 11158 - }, - { - "epoch": 0.729298738644533, - "grad_norm": 0.4259145259857178, - "learning_rate": 8.775003518849622e-06, - "loss": 0.3228, - "step": 11159 - }, - { - "epoch": 0.7293640938500752, - "grad_norm": 0.4332257807254791, - "learning_rate": 8.774774537382913e-06, - "loss": 0.3498, - "step": 11160 - }, - { - "epoch": 0.7294294490556172, - "grad_norm": 0.4691733121871948, - "learning_rate": 8.774545537505321e-06, - "loss": 0.4267, - "step": 11161 - }, - { - "epoch": 0.7294948042611594, - "grad_norm": 0.45611223578453064, - "learning_rate": 8.774316519217963e-06, - "loss": 0.3928, - "step": 11162 - }, - { - "epoch": 0.7295601594667015, - "grad_norm": 0.44422146677970886, - "learning_rate": 8.774087482521955e-06, - "loss": 0.3437, - "step": 11163 - }, - { - "epoch": 0.7296255146722437, - "grad_norm": 0.4369373321533203, - "learning_rate": 8.773858427418417e-06, - "loss": 0.3521, - "step": 11164 - }, - { - "epoch": 0.7296908698777858, - "grad_norm": 0.45053038001060486, - "learning_rate": 8.773629353908463e-06, - "loss": 0.3761, - "step": 11165 - }, - { - "epoch": 0.7297562250833279, - "grad_norm": 0.44090405106544495, - "learning_rate": 8.773400261993211e-06, - "loss": 0.388, - "step": 11166 - }, - { - "epoch": 0.72982158028887, - "grad_norm": 0.4256582260131836, - "learning_rate": 8.77317115167378e-06, - "loss": 0.354, - "step": 11167 - }, - { - "epoch": 0.7298869354944121, - "grad_norm": 0.4984571933746338, - "learning_rate": 8.772942022951285e-06, - "loss": 0.4391, - "step": 11168 - }, - { - "epoch": 0.7299522906999543, - "grad_norm": 0.46019721031188965, - "learning_rate": 8.772712875826842e-06, - "loss": 0.3867, - "step": 11169 - }, - { - "epoch": 0.7300176459054963, - "grad_norm": 0.5745295882225037, - "learning_rate": 8.772483710301577e-06, - "loss": 0.3707, - "step": 11170 - }, - { - "epoch": 0.7300830011110385, - "grad_norm": 0.4176986515522003, - "learning_rate": 8.772254526376599e-06, - "loss": 0.351, - "step": 11171 - }, - { - "epoch": 0.7301483563165806, - "grad_norm": 0.4469206929206848, - "learning_rate": 8.772025324053027e-06, - "loss": 0.413, - "step": 11172 - }, - { - "epoch": 0.7302137115221228, - "grad_norm": 0.48123699426651, - "learning_rate": 8.771796103331984e-06, - "loss": 0.4142, - "step": 11173 - }, - { - "epoch": 0.7302790667276648, - "grad_norm": 0.41997450590133667, - "learning_rate": 8.771566864214583e-06, - "loss": 0.3341, - "step": 11174 - }, - { - "epoch": 0.730344421933207, - "grad_norm": 0.4403407573699951, - "learning_rate": 8.771337606701944e-06, - "loss": 0.374, - "step": 11175 - }, - { - "epoch": 0.7304097771387491, - "grad_norm": 0.4437764286994934, - "learning_rate": 8.771108330795185e-06, - "loss": 0.3682, - "step": 11176 - }, - { - "epoch": 0.7304751323442912, - "grad_norm": 0.41584858298301697, - "learning_rate": 8.770879036495424e-06, - "loss": 0.3315, - "step": 11177 - }, - { - "epoch": 0.7305404875498334, - "grad_norm": 0.43823719024658203, - "learning_rate": 8.77064972380378e-06, - "loss": 0.3654, - "step": 11178 - }, - { - "epoch": 0.7306058427553754, - "grad_norm": 0.43495121598243713, - "learning_rate": 8.770420392721372e-06, - "loss": 0.3459, - "step": 11179 - }, - { - "epoch": 0.7306711979609176, - "grad_norm": 0.46856689453125, - "learning_rate": 8.770191043249316e-06, - "loss": 0.4379, - "step": 11180 - }, - { - "epoch": 0.7307365531664597, - "grad_norm": 0.4437059462070465, - "learning_rate": 8.769961675388731e-06, - "loss": 0.3853, - "step": 11181 - }, - { - "epoch": 0.7308019083720019, - "grad_norm": 0.43872666358947754, - "learning_rate": 8.76973228914074e-06, - "loss": 0.3891, - "step": 11182 - }, - { - "epoch": 0.730867263577544, - "grad_norm": 0.42102178931236267, - "learning_rate": 8.769502884506457e-06, - "loss": 0.3451, - "step": 11183 - }, - { - "epoch": 0.730932618783086, - "grad_norm": 0.4101921617984772, - "learning_rate": 8.769273461487003e-06, - "loss": 0.3137, - "step": 11184 - }, - { - "epoch": 0.7309979739886282, - "grad_norm": 0.45409631729125977, - "learning_rate": 8.769044020083497e-06, - "loss": 0.4182, - "step": 11185 - }, - { - "epoch": 0.7310633291941703, - "grad_norm": 0.4334227740764618, - "learning_rate": 8.768814560297056e-06, - "loss": 0.3833, - "step": 11186 - }, - { - "epoch": 0.7311286843997125, - "grad_norm": 0.4318891763687134, - "learning_rate": 8.768585082128802e-06, - "loss": 0.3325, - "step": 11187 - }, - { - "epoch": 0.7311940396052545, - "grad_norm": 0.4427895247936249, - "learning_rate": 8.768355585579852e-06, - "loss": 0.3587, - "step": 11188 - }, - { - "epoch": 0.7312593948107967, - "grad_norm": 0.4658011794090271, - "learning_rate": 8.768126070651328e-06, - "loss": 0.3791, - "step": 11189 - }, - { - "epoch": 0.7313247500163388, - "grad_norm": 0.4469871520996094, - "learning_rate": 8.767896537344346e-06, - "loss": 0.3529, - "step": 11190 - }, - { - "epoch": 0.731390105221881, - "grad_norm": 0.4537349343299866, - "learning_rate": 8.767666985660027e-06, - "loss": 0.3733, - "step": 11191 - }, - { - "epoch": 0.731455460427423, - "grad_norm": 0.6510783433914185, - "learning_rate": 8.767437415599493e-06, - "loss": 0.4483, - "step": 11192 - }, - { - "epoch": 0.7315208156329651, - "grad_norm": 0.42360860109329224, - "learning_rate": 8.76720782716386e-06, - "loss": 0.325, - "step": 11193 - }, - { - "epoch": 0.7315861708385073, - "grad_norm": 0.4338391423225403, - "learning_rate": 8.76697822035425e-06, - "loss": 0.3768, - "step": 11194 - }, - { - "epoch": 0.7316515260440494, - "grad_norm": 0.424140065908432, - "learning_rate": 8.766748595171783e-06, - "loss": 0.3347, - "step": 11195 - }, - { - "epoch": 0.7317168812495916, - "grad_norm": 0.42875343561172485, - "learning_rate": 8.766518951617576e-06, - "loss": 0.3437, - "step": 11196 - }, - { - "epoch": 0.7317822364551336, - "grad_norm": 0.46043136715888977, - "learning_rate": 8.766289289692753e-06, - "loss": 0.4028, - "step": 11197 - }, - { - "epoch": 0.7318475916606758, - "grad_norm": 0.4252198338508606, - "learning_rate": 8.766059609398432e-06, - "loss": 0.3685, - "step": 11198 - }, - { - "epoch": 0.7319129468662179, - "grad_norm": 0.4179631769657135, - "learning_rate": 8.765829910735733e-06, - "loss": 0.35, - "step": 11199 - }, - { - "epoch": 0.7319783020717601, - "grad_norm": 0.41065219044685364, - "learning_rate": 8.765600193705777e-06, - "loss": 0.384, - "step": 11200 - }, - { - "epoch": 0.7320436572773021, - "grad_norm": 0.41988229751586914, - "learning_rate": 8.765370458309684e-06, - "loss": 0.3401, - "step": 11201 - }, - { - "epoch": 0.7321090124828442, - "grad_norm": 0.42483600974082947, - "learning_rate": 8.765140704548576e-06, - "loss": 0.3849, - "step": 11202 - }, - { - "epoch": 0.7321743676883864, - "grad_norm": 0.4606691896915436, - "learning_rate": 8.76491093242357e-06, - "loss": 0.3649, - "step": 11203 - }, - { - "epoch": 0.7322397228939285, - "grad_norm": 0.4374525547027588, - "learning_rate": 8.764681141935792e-06, - "loss": 0.3538, - "step": 11204 - }, - { - "epoch": 0.7323050780994707, - "grad_norm": 0.4549223780632019, - "learning_rate": 8.764451333086358e-06, - "loss": 0.4093, - "step": 11205 - }, - { - "epoch": 0.7323704333050127, - "grad_norm": 0.42030951380729675, - "learning_rate": 8.764221505876393e-06, - "loss": 0.351, - "step": 11206 - }, - { - "epoch": 0.7324357885105549, - "grad_norm": 0.4788500964641571, - "learning_rate": 8.763991660307014e-06, - "loss": 0.4289, - "step": 11207 - }, - { - "epoch": 0.732501143716097, - "grad_norm": 0.46380653977394104, - "learning_rate": 8.763761796379343e-06, - "loss": 0.4169, - "step": 11208 - }, - { - "epoch": 0.732566498921639, - "grad_norm": 0.4581639766693115, - "learning_rate": 8.763531914094502e-06, - "loss": 0.4127, - "step": 11209 - }, - { - "epoch": 0.7326318541271812, - "grad_norm": 0.45701515674591064, - "learning_rate": 8.763302013453614e-06, - "loss": 0.4049, - "step": 11210 - }, - { - "epoch": 0.7326972093327233, - "grad_norm": 0.46352502703666687, - "learning_rate": 8.763072094457797e-06, - "loss": 0.3964, - "step": 11211 - }, - { - "epoch": 0.7327625645382655, - "grad_norm": 0.4479861557483673, - "learning_rate": 8.762842157108173e-06, - "loss": 0.3951, - "step": 11212 - }, - { - "epoch": 0.7328279197438076, - "grad_norm": 0.40743589401245117, - "learning_rate": 8.762612201405865e-06, - "loss": 0.3412, - "step": 11213 - }, - { - "epoch": 0.7328932749493497, - "grad_norm": 0.4625330865383148, - "learning_rate": 8.762382227351995e-06, - "loss": 0.3849, - "step": 11214 - }, - { - "epoch": 0.7329586301548918, - "grad_norm": 0.4576355814933777, - "learning_rate": 8.76215223494768e-06, - "loss": 0.4092, - "step": 11215 - }, - { - "epoch": 0.733023985360434, - "grad_norm": 0.4445648789405823, - "learning_rate": 8.76192222419405e-06, - "loss": 0.39, - "step": 11216 - }, - { - "epoch": 0.7330893405659761, - "grad_norm": 0.47072356939315796, - "learning_rate": 8.761692195092219e-06, - "loss": 0.388, - "step": 11217 - }, - { - "epoch": 0.7331546957715182, - "grad_norm": 0.4482540488243103, - "learning_rate": 8.761462147643311e-06, - "loss": 0.3848, - "step": 11218 - }, - { - "epoch": 0.7332200509770603, - "grad_norm": 0.4511313736438751, - "learning_rate": 8.761232081848452e-06, - "loss": 0.3986, - "step": 11219 - }, - { - "epoch": 0.7332854061826024, - "grad_norm": 0.45848849415779114, - "learning_rate": 8.761001997708759e-06, - "loss": 0.3967, - "step": 11220 - }, - { - "epoch": 0.7333507613881446, - "grad_norm": 0.4291442930698395, - "learning_rate": 8.760771895225358e-06, - "loss": 0.3428, - "step": 11221 - }, - { - "epoch": 0.7334161165936867, - "grad_norm": 0.4486805498600006, - "learning_rate": 8.760541774399368e-06, - "loss": 0.3835, - "step": 11222 - }, - { - "epoch": 0.7334814717992288, - "grad_norm": 0.40763527154922485, - "learning_rate": 8.760311635231913e-06, - "loss": 0.3184, - "step": 11223 - }, - { - "epoch": 0.7335468270047709, - "grad_norm": 0.44409674406051636, - "learning_rate": 8.760081477724116e-06, - "loss": 0.3848, - "step": 11224 - }, - { - "epoch": 0.7336121822103131, - "grad_norm": 0.4710194766521454, - "learning_rate": 8.7598513018771e-06, - "loss": 0.3996, - "step": 11225 - }, - { - "epoch": 0.7336775374158552, - "grad_norm": 0.4506535530090332, - "learning_rate": 8.759621107691985e-06, - "loss": 0.3897, - "step": 11226 - }, - { - "epoch": 0.7337428926213972, - "grad_norm": 0.508561909198761, - "learning_rate": 8.759390895169896e-06, - "loss": 0.4555, - "step": 11227 - }, - { - "epoch": 0.7338082478269394, - "grad_norm": 0.4735959768295288, - "learning_rate": 8.759160664311957e-06, - "loss": 0.4099, - "step": 11228 - }, - { - "epoch": 0.7338736030324815, - "grad_norm": 0.46254050731658936, - "learning_rate": 8.758930415119286e-06, - "loss": 0.4225, - "step": 11229 - }, - { - "epoch": 0.7339389582380237, - "grad_norm": 0.446707546710968, - "learning_rate": 8.75870014759301e-06, - "loss": 0.3645, - "step": 11230 - }, - { - "epoch": 0.7340043134435658, - "grad_norm": 0.4319542348384857, - "learning_rate": 8.758469861734252e-06, - "loss": 0.3557, - "step": 11231 - }, - { - "epoch": 0.7340696686491079, - "grad_norm": 0.47102925181388855, - "learning_rate": 8.758239557544135e-06, - "loss": 0.3969, - "step": 11232 - }, - { - "epoch": 0.73413502385465, - "grad_norm": 0.434438556432724, - "learning_rate": 8.758009235023782e-06, - "loss": 0.3691, - "step": 11233 - }, - { - "epoch": 0.7342003790601922, - "grad_norm": 0.5033477544784546, - "learning_rate": 8.757778894174314e-06, - "loss": 0.4246, - "step": 11234 - }, - { - "epoch": 0.7342657342657343, - "grad_norm": 0.42868471145629883, - "learning_rate": 8.757548534996858e-06, - "loss": 0.3827, - "step": 11235 - }, - { - "epoch": 0.7343310894712763, - "grad_norm": 0.47100287675857544, - "learning_rate": 8.757318157492535e-06, - "loss": 0.4058, - "step": 11236 - }, - { - "epoch": 0.7343964446768185, - "grad_norm": 0.5234546661376953, - "learning_rate": 8.75708776166247e-06, - "loss": 0.4786, - "step": 11237 - }, - { - "epoch": 0.7344617998823606, - "grad_norm": 0.4534815847873688, - "learning_rate": 8.756857347507787e-06, - "loss": 0.4056, - "step": 11238 - }, - { - "epoch": 0.7345271550879028, - "grad_norm": 0.49612924456596375, - "learning_rate": 8.75662691502961e-06, - "loss": 0.3897, - "step": 11239 - }, - { - "epoch": 0.7345925102934449, - "grad_norm": 0.4975704252719879, - "learning_rate": 8.75639646422906e-06, - "loss": 0.3943, - "step": 11240 - }, - { - "epoch": 0.734657865498987, - "grad_norm": 0.4549933671951294, - "learning_rate": 8.756165995107265e-06, - "loss": 0.4065, - "step": 11241 - }, - { - "epoch": 0.7347232207045291, - "grad_norm": 0.4527456760406494, - "learning_rate": 8.755935507665346e-06, - "loss": 0.3533, - "step": 11242 - }, - { - "epoch": 0.7347885759100712, - "grad_norm": 0.4188726544380188, - "learning_rate": 8.755705001904428e-06, - "loss": 0.3398, - "step": 11243 - }, - { - "epoch": 0.7348539311156134, - "grad_norm": 0.4060506820678711, - "learning_rate": 8.755474477825636e-06, - "loss": 0.3237, - "step": 11244 - }, - { - "epoch": 0.7349192863211554, - "grad_norm": 0.4342059791088104, - "learning_rate": 8.755243935430095e-06, - "loss": 0.3419, - "step": 11245 - }, - { - "epoch": 0.7349846415266976, - "grad_norm": 0.5011023283004761, - "learning_rate": 8.755013374718928e-06, - "loss": 0.4923, - "step": 11246 - }, - { - "epoch": 0.7350499967322397, - "grad_norm": 1.7879060506820679, - "learning_rate": 8.75478279569326e-06, - "loss": 0.425, - "step": 11247 - }, - { - "epoch": 0.7351153519377819, - "grad_norm": 0.44676414132118225, - "learning_rate": 8.754552198354214e-06, - "loss": 0.3395, - "step": 11248 - }, - { - "epoch": 0.735180707143324, - "grad_norm": 0.4825112223625183, - "learning_rate": 8.754321582702917e-06, - "loss": 0.4456, - "step": 11249 - }, - { - "epoch": 0.7352460623488661, - "grad_norm": 0.47368019819259644, - "learning_rate": 8.754090948740494e-06, - "loss": 0.421, - "step": 11250 - }, - { - "epoch": 0.7353114175544082, - "grad_norm": 0.4699123501777649, - "learning_rate": 8.753860296468069e-06, - "loss": 0.403, - "step": 11251 - }, - { - "epoch": 0.7353767727599503, - "grad_norm": 0.41633594036102295, - "learning_rate": 8.753629625886764e-06, - "loss": 0.3443, - "step": 11252 - }, - { - "epoch": 0.7354421279654925, - "grad_norm": 0.4335797429084778, - "learning_rate": 8.75339893699771e-06, - "loss": 0.3668, - "step": 11253 - }, - { - "epoch": 0.7355074831710345, - "grad_norm": 0.43298983573913574, - "learning_rate": 8.753168229802028e-06, - "loss": 0.3585, - "step": 11254 - }, - { - "epoch": 0.7355728383765767, - "grad_norm": 0.42844730615615845, - "learning_rate": 8.752937504300845e-06, - "loss": 0.3314, - "step": 11255 - }, - { - "epoch": 0.7356381935821188, - "grad_norm": 0.45574915409088135, - "learning_rate": 8.752706760495284e-06, - "loss": 0.3768, - "step": 11256 - }, - { - "epoch": 0.735703548787661, - "grad_norm": 0.42137467861175537, - "learning_rate": 8.752475998386474e-06, - "loss": 0.3425, - "step": 11257 - }, - { - "epoch": 0.735768903993203, - "grad_norm": 0.43730729818344116, - "learning_rate": 8.752245217975537e-06, - "loss": 0.3447, - "step": 11258 - }, - { - "epoch": 0.7358342591987452, - "grad_norm": 0.4258533716201782, - "learning_rate": 8.752014419263601e-06, - "loss": 0.3603, - "step": 11259 - }, - { - "epoch": 0.7358996144042873, - "grad_norm": 0.4704715311527252, - "learning_rate": 8.751783602251791e-06, - "loss": 0.4428, - "step": 11260 - }, - { - "epoch": 0.7359649696098294, - "grad_norm": 0.43059343099594116, - "learning_rate": 8.751552766941233e-06, - "loss": 0.3586, - "step": 11261 - }, - { - "epoch": 0.7360303248153716, - "grad_norm": 0.41468125581741333, - "learning_rate": 8.751321913333051e-06, - "loss": 0.3551, - "step": 11262 - }, - { - "epoch": 0.7360956800209136, - "grad_norm": 0.40264788269996643, - "learning_rate": 8.751091041428373e-06, - "loss": 0.2891, - "step": 11263 - }, - { - "epoch": 0.7361610352264558, - "grad_norm": 0.4065679609775543, - "learning_rate": 8.750860151228326e-06, - "loss": 0.3291, - "step": 11264 - }, - { - "epoch": 0.7362263904319979, - "grad_norm": 0.4681910276412964, - "learning_rate": 8.750629242734032e-06, - "loss": 0.3894, - "step": 11265 - }, - { - "epoch": 0.7362917456375401, - "grad_norm": 0.46393883228302, - "learning_rate": 8.750398315946623e-06, - "loss": 0.3906, - "step": 11266 - }, - { - "epoch": 0.7363571008430821, - "grad_norm": 0.49299412965774536, - "learning_rate": 8.750167370867219e-06, - "loss": 0.4371, - "step": 11267 - }, - { - "epoch": 0.7364224560486242, - "grad_norm": 0.4403276741504669, - "learning_rate": 8.74993640749695e-06, - "loss": 0.3671, - "step": 11268 - }, - { - "epoch": 0.7364878112541664, - "grad_norm": 0.4243725836277008, - "learning_rate": 8.749705425836945e-06, - "loss": 0.3669, - "step": 11269 - }, - { - "epoch": 0.7365531664597085, - "grad_norm": 0.4288892149925232, - "learning_rate": 8.749474425888324e-06, - "loss": 0.3821, - "step": 11270 - }, - { - "epoch": 0.7366185216652507, - "grad_norm": 0.4476723372936249, - "learning_rate": 8.74924340765222e-06, - "loss": 0.3891, - "step": 11271 - }, - { - "epoch": 0.7366838768707927, - "grad_norm": 0.4608297646045685, - "learning_rate": 8.749012371129756e-06, - "loss": 0.4004, - "step": 11272 - }, - { - "epoch": 0.7367492320763349, - "grad_norm": 0.4542662799358368, - "learning_rate": 8.748781316322058e-06, - "loss": 0.3932, - "step": 11273 - }, - { - "epoch": 0.736814587281877, - "grad_norm": 0.4622139632701874, - "learning_rate": 8.748550243230259e-06, - "loss": 0.3562, - "step": 11274 - }, - { - "epoch": 0.7368799424874192, - "grad_norm": 0.45630398392677307, - "learning_rate": 8.748319151855478e-06, - "loss": 0.3839, - "step": 11275 - }, - { - "epoch": 0.7369452976929612, - "grad_norm": 0.47129204869270325, - "learning_rate": 8.748088042198848e-06, - "loss": 0.4271, - "step": 11276 - }, - { - "epoch": 0.7370106528985033, - "grad_norm": 0.43397608399391174, - "learning_rate": 8.747856914261493e-06, - "loss": 0.3838, - "step": 11277 - }, - { - "epoch": 0.7370760081040455, - "grad_norm": 0.46451449394226074, - "learning_rate": 8.747625768044542e-06, - "loss": 0.3854, - "step": 11278 - }, - { - "epoch": 0.7371413633095876, - "grad_norm": 0.41327789425849915, - "learning_rate": 8.747394603549122e-06, - "loss": 0.3437, - "step": 11279 - }, - { - "epoch": 0.7372067185151298, - "grad_norm": 0.4603644907474518, - "learning_rate": 8.747163420776361e-06, - "loss": 0.3815, - "step": 11280 - }, - { - "epoch": 0.7372720737206718, - "grad_norm": 0.470671147108078, - "learning_rate": 8.746932219727384e-06, - "loss": 0.403, - "step": 11281 - }, - { - "epoch": 0.737337428926214, - "grad_norm": 0.43903297185897827, - "learning_rate": 8.746701000403321e-06, - "loss": 0.3519, - "step": 11282 - }, - { - "epoch": 0.7374027841317561, - "grad_norm": 0.46954628825187683, - "learning_rate": 8.7464697628053e-06, - "loss": 0.4205, - "step": 11283 - }, - { - "epoch": 0.7374681393372983, - "grad_norm": 0.4562249481678009, - "learning_rate": 8.746238506934448e-06, - "loss": 0.3838, - "step": 11284 - }, - { - "epoch": 0.7375334945428403, - "grad_norm": 0.42818036675453186, - "learning_rate": 8.746007232791893e-06, - "loss": 0.3689, - "step": 11285 - }, - { - "epoch": 0.7375988497483824, - "grad_norm": 0.47790461778640747, - "learning_rate": 8.745775940378762e-06, - "loss": 0.3995, - "step": 11286 - }, - { - "epoch": 0.7376642049539246, - "grad_norm": 0.46306198835372925, - "learning_rate": 8.745544629696185e-06, - "loss": 0.3946, - "step": 11287 - }, - { - "epoch": 0.7377295601594667, - "grad_norm": 0.43406930565834045, - "learning_rate": 8.74531330074529e-06, - "loss": 0.3459, - "step": 11288 - }, - { - "epoch": 0.7377949153650089, - "grad_norm": 0.44612717628479004, - "learning_rate": 8.745081953527203e-06, - "loss": 0.3576, - "step": 11289 - }, - { - "epoch": 0.7378602705705509, - "grad_norm": 0.48750752210617065, - "learning_rate": 8.744850588043055e-06, - "loss": 0.4311, - "step": 11290 - }, - { - "epoch": 0.7379256257760931, - "grad_norm": 0.49298563599586487, - "learning_rate": 8.744619204293974e-06, - "loss": 0.4336, - "step": 11291 - }, - { - "epoch": 0.7379909809816352, - "grad_norm": 0.453427255153656, - "learning_rate": 8.744387802281086e-06, - "loss": 0.3561, - "step": 11292 - }, - { - "epoch": 0.7380563361871773, - "grad_norm": 0.43164753913879395, - "learning_rate": 8.744156382005521e-06, - "loss": 0.4027, - "step": 11293 - }, - { - "epoch": 0.7381216913927194, - "grad_norm": 0.5250502228736877, - "learning_rate": 8.74392494346841e-06, - "loss": 0.4318, - "step": 11294 - }, - { - "epoch": 0.7381870465982615, - "grad_norm": 0.439285010099411, - "learning_rate": 8.74369348667088e-06, - "loss": 0.3363, - "step": 11295 - }, - { - "epoch": 0.7382524018038037, - "grad_norm": 0.45224079489707947, - "learning_rate": 8.74346201161406e-06, - "loss": 0.3764, - "step": 11296 - }, - { - "epoch": 0.7383177570093458, - "grad_norm": 0.4611002802848816, - "learning_rate": 8.743230518299078e-06, - "loss": 0.3675, - "step": 11297 - }, - { - "epoch": 0.738383112214888, - "grad_norm": 0.46360254287719727, - "learning_rate": 8.742999006727064e-06, - "loss": 0.4002, - "step": 11298 - }, - { - "epoch": 0.73844846742043, - "grad_norm": 0.4859495759010315, - "learning_rate": 8.742767476899148e-06, - "loss": 0.3767, - "step": 11299 - }, - { - "epoch": 0.7385138226259722, - "grad_norm": 0.4485567510128021, - "learning_rate": 8.742535928816457e-06, - "loss": 0.3742, - "step": 11300 - }, - { - "epoch": 0.7385791778315143, - "grad_norm": 0.45822906494140625, - "learning_rate": 8.742304362480123e-06, - "loss": 0.3788, - "step": 11301 - }, - { - "epoch": 0.7386445330370564, - "grad_norm": 0.4571850001811981, - "learning_rate": 8.742072777891275e-06, - "loss": 0.4086, - "step": 11302 - }, - { - "epoch": 0.7387098882425985, - "grad_norm": 0.4510149359703064, - "learning_rate": 8.74184117505104e-06, - "loss": 0.4104, - "step": 11303 - }, - { - "epoch": 0.7387752434481406, - "grad_norm": 0.45367246866226196, - "learning_rate": 8.741609553960548e-06, - "loss": 0.3683, - "step": 11304 - }, - { - "epoch": 0.7388405986536828, - "grad_norm": 0.4416310787200928, - "learning_rate": 8.741377914620933e-06, - "loss": 0.3951, - "step": 11305 - }, - { - "epoch": 0.7389059538592249, - "grad_norm": 0.46273839473724365, - "learning_rate": 8.741146257033321e-06, - "loss": 0.4341, - "step": 11306 - }, - { - "epoch": 0.738971309064767, - "grad_norm": 0.4469950199127197, - "learning_rate": 8.740914581198841e-06, - "loss": 0.3583, - "step": 11307 - }, - { - "epoch": 0.7390366642703091, - "grad_norm": 0.4812050759792328, - "learning_rate": 8.740682887118626e-06, - "loss": 0.4002, - "step": 11308 - }, - { - "epoch": 0.7391020194758513, - "grad_norm": 0.4215378761291504, - "learning_rate": 8.740451174793805e-06, - "loss": 0.3537, - "step": 11309 - }, - { - "epoch": 0.7391673746813934, - "grad_norm": 0.4300982654094696, - "learning_rate": 8.740219444225506e-06, - "loss": 0.3314, - "step": 11310 - }, - { - "epoch": 0.7392327298869354, - "grad_norm": 0.4511035680770874, - "learning_rate": 8.739987695414863e-06, - "loss": 0.37, - "step": 11311 - }, - { - "epoch": 0.7392980850924776, - "grad_norm": 0.48972102999687195, - "learning_rate": 8.739755928363004e-06, - "loss": 0.4092, - "step": 11312 - }, - { - "epoch": 0.7393634402980197, - "grad_norm": 0.5016107559204102, - "learning_rate": 8.739524143071058e-06, - "loss": 0.4012, - "step": 11313 - }, - { - "epoch": 0.7394287955035619, - "grad_norm": 0.4714929759502411, - "learning_rate": 8.73929233954016e-06, - "loss": 0.4362, - "step": 11314 - }, - { - "epoch": 0.739494150709104, - "grad_norm": 0.4383006989955902, - "learning_rate": 8.739060517771437e-06, - "loss": 0.3916, - "step": 11315 - }, - { - "epoch": 0.7395595059146461, - "grad_norm": 0.45401644706726074, - "learning_rate": 8.73882867776602e-06, - "loss": 0.3956, - "step": 11316 - }, - { - "epoch": 0.7396248611201882, - "grad_norm": 0.41924989223480225, - "learning_rate": 8.73859681952504e-06, - "loss": 0.3462, - "step": 11317 - }, - { - "epoch": 0.7396902163257304, - "grad_norm": 0.4588722288608551, - "learning_rate": 8.738364943049628e-06, - "loss": 0.4063, - "step": 11318 - }, - { - "epoch": 0.7397555715312725, - "grad_norm": 0.4037357568740845, - "learning_rate": 8.738133048340916e-06, - "loss": 0.3313, - "step": 11319 - }, - { - "epoch": 0.7398209267368145, - "grad_norm": 0.4058557450771332, - "learning_rate": 8.737901135400034e-06, - "loss": 0.31, - "step": 11320 - }, - { - "epoch": 0.7398862819423567, - "grad_norm": 0.4838840663433075, - "learning_rate": 8.737669204228112e-06, - "loss": 0.361, - "step": 11321 - }, - { - "epoch": 0.7399516371478988, - "grad_norm": 0.4566826820373535, - "learning_rate": 8.737437254826283e-06, - "loss": 0.4101, - "step": 11322 - }, - { - "epoch": 0.740016992353441, - "grad_norm": 0.4308919608592987, - "learning_rate": 8.737205287195678e-06, - "loss": 0.3454, - "step": 11323 - }, - { - "epoch": 0.7400823475589831, - "grad_norm": 0.5028971433639526, - "learning_rate": 8.736973301337428e-06, - "loss": 0.4994, - "step": 11324 - }, - { - "epoch": 0.7401477027645252, - "grad_norm": 0.4332844913005829, - "learning_rate": 8.736741297252665e-06, - "loss": 0.345, - "step": 11325 - }, - { - "epoch": 0.7402130579700673, - "grad_norm": 0.43167319893836975, - "learning_rate": 8.73650927494252e-06, - "loss": 0.3938, - "step": 11326 - }, - { - "epoch": 0.7402784131756094, - "grad_norm": 0.4252494275569916, - "learning_rate": 8.736277234408125e-06, - "loss": 0.3029, - "step": 11327 - }, - { - "epoch": 0.7403437683811516, - "grad_norm": 0.46765902638435364, - "learning_rate": 8.736045175650612e-06, - "loss": 0.402, - "step": 11328 - }, - { - "epoch": 0.7404091235866936, - "grad_norm": 0.4168873727321625, - "learning_rate": 8.735813098671111e-06, - "loss": 0.3234, - "step": 11329 - }, - { - "epoch": 0.7404744787922358, - "grad_norm": 0.45331600308418274, - "learning_rate": 8.735581003470754e-06, - "loss": 0.3539, - "step": 11330 - }, - { - "epoch": 0.7405398339977779, - "grad_norm": 0.45428404211997986, - "learning_rate": 8.735348890050678e-06, - "loss": 0.3962, - "step": 11331 - }, - { - "epoch": 0.7406051892033201, - "grad_norm": 0.47911834716796875, - "learning_rate": 8.73511675841201e-06, - "loss": 0.4235, - "step": 11332 - }, - { - "epoch": 0.7406705444088622, - "grad_norm": 0.4567180573940277, - "learning_rate": 8.734884608555882e-06, - "loss": 0.4001, - "step": 11333 - }, - { - "epoch": 0.7407358996144043, - "grad_norm": 1.2108349800109863, - "learning_rate": 8.73465244048343e-06, - "loss": 0.3833, - "step": 11334 - }, - { - "epoch": 0.7408012548199464, - "grad_norm": 0.45654308795928955, - "learning_rate": 8.734420254195784e-06, - "loss": 0.4051, - "step": 11335 - }, - { - "epoch": 0.7408666100254885, - "grad_norm": 0.4677703380584717, - "learning_rate": 8.734188049694075e-06, - "loss": 0.3768, - "step": 11336 - }, - { - "epoch": 0.7409319652310307, - "grad_norm": 0.5017822980880737, - "learning_rate": 8.733955826979439e-06, - "loss": 0.4115, - "step": 11337 - }, - { - "epoch": 0.7409973204365727, - "grad_norm": 0.4488432705402374, - "learning_rate": 8.733723586053006e-06, - "loss": 0.3549, - "step": 11338 - }, - { - "epoch": 0.7410626756421149, - "grad_norm": 0.4527190327644348, - "learning_rate": 8.733491326915909e-06, - "loss": 0.3453, - "step": 11339 - }, - { - "epoch": 0.741128030847657, - "grad_norm": 0.4319392144680023, - "learning_rate": 8.733259049569282e-06, - "loss": 0.3334, - "step": 11340 - }, - { - "epoch": 0.7411933860531992, - "grad_norm": 0.4645390212535858, - "learning_rate": 8.733026754014258e-06, - "loss": 0.3774, - "step": 11341 - }, - { - "epoch": 0.7412587412587412, - "grad_norm": 0.4230360984802246, - "learning_rate": 8.73279444025197e-06, - "loss": 0.3439, - "step": 11342 - }, - { - "epoch": 0.7413240964642834, - "grad_norm": 0.415467232465744, - "learning_rate": 8.73256210828355e-06, - "loss": 0.3112, - "step": 11343 - }, - { - "epoch": 0.7413894516698255, - "grad_norm": 0.4561569094657898, - "learning_rate": 8.732329758110128e-06, - "loss": 0.398, - "step": 11344 - }, - { - "epoch": 0.7414548068753676, - "grad_norm": 0.4417307674884796, - "learning_rate": 8.732097389732845e-06, - "loss": 0.381, - "step": 11345 - }, - { - "epoch": 0.7415201620809098, - "grad_norm": 0.4971565306186676, - "learning_rate": 8.731865003152829e-06, - "loss": 0.4116, - "step": 11346 - }, - { - "epoch": 0.7415855172864518, - "grad_norm": 0.4177454710006714, - "learning_rate": 8.731632598371213e-06, - "loss": 0.3468, - "step": 11347 - }, - { - "epoch": 0.741650872491994, - "grad_norm": 0.4392794668674469, - "learning_rate": 8.731400175389133e-06, - "loss": 0.3693, - "step": 11348 - }, - { - "epoch": 0.7417162276975361, - "grad_norm": 0.4654693901538849, - "learning_rate": 8.731167734207722e-06, - "loss": 0.4123, - "step": 11349 - }, - { - "epoch": 0.7417815829030783, - "grad_norm": 0.4322023093700409, - "learning_rate": 8.730935274828112e-06, - "loss": 0.3722, - "step": 11350 - }, - { - "epoch": 0.7418469381086203, - "grad_norm": 0.4423964321613312, - "learning_rate": 8.73070279725144e-06, - "loss": 0.3735, - "step": 11351 - }, - { - "epoch": 0.7419122933141624, - "grad_norm": 0.45737385749816895, - "learning_rate": 8.730470301478836e-06, - "loss": 0.3687, - "step": 11352 - }, - { - "epoch": 0.7419776485197046, - "grad_norm": 0.45981091260910034, - "learning_rate": 8.730237787511438e-06, - "loss": 0.4041, - "step": 11353 - }, - { - "epoch": 0.7420430037252467, - "grad_norm": 0.44975706934928894, - "learning_rate": 8.730005255350377e-06, - "loss": 0.4054, - "step": 11354 - }, - { - "epoch": 0.7421083589307889, - "grad_norm": 0.43052786588668823, - "learning_rate": 8.72977270499679e-06, - "loss": 0.3409, - "step": 11355 - }, - { - "epoch": 0.7421737141363309, - "grad_norm": 0.4633021652698517, - "learning_rate": 8.729540136451808e-06, - "loss": 0.456, - "step": 11356 - }, - { - "epoch": 0.7422390693418731, - "grad_norm": 0.44586482644081116, - "learning_rate": 8.729307549716565e-06, - "loss": 0.4209, - "step": 11357 - }, - { - "epoch": 0.7423044245474152, - "grad_norm": 0.44594958424568176, - "learning_rate": 8.7290749447922e-06, - "loss": 0.3821, - "step": 11358 - }, - { - "epoch": 0.7423697797529574, - "grad_norm": 0.43272367119789124, - "learning_rate": 8.728842321679842e-06, - "loss": 0.3524, - "step": 11359 - }, - { - "epoch": 0.7424351349584994, - "grad_norm": 0.46184101700782776, - "learning_rate": 8.72860968038063e-06, - "loss": 0.3372, - "step": 11360 - }, - { - "epoch": 0.7425004901640415, - "grad_norm": 0.4214572012424469, - "learning_rate": 8.728377020895696e-06, - "loss": 0.3307, - "step": 11361 - }, - { - "epoch": 0.7425658453695837, - "grad_norm": 0.4390409290790558, - "learning_rate": 8.728144343226177e-06, - "loss": 0.3887, - "step": 11362 - }, - { - "epoch": 0.7426312005751258, - "grad_norm": 0.44752877950668335, - "learning_rate": 8.727911647373206e-06, - "loss": 0.3604, - "step": 11363 - }, - { - "epoch": 0.742696555780668, - "grad_norm": 0.4602724611759186, - "learning_rate": 8.727678933337918e-06, - "loss": 0.3972, - "step": 11364 - }, - { - "epoch": 0.74276191098621, - "grad_norm": 0.4244244396686554, - "learning_rate": 8.727446201121447e-06, - "loss": 0.3764, - "step": 11365 - }, - { - "epoch": 0.7428272661917522, - "grad_norm": 0.4103724956512451, - "learning_rate": 8.727213450724931e-06, - "loss": 0.3205, - "step": 11366 - }, - { - "epoch": 0.7428926213972943, - "grad_norm": 0.45078158378601074, - "learning_rate": 8.726980682149503e-06, - "loss": 0.3665, - "step": 11367 - }, - { - "epoch": 0.7429579766028365, - "grad_norm": 0.4778234362602234, - "learning_rate": 8.726747895396302e-06, - "loss": 0.374, - "step": 11368 - }, - { - "epoch": 0.7430233318083785, - "grad_norm": 0.4740595519542694, - "learning_rate": 8.726515090466456e-06, - "loss": 0.421, - "step": 11369 - }, - { - "epoch": 0.7430886870139206, - "grad_norm": 0.43524232506752014, - "learning_rate": 8.726282267361109e-06, - "loss": 0.3649, - "step": 11370 - }, - { - "epoch": 0.7431540422194628, - "grad_norm": 0.4340595602989197, - "learning_rate": 8.72604942608139e-06, - "loss": 0.3752, - "step": 11371 - }, - { - "epoch": 0.7432193974250049, - "grad_norm": 0.4169885814189911, - "learning_rate": 8.725816566628437e-06, - "loss": 0.3461, - "step": 11372 - }, - { - "epoch": 0.743284752630547, - "grad_norm": 0.4595286250114441, - "learning_rate": 8.725583689003388e-06, - "loss": 0.4014, - "step": 11373 - }, - { - "epoch": 0.7433501078360891, - "grad_norm": 0.4522557556629181, - "learning_rate": 8.725350793207374e-06, - "loss": 0.3678, - "step": 11374 - }, - { - "epoch": 0.7434154630416313, - "grad_norm": 0.4524635076522827, - "learning_rate": 8.725117879241536e-06, - "loss": 0.3877, - "step": 11375 - }, - { - "epoch": 0.7434808182471734, - "grad_norm": 0.4340183436870575, - "learning_rate": 8.724884947107006e-06, - "loss": 0.3676, - "step": 11376 - }, - { - "epoch": 0.7435461734527156, - "grad_norm": 0.45854222774505615, - "learning_rate": 8.724651996804922e-06, - "loss": 0.4319, - "step": 11377 - }, - { - "epoch": 0.7436115286582576, - "grad_norm": 0.4483458697795868, - "learning_rate": 8.72441902833642e-06, - "loss": 0.3776, - "step": 11378 - }, - { - "epoch": 0.7436768838637997, - "grad_norm": 0.44911783933639526, - "learning_rate": 8.724186041702636e-06, - "loss": 0.369, - "step": 11379 - }, - { - "epoch": 0.7437422390693419, - "grad_norm": 0.4570876657962799, - "learning_rate": 8.723953036904707e-06, - "loss": 0.3888, - "step": 11380 - }, - { - "epoch": 0.743807594274884, - "grad_norm": 0.45264339447021484, - "learning_rate": 8.723720013943769e-06, - "loss": 0.3889, - "step": 11381 - }, - { - "epoch": 0.7438729494804261, - "grad_norm": 0.4470329284667969, - "learning_rate": 8.723486972820957e-06, - "loss": 0.3413, - "step": 11382 - }, - { - "epoch": 0.7439383046859682, - "grad_norm": 0.46752843260765076, - "learning_rate": 8.72325391353741e-06, - "loss": 0.4063, - "step": 11383 - }, - { - "epoch": 0.7440036598915104, - "grad_norm": 0.45190879702568054, - "learning_rate": 8.723020836094265e-06, - "loss": 0.4091, - "step": 11384 - }, - { - "epoch": 0.7440690150970525, - "grad_norm": 0.4984181225299835, - "learning_rate": 8.722787740492655e-06, - "loss": 0.4156, - "step": 11385 - }, - { - "epoch": 0.7441343703025945, - "grad_norm": 0.44895896315574646, - "learning_rate": 8.72255462673372e-06, - "loss": 0.3793, - "step": 11386 - }, - { - "epoch": 0.7441997255081367, - "grad_norm": 0.4525301158428192, - "learning_rate": 8.7223214948186e-06, - "loss": 0.3845, - "step": 11387 - }, - { - "epoch": 0.7442650807136788, - "grad_norm": 0.4790029525756836, - "learning_rate": 8.722088344748425e-06, - "loss": 0.4163, - "step": 11388 - }, - { - "epoch": 0.744330435919221, - "grad_norm": 0.46557191014289856, - "learning_rate": 8.721855176524337e-06, - "loss": 0.4317, - "step": 11389 - }, - { - "epoch": 0.7443957911247631, - "grad_norm": 0.49655681848526, - "learning_rate": 8.721621990147472e-06, - "loss": 0.4354, - "step": 11390 - }, - { - "epoch": 0.7444611463303052, - "grad_norm": 0.4580974578857422, - "learning_rate": 8.721388785618967e-06, - "loss": 0.4089, - "step": 11391 - }, - { - "epoch": 0.7445265015358473, - "grad_norm": 0.4186343252658844, - "learning_rate": 8.721155562939961e-06, - "loss": 0.3283, - "step": 11392 - }, - { - "epoch": 0.7445918567413895, - "grad_norm": 0.4594210684299469, - "learning_rate": 8.720922322111587e-06, - "loss": 0.3889, - "step": 11393 - }, - { - "epoch": 0.7446572119469316, - "grad_norm": 0.4190291464328766, - "learning_rate": 8.720689063134989e-06, - "loss": 0.3284, - "step": 11394 - }, - { - "epoch": 0.7447225671524736, - "grad_norm": 0.4274213910102844, - "learning_rate": 8.720455786011299e-06, - "loss": 0.354, - "step": 11395 - }, - { - "epoch": 0.7447879223580158, - "grad_norm": 0.421599805355072, - "learning_rate": 8.720222490741659e-06, - "loss": 0.3247, - "step": 11396 - }, - { - "epoch": 0.7448532775635579, - "grad_norm": 0.4587099552154541, - "learning_rate": 8.719989177327204e-06, - "loss": 0.401, - "step": 11397 - }, - { - "epoch": 0.7449186327691001, - "grad_norm": 0.535896360874176, - "learning_rate": 8.719755845769073e-06, - "loss": 0.4355, - "step": 11398 - }, - { - "epoch": 0.7449839879746422, - "grad_norm": 0.4585549533367157, - "learning_rate": 8.719522496068405e-06, - "loss": 0.3988, - "step": 11399 - }, - { - "epoch": 0.7450493431801843, - "grad_norm": 0.4203391969203949, - "learning_rate": 8.719289128226336e-06, - "loss": 0.3494, - "step": 11400 - }, - { - "epoch": 0.7451146983857264, - "grad_norm": 0.44131430983543396, - "learning_rate": 8.719055742244007e-06, - "loss": 0.35, - "step": 11401 - }, - { - "epoch": 0.7451800535912686, - "grad_norm": 0.40603551268577576, - "learning_rate": 8.718822338122554e-06, - "loss": 0.3217, - "step": 11402 - }, - { - "epoch": 0.7452454087968107, - "grad_norm": 0.45532020926475525, - "learning_rate": 8.718588915863116e-06, - "loss": 0.403, - "step": 11403 - }, - { - "epoch": 0.7453107640023527, - "grad_norm": 0.44275617599487305, - "learning_rate": 8.71835547546683e-06, - "loss": 0.3747, - "step": 11404 - }, - { - "epoch": 0.7453761192078949, - "grad_norm": 0.4695359766483307, - "learning_rate": 8.71812201693484e-06, - "loss": 0.4013, - "step": 11405 - }, - { - "epoch": 0.745441474413437, - "grad_norm": 0.46241793036460876, - "learning_rate": 8.717888540268279e-06, - "loss": 0.3933, - "step": 11406 - }, - { - "epoch": 0.7455068296189792, - "grad_norm": 0.4245399832725525, - "learning_rate": 8.717655045468286e-06, - "loss": 0.355, - "step": 11407 - }, - { - "epoch": 0.7455721848245213, - "grad_norm": 0.42461681365966797, - "learning_rate": 8.717421532536004e-06, - "loss": 0.3682, - "step": 11408 - }, - { - "epoch": 0.7456375400300634, - "grad_norm": 0.4408150911331177, - "learning_rate": 8.717188001472567e-06, - "loss": 0.3808, - "step": 11409 - }, - { - "epoch": 0.7457028952356055, - "grad_norm": 0.4594395160675049, - "learning_rate": 8.716954452279118e-06, - "loss": 0.4048, - "step": 11410 - }, - { - "epoch": 0.7457682504411476, - "grad_norm": 0.445923775434494, - "learning_rate": 8.716720884956793e-06, - "loss": 0.3763, - "step": 11411 - }, - { - "epoch": 0.7458336056466898, - "grad_norm": 0.4819449186325073, - "learning_rate": 8.716487299506733e-06, - "loss": 0.4028, - "step": 11412 - }, - { - "epoch": 0.7458989608522318, - "grad_norm": 0.43233078718185425, - "learning_rate": 8.716253695930079e-06, - "loss": 0.4006, - "step": 11413 - }, - { - "epoch": 0.745964316057774, - "grad_norm": 0.4277048707008362, - "learning_rate": 8.716020074227966e-06, - "loss": 0.3059, - "step": 11414 - }, - { - "epoch": 0.7460296712633161, - "grad_norm": 0.4572831094264984, - "learning_rate": 8.715786434401537e-06, - "loss": 0.4158, - "step": 11415 - }, - { - "epoch": 0.7460950264688583, - "grad_norm": 0.4788439869880676, - "learning_rate": 8.71555277645193e-06, - "loss": 0.4349, - "step": 11416 - }, - { - "epoch": 0.7461603816744004, - "grad_norm": 0.4642075002193451, - "learning_rate": 8.715319100380284e-06, - "loss": 0.3932, - "step": 11417 - }, - { - "epoch": 0.7462257368799425, - "grad_norm": 0.4323691129684448, - "learning_rate": 8.71508540618774e-06, - "loss": 0.3656, - "step": 11418 - }, - { - "epoch": 0.7462910920854846, - "grad_norm": 0.441805362701416, - "learning_rate": 8.71485169387544e-06, - "loss": 0.3654, - "step": 11419 - }, - { - "epoch": 0.7463564472910267, - "grad_norm": 0.46433573961257935, - "learning_rate": 8.714617963444518e-06, - "loss": 0.4275, - "step": 11420 - }, - { - "epoch": 0.7464218024965689, - "grad_norm": 0.4299795925617218, - "learning_rate": 8.714384214896118e-06, - "loss": 0.3541, - "step": 11421 - }, - { - "epoch": 0.7464871577021109, - "grad_norm": 0.44786059856414795, - "learning_rate": 8.714150448231379e-06, - "loss": 0.3939, - "step": 11422 - }, - { - "epoch": 0.7465525129076531, - "grad_norm": 0.41369113326072693, - "learning_rate": 8.713916663451443e-06, - "loss": 0.3466, - "step": 11423 - }, - { - "epoch": 0.7466178681131952, - "grad_norm": 0.4648091197013855, - "learning_rate": 8.713682860557448e-06, - "loss": 0.3972, - "step": 11424 - }, - { - "epoch": 0.7466832233187374, - "grad_norm": 0.45776987075805664, - "learning_rate": 8.713449039550536e-06, - "loss": 0.3989, - "step": 11425 - }, - { - "epoch": 0.7467485785242794, - "grad_norm": 0.4523124098777771, - "learning_rate": 8.713215200431845e-06, - "loss": 0.3766, - "step": 11426 - }, - { - "epoch": 0.7468139337298216, - "grad_norm": 0.4293292462825775, - "learning_rate": 8.71298134320252e-06, - "loss": 0.3733, - "step": 11427 - }, - { - "epoch": 0.7468792889353637, - "grad_norm": 0.42689448595046997, - "learning_rate": 8.712747467863696e-06, - "loss": 0.3553, - "step": 11428 - }, - { - "epoch": 0.7469446441409058, - "grad_norm": 0.4537889361381531, - "learning_rate": 8.712513574416517e-06, - "loss": 0.4033, - "step": 11429 - }, - { - "epoch": 0.747009999346448, - "grad_norm": 0.4374942183494568, - "learning_rate": 8.712279662862123e-06, - "loss": 0.3793, - "step": 11430 - }, - { - "epoch": 0.74707535455199, - "grad_norm": 0.42012590169906616, - "learning_rate": 8.712045733201655e-06, - "loss": 0.3506, - "step": 11431 - }, - { - "epoch": 0.7471407097575322, - "grad_norm": 0.44662463665008545, - "learning_rate": 8.711811785436254e-06, - "loss": 0.3759, - "step": 11432 - }, - { - "epoch": 0.7472060649630743, - "grad_norm": 0.48212146759033203, - "learning_rate": 8.711577819567062e-06, - "loss": 0.4619, - "step": 11433 - }, - { - "epoch": 0.7472714201686165, - "grad_norm": 0.4506356120109558, - "learning_rate": 8.71134383559522e-06, - "loss": 0.3988, - "step": 11434 - }, - { - "epoch": 0.7473367753741585, - "grad_norm": 0.43944063782691956, - "learning_rate": 8.711109833521865e-06, - "loss": 0.3839, - "step": 11435 - }, - { - "epoch": 0.7474021305797006, - "grad_norm": 0.4397209584712982, - "learning_rate": 8.710875813348144e-06, - "loss": 0.4005, - "step": 11436 - }, - { - "epoch": 0.7474674857852428, - "grad_norm": 0.45714902877807617, - "learning_rate": 8.710641775075194e-06, - "loss": 0.3947, - "step": 11437 - }, - { - "epoch": 0.7475328409907849, - "grad_norm": 0.4064410328865051, - "learning_rate": 8.710407718704162e-06, - "loss": 0.3565, - "step": 11438 - }, - { - "epoch": 0.7475981961963271, - "grad_norm": 0.4440568685531616, - "learning_rate": 8.710173644236185e-06, - "loss": 0.4005, - "step": 11439 - }, - { - "epoch": 0.7476635514018691, - "grad_norm": 0.44599300622940063, - "learning_rate": 8.709939551672404e-06, - "loss": 0.3689, - "step": 11440 - }, - { - "epoch": 0.7477289066074113, - "grad_norm": 0.45519688725471497, - "learning_rate": 8.709705441013965e-06, - "loss": 0.3883, - "step": 11441 - }, - { - "epoch": 0.7477942618129534, - "grad_norm": 0.4230014681816101, - "learning_rate": 8.709471312262005e-06, - "loss": 0.3695, - "step": 11442 - }, - { - "epoch": 0.7478596170184956, - "grad_norm": 0.45030948519706726, - "learning_rate": 8.70923716541767e-06, - "loss": 0.3881, - "step": 11443 - }, - { - "epoch": 0.7479249722240376, - "grad_norm": 0.4607604146003723, - "learning_rate": 8.7090030004821e-06, - "loss": 0.4196, - "step": 11444 - }, - { - "epoch": 0.7479903274295797, - "grad_norm": 0.4362353980541229, - "learning_rate": 8.708768817456437e-06, - "loss": 0.3863, - "step": 11445 - }, - { - "epoch": 0.7480556826351219, - "grad_norm": 0.406549870967865, - "learning_rate": 8.708534616341824e-06, - "loss": 0.3502, - "step": 11446 - }, - { - "epoch": 0.748121037840664, - "grad_norm": 0.46241170167922974, - "learning_rate": 8.708300397139403e-06, - "loss": 0.4045, - "step": 11447 - }, - { - "epoch": 0.7481863930462062, - "grad_norm": 0.44195207953453064, - "learning_rate": 8.708066159850315e-06, - "loss": 0.3711, - "step": 11448 - }, - { - "epoch": 0.7482517482517482, - "grad_norm": 0.4226051867008209, - "learning_rate": 8.707831904475706e-06, - "loss": 0.38, - "step": 11449 - }, - { - "epoch": 0.7483171034572904, - "grad_norm": 0.44911104440689087, - "learning_rate": 8.707597631016714e-06, - "loss": 0.4057, - "step": 11450 - }, - { - "epoch": 0.7483824586628325, - "grad_norm": 0.42989981174468994, - "learning_rate": 8.707363339474486e-06, - "loss": 0.376, - "step": 11451 - }, - { - "epoch": 0.7484478138683747, - "grad_norm": 0.4480683207511902, - "learning_rate": 8.70712902985016e-06, - "loss": 0.3671, - "step": 11452 - }, - { - "epoch": 0.7485131690739167, - "grad_norm": 0.44000551104545593, - "learning_rate": 8.706894702144883e-06, - "loss": 0.3978, - "step": 11453 - }, - { - "epoch": 0.7485785242794588, - "grad_norm": 0.4732186794281006, - "learning_rate": 8.706660356359796e-06, - "loss": 0.3743, - "step": 11454 - }, - { - "epoch": 0.748643879485001, - "grad_norm": 0.4579940736293793, - "learning_rate": 8.706425992496043e-06, - "loss": 0.413, - "step": 11455 - }, - { - "epoch": 0.7487092346905431, - "grad_norm": 0.4685218334197998, - "learning_rate": 8.706191610554767e-06, - "loss": 0.4361, - "step": 11456 - }, - { - "epoch": 0.7487745898960853, - "grad_norm": 0.413667768239975, - "learning_rate": 8.70595721053711e-06, - "loss": 0.311, - "step": 11457 - }, - { - "epoch": 0.7488399451016273, - "grad_norm": 0.43470972776412964, - "learning_rate": 8.705722792444215e-06, - "loss": 0.3814, - "step": 11458 - }, - { - "epoch": 0.7489053003071695, - "grad_norm": 0.44430139660835266, - "learning_rate": 8.705488356277227e-06, - "loss": 0.3979, - "step": 11459 - }, - { - "epoch": 0.7489706555127116, - "grad_norm": 0.44423583149909973, - "learning_rate": 8.705253902037289e-06, - "loss": 0.4073, - "step": 11460 - }, - { - "epoch": 0.7490360107182538, - "grad_norm": 0.4940393567085266, - "learning_rate": 8.705019429725543e-06, - "loss": 0.4367, - "step": 11461 - }, - { - "epoch": 0.7491013659237958, - "grad_norm": 0.5035269260406494, - "learning_rate": 8.704784939343134e-06, - "loss": 0.4584, - "step": 11462 - }, - { - "epoch": 0.7491667211293379, - "grad_norm": 0.47204816341400146, - "learning_rate": 8.704550430891206e-06, - "loss": 0.3979, - "step": 11463 - }, - { - "epoch": 0.7492320763348801, - "grad_norm": 0.43368858098983765, - "learning_rate": 8.704315904370901e-06, - "loss": 0.3773, - "step": 11464 - }, - { - "epoch": 0.7492974315404222, - "grad_norm": 0.4607824385166168, - "learning_rate": 8.704081359783365e-06, - "loss": 0.3685, - "step": 11465 - }, - { - "epoch": 0.7493627867459643, - "grad_norm": 0.462039053440094, - "learning_rate": 8.70384679712974e-06, - "loss": 0.436, - "step": 11466 - }, - { - "epoch": 0.7494281419515064, - "grad_norm": 0.4594377279281616, - "learning_rate": 8.703612216411172e-06, - "loss": 0.4097, - "step": 11467 - }, - { - "epoch": 0.7494934971570486, - "grad_norm": 0.47541284561157227, - "learning_rate": 8.703377617628804e-06, - "loss": 0.3993, - "step": 11468 - }, - { - "epoch": 0.7495588523625907, - "grad_norm": 0.4668521583080292, - "learning_rate": 8.70314300078378e-06, - "loss": 0.4087, - "step": 11469 - }, - { - "epoch": 0.7496242075681327, - "grad_norm": 0.44209030270576477, - "learning_rate": 8.702908365877245e-06, - "loss": 0.3757, - "step": 11470 - }, - { - "epoch": 0.7496895627736749, - "grad_norm": 0.44060683250427246, - "learning_rate": 8.702673712910344e-06, - "loss": 0.3288, - "step": 11471 - }, - { - "epoch": 0.749754917979217, - "grad_norm": 0.4443775415420532, - "learning_rate": 8.702439041884219e-06, - "loss": 0.3851, - "step": 11472 - }, - { - "epoch": 0.7498202731847592, - "grad_norm": 0.4726814031600952, - "learning_rate": 8.702204352800016e-06, - "loss": 0.4268, - "step": 11473 - }, - { - "epoch": 0.7498856283903013, - "grad_norm": 0.48055610060691833, - "learning_rate": 8.701969645658881e-06, - "loss": 0.4545, - "step": 11474 - }, - { - "epoch": 0.7499509835958434, - "grad_norm": 0.3886030614376068, - "learning_rate": 8.701734920461957e-06, - "loss": 0.2669, - "step": 11475 - }, - { - "epoch": 0.7500163388013855, - "grad_norm": 0.4247395396232605, - "learning_rate": 8.70150017721039e-06, - "loss": 0.3886, - "step": 11476 - }, - { - "epoch": 0.7500816940069277, - "grad_norm": 0.4572971761226654, - "learning_rate": 8.701265415905324e-06, - "loss": 0.4013, - "step": 11477 - }, - { - "epoch": 0.7501470492124698, - "grad_norm": 0.4315987229347229, - "learning_rate": 8.701030636547905e-06, - "loss": 0.3727, - "step": 11478 - }, - { - "epoch": 0.7502124044180118, - "grad_norm": 0.47833821177482605, - "learning_rate": 8.700795839139276e-06, - "loss": 0.4174, - "step": 11479 - }, - { - "epoch": 0.750277759623554, - "grad_norm": 0.42190808057785034, - "learning_rate": 8.700561023680584e-06, - "loss": 0.3662, - "step": 11480 - }, - { - "epoch": 0.7503431148290961, - "grad_norm": 0.4663477838039398, - "learning_rate": 8.700326190172974e-06, - "loss": 0.4081, - "step": 11481 - }, - { - "epoch": 0.7504084700346383, - "grad_norm": 0.40893182158470154, - "learning_rate": 8.70009133861759e-06, - "loss": 0.3448, - "step": 11482 - }, - { - "epoch": 0.7504738252401804, - "grad_norm": 0.4747457802295685, - "learning_rate": 8.699856469015581e-06, - "loss": 0.3802, - "step": 11483 - }, - { - "epoch": 0.7505391804457225, - "grad_norm": 0.4700453281402588, - "learning_rate": 8.69962158136809e-06, - "loss": 0.4044, - "step": 11484 - }, - { - "epoch": 0.7506045356512646, - "grad_norm": 0.4343928396701813, - "learning_rate": 8.699386675676263e-06, - "loss": 0.3571, - "step": 11485 - }, - { - "epoch": 0.7506698908568068, - "grad_norm": 0.45376333594322205, - "learning_rate": 8.699151751941245e-06, - "loss": 0.4145, - "step": 11486 - }, - { - "epoch": 0.7507352460623489, - "grad_norm": 0.4375240206718445, - "learning_rate": 8.69891681016418e-06, - "loss": 0.4194, - "step": 11487 - }, - { - "epoch": 0.7508006012678909, - "grad_norm": 0.4365542531013489, - "learning_rate": 8.698681850346218e-06, - "loss": 0.3651, - "step": 11488 - }, - { - "epoch": 0.7508659564734331, - "grad_norm": 0.448963463306427, - "learning_rate": 8.698446872488504e-06, - "loss": 0.3901, - "step": 11489 - }, - { - "epoch": 0.7509313116789752, - "grad_norm": 0.4558291733264923, - "learning_rate": 8.698211876592184e-06, - "loss": 0.4189, - "step": 11490 - }, - { - "epoch": 0.7509966668845174, - "grad_norm": 0.4616054892539978, - "learning_rate": 8.697976862658401e-06, - "loss": 0.3803, - "step": 11491 - }, - { - "epoch": 0.7510620220900595, - "grad_norm": 0.44523996114730835, - "learning_rate": 8.697741830688307e-06, - "loss": 0.389, - "step": 11492 - }, - { - "epoch": 0.7511273772956016, - "grad_norm": 0.4284723997116089, - "learning_rate": 8.697506780683043e-06, - "loss": 0.3622, - "step": 11493 - }, - { - "epoch": 0.7511927325011437, - "grad_norm": 0.423247754573822, - "learning_rate": 8.697271712643758e-06, - "loss": 0.2948, - "step": 11494 - }, - { - "epoch": 0.7512580877066858, - "grad_norm": 0.4341980814933777, - "learning_rate": 8.697036626571598e-06, - "loss": 0.3732, - "step": 11495 - }, - { - "epoch": 0.751323442912228, - "grad_norm": 0.4351692199707031, - "learning_rate": 8.696801522467708e-06, - "loss": 0.3614, - "step": 11496 - }, - { - "epoch": 0.75138879811777, - "grad_norm": 0.47556573152542114, - "learning_rate": 8.696566400333239e-06, - "loss": 0.4038, - "step": 11497 - }, - { - "epoch": 0.7514541533233122, - "grad_norm": 0.4805038571357727, - "learning_rate": 8.696331260169332e-06, - "loss": 0.4759, - "step": 11498 - }, - { - "epoch": 0.7515195085288543, - "grad_norm": 0.4455169141292572, - "learning_rate": 8.696096101977141e-06, - "loss": 0.3553, - "step": 11499 - }, - { - "epoch": 0.7515848637343965, - "grad_norm": 0.41066446900367737, - "learning_rate": 8.695860925757807e-06, - "loss": 0.3282, - "step": 11500 - }, - { - "epoch": 0.7516502189399386, - "grad_norm": 0.47480136156082153, - "learning_rate": 8.695625731512477e-06, - "loss": 0.4168, - "step": 11501 - }, - { - "epoch": 0.7517155741454807, - "grad_norm": 0.4357443153858185, - "learning_rate": 8.695390519242302e-06, - "loss": 0.3992, - "step": 11502 - }, - { - "epoch": 0.7517809293510228, - "grad_norm": 0.4462246894836426, - "learning_rate": 8.695155288948425e-06, - "loss": 0.3938, - "step": 11503 - }, - { - "epoch": 0.7518462845565649, - "grad_norm": 0.4426057040691376, - "learning_rate": 8.694920040632e-06, - "loss": 0.3831, - "step": 11504 - }, - { - "epoch": 0.7519116397621071, - "grad_norm": 0.44983747601509094, - "learning_rate": 8.694684774294167e-06, - "loss": 0.3367, - "step": 11505 - }, - { - "epoch": 0.7519769949676491, - "grad_norm": 0.4833635985851288, - "learning_rate": 8.694449489936076e-06, - "loss": 0.4439, - "step": 11506 - }, - { - "epoch": 0.7520423501731913, - "grad_norm": 0.453665167093277, - "learning_rate": 8.694214187558875e-06, - "loss": 0.3747, - "step": 11507 - }, - { - "epoch": 0.7521077053787334, - "grad_norm": 0.4427410662174225, - "learning_rate": 8.693978867163712e-06, - "loss": 0.3837, - "step": 11508 - }, - { - "epoch": 0.7521730605842756, - "grad_norm": 0.44865310192108154, - "learning_rate": 8.693743528751734e-06, - "loss": 0.4089, - "step": 11509 - }, - { - "epoch": 0.7522384157898176, - "grad_norm": 0.4555530846118927, - "learning_rate": 8.69350817232409e-06, - "loss": 0.3709, - "step": 11510 - }, - { - "epoch": 0.7523037709953598, - "grad_norm": 0.45255982875823975, - "learning_rate": 8.693272797881926e-06, - "loss": 0.3983, - "step": 11511 - }, - { - "epoch": 0.7523691262009019, - "grad_norm": 0.4345415532588959, - "learning_rate": 8.693037405426392e-06, - "loss": 0.3691, - "step": 11512 - }, - { - "epoch": 0.752434481406444, - "grad_norm": 0.4903244078159332, - "learning_rate": 8.692801994958636e-06, - "loss": 0.3971, - "step": 11513 - }, - { - "epoch": 0.7524998366119862, - "grad_norm": 0.4598952531814575, - "learning_rate": 8.692566566479803e-06, - "loss": 0.3886, - "step": 11514 - }, - { - "epoch": 0.7525651918175282, - "grad_norm": 0.4512850046157837, - "learning_rate": 8.692331119991046e-06, - "loss": 0.3896, - "step": 11515 - }, - { - "epoch": 0.7526305470230704, - "grad_norm": 0.4707580804824829, - "learning_rate": 8.69209565549351e-06, - "loss": 0.3967, - "step": 11516 - }, - { - "epoch": 0.7526959022286125, - "grad_norm": 0.41047024726867676, - "learning_rate": 8.691860172988344e-06, - "loss": 0.3369, - "step": 11517 - }, - { - "epoch": 0.7527612574341547, - "grad_norm": 0.4959609806537628, - "learning_rate": 8.691624672476698e-06, - "loss": 0.4684, - "step": 11518 - }, - { - "epoch": 0.7528266126396967, - "grad_norm": 0.5354952216148376, - "learning_rate": 8.691389153959717e-06, - "loss": 0.4638, - "step": 11519 - }, - { - "epoch": 0.7528919678452388, - "grad_norm": 0.42644286155700684, - "learning_rate": 8.691153617438555e-06, - "loss": 0.332, - "step": 11520 - }, - { - "epoch": 0.752957323050781, - "grad_norm": 0.5352382659912109, - "learning_rate": 8.690918062914357e-06, - "loss": 0.3896, - "step": 11521 - }, - { - "epoch": 0.7530226782563231, - "grad_norm": 0.49492889642715454, - "learning_rate": 8.690682490388273e-06, - "loss": 0.4288, - "step": 11522 - }, - { - "epoch": 0.7530880334618653, - "grad_norm": 0.4558436870574951, - "learning_rate": 8.690446899861453e-06, - "loss": 0.3642, - "step": 11523 - }, - { - "epoch": 0.7531533886674073, - "grad_norm": 0.44027870893478394, - "learning_rate": 8.690211291335045e-06, - "loss": 0.3486, - "step": 11524 - }, - { - "epoch": 0.7532187438729495, - "grad_norm": 0.4419024586677551, - "learning_rate": 8.689975664810197e-06, - "loss": 0.3875, - "step": 11525 - }, - { - "epoch": 0.7532840990784916, - "grad_norm": 0.45365065336227417, - "learning_rate": 8.689740020288059e-06, - "loss": 0.3595, - "step": 11526 - }, - { - "epoch": 0.7533494542840338, - "grad_norm": 0.44982078671455383, - "learning_rate": 8.689504357769781e-06, - "loss": 0.325, - "step": 11527 - }, - { - "epoch": 0.7534148094895758, - "grad_norm": 0.45227643847465515, - "learning_rate": 8.689268677256514e-06, - "loss": 0.3648, - "step": 11528 - }, - { - "epoch": 0.7534801646951179, - "grad_norm": 0.4553734362125397, - "learning_rate": 8.689032978749402e-06, - "loss": 0.3971, - "step": 11529 - }, - { - "epoch": 0.7535455199006601, - "grad_norm": 0.4540592432022095, - "learning_rate": 8.6887972622496e-06, - "loss": 0.4231, - "step": 11530 - }, - { - "epoch": 0.7536108751062022, - "grad_norm": 0.3962876498699188, - "learning_rate": 8.688561527758257e-06, - "loss": 0.3009, - "step": 11531 - }, - { - "epoch": 0.7536762303117444, - "grad_norm": 0.4543502926826477, - "learning_rate": 8.68832577527652e-06, - "loss": 0.351, - "step": 11532 - }, - { - "epoch": 0.7537415855172864, - "grad_norm": 0.4949527978897095, - "learning_rate": 8.68809000480554e-06, - "loss": 0.4191, - "step": 11533 - }, - { - "epoch": 0.7538069407228286, - "grad_norm": 0.42827823758125305, - "learning_rate": 8.68785421634647e-06, - "loss": 0.3214, - "step": 11534 - }, - { - "epoch": 0.7538722959283707, - "grad_norm": 0.4728573262691498, - "learning_rate": 8.687618409900455e-06, - "loss": 0.4247, - "step": 11535 - }, - { - "epoch": 0.7539376511339129, - "grad_norm": 0.4322061836719513, - "learning_rate": 8.687382585468648e-06, - "loss": 0.3699, - "step": 11536 - }, - { - "epoch": 0.7540030063394549, - "grad_norm": 0.45697495341300964, - "learning_rate": 8.6871467430522e-06, - "loss": 0.4033, - "step": 11537 - }, - { - "epoch": 0.754068361544997, - "grad_norm": 0.446732759475708, - "learning_rate": 8.686910882652257e-06, - "loss": 0.3816, - "step": 11538 - }, - { - "epoch": 0.7541337167505392, - "grad_norm": 0.4661967158317566, - "learning_rate": 8.686675004269974e-06, - "loss": 0.401, - "step": 11539 - }, - { - "epoch": 0.7541990719560813, - "grad_norm": 0.46498775482177734, - "learning_rate": 8.6864391079065e-06, - "loss": 0.4261, - "step": 11540 - }, - { - "epoch": 0.7542644271616235, - "grad_norm": 0.4342498183250427, - "learning_rate": 8.686203193562985e-06, - "loss": 0.3716, - "step": 11541 - }, - { - "epoch": 0.7543297823671655, - "grad_norm": 0.42994990944862366, - "learning_rate": 8.685967261240583e-06, - "loss": 0.3809, - "step": 11542 - }, - { - "epoch": 0.7543951375727077, - "grad_norm": 0.4623600244522095, - "learning_rate": 8.685731310940437e-06, - "loss": 0.4416, - "step": 11543 - }, - { - "epoch": 0.7544604927782498, - "grad_norm": 0.40724843740463257, - "learning_rate": 8.685495342663706e-06, - "loss": 0.3419, - "step": 11544 - }, - { - "epoch": 0.754525847983792, - "grad_norm": 0.43778303265571594, - "learning_rate": 8.685259356411534e-06, - "loss": 0.3604, - "step": 11545 - }, - { - "epoch": 0.754591203189334, - "grad_norm": 0.4433067739009857, - "learning_rate": 8.685023352185078e-06, - "loss": 0.3869, - "step": 11546 - }, - { - "epoch": 0.7546565583948761, - "grad_norm": 0.4431113302707672, - "learning_rate": 8.684787329985488e-06, - "loss": 0.3949, - "step": 11547 - }, - { - "epoch": 0.7547219136004183, - "grad_norm": 0.44400447607040405, - "learning_rate": 8.684551289813911e-06, - "loss": 0.3815, - "step": 11548 - }, - { - "epoch": 0.7547872688059604, - "grad_norm": 0.474870890378952, - "learning_rate": 8.6843152316715e-06, - "loss": 0.4021, - "step": 11549 - }, - { - "epoch": 0.7548526240115025, - "grad_norm": 0.4529237449169159, - "learning_rate": 8.684079155559411e-06, - "loss": 0.3408, - "step": 11550 - }, - { - "epoch": 0.7549179792170446, - "grad_norm": 0.4417824149131775, - "learning_rate": 8.683843061478789e-06, - "loss": 0.3718, - "step": 11551 - }, - { - "epoch": 0.7549833344225868, - "grad_norm": 0.44871315360069275, - "learning_rate": 8.683606949430788e-06, - "loss": 0.3567, - "step": 11552 - }, - { - "epoch": 0.7550486896281289, - "grad_norm": 0.43852290511131287, - "learning_rate": 8.683370819416561e-06, - "loss": 0.3519, - "step": 11553 - }, - { - "epoch": 0.755114044833671, - "grad_norm": 0.43814173340797424, - "learning_rate": 8.683134671437257e-06, - "loss": 0.3662, - "step": 11554 - }, - { - "epoch": 0.7551794000392131, - "grad_norm": 0.4667989909648895, - "learning_rate": 8.68289850549403e-06, - "loss": 0.4218, - "step": 11555 - }, - { - "epoch": 0.7552447552447552, - "grad_norm": 0.470702588558197, - "learning_rate": 8.68266232158803e-06, - "loss": 0.4268, - "step": 11556 - }, - { - "epoch": 0.7553101104502974, - "grad_norm": 0.42009657621383667, - "learning_rate": 8.682426119720412e-06, - "loss": 0.3181, - "step": 11557 - }, - { - "epoch": 0.7553754656558395, - "grad_norm": 0.43755048513412476, - "learning_rate": 8.682189899892326e-06, - "loss": 0.3637, - "step": 11558 - }, - { - "epoch": 0.7554408208613816, - "grad_norm": 0.4409218430519104, - "learning_rate": 8.681953662104925e-06, - "loss": 0.3784, - "step": 11559 - }, - { - "epoch": 0.7555061760669237, - "grad_norm": 0.40497660636901855, - "learning_rate": 8.681717406359359e-06, - "loss": 0.3233, - "step": 11560 - }, - { - "epoch": 0.7555715312724659, - "grad_norm": 0.42020371556282043, - "learning_rate": 8.681481132656782e-06, - "loss": 0.3554, - "step": 11561 - }, - { - "epoch": 0.755636886478008, - "grad_norm": 0.42821750044822693, - "learning_rate": 8.681244840998347e-06, - "loss": 0.3715, - "step": 11562 - }, - { - "epoch": 0.75570224168355, - "grad_norm": 0.4186302423477173, - "learning_rate": 8.681008531385204e-06, - "loss": 0.3454, - "step": 11563 - }, - { - "epoch": 0.7557675968890922, - "grad_norm": 0.48060446977615356, - "learning_rate": 8.680772203818507e-06, - "loss": 0.3975, - "step": 11564 - }, - { - "epoch": 0.7558329520946343, - "grad_norm": 0.44453316926956177, - "learning_rate": 8.680535858299409e-06, - "loss": 0.3829, - "step": 11565 - }, - { - "epoch": 0.7558983073001765, - "grad_norm": 0.4027644693851471, - "learning_rate": 8.680299494829063e-06, - "loss": 0.343, - "step": 11566 - }, - { - "epoch": 0.7559636625057186, - "grad_norm": 0.4423559010028839, - "learning_rate": 8.680063113408622e-06, - "loss": 0.3849, - "step": 11567 - }, - { - "epoch": 0.7560290177112607, - "grad_norm": 0.4409993886947632, - "learning_rate": 8.679826714039238e-06, - "loss": 0.4017, - "step": 11568 - }, - { - "epoch": 0.7560943729168028, - "grad_norm": 0.4309713840484619, - "learning_rate": 8.679590296722065e-06, - "loss": 0.3557, - "step": 11569 - }, - { - "epoch": 0.756159728122345, - "grad_norm": 0.4162459373474121, - "learning_rate": 8.679353861458252e-06, - "loss": 0.3479, - "step": 11570 - }, - { - "epoch": 0.7562250833278871, - "grad_norm": 0.4345146119594574, - "learning_rate": 8.67911740824896e-06, - "loss": 0.3962, - "step": 11571 - }, - { - "epoch": 0.7562904385334291, - "grad_norm": 0.4284350275993347, - "learning_rate": 8.678880937095336e-06, - "loss": 0.3865, - "step": 11572 - }, - { - "epoch": 0.7563557937389713, - "grad_norm": 0.427379310131073, - "learning_rate": 8.678644447998535e-06, - "loss": 0.3639, - "step": 11573 - }, - { - "epoch": 0.7564211489445134, - "grad_norm": 0.4237443804740906, - "learning_rate": 8.67840794095971e-06, - "loss": 0.3371, - "step": 11574 - }, - { - "epoch": 0.7564865041500556, - "grad_norm": 0.4654266834259033, - "learning_rate": 8.678171415980017e-06, - "loss": 0.3696, - "step": 11575 - }, - { - "epoch": 0.7565518593555977, - "grad_norm": 0.4269372522830963, - "learning_rate": 8.677934873060606e-06, - "loss": 0.352, - "step": 11576 - }, - { - "epoch": 0.7566172145611398, - "grad_norm": 0.44193196296691895, - "learning_rate": 8.677698312202634e-06, - "loss": 0.383, - "step": 11577 - }, - { - "epoch": 0.7566825697666819, - "grad_norm": 0.44568783044815063, - "learning_rate": 8.677461733407251e-06, - "loss": 0.3944, - "step": 11578 - }, - { - "epoch": 0.756747924972224, - "grad_norm": 0.4438956677913666, - "learning_rate": 8.677225136675616e-06, - "loss": 0.3602, - "step": 11579 - }, - { - "epoch": 0.7568132801777662, - "grad_norm": 0.4461032748222351, - "learning_rate": 8.676988522008878e-06, - "loss": 0.3527, - "step": 11580 - }, - { - "epoch": 0.7568786353833082, - "grad_norm": 0.45144277811050415, - "learning_rate": 8.676751889408192e-06, - "loss": 0.3758, - "step": 11581 - }, - { - "epoch": 0.7569439905888504, - "grad_norm": 0.3841136693954468, - "learning_rate": 8.676515238874716e-06, - "loss": 0.3076, - "step": 11582 - }, - { - "epoch": 0.7570093457943925, - "grad_norm": 0.4647257328033447, - "learning_rate": 8.676278570409602e-06, - "loss": 0.4046, - "step": 11583 - }, - { - "epoch": 0.7570747009999347, - "grad_norm": 0.4637050926685333, - "learning_rate": 8.676041884014001e-06, - "loss": 0.385, - "step": 11584 - }, - { - "epoch": 0.7571400562054768, - "grad_norm": 0.4185953736305237, - "learning_rate": 8.675805179689073e-06, - "loss": 0.3431, - "step": 11585 - }, - { - "epoch": 0.7572054114110189, - "grad_norm": 0.4225028157234192, - "learning_rate": 8.675568457435967e-06, - "loss": 0.3503, - "step": 11586 - }, - { - "epoch": 0.757270766616561, - "grad_norm": 0.41787388920783997, - "learning_rate": 8.67533171725584e-06, - "loss": 0.3315, - "step": 11587 - }, - { - "epoch": 0.7573361218221031, - "grad_norm": 0.4110637307167053, - "learning_rate": 8.67509495914985e-06, - "loss": 0.327, - "step": 11588 - }, - { - "epoch": 0.7574014770276453, - "grad_norm": 0.4453466534614563, - "learning_rate": 8.674858183119147e-06, - "loss": 0.3696, - "step": 11589 - }, - { - "epoch": 0.7574668322331873, - "grad_norm": 0.41781923174858093, - "learning_rate": 8.674621389164887e-06, - "loss": 0.3557, - "step": 11590 - }, - { - "epoch": 0.7575321874387295, - "grad_norm": 0.45862486958503723, - "learning_rate": 8.674384577288228e-06, - "loss": 0.3765, - "step": 11591 - }, - { - "epoch": 0.7575975426442716, - "grad_norm": 0.4591810703277588, - "learning_rate": 8.67414774749032e-06, - "loss": 0.4035, - "step": 11592 - }, - { - "epoch": 0.7576628978498138, - "grad_norm": 0.45626866817474365, - "learning_rate": 8.673910899772323e-06, - "loss": 0.3924, - "step": 11593 - }, - { - "epoch": 0.7577282530553558, - "grad_norm": 0.43373769521713257, - "learning_rate": 8.673674034135386e-06, - "loss": 0.382, - "step": 11594 - }, - { - "epoch": 0.757793608260898, - "grad_norm": 0.4272853136062622, - "learning_rate": 8.673437150580671e-06, - "loss": 0.3436, - "step": 11595 - }, - { - "epoch": 0.7578589634664401, - "grad_norm": 0.4375975430011749, - "learning_rate": 8.673200249109329e-06, - "loss": 0.3753, - "step": 11596 - }, - { - "epoch": 0.7579243186719822, - "grad_norm": 0.43419358134269714, - "learning_rate": 8.672963329722518e-06, - "loss": 0.3626, - "step": 11597 - }, - { - "epoch": 0.7579896738775244, - "grad_norm": 0.4664026200771332, - "learning_rate": 8.672726392421391e-06, - "loss": 0.3891, - "step": 11598 - }, - { - "epoch": 0.7580550290830664, - "grad_norm": 0.4879235327243805, - "learning_rate": 8.672489437207106e-06, - "loss": 0.4508, - "step": 11599 - }, - { - "epoch": 0.7581203842886086, - "grad_norm": 0.4599400758743286, - "learning_rate": 8.672252464080817e-06, - "loss": 0.3914, - "step": 11600 - }, - { - "epoch": 0.7581857394941507, - "grad_norm": 0.44503307342529297, - "learning_rate": 8.672015473043683e-06, - "loss": 0.3927, - "step": 11601 - }, - { - "epoch": 0.7582510946996929, - "grad_norm": 0.43716150522232056, - "learning_rate": 8.671778464096855e-06, - "loss": 0.3692, - "step": 11602 - }, - { - "epoch": 0.758316449905235, - "grad_norm": 0.4063815176486969, - "learning_rate": 8.671541437241493e-06, - "loss": 0.3268, - "step": 11603 - }, - { - "epoch": 0.758381805110777, - "grad_norm": 0.4981492757797241, - "learning_rate": 8.671304392478749e-06, - "loss": 0.4306, - "step": 11604 - }, - { - "epoch": 0.7584471603163192, - "grad_norm": 0.43988507986068726, - "learning_rate": 8.671067329809783e-06, - "loss": 0.3765, - "step": 11605 - }, - { - "epoch": 0.7585125155218613, - "grad_norm": 0.4441068172454834, - "learning_rate": 8.67083024923575e-06, - "loss": 0.3436, - "step": 11606 - }, - { - "epoch": 0.7585778707274035, - "grad_norm": 0.47273120284080505, - "learning_rate": 8.670593150757806e-06, - "loss": 0.4095, - "step": 11607 - }, - { - "epoch": 0.7586432259329455, - "grad_norm": 0.4163842499256134, - "learning_rate": 8.670356034377109e-06, - "loss": 0.357, - "step": 11608 - }, - { - "epoch": 0.7587085811384877, - "grad_norm": 0.43506866693496704, - "learning_rate": 8.670118900094812e-06, - "loss": 0.3778, - "step": 11609 - }, - { - "epoch": 0.7587739363440298, - "grad_norm": 0.46837303042411804, - "learning_rate": 8.669881747912074e-06, - "loss": 0.4012, - "step": 11610 - }, - { - "epoch": 0.758839291549572, - "grad_norm": 0.4551672041416168, - "learning_rate": 8.669644577830052e-06, - "loss": 0.3732, - "step": 11611 - }, - { - "epoch": 0.758904646755114, - "grad_norm": 0.4556867182254791, - "learning_rate": 8.669407389849902e-06, - "loss": 0.3865, - "step": 11612 - }, - { - "epoch": 0.7589700019606561, - "grad_norm": 0.6006876826286316, - "learning_rate": 8.66917018397278e-06, - "loss": 0.3906, - "step": 11613 - }, - { - "epoch": 0.7590353571661983, - "grad_norm": 0.4503045976161957, - "learning_rate": 8.668932960199846e-06, - "loss": 0.3708, - "step": 11614 - }, - { - "epoch": 0.7591007123717404, - "grad_norm": 0.42857062816619873, - "learning_rate": 8.668695718532254e-06, - "loss": 0.3392, - "step": 11615 - }, - { - "epoch": 0.7591660675772826, - "grad_norm": 0.4834391176700592, - "learning_rate": 8.668458458971162e-06, - "loss": 0.4009, - "step": 11616 - }, - { - "epoch": 0.7592314227828246, - "grad_norm": 0.4342235326766968, - "learning_rate": 8.668221181517726e-06, - "loss": 0.3548, - "step": 11617 - }, - { - "epoch": 0.7592967779883668, - "grad_norm": 0.49423882365226746, - "learning_rate": 8.667983886173106e-06, - "loss": 0.4, - "step": 11618 - }, - { - "epoch": 0.7593621331939089, - "grad_norm": 0.4574379324913025, - "learning_rate": 8.667746572938458e-06, - "loss": 0.4107, - "step": 11619 - }, - { - "epoch": 0.7594274883994511, - "grad_norm": 0.46529674530029297, - "learning_rate": 8.667509241814938e-06, - "loss": 0.3733, - "step": 11620 - }, - { - "epoch": 0.7594928436049931, - "grad_norm": 0.41194331645965576, - "learning_rate": 8.667271892803706e-06, - "loss": 0.3436, - "step": 11621 - }, - { - "epoch": 0.7595581988105352, - "grad_norm": 0.412412166595459, - "learning_rate": 8.667034525905918e-06, - "loss": 0.336, - "step": 11622 - }, - { - "epoch": 0.7596235540160774, - "grad_norm": 0.43101462721824646, - "learning_rate": 8.666797141122731e-06, - "loss": 0.3668, - "step": 11623 - }, - { - "epoch": 0.7596889092216195, - "grad_norm": 0.4349445700645447, - "learning_rate": 8.666559738455306e-06, - "loss": 0.3421, - "step": 11624 - }, - { - "epoch": 0.7597542644271617, - "grad_norm": 0.4164186716079712, - "learning_rate": 8.666322317904798e-06, - "loss": 0.3516, - "step": 11625 - }, - { - "epoch": 0.7598196196327037, - "grad_norm": 0.4714847505092621, - "learning_rate": 8.666084879472367e-06, - "loss": 0.3593, - "step": 11626 - }, - { - "epoch": 0.7598849748382459, - "grad_norm": 0.4621010720729828, - "learning_rate": 8.665847423159168e-06, - "loss": 0.3688, - "step": 11627 - }, - { - "epoch": 0.759950330043788, - "grad_norm": 0.41812488436698914, - "learning_rate": 8.665609948966363e-06, - "loss": 0.3475, - "step": 11628 - }, - { - "epoch": 0.7600156852493302, - "grad_norm": 0.4732639789581299, - "learning_rate": 8.665372456895108e-06, - "loss": 0.3967, - "step": 11629 - }, - { - "epoch": 0.7600810404548722, - "grad_norm": 0.5000624060630798, - "learning_rate": 8.66513494694656e-06, - "loss": 0.4287, - "step": 11630 - }, - { - "epoch": 0.7601463956604143, - "grad_norm": 0.44705361127853394, - "learning_rate": 8.664897419121881e-06, - "loss": 0.366, - "step": 11631 - }, - { - "epoch": 0.7602117508659565, - "grad_norm": 0.4290550649166107, - "learning_rate": 8.664659873422228e-06, - "loss": 0.3688, - "step": 11632 - }, - { - "epoch": 0.7602771060714986, - "grad_norm": 0.5169782638549805, - "learning_rate": 8.664422309848758e-06, - "loss": 0.4368, - "step": 11633 - }, - { - "epoch": 0.7603424612770407, - "grad_norm": 0.42105633020401, - "learning_rate": 8.66418472840263e-06, - "loss": 0.353, - "step": 11634 - }, - { - "epoch": 0.7604078164825828, - "grad_norm": 0.4356044828891754, - "learning_rate": 8.663947129085006e-06, - "loss": 0.3447, - "step": 11635 - }, - { - "epoch": 0.760473171688125, - "grad_norm": 0.44438812136650085, - "learning_rate": 8.663709511897043e-06, - "loss": 0.3641, - "step": 11636 - }, - { - "epoch": 0.7605385268936671, - "grad_norm": 0.5030198097229004, - "learning_rate": 8.663471876839898e-06, - "loss": 0.4122, - "step": 11637 - }, - { - "epoch": 0.7606038820992091, - "grad_norm": 0.4163077175617218, - "learning_rate": 8.663234223914732e-06, - "loss": 0.2977, - "step": 11638 - }, - { - "epoch": 0.7606692373047513, - "grad_norm": 0.48841458559036255, - "learning_rate": 8.662996553122702e-06, - "loss": 0.3781, - "step": 11639 - }, - { - "epoch": 0.7607345925102934, - "grad_norm": 0.42547884583473206, - "learning_rate": 8.662758864464971e-06, - "loss": 0.3418, - "step": 11640 - }, - { - "epoch": 0.7607999477158356, - "grad_norm": 0.39035564661026, - "learning_rate": 8.662521157942694e-06, - "loss": 0.2919, - "step": 11641 - }, - { - "epoch": 0.7608653029213777, - "grad_norm": 0.47192656993865967, - "learning_rate": 8.662283433557033e-06, - "loss": 0.413, - "step": 11642 - }, - { - "epoch": 0.7609306581269198, - "grad_norm": 0.44570621848106384, - "learning_rate": 8.66204569130915e-06, - "loss": 0.4303, - "step": 11643 - }, - { - "epoch": 0.7609960133324619, - "grad_norm": 0.45997560024261475, - "learning_rate": 8.661807931200199e-06, - "loss": 0.3719, - "step": 11644 - }, - { - "epoch": 0.7610613685380041, - "grad_norm": 0.47064775228500366, - "learning_rate": 8.66157015323134e-06, - "loss": 0.4037, - "step": 11645 - }, - { - "epoch": 0.7611267237435462, - "grad_norm": 0.42042768001556396, - "learning_rate": 8.661332357403738e-06, - "loss": 0.3108, - "step": 11646 - }, - { - "epoch": 0.7611920789490882, - "grad_norm": 0.45020681619644165, - "learning_rate": 8.66109454371855e-06, - "loss": 0.3675, - "step": 11647 - }, - { - "epoch": 0.7612574341546304, - "grad_norm": 0.4344450831413269, - "learning_rate": 8.660856712176933e-06, - "loss": 0.3771, - "step": 11648 - }, - { - "epoch": 0.7613227893601725, - "grad_norm": 0.43782344460487366, - "learning_rate": 8.660618862780051e-06, - "loss": 0.3482, - "step": 11649 - }, - { - "epoch": 0.7613881445657147, - "grad_norm": 0.45356184244155884, - "learning_rate": 8.660380995529063e-06, - "loss": 0.3907, - "step": 11650 - }, - { - "epoch": 0.7614534997712568, - "grad_norm": 0.4710615277290344, - "learning_rate": 8.660143110425127e-06, - "loss": 0.3906, - "step": 11651 - }, - { - "epoch": 0.7615188549767989, - "grad_norm": 0.431894987821579, - "learning_rate": 8.659905207469408e-06, - "loss": 0.3656, - "step": 11652 - }, - { - "epoch": 0.761584210182341, - "grad_norm": 0.4785623550415039, - "learning_rate": 8.659667286663062e-06, - "loss": 0.4397, - "step": 11653 - }, - { - "epoch": 0.7616495653878832, - "grad_norm": 0.4310261309146881, - "learning_rate": 8.65942934800725e-06, - "loss": 0.3557, - "step": 11654 - }, - { - "epoch": 0.7617149205934253, - "grad_norm": 0.436513751745224, - "learning_rate": 8.659191391503135e-06, - "loss": 0.3647, - "step": 11655 - }, - { - "epoch": 0.7617802757989673, - "grad_norm": 0.44582149386405945, - "learning_rate": 8.658953417151874e-06, - "loss": 0.4017, - "step": 11656 - }, - { - "epoch": 0.7618456310045095, - "grad_norm": 0.5000804662704468, - "learning_rate": 8.65871542495463e-06, - "loss": 0.4561, - "step": 11657 - }, - { - "epoch": 0.7619109862100516, - "grad_norm": 0.46625787019729614, - "learning_rate": 8.658477414912564e-06, - "loss": 0.4236, - "step": 11658 - }, - { - "epoch": 0.7619763414155938, - "grad_norm": 0.4202720522880554, - "learning_rate": 8.658239387026836e-06, - "loss": 0.3506, - "step": 11659 - }, - { - "epoch": 0.7620416966211359, - "grad_norm": 0.44120508432388306, - "learning_rate": 8.658001341298608e-06, - "loss": 0.3746, - "step": 11660 - }, - { - "epoch": 0.762107051826678, - "grad_norm": 0.49611538648605347, - "learning_rate": 8.65776327772904e-06, - "loss": 0.4009, - "step": 11661 - }, - { - "epoch": 0.7621724070322201, - "grad_norm": 0.46743810176849365, - "learning_rate": 8.657525196319292e-06, - "loss": 0.4368, - "step": 11662 - }, - { - "epoch": 0.7622377622377622, - "grad_norm": 0.40163370966911316, - "learning_rate": 8.657287097070528e-06, - "loss": 0.336, - "step": 11663 - }, - { - "epoch": 0.7623031174433044, - "grad_norm": 0.44429242610931396, - "learning_rate": 8.657048979983906e-06, - "loss": 0.418, - "step": 11664 - }, - { - "epoch": 0.7623684726488464, - "grad_norm": 0.5027108192443848, - "learning_rate": 8.656810845060591e-06, - "loss": 0.4699, - "step": 11665 - }, - { - "epoch": 0.7624338278543886, - "grad_norm": 0.4208289384841919, - "learning_rate": 8.656572692301742e-06, - "loss": 0.3212, - "step": 11666 - }, - { - "epoch": 0.7624991830599307, - "grad_norm": 0.41135329008102417, - "learning_rate": 8.65633452170852e-06, - "loss": 0.345, - "step": 11667 - }, - { - "epoch": 0.7625645382654729, - "grad_norm": 0.45532602071762085, - "learning_rate": 8.656096333282092e-06, - "loss": 0.3948, - "step": 11668 - }, - { - "epoch": 0.762629893471015, - "grad_norm": 0.477816104888916, - "learning_rate": 8.65585812702361e-06, - "loss": 0.4097, - "step": 11669 - }, - { - "epoch": 0.7626952486765571, - "grad_norm": 0.4528370201587677, - "learning_rate": 8.655619902934244e-06, - "loss": 0.3611, - "step": 11670 - }, - { - "epoch": 0.7627606038820992, - "grad_norm": 0.4283353090286255, - "learning_rate": 8.655381661015154e-06, - "loss": 0.333, - "step": 11671 - }, - { - "epoch": 0.7628259590876413, - "grad_norm": 0.46747609972953796, - "learning_rate": 8.6551434012675e-06, - "loss": 0.3931, - "step": 11672 - }, - { - "epoch": 0.7628913142931835, - "grad_norm": 0.40733906626701355, - "learning_rate": 8.654905123692448e-06, - "loss": 0.3223, - "step": 11673 - }, - { - "epoch": 0.7629566694987255, - "grad_norm": 0.4334840774536133, - "learning_rate": 8.654666828291155e-06, - "loss": 0.3357, - "step": 11674 - }, - { - "epoch": 0.7630220247042677, - "grad_norm": 0.4467930197715759, - "learning_rate": 8.654428515064787e-06, - "loss": 0.356, - "step": 11675 - }, - { - "epoch": 0.7630873799098098, - "grad_norm": 0.4429667592048645, - "learning_rate": 8.654190184014503e-06, - "loss": 0.3475, - "step": 11676 - }, - { - "epoch": 0.763152735115352, - "grad_norm": 0.4922010898590088, - "learning_rate": 8.65395183514147e-06, - "loss": 0.4329, - "step": 11677 - }, - { - "epoch": 0.763218090320894, - "grad_norm": 0.4463573694229126, - "learning_rate": 8.653713468446847e-06, - "loss": 0.3797, - "step": 11678 - }, - { - "epoch": 0.7632834455264362, - "grad_norm": 0.4786367118358612, - "learning_rate": 8.653475083931798e-06, - "loss": 0.4105, - "step": 11679 - }, - { - "epoch": 0.7633488007319783, - "grad_norm": 0.5591999888420105, - "learning_rate": 8.653236681597486e-06, - "loss": 0.3655, - "step": 11680 - }, - { - "epoch": 0.7634141559375204, - "grad_norm": 0.4530841112136841, - "learning_rate": 8.652998261445072e-06, - "loss": 0.3665, - "step": 11681 - }, - { - "epoch": 0.7634795111430626, - "grad_norm": 0.46425333619117737, - "learning_rate": 8.65275982347572e-06, - "loss": 0.415, - "step": 11682 - }, - { - "epoch": 0.7635448663486046, - "grad_norm": 0.4381583333015442, - "learning_rate": 8.652521367690592e-06, - "loss": 0.3453, - "step": 11683 - }, - { - "epoch": 0.7636102215541468, - "grad_norm": 0.4222116470336914, - "learning_rate": 8.652282894090853e-06, - "loss": 0.3456, - "step": 11684 - }, - { - "epoch": 0.7636755767596889, - "grad_norm": 0.4758513271808624, - "learning_rate": 8.652044402677666e-06, - "loss": 0.4071, - "step": 11685 - }, - { - "epoch": 0.7637409319652311, - "grad_norm": 0.43865343928337097, - "learning_rate": 8.651805893452192e-06, - "loss": 0.3294, - "step": 11686 - }, - { - "epoch": 0.7638062871707731, - "grad_norm": 0.47241854667663574, - "learning_rate": 8.651567366415596e-06, - "loss": 0.3922, - "step": 11687 - }, - { - "epoch": 0.7638716423763152, - "grad_norm": 0.5159560441970825, - "learning_rate": 8.651328821569041e-06, - "loss": 0.3647, - "step": 11688 - }, - { - "epoch": 0.7639369975818574, - "grad_norm": 0.45398226380348206, - "learning_rate": 8.65109025891369e-06, - "loss": 0.3486, - "step": 11689 - }, - { - "epoch": 0.7640023527873995, - "grad_norm": 0.45617467164993286, - "learning_rate": 8.650851678450707e-06, - "loss": 0.3656, - "step": 11690 - }, - { - "epoch": 0.7640677079929417, - "grad_norm": 0.4122573733329773, - "learning_rate": 8.650613080181256e-06, - "loss": 0.3143, - "step": 11691 - }, - { - "epoch": 0.7641330631984837, - "grad_norm": 0.45565274357795715, - "learning_rate": 8.650374464106499e-06, - "loss": 0.3446, - "step": 11692 - }, - { - "epoch": 0.7641984184040259, - "grad_norm": 0.43330758810043335, - "learning_rate": 8.650135830227601e-06, - "loss": 0.3757, - "step": 11693 - }, - { - "epoch": 0.764263773609568, - "grad_norm": 0.4667772650718689, - "learning_rate": 8.64989717854573e-06, - "loss": 0.3864, - "step": 11694 - }, - { - "epoch": 0.7643291288151102, - "grad_norm": 0.5273546576499939, - "learning_rate": 8.649658509062042e-06, - "loss": 0.4488, - "step": 11695 - }, - { - "epoch": 0.7643944840206522, - "grad_norm": 0.515292763710022, - "learning_rate": 8.649419821777705e-06, - "loss": 0.4618, - "step": 11696 - }, - { - "epoch": 0.7644598392261943, - "grad_norm": 0.4481314420700073, - "learning_rate": 8.649181116693886e-06, - "loss": 0.3739, - "step": 11697 - }, - { - "epoch": 0.7645251944317365, - "grad_norm": 0.45073914527893066, - "learning_rate": 8.648942393811744e-06, - "loss": 0.3912, - "step": 11698 - }, - { - "epoch": 0.7645905496372786, - "grad_norm": 0.5078251957893372, - "learning_rate": 8.648703653132447e-06, - "loss": 0.3904, - "step": 11699 - }, - { - "epoch": 0.7646559048428208, - "grad_norm": 0.39665457606315613, - "learning_rate": 8.648464894657158e-06, - "loss": 0.3211, - "step": 11700 - }, - { - "epoch": 0.7647212600483628, - "grad_norm": 0.45499134063720703, - "learning_rate": 8.648226118387041e-06, - "loss": 0.3692, - "step": 11701 - }, - { - "epoch": 0.764786615253905, - "grad_norm": 0.4568077623844147, - "learning_rate": 8.647987324323264e-06, - "loss": 0.3588, - "step": 11702 - }, - { - "epoch": 0.7648519704594471, - "grad_norm": 0.46008485555648804, - "learning_rate": 8.647748512466986e-06, - "loss": 0.4029, - "step": 11703 - }, - { - "epoch": 0.7649173256649893, - "grad_norm": 0.45498141646385193, - "learning_rate": 8.647509682819377e-06, - "loss": 0.3804, - "step": 11704 - }, - { - "epoch": 0.7649826808705313, - "grad_norm": 0.4846314787864685, - "learning_rate": 8.647270835381598e-06, - "loss": 0.4366, - "step": 11705 - }, - { - "epoch": 0.7650480360760734, - "grad_norm": 0.44207775592803955, - "learning_rate": 8.647031970154817e-06, - "loss": 0.3541, - "step": 11706 - }, - { - "epoch": 0.7651133912816156, - "grad_norm": 0.44153493642807007, - "learning_rate": 8.646793087140197e-06, - "loss": 0.3694, - "step": 11707 - }, - { - "epoch": 0.7651787464871577, - "grad_norm": 0.4551956057548523, - "learning_rate": 8.646554186338902e-06, - "loss": 0.3745, - "step": 11708 - }, - { - "epoch": 0.7652441016926999, - "grad_norm": 0.499629408121109, - "learning_rate": 8.646315267752102e-06, - "loss": 0.4758, - "step": 11709 - }, - { - "epoch": 0.7653094568982419, - "grad_norm": 0.44776612520217896, - "learning_rate": 8.646076331380957e-06, - "loss": 0.3833, - "step": 11710 - }, - { - "epoch": 0.7653748121037841, - "grad_norm": 0.48136380314826965, - "learning_rate": 8.645837377226635e-06, - "loss": 0.4217, - "step": 11711 - }, - { - "epoch": 0.7654401673093262, - "grad_norm": 0.4859139919281006, - "learning_rate": 8.645598405290303e-06, - "loss": 0.4277, - "step": 11712 - }, - { - "epoch": 0.7655055225148684, - "grad_norm": 0.4509076774120331, - "learning_rate": 8.645359415573122e-06, - "loss": 0.3796, - "step": 11713 - }, - { - "epoch": 0.7655708777204104, - "grad_norm": 0.4640675187110901, - "learning_rate": 8.645120408076262e-06, - "loss": 0.4104, - "step": 11714 - }, - { - "epoch": 0.7656362329259525, - "grad_norm": 0.4932800233364105, - "learning_rate": 8.644881382800888e-06, - "loss": 0.4048, - "step": 11715 - }, - { - "epoch": 0.7657015881314947, - "grad_norm": 0.431494802236557, - "learning_rate": 8.644642339748161e-06, - "loss": 0.3651, - "step": 11716 - }, - { - "epoch": 0.7657669433370368, - "grad_norm": 0.4577261209487915, - "learning_rate": 8.644403278919254e-06, - "loss": 0.3665, - "step": 11717 - }, - { - "epoch": 0.765832298542579, - "grad_norm": 0.4488811194896698, - "learning_rate": 8.644164200315327e-06, - "loss": 0.3928, - "step": 11718 - }, - { - "epoch": 0.765897653748121, - "grad_norm": 0.45216381549835205, - "learning_rate": 8.643925103937552e-06, - "loss": 0.3864, - "step": 11719 - }, - { - "epoch": 0.7659630089536632, - "grad_norm": 0.4638056755065918, - "learning_rate": 8.64368598978709e-06, - "loss": 0.4073, - "step": 11720 - }, - { - "epoch": 0.7660283641592053, - "grad_norm": 0.4283357262611389, - "learning_rate": 8.64344685786511e-06, - "loss": 0.3315, - "step": 11721 - }, - { - "epoch": 0.7660937193647473, - "grad_norm": 0.4663817286491394, - "learning_rate": 8.643207708172776e-06, - "loss": 0.4222, - "step": 11722 - }, - { - "epoch": 0.7661590745702895, - "grad_norm": 0.4846576452255249, - "learning_rate": 8.642968540711257e-06, - "loss": 0.4261, - "step": 11723 - }, - { - "epoch": 0.7662244297758316, - "grad_norm": 0.4734724164009094, - "learning_rate": 8.642729355481719e-06, - "loss": 0.3842, - "step": 11724 - }, - { - "epoch": 0.7662897849813738, - "grad_norm": 0.4373061954975128, - "learning_rate": 8.642490152485326e-06, - "loss": 0.3705, - "step": 11725 - }, - { - "epoch": 0.7663551401869159, - "grad_norm": 0.4628601670265198, - "learning_rate": 8.642250931723247e-06, - "loss": 0.395, - "step": 11726 - }, - { - "epoch": 0.766420495392458, - "grad_norm": 0.4402123987674713, - "learning_rate": 8.64201169319665e-06, - "loss": 0.3592, - "step": 11727 - }, - { - "epoch": 0.7664858505980001, - "grad_norm": 0.4120115637779236, - "learning_rate": 8.641772436906698e-06, - "loss": 0.3524, - "step": 11728 - }, - { - "epoch": 0.7665512058035423, - "grad_norm": 0.48394501209259033, - "learning_rate": 8.641533162854561e-06, - "loss": 0.4382, - "step": 11729 - }, - { - "epoch": 0.7666165610090844, - "grad_norm": 0.47838151454925537, - "learning_rate": 8.641293871041407e-06, - "loss": 0.3989, - "step": 11730 - }, - { - "epoch": 0.7666819162146264, - "grad_norm": 0.42966246604919434, - "learning_rate": 8.6410545614684e-06, - "loss": 0.3814, - "step": 11731 - }, - { - "epoch": 0.7667472714201686, - "grad_norm": 0.4653118848800659, - "learning_rate": 8.640815234136708e-06, - "loss": 0.4118, - "step": 11732 - }, - { - "epoch": 0.7668126266257107, - "grad_norm": 0.44381260871887207, - "learning_rate": 8.6405758890475e-06, - "loss": 0.3608, - "step": 11733 - }, - { - "epoch": 0.7668779818312529, - "grad_norm": 0.45664727687835693, - "learning_rate": 8.640336526201942e-06, - "loss": 0.4156, - "step": 11734 - }, - { - "epoch": 0.766943337036795, - "grad_norm": 0.45807671546936035, - "learning_rate": 8.6400971456012e-06, - "loss": 0.3962, - "step": 11735 - }, - { - "epoch": 0.7670086922423371, - "grad_norm": 0.43573200702667236, - "learning_rate": 8.639857747246444e-06, - "loss": 0.3771, - "step": 11736 - }, - { - "epoch": 0.7670740474478792, - "grad_norm": 0.4622085690498352, - "learning_rate": 8.639618331138842e-06, - "loss": 0.405, - "step": 11737 - }, - { - "epoch": 0.7671394026534214, - "grad_norm": 0.4761989712715149, - "learning_rate": 8.63937889727956e-06, - "loss": 0.4079, - "step": 11738 - }, - { - "epoch": 0.7672047578589635, - "grad_norm": 0.4308473765850067, - "learning_rate": 8.639139445669765e-06, - "loss": 0.3626, - "step": 11739 - }, - { - "epoch": 0.7672701130645055, - "grad_norm": 0.46983495354652405, - "learning_rate": 8.638899976310628e-06, - "loss": 0.4076, - "step": 11740 - }, - { - "epoch": 0.7673354682700477, - "grad_norm": 0.43866461515426636, - "learning_rate": 8.638660489203314e-06, - "loss": 0.3467, - "step": 11741 - }, - { - "epoch": 0.7674008234755898, - "grad_norm": 0.46919476985931396, - "learning_rate": 8.638420984348992e-06, - "loss": 0.4043, - "step": 11742 - }, - { - "epoch": 0.767466178681132, - "grad_norm": 0.508739173412323, - "learning_rate": 8.638181461748831e-06, - "loss": 0.4371, - "step": 11743 - }, - { - "epoch": 0.7675315338866741, - "grad_norm": 0.4344213008880615, - "learning_rate": 8.637941921403998e-06, - "loss": 0.3567, - "step": 11744 - }, - { - "epoch": 0.7675968890922162, - "grad_norm": 0.445400208234787, - "learning_rate": 8.637702363315663e-06, - "loss": 0.399, - "step": 11745 - }, - { - "epoch": 0.7676622442977583, - "grad_norm": 0.43348976969718933, - "learning_rate": 8.637462787484994e-06, - "loss": 0.3566, - "step": 11746 - }, - { - "epoch": 0.7677275995033004, - "grad_norm": 0.47843995690345764, - "learning_rate": 8.637223193913157e-06, - "loss": 0.412, - "step": 11747 - }, - { - "epoch": 0.7677929547088426, - "grad_norm": 0.5175793170928955, - "learning_rate": 8.636983582601324e-06, - "loss": 0.4553, - "step": 11748 - }, - { - "epoch": 0.7678583099143846, - "grad_norm": 0.4482249617576599, - "learning_rate": 8.636743953550662e-06, - "loss": 0.3796, - "step": 11749 - }, - { - "epoch": 0.7679236651199268, - "grad_norm": 0.43500909209251404, - "learning_rate": 8.636504306762339e-06, - "loss": 0.3559, - "step": 11750 - }, - { - "epoch": 0.7679890203254689, - "grad_norm": 0.4549643397331238, - "learning_rate": 8.636264642237523e-06, - "loss": 0.3493, - "step": 11751 - }, - { - "epoch": 0.7680543755310111, - "grad_norm": 0.4634072184562683, - "learning_rate": 8.636024959977387e-06, - "loss": 0.3774, - "step": 11752 - }, - { - "epoch": 0.7681197307365532, - "grad_norm": 0.4879359304904938, - "learning_rate": 8.635785259983097e-06, - "loss": 0.4071, - "step": 11753 - }, - { - "epoch": 0.7681850859420953, - "grad_norm": 0.4796138107776642, - "learning_rate": 8.635545542255823e-06, - "loss": 0.3805, - "step": 11754 - }, - { - "epoch": 0.7682504411476374, - "grad_norm": 0.4613151550292969, - "learning_rate": 8.635305806796733e-06, - "loss": 0.4339, - "step": 11755 - }, - { - "epoch": 0.7683157963531795, - "grad_norm": 0.5890063047409058, - "learning_rate": 8.635066053607e-06, - "loss": 0.4169, - "step": 11756 - }, - { - "epoch": 0.7683811515587217, - "grad_norm": 0.45519086718559265, - "learning_rate": 8.634826282687787e-06, - "loss": 0.3775, - "step": 11757 - }, - { - "epoch": 0.7684465067642637, - "grad_norm": 0.45684412121772766, - "learning_rate": 8.63458649404027e-06, - "loss": 0.3844, - "step": 11758 - }, - { - "epoch": 0.7685118619698059, - "grad_norm": 0.42663997411727905, - "learning_rate": 8.634346687665613e-06, - "loss": 0.3461, - "step": 11759 - }, - { - "epoch": 0.768577217175348, - "grad_norm": 0.45491963624954224, - "learning_rate": 8.634106863564988e-06, - "loss": 0.4126, - "step": 11760 - }, - { - "epoch": 0.7686425723808902, - "grad_norm": 0.4103669226169586, - "learning_rate": 8.633867021739567e-06, - "loss": 0.3376, - "step": 11761 - }, - { - "epoch": 0.7687079275864322, - "grad_norm": 0.46748030185699463, - "learning_rate": 8.633627162190516e-06, - "loss": 0.4257, - "step": 11762 - }, - { - "epoch": 0.7687732827919744, - "grad_norm": 0.4791647493839264, - "learning_rate": 8.633387284919007e-06, - "loss": 0.416, - "step": 11763 - }, - { - "epoch": 0.7688386379975165, - "grad_norm": 0.4199827015399933, - "learning_rate": 8.63314738992621e-06, - "loss": 0.3352, - "step": 11764 - }, - { - "epoch": 0.7689039932030586, - "grad_norm": 0.4442897439002991, - "learning_rate": 8.632907477213293e-06, - "loss": 0.3389, - "step": 11765 - }, - { - "epoch": 0.7689693484086008, - "grad_norm": 0.45387426018714905, - "learning_rate": 8.63266754678143e-06, - "loss": 0.3596, - "step": 11766 - }, - { - "epoch": 0.7690347036141428, - "grad_norm": 0.4821358323097229, - "learning_rate": 8.632427598631787e-06, - "loss": 0.4174, - "step": 11767 - }, - { - "epoch": 0.769100058819685, - "grad_norm": 0.4582551419734955, - "learning_rate": 8.632187632765538e-06, - "loss": 0.3776, - "step": 11768 - }, - { - "epoch": 0.7691654140252271, - "grad_norm": 0.4344165027141571, - "learning_rate": 8.63194764918385e-06, - "loss": 0.3635, - "step": 11769 - }, - { - "epoch": 0.7692307692307693, - "grad_norm": 0.5245004892349243, - "learning_rate": 8.631707647887895e-06, - "loss": 0.3813, - "step": 11770 - }, - { - "epoch": 0.7692961244363113, - "grad_norm": 0.4463549256324768, - "learning_rate": 8.631467628878844e-06, - "loss": 0.368, - "step": 11771 - }, - { - "epoch": 0.7693614796418534, - "grad_norm": 0.43788066506385803, - "learning_rate": 8.631227592157869e-06, - "loss": 0.349, - "step": 11772 - }, - { - "epoch": 0.7694268348473956, - "grad_norm": 0.5027048587799072, - "learning_rate": 8.630987537726136e-06, - "loss": 0.4162, - "step": 11773 - }, - { - "epoch": 0.7694921900529377, - "grad_norm": 0.4503048062324524, - "learning_rate": 8.630747465584821e-06, - "loss": 0.3879, - "step": 11774 - }, - { - "epoch": 0.7695575452584799, - "grad_norm": 0.45998138189315796, - "learning_rate": 8.630507375735093e-06, - "loss": 0.3644, - "step": 11775 - }, - { - "epoch": 0.7696229004640219, - "grad_norm": 0.4338878393173218, - "learning_rate": 8.630267268178121e-06, - "loss": 0.3608, - "step": 11776 - }, - { - "epoch": 0.7696882556695641, - "grad_norm": 0.42915183305740356, - "learning_rate": 8.630027142915081e-06, - "loss": 0.353, - "step": 11777 - }, - { - "epoch": 0.7697536108751062, - "grad_norm": 0.49270331859588623, - "learning_rate": 8.629786999947138e-06, - "loss": 0.4232, - "step": 11778 - }, - { - "epoch": 0.7698189660806484, - "grad_norm": 0.3997019827365875, - "learning_rate": 8.629546839275467e-06, - "loss": 0.3377, - "step": 11779 - }, - { - "epoch": 0.7698843212861904, - "grad_norm": 0.44041168689727783, - "learning_rate": 8.62930666090124e-06, - "loss": 0.4241, - "step": 11780 - }, - { - "epoch": 0.7699496764917325, - "grad_norm": 0.43760037422180176, - "learning_rate": 8.629066464825625e-06, - "loss": 0.38, - "step": 11781 - }, - { - "epoch": 0.7700150316972747, - "grad_norm": 0.38996458053588867, - "learning_rate": 8.628826251049797e-06, - "loss": 0.3225, - "step": 11782 - }, - { - "epoch": 0.7700803869028168, - "grad_norm": 0.465849369764328, - "learning_rate": 8.628586019574927e-06, - "loss": 0.3934, - "step": 11783 - }, - { - "epoch": 0.770145742108359, - "grad_norm": 0.41999539732933044, - "learning_rate": 8.628345770402185e-06, - "loss": 0.3513, - "step": 11784 - }, - { - "epoch": 0.770211097313901, - "grad_norm": 0.40005627274513245, - "learning_rate": 8.628105503532742e-06, - "loss": 0.3082, - "step": 11785 - }, - { - "epoch": 0.7702764525194432, - "grad_norm": 0.4402957260608673, - "learning_rate": 8.627865218967775e-06, - "loss": 0.3717, - "step": 11786 - }, - { - "epoch": 0.7703418077249853, - "grad_norm": 0.45970413088798523, - "learning_rate": 8.62762491670845e-06, - "loss": 0.3922, - "step": 11787 - }, - { - "epoch": 0.7704071629305275, - "grad_norm": 0.4797110855579376, - "learning_rate": 8.627384596755942e-06, - "loss": 0.4188, - "step": 11788 - }, - { - "epoch": 0.7704725181360695, - "grad_norm": 0.45043545961380005, - "learning_rate": 8.627144259111423e-06, - "loss": 0.3721, - "step": 11789 - }, - { - "epoch": 0.7705378733416116, - "grad_norm": 0.45030996203422546, - "learning_rate": 8.626903903776064e-06, - "loss": 0.3714, - "step": 11790 - }, - { - "epoch": 0.7706032285471538, - "grad_norm": 0.4351586103439331, - "learning_rate": 8.62666353075104e-06, - "loss": 0.3561, - "step": 11791 - }, - { - "epoch": 0.7706685837526959, - "grad_norm": 0.4533548653125763, - "learning_rate": 8.626423140037522e-06, - "loss": 0.3344, - "step": 11792 - }, - { - "epoch": 0.770733938958238, - "grad_norm": 0.46541261672973633, - "learning_rate": 8.626182731636678e-06, - "loss": 0.3848, - "step": 11793 - }, - { - "epoch": 0.7707992941637801, - "grad_norm": 0.45900705456733704, - "learning_rate": 8.625942305549688e-06, - "loss": 0.3664, - "step": 11794 - }, - { - "epoch": 0.7708646493693223, - "grad_norm": 0.4331720769405365, - "learning_rate": 8.625701861777721e-06, - "loss": 0.3679, - "step": 11795 - }, - { - "epoch": 0.7709300045748644, - "grad_norm": 0.4515995979309082, - "learning_rate": 8.62546140032195e-06, - "loss": 0.3557, - "step": 11796 - }, - { - "epoch": 0.7709953597804066, - "grad_norm": 0.44696739315986633, - "learning_rate": 8.625220921183546e-06, - "loss": 0.3791, - "step": 11797 - }, - { - "epoch": 0.7710607149859486, - "grad_norm": 0.4370102882385254, - "learning_rate": 8.624980424363684e-06, - "loss": 0.3438, - "step": 11798 - }, - { - "epoch": 0.7711260701914907, - "grad_norm": 0.4815715551376343, - "learning_rate": 8.62473990986354e-06, - "loss": 0.4063, - "step": 11799 - }, - { - "epoch": 0.7711914253970329, - "grad_norm": 0.4522111117839813, - "learning_rate": 8.624499377684279e-06, - "loss": 0.4132, - "step": 11800 - }, - { - "epoch": 0.771256780602575, - "grad_norm": 0.43564996123313904, - "learning_rate": 8.624258827827082e-06, - "loss": 0.3764, - "step": 11801 - }, - { - "epoch": 0.7713221358081171, - "grad_norm": 0.42831745743751526, - "learning_rate": 8.624018260293118e-06, - "loss": 0.367, - "step": 11802 - }, - { - "epoch": 0.7713874910136592, - "grad_norm": 0.44943052530288696, - "learning_rate": 8.623777675083562e-06, - "loss": 0.3461, - "step": 11803 - }, - { - "epoch": 0.7714528462192014, - "grad_norm": 0.5182769894599915, - "learning_rate": 8.623537072199587e-06, - "loss": 0.4253, - "step": 11804 - }, - { - "epoch": 0.7715182014247435, - "grad_norm": 0.4539603888988495, - "learning_rate": 8.623296451642365e-06, - "loss": 0.4197, - "step": 11805 - }, - { - "epoch": 0.7715835566302855, - "grad_norm": 0.4099823832511902, - "learning_rate": 8.623055813413072e-06, - "loss": 0.3466, - "step": 11806 - }, - { - "epoch": 0.7716489118358277, - "grad_norm": 0.44863128662109375, - "learning_rate": 8.62281515751288e-06, - "loss": 0.3664, - "step": 11807 - }, - { - "epoch": 0.7717142670413698, - "grad_norm": 0.7694123387336731, - "learning_rate": 8.622574483942965e-06, - "loss": 0.3734, - "step": 11808 - }, - { - "epoch": 0.771779622246912, - "grad_norm": 0.43434423208236694, - "learning_rate": 8.622333792704499e-06, - "loss": 0.3694, - "step": 11809 - }, - { - "epoch": 0.7718449774524541, - "grad_norm": 0.42186304926872253, - "learning_rate": 8.622093083798654e-06, - "loss": 0.3346, - "step": 11810 - }, - { - "epoch": 0.7719103326579962, - "grad_norm": 0.4894380569458008, - "learning_rate": 8.621852357226608e-06, - "loss": 0.4371, - "step": 11811 - }, - { - "epoch": 0.7719756878635383, - "grad_norm": 0.44940561056137085, - "learning_rate": 8.621611612989533e-06, - "loss": 0.4001, - "step": 11812 - }, - { - "epoch": 0.7720410430690805, - "grad_norm": 0.4715287387371063, - "learning_rate": 8.621370851088603e-06, - "loss": 0.3795, - "step": 11813 - }, - { - "epoch": 0.7721063982746226, - "grad_norm": 0.43742066621780396, - "learning_rate": 8.621130071524995e-06, - "loss": 0.356, - "step": 11814 - }, - { - "epoch": 0.7721717534801646, - "grad_norm": 0.44761908054351807, - "learning_rate": 8.620889274299879e-06, - "loss": 0.3809, - "step": 11815 - }, - { - "epoch": 0.7722371086857068, - "grad_norm": 0.41823145747184753, - "learning_rate": 8.620648459414431e-06, - "loss": 0.3349, - "step": 11816 - }, - { - "epoch": 0.7723024638912489, - "grad_norm": 0.4446566700935364, - "learning_rate": 8.620407626869828e-06, - "loss": 0.3676, - "step": 11817 - }, - { - "epoch": 0.7723678190967911, - "grad_norm": 0.4725385904312134, - "learning_rate": 8.620166776667242e-06, - "loss": 0.3795, - "step": 11818 - }, - { - "epoch": 0.7724331743023332, - "grad_norm": 0.4345307946205139, - "learning_rate": 8.619925908807848e-06, - "loss": 0.3829, - "step": 11819 - }, - { - "epoch": 0.7724985295078753, - "grad_norm": 0.4570246934890747, - "learning_rate": 8.61968502329282e-06, - "loss": 0.3506, - "step": 11820 - }, - { - "epoch": 0.7725638847134174, - "grad_norm": 0.47539472579956055, - "learning_rate": 8.619444120123337e-06, - "loss": 0.4179, - "step": 11821 - }, - { - "epoch": 0.7726292399189596, - "grad_norm": 0.44697678089141846, - "learning_rate": 8.619203199300571e-06, - "loss": 0.3929, - "step": 11822 - }, - { - "epoch": 0.7726945951245017, - "grad_norm": 0.5241457223892212, - "learning_rate": 8.618962260825696e-06, - "loss": 0.3915, - "step": 11823 - }, - { - "epoch": 0.7727599503300437, - "grad_norm": 0.4437718689441681, - "learning_rate": 8.61872130469989e-06, - "loss": 0.3843, - "step": 11824 - }, - { - "epoch": 0.7728253055355859, - "grad_norm": 0.41975370049476624, - "learning_rate": 8.618480330924323e-06, - "loss": 0.3247, - "step": 11825 - }, - { - "epoch": 0.772890660741128, - "grad_norm": 0.4372032582759857, - "learning_rate": 8.618239339500177e-06, - "loss": 0.3659, - "step": 11826 - }, - { - "epoch": 0.7729560159466702, - "grad_norm": 0.4222608208656311, - "learning_rate": 8.617998330428622e-06, - "loss": 0.3426, - "step": 11827 - }, - { - "epoch": 0.7730213711522123, - "grad_norm": 0.44943708181381226, - "learning_rate": 8.617757303710839e-06, - "loss": 0.3547, - "step": 11828 - }, - { - "epoch": 0.7730867263577544, - "grad_norm": 0.44556036591529846, - "learning_rate": 8.617516259347997e-06, - "loss": 0.3786, - "step": 11829 - }, - { - "epoch": 0.7731520815632965, - "grad_norm": 0.4915030896663666, - "learning_rate": 8.617275197341277e-06, - "loss": 0.3299, - "step": 11830 - }, - { - "epoch": 0.7732174367688386, - "grad_norm": 0.4466126263141632, - "learning_rate": 8.617034117691852e-06, - "loss": 0.3947, - "step": 11831 - }, - { - "epoch": 0.7732827919743808, - "grad_norm": 0.4619917571544647, - "learning_rate": 8.616793020400898e-06, - "loss": 0.38, - "step": 11832 - }, - { - "epoch": 0.7733481471799228, - "grad_norm": 0.43117502331733704, - "learning_rate": 8.616551905469592e-06, - "loss": 0.3548, - "step": 11833 - }, - { - "epoch": 0.773413502385465, - "grad_norm": 0.46063923835754395, - "learning_rate": 8.61631077289911e-06, - "loss": 0.3802, - "step": 11834 - }, - { - "epoch": 0.7734788575910071, - "grad_norm": 0.43890947103500366, - "learning_rate": 8.616069622690627e-06, - "loss": 0.3819, - "step": 11835 - }, - { - "epoch": 0.7735442127965493, - "grad_norm": 0.43641477823257446, - "learning_rate": 8.61582845484532e-06, - "loss": 0.38, - "step": 11836 - }, - { - "epoch": 0.7736095680020914, - "grad_norm": 0.42910319566726685, - "learning_rate": 8.615587269364365e-06, - "loss": 0.3313, - "step": 11837 - }, - { - "epoch": 0.7736749232076335, - "grad_norm": 0.43791988492012024, - "learning_rate": 8.615346066248938e-06, - "loss": 0.3416, - "step": 11838 - }, - { - "epoch": 0.7737402784131756, - "grad_norm": 0.4538554549217224, - "learning_rate": 8.615104845500215e-06, - "loss": 0.3836, - "step": 11839 - }, - { - "epoch": 0.7738056336187177, - "grad_norm": 0.46823617815971375, - "learning_rate": 8.614863607119374e-06, - "loss": 0.3878, - "step": 11840 - }, - { - "epoch": 0.7738709888242599, - "grad_norm": 0.47379255294799805, - "learning_rate": 8.614622351107592e-06, - "loss": 0.4466, - "step": 11841 - }, - { - "epoch": 0.7739363440298019, - "grad_norm": 0.4573115408420563, - "learning_rate": 8.614381077466043e-06, - "loss": 0.4009, - "step": 11842 - }, - { - "epoch": 0.7740016992353441, - "grad_norm": 0.4688892960548401, - "learning_rate": 8.614139786195905e-06, - "loss": 0.3851, - "step": 11843 - }, - { - "epoch": 0.7740670544408862, - "grad_norm": 0.4632177948951721, - "learning_rate": 8.613898477298356e-06, - "loss": 0.4063, - "step": 11844 - }, - { - "epoch": 0.7741324096464284, - "grad_norm": 0.4439104199409485, - "learning_rate": 8.613657150774573e-06, - "loss": 0.3591, - "step": 11845 - }, - { - "epoch": 0.7741977648519704, - "grad_norm": 0.45160600543022156, - "learning_rate": 8.61341580662573e-06, - "loss": 0.402, - "step": 11846 - }, - { - "epoch": 0.7742631200575126, - "grad_norm": 0.4168391525745392, - "learning_rate": 8.61317444485301e-06, - "loss": 0.3711, - "step": 11847 - }, - { - "epoch": 0.7743284752630547, - "grad_norm": 0.40535280108451843, - "learning_rate": 8.612933065457583e-06, - "loss": 0.3128, - "step": 11848 - }, - { - "epoch": 0.7743938304685968, - "grad_norm": 0.43484732508659363, - "learning_rate": 8.612691668440631e-06, - "loss": 0.3893, - "step": 11849 - }, - { - "epoch": 0.774459185674139, - "grad_norm": 0.4906025230884552, - "learning_rate": 8.61245025380333e-06, - "loss": 0.4006, - "step": 11850 - }, - { - "epoch": 0.774524540879681, - "grad_norm": 0.42672377824783325, - "learning_rate": 8.61220882154686e-06, - "loss": 0.3681, - "step": 11851 - }, - { - "epoch": 0.7745898960852232, - "grad_norm": 0.44538670778274536, - "learning_rate": 8.611967371672392e-06, - "loss": 0.3864, - "step": 11852 - }, - { - "epoch": 0.7746552512907653, - "grad_norm": 0.422595351934433, - "learning_rate": 8.61172590418111e-06, - "loss": 0.3771, - "step": 11853 - }, - { - "epoch": 0.7747206064963075, - "grad_norm": 0.4126081168651581, - "learning_rate": 8.611484419074189e-06, - "loss": 0.3522, - "step": 11854 - }, - { - "epoch": 0.7747859617018495, - "grad_norm": 0.44276732206344604, - "learning_rate": 8.611242916352809e-06, - "loss": 0.3568, - "step": 11855 - }, - { - "epoch": 0.7748513169073916, - "grad_norm": 0.44415268301963806, - "learning_rate": 8.611001396018144e-06, - "loss": 0.347, - "step": 11856 - }, - { - "epoch": 0.7749166721129338, - "grad_norm": 0.45888668298721313, - "learning_rate": 8.610759858071376e-06, - "loss": 0.3663, - "step": 11857 - }, - { - "epoch": 0.7749820273184759, - "grad_norm": 0.4071204364299774, - "learning_rate": 8.61051830251368e-06, - "loss": 0.3298, - "step": 11858 - }, - { - "epoch": 0.7750473825240181, - "grad_norm": 0.4355985224246979, - "learning_rate": 8.610276729346236e-06, - "loss": 0.3539, - "step": 11859 - }, - { - "epoch": 0.7751127377295601, - "grad_norm": 0.41884443163871765, - "learning_rate": 8.61003513857022e-06, - "loss": 0.3401, - "step": 11860 - }, - { - "epoch": 0.7751780929351023, - "grad_norm": 0.47849681973457336, - "learning_rate": 8.609793530186815e-06, - "loss": 0.4239, - "step": 11861 - }, - { - "epoch": 0.7752434481406444, - "grad_norm": 0.4448944628238678, - "learning_rate": 8.609551904197196e-06, - "loss": 0.3872, - "step": 11862 - }, - { - "epoch": 0.7753088033461866, - "grad_norm": 0.4501670300960541, - "learning_rate": 8.60931026060254e-06, - "loss": 0.3578, - "step": 11863 - }, - { - "epoch": 0.7753741585517286, - "grad_norm": 0.4469984471797943, - "learning_rate": 8.609068599404028e-06, - "loss": 0.3946, - "step": 11864 - }, - { - "epoch": 0.7754395137572707, - "grad_norm": 0.44684213399887085, - "learning_rate": 8.608826920602838e-06, - "loss": 0.381, - "step": 11865 - }, - { - "epoch": 0.7755048689628129, - "grad_norm": 0.4682392179965973, - "learning_rate": 8.60858522420015e-06, - "loss": 0.4327, - "step": 11866 - }, - { - "epoch": 0.775570224168355, - "grad_norm": 0.4276103079319, - "learning_rate": 8.608343510197141e-06, - "loss": 0.3352, - "step": 11867 - }, - { - "epoch": 0.7756355793738972, - "grad_norm": 0.4345168471336365, - "learning_rate": 8.60810177859499e-06, - "loss": 0.3869, - "step": 11868 - }, - { - "epoch": 0.7757009345794392, - "grad_norm": 0.42673447728157043, - "learning_rate": 8.607860029394879e-06, - "loss": 0.3634, - "step": 11869 - }, - { - "epoch": 0.7757662897849814, - "grad_norm": 0.4504968822002411, - "learning_rate": 8.607618262597982e-06, - "loss": 0.3999, - "step": 11870 - }, - { - "epoch": 0.7758316449905235, - "grad_norm": 0.443198561668396, - "learning_rate": 8.607376478205482e-06, - "loss": 0.3923, - "step": 11871 - }, - { - "epoch": 0.7758970001960657, - "grad_norm": 0.45323601365089417, - "learning_rate": 8.607134676218558e-06, - "loss": 0.4069, - "step": 11872 - }, - { - "epoch": 0.7759623554016077, - "grad_norm": 0.46169528365135193, - "learning_rate": 8.606892856638388e-06, - "loss": 0.3533, - "step": 11873 - }, - { - "epoch": 0.7760277106071498, - "grad_norm": 0.4142061471939087, - "learning_rate": 8.606651019466153e-06, - "loss": 0.3583, - "step": 11874 - }, - { - "epoch": 0.776093065812692, - "grad_norm": 0.4259793162345886, - "learning_rate": 8.60640916470303e-06, - "loss": 0.3554, - "step": 11875 - }, - { - "epoch": 0.7761584210182341, - "grad_norm": 0.4354163408279419, - "learning_rate": 8.6061672923502e-06, - "loss": 0.3571, - "step": 11876 - }, - { - "epoch": 0.7762237762237763, - "grad_norm": 0.4246130883693695, - "learning_rate": 8.605925402408843e-06, - "loss": 0.3546, - "step": 11877 - }, - { - "epoch": 0.7762891314293183, - "grad_norm": 0.4725601375102997, - "learning_rate": 8.60568349488014e-06, - "loss": 0.3952, - "step": 11878 - }, - { - "epoch": 0.7763544866348605, - "grad_norm": 0.44709452986717224, - "learning_rate": 8.605441569765266e-06, - "loss": 0.3688, - "step": 11879 - }, - { - "epoch": 0.7764198418404026, - "grad_norm": 0.4556303918361664, - "learning_rate": 8.605199627065409e-06, - "loss": 0.3664, - "step": 11880 - }, - { - "epoch": 0.7764851970459448, - "grad_norm": 0.4555344581604004, - "learning_rate": 8.604957666781741e-06, - "loss": 0.398, - "step": 11881 - }, - { - "epoch": 0.7765505522514868, - "grad_norm": 0.44141873717308044, - "learning_rate": 8.604715688915449e-06, - "loss": 0.3664, - "step": 11882 - }, - { - "epoch": 0.7766159074570289, - "grad_norm": 0.44355231523513794, - "learning_rate": 8.604473693467707e-06, - "loss": 0.3717, - "step": 11883 - }, - { - "epoch": 0.7766812626625711, - "grad_norm": 0.4414403736591339, - "learning_rate": 8.6042316804397e-06, - "loss": 0.3954, - "step": 11884 - }, - { - "epoch": 0.7767466178681132, - "grad_norm": 0.39469870924949646, - "learning_rate": 8.603989649832602e-06, - "loss": 0.3054, - "step": 11885 - }, - { - "epoch": 0.7768119730736553, - "grad_norm": 0.4306773543357849, - "learning_rate": 8.603747601647601e-06, - "loss": 0.352, - "step": 11886 - }, - { - "epoch": 0.7768773282791974, - "grad_norm": 0.4756735861301422, - "learning_rate": 8.603505535885877e-06, - "loss": 0.4161, - "step": 11887 - }, - { - "epoch": 0.7769426834847396, - "grad_norm": 0.504156231880188, - "learning_rate": 8.603263452548604e-06, - "loss": 0.4627, - "step": 11888 - }, - { - "epoch": 0.7770080386902817, - "grad_norm": 0.44968071579933167, - "learning_rate": 8.60302135163697e-06, - "loss": 0.3838, - "step": 11889 - }, - { - "epoch": 0.7770733938958237, - "grad_norm": 0.4513949155807495, - "learning_rate": 8.60277923315215e-06, - "loss": 0.4074, - "step": 11890 - }, - { - "epoch": 0.7771387491013659, - "grad_norm": 0.43221279978752136, - "learning_rate": 8.60253709709533e-06, - "loss": 0.3711, - "step": 11891 - }, - { - "epoch": 0.777204104306908, - "grad_norm": 0.4418954849243164, - "learning_rate": 8.602294943467686e-06, - "loss": 0.3901, - "step": 11892 - }, - { - "epoch": 0.7772694595124502, - "grad_norm": 0.4856281280517578, - "learning_rate": 8.602052772270405e-06, - "loss": 0.403, - "step": 11893 - }, - { - "epoch": 0.7773348147179923, - "grad_norm": 0.45415908098220825, - "learning_rate": 8.601810583504662e-06, - "loss": 0.3739, - "step": 11894 - }, - { - "epoch": 0.7774001699235344, - "grad_norm": 0.473394513130188, - "learning_rate": 8.60156837717164e-06, - "loss": 0.4296, - "step": 11895 - }, - { - "epoch": 0.7774655251290765, - "grad_norm": 0.4633063077926636, - "learning_rate": 8.601326153272524e-06, - "loss": 0.3543, - "step": 11896 - }, - { - "epoch": 0.7775308803346187, - "grad_norm": 0.5050225853919983, - "learning_rate": 8.601083911808492e-06, - "loss": 0.4812, - "step": 11897 - }, - { - "epoch": 0.7775962355401608, - "grad_norm": 0.4297986924648285, - "learning_rate": 8.600841652780726e-06, - "loss": 0.3362, - "step": 11898 - }, - { - "epoch": 0.7776615907457028, - "grad_norm": 0.4877097010612488, - "learning_rate": 8.600599376190408e-06, - "loss": 0.4079, - "step": 11899 - }, - { - "epoch": 0.777726945951245, - "grad_norm": 0.4294080436229706, - "learning_rate": 8.60035708203872e-06, - "loss": 0.3678, - "step": 11900 - }, - { - "epoch": 0.7777923011567871, - "grad_norm": 0.45811372995376587, - "learning_rate": 8.600114770326842e-06, - "loss": 0.3645, - "step": 11901 - }, - { - "epoch": 0.7778576563623293, - "grad_norm": 0.44701892137527466, - "learning_rate": 8.599872441055957e-06, - "loss": 0.3905, - "step": 11902 - }, - { - "epoch": 0.7779230115678714, - "grad_norm": 0.4324595034122467, - "learning_rate": 8.599630094227247e-06, - "loss": 0.3514, - "step": 11903 - }, - { - "epoch": 0.7779883667734135, - "grad_norm": 0.4528386890888214, - "learning_rate": 8.599387729841894e-06, - "loss": 0.4042, - "step": 11904 - }, - { - "epoch": 0.7780537219789556, - "grad_norm": 0.4724828004837036, - "learning_rate": 8.599145347901082e-06, - "loss": 0.3706, - "step": 11905 - }, - { - "epoch": 0.7781190771844978, - "grad_norm": 0.44598785042762756, - "learning_rate": 8.59890294840599e-06, - "loss": 0.3439, - "step": 11906 - }, - { - "epoch": 0.7781844323900399, - "grad_norm": 0.4361238181591034, - "learning_rate": 8.598660531357802e-06, - "loss": 0.3724, - "step": 11907 - }, - { - "epoch": 0.7782497875955819, - "grad_norm": 0.40344589948654175, - "learning_rate": 8.5984180967577e-06, - "loss": 0.3263, - "step": 11908 - }, - { - "epoch": 0.7783151428011241, - "grad_norm": 0.4686245322227478, - "learning_rate": 8.598175644606865e-06, - "loss": 0.4181, - "step": 11909 - }, - { - "epoch": 0.7783804980066662, - "grad_norm": 0.408418744802475, - "learning_rate": 8.59793317490648e-06, - "loss": 0.3287, - "step": 11910 - }, - { - "epoch": 0.7784458532122084, - "grad_norm": 0.44714394211769104, - "learning_rate": 8.597690687657732e-06, - "loss": 0.3932, - "step": 11911 - }, - { - "epoch": 0.7785112084177505, - "grad_norm": 0.4426407814025879, - "learning_rate": 8.597448182861797e-06, - "loss": 0.3653, - "step": 11912 - }, - { - "epoch": 0.7785765636232926, - "grad_norm": 0.4467528760433197, - "learning_rate": 8.597205660519863e-06, - "loss": 0.3644, - "step": 11913 - }, - { - "epoch": 0.7786419188288347, - "grad_norm": 0.3986530005931854, - "learning_rate": 8.596963120633109e-06, - "loss": 0.3087, - "step": 11914 - }, - { - "epoch": 0.7787072740343768, - "grad_norm": 0.4246610999107361, - "learning_rate": 8.59672056320272e-06, - "loss": 0.361, - "step": 11915 - }, - { - "epoch": 0.778772629239919, - "grad_norm": 0.42618927359580994, - "learning_rate": 8.59647798822988e-06, - "loss": 0.3509, - "step": 11916 - }, - { - "epoch": 0.778837984445461, - "grad_norm": 0.45555341243743896, - "learning_rate": 8.596235395715771e-06, - "loss": 0.4139, - "step": 11917 - }, - { - "epoch": 0.7789033396510032, - "grad_norm": 0.43304362893104553, - "learning_rate": 8.595992785661575e-06, - "loss": 0.3584, - "step": 11918 - }, - { - "epoch": 0.7789686948565453, - "grad_norm": 0.4583284258842468, - "learning_rate": 8.595750158068477e-06, - "loss": 0.406, - "step": 11919 - }, - { - "epoch": 0.7790340500620875, - "grad_norm": 0.4887494444847107, - "learning_rate": 8.595507512937659e-06, - "loss": 0.3575, - "step": 11920 - }, - { - "epoch": 0.7790994052676296, - "grad_norm": 0.4497702717781067, - "learning_rate": 8.595264850270306e-06, - "loss": 0.3533, - "step": 11921 - }, - { - "epoch": 0.7791647604731717, - "grad_norm": 0.42957308888435364, - "learning_rate": 8.595022170067602e-06, - "loss": 0.4051, - "step": 11922 - }, - { - "epoch": 0.7792301156787138, - "grad_norm": 0.478105753660202, - "learning_rate": 8.594779472330727e-06, - "loss": 0.3959, - "step": 11923 - }, - { - "epoch": 0.7792954708842559, - "grad_norm": 0.46036097407341003, - "learning_rate": 8.59453675706087e-06, - "loss": 0.3813, - "step": 11924 - }, - { - "epoch": 0.7793608260897981, - "grad_norm": 0.4499993920326233, - "learning_rate": 8.594294024259209e-06, - "loss": 0.3961, - "step": 11925 - }, - { - "epoch": 0.7794261812953401, - "grad_norm": 0.48920682072639465, - "learning_rate": 8.594051273926934e-06, - "loss": 0.4296, - "step": 11926 - }, - { - "epoch": 0.7794915365008823, - "grad_norm": 0.4173191487789154, - "learning_rate": 8.593808506065222e-06, - "loss": 0.3475, - "step": 11927 - }, - { - "epoch": 0.7795568917064244, - "grad_norm": 0.3997310400009155, - "learning_rate": 8.593565720675263e-06, - "loss": 0.3031, - "step": 11928 - }, - { - "epoch": 0.7796222469119666, - "grad_norm": 0.45031315088272095, - "learning_rate": 8.59332291775824e-06, - "loss": 0.3866, - "step": 11929 - }, - { - "epoch": 0.7796876021175086, - "grad_norm": 0.45433446764945984, - "learning_rate": 8.593080097315335e-06, - "loss": 0.3941, - "step": 11930 - }, - { - "epoch": 0.7797529573230508, - "grad_norm": 0.4518393576145172, - "learning_rate": 8.592837259347734e-06, - "loss": 0.3848, - "step": 11931 - }, - { - "epoch": 0.7798183125285929, - "grad_norm": 0.43654945492744446, - "learning_rate": 8.59259440385662e-06, - "loss": 0.3693, - "step": 11932 - }, - { - "epoch": 0.779883667734135, - "grad_norm": 0.456781804561615, - "learning_rate": 8.59235153084318e-06, - "loss": 0.4074, - "step": 11933 - }, - { - "epoch": 0.7799490229396772, - "grad_norm": 0.4224834740161896, - "learning_rate": 8.592108640308596e-06, - "loss": 0.3505, - "step": 11934 - }, - { - "epoch": 0.7800143781452192, - "grad_norm": 0.45787283778190613, - "learning_rate": 8.591865732254054e-06, - "loss": 0.3908, - "step": 11935 - }, - { - "epoch": 0.7800797333507614, - "grad_norm": 0.4545110762119293, - "learning_rate": 8.59162280668074e-06, - "loss": 0.3792, - "step": 11936 - }, - { - "epoch": 0.7801450885563035, - "grad_norm": 0.4336988031864166, - "learning_rate": 8.591379863589836e-06, - "loss": 0.3984, - "step": 11937 - }, - { - "epoch": 0.7802104437618457, - "grad_norm": 0.42259207367897034, - "learning_rate": 8.591136902982526e-06, - "loss": 0.3212, - "step": 11938 - }, - { - "epoch": 0.7802757989673877, - "grad_norm": 0.46574777364730835, - "learning_rate": 8.59089392486e-06, - "loss": 0.3856, - "step": 11939 - }, - { - "epoch": 0.7803411541729298, - "grad_norm": 0.4679214060306549, - "learning_rate": 8.590650929223441e-06, - "loss": 0.4304, - "step": 11940 - }, - { - "epoch": 0.780406509378472, - "grad_norm": 0.45430073142051697, - "learning_rate": 8.590407916074031e-06, - "loss": 0.3464, - "step": 11941 - }, - { - "epoch": 0.7804718645840141, - "grad_norm": 0.45531001687049866, - "learning_rate": 8.59016488541296e-06, - "loss": 0.4346, - "step": 11942 - }, - { - "epoch": 0.7805372197895563, - "grad_norm": 0.5119999647140503, - "learning_rate": 8.58992183724141e-06, - "loss": 0.4221, - "step": 11943 - }, - { - "epoch": 0.7806025749950983, - "grad_norm": 0.4710194766521454, - "learning_rate": 8.589678771560568e-06, - "loss": 0.409, - "step": 11944 - }, - { - "epoch": 0.7806679302006405, - "grad_norm": 0.42829418182373047, - "learning_rate": 8.58943568837162e-06, - "loss": 0.3526, - "step": 11945 - }, - { - "epoch": 0.7807332854061826, - "grad_norm": 0.45222848653793335, - "learning_rate": 8.589192587675747e-06, - "loss": 0.4011, - "step": 11946 - }, - { - "epoch": 0.7807986406117248, - "grad_norm": 0.40631863474845886, - "learning_rate": 8.588949469474141e-06, - "loss": 0.3294, - "step": 11947 - }, - { - "epoch": 0.7808639958172668, - "grad_norm": 0.4742248058319092, - "learning_rate": 8.588706333767984e-06, - "loss": 0.4256, - "step": 11948 - }, - { - "epoch": 0.7809293510228089, - "grad_norm": 0.442628413438797, - "learning_rate": 8.588463180558464e-06, - "loss": 0.3713, - "step": 11949 - }, - { - "epoch": 0.7809947062283511, - "grad_norm": 0.4321967363357544, - "learning_rate": 8.588220009846766e-06, - "loss": 0.3699, - "step": 11950 - }, - { - "epoch": 0.7810600614338932, - "grad_norm": 0.4628668427467346, - "learning_rate": 8.587976821634076e-06, - "loss": 0.3675, - "step": 11951 - }, - { - "epoch": 0.7811254166394354, - "grad_norm": 0.4346633851528168, - "learning_rate": 8.587733615921579e-06, - "loss": 0.3761, - "step": 11952 - }, - { - "epoch": 0.7811907718449774, - "grad_norm": 0.4312704801559448, - "learning_rate": 8.587490392710464e-06, - "loss": 0.3458, - "step": 11953 - }, - { - "epoch": 0.7812561270505196, - "grad_norm": 0.46004918217658997, - "learning_rate": 8.587247152001914e-06, - "loss": 0.3894, - "step": 11954 - }, - { - "epoch": 0.7813214822560617, - "grad_norm": 0.45357799530029297, - "learning_rate": 8.587003893797117e-06, - "loss": 0.4008, - "step": 11955 - }, - { - "epoch": 0.7813868374616039, - "grad_norm": 0.41232830286026, - "learning_rate": 8.586760618097261e-06, - "loss": 0.3083, - "step": 11956 - }, - { - "epoch": 0.7814521926671459, - "grad_norm": 0.44780534505844116, - "learning_rate": 8.586517324903529e-06, - "loss": 0.3812, - "step": 11957 - }, - { - "epoch": 0.781517547872688, - "grad_norm": 0.4769149124622345, - "learning_rate": 8.58627401421711e-06, - "loss": 0.3674, - "step": 11958 - }, - { - "epoch": 0.7815829030782302, - "grad_norm": 0.43085014820098877, - "learning_rate": 8.58603068603919e-06, - "loss": 0.3468, - "step": 11959 - }, - { - "epoch": 0.7816482582837723, - "grad_norm": 0.4569220244884491, - "learning_rate": 8.585787340370955e-06, - "loss": 0.3927, - "step": 11960 - }, - { - "epoch": 0.7817136134893145, - "grad_norm": 0.7949502468109131, - "learning_rate": 8.585543977213595e-06, - "loss": 0.3838, - "step": 11961 - }, - { - "epoch": 0.7817789686948565, - "grad_norm": 0.4006519317626953, - "learning_rate": 8.585300596568294e-06, - "loss": 0.3475, - "step": 11962 - }, - { - "epoch": 0.7818443239003987, - "grad_norm": 0.4714229106903076, - "learning_rate": 8.585057198436239e-06, - "loss": 0.408, - "step": 11963 - }, - { - "epoch": 0.7819096791059408, - "grad_norm": 0.42171719670295715, - "learning_rate": 8.58481378281862e-06, - "loss": 0.3405, - "step": 11964 - }, - { - "epoch": 0.781975034311483, - "grad_norm": 0.4483102858066559, - "learning_rate": 8.584570349716623e-06, - "loss": 0.3699, - "step": 11965 - }, - { - "epoch": 0.782040389517025, - "grad_norm": 0.49136093258857727, - "learning_rate": 8.584326899131433e-06, - "loss": 0.3813, - "step": 11966 - }, - { - "epoch": 0.7821057447225671, - "grad_norm": 0.44293758273124695, - "learning_rate": 8.584083431064238e-06, - "loss": 0.3483, - "step": 11967 - }, - { - "epoch": 0.7821710999281093, - "grad_norm": 0.4744596779346466, - "learning_rate": 8.583839945516229e-06, - "loss": 0.3792, - "step": 11968 - }, - { - "epoch": 0.7822364551336514, - "grad_norm": 0.6347219347953796, - "learning_rate": 8.583596442488588e-06, - "loss": 0.3338, - "step": 11969 - }, - { - "epoch": 0.7823018103391935, - "grad_norm": 0.44006791710853577, - "learning_rate": 8.583352921982507e-06, - "loss": 0.3547, - "step": 11970 - }, - { - "epoch": 0.7823671655447356, - "grad_norm": 0.4571622908115387, - "learning_rate": 8.583109383999173e-06, - "loss": 0.4068, - "step": 11971 - }, - { - "epoch": 0.7824325207502778, - "grad_norm": 0.4679630398750305, - "learning_rate": 8.582865828539773e-06, - "loss": 0.3377, - "step": 11972 - }, - { - "epoch": 0.7824978759558199, - "grad_norm": 0.470459908246994, - "learning_rate": 8.582622255605494e-06, - "loss": 0.4005, - "step": 11973 - }, - { - "epoch": 0.782563231161362, - "grad_norm": 0.44971734285354614, - "learning_rate": 8.582378665197526e-06, - "loss": 0.3851, - "step": 11974 - }, - { - "epoch": 0.7826285863669041, - "grad_norm": 0.4586564898490906, - "learning_rate": 8.582135057317055e-06, - "loss": 0.4114, - "step": 11975 - }, - { - "epoch": 0.7826939415724462, - "grad_norm": 0.5088337063789368, - "learning_rate": 8.581891431965272e-06, - "loss": 0.4243, - "step": 11976 - }, - { - "epoch": 0.7827592967779884, - "grad_norm": 0.45787137746810913, - "learning_rate": 8.581647789143364e-06, - "loss": 0.391, - "step": 11977 - }, - { - "epoch": 0.7828246519835305, - "grad_norm": 0.45902103185653687, - "learning_rate": 8.581404128852517e-06, - "loss": 0.3869, - "step": 11978 - }, - { - "epoch": 0.7828900071890726, - "grad_norm": 0.45155927538871765, - "learning_rate": 8.581160451093922e-06, - "loss": 0.349, - "step": 11979 - }, - { - "epoch": 0.7829553623946147, - "grad_norm": 0.4716495871543884, - "learning_rate": 8.58091675586877e-06, - "loss": 0.4086, - "step": 11980 - }, - { - "epoch": 0.7830207176001569, - "grad_norm": 0.4456287622451782, - "learning_rate": 8.580673043178242e-06, - "loss": 0.4151, - "step": 11981 - }, - { - "epoch": 0.783086072805699, - "grad_norm": 0.45639076828956604, - "learning_rate": 8.580429313023532e-06, - "loss": 0.3948, - "step": 11982 - }, - { - "epoch": 0.783151428011241, - "grad_norm": 0.46890559792518616, - "learning_rate": 8.58018556540583e-06, - "loss": 0.4031, - "step": 11983 - }, - { - "epoch": 0.7832167832167832, - "grad_norm": 0.48633939027786255, - "learning_rate": 8.579941800326322e-06, - "loss": 0.4006, - "step": 11984 - }, - { - "epoch": 0.7832821384223253, - "grad_norm": 0.45148807764053345, - "learning_rate": 8.579698017786196e-06, - "loss": 0.4164, - "step": 11985 - }, - { - "epoch": 0.7833474936278675, - "grad_norm": 0.4746284484863281, - "learning_rate": 8.579454217786644e-06, - "loss": 0.4326, - "step": 11986 - }, - { - "epoch": 0.7834128488334096, - "grad_norm": 0.46255096793174744, - "learning_rate": 8.579210400328852e-06, - "loss": 0.3971, - "step": 11987 - }, - { - "epoch": 0.7834782040389517, - "grad_norm": 0.45016103982925415, - "learning_rate": 8.578966565414014e-06, - "loss": 0.3565, - "step": 11988 - }, - { - "epoch": 0.7835435592444938, - "grad_norm": 0.443569540977478, - "learning_rate": 8.578722713043312e-06, - "loss": 0.3509, - "step": 11989 - }, - { - "epoch": 0.783608914450036, - "grad_norm": 0.4615786373615265, - "learning_rate": 8.578478843217944e-06, - "loss": 0.3812, - "step": 11990 - }, - { - "epoch": 0.7836742696555781, - "grad_norm": 0.43236657977104187, - "learning_rate": 8.578234955939094e-06, - "loss": 0.3713, - "step": 11991 - }, - { - "epoch": 0.7837396248611201, - "grad_norm": 0.4728842079639435, - "learning_rate": 8.57799105120795e-06, - "loss": 0.4118, - "step": 11992 - }, - { - "epoch": 0.7838049800666623, - "grad_norm": 0.44954821467399597, - "learning_rate": 8.577747129025706e-06, - "loss": 0.3861, - "step": 11993 - }, - { - "epoch": 0.7838703352722044, - "grad_norm": 0.4186398684978485, - "learning_rate": 8.577503189393549e-06, - "loss": 0.3541, - "step": 11994 - }, - { - "epoch": 0.7839356904777466, - "grad_norm": 0.45853495597839355, - "learning_rate": 8.57725923231267e-06, - "loss": 0.3808, - "step": 11995 - }, - { - "epoch": 0.7840010456832887, - "grad_norm": 0.43140414357185364, - "learning_rate": 8.577015257784258e-06, - "loss": 0.3362, - "step": 11996 - }, - { - "epoch": 0.7840664008888308, - "grad_norm": 0.439466267824173, - "learning_rate": 8.576771265809504e-06, - "loss": 0.3711, - "step": 11997 - }, - { - "epoch": 0.7841317560943729, - "grad_norm": 0.45974549651145935, - "learning_rate": 8.576527256389598e-06, - "loss": 0.377, - "step": 11998 - }, - { - "epoch": 0.784197111299915, - "grad_norm": 0.4531913101673126, - "learning_rate": 8.576283229525728e-06, - "loss": 0.3755, - "step": 11999 - }, - { - "epoch": 0.7842624665054572, - "grad_norm": 0.40491098165512085, - "learning_rate": 8.576039185219087e-06, - "loss": 0.3634, - "step": 12000 - }, - { - "epoch": 0.7843278217109992, - "grad_norm": 0.3893328011035919, - "learning_rate": 8.575795123470863e-06, - "loss": 0.3201, - "step": 12001 - }, - { - "epoch": 0.7843931769165414, - "grad_norm": 0.47420990467071533, - "learning_rate": 8.57555104428225e-06, - "loss": 0.4297, - "step": 12002 - }, - { - "epoch": 0.7844585321220835, - "grad_norm": 0.43641334772109985, - "learning_rate": 8.575306947654431e-06, - "loss": 0.3679, - "step": 12003 - }, - { - "epoch": 0.7845238873276257, - "grad_norm": 0.4373196065425873, - "learning_rate": 8.575062833588606e-06, - "loss": 0.3955, - "step": 12004 - }, - { - "epoch": 0.7845892425331678, - "grad_norm": 0.46208757162094116, - "learning_rate": 8.57481870208596e-06, - "loss": 0.4197, - "step": 12005 - }, - { - "epoch": 0.7846545977387099, - "grad_norm": 0.4125015437602997, - "learning_rate": 8.574574553147683e-06, - "loss": 0.3479, - "step": 12006 - }, - { - "epoch": 0.784719952944252, - "grad_norm": 0.45740821957588196, - "learning_rate": 8.574330386774968e-06, - "loss": 0.3843, - "step": 12007 - }, - { - "epoch": 0.7847853081497941, - "grad_norm": 0.42983338236808777, - "learning_rate": 8.574086202969006e-06, - "loss": 0.3678, - "step": 12008 - }, - { - "epoch": 0.7848506633553363, - "grad_norm": 0.4891479015350342, - "learning_rate": 8.573842001730987e-06, - "loss": 0.4414, - "step": 12009 - }, - { - "epoch": 0.7849160185608783, - "grad_norm": 0.4560488760471344, - "learning_rate": 8.573597783062104e-06, - "loss": 0.3986, - "step": 12010 - }, - { - "epoch": 0.7849813737664205, - "grad_norm": 0.427053302526474, - "learning_rate": 8.573353546963545e-06, - "loss": 0.3535, - "step": 12011 - }, - { - "epoch": 0.7850467289719626, - "grad_norm": 0.4430866241455078, - "learning_rate": 8.573109293436503e-06, - "loss": 0.3594, - "step": 12012 - }, - { - "epoch": 0.7851120841775048, - "grad_norm": 0.4583944082260132, - "learning_rate": 8.57286502248217e-06, - "loss": 0.393, - "step": 12013 - }, - { - "epoch": 0.7851774393830468, - "grad_norm": 0.4045864939689636, - "learning_rate": 8.572620734101735e-06, - "loss": 0.3289, - "step": 12014 - }, - { - "epoch": 0.785242794588589, - "grad_norm": 0.4391320049762726, - "learning_rate": 8.572376428296393e-06, - "loss": 0.3879, - "step": 12015 - }, - { - "epoch": 0.7853081497941311, - "grad_norm": 0.4494188725948334, - "learning_rate": 8.572132105067332e-06, - "loss": 0.3913, - "step": 12016 - }, - { - "epoch": 0.7853735049996732, - "grad_norm": 0.4446926414966583, - "learning_rate": 8.571887764415747e-06, - "loss": 0.357, - "step": 12017 - }, - { - "epoch": 0.7854388602052154, - "grad_norm": 0.42256808280944824, - "learning_rate": 8.571643406342825e-06, - "loss": 0.3386, - "step": 12018 - }, - { - "epoch": 0.7855042154107574, - "grad_norm": 0.4125842750072479, - "learning_rate": 8.571399030849763e-06, - "loss": 0.3023, - "step": 12019 - }, - { - "epoch": 0.7855695706162996, - "grad_norm": 0.446566641330719, - "learning_rate": 8.571154637937751e-06, - "loss": 0.3649, - "step": 12020 - }, - { - "epoch": 0.7856349258218417, - "grad_norm": 0.45315021276474, - "learning_rate": 8.570910227607979e-06, - "loss": 0.344, - "step": 12021 - }, - { - "epoch": 0.7857002810273839, - "grad_norm": 0.4215909242630005, - "learning_rate": 8.570665799861642e-06, - "loss": 0.3381, - "step": 12022 - }, - { - "epoch": 0.785765636232926, - "grad_norm": 0.4065757989883423, - "learning_rate": 8.57042135469993e-06, - "loss": 0.3218, - "step": 12023 - }, - { - "epoch": 0.7858309914384681, - "grad_norm": 0.4728504717350006, - "learning_rate": 8.570176892124037e-06, - "loss": 0.4315, - "step": 12024 - }, - { - "epoch": 0.7858963466440102, - "grad_norm": 0.48537716269493103, - "learning_rate": 8.569932412135153e-06, - "loss": 0.4065, - "step": 12025 - }, - { - "epoch": 0.7859617018495523, - "grad_norm": 0.4314275085926056, - "learning_rate": 8.569687914734474e-06, - "loss": 0.3648, - "step": 12026 - }, - { - "epoch": 0.7860270570550945, - "grad_norm": 0.4772936999797821, - "learning_rate": 8.56944339992319e-06, - "loss": 0.4191, - "step": 12027 - }, - { - "epoch": 0.7860924122606365, - "grad_norm": 0.4536139667034149, - "learning_rate": 8.569198867702493e-06, - "loss": 0.4019, - "step": 12028 - }, - { - "epoch": 0.7861577674661787, - "grad_norm": 0.45113837718963623, - "learning_rate": 8.568954318073577e-06, - "loss": 0.3901, - "step": 12029 - }, - { - "epoch": 0.7862231226717208, - "grad_norm": 0.4572070837020874, - "learning_rate": 8.568709751037634e-06, - "loss": 0.4117, - "step": 12030 - }, - { - "epoch": 0.786288477877263, - "grad_norm": 0.47010403871536255, - "learning_rate": 8.568465166595857e-06, - "loss": 0.387, - "step": 12031 - }, - { - "epoch": 0.786353833082805, - "grad_norm": 0.39718037843704224, - "learning_rate": 8.56822056474944e-06, - "loss": 0.3084, - "step": 12032 - }, - { - "epoch": 0.7864191882883471, - "grad_norm": 0.4905865788459778, - "learning_rate": 8.567975945499575e-06, - "loss": 0.3872, - "step": 12033 - }, - { - "epoch": 0.7864845434938893, - "grad_norm": 0.4178222417831421, - "learning_rate": 8.567731308847456e-06, - "loss": 0.3547, - "step": 12034 - }, - { - "epoch": 0.7865498986994314, - "grad_norm": 0.43827691674232483, - "learning_rate": 8.567486654794274e-06, - "loss": 0.3531, - "step": 12035 - }, - { - "epoch": 0.7866152539049736, - "grad_norm": 0.49150943756103516, - "learning_rate": 8.567241983341223e-06, - "loss": 0.4313, - "step": 12036 - }, - { - "epoch": 0.7866806091105156, - "grad_norm": 0.4697049856185913, - "learning_rate": 8.566997294489498e-06, - "loss": 0.4167, - "step": 12037 - }, - { - "epoch": 0.7867459643160578, - "grad_norm": 0.4594012498855591, - "learning_rate": 8.566752588240292e-06, - "loss": 0.3817, - "step": 12038 - }, - { - "epoch": 0.7868113195215999, - "grad_norm": 0.4005836546421051, - "learning_rate": 8.566507864594799e-06, - "loss": 0.3363, - "step": 12039 - }, - { - "epoch": 0.7868766747271421, - "grad_norm": 0.44431227445602417, - "learning_rate": 8.566263123554211e-06, - "loss": 0.3806, - "step": 12040 - }, - { - "epoch": 0.7869420299326841, - "grad_norm": 0.4370727837085724, - "learning_rate": 8.56601836511972e-06, - "loss": 0.3657, - "step": 12041 - }, - { - "epoch": 0.7870073851382262, - "grad_norm": 0.41017067432403564, - "learning_rate": 8.565773589292524e-06, - "loss": 0.278, - "step": 12042 - }, - { - "epoch": 0.7870727403437684, - "grad_norm": 0.47175443172454834, - "learning_rate": 8.565528796073815e-06, - "loss": 0.4219, - "step": 12043 - }, - { - "epoch": 0.7871380955493105, - "grad_norm": 0.42466551065444946, - "learning_rate": 8.565283985464785e-06, - "loss": 0.3525, - "step": 12044 - }, - { - "epoch": 0.7872034507548527, - "grad_norm": 0.5120432376861572, - "learning_rate": 8.565039157466632e-06, - "loss": 0.4914, - "step": 12045 - }, - { - "epoch": 0.7872688059603947, - "grad_norm": 0.45365110039711, - "learning_rate": 8.564794312080546e-06, - "loss": 0.408, - "step": 12046 - }, - { - "epoch": 0.7873341611659369, - "grad_norm": 0.43041422963142395, - "learning_rate": 8.564549449307725e-06, - "loss": 0.3815, - "step": 12047 - }, - { - "epoch": 0.787399516371479, - "grad_norm": 0.4978421926498413, - "learning_rate": 8.564304569149362e-06, - "loss": 0.3623, - "step": 12048 - }, - { - "epoch": 0.7874648715770212, - "grad_norm": 0.46047160029411316, - "learning_rate": 8.564059671606648e-06, - "loss": 0.3712, - "step": 12049 - }, - { - "epoch": 0.7875302267825632, - "grad_norm": 0.41224947571754456, - "learning_rate": 8.563814756680782e-06, - "loss": 0.3372, - "step": 12050 - }, - { - "epoch": 0.7875955819881053, - "grad_norm": 0.4488600194454193, - "learning_rate": 8.563569824372957e-06, - "loss": 0.3705, - "step": 12051 - }, - { - "epoch": 0.7876609371936475, - "grad_norm": 0.49246707558631897, - "learning_rate": 8.563324874684367e-06, - "loss": 0.4294, - "step": 12052 - }, - { - "epoch": 0.7877262923991896, - "grad_norm": 0.45360997319221497, - "learning_rate": 8.563079907616208e-06, - "loss": 0.395, - "step": 12053 - }, - { - "epoch": 0.7877916476047317, - "grad_norm": 0.45833253860473633, - "learning_rate": 8.562834923169673e-06, - "loss": 0.3672, - "step": 12054 - }, - { - "epoch": 0.7878570028102738, - "grad_norm": 0.47918999195098877, - "learning_rate": 8.562589921345958e-06, - "loss": 0.3636, - "step": 12055 - }, - { - "epoch": 0.787922358015816, - "grad_norm": 0.4814555048942566, - "learning_rate": 8.562344902146259e-06, - "loss": 0.3856, - "step": 12056 - }, - { - "epoch": 0.7879877132213581, - "grad_norm": 0.4582223892211914, - "learning_rate": 8.562099865571767e-06, - "loss": 0.3809, - "step": 12057 - }, - { - "epoch": 0.7880530684269001, - "grad_norm": 0.4686221480369568, - "learning_rate": 8.561854811623683e-06, - "loss": 0.4026, - "step": 12058 - }, - { - "epoch": 0.7881184236324423, - "grad_norm": 0.43056002259254456, - "learning_rate": 8.561609740303197e-06, - "loss": 0.336, - "step": 12059 - }, - { - "epoch": 0.7881837788379844, - "grad_norm": 0.4640575051307678, - "learning_rate": 8.561364651611507e-06, - "loss": 0.4138, - "step": 12060 - }, - { - "epoch": 0.7882491340435266, - "grad_norm": 0.4174641966819763, - "learning_rate": 8.561119545549807e-06, - "loss": 0.343, - "step": 12061 - }, - { - "epoch": 0.7883144892490687, - "grad_norm": 0.42930740118026733, - "learning_rate": 8.560874422119295e-06, - "loss": 0.3573, - "step": 12062 - }, - { - "epoch": 0.7883798444546108, - "grad_norm": 0.5078632235527039, - "learning_rate": 8.560629281321163e-06, - "loss": 0.4038, - "step": 12063 - }, - { - "epoch": 0.7884451996601529, - "grad_norm": 0.444355845451355, - "learning_rate": 8.56038412315661e-06, - "loss": 0.393, - "step": 12064 - }, - { - "epoch": 0.7885105548656951, - "grad_norm": 0.41402336955070496, - "learning_rate": 8.560138947626831e-06, - "loss": 0.354, - "step": 12065 - }, - { - "epoch": 0.7885759100712372, - "grad_norm": 0.45499247312545776, - "learning_rate": 8.559893754733019e-06, - "loss": 0.3967, - "step": 12066 - }, - { - "epoch": 0.7886412652767792, - "grad_norm": 0.4375172555446625, - "learning_rate": 8.559648544476371e-06, - "loss": 0.3498, - "step": 12067 - }, - { - "epoch": 0.7887066204823214, - "grad_norm": 0.4388751983642578, - "learning_rate": 8.559403316858086e-06, - "loss": 0.3374, - "step": 12068 - }, - { - "epoch": 0.7887719756878635, - "grad_norm": 0.41842880845069885, - "learning_rate": 8.55915807187936e-06, - "loss": 0.3576, - "step": 12069 - }, - { - "epoch": 0.7888373308934057, - "grad_norm": 0.4384239614009857, - "learning_rate": 8.558912809541382e-06, - "loss": 0.3983, - "step": 12070 - }, - { - "epoch": 0.7889026860989478, - "grad_norm": 0.4617427587509155, - "learning_rate": 8.558667529845358e-06, - "loss": 0.4209, - "step": 12071 - }, - { - "epoch": 0.7889680413044899, - "grad_norm": 0.44441232085227966, - "learning_rate": 8.558422232792477e-06, - "loss": 0.3714, - "step": 12072 - }, - { - "epoch": 0.789033396510032, - "grad_norm": 0.4267987608909607, - "learning_rate": 8.558176918383936e-06, - "loss": 0.3215, - "step": 12073 - }, - { - "epoch": 0.7890987517155742, - "grad_norm": 0.46230125427246094, - "learning_rate": 8.557931586620936e-06, - "loss": 0.3761, - "step": 12074 - }, - { - "epoch": 0.7891641069211163, - "grad_norm": 0.4586085379123688, - "learning_rate": 8.557686237504672e-06, - "loss": 0.3891, - "step": 12075 - }, - { - "epoch": 0.7892294621266583, - "grad_norm": 0.4671705961227417, - "learning_rate": 8.557440871036338e-06, - "loss": 0.3737, - "step": 12076 - }, - { - "epoch": 0.7892948173322005, - "grad_norm": 0.4593253433704376, - "learning_rate": 8.557195487217134e-06, - "loss": 0.4024, - "step": 12077 - }, - { - "epoch": 0.7893601725377426, - "grad_norm": 0.45820721983909607, - "learning_rate": 8.556950086048255e-06, - "loss": 0.3694, - "step": 12078 - }, - { - "epoch": 0.7894255277432848, - "grad_norm": 0.4556812047958374, - "learning_rate": 8.556704667530897e-06, - "loss": 0.3528, - "step": 12079 - }, - { - "epoch": 0.7894908829488269, - "grad_norm": 0.46928471326828003, - "learning_rate": 8.556459231666259e-06, - "loss": 0.3941, - "step": 12080 - }, - { - "epoch": 0.789556238154369, - "grad_norm": 0.4141043424606323, - "learning_rate": 8.556213778455535e-06, - "loss": 0.3726, - "step": 12081 - }, - { - "epoch": 0.7896215933599111, - "grad_norm": 0.45621001720428467, - "learning_rate": 8.555968307899927e-06, - "loss": 0.38, - "step": 12082 - }, - { - "epoch": 0.7896869485654532, - "grad_norm": 0.5583800077438354, - "learning_rate": 8.55572282000063e-06, - "loss": 0.4037, - "step": 12083 - }, - { - "epoch": 0.7897523037709954, - "grad_norm": 0.4414021968841553, - "learning_rate": 8.55547731475884e-06, - "loss": 0.3924, - "step": 12084 - }, - { - "epoch": 0.7898176589765374, - "grad_norm": 0.44545963406562805, - "learning_rate": 8.555231792175755e-06, - "loss": 0.3902, - "step": 12085 - }, - { - "epoch": 0.7898830141820796, - "grad_norm": 0.4575885832309723, - "learning_rate": 8.554986252252573e-06, - "loss": 0.374, - "step": 12086 - }, - { - "epoch": 0.7899483693876217, - "grad_norm": 0.43306130170822144, - "learning_rate": 8.554740694990493e-06, - "loss": 0.3648, - "step": 12087 - }, - { - "epoch": 0.7900137245931639, - "grad_norm": 0.43865975737571716, - "learning_rate": 8.55449512039071e-06, - "loss": 0.3752, - "step": 12088 - }, - { - "epoch": 0.790079079798706, - "grad_norm": 0.40036046504974365, - "learning_rate": 8.554249528454422e-06, - "loss": 0.3302, - "step": 12089 - }, - { - "epoch": 0.7901444350042481, - "grad_norm": 0.4885970950126648, - "learning_rate": 8.554003919182829e-06, - "loss": 0.4241, - "step": 12090 - }, - { - "epoch": 0.7902097902097902, - "grad_norm": 0.43497803807258606, - "learning_rate": 8.553758292577128e-06, - "loss": 0.3737, - "step": 12091 - }, - { - "epoch": 0.7902751454153323, - "grad_norm": 0.4482162892818451, - "learning_rate": 8.553512648638515e-06, - "loss": 0.3932, - "step": 12092 - }, - { - "epoch": 0.7903405006208745, - "grad_norm": 0.42345380783081055, - "learning_rate": 8.55326698736819e-06, - "loss": 0.3651, - "step": 12093 - }, - { - "epoch": 0.7904058558264165, - "grad_norm": 0.4518062174320221, - "learning_rate": 8.553021308767353e-06, - "loss": 0.4081, - "step": 12094 - }, - { - "epoch": 0.7904712110319587, - "grad_norm": 0.41406533122062683, - "learning_rate": 8.552775612837198e-06, - "loss": 0.319, - "step": 12095 - }, - { - "epoch": 0.7905365662375008, - "grad_norm": 0.43780308961868286, - "learning_rate": 8.552529899578927e-06, - "loss": 0.3725, - "step": 12096 - }, - { - "epoch": 0.790601921443043, - "grad_norm": 0.4579823315143585, - "learning_rate": 8.552284168993736e-06, - "loss": 0.3625, - "step": 12097 - }, - { - "epoch": 0.790667276648585, - "grad_norm": 0.42775416374206543, - "learning_rate": 8.552038421082824e-06, - "loss": 0.357, - "step": 12098 - }, - { - "epoch": 0.7907326318541272, - "grad_norm": 0.4572090804576874, - "learning_rate": 8.551792655847392e-06, - "loss": 0.3869, - "step": 12099 - }, - { - "epoch": 0.7907979870596693, - "grad_norm": 0.442373126745224, - "learning_rate": 8.551546873288636e-06, - "loss": 0.3707, - "step": 12100 - }, - { - "epoch": 0.7908633422652114, - "grad_norm": 0.42555850744247437, - "learning_rate": 8.551301073407755e-06, - "loss": 0.3863, - "step": 12101 - }, - { - "epoch": 0.7909286974707536, - "grad_norm": 0.43132033944129944, - "learning_rate": 8.551055256205947e-06, - "loss": 0.3972, - "step": 12102 - }, - { - "epoch": 0.7909940526762956, - "grad_norm": 0.44253009557724, - "learning_rate": 8.550809421684415e-06, - "loss": 0.4081, - "step": 12103 - }, - { - "epoch": 0.7910594078818378, - "grad_norm": 0.47049644589424133, - "learning_rate": 8.550563569844354e-06, - "loss": 0.4063, - "step": 12104 - }, - { - "epoch": 0.7911247630873799, - "grad_norm": 0.469762921333313, - "learning_rate": 8.550317700686965e-06, - "loss": 0.4042, - "step": 12105 - }, - { - "epoch": 0.7911901182929221, - "grad_norm": 0.4723997116088867, - "learning_rate": 8.550071814213447e-06, - "loss": 0.4079, - "step": 12106 - }, - { - "epoch": 0.7912554734984641, - "grad_norm": 0.43045368790626526, - "learning_rate": 8.549825910424999e-06, - "loss": 0.3287, - "step": 12107 - }, - { - "epoch": 0.7913208287040063, - "grad_norm": 0.43018412590026855, - "learning_rate": 8.549579989322818e-06, - "loss": 0.3721, - "step": 12108 - }, - { - "epoch": 0.7913861839095484, - "grad_norm": 0.46723949909210205, - "learning_rate": 8.549334050908108e-06, - "loss": 0.4454, - "step": 12109 - }, - { - "epoch": 0.7914515391150905, - "grad_norm": 0.44700801372528076, - "learning_rate": 8.549088095182064e-06, - "loss": 0.3945, - "step": 12110 - }, - { - "epoch": 0.7915168943206327, - "grad_norm": 0.45005619525909424, - "learning_rate": 8.54884212214589e-06, - "loss": 0.3962, - "step": 12111 - }, - { - "epoch": 0.7915822495261747, - "grad_norm": 0.47676965594291687, - "learning_rate": 8.548596131800782e-06, - "loss": 0.4342, - "step": 12112 - }, - { - "epoch": 0.7916476047317169, - "grad_norm": 0.41420450806617737, - "learning_rate": 8.548350124147941e-06, - "loss": 0.3374, - "step": 12113 - }, - { - "epoch": 0.791712959937259, - "grad_norm": 0.42397040128707886, - "learning_rate": 8.54810409918857e-06, - "loss": 0.3492, - "step": 12114 - }, - { - "epoch": 0.7917783151428012, - "grad_norm": 0.4471323788166046, - "learning_rate": 8.547858056923863e-06, - "loss": 0.3641, - "step": 12115 - }, - { - "epoch": 0.7918436703483432, - "grad_norm": 0.44073548913002014, - "learning_rate": 8.547611997355025e-06, - "loss": 0.4107, - "step": 12116 - }, - { - "epoch": 0.7919090255538853, - "grad_norm": 0.4198542535305023, - "learning_rate": 8.547365920483253e-06, - "loss": 0.3616, - "step": 12117 - }, - { - "epoch": 0.7919743807594275, - "grad_norm": 0.4398666322231293, - "learning_rate": 8.547119826309751e-06, - "loss": 0.3616, - "step": 12118 - }, - { - "epoch": 0.7920397359649696, - "grad_norm": 0.47608432173728943, - "learning_rate": 8.546873714835714e-06, - "loss": 0.4092, - "step": 12119 - }, - { - "epoch": 0.7921050911705118, - "grad_norm": 0.478184312582016, - "learning_rate": 8.546627586062346e-06, - "loss": 0.4113, - "step": 12120 - }, - { - "epoch": 0.7921704463760538, - "grad_norm": 0.4458080232143402, - "learning_rate": 8.546381439990847e-06, - "loss": 0.3804, - "step": 12121 - }, - { - "epoch": 0.792235801581596, - "grad_norm": 0.5123405456542969, - "learning_rate": 8.546135276622417e-06, - "loss": 0.4001, - "step": 12122 - }, - { - "epoch": 0.7923011567871381, - "grad_norm": 0.4072600305080414, - "learning_rate": 8.545889095958257e-06, - "loss": 0.3518, - "step": 12123 - }, - { - "epoch": 0.7923665119926803, - "grad_norm": 0.43664419651031494, - "learning_rate": 8.545642897999567e-06, - "loss": 0.3409, - "step": 12124 - }, - { - "epoch": 0.7924318671982223, - "grad_norm": 0.43069079518318176, - "learning_rate": 8.545396682747548e-06, - "loss": 0.3779, - "step": 12125 - }, - { - "epoch": 0.7924972224037644, - "grad_norm": 0.46924296021461487, - "learning_rate": 8.545150450203401e-06, - "loss": 0.4053, - "step": 12126 - }, - { - "epoch": 0.7925625776093066, - "grad_norm": 0.4362129867076874, - "learning_rate": 8.544904200368328e-06, - "loss": 0.3578, - "step": 12127 - }, - { - "epoch": 0.7926279328148487, - "grad_norm": 0.4406747817993164, - "learning_rate": 8.544657933243529e-06, - "loss": 0.3544, - "step": 12128 - }, - { - "epoch": 0.7926932880203909, - "grad_norm": 0.4239201843738556, - "learning_rate": 8.544411648830205e-06, - "loss": 0.3447, - "step": 12129 - }, - { - "epoch": 0.7927586432259329, - "grad_norm": 0.44481077790260315, - "learning_rate": 8.544165347129558e-06, - "loss": 0.3991, - "step": 12130 - }, - { - "epoch": 0.7928239984314751, - "grad_norm": 0.43191221356391907, - "learning_rate": 8.543919028142788e-06, - "loss": 0.3629, - "step": 12131 - }, - { - "epoch": 0.7928893536370172, - "grad_norm": 0.48538875579833984, - "learning_rate": 8.543672691871096e-06, - "loss": 0.3817, - "step": 12132 - }, - { - "epoch": 0.7929547088425594, - "grad_norm": 0.44361257553100586, - "learning_rate": 8.543426338315687e-06, - "loss": 0.3706, - "step": 12133 - }, - { - "epoch": 0.7930200640481014, - "grad_norm": 0.42010653018951416, - "learning_rate": 8.54317996747776e-06, - "loss": 0.3392, - "step": 12134 - }, - { - "epoch": 0.7930854192536435, - "grad_norm": 0.42698851227760315, - "learning_rate": 8.542933579358516e-06, - "loss": 0.3383, - "step": 12135 - }, - { - "epoch": 0.7931507744591857, - "grad_norm": 0.5849870443344116, - "learning_rate": 8.542687173959156e-06, - "loss": 0.3811, - "step": 12136 - }, - { - "epoch": 0.7932161296647278, - "grad_norm": 0.4533376097679138, - "learning_rate": 8.542440751280888e-06, - "loss": 0.4063, - "step": 12137 - }, - { - "epoch": 0.79328148487027, - "grad_norm": 0.4817275404930115, - "learning_rate": 8.542194311324904e-06, - "loss": 0.4029, - "step": 12138 - }, - { - "epoch": 0.793346840075812, - "grad_norm": 0.4313760995864868, - "learning_rate": 8.541947854092413e-06, - "loss": 0.3845, - "step": 12139 - }, - { - "epoch": 0.7934121952813542, - "grad_norm": 0.47342512011528015, - "learning_rate": 8.541701379584618e-06, - "loss": 0.4543, - "step": 12140 - }, - { - "epoch": 0.7934775504868963, - "grad_norm": 0.4508894085884094, - "learning_rate": 8.541454887802715e-06, - "loss": 0.3724, - "step": 12141 - }, - { - "epoch": 0.7935429056924383, - "grad_norm": 0.4701017141342163, - "learning_rate": 8.541208378747912e-06, - "loss": 0.3684, - "step": 12142 - }, - { - "epoch": 0.7936082608979805, - "grad_norm": 0.4481446146965027, - "learning_rate": 8.540961852421407e-06, - "loss": 0.3518, - "step": 12143 - }, - { - "epoch": 0.7936736161035226, - "grad_norm": 0.43516772985458374, - "learning_rate": 8.540715308824405e-06, - "loss": 0.3384, - "step": 12144 - }, - { - "epoch": 0.7937389713090648, - "grad_norm": 0.43795207142829895, - "learning_rate": 8.540468747958107e-06, - "loss": 0.3831, - "step": 12145 - }, - { - "epoch": 0.7938043265146069, - "grad_norm": 0.4666205048561096, - "learning_rate": 8.540222169823718e-06, - "loss": 0.4061, - "step": 12146 - }, - { - "epoch": 0.793869681720149, - "grad_norm": 0.4302486777305603, - "learning_rate": 8.539975574422438e-06, - "loss": 0.3591, - "step": 12147 - }, - { - "epoch": 0.7939350369256911, - "grad_norm": 0.432462602853775, - "learning_rate": 8.539728961755471e-06, - "loss": 0.3907, - "step": 12148 - }, - { - "epoch": 0.7940003921312333, - "grad_norm": 0.42387595772743225, - "learning_rate": 8.539482331824018e-06, - "loss": 0.3565, - "step": 12149 - }, - { - "epoch": 0.7940657473367754, - "grad_norm": 0.4779075086116791, - "learning_rate": 8.539235684629286e-06, - "loss": 0.4376, - "step": 12150 - }, - { - "epoch": 0.7941311025423174, - "grad_norm": 0.4292527735233307, - "learning_rate": 8.538989020172475e-06, - "loss": 0.352, - "step": 12151 - }, - { - "epoch": 0.7941964577478596, - "grad_norm": 0.4452821910381317, - "learning_rate": 8.538742338454789e-06, - "loss": 0.4291, - "step": 12152 - }, - { - "epoch": 0.7942618129534017, - "grad_norm": 0.5022962093353271, - "learning_rate": 8.538495639477428e-06, - "loss": 0.4611, - "step": 12153 - }, - { - "epoch": 0.7943271681589439, - "grad_norm": 0.44315487146377563, - "learning_rate": 8.538248923241601e-06, - "loss": 0.3611, - "step": 12154 - }, - { - "epoch": 0.794392523364486, - "grad_norm": 0.4711770713329315, - "learning_rate": 8.538002189748507e-06, - "loss": 0.4083, - "step": 12155 - }, - { - "epoch": 0.7944578785700281, - "grad_norm": 0.4432166516780853, - "learning_rate": 8.537755438999348e-06, - "loss": 0.4045, - "step": 12156 - }, - { - "epoch": 0.7945232337755702, - "grad_norm": 0.46643519401550293, - "learning_rate": 8.537508670995334e-06, - "loss": 0.39, - "step": 12157 - }, - { - "epoch": 0.7945885889811124, - "grad_norm": 0.41968482732772827, - "learning_rate": 8.537261885737662e-06, - "loss": 0.3605, - "step": 12158 - }, - { - "epoch": 0.7946539441866545, - "grad_norm": 0.4558030664920807, - "learning_rate": 8.53701508322754e-06, - "loss": 0.3832, - "step": 12159 - }, - { - "epoch": 0.7947192993921965, - "grad_norm": 0.44898343086242676, - "learning_rate": 8.536768263466171e-06, - "loss": 0.3798, - "step": 12160 - }, - { - "epoch": 0.7947846545977387, - "grad_norm": 0.45856231451034546, - "learning_rate": 8.536521426454758e-06, - "loss": 0.3989, - "step": 12161 - }, - { - "epoch": 0.7948500098032808, - "grad_norm": 0.41914859414100647, - "learning_rate": 8.536274572194502e-06, - "loss": 0.3207, - "step": 12162 - }, - { - "epoch": 0.794915365008823, - "grad_norm": 0.4341502785682678, - "learning_rate": 8.536027700686613e-06, - "loss": 0.3604, - "step": 12163 - }, - { - "epoch": 0.794980720214365, - "grad_norm": 0.4363396167755127, - "learning_rate": 8.535780811932289e-06, - "loss": 0.3688, - "step": 12164 - }, - { - "epoch": 0.7950460754199072, - "grad_norm": 0.44624993205070496, - "learning_rate": 8.535533905932739e-06, - "loss": 0.4038, - "step": 12165 - }, - { - "epoch": 0.7951114306254493, - "grad_norm": 0.42349255084991455, - "learning_rate": 8.535286982689164e-06, - "loss": 0.3669, - "step": 12166 - }, - { - "epoch": 0.7951767858309914, - "grad_norm": 0.4911760687828064, - "learning_rate": 8.53504004220277e-06, - "loss": 0.4371, - "step": 12167 - }, - { - "epoch": 0.7952421410365336, - "grad_norm": 0.4804944097995758, - "learning_rate": 8.534793084474761e-06, - "loss": 0.3773, - "step": 12168 - }, - { - "epoch": 0.7953074962420756, - "grad_norm": 0.4469417631626129, - "learning_rate": 8.534546109506342e-06, - "loss": 0.384, - "step": 12169 - }, - { - "epoch": 0.7953728514476178, - "grad_norm": 0.4474070966243744, - "learning_rate": 8.534299117298718e-06, - "loss": 0.3865, - "step": 12170 - }, - { - "epoch": 0.7954382066531599, - "grad_norm": 0.4352922737598419, - "learning_rate": 8.534052107853092e-06, - "loss": 0.3551, - "step": 12171 - }, - { - "epoch": 0.7955035618587021, - "grad_norm": 0.45055779814720154, - "learning_rate": 8.533805081170669e-06, - "loss": 0.3833, - "step": 12172 - }, - { - "epoch": 0.7955689170642442, - "grad_norm": 0.3985026180744171, - "learning_rate": 8.533558037252654e-06, - "loss": 0.3392, - "step": 12173 - }, - { - "epoch": 0.7956342722697863, - "grad_norm": 0.40868639945983887, - "learning_rate": 8.533310976100252e-06, - "loss": 0.3638, - "step": 12174 - }, - { - "epoch": 0.7956996274753284, - "grad_norm": 0.4311105012893677, - "learning_rate": 8.53306389771467e-06, - "loss": 0.3356, - "step": 12175 - }, - { - "epoch": 0.7957649826808705, - "grad_norm": 0.4652247130870819, - "learning_rate": 8.532816802097112e-06, - "loss": 0.3835, - "step": 12176 - }, - { - "epoch": 0.7958303378864127, - "grad_norm": 0.44967931509017944, - "learning_rate": 8.532569689248782e-06, - "loss": 0.3794, - "step": 12177 - }, - { - "epoch": 0.7958956930919547, - "grad_norm": 0.4602295160293579, - "learning_rate": 8.532322559170885e-06, - "loss": 0.3753, - "step": 12178 - }, - { - "epoch": 0.7959610482974969, - "grad_norm": 0.4297156035900116, - "learning_rate": 8.532075411864628e-06, - "loss": 0.3654, - "step": 12179 - }, - { - "epoch": 0.796026403503039, - "grad_norm": 0.46110835671424866, - "learning_rate": 8.531828247331213e-06, - "loss": 0.4192, - "step": 12180 - }, - { - "epoch": 0.7960917587085812, - "grad_norm": 0.43538546562194824, - "learning_rate": 8.531581065571852e-06, - "loss": 0.3274, - "step": 12181 - }, - { - "epoch": 0.7961571139141232, - "grad_norm": 0.46276628971099854, - "learning_rate": 8.531333866587744e-06, - "loss": 0.38, - "step": 12182 - }, - { - "epoch": 0.7962224691196654, - "grad_norm": 0.4322505593299866, - "learning_rate": 8.531086650380098e-06, - "loss": 0.344, - "step": 12183 - }, - { - "epoch": 0.7962878243252075, - "grad_norm": 0.43760770559310913, - "learning_rate": 8.53083941695012e-06, - "loss": 0.3951, - "step": 12184 - }, - { - "epoch": 0.7963531795307496, - "grad_norm": 0.45469361543655396, - "learning_rate": 8.530592166299014e-06, - "loss": 0.4405, - "step": 12185 - }, - { - "epoch": 0.7964185347362918, - "grad_norm": 0.41518306732177734, - "learning_rate": 8.530344898427987e-06, - "loss": 0.3175, - "step": 12186 - }, - { - "epoch": 0.7964838899418338, - "grad_norm": 0.4440505802631378, - "learning_rate": 8.530097613338244e-06, - "loss": 0.3986, - "step": 12187 - }, - { - "epoch": 0.796549245147376, - "grad_norm": 0.4459461569786072, - "learning_rate": 8.529850311030994e-06, - "loss": 0.3866, - "step": 12188 - }, - { - "epoch": 0.7966146003529181, - "grad_norm": 0.5046373009681702, - "learning_rate": 8.52960299150744e-06, - "loss": 0.3608, - "step": 12189 - }, - { - "epoch": 0.7966799555584603, - "grad_norm": 0.44640427827835083, - "learning_rate": 8.52935565476879e-06, - "loss": 0.3942, - "step": 12190 - }, - { - "epoch": 0.7967453107640023, - "grad_norm": 0.4735654592514038, - "learning_rate": 8.52910830081625e-06, - "loss": 0.4165, - "step": 12191 - }, - { - "epoch": 0.7968106659695445, - "grad_norm": 0.47037145495414734, - "learning_rate": 8.528860929651024e-06, - "loss": 0.3997, - "step": 12192 - }, - { - "epoch": 0.7968760211750866, - "grad_norm": 0.41389429569244385, - "learning_rate": 8.528613541274323e-06, - "loss": 0.3523, - "step": 12193 - }, - { - "epoch": 0.7969413763806287, - "grad_norm": 0.44254520535469055, - "learning_rate": 8.528366135687351e-06, - "loss": 0.3442, - "step": 12194 - }, - { - "epoch": 0.7970067315861709, - "grad_norm": 0.4370190501213074, - "learning_rate": 8.528118712891314e-06, - "loss": 0.3524, - "step": 12195 - }, - { - "epoch": 0.7970720867917129, - "grad_norm": 0.423596054315567, - "learning_rate": 8.52787127288742e-06, - "loss": 0.382, - "step": 12196 - }, - { - "epoch": 0.7971374419972551, - "grad_norm": 0.4484865367412567, - "learning_rate": 8.527623815676878e-06, - "loss": 0.3859, - "step": 12197 - }, - { - "epoch": 0.7972027972027972, - "grad_norm": 0.4844169020652771, - "learning_rate": 8.52737634126089e-06, - "loss": 0.4199, - "step": 12198 - }, - { - "epoch": 0.7972681524083394, - "grad_norm": 0.4097655117511749, - "learning_rate": 8.527128849640667e-06, - "loss": 0.3088, - "step": 12199 - }, - { - "epoch": 0.7973335076138814, - "grad_norm": 0.4613126516342163, - "learning_rate": 8.526881340817414e-06, - "loss": 0.3832, - "step": 12200 - }, - { - "epoch": 0.7973988628194235, - "grad_norm": 0.4373398423194885, - "learning_rate": 8.526633814792338e-06, - "loss": 0.3604, - "step": 12201 - }, - { - "epoch": 0.7974642180249657, - "grad_norm": 0.43399253487586975, - "learning_rate": 8.526386271566647e-06, - "loss": 0.39, - "step": 12202 - }, - { - "epoch": 0.7975295732305078, - "grad_norm": 0.4530579447746277, - "learning_rate": 8.526138711141551e-06, - "loss": 0.3794, - "step": 12203 - }, - { - "epoch": 0.79759492843605, - "grad_norm": 0.4073675870895386, - "learning_rate": 8.525891133518252e-06, - "loss": 0.3299, - "step": 12204 - }, - { - "epoch": 0.797660283641592, - "grad_norm": 0.4187154173851013, - "learning_rate": 8.525643538697963e-06, - "loss": 0.3465, - "step": 12205 - }, - { - "epoch": 0.7977256388471342, - "grad_norm": 0.4881112575531006, - "learning_rate": 8.525395926681887e-06, - "loss": 0.3826, - "step": 12206 - }, - { - "epoch": 0.7977909940526763, - "grad_norm": 0.4807402491569519, - "learning_rate": 8.525148297471236e-06, - "loss": 0.4153, - "step": 12207 - }, - { - "epoch": 0.7978563492582185, - "grad_norm": 0.4205600917339325, - "learning_rate": 8.524900651067213e-06, - "loss": 0.3446, - "step": 12208 - }, - { - "epoch": 0.7979217044637605, - "grad_norm": 0.47125667333602905, - "learning_rate": 8.524652987471029e-06, - "loss": 0.3948, - "step": 12209 - }, - { - "epoch": 0.7979870596693026, - "grad_norm": 0.40634217858314514, - "learning_rate": 8.524405306683892e-06, - "loss": 0.3124, - "step": 12210 - }, - { - "epoch": 0.7980524148748448, - "grad_norm": 0.4541061818599701, - "learning_rate": 8.52415760870701e-06, - "loss": 0.3901, - "step": 12211 - }, - { - "epoch": 0.7981177700803869, - "grad_norm": 0.45898061990737915, - "learning_rate": 8.52390989354159e-06, - "loss": 0.3824, - "step": 12212 - }, - { - "epoch": 0.798183125285929, - "grad_norm": 0.3998471796512604, - "learning_rate": 8.523662161188839e-06, - "loss": 0.3108, - "step": 12213 - }, - { - "epoch": 0.7982484804914711, - "grad_norm": 0.4205678701400757, - "learning_rate": 8.523414411649968e-06, - "loss": 0.3818, - "step": 12214 - }, - { - "epoch": 0.7983138356970133, - "grad_norm": 0.4338730275630951, - "learning_rate": 8.523166644926185e-06, - "loss": 0.3599, - "step": 12215 - }, - { - "epoch": 0.7983791909025554, - "grad_norm": 0.4278663396835327, - "learning_rate": 8.522918861018698e-06, - "loss": 0.3531, - "step": 12216 - }, - { - "epoch": 0.7984445461080976, - "grad_norm": 0.488136887550354, - "learning_rate": 8.522671059928714e-06, - "loss": 0.4025, - "step": 12217 - }, - { - "epoch": 0.7985099013136396, - "grad_norm": 0.4601065516471863, - "learning_rate": 8.522423241657446e-06, - "loss": 0.4304, - "step": 12218 - }, - { - "epoch": 0.7985752565191817, - "grad_norm": 0.4728713631629944, - "learning_rate": 8.522175406206096e-06, - "loss": 0.4518, - "step": 12219 - }, - { - "epoch": 0.7986406117247239, - "grad_norm": 0.6645670533180237, - "learning_rate": 8.521927553575877e-06, - "loss": 0.4378, - "step": 12220 - }, - { - "epoch": 0.798705966930266, - "grad_norm": 0.4364054203033447, - "learning_rate": 8.521679683767996e-06, - "loss": 0.3364, - "step": 12221 - }, - { - "epoch": 0.7987713221358081, - "grad_norm": 0.46858781576156616, - "learning_rate": 8.521431796783666e-06, - "loss": 0.3816, - "step": 12222 - }, - { - "epoch": 0.7988366773413502, - "grad_norm": 0.4912148118019104, - "learning_rate": 8.521183892624092e-06, - "loss": 0.4333, - "step": 12223 - }, - { - "epoch": 0.7989020325468924, - "grad_norm": 0.4223543405532837, - "learning_rate": 8.520935971290484e-06, - "loss": 0.3715, - "step": 12224 - }, - { - "epoch": 0.7989673877524345, - "grad_norm": 0.47039949893951416, - "learning_rate": 8.520688032784051e-06, - "loss": 0.4214, - "step": 12225 - }, - { - "epoch": 0.7990327429579765, - "grad_norm": 0.43631458282470703, - "learning_rate": 8.520440077106004e-06, - "loss": 0.3829, - "step": 12226 - }, - { - "epoch": 0.7990980981635187, - "grad_norm": 0.4213436543941498, - "learning_rate": 8.52019210425755e-06, - "loss": 0.351, - "step": 12227 - }, - { - "epoch": 0.7991634533690608, - "grad_norm": 0.4542752802371979, - "learning_rate": 8.519944114239901e-06, - "loss": 0.4028, - "step": 12228 - }, - { - "epoch": 0.799228808574603, - "grad_norm": 0.4376654624938965, - "learning_rate": 8.519696107054265e-06, - "loss": 0.358, - "step": 12229 - }, - { - "epoch": 0.7992941637801451, - "grad_norm": 0.461787611246109, - "learning_rate": 8.51944808270185e-06, - "loss": 0.4355, - "step": 12230 - }, - { - "epoch": 0.7993595189856872, - "grad_norm": 0.49870526790618896, - "learning_rate": 8.519200041183868e-06, - "loss": 0.3983, - "step": 12231 - }, - { - "epoch": 0.7994248741912293, - "grad_norm": 0.4470604360103607, - "learning_rate": 8.518951982501529e-06, - "loss": 0.3696, - "step": 12232 - }, - { - "epoch": 0.7994902293967715, - "grad_norm": 0.46619316935539246, - "learning_rate": 8.518703906656042e-06, - "loss": 0.4443, - "step": 12233 - }, - { - "epoch": 0.7995555846023136, - "grad_norm": 0.47135692834854126, - "learning_rate": 8.518455813648616e-06, - "loss": 0.3962, - "step": 12234 - }, - { - "epoch": 0.7996209398078556, - "grad_norm": 0.4475511908531189, - "learning_rate": 8.518207703480463e-06, - "loss": 0.3908, - "step": 12235 - }, - { - "epoch": 0.7996862950133978, - "grad_norm": 0.4283462166786194, - "learning_rate": 8.517959576152793e-06, - "loss": 0.3298, - "step": 12236 - }, - { - "epoch": 0.7997516502189399, - "grad_norm": 0.41792038083076477, - "learning_rate": 8.517711431666816e-06, - "loss": 0.3231, - "step": 12237 - }, - { - "epoch": 0.7998170054244821, - "grad_norm": 0.4252481758594513, - "learning_rate": 8.517463270023737e-06, - "loss": 0.3798, - "step": 12238 - }, - { - "epoch": 0.7998823606300242, - "grad_norm": 0.42247527837753296, - "learning_rate": 8.517215091224777e-06, - "loss": 0.3541, - "step": 12239 - }, - { - "epoch": 0.7999477158355663, - "grad_norm": 0.43758153915405273, - "learning_rate": 8.516966895271137e-06, - "loss": 0.3501, - "step": 12240 - }, - { - "epoch": 0.8000130710411084, - "grad_norm": 0.4436210095882416, - "learning_rate": 8.516718682164032e-06, - "loss": 0.3677, - "step": 12241 - }, - { - "epoch": 0.8000784262466506, - "grad_norm": 0.4343346655368805, - "learning_rate": 8.516470451904673e-06, - "loss": 0.4103, - "step": 12242 - }, - { - "epoch": 0.8001437814521927, - "grad_norm": 0.41750410199165344, - "learning_rate": 8.516222204494267e-06, - "loss": 0.3427, - "step": 12243 - }, - { - "epoch": 0.8002091366577347, - "grad_norm": 0.4535248279571533, - "learning_rate": 8.515973939934027e-06, - "loss": 0.4254, - "step": 12244 - }, - { - "epoch": 0.8002744918632769, - "grad_norm": 0.44574499130249023, - "learning_rate": 8.515725658225167e-06, - "loss": 0.3841, - "step": 12245 - }, - { - "epoch": 0.800339847068819, - "grad_norm": 0.4469446539878845, - "learning_rate": 8.515477359368894e-06, - "loss": 0.3697, - "step": 12246 - }, - { - "epoch": 0.8004052022743612, - "grad_norm": 0.403382807970047, - "learning_rate": 8.515229043366417e-06, - "loss": 0.3328, - "step": 12247 - }, - { - "epoch": 0.8004705574799033, - "grad_norm": 0.42954376339912415, - "learning_rate": 8.514980710218955e-06, - "loss": 0.3752, - "step": 12248 - }, - { - "epoch": 0.8005359126854454, - "grad_norm": 0.4429025650024414, - "learning_rate": 8.51473235992771e-06, - "loss": 0.3573, - "step": 12249 - }, - { - "epoch": 0.8006012678909875, - "grad_norm": 0.43553319573402405, - "learning_rate": 8.5144839924939e-06, - "loss": 0.3615, - "step": 12250 - }, - { - "epoch": 0.8006666230965296, - "grad_norm": 0.44023191928863525, - "learning_rate": 8.514235607918732e-06, - "loss": 0.3924, - "step": 12251 - }, - { - "epoch": 0.8007319783020718, - "grad_norm": 0.46684056520462036, - "learning_rate": 8.513987206203423e-06, - "loss": 0.4431, - "step": 12252 - }, - { - "epoch": 0.8007973335076138, - "grad_norm": 0.42626672983169556, - "learning_rate": 8.513738787349178e-06, - "loss": 0.3234, - "step": 12253 - }, - { - "epoch": 0.800862688713156, - "grad_norm": 0.4428284168243408, - "learning_rate": 8.513490351357212e-06, - "loss": 0.3766, - "step": 12254 - }, - { - "epoch": 0.8009280439186981, - "grad_norm": 0.43598613142967224, - "learning_rate": 8.513241898228737e-06, - "loss": 0.3352, - "step": 12255 - }, - { - "epoch": 0.8009933991242403, - "grad_norm": 0.45488062500953674, - "learning_rate": 8.512993427964964e-06, - "loss": 0.3913, - "step": 12256 - }, - { - "epoch": 0.8010587543297824, - "grad_norm": 0.42344170808792114, - "learning_rate": 8.512744940567105e-06, - "loss": 0.3673, - "step": 12257 - }, - { - "epoch": 0.8011241095353245, - "grad_norm": 0.4180864095687866, - "learning_rate": 8.512496436036372e-06, - "loss": 0.3367, - "step": 12258 - }, - { - "epoch": 0.8011894647408666, - "grad_norm": 0.4518779218196869, - "learning_rate": 8.512247914373979e-06, - "loss": 0.38, - "step": 12259 - }, - { - "epoch": 0.8012548199464087, - "grad_norm": 0.4913984537124634, - "learning_rate": 8.511999375581134e-06, - "loss": 0.439, - "step": 12260 - }, - { - "epoch": 0.8013201751519509, - "grad_norm": 0.4122963845729828, - "learning_rate": 8.511750819659052e-06, - "loss": 0.3594, - "step": 12261 - }, - { - "epoch": 0.8013855303574929, - "grad_norm": 0.4171193242073059, - "learning_rate": 8.511502246608945e-06, - "loss": 0.3445, - "step": 12262 - }, - { - "epoch": 0.8014508855630351, - "grad_norm": 0.43022075295448303, - "learning_rate": 8.511253656432024e-06, - "loss": 0.3554, - "step": 12263 - }, - { - "epoch": 0.8015162407685772, - "grad_norm": 0.4417416453361511, - "learning_rate": 8.511005049129502e-06, - "loss": 0.3807, - "step": 12264 - }, - { - "epoch": 0.8015815959741194, - "grad_norm": 0.45779842138290405, - "learning_rate": 8.510756424702594e-06, - "loss": 0.3854, - "step": 12265 - }, - { - "epoch": 0.8016469511796614, - "grad_norm": 0.43774542212486267, - "learning_rate": 8.51050778315251e-06, - "loss": 0.3775, - "step": 12266 - }, - { - "epoch": 0.8017123063852036, - "grad_norm": 0.44353967905044556, - "learning_rate": 8.510259124480462e-06, - "loss": 0.3792, - "step": 12267 - }, - { - "epoch": 0.8017776615907457, - "grad_norm": 0.4183558523654938, - "learning_rate": 8.510010448687666e-06, - "loss": 0.3253, - "step": 12268 - }, - { - "epoch": 0.8018430167962878, - "grad_norm": 0.426309734582901, - "learning_rate": 8.509761755775331e-06, - "loss": 0.3579, - "step": 12269 - }, - { - "epoch": 0.80190837200183, - "grad_norm": 0.43674948811531067, - "learning_rate": 8.509513045744674e-06, - "loss": 0.4057, - "step": 12270 - }, - { - "epoch": 0.801973727207372, - "grad_norm": 0.41553187370300293, - "learning_rate": 8.509264318596906e-06, - "loss": 0.3532, - "step": 12271 - }, - { - "epoch": 0.8020390824129142, - "grad_norm": 0.4216391146183014, - "learning_rate": 8.50901557433324e-06, - "loss": 0.3515, - "step": 12272 - }, - { - "epoch": 0.8021044376184563, - "grad_norm": 0.48671314120292664, - "learning_rate": 8.50876681295489e-06, - "loss": 0.4558, - "step": 12273 - }, - { - "epoch": 0.8021697928239985, - "grad_norm": 0.4296746850013733, - "learning_rate": 8.508518034463066e-06, - "loss": 0.3793, - "step": 12274 - }, - { - "epoch": 0.8022351480295405, - "grad_norm": 0.45327967405319214, - "learning_rate": 8.508269238858986e-06, - "loss": 0.4051, - "step": 12275 - }, - { - "epoch": 0.8023005032350827, - "grad_norm": 0.4724697172641754, - "learning_rate": 8.508020426143862e-06, - "loss": 0.3397, - "step": 12276 - }, - { - "epoch": 0.8023658584406248, - "grad_norm": 0.44395411014556885, - "learning_rate": 8.507771596318905e-06, - "loss": 0.3966, - "step": 12277 - }, - { - "epoch": 0.8024312136461669, - "grad_norm": 0.4271918535232544, - "learning_rate": 8.507522749385334e-06, - "loss": 0.3414, - "step": 12278 - }, - { - "epoch": 0.8024965688517091, - "grad_norm": 0.47543302178382874, - "learning_rate": 8.507273885344356e-06, - "loss": 0.3468, - "step": 12279 - }, - { - "epoch": 0.8025619240572511, - "grad_norm": 0.4737512469291687, - "learning_rate": 8.50702500419719e-06, - "loss": 0.4015, - "step": 12280 - }, - { - "epoch": 0.8026272792627933, - "grad_norm": 0.45727089047431946, - "learning_rate": 8.506776105945049e-06, - "loss": 0.3922, - "step": 12281 - }, - { - "epoch": 0.8026926344683354, - "grad_norm": 0.4550285041332245, - "learning_rate": 8.506527190589145e-06, - "loss": 0.3911, - "step": 12282 - }, - { - "epoch": 0.8027579896738776, - "grad_norm": 0.4296853542327881, - "learning_rate": 8.506278258130692e-06, - "loss": 0.4053, - "step": 12283 - }, - { - "epoch": 0.8028233448794196, - "grad_norm": 0.42667803168296814, - "learning_rate": 8.506029308570907e-06, - "loss": 0.3255, - "step": 12284 - }, - { - "epoch": 0.8028887000849617, - "grad_norm": 0.4762926995754242, - "learning_rate": 8.505780341911001e-06, - "loss": 0.4228, - "step": 12285 - }, - { - "epoch": 0.8029540552905039, - "grad_norm": 0.43546655774116516, - "learning_rate": 8.505531358152191e-06, - "loss": 0.3599, - "step": 12286 - }, - { - "epoch": 0.803019410496046, - "grad_norm": 0.47526538372039795, - "learning_rate": 8.50528235729569e-06, - "loss": 0.4114, - "step": 12287 - }, - { - "epoch": 0.8030847657015882, - "grad_norm": 0.452122300863266, - "learning_rate": 8.505033339342713e-06, - "loss": 0.3833, - "step": 12288 - }, - { - "epoch": 0.8031501209071302, - "grad_norm": 0.4374464750289917, - "learning_rate": 8.504784304294474e-06, - "loss": 0.3714, - "step": 12289 - }, - { - "epoch": 0.8032154761126724, - "grad_norm": 0.4244959354400635, - "learning_rate": 8.504535252152186e-06, - "loss": 0.3376, - "step": 12290 - }, - { - "epoch": 0.8032808313182145, - "grad_norm": 0.47145622968673706, - "learning_rate": 8.504286182917066e-06, - "loss": 0.4154, - "step": 12291 - }, - { - "epoch": 0.8033461865237567, - "grad_norm": 0.45672184228897095, - "learning_rate": 8.50403709659033e-06, - "loss": 0.3833, - "step": 12292 - }, - { - "epoch": 0.8034115417292987, - "grad_norm": 0.4509463608264923, - "learning_rate": 8.50378799317319e-06, - "loss": 0.394, - "step": 12293 - }, - { - "epoch": 0.8034768969348408, - "grad_norm": 0.44769760966300964, - "learning_rate": 8.503538872666861e-06, - "loss": 0.4273, - "step": 12294 - }, - { - "epoch": 0.803542252140383, - "grad_norm": 0.4521794021129608, - "learning_rate": 8.503289735072561e-06, - "loss": 0.3701, - "step": 12295 - }, - { - "epoch": 0.8036076073459251, - "grad_norm": 0.46313586831092834, - "learning_rate": 8.503040580391503e-06, - "loss": 0.386, - "step": 12296 - }, - { - "epoch": 0.8036729625514673, - "grad_norm": 0.47603359818458557, - "learning_rate": 8.502791408624902e-06, - "loss": 0.4026, - "step": 12297 - }, - { - "epoch": 0.8037383177570093, - "grad_norm": 0.485567569732666, - "learning_rate": 8.502542219773974e-06, - "loss": 0.4434, - "step": 12298 - }, - { - "epoch": 0.8038036729625515, - "grad_norm": 0.4439026117324829, - "learning_rate": 8.502293013839935e-06, - "loss": 0.3615, - "step": 12299 - }, - { - "epoch": 0.8038690281680936, - "grad_norm": 0.42811155319213867, - "learning_rate": 8.502043790823999e-06, - "loss": 0.3823, - "step": 12300 - }, - { - "epoch": 0.8039343833736358, - "grad_norm": 0.45521873235702515, - "learning_rate": 8.50179455072738e-06, - "loss": 0.4057, - "step": 12301 - }, - { - "epoch": 0.8039997385791778, - "grad_norm": 0.4294131100177765, - "learning_rate": 8.5015452935513e-06, - "loss": 0.3587, - "step": 12302 - }, - { - "epoch": 0.8040650937847199, - "grad_norm": 0.45609045028686523, - "learning_rate": 8.50129601929697e-06, - "loss": 0.3966, - "step": 12303 - }, - { - "epoch": 0.8041304489902621, - "grad_norm": 0.43618375062942505, - "learning_rate": 8.501046727965603e-06, - "loss": 0.3732, - "step": 12304 - }, - { - "epoch": 0.8041958041958042, - "grad_norm": 0.4643310010433197, - "learning_rate": 8.50079741955842e-06, - "loss": 0.4294, - "step": 12305 - }, - { - "epoch": 0.8042611594013463, - "grad_norm": 0.40513744950294495, - "learning_rate": 8.500548094076635e-06, - "loss": 0.3319, - "step": 12306 - }, - { - "epoch": 0.8043265146068884, - "grad_norm": 0.4380747675895691, - "learning_rate": 8.500298751521465e-06, - "loss": 0.3565, - "step": 12307 - }, - { - "epoch": 0.8043918698124306, - "grad_norm": 0.4322327971458435, - "learning_rate": 8.500049391894125e-06, - "loss": 0.3767, - "step": 12308 - }, - { - "epoch": 0.8044572250179727, - "grad_norm": 0.41715767979621887, - "learning_rate": 8.499800015195832e-06, - "loss": 0.3245, - "step": 12309 - }, - { - "epoch": 0.8045225802235147, - "grad_norm": 0.41645169258117676, - "learning_rate": 8.499550621427801e-06, - "loss": 0.3221, - "step": 12310 - }, - { - "epoch": 0.8045879354290569, - "grad_norm": 0.42277106642723083, - "learning_rate": 8.49930121059125e-06, - "loss": 0.3426, - "step": 12311 - }, - { - "epoch": 0.804653290634599, - "grad_norm": 0.4513515830039978, - "learning_rate": 8.499051782687394e-06, - "loss": 0.3751, - "step": 12312 - }, - { - "epoch": 0.8047186458401412, - "grad_norm": 0.41784802079200745, - "learning_rate": 8.498802337717451e-06, - "loss": 0.3433, - "step": 12313 - }, - { - "epoch": 0.8047840010456833, - "grad_norm": 0.4818885028362274, - "learning_rate": 8.498552875682635e-06, - "loss": 0.4179, - "step": 12314 - }, - { - "epoch": 0.8048493562512254, - "grad_norm": 0.43679431080818176, - "learning_rate": 8.498303396584166e-06, - "loss": 0.3568, - "step": 12315 - }, - { - "epoch": 0.8049147114567675, - "grad_norm": 0.4772944450378418, - "learning_rate": 8.498053900423259e-06, - "loss": 0.4143, - "step": 12316 - }, - { - "epoch": 0.8049800666623097, - "grad_norm": 0.47681671380996704, - "learning_rate": 8.497804387201133e-06, - "loss": 0.4233, - "step": 12317 - }, - { - "epoch": 0.8050454218678518, - "grad_norm": 0.47561073303222656, - "learning_rate": 8.497554856919001e-06, - "loss": 0.376, - "step": 12318 - }, - { - "epoch": 0.8051107770733938, - "grad_norm": 0.4355849027633667, - "learning_rate": 8.497305309578085e-06, - "loss": 0.3985, - "step": 12319 - }, - { - "epoch": 0.805176132278936, - "grad_norm": 0.43489521741867065, - "learning_rate": 8.497055745179597e-06, - "loss": 0.315, - "step": 12320 - }, - { - "epoch": 0.8052414874844781, - "grad_norm": 0.45800769329071045, - "learning_rate": 8.496806163724758e-06, - "loss": 0.3793, - "step": 12321 - }, - { - "epoch": 0.8053068426900203, - "grad_norm": 0.47356700897216797, - "learning_rate": 8.496556565214783e-06, - "loss": 0.4409, - "step": 12322 - }, - { - "epoch": 0.8053721978955624, - "grad_norm": 0.44182270765304565, - "learning_rate": 8.496306949650892e-06, - "loss": 0.3491, - "step": 12323 - }, - { - "epoch": 0.8054375531011045, - "grad_norm": 0.4367234408855438, - "learning_rate": 8.496057317034299e-06, - "loss": 0.3939, - "step": 12324 - }, - { - "epoch": 0.8055029083066466, - "grad_norm": 0.45257240533828735, - "learning_rate": 8.495807667366222e-06, - "loss": 0.3617, - "step": 12325 - }, - { - "epoch": 0.8055682635121888, - "grad_norm": 0.4408845007419586, - "learning_rate": 8.495558000647883e-06, - "loss": 0.3758, - "step": 12326 - }, - { - "epoch": 0.8056336187177309, - "grad_norm": 0.47732558846473694, - "learning_rate": 8.495308316880496e-06, - "loss": 0.3908, - "step": 12327 - }, - { - "epoch": 0.8056989739232729, - "grad_norm": 0.41833725571632385, - "learning_rate": 8.495058616065278e-06, - "loss": 0.3457, - "step": 12328 - }, - { - "epoch": 0.8057643291288151, - "grad_norm": 0.43821439146995544, - "learning_rate": 8.494808898203448e-06, - "loss": 0.366, - "step": 12329 - }, - { - "epoch": 0.8058296843343572, - "grad_norm": 0.47947388887405396, - "learning_rate": 8.494559163296227e-06, - "loss": 0.3653, - "step": 12330 - }, - { - "epoch": 0.8058950395398994, - "grad_norm": 0.400860071182251, - "learning_rate": 8.494309411344828e-06, - "loss": 0.3411, - "step": 12331 - }, - { - "epoch": 0.8059603947454415, - "grad_norm": 0.4695069193840027, - "learning_rate": 8.494059642350471e-06, - "loss": 0.4497, - "step": 12332 - }, - { - "epoch": 0.8060257499509836, - "grad_norm": 0.4215098023414612, - "learning_rate": 8.493809856314376e-06, - "loss": 0.3389, - "step": 12333 - }, - { - "epoch": 0.8060911051565257, - "grad_norm": 0.4937513470649719, - "learning_rate": 8.493560053237762e-06, - "loss": 0.4805, - "step": 12334 - }, - { - "epoch": 0.8061564603620678, - "grad_norm": 0.43248605728149414, - "learning_rate": 8.493310233121842e-06, - "loss": 0.3614, - "step": 12335 - }, - { - "epoch": 0.80622181556761, - "grad_norm": 0.4014105796813965, - "learning_rate": 8.493060395967839e-06, - "loss": 0.3035, - "step": 12336 - }, - { - "epoch": 0.806287170773152, - "grad_norm": 0.43782347440719604, - "learning_rate": 8.492810541776971e-06, - "loss": 0.3547, - "step": 12337 - }, - { - "epoch": 0.8063525259786942, - "grad_norm": 0.4088995158672333, - "learning_rate": 8.492560670550454e-06, - "loss": 0.3, - "step": 12338 - }, - { - "epoch": 0.8064178811842363, - "grad_norm": 0.4330845773220062, - "learning_rate": 8.49231078228951e-06, - "loss": 0.3511, - "step": 12339 - }, - { - "epoch": 0.8064832363897785, - "grad_norm": 0.502913773059845, - "learning_rate": 8.492060876995356e-06, - "loss": 0.4662, - "step": 12340 - }, - { - "epoch": 0.8065485915953206, - "grad_norm": 0.4259401559829712, - "learning_rate": 8.491810954669213e-06, - "loss": 0.3502, - "step": 12341 - }, - { - "epoch": 0.8066139468008627, - "grad_norm": 0.4137631058692932, - "learning_rate": 8.491561015312296e-06, - "loss": 0.3505, - "step": 12342 - }, - { - "epoch": 0.8066793020064048, - "grad_norm": 0.4844277501106262, - "learning_rate": 8.491311058925827e-06, - "loss": 0.4683, - "step": 12343 - }, - { - "epoch": 0.8067446572119469, - "grad_norm": 0.40809574723243713, - "learning_rate": 8.491061085511026e-06, - "loss": 0.3349, - "step": 12344 - }, - { - "epoch": 0.8068100124174891, - "grad_norm": 0.4811766743659973, - "learning_rate": 8.49081109506911e-06, - "loss": 0.4193, - "step": 12345 - }, - { - "epoch": 0.8068753676230311, - "grad_norm": 0.5202414393424988, - "learning_rate": 8.490561087601297e-06, - "loss": 0.4854, - "step": 12346 - }, - { - "epoch": 0.8069407228285733, - "grad_norm": 0.4288029074668884, - "learning_rate": 8.49031106310881e-06, - "loss": 0.3517, - "step": 12347 - }, - { - "epoch": 0.8070060780341154, - "grad_norm": 0.4680384695529938, - "learning_rate": 8.490061021592867e-06, - "loss": 0.3673, - "step": 12348 - }, - { - "epoch": 0.8070714332396576, - "grad_norm": 0.5072221755981445, - "learning_rate": 8.489810963054687e-06, - "loss": 0.4978, - "step": 12349 - }, - { - "epoch": 0.8071367884451996, - "grad_norm": 0.4739428460597992, - "learning_rate": 8.489560887495489e-06, - "loss": 0.3841, - "step": 12350 - }, - { - "epoch": 0.8072021436507418, - "grad_norm": 0.4591532051563263, - "learning_rate": 8.489310794916495e-06, - "loss": 0.3838, - "step": 12351 - }, - { - "epoch": 0.8072674988562839, - "grad_norm": 0.49754616618156433, - "learning_rate": 8.489060685318923e-06, - "loss": 0.4498, - "step": 12352 - }, - { - "epoch": 0.807332854061826, - "grad_norm": 0.4266905188560486, - "learning_rate": 8.488810558703992e-06, - "loss": 0.354, - "step": 12353 - }, - { - "epoch": 0.8073982092673682, - "grad_norm": 0.43139177560806274, - "learning_rate": 8.488560415072925e-06, - "loss": 0.3461, - "step": 12354 - }, - { - "epoch": 0.8074635644729102, - "grad_norm": 0.5105013847351074, - "learning_rate": 8.48831025442694e-06, - "loss": 0.4323, - "step": 12355 - }, - { - "epoch": 0.8075289196784524, - "grad_norm": 0.43758299946784973, - "learning_rate": 8.488060076767257e-06, - "loss": 0.3361, - "step": 12356 - }, - { - "epoch": 0.8075942748839945, - "grad_norm": 0.4944319725036621, - "learning_rate": 8.487809882095097e-06, - "loss": 0.4287, - "step": 12357 - }, - { - "epoch": 0.8076596300895367, - "grad_norm": 0.4521756172180176, - "learning_rate": 8.48755967041168e-06, - "loss": 0.3709, - "step": 12358 - }, - { - "epoch": 0.8077249852950787, - "grad_norm": 0.45124170184135437, - "learning_rate": 8.487309441718226e-06, - "loss": 0.355, - "step": 12359 - }, - { - "epoch": 0.8077903405006209, - "grad_norm": 0.4594513177871704, - "learning_rate": 8.487059196015955e-06, - "loss": 0.4258, - "step": 12360 - }, - { - "epoch": 0.807855695706163, - "grad_norm": 0.412546843290329, - "learning_rate": 8.48680893330609e-06, - "loss": 0.3401, - "step": 12361 - }, - { - "epoch": 0.8079210509117051, - "grad_norm": 0.40831243991851807, - "learning_rate": 8.486558653589848e-06, - "loss": 0.3259, - "step": 12362 - }, - { - "epoch": 0.8079864061172473, - "grad_norm": 0.4548529088497162, - "learning_rate": 8.486308356868452e-06, - "loss": 0.3863, - "step": 12363 - }, - { - "epoch": 0.8080517613227893, - "grad_norm": 0.4557829201221466, - "learning_rate": 8.486058043143123e-06, - "loss": 0.4039, - "step": 12364 - }, - { - "epoch": 0.8081171165283315, - "grad_norm": 0.45605432987213135, - "learning_rate": 8.485807712415082e-06, - "loss": 0.4013, - "step": 12365 - }, - { - "epoch": 0.8081824717338736, - "grad_norm": 0.4350353181362152, - "learning_rate": 8.485557364685547e-06, - "loss": 0.4002, - "step": 12366 - }, - { - "epoch": 0.8082478269394158, - "grad_norm": 0.4219260811805725, - "learning_rate": 8.485306999955743e-06, - "loss": 0.3429, - "step": 12367 - }, - { - "epoch": 0.8083131821449578, - "grad_norm": 0.4718751907348633, - "learning_rate": 8.48505661822689e-06, - "loss": 0.3948, - "step": 12368 - }, - { - "epoch": 0.8083785373504999, - "grad_norm": 0.43288910388946533, - "learning_rate": 8.484806219500208e-06, - "loss": 0.3871, - "step": 12369 - }, - { - "epoch": 0.8084438925560421, - "grad_norm": 0.4838804006576538, - "learning_rate": 8.484555803776916e-06, - "loss": 0.4673, - "step": 12370 - }, - { - "epoch": 0.8085092477615842, - "grad_norm": 0.4964217245578766, - "learning_rate": 8.48430537105824e-06, - "loss": 0.4269, - "step": 12371 - }, - { - "epoch": 0.8085746029671264, - "grad_norm": 0.46102991700172424, - "learning_rate": 8.484054921345402e-06, - "loss": 0.4195, - "step": 12372 - }, - { - "epoch": 0.8086399581726684, - "grad_norm": 0.447543203830719, - "learning_rate": 8.48380445463962e-06, - "loss": 0.3753, - "step": 12373 - }, - { - "epoch": 0.8087053133782106, - "grad_norm": 0.41645434498786926, - "learning_rate": 8.483553970942115e-06, - "loss": 0.3441, - "step": 12374 - }, - { - "epoch": 0.8087706685837527, - "grad_norm": 0.47619786858558655, - "learning_rate": 8.48330347025411e-06, - "loss": 0.4637, - "step": 12375 - }, - { - "epoch": 0.8088360237892949, - "grad_norm": 0.45063620805740356, - "learning_rate": 8.48305295257683e-06, - "loss": 0.3661, - "step": 12376 - }, - { - "epoch": 0.8089013789948369, - "grad_norm": 0.42111918330192566, - "learning_rate": 8.482802417911492e-06, - "loss": 0.3705, - "step": 12377 - }, - { - "epoch": 0.808966734200379, - "grad_norm": 0.44826894998550415, - "learning_rate": 8.482551866259321e-06, - "loss": 0.3673, - "step": 12378 - }, - { - "epoch": 0.8090320894059212, - "grad_norm": 0.4359002411365509, - "learning_rate": 8.482301297621538e-06, - "loss": 0.3374, - "step": 12379 - }, - { - "epoch": 0.8090974446114633, - "grad_norm": 0.4520050585269928, - "learning_rate": 8.482050711999364e-06, - "loss": 0.3802, - "step": 12380 - }, - { - "epoch": 0.8091627998170055, - "grad_norm": 0.46814998984336853, - "learning_rate": 8.481800109394025e-06, - "loss": 0.3661, - "step": 12381 - }, - { - "epoch": 0.8092281550225475, - "grad_norm": 0.4591486155986786, - "learning_rate": 8.481549489806738e-06, - "loss": 0.4094, - "step": 12382 - }, - { - "epoch": 0.8092935102280897, - "grad_norm": 0.47207266092300415, - "learning_rate": 8.481298853238728e-06, - "loss": 0.3787, - "step": 12383 - }, - { - "epoch": 0.8093588654336318, - "grad_norm": 0.46365997195243835, - "learning_rate": 8.48104819969122e-06, - "loss": 0.409, - "step": 12384 - }, - { - "epoch": 0.809424220639174, - "grad_norm": 0.44643640518188477, - "learning_rate": 8.480797529165431e-06, - "loss": 0.3811, - "step": 12385 - }, - { - "epoch": 0.809489575844716, - "grad_norm": 0.46963217854499817, - "learning_rate": 8.48054684166259e-06, - "loss": 0.4256, - "step": 12386 - }, - { - "epoch": 0.8095549310502581, - "grad_norm": 0.45074307918548584, - "learning_rate": 8.480296137183914e-06, - "loss": 0.3761, - "step": 12387 - }, - { - "epoch": 0.8096202862558003, - "grad_norm": 0.4560701251029968, - "learning_rate": 8.48004541573063e-06, - "loss": 0.4014, - "step": 12388 - }, - { - "epoch": 0.8096856414613424, - "grad_norm": 0.42646080255508423, - "learning_rate": 8.479794677303957e-06, - "loss": 0.3746, - "step": 12389 - }, - { - "epoch": 0.8097509966668845, - "grad_norm": 0.45829257369041443, - "learning_rate": 8.479543921905119e-06, - "loss": 0.3924, - "step": 12390 - }, - { - "epoch": 0.8098163518724266, - "grad_norm": 0.46567419171333313, - "learning_rate": 8.479293149535342e-06, - "loss": 0.3624, - "step": 12391 - }, - { - "epoch": 0.8098817070779688, - "grad_norm": 0.4683837592601776, - "learning_rate": 8.479042360195844e-06, - "loss": 0.3922, - "step": 12392 - }, - { - "epoch": 0.8099470622835109, - "grad_norm": 0.4206649661064148, - "learning_rate": 8.478791553887854e-06, - "loss": 0.3616, - "step": 12393 - }, - { - "epoch": 0.810012417489053, - "grad_norm": 0.45548540353775024, - "learning_rate": 8.478540730612592e-06, - "loss": 0.395, - "step": 12394 - }, - { - "epoch": 0.8100777726945951, - "grad_norm": 0.4326566755771637, - "learning_rate": 8.478289890371281e-06, - "loss": 0.3296, - "step": 12395 - }, - { - "epoch": 0.8101431279001372, - "grad_norm": 0.4756647050380707, - "learning_rate": 8.478039033165146e-06, - "loss": 0.4634, - "step": 12396 - }, - { - "epoch": 0.8102084831056794, - "grad_norm": 0.41559094190597534, - "learning_rate": 8.477788158995409e-06, - "loss": 0.3115, - "step": 12397 - }, - { - "epoch": 0.8102738383112215, - "grad_norm": 0.45956042408943176, - "learning_rate": 8.477537267863295e-06, - "loss": 0.4063, - "step": 12398 - }, - { - "epoch": 0.8103391935167636, - "grad_norm": 0.41898688673973083, - "learning_rate": 8.477286359770025e-06, - "loss": 0.33, - "step": 12399 - }, - { - "epoch": 0.8104045487223057, - "grad_norm": 0.4533012807369232, - "learning_rate": 8.477035434716827e-06, - "loss": 0.4039, - "step": 12400 - }, - { - "epoch": 0.8104699039278479, - "grad_norm": 0.47679269313812256, - "learning_rate": 8.476784492704921e-06, - "loss": 0.4135, - "step": 12401 - }, - { - "epoch": 0.81053525913339, - "grad_norm": 0.41880643367767334, - "learning_rate": 8.476533533735534e-06, - "loss": 0.357, - "step": 12402 - }, - { - "epoch": 0.810600614338932, - "grad_norm": 0.49020689725875854, - "learning_rate": 8.47628255780989e-06, - "loss": 0.4069, - "step": 12403 - }, - { - "epoch": 0.8106659695444742, - "grad_norm": 0.4441443979740143, - "learning_rate": 8.476031564929208e-06, - "loss": 0.3994, - "step": 12404 - }, - { - "epoch": 0.8107313247500163, - "grad_norm": 0.45281073451042175, - "learning_rate": 8.475780555094719e-06, - "loss": 0.4222, - "step": 12405 - }, - { - "epoch": 0.8107966799555585, - "grad_norm": 0.4089464843273163, - "learning_rate": 8.475529528307642e-06, - "loss": 0.3225, - "step": 12406 - }, - { - "epoch": 0.8108620351611006, - "grad_norm": 0.4631759822368622, - "learning_rate": 8.475278484569206e-06, - "loss": 0.3728, - "step": 12407 - }, - { - "epoch": 0.8109273903666427, - "grad_norm": 0.4352424740791321, - "learning_rate": 8.47502742388063e-06, - "loss": 0.3604, - "step": 12408 - }, - { - "epoch": 0.8109927455721848, - "grad_norm": 0.4908069372177124, - "learning_rate": 8.474776346243143e-06, - "loss": 0.403, - "step": 12409 - }, - { - "epoch": 0.811058100777727, - "grad_norm": 0.4476626217365265, - "learning_rate": 8.474525251657966e-06, - "loss": 0.3652, - "step": 12410 - }, - { - "epoch": 0.8111234559832691, - "grad_norm": 0.4301026165485382, - "learning_rate": 8.47427414012633e-06, - "loss": 0.3927, - "step": 12411 - }, - { - "epoch": 0.8111888111888111, - "grad_norm": 0.4547356367111206, - "learning_rate": 8.474023011649451e-06, - "loss": 0.4077, - "step": 12412 - }, - { - "epoch": 0.8112541663943533, - "grad_norm": 0.4329404830932617, - "learning_rate": 8.47377186622856e-06, - "loss": 0.3508, - "step": 12413 - }, - { - "epoch": 0.8113195215998954, - "grad_norm": 0.44247904419898987, - "learning_rate": 8.47352070386488e-06, - "loss": 0.3645, - "step": 12414 - }, - { - "epoch": 0.8113848768054376, - "grad_norm": 0.4779067635536194, - "learning_rate": 8.473269524559634e-06, - "loss": 0.4328, - "step": 12415 - }, - { - "epoch": 0.8114502320109797, - "grad_norm": 0.4541876018047333, - "learning_rate": 8.473018328314054e-06, - "loss": 0.3194, - "step": 12416 - }, - { - "epoch": 0.8115155872165218, - "grad_norm": 0.41460004448890686, - "learning_rate": 8.472767115129356e-06, - "loss": 0.3238, - "step": 12417 - }, - { - "epoch": 0.8115809424220639, - "grad_norm": 0.4341851770877838, - "learning_rate": 8.472515885006771e-06, - "loss": 0.3722, - "step": 12418 - }, - { - "epoch": 0.811646297627606, - "grad_norm": 0.4967648684978485, - "learning_rate": 8.472264637947525e-06, - "loss": 0.449, - "step": 12419 - }, - { - "epoch": 0.8117116528331482, - "grad_norm": 0.46293848752975464, - "learning_rate": 8.472013373952839e-06, - "loss": 0.4007, - "step": 12420 - }, - { - "epoch": 0.8117770080386902, - "grad_norm": 0.4613957405090332, - "learning_rate": 8.471762093023943e-06, - "loss": 0.3926, - "step": 12421 - }, - { - "epoch": 0.8118423632442324, - "grad_norm": 0.469547301530838, - "learning_rate": 8.471510795162058e-06, - "loss": 0.3919, - "step": 12422 - }, - { - "epoch": 0.8119077184497745, - "grad_norm": 0.4456419050693512, - "learning_rate": 8.471259480368415e-06, - "loss": 0.379, - "step": 12423 - }, - { - "epoch": 0.8119730736553167, - "grad_norm": 0.42810481786727905, - "learning_rate": 8.471008148644236e-06, - "loss": 0.3436, - "step": 12424 - }, - { - "epoch": 0.8120384288608588, - "grad_norm": 0.4358358681201935, - "learning_rate": 8.470756799990746e-06, - "loss": 0.3955, - "step": 12425 - }, - { - "epoch": 0.8121037840664009, - "grad_norm": 0.4390687346458435, - "learning_rate": 8.470505434409175e-06, - "loss": 0.3886, - "step": 12426 - }, - { - "epoch": 0.812169139271943, - "grad_norm": 0.4392316937446594, - "learning_rate": 8.470254051900746e-06, - "loss": 0.3729, - "step": 12427 - }, - { - "epoch": 0.8122344944774851, - "grad_norm": 0.43075257539749146, - "learning_rate": 8.470002652466686e-06, - "loss": 0.3687, - "step": 12428 - }, - { - "epoch": 0.8122998496830273, - "grad_norm": 0.43682315945625305, - "learning_rate": 8.46975123610822e-06, - "loss": 0.347, - "step": 12429 - }, - { - "epoch": 0.8123652048885693, - "grad_norm": 0.4574038088321686, - "learning_rate": 8.469499802826577e-06, - "loss": 0.3743, - "step": 12430 - }, - { - "epoch": 0.8124305600941115, - "grad_norm": 0.4498804211616516, - "learning_rate": 8.46924835262298e-06, - "loss": 0.3911, - "step": 12431 - }, - { - "epoch": 0.8124959152996536, - "grad_norm": 0.44626232981681824, - "learning_rate": 8.468996885498657e-06, - "loss": 0.3355, - "step": 12432 - }, - { - "epoch": 0.8125612705051958, - "grad_norm": 0.4404367208480835, - "learning_rate": 8.468745401454834e-06, - "loss": 0.3445, - "step": 12433 - }, - { - "epoch": 0.8126266257107378, - "grad_norm": 0.5081859827041626, - "learning_rate": 8.468493900492738e-06, - "loss": 0.481, - "step": 12434 - }, - { - "epoch": 0.81269198091628, - "grad_norm": 0.43948298692703247, - "learning_rate": 8.468242382613598e-06, - "loss": 0.3587, - "step": 12435 - }, - { - "epoch": 0.8127573361218221, - "grad_norm": 0.46249765157699585, - "learning_rate": 8.467990847818637e-06, - "loss": 0.3974, - "step": 12436 - }, - { - "epoch": 0.8128226913273642, - "grad_norm": 0.4180997908115387, - "learning_rate": 8.467739296109081e-06, - "loss": 0.3066, - "step": 12437 - }, - { - "epoch": 0.8128880465329064, - "grad_norm": 0.48867717385292053, - "learning_rate": 8.46748772748616e-06, - "loss": 0.3398, - "step": 12438 - }, - { - "epoch": 0.8129534017384484, - "grad_norm": 0.46018847823143005, - "learning_rate": 8.467236141951103e-06, - "loss": 0.3739, - "step": 12439 - }, - { - "epoch": 0.8130187569439906, - "grad_norm": 0.4579855799674988, - "learning_rate": 8.466984539505132e-06, - "loss": 0.4262, - "step": 12440 - }, - { - "epoch": 0.8130841121495327, - "grad_norm": 0.4060579240322113, - "learning_rate": 8.466732920149476e-06, - "loss": 0.329, - "step": 12441 - }, - { - "epoch": 0.8131494673550749, - "grad_norm": 0.44370123744010925, - "learning_rate": 8.466481283885363e-06, - "loss": 0.3779, - "step": 12442 - }, - { - "epoch": 0.813214822560617, - "grad_norm": 0.435161828994751, - "learning_rate": 8.46622963071402e-06, - "loss": 0.3696, - "step": 12443 - }, - { - "epoch": 0.8132801777661591, - "grad_norm": 0.46991202235221863, - "learning_rate": 8.465977960636676e-06, - "loss": 0.4029, - "step": 12444 - }, - { - "epoch": 0.8133455329717012, - "grad_norm": 0.4920562207698822, - "learning_rate": 8.465726273654555e-06, - "loss": 0.4524, - "step": 12445 - }, - { - "epoch": 0.8134108881772433, - "grad_norm": 0.4491201639175415, - "learning_rate": 8.465474569768885e-06, - "loss": 0.36, - "step": 12446 - }, - { - "epoch": 0.8134762433827855, - "grad_norm": 0.5497409105300903, - "learning_rate": 8.465222848980896e-06, - "loss": 0.4177, - "step": 12447 - }, - { - "epoch": 0.8135415985883275, - "grad_norm": 0.48143699765205383, - "learning_rate": 8.464971111291815e-06, - "loss": 0.415, - "step": 12448 - }, - { - "epoch": 0.8136069537938697, - "grad_norm": 0.42294394969940186, - "learning_rate": 8.464719356702871e-06, - "loss": 0.3423, - "step": 12449 - }, - { - "epoch": 0.8136723089994118, - "grad_norm": 0.45985540747642517, - "learning_rate": 8.464467585215288e-06, - "loss": 0.3896, - "step": 12450 - }, - { - "epoch": 0.813737664204954, - "grad_norm": 0.4446311295032501, - "learning_rate": 8.464215796830298e-06, - "loss": 0.3767, - "step": 12451 - }, - { - "epoch": 0.813803019410496, - "grad_norm": 0.4525317847728729, - "learning_rate": 8.463963991549127e-06, - "loss": 0.3662, - "step": 12452 - }, - { - "epoch": 0.8138683746160381, - "grad_norm": 0.44176629185676575, - "learning_rate": 8.463712169373002e-06, - "loss": 0.3943, - "step": 12453 - }, - { - "epoch": 0.8139337298215803, - "grad_norm": 0.45090174674987793, - "learning_rate": 8.463460330303154e-06, - "loss": 0.4166, - "step": 12454 - }, - { - "epoch": 0.8139990850271224, - "grad_norm": 0.4586697816848755, - "learning_rate": 8.463208474340811e-06, - "loss": 0.3981, - "step": 12455 - }, - { - "epoch": 0.8140644402326646, - "grad_norm": 0.44277840852737427, - "learning_rate": 8.4629566014872e-06, - "loss": 0.3811, - "step": 12456 - }, - { - "epoch": 0.8141297954382066, - "grad_norm": 0.46791690587997437, - "learning_rate": 8.46270471174355e-06, - "loss": 0.3989, - "step": 12457 - }, - { - "epoch": 0.8141951506437488, - "grad_norm": 0.41792604327201843, - "learning_rate": 8.462452805111089e-06, - "loss": 0.3242, - "step": 12458 - }, - { - "epoch": 0.8142605058492909, - "grad_norm": 0.45778384804725647, - "learning_rate": 8.462200881591046e-06, - "loss": 0.4405, - "step": 12459 - }, - { - "epoch": 0.8143258610548331, - "grad_norm": 0.42692655324935913, - "learning_rate": 8.46194894118465e-06, - "loss": 0.3709, - "step": 12460 - }, - { - "epoch": 0.8143912162603751, - "grad_norm": 0.4614044427871704, - "learning_rate": 8.46169698389313e-06, - "loss": 0.4372, - "step": 12461 - }, - { - "epoch": 0.8144565714659172, - "grad_norm": 0.47064560651779175, - "learning_rate": 8.461445009717714e-06, - "loss": 0.4273, - "step": 12462 - }, - { - "epoch": 0.8145219266714594, - "grad_norm": 0.46201393008232117, - "learning_rate": 8.461193018659633e-06, - "loss": 0.3968, - "step": 12463 - }, - { - "epoch": 0.8145872818770015, - "grad_norm": 0.46701836585998535, - "learning_rate": 8.460941010720114e-06, - "loss": 0.3761, - "step": 12464 - }, - { - "epoch": 0.8146526370825437, - "grad_norm": 0.45106998085975647, - "learning_rate": 8.460688985900386e-06, - "loss": 0.4021, - "step": 12465 - }, - { - "epoch": 0.8147179922880857, - "grad_norm": 0.4375004470348358, - "learning_rate": 8.460436944201678e-06, - "loss": 0.394, - "step": 12466 - }, - { - "epoch": 0.8147833474936279, - "grad_norm": 0.4488752484321594, - "learning_rate": 8.460184885625222e-06, - "loss": 0.3968, - "step": 12467 - }, - { - "epoch": 0.81484870269917, - "grad_norm": 0.44091296195983887, - "learning_rate": 8.459932810172246e-06, - "loss": 0.3347, - "step": 12468 - }, - { - "epoch": 0.8149140579047122, - "grad_norm": 0.42159703373908997, - "learning_rate": 8.459680717843978e-06, - "loss": 0.3616, - "step": 12469 - }, - { - "epoch": 0.8149794131102542, - "grad_norm": 0.47401347756385803, - "learning_rate": 8.459428608641649e-06, - "loss": 0.3785, - "step": 12470 - }, - { - "epoch": 0.8150447683157963, - "grad_norm": 0.4911261200904846, - "learning_rate": 8.459176482566487e-06, - "loss": 0.334, - "step": 12471 - }, - { - "epoch": 0.8151101235213385, - "grad_norm": 0.4237688183784485, - "learning_rate": 8.458924339619726e-06, - "loss": 0.3871, - "step": 12472 - }, - { - "epoch": 0.8151754787268806, - "grad_norm": 0.468522310256958, - "learning_rate": 8.45867217980259e-06, - "loss": 0.4178, - "step": 12473 - }, - { - "epoch": 0.8152408339324227, - "grad_norm": 0.4498448669910431, - "learning_rate": 8.45842000311631e-06, - "loss": 0.3646, - "step": 12474 - }, - { - "epoch": 0.8153061891379648, - "grad_norm": 0.4376983642578125, - "learning_rate": 8.458167809562122e-06, - "loss": 0.3605, - "step": 12475 - }, - { - "epoch": 0.815371544343507, - "grad_norm": 0.4309103786945343, - "learning_rate": 8.457915599141249e-06, - "loss": 0.3235, - "step": 12476 - }, - { - "epoch": 0.8154368995490491, - "grad_norm": 0.43921563029289246, - "learning_rate": 8.457663371854924e-06, - "loss": 0.3385, - "step": 12477 - }, - { - "epoch": 0.8155022547545911, - "grad_norm": 0.44701018929481506, - "learning_rate": 8.457411127704376e-06, - "loss": 0.3906, - "step": 12478 - }, - { - "epoch": 0.8155676099601333, - "grad_norm": 0.46366724371910095, - "learning_rate": 8.457158866690836e-06, - "loss": 0.4068, - "step": 12479 - }, - { - "epoch": 0.8156329651656754, - "grad_norm": 0.43142345547676086, - "learning_rate": 8.456906588815536e-06, - "loss": 0.3306, - "step": 12480 - }, - { - "epoch": 0.8156983203712176, - "grad_norm": 0.4284942150115967, - "learning_rate": 8.456654294079704e-06, - "loss": 0.334, - "step": 12481 - }, - { - "epoch": 0.8157636755767597, - "grad_norm": 0.45439621806144714, - "learning_rate": 8.456401982484573e-06, - "loss": 0.4044, - "step": 12482 - }, - { - "epoch": 0.8158290307823018, - "grad_norm": 0.42266830801963806, - "learning_rate": 8.45614965403137e-06, - "loss": 0.3675, - "step": 12483 - }, - { - "epoch": 0.8158943859878439, - "grad_norm": 0.4663552939891815, - "learning_rate": 8.455897308721329e-06, - "loss": 0.3888, - "step": 12484 - }, - { - "epoch": 0.8159597411933861, - "grad_norm": 0.453592449426651, - "learning_rate": 8.45564494655568e-06, - "loss": 0.3982, - "step": 12485 - }, - { - "epoch": 0.8160250963989282, - "grad_norm": 0.4378078877925873, - "learning_rate": 8.455392567535651e-06, - "loss": 0.3343, - "step": 12486 - }, - { - "epoch": 0.8160904516044702, - "grad_norm": 0.47541993856430054, - "learning_rate": 8.45514017166248e-06, - "loss": 0.4538, - "step": 12487 - }, - { - "epoch": 0.8161558068100124, - "grad_norm": 0.4201316833496094, - "learning_rate": 8.45488775893739e-06, - "loss": 0.3422, - "step": 12488 - }, - { - "epoch": 0.8162211620155545, - "grad_norm": 0.44259822368621826, - "learning_rate": 8.454635329361615e-06, - "loss": 0.3678, - "step": 12489 - }, - { - "epoch": 0.8162865172210967, - "grad_norm": 0.42908021807670593, - "learning_rate": 8.45438288293639e-06, - "loss": 0.3764, - "step": 12490 - }, - { - "epoch": 0.8163518724266388, - "grad_norm": 0.5003162622451782, - "learning_rate": 8.454130419662941e-06, - "loss": 0.4751, - "step": 12491 - }, - { - "epoch": 0.8164172276321809, - "grad_norm": 0.48089399933815, - "learning_rate": 8.4538779395425e-06, - "loss": 0.4612, - "step": 12492 - }, - { - "epoch": 0.816482582837723, - "grad_norm": 0.4475057125091553, - "learning_rate": 8.4536254425763e-06, - "loss": 0.3528, - "step": 12493 - }, - { - "epoch": 0.8165479380432652, - "grad_norm": 0.4424389600753784, - "learning_rate": 8.453372928765575e-06, - "loss": 0.3877, - "step": 12494 - }, - { - "epoch": 0.8166132932488073, - "grad_norm": 0.47818002104759216, - "learning_rate": 8.453120398111552e-06, - "loss": 0.4147, - "step": 12495 - }, - { - "epoch": 0.8166786484543493, - "grad_norm": 0.44536691904067993, - "learning_rate": 8.452867850615464e-06, - "loss": 0.3688, - "step": 12496 - }, - { - "epoch": 0.8167440036598915, - "grad_norm": 0.47052431106567383, - "learning_rate": 8.452615286278544e-06, - "loss": 0.3977, - "step": 12497 - }, - { - "epoch": 0.8168093588654336, - "grad_norm": 0.4413776099681854, - "learning_rate": 8.452362705102025e-06, - "loss": 0.3607, - "step": 12498 - }, - { - "epoch": 0.8168747140709758, - "grad_norm": 0.46936506032943726, - "learning_rate": 8.452110107087134e-06, - "loss": 0.4001, - "step": 12499 - }, - { - "epoch": 0.8169400692765179, - "grad_norm": 0.42547914385795593, - "learning_rate": 8.451857492235107e-06, - "loss": 0.3593, - "step": 12500 - }, - { - "epoch": 0.81700542448206, - "grad_norm": 0.44651374220848083, - "learning_rate": 8.451604860547176e-06, - "loss": 0.3998, - "step": 12501 - }, - { - "epoch": 0.8170707796876021, - "grad_norm": 0.472301721572876, - "learning_rate": 8.45135221202457e-06, - "loss": 0.3869, - "step": 12502 - }, - { - "epoch": 0.8171361348931442, - "grad_norm": 0.4374332129955292, - "learning_rate": 8.451099546668527e-06, - "loss": 0.3615, - "step": 12503 - }, - { - "epoch": 0.8172014900986864, - "grad_norm": 0.4162733852863312, - "learning_rate": 8.450846864480274e-06, - "loss": 0.34, - "step": 12504 - }, - { - "epoch": 0.8172668453042284, - "grad_norm": 0.4110073745250702, - "learning_rate": 8.450594165461046e-06, - "loss": 0.3359, - "step": 12505 - }, - { - "epoch": 0.8173322005097706, - "grad_norm": 0.41492143273353577, - "learning_rate": 8.450341449612075e-06, - "loss": 0.3471, - "step": 12506 - }, - { - "epoch": 0.8173975557153127, - "grad_norm": 0.42728742957115173, - "learning_rate": 8.450088716934593e-06, - "loss": 0.3163, - "step": 12507 - }, - { - "epoch": 0.8174629109208549, - "grad_norm": 0.4918995499610901, - "learning_rate": 8.449835967429832e-06, - "loss": 0.371, - "step": 12508 - }, - { - "epoch": 0.817528266126397, - "grad_norm": 0.4346361458301544, - "learning_rate": 8.449583201099027e-06, - "loss": 0.3674, - "step": 12509 - }, - { - "epoch": 0.8175936213319391, - "grad_norm": 0.42874497175216675, - "learning_rate": 8.44933041794341e-06, - "loss": 0.3638, - "step": 12510 - }, - { - "epoch": 0.8176589765374812, - "grad_norm": 0.430454283952713, - "learning_rate": 8.449077617964212e-06, - "loss": 0.3258, - "step": 12511 - }, - { - "epoch": 0.8177243317430233, - "grad_norm": 0.46780121326446533, - "learning_rate": 8.44882480116267e-06, - "loss": 0.4165, - "step": 12512 - }, - { - "epoch": 0.8177896869485655, - "grad_norm": 0.47941410541534424, - "learning_rate": 8.448571967540014e-06, - "loss": 0.3929, - "step": 12513 - }, - { - "epoch": 0.8178550421541075, - "grad_norm": 0.4879732131958008, - "learning_rate": 8.448319117097477e-06, - "loss": 0.4137, - "step": 12514 - }, - { - "epoch": 0.8179203973596497, - "grad_norm": 0.42019909620285034, - "learning_rate": 8.448066249836293e-06, - "loss": 0.3325, - "step": 12515 - }, - { - "epoch": 0.8179857525651918, - "grad_norm": 0.42211028933525085, - "learning_rate": 8.447813365757696e-06, - "loss": 0.3542, - "step": 12516 - }, - { - "epoch": 0.818051107770734, - "grad_norm": 0.43780139088630676, - "learning_rate": 8.447560464862917e-06, - "loss": 0.3282, - "step": 12517 - }, - { - "epoch": 0.818116462976276, - "grad_norm": 0.4416830539703369, - "learning_rate": 8.447307547153194e-06, - "loss": 0.3729, - "step": 12518 - }, - { - "epoch": 0.8181818181818182, - "grad_norm": 0.42733025550842285, - "learning_rate": 8.447054612629756e-06, - "loss": 0.3661, - "step": 12519 - }, - { - "epoch": 0.8182471733873603, - "grad_norm": 0.4293109178543091, - "learning_rate": 8.44680166129384e-06, - "loss": 0.3666, - "step": 12520 - }, - { - "epoch": 0.8183125285929024, - "grad_norm": 0.4422782063484192, - "learning_rate": 8.446548693146675e-06, - "loss": 0.3762, - "step": 12521 - }, - { - "epoch": 0.8183778837984446, - "grad_norm": 0.4182201027870178, - "learning_rate": 8.4462957081895e-06, - "loss": 0.3298, - "step": 12522 - }, - { - "epoch": 0.8184432390039866, - "grad_norm": 0.4257052540779114, - "learning_rate": 8.446042706423547e-06, - "loss": 0.3414, - "step": 12523 - }, - { - "epoch": 0.8185085942095288, - "grad_norm": 0.4190555810928345, - "learning_rate": 8.445789687850052e-06, - "loss": 0.3553, - "step": 12524 - }, - { - "epoch": 0.8185739494150709, - "grad_norm": 0.45774173736572266, - "learning_rate": 8.445536652470244e-06, - "loss": 0.3719, - "step": 12525 - }, - { - "epoch": 0.8186393046206131, - "grad_norm": 0.43524685502052307, - "learning_rate": 8.44528360028536e-06, - "loss": 0.3561, - "step": 12526 - }, - { - "epoch": 0.8187046598261551, - "grad_norm": 0.440887987613678, - "learning_rate": 8.445030531296636e-06, - "loss": 0.395, - "step": 12527 - }, - { - "epoch": 0.8187700150316973, - "grad_norm": 0.423302561044693, - "learning_rate": 8.444777445505306e-06, - "loss": 0.3353, - "step": 12528 - }, - { - "epoch": 0.8188353702372394, - "grad_norm": 0.5028983354568481, - "learning_rate": 8.4445243429126e-06, - "loss": 0.4601, - "step": 12529 - }, - { - "epoch": 0.8189007254427815, - "grad_norm": 0.44041121006011963, - "learning_rate": 8.444271223519756e-06, - "loss": 0.355, - "step": 12530 - }, - { - "epoch": 0.8189660806483237, - "grad_norm": 0.4212939441204071, - "learning_rate": 8.44401808732801e-06, - "loss": 0.3588, - "step": 12531 - }, - { - "epoch": 0.8190314358538657, - "grad_norm": 0.42517799139022827, - "learning_rate": 8.443764934338592e-06, - "loss": 0.3378, - "step": 12532 - }, - { - "epoch": 0.8190967910594079, - "grad_norm": 0.43304795026779175, - "learning_rate": 8.443511764552741e-06, - "loss": 0.3646, - "step": 12533 - }, - { - "epoch": 0.81916214626495, - "grad_norm": 0.44127559661865234, - "learning_rate": 8.443258577971691e-06, - "loss": 0.373, - "step": 12534 - }, - { - "epoch": 0.8192275014704922, - "grad_norm": 0.4303012192249298, - "learning_rate": 8.443005374596673e-06, - "loss": 0.3365, - "step": 12535 - }, - { - "epoch": 0.8192928566760342, - "grad_norm": 0.44123363494873047, - "learning_rate": 8.442752154428928e-06, - "loss": 0.3838, - "step": 12536 - }, - { - "epoch": 0.8193582118815763, - "grad_norm": 0.4430181086063385, - "learning_rate": 8.442498917469687e-06, - "loss": 0.3588, - "step": 12537 - }, - { - "epoch": 0.8194235670871185, - "grad_norm": 0.443930447101593, - "learning_rate": 8.442245663720186e-06, - "loss": 0.3757, - "step": 12538 - }, - { - "epoch": 0.8194889222926606, - "grad_norm": 0.4582585096359253, - "learning_rate": 8.44199239318166e-06, - "loss": 0.4024, - "step": 12539 - }, - { - "epoch": 0.8195542774982028, - "grad_norm": 0.42140451073646545, - "learning_rate": 8.441739105855345e-06, - "loss": 0.3613, - "step": 12540 - }, - { - "epoch": 0.8196196327037448, - "grad_norm": 0.43065759539604187, - "learning_rate": 8.441485801742478e-06, - "loss": 0.3625, - "step": 12541 - }, - { - "epoch": 0.819684987909287, - "grad_norm": 0.4550173878669739, - "learning_rate": 8.44123248084429e-06, - "loss": 0.3965, - "step": 12542 - }, - { - "epoch": 0.8197503431148291, - "grad_norm": 0.4012031555175781, - "learning_rate": 8.44097914316202e-06, - "loss": 0.3358, - "step": 12543 - }, - { - "epoch": 0.8198156983203713, - "grad_norm": 0.439313679933548, - "learning_rate": 8.440725788696903e-06, - "loss": 0.3399, - "step": 12544 - }, - { - "epoch": 0.8198810535259133, - "grad_norm": 0.45871588587760925, - "learning_rate": 8.440472417450171e-06, - "loss": 0.3961, - "step": 12545 - }, - { - "epoch": 0.8199464087314554, - "grad_norm": 0.4367266595363617, - "learning_rate": 8.440219029423066e-06, - "loss": 0.4182, - "step": 12546 - }, - { - "epoch": 0.8200117639369976, - "grad_norm": 0.4367128610610962, - "learning_rate": 8.439965624616822e-06, - "loss": 0.3719, - "step": 12547 - }, - { - "epoch": 0.8200771191425397, - "grad_norm": 0.4566749930381775, - "learning_rate": 8.439712203032674e-06, - "loss": 0.3619, - "step": 12548 - }, - { - "epoch": 0.8201424743480819, - "grad_norm": 0.4530434310436249, - "learning_rate": 8.439458764671856e-06, - "loss": 0.383, - "step": 12549 - }, - { - "epoch": 0.8202078295536239, - "grad_norm": 0.40557971596717834, - "learning_rate": 8.439205309535606e-06, - "loss": 0.3006, - "step": 12550 - }, - { - "epoch": 0.8202731847591661, - "grad_norm": 0.42845040559768677, - "learning_rate": 8.438951837625162e-06, - "loss": 0.3518, - "step": 12551 - }, - { - "epoch": 0.8203385399647082, - "grad_norm": 0.4427779018878937, - "learning_rate": 8.438698348941756e-06, - "loss": 0.3612, - "step": 12552 - }, - { - "epoch": 0.8204038951702504, - "grad_norm": 0.4268951117992401, - "learning_rate": 8.43844484348663e-06, - "loss": 0.3638, - "step": 12553 - }, - { - "epoch": 0.8204692503757924, - "grad_norm": 0.4337146580219269, - "learning_rate": 8.438191321261015e-06, - "loss": 0.3393, - "step": 12554 - }, - { - "epoch": 0.8205346055813345, - "grad_norm": 0.44662535190582275, - "learning_rate": 8.43793778226615e-06, - "loss": 0.3739, - "step": 12555 - }, - { - "epoch": 0.8205999607868767, - "grad_norm": 0.4049738645553589, - "learning_rate": 8.437684226503273e-06, - "loss": 0.2878, - "step": 12556 - }, - { - "epoch": 0.8206653159924188, - "grad_norm": 0.40649691224098206, - "learning_rate": 8.437430653973619e-06, - "loss": 0.3269, - "step": 12557 - }, - { - "epoch": 0.820730671197961, - "grad_norm": 0.4747774302959442, - "learning_rate": 8.437177064678423e-06, - "loss": 0.4154, - "step": 12558 - }, - { - "epoch": 0.820796026403503, - "grad_norm": 0.45044398307800293, - "learning_rate": 8.436923458618925e-06, - "loss": 0.418, - "step": 12559 - }, - { - "epoch": 0.8208613816090452, - "grad_norm": 0.4268885552883148, - "learning_rate": 8.436669835796361e-06, - "loss": 0.3599, - "step": 12560 - }, - { - "epoch": 0.8209267368145873, - "grad_norm": 0.4495190680027008, - "learning_rate": 8.436416196211967e-06, - "loss": 0.4057, - "step": 12561 - }, - { - "epoch": 0.8209920920201293, - "grad_norm": 0.5106366872787476, - "learning_rate": 8.43616253986698e-06, - "loss": 0.39, - "step": 12562 - }, - { - "epoch": 0.8210574472256715, - "grad_norm": 0.444840669631958, - "learning_rate": 8.435908866762639e-06, - "loss": 0.3786, - "step": 12563 - }, - { - "epoch": 0.8211228024312136, - "grad_norm": 0.45178380608558655, - "learning_rate": 8.43565517690018e-06, - "loss": 0.3901, - "step": 12564 - }, - { - "epoch": 0.8211881576367558, - "grad_norm": 0.4797780215740204, - "learning_rate": 8.435401470280839e-06, - "loss": 0.4795, - "step": 12565 - }, - { - "epoch": 0.8212535128422979, - "grad_norm": 0.4234144985675812, - "learning_rate": 8.435147746905857e-06, - "loss": 0.3544, - "step": 12566 - }, - { - "epoch": 0.82131886804784, - "grad_norm": 0.441814124584198, - "learning_rate": 8.434894006776468e-06, - "loss": 0.38, - "step": 12567 - }, - { - "epoch": 0.8213842232533821, - "grad_norm": 0.463226854801178, - "learning_rate": 8.434640249893911e-06, - "loss": 0.4182, - "step": 12568 - }, - { - "epoch": 0.8214495784589243, - "grad_norm": 0.4492681622505188, - "learning_rate": 8.434386476259425e-06, - "loss": 0.3647, - "step": 12569 - }, - { - "epoch": 0.8215149336644664, - "grad_norm": 0.4281264543533325, - "learning_rate": 8.434132685874245e-06, - "loss": 0.3621, - "step": 12570 - }, - { - "epoch": 0.8215802888700084, - "grad_norm": 0.47149139642715454, - "learning_rate": 8.43387887873961e-06, - "loss": 0.3658, - "step": 12571 - }, - { - "epoch": 0.8216456440755506, - "grad_norm": 0.42672228813171387, - "learning_rate": 8.433625054856759e-06, - "loss": 0.3755, - "step": 12572 - }, - { - "epoch": 0.8217109992810927, - "grad_norm": 0.4492327570915222, - "learning_rate": 8.433371214226928e-06, - "loss": 0.378, - "step": 12573 - }, - { - "epoch": 0.8217763544866349, - "grad_norm": 0.46517860889434814, - "learning_rate": 8.433117356851358e-06, - "loss": 0.3939, - "step": 12574 - }, - { - "epoch": 0.821841709692177, - "grad_norm": 0.4429529905319214, - "learning_rate": 8.432863482731284e-06, - "loss": 0.3642, - "step": 12575 - }, - { - "epoch": 0.8219070648977191, - "grad_norm": 0.43571212887763977, - "learning_rate": 8.432609591867945e-06, - "loss": 0.3885, - "step": 12576 - }, - { - "epoch": 0.8219724201032612, - "grad_norm": 0.4329453110694885, - "learning_rate": 8.432355684262582e-06, - "loss": 0.3936, - "step": 12577 - }, - { - "epoch": 0.8220377753088034, - "grad_norm": 0.4580669701099396, - "learning_rate": 8.43210175991643e-06, - "loss": 0.3492, - "step": 12578 - }, - { - "epoch": 0.8221031305143455, - "grad_norm": 0.5206211805343628, - "learning_rate": 8.43184781883073e-06, - "loss": 0.4663, - "step": 12579 - }, - { - "epoch": 0.8221684857198875, - "grad_norm": 0.44426101446151733, - "learning_rate": 8.431593861006716e-06, - "loss": 0.3826, - "step": 12580 - }, - { - "epoch": 0.8222338409254297, - "grad_norm": 0.4304702579975128, - "learning_rate": 8.431339886445633e-06, - "loss": 0.3713, - "step": 12581 - }, - { - "epoch": 0.8222991961309718, - "grad_norm": 0.46631917357444763, - "learning_rate": 8.431085895148713e-06, - "loss": 0.4133, - "step": 12582 - }, - { - "epoch": 0.822364551336514, - "grad_norm": 0.4335556626319885, - "learning_rate": 8.430831887117201e-06, - "loss": 0.3605, - "step": 12583 - }, - { - "epoch": 0.822429906542056, - "grad_norm": 0.4497116506099701, - "learning_rate": 8.430577862352333e-06, - "loss": 0.359, - "step": 12584 - }, - { - "epoch": 0.8224952617475982, - "grad_norm": 0.4470299482345581, - "learning_rate": 8.43032382085535e-06, - "loss": 0.3743, - "step": 12585 - }, - { - "epoch": 0.8225606169531403, - "grad_norm": 0.4260590672492981, - "learning_rate": 8.430069762627489e-06, - "loss": 0.3585, - "step": 12586 - }, - { - "epoch": 0.8226259721586824, - "grad_norm": 0.4832247495651245, - "learning_rate": 8.429815687669986e-06, - "loss": 0.4748, - "step": 12587 - }, - { - "epoch": 0.8226913273642246, - "grad_norm": 0.4708634912967682, - "learning_rate": 8.429561595984087e-06, - "loss": 0.3798, - "step": 12588 - }, - { - "epoch": 0.8227566825697666, - "grad_norm": 0.4397064447402954, - "learning_rate": 8.429307487571028e-06, - "loss": 0.3797, - "step": 12589 - }, - { - "epoch": 0.8228220377753088, - "grad_norm": 0.4195902645587921, - "learning_rate": 8.429053362432047e-06, - "loss": 0.3741, - "step": 12590 - }, - { - "epoch": 0.8228873929808509, - "grad_norm": 0.4073476791381836, - "learning_rate": 8.428799220568384e-06, - "loss": 0.344, - "step": 12591 - }, - { - "epoch": 0.8229527481863931, - "grad_norm": 0.4689033031463623, - "learning_rate": 8.42854506198128e-06, - "loss": 0.4245, - "step": 12592 - }, - { - "epoch": 0.8230181033919352, - "grad_norm": 0.4563535451889038, - "learning_rate": 8.428290886671976e-06, - "loss": 0.384, - "step": 12593 - }, - { - "epoch": 0.8230834585974773, - "grad_norm": 0.45254579186439514, - "learning_rate": 8.428036694641708e-06, - "loss": 0.413, - "step": 12594 - }, - { - "epoch": 0.8231488138030194, - "grad_norm": 0.46842119097709656, - "learning_rate": 8.427782485891717e-06, - "loss": 0.4436, - "step": 12595 - }, - { - "epoch": 0.8232141690085615, - "grad_norm": 0.4607884883880615, - "learning_rate": 8.427528260423246e-06, - "loss": 0.4048, - "step": 12596 - }, - { - "epoch": 0.8232795242141037, - "grad_norm": 0.42940106987953186, - "learning_rate": 8.42727401823753e-06, - "loss": 0.3517, - "step": 12597 - }, - { - "epoch": 0.8233448794196457, - "grad_norm": 0.4498884975910187, - "learning_rate": 8.42701975933581e-06, - "loss": 0.3729, - "step": 12598 - }, - { - "epoch": 0.8234102346251879, - "grad_norm": 0.49875614047050476, - "learning_rate": 8.42676548371933e-06, - "loss": 0.4688, - "step": 12599 - }, - { - "epoch": 0.82347558983073, - "grad_norm": 0.4211624562740326, - "learning_rate": 8.426511191389325e-06, - "loss": 0.3682, - "step": 12600 - }, - { - "epoch": 0.8235409450362722, - "grad_norm": 0.4365617334842682, - "learning_rate": 8.42625688234704e-06, - "loss": 0.3668, - "step": 12601 - }, - { - "epoch": 0.8236063002418142, - "grad_norm": 0.40886780619621277, - "learning_rate": 8.426002556593712e-06, - "loss": 0.3267, - "step": 12602 - }, - { - "epoch": 0.8236716554473564, - "grad_norm": 0.3838039040565491, - "learning_rate": 8.425748214130584e-06, - "loss": 0.2832, - "step": 12603 - }, - { - "epoch": 0.8237370106528985, - "grad_norm": 0.4402284622192383, - "learning_rate": 8.425493854958895e-06, - "loss": 0.3838, - "step": 12604 - }, - { - "epoch": 0.8238023658584406, - "grad_norm": 0.460764616727829, - "learning_rate": 8.425239479079885e-06, - "loss": 0.4122, - "step": 12605 - }, - { - "epoch": 0.8238677210639828, - "grad_norm": 0.4580022096633911, - "learning_rate": 8.424985086494795e-06, - "loss": 0.3883, - "step": 12606 - }, - { - "epoch": 0.8239330762695248, - "grad_norm": 0.41545212268829346, - "learning_rate": 8.424730677204867e-06, - "loss": 0.3249, - "step": 12607 - }, - { - "epoch": 0.823998431475067, - "grad_norm": 0.46373510360717773, - "learning_rate": 8.42447625121134e-06, - "loss": 0.4204, - "step": 12608 - }, - { - "epoch": 0.8240637866806091, - "grad_norm": 0.4270155727863312, - "learning_rate": 8.424221808515458e-06, - "loss": 0.3435, - "step": 12609 - }, - { - "epoch": 0.8241291418861513, - "grad_norm": 0.4649266302585602, - "learning_rate": 8.423967349118459e-06, - "loss": 0.3716, - "step": 12610 - }, - { - "epoch": 0.8241944970916933, - "grad_norm": 0.42872345447540283, - "learning_rate": 8.423712873021585e-06, - "loss": 0.328, - "step": 12611 - }, - { - "epoch": 0.8242598522972355, - "grad_norm": 0.47963541746139526, - "learning_rate": 8.423458380226077e-06, - "loss": 0.422, - "step": 12612 - }, - { - "epoch": 0.8243252075027776, - "grad_norm": 0.46212321519851685, - "learning_rate": 8.423203870733177e-06, - "loss": 0.3602, - "step": 12613 - }, - { - "epoch": 0.8243905627083197, - "grad_norm": 0.43106013536453247, - "learning_rate": 8.422949344544125e-06, - "loss": 0.3455, - "step": 12614 - }, - { - "epoch": 0.8244559179138619, - "grad_norm": 0.4852938950061798, - "learning_rate": 8.422694801660162e-06, - "loss": 0.4061, - "step": 12615 - }, - { - "epoch": 0.8245212731194039, - "grad_norm": 0.4699553847312927, - "learning_rate": 8.422440242082533e-06, - "loss": 0.4368, - "step": 12616 - }, - { - "epoch": 0.8245866283249461, - "grad_norm": 0.4388345777988434, - "learning_rate": 8.422185665812479e-06, - "loss": 0.3468, - "step": 12617 - }, - { - "epoch": 0.8246519835304882, - "grad_norm": 0.47370830178260803, - "learning_rate": 8.421931072851237e-06, - "loss": 0.3819, - "step": 12618 - }, - { - "epoch": 0.8247173387360304, - "grad_norm": 0.4548191726207733, - "learning_rate": 8.42167646320005e-06, - "loss": 0.3617, - "step": 12619 - }, - { - "epoch": 0.8247826939415724, - "grad_norm": 0.43634673953056335, - "learning_rate": 8.421421836860166e-06, - "loss": 0.3671, - "step": 12620 - }, - { - "epoch": 0.8248480491471145, - "grad_norm": 0.4693472981452942, - "learning_rate": 8.42116719383282e-06, - "loss": 0.3635, - "step": 12621 - }, - { - "epoch": 0.8249134043526567, - "grad_norm": 0.4220568537712097, - "learning_rate": 8.420912534119256e-06, - "loss": 0.3292, - "step": 12622 - }, - { - "epoch": 0.8249787595581988, - "grad_norm": 0.45013129711151123, - "learning_rate": 8.420657857720717e-06, - "loss": 0.3635, - "step": 12623 - }, - { - "epoch": 0.825044114763741, - "grad_norm": 0.45930156111717224, - "learning_rate": 8.420403164638444e-06, - "loss": 0.3796, - "step": 12624 - }, - { - "epoch": 0.825109469969283, - "grad_norm": 0.44628220796585083, - "learning_rate": 8.420148454873681e-06, - "loss": 0.3437, - "step": 12625 - }, - { - "epoch": 0.8251748251748252, - "grad_norm": 0.4620745778083801, - "learning_rate": 8.419893728427668e-06, - "loss": 0.3779, - "step": 12626 - }, - { - "epoch": 0.8252401803803673, - "grad_norm": 0.4369930922985077, - "learning_rate": 8.41963898530165e-06, - "loss": 0.3178, - "step": 12627 - }, - { - "epoch": 0.8253055355859095, - "grad_norm": 0.47579482197761536, - "learning_rate": 8.419384225496868e-06, - "loss": 0.4016, - "step": 12628 - }, - { - "epoch": 0.8253708907914515, - "grad_norm": 0.400603324174881, - "learning_rate": 8.419129449014562e-06, - "loss": 0.324, - "step": 12629 - }, - { - "epoch": 0.8254362459969936, - "grad_norm": 0.44178307056427, - "learning_rate": 8.41887465585598e-06, - "loss": 0.3445, - "step": 12630 - }, - { - "epoch": 0.8255016012025358, - "grad_norm": 0.4656490981578827, - "learning_rate": 8.41861984602236e-06, - "loss": 0.4028, - "step": 12631 - }, - { - "epoch": 0.8255669564080779, - "grad_norm": 0.4748069941997528, - "learning_rate": 8.418365019514946e-06, - "loss": 0.4132, - "step": 12632 - }, - { - "epoch": 0.82563231161362, - "grad_norm": 0.42328181862831116, - "learning_rate": 8.418110176334984e-06, - "loss": 0.3367, - "step": 12633 - }, - { - "epoch": 0.8256976668191621, - "grad_norm": 0.49895450472831726, - "learning_rate": 8.417855316483715e-06, - "loss": 0.4207, - "step": 12634 - }, - { - "epoch": 0.8257630220247043, - "grad_norm": 0.46849435567855835, - "learning_rate": 8.41760043996238e-06, - "loss": 0.3787, - "step": 12635 - }, - { - "epoch": 0.8258283772302464, - "grad_norm": 0.4596104919910431, - "learning_rate": 8.417345546772222e-06, - "loss": 0.3656, - "step": 12636 - }, - { - "epoch": 0.8258937324357886, - "grad_norm": 0.43287724256515503, - "learning_rate": 8.417090636914487e-06, - "loss": 0.3826, - "step": 12637 - }, - { - "epoch": 0.8259590876413306, - "grad_norm": 0.4663711190223694, - "learning_rate": 8.416835710390418e-06, - "loss": 0.4286, - "step": 12638 - }, - { - "epoch": 0.8260244428468727, - "grad_norm": 0.4456733167171478, - "learning_rate": 8.41658076720126e-06, - "loss": 0.3693, - "step": 12639 - }, - { - "epoch": 0.8260897980524149, - "grad_norm": 0.4583789110183716, - "learning_rate": 8.416325807348249e-06, - "loss": 0.3734, - "step": 12640 - }, - { - "epoch": 0.826155153257957, - "grad_norm": 0.49686723947525024, - "learning_rate": 8.416070830832636e-06, - "loss": 0.4909, - "step": 12641 - }, - { - "epoch": 0.8262205084634991, - "grad_norm": 0.43204590678215027, - "learning_rate": 8.415815837655663e-06, - "loss": 0.3754, - "step": 12642 - }, - { - "epoch": 0.8262858636690412, - "grad_norm": 0.45484909415245056, - "learning_rate": 8.415560827818572e-06, - "loss": 0.3711, - "step": 12643 - }, - { - "epoch": 0.8263512188745834, - "grad_norm": 0.46242430806159973, - "learning_rate": 8.415305801322607e-06, - "loss": 0.3748, - "step": 12644 - }, - { - "epoch": 0.8264165740801255, - "grad_norm": 0.42559534311294556, - "learning_rate": 8.415050758169011e-06, - "loss": 0.3543, - "step": 12645 - }, - { - "epoch": 0.8264819292856675, - "grad_norm": 0.45685240626335144, - "learning_rate": 8.414795698359033e-06, - "loss": 0.4031, - "step": 12646 - }, - { - "epoch": 0.8265472844912097, - "grad_norm": 0.4635372757911682, - "learning_rate": 8.41454062189391e-06, - "loss": 0.3985, - "step": 12647 - }, - { - "epoch": 0.8266126396967518, - "grad_norm": 0.4519408941268921, - "learning_rate": 8.414285528774892e-06, - "loss": 0.3706, - "step": 12648 - }, - { - "epoch": 0.826677994902294, - "grad_norm": 0.41850516200065613, - "learning_rate": 8.41403041900322e-06, - "loss": 0.3429, - "step": 12649 - }, - { - "epoch": 0.8267433501078361, - "grad_norm": 0.4194774925708771, - "learning_rate": 8.413775292580137e-06, - "loss": 0.3302, - "step": 12650 - }, - { - "epoch": 0.8268087053133782, - "grad_norm": 0.4301159679889679, - "learning_rate": 8.413520149506892e-06, - "loss": 0.3525, - "step": 12651 - }, - { - "epoch": 0.8268740605189203, - "grad_norm": 0.4377503991127014, - "learning_rate": 8.413264989784726e-06, - "loss": 0.355, - "step": 12652 - }, - { - "epoch": 0.8269394157244625, - "grad_norm": 0.46422407031059265, - "learning_rate": 8.413009813414882e-06, - "loss": 0.3483, - "step": 12653 - }, - { - "epoch": 0.8270047709300046, - "grad_norm": 0.568018913269043, - "learning_rate": 8.412754620398609e-06, - "loss": 0.3467, - "step": 12654 - }, - { - "epoch": 0.8270701261355466, - "grad_norm": 0.4812344014644623, - "learning_rate": 8.412499410737149e-06, - "loss": 0.4122, - "step": 12655 - }, - { - "epoch": 0.8271354813410888, - "grad_norm": 0.4537135362625122, - "learning_rate": 8.412244184431745e-06, - "loss": 0.3408, - "step": 12656 - }, - { - "epoch": 0.8272008365466309, - "grad_norm": 0.45598137378692627, - "learning_rate": 8.411988941483649e-06, - "loss": 0.3643, - "step": 12657 - }, - { - "epoch": 0.8272661917521731, - "grad_norm": 0.5274432897567749, - "learning_rate": 8.411733681894096e-06, - "loss": 0.2587, - "step": 12658 - }, - { - "epoch": 0.8273315469577152, - "grad_norm": 0.4589974284172058, - "learning_rate": 8.41147840566434e-06, - "loss": 0.3845, - "step": 12659 - }, - { - "epoch": 0.8273969021632573, - "grad_norm": 0.4544839560985565, - "learning_rate": 8.41122311279562e-06, - "loss": 0.3663, - "step": 12660 - }, - { - "epoch": 0.8274622573687994, - "grad_norm": 0.4455678164958954, - "learning_rate": 8.410967803289182e-06, - "loss": 0.3912, - "step": 12661 - }, - { - "epoch": 0.8275276125743416, - "grad_norm": 0.5165808796882629, - "learning_rate": 8.410712477146272e-06, - "loss": 0.443, - "step": 12662 - }, - { - "epoch": 0.8275929677798837, - "grad_norm": 0.5062906742095947, - "learning_rate": 8.410457134368137e-06, - "loss": 0.4557, - "step": 12663 - }, - { - "epoch": 0.8276583229854257, - "grad_norm": 0.4476670026779175, - "learning_rate": 8.410201774956021e-06, - "loss": 0.3941, - "step": 12664 - }, - { - "epoch": 0.8277236781909679, - "grad_norm": 0.4280488193035126, - "learning_rate": 8.40994639891117e-06, - "loss": 0.3442, - "step": 12665 - }, - { - "epoch": 0.82778903339651, - "grad_norm": 0.467495858669281, - "learning_rate": 8.409691006234829e-06, - "loss": 0.4366, - "step": 12666 - }, - { - "epoch": 0.8278543886020522, - "grad_norm": 0.43454861640930176, - "learning_rate": 8.409435596928243e-06, - "loss": 0.3333, - "step": 12667 - }, - { - "epoch": 0.8279197438075943, - "grad_norm": 0.4636070728302002, - "learning_rate": 8.40918017099266e-06, - "loss": 0.3907, - "step": 12668 - }, - { - "epoch": 0.8279850990131364, - "grad_norm": 0.44613513350486755, - "learning_rate": 8.408924728429321e-06, - "loss": 0.3979, - "step": 12669 - }, - { - "epoch": 0.8280504542186785, - "grad_norm": 0.423446387052536, - "learning_rate": 8.408669269239478e-06, - "loss": 0.3373, - "step": 12670 - }, - { - "epoch": 0.8281158094242207, - "grad_norm": 0.4337618052959442, - "learning_rate": 8.408413793424372e-06, - "loss": 0.352, - "step": 12671 - }, - { - "epoch": 0.8281811646297628, - "grad_norm": 0.4586661458015442, - "learning_rate": 8.408158300985254e-06, - "loss": 0.3742, - "step": 12672 - }, - { - "epoch": 0.8282465198353048, - "grad_norm": 0.41534069180488586, - "learning_rate": 8.407902791923366e-06, - "loss": 0.3567, - "step": 12673 - }, - { - "epoch": 0.828311875040847, - "grad_norm": 0.4235283136367798, - "learning_rate": 8.407647266239954e-06, - "loss": 0.3254, - "step": 12674 - }, - { - "epoch": 0.8283772302463891, - "grad_norm": 0.4674146771430969, - "learning_rate": 8.407391723936267e-06, - "loss": 0.4166, - "step": 12675 - }, - { - "epoch": 0.8284425854519313, - "grad_norm": 0.4396532475948334, - "learning_rate": 8.40713616501355e-06, - "loss": 0.3555, - "step": 12676 - }, - { - "epoch": 0.8285079406574734, - "grad_norm": 0.4806467294692993, - "learning_rate": 8.40688058947305e-06, - "loss": 0.4116, - "step": 12677 - }, - { - "epoch": 0.8285732958630155, - "grad_norm": 0.46783992648124695, - "learning_rate": 8.406624997316014e-06, - "loss": 0.3982, - "step": 12678 - }, - { - "epoch": 0.8286386510685576, - "grad_norm": 0.4396979808807373, - "learning_rate": 8.406369388543684e-06, - "loss": 0.3676, - "step": 12679 - }, - { - "epoch": 0.8287040062740997, - "grad_norm": 0.40401342511177063, - "learning_rate": 8.406113763157313e-06, - "loss": 0.3376, - "step": 12680 - }, - { - "epoch": 0.8287693614796419, - "grad_norm": 0.46379831433296204, - "learning_rate": 8.405858121158146e-06, - "loss": 0.4036, - "step": 12681 - }, - { - "epoch": 0.8288347166851839, - "grad_norm": 0.45665764808654785, - "learning_rate": 8.405602462547428e-06, - "loss": 0.4325, - "step": 12682 - }, - { - "epoch": 0.8289000718907261, - "grad_norm": 0.42489299178123474, - "learning_rate": 8.405346787326408e-06, - "loss": 0.3322, - "step": 12683 - }, - { - "epoch": 0.8289654270962682, - "grad_norm": 0.43218982219696045, - "learning_rate": 8.40509109549633e-06, - "loss": 0.3358, - "step": 12684 - }, - { - "epoch": 0.8290307823018104, - "grad_norm": 0.4428415596485138, - "learning_rate": 8.404835387058445e-06, - "loss": 0.3758, - "step": 12685 - }, - { - "epoch": 0.8290961375073524, - "grad_norm": 0.4392610192298889, - "learning_rate": 8.404579662013996e-06, - "loss": 0.3538, - "step": 12686 - }, - { - "epoch": 0.8291614927128946, - "grad_norm": 0.43328729271888733, - "learning_rate": 8.404323920364235e-06, - "loss": 0.3634, - "step": 12687 - }, - { - "epoch": 0.8292268479184367, - "grad_norm": 0.47693005204200745, - "learning_rate": 8.404068162110406e-06, - "loss": 0.4126, - "step": 12688 - }, - { - "epoch": 0.8292922031239788, - "grad_norm": 0.4226832389831543, - "learning_rate": 8.403812387253755e-06, - "loss": 0.343, - "step": 12689 - }, - { - "epoch": 0.829357558329521, - "grad_norm": 0.44714322686195374, - "learning_rate": 8.403556595795536e-06, - "loss": 0.3805, - "step": 12690 - }, - { - "epoch": 0.829422913535063, - "grad_norm": 0.44101881980895996, - "learning_rate": 8.40330078773699e-06, - "loss": 0.3581, - "step": 12691 - }, - { - "epoch": 0.8294882687406052, - "grad_norm": 0.4675297439098358, - "learning_rate": 8.403044963079367e-06, - "loss": 0.3865, - "step": 12692 - }, - { - "epoch": 0.8295536239461473, - "grad_norm": 0.44370898604393005, - "learning_rate": 8.402789121823916e-06, - "loss": 0.3907, - "step": 12693 - }, - { - "epoch": 0.8296189791516895, - "grad_norm": 0.4831228256225586, - "learning_rate": 8.402533263971882e-06, - "loss": 0.4117, - "step": 12694 - }, - { - "epoch": 0.8296843343572315, - "grad_norm": 0.42903903126716614, - "learning_rate": 8.402277389524516e-06, - "loss": 0.3316, - "step": 12695 - }, - { - "epoch": 0.8297496895627737, - "grad_norm": 0.43575528264045715, - "learning_rate": 8.402021498483063e-06, - "loss": 0.3866, - "step": 12696 - }, - { - "epoch": 0.8298150447683158, - "grad_norm": 0.4131913185119629, - "learning_rate": 8.401765590848773e-06, - "loss": 0.3395, - "step": 12697 - }, - { - "epoch": 0.8298803999738579, - "grad_norm": 0.4388498067855835, - "learning_rate": 8.401509666622894e-06, - "loss": 0.3918, - "step": 12698 - }, - { - "epoch": 0.8299457551794001, - "grad_norm": 0.43172523379325867, - "learning_rate": 8.401253725806674e-06, - "loss": 0.3843, - "step": 12699 - }, - { - "epoch": 0.8300111103849421, - "grad_norm": 0.45876553654670715, - "learning_rate": 8.40099776840136e-06, - "loss": 0.3912, - "step": 12700 - }, - { - "epoch": 0.8300764655904843, - "grad_norm": 0.41548505425453186, - "learning_rate": 8.400741794408204e-06, - "loss": 0.3754, - "step": 12701 - }, - { - "epoch": 0.8301418207960264, - "grad_norm": 0.4330996572971344, - "learning_rate": 8.40048580382845e-06, - "loss": 0.3474, - "step": 12702 - }, - { - "epoch": 0.8302071760015686, - "grad_norm": 0.4715730547904968, - "learning_rate": 8.400229796663351e-06, - "loss": 0.4227, - "step": 12703 - }, - { - "epoch": 0.8302725312071106, - "grad_norm": 0.44692686200141907, - "learning_rate": 8.399973772914151e-06, - "loss": 0.3728, - "step": 12704 - }, - { - "epoch": 0.8303378864126527, - "grad_norm": 0.4354611337184906, - "learning_rate": 8.399717732582103e-06, - "loss": 0.3529, - "step": 12705 - }, - { - "epoch": 0.8304032416181949, - "grad_norm": 0.43565553426742554, - "learning_rate": 8.399461675668454e-06, - "loss": 0.3681, - "step": 12706 - }, - { - "epoch": 0.830468596823737, - "grad_norm": 0.4220907688140869, - "learning_rate": 8.399205602174451e-06, - "loss": 0.3278, - "step": 12707 - }, - { - "epoch": 0.8305339520292792, - "grad_norm": 0.45381301641464233, - "learning_rate": 8.398949512101345e-06, - "loss": 0.4048, - "step": 12708 - }, - { - "epoch": 0.8305993072348212, - "grad_norm": 0.43763771653175354, - "learning_rate": 8.398693405450385e-06, - "loss": 0.3559, - "step": 12709 - }, - { - "epoch": 0.8306646624403634, - "grad_norm": 0.45486941933631897, - "learning_rate": 8.39843728222282e-06, - "loss": 0.4211, - "step": 12710 - }, - { - "epoch": 0.8307300176459055, - "grad_norm": 0.44698017835617065, - "learning_rate": 8.3981811424199e-06, - "loss": 0.362, - "step": 12711 - }, - { - "epoch": 0.8307953728514477, - "grad_norm": 0.4658489525318146, - "learning_rate": 8.397924986042872e-06, - "loss": 0.3659, - "step": 12712 - }, - { - "epoch": 0.8308607280569897, - "grad_norm": 0.4626188278198242, - "learning_rate": 8.397668813092988e-06, - "loss": 0.3945, - "step": 12713 - }, - { - "epoch": 0.8309260832625318, - "grad_norm": 0.4650971293449402, - "learning_rate": 8.397412623571495e-06, - "loss": 0.431, - "step": 12714 - }, - { - "epoch": 0.830991438468074, - "grad_norm": 0.4560522437095642, - "learning_rate": 8.397156417479642e-06, - "loss": 0.4491, - "step": 12715 - }, - { - "epoch": 0.8310567936736161, - "grad_norm": 0.4106829762458801, - "learning_rate": 8.396900194818682e-06, - "loss": 0.34, - "step": 12716 - }, - { - "epoch": 0.8311221488791583, - "grad_norm": 0.4265146553516388, - "learning_rate": 8.396643955589863e-06, - "loss": 0.3529, - "step": 12717 - }, - { - "epoch": 0.8311875040847003, - "grad_norm": 0.45510897040367126, - "learning_rate": 8.396387699794436e-06, - "loss": 0.3871, - "step": 12718 - }, - { - "epoch": 0.8312528592902425, - "grad_norm": 0.4605911076068878, - "learning_rate": 8.396131427433648e-06, - "loss": 0.3943, - "step": 12719 - }, - { - "epoch": 0.8313182144957846, - "grad_norm": 0.46505945920944214, - "learning_rate": 8.39587513850875e-06, - "loss": 0.3951, - "step": 12720 - }, - { - "epoch": 0.8313835697013268, - "grad_norm": 0.43580713868141174, - "learning_rate": 8.395618833020993e-06, - "loss": 0.3512, - "step": 12721 - }, - { - "epoch": 0.8314489249068688, - "grad_norm": 0.4356127679347992, - "learning_rate": 8.395362510971628e-06, - "loss": 0.3679, - "step": 12722 - }, - { - "epoch": 0.8315142801124109, - "grad_norm": 0.47002744674682617, - "learning_rate": 8.395106172361903e-06, - "loss": 0.4242, - "step": 12723 - }, - { - "epoch": 0.8315796353179531, - "grad_norm": 0.44220444560050964, - "learning_rate": 8.394849817193068e-06, - "loss": 0.3727, - "step": 12724 - }, - { - "epoch": 0.8316449905234952, - "grad_norm": 0.43452540040016174, - "learning_rate": 8.394593445466375e-06, - "loss": 0.3664, - "step": 12725 - }, - { - "epoch": 0.8317103457290373, - "grad_norm": 0.47207656502723694, - "learning_rate": 8.394337057183074e-06, - "loss": 0.4063, - "step": 12726 - }, - { - "epoch": 0.8317757009345794, - "grad_norm": 0.4225301146507263, - "learning_rate": 8.394080652344415e-06, - "loss": 0.3163, - "step": 12727 - }, - { - "epoch": 0.8318410561401216, - "grad_norm": 0.42666900157928467, - "learning_rate": 8.393824230951647e-06, - "loss": 0.348, - "step": 12728 - }, - { - "epoch": 0.8319064113456637, - "grad_norm": 0.4660142958164215, - "learning_rate": 8.393567793006025e-06, - "loss": 0.3963, - "step": 12729 - }, - { - "epoch": 0.8319717665512057, - "grad_norm": 0.4486709535121918, - "learning_rate": 8.393311338508799e-06, - "loss": 0.3684, - "step": 12730 - }, - { - "epoch": 0.8320371217567479, - "grad_norm": 0.38883456587791443, - "learning_rate": 8.393054867461214e-06, - "loss": 0.275, - "step": 12731 - }, - { - "epoch": 0.83210247696229, - "grad_norm": 0.4592151939868927, - "learning_rate": 8.392798379864526e-06, - "loss": 0.3827, - "step": 12732 - }, - { - "epoch": 0.8321678321678322, - "grad_norm": 0.4490208327770233, - "learning_rate": 8.392541875719987e-06, - "loss": 0.3601, - "step": 12733 - }, - { - "epoch": 0.8322331873733743, - "grad_norm": 0.46575114130973816, - "learning_rate": 8.392285355028844e-06, - "loss": 0.4081, - "step": 12734 - }, - { - "epoch": 0.8322985425789164, - "grad_norm": 0.45290032029151917, - "learning_rate": 8.39202881779235e-06, - "loss": 0.3864, - "step": 12735 - }, - { - "epoch": 0.8323638977844585, - "grad_norm": 0.48589763045310974, - "learning_rate": 8.391772264011757e-06, - "loss": 0.4194, - "step": 12736 - }, - { - "epoch": 0.8324292529900007, - "grad_norm": 0.47278133034706116, - "learning_rate": 8.391515693688317e-06, - "loss": 0.3597, - "step": 12737 - }, - { - "epoch": 0.8324946081955428, - "grad_norm": 0.4128890931606293, - "learning_rate": 8.391259106823277e-06, - "loss": 0.3481, - "step": 12738 - }, - { - "epoch": 0.8325599634010848, - "grad_norm": 0.45492124557495117, - "learning_rate": 8.391002503417893e-06, - "loss": 0.3701, - "step": 12739 - }, - { - "epoch": 0.832625318606627, - "grad_norm": 0.46682876348495483, - "learning_rate": 8.390745883473417e-06, - "loss": 0.3607, - "step": 12740 - }, - { - "epoch": 0.8326906738121691, - "grad_norm": 0.4522010087966919, - "learning_rate": 8.390489246991096e-06, - "loss": 0.3826, - "step": 12741 - }, - { - "epoch": 0.8327560290177113, - "grad_norm": 0.4297303259372711, - "learning_rate": 8.390232593972185e-06, - "loss": 0.3733, - "step": 12742 - }, - { - "epoch": 0.8328213842232534, - "grad_norm": 0.4380183517932892, - "learning_rate": 8.389975924417936e-06, - "loss": 0.3735, - "step": 12743 - }, - { - "epoch": 0.8328867394287955, - "grad_norm": 0.4422387480735779, - "learning_rate": 8.389719238329598e-06, - "loss": 0.4107, - "step": 12744 - }, - { - "epoch": 0.8329520946343376, - "grad_norm": 0.46929383277893066, - "learning_rate": 8.389462535708428e-06, - "loss": 0.386, - "step": 12745 - }, - { - "epoch": 0.8330174498398798, - "grad_norm": 0.44553861021995544, - "learning_rate": 8.389205816555673e-06, - "loss": 0.3709, - "step": 12746 - }, - { - "epoch": 0.8330828050454219, - "grad_norm": 0.5020803213119507, - "learning_rate": 8.388949080872588e-06, - "loss": 0.3955, - "step": 12747 - }, - { - "epoch": 0.8331481602509639, - "grad_norm": 0.45638465881347656, - "learning_rate": 8.388692328660423e-06, - "loss": 0.3936, - "step": 12748 - }, - { - "epoch": 0.8332135154565061, - "grad_norm": 0.46880820393562317, - "learning_rate": 8.388435559920433e-06, - "loss": 0.4002, - "step": 12749 - }, - { - "epoch": 0.8332788706620482, - "grad_norm": 0.4836241602897644, - "learning_rate": 8.388178774653869e-06, - "loss": 0.4489, - "step": 12750 - }, - { - "epoch": 0.8333442258675904, - "grad_norm": 0.5328258275985718, - "learning_rate": 8.38792197286198e-06, - "loss": 0.5287, - "step": 12751 - }, - { - "epoch": 0.8334095810731325, - "grad_norm": 0.4341478645801544, - "learning_rate": 8.387665154546025e-06, - "loss": 0.3451, - "step": 12752 - }, - { - "epoch": 0.8334749362786746, - "grad_norm": 0.4613303542137146, - "learning_rate": 8.387408319707254e-06, - "loss": 0.4009, - "step": 12753 - }, - { - "epoch": 0.8335402914842167, - "grad_norm": 0.4375651776790619, - "learning_rate": 8.387151468346916e-06, - "loss": 0.3779, - "step": 12754 - }, - { - "epoch": 0.8336056466897589, - "grad_norm": 0.43719443678855896, - "learning_rate": 8.38689460046627e-06, - "loss": 0.3549, - "step": 12755 - }, - { - "epoch": 0.833671001895301, - "grad_norm": 0.43977174162864685, - "learning_rate": 8.386637716066563e-06, - "loss": 0.3449, - "step": 12756 - }, - { - "epoch": 0.833736357100843, - "grad_norm": 0.43269723653793335, - "learning_rate": 8.386380815149053e-06, - "loss": 0.3469, - "step": 12757 - }, - { - "epoch": 0.8338017123063852, - "grad_norm": 0.4300509989261627, - "learning_rate": 8.386123897714991e-06, - "loss": 0.3808, - "step": 12758 - }, - { - "epoch": 0.8338670675119273, - "grad_norm": 0.43225598335266113, - "learning_rate": 8.385866963765628e-06, - "loss": 0.3836, - "step": 12759 - }, - { - "epoch": 0.8339324227174695, - "grad_norm": 0.4335007071495056, - "learning_rate": 8.38561001330222e-06, - "loss": 0.373, - "step": 12760 - }, - { - "epoch": 0.8339977779230116, - "grad_norm": 0.4306311011314392, - "learning_rate": 8.385353046326017e-06, - "loss": 0.3542, - "step": 12761 - }, - { - "epoch": 0.8340631331285537, - "grad_norm": 0.4562755823135376, - "learning_rate": 8.385096062838275e-06, - "loss": 0.403, - "step": 12762 - }, - { - "epoch": 0.8341284883340958, - "grad_norm": 0.5411015748977661, - "learning_rate": 8.38483906284025e-06, - "loss": 0.4526, - "step": 12763 - }, - { - "epoch": 0.8341938435396379, - "grad_norm": 0.4221334755420685, - "learning_rate": 8.384582046333188e-06, - "loss": 0.3668, - "step": 12764 - }, - { - "epoch": 0.8342591987451801, - "grad_norm": 0.46379244327545166, - "learning_rate": 8.384325013318348e-06, - "loss": 0.3976, - "step": 12765 - }, - { - "epoch": 0.8343245539507221, - "grad_norm": 0.46996960043907166, - "learning_rate": 8.384067963796984e-06, - "loss": 0.3805, - "step": 12766 - }, - { - "epoch": 0.8343899091562643, - "grad_norm": 0.45651906728744507, - "learning_rate": 8.383810897770348e-06, - "loss": 0.3634, - "step": 12767 - }, - { - "epoch": 0.8344552643618064, - "grad_norm": 0.6424210667610168, - "learning_rate": 8.383553815239693e-06, - "loss": 0.3589, - "step": 12768 - }, - { - "epoch": 0.8345206195673486, - "grad_norm": 0.43621474504470825, - "learning_rate": 8.383296716206273e-06, - "loss": 0.3912, - "step": 12769 - }, - { - "epoch": 0.8345859747728906, - "grad_norm": 0.43640974164009094, - "learning_rate": 8.383039600671344e-06, - "loss": 0.3523, - "step": 12770 - }, - { - "epoch": 0.8346513299784328, - "grad_norm": 0.4584968090057373, - "learning_rate": 8.38278246863616e-06, - "loss": 0.3949, - "step": 12771 - }, - { - "epoch": 0.8347166851839749, - "grad_norm": 0.4630500376224518, - "learning_rate": 8.382525320101972e-06, - "loss": 0.4189, - "step": 12772 - }, - { - "epoch": 0.834782040389517, - "grad_norm": 0.4346463978290558, - "learning_rate": 8.382268155070037e-06, - "loss": 0.423, - "step": 12773 - }, - { - "epoch": 0.8348473955950592, - "grad_norm": 0.4622276723384857, - "learning_rate": 8.382010973541608e-06, - "loss": 0.4342, - "step": 12774 - }, - { - "epoch": 0.8349127508006012, - "grad_norm": 0.43050628900527954, - "learning_rate": 8.38175377551794e-06, - "loss": 0.3645, - "step": 12775 - }, - { - "epoch": 0.8349781060061434, - "grad_norm": 0.4691722095012665, - "learning_rate": 8.381496561000289e-06, - "loss": 0.4378, - "step": 12776 - }, - { - "epoch": 0.8350434612116855, - "grad_norm": 0.45498543977737427, - "learning_rate": 8.381239329989905e-06, - "loss": 0.419, - "step": 12777 - }, - { - "epoch": 0.8351088164172277, - "grad_norm": 0.47149309515953064, - "learning_rate": 8.380982082488047e-06, - "loss": 0.4234, - "step": 12778 - }, - { - "epoch": 0.8351741716227697, - "grad_norm": 0.43489494919776917, - "learning_rate": 8.380724818495968e-06, - "loss": 0.3675, - "step": 12779 - }, - { - "epoch": 0.8352395268283119, - "grad_norm": 0.43044498562812805, - "learning_rate": 8.380467538014923e-06, - "loss": 0.3623, - "step": 12780 - }, - { - "epoch": 0.835304882033854, - "grad_norm": 0.41972649097442627, - "learning_rate": 8.380210241046167e-06, - "loss": 0.3121, - "step": 12781 - }, - { - "epoch": 0.8353702372393961, - "grad_norm": 0.4362650513648987, - "learning_rate": 8.379952927590952e-06, - "loss": 0.3709, - "step": 12782 - }, - { - "epoch": 0.8354355924449383, - "grad_norm": 0.4583292305469513, - "learning_rate": 8.379695597650539e-06, - "loss": 0.3699, - "step": 12783 - }, - { - "epoch": 0.8355009476504803, - "grad_norm": 0.465030699968338, - "learning_rate": 8.37943825122618e-06, - "loss": 0.3931, - "step": 12784 - }, - { - "epoch": 0.8355663028560225, - "grad_norm": 0.4656769037246704, - "learning_rate": 8.379180888319127e-06, - "loss": 0.4428, - "step": 12785 - }, - { - "epoch": 0.8356316580615646, - "grad_norm": 0.43806612491607666, - "learning_rate": 8.37892350893064e-06, - "loss": 0.3307, - "step": 12786 - }, - { - "epoch": 0.8356970132671068, - "grad_norm": 0.4317433536052704, - "learning_rate": 8.378666113061973e-06, - "loss": 0.3668, - "step": 12787 - }, - { - "epoch": 0.8357623684726488, - "grad_norm": 0.4749669134616852, - "learning_rate": 8.378408700714378e-06, - "loss": 0.4327, - "step": 12788 - }, - { - "epoch": 0.8358277236781909, - "grad_norm": 0.48393934965133667, - "learning_rate": 8.378151271889117e-06, - "loss": 0.4455, - "step": 12789 - }, - { - "epoch": 0.8358930788837331, - "grad_norm": 0.4799225330352783, - "learning_rate": 8.37789382658744e-06, - "loss": 0.3969, - "step": 12790 - }, - { - "epoch": 0.8359584340892752, - "grad_norm": 0.4521721303462982, - "learning_rate": 8.377636364810605e-06, - "loss": 0.3794, - "step": 12791 - }, - { - "epoch": 0.8360237892948174, - "grad_norm": 0.4709000885486603, - "learning_rate": 8.377378886559865e-06, - "loss": 0.4029, - "step": 12792 - }, - { - "epoch": 0.8360891445003594, - "grad_norm": 0.4529739320278168, - "learning_rate": 8.377121391836483e-06, - "loss": 0.3792, - "step": 12793 - }, - { - "epoch": 0.8361544997059016, - "grad_norm": 0.4557335078716278, - "learning_rate": 8.376863880641705e-06, - "loss": 0.3751, - "step": 12794 - }, - { - "epoch": 0.8362198549114437, - "grad_norm": 0.4018639326095581, - "learning_rate": 8.376606352976795e-06, - "loss": 0.3067, - "step": 12795 - }, - { - "epoch": 0.8362852101169859, - "grad_norm": 0.429113507270813, - "learning_rate": 8.376348808843006e-06, - "loss": 0.3596, - "step": 12796 - }, - { - "epoch": 0.8363505653225279, - "grad_norm": 0.43608278036117554, - "learning_rate": 8.376091248241594e-06, - "loss": 0.3448, - "step": 12797 - }, - { - "epoch": 0.83641592052807, - "grad_norm": 0.4282897114753723, - "learning_rate": 8.375833671173814e-06, - "loss": 0.3537, - "step": 12798 - }, - { - "epoch": 0.8364812757336122, - "grad_norm": 0.44926175475120544, - "learning_rate": 8.375576077640925e-06, - "loss": 0.3979, - "step": 12799 - }, - { - "epoch": 0.8365466309391543, - "grad_norm": 0.45824864506721497, - "learning_rate": 8.375318467644182e-06, - "loss": 0.3742, - "step": 12800 - }, - { - "epoch": 0.8366119861446965, - "grad_norm": 0.43586090207099915, - "learning_rate": 8.375060841184841e-06, - "loss": 0.3422, - "step": 12801 - }, - { - "epoch": 0.8366773413502385, - "grad_norm": 0.4497327208518982, - "learning_rate": 8.374803198264158e-06, - "loss": 0.3646, - "step": 12802 - }, - { - "epoch": 0.8367426965557807, - "grad_norm": 0.45988771319389343, - "learning_rate": 8.374545538883392e-06, - "loss": 0.3795, - "step": 12803 - }, - { - "epoch": 0.8368080517613228, - "grad_norm": 0.4657236933708191, - "learning_rate": 8.374287863043798e-06, - "loss": 0.4016, - "step": 12804 - }, - { - "epoch": 0.836873406966865, - "grad_norm": 0.4531661570072174, - "learning_rate": 8.374030170746635e-06, - "loss": 0.3946, - "step": 12805 - }, - { - "epoch": 0.836938762172407, - "grad_norm": 0.46240153908729553, - "learning_rate": 8.373772461993156e-06, - "loss": 0.3895, - "step": 12806 - }, - { - "epoch": 0.8370041173779491, - "grad_norm": 0.45432382822036743, - "learning_rate": 8.37351473678462e-06, - "loss": 0.375, - "step": 12807 - }, - { - "epoch": 0.8370694725834913, - "grad_norm": 0.4486439526081085, - "learning_rate": 8.373256995122284e-06, - "loss": 0.3807, - "step": 12808 - }, - { - "epoch": 0.8371348277890334, - "grad_norm": 0.4164700508117676, - "learning_rate": 8.372999237007405e-06, - "loss": 0.3043, - "step": 12809 - }, - { - "epoch": 0.8372001829945755, - "grad_norm": 0.44789445400238037, - "learning_rate": 8.37274146244124e-06, - "loss": 0.3774, - "step": 12810 - }, - { - "epoch": 0.8372655382001176, - "grad_norm": 0.442098468542099, - "learning_rate": 8.372483671425047e-06, - "loss": 0.3628, - "step": 12811 - }, - { - "epoch": 0.8373308934056598, - "grad_norm": 0.44000622630119324, - "learning_rate": 8.372225863960083e-06, - "loss": 0.3874, - "step": 12812 - }, - { - "epoch": 0.8373962486112019, - "grad_norm": 0.46801048517227173, - "learning_rate": 8.371968040047604e-06, - "loss": 0.3724, - "step": 12813 - }, - { - "epoch": 0.837461603816744, - "grad_norm": 0.45521214604377747, - "learning_rate": 8.37171019968887e-06, - "loss": 0.3803, - "step": 12814 - }, - { - "epoch": 0.8375269590222861, - "grad_norm": 0.44060996174812317, - "learning_rate": 8.371452342885139e-06, - "loss": 0.3878, - "step": 12815 - }, - { - "epoch": 0.8375923142278282, - "grad_norm": 0.4750652313232422, - "learning_rate": 8.371194469637662e-06, - "loss": 0.3971, - "step": 12816 - }, - { - "epoch": 0.8376576694333704, - "grad_norm": 0.4220958352088928, - "learning_rate": 8.370936579947706e-06, - "loss": 0.355, - "step": 12817 - }, - { - "epoch": 0.8377230246389125, - "grad_norm": 0.44775012135505676, - "learning_rate": 8.370678673816523e-06, - "loss": 0.3658, - "step": 12818 - }, - { - "epoch": 0.8377883798444546, - "grad_norm": 0.43990033864974976, - "learning_rate": 8.370420751245371e-06, - "loss": 0.3252, - "step": 12819 - }, - { - "epoch": 0.8378537350499967, - "grad_norm": 0.4598587453365326, - "learning_rate": 8.370162812235512e-06, - "loss": 0.402, - "step": 12820 - }, - { - "epoch": 0.8379190902555389, - "grad_norm": 0.45981690287590027, - "learning_rate": 8.3699048567882e-06, - "loss": 0.3977, - "step": 12821 - }, - { - "epoch": 0.837984445461081, - "grad_norm": 0.44870638847351074, - "learning_rate": 8.369646884904694e-06, - "loss": 0.3702, - "step": 12822 - }, - { - "epoch": 0.838049800666623, - "grad_norm": 0.4393848478794098, - "learning_rate": 8.369388896586254e-06, - "loss": 0.3926, - "step": 12823 - }, - { - "epoch": 0.8381151558721652, - "grad_norm": 0.45588740706443787, - "learning_rate": 8.369130891834136e-06, - "loss": 0.4076, - "step": 12824 - }, - { - "epoch": 0.8381805110777073, - "grad_norm": 0.4453178644180298, - "learning_rate": 8.3688728706496e-06, - "loss": 0.3758, - "step": 12825 - }, - { - "epoch": 0.8382458662832495, - "grad_norm": 0.4394017457962036, - "learning_rate": 8.368614833033906e-06, - "loss": 0.3672, - "step": 12826 - }, - { - "epoch": 0.8383112214887916, - "grad_norm": 0.48031318187713623, - "learning_rate": 8.368356778988306e-06, - "loss": 0.4246, - "step": 12827 - }, - { - "epoch": 0.8383765766943337, - "grad_norm": 0.4832472503185272, - "learning_rate": 8.368098708514068e-06, - "loss": 0.3781, - "step": 12828 - }, - { - "epoch": 0.8384419318998758, - "grad_norm": 0.4725753962993622, - "learning_rate": 8.367840621612443e-06, - "loss": 0.4146, - "step": 12829 - }, - { - "epoch": 0.838507287105418, - "grad_norm": 0.489314466714859, - "learning_rate": 8.367582518284692e-06, - "loss": 0.4495, - "step": 12830 - }, - { - "epoch": 0.8385726423109601, - "grad_norm": 0.4142928421497345, - "learning_rate": 8.367324398532076e-06, - "loss": 0.3103, - "step": 12831 - }, - { - "epoch": 0.8386379975165021, - "grad_norm": 0.43661683797836304, - "learning_rate": 8.36706626235585e-06, - "loss": 0.3608, - "step": 12832 - }, - { - "epoch": 0.8387033527220443, - "grad_norm": 0.45844224095344543, - "learning_rate": 8.366808109757279e-06, - "loss": 0.3807, - "step": 12833 - }, - { - "epoch": 0.8387687079275864, - "grad_norm": 0.4577506482601166, - "learning_rate": 8.366549940737615e-06, - "loss": 0.3649, - "step": 12834 - }, - { - "epoch": 0.8388340631331286, - "grad_norm": 0.5160108804702759, - "learning_rate": 8.366291755298122e-06, - "loss": 0.4036, - "step": 12835 - }, - { - "epoch": 0.8388994183386707, - "grad_norm": 0.4346136748790741, - "learning_rate": 8.366033553440058e-06, - "loss": 0.3832, - "step": 12836 - }, - { - "epoch": 0.8389647735442128, - "grad_norm": 0.4836990535259247, - "learning_rate": 8.365775335164683e-06, - "loss": 0.4562, - "step": 12837 - }, - { - "epoch": 0.8390301287497549, - "grad_norm": 0.4245145916938782, - "learning_rate": 8.365517100473255e-06, - "loss": 0.3644, - "step": 12838 - }, - { - "epoch": 0.8390954839552971, - "grad_norm": 0.43731689453125, - "learning_rate": 8.365258849367034e-06, - "loss": 0.3737, - "step": 12839 - }, - { - "epoch": 0.8391608391608392, - "grad_norm": 0.47260788083076477, - "learning_rate": 8.365000581847281e-06, - "loss": 0.4177, - "step": 12840 - }, - { - "epoch": 0.8392261943663812, - "grad_norm": 0.4614366590976715, - "learning_rate": 8.364742297915251e-06, - "loss": 0.4336, - "step": 12841 - }, - { - "epoch": 0.8392915495719234, - "grad_norm": 0.5142934918403625, - "learning_rate": 8.364483997572211e-06, - "loss": 0.4161, - "step": 12842 - }, - { - "epoch": 0.8393569047774655, - "grad_norm": 0.43350541591644287, - "learning_rate": 8.364225680819415e-06, - "loss": 0.3439, - "step": 12843 - }, - { - "epoch": 0.8394222599830077, - "grad_norm": 0.4446878433227539, - "learning_rate": 8.363967347658124e-06, - "loss": 0.3702, - "step": 12844 - }, - { - "epoch": 0.8394876151885498, - "grad_norm": 0.43523842096328735, - "learning_rate": 8.3637089980896e-06, - "loss": 0.3728, - "step": 12845 - }, - { - "epoch": 0.8395529703940919, - "grad_norm": 0.4645645022392273, - "learning_rate": 8.363450632115103e-06, - "loss": 0.3806, - "step": 12846 - }, - { - "epoch": 0.839618325599634, - "grad_norm": 0.42271658778190613, - "learning_rate": 8.363192249735892e-06, - "loss": 0.3416, - "step": 12847 - }, - { - "epoch": 0.8396836808051761, - "grad_norm": 0.4454825222492218, - "learning_rate": 8.362933850953227e-06, - "loss": 0.3334, - "step": 12848 - }, - { - "epoch": 0.8397490360107183, - "grad_norm": 0.42654910683631897, - "learning_rate": 8.362675435768369e-06, - "loss": 0.34, - "step": 12849 - }, - { - "epoch": 0.8398143912162603, - "grad_norm": 0.45451635122299194, - "learning_rate": 8.362417004182575e-06, - "loss": 0.3877, - "step": 12850 - }, - { - "epoch": 0.8398797464218025, - "grad_norm": 0.4170895218849182, - "learning_rate": 8.362158556197112e-06, - "loss": 0.3222, - "step": 12851 - }, - { - "epoch": 0.8399451016273446, - "grad_norm": 0.4565311074256897, - "learning_rate": 8.361900091813234e-06, - "loss": 0.3909, - "step": 12852 - }, - { - "epoch": 0.8400104568328868, - "grad_norm": 0.4371476173400879, - "learning_rate": 8.361641611032206e-06, - "loss": 0.3945, - "step": 12853 - }, - { - "epoch": 0.8400758120384288, - "grad_norm": 0.44827279448509216, - "learning_rate": 8.361383113855287e-06, - "loss": 0.413, - "step": 12854 - }, - { - "epoch": 0.840141167243971, - "grad_norm": 0.4154590666294098, - "learning_rate": 8.361124600283738e-06, - "loss": 0.3703, - "step": 12855 - }, - { - "epoch": 0.8402065224495131, - "grad_norm": 0.44536155462265015, - "learning_rate": 8.36086607031882e-06, - "loss": 0.4032, - "step": 12856 - }, - { - "epoch": 0.8402718776550552, - "grad_norm": 0.44725316762924194, - "learning_rate": 8.360607523961794e-06, - "loss": 0.4169, - "step": 12857 - }, - { - "epoch": 0.8403372328605974, - "grad_norm": 0.43830201029777527, - "learning_rate": 8.360348961213922e-06, - "loss": 0.3712, - "step": 12858 - }, - { - "epoch": 0.8404025880661394, - "grad_norm": 0.4728457033634186, - "learning_rate": 8.360090382076462e-06, - "loss": 0.3956, - "step": 12859 - }, - { - "epoch": 0.8404679432716816, - "grad_norm": 0.40700605511665344, - "learning_rate": 8.359831786550679e-06, - "loss": 0.3399, - "step": 12860 - }, - { - "epoch": 0.8405332984772237, - "grad_norm": 0.4341380000114441, - "learning_rate": 8.35957317463783e-06, - "loss": 0.4009, - "step": 12861 - }, - { - "epoch": 0.8405986536827659, - "grad_norm": 0.4405883252620697, - "learning_rate": 8.359314546339181e-06, - "loss": 0.3622, - "step": 12862 - }, - { - "epoch": 0.840664008888308, - "grad_norm": 0.41906869411468506, - "learning_rate": 8.35905590165599e-06, - "loss": 0.347, - "step": 12863 - }, - { - "epoch": 0.8407293640938501, - "grad_norm": 0.44313690066337585, - "learning_rate": 8.35879724058952e-06, - "loss": 0.3953, - "step": 12864 - }, - { - "epoch": 0.8407947192993922, - "grad_norm": 0.41211745142936707, - "learning_rate": 8.358538563141033e-06, - "loss": 0.3221, - "step": 12865 - }, - { - "epoch": 0.8408600745049343, - "grad_norm": 0.4759785830974579, - "learning_rate": 8.358279869311788e-06, - "loss": 0.3467, - "step": 12866 - }, - { - "epoch": 0.8409254297104765, - "grad_norm": 0.4384678602218628, - "learning_rate": 8.35802115910305e-06, - "loss": 0.4046, - "step": 12867 - }, - { - "epoch": 0.8409907849160185, - "grad_norm": 0.4619692265987396, - "learning_rate": 8.357762432516081e-06, - "loss": 0.3903, - "step": 12868 - }, - { - "epoch": 0.8410561401215607, - "grad_norm": 0.4471484422683716, - "learning_rate": 8.35750368955214e-06, - "loss": 0.416, - "step": 12869 - }, - { - "epoch": 0.8411214953271028, - "grad_norm": 0.463309109210968, - "learning_rate": 8.35724493021249e-06, - "loss": 0.3772, - "step": 12870 - }, - { - "epoch": 0.841186850532645, - "grad_norm": 0.4475812613964081, - "learning_rate": 8.356986154498393e-06, - "loss": 0.3611, - "step": 12871 - }, - { - "epoch": 0.841252205738187, - "grad_norm": 0.41159477829933167, - "learning_rate": 8.356727362411112e-06, - "loss": 0.3442, - "step": 12872 - }, - { - "epoch": 0.8413175609437291, - "grad_norm": 0.4358052909374237, - "learning_rate": 8.356468553951908e-06, - "loss": 0.3527, - "step": 12873 - }, - { - "epoch": 0.8413829161492713, - "grad_norm": 0.4975724220275879, - "learning_rate": 8.356209729122045e-06, - "loss": 0.4439, - "step": 12874 - }, - { - "epoch": 0.8414482713548134, - "grad_norm": 0.41729211807250977, - "learning_rate": 8.355950887922786e-06, - "loss": 0.3382, - "step": 12875 - }, - { - "epoch": 0.8415136265603556, - "grad_norm": 0.4276593029499054, - "learning_rate": 8.35569203035539e-06, - "loss": 0.3649, - "step": 12876 - }, - { - "epoch": 0.8415789817658976, - "grad_norm": 0.4574490785598755, - "learning_rate": 8.35543315642112e-06, - "loss": 0.38, - "step": 12877 - }, - { - "epoch": 0.8416443369714398, - "grad_norm": 0.47274157404899597, - "learning_rate": 8.355174266121241e-06, - "loss": 0.4263, - "step": 12878 - }, - { - "epoch": 0.8417096921769819, - "grad_norm": 0.42799681425094604, - "learning_rate": 8.354915359457016e-06, - "loss": 0.3276, - "step": 12879 - }, - { - "epoch": 0.8417750473825241, - "grad_norm": 0.43358737230300903, - "learning_rate": 8.354656436429707e-06, - "loss": 0.3579, - "step": 12880 - }, - { - "epoch": 0.8418404025880661, - "grad_norm": 0.4326688051223755, - "learning_rate": 8.354397497040576e-06, - "loss": 0.3788, - "step": 12881 - }, - { - "epoch": 0.8419057577936082, - "grad_norm": 0.4247555732727051, - "learning_rate": 8.354138541290885e-06, - "loss": 0.3503, - "step": 12882 - }, - { - "epoch": 0.8419711129991504, - "grad_norm": 0.40687647461891174, - "learning_rate": 8.353879569181899e-06, - "loss": 0.3005, - "step": 12883 - }, - { - "epoch": 0.8420364682046925, - "grad_norm": 0.44955679774284363, - "learning_rate": 8.353620580714881e-06, - "loss": 0.3785, - "step": 12884 - }, - { - "epoch": 0.8421018234102347, - "grad_norm": 0.4818287789821625, - "learning_rate": 8.353361575891094e-06, - "loss": 0.3912, - "step": 12885 - }, - { - "epoch": 0.8421671786157767, - "grad_norm": 0.464008629322052, - "learning_rate": 8.3531025547118e-06, - "loss": 0.3708, - "step": 12886 - }, - { - "epoch": 0.8422325338213189, - "grad_norm": 0.4107102155685425, - "learning_rate": 8.352843517178262e-06, - "loss": 0.352, - "step": 12887 - }, - { - "epoch": 0.842297889026861, - "grad_norm": 0.45311489701271057, - "learning_rate": 8.352584463291746e-06, - "loss": 0.3707, - "step": 12888 - }, - { - "epoch": 0.8423632442324032, - "grad_norm": 0.4310702979564667, - "learning_rate": 8.352325393053516e-06, - "loss": 0.3502, - "step": 12889 - }, - { - "epoch": 0.8424285994379452, - "grad_norm": 0.4197126030921936, - "learning_rate": 8.352066306464831e-06, - "loss": 0.373, - "step": 12890 - }, - { - "epoch": 0.8424939546434873, - "grad_norm": 0.4376210868358612, - "learning_rate": 8.351807203526958e-06, - "loss": 0.3484, - "step": 12891 - }, - { - "epoch": 0.8425593098490295, - "grad_norm": 0.4755789041519165, - "learning_rate": 8.35154808424116e-06, - "loss": 0.4205, - "step": 12892 - }, - { - "epoch": 0.8426246650545716, - "grad_norm": 0.4546374976634979, - "learning_rate": 8.351288948608701e-06, - "loss": 0.3911, - "step": 12893 - }, - { - "epoch": 0.8426900202601137, - "grad_norm": 0.47475185990333557, - "learning_rate": 8.351029796630846e-06, - "loss": 0.4237, - "step": 12894 - }, - { - "epoch": 0.8427553754656558, - "grad_norm": 0.495795875787735, - "learning_rate": 8.350770628308857e-06, - "loss": 0.4562, - "step": 12895 - }, - { - "epoch": 0.842820730671198, - "grad_norm": 0.43273425102233887, - "learning_rate": 8.350511443643998e-06, - "loss": 0.3165, - "step": 12896 - }, - { - "epoch": 0.8428860858767401, - "grad_norm": 0.41151395440101624, - "learning_rate": 8.350252242637533e-06, - "loss": 0.311, - "step": 12897 - }, - { - "epoch": 0.8429514410822821, - "grad_norm": 0.4324301481246948, - "learning_rate": 8.34999302529073e-06, - "loss": 0.3442, - "step": 12898 - }, - { - "epoch": 0.8430167962878243, - "grad_norm": 0.4583076536655426, - "learning_rate": 8.349733791604849e-06, - "loss": 0.4233, - "step": 12899 - }, - { - "epoch": 0.8430821514933664, - "grad_norm": 0.45870479941368103, - "learning_rate": 8.349474541581155e-06, - "loss": 0.4224, - "step": 12900 - }, - { - "epoch": 0.8431475066989086, - "grad_norm": 0.4472617208957672, - "learning_rate": 8.349215275220914e-06, - "loss": 0.4033, - "step": 12901 - }, - { - "epoch": 0.8432128619044507, - "grad_norm": 0.4329296946525574, - "learning_rate": 8.348955992525392e-06, - "loss": 0.3575, - "step": 12902 - }, - { - "epoch": 0.8432782171099928, - "grad_norm": 0.48093709349632263, - "learning_rate": 8.348696693495848e-06, - "loss": 0.4019, - "step": 12903 - }, - { - "epoch": 0.8433435723155349, - "grad_norm": 0.458065390586853, - "learning_rate": 8.348437378133552e-06, - "loss": 0.4211, - "step": 12904 - }, - { - "epoch": 0.8434089275210771, - "grad_norm": 0.447160542011261, - "learning_rate": 8.348178046439766e-06, - "loss": 0.3533, - "step": 12905 - }, - { - "epoch": 0.8434742827266192, - "grad_norm": 0.5594804286956787, - "learning_rate": 8.347918698415756e-06, - "loss": 0.4342, - "step": 12906 - }, - { - "epoch": 0.8435396379321612, - "grad_norm": 0.45463624596595764, - "learning_rate": 8.347659334062787e-06, - "loss": 0.3642, - "step": 12907 - }, - { - "epoch": 0.8436049931377034, - "grad_norm": 0.4449283182621002, - "learning_rate": 8.347399953382125e-06, - "loss": 0.3833, - "step": 12908 - }, - { - "epoch": 0.8436703483432455, - "grad_norm": 0.4590797424316406, - "learning_rate": 8.347140556375031e-06, - "loss": 0.3704, - "step": 12909 - }, - { - "epoch": 0.8437357035487877, - "grad_norm": 0.46438759565353394, - "learning_rate": 8.346881143042775e-06, - "loss": 0.3612, - "step": 12910 - }, - { - "epoch": 0.8438010587543298, - "grad_norm": 0.4726444482803345, - "learning_rate": 8.34662171338662e-06, - "loss": 0.4013, - "step": 12911 - }, - { - "epoch": 0.8438664139598719, - "grad_norm": 0.41788357496261597, - "learning_rate": 8.34636226740783e-06, - "loss": 0.3407, - "step": 12912 - }, - { - "epoch": 0.843931769165414, - "grad_norm": 0.45723414421081543, - "learning_rate": 8.346102805107674e-06, - "loss": 0.3949, - "step": 12913 - }, - { - "epoch": 0.8439971243709562, - "grad_norm": 0.44049227237701416, - "learning_rate": 8.345843326487415e-06, - "loss": 0.3829, - "step": 12914 - }, - { - "epoch": 0.8440624795764983, - "grad_norm": 0.4747612774372101, - "learning_rate": 8.345583831548318e-06, - "loss": 0.3722, - "step": 12915 - }, - { - "epoch": 0.8441278347820403, - "grad_norm": 0.44940876960754395, - "learning_rate": 8.34532432029165e-06, - "loss": 0.4181, - "step": 12916 - }, - { - "epoch": 0.8441931899875825, - "grad_norm": 0.4739570915699005, - "learning_rate": 8.345064792718676e-06, - "loss": 0.4567, - "step": 12917 - }, - { - "epoch": 0.8442585451931246, - "grad_norm": 0.4361695945262909, - "learning_rate": 8.344805248830664e-06, - "loss": 0.3475, - "step": 12918 - }, - { - "epoch": 0.8443239003986668, - "grad_norm": 0.4131276309490204, - "learning_rate": 8.344545688628876e-06, - "loss": 0.3658, - "step": 12919 - }, - { - "epoch": 0.8443892556042089, - "grad_norm": 0.45152372121810913, - "learning_rate": 8.344286112114581e-06, - "loss": 0.3904, - "step": 12920 - }, - { - "epoch": 0.844454610809751, - "grad_norm": 0.43002691864967346, - "learning_rate": 8.344026519289043e-06, - "loss": 0.3846, - "step": 12921 - }, - { - "epoch": 0.8445199660152931, - "grad_norm": 0.4692656397819519, - "learning_rate": 8.34376691015353e-06, - "loss": 0.4316, - "step": 12922 - }, - { - "epoch": 0.8445853212208353, - "grad_norm": 0.4222685694694519, - "learning_rate": 8.343507284709307e-06, - "loss": 0.346, - "step": 12923 - }, - { - "epoch": 0.8446506764263774, - "grad_norm": 0.4289930462837219, - "learning_rate": 8.343247642957642e-06, - "loss": 0.3635, - "step": 12924 - }, - { - "epoch": 0.8447160316319194, - "grad_norm": 0.4378208816051483, - "learning_rate": 8.342987984899798e-06, - "loss": 0.3636, - "step": 12925 - }, - { - "epoch": 0.8447813868374616, - "grad_norm": 0.4417200982570648, - "learning_rate": 8.342728310537044e-06, - "loss": 0.3965, - "step": 12926 - }, - { - "epoch": 0.8448467420430037, - "grad_norm": 0.4177982211112976, - "learning_rate": 8.342468619870646e-06, - "loss": 0.3686, - "step": 12927 - }, - { - "epoch": 0.8449120972485459, - "grad_norm": 0.4426078498363495, - "learning_rate": 8.342208912901873e-06, - "loss": 0.3696, - "step": 12928 - }, - { - "epoch": 0.844977452454088, - "grad_norm": 0.43379274010658264, - "learning_rate": 8.341949189631986e-06, - "loss": 0.3437, - "step": 12929 - }, - { - "epoch": 0.8450428076596301, - "grad_norm": 0.48187845945358276, - "learning_rate": 8.341689450062258e-06, - "loss": 0.4527, - "step": 12930 - }, - { - "epoch": 0.8451081628651722, - "grad_norm": 0.45894575119018555, - "learning_rate": 8.34142969419395e-06, - "loss": 0.381, - "step": 12931 - }, - { - "epoch": 0.8451735180707143, - "grad_norm": 0.4325745701789856, - "learning_rate": 8.341169922028334e-06, - "loss": 0.3602, - "step": 12932 - }, - { - "epoch": 0.8452388732762565, - "grad_norm": 0.4661005437374115, - "learning_rate": 8.340910133566673e-06, - "loss": 0.4117, - "step": 12933 - }, - { - "epoch": 0.8453042284817985, - "grad_norm": 0.46226686239242554, - "learning_rate": 8.340650328810238e-06, - "loss": 0.3866, - "step": 12934 - }, - { - "epoch": 0.8453695836873407, - "grad_norm": 0.47056636214256287, - "learning_rate": 8.340390507760292e-06, - "loss": 0.4109, - "step": 12935 - }, - { - "epoch": 0.8454349388928828, - "grad_norm": 0.42317309975624084, - "learning_rate": 8.340130670418104e-06, - "loss": 0.3643, - "step": 12936 - }, - { - "epoch": 0.845500294098425, - "grad_norm": 0.48881959915161133, - "learning_rate": 8.339870816784942e-06, - "loss": 0.447, - "step": 12937 - }, - { - "epoch": 0.845565649303967, - "grad_norm": 0.43261072039604187, - "learning_rate": 8.339610946862075e-06, - "loss": 0.3463, - "step": 12938 - }, - { - "epoch": 0.8456310045095092, - "grad_norm": 0.479663610458374, - "learning_rate": 8.339351060650767e-06, - "loss": 0.4781, - "step": 12939 - }, - { - "epoch": 0.8456963597150513, - "grad_norm": 0.43825334310531616, - "learning_rate": 8.339091158152288e-06, - "loss": 0.3867, - "step": 12940 - }, - { - "epoch": 0.8457617149205934, - "grad_norm": 0.43406298756599426, - "learning_rate": 8.338831239367903e-06, - "loss": 0.3708, - "step": 12941 - }, - { - "epoch": 0.8458270701261356, - "grad_norm": 0.4622132480144501, - "learning_rate": 8.33857130429888e-06, - "loss": 0.4289, - "step": 12942 - }, - { - "epoch": 0.8458924253316776, - "grad_norm": 0.42942553758621216, - "learning_rate": 8.338311352946492e-06, - "loss": 0.3265, - "step": 12943 - }, - { - "epoch": 0.8459577805372198, - "grad_norm": 0.45094743371009827, - "learning_rate": 8.338051385312001e-06, - "loss": 0.3818, - "step": 12944 - }, - { - "epoch": 0.8460231357427619, - "grad_norm": 0.4672107398509979, - "learning_rate": 8.337791401396678e-06, - "loss": 0.4254, - "step": 12945 - }, - { - "epoch": 0.8460884909483041, - "grad_norm": 0.43508535623550415, - "learning_rate": 8.337531401201788e-06, - "loss": 0.3795, - "step": 12946 - }, - { - "epoch": 0.8461538461538461, - "grad_norm": 0.46768462657928467, - "learning_rate": 8.337271384728602e-06, - "loss": 0.4143, - "step": 12947 - }, - { - "epoch": 0.8462192013593883, - "grad_norm": 0.4383191764354706, - "learning_rate": 8.337011351978388e-06, - "loss": 0.3866, - "step": 12948 - }, - { - "epoch": 0.8462845565649304, - "grad_norm": 0.47301405668258667, - "learning_rate": 8.336751302952413e-06, - "loss": 0.3686, - "step": 12949 - }, - { - "epoch": 0.8463499117704725, - "grad_norm": 0.4595911502838135, - "learning_rate": 8.336491237651947e-06, - "loss": 0.4079, - "step": 12950 - }, - { - "epoch": 0.8464152669760147, - "grad_norm": 0.43553805351257324, - "learning_rate": 8.336231156078256e-06, - "loss": 0.3164, - "step": 12951 - }, - { - "epoch": 0.8464806221815567, - "grad_norm": 0.45041853189468384, - "learning_rate": 8.335971058232612e-06, - "loss": 0.3903, - "step": 12952 - }, - { - "epoch": 0.8465459773870989, - "grad_norm": 0.4387279152870178, - "learning_rate": 8.33571094411628e-06, - "loss": 0.3634, - "step": 12953 - }, - { - "epoch": 0.846611332592641, - "grad_norm": 0.4489176273345947, - "learning_rate": 8.33545081373053e-06, - "loss": 0.3724, - "step": 12954 - }, - { - "epoch": 0.8466766877981832, - "grad_norm": 0.3960930109024048, - "learning_rate": 8.33519066707663e-06, - "loss": 0.3207, - "step": 12955 - }, - { - "epoch": 0.8467420430037252, - "grad_norm": 0.47986871004104614, - "learning_rate": 8.33493050415585e-06, - "loss": 0.4222, - "step": 12956 - }, - { - "epoch": 0.8468073982092673, - "grad_norm": 0.42395836114883423, - "learning_rate": 8.33467032496946e-06, - "loss": 0.3493, - "step": 12957 - }, - { - "epoch": 0.8468727534148095, - "grad_norm": 0.43092361092567444, - "learning_rate": 8.334410129518726e-06, - "loss": 0.3499, - "step": 12958 - }, - { - "epoch": 0.8469381086203516, - "grad_norm": 0.47599807381629944, - "learning_rate": 8.334149917804921e-06, - "loss": 0.3793, - "step": 12959 - }, - { - "epoch": 0.8470034638258938, - "grad_norm": 0.6613618731498718, - "learning_rate": 8.33388968982931e-06, - "loss": 0.3604, - "step": 12960 - }, - { - "epoch": 0.8470688190314358, - "grad_norm": 0.46646255254745483, - "learning_rate": 8.333629445593165e-06, - "loss": 0.3681, - "step": 12961 - }, - { - "epoch": 0.847134174236978, - "grad_norm": 0.3952161371707916, - "learning_rate": 8.333369185097752e-06, - "loss": 0.3101, - "step": 12962 - }, - { - "epoch": 0.8471995294425201, - "grad_norm": 0.44239893555641174, - "learning_rate": 8.333108908344345e-06, - "loss": 0.4, - "step": 12963 - }, - { - "epoch": 0.8472648846480623, - "grad_norm": 0.4682541489601135, - "learning_rate": 8.33284861533421e-06, - "loss": 0.3764, - "step": 12964 - }, - { - "epoch": 0.8473302398536043, - "grad_norm": 0.43617960810661316, - "learning_rate": 8.33258830606862e-06, - "loss": 0.3537, - "step": 12965 - }, - { - "epoch": 0.8473955950591464, - "grad_norm": 0.4275916814804077, - "learning_rate": 8.332327980548838e-06, - "loss": 0.3557, - "step": 12966 - }, - { - "epoch": 0.8474609502646886, - "grad_norm": 0.5054025650024414, - "learning_rate": 8.33206763877614e-06, - "loss": 0.3797, - "step": 12967 - }, - { - "epoch": 0.8475263054702307, - "grad_norm": 0.4242538809776306, - "learning_rate": 8.331807280751796e-06, - "loss": 0.3303, - "step": 12968 - }, - { - "epoch": 0.8475916606757729, - "grad_norm": 0.45625555515289307, - "learning_rate": 8.33154690647707e-06, - "loss": 0.3952, - "step": 12969 - }, - { - "epoch": 0.8476570158813149, - "grad_norm": 0.4328351318836212, - "learning_rate": 8.331286515953238e-06, - "loss": 0.3684, - "step": 12970 - }, - { - "epoch": 0.8477223710868571, - "grad_norm": 0.45689383149147034, - "learning_rate": 8.331026109181568e-06, - "loss": 0.3839, - "step": 12971 - }, - { - "epoch": 0.8477877262923992, - "grad_norm": 0.44383302330970764, - "learning_rate": 8.330765686163328e-06, - "loss": 0.3954, - "step": 12972 - }, - { - "epoch": 0.8478530814979414, - "grad_norm": 0.44091150164604187, - "learning_rate": 8.330505246899792e-06, - "loss": 0.3801, - "step": 12973 - }, - { - "epoch": 0.8479184367034834, - "grad_norm": 0.47562843561172485, - "learning_rate": 8.330244791392226e-06, - "loss": 0.4355, - "step": 12974 - }, - { - "epoch": 0.8479837919090255, - "grad_norm": 0.45469504594802856, - "learning_rate": 8.329984319641902e-06, - "loss": 0.343, - "step": 12975 - }, - { - "epoch": 0.8480491471145677, - "grad_norm": 0.4577144384384155, - "learning_rate": 8.329723831650092e-06, - "loss": 0.3717, - "step": 12976 - }, - { - "epoch": 0.8481145023201098, - "grad_norm": 0.437324196100235, - "learning_rate": 8.329463327418066e-06, - "loss": 0.3834, - "step": 12977 - }, - { - "epoch": 0.848179857525652, - "grad_norm": 0.4720422327518463, - "learning_rate": 8.329202806947093e-06, - "loss": 0.4333, - "step": 12978 - }, - { - "epoch": 0.848245212731194, - "grad_norm": 0.4393203556537628, - "learning_rate": 8.328942270238444e-06, - "loss": 0.3896, - "step": 12979 - }, - { - "epoch": 0.8483105679367362, - "grad_norm": 0.4401390254497528, - "learning_rate": 8.328681717293392e-06, - "loss": 0.3921, - "step": 12980 - }, - { - "epoch": 0.8483759231422783, - "grad_norm": 0.43827110528945923, - "learning_rate": 8.328421148113207e-06, - "loss": 0.3831, - "step": 12981 - }, - { - "epoch": 0.8484412783478203, - "grad_norm": 0.4346137046813965, - "learning_rate": 8.328160562699155e-06, - "loss": 0.3519, - "step": 12982 - }, - { - "epoch": 0.8485066335533625, - "grad_norm": 0.4775579571723938, - "learning_rate": 8.327899961052514e-06, - "loss": 0.4441, - "step": 12983 - }, - { - "epoch": 0.8485719887589046, - "grad_norm": 0.4290394186973572, - "learning_rate": 8.327639343174551e-06, - "loss": 0.3536, - "step": 12984 - }, - { - "epoch": 0.8486373439644468, - "grad_norm": 0.41752007603645325, - "learning_rate": 8.327378709066538e-06, - "loss": 0.3457, - "step": 12985 - }, - { - "epoch": 0.8487026991699889, - "grad_norm": 0.4521631896495819, - "learning_rate": 8.327118058729745e-06, - "loss": 0.4116, - "step": 12986 - }, - { - "epoch": 0.848768054375531, - "grad_norm": 0.45426443219184875, - "learning_rate": 8.326857392165449e-06, - "loss": 0.4414, - "step": 12987 - }, - { - "epoch": 0.8488334095810731, - "grad_norm": 0.43946558237075806, - "learning_rate": 8.326596709374913e-06, - "loss": 0.3719, - "step": 12988 - }, - { - "epoch": 0.8488987647866153, - "grad_norm": 0.42910999059677124, - "learning_rate": 8.326336010359413e-06, - "loss": 0.3541, - "step": 12989 - }, - { - "epoch": 0.8489641199921574, - "grad_norm": 0.4289521872997284, - "learning_rate": 8.326075295120222e-06, - "loss": 0.3593, - "step": 12990 - }, - { - "epoch": 0.8490294751976994, - "grad_norm": 0.43595993518829346, - "learning_rate": 8.325814563658607e-06, - "loss": 0.3625, - "step": 12991 - }, - { - "epoch": 0.8490948304032416, - "grad_norm": 0.577158510684967, - "learning_rate": 8.325553815975842e-06, - "loss": 0.3838, - "step": 12992 - }, - { - "epoch": 0.8491601856087837, - "grad_norm": 0.4579011797904968, - "learning_rate": 8.325293052073201e-06, - "loss": 0.4171, - "step": 12993 - }, - { - "epoch": 0.8492255408143259, - "grad_norm": 0.44544026255607605, - "learning_rate": 8.325032271951954e-06, - "loss": 0.4117, - "step": 12994 - }, - { - "epoch": 0.849290896019868, - "grad_norm": 0.4383991062641144, - "learning_rate": 8.324771475613372e-06, - "loss": 0.3807, - "step": 12995 - }, - { - "epoch": 0.8493562512254101, - "grad_norm": 0.41317757964134216, - "learning_rate": 8.324510663058726e-06, - "loss": 0.3656, - "step": 12996 - }, - { - "epoch": 0.8494216064309522, - "grad_norm": 0.4217069149017334, - "learning_rate": 8.32424983428929e-06, - "loss": 0.3476, - "step": 12997 - }, - { - "epoch": 0.8494869616364944, - "grad_norm": 0.47838321328163147, - "learning_rate": 8.32398898930634e-06, - "loss": 0.3787, - "step": 12998 - }, - { - "epoch": 0.8495523168420365, - "grad_norm": 0.40194016695022583, - "learning_rate": 8.323728128111141e-06, - "loss": 0.3327, - "step": 12999 - }, - { - "epoch": 0.8496176720475785, - "grad_norm": 0.4378214478492737, - "learning_rate": 8.323467250704968e-06, - "loss": 0.38, - "step": 13000 - }, - { - "epoch": 0.8496830272531207, - "grad_norm": 0.40850594639778137, - "learning_rate": 8.323206357089094e-06, - "loss": 0.3331, - "step": 13001 - }, - { - "epoch": 0.8497483824586628, - "grad_norm": 0.4367838203907013, - "learning_rate": 8.322945447264792e-06, - "loss": 0.4046, - "step": 13002 - }, - { - "epoch": 0.849813737664205, - "grad_norm": 0.46336469054222107, - "learning_rate": 8.322684521233332e-06, - "loss": 0.3661, - "step": 13003 - }, - { - "epoch": 0.849879092869747, - "grad_norm": 0.4387741684913635, - "learning_rate": 8.322423578995991e-06, - "loss": 0.3521, - "step": 13004 - }, - { - "epoch": 0.8499444480752892, - "grad_norm": 0.4483397901058197, - "learning_rate": 8.32216262055404e-06, - "loss": 0.3372, - "step": 13005 - }, - { - "epoch": 0.8500098032808313, - "grad_norm": 0.47723543643951416, - "learning_rate": 8.321901645908748e-06, - "loss": 0.4397, - "step": 13006 - }, - { - "epoch": 0.8500751584863735, - "grad_norm": 0.42409658432006836, - "learning_rate": 8.321640655061394e-06, - "loss": 0.3949, - "step": 13007 - }, - { - "epoch": 0.8501405136919156, - "grad_norm": 0.4384118914604187, - "learning_rate": 8.321379648013246e-06, - "loss": 0.3864, - "step": 13008 - }, - { - "epoch": 0.8502058688974576, - "grad_norm": 0.44307631254196167, - "learning_rate": 8.321118624765578e-06, - "loss": 0.3899, - "step": 13009 - }, - { - "epoch": 0.8502712241029998, - "grad_norm": 0.41734904050827026, - "learning_rate": 8.320857585319664e-06, - "loss": 0.346, - "step": 13010 - }, - { - "epoch": 0.8503365793085419, - "grad_norm": 0.4242594540119171, - "learning_rate": 8.320596529676778e-06, - "loss": 0.3722, - "step": 13011 - }, - { - "epoch": 0.8504019345140841, - "grad_norm": 0.41269031167030334, - "learning_rate": 8.320335457838194e-06, - "loss": 0.3617, - "step": 13012 - }, - { - "epoch": 0.8504672897196262, - "grad_norm": 0.4592479467391968, - "learning_rate": 8.320074369805182e-06, - "loss": 0.3988, - "step": 13013 - }, - { - "epoch": 0.8505326449251683, - "grad_norm": 0.41280412673950195, - "learning_rate": 8.319813265579017e-06, - "loss": 0.3548, - "step": 13014 - }, - { - "epoch": 0.8505980001307104, - "grad_norm": 0.46887463331222534, - "learning_rate": 8.319552145160972e-06, - "loss": 0.3939, - "step": 13015 - }, - { - "epoch": 0.8506633553362525, - "grad_norm": 0.41253677010536194, - "learning_rate": 8.319291008552321e-06, - "loss": 0.3155, - "step": 13016 - }, - { - "epoch": 0.8507287105417947, - "grad_norm": 0.4332970082759857, - "learning_rate": 8.31902985575434e-06, - "loss": 0.3999, - "step": 13017 - }, - { - "epoch": 0.8507940657473367, - "grad_norm": 0.4350966215133667, - "learning_rate": 8.3187686867683e-06, - "loss": 0.4045, - "step": 13018 - }, - { - "epoch": 0.8508594209528789, - "grad_norm": 0.44339117407798767, - "learning_rate": 8.318507501595474e-06, - "loss": 0.3705, - "step": 13019 - }, - { - "epoch": 0.850924776158421, - "grad_norm": 0.4180876612663269, - "learning_rate": 8.318246300237139e-06, - "loss": 0.3499, - "step": 13020 - }, - { - "epoch": 0.8509901313639632, - "grad_norm": 0.4269477128982544, - "learning_rate": 8.317985082694566e-06, - "loss": 0.3773, - "step": 13021 - }, - { - "epoch": 0.8510554865695052, - "grad_norm": 0.4391435384750366, - "learning_rate": 8.317723848969029e-06, - "loss": 0.3592, - "step": 13022 - }, - { - "epoch": 0.8511208417750474, - "grad_norm": 0.4249681532382965, - "learning_rate": 8.317462599061805e-06, - "loss": 0.3507, - "step": 13023 - }, - { - "epoch": 0.8511861969805895, - "grad_norm": 0.41892850399017334, - "learning_rate": 8.317201332974167e-06, - "loss": 0.3491, - "step": 13024 - }, - { - "epoch": 0.8512515521861316, - "grad_norm": 0.42302414774894714, - "learning_rate": 8.31694005070739e-06, - "loss": 0.3122, - "step": 13025 - }, - { - "epoch": 0.8513169073916738, - "grad_norm": 0.5472805500030518, - "learning_rate": 8.316678752262743e-06, - "loss": 0.3863, - "step": 13026 - }, - { - "epoch": 0.8513822625972158, - "grad_norm": 0.43365710973739624, - "learning_rate": 8.316417437641509e-06, - "loss": 0.3487, - "step": 13027 - }, - { - "epoch": 0.851447617802758, - "grad_norm": 0.46539852023124695, - "learning_rate": 8.316156106844958e-06, - "loss": 0.4255, - "step": 13028 - }, - { - "epoch": 0.8515129730083001, - "grad_norm": 0.478466272354126, - "learning_rate": 8.315894759874361e-06, - "loss": 0.4772, - "step": 13029 - }, - { - "epoch": 0.8515783282138423, - "grad_norm": 0.45742014050483704, - "learning_rate": 8.315633396731e-06, - "loss": 0.4016, - "step": 13030 - }, - { - "epoch": 0.8516436834193843, - "grad_norm": 0.4491812288761139, - "learning_rate": 8.315372017416146e-06, - "loss": 0.3737, - "step": 13031 - }, - { - "epoch": 0.8517090386249265, - "grad_norm": 0.4290905296802521, - "learning_rate": 8.315110621931074e-06, - "loss": 0.352, - "step": 13032 - }, - { - "epoch": 0.8517743938304686, - "grad_norm": 0.46106651425361633, - "learning_rate": 8.314849210277057e-06, - "loss": 0.3846, - "step": 13033 - }, - { - "epoch": 0.8518397490360107, - "grad_norm": 0.5297082662582397, - "learning_rate": 8.314587782455373e-06, - "loss": 0.4311, - "step": 13034 - }, - { - "epoch": 0.8519051042415529, - "grad_norm": 0.42537423968315125, - "learning_rate": 8.314326338467297e-06, - "loss": 0.3878, - "step": 13035 - }, - { - "epoch": 0.8519704594470949, - "grad_norm": 0.4676016867160797, - "learning_rate": 8.314064878314103e-06, - "loss": 0.4051, - "step": 13036 - }, - { - "epoch": 0.8520358146526371, - "grad_norm": 0.44034287333488464, - "learning_rate": 8.313803401997068e-06, - "loss": 0.3257, - "step": 13037 - }, - { - "epoch": 0.8521011698581792, - "grad_norm": 0.712868869304657, - "learning_rate": 8.313541909517463e-06, - "loss": 0.3715, - "step": 13038 - }, - { - "epoch": 0.8521665250637214, - "grad_norm": 0.4472745358943939, - "learning_rate": 8.313280400876566e-06, - "loss": 0.3826, - "step": 13039 - }, - { - "epoch": 0.8522318802692634, - "grad_norm": 0.4714382588863373, - "learning_rate": 8.313018876075656e-06, - "loss": 0.4292, - "step": 13040 - }, - { - "epoch": 0.8522972354748055, - "grad_norm": 0.4770013689994812, - "learning_rate": 8.312757335116002e-06, - "loss": 0.4185, - "step": 13041 - }, - { - "epoch": 0.8523625906803477, - "grad_norm": 0.42746424674987793, - "learning_rate": 8.312495777998883e-06, - "loss": 0.3549, - "step": 13042 - }, - { - "epoch": 0.8524279458858898, - "grad_norm": 0.452098548412323, - "learning_rate": 8.312234204725576e-06, - "loss": 0.4103, - "step": 13043 - }, - { - "epoch": 0.852493301091432, - "grad_norm": 0.45813503861427307, - "learning_rate": 8.311972615297356e-06, - "loss": 0.4049, - "step": 13044 - }, - { - "epoch": 0.852558656296974, - "grad_norm": 0.4434608519077301, - "learning_rate": 8.311711009715497e-06, - "loss": 0.3839, - "step": 13045 - }, - { - "epoch": 0.8526240115025162, - "grad_norm": 0.4325890839099884, - "learning_rate": 8.311449387981277e-06, - "loss": 0.398, - "step": 13046 - }, - { - "epoch": 0.8526893667080583, - "grad_norm": 0.44462740421295166, - "learning_rate": 8.31118775009597e-06, - "loss": 0.3865, - "step": 13047 - }, - { - "epoch": 0.8527547219136005, - "grad_norm": 0.44148850440979004, - "learning_rate": 8.310926096060851e-06, - "loss": 0.3735, - "step": 13048 - }, - { - "epoch": 0.8528200771191425, - "grad_norm": 0.40927594900131226, - "learning_rate": 8.310664425877202e-06, - "loss": 0.353, - "step": 13049 - }, - { - "epoch": 0.8528854323246846, - "grad_norm": 0.4303451478481293, - "learning_rate": 8.310402739546296e-06, - "loss": 0.3371, - "step": 13050 - }, - { - "epoch": 0.8529507875302268, - "grad_norm": 0.4334465563297272, - "learning_rate": 8.310141037069405e-06, - "loss": 0.333, - "step": 13051 - }, - { - "epoch": 0.8530161427357689, - "grad_norm": 0.3735904097557068, - "learning_rate": 8.309879318447814e-06, - "loss": 0.266, - "step": 13052 - }, - { - "epoch": 0.853081497941311, - "grad_norm": 0.4237179458141327, - "learning_rate": 8.309617583682792e-06, - "loss": 0.3713, - "step": 13053 - }, - { - "epoch": 0.8531468531468531, - "grad_norm": 0.47589799761772156, - "learning_rate": 8.30935583277562e-06, - "loss": 0.4209, - "step": 13054 - }, - { - "epoch": 0.8532122083523953, - "grad_norm": 0.46652454137802124, - "learning_rate": 8.309094065727571e-06, - "loss": 0.3755, - "step": 13055 - }, - { - "epoch": 0.8532775635579374, - "grad_norm": 0.45973077416419983, - "learning_rate": 8.308832282539927e-06, - "loss": 0.3731, - "step": 13056 - }, - { - "epoch": 0.8533429187634796, - "grad_norm": 0.43615809082984924, - "learning_rate": 8.30857048321396e-06, - "loss": 0.3577, - "step": 13057 - }, - { - "epoch": 0.8534082739690216, - "grad_norm": 0.4429352283477783, - "learning_rate": 8.30830866775095e-06, - "loss": 0.3624, - "step": 13058 - }, - { - "epoch": 0.8534736291745637, - "grad_norm": 0.42994409799575806, - "learning_rate": 8.30804683615217e-06, - "loss": 0.3623, - "step": 13059 - }, - { - "epoch": 0.8535389843801059, - "grad_norm": 0.44007644057273865, - "learning_rate": 8.3077849884189e-06, - "loss": 0.3665, - "step": 13060 - }, - { - "epoch": 0.853604339585648, - "grad_norm": 0.4082667827606201, - "learning_rate": 8.30752312455242e-06, - "loss": 0.3338, - "step": 13061 - }, - { - "epoch": 0.8536696947911901, - "grad_norm": 0.44889017939567566, - "learning_rate": 8.307261244554e-06, - "loss": 0.3684, - "step": 13062 - }, - { - "epoch": 0.8537350499967322, - "grad_norm": 0.4447444975376129, - "learning_rate": 8.306999348424922e-06, - "loss": 0.3814, - "step": 13063 - }, - { - "epoch": 0.8538004052022744, - "grad_norm": 0.446433424949646, - "learning_rate": 8.306737436166463e-06, - "loss": 0.3823, - "step": 13064 - }, - { - "epoch": 0.8538657604078165, - "grad_norm": 0.4314079284667969, - "learning_rate": 8.306475507779902e-06, - "loss": 0.3573, - "step": 13065 - }, - { - "epoch": 0.8539311156133585, - "grad_norm": 0.4226812720298767, - "learning_rate": 8.30621356326651e-06, - "loss": 0.3445, - "step": 13066 - }, - { - "epoch": 0.8539964708189007, - "grad_norm": 0.45340994000434875, - "learning_rate": 8.305951602627573e-06, - "loss": 0.4014, - "step": 13067 - }, - { - "epoch": 0.8540618260244428, - "grad_norm": 0.4504512548446655, - "learning_rate": 8.305689625864361e-06, - "loss": 0.3886, - "step": 13068 - }, - { - "epoch": 0.854127181229985, - "grad_norm": 0.45493951439857483, - "learning_rate": 8.305427632978159e-06, - "loss": 0.3873, - "step": 13069 - }, - { - "epoch": 0.8541925364355271, - "grad_norm": 0.48128390312194824, - "learning_rate": 8.30516562397024e-06, - "loss": 0.4406, - "step": 13070 - }, - { - "epoch": 0.8542578916410692, - "grad_norm": 0.4014410972595215, - "learning_rate": 8.304903598841884e-06, - "loss": 0.3239, - "step": 13071 - }, - { - "epoch": 0.8543232468466113, - "grad_norm": 0.43254926800727844, - "learning_rate": 8.304641557594366e-06, - "loss": 0.3368, - "step": 13072 - }, - { - "epoch": 0.8543886020521535, - "grad_norm": 0.4835487902164459, - "learning_rate": 8.304379500228968e-06, - "loss": 0.3558, - "step": 13073 - }, - { - "epoch": 0.8544539572576956, - "grad_norm": 0.44744396209716797, - "learning_rate": 8.304117426746966e-06, - "loss": 0.3908, - "step": 13074 - }, - { - "epoch": 0.8545193124632376, - "grad_norm": 0.43773603439331055, - "learning_rate": 8.30385533714964e-06, - "loss": 0.3436, - "step": 13075 - }, - { - "epoch": 0.8545846676687798, - "grad_norm": 0.45900505781173706, - "learning_rate": 8.303593231438265e-06, - "loss": 0.4142, - "step": 13076 - }, - { - "epoch": 0.8546500228743219, - "grad_norm": 0.43883055448532104, - "learning_rate": 8.303331109614122e-06, - "loss": 0.3875, - "step": 13077 - }, - { - "epoch": 0.8547153780798641, - "grad_norm": 0.4569999873638153, - "learning_rate": 8.30306897167849e-06, - "loss": 0.3787, - "step": 13078 - }, - { - "epoch": 0.8547807332854062, - "grad_norm": 0.4314734935760498, - "learning_rate": 8.302806817632645e-06, - "loss": 0.3593, - "step": 13079 - }, - { - "epoch": 0.8548460884909483, - "grad_norm": 0.4136468172073364, - "learning_rate": 8.302544647477868e-06, - "loss": 0.355, - "step": 13080 - }, - { - "epoch": 0.8549114436964904, - "grad_norm": 0.48597288131713867, - "learning_rate": 8.302282461215436e-06, - "loss": 0.4367, - "step": 13081 - }, - { - "epoch": 0.8549767989020326, - "grad_norm": 0.41474828124046326, - "learning_rate": 8.302020258846629e-06, - "loss": 0.356, - "step": 13082 - }, - { - "epoch": 0.8550421541075747, - "grad_norm": 0.3947749137878418, - "learning_rate": 8.301758040372723e-06, - "loss": 0.3259, - "step": 13083 - }, - { - "epoch": 0.8551075093131167, - "grad_norm": 0.4721922278404236, - "learning_rate": 8.301495805795e-06, - "loss": 0.4015, - "step": 13084 - }, - { - "epoch": 0.8551728645186589, - "grad_norm": 0.4693576991558075, - "learning_rate": 8.301233555114741e-06, - "loss": 0.4217, - "step": 13085 - }, - { - "epoch": 0.855238219724201, - "grad_norm": 0.42758774757385254, - "learning_rate": 8.30097128833322e-06, - "loss": 0.3216, - "step": 13086 - }, - { - "epoch": 0.8553035749297432, - "grad_norm": 0.4395238757133484, - "learning_rate": 8.300709005451719e-06, - "loss": 0.3893, - "step": 13087 - }, - { - "epoch": 0.8553689301352853, - "grad_norm": 0.429995596408844, - "learning_rate": 8.300446706471516e-06, - "loss": 0.4013, - "step": 13088 - }, - { - "epoch": 0.8554342853408274, - "grad_norm": 0.4792253077030182, - "learning_rate": 8.300184391393891e-06, - "loss": 0.421, - "step": 13089 - }, - { - "epoch": 0.8554996405463695, - "grad_norm": 0.49514615535736084, - "learning_rate": 8.299922060220124e-06, - "loss": 0.455, - "step": 13090 - }, - { - "epoch": 0.8555649957519117, - "grad_norm": 0.4884258210659027, - "learning_rate": 8.299659712951493e-06, - "loss": 0.3791, - "step": 13091 - }, - { - "epoch": 0.8556303509574538, - "grad_norm": 0.4589777886867523, - "learning_rate": 8.29939734958928e-06, - "loss": 0.3846, - "step": 13092 - }, - { - "epoch": 0.8556957061629958, - "grad_norm": 0.4226579964160919, - "learning_rate": 8.299134970134762e-06, - "loss": 0.3427, - "step": 13093 - }, - { - "epoch": 0.855761061368538, - "grad_norm": 0.44223830103874207, - "learning_rate": 8.29887257458922e-06, - "loss": 0.3543, - "step": 13094 - }, - { - "epoch": 0.8558264165740801, - "grad_norm": 0.4701899588108063, - "learning_rate": 8.298610162953934e-06, - "loss": 0.389, - "step": 13095 - }, - { - "epoch": 0.8558917717796223, - "grad_norm": 0.44645747542381287, - "learning_rate": 8.298347735230184e-06, - "loss": 0.3791, - "step": 13096 - }, - { - "epoch": 0.8559571269851644, - "grad_norm": 0.45808687806129456, - "learning_rate": 8.298085291419248e-06, - "loss": 0.3744, - "step": 13097 - }, - { - "epoch": 0.8560224821907065, - "grad_norm": 0.4933817982673645, - "learning_rate": 8.29782283152241e-06, - "loss": 0.4361, - "step": 13098 - }, - { - "epoch": 0.8560878373962486, - "grad_norm": 0.39033064246177673, - "learning_rate": 8.297560355540945e-06, - "loss": 0.3279, - "step": 13099 - }, - { - "epoch": 0.8561531926017907, - "grad_norm": 0.42793524265289307, - "learning_rate": 8.297297863476136e-06, - "loss": 0.3582, - "step": 13100 - }, - { - "epoch": 0.8562185478073329, - "grad_norm": 0.4983679950237274, - "learning_rate": 8.297035355329264e-06, - "loss": 0.4492, - "step": 13101 - }, - { - "epoch": 0.8562839030128749, - "grad_norm": 0.4555116295814514, - "learning_rate": 8.296772831101608e-06, - "loss": 0.4207, - "step": 13102 - }, - { - "epoch": 0.8563492582184171, - "grad_norm": 0.443266898393631, - "learning_rate": 8.296510290794449e-06, - "loss": 0.3645, - "step": 13103 - }, - { - "epoch": 0.8564146134239592, - "grad_norm": 0.4548618495464325, - "learning_rate": 8.296247734409067e-06, - "loss": 0.4258, - "step": 13104 - }, - { - "epoch": 0.8564799686295014, - "grad_norm": 0.47024303674697876, - "learning_rate": 8.295985161946742e-06, - "loss": 0.3809, - "step": 13105 - }, - { - "epoch": 0.8565453238350434, - "grad_norm": 0.4740467965602875, - "learning_rate": 8.295722573408757e-06, - "loss": 0.3743, - "step": 13106 - }, - { - "epoch": 0.8566106790405856, - "grad_norm": 0.4315088987350464, - "learning_rate": 8.29545996879639e-06, - "loss": 0.3476, - "step": 13107 - }, - { - "epoch": 0.8566760342461277, - "grad_norm": 0.44808855652809143, - "learning_rate": 8.295197348110924e-06, - "loss": 0.4072, - "step": 13108 - }, - { - "epoch": 0.8567413894516698, - "grad_norm": 0.4371505379676819, - "learning_rate": 8.29493471135364e-06, - "loss": 0.401, - "step": 13109 - }, - { - "epoch": 0.856806744657212, - "grad_norm": 0.4380919933319092, - "learning_rate": 8.294672058525815e-06, - "loss": 0.3667, - "step": 13110 - }, - { - "epoch": 0.856872099862754, - "grad_norm": 0.45407402515411377, - "learning_rate": 8.294409389628735e-06, - "loss": 0.3687, - "step": 13111 - }, - { - "epoch": 0.8569374550682962, - "grad_norm": 0.46568161249160767, - "learning_rate": 8.29414670466368e-06, - "loss": 0.378, - "step": 13112 - }, - { - "epoch": 0.8570028102738383, - "grad_norm": 0.46108999848365784, - "learning_rate": 8.293884003631928e-06, - "loss": 0.3927, - "step": 13113 - }, - { - "epoch": 0.8570681654793805, - "grad_norm": 0.44488200545310974, - "learning_rate": 8.293621286534763e-06, - "loss": 0.3885, - "step": 13114 - }, - { - "epoch": 0.8571335206849225, - "grad_norm": 0.4455205500125885, - "learning_rate": 8.293358553373468e-06, - "loss": 0.3902, - "step": 13115 - }, - { - "epoch": 0.8571988758904647, - "grad_norm": 0.45098504424095154, - "learning_rate": 8.29309580414932e-06, - "loss": 0.399, - "step": 13116 - }, - { - "epoch": 0.8572642310960068, - "grad_norm": 0.4192991256713867, - "learning_rate": 8.292833038863603e-06, - "loss": 0.3511, - "step": 13117 - }, - { - "epoch": 0.8573295863015489, - "grad_norm": 0.4548405408859253, - "learning_rate": 8.2925702575176e-06, - "loss": 0.3941, - "step": 13118 - }, - { - "epoch": 0.8573949415070911, - "grad_norm": 0.4591832458972931, - "learning_rate": 8.292307460112592e-06, - "loss": 0.3841, - "step": 13119 - }, - { - "epoch": 0.8574602967126331, - "grad_norm": 0.4723235070705414, - "learning_rate": 8.29204464664986e-06, - "loss": 0.4016, - "step": 13120 - }, - { - "epoch": 0.8575256519181753, - "grad_norm": 0.4260718524456024, - "learning_rate": 8.291781817130682e-06, - "loss": 0.3348, - "step": 13121 - }, - { - "epoch": 0.8575910071237174, - "grad_norm": 0.43534210324287415, - "learning_rate": 8.291518971556348e-06, - "loss": 0.3474, - "step": 13122 - }, - { - "epoch": 0.8576563623292596, - "grad_norm": 0.4349510073661804, - "learning_rate": 8.291256109928133e-06, - "loss": 0.3444, - "step": 13123 - }, - { - "epoch": 0.8577217175348016, - "grad_norm": 0.44429323077201843, - "learning_rate": 8.290993232247322e-06, - "loss": 0.3352, - "step": 13124 - }, - { - "epoch": 0.8577870727403437, - "grad_norm": 0.48946329951286316, - "learning_rate": 8.290730338515198e-06, - "loss": 0.4274, - "step": 13125 - }, - { - "epoch": 0.8578524279458859, - "grad_norm": 0.4377772808074951, - "learning_rate": 8.29046742873304e-06, - "loss": 0.3861, - "step": 13126 - }, - { - "epoch": 0.857917783151428, - "grad_norm": 0.4502428472042084, - "learning_rate": 8.290204502902134e-06, - "loss": 0.3836, - "step": 13127 - }, - { - "epoch": 0.8579831383569702, - "grad_norm": 0.4187738001346588, - "learning_rate": 8.289941561023762e-06, - "loss": 0.3425, - "step": 13128 - }, - { - "epoch": 0.8580484935625122, - "grad_norm": 0.44563737511634827, - "learning_rate": 8.289678603099205e-06, - "loss": 0.3748, - "step": 13129 - }, - { - "epoch": 0.8581138487680544, - "grad_norm": 0.41198623180389404, - "learning_rate": 8.289415629129744e-06, - "loss": 0.3382, - "step": 13130 - }, - { - "epoch": 0.8581792039735965, - "grad_norm": 0.43811723589897156, - "learning_rate": 8.289152639116664e-06, - "loss": 0.3676, - "step": 13131 - }, - { - "epoch": 0.8582445591791387, - "grad_norm": 0.4321269690990448, - "learning_rate": 8.288889633061248e-06, - "loss": 0.338, - "step": 13132 - }, - { - "epoch": 0.8583099143846807, - "grad_norm": 0.4447796046733856, - "learning_rate": 8.288626610964777e-06, - "loss": 0.4187, - "step": 13133 - }, - { - "epoch": 0.8583752695902228, - "grad_norm": 0.4782610535621643, - "learning_rate": 8.288363572828535e-06, - "loss": 0.4608, - "step": 13134 - }, - { - "epoch": 0.858440624795765, - "grad_norm": 0.481644868850708, - "learning_rate": 8.288100518653804e-06, - "loss": 0.4104, - "step": 13135 - }, - { - "epoch": 0.8585059800013071, - "grad_norm": 0.4687405824661255, - "learning_rate": 8.287837448441868e-06, - "loss": 0.3788, - "step": 13136 - }, - { - "epoch": 0.8585713352068493, - "grad_norm": 0.4344463646411896, - "learning_rate": 8.287574362194011e-06, - "loss": 0.3625, - "step": 13137 - }, - { - "epoch": 0.8586366904123913, - "grad_norm": 0.4399704933166504, - "learning_rate": 8.287311259911514e-06, - "loss": 0.3852, - "step": 13138 - }, - { - "epoch": 0.8587020456179335, - "grad_norm": 0.4202140271663666, - "learning_rate": 8.287048141595662e-06, - "loss": 0.3669, - "step": 13139 - }, - { - "epoch": 0.8587674008234756, - "grad_norm": 0.4349260926246643, - "learning_rate": 8.286785007247735e-06, - "loss": 0.3761, - "step": 13140 - }, - { - "epoch": 0.8588327560290178, - "grad_norm": 0.45881718397140503, - "learning_rate": 8.286521856869021e-06, - "loss": 0.3957, - "step": 13141 - }, - { - "epoch": 0.8588981112345598, - "grad_norm": 0.45289260149002075, - "learning_rate": 8.286258690460802e-06, - "loss": 0.3853, - "step": 13142 - }, - { - "epoch": 0.8589634664401019, - "grad_norm": 0.462878555059433, - "learning_rate": 8.285995508024357e-06, - "loss": 0.4019, - "step": 13143 - }, - { - "epoch": 0.8590288216456441, - "grad_norm": 0.44377198815345764, - "learning_rate": 8.28573230956098e-06, - "loss": 0.3537, - "step": 13144 - }, - { - "epoch": 0.8590941768511862, - "grad_norm": 0.4682895541191101, - "learning_rate": 8.285469095071943e-06, - "loss": 0.3954, - "step": 13145 - }, - { - "epoch": 0.8591595320567283, - "grad_norm": 0.49238693714141846, - "learning_rate": 8.285205864558537e-06, - "loss": 0.4497, - "step": 13146 - }, - { - "epoch": 0.8592248872622704, - "grad_norm": 0.4370424449443817, - "learning_rate": 8.284942618022043e-06, - "loss": 0.3772, - "step": 13147 - }, - { - "epoch": 0.8592902424678126, - "grad_norm": 0.4467172920703888, - "learning_rate": 8.284679355463746e-06, - "loss": 0.3624, - "step": 13148 - }, - { - "epoch": 0.8593555976733547, - "grad_norm": 0.4331428110599518, - "learning_rate": 8.284416076884928e-06, - "loss": 0.3635, - "step": 13149 - }, - { - "epoch": 0.8594209528788967, - "grad_norm": 0.4427790939807892, - "learning_rate": 8.284152782286878e-06, - "loss": 0.3956, - "step": 13150 - }, - { - "epoch": 0.8594863080844389, - "grad_norm": 0.4413788914680481, - "learning_rate": 8.283889471670876e-06, - "loss": 0.4163, - "step": 13151 - }, - { - "epoch": 0.859551663289981, - "grad_norm": 0.46344879269599915, - "learning_rate": 8.283626145038208e-06, - "loss": 0.3845, - "step": 13152 - }, - { - "epoch": 0.8596170184955232, - "grad_norm": 0.4417228698730469, - "learning_rate": 8.283362802390157e-06, - "loss": 0.363, - "step": 13153 - }, - { - "epoch": 0.8596823737010653, - "grad_norm": 0.43309471011161804, - "learning_rate": 8.283099443728009e-06, - "loss": 0.3351, - "step": 13154 - }, - { - "epoch": 0.8597477289066074, - "grad_norm": 0.4852140247821808, - "learning_rate": 8.282836069053048e-06, - "loss": 0.474, - "step": 13155 - }, - { - "epoch": 0.8598130841121495, - "grad_norm": 0.4612131714820862, - "learning_rate": 8.282572678366556e-06, - "loss": 0.4347, - "step": 13156 - }, - { - "epoch": 0.8598784393176917, - "grad_norm": 0.43646934628486633, - "learning_rate": 8.282309271669822e-06, - "loss": 0.4069, - "step": 13157 - }, - { - "epoch": 0.8599437945232338, - "grad_norm": 0.42238733172416687, - "learning_rate": 8.282045848964128e-06, - "loss": 0.3631, - "step": 13158 - }, - { - "epoch": 0.8600091497287758, - "grad_norm": 0.44889938831329346, - "learning_rate": 8.281782410250759e-06, - "loss": 0.3734, - "step": 13159 - }, - { - "epoch": 0.860074504934318, - "grad_norm": 0.44265878200531006, - "learning_rate": 8.281518955531001e-06, - "loss": 0.3891, - "step": 13160 - }, - { - "epoch": 0.8601398601398601, - "grad_norm": 0.4276835024356842, - "learning_rate": 8.281255484806136e-06, - "loss": 0.3475, - "step": 13161 - }, - { - "epoch": 0.8602052153454023, - "grad_norm": 0.4474591612815857, - "learning_rate": 8.280991998077454e-06, - "loss": 0.3883, - "step": 13162 - }, - { - "epoch": 0.8602705705509444, - "grad_norm": 0.4535478949546814, - "learning_rate": 8.280728495346236e-06, - "loss": 0.3967, - "step": 13163 - }, - { - "epoch": 0.8603359257564865, - "grad_norm": 0.48304906487464905, - "learning_rate": 8.280464976613768e-06, - "loss": 0.4667, - "step": 13164 - }, - { - "epoch": 0.8604012809620286, - "grad_norm": 0.463156133890152, - "learning_rate": 8.280201441881337e-06, - "loss": 0.3567, - "step": 13165 - }, - { - "epoch": 0.8604666361675708, - "grad_norm": 0.4587669372558594, - "learning_rate": 8.279937891150228e-06, - "loss": 0.3872, - "step": 13166 - }, - { - "epoch": 0.8605319913731129, - "grad_norm": 0.44457775354385376, - "learning_rate": 8.279674324421725e-06, - "loss": 0.3724, - "step": 13167 - }, - { - "epoch": 0.8605973465786549, - "grad_norm": 0.4893188774585724, - "learning_rate": 8.279410741697114e-06, - "loss": 0.4544, - "step": 13168 - }, - { - "epoch": 0.8606627017841971, - "grad_norm": 0.43643447756767273, - "learning_rate": 8.27914714297768e-06, - "loss": 0.357, - "step": 13169 - }, - { - "epoch": 0.8607280569897392, - "grad_norm": 0.44617798924446106, - "learning_rate": 8.27888352826471e-06, - "loss": 0.393, - "step": 13170 - }, - { - "epoch": 0.8607934121952814, - "grad_norm": 0.46605929732322693, - "learning_rate": 8.278619897559488e-06, - "loss": 0.4163, - "step": 13171 - }, - { - "epoch": 0.8608587674008235, - "grad_norm": 0.41804397106170654, - "learning_rate": 8.278356250863302e-06, - "loss": 0.3314, - "step": 13172 - }, - { - "epoch": 0.8609241226063656, - "grad_norm": 0.42674967646598816, - "learning_rate": 8.278092588177435e-06, - "loss": 0.3338, - "step": 13173 - }, - { - "epoch": 0.8609894778119077, - "grad_norm": 0.43258965015411377, - "learning_rate": 8.277828909503178e-06, - "loss": 0.3899, - "step": 13174 - }, - { - "epoch": 0.8610548330174499, - "grad_norm": 0.3933675289154053, - "learning_rate": 8.277565214841812e-06, - "loss": 0.3442, - "step": 13175 - }, - { - "epoch": 0.861120188222992, - "grad_norm": 0.4413454830646515, - "learning_rate": 8.277301504194626e-06, - "loss": 0.3419, - "step": 13176 - }, - { - "epoch": 0.861185543428534, - "grad_norm": 0.47428226470947266, - "learning_rate": 8.277037777562905e-06, - "loss": 0.4339, - "step": 13177 - }, - { - "epoch": 0.8612508986340762, - "grad_norm": 0.45410338044166565, - "learning_rate": 8.276774034947935e-06, - "loss": 0.389, - "step": 13178 - }, - { - "epoch": 0.8613162538396183, - "grad_norm": 0.5930436253547668, - "learning_rate": 8.276510276351003e-06, - "loss": 0.3865, - "step": 13179 - }, - { - "epoch": 0.8613816090451605, - "grad_norm": 0.4242371618747711, - "learning_rate": 8.276246501773393e-06, - "loss": 0.335, - "step": 13180 - }, - { - "epoch": 0.8614469642507026, - "grad_norm": 0.41871318221092224, - "learning_rate": 8.275982711216397e-06, - "loss": 0.3378, - "step": 13181 - }, - { - "epoch": 0.8615123194562447, - "grad_norm": 0.43339404463768005, - "learning_rate": 8.275718904681297e-06, - "loss": 0.3761, - "step": 13182 - }, - { - "epoch": 0.8615776746617868, - "grad_norm": 0.4290432035923004, - "learning_rate": 8.275455082169381e-06, - "loss": 0.3475, - "step": 13183 - }, - { - "epoch": 0.8616430298673289, - "grad_norm": 0.41263851523399353, - "learning_rate": 8.275191243681937e-06, - "loss": 0.3305, - "step": 13184 - }, - { - "epoch": 0.8617083850728711, - "grad_norm": 0.4255129396915436, - "learning_rate": 8.27492738922025e-06, - "loss": 0.3382, - "step": 13185 - }, - { - "epoch": 0.8617737402784131, - "grad_norm": 0.4154873788356781, - "learning_rate": 8.274663518785608e-06, - "loss": 0.3365, - "step": 13186 - }, - { - "epoch": 0.8618390954839553, - "grad_norm": 0.41808241605758667, - "learning_rate": 8.274399632379298e-06, - "loss": 0.3077, - "step": 13187 - }, - { - "epoch": 0.8619044506894974, - "grad_norm": 0.4594711661338806, - "learning_rate": 8.274135730002605e-06, - "loss": 0.4076, - "step": 13188 - }, - { - "epoch": 0.8619698058950396, - "grad_norm": 0.4508814811706543, - "learning_rate": 8.273871811656817e-06, - "loss": 0.3974, - "step": 13189 - }, - { - "epoch": 0.8620351611005816, - "grad_norm": 0.40516120195388794, - "learning_rate": 8.273607877343224e-06, - "loss": 0.3491, - "step": 13190 - }, - { - "epoch": 0.8621005163061238, - "grad_norm": 0.4845518171787262, - "learning_rate": 8.27334392706311e-06, - "loss": 0.4144, - "step": 13191 - }, - { - "epoch": 0.8621658715116659, - "grad_norm": 0.4769570231437683, - "learning_rate": 8.273079960817764e-06, - "loss": 0.4041, - "step": 13192 - }, - { - "epoch": 0.862231226717208, - "grad_norm": 0.43818965554237366, - "learning_rate": 8.272815978608475e-06, - "loss": 0.3578, - "step": 13193 - }, - { - "epoch": 0.8622965819227502, - "grad_norm": 0.4566902816295624, - "learning_rate": 8.272551980436527e-06, - "loss": 0.3759, - "step": 13194 - }, - { - "epoch": 0.8623619371282922, - "grad_norm": 0.46521174907684326, - "learning_rate": 8.27228796630321e-06, - "loss": 0.3969, - "step": 13195 - }, - { - "epoch": 0.8624272923338344, - "grad_norm": 0.4606292247772217, - "learning_rate": 8.27202393620981e-06, - "loss": 0.3811, - "step": 13196 - }, - { - "epoch": 0.8624926475393765, - "grad_norm": 0.4627600610256195, - "learning_rate": 8.271759890157616e-06, - "loss": 0.391, - "step": 13197 - }, - { - "epoch": 0.8625580027449187, - "grad_norm": 0.42293885350227356, - "learning_rate": 8.271495828147916e-06, - "loss": 0.3564, - "step": 13198 - }, - { - "epoch": 0.8626233579504607, - "grad_norm": 0.4492563307285309, - "learning_rate": 8.271231750181997e-06, - "loss": 0.4146, - "step": 13199 - }, - { - "epoch": 0.8626887131560029, - "grad_norm": 0.47421994805336, - "learning_rate": 8.270967656261148e-06, - "loss": 0.422, - "step": 13200 - }, - { - "epoch": 0.862754068361545, - "grad_norm": 0.42958301305770874, - "learning_rate": 8.270703546386656e-06, - "loss": 0.3305, - "step": 13201 - }, - { - "epoch": 0.8628194235670871, - "grad_norm": 0.431548148393631, - "learning_rate": 8.27043942055981e-06, - "loss": 0.3772, - "step": 13202 - }, - { - "epoch": 0.8628847787726293, - "grad_norm": 0.40252622961997986, - "learning_rate": 8.270175278781898e-06, - "loss": 0.3363, - "step": 13203 - }, - { - "epoch": 0.8629501339781713, - "grad_norm": 0.4037279784679413, - "learning_rate": 8.269911121054209e-06, - "loss": 0.3279, - "step": 13204 - }, - { - "epoch": 0.8630154891837135, - "grad_norm": 0.48043838143348694, - "learning_rate": 8.269646947378029e-06, - "loss": 0.4278, - "step": 13205 - }, - { - "epoch": 0.8630808443892556, - "grad_norm": 0.4289033114910126, - "learning_rate": 8.26938275775465e-06, - "loss": 0.3295, - "step": 13206 - }, - { - "epoch": 0.8631461995947978, - "grad_norm": 0.4539353847503662, - "learning_rate": 8.269118552185358e-06, - "loss": 0.3967, - "step": 13207 - }, - { - "epoch": 0.8632115548003398, - "grad_norm": 0.45041242241859436, - "learning_rate": 8.268854330671441e-06, - "loss": 0.3757, - "step": 13208 - }, - { - "epoch": 0.8632769100058819, - "grad_norm": 0.4105256497859955, - "learning_rate": 8.26859009321419e-06, - "loss": 0.3341, - "step": 13209 - }, - { - "epoch": 0.8633422652114241, - "grad_norm": 0.4560367465019226, - "learning_rate": 8.268325839814892e-06, - "loss": 0.3842, - "step": 13210 - }, - { - "epoch": 0.8634076204169662, - "grad_norm": 0.42491069436073303, - "learning_rate": 8.26806157047484e-06, - "loss": 0.3448, - "step": 13211 - }, - { - "epoch": 0.8634729756225084, - "grad_norm": 0.39981043338775635, - "learning_rate": 8.267797285195317e-06, - "loss": 0.2981, - "step": 13212 - }, - { - "epoch": 0.8635383308280504, - "grad_norm": 0.4760648310184479, - "learning_rate": 8.267532983977613e-06, - "loss": 0.4309, - "step": 13213 - }, - { - "epoch": 0.8636036860335926, - "grad_norm": 0.44349217414855957, - "learning_rate": 8.26726866682302e-06, - "loss": 0.3741, - "step": 13214 - }, - { - "epoch": 0.8636690412391347, - "grad_norm": 0.46941787004470825, - "learning_rate": 8.267004333732826e-06, - "loss": 0.395, - "step": 13215 - }, - { - "epoch": 0.8637343964446769, - "grad_norm": 0.4467675983905792, - "learning_rate": 8.26673998470832e-06, - "loss": 0.3858, - "step": 13216 - }, - { - "epoch": 0.8637997516502189, - "grad_norm": 0.47093433141708374, - "learning_rate": 8.266475619750792e-06, - "loss": 0.4136, - "step": 13217 - }, - { - "epoch": 0.863865106855761, - "grad_norm": 0.47002628445625305, - "learning_rate": 8.26621123886153e-06, - "loss": 0.3918, - "step": 13218 - }, - { - "epoch": 0.8639304620613032, - "grad_norm": 0.44025665521621704, - "learning_rate": 8.265946842041823e-06, - "loss": 0.3946, - "step": 13219 - }, - { - "epoch": 0.8639958172668453, - "grad_norm": 0.4498584270477295, - "learning_rate": 8.265682429292964e-06, - "loss": 0.3751, - "step": 13220 - }, - { - "epoch": 0.8640611724723875, - "grad_norm": 0.46606284379959106, - "learning_rate": 8.265418000616242e-06, - "loss": 0.3992, - "step": 13221 - }, - { - "epoch": 0.8641265276779295, - "grad_norm": 0.48162829875946045, - "learning_rate": 8.265153556012942e-06, - "loss": 0.407, - "step": 13222 - }, - { - "epoch": 0.8641918828834717, - "grad_norm": 0.4349437654018402, - "learning_rate": 8.264889095484357e-06, - "loss": 0.3403, - "step": 13223 - }, - { - "epoch": 0.8642572380890138, - "grad_norm": 0.45929232239723206, - "learning_rate": 8.264624619031777e-06, - "loss": 0.4039, - "step": 13224 - }, - { - "epoch": 0.864322593294556, - "grad_norm": 0.42121267318725586, - "learning_rate": 8.264360126656495e-06, - "loss": 0.3597, - "step": 13225 - }, - { - "epoch": 0.864387948500098, - "grad_norm": 0.4389696717262268, - "learning_rate": 8.264095618359794e-06, - "loss": 0.354, - "step": 13226 - }, - { - "epoch": 0.8644533037056401, - "grad_norm": 0.48989543318748474, - "learning_rate": 8.263831094142969e-06, - "loss": 0.4056, - "step": 13227 - }, - { - "epoch": 0.8645186589111823, - "grad_norm": 0.46081024408340454, - "learning_rate": 8.26356655400731e-06, - "loss": 0.3697, - "step": 13228 - }, - { - "epoch": 0.8645840141167244, - "grad_norm": 0.47132226824760437, - "learning_rate": 8.263301997954104e-06, - "loss": 0.4067, - "step": 13229 - }, - { - "epoch": 0.8646493693222665, - "grad_norm": 0.43644627928733826, - "learning_rate": 8.263037425984646e-06, - "loss": 0.3557, - "step": 13230 - }, - { - "epoch": 0.8647147245278086, - "grad_norm": 0.4656889736652374, - "learning_rate": 8.26277283810022e-06, - "loss": 0.4019, - "step": 13231 - }, - { - "epoch": 0.8647800797333508, - "grad_norm": 0.4414185583591461, - "learning_rate": 8.262508234302127e-06, - "loss": 0.368, - "step": 13232 - }, - { - "epoch": 0.8648454349388929, - "grad_norm": 0.5081361532211304, - "learning_rate": 8.262243614591648e-06, - "loss": 0.4635, - "step": 13233 - }, - { - "epoch": 0.864910790144435, - "grad_norm": 0.4259694516658783, - "learning_rate": 8.261978978970075e-06, - "loss": 0.3211, - "step": 13234 - }, - { - "epoch": 0.8649761453499771, - "grad_norm": 0.43436840176582336, - "learning_rate": 8.261714327438703e-06, - "loss": 0.3436, - "step": 13235 - }, - { - "epoch": 0.8650415005555192, - "grad_norm": 0.44253620505332947, - "learning_rate": 8.261449659998819e-06, - "loss": 0.4192, - "step": 13236 - }, - { - "epoch": 0.8651068557610614, - "grad_norm": 0.44048523902893066, - "learning_rate": 8.261184976651715e-06, - "loss": 0.3689, - "step": 13237 - }, - { - "epoch": 0.8651722109666035, - "grad_norm": 0.4383637607097626, - "learning_rate": 8.260920277398683e-06, - "loss": 0.3796, - "step": 13238 - }, - { - "epoch": 0.8652375661721456, - "grad_norm": 0.43479683995246887, - "learning_rate": 8.260655562241011e-06, - "loss": 0.3625, - "step": 13239 - }, - { - "epoch": 0.8653029213776877, - "grad_norm": 0.4594433605670929, - "learning_rate": 8.260390831179995e-06, - "loss": 0.3887, - "step": 13240 - }, - { - "epoch": 0.8653682765832299, - "grad_norm": 0.45842957496643066, - "learning_rate": 8.260126084216922e-06, - "loss": 0.3745, - "step": 13241 - }, - { - "epoch": 0.865433631788772, - "grad_norm": 0.40813007950782776, - "learning_rate": 8.259861321353084e-06, - "loss": 0.3102, - "step": 13242 - }, - { - "epoch": 0.865498986994314, - "grad_norm": 0.4562146067619324, - "learning_rate": 8.259596542589774e-06, - "loss": 0.3692, - "step": 13243 - }, - { - "epoch": 0.8655643421998562, - "grad_norm": 0.41048356890678406, - "learning_rate": 8.259331747928284e-06, - "loss": 0.3489, - "step": 13244 - }, - { - "epoch": 0.8656296974053983, - "grad_norm": 0.46980148553848267, - "learning_rate": 8.259066937369901e-06, - "loss": 0.418, - "step": 13245 - }, - { - "epoch": 0.8656950526109405, - "grad_norm": 0.4779313802719116, - "learning_rate": 8.258802110915922e-06, - "loss": 0.4736, - "step": 13246 - }, - { - "epoch": 0.8657604078164826, - "grad_norm": 0.4522343575954437, - "learning_rate": 8.258537268567634e-06, - "loss": 0.3772, - "step": 13247 - }, - { - "epoch": 0.8658257630220247, - "grad_norm": 0.45030477643013, - "learning_rate": 8.258272410326331e-06, - "loss": 0.3695, - "step": 13248 - }, - { - "epoch": 0.8658911182275668, - "grad_norm": 0.45079004764556885, - "learning_rate": 8.258007536193306e-06, - "loss": 0.4004, - "step": 13249 - }, - { - "epoch": 0.865956473433109, - "grad_norm": 0.43524375557899475, - "learning_rate": 8.257742646169848e-06, - "loss": 0.388, - "step": 13250 - }, - { - "epoch": 0.8660218286386511, - "grad_norm": 0.4434921443462372, - "learning_rate": 8.257477740257254e-06, - "loss": 0.3801, - "step": 13251 - }, - { - "epoch": 0.8660871838441931, - "grad_norm": 0.4336143434047699, - "learning_rate": 8.257212818456809e-06, - "loss": 0.3739, - "step": 13252 - }, - { - "epoch": 0.8661525390497353, - "grad_norm": 0.45957279205322266, - "learning_rate": 8.25694788076981e-06, - "loss": 0.3649, - "step": 13253 - }, - { - "epoch": 0.8662178942552774, - "grad_norm": 0.4267975687980652, - "learning_rate": 8.256682927197547e-06, - "loss": 0.3414, - "step": 13254 - }, - { - "epoch": 0.8662832494608196, - "grad_norm": 0.6603761315345764, - "learning_rate": 8.256417957741313e-06, - "loss": 0.4134, - "step": 13255 - }, - { - "epoch": 0.8663486046663617, - "grad_norm": 0.462980180978775, - "learning_rate": 8.256152972402403e-06, - "loss": 0.347, - "step": 13256 - }, - { - "epoch": 0.8664139598719038, - "grad_norm": 0.44443926215171814, - "learning_rate": 8.255887971182103e-06, - "loss": 0.3384, - "step": 13257 - }, - { - "epoch": 0.8664793150774459, - "grad_norm": 0.4551098942756653, - "learning_rate": 8.255622954081713e-06, - "loss": 0.4131, - "step": 13258 - }, - { - "epoch": 0.8665446702829881, - "grad_norm": 0.4636535346508026, - "learning_rate": 8.25535792110252e-06, - "loss": 0.4575, - "step": 13259 - }, - { - "epoch": 0.8666100254885302, - "grad_norm": 0.4483840763568878, - "learning_rate": 8.25509287224582e-06, - "loss": 0.345, - "step": 13260 - }, - { - "epoch": 0.8666753806940722, - "grad_norm": 0.43046480417251587, - "learning_rate": 8.254827807512904e-06, - "loss": 0.3733, - "step": 13261 - }, - { - "epoch": 0.8667407358996144, - "grad_norm": 0.4565449059009552, - "learning_rate": 8.254562726905064e-06, - "loss": 0.3785, - "step": 13262 - }, - { - "epoch": 0.8668060911051565, - "grad_norm": 0.44150421023368835, - "learning_rate": 8.254297630423595e-06, - "loss": 0.3821, - "step": 13263 - }, - { - "epoch": 0.8668714463106987, - "grad_norm": 0.49192196130752563, - "learning_rate": 8.25403251806979e-06, - "loss": 0.4364, - "step": 13264 - }, - { - "epoch": 0.8669368015162408, - "grad_norm": 0.42523103952407837, - "learning_rate": 8.253767389844939e-06, - "loss": 0.3604, - "step": 13265 - }, - { - "epoch": 0.8670021567217829, - "grad_norm": 0.4398192763328552, - "learning_rate": 8.253502245750338e-06, - "loss": 0.3873, - "step": 13266 - }, - { - "epoch": 0.867067511927325, - "grad_norm": 0.39868608117103577, - "learning_rate": 8.25323708578728e-06, - "loss": 0.2922, - "step": 13267 - }, - { - "epoch": 0.8671328671328671, - "grad_norm": 0.4361017048358917, - "learning_rate": 8.252971909957058e-06, - "loss": 0.3355, - "step": 13268 - }, - { - "epoch": 0.8671982223384093, - "grad_norm": 0.4512507915496826, - "learning_rate": 8.252706718260964e-06, - "loss": 0.4073, - "step": 13269 - }, - { - "epoch": 0.8672635775439513, - "grad_norm": 0.4323147237300873, - "learning_rate": 8.252441510700294e-06, - "loss": 0.3667, - "step": 13270 - }, - { - "epoch": 0.8673289327494935, - "grad_norm": 0.43514636158943176, - "learning_rate": 8.252176287276338e-06, - "loss": 0.3669, - "step": 13271 - }, - { - "epoch": 0.8673942879550356, - "grad_norm": 0.4541299343109131, - "learning_rate": 8.251911047990393e-06, - "loss": 0.3936, - "step": 13272 - }, - { - "epoch": 0.8674596431605778, - "grad_norm": 0.477446973323822, - "learning_rate": 8.25164579284375e-06, - "loss": 0.3783, - "step": 13273 - }, - { - "epoch": 0.8675249983661198, - "grad_norm": 0.38905927538871765, - "learning_rate": 8.251380521837706e-06, - "loss": 0.3131, - "step": 13274 - }, - { - "epoch": 0.867590353571662, - "grad_norm": 0.42171740531921387, - "learning_rate": 8.25111523497355e-06, - "loss": 0.3596, - "step": 13275 - }, - { - "epoch": 0.8676557087772041, - "grad_norm": 0.46107205748558044, - "learning_rate": 8.250849932252581e-06, - "loss": 0.3548, - "step": 13276 - }, - { - "epoch": 0.8677210639827462, - "grad_norm": 0.4519311189651489, - "learning_rate": 8.25058461367609e-06, - "loss": 0.373, - "step": 13277 - }, - { - "epoch": 0.8677864191882884, - "grad_norm": 0.49622842669487, - "learning_rate": 8.250319279245373e-06, - "loss": 0.4223, - "step": 13278 - }, - { - "epoch": 0.8678517743938304, - "grad_norm": 0.4517560601234436, - "learning_rate": 8.250053928961722e-06, - "loss": 0.3535, - "step": 13279 - }, - { - "epoch": 0.8679171295993726, - "grad_norm": 0.43382585048675537, - "learning_rate": 8.249788562826431e-06, - "loss": 0.3795, - "step": 13280 - }, - { - "epoch": 0.8679824848049147, - "grad_norm": 0.43188732862472534, - "learning_rate": 8.249523180840795e-06, - "loss": 0.3755, - "step": 13281 - }, - { - "epoch": 0.8680478400104569, - "grad_norm": 0.4784072935581207, - "learning_rate": 8.249257783006111e-06, - "loss": 0.4162, - "step": 13282 - }, - { - "epoch": 0.868113195215999, - "grad_norm": 0.4593978822231293, - "learning_rate": 8.24899236932367e-06, - "loss": 0.3703, - "step": 13283 - }, - { - "epoch": 0.8681785504215411, - "grad_norm": 0.43106651306152344, - "learning_rate": 8.248726939794767e-06, - "loss": 0.3776, - "step": 13284 - }, - { - "epoch": 0.8682439056270832, - "grad_norm": 0.43448102474212646, - "learning_rate": 8.248461494420696e-06, - "loss": 0.3613, - "step": 13285 - }, - { - "epoch": 0.8683092608326253, - "grad_norm": 0.42373764514923096, - "learning_rate": 8.248196033202756e-06, - "loss": 0.3631, - "step": 13286 - }, - { - "epoch": 0.8683746160381675, - "grad_norm": 0.45077648758888245, - "learning_rate": 8.247930556142238e-06, - "loss": 0.3562, - "step": 13287 - }, - { - "epoch": 0.8684399712437095, - "grad_norm": 0.4450948238372803, - "learning_rate": 8.247665063240437e-06, - "loss": 0.4043, - "step": 13288 - }, - { - "epoch": 0.8685053264492517, - "grad_norm": 0.45834505558013916, - "learning_rate": 8.247399554498647e-06, - "loss": 0.372, - "step": 13289 - }, - { - "epoch": 0.8685706816547938, - "grad_norm": 0.44911620020866394, - "learning_rate": 8.247134029918167e-06, - "loss": 0.4059, - "step": 13290 - }, - { - "epoch": 0.868636036860336, - "grad_norm": 0.4145478308200836, - "learning_rate": 8.246868489500287e-06, - "loss": 0.3053, - "step": 13291 - }, - { - "epoch": 0.868701392065878, - "grad_norm": 0.44386187195777893, - "learning_rate": 8.246602933246306e-06, - "loss": 0.3635, - "step": 13292 - }, - { - "epoch": 0.8687667472714201, - "grad_norm": 0.42866528034210205, - "learning_rate": 8.246337361157517e-06, - "loss": 0.3544, - "step": 13293 - }, - { - "epoch": 0.8688321024769623, - "grad_norm": 0.4357426166534424, - "learning_rate": 8.246071773235217e-06, - "loss": 0.3562, - "step": 13294 - }, - { - "epoch": 0.8688974576825044, - "grad_norm": 0.462495356798172, - "learning_rate": 8.2458061694807e-06, - "loss": 0.4126, - "step": 13295 - }, - { - "epoch": 0.8689628128880466, - "grad_norm": 0.43929940462112427, - "learning_rate": 8.245540549895262e-06, - "loss": 0.351, - "step": 13296 - }, - { - "epoch": 0.8690281680935886, - "grad_norm": 0.4430977404117584, - "learning_rate": 8.245274914480196e-06, - "loss": 0.4129, - "step": 13297 - }, - { - "epoch": 0.8690935232991308, - "grad_norm": 0.49655118584632874, - "learning_rate": 8.245009263236803e-06, - "loss": 0.4958, - "step": 13298 - }, - { - "epoch": 0.8691588785046729, - "grad_norm": 0.39067956805229187, - "learning_rate": 8.244743596166374e-06, - "loss": 0.2936, - "step": 13299 - }, - { - "epoch": 0.8692242337102151, - "grad_norm": 0.41374117136001587, - "learning_rate": 8.244477913270208e-06, - "loss": 0.3257, - "step": 13300 - }, - { - "epoch": 0.8692895889157571, - "grad_norm": 0.44752123951911926, - "learning_rate": 8.244212214549598e-06, - "loss": 0.3663, - "step": 13301 - }, - { - "epoch": 0.8693549441212992, - "grad_norm": 0.41446512937545776, - "learning_rate": 8.24394650000584e-06, - "loss": 0.3404, - "step": 13302 - }, - { - "epoch": 0.8694202993268414, - "grad_norm": 0.4655797481536865, - "learning_rate": 8.243680769640234e-06, - "loss": 0.4057, - "step": 13303 - }, - { - "epoch": 0.8694856545323835, - "grad_norm": 0.4714828431606293, - "learning_rate": 8.24341502345407e-06, - "loss": 0.4201, - "step": 13304 - }, - { - "epoch": 0.8695510097379257, - "grad_norm": 0.4187307059764862, - "learning_rate": 8.24314926144865e-06, - "loss": 0.3593, - "step": 13305 - }, - { - "epoch": 0.8696163649434677, - "grad_norm": 0.4411788284778595, - "learning_rate": 8.242883483625266e-06, - "loss": 0.3757, - "step": 13306 - }, - { - "epoch": 0.8696817201490099, - "grad_norm": 0.48096975684165955, - "learning_rate": 8.242617689985217e-06, - "loss": 0.4154, - "step": 13307 - }, - { - "epoch": 0.869747075354552, - "grad_norm": 0.4491497576236725, - "learning_rate": 8.242351880529797e-06, - "loss": 0.4145, - "step": 13308 - }, - { - "epoch": 0.8698124305600942, - "grad_norm": 0.40352532267570496, - "learning_rate": 8.242086055260306e-06, - "loss": 0.3419, - "step": 13309 - }, - { - "epoch": 0.8698777857656362, - "grad_norm": 0.4654463231563568, - "learning_rate": 8.241820214178036e-06, - "loss": 0.4165, - "step": 13310 - }, - { - "epoch": 0.8699431409711783, - "grad_norm": 0.4417929947376251, - "learning_rate": 8.241554357284284e-06, - "loss": 0.3897, - "step": 13311 - }, - { - "epoch": 0.8700084961767205, - "grad_norm": 0.42508944869041443, - "learning_rate": 8.241288484580352e-06, - "loss": 0.3593, - "step": 13312 - }, - { - "epoch": 0.8700738513822626, - "grad_norm": 0.4737236797809601, - "learning_rate": 8.24102259606753e-06, - "loss": 0.4201, - "step": 13313 - }, - { - "epoch": 0.8701392065878047, - "grad_norm": 0.47437816858291626, - "learning_rate": 8.24075669174712e-06, - "loss": 0.4089, - "step": 13314 - }, - { - "epoch": 0.8702045617933468, - "grad_norm": 0.48102623224258423, - "learning_rate": 8.240490771620416e-06, - "loss": 0.3758, - "step": 13315 - }, - { - "epoch": 0.870269916998889, - "grad_norm": 0.46707481145858765, - "learning_rate": 8.240224835688716e-06, - "loss": 0.3877, - "step": 13316 - }, - { - "epoch": 0.8703352722044311, - "grad_norm": 0.4536297023296356, - "learning_rate": 8.239958883953319e-06, - "loss": 0.3981, - "step": 13317 - }, - { - "epoch": 0.8704006274099733, - "grad_norm": 0.42421531677246094, - "learning_rate": 8.239692916415518e-06, - "loss": 0.3487, - "step": 13318 - }, - { - "epoch": 0.8704659826155153, - "grad_norm": 0.43656229972839355, - "learning_rate": 8.239426933076613e-06, - "loss": 0.3673, - "step": 13319 - }, - { - "epoch": 0.8705313378210574, - "grad_norm": 0.4687826633453369, - "learning_rate": 8.2391609339379e-06, - "loss": 0.42, - "step": 13320 - }, - { - "epoch": 0.8705966930265996, - "grad_norm": 0.42849722504615784, - "learning_rate": 8.238894919000677e-06, - "loss": 0.3645, - "step": 13321 - }, - { - "epoch": 0.8706620482321417, - "grad_norm": 0.43855661153793335, - "learning_rate": 8.238628888266241e-06, - "loss": 0.3674, - "step": 13322 - }, - { - "epoch": 0.8707274034376838, - "grad_norm": 0.4502992630004883, - "learning_rate": 8.238362841735891e-06, - "loss": 0.3542, - "step": 13323 - }, - { - "epoch": 0.8707927586432259, - "grad_norm": 0.42601001262664795, - "learning_rate": 8.238096779410923e-06, - "loss": 0.3577, - "step": 13324 - }, - { - "epoch": 0.8708581138487681, - "grad_norm": 0.4329441487789154, - "learning_rate": 8.237830701292634e-06, - "loss": 0.378, - "step": 13325 - }, - { - "epoch": 0.8709234690543102, - "grad_norm": 0.45150476694107056, - "learning_rate": 8.237564607382328e-06, - "loss": 0.3538, - "step": 13326 - }, - { - "epoch": 0.8709888242598522, - "grad_norm": 0.4647296369075775, - "learning_rate": 8.237298497681292e-06, - "loss": 0.4128, - "step": 13327 - }, - { - "epoch": 0.8710541794653944, - "grad_norm": 0.42833244800567627, - "learning_rate": 8.237032372190832e-06, - "loss": 0.3441, - "step": 13328 - }, - { - "epoch": 0.8711195346709365, - "grad_norm": 0.4719427227973938, - "learning_rate": 8.236766230912243e-06, - "loss": 0.3978, - "step": 13329 - }, - { - "epoch": 0.8711848898764787, - "grad_norm": 0.40451669692993164, - "learning_rate": 8.236500073846826e-06, - "loss": 0.3538, - "step": 13330 - }, - { - "epoch": 0.8712502450820208, - "grad_norm": 0.45975998044013977, - "learning_rate": 8.236233900995874e-06, - "loss": 0.3982, - "step": 13331 - }, - { - "epoch": 0.8713156002875629, - "grad_norm": 0.45589467883110046, - "learning_rate": 8.23596771236069e-06, - "loss": 0.3863, - "step": 13332 - }, - { - "epoch": 0.871380955493105, - "grad_norm": 0.44050973653793335, - "learning_rate": 8.235701507942571e-06, - "loss": 0.3772, - "step": 13333 - }, - { - "epoch": 0.8714463106986472, - "grad_norm": 0.47511225938796997, - "learning_rate": 8.235435287742813e-06, - "loss": 0.4241, - "step": 13334 - }, - { - "epoch": 0.8715116659041893, - "grad_norm": 0.45889803767204285, - "learning_rate": 8.235169051762718e-06, - "loss": 0.381, - "step": 13335 - }, - { - "epoch": 0.8715770211097313, - "grad_norm": 0.44785118103027344, - "learning_rate": 8.234902800003581e-06, - "loss": 0.3952, - "step": 13336 - }, - { - "epoch": 0.8716423763152735, - "grad_norm": 0.4250652492046356, - "learning_rate": 8.234636532466702e-06, - "loss": 0.3512, - "step": 13337 - }, - { - "epoch": 0.8717077315208156, - "grad_norm": 0.4104115664958954, - "learning_rate": 8.234370249153381e-06, - "loss": 0.3504, - "step": 13338 - }, - { - "epoch": 0.8717730867263578, - "grad_norm": 0.4380771815776825, - "learning_rate": 8.234103950064916e-06, - "loss": 0.3525, - "step": 13339 - }, - { - "epoch": 0.8718384419318999, - "grad_norm": 0.43101492524147034, - "learning_rate": 8.233837635202604e-06, - "loss": 0.3409, - "step": 13340 - }, - { - "epoch": 0.871903797137442, - "grad_norm": 0.4336869716644287, - "learning_rate": 8.23357130456775e-06, - "loss": 0.3367, - "step": 13341 - }, - { - "epoch": 0.8719691523429841, - "grad_norm": 0.46948251128196716, - "learning_rate": 8.233304958161643e-06, - "loss": 0.3894, - "step": 13342 - }, - { - "epoch": 0.8720345075485263, - "grad_norm": 0.4291236698627472, - "learning_rate": 8.233038595985592e-06, - "loss": 0.3703, - "step": 13343 - }, - { - "epoch": 0.8720998627540684, - "grad_norm": 0.42561033368110657, - "learning_rate": 8.23277221804089e-06, - "loss": 0.3664, - "step": 13344 - }, - { - "epoch": 0.8721652179596104, - "grad_norm": 0.4196188747882843, - "learning_rate": 8.232505824328837e-06, - "loss": 0.343, - "step": 13345 - }, - { - "epoch": 0.8722305731651526, - "grad_norm": 0.4520960748195648, - "learning_rate": 8.232239414850734e-06, - "loss": 0.3751, - "step": 13346 - }, - { - "epoch": 0.8722959283706947, - "grad_norm": 0.4551304578781128, - "learning_rate": 8.23197298960788e-06, - "loss": 0.3863, - "step": 13347 - }, - { - "epoch": 0.8723612835762369, - "grad_norm": 0.4123007655143738, - "learning_rate": 8.231706548601572e-06, - "loss": 0.355, - "step": 13348 - }, - { - "epoch": 0.872426638781779, - "grad_norm": 0.43818527460098267, - "learning_rate": 8.231440091833113e-06, - "loss": 0.3584, - "step": 13349 - }, - { - "epoch": 0.8724919939873211, - "grad_norm": 0.45201465487480164, - "learning_rate": 8.231173619303802e-06, - "loss": 0.3575, - "step": 13350 - }, - { - "epoch": 0.8725573491928632, - "grad_norm": 0.42593809962272644, - "learning_rate": 8.23090713101494e-06, - "loss": 0.3527, - "step": 13351 - }, - { - "epoch": 0.8726227043984053, - "grad_norm": 0.4379211366176605, - "learning_rate": 8.230640626967821e-06, - "loss": 0.397, - "step": 13352 - }, - { - "epoch": 0.8726880596039475, - "grad_norm": 0.4190900921821594, - "learning_rate": 8.23037410716375e-06, - "loss": 0.3235, - "step": 13353 - }, - { - "epoch": 0.8727534148094895, - "grad_norm": 0.4702818691730499, - "learning_rate": 8.230107571604025e-06, - "loss": 0.4428, - "step": 13354 - }, - { - "epoch": 0.8728187700150317, - "grad_norm": 0.48189520835876465, - "learning_rate": 8.229841020289947e-06, - "loss": 0.3803, - "step": 13355 - }, - { - "epoch": 0.8728841252205738, - "grad_norm": 0.4529944360256195, - "learning_rate": 8.229574453222812e-06, - "loss": 0.354, - "step": 13356 - }, - { - "epoch": 0.872949480426116, - "grad_norm": 0.4514579772949219, - "learning_rate": 8.229307870403928e-06, - "loss": 0.3307, - "step": 13357 - }, - { - "epoch": 0.873014835631658, - "grad_norm": 0.4548887312412262, - "learning_rate": 8.229041271834588e-06, - "loss": 0.4085, - "step": 13358 - }, - { - "epoch": 0.8730801908372002, - "grad_norm": 0.4029901325702667, - "learning_rate": 8.228774657516097e-06, - "loss": 0.3146, - "step": 13359 - }, - { - "epoch": 0.8731455460427423, - "grad_norm": 0.4346332252025604, - "learning_rate": 8.228508027449752e-06, - "loss": 0.3511, - "step": 13360 - }, - { - "epoch": 0.8732109012482844, - "grad_norm": 0.4323817491531372, - "learning_rate": 8.228241381636855e-06, - "loss": 0.3758, - "step": 13361 - }, - { - "epoch": 0.8732762564538266, - "grad_norm": 0.43761152029037476, - "learning_rate": 8.227974720078708e-06, - "loss": 0.383, - "step": 13362 - }, - { - "epoch": 0.8733416116593686, - "grad_norm": 0.4516158103942871, - "learning_rate": 8.227708042776608e-06, - "loss": 0.3827, - "step": 13363 - }, - { - "epoch": 0.8734069668649108, - "grad_norm": 0.40703877806663513, - "learning_rate": 8.227441349731858e-06, - "loss": 0.3404, - "step": 13364 - }, - { - "epoch": 0.8734723220704529, - "grad_norm": 0.4412977397441864, - "learning_rate": 8.227174640945759e-06, - "loss": 0.3778, - "step": 13365 - }, - { - "epoch": 0.8735376772759951, - "grad_norm": 0.4496159553527832, - "learning_rate": 8.226907916419611e-06, - "loss": 0.3676, - "step": 13366 - }, - { - "epoch": 0.8736030324815371, - "grad_norm": 0.43719109892845154, - "learning_rate": 8.226641176154715e-06, - "loss": 0.3709, - "step": 13367 - }, - { - "epoch": 0.8736683876870793, - "grad_norm": 0.4351934492588043, - "learning_rate": 8.226374420152372e-06, - "loss": 0.3732, - "step": 13368 - }, - { - "epoch": 0.8737337428926214, - "grad_norm": 0.4426000416278839, - "learning_rate": 8.226107648413885e-06, - "loss": 0.3802, - "step": 13369 - }, - { - "epoch": 0.8737990980981635, - "grad_norm": 0.4504286050796509, - "learning_rate": 8.225840860940554e-06, - "loss": 0.4121, - "step": 13370 - }, - { - "epoch": 0.8738644533037057, - "grad_norm": 0.4409453868865967, - "learning_rate": 8.225574057733676e-06, - "loss": 0.3669, - "step": 13371 - }, - { - "epoch": 0.8739298085092477, - "grad_norm": 0.44252267479896545, - "learning_rate": 8.225307238794558e-06, - "loss": 0.3754, - "step": 13372 - }, - { - "epoch": 0.8739951637147899, - "grad_norm": 0.45822015404701233, - "learning_rate": 8.2250404041245e-06, - "loss": 0.4182, - "step": 13373 - }, - { - "epoch": 0.874060518920332, - "grad_norm": 0.4433598220348358, - "learning_rate": 8.224773553724802e-06, - "loss": 0.3604, - "step": 13374 - }, - { - "epoch": 0.8741258741258742, - "grad_norm": 0.4358460307121277, - "learning_rate": 8.224506687596764e-06, - "loss": 0.375, - "step": 13375 - }, - { - "epoch": 0.8741912293314162, - "grad_norm": 0.4563209116458893, - "learning_rate": 8.224239805741692e-06, - "loss": 0.4319, - "step": 13376 - }, - { - "epoch": 0.8742565845369583, - "grad_norm": 0.4441712498664856, - "learning_rate": 8.223972908160884e-06, - "loss": 0.3805, - "step": 13377 - }, - { - "epoch": 0.8743219397425005, - "grad_norm": 0.4398649036884308, - "learning_rate": 8.223705994855646e-06, - "loss": 0.3994, - "step": 13378 - }, - { - "epoch": 0.8743872949480426, - "grad_norm": 0.4517597258090973, - "learning_rate": 8.223439065827274e-06, - "loss": 0.4171, - "step": 13379 - }, - { - "epoch": 0.8744526501535848, - "grad_norm": 0.42800846695899963, - "learning_rate": 8.223172121077074e-06, - "loss": 0.3614, - "step": 13380 - }, - { - "epoch": 0.8745180053591268, - "grad_norm": 0.4026123881340027, - "learning_rate": 8.22290516060635e-06, - "loss": 0.3238, - "step": 13381 - }, - { - "epoch": 0.874583360564669, - "grad_norm": 0.3890553116798401, - "learning_rate": 8.222638184416397e-06, - "loss": 0.2857, - "step": 13382 - }, - { - "epoch": 0.8746487157702111, - "grad_norm": 0.4353051781654358, - "learning_rate": 8.222371192508522e-06, - "loss": 0.3355, - "step": 13383 - }, - { - "epoch": 0.8747140709757533, - "grad_norm": 0.43043285608291626, - "learning_rate": 8.222104184884026e-06, - "loss": 0.36, - "step": 13384 - }, - { - "epoch": 0.8747794261812953, - "grad_norm": 0.42799270153045654, - "learning_rate": 8.221837161544212e-06, - "loss": 0.373, - "step": 13385 - }, - { - "epoch": 0.8748447813868374, - "grad_norm": 0.4003741443157196, - "learning_rate": 8.221570122490384e-06, - "loss": 0.3133, - "step": 13386 - }, - { - "epoch": 0.8749101365923796, - "grad_norm": 0.4339466989040375, - "learning_rate": 8.22130306772384e-06, - "loss": 0.3597, - "step": 13387 - }, - { - "epoch": 0.8749754917979217, - "grad_norm": 0.45489901304244995, - "learning_rate": 8.221035997245886e-06, - "loss": 0.412, - "step": 13388 - }, - { - "epoch": 0.8750408470034639, - "grad_norm": 0.42823946475982666, - "learning_rate": 8.220768911057823e-06, - "loss": 0.3535, - "step": 13389 - }, - { - "epoch": 0.8751062022090059, - "grad_norm": 0.4632386863231659, - "learning_rate": 8.220501809160955e-06, - "loss": 0.3883, - "step": 13390 - }, - { - "epoch": 0.8751715574145481, - "grad_norm": 0.39702773094177246, - "learning_rate": 8.220234691556583e-06, - "loss": 0.309, - "step": 13391 - }, - { - "epoch": 0.8752369126200902, - "grad_norm": 0.4396913945674896, - "learning_rate": 8.219967558246013e-06, - "loss": 0.3775, - "step": 13392 - }, - { - "epoch": 0.8753022678256324, - "grad_norm": 0.4301755428314209, - "learning_rate": 8.219700409230545e-06, - "loss": 0.3493, - "step": 13393 - }, - { - "epoch": 0.8753676230311744, - "grad_norm": 0.49475932121276855, - "learning_rate": 8.219433244511481e-06, - "loss": 0.4561, - "step": 13394 - }, - { - "epoch": 0.8754329782367165, - "grad_norm": 0.4847348928451538, - "learning_rate": 8.219166064090127e-06, - "loss": 0.423, - "step": 13395 - }, - { - "epoch": 0.8754983334422587, - "grad_norm": 0.45300471782684326, - "learning_rate": 8.218898867967785e-06, - "loss": 0.3565, - "step": 13396 - }, - { - "epoch": 0.8755636886478008, - "grad_norm": 0.4917092025279999, - "learning_rate": 8.218631656145757e-06, - "loss": 0.4421, - "step": 13397 - }, - { - "epoch": 0.875629043853343, - "grad_norm": 0.4750673174858093, - "learning_rate": 8.218364428625347e-06, - "loss": 0.3823, - "step": 13398 - }, - { - "epoch": 0.875694399058885, - "grad_norm": 0.4574526846408844, - "learning_rate": 8.21809718540786e-06, - "loss": 0.411, - "step": 13399 - }, - { - "epoch": 0.8757597542644272, - "grad_norm": 0.43643367290496826, - "learning_rate": 8.217829926494598e-06, - "loss": 0.3821, - "step": 13400 - }, - { - "epoch": 0.8758251094699693, - "grad_norm": 0.4311563968658447, - "learning_rate": 8.217562651886866e-06, - "loss": 0.326, - "step": 13401 - }, - { - "epoch": 0.8758904646755115, - "grad_norm": 0.4255685806274414, - "learning_rate": 8.217295361585964e-06, - "loss": 0.3526, - "step": 13402 - }, - { - "epoch": 0.8759558198810535, - "grad_norm": 0.4884625971317291, - "learning_rate": 8.2170280555932e-06, - "loss": 0.3865, - "step": 13403 - }, - { - "epoch": 0.8760211750865956, - "grad_norm": 0.4539845585823059, - "learning_rate": 8.216760733909874e-06, - "loss": 0.3797, - "step": 13404 - }, - { - "epoch": 0.8760865302921378, - "grad_norm": 0.44040000438690186, - "learning_rate": 8.216493396537291e-06, - "loss": 0.3672, - "step": 13405 - }, - { - "epoch": 0.8761518854976799, - "grad_norm": 0.43532800674438477, - "learning_rate": 8.216226043476757e-06, - "loss": 0.3606, - "step": 13406 - }, - { - "epoch": 0.876217240703222, - "grad_norm": 0.46042540669441223, - "learning_rate": 8.215958674729572e-06, - "loss": 0.4519, - "step": 13407 - }, - { - "epoch": 0.8762825959087641, - "grad_norm": 0.4292353391647339, - "learning_rate": 8.215691290297045e-06, - "loss": 0.3696, - "step": 13408 - }, - { - "epoch": 0.8763479511143063, - "grad_norm": 0.4638332426548004, - "learning_rate": 8.215423890180478e-06, - "loss": 0.4069, - "step": 13409 - }, - { - "epoch": 0.8764133063198484, - "grad_norm": 0.4396990239620209, - "learning_rate": 8.215156474381173e-06, - "loss": 0.4132, - "step": 13410 - }, - { - "epoch": 0.8764786615253904, - "grad_norm": 0.4405803978443146, - "learning_rate": 8.214889042900436e-06, - "loss": 0.3993, - "step": 13411 - }, - { - "epoch": 0.8765440167309326, - "grad_norm": 0.4370073676109314, - "learning_rate": 8.214621595739571e-06, - "loss": 0.3423, - "step": 13412 - }, - { - "epoch": 0.8766093719364747, - "grad_norm": 0.47184428572654724, - "learning_rate": 8.214354132899884e-06, - "loss": 0.4272, - "step": 13413 - }, - { - "epoch": 0.8766747271420169, - "grad_norm": 0.4257681667804718, - "learning_rate": 8.214086654382681e-06, - "loss": 0.3701, - "step": 13414 - }, - { - "epoch": 0.876740082347559, - "grad_norm": 0.4332144558429718, - "learning_rate": 8.213819160189261e-06, - "loss": 0.3746, - "step": 13415 - }, - { - "epoch": 0.8768054375531011, - "grad_norm": 0.4621717929840088, - "learning_rate": 8.21355165032093e-06, - "loss": 0.3906, - "step": 13416 - }, - { - "epoch": 0.8768707927586432, - "grad_norm": 0.40356600284576416, - "learning_rate": 8.213284124778994e-06, - "loss": 0.3285, - "step": 13417 - }, - { - "epoch": 0.8769361479641854, - "grad_norm": 0.43172144889831543, - "learning_rate": 8.213016583564761e-06, - "loss": 0.381, - "step": 13418 - }, - { - "epoch": 0.8770015031697275, - "grad_norm": 0.4825325906276703, - "learning_rate": 8.212749026679533e-06, - "loss": 0.4114, - "step": 13419 - }, - { - "epoch": 0.8770668583752695, - "grad_norm": 0.45453810691833496, - "learning_rate": 8.212481454124615e-06, - "loss": 0.3914, - "step": 13420 - }, - { - "epoch": 0.8771322135808117, - "grad_norm": 0.4427175521850586, - "learning_rate": 8.21221386590131e-06, - "loss": 0.3682, - "step": 13421 - }, - { - "epoch": 0.8771975687863538, - "grad_norm": 0.4396561086177826, - "learning_rate": 8.211946262010925e-06, - "loss": 0.4073, - "step": 13422 - }, - { - "epoch": 0.877262923991896, - "grad_norm": 0.4252471625804901, - "learning_rate": 8.211678642454768e-06, - "loss": 0.3584, - "step": 13423 - }, - { - "epoch": 0.877328279197438, - "grad_norm": 0.45310062170028687, - "learning_rate": 8.21141100723414e-06, - "loss": 0.3729, - "step": 13424 - }, - { - "epoch": 0.8773936344029802, - "grad_norm": 0.44264861941337585, - "learning_rate": 8.211143356350348e-06, - "loss": 0.3864, - "step": 13425 - }, - { - "epoch": 0.8774589896085223, - "grad_norm": 0.41715845465660095, - "learning_rate": 8.210875689804699e-06, - "loss": 0.3247, - "step": 13426 - }, - { - "epoch": 0.8775243448140645, - "grad_norm": 0.4420039653778076, - "learning_rate": 8.210608007598495e-06, - "loss": 0.363, - "step": 13427 - }, - { - "epoch": 0.8775897000196066, - "grad_norm": 0.45903250575065613, - "learning_rate": 8.210340309733043e-06, - "loss": 0.3951, - "step": 13428 - }, - { - "epoch": 0.8776550552251486, - "grad_norm": 0.4627886116504669, - "learning_rate": 8.21007259620965e-06, - "loss": 0.4165, - "step": 13429 - }, - { - "epoch": 0.8777204104306908, - "grad_norm": 0.428143709897995, - "learning_rate": 8.209804867029623e-06, - "loss": 0.3477, - "step": 13430 - }, - { - "epoch": 0.8777857656362329, - "grad_norm": 0.4345650374889374, - "learning_rate": 8.209537122194262e-06, - "loss": 0.385, - "step": 13431 - }, - { - "epoch": 0.8778511208417751, - "grad_norm": 0.44252845644950867, - "learning_rate": 8.20926936170488e-06, - "loss": 0.3801, - "step": 13432 - }, - { - "epoch": 0.8779164760473172, - "grad_norm": 0.43946585059165955, - "learning_rate": 8.209001585562777e-06, - "loss": 0.3596, - "step": 13433 - }, - { - "epoch": 0.8779818312528593, - "grad_norm": 0.4799671471118927, - "learning_rate": 8.208733793769261e-06, - "loss": 0.4373, - "step": 13434 - }, - { - "epoch": 0.8780471864584014, - "grad_norm": 0.4021196663379669, - "learning_rate": 8.208465986325642e-06, - "loss": 0.3167, - "step": 13435 - }, - { - "epoch": 0.8781125416639435, - "grad_norm": 0.44872426986694336, - "learning_rate": 8.20819816323322e-06, - "loss": 0.3781, - "step": 13436 - }, - { - "epoch": 0.8781778968694857, - "grad_norm": 0.43821343779563904, - "learning_rate": 8.207930324493304e-06, - "loss": 0.3715, - "step": 13437 - }, - { - "epoch": 0.8782432520750277, - "grad_norm": 0.4665985107421875, - "learning_rate": 8.207662470107203e-06, - "loss": 0.4173, - "step": 13438 - }, - { - "epoch": 0.8783086072805699, - "grad_norm": 0.42889121174812317, - "learning_rate": 8.20739460007622e-06, - "loss": 0.3498, - "step": 13439 - }, - { - "epoch": 0.878373962486112, - "grad_norm": 0.48825544118881226, - "learning_rate": 8.207126714401661e-06, - "loss": 0.4491, - "step": 13440 - }, - { - "epoch": 0.8784393176916542, - "grad_norm": 0.443212628364563, - "learning_rate": 8.206858813084835e-06, - "loss": 0.39, - "step": 13441 - }, - { - "epoch": 0.8785046728971962, - "grad_norm": 0.417248010635376, - "learning_rate": 8.206590896127048e-06, - "loss": 0.3144, - "step": 13442 - }, - { - "epoch": 0.8785700281027384, - "grad_norm": 0.41721847653388977, - "learning_rate": 8.206322963529605e-06, - "loss": 0.3571, - "step": 13443 - }, - { - "epoch": 0.8786353833082805, - "grad_norm": 0.47829684615135193, - "learning_rate": 8.206055015293815e-06, - "loss": 0.4379, - "step": 13444 - }, - { - "epoch": 0.8787007385138226, - "grad_norm": 0.42701518535614014, - "learning_rate": 8.205787051420983e-06, - "loss": 0.3682, - "step": 13445 - }, - { - "epoch": 0.8787660937193648, - "grad_norm": 0.5073907375335693, - "learning_rate": 8.205519071912418e-06, - "loss": 0.372, - "step": 13446 - }, - { - "epoch": 0.8788314489249068, - "grad_norm": 0.44476792216300964, - "learning_rate": 8.205251076769427e-06, - "loss": 0.4086, - "step": 13447 - }, - { - "epoch": 0.878896804130449, - "grad_norm": 0.4226808547973633, - "learning_rate": 8.204983065993315e-06, - "loss": 0.3642, - "step": 13448 - }, - { - "epoch": 0.8789621593359911, - "grad_norm": 0.44482994079589844, - "learning_rate": 8.204715039585389e-06, - "loss": 0.3877, - "step": 13449 - }, - { - "epoch": 0.8790275145415333, - "grad_norm": 0.4508343040943146, - "learning_rate": 8.20444699754696e-06, - "loss": 0.3891, - "step": 13450 - }, - { - "epoch": 0.8790928697470753, - "grad_norm": 0.4405125081539154, - "learning_rate": 8.20417893987933e-06, - "loss": 0.3835, - "step": 13451 - }, - { - "epoch": 0.8791582249526175, - "grad_norm": 0.41821563243865967, - "learning_rate": 8.203910866583811e-06, - "loss": 0.3321, - "step": 13452 - }, - { - "epoch": 0.8792235801581596, - "grad_norm": 0.43777090311050415, - "learning_rate": 8.203642777661708e-06, - "loss": 0.3788, - "step": 13453 - }, - { - "epoch": 0.8792889353637017, - "grad_norm": 0.4499454200267792, - "learning_rate": 8.20337467311433e-06, - "loss": 0.3819, - "step": 13454 - }, - { - "epoch": 0.8793542905692439, - "grad_norm": 0.44110482931137085, - "learning_rate": 8.203106552942985e-06, - "loss": 0.3551, - "step": 13455 - }, - { - "epoch": 0.8794196457747859, - "grad_norm": 0.4749086797237396, - "learning_rate": 8.202838417148979e-06, - "loss": 0.4466, - "step": 13456 - }, - { - "epoch": 0.8794850009803281, - "grad_norm": 0.4303574860095978, - "learning_rate": 8.202570265733619e-06, - "loss": 0.3556, - "step": 13457 - }, - { - "epoch": 0.8795503561858702, - "grad_norm": 0.46187305450439453, - "learning_rate": 8.202302098698215e-06, - "loss": 0.4206, - "step": 13458 - }, - { - "epoch": 0.8796157113914124, - "grad_norm": 0.4228116571903229, - "learning_rate": 8.202033916044076e-06, - "loss": 0.3433, - "step": 13459 - }, - { - "epoch": 0.8796810665969544, - "grad_norm": 0.49848175048828125, - "learning_rate": 8.201765717772507e-06, - "loss": 0.3984, - "step": 13460 - }, - { - "epoch": 0.8797464218024965, - "grad_norm": 0.4178866147994995, - "learning_rate": 8.201497503884816e-06, - "loss": 0.3018, - "step": 13461 - }, - { - "epoch": 0.8798117770080387, - "grad_norm": 0.46181097626686096, - "learning_rate": 8.201229274382315e-06, - "loss": 0.4107, - "step": 13462 - }, - { - "epoch": 0.8798771322135808, - "grad_norm": 0.44718632102012634, - "learning_rate": 8.200961029266308e-06, - "loss": 0.366, - "step": 13463 - }, - { - "epoch": 0.879942487419123, - "grad_norm": 0.43121084570884705, - "learning_rate": 8.200692768538105e-06, - "loss": 0.355, - "step": 13464 - }, - { - "epoch": 0.880007842624665, - "grad_norm": 0.5150291919708252, - "learning_rate": 8.200424492199017e-06, - "loss": 0.4489, - "step": 13465 - }, - { - "epoch": 0.8800731978302072, - "grad_norm": 0.4343971610069275, - "learning_rate": 8.200156200250348e-06, - "loss": 0.3467, - "step": 13466 - }, - { - "epoch": 0.8801385530357493, - "grad_norm": 0.46228814125061035, - "learning_rate": 8.199887892693409e-06, - "loss": 0.4217, - "step": 13467 - }, - { - "epoch": 0.8802039082412915, - "grad_norm": 0.45131438970565796, - "learning_rate": 8.19961956952951e-06, - "loss": 0.394, - "step": 13468 - }, - { - "epoch": 0.8802692634468335, - "grad_norm": 0.48295700550079346, - "learning_rate": 8.199351230759955e-06, - "loss": 0.4558, - "step": 13469 - }, - { - "epoch": 0.8803346186523756, - "grad_norm": 0.436484158039093, - "learning_rate": 8.199082876386056e-06, - "loss": 0.3622, - "step": 13470 - }, - { - "epoch": 0.8803999738579178, - "grad_norm": 0.39471927285194397, - "learning_rate": 8.198814506409122e-06, - "loss": 0.3218, - "step": 13471 - }, - { - "epoch": 0.8804653290634599, - "grad_norm": 0.4155243933200836, - "learning_rate": 8.198546120830462e-06, - "loss": 0.366, - "step": 13472 - }, - { - "epoch": 0.880530684269002, - "grad_norm": 0.4520508944988251, - "learning_rate": 8.198277719651384e-06, - "loss": 0.3685, - "step": 13473 - }, - { - "epoch": 0.8805960394745441, - "grad_norm": 0.4244966506958008, - "learning_rate": 8.198009302873198e-06, - "loss": 0.3691, - "step": 13474 - }, - { - "epoch": 0.8806613946800863, - "grad_norm": 0.4450370967388153, - "learning_rate": 8.197740870497212e-06, - "loss": 0.4094, - "step": 13475 - }, - { - "epoch": 0.8807267498856284, - "grad_norm": 0.4767009913921356, - "learning_rate": 8.197472422524738e-06, - "loss": 0.4617, - "step": 13476 - }, - { - "epoch": 0.8807921050911706, - "grad_norm": 0.45346152782440186, - "learning_rate": 8.197203958957082e-06, - "loss": 0.3877, - "step": 13477 - }, - { - "epoch": 0.8808574602967126, - "grad_norm": 0.4302191138267517, - "learning_rate": 8.196935479795555e-06, - "loss": 0.3602, - "step": 13478 - }, - { - "epoch": 0.8809228155022547, - "grad_norm": 0.43985727429389954, - "learning_rate": 8.196666985041465e-06, - "loss": 0.375, - "step": 13479 - }, - { - "epoch": 0.8809881707077969, - "grad_norm": 0.4335164725780487, - "learning_rate": 8.196398474696123e-06, - "loss": 0.3597, - "step": 13480 - }, - { - "epoch": 0.881053525913339, - "grad_norm": 0.4480811059474945, - "learning_rate": 8.196129948760839e-06, - "loss": 0.396, - "step": 13481 - }, - { - "epoch": 0.8811188811188811, - "grad_norm": 0.43360355496406555, - "learning_rate": 8.195861407236921e-06, - "loss": 0.3488, - "step": 13482 - }, - { - "epoch": 0.8811842363244232, - "grad_norm": 0.42258724570274353, - "learning_rate": 8.195592850125681e-06, - "loss": 0.3353, - "step": 13483 - }, - { - "epoch": 0.8812495915299654, - "grad_norm": 0.43493205308914185, - "learning_rate": 8.195324277428427e-06, - "loss": 0.3353, - "step": 13484 - }, - { - "epoch": 0.8813149467355075, - "grad_norm": 0.4324595332145691, - "learning_rate": 8.195055689146469e-06, - "loss": 0.3577, - "step": 13485 - }, - { - "epoch": 0.8813803019410497, - "grad_norm": 0.4718596935272217, - "learning_rate": 8.194787085281118e-06, - "loss": 0.3921, - "step": 13486 - }, - { - "epoch": 0.8814456571465917, - "grad_norm": 0.41776734590530396, - "learning_rate": 8.194518465833684e-06, - "loss": 0.3356, - "step": 13487 - }, - { - "epoch": 0.8815110123521338, - "grad_norm": 0.4012083411216736, - "learning_rate": 8.194249830805476e-06, - "loss": 0.3277, - "step": 13488 - }, - { - "epoch": 0.881576367557676, - "grad_norm": 0.45053204894065857, - "learning_rate": 8.193981180197806e-06, - "loss": 0.3435, - "step": 13489 - }, - { - "epoch": 0.8816417227632181, - "grad_norm": 0.43780970573425293, - "learning_rate": 8.193712514011982e-06, - "loss": 0.3802, - "step": 13490 - }, - { - "epoch": 0.8817070779687602, - "grad_norm": 0.445178359746933, - "learning_rate": 8.193443832249316e-06, - "loss": 0.3613, - "step": 13491 - }, - { - "epoch": 0.8817724331743023, - "grad_norm": 0.4471881687641144, - "learning_rate": 8.19317513491112e-06, - "loss": 0.3854, - "step": 13492 - }, - { - "epoch": 0.8818377883798445, - "grad_norm": 0.41607406735420227, - "learning_rate": 8.192906421998701e-06, - "loss": 0.3689, - "step": 13493 - }, - { - "epoch": 0.8819031435853866, - "grad_norm": 0.4103538990020752, - "learning_rate": 8.192637693513372e-06, - "loss": 0.3649, - "step": 13494 - }, - { - "epoch": 0.8819684987909286, - "grad_norm": 0.47198987007141113, - "learning_rate": 8.192368949456441e-06, - "loss": 0.4608, - "step": 13495 - }, - { - "epoch": 0.8820338539964708, - "grad_norm": 0.4210141897201538, - "learning_rate": 8.192100189829222e-06, - "loss": 0.3392, - "step": 13496 - }, - { - "epoch": 0.8820992092020129, - "grad_norm": 0.47057467699050903, - "learning_rate": 8.191831414633024e-06, - "loss": 0.4058, - "step": 13497 - }, - { - "epoch": 0.8821645644075551, - "grad_norm": 0.4252711832523346, - "learning_rate": 8.19156262386916e-06, - "loss": 0.3404, - "step": 13498 - }, - { - "epoch": 0.8822299196130972, - "grad_norm": 0.4368647038936615, - "learning_rate": 8.19129381753894e-06, - "loss": 0.3897, - "step": 13499 - }, - { - "epoch": 0.8822952748186393, - "grad_norm": 0.4264031946659088, - "learning_rate": 8.191024995643672e-06, - "loss": 0.3994, - "step": 13500 - }, - { - "epoch": 0.8823606300241814, - "grad_norm": 0.4451930820941925, - "learning_rate": 8.19075615818467e-06, - "loss": 0.3871, - "step": 13501 - }, - { - "epoch": 0.8824259852297236, - "grad_norm": 0.435319721698761, - "learning_rate": 8.190487305163245e-06, - "loss": 0.3474, - "step": 13502 - }, - { - "epoch": 0.8824913404352657, - "grad_norm": 0.4761326014995575, - "learning_rate": 8.190218436580707e-06, - "loss": 0.3979, - "step": 13503 - }, - { - "epoch": 0.8825566956408077, - "grad_norm": 0.4505530595779419, - "learning_rate": 8.189949552438373e-06, - "loss": 0.3187, - "step": 13504 - }, - { - "epoch": 0.8826220508463499, - "grad_norm": 0.4660145342350006, - "learning_rate": 8.189680652737546e-06, - "loss": 0.4287, - "step": 13505 - }, - { - "epoch": 0.882687406051892, - "grad_norm": 0.41383031010627747, - "learning_rate": 8.189411737479542e-06, - "loss": 0.3507, - "step": 13506 - }, - { - "epoch": 0.8827527612574342, - "grad_norm": 0.46919748187065125, - "learning_rate": 8.189142806665672e-06, - "loss": 0.4421, - "step": 13507 - }, - { - "epoch": 0.8828181164629763, - "grad_norm": 0.4191434383392334, - "learning_rate": 8.188873860297248e-06, - "loss": 0.3416, - "step": 13508 - }, - { - "epoch": 0.8828834716685184, - "grad_norm": 0.4576224088668823, - "learning_rate": 8.18860489837558e-06, - "loss": 0.4024, - "step": 13509 - }, - { - "epoch": 0.8829488268740605, - "grad_norm": 0.4578748047351837, - "learning_rate": 8.188335920901984e-06, - "loss": 0.4308, - "step": 13510 - }, - { - "epoch": 0.8830141820796027, - "grad_norm": 0.4221654236316681, - "learning_rate": 8.188066927877769e-06, - "loss": 0.3446, - "step": 13511 - }, - { - "epoch": 0.8830795372851448, - "grad_norm": 0.43121111392974854, - "learning_rate": 8.187797919304246e-06, - "loss": 0.3931, - "step": 13512 - }, - { - "epoch": 0.8831448924906868, - "grad_norm": 0.49072685837745667, - "learning_rate": 8.187528895182727e-06, - "loss": 0.4501, - "step": 13513 - }, - { - "epoch": 0.883210247696229, - "grad_norm": 0.46443790197372437, - "learning_rate": 8.187259855514527e-06, - "loss": 0.414, - "step": 13514 - }, - { - "epoch": 0.8832756029017711, - "grad_norm": 0.44713345170021057, - "learning_rate": 8.186990800300956e-06, - "loss": 0.3852, - "step": 13515 - }, - { - "epoch": 0.8833409581073133, - "grad_norm": 0.4597233533859253, - "learning_rate": 8.186721729543326e-06, - "loss": 0.3726, - "step": 13516 - }, - { - "epoch": 0.8834063133128554, - "grad_norm": 0.42686402797698975, - "learning_rate": 8.186452643242952e-06, - "loss": 0.3824, - "step": 13517 - }, - { - "epoch": 0.8834716685183975, - "grad_norm": 0.4627307057380676, - "learning_rate": 8.186183541401142e-06, - "loss": 0.3968, - "step": 13518 - }, - { - "epoch": 0.8835370237239396, - "grad_norm": 0.45088204741477966, - "learning_rate": 8.185914424019214e-06, - "loss": 0.3726, - "step": 13519 - }, - { - "epoch": 0.8836023789294817, - "grad_norm": 0.5059247612953186, - "learning_rate": 8.185645291098476e-06, - "loss": 0.4832, - "step": 13520 - }, - { - "epoch": 0.8836677341350239, - "grad_norm": 0.4470418691635132, - "learning_rate": 8.185376142640244e-06, - "loss": 0.3688, - "step": 13521 - }, - { - "epoch": 0.8837330893405659, - "grad_norm": 0.4880659580230713, - "learning_rate": 8.185106978645827e-06, - "loss": 0.4504, - "step": 13522 - }, - { - "epoch": 0.8837984445461081, - "grad_norm": 0.5302170515060425, - "learning_rate": 8.18483779911654e-06, - "loss": 0.4002, - "step": 13523 - }, - { - "epoch": 0.8838637997516502, - "grad_norm": 0.5098953247070312, - "learning_rate": 8.184568604053696e-06, - "loss": 0.4668, - "step": 13524 - }, - { - "epoch": 0.8839291549571924, - "grad_norm": 0.49490153789520264, - "learning_rate": 8.184299393458608e-06, - "loss": 0.3942, - "step": 13525 - }, - { - "epoch": 0.8839945101627344, - "grad_norm": 0.4526436924934387, - "learning_rate": 8.184030167332589e-06, - "loss": 0.3904, - "step": 13526 - }, - { - "epoch": 0.8840598653682766, - "grad_norm": 0.4353998303413391, - "learning_rate": 8.183760925676951e-06, - "loss": 0.36, - "step": 13527 - }, - { - "epoch": 0.8841252205738187, - "grad_norm": 0.4766745865345001, - "learning_rate": 8.183491668493009e-06, - "loss": 0.4335, - "step": 13528 - }, - { - "epoch": 0.8841905757793608, - "grad_norm": 0.45100441575050354, - "learning_rate": 8.183222395782074e-06, - "loss": 0.366, - "step": 13529 - }, - { - "epoch": 0.884255930984903, - "grad_norm": 0.4555814266204834, - "learning_rate": 8.182953107545462e-06, - "loss": 0.353, - "step": 13530 - }, - { - "epoch": 0.884321286190445, - "grad_norm": 0.4688302278518677, - "learning_rate": 8.182683803784484e-06, - "loss": 0.4396, - "step": 13531 - }, - { - "epoch": 0.8843866413959872, - "grad_norm": 0.7728043794631958, - "learning_rate": 8.182414484500454e-06, - "loss": 0.36, - "step": 13532 - }, - { - "epoch": 0.8844519966015293, - "grad_norm": 0.41783252358436584, - "learning_rate": 8.182145149694687e-06, - "loss": 0.3638, - "step": 13533 - }, - { - "epoch": 0.8845173518070715, - "grad_norm": 0.4418119788169861, - "learning_rate": 8.181875799368496e-06, - "loss": 0.3429, - "step": 13534 - }, - { - "epoch": 0.8845827070126135, - "grad_norm": 0.42237022519111633, - "learning_rate": 8.181606433523193e-06, - "loss": 0.3164, - "step": 13535 - }, - { - "epoch": 0.8846480622181557, - "grad_norm": 0.466979444026947, - "learning_rate": 8.181337052160094e-06, - "loss": 0.3914, - "step": 13536 - }, - { - "epoch": 0.8847134174236978, - "grad_norm": 0.49458831548690796, - "learning_rate": 8.181067655280512e-06, - "loss": 0.4054, - "step": 13537 - }, - { - "epoch": 0.8847787726292399, - "grad_norm": 0.44746413826942444, - "learning_rate": 8.180798242885762e-06, - "loss": 0.4021, - "step": 13538 - }, - { - "epoch": 0.8848441278347821, - "grad_norm": 0.44023093581199646, - "learning_rate": 8.180528814977157e-06, - "loss": 0.3858, - "step": 13539 - }, - { - "epoch": 0.8849094830403241, - "grad_norm": 0.41607964038848877, - "learning_rate": 8.180259371556011e-06, - "loss": 0.3404, - "step": 13540 - }, - { - "epoch": 0.8849748382458663, - "grad_norm": 0.44271618127822876, - "learning_rate": 8.179989912623638e-06, - "loss": 0.3684, - "step": 13541 - }, - { - "epoch": 0.8850401934514084, - "grad_norm": 0.43409037590026855, - "learning_rate": 8.179720438181352e-06, - "loss": 0.3791, - "step": 13542 - }, - { - "epoch": 0.8851055486569506, - "grad_norm": 0.4831492602825165, - "learning_rate": 8.179450948230467e-06, - "loss": 0.3961, - "step": 13543 - }, - { - "epoch": 0.8851709038624926, - "grad_norm": 0.4891977310180664, - "learning_rate": 8.1791814427723e-06, - "loss": 0.4174, - "step": 13544 - }, - { - "epoch": 0.8852362590680347, - "grad_norm": 0.41980496048927307, - "learning_rate": 8.178911921808164e-06, - "loss": 0.3158, - "step": 13545 - }, - { - "epoch": 0.8853016142735769, - "grad_norm": 0.46518459916114807, - "learning_rate": 8.178642385339372e-06, - "loss": 0.3933, - "step": 13546 - }, - { - "epoch": 0.885366969479119, - "grad_norm": 0.43595242500305176, - "learning_rate": 8.178372833367239e-06, - "loss": 0.4049, - "step": 13547 - }, - { - "epoch": 0.8854323246846612, - "grad_norm": 0.4373858571052551, - "learning_rate": 8.178103265893082e-06, - "loss": 0.3689, - "step": 13548 - }, - { - "epoch": 0.8854976798902032, - "grad_norm": 0.41803237795829773, - "learning_rate": 8.177833682918215e-06, - "loss": 0.3161, - "step": 13549 - }, - { - "epoch": 0.8855630350957454, - "grad_norm": 0.4319811165332794, - "learning_rate": 8.177564084443951e-06, - "loss": 0.372, - "step": 13550 - }, - { - "epoch": 0.8856283903012875, - "grad_norm": 0.45131462812423706, - "learning_rate": 8.177294470471607e-06, - "loss": 0.4278, - "step": 13551 - }, - { - "epoch": 0.8856937455068297, - "grad_norm": 0.4333389401435852, - "learning_rate": 8.177024841002497e-06, - "loss": 0.3837, - "step": 13552 - }, - { - "epoch": 0.8857591007123717, - "grad_norm": 0.4520132541656494, - "learning_rate": 8.176755196037935e-06, - "loss": 0.3613, - "step": 13553 - }, - { - "epoch": 0.8858244559179138, - "grad_norm": 0.5520606637001038, - "learning_rate": 8.17648553557924e-06, - "loss": 0.3561, - "step": 13554 - }, - { - "epoch": 0.885889811123456, - "grad_norm": 0.4240726828575134, - "learning_rate": 8.176215859627722e-06, - "loss": 0.3279, - "step": 13555 - }, - { - "epoch": 0.8859551663289981, - "grad_norm": 0.44275182485580444, - "learning_rate": 8.1759461681847e-06, - "loss": 0.3705, - "step": 13556 - }, - { - "epoch": 0.8860205215345403, - "grad_norm": 0.4383064806461334, - "learning_rate": 8.175676461251488e-06, - "loss": 0.3734, - "step": 13557 - }, - { - "epoch": 0.8860858767400823, - "grad_norm": 0.4564521312713623, - "learning_rate": 8.175406738829402e-06, - "loss": 0.3925, - "step": 13558 - }, - { - "epoch": 0.8861512319456245, - "grad_norm": 0.45131510496139526, - "learning_rate": 8.175137000919758e-06, - "loss": 0.4209, - "step": 13559 - }, - { - "epoch": 0.8862165871511666, - "grad_norm": 0.43068280816078186, - "learning_rate": 8.17486724752387e-06, - "loss": 0.3589, - "step": 13560 - }, - { - "epoch": 0.8862819423567088, - "grad_norm": 0.4380665123462677, - "learning_rate": 8.174597478643055e-06, - "loss": 0.3863, - "step": 13561 - }, - { - "epoch": 0.8863472975622508, - "grad_norm": 0.44074466824531555, - "learning_rate": 8.174327694278627e-06, - "loss": 0.3839, - "step": 13562 - }, - { - "epoch": 0.8864126527677929, - "grad_norm": 0.43280836939811707, - "learning_rate": 8.174057894431904e-06, - "loss": 0.3674, - "step": 13563 - }, - { - "epoch": 0.8864780079733351, - "grad_norm": 0.4463036060333252, - "learning_rate": 8.173788079104202e-06, - "loss": 0.3661, - "step": 13564 - }, - { - "epoch": 0.8865433631788772, - "grad_norm": 0.43831667304039, - "learning_rate": 8.173518248296834e-06, - "loss": 0.3903, - "step": 13565 - }, - { - "epoch": 0.8866087183844193, - "grad_norm": 0.4397267699241638, - "learning_rate": 8.17324840201112e-06, - "loss": 0.3919, - "step": 13566 - }, - { - "epoch": 0.8866740735899614, - "grad_norm": 0.4563021659851074, - "learning_rate": 8.172978540248374e-06, - "loss": 0.401, - "step": 13567 - }, - { - "epoch": 0.8867394287955036, - "grad_norm": 0.42863720655441284, - "learning_rate": 8.17270866300991e-06, - "loss": 0.3793, - "step": 13568 - }, - { - "epoch": 0.8868047840010457, - "grad_norm": 0.40967488288879395, - "learning_rate": 8.17243877029705e-06, - "loss": 0.3378, - "step": 13569 - }, - { - "epoch": 0.8868701392065879, - "grad_norm": 0.46297669410705566, - "learning_rate": 8.172168862111105e-06, - "loss": 0.4137, - "step": 13570 - }, - { - "epoch": 0.8869354944121299, - "grad_norm": 0.46139878034591675, - "learning_rate": 8.171898938453395e-06, - "loss": 0.4214, - "step": 13571 - }, - { - "epoch": 0.887000849617672, - "grad_norm": 0.5869432091712952, - "learning_rate": 8.171628999325234e-06, - "loss": 0.3508, - "step": 13572 - }, - { - "epoch": 0.8870662048232142, - "grad_norm": 0.44121357798576355, - "learning_rate": 8.17135904472794e-06, - "loss": 0.3831, - "step": 13573 - }, - { - "epoch": 0.8871315600287563, - "grad_norm": 0.44181132316589355, - "learning_rate": 8.171089074662827e-06, - "loss": 0.3809, - "step": 13574 - }, - { - "epoch": 0.8871969152342984, - "grad_norm": 0.4677775502204895, - "learning_rate": 8.170819089131217e-06, - "loss": 0.436, - "step": 13575 - }, - { - "epoch": 0.8872622704398405, - "grad_norm": 0.6737505197525024, - "learning_rate": 8.170549088134423e-06, - "loss": 0.3741, - "step": 13576 - }, - { - "epoch": 0.8873276256453827, - "grad_norm": 0.4607175588607788, - "learning_rate": 8.170279071673764e-06, - "loss": 0.3698, - "step": 13577 - }, - { - "epoch": 0.8873929808509248, - "grad_norm": 0.45459550619125366, - "learning_rate": 8.170009039750554e-06, - "loss": 0.3706, - "step": 13578 - }, - { - "epoch": 0.8874583360564668, - "grad_norm": 0.45948904752731323, - "learning_rate": 8.169738992366111e-06, - "loss": 0.4077, - "step": 13579 - }, - { - "epoch": 0.887523691262009, - "grad_norm": 0.44636574387550354, - "learning_rate": 8.169468929521755e-06, - "loss": 0.3731, - "step": 13580 - }, - { - "epoch": 0.8875890464675511, - "grad_norm": 0.4502796530723572, - "learning_rate": 8.169198851218799e-06, - "loss": 0.3737, - "step": 13581 - }, - { - "epoch": 0.8876544016730933, - "grad_norm": 0.4874180257320404, - "learning_rate": 8.168928757458565e-06, - "loss": 0.415, - "step": 13582 - }, - { - "epoch": 0.8877197568786354, - "grad_norm": 0.43732890486717224, - "learning_rate": 8.168658648242365e-06, - "loss": 0.366, - "step": 13583 - }, - { - "epoch": 0.8877851120841775, - "grad_norm": 0.43505555391311646, - "learning_rate": 8.16838852357152e-06, - "loss": 0.3621, - "step": 13584 - }, - { - "epoch": 0.8878504672897196, - "grad_norm": 0.4564209282398224, - "learning_rate": 8.168118383447346e-06, - "loss": 0.3748, - "step": 13585 - }, - { - "epoch": 0.8879158224952618, - "grad_norm": 0.4783664345741272, - "learning_rate": 8.167848227871163e-06, - "loss": 0.4131, - "step": 13586 - }, - { - "epoch": 0.8879811777008039, - "grad_norm": 0.448490172624588, - "learning_rate": 8.167578056844284e-06, - "loss": 0.3866, - "step": 13587 - }, - { - "epoch": 0.8880465329063459, - "grad_norm": 0.48500195145606995, - "learning_rate": 8.16730787036803e-06, - "loss": 0.4342, - "step": 13588 - }, - { - "epoch": 0.8881118881118881, - "grad_norm": 0.38912174105644226, - "learning_rate": 8.16703766844372e-06, - "loss": 0.3086, - "step": 13589 - }, - { - "epoch": 0.8881772433174302, - "grad_norm": 0.4245205223560333, - "learning_rate": 8.166767451072669e-06, - "loss": 0.3638, - "step": 13590 - }, - { - "epoch": 0.8882425985229724, - "grad_norm": 0.44031450152397156, - "learning_rate": 8.166497218256195e-06, - "loss": 0.3993, - "step": 13591 - }, - { - "epoch": 0.8883079537285145, - "grad_norm": 0.43893659114837646, - "learning_rate": 8.166226969995618e-06, - "loss": 0.3339, - "step": 13592 - }, - { - "epoch": 0.8883733089340566, - "grad_norm": 0.4216693341732025, - "learning_rate": 8.165956706292256e-06, - "loss": 0.3282, - "step": 13593 - }, - { - "epoch": 0.8884386641395987, - "grad_norm": 0.45118266344070435, - "learning_rate": 8.165686427147425e-06, - "loss": 0.3719, - "step": 13594 - }, - { - "epoch": 0.8885040193451409, - "grad_norm": 0.43111103773117065, - "learning_rate": 8.165416132562447e-06, - "loss": 0.3888, - "step": 13595 - }, - { - "epoch": 0.888569374550683, - "grad_norm": 0.4648542106151581, - "learning_rate": 8.165145822538635e-06, - "loss": 0.4031, - "step": 13596 - }, - { - "epoch": 0.888634729756225, - "grad_norm": 0.4463261365890503, - "learning_rate": 8.164875497077313e-06, - "loss": 0.3744, - "step": 13597 - }, - { - "epoch": 0.8887000849617672, - "grad_norm": 0.4441666007041931, - "learning_rate": 8.164605156179794e-06, - "loss": 0.3784, - "step": 13598 - }, - { - "epoch": 0.8887654401673093, - "grad_norm": 0.4290092885494232, - "learning_rate": 8.164334799847402e-06, - "loss": 0.3232, - "step": 13599 - }, - { - "epoch": 0.8888307953728515, - "grad_norm": 0.46337729692459106, - "learning_rate": 8.164064428081451e-06, - "loss": 0.3923, - "step": 13600 - }, - { - "epoch": 0.8888961505783936, - "grad_norm": 0.4588935971260071, - "learning_rate": 8.163794040883261e-06, - "loss": 0.346, - "step": 13601 - }, - { - "epoch": 0.8889615057839357, - "grad_norm": 0.42463478446006775, - "learning_rate": 8.163523638254154e-06, - "loss": 0.3771, - "step": 13602 - }, - { - "epoch": 0.8890268609894778, - "grad_norm": 0.42009827494621277, - "learning_rate": 8.163253220195446e-06, - "loss": 0.3443, - "step": 13603 - }, - { - "epoch": 0.8890922161950199, - "grad_norm": 0.48326075077056885, - "learning_rate": 8.162982786708455e-06, - "loss": 0.368, - "step": 13604 - }, - { - "epoch": 0.8891575714005621, - "grad_norm": 0.4304189682006836, - "learning_rate": 8.1627123377945e-06, - "loss": 0.3572, - "step": 13605 - }, - { - "epoch": 0.8892229266061041, - "grad_norm": 0.4433961510658264, - "learning_rate": 8.162441873454904e-06, - "loss": 0.3799, - "step": 13606 - }, - { - "epoch": 0.8892882818116463, - "grad_norm": 0.433493435382843, - "learning_rate": 8.162171393690982e-06, - "loss": 0.4022, - "step": 13607 - }, - { - "epoch": 0.8893536370171884, - "grad_norm": 0.411797434091568, - "learning_rate": 8.161900898504055e-06, - "loss": 0.343, - "step": 13608 - }, - { - "epoch": 0.8894189922227306, - "grad_norm": 0.4686589539051056, - "learning_rate": 8.16163038789544e-06, - "loss": 0.4436, - "step": 13609 - }, - { - "epoch": 0.8894843474282726, - "grad_norm": 0.40443435311317444, - "learning_rate": 8.16135986186646e-06, - "loss": 0.3243, - "step": 13610 - }, - { - "epoch": 0.8895497026338148, - "grad_norm": 0.44047921895980835, - "learning_rate": 8.161089320418434e-06, - "loss": 0.3616, - "step": 13611 - }, - { - "epoch": 0.8896150578393569, - "grad_norm": 0.43029654026031494, - "learning_rate": 8.160818763552677e-06, - "loss": 0.3781, - "step": 13612 - }, - { - "epoch": 0.889680413044899, - "grad_norm": 0.4242212176322937, - "learning_rate": 8.160548191270516e-06, - "loss": 0.3692, - "step": 13613 - }, - { - "epoch": 0.8897457682504412, - "grad_norm": 0.4410896301269531, - "learning_rate": 8.160277603573263e-06, - "loss": 0.4013, - "step": 13614 - }, - { - "epoch": 0.8898111234559832, - "grad_norm": 0.45493629574775696, - "learning_rate": 8.160007000462243e-06, - "loss": 0.4028, - "step": 13615 - }, - { - "epoch": 0.8898764786615254, - "grad_norm": 0.41715767979621887, - "learning_rate": 8.159736381938775e-06, - "loss": 0.324, - "step": 13616 - }, - { - "epoch": 0.8899418338670675, - "grad_norm": 0.47945287823677063, - "learning_rate": 8.159465748004177e-06, - "loss": 0.3804, - "step": 13617 - }, - { - "epoch": 0.8900071890726097, - "grad_norm": 0.46555617451667786, - "learning_rate": 8.15919509865977e-06, - "loss": 0.3864, - "step": 13618 - }, - { - "epoch": 0.8900725442781517, - "grad_norm": 0.4466501772403717, - "learning_rate": 8.158924433906872e-06, - "loss": 0.3498, - "step": 13619 - }, - { - "epoch": 0.8901378994836939, - "grad_norm": 0.4793967008590698, - "learning_rate": 8.158653753746808e-06, - "loss": 0.4582, - "step": 13620 - }, - { - "epoch": 0.890203254689236, - "grad_norm": 0.4156058728694916, - "learning_rate": 8.158383058180894e-06, - "loss": 0.3434, - "step": 13621 - }, - { - "epoch": 0.8902686098947781, - "grad_norm": 0.41390785574913025, - "learning_rate": 8.158112347210452e-06, - "loss": 0.3234, - "step": 13622 - }, - { - "epoch": 0.8903339651003203, - "grad_norm": 0.4056573808193207, - "learning_rate": 8.157841620836802e-06, - "loss": 0.3568, - "step": 13623 - }, - { - "epoch": 0.8903993203058623, - "grad_norm": 0.4190270006656647, - "learning_rate": 8.157570879061265e-06, - "loss": 0.3441, - "step": 13624 - }, - { - "epoch": 0.8904646755114045, - "grad_norm": 0.4631539583206177, - "learning_rate": 8.157300121885162e-06, - "loss": 0.4338, - "step": 13625 - }, - { - "epoch": 0.8905300307169466, - "grad_norm": 0.47524601221084595, - "learning_rate": 8.15702934930981e-06, - "loss": 0.3547, - "step": 13626 - }, - { - "epoch": 0.8905953859224888, - "grad_norm": 0.4354708194732666, - "learning_rate": 8.156758561336533e-06, - "loss": 0.3513, - "step": 13627 - }, - { - "epoch": 0.8906607411280308, - "grad_norm": 0.42499879002571106, - "learning_rate": 8.156487757966652e-06, - "loss": 0.3374, - "step": 13628 - }, - { - "epoch": 0.8907260963335729, - "grad_norm": 0.42320162057876587, - "learning_rate": 8.156216939201484e-06, - "loss": 0.3458, - "step": 13629 - }, - { - "epoch": 0.8907914515391151, - "grad_norm": 0.4369945526123047, - "learning_rate": 8.155946105042355e-06, - "loss": 0.3914, - "step": 13630 - }, - { - "epoch": 0.8908568067446572, - "grad_norm": 0.46945497393608093, - "learning_rate": 8.155675255490582e-06, - "loss": 0.4225, - "step": 13631 - }, - { - "epoch": 0.8909221619501994, - "grad_norm": 0.43981820344924927, - "learning_rate": 8.155404390547489e-06, - "loss": 0.3629, - "step": 13632 - }, - { - "epoch": 0.8909875171557414, - "grad_norm": 0.4373462200164795, - "learning_rate": 8.155133510214395e-06, - "loss": 0.3807, - "step": 13633 - }, - { - "epoch": 0.8910528723612836, - "grad_norm": 0.5137832164764404, - "learning_rate": 8.154862614492623e-06, - "loss": 0.396, - "step": 13634 - }, - { - "epoch": 0.8911182275668257, - "grad_norm": 0.40821897983551025, - "learning_rate": 8.154591703383492e-06, - "loss": 0.3357, - "step": 13635 - }, - { - "epoch": 0.8911835827723679, - "grad_norm": 0.9074820876121521, - "learning_rate": 8.154320776888323e-06, - "loss": 0.5001, - "step": 13636 - }, - { - "epoch": 0.8912489379779099, - "grad_norm": 0.45739322900772095, - "learning_rate": 8.15404983500844e-06, - "loss": 0.3331, - "step": 13637 - }, - { - "epoch": 0.891314293183452, - "grad_norm": 0.38675034046173096, - "learning_rate": 8.153778877745164e-06, - "loss": 0.2627, - "step": 13638 - }, - { - "epoch": 0.8913796483889942, - "grad_norm": 0.4908689558506012, - "learning_rate": 8.153507905099814e-06, - "loss": 0.4555, - "step": 13639 - }, - { - "epoch": 0.8914450035945363, - "grad_norm": 0.4473533630371094, - "learning_rate": 8.153236917073714e-06, - "loss": 0.3491, - "step": 13640 - }, - { - "epoch": 0.8915103588000785, - "grad_norm": 0.44573792815208435, - "learning_rate": 8.152965913668188e-06, - "loss": 0.396, - "step": 13641 - }, - { - "epoch": 0.8915757140056205, - "grad_norm": 0.43579259514808655, - "learning_rate": 8.152694894884552e-06, - "loss": 0.3697, - "step": 13642 - }, - { - "epoch": 0.8916410692111627, - "grad_norm": 0.4252687692642212, - "learning_rate": 8.152423860724132e-06, - "loss": 0.3572, - "step": 13643 - }, - { - "epoch": 0.8917064244167048, - "grad_norm": 0.42283689975738525, - "learning_rate": 8.152152811188248e-06, - "loss": 0.3982, - "step": 13644 - }, - { - "epoch": 0.891771779622247, - "grad_norm": 0.3983820974826813, - "learning_rate": 8.151881746278224e-06, - "loss": 0.3204, - "step": 13645 - }, - { - "epoch": 0.891837134827789, - "grad_norm": 0.4324599504470825, - "learning_rate": 8.15161066599538e-06, - "loss": 0.3933, - "step": 13646 - }, - { - "epoch": 0.8919024900333311, - "grad_norm": 0.4834281802177429, - "learning_rate": 8.151339570341039e-06, - "loss": 0.419, - "step": 13647 - }, - { - "epoch": 0.8919678452388733, - "grad_norm": 0.44146692752838135, - "learning_rate": 8.151068459316522e-06, - "loss": 0.3747, - "step": 13648 - }, - { - "epoch": 0.8920332004444154, - "grad_norm": 0.4567866027355194, - "learning_rate": 8.150797332923154e-06, - "loss": 0.4196, - "step": 13649 - }, - { - "epoch": 0.8920985556499575, - "grad_norm": 0.4537763297557831, - "learning_rate": 8.150526191162255e-06, - "loss": 0.3938, - "step": 13650 - }, - { - "epoch": 0.8921639108554996, - "grad_norm": 0.430987685918808, - "learning_rate": 8.15025503403515e-06, - "loss": 0.3707, - "step": 13651 - }, - { - "epoch": 0.8922292660610418, - "grad_norm": 0.43445083498954773, - "learning_rate": 8.149983861543159e-06, - "loss": 0.3291, - "step": 13652 - }, - { - "epoch": 0.8922946212665839, - "grad_norm": 0.45490071177482605, - "learning_rate": 8.149712673687604e-06, - "loss": 0.4088, - "step": 13653 - }, - { - "epoch": 0.8923599764721261, - "grad_norm": 0.43494483828544617, - "learning_rate": 8.14944147046981e-06, - "loss": 0.3474, - "step": 13654 - }, - { - "epoch": 0.8924253316776681, - "grad_norm": 0.4581224322319031, - "learning_rate": 8.1491702518911e-06, - "loss": 0.365, - "step": 13655 - }, - { - "epoch": 0.8924906868832102, - "grad_norm": 0.44352850317955017, - "learning_rate": 8.148899017952794e-06, - "loss": 0.3834, - "step": 13656 - }, - { - "epoch": 0.8925560420887524, - "grad_norm": 0.5057024359703064, - "learning_rate": 8.148627768656217e-06, - "loss": 0.4331, - "step": 13657 - }, - { - "epoch": 0.8926213972942945, - "grad_norm": 0.4180637300014496, - "learning_rate": 8.148356504002692e-06, - "loss": 0.3211, - "step": 13658 - }, - { - "epoch": 0.8926867524998366, - "grad_norm": 0.4715318977832794, - "learning_rate": 8.148085223993541e-06, - "loss": 0.3602, - "step": 13659 - }, - { - "epoch": 0.8927521077053787, - "grad_norm": 0.4708700478076935, - "learning_rate": 8.147813928630088e-06, - "loss": 0.4002, - "step": 13660 - }, - { - "epoch": 0.8928174629109209, - "grad_norm": 0.42736631631851196, - "learning_rate": 8.147542617913657e-06, - "loss": 0.3477, - "step": 13661 - }, - { - "epoch": 0.892882818116463, - "grad_norm": 0.4274823069572449, - "learning_rate": 8.14727129184557e-06, - "loss": 0.3696, - "step": 13662 - }, - { - "epoch": 0.892948173322005, - "grad_norm": 0.45414844155311584, - "learning_rate": 8.14699995042715e-06, - "loss": 0.4161, - "step": 13663 - }, - { - "epoch": 0.8930135285275472, - "grad_norm": 0.4576912522315979, - "learning_rate": 8.14672859365972e-06, - "loss": 0.3728, - "step": 13664 - }, - { - "epoch": 0.8930788837330893, - "grad_norm": 0.3824891746044159, - "learning_rate": 8.146457221544606e-06, - "loss": 0.3073, - "step": 13665 - }, - { - "epoch": 0.8931442389386315, - "grad_norm": 0.44985726475715637, - "learning_rate": 8.14618583408313e-06, - "loss": 0.4023, - "step": 13666 - }, - { - "epoch": 0.8932095941441736, - "grad_norm": 0.42375385761260986, - "learning_rate": 8.145914431276616e-06, - "loss": 0.3415, - "step": 13667 - }, - { - "epoch": 0.8932749493497157, - "grad_norm": 0.45005932450294495, - "learning_rate": 8.145643013126388e-06, - "loss": 0.3987, - "step": 13668 - }, - { - "epoch": 0.8933403045552578, - "grad_norm": 0.4389643967151642, - "learning_rate": 8.145371579633767e-06, - "loss": 0.3559, - "step": 13669 - }, - { - "epoch": 0.8934056597608, - "grad_norm": 0.4423743188381195, - "learning_rate": 8.14510013080008e-06, - "loss": 0.3757, - "step": 13670 - }, - { - "epoch": 0.8934710149663421, - "grad_norm": 0.4107572138309479, - "learning_rate": 8.144828666626651e-06, - "loss": 0.3334, - "step": 13671 - }, - { - "epoch": 0.8935363701718841, - "grad_norm": 0.4441332221031189, - "learning_rate": 8.144557187114803e-06, - "loss": 0.347, - "step": 13672 - }, - { - "epoch": 0.8936017253774263, - "grad_norm": 0.45187175273895264, - "learning_rate": 8.14428569226586e-06, - "loss": 0.3978, - "step": 13673 - }, - { - "epoch": 0.8936670805829684, - "grad_norm": 0.46893805265426636, - "learning_rate": 8.144014182081145e-06, - "loss": 0.4094, - "step": 13674 - }, - { - "epoch": 0.8937324357885106, - "grad_norm": 0.4110645651817322, - "learning_rate": 8.143742656561986e-06, - "loss": 0.3416, - "step": 13675 - }, - { - "epoch": 0.8937977909940527, - "grad_norm": 0.4819435477256775, - "learning_rate": 8.143471115709703e-06, - "loss": 0.4356, - "step": 13676 - }, - { - "epoch": 0.8938631461995948, - "grad_norm": 0.43986037373542786, - "learning_rate": 8.143199559525624e-06, - "loss": 0.3576, - "step": 13677 - }, - { - "epoch": 0.8939285014051369, - "grad_norm": 0.4579463005065918, - "learning_rate": 8.14292798801107e-06, - "loss": 0.418, - "step": 13678 - }, - { - "epoch": 0.8939938566106791, - "grad_norm": 0.4032513201236725, - "learning_rate": 8.142656401167367e-06, - "loss": 0.3178, - "step": 13679 - }, - { - "epoch": 0.8940592118162212, - "grad_norm": 0.46665820479393005, - "learning_rate": 8.142384798995843e-06, - "loss": 0.4178, - "step": 13680 - }, - { - "epoch": 0.8941245670217632, - "grad_norm": 0.44561415910720825, - "learning_rate": 8.142113181497819e-06, - "loss": 0.3954, - "step": 13681 - }, - { - "epoch": 0.8941899222273054, - "grad_norm": 0.4361459016799927, - "learning_rate": 8.141841548674619e-06, - "loss": 0.3518, - "step": 13682 - }, - { - "epoch": 0.8942552774328475, - "grad_norm": 0.46624186635017395, - "learning_rate": 8.141569900527571e-06, - "loss": 0.4021, - "step": 13683 - }, - { - "epoch": 0.8943206326383897, - "grad_norm": 0.42903366684913635, - "learning_rate": 8.141298237057995e-06, - "loss": 0.3499, - "step": 13684 - }, - { - "epoch": 0.8943859878439318, - "grad_norm": 0.4311058521270752, - "learning_rate": 8.141026558267222e-06, - "loss": 0.363, - "step": 13685 - }, - { - "epoch": 0.8944513430494739, - "grad_norm": 0.4667481780052185, - "learning_rate": 8.140754864156573e-06, - "loss": 0.4418, - "step": 13686 - }, - { - "epoch": 0.894516698255016, - "grad_norm": 0.4319532811641693, - "learning_rate": 8.140483154727376e-06, - "loss": 0.3388, - "step": 13687 - }, - { - "epoch": 0.8945820534605581, - "grad_norm": 0.4356854259967804, - "learning_rate": 8.140211429980955e-06, - "loss": 0.3668, - "step": 13688 - }, - { - "epoch": 0.8946474086661003, - "grad_norm": 0.4317588806152344, - "learning_rate": 8.139939689918634e-06, - "loss": 0.3533, - "step": 13689 - }, - { - "epoch": 0.8947127638716423, - "grad_norm": 0.4060245156288147, - "learning_rate": 8.139667934541738e-06, - "loss": 0.2844, - "step": 13690 - }, - { - "epoch": 0.8947781190771845, - "grad_norm": 0.4455457925796509, - "learning_rate": 8.139396163851596e-06, - "loss": 0.3959, - "step": 13691 - }, - { - "epoch": 0.8948434742827266, - "grad_norm": 0.464456170797348, - "learning_rate": 8.13912437784953e-06, - "loss": 0.433, - "step": 13692 - }, - { - "epoch": 0.8949088294882688, - "grad_norm": 0.43154770135879517, - "learning_rate": 8.138852576536868e-06, - "loss": 0.3251, - "step": 13693 - }, - { - "epoch": 0.8949741846938108, - "grad_norm": 0.43340033292770386, - "learning_rate": 8.138580759914933e-06, - "loss": 0.3567, - "step": 13694 - }, - { - "epoch": 0.895039539899353, - "grad_norm": 0.42331504821777344, - "learning_rate": 8.138308927985053e-06, - "loss": 0.3336, - "step": 13695 - }, - { - "epoch": 0.8951048951048951, - "grad_norm": 0.4122401177883148, - "learning_rate": 8.138037080748552e-06, - "loss": 0.3355, - "step": 13696 - }, - { - "epoch": 0.8951702503104372, - "grad_norm": 0.4513709247112274, - "learning_rate": 8.137765218206759e-06, - "loss": 0.393, - "step": 13697 - }, - { - "epoch": 0.8952356055159794, - "grad_norm": 0.42458638548851013, - "learning_rate": 8.137493340360998e-06, - "loss": 0.367, - "step": 13698 - }, - { - "epoch": 0.8953009607215214, - "grad_norm": 0.4798518717288971, - "learning_rate": 8.137221447212594e-06, - "loss": 0.4237, - "step": 13699 - }, - { - "epoch": 0.8953663159270636, - "grad_norm": 0.4395330250263214, - "learning_rate": 8.136949538762874e-06, - "loss": 0.3851, - "step": 13700 - }, - { - "epoch": 0.8954316711326057, - "grad_norm": 0.8126868605613708, - "learning_rate": 8.136677615013167e-06, - "loss": 0.4555, - "step": 13701 - }, - { - "epoch": 0.8954970263381479, - "grad_norm": 0.4594406485557556, - "learning_rate": 8.136405675964793e-06, - "loss": 0.3757, - "step": 13702 - }, - { - "epoch": 0.89556238154369, - "grad_norm": 0.4782710373401642, - "learning_rate": 8.136133721619084e-06, - "loss": 0.4029, - "step": 13703 - }, - { - "epoch": 0.8956277367492321, - "grad_norm": 0.4623786509037018, - "learning_rate": 8.135861751977363e-06, - "loss": 0.4077, - "step": 13704 - }, - { - "epoch": 0.8956930919547742, - "grad_norm": 0.4414733946323395, - "learning_rate": 8.135589767040959e-06, - "loss": 0.393, - "step": 13705 - }, - { - "epoch": 0.8957584471603163, - "grad_norm": 0.4439255893230438, - "learning_rate": 8.135317766811197e-06, - "loss": 0.3723, - "step": 13706 - }, - { - "epoch": 0.8958238023658585, - "grad_norm": 0.47301366925239563, - "learning_rate": 8.135045751289405e-06, - "loss": 0.4238, - "step": 13707 - }, - { - "epoch": 0.8958891575714005, - "grad_norm": 0.440405011177063, - "learning_rate": 8.13477372047691e-06, - "loss": 0.3672, - "step": 13708 - }, - { - "epoch": 0.8959545127769427, - "grad_norm": 0.47691601514816284, - "learning_rate": 8.134501674375035e-06, - "loss": 0.438, - "step": 13709 - }, - { - "epoch": 0.8960198679824848, - "grad_norm": 0.41470226645469666, - "learning_rate": 8.13422961298511e-06, - "loss": 0.3602, - "step": 13710 - }, - { - "epoch": 0.896085223188027, - "grad_norm": 0.43777281045913696, - "learning_rate": 8.133957536308461e-06, - "loss": 0.4027, - "step": 13711 - }, - { - "epoch": 0.896150578393569, - "grad_norm": 0.49178236722946167, - "learning_rate": 8.133685444346415e-06, - "loss": 0.402, - "step": 13712 - }, - { - "epoch": 0.8962159335991111, - "grad_norm": 0.41768455505371094, - "learning_rate": 8.133413337100302e-06, - "loss": 0.3535, - "step": 13713 - }, - { - "epoch": 0.8962812888046533, - "grad_norm": 0.47148391604423523, - "learning_rate": 8.133141214571444e-06, - "loss": 0.4033, - "step": 13714 - }, - { - "epoch": 0.8963466440101954, - "grad_norm": 0.45111405849456787, - "learning_rate": 8.132869076761171e-06, - "loss": 0.3994, - "step": 13715 - }, - { - "epoch": 0.8964119992157376, - "grad_norm": 0.41851431131362915, - "learning_rate": 8.132596923670811e-06, - "loss": 0.3361, - "step": 13716 - }, - { - "epoch": 0.8964773544212796, - "grad_norm": 0.451364129781723, - "learning_rate": 8.13232475530169e-06, - "loss": 0.3664, - "step": 13717 - }, - { - "epoch": 0.8965427096268218, - "grad_norm": 0.4423421621322632, - "learning_rate": 8.132052571655138e-06, - "loss": 0.4072, - "step": 13718 - }, - { - "epoch": 0.8966080648323639, - "grad_norm": 0.43970921635627747, - "learning_rate": 8.131780372732479e-06, - "loss": 0.3657, - "step": 13719 - }, - { - "epoch": 0.8966734200379061, - "grad_norm": 0.4629170000553131, - "learning_rate": 8.131508158535042e-06, - "loss": 0.4172, - "step": 13720 - }, - { - "epoch": 0.8967387752434481, - "grad_norm": 0.47206780314445496, - "learning_rate": 8.131235929064155e-06, - "loss": 0.4114, - "step": 13721 - }, - { - "epoch": 0.8968041304489902, - "grad_norm": 0.44379761815071106, - "learning_rate": 8.130963684321146e-06, - "loss": 0.3913, - "step": 13722 - }, - { - "epoch": 0.8968694856545324, - "grad_norm": 0.44583436846733093, - "learning_rate": 8.130691424307342e-06, - "loss": 0.4011, - "step": 13723 - }, - { - "epoch": 0.8969348408600745, - "grad_norm": 0.42743930220603943, - "learning_rate": 8.130419149024071e-06, - "loss": 0.3323, - "step": 13724 - }, - { - "epoch": 0.8970001960656167, - "grad_norm": 0.3742446303367615, - "learning_rate": 8.130146858472662e-06, - "loss": 0.2674, - "step": 13725 - }, - { - "epoch": 0.8970655512711587, - "grad_norm": 0.49502402544021606, - "learning_rate": 8.129874552654441e-06, - "loss": 0.4736, - "step": 13726 - }, - { - "epoch": 0.8971309064767009, - "grad_norm": 0.4888174533843994, - "learning_rate": 8.129602231570742e-06, - "loss": 0.4525, - "step": 13727 - }, - { - "epoch": 0.897196261682243, - "grad_norm": 0.4640890061855316, - "learning_rate": 8.129329895222884e-06, - "loss": 0.3483, - "step": 13728 - }, - { - "epoch": 0.8972616168877852, - "grad_norm": 0.45433375239372253, - "learning_rate": 8.129057543612202e-06, - "loss": 0.4101, - "step": 13729 - }, - { - "epoch": 0.8973269720933272, - "grad_norm": 0.4321691691875458, - "learning_rate": 8.128785176740021e-06, - "loss": 0.3739, - "step": 13730 - }, - { - "epoch": 0.8973923272988693, - "grad_norm": 0.4399643540382385, - "learning_rate": 8.128512794607672e-06, - "loss": 0.3908, - "step": 13731 - }, - { - "epoch": 0.8974576825044115, - "grad_norm": 0.44024062156677246, - "learning_rate": 8.128240397216482e-06, - "loss": 0.3608, - "step": 13732 - }, - { - "epoch": 0.8975230377099536, - "grad_norm": 0.4510750472545624, - "learning_rate": 8.12796798456778e-06, - "loss": 0.3969, - "step": 13733 - }, - { - "epoch": 0.8975883929154957, - "grad_norm": 0.42435726523399353, - "learning_rate": 8.127695556662895e-06, - "loss": 0.3484, - "step": 13734 - }, - { - "epoch": 0.8976537481210378, - "grad_norm": 0.4564155638217926, - "learning_rate": 8.127423113503154e-06, - "loss": 0.4008, - "step": 13735 - }, - { - "epoch": 0.89771910332658, - "grad_norm": 0.46148067712783813, - "learning_rate": 8.127150655089886e-06, - "loss": 0.3756, - "step": 13736 - }, - { - "epoch": 0.8977844585321221, - "grad_norm": 0.44304126501083374, - "learning_rate": 8.126878181424423e-06, - "loss": 0.3744, - "step": 13737 - }, - { - "epoch": 0.8978498137376643, - "grad_norm": 0.44300344586372375, - "learning_rate": 8.126605692508091e-06, - "loss": 0.338, - "step": 13738 - }, - { - "epoch": 0.8979151689432063, - "grad_norm": 0.46930742263793945, - "learning_rate": 8.126333188342222e-06, - "loss": 0.3926, - "step": 13739 - }, - { - "epoch": 0.8979805241487484, - "grad_norm": 0.4636451303958893, - "learning_rate": 8.126060668928141e-06, - "loss": 0.4222, - "step": 13740 - }, - { - "epoch": 0.8980458793542906, - "grad_norm": 0.4638378918170929, - "learning_rate": 8.125788134267178e-06, - "loss": 0.3767, - "step": 13741 - }, - { - "epoch": 0.8981112345598327, - "grad_norm": 0.4696529805660248, - "learning_rate": 8.125515584360666e-06, - "loss": 0.4268, - "step": 13742 - }, - { - "epoch": 0.8981765897653748, - "grad_norm": 0.4246528446674347, - "learning_rate": 8.12524301920993e-06, - "loss": 0.347, - "step": 13743 - }, - { - "epoch": 0.8982419449709169, - "grad_norm": 0.4272640645503998, - "learning_rate": 8.124970438816302e-06, - "loss": 0.3747, - "step": 13744 - }, - { - "epoch": 0.8983073001764591, - "grad_norm": 0.48389050364494324, - "learning_rate": 8.124697843181108e-06, - "loss": 0.3115, - "step": 13745 - }, - { - "epoch": 0.8983726553820012, - "grad_norm": 0.40088316798210144, - "learning_rate": 8.124425232305684e-06, - "loss": 0.3072, - "step": 13746 - }, - { - "epoch": 0.8984380105875432, - "grad_norm": 0.4198073148727417, - "learning_rate": 8.124152606191353e-06, - "loss": 0.35, - "step": 13747 - }, - { - "epoch": 0.8985033657930854, - "grad_norm": 0.40573132038116455, - "learning_rate": 8.12387996483945e-06, - "loss": 0.336, - "step": 13748 - }, - { - "epoch": 0.8985687209986275, - "grad_norm": 0.4379951059818268, - "learning_rate": 8.1236073082513e-06, - "loss": 0.3724, - "step": 13749 - }, - { - "epoch": 0.8986340762041697, - "grad_norm": 0.43794867396354675, - "learning_rate": 8.123334636428236e-06, - "loss": 0.3534, - "step": 13750 - }, - { - "epoch": 0.8986994314097118, - "grad_norm": 0.43171659111976624, - "learning_rate": 8.123061949371587e-06, - "loss": 0.3549, - "step": 13751 - }, - { - "epoch": 0.8987647866152539, - "grad_norm": 0.3995577394962311, - "learning_rate": 8.122789247082681e-06, - "loss": 0.3258, - "step": 13752 - }, - { - "epoch": 0.898830141820796, - "grad_norm": 0.45060256123542786, - "learning_rate": 8.122516529562852e-06, - "loss": 0.3708, - "step": 13753 - }, - { - "epoch": 0.8988954970263382, - "grad_norm": 0.45653200149536133, - "learning_rate": 8.122243796813427e-06, - "loss": 0.4065, - "step": 13754 - }, - { - "epoch": 0.8989608522318803, - "grad_norm": 0.4437415599822998, - "learning_rate": 8.121971048835739e-06, - "loss": 0.3822, - "step": 13755 - }, - { - "epoch": 0.8990262074374223, - "grad_norm": 0.45178404450416565, - "learning_rate": 8.121698285631114e-06, - "loss": 0.4043, - "step": 13756 - }, - { - "epoch": 0.8990915626429645, - "grad_norm": 0.506862998008728, - "learning_rate": 8.12142550720089e-06, - "loss": 0.4152, - "step": 13757 - }, - { - "epoch": 0.8991569178485066, - "grad_norm": 0.4507945477962494, - "learning_rate": 8.121152713546387e-06, - "loss": 0.3737, - "step": 13758 - }, - { - "epoch": 0.8992222730540488, - "grad_norm": 0.4415077865123749, - "learning_rate": 8.120879904668943e-06, - "loss": 0.3758, - "step": 13759 - }, - { - "epoch": 0.8992876282595909, - "grad_norm": 0.4535658061504364, - "learning_rate": 8.120607080569886e-06, - "loss": 0.4043, - "step": 13760 - }, - { - "epoch": 0.899352983465133, - "grad_norm": 0.44528043270111084, - "learning_rate": 8.120334241250549e-06, - "loss": 0.3303, - "step": 13761 - }, - { - "epoch": 0.8994183386706751, - "grad_norm": 0.43991243839263916, - "learning_rate": 8.120061386712259e-06, - "loss": 0.3726, - "step": 13762 - }, - { - "epoch": 0.8994836938762173, - "grad_norm": 0.4297688901424408, - "learning_rate": 8.119788516956348e-06, - "loss": 0.4037, - "step": 13763 - }, - { - "epoch": 0.8995490490817594, - "grad_norm": 0.43185365200042725, - "learning_rate": 8.11951563198415e-06, - "loss": 0.3607, - "step": 13764 - }, - { - "epoch": 0.8996144042873014, - "grad_norm": 0.44259557127952576, - "learning_rate": 8.119242731796993e-06, - "loss": 0.3392, - "step": 13765 - }, - { - "epoch": 0.8996797594928436, - "grad_norm": 0.41821321845054626, - "learning_rate": 8.118969816396208e-06, - "loss": 0.3211, - "step": 13766 - }, - { - "epoch": 0.8997451146983857, - "grad_norm": 0.40150561928749084, - "learning_rate": 8.118696885783125e-06, - "loss": 0.3271, - "step": 13767 - }, - { - "epoch": 0.8998104699039279, - "grad_norm": 0.46457138657569885, - "learning_rate": 8.11842393995908e-06, - "loss": 0.3877, - "step": 13768 - }, - { - "epoch": 0.89987582510947, - "grad_norm": 0.4529329836368561, - "learning_rate": 8.118150978925399e-06, - "loss": 0.3659, - "step": 13769 - }, - { - "epoch": 0.8999411803150121, - "grad_norm": 0.4387982189655304, - "learning_rate": 8.117878002683418e-06, - "loss": 0.3639, - "step": 13770 - }, - { - "epoch": 0.9000065355205542, - "grad_norm": 0.4521043300628662, - "learning_rate": 8.117605011234464e-06, - "loss": 0.3952, - "step": 13771 - }, - { - "epoch": 0.9000718907260963, - "grad_norm": 0.4835241734981537, - "learning_rate": 8.11733200457987e-06, - "loss": 0.4276, - "step": 13772 - }, - { - "epoch": 0.9001372459316385, - "grad_norm": 0.4648262560367584, - "learning_rate": 8.117058982720968e-06, - "loss": 0.3848, - "step": 13773 - }, - { - "epoch": 0.9002026011371805, - "grad_norm": 0.4252789616584778, - "learning_rate": 8.11678594565909e-06, - "loss": 0.3104, - "step": 13774 - }, - { - "epoch": 0.9002679563427227, - "grad_norm": 0.4380900263786316, - "learning_rate": 8.116512893395567e-06, - "loss": 0.3858, - "step": 13775 - }, - { - "epoch": 0.9003333115482648, - "grad_norm": 0.4245436191558838, - "learning_rate": 8.11623982593173e-06, - "loss": 0.3176, - "step": 13776 - }, - { - "epoch": 0.900398666753807, - "grad_norm": 0.4453778862953186, - "learning_rate": 8.115966743268914e-06, - "loss": 0.3885, - "step": 13777 - }, - { - "epoch": 0.900464021959349, - "grad_norm": 0.4539322257041931, - "learning_rate": 8.115693645408447e-06, - "loss": 0.3565, - "step": 13778 - }, - { - "epoch": 0.9005293771648912, - "grad_norm": 0.42480286955833435, - "learning_rate": 8.115420532351662e-06, - "loss": 0.3322, - "step": 13779 - }, - { - "epoch": 0.9005947323704333, - "grad_norm": 0.4224318861961365, - "learning_rate": 8.115147404099895e-06, - "loss": 0.3568, - "step": 13780 - }, - { - "epoch": 0.9006600875759754, - "grad_norm": 0.425488144159317, - "learning_rate": 8.114874260654472e-06, - "loss": 0.3614, - "step": 13781 - }, - { - "epoch": 0.9007254427815176, - "grad_norm": 0.4492007791996002, - "learning_rate": 8.114601102016727e-06, - "loss": 0.3752, - "step": 13782 - }, - { - "epoch": 0.9007907979870596, - "grad_norm": 0.4592430889606476, - "learning_rate": 8.114327928187997e-06, - "loss": 0.4078, - "step": 13783 - }, - { - "epoch": 0.9008561531926018, - "grad_norm": 0.4468863904476166, - "learning_rate": 8.11405473916961e-06, - "loss": 0.3765, - "step": 13784 - }, - { - "epoch": 0.9009215083981439, - "grad_norm": 0.4448256492614746, - "learning_rate": 8.113781534962897e-06, - "loss": 0.401, - "step": 13785 - }, - { - "epoch": 0.9009868636036861, - "grad_norm": 0.44565296173095703, - "learning_rate": 8.113508315569195e-06, - "loss": 0.3931, - "step": 13786 - }, - { - "epoch": 0.9010522188092281, - "grad_norm": 0.46520334482192993, - "learning_rate": 8.113235080989834e-06, - "loss": 0.4184, - "step": 13787 - }, - { - "epoch": 0.9011175740147703, - "grad_norm": 0.42197686433792114, - "learning_rate": 8.112961831226145e-06, - "loss": 0.3337, - "step": 13788 - }, - { - "epoch": 0.9011829292203124, - "grad_norm": 0.4567323327064514, - "learning_rate": 8.112688566279465e-06, - "loss": 0.4058, - "step": 13789 - }, - { - "epoch": 0.9012482844258545, - "grad_norm": 0.43261972069740295, - "learning_rate": 8.112415286151123e-06, - "loss": 0.3538, - "step": 13790 - }, - { - "epoch": 0.9013136396313967, - "grad_norm": 0.4408476650714874, - "learning_rate": 8.112141990842455e-06, - "loss": 0.329, - "step": 13791 - }, - { - "epoch": 0.9013789948369387, - "grad_norm": 0.4256812036037445, - "learning_rate": 8.111868680354792e-06, - "loss": 0.3502, - "step": 13792 - }, - { - "epoch": 0.9014443500424809, - "grad_norm": 0.43711304664611816, - "learning_rate": 8.111595354689466e-06, - "loss": 0.3636, - "step": 13793 - }, - { - "epoch": 0.901509705248023, - "grad_norm": 0.4469901919364929, - "learning_rate": 8.111322013847813e-06, - "loss": 0.3766, - "step": 13794 - }, - { - "epoch": 0.9015750604535652, - "grad_norm": 0.4421667158603668, - "learning_rate": 8.111048657831164e-06, - "loss": 0.3681, - "step": 13795 - }, - { - "epoch": 0.9016404156591072, - "grad_norm": 0.460467666387558, - "learning_rate": 8.110775286640852e-06, - "loss": 0.4402, - "step": 13796 - }, - { - "epoch": 0.9017057708646493, - "grad_norm": 0.44781821966171265, - "learning_rate": 8.110501900278213e-06, - "loss": 0.3603, - "step": 13797 - }, - { - "epoch": 0.9017711260701915, - "grad_norm": 0.429470032453537, - "learning_rate": 8.110228498744577e-06, - "loss": 0.3647, - "step": 13798 - }, - { - "epoch": 0.9018364812757336, - "grad_norm": 0.45834243297576904, - "learning_rate": 8.10995508204128e-06, - "loss": 0.4131, - "step": 13799 - }, - { - "epoch": 0.9019018364812758, - "grad_norm": 0.43037450313568115, - "learning_rate": 8.109681650169655e-06, - "loss": 0.3253, - "step": 13800 - }, - { - "epoch": 0.9019671916868178, - "grad_norm": 0.44325825572013855, - "learning_rate": 8.109408203131034e-06, - "loss": 0.381, - "step": 13801 - }, - { - "epoch": 0.90203254689236, - "grad_norm": 0.47546830773353577, - "learning_rate": 8.109134740926754e-06, - "loss": 0.4066, - "step": 13802 - }, - { - "epoch": 0.9020979020979021, - "grad_norm": 0.42107558250427246, - "learning_rate": 8.108861263558145e-06, - "loss": 0.3827, - "step": 13803 - }, - { - "epoch": 0.9021632573034443, - "grad_norm": 0.4274398386478424, - "learning_rate": 8.108587771026543e-06, - "loss": 0.3857, - "step": 13804 - }, - { - "epoch": 0.9022286125089863, - "grad_norm": 0.4399937689304352, - "learning_rate": 8.108314263333283e-06, - "loss": 0.34, - "step": 13805 - }, - { - "epoch": 0.9022939677145284, - "grad_norm": 0.3941992223262787, - "learning_rate": 8.108040740479696e-06, - "loss": 0.311, - "step": 13806 - }, - { - "epoch": 0.9023593229200706, - "grad_norm": 0.45216676592826843, - "learning_rate": 8.107767202467119e-06, - "loss": 0.3997, - "step": 13807 - }, - { - "epoch": 0.9024246781256127, - "grad_norm": 0.5037730932235718, - "learning_rate": 8.107493649296883e-06, - "loss": 0.3851, - "step": 13808 - }, - { - "epoch": 0.9024900333311549, - "grad_norm": 0.46031659841537476, - "learning_rate": 8.107220080970325e-06, - "loss": 0.3871, - "step": 13809 - }, - { - "epoch": 0.9025553885366969, - "grad_norm": 0.4860627353191376, - "learning_rate": 8.106946497488777e-06, - "loss": 0.4026, - "step": 13810 - }, - { - "epoch": 0.9026207437422391, - "grad_norm": 0.49016475677490234, - "learning_rate": 8.106672898853576e-06, - "loss": 0.385, - "step": 13811 - }, - { - "epoch": 0.9026860989477812, - "grad_norm": 0.4665054976940155, - "learning_rate": 8.106399285066053e-06, - "loss": 0.4158, - "step": 13812 - }, - { - "epoch": 0.9027514541533234, - "grad_norm": 0.4561840891838074, - "learning_rate": 8.106125656127547e-06, - "loss": 0.3633, - "step": 13813 - }, - { - "epoch": 0.9028168093588654, - "grad_norm": 0.4671241343021393, - "learning_rate": 8.10585201203939e-06, - "loss": 0.3925, - "step": 13814 - }, - { - "epoch": 0.9028821645644075, - "grad_norm": 0.42687904834747314, - "learning_rate": 8.105578352802915e-06, - "loss": 0.3553, - "step": 13815 - }, - { - "epoch": 0.9029475197699497, - "grad_norm": 0.4388181269168854, - "learning_rate": 8.10530467841946e-06, - "loss": 0.3148, - "step": 13816 - }, - { - "epoch": 0.9030128749754918, - "grad_norm": 0.467172771692276, - "learning_rate": 8.105030988890357e-06, - "loss": 0.4019, - "step": 13817 - }, - { - "epoch": 0.903078230181034, - "grad_norm": 0.4428863227367401, - "learning_rate": 8.104757284216942e-06, - "loss": 0.3665, - "step": 13818 - }, - { - "epoch": 0.903143585386576, - "grad_norm": 0.4159409999847412, - "learning_rate": 8.104483564400552e-06, - "loss": 0.3494, - "step": 13819 - }, - { - "epoch": 0.9032089405921182, - "grad_norm": 0.42701825499534607, - "learning_rate": 8.104209829442518e-06, - "loss": 0.3642, - "step": 13820 - }, - { - "epoch": 0.9032742957976603, - "grad_norm": 0.43664073944091797, - "learning_rate": 8.103936079344179e-06, - "loss": 0.3884, - "step": 13821 - }, - { - "epoch": 0.9033396510032025, - "grad_norm": 0.4308601915836334, - "learning_rate": 8.103662314106869e-06, - "loss": 0.3211, - "step": 13822 - }, - { - "epoch": 0.9034050062087445, - "grad_norm": 0.50096195936203, - "learning_rate": 8.10338853373192e-06, - "loss": 0.4162, - "step": 13823 - }, - { - "epoch": 0.9034703614142866, - "grad_norm": 0.42874860763549805, - "learning_rate": 8.103114738220673e-06, - "loss": 0.3999, - "step": 13824 - }, - { - "epoch": 0.9035357166198288, - "grad_norm": 0.42832908034324646, - "learning_rate": 8.102840927574458e-06, - "loss": 0.3604, - "step": 13825 - }, - { - "epoch": 0.9036010718253709, - "grad_norm": 0.44836941361427307, - "learning_rate": 8.102567101794613e-06, - "loss": 0.3981, - "step": 13826 - }, - { - "epoch": 0.903666427030913, - "grad_norm": 0.4566391706466675, - "learning_rate": 8.102293260882475e-06, - "loss": 0.4006, - "step": 13827 - }, - { - "epoch": 0.9037317822364551, - "grad_norm": 0.4433964788913727, - "learning_rate": 8.102019404839377e-06, - "loss": 0.3463, - "step": 13828 - }, - { - "epoch": 0.9037971374419973, - "grad_norm": 0.45907121896743774, - "learning_rate": 8.101745533666655e-06, - "loss": 0.3543, - "step": 13829 - }, - { - "epoch": 0.9038624926475394, - "grad_norm": 0.44854235649108887, - "learning_rate": 8.101471647365646e-06, - "loss": 0.3626, - "step": 13830 - }, - { - "epoch": 0.9039278478530814, - "grad_norm": 0.4422043263912201, - "learning_rate": 8.101197745937686e-06, - "loss": 0.3769, - "step": 13831 - }, - { - "epoch": 0.9039932030586236, - "grad_norm": 0.4449627101421356, - "learning_rate": 8.10092382938411e-06, - "loss": 0.4009, - "step": 13832 - }, - { - "epoch": 0.9040585582641657, - "grad_norm": 0.46181216835975647, - "learning_rate": 8.100649897706254e-06, - "loss": 0.3798, - "step": 13833 - }, - { - "epoch": 0.9041239134697079, - "grad_norm": 0.4497895836830139, - "learning_rate": 8.100375950905454e-06, - "loss": 0.369, - "step": 13834 - }, - { - "epoch": 0.90418926867525, - "grad_norm": 0.43777915835380554, - "learning_rate": 8.100101988983048e-06, - "loss": 0.3561, - "step": 13835 - }, - { - "epoch": 0.9042546238807921, - "grad_norm": 0.4683510959148407, - "learning_rate": 8.09982801194037e-06, - "loss": 0.4394, - "step": 13836 - }, - { - "epoch": 0.9043199790863342, - "grad_norm": 0.45694056153297424, - "learning_rate": 8.099554019778755e-06, - "loss": 0.3922, - "step": 13837 - }, - { - "epoch": 0.9043853342918764, - "grad_norm": 0.4695807695388794, - "learning_rate": 8.099280012499542e-06, - "loss": 0.3461, - "step": 13838 - }, - { - "epoch": 0.9044506894974185, - "grad_norm": 0.42546918988227844, - "learning_rate": 8.099005990104068e-06, - "loss": 0.3417, - "step": 13839 - }, - { - "epoch": 0.9045160447029605, - "grad_norm": 0.4493861198425293, - "learning_rate": 8.098731952593668e-06, - "loss": 0.371, - "step": 13840 - }, - { - "epoch": 0.9045813999085027, - "grad_norm": 0.45559778809547424, - "learning_rate": 8.098457899969679e-06, - "loss": 0.3691, - "step": 13841 - }, - { - "epoch": 0.9046467551140448, - "grad_norm": 0.4804634749889374, - "learning_rate": 8.098183832233437e-06, - "loss": 0.3904, - "step": 13842 - }, - { - "epoch": 0.904712110319587, - "grad_norm": 0.4272224009037018, - "learning_rate": 8.097909749386276e-06, - "loss": 0.3362, - "step": 13843 - }, - { - "epoch": 0.904777465525129, - "grad_norm": 0.4513688087463379, - "learning_rate": 8.09763565142954e-06, - "loss": 0.3895, - "step": 13844 - }, - { - "epoch": 0.9048428207306712, - "grad_norm": 0.4479421079158783, - "learning_rate": 8.097361538364561e-06, - "loss": 0.368, - "step": 13845 - }, - { - "epoch": 0.9049081759362133, - "grad_norm": 0.42748603224754333, - "learning_rate": 8.097087410192676e-06, - "loss": 0.3533, - "step": 13846 - }, - { - "epoch": 0.9049735311417555, - "grad_norm": 0.4828256070613861, - "learning_rate": 8.096813266915222e-06, - "loss": 0.4548, - "step": 13847 - }, - { - "epoch": 0.9050388863472976, - "grad_norm": 0.47530660033226013, - "learning_rate": 8.09653910853354e-06, - "loss": 0.4373, - "step": 13848 - }, - { - "epoch": 0.9051042415528396, - "grad_norm": 0.44951948523521423, - "learning_rate": 8.096264935048961e-06, - "loss": 0.3812, - "step": 13849 - }, - { - "epoch": 0.9051695967583818, - "grad_norm": 0.4373512268066406, - "learning_rate": 8.095990746462826e-06, - "loss": 0.3731, - "step": 13850 - }, - { - "epoch": 0.9052349519639239, - "grad_norm": 0.42946070432662964, - "learning_rate": 8.095716542776471e-06, - "loss": 0.4, - "step": 13851 - }, - { - "epoch": 0.9053003071694661, - "grad_norm": 0.46854913234710693, - "learning_rate": 8.095442323991236e-06, - "loss": 0.3603, - "step": 13852 - }, - { - "epoch": 0.9053656623750082, - "grad_norm": 0.48517006635665894, - "learning_rate": 8.095168090108453e-06, - "loss": 0.4112, - "step": 13853 - }, - { - "epoch": 0.9054310175805503, - "grad_norm": 0.4519181251525879, - "learning_rate": 8.094893841129468e-06, - "loss": 0.3424, - "step": 13854 - }, - { - "epoch": 0.9054963727860924, - "grad_norm": 0.45790523290634155, - "learning_rate": 8.094619577055609e-06, - "loss": 0.3832, - "step": 13855 - }, - { - "epoch": 0.9055617279916345, - "grad_norm": 0.44878947734832764, - "learning_rate": 8.09434529788822e-06, - "loss": 0.3947, - "step": 13856 - }, - { - "epoch": 0.9056270831971767, - "grad_norm": 0.4312663972377777, - "learning_rate": 8.094071003628637e-06, - "loss": 0.362, - "step": 13857 - }, - { - "epoch": 0.9056924384027187, - "grad_norm": 0.44874027371406555, - "learning_rate": 8.093796694278198e-06, - "loss": 0.3727, - "step": 13858 - }, - { - "epoch": 0.9057577936082609, - "grad_norm": 0.41829913854599, - "learning_rate": 8.09352236983824e-06, - "loss": 0.3432, - "step": 13859 - }, - { - "epoch": 0.905823148813803, - "grad_norm": 0.45856165885925293, - "learning_rate": 8.093248030310102e-06, - "loss": 0.4302, - "step": 13860 - }, - { - "epoch": 0.9058885040193452, - "grad_norm": 0.5171898007392883, - "learning_rate": 8.092973675695122e-06, - "loss": 0.3942, - "step": 13861 - }, - { - "epoch": 0.9059538592248872, - "grad_norm": 0.4549436867237091, - "learning_rate": 8.092699305994639e-06, - "loss": 0.3762, - "step": 13862 - }, - { - "epoch": 0.9060192144304294, - "grad_norm": 0.4204540252685547, - "learning_rate": 8.092424921209989e-06, - "loss": 0.3248, - "step": 13863 - }, - { - "epoch": 0.9060845696359715, - "grad_norm": 0.41366302967071533, - "learning_rate": 8.09215052134251e-06, - "loss": 0.3608, - "step": 13864 - }, - { - "epoch": 0.9061499248415136, - "grad_norm": 0.3849446177482605, - "learning_rate": 8.091876106393544e-06, - "loss": 0.2911, - "step": 13865 - }, - { - "epoch": 0.9062152800470558, - "grad_norm": 0.4732769727706909, - "learning_rate": 8.091601676364424e-06, - "loss": 0.3774, - "step": 13866 - }, - { - "epoch": 0.9062806352525978, - "grad_norm": 0.5221768617630005, - "learning_rate": 8.091327231256495e-06, - "loss": 0.5066, - "step": 13867 - }, - { - "epoch": 0.90634599045814, - "grad_norm": 0.4407196640968323, - "learning_rate": 8.09105277107109e-06, - "loss": 0.3817, - "step": 13868 - }, - { - "epoch": 0.9064113456636821, - "grad_norm": 0.4109002649784088, - "learning_rate": 8.090778295809552e-06, - "loss": 0.3026, - "step": 13869 - }, - { - "epoch": 0.9064767008692243, - "grad_norm": 0.43740326166152954, - "learning_rate": 8.090503805473216e-06, - "loss": 0.3809, - "step": 13870 - }, - { - "epoch": 0.9065420560747663, - "grad_norm": 0.45587050914764404, - "learning_rate": 8.09022930006342e-06, - "loss": 0.4207, - "step": 13871 - }, - { - "epoch": 0.9066074112803085, - "grad_norm": 0.4603224992752075, - "learning_rate": 8.089954779581508e-06, - "loss": 0.3795, - "step": 13872 - }, - { - "epoch": 0.9066727664858506, - "grad_norm": 0.44550371170043945, - "learning_rate": 8.089680244028817e-06, - "loss": 0.3529, - "step": 13873 - }, - { - "epoch": 0.9067381216913927, - "grad_norm": 0.41442403197288513, - "learning_rate": 8.089405693406683e-06, - "loss": 0.3249, - "step": 13874 - }, - { - "epoch": 0.9068034768969349, - "grad_norm": 0.4551732540130615, - "learning_rate": 8.08913112771645e-06, - "loss": 0.3902, - "step": 13875 - }, - { - "epoch": 0.9068688321024769, - "grad_norm": 0.4341244399547577, - "learning_rate": 8.08885654695945e-06, - "loss": 0.3342, - "step": 13876 - }, - { - "epoch": 0.9069341873080191, - "grad_norm": 0.4735921025276184, - "learning_rate": 8.088581951137029e-06, - "loss": 0.4375, - "step": 13877 - }, - { - "epoch": 0.9069995425135612, - "grad_norm": 0.4558418393135071, - "learning_rate": 8.088307340250524e-06, - "loss": 0.3953, - "step": 13878 - }, - { - "epoch": 0.9070648977191034, - "grad_norm": 0.39925768971443176, - "learning_rate": 8.088032714301272e-06, - "loss": 0.3412, - "step": 13879 - }, - { - "epoch": 0.9071302529246454, - "grad_norm": 0.4197119176387787, - "learning_rate": 8.087758073290618e-06, - "loss": 0.3374, - "step": 13880 - }, - { - "epoch": 0.9071956081301875, - "grad_norm": 0.43608343601226807, - "learning_rate": 8.087483417219897e-06, - "loss": 0.3535, - "step": 13881 - }, - { - "epoch": 0.9072609633357297, - "grad_norm": 0.46259912848472595, - "learning_rate": 8.087208746090448e-06, - "loss": 0.3699, - "step": 13882 - }, - { - "epoch": 0.9073263185412718, - "grad_norm": 0.4515265226364136, - "learning_rate": 8.086934059903613e-06, - "loss": 0.402, - "step": 13883 - }, - { - "epoch": 0.907391673746814, - "grad_norm": 0.43163689970970154, - "learning_rate": 8.086659358660734e-06, - "loss": 0.3499, - "step": 13884 - }, - { - "epoch": 0.907457028952356, - "grad_norm": 0.4312930107116699, - "learning_rate": 8.086384642363144e-06, - "loss": 0.3928, - "step": 13885 - }, - { - "epoch": 0.9075223841578982, - "grad_norm": 0.4166937470436096, - "learning_rate": 8.08610991101219e-06, - "loss": 0.326, - "step": 13886 - }, - { - "epoch": 0.9075877393634403, - "grad_norm": 0.47274652123451233, - "learning_rate": 8.085835164609205e-06, - "loss": 0.3898, - "step": 13887 - }, - { - "epoch": 0.9076530945689825, - "grad_norm": 0.4820028245449066, - "learning_rate": 8.085560403155536e-06, - "loss": 0.404, - "step": 13888 - }, - { - "epoch": 0.9077184497745245, - "grad_norm": 0.40644168853759766, - "learning_rate": 8.08528562665252e-06, - "loss": 0.3365, - "step": 13889 - }, - { - "epoch": 0.9077838049800666, - "grad_norm": 0.4479687809944153, - "learning_rate": 8.085010835101496e-06, - "loss": 0.41, - "step": 13890 - }, - { - "epoch": 0.9078491601856088, - "grad_norm": 0.44872233271598816, - "learning_rate": 8.084736028503808e-06, - "loss": 0.3813, - "step": 13891 - }, - { - "epoch": 0.9079145153911509, - "grad_norm": 0.4451698064804077, - "learning_rate": 8.08446120686079e-06, - "loss": 0.3877, - "step": 13892 - }, - { - "epoch": 0.907979870596693, - "grad_norm": 0.4394383728504181, - "learning_rate": 8.084186370173787e-06, - "loss": 0.3586, - "step": 13893 - }, - { - "epoch": 0.9080452258022351, - "grad_norm": 0.43240517377853394, - "learning_rate": 8.083911518444141e-06, - "loss": 0.3569, - "step": 13894 - }, - { - "epoch": 0.9081105810077773, - "grad_norm": 0.4298829436302185, - "learning_rate": 8.083636651673187e-06, - "loss": 0.3438, - "step": 13895 - }, - { - "epoch": 0.9081759362133194, - "grad_norm": 0.44177982211112976, - "learning_rate": 8.083361769862272e-06, - "loss": 0.3519, - "step": 13896 - }, - { - "epoch": 0.9082412914188616, - "grad_norm": 0.45620620250701904, - "learning_rate": 8.083086873012732e-06, - "loss": 0.383, - "step": 13897 - }, - { - "epoch": 0.9083066466244036, - "grad_norm": 0.42998021841049194, - "learning_rate": 8.082811961125908e-06, - "loss": 0.3387, - "step": 13898 - }, - { - "epoch": 0.9083720018299457, - "grad_norm": 0.4094111919403076, - "learning_rate": 8.082537034203145e-06, - "loss": 0.3298, - "step": 13899 - }, - { - "epoch": 0.9084373570354879, - "grad_norm": 0.46914973855018616, - "learning_rate": 8.082262092245779e-06, - "loss": 0.4069, - "step": 13900 - }, - { - "epoch": 0.90850271224103, - "grad_norm": 0.44797879457473755, - "learning_rate": 8.081987135255152e-06, - "loss": 0.3753, - "step": 13901 - }, - { - "epoch": 0.9085680674465721, - "grad_norm": 0.436219722032547, - "learning_rate": 8.081712163232607e-06, - "loss": 0.385, - "step": 13902 - }, - { - "epoch": 0.9086334226521142, - "grad_norm": 0.4074752628803253, - "learning_rate": 8.081437176179485e-06, - "loss": 0.3354, - "step": 13903 - }, - { - "epoch": 0.9086987778576564, - "grad_norm": 0.43233710527420044, - "learning_rate": 8.081162174097125e-06, - "loss": 0.3769, - "step": 13904 - }, - { - "epoch": 0.9087641330631985, - "grad_norm": 0.4211293160915375, - "learning_rate": 8.080887156986873e-06, - "loss": 0.3617, - "step": 13905 - }, - { - "epoch": 0.9088294882687407, - "grad_norm": 0.46026962995529175, - "learning_rate": 8.080612124850062e-06, - "loss": 0.4012, - "step": 13906 - }, - { - "epoch": 0.9088948434742827, - "grad_norm": 0.4387584328651428, - "learning_rate": 8.080337077688042e-06, - "loss": 0.3735, - "step": 13907 - }, - { - "epoch": 0.9089601986798248, - "grad_norm": 0.43514472246170044, - "learning_rate": 8.08006201550215e-06, - "loss": 0.3609, - "step": 13908 - }, - { - "epoch": 0.909025553885367, - "grad_norm": 0.4435819685459137, - "learning_rate": 8.079786938293727e-06, - "loss": 0.3997, - "step": 13909 - }, - { - "epoch": 0.9090909090909091, - "grad_norm": 0.4352990388870239, - "learning_rate": 8.079511846064119e-06, - "loss": 0.3341, - "step": 13910 - }, - { - "epoch": 0.9091562642964512, - "grad_norm": 0.47828343510627747, - "learning_rate": 8.079236738814662e-06, - "loss": 0.3977, - "step": 13911 - }, - { - "epoch": 0.9092216195019933, - "grad_norm": 0.45470914244651794, - "learning_rate": 8.078961616546702e-06, - "loss": 0.3826, - "step": 13912 - }, - { - "epoch": 0.9092869747075355, - "grad_norm": 0.4326404929161072, - "learning_rate": 8.07868647926158e-06, - "loss": 0.3691, - "step": 13913 - }, - { - "epoch": 0.9093523299130776, - "grad_norm": 0.4797097146511078, - "learning_rate": 8.078411326960637e-06, - "loss": 0.3779, - "step": 13914 - }, - { - "epoch": 0.9094176851186196, - "grad_norm": 0.47024965286254883, - "learning_rate": 8.078136159645216e-06, - "loss": 0.3828, - "step": 13915 - }, - { - "epoch": 0.9094830403241618, - "grad_norm": 0.4575149416923523, - "learning_rate": 8.077860977316657e-06, - "loss": 0.3733, - "step": 13916 - }, - { - "epoch": 0.9095483955297039, - "grad_norm": 0.4415610432624817, - "learning_rate": 8.077585779976306e-06, - "loss": 0.3381, - "step": 13917 - }, - { - "epoch": 0.9096137507352461, - "grad_norm": 0.43210041522979736, - "learning_rate": 8.077310567625503e-06, - "loss": 0.3489, - "step": 13918 - }, - { - "epoch": 0.9096791059407882, - "grad_norm": 0.4586709439754486, - "learning_rate": 8.077035340265588e-06, - "loss": 0.394, - "step": 13919 - }, - { - "epoch": 0.9097444611463303, - "grad_norm": 0.44284117221832275, - "learning_rate": 8.076760097897907e-06, - "loss": 0.359, - "step": 13920 - }, - { - "epoch": 0.9098098163518724, - "grad_norm": 0.49802714586257935, - "learning_rate": 8.0764848405238e-06, - "loss": 0.4215, - "step": 13921 - }, - { - "epoch": 0.9098751715574146, - "grad_norm": 0.4488286077976227, - "learning_rate": 8.076209568144612e-06, - "loss": 0.4021, - "step": 13922 - }, - { - "epoch": 0.9099405267629567, - "grad_norm": 0.48121508955955505, - "learning_rate": 8.075934280761684e-06, - "loss": 0.3896, - "step": 13923 - }, - { - "epoch": 0.9100058819684987, - "grad_norm": 0.45127177238464355, - "learning_rate": 8.075658978376358e-06, - "loss": 0.3658, - "step": 13924 - }, - { - "epoch": 0.9100712371740409, - "grad_norm": 0.4304015338420868, - "learning_rate": 8.075383660989978e-06, - "loss": 0.3454, - "step": 13925 - }, - { - "epoch": 0.910136592379583, - "grad_norm": 0.44155532121658325, - "learning_rate": 8.075108328603886e-06, - "loss": 0.3974, - "step": 13926 - }, - { - "epoch": 0.9102019475851252, - "grad_norm": 0.45621612668037415, - "learning_rate": 8.074832981219428e-06, - "loss": 0.3569, - "step": 13927 - }, - { - "epoch": 0.9102673027906673, - "grad_norm": 0.40090957283973694, - "learning_rate": 8.07455761883794e-06, - "loss": 0.323, - "step": 13928 - }, - { - "epoch": 0.9103326579962094, - "grad_norm": 0.4417174160480499, - "learning_rate": 8.074282241460774e-06, - "loss": 0.3923, - "step": 13929 - }, - { - "epoch": 0.9103980132017515, - "grad_norm": 0.4724140465259552, - "learning_rate": 8.074006849089266e-06, - "loss": 0.3918, - "step": 13930 - }, - { - "epoch": 0.9104633684072937, - "grad_norm": 0.40536820888519287, - "learning_rate": 8.073731441724762e-06, - "loss": 0.3269, - "step": 13931 - }, - { - "epoch": 0.9105287236128358, - "grad_norm": 0.455199658870697, - "learning_rate": 8.073456019368604e-06, - "loss": 0.394, - "step": 13932 - }, - { - "epoch": 0.9105940788183778, - "grad_norm": 0.4606650769710541, - "learning_rate": 8.073180582022138e-06, - "loss": 0.4054, - "step": 13933 - }, - { - "epoch": 0.91065943402392, - "grad_norm": 0.4483288824558258, - "learning_rate": 8.072905129686705e-06, - "loss": 0.3586, - "step": 13934 - }, - { - "epoch": 0.9107247892294621, - "grad_norm": 0.45802754163742065, - "learning_rate": 8.072629662363648e-06, - "loss": 0.373, - "step": 13935 - }, - { - "epoch": 0.9107901444350043, - "grad_norm": 0.4426094591617584, - "learning_rate": 8.072354180054312e-06, - "loss": 0.3642, - "step": 13936 - }, - { - "epoch": 0.9108554996405464, - "grad_norm": 0.46466970443725586, - "learning_rate": 8.072078682760042e-06, - "loss": 0.4344, - "step": 13937 - }, - { - "epoch": 0.9109208548460885, - "grad_norm": 0.46857601404190063, - "learning_rate": 8.07180317048218e-06, - "loss": 0.4125, - "step": 13938 - }, - { - "epoch": 0.9109862100516306, - "grad_norm": 0.5448115468025208, - "learning_rate": 8.071527643222068e-06, - "loss": 0.4113, - "step": 13939 - }, - { - "epoch": 0.9110515652571727, - "grad_norm": 0.43733006715774536, - "learning_rate": 8.071252100981053e-06, - "loss": 0.3353, - "step": 13940 - }, - { - "epoch": 0.9111169204627149, - "grad_norm": 0.4095253348350525, - "learning_rate": 8.070976543760475e-06, - "loss": 0.3314, - "step": 13941 - }, - { - "epoch": 0.9111822756682569, - "grad_norm": 0.4773953855037689, - "learning_rate": 8.070700971561682e-06, - "loss": 0.4402, - "step": 13942 - }, - { - "epoch": 0.9112476308737991, - "grad_norm": 0.4827759861946106, - "learning_rate": 8.070425384386018e-06, - "loss": 0.455, - "step": 13943 - }, - { - "epoch": 0.9113129860793412, - "grad_norm": 0.44401463866233826, - "learning_rate": 8.070149782234823e-06, - "loss": 0.3515, - "step": 13944 - }, - { - "epoch": 0.9113783412848834, - "grad_norm": 0.45008859038352966, - "learning_rate": 8.069874165109447e-06, - "loss": 0.3595, - "step": 13945 - }, - { - "epoch": 0.9114436964904254, - "grad_norm": 0.44510313868522644, - "learning_rate": 8.06959853301123e-06, - "loss": 0.3815, - "step": 13946 - }, - { - "epoch": 0.9115090516959676, - "grad_norm": 0.4350227117538452, - "learning_rate": 8.069322885941517e-06, - "loss": 0.3498, - "step": 13947 - }, - { - "epoch": 0.9115744069015097, - "grad_norm": 0.46352311968803406, - "learning_rate": 8.069047223901652e-06, - "loss": 0.3966, - "step": 13948 - }, - { - "epoch": 0.9116397621070518, - "grad_norm": 0.44037729501724243, - "learning_rate": 8.068771546892982e-06, - "loss": 0.3924, - "step": 13949 - }, - { - "epoch": 0.911705117312594, - "grad_norm": 0.4215856194496155, - "learning_rate": 8.068495854916849e-06, - "loss": 0.3472, - "step": 13950 - }, - { - "epoch": 0.911770472518136, - "grad_norm": 0.44475454092025757, - "learning_rate": 8.068220147974599e-06, - "loss": 0.3879, - "step": 13951 - }, - { - "epoch": 0.9118358277236782, - "grad_norm": 0.4228137731552124, - "learning_rate": 8.067944426067577e-06, - "loss": 0.3527, - "step": 13952 - }, - { - "epoch": 0.9119011829292203, - "grad_norm": 0.44084176421165466, - "learning_rate": 8.067668689197128e-06, - "loss": 0.4205, - "step": 13953 - }, - { - "epoch": 0.9119665381347625, - "grad_norm": 0.42345526814460754, - "learning_rate": 8.067392937364594e-06, - "loss": 0.3312, - "step": 13954 - }, - { - "epoch": 0.9120318933403045, - "grad_norm": 0.4450491964817047, - "learning_rate": 8.067117170571323e-06, - "loss": 0.364, - "step": 13955 - }, - { - "epoch": 0.9120972485458467, - "grad_norm": 0.41773533821105957, - "learning_rate": 8.06684138881866e-06, - "loss": 0.3477, - "step": 13956 - }, - { - "epoch": 0.9121626037513888, - "grad_norm": 0.4562980830669403, - "learning_rate": 8.066565592107947e-06, - "loss": 0.3764, - "step": 13957 - }, - { - "epoch": 0.9122279589569309, - "grad_norm": 0.4346955716609955, - "learning_rate": 8.066289780440532e-06, - "loss": 0.4004, - "step": 13958 - }, - { - "epoch": 0.9122933141624731, - "grad_norm": 0.41704708337783813, - "learning_rate": 8.066013953817762e-06, - "loss": 0.3318, - "step": 13959 - }, - { - "epoch": 0.9123586693680151, - "grad_norm": 0.4251265227794647, - "learning_rate": 8.065738112240977e-06, - "loss": 0.3644, - "step": 13960 - }, - { - "epoch": 0.9124240245735573, - "grad_norm": 0.41447633504867554, - "learning_rate": 8.065462255711526e-06, - "loss": 0.3574, - "step": 13961 - }, - { - "epoch": 0.9124893797790994, - "grad_norm": 0.43733349442481995, - "learning_rate": 8.065186384230752e-06, - "loss": 0.3472, - "step": 13962 - }, - { - "epoch": 0.9125547349846416, - "grad_norm": 0.48272380232810974, - "learning_rate": 8.064910497800005e-06, - "loss": 0.4234, - "step": 13963 - }, - { - "epoch": 0.9126200901901836, - "grad_norm": 0.465209037065506, - "learning_rate": 8.064634596420627e-06, - "loss": 0.4303, - "step": 13964 - }, - { - "epoch": 0.9126854453957257, - "grad_norm": 0.42566102743148804, - "learning_rate": 8.064358680093962e-06, - "loss": 0.3427, - "step": 13965 - }, - { - "epoch": 0.9127508006012679, - "grad_norm": 0.44575703144073486, - "learning_rate": 8.06408274882136e-06, - "loss": 0.3666, - "step": 13966 - }, - { - "epoch": 0.91281615580681, - "grad_norm": 0.46295708417892456, - "learning_rate": 8.063806802604164e-06, - "loss": 0.3987, - "step": 13967 - }, - { - "epoch": 0.9128815110123522, - "grad_norm": 0.48901307582855225, - "learning_rate": 8.063530841443721e-06, - "loss": 0.3808, - "step": 13968 - }, - { - "epoch": 0.9129468662178942, - "grad_norm": 0.47041264176368713, - "learning_rate": 8.063254865341378e-06, - "loss": 0.4315, - "step": 13969 - }, - { - "epoch": 0.9130122214234364, - "grad_norm": 0.4150421917438507, - "learning_rate": 8.062978874298479e-06, - "loss": 0.3498, - "step": 13970 - }, - { - "epoch": 0.9130775766289785, - "grad_norm": 0.45428165793418884, - "learning_rate": 8.06270286831637e-06, - "loss": 0.3606, - "step": 13971 - }, - { - "epoch": 0.9131429318345207, - "grad_norm": 0.435101717710495, - "learning_rate": 8.062426847396401e-06, - "loss": 0.3515, - "step": 13972 - }, - { - "epoch": 0.9132082870400627, - "grad_norm": 0.46090081334114075, - "learning_rate": 8.062150811539912e-06, - "loss": 0.3965, - "step": 13973 - }, - { - "epoch": 0.9132736422456048, - "grad_norm": 0.492020845413208, - "learning_rate": 8.061874760748254e-06, - "loss": 0.3953, - "step": 13974 - }, - { - "epoch": 0.913338997451147, - "grad_norm": 0.4630420207977295, - "learning_rate": 8.061598695022772e-06, - "loss": 0.3355, - "step": 13975 - }, - { - "epoch": 0.9134043526566891, - "grad_norm": 0.42773622274398804, - "learning_rate": 8.061322614364813e-06, - "loss": 0.336, - "step": 13976 - }, - { - "epoch": 0.9134697078622313, - "grad_norm": 0.45446163415908813, - "learning_rate": 8.061046518775722e-06, - "loss": 0.3332, - "step": 13977 - }, - { - "epoch": 0.9135350630677733, - "grad_norm": 0.4189533591270447, - "learning_rate": 8.060770408256849e-06, - "loss": 0.3306, - "step": 13978 - }, - { - "epoch": 0.9136004182733155, - "grad_norm": 0.4757717549800873, - "learning_rate": 8.060494282809534e-06, - "loss": 0.4246, - "step": 13979 - }, - { - "epoch": 0.9136657734788576, - "grad_norm": 0.4250771999359131, - "learning_rate": 8.060218142435133e-06, - "loss": 0.3579, - "step": 13980 - }, - { - "epoch": 0.9137311286843998, - "grad_norm": 0.44313472509384155, - "learning_rate": 8.059941987134985e-06, - "loss": 0.346, - "step": 13981 - }, - { - "epoch": 0.9137964838899418, - "grad_norm": 0.4288141429424286, - "learning_rate": 8.05966581691044e-06, - "loss": 0.3292, - "step": 13982 - }, - { - "epoch": 0.9138618390954839, - "grad_norm": 0.4938511848449707, - "learning_rate": 8.059389631762847e-06, - "loss": 0.4498, - "step": 13983 - }, - { - "epoch": 0.9139271943010261, - "grad_norm": 0.45982861518859863, - "learning_rate": 8.05911343169355e-06, - "loss": 0.3969, - "step": 13984 - }, - { - "epoch": 0.9139925495065682, - "grad_norm": 0.44021573662757874, - "learning_rate": 8.058837216703897e-06, - "loss": 0.3446, - "step": 13985 - }, - { - "epoch": 0.9140579047121103, - "grad_norm": 0.44781169295310974, - "learning_rate": 8.058560986795233e-06, - "loss": 0.3935, - "step": 13986 - }, - { - "epoch": 0.9141232599176524, - "grad_norm": 0.43591830134391785, - "learning_rate": 8.05828474196891e-06, - "loss": 0.344, - "step": 13987 - }, - { - "epoch": 0.9141886151231946, - "grad_norm": 0.47213393449783325, - "learning_rate": 8.058008482226272e-06, - "loss": 0.3925, - "step": 13988 - }, - { - "epoch": 0.9142539703287367, - "grad_norm": 0.43465113639831543, - "learning_rate": 8.057732207568666e-06, - "loss": 0.3828, - "step": 13989 - }, - { - "epoch": 0.9143193255342789, - "grad_norm": 0.4505982995033264, - "learning_rate": 8.057455917997443e-06, - "loss": 0.4209, - "step": 13990 - }, - { - "epoch": 0.9143846807398209, - "grad_norm": 0.43934178352355957, - "learning_rate": 8.057179613513945e-06, - "loss": 0.3914, - "step": 13991 - }, - { - "epoch": 0.914450035945363, - "grad_norm": 0.4296603500843048, - "learning_rate": 8.056903294119527e-06, - "loss": 0.3319, - "step": 13992 - }, - { - "epoch": 0.9145153911509052, - "grad_norm": 0.43655118346214294, - "learning_rate": 8.05662695981553e-06, - "loss": 0.3668, - "step": 13993 - }, - { - "epoch": 0.9145807463564473, - "grad_norm": 0.42281386256217957, - "learning_rate": 8.056350610603305e-06, - "loss": 0.3508, - "step": 13994 - }, - { - "epoch": 0.9146461015619894, - "grad_norm": 0.46946394443511963, - "learning_rate": 8.0560742464842e-06, - "loss": 0.39, - "step": 13995 - }, - { - "epoch": 0.9147114567675315, - "grad_norm": 0.44197699427604675, - "learning_rate": 8.05579786745956e-06, - "loss": 0.3943, - "step": 13996 - }, - { - "epoch": 0.9147768119730737, - "grad_norm": 0.43652769923210144, - "learning_rate": 8.055521473530737e-06, - "loss": 0.3576, - "step": 13997 - }, - { - "epoch": 0.9148421671786158, - "grad_norm": 0.43092644214630127, - "learning_rate": 8.055245064699077e-06, - "loss": 0.3719, - "step": 13998 - }, - { - "epoch": 0.9149075223841578, - "grad_norm": 0.4001566469669342, - "learning_rate": 8.054968640965929e-06, - "loss": 0.2946, - "step": 13999 - }, - { - "epoch": 0.9149728775897, - "grad_norm": 0.4617891311645508, - "learning_rate": 8.05469220233264e-06, - "loss": 0.423, - "step": 14000 - }, - { - "epoch": 0.9150382327952421, - "grad_norm": 0.44813108444213867, - "learning_rate": 8.054415748800559e-06, - "loss": 0.3614, - "step": 14001 - }, - { - "epoch": 0.9151035880007843, - "grad_norm": 0.47789061069488525, - "learning_rate": 8.054139280371034e-06, - "loss": 0.4192, - "step": 14002 - }, - { - "epoch": 0.9151689432063264, - "grad_norm": 0.42289867997169495, - "learning_rate": 8.053862797045413e-06, - "loss": 0.3332, - "step": 14003 - }, - { - "epoch": 0.9152342984118685, - "grad_norm": 0.4325043261051178, - "learning_rate": 8.053586298825047e-06, - "loss": 0.3664, - "step": 14004 - }, - { - "epoch": 0.9152996536174106, - "grad_norm": 0.41553187370300293, - "learning_rate": 8.053309785711281e-06, - "loss": 0.3513, - "step": 14005 - }, - { - "epoch": 0.9153650088229528, - "grad_norm": 0.4572798013687134, - "learning_rate": 8.053033257705467e-06, - "loss": 0.3855, - "step": 14006 - }, - { - "epoch": 0.9154303640284949, - "grad_norm": 0.4481726288795471, - "learning_rate": 8.052756714808951e-06, - "loss": 0.4085, - "step": 14007 - }, - { - "epoch": 0.9154957192340369, - "grad_norm": 0.3968733847141266, - "learning_rate": 8.052480157023083e-06, - "loss": 0.2995, - "step": 14008 - }, - { - "epoch": 0.9155610744395791, - "grad_norm": 0.45097097754478455, - "learning_rate": 8.052203584349211e-06, - "loss": 0.3681, - "step": 14009 - }, - { - "epoch": 0.9156264296451212, - "grad_norm": 0.46446356177330017, - "learning_rate": 8.051926996788685e-06, - "loss": 0.4181, - "step": 14010 - }, - { - "epoch": 0.9156917848506634, - "grad_norm": 0.44969937205314636, - "learning_rate": 8.051650394342856e-06, - "loss": 0.3817, - "step": 14011 - }, - { - "epoch": 0.9157571400562055, - "grad_norm": 0.44434255361557007, - "learning_rate": 8.05137377701307e-06, - "loss": 0.4061, - "step": 14012 - }, - { - "epoch": 0.9158224952617476, - "grad_norm": 0.3985587954521179, - "learning_rate": 8.051097144800675e-06, - "loss": 0.3089, - "step": 14013 - }, - { - "epoch": 0.9158878504672897, - "grad_norm": 0.4738868772983551, - "learning_rate": 8.050820497707023e-06, - "loss": 0.4415, - "step": 14014 - }, - { - "epoch": 0.9159532056728319, - "grad_norm": 0.4262668490409851, - "learning_rate": 8.050543835733463e-06, - "loss": 0.3278, - "step": 14015 - }, - { - "epoch": 0.916018560878374, - "grad_norm": 0.4603838622570038, - "learning_rate": 8.050267158881344e-06, - "loss": 0.3748, - "step": 14016 - }, - { - "epoch": 0.916083916083916, - "grad_norm": 0.4666097164154053, - "learning_rate": 8.049990467152016e-06, - "loss": 0.4056, - "step": 14017 - }, - { - "epoch": 0.9161492712894582, - "grad_norm": 0.46902790665626526, - "learning_rate": 8.049713760546827e-06, - "loss": 0.4064, - "step": 14018 - }, - { - "epoch": 0.9162146264950003, - "grad_norm": 0.4662460684776306, - "learning_rate": 8.049437039067127e-06, - "loss": 0.4338, - "step": 14019 - }, - { - "epoch": 0.9162799817005425, - "grad_norm": 0.4359288811683655, - "learning_rate": 8.049160302714267e-06, - "loss": 0.3762, - "step": 14020 - }, - { - "epoch": 0.9163453369060846, - "grad_norm": 0.4116736650466919, - "learning_rate": 8.048883551489595e-06, - "loss": 0.3213, - "step": 14021 - }, - { - "epoch": 0.9164106921116267, - "grad_norm": 0.4233904480934143, - "learning_rate": 8.048606785394464e-06, - "loss": 0.349, - "step": 14022 - }, - { - "epoch": 0.9164760473171688, - "grad_norm": 0.4174635112285614, - "learning_rate": 8.048330004430219e-06, - "loss": 0.3442, - "step": 14023 - }, - { - "epoch": 0.9165414025227109, - "grad_norm": 0.49676185846328735, - "learning_rate": 8.048053208598213e-06, - "loss": 0.4151, - "step": 14024 - }, - { - "epoch": 0.9166067577282531, - "grad_norm": 0.43644121289253235, - "learning_rate": 8.047776397899796e-06, - "loss": 0.3533, - "step": 14025 - }, - { - "epoch": 0.9166721129337951, - "grad_norm": 0.4449264705181122, - "learning_rate": 8.047499572336316e-06, - "loss": 0.3655, - "step": 14026 - }, - { - "epoch": 0.9167374681393373, - "grad_norm": 0.43979716300964355, - "learning_rate": 8.047222731909128e-06, - "loss": 0.3836, - "step": 14027 - }, - { - "epoch": 0.9168028233448794, - "grad_norm": 0.44416573643684387, - "learning_rate": 8.046945876619577e-06, - "loss": 0.3648, - "step": 14028 - }, - { - "epoch": 0.9168681785504216, - "grad_norm": 0.4326207637786865, - "learning_rate": 8.046669006469017e-06, - "loss": 0.3672, - "step": 14029 - }, - { - "epoch": 0.9169335337559636, - "grad_norm": 0.4313278794288635, - "learning_rate": 8.046392121458795e-06, - "loss": 0.3228, - "step": 14030 - }, - { - "epoch": 0.9169988889615058, - "grad_norm": 0.46649694442749023, - "learning_rate": 8.046115221590263e-06, - "loss": 0.3869, - "step": 14031 - }, - { - "epoch": 0.9170642441670479, - "grad_norm": 0.46140167117118835, - "learning_rate": 8.045838306864772e-06, - "loss": 0.3821, - "step": 14032 - }, - { - "epoch": 0.91712959937259, - "grad_norm": 0.4016384184360504, - "learning_rate": 8.045561377283675e-06, - "loss": 0.3536, - "step": 14033 - }, - { - "epoch": 0.9171949545781322, - "grad_norm": 0.41999879479408264, - "learning_rate": 8.045284432848317e-06, - "loss": 0.3638, - "step": 14034 - }, - { - "epoch": 0.9172603097836742, - "grad_norm": 0.41920387744903564, - "learning_rate": 8.045007473560053e-06, - "loss": 0.3452, - "step": 14035 - }, - { - "epoch": 0.9173256649892164, - "grad_norm": 0.4589771330356598, - "learning_rate": 8.044730499420233e-06, - "loss": 0.3609, - "step": 14036 - }, - { - "epoch": 0.9173910201947585, - "grad_norm": 0.46324291825294495, - "learning_rate": 8.044453510430208e-06, - "loss": 0.4555, - "step": 14037 - }, - { - "epoch": 0.9174563754003007, - "grad_norm": 0.46082064509391785, - "learning_rate": 8.044176506591328e-06, - "loss": 0.3829, - "step": 14038 - }, - { - "epoch": 0.9175217306058427, - "grad_norm": 0.4257607161998749, - "learning_rate": 8.043899487904943e-06, - "loss": 0.3424, - "step": 14039 - }, - { - "epoch": 0.9175870858113849, - "grad_norm": 0.4278295040130615, - "learning_rate": 8.043622454372407e-06, - "loss": 0.3565, - "step": 14040 - }, - { - "epoch": 0.917652441016927, - "grad_norm": 0.4225910007953644, - "learning_rate": 8.04334540599507e-06, - "loss": 0.3504, - "step": 14041 - }, - { - "epoch": 0.9177177962224691, - "grad_norm": 0.4419727623462677, - "learning_rate": 8.043068342774283e-06, - "loss": 0.3587, - "step": 14042 - }, - { - "epoch": 0.9177831514280113, - "grad_norm": 0.4980928897857666, - "learning_rate": 8.042791264711398e-06, - "loss": 0.4225, - "step": 14043 - }, - { - "epoch": 0.9178485066335533, - "grad_norm": 0.44496211409568787, - "learning_rate": 8.042514171807767e-06, - "loss": 0.4079, - "step": 14044 - }, - { - "epoch": 0.9179138618390955, - "grad_norm": 0.4453846514225006, - "learning_rate": 8.042237064064737e-06, - "loss": 0.3677, - "step": 14045 - }, - { - "epoch": 0.9179792170446376, - "grad_norm": 0.4587365388870239, - "learning_rate": 8.041959941483666e-06, - "loss": 0.401, - "step": 14046 - }, - { - "epoch": 0.9180445722501798, - "grad_norm": 0.432699590921402, - "learning_rate": 8.0416828040659e-06, - "loss": 0.3428, - "step": 14047 - }, - { - "epoch": 0.9181099274557218, - "grad_norm": 0.47861722111701965, - "learning_rate": 8.041405651812794e-06, - "loss": 0.4425, - "step": 14048 - }, - { - "epoch": 0.918175282661264, - "grad_norm": 0.457981675863266, - "learning_rate": 8.0411284847257e-06, - "loss": 0.3687, - "step": 14049 - }, - { - "epoch": 0.9182406378668061, - "grad_norm": 0.46564221382141113, - "learning_rate": 8.040851302805968e-06, - "loss": 0.3863, - "step": 14050 - }, - { - "epoch": 0.9183059930723482, - "grad_norm": 0.4485551714897156, - "learning_rate": 8.040574106054952e-06, - "loss": 0.3785, - "step": 14051 - }, - { - "epoch": 0.9183713482778904, - "grad_norm": 0.4502100944519043, - "learning_rate": 8.040296894474e-06, - "loss": 0.3991, - "step": 14052 - }, - { - "epoch": 0.9184367034834324, - "grad_norm": 0.46024709939956665, - "learning_rate": 8.04001966806447e-06, - "loss": 0.376, - "step": 14053 - }, - { - "epoch": 0.9185020586889746, - "grad_norm": 0.42094698548316956, - "learning_rate": 8.039742426827709e-06, - "loss": 0.3352, - "step": 14054 - }, - { - "epoch": 0.9185674138945167, - "grad_norm": 0.4074893593788147, - "learning_rate": 8.03946517076507e-06, - "loss": 0.3268, - "step": 14055 - }, - { - "epoch": 0.9186327691000589, - "grad_norm": 0.4475942552089691, - "learning_rate": 8.03918789987791e-06, - "loss": 0.4081, - "step": 14056 - }, - { - "epoch": 0.9186981243056009, - "grad_norm": 0.44974157214164734, - "learning_rate": 8.038910614167574e-06, - "loss": 0.405, - "step": 14057 - }, - { - "epoch": 0.918763479511143, - "grad_norm": 0.4386823773384094, - "learning_rate": 8.03863331363542e-06, - "loss": 0.3863, - "step": 14058 - }, - { - "epoch": 0.9188288347166852, - "grad_norm": 0.45584502816200256, - "learning_rate": 8.038355998282799e-06, - "loss": 0.3887, - "step": 14059 - }, - { - "epoch": 0.9188941899222273, - "grad_norm": 0.42862972617149353, - "learning_rate": 8.038078668111062e-06, - "loss": 0.3615, - "step": 14060 - }, - { - "epoch": 0.9189595451277695, - "grad_norm": 0.4222893714904785, - "learning_rate": 8.037801323121564e-06, - "loss": 0.3486, - "step": 14061 - }, - { - "epoch": 0.9190249003333115, - "grad_norm": 0.4321688711643219, - "learning_rate": 8.037523963315655e-06, - "loss": 0.3694, - "step": 14062 - }, - { - "epoch": 0.9190902555388537, - "grad_norm": 0.4637817442417145, - "learning_rate": 8.037246588694692e-06, - "loss": 0.3965, - "step": 14063 - }, - { - "epoch": 0.9191556107443958, - "grad_norm": 0.4694924056529999, - "learning_rate": 8.036969199260023e-06, - "loss": 0.4058, - "step": 14064 - }, - { - "epoch": 0.919220965949938, - "grad_norm": 0.4824604392051697, - "learning_rate": 8.036691795013004e-06, - "loss": 0.446, - "step": 14065 - }, - { - "epoch": 0.91928632115548, - "grad_norm": 0.5190874934196472, - "learning_rate": 8.036414375954986e-06, - "loss": 0.3854, - "step": 14066 - }, - { - "epoch": 0.9193516763610221, - "grad_norm": 0.42385995388031006, - "learning_rate": 8.036136942087324e-06, - "loss": 0.3283, - "step": 14067 - }, - { - "epoch": 0.9194170315665643, - "grad_norm": 0.43375396728515625, - "learning_rate": 8.03585949341137e-06, - "loss": 0.3572, - "step": 14068 - }, - { - "epoch": 0.9194823867721064, - "grad_norm": 0.43796199560165405, - "learning_rate": 8.03558202992848e-06, - "loss": 0.3424, - "step": 14069 - }, - { - "epoch": 0.9195477419776485, - "grad_norm": 0.4456939101219177, - "learning_rate": 8.035304551640002e-06, - "loss": 0.3824, - "step": 14070 - }, - { - "epoch": 0.9196130971831906, - "grad_norm": 0.45292073488235474, - "learning_rate": 8.035027058547292e-06, - "loss": 0.3733, - "step": 14071 - }, - { - "epoch": 0.9196784523887328, - "grad_norm": 0.48807671666145325, - "learning_rate": 8.034749550651704e-06, - "loss": 0.4123, - "step": 14072 - }, - { - "epoch": 0.9197438075942749, - "grad_norm": 0.47011175751686096, - "learning_rate": 8.034472027954592e-06, - "loss": 0.3735, - "step": 14073 - }, - { - "epoch": 0.9198091627998171, - "grad_norm": 0.412279337644577, - "learning_rate": 8.034194490457308e-06, - "loss": 0.3731, - "step": 14074 - }, - { - "epoch": 0.9198745180053591, - "grad_norm": 0.45856234431266785, - "learning_rate": 8.033916938161205e-06, - "loss": 0.371, - "step": 14075 - }, - { - "epoch": 0.9199398732109012, - "grad_norm": 0.4508492648601532, - "learning_rate": 8.03363937106764e-06, - "loss": 0.3777, - "step": 14076 - }, - { - "epoch": 0.9200052284164434, - "grad_norm": 0.4206467866897583, - "learning_rate": 8.033361789177964e-06, - "loss": 0.366, - "step": 14077 - }, - { - "epoch": 0.9200705836219855, - "grad_norm": 0.5154498815536499, - "learning_rate": 8.033084192493534e-06, - "loss": 0.4033, - "step": 14078 - }, - { - "epoch": 0.9201359388275276, - "grad_norm": 0.4669542908668518, - "learning_rate": 8.0328065810157e-06, - "loss": 0.4259, - "step": 14079 - }, - { - "epoch": 0.9202012940330697, - "grad_norm": 0.45770663022994995, - "learning_rate": 8.032528954745817e-06, - "loss": 0.3668, - "step": 14080 - }, - { - "epoch": 0.9202666492386119, - "grad_norm": 0.418260782957077, - "learning_rate": 8.03225131368524e-06, - "loss": 0.3461, - "step": 14081 - }, - { - "epoch": 0.920332004444154, - "grad_norm": 0.4325152337551117, - "learning_rate": 8.031973657835321e-06, - "loss": 0.3519, - "step": 14082 - }, - { - "epoch": 0.920397359649696, - "grad_norm": 0.4321635663509369, - "learning_rate": 8.03169598719742e-06, - "loss": 0.3512, - "step": 14083 - }, - { - "epoch": 0.9204627148552382, - "grad_norm": 0.48397862911224365, - "learning_rate": 8.031418301772884e-06, - "loss": 0.4605, - "step": 14084 - }, - { - "epoch": 0.9205280700607803, - "grad_norm": 0.43660688400268555, - "learning_rate": 8.031140601563073e-06, - "loss": 0.3559, - "step": 14085 - }, - { - "epoch": 0.9205934252663225, - "grad_norm": 0.4134843945503235, - "learning_rate": 8.030862886569339e-06, - "loss": 0.3341, - "step": 14086 - }, - { - "epoch": 0.9206587804718646, - "grad_norm": 0.40737178921699524, - "learning_rate": 8.030585156793035e-06, - "loss": 0.3015, - "step": 14087 - }, - { - "epoch": 0.9207241356774067, - "grad_norm": 0.42058470845222473, - "learning_rate": 8.030307412235519e-06, - "loss": 0.3553, - "step": 14088 - }, - { - "epoch": 0.9207894908829488, - "grad_norm": 0.402561753988266, - "learning_rate": 8.030029652898144e-06, - "loss": 0.3393, - "step": 14089 - }, - { - "epoch": 0.920854846088491, - "grad_norm": 0.4387030601501465, - "learning_rate": 8.029751878782264e-06, - "loss": 0.3579, - "step": 14090 - }, - { - "epoch": 0.9209202012940331, - "grad_norm": 0.4189580976963043, - "learning_rate": 8.029474089889232e-06, - "loss": 0.346, - "step": 14091 - }, - { - "epoch": 0.9209855564995751, - "grad_norm": 0.40532082319259644, - "learning_rate": 8.029196286220409e-06, - "loss": 0.3278, - "step": 14092 - }, - { - "epoch": 0.9210509117051173, - "grad_norm": 0.4416372776031494, - "learning_rate": 8.028918467777145e-06, - "loss": 0.3642, - "step": 14093 - }, - { - "epoch": 0.9211162669106594, - "grad_norm": 0.4237021803855896, - "learning_rate": 8.028640634560796e-06, - "loss": 0.3623, - "step": 14094 - }, - { - "epoch": 0.9211816221162016, - "grad_norm": 0.4447159171104431, - "learning_rate": 8.028362786572718e-06, - "loss": 0.3813, - "step": 14095 - }, - { - "epoch": 0.9212469773217437, - "grad_norm": 0.4404435157775879, - "learning_rate": 8.028084923814266e-06, - "loss": 0.3373, - "step": 14096 - }, - { - "epoch": 0.9213123325272858, - "grad_norm": 0.4356805086135864, - "learning_rate": 8.027807046286795e-06, - "loss": 0.3691, - "step": 14097 - }, - { - "epoch": 0.9213776877328279, - "grad_norm": 0.455952525138855, - "learning_rate": 8.027529153991659e-06, - "loss": 0.355, - "step": 14098 - }, - { - "epoch": 0.9214430429383701, - "grad_norm": 0.42456090450286865, - "learning_rate": 8.027251246930214e-06, - "loss": 0.342, - "step": 14099 - }, - { - "epoch": 0.9215083981439122, - "grad_norm": 0.4053103029727936, - "learning_rate": 8.026973325103818e-06, - "loss": 0.3009, - "step": 14100 - }, - { - "epoch": 0.9215737533494542, - "grad_norm": 0.44154906272888184, - "learning_rate": 8.026695388513822e-06, - "loss": 0.399, - "step": 14101 - }, - { - "epoch": 0.9216391085549964, - "grad_norm": 0.41678181290626526, - "learning_rate": 8.026417437161585e-06, - "loss": 0.3506, - "step": 14102 - }, - { - "epoch": 0.9217044637605385, - "grad_norm": 0.41899269819259644, - "learning_rate": 8.026139471048462e-06, - "loss": 0.3613, - "step": 14103 - }, - { - "epoch": 0.9217698189660807, - "grad_norm": 0.4102371037006378, - "learning_rate": 8.025861490175809e-06, - "loss": 0.3574, - "step": 14104 - }, - { - "epoch": 0.9218351741716228, - "grad_norm": 0.415998637676239, - "learning_rate": 8.025583494544979e-06, - "loss": 0.3489, - "step": 14105 - }, - { - "epoch": 0.9219005293771649, - "grad_norm": 0.4359937310218811, - "learning_rate": 8.025305484157332e-06, - "loss": 0.4027, - "step": 14106 - }, - { - "epoch": 0.921965884582707, - "grad_norm": 0.44541677832603455, - "learning_rate": 8.025027459014223e-06, - "loss": 0.3686, - "step": 14107 - }, - { - "epoch": 0.9220312397882491, - "grad_norm": 0.4568682014942169, - "learning_rate": 8.024749419117007e-06, - "loss": 0.3972, - "step": 14108 - }, - { - "epoch": 0.9220965949937913, - "grad_norm": 0.47529077529907227, - "learning_rate": 8.024471364467039e-06, - "loss": 0.4396, - "step": 14109 - }, - { - "epoch": 0.9221619501993333, - "grad_norm": 0.4220034182071686, - "learning_rate": 8.024193295065677e-06, - "loss": 0.336, - "step": 14110 - }, - { - "epoch": 0.9222273054048755, - "grad_norm": 0.45059680938720703, - "learning_rate": 8.023915210914274e-06, - "loss": 0.3526, - "step": 14111 - }, - { - "epoch": 0.9222926606104176, - "grad_norm": 0.4628525674343109, - "learning_rate": 8.023637112014192e-06, - "loss": 0.4058, - "step": 14112 - }, - { - "epoch": 0.9223580158159598, - "grad_norm": 0.42804980278015137, - "learning_rate": 8.023358998366783e-06, - "loss": 0.3568, - "step": 14113 - }, - { - "epoch": 0.9224233710215018, - "grad_norm": 0.4664314091205597, - "learning_rate": 8.023080869973405e-06, - "loss": 0.408, - "step": 14114 - }, - { - "epoch": 0.922488726227044, - "grad_norm": 0.43588483333587646, - "learning_rate": 8.022802726835415e-06, - "loss": 0.4116, - "step": 14115 - }, - { - "epoch": 0.9225540814325861, - "grad_norm": 0.43505632877349854, - "learning_rate": 8.022524568954169e-06, - "loss": 0.3795, - "step": 14116 - }, - { - "epoch": 0.9226194366381282, - "grad_norm": 0.4895874261856079, - "learning_rate": 8.022246396331022e-06, - "loss": 0.3742, - "step": 14117 - }, - { - "epoch": 0.9226847918436704, - "grad_norm": 0.4584488570690155, - "learning_rate": 8.021968208967334e-06, - "loss": 0.4031, - "step": 14118 - }, - { - "epoch": 0.9227501470492124, - "grad_norm": 0.4242425560951233, - "learning_rate": 8.021690006864459e-06, - "loss": 0.3383, - "step": 14119 - }, - { - "epoch": 0.9228155022547546, - "grad_norm": 0.4379027187824249, - "learning_rate": 8.021411790023755e-06, - "loss": 0.3827, - "step": 14120 - }, - { - "epoch": 0.9228808574602967, - "grad_norm": 0.4540347158908844, - "learning_rate": 8.02113355844658e-06, - "loss": 0.4023, - "step": 14121 - }, - { - "epoch": 0.9229462126658389, - "grad_norm": 0.528438925743103, - "learning_rate": 8.020855312134289e-06, - "loss": 0.4738, - "step": 14122 - }, - { - "epoch": 0.923011567871381, - "grad_norm": 0.4343968331813812, - "learning_rate": 8.020577051088241e-06, - "loss": 0.3494, - "step": 14123 - }, - { - "epoch": 0.9230769230769231, - "grad_norm": 0.41011497378349304, - "learning_rate": 8.020298775309792e-06, - "loss": 0.3222, - "step": 14124 - }, - { - "epoch": 0.9231422782824652, - "grad_norm": 0.42649218440055847, - "learning_rate": 8.0200204848003e-06, - "loss": 0.3851, - "step": 14125 - }, - { - "epoch": 0.9232076334880073, - "grad_norm": 0.4764016568660736, - "learning_rate": 8.019742179561119e-06, - "loss": 0.3977, - "step": 14126 - }, - { - "epoch": 0.9232729886935495, - "grad_norm": 0.4466722905635834, - "learning_rate": 8.019463859593613e-06, - "loss": 0.3403, - "step": 14127 - }, - { - "epoch": 0.9233383438990915, - "grad_norm": 0.41392815113067627, - "learning_rate": 8.019185524899133e-06, - "loss": 0.3264, - "step": 14128 - }, - { - "epoch": 0.9234036991046337, - "grad_norm": 0.4437607526779175, - "learning_rate": 8.018907175479041e-06, - "loss": 0.3684, - "step": 14129 - }, - { - "epoch": 0.9234690543101758, - "grad_norm": 0.4070267677307129, - "learning_rate": 8.018628811334693e-06, - "loss": 0.3307, - "step": 14130 - }, - { - "epoch": 0.923534409515718, - "grad_norm": 0.44076064229011536, - "learning_rate": 8.018350432467446e-06, - "loss": 0.3766, - "step": 14131 - }, - { - "epoch": 0.92359976472126, - "grad_norm": 0.4590109884738922, - "learning_rate": 8.018072038878657e-06, - "loss": 0.3683, - "step": 14132 - }, - { - "epoch": 0.9236651199268022, - "grad_norm": 0.4435414671897888, - "learning_rate": 8.017793630569689e-06, - "loss": 0.3853, - "step": 14133 - }, - { - "epoch": 0.9237304751323443, - "grad_norm": 0.44672533869743347, - "learning_rate": 8.017515207541892e-06, - "loss": 0.4068, - "step": 14134 - }, - { - "epoch": 0.9237958303378864, - "grad_norm": 0.41453424096107483, - "learning_rate": 8.017236769796628e-06, - "loss": 0.3774, - "step": 14135 - }, - { - "epoch": 0.9238611855434286, - "grad_norm": 0.4354002773761749, - "learning_rate": 8.016958317335257e-06, - "loss": 0.3926, - "step": 14136 - }, - { - "epoch": 0.9239265407489706, - "grad_norm": 0.43771126866340637, - "learning_rate": 8.016679850159134e-06, - "loss": 0.3681, - "step": 14137 - }, - { - "epoch": 0.9239918959545128, - "grad_norm": 0.3939455449581146, - "learning_rate": 8.016401368269618e-06, - "loss": 0.3074, - "step": 14138 - }, - { - "epoch": 0.9240572511600549, - "grad_norm": 0.5587556958198547, - "learning_rate": 8.016122871668068e-06, - "loss": 0.3394, - "step": 14139 - }, - { - "epoch": 0.9241226063655971, - "grad_norm": 0.4305996894836426, - "learning_rate": 8.015844360355841e-06, - "loss": 0.3716, - "step": 14140 - }, - { - "epoch": 0.9241879615711391, - "grad_norm": 0.45352616906166077, - "learning_rate": 8.0155658343343e-06, - "loss": 0.3955, - "step": 14141 - }, - { - "epoch": 0.9242533167766812, - "grad_norm": 0.4218350052833557, - "learning_rate": 8.015287293604796e-06, - "loss": 0.3251, - "step": 14142 - }, - { - "epoch": 0.9243186719822234, - "grad_norm": 0.44621509313583374, - "learning_rate": 8.015008738168692e-06, - "loss": 0.3738, - "step": 14143 - }, - { - "epoch": 0.9243840271877655, - "grad_norm": 0.4402707815170288, - "learning_rate": 8.014730168027345e-06, - "loss": 0.3782, - "step": 14144 - }, - { - "epoch": 0.9244493823933077, - "grad_norm": 0.4362731873989105, - "learning_rate": 8.014451583182117e-06, - "loss": 0.3671, - "step": 14145 - }, - { - "epoch": 0.9245147375988497, - "grad_norm": 0.4343254566192627, - "learning_rate": 8.014172983634363e-06, - "loss": 0.36, - "step": 14146 - }, - { - "epoch": 0.9245800928043919, - "grad_norm": 0.43919089436531067, - "learning_rate": 8.013894369385442e-06, - "loss": 0.3773, - "step": 14147 - }, - { - "epoch": 0.924645448009934, - "grad_norm": 0.490822970867157, - "learning_rate": 8.013615740436717e-06, - "loss": 0.4198, - "step": 14148 - }, - { - "epoch": 0.9247108032154762, - "grad_norm": 0.48890480399131775, - "learning_rate": 8.013337096789541e-06, - "loss": 0.4519, - "step": 14149 - }, - { - "epoch": 0.9247761584210182, - "grad_norm": 0.4303399324417114, - "learning_rate": 8.013058438445278e-06, - "loss": 0.3518, - "step": 14150 - }, - { - "epoch": 0.9248415136265603, - "grad_norm": 0.4564321041107178, - "learning_rate": 8.012779765405285e-06, - "loss": 0.4345, - "step": 14151 - }, - { - "epoch": 0.9249068688321025, - "grad_norm": 0.4025900661945343, - "learning_rate": 8.012501077670922e-06, - "loss": 0.3022, - "step": 14152 - }, - { - "epoch": 0.9249722240376446, - "grad_norm": 0.5529888272285461, - "learning_rate": 8.012222375243545e-06, - "loss": 0.565, - "step": 14153 - }, - { - "epoch": 0.9250375792431867, - "grad_norm": 0.4156367778778076, - "learning_rate": 8.011943658124516e-06, - "loss": 0.3465, - "step": 14154 - }, - { - "epoch": 0.9251029344487288, - "grad_norm": 0.4275127649307251, - "learning_rate": 8.011664926315197e-06, - "loss": 0.356, - "step": 14155 - }, - { - "epoch": 0.925168289654271, - "grad_norm": 0.47950467467308044, - "learning_rate": 8.011386179816944e-06, - "loss": 0.3727, - "step": 14156 - }, - { - "epoch": 0.9252336448598131, - "grad_norm": 0.4584794044494629, - "learning_rate": 8.011107418631117e-06, - "loss": 0.4177, - "step": 14157 - }, - { - "epoch": 0.9252990000653553, - "grad_norm": 0.4537228047847748, - "learning_rate": 8.010828642759076e-06, - "loss": 0.3941, - "step": 14158 - }, - { - "epoch": 0.9253643552708973, - "grad_norm": 0.46418997645378113, - "learning_rate": 8.01054985220218e-06, - "loss": 0.4151, - "step": 14159 - }, - { - "epoch": 0.9254297104764394, - "grad_norm": 0.42200714349746704, - "learning_rate": 8.010271046961791e-06, - "loss": 0.3486, - "step": 14160 - }, - { - "epoch": 0.9254950656819816, - "grad_norm": 0.4400850236415863, - "learning_rate": 8.009992227039264e-06, - "loss": 0.3802, - "step": 14161 - }, - { - "epoch": 0.9255604208875237, - "grad_norm": 0.4134165346622467, - "learning_rate": 8.009713392435966e-06, - "loss": 0.3162, - "step": 14162 - }, - { - "epoch": 0.9256257760930658, - "grad_norm": 0.4397551119327545, - "learning_rate": 8.009434543153252e-06, - "loss": 0.3634, - "step": 14163 - }, - { - "epoch": 0.9256911312986079, - "grad_norm": 0.43329912424087524, - "learning_rate": 8.009155679192482e-06, - "loss": 0.3662, - "step": 14164 - }, - { - "epoch": 0.9257564865041501, - "grad_norm": 0.44430699944496155, - "learning_rate": 8.008876800555018e-06, - "loss": 0.3675, - "step": 14165 - }, - { - "epoch": 0.9258218417096922, - "grad_norm": 0.424127995967865, - "learning_rate": 8.00859790724222e-06, - "loss": 0.3598, - "step": 14166 - }, - { - "epoch": 0.9258871969152342, - "grad_norm": 0.41890501976013184, - "learning_rate": 8.008318999255447e-06, - "loss": 0.3285, - "step": 14167 - }, - { - "epoch": 0.9259525521207764, - "grad_norm": 0.4419791102409363, - "learning_rate": 8.008040076596059e-06, - "loss": 0.4035, - "step": 14168 - }, - { - "epoch": 0.9260179073263185, - "grad_norm": 0.4523894786834717, - "learning_rate": 8.00776113926542e-06, - "loss": 0.4088, - "step": 14169 - }, - { - "epoch": 0.9260832625318607, - "grad_norm": 0.4440203905105591, - "learning_rate": 8.007482187264885e-06, - "loss": 0.3575, - "step": 14170 - }, - { - "epoch": 0.9261486177374028, - "grad_norm": 0.4431575834751129, - "learning_rate": 8.007203220595821e-06, - "loss": 0.4295, - "step": 14171 - }, - { - "epoch": 0.9262139729429449, - "grad_norm": 0.46677714586257935, - "learning_rate": 8.006924239259582e-06, - "loss": 0.4087, - "step": 14172 - }, - { - "epoch": 0.926279328148487, - "grad_norm": 0.40673959255218506, - "learning_rate": 8.006645243257534e-06, - "loss": 0.3306, - "step": 14173 - }, - { - "epoch": 0.9263446833540292, - "grad_norm": 0.45933017134666443, - "learning_rate": 8.006366232591035e-06, - "loss": 0.4008, - "step": 14174 - }, - { - "epoch": 0.9264100385595713, - "grad_norm": 0.42018696665763855, - "learning_rate": 8.006087207261445e-06, - "loss": 0.3508, - "step": 14175 - }, - { - "epoch": 0.9264753937651133, - "grad_norm": 0.42296692728996277, - "learning_rate": 8.005808167270126e-06, - "loss": 0.3451, - "step": 14176 - }, - { - "epoch": 0.9265407489706555, - "grad_norm": 0.43631839752197266, - "learning_rate": 8.00552911261844e-06, - "loss": 0.3254, - "step": 14177 - }, - { - "epoch": 0.9266061041761976, - "grad_norm": 0.47382402420043945, - "learning_rate": 8.005250043307749e-06, - "loss": 0.4232, - "step": 14178 - }, - { - "epoch": 0.9266714593817398, - "grad_norm": 0.43479087948799133, - "learning_rate": 8.004970959339411e-06, - "loss": 0.3538, - "step": 14179 - }, - { - "epoch": 0.9267368145872819, - "grad_norm": 0.44161510467529297, - "learning_rate": 8.004691860714788e-06, - "loss": 0.3582, - "step": 14180 - }, - { - "epoch": 0.926802169792824, - "grad_norm": 0.4279502332210541, - "learning_rate": 8.004412747435244e-06, - "loss": 0.3639, - "step": 14181 - }, - { - "epoch": 0.9268675249983661, - "grad_norm": 0.432258278131485, - "learning_rate": 8.004133619502137e-06, - "loss": 0.3585, - "step": 14182 - }, - { - "epoch": 0.9269328802039083, - "grad_norm": 0.43750303983688354, - "learning_rate": 8.003854476916828e-06, - "loss": 0.3829, - "step": 14183 - }, - { - "epoch": 0.9269982354094504, - "grad_norm": 0.42415958642959595, - "learning_rate": 8.003575319680682e-06, - "loss": 0.3415, - "step": 14184 - }, - { - "epoch": 0.9270635906149924, - "grad_norm": 0.4484017491340637, - "learning_rate": 8.003296147795058e-06, - "loss": 0.3768, - "step": 14185 - }, - { - "epoch": 0.9271289458205346, - "grad_norm": 0.4493042826652527, - "learning_rate": 8.003016961261318e-06, - "loss": 0.373, - "step": 14186 - }, - { - "epoch": 0.9271943010260767, - "grad_norm": 0.43740928173065186, - "learning_rate": 8.002737760080826e-06, - "loss": 0.3696, - "step": 14187 - }, - { - "epoch": 0.9272596562316189, - "grad_norm": 0.42805278301239014, - "learning_rate": 8.002458544254939e-06, - "loss": 0.3669, - "step": 14188 - }, - { - "epoch": 0.927325011437161, - "grad_norm": 0.4790594279766083, - "learning_rate": 8.002179313785023e-06, - "loss": 0.4044, - "step": 14189 - }, - { - "epoch": 0.9273903666427031, - "grad_norm": 0.5047306418418884, - "learning_rate": 8.001900068672438e-06, - "loss": 0.4053, - "step": 14190 - }, - { - "epoch": 0.9274557218482452, - "grad_norm": 0.490181028842926, - "learning_rate": 8.001620808918546e-06, - "loss": 0.4357, - "step": 14191 - }, - { - "epoch": 0.9275210770537873, - "grad_norm": 0.44902509450912476, - "learning_rate": 8.00134153452471e-06, - "loss": 0.391, - "step": 14192 - }, - { - "epoch": 0.9275864322593295, - "grad_norm": 0.43170663714408875, - "learning_rate": 8.001062245492291e-06, - "loss": 0.3439, - "step": 14193 - }, - { - "epoch": 0.9276517874648715, - "grad_norm": 0.3930788040161133, - "learning_rate": 8.000782941822653e-06, - "loss": 0.2906, - "step": 14194 - }, - { - "epoch": 0.9277171426704137, - "grad_norm": 0.4794231355190277, - "learning_rate": 8.000503623517155e-06, - "loss": 0.3957, - "step": 14195 - }, - { - "epoch": 0.9277824978759558, - "grad_norm": 0.4684624969959259, - "learning_rate": 8.000224290577164e-06, - "loss": 0.4034, - "step": 14196 - }, - { - "epoch": 0.927847853081498, - "grad_norm": 0.4549058675765991, - "learning_rate": 7.999944943004036e-06, - "loss": 0.4003, - "step": 14197 - }, - { - "epoch": 0.92791320828704, - "grad_norm": 0.45974260568618774, - "learning_rate": 7.999665580799142e-06, - "loss": 0.405, - "step": 14198 - }, - { - "epoch": 0.9279785634925822, - "grad_norm": 0.47040218114852905, - "learning_rate": 7.999386203963836e-06, - "loss": 0.4308, - "step": 14199 - }, - { - "epoch": 0.9280439186981243, - "grad_norm": 0.524333119392395, - "learning_rate": 7.999106812499486e-06, - "loss": 0.5041, - "step": 14200 - }, - { - "epoch": 0.9281092739036664, - "grad_norm": 0.4343332350254059, - "learning_rate": 7.998827406407453e-06, - "loss": 0.3523, - "step": 14201 - }, - { - "epoch": 0.9281746291092086, - "grad_norm": 0.46028608083724976, - "learning_rate": 7.9985479856891e-06, - "loss": 0.3914, - "step": 14202 - }, - { - "epoch": 0.9282399843147506, - "grad_norm": 0.4479673206806183, - "learning_rate": 7.998268550345788e-06, - "loss": 0.3111, - "step": 14203 - }, - { - "epoch": 0.9283053395202928, - "grad_norm": 0.46122410893440247, - "learning_rate": 7.997989100378883e-06, - "loss": 0.4118, - "step": 14204 - }, - { - "epoch": 0.9283706947258349, - "grad_norm": 0.4570966362953186, - "learning_rate": 7.997709635789746e-06, - "loss": 0.39, - "step": 14205 - }, - { - "epoch": 0.9284360499313771, - "grad_norm": 0.42992284893989563, - "learning_rate": 7.997430156579742e-06, - "loss": 0.3462, - "step": 14206 - }, - { - "epoch": 0.9285014051369191, - "grad_norm": 0.4584921598434448, - "learning_rate": 7.997150662750232e-06, - "loss": 0.3759, - "step": 14207 - }, - { - "epoch": 0.9285667603424613, - "grad_norm": 0.45437732338905334, - "learning_rate": 7.99687115430258e-06, - "loss": 0.3729, - "step": 14208 - }, - { - "epoch": 0.9286321155480034, - "grad_norm": 0.47542712092399597, - "learning_rate": 7.996591631238149e-06, - "loss": 0.3741, - "step": 14209 - }, - { - "epoch": 0.9286974707535455, - "grad_norm": 0.4160076975822449, - "learning_rate": 7.9963120935583e-06, - "loss": 0.3752, - "step": 14210 - }, - { - "epoch": 0.9287628259590877, - "grad_norm": 0.4273441731929779, - "learning_rate": 7.996032541264403e-06, - "loss": 0.3467, - "step": 14211 - }, - { - "epoch": 0.9288281811646297, - "grad_norm": 0.4041471481323242, - "learning_rate": 7.995752974357816e-06, - "loss": 0.2815, - "step": 14212 - }, - { - "epoch": 0.9288935363701719, - "grad_norm": 0.4183214008808136, - "learning_rate": 7.995473392839903e-06, - "loss": 0.3351, - "step": 14213 - }, - { - "epoch": 0.928958891575714, - "grad_norm": 0.4557066857814789, - "learning_rate": 7.995193796712028e-06, - "loss": 0.3843, - "step": 14214 - }, - { - "epoch": 0.9290242467812562, - "grad_norm": 0.4735686779022217, - "learning_rate": 7.994914185975557e-06, - "loss": 0.4101, - "step": 14215 - }, - { - "epoch": 0.9290896019867982, - "grad_norm": 0.43914374709129333, - "learning_rate": 7.99463456063185e-06, - "loss": 0.3932, - "step": 14216 - }, - { - "epoch": 0.9291549571923404, - "grad_norm": 0.42830193042755127, - "learning_rate": 7.994354920682276e-06, - "loss": 0.3551, - "step": 14217 - }, - { - "epoch": 0.9292203123978825, - "grad_norm": 0.4560054838657379, - "learning_rate": 7.994075266128194e-06, - "loss": 0.369, - "step": 14218 - }, - { - "epoch": 0.9292856676034246, - "grad_norm": 0.4357489347457886, - "learning_rate": 7.993795596970968e-06, - "loss": 0.3852, - "step": 14219 - }, - { - "epoch": 0.9293510228089668, - "grad_norm": 0.42314496636390686, - "learning_rate": 7.993515913211967e-06, - "loss": 0.3185, - "step": 14220 - }, - { - "epoch": 0.9294163780145088, - "grad_norm": 0.4712805151939392, - "learning_rate": 7.993236214852548e-06, - "loss": 0.3932, - "step": 14221 - }, - { - "epoch": 0.929481733220051, - "grad_norm": 0.43544238805770874, - "learning_rate": 7.992956501894083e-06, - "loss": 0.376, - "step": 14222 - }, - { - "epoch": 0.9295470884255931, - "grad_norm": 0.4176951050758362, - "learning_rate": 7.992676774337928e-06, - "loss": 0.3721, - "step": 14223 - }, - { - "epoch": 0.9296124436311353, - "grad_norm": 0.45315301418304443, - "learning_rate": 7.992397032185455e-06, - "loss": 0.3852, - "step": 14224 - }, - { - "epoch": 0.9296777988366773, - "grad_norm": 0.47756463289260864, - "learning_rate": 7.992117275438024e-06, - "loss": 0.408, - "step": 14225 - }, - { - "epoch": 0.9297431540422194, - "grad_norm": 0.5052573084831238, - "learning_rate": 7.991837504097e-06, - "loss": 0.4338, - "step": 14226 - }, - { - "epoch": 0.9298085092477616, - "grad_norm": 0.4030114412307739, - "learning_rate": 7.99155771816375e-06, - "loss": 0.3056, - "step": 14227 - }, - { - "epoch": 0.9298738644533037, - "grad_norm": 0.45975929498672485, - "learning_rate": 7.991277917639633e-06, - "loss": 0.3831, - "step": 14228 - }, - { - "epoch": 0.9299392196588459, - "grad_norm": 0.49597951769828796, - "learning_rate": 7.99099810252602e-06, - "loss": 0.3914, - "step": 14229 - }, - { - "epoch": 0.9300045748643879, - "grad_norm": 0.4525391161441803, - "learning_rate": 7.990718272824271e-06, - "loss": 0.3737, - "step": 14230 - }, - { - "epoch": 0.9300699300699301, - "grad_norm": 0.4771629869937897, - "learning_rate": 7.990438428535757e-06, - "loss": 0.4152, - "step": 14231 - }, - { - "epoch": 0.9301352852754722, - "grad_norm": 0.46415191888809204, - "learning_rate": 7.990158569661836e-06, - "loss": 0.3592, - "step": 14232 - }, - { - "epoch": 0.9302006404810144, - "grad_norm": 0.43217816948890686, - "learning_rate": 7.989878696203875e-06, - "loss": 0.384, - "step": 14233 - }, - { - "epoch": 0.9302659956865564, - "grad_norm": 0.4254879057407379, - "learning_rate": 7.98959880816324e-06, - "loss": 0.3658, - "step": 14234 - }, - { - "epoch": 0.9303313508920985, - "grad_norm": 0.4584832787513733, - "learning_rate": 7.989318905541297e-06, - "loss": 0.3765, - "step": 14235 - }, - { - "epoch": 0.9303967060976407, - "grad_norm": 0.46705707907676697, - "learning_rate": 7.98903898833941e-06, - "loss": 0.4238, - "step": 14236 - }, - { - "epoch": 0.9304620613031828, - "grad_norm": 0.436081200838089, - "learning_rate": 7.988759056558945e-06, - "loss": 0.3584, - "step": 14237 - }, - { - "epoch": 0.930527416508725, - "grad_norm": 0.4634794592857361, - "learning_rate": 7.988479110201266e-06, - "loss": 0.3619, - "step": 14238 - }, - { - "epoch": 0.930592771714267, - "grad_norm": 0.47877487540245056, - "learning_rate": 7.988199149267737e-06, - "loss": 0.4499, - "step": 14239 - }, - { - "epoch": 0.9306581269198092, - "grad_norm": 0.4497900605201721, - "learning_rate": 7.987919173759728e-06, - "loss": 0.3661, - "step": 14240 - }, - { - "epoch": 0.9307234821253513, - "grad_norm": 0.44208231568336487, - "learning_rate": 7.987639183678601e-06, - "loss": 0.3839, - "step": 14241 - }, - { - "epoch": 0.9307888373308935, - "grad_norm": 0.472713440656662, - "learning_rate": 7.987359179025725e-06, - "loss": 0.412, - "step": 14242 - }, - { - "epoch": 0.9308541925364355, - "grad_norm": 0.4360504448413849, - "learning_rate": 7.98707915980246e-06, - "loss": 0.372, - "step": 14243 - }, - { - "epoch": 0.9309195477419776, - "grad_norm": 0.46010735630989075, - "learning_rate": 7.986799126010176e-06, - "loss": 0.3819, - "step": 14244 - }, - { - "epoch": 0.9309849029475198, - "grad_norm": 0.41593727469444275, - "learning_rate": 7.986519077650239e-06, - "loss": 0.3237, - "step": 14245 - }, - { - "epoch": 0.9310502581530619, - "grad_norm": 0.45757660269737244, - "learning_rate": 7.986239014724013e-06, - "loss": 0.3619, - "step": 14246 - }, - { - "epoch": 0.931115613358604, - "grad_norm": 0.4612928330898285, - "learning_rate": 7.985958937232865e-06, - "loss": 0.3523, - "step": 14247 - }, - { - "epoch": 0.9311809685641461, - "grad_norm": 0.4958532452583313, - "learning_rate": 7.985678845178162e-06, - "loss": 0.4336, - "step": 14248 - }, - { - "epoch": 0.9312463237696883, - "grad_norm": 0.43681925535202026, - "learning_rate": 7.985398738561267e-06, - "loss": 0.3707, - "step": 14249 - }, - { - "epoch": 0.9313116789752304, - "grad_norm": 0.44724389910697937, - "learning_rate": 7.98511861738355e-06, - "loss": 0.3772, - "step": 14250 - }, - { - "epoch": 0.9313770341807724, - "grad_norm": 0.4231940805912018, - "learning_rate": 7.984838481646374e-06, - "loss": 0.3316, - "step": 14251 - }, - { - "epoch": 0.9314423893863146, - "grad_norm": 0.4736866056919098, - "learning_rate": 7.984558331351106e-06, - "loss": 0.3722, - "step": 14252 - }, - { - "epoch": 0.9315077445918567, - "grad_norm": 0.41867658495903015, - "learning_rate": 7.984278166499116e-06, - "loss": 0.3261, - "step": 14253 - }, - { - "epoch": 0.9315730997973989, - "grad_norm": 0.4349023401737213, - "learning_rate": 7.983997987091765e-06, - "loss": 0.3559, - "step": 14254 - }, - { - "epoch": 0.931638455002941, - "grad_norm": 0.46389687061309814, - "learning_rate": 7.983717793130423e-06, - "loss": 0.3973, - "step": 14255 - }, - { - "epoch": 0.9317038102084831, - "grad_norm": 0.4258340299129486, - "learning_rate": 7.983437584616455e-06, - "loss": 0.3873, - "step": 14256 - }, - { - "epoch": 0.9317691654140252, - "grad_norm": 0.4512598216533661, - "learning_rate": 7.983157361551229e-06, - "loss": 0.3656, - "step": 14257 - }, - { - "epoch": 0.9318345206195674, - "grad_norm": 0.5446600317955017, - "learning_rate": 7.98287712393611e-06, - "loss": 0.4037, - "step": 14258 - }, - { - "epoch": 0.9318998758251095, - "grad_norm": 0.437092125415802, - "learning_rate": 7.982596871772468e-06, - "loss": 0.3498, - "step": 14259 - }, - { - "epoch": 0.9319652310306515, - "grad_norm": 1.145544409751892, - "learning_rate": 7.982316605061665e-06, - "loss": 0.3887, - "step": 14260 - }, - { - "epoch": 0.9320305862361937, - "grad_norm": 0.42923155426979065, - "learning_rate": 7.982036323805074e-06, - "loss": 0.3733, - "step": 14261 - }, - { - "epoch": 0.9320959414417358, - "grad_norm": 0.43429166078567505, - "learning_rate": 7.981756028004054e-06, - "loss": 0.3385, - "step": 14262 - }, - { - "epoch": 0.932161296647278, - "grad_norm": 0.47522273659706116, - "learning_rate": 7.98147571765998e-06, - "loss": 0.3935, - "step": 14263 - }, - { - "epoch": 0.93222665185282, - "grad_norm": 0.4046888053417206, - "learning_rate": 7.981195392774218e-06, - "loss": 0.3657, - "step": 14264 - }, - { - "epoch": 0.9322920070583622, - "grad_norm": 0.43544861674308777, - "learning_rate": 7.98091505334813e-06, - "loss": 0.3991, - "step": 14265 - }, - { - "epoch": 0.9323573622639043, - "grad_norm": 0.4282855987548828, - "learning_rate": 7.98063469938309e-06, - "loss": 0.3398, - "step": 14266 - }, - { - "epoch": 0.9324227174694465, - "grad_norm": 0.4156448245048523, - "learning_rate": 7.980354330880458e-06, - "loss": 0.3609, - "step": 14267 - }, - { - "epoch": 0.9324880726749886, - "grad_norm": 0.48186618089675903, - "learning_rate": 7.980073947841607e-06, - "loss": 0.4331, - "step": 14268 - }, - { - "epoch": 0.9325534278805306, - "grad_norm": 0.45654037594795227, - "learning_rate": 7.979793550267904e-06, - "loss": 0.406, - "step": 14269 - }, - { - "epoch": 0.9326187830860728, - "grad_norm": 0.4358753561973572, - "learning_rate": 7.979513138160716e-06, - "loss": 0.3524, - "step": 14270 - }, - { - "epoch": 0.9326841382916149, - "grad_norm": 0.4549356698989868, - "learning_rate": 7.979232711521407e-06, - "loss": 0.3817, - "step": 14271 - }, - { - "epoch": 0.9327494934971571, - "grad_norm": 0.41262155771255493, - "learning_rate": 7.97895227035135e-06, - "loss": 0.3298, - "step": 14272 - }, - { - "epoch": 0.9328148487026992, - "grad_norm": 0.40901660919189453, - "learning_rate": 7.97867181465191e-06, - "loss": 0.3205, - "step": 14273 - }, - { - "epoch": 0.9328802039082413, - "grad_norm": 0.4325229525566101, - "learning_rate": 7.978391344424457e-06, - "loss": 0.3487, - "step": 14274 - }, - { - "epoch": 0.9329455591137834, - "grad_norm": 0.42647942900657654, - "learning_rate": 7.978110859670358e-06, - "loss": 0.3353, - "step": 14275 - }, - { - "epoch": 0.9330109143193255, - "grad_norm": 0.4505908489227295, - "learning_rate": 7.97783036039098e-06, - "loss": 0.4073, - "step": 14276 - }, - { - "epoch": 0.9330762695248677, - "grad_norm": 0.4391935467720032, - "learning_rate": 7.977549846587691e-06, - "loss": 0.3993, - "step": 14277 - }, - { - "epoch": 0.9331416247304097, - "grad_norm": 0.45251569151878357, - "learning_rate": 7.97726931826186e-06, - "loss": 0.3808, - "step": 14278 - }, - { - "epoch": 0.9332069799359519, - "grad_norm": 0.4612550735473633, - "learning_rate": 7.976988775414855e-06, - "loss": 0.383, - "step": 14279 - }, - { - "epoch": 0.933272335141494, - "grad_norm": 0.40265682339668274, - "learning_rate": 7.976708218048044e-06, - "loss": 0.3279, - "step": 14280 - }, - { - "epoch": 0.9333376903470362, - "grad_norm": 0.46680349111557007, - "learning_rate": 7.976427646162796e-06, - "loss": 0.4127, - "step": 14281 - }, - { - "epoch": 0.9334030455525782, - "grad_norm": 0.4239867031574249, - "learning_rate": 7.97614705976048e-06, - "loss": 0.341, - "step": 14282 - }, - { - "epoch": 0.9334684007581204, - "grad_norm": 0.48194319009780884, - "learning_rate": 7.975866458842463e-06, - "loss": 0.4154, - "step": 14283 - }, - { - "epoch": 0.9335337559636625, - "grad_norm": 0.5396852493286133, - "learning_rate": 7.975585843410115e-06, - "loss": 0.3535, - "step": 14284 - }, - { - "epoch": 0.9335991111692046, - "grad_norm": 0.45082712173461914, - "learning_rate": 7.975305213464805e-06, - "loss": 0.3234, - "step": 14285 - }, - { - "epoch": 0.9336644663747468, - "grad_norm": 0.4681273102760315, - "learning_rate": 7.975024569007899e-06, - "loss": 0.4297, - "step": 14286 - }, - { - "epoch": 0.9337298215802888, - "grad_norm": 0.4718344211578369, - "learning_rate": 7.974743910040768e-06, - "loss": 0.4079, - "step": 14287 - }, - { - "epoch": 0.933795176785831, - "grad_norm": 0.4437362849712372, - "learning_rate": 7.97446323656478e-06, - "loss": 0.3928, - "step": 14288 - }, - { - "epoch": 0.9338605319913731, - "grad_norm": 0.44397616386413574, - "learning_rate": 7.974182548581305e-06, - "loss": 0.3746, - "step": 14289 - }, - { - "epoch": 0.9339258871969153, - "grad_norm": 0.44536322355270386, - "learning_rate": 7.973901846091712e-06, - "loss": 0.3745, - "step": 14290 - }, - { - "epoch": 0.9339912424024573, - "grad_norm": 0.47594279050827026, - "learning_rate": 7.973621129097369e-06, - "loss": 0.3749, - "step": 14291 - }, - { - "epoch": 0.9340565976079995, - "grad_norm": 0.464814692735672, - "learning_rate": 7.973340397599644e-06, - "loss": 0.379, - "step": 14292 - }, - { - "epoch": 0.9341219528135416, - "grad_norm": 0.4069635570049286, - "learning_rate": 7.973059651599908e-06, - "loss": 0.2935, - "step": 14293 - }, - { - "epoch": 0.9341873080190837, - "grad_norm": 0.45391401648521423, - "learning_rate": 7.972778891099532e-06, - "loss": 0.3521, - "step": 14294 - }, - { - "epoch": 0.9342526632246259, - "grad_norm": 0.48291680216789246, - "learning_rate": 7.972498116099882e-06, - "loss": 0.4188, - "step": 14295 - }, - { - "epoch": 0.9343180184301679, - "grad_norm": 0.4733894169330597, - "learning_rate": 7.972217326602331e-06, - "loss": 0.4372, - "step": 14296 - }, - { - "epoch": 0.9343833736357101, - "grad_norm": 0.4554188549518585, - "learning_rate": 7.971936522608245e-06, - "loss": 0.3554, - "step": 14297 - }, - { - "epoch": 0.9344487288412522, - "grad_norm": 0.48150956630706787, - "learning_rate": 7.971655704118994e-06, - "loss": 0.4032, - "step": 14298 - }, - { - "epoch": 0.9345140840467944, - "grad_norm": 0.42098093032836914, - "learning_rate": 7.971374871135951e-06, - "loss": 0.3273, - "step": 14299 - }, - { - "epoch": 0.9345794392523364, - "grad_norm": 0.44666042923927307, - "learning_rate": 7.97109402366048e-06, - "loss": 0.3735, - "step": 14300 - }, - { - "epoch": 0.9346447944578786, - "grad_norm": 0.42981797456741333, - "learning_rate": 7.970813161693957e-06, - "loss": 0.3409, - "step": 14301 - }, - { - "epoch": 0.9347101496634207, - "grad_norm": 0.4457097053527832, - "learning_rate": 7.970532285237749e-06, - "loss": 0.3904, - "step": 14302 - }, - { - "epoch": 0.9347755048689628, - "grad_norm": 0.4529748558998108, - "learning_rate": 7.970251394293225e-06, - "loss": 0.3798, - "step": 14303 - }, - { - "epoch": 0.934840860074505, - "grad_norm": 0.4386380910873413, - "learning_rate": 7.969970488861757e-06, - "loss": 0.352, - "step": 14304 - }, - { - "epoch": 0.934906215280047, - "grad_norm": 0.4380452334880829, - "learning_rate": 7.969689568944711e-06, - "loss": 0.3498, - "step": 14305 - }, - { - "epoch": 0.9349715704855892, - "grad_norm": 0.45035868883132935, - "learning_rate": 7.969408634543462e-06, - "loss": 0.389, - "step": 14306 - }, - { - "epoch": 0.9350369256911313, - "grad_norm": 0.45483487844467163, - "learning_rate": 7.96912768565938e-06, - "loss": 0.3827, - "step": 14307 - }, - { - "epoch": 0.9351022808966735, - "grad_norm": 0.41082796454429626, - "learning_rate": 7.96884672229383e-06, - "loss": 0.3463, - "step": 14308 - }, - { - "epoch": 0.9351676361022155, - "grad_norm": 0.59320068359375, - "learning_rate": 7.968565744448188e-06, - "loss": 0.3686, - "step": 14309 - }, - { - "epoch": 0.9352329913077576, - "grad_norm": 0.4542233347892761, - "learning_rate": 7.968284752123825e-06, - "loss": 0.4166, - "step": 14310 - }, - { - "epoch": 0.9352983465132998, - "grad_norm": 0.4471418261528015, - "learning_rate": 7.968003745322106e-06, - "loss": 0.3917, - "step": 14311 - }, - { - "epoch": 0.9353637017188419, - "grad_norm": 0.47591525316238403, - "learning_rate": 7.967722724044404e-06, - "loss": 0.4491, - "step": 14312 - }, - { - "epoch": 0.935429056924384, - "grad_norm": 0.4212082624435425, - "learning_rate": 7.967441688292093e-06, - "loss": 0.3488, - "step": 14313 - }, - { - "epoch": 0.9354944121299261, - "grad_norm": 0.4605009853839874, - "learning_rate": 7.967160638066537e-06, - "loss": 0.4503, - "step": 14314 - }, - { - "epoch": 0.9355597673354683, - "grad_norm": 0.4623265564441681, - "learning_rate": 7.966879573369115e-06, - "loss": 0.4164, - "step": 14315 - }, - { - "epoch": 0.9356251225410104, - "grad_norm": 0.42452770471572876, - "learning_rate": 7.966598494201192e-06, - "loss": 0.3894, - "step": 14316 - }, - { - "epoch": 0.9356904777465526, - "grad_norm": 0.42500030994415283, - "learning_rate": 7.966317400564139e-06, - "loss": 0.362, - "step": 14317 - }, - { - "epoch": 0.9357558329520946, - "grad_norm": 0.4604122042655945, - "learning_rate": 7.96603629245933e-06, - "loss": 0.4283, - "step": 14318 - }, - { - "epoch": 0.9358211881576367, - "grad_norm": 0.4024207890033722, - "learning_rate": 7.965755169888132e-06, - "loss": 0.3342, - "step": 14319 - }, - { - "epoch": 0.9358865433631789, - "grad_norm": 0.4626029133796692, - "learning_rate": 7.965474032851921e-06, - "loss": 0.3919, - "step": 14320 - }, - { - "epoch": 0.935951898568721, - "grad_norm": 0.46209120750427246, - "learning_rate": 7.965192881352067e-06, - "loss": 0.3819, - "step": 14321 - }, - { - "epoch": 0.9360172537742631, - "grad_norm": 0.4297623038291931, - "learning_rate": 7.964911715389937e-06, - "loss": 0.3824, - "step": 14322 - }, - { - "epoch": 0.9360826089798052, - "grad_norm": 0.4633261561393738, - "learning_rate": 7.964630534966906e-06, - "loss": 0.4345, - "step": 14323 - }, - { - "epoch": 0.9361479641853474, - "grad_norm": 0.444621205329895, - "learning_rate": 7.964349340084345e-06, - "loss": 0.3714, - "step": 14324 - }, - { - "epoch": 0.9362133193908895, - "grad_norm": 0.43743178248405457, - "learning_rate": 7.964068130743626e-06, - "loss": 0.3333, - "step": 14325 - }, - { - "epoch": 0.9362786745964317, - "grad_norm": 0.45553866028785706, - "learning_rate": 7.96378690694612e-06, - "loss": 0.3631, - "step": 14326 - }, - { - "epoch": 0.9363440298019737, - "grad_norm": 0.40102672576904297, - "learning_rate": 7.963505668693197e-06, - "loss": 0.3029, - "step": 14327 - }, - { - "epoch": 0.9364093850075158, - "grad_norm": 0.42924097180366516, - "learning_rate": 7.963224415986233e-06, - "loss": 0.3497, - "step": 14328 - }, - { - "epoch": 0.936474740213058, - "grad_norm": 0.46029043197631836, - "learning_rate": 7.962943148826595e-06, - "loss": 0.3981, - "step": 14329 - }, - { - "epoch": 0.9365400954186001, - "grad_norm": 0.41947486996650696, - "learning_rate": 7.962661867215658e-06, - "loss": 0.3515, - "step": 14330 - }, - { - "epoch": 0.9366054506241422, - "grad_norm": 0.44270771741867065, - "learning_rate": 7.962380571154791e-06, - "loss": 0.3921, - "step": 14331 - }, - { - "epoch": 0.9366708058296843, - "grad_norm": 0.43892335891723633, - "learning_rate": 7.962099260645369e-06, - "loss": 0.3395, - "step": 14332 - }, - { - "epoch": 0.9367361610352265, - "grad_norm": 0.4416338801383972, - "learning_rate": 7.96181793568876e-06, - "loss": 0.3705, - "step": 14333 - }, - { - "epoch": 0.9368015162407686, - "grad_norm": 0.43199318647384644, - "learning_rate": 7.961536596286343e-06, - "loss": 0.3922, - "step": 14334 - }, - { - "epoch": 0.9368668714463106, - "grad_norm": 0.44440150260925293, - "learning_rate": 7.961255242439483e-06, - "loss": 0.3825, - "step": 14335 - }, - { - "epoch": 0.9369322266518528, - "grad_norm": 0.4097616374492645, - "learning_rate": 7.960973874149555e-06, - "loss": 0.3482, - "step": 14336 - }, - { - "epoch": 0.9369975818573949, - "grad_norm": 0.5028608441352844, - "learning_rate": 7.960692491417934e-06, - "loss": 0.4794, - "step": 14337 - }, - { - "epoch": 0.9370629370629371, - "grad_norm": 0.4232047200202942, - "learning_rate": 7.96041109424599e-06, - "loss": 0.3595, - "step": 14338 - }, - { - "epoch": 0.9371282922684792, - "grad_norm": 0.4544631540775299, - "learning_rate": 7.960129682635095e-06, - "loss": 0.4215, - "step": 14339 - }, - { - "epoch": 0.9371936474740213, - "grad_norm": 0.4575744569301605, - "learning_rate": 7.959848256586621e-06, - "loss": 0.4379, - "step": 14340 - }, - { - "epoch": 0.9372590026795634, - "grad_norm": 0.43337470293045044, - "learning_rate": 7.959566816101943e-06, - "loss": 0.3786, - "step": 14341 - }, - { - "epoch": 0.9373243578851056, - "grad_norm": 0.4470236897468567, - "learning_rate": 7.959285361182432e-06, - "loss": 0.4205, - "step": 14342 - }, - { - "epoch": 0.9373897130906477, - "grad_norm": 0.4310753047466278, - "learning_rate": 7.95900389182946e-06, - "loss": 0.3795, - "step": 14343 - }, - { - "epoch": 0.9374550682961897, - "grad_norm": 0.4945099651813507, - "learning_rate": 7.958722408044401e-06, - "loss": 0.4124, - "step": 14344 - }, - { - "epoch": 0.9375204235017319, - "grad_norm": 0.4588758647441864, - "learning_rate": 7.958440909828629e-06, - "loss": 0.4088, - "step": 14345 - }, - { - "epoch": 0.937585778707274, - "grad_norm": 0.440149188041687, - "learning_rate": 7.958159397183516e-06, - "loss": 0.3932, - "step": 14346 - }, - { - "epoch": 0.9376511339128162, - "grad_norm": 0.4193704426288605, - "learning_rate": 7.957877870110434e-06, - "loss": 0.3608, - "step": 14347 - }, - { - "epoch": 0.9377164891183583, - "grad_norm": 0.4371584951877594, - "learning_rate": 7.957596328610757e-06, - "loss": 0.377, - "step": 14348 - }, - { - "epoch": 0.9377818443239004, - "grad_norm": 0.44903409481048584, - "learning_rate": 7.957314772685858e-06, - "loss": 0.3684, - "step": 14349 - }, - { - "epoch": 0.9378471995294425, - "grad_norm": 0.42451244592666626, - "learning_rate": 7.95703320233711e-06, - "loss": 0.3402, - "step": 14350 - }, - { - "epoch": 0.9379125547349847, - "grad_norm": 0.444200336933136, - "learning_rate": 7.956751617565888e-06, - "loss": 0.3602, - "step": 14351 - }, - { - "epoch": 0.9379779099405268, - "grad_norm": 0.4307136833667755, - "learning_rate": 7.956470018373563e-06, - "loss": 0.3421, - "step": 14352 - }, - { - "epoch": 0.9380432651460688, - "grad_norm": 0.4414706826210022, - "learning_rate": 7.95618840476151e-06, - "loss": 0.344, - "step": 14353 - }, - { - "epoch": 0.938108620351611, - "grad_norm": 0.4512485861778259, - "learning_rate": 7.9559067767311e-06, - "loss": 0.3523, - "step": 14354 - }, - { - "epoch": 0.9381739755571531, - "grad_norm": 0.40102237462997437, - "learning_rate": 7.955625134283712e-06, - "loss": 0.3295, - "step": 14355 - }, - { - "epoch": 0.9382393307626953, - "grad_norm": 0.4707501530647278, - "learning_rate": 7.955343477420714e-06, - "loss": 0.3757, - "step": 14356 - }, - { - "epoch": 0.9383046859682374, - "grad_norm": 0.4292069971561432, - "learning_rate": 7.955061806143483e-06, - "loss": 0.3438, - "step": 14357 - }, - { - "epoch": 0.9383700411737795, - "grad_norm": 0.4546838402748108, - "learning_rate": 7.954780120453392e-06, - "loss": 0.3824, - "step": 14358 - }, - { - "epoch": 0.9384353963793216, - "grad_norm": 0.453620970249176, - "learning_rate": 7.954498420351814e-06, - "loss": 0.3681, - "step": 14359 - }, - { - "epoch": 0.9385007515848637, - "grad_norm": 0.44989854097366333, - "learning_rate": 7.954216705840124e-06, - "loss": 0.3727, - "step": 14360 - }, - { - "epoch": 0.9385661067904059, - "grad_norm": 0.4276033341884613, - "learning_rate": 7.953934976919695e-06, - "loss": 0.3781, - "step": 14361 - }, - { - "epoch": 0.9386314619959479, - "grad_norm": 0.4170686602592468, - "learning_rate": 7.953653233591903e-06, - "loss": 0.3124, - "step": 14362 - }, - { - "epoch": 0.9386968172014901, - "grad_norm": 0.45001140236854553, - "learning_rate": 7.953371475858119e-06, - "loss": 0.4009, - "step": 14363 - }, - { - "epoch": 0.9387621724070322, - "grad_norm": 0.44703391194343567, - "learning_rate": 7.953089703719721e-06, - "loss": 0.3673, - "step": 14364 - }, - { - "epoch": 0.9388275276125744, - "grad_norm": 0.4642198085784912, - "learning_rate": 7.95280791717808e-06, - "loss": 0.379, - "step": 14365 - }, - { - "epoch": 0.9388928828181164, - "grad_norm": 0.4425627291202545, - "learning_rate": 7.952526116234574e-06, - "loss": 0.3786, - "step": 14366 - }, - { - "epoch": 0.9389582380236586, - "grad_norm": 0.46043291687965393, - "learning_rate": 7.952244300890574e-06, - "loss": 0.3847, - "step": 14367 - }, - { - "epoch": 0.9390235932292007, - "grad_norm": 0.43328171968460083, - "learning_rate": 7.951962471147456e-06, - "loss": 0.3951, - "step": 14368 - }, - { - "epoch": 0.9390889484347428, - "grad_norm": 0.4538939893245697, - "learning_rate": 7.951680627006592e-06, - "loss": 0.3976, - "step": 14369 - }, - { - "epoch": 0.939154303640285, - "grad_norm": 0.46278855204582214, - "learning_rate": 7.951398768469363e-06, - "loss": 0.4246, - "step": 14370 - }, - { - "epoch": 0.939219658845827, - "grad_norm": 0.46772995591163635, - "learning_rate": 7.951116895537137e-06, - "loss": 0.416, - "step": 14371 - }, - { - "epoch": 0.9392850140513692, - "grad_norm": 0.4080042839050293, - "learning_rate": 7.950835008211292e-06, - "loss": 0.351, - "step": 14372 - }, - { - "epoch": 0.9393503692569113, - "grad_norm": 0.4318680763244629, - "learning_rate": 7.950553106493203e-06, - "loss": 0.3589, - "step": 14373 - }, - { - "epoch": 0.9394157244624535, - "grad_norm": 0.42919856309890747, - "learning_rate": 7.950271190384244e-06, - "loss": 0.3761, - "step": 14374 - }, - { - "epoch": 0.9394810796679955, - "grad_norm": 0.45179271697998047, - "learning_rate": 7.94998925988579e-06, - "loss": 0.3819, - "step": 14375 - }, - { - "epoch": 0.9395464348735377, - "grad_norm": 0.4313129186630249, - "learning_rate": 7.949707314999217e-06, - "loss": 0.3557, - "step": 14376 - }, - { - "epoch": 0.9396117900790798, - "grad_norm": 0.4538089334964752, - "learning_rate": 7.9494253557259e-06, - "loss": 0.3771, - "step": 14377 - }, - { - "epoch": 0.9396771452846219, - "grad_norm": 0.4204086363315582, - "learning_rate": 7.949143382067214e-06, - "loss": 0.3571, - "step": 14378 - }, - { - "epoch": 0.9397425004901641, - "grad_norm": 0.4639827609062195, - "learning_rate": 7.948861394024534e-06, - "loss": 0.395, - "step": 14379 - }, - { - "epoch": 0.9398078556957061, - "grad_norm": 0.4357139766216278, - "learning_rate": 7.948579391599233e-06, - "loss": 0.3651, - "step": 14380 - }, - { - "epoch": 0.9398732109012483, - "grad_norm": 0.46814483404159546, - "learning_rate": 7.948297374792692e-06, - "loss": 0.4106, - "step": 14381 - }, - { - "epoch": 0.9399385661067904, - "grad_norm": 0.4309825897216797, - "learning_rate": 7.948015343606281e-06, - "loss": 0.3979, - "step": 14382 - }, - { - "epoch": 0.9400039213123326, - "grad_norm": 0.49262019991874695, - "learning_rate": 7.947733298041379e-06, - "loss": 0.4262, - "step": 14383 - }, - { - "epoch": 0.9400692765178746, - "grad_norm": 0.45704779028892517, - "learning_rate": 7.94745123809936e-06, - "loss": 0.3921, - "step": 14384 - }, - { - "epoch": 0.9401346317234168, - "grad_norm": 0.445494145154953, - "learning_rate": 7.9471691637816e-06, - "loss": 0.4015, - "step": 14385 - }, - { - "epoch": 0.9401999869289589, - "grad_norm": 0.4356983006000519, - "learning_rate": 7.946887075089477e-06, - "loss": 0.3884, - "step": 14386 - }, - { - "epoch": 0.940265342134501, - "grad_norm": 0.4237995743751526, - "learning_rate": 7.946604972024363e-06, - "loss": 0.3447, - "step": 14387 - }, - { - "epoch": 0.9403306973400432, - "grad_norm": 0.43086937069892883, - "learning_rate": 7.946322854587636e-06, - "loss": 0.3722, - "step": 14388 - }, - { - "epoch": 0.9403960525455852, - "grad_norm": 0.4470529556274414, - "learning_rate": 7.946040722780672e-06, - "loss": 0.3997, - "step": 14389 - }, - { - "epoch": 0.9404614077511274, - "grad_norm": 0.4738561511039734, - "learning_rate": 7.945758576604846e-06, - "loss": 0.4423, - "step": 14390 - }, - { - "epoch": 0.9405267629566695, - "grad_norm": 0.45183515548706055, - "learning_rate": 7.945476416061535e-06, - "loss": 0.3991, - "step": 14391 - }, - { - "epoch": 0.9405921181622117, - "grad_norm": 0.4276072382926941, - "learning_rate": 7.945194241152115e-06, - "loss": 0.358, - "step": 14392 - }, - { - "epoch": 0.9406574733677537, - "grad_norm": 0.45037370920181274, - "learning_rate": 7.944912051877963e-06, - "loss": 0.3945, - "step": 14393 - }, - { - "epoch": 0.9407228285732958, - "grad_norm": 0.4249766767024994, - "learning_rate": 7.944629848240455e-06, - "loss": 0.393, - "step": 14394 - }, - { - "epoch": 0.940788183778838, - "grad_norm": 0.45881056785583496, - "learning_rate": 7.944347630240968e-06, - "loss": 0.3784, - "step": 14395 - }, - { - "epoch": 0.9408535389843801, - "grad_norm": 0.4441053867340088, - "learning_rate": 7.944065397880876e-06, - "loss": 0.3548, - "step": 14396 - }, - { - "epoch": 0.9409188941899223, - "grad_norm": 0.47556474804878235, - "learning_rate": 7.943783151161558e-06, - "loss": 0.4012, - "step": 14397 - }, - { - "epoch": 0.9409842493954643, - "grad_norm": 0.43414306640625, - "learning_rate": 7.943500890084387e-06, - "loss": 0.3776, - "step": 14398 - }, - { - "epoch": 0.9410496046010065, - "grad_norm": 0.4786895215511322, - "learning_rate": 7.943218614650745e-06, - "loss": 0.4717, - "step": 14399 - }, - { - "epoch": 0.9411149598065486, - "grad_norm": 0.4821021556854248, - "learning_rate": 7.942936324862007e-06, - "loss": 0.3752, - "step": 14400 - }, - { - "epoch": 0.9411803150120908, - "grad_norm": 0.4355505704879761, - "learning_rate": 7.942654020719548e-06, - "loss": 0.3903, - "step": 14401 - }, - { - "epoch": 0.9412456702176328, - "grad_norm": 0.4327200651168823, - "learning_rate": 7.942371702224745e-06, - "loss": 0.3628, - "step": 14402 - }, - { - "epoch": 0.9413110254231749, - "grad_norm": 0.4677167534828186, - "learning_rate": 7.942089369378977e-06, - "loss": 0.4297, - "step": 14403 - }, - { - "epoch": 0.9413763806287171, - "grad_norm": 0.42914190888404846, - "learning_rate": 7.941807022183618e-06, - "loss": 0.3379, - "step": 14404 - }, - { - "epoch": 0.9414417358342592, - "grad_norm": 0.3983922600746155, - "learning_rate": 7.94152466064005e-06, - "loss": 0.3071, - "step": 14405 - }, - { - "epoch": 0.9415070910398013, - "grad_norm": 0.4407617747783661, - "learning_rate": 7.941242284749647e-06, - "loss": 0.3445, - "step": 14406 - }, - { - "epoch": 0.9415724462453434, - "grad_norm": 0.42539024353027344, - "learning_rate": 7.940959894513784e-06, - "loss": 0.3476, - "step": 14407 - }, - { - "epoch": 0.9416378014508856, - "grad_norm": 0.41399627923965454, - "learning_rate": 7.940677489933842e-06, - "loss": 0.3264, - "step": 14408 - }, - { - "epoch": 0.9417031566564277, - "grad_norm": 0.4142743647098541, - "learning_rate": 7.940395071011196e-06, - "loss": 0.3233, - "step": 14409 - }, - { - "epoch": 0.9417685118619699, - "grad_norm": 0.4264567792415619, - "learning_rate": 7.940112637747227e-06, - "loss": 0.3709, - "step": 14410 - }, - { - "epoch": 0.9418338670675119, - "grad_norm": 0.43007078766822815, - "learning_rate": 7.939830190143308e-06, - "loss": 0.3273, - "step": 14411 - }, - { - "epoch": 0.941899222273054, - "grad_norm": 0.45897284150123596, - "learning_rate": 7.939547728200819e-06, - "loss": 0.4395, - "step": 14412 - }, - { - "epoch": 0.9419645774785962, - "grad_norm": 0.4416148364543915, - "learning_rate": 7.939265251921137e-06, - "loss": 0.3751, - "step": 14413 - }, - { - "epoch": 0.9420299326841383, - "grad_norm": 0.4365776479244232, - "learning_rate": 7.938982761305643e-06, - "loss": 0.3315, - "step": 14414 - }, - { - "epoch": 0.9420952878896804, - "grad_norm": 0.44567838311195374, - "learning_rate": 7.93870025635571e-06, - "loss": 0.4074, - "step": 14415 - }, - { - "epoch": 0.9421606430952225, - "grad_norm": 0.4563732147216797, - "learning_rate": 7.938417737072717e-06, - "loss": 0.4041, - "step": 14416 - }, - { - "epoch": 0.9422259983007647, - "grad_norm": 0.4569830298423767, - "learning_rate": 7.938135203458043e-06, - "loss": 0.402, - "step": 14417 - }, - { - "epoch": 0.9422913535063068, - "grad_norm": 0.47362250089645386, - "learning_rate": 7.937852655513065e-06, - "loss": 0.4038, - "step": 14418 - }, - { - "epoch": 0.9423567087118488, - "grad_norm": 0.4402032792568207, - "learning_rate": 7.937570093239164e-06, - "loss": 0.3827, - "step": 14419 - }, - { - "epoch": 0.942422063917391, - "grad_norm": 0.46891897916793823, - "learning_rate": 7.937287516637714e-06, - "loss": 0.4175, - "step": 14420 - }, - { - "epoch": 0.9424874191229331, - "grad_norm": 0.4435456693172455, - "learning_rate": 7.937004925710097e-06, - "loss": 0.3641, - "step": 14421 - }, - { - "epoch": 0.9425527743284753, - "grad_norm": 0.40931662917137146, - "learning_rate": 7.936722320457689e-06, - "loss": 0.3061, - "step": 14422 - }, - { - "epoch": 0.9426181295340174, - "grad_norm": 0.42935508489608765, - "learning_rate": 7.936439700881868e-06, - "loss": 0.3579, - "step": 14423 - }, - { - "epoch": 0.9426834847395595, - "grad_norm": 0.41177570819854736, - "learning_rate": 7.936157066984014e-06, - "loss": 0.3376, - "step": 14424 - }, - { - "epoch": 0.9427488399451016, - "grad_norm": 0.4286854565143585, - "learning_rate": 7.935874418765505e-06, - "loss": 0.346, - "step": 14425 - }, - { - "epoch": 0.9428141951506438, - "grad_norm": 0.4349355101585388, - "learning_rate": 7.935591756227718e-06, - "loss": 0.349, - "step": 14426 - }, - { - "epoch": 0.9428795503561859, - "grad_norm": 0.46722865104675293, - "learning_rate": 7.935309079372035e-06, - "loss": 0.3911, - "step": 14427 - }, - { - "epoch": 0.9429449055617279, - "grad_norm": 0.4100147485733032, - "learning_rate": 7.935026388199832e-06, - "loss": 0.3162, - "step": 14428 - }, - { - "epoch": 0.9430102607672701, - "grad_norm": 0.46007734537124634, - "learning_rate": 7.934743682712487e-06, - "loss": 0.3979, - "step": 14429 - }, - { - "epoch": 0.9430756159728122, - "grad_norm": 0.4268944263458252, - "learning_rate": 7.934460962911382e-06, - "loss": 0.3655, - "step": 14430 - }, - { - "epoch": 0.9431409711783544, - "grad_norm": 0.4173218905925751, - "learning_rate": 7.934178228797894e-06, - "loss": 0.3553, - "step": 14431 - }, - { - "epoch": 0.9432063263838965, - "grad_norm": 0.4390046000480652, - "learning_rate": 7.933895480373403e-06, - "loss": 0.3792, - "step": 14432 - }, - { - "epoch": 0.9432716815894386, - "grad_norm": 0.43881070613861084, - "learning_rate": 7.933612717639285e-06, - "loss": 0.3819, - "step": 14433 - }, - { - "epoch": 0.9433370367949807, - "grad_norm": 0.41829654574394226, - "learning_rate": 7.933329940596923e-06, - "loss": 0.3199, - "step": 14434 - }, - { - "epoch": 0.9434023920005229, - "grad_norm": 0.43282178044319153, - "learning_rate": 7.933047149247695e-06, - "loss": 0.353, - "step": 14435 - }, - { - "epoch": 0.943467747206065, - "grad_norm": 0.45350539684295654, - "learning_rate": 7.93276434359298e-06, - "loss": 0.3797, - "step": 14436 - }, - { - "epoch": 0.943533102411607, - "grad_norm": 0.43426403403282166, - "learning_rate": 7.932481523634157e-06, - "loss": 0.3329, - "step": 14437 - }, - { - "epoch": 0.9435984576171492, - "grad_norm": 0.44173911213874817, - "learning_rate": 7.932198689372605e-06, - "loss": 0.4018, - "step": 14438 - }, - { - "epoch": 0.9436638128226913, - "grad_norm": 0.43885087966918945, - "learning_rate": 7.931915840809705e-06, - "loss": 0.3791, - "step": 14439 - }, - { - "epoch": 0.9437291680282335, - "grad_norm": 0.43043452501296997, - "learning_rate": 7.931632977946836e-06, - "loss": 0.3639, - "step": 14440 - }, - { - "epoch": 0.9437945232337756, - "grad_norm": 0.41387373208999634, - "learning_rate": 7.931350100785377e-06, - "loss": 0.3323, - "step": 14441 - }, - { - "epoch": 0.9438598784393177, - "grad_norm": 0.42461729049682617, - "learning_rate": 7.931067209326708e-06, - "loss": 0.3292, - "step": 14442 - }, - { - "epoch": 0.9439252336448598, - "grad_norm": 0.4541226923465729, - "learning_rate": 7.930784303572207e-06, - "loss": 0.3808, - "step": 14443 - }, - { - "epoch": 0.9439905888504019, - "grad_norm": 0.4451179504394531, - "learning_rate": 7.93050138352326e-06, - "loss": 0.4153, - "step": 14444 - }, - { - "epoch": 0.9440559440559441, - "grad_norm": 0.4671146273612976, - "learning_rate": 7.93021844918124e-06, - "loss": 0.3824, - "step": 14445 - }, - { - "epoch": 0.9441212992614861, - "grad_norm": 0.41073986887931824, - "learning_rate": 7.929935500547528e-06, - "loss": 0.3462, - "step": 14446 - }, - { - "epoch": 0.9441866544670283, - "grad_norm": 0.4628707468509674, - "learning_rate": 7.929652537623507e-06, - "loss": 0.4038, - "step": 14447 - }, - { - "epoch": 0.9442520096725704, - "grad_norm": 0.4600439667701721, - "learning_rate": 7.929369560410556e-06, - "loss": 0.3781, - "step": 14448 - }, - { - "epoch": 0.9443173648781126, - "grad_norm": 0.4218600392341614, - "learning_rate": 7.929086568910053e-06, - "loss": 0.3257, - "step": 14449 - }, - { - "epoch": 0.9443827200836546, - "grad_norm": 0.4293709695339203, - "learning_rate": 7.928803563123381e-06, - "loss": 0.3433, - "step": 14450 - }, - { - "epoch": 0.9444480752891968, - "grad_norm": 0.4584823548793793, - "learning_rate": 7.928520543051921e-06, - "loss": 0.3536, - "step": 14451 - }, - { - "epoch": 0.9445134304947389, - "grad_norm": 0.44159606099128723, - "learning_rate": 7.92823750869705e-06, - "loss": 0.3742, - "step": 14452 - }, - { - "epoch": 0.944578785700281, - "grad_norm": 0.4415980875492096, - "learning_rate": 7.927954460060152e-06, - "loss": 0.3648, - "step": 14453 - }, - { - "epoch": 0.9446441409058232, - "grad_norm": 0.45986682176589966, - "learning_rate": 7.927671397142605e-06, - "loss": 0.4111, - "step": 14454 - }, - { - "epoch": 0.9447094961113652, - "grad_norm": 0.435773104429245, - "learning_rate": 7.92738831994579e-06, - "loss": 0.3548, - "step": 14455 - }, - { - "epoch": 0.9447748513169074, - "grad_norm": 0.4182360768318176, - "learning_rate": 7.927105228471086e-06, - "loss": 0.2893, - "step": 14456 - }, - { - "epoch": 0.9448402065224495, - "grad_norm": 0.4321918785572052, - "learning_rate": 7.926822122719879e-06, - "loss": 0.3459, - "step": 14457 - }, - { - "epoch": 0.9449055617279917, - "grad_norm": 0.4098641276359558, - "learning_rate": 7.926539002693546e-06, - "loss": 0.3425, - "step": 14458 - }, - { - "epoch": 0.9449709169335337, - "grad_norm": 0.4105234444141388, - "learning_rate": 7.926255868393466e-06, - "loss": 0.355, - "step": 14459 - }, - { - "epoch": 0.9450362721390759, - "grad_norm": 0.4359675943851471, - "learning_rate": 7.925972719821025e-06, - "loss": 0.3748, - "step": 14460 - }, - { - "epoch": 0.945101627344618, - "grad_norm": 0.47171032428741455, - "learning_rate": 7.9256895569776e-06, - "loss": 0.4521, - "step": 14461 - }, - { - "epoch": 0.9451669825501601, - "grad_norm": 0.4673002064228058, - "learning_rate": 7.925406379864573e-06, - "loss": 0.3746, - "step": 14462 - }, - { - "epoch": 0.9452323377557023, - "grad_norm": 0.44654732942581177, - "learning_rate": 7.925123188483328e-06, - "loss": 0.3558, - "step": 14463 - }, - { - "epoch": 0.9452976929612443, - "grad_norm": 0.3931218981742859, - "learning_rate": 7.924839982835242e-06, - "loss": 0.3041, - "step": 14464 - }, - { - "epoch": 0.9453630481667865, - "grad_norm": 0.40793532133102417, - "learning_rate": 7.9245567629217e-06, - "loss": 0.3247, - "step": 14465 - }, - { - "epoch": 0.9454284033723286, - "grad_norm": 0.4634629786014557, - "learning_rate": 7.92427352874408e-06, - "loss": 0.421, - "step": 14466 - }, - { - "epoch": 0.9454937585778708, - "grad_norm": 0.45527854561805725, - "learning_rate": 7.923990280303763e-06, - "loss": 0.3612, - "step": 14467 - }, - { - "epoch": 0.9455591137834128, - "grad_norm": 0.460274875164032, - "learning_rate": 7.923707017602135e-06, - "loss": 0.4056, - "step": 14468 - }, - { - "epoch": 0.945624468988955, - "grad_norm": 0.4577227830886841, - "learning_rate": 7.923423740640575e-06, - "loss": 0.3811, - "step": 14469 - }, - { - "epoch": 0.9456898241944971, - "grad_norm": 0.44590795040130615, - "learning_rate": 7.923140449420464e-06, - "loss": 0.4015, - "step": 14470 - }, - { - "epoch": 0.9457551794000392, - "grad_norm": 0.47536012530326843, - "learning_rate": 7.922857143943184e-06, - "loss": 0.4492, - "step": 14471 - }, - { - "epoch": 0.9458205346055814, - "grad_norm": 0.4406832158565521, - "learning_rate": 7.922573824210118e-06, - "loss": 0.3236, - "step": 14472 - }, - { - "epoch": 0.9458858898111234, - "grad_norm": 0.42595773935317993, - "learning_rate": 7.922290490222646e-06, - "loss": 0.3211, - "step": 14473 - }, - { - "epoch": 0.9459512450166656, - "grad_norm": 0.4478296637535095, - "learning_rate": 7.922007141982152e-06, - "loss": 0.3974, - "step": 14474 - }, - { - "epoch": 0.9460166002222077, - "grad_norm": 0.42474326491355896, - "learning_rate": 7.921723779490015e-06, - "loss": 0.3207, - "step": 14475 - }, - { - "epoch": 0.9460819554277499, - "grad_norm": 0.44154810905456543, - "learning_rate": 7.921440402747622e-06, - "loss": 0.368, - "step": 14476 - }, - { - "epoch": 0.9461473106332919, - "grad_norm": 0.40492886304855347, - "learning_rate": 7.92115701175635e-06, - "loss": 0.3183, - "step": 14477 - }, - { - "epoch": 0.946212665838834, - "grad_norm": 0.4571344554424286, - "learning_rate": 7.920873606517583e-06, - "loss": 0.3703, - "step": 14478 - }, - { - "epoch": 0.9462780210443762, - "grad_norm": 0.5302592515945435, - "learning_rate": 7.920590187032704e-06, - "loss": 0.3917, - "step": 14479 - }, - { - "epoch": 0.9463433762499183, - "grad_norm": 0.4236275255680084, - "learning_rate": 7.920306753303096e-06, - "loss": 0.3139, - "step": 14480 - }, - { - "epoch": 0.9464087314554605, - "grad_norm": 0.44597524404525757, - "learning_rate": 7.92002330533014e-06, - "loss": 0.3798, - "step": 14481 - }, - { - "epoch": 0.9464740866610025, - "grad_norm": 0.4571422338485718, - "learning_rate": 7.919739843115218e-06, - "loss": 0.4051, - "step": 14482 - }, - { - "epoch": 0.9465394418665447, - "grad_norm": 0.40712401270866394, - "learning_rate": 7.919456366659713e-06, - "loss": 0.316, - "step": 14483 - }, - { - "epoch": 0.9466047970720868, - "grad_norm": 0.45134514570236206, - "learning_rate": 7.919172875965008e-06, - "loss": 0.3486, - "step": 14484 - }, - { - "epoch": 0.946670152277629, - "grad_norm": 0.4873284697532654, - "learning_rate": 7.918889371032486e-06, - "loss": 0.4607, - "step": 14485 - }, - { - "epoch": 0.946735507483171, - "grad_norm": 0.4418301284313202, - "learning_rate": 7.918605851863528e-06, - "loss": 0.368, - "step": 14486 - }, - { - "epoch": 0.9468008626887131, - "grad_norm": 0.4351485073566437, - "learning_rate": 7.91832231845952e-06, - "loss": 0.392, - "step": 14487 - }, - { - "epoch": 0.9468662178942553, - "grad_norm": 0.4526745080947876, - "learning_rate": 7.918038770821844e-06, - "loss": 0.3803, - "step": 14488 - }, - { - "epoch": 0.9469315730997974, - "grad_norm": 0.4391677975654602, - "learning_rate": 7.917755208951879e-06, - "loss": 0.3997, - "step": 14489 - }, - { - "epoch": 0.9469969283053395, - "grad_norm": 0.43049854040145874, - "learning_rate": 7.917471632851013e-06, - "loss": 0.372, - "step": 14490 - }, - { - "epoch": 0.9470622835108816, - "grad_norm": 0.4770206809043884, - "learning_rate": 7.917188042520625e-06, - "loss": 0.3895, - "step": 14491 - }, - { - "epoch": 0.9471276387164238, - "grad_norm": 0.43693140149116516, - "learning_rate": 7.916904437962103e-06, - "loss": 0.3716, - "step": 14492 - }, - { - "epoch": 0.9471929939219659, - "grad_norm": 0.46739405393600464, - "learning_rate": 7.916620819176825e-06, - "loss": 0.4037, - "step": 14493 - }, - { - "epoch": 0.9472583491275081, - "grad_norm": 0.4732327461242676, - "learning_rate": 7.916337186166178e-06, - "loss": 0.3726, - "step": 14494 - }, - { - "epoch": 0.9473237043330501, - "grad_norm": 0.433886855840683, - "learning_rate": 7.916053538931545e-06, - "loss": 0.3584, - "step": 14495 - }, - { - "epoch": 0.9473890595385922, - "grad_norm": 0.4469623863697052, - "learning_rate": 7.915769877474308e-06, - "loss": 0.3454, - "step": 14496 - }, - { - "epoch": 0.9474544147441344, - "grad_norm": 0.4504670798778534, - "learning_rate": 7.915486201795853e-06, - "loss": 0.387, - "step": 14497 - }, - { - "epoch": 0.9475197699496765, - "grad_norm": 0.4406098425388336, - "learning_rate": 7.915202511897559e-06, - "loss": 0.3981, - "step": 14498 - }, - { - "epoch": 0.9475851251552186, - "grad_norm": 0.47060224413871765, - "learning_rate": 7.914918807780814e-06, - "loss": 0.4474, - "step": 14499 - }, - { - "epoch": 0.9476504803607607, - "grad_norm": 0.43092191219329834, - "learning_rate": 7.914635089447e-06, - "loss": 0.3676, - "step": 14500 - }, - { - "epoch": 0.9477158355663029, - "grad_norm": 0.43119972944259644, - "learning_rate": 7.914351356897501e-06, - "loss": 0.3553, - "step": 14501 - }, - { - "epoch": 0.947781190771845, - "grad_norm": 0.424753338098526, - "learning_rate": 7.9140676101337e-06, - "loss": 0.3377, - "step": 14502 - }, - { - "epoch": 0.947846545977387, - "grad_norm": 0.4283413290977478, - "learning_rate": 7.913783849156981e-06, - "loss": 0.3405, - "step": 14503 - }, - { - "epoch": 0.9479119011829292, - "grad_norm": 0.4264403283596039, - "learning_rate": 7.91350007396873e-06, - "loss": 0.3488, - "step": 14504 - }, - { - "epoch": 0.9479772563884713, - "grad_norm": 0.48066049814224243, - "learning_rate": 7.91321628457033e-06, - "loss": 0.4064, - "step": 14505 - }, - { - "epoch": 0.9480426115940135, - "grad_norm": 0.42478930950164795, - "learning_rate": 7.912932480963166e-06, - "loss": 0.377, - "step": 14506 - }, - { - "epoch": 0.9481079667995556, - "grad_norm": 0.4370587170124054, - "learning_rate": 7.912648663148619e-06, - "loss": 0.3409, - "step": 14507 - }, - { - "epoch": 0.9481733220050977, - "grad_norm": 0.47303035855293274, - "learning_rate": 7.912364831128076e-06, - "loss": 0.4258, - "step": 14508 - }, - { - "epoch": 0.9482386772106398, - "grad_norm": 0.4379528760910034, - "learning_rate": 7.912080984902922e-06, - "loss": 0.3946, - "step": 14509 - }, - { - "epoch": 0.948304032416182, - "grad_norm": 0.44799181818962097, - "learning_rate": 7.911797124474539e-06, - "loss": 0.3844, - "step": 14510 - }, - { - "epoch": 0.9483693876217241, - "grad_norm": 0.42166668176651, - "learning_rate": 7.911513249844314e-06, - "loss": 0.3829, - "step": 14511 - }, - { - "epoch": 0.9484347428272661, - "grad_norm": 0.4153478443622589, - "learning_rate": 7.911229361013629e-06, - "loss": 0.3489, - "step": 14512 - }, - { - "epoch": 0.9485000980328083, - "grad_norm": 0.4486326277256012, - "learning_rate": 7.91094545798387e-06, - "loss": 0.3955, - "step": 14513 - }, - { - "epoch": 0.9485654532383504, - "grad_norm": 0.4419936537742615, - "learning_rate": 7.91066154075642e-06, - "loss": 0.3856, - "step": 14514 - }, - { - "epoch": 0.9486308084438926, - "grad_norm": 0.42571067810058594, - "learning_rate": 7.910377609332666e-06, - "loss": 0.3343, - "step": 14515 - }, - { - "epoch": 0.9486961636494347, - "grad_norm": 0.45720112323760986, - "learning_rate": 7.910093663713994e-06, - "loss": 0.4466, - "step": 14516 - }, - { - "epoch": 0.9487615188549768, - "grad_norm": 0.45299604535102844, - "learning_rate": 7.909809703901787e-06, - "loss": 0.4152, - "step": 14517 - }, - { - "epoch": 0.9488268740605189, - "grad_norm": 0.4278375506401062, - "learning_rate": 7.90952572989743e-06, - "loss": 0.3984, - "step": 14518 - }, - { - "epoch": 0.9488922292660611, - "grad_norm": 0.4548674523830414, - "learning_rate": 7.909241741702306e-06, - "loss": 0.3994, - "step": 14519 - }, - { - "epoch": 0.9489575844716032, - "grad_norm": 0.4342322051525116, - "learning_rate": 7.908957739317803e-06, - "loss": 0.3509, - "step": 14520 - }, - { - "epoch": 0.9490229396771452, - "grad_norm": 0.455106258392334, - "learning_rate": 7.908673722745306e-06, - "loss": 0.3708, - "step": 14521 - }, - { - "epoch": 0.9490882948826874, - "grad_norm": 0.41844823956489563, - "learning_rate": 7.908389691986197e-06, - "loss": 0.3622, - "step": 14522 - }, - { - "epoch": 0.9491536500882295, - "grad_norm": 0.4724261462688446, - "learning_rate": 7.908105647041868e-06, - "loss": 0.4171, - "step": 14523 - }, - { - "epoch": 0.9492190052937717, - "grad_norm": 0.4346033036708832, - "learning_rate": 7.907821587913698e-06, - "loss": 0.3714, - "step": 14524 - }, - { - "epoch": 0.9492843604993138, - "grad_norm": 0.45755767822265625, - "learning_rate": 7.907537514603076e-06, - "loss": 0.3902, - "step": 14525 - }, - { - "epoch": 0.9493497157048559, - "grad_norm": 0.49286603927612305, - "learning_rate": 7.907253427111386e-06, - "loss": 0.4217, - "step": 14526 - }, - { - "epoch": 0.949415070910398, - "grad_norm": 0.4719918966293335, - "learning_rate": 7.906969325440012e-06, - "loss": 0.4078, - "step": 14527 - }, - { - "epoch": 0.9494804261159401, - "grad_norm": 0.4337770938873291, - "learning_rate": 7.906685209590343e-06, - "loss": 0.3701, - "step": 14528 - }, - { - "epoch": 0.9495457813214823, - "grad_norm": 0.4309212267398834, - "learning_rate": 7.906401079563764e-06, - "loss": 0.36, - "step": 14529 - }, - { - "epoch": 0.9496111365270243, - "grad_norm": 0.420102059841156, - "learning_rate": 7.906116935361659e-06, - "loss": 0.3389, - "step": 14530 - }, - { - "epoch": 0.9496764917325665, - "grad_norm": 0.43637362122535706, - "learning_rate": 7.905832776985415e-06, - "loss": 0.378, - "step": 14531 - }, - { - "epoch": 0.9497418469381086, - "grad_norm": 0.41150134801864624, - "learning_rate": 7.905548604436418e-06, - "loss": 0.3682, - "step": 14532 - }, - { - "epoch": 0.9498072021436508, - "grad_norm": 0.45195427536964417, - "learning_rate": 7.905264417716055e-06, - "loss": 0.3716, - "step": 14533 - }, - { - "epoch": 0.9498725573491928, - "grad_norm": 0.40737611055374146, - "learning_rate": 7.904980216825708e-06, - "loss": 0.3467, - "step": 14534 - }, - { - "epoch": 0.949937912554735, - "grad_norm": 0.4473322629928589, - "learning_rate": 7.904696001766768e-06, - "loss": 0.3875, - "step": 14535 - }, - { - "epoch": 0.9500032677602771, - "grad_norm": 0.4047374725341797, - "learning_rate": 7.90441177254062e-06, - "loss": 0.3549, - "step": 14536 - }, - { - "epoch": 0.9500686229658192, - "grad_norm": 0.4511867165565491, - "learning_rate": 7.904127529148648e-06, - "loss": 0.3615, - "step": 14537 - }, - { - "epoch": 0.9501339781713614, - "grad_norm": 0.4167231619358063, - "learning_rate": 7.903843271592242e-06, - "loss": 0.36, - "step": 14538 - }, - { - "epoch": 0.9501993333769034, - "grad_norm": 0.4476933479309082, - "learning_rate": 7.903558999872785e-06, - "loss": 0.3949, - "step": 14539 - }, - { - "epoch": 0.9502646885824456, - "grad_norm": 0.42178258299827576, - "learning_rate": 7.903274713991664e-06, - "loss": 0.3716, - "step": 14540 - }, - { - "epoch": 0.9503300437879877, - "grad_norm": 0.4218887388706207, - "learning_rate": 7.90299041395027e-06, - "loss": 0.3376, - "step": 14541 - }, - { - "epoch": 0.9503953989935299, - "grad_norm": 0.44128820300102234, - "learning_rate": 7.902706099749982e-06, - "loss": 0.3949, - "step": 14542 - }, - { - "epoch": 0.9504607541990719, - "grad_norm": 0.4702761471271515, - "learning_rate": 7.902421771392192e-06, - "loss": 0.3833, - "step": 14543 - }, - { - "epoch": 0.9505261094046141, - "grad_norm": 0.4297698736190796, - "learning_rate": 7.902137428878287e-06, - "loss": 0.3363, - "step": 14544 - }, - { - "epoch": 0.9505914646101562, - "grad_norm": 0.43554675579071045, - "learning_rate": 7.90185307220965e-06, - "loss": 0.3495, - "step": 14545 - }, - { - "epoch": 0.9506568198156983, - "grad_norm": 0.4012167155742645, - "learning_rate": 7.901568701387671e-06, - "loss": 0.322, - "step": 14546 - }, - { - "epoch": 0.9507221750212405, - "grad_norm": 0.40920358896255493, - "learning_rate": 7.901284316413738e-06, - "loss": 0.3448, - "step": 14547 - }, - { - "epoch": 0.9507875302267825, - "grad_norm": 0.4171324372291565, - "learning_rate": 7.900999917289234e-06, - "loss": 0.3518, - "step": 14548 - }, - { - "epoch": 0.9508528854323247, - "grad_norm": 0.43824639916419983, - "learning_rate": 7.90071550401555e-06, - "loss": 0.3964, - "step": 14549 - }, - { - "epoch": 0.9509182406378668, - "grad_norm": 0.44572529196739197, - "learning_rate": 7.90043107659407e-06, - "loss": 0.348, - "step": 14550 - }, - { - "epoch": 0.950983595843409, - "grad_norm": 0.42346009612083435, - "learning_rate": 7.900146635026184e-06, - "loss": 0.3132, - "step": 14551 - }, - { - "epoch": 0.951048951048951, - "grad_norm": 0.42569127678871155, - "learning_rate": 7.899862179313278e-06, - "loss": 0.3783, - "step": 14552 - }, - { - "epoch": 0.9511143062544932, - "grad_norm": 0.4125511646270752, - "learning_rate": 7.89957770945674e-06, - "loss": 0.3229, - "step": 14553 - }, - { - "epoch": 0.9511796614600353, - "grad_norm": 0.4504038393497467, - "learning_rate": 7.899293225457956e-06, - "loss": 0.367, - "step": 14554 - }, - { - "epoch": 0.9512450166655774, - "grad_norm": 0.4786413311958313, - "learning_rate": 7.899008727318315e-06, - "loss": 0.3977, - "step": 14555 - }, - { - "epoch": 0.9513103718711196, - "grad_norm": 0.4554285407066345, - "learning_rate": 7.898724215039204e-06, - "loss": 0.4162, - "step": 14556 - }, - { - "epoch": 0.9513757270766616, - "grad_norm": 0.4733133614063263, - "learning_rate": 7.898439688622011e-06, - "loss": 0.4213, - "step": 14557 - }, - { - "epoch": 0.9514410822822038, - "grad_norm": 0.45218655467033386, - "learning_rate": 7.898155148068124e-06, - "loss": 0.4109, - "step": 14558 - }, - { - "epoch": 0.9515064374877459, - "grad_norm": 0.436526894569397, - "learning_rate": 7.897870593378928e-06, - "loss": 0.3445, - "step": 14559 - }, - { - "epoch": 0.9515717926932881, - "grad_norm": 0.448528528213501, - "learning_rate": 7.897586024555816e-06, - "loss": 0.3581, - "step": 14560 - }, - { - "epoch": 0.9516371478988301, - "grad_norm": 0.4428890347480774, - "learning_rate": 7.897301441600172e-06, - "loss": 0.3828, - "step": 14561 - }, - { - "epoch": 0.9517025031043722, - "grad_norm": 0.46396172046661377, - "learning_rate": 7.897016844513386e-06, - "loss": 0.4034, - "step": 14562 - }, - { - "epoch": 0.9517678583099144, - "grad_norm": 0.4298264682292938, - "learning_rate": 7.896732233296844e-06, - "loss": 0.3458, - "step": 14563 - }, - { - "epoch": 0.9518332135154565, - "grad_norm": 0.4606119394302368, - "learning_rate": 7.896447607951936e-06, - "loss": 0.3958, - "step": 14564 - }, - { - "epoch": 0.9518985687209987, - "grad_norm": 0.39340078830718994, - "learning_rate": 7.89616296848005e-06, - "loss": 0.3042, - "step": 14565 - }, - { - "epoch": 0.9519639239265407, - "grad_norm": 0.45184096693992615, - "learning_rate": 7.895878314882572e-06, - "loss": 0.4126, - "step": 14566 - }, - { - "epoch": 0.9520292791320829, - "grad_norm": 0.4581175148487091, - "learning_rate": 7.895593647160895e-06, - "loss": 0.3521, - "step": 14567 - }, - { - "epoch": 0.952094634337625, - "grad_norm": 0.4212242066860199, - "learning_rate": 7.895308965316403e-06, - "loss": 0.3325, - "step": 14568 - }, - { - "epoch": 0.9521599895431672, - "grad_norm": 0.43856918811798096, - "learning_rate": 7.895024269350486e-06, - "loss": 0.3645, - "step": 14569 - }, - { - "epoch": 0.9522253447487092, - "grad_norm": 0.42181190848350525, - "learning_rate": 7.894739559264534e-06, - "loss": 0.3402, - "step": 14570 - }, - { - "epoch": 0.9522906999542513, - "grad_norm": 0.4480799436569214, - "learning_rate": 7.894454835059933e-06, - "loss": 0.4022, - "step": 14571 - }, - { - "epoch": 0.9523560551597935, - "grad_norm": 0.4167082607746124, - "learning_rate": 7.894170096738074e-06, - "loss": 0.3372, - "step": 14572 - }, - { - "epoch": 0.9524214103653356, - "grad_norm": 0.42250657081604004, - "learning_rate": 7.893885344300344e-06, - "loss": 0.3501, - "step": 14573 - }, - { - "epoch": 0.9524867655708777, - "grad_norm": 0.5498653650283813, - "learning_rate": 7.893600577748133e-06, - "loss": 0.3949, - "step": 14574 - }, - { - "epoch": 0.9525521207764198, - "grad_norm": 0.4462451934814453, - "learning_rate": 7.89331579708283e-06, - "loss": 0.3799, - "step": 14575 - }, - { - "epoch": 0.952617475981962, - "grad_norm": 0.4206028878688812, - "learning_rate": 7.893031002305822e-06, - "loss": 0.3208, - "step": 14576 - }, - { - "epoch": 0.9526828311875041, - "grad_norm": 0.418376088142395, - "learning_rate": 7.892746193418502e-06, - "loss": 0.3666, - "step": 14577 - }, - { - "epoch": 0.9527481863930463, - "grad_norm": 0.4303472340106964, - "learning_rate": 7.892461370422252e-06, - "loss": 0.3816, - "step": 14578 - }, - { - "epoch": 0.9528135415985883, - "grad_norm": 0.42829039692878723, - "learning_rate": 7.89217653331847e-06, - "loss": 0.3637, - "step": 14579 - }, - { - "epoch": 0.9528788968041304, - "grad_norm": 0.43662217259407043, - "learning_rate": 7.891891682108541e-06, - "loss": 0.3887, - "step": 14580 - }, - { - "epoch": 0.9529442520096726, - "grad_norm": 0.4312734603881836, - "learning_rate": 7.891606816793853e-06, - "loss": 0.3579, - "step": 14581 - }, - { - "epoch": 0.9530096072152147, - "grad_norm": 0.4352143406867981, - "learning_rate": 7.891321937375798e-06, - "loss": 0.3467, - "step": 14582 - }, - { - "epoch": 0.9530749624207568, - "grad_norm": 0.43026095628738403, - "learning_rate": 7.891037043855763e-06, - "loss": 0.3621, - "step": 14583 - }, - { - "epoch": 0.9531403176262989, - "grad_norm": 0.44248607754707336, - "learning_rate": 7.89075213623514e-06, - "loss": 0.3931, - "step": 14584 - }, - { - "epoch": 0.9532056728318411, - "grad_norm": 0.418418824672699, - "learning_rate": 7.890467214515315e-06, - "loss": 0.339, - "step": 14585 - }, - { - "epoch": 0.9532710280373832, - "grad_norm": 0.45571669936180115, - "learning_rate": 7.890182278697682e-06, - "loss": 0.43, - "step": 14586 - }, - { - "epoch": 0.9533363832429252, - "grad_norm": 0.8395232558250427, - "learning_rate": 7.889897328783628e-06, - "loss": 0.4226, - "step": 14587 - }, - { - "epoch": 0.9534017384484674, - "grad_norm": 0.43124720454216003, - "learning_rate": 7.889612364774547e-06, - "loss": 0.3707, - "step": 14588 - }, - { - "epoch": 0.9534670936540095, - "grad_norm": 0.48433342576026917, - "learning_rate": 7.88932738667182e-06, - "loss": 0.4657, - "step": 14589 - }, - { - "epoch": 0.9535324488595517, - "grad_norm": 0.4577804207801819, - "learning_rate": 7.889042394476847e-06, - "loss": 0.3597, - "step": 14590 - }, - { - "epoch": 0.9535978040650938, - "grad_norm": 0.43687108159065247, - "learning_rate": 7.88875738819101e-06, - "loss": 0.3499, - "step": 14591 - }, - { - "epoch": 0.9536631592706359, - "grad_norm": 0.4336129128932953, - "learning_rate": 7.888472367815705e-06, - "loss": 0.3431, - "step": 14592 - }, - { - "epoch": 0.953728514476178, - "grad_norm": 0.45300954580307007, - "learning_rate": 7.888187333352318e-06, - "loss": 0.4078, - "step": 14593 - }, - { - "epoch": 0.9537938696817202, - "grad_norm": 0.41192471981048584, - "learning_rate": 7.887902284802242e-06, - "loss": 0.3475, - "step": 14594 - }, - { - "epoch": 0.9538592248872623, - "grad_norm": 0.4481332302093506, - "learning_rate": 7.887617222166866e-06, - "loss": 0.4063, - "step": 14595 - }, - { - "epoch": 0.9539245800928043, - "grad_norm": 0.4861341416835785, - "learning_rate": 7.887332145447579e-06, - "loss": 0.3733, - "step": 14596 - }, - { - "epoch": 0.9539899352983465, - "grad_norm": 0.4583840072154999, - "learning_rate": 7.887047054645773e-06, - "loss": 0.4015, - "step": 14597 - }, - { - "epoch": 0.9540552905038886, - "grad_norm": 0.45041805505752563, - "learning_rate": 7.886761949762838e-06, - "loss": 0.3877, - "step": 14598 - }, - { - "epoch": 0.9541206457094308, - "grad_norm": 0.4253000020980835, - "learning_rate": 7.886476830800166e-06, - "loss": 0.3459, - "step": 14599 - }, - { - "epoch": 0.9541860009149729, - "grad_norm": 0.420580118894577, - "learning_rate": 7.886191697759146e-06, - "loss": 0.3515, - "step": 14600 - }, - { - "epoch": 0.954251356120515, - "grad_norm": 0.45473599433898926, - "learning_rate": 7.885906550641172e-06, - "loss": 0.3688, - "step": 14601 - }, - { - "epoch": 0.9543167113260571, - "grad_norm": 0.43718987703323364, - "learning_rate": 7.885621389447628e-06, - "loss": 0.3817, - "step": 14602 - }, - { - "epoch": 0.9543820665315993, - "grad_norm": 0.4303869307041168, - "learning_rate": 7.88533621417991e-06, - "loss": 0.3704, - "step": 14603 - }, - { - "epoch": 0.9544474217371414, - "grad_norm": 0.4161582887172699, - "learning_rate": 7.885051024839408e-06, - "loss": 0.3437, - "step": 14604 - }, - { - "epoch": 0.9545127769426834, - "grad_norm": 0.46883052587509155, - "learning_rate": 7.884765821427514e-06, - "loss": 0.3489, - "step": 14605 - }, - { - "epoch": 0.9545781321482256, - "grad_norm": 0.4682151973247528, - "learning_rate": 7.884480603945615e-06, - "loss": 0.4144, - "step": 14606 - }, - { - "epoch": 0.9546434873537677, - "grad_norm": 0.4272187054157257, - "learning_rate": 7.884195372395107e-06, - "loss": 0.3414, - "step": 14607 - }, - { - "epoch": 0.9547088425593099, - "grad_norm": 0.4370492100715637, - "learning_rate": 7.88391012677738e-06, - "loss": 0.3601, - "step": 14608 - }, - { - "epoch": 0.954774197764852, - "grad_norm": 0.4563125669956207, - "learning_rate": 7.88362486709382e-06, - "loss": 0.4149, - "step": 14609 - }, - { - "epoch": 0.9548395529703941, - "grad_norm": 0.44245320558547974, - "learning_rate": 7.883339593345827e-06, - "loss": 0.4101, - "step": 14610 - }, - { - "epoch": 0.9549049081759362, - "grad_norm": 0.4485817849636078, - "learning_rate": 7.883054305534786e-06, - "loss": 0.3841, - "step": 14611 - }, - { - "epoch": 0.9549702633814783, - "grad_norm": 0.4516976773738861, - "learning_rate": 7.88276900366209e-06, - "loss": 0.4044, - "step": 14612 - }, - { - "epoch": 0.9550356185870205, - "grad_norm": 0.5184816122055054, - "learning_rate": 7.882483687729131e-06, - "loss": 0.466, - "step": 14613 - }, - { - "epoch": 0.9551009737925625, - "grad_norm": 0.43364837765693665, - "learning_rate": 7.882198357737301e-06, - "loss": 0.3713, - "step": 14614 - }, - { - "epoch": 0.9551663289981047, - "grad_norm": 0.4156990349292755, - "learning_rate": 7.88191301368799e-06, - "loss": 0.3477, - "step": 14615 - }, - { - "epoch": 0.9552316842036468, - "grad_norm": 0.42361167073249817, - "learning_rate": 7.881627655582593e-06, - "loss": 0.3702, - "step": 14616 - }, - { - "epoch": 0.955297039409189, - "grad_norm": 0.4145409166812897, - "learning_rate": 7.881342283422498e-06, - "loss": 0.3354, - "step": 14617 - }, - { - "epoch": 0.955362394614731, - "grad_norm": 0.4254988133907318, - "learning_rate": 7.881056897209098e-06, - "loss": 0.3606, - "step": 14618 - }, - { - "epoch": 0.9554277498202732, - "grad_norm": 0.6123539805412292, - "learning_rate": 7.880771496943786e-06, - "loss": 0.3528, - "step": 14619 - }, - { - "epoch": 0.9554931050258153, - "grad_norm": 0.43436136841773987, - "learning_rate": 7.880486082627954e-06, - "loss": 0.3623, - "step": 14620 - }, - { - "epoch": 0.9555584602313574, - "grad_norm": 0.47374093532562256, - "learning_rate": 7.880200654262993e-06, - "loss": 0.4154, - "step": 14621 - }, - { - "epoch": 0.9556238154368996, - "grad_norm": 0.43287163972854614, - "learning_rate": 7.879915211850296e-06, - "loss": 0.3361, - "step": 14622 - }, - { - "epoch": 0.9556891706424416, - "grad_norm": 0.4072272479534149, - "learning_rate": 7.879629755391254e-06, - "loss": 0.3355, - "step": 14623 - }, - { - "epoch": 0.9557545258479838, - "grad_norm": 0.4534406363964081, - "learning_rate": 7.87934428488726e-06, - "loss": 0.3798, - "step": 14624 - }, - { - "epoch": 0.9558198810535259, - "grad_norm": 0.427864134311676, - "learning_rate": 7.879058800339708e-06, - "loss": 0.3236, - "step": 14625 - }, - { - "epoch": 0.9558852362590681, - "grad_norm": 0.4547727406024933, - "learning_rate": 7.878773301749986e-06, - "loss": 0.376, - "step": 14626 - }, - { - "epoch": 0.9559505914646101, - "grad_norm": 0.4406866133213043, - "learning_rate": 7.878487789119492e-06, - "loss": 0.373, - "step": 14627 - }, - { - "epoch": 0.9560159466701523, - "grad_norm": 0.45401880145072937, - "learning_rate": 7.878202262449615e-06, - "loss": 0.3786, - "step": 14628 - }, - { - "epoch": 0.9560813018756944, - "grad_norm": 0.4420032203197479, - "learning_rate": 7.87791672174175e-06, - "loss": 0.397, - "step": 14629 - }, - { - "epoch": 0.9561466570812365, - "grad_norm": 0.43429428339004517, - "learning_rate": 7.877631166997286e-06, - "loss": 0.3917, - "step": 14630 - }, - { - "epoch": 0.9562120122867787, - "grad_norm": 0.4955896735191345, - "learning_rate": 7.877345598217618e-06, - "loss": 0.4578, - "step": 14631 - }, - { - "epoch": 0.9562773674923207, - "grad_norm": 0.4426642954349518, - "learning_rate": 7.87706001540414e-06, - "loss": 0.4086, - "step": 14632 - }, - { - "epoch": 0.9563427226978629, - "grad_norm": 0.4321775734424591, - "learning_rate": 7.876774418558242e-06, - "loss": 0.3608, - "step": 14633 - }, - { - "epoch": 0.956408077903405, - "grad_norm": 0.45228639245033264, - "learning_rate": 7.87648880768132e-06, - "loss": 0.3884, - "step": 14634 - }, - { - "epoch": 0.9564734331089472, - "grad_norm": 0.4376620948314667, - "learning_rate": 7.876203182774764e-06, - "loss": 0.3385, - "step": 14635 - }, - { - "epoch": 0.9565387883144892, - "grad_norm": 0.42276784777641296, - "learning_rate": 7.87591754383997e-06, - "loss": 0.3366, - "step": 14636 - }, - { - "epoch": 0.9566041435200314, - "grad_norm": 0.4231339395046234, - "learning_rate": 7.87563189087833e-06, - "loss": 0.3407, - "step": 14637 - }, - { - "epoch": 0.9566694987255735, - "grad_norm": 0.40449103713035583, - "learning_rate": 7.875346223891236e-06, - "loss": 0.3293, - "step": 14638 - }, - { - "epoch": 0.9567348539311156, - "grad_norm": 0.4093177914619446, - "learning_rate": 7.875060542880083e-06, - "loss": 0.346, - "step": 14639 - }, - { - "epoch": 0.9568002091366578, - "grad_norm": 0.4191904067993164, - "learning_rate": 7.874774847846263e-06, - "loss": 0.3648, - "step": 14640 - }, - { - "epoch": 0.9568655643421998, - "grad_norm": 0.44178369641304016, - "learning_rate": 7.87448913879117e-06, - "loss": 0.3717, - "step": 14641 - }, - { - "epoch": 0.956930919547742, - "grad_norm": 0.4869230389595032, - "learning_rate": 7.8742034157162e-06, - "loss": 0.4358, - "step": 14642 - }, - { - "epoch": 0.9569962747532841, - "grad_norm": 0.47952571511268616, - "learning_rate": 7.873917678622742e-06, - "loss": 0.4481, - "step": 14643 - }, - { - "epoch": 0.9570616299588263, - "grad_norm": 0.4684247672557831, - "learning_rate": 7.873631927512192e-06, - "loss": 0.3839, - "step": 14644 - }, - { - "epoch": 0.9571269851643683, - "grad_norm": 0.43733447790145874, - "learning_rate": 7.873346162385944e-06, - "loss": 0.3691, - "step": 14645 - }, - { - "epoch": 0.9571923403699104, - "grad_norm": 0.4437146782875061, - "learning_rate": 7.873060383245391e-06, - "loss": 0.3883, - "step": 14646 - }, - { - "epoch": 0.9572576955754526, - "grad_norm": 0.41246497631073, - "learning_rate": 7.872774590091927e-06, - "loss": 0.3176, - "step": 14647 - }, - { - "epoch": 0.9573230507809947, - "grad_norm": 0.42654070258140564, - "learning_rate": 7.872488782926948e-06, - "loss": 0.3929, - "step": 14648 - }, - { - "epoch": 0.9573884059865369, - "grad_norm": 0.41639426350593567, - "learning_rate": 7.872202961751845e-06, - "loss": 0.3336, - "step": 14649 - }, - { - "epoch": 0.9574537611920789, - "grad_norm": 0.4656263291835785, - "learning_rate": 7.871917126568011e-06, - "loss": 0.382, - "step": 14650 - }, - { - "epoch": 0.9575191163976211, - "grad_norm": 0.4734870195388794, - "learning_rate": 7.871631277376845e-06, - "loss": 0.4118, - "step": 14651 - }, - { - "epoch": 0.9575844716031632, - "grad_norm": 0.45249998569488525, - "learning_rate": 7.871345414179738e-06, - "loss": 0.3986, - "step": 14652 - }, - { - "epoch": 0.9576498268087054, - "grad_norm": 0.4351339042186737, - "learning_rate": 7.871059536978085e-06, - "loss": 0.3862, - "step": 14653 - }, - { - "epoch": 0.9577151820142474, - "grad_norm": 0.44380736351013184, - "learning_rate": 7.870773645773278e-06, - "loss": 0.3743, - "step": 14654 - }, - { - "epoch": 0.9577805372197895, - "grad_norm": 0.42017319798469543, - "learning_rate": 7.870487740566714e-06, - "loss": 0.33, - "step": 14655 - }, - { - "epoch": 0.9578458924253317, - "grad_norm": 0.44864538311958313, - "learning_rate": 7.87020182135979e-06, - "loss": 0.368, - "step": 14656 - }, - { - "epoch": 0.9579112476308738, - "grad_norm": 0.44515731930732727, - "learning_rate": 7.869915888153893e-06, - "loss": 0.3473, - "step": 14657 - }, - { - "epoch": 0.957976602836416, - "grad_norm": 0.4643704891204834, - "learning_rate": 7.869629940950423e-06, - "loss": 0.413, - "step": 14658 - }, - { - "epoch": 0.958041958041958, - "grad_norm": 0.44642704725265503, - "learning_rate": 7.869343979750776e-06, - "loss": 0.4036, - "step": 14659 - }, - { - "epoch": 0.9581073132475002, - "grad_norm": 0.43242260813713074, - "learning_rate": 7.869058004556342e-06, - "loss": 0.3551, - "step": 14660 - }, - { - "epoch": 0.9581726684530423, - "grad_norm": 0.42438915371894836, - "learning_rate": 7.868772015368518e-06, - "loss": 0.3662, - "step": 14661 - }, - { - "epoch": 0.9582380236585845, - "grad_norm": 0.43452465534210205, - "learning_rate": 7.8684860121887e-06, - "loss": 0.3656, - "step": 14662 - }, - { - "epoch": 0.9583033788641265, - "grad_norm": 0.4757184386253357, - "learning_rate": 7.868199995018283e-06, - "loss": 0.4081, - "step": 14663 - }, - { - "epoch": 0.9583687340696686, - "grad_norm": 0.4319245517253876, - "learning_rate": 7.86791396385866e-06, - "loss": 0.3569, - "step": 14664 - }, - { - "epoch": 0.9584340892752108, - "grad_norm": 0.41545370221138, - "learning_rate": 7.867627918711226e-06, - "loss": 0.3381, - "step": 14665 - }, - { - "epoch": 0.9584994444807529, - "grad_norm": 0.40892425179481506, - "learning_rate": 7.86734185957738e-06, - "loss": 0.3569, - "step": 14666 - }, - { - "epoch": 0.958564799686295, - "grad_norm": 0.4482435882091522, - "learning_rate": 7.867055786458512e-06, - "loss": 0.3833, - "step": 14667 - }, - { - "epoch": 0.9586301548918371, - "grad_norm": 0.4535319209098816, - "learning_rate": 7.86676969935602e-06, - "loss": 0.3964, - "step": 14668 - }, - { - "epoch": 0.9586955100973793, - "grad_norm": 0.4511040449142456, - "learning_rate": 7.866483598271299e-06, - "loss": 0.3656, - "step": 14669 - }, - { - "epoch": 0.9587608653029214, - "grad_norm": 0.4494752585887909, - "learning_rate": 7.866197483205745e-06, - "loss": 0.3738, - "step": 14670 - }, - { - "epoch": 0.9588262205084634, - "grad_norm": 0.462099552154541, - "learning_rate": 7.865911354160754e-06, - "loss": 0.41, - "step": 14671 - }, - { - "epoch": 0.9588915757140056, - "grad_norm": 0.424837201833725, - "learning_rate": 7.865625211137717e-06, - "loss": 0.3404, - "step": 14672 - }, - { - "epoch": 0.9589569309195477, - "grad_norm": 0.45248329639434814, - "learning_rate": 7.865339054138037e-06, - "loss": 0.4095, - "step": 14673 - }, - { - "epoch": 0.9590222861250899, - "grad_norm": 0.4722941517829895, - "learning_rate": 7.865052883163104e-06, - "loss": 0.4126, - "step": 14674 - }, - { - "epoch": 0.959087641330632, - "grad_norm": 0.4175872802734375, - "learning_rate": 7.864766698214315e-06, - "loss": 0.3417, - "step": 14675 - }, - { - "epoch": 0.9591529965361741, - "grad_norm": 0.4347960352897644, - "learning_rate": 7.864480499293069e-06, - "loss": 0.3559, - "step": 14676 - }, - { - "epoch": 0.9592183517417162, - "grad_norm": 0.5596758127212524, - "learning_rate": 7.864194286400756e-06, - "loss": 0.3684, - "step": 14677 - }, - { - "epoch": 0.9592837069472584, - "grad_norm": 0.4326117932796478, - "learning_rate": 7.863908059538776e-06, - "loss": 0.311, - "step": 14678 - }, - { - "epoch": 0.9593490621528005, - "grad_norm": 0.4768591523170471, - "learning_rate": 7.863621818708526e-06, - "loss": 0.4144, - "step": 14679 - }, - { - "epoch": 0.9594144173583425, - "grad_norm": 0.44171446561813354, - "learning_rate": 7.863335563911399e-06, - "loss": 0.3635, - "step": 14680 - }, - { - "epoch": 0.9594797725638847, - "grad_norm": 0.43145930767059326, - "learning_rate": 7.863049295148793e-06, - "loss": 0.3436, - "step": 14681 - }, - { - "epoch": 0.9595451277694268, - "grad_norm": 0.9899649620056152, - "learning_rate": 7.862763012422102e-06, - "loss": 0.3586, - "step": 14682 - }, - { - "epoch": 0.959610482974969, - "grad_norm": 0.44094836711883545, - "learning_rate": 7.862476715732726e-06, - "loss": 0.3718, - "step": 14683 - }, - { - "epoch": 0.959675838180511, - "grad_norm": 0.5062451362609863, - "learning_rate": 7.862190405082057e-06, - "loss": 0.4498, - "step": 14684 - }, - { - "epoch": 0.9597411933860532, - "grad_norm": 0.44377270340919495, - "learning_rate": 7.861904080471497e-06, - "loss": 0.3573, - "step": 14685 - }, - { - "epoch": 0.9598065485915953, - "grad_norm": 0.5252393484115601, - "learning_rate": 7.861617741902437e-06, - "loss": 0.4324, - "step": 14686 - }, - { - "epoch": 0.9598719037971375, - "grad_norm": 0.44677838683128357, - "learning_rate": 7.861331389376277e-06, - "loss": 0.3811, - "step": 14687 - }, - { - "epoch": 0.9599372590026796, - "grad_norm": 0.4279773235321045, - "learning_rate": 7.861045022894414e-06, - "loss": 0.378, - "step": 14688 - }, - { - "epoch": 0.9600026142082216, - "grad_norm": 0.40343350172042847, - "learning_rate": 7.86075864245824e-06, - "loss": 0.3158, - "step": 14689 - }, - { - "epoch": 0.9600679694137638, - "grad_norm": 0.4713086485862732, - "learning_rate": 7.860472248069157e-06, - "loss": 0.3722, - "step": 14690 - }, - { - "epoch": 0.9601333246193059, - "grad_norm": 0.4739154577255249, - "learning_rate": 7.860185839728559e-06, - "loss": 0.3753, - "step": 14691 - }, - { - "epoch": 0.9601986798248481, - "grad_norm": 0.4589148759841919, - "learning_rate": 7.859899417437845e-06, - "loss": 0.3922, - "step": 14692 - }, - { - "epoch": 0.9602640350303902, - "grad_norm": 0.45614346861839294, - "learning_rate": 7.85961298119841e-06, - "loss": 0.3829, - "step": 14693 - }, - { - "epoch": 0.9603293902359323, - "grad_norm": 0.4335387647151947, - "learning_rate": 7.859326531011654e-06, - "loss": 0.3583, - "step": 14694 - }, - { - "epoch": 0.9603947454414744, - "grad_norm": 0.43192481994628906, - "learning_rate": 7.859040066878969e-06, - "loss": 0.3662, - "step": 14695 - }, - { - "epoch": 0.9604601006470166, - "grad_norm": 0.46029818058013916, - "learning_rate": 7.858753588801755e-06, - "loss": 0.42, - "step": 14696 - }, - { - "epoch": 0.9605254558525587, - "grad_norm": 0.48127636313438416, - "learning_rate": 7.858467096781411e-06, - "loss": 0.4115, - "step": 14697 - }, - { - "epoch": 0.9605908110581007, - "grad_norm": 0.4949004352092743, - "learning_rate": 7.858180590819332e-06, - "loss": 0.4736, - "step": 14698 - }, - { - "epoch": 0.9606561662636429, - "grad_norm": 0.44555923342704773, - "learning_rate": 7.857894070916915e-06, - "loss": 0.3926, - "step": 14699 - }, - { - "epoch": 0.960721521469185, - "grad_norm": 0.5973967909812927, - "learning_rate": 7.85760753707556e-06, - "loss": 0.3727, - "step": 14700 - }, - { - "epoch": 0.9607868766747272, - "grad_norm": 0.4441831707954407, - "learning_rate": 7.857320989296664e-06, - "loss": 0.3914, - "step": 14701 - }, - { - "epoch": 0.9608522318802692, - "grad_norm": 0.4172165095806122, - "learning_rate": 7.857034427581623e-06, - "loss": 0.3664, - "step": 14702 - }, - { - "epoch": 0.9609175870858114, - "grad_norm": 0.4362238943576813, - "learning_rate": 7.856747851931834e-06, - "loss": 0.3455, - "step": 14703 - }, - { - "epoch": 0.9609829422913535, - "grad_norm": 0.44920337200164795, - "learning_rate": 7.856461262348696e-06, - "loss": 0.3945, - "step": 14704 - }, - { - "epoch": 0.9610482974968956, - "grad_norm": 0.44160401821136475, - "learning_rate": 7.856174658833609e-06, - "loss": 0.3607, - "step": 14705 - }, - { - "epoch": 0.9611136527024378, - "grad_norm": 0.43351274728775024, - "learning_rate": 7.855888041387967e-06, - "loss": 0.3786, - "step": 14706 - }, - { - "epoch": 0.9611790079079798, - "grad_norm": 0.45133909583091736, - "learning_rate": 7.85560141001317e-06, - "loss": 0.3585, - "step": 14707 - }, - { - "epoch": 0.961244363113522, - "grad_norm": 0.4177149534225464, - "learning_rate": 7.855314764710616e-06, - "loss": 0.3664, - "step": 14708 - }, - { - "epoch": 0.9613097183190641, - "grad_norm": 0.47566458582878113, - "learning_rate": 7.8550281054817e-06, - "loss": 0.4278, - "step": 14709 - }, - { - "epoch": 0.9613750735246063, - "grad_norm": 0.39405885338783264, - "learning_rate": 7.854741432327827e-06, - "loss": 0.3067, - "step": 14710 - }, - { - "epoch": 0.9614404287301483, - "grad_norm": 0.4599893093109131, - "learning_rate": 7.854454745250388e-06, - "loss": 0.3991, - "step": 14711 - }, - { - "epoch": 0.9615057839356905, - "grad_norm": 0.4538350999355316, - "learning_rate": 7.854168044250788e-06, - "loss": 0.3745, - "step": 14712 - }, - { - "epoch": 0.9615711391412326, - "grad_norm": 0.4846935272216797, - "learning_rate": 7.853881329330419e-06, - "loss": 0.4154, - "step": 14713 - }, - { - "epoch": 0.9616364943467747, - "grad_norm": 0.4401126205921173, - "learning_rate": 7.853594600490681e-06, - "loss": 0.3682, - "step": 14714 - }, - { - "epoch": 0.9617018495523169, - "grad_norm": 0.41723647713661194, - "learning_rate": 7.853307857732976e-06, - "loss": 0.3163, - "step": 14715 - }, - { - "epoch": 0.9617672047578589, - "grad_norm": 0.4554825723171234, - "learning_rate": 7.8530211010587e-06, - "loss": 0.3879, - "step": 14716 - }, - { - "epoch": 0.9618325599634011, - "grad_norm": 0.4212765395641327, - "learning_rate": 7.852734330469249e-06, - "loss": 0.3478, - "step": 14717 - }, - { - "epoch": 0.9618979151689432, - "grad_norm": 0.4703579246997833, - "learning_rate": 7.852447545966026e-06, - "loss": 0.3866, - "step": 14718 - }, - { - "epoch": 0.9619632703744854, - "grad_norm": 0.4920124411582947, - "learning_rate": 7.85216074755043e-06, - "loss": 0.4278, - "step": 14719 - }, - { - "epoch": 0.9620286255800274, - "grad_norm": 0.41396501660346985, - "learning_rate": 7.851873935223856e-06, - "loss": 0.3861, - "step": 14720 - }, - { - "epoch": 0.9620939807855696, - "grad_norm": 0.43075990676879883, - "learning_rate": 7.851587108987705e-06, - "loss": 0.3568, - "step": 14721 - }, - { - "epoch": 0.9621593359911117, - "grad_norm": 0.42916256189346313, - "learning_rate": 7.851300268843376e-06, - "loss": 0.3821, - "step": 14722 - }, - { - "epoch": 0.9622246911966538, - "grad_norm": 0.42286500334739685, - "learning_rate": 7.851013414792267e-06, - "loss": 0.3592, - "step": 14723 - }, - { - "epoch": 0.962290046402196, - "grad_norm": 0.4404940605163574, - "learning_rate": 7.850726546835779e-06, - "loss": 0.3643, - "step": 14724 - }, - { - "epoch": 0.962355401607738, - "grad_norm": 0.41407260298728943, - "learning_rate": 7.85043966497531e-06, - "loss": 0.3598, - "step": 14725 - }, - { - "epoch": 0.9624207568132802, - "grad_norm": 0.4436608552932739, - "learning_rate": 7.850152769212258e-06, - "loss": 0.3768, - "step": 14726 - }, - { - "epoch": 0.9624861120188223, - "grad_norm": 0.4768117666244507, - "learning_rate": 7.849865859548025e-06, - "loss": 0.4293, - "step": 14727 - }, - { - "epoch": 0.9625514672243645, - "grad_norm": 0.42851582169532776, - "learning_rate": 7.849578935984007e-06, - "loss": 0.3621, - "step": 14728 - }, - { - "epoch": 0.9626168224299065, - "grad_norm": 0.46475011110305786, - "learning_rate": 7.849291998521608e-06, - "loss": 0.3942, - "step": 14729 - }, - { - "epoch": 0.9626821776354486, - "grad_norm": 0.420767217874527, - "learning_rate": 7.849005047162223e-06, - "loss": 0.3538, - "step": 14730 - }, - { - "epoch": 0.9627475328409908, - "grad_norm": 0.4748491048812866, - "learning_rate": 7.848718081907253e-06, - "loss": 0.3727, - "step": 14731 - }, - { - "epoch": 0.9628128880465329, - "grad_norm": 0.43817439675331116, - "learning_rate": 7.848431102758101e-06, - "loss": 0.3554, - "step": 14732 - }, - { - "epoch": 0.962878243252075, - "grad_norm": 0.4493640959262848, - "learning_rate": 7.84814410971616e-06, - "loss": 0.3965, - "step": 14733 - }, - { - "epoch": 0.9629435984576171, - "grad_norm": 0.4303871691226959, - "learning_rate": 7.847857102782836e-06, - "loss": 0.3788, - "step": 14734 - }, - { - "epoch": 0.9630089536631593, - "grad_norm": 0.40862104296684265, - "learning_rate": 7.847570081959525e-06, - "loss": 0.312, - "step": 14735 - }, - { - "epoch": 0.9630743088687014, - "grad_norm": 0.44714468717575073, - "learning_rate": 7.847283047247629e-06, - "loss": 0.3861, - "step": 14736 - }, - { - "epoch": 0.9631396640742436, - "grad_norm": 0.4178265333175659, - "learning_rate": 7.846995998648547e-06, - "loss": 0.3532, - "step": 14737 - }, - { - "epoch": 0.9632050192797856, - "grad_norm": 0.4428759813308716, - "learning_rate": 7.846708936163679e-06, - "loss": 0.3965, - "step": 14738 - }, - { - "epoch": 0.9632703744853277, - "grad_norm": 0.41116204857826233, - "learning_rate": 7.846421859794426e-06, - "loss": 0.3523, - "step": 14739 - }, - { - "epoch": 0.9633357296908699, - "grad_norm": 0.39715877175331116, - "learning_rate": 7.846134769542186e-06, - "loss": 0.3151, - "step": 14740 - }, - { - "epoch": 0.963401084896412, - "grad_norm": 0.4801284372806549, - "learning_rate": 7.845847665408362e-06, - "loss": 0.3931, - "step": 14741 - }, - { - "epoch": 0.9634664401019541, - "grad_norm": 0.43569839000701904, - "learning_rate": 7.845560547394353e-06, - "loss": 0.3778, - "step": 14742 - }, - { - "epoch": 0.9635317953074962, - "grad_norm": 0.4289999008178711, - "learning_rate": 7.84527341550156e-06, - "loss": 0.395, - "step": 14743 - }, - { - "epoch": 0.9635971505130384, - "grad_norm": 0.4477882981300354, - "learning_rate": 7.844986269731381e-06, - "loss": 0.3788, - "step": 14744 - }, - { - "epoch": 0.9636625057185805, - "grad_norm": 0.4254097044467926, - "learning_rate": 7.844699110085218e-06, - "loss": 0.3743, - "step": 14745 - }, - { - "epoch": 0.9637278609241227, - "grad_norm": 0.45810019969940186, - "learning_rate": 7.844411936564475e-06, - "loss": 0.385, - "step": 14746 - }, - { - "epoch": 0.9637932161296647, - "grad_norm": 0.42661023139953613, - "learning_rate": 7.844124749170547e-06, - "loss": 0.3897, - "step": 14747 - }, - { - "epoch": 0.9638585713352068, - "grad_norm": 0.45324674248695374, - "learning_rate": 7.843837547904838e-06, - "loss": 0.4, - "step": 14748 - }, - { - "epoch": 0.963923926540749, - "grad_norm": 0.4282146394252777, - "learning_rate": 7.843550332768747e-06, - "loss": 0.3479, - "step": 14749 - }, - { - "epoch": 0.9639892817462911, - "grad_norm": 0.47269314527511597, - "learning_rate": 7.84326310376368e-06, - "loss": 0.4153, - "step": 14750 - }, - { - "epoch": 0.9640546369518332, - "grad_norm": 0.431690514087677, - "learning_rate": 7.842975860891029e-06, - "loss": 0.3873, - "step": 14751 - }, - { - "epoch": 0.9641199921573753, - "grad_norm": 0.5653694272041321, - "learning_rate": 7.842688604152202e-06, - "loss": 0.3715, - "step": 14752 - }, - { - "epoch": 0.9641853473629175, - "grad_norm": 0.44812360405921936, - "learning_rate": 7.842401333548599e-06, - "loss": 0.4027, - "step": 14753 - }, - { - "epoch": 0.9642507025684596, - "grad_norm": 0.46026045083999634, - "learning_rate": 7.842114049081618e-06, - "loss": 0.4292, - "step": 14754 - }, - { - "epoch": 0.9643160577740016, - "grad_norm": 0.44783565402030945, - "learning_rate": 7.841826750752663e-06, - "loss": 0.3687, - "step": 14755 - }, - { - "epoch": 0.9643814129795438, - "grad_norm": 0.4119025468826294, - "learning_rate": 7.841539438563134e-06, - "loss": 0.3316, - "step": 14756 - }, - { - "epoch": 0.9644467681850859, - "grad_norm": 0.43547311425209045, - "learning_rate": 7.841252112514433e-06, - "loss": 0.3565, - "step": 14757 - }, - { - "epoch": 0.9645121233906281, - "grad_norm": 0.4158417284488678, - "learning_rate": 7.840964772607962e-06, - "loss": 0.3285, - "step": 14758 - }, - { - "epoch": 0.9645774785961702, - "grad_norm": 0.4588179886341095, - "learning_rate": 7.840677418845119e-06, - "loss": 0.3837, - "step": 14759 - }, - { - "epoch": 0.9646428338017123, - "grad_norm": 0.4297809898853302, - "learning_rate": 7.84039005122731e-06, - "loss": 0.3471, - "step": 14760 - }, - { - "epoch": 0.9647081890072544, - "grad_norm": 0.4243960678577423, - "learning_rate": 7.840102669755936e-06, - "loss": 0.3413, - "step": 14761 - }, - { - "epoch": 0.9647735442127966, - "grad_norm": 0.5048423409461975, - "learning_rate": 7.839815274432397e-06, - "loss": 0.3964, - "step": 14762 - }, - { - "epoch": 0.9648388994183387, - "grad_norm": 0.45856770873069763, - "learning_rate": 7.839527865258093e-06, - "loss": 0.4471, - "step": 14763 - }, - { - "epoch": 0.9649042546238807, - "grad_norm": 0.4537294805049896, - "learning_rate": 7.839240442234428e-06, - "loss": 0.3744, - "step": 14764 - }, - { - "epoch": 0.9649696098294229, - "grad_norm": 0.4455563426017761, - "learning_rate": 7.838953005362807e-06, - "loss": 0.3939, - "step": 14765 - }, - { - "epoch": 0.965034965034965, - "grad_norm": 0.4145625829696655, - "learning_rate": 7.838665554644624e-06, - "loss": 0.3369, - "step": 14766 - }, - { - "epoch": 0.9651003202405072, - "grad_norm": 0.4203234016895294, - "learning_rate": 7.838378090081289e-06, - "loss": 0.3372, - "step": 14767 - }, - { - "epoch": 0.9651656754460493, - "grad_norm": 0.48393452167510986, - "learning_rate": 7.838090611674199e-06, - "loss": 0.4139, - "step": 14768 - }, - { - "epoch": 0.9652310306515914, - "grad_norm": 0.48538827896118164, - "learning_rate": 7.837803119424759e-06, - "loss": 0.4358, - "step": 14769 - }, - { - "epoch": 0.9652963858571335, - "grad_norm": 0.46151402592658997, - "learning_rate": 7.83751561333437e-06, - "loss": 0.4115, - "step": 14770 - }, - { - "epoch": 0.9653617410626757, - "grad_norm": 0.4766049385070801, - "learning_rate": 7.837228093404434e-06, - "loss": 0.3971, - "step": 14771 - }, - { - "epoch": 0.9654270962682178, - "grad_norm": 0.46706485748291016, - "learning_rate": 7.836940559636354e-06, - "loss": 0.4104, - "step": 14772 - }, - { - "epoch": 0.9654924514737598, - "grad_norm": 0.45811277627944946, - "learning_rate": 7.836653012031533e-06, - "loss": 0.4028, - "step": 14773 - }, - { - "epoch": 0.965557806679302, - "grad_norm": 0.4126622676849365, - "learning_rate": 7.83636545059137e-06, - "loss": 0.336, - "step": 14774 - }, - { - "epoch": 0.9656231618848441, - "grad_norm": 0.4496912956237793, - "learning_rate": 7.836077875317271e-06, - "loss": 0.3969, - "step": 14775 - }, - { - "epoch": 0.9656885170903863, - "grad_norm": 0.44422343373298645, - "learning_rate": 7.835790286210639e-06, - "loss": 0.3789, - "step": 14776 - }, - { - "epoch": 0.9657538722959284, - "grad_norm": 0.4231109023094177, - "learning_rate": 7.835502683272874e-06, - "loss": 0.3419, - "step": 14777 - }, - { - "epoch": 0.9658192275014705, - "grad_norm": 0.4425690770149231, - "learning_rate": 7.835215066505382e-06, - "loss": 0.3368, - "step": 14778 - }, - { - "epoch": 0.9658845827070126, - "grad_norm": 0.4472150206565857, - "learning_rate": 7.834927435909562e-06, - "loss": 0.422, - "step": 14779 - }, - { - "epoch": 0.9659499379125548, - "grad_norm": 0.48612692952156067, - "learning_rate": 7.83463979148682e-06, - "loss": 0.4519, - "step": 14780 - }, - { - "epoch": 0.9660152931180969, - "grad_norm": 0.4408412277698517, - "learning_rate": 7.834352133238558e-06, - "loss": 0.3827, - "step": 14781 - }, - { - "epoch": 0.9660806483236389, - "grad_norm": 0.43729573488235474, - "learning_rate": 7.834064461166178e-06, - "loss": 0.3933, - "step": 14782 - }, - { - "epoch": 0.9661460035291811, - "grad_norm": 0.41399234533309937, - "learning_rate": 7.833776775271083e-06, - "loss": 0.3313, - "step": 14783 - }, - { - "epoch": 0.9662113587347232, - "grad_norm": 0.4549391269683838, - "learning_rate": 7.833489075554679e-06, - "loss": 0.3768, - "step": 14784 - }, - { - "epoch": 0.9662767139402654, - "grad_norm": 0.4329865276813507, - "learning_rate": 7.833201362018364e-06, - "loss": 0.3603, - "step": 14785 - }, - { - "epoch": 0.9663420691458074, - "grad_norm": 0.42552855610847473, - "learning_rate": 7.832913634663545e-06, - "loss": 0.3512, - "step": 14786 - }, - { - "epoch": 0.9664074243513496, - "grad_norm": 0.4280606210231781, - "learning_rate": 7.832625893491627e-06, - "loss": 0.3397, - "step": 14787 - }, - { - "epoch": 0.9664727795568917, - "grad_norm": 0.47604286670684814, - "learning_rate": 7.83233813850401e-06, - "loss": 0.4267, - "step": 14788 - }, - { - "epoch": 0.9665381347624338, - "grad_norm": 0.4646662771701813, - "learning_rate": 7.8320503697021e-06, - "loss": 0.383, - "step": 14789 - }, - { - "epoch": 0.966603489967976, - "grad_norm": 0.4610874652862549, - "learning_rate": 7.831762587087297e-06, - "loss": 0.4114, - "step": 14790 - }, - { - "epoch": 0.966668845173518, - "grad_norm": 0.45965999364852905, - "learning_rate": 7.831474790661009e-06, - "loss": 0.3919, - "step": 14791 - }, - { - "epoch": 0.9667342003790602, - "grad_norm": 0.43267151713371277, - "learning_rate": 7.831186980424637e-06, - "loss": 0.3704, - "step": 14792 - }, - { - "epoch": 0.9667995555846023, - "grad_norm": 0.4539198875427246, - "learning_rate": 7.830899156379584e-06, - "loss": 0.3653, - "step": 14793 - }, - { - "epoch": 0.9668649107901445, - "grad_norm": 0.4215865135192871, - "learning_rate": 7.830611318527254e-06, - "loss": 0.3673, - "step": 14794 - }, - { - "epoch": 0.9669302659956865, - "grad_norm": 0.4116964638233185, - "learning_rate": 7.830323466869055e-06, - "loss": 0.3611, - "step": 14795 - }, - { - "epoch": 0.9669956212012287, - "grad_norm": 0.5104700326919556, - "learning_rate": 7.830035601406386e-06, - "loss": 0.3595, - "step": 14796 - }, - { - "epoch": 0.9670609764067708, - "grad_norm": 0.4243166446685791, - "learning_rate": 7.829747722140656e-06, - "loss": 0.3695, - "step": 14797 - }, - { - "epoch": 0.9671263316123129, - "grad_norm": 0.42462068796157837, - "learning_rate": 7.829459829073263e-06, - "loss": 0.3653, - "step": 14798 - }, - { - "epoch": 0.9671916868178551, - "grad_norm": 0.4319928288459778, - "learning_rate": 7.829171922205615e-06, - "loss": 0.3521, - "step": 14799 - }, - { - "epoch": 0.9672570420233971, - "grad_norm": 0.43317168951034546, - "learning_rate": 7.828884001539117e-06, - "loss": 0.3671, - "step": 14800 - }, - { - "epoch": 0.9673223972289393, - "grad_norm": 0.4600657522678375, - "learning_rate": 7.82859606707517e-06, - "loss": 0.3758, - "step": 14801 - }, - { - "epoch": 0.9673877524344814, - "grad_norm": 0.4432556629180908, - "learning_rate": 7.82830811881518e-06, - "loss": 0.3742, - "step": 14802 - }, - { - "epoch": 0.9674531076400236, - "grad_norm": 0.4621134102344513, - "learning_rate": 7.82802015676055e-06, - "loss": 0.365, - "step": 14803 - }, - { - "epoch": 0.9675184628455656, - "grad_norm": 0.3997276723384857, - "learning_rate": 7.827732180912689e-06, - "loss": 0.3464, - "step": 14804 - }, - { - "epoch": 0.9675838180511078, - "grad_norm": 0.46412381529808044, - "learning_rate": 7.827444191272997e-06, - "loss": 0.4052, - "step": 14805 - }, - { - "epoch": 0.9676491732566499, - "grad_norm": 0.4773043990135193, - "learning_rate": 7.82715618784288e-06, - "loss": 0.4325, - "step": 14806 - }, - { - "epoch": 0.967714528462192, - "grad_norm": 0.4289936423301697, - "learning_rate": 7.826868170623742e-06, - "loss": 0.3608, - "step": 14807 - }, - { - "epoch": 0.9677798836677342, - "grad_norm": 0.4527800381183624, - "learning_rate": 7.82658013961699e-06, - "loss": 0.3931, - "step": 14808 - }, - { - "epoch": 0.9678452388732762, - "grad_norm": 0.4790951907634735, - "learning_rate": 7.826292094824029e-06, - "loss": 0.4221, - "step": 14809 - }, - { - "epoch": 0.9679105940788184, - "grad_norm": 0.4474199414253235, - "learning_rate": 7.82600403624626e-06, - "loss": 0.3521, - "step": 14810 - }, - { - "epoch": 0.9679759492843605, - "grad_norm": 0.4509504735469818, - "learning_rate": 7.82571596388509e-06, - "loss": 0.3564, - "step": 14811 - }, - { - "epoch": 0.9680413044899027, - "grad_norm": 0.44744789600372314, - "learning_rate": 7.825427877741925e-06, - "loss": 0.3412, - "step": 14812 - }, - { - "epoch": 0.9681066596954447, - "grad_norm": 0.4231618344783783, - "learning_rate": 7.825139777818169e-06, - "loss": 0.3509, - "step": 14813 - }, - { - "epoch": 0.9681720149009868, - "grad_norm": 0.4987364709377289, - "learning_rate": 7.82485166411523e-06, - "loss": 0.3934, - "step": 14814 - }, - { - "epoch": 0.968237370106529, - "grad_norm": 0.45702850818634033, - "learning_rate": 7.824563536634507e-06, - "loss": 0.3632, - "step": 14815 - }, - { - "epoch": 0.9683027253120711, - "grad_norm": 0.40645474195480347, - "learning_rate": 7.824275395377411e-06, - "loss": 0.3432, - "step": 14816 - }, - { - "epoch": 0.9683680805176133, - "grad_norm": 0.4501011371612549, - "learning_rate": 7.823987240345346e-06, - "loss": 0.3772, - "step": 14817 - }, - { - "epoch": 0.9684334357231553, - "grad_norm": 0.4299066662788391, - "learning_rate": 7.823699071539717e-06, - "loss": 0.354, - "step": 14818 - }, - { - "epoch": 0.9684987909286975, - "grad_norm": 0.43744340538978577, - "learning_rate": 7.823410888961928e-06, - "loss": 0.3451, - "step": 14819 - }, - { - "epoch": 0.9685641461342396, - "grad_norm": 0.44244512915611267, - "learning_rate": 7.82312269261339e-06, - "loss": 0.3441, - "step": 14820 - }, - { - "epoch": 0.9686295013397818, - "grad_norm": 0.45788103342056274, - "learning_rate": 7.8228344824955e-06, - "loss": 0.3931, - "step": 14821 - }, - { - "epoch": 0.9686948565453238, - "grad_norm": 0.45720037817955017, - "learning_rate": 7.82254625860967e-06, - "loss": 0.4013, - "step": 14822 - }, - { - "epoch": 0.9687602117508659, - "grad_norm": 0.4614906311035156, - "learning_rate": 7.822258020957302e-06, - "loss": 0.4173, - "step": 14823 - }, - { - "epoch": 0.9688255669564081, - "grad_norm": 0.4436750113964081, - "learning_rate": 7.821969769539806e-06, - "loss": 0.3766, - "step": 14824 - }, - { - "epoch": 0.9688909221619502, - "grad_norm": 0.3952990472316742, - "learning_rate": 7.821681504358587e-06, - "loss": 0.2851, - "step": 14825 - }, - { - "epoch": 0.9689562773674923, - "grad_norm": 0.4649125933647156, - "learning_rate": 7.821393225415047e-06, - "loss": 0.3955, - "step": 14826 - }, - { - "epoch": 0.9690216325730344, - "grad_norm": 0.42980360984802246, - "learning_rate": 7.821104932710597e-06, - "loss": 0.3687, - "step": 14827 - }, - { - "epoch": 0.9690869877785766, - "grad_norm": 0.4904744327068329, - "learning_rate": 7.82081662624664e-06, - "loss": 0.455, - "step": 14828 - }, - { - "epoch": 0.9691523429841187, - "grad_norm": 0.4130229651927948, - "learning_rate": 7.820528306024583e-06, - "loss": 0.3605, - "step": 14829 - }, - { - "epoch": 0.9692176981896609, - "grad_norm": 0.5697034597396851, - "learning_rate": 7.820239972045832e-06, - "loss": 0.4163, - "step": 14830 - }, - { - "epoch": 0.9692830533952029, - "grad_norm": 0.44115638732910156, - "learning_rate": 7.819951624311794e-06, - "loss": 0.3685, - "step": 14831 - }, - { - "epoch": 0.969348408600745, - "grad_norm": 0.4227757155895233, - "learning_rate": 7.819663262823876e-06, - "loss": 0.323, - "step": 14832 - }, - { - "epoch": 0.9694137638062872, - "grad_norm": 0.47103533148765564, - "learning_rate": 7.819374887583481e-06, - "loss": 0.3533, - "step": 14833 - }, - { - "epoch": 0.9694791190118293, - "grad_norm": 0.4258157014846802, - "learning_rate": 7.81908649859202e-06, - "loss": 0.333, - "step": 14834 - }, - { - "epoch": 0.9695444742173714, - "grad_norm": 0.46860530972480774, - "learning_rate": 7.818798095850897e-06, - "loss": 0.3989, - "step": 14835 - }, - { - "epoch": 0.9696098294229135, - "grad_norm": 0.49100691080093384, - "learning_rate": 7.81850967936152e-06, - "loss": 0.4704, - "step": 14836 - }, - { - "epoch": 0.9696751846284557, - "grad_norm": 0.44359099864959717, - "learning_rate": 7.818221249125293e-06, - "loss": 0.4016, - "step": 14837 - }, - { - "epoch": 0.9697405398339978, - "grad_norm": 0.44306522607803345, - "learning_rate": 7.817932805143627e-06, - "loss": 0.3773, - "step": 14838 - }, - { - "epoch": 0.9698058950395398, - "grad_norm": 0.4392610490322113, - "learning_rate": 7.817644347417924e-06, - "loss": 0.3406, - "step": 14839 - }, - { - "epoch": 0.969871250245082, - "grad_norm": 0.42378175258636475, - "learning_rate": 7.817355875949594e-06, - "loss": 0.3291, - "step": 14840 - }, - { - "epoch": 0.9699366054506241, - "grad_norm": 0.4888037145137787, - "learning_rate": 7.817067390740046e-06, - "loss": 0.4358, - "step": 14841 - }, - { - "epoch": 0.9700019606561663, - "grad_norm": 0.45393961668014526, - "learning_rate": 7.816778891790682e-06, - "loss": 0.3731, - "step": 14842 - }, - { - "epoch": 0.9700673158617084, - "grad_norm": 0.43428176641464233, - "learning_rate": 7.816490379102912e-06, - "loss": 0.3791, - "step": 14843 - }, - { - "epoch": 0.9701326710672505, - "grad_norm": 0.46117889881134033, - "learning_rate": 7.816201852678143e-06, - "loss": 0.4169, - "step": 14844 - }, - { - "epoch": 0.9701980262727926, - "grad_norm": 0.43631690740585327, - "learning_rate": 7.81591331251778e-06, - "loss": 0.3376, - "step": 14845 - }, - { - "epoch": 0.9702633814783348, - "grad_norm": 0.43752554059028625, - "learning_rate": 7.815624758623235e-06, - "loss": 0.3766, - "step": 14846 - }, - { - "epoch": 0.9703287366838769, - "grad_norm": 0.40561220049858093, - "learning_rate": 7.81533619099591e-06, - "loss": 0.3261, - "step": 14847 - }, - { - "epoch": 0.9703940918894189, - "grad_norm": 0.42520958185195923, - "learning_rate": 7.815047609637216e-06, - "loss": 0.363, - "step": 14848 - }, - { - "epoch": 0.9704594470949611, - "grad_norm": 0.44699394702911377, - "learning_rate": 7.814759014548561e-06, - "loss": 0.3687, - "step": 14849 - }, - { - "epoch": 0.9705248023005032, - "grad_norm": 0.4572710394859314, - "learning_rate": 7.81447040573135e-06, - "loss": 0.3858, - "step": 14850 - }, - { - "epoch": 0.9705901575060454, - "grad_norm": 0.41853249073028564, - "learning_rate": 7.814181783186992e-06, - "loss": 0.3659, - "step": 14851 - }, - { - "epoch": 0.9706555127115875, - "grad_norm": 0.4620044231414795, - "learning_rate": 7.813893146916895e-06, - "loss": 0.4128, - "step": 14852 - }, - { - "epoch": 0.9707208679171296, - "grad_norm": 0.4533671736717224, - "learning_rate": 7.813604496922465e-06, - "loss": 0.3396, - "step": 14853 - }, - { - "epoch": 0.9707862231226717, - "grad_norm": 0.5005420446395874, - "learning_rate": 7.813315833205114e-06, - "loss": 0.4176, - "step": 14854 - }, - { - "epoch": 0.9708515783282139, - "grad_norm": 0.46725791692733765, - "learning_rate": 7.813027155766244e-06, - "loss": 0.4292, - "step": 14855 - }, - { - "epoch": 0.970916933533756, - "grad_norm": 0.4560980200767517, - "learning_rate": 7.812738464607266e-06, - "loss": 0.4013, - "step": 14856 - }, - { - "epoch": 0.970982288739298, - "grad_norm": 0.4414271414279938, - "learning_rate": 7.81244975972959e-06, - "loss": 0.3898, - "step": 14857 - }, - { - "epoch": 0.9710476439448402, - "grad_norm": 0.5079962015151978, - "learning_rate": 7.81216104113462e-06, - "loss": 0.4037, - "step": 14858 - }, - { - "epoch": 0.9711129991503823, - "grad_norm": 0.46962466835975647, - "learning_rate": 7.811872308823768e-06, - "loss": 0.4226, - "step": 14859 - }, - { - "epoch": 0.9711783543559245, - "grad_norm": 0.4703862965106964, - "learning_rate": 7.81158356279844e-06, - "loss": 0.4363, - "step": 14860 - }, - { - "epoch": 0.9712437095614666, - "grad_norm": 0.4480913281440735, - "learning_rate": 7.811294803060046e-06, - "loss": 0.3877, - "step": 14861 - }, - { - "epoch": 0.9713090647670087, - "grad_norm": 0.4308094084262848, - "learning_rate": 7.811006029609993e-06, - "loss": 0.3454, - "step": 14862 - }, - { - "epoch": 0.9713744199725508, - "grad_norm": 0.42413821816444397, - "learning_rate": 7.810717242449689e-06, - "loss": 0.3517, - "step": 14863 - }, - { - "epoch": 0.971439775178093, - "grad_norm": 0.43644389510154724, - "learning_rate": 7.810428441580544e-06, - "loss": 0.3773, - "step": 14864 - }, - { - "epoch": 0.9715051303836351, - "grad_norm": 0.4299981892108917, - "learning_rate": 7.810139627003966e-06, - "loss": 0.3818, - "step": 14865 - }, - { - "epoch": 0.9715704855891771, - "grad_norm": 0.43086904287338257, - "learning_rate": 7.809850798721362e-06, - "loss": 0.3508, - "step": 14866 - }, - { - "epoch": 0.9716358407947193, - "grad_norm": 0.44815102219581604, - "learning_rate": 7.809561956734145e-06, - "loss": 0.3749, - "step": 14867 - }, - { - "epoch": 0.9717011960002614, - "grad_norm": 0.4685414731502533, - "learning_rate": 7.809273101043717e-06, - "loss": 0.4099, - "step": 14868 - }, - { - "epoch": 0.9717665512058036, - "grad_norm": 0.4301880896091461, - "learning_rate": 7.808984231651492e-06, - "loss": 0.3639, - "step": 14869 - }, - { - "epoch": 0.9718319064113456, - "grad_norm": 0.43557488918304443, - "learning_rate": 7.80869534855888e-06, - "loss": 0.3494, - "step": 14870 - }, - { - "epoch": 0.9718972616168878, - "grad_norm": 0.4650508463382721, - "learning_rate": 7.808406451767287e-06, - "loss": 0.4192, - "step": 14871 - }, - { - "epoch": 0.9719626168224299, - "grad_norm": 0.44013047218322754, - "learning_rate": 7.808117541278121e-06, - "loss": 0.374, - "step": 14872 - }, - { - "epoch": 0.972027972027972, - "grad_norm": 0.47921615839004517, - "learning_rate": 7.807828617092796e-06, - "loss": 0.4231, - "step": 14873 - }, - { - "epoch": 0.9720933272335142, - "grad_norm": 0.4601583778858185, - "learning_rate": 7.807539679212716e-06, - "loss": 0.4013, - "step": 14874 - }, - { - "epoch": 0.9721586824390562, - "grad_norm": 0.4489063620567322, - "learning_rate": 7.807250727639293e-06, - "loss": 0.3465, - "step": 14875 - }, - { - "epoch": 0.9722240376445984, - "grad_norm": 0.45170992612838745, - "learning_rate": 7.806961762373935e-06, - "loss": 0.3802, - "step": 14876 - }, - { - "epoch": 0.9722893928501405, - "grad_norm": 0.44086071848869324, - "learning_rate": 7.806672783418053e-06, - "loss": 0.3723, - "step": 14877 - }, - { - "epoch": 0.9723547480556827, - "grad_norm": 0.4645160734653473, - "learning_rate": 7.806383790773055e-06, - "loss": 0.3882, - "step": 14878 - }, - { - "epoch": 0.9724201032612247, - "grad_norm": 0.44723886251449585, - "learning_rate": 7.806094784440351e-06, - "loss": 0.396, - "step": 14879 - }, - { - "epoch": 0.9724854584667669, - "grad_norm": 0.4568825960159302, - "learning_rate": 7.80580576442135e-06, - "loss": 0.4182, - "step": 14880 - }, - { - "epoch": 0.972550813672309, - "grad_norm": 0.44152265787124634, - "learning_rate": 7.805516730717464e-06, - "loss": 0.3922, - "step": 14881 - }, - { - "epoch": 0.9726161688778511, - "grad_norm": 0.4171949028968811, - "learning_rate": 7.8052276833301e-06, - "loss": 0.349, - "step": 14882 - }, - { - "epoch": 0.9726815240833933, - "grad_norm": 0.4647041857242584, - "learning_rate": 7.804938622260669e-06, - "loss": 0.4251, - "step": 14883 - }, - { - "epoch": 0.9727468792889353, - "grad_norm": 0.4064752459526062, - "learning_rate": 7.804649547510581e-06, - "loss": 0.3155, - "step": 14884 - }, - { - "epoch": 0.9728122344944775, - "grad_norm": 0.40594616532325745, - "learning_rate": 7.804360459081244e-06, - "loss": 0.3373, - "step": 14885 - }, - { - "epoch": 0.9728775897000196, - "grad_norm": 0.4173528254032135, - "learning_rate": 7.804071356974071e-06, - "loss": 0.3313, - "step": 14886 - }, - { - "epoch": 0.9729429449055618, - "grad_norm": 0.44771790504455566, - "learning_rate": 7.803782241190469e-06, - "loss": 0.3663, - "step": 14887 - }, - { - "epoch": 0.9730083001111038, - "grad_norm": 0.4582705497741699, - "learning_rate": 7.803493111731852e-06, - "loss": 0.3892, - "step": 14888 - }, - { - "epoch": 0.973073655316646, - "grad_norm": 0.4479402005672455, - "learning_rate": 7.803203968599626e-06, - "loss": 0.4031, - "step": 14889 - }, - { - "epoch": 0.9731390105221881, - "grad_norm": 0.4287157356739044, - "learning_rate": 7.802914811795204e-06, - "loss": 0.3344, - "step": 14890 - }, - { - "epoch": 0.9732043657277302, - "grad_norm": 0.45062702894210815, - "learning_rate": 7.802625641319994e-06, - "loss": 0.3422, - "step": 14891 - }, - { - "epoch": 0.9732697209332724, - "grad_norm": 0.4496932029724121, - "learning_rate": 7.80233645717541e-06, - "loss": 0.3813, - "step": 14892 - }, - { - "epoch": 0.9733350761388144, - "grad_norm": 0.4372115433216095, - "learning_rate": 7.80204725936286e-06, - "loss": 0.3871, - "step": 14893 - }, - { - "epoch": 0.9734004313443566, - "grad_norm": 0.5093250274658203, - "learning_rate": 7.801758047883752e-06, - "loss": 0.4199, - "step": 14894 - }, - { - "epoch": 0.9734657865498987, - "grad_norm": 0.41162726283073425, - "learning_rate": 7.801468822739502e-06, - "loss": 0.3288, - "step": 14895 - }, - { - "epoch": 0.9735311417554409, - "grad_norm": 0.4383789598941803, - "learning_rate": 7.801179583931517e-06, - "loss": 0.3474, - "step": 14896 - }, - { - "epoch": 0.9735964969609829, - "grad_norm": 0.441730797290802, - "learning_rate": 7.80089033146121e-06, - "loss": 0.3567, - "step": 14897 - }, - { - "epoch": 0.973661852166525, - "grad_norm": 0.44002655148506165, - "learning_rate": 7.80060106532999e-06, - "loss": 0.4038, - "step": 14898 - }, - { - "epoch": 0.9737272073720672, - "grad_norm": 0.42360547184944153, - "learning_rate": 7.800311785539267e-06, - "loss": 0.3493, - "step": 14899 - }, - { - "epoch": 0.9737925625776093, - "grad_norm": 0.46605971455574036, - "learning_rate": 7.800022492090455e-06, - "loss": 0.4651, - "step": 14900 - }, - { - "epoch": 0.9738579177831514, - "grad_norm": 0.44473716616630554, - "learning_rate": 7.799733184984961e-06, - "loss": 0.3775, - "step": 14901 - }, - { - "epoch": 0.9739232729886935, - "grad_norm": 0.49451306462287903, - "learning_rate": 7.7994438642242e-06, - "loss": 0.3583, - "step": 14902 - }, - { - "epoch": 0.9739886281942357, - "grad_norm": 0.4529349207878113, - "learning_rate": 7.799154529809583e-06, - "loss": 0.3843, - "step": 14903 - }, - { - "epoch": 0.9740539833997778, - "grad_norm": 0.42561668157577515, - "learning_rate": 7.798865181742518e-06, - "loss": 0.3504, - "step": 14904 - }, - { - "epoch": 0.97411933860532, - "grad_norm": 0.4208686053752899, - "learning_rate": 7.798575820024418e-06, - "loss": 0.3495, - "step": 14905 - }, - { - "epoch": 0.974184693810862, - "grad_norm": 0.44645193219184875, - "learning_rate": 7.798286444656694e-06, - "loss": 0.3732, - "step": 14906 - }, - { - "epoch": 0.9742500490164041, - "grad_norm": 0.4234239459037781, - "learning_rate": 7.797997055640758e-06, - "loss": 0.3313, - "step": 14907 - }, - { - "epoch": 0.9743154042219463, - "grad_norm": 0.42849862575531006, - "learning_rate": 7.79770765297802e-06, - "loss": 0.3701, - "step": 14908 - }, - { - "epoch": 0.9743807594274884, - "grad_norm": 0.4528283476829529, - "learning_rate": 7.797418236669894e-06, - "loss": 0.4159, - "step": 14909 - }, - { - "epoch": 0.9744461146330305, - "grad_norm": 0.4759887158870697, - "learning_rate": 7.79712880671779e-06, - "loss": 0.4212, - "step": 14910 - }, - { - "epoch": 0.9745114698385726, - "grad_norm": 0.460155725479126, - "learning_rate": 7.796839363123119e-06, - "loss": 0.3844, - "step": 14911 - }, - { - "epoch": 0.9745768250441148, - "grad_norm": 0.4385068118572235, - "learning_rate": 7.796549905887293e-06, - "loss": 0.3679, - "step": 14912 - }, - { - "epoch": 0.9746421802496569, - "grad_norm": 0.45536938309669495, - "learning_rate": 7.796260435011726e-06, - "loss": 0.3861, - "step": 14913 - }, - { - "epoch": 0.9747075354551991, - "grad_norm": 0.4241807162761688, - "learning_rate": 7.795970950497826e-06, - "loss": 0.3312, - "step": 14914 - }, - { - "epoch": 0.9747728906607411, - "grad_norm": 0.45421335101127625, - "learning_rate": 7.795681452347009e-06, - "loss": 0.4028, - "step": 14915 - }, - { - "epoch": 0.9748382458662832, - "grad_norm": 0.40803903341293335, - "learning_rate": 7.795391940560684e-06, - "loss": 0.3306, - "step": 14916 - }, - { - "epoch": 0.9749036010718254, - "grad_norm": 0.43891775608062744, - "learning_rate": 7.795102415140265e-06, - "loss": 0.3945, - "step": 14917 - }, - { - "epoch": 0.9749689562773675, - "grad_norm": 0.41384536027908325, - "learning_rate": 7.794812876087161e-06, - "loss": 0.3119, - "step": 14918 - }, - { - "epoch": 0.9750343114829096, - "grad_norm": 0.42975956201553345, - "learning_rate": 7.794523323402788e-06, - "loss": 0.3619, - "step": 14919 - }, - { - "epoch": 0.9750996666884517, - "grad_norm": 0.4349423050880432, - "learning_rate": 7.794233757088558e-06, - "loss": 0.3588, - "step": 14920 - }, - { - "epoch": 0.9751650218939939, - "grad_norm": 0.43726763129234314, - "learning_rate": 7.79394417714588e-06, - "loss": 0.365, - "step": 14921 - }, - { - "epoch": 0.975230377099536, - "grad_norm": 0.45229989290237427, - "learning_rate": 7.793654583576168e-06, - "loss": 0.3725, - "step": 14922 - }, - { - "epoch": 0.975295732305078, - "grad_norm": 0.4289722144603729, - "learning_rate": 7.793364976380837e-06, - "loss": 0.3946, - "step": 14923 - }, - { - "epoch": 0.9753610875106202, - "grad_norm": 0.4572765827178955, - "learning_rate": 7.793075355561295e-06, - "loss": 0.3567, - "step": 14924 - }, - { - "epoch": 0.9754264427161623, - "grad_norm": 0.42398735880851746, - "learning_rate": 7.792785721118959e-06, - "loss": 0.3585, - "step": 14925 - }, - { - "epoch": 0.9754917979217045, - "grad_norm": 0.43802231550216675, - "learning_rate": 7.792496073055238e-06, - "loss": 0.3532, - "step": 14926 - }, - { - "epoch": 0.9755571531272466, - "grad_norm": 0.4520270824432373, - "learning_rate": 7.792206411371547e-06, - "loss": 0.4067, - "step": 14927 - }, - { - "epoch": 0.9756225083327887, - "grad_norm": 0.4393850266933441, - "learning_rate": 7.791916736069298e-06, - "loss": 0.3951, - "step": 14928 - }, - { - "epoch": 0.9756878635383308, - "grad_norm": 0.4602534770965576, - "learning_rate": 7.791627047149903e-06, - "loss": 0.405, - "step": 14929 - }, - { - "epoch": 0.975753218743873, - "grad_norm": 0.4499501585960388, - "learning_rate": 7.791337344614776e-06, - "loss": 0.3758, - "step": 14930 - }, - { - "epoch": 0.9758185739494151, - "grad_norm": 0.48889756202697754, - "learning_rate": 7.79104762846533e-06, - "loss": 0.3803, - "step": 14931 - }, - { - "epoch": 0.9758839291549571, - "grad_norm": 0.4424671530723572, - "learning_rate": 7.79075789870298e-06, - "loss": 0.3588, - "step": 14932 - }, - { - "epoch": 0.9759492843604993, - "grad_norm": 0.4272640347480774, - "learning_rate": 7.790468155329132e-06, - "loss": 0.3631, - "step": 14933 - }, - { - "epoch": 0.9760146395660414, - "grad_norm": 0.4260696470737457, - "learning_rate": 7.790178398345208e-06, - "loss": 0.3453, - "step": 14934 - }, - { - "epoch": 0.9760799947715836, - "grad_norm": 0.4083070456981659, - "learning_rate": 7.789888627752616e-06, - "loss": 0.3532, - "step": 14935 - }, - { - "epoch": 0.9761453499771257, - "grad_norm": 0.4304865002632141, - "learning_rate": 7.78959884355277e-06, - "loss": 0.333, - "step": 14936 - }, - { - "epoch": 0.9762107051826678, - "grad_norm": 0.4329855740070343, - "learning_rate": 7.789309045747085e-06, - "loss": 0.3387, - "step": 14937 - }, - { - "epoch": 0.9762760603882099, - "grad_norm": 0.48346036672592163, - "learning_rate": 7.789019234336974e-06, - "loss": 0.3844, - "step": 14938 - }, - { - "epoch": 0.9763414155937521, - "grad_norm": 0.4386554956436157, - "learning_rate": 7.78872940932385e-06, - "loss": 0.3475, - "step": 14939 - }, - { - "epoch": 0.9764067707992942, - "grad_norm": 0.4556921422481537, - "learning_rate": 7.788439570709126e-06, - "loss": 0.3973, - "step": 14940 - }, - { - "epoch": 0.9764721260048362, - "grad_norm": 0.4295085072517395, - "learning_rate": 7.788149718494215e-06, - "loss": 0.3681, - "step": 14941 - }, - { - "epoch": 0.9765374812103784, - "grad_norm": 0.42635348439216614, - "learning_rate": 7.787859852680533e-06, - "loss": 0.3908, - "step": 14942 - }, - { - "epoch": 0.9766028364159205, - "grad_norm": 0.42449191212654114, - "learning_rate": 7.787569973269493e-06, - "loss": 0.3439, - "step": 14943 - }, - { - "epoch": 0.9766681916214627, - "grad_norm": 0.4358311593532562, - "learning_rate": 7.787280080262509e-06, - "loss": 0.3678, - "step": 14944 - }, - { - "epoch": 0.9767335468270048, - "grad_norm": 0.4210241436958313, - "learning_rate": 7.786990173660993e-06, - "loss": 0.3371, - "step": 14945 - }, - { - "epoch": 0.9767989020325469, - "grad_norm": 0.47323113679885864, - "learning_rate": 7.786700253466362e-06, - "loss": 0.4061, - "step": 14946 - }, - { - "epoch": 0.976864257238089, - "grad_norm": 0.41978371143341064, - "learning_rate": 7.786410319680027e-06, - "loss": 0.3564, - "step": 14947 - }, - { - "epoch": 0.9769296124436312, - "grad_norm": 0.4457114338874817, - "learning_rate": 7.786120372303404e-06, - "loss": 0.3786, - "step": 14948 - }, - { - "epoch": 0.9769949676491733, - "grad_norm": 0.45904749631881714, - "learning_rate": 7.785830411337906e-06, - "loss": 0.3654, - "step": 14949 - }, - { - "epoch": 0.9770603228547153, - "grad_norm": 0.45155349373817444, - "learning_rate": 7.785540436784951e-06, - "loss": 0.3628, - "step": 14950 - }, - { - "epoch": 0.9771256780602575, - "grad_norm": 0.457391619682312, - "learning_rate": 7.785250448645947e-06, - "loss": 0.4203, - "step": 14951 - }, - { - "epoch": 0.9771910332657996, - "grad_norm": 0.4627765119075775, - "learning_rate": 7.784960446922313e-06, - "loss": 0.4565, - "step": 14952 - }, - { - "epoch": 0.9772563884713418, - "grad_norm": 0.4499048888683319, - "learning_rate": 7.784670431615462e-06, - "loss": 0.381, - "step": 14953 - }, - { - "epoch": 0.9773217436768838, - "grad_norm": 0.43460148572921753, - "learning_rate": 7.784380402726807e-06, - "loss": 0.3833, - "step": 14954 - }, - { - "epoch": 0.977387098882426, - "grad_norm": 0.42551854252815247, - "learning_rate": 7.784090360257766e-06, - "loss": 0.3546, - "step": 14955 - }, - { - "epoch": 0.9774524540879681, - "grad_norm": 0.4280862808227539, - "learning_rate": 7.783800304209752e-06, - "loss": 0.3759, - "step": 14956 - }, - { - "epoch": 0.9775178092935102, - "grad_norm": 0.42694562673568726, - "learning_rate": 7.783510234584179e-06, - "loss": 0.3735, - "step": 14957 - }, - { - "epoch": 0.9775831644990524, - "grad_norm": 0.4526824653148651, - "learning_rate": 7.783220151382462e-06, - "loss": 0.4044, - "step": 14958 - }, - { - "epoch": 0.9776485197045944, - "grad_norm": 0.4598309397697449, - "learning_rate": 7.782930054606017e-06, - "loss": 0.4073, - "step": 14959 - }, - { - "epoch": 0.9777138749101366, - "grad_norm": 0.4279153645038605, - "learning_rate": 7.782639944256257e-06, - "loss": 0.336, - "step": 14960 - }, - { - "epoch": 0.9777792301156787, - "grad_norm": 0.4534274637699127, - "learning_rate": 7.782349820334598e-06, - "loss": 0.3593, - "step": 14961 - }, - { - "epoch": 0.9778445853212209, - "grad_norm": 0.4329860210418701, - "learning_rate": 7.782059682842455e-06, - "loss": 0.3679, - "step": 14962 - }, - { - "epoch": 0.9779099405267629, - "grad_norm": 0.4311714470386505, - "learning_rate": 7.781769531781244e-06, - "loss": 0.3706, - "step": 14963 - }, - { - "epoch": 0.9779752957323051, - "grad_norm": 0.44840192794799805, - "learning_rate": 7.78147936715238e-06, - "loss": 0.377, - "step": 14964 - }, - { - "epoch": 0.9780406509378472, - "grad_norm": 0.4737732410430908, - "learning_rate": 7.781189188957276e-06, - "loss": 0.4512, - "step": 14965 - }, - { - "epoch": 0.9781060061433893, - "grad_norm": 0.4669395983219147, - "learning_rate": 7.780898997197348e-06, - "loss": 0.3887, - "step": 14966 - }, - { - "epoch": 0.9781713613489315, - "grad_norm": 0.4276529550552368, - "learning_rate": 7.780608791874014e-06, - "loss": 0.3566, - "step": 14967 - }, - { - "epoch": 0.9782367165544735, - "grad_norm": 0.4112605154514313, - "learning_rate": 7.780318572988688e-06, - "loss": 0.3341, - "step": 14968 - }, - { - "epoch": 0.9783020717600157, - "grad_norm": 0.5153815150260925, - "learning_rate": 7.780028340542785e-06, - "loss": 0.419, - "step": 14969 - }, - { - "epoch": 0.9783674269655578, - "grad_norm": 0.4861021935939789, - "learning_rate": 7.779738094537718e-06, - "loss": 0.4132, - "step": 14970 - }, - { - "epoch": 0.9784327821711, - "grad_norm": 0.4335067868232727, - "learning_rate": 7.779447834974909e-06, - "loss": 0.3842, - "step": 14971 - }, - { - "epoch": 0.978498137376642, - "grad_norm": 0.4382305443286896, - "learning_rate": 7.779157561855767e-06, - "loss": 0.3874, - "step": 14972 - }, - { - "epoch": 0.9785634925821842, - "grad_norm": 0.4398297965526581, - "learning_rate": 7.778867275181712e-06, - "loss": 0.3877, - "step": 14973 - }, - { - "epoch": 0.9786288477877263, - "grad_norm": 0.43479427695274353, - "learning_rate": 7.77857697495416e-06, - "loss": 0.3797, - "step": 14974 - }, - { - "epoch": 0.9786942029932684, - "grad_norm": 0.41617661714553833, - "learning_rate": 7.778286661174523e-06, - "loss": 0.3377, - "step": 14975 - }, - { - "epoch": 0.9787595581988106, - "grad_norm": 0.468075692653656, - "learning_rate": 7.777996333844219e-06, - "loss": 0.4461, - "step": 14976 - }, - { - "epoch": 0.9788249134043526, - "grad_norm": 0.47167110443115234, - "learning_rate": 7.777705992964668e-06, - "loss": 0.388, - "step": 14977 - }, - { - "epoch": 0.9788902686098948, - "grad_norm": 0.44846072793006897, - "learning_rate": 7.77741563853728e-06, - "loss": 0.4309, - "step": 14978 - }, - { - "epoch": 0.9789556238154369, - "grad_norm": 0.41806384921073914, - "learning_rate": 7.777125270563474e-06, - "loss": 0.3486, - "step": 14979 - }, - { - "epoch": 0.9790209790209791, - "grad_norm": 0.418844997882843, - "learning_rate": 7.776834889044666e-06, - "loss": 0.3492, - "step": 14980 - }, - { - "epoch": 0.9790863342265211, - "grad_norm": 0.43911558389663696, - "learning_rate": 7.776544493982274e-06, - "loss": 0.3571, - "step": 14981 - }, - { - "epoch": 0.9791516894320632, - "grad_norm": 0.4887353777885437, - "learning_rate": 7.77625408537771e-06, - "loss": 0.4171, - "step": 14982 - }, - { - "epoch": 0.9792170446376054, - "grad_norm": 0.3884548246860504, - "learning_rate": 7.775963663232395e-06, - "loss": 0.2972, - "step": 14983 - }, - { - "epoch": 0.9792823998431475, - "grad_norm": 0.44042345881462097, - "learning_rate": 7.775673227547743e-06, - "loss": 0.3984, - "step": 14984 - }, - { - "epoch": 0.9793477550486896, - "grad_norm": 0.43387743830680847, - "learning_rate": 7.775382778325171e-06, - "loss": 0.3768, - "step": 14985 - }, - { - "epoch": 0.9794131102542317, - "grad_norm": 0.4563215672969818, - "learning_rate": 7.775092315566095e-06, - "loss": 0.3593, - "step": 14986 - }, - { - "epoch": 0.9794784654597739, - "grad_norm": 0.4310094118118286, - "learning_rate": 7.774801839271933e-06, - "loss": 0.3451, - "step": 14987 - }, - { - "epoch": 0.979543820665316, - "grad_norm": 0.4018457233905792, - "learning_rate": 7.774511349444103e-06, - "loss": 0.3125, - "step": 14988 - }, - { - "epoch": 0.9796091758708582, - "grad_norm": 0.4388629198074341, - "learning_rate": 7.774220846084017e-06, - "loss": 0.3657, - "step": 14989 - }, - { - "epoch": 0.9796745310764002, - "grad_norm": 0.3832870125770569, - "learning_rate": 7.773930329193096e-06, - "loss": 0.2841, - "step": 14990 - }, - { - "epoch": 0.9797398862819423, - "grad_norm": 0.4359722435474396, - "learning_rate": 7.773639798772755e-06, - "loss": 0.3823, - "step": 14991 - }, - { - "epoch": 0.9798052414874845, - "grad_norm": 0.4525127410888672, - "learning_rate": 7.773349254824412e-06, - "loss": 0.4331, - "step": 14992 - }, - { - "epoch": 0.9798705966930266, - "grad_norm": 0.44532421231269836, - "learning_rate": 7.773058697349485e-06, - "loss": 0.3725, - "step": 14993 - }, - { - "epoch": 0.9799359518985687, - "grad_norm": 0.4242517948150635, - "learning_rate": 7.772768126349391e-06, - "loss": 0.3564, - "step": 14994 - }, - { - "epoch": 0.9800013071041108, - "grad_norm": 0.41339805722236633, - "learning_rate": 7.772477541825542e-06, - "loss": 0.3018, - "step": 14995 - }, - { - "epoch": 0.980066662309653, - "grad_norm": 0.40947771072387695, - "learning_rate": 7.772186943779365e-06, - "loss": 0.3315, - "step": 14996 - }, - { - "epoch": 0.9801320175151951, - "grad_norm": 0.43682199716567993, - "learning_rate": 7.771896332212268e-06, - "loss": 0.3525, - "step": 14997 - }, - { - "epoch": 0.9801973727207373, - "grad_norm": 0.4362927973270416, - "learning_rate": 7.771605707125673e-06, - "loss": 0.3632, - "step": 14998 - }, - { - "epoch": 0.9802627279262793, - "grad_norm": 0.4838549792766571, - "learning_rate": 7.771315068520997e-06, - "loss": 0.4175, - "step": 14999 - }, - { - "epoch": 0.9803280831318214, - "grad_norm": 0.4268008768558502, - "learning_rate": 7.771024416399658e-06, - "loss": 0.3336, - "step": 15000 - }, - { - "epoch": 0.9803934383373636, - "grad_norm": 0.4409523904323578, - "learning_rate": 7.770733750763072e-06, - "loss": 0.3742, - "step": 15001 - }, - { - "epoch": 0.9804587935429057, - "grad_norm": 0.4567115008831024, - "learning_rate": 7.770443071612658e-06, - "loss": 0.4437, - "step": 15002 - }, - { - "epoch": 0.9805241487484478, - "grad_norm": 0.42176324129104614, - "learning_rate": 7.770152378949834e-06, - "loss": 0.3487, - "step": 15003 - }, - { - "epoch": 0.9805895039539899, - "grad_norm": 0.4577464163303375, - "learning_rate": 7.769861672776018e-06, - "loss": 0.4139, - "step": 15004 - }, - { - "epoch": 0.9806548591595321, - "grad_norm": 0.5067284107208252, - "learning_rate": 7.769570953092625e-06, - "loss": 0.4526, - "step": 15005 - }, - { - "epoch": 0.9807202143650742, - "grad_norm": 0.45248618721961975, - "learning_rate": 7.769280219901077e-06, - "loss": 0.3726, - "step": 15006 - }, - { - "epoch": 0.9807855695706162, - "grad_norm": 0.47500354051589966, - "learning_rate": 7.768989473202789e-06, - "loss": 0.4138, - "step": 15007 - }, - { - "epoch": 0.9808509247761584, - "grad_norm": 0.4679728150367737, - "learning_rate": 7.768698712999179e-06, - "loss": 0.4271, - "step": 15008 - }, - { - "epoch": 0.9809162799817005, - "grad_norm": 0.5358428955078125, - "learning_rate": 7.768407939291667e-06, - "loss": 0.3613, - "step": 15009 - }, - { - "epoch": 0.9809816351872427, - "grad_norm": 0.42767661809921265, - "learning_rate": 7.768117152081672e-06, - "loss": 0.3778, - "step": 15010 - }, - { - "epoch": 0.9810469903927848, - "grad_norm": 0.5026191473007202, - "learning_rate": 7.767826351370608e-06, - "loss": 0.4399, - "step": 15011 - }, - { - "epoch": 0.9811123455983269, - "grad_norm": 0.5366755127906799, - "learning_rate": 7.767535537159896e-06, - "loss": 0.3971, - "step": 15012 - }, - { - "epoch": 0.981177700803869, - "grad_norm": 0.443352073431015, - "learning_rate": 7.767244709450955e-06, - "loss": 0.3624, - "step": 15013 - }, - { - "epoch": 0.9812430560094112, - "grad_norm": 0.4398881196975708, - "learning_rate": 7.766953868245205e-06, - "loss": 0.3638, - "step": 15014 - }, - { - "epoch": 0.9813084112149533, - "grad_norm": 0.4046792984008789, - "learning_rate": 7.766663013544062e-06, - "loss": 0.2867, - "step": 15015 - }, - { - "epoch": 0.9813737664204953, - "grad_norm": 0.41330233216285706, - "learning_rate": 7.766372145348944e-06, - "loss": 0.3038, - "step": 15016 - }, - { - "epoch": 0.9814391216260375, - "grad_norm": 0.4522208273410797, - "learning_rate": 7.76608126366127e-06, - "loss": 0.4102, - "step": 15017 - }, - { - "epoch": 0.9815044768315796, - "grad_norm": 0.43401139974594116, - "learning_rate": 7.76579036848246e-06, - "loss": 0.3851, - "step": 15018 - }, - { - "epoch": 0.9815698320371218, - "grad_norm": 0.44505298137664795, - "learning_rate": 7.765499459813932e-06, - "loss": 0.3695, - "step": 15019 - }, - { - "epoch": 0.9816351872426639, - "grad_norm": 0.45697569847106934, - "learning_rate": 7.765208537657106e-06, - "loss": 0.3891, - "step": 15020 - }, - { - "epoch": 0.981700542448206, - "grad_norm": 0.49235251545906067, - "learning_rate": 7.764917602013398e-06, - "loss": 0.4273, - "step": 15021 - }, - { - "epoch": 0.9817658976537481, - "grad_norm": 0.46051719784736633, - "learning_rate": 7.76462665288423e-06, - "loss": 0.3993, - "step": 15022 - }, - { - "epoch": 0.9818312528592903, - "grad_norm": 0.4594170153141022, - "learning_rate": 7.764335690271022e-06, - "loss": 0.3817, - "step": 15023 - }, - { - "epoch": 0.9818966080648324, - "grad_norm": 0.4451127350330353, - "learning_rate": 7.764044714175188e-06, - "loss": 0.3676, - "step": 15024 - }, - { - "epoch": 0.9819619632703744, - "grad_norm": 0.4703153371810913, - "learning_rate": 7.763753724598153e-06, - "loss": 0.4086, - "step": 15025 - }, - { - "epoch": 0.9820273184759166, - "grad_norm": 0.42368975281715393, - "learning_rate": 7.763462721541332e-06, - "loss": 0.3568, - "step": 15026 - }, - { - "epoch": 0.9820926736814587, - "grad_norm": 0.4411110281944275, - "learning_rate": 7.763171705006147e-06, - "loss": 0.3804, - "step": 15027 - }, - { - "epoch": 0.9821580288870009, - "grad_norm": 0.4607815742492676, - "learning_rate": 7.762880674994015e-06, - "loss": 0.3616, - "step": 15028 - }, - { - "epoch": 0.982223384092543, - "grad_norm": 0.48068633675575256, - "learning_rate": 7.762589631506358e-06, - "loss": 0.4028, - "step": 15029 - }, - { - "epoch": 0.9822887392980851, - "grad_norm": 0.4616956114768982, - "learning_rate": 7.762298574544594e-06, - "loss": 0.3516, - "step": 15030 - }, - { - "epoch": 0.9823540945036272, - "grad_norm": 0.42269325256347656, - "learning_rate": 7.762007504110143e-06, - "loss": 0.3247, - "step": 15031 - }, - { - "epoch": 0.9824194497091694, - "grad_norm": 0.4312247037887573, - "learning_rate": 7.761716420204423e-06, - "loss": 0.3971, - "step": 15032 - }, - { - "epoch": 0.9824848049147115, - "grad_norm": 0.4473051428794861, - "learning_rate": 7.761425322828859e-06, - "loss": 0.3676, - "step": 15033 - }, - { - "epoch": 0.9825501601202535, - "grad_norm": 0.4145049750804901, - "learning_rate": 7.761134211984864e-06, - "loss": 0.3693, - "step": 15034 - }, - { - "epoch": 0.9826155153257957, - "grad_norm": 0.4665650427341461, - "learning_rate": 7.760843087673861e-06, - "loss": 0.397, - "step": 15035 - }, - { - "epoch": 0.9826808705313378, - "grad_norm": 0.41736987233161926, - "learning_rate": 7.76055194989727e-06, - "loss": 0.3444, - "step": 15036 - }, - { - "epoch": 0.98274622573688, - "grad_norm": 0.4125063121318817, - "learning_rate": 7.760260798656512e-06, - "loss": 0.3401, - "step": 15037 - }, - { - "epoch": 0.982811580942422, - "grad_norm": 0.43488919734954834, - "learning_rate": 7.759969633953006e-06, - "loss": 0.4122, - "step": 15038 - }, - { - "epoch": 0.9828769361479642, - "grad_norm": 0.43481025099754333, - "learning_rate": 7.759678455788169e-06, - "loss": 0.3605, - "step": 15039 - }, - { - "epoch": 0.9829422913535063, - "grad_norm": 0.44786256551742554, - "learning_rate": 7.759387264163427e-06, - "loss": 0.4014, - "step": 15040 - }, - { - "epoch": 0.9830076465590484, - "grad_norm": 0.44559410214424133, - "learning_rate": 7.759096059080196e-06, - "loss": 0.4099, - "step": 15041 - }, - { - "epoch": 0.9830730017645906, - "grad_norm": 0.4063272476196289, - "learning_rate": 7.7588048405399e-06, - "loss": 0.3129, - "step": 15042 - }, - { - "epoch": 0.9831383569701326, - "grad_norm": 0.44426804780960083, - "learning_rate": 7.758513608543954e-06, - "loss": 0.3615, - "step": 15043 - }, - { - "epoch": 0.9832037121756748, - "grad_norm": 0.4609695076942444, - "learning_rate": 7.758222363093783e-06, - "loss": 0.4033, - "step": 15044 - }, - { - "epoch": 0.9832690673812169, - "grad_norm": 0.4464883804321289, - "learning_rate": 7.757931104190806e-06, - "loss": 0.38, - "step": 15045 - }, - { - "epoch": 0.9833344225867591, - "grad_norm": 0.4010449945926666, - "learning_rate": 7.757639831836443e-06, - "loss": 0.3287, - "step": 15046 - }, - { - "epoch": 0.9833997777923011, - "grad_norm": 0.4331609904766083, - "learning_rate": 7.757348546032114e-06, - "loss": 0.4166, - "step": 15047 - }, - { - "epoch": 0.9834651329978433, - "grad_norm": 0.4953961670398712, - "learning_rate": 7.757057246779242e-06, - "loss": 0.4914, - "step": 15048 - }, - { - "epoch": 0.9835304882033854, - "grad_norm": 0.44227883219718933, - "learning_rate": 7.75676593407925e-06, - "loss": 0.4118, - "step": 15049 - }, - { - "epoch": 0.9835958434089275, - "grad_norm": 0.4692818820476532, - "learning_rate": 7.756474607933552e-06, - "loss": 0.4065, - "step": 15050 - }, - { - "epoch": 0.9836611986144697, - "grad_norm": 0.4538833200931549, - "learning_rate": 7.756183268343574e-06, - "loss": 0.3836, - "step": 15051 - }, - { - "epoch": 0.9837265538200117, - "grad_norm": 0.45548778772354126, - "learning_rate": 7.755891915310733e-06, - "loss": 0.3752, - "step": 15052 - }, - { - "epoch": 0.9837919090255539, - "grad_norm": 0.44198480248451233, - "learning_rate": 7.755600548836454e-06, - "loss": 0.3788, - "step": 15053 - }, - { - "epoch": 0.983857264231096, - "grad_norm": 0.45371824502944946, - "learning_rate": 7.755309168922156e-06, - "loss": 0.4105, - "step": 15054 - }, - { - "epoch": 0.9839226194366382, - "grad_norm": 0.47075727581977844, - "learning_rate": 7.75501777556926e-06, - "loss": 0.4289, - "step": 15055 - }, - { - "epoch": 0.9839879746421802, - "grad_norm": 0.4131263792514801, - "learning_rate": 7.75472636877919e-06, - "loss": 0.3411, - "step": 15056 - }, - { - "epoch": 0.9840533298477224, - "grad_norm": 0.4250119626522064, - "learning_rate": 7.754434948553364e-06, - "loss": 0.3678, - "step": 15057 - }, - { - "epoch": 0.9841186850532645, - "grad_norm": 0.6285251975059509, - "learning_rate": 7.754143514893204e-06, - "loss": 0.4136, - "step": 15058 - }, - { - "epoch": 0.9841840402588066, - "grad_norm": 0.424927294254303, - "learning_rate": 7.753852067800131e-06, - "loss": 0.3615, - "step": 15059 - }, - { - "epoch": 0.9842493954643488, - "grad_norm": 0.4634118378162384, - "learning_rate": 7.75356060727557e-06, - "loss": 0.3876, - "step": 15060 - }, - { - "epoch": 0.9843147506698908, - "grad_norm": 0.4112243950366974, - "learning_rate": 7.75326913332094e-06, - "loss": 0.3078, - "step": 15061 - }, - { - "epoch": 0.984380105875433, - "grad_norm": 0.4566067159175873, - "learning_rate": 7.75297764593766e-06, - "loss": 0.3923, - "step": 15062 - }, - { - "epoch": 0.9844454610809751, - "grad_norm": 0.45016443729400635, - "learning_rate": 7.752686145127157e-06, - "loss": 0.3537, - "step": 15063 - }, - { - "epoch": 0.9845108162865173, - "grad_norm": 0.4299840033054352, - "learning_rate": 7.752394630890846e-06, - "loss": 0.3854, - "step": 15064 - }, - { - "epoch": 0.9845761714920593, - "grad_norm": 0.4266165792942047, - "learning_rate": 7.752103103230158e-06, - "loss": 0.3752, - "step": 15065 - }, - { - "epoch": 0.9846415266976014, - "grad_norm": 0.4175054728984833, - "learning_rate": 7.751811562146506e-06, - "loss": 0.3811, - "step": 15066 - }, - { - "epoch": 0.9847068819031436, - "grad_norm": 0.4678158164024353, - "learning_rate": 7.751520007641318e-06, - "loss": 0.3762, - "step": 15067 - }, - { - "epoch": 0.9847722371086857, - "grad_norm": 0.4228020906448364, - "learning_rate": 7.751228439716012e-06, - "loss": 0.3345, - "step": 15068 - }, - { - "epoch": 0.9848375923142278, - "grad_norm": 0.4379521906375885, - "learning_rate": 7.750936858372014e-06, - "loss": 0.3681, - "step": 15069 - }, - { - "epoch": 0.9849029475197699, - "grad_norm": 0.45895805954933167, - "learning_rate": 7.750645263610742e-06, - "loss": 0.3839, - "step": 15070 - }, - { - "epoch": 0.9849683027253121, - "grad_norm": 0.4747651219367981, - "learning_rate": 7.750353655433621e-06, - "loss": 0.3837, - "step": 15071 - }, - { - "epoch": 0.9850336579308542, - "grad_norm": 0.4802089035511017, - "learning_rate": 7.750062033842071e-06, - "loss": 0.3606, - "step": 15072 - }, - { - "epoch": 0.9850990131363964, - "grad_norm": 0.4471137225627899, - "learning_rate": 7.749770398837518e-06, - "loss": 0.3585, - "step": 15073 - }, - { - "epoch": 0.9851643683419384, - "grad_norm": 0.4187166094779968, - "learning_rate": 7.749478750421381e-06, - "loss": 0.3396, - "step": 15074 - }, - { - "epoch": 0.9852297235474805, - "grad_norm": 0.4761616587638855, - "learning_rate": 7.749187088595084e-06, - "loss": 0.4656, - "step": 15075 - }, - { - "epoch": 0.9852950787530227, - "grad_norm": 0.45303037762641907, - "learning_rate": 7.748895413360048e-06, - "loss": 0.4055, - "step": 15076 - }, - { - "epoch": 0.9853604339585648, - "grad_norm": 0.47813448309898376, - "learning_rate": 7.748603724717699e-06, - "loss": 0.4264, - "step": 15077 - }, - { - "epoch": 0.985425789164107, - "grad_norm": 0.4306236803531647, - "learning_rate": 7.748312022669454e-06, - "loss": 0.3509, - "step": 15078 - }, - { - "epoch": 0.985491144369649, - "grad_norm": 0.4116508960723877, - "learning_rate": 7.748020307216742e-06, - "loss": 0.3041, - "step": 15079 - }, - { - "epoch": 0.9855564995751912, - "grad_norm": 0.44778963923454285, - "learning_rate": 7.747728578360981e-06, - "loss": 0.38, - "step": 15080 - }, - { - "epoch": 0.9856218547807333, - "grad_norm": 0.44965118169784546, - "learning_rate": 7.747436836103598e-06, - "loss": 0.3644, - "step": 15081 - }, - { - "epoch": 0.9856872099862755, - "grad_norm": 0.43906593322753906, - "learning_rate": 7.747145080446013e-06, - "loss": 0.3662, - "step": 15082 - }, - { - "epoch": 0.9857525651918175, - "grad_norm": 0.4133242070674896, - "learning_rate": 7.746853311389649e-06, - "loss": 0.3374, - "step": 15083 - }, - { - "epoch": 0.9858179203973596, - "grad_norm": 0.4331609904766083, - "learning_rate": 7.746561528935929e-06, - "loss": 0.3654, - "step": 15084 - }, - { - "epoch": 0.9858832756029018, - "grad_norm": 0.4636089503765106, - "learning_rate": 7.746269733086278e-06, - "loss": 0.4293, - "step": 15085 - }, - { - "epoch": 0.9859486308084439, - "grad_norm": 0.4301362931728363, - "learning_rate": 7.745977923842119e-06, - "loss": 0.3659, - "step": 15086 - }, - { - "epoch": 0.986013986013986, - "grad_norm": 0.43521547317504883, - "learning_rate": 7.745686101204872e-06, - "loss": 0.3904, - "step": 15087 - }, - { - "epoch": 0.9860793412195281, - "grad_norm": 0.4536517560482025, - "learning_rate": 7.745394265175965e-06, - "loss": 0.3839, - "step": 15088 - }, - { - "epoch": 0.9861446964250703, - "grad_norm": 0.43011221289634705, - "learning_rate": 7.745102415756819e-06, - "loss": 0.3868, - "step": 15089 - }, - { - "epoch": 0.9862100516306124, - "grad_norm": 0.41555312275886536, - "learning_rate": 7.744810552948856e-06, - "loss": 0.3491, - "step": 15090 - }, - { - "epoch": 0.9862754068361544, - "grad_norm": 0.5033795833587646, - "learning_rate": 7.744518676753503e-06, - "loss": 0.4098, - "step": 15091 - }, - { - "epoch": 0.9863407620416966, - "grad_norm": 0.43643197417259216, - "learning_rate": 7.744226787172179e-06, - "loss": 0.3489, - "step": 15092 - }, - { - "epoch": 0.9864061172472387, - "grad_norm": 0.46359705924987793, - "learning_rate": 7.743934884206313e-06, - "loss": 0.4206, - "step": 15093 - }, - { - "epoch": 0.9864714724527809, - "grad_norm": 0.4805338680744171, - "learning_rate": 7.743642967857325e-06, - "loss": 0.4336, - "step": 15094 - }, - { - "epoch": 0.986536827658323, - "grad_norm": 0.47086626291275024, - "learning_rate": 7.743351038126639e-06, - "loss": 0.4097, - "step": 15095 - }, - { - "epoch": 0.9866021828638651, - "grad_norm": 0.4841609597206116, - "learning_rate": 7.74305909501568e-06, - "loss": 0.4055, - "step": 15096 - }, - { - "epoch": 0.9866675380694072, - "grad_norm": 0.4449782073497772, - "learning_rate": 7.742767138525872e-06, - "loss": 0.3974, - "step": 15097 - }, - { - "epoch": 0.9867328932749494, - "grad_norm": 0.4355780780315399, - "learning_rate": 7.742475168658638e-06, - "loss": 0.3612, - "step": 15098 - }, - { - "epoch": 0.9867982484804915, - "grad_norm": 0.4568578898906708, - "learning_rate": 7.742183185415402e-06, - "loss": 0.3879, - "step": 15099 - }, - { - "epoch": 0.9868636036860335, - "grad_norm": 0.4183160960674286, - "learning_rate": 7.74189118879759e-06, - "loss": 0.3242, - "step": 15100 - }, - { - "epoch": 0.9869289588915757, - "grad_norm": 0.4333636164665222, - "learning_rate": 7.741599178806625e-06, - "loss": 0.3584, - "step": 15101 - }, - { - "epoch": 0.9869943140971178, - "grad_norm": 0.46936675906181335, - "learning_rate": 7.74130715544393e-06, - "loss": 0.4096, - "step": 15102 - }, - { - "epoch": 0.98705966930266, - "grad_norm": 0.4286305606365204, - "learning_rate": 7.74101511871093e-06, - "loss": 0.3366, - "step": 15103 - }, - { - "epoch": 0.987125024508202, - "grad_norm": 0.4025880992412567, - "learning_rate": 7.740723068609049e-06, - "loss": 0.3235, - "step": 15104 - }, - { - "epoch": 0.9871903797137442, - "grad_norm": 0.4574611186981201, - "learning_rate": 7.740431005139712e-06, - "loss": 0.3791, - "step": 15105 - }, - { - "epoch": 0.9872557349192863, - "grad_norm": 0.44784244894981384, - "learning_rate": 7.740138928304345e-06, - "loss": 0.3748, - "step": 15106 - }, - { - "epoch": 0.9873210901248285, - "grad_norm": 0.5053922533988953, - "learning_rate": 7.739846838104372e-06, - "loss": 0.4492, - "step": 15107 - }, - { - "epoch": 0.9873864453303706, - "grad_norm": 0.4396847188472748, - "learning_rate": 7.739554734541216e-06, - "loss": 0.3866, - "step": 15108 - }, - { - "epoch": 0.9874518005359126, - "grad_norm": 0.4559018909931183, - "learning_rate": 7.739262617616303e-06, - "loss": 0.4325, - "step": 15109 - }, - { - "epoch": 0.9875171557414548, - "grad_norm": 0.41015422344207764, - "learning_rate": 7.738970487331056e-06, - "loss": 0.3572, - "step": 15110 - }, - { - "epoch": 0.9875825109469969, - "grad_norm": 0.43143847584724426, - "learning_rate": 7.7386783436869e-06, - "loss": 0.364, - "step": 15111 - }, - { - "epoch": 0.9876478661525391, - "grad_norm": 0.41663238406181335, - "learning_rate": 7.738386186685262e-06, - "loss": 0.3503, - "step": 15112 - }, - { - "epoch": 0.9877132213580812, - "grad_norm": 0.4315636157989502, - "learning_rate": 7.738094016327568e-06, - "loss": 0.3623, - "step": 15113 - }, - { - "epoch": 0.9877785765636233, - "grad_norm": 0.43158262968063354, - "learning_rate": 7.737801832615239e-06, - "loss": 0.3761, - "step": 15114 - }, - { - "epoch": 0.9878439317691654, - "grad_norm": 0.43610435724258423, - "learning_rate": 7.737509635549703e-06, - "loss": 0.369, - "step": 15115 - }, - { - "epoch": 0.9879092869747076, - "grad_norm": 0.42928194999694824, - "learning_rate": 7.737217425132385e-06, - "loss": 0.3565, - "step": 15116 - }, - { - "epoch": 0.9879746421802497, - "grad_norm": 0.42556947469711304, - "learning_rate": 7.736925201364706e-06, - "loss": 0.3466, - "step": 15117 - }, - { - "epoch": 0.9880399973857917, - "grad_norm": 0.42862996459007263, - "learning_rate": 7.736632964248096e-06, - "loss": 0.3509, - "step": 15118 - }, - { - "epoch": 0.9881053525913339, - "grad_norm": 0.4334465265274048, - "learning_rate": 7.73634071378398e-06, - "loss": 0.3704, - "step": 15119 - }, - { - "epoch": 0.988170707796876, - "grad_norm": 0.45901745557785034, - "learning_rate": 7.736048449973781e-06, - "loss": 0.389, - "step": 15120 - }, - { - "epoch": 0.9882360630024182, - "grad_norm": 0.44808298349380493, - "learning_rate": 7.735756172818927e-06, - "loss": 0.4065, - "step": 15121 - }, - { - "epoch": 0.9883014182079602, - "grad_norm": 0.48540815711021423, - "learning_rate": 7.735463882320842e-06, - "loss": 0.4113, - "step": 15122 - }, - { - "epoch": 0.9883667734135024, - "grad_norm": 0.45461204648017883, - "learning_rate": 7.735171578480952e-06, - "loss": 0.4028, - "step": 15123 - }, - { - "epoch": 0.9884321286190445, - "grad_norm": 0.42914292216300964, - "learning_rate": 7.734879261300683e-06, - "loss": 0.3713, - "step": 15124 - }, - { - "epoch": 0.9884974838245866, - "grad_norm": 0.48916131258010864, - "learning_rate": 7.73458693078146e-06, - "loss": 0.4628, - "step": 15125 - }, - { - "epoch": 0.9885628390301288, - "grad_norm": 0.4181376099586487, - "learning_rate": 7.73429458692471e-06, - "loss": 0.3482, - "step": 15126 - }, - { - "epoch": 0.9886281942356708, - "grad_norm": 0.4241492450237274, - "learning_rate": 7.734002229731855e-06, - "loss": 0.3357, - "step": 15127 - }, - { - "epoch": 0.988693549441213, - "grad_norm": 0.4653661847114563, - "learning_rate": 7.733709859204328e-06, - "loss": 0.4316, - "step": 15128 - }, - { - "epoch": 0.9887589046467551, - "grad_norm": 0.4530976414680481, - "learning_rate": 7.73341747534355e-06, - "loss": 0.4004, - "step": 15129 - }, - { - "epoch": 0.9888242598522973, - "grad_norm": 0.44103583693504333, - "learning_rate": 7.733125078150947e-06, - "loss": 0.3703, - "step": 15130 - }, - { - "epoch": 0.9888896150578393, - "grad_norm": 0.3934401273727417, - "learning_rate": 7.732832667627946e-06, - "loss": 0.2976, - "step": 15131 - }, - { - "epoch": 0.9889549702633815, - "grad_norm": 0.4212241470813751, - "learning_rate": 7.732540243775972e-06, - "loss": 0.3617, - "step": 15132 - }, - { - "epoch": 0.9890203254689236, - "grad_norm": 0.4378197193145752, - "learning_rate": 7.732247806596455e-06, - "loss": 0.3672, - "step": 15133 - }, - { - "epoch": 0.9890856806744657, - "grad_norm": 0.45740073919296265, - "learning_rate": 7.731955356090818e-06, - "loss": 0.36, - "step": 15134 - }, - { - "epoch": 0.9891510358800079, - "grad_norm": 0.4653393030166626, - "learning_rate": 7.73166289226049e-06, - "loss": 0.3825, - "step": 15135 - }, - { - "epoch": 0.9892163910855499, - "grad_norm": 0.42049020528793335, - "learning_rate": 7.731370415106893e-06, - "loss": 0.3627, - "step": 15136 - }, - { - "epoch": 0.9892817462910921, - "grad_norm": 0.4357064664363861, - "learning_rate": 7.731077924631458e-06, - "loss": 0.3699, - "step": 15137 - }, - { - "epoch": 0.9893471014966342, - "grad_norm": 0.4246349334716797, - "learning_rate": 7.73078542083561e-06, - "loss": 0.3611, - "step": 15138 - }, - { - "epoch": 0.9894124567021764, - "grad_norm": 0.44023597240448, - "learning_rate": 7.730492903720774e-06, - "loss": 0.3727, - "step": 15139 - }, - { - "epoch": 0.9894778119077184, - "grad_norm": 0.43610572814941406, - "learning_rate": 7.73020037328838e-06, - "loss": 0.3794, - "step": 15140 - }, - { - "epoch": 0.9895431671132606, - "grad_norm": 0.46135908365249634, - "learning_rate": 7.729907829539851e-06, - "loss": 0.4275, - "step": 15141 - }, - { - "epoch": 0.9896085223188027, - "grad_norm": 0.4838557243347168, - "learning_rate": 7.729615272476617e-06, - "loss": 0.4404, - "step": 15142 - }, - { - "epoch": 0.9896738775243448, - "grad_norm": 0.4211517870426178, - "learning_rate": 7.729322702100103e-06, - "loss": 0.3532, - "step": 15143 - }, - { - "epoch": 0.989739232729887, - "grad_norm": 0.4415057897567749, - "learning_rate": 7.729030118411737e-06, - "loss": 0.3632, - "step": 15144 - }, - { - "epoch": 0.989804587935429, - "grad_norm": 0.43831971287727356, - "learning_rate": 7.728737521412946e-06, - "loss": 0.3539, - "step": 15145 - }, - { - "epoch": 0.9898699431409712, - "grad_norm": 0.4630366265773773, - "learning_rate": 7.728444911105158e-06, - "loss": 0.433, - "step": 15146 - }, - { - "epoch": 0.9899352983465133, - "grad_norm": 0.44352343678474426, - "learning_rate": 7.728152287489796e-06, - "loss": 0.3744, - "step": 15147 - }, - { - "epoch": 0.9900006535520555, - "grad_norm": 0.4524170160293579, - "learning_rate": 7.727859650568292e-06, - "loss": 0.3961, - "step": 15148 - }, - { - "epoch": 0.9900660087575975, - "grad_norm": 0.43163421750068665, - "learning_rate": 7.727567000342071e-06, - "loss": 0.3605, - "step": 15149 - }, - { - "epoch": 0.9901313639631396, - "grad_norm": 0.45648854970932007, - "learning_rate": 7.72727433681256e-06, - "loss": 0.4121, - "step": 15150 - }, - { - "epoch": 0.9901967191686818, - "grad_norm": 0.46180275082588196, - "learning_rate": 7.726981659981188e-06, - "loss": 0.4064, - "step": 15151 - }, - { - "epoch": 0.9902620743742239, - "grad_norm": 0.40367501974105835, - "learning_rate": 7.726688969849383e-06, - "loss": 0.3111, - "step": 15152 - }, - { - "epoch": 0.990327429579766, - "grad_norm": 0.46861517429351807, - "learning_rate": 7.72639626641857e-06, - "loss": 0.3952, - "step": 15153 - }, - { - "epoch": 0.9903927847853081, - "grad_norm": 0.4397759437561035, - "learning_rate": 7.726103549690178e-06, - "loss": 0.4075, - "step": 15154 - }, - { - "epoch": 0.9904581399908503, - "grad_norm": 0.4731493890285492, - "learning_rate": 7.725810819665635e-06, - "loss": 0.4191, - "step": 15155 - }, - { - "epoch": 0.9905234951963924, - "grad_norm": 0.43350693583488464, - "learning_rate": 7.725518076346368e-06, - "loss": 0.3566, - "step": 15156 - }, - { - "epoch": 0.9905888504019346, - "grad_norm": 0.45391082763671875, - "learning_rate": 7.725225319733806e-06, - "loss": 0.3863, - "step": 15157 - }, - { - "epoch": 0.9906542056074766, - "grad_norm": 0.4475744664669037, - "learning_rate": 7.724932549829373e-06, - "loss": 0.3566, - "step": 15158 - }, - { - "epoch": 0.9907195608130187, - "grad_norm": 0.4379841983318329, - "learning_rate": 7.724639766634503e-06, - "loss": 0.3772, - "step": 15159 - }, - { - "epoch": 0.9907849160185609, - "grad_norm": 0.4198358952999115, - "learning_rate": 7.72434697015062e-06, - "loss": 0.3373, - "step": 15160 - }, - { - "epoch": 0.990850271224103, - "grad_norm": 0.4721154272556305, - "learning_rate": 7.724054160379153e-06, - "loss": 0.3497, - "step": 15161 - }, - { - "epoch": 0.9909156264296451, - "grad_norm": 0.5004555583000183, - "learning_rate": 7.72376133732153e-06, - "loss": 0.3997, - "step": 15162 - }, - { - "epoch": 0.9909809816351872, - "grad_norm": 0.42507079243659973, - "learning_rate": 7.72346850097918e-06, - "loss": 0.3492, - "step": 15163 - }, - { - "epoch": 0.9910463368407294, - "grad_norm": 0.4355694353580475, - "learning_rate": 7.72317565135353e-06, - "loss": 0.3697, - "step": 15164 - }, - { - "epoch": 0.9911116920462715, - "grad_norm": 0.4397289752960205, - "learning_rate": 7.722882788446009e-06, - "loss": 0.3895, - "step": 15165 - }, - { - "epoch": 0.9911770472518137, - "grad_norm": 0.4112222492694855, - "learning_rate": 7.722589912258045e-06, - "loss": 0.324, - "step": 15166 - }, - { - "epoch": 0.9912424024573557, - "grad_norm": 0.4551739990711212, - "learning_rate": 7.722297022791067e-06, - "loss": 0.4271, - "step": 15167 - }, - { - "epoch": 0.9913077576628978, - "grad_norm": 0.42867451906204224, - "learning_rate": 7.722004120046504e-06, - "loss": 0.3599, - "step": 15168 - }, - { - "epoch": 0.99137311286844, - "grad_norm": 0.4233817160129547, - "learning_rate": 7.721711204025784e-06, - "loss": 0.3715, - "step": 15169 - }, - { - "epoch": 0.9914384680739821, - "grad_norm": 0.4558604061603546, - "learning_rate": 7.721418274730335e-06, - "loss": 0.3593, - "step": 15170 - }, - { - "epoch": 0.9915038232795242, - "grad_norm": 0.455513596534729, - "learning_rate": 7.721125332161585e-06, - "loss": 0.4041, - "step": 15171 - }, - { - "epoch": 0.9915691784850663, - "grad_norm": 0.4515846371650696, - "learning_rate": 7.720832376320967e-06, - "loss": 0.4205, - "step": 15172 - }, - { - "epoch": 0.9916345336906085, - "grad_norm": 0.43746036291122437, - "learning_rate": 7.720539407209905e-06, - "loss": 0.3791, - "step": 15173 - }, - { - "epoch": 0.9916998888961506, - "grad_norm": 0.4344053268432617, - "learning_rate": 7.720246424829829e-06, - "loss": 0.3499, - "step": 15174 - }, - { - "epoch": 0.9917652441016926, - "grad_norm": 0.4476267397403717, - "learning_rate": 7.71995342918217e-06, - "loss": 0.4105, - "step": 15175 - }, - { - "epoch": 0.9918305993072348, - "grad_norm": 0.41979485750198364, - "learning_rate": 7.719660420268359e-06, - "loss": 0.3365, - "step": 15176 - }, - { - "epoch": 0.9918959545127769, - "grad_norm": 0.46881213784217834, - "learning_rate": 7.719367398089816e-06, - "loss": 0.4182, - "step": 15177 - }, - { - "epoch": 0.9919613097183191, - "grad_norm": 0.4455852210521698, - "learning_rate": 7.71907436264798e-06, - "loss": 0.3764, - "step": 15178 - }, - { - "epoch": 0.9920266649238612, - "grad_norm": 0.43420225381851196, - "learning_rate": 7.718781313944274e-06, - "loss": 0.389, - "step": 15179 - }, - { - "epoch": 0.9920920201294033, - "grad_norm": 0.4379095435142517, - "learning_rate": 7.718488251980131e-06, - "loss": 0.3902, - "step": 15180 - }, - { - "epoch": 0.9921573753349454, - "grad_norm": 0.40903136134147644, - "learning_rate": 7.71819517675698e-06, - "loss": 0.2969, - "step": 15181 - }, - { - "epoch": 0.9922227305404876, - "grad_norm": 0.4408978819847107, - "learning_rate": 7.717902088276247e-06, - "loss": 0.395, - "step": 15182 - }, - { - "epoch": 0.9922880857460297, - "grad_norm": 0.4464319944381714, - "learning_rate": 7.717608986539366e-06, - "loss": 0.4024, - "step": 15183 - }, - { - "epoch": 0.9923534409515717, - "grad_norm": 0.48024967312812805, - "learning_rate": 7.717315871547764e-06, - "loss": 0.4134, - "step": 15184 - }, - { - "epoch": 0.9924187961571139, - "grad_norm": 0.43580445647239685, - "learning_rate": 7.717022743302871e-06, - "loss": 0.372, - "step": 15185 - }, - { - "epoch": 0.992484151362656, - "grad_norm": 0.46773090958595276, - "learning_rate": 7.716729601806117e-06, - "loss": 0.3923, - "step": 15186 - }, - { - "epoch": 0.9925495065681982, - "grad_norm": 0.4395577609539032, - "learning_rate": 7.71643644705893e-06, - "loss": 0.388, - "step": 15187 - }, - { - "epoch": 0.9926148617737403, - "grad_norm": 0.4634782671928406, - "learning_rate": 7.716143279062743e-06, - "loss": 0.4041, - "step": 15188 - }, - { - "epoch": 0.9926802169792824, - "grad_norm": 0.45354804396629333, - "learning_rate": 7.715850097818985e-06, - "loss": 0.3718, - "step": 15189 - }, - { - "epoch": 0.9927455721848245, - "grad_norm": 0.41226768493652344, - "learning_rate": 7.715556903329084e-06, - "loss": 0.3385, - "step": 15190 - }, - { - "epoch": 0.9928109273903667, - "grad_norm": 0.44063106179237366, - "learning_rate": 7.71526369559447e-06, - "loss": 0.3482, - "step": 15191 - }, - { - "epoch": 0.9928762825959088, - "grad_norm": 0.48568663001060486, - "learning_rate": 7.714970474616577e-06, - "loss": 0.4381, - "step": 15192 - }, - { - "epoch": 0.9929416378014508, - "grad_norm": 0.4456183612346649, - "learning_rate": 7.71467724039683e-06, - "loss": 0.372, - "step": 15193 - }, - { - "epoch": 0.993006993006993, - "grad_norm": 0.470907598733902, - "learning_rate": 7.714383992936663e-06, - "loss": 0.3964, - "step": 15194 - }, - { - "epoch": 0.9930723482125351, - "grad_norm": 0.4126276671886444, - "learning_rate": 7.714090732237505e-06, - "loss": 0.3026, - "step": 15195 - }, - { - "epoch": 0.9931377034180773, - "grad_norm": 0.42819276452064514, - "learning_rate": 7.713797458300785e-06, - "loss": 0.3508, - "step": 15196 - }, - { - "epoch": 0.9932030586236194, - "grad_norm": 0.43126991391181946, - "learning_rate": 7.713504171127937e-06, - "loss": 0.3684, - "step": 15197 - }, - { - "epoch": 0.9932684138291615, - "grad_norm": 0.43060463666915894, - "learning_rate": 7.713210870720388e-06, - "loss": 0.3688, - "step": 15198 - }, - { - "epoch": 0.9933337690347036, - "grad_norm": 0.43413522839546204, - "learning_rate": 7.712917557079568e-06, - "loss": 0.3755, - "step": 15199 - }, - { - "epoch": 0.9933991242402458, - "grad_norm": 0.4354079067707062, - "learning_rate": 7.712624230206911e-06, - "loss": 0.3722, - "step": 15200 - }, - { - "epoch": 0.9934644794457879, - "grad_norm": 0.47235366702079773, - "learning_rate": 7.712330890103845e-06, - "loss": 0.482, - "step": 15201 - }, - { - "epoch": 0.9935298346513299, - "grad_norm": 0.38225215673446655, - "learning_rate": 7.712037536771802e-06, - "loss": 0.3072, - "step": 15202 - }, - { - "epoch": 0.9935951898568721, - "grad_norm": 0.4030027687549591, - "learning_rate": 7.711744170212214e-06, - "loss": 0.3259, - "step": 15203 - }, - { - "epoch": 0.9936605450624142, - "grad_norm": 0.42075735330581665, - "learning_rate": 7.711450790426508e-06, - "loss": 0.3567, - "step": 15204 - }, - { - "epoch": 0.9937259002679564, - "grad_norm": 0.37558045983314514, - "learning_rate": 7.711157397416118e-06, - "loss": 0.2928, - "step": 15205 - }, - { - "epoch": 0.9937912554734984, - "grad_norm": 0.4586002826690674, - "learning_rate": 7.710863991182473e-06, - "loss": 0.4227, - "step": 15206 - }, - { - "epoch": 0.9938566106790406, - "grad_norm": 0.4154171049594879, - "learning_rate": 7.710570571727007e-06, - "loss": 0.3406, - "step": 15207 - }, - { - "epoch": 0.9939219658845827, - "grad_norm": 0.4015105068683624, - "learning_rate": 7.710277139051148e-06, - "loss": 0.333, - "step": 15208 - }, - { - "epoch": 0.9939873210901248, - "grad_norm": 0.4767809510231018, - "learning_rate": 7.709983693156328e-06, - "loss": 0.3896, - "step": 15209 - }, - { - "epoch": 0.994052676295667, - "grad_norm": 0.4133701026439667, - "learning_rate": 7.709690234043981e-06, - "loss": 0.3661, - "step": 15210 - }, - { - "epoch": 0.994118031501209, - "grad_norm": 0.45379364490509033, - "learning_rate": 7.709396761715535e-06, - "loss": 0.3688, - "step": 15211 - }, - { - "epoch": 0.9941833867067512, - "grad_norm": 0.44322243332862854, - "learning_rate": 7.709103276172421e-06, - "loss": 0.387, - "step": 15212 - }, - { - "epoch": 0.9942487419122933, - "grad_norm": 0.416967511177063, - "learning_rate": 7.708809777416073e-06, - "loss": 0.3111, - "step": 15213 - }, - { - "epoch": 0.9943140971178355, - "grad_norm": 0.4574109613895416, - "learning_rate": 7.708516265447921e-06, - "loss": 0.4361, - "step": 15214 - }, - { - "epoch": 0.9943794523233775, - "grad_norm": 0.46277549862861633, - "learning_rate": 7.708222740269396e-06, - "loss": 0.4233, - "step": 15215 - }, - { - "epoch": 0.9944448075289197, - "grad_norm": 0.44610607624053955, - "learning_rate": 7.707929201881931e-06, - "loss": 0.4095, - "step": 15216 - }, - { - "epoch": 0.9945101627344618, - "grad_norm": 0.3953724801540375, - "learning_rate": 7.707635650286958e-06, - "loss": 0.3288, - "step": 15217 - }, - { - "epoch": 0.9945755179400039, - "grad_norm": 0.3972908556461334, - "learning_rate": 7.707342085485909e-06, - "loss": 0.3129, - "step": 15218 - }, - { - "epoch": 0.9946408731455461, - "grad_norm": 0.4491443336009979, - "learning_rate": 7.707048507480213e-06, - "loss": 0.406, - "step": 15219 - }, - { - "epoch": 0.9947062283510881, - "grad_norm": 0.43897825479507446, - "learning_rate": 7.706754916271304e-06, - "loss": 0.3719, - "step": 15220 - }, - { - "epoch": 0.9947715835566303, - "grad_norm": 0.4349232017993927, - "learning_rate": 7.706461311860614e-06, - "loss": 0.3875, - "step": 15221 - }, - { - "epoch": 0.9948369387621724, - "grad_norm": 0.4542638957500458, - "learning_rate": 7.706167694249573e-06, - "loss": 0.4226, - "step": 15222 - }, - { - "epoch": 0.9949022939677146, - "grad_norm": 0.43496763706207275, - "learning_rate": 7.705874063439616e-06, - "loss": 0.3792, - "step": 15223 - }, - { - "epoch": 0.9949676491732566, - "grad_norm": 0.4258120357990265, - "learning_rate": 7.705580419432172e-06, - "loss": 0.3533, - "step": 15224 - }, - { - "epoch": 0.9950330043787988, - "grad_norm": 0.45798373222351074, - "learning_rate": 7.705286762228678e-06, - "loss": 0.4284, - "step": 15225 - }, - { - "epoch": 0.9950983595843409, - "grad_norm": 0.4548629820346832, - "learning_rate": 7.704993091830561e-06, - "loss": 0.3759, - "step": 15226 - }, - { - "epoch": 0.995163714789883, - "grad_norm": 0.44336462020874023, - "learning_rate": 7.704699408239255e-06, - "loss": 0.3659, - "step": 15227 - }, - { - "epoch": 0.9952290699954252, - "grad_norm": 0.4453061819076538, - "learning_rate": 7.704405711456195e-06, - "loss": 0.3722, - "step": 15228 - }, - { - "epoch": 0.9952944252009672, - "grad_norm": 0.42155423760414124, - "learning_rate": 7.704112001482812e-06, - "loss": 0.373, - "step": 15229 - }, - { - "epoch": 0.9953597804065094, - "grad_norm": 0.45993250608444214, - "learning_rate": 7.703818278320537e-06, - "loss": 0.37, - "step": 15230 - }, - { - "epoch": 0.9954251356120515, - "grad_norm": 0.4333488643169403, - "learning_rate": 7.703524541970801e-06, - "loss": 0.3474, - "step": 15231 - }, - { - "epoch": 0.9954904908175937, - "grad_norm": 0.4314553141593933, - "learning_rate": 7.703230792435043e-06, - "loss": 0.3741, - "step": 15232 - }, - { - "epoch": 0.9955558460231357, - "grad_norm": 0.4804209768772125, - "learning_rate": 7.70293702971469e-06, - "loss": 0.436, - "step": 15233 - }, - { - "epoch": 0.9956212012286778, - "grad_norm": 0.4595136046409607, - "learning_rate": 7.702643253811177e-06, - "loss": 0.4032, - "step": 15234 - }, - { - "epoch": 0.99568655643422, - "grad_norm": 0.4507693946361542, - "learning_rate": 7.702349464725936e-06, - "loss": 0.3555, - "step": 15235 - }, - { - "epoch": 0.9957519116397621, - "grad_norm": 0.46738341450691223, - "learning_rate": 7.702055662460401e-06, - "loss": 0.4038, - "step": 15236 - }, - { - "epoch": 0.9958172668453042, - "grad_norm": 0.44166356325149536, - "learning_rate": 7.701761847016005e-06, - "loss": 0.4056, - "step": 15237 - }, - { - "epoch": 0.9958826220508463, - "grad_norm": 0.44396546483039856, - "learning_rate": 7.701468018394181e-06, - "loss": 0.3467, - "step": 15238 - }, - { - "epoch": 0.9959479772563885, - "grad_norm": 0.4619816541671753, - "learning_rate": 7.701174176596362e-06, - "loss": 0.4245, - "step": 15239 - }, - { - "epoch": 0.9960133324619306, - "grad_norm": 0.42113983631134033, - "learning_rate": 7.700880321623978e-06, - "loss": 0.3423, - "step": 15240 - }, - { - "epoch": 0.9960786876674728, - "grad_norm": 0.4135285019874573, - "learning_rate": 7.700586453478467e-06, - "loss": 0.3514, - "step": 15241 - }, - { - "epoch": 0.9961440428730148, - "grad_norm": 0.4091458022594452, - "learning_rate": 7.700292572161258e-06, - "loss": 0.3323, - "step": 15242 - }, - { - "epoch": 0.9962093980785569, - "grad_norm": 0.43799135088920593, - "learning_rate": 7.69999867767379e-06, - "loss": 0.3518, - "step": 15243 - }, - { - "epoch": 0.9962747532840991, - "grad_norm": 0.44951286911964417, - "learning_rate": 7.69970477001749e-06, - "loss": 0.4021, - "step": 15244 - }, - { - "epoch": 0.9963401084896412, - "grad_norm": 0.4571133852005005, - "learning_rate": 7.699410849193796e-06, - "loss": 0.3498, - "step": 15245 - }, - { - "epoch": 0.9964054636951833, - "grad_norm": 0.4132849872112274, - "learning_rate": 7.699116915204142e-06, - "loss": 0.3502, - "step": 15246 - }, - { - "epoch": 0.9964708189007254, - "grad_norm": 0.452383428812027, - "learning_rate": 7.698822968049957e-06, - "loss": 0.4164, - "step": 15247 - }, - { - "epoch": 0.9965361741062676, - "grad_norm": 0.42526212334632874, - "learning_rate": 7.698529007732678e-06, - "loss": 0.3573, - "step": 15248 - }, - { - "epoch": 0.9966015293118097, - "grad_norm": 0.4335181415081024, - "learning_rate": 7.698235034253739e-06, - "loss": 0.3501, - "step": 15249 - }, - { - "epoch": 0.9966668845173519, - "grad_norm": 0.4348534941673279, - "learning_rate": 7.697941047614571e-06, - "loss": 0.3519, - "step": 15250 - }, - { - "epoch": 0.9967322397228939, - "grad_norm": 0.44404658675193787, - "learning_rate": 7.697647047816612e-06, - "loss": 0.3901, - "step": 15251 - }, - { - "epoch": 0.996797594928436, - "grad_norm": 0.46016421914100647, - "learning_rate": 7.697353034861294e-06, - "loss": 0.3907, - "step": 15252 - }, - { - "epoch": 0.9968629501339782, - "grad_norm": 0.4186476171016693, - "learning_rate": 7.69705900875005e-06, - "loss": 0.3281, - "step": 15253 - }, - { - "epoch": 0.9969283053395203, - "grad_norm": 0.45298847556114197, - "learning_rate": 7.696764969484313e-06, - "loss": 0.3973, - "step": 15254 - }, - { - "epoch": 0.9969936605450624, - "grad_norm": 0.5015976428985596, - "learning_rate": 7.696470917065522e-06, - "loss": 0.4053, - "step": 15255 - }, - { - "epoch": 0.9970590157506045, - "grad_norm": 0.4459259808063507, - "learning_rate": 7.696176851495107e-06, - "loss": 0.3998, - "step": 15256 - }, - { - "epoch": 0.9971243709561467, - "grad_norm": 0.4428805708885193, - "learning_rate": 7.695882772774503e-06, - "loss": 0.406, - "step": 15257 - }, - { - "epoch": 0.9971897261616888, - "grad_norm": 0.4324735403060913, - "learning_rate": 7.695588680905145e-06, - "loss": 0.3621, - "step": 15258 - }, - { - "epoch": 0.9972550813672308, - "grad_norm": 0.46941500902175903, - "learning_rate": 7.695294575888467e-06, - "loss": 0.3866, - "step": 15259 - }, - { - "epoch": 0.997320436572773, - "grad_norm": 0.41056957840919495, - "learning_rate": 7.695000457725905e-06, - "loss": 0.3619, - "step": 15260 - }, - { - "epoch": 0.9973857917783151, - "grad_norm": 0.4279012084007263, - "learning_rate": 7.694706326418892e-06, - "loss": 0.3594, - "step": 15261 - }, - { - "epoch": 0.9974511469838573, - "grad_norm": 0.468083918094635, - "learning_rate": 7.69441218196886e-06, - "loss": 0.4272, - "step": 15262 - }, - { - "epoch": 0.9975165021893994, - "grad_norm": 0.42653143405914307, - "learning_rate": 7.69411802437725e-06, - "loss": 0.362, - "step": 15263 - }, - { - "epoch": 0.9975818573949415, - "grad_norm": 0.4228259027004242, - "learning_rate": 7.69382385364549e-06, - "loss": 0.3308, - "step": 15264 - }, - { - "epoch": 0.9976472126004836, - "grad_norm": 0.45300742983818054, - "learning_rate": 7.693529669775019e-06, - "loss": 0.415, - "step": 15265 - }, - { - "epoch": 0.9977125678060258, - "grad_norm": 0.4374513328075409, - "learning_rate": 7.69323547276727e-06, - "loss": 0.4007, - "step": 15266 - }, - { - "epoch": 0.9977779230115679, - "grad_norm": 0.4085589647293091, - "learning_rate": 7.692941262623681e-06, - "loss": 0.3631, - "step": 15267 - }, - { - "epoch": 0.9978432782171099, - "grad_norm": 0.4044470191001892, - "learning_rate": 7.692647039345682e-06, - "loss": 0.3021, - "step": 15268 - }, - { - "epoch": 0.9979086334226521, - "grad_norm": 0.4228689968585968, - "learning_rate": 7.692352802934711e-06, - "loss": 0.3354, - "step": 15269 - }, - { - "epoch": 0.9979739886281942, - "grad_norm": 0.43912720680236816, - "learning_rate": 7.692058553392204e-06, - "loss": 0.3761, - "step": 15270 - }, - { - "epoch": 0.9980393438337364, - "grad_norm": 0.4621241092681885, - "learning_rate": 7.691764290719593e-06, - "loss": 0.3938, - "step": 15271 - }, - { - "epoch": 0.9981046990392785, - "grad_norm": 0.41059187054634094, - "learning_rate": 7.691470014918316e-06, - "loss": 0.3337, - "step": 15272 - }, - { - "epoch": 0.9981700542448206, - "grad_norm": 0.4118594229221344, - "learning_rate": 7.691175725989808e-06, - "loss": 0.3332, - "step": 15273 - }, - { - "epoch": 0.9982354094503627, - "grad_norm": 0.45159950852394104, - "learning_rate": 7.690881423935502e-06, - "loss": 0.3743, - "step": 15274 - }, - { - "epoch": 0.9983007646559049, - "grad_norm": 0.45238029956817627, - "learning_rate": 7.690587108756837e-06, - "loss": 0.389, - "step": 15275 - }, - { - "epoch": 0.998366119861447, - "grad_norm": 0.41418617963790894, - "learning_rate": 7.690292780455244e-06, - "loss": 0.3242, - "step": 15276 - }, - { - "epoch": 0.998431475066989, - "grad_norm": 0.45793813467025757, - "learning_rate": 7.689998439032164e-06, - "loss": 0.3962, - "step": 15277 - }, - { - "epoch": 0.9984968302725312, - "grad_norm": 0.4461209774017334, - "learning_rate": 7.689704084489027e-06, - "loss": 0.3709, - "step": 15278 - }, - { - "epoch": 0.9985621854780733, - "grad_norm": 0.4224388897418976, - "learning_rate": 7.689409716827274e-06, - "loss": 0.3789, - "step": 15279 - }, - { - "epoch": 0.9986275406836155, - "grad_norm": 0.45713791251182556, - "learning_rate": 7.689115336048338e-06, - "loss": 0.3699, - "step": 15280 - }, - { - "epoch": 0.9986928958891576, - "grad_norm": 0.5281928181648254, - "learning_rate": 7.688820942153653e-06, - "loss": 0.497, - "step": 15281 - }, - { - "epoch": 0.9987582510946997, - "grad_norm": 0.4724256992340088, - "learning_rate": 7.688526535144658e-06, - "loss": 0.3828, - "step": 15282 - }, - { - "epoch": 0.9988236063002418, - "grad_norm": 0.47158998250961304, - "learning_rate": 7.688232115022786e-06, - "loss": 0.4414, - "step": 15283 - }, - { - "epoch": 0.998888961505784, - "grad_norm": 0.47896477580070496, - "learning_rate": 7.687937681789477e-06, - "loss": 0.4437, - "step": 15284 - }, - { - "epoch": 0.9989543167113261, - "grad_norm": 0.4669880270957947, - "learning_rate": 7.687643235446162e-06, - "loss": 0.378, - "step": 15285 - }, - { - "epoch": 0.9990196719168681, - "grad_norm": 0.4483124613761902, - "learning_rate": 7.687348775994283e-06, - "loss": 0.3631, - "step": 15286 - }, - { - "epoch": 0.9990850271224103, - "grad_norm": 0.4364282190799713, - "learning_rate": 7.687054303435271e-06, - "loss": 0.37, - "step": 15287 - }, - { - "epoch": 0.9991503823279524, - "grad_norm": 0.4038535952568054, - "learning_rate": 7.686759817770565e-06, - "loss": 0.3372, - "step": 15288 - }, - { - "epoch": 0.9992157375334946, - "grad_norm": 0.44478845596313477, - "learning_rate": 7.6864653190016e-06, - "loss": 0.3814, - "step": 15289 - }, - { - "epoch": 0.9992810927390366, - "grad_norm": 0.412781685590744, - "learning_rate": 7.686170807129814e-06, - "loss": 0.3322, - "step": 15290 - }, - { - "epoch": 0.9993464479445788, - "grad_norm": 0.4233294129371643, - "learning_rate": 7.68587628215664e-06, - "loss": 0.322, - "step": 15291 - }, - { - "epoch": 0.9994118031501209, - "grad_norm": 0.4360736906528473, - "learning_rate": 7.68558174408352e-06, - "loss": 0.3529, - "step": 15292 - }, - { - "epoch": 0.999477158355663, - "grad_norm": 0.4752248227596283, - "learning_rate": 7.685287192911886e-06, - "loss": 0.3983, - "step": 15293 - }, - { - "epoch": 0.9995425135612052, - "grad_norm": 0.42139577865600586, - "learning_rate": 7.684992628643176e-06, - "loss": 0.3448, - "step": 15294 - }, - { - "epoch": 0.9996078687667472, - "grad_norm": 0.4572370648384094, - "learning_rate": 7.684698051278826e-06, - "loss": 0.3902, - "step": 15295 - }, - { - "epoch": 0.9996732239722894, - "grad_norm": 0.4292619228363037, - "learning_rate": 7.684403460820276e-06, - "loss": 0.3385, - "step": 15296 - }, - { - "epoch": 0.9997385791778315, - "grad_norm": 0.44764095544815063, - "learning_rate": 7.68410885726896e-06, - "loss": 0.3863, - "step": 15297 - }, - { - "epoch": 0.9998039343833737, - "grad_norm": 0.4193493127822876, - "learning_rate": 7.683814240626313e-06, - "loss": 0.3307, - "step": 15298 - }, - { - "epoch": 0.9998692895889157, - "grad_norm": 0.47969362139701843, - "learning_rate": 7.683519610893776e-06, - "loss": 0.4549, - "step": 15299 - }, - { - "epoch": 0.9999346447944579, - "grad_norm": 0.4604553282260895, - "learning_rate": 7.683224968072782e-06, - "loss": 0.4099, - "step": 15300 - }, - { - "epoch": 1.0, - "grad_norm": 0.4571715295314789, - "learning_rate": 7.682930312164771e-06, - "loss": 0.4088, - "step": 15301 - }, - { - "epoch": 1.000065355205542, - "grad_norm": 0.45919936895370483, - "learning_rate": 7.682635643171181e-06, - "loss": 0.3865, - "step": 15302 - }, - { - "epoch": 1.0001307104110841, - "grad_norm": 0.45753178000450134, - "learning_rate": 7.682340961093447e-06, - "loss": 0.3374, - "step": 15303 - }, - { - "epoch": 1.0001960656166264, - "grad_norm": 0.4426369369029999, - "learning_rate": 7.682046265933007e-06, - "loss": 0.3333, - "step": 15304 - }, - { - "epoch": 1.0002614208221685, - "grad_norm": 0.4452736973762512, - "learning_rate": 7.681751557691298e-06, - "loss": 0.3645, - "step": 15305 - }, - { - "epoch": 1.0003267760277106, - "grad_norm": 0.43928152322769165, - "learning_rate": 7.681456836369758e-06, - "loss": 0.3344, - "step": 15306 - }, - { - "epoch": 1.0003921312332527, - "grad_norm": 0.45254141092300415, - "learning_rate": 7.681162101969822e-06, - "loss": 0.3561, - "step": 15307 - }, - { - "epoch": 1.000457486438795, - "grad_norm": 0.471185564994812, - "learning_rate": 7.680867354492932e-06, - "loss": 0.4038, - "step": 15308 - }, - { - "epoch": 1.000522841644337, - "grad_norm": 0.42634573578834534, - "learning_rate": 7.680572593940521e-06, - "loss": 0.3478, - "step": 15309 - }, - { - "epoch": 1.000588196849879, - "grad_norm": 0.45847830176353455, - "learning_rate": 7.680277820314032e-06, - "loss": 0.3783, - "step": 15310 - }, - { - "epoch": 1.0006535520554212, - "grad_norm": 0.49452534317970276, - "learning_rate": 7.679983033614897e-06, - "loss": 0.3404, - "step": 15311 - }, - { - "epoch": 1.0007189072609632, - "grad_norm": 0.4476128816604614, - "learning_rate": 7.679688233844557e-06, - "loss": 0.3377, - "step": 15312 - }, - { - "epoch": 1.0007842624665055, - "grad_norm": 0.4552595317363739, - "learning_rate": 7.679393421004449e-06, - "loss": 0.317, - "step": 15313 - }, - { - "epoch": 1.0008496176720476, - "grad_norm": 0.4845251739025116, - "learning_rate": 7.679098595096011e-06, - "loss": 0.3454, - "step": 15314 - }, - { - "epoch": 1.0009149728775897, - "grad_norm": 0.4067078232765198, - "learning_rate": 7.67880375612068e-06, - "loss": 0.297, - "step": 15315 - }, - { - "epoch": 1.0009803280831318, - "grad_norm": 0.4564893841743469, - "learning_rate": 7.678508904079898e-06, - "loss": 0.3575, - "step": 15316 - }, - { - "epoch": 1.001045683288674, - "grad_norm": 0.43375900387763977, - "learning_rate": 7.678214038975098e-06, - "loss": 0.3163, - "step": 15317 - }, - { - "epoch": 1.0011110384942161, - "grad_norm": 0.4573245048522949, - "learning_rate": 7.67791916080772e-06, - "loss": 0.3447, - "step": 15318 - }, - { - "epoch": 1.0011763936997582, - "grad_norm": 0.4501189887523651, - "learning_rate": 7.677624269579204e-06, - "loss": 0.3953, - "step": 15319 - }, - { - "epoch": 1.0012417489053003, - "grad_norm": 0.45240911841392517, - "learning_rate": 7.677329365290986e-06, - "loss": 0.3691, - "step": 15320 - }, - { - "epoch": 1.0013071041108423, - "grad_norm": 0.452606201171875, - "learning_rate": 7.677034447944505e-06, - "loss": 0.3414, - "step": 15321 - }, - { - "epoch": 1.0013724593163846, - "grad_norm": 0.40821969509124756, - "learning_rate": 7.676739517541199e-06, - "loss": 0.3018, - "step": 15322 - }, - { - "epoch": 1.0014378145219267, - "grad_norm": 0.43178510665893555, - "learning_rate": 7.67644457408251e-06, - "loss": 0.3211, - "step": 15323 - }, - { - "epoch": 1.0015031697274688, - "grad_norm": 0.48483988642692566, - "learning_rate": 7.67614961756987e-06, - "loss": 0.3888, - "step": 15324 - }, - { - "epoch": 1.0015685249330109, - "grad_norm": 0.45330503582954407, - "learning_rate": 7.675854648004724e-06, - "loss": 0.3478, - "step": 15325 - }, - { - "epoch": 1.0016338801385531, - "grad_norm": 0.448544442653656, - "learning_rate": 7.675559665388506e-06, - "loss": 0.3452, - "step": 15326 - }, - { - "epoch": 1.0016992353440952, - "grad_norm": 0.4484884440898895, - "learning_rate": 7.675264669722657e-06, - "loss": 0.3512, - "step": 15327 - }, - { - "epoch": 1.0017645905496373, - "grad_norm": 0.46183279156684875, - "learning_rate": 7.674969661008615e-06, - "loss": 0.3651, - "step": 15328 - }, - { - "epoch": 1.0018299457551794, - "grad_norm": 0.4510844349861145, - "learning_rate": 7.67467463924782e-06, - "loss": 0.3529, - "step": 15329 - }, - { - "epoch": 1.0018953009607214, - "grad_norm": 0.4594692289829254, - "learning_rate": 7.674379604441711e-06, - "loss": 0.353, - "step": 15330 - }, - { - "epoch": 1.0019606561662637, - "grad_norm": 0.3801827132701874, - "learning_rate": 7.674084556591726e-06, - "loss": 0.2522, - "step": 15331 - }, - { - "epoch": 1.0020260113718058, - "grad_norm": 0.4472469985485077, - "learning_rate": 7.673789495699303e-06, - "loss": 0.3603, - "step": 15332 - }, - { - "epoch": 1.0020913665773479, - "grad_norm": 0.43683260679244995, - "learning_rate": 7.67349442176588e-06, - "loss": 0.318, - "step": 15333 - }, - { - "epoch": 1.00215672178289, - "grad_norm": 0.4052855968475342, - "learning_rate": 7.673199334792902e-06, - "loss": 0.309, - "step": 15334 - }, - { - "epoch": 1.002222076988432, - "grad_norm": 0.4850192666053772, - "learning_rate": 7.672904234781805e-06, - "loss": 0.4052, - "step": 15335 - }, - { - "epoch": 1.0022874321939743, - "grad_norm": 0.45603764057159424, - "learning_rate": 7.672609121734026e-06, - "loss": 0.3831, - "step": 15336 - }, - { - "epoch": 1.0023527873995164, - "grad_norm": 0.4760010540485382, - "learning_rate": 7.672313995651009e-06, - "loss": 0.3299, - "step": 15337 - }, - { - "epoch": 1.0024181426050585, - "grad_norm": 0.44374123215675354, - "learning_rate": 7.672018856534188e-06, - "loss": 0.3344, - "step": 15338 - }, - { - "epoch": 1.0024834978106005, - "grad_norm": 0.4514628052711487, - "learning_rate": 7.671723704385007e-06, - "loss": 0.3269, - "step": 15339 - }, - { - "epoch": 1.0025488530161428, - "grad_norm": 0.45178747177124023, - "learning_rate": 7.671428539204903e-06, - "loss": 0.3727, - "step": 15340 - }, - { - "epoch": 1.002614208221685, - "grad_norm": 0.4662570059299469, - "learning_rate": 7.671133360995317e-06, - "loss": 0.3559, - "step": 15341 - }, - { - "epoch": 1.002679563427227, - "grad_norm": 0.4465523362159729, - "learning_rate": 7.67083816975769e-06, - "loss": 0.3418, - "step": 15342 - }, - { - "epoch": 1.002744918632769, - "grad_norm": 0.43686559796333313, - "learning_rate": 7.670542965493456e-06, - "loss": 0.3217, - "step": 15343 - }, - { - "epoch": 1.0028102738383111, - "grad_norm": 0.45063772797584534, - "learning_rate": 7.670247748204065e-06, - "loss": 0.3198, - "step": 15344 - }, - { - "epoch": 1.0028756290438534, - "grad_norm": 0.45118752121925354, - "learning_rate": 7.669952517890946e-06, - "loss": 0.3677, - "step": 15345 - }, - { - "epoch": 1.0029409842493955, - "grad_norm": 0.4444842040538788, - "learning_rate": 7.669657274555545e-06, - "loss": 0.3484, - "step": 15346 - }, - { - "epoch": 1.0030063394549376, - "grad_norm": 0.43761003017425537, - "learning_rate": 7.669362018199298e-06, - "loss": 0.3364, - "step": 15347 - }, - { - "epoch": 1.0030716946604796, - "grad_norm": 0.48077958822250366, - "learning_rate": 7.66906674882365e-06, - "loss": 0.3413, - "step": 15348 - }, - { - "epoch": 1.003137049866022, - "grad_norm": 0.46071937680244446, - "learning_rate": 7.66877146643004e-06, - "loss": 0.33, - "step": 15349 - }, - { - "epoch": 1.003202405071564, - "grad_norm": 0.4771740138530731, - "learning_rate": 7.668476171019905e-06, - "loss": 0.3935, - "step": 15350 - }, - { - "epoch": 1.003267760277106, - "grad_norm": 0.4689539074897766, - "learning_rate": 7.668180862594688e-06, - "loss": 0.3358, - "step": 15351 - }, - { - "epoch": 1.0033331154826481, - "grad_norm": 0.46587684750556946, - "learning_rate": 7.667885541155828e-06, - "loss": 0.3521, - "step": 15352 - }, - { - "epoch": 1.0033984706881902, - "grad_norm": 0.4376724362373352, - "learning_rate": 7.667590206704767e-06, - "loss": 0.3183, - "step": 15353 - }, - { - "epoch": 1.0034638258937325, - "grad_norm": 0.4335780441761017, - "learning_rate": 7.667294859242945e-06, - "loss": 0.351, - "step": 15354 - }, - { - "epoch": 1.0035291810992746, - "grad_norm": 0.42169657349586487, - "learning_rate": 7.666999498771799e-06, - "loss": 0.3094, - "step": 15355 - }, - { - "epoch": 1.0035945363048167, - "grad_norm": 0.44098904728889465, - "learning_rate": 7.666704125292775e-06, - "loss": 0.3119, - "step": 15356 - }, - { - "epoch": 1.0036598915103587, - "grad_norm": 0.45945441722869873, - "learning_rate": 7.666408738807309e-06, - "loss": 0.3185, - "step": 15357 - }, - { - "epoch": 1.003725246715901, - "grad_norm": 0.460377961397171, - "learning_rate": 7.666113339316847e-06, - "loss": 0.3729, - "step": 15358 - }, - { - "epoch": 1.003790601921443, - "grad_norm": 0.43052223324775696, - "learning_rate": 7.665817926822824e-06, - "loss": 0.3301, - "step": 15359 - }, - { - "epoch": 1.0038559571269852, - "grad_norm": 0.522735595703125, - "learning_rate": 7.665522501326685e-06, - "loss": 0.3339, - "step": 15360 - }, - { - "epoch": 1.0039213123325272, - "grad_norm": 0.46012312173843384, - "learning_rate": 7.665227062829868e-06, - "loss": 0.3682, - "step": 15361 - }, - { - "epoch": 1.0039866675380693, - "grad_norm": 0.4636308550834656, - "learning_rate": 7.664931611333817e-06, - "loss": 0.343, - "step": 15362 - }, - { - "epoch": 1.0040520227436116, - "grad_norm": 0.4381164312362671, - "learning_rate": 7.66463614683997e-06, - "loss": 0.321, - "step": 15363 - }, - { - "epoch": 1.0041173779491537, - "grad_norm": 0.43972429633140564, - "learning_rate": 7.664340669349768e-06, - "loss": 0.3149, - "step": 15364 - }, - { - "epoch": 1.0041827331546958, - "grad_norm": 0.48464396595954895, - "learning_rate": 7.664045178864657e-06, - "loss": 0.348, - "step": 15365 - }, - { - "epoch": 1.0042480883602378, - "grad_norm": 0.5554192662239075, - "learning_rate": 7.663749675386072e-06, - "loss": 0.3856, - "step": 15366 - }, - { - "epoch": 1.0043134435657801, - "grad_norm": 0.420454204082489, - "learning_rate": 7.663454158915459e-06, - "loss": 0.3005, - "step": 15367 - }, - { - "epoch": 1.0043787987713222, - "grad_norm": 0.4903525114059448, - "learning_rate": 7.663158629454256e-06, - "loss": 0.3939, - "step": 15368 - }, - { - "epoch": 1.0044441539768643, - "grad_norm": 0.4578380286693573, - "learning_rate": 7.662863087003907e-06, - "loss": 0.3386, - "step": 15369 - }, - { - "epoch": 1.0045095091824063, - "grad_norm": 0.47643154859542847, - "learning_rate": 7.66256753156585e-06, - "loss": 0.3828, - "step": 15370 - }, - { - "epoch": 1.0045748643879484, - "grad_norm": 0.4753783643245697, - "learning_rate": 7.66227196314153e-06, - "loss": 0.3813, - "step": 15371 - }, - { - "epoch": 1.0046402195934907, - "grad_norm": 0.469293475151062, - "learning_rate": 7.661976381732387e-06, - "loss": 0.3765, - "step": 15372 - }, - { - "epoch": 1.0047055747990328, - "grad_norm": 0.4429028034210205, - "learning_rate": 7.661680787339864e-06, - "loss": 0.317, - "step": 15373 - }, - { - "epoch": 1.0047709300045748, - "grad_norm": 0.476163774728775, - "learning_rate": 7.661385179965402e-06, - "loss": 0.3822, - "step": 15374 - }, - { - "epoch": 1.004836285210117, - "grad_norm": 0.4372885525226593, - "learning_rate": 7.661089559610442e-06, - "loss": 0.3428, - "step": 15375 - }, - { - "epoch": 1.0049016404156592, - "grad_norm": 0.43586692214012146, - "learning_rate": 7.660793926276426e-06, - "loss": 0.3577, - "step": 15376 - }, - { - "epoch": 1.0049669956212013, - "grad_norm": 0.47613614797592163, - "learning_rate": 7.660498279964794e-06, - "loss": 0.3437, - "step": 15377 - }, - { - "epoch": 1.0050323508267434, - "grad_norm": 0.4132579565048218, - "learning_rate": 7.660202620676992e-06, - "loss": 0.303, - "step": 15378 - }, - { - "epoch": 1.0050977060322854, - "grad_norm": 0.43297308683395386, - "learning_rate": 7.659906948414462e-06, - "loss": 0.3349, - "step": 15379 - }, - { - "epoch": 1.0051630612378275, - "grad_norm": 0.45414796471595764, - "learning_rate": 7.659611263178643e-06, - "loss": 0.3343, - "step": 15380 - }, - { - "epoch": 1.0052284164433698, - "grad_norm": 0.45960375666618347, - "learning_rate": 7.659315564970977e-06, - "loss": 0.3439, - "step": 15381 - }, - { - "epoch": 1.0052937716489119, - "grad_norm": 0.4501376152038574, - "learning_rate": 7.65901985379291e-06, - "loss": 0.3565, - "step": 15382 - }, - { - "epoch": 1.005359126854454, - "grad_norm": 0.48546937108039856, - "learning_rate": 7.658724129645881e-06, - "loss": 0.3952, - "step": 15383 - }, - { - "epoch": 1.005424482059996, - "grad_norm": 0.4542507827281952, - "learning_rate": 7.658428392531333e-06, - "loss": 0.3589, - "step": 15384 - }, - { - "epoch": 1.005489837265538, - "grad_norm": 0.4576326906681061, - "learning_rate": 7.65813264245071e-06, - "loss": 0.3754, - "step": 15385 - }, - { - "epoch": 1.0055551924710804, - "grad_norm": 0.4136732518672943, - "learning_rate": 7.657836879405454e-06, - "loss": 0.3026, - "step": 15386 - }, - { - "epoch": 1.0056205476766225, - "grad_norm": 0.44388332962989807, - "learning_rate": 7.657541103397005e-06, - "loss": 0.3383, - "step": 15387 - }, - { - "epoch": 1.0056859028821645, - "grad_norm": 0.45588627457618713, - "learning_rate": 7.657245314426807e-06, - "loss": 0.3403, - "step": 15388 - }, - { - "epoch": 1.0057512580877066, - "grad_norm": 0.4652750790119171, - "learning_rate": 7.656949512496304e-06, - "loss": 0.3716, - "step": 15389 - }, - { - "epoch": 1.005816613293249, - "grad_norm": 0.4347158372402191, - "learning_rate": 7.656653697606937e-06, - "loss": 0.3183, - "step": 15390 - }, - { - "epoch": 1.005881968498791, - "grad_norm": 0.4857756793498993, - "learning_rate": 7.65635786976015e-06, - "loss": 0.3689, - "step": 15391 - }, - { - "epoch": 1.005947323704333, - "grad_norm": 0.4873138964176178, - "learning_rate": 7.656062028957385e-06, - "loss": 0.3659, - "step": 15392 - }, - { - "epoch": 1.0060126789098751, - "grad_norm": 0.4715665876865387, - "learning_rate": 7.655766175200086e-06, - "loss": 0.3644, - "step": 15393 - }, - { - "epoch": 1.0060780341154172, - "grad_norm": 0.44285252690315247, - "learning_rate": 7.655470308489694e-06, - "loss": 0.3466, - "step": 15394 - }, - { - "epoch": 1.0061433893209595, - "grad_norm": 0.4487248659133911, - "learning_rate": 7.655174428827655e-06, - "loss": 0.3311, - "step": 15395 - }, - { - "epoch": 1.0062087445265016, - "grad_norm": 0.4194047749042511, - "learning_rate": 7.654878536215412e-06, - "loss": 0.3137, - "step": 15396 - }, - { - "epoch": 1.0062740997320436, - "grad_norm": 0.4552207291126251, - "learning_rate": 7.654582630654404e-06, - "loss": 0.354, - "step": 15397 - }, - { - "epoch": 1.0063394549375857, - "grad_norm": 0.4308977425098419, - "learning_rate": 7.654286712146077e-06, - "loss": 0.3331, - "step": 15398 - }, - { - "epoch": 1.006404810143128, - "grad_norm": 0.45188385248184204, - "learning_rate": 7.653990780691874e-06, - "loss": 0.3712, - "step": 15399 - }, - { - "epoch": 1.00647016534867, - "grad_norm": 0.45716938376426697, - "learning_rate": 7.65369483629324e-06, - "loss": 0.3443, - "step": 15400 - }, - { - "epoch": 1.0065355205542121, - "grad_norm": 0.42230695486068726, - "learning_rate": 7.653398878951615e-06, - "loss": 0.3088, - "step": 15401 - }, - { - "epoch": 1.0066008757597542, - "grad_norm": 0.4392012059688568, - "learning_rate": 7.653102908668447e-06, - "loss": 0.3307, - "step": 15402 - }, - { - "epoch": 1.0066662309652963, - "grad_norm": 0.42826029658317566, - "learning_rate": 7.652806925445175e-06, - "loss": 0.3196, - "step": 15403 - }, - { - "epoch": 1.0067315861708386, - "grad_norm": 0.4623169004917145, - "learning_rate": 7.652510929283244e-06, - "loss": 0.3779, - "step": 15404 - }, - { - "epoch": 1.0067969413763806, - "grad_norm": 0.48810315132141113, - "learning_rate": 7.6522149201841e-06, - "loss": 0.4204, - "step": 15405 - }, - { - "epoch": 1.0068622965819227, - "grad_norm": 0.4800114631652832, - "learning_rate": 7.651918898149185e-06, - "loss": 0.3685, - "step": 15406 - }, - { - "epoch": 1.0069276517874648, - "grad_norm": 0.44584551453590393, - "learning_rate": 7.651622863179941e-06, - "loss": 0.355, - "step": 15407 - }, - { - "epoch": 1.006993006993007, - "grad_norm": 0.5116623640060425, - "learning_rate": 7.651326815277816e-06, - "loss": 0.3904, - "step": 15408 - }, - { - "epoch": 1.0070583621985492, - "grad_norm": 0.4597768485546112, - "learning_rate": 7.65103075444425e-06, - "loss": 0.3345, - "step": 15409 - }, - { - "epoch": 1.0071237174040912, - "grad_norm": 0.4301532506942749, - "learning_rate": 7.65073468068069e-06, - "loss": 0.3144, - "step": 15410 - }, - { - "epoch": 1.0071890726096333, - "grad_norm": 0.42474237084388733, - "learning_rate": 7.650438593988577e-06, - "loss": 0.3291, - "step": 15411 - }, - { - "epoch": 1.0072544278151754, - "grad_norm": 0.4560867249965668, - "learning_rate": 7.650142494369356e-06, - "loss": 0.372, - "step": 15412 - }, - { - "epoch": 1.0073197830207177, - "grad_norm": 0.46581974625587463, - "learning_rate": 7.649846381824475e-06, - "loss": 0.3413, - "step": 15413 - }, - { - "epoch": 1.0073851382262597, - "grad_norm": 0.489432692527771, - "learning_rate": 7.649550256355374e-06, - "loss": 0.4066, - "step": 15414 - }, - { - "epoch": 1.0074504934318018, - "grad_norm": 0.44276174902915955, - "learning_rate": 7.649254117963498e-06, - "loss": 0.2954, - "step": 15415 - }, - { - "epoch": 1.007515848637344, - "grad_norm": 0.4767237901687622, - "learning_rate": 7.648957966650293e-06, - "loss": 0.3718, - "step": 15416 - }, - { - "epoch": 1.0075812038428862, - "grad_norm": 0.4617912173271179, - "learning_rate": 7.648661802417201e-06, - "loss": 0.3705, - "step": 15417 - }, - { - "epoch": 1.0076465590484283, - "grad_norm": 0.46661320328712463, - "learning_rate": 7.648365625265668e-06, - "loss": 0.3902, - "step": 15418 - }, - { - "epoch": 1.0077119142539703, - "grad_norm": 0.4534335136413574, - "learning_rate": 7.648069435197138e-06, - "loss": 0.3621, - "step": 15419 - }, - { - "epoch": 1.0077772694595124, - "grad_norm": 0.42317673563957214, - "learning_rate": 7.647773232213057e-06, - "loss": 0.2892, - "step": 15420 - }, - { - "epoch": 1.0078426246650545, - "grad_norm": 0.46837151050567627, - "learning_rate": 7.647477016314869e-06, - "loss": 0.3836, - "step": 15421 - }, - { - "epoch": 1.0079079798705968, - "grad_norm": 0.43539056181907654, - "learning_rate": 7.647180787504016e-06, - "loss": 0.3194, - "step": 15422 - }, - { - "epoch": 1.0079733350761388, - "grad_norm": 0.47065645456314087, - "learning_rate": 7.646884545781947e-06, - "loss": 0.3834, - "step": 15423 - }, - { - "epoch": 1.008038690281681, - "grad_norm": 0.4062655568122864, - "learning_rate": 7.646588291150106e-06, - "loss": 0.2855, - "step": 15424 - }, - { - "epoch": 1.008104045487223, - "grad_norm": 0.4419330954551697, - "learning_rate": 7.646292023609935e-06, - "loss": 0.3253, - "step": 15425 - }, - { - "epoch": 1.0081694006927653, - "grad_norm": 0.4646981358528137, - "learning_rate": 7.645995743162882e-06, - "loss": 0.3656, - "step": 15426 - }, - { - "epoch": 1.0082347558983074, - "grad_norm": 0.45052090287208557, - "learning_rate": 7.645699449810391e-06, - "loss": 0.3549, - "step": 15427 - }, - { - "epoch": 1.0083001111038494, - "grad_norm": 0.43203607201576233, - "learning_rate": 7.645403143553908e-06, - "loss": 0.3263, - "step": 15428 - }, - { - "epoch": 1.0083654663093915, - "grad_norm": 0.4358994662761688, - "learning_rate": 7.645106824394878e-06, - "loss": 0.3478, - "step": 15429 - }, - { - "epoch": 1.0084308215149336, - "grad_norm": 0.4181872010231018, - "learning_rate": 7.644810492334744e-06, - "loss": 0.327, - "step": 15430 - }, - { - "epoch": 1.0084961767204759, - "grad_norm": 0.43759000301361084, - "learning_rate": 7.644514147374954e-06, - "loss": 0.3424, - "step": 15431 - }, - { - "epoch": 1.008561531926018, - "grad_norm": 0.4420819878578186, - "learning_rate": 7.64421778951695e-06, - "loss": 0.3644, - "step": 15432 - }, - { - "epoch": 1.00862688713156, - "grad_norm": 0.4365168809890747, - "learning_rate": 7.643921418762183e-06, - "loss": 0.3126, - "step": 15433 - }, - { - "epoch": 1.008692242337102, - "grad_norm": 0.45583680272102356, - "learning_rate": 7.643625035112095e-06, - "loss": 0.3818, - "step": 15434 - }, - { - "epoch": 1.0087575975426444, - "grad_norm": 0.44000598788261414, - "learning_rate": 7.643328638568129e-06, - "loss": 0.3492, - "step": 15435 - }, - { - "epoch": 1.0088229527481865, - "grad_norm": 0.44959017634391785, - "learning_rate": 7.643032229131737e-06, - "loss": 0.3672, - "step": 15436 - }, - { - "epoch": 1.0088883079537285, - "grad_norm": 0.4667029082775116, - "learning_rate": 7.642735806804357e-06, - "loss": 0.3256, - "step": 15437 - }, - { - "epoch": 1.0089536631592706, - "grad_norm": 0.4460352957248688, - "learning_rate": 7.642439371587442e-06, - "loss": 0.3574, - "step": 15438 - }, - { - "epoch": 1.0090190183648127, - "grad_norm": 0.4368128776550293, - "learning_rate": 7.642142923482434e-06, - "loss": 0.3556, - "step": 15439 - }, - { - "epoch": 1.009084373570355, - "grad_norm": 0.4322856664657593, - "learning_rate": 7.641846462490782e-06, - "loss": 0.3077, - "step": 15440 - }, - { - "epoch": 1.009149728775897, - "grad_norm": 0.4305310845375061, - "learning_rate": 7.641549988613925e-06, - "loss": 0.319, - "step": 15441 - }, - { - "epoch": 1.009215083981439, - "grad_norm": 0.4275602102279663, - "learning_rate": 7.641253501853317e-06, - "loss": 0.3593, - "step": 15442 - }, - { - "epoch": 1.0092804391869812, - "grad_norm": 0.4231933653354645, - "learning_rate": 7.6409570022104e-06, - "loss": 0.3258, - "step": 15443 - }, - { - "epoch": 1.0093457943925233, - "grad_norm": 0.4606062173843384, - "learning_rate": 7.64066048968662e-06, - "loss": 0.3681, - "step": 15444 - }, - { - "epoch": 1.0094111495980655, - "grad_norm": 0.4889924228191376, - "learning_rate": 7.640363964283423e-06, - "loss": 0.377, - "step": 15445 - }, - { - "epoch": 1.0094765048036076, - "grad_norm": 0.4719698131084442, - "learning_rate": 7.640067426002257e-06, - "loss": 0.3691, - "step": 15446 - }, - { - "epoch": 1.0095418600091497, - "grad_norm": 0.44663551449775696, - "learning_rate": 7.63977087484457e-06, - "loss": 0.3553, - "step": 15447 - }, - { - "epoch": 1.0096072152146918, - "grad_norm": 0.5834388732910156, - "learning_rate": 7.639474310811804e-06, - "loss": 0.3731, - "step": 15448 - }, - { - "epoch": 1.009672570420234, - "grad_norm": 0.4788368046283722, - "learning_rate": 7.639177733905407e-06, - "loss": 0.3708, - "step": 15449 - }, - { - "epoch": 1.0097379256257761, - "grad_norm": 0.4483938217163086, - "learning_rate": 7.638881144126825e-06, - "loss": 0.3378, - "step": 15450 - }, - { - "epoch": 1.0098032808313182, - "grad_norm": 0.5010645389556885, - "learning_rate": 7.63858454147751e-06, - "loss": 0.3979, - "step": 15451 - }, - { - "epoch": 1.0098686360368603, - "grad_norm": 0.4544999301433563, - "learning_rate": 7.6382879259589e-06, - "loss": 0.3321, - "step": 15452 - }, - { - "epoch": 1.0099339912424024, - "grad_norm": 0.4219933748245239, - "learning_rate": 7.637991297572447e-06, - "loss": 0.322, - "step": 15453 - }, - { - "epoch": 1.0099993464479446, - "grad_norm": 0.4401116967201233, - "learning_rate": 7.637694656319596e-06, - "loss": 0.3596, - "step": 15454 - }, - { - "epoch": 1.0100647016534867, - "grad_norm": 0.45100435614585876, - "learning_rate": 7.637398002201795e-06, - "loss": 0.379, - "step": 15455 - }, - { - "epoch": 1.0101300568590288, - "grad_norm": 0.4402828514575958, - "learning_rate": 7.63710133522049e-06, - "loss": 0.326, - "step": 15456 - }, - { - "epoch": 1.0101954120645709, - "grad_norm": 0.5119789242744446, - "learning_rate": 7.636804655377129e-06, - "loss": 0.2976, - "step": 15457 - }, - { - "epoch": 1.0102607672701132, - "grad_norm": 0.4435166120529175, - "learning_rate": 7.636507962673156e-06, - "loss": 0.3631, - "step": 15458 - }, - { - "epoch": 1.0103261224756552, - "grad_norm": 0.44343629479408264, - "learning_rate": 7.63621125711002e-06, - "loss": 0.3082, - "step": 15459 - }, - { - "epoch": 1.0103914776811973, - "grad_norm": 0.46621182560920715, - "learning_rate": 7.63591453868917e-06, - "loss": 0.3447, - "step": 15460 - }, - { - "epoch": 1.0104568328867394, - "grad_norm": 0.433179646730423, - "learning_rate": 7.635617807412053e-06, - "loss": 0.3114, - "step": 15461 - }, - { - "epoch": 1.0105221880922814, - "grad_norm": 0.502048671245575, - "learning_rate": 7.635321063280114e-06, - "loss": 0.385, - "step": 15462 - }, - { - "epoch": 1.0105875432978237, - "grad_norm": 0.4555855691432953, - "learning_rate": 7.635024306294801e-06, - "loss": 0.3222, - "step": 15463 - }, - { - "epoch": 1.0106528985033658, - "grad_norm": 0.49316129088401794, - "learning_rate": 7.63472753645756e-06, - "loss": 0.3508, - "step": 15464 - }, - { - "epoch": 1.0107182537089079, - "grad_norm": 0.4633665382862091, - "learning_rate": 7.634430753769842e-06, - "loss": 0.3432, - "step": 15465 - }, - { - "epoch": 1.01078360891445, - "grad_norm": 0.4293590188026428, - "learning_rate": 7.634133958233092e-06, - "loss": 0.307, - "step": 15466 - }, - { - "epoch": 1.0108489641199923, - "grad_norm": 0.4356231987476349, - "learning_rate": 7.633837149848759e-06, - "loss": 0.337, - "step": 15467 - }, - { - "epoch": 1.0109143193255343, - "grad_norm": 0.4453873634338379, - "learning_rate": 7.633540328618289e-06, - "loss": 0.3537, - "step": 15468 - }, - { - "epoch": 1.0109796745310764, - "grad_norm": 0.44065070152282715, - "learning_rate": 7.63324349454313e-06, - "loss": 0.346, - "step": 15469 - }, - { - "epoch": 1.0110450297366185, - "grad_norm": 0.45693904161453247, - "learning_rate": 7.63294664762473e-06, - "loss": 0.3457, - "step": 15470 - }, - { - "epoch": 1.0111103849421605, - "grad_norm": 0.47622743248939514, - "learning_rate": 7.63264978786454e-06, - "loss": 0.3711, - "step": 15471 - }, - { - "epoch": 1.0111757401477028, - "grad_norm": 0.4707341194152832, - "learning_rate": 7.632352915264001e-06, - "loss": 0.3864, - "step": 15472 - }, - { - "epoch": 1.011241095353245, - "grad_norm": 0.4664469361305237, - "learning_rate": 7.632056029824566e-06, - "loss": 0.357, - "step": 15473 - }, - { - "epoch": 1.011306450558787, - "grad_norm": 0.4514675438404083, - "learning_rate": 7.631759131547684e-06, - "loss": 0.364, - "step": 15474 - }, - { - "epoch": 1.011371805764329, - "grad_norm": 0.46300041675567627, - "learning_rate": 7.631462220434798e-06, - "loss": 0.3668, - "step": 15475 - }, - { - "epoch": 1.0114371609698714, - "grad_norm": 0.43007588386535645, - "learning_rate": 7.631165296487361e-06, - "loss": 0.3153, - "step": 15476 - }, - { - "epoch": 1.0115025161754134, - "grad_norm": 0.4446285367012024, - "learning_rate": 7.630868359706818e-06, - "loss": 0.3435, - "step": 15477 - }, - { - "epoch": 1.0115678713809555, - "grad_norm": 0.4426470398902893, - "learning_rate": 7.63057141009462e-06, - "loss": 0.3393, - "step": 15478 - }, - { - "epoch": 1.0116332265864976, - "grad_norm": 0.45051711797714233, - "learning_rate": 7.630274447652214e-06, - "loss": 0.3619, - "step": 15479 - }, - { - "epoch": 1.0116985817920396, - "grad_norm": 0.4386603832244873, - "learning_rate": 7.629977472381047e-06, - "loss": 0.3058, - "step": 15480 - }, - { - "epoch": 1.011763936997582, - "grad_norm": 0.45015019178390503, - "learning_rate": 7.62968048428257e-06, - "loss": 0.3635, - "step": 15481 - }, - { - "epoch": 1.011829292203124, - "grad_norm": 0.4465944170951843, - "learning_rate": 7.629383483358229e-06, - "loss": 0.3267, - "step": 15482 - }, - { - "epoch": 1.011894647408666, - "grad_norm": 0.45590606331825256, - "learning_rate": 7.629086469609474e-06, - "loss": 0.3687, - "step": 15483 - }, - { - "epoch": 1.0119600026142082, - "grad_norm": 0.4663674235343933, - "learning_rate": 7.6287894430377544e-06, - "loss": 0.3875, - "step": 15484 - }, - { - "epoch": 1.0120253578197504, - "grad_norm": 0.403889924287796, - "learning_rate": 7.628492403644519e-06, - "loss": 0.2805, - "step": 15485 - }, - { - "epoch": 1.0120907130252925, - "grad_norm": 0.4771665036678314, - "learning_rate": 7.6281953514312125e-06, - "loss": 0.3715, - "step": 15486 - }, - { - "epoch": 1.0121560682308346, - "grad_norm": 0.44601204991340637, - "learning_rate": 7.627898286399289e-06, - "loss": 0.3322, - "step": 15487 - }, - { - "epoch": 1.0122214234363767, - "grad_norm": 0.4793049097061157, - "learning_rate": 7.627601208550195e-06, - "loss": 0.3865, - "step": 15488 - }, - { - "epoch": 1.0122867786419187, - "grad_norm": 0.45158910751342773, - "learning_rate": 7.627304117885379e-06, - "loss": 0.3605, - "step": 15489 - }, - { - "epoch": 1.012352133847461, - "grad_norm": 0.6445653438568115, - "learning_rate": 7.6270070144062914e-06, - "loss": 0.3375, - "step": 15490 - }, - { - "epoch": 1.012417489053003, - "grad_norm": 0.43588805198669434, - "learning_rate": 7.626709898114379e-06, - "loss": 0.3352, - "step": 15491 - }, - { - "epoch": 1.0124828442585452, - "grad_norm": 0.4423539638519287, - "learning_rate": 7.626412769011095e-06, - "loss": 0.3427, - "step": 15492 - }, - { - "epoch": 1.0125481994640873, - "grad_norm": 0.45310357213020325, - "learning_rate": 7.626115627097883e-06, - "loss": 0.3426, - "step": 15493 - }, - { - "epoch": 1.0126135546696295, - "grad_norm": 0.5024445056915283, - "learning_rate": 7.625818472376198e-06, - "loss": 0.3781, - "step": 15494 - }, - { - "epoch": 1.0126789098751716, - "grad_norm": 0.46750733256340027, - "learning_rate": 7.625521304847484e-06, - "loss": 0.3281, - "step": 15495 - }, - { - "epoch": 1.0127442650807137, - "grad_norm": 0.4888159930706024, - "learning_rate": 7.625224124513195e-06, - "loss": 0.3946, - "step": 15496 - }, - { - "epoch": 1.0128096202862558, - "grad_norm": 0.4495035707950592, - "learning_rate": 7.624926931374777e-06, - "loss": 0.356, - "step": 15497 - }, - { - "epoch": 1.0128749754917978, - "grad_norm": 0.44272077083587646, - "learning_rate": 7.624629725433683e-06, - "loss": 0.3578, - "step": 15498 - }, - { - "epoch": 1.0129403306973401, - "grad_norm": 0.44645029306411743, - "learning_rate": 7.62433250669136e-06, - "loss": 0.3423, - "step": 15499 - }, - { - "epoch": 1.0130056859028822, - "grad_norm": 0.45229387283325195, - "learning_rate": 7.624035275149258e-06, - "loss": 0.3625, - "step": 15500 - }, - { - "epoch": 1.0130710411084243, - "grad_norm": 0.5196219682693481, - "learning_rate": 7.623738030808826e-06, - "loss": 0.4103, - "step": 15501 - }, - { - "epoch": 1.0131363963139663, - "grad_norm": 0.442560613155365, - "learning_rate": 7.623440773671514e-06, - "loss": 0.3557, - "step": 15502 - }, - { - "epoch": 1.0132017515195084, - "grad_norm": 0.4808710515499115, - "learning_rate": 7.623143503738774e-06, - "loss": 0.3612, - "step": 15503 - }, - { - "epoch": 1.0132671067250507, - "grad_norm": 0.45476454496383667, - "learning_rate": 7.622846221012055e-06, - "loss": 0.3635, - "step": 15504 - }, - { - "epoch": 1.0133324619305928, - "grad_norm": 0.49299511313438416, - "learning_rate": 7.622548925492805e-06, - "loss": 0.3837, - "step": 15505 - }, - { - "epoch": 1.0133978171361349, - "grad_norm": 0.4372859299182892, - "learning_rate": 7.622251617182477e-06, - "loss": 0.3665, - "step": 15506 - }, - { - "epoch": 1.013463172341677, - "grad_norm": 0.4558941423892975, - "learning_rate": 7.6219542960825165e-06, - "loss": 0.3644, - "step": 15507 - }, - { - "epoch": 1.0135285275472192, - "grad_norm": 0.4683453440666199, - "learning_rate": 7.621656962194379e-06, - "loss": 0.379, - "step": 15508 - }, - { - "epoch": 1.0135938827527613, - "grad_norm": 0.46672216057777405, - "learning_rate": 7.621359615519512e-06, - "loss": 0.3532, - "step": 15509 - }, - { - "epoch": 1.0136592379583034, - "grad_norm": 0.45947110652923584, - "learning_rate": 7.621062256059366e-06, - "loss": 0.3599, - "step": 15510 - }, - { - "epoch": 1.0137245931638454, - "grad_norm": 0.4729503095149994, - "learning_rate": 7.62076488381539e-06, - "loss": 0.3569, - "step": 15511 - }, - { - "epoch": 1.0137899483693875, - "grad_norm": 0.46331289410591125, - "learning_rate": 7.620467498789038e-06, - "loss": 0.3518, - "step": 15512 - }, - { - "epoch": 1.0138553035749298, - "grad_norm": 0.4517684578895569, - "learning_rate": 7.620170100981756e-06, - "loss": 0.3554, - "step": 15513 - }, - { - "epoch": 1.0139206587804719, - "grad_norm": 0.43650633096694946, - "learning_rate": 7.619872690394998e-06, - "loss": 0.3244, - "step": 15514 - }, - { - "epoch": 1.013986013986014, - "grad_norm": 0.4438323378562927, - "learning_rate": 7.619575267030213e-06, - "loss": 0.3476, - "step": 15515 - }, - { - "epoch": 1.014051369191556, - "grad_norm": 0.44066789746284485, - "learning_rate": 7.619277830888851e-06, - "loss": 0.3298, - "step": 15516 - }, - { - "epoch": 1.0141167243970983, - "grad_norm": 0.4373476803302765, - "learning_rate": 7.618980381972365e-06, - "loss": 0.342, - "step": 15517 - }, - { - "epoch": 1.0141820796026404, - "grad_norm": 0.4374128580093384, - "learning_rate": 7.618682920282203e-06, - "loss": 0.3365, - "step": 15518 - }, - { - "epoch": 1.0142474348081825, - "grad_norm": 0.4472385048866272, - "learning_rate": 7.618385445819819e-06, - "loss": 0.3203, - "step": 15519 - }, - { - "epoch": 1.0143127900137245, - "grad_norm": 0.4670279324054718, - "learning_rate": 7.618087958586661e-06, - "loss": 0.3559, - "step": 15520 - }, - { - "epoch": 1.0143781452192666, - "grad_norm": 0.43676018714904785, - "learning_rate": 7.617790458584181e-06, - "loss": 0.3416, - "step": 15521 - }, - { - "epoch": 1.014443500424809, - "grad_norm": 0.42490100860595703, - "learning_rate": 7.617492945813829e-06, - "loss": 0.3344, - "step": 15522 - }, - { - "epoch": 1.014508855630351, - "grad_norm": 0.43293091654777527, - "learning_rate": 7.617195420277059e-06, - "loss": 0.3396, - "step": 15523 - }, - { - "epoch": 1.014574210835893, - "grad_norm": 0.4176598787307739, - "learning_rate": 7.6168978819753204e-06, - "loss": 0.31, - "step": 15524 - }, - { - "epoch": 1.0146395660414351, - "grad_norm": 0.43753165006637573, - "learning_rate": 7.6166003309100626e-06, - "loss": 0.3, - "step": 15525 - }, - { - "epoch": 1.0147049212469774, - "grad_norm": 0.4823523163795471, - "learning_rate": 7.61630276708274e-06, - "loss": 0.354, - "step": 15526 - }, - { - "epoch": 1.0147702764525195, - "grad_norm": 0.4753963053226471, - "learning_rate": 7.616005190494802e-06, - "loss": 0.3897, - "step": 15527 - }, - { - "epoch": 1.0148356316580616, - "grad_norm": 0.4880515933036804, - "learning_rate": 7.615707601147699e-06, - "loss": 0.4284, - "step": 15528 - }, - { - "epoch": 1.0149009868636036, - "grad_norm": 0.4545324146747589, - "learning_rate": 7.615409999042886e-06, - "loss": 0.3765, - "step": 15529 - }, - { - "epoch": 1.0149663420691457, - "grad_norm": 0.45359712839126587, - "learning_rate": 7.615112384181811e-06, - "loss": 0.3638, - "step": 15530 - }, - { - "epoch": 1.015031697274688, - "grad_norm": 0.40306368470191956, - "learning_rate": 7.614814756565927e-06, - "loss": 0.2961, - "step": 15531 - }, - { - "epoch": 1.01509705248023, - "grad_norm": 0.47215649485588074, - "learning_rate": 7.614517116196686e-06, - "loss": 0.3792, - "step": 15532 - }, - { - "epoch": 1.0151624076857722, - "grad_norm": 0.5271449089050293, - "learning_rate": 7.614219463075539e-06, - "loss": 0.4131, - "step": 15533 - }, - { - "epoch": 1.0152277628913142, - "grad_norm": 0.46284154057502747, - "learning_rate": 7.6139217972039395e-06, - "loss": 0.3572, - "step": 15534 - }, - { - "epoch": 1.0152931180968565, - "grad_norm": 0.435781866312027, - "learning_rate": 7.613624118583336e-06, - "loss": 0.3287, - "step": 15535 - }, - { - "epoch": 1.0153584733023986, - "grad_norm": 0.40714776515960693, - "learning_rate": 7.613326427215182e-06, - "loss": 0.2723, - "step": 15536 - }, - { - "epoch": 1.0154238285079407, - "grad_norm": 0.4516788423061371, - "learning_rate": 7.6130287231009315e-06, - "loss": 0.2979, - "step": 15537 - }, - { - "epoch": 1.0154891837134827, - "grad_norm": 0.4320693016052246, - "learning_rate": 7.612731006242034e-06, - "loss": 0.3485, - "step": 15538 - }, - { - "epoch": 1.0155545389190248, - "grad_norm": 0.4644910395145416, - "learning_rate": 7.612433276639941e-06, - "loss": 0.3736, - "step": 15539 - }, - { - "epoch": 1.015619894124567, - "grad_norm": 0.53074711561203, - "learning_rate": 7.6121355342961065e-06, - "loss": 0.3927, - "step": 15540 - }, - { - "epoch": 1.0156852493301092, - "grad_norm": 0.4280640184879303, - "learning_rate": 7.611837779211982e-06, - "loss": 0.3167, - "step": 15541 - }, - { - "epoch": 1.0157506045356512, - "grad_norm": 0.44264957308769226, - "learning_rate": 7.6115400113890205e-06, - "loss": 0.338, - "step": 15542 - }, - { - "epoch": 1.0158159597411933, - "grad_norm": 0.46898049116134644, - "learning_rate": 7.611242230828673e-06, - "loss": 0.3833, - "step": 15543 - }, - { - "epoch": 1.0158813149467356, - "grad_norm": 0.46865010261535645, - "learning_rate": 7.610944437532393e-06, - "loss": 0.3781, - "step": 15544 - }, - { - "epoch": 1.0159466701522777, - "grad_norm": 0.46959206461906433, - "learning_rate": 7.610646631501632e-06, - "loss": 0.3463, - "step": 15545 - }, - { - "epoch": 1.0160120253578198, - "grad_norm": 0.41981449723243713, - "learning_rate": 7.610348812737845e-06, - "loss": 0.3069, - "step": 15546 - }, - { - "epoch": 1.0160773805633618, - "grad_norm": 0.47139954566955566, - "learning_rate": 7.610050981242479e-06, - "loss": 0.3744, - "step": 15547 - }, - { - "epoch": 1.016142735768904, - "grad_norm": 0.4501635432243347, - "learning_rate": 7.609753137016993e-06, - "loss": 0.3554, - "step": 15548 - }, - { - "epoch": 1.0162080909744462, - "grad_norm": 0.44787856936454773, - "learning_rate": 7.609455280062834e-06, - "loss": 0.3349, - "step": 15549 - }, - { - "epoch": 1.0162734461799883, - "grad_norm": 0.43762820959091187, - "learning_rate": 7.609157410381461e-06, - "loss": 0.3561, - "step": 15550 - }, - { - "epoch": 1.0163388013855303, - "grad_norm": 0.4427582621574402, - "learning_rate": 7.60885952797432e-06, - "loss": 0.3086, - "step": 15551 - }, - { - "epoch": 1.0164041565910724, - "grad_norm": 0.4594787061214447, - "learning_rate": 7.60856163284287e-06, - "loss": 0.3906, - "step": 15552 - }, - { - "epoch": 1.0164695117966147, - "grad_norm": 0.44954901933670044, - "learning_rate": 7.60826372498856e-06, - "loss": 0.322, - "step": 15553 - }, - { - "epoch": 1.0165348670021568, - "grad_norm": 0.4757879674434662, - "learning_rate": 7.607965804412845e-06, - "loss": 0.4043, - "step": 15554 - }, - { - "epoch": 1.0166002222076989, - "grad_norm": 0.4479594826698303, - "learning_rate": 7.607667871117177e-06, - "loss": 0.358, - "step": 15555 - }, - { - "epoch": 1.016665577413241, - "grad_norm": 0.42708635330200195, - "learning_rate": 7.60736992510301e-06, - "loss": 0.3367, - "step": 15556 - }, - { - "epoch": 1.016730932618783, - "grad_norm": 0.44823840260505676, - "learning_rate": 7.6070719663717955e-06, - "loss": 0.3242, - "step": 15557 - }, - { - "epoch": 1.0167962878243253, - "grad_norm": 0.4422805905342102, - "learning_rate": 7.606773994924987e-06, - "loss": 0.3226, - "step": 15558 - }, - { - "epoch": 1.0168616430298674, - "grad_norm": 0.4667592942714691, - "learning_rate": 7.60647601076404e-06, - "loss": 0.3558, - "step": 15559 - }, - { - "epoch": 1.0169269982354094, - "grad_norm": 0.4609525203704834, - "learning_rate": 7.606178013890406e-06, - "loss": 0.3569, - "step": 15560 - }, - { - "epoch": 1.0169923534409515, - "grad_norm": 0.42644402384757996, - "learning_rate": 7.605880004305539e-06, - "loss": 0.3427, - "step": 15561 - }, - { - "epoch": 1.0170577086464936, - "grad_norm": 0.4584433138370514, - "learning_rate": 7.6055819820108926e-06, - "loss": 0.3281, - "step": 15562 - }, - { - "epoch": 1.0171230638520359, - "grad_norm": 0.4662013649940491, - "learning_rate": 7.605283947007921e-06, - "loss": 0.3769, - "step": 15563 - }, - { - "epoch": 1.017188419057578, - "grad_norm": 0.5001772046089172, - "learning_rate": 7.604985899298077e-06, - "loss": 0.3784, - "step": 15564 - }, - { - "epoch": 1.01725377426312, - "grad_norm": 0.4466564655303955, - "learning_rate": 7.604687838882813e-06, - "loss": 0.3573, - "step": 15565 - }, - { - "epoch": 1.017319129468662, - "grad_norm": 0.45362588763237, - "learning_rate": 7.6043897657635846e-06, - "loss": 0.3724, - "step": 15566 - }, - { - "epoch": 1.0173844846742044, - "grad_norm": 0.44119858741760254, - "learning_rate": 7.604091679941845e-06, - "loss": 0.3187, - "step": 15567 - }, - { - "epoch": 1.0174498398797465, - "grad_norm": 0.47164979577064514, - "learning_rate": 7.603793581419048e-06, - "loss": 0.3772, - "step": 15568 - }, - { - "epoch": 1.0175151950852885, - "grad_norm": 0.4713986814022064, - "learning_rate": 7.603495470196648e-06, - "loss": 0.3581, - "step": 15569 - }, - { - "epoch": 1.0175805502908306, - "grad_norm": 0.4711362421512604, - "learning_rate": 7.603197346276099e-06, - "loss": 0.3672, - "step": 15570 - }, - { - "epoch": 1.0176459054963727, - "grad_norm": 0.43596675992012024, - "learning_rate": 7.602899209658854e-06, - "loss": 0.3263, - "step": 15571 - }, - { - "epoch": 1.017711260701915, - "grad_norm": 0.470787912607193, - "learning_rate": 7.602601060346369e-06, - "loss": 0.313, - "step": 15572 - }, - { - "epoch": 1.017776615907457, - "grad_norm": 0.4555983245372772, - "learning_rate": 7.602302898340096e-06, - "loss": 0.3734, - "step": 15573 - }, - { - "epoch": 1.0178419711129991, - "grad_norm": 0.4501747488975525, - "learning_rate": 7.60200472364149e-06, - "loss": 0.3384, - "step": 15574 - }, - { - "epoch": 1.0179073263185412, - "grad_norm": 0.4738941490650177, - "learning_rate": 7.601706536252007e-06, - "loss": 0.3629, - "step": 15575 - }, - { - "epoch": 1.0179726815240835, - "grad_norm": 0.44299745559692383, - "learning_rate": 7.601408336173098e-06, - "loss": 0.3284, - "step": 15576 - }, - { - "epoch": 1.0180380367296256, - "grad_norm": 0.4432782530784607, - "learning_rate": 7.601110123406221e-06, - "loss": 0.3249, - "step": 15577 - }, - { - "epoch": 1.0181033919351676, - "grad_norm": 0.44111546874046326, - "learning_rate": 7.600811897952827e-06, - "loss": 0.3534, - "step": 15578 - }, - { - "epoch": 1.0181687471407097, - "grad_norm": 0.46954017877578735, - "learning_rate": 7.600513659814375e-06, - "loss": 0.3831, - "step": 15579 - }, - { - "epoch": 1.0182341023462518, - "grad_norm": 0.4363420903682709, - "learning_rate": 7.600215408992314e-06, - "loss": 0.3443, - "step": 15580 - }, - { - "epoch": 1.018299457551794, - "grad_norm": 0.4859357178211212, - "learning_rate": 7.5999171454881024e-06, - "loss": 0.4015, - "step": 15581 - }, - { - "epoch": 1.0183648127573361, - "grad_norm": 0.43814557790756226, - "learning_rate": 7.599618869303196e-06, - "loss": 0.3096, - "step": 15582 - }, - { - "epoch": 1.0184301679628782, - "grad_norm": 0.44182729721069336, - "learning_rate": 7.599320580439047e-06, - "loss": 0.331, - "step": 15583 - }, - { - "epoch": 1.0184955231684203, - "grad_norm": 0.45808476209640503, - "learning_rate": 7.5990222788971105e-06, - "loss": 0.3625, - "step": 15584 - }, - { - "epoch": 1.0185608783739626, - "grad_norm": 0.45872604846954346, - "learning_rate": 7.59872396467884e-06, - "loss": 0.3723, - "step": 15585 - }, - { - "epoch": 1.0186262335795047, - "grad_norm": 0.44794178009033203, - "learning_rate": 7.598425637785696e-06, - "loss": 0.3518, - "step": 15586 - }, - { - "epoch": 1.0186915887850467, - "grad_norm": 0.4414597153663635, - "learning_rate": 7.598127298219128e-06, - "loss": 0.349, - "step": 15587 - }, - { - "epoch": 1.0187569439905888, - "grad_norm": 0.42647597193717957, - "learning_rate": 7.5978289459805946e-06, - "loss": 0.3371, - "step": 15588 - }, - { - "epoch": 1.0188222991961309, - "grad_norm": 0.47627365589141846, - "learning_rate": 7.5975305810715476e-06, - "loss": 0.4082, - "step": 15589 - }, - { - "epoch": 1.0188876544016732, - "grad_norm": 0.4444546103477478, - "learning_rate": 7.597232203493446e-06, - "loss": 0.3418, - "step": 15590 - }, - { - "epoch": 1.0189530096072152, - "grad_norm": 0.43940412998199463, - "learning_rate": 7.596933813247741e-06, - "loss": 0.3608, - "step": 15591 - }, - { - "epoch": 1.0190183648127573, - "grad_norm": 0.4503110349178314, - "learning_rate": 7.596635410335891e-06, - "loss": 0.35, - "step": 15592 - }, - { - "epoch": 1.0190837200182994, - "grad_norm": 0.4423142373561859, - "learning_rate": 7.596336994759351e-06, - "loss": 0.3254, - "step": 15593 - }, - { - "epoch": 1.0191490752238417, - "grad_norm": 0.43201929330825806, - "learning_rate": 7.596038566519574e-06, - "loss": 0.3319, - "step": 15594 - }, - { - "epoch": 1.0192144304293838, - "grad_norm": 0.471842885017395, - "learning_rate": 7.59574012561802e-06, - "loss": 0.3944, - "step": 15595 - }, - { - "epoch": 1.0192797856349258, - "grad_norm": 0.4996616840362549, - "learning_rate": 7.595441672056143e-06, - "loss": 0.2886, - "step": 15596 - }, - { - "epoch": 1.019345140840468, - "grad_norm": 0.4460655748844147, - "learning_rate": 7.595143205835396e-06, - "loss": 0.3637, - "step": 15597 - }, - { - "epoch": 1.01941049604601, - "grad_norm": 0.4370627999305725, - "learning_rate": 7.5948447269572365e-06, - "loss": 0.3552, - "step": 15598 - }, - { - "epoch": 1.0194758512515523, - "grad_norm": 0.43534135818481445, - "learning_rate": 7.594546235423122e-06, - "loss": 0.3304, - "step": 15599 - }, - { - "epoch": 1.0195412064570943, - "grad_norm": 0.44917362928390503, - "learning_rate": 7.594247731234504e-06, - "loss": 0.3691, - "step": 15600 - }, - { - "epoch": 1.0196065616626364, - "grad_norm": 0.4325159192085266, - "learning_rate": 7.593949214392843e-06, - "loss": 0.3009, - "step": 15601 - }, - { - "epoch": 1.0196719168681785, - "grad_norm": 0.4442124664783478, - "learning_rate": 7.593650684899593e-06, - "loss": 0.3392, - "step": 15602 - }, - { - "epoch": 1.0197372720737208, - "grad_norm": 0.4569205939769745, - "learning_rate": 7.593352142756209e-06, - "loss": 0.3585, - "step": 15603 - }, - { - "epoch": 1.0198026272792629, - "grad_norm": 0.42676761746406555, - "learning_rate": 7.59305358796415e-06, - "loss": 0.2972, - "step": 15604 - }, - { - "epoch": 1.019867982484805, - "grad_norm": 0.4391583204269409, - "learning_rate": 7.592755020524867e-06, - "loss": 0.3521, - "step": 15605 - }, - { - "epoch": 1.019933337690347, - "grad_norm": 0.45182445645332336, - "learning_rate": 7.592456440439823e-06, - "loss": 0.322, - "step": 15606 - }, - { - "epoch": 1.019998692895889, - "grad_norm": 0.45521080493927, - "learning_rate": 7.592157847710468e-06, - "loss": 0.3542, - "step": 15607 - }, - { - "epoch": 1.0200640481014314, - "grad_norm": 0.4373794198036194, - "learning_rate": 7.591859242338265e-06, - "loss": 0.3233, - "step": 15608 - }, - { - "epoch": 1.0201294033069734, - "grad_norm": 0.5093589425086975, - "learning_rate": 7.591560624324662e-06, - "loss": 0.3061, - "step": 15609 - }, - { - "epoch": 1.0201947585125155, - "grad_norm": 0.4563637673854828, - "learning_rate": 7.591261993671122e-06, - "loss": 0.3382, - "step": 15610 - }, - { - "epoch": 1.0202601137180576, - "grad_norm": 0.4290340542793274, - "learning_rate": 7.5909633503791015e-06, - "loss": 0.3262, - "step": 15611 - }, - { - "epoch": 1.0203254689235997, - "grad_norm": 0.43287280201911926, - "learning_rate": 7.5906646944500515e-06, - "loss": 0.3256, - "step": 15612 - }, - { - "epoch": 1.020390824129142, - "grad_norm": 0.46789512038230896, - "learning_rate": 7.590366025885435e-06, - "loss": 0.3841, - "step": 15613 - }, - { - "epoch": 1.020456179334684, - "grad_norm": 0.4521138668060303, - "learning_rate": 7.5900673446867045e-06, - "loss": 0.325, - "step": 15614 - }, - { - "epoch": 1.020521534540226, - "grad_norm": 0.44588905572891235, - "learning_rate": 7.5897686508553205e-06, - "loss": 0.3001, - "step": 15615 - }, - { - "epoch": 1.0205868897457682, - "grad_norm": 0.4593271017074585, - "learning_rate": 7.5894699443927355e-06, - "loss": 0.3669, - "step": 15616 - }, - { - "epoch": 1.0206522449513105, - "grad_norm": 0.427633672952652, - "learning_rate": 7.589171225300409e-06, - "loss": 0.3014, - "step": 15617 - }, - { - "epoch": 1.0207176001568525, - "grad_norm": 0.4613807797431946, - "learning_rate": 7.588872493579798e-06, - "loss": 0.337, - "step": 15618 - }, - { - "epoch": 1.0207829553623946, - "grad_norm": 0.458882600069046, - "learning_rate": 7.588573749232359e-06, - "loss": 0.3357, - "step": 15619 - }, - { - "epoch": 1.0208483105679367, - "grad_norm": 0.473646342754364, - "learning_rate": 7.588274992259548e-06, - "loss": 0.3369, - "step": 15620 - }, - { - "epoch": 1.0209136657734788, - "grad_norm": 0.451913058757782, - "learning_rate": 7.587976222662824e-06, - "loss": 0.3612, - "step": 15621 - }, - { - "epoch": 1.020979020979021, - "grad_norm": 0.4600163400173187, - "learning_rate": 7.587677440443643e-06, - "loss": 0.3463, - "step": 15622 - }, - { - "epoch": 1.0210443761845631, - "grad_norm": 0.43435609340667725, - "learning_rate": 7.587378645603463e-06, - "loss": 0.3294, - "step": 15623 - }, - { - "epoch": 1.0211097313901052, - "grad_norm": 0.43905115127563477, - "learning_rate": 7.58707983814374e-06, - "loss": 0.3392, - "step": 15624 - }, - { - "epoch": 1.0211750865956473, - "grad_norm": 0.4720829725265503, - "learning_rate": 7.586781018065934e-06, - "loss": 0.349, - "step": 15625 - }, - { - "epoch": 1.0212404418011896, - "grad_norm": 0.46056434512138367, - "learning_rate": 7.5864821853714995e-06, - "loss": 0.362, - "step": 15626 - }, - { - "epoch": 1.0213057970067316, - "grad_norm": 0.44007593393325806, - "learning_rate": 7.5861833400618965e-06, - "loss": 0.3156, - "step": 15627 - }, - { - "epoch": 1.0213711522122737, - "grad_norm": 0.4564206898212433, - "learning_rate": 7.585884482138581e-06, - "loss": 0.3777, - "step": 15628 - }, - { - "epoch": 1.0214365074178158, - "grad_norm": 0.45567262172698975, - "learning_rate": 7.58558561160301e-06, - "loss": 0.3566, - "step": 15629 - }, - { - "epoch": 1.0215018626233578, - "grad_norm": 0.4550042748451233, - "learning_rate": 7.585286728456643e-06, - "loss": 0.3754, - "step": 15630 - }, - { - "epoch": 1.0215672178289001, - "grad_norm": 0.4357951879501343, - "learning_rate": 7.5849878327009385e-06, - "loss": 0.333, - "step": 15631 - }, - { - "epoch": 1.0216325730344422, - "grad_norm": 0.47056224942207336, - "learning_rate": 7.58468892433735e-06, - "loss": 0.3795, - "step": 15632 - }, - { - "epoch": 1.0216979282399843, - "grad_norm": 0.469344824552536, - "learning_rate": 7.58439000336734e-06, - "loss": 0.3319, - "step": 15633 - }, - { - "epoch": 1.0217632834455264, - "grad_norm": 0.42325741052627563, - "learning_rate": 7.584091069792363e-06, - "loss": 0.3211, - "step": 15634 - }, - { - "epoch": 1.0218286386510687, - "grad_norm": 0.47051575779914856, - "learning_rate": 7.583792123613881e-06, - "loss": 0.3919, - "step": 15635 - }, - { - "epoch": 1.0218939938566107, - "grad_norm": 0.4938119053840637, - "learning_rate": 7.583493164833349e-06, - "loss": 0.3704, - "step": 15636 - }, - { - "epoch": 1.0219593490621528, - "grad_norm": 0.48237931728363037, - "learning_rate": 7.583194193452224e-06, - "loss": 0.3793, - "step": 15637 - }, - { - "epoch": 1.0220247042676949, - "grad_norm": 0.4226386845111847, - "learning_rate": 7.582895209471969e-06, - "loss": 0.3399, - "step": 15638 - }, - { - "epoch": 1.022090059473237, - "grad_norm": 0.4548985958099365, - "learning_rate": 7.582596212894038e-06, - "loss": 0.3411, - "step": 15639 - }, - { - "epoch": 1.0221554146787792, - "grad_norm": 0.47761985659599304, - "learning_rate": 7.58229720371989e-06, - "loss": 0.3662, - "step": 15640 - }, - { - "epoch": 1.0222207698843213, - "grad_norm": 0.4808357059955597, - "learning_rate": 7.581998181950985e-06, - "loss": 0.3728, - "step": 15641 - }, - { - "epoch": 1.0222861250898634, - "grad_norm": 0.4505424201488495, - "learning_rate": 7.5816991475887795e-06, - "loss": 0.3426, - "step": 15642 - }, - { - "epoch": 1.0223514802954055, - "grad_norm": 0.4237038493156433, - "learning_rate": 7.581400100634732e-06, - "loss": 0.2975, - "step": 15643 - }, - { - "epoch": 1.0224168355009478, - "grad_norm": 0.5180762410163879, - "learning_rate": 7.581101041090303e-06, - "loss": 0.4273, - "step": 15644 - }, - { - "epoch": 1.0224821907064898, - "grad_norm": 0.45106804370880127, - "learning_rate": 7.58080196895695e-06, - "loss": 0.3688, - "step": 15645 - }, - { - "epoch": 1.022547545912032, - "grad_norm": 0.44238874316215515, - "learning_rate": 7.580502884236132e-06, - "loss": 0.3661, - "step": 15646 - }, - { - "epoch": 1.022612901117574, - "grad_norm": 0.4164060652256012, - "learning_rate": 7.580203786929305e-06, - "loss": 0.3183, - "step": 15647 - }, - { - "epoch": 1.022678256323116, - "grad_norm": 0.4736102223396301, - "learning_rate": 7.5799046770379335e-06, - "loss": 0.3742, - "step": 15648 - }, - { - "epoch": 1.0227436115286583, - "grad_norm": 0.45015087723731995, - "learning_rate": 7.5796055545634716e-06, - "loss": 0.3235, - "step": 15649 - }, - { - "epoch": 1.0228089667342004, - "grad_norm": 0.4537907540798187, - "learning_rate": 7.5793064195073805e-06, - "loss": 0.3773, - "step": 15650 - }, - { - "epoch": 1.0228743219397425, - "grad_norm": 0.4417157769203186, - "learning_rate": 7.579007271871118e-06, - "loss": 0.3503, - "step": 15651 - }, - { - "epoch": 1.0229396771452846, - "grad_norm": 0.47209954261779785, - "learning_rate": 7.578708111656143e-06, - "loss": 0.3604, - "step": 15652 - }, - { - "epoch": 1.0230050323508268, - "grad_norm": 0.4348152279853821, - "learning_rate": 7.578408938863917e-06, - "loss": 0.2944, - "step": 15653 - }, - { - "epoch": 1.023070387556369, - "grad_norm": 0.4781006872653961, - "learning_rate": 7.578109753495895e-06, - "loss": 0.3449, - "step": 15654 - }, - { - "epoch": 1.023135742761911, - "grad_norm": 0.45873141288757324, - "learning_rate": 7.577810555553539e-06, - "loss": 0.3276, - "step": 15655 - }, - { - "epoch": 1.023201097967453, - "grad_norm": 0.454061895608902, - "learning_rate": 7.577511345038311e-06, - "loss": 0.3653, - "step": 15656 - }, - { - "epoch": 1.0232664531729951, - "grad_norm": 0.4541439116001129, - "learning_rate": 7.577212121951664e-06, - "loss": 0.3379, - "step": 15657 - }, - { - "epoch": 1.0233318083785374, - "grad_norm": 0.43676990270614624, - "learning_rate": 7.576912886295063e-06, - "loss": 0.3348, - "step": 15658 - }, - { - "epoch": 1.0233971635840795, - "grad_norm": 0.43730729818344116, - "learning_rate": 7.5766136380699625e-06, - "loss": 0.3143, - "step": 15659 - }, - { - "epoch": 1.0234625187896216, - "grad_norm": 0.459166944026947, - "learning_rate": 7.5763143772778265e-06, - "loss": 0.3468, - "step": 15660 - }, - { - "epoch": 1.0235278739951637, - "grad_norm": 0.4604553282260895, - "learning_rate": 7.576015103920111e-06, - "loss": 0.37, - "step": 15661 - }, - { - "epoch": 1.023593229200706, - "grad_norm": 0.42918458580970764, - "learning_rate": 7.575715817998279e-06, - "loss": 0.3299, - "step": 15662 - }, - { - "epoch": 1.023658584406248, - "grad_norm": 0.45924463868141174, - "learning_rate": 7.575416519513787e-06, - "loss": 0.3286, - "step": 15663 - }, - { - "epoch": 1.02372393961179, - "grad_norm": 0.4635273814201355, - "learning_rate": 7.575117208468099e-06, - "loss": 0.3547, - "step": 15664 - }, - { - "epoch": 1.0237892948173322, - "grad_norm": 0.5107904672622681, - "learning_rate": 7.574817884862671e-06, - "loss": 0.4042, - "step": 15665 - }, - { - "epoch": 1.0238546500228742, - "grad_norm": 0.4265628457069397, - "learning_rate": 7.574518548698964e-06, - "loss": 0.3104, - "step": 15666 - }, - { - "epoch": 1.0239200052284165, - "grad_norm": 0.44664040207862854, - "learning_rate": 7.574219199978438e-06, - "loss": 0.3492, - "step": 15667 - }, - { - "epoch": 1.0239853604339586, - "grad_norm": 0.4648476541042328, - "learning_rate": 7.573919838702553e-06, - "loss": 0.3439, - "step": 15668 - }, - { - "epoch": 1.0240507156395007, - "grad_norm": 0.46731606125831604, - "learning_rate": 7.57362046487277e-06, - "loss": 0.3724, - "step": 15669 - }, - { - "epoch": 1.0241160708450427, - "grad_norm": 0.39791247248649597, - "learning_rate": 7.573321078490548e-06, - "loss": 0.2588, - "step": 15670 - }, - { - "epoch": 1.0241814260505848, - "grad_norm": 0.4004862308502197, - "learning_rate": 7.573021679557347e-06, - "loss": 0.3032, - "step": 15671 - }, - { - "epoch": 1.0242467812561271, - "grad_norm": 0.47330042719841003, - "learning_rate": 7.572722268074628e-06, - "loss": 0.3475, - "step": 15672 - }, - { - "epoch": 1.0243121364616692, - "grad_norm": 0.45570123195648193, - "learning_rate": 7.572422844043852e-06, - "loss": 0.3452, - "step": 15673 - }, - { - "epoch": 1.0243774916672113, - "grad_norm": 0.4535650908946991, - "learning_rate": 7.572123407466479e-06, - "loss": 0.3785, - "step": 15674 - }, - { - "epoch": 1.0244428468727533, - "grad_norm": 0.4466755986213684, - "learning_rate": 7.571823958343968e-06, - "loss": 0.35, - "step": 15675 - }, - { - "epoch": 1.0245082020782956, - "grad_norm": 0.4653805196285248, - "learning_rate": 7.5715244966777816e-06, - "loss": 0.3861, - "step": 15676 - }, - { - "epoch": 1.0245735572838377, - "grad_norm": 0.4834197163581848, - "learning_rate": 7.571225022469377e-06, - "loss": 0.3795, - "step": 15677 - }, - { - "epoch": 1.0246389124893798, - "grad_norm": 0.46364825963974, - "learning_rate": 7.570925535720221e-06, - "loss": 0.3252, - "step": 15678 - }, - { - "epoch": 1.0247042676949218, - "grad_norm": 0.4515097737312317, - "learning_rate": 7.570626036431767e-06, - "loss": 0.3314, - "step": 15679 - }, - { - "epoch": 1.024769622900464, - "grad_norm": 0.4300325810909271, - "learning_rate": 7.570326524605482e-06, - "loss": 0.3063, - "step": 15680 - }, - { - "epoch": 1.0248349781060062, - "grad_norm": 0.4211832582950592, - "learning_rate": 7.570027000242823e-06, - "loss": 0.2805, - "step": 15681 - }, - { - "epoch": 1.0249003333115483, - "grad_norm": 0.4727987051010132, - "learning_rate": 7.569727463345251e-06, - "loss": 0.3884, - "step": 15682 - }, - { - "epoch": 1.0249656885170904, - "grad_norm": 0.4563107490539551, - "learning_rate": 7.569427913914229e-06, - "loss": 0.3434, - "step": 15683 - }, - { - "epoch": 1.0250310437226324, - "grad_norm": 0.5002644062042236, - "learning_rate": 7.569128351951217e-06, - "loss": 0.3987, - "step": 15684 - }, - { - "epoch": 1.0250963989281747, - "grad_norm": 0.4213293790817261, - "learning_rate": 7.5688287774576756e-06, - "loss": 0.2893, - "step": 15685 - }, - { - "epoch": 1.0251617541337168, - "grad_norm": 0.45721110701560974, - "learning_rate": 7.568529190435066e-06, - "loss": 0.3567, - "step": 15686 - }, - { - "epoch": 1.0252271093392589, - "grad_norm": 0.43511372804641724, - "learning_rate": 7.56822959088485e-06, - "loss": 0.3365, - "step": 15687 - }, - { - "epoch": 1.025292464544801, - "grad_norm": 0.4605725109577179, - "learning_rate": 7.567929978808488e-06, - "loss": 0.377, - "step": 15688 - }, - { - "epoch": 1.025357819750343, - "grad_norm": 0.4372442662715912, - "learning_rate": 7.567630354207443e-06, - "loss": 0.3495, - "step": 15689 - }, - { - "epoch": 1.0254231749558853, - "grad_norm": 0.4581087827682495, - "learning_rate": 7.567330717083174e-06, - "loss": 0.34, - "step": 15690 - }, - { - "epoch": 1.0254885301614274, - "grad_norm": 0.44667884707450867, - "learning_rate": 7.567031067437146e-06, - "loss": 0.3298, - "step": 15691 - }, - { - "epoch": 1.0255538853669695, - "grad_norm": 0.4301671087741852, - "learning_rate": 7.566731405270815e-06, - "loss": 0.3281, - "step": 15692 - }, - { - "epoch": 1.0256192405725115, - "grad_norm": 0.44388964772224426, - "learning_rate": 7.5664317305856485e-06, - "loss": 0.3654, - "step": 15693 - }, - { - "epoch": 1.0256845957780538, - "grad_norm": 0.4563218355178833, - "learning_rate": 7.566132043383105e-06, - "loss": 0.3612, - "step": 15694 - }, - { - "epoch": 1.025749950983596, - "grad_norm": 0.4254835546016693, - "learning_rate": 7.565832343664645e-06, - "loss": 0.322, - "step": 15695 - }, - { - "epoch": 1.025815306189138, - "grad_norm": 0.4602677822113037, - "learning_rate": 7.565532631431734e-06, - "loss": 0.3146, - "step": 15696 - }, - { - "epoch": 1.02588066139468, - "grad_norm": 0.4197689890861511, - "learning_rate": 7.565232906685829e-06, - "loss": 0.3123, - "step": 15697 - }, - { - "epoch": 1.025946016600222, - "grad_norm": 0.4866635501384735, - "learning_rate": 7.564933169428396e-06, - "loss": 0.3787, - "step": 15698 - }, - { - "epoch": 1.0260113718057644, - "grad_norm": 0.4042617976665497, - "learning_rate": 7.564633419660894e-06, - "loss": 0.2913, - "step": 15699 - }, - { - "epoch": 1.0260767270113065, - "grad_norm": 0.45293474197387695, - "learning_rate": 7.564333657384788e-06, - "loss": 0.3151, - "step": 15700 - }, - { - "epoch": 1.0261420822168486, - "grad_norm": 0.5300982594490051, - "learning_rate": 7.564033882601538e-06, - "loss": 0.4056, - "step": 15701 - }, - { - "epoch": 1.0262074374223906, - "grad_norm": 0.4827274680137634, - "learning_rate": 7.563734095312606e-06, - "loss": 0.3787, - "step": 15702 - }, - { - "epoch": 1.026272792627933, - "grad_norm": 0.44669026136398315, - "learning_rate": 7.563434295519454e-06, - "loss": 0.3386, - "step": 15703 - }, - { - "epoch": 1.026338147833475, - "grad_norm": 0.4354687035083771, - "learning_rate": 7.563134483223547e-06, - "loss": 0.3434, - "step": 15704 - }, - { - "epoch": 1.026403503039017, - "grad_norm": 0.4427279829978943, - "learning_rate": 7.562834658426342e-06, - "loss": 0.3398, - "step": 15705 - }, - { - "epoch": 1.0264688582445591, - "grad_norm": 0.43261969089508057, - "learning_rate": 7.562534821129307e-06, - "loss": 0.3103, - "step": 15706 - }, - { - "epoch": 1.0265342134501012, - "grad_norm": 0.4481985867023468, - "learning_rate": 7.562234971333901e-06, - "loss": 0.3434, - "step": 15707 - }, - { - "epoch": 1.0265995686556435, - "grad_norm": 0.48565202951431274, - "learning_rate": 7.561935109041588e-06, - "loss": 0.3725, - "step": 15708 - }, - { - "epoch": 1.0266649238611856, - "grad_norm": 0.46509653329849243, - "learning_rate": 7.56163523425383e-06, - "loss": 0.3693, - "step": 15709 - }, - { - "epoch": 1.0267302790667276, - "grad_norm": 0.41026821732521057, - "learning_rate": 7.561335346972088e-06, - "loss": 0.305, - "step": 15710 - }, - { - "epoch": 1.0267956342722697, - "grad_norm": 0.411944180727005, - "learning_rate": 7.561035447197828e-06, - "loss": 0.2877, - "step": 15711 - }, - { - "epoch": 1.026860989477812, - "grad_norm": 0.43637987971305847, - "learning_rate": 7.56073553493251e-06, - "loss": 0.3183, - "step": 15712 - }, - { - "epoch": 1.026926344683354, - "grad_norm": 0.4577435255050659, - "learning_rate": 7.560435610177599e-06, - "loss": 0.3603, - "step": 15713 - }, - { - "epoch": 1.0269916998888962, - "grad_norm": 0.4846012592315674, - "learning_rate": 7.560135672934554e-06, - "loss": 0.3273, - "step": 15714 - }, - { - "epoch": 1.0270570550944382, - "grad_norm": 0.4223800003528595, - "learning_rate": 7.559835723204842e-06, - "loss": 0.3203, - "step": 15715 - }, - { - "epoch": 1.0271224102999803, - "grad_norm": 0.44712093472480774, - "learning_rate": 7.559535760989924e-06, - "loss": 0.3608, - "step": 15716 - }, - { - "epoch": 1.0271877655055226, - "grad_norm": 0.4641527235507965, - "learning_rate": 7.559235786291264e-06, - "loss": 0.3396, - "step": 15717 - }, - { - "epoch": 1.0272531207110647, - "grad_norm": 0.4962295591831207, - "learning_rate": 7.558935799110324e-06, - "loss": 0.3735, - "step": 15718 - }, - { - "epoch": 1.0273184759166067, - "grad_norm": 0.4628616273403168, - "learning_rate": 7.558635799448567e-06, - "loss": 0.3774, - "step": 15719 - }, - { - "epoch": 1.0273838311221488, - "grad_norm": 0.4333202838897705, - "learning_rate": 7.558335787307458e-06, - "loss": 0.3188, - "step": 15720 - }, - { - "epoch": 1.0274491863276909, - "grad_norm": 0.4812498688697815, - "learning_rate": 7.5580357626884584e-06, - "loss": 0.36, - "step": 15721 - }, - { - "epoch": 1.0275145415332332, - "grad_norm": 0.45785200595855713, - "learning_rate": 7.5577357255930336e-06, - "loss": 0.3638, - "step": 15722 - }, - { - "epoch": 1.0275798967387753, - "grad_norm": 0.4334566593170166, - "learning_rate": 7.557435676022643e-06, - "loss": 0.3177, - "step": 15723 - }, - { - "epoch": 1.0276452519443173, - "grad_norm": 0.4395442306995392, - "learning_rate": 7.5571356139787546e-06, - "loss": 0.3307, - "step": 15724 - }, - { - "epoch": 1.0277106071498594, - "grad_norm": 0.44063910841941833, - "learning_rate": 7.55683553946283e-06, - "loss": 0.3013, - "step": 15725 - }, - { - "epoch": 1.0277759623554017, - "grad_norm": 0.3947782814502716, - "learning_rate": 7.5565354524763305e-06, - "loss": 0.251, - "step": 15726 - }, - { - "epoch": 1.0278413175609438, - "grad_norm": 0.45565927028656006, - "learning_rate": 7.556235353020725e-06, - "loss": 0.3636, - "step": 15727 - }, - { - "epoch": 1.0279066727664858, - "grad_norm": 0.437971830368042, - "learning_rate": 7.555935241097472e-06, - "loss": 0.328, - "step": 15728 - }, - { - "epoch": 1.027972027972028, - "grad_norm": 0.4572085440158844, - "learning_rate": 7.555635116708037e-06, - "loss": 0.3514, - "step": 15729 - }, - { - "epoch": 1.02803738317757, - "grad_norm": 0.4883074164390564, - "learning_rate": 7.555334979853886e-06, - "loss": 0.3838, - "step": 15730 - }, - { - "epoch": 1.0281027383831123, - "grad_norm": 0.45139598846435547, - "learning_rate": 7.555034830536479e-06, - "loss": 0.3583, - "step": 15731 - }, - { - "epoch": 1.0281680935886544, - "grad_norm": 0.461537629365921, - "learning_rate": 7.554734668757282e-06, - "loss": 0.3387, - "step": 15732 - }, - { - "epoch": 1.0282334487941964, - "grad_norm": 0.422015905380249, - "learning_rate": 7.554434494517762e-06, - "loss": 0.3131, - "step": 15733 - }, - { - "epoch": 1.0282988039997385, - "grad_norm": 0.4193853437900543, - "learning_rate": 7.5541343078193784e-06, - "loss": 0.3254, - "step": 15734 - }, - { - "epoch": 1.0283641592052808, - "grad_norm": 0.48123207688331604, - "learning_rate": 7.553834108663596e-06, - "loss": 0.3688, - "step": 15735 - }, - { - "epoch": 1.0284295144108229, - "grad_norm": 0.4750046730041504, - "learning_rate": 7.55353389705188e-06, - "loss": 0.3578, - "step": 15736 - }, - { - "epoch": 1.028494869616365, - "grad_norm": 0.44931864738464355, - "learning_rate": 7.553233672985695e-06, - "loss": 0.3192, - "step": 15737 - }, - { - "epoch": 1.028560224821907, - "grad_norm": 0.4558142125606537, - "learning_rate": 7.552933436466505e-06, - "loss": 0.3411, - "step": 15738 - }, - { - "epoch": 1.028625580027449, - "grad_norm": 0.4375033378601074, - "learning_rate": 7.552633187495774e-06, - "loss": 0.2989, - "step": 15739 - }, - { - "epoch": 1.0286909352329914, - "grad_norm": 0.45783933997154236, - "learning_rate": 7.5523329260749665e-06, - "loss": 0.3227, - "step": 15740 - }, - { - "epoch": 1.0287562904385334, - "grad_norm": 0.41327139735221863, - "learning_rate": 7.552032652205548e-06, - "loss": 0.2733, - "step": 15741 - }, - { - "epoch": 1.0288216456440755, - "grad_norm": 0.47870659828186035, - "learning_rate": 7.551732365888982e-06, - "loss": 0.3487, - "step": 15742 - }, - { - "epoch": 1.0288870008496176, - "grad_norm": 0.4363199770450592, - "learning_rate": 7.551432067126732e-06, - "loss": 0.3411, - "step": 15743 - }, - { - "epoch": 1.02895235605516, - "grad_norm": 0.4649825096130371, - "learning_rate": 7.5511317559202646e-06, - "loss": 0.3686, - "step": 15744 - }, - { - "epoch": 1.029017711260702, - "grad_norm": 0.44911760091781616, - "learning_rate": 7.550831432271045e-06, - "loss": 0.3485, - "step": 15745 - }, - { - "epoch": 1.029083066466244, - "grad_norm": 0.4803098440170288, - "learning_rate": 7.550531096180536e-06, - "loss": 0.4048, - "step": 15746 - }, - { - "epoch": 1.029148421671786, - "grad_norm": 0.4453189969062805, - "learning_rate": 7.550230747650202e-06, - "loss": 0.3577, - "step": 15747 - }, - { - "epoch": 1.0292137768773282, - "grad_norm": 0.4523233473300934, - "learning_rate": 7.54993038668151e-06, - "loss": 0.3569, - "step": 15748 - }, - { - "epoch": 1.0292791320828705, - "grad_norm": 0.4204012453556061, - "learning_rate": 7.549630013275924e-06, - "loss": 0.3072, - "step": 15749 - }, - { - "epoch": 1.0293444872884125, - "grad_norm": 0.44491085410118103, - "learning_rate": 7.549329627434909e-06, - "loss": 0.3342, - "step": 15750 - }, - { - "epoch": 1.0294098424939546, - "grad_norm": 0.4380131959915161, - "learning_rate": 7.549029229159932e-06, - "loss": 0.3425, - "step": 15751 - }, - { - "epoch": 1.0294751976994967, - "grad_norm": 0.42915481328964233, - "learning_rate": 7.548728818452456e-06, - "loss": 0.3047, - "step": 15752 - }, - { - "epoch": 1.029540552905039, - "grad_norm": 0.4336947500705719, - "learning_rate": 7.5484283953139445e-06, - "loss": 0.3354, - "step": 15753 - }, - { - "epoch": 1.029605908110581, - "grad_norm": 0.4183482527732849, - "learning_rate": 7.548127959745866e-06, - "loss": 0.2959, - "step": 15754 - }, - { - "epoch": 1.0296712633161231, - "grad_norm": 0.49265751242637634, - "learning_rate": 7.547827511749684e-06, - "loss": 0.4263, - "step": 15755 - }, - { - "epoch": 1.0297366185216652, - "grad_norm": 0.4421324133872986, - "learning_rate": 7.547527051326866e-06, - "loss": 0.308, - "step": 15756 - }, - { - "epoch": 1.0298019737272073, - "grad_norm": 0.442550927400589, - "learning_rate": 7.547226578478874e-06, - "loss": 0.3369, - "step": 15757 - }, - { - "epoch": 1.0298673289327496, - "grad_norm": 0.4535270929336548, - "learning_rate": 7.546926093207178e-06, - "loss": 0.3429, - "step": 15758 - }, - { - "epoch": 1.0299326841382916, - "grad_norm": 0.46581920981407166, - "learning_rate": 7.54662559551324e-06, - "loss": 0.3119, - "step": 15759 - }, - { - "epoch": 1.0299980393438337, - "grad_norm": 0.43248432874679565, - "learning_rate": 7.546325085398526e-06, - "loss": 0.3178, - "step": 15760 - }, - { - "epoch": 1.0300633945493758, - "grad_norm": 0.46498411893844604, - "learning_rate": 7.546024562864503e-06, - "loss": 0.383, - "step": 15761 - }, - { - "epoch": 1.030128749754918, - "grad_norm": 0.45315492153167725, - "learning_rate": 7.545724027912635e-06, - "loss": 0.3368, - "step": 15762 - }, - { - "epoch": 1.0301941049604602, - "grad_norm": 0.47556623816490173, - "learning_rate": 7.545423480544392e-06, - "loss": 0.3694, - "step": 15763 - }, - { - "epoch": 1.0302594601660022, - "grad_norm": 0.4934963583946228, - "learning_rate": 7.545122920761235e-06, - "loss": 0.3796, - "step": 15764 - }, - { - "epoch": 1.0303248153715443, - "grad_norm": 0.45008614659309387, - "learning_rate": 7.544822348564633e-06, - "loss": 0.3567, - "step": 15765 - }, - { - "epoch": 1.0303901705770864, - "grad_norm": 0.48054632544517517, - "learning_rate": 7.544521763956048e-06, - "loss": 0.3848, - "step": 15766 - }, - { - "epoch": 1.0304555257826287, - "grad_norm": 0.4531192183494568, - "learning_rate": 7.544221166936951e-06, - "loss": 0.38, - "step": 15767 - }, - { - "epoch": 1.0305208809881707, - "grad_norm": 0.47871488332748413, - "learning_rate": 7.543920557508806e-06, - "loss": 0.2815, - "step": 15768 - }, - { - "epoch": 1.0305862361937128, - "grad_norm": 0.4792271554470062, - "learning_rate": 7.543619935673079e-06, - "loss": 0.3528, - "step": 15769 - }, - { - "epoch": 1.0306515913992549, - "grad_norm": 0.4431339204311371, - "learning_rate": 7.543319301431235e-06, - "loss": 0.3456, - "step": 15770 - }, - { - "epoch": 1.0307169466047972, - "grad_norm": 0.45511385798454285, - "learning_rate": 7.543018654784743e-06, - "loss": 0.3498, - "step": 15771 - }, - { - "epoch": 1.0307823018103393, - "grad_norm": 0.44410762190818787, - "learning_rate": 7.542717995735068e-06, - "loss": 0.3237, - "step": 15772 - }, - { - "epoch": 1.0308476570158813, - "grad_norm": 0.43734708428382874, - "learning_rate": 7.542417324283675e-06, - "loss": 0.3369, - "step": 15773 - }, - { - "epoch": 1.0309130122214234, - "grad_norm": 0.42639923095703125, - "learning_rate": 7.542116640432035e-06, - "loss": 0.3141, - "step": 15774 - }, - { - "epoch": 1.0309783674269655, - "grad_norm": 0.4694978594779968, - "learning_rate": 7.541815944181609e-06, - "loss": 0.3422, - "step": 15775 - }, - { - "epoch": 1.0310437226325078, - "grad_norm": 0.45306161046028137, - "learning_rate": 7.541515235533866e-06, - "loss": 0.3126, - "step": 15776 - }, - { - "epoch": 1.0311090778380498, - "grad_norm": 0.4308871626853943, - "learning_rate": 7.5412145144902735e-06, - "loss": 0.3519, - "step": 15777 - }, - { - "epoch": 1.031174433043592, - "grad_norm": 0.4713728427886963, - "learning_rate": 7.540913781052297e-06, - "loss": 0.339, - "step": 15778 - }, - { - "epoch": 1.031239788249134, - "grad_norm": 0.49831151962280273, - "learning_rate": 7.5406130352214045e-06, - "loss": 0.3671, - "step": 15779 - }, - { - "epoch": 1.0313051434546763, - "grad_norm": 0.43398699164390564, - "learning_rate": 7.540312276999062e-06, - "loss": 0.3094, - "step": 15780 - }, - { - "epoch": 1.0313704986602183, - "grad_norm": 0.42147430777549744, - "learning_rate": 7.540011506386736e-06, - "loss": 0.3189, - "step": 15781 - }, - { - "epoch": 1.0314358538657604, - "grad_norm": 0.4849579632282257, - "learning_rate": 7.539710723385894e-06, - "loss": 0.3753, - "step": 15782 - }, - { - "epoch": 1.0315012090713025, - "grad_norm": 0.4356859028339386, - "learning_rate": 7.5394099279980025e-06, - "loss": 0.3375, - "step": 15783 - }, - { - "epoch": 1.0315665642768446, - "grad_norm": 0.43548744916915894, - "learning_rate": 7.539109120224529e-06, - "loss": 0.3086, - "step": 15784 - }, - { - "epoch": 1.0316319194823869, - "grad_norm": 0.40212714672088623, - "learning_rate": 7.538808300066943e-06, - "loss": 0.2989, - "step": 15785 - }, - { - "epoch": 1.031697274687929, - "grad_norm": 0.42388996481895447, - "learning_rate": 7.538507467526708e-06, - "loss": 0.3054, - "step": 15786 - }, - { - "epoch": 1.031762629893471, - "grad_norm": 0.4657570421695709, - "learning_rate": 7.538206622605292e-06, - "loss": 0.3469, - "step": 15787 - }, - { - "epoch": 1.031827985099013, - "grad_norm": 0.4387129545211792, - "learning_rate": 7.5379057653041635e-06, - "loss": 0.35, - "step": 15788 - }, - { - "epoch": 1.0318933403045552, - "grad_norm": 0.41033414006233215, - "learning_rate": 7.5376048956247886e-06, - "loss": 0.2854, - "step": 15789 - }, - { - "epoch": 1.0319586955100974, - "grad_norm": 0.48620110750198364, - "learning_rate": 7.537304013568636e-06, - "loss": 0.378, - "step": 15790 - }, - { - "epoch": 1.0320240507156395, - "grad_norm": 0.43816041946411133, - "learning_rate": 7.537003119137173e-06, - "loss": 0.3215, - "step": 15791 - }, - { - "epoch": 1.0320894059211816, - "grad_norm": 0.4366806745529175, - "learning_rate": 7.536702212331867e-06, - "loss": 0.3555, - "step": 15792 - }, - { - "epoch": 1.0321547611267237, - "grad_norm": 0.433204710483551, - "learning_rate": 7.5364012931541844e-06, - "loss": 0.3106, - "step": 15793 - }, - { - "epoch": 1.032220116332266, - "grad_norm": 0.434613436460495, - "learning_rate": 7.536100361605595e-06, - "loss": 0.325, - "step": 15794 - }, - { - "epoch": 1.032285471537808, - "grad_norm": 0.4877561926841736, - "learning_rate": 7.535799417687565e-06, - "loss": 0.3829, - "step": 15795 - }, - { - "epoch": 1.03235082674335, - "grad_norm": 0.468234658241272, - "learning_rate": 7.535498461401563e-06, - "loss": 0.3629, - "step": 15796 - }, - { - "epoch": 1.0324161819488922, - "grad_norm": 0.43487778306007385, - "learning_rate": 7.535197492749057e-06, - "loss": 0.3268, - "step": 15797 - }, - { - "epoch": 1.0324815371544342, - "grad_norm": 0.45069482922554016, - "learning_rate": 7.534896511731514e-06, - "loss": 0.3265, - "step": 15798 - }, - { - "epoch": 1.0325468923599765, - "grad_norm": 0.4581204056739807, - "learning_rate": 7.534595518350403e-06, - "loss": 0.3389, - "step": 15799 - }, - { - "epoch": 1.0326122475655186, - "grad_norm": 0.43256765604019165, - "learning_rate": 7.534294512607191e-06, - "loss": 0.3505, - "step": 15800 - }, - { - "epoch": 1.0326776027710607, - "grad_norm": 0.5151135921478271, - "learning_rate": 7.533993494503347e-06, - "loss": 0.41, - "step": 15801 - }, - { - "epoch": 1.0327429579766028, - "grad_norm": 0.4975196421146393, - "learning_rate": 7.533692464040338e-06, - "loss": 0.369, - "step": 15802 - }, - { - "epoch": 1.032808313182145, - "grad_norm": 0.4950621724128723, - "learning_rate": 7.5333914212196355e-06, - "loss": 0.359, - "step": 15803 - }, - { - "epoch": 1.0328736683876871, - "grad_norm": 0.46530085802078247, - "learning_rate": 7.533090366042703e-06, - "loss": 0.3691, - "step": 15804 - }, - { - "epoch": 1.0329390235932292, - "grad_norm": 0.4382416307926178, - "learning_rate": 7.532789298511013e-06, - "loss": 0.3219, - "step": 15805 - }, - { - "epoch": 1.0330043787987713, - "grad_norm": 0.4749968647956848, - "learning_rate": 7.532488218626032e-06, - "loss": 0.3661, - "step": 15806 - }, - { - "epoch": 1.0330697340043133, - "grad_norm": 0.4506654739379883, - "learning_rate": 7.5321871263892275e-06, - "loss": 0.3336, - "step": 15807 - }, - { - "epoch": 1.0331350892098556, - "grad_norm": 0.43492308259010315, - "learning_rate": 7.53188602180207e-06, - "loss": 0.3099, - "step": 15808 - }, - { - "epoch": 1.0332004444153977, - "grad_norm": 0.4308479130268097, - "learning_rate": 7.531584904866027e-06, - "loss": 0.3346, - "step": 15809 - }, - { - "epoch": 1.0332657996209398, - "grad_norm": 0.4674423336982727, - "learning_rate": 7.5312837755825676e-06, - "loss": 0.3869, - "step": 15810 - }, - { - "epoch": 1.0333311548264819, - "grad_norm": 0.45534729957580566, - "learning_rate": 7.530982633953159e-06, - "loss": 0.3499, - "step": 15811 - }, - { - "epoch": 1.0333965100320242, - "grad_norm": 0.49120113253593445, - "learning_rate": 7.530681479979273e-06, - "loss": 0.3672, - "step": 15812 - }, - { - "epoch": 1.0334618652375662, - "grad_norm": 0.44581127166748047, - "learning_rate": 7.5303803136623755e-06, - "loss": 0.332, - "step": 15813 - }, - { - "epoch": 1.0335272204431083, - "grad_norm": 0.42955002188682556, - "learning_rate": 7.530079135003937e-06, - "loss": 0.3419, - "step": 15814 - }, - { - "epoch": 1.0335925756486504, - "grad_norm": 0.45427507162094116, - "learning_rate": 7.529777944005425e-06, - "loss": 0.3561, - "step": 15815 - }, - { - "epoch": 1.0336579308541924, - "grad_norm": 0.45866936445236206, - "learning_rate": 7.529476740668311e-06, - "loss": 0.3328, - "step": 15816 - }, - { - "epoch": 1.0337232860597347, - "grad_norm": 0.42922618985176086, - "learning_rate": 7.529175524994063e-06, - "loss": 0.3147, - "step": 15817 - }, - { - "epoch": 1.0337886412652768, - "grad_norm": 0.46843546628952026, - "learning_rate": 7.528874296984149e-06, - "loss": 0.3595, - "step": 15818 - }, - { - "epoch": 1.0338539964708189, - "grad_norm": 0.4613758325576782, - "learning_rate": 7.528573056640039e-06, - "loss": 0.3531, - "step": 15819 - }, - { - "epoch": 1.033919351676361, - "grad_norm": 0.4563542902469635, - "learning_rate": 7.528271803963202e-06, - "loss": 0.3467, - "step": 15820 - }, - { - "epoch": 1.0339847068819032, - "grad_norm": 0.41521739959716797, - "learning_rate": 7.527970538955109e-06, - "loss": 0.3076, - "step": 15821 - }, - { - "epoch": 1.0340500620874453, - "grad_norm": 0.4420032203197479, - "learning_rate": 7.5276692616172254e-06, - "loss": 0.3262, - "step": 15822 - }, - { - "epoch": 1.0341154172929874, - "grad_norm": 0.48577195405960083, - "learning_rate": 7.527367971951025e-06, - "loss": 0.3784, - "step": 15823 - }, - { - "epoch": 1.0341807724985295, - "grad_norm": 0.47907888889312744, - "learning_rate": 7.527066669957974e-06, - "loss": 0.3493, - "step": 15824 - }, - { - "epoch": 1.0342461277040715, - "grad_norm": 0.48235344886779785, - "learning_rate": 7.526765355639545e-06, - "loss": 0.3887, - "step": 15825 - }, - { - "epoch": 1.0343114829096138, - "grad_norm": 0.4414643347263336, - "learning_rate": 7.5264640289972045e-06, - "loss": 0.3285, - "step": 15826 - }, - { - "epoch": 1.034376838115156, - "grad_norm": 0.4412213861942291, - "learning_rate": 7.526162690032426e-06, - "loss": 0.346, - "step": 15827 - }, - { - "epoch": 1.034442193320698, - "grad_norm": 0.44358623027801514, - "learning_rate": 7.525861338746676e-06, - "loss": 0.3407, - "step": 15828 - }, - { - "epoch": 1.03450754852624, - "grad_norm": 0.45164433121681213, - "learning_rate": 7.525559975141423e-06, - "loss": 0.3273, - "step": 15829 - }, - { - "epoch": 1.0345729037317821, - "grad_norm": 0.4686276614665985, - "learning_rate": 7.5252585992181415e-06, - "loss": 0.363, - "step": 15830 - }, - { - "epoch": 1.0346382589373244, - "grad_norm": 0.5003420114517212, - "learning_rate": 7.524957210978297e-06, - "loss": 0.3692, - "step": 15831 - }, - { - "epoch": 1.0347036141428665, - "grad_norm": 0.4188174307346344, - "learning_rate": 7.524655810423364e-06, - "loss": 0.327, - "step": 15832 - }, - { - "epoch": 1.0347689693484086, - "grad_norm": 0.4743111729621887, - "learning_rate": 7.524354397554807e-06, - "loss": 0.3685, - "step": 15833 - }, - { - "epoch": 1.0348343245539506, - "grad_norm": 0.4771732687950134, - "learning_rate": 7.524052972374102e-06, - "loss": 0.3671, - "step": 15834 - }, - { - "epoch": 1.034899679759493, - "grad_norm": 0.4349338412284851, - "learning_rate": 7.523751534882714e-06, - "loss": 0.3602, - "step": 15835 - }, - { - "epoch": 1.034965034965035, - "grad_norm": 0.5162888169288635, - "learning_rate": 7.523450085082117e-06, - "loss": 0.3988, - "step": 15836 - }, - { - "epoch": 1.035030390170577, - "grad_norm": 0.44995149970054626, - "learning_rate": 7.523148622973779e-06, - "loss": 0.3276, - "step": 15837 - }, - { - "epoch": 1.0350957453761191, - "grad_norm": 0.47283655405044556, - "learning_rate": 7.522847148559171e-06, - "loss": 0.3675, - "step": 15838 - }, - { - "epoch": 1.0351611005816612, - "grad_norm": 0.4758037030696869, - "learning_rate": 7.5225456618397645e-06, - "loss": 0.3598, - "step": 15839 - }, - { - "epoch": 1.0352264557872035, - "grad_norm": 0.42274487018585205, - "learning_rate": 7.522244162817027e-06, - "loss": 0.2811, - "step": 15840 - }, - { - "epoch": 1.0352918109927456, - "grad_norm": 0.43868711590766907, - "learning_rate": 7.521942651492432e-06, - "loss": 0.355, - "step": 15841 - }, - { - "epoch": 1.0353571661982877, - "grad_norm": 0.46339425444602966, - "learning_rate": 7.521641127867448e-06, - "loss": 0.3626, - "step": 15842 - }, - { - "epoch": 1.0354225214038297, - "grad_norm": 0.4463191330432892, - "learning_rate": 7.5213395919435486e-06, - "loss": 0.3418, - "step": 15843 - }, - { - "epoch": 1.035487876609372, - "grad_norm": 0.45070981979370117, - "learning_rate": 7.521038043722202e-06, - "loss": 0.3415, - "step": 15844 - }, - { - "epoch": 1.035553231814914, - "grad_norm": 0.44040626287460327, - "learning_rate": 7.520736483204878e-06, - "loss": 0.3169, - "step": 15845 - }, - { - "epoch": 1.0356185870204562, - "grad_norm": 0.48595234751701355, - "learning_rate": 7.520434910393051e-06, - "loss": 0.3627, - "step": 15846 - }, - { - "epoch": 1.0356839422259982, - "grad_norm": 0.424405038356781, - "learning_rate": 7.5201333252881884e-06, - "loss": 0.2826, - "step": 15847 - }, - { - "epoch": 1.0357492974315403, - "grad_norm": 0.47548767924308777, - "learning_rate": 7.519831727891763e-06, - "loss": 0.3796, - "step": 15848 - }, - { - "epoch": 1.0358146526370826, - "grad_norm": 0.4742758572101593, - "learning_rate": 7.5195301182052445e-06, - "loss": 0.3719, - "step": 15849 - }, - { - "epoch": 1.0358800078426247, - "grad_norm": 0.463164359331131, - "learning_rate": 7.519228496230107e-06, - "loss": 0.3471, - "step": 15850 - }, - { - "epoch": 1.0359453630481668, - "grad_norm": 0.43213051557540894, - "learning_rate": 7.5189268619678165e-06, - "loss": 0.328, - "step": 15851 - }, - { - "epoch": 1.0360107182537088, - "grad_norm": 0.4856230914592743, - "learning_rate": 7.518625215419848e-06, - "loss": 0.3789, - "step": 15852 - }, - { - "epoch": 1.0360760734592511, - "grad_norm": 0.47976210713386536, - "learning_rate": 7.518323556587672e-06, - "loss": 0.3589, - "step": 15853 - }, - { - "epoch": 1.0361414286647932, - "grad_norm": 0.44848567247390747, - "learning_rate": 7.518021885472759e-06, - "loss": 0.3257, - "step": 15854 - }, - { - "epoch": 1.0362067838703353, - "grad_norm": 0.4461570680141449, - "learning_rate": 7.517720202076583e-06, - "loss": 0.3669, - "step": 15855 - }, - { - "epoch": 1.0362721390758773, - "grad_norm": 0.4441314935684204, - "learning_rate": 7.517418506400611e-06, - "loss": 0.3579, - "step": 15856 - }, - { - "epoch": 1.0363374942814194, - "grad_norm": 0.3970010280609131, - "learning_rate": 7.5171167984463175e-06, - "loss": 0.3013, - "step": 15857 - }, - { - "epoch": 1.0364028494869617, - "grad_norm": 0.4426477551460266, - "learning_rate": 7.516815078215174e-06, - "loss": 0.3418, - "step": 15858 - }, - { - "epoch": 1.0364682046925038, - "grad_norm": 0.4288714826107025, - "learning_rate": 7.516513345708651e-06, - "loss": 0.324, - "step": 15859 - }, - { - "epoch": 1.0365335598980459, - "grad_norm": 0.45848506689071655, - "learning_rate": 7.51621160092822e-06, - "loss": 0.3737, - "step": 15860 - }, - { - "epoch": 1.036598915103588, - "grad_norm": 0.4246535301208496, - "learning_rate": 7.515909843875355e-06, - "loss": 0.35, - "step": 15861 - }, - { - "epoch": 1.0366642703091302, - "grad_norm": 0.4486737549304962, - "learning_rate": 7.515608074551525e-06, - "loss": 0.3268, - "step": 15862 - }, - { - "epoch": 1.0367296255146723, - "grad_norm": 0.43175071477890015, - "learning_rate": 7.515306292958203e-06, - "loss": 0.3326, - "step": 15863 - }, - { - "epoch": 1.0367949807202144, - "grad_norm": 0.4423149824142456, - "learning_rate": 7.51500449909686e-06, - "loss": 0.3277, - "step": 15864 - }, - { - "epoch": 1.0368603359257564, - "grad_norm": 0.4635217487812042, - "learning_rate": 7.51470269296897e-06, - "loss": 0.376, - "step": 15865 - }, - { - "epoch": 1.0369256911312985, - "grad_norm": 0.4465751349925995, - "learning_rate": 7.514400874576004e-06, - "loss": 0.3325, - "step": 15866 - }, - { - "epoch": 1.0369910463368408, - "grad_norm": 0.4463614225387573, - "learning_rate": 7.514099043919433e-06, - "loss": 0.3564, - "step": 15867 - }, - { - "epoch": 1.0370564015423829, - "grad_norm": 0.4268401265144348, - "learning_rate": 7.513797201000731e-06, - "loss": 0.3269, - "step": 15868 - }, - { - "epoch": 1.037121756747925, - "grad_norm": 0.46786606311798096, - "learning_rate": 7.513495345821369e-06, - "loss": 0.3661, - "step": 15869 - }, - { - "epoch": 1.037187111953467, - "grad_norm": 0.4640296995639801, - "learning_rate": 7.513193478382819e-06, - "loss": 0.3499, - "step": 15870 - }, - { - "epoch": 1.0372524671590093, - "grad_norm": 0.4440566897392273, - "learning_rate": 7.512891598686554e-06, - "loss": 0.3424, - "step": 15871 - }, - { - "epoch": 1.0373178223645514, - "grad_norm": 0.48283493518829346, - "learning_rate": 7.512589706734046e-06, - "loss": 0.3586, - "step": 15872 - }, - { - "epoch": 1.0373831775700935, - "grad_norm": 0.43852174282073975, - "learning_rate": 7.512287802526765e-06, - "loss": 0.3199, - "step": 15873 - }, - { - "epoch": 1.0374485327756355, - "grad_norm": 0.45028823614120483, - "learning_rate": 7.51198588606619e-06, - "loss": 0.3405, - "step": 15874 - }, - { - "epoch": 1.0375138879811776, - "grad_norm": 0.43766912817955017, - "learning_rate": 7.5116839573537885e-06, - "loss": 0.3134, - "step": 15875 - }, - { - "epoch": 1.03757924318672, - "grad_norm": 0.4694659411907196, - "learning_rate": 7.511382016391033e-06, - "loss": 0.3764, - "step": 15876 - }, - { - "epoch": 1.037644598392262, - "grad_norm": 0.4550599455833435, - "learning_rate": 7.511080063179399e-06, - "loss": 0.3861, - "step": 15877 - }, - { - "epoch": 1.037709953597804, - "grad_norm": 0.43722736835479736, - "learning_rate": 7.510778097720355e-06, - "loss": 0.3501, - "step": 15878 - }, - { - "epoch": 1.0377753088033461, - "grad_norm": 0.45036008954048157, - "learning_rate": 7.51047612001538e-06, - "loss": 0.3378, - "step": 15879 - }, - { - "epoch": 1.0378406640088884, - "grad_norm": 0.4425663948059082, - "learning_rate": 7.5101741300659395e-06, - "loss": 0.3491, - "step": 15880 - }, - { - "epoch": 1.0379060192144305, - "grad_norm": 0.43346884846687317, - "learning_rate": 7.509872127873512e-06, - "loss": 0.3115, - "step": 15881 - }, - { - "epoch": 1.0379713744199726, - "grad_norm": 0.4575836658477783, - "learning_rate": 7.509570113439571e-06, - "loss": 0.3651, - "step": 15882 - }, - { - "epoch": 1.0380367296255146, - "grad_norm": 0.4507502019405365, - "learning_rate": 7.509268086765583e-06, - "loss": 0.3471, - "step": 15883 - }, - { - "epoch": 1.0381020848310567, - "grad_norm": 0.4554588496685028, - "learning_rate": 7.508966047853028e-06, - "loss": 0.3509, - "step": 15884 - }, - { - "epoch": 1.038167440036599, - "grad_norm": 0.46485376358032227, - "learning_rate": 7.5086639967033745e-06, - "loss": 0.3446, - "step": 15885 - }, - { - "epoch": 1.038232795242141, - "grad_norm": 0.46044859290122986, - "learning_rate": 7.508361933318099e-06, - "loss": 0.3594, - "step": 15886 - }, - { - "epoch": 1.0382981504476831, - "grad_norm": 0.4197836220264435, - "learning_rate": 7.508059857698673e-06, - "loss": 0.3052, - "step": 15887 - }, - { - "epoch": 1.0383635056532252, - "grad_norm": 0.44064804911613464, - "learning_rate": 7.5077577698465696e-06, - "loss": 0.3232, - "step": 15888 - }, - { - "epoch": 1.0384288608587675, - "grad_norm": 0.4554585814476013, - "learning_rate": 7.507455669763263e-06, - "loss": 0.3554, - "step": 15889 - }, - { - "epoch": 1.0384942160643096, - "grad_norm": 0.4691154360771179, - "learning_rate": 7.507153557450228e-06, - "loss": 0.3761, - "step": 15890 - }, - { - "epoch": 1.0385595712698517, - "grad_norm": 0.44869810342788696, - "learning_rate": 7.506851432908935e-06, - "loss": 0.3441, - "step": 15891 - }, - { - "epoch": 1.0386249264753937, - "grad_norm": 0.534701406955719, - "learning_rate": 7.506549296140859e-06, - "loss": 0.4173, - "step": 15892 - }, - { - "epoch": 1.0386902816809358, - "grad_norm": 0.46809908747673035, - "learning_rate": 7.506247147147474e-06, - "loss": 0.3832, - "step": 15893 - }, - { - "epoch": 1.038755636886478, - "grad_norm": 0.4488159418106079, - "learning_rate": 7.5059449859302535e-06, - "loss": 0.3458, - "step": 15894 - }, - { - "epoch": 1.0388209920920202, - "grad_norm": 0.491122841835022, - "learning_rate": 7.505642812490672e-06, - "loss": 0.3664, - "step": 15895 - }, - { - "epoch": 1.0388863472975622, - "grad_norm": 0.3987635374069214, - "learning_rate": 7.505340626830202e-06, - "loss": 0.2887, - "step": 15896 - }, - { - "epoch": 1.0389517025031043, - "grad_norm": 0.4556143581867218, - "learning_rate": 7.505038428950318e-06, - "loss": 0.3274, - "step": 15897 - }, - { - "epoch": 1.0390170577086466, - "grad_norm": 0.4318532347679138, - "learning_rate": 7.504736218852492e-06, - "loss": 0.3048, - "step": 15898 - }, - { - "epoch": 1.0390824129141887, - "grad_norm": 0.41274791955947876, - "learning_rate": 7.504433996538201e-06, - "loss": 0.3012, - "step": 15899 - }, - { - "epoch": 1.0391477681197308, - "grad_norm": 0.42034855484962463, - "learning_rate": 7.504131762008917e-06, - "loss": 0.3239, - "step": 15900 - }, - { - "epoch": 1.0392131233252728, - "grad_norm": 0.44173717498779297, - "learning_rate": 7.503829515266117e-06, - "loss": 0.3506, - "step": 15901 - }, - { - "epoch": 1.039278478530815, - "grad_norm": 0.45919856429100037, - "learning_rate": 7.503527256311272e-06, - "loss": 0.3474, - "step": 15902 - }, - { - "epoch": 1.0393438337363572, - "grad_norm": 0.40479904413223267, - "learning_rate": 7.5032249851458564e-06, - "loss": 0.3053, - "step": 15903 - }, - { - "epoch": 1.0394091889418993, - "grad_norm": 0.48598939180374146, - "learning_rate": 7.502922701771345e-06, - "loss": 0.4101, - "step": 15904 - }, - { - "epoch": 1.0394745441474413, - "grad_norm": 0.47933247685432434, - "learning_rate": 7.502620406189214e-06, - "loss": 0.383, - "step": 15905 - }, - { - "epoch": 1.0395398993529834, - "grad_norm": 0.4721432626247406, - "learning_rate": 7.502318098400936e-06, - "loss": 0.3682, - "step": 15906 - }, - { - "epoch": 1.0396052545585255, - "grad_norm": 0.4942317008972168, - "learning_rate": 7.502015778407985e-06, - "loss": 0.3913, - "step": 15907 - }, - { - "epoch": 1.0396706097640678, - "grad_norm": 0.4402003288269043, - "learning_rate": 7.5017134462118366e-06, - "loss": 0.3078, - "step": 15908 - }, - { - "epoch": 1.0397359649696098, - "grad_norm": 0.4501919448375702, - "learning_rate": 7.501411101813966e-06, - "loss": 0.3358, - "step": 15909 - }, - { - "epoch": 1.039801320175152, - "grad_norm": 0.4389926493167877, - "learning_rate": 7.501108745215845e-06, - "loss": 0.3166, - "step": 15910 - }, - { - "epoch": 1.039866675380694, - "grad_norm": 0.43391063809394836, - "learning_rate": 7.500806376418952e-06, - "loss": 0.3236, - "step": 15911 - }, - { - "epoch": 1.0399320305862363, - "grad_norm": 0.4366278350353241, - "learning_rate": 7.50050399542476e-06, - "loss": 0.3254, - "step": 15912 - }, - { - "epoch": 1.0399973857917784, - "grad_norm": 0.4802301228046417, - "learning_rate": 7.500201602234743e-06, - "loss": 0.377, - "step": 15913 - }, - { - "epoch": 1.0400627409973204, - "grad_norm": 0.47257718443870544, - "learning_rate": 7.499899196850375e-06, - "loss": 0.3135, - "step": 15914 - }, - { - "epoch": 1.0401280962028625, - "grad_norm": 0.48913314938545227, - "learning_rate": 7.499596779273135e-06, - "loss": 0.368, - "step": 15915 - }, - { - "epoch": 1.0401934514084046, - "grad_norm": 0.43871310353279114, - "learning_rate": 7.499294349504494e-06, - "loss": 0.3225, - "step": 15916 - }, - { - "epoch": 1.0402588066139469, - "grad_norm": 0.46505528688430786, - "learning_rate": 7.49899190754593e-06, - "loss": 0.343, - "step": 15917 - }, - { - "epoch": 1.040324161819489, - "grad_norm": 0.4820912480354309, - "learning_rate": 7.498689453398915e-06, - "loss": 0.3418, - "step": 15918 - }, - { - "epoch": 1.040389517025031, - "grad_norm": 0.4644339680671692, - "learning_rate": 7.498386987064927e-06, - "loss": 0.3709, - "step": 15919 - }, - { - "epoch": 1.040454872230573, - "grad_norm": 0.41147926449775696, - "learning_rate": 7.4980845085454405e-06, - "loss": 0.285, - "step": 15920 - }, - { - "epoch": 1.0405202274361154, - "grad_norm": 0.45514875650405884, - "learning_rate": 7.49778201784193e-06, - "loss": 0.3397, - "step": 15921 - }, - { - "epoch": 1.0405855826416575, - "grad_norm": 0.429605096578598, - "learning_rate": 7.497479514955872e-06, - "loss": 0.3036, - "step": 15922 - }, - { - "epoch": 1.0406509378471995, - "grad_norm": 0.45561668276786804, - "learning_rate": 7.49717699988874e-06, - "loss": 0.3491, - "step": 15923 - }, - { - "epoch": 1.0407162930527416, - "grad_norm": 0.44797518849372864, - "learning_rate": 7.49687447264201e-06, - "loss": 0.3723, - "step": 15924 - }, - { - "epoch": 1.0407816482582837, - "grad_norm": 0.4650128483772278, - "learning_rate": 7.496571933217159e-06, - "loss": 0.3712, - "step": 15925 - }, - { - "epoch": 1.040847003463826, - "grad_norm": 0.43579956889152527, - "learning_rate": 7.496269381615664e-06, - "loss": 0.3409, - "step": 15926 - }, - { - "epoch": 1.040912358669368, - "grad_norm": 0.4608747661113739, - "learning_rate": 7.4959668178389956e-06, - "loss": 0.3508, - "step": 15927 - }, - { - "epoch": 1.0409777138749101, - "grad_norm": 0.44788262248039246, - "learning_rate": 7.4956642418886336e-06, - "loss": 0.3299, - "step": 15928 - }, - { - "epoch": 1.0410430690804522, - "grad_norm": 0.4167988896369934, - "learning_rate": 7.495361653766052e-06, - "loss": 0.3054, - "step": 15929 - }, - { - "epoch": 1.0411084242859945, - "grad_norm": 0.47324731945991516, - "learning_rate": 7.4950590534727274e-06, - "loss": 0.3878, - "step": 15930 - }, - { - "epoch": 1.0411737794915366, - "grad_norm": 0.47262775897979736, - "learning_rate": 7.494756441010136e-06, - "loss": 0.3687, - "step": 15931 - }, - { - "epoch": 1.0412391346970786, - "grad_norm": 0.44201600551605225, - "learning_rate": 7.494453816379752e-06, - "loss": 0.3237, - "step": 15932 - }, - { - "epoch": 1.0413044899026207, - "grad_norm": 0.45032525062561035, - "learning_rate": 7.494151179583054e-06, - "loss": 0.3654, - "step": 15933 - }, - { - "epoch": 1.0413698451081628, - "grad_norm": 0.47340017557144165, - "learning_rate": 7.4938485306215145e-06, - "loss": 0.3374, - "step": 15934 - }, - { - "epoch": 1.041435200313705, - "grad_norm": 0.49045827984809875, - "learning_rate": 7.493545869496614e-06, - "loss": 0.4076, - "step": 15935 - }, - { - "epoch": 1.0415005555192471, - "grad_norm": 0.42957648634910583, - "learning_rate": 7.493243196209825e-06, - "loss": 0.3085, - "step": 15936 - }, - { - "epoch": 1.0415659107247892, - "grad_norm": 0.4105693995952606, - "learning_rate": 7.492940510762627e-06, - "loss": 0.2988, - "step": 15937 - }, - { - "epoch": 1.0416312659303313, - "grad_norm": 0.4495973587036133, - "learning_rate": 7.492637813156492e-06, - "loss": 0.351, - "step": 15938 - }, - { - "epoch": 1.0416966211358736, - "grad_norm": 0.4780735969543457, - "learning_rate": 7.492335103392901e-06, - "loss": 0.377, - "step": 15939 - }, - { - "epoch": 1.0417619763414157, - "grad_norm": 0.427325040102005, - "learning_rate": 7.492032381473326e-06, - "loss": 0.3408, - "step": 15940 - }, - { - "epoch": 1.0418273315469577, - "grad_norm": 0.46285080909729004, - "learning_rate": 7.4917296473992476e-06, - "loss": 0.3731, - "step": 15941 - }, - { - "epoch": 1.0418926867524998, - "grad_norm": 0.45030486583709717, - "learning_rate": 7.49142690117214e-06, - "loss": 0.3405, - "step": 15942 - }, - { - "epoch": 1.0419580419580419, - "grad_norm": 0.41049444675445557, - "learning_rate": 7.49112414279348e-06, - "loss": 0.2875, - "step": 15943 - }, - { - "epoch": 1.0420233971635842, - "grad_norm": 0.4690943956375122, - "learning_rate": 7.490821372264745e-06, - "loss": 0.3651, - "step": 15944 - }, - { - "epoch": 1.0420887523691262, - "grad_norm": 0.4470690190792084, - "learning_rate": 7.49051858958741e-06, - "loss": 0.382, - "step": 15945 - }, - { - "epoch": 1.0421541075746683, - "grad_norm": 0.4730600416660309, - "learning_rate": 7.490215794762955e-06, - "loss": 0.3601, - "step": 15946 - }, - { - "epoch": 1.0422194627802104, - "grad_norm": 0.41757455468177795, - "learning_rate": 7.489912987792853e-06, - "loss": 0.3122, - "step": 15947 - }, - { - "epoch": 1.0422848179857525, - "grad_norm": 0.40229374170303345, - "learning_rate": 7.489610168678585e-06, - "loss": 0.3071, - "step": 15948 - }, - { - "epoch": 1.0423501731912947, - "grad_norm": 0.4292026162147522, - "learning_rate": 7.4893073374216245e-06, - "loss": 0.3465, - "step": 15949 - }, - { - "epoch": 1.0424155283968368, - "grad_norm": 0.46020931005477905, - "learning_rate": 7.489004494023449e-06, - "loss": 0.3576, - "step": 15950 - }, - { - "epoch": 1.042480883602379, - "grad_norm": 0.4452332854270935, - "learning_rate": 7.488701638485537e-06, - "loss": 0.3183, - "step": 15951 - }, - { - "epoch": 1.042546238807921, - "grad_norm": 0.47011852264404297, - "learning_rate": 7.488398770809364e-06, - "loss": 0.3887, - "step": 15952 - }, - { - "epoch": 1.0426115940134633, - "grad_norm": 0.395666241645813, - "learning_rate": 7.488095890996411e-06, - "loss": 0.2706, - "step": 15953 - }, - { - "epoch": 1.0426769492190053, - "grad_norm": 0.44158506393432617, - "learning_rate": 7.487792999048149e-06, - "loss": 0.3308, - "step": 15954 - }, - { - "epoch": 1.0427423044245474, - "grad_norm": 0.44987404346466064, - "learning_rate": 7.487490094966061e-06, - "loss": 0.3301, - "step": 15955 - }, - { - "epoch": 1.0428076596300895, - "grad_norm": 0.43153107166290283, - "learning_rate": 7.4871871787516206e-06, - "loss": 0.3159, - "step": 15956 - }, - { - "epoch": 1.0428730148356316, - "grad_norm": 0.44239094853401184, - "learning_rate": 7.486884250406308e-06, - "loss": 0.3351, - "step": 15957 - }, - { - "epoch": 1.0429383700411738, - "grad_norm": 0.44070103764533997, - "learning_rate": 7.486581309931598e-06, - "loss": 0.3244, - "step": 15958 - }, - { - "epoch": 1.043003725246716, - "grad_norm": 0.4654008150100708, - "learning_rate": 7.486278357328971e-06, - "loss": 0.3834, - "step": 15959 - }, - { - "epoch": 1.043069080452258, - "grad_norm": 0.42152294516563416, - "learning_rate": 7.485975392599902e-06, - "loss": 0.2991, - "step": 15960 - }, - { - "epoch": 1.0431344356578, - "grad_norm": 0.47432637214660645, - "learning_rate": 7.485672415745869e-06, - "loss": 0.3729, - "step": 15961 - }, - { - "epoch": 1.0431997908633424, - "grad_norm": 0.44445574283599854, - "learning_rate": 7.485369426768353e-06, - "loss": 0.3265, - "step": 15962 - }, - { - "epoch": 1.0432651460688844, - "grad_norm": 0.4614851772785187, - "learning_rate": 7.485066425668827e-06, - "loss": 0.3501, - "step": 15963 - }, - { - "epoch": 1.0433305012744265, - "grad_norm": 0.46275201439857483, - "learning_rate": 7.484763412448772e-06, - "loss": 0.3407, - "step": 15964 - }, - { - "epoch": 1.0433958564799686, - "grad_norm": 0.44269418716430664, - "learning_rate": 7.484460387109664e-06, - "loss": 0.3365, - "step": 15965 - }, - { - "epoch": 1.0434612116855106, - "grad_norm": 0.46993061900138855, - "learning_rate": 7.484157349652984e-06, - "loss": 0.3623, - "step": 15966 - }, - { - "epoch": 1.043526566891053, - "grad_norm": 0.43421483039855957, - "learning_rate": 7.4838543000802075e-06, - "loss": 0.3135, - "step": 15967 - }, - { - "epoch": 1.043591922096595, - "grad_norm": 0.454470694065094, - "learning_rate": 7.4835512383928125e-06, - "loss": 0.337, - "step": 15968 - }, - { - "epoch": 1.043657277302137, - "grad_norm": 0.43242812156677246, - "learning_rate": 7.483248164592278e-06, - "loss": 0.349, - "step": 15969 - }, - { - "epoch": 1.0437226325076792, - "grad_norm": 0.48517265915870667, - "learning_rate": 7.482945078680081e-06, - "loss": 0.3454, - "step": 15970 - }, - { - "epoch": 1.0437879877132215, - "grad_norm": 0.44539085030555725, - "learning_rate": 7.482641980657702e-06, - "loss": 0.3283, - "step": 15971 - }, - { - "epoch": 1.0438533429187635, - "grad_norm": 0.45339563488960266, - "learning_rate": 7.482338870526617e-06, - "loss": 0.3659, - "step": 15972 - }, - { - "epoch": 1.0439186981243056, - "grad_norm": 0.4391777813434601, - "learning_rate": 7.482035748288306e-06, - "loss": 0.3239, - "step": 15973 - }, - { - "epoch": 1.0439840533298477, - "grad_norm": 0.4881090521812439, - "learning_rate": 7.481732613944247e-06, - "loss": 0.3715, - "step": 15974 - }, - { - "epoch": 1.0440494085353897, - "grad_norm": 0.4271552860736847, - "learning_rate": 7.481429467495919e-06, - "loss": 0.3305, - "step": 15975 - }, - { - "epoch": 1.044114763740932, - "grad_norm": 0.45213812589645386, - "learning_rate": 7.481126308944797e-06, - "loss": 0.3603, - "step": 15976 - }, - { - "epoch": 1.0441801189464741, - "grad_norm": 0.46165549755096436, - "learning_rate": 7.480823138292365e-06, - "loss": 0.3607, - "step": 15977 - }, - { - "epoch": 1.0442454741520162, - "grad_norm": 0.44902893900871277, - "learning_rate": 7.480519955540099e-06, - "loss": 0.315, - "step": 15978 - }, - { - "epoch": 1.0443108293575583, - "grad_norm": 0.4467196762561798, - "learning_rate": 7.480216760689476e-06, - "loss": 0.3216, - "step": 15979 - }, - { - "epoch": 1.0443761845631006, - "grad_norm": 0.5242639780044556, - "learning_rate": 7.4799135537419785e-06, - "loss": 0.3875, - "step": 15980 - }, - { - "epoch": 1.0444415397686426, - "grad_norm": 0.4836612939834595, - "learning_rate": 7.4796103346990825e-06, - "loss": 0.3904, - "step": 15981 - }, - { - "epoch": 1.0445068949741847, - "grad_norm": 0.45392730832099915, - "learning_rate": 7.479307103562268e-06, - "loss": 0.3063, - "step": 15982 - }, - { - "epoch": 1.0445722501797268, - "grad_norm": 0.43818655610084534, - "learning_rate": 7.479003860333014e-06, - "loss": 0.3209, - "step": 15983 - }, - { - "epoch": 1.0446376053852688, - "grad_norm": 0.521374523639679, - "learning_rate": 7.478700605012798e-06, - "loss": 0.4192, - "step": 15984 - }, - { - "epoch": 1.0447029605908111, - "grad_norm": 0.4648186266422272, - "learning_rate": 7.478397337603103e-06, - "loss": 0.3595, - "step": 15985 - }, - { - "epoch": 1.0447683157963532, - "grad_norm": 0.4561239778995514, - "learning_rate": 7.478094058105404e-06, - "loss": 0.3624, - "step": 15986 - }, - { - "epoch": 1.0448336710018953, - "grad_norm": 0.4138014018535614, - "learning_rate": 7.477790766521182e-06, - "loss": 0.3176, - "step": 15987 - }, - { - "epoch": 1.0448990262074374, - "grad_norm": 0.5003045797348022, - "learning_rate": 7.477487462851916e-06, - "loss": 0.418, - "step": 15988 - }, - { - "epoch": 1.0449643814129796, - "grad_norm": 0.4711742699146271, - "learning_rate": 7.4771841470990854e-06, - "loss": 0.3621, - "step": 15989 - }, - { - "epoch": 1.0450297366185217, - "grad_norm": 0.4732164740562439, - "learning_rate": 7.47688081926417e-06, - "loss": 0.3575, - "step": 15990 - }, - { - "epoch": 1.0450950918240638, - "grad_norm": 0.4564274847507477, - "learning_rate": 7.476577479348649e-06, - "loss": 0.352, - "step": 15991 - }, - { - "epoch": 1.0451604470296059, - "grad_norm": 0.421882688999176, - "learning_rate": 7.476274127353999e-06, - "loss": 0.335, - "step": 15992 - }, - { - "epoch": 1.045225802235148, - "grad_norm": 0.3941395878791809, - "learning_rate": 7.475970763281705e-06, - "loss": 0.2798, - "step": 15993 - }, - { - "epoch": 1.0452911574406902, - "grad_norm": 0.42184966802597046, - "learning_rate": 7.475667387133244e-06, - "loss": 0.3187, - "step": 15994 - }, - { - "epoch": 1.0453565126462323, - "grad_norm": 0.4963744878768921, - "learning_rate": 7.475363998910096e-06, - "loss": 0.3281, - "step": 15995 - }, - { - "epoch": 1.0454218678517744, - "grad_norm": 0.4685758054256439, - "learning_rate": 7.475060598613738e-06, - "loss": 0.3775, - "step": 15996 - }, - { - "epoch": 1.0454872230573165, - "grad_norm": 0.4629594385623932, - "learning_rate": 7.4747571862456534e-06, - "loss": 0.3707, - "step": 15997 - }, - { - "epoch": 1.0455525782628587, - "grad_norm": 0.4789305329322815, - "learning_rate": 7.4744537618073194e-06, - "loss": 0.3796, - "step": 15998 - }, - { - "epoch": 1.0456179334684008, - "grad_norm": 0.4757988452911377, - "learning_rate": 7.474150325300218e-06, - "loss": 0.3859, - "step": 15999 - }, - { - "epoch": 1.045683288673943, - "grad_norm": 0.4701661169528961, - "learning_rate": 7.473846876725829e-06, - "loss": 0.3355, - "step": 16000 - }, - { - "epoch": 1.045748643879485, - "grad_norm": 0.44279995560646057, - "learning_rate": 7.47354341608563e-06, - "loss": 0.3167, - "step": 16001 - }, - { - "epoch": 1.045813999085027, - "grad_norm": 0.4664006531238556, - "learning_rate": 7.473239943381104e-06, - "loss": 0.3415, - "step": 16002 - }, - { - "epoch": 1.0458793542905693, - "grad_norm": 0.4168444573879242, - "learning_rate": 7.4729364586137295e-06, - "loss": 0.2826, - "step": 16003 - }, - { - "epoch": 1.0459447094961114, - "grad_norm": 0.43262356519699097, - "learning_rate": 7.472632961784988e-06, - "loss": 0.2785, - "step": 16004 - }, - { - "epoch": 1.0460100647016535, - "grad_norm": 0.44316473603248596, - "learning_rate": 7.472329452896358e-06, - "loss": 0.3253, - "step": 16005 - }, - { - "epoch": 1.0460754199071955, - "grad_norm": 0.42974090576171875, - "learning_rate": 7.472025931949321e-06, - "loss": 0.3071, - "step": 16006 - }, - { - "epoch": 1.0461407751127378, - "grad_norm": 0.450324147939682, - "learning_rate": 7.471722398945358e-06, - "loss": 0.3471, - "step": 16007 - }, - { - "epoch": 1.04620613031828, - "grad_norm": 0.4668424725532532, - "learning_rate": 7.471418853885948e-06, - "loss": 0.3629, - "step": 16008 - }, - { - "epoch": 1.046271485523822, - "grad_norm": 0.4546244442462921, - "learning_rate": 7.471115296772572e-06, - "loss": 0.3608, - "step": 16009 - }, - { - "epoch": 1.046336840729364, - "grad_norm": 0.44972726702690125, - "learning_rate": 7.470811727606711e-06, - "loss": 0.3624, - "step": 16010 - }, - { - "epoch": 1.0464021959349061, - "grad_norm": 0.4652496576309204, - "learning_rate": 7.470508146389844e-06, - "loss": 0.3359, - "step": 16011 - }, - { - "epoch": 1.0464675511404484, - "grad_norm": 0.46383875608444214, - "learning_rate": 7.470204553123453e-06, - "loss": 0.39, - "step": 16012 - }, - { - "epoch": 1.0465329063459905, - "grad_norm": 0.4285947382450104, - "learning_rate": 7.469900947809021e-06, - "loss": 0.3534, - "step": 16013 - }, - { - "epoch": 1.0465982615515326, - "grad_norm": 0.4756484031677246, - "learning_rate": 7.469597330448025e-06, - "loss": 0.3788, - "step": 16014 - }, - { - "epoch": 1.0466636167570746, - "grad_norm": 0.4348495304584503, - "learning_rate": 7.4692937010419465e-06, - "loss": 0.3232, - "step": 16015 - }, - { - "epoch": 1.0467289719626167, - "grad_norm": 0.4852817952632904, - "learning_rate": 7.46899005959227e-06, - "loss": 0.3604, - "step": 16016 - }, - { - "epoch": 1.046794327168159, - "grad_norm": 0.44126904010772705, - "learning_rate": 7.468686406100469e-06, - "loss": 0.2922, - "step": 16017 - }, - { - "epoch": 1.046859682373701, - "grad_norm": 0.44158703088760376, - "learning_rate": 7.468382740568033e-06, - "loss": 0.3494, - "step": 16018 - }, - { - "epoch": 1.0469250375792432, - "grad_norm": 0.42098841071128845, - "learning_rate": 7.468079062996437e-06, - "loss": 0.3307, - "step": 16019 - }, - { - "epoch": 1.0469903927847852, - "grad_norm": 0.46187469363212585, - "learning_rate": 7.467775373387165e-06, - "loss": 0.3707, - "step": 16020 - }, - { - "epoch": 1.0470557479903275, - "grad_norm": 0.45330387353897095, - "learning_rate": 7.467471671741697e-06, - "loss": 0.3111, - "step": 16021 - }, - { - "epoch": 1.0471211031958696, - "grad_norm": 0.447543203830719, - "learning_rate": 7.467167958061516e-06, - "loss": 0.3418, - "step": 16022 - }, - { - "epoch": 1.0471864584014117, - "grad_norm": 0.4412584602832794, - "learning_rate": 7.466864232348102e-06, - "loss": 0.3129, - "step": 16023 - }, - { - "epoch": 1.0472518136069537, - "grad_norm": 0.47823694348335266, - "learning_rate": 7.4665604946029365e-06, - "loss": 0.3881, - "step": 16024 - }, - { - "epoch": 1.0473171688124958, - "grad_norm": 0.4403769075870514, - "learning_rate": 7.466256744827501e-06, - "loss": 0.3291, - "step": 16025 - }, - { - "epoch": 1.047382524018038, - "grad_norm": 0.5087055563926697, - "learning_rate": 7.465952983023277e-06, - "loss": 0.4303, - "step": 16026 - }, - { - "epoch": 1.0474478792235802, - "grad_norm": 0.44996869564056396, - "learning_rate": 7.465649209191746e-06, - "loss": 0.3593, - "step": 16027 - }, - { - "epoch": 1.0475132344291223, - "grad_norm": 0.44411277770996094, - "learning_rate": 7.465345423334389e-06, - "loss": 0.3351, - "step": 16028 - }, - { - "epoch": 1.0475785896346643, - "grad_norm": 0.4417063891887665, - "learning_rate": 7.465041625452689e-06, - "loss": 0.3357, - "step": 16029 - }, - { - "epoch": 1.0476439448402066, - "grad_norm": 0.4979275166988373, - "learning_rate": 7.464737815548126e-06, - "loss": 0.3533, - "step": 16030 - }, - { - "epoch": 1.0477093000457487, - "grad_norm": 0.44359245896339417, - "learning_rate": 7.464433993622185e-06, - "loss": 0.3252, - "step": 16031 - }, - { - "epoch": 1.0477746552512908, - "grad_norm": 0.4697877764701843, - "learning_rate": 7.464130159676344e-06, - "loss": 0.3826, - "step": 16032 - }, - { - "epoch": 1.0478400104568328, - "grad_norm": 0.48326537013053894, - "learning_rate": 7.463826313712086e-06, - "loss": 0.3844, - "step": 16033 - }, - { - "epoch": 1.047905365662375, - "grad_norm": 0.45493197441101074, - "learning_rate": 7.463522455730894e-06, - "loss": 0.336, - "step": 16034 - }, - { - "epoch": 1.0479707208679172, - "grad_norm": 0.4514913856983185, - "learning_rate": 7.463218585734249e-06, - "loss": 0.3621, - "step": 16035 - }, - { - "epoch": 1.0480360760734593, - "grad_norm": 0.4070689082145691, - "learning_rate": 7.462914703723635e-06, - "loss": 0.2869, - "step": 16036 - }, - { - "epoch": 1.0481014312790013, - "grad_norm": 0.46747660636901855, - "learning_rate": 7.462610809700533e-06, - "loss": 0.36, - "step": 16037 - }, - { - "epoch": 1.0481667864845434, - "grad_norm": 0.44843965768814087, - "learning_rate": 7.462306903666424e-06, - "loss": 0.3083, - "step": 16038 - }, - { - "epoch": 1.0482321416900857, - "grad_norm": 0.458403080701828, - "learning_rate": 7.46200298562279e-06, - "loss": 0.3547, - "step": 16039 - }, - { - "epoch": 1.0482974968956278, - "grad_norm": 0.4130711555480957, - "learning_rate": 7.461699055571117e-06, - "loss": 0.3083, - "step": 16040 - }, - { - "epoch": 1.0483628521011699, - "grad_norm": 0.46022409200668335, - "learning_rate": 7.461395113512883e-06, - "loss": 0.3379, - "step": 16041 - }, - { - "epoch": 1.048428207306712, - "grad_norm": 0.48253992199897766, - "learning_rate": 7.461091159449574e-06, - "loss": 0.351, - "step": 16042 - }, - { - "epoch": 1.048493562512254, - "grad_norm": 0.4672299325466156, - "learning_rate": 7.46078719338267e-06, - "loss": 0.345, - "step": 16043 - }, - { - "epoch": 1.0485589177177963, - "grad_norm": 0.4777798354625702, - "learning_rate": 7.460483215313653e-06, - "loss": 0.398, - "step": 16044 - }, - { - "epoch": 1.0486242729233384, - "grad_norm": 0.4742797315120697, - "learning_rate": 7.460179225244009e-06, - "loss": 0.3299, - "step": 16045 - }, - { - "epoch": 1.0486896281288804, - "grad_norm": 0.4599107503890991, - "learning_rate": 7.459875223175217e-06, - "loss": 0.2892, - "step": 16046 - }, - { - "epoch": 1.0487549833344225, - "grad_norm": 0.4627104699611664, - "learning_rate": 7.459571209108762e-06, - "loss": 0.3515, - "step": 16047 - }, - { - "epoch": 1.0488203385399648, - "grad_norm": 0.42812010645866394, - "learning_rate": 7.459267183046126e-06, - "loss": 0.2939, - "step": 16048 - }, - { - "epoch": 1.0488856937455069, - "grad_norm": 0.4522375762462616, - "learning_rate": 7.4589631449887934e-06, - "loss": 0.3608, - "step": 16049 - }, - { - "epoch": 1.048951048951049, - "grad_norm": 0.40866246819496155, - "learning_rate": 7.4586590949382435e-06, - "loss": 0.2941, - "step": 16050 - }, - { - "epoch": 1.049016404156591, - "grad_norm": 0.4577885866165161, - "learning_rate": 7.4583550328959635e-06, - "loss": 0.3588, - "step": 16051 - }, - { - "epoch": 1.049081759362133, - "grad_norm": 0.4313019812107086, - "learning_rate": 7.458050958863433e-06, - "loss": 0.3215, - "step": 16052 - }, - { - "epoch": 1.0491471145676754, - "grad_norm": 0.45992523431777954, - "learning_rate": 7.457746872842137e-06, - "loss": 0.3674, - "step": 16053 - }, - { - "epoch": 1.0492124697732175, - "grad_norm": 0.438744455575943, - "learning_rate": 7.457442774833558e-06, - "loss": 0.3452, - "step": 16054 - }, - { - "epoch": 1.0492778249787595, - "grad_norm": 0.4470614194869995, - "learning_rate": 7.457138664839178e-06, - "loss": 0.3629, - "step": 16055 - }, - { - "epoch": 1.0493431801843016, - "grad_norm": 0.4344133734703064, - "learning_rate": 7.456834542860483e-06, - "loss": 0.3384, - "step": 16056 - }, - { - "epoch": 1.0494085353898437, - "grad_norm": 0.4609927237033844, - "learning_rate": 7.456530408898954e-06, - "loss": 0.3581, - "step": 16057 - }, - { - "epoch": 1.049473890595386, - "grad_norm": 0.44912052154541016, - "learning_rate": 7.456226262956077e-06, - "loss": 0.3278, - "step": 16058 - }, - { - "epoch": 1.049539245800928, - "grad_norm": 0.4931497573852539, - "learning_rate": 7.455922105033331e-06, - "loss": 0.3756, - "step": 16059 - }, - { - "epoch": 1.0496046010064701, - "grad_norm": 0.45639166235923767, - "learning_rate": 7.455617935132205e-06, - "loss": 0.3309, - "step": 16060 - }, - { - "epoch": 1.0496699562120122, - "grad_norm": 0.4729221761226654, - "learning_rate": 7.455313753254177e-06, - "loss": 0.3772, - "step": 16061 - }, - { - "epoch": 1.0497353114175545, - "grad_norm": 0.4363824427127838, - "learning_rate": 7.455009559400733e-06, - "loss": 0.3135, - "step": 16062 - }, - { - "epoch": 1.0498006666230966, - "grad_norm": 0.466463565826416, - "learning_rate": 7.454705353573359e-06, - "loss": 0.366, - "step": 16063 - }, - { - "epoch": 1.0498660218286386, - "grad_norm": 0.45507103204727173, - "learning_rate": 7.454401135773535e-06, - "loss": 0.3462, - "step": 16064 - }, - { - "epoch": 1.0499313770341807, - "grad_norm": 0.5240984559059143, - "learning_rate": 7.454096906002747e-06, - "loss": 0.3706, - "step": 16065 - }, - { - "epoch": 1.0499967322397228, - "grad_norm": 0.46580490469932556, - "learning_rate": 7.453792664262478e-06, - "loss": 0.362, - "step": 16066 - }, - { - "epoch": 1.050062087445265, - "grad_norm": 0.46701693534851074, - "learning_rate": 7.453488410554213e-06, - "loss": 0.384, - "step": 16067 - }, - { - "epoch": 1.0501274426508072, - "grad_norm": 0.4378340244293213, - "learning_rate": 7.453184144879433e-06, - "loss": 0.3146, - "step": 16068 - }, - { - "epoch": 1.0501927978563492, - "grad_norm": 0.454223096370697, - "learning_rate": 7.452879867239627e-06, - "loss": 0.3054, - "step": 16069 - }, - { - "epoch": 1.0502581530618913, - "grad_norm": 0.43760383129119873, - "learning_rate": 7.452575577636274e-06, - "loss": 0.3388, - "step": 16070 - }, - { - "epoch": 1.0503235082674336, - "grad_norm": 0.42061540484428406, - "learning_rate": 7.45227127607086e-06, - "loss": 0.3195, - "step": 16071 - }, - { - "epoch": 1.0503888634729757, - "grad_norm": 0.4589472711086273, - "learning_rate": 7.45196696254487e-06, - "loss": 0.3657, - "step": 16072 - }, - { - "epoch": 1.0504542186785177, - "grad_norm": 0.46775344014167786, - "learning_rate": 7.451662637059788e-06, - "loss": 0.3756, - "step": 16073 - }, - { - "epoch": 1.0505195738840598, - "grad_norm": 0.46782544255256653, - "learning_rate": 7.451358299617097e-06, - "loss": 0.3192, - "step": 16074 - }, - { - "epoch": 1.0505849290896019, - "grad_norm": 0.43963491916656494, - "learning_rate": 7.4510539502182835e-06, - "loss": 0.3565, - "step": 16075 - }, - { - "epoch": 1.0506502842951442, - "grad_norm": 0.48291751742362976, - "learning_rate": 7.45074958886483e-06, - "loss": 0.3714, - "step": 16076 - }, - { - "epoch": 1.0507156395006862, - "grad_norm": 0.4731026887893677, - "learning_rate": 7.450445215558222e-06, - "loss": 0.3431, - "step": 16077 - }, - { - "epoch": 1.0507809947062283, - "grad_norm": 0.4549613296985626, - "learning_rate": 7.450140830299945e-06, - "loss": 0.3671, - "step": 16078 - }, - { - "epoch": 1.0508463499117704, - "grad_norm": 0.41363072395324707, - "learning_rate": 7.449836433091481e-06, - "loss": 0.2931, - "step": 16079 - }, - { - "epoch": 1.0509117051173127, - "grad_norm": 0.4579743444919586, - "learning_rate": 7.449532023934316e-06, - "loss": 0.3524, - "step": 16080 - }, - { - "epoch": 1.0509770603228548, - "grad_norm": 0.43860164284706116, - "learning_rate": 7.449227602829936e-06, - "loss": 0.3211, - "step": 16081 - }, - { - "epoch": 1.0510424155283968, - "grad_norm": 0.5000020861625671, - "learning_rate": 7.448923169779822e-06, - "loss": 0.392, - "step": 16082 - }, - { - "epoch": 1.051107770733939, - "grad_norm": 0.46752145886421204, - "learning_rate": 7.448618724785464e-06, - "loss": 0.3347, - "step": 16083 - }, - { - "epoch": 1.051173125939481, - "grad_norm": 0.46789035201072693, - "learning_rate": 7.448314267848342e-06, - "loss": 0.395, - "step": 16084 - }, - { - "epoch": 1.0512384811450233, - "grad_norm": 0.44531866908073425, - "learning_rate": 7.448009798969945e-06, - "loss": 0.3261, - "step": 16085 - }, - { - "epoch": 1.0513038363505653, - "grad_norm": 0.48364758491516113, - "learning_rate": 7.447705318151754e-06, - "loss": 0.3822, - "step": 16086 - }, - { - "epoch": 1.0513691915561074, - "grad_norm": 0.45722150802612305, - "learning_rate": 7.447400825395259e-06, - "loss": 0.3327, - "step": 16087 - }, - { - "epoch": 1.0514345467616495, - "grad_norm": 0.4609781801700592, - "learning_rate": 7.44709632070194e-06, - "loss": 0.3468, - "step": 16088 - }, - { - "epoch": 1.0514999019671918, - "grad_norm": 0.44670552015304565, - "learning_rate": 7.446791804073285e-06, - "loss": 0.3474, - "step": 16089 - }, - { - "epoch": 1.0515652571727339, - "grad_norm": 0.44275641441345215, - "learning_rate": 7.44648727551078e-06, - "loss": 0.3545, - "step": 16090 - }, - { - "epoch": 1.051630612378276, - "grad_norm": 0.497925728559494, - "learning_rate": 7.446182735015908e-06, - "loss": 0.3845, - "step": 16091 - }, - { - "epoch": 1.051695967583818, - "grad_norm": 0.49487441778182983, - "learning_rate": 7.445878182590155e-06, - "loss": 0.3849, - "step": 16092 - }, - { - "epoch": 1.05176132278936, - "grad_norm": 0.48519906401634216, - "learning_rate": 7.445573618235007e-06, - "loss": 0.3539, - "step": 16093 - }, - { - "epoch": 1.0518266779949024, - "grad_norm": 0.4065033495426178, - "learning_rate": 7.445269041951949e-06, - "loss": 0.2864, - "step": 16094 - }, - { - "epoch": 1.0518920332004444, - "grad_norm": 0.4485379755496979, - "learning_rate": 7.444964453742467e-06, - "loss": 0.3567, - "step": 16095 - }, - { - "epoch": 1.0519573884059865, - "grad_norm": 0.42287561297416687, - "learning_rate": 7.444659853608047e-06, - "loss": 0.3125, - "step": 16096 - }, - { - "epoch": 1.0520227436115286, - "grad_norm": 0.41896817088127136, - "learning_rate": 7.444355241550174e-06, - "loss": 0.2971, - "step": 16097 - }, - { - "epoch": 1.0520880988170709, - "grad_norm": 0.45448678731918335, - "learning_rate": 7.444050617570332e-06, - "loss": 0.3442, - "step": 16098 - }, - { - "epoch": 1.052153454022613, - "grad_norm": 0.48453983664512634, - "learning_rate": 7.44374598167001e-06, - "loss": 0.374, - "step": 16099 - }, - { - "epoch": 1.052218809228155, - "grad_norm": 0.44335755705833435, - "learning_rate": 7.443441333850693e-06, - "loss": 0.3608, - "step": 16100 - }, - { - "epoch": 1.052284164433697, - "grad_norm": 0.44495922327041626, - "learning_rate": 7.443136674113865e-06, - "loss": 0.3715, - "step": 16101 - }, - { - "epoch": 1.0523495196392392, - "grad_norm": 0.46403542160987854, - "learning_rate": 7.442832002461012e-06, - "loss": 0.3703, - "step": 16102 - }, - { - "epoch": 1.0524148748447815, - "grad_norm": 0.45666810870170593, - "learning_rate": 7.442527318893623e-06, - "loss": 0.3269, - "step": 16103 - }, - { - "epoch": 1.0524802300503235, - "grad_norm": 0.46022355556488037, - "learning_rate": 7.44222262341318e-06, - "loss": 0.3435, - "step": 16104 - }, - { - "epoch": 1.0525455852558656, - "grad_norm": 0.4367315471172333, - "learning_rate": 7.441917916021173e-06, - "loss": 0.2846, - "step": 16105 - }, - { - "epoch": 1.0526109404614077, - "grad_norm": 0.42847898602485657, - "learning_rate": 7.441613196719085e-06, - "loss": 0.3082, - "step": 16106 - }, - { - "epoch": 1.05267629566695, - "grad_norm": 0.41425612568855286, - "learning_rate": 7.441308465508405e-06, - "loss": 0.2609, - "step": 16107 - }, - { - "epoch": 1.052741650872492, - "grad_norm": 0.4618186950683594, - "learning_rate": 7.441003722390617e-06, - "loss": 0.3516, - "step": 16108 - }, - { - "epoch": 1.0528070060780341, - "grad_norm": 0.49293121695518494, - "learning_rate": 7.440698967367208e-06, - "loss": 0.3753, - "step": 16109 - }, - { - "epoch": 1.0528723612835762, - "grad_norm": 0.4523603320121765, - "learning_rate": 7.440394200439665e-06, - "loss": 0.3325, - "step": 16110 - }, - { - "epoch": 1.0529377164891183, - "grad_norm": 0.43645426630973816, - "learning_rate": 7.440089421609475e-06, - "loss": 0.329, - "step": 16111 - }, - { - "epoch": 1.0530030716946606, - "grad_norm": 0.46498623490333557, - "learning_rate": 7.4397846308781214e-06, - "loss": 0.3477, - "step": 16112 - }, - { - "epoch": 1.0530684269002026, - "grad_norm": 0.43298086524009705, - "learning_rate": 7.439479828247094e-06, - "loss": 0.327, - "step": 16113 - }, - { - "epoch": 1.0531337821057447, - "grad_norm": 0.4323459267616272, - "learning_rate": 7.439175013717879e-06, - "loss": 0.3293, - "step": 16114 - }, - { - "epoch": 1.0531991373112868, - "grad_norm": 0.4473608136177063, - "learning_rate": 7.438870187291961e-06, - "loss": 0.3665, - "step": 16115 - }, - { - "epoch": 1.053264492516829, - "grad_norm": 0.39610910415649414, - "learning_rate": 7.43856534897083e-06, - "loss": 0.2983, - "step": 16116 - }, - { - "epoch": 1.0533298477223711, - "grad_norm": 0.4124915301799774, - "learning_rate": 7.43826049875597e-06, - "loss": 0.2992, - "step": 16117 - }, - { - "epoch": 1.0533952029279132, - "grad_norm": 0.4507128596305847, - "learning_rate": 7.437955636648868e-06, - "loss": 0.3268, - "step": 16118 - }, - { - "epoch": 1.0534605581334553, - "grad_norm": 0.44693493843078613, - "learning_rate": 7.437650762651014e-06, - "loss": 0.3318, - "step": 16119 - }, - { - "epoch": 1.0535259133389974, - "grad_norm": 0.45532897114753723, - "learning_rate": 7.4373458767638915e-06, - "loss": 0.3558, - "step": 16120 - }, - { - "epoch": 1.0535912685445397, - "grad_norm": 0.47934550046920776, - "learning_rate": 7.43704097898899e-06, - "loss": 0.3719, - "step": 16121 - }, - { - "epoch": 1.0536566237500817, - "grad_norm": 0.46038565039634705, - "learning_rate": 7.436736069327792e-06, - "loss": 0.3473, - "step": 16122 - }, - { - "epoch": 1.0537219789556238, - "grad_norm": 0.4664520025253296, - "learning_rate": 7.436431147781791e-06, - "loss": 0.3668, - "step": 16123 - }, - { - "epoch": 1.0537873341611659, - "grad_norm": 0.41246747970581055, - "learning_rate": 7.43612621435247e-06, - "loss": 0.2976, - "step": 16124 - }, - { - "epoch": 1.053852689366708, - "grad_norm": 0.4643338620662689, - "learning_rate": 7.435821269041319e-06, - "loss": 0.3622, - "step": 16125 - }, - { - "epoch": 1.0539180445722502, - "grad_norm": 0.4554542005062103, - "learning_rate": 7.435516311849822e-06, - "loss": 0.3613, - "step": 16126 - }, - { - "epoch": 1.0539833997777923, - "grad_norm": 0.4273832440376282, - "learning_rate": 7.43521134277947e-06, - "loss": 0.3206, - "step": 16127 - }, - { - "epoch": 1.0540487549833344, - "grad_norm": 0.4744766354560852, - "learning_rate": 7.434906361831746e-06, - "loss": 0.3163, - "step": 16128 - }, - { - "epoch": 1.0541141101888765, - "grad_norm": 0.42165735363960266, - "learning_rate": 7.434601369008142e-06, - "loss": 0.2951, - "step": 16129 - }, - { - "epoch": 1.0541794653944188, - "grad_norm": 0.442953884601593, - "learning_rate": 7.434296364310144e-06, - "loss": 0.3424, - "step": 16130 - }, - { - "epoch": 1.0542448205999608, - "grad_norm": 0.4295116364955902, - "learning_rate": 7.433991347739238e-06, - "loss": 0.3412, - "step": 16131 - }, - { - "epoch": 1.054310175805503, - "grad_norm": 0.44163334369659424, - "learning_rate": 7.4336863192969135e-06, - "loss": 0.348, - "step": 16132 - }, - { - "epoch": 1.054375531011045, - "grad_norm": 0.43274348974227905, - "learning_rate": 7.433381278984657e-06, - "loss": 0.3264, - "step": 16133 - }, - { - "epoch": 1.054440886216587, - "grad_norm": 0.4627331793308258, - "learning_rate": 7.433076226803959e-06, - "loss": 0.3732, - "step": 16134 - }, - { - "epoch": 1.0545062414221293, - "grad_norm": 0.4267916679382324, - "learning_rate": 7.432771162756305e-06, - "loss": 0.3559, - "step": 16135 - }, - { - "epoch": 1.0545715966276714, - "grad_norm": 0.5071887373924255, - "learning_rate": 7.432466086843182e-06, - "loss": 0.3824, - "step": 16136 - }, - { - "epoch": 1.0546369518332135, - "grad_norm": 0.4612812101840973, - "learning_rate": 7.432160999066079e-06, - "loss": 0.3652, - "step": 16137 - }, - { - "epoch": 1.0547023070387556, - "grad_norm": 0.45583727955818176, - "learning_rate": 7.431855899426485e-06, - "loss": 0.3629, - "step": 16138 - }, - { - "epoch": 1.0547676622442979, - "grad_norm": 0.4504631459712982, - "learning_rate": 7.431550787925887e-06, - "loss": 0.3415, - "step": 16139 - }, - { - "epoch": 1.05483301744984, - "grad_norm": 0.4481804370880127, - "learning_rate": 7.431245664565774e-06, - "loss": 0.3423, - "step": 16140 - }, - { - "epoch": 1.054898372655382, - "grad_norm": 0.4327700734138489, - "learning_rate": 7.4309405293476344e-06, - "loss": 0.3375, - "step": 16141 - }, - { - "epoch": 1.054963727860924, - "grad_norm": 0.46866220235824585, - "learning_rate": 7.430635382272954e-06, - "loss": 0.3837, - "step": 16142 - }, - { - "epoch": 1.0550290830664661, - "grad_norm": 0.452713280916214, - "learning_rate": 7.430330223343223e-06, - "loss": 0.3364, - "step": 16143 - }, - { - "epoch": 1.0550944382720084, - "grad_norm": 0.450216144323349, - "learning_rate": 7.430025052559929e-06, - "loss": 0.3799, - "step": 16144 - }, - { - "epoch": 1.0551597934775505, - "grad_norm": 0.45943623781204224, - "learning_rate": 7.429719869924563e-06, - "loss": 0.3693, - "step": 16145 - }, - { - "epoch": 1.0552251486830926, - "grad_norm": 0.42963409423828125, - "learning_rate": 7.42941467543861e-06, - "loss": 0.3046, - "step": 16146 - }, - { - "epoch": 1.0552905038886347, - "grad_norm": 0.42389383912086487, - "learning_rate": 7.42910946910356e-06, - "loss": 0.3167, - "step": 16147 - }, - { - "epoch": 1.055355859094177, - "grad_norm": 0.4476156532764435, - "learning_rate": 7.4288042509209026e-06, - "loss": 0.3475, - "step": 16148 - }, - { - "epoch": 1.055421214299719, - "grad_norm": 0.4937572479248047, - "learning_rate": 7.428499020892123e-06, - "loss": 0.3817, - "step": 16149 - }, - { - "epoch": 1.055486569505261, - "grad_norm": 0.4730284512042999, - "learning_rate": 7.428193779018715e-06, - "loss": 0.3641, - "step": 16150 - }, - { - "epoch": 1.0555519247108032, - "grad_norm": 0.43692266941070557, - "learning_rate": 7.427888525302164e-06, - "loss": 0.3665, - "step": 16151 - }, - { - "epoch": 1.0556172799163452, - "grad_norm": 0.4530215263366699, - "learning_rate": 7.42758325974396e-06, - "loss": 0.349, - "step": 16152 - }, - { - "epoch": 1.0556826351218875, - "grad_norm": 0.4344078302383423, - "learning_rate": 7.427277982345591e-06, - "loss": 0.3323, - "step": 16153 - }, - { - "epoch": 1.0557479903274296, - "grad_norm": 0.4584748446941376, - "learning_rate": 7.426972693108547e-06, - "loss": 0.3416, - "step": 16154 - }, - { - "epoch": 1.0558133455329717, - "grad_norm": 0.41894954442977905, - "learning_rate": 7.426667392034315e-06, - "loss": 0.2976, - "step": 16155 - }, - { - "epoch": 1.0558787007385138, - "grad_norm": 0.474901020526886, - "learning_rate": 7.426362079124385e-06, - "loss": 0.4069, - "step": 16156 - }, - { - "epoch": 1.055944055944056, - "grad_norm": 0.4707310199737549, - "learning_rate": 7.426056754380249e-06, - "loss": 0.3668, - "step": 16157 - }, - { - "epoch": 1.0560094111495981, - "grad_norm": 0.42840293049812317, - "learning_rate": 7.425751417803392e-06, - "loss": 0.332, - "step": 16158 - }, - { - "epoch": 1.0560747663551402, - "grad_norm": 0.48791441321372986, - "learning_rate": 7.4254460693953054e-06, - "loss": 0.3722, - "step": 16159 - }, - { - "epoch": 1.0561401215606823, - "grad_norm": 0.4557345509529114, - "learning_rate": 7.425140709157477e-06, - "loss": 0.3654, - "step": 16160 - }, - { - "epoch": 1.0562054767662243, - "grad_norm": 0.4374857246875763, - "learning_rate": 7.4248353370913985e-06, - "loss": 0.3404, - "step": 16161 - }, - { - "epoch": 1.0562708319717666, - "grad_norm": 0.46094179153442383, - "learning_rate": 7.4245299531985584e-06, - "loss": 0.3567, - "step": 16162 - }, - { - "epoch": 1.0563361871773087, - "grad_norm": 0.454353392124176, - "learning_rate": 7.4242245574804464e-06, - "loss": 0.3497, - "step": 16163 - }, - { - "epoch": 1.0564015423828508, - "grad_norm": 0.46465417742729187, - "learning_rate": 7.423919149938549e-06, - "loss": 0.3566, - "step": 16164 - }, - { - "epoch": 1.0564668975883929, - "grad_norm": 0.4740929901599884, - "learning_rate": 7.4236137305743595e-06, - "loss": 0.3601, - "step": 16165 - }, - { - "epoch": 1.056532252793935, - "grad_norm": 0.43093177676200867, - "learning_rate": 7.423308299389367e-06, - "loss": 0.3255, - "step": 16166 - }, - { - "epoch": 1.0565976079994772, - "grad_norm": 0.5268555283546448, - "learning_rate": 7.423002856385059e-06, - "loss": 0.3888, - "step": 16167 - }, - { - "epoch": 1.0566629632050193, - "grad_norm": 0.49433204531669617, - "learning_rate": 7.422697401562927e-06, - "loss": 0.3602, - "step": 16168 - }, - { - "epoch": 1.0567283184105614, - "grad_norm": 0.4234844446182251, - "learning_rate": 7.42239193492446e-06, - "loss": 0.291, - "step": 16169 - }, - { - "epoch": 1.0567936736161034, - "grad_norm": 0.4375132918357849, - "learning_rate": 7.422086456471149e-06, - "loss": 0.3128, - "step": 16170 - }, - { - "epoch": 1.0568590288216457, - "grad_norm": 0.4753700792789459, - "learning_rate": 7.421780966204483e-06, - "loss": 0.3788, - "step": 16171 - }, - { - "epoch": 1.0569243840271878, - "grad_norm": 0.4939681887626648, - "learning_rate": 7.421475464125954e-06, - "loss": 0.4117, - "step": 16172 - }, - { - "epoch": 1.0569897392327299, - "grad_norm": 0.4380776286125183, - "learning_rate": 7.421169950237047e-06, - "loss": 0.3213, - "step": 16173 - }, - { - "epoch": 1.057055094438272, - "grad_norm": 0.41436949372291565, - "learning_rate": 7.420864424539258e-06, - "loss": 0.2979, - "step": 16174 - }, - { - "epoch": 1.057120449643814, - "grad_norm": 0.46490415930747986, - "learning_rate": 7.420558887034074e-06, - "loss": 0.3494, - "step": 16175 - }, - { - "epoch": 1.0571858048493563, - "grad_norm": 0.42988309264183044, - "learning_rate": 7.4202533377229845e-06, - "loss": 0.3515, - "step": 16176 - }, - { - "epoch": 1.0572511600548984, - "grad_norm": 0.43051785230636597, - "learning_rate": 7.419947776607482e-06, - "loss": 0.3153, - "step": 16177 - }, - { - "epoch": 1.0573165152604405, - "grad_norm": 0.4236520230770111, - "learning_rate": 7.4196422036890545e-06, - "loss": 0.3057, - "step": 16178 - }, - { - "epoch": 1.0573818704659825, - "grad_norm": 0.4344591498374939, - "learning_rate": 7.419336618969196e-06, - "loss": 0.3324, - "step": 16179 - }, - { - "epoch": 1.0574472256715248, - "grad_norm": 0.44322726130485535, - "learning_rate": 7.419031022449393e-06, - "loss": 0.3086, - "step": 16180 - }, - { - "epoch": 1.057512580877067, - "grad_norm": 0.4004443287849426, - "learning_rate": 7.418725414131138e-06, - "loss": 0.2773, - "step": 16181 - }, - { - "epoch": 1.057577936082609, - "grad_norm": 0.4375944435596466, - "learning_rate": 7.418419794015923e-06, - "loss": 0.3639, - "step": 16182 - }, - { - "epoch": 1.057643291288151, - "grad_norm": 0.47543880343437195, - "learning_rate": 7.418114162105236e-06, - "loss": 0.3951, - "step": 16183 - }, - { - "epoch": 1.0577086464936931, - "grad_norm": 0.4490235447883606, - "learning_rate": 7.417808518400566e-06, - "loss": 0.3689, - "step": 16184 - }, - { - "epoch": 1.0577740016992354, - "grad_norm": 0.455479234457016, - "learning_rate": 7.41750286290341e-06, - "loss": 0.3619, - "step": 16185 - }, - { - "epoch": 1.0578393569047775, - "grad_norm": 0.41816574335098267, - "learning_rate": 7.417197195615253e-06, - "loss": 0.2951, - "step": 16186 - }, - { - "epoch": 1.0579047121103196, - "grad_norm": 0.45505598187446594, - "learning_rate": 7.4168915165375875e-06, - "loss": 0.3274, - "step": 16187 - }, - { - "epoch": 1.0579700673158616, - "grad_norm": 0.44512656331062317, - "learning_rate": 7.4165858256719055e-06, - "loss": 0.3405, - "step": 16188 - }, - { - "epoch": 1.058035422521404, - "grad_norm": 0.45190003514289856, - "learning_rate": 7.4162801230196965e-06, - "loss": 0.3491, - "step": 16189 - }, - { - "epoch": 1.058100777726946, - "grad_norm": 0.4577966630458832, - "learning_rate": 7.415974408582454e-06, - "loss": 0.312, - "step": 16190 - }, - { - "epoch": 1.058166132932488, - "grad_norm": 0.4371005892753601, - "learning_rate": 7.415668682361665e-06, - "loss": 0.3099, - "step": 16191 - }, - { - "epoch": 1.0582314881380301, - "grad_norm": 0.43823009729385376, - "learning_rate": 7.415362944358825e-06, - "loss": 0.3149, - "step": 16192 - }, - { - "epoch": 1.0582968433435722, - "grad_norm": 0.4281201660633087, - "learning_rate": 7.415057194575422e-06, - "loss": 0.3473, - "step": 16193 - }, - { - "epoch": 1.0583621985491145, - "grad_norm": 0.47924160957336426, - "learning_rate": 7.414751433012948e-06, - "loss": 0.3802, - "step": 16194 - }, - { - "epoch": 1.0584275537546566, - "grad_norm": 0.44475483894348145, - "learning_rate": 7.414445659672897e-06, - "loss": 0.3564, - "step": 16195 - }, - { - "epoch": 1.0584929089601987, - "grad_norm": 0.45348167419433594, - "learning_rate": 7.414139874556755e-06, - "loss": 0.3677, - "step": 16196 - }, - { - "epoch": 1.0585582641657407, - "grad_norm": 0.43412622809410095, - "learning_rate": 7.4138340776660185e-06, - "loss": 0.3366, - "step": 16197 - }, - { - "epoch": 1.058623619371283, - "grad_norm": 0.4788056015968323, - "learning_rate": 7.413528269002176e-06, - "loss": 0.3894, - "step": 16198 - }, - { - "epoch": 1.058688974576825, - "grad_norm": 0.4885151982307434, - "learning_rate": 7.4132224485667215e-06, - "loss": 0.3112, - "step": 16199 - }, - { - "epoch": 1.0587543297823672, - "grad_norm": 0.45134130120277405, - "learning_rate": 7.4129166163611434e-06, - "loss": 0.3681, - "step": 16200 - }, - { - "epoch": 1.0588196849879092, - "grad_norm": 0.4581235647201538, - "learning_rate": 7.412610772386935e-06, - "loss": 0.3533, - "step": 16201 - }, - { - "epoch": 1.0588850401934513, - "grad_norm": 0.4625314474105835, - "learning_rate": 7.41230491664559e-06, - "loss": 0.3772, - "step": 16202 - }, - { - "epoch": 1.0589503953989936, - "grad_norm": 0.47362852096557617, - "learning_rate": 7.411999049138596e-06, - "loss": 0.3575, - "step": 16203 - }, - { - "epoch": 1.0590157506045357, - "grad_norm": 0.4151182174682617, - "learning_rate": 7.411693169867449e-06, - "loss": 0.2837, - "step": 16204 - }, - { - "epoch": 1.0590811058100777, - "grad_norm": 0.4111938178539276, - "learning_rate": 7.411387278833637e-06, - "loss": 0.2965, - "step": 16205 - }, - { - "epoch": 1.0591464610156198, - "grad_norm": 0.48952680826187134, - "learning_rate": 7.4110813760386555e-06, - "loss": 0.378, - "step": 16206 - }, - { - "epoch": 1.0592118162211621, - "grad_norm": 0.4351980984210968, - "learning_rate": 7.410775461483995e-06, - "loss": 0.301, - "step": 16207 - }, - { - "epoch": 1.0592771714267042, - "grad_norm": 0.44695401191711426, - "learning_rate": 7.410469535171145e-06, - "loss": 0.3177, - "step": 16208 - }, - { - "epoch": 1.0593425266322463, - "grad_norm": 0.44649437069892883, - "learning_rate": 7.410163597101601e-06, - "loss": 0.3352, - "step": 16209 - }, - { - "epoch": 1.0594078818377883, - "grad_norm": 0.4653000831604004, - "learning_rate": 7.4098576472768555e-06, - "loss": 0.3736, - "step": 16210 - }, - { - "epoch": 1.0594732370433304, - "grad_norm": 0.5027621388435364, - "learning_rate": 7.409551685698398e-06, - "loss": 0.4325, - "step": 16211 - }, - { - "epoch": 1.0595385922488727, - "grad_norm": 0.4689064025878906, - "learning_rate": 7.409245712367724e-06, - "loss": 0.3849, - "step": 16212 - }, - { - "epoch": 1.0596039474544148, - "grad_norm": 0.43116524815559387, - "learning_rate": 7.408939727286323e-06, - "loss": 0.3181, - "step": 16213 - }, - { - "epoch": 1.0596693026599568, - "grad_norm": 0.4424514174461365, - "learning_rate": 7.4086337304556875e-06, - "loss": 0.3607, - "step": 16214 - }, - { - "epoch": 1.059734657865499, - "grad_norm": 0.4705367088317871, - "learning_rate": 7.408327721877312e-06, - "loss": 0.391, - "step": 16215 - }, - { - "epoch": 1.0598000130710412, - "grad_norm": 0.46798431873321533, - "learning_rate": 7.408021701552688e-06, - "loss": 0.3364, - "step": 16216 - }, - { - "epoch": 1.0598653682765833, - "grad_norm": 0.4381452798843384, - "learning_rate": 7.407715669483306e-06, - "loss": 0.3528, - "step": 16217 - }, - { - "epoch": 1.0599307234821254, - "grad_norm": 0.4279400408267975, - "learning_rate": 7.407409625670663e-06, - "loss": 0.316, - "step": 16218 - }, - { - "epoch": 1.0599960786876674, - "grad_norm": 0.47679150104522705, - "learning_rate": 7.4071035701162475e-06, - "loss": 0.4004, - "step": 16219 - }, - { - "epoch": 1.0600614338932095, - "grad_norm": 0.4515838325023651, - "learning_rate": 7.4067975028215555e-06, - "loss": 0.3322, - "step": 16220 - }, - { - "epoch": 1.0601267890987518, - "grad_norm": 0.44531142711639404, - "learning_rate": 7.406491423788077e-06, - "loss": 0.3397, - "step": 16221 - }, - { - "epoch": 1.0601921443042939, - "grad_norm": 0.44834426045417786, - "learning_rate": 7.406185333017307e-06, - "loss": 0.3311, - "step": 16222 - }, - { - "epoch": 1.060257499509836, - "grad_norm": 0.41237568855285645, - "learning_rate": 7.405879230510737e-06, - "loss": 0.2965, - "step": 16223 - }, - { - "epoch": 1.060322854715378, - "grad_norm": 0.48074665665626526, - "learning_rate": 7.405573116269861e-06, - "loss": 0.3557, - "step": 16224 - }, - { - "epoch": 1.0603882099209203, - "grad_norm": 0.46773916482925415, - "learning_rate": 7.405266990296172e-06, - "loss": 0.3379, - "step": 16225 - }, - { - "epoch": 1.0604535651264624, - "grad_norm": 0.4513261914253235, - "learning_rate": 7.404960852591162e-06, - "loss": 0.3516, - "step": 16226 - }, - { - "epoch": 1.0605189203320045, - "grad_norm": 0.43539005517959595, - "learning_rate": 7.404654703156324e-06, - "loss": 0.3409, - "step": 16227 - }, - { - "epoch": 1.0605842755375465, - "grad_norm": 0.45895013213157654, - "learning_rate": 7.404348541993152e-06, - "loss": 0.3344, - "step": 16228 - }, - { - "epoch": 1.0606496307430886, - "grad_norm": 0.4350387752056122, - "learning_rate": 7.40404236910314e-06, - "loss": 0.3463, - "step": 16229 - }, - { - "epoch": 1.060714985948631, - "grad_norm": 0.4167777895927429, - "learning_rate": 7.403736184487781e-06, - "loss": 0.289, - "step": 16230 - }, - { - "epoch": 1.060780341154173, - "grad_norm": 0.4347829520702362, - "learning_rate": 7.403429988148567e-06, - "loss": 0.328, - "step": 16231 - }, - { - "epoch": 1.060845696359715, - "grad_norm": 0.4480380117893219, - "learning_rate": 7.403123780086993e-06, - "loss": 0.3337, - "step": 16232 - }, - { - "epoch": 1.0609110515652571, - "grad_norm": 0.42906734347343445, - "learning_rate": 7.402817560304551e-06, - "loss": 0.3286, - "step": 16233 - }, - { - "epoch": 1.0609764067707994, - "grad_norm": 0.44069650769233704, - "learning_rate": 7.402511328802735e-06, - "loss": 0.3252, - "step": 16234 - }, - { - "epoch": 1.0610417619763415, - "grad_norm": 0.4493826627731323, - "learning_rate": 7.4022050855830405e-06, - "loss": 0.3774, - "step": 16235 - }, - { - "epoch": 1.0611071171818836, - "grad_norm": 0.42076724767684937, - "learning_rate": 7.401898830646958e-06, - "loss": 0.318, - "step": 16236 - }, - { - "epoch": 1.0611724723874256, - "grad_norm": 0.4470517635345459, - "learning_rate": 7.401592563995984e-06, - "loss": 0.3305, - "step": 16237 - }, - { - "epoch": 1.0612378275929677, - "grad_norm": 0.44872480630874634, - "learning_rate": 7.40128628563161e-06, - "loss": 0.3292, - "step": 16238 - }, - { - "epoch": 1.06130318279851, - "grad_norm": 0.4473561644554138, - "learning_rate": 7.400979995555332e-06, - "loss": 0.3351, - "step": 16239 - }, - { - "epoch": 1.061368538004052, - "grad_norm": 0.45642489194869995, - "learning_rate": 7.400673693768643e-06, - "loss": 0.3534, - "step": 16240 - }, - { - "epoch": 1.0614338932095941, - "grad_norm": 0.48431986570358276, - "learning_rate": 7.4003673802730345e-06, - "loss": 0.3511, - "step": 16241 - }, - { - "epoch": 1.0614992484151362, - "grad_norm": 0.43051964044570923, - "learning_rate": 7.400061055070005e-06, - "loss": 0.3029, - "step": 16242 - }, - { - "epoch": 1.0615646036206783, - "grad_norm": 0.47000324726104736, - "learning_rate": 7.399754718161045e-06, - "loss": 0.3818, - "step": 16243 - }, - { - "epoch": 1.0616299588262206, - "grad_norm": 0.5296894311904907, - "learning_rate": 7.39944836954765e-06, - "loss": 0.4262, - "step": 16244 - }, - { - "epoch": 1.0616953140317626, - "grad_norm": 0.436626136302948, - "learning_rate": 7.399142009231315e-06, - "loss": 0.3277, - "step": 16245 - }, - { - "epoch": 1.0617606692373047, - "grad_norm": 0.4629237949848175, - "learning_rate": 7.398835637213534e-06, - "loss": 0.3524, - "step": 16246 - }, - { - "epoch": 1.0618260244428468, - "grad_norm": 0.48072561621665955, - "learning_rate": 7.3985292534957986e-06, - "loss": 0.3825, - "step": 16247 - }, - { - "epoch": 1.061891379648389, - "grad_norm": 0.4785764217376709, - "learning_rate": 7.398222858079607e-06, - "loss": 0.3943, - "step": 16248 - }, - { - "epoch": 1.0619567348539312, - "grad_norm": 0.4860216975212097, - "learning_rate": 7.3979164509664494e-06, - "loss": 0.3805, - "step": 16249 - }, - { - "epoch": 1.0620220900594732, - "grad_norm": 0.4337969124317169, - "learning_rate": 7.3976100321578235e-06, - "loss": 0.3047, - "step": 16250 - }, - { - "epoch": 1.0620874452650153, - "grad_norm": 0.4114070534706116, - "learning_rate": 7.397303601655223e-06, - "loss": 0.3047, - "step": 16251 - }, - { - "epoch": 1.0621528004705574, - "grad_norm": 0.43916887044906616, - "learning_rate": 7.396997159460142e-06, - "loss": 0.3256, - "step": 16252 - }, - { - "epoch": 1.0622181556760997, - "grad_norm": 0.4180777072906494, - "learning_rate": 7.396690705574077e-06, - "loss": 0.2866, - "step": 16253 - }, - { - "epoch": 1.0622835108816417, - "grad_norm": 0.4620179235935211, - "learning_rate": 7.39638423999852e-06, - "loss": 0.3344, - "step": 16254 - }, - { - "epoch": 1.0623488660871838, - "grad_norm": 0.4804803431034088, - "learning_rate": 7.396077762734967e-06, - "loss": 0.3902, - "step": 16255 - }, - { - "epoch": 1.062414221292726, - "grad_norm": 0.4444611370563507, - "learning_rate": 7.3957712737849106e-06, - "loss": 0.322, - "step": 16256 - }, - { - "epoch": 1.0624795764982682, - "grad_norm": 0.4772826135158539, - "learning_rate": 7.395464773149851e-06, - "loss": 0.3355, - "step": 16257 - }, - { - "epoch": 1.0625449317038103, - "grad_norm": 0.5063560605049133, - "learning_rate": 7.395158260831279e-06, - "loss": 0.4246, - "step": 16258 - }, - { - "epoch": 1.0626102869093523, - "grad_norm": 0.4525230824947357, - "learning_rate": 7.394851736830688e-06, - "loss": 0.3292, - "step": 16259 - }, - { - "epoch": 1.0626756421148944, - "grad_norm": 0.44800588488578796, - "learning_rate": 7.3945452011495785e-06, - "loss": 0.3447, - "step": 16260 - }, - { - "epoch": 1.0627409973204365, - "grad_norm": 0.4656405746936798, - "learning_rate": 7.3942386537894404e-06, - "loss": 0.352, - "step": 16261 - }, - { - "epoch": 1.0628063525259788, - "grad_norm": 0.4371345639228821, - "learning_rate": 7.3939320947517725e-06, - "loss": 0.3089, - "step": 16262 - }, - { - "epoch": 1.0628717077315208, - "grad_norm": 0.4875541925430298, - "learning_rate": 7.393625524038067e-06, - "loss": 0.387, - "step": 16263 - }, - { - "epoch": 1.062937062937063, - "grad_norm": 0.4274907112121582, - "learning_rate": 7.393318941649822e-06, - "loss": 0.3171, - "step": 16264 - }, - { - "epoch": 1.063002418142605, - "grad_norm": 0.4773643910884857, - "learning_rate": 7.39301234758853e-06, - "loss": 0.412, - "step": 16265 - }, - { - "epoch": 1.0630677733481473, - "grad_norm": 0.44621041417121887, - "learning_rate": 7.392705741855688e-06, - "loss": 0.3252, - "step": 16266 - }, - { - "epoch": 1.0631331285536894, - "grad_norm": 0.4797162115573883, - "learning_rate": 7.392399124452793e-06, - "loss": 0.2904, - "step": 16267 - }, - { - "epoch": 1.0631984837592314, - "grad_norm": 0.4833502173423767, - "learning_rate": 7.392092495381338e-06, - "loss": 0.366, - "step": 16268 - }, - { - "epoch": 1.0632638389647735, - "grad_norm": 0.4435287117958069, - "learning_rate": 7.391785854642819e-06, - "loss": 0.3415, - "step": 16269 - }, - { - "epoch": 1.0633291941703156, - "grad_norm": 0.4675077199935913, - "learning_rate": 7.3914792022387295e-06, - "loss": 0.3398, - "step": 16270 - }, - { - "epoch": 1.0633945493758579, - "grad_norm": 0.4949605166912079, - "learning_rate": 7.39117253817057e-06, - "loss": 0.3716, - "step": 16271 - }, - { - "epoch": 1.0634599045814, - "grad_norm": 0.47651195526123047, - "learning_rate": 7.390865862439832e-06, - "loss": 0.3266, - "step": 16272 - }, - { - "epoch": 1.063525259786942, - "grad_norm": 0.4458908140659332, - "learning_rate": 7.390559175048015e-06, - "loss": 0.3515, - "step": 16273 - }, - { - "epoch": 1.063590614992484, - "grad_norm": 0.42917072772979736, - "learning_rate": 7.390252475996611e-06, - "loss": 0.3246, - "step": 16274 - }, - { - "epoch": 1.0636559701980262, - "grad_norm": 0.45001745223999023, - "learning_rate": 7.389945765287119e-06, - "loss": 0.3107, - "step": 16275 - }, - { - "epoch": 1.0637213254035685, - "grad_norm": 0.44215458631515503, - "learning_rate": 7.389639042921031e-06, - "loss": 0.3005, - "step": 16276 - }, - { - "epoch": 1.0637866806091105, - "grad_norm": 0.4572295546531677, - "learning_rate": 7.3893323088998484e-06, - "loss": 0.3516, - "step": 16277 - }, - { - "epoch": 1.0638520358146526, - "grad_norm": 0.4386812150478363, - "learning_rate": 7.389025563225063e-06, - "loss": 0.3306, - "step": 16278 - }, - { - "epoch": 1.0639173910201947, - "grad_norm": 0.5081034302711487, - "learning_rate": 7.388718805898172e-06, - "loss": 0.3801, - "step": 16279 - }, - { - "epoch": 1.063982746225737, - "grad_norm": 0.42380234599113464, - "learning_rate": 7.3884120369206735e-06, - "loss": 0.3291, - "step": 16280 - }, - { - "epoch": 1.064048101431279, - "grad_norm": 0.4563099443912506, - "learning_rate": 7.38810525629406e-06, - "loss": 0.3471, - "step": 16281 - }, - { - "epoch": 1.064113456636821, - "grad_norm": 0.4397892951965332, - "learning_rate": 7.387798464019831e-06, - "loss": 0.3364, - "step": 16282 - }, - { - "epoch": 1.0641788118423632, - "grad_norm": 0.46197909116744995, - "learning_rate": 7.3874916600994804e-06, - "loss": 0.38, - "step": 16283 - }, - { - "epoch": 1.0642441670479053, - "grad_norm": 0.44674259424209595, - "learning_rate": 7.387184844534507e-06, - "loss": 0.3556, - "step": 16284 - }, - { - "epoch": 1.0643095222534475, - "grad_norm": 0.4053109288215637, - "learning_rate": 7.386878017326407e-06, - "loss": 0.2841, - "step": 16285 - }, - { - "epoch": 1.0643748774589896, - "grad_norm": 0.42249050736427307, - "learning_rate": 7.3865711784766746e-06, - "loss": 0.2992, - "step": 16286 - }, - { - "epoch": 1.0644402326645317, - "grad_norm": 0.46023961901664734, - "learning_rate": 7.386264327986808e-06, - "loss": 0.3845, - "step": 16287 - }, - { - "epoch": 1.0645055878700738, - "grad_norm": 0.4664514660835266, - "learning_rate": 7.385957465858305e-06, - "loss": 0.3466, - "step": 16288 - }, - { - "epoch": 1.064570943075616, - "grad_norm": 0.4404003620147705, - "learning_rate": 7.38565059209266e-06, - "loss": 0.3432, - "step": 16289 - }, - { - "epoch": 1.0646362982811581, - "grad_norm": 0.4451841413974762, - "learning_rate": 7.3853437066913705e-06, - "loss": 0.3616, - "step": 16290 - }, - { - "epoch": 1.0647016534867002, - "grad_norm": 0.46495670080184937, - "learning_rate": 7.3850368096559335e-06, - "loss": 0.3601, - "step": 16291 - }, - { - "epoch": 1.0647670086922423, - "grad_norm": 0.43297263979911804, - "learning_rate": 7.384729900987844e-06, - "loss": 0.3304, - "step": 16292 - }, - { - "epoch": 1.0648323638977844, - "grad_norm": 0.44345468282699585, - "learning_rate": 7.384422980688602e-06, - "loss": 0.3415, - "step": 16293 - }, - { - "epoch": 1.0648977191033266, - "grad_norm": 0.43672364950180054, - "learning_rate": 7.384116048759703e-06, - "loss": 0.3134, - "step": 16294 - }, - { - "epoch": 1.0649630743088687, - "grad_norm": 0.4559917449951172, - "learning_rate": 7.383809105202645e-06, - "loss": 0.3181, - "step": 16295 - }, - { - "epoch": 1.0650284295144108, - "grad_norm": 0.44162875413894653, - "learning_rate": 7.3835021500189245e-06, - "loss": 0.3213, - "step": 16296 - }, - { - "epoch": 1.0650937847199529, - "grad_norm": 0.40349656343460083, - "learning_rate": 7.383195183210036e-06, - "loss": 0.2713, - "step": 16297 - }, - { - "epoch": 1.0651591399254952, - "grad_norm": 0.4892532229423523, - "learning_rate": 7.38288820477748e-06, - "loss": 0.2965, - "step": 16298 - }, - { - "epoch": 1.0652244951310372, - "grad_norm": 0.45245566964149475, - "learning_rate": 7.382581214722753e-06, - "loss": 0.3269, - "step": 16299 - }, - { - "epoch": 1.0652898503365793, - "grad_norm": 0.476072758436203, - "learning_rate": 7.382274213047352e-06, - "loss": 0.3661, - "step": 16300 - }, - { - "epoch": 1.0653552055421214, - "grad_norm": 0.46941715478897095, - "learning_rate": 7.381967199752773e-06, - "loss": 0.3666, - "step": 16301 - }, - { - "epoch": 1.0654205607476634, - "grad_norm": 0.4570242464542389, - "learning_rate": 7.381660174840517e-06, - "loss": 0.3335, - "step": 16302 - }, - { - "epoch": 1.0654859159532057, - "grad_norm": 0.5833566188812256, - "learning_rate": 7.381353138312078e-06, - "loss": 0.3668, - "step": 16303 - }, - { - "epoch": 1.0655512711587478, - "grad_norm": 0.4997336268424988, - "learning_rate": 7.381046090168955e-06, - "loss": 0.3706, - "step": 16304 - }, - { - "epoch": 1.0656166263642899, - "grad_norm": 0.48211637139320374, - "learning_rate": 7.380739030412645e-06, - "loss": 0.3203, - "step": 16305 - }, - { - "epoch": 1.065681981569832, - "grad_norm": 0.4539371430873871, - "learning_rate": 7.380431959044646e-06, - "loss": 0.3193, - "step": 16306 - }, - { - "epoch": 1.0657473367753743, - "grad_norm": 0.43627384305000305, - "learning_rate": 7.380124876066456e-06, - "loss": 0.2922, - "step": 16307 - }, - { - "epoch": 1.0658126919809163, - "grad_norm": 0.4480058252811432, - "learning_rate": 7.379817781479572e-06, - "loss": 0.3639, - "step": 16308 - }, - { - "epoch": 1.0658780471864584, - "grad_norm": 0.43765416741371155, - "learning_rate": 7.379510675285494e-06, - "loss": 0.3269, - "step": 16309 - }, - { - "epoch": 1.0659434023920005, - "grad_norm": 0.44008341431617737, - "learning_rate": 7.379203557485717e-06, - "loss": 0.3131, - "step": 16310 - }, - { - "epoch": 1.0660087575975425, - "grad_norm": 0.47002214193344116, - "learning_rate": 7.3788964280817395e-06, - "loss": 0.3586, - "step": 16311 - }, - { - "epoch": 1.0660741128030848, - "grad_norm": 0.47891783714294434, - "learning_rate": 7.3785892870750596e-06, - "loss": 0.3631, - "step": 16312 - }, - { - "epoch": 1.066139468008627, - "grad_norm": 0.4591046869754791, - "learning_rate": 7.378282134467176e-06, - "loss": 0.366, - "step": 16313 - }, - { - "epoch": 1.066204823214169, - "grad_norm": 0.4661593437194824, - "learning_rate": 7.377974970259587e-06, - "loss": 0.3694, - "step": 16314 - }, - { - "epoch": 1.066270178419711, - "grad_norm": 0.44471195340156555, - "learning_rate": 7.3776677944537915e-06, - "loss": 0.3427, - "step": 16315 - }, - { - "epoch": 1.0663355336252534, - "grad_norm": 0.481689453125, - "learning_rate": 7.377360607051285e-06, - "loss": 0.3825, - "step": 16316 - }, - { - "epoch": 1.0664008888307954, - "grad_norm": 0.4517310857772827, - "learning_rate": 7.377053408053566e-06, - "loss": 0.3297, - "step": 16317 - }, - { - "epoch": 1.0664662440363375, - "grad_norm": 0.43472006916999817, - "learning_rate": 7.376746197462137e-06, - "loss": 0.2861, - "step": 16318 - }, - { - "epoch": 1.0665315992418796, - "grad_norm": 0.4931611716747284, - "learning_rate": 7.376438975278491e-06, - "loss": 0.3898, - "step": 16319 - }, - { - "epoch": 1.0665969544474216, - "grad_norm": 0.4449213743209839, - "learning_rate": 7.376131741504129e-06, - "loss": 0.3398, - "step": 16320 - }, - { - "epoch": 1.066662309652964, - "grad_norm": 0.44252240657806396, - "learning_rate": 7.37582449614055e-06, - "loss": 0.303, - "step": 16321 - }, - { - "epoch": 1.066727664858506, - "grad_norm": 0.47769948840141296, - "learning_rate": 7.375517239189251e-06, - "loss": 0.363, - "step": 16322 - }, - { - "epoch": 1.066793020064048, - "grad_norm": 0.45678725838661194, - "learning_rate": 7.375209970651733e-06, - "loss": 0.3468, - "step": 16323 - }, - { - "epoch": 1.0668583752695902, - "grad_norm": 0.43321335315704346, - "learning_rate": 7.374902690529493e-06, - "loss": 0.3491, - "step": 16324 - }, - { - "epoch": 1.0669237304751324, - "grad_norm": 0.45774635672569275, - "learning_rate": 7.374595398824029e-06, - "loss": 0.3771, - "step": 16325 - }, - { - "epoch": 1.0669890856806745, - "grad_norm": 0.45558732748031616, - "learning_rate": 7.37428809553684e-06, - "loss": 0.3552, - "step": 16326 - }, - { - "epoch": 1.0670544408862166, - "grad_norm": 0.43057477474212646, - "learning_rate": 7.373980780669427e-06, - "loss": 0.3136, - "step": 16327 - }, - { - "epoch": 1.0671197960917587, - "grad_norm": 0.4441756308078766, - "learning_rate": 7.373673454223285e-06, - "loss": 0.3574, - "step": 16328 - }, - { - "epoch": 1.0671851512973007, - "grad_norm": 0.47489261627197266, - "learning_rate": 7.373366116199918e-06, - "loss": 0.376, - "step": 16329 - }, - { - "epoch": 1.067250506502843, - "grad_norm": 0.46269145607948303, - "learning_rate": 7.373058766600821e-06, - "loss": 0.3112, - "step": 16330 - }, - { - "epoch": 1.067315861708385, - "grad_norm": 0.43337777256965637, - "learning_rate": 7.372751405427495e-06, - "loss": 0.3225, - "step": 16331 - }, - { - "epoch": 1.0673812169139272, - "grad_norm": 0.42257869243621826, - "learning_rate": 7.3724440326814376e-06, - "loss": 0.2858, - "step": 16332 - }, - { - "epoch": 1.0674465721194693, - "grad_norm": 0.4334564805030823, - "learning_rate": 7.37213664836415e-06, - "loss": 0.3132, - "step": 16333 - }, - { - "epoch": 1.0675119273250115, - "grad_norm": 0.46192216873168945, - "learning_rate": 7.371829252477127e-06, - "loss": 0.3667, - "step": 16334 - }, - { - "epoch": 1.0675772825305536, - "grad_norm": 0.4625583589076996, - "learning_rate": 7.371521845021874e-06, - "loss": 0.379, - "step": 16335 - }, - { - "epoch": 1.0676426377360957, - "grad_norm": 0.4332091510295868, - "learning_rate": 7.371214425999888e-06, - "loss": 0.3264, - "step": 16336 - }, - { - "epoch": 1.0677079929416378, - "grad_norm": 0.44488924741744995, - "learning_rate": 7.370906995412665e-06, - "loss": 0.323, - "step": 16337 - }, - { - "epoch": 1.0677733481471798, - "grad_norm": 0.4677157402038574, - "learning_rate": 7.370599553261709e-06, - "loss": 0.3615, - "step": 16338 - }, - { - "epoch": 1.0678387033527221, - "grad_norm": 0.4546584188938141, - "learning_rate": 7.370292099548516e-06, - "loss": 0.3497, - "step": 16339 - }, - { - "epoch": 1.0679040585582642, - "grad_norm": 0.423282653093338, - "learning_rate": 7.369984634274589e-06, - "loss": 0.3264, - "step": 16340 - }, - { - "epoch": 1.0679694137638063, - "grad_norm": 0.46851375699043274, - "learning_rate": 7.369677157441425e-06, - "loss": 0.3952, - "step": 16341 - }, - { - "epoch": 1.0680347689693483, - "grad_norm": 0.4636993706226349, - "learning_rate": 7.369369669050526e-06, - "loss": 0.333, - "step": 16342 - }, - { - "epoch": 1.0681001241748906, - "grad_norm": 0.49464917182922363, - "learning_rate": 7.3690621691033895e-06, - "loss": 0.3477, - "step": 16343 - }, - { - "epoch": 1.0681654793804327, - "grad_norm": 0.4228525757789612, - "learning_rate": 7.368754657601516e-06, - "loss": 0.3055, - "step": 16344 - }, - { - "epoch": 1.0682308345859748, - "grad_norm": 0.48316940665245056, - "learning_rate": 7.3684471345464046e-06, - "loss": 0.3625, - "step": 16345 - }, - { - "epoch": 1.0682961897915169, - "grad_norm": 0.49780622124671936, - "learning_rate": 7.368139599939557e-06, - "loss": 0.3822, - "step": 16346 - }, - { - "epoch": 1.068361544997059, - "grad_norm": 0.4277776777744293, - "learning_rate": 7.367832053782471e-06, - "loss": 0.3077, - "step": 16347 - }, - { - "epoch": 1.0684269002026012, - "grad_norm": 0.4928586781024933, - "learning_rate": 7.367524496076648e-06, - "loss": 0.3912, - "step": 16348 - }, - { - "epoch": 1.0684922554081433, - "grad_norm": 0.4875708520412445, - "learning_rate": 7.36721692682359e-06, - "loss": 0.3722, - "step": 16349 - }, - { - "epoch": 1.0685576106136854, - "grad_norm": 0.42442789673805237, - "learning_rate": 7.366909346024793e-06, - "loss": 0.2881, - "step": 16350 - }, - { - "epoch": 1.0686229658192274, - "grad_norm": 0.4443688988685608, - "learning_rate": 7.3666017536817605e-06, - "loss": 0.3451, - "step": 16351 - }, - { - "epoch": 1.0686883210247697, - "grad_norm": 0.40238940715789795, - "learning_rate": 7.36629414979599e-06, - "loss": 0.2745, - "step": 16352 - }, - { - "epoch": 1.0687536762303118, - "grad_norm": 0.4478525221347809, - "learning_rate": 7.3659865343689844e-06, - "loss": 0.3602, - "step": 16353 - }, - { - "epoch": 1.0688190314358539, - "grad_norm": 0.4733360707759857, - "learning_rate": 7.365678907402242e-06, - "loss": 0.366, - "step": 16354 - }, - { - "epoch": 1.068884386641396, - "grad_norm": 0.49481990933418274, - "learning_rate": 7.365371268897263e-06, - "loss": 0.3781, - "step": 16355 - }, - { - "epoch": 1.068949741846938, - "grad_norm": 0.4631558060646057, - "learning_rate": 7.36506361885555e-06, - "loss": 0.3767, - "step": 16356 - }, - { - "epoch": 1.0690150970524803, - "grad_norm": 0.42552196979522705, - "learning_rate": 7.364755957278602e-06, - "loss": 0.3118, - "step": 16357 - }, - { - "epoch": 1.0690804522580224, - "grad_norm": 0.4559949040412903, - "learning_rate": 7.364448284167921e-06, - "loss": 0.3312, - "step": 16358 - }, - { - "epoch": 1.0691458074635645, - "grad_norm": 0.41191062331199646, - "learning_rate": 7.3641405995250045e-06, - "loss": 0.3148, - "step": 16359 - }, - { - "epoch": 1.0692111626691065, - "grad_norm": 0.4348178207874298, - "learning_rate": 7.363832903351358e-06, - "loss": 0.3173, - "step": 16360 - }, - { - "epoch": 1.0692765178746486, - "grad_norm": 0.4582132399082184, - "learning_rate": 7.363525195648477e-06, - "loss": 0.3132, - "step": 16361 - }, - { - "epoch": 1.069341873080191, - "grad_norm": 0.44204777479171753, - "learning_rate": 7.363217476417868e-06, - "loss": 0.3175, - "step": 16362 - }, - { - "epoch": 1.069407228285733, - "grad_norm": 0.47324949502944946, - "learning_rate": 7.3629097456610266e-06, - "loss": 0.4177, - "step": 16363 - }, - { - "epoch": 1.069472583491275, - "grad_norm": 0.43854910135269165, - "learning_rate": 7.362602003379456e-06, - "loss": 0.2915, - "step": 16364 - }, - { - "epoch": 1.0695379386968171, - "grad_norm": 0.4717777669429779, - "learning_rate": 7.362294249574656e-06, - "loss": 0.3806, - "step": 16365 - }, - { - "epoch": 1.0696032939023594, - "grad_norm": 0.4293909966945648, - "learning_rate": 7.3619864842481295e-06, - "loss": 0.3067, - "step": 16366 - }, - { - "epoch": 1.0696686491079015, - "grad_norm": 0.45948585867881775, - "learning_rate": 7.361678707401376e-06, - "loss": 0.3194, - "step": 16367 - }, - { - "epoch": 1.0697340043134436, - "grad_norm": 0.4923698902130127, - "learning_rate": 7.361370919035898e-06, - "loss": 0.3417, - "step": 16368 - }, - { - "epoch": 1.0697993595189856, - "grad_norm": 0.46501636505126953, - "learning_rate": 7.3610631191531955e-06, - "loss": 0.3676, - "step": 16369 - }, - { - "epoch": 1.0698647147245277, - "grad_norm": 0.4651261866092682, - "learning_rate": 7.360755307754771e-06, - "loss": 0.3326, - "step": 16370 - }, - { - "epoch": 1.06993006993007, - "grad_norm": 0.4757455289363861, - "learning_rate": 7.360447484842123e-06, - "loss": 0.3405, - "step": 16371 - }, - { - "epoch": 1.069995425135612, - "grad_norm": 0.4725337326526642, - "learning_rate": 7.360139650416758e-06, - "loss": 0.351, - "step": 16372 - }, - { - "epoch": 1.0700607803411541, - "grad_norm": 0.454908162355423, - "learning_rate": 7.359831804480173e-06, - "loss": 0.3071, - "step": 16373 - }, - { - "epoch": 1.0701261355466962, - "grad_norm": 0.46444931626319885, - "learning_rate": 7.3595239470338705e-06, - "loss": 0.3658, - "step": 16374 - }, - { - "epoch": 1.0701914907522385, - "grad_norm": 0.4421345293521881, - "learning_rate": 7.359216078079352e-06, - "loss": 0.3291, - "step": 16375 - }, - { - "epoch": 1.0702568459577806, - "grad_norm": 0.4262407124042511, - "learning_rate": 7.35890819761812e-06, - "loss": 0.2771, - "step": 16376 - }, - { - "epoch": 1.0703222011633227, - "grad_norm": 0.45567482709884644, - "learning_rate": 7.358600305651674e-06, - "loss": 0.3641, - "step": 16377 - }, - { - "epoch": 1.0703875563688647, - "grad_norm": 0.4575360119342804, - "learning_rate": 7.35829240218152e-06, - "loss": 0.3211, - "step": 16378 - }, - { - "epoch": 1.0704529115744068, - "grad_norm": 0.4639178514480591, - "learning_rate": 7.3579844872091535e-06, - "loss": 0.3944, - "step": 16379 - }, - { - "epoch": 1.070518266779949, - "grad_norm": 0.41772863268852234, - "learning_rate": 7.357676560736082e-06, - "loss": 0.2997, - "step": 16380 - }, - { - "epoch": 1.0705836219854912, - "grad_norm": 0.4523961544036865, - "learning_rate": 7.357368622763805e-06, - "loss": 0.3414, - "step": 16381 - }, - { - "epoch": 1.0706489771910332, - "grad_norm": 0.45985814929008484, - "learning_rate": 7.357060673293824e-06, - "loss": 0.3424, - "step": 16382 - }, - { - "epoch": 1.0707143323965753, - "grad_norm": 0.4585316777229309, - "learning_rate": 7.3567527123276425e-06, - "loss": 0.322, - "step": 16383 - }, - { - "epoch": 1.0707796876021174, - "grad_norm": 0.4948931634426117, - "learning_rate": 7.3564447398667605e-06, - "loss": 0.3905, - "step": 16384 - }, - { - "epoch": 1.0708450428076597, - "grad_norm": 0.47296464443206787, - "learning_rate": 7.356136755912682e-06, - "loss": 0.3613, - "step": 16385 - }, - { - "epoch": 1.0709103980132018, - "grad_norm": 0.4087711274623871, - "learning_rate": 7.3558287604669075e-06, - "loss": 0.2598, - "step": 16386 - }, - { - "epoch": 1.0709757532187438, - "grad_norm": 0.46858835220336914, - "learning_rate": 7.35552075353094e-06, - "loss": 0.3376, - "step": 16387 - }, - { - "epoch": 1.071041108424286, - "grad_norm": 0.47748062014579773, - "learning_rate": 7.355212735106282e-06, - "loss": 0.3537, - "step": 16388 - }, - { - "epoch": 1.0711064636298282, - "grad_norm": 0.48925772309303284, - "learning_rate": 7.354904705194436e-06, - "loss": 0.3954, - "step": 16389 - }, - { - "epoch": 1.0711718188353703, - "grad_norm": 0.44123828411102295, - "learning_rate": 7.354596663796903e-06, - "loss": 0.3451, - "step": 16390 - }, - { - "epoch": 1.0712371740409123, - "grad_norm": 0.45615312457084656, - "learning_rate": 7.354288610915187e-06, - "loss": 0.3485, - "step": 16391 - }, - { - "epoch": 1.0713025292464544, - "grad_norm": 0.4208703339099884, - "learning_rate": 7.35398054655079e-06, - "loss": 0.317, - "step": 16392 - }, - { - "epoch": 1.0713678844519965, - "grad_norm": 0.42598918080329895, - "learning_rate": 7.353672470705216e-06, - "loss": 0.3097, - "step": 16393 - }, - { - "epoch": 1.0714332396575388, - "grad_norm": 0.47139468789100647, - "learning_rate": 7.3533643833799636e-06, - "loss": 0.3549, - "step": 16394 - }, - { - "epoch": 1.0714985948630809, - "grad_norm": 0.46468374133110046, - "learning_rate": 7.353056284576537e-06, - "loss": 0.3782, - "step": 16395 - }, - { - "epoch": 1.071563950068623, - "grad_norm": 0.4394523501396179, - "learning_rate": 7.3527481742964424e-06, - "loss": 0.3223, - "step": 16396 - }, - { - "epoch": 1.071629305274165, - "grad_norm": 0.47597095370292664, - "learning_rate": 7.352440052541178e-06, - "loss": 0.4177, - "step": 16397 - }, - { - "epoch": 1.0716946604797073, - "grad_norm": 0.4225848615169525, - "learning_rate": 7.352131919312248e-06, - "loss": 0.3357, - "step": 16398 - }, - { - "epoch": 1.0717600156852494, - "grad_norm": 0.44464272260665894, - "learning_rate": 7.351823774611158e-06, - "loss": 0.3294, - "step": 16399 - }, - { - "epoch": 1.0718253708907914, - "grad_norm": 0.4471379220485687, - "learning_rate": 7.351515618439406e-06, - "loss": 0.3125, - "step": 16400 - }, - { - "epoch": 1.0718907260963335, - "grad_norm": 0.4395327866077423, - "learning_rate": 7.351207450798501e-06, - "loss": 0.2769, - "step": 16401 - }, - { - "epoch": 1.0719560813018756, - "grad_norm": 0.4541919231414795, - "learning_rate": 7.3508992716899395e-06, - "loss": 0.3563, - "step": 16402 - }, - { - "epoch": 1.0720214365074179, - "grad_norm": 0.4204372465610504, - "learning_rate": 7.35059108111523e-06, - "loss": 0.3301, - "step": 16403 - }, - { - "epoch": 1.07208679171296, - "grad_norm": 0.47538837790489197, - "learning_rate": 7.350282879075872e-06, - "loss": 0.3863, - "step": 16404 - }, - { - "epoch": 1.072152146918502, - "grad_norm": 0.4320773184299469, - "learning_rate": 7.349974665573372e-06, - "loss": 0.3314, - "step": 16405 - }, - { - "epoch": 1.072217502124044, - "grad_norm": 0.4484153985977173, - "learning_rate": 7.34966644060923e-06, - "loss": 0.3533, - "step": 16406 - }, - { - "epoch": 1.0722828573295864, - "grad_norm": 0.42012518644332886, - "learning_rate": 7.349358204184951e-06, - "loss": 0.3221, - "step": 16407 - }, - { - "epoch": 1.0723482125351285, - "grad_norm": 0.4573407769203186, - "learning_rate": 7.349049956302039e-06, - "loss": 0.3671, - "step": 16408 - }, - { - "epoch": 1.0724135677406705, - "grad_norm": 0.442060649394989, - "learning_rate": 7.348741696961995e-06, - "loss": 0.33, - "step": 16409 - }, - { - "epoch": 1.0724789229462126, - "grad_norm": 0.44182801246643066, - "learning_rate": 7.348433426166326e-06, - "loss": 0.3029, - "step": 16410 - }, - { - "epoch": 1.0725442781517547, - "grad_norm": 0.4631810486316681, - "learning_rate": 7.348125143916531e-06, - "loss": 0.3541, - "step": 16411 - }, - { - "epoch": 1.072609633357297, - "grad_norm": 0.4371936023235321, - "learning_rate": 7.347816850214118e-06, - "loss": 0.3704, - "step": 16412 - }, - { - "epoch": 1.072674988562839, - "grad_norm": 0.44719499349594116, - "learning_rate": 7.347508545060589e-06, - "loss": 0.3192, - "step": 16413 - }, - { - "epoch": 1.0727403437683811, - "grad_norm": 0.44428518414497375, - "learning_rate": 7.347200228457447e-06, - "loss": 0.323, - "step": 16414 - }, - { - "epoch": 1.0728056989739232, - "grad_norm": 0.4393537640571594, - "learning_rate": 7.346891900406197e-06, - "loss": 0.3456, - "step": 16415 - }, - { - "epoch": 1.0728710541794655, - "grad_norm": 0.4477136731147766, - "learning_rate": 7.346583560908343e-06, - "loss": 0.3341, - "step": 16416 - }, - { - "epoch": 1.0729364093850076, - "grad_norm": 0.4214542806148529, - "learning_rate": 7.346275209965386e-06, - "loss": 0.3476, - "step": 16417 - }, - { - "epoch": 1.0730017645905496, - "grad_norm": 0.46620863676071167, - "learning_rate": 7.345966847578831e-06, - "loss": 0.3528, - "step": 16418 - }, - { - "epoch": 1.0730671197960917, - "grad_norm": 0.47876277565956116, - "learning_rate": 7.345658473750186e-06, - "loss": 0.3961, - "step": 16419 - }, - { - "epoch": 1.0731324750016338, - "grad_norm": 0.47290778160095215, - "learning_rate": 7.345350088480951e-06, - "loss": 0.3804, - "step": 16420 - }, - { - "epoch": 1.073197830207176, - "grad_norm": 0.44703933596611023, - "learning_rate": 7.34504169177263e-06, - "loss": 0.3388, - "step": 16421 - }, - { - "epoch": 1.0732631854127181, - "grad_norm": 0.417653352022171, - "learning_rate": 7.344733283626728e-06, - "loss": 0.3017, - "step": 16422 - }, - { - "epoch": 1.0733285406182602, - "grad_norm": 0.48385077714920044, - "learning_rate": 7.344424864044751e-06, - "loss": 0.3913, - "step": 16423 - }, - { - "epoch": 1.0733938958238023, - "grad_norm": 0.4289487600326538, - "learning_rate": 7.3441164330282015e-06, - "loss": 0.3273, - "step": 16424 - }, - { - "epoch": 1.0734592510293446, - "grad_norm": 0.4383748769760132, - "learning_rate": 7.343807990578584e-06, - "loss": 0.3162, - "step": 16425 - }, - { - "epoch": 1.0735246062348867, - "grad_norm": 0.46777331829071045, - "learning_rate": 7.343499536697403e-06, - "loss": 0.4019, - "step": 16426 - }, - { - "epoch": 1.0735899614404287, - "grad_norm": 0.470061331987381, - "learning_rate": 7.343191071386162e-06, - "loss": 0.3817, - "step": 16427 - }, - { - "epoch": 1.0736553166459708, - "grad_norm": 0.4281580150127411, - "learning_rate": 7.342882594646368e-06, - "loss": 0.3224, - "step": 16428 - }, - { - "epoch": 1.0737206718515129, - "grad_norm": 0.5027729868888855, - "learning_rate": 7.342574106479522e-06, - "loss": 0.4134, - "step": 16429 - }, - { - "epoch": 1.0737860270570552, - "grad_norm": 0.4550667703151703, - "learning_rate": 7.342265606887132e-06, - "loss": 0.388, - "step": 16430 - }, - { - "epoch": 1.0738513822625972, - "grad_norm": 0.47943225502967834, - "learning_rate": 7.3419570958707e-06, - "loss": 0.3665, - "step": 16431 - }, - { - "epoch": 1.0739167374681393, - "grad_norm": 0.4488272964954376, - "learning_rate": 7.341648573431734e-06, - "loss": 0.312, - "step": 16432 - }, - { - "epoch": 1.0739820926736814, - "grad_norm": 0.4339866638183594, - "learning_rate": 7.341340039571734e-06, - "loss": 0.2891, - "step": 16433 - }, - { - "epoch": 1.0740474478792237, - "grad_norm": 0.4570108652114868, - "learning_rate": 7.341031494292209e-06, - "loss": 0.3712, - "step": 16434 - }, - { - "epoch": 1.0741128030847658, - "grad_norm": 0.4831475019454956, - "learning_rate": 7.340722937594663e-06, - "loss": 0.3824, - "step": 16435 - }, - { - "epoch": 1.0741781582903078, - "grad_norm": 0.438126802444458, - "learning_rate": 7.3404143694806e-06, - "loss": 0.3262, - "step": 16436 - }, - { - "epoch": 1.07424351349585, - "grad_norm": 0.44917792081832886, - "learning_rate": 7.340105789951524e-06, - "loss": 0.3042, - "step": 16437 - }, - { - "epoch": 1.074308868701392, - "grad_norm": 0.43990835547447205, - "learning_rate": 7.339797199008942e-06, - "loss": 0.3545, - "step": 16438 - }, - { - "epoch": 1.0743742239069343, - "grad_norm": 0.43734100461006165, - "learning_rate": 7.33948859665436e-06, - "loss": 0.3273, - "step": 16439 - }, - { - "epoch": 1.0744395791124763, - "grad_norm": 0.4544733464717865, - "learning_rate": 7.33917998288928e-06, - "loss": 0.3407, - "step": 16440 - }, - { - "epoch": 1.0745049343180184, - "grad_norm": 0.45032691955566406, - "learning_rate": 7.3388713577152095e-06, - "loss": 0.3571, - "step": 16441 - }, - { - "epoch": 1.0745702895235605, - "grad_norm": 0.439270555973053, - "learning_rate": 7.338562721133652e-06, - "loss": 0.3322, - "step": 16442 - }, - { - "epoch": 1.0746356447291028, - "grad_norm": 0.44508621096611023, - "learning_rate": 7.338254073146115e-06, - "loss": 0.3273, - "step": 16443 - }, - { - "epoch": 1.0747009999346449, - "grad_norm": 0.4212568700313568, - "learning_rate": 7.3379454137541015e-06, - "loss": 0.3178, - "step": 16444 - }, - { - "epoch": 1.074766355140187, - "grad_norm": 0.46879836916923523, - "learning_rate": 7.33763674295912e-06, - "loss": 0.3564, - "step": 16445 - }, - { - "epoch": 1.074831710345729, - "grad_norm": 0.4160691201686859, - "learning_rate": 7.337328060762674e-06, - "loss": 0.3006, - "step": 16446 - }, - { - "epoch": 1.074897065551271, - "grad_norm": 0.46859854459762573, - "learning_rate": 7.337019367166269e-06, - "loss": 0.3387, - "step": 16447 - }, - { - "epoch": 1.0749624207568134, - "grad_norm": 0.45349177718162537, - "learning_rate": 7.336710662171411e-06, - "loss": 0.3477, - "step": 16448 - }, - { - "epoch": 1.0750277759623554, - "grad_norm": 0.4608268737792969, - "learning_rate": 7.336401945779605e-06, - "loss": 0.3461, - "step": 16449 - }, - { - "epoch": 1.0750931311678975, - "grad_norm": 0.4648530185222626, - "learning_rate": 7.336093217992359e-06, - "loss": 0.3703, - "step": 16450 - }, - { - "epoch": 1.0751584863734396, - "grad_norm": 0.4413801431655884, - "learning_rate": 7.335784478811175e-06, - "loss": 0.3433, - "step": 16451 - }, - { - "epoch": 1.0752238415789819, - "grad_norm": 0.44498562812805176, - "learning_rate": 7.335475728237562e-06, - "loss": 0.328, - "step": 16452 - }, - { - "epoch": 1.075289196784524, - "grad_norm": 0.4815255403518677, - "learning_rate": 7.3351669662730245e-06, - "loss": 0.3981, - "step": 16453 - }, - { - "epoch": 1.075354551990066, - "grad_norm": 0.44285887479782104, - "learning_rate": 7.334858192919069e-06, - "loss": 0.3424, - "step": 16454 - }, - { - "epoch": 1.075419907195608, - "grad_norm": 0.4815467596054077, - "learning_rate": 7.3345494081772005e-06, - "loss": 0.3668, - "step": 16455 - }, - { - "epoch": 1.0754852624011502, - "grad_norm": 0.429591566324234, - "learning_rate": 7.3342406120489275e-06, - "loss": 0.3143, - "step": 16456 - }, - { - "epoch": 1.0755506176066925, - "grad_norm": 0.488942414522171, - "learning_rate": 7.333931804535753e-06, - "loss": 0.3861, - "step": 16457 - }, - { - "epoch": 1.0756159728122345, - "grad_norm": 0.5187869071960449, - "learning_rate": 7.333622985639184e-06, - "loss": 0.3928, - "step": 16458 - }, - { - "epoch": 1.0756813280177766, - "grad_norm": 0.46916940808296204, - "learning_rate": 7.333314155360729e-06, - "loss": 0.2882, - "step": 16459 - }, - { - "epoch": 1.0757466832233187, - "grad_norm": 0.46073561906814575, - "learning_rate": 7.3330053137018905e-06, - "loss": 0.3545, - "step": 16460 - }, - { - "epoch": 1.075812038428861, - "grad_norm": 0.40526437759399414, - "learning_rate": 7.332696460664178e-06, - "loss": 0.2692, - "step": 16461 - }, - { - "epoch": 1.075877393634403, - "grad_norm": 0.4188133776187897, - "learning_rate": 7.332387596249096e-06, - "loss": 0.305, - "step": 16462 - }, - { - "epoch": 1.0759427488399451, - "grad_norm": 0.4839152991771698, - "learning_rate": 7.332078720458154e-06, - "loss": 0.3972, - "step": 16463 - }, - { - "epoch": 1.0760081040454872, - "grad_norm": 0.4279783368110657, - "learning_rate": 7.331769833292853e-06, - "loss": 0.3122, - "step": 16464 - }, - { - "epoch": 1.0760734592510293, - "grad_norm": 0.41427361965179443, - "learning_rate": 7.331460934754704e-06, - "loss": 0.3015, - "step": 16465 - }, - { - "epoch": 1.0761388144565716, - "grad_norm": 0.4339550733566284, - "learning_rate": 7.331152024845214e-06, - "loss": 0.3076, - "step": 16466 - }, - { - "epoch": 1.0762041696621136, - "grad_norm": 0.41602861881256104, - "learning_rate": 7.330843103565885e-06, - "loss": 0.2882, - "step": 16467 - }, - { - "epoch": 1.0762695248676557, - "grad_norm": 0.43938368558883667, - "learning_rate": 7.330534170918229e-06, - "loss": 0.3717, - "step": 16468 - }, - { - "epoch": 1.0763348800731978, - "grad_norm": 0.46085667610168457, - "learning_rate": 7.330225226903749e-06, - "loss": 0.3675, - "step": 16469 - }, - { - "epoch": 1.07640023527874, - "grad_norm": 0.46619588136672974, - "learning_rate": 7.3299162715239536e-06, - "loss": 0.3642, - "step": 16470 - }, - { - "epoch": 1.0764655904842821, - "grad_norm": 0.4606631398200989, - "learning_rate": 7.32960730478035e-06, - "loss": 0.3556, - "step": 16471 - }, - { - "epoch": 1.0765309456898242, - "grad_norm": 0.4193075895309448, - "learning_rate": 7.329298326674444e-06, - "loss": 0.3133, - "step": 16472 - }, - { - "epoch": 1.0765963008953663, - "grad_norm": 0.4827231168746948, - "learning_rate": 7.3289893372077435e-06, - "loss": 0.3762, - "step": 16473 - }, - { - "epoch": 1.0766616561009084, - "grad_norm": 0.4541878402233124, - "learning_rate": 7.328680336381755e-06, - "loss": 0.3667, - "step": 16474 - }, - { - "epoch": 1.0767270113064507, - "grad_norm": 0.45358648896217346, - "learning_rate": 7.328371324197986e-06, - "loss": 0.3467, - "step": 16475 - }, - { - "epoch": 1.0767923665119927, - "grad_norm": 0.46547931432724, - "learning_rate": 7.328062300657943e-06, - "loss": 0.3499, - "step": 16476 - }, - { - "epoch": 1.0768577217175348, - "grad_norm": 0.45730358362197876, - "learning_rate": 7.327753265763136e-06, - "loss": 0.3589, - "step": 16477 - }, - { - "epoch": 1.0769230769230769, - "grad_norm": 0.47796475887298584, - "learning_rate": 7.327444219515066e-06, - "loss": 0.3846, - "step": 16478 - }, - { - "epoch": 1.076988432128619, - "grad_norm": 0.4266526699066162, - "learning_rate": 7.327135161915247e-06, - "loss": 0.3193, - "step": 16479 - }, - { - "epoch": 1.0770537873341612, - "grad_norm": 0.4600265920162201, - "learning_rate": 7.326826092965182e-06, - "loss": 0.3449, - "step": 16480 - }, - { - "epoch": 1.0771191425397033, - "grad_norm": 0.4514351487159729, - "learning_rate": 7.326517012666381e-06, - "loss": 0.3345, - "step": 16481 - }, - { - "epoch": 1.0771844977452454, - "grad_norm": 0.42739933729171753, - "learning_rate": 7.32620792102035e-06, - "loss": 0.3231, - "step": 16482 - }, - { - "epoch": 1.0772498529507875, - "grad_norm": 0.4579569697380066, - "learning_rate": 7.325898818028597e-06, - "loss": 0.3518, - "step": 16483 - }, - { - "epoch": 1.0773152081563298, - "grad_norm": 0.4700823128223419, - "learning_rate": 7.32558970369263e-06, - "loss": 0.3788, - "step": 16484 - }, - { - "epoch": 1.0773805633618718, - "grad_norm": 0.48954805731773376, - "learning_rate": 7.325280578013955e-06, - "loss": 0.4094, - "step": 16485 - }, - { - "epoch": 1.077445918567414, - "grad_norm": 0.4473172128200531, - "learning_rate": 7.3249714409940846e-06, - "loss": 0.3053, - "step": 16486 - }, - { - "epoch": 1.077511273772956, - "grad_norm": 0.4740552008152008, - "learning_rate": 7.32466229263452e-06, - "loss": 0.3613, - "step": 16487 - }, - { - "epoch": 1.077576628978498, - "grad_norm": 0.466447114944458, - "learning_rate": 7.324353132936773e-06, - "loss": 0.3444, - "step": 16488 - }, - { - "epoch": 1.0776419841840403, - "grad_norm": 0.4432372450828552, - "learning_rate": 7.3240439619023495e-06, - "loss": 0.3414, - "step": 16489 - }, - { - "epoch": 1.0777073393895824, - "grad_norm": 0.4533449113368988, - "learning_rate": 7.323734779532761e-06, - "loss": 0.3494, - "step": 16490 - }, - { - "epoch": 1.0777726945951245, - "grad_norm": 0.4982556998729706, - "learning_rate": 7.32342558582951e-06, - "loss": 0.3855, - "step": 16491 - }, - { - "epoch": 1.0778380498006666, - "grad_norm": 0.47109490633010864, - "learning_rate": 7.3231163807941094e-06, - "loss": 0.3666, - "step": 16492 - }, - { - "epoch": 1.0779034050062088, - "grad_norm": 0.4856020510196686, - "learning_rate": 7.322807164428065e-06, - "loss": 0.4174, - "step": 16493 - }, - { - "epoch": 1.077968760211751, - "grad_norm": 0.4741486608982086, - "learning_rate": 7.322497936732885e-06, - "loss": 0.3674, - "step": 16494 - }, - { - "epoch": 1.078034115417293, - "grad_norm": 0.5163902044296265, - "learning_rate": 7.322188697710079e-06, - "loss": 0.4159, - "step": 16495 - }, - { - "epoch": 1.078099470622835, - "grad_norm": 0.4352622628211975, - "learning_rate": 7.3218794473611535e-06, - "loss": 0.3349, - "step": 16496 - }, - { - "epoch": 1.0781648258283771, - "grad_norm": 0.470027893781662, - "learning_rate": 7.321570185687616e-06, - "loss": 0.3793, - "step": 16497 - }, - { - "epoch": 1.0782301810339194, - "grad_norm": 0.4309051036834717, - "learning_rate": 7.321260912690979e-06, - "loss": 0.2972, - "step": 16498 - }, - { - "epoch": 1.0782955362394615, - "grad_norm": 0.42376551032066345, - "learning_rate": 7.320951628372748e-06, - "loss": 0.3162, - "step": 16499 - }, - { - "epoch": 1.0783608914450036, - "grad_norm": 0.4436875283718109, - "learning_rate": 7.3206423327344314e-06, - "loss": 0.3284, - "step": 16500 - }, - { - "epoch": 1.0784262466505457, - "grad_norm": 0.44389382004737854, - "learning_rate": 7.320333025777537e-06, - "loss": 0.3438, - "step": 16501 - }, - { - "epoch": 1.0784916018560877, - "grad_norm": 0.42851725220680237, - "learning_rate": 7.320023707503576e-06, - "loss": 0.3202, - "step": 16502 - }, - { - "epoch": 1.07855695706163, - "grad_norm": 0.4838922619819641, - "learning_rate": 7.319714377914056e-06, - "loss": 0.3562, - "step": 16503 - }, - { - "epoch": 1.078622312267172, - "grad_norm": 0.47055715322494507, - "learning_rate": 7.319405037010484e-06, - "loss": 0.3466, - "step": 16504 - }, - { - "epoch": 1.0786876674727142, - "grad_norm": 0.4585447311401367, - "learning_rate": 7.31909568479437e-06, - "loss": 0.3712, - "step": 16505 - }, - { - "epoch": 1.0787530226782562, - "grad_norm": 0.4848266839981079, - "learning_rate": 7.318786321267224e-06, - "loss": 0.3992, - "step": 16506 - }, - { - "epoch": 1.0788183778837985, - "grad_norm": 0.5263270139694214, - "learning_rate": 7.318476946430553e-06, - "loss": 0.4139, - "step": 16507 - }, - { - "epoch": 1.0788837330893406, - "grad_norm": 0.42925411462783813, - "learning_rate": 7.318167560285868e-06, - "loss": 0.294, - "step": 16508 - }, - { - "epoch": 1.0789490882948827, - "grad_norm": 0.469771146774292, - "learning_rate": 7.317858162834674e-06, - "loss": 0.3535, - "step": 16509 - }, - { - "epoch": 1.0790144435004247, - "grad_norm": 0.4327908456325531, - "learning_rate": 7.317548754078486e-06, - "loss": 0.3095, - "step": 16510 - }, - { - "epoch": 1.0790797987059668, - "grad_norm": 0.4393147826194763, - "learning_rate": 7.317239334018809e-06, - "loss": 0.3269, - "step": 16511 - }, - { - "epoch": 1.0791451539115091, - "grad_norm": 0.41900089383125305, - "learning_rate": 7.316929902657152e-06, - "loss": 0.2763, - "step": 16512 - }, - { - "epoch": 1.0792105091170512, - "grad_norm": 0.412723183631897, - "learning_rate": 7.316620459995027e-06, - "loss": 0.2744, - "step": 16513 - }, - { - "epoch": 1.0792758643225933, - "grad_norm": 0.4568878412246704, - "learning_rate": 7.316311006033938e-06, - "loss": 0.3509, - "step": 16514 - }, - { - "epoch": 1.0793412195281353, - "grad_norm": 0.4407266080379486, - "learning_rate": 7.316001540775401e-06, - "loss": 0.3305, - "step": 16515 - }, - { - "epoch": 1.0794065747336776, - "grad_norm": 0.4252583086490631, - "learning_rate": 7.315692064220921e-06, - "loss": 0.3049, - "step": 16516 - }, - { - "epoch": 1.0794719299392197, - "grad_norm": 0.5093232989311218, - "learning_rate": 7.315382576372009e-06, - "loss": 0.3745, - "step": 16517 - }, - { - "epoch": 1.0795372851447618, - "grad_norm": 0.43224215507507324, - "learning_rate": 7.315073077230173e-06, - "loss": 0.3075, - "step": 16518 - }, - { - "epoch": 1.0796026403503038, - "grad_norm": 0.4391690194606781, - "learning_rate": 7.314763566796925e-06, - "loss": 0.3346, - "step": 16519 - }, - { - "epoch": 1.079667995555846, - "grad_norm": 0.46438995003700256, - "learning_rate": 7.314454045073772e-06, - "loss": 0.3624, - "step": 16520 - }, - { - "epoch": 1.0797333507613882, - "grad_norm": 0.4262702763080597, - "learning_rate": 7.314144512062225e-06, - "loss": 0.3174, - "step": 16521 - }, - { - "epoch": 1.0797987059669303, - "grad_norm": 0.4562312066555023, - "learning_rate": 7.313834967763795e-06, - "loss": 0.3395, - "step": 16522 - }, - { - "epoch": 1.0798640611724724, - "grad_norm": 0.4660331606864929, - "learning_rate": 7.313525412179989e-06, - "loss": 0.3577, - "step": 16523 - }, - { - "epoch": 1.0799294163780144, - "grad_norm": 0.4537416100502014, - "learning_rate": 7.313215845312318e-06, - "loss": 0.3696, - "step": 16524 - }, - { - "epoch": 1.0799947715835567, - "grad_norm": 0.4765056371688843, - "learning_rate": 7.312906267162292e-06, - "loss": 0.358, - "step": 16525 - }, - { - "epoch": 1.0800601267890988, - "grad_norm": 0.403167724609375, - "learning_rate": 7.31259667773142e-06, - "loss": 0.256, - "step": 16526 - }, - { - "epoch": 1.0801254819946409, - "grad_norm": 0.4833615720272064, - "learning_rate": 7.312287077021214e-06, - "loss": 0.3468, - "step": 16527 - }, - { - "epoch": 1.080190837200183, - "grad_norm": 0.4581005871295929, - "learning_rate": 7.311977465033183e-06, - "loss": 0.3421, - "step": 16528 - }, - { - "epoch": 1.080256192405725, - "grad_norm": 0.4372236430644989, - "learning_rate": 7.311667841768837e-06, - "loss": 0.3001, - "step": 16529 - }, - { - "epoch": 1.0803215476112673, - "grad_norm": 0.4552151560783386, - "learning_rate": 7.311358207229685e-06, - "loss": 0.3439, - "step": 16530 - }, - { - "epoch": 1.0803869028168094, - "grad_norm": 0.40541872382164, - "learning_rate": 7.31104856141724e-06, - "loss": 0.295, - "step": 16531 - }, - { - "epoch": 1.0804522580223515, - "grad_norm": 0.4337131381034851, - "learning_rate": 7.310738904333009e-06, - "loss": 0.3135, - "step": 16532 - }, - { - "epoch": 1.0805176132278935, - "grad_norm": 0.42380663752555847, - "learning_rate": 7.310429235978504e-06, - "loss": 0.306, - "step": 16533 - }, - { - "epoch": 1.0805829684334358, - "grad_norm": 0.4487023651599884, - "learning_rate": 7.310119556355235e-06, - "loss": 0.342, - "step": 16534 - }, - { - "epoch": 1.080648323638978, - "grad_norm": 0.4362575113773346, - "learning_rate": 7.309809865464714e-06, - "loss": 0.3431, - "step": 16535 - }, - { - "epoch": 1.08071367884452, - "grad_norm": 0.4611964523792267, - "learning_rate": 7.3095001633084495e-06, - "loss": 0.3738, - "step": 16536 - }, - { - "epoch": 1.080779034050062, - "grad_norm": 0.43904024362564087, - "learning_rate": 7.309190449887953e-06, - "loss": 0.3321, - "step": 16537 - }, - { - "epoch": 1.080844389255604, - "grad_norm": 0.4666799306869507, - "learning_rate": 7.3088807252047345e-06, - "loss": 0.3767, - "step": 16538 - }, - { - "epoch": 1.0809097444611464, - "grad_norm": 0.43932658433914185, - "learning_rate": 7.308570989260304e-06, - "loss": 0.3191, - "step": 16539 - }, - { - "epoch": 1.0809750996666885, - "grad_norm": 0.469312846660614, - "learning_rate": 7.308261242056174e-06, - "loss": 0.3545, - "step": 16540 - }, - { - "epoch": 1.0810404548722305, - "grad_norm": 0.4795440137386322, - "learning_rate": 7.3079514835938536e-06, - "loss": 0.3001, - "step": 16541 - }, - { - "epoch": 1.0811058100777726, - "grad_norm": 0.41839662194252014, - "learning_rate": 7.307641713874855e-06, - "loss": 0.3012, - "step": 16542 - }, - { - "epoch": 1.081171165283315, - "grad_norm": 0.4089526832103729, - "learning_rate": 7.307331932900688e-06, - "loss": 0.3355, - "step": 16543 - }, - { - "epoch": 1.081236520488857, - "grad_norm": 0.4382639229297638, - "learning_rate": 7.307022140672863e-06, - "loss": 0.3158, - "step": 16544 - }, - { - "epoch": 1.081301875694399, - "grad_norm": 0.49214106798171997, - "learning_rate": 7.3067123371928915e-06, - "loss": 0.3886, - "step": 16545 - }, - { - "epoch": 1.0813672308999411, - "grad_norm": 0.47231751680374146, - "learning_rate": 7.306402522462286e-06, - "loss": 0.3779, - "step": 16546 - }, - { - "epoch": 1.0814325861054832, - "grad_norm": 0.4860769510269165, - "learning_rate": 7.306092696482556e-06, - "loss": 0.3672, - "step": 16547 - }, - { - "epoch": 1.0814979413110255, - "grad_norm": 0.49187856912612915, - "learning_rate": 7.305782859255213e-06, - "loss": 0.3827, - "step": 16548 - }, - { - "epoch": 1.0815632965165676, - "grad_norm": 0.43406373262405396, - "learning_rate": 7.305473010781767e-06, - "loss": 0.321, - "step": 16549 - }, - { - "epoch": 1.0816286517221096, - "grad_norm": 0.4567713439464569, - "learning_rate": 7.305163151063733e-06, - "loss": 0.3867, - "step": 16550 - }, - { - "epoch": 1.0816940069276517, - "grad_norm": 0.43306538462638855, - "learning_rate": 7.304853280102618e-06, - "loss": 0.3175, - "step": 16551 - }, - { - "epoch": 1.081759362133194, - "grad_norm": 0.4578908085823059, - "learning_rate": 7.304543397899936e-06, - "loss": 0.3499, - "step": 16552 - }, - { - "epoch": 1.081824717338736, - "grad_norm": 0.4774589538574219, - "learning_rate": 7.304233504457197e-06, - "loss": 0.3686, - "step": 16553 - }, - { - "epoch": 1.0818900725442782, - "grad_norm": 0.46102818846702576, - "learning_rate": 7.303923599775913e-06, - "loss": 0.32, - "step": 16554 - }, - { - "epoch": 1.0819554277498202, - "grad_norm": 0.4556474983692169, - "learning_rate": 7.303613683857596e-06, - "loss": 0.3604, - "step": 16555 - }, - { - "epoch": 1.0820207829553623, - "grad_norm": 0.4312027394771576, - "learning_rate": 7.3033037567037545e-06, - "loss": 0.3155, - "step": 16556 - }, - { - "epoch": 1.0820861381609046, - "grad_norm": 0.4546574354171753, - "learning_rate": 7.302993818315906e-06, - "loss": 0.3319, - "step": 16557 - }, - { - "epoch": 1.0821514933664467, - "grad_norm": 0.43697255849838257, - "learning_rate": 7.302683868695557e-06, - "loss": 0.3518, - "step": 16558 - }, - { - "epoch": 1.0822168485719887, - "grad_norm": 0.4205278754234314, - "learning_rate": 7.30237390784422e-06, - "loss": 0.2999, - "step": 16559 - }, - { - "epoch": 1.0822822037775308, - "grad_norm": 0.4171440303325653, - "learning_rate": 7.3020639357634105e-06, - "loss": 0.3081, - "step": 16560 - }, - { - "epoch": 1.082347558983073, - "grad_norm": 0.47382208704948425, - "learning_rate": 7.301753952454636e-06, - "loss": 0.3629, - "step": 16561 - }, - { - "epoch": 1.0824129141886152, - "grad_norm": 0.46627259254455566, - "learning_rate": 7.301443957919412e-06, - "loss": 0.3904, - "step": 16562 - }, - { - "epoch": 1.0824782693941573, - "grad_norm": 0.45141178369522095, - "learning_rate": 7.301133952159246e-06, - "loss": 0.3585, - "step": 16563 - }, - { - "epoch": 1.0825436245996993, - "grad_norm": 0.4470028877258301, - "learning_rate": 7.300823935175654e-06, - "loss": 0.329, - "step": 16564 - }, - { - "epoch": 1.0826089798052414, - "grad_norm": 0.4529719352722168, - "learning_rate": 7.300513906970146e-06, - "loss": 0.3575, - "step": 16565 - }, - { - "epoch": 1.0826743350107837, - "grad_norm": 0.46421998739242554, - "learning_rate": 7.3002038675442345e-06, - "loss": 0.3287, - "step": 16566 - }, - { - "epoch": 1.0827396902163258, - "grad_norm": 0.4930346608161926, - "learning_rate": 7.299893816899432e-06, - "loss": 0.3565, - "step": 16567 - }, - { - "epoch": 1.0828050454218678, - "grad_norm": 0.4851548969745636, - "learning_rate": 7.299583755037251e-06, - "loss": 0.3723, - "step": 16568 - }, - { - "epoch": 1.08287040062741, - "grad_norm": 0.44411322474479675, - "learning_rate": 7.299273681959202e-06, - "loss": 0.3256, - "step": 16569 - }, - { - "epoch": 1.0829357558329522, - "grad_norm": 0.4573776125907898, - "learning_rate": 7.2989635976668e-06, - "loss": 0.3408, - "step": 16570 - }, - { - "epoch": 1.0830011110384943, - "grad_norm": 0.4168480634689331, - "learning_rate": 7.2986535021615565e-06, - "loss": 0.3042, - "step": 16571 - }, - { - "epoch": 1.0830664662440364, - "grad_norm": 0.4562520682811737, - "learning_rate": 7.298343395444982e-06, - "loss": 0.3362, - "step": 16572 - }, - { - "epoch": 1.0831318214495784, - "grad_norm": 0.48239827156066895, - "learning_rate": 7.298033277518592e-06, - "loss": 0.3679, - "step": 16573 - }, - { - "epoch": 1.0831971766551205, - "grad_norm": 0.44063931703567505, - "learning_rate": 7.2977231483838975e-06, - "loss": 0.3185, - "step": 16574 - }, - { - "epoch": 1.0832625318606628, - "grad_norm": 0.47119778394699097, - "learning_rate": 7.297413008042411e-06, - "loss": 0.3706, - "step": 16575 - }, - { - "epoch": 1.0833278870662049, - "grad_norm": 0.47423574328422546, - "learning_rate": 7.297102856495644e-06, - "loss": 0.3848, - "step": 16576 - }, - { - "epoch": 1.083393242271747, - "grad_norm": 0.4561114013195038, - "learning_rate": 7.296792693745113e-06, - "loss": 0.3536, - "step": 16577 - }, - { - "epoch": 1.083458597477289, - "grad_norm": 0.4443838894367218, - "learning_rate": 7.2964825197923275e-06, - "loss": 0.3422, - "step": 16578 - }, - { - "epoch": 1.0835239526828313, - "grad_norm": 0.5358433127403259, - "learning_rate": 7.2961723346387996e-06, - "loss": 0.3857, - "step": 16579 - }, - { - "epoch": 1.0835893078883734, - "grad_norm": 0.46897754073143005, - "learning_rate": 7.295862138286045e-06, - "loss": 0.3495, - "step": 16580 - }, - { - "epoch": 1.0836546630939154, - "grad_norm": 0.4311322569847107, - "learning_rate": 7.295551930735575e-06, - "loss": 0.3175, - "step": 16581 - }, - { - "epoch": 1.0837200182994575, - "grad_norm": 0.4757993221282959, - "learning_rate": 7.295241711988905e-06, - "loss": 0.3386, - "step": 16582 - }, - { - "epoch": 1.0837853735049996, - "grad_norm": 0.493779718875885, - "learning_rate": 7.294931482047544e-06, - "loss": 0.3759, - "step": 16583 - }, - { - "epoch": 1.0838507287105419, - "grad_norm": 0.459670752286911, - "learning_rate": 7.294621240913007e-06, - "loss": 0.3342, - "step": 16584 - }, - { - "epoch": 1.083916083916084, - "grad_norm": 0.405508816242218, - "learning_rate": 7.294310988586807e-06, - "loss": 0.2745, - "step": 16585 - }, - { - "epoch": 1.083981439121626, - "grad_norm": 0.4545801877975464, - "learning_rate": 7.294000725070458e-06, - "loss": 0.3489, - "step": 16586 - }, - { - "epoch": 1.084046794327168, - "grad_norm": 0.48119500279426575, - "learning_rate": 7.2936904503654736e-06, - "loss": 0.3517, - "step": 16587 - }, - { - "epoch": 1.0841121495327102, - "grad_norm": 0.4570417106151581, - "learning_rate": 7.293380164473365e-06, - "loss": 0.3557, - "step": 16588 - }, - { - "epoch": 1.0841775047382525, - "grad_norm": 0.47389963269233704, - "learning_rate": 7.293069867395648e-06, - "loss": 0.3664, - "step": 16589 - }, - { - "epoch": 1.0842428599437945, - "grad_norm": 0.46701309084892273, - "learning_rate": 7.292759559133834e-06, - "loss": 0.3657, - "step": 16590 - }, - { - "epoch": 1.0843082151493366, - "grad_norm": 0.45863738656044006, - "learning_rate": 7.2924492396894385e-06, - "loss": 0.3456, - "step": 16591 - }, - { - "epoch": 1.0843735703548787, - "grad_norm": 0.4857933223247528, - "learning_rate": 7.292138909063972e-06, - "loss": 0.3812, - "step": 16592 - }, - { - "epoch": 1.084438925560421, - "grad_norm": 0.45012250542640686, - "learning_rate": 7.291828567258952e-06, - "loss": 0.3387, - "step": 16593 - }, - { - "epoch": 1.084504280765963, - "grad_norm": 0.42833054065704346, - "learning_rate": 7.291518214275888e-06, - "loss": 0.3393, - "step": 16594 - }, - { - "epoch": 1.0845696359715051, - "grad_norm": 0.4619414806365967, - "learning_rate": 7.291207850116298e-06, - "loss": 0.3357, - "step": 16595 - }, - { - "epoch": 1.0846349911770472, - "grad_norm": 0.45225340127944946, - "learning_rate": 7.290897474781692e-06, - "loss": 0.3457, - "step": 16596 - }, - { - "epoch": 1.0847003463825893, - "grad_norm": 0.4553976356983185, - "learning_rate": 7.290587088273586e-06, - "loss": 0.3762, - "step": 16597 - }, - { - "epoch": 1.0847657015881316, - "grad_norm": 0.4105634093284607, - "learning_rate": 7.290276690593493e-06, - "loss": 0.3153, - "step": 16598 - }, - { - "epoch": 1.0848310567936736, - "grad_norm": 0.4671085476875305, - "learning_rate": 7.289966281742926e-06, - "loss": 0.3669, - "step": 16599 - }, - { - "epoch": 1.0848964119992157, - "grad_norm": 0.48248225450515747, - "learning_rate": 7.2896558617234014e-06, - "loss": 0.3773, - "step": 16600 - }, - { - "epoch": 1.0849617672047578, - "grad_norm": 0.42600542306900024, - "learning_rate": 7.2893454305364316e-06, - "loss": 0.3315, - "step": 16601 - }, - { - "epoch": 1.0850271224103, - "grad_norm": 0.42937231063842773, - "learning_rate": 7.289034988183532e-06, - "loss": 0.3507, - "step": 16602 - }, - { - "epoch": 1.0850924776158422, - "grad_norm": 0.441170871257782, - "learning_rate": 7.288724534666215e-06, - "loss": 0.3409, - "step": 16603 - }, - { - "epoch": 1.0851578328213842, - "grad_norm": 0.47401493787765503, - "learning_rate": 7.288414069985996e-06, - "loss": 0.3792, - "step": 16604 - }, - { - "epoch": 1.0852231880269263, - "grad_norm": 0.4646795690059662, - "learning_rate": 7.288103594144389e-06, - "loss": 0.3706, - "step": 16605 - }, - { - "epoch": 1.0852885432324684, - "grad_norm": 0.412864625453949, - "learning_rate": 7.287793107142908e-06, - "loss": 0.2999, - "step": 16606 - }, - { - "epoch": 1.0853538984380107, - "grad_norm": 0.4567086696624756, - "learning_rate": 7.287482608983066e-06, - "loss": 0.3645, - "step": 16607 - }, - { - "epoch": 1.0854192536435527, - "grad_norm": 0.4699552655220032, - "learning_rate": 7.2871720996663794e-06, - "loss": 0.3602, - "step": 16608 - }, - { - "epoch": 1.0854846088490948, - "grad_norm": 0.4367991089820862, - "learning_rate": 7.286861579194363e-06, - "loss": 0.321, - "step": 16609 - }, - { - "epoch": 1.0855499640546369, - "grad_norm": 0.4446961581707001, - "learning_rate": 7.28655104756853e-06, - "loss": 0.3254, - "step": 16610 - }, - { - "epoch": 1.085615319260179, - "grad_norm": 0.43319615721702576, - "learning_rate": 7.286240504790396e-06, - "loss": 0.3424, - "step": 16611 - }, - { - "epoch": 1.0856806744657213, - "grad_norm": 0.5113908648490906, - "learning_rate": 7.285929950861474e-06, - "loss": 0.3604, - "step": 16612 - }, - { - "epoch": 1.0857460296712633, - "grad_norm": 0.4660574793815613, - "learning_rate": 7.28561938578328e-06, - "loss": 0.3717, - "step": 16613 - }, - { - "epoch": 1.0858113848768054, - "grad_norm": 0.47809529304504395, - "learning_rate": 7.285308809557328e-06, - "loss": 0.3558, - "step": 16614 - }, - { - "epoch": 1.0858767400823475, - "grad_norm": 0.42160192131996155, - "learning_rate": 7.284998222185134e-06, - "loss": 0.332, - "step": 16615 - }, - { - "epoch": 1.0859420952878898, - "grad_norm": 0.7761409282684326, - "learning_rate": 7.284687623668212e-06, - "loss": 0.2734, - "step": 16616 - }, - { - "epoch": 1.0860074504934318, - "grad_norm": 0.4404222071170807, - "learning_rate": 7.284377014008077e-06, - "loss": 0.2978, - "step": 16617 - }, - { - "epoch": 1.086072805698974, - "grad_norm": 0.4651528000831604, - "learning_rate": 7.284066393206244e-06, - "loss": 0.3765, - "step": 16618 - }, - { - "epoch": 1.086138160904516, - "grad_norm": 0.44234445691108704, - "learning_rate": 7.283755761264226e-06, - "loss": 0.3143, - "step": 16619 - }, - { - "epoch": 1.086203516110058, - "grad_norm": 0.45051178336143494, - "learning_rate": 7.283445118183543e-06, - "loss": 0.3292, - "step": 16620 - }, - { - "epoch": 1.0862688713156003, - "grad_norm": 0.44953030347824097, - "learning_rate": 7.283134463965706e-06, - "loss": 0.3201, - "step": 16621 - }, - { - "epoch": 1.0863342265211424, - "grad_norm": 0.4718022048473358, - "learning_rate": 7.2828237986122304e-06, - "loss": 0.3647, - "step": 16622 - }, - { - "epoch": 1.0863995817266845, - "grad_norm": 0.46884533762931824, - "learning_rate": 7.2825131221246325e-06, - "loss": 0.323, - "step": 16623 - }, - { - "epoch": 1.0864649369322266, - "grad_norm": 0.460284560918808, - "learning_rate": 7.282202434504428e-06, - "loss": 0.3487, - "step": 16624 - }, - { - "epoch": 1.0865302921377689, - "grad_norm": 0.44646936655044556, - "learning_rate": 7.281891735753132e-06, - "loss": 0.3438, - "step": 16625 - }, - { - "epoch": 1.086595647343311, - "grad_norm": 0.43540501594543457, - "learning_rate": 7.281581025872258e-06, - "loss": 0.3481, - "step": 16626 - }, - { - "epoch": 1.086661002548853, - "grad_norm": 0.46796274185180664, - "learning_rate": 7.281270304863325e-06, - "loss": 0.3581, - "step": 16627 - }, - { - "epoch": 1.086726357754395, - "grad_norm": 0.471216082572937, - "learning_rate": 7.280959572727845e-06, - "loss": 0.3666, - "step": 16628 - }, - { - "epoch": 1.0867917129599372, - "grad_norm": 0.4512973427772522, - "learning_rate": 7.280648829467335e-06, - "loss": 0.3325, - "step": 16629 - }, - { - "epoch": 1.0868570681654794, - "grad_norm": 0.4285413324832916, - "learning_rate": 7.2803380750833105e-06, - "loss": 0.2966, - "step": 16630 - }, - { - "epoch": 1.0869224233710215, - "grad_norm": 0.46235746145248413, - "learning_rate": 7.280027309577288e-06, - "loss": 0.3445, - "step": 16631 - }, - { - "epoch": 1.0869877785765636, - "grad_norm": 0.4764554500579834, - "learning_rate": 7.279716532950781e-06, - "loss": 0.3852, - "step": 16632 - }, - { - "epoch": 1.0870531337821057, - "grad_norm": 0.4783000349998474, - "learning_rate": 7.279405745205308e-06, - "loss": 0.3554, - "step": 16633 - }, - { - "epoch": 1.087118488987648, - "grad_norm": 0.46820077300071716, - "learning_rate": 7.279094946342383e-06, - "loss": 0.3709, - "step": 16634 - }, - { - "epoch": 1.08718384419319, - "grad_norm": 0.4597938060760498, - "learning_rate": 7.2787841363635225e-06, - "loss": 0.3285, - "step": 16635 - }, - { - "epoch": 1.087249199398732, - "grad_norm": 0.4381828010082245, - "learning_rate": 7.278473315270242e-06, - "loss": 0.3536, - "step": 16636 - }, - { - "epoch": 1.0873145546042742, - "grad_norm": 0.4854438304901123, - "learning_rate": 7.278162483064057e-06, - "loss": 0.4449, - "step": 16637 - }, - { - "epoch": 1.0873799098098162, - "grad_norm": 0.45786261558532715, - "learning_rate": 7.277851639746487e-06, - "loss": 0.3612, - "step": 16638 - }, - { - "epoch": 1.0874452650153585, - "grad_norm": 0.4783628284931183, - "learning_rate": 7.277540785319044e-06, - "loss": 0.3537, - "step": 16639 - }, - { - "epoch": 1.0875106202209006, - "grad_norm": 0.44216424226760864, - "learning_rate": 7.277229919783246e-06, - "loss": 0.3429, - "step": 16640 - }, - { - "epoch": 1.0875759754264427, - "grad_norm": 0.4420805871486664, - "learning_rate": 7.276919043140607e-06, - "loss": 0.3023, - "step": 16641 - }, - { - "epoch": 1.0876413306319848, - "grad_norm": 0.4524914026260376, - "learning_rate": 7.276608155392646e-06, - "loss": 0.3175, - "step": 16642 - }, - { - "epoch": 1.087706685837527, - "grad_norm": 0.4534817039966583, - "learning_rate": 7.27629725654088e-06, - "loss": 0.3516, - "step": 16643 - }, - { - "epoch": 1.0877720410430691, - "grad_norm": 0.43316760659217834, - "learning_rate": 7.275986346586821e-06, - "loss": 0.3229, - "step": 16644 - }, - { - "epoch": 1.0878373962486112, - "grad_norm": 0.4456009566783905, - "learning_rate": 7.275675425531991e-06, - "loss": 0.3459, - "step": 16645 - }, - { - "epoch": 1.0879027514541533, - "grad_norm": 0.47747665643692017, - "learning_rate": 7.275364493377901e-06, - "loss": 0.3522, - "step": 16646 - }, - { - "epoch": 1.0879681066596953, - "grad_norm": 0.47131046652793884, - "learning_rate": 7.275053550126072e-06, - "loss": 0.3549, - "step": 16647 - }, - { - "epoch": 1.0880334618652376, - "grad_norm": 0.4717196524143219, - "learning_rate": 7.274742595778017e-06, - "loss": 0.2843, - "step": 16648 - }, - { - "epoch": 1.0880988170707797, - "grad_norm": 0.43104586005210876, - "learning_rate": 7.274431630335255e-06, - "loss": 0.3127, - "step": 16649 - }, - { - "epoch": 1.0881641722763218, - "grad_norm": 0.43192827701568604, - "learning_rate": 7.274120653799302e-06, - "loss": 0.3303, - "step": 16650 - }, - { - "epoch": 1.0882295274818639, - "grad_norm": 0.471676230430603, - "learning_rate": 7.273809666171675e-06, - "loss": 0.3577, - "step": 16651 - }, - { - "epoch": 1.0882948826874062, - "grad_norm": 0.44355347752571106, - "learning_rate": 7.27349866745389e-06, - "loss": 0.3378, - "step": 16652 - }, - { - "epoch": 1.0883602378929482, - "grad_norm": 0.4836813807487488, - "learning_rate": 7.273187657647465e-06, - "loss": 0.3551, - "step": 16653 - }, - { - "epoch": 1.0884255930984903, - "grad_norm": 0.46259772777557373, - "learning_rate": 7.272876636753916e-06, - "loss": 0.3915, - "step": 16654 - }, - { - "epoch": 1.0884909483040324, - "grad_norm": 0.468009352684021, - "learning_rate": 7.27256560477476e-06, - "loss": 0.3754, - "step": 16655 - }, - { - "epoch": 1.0885563035095744, - "grad_norm": 0.44383275508880615, - "learning_rate": 7.272254561711515e-06, - "loss": 0.3247, - "step": 16656 - }, - { - "epoch": 1.0886216587151167, - "grad_norm": 0.5151002407073975, - "learning_rate": 7.271943507565696e-06, - "loss": 0.3486, - "step": 16657 - }, - { - "epoch": 1.0886870139206588, - "grad_norm": 0.44885411858558655, - "learning_rate": 7.271632442338823e-06, - "loss": 0.3312, - "step": 16658 - }, - { - "epoch": 1.0887523691262009, - "grad_norm": 0.44856178760528564, - "learning_rate": 7.27132136603241e-06, - "loss": 0.3213, - "step": 16659 - }, - { - "epoch": 1.088817724331743, - "grad_norm": 0.44864511489868164, - "learning_rate": 7.2710102786479765e-06, - "loss": 0.3487, - "step": 16660 - }, - { - "epoch": 1.0888830795372852, - "grad_norm": 0.4368108808994293, - "learning_rate": 7.27069918018704e-06, - "loss": 0.2972, - "step": 16661 - }, - { - "epoch": 1.0889484347428273, - "grad_norm": 0.47293275594711304, - "learning_rate": 7.270388070651115e-06, - "loss": 0.3712, - "step": 16662 - }, - { - "epoch": 1.0890137899483694, - "grad_norm": 0.5326557159423828, - "learning_rate": 7.270076950041724e-06, - "loss": 0.3363, - "step": 16663 - }, - { - "epoch": 1.0890791451539115, - "grad_norm": 0.4424080550670624, - "learning_rate": 7.269765818360378e-06, - "loss": 0.3214, - "step": 16664 - }, - { - "epoch": 1.0891445003594535, - "grad_norm": 0.4424096643924713, - "learning_rate": 7.2694546756086e-06, - "loss": 0.3188, - "step": 16665 - }, - { - "epoch": 1.0892098555649958, - "grad_norm": 0.4487593173980713, - "learning_rate": 7.269143521787904e-06, - "loss": 0.3561, - "step": 16666 - }, - { - "epoch": 1.089275210770538, - "grad_norm": 0.4540695250034332, - "learning_rate": 7.2688323568998105e-06, - "loss": 0.3746, - "step": 16667 - }, - { - "epoch": 1.08934056597608, - "grad_norm": 0.4934118986129761, - "learning_rate": 7.2685211809458335e-06, - "loss": 0.3494, - "step": 16668 - }, - { - "epoch": 1.089405921181622, - "grad_norm": 0.4706835150718689, - "learning_rate": 7.2682099939274954e-06, - "loss": 0.3261, - "step": 16669 - }, - { - "epoch": 1.0894712763871643, - "grad_norm": 0.4687422215938568, - "learning_rate": 7.267898795846309e-06, - "loss": 0.3665, - "step": 16670 - }, - { - "epoch": 1.0895366315927064, - "grad_norm": 0.5007064342498779, - "learning_rate": 7.267587586703795e-06, - "loss": 0.4104, - "step": 16671 - }, - { - "epoch": 1.0896019867982485, - "grad_norm": 0.4735453128814697, - "learning_rate": 7.267276366501472e-06, - "loss": 0.3431, - "step": 16672 - }, - { - "epoch": 1.0896673420037906, - "grad_norm": 0.43810102343559265, - "learning_rate": 7.266965135240856e-06, - "loss": 0.3421, - "step": 16673 - }, - { - "epoch": 1.0897326972093326, - "grad_norm": 0.4781905710697174, - "learning_rate": 7.266653892923465e-06, - "loss": 0.3772, - "step": 16674 - }, - { - "epoch": 1.089798052414875, - "grad_norm": 0.4426269829273224, - "learning_rate": 7.2663426395508175e-06, - "loss": 0.3132, - "step": 16675 - }, - { - "epoch": 1.089863407620417, - "grad_norm": 0.44136500358581543, - "learning_rate": 7.266031375124433e-06, - "loss": 0.2931, - "step": 16676 - }, - { - "epoch": 1.089928762825959, - "grad_norm": 0.40366947650909424, - "learning_rate": 7.265720099645828e-06, - "loss": 0.2761, - "step": 16677 - }, - { - "epoch": 1.0899941180315011, - "grad_norm": 0.4639582633972168, - "learning_rate": 7.265408813116521e-06, - "loss": 0.3549, - "step": 16678 - }, - { - "epoch": 1.0900594732370434, - "grad_norm": 0.494266539812088, - "learning_rate": 7.26509751553803e-06, - "loss": 0.3884, - "step": 16679 - }, - { - "epoch": 1.0901248284425855, - "grad_norm": 0.4520329236984253, - "learning_rate": 7.264786206911875e-06, - "loss": 0.3515, - "step": 16680 - }, - { - "epoch": 1.0901901836481276, - "grad_norm": 0.44954192638397217, - "learning_rate": 7.264474887239572e-06, - "loss": 0.3645, - "step": 16681 - }, - { - "epoch": 1.0902555388536697, - "grad_norm": 0.431308388710022, - "learning_rate": 7.264163556522641e-06, - "loss": 0.3215, - "step": 16682 - }, - { - "epoch": 1.0903208940592117, - "grad_norm": 0.46347272396087646, - "learning_rate": 7.263852214762599e-06, - "loss": 0.3478, - "step": 16683 - }, - { - "epoch": 1.090386249264754, - "grad_norm": 0.4492810070514679, - "learning_rate": 7.263540861960966e-06, - "loss": 0.3225, - "step": 16684 - }, - { - "epoch": 1.090451604470296, - "grad_norm": 0.4365510940551758, - "learning_rate": 7.263229498119261e-06, - "loss": 0.2907, - "step": 16685 - }, - { - "epoch": 1.0905169596758382, - "grad_norm": 0.43292418122291565, - "learning_rate": 7.262918123239e-06, - "loss": 0.3079, - "step": 16686 - }, - { - "epoch": 1.0905823148813802, - "grad_norm": 0.4367837607860565, - "learning_rate": 7.2626067373217056e-06, - "loss": 0.3292, - "step": 16687 - }, - { - "epoch": 1.0906476700869225, - "grad_norm": 0.46882861852645874, - "learning_rate": 7.2622953403688925e-06, - "loss": 0.3513, - "step": 16688 - }, - { - "epoch": 1.0907130252924646, - "grad_norm": 0.4271145761013031, - "learning_rate": 7.261983932382083e-06, - "loss": 0.3155, - "step": 16689 - }, - { - "epoch": 1.0907783804980067, - "grad_norm": 0.4409774839878082, - "learning_rate": 7.261672513362792e-06, - "loss": 0.3328, - "step": 16690 - }, - { - "epoch": 1.0908437357035488, - "grad_norm": 0.44551536440849304, - "learning_rate": 7.261361083312542e-06, - "loss": 0.3674, - "step": 16691 - }, - { - "epoch": 1.0909090909090908, - "grad_norm": 0.4474148750305176, - "learning_rate": 7.261049642232851e-06, - "loss": 0.3619, - "step": 16692 - }, - { - "epoch": 1.0909744461146331, - "grad_norm": 0.4353700578212738, - "learning_rate": 7.260738190125236e-06, - "loss": 0.3308, - "step": 16693 - }, - { - "epoch": 1.0910398013201752, - "grad_norm": 0.4502025842666626, - "learning_rate": 7.260426726991219e-06, - "loss": 0.3334, - "step": 16694 - }, - { - "epoch": 1.0911051565257173, - "grad_norm": 0.4544939398765564, - "learning_rate": 7.260115252832316e-06, - "loss": 0.3559, - "step": 16695 - }, - { - "epoch": 1.0911705117312593, - "grad_norm": 0.4724854528903961, - "learning_rate": 7.25980376765005e-06, - "loss": 0.3604, - "step": 16696 - }, - { - "epoch": 1.0912358669368014, - "grad_norm": 0.492141991853714, - "learning_rate": 7.259492271445937e-06, - "loss": 0.4084, - "step": 16697 - }, - { - "epoch": 1.0913012221423437, - "grad_norm": 0.4570273756980896, - "learning_rate": 7.259180764221498e-06, - "loss": 0.3641, - "step": 16698 - }, - { - "epoch": 1.0913665773478858, - "grad_norm": 0.45201194286346436, - "learning_rate": 7.258869245978252e-06, - "loss": 0.3351, - "step": 16699 - }, - { - "epoch": 1.0914319325534279, - "grad_norm": 0.47722914814949036, - "learning_rate": 7.258557716717717e-06, - "loss": 0.3501, - "step": 16700 - }, - { - "epoch": 1.09149728775897, - "grad_norm": 0.43264567852020264, - "learning_rate": 7.258246176441414e-06, - "loss": 0.3307, - "step": 16701 - }, - { - "epoch": 1.0915626429645122, - "grad_norm": 0.4860178530216217, - "learning_rate": 7.257934625150862e-06, - "loss": 0.3704, - "step": 16702 - }, - { - "epoch": 1.0916279981700543, - "grad_norm": 0.4728662669658661, - "learning_rate": 7.257623062847582e-06, - "loss": 0.3698, - "step": 16703 - }, - { - "epoch": 1.0916933533755964, - "grad_norm": 0.47944486141204834, - "learning_rate": 7.25731148953309e-06, - "loss": 0.3921, - "step": 16704 - }, - { - "epoch": 1.0917587085811384, - "grad_norm": 0.4611533582210541, - "learning_rate": 7.25699990520891e-06, - "loss": 0.3652, - "step": 16705 - }, - { - "epoch": 1.0918240637866805, - "grad_norm": 0.4495225250720978, - "learning_rate": 7.256688309876558e-06, - "loss": 0.3677, - "step": 16706 - }, - { - "epoch": 1.0918894189922228, - "grad_norm": 0.40167364478111267, - "learning_rate": 7.256376703537556e-06, - "loss": 0.3059, - "step": 16707 - }, - { - "epoch": 1.0919547741977649, - "grad_norm": 0.445865273475647, - "learning_rate": 7.256065086193424e-06, - "loss": 0.3662, - "step": 16708 - }, - { - "epoch": 1.092020129403307, - "grad_norm": 0.4442897140979767, - "learning_rate": 7.25575345784568e-06, - "loss": 0.3276, - "step": 16709 - }, - { - "epoch": 1.092085484608849, - "grad_norm": 0.4639493525028229, - "learning_rate": 7.255441818495845e-06, - "loss": 0.3479, - "step": 16710 - }, - { - "epoch": 1.0921508398143913, - "grad_norm": 0.42112016677856445, - "learning_rate": 7.255130168145439e-06, - "loss": 0.3392, - "step": 16711 - }, - { - "epoch": 1.0922161950199334, - "grad_norm": 0.4336766302585602, - "learning_rate": 7.254818506795982e-06, - "loss": 0.3293, - "step": 16712 - }, - { - "epoch": 1.0922815502254755, - "grad_norm": 0.4338686764240265, - "learning_rate": 7.254506834448993e-06, - "loss": 0.3514, - "step": 16713 - }, - { - "epoch": 1.0923469054310175, - "grad_norm": 0.4458778500556946, - "learning_rate": 7.254195151105994e-06, - "loss": 0.3216, - "step": 16714 - }, - { - "epoch": 1.0924122606365596, - "grad_norm": 0.47227126359939575, - "learning_rate": 7.253883456768503e-06, - "loss": 0.3934, - "step": 16715 - }, - { - "epoch": 1.092477615842102, - "grad_norm": 0.4433448612689972, - "learning_rate": 7.253571751438045e-06, - "loss": 0.3563, - "step": 16716 - }, - { - "epoch": 1.092542971047644, - "grad_norm": 0.46793797612190247, - "learning_rate": 7.253260035116133e-06, - "loss": 0.3743, - "step": 16717 - }, - { - "epoch": 1.092608326253186, - "grad_norm": 0.4590999186038971, - "learning_rate": 7.252948307804293e-06, - "loss": 0.3688, - "step": 16718 - }, - { - "epoch": 1.0926736814587281, - "grad_norm": 0.4602227807044983, - "learning_rate": 7.252636569504044e-06, - "loss": 0.3868, - "step": 16719 - }, - { - "epoch": 1.0927390366642702, - "grad_norm": 0.4311557710170746, - "learning_rate": 7.252324820216905e-06, - "loss": 0.3202, - "step": 16720 - }, - { - "epoch": 1.0928043918698125, - "grad_norm": 0.41199976205825806, - "learning_rate": 7.252013059944398e-06, - "loss": 0.3134, - "step": 16721 - }, - { - "epoch": 1.0928697470753546, - "grad_norm": 0.4609593451023102, - "learning_rate": 7.251701288688042e-06, - "loss": 0.3304, - "step": 16722 - }, - { - "epoch": 1.0929351022808966, - "grad_norm": 0.4601035714149475, - "learning_rate": 7.251389506449361e-06, - "loss": 0.3679, - "step": 16723 - }, - { - "epoch": 1.0930004574864387, - "grad_norm": 0.445535808801651, - "learning_rate": 7.251077713229873e-06, - "loss": 0.3218, - "step": 16724 - }, - { - "epoch": 1.093065812691981, - "grad_norm": 0.4361458122730255, - "learning_rate": 7.250765909031098e-06, - "loss": 0.3401, - "step": 16725 - }, - { - "epoch": 1.093131167897523, - "grad_norm": 0.4398879110813141, - "learning_rate": 7.2504540938545585e-06, - "loss": 0.3459, - "step": 16726 - }, - { - "epoch": 1.0931965231030651, - "grad_norm": 0.4309949576854706, - "learning_rate": 7.250142267701774e-06, - "loss": 0.3259, - "step": 16727 - }, - { - "epoch": 1.0932618783086072, - "grad_norm": 0.43104955554008484, - "learning_rate": 7.249830430574267e-06, - "loss": 0.3568, - "step": 16728 - }, - { - "epoch": 1.0933272335141493, - "grad_norm": 0.45479997992515564, - "learning_rate": 7.249518582473558e-06, - "loss": 0.3697, - "step": 16729 - }, - { - "epoch": 1.0933925887196916, - "grad_norm": 0.4632102847099304, - "learning_rate": 7.249206723401167e-06, - "loss": 0.3612, - "step": 16730 - }, - { - "epoch": 1.0934579439252337, - "grad_norm": 0.429729700088501, - "learning_rate": 7.248894853358616e-06, - "loss": 0.3179, - "step": 16731 - }, - { - "epoch": 1.0935232991307757, - "grad_norm": 0.46751073002815247, - "learning_rate": 7.248582972347426e-06, - "loss": 0.3663, - "step": 16732 - }, - { - "epoch": 1.0935886543363178, - "grad_norm": 0.48656025528907776, - "learning_rate": 7.248271080369116e-06, - "loss": 0.3637, - "step": 16733 - }, - { - "epoch": 1.09365400954186, - "grad_norm": 0.4686853289604187, - "learning_rate": 7.24795917742521e-06, - "loss": 0.3496, - "step": 16734 - }, - { - "epoch": 1.0937193647474022, - "grad_norm": 0.4280194044113159, - "learning_rate": 7.247647263517228e-06, - "loss": 0.3324, - "step": 16735 - }, - { - "epoch": 1.0937847199529442, - "grad_norm": 0.4650658369064331, - "learning_rate": 7.247335338646693e-06, - "loss": 0.375, - "step": 16736 - }, - { - "epoch": 1.0938500751584863, - "grad_norm": 0.45358169078826904, - "learning_rate": 7.247023402815125e-06, - "loss": 0.3457, - "step": 16737 - }, - { - "epoch": 1.0939154303640284, - "grad_norm": 0.4725961685180664, - "learning_rate": 7.2467114560240435e-06, - "loss": 0.3802, - "step": 16738 - }, - { - "epoch": 1.0939807855695707, - "grad_norm": 0.45289045572280884, - "learning_rate": 7.246399498274974e-06, - "loss": 0.3774, - "step": 16739 - }, - { - "epoch": 1.0940461407751128, - "grad_norm": 0.5127090811729431, - "learning_rate": 7.246087529569435e-06, - "loss": 0.4047, - "step": 16740 - }, - { - "epoch": 1.0941114959806548, - "grad_norm": 0.43285638093948364, - "learning_rate": 7.245775549908948e-06, - "loss": 0.3237, - "step": 16741 - }, - { - "epoch": 1.094176851186197, - "grad_norm": 0.43867310881614685, - "learning_rate": 7.245463559295036e-06, - "loss": 0.2945, - "step": 16742 - }, - { - "epoch": 1.0942422063917392, - "grad_norm": 0.44885164499282837, - "learning_rate": 7.245151557729221e-06, - "loss": 0.3465, - "step": 16743 - }, - { - "epoch": 1.0943075615972813, - "grad_norm": 0.4602097272872925, - "learning_rate": 7.244839545213024e-06, - "loss": 0.3657, - "step": 16744 - }, - { - "epoch": 1.0943729168028233, - "grad_norm": 0.48046061396598816, - "learning_rate": 7.244527521747966e-06, - "loss": 0.3974, - "step": 16745 - }, - { - "epoch": 1.0944382720083654, - "grad_norm": 0.3908010423183441, - "learning_rate": 7.2442154873355716e-06, - "loss": 0.2774, - "step": 16746 - }, - { - "epoch": 1.0945036272139075, - "grad_norm": 0.4774419069290161, - "learning_rate": 7.2439034419773594e-06, - "loss": 0.347, - "step": 16747 - }, - { - "epoch": 1.0945689824194498, - "grad_norm": 0.46570348739624023, - "learning_rate": 7.243591385674853e-06, - "loss": 0.3568, - "step": 16748 - }, - { - "epoch": 1.0946343376249918, - "grad_norm": 0.4292398989200592, - "learning_rate": 7.243279318429574e-06, - "loss": 0.31, - "step": 16749 - }, - { - "epoch": 1.094699692830534, - "grad_norm": 0.43820425868034363, - "learning_rate": 7.2429672402430465e-06, - "loss": 0.3023, - "step": 16750 - }, - { - "epoch": 1.094765048036076, - "grad_norm": 0.44285324215888977, - "learning_rate": 7.242655151116788e-06, - "loss": 0.3122, - "step": 16751 - }, - { - "epoch": 1.0948304032416183, - "grad_norm": 0.46688154339790344, - "learning_rate": 7.242343051052325e-06, - "loss": 0.3507, - "step": 16752 - }, - { - "epoch": 1.0948957584471604, - "grad_norm": 0.4655936360359192, - "learning_rate": 7.242030940051177e-06, - "loss": 0.3698, - "step": 16753 - }, - { - "epoch": 1.0949611136527024, - "grad_norm": 0.4363328218460083, - "learning_rate": 7.241718818114868e-06, - "loss": 0.3251, - "step": 16754 - }, - { - "epoch": 1.0950264688582445, - "grad_norm": 0.4805351197719574, - "learning_rate": 7.241406685244918e-06, - "loss": 0.4087, - "step": 16755 - }, - { - "epoch": 1.0950918240637866, - "grad_norm": 0.44228002429008484, - "learning_rate": 7.241094541442854e-06, - "loss": 0.3326, - "step": 16756 - }, - { - "epoch": 1.0951571792693289, - "grad_norm": 0.44409671425819397, - "learning_rate": 7.240782386710194e-06, - "loss": 0.3334, - "step": 16757 - }, - { - "epoch": 1.095222534474871, - "grad_norm": 0.4377843141555786, - "learning_rate": 7.240470221048462e-06, - "loss": 0.335, - "step": 16758 - }, - { - "epoch": 1.095287889680413, - "grad_norm": 0.46416565775871277, - "learning_rate": 7.240158044459181e-06, - "loss": 0.3792, - "step": 16759 - }, - { - "epoch": 1.095353244885955, - "grad_norm": 0.422510027885437, - "learning_rate": 7.2398458569438726e-06, - "loss": 0.3248, - "step": 16760 - }, - { - "epoch": 1.0954186000914974, - "grad_norm": 0.440060019493103, - "learning_rate": 7.239533658504061e-06, - "loss": 0.3467, - "step": 16761 - }, - { - "epoch": 1.0954839552970395, - "grad_norm": 0.4493173658847809, - "learning_rate": 7.239221449141267e-06, - "loss": 0.3306, - "step": 16762 - }, - { - "epoch": 1.0955493105025815, - "grad_norm": 0.4283548891544342, - "learning_rate": 7.238909228857015e-06, - "loss": 0.3026, - "step": 16763 - }, - { - "epoch": 1.0956146657081236, - "grad_norm": 0.4168672263622284, - "learning_rate": 7.238596997652827e-06, - "loss": 0.2905, - "step": 16764 - }, - { - "epoch": 1.0956800209136657, - "grad_norm": 0.4448527693748474, - "learning_rate": 7.2382847555302245e-06, - "loss": 0.3385, - "step": 16765 - }, - { - "epoch": 1.095745376119208, - "grad_norm": 0.4585151672363281, - "learning_rate": 7.237972502490733e-06, - "loss": 0.3513, - "step": 16766 - }, - { - "epoch": 1.09581073132475, - "grad_norm": 0.4385329782962799, - "learning_rate": 7.237660238535874e-06, - "loss": 0.3327, - "step": 16767 - }, - { - "epoch": 1.0958760865302921, - "grad_norm": 0.43763411045074463, - "learning_rate": 7.237347963667172e-06, - "loss": 0.3275, - "step": 16768 - }, - { - "epoch": 1.0959414417358342, - "grad_norm": 0.43371498584747314, - "learning_rate": 7.2370356778861464e-06, - "loss": 0.3247, - "step": 16769 - }, - { - "epoch": 1.0960067969413765, - "grad_norm": 0.4241376519203186, - "learning_rate": 7.236723381194325e-06, - "loss": 0.3314, - "step": 16770 - }, - { - "epoch": 1.0960721521469186, - "grad_norm": 0.4330827295780182, - "learning_rate": 7.236411073593228e-06, - "loss": 0.3488, - "step": 16771 - }, - { - "epoch": 1.0961375073524606, - "grad_norm": 0.4511134922504425, - "learning_rate": 7.23609875508438e-06, - "loss": 0.3495, - "step": 16772 - }, - { - "epoch": 1.0962028625580027, - "grad_norm": 0.4721181392669678, - "learning_rate": 7.235786425669302e-06, - "loss": 0.3905, - "step": 16773 - }, - { - "epoch": 1.0962682177635448, - "grad_norm": 0.4329351484775543, - "learning_rate": 7.235474085349521e-06, - "loss": 0.3027, - "step": 16774 - }, - { - "epoch": 1.096333572969087, - "grad_norm": 0.46141180396080017, - "learning_rate": 7.235161734126558e-06, - "loss": 0.3889, - "step": 16775 - }, - { - "epoch": 1.0963989281746291, - "grad_norm": 0.44180065393447876, - "learning_rate": 7.234849372001936e-06, - "loss": 0.3602, - "step": 16776 - }, - { - "epoch": 1.0964642833801712, - "grad_norm": 0.480880469083786, - "learning_rate": 7.234536998977181e-06, - "loss": 0.3797, - "step": 16777 - }, - { - "epoch": 1.0965296385857133, - "grad_norm": 0.43409091234207153, - "learning_rate": 7.234224615053813e-06, - "loss": 0.3144, - "step": 16778 - }, - { - "epoch": 1.0965949937912556, - "grad_norm": 0.4776366651058197, - "learning_rate": 7.233912220233359e-06, - "loss": 0.3799, - "step": 16779 - }, - { - "epoch": 1.0966603489967977, - "grad_norm": 0.4754861891269684, - "learning_rate": 7.233599814517341e-06, - "loss": 0.3543, - "step": 16780 - }, - { - "epoch": 1.0967257042023397, - "grad_norm": 0.44954970479011536, - "learning_rate": 7.233287397907283e-06, - "loss": 0.3416, - "step": 16781 - }, - { - "epoch": 1.0967910594078818, - "grad_norm": 0.4645956754684448, - "learning_rate": 7.232974970404707e-06, - "loss": 0.3773, - "step": 16782 - }, - { - "epoch": 1.0968564146134239, - "grad_norm": 0.4409351050853729, - "learning_rate": 7.23266253201114e-06, - "loss": 0.3812, - "step": 16783 - }, - { - "epoch": 1.0969217698189662, - "grad_norm": 0.5002554059028625, - "learning_rate": 7.232350082728105e-06, - "loss": 0.3846, - "step": 16784 - }, - { - "epoch": 1.0969871250245082, - "grad_norm": 0.47278743982315063, - "learning_rate": 7.232037622557123e-06, - "loss": 0.3442, - "step": 16785 - }, - { - "epoch": 1.0970524802300503, - "grad_norm": 0.45567455887794495, - "learning_rate": 7.231725151499722e-06, - "loss": 0.3794, - "step": 16786 - }, - { - "epoch": 1.0971178354355924, - "grad_norm": 0.4341067969799042, - "learning_rate": 7.231412669557424e-06, - "loss": 0.3131, - "step": 16787 - }, - { - "epoch": 1.0971831906411347, - "grad_norm": 0.4420716166496277, - "learning_rate": 7.231100176731753e-06, - "loss": 0.3193, - "step": 16788 - }, - { - "epoch": 1.0972485458466767, - "grad_norm": 0.4943370223045349, - "learning_rate": 7.2307876730242336e-06, - "loss": 0.3535, - "step": 16789 - }, - { - "epoch": 1.0973139010522188, - "grad_norm": 0.4515933692455292, - "learning_rate": 7.23047515843639e-06, - "loss": 0.3519, - "step": 16790 - }, - { - "epoch": 1.097379256257761, - "grad_norm": 0.44382259249687195, - "learning_rate": 7.230162632969746e-06, - "loss": 0.2976, - "step": 16791 - }, - { - "epoch": 1.097444611463303, - "grad_norm": 0.4211956262588501, - "learning_rate": 7.229850096625828e-06, - "loss": 0.256, - "step": 16792 - }, - { - "epoch": 1.0975099666688453, - "grad_norm": 0.4627498984336853, - "learning_rate": 7.229537549406157e-06, - "loss": 0.3423, - "step": 16793 - }, - { - "epoch": 1.0975753218743873, - "grad_norm": 0.4721478521823883, - "learning_rate": 7.22922499131226e-06, - "loss": 0.3423, - "step": 16794 - }, - { - "epoch": 1.0976406770799294, - "grad_norm": 0.42021995782852173, - "learning_rate": 7.22891242234566e-06, - "loss": 0.2962, - "step": 16795 - }, - { - "epoch": 1.0977060322854715, - "grad_norm": 0.45924872159957886, - "learning_rate": 7.228599842507881e-06, - "loss": 0.3606, - "step": 16796 - }, - { - "epoch": 1.0977713874910138, - "grad_norm": 0.44723430275917053, - "learning_rate": 7.22828725180045e-06, - "loss": 0.3605, - "step": 16797 - }, - { - "epoch": 1.0978367426965558, - "grad_norm": 0.4550357162952423, - "learning_rate": 7.227974650224888e-06, - "loss": 0.3062, - "step": 16798 - }, - { - "epoch": 1.097902097902098, - "grad_norm": 0.4332388937473297, - "learning_rate": 7.227662037782723e-06, - "loss": 0.3046, - "step": 16799 - }, - { - "epoch": 1.09796745310764, - "grad_norm": 0.4315112233161926, - "learning_rate": 7.227349414475479e-06, - "loss": 0.3376, - "step": 16800 - }, - { - "epoch": 1.098032808313182, - "grad_norm": 0.43725505471229553, - "learning_rate": 7.227036780304679e-06, - "loss": 0.3364, - "step": 16801 - }, - { - "epoch": 1.0980981635187244, - "grad_norm": 0.44807305932044983, - "learning_rate": 7.22672413527185e-06, - "loss": 0.3466, - "step": 16802 - }, - { - "epoch": 1.0981635187242664, - "grad_norm": 0.49027779698371887, - "learning_rate": 7.226411479378517e-06, - "loss": 0.361, - "step": 16803 - }, - { - "epoch": 1.0982288739298085, - "grad_norm": 0.451984167098999, - "learning_rate": 7.2260988126262035e-06, - "loss": 0.3379, - "step": 16804 - }, - { - "epoch": 1.0982942291353506, - "grad_norm": 0.4550917446613312, - "learning_rate": 7.225786135016433e-06, - "loss": 0.3425, - "step": 16805 - }, - { - "epoch": 1.0983595843408929, - "grad_norm": 0.4698694944381714, - "learning_rate": 7.225473446550733e-06, - "loss": 0.3635, - "step": 16806 - }, - { - "epoch": 1.098424939546435, - "grad_norm": 0.461752712726593, - "learning_rate": 7.225160747230628e-06, - "loss": 0.3439, - "step": 16807 - }, - { - "epoch": 1.098490294751977, - "grad_norm": 0.4706531763076782, - "learning_rate": 7.224848037057646e-06, - "loss": 0.391, - "step": 16808 - }, - { - "epoch": 1.098555649957519, - "grad_norm": 0.47599413990974426, - "learning_rate": 7.224535316033305e-06, - "loss": 0.3778, - "step": 16809 - }, - { - "epoch": 1.0986210051630612, - "grad_norm": 0.44619935750961304, - "learning_rate": 7.224222584159139e-06, - "loss": 0.3472, - "step": 16810 - }, - { - "epoch": 1.0986863603686035, - "grad_norm": 0.4722409248352051, - "learning_rate": 7.223909841436666e-06, - "loss": 0.3464, - "step": 16811 - }, - { - "epoch": 1.0987517155741455, - "grad_norm": 0.4703991115093231, - "learning_rate": 7.223597087867414e-06, - "loss": 0.3813, - "step": 16812 - }, - { - "epoch": 1.0988170707796876, - "grad_norm": 0.4867233633995056, - "learning_rate": 7.22328432345291e-06, - "loss": 0.3903, - "step": 16813 - }, - { - "epoch": 1.0988824259852297, - "grad_norm": 0.46626606583595276, - "learning_rate": 7.222971548194679e-06, - "loss": 0.3176, - "step": 16814 - }, - { - "epoch": 1.0989477811907717, - "grad_norm": 0.45740047097206116, - "learning_rate": 7.2226587620942456e-06, - "loss": 0.3675, - "step": 16815 - }, - { - "epoch": 1.099013136396314, - "grad_norm": 0.47037217020988464, - "learning_rate": 7.222345965153133e-06, - "loss": 0.3611, - "step": 16816 - }, - { - "epoch": 1.0990784916018561, - "grad_norm": 0.4419795870780945, - "learning_rate": 7.222033157372871e-06, - "loss": 0.3487, - "step": 16817 - }, - { - "epoch": 1.0991438468073982, - "grad_norm": 0.42824700474739075, - "learning_rate": 7.221720338754983e-06, - "loss": 0.3176, - "step": 16818 - }, - { - "epoch": 1.0992092020129403, - "grad_norm": 0.44660231471061707, - "learning_rate": 7.221407509300995e-06, - "loss": 0.333, - "step": 16819 - }, - { - "epoch": 1.0992745572184826, - "grad_norm": 0.4343295991420746, - "learning_rate": 7.221094669012433e-06, - "loss": 0.2936, - "step": 16820 - }, - { - "epoch": 1.0993399124240246, - "grad_norm": 0.4637698531150818, - "learning_rate": 7.220781817890823e-06, - "loss": 0.3182, - "step": 16821 - }, - { - "epoch": 1.0994052676295667, - "grad_norm": 0.42962801456451416, - "learning_rate": 7.220468955937692e-06, - "loss": 0.3522, - "step": 16822 - }, - { - "epoch": 1.0994706228351088, - "grad_norm": 0.4624047875404358, - "learning_rate": 7.220156083154562e-06, - "loss": 0.3624, - "step": 16823 - }, - { - "epoch": 1.0995359780406508, - "grad_norm": 0.43798285722732544, - "learning_rate": 7.219843199542964e-06, - "loss": 0.3019, - "step": 16824 - }, - { - "epoch": 1.0996013332461931, - "grad_norm": 0.47579702734947205, - "learning_rate": 7.219530305104421e-06, - "loss": 0.3589, - "step": 16825 - }, - { - "epoch": 1.0996666884517352, - "grad_norm": 0.45312392711639404, - "learning_rate": 7.2192173998404595e-06, - "loss": 0.3251, - "step": 16826 - }, - { - "epoch": 1.0997320436572773, - "grad_norm": 0.4656205177307129, - "learning_rate": 7.218904483752605e-06, - "loss": 0.3657, - "step": 16827 - }, - { - "epoch": 1.0997973988628194, - "grad_norm": 0.44865405559539795, - "learning_rate": 7.218591556842386e-06, - "loss": 0.3208, - "step": 16828 - }, - { - "epoch": 1.0998627540683616, - "grad_norm": 0.4409961998462677, - "learning_rate": 7.218278619111326e-06, - "loss": 0.3416, - "step": 16829 - }, - { - "epoch": 1.0999281092739037, - "grad_norm": 0.49070578813552856, - "learning_rate": 7.217965670560955e-06, - "loss": 0.3824, - "step": 16830 - }, - { - "epoch": 1.0999934644794458, - "grad_norm": 0.44723668694496155, - "learning_rate": 7.217652711192796e-06, - "loss": 0.3456, - "step": 16831 - }, - { - "epoch": 1.1000588196849879, - "grad_norm": 0.4212040603160858, - "learning_rate": 7.217339741008376e-06, - "loss": 0.3011, - "step": 16832 - }, - { - "epoch": 1.10012417489053, - "grad_norm": 0.48938217759132385, - "learning_rate": 7.217026760009223e-06, - "loss": 0.3438, - "step": 16833 - }, - { - "epoch": 1.1001895300960722, - "grad_norm": 0.455473393201828, - "learning_rate": 7.216713768196861e-06, - "loss": 0.3657, - "step": 16834 - }, - { - "epoch": 1.1002548853016143, - "grad_norm": 0.4355417490005493, - "learning_rate": 7.21640076557282e-06, - "loss": 0.3286, - "step": 16835 - }, - { - "epoch": 1.1003202405071564, - "grad_norm": 0.4471430480480194, - "learning_rate": 7.216087752138622e-06, - "loss": 0.3384, - "step": 16836 - }, - { - "epoch": 1.1003855957126984, - "grad_norm": 0.496289998292923, - "learning_rate": 7.215774727895798e-06, - "loss": 0.3855, - "step": 16837 - }, - { - "epoch": 1.1004509509182405, - "grad_norm": 0.41548284888267517, - "learning_rate": 7.215461692845872e-06, - "loss": 0.3097, - "step": 16838 - }, - { - "epoch": 1.1005163061237828, - "grad_norm": 0.42794421315193176, - "learning_rate": 7.215148646990373e-06, - "loss": 0.2958, - "step": 16839 - }, - { - "epoch": 1.100581661329325, - "grad_norm": 0.4622229337692261, - "learning_rate": 7.214835590330825e-06, - "loss": 0.388, - "step": 16840 - }, - { - "epoch": 1.100647016534867, - "grad_norm": 0.4127064347267151, - "learning_rate": 7.214522522868758e-06, - "loss": 0.2861, - "step": 16841 - }, - { - "epoch": 1.100712371740409, - "grad_norm": 0.46275952458381653, - "learning_rate": 7.2142094446056974e-06, - "loss": 0.3678, - "step": 16842 - }, - { - "epoch": 1.1007777269459513, - "grad_norm": 0.4520907998085022, - "learning_rate": 7.213896355543169e-06, - "loss": 0.3444, - "step": 16843 - }, - { - "epoch": 1.1008430821514934, - "grad_norm": 0.43794530630111694, - "learning_rate": 7.213583255682702e-06, - "loss": 0.3608, - "step": 16844 - }, - { - "epoch": 1.1009084373570355, - "grad_norm": 0.4261733591556549, - "learning_rate": 7.213270145025822e-06, - "loss": 0.327, - "step": 16845 - }, - { - "epoch": 1.1009737925625775, - "grad_norm": 0.44544142484664917, - "learning_rate": 7.212957023574059e-06, - "loss": 0.324, - "step": 16846 - }, - { - "epoch": 1.1010391477681196, - "grad_norm": 0.44111916422843933, - "learning_rate": 7.212643891328935e-06, - "loss": 0.3333, - "step": 16847 - }, - { - "epoch": 1.101104502973662, - "grad_norm": 0.4675199091434479, - "learning_rate": 7.212330748291982e-06, - "loss": 0.3668, - "step": 16848 - }, - { - "epoch": 1.101169858179204, - "grad_norm": 0.4404946565628052, - "learning_rate": 7.212017594464725e-06, - "loss": 0.3765, - "step": 16849 - }, - { - "epoch": 1.101235213384746, - "grad_norm": 0.4739381670951843, - "learning_rate": 7.211704429848691e-06, - "loss": 0.3786, - "step": 16850 - }, - { - "epoch": 1.1013005685902881, - "grad_norm": 0.4321777820587158, - "learning_rate": 7.2113912544454105e-06, - "loss": 0.343, - "step": 16851 - }, - { - "epoch": 1.1013659237958304, - "grad_norm": 0.4226875305175781, - "learning_rate": 7.211078068256408e-06, - "loss": 0.3301, - "step": 16852 - }, - { - "epoch": 1.1014312790013725, - "grad_norm": 0.4376566410064697, - "learning_rate": 7.210764871283211e-06, - "loss": 0.3529, - "step": 16853 - }, - { - "epoch": 1.1014966342069146, - "grad_norm": 0.45876818895339966, - "learning_rate": 7.210451663527347e-06, - "loss": 0.3348, - "step": 16854 - }, - { - "epoch": 1.1015619894124566, - "grad_norm": 0.4542117118835449, - "learning_rate": 7.210138444990347e-06, - "loss": 0.3774, - "step": 16855 - }, - { - "epoch": 1.1016273446179987, - "grad_norm": 0.45462122559547424, - "learning_rate": 7.209825215673734e-06, - "loss": 0.3707, - "step": 16856 - }, - { - "epoch": 1.101692699823541, - "grad_norm": 0.4399607181549072, - "learning_rate": 7.209511975579039e-06, - "loss": 0.3169, - "step": 16857 - }, - { - "epoch": 1.101758055029083, - "grad_norm": 0.49060624837875366, - "learning_rate": 7.209198724707788e-06, - "loss": 0.4028, - "step": 16858 - }, - { - "epoch": 1.1018234102346252, - "grad_norm": 0.4618798792362213, - "learning_rate": 7.20888546306151e-06, - "loss": 0.3329, - "step": 16859 - }, - { - "epoch": 1.1018887654401672, - "grad_norm": 0.46027952432632446, - "learning_rate": 7.208572190641732e-06, - "loss": 0.34, - "step": 16860 - }, - { - "epoch": 1.1019541206457095, - "grad_norm": 0.45319730043411255, - "learning_rate": 7.208258907449982e-06, - "loss": 0.356, - "step": 16861 - }, - { - "epoch": 1.1020194758512516, - "grad_norm": 0.4011819362640381, - "learning_rate": 7.207945613487789e-06, - "loss": 0.2816, - "step": 16862 - }, - { - "epoch": 1.1020848310567937, - "grad_norm": 0.4350883662700653, - "learning_rate": 7.207632308756679e-06, - "loss": 0.3276, - "step": 16863 - }, - { - "epoch": 1.1021501862623357, - "grad_norm": 0.4412400424480438, - "learning_rate": 7.2073189932581835e-06, - "loss": 0.3586, - "step": 16864 - }, - { - "epoch": 1.1022155414678778, - "grad_norm": 0.43144404888153076, - "learning_rate": 7.207005666993827e-06, - "loss": 0.3026, - "step": 16865 - }, - { - "epoch": 1.10228089667342, - "grad_norm": 0.4390637278556824, - "learning_rate": 7.206692329965139e-06, - "loss": 0.3139, - "step": 16866 - }, - { - "epoch": 1.1023462518789622, - "grad_norm": 0.4494231343269348, - "learning_rate": 7.20637898217365e-06, - "loss": 0.3573, - "step": 16867 - }, - { - "epoch": 1.1024116070845043, - "grad_norm": 0.46788421273231506, - "learning_rate": 7.206065623620885e-06, - "loss": 0.4113, - "step": 16868 - }, - { - "epoch": 1.1024769622900463, - "grad_norm": 0.43642088770866394, - "learning_rate": 7.205752254308374e-06, - "loss": 0.3424, - "step": 16869 - }, - { - "epoch": 1.1025423174955886, - "grad_norm": 0.4403359293937683, - "learning_rate": 7.205438874237644e-06, - "loss": 0.341, - "step": 16870 - }, - { - "epoch": 1.1026076727011307, - "grad_norm": 0.43854424357414246, - "learning_rate": 7.205125483410226e-06, - "loss": 0.3129, - "step": 16871 - }, - { - "epoch": 1.1026730279066728, - "grad_norm": 0.4508354663848877, - "learning_rate": 7.204812081827645e-06, - "loss": 0.3557, - "step": 16872 - }, - { - "epoch": 1.1027383831122148, - "grad_norm": 0.47603222727775574, - "learning_rate": 7.204498669491435e-06, - "loss": 0.3784, - "step": 16873 - }, - { - "epoch": 1.102803738317757, - "grad_norm": 0.4385119676589966, - "learning_rate": 7.2041852464031195e-06, - "loss": 0.3386, - "step": 16874 - }, - { - "epoch": 1.1028690935232992, - "grad_norm": 0.4601020812988281, - "learning_rate": 7.203871812564229e-06, - "loss": 0.3329, - "step": 16875 - }, - { - "epoch": 1.1029344487288413, - "grad_norm": 0.47119858860969543, - "learning_rate": 7.203558367976292e-06, - "loss": 0.3425, - "step": 16876 - }, - { - "epoch": 1.1029998039343833, - "grad_norm": 0.5225222706794739, - "learning_rate": 7.203244912640839e-06, - "loss": 0.4024, - "step": 16877 - }, - { - "epoch": 1.1030651591399254, - "grad_norm": 0.4457537531852722, - "learning_rate": 7.202931446559395e-06, - "loss": 0.3248, - "step": 16878 - }, - { - "epoch": 1.1031305143454677, - "grad_norm": 0.4651030898094177, - "learning_rate": 7.202617969733492e-06, - "loss": 0.3476, - "step": 16879 - }, - { - "epoch": 1.1031958695510098, - "grad_norm": 0.5119554996490479, - "learning_rate": 7.202304482164659e-06, - "loss": 0.4074, - "step": 16880 - }, - { - "epoch": 1.1032612247565519, - "grad_norm": 0.43499282002449036, - "learning_rate": 7.201990983854422e-06, - "loss": 0.3081, - "step": 16881 - }, - { - "epoch": 1.103326579962094, - "grad_norm": 0.4564814269542694, - "learning_rate": 7.201677474804314e-06, - "loss": 0.3473, - "step": 16882 - }, - { - "epoch": 1.103391935167636, - "grad_norm": 0.5000506043434143, - "learning_rate": 7.201363955015861e-06, - "loss": 0.3481, - "step": 16883 - }, - { - "epoch": 1.1034572903731783, - "grad_norm": 0.44875526428222656, - "learning_rate": 7.201050424490594e-06, - "loss": 0.3359, - "step": 16884 - }, - { - "epoch": 1.1035226455787204, - "grad_norm": 0.47175514698028564, - "learning_rate": 7.200736883230042e-06, - "loss": 0.3684, - "step": 16885 - }, - { - "epoch": 1.1035880007842624, - "grad_norm": 0.46201175451278687, - "learning_rate": 7.200423331235733e-06, - "loss": 0.3293, - "step": 16886 - }, - { - "epoch": 1.1036533559898045, - "grad_norm": 0.4432675540447235, - "learning_rate": 7.200109768509198e-06, - "loss": 0.3247, - "step": 16887 - }, - { - "epoch": 1.1037187111953468, - "grad_norm": 0.43380874395370483, - "learning_rate": 7.1997961950519646e-06, - "loss": 0.3128, - "step": 16888 - }, - { - "epoch": 1.1037840664008889, - "grad_norm": 0.416278213262558, - "learning_rate": 7.199482610865563e-06, - "loss": 0.3319, - "step": 16889 - }, - { - "epoch": 1.103849421606431, - "grad_norm": 0.4704750180244446, - "learning_rate": 7.199169015951523e-06, - "loss": 0.3685, - "step": 16890 - }, - { - "epoch": 1.103914776811973, - "grad_norm": 0.432136207818985, - "learning_rate": 7.198855410311374e-06, - "loss": 0.3094, - "step": 16891 - }, - { - "epoch": 1.103980132017515, - "grad_norm": 0.44062837958335876, - "learning_rate": 7.198541793946645e-06, - "loss": 0.3376, - "step": 16892 - }, - { - "epoch": 1.1040454872230574, - "grad_norm": 0.4458533525466919, - "learning_rate": 7.1982281668588675e-06, - "loss": 0.3564, - "step": 16893 - }, - { - "epoch": 1.1041108424285995, - "grad_norm": 0.4284687042236328, - "learning_rate": 7.197914529049568e-06, - "loss": 0.3412, - "step": 16894 - }, - { - "epoch": 1.1041761976341415, - "grad_norm": 0.42662525177001953, - "learning_rate": 7.197600880520279e-06, - "loss": 0.3052, - "step": 16895 - }, - { - "epoch": 1.1042415528396836, - "grad_norm": 0.44712355732917786, - "learning_rate": 7.1972872212725284e-06, - "loss": 0.3426, - "step": 16896 - }, - { - "epoch": 1.104306908045226, - "grad_norm": 0.45476073026657104, - "learning_rate": 7.1969735513078475e-06, - "loss": 0.3327, - "step": 16897 - }, - { - "epoch": 1.104372263250768, - "grad_norm": 0.4307636022567749, - "learning_rate": 7.196659870627765e-06, - "loss": 0.317, - "step": 16898 - }, - { - "epoch": 1.10443761845631, - "grad_norm": 0.4835704267024994, - "learning_rate": 7.1963461792338115e-06, - "loss": 0.3859, - "step": 16899 - }, - { - "epoch": 1.1045029736618521, - "grad_norm": 0.4754636883735657, - "learning_rate": 7.196032477127517e-06, - "loss": 0.3721, - "step": 16900 - }, - { - "epoch": 1.1045683288673942, - "grad_norm": 0.4795001447200775, - "learning_rate": 7.195718764310411e-06, - "loss": 0.3767, - "step": 16901 - }, - { - "epoch": 1.1046336840729365, - "grad_norm": 0.4457133114337921, - "learning_rate": 7.195405040784025e-06, - "loss": 0.3298, - "step": 16902 - }, - { - "epoch": 1.1046990392784786, - "grad_norm": 0.454456090927124, - "learning_rate": 7.1950913065498865e-06, - "loss": 0.3312, - "step": 16903 - }, - { - "epoch": 1.1047643944840206, - "grad_norm": 0.4530438780784607, - "learning_rate": 7.194777561609527e-06, - "loss": 0.3398, - "step": 16904 - }, - { - "epoch": 1.1048297496895627, - "grad_norm": 0.44374170899391174, - "learning_rate": 7.194463805964478e-06, - "loss": 0.3344, - "step": 16905 - }, - { - "epoch": 1.104895104895105, - "grad_norm": 0.4460992217063904, - "learning_rate": 7.1941500396162675e-06, - "loss": 0.3491, - "step": 16906 - }, - { - "epoch": 1.104960460100647, - "grad_norm": 0.44110846519470215, - "learning_rate": 7.19383626256643e-06, - "loss": 0.3368, - "step": 16907 - }, - { - "epoch": 1.1050258153061892, - "grad_norm": 0.4690539836883545, - "learning_rate": 7.19352247481649e-06, - "loss": 0.3799, - "step": 16908 - }, - { - "epoch": 1.1050911705117312, - "grad_norm": 0.46475502848625183, - "learning_rate": 7.193208676367982e-06, - "loss": 0.3432, - "step": 16909 - }, - { - "epoch": 1.1051565257172733, - "grad_norm": 0.4278426468372345, - "learning_rate": 7.192894867222435e-06, - "loss": 0.3075, - "step": 16910 - }, - { - "epoch": 1.1052218809228156, - "grad_norm": 0.42875391244888306, - "learning_rate": 7.192581047381382e-06, - "loss": 0.3429, - "step": 16911 - }, - { - "epoch": 1.1052872361283577, - "grad_norm": 0.44750601053237915, - "learning_rate": 7.19226721684635e-06, - "loss": 0.367, - "step": 16912 - }, - { - "epoch": 1.1053525913338997, - "grad_norm": 0.4273897409439087, - "learning_rate": 7.191953375618872e-06, - "loss": 0.3022, - "step": 16913 - }, - { - "epoch": 1.1054179465394418, - "grad_norm": 0.4383806884288788, - "learning_rate": 7.191639523700478e-06, - "loss": 0.3435, - "step": 16914 - }, - { - "epoch": 1.105483301744984, - "grad_norm": 0.4066704213619232, - "learning_rate": 7.1913256610926975e-06, - "loss": 0.2842, - "step": 16915 - }, - { - "epoch": 1.1055486569505262, - "grad_norm": 0.4943954348564148, - "learning_rate": 7.191011787797064e-06, - "loss": 0.355, - "step": 16916 - }, - { - "epoch": 1.1056140121560682, - "grad_norm": 0.4474785625934601, - "learning_rate": 7.190697903815106e-06, - "loss": 0.3463, - "step": 16917 - }, - { - "epoch": 1.1056793673616103, - "grad_norm": 0.48214051127433777, - "learning_rate": 7.190384009148357e-06, - "loss": 0.38, - "step": 16918 - }, - { - "epoch": 1.1057447225671524, - "grad_norm": 0.4600478410720825, - "learning_rate": 7.190070103798346e-06, - "loss": 0.3504, - "step": 16919 - }, - { - "epoch": 1.1058100777726947, - "grad_norm": 0.4889717698097229, - "learning_rate": 7.1897561877666035e-06, - "loss": 0.3853, - "step": 16920 - }, - { - "epoch": 1.1058754329782368, - "grad_norm": 0.4611831307411194, - "learning_rate": 7.18944226105466e-06, - "loss": 0.3442, - "step": 16921 - }, - { - "epoch": 1.1059407881837788, - "grad_norm": 0.42291581630706787, - "learning_rate": 7.189128323664051e-06, - "loss": 0.3171, - "step": 16922 - }, - { - "epoch": 1.106006143389321, - "grad_norm": 0.46375003457069397, - "learning_rate": 7.1888143755963026e-06, - "loss": 0.3743, - "step": 16923 - }, - { - "epoch": 1.106071498594863, - "grad_norm": 0.4620855450630188, - "learning_rate": 7.188500416852949e-06, - "loss": 0.3471, - "step": 16924 - }, - { - "epoch": 1.1061368538004053, - "grad_norm": 0.41443929076194763, - "learning_rate": 7.188186447435521e-06, - "loss": 0.3073, - "step": 16925 - }, - { - "epoch": 1.1062022090059473, - "grad_norm": 0.4544248580932617, - "learning_rate": 7.187872467345549e-06, - "loss": 0.3464, - "step": 16926 - }, - { - "epoch": 1.1062675642114894, - "grad_norm": 0.4621868133544922, - "learning_rate": 7.187558476584566e-06, - "loss": 0.3777, - "step": 16927 - }, - { - "epoch": 1.1063329194170315, - "grad_norm": 0.4542226195335388, - "learning_rate": 7.1872444751541025e-06, - "loss": 0.3592, - "step": 16928 - }, - { - "epoch": 1.1063982746225738, - "grad_norm": 0.44520843029022217, - "learning_rate": 7.186930463055689e-06, - "loss": 0.3215, - "step": 16929 - }, - { - "epoch": 1.1064636298281159, - "grad_norm": 0.4341967701911926, - "learning_rate": 7.186616440290858e-06, - "loss": 0.3175, - "step": 16930 - }, - { - "epoch": 1.106528985033658, - "grad_norm": 0.44681474566459656, - "learning_rate": 7.186302406861142e-06, - "loss": 0.3556, - "step": 16931 - }, - { - "epoch": 1.1065943402392, - "grad_norm": 0.4931948781013489, - "learning_rate": 7.18598836276807e-06, - "loss": 0.3931, - "step": 16932 - }, - { - "epoch": 1.106659695444742, - "grad_norm": 0.45590752363204956, - "learning_rate": 7.185674308013177e-06, - "loss": 0.3807, - "step": 16933 - }, - { - "epoch": 1.1067250506502844, - "grad_norm": 0.41704118251800537, - "learning_rate": 7.185360242597994e-06, - "loss": 0.3275, - "step": 16934 - }, - { - "epoch": 1.1067904058558264, - "grad_norm": 0.47248926758766174, - "learning_rate": 7.18504616652405e-06, - "loss": 0.3537, - "step": 16935 - }, - { - "epoch": 1.1068557610613685, - "grad_norm": 0.4390184283256531, - "learning_rate": 7.184732079792881e-06, - "loss": 0.3107, - "step": 16936 - }, - { - "epoch": 1.1069211162669106, - "grad_norm": 0.4599098265171051, - "learning_rate": 7.1844179824060155e-06, - "loss": 0.3688, - "step": 16937 - }, - { - "epoch": 1.1069864714724529, - "grad_norm": 0.4395337402820587, - "learning_rate": 7.184103874364987e-06, - "loss": 0.3454, - "step": 16938 - }, - { - "epoch": 1.107051826677995, - "grad_norm": 0.44652825593948364, - "learning_rate": 7.183789755671328e-06, - "loss": 0.3321, - "step": 16939 - }, - { - "epoch": 1.107117181883537, - "grad_norm": 0.46515724062919617, - "learning_rate": 7.183475626326568e-06, - "loss": 0.3302, - "step": 16940 - }, - { - "epoch": 1.107182537089079, - "grad_norm": 0.45389124751091003, - "learning_rate": 7.183161486332242e-06, - "loss": 0.3465, - "step": 16941 - }, - { - "epoch": 1.1072478922946212, - "grad_norm": 0.5035362243652344, - "learning_rate": 7.182847335689882e-06, - "loss": 0.3836, - "step": 16942 - }, - { - "epoch": 1.1073132475001635, - "grad_norm": 0.4707864224910736, - "learning_rate": 7.182533174401017e-06, - "loss": 0.3422, - "step": 16943 - }, - { - "epoch": 1.1073786027057055, - "grad_norm": 0.4576711058616638, - "learning_rate": 7.182219002467183e-06, - "loss": 0.3255, - "step": 16944 - }, - { - "epoch": 1.1074439579112476, - "grad_norm": 0.44810256361961365, - "learning_rate": 7.181904819889912e-06, - "loss": 0.3555, - "step": 16945 - }, - { - "epoch": 1.1075093131167897, - "grad_norm": 0.4620898365974426, - "learning_rate": 7.181590626670734e-06, - "loss": 0.3527, - "step": 16946 - }, - { - "epoch": 1.1075746683223318, - "grad_norm": 0.4596816301345825, - "learning_rate": 7.181276422811183e-06, - "loss": 0.3592, - "step": 16947 - }, - { - "epoch": 1.107640023527874, - "grad_norm": 0.4484982490539551, - "learning_rate": 7.18096220831279e-06, - "loss": 0.3202, - "step": 16948 - }, - { - "epoch": 1.1077053787334161, - "grad_norm": 0.44571617245674133, - "learning_rate": 7.1806479831770905e-06, - "loss": 0.356, - "step": 16949 - }, - { - "epoch": 1.1077707339389582, - "grad_norm": 0.4459291398525238, - "learning_rate": 7.180333747405615e-06, - "loss": 0.3339, - "step": 16950 - }, - { - "epoch": 1.1078360891445003, - "grad_norm": 0.4683772921562195, - "learning_rate": 7.180019500999895e-06, - "loss": 0.3843, - "step": 16951 - }, - { - "epoch": 1.1079014443500426, - "grad_norm": 0.43534207344055176, - "learning_rate": 7.179705243961467e-06, - "loss": 0.3137, - "step": 16952 - }, - { - "epoch": 1.1079667995555846, - "grad_norm": 0.47841548919677734, - "learning_rate": 7.17939097629186e-06, - "loss": 0.3655, - "step": 16953 - }, - { - "epoch": 1.1080321547611267, - "grad_norm": 0.44662392139434814, - "learning_rate": 7.179076697992608e-06, - "loss": 0.3349, - "step": 16954 - }, - { - "epoch": 1.1080975099666688, - "grad_norm": 0.43585604429244995, - "learning_rate": 7.178762409065245e-06, - "loss": 0.3222, - "step": 16955 - }, - { - "epoch": 1.1081628651722109, - "grad_norm": 0.4386887550354004, - "learning_rate": 7.178448109511303e-06, - "loss": 0.3301, - "step": 16956 - }, - { - "epoch": 1.1082282203777531, - "grad_norm": 0.45249950885772705, - "learning_rate": 7.178133799332313e-06, - "loss": 0.374, - "step": 16957 - }, - { - "epoch": 1.1082935755832952, - "grad_norm": 0.44922906160354614, - "learning_rate": 7.177819478529811e-06, - "loss": 0.3431, - "step": 16958 - }, - { - "epoch": 1.1083589307888373, - "grad_norm": 0.4794429838657379, - "learning_rate": 7.177505147105329e-06, - "loss": 0.3538, - "step": 16959 - }, - { - "epoch": 1.1084242859943794, - "grad_norm": 0.4555109739303589, - "learning_rate": 7.177190805060402e-06, - "loss": 0.3371, - "step": 16960 - }, - { - "epoch": 1.1084896411999217, - "grad_norm": 0.45904722809791565, - "learning_rate": 7.176876452396558e-06, - "loss": 0.375, - "step": 16961 - }, - { - "epoch": 1.1085549964054637, - "grad_norm": 0.4988830089569092, - "learning_rate": 7.1765620891153354e-06, - "loss": 0.4157, - "step": 16962 - }, - { - "epoch": 1.1086203516110058, - "grad_norm": 0.462028831243515, - "learning_rate": 7.1762477152182655e-06, - "loss": 0.342, - "step": 16963 - }, - { - "epoch": 1.1086857068165479, - "grad_norm": 0.4333418011665344, - "learning_rate": 7.17593333070688e-06, - "loss": 0.3029, - "step": 16964 - }, - { - "epoch": 1.10875106202209, - "grad_norm": 0.4603612422943115, - "learning_rate": 7.175618935582716e-06, - "loss": 0.3413, - "step": 16965 - }, - { - "epoch": 1.1088164172276322, - "grad_norm": 0.43920230865478516, - "learning_rate": 7.175304529847303e-06, - "loss": 0.3438, - "step": 16966 - }, - { - "epoch": 1.1088817724331743, - "grad_norm": 0.45885127782821655, - "learning_rate": 7.174990113502176e-06, - "loss": 0.3498, - "step": 16967 - }, - { - "epoch": 1.1089471276387164, - "grad_norm": 0.44494903087615967, - "learning_rate": 7.17467568654887e-06, - "loss": 0.3211, - "step": 16968 - }, - { - "epoch": 1.1090124828442585, - "grad_norm": 0.44241607189178467, - "learning_rate": 7.174361248988917e-06, - "loss": 0.3431, - "step": 16969 - }, - { - "epoch": 1.1090778380498008, - "grad_norm": 0.4518430531024933, - "learning_rate": 7.1740468008238494e-06, - "loss": 0.3164, - "step": 16970 - }, - { - "epoch": 1.1091431932553428, - "grad_norm": 0.44299453496932983, - "learning_rate": 7.173732342055204e-06, - "loss": 0.3187, - "step": 16971 - }, - { - "epoch": 1.109208548460885, - "grad_norm": 0.4495779573917389, - "learning_rate": 7.173417872684513e-06, - "loss": 0.338, - "step": 16972 - }, - { - "epoch": 1.109273903666427, - "grad_norm": 0.42455625534057617, - "learning_rate": 7.17310339271331e-06, - "loss": 0.3367, - "step": 16973 - }, - { - "epoch": 1.109339258871969, - "grad_norm": 0.4374285042285919, - "learning_rate": 7.172788902143128e-06, - "loss": 0.3414, - "step": 16974 - }, - { - "epoch": 1.1094046140775113, - "grad_norm": 0.4373040199279785, - "learning_rate": 7.172474400975502e-06, - "loss": 0.3114, - "step": 16975 - }, - { - "epoch": 1.1094699692830534, - "grad_norm": 0.5009270906448364, - "learning_rate": 7.172159889211966e-06, - "loss": 0.3965, - "step": 16976 - }, - { - "epoch": 1.1095353244885955, - "grad_norm": 0.4263479709625244, - "learning_rate": 7.171845366854053e-06, - "loss": 0.3364, - "step": 16977 - }, - { - "epoch": 1.1096006796941376, - "grad_norm": 0.44614484906196594, - "learning_rate": 7.1715308339033e-06, - "loss": 0.3529, - "step": 16978 - }, - { - "epoch": 1.1096660348996799, - "grad_norm": 0.44420912861824036, - "learning_rate": 7.171216290361237e-06, - "loss": 0.3363, - "step": 16979 - }, - { - "epoch": 1.109731390105222, - "grad_norm": 0.4217231571674347, - "learning_rate": 7.1709017362294e-06, - "loss": 0.3295, - "step": 16980 - }, - { - "epoch": 1.109796745310764, - "grad_norm": 0.4209829270839691, - "learning_rate": 7.170587171509325e-06, - "loss": 0.3025, - "step": 16981 - }, - { - "epoch": 1.109862100516306, - "grad_norm": 0.485849529504776, - "learning_rate": 7.170272596202542e-06, - "loss": 0.3501, - "step": 16982 - }, - { - "epoch": 1.1099274557218481, - "grad_norm": 0.4528864920139313, - "learning_rate": 7.169958010310589e-06, - "loss": 0.3316, - "step": 16983 - }, - { - "epoch": 1.1099928109273904, - "grad_norm": 0.4277852475643158, - "learning_rate": 7.169643413834998e-06, - "loss": 0.3273, - "step": 16984 - }, - { - "epoch": 1.1100581661329325, - "grad_norm": 0.44532978534698486, - "learning_rate": 7.169328806777306e-06, - "loss": 0.3384, - "step": 16985 - }, - { - "epoch": 1.1101235213384746, - "grad_norm": 0.42431533336639404, - "learning_rate": 7.169014189139044e-06, - "loss": 0.3247, - "step": 16986 - }, - { - "epoch": 1.1101888765440167, - "grad_norm": 0.4513096213340759, - "learning_rate": 7.16869956092175e-06, - "loss": 0.3473, - "step": 16987 - }, - { - "epoch": 1.110254231749559, - "grad_norm": 0.42926570773124695, - "learning_rate": 7.168384922126955e-06, - "loss": 0.328, - "step": 16988 - }, - { - "epoch": 1.110319586955101, - "grad_norm": 0.4437406659126282, - "learning_rate": 7.168070272756198e-06, - "loss": 0.3527, - "step": 16989 - }, - { - "epoch": 1.110384942160643, - "grad_norm": 0.4713248312473297, - "learning_rate": 7.167755612811009e-06, - "loss": 0.3474, - "step": 16990 - }, - { - "epoch": 1.1104502973661852, - "grad_norm": 0.42764395475387573, - "learning_rate": 7.167440942292926e-06, - "loss": 0.3138, - "step": 16991 - }, - { - "epoch": 1.1105156525717272, - "grad_norm": 0.44529637694358826, - "learning_rate": 7.167126261203483e-06, - "loss": 0.326, - "step": 16992 - }, - { - "epoch": 1.1105810077772695, - "grad_norm": 0.4588848948478699, - "learning_rate": 7.166811569544213e-06, - "loss": 0.3536, - "step": 16993 - }, - { - "epoch": 1.1106463629828116, - "grad_norm": 0.4248858094215393, - "learning_rate": 7.1664968673166545e-06, - "loss": 0.3144, - "step": 16994 - }, - { - "epoch": 1.1107117181883537, - "grad_norm": 0.4718970060348511, - "learning_rate": 7.1661821545223385e-06, - "loss": 0.3595, - "step": 16995 - }, - { - "epoch": 1.1107770733938958, - "grad_norm": 0.44681426882743835, - "learning_rate": 7.165867431162802e-06, - "loss": 0.316, - "step": 16996 - }, - { - "epoch": 1.110842428599438, - "grad_norm": 0.4472370743751526, - "learning_rate": 7.165552697239579e-06, - "loss": 0.3217, - "step": 16997 - }, - { - "epoch": 1.1109077838049801, - "grad_norm": 0.5172298550605774, - "learning_rate": 7.1652379527542075e-06, - "loss": 0.3161, - "step": 16998 - }, - { - "epoch": 1.1109731390105222, - "grad_norm": 0.46733778715133667, - "learning_rate": 7.164923197708219e-06, - "loss": 0.3647, - "step": 16999 - }, - { - "epoch": 1.1110384942160643, - "grad_norm": 0.44942665100097656, - "learning_rate": 7.164608432103149e-06, - "loss": 0.3586, - "step": 17000 - }, - { - "epoch": 1.1111038494216063, - "grad_norm": 0.4257781505584717, - "learning_rate": 7.164293655940537e-06, - "loss": 0.3365, - "step": 17001 - }, - { - "epoch": 1.1111692046271486, - "grad_norm": 0.42540618777275085, - "learning_rate": 7.163978869221912e-06, - "loss": 0.2947, - "step": 17002 - }, - { - "epoch": 1.1112345598326907, - "grad_norm": 0.44917404651641846, - "learning_rate": 7.1636640719488145e-06, - "loss": 0.3513, - "step": 17003 - }, - { - "epoch": 1.1112999150382328, - "grad_norm": 0.45079484581947327, - "learning_rate": 7.163349264122776e-06, - "loss": 0.353, - "step": 17004 - }, - { - "epoch": 1.1113652702437748, - "grad_norm": 0.44623708724975586, - "learning_rate": 7.163034445745335e-06, - "loss": 0.362, - "step": 17005 - }, - { - "epoch": 1.1114306254493171, - "grad_norm": 0.4129381477832794, - "learning_rate": 7.162719616818024e-06, - "loss": 0.3163, - "step": 17006 - }, - { - "epoch": 1.1114959806548592, - "grad_norm": 0.4586268365383148, - "learning_rate": 7.162404777342382e-06, - "loss": 0.3562, - "step": 17007 - }, - { - "epoch": 1.1115613358604013, - "grad_norm": 0.4528006911277771, - "learning_rate": 7.1620899273199404e-06, - "loss": 0.3402, - "step": 17008 - }, - { - "epoch": 1.1116266910659434, - "grad_norm": 0.4343746602535248, - "learning_rate": 7.161775066752239e-06, - "loss": 0.3353, - "step": 17009 - }, - { - "epoch": 1.1116920462714854, - "grad_norm": 0.45098209381103516, - "learning_rate": 7.161460195640812e-06, - "loss": 0.3769, - "step": 17010 - }, - { - "epoch": 1.1117574014770277, - "grad_norm": 0.45243769884109497, - "learning_rate": 7.161145313987194e-06, - "loss": 0.3783, - "step": 17011 - }, - { - "epoch": 1.1118227566825698, - "grad_norm": 0.4434148073196411, - "learning_rate": 7.160830421792922e-06, - "loss": 0.3198, - "step": 17012 - }, - { - "epoch": 1.1118881118881119, - "grad_norm": 0.44666770100593567, - "learning_rate": 7.160515519059531e-06, - "loss": 0.3365, - "step": 17013 - }, - { - "epoch": 1.111953467093654, - "grad_norm": 0.45525074005126953, - "learning_rate": 7.160200605788559e-06, - "loss": 0.311, - "step": 17014 - }, - { - "epoch": 1.1120188222991962, - "grad_norm": 0.450960636138916, - "learning_rate": 7.1598856819815374e-06, - "loss": 0.3373, - "step": 17015 - }, - { - "epoch": 1.1120841775047383, - "grad_norm": 0.4486139118671417, - "learning_rate": 7.159570747640008e-06, - "loss": 0.3087, - "step": 17016 - }, - { - "epoch": 1.1121495327102804, - "grad_norm": 0.5001394152641296, - "learning_rate": 7.1592558027655016e-06, - "loss": 0.3286, - "step": 17017 - }, - { - "epoch": 1.1122148879158225, - "grad_norm": 0.4579133689403534, - "learning_rate": 7.158940847359558e-06, - "loss": 0.3631, - "step": 17018 - }, - { - "epoch": 1.1122802431213645, - "grad_norm": 0.46382418274879456, - "learning_rate": 7.158625881423711e-06, - "loss": 0.365, - "step": 17019 - }, - { - "epoch": 1.1123455983269068, - "grad_norm": 0.443668395280838, - "learning_rate": 7.158310904959498e-06, - "loss": 0.3347, - "step": 17020 - }, - { - "epoch": 1.112410953532449, - "grad_norm": 0.4584289491176605, - "learning_rate": 7.157995917968455e-06, - "loss": 0.3545, - "step": 17021 - }, - { - "epoch": 1.112476308737991, - "grad_norm": 0.5357624292373657, - "learning_rate": 7.157680920452119e-06, - "loss": 0.3404, - "step": 17022 - }, - { - "epoch": 1.112541663943533, - "grad_norm": 0.4695124626159668, - "learning_rate": 7.157365912412027e-06, - "loss": 0.3458, - "step": 17023 - }, - { - "epoch": 1.1126070191490753, - "grad_norm": 0.4648081064224243, - "learning_rate": 7.1570508938497105e-06, - "loss": 0.3763, - "step": 17024 - }, - { - "epoch": 1.1126723743546174, - "grad_norm": 0.4514486491680145, - "learning_rate": 7.156735864766713e-06, - "loss": 0.3454, - "step": 17025 - }, - { - "epoch": 1.1127377295601595, - "grad_norm": 0.4472137689590454, - "learning_rate": 7.156420825164565e-06, - "loss": 0.3281, - "step": 17026 - }, - { - "epoch": 1.1128030847657016, - "grad_norm": 0.49321311712265015, - "learning_rate": 7.156105775044806e-06, - "loss": 0.3666, - "step": 17027 - }, - { - "epoch": 1.1128684399712436, - "grad_norm": 0.42386817932128906, - "learning_rate": 7.155790714408972e-06, - "loss": 0.3051, - "step": 17028 - }, - { - "epoch": 1.112933795176786, - "grad_norm": 0.42108550667762756, - "learning_rate": 7.155475643258601e-06, - "loss": 0.2892, - "step": 17029 - }, - { - "epoch": 1.112999150382328, - "grad_norm": 0.44198665022850037, - "learning_rate": 7.155160561595229e-06, - "loss": 0.343, - "step": 17030 - }, - { - "epoch": 1.11306450558787, - "grad_norm": 0.49344179034233093, - "learning_rate": 7.15484546942039e-06, - "loss": 0.4028, - "step": 17031 - }, - { - "epoch": 1.1131298607934121, - "grad_norm": 0.4633345603942871, - "learning_rate": 7.154530366735626e-06, - "loss": 0.331, - "step": 17032 - }, - { - "epoch": 1.1131952159989542, - "grad_norm": 0.4551130533218384, - "learning_rate": 7.154215253542468e-06, - "loss": 0.3422, - "step": 17033 - }, - { - "epoch": 1.1132605712044965, - "grad_norm": 0.41660186648368835, - "learning_rate": 7.153900129842458e-06, - "loss": 0.2963, - "step": 17034 - }, - { - "epoch": 1.1133259264100386, - "grad_norm": 0.45533299446105957, - "learning_rate": 7.153584995637129e-06, - "loss": 0.3208, - "step": 17035 - }, - { - "epoch": 1.1133912816155807, - "grad_norm": 0.461054265499115, - "learning_rate": 7.1532698509280215e-06, - "loss": 0.3527, - "step": 17036 - }, - { - "epoch": 1.1134566368211227, - "grad_norm": 0.49841558933258057, - "learning_rate": 7.152954695716672e-06, - "loss": 0.367, - "step": 17037 - }, - { - "epoch": 1.113521992026665, - "grad_norm": 0.45055150985717773, - "learning_rate": 7.152639530004615e-06, - "loss": 0.3344, - "step": 17038 - }, - { - "epoch": 1.113587347232207, - "grad_norm": 0.43217653036117554, - "learning_rate": 7.152324353793389e-06, - "loss": 0.32, - "step": 17039 - }, - { - "epoch": 1.1136527024377492, - "grad_norm": 0.46337515115737915, - "learning_rate": 7.1520091670845316e-06, - "loss": 0.3885, - "step": 17040 - }, - { - "epoch": 1.1137180576432912, - "grad_norm": 0.39758139848709106, - "learning_rate": 7.15169396987958e-06, - "loss": 0.2717, - "step": 17041 - }, - { - "epoch": 1.1137834128488333, - "grad_norm": 0.48315921425819397, - "learning_rate": 7.151378762180072e-06, - "loss": 0.381, - "step": 17042 - }, - { - "epoch": 1.1138487680543756, - "grad_norm": 0.4399372637271881, - "learning_rate": 7.151063543987544e-06, - "loss": 0.3466, - "step": 17043 - }, - { - "epoch": 1.1139141232599177, - "grad_norm": 0.45378926396369934, - "learning_rate": 7.150748315303535e-06, - "loss": 0.3627, - "step": 17044 - }, - { - "epoch": 1.1139794784654597, - "grad_norm": 0.45435047149658203, - "learning_rate": 7.150433076129581e-06, - "loss": 0.3663, - "step": 17045 - }, - { - "epoch": 1.1140448336710018, - "grad_norm": 0.4683096706867218, - "learning_rate": 7.15011782646722e-06, - "loss": 0.3699, - "step": 17046 - }, - { - "epoch": 1.1141101888765441, - "grad_norm": 0.4378737807273865, - "learning_rate": 7.14980256631799e-06, - "loss": 0.3424, - "step": 17047 - }, - { - "epoch": 1.1141755440820862, - "grad_norm": 0.44605475664138794, - "learning_rate": 7.149487295683426e-06, - "loss": 0.359, - "step": 17048 - }, - { - "epoch": 1.1142408992876283, - "grad_norm": 0.46032944321632385, - "learning_rate": 7.149172014565069e-06, - "loss": 0.3676, - "step": 17049 - }, - { - "epoch": 1.1143062544931703, - "grad_norm": 0.47950488328933716, - "learning_rate": 7.148856722964456e-06, - "loss": 0.3446, - "step": 17050 - }, - { - "epoch": 1.1143716096987124, - "grad_norm": 0.438961923122406, - "learning_rate": 7.148541420883123e-06, - "loss": 0.3495, - "step": 17051 - }, - { - "epoch": 1.1144369649042547, - "grad_norm": 0.4428153932094574, - "learning_rate": 7.14822610832261e-06, - "loss": 0.368, - "step": 17052 - }, - { - "epoch": 1.1145023201097968, - "grad_norm": 0.4724746644496918, - "learning_rate": 7.147910785284453e-06, - "loss": 0.2795, - "step": 17053 - }, - { - "epoch": 1.1145676753153388, - "grad_norm": 0.4283941984176636, - "learning_rate": 7.147595451770193e-06, - "loss": 0.3386, - "step": 17054 - }, - { - "epoch": 1.114633030520881, - "grad_norm": 0.4613955318927765, - "learning_rate": 7.147280107781365e-06, - "loss": 0.3307, - "step": 17055 - }, - { - "epoch": 1.1146983857264232, - "grad_norm": 0.4663427174091339, - "learning_rate": 7.1469647533195075e-06, - "loss": 0.3536, - "step": 17056 - }, - { - "epoch": 1.1147637409319653, - "grad_norm": 0.4404975473880768, - "learning_rate": 7.14664938838616e-06, - "loss": 0.3439, - "step": 17057 - }, - { - "epoch": 1.1148290961375074, - "grad_norm": 0.4159106910228729, - "learning_rate": 7.146334012982859e-06, - "loss": 0.2972, - "step": 17058 - }, - { - "epoch": 1.1148944513430494, - "grad_norm": 0.45903849601745605, - "learning_rate": 7.146018627111144e-06, - "loss": 0.3559, - "step": 17059 - }, - { - "epoch": 1.1149598065485915, - "grad_norm": 0.4485066533088684, - "learning_rate": 7.145703230772552e-06, - "loss": 0.3582, - "step": 17060 - }, - { - "epoch": 1.1150251617541338, - "grad_norm": 0.4486883580684662, - "learning_rate": 7.1453878239686235e-06, - "loss": 0.3249, - "step": 17061 - }, - { - "epoch": 1.1150905169596759, - "grad_norm": 0.5061477422714233, - "learning_rate": 7.145072406700894e-06, - "loss": 0.3625, - "step": 17062 - }, - { - "epoch": 1.115155872165218, - "grad_norm": 0.45438152551651, - "learning_rate": 7.144756978970904e-06, - "loss": 0.3392, - "step": 17063 - }, - { - "epoch": 1.11522122737076, - "grad_norm": 0.4226972162723541, - "learning_rate": 7.14444154078019e-06, - "loss": 0.3355, - "step": 17064 - }, - { - "epoch": 1.115286582576302, - "grad_norm": 0.4342862069606781, - "learning_rate": 7.144126092130293e-06, - "loss": 0.3066, - "step": 17065 - }, - { - "epoch": 1.1153519377818444, - "grad_norm": 0.47154200077056885, - "learning_rate": 7.14381063302275e-06, - "loss": 0.4142, - "step": 17066 - }, - { - "epoch": 1.1154172929873865, - "grad_norm": 0.40317896008491516, - "learning_rate": 7.1434951634591e-06, - "loss": 0.2885, - "step": 17067 - }, - { - "epoch": 1.1154826481929285, - "grad_norm": 0.40597960352897644, - "learning_rate": 7.143179683440882e-06, - "loss": 0.2789, - "step": 17068 - }, - { - "epoch": 1.1155480033984706, - "grad_norm": 0.4608341157436371, - "learning_rate": 7.142864192969632e-06, - "loss": 0.3355, - "step": 17069 - }, - { - "epoch": 1.115613358604013, - "grad_norm": 0.39357125759124756, - "learning_rate": 7.142548692046893e-06, - "loss": 0.2598, - "step": 17070 - }, - { - "epoch": 1.115678713809555, - "grad_norm": 0.46614694595336914, - "learning_rate": 7.142233180674201e-06, - "loss": 0.3533, - "step": 17071 - }, - { - "epoch": 1.115744069015097, - "grad_norm": 0.5154941082000732, - "learning_rate": 7.141917658853095e-06, - "loss": 0.3515, - "step": 17072 - }, - { - "epoch": 1.1158094242206391, - "grad_norm": 0.422382652759552, - "learning_rate": 7.141602126585116e-06, - "loss": 0.3078, - "step": 17073 - }, - { - "epoch": 1.1158747794261812, - "grad_norm": 0.4533899426460266, - "learning_rate": 7.1412865838718e-06, - "loss": 0.3355, - "step": 17074 - }, - { - "epoch": 1.1159401346317235, - "grad_norm": 0.41989266872406006, - "learning_rate": 7.140971030714689e-06, - "loss": 0.3067, - "step": 17075 - }, - { - "epoch": 1.1160054898372656, - "grad_norm": 0.4281536936759949, - "learning_rate": 7.14065546711532e-06, - "loss": 0.3241, - "step": 17076 - }, - { - "epoch": 1.1160708450428076, - "grad_norm": 0.4291760325431824, - "learning_rate": 7.1403398930752324e-06, - "loss": 0.3083, - "step": 17077 - }, - { - "epoch": 1.1161362002483497, - "grad_norm": 0.4882870614528656, - "learning_rate": 7.1400243085959655e-06, - "loss": 0.3699, - "step": 17078 - }, - { - "epoch": 1.116201555453892, - "grad_norm": 0.4481711685657501, - "learning_rate": 7.139708713679059e-06, - "loss": 0.3397, - "step": 17079 - }, - { - "epoch": 1.116266910659434, - "grad_norm": 0.42289987206459045, - "learning_rate": 7.1393931083260515e-06, - "loss": 0.3328, - "step": 17080 - }, - { - "epoch": 1.1163322658649761, - "grad_norm": 0.44162896275520325, - "learning_rate": 7.1390774925384835e-06, - "loss": 0.3447, - "step": 17081 - }, - { - "epoch": 1.1163976210705182, - "grad_norm": 0.44941946864128113, - "learning_rate": 7.138761866317893e-06, - "loss": 0.3567, - "step": 17082 - }, - { - "epoch": 1.1164629762760603, - "grad_norm": 0.4282679855823517, - "learning_rate": 7.1384462296658196e-06, - "loss": 0.3486, - "step": 17083 - }, - { - "epoch": 1.1165283314816026, - "grad_norm": 0.46869492530822754, - "learning_rate": 7.138130582583804e-06, - "loss": 0.3442, - "step": 17084 - }, - { - "epoch": 1.1165936866871446, - "grad_norm": 0.4324144423007965, - "learning_rate": 7.137814925073383e-06, - "loss": 0.3361, - "step": 17085 - }, - { - "epoch": 1.1166590418926867, - "grad_norm": 0.42913392186164856, - "learning_rate": 7.137499257136101e-06, - "loss": 0.3381, - "step": 17086 - }, - { - "epoch": 1.1167243970982288, - "grad_norm": 0.429119735956192, - "learning_rate": 7.137183578773492e-06, - "loss": 0.3093, - "step": 17087 - }, - { - "epoch": 1.116789752303771, - "grad_norm": 0.4642656743526459, - "learning_rate": 7.1368678899871e-06, - "loss": 0.3809, - "step": 17088 - }, - { - "epoch": 1.1168551075093132, - "grad_norm": 0.4095107614994049, - "learning_rate": 7.136552190778462e-06, - "loss": 0.3191, - "step": 17089 - }, - { - "epoch": 1.1169204627148552, - "grad_norm": 0.48212072253227234, - "learning_rate": 7.136236481149119e-06, - "loss": 0.3946, - "step": 17090 - }, - { - "epoch": 1.1169858179203973, - "grad_norm": 0.44342443346977234, - "learning_rate": 7.13592076110061e-06, - "loss": 0.3605, - "step": 17091 - }, - { - "epoch": 1.1170511731259394, - "grad_norm": 0.43573877215385437, - "learning_rate": 7.135605030634477e-06, - "loss": 0.3141, - "step": 17092 - }, - { - "epoch": 1.1171165283314817, - "grad_norm": 0.4341411292552948, - "learning_rate": 7.1352892897522564e-06, - "loss": 0.338, - "step": 17093 - }, - { - "epoch": 1.1171818835370237, - "grad_norm": 0.44523581862449646, - "learning_rate": 7.134973538455492e-06, - "loss": 0.3067, - "step": 17094 - }, - { - "epoch": 1.1172472387425658, - "grad_norm": 0.4501059651374817, - "learning_rate": 7.1346577767457225e-06, - "loss": 0.3807, - "step": 17095 - }, - { - "epoch": 1.117312593948108, - "grad_norm": 0.45240458846092224, - "learning_rate": 7.134342004624486e-06, - "loss": 0.3493, - "step": 17096 - }, - { - "epoch": 1.1173779491536502, - "grad_norm": 0.4435292184352875, - "learning_rate": 7.134026222093325e-06, - "loss": 0.3455, - "step": 17097 - }, - { - "epoch": 1.1174433043591923, - "grad_norm": 0.47913676500320435, - "learning_rate": 7.133710429153778e-06, - "loss": 0.3685, - "step": 17098 - }, - { - "epoch": 1.1175086595647343, - "grad_norm": 0.4573393166065216, - "learning_rate": 7.133394625807386e-06, - "loss": 0.3274, - "step": 17099 - }, - { - "epoch": 1.1175740147702764, - "grad_norm": 0.49698054790496826, - "learning_rate": 7.133078812055689e-06, - "loss": 0.3822, - "step": 17100 - }, - { - "epoch": 1.1176393699758185, - "grad_norm": 0.4798371195793152, - "learning_rate": 7.132762987900229e-06, - "loss": 0.3962, - "step": 17101 - }, - { - "epoch": 1.1177047251813608, - "grad_norm": 0.4546820819377899, - "learning_rate": 7.132447153342545e-06, - "loss": 0.3626, - "step": 17102 - }, - { - "epoch": 1.1177700803869028, - "grad_norm": 0.42338061332702637, - "learning_rate": 7.1321313083841755e-06, - "loss": 0.3095, - "step": 17103 - }, - { - "epoch": 1.117835435592445, - "grad_norm": 0.4631916284561157, - "learning_rate": 7.131815453026665e-06, - "loss": 0.3072, - "step": 17104 - }, - { - "epoch": 1.117900790797987, - "grad_norm": 0.45935285091400146, - "learning_rate": 7.131499587271551e-06, - "loss": 0.3538, - "step": 17105 - }, - { - "epoch": 1.1179661460035293, - "grad_norm": 0.5046459436416626, - "learning_rate": 7.131183711120376e-06, - "loss": 0.4196, - "step": 17106 - }, - { - "epoch": 1.1180315012090714, - "grad_norm": 0.44703638553619385, - "learning_rate": 7.13086782457468e-06, - "loss": 0.3624, - "step": 17107 - }, - { - "epoch": 1.1180968564146134, - "grad_norm": 0.4890722632408142, - "learning_rate": 7.130551927636002e-06, - "loss": 0.3827, - "step": 17108 - }, - { - "epoch": 1.1181622116201555, - "grad_norm": 0.4590572118759155, - "learning_rate": 7.130236020305885e-06, - "loss": 0.3573, - "step": 17109 - }, - { - "epoch": 1.1182275668256976, - "grad_norm": 0.4598666727542877, - "learning_rate": 7.1299201025858685e-06, - "loss": 0.3667, - "step": 17110 - }, - { - "epoch": 1.1182929220312399, - "grad_norm": 0.4720495343208313, - "learning_rate": 7.129604174477493e-06, - "loss": 0.3939, - "step": 17111 - }, - { - "epoch": 1.118358277236782, - "grad_norm": 0.5097851753234863, - "learning_rate": 7.129288235982303e-06, - "loss": 0.4218, - "step": 17112 - }, - { - "epoch": 1.118423632442324, - "grad_norm": 0.4786634147167206, - "learning_rate": 7.128972287101835e-06, - "loss": 0.3638, - "step": 17113 - }, - { - "epoch": 1.118488987647866, - "grad_norm": 0.44928136467933655, - "learning_rate": 7.1286563278376306e-06, - "loss": 0.3444, - "step": 17114 - }, - { - "epoch": 1.1185543428534084, - "grad_norm": 0.44733908772468567, - "learning_rate": 7.128340358191234e-06, - "loss": 0.3408, - "step": 17115 - }, - { - "epoch": 1.1186196980589505, - "grad_norm": 0.4302018880844116, - "learning_rate": 7.128024378164181e-06, - "loss": 0.3191, - "step": 17116 - }, - { - "epoch": 1.1186850532644925, - "grad_norm": 0.48240160942077637, - "learning_rate": 7.127708387758019e-06, - "loss": 0.3985, - "step": 17117 - }, - { - "epoch": 1.1187504084700346, - "grad_norm": 0.4628893733024597, - "learning_rate": 7.127392386974283e-06, - "loss": 0.3424, - "step": 17118 - }, - { - "epoch": 1.1188157636755767, - "grad_norm": 0.47859904170036316, - "learning_rate": 7.12707637581452e-06, - "loss": 0.3783, - "step": 17119 - }, - { - "epoch": 1.118881118881119, - "grad_norm": 0.535497784614563, - "learning_rate": 7.126760354280268e-06, - "loss": 0.4356, - "step": 17120 - }, - { - "epoch": 1.118946474086661, - "grad_norm": 0.47680404782295227, - "learning_rate": 7.126444322373069e-06, - "loss": 0.3919, - "step": 17121 - }, - { - "epoch": 1.119011829292203, - "grad_norm": 0.4746123254299164, - "learning_rate": 7.126128280094464e-06, - "loss": 0.3583, - "step": 17122 - }, - { - "epoch": 1.1190771844977452, - "grad_norm": 0.44779065251350403, - "learning_rate": 7.125812227445994e-06, - "loss": 0.3289, - "step": 17123 - }, - { - "epoch": 1.1191425397032875, - "grad_norm": 0.4273865818977356, - "learning_rate": 7.125496164429203e-06, - "loss": 0.3279, - "step": 17124 - }, - { - "epoch": 1.1192078949088295, - "grad_norm": 0.46545201539993286, - "learning_rate": 7.1251800910456295e-06, - "loss": 0.3697, - "step": 17125 - }, - { - "epoch": 1.1192732501143716, - "grad_norm": 0.43651264905929565, - "learning_rate": 7.124864007296818e-06, - "loss": 0.3335, - "step": 17126 - }, - { - "epoch": 1.1193386053199137, - "grad_norm": 0.439040869474411, - "learning_rate": 7.1245479131843065e-06, - "loss": 0.3244, - "step": 17127 - }, - { - "epoch": 1.1194039605254558, - "grad_norm": 0.46707066893577576, - "learning_rate": 7.124231808709642e-06, - "loss": 0.3564, - "step": 17128 - }, - { - "epoch": 1.119469315730998, - "grad_norm": 0.4685092568397522, - "learning_rate": 7.123915693874359e-06, - "loss": 0.3397, - "step": 17129 - }, - { - "epoch": 1.1195346709365401, - "grad_norm": 0.5096257925033569, - "learning_rate": 7.1235995686800065e-06, - "loss": 0.414, - "step": 17130 - }, - { - "epoch": 1.1196000261420822, - "grad_norm": 0.48026302456855774, - "learning_rate": 7.123283433128122e-06, - "loss": 0.4028, - "step": 17131 - }, - { - "epoch": 1.1196653813476243, - "grad_norm": 0.4236884117126465, - "learning_rate": 7.122967287220248e-06, - "loss": 0.2925, - "step": 17132 - }, - { - "epoch": 1.1197307365531666, - "grad_norm": 0.4994780719280243, - "learning_rate": 7.122651130957929e-06, - "loss": 0.3995, - "step": 17133 - }, - { - "epoch": 1.1197960917587086, - "grad_norm": 0.44404107332229614, - "learning_rate": 7.122334964342703e-06, - "loss": 0.3332, - "step": 17134 - }, - { - "epoch": 1.1198614469642507, - "grad_norm": 0.44905710220336914, - "learning_rate": 7.122018787376116e-06, - "loss": 0.3577, - "step": 17135 - }, - { - "epoch": 1.1199268021697928, - "grad_norm": 0.45197802782058716, - "learning_rate": 7.1217026000597066e-06, - "loss": 0.3721, - "step": 17136 - }, - { - "epoch": 1.1199921573753349, - "grad_norm": 0.4590807259082794, - "learning_rate": 7.1213864023950195e-06, - "loss": 0.3588, - "step": 17137 - }, - { - "epoch": 1.1200575125808772, - "grad_norm": 0.43286678194999695, - "learning_rate": 7.1210701943835945e-06, - "loss": 0.3369, - "step": 17138 - }, - { - "epoch": 1.1201228677864192, - "grad_norm": 0.4448758065700531, - "learning_rate": 7.1207539760269776e-06, - "loss": 0.3432, - "step": 17139 - }, - { - "epoch": 1.1201882229919613, - "grad_norm": 0.47225460410118103, - "learning_rate": 7.1204377473267085e-06, - "loss": 0.3713, - "step": 17140 - }, - { - "epoch": 1.1202535781975034, - "grad_norm": 0.46399086713790894, - "learning_rate": 7.120121508284329e-06, - "loss": 0.3534, - "step": 17141 - }, - { - "epoch": 1.1203189334030457, - "grad_norm": 0.4282212555408478, - "learning_rate": 7.119805258901382e-06, - "loss": 0.3155, - "step": 17142 - }, - { - "epoch": 1.1203842886085877, - "grad_norm": 0.4380335509777069, - "learning_rate": 7.1194889991794115e-06, - "loss": 0.3339, - "step": 17143 - }, - { - "epoch": 1.1204496438141298, - "grad_norm": 0.41226276755332947, - "learning_rate": 7.1191727291199585e-06, - "loss": 0.2779, - "step": 17144 - }, - { - "epoch": 1.1205149990196719, - "grad_norm": 0.4880550503730774, - "learning_rate": 7.118856448724565e-06, - "loss": 0.3772, - "step": 17145 - }, - { - "epoch": 1.120580354225214, - "grad_norm": 0.4138718545436859, - "learning_rate": 7.118540157994775e-06, - "loss": 0.3123, - "step": 17146 - }, - { - "epoch": 1.1206457094307563, - "grad_norm": 0.4755130410194397, - "learning_rate": 7.118223856932132e-06, - "loss": 0.3422, - "step": 17147 - }, - { - "epoch": 1.1207110646362983, - "grad_norm": 0.475567489862442, - "learning_rate": 7.117907545538177e-06, - "loss": 0.3583, - "step": 17148 - }, - { - "epoch": 1.1207764198418404, - "grad_norm": 0.44259950518608093, - "learning_rate": 7.117591223814453e-06, - "loss": 0.3487, - "step": 17149 - }, - { - "epoch": 1.1208417750473825, - "grad_norm": 0.4870612621307373, - "learning_rate": 7.117274891762503e-06, - "loss": 0.3889, - "step": 17150 - }, - { - "epoch": 1.1209071302529245, - "grad_norm": 0.4553016722202301, - "learning_rate": 7.116958549383869e-06, - "loss": 0.3538, - "step": 17151 - }, - { - "epoch": 1.1209724854584668, - "grad_norm": 0.4354605972766876, - "learning_rate": 7.116642196680095e-06, - "loss": 0.3306, - "step": 17152 - }, - { - "epoch": 1.121037840664009, - "grad_norm": 0.43308666348457336, - "learning_rate": 7.116325833652726e-06, - "loss": 0.3248, - "step": 17153 - }, - { - "epoch": 1.121103195869551, - "grad_norm": 0.4298359453678131, - "learning_rate": 7.116009460303301e-06, - "loss": 0.3055, - "step": 17154 - }, - { - "epoch": 1.121168551075093, - "grad_norm": 0.45559221506118774, - "learning_rate": 7.115693076633364e-06, - "loss": 0.354, - "step": 17155 - }, - { - "epoch": 1.1212339062806354, - "grad_norm": 0.49907585978507996, - "learning_rate": 7.1153766826444595e-06, - "loss": 0.4032, - "step": 17156 - }, - { - "epoch": 1.1212992614861774, - "grad_norm": 0.43977731466293335, - "learning_rate": 7.1150602783381304e-06, - "loss": 0.3404, - "step": 17157 - }, - { - "epoch": 1.1213646166917195, - "grad_norm": 0.43656906485557556, - "learning_rate": 7.114743863715918e-06, - "loss": 0.3361, - "step": 17158 - }, - { - "epoch": 1.1214299718972616, - "grad_norm": 0.4312306344509125, - "learning_rate": 7.11442743877937e-06, - "loss": 0.3148, - "step": 17159 - }, - { - "epoch": 1.1214953271028036, - "grad_norm": 0.49295681715011597, - "learning_rate": 7.114111003530025e-06, - "loss": 0.3862, - "step": 17160 - }, - { - "epoch": 1.121560682308346, - "grad_norm": 0.45185586810112, - "learning_rate": 7.113794557969429e-06, - "loss": 0.3599, - "step": 17161 - }, - { - "epoch": 1.121626037513888, - "grad_norm": 0.46818238496780396, - "learning_rate": 7.113478102099124e-06, - "loss": 0.3434, - "step": 17162 - }, - { - "epoch": 1.12169139271943, - "grad_norm": 0.4316209852695465, - "learning_rate": 7.113161635920654e-06, - "loss": 0.3291, - "step": 17163 - }, - { - "epoch": 1.1217567479249722, - "grad_norm": 0.44057488441467285, - "learning_rate": 7.112845159435564e-06, - "loss": 0.3314, - "step": 17164 - }, - { - "epoch": 1.1218221031305144, - "grad_norm": 0.4619652032852173, - "learning_rate": 7.112528672645395e-06, - "loss": 0.3739, - "step": 17165 - }, - { - "epoch": 1.1218874583360565, - "grad_norm": 0.4332674741744995, - "learning_rate": 7.112212175551691e-06, - "loss": 0.3151, - "step": 17166 - }, - { - "epoch": 1.1219528135415986, - "grad_norm": 0.40701258182525635, - "learning_rate": 7.111895668155999e-06, - "loss": 0.2896, - "step": 17167 - }, - { - "epoch": 1.1220181687471407, - "grad_norm": 0.4592593312263489, - "learning_rate": 7.111579150459857e-06, - "loss": 0.3421, - "step": 17168 - }, - { - "epoch": 1.1220835239526827, - "grad_norm": 0.45556318759918213, - "learning_rate": 7.111262622464815e-06, - "loss": 0.3604, - "step": 17169 - }, - { - "epoch": 1.122148879158225, - "grad_norm": 0.4613608121871948, - "learning_rate": 7.110946084172413e-06, - "loss": 0.3299, - "step": 17170 - }, - { - "epoch": 1.122214234363767, - "grad_norm": 0.48709607124328613, - "learning_rate": 7.1106295355841955e-06, - "loss": 0.3556, - "step": 17171 - }, - { - "epoch": 1.1222795895693092, - "grad_norm": 0.48541393876075745, - "learning_rate": 7.110312976701706e-06, - "loss": 0.3564, - "step": 17172 - }, - { - "epoch": 1.1223449447748512, - "grad_norm": 0.4681394696235657, - "learning_rate": 7.109996407526489e-06, - "loss": 0.3664, - "step": 17173 - }, - { - "epoch": 1.1224102999803933, - "grad_norm": 0.46374887228012085, - "learning_rate": 7.1096798280600885e-06, - "loss": 0.3517, - "step": 17174 - }, - { - "epoch": 1.1224756551859356, - "grad_norm": 0.4465112090110779, - "learning_rate": 7.10936323830405e-06, - "loss": 0.3474, - "step": 17175 - }, - { - "epoch": 1.1225410103914777, - "grad_norm": 0.46198412775993347, - "learning_rate": 7.109046638259913e-06, - "loss": 0.3666, - "step": 17176 - }, - { - "epoch": 1.1226063655970198, - "grad_norm": 0.4331059455871582, - "learning_rate": 7.108730027929228e-06, - "loss": 0.3321, - "step": 17177 - }, - { - "epoch": 1.1226717208025618, - "grad_norm": 0.4709119200706482, - "learning_rate": 7.108413407313535e-06, - "loss": 0.3912, - "step": 17178 - }, - { - "epoch": 1.1227370760081041, - "grad_norm": 0.44842803478240967, - "learning_rate": 7.1080967764143795e-06, - "loss": 0.3671, - "step": 17179 - }, - { - "epoch": 1.1228024312136462, - "grad_norm": 0.435075044631958, - "learning_rate": 7.107780135233306e-06, - "loss": 0.3467, - "step": 17180 - }, - { - "epoch": 1.1228677864191883, - "grad_norm": 0.4641771912574768, - "learning_rate": 7.107463483771858e-06, - "loss": 0.3622, - "step": 17181 - }, - { - "epoch": 1.1229331416247303, - "grad_norm": 0.44011834263801575, - "learning_rate": 7.107146822031581e-06, - "loss": 0.3295, - "step": 17182 - }, - { - "epoch": 1.1229984968302724, - "grad_norm": 0.4464653730392456, - "learning_rate": 7.106830150014019e-06, - "loss": 0.3349, - "step": 17183 - }, - { - "epoch": 1.1230638520358147, - "grad_norm": 0.4518473446369171, - "learning_rate": 7.106513467720717e-06, - "loss": 0.3593, - "step": 17184 - }, - { - "epoch": 1.1231292072413568, - "grad_norm": 0.46642863750457764, - "learning_rate": 7.106196775153217e-06, - "loss": 0.3619, - "step": 17185 - }, - { - "epoch": 1.1231945624468989, - "grad_norm": 0.42322593927383423, - "learning_rate": 7.105880072313067e-06, - "loss": 0.3039, - "step": 17186 - }, - { - "epoch": 1.123259917652441, - "grad_norm": 0.475157231092453, - "learning_rate": 7.1055633592018115e-06, - "loss": 0.3536, - "step": 17187 - }, - { - "epoch": 1.1233252728579832, - "grad_norm": 0.4709039330482483, - "learning_rate": 7.105246635820993e-06, - "loss": 0.3579, - "step": 17188 - }, - { - "epoch": 1.1233906280635253, - "grad_norm": 0.44005364179611206, - "learning_rate": 7.1049299021721575e-06, - "loss": 0.3304, - "step": 17189 - }, - { - "epoch": 1.1234559832690674, - "grad_norm": 0.44961556792259216, - "learning_rate": 7.104613158256848e-06, - "loss": 0.3742, - "step": 17190 - }, - { - "epoch": 1.1235213384746094, - "grad_norm": 0.4559229612350464, - "learning_rate": 7.104296404076614e-06, - "loss": 0.319, - "step": 17191 - }, - { - "epoch": 1.1235866936801515, - "grad_norm": 0.4463317096233368, - "learning_rate": 7.103979639632995e-06, - "loss": 0.3539, - "step": 17192 - }, - { - "epoch": 1.1236520488856938, - "grad_norm": 0.4417072534561157, - "learning_rate": 7.1036628649275394e-06, - "loss": 0.3378, - "step": 17193 - }, - { - "epoch": 1.1237174040912359, - "grad_norm": 0.4529339373111725, - "learning_rate": 7.10334607996179e-06, - "loss": 0.3673, - "step": 17194 - }, - { - "epoch": 1.123782759296778, - "grad_norm": 0.4649065136909485, - "learning_rate": 7.103029284737295e-06, - "loss": 0.3341, - "step": 17195 - }, - { - "epoch": 1.12384811450232, - "grad_norm": 0.44754651188850403, - "learning_rate": 7.102712479255597e-06, - "loss": 0.3473, - "step": 17196 - }, - { - "epoch": 1.1239134697078623, - "grad_norm": 0.48458895087242126, - "learning_rate": 7.102395663518241e-06, - "loss": 0.3475, - "step": 17197 - }, - { - "epoch": 1.1239788249134044, - "grad_norm": 0.437276154756546, - "learning_rate": 7.102078837526774e-06, - "loss": 0.3279, - "step": 17198 - }, - { - "epoch": 1.1240441801189465, - "grad_norm": 0.43904247879981995, - "learning_rate": 7.10176200128274e-06, - "loss": 0.349, - "step": 17199 - }, - { - "epoch": 1.1241095353244885, - "grad_norm": 0.4686586260795593, - "learning_rate": 7.101445154787685e-06, - "loss": 0.3532, - "step": 17200 - }, - { - "epoch": 1.1241748905300306, - "grad_norm": 0.46144312620162964, - "learning_rate": 7.1011282980431525e-06, - "loss": 0.3368, - "step": 17201 - }, - { - "epoch": 1.124240245735573, - "grad_norm": 0.4716927707195282, - "learning_rate": 7.10081143105069e-06, - "loss": 0.3552, - "step": 17202 - }, - { - "epoch": 1.124305600941115, - "grad_norm": 0.4556477963924408, - "learning_rate": 7.100494553811843e-06, - "loss": 0.3556, - "step": 17203 - }, - { - "epoch": 1.124370956146657, - "grad_norm": 0.4434954524040222, - "learning_rate": 7.100177666328156e-06, - "loss": 0.3342, - "step": 17204 - }, - { - "epoch": 1.1244363113521991, - "grad_norm": 0.48426884412765503, - "learning_rate": 7.099860768601174e-06, - "loss": 0.3437, - "step": 17205 - }, - { - "epoch": 1.1245016665577414, - "grad_norm": 0.4554997384548187, - "learning_rate": 7.0995438606324454e-06, - "loss": 0.3594, - "step": 17206 - }, - { - "epoch": 1.1245670217632835, - "grad_norm": 0.4420515298843384, - "learning_rate": 7.099226942423514e-06, - "loss": 0.3036, - "step": 17207 - }, - { - "epoch": 1.1246323769688256, - "grad_norm": 0.4349117875099182, - "learning_rate": 7.098910013975924e-06, - "loss": 0.3294, - "step": 17208 - }, - { - "epoch": 1.1246977321743676, - "grad_norm": 0.41185781359672546, - "learning_rate": 7.098593075291225e-06, - "loss": 0.3016, - "step": 17209 - }, - { - "epoch": 1.1247630873799097, - "grad_norm": 0.44127434492111206, - "learning_rate": 7.0982761263709575e-06, - "loss": 0.3238, - "step": 17210 - }, - { - "epoch": 1.124828442585452, - "grad_norm": 0.4465019106864929, - "learning_rate": 7.097959167216672e-06, - "loss": 0.3065, - "step": 17211 - }, - { - "epoch": 1.124893797790994, - "grad_norm": 0.448458194732666, - "learning_rate": 7.097642197829914e-06, - "loss": 0.333, - "step": 17212 - }, - { - "epoch": 1.1249591529965361, - "grad_norm": 0.40496647357940674, - "learning_rate": 7.097325218212228e-06, - "loss": 0.2952, - "step": 17213 - }, - { - "epoch": 1.1250245082020782, - "grad_norm": 0.4957025349140167, - "learning_rate": 7.097008228365157e-06, - "loss": 0.4294, - "step": 17214 - }, - { - "epoch": 1.1250898634076205, - "grad_norm": 0.41872408986091614, - "learning_rate": 7.096691228290255e-06, - "loss": 0.3298, - "step": 17215 - }, - { - "epoch": 1.1251552186131626, - "grad_norm": 0.41255950927734375, - "learning_rate": 7.0963742179890595e-06, - "loss": 0.2982, - "step": 17216 - }, - { - "epoch": 1.1252205738187047, - "grad_norm": 0.4755578637123108, - "learning_rate": 7.096057197463123e-06, - "loss": 0.3683, - "step": 17217 - }, - { - "epoch": 1.1252859290242467, - "grad_norm": 0.46129918098449707, - "learning_rate": 7.095740166713989e-06, - "loss": 0.3584, - "step": 17218 - }, - { - "epoch": 1.1253512842297888, - "grad_norm": 0.4215393364429474, - "learning_rate": 7.0954231257432034e-06, - "loss": 0.3159, - "step": 17219 - }, - { - "epoch": 1.125416639435331, - "grad_norm": 0.4933900833129883, - "learning_rate": 7.095106074552316e-06, - "loss": 0.3759, - "step": 17220 - }, - { - "epoch": 1.1254819946408732, - "grad_norm": 0.4249468147754669, - "learning_rate": 7.0947890131428674e-06, - "loss": 0.2888, - "step": 17221 - }, - { - "epoch": 1.1255473498464152, - "grad_norm": 0.46237921714782715, - "learning_rate": 7.094471941516409e-06, - "loss": 0.377, - "step": 17222 - }, - { - "epoch": 1.1256127050519573, - "grad_norm": 0.4628616273403168, - "learning_rate": 7.0941548596744835e-06, - "loss": 0.3554, - "step": 17223 - }, - { - "epoch": 1.1256780602574996, - "grad_norm": 0.4421902894973755, - "learning_rate": 7.093837767618641e-06, - "loss": 0.3316, - "step": 17224 - }, - { - "epoch": 1.1257434154630417, - "grad_norm": 0.44672462344169617, - "learning_rate": 7.093520665350425e-06, - "loss": 0.3217, - "step": 17225 - }, - { - "epoch": 1.1258087706685838, - "grad_norm": 0.4612988531589508, - "learning_rate": 7.093203552871384e-06, - "loss": 0.3672, - "step": 17226 - }, - { - "epoch": 1.1258741258741258, - "grad_norm": 0.47889474034309387, - "learning_rate": 7.0928864301830646e-06, - "loss": 0.3712, - "step": 17227 - }, - { - "epoch": 1.125939481079668, - "grad_norm": 0.5105122923851013, - "learning_rate": 7.092569297287012e-06, - "loss": 0.3045, - "step": 17228 - }, - { - "epoch": 1.1260048362852102, - "grad_norm": 0.4600527882575989, - "learning_rate": 7.092252154184776e-06, - "loss": 0.3523, - "step": 17229 - }, - { - "epoch": 1.1260701914907523, - "grad_norm": 0.43037718534469604, - "learning_rate": 7.0919350008778985e-06, - "loss": 0.3056, - "step": 17230 - }, - { - "epoch": 1.1261355466962943, - "grad_norm": 0.45579734444618225, - "learning_rate": 7.091617837367931e-06, - "loss": 0.3468, - "step": 17231 - }, - { - "epoch": 1.1262009019018364, - "grad_norm": 0.455057829618454, - "learning_rate": 7.09130066365642e-06, - "loss": 0.3353, - "step": 17232 - }, - { - "epoch": 1.1262662571073787, - "grad_norm": 0.46316277980804443, - "learning_rate": 7.09098347974491e-06, - "loss": 0.3341, - "step": 17233 - }, - { - "epoch": 1.1263316123129208, - "grad_norm": 0.467692494392395, - "learning_rate": 7.090666285634947e-06, - "loss": 0.3758, - "step": 17234 - }, - { - "epoch": 1.1263969675184629, - "grad_norm": 0.4547955393791199, - "learning_rate": 7.090349081328083e-06, - "loss": 0.3544, - "step": 17235 - }, - { - "epoch": 1.126462322724005, - "grad_norm": 0.4579767882823944, - "learning_rate": 7.0900318668258614e-06, - "loss": 0.3571, - "step": 17236 - }, - { - "epoch": 1.126527677929547, - "grad_norm": 0.46709659695625305, - "learning_rate": 7.08971464212983e-06, - "loss": 0.3283, - "step": 17237 - }, - { - "epoch": 1.1265930331350893, - "grad_norm": 0.4610109031200409, - "learning_rate": 7.089397407241537e-06, - "loss": 0.3211, - "step": 17238 - }, - { - "epoch": 1.1266583883406314, - "grad_norm": 0.4366426169872284, - "learning_rate": 7.089080162162528e-06, - "loss": 0.3692, - "step": 17239 - }, - { - "epoch": 1.1267237435461734, - "grad_norm": 0.4222938120365143, - "learning_rate": 7.088762906894353e-06, - "loss": 0.2909, - "step": 17240 - }, - { - "epoch": 1.1267890987517155, - "grad_norm": 0.436970591545105, - "learning_rate": 7.088445641438556e-06, - "loss": 0.3162, - "step": 17241 - }, - { - "epoch": 1.1268544539572578, - "grad_norm": 0.4476236402988434, - "learning_rate": 7.088128365796687e-06, - "loss": 0.3716, - "step": 17242 - }, - { - "epoch": 1.1269198091627999, - "grad_norm": 0.4152679443359375, - "learning_rate": 7.0878110799702925e-06, - "loss": 0.2782, - "step": 17243 - }, - { - "epoch": 1.126985164368342, - "grad_norm": 0.4600754976272583, - "learning_rate": 7.08749378396092e-06, - "loss": 0.3583, - "step": 17244 - }, - { - "epoch": 1.127050519573884, - "grad_norm": 0.44209912419319153, - "learning_rate": 7.087176477770118e-06, - "loss": 0.3286, - "step": 17245 - }, - { - "epoch": 1.127115874779426, - "grad_norm": 0.46539053320884705, - "learning_rate": 7.0868591613994305e-06, - "loss": 0.3895, - "step": 17246 - }, - { - "epoch": 1.1271812299849684, - "grad_norm": 0.42643725872039795, - "learning_rate": 7.0865418348504115e-06, - "loss": 0.3279, - "step": 17247 - }, - { - "epoch": 1.1272465851905105, - "grad_norm": 0.4708240330219269, - "learning_rate": 7.086224498124603e-06, - "loss": 0.3709, - "step": 17248 - }, - { - "epoch": 1.1273119403960525, - "grad_norm": 0.4962260127067566, - "learning_rate": 7.085907151223555e-06, - "loss": 0.3836, - "step": 17249 - }, - { - "epoch": 1.1273772956015946, - "grad_norm": 0.4395652711391449, - "learning_rate": 7.085589794148815e-06, - "loss": 0.3448, - "step": 17250 - }, - { - "epoch": 1.127442650807137, - "grad_norm": 0.4021154046058655, - "learning_rate": 7.085272426901932e-06, - "loss": 0.2926, - "step": 17251 - }, - { - "epoch": 1.127508006012679, - "grad_norm": 0.4648852050304413, - "learning_rate": 7.0849550494844535e-06, - "loss": 0.3571, - "step": 17252 - }, - { - "epoch": 1.127573361218221, - "grad_norm": 0.4426382780075073, - "learning_rate": 7.084637661897926e-06, - "loss": 0.3423, - "step": 17253 - }, - { - "epoch": 1.1276387164237631, - "grad_norm": 0.4879113733768463, - "learning_rate": 7.084320264143898e-06, - "loss": 0.404, - "step": 17254 - }, - { - "epoch": 1.1277040716293052, - "grad_norm": 0.4888441562652588, - "learning_rate": 7.084002856223918e-06, - "loss": 0.3869, - "step": 17255 - }, - { - "epoch": 1.1277694268348475, - "grad_norm": 0.4416881799697876, - "learning_rate": 7.0836854381395355e-06, - "loss": 0.3111, - "step": 17256 - }, - { - "epoch": 1.1278347820403896, - "grad_norm": 0.4856817126274109, - "learning_rate": 7.083368009892295e-06, - "loss": 0.3985, - "step": 17257 - }, - { - "epoch": 1.1279001372459316, - "grad_norm": 0.459825724363327, - "learning_rate": 7.083050571483749e-06, - "loss": 0.3739, - "step": 17258 - }, - { - "epoch": 1.1279654924514737, - "grad_norm": 0.45025455951690674, - "learning_rate": 7.082733122915443e-06, - "loss": 0.3269, - "step": 17259 - }, - { - "epoch": 1.128030847657016, - "grad_norm": 0.42813920974731445, - "learning_rate": 7.082415664188927e-06, - "loss": 0.3193, - "step": 17260 - }, - { - "epoch": 1.128096202862558, - "grad_norm": 0.4929443597793579, - "learning_rate": 7.082098195305746e-06, - "loss": 0.3561, - "step": 17261 - }, - { - "epoch": 1.1281615580681001, - "grad_norm": 0.4523185193538666, - "learning_rate": 7.081780716267452e-06, - "loss": 0.3523, - "step": 17262 - }, - { - "epoch": 1.1282269132736422, - "grad_norm": 0.4614277184009552, - "learning_rate": 7.081463227075593e-06, - "loss": 0.3751, - "step": 17263 - }, - { - "epoch": 1.1282922684791843, - "grad_norm": 0.42780762910842896, - "learning_rate": 7.0811457277317156e-06, - "loss": 0.3102, - "step": 17264 - }, - { - "epoch": 1.1283576236847266, - "grad_norm": 0.44840002059936523, - "learning_rate": 7.08082821823737e-06, - "loss": 0.3278, - "step": 17265 - }, - { - "epoch": 1.1284229788902687, - "grad_norm": 0.4721134305000305, - "learning_rate": 7.0805106985941045e-06, - "loss": 0.3775, - "step": 17266 - }, - { - "epoch": 1.1284883340958107, - "grad_norm": 0.44914865493774414, - "learning_rate": 7.080193168803468e-06, - "loss": 0.3528, - "step": 17267 - }, - { - "epoch": 1.1285536893013528, - "grad_norm": 0.44426921010017395, - "learning_rate": 7.0798756288670075e-06, - "loss": 0.3063, - "step": 17268 - }, - { - "epoch": 1.128619044506895, - "grad_norm": 0.4684821665287018, - "learning_rate": 7.079558078786274e-06, - "loss": 0.3765, - "step": 17269 - }, - { - "epoch": 1.1286843997124372, - "grad_norm": 0.4223394989967346, - "learning_rate": 7.079240518562814e-06, - "loss": 0.3101, - "step": 17270 - }, - { - "epoch": 1.1287497549179792, - "grad_norm": 0.47070014476776123, - "learning_rate": 7.078922948198179e-06, - "loss": 0.4017, - "step": 17271 - }, - { - "epoch": 1.1288151101235213, - "grad_norm": 0.4264320731163025, - "learning_rate": 7.078605367693917e-06, - "loss": 0.3007, - "step": 17272 - }, - { - "epoch": 1.1288804653290634, - "grad_norm": 0.40820813179016113, - "learning_rate": 7.0782877770515755e-06, - "loss": 0.2684, - "step": 17273 - }, - { - "epoch": 1.1289458205346055, - "grad_norm": 0.42854416370391846, - "learning_rate": 7.077970176272706e-06, - "loss": 0.3291, - "step": 17274 - }, - { - "epoch": 1.1290111757401478, - "grad_norm": 0.4528523087501526, - "learning_rate": 7.077652565358855e-06, - "loss": 0.3345, - "step": 17275 - }, - { - "epoch": 1.1290765309456898, - "grad_norm": 0.5198642015457153, - "learning_rate": 7.077334944311572e-06, - "loss": 0.3885, - "step": 17276 - }, - { - "epoch": 1.129141886151232, - "grad_norm": 0.4706581234931946, - "learning_rate": 7.077017313132407e-06, - "loss": 0.3607, - "step": 17277 - }, - { - "epoch": 1.129207241356774, - "grad_norm": 0.4525388479232788, - "learning_rate": 7.07669967182291e-06, - "loss": 0.3389, - "step": 17278 - }, - { - "epoch": 1.1292725965623163, - "grad_norm": 0.4727247655391693, - "learning_rate": 7.076382020384628e-06, - "loss": 0.3645, - "step": 17279 - }, - { - "epoch": 1.1293379517678583, - "grad_norm": 0.483717679977417, - "learning_rate": 7.076064358819113e-06, - "loss": 0.3592, - "step": 17280 - }, - { - "epoch": 1.1294033069734004, - "grad_norm": 0.49619531631469727, - "learning_rate": 7.075746687127912e-06, - "loss": 0.391, - "step": 17281 - }, - { - "epoch": 1.1294686621789425, - "grad_norm": 0.4986744523048401, - "learning_rate": 7.075429005312575e-06, - "loss": 0.4127, - "step": 17282 - }, - { - "epoch": 1.1295340173844846, - "grad_norm": 0.4431566894054413, - "learning_rate": 7.075111313374653e-06, - "loss": 0.3448, - "step": 17283 - }, - { - "epoch": 1.1295993725900269, - "grad_norm": 0.45887160301208496, - "learning_rate": 7.074793611315692e-06, - "loss": 0.3302, - "step": 17284 - }, - { - "epoch": 1.129664727795569, - "grad_norm": 0.7908359169960022, - "learning_rate": 7.074475899137246e-06, - "loss": 0.3337, - "step": 17285 - }, - { - "epoch": 1.129730083001111, - "grad_norm": 0.46458959579467773, - "learning_rate": 7.074158176840862e-06, - "loss": 0.3844, - "step": 17286 - }, - { - "epoch": 1.129795438206653, - "grad_norm": 0.4635846018791199, - "learning_rate": 7.073840444428089e-06, - "loss": 0.3298, - "step": 17287 - }, - { - "epoch": 1.1298607934121954, - "grad_norm": 0.4312458038330078, - "learning_rate": 7.073522701900478e-06, - "loss": 0.3172, - "step": 17288 - }, - { - "epoch": 1.1299261486177374, - "grad_norm": 0.4425434172153473, - "learning_rate": 7.073204949259579e-06, - "loss": 0.3209, - "step": 17289 - }, - { - "epoch": 1.1299915038232795, - "grad_norm": 0.47515788674354553, - "learning_rate": 7.072887186506941e-06, - "loss": 0.3655, - "step": 17290 - }, - { - "epoch": 1.1300568590288216, - "grad_norm": 0.4508166015148163, - "learning_rate": 7.072569413644113e-06, - "loss": 0.3149, - "step": 17291 - }, - { - "epoch": 1.1301222142343637, - "grad_norm": 0.4490228295326233, - "learning_rate": 7.0722516306726485e-06, - "loss": 0.3145, - "step": 17292 - }, - { - "epoch": 1.130187569439906, - "grad_norm": 0.4573231637477875, - "learning_rate": 7.071933837594093e-06, - "loss": 0.3391, - "step": 17293 - }, - { - "epoch": 1.130252924645448, - "grad_norm": 0.4532839357852936, - "learning_rate": 7.071616034409998e-06, - "loss": 0.3115, - "step": 17294 - }, - { - "epoch": 1.13031827985099, - "grad_norm": 0.4641956090927124, - "learning_rate": 7.071298221121916e-06, - "loss": 0.3409, - "step": 17295 - }, - { - "epoch": 1.1303836350565322, - "grad_norm": 0.47702524065971375, - "learning_rate": 7.070980397731394e-06, - "loss": 0.373, - "step": 17296 - }, - { - "epoch": 1.1304489902620745, - "grad_norm": 0.46357297897338867, - "learning_rate": 7.070662564239983e-06, - "loss": 0.3628, - "step": 17297 - }, - { - "epoch": 1.1305143454676165, - "grad_norm": 0.4431051015853882, - "learning_rate": 7.070344720649234e-06, - "loss": 0.3188, - "step": 17298 - }, - { - "epoch": 1.1305797006731586, - "grad_norm": 0.475641667842865, - "learning_rate": 7.0700268669606945e-06, - "loss": 0.3754, - "step": 17299 - }, - { - "epoch": 1.1306450558787007, - "grad_norm": 0.45539844036102295, - "learning_rate": 7.0697090031759186e-06, - "loss": 0.3549, - "step": 17300 - }, - { - "epoch": 1.1307104110842428, - "grad_norm": 0.454843133687973, - "learning_rate": 7.069391129296455e-06, - "loss": 0.3482, - "step": 17301 - }, - { - "epoch": 1.130775766289785, - "grad_norm": 0.4210955500602722, - "learning_rate": 7.069073245323852e-06, - "loss": 0.3487, - "step": 17302 - }, - { - "epoch": 1.1308411214953271, - "grad_norm": 0.45039451122283936, - "learning_rate": 7.068755351259664e-06, - "loss": 0.3586, - "step": 17303 - }, - { - "epoch": 1.1309064767008692, - "grad_norm": 0.45915690064430237, - "learning_rate": 7.068437447105439e-06, - "loss": 0.3412, - "step": 17304 - }, - { - "epoch": 1.1309718319064113, - "grad_norm": 0.4373253881931305, - "learning_rate": 7.068119532862727e-06, - "loss": 0.3269, - "step": 17305 - }, - { - "epoch": 1.1310371871119536, - "grad_norm": 0.4582827389240265, - "learning_rate": 7.06780160853308e-06, - "loss": 0.3389, - "step": 17306 - }, - { - "epoch": 1.1311025423174956, - "grad_norm": 0.4619799554347992, - "learning_rate": 7.06748367411805e-06, - "loss": 0.3718, - "step": 17307 - }, - { - "epoch": 1.1311678975230377, - "grad_norm": 0.43546101450920105, - "learning_rate": 7.067165729619183e-06, - "loss": 0.3282, - "step": 17308 - }, - { - "epoch": 1.1312332527285798, - "grad_norm": 0.43418750166893005, - "learning_rate": 7.066847775038035e-06, - "loss": 0.3286, - "step": 17309 - }, - { - "epoch": 1.1312986079341218, - "grad_norm": 0.4487304389476776, - "learning_rate": 7.0665298103761525e-06, - "loss": 0.3483, - "step": 17310 - }, - { - "epoch": 1.1313639631396641, - "grad_norm": 0.45989903807640076, - "learning_rate": 7.066211835635089e-06, - "loss": 0.3212, - "step": 17311 - }, - { - "epoch": 1.1314293183452062, - "grad_norm": 0.5298823118209839, - "learning_rate": 7.065893850816396e-06, - "loss": 0.436, - "step": 17312 - }, - { - "epoch": 1.1314946735507483, - "grad_norm": 0.48152822256088257, - "learning_rate": 7.06557585592162e-06, - "loss": 0.3857, - "step": 17313 - }, - { - "epoch": 1.1315600287562904, - "grad_norm": 0.4999312162399292, - "learning_rate": 7.065257850952318e-06, - "loss": 0.3871, - "step": 17314 - }, - { - "epoch": 1.1316253839618327, - "grad_norm": 0.42304015159606934, - "learning_rate": 7.064939835910035e-06, - "loss": 0.3182, - "step": 17315 - }, - { - "epoch": 1.1316907391673747, - "grad_norm": 0.438496857881546, - "learning_rate": 7.064621810796328e-06, - "loss": 0.3471, - "step": 17316 - }, - { - "epoch": 1.1317560943729168, - "grad_norm": 0.4253050684928894, - "learning_rate": 7.064303775612745e-06, - "loss": 0.2895, - "step": 17317 - }, - { - "epoch": 1.1318214495784589, - "grad_norm": 0.46796098351478577, - "learning_rate": 7.063985730360836e-06, - "loss": 0.3597, - "step": 17318 - }, - { - "epoch": 1.131886804784001, - "grad_norm": 0.43548649549484253, - "learning_rate": 7.063667675042153e-06, - "loss": 0.316, - "step": 17319 - }, - { - "epoch": 1.1319521599895432, - "grad_norm": 0.4427529573440552, - "learning_rate": 7.063349609658248e-06, - "loss": 0.3383, - "step": 17320 - }, - { - "epoch": 1.1320175151950853, - "grad_norm": 0.42355749011039734, - "learning_rate": 7.063031534210673e-06, - "loss": 0.3267, - "step": 17321 - }, - { - "epoch": 1.1320828704006274, - "grad_norm": 0.4459403455257416, - "learning_rate": 7.062713448700979e-06, - "loss": 0.3042, - "step": 17322 - }, - { - "epoch": 1.1321482256061695, - "grad_norm": 0.49038663506507874, - "learning_rate": 7.062395353130716e-06, - "loss": 0.366, - "step": 17323 - }, - { - "epoch": 1.1322135808117118, - "grad_norm": 0.4366703927516937, - "learning_rate": 7.062077247501436e-06, - "loss": 0.3511, - "step": 17324 - }, - { - "epoch": 1.1322789360172538, - "grad_norm": 0.43770113587379456, - "learning_rate": 7.0617591318146915e-06, - "loss": 0.3072, - "step": 17325 - }, - { - "epoch": 1.132344291222796, - "grad_norm": 0.4596775770187378, - "learning_rate": 7.061441006072033e-06, - "loss": 0.3467, - "step": 17326 - }, - { - "epoch": 1.132409646428338, - "grad_norm": 0.44386380910873413, - "learning_rate": 7.061122870275013e-06, - "loss": 0.3455, - "step": 17327 - }, - { - "epoch": 1.13247500163388, - "grad_norm": 0.4150366187095642, - "learning_rate": 7.0608047244251834e-06, - "loss": 0.3035, - "step": 17328 - }, - { - "epoch": 1.1325403568394223, - "grad_norm": 0.4691945016384125, - "learning_rate": 7.060486568524094e-06, - "loss": 0.3742, - "step": 17329 - }, - { - "epoch": 1.1326057120449644, - "grad_norm": 0.47214385867118835, - "learning_rate": 7.060168402573298e-06, - "loss": 0.3936, - "step": 17330 - }, - { - "epoch": 1.1326710672505065, - "grad_norm": 0.4524897634983063, - "learning_rate": 7.059850226574347e-06, - "loss": 0.3194, - "step": 17331 - }, - { - "epoch": 1.1327364224560486, - "grad_norm": 0.4278281033039093, - "learning_rate": 7.0595320405287935e-06, - "loss": 0.3272, - "step": 17332 - }, - { - "epoch": 1.1328017776615908, - "grad_norm": 0.44247061014175415, - "learning_rate": 7.059213844438189e-06, - "loss": 0.3389, - "step": 17333 - }, - { - "epoch": 1.132867132867133, - "grad_norm": 0.42941904067993164, - "learning_rate": 7.058895638304084e-06, - "loss": 0.3351, - "step": 17334 - }, - { - "epoch": 1.132932488072675, - "grad_norm": 0.4680541753768921, - "learning_rate": 7.0585774221280325e-06, - "loss": 0.3967, - "step": 17335 - }, - { - "epoch": 1.132997843278217, - "grad_norm": 0.42064833641052246, - "learning_rate": 7.0582591959115855e-06, - "loss": 0.3048, - "step": 17336 - }, - { - "epoch": 1.1330631984837591, - "grad_norm": 0.4507811665534973, - "learning_rate": 7.057940959656295e-06, - "loss": 0.3747, - "step": 17337 - }, - { - "epoch": 1.1331285536893014, - "grad_norm": 0.4998791515827179, - "learning_rate": 7.057622713363714e-06, - "loss": 0.3948, - "step": 17338 - }, - { - "epoch": 1.1331939088948435, - "grad_norm": 0.4143909215927124, - "learning_rate": 7.057304457035395e-06, - "loss": 0.2958, - "step": 17339 - }, - { - "epoch": 1.1332592641003856, - "grad_norm": 0.43302014470100403, - "learning_rate": 7.0569861906728885e-06, - "loss": 0.3005, - "step": 17340 - }, - { - "epoch": 1.1333246193059276, - "grad_norm": 0.4498705565929413, - "learning_rate": 7.056667914277748e-06, - "loss": 0.3151, - "step": 17341 - }, - { - "epoch": 1.13338997451147, - "grad_norm": 0.43818414211273193, - "learning_rate": 7.056349627851524e-06, - "loss": 0.3361, - "step": 17342 - }, - { - "epoch": 1.133455329717012, - "grad_norm": 0.45202022790908813, - "learning_rate": 7.056031331395773e-06, - "loss": 0.3702, - "step": 17343 - }, - { - "epoch": 1.133520684922554, - "grad_norm": 0.4247361421585083, - "learning_rate": 7.0557130249120434e-06, - "loss": 0.3453, - "step": 17344 - }, - { - "epoch": 1.1335860401280962, - "grad_norm": 0.45013856887817383, - "learning_rate": 7.05539470840189e-06, - "loss": 0.3532, - "step": 17345 - }, - { - "epoch": 1.1336513953336382, - "grad_norm": 0.4905974864959717, - "learning_rate": 7.055076381866863e-06, - "loss": 0.3757, - "step": 17346 - }, - { - "epoch": 1.1337167505391805, - "grad_norm": 0.4609545171260834, - "learning_rate": 7.054758045308518e-06, - "loss": 0.3376, - "step": 17347 - }, - { - "epoch": 1.1337821057447226, - "grad_norm": 0.5002155303955078, - "learning_rate": 7.0544396987284055e-06, - "loss": 0.3553, - "step": 17348 - }, - { - "epoch": 1.1338474609502647, - "grad_norm": 0.42377957701683044, - "learning_rate": 7.0541213421280795e-06, - "loss": 0.3133, - "step": 17349 - }, - { - "epoch": 1.1339128161558067, - "grad_norm": 0.4309867024421692, - "learning_rate": 7.053802975509092e-06, - "loss": 0.3233, - "step": 17350 - }, - { - "epoch": 1.133978171361349, - "grad_norm": 0.44097164273262024, - "learning_rate": 7.053484598872995e-06, - "loss": 0.3527, - "step": 17351 - }, - { - "epoch": 1.1340435265668911, - "grad_norm": 0.43234068155288696, - "learning_rate": 7.053166212221343e-06, - "loss": 0.3388, - "step": 17352 - }, - { - "epoch": 1.1341088817724332, - "grad_norm": 0.502611517906189, - "learning_rate": 7.0528478155556875e-06, - "loss": 0.3413, - "step": 17353 - }, - { - "epoch": 1.1341742369779753, - "grad_norm": 0.4061679542064667, - "learning_rate": 7.0525294088775835e-06, - "loss": 0.28, - "step": 17354 - }, - { - "epoch": 1.1342395921835173, - "grad_norm": 0.4602612257003784, - "learning_rate": 7.052210992188582e-06, - "loss": 0.3532, - "step": 17355 - }, - { - "epoch": 1.1343049473890596, - "grad_norm": 0.4474022090435028, - "learning_rate": 7.051892565490235e-06, - "loss": 0.3531, - "step": 17356 - }, - { - "epoch": 1.1343703025946017, - "grad_norm": 0.4401814043521881, - "learning_rate": 7.051574128784099e-06, - "loss": 0.3305, - "step": 17357 - }, - { - "epoch": 1.1344356578001438, - "grad_norm": 0.4765278995037079, - "learning_rate": 7.051255682071725e-06, - "loss": 0.3655, - "step": 17358 - }, - { - "epoch": 1.1345010130056858, - "grad_norm": 0.4542257487773895, - "learning_rate": 7.050937225354666e-06, - "loss": 0.3807, - "step": 17359 - }, - { - "epoch": 1.1345663682112281, - "grad_norm": 0.48853549361228943, - "learning_rate": 7.050618758634475e-06, - "loss": 0.3486, - "step": 17360 - }, - { - "epoch": 1.1346317234167702, - "grad_norm": 0.4309377372264862, - "learning_rate": 7.0503002819127065e-06, - "loss": 0.339, - "step": 17361 - }, - { - "epoch": 1.1346970786223123, - "grad_norm": 0.43339797854423523, - "learning_rate": 7.049981795190912e-06, - "loss": 0.328, - "step": 17362 - }, - { - "epoch": 1.1347624338278544, - "grad_norm": 0.4359287619590759, - "learning_rate": 7.049663298470648e-06, - "loss": 0.3394, - "step": 17363 - }, - { - "epoch": 1.1348277890333964, - "grad_norm": 0.4393298625946045, - "learning_rate": 7.049344791753465e-06, - "loss": 0.344, - "step": 17364 - }, - { - "epoch": 1.1348931442389387, - "grad_norm": 0.44869673252105713, - "learning_rate": 7.049026275040918e-06, - "loss": 0.3265, - "step": 17365 - }, - { - "epoch": 1.1349584994444808, - "grad_norm": 0.44936755299568176, - "learning_rate": 7.048707748334559e-06, - "loss": 0.3354, - "step": 17366 - }, - { - "epoch": 1.1350238546500229, - "grad_norm": 0.44537898898124695, - "learning_rate": 7.048389211635943e-06, - "loss": 0.3392, - "step": 17367 - }, - { - "epoch": 1.135089209855565, - "grad_norm": 0.4435463547706604, - "learning_rate": 7.048070664946624e-06, - "loss": 0.3456, - "step": 17368 - }, - { - "epoch": 1.1351545650611072, - "grad_norm": 0.47491025924682617, - "learning_rate": 7.0477521082681545e-06, - "loss": 0.3888, - "step": 17369 - }, - { - "epoch": 1.1352199202666493, - "grad_norm": 0.45296916365623474, - "learning_rate": 7.047433541602089e-06, - "loss": 0.3213, - "step": 17370 - }, - { - "epoch": 1.1352852754721914, - "grad_norm": 0.47255194187164307, - "learning_rate": 7.047114964949979e-06, - "loss": 0.3834, - "step": 17371 - }, - { - "epoch": 1.1353506306777335, - "grad_norm": 0.43570056557655334, - "learning_rate": 7.046796378313382e-06, - "loss": 0.3601, - "step": 17372 - }, - { - "epoch": 1.1354159858832755, - "grad_norm": 0.45375707745552063, - "learning_rate": 7.046477781693848e-06, - "loss": 0.3781, - "step": 17373 - }, - { - "epoch": 1.1354813410888178, - "grad_norm": 0.4503547251224518, - "learning_rate": 7.046159175092935e-06, - "loss": 0.3285, - "step": 17374 - }, - { - "epoch": 1.13554669629436, - "grad_norm": 0.44876909255981445, - "learning_rate": 7.045840558512194e-06, - "loss": 0.312, - "step": 17375 - }, - { - "epoch": 1.135612051499902, - "grad_norm": 0.4256334602832794, - "learning_rate": 7.0455219319531795e-06, - "loss": 0.3399, - "step": 17376 - }, - { - "epoch": 1.135677406705444, - "grad_norm": 0.4213603138923645, - "learning_rate": 7.045203295417446e-06, - "loss": 0.3154, - "step": 17377 - }, - { - "epoch": 1.1357427619109863, - "grad_norm": 0.45095837116241455, - "learning_rate": 7.044884648906548e-06, - "loss": 0.3509, - "step": 17378 - }, - { - "epoch": 1.1358081171165284, - "grad_norm": 0.5239996910095215, - "learning_rate": 7.044565992422041e-06, - "loss": 0.4397, - "step": 17379 - }, - { - "epoch": 1.1358734723220705, - "grad_norm": 0.47133052349090576, - "learning_rate": 7.044247325965474e-06, - "loss": 0.3864, - "step": 17380 - }, - { - "epoch": 1.1359388275276125, - "grad_norm": 0.4539269804954529, - "learning_rate": 7.043928649538406e-06, - "loss": 0.3475, - "step": 17381 - }, - { - "epoch": 1.1360041827331546, - "grad_norm": 0.48296546936035156, - "learning_rate": 7.04360996314239e-06, - "loss": 0.3258, - "step": 17382 - }, - { - "epoch": 1.1360695379386967, - "grad_norm": 0.4233575761318207, - "learning_rate": 7.04329126677898e-06, - "loss": 0.3677, - "step": 17383 - }, - { - "epoch": 1.136134893144239, - "grad_norm": 0.42596665024757385, - "learning_rate": 7.04297256044973e-06, - "loss": 0.3294, - "step": 17384 - }, - { - "epoch": 1.136200248349781, - "grad_norm": 0.4421609342098236, - "learning_rate": 7.0426538441561965e-06, - "loss": 0.3398, - "step": 17385 - }, - { - "epoch": 1.1362656035553231, - "grad_norm": 0.45286962389945984, - "learning_rate": 7.042335117899932e-06, - "loss": 0.353, - "step": 17386 - }, - { - "epoch": 1.1363309587608652, - "grad_norm": 0.42848026752471924, - "learning_rate": 7.0420163816824906e-06, - "loss": 0.3356, - "step": 17387 - }, - { - "epoch": 1.1363963139664075, - "grad_norm": 0.4370197057723999, - "learning_rate": 7.041697635505429e-06, - "loss": 0.3385, - "step": 17388 - }, - { - "epoch": 1.1364616691719496, - "grad_norm": 0.46252763271331787, - "learning_rate": 7.0413788793703e-06, - "loss": 0.37, - "step": 17389 - }, - { - "epoch": 1.1365270243774916, - "grad_norm": 0.5076103806495667, - "learning_rate": 7.041060113278659e-06, - "loss": 0.4124, - "step": 17390 - }, - { - "epoch": 1.1365923795830337, - "grad_norm": 0.45653036236763, - "learning_rate": 7.040741337232061e-06, - "loss": 0.3391, - "step": 17391 - }, - { - "epoch": 1.1366577347885758, - "grad_norm": 0.4983507990837097, - "learning_rate": 7.040422551232061e-06, - "loss": 0.3108, - "step": 17392 - }, - { - "epoch": 1.136723089994118, - "grad_norm": 0.49780896306037903, - "learning_rate": 7.040103755280213e-06, - "loss": 0.401, - "step": 17393 - }, - { - "epoch": 1.1367884451996602, - "grad_norm": 0.4493112564086914, - "learning_rate": 7.039784949378073e-06, - "loss": 0.3412, - "step": 17394 - }, - { - "epoch": 1.1368538004052022, - "grad_norm": 0.43972402811050415, - "learning_rate": 7.039466133527194e-06, - "loss": 0.3299, - "step": 17395 - }, - { - "epoch": 1.1369191556107443, - "grad_norm": 0.4225434362888336, - "learning_rate": 7.039147307729133e-06, - "loss": 0.2964, - "step": 17396 - }, - { - "epoch": 1.1369845108162866, - "grad_norm": 0.45975571870803833, - "learning_rate": 7.038828471985444e-06, - "loss": 0.3665, - "step": 17397 - }, - { - "epoch": 1.1370498660218287, - "grad_norm": 0.4576628506183624, - "learning_rate": 7.03850962629768e-06, - "loss": 0.3618, - "step": 17398 - }, - { - "epoch": 1.1371152212273707, - "grad_norm": 0.4470856785774231, - "learning_rate": 7.038190770667401e-06, - "loss": 0.3628, - "step": 17399 - }, - { - "epoch": 1.1371805764329128, - "grad_norm": 0.4435643255710602, - "learning_rate": 7.037871905096159e-06, - "loss": 0.2982, - "step": 17400 - }, - { - "epoch": 1.1372459316384549, - "grad_norm": 0.46543169021606445, - "learning_rate": 7.03755302958551e-06, - "loss": 0.3415, - "step": 17401 - }, - { - "epoch": 1.1373112868439972, - "grad_norm": 0.45349591970443726, - "learning_rate": 7.037234144137009e-06, - "loss": 0.3329, - "step": 17402 - }, - { - "epoch": 1.1373766420495393, - "grad_norm": 0.4510424733161926, - "learning_rate": 7.03691524875221e-06, - "loss": 0.3286, - "step": 17403 - }, - { - "epoch": 1.1374419972550813, - "grad_norm": 0.42036014795303345, - "learning_rate": 7.03659634343267e-06, - "loss": 0.2916, - "step": 17404 - }, - { - "epoch": 1.1375073524606234, - "grad_norm": 0.48616576194763184, - "learning_rate": 7.036277428179945e-06, - "loss": 0.3563, - "step": 17405 - }, - { - "epoch": 1.1375727076661657, - "grad_norm": 0.45995670557022095, - "learning_rate": 7.035958502995589e-06, - "loss": 0.3807, - "step": 17406 - }, - { - "epoch": 1.1376380628717078, - "grad_norm": 0.4322569966316223, - "learning_rate": 7.035639567881158e-06, - "loss": 0.3034, - "step": 17407 - }, - { - "epoch": 1.1377034180772498, - "grad_norm": 0.4015660881996155, - "learning_rate": 7.035320622838208e-06, - "loss": 0.294, - "step": 17408 - }, - { - "epoch": 1.137768773282792, - "grad_norm": 0.44619235396385193, - "learning_rate": 7.035001667868293e-06, - "loss": 0.3385, - "step": 17409 - }, - { - "epoch": 1.137834128488334, - "grad_norm": 0.4452032446861267, - "learning_rate": 7.034682702972971e-06, - "loss": 0.3633, - "step": 17410 - }, - { - "epoch": 1.1378994836938763, - "grad_norm": 0.4686889052391052, - "learning_rate": 7.034363728153797e-06, - "loss": 0.3536, - "step": 17411 - }, - { - "epoch": 1.1379648388994184, - "grad_norm": 0.4439161419868469, - "learning_rate": 7.0340447434123264e-06, - "loss": 0.3422, - "step": 17412 - }, - { - "epoch": 1.1380301941049604, - "grad_norm": 0.4715063273906708, - "learning_rate": 7.0337257487501145e-06, - "loss": 0.3479, - "step": 17413 - }, - { - "epoch": 1.1380955493105025, - "grad_norm": 0.4390163719654083, - "learning_rate": 7.033406744168716e-06, - "loss": 0.323, - "step": 17414 - }, - { - "epoch": 1.1381609045160448, - "grad_norm": 0.4483895003795624, - "learning_rate": 7.03308772966969e-06, - "loss": 0.356, - "step": 17415 - }, - { - "epoch": 1.1382262597215869, - "grad_norm": 0.4739121198654175, - "learning_rate": 7.03276870525459e-06, - "loss": 0.3489, - "step": 17416 - }, - { - "epoch": 1.138291614927129, - "grad_norm": 0.43798214197158813, - "learning_rate": 7.032449670924974e-06, - "loss": 0.3043, - "step": 17417 - }, - { - "epoch": 1.138356970132671, - "grad_norm": 0.4182601273059845, - "learning_rate": 7.032130626682395e-06, - "loss": 0.3046, - "step": 17418 - }, - { - "epoch": 1.138422325338213, - "grad_norm": 0.4340640604496002, - "learning_rate": 7.031811572528413e-06, - "loss": 0.3359, - "step": 17419 - }, - { - "epoch": 1.1384876805437554, - "grad_norm": 0.4664618968963623, - "learning_rate": 7.031492508464581e-06, - "loss": 0.3772, - "step": 17420 - }, - { - "epoch": 1.1385530357492974, - "grad_norm": 0.4334782660007477, - "learning_rate": 7.031173434492458e-06, - "loss": 0.2973, - "step": 17421 - }, - { - "epoch": 1.1386183909548395, - "grad_norm": 0.4512031376361847, - "learning_rate": 7.030854350613596e-06, - "loss": 0.3546, - "step": 17422 - }, - { - "epoch": 1.1386837461603816, - "grad_norm": 0.4125944674015045, - "learning_rate": 7.030535256829555e-06, - "loss": 0.3129, - "step": 17423 - }, - { - "epoch": 1.1387491013659239, - "grad_norm": 0.45096346735954285, - "learning_rate": 7.03021615314189e-06, - "loss": 0.3389, - "step": 17424 - }, - { - "epoch": 1.138814456571466, - "grad_norm": 0.41682204604148865, - "learning_rate": 7.029897039552157e-06, - "loss": 0.3124, - "step": 17425 - }, - { - "epoch": 1.138879811777008, - "grad_norm": 0.42640236020088196, - "learning_rate": 7.029577916061913e-06, - "loss": 0.3147, - "step": 17426 - }, - { - "epoch": 1.13894516698255, - "grad_norm": 0.4052651524543762, - "learning_rate": 7.029258782672714e-06, - "loss": 0.3047, - "step": 17427 - }, - { - "epoch": 1.1390105221880922, - "grad_norm": 0.4783850312232971, - "learning_rate": 7.028939639386118e-06, - "loss": 0.3281, - "step": 17428 - }, - { - "epoch": 1.1390758773936345, - "grad_norm": 0.4420865774154663, - "learning_rate": 7.02862048620368e-06, - "loss": 0.3389, - "step": 17429 - }, - { - "epoch": 1.1391412325991765, - "grad_norm": 0.4368607997894287, - "learning_rate": 7.028301323126958e-06, - "loss": 0.3255, - "step": 17430 - }, - { - "epoch": 1.1392065878047186, - "grad_norm": 0.4040074646472931, - "learning_rate": 7.027982150157506e-06, - "loss": 0.3003, - "step": 17431 - }, - { - "epoch": 1.1392719430102607, - "grad_norm": 0.477368026971817, - "learning_rate": 7.027662967296885e-06, - "loss": 0.4153, - "step": 17432 - }, - { - "epoch": 1.139337298215803, - "grad_norm": 0.4319467544555664, - "learning_rate": 7.0273437745466465e-06, - "loss": 0.3039, - "step": 17433 - }, - { - "epoch": 1.139402653421345, - "grad_norm": 0.45744410157203674, - "learning_rate": 7.027024571908351e-06, - "loss": 0.3536, - "step": 17434 - }, - { - "epoch": 1.1394680086268871, - "grad_norm": 0.49372386932373047, - "learning_rate": 7.026705359383554e-06, - "loss": 0.3724, - "step": 17435 - }, - { - "epoch": 1.1395333638324292, - "grad_norm": 0.45175907015800476, - "learning_rate": 7.026386136973814e-06, - "loss": 0.3877, - "step": 17436 - }, - { - "epoch": 1.1395987190379713, - "grad_norm": 0.4616450071334839, - "learning_rate": 7.0260669046806864e-06, - "loss": 0.4162, - "step": 17437 - }, - { - "epoch": 1.1396640742435136, - "grad_norm": 0.4368988573551178, - "learning_rate": 7.0257476625057285e-06, - "loss": 0.3153, - "step": 17438 - }, - { - "epoch": 1.1397294294490556, - "grad_norm": 0.4661909341812134, - "learning_rate": 7.025428410450498e-06, - "loss": 0.3211, - "step": 17439 - }, - { - "epoch": 1.1397947846545977, - "grad_norm": 0.45357945561408997, - "learning_rate": 7.025109148516552e-06, - "loss": 0.345, - "step": 17440 - }, - { - "epoch": 1.1398601398601398, - "grad_norm": 0.44125261902809143, - "learning_rate": 7.024789876705445e-06, - "loss": 0.3397, - "step": 17441 - }, - { - "epoch": 1.139925495065682, - "grad_norm": 0.4618300795555115, - "learning_rate": 7.024470595018738e-06, - "loss": 0.3772, - "step": 17442 - }, - { - "epoch": 1.1399908502712242, - "grad_norm": 0.478115051984787, - "learning_rate": 7.024151303457985e-06, - "loss": 0.3532, - "step": 17443 - }, - { - "epoch": 1.1400562054767662, - "grad_norm": 0.5167980194091797, - "learning_rate": 7.023832002024746e-06, - "loss": 0.4005, - "step": 17444 - }, - { - "epoch": 1.1401215606823083, - "grad_norm": 0.45658424496650696, - "learning_rate": 7.0235126907205775e-06, - "loss": 0.36, - "step": 17445 - }, - { - "epoch": 1.1401869158878504, - "grad_norm": 0.44158434867858887, - "learning_rate": 7.023193369547037e-06, - "loss": 0.3505, - "step": 17446 - }, - { - "epoch": 1.1402522710933927, - "grad_norm": 0.45263805985450745, - "learning_rate": 7.022874038505679e-06, - "loss": 0.3631, - "step": 17447 - }, - { - "epoch": 1.1403176262989347, - "grad_norm": 0.4402000606060028, - "learning_rate": 7.022554697598065e-06, - "loss": 0.3399, - "step": 17448 - }, - { - "epoch": 1.1403829815044768, - "grad_norm": 0.4918239712715149, - "learning_rate": 7.022235346825751e-06, - "loss": 0.3941, - "step": 17449 - }, - { - "epoch": 1.1404483367100189, - "grad_norm": 0.42927759885787964, - "learning_rate": 7.021915986190295e-06, - "loss": 0.2929, - "step": 17450 - }, - { - "epoch": 1.1405136919155612, - "grad_norm": 0.5000864863395691, - "learning_rate": 7.021596615693256e-06, - "loss": 0.3882, - "step": 17451 - }, - { - "epoch": 1.1405790471211033, - "grad_norm": 0.41612523794174194, - "learning_rate": 7.021277235336187e-06, - "loss": 0.3019, - "step": 17452 - }, - { - "epoch": 1.1406444023266453, - "grad_norm": 0.46470025181770325, - "learning_rate": 7.020957845120649e-06, - "loss": 0.343, - "step": 17453 - }, - { - "epoch": 1.1407097575321874, - "grad_norm": 0.43162819743156433, - "learning_rate": 7.0206384450482e-06, - "loss": 0.3456, - "step": 17454 - }, - { - "epoch": 1.1407751127377295, - "grad_norm": 0.45312026143074036, - "learning_rate": 7.020319035120399e-06, - "loss": 0.3533, - "step": 17455 - }, - { - "epoch": 1.1408404679432718, - "grad_norm": 0.4765598177909851, - "learning_rate": 7.019999615338799e-06, - "loss": 0.4242, - "step": 17456 - }, - { - "epoch": 1.1409058231488138, - "grad_norm": 0.41143378615379333, - "learning_rate": 7.019680185704964e-06, - "loss": 0.2977, - "step": 17457 - }, - { - "epoch": 1.140971178354356, - "grad_norm": 0.44259515404701233, - "learning_rate": 7.019360746220447e-06, - "loss": 0.3014, - "step": 17458 - }, - { - "epoch": 1.141036533559898, - "grad_norm": 0.4750520586967468, - "learning_rate": 7.0190412968868095e-06, - "loss": 0.3738, - "step": 17459 - }, - { - "epoch": 1.1411018887654403, - "grad_norm": 0.5006691217422485, - "learning_rate": 7.018721837705608e-06, - "loss": 0.4195, - "step": 17460 - }, - { - "epoch": 1.1411672439709823, - "grad_norm": 0.42368045449256897, - "learning_rate": 7.018402368678399e-06, - "loss": 0.3209, - "step": 17461 - }, - { - "epoch": 1.1412325991765244, - "grad_norm": 0.47609299421310425, - "learning_rate": 7.0180828898067445e-06, - "loss": 0.3686, - "step": 17462 - }, - { - "epoch": 1.1412979543820665, - "grad_norm": 0.5127665996551514, - "learning_rate": 7.0177634010922e-06, - "loss": 0.3663, - "step": 17463 - }, - { - "epoch": 1.1413633095876086, - "grad_norm": 0.4121999740600586, - "learning_rate": 7.017443902536325e-06, - "loss": 0.2799, - "step": 17464 - }, - { - "epoch": 1.1414286647931509, - "grad_norm": 0.4780321717262268, - "learning_rate": 7.0171243941406755e-06, - "loss": 0.3486, - "step": 17465 - }, - { - "epoch": 1.141494019998693, - "grad_norm": 0.482979953289032, - "learning_rate": 7.016804875906813e-06, - "loss": 0.3814, - "step": 17466 - }, - { - "epoch": 1.141559375204235, - "grad_norm": 0.43570512533187866, - "learning_rate": 7.0164853478362925e-06, - "loss": 0.3134, - "step": 17467 - }, - { - "epoch": 1.141624730409777, - "grad_norm": 0.4267442524433136, - "learning_rate": 7.016165809930676e-06, - "loss": 0.3265, - "step": 17468 - }, - { - "epoch": 1.1416900856153194, - "grad_norm": 0.45084404945373535, - "learning_rate": 7.01584626219152e-06, - "loss": 0.3261, - "step": 17469 - }, - { - "epoch": 1.1417554408208614, - "grad_norm": 0.4325253367424011, - "learning_rate": 7.015526704620383e-06, - "loss": 0.3033, - "step": 17470 - }, - { - "epoch": 1.1418207960264035, - "grad_norm": 0.4355321228504181, - "learning_rate": 7.015207137218826e-06, - "loss": 0.2788, - "step": 17471 - }, - { - "epoch": 1.1418861512319456, - "grad_norm": 0.4479791224002838, - "learning_rate": 7.014887559988403e-06, - "loss": 0.3348, - "step": 17472 - }, - { - "epoch": 1.1419515064374877, - "grad_norm": 0.4449215531349182, - "learning_rate": 7.0145679729306775e-06, - "loss": 0.3448, - "step": 17473 - }, - { - "epoch": 1.14201686164303, - "grad_norm": 0.4846709072589874, - "learning_rate": 7.014248376047205e-06, - "loss": 0.3845, - "step": 17474 - }, - { - "epoch": 1.142082216848572, - "grad_norm": 0.4538358449935913, - "learning_rate": 7.013928769339545e-06, - "loss": 0.3541, - "step": 17475 - }, - { - "epoch": 1.142147572054114, - "grad_norm": 0.4530636668205261, - "learning_rate": 7.013609152809256e-06, - "loss": 0.3285, - "step": 17476 - }, - { - "epoch": 1.1422129272596562, - "grad_norm": 0.4355792701244354, - "learning_rate": 7.013289526457901e-06, - "loss": 0.3523, - "step": 17477 - }, - { - "epoch": 1.1422782824651985, - "grad_norm": 0.47689175605773926, - "learning_rate": 7.012969890287033e-06, - "loss": 0.3395, - "step": 17478 - }, - { - "epoch": 1.1423436376707405, - "grad_norm": 0.44496792554855347, - "learning_rate": 7.0126502442982125e-06, - "loss": 0.3261, - "step": 17479 - }, - { - "epoch": 1.1424089928762826, - "grad_norm": 0.44975581765174866, - "learning_rate": 7.012330588493001e-06, - "loss": 0.3583, - "step": 17480 - }, - { - "epoch": 1.1424743480818247, - "grad_norm": 0.42740964889526367, - "learning_rate": 7.012010922872956e-06, - "loss": 0.3192, - "step": 17481 - }, - { - "epoch": 1.1425397032873668, - "grad_norm": 0.4553546607494354, - "learning_rate": 7.011691247439636e-06, - "loss": 0.3381, - "step": 17482 - }, - { - "epoch": 1.142605058492909, - "grad_norm": 0.4539209306240082, - "learning_rate": 7.011371562194601e-06, - "loss": 0.3636, - "step": 17483 - }, - { - "epoch": 1.1426704136984511, - "grad_norm": 0.47124984860420227, - "learning_rate": 7.011051867139412e-06, - "loss": 0.3436, - "step": 17484 - }, - { - "epoch": 1.1427357689039932, - "grad_norm": 0.4807920753955841, - "learning_rate": 7.010732162275624e-06, - "loss": 0.3838, - "step": 17485 - }, - { - "epoch": 1.1428011241095353, - "grad_norm": 0.5294554829597473, - "learning_rate": 7.0104124476048e-06, - "loss": 0.3826, - "step": 17486 - }, - { - "epoch": 1.1428664793150776, - "grad_norm": 0.4663105010986328, - "learning_rate": 7.010092723128497e-06, - "loss": 0.3824, - "step": 17487 - }, - { - "epoch": 1.1429318345206196, - "grad_norm": 0.4112255573272705, - "learning_rate": 7.0097729888482764e-06, - "loss": 0.2874, - "step": 17488 - }, - { - "epoch": 1.1429971897261617, - "grad_norm": 0.47517916560173035, - "learning_rate": 7.009453244765697e-06, - "loss": 0.3853, - "step": 17489 - }, - { - "epoch": 1.1430625449317038, - "grad_norm": 0.4611404240131378, - "learning_rate": 7.009133490882316e-06, - "loss": 0.3286, - "step": 17490 - }, - { - "epoch": 1.1431279001372459, - "grad_norm": 0.4590357840061188, - "learning_rate": 7.008813727199697e-06, - "loss": 0.3412, - "step": 17491 - }, - { - "epoch": 1.143193255342788, - "grad_norm": 0.45553115010261536, - "learning_rate": 7.008493953719396e-06, - "loss": 0.3621, - "step": 17492 - }, - { - "epoch": 1.1432586105483302, - "grad_norm": 0.43632274866104126, - "learning_rate": 7.008174170442975e-06, - "loss": 0.3244, - "step": 17493 - }, - { - "epoch": 1.1433239657538723, - "grad_norm": 0.42470139265060425, - "learning_rate": 7.007854377371992e-06, - "loss": 0.3033, - "step": 17494 - }, - { - "epoch": 1.1433893209594144, - "grad_norm": 0.4470202922821045, - "learning_rate": 7.007534574508009e-06, - "loss": 0.3364, - "step": 17495 - }, - { - "epoch": 1.1434546761649567, - "grad_norm": 0.45505478978157043, - "learning_rate": 7.007214761852583e-06, - "loss": 0.3493, - "step": 17496 - }, - { - "epoch": 1.1435200313704987, - "grad_norm": 0.4418489933013916, - "learning_rate": 7.0068949394072765e-06, - "loss": 0.3341, - "step": 17497 - }, - { - "epoch": 1.1435853865760408, - "grad_norm": 0.4500032067298889, - "learning_rate": 7.006575107173647e-06, - "loss": 0.3466, - "step": 17498 - }, - { - "epoch": 1.1436507417815829, - "grad_norm": 0.48150408267974854, - "learning_rate": 7.006255265153257e-06, - "loss": 0.3992, - "step": 17499 - }, - { - "epoch": 1.143716096987125, - "grad_norm": 0.45815691351890564, - "learning_rate": 7.005935413347664e-06, - "loss": 0.3628, - "step": 17500 - }, - { - "epoch": 1.143781452192667, - "grad_norm": 0.4734034836292267, - "learning_rate": 7.005615551758429e-06, - "loss": 0.3366, - "step": 17501 - }, - { - "epoch": 1.1438468073982093, - "grad_norm": 0.44199973344802856, - "learning_rate": 7.005295680387113e-06, - "loss": 0.3496, - "step": 17502 - }, - { - "epoch": 1.1439121626037514, - "grad_norm": 0.4660091698169708, - "learning_rate": 7.004975799235274e-06, - "loss": 0.3755, - "step": 17503 - }, - { - "epoch": 1.1439775178092935, - "grad_norm": 0.45670491456985474, - "learning_rate": 7.004655908304474e-06, - "loss": 0.3547, - "step": 17504 - }, - { - "epoch": 1.1440428730148355, - "grad_norm": 0.45676979422569275, - "learning_rate": 7.004336007596273e-06, - "loss": 0.3471, - "step": 17505 - }, - { - "epoch": 1.1441082282203778, - "grad_norm": 0.42680084705352783, - "learning_rate": 7.0040160971122315e-06, - "loss": 0.3421, - "step": 17506 - }, - { - "epoch": 1.14417358342592, - "grad_norm": 0.4537406861782074, - "learning_rate": 7.003696176853908e-06, - "loss": 0.3673, - "step": 17507 - }, - { - "epoch": 1.144238938631462, - "grad_norm": 0.4100183844566345, - "learning_rate": 7.003376246822865e-06, - "loss": 0.3022, - "step": 17508 - }, - { - "epoch": 1.144304293837004, - "grad_norm": 0.46144744753837585, - "learning_rate": 7.003056307020662e-06, - "loss": 0.3927, - "step": 17509 - }, - { - "epoch": 1.1443696490425461, - "grad_norm": 0.4399982690811157, - "learning_rate": 7.002736357448858e-06, - "loss": 0.3438, - "step": 17510 - }, - { - "epoch": 1.1444350042480884, - "grad_norm": 0.4397696852684021, - "learning_rate": 7.002416398109016e-06, - "loss": 0.3508, - "step": 17511 - }, - { - "epoch": 1.1445003594536305, - "grad_norm": 0.4235154986381531, - "learning_rate": 7.002096429002696e-06, - "loss": 0.3124, - "step": 17512 - }, - { - "epoch": 1.1445657146591726, - "grad_norm": 0.4556167721748352, - "learning_rate": 7.001776450131458e-06, - "loss": 0.3625, - "step": 17513 - }, - { - "epoch": 1.1446310698647146, - "grad_norm": 0.4215102195739746, - "learning_rate": 7.001456461496862e-06, - "loss": 0.3155, - "step": 17514 - }, - { - "epoch": 1.144696425070257, - "grad_norm": 0.44292616844177246, - "learning_rate": 7.0011364631004695e-06, - "loss": 0.3468, - "step": 17515 - }, - { - "epoch": 1.144761780275799, - "grad_norm": 0.49077779054641724, - "learning_rate": 7.000816454943842e-06, - "loss": 0.3644, - "step": 17516 - }, - { - "epoch": 1.144827135481341, - "grad_norm": 0.42309874296188354, - "learning_rate": 7.000496437028539e-06, - "loss": 0.3288, - "step": 17517 - }, - { - "epoch": 1.1448924906868831, - "grad_norm": 0.419172078371048, - "learning_rate": 7.000176409356122e-06, - "loss": 0.2896, - "step": 17518 - }, - { - "epoch": 1.1449578458924252, - "grad_norm": 0.5341691970825195, - "learning_rate": 6.999856371928151e-06, - "loss": 0.4543, - "step": 17519 - }, - { - "epoch": 1.1450232010979675, - "grad_norm": 0.4835638403892517, - "learning_rate": 6.9995363247461874e-06, - "loss": 0.3563, - "step": 17520 - }, - { - "epoch": 1.1450885563035096, - "grad_norm": 0.43735870718955994, - "learning_rate": 6.9992162678117935e-06, - "loss": 0.3101, - "step": 17521 - }, - { - "epoch": 1.1451539115090517, - "grad_norm": 0.477425754070282, - "learning_rate": 6.998896201126529e-06, - "loss": 0.3656, - "step": 17522 - }, - { - "epoch": 1.1452192667145937, - "grad_norm": 0.4202739894390106, - "learning_rate": 6.998576124691956e-06, - "loss": 0.292, - "step": 17523 - }, - { - "epoch": 1.145284621920136, - "grad_norm": 0.47121620178222656, - "learning_rate": 6.998256038509635e-06, - "loss": 0.3313, - "step": 17524 - }, - { - "epoch": 1.145349977125678, - "grad_norm": 0.44480106234550476, - "learning_rate": 6.997935942581125e-06, - "loss": 0.3318, - "step": 17525 - }, - { - "epoch": 1.1454153323312202, - "grad_norm": 0.4418734312057495, - "learning_rate": 6.997615836907991e-06, - "loss": 0.3407, - "step": 17526 - }, - { - "epoch": 1.1454806875367622, - "grad_norm": 0.47333475947380066, - "learning_rate": 6.997295721491792e-06, - "loss": 0.3563, - "step": 17527 - }, - { - "epoch": 1.1455460427423043, - "grad_norm": 0.4485928416252136, - "learning_rate": 6.99697559633409e-06, - "loss": 0.3343, - "step": 17528 - }, - { - "epoch": 1.1456113979478466, - "grad_norm": 0.4543781876564026, - "learning_rate": 6.996655461436447e-06, - "loss": 0.3431, - "step": 17529 - }, - { - "epoch": 1.1456767531533887, - "grad_norm": 0.42821744084358215, - "learning_rate": 6.9963353168004225e-06, - "loss": 0.3175, - "step": 17530 - }, - { - "epoch": 1.1457421083589308, - "grad_norm": 0.43268415331840515, - "learning_rate": 6.996015162427578e-06, - "loss": 0.325, - "step": 17531 - }, - { - "epoch": 1.1458074635644728, - "grad_norm": 0.4574171304702759, - "learning_rate": 6.995694998319478e-06, - "loss": 0.3478, - "step": 17532 - }, - { - "epoch": 1.1458728187700151, - "grad_norm": 0.5189958810806274, - "learning_rate": 6.995374824477681e-06, - "loss": 0.4125, - "step": 17533 - }, - { - "epoch": 1.1459381739755572, - "grad_norm": 0.468641996383667, - "learning_rate": 6.99505464090375e-06, - "loss": 0.3532, - "step": 17534 - }, - { - "epoch": 1.1460035291810993, - "grad_norm": 0.4337131381034851, - "learning_rate": 6.994734447599247e-06, - "loss": 0.3247, - "step": 17535 - }, - { - "epoch": 1.1460688843866413, - "grad_norm": 0.45070910453796387, - "learning_rate": 6.994414244565732e-06, - "loss": 0.338, - "step": 17536 - }, - { - "epoch": 1.1461342395921834, - "grad_norm": 0.46367672085762024, - "learning_rate": 6.994094031804768e-06, - "loss": 0.3399, - "step": 17537 - }, - { - "epoch": 1.1461995947977257, - "grad_norm": 0.426435261964798, - "learning_rate": 6.993773809317918e-06, - "loss": 0.3239, - "step": 17538 - }, - { - "epoch": 1.1462649500032678, - "grad_norm": 0.4395165741443634, - "learning_rate": 6.99345357710674e-06, - "loss": 0.3019, - "step": 17539 - }, - { - "epoch": 1.1463303052088099, - "grad_norm": 0.4694999158382416, - "learning_rate": 6.9931333351728e-06, - "loss": 0.3633, - "step": 17540 - }, - { - "epoch": 1.146395660414352, - "grad_norm": 0.47705039381980896, - "learning_rate": 6.992813083517658e-06, - "loss": 0.3617, - "step": 17541 - }, - { - "epoch": 1.1464610156198942, - "grad_norm": 0.47072869539260864, - "learning_rate": 6.992492822142877e-06, - "loss": 0.3725, - "step": 17542 - }, - { - "epoch": 1.1465263708254363, - "grad_norm": 0.44895902276039124, - "learning_rate": 6.992172551050017e-06, - "loss": 0.3579, - "step": 17543 - }, - { - "epoch": 1.1465917260309784, - "grad_norm": 0.4714765250682831, - "learning_rate": 6.991852270240641e-06, - "loss": 0.3867, - "step": 17544 - }, - { - "epoch": 1.1466570812365204, - "grad_norm": 0.4682939350605011, - "learning_rate": 6.991531979716313e-06, - "loss": 0.3784, - "step": 17545 - }, - { - "epoch": 1.1467224364420625, - "grad_norm": 0.46957314014434814, - "learning_rate": 6.991211679478591e-06, - "loss": 0.3572, - "step": 17546 - }, - { - "epoch": 1.1467877916476048, - "grad_norm": 0.4267326593399048, - "learning_rate": 6.990891369529044e-06, - "loss": 0.3181, - "step": 17547 - }, - { - "epoch": 1.1468531468531469, - "grad_norm": 0.5047838091850281, - "learning_rate": 6.990571049869227e-06, - "loss": 0.4355, - "step": 17548 - }, - { - "epoch": 1.146918502058689, - "grad_norm": 0.4337526857852936, - "learning_rate": 6.990250720500706e-06, - "loss": 0.3486, - "step": 17549 - }, - { - "epoch": 1.146983857264231, - "grad_norm": 0.4063011407852173, - "learning_rate": 6.989930381425042e-06, - "loss": 0.2792, - "step": 17550 - }, - { - "epoch": 1.1470492124697733, - "grad_norm": 0.4726828634738922, - "learning_rate": 6.989610032643799e-06, - "loss": 0.3203, - "step": 17551 - }, - { - "epoch": 1.1471145676753154, - "grad_norm": 0.46124228835105896, - "learning_rate": 6.989289674158538e-06, - "loss": 0.3609, - "step": 17552 - }, - { - "epoch": 1.1471799228808575, - "grad_norm": 0.4671699106693268, - "learning_rate": 6.988969305970823e-06, - "loss": 0.3576, - "step": 17553 - }, - { - "epoch": 1.1472452780863995, - "grad_norm": 0.4478949010372162, - "learning_rate": 6.9886489280822155e-06, - "loss": 0.3381, - "step": 17554 - }, - { - "epoch": 1.1473106332919416, - "grad_norm": 0.4265245795249939, - "learning_rate": 6.988328540494278e-06, - "loss": 0.3315, - "step": 17555 - }, - { - "epoch": 1.147375988497484, - "grad_norm": 0.46849754452705383, - "learning_rate": 6.988008143208574e-06, - "loss": 0.3821, - "step": 17556 - }, - { - "epoch": 1.147441343703026, - "grad_norm": 0.43850085139274597, - "learning_rate": 6.987687736226664e-06, - "loss": 0.3496, - "step": 17557 - }, - { - "epoch": 1.147506698908568, - "grad_norm": 0.41968628764152527, - "learning_rate": 6.987367319550113e-06, - "loss": 0.2851, - "step": 17558 - }, - { - "epoch": 1.1475720541141101, - "grad_norm": 0.4520379602909088, - "learning_rate": 6.987046893180483e-06, - "loss": 0.3583, - "step": 17559 - }, - { - "epoch": 1.1476374093196524, - "grad_norm": 0.4553259611129761, - "learning_rate": 6.986726457119339e-06, - "loss": 0.3364, - "step": 17560 - }, - { - "epoch": 1.1477027645251945, - "grad_norm": 0.48487669229507446, - "learning_rate": 6.986406011368239e-06, - "loss": 0.3601, - "step": 17561 - }, - { - "epoch": 1.1477681197307366, - "grad_norm": 0.42817962169647217, - "learning_rate": 6.98608555592875e-06, - "loss": 0.3134, - "step": 17562 - }, - { - "epoch": 1.1478334749362786, - "grad_norm": 0.4315100610256195, - "learning_rate": 6.985765090802434e-06, - "loss": 0.3078, - "step": 17563 - }, - { - "epoch": 1.1478988301418207, - "grad_norm": 0.4427310824394226, - "learning_rate": 6.985444615990852e-06, - "loss": 0.3253, - "step": 17564 - }, - { - "epoch": 1.147964185347363, - "grad_norm": 0.43725985288619995, - "learning_rate": 6.985124131495571e-06, - "loss": 0.3281, - "step": 17565 - }, - { - "epoch": 1.148029540552905, - "grad_norm": 0.4995577037334442, - "learning_rate": 6.984803637318149e-06, - "loss": 0.4056, - "step": 17566 - }, - { - "epoch": 1.1480948957584471, - "grad_norm": 0.43872538208961487, - "learning_rate": 6.984483133460155e-06, - "loss": 0.3165, - "step": 17567 - }, - { - "epoch": 1.1481602509639892, - "grad_norm": 0.4488801062107086, - "learning_rate": 6.984162619923149e-06, - "loss": 0.3374, - "step": 17568 - }, - { - "epoch": 1.1482256061695315, - "grad_norm": 0.49378132820129395, - "learning_rate": 6.983842096708694e-06, - "loss": 0.3707, - "step": 17569 - }, - { - "epoch": 1.1482909613750736, - "grad_norm": 0.4640345275402069, - "learning_rate": 6.983521563818353e-06, - "loss": 0.3884, - "step": 17570 - }, - { - "epoch": 1.1483563165806157, - "grad_norm": 0.43276870250701904, - "learning_rate": 6.983201021253692e-06, - "loss": 0.3334, - "step": 17571 - }, - { - "epoch": 1.1484216717861577, - "grad_norm": 0.4319801330566406, - "learning_rate": 6.98288046901627e-06, - "loss": 0.3075, - "step": 17572 - }, - { - "epoch": 1.1484870269916998, - "grad_norm": 0.456879198551178, - "learning_rate": 6.982559907107655e-06, - "loss": 0.3089, - "step": 17573 - }, - { - "epoch": 1.148552382197242, - "grad_norm": 0.4286223351955414, - "learning_rate": 6.982239335529408e-06, - "loss": 0.3112, - "step": 17574 - }, - { - "epoch": 1.1486177374027842, - "grad_norm": 0.4080653786659241, - "learning_rate": 6.981918754283092e-06, - "loss": 0.2859, - "step": 17575 - }, - { - "epoch": 1.1486830926083262, - "grad_norm": 0.46638861298561096, - "learning_rate": 6.981598163370273e-06, - "loss": 0.3636, - "step": 17576 - }, - { - "epoch": 1.1487484478138683, - "grad_norm": 0.4485006332397461, - "learning_rate": 6.981277562792512e-06, - "loss": 0.3506, - "step": 17577 - }, - { - "epoch": 1.1488138030194106, - "grad_norm": 0.4528709352016449, - "learning_rate": 6.9809569525513755e-06, - "loss": 0.3436, - "step": 17578 - }, - { - "epoch": 1.1488791582249527, - "grad_norm": 0.43235430121421814, - "learning_rate": 6.980636332648424e-06, - "loss": 0.3247, - "step": 17579 - }, - { - "epoch": 1.1489445134304948, - "grad_norm": 0.4458158016204834, - "learning_rate": 6.980315703085224e-06, - "loss": 0.3461, - "step": 17580 - }, - { - "epoch": 1.1490098686360368, - "grad_norm": 0.4641415476799011, - "learning_rate": 6.979995063863339e-06, - "loss": 0.3714, - "step": 17581 - }, - { - "epoch": 1.149075223841579, - "grad_norm": 0.44364693760871887, - "learning_rate": 6.97967441498433e-06, - "loss": 0.3324, - "step": 17582 - }, - { - "epoch": 1.1491405790471212, - "grad_norm": 0.45736822485923767, - "learning_rate": 6.979353756449765e-06, - "loss": 0.3605, - "step": 17583 - }, - { - "epoch": 1.1492059342526633, - "grad_norm": 0.4471518397331238, - "learning_rate": 6.979033088261205e-06, - "loss": 0.3289, - "step": 17584 - }, - { - "epoch": 1.1492712894582053, - "grad_norm": 0.45127806067466736, - "learning_rate": 6.978712410420215e-06, - "loss": 0.331, - "step": 17585 - }, - { - "epoch": 1.1493366446637474, - "grad_norm": 0.4386953115463257, - "learning_rate": 6.978391722928359e-06, - "loss": 0.3444, - "step": 17586 - }, - { - "epoch": 1.1494019998692897, - "grad_norm": 0.4590171277523041, - "learning_rate": 6.978071025787202e-06, - "loss": 0.3367, - "step": 17587 - }, - { - "epoch": 1.1494673550748318, - "grad_norm": 0.44228118658065796, - "learning_rate": 6.977750318998306e-06, - "loss": 0.34, - "step": 17588 - }, - { - "epoch": 1.1495327102803738, - "grad_norm": 0.4404328167438507, - "learning_rate": 6.9774296025632396e-06, - "loss": 0.3181, - "step": 17589 - }, - { - "epoch": 1.149598065485916, - "grad_norm": 0.4597437381744385, - "learning_rate": 6.977108876483562e-06, - "loss": 0.3594, - "step": 17590 - }, - { - "epoch": 1.149663420691458, - "grad_norm": 0.4291709065437317, - "learning_rate": 6.976788140760839e-06, - "loss": 0.3273, - "step": 17591 - }, - { - "epoch": 1.1497287758970003, - "grad_norm": 0.44610679149627686, - "learning_rate": 6.9764673953966355e-06, - "loss": 0.3387, - "step": 17592 - }, - { - "epoch": 1.1497941311025424, - "grad_norm": 0.44380804896354675, - "learning_rate": 6.976146640392516e-06, - "loss": 0.3483, - "step": 17593 - }, - { - "epoch": 1.1498594863080844, - "grad_norm": 0.4359813332557678, - "learning_rate": 6.975825875750045e-06, - "loss": 0.3295, - "step": 17594 - }, - { - "epoch": 1.1499248415136265, - "grad_norm": 0.4144361615180969, - "learning_rate": 6.975505101470786e-06, - "loss": 0.3056, - "step": 17595 - }, - { - "epoch": 1.1499901967191688, - "grad_norm": 0.4788469970226288, - "learning_rate": 6.975184317556305e-06, - "loss": 0.3747, - "step": 17596 - }, - { - "epoch": 1.1500555519247109, - "grad_norm": 0.41650545597076416, - "learning_rate": 6.9748635240081656e-06, - "loss": 0.3002, - "step": 17597 - }, - { - "epoch": 1.150120907130253, - "grad_norm": 0.5181236863136292, - "learning_rate": 6.974542720827932e-06, - "loss": 0.3124, - "step": 17598 - }, - { - "epoch": 1.150186262335795, - "grad_norm": 0.42471063137054443, - "learning_rate": 6.9742219080171706e-06, - "loss": 0.2872, - "step": 17599 - }, - { - "epoch": 1.150251617541337, - "grad_norm": 0.43877092003822327, - "learning_rate": 6.973901085577445e-06, - "loss": 0.3382, - "step": 17600 - }, - { - "epoch": 1.1503169727468794, - "grad_norm": 0.4526621699333191, - "learning_rate": 6.9735802535103194e-06, - "loss": 0.321, - "step": 17601 - }, - { - "epoch": 1.1503823279524215, - "grad_norm": 0.4256632328033447, - "learning_rate": 6.973259411817359e-06, - "loss": 0.323, - "step": 17602 - }, - { - "epoch": 1.1504476831579635, - "grad_norm": 0.4291374683380127, - "learning_rate": 6.972938560500129e-06, - "loss": 0.3168, - "step": 17603 - }, - { - "epoch": 1.1505130383635056, - "grad_norm": 0.43796679377555847, - "learning_rate": 6.972617699560194e-06, - "loss": 0.3158, - "step": 17604 - }, - { - "epoch": 1.150578393569048, - "grad_norm": 0.47402623295783997, - "learning_rate": 6.972296828999119e-06, - "loss": 0.3625, - "step": 17605 - }, - { - "epoch": 1.15064374877459, - "grad_norm": 0.4425138831138611, - "learning_rate": 6.971975948818469e-06, - "loss": 0.3465, - "step": 17606 - }, - { - "epoch": 1.150709103980132, - "grad_norm": 0.4444705545902252, - "learning_rate": 6.971655059019811e-06, - "loss": 0.342, - "step": 17607 - }, - { - "epoch": 1.1507744591856741, - "grad_norm": 0.4593660235404968, - "learning_rate": 6.9713341596047066e-06, - "loss": 0.3692, - "step": 17608 - }, - { - "epoch": 1.1508398143912162, - "grad_norm": 0.4351654052734375, - "learning_rate": 6.971013250574722e-06, - "loss": 0.3101, - "step": 17609 - }, - { - "epoch": 1.1509051695967583, - "grad_norm": 0.5078103542327881, - "learning_rate": 6.970692331931425e-06, - "loss": 0.4376, - "step": 17610 - }, - { - "epoch": 1.1509705248023006, - "grad_norm": 0.4377695918083191, - "learning_rate": 6.970371403676377e-06, - "loss": 0.3433, - "step": 17611 - }, - { - "epoch": 1.1510358800078426, - "grad_norm": 0.4759107232093811, - "learning_rate": 6.9700504658111465e-06, - "loss": 0.3577, - "step": 17612 - }, - { - "epoch": 1.1511012352133847, - "grad_norm": 0.4467790126800537, - "learning_rate": 6.969729518337296e-06, - "loss": 0.3493, - "step": 17613 - }, - { - "epoch": 1.1511665904189268, - "grad_norm": 0.5905129909515381, - "learning_rate": 6.969408561256393e-06, - "loss": 0.3877, - "step": 17614 - }, - { - "epoch": 1.151231945624469, - "grad_norm": 0.4773085117340088, - "learning_rate": 6.969087594570001e-06, - "loss": 0.3467, - "step": 17615 - }, - { - "epoch": 1.1512973008300111, - "grad_norm": 0.4675079584121704, - "learning_rate": 6.968766618279688e-06, - "loss": 0.3375, - "step": 17616 - }, - { - "epoch": 1.1513626560355532, - "grad_norm": 0.39735788106918335, - "learning_rate": 6.968445632387017e-06, - "loss": 0.2697, - "step": 17617 - }, - { - "epoch": 1.1514280112410953, - "grad_norm": 0.4492117464542389, - "learning_rate": 6.9681246368935565e-06, - "loss": 0.3555, - "step": 17618 - }, - { - "epoch": 1.1514933664466374, - "grad_norm": 0.42149829864501953, - "learning_rate": 6.967803631800868e-06, - "loss": 0.3139, - "step": 17619 - }, - { - "epoch": 1.1515587216521797, - "grad_norm": 0.4348292648792267, - "learning_rate": 6.9674826171105214e-06, - "loss": 0.347, - "step": 17620 - }, - { - "epoch": 1.1516240768577217, - "grad_norm": 0.4530472457408905, - "learning_rate": 6.96716159282408e-06, - "loss": 0.3629, - "step": 17621 - }, - { - "epoch": 1.1516894320632638, - "grad_norm": 0.48482373356819153, - "learning_rate": 6.96684055894311e-06, - "loss": 0.3436, - "step": 17622 - }, - { - "epoch": 1.1517547872688059, - "grad_norm": 0.4720245599746704, - "learning_rate": 6.9665195154691765e-06, - "loss": 0.3515, - "step": 17623 - }, - { - "epoch": 1.1518201424743482, - "grad_norm": 0.46872296929359436, - "learning_rate": 6.966198462403846e-06, - "loss": 0.3815, - "step": 17624 - }, - { - "epoch": 1.1518854976798902, - "grad_norm": 0.4645110070705414, - "learning_rate": 6.965877399748686e-06, - "loss": 0.353, - "step": 17625 - }, - { - "epoch": 1.1519508528854323, - "grad_norm": 0.4311627745628357, - "learning_rate": 6.96555632750526e-06, - "loss": 0.3039, - "step": 17626 - }, - { - "epoch": 1.1520162080909744, - "grad_norm": 0.4705878496170044, - "learning_rate": 6.965235245675135e-06, - "loss": 0.362, - "step": 17627 - }, - { - "epoch": 1.1520815632965165, - "grad_norm": 0.45533472299575806, - "learning_rate": 6.964914154259876e-06, - "loss": 0.3374, - "step": 17628 - }, - { - "epoch": 1.1521469185020587, - "grad_norm": 0.42912060022354126, - "learning_rate": 6.964593053261051e-06, - "loss": 0.3403, - "step": 17629 - }, - { - "epoch": 1.1522122737076008, - "grad_norm": 0.458856463432312, - "learning_rate": 6.9642719426802255e-06, - "loss": 0.344, - "step": 17630 - }, - { - "epoch": 1.152277628913143, - "grad_norm": 0.41162678599357605, - "learning_rate": 6.963950822518963e-06, - "loss": 0.2972, - "step": 17631 - }, - { - "epoch": 1.152342984118685, - "grad_norm": 0.46424978971481323, - "learning_rate": 6.963629692778835e-06, - "loss": 0.3678, - "step": 17632 - }, - { - "epoch": 1.1524083393242273, - "grad_norm": 0.46035653352737427, - "learning_rate": 6.963308553461402e-06, - "loss": 0.3763, - "step": 17633 - }, - { - "epoch": 1.1524736945297693, - "grad_norm": 0.45069998502731323, - "learning_rate": 6.962987404568235e-06, - "loss": 0.3582, - "step": 17634 - }, - { - "epoch": 1.1525390497353114, - "grad_norm": 0.4333679676055908, - "learning_rate": 6.962666246100897e-06, - "loss": 0.33, - "step": 17635 - }, - { - "epoch": 1.1526044049408535, - "grad_norm": 0.4406760036945343, - "learning_rate": 6.962345078060957e-06, - "loss": 0.3489, - "step": 17636 - }, - { - "epoch": 1.1526697601463956, - "grad_norm": 0.47684451937675476, - "learning_rate": 6.962023900449979e-06, - "loss": 0.33, - "step": 17637 - }, - { - "epoch": 1.1527351153519378, - "grad_norm": 0.4457132816314697, - "learning_rate": 6.961702713269532e-06, - "loss": 0.3284, - "step": 17638 - }, - { - "epoch": 1.15280047055748, - "grad_norm": 0.46327537298202515, - "learning_rate": 6.96138151652118e-06, - "loss": 0.3782, - "step": 17639 - }, - { - "epoch": 1.152865825763022, - "grad_norm": 0.4420397877693176, - "learning_rate": 6.961060310206491e-06, - "loss": 0.3444, - "step": 17640 - }, - { - "epoch": 1.152931180968564, - "grad_norm": 0.4100053608417511, - "learning_rate": 6.9607390943270325e-06, - "loss": 0.3062, - "step": 17641 - }, - { - "epoch": 1.1529965361741064, - "grad_norm": 0.49853360652923584, - "learning_rate": 6.960417868884368e-06, - "loss": 0.3897, - "step": 17642 - }, - { - "epoch": 1.1530618913796484, - "grad_norm": 0.49394646286964417, - "learning_rate": 6.960096633880069e-06, - "loss": 0.3565, - "step": 17643 - }, - { - "epoch": 1.1531272465851905, - "grad_norm": 0.46234485507011414, - "learning_rate": 6.959775389315698e-06, - "loss": 0.2861, - "step": 17644 - }, - { - "epoch": 1.1531926017907326, - "grad_norm": 0.4874555766582489, - "learning_rate": 6.959454135192825e-06, - "loss": 0.376, - "step": 17645 - }, - { - "epoch": 1.1532579569962746, - "grad_norm": 0.491465300321579, - "learning_rate": 6.9591328715130135e-06, - "loss": 0.3935, - "step": 17646 - }, - { - "epoch": 1.153323312201817, - "grad_norm": 0.46655192971229553, - "learning_rate": 6.958811598277834e-06, - "loss": 0.3553, - "step": 17647 - }, - { - "epoch": 1.153388667407359, - "grad_norm": 0.4627162516117096, - "learning_rate": 6.958490315488851e-06, - "loss": 0.3721, - "step": 17648 - }, - { - "epoch": 1.153454022612901, - "grad_norm": 0.43710124492645264, - "learning_rate": 6.958169023147632e-06, - "loss": 0.3179, - "step": 17649 - }, - { - "epoch": 1.1535193778184432, - "grad_norm": 0.4593852162361145, - "learning_rate": 6.957847721255745e-06, - "loss": 0.3253, - "step": 17650 - }, - { - "epoch": 1.1535847330239855, - "grad_norm": 0.49218297004699707, - "learning_rate": 6.957526409814755e-06, - "loss": 0.3889, - "step": 17651 - }, - { - "epoch": 1.1536500882295275, - "grad_norm": 0.46324622631073, - "learning_rate": 6.957205088826233e-06, - "loss": 0.3722, - "step": 17652 - }, - { - "epoch": 1.1537154434350696, - "grad_norm": 0.45687490701675415, - "learning_rate": 6.956883758291742e-06, - "loss": 0.3406, - "step": 17653 - }, - { - "epoch": 1.1537807986406117, - "grad_norm": 0.46883320808410645, - "learning_rate": 6.956562418212851e-06, - "loss": 0.3465, - "step": 17654 - }, - { - "epoch": 1.1538461538461537, - "grad_norm": 0.47016748785972595, - "learning_rate": 6.956241068591127e-06, - "loss": 0.36, - "step": 17655 - }, - { - "epoch": 1.153911509051696, - "grad_norm": 0.43132883310317993, - "learning_rate": 6.955919709428139e-06, - "loss": 0.323, - "step": 17656 - }, - { - "epoch": 1.153976864257238, - "grad_norm": 0.4467172920703888, - "learning_rate": 6.955598340725451e-06, - "loss": 0.3083, - "step": 17657 - }, - { - "epoch": 1.1540422194627802, - "grad_norm": 0.4533108174800873, - "learning_rate": 6.9552769624846335e-06, - "loss": 0.3499, - "step": 17658 - }, - { - "epoch": 1.1541075746683223, - "grad_norm": 0.4377674162387848, - "learning_rate": 6.954955574707254e-06, - "loss": 0.3076, - "step": 17659 - }, - { - "epoch": 1.1541729298738646, - "grad_norm": 0.45000824332237244, - "learning_rate": 6.954634177394877e-06, - "loss": 0.3548, - "step": 17660 - }, - { - "epoch": 1.1542382850794066, - "grad_norm": 0.45199882984161377, - "learning_rate": 6.954312770549073e-06, - "loss": 0.3378, - "step": 17661 - }, - { - "epoch": 1.1543036402849487, - "grad_norm": 0.4631260335445404, - "learning_rate": 6.953991354171408e-06, - "loss": 0.3984, - "step": 17662 - }, - { - "epoch": 1.1543689954904908, - "grad_norm": 0.44825875759124756, - "learning_rate": 6.953669928263451e-06, - "loss": 0.3303, - "step": 17663 - }, - { - "epoch": 1.1544343506960328, - "grad_norm": 0.45595115423202515, - "learning_rate": 6.953348492826768e-06, - "loss": 0.3467, - "step": 17664 - }, - { - "epoch": 1.1544997059015751, - "grad_norm": 0.43641722202301025, - "learning_rate": 6.953027047862928e-06, - "loss": 0.3045, - "step": 17665 - }, - { - "epoch": 1.1545650611071172, - "grad_norm": 0.6549420356750488, - "learning_rate": 6.9527055933735e-06, - "loss": 0.3434, - "step": 17666 - }, - { - "epoch": 1.1546304163126593, - "grad_norm": 0.43091461062431335, - "learning_rate": 6.952384129360048e-06, - "loss": 0.3141, - "step": 17667 - }, - { - "epoch": 1.1546957715182014, - "grad_norm": 0.4651185870170593, - "learning_rate": 6.952062655824143e-06, - "loss": 0.3909, - "step": 17668 - }, - { - "epoch": 1.1547611267237436, - "grad_norm": 0.47757580876350403, - "learning_rate": 6.951741172767352e-06, - "loss": 0.3397, - "step": 17669 - }, - { - "epoch": 1.1548264819292857, - "grad_norm": 0.4739300310611725, - "learning_rate": 6.951419680191245e-06, - "loss": 0.3436, - "step": 17670 - }, - { - "epoch": 1.1548918371348278, - "grad_norm": 0.4194020926952362, - "learning_rate": 6.951098178097386e-06, - "loss": 0.295, - "step": 17671 - }, - { - "epoch": 1.1549571923403699, - "grad_norm": 0.5356296896934509, - "learning_rate": 6.950776666487347e-06, - "loss": 0.419, - "step": 17672 - }, - { - "epoch": 1.155022547545912, - "grad_norm": 0.4514598548412323, - "learning_rate": 6.950455145362694e-06, - "loss": 0.3575, - "step": 17673 - }, - { - "epoch": 1.1550879027514542, - "grad_norm": 0.457670658826828, - "learning_rate": 6.950133614724994e-06, - "loss": 0.3522, - "step": 17674 - }, - { - "epoch": 1.1551532579569963, - "grad_norm": 0.4478275179862976, - "learning_rate": 6.949812074575817e-06, - "loss": 0.3332, - "step": 17675 - }, - { - "epoch": 1.1552186131625384, - "grad_norm": 0.44643643498420715, - "learning_rate": 6.949490524916734e-06, - "loss": 0.3601, - "step": 17676 - }, - { - "epoch": 1.1552839683680804, - "grad_norm": 0.3907199800014496, - "learning_rate": 6.949168965749307e-06, - "loss": 0.2759, - "step": 17677 - }, - { - "epoch": 1.1553493235736227, - "grad_norm": 0.4266630709171295, - "learning_rate": 6.948847397075108e-06, - "loss": 0.3327, - "step": 17678 - }, - { - "epoch": 1.1554146787791648, - "grad_norm": 0.4935249090194702, - "learning_rate": 6.948525818895706e-06, - "loss": 0.3744, - "step": 17679 - }, - { - "epoch": 1.155480033984707, - "grad_norm": 0.46187731623649597, - "learning_rate": 6.948204231212668e-06, - "loss": 0.3264, - "step": 17680 - }, - { - "epoch": 1.155545389190249, - "grad_norm": 0.4567898213863373, - "learning_rate": 6.947882634027564e-06, - "loss": 0.3579, - "step": 17681 - }, - { - "epoch": 1.155610744395791, - "grad_norm": 0.4668227732181549, - "learning_rate": 6.94756102734196e-06, - "loss": 0.3983, - "step": 17682 - }, - { - "epoch": 1.1556760996013333, - "grad_norm": 0.4595906436443329, - "learning_rate": 6.947239411157428e-06, - "loss": 0.3598, - "step": 17683 - }, - { - "epoch": 1.1557414548068754, - "grad_norm": 0.44565173983573914, - "learning_rate": 6.9469177854755344e-06, - "loss": 0.3388, - "step": 17684 - }, - { - "epoch": 1.1558068100124175, - "grad_norm": 0.409270703792572, - "learning_rate": 6.9465961502978485e-06, - "loss": 0.3103, - "step": 17685 - }, - { - "epoch": 1.1558721652179595, - "grad_norm": 0.44563353061676025, - "learning_rate": 6.946274505625939e-06, - "loss": 0.3063, - "step": 17686 - }, - { - "epoch": 1.1559375204235018, - "grad_norm": 0.4690755605697632, - "learning_rate": 6.945952851461373e-06, - "loss": 0.4142, - "step": 17687 - }, - { - "epoch": 1.156002875629044, - "grad_norm": 0.6353440880775452, - "learning_rate": 6.945631187805721e-06, - "loss": 0.3421, - "step": 17688 - }, - { - "epoch": 1.156068230834586, - "grad_norm": 0.4302791953086853, - "learning_rate": 6.945309514660552e-06, - "loss": 0.3397, - "step": 17689 - }, - { - "epoch": 1.156133586040128, - "grad_norm": 0.4376680254936218, - "learning_rate": 6.944987832027434e-06, - "loss": 0.3278, - "step": 17690 - }, - { - "epoch": 1.1561989412456701, - "grad_norm": 0.4576703906059265, - "learning_rate": 6.944666139907937e-06, - "loss": 0.3536, - "step": 17691 - }, - { - "epoch": 1.1562642964512124, - "grad_norm": 0.4490030109882355, - "learning_rate": 6.944344438303631e-06, - "loss": 0.3558, - "step": 17692 - }, - { - "epoch": 1.1563296516567545, - "grad_norm": 0.45046737790107727, - "learning_rate": 6.944022727216082e-06, - "loss": 0.3403, - "step": 17693 - }, - { - "epoch": 1.1563950068622966, - "grad_norm": 0.44379544258117676, - "learning_rate": 6.943701006646863e-06, - "loss": 0.3344, - "step": 17694 - }, - { - "epoch": 1.1564603620678386, - "grad_norm": 0.4549551010131836, - "learning_rate": 6.943379276597537e-06, - "loss": 0.3387, - "step": 17695 - }, - { - "epoch": 1.156525717273381, - "grad_norm": 0.4415263831615448, - "learning_rate": 6.9430575370696796e-06, - "loss": 0.3227, - "step": 17696 - }, - { - "epoch": 1.156591072478923, - "grad_norm": 0.4730100631713867, - "learning_rate": 6.9427357880648576e-06, - "loss": 0.3809, - "step": 17697 - }, - { - "epoch": 1.156656427684465, - "grad_norm": 0.42756032943725586, - "learning_rate": 6.942414029584639e-06, - "loss": 0.2957, - "step": 17698 - }, - { - "epoch": 1.1567217828900072, - "grad_norm": 0.4337129592895508, - "learning_rate": 6.942092261630594e-06, - "loss": 0.3325, - "step": 17699 - }, - { - "epoch": 1.1567871380955492, - "grad_norm": 0.4741310775279999, - "learning_rate": 6.941770484204294e-06, - "loss": 0.4079, - "step": 17700 - }, - { - "epoch": 1.1568524933010915, - "grad_norm": 0.42564648389816284, - "learning_rate": 6.9414486973073045e-06, - "loss": 0.3486, - "step": 17701 - }, - { - "epoch": 1.1569178485066336, - "grad_norm": 0.42298024892807007, - "learning_rate": 6.941126900941198e-06, - "loss": 0.3225, - "step": 17702 - }, - { - "epoch": 1.1569832037121757, - "grad_norm": 0.4448823630809784, - "learning_rate": 6.940805095107544e-06, - "loss": 0.3463, - "step": 17703 - }, - { - "epoch": 1.1570485589177177, - "grad_norm": 0.42664510011672974, - "learning_rate": 6.94048327980791e-06, - "loss": 0.3466, - "step": 17704 - }, - { - "epoch": 1.15711391412326, - "grad_norm": 0.4197862446308136, - "learning_rate": 6.940161455043867e-06, - "loss": 0.3309, - "step": 17705 - }, - { - "epoch": 1.157179269328802, - "grad_norm": 0.44740229845046997, - "learning_rate": 6.939839620816985e-06, - "loss": 0.3104, - "step": 17706 - }, - { - "epoch": 1.1572446245343442, - "grad_norm": 0.4196939170360565, - "learning_rate": 6.939517777128833e-06, - "loss": 0.2841, - "step": 17707 - }, - { - "epoch": 1.1573099797398863, - "grad_norm": 0.46484148502349854, - "learning_rate": 6.93919592398098e-06, - "loss": 0.3176, - "step": 17708 - }, - { - "epoch": 1.1573753349454283, - "grad_norm": 0.41649261116981506, - "learning_rate": 6.938874061374997e-06, - "loss": 0.3064, - "step": 17709 - }, - { - "epoch": 1.1574406901509706, - "grad_norm": 0.427366703748703, - "learning_rate": 6.938552189312454e-06, - "loss": 0.3121, - "step": 17710 - }, - { - "epoch": 1.1575060453565127, - "grad_norm": 0.4428960382938385, - "learning_rate": 6.93823030779492e-06, - "loss": 0.3787, - "step": 17711 - }, - { - "epoch": 1.1575714005620548, - "grad_norm": 0.45263683795928955, - "learning_rate": 6.937908416823967e-06, - "loss": 0.3844, - "step": 17712 - }, - { - "epoch": 1.1576367557675968, - "grad_norm": 0.44688180088996887, - "learning_rate": 6.937586516401162e-06, - "loss": 0.3575, - "step": 17713 - }, - { - "epoch": 1.1577021109731391, - "grad_norm": 0.47628867626190186, - "learning_rate": 6.937264606528074e-06, - "loss": 0.3529, - "step": 17714 - }, - { - "epoch": 1.1577674661786812, - "grad_norm": 0.4480690360069275, - "learning_rate": 6.9369426872062804e-06, - "loss": 0.3399, - "step": 17715 - }, - { - "epoch": 1.1578328213842233, - "grad_norm": 0.47651416063308716, - "learning_rate": 6.9366207584373425e-06, - "loss": 0.3867, - "step": 17716 - }, - { - "epoch": 1.1578981765897653, - "grad_norm": 0.4467816948890686, - "learning_rate": 6.936298820222835e-06, - "loss": 0.3522, - "step": 17717 - }, - { - "epoch": 1.1579635317953074, - "grad_norm": 0.44865909218788147, - "learning_rate": 6.935976872564327e-06, - "loss": 0.3101, - "step": 17718 - }, - { - "epoch": 1.1580288870008495, - "grad_norm": 0.4765969216823578, - "learning_rate": 6.93565491546339e-06, - "loss": 0.3837, - "step": 17719 - }, - { - "epoch": 1.1580942422063918, - "grad_norm": 0.42105284333229065, - "learning_rate": 6.935332948921594e-06, - "loss": 0.3226, - "step": 17720 - }, - { - "epoch": 1.1581595974119339, - "grad_norm": 0.5028206706047058, - "learning_rate": 6.935010972940508e-06, - "loss": 0.4379, - "step": 17721 - }, - { - "epoch": 1.158224952617476, - "grad_norm": 0.4653327763080597, - "learning_rate": 6.934688987521702e-06, - "loss": 0.3671, - "step": 17722 - }, - { - "epoch": 1.1582903078230182, - "grad_norm": 0.4500982463359833, - "learning_rate": 6.934366992666749e-06, - "loss": 0.3709, - "step": 17723 - }, - { - "epoch": 1.1583556630285603, - "grad_norm": 0.44609150290489197, - "learning_rate": 6.934044988377218e-06, - "loss": 0.3462, - "step": 17724 - }, - { - "epoch": 1.1584210182341024, - "grad_norm": 0.4506785273551941, - "learning_rate": 6.9337229746546784e-06, - "loss": 0.3338, - "step": 17725 - }, - { - "epoch": 1.1584863734396444, - "grad_norm": 0.48381128907203674, - "learning_rate": 6.933400951500704e-06, - "loss": 0.3838, - "step": 17726 - }, - { - "epoch": 1.1585517286451865, - "grad_norm": 0.4423137605190277, - "learning_rate": 6.93307891891686e-06, - "loss": 0.3434, - "step": 17727 - }, - { - "epoch": 1.1586170838507286, - "grad_norm": 0.47422897815704346, - "learning_rate": 6.932756876904724e-06, - "loss": 0.385, - "step": 17728 - }, - { - "epoch": 1.1586824390562709, - "grad_norm": 0.40563276410102844, - "learning_rate": 6.932434825465862e-06, - "loss": 0.2791, - "step": 17729 - }, - { - "epoch": 1.158747794261813, - "grad_norm": 0.4454939365386963, - "learning_rate": 6.932112764601845e-06, - "loss": 0.3452, - "step": 17730 - }, - { - "epoch": 1.158813149467355, - "grad_norm": 0.4607152044773102, - "learning_rate": 6.931790694314246e-06, - "loss": 0.3982, - "step": 17731 - }, - { - "epoch": 1.158878504672897, - "grad_norm": 0.4172993004322052, - "learning_rate": 6.931468614604633e-06, - "loss": 0.3101, - "step": 17732 - }, - { - "epoch": 1.1589438598784394, - "grad_norm": 0.42265626788139343, - "learning_rate": 6.931146525474579e-06, - "loss": 0.3181, - "step": 17733 - }, - { - "epoch": 1.1590092150839815, - "grad_norm": 0.45781445503234863, - "learning_rate": 6.930824426925654e-06, - "loss": 0.3529, - "step": 17734 - }, - { - "epoch": 1.1590745702895235, - "grad_norm": 0.4498709738254547, - "learning_rate": 6.930502318959431e-06, - "loss": 0.3337, - "step": 17735 - }, - { - "epoch": 1.1591399254950656, - "grad_norm": 0.4513479173183441, - "learning_rate": 6.930180201577479e-06, - "loss": 0.3381, - "step": 17736 - }, - { - "epoch": 1.1592052807006077, - "grad_norm": 0.4964551627635956, - "learning_rate": 6.929858074781368e-06, - "loss": 0.4226, - "step": 17737 - }, - { - "epoch": 1.15927063590615, - "grad_norm": 0.47749781608581543, - "learning_rate": 6.929535938572671e-06, - "loss": 0.3853, - "step": 17738 - }, - { - "epoch": 1.159335991111692, - "grad_norm": 0.44127658009529114, - "learning_rate": 6.92921379295296e-06, - "loss": 0.3188, - "step": 17739 - }, - { - "epoch": 1.1594013463172341, - "grad_norm": 0.47328656911849976, - "learning_rate": 6.928891637923804e-06, - "loss": 0.3156, - "step": 17740 - }, - { - "epoch": 1.1594667015227762, - "grad_norm": 0.48997533321380615, - "learning_rate": 6.928569473486776e-06, - "loss": 0.359, - "step": 17741 - }, - { - "epoch": 1.1595320567283185, - "grad_norm": 0.41671857237815857, - "learning_rate": 6.928247299643447e-06, - "loss": 0.2964, - "step": 17742 - }, - { - "epoch": 1.1595974119338606, - "grad_norm": 0.4364735186100006, - "learning_rate": 6.927925116395385e-06, - "loss": 0.3275, - "step": 17743 - }, - { - "epoch": 1.1596627671394026, - "grad_norm": 0.4537159502506256, - "learning_rate": 6.927602923744168e-06, - "loss": 0.3778, - "step": 17744 - }, - { - "epoch": 1.1597281223449447, - "grad_norm": 0.44723019003868103, - "learning_rate": 6.9272807216913606e-06, - "loss": 0.3503, - "step": 17745 - }, - { - "epoch": 1.1597934775504868, - "grad_norm": 0.43190619349479675, - "learning_rate": 6.926958510238539e-06, - "loss": 0.3023, - "step": 17746 - }, - { - "epoch": 1.159858832756029, - "grad_norm": 0.46130454540252686, - "learning_rate": 6.926636289387273e-06, - "loss": 0.3464, - "step": 17747 - }, - { - "epoch": 1.1599241879615712, - "grad_norm": 0.504646897315979, - "learning_rate": 6.926314059139134e-06, - "loss": 0.4161, - "step": 17748 - }, - { - "epoch": 1.1599895431671132, - "grad_norm": 0.4602302014827728, - "learning_rate": 6.925991819495694e-06, - "loss": 0.3713, - "step": 17749 - }, - { - "epoch": 1.1600548983726553, - "grad_norm": 0.46173661947250366, - "learning_rate": 6.925669570458526e-06, - "loss": 0.3683, - "step": 17750 - }, - { - "epoch": 1.1601202535781976, - "grad_norm": 0.46331456303596497, - "learning_rate": 6.925347312029201e-06, - "loss": 0.3431, - "step": 17751 - }, - { - "epoch": 1.1601856087837397, - "grad_norm": 0.4749436676502228, - "learning_rate": 6.925025044209287e-06, - "loss": 0.3864, - "step": 17752 - }, - { - "epoch": 1.1602509639892817, - "grad_norm": 0.4597143530845642, - "learning_rate": 6.924702767000361e-06, - "loss": 0.3766, - "step": 17753 - }, - { - "epoch": 1.1603163191948238, - "grad_norm": 0.4547629952430725, - "learning_rate": 6.924380480403991e-06, - "loss": 0.3411, - "step": 17754 - }, - { - "epoch": 1.1603816744003659, - "grad_norm": 0.5494674444198608, - "learning_rate": 6.924058184421753e-06, - "loss": 0.3224, - "step": 17755 - }, - { - "epoch": 1.1604470296059082, - "grad_norm": 0.49714577198028564, - "learning_rate": 6.923735879055215e-06, - "loss": 0.4199, - "step": 17756 - }, - { - "epoch": 1.1605123848114502, - "grad_norm": 0.43896931409835815, - "learning_rate": 6.923413564305953e-06, - "loss": 0.3384, - "step": 17757 - }, - { - "epoch": 1.1605777400169923, - "grad_norm": 0.49413934350013733, - "learning_rate": 6.923091240175534e-06, - "loss": 0.3911, - "step": 17758 - }, - { - "epoch": 1.1606430952225344, - "grad_norm": 0.4775960445404053, - "learning_rate": 6.9227689066655355e-06, - "loss": 0.3785, - "step": 17759 - }, - { - "epoch": 1.1607084504280767, - "grad_norm": 0.4625619947910309, - "learning_rate": 6.922446563777525e-06, - "loss": 0.372, - "step": 17760 - }, - { - "epoch": 1.1607738056336188, - "grad_norm": 0.45002758502960205, - "learning_rate": 6.9221242115130775e-06, - "loss": 0.3351, - "step": 17761 - }, - { - "epoch": 1.1608391608391608, - "grad_norm": 0.5079486966133118, - "learning_rate": 6.921801849873763e-06, - "loss": 0.3899, - "step": 17762 - }, - { - "epoch": 1.160904516044703, - "grad_norm": 0.4468036890029907, - "learning_rate": 6.921479478861156e-06, - "loss": 0.3339, - "step": 17763 - }, - { - "epoch": 1.160969871250245, - "grad_norm": 0.3972158133983612, - "learning_rate": 6.921157098476829e-06, - "loss": 0.2574, - "step": 17764 - }, - { - "epoch": 1.1610352264557873, - "grad_norm": 0.42045748233795166, - "learning_rate": 6.9208347087223505e-06, - "loss": 0.3088, - "step": 17765 - }, - { - "epoch": 1.1611005816613293, - "grad_norm": 0.48316705226898193, - "learning_rate": 6.920512309599298e-06, - "loss": 0.3765, - "step": 17766 - }, - { - "epoch": 1.1611659368668714, - "grad_norm": 0.4461493194103241, - "learning_rate": 6.920189901109241e-06, - "loss": 0.3493, - "step": 17767 - }, - { - "epoch": 1.1612312920724135, - "grad_norm": 0.4933953583240509, - "learning_rate": 6.919867483253753e-06, - "loss": 0.349, - "step": 17768 - }, - { - "epoch": 1.1612966472779558, - "grad_norm": 0.47226646542549133, - "learning_rate": 6.919545056034406e-06, - "loss": 0.3645, - "step": 17769 - }, - { - "epoch": 1.1613620024834979, - "grad_norm": 0.4418177008628845, - "learning_rate": 6.919222619452772e-06, - "loss": 0.3337, - "step": 17770 - }, - { - "epoch": 1.16142735768904, - "grad_norm": 0.43331435322761536, - "learning_rate": 6.918900173510426e-06, - "loss": 0.3225, - "step": 17771 - }, - { - "epoch": 1.161492712894582, - "grad_norm": 0.49668073654174805, - "learning_rate": 6.918577718208937e-06, - "loss": 0.3043, - "step": 17772 - }, - { - "epoch": 1.161558068100124, - "grad_norm": 0.44126200675964355, - "learning_rate": 6.918255253549882e-06, - "loss": 0.327, - "step": 17773 - }, - { - "epoch": 1.1616234233056664, - "grad_norm": 0.42060813307762146, - "learning_rate": 6.917932779534828e-06, - "loss": 0.3096, - "step": 17774 - }, - { - "epoch": 1.1616887785112084, - "grad_norm": 0.44270995259284973, - "learning_rate": 6.917610296165356e-06, - "loss": 0.3379, - "step": 17775 - }, - { - "epoch": 1.1617541337167505, - "grad_norm": 0.47732046246528625, - "learning_rate": 6.917287803443031e-06, - "loss": 0.3892, - "step": 17776 - }, - { - "epoch": 1.1618194889222926, - "grad_norm": 0.45870134234428406, - "learning_rate": 6.91696530136943e-06, - "loss": 0.342, - "step": 17777 - }, - { - "epoch": 1.1618848441278349, - "grad_norm": 0.4363591969013214, - "learning_rate": 6.9166427899461266e-06, - "loss": 0.3613, - "step": 17778 - }, - { - "epoch": 1.161950199333377, - "grad_norm": 0.4601621627807617, - "learning_rate": 6.916320269174691e-06, - "loss": 0.3556, - "step": 17779 - }, - { - "epoch": 1.162015554538919, - "grad_norm": 0.4446568489074707, - "learning_rate": 6.915997739056697e-06, - "loss": 0.3062, - "step": 17780 - }, - { - "epoch": 1.162080909744461, - "grad_norm": 0.5495322346687317, - "learning_rate": 6.91567519959372e-06, - "loss": 0.4479, - "step": 17781 - }, - { - "epoch": 1.1621462649500032, - "grad_norm": 0.482977032661438, - "learning_rate": 6.9153526507873305e-06, - "loss": 0.3716, - "step": 17782 - }, - { - "epoch": 1.1622116201555455, - "grad_norm": 0.45155516266822815, - "learning_rate": 6.915030092639101e-06, - "loss": 0.3546, - "step": 17783 - }, - { - "epoch": 1.1622769753610875, - "grad_norm": 0.4688279628753662, - "learning_rate": 6.914707525150609e-06, - "loss": 0.3787, - "step": 17784 - }, - { - "epoch": 1.1623423305666296, - "grad_norm": 0.4659804403781891, - "learning_rate": 6.914384948323423e-06, - "loss": 0.3616, - "step": 17785 - }, - { - "epoch": 1.1624076857721717, - "grad_norm": 0.4333311915397644, - "learning_rate": 6.9140623621591194e-06, - "loss": 0.3351, - "step": 17786 - }, - { - "epoch": 1.162473040977714, - "grad_norm": 0.45172226428985596, - "learning_rate": 6.9137397666592695e-06, - "loss": 0.338, - "step": 17787 - }, - { - "epoch": 1.162538396183256, - "grad_norm": 0.45358651876449585, - "learning_rate": 6.913417161825449e-06, - "loss": 0.3667, - "step": 17788 - }, - { - "epoch": 1.1626037513887981, - "grad_norm": 0.45765697956085205, - "learning_rate": 6.913094547659231e-06, - "loss": 0.3433, - "step": 17789 - }, - { - "epoch": 1.1626691065943402, - "grad_norm": 0.44429054856300354, - "learning_rate": 6.912771924162186e-06, - "loss": 0.3509, - "step": 17790 - }, - { - "epoch": 1.1627344617998823, - "grad_norm": 0.4193040728569031, - "learning_rate": 6.912449291335891e-06, - "loss": 0.3035, - "step": 17791 - }, - { - "epoch": 1.1627998170054246, - "grad_norm": 0.4288153648376465, - "learning_rate": 6.912126649181917e-06, - "loss": 0.289, - "step": 17792 - }, - { - "epoch": 1.1628651722109666, - "grad_norm": 0.4442983865737915, - "learning_rate": 6.91180399770184e-06, - "loss": 0.3481, - "step": 17793 - }, - { - "epoch": 1.1629305274165087, - "grad_norm": 0.4443356990814209, - "learning_rate": 6.911481336897232e-06, - "loss": 0.3515, - "step": 17794 - }, - { - "epoch": 1.1629958826220508, - "grad_norm": 0.45085409283638, - "learning_rate": 6.911158666769668e-06, - "loss": 0.3724, - "step": 17795 - }, - { - "epoch": 1.163061237827593, - "grad_norm": 0.4919372797012329, - "learning_rate": 6.9108359873207224e-06, - "loss": 0.381, - "step": 17796 - }, - { - "epoch": 1.1631265930331351, - "grad_norm": 0.46046745777130127, - "learning_rate": 6.9105132985519655e-06, - "loss": 0.3274, - "step": 17797 - }, - { - "epoch": 1.1631919482386772, - "grad_norm": 0.46758633852005005, - "learning_rate": 6.910190600464975e-06, - "loss": 0.3695, - "step": 17798 - }, - { - "epoch": 1.1632573034442193, - "grad_norm": 0.4499673545360565, - "learning_rate": 6.909867893061322e-06, - "loss": 0.3334, - "step": 17799 - }, - { - "epoch": 1.1633226586497614, - "grad_norm": 0.44051435589790344, - "learning_rate": 6.909545176342582e-06, - "loss": 0.3454, - "step": 17800 - }, - { - "epoch": 1.1633880138553037, - "grad_norm": 0.47183161973953247, - "learning_rate": 6.909222450310326e-06, - "loss": 0.3527, - "step": 17801 - }, - { - "epoch": 1.1634533690608457, - "grad_norm": 0.45104748010635376, - "learning_rate": 6.908899714966136e-06, - "loss": 0.3158, - "step": 17802 - }, - { - "epoch": 1.1635187242663878, - "grad_norm": 0.4534842073917389, - "learning_rate": 6.9085769703115755e-06, - "loss": 0.3206, - "step": 17803 - }, - { - "epoch": 1.1635840794719299, - "grad_norm": 0.49478787183761597, - "learning_rate": 6.908254216348227e-06, - "loss": 0.38, - "step": 17804 - }, - { - "epoch": 1.1636494346774722, - "grad_norm": 0.43771791458129883, - "learning_rate": 6.907931453077661e-06, - "loss": 0.3526, - "step": 17805 - }, - { - "epoch": 1.1637147898830142, - "grad_norm": 0.42836201190948486, - "learning_rate": 6.907608680501452e-06, - "loss": 0.3058, - "step": 17806 - }, - { - "epoch": 1.1637801450885563, - "grad_norm": 0.46062058210372925, - "learning_rate": 6.907285898621174e-06, - "loss": 0.3535, - "step": 17807 - }, - { - "epoch": 1.1638455002940984, - "grad_norm": 0.47398918867111206, - "learning_rate": 6.906963107438405e-06, - "loss": 0.3896, - "step": 17808 - }, - { - "epoch": 1.1639108554996405, - "grad_norm": 0.4412325918674469, - "learning_rate": 6.906640306954714e-06, - "loss": 0.3461, - "step": 17809 - }, - { - "epoch": 1.1639762107051828, - "grad_norm": 0.4329341948032379, - "learning_rate": 6.906317497171677e-06, - "loss": 0.3211, - "step": 17810 - }, - { - "epoch": 1.1640415659107248, - "grad_norm": 0.44721320271492004, - "learning_rate": 6.9059946780908705e-06, - "loss": 0.3465, - "step": 17811 - }, - { - "epoch": 1.164106921116267, - "grad_norm": 0.485296368598938, - "learning_rate": 6.905671849713866e-06, - "loss": 0.4212, - "step": 17812 - }, - { - "epoch": 1.164172276321809, - "grad_norm": 0.4662768840789795, - "learning_rate": 6.905349012042242e-06, - "loss": 0.3689, - "step": 17813 - }, - { - "epoch": 1.1642376315273513, - "grad_norm": 0.44688907265663147, - "learning_rate": 6.905026165077569e-06, - "loss": 0.3446, - "step": 17814 - }, - { - "epoch": 1.1643029867328933, - "grad_norm": 0.44490015506744385, - "learning_rate": 6.904703308821424e-06, - "loss": 0.3175, - "step": 17815 - }, - { - "epoch": 1.1643683419384354, - "grad_norm": 0.4581475853919983, - "learning_rate": 6.904380443275381e-06, - "loss": 0.3194, - "step": 17816 - }, - { - "epoch": 1.1644336971439775, - "grad_norm": 0.42191195487976074, - "learning_rate": 6.904057568441015e-06, - "loss": 0.3052, - "step": 17817 - }, - { - "epoch": 1.1644990523495196, - "grad_norm": 0.4517256021499634, - "learning_rate": 6.9037346843199e-06, - "loss": 0.3487, - "step": 17818 - }, - { - "epoch": 1.1645644075550619, - "grad_norm": 0.4611721634864807, - "learning_rate": 6.903411790913612e-06, - "loss": 0.3529, - "step": 17819 - }, - { - "epoch": 1.164629762760604, - "grad_norm": 0.46251773834228516, - "learning_rate": 6.903088888223727e-06, - "loss": 0.3625, - "step": 17820 - }, - { - "epoch": 1.164695117966146, - "grad_norm": 0.47848039865493774, - "learning_rate": 6.902765976251817e-06, - "loss": 0.3975, - "step": 17821 - }, - { - "epoch": 1.164760473171688, - "grad_norm": 0.4365377426147461, - "learning_rate": 6.902443054999457e-06, - "loss": 0.3437, - "step": 17822 - }, - { - "epoch": 1.1648258283772304, - "grad_norm": 0.46792539954185486, - "learning_rate": 6.902120124468224e-06, - "loss": 0.3529, - "step": 17823 - }, - { - "epoch": 1.1648911835827724, - "grad_norm": 0.4476098120212555, - "learning_rate": 6.901797184659691e-06, - "loss": 0.3185, - "step": 17824 - }, - { - "epoch": 1.1649565387883145, - "grad_norm": 0.49423664808273315, - "learning_rate": 6.901474235575435e-06, - "loss": 0.343, - "step": 17825 - }, - { - "epoch": 1.1650218939938566, - "grad_norm": 0.42680567502975464, - "learning_rate": 6.9011512772170304e-06, - "loss": 0.3039, - "step": 17826 - }, - { - "epoch": 1.1650872491993987, - "grad_norm": 0.47794899344444275, - "learning_rate": 6.900828309586054e-06, - "loss": 0.3699, - "step": 17827 - }, - { - "epoch": 1.165152604404941, - "grad_norm": 0.43290624022483826, - "learning_rate": 6.900505332684078e-06, - "loss": 0.3418, - "step": 17828 - }, - { - "epoch": 1.165217959610483, - "grad_norm": 0.4656946659088135, - "learning_rate": 6.900182346512679e-06, - "loss": 0.3703, - "step": 17829 - }, - { - "epoch": 1.165283314816025, - "grad_norm": 0.5058578848838806, - "learning_rate": 6.899859351073432e-06, - "loss": 0.411, - "step": 17830 - }, - { - "epoch": 1.1653486700215672, - "grad_norm": 0.46284496784210205, - "learning_rate": 6.899536346367914e-06, - "loss": 0.377, - "step": 17831 - }, - { - "epoch": 1.1654140252271095, - "grad_norm": 0.47973090410232544, - "learning_rate": 6.8992133323976985e-06, - "loss": 0.3759, - "step": 17832 - }, - { - "epoch": 1.1654793804326515, - "grad_norm": 0.43616053462028503, - "learning_rate": 6.898890309164362e-06, - "loss": 0.3073, - "step": 17833 - }, - { - "epoch": 1.1655447356381936, - "grad_norm": 0.44696760177612305, - "learning_rate": 6.8985672766694785e-06, - "loss": 0.3433, - "step": 17834 - }, - { - "epoch": 1.1656100908437357, - "grad_norm": 0.44514200091362, - "learning_rate": 6.898244234914626e-06, - "loss": 0.3291, - "step": 17835 - }, - { - "epoch": 1.1656754460492778, - "grad_norm": 0.4564495086669922, - "learning_rate": 6.8979211839013785e-06, - "loss": 0.3639, - "step": 17836 - }, - { - "epoch": 1.1657408012548198, - "grad_norm": 0.4443361163139343, - "learning_rate": 6.8975981236313105e-06, - "loss": 0.3421, - "step": 17837 - }, - { - "epoch": 1.1658061564603621, - "grad_norm": 0.4628848433494568, - "learning_rate": 6.897275054106001e-06, - "loss": 0.3543, - "step": 17838 - }, - { - "epoch": 1.1658715116659042, - "grad_norm": 0.5016350150108337, - "learning_rate": 6.896951975327022e-06, - "loss": 0.4043, - "step": 17839 - }, - { - "epoch": 1.1659368668714463, - "grad_norm": 0.46526437997817993, - "learning_rate": 6.896628887295953e-06, - "loss": 0.3563, - "step": 17840 - }, - { - "epoch": 1.1660022220769883, - "grad_norm": 0.4498814642429352, - "learning_rate": 6.896305790014367e-06, - "loss": 0.3353, - "step": 17841 - }, - { - "epoch": 1.1660675772825306, - "grad_norm": 0.44351500272750854, - "learning_rate": 6.895982683483842e-06, - "loss": 0.3213, - "step": 17842 - }, - { - "epoch": 1.1661329324880727, - "grad_norm": 0.4809412658214569, - "learning_rate": 6.89565956770595e-06, - "loss": 0.3811, - "step": 17843 - }, - { - "epoch": 1.1661982876936148, - "grad_norm": 0.469224750995636, - "learning_rate": 6.895336442682271e-06, - "loss": 0.3414, - "step": 17844 - }, - { - "epoch": 1.1662636428991568, - "grad_norm": 0.4733920097351074, - "learning_rate": 6.895013308414379e-06, - "loss": 0.3381, - "step": 17845 - }, - { - "epoch": 1.166328998104699, - "grad_norm": 0.4229116141796112, - "learning_rate": 6.894690164903851e-06, - "loss": 0.3322, - "step": 17846 - }, - { - "epoch": 1.1663943533102412, - "grad_norm": 0.442176878452301, - "learning_rate": 6.894367012152263e-06, - "loss": 0.3645, - "step": 17847 - }, - { - "epoch": 1.1664597085157833, - "grad_norm": 0.4134009778499603, - "learning_rate": 6.89404385016119e-06, - "loss": 0.3302, - "step": 17848 - }, - { - "epoch": 1.1665250637213254, - "grad_norm": 0.47841525077819824, - "learning_rate": 6.89372067893221e-06, - "loss": 0.3731, - "step": 17849 - }, - { - "epoch": 1.1665904189268674, - "grad_norm": 0.42091986536979675, - "learning_rate": 6.893397498466897e-06, - "loss": 0.2816, - "step": 17850 - }, - { - "epoch": 1.1666557741324097, - "grad_norm": 0.4541703760623932, - "learning_rate": 6.89307430876683e-06, - "loss": 0.3566, - "step": 17851 - }, - { - "epoch": 1.1667211293379518, - "grad_norm": 0.4824099540710449, - "learning_rate": 6.892751109833582e-06, - "loss": 0.4082, - "step": 17852 - }, - { - "epoch": 1.1667864845434939, - "grad_norm": 0.4634397029876709, - "learning_rate": 6.892427901668732e-06, - "loss": 0.4, - "step": 17853 - }, - { - "epoch": 1.166851839749036, - "grad_norm": 0.4780362844467163, - "learning_rate": 6.892104684273856e-06, - "loss": 0.387, - "step": 17854 - }, - { - "epoch": 1.166917194954578, - "grad_norm": 0.4563106596469879, - "learning_rate": 6.8917814576505296e-06, - "loss": 0.3391, - "step": 17855 - }, - { - "epoch": 1.1669825501601203, - "grad_norm": 0.4266104996204376, - "learning_rate": 6.891458221800329e-06, - "loss": 0.308, - "step": 17856 - }, - { - "epoch": 1.1670479053656624, - "grad_norm": 0.46188053488731384, - "learning_rate": 6.891134976724831e-06, - "loss": 0.3884, - "step": 17857 - }, - { - "epoch": 1.1671132605712045, - "grad_norm": 0.45284342765808105, - "learning_rate": 6.890811722425615e-06, - "loss": 0.3515, - "step": 17858 - }, - { - "epoch": 1.1671786157767465, - "grad_norm": 0.46304458379745483, - "learning_rate": 6.890488458904253e-06, - "loss": 0.3471, - "step": 17859 - }, - { - "epoch": 1.1672439709822888, - "grad_norm": 0.5158681869506836, - "learning_rate": 6.890165186162326e-06, - "loss": 0.4391, - "step": 17860 - }, - { - "epoch": 1.167309326187831, - "grad_norm": 0.41425713896751404, - "learning_rate": 6.889841904201405e-06, - "loss": 0.2848, - "step": 17861 - }, - { - "epoch": 1.167374681393373, - "grad_norm": 0.4457348883152008, - "learning_rate": 6.889518613023074e-06, - "loss": 0.3369, - "step": 17862 - }, - { - "epoch": 1.167440036598915, - "grad_norm": 0.43998560309410095, - "learning_rate": 6.889195312628905e-06, - "loss": 0.3146, - "step": 17863 - }, - { - "epoch": 1.1675053918044571, - "grad_norm": 0.47874224185943604, - "learning_rate": 6.888872003020475e-06, - "loss": 0.3399, - "step": 17864 - }, - { - "epoch": 1.1675707470099994, - "grad_norm": 0.4386598765850067, - "learning_rate": 6.888548684199363e-06, - "loss": 0.3483, - "step": 17865 - }, - { - "epoch": 1.1676361022155415, - "grad_norm": 0.48109519481658936, - "learning_rate": 6.888225356167144e-06, - "loss": 0.3436, - "step": 17866 - }, - { - "epoch": 1.1677014574210836, - "grad_norm": 0.47091230750083923, - "learning_rate": 6.887902018925396e-06, - "loss": 0.3733, - "step": 17867 - }, - { - "epoch": 1.1677668126266256, - "grad_norm": 0.43233412504196167, - "learning_rate": 6.887578672475695e-06, - "loss": 0.322, - "step": 17868 - }, - { - "epoch": 1.167832167832168, - "grad_norm": 0.42750242352485657, - "learning_rate": 6.887255316819621e-06, - "loss": 0.3278, - "step": 17869 - }, - { - "epoch": 1.16789752303771, - "grad_norm": 0.4594009816646576, - "learning_rate": 6.886931951958746e-06, - "loss": 0.3568, - "step": 17870 - }, - { - "epoch": 1.167962878243252, - "grad_norm": 0.47360721230506897, - "learning_rate": 6.8866085778946535e-06, - "loss": 0.351, - "step": 17871 - }, - { - "epoch": 1.1680282334487941, - "grad_norm": 0.4647512137889862, - "learning_rate": 6.886285194628914e-06, - "loss": 0.3348, - "step": 17872 - }, - { - "epoch": 1.1680935886543362, - "grad_norm": 0.4592376947402954, - "learning_rate": 6.885961802163111e-06, - "loss": 0.3706, - "step": 17873 - }, - { - "epoch": 1.1681589438598785, - "grad_norm": 0.4431360363960266, - "learning_rate": 6.885638400498819e-06, - "loss": 0.3496, - "step": 17874 - }, - { - "epoch": 1.1682242990654206, - "grad_norm": 0.4597468972206116, - "learning_rate": 6.8853149896376125e-06, - "loss": 0.3638, - "step": 17875 - }, - { - "epoch": 1.1682896542709627, - "grad_norm": 0.4695626497268677, - "learning_rate": 6.884991569581074e-06, - "loss": 0.3899, - "step": 17876 - }, - { - "epoch": 1.1683550094765047, - "grad_norm": 0.4574628174304962, - "learning_rate": 6.884668140330777e-06, - "loss": 0.3438, - "step": 17877 - }, - { - "epoch": 1.168420364682047, - "grad_norm": 0.44924554228782654, - "learning_rate": 6.884344701888303e-06, - "loss": 0.3581, - "step": 17878 - }, - { - "epoch": 1.168485719887589, - "grad_norm": 0.39787736535072327, - "learning_rate": 6.8840212542552245e-06, - "loss": 0.2588, - "step": 17879 - }, - { - "epoch": 1.1685510750931312, - "grad_norm": 0.43013879656791687, - "learning_rate": 6.883697797433123e-06, - "loss": 0.3417, - "step": 17880 - }, - { - "epoch": 1.1686164302986732, - "grad_norm": 0.4616085886955261, - "learning_rate": 6.883374331423574e-06, - "loss": 0.3781, - "step": 17881 - }, - { - "epoch": 1.1686817855042153, - "grad_norm": 0.4305798411369324, - "learning_rate": 6.8830508562281575e-06, - "loss": 0.2914, - "step": 17882 - }, - { - "epoch": 1.1687471407097576, - "grad_norm": 0.4259510934352875, - "learning_rate": 6.882727371848448e-06, - "loss": 0.326, - "step": 17883 - }, - { - "epoch": 1.1688124959152997, - "grad_norm": 0.44786539673805237, - "learning_rate": 6.8824038782860255e-06, - "loss": 0.3315, - "step": 17884 - }, - { - "epoch": 1.1688778511208417, - "grad_norm": 0.44690102338790894, - "learning_rate": 6.882080375542468e-06, - "loss": 0.3348, - "step": 17885 - }, - { - "epoch": 1.1689432063263838, - "grad_norm": 0.4234074652194977, - "learning_rate": 6.881756863619351e-06, - "loss": 0.3269, - "step": 17886 - }, - { - "epoch": 1.1690085615319261, - "grad_norm": 0.46854862570762634, - "learning_rate": 6.8814333425182545e-06, - "loss": 0.3723, - "step": 17887 - }, - { - "epoch": 1.1690739167374682, - "grad_norm": 0.450252890586853, - "learning_rate": 6.881109812240755e-06, - "loss": 0.3484, - "step": 17888 - }, - { - "epoch": 1.1691392719430103, - "grad_norm": 0.4471893608570099, - "learning_rate": 6.880786272788433e-06, - "loss": 0.3578, - "step": 17889 - }, - { - "epoch": 1.1692046271485523, - "grad_norm": 0.471813440322876, - "learning_rate": 6.880462724162863e-06, - "loss": 0.3707, - "step": 17890 - }, - { - "epoch": 1.1692699823540944, - "grad_norm": 0.44897350668907166, - "learning_rate": 6.8801391663656256e-06, - "loss": 0.3486, - "step": 17891 - }, - { - "epoch": 1.1693353375596367, - "grad_norm": 0.4620920717716217, - "learning_rate": 6.879815599398299e-06, - "loss": 0.3596, - "step": 17892 - }, - { - "epoch": 1.1694006927651788, - "grad_norm": 0.44977056980133057, - "learning_rate": 6.8794920232624594e-06, - "loss": 0.3241, - "step": 17893 - }, - { - "epoch": 1.1694660479707208, - "grad_norm": 0.40761831402778625, - "learning_rate": 6.8791684379596865e-06, - "loss": 0.3117, - "step": 17894 - }, - { - "epoch": 1.169531403176263, - "grad_norm": 0.45136305689811707, - "learning_rate": 6.878844843491556e-06, - "loss": 0.3411, - "step": 17895 - }, - { - "epoch": 1.1695967583818052, - "grad_norm": 0.48416197299957275, - "learning_rate": 6.878521239859652e-06, - "loss": 0.3472, - "step": 17896 - }, - { - "epoch": 1.1696621135873473, - "grad_norm": 0.4517776668071747, - "learning_rate": 6.8781976270655456e-06, - "loss": 0.3441, - "step": 17897 - }, - { - "epoch": 1.1697274687928894, - "grad_norm": 0.4336404800415039, - "learning_rate": 6.877874005110822e-06, - "loss": 0.3115, - "step": 17898 - }, - { - "epoch": 1.1697928239984314, - "grad_norm": 0.47181832790374756, - "learning_rate": 6.877550373997054e-06, - "loss": 0.3448, - "step": 17899 - }, - { - "epoch": 1.1698581792039735, - "grad_norm": 0.43525490164756775, - "learning_rate": 6.877226733725824e-06, - "loss": 0.3438, - "step": 17900 - }, - { - "epoch": 1.1699235344095158, - "grad_norm": 0.4715517461299896, - "learning_rate": 6.876903084298709e-06, - "loss": 0.3539, - "step": 17901 - }, - { - "epoch": 1.1699888896150579, - "grad_norm": 0.45197275280952454, - "learning_rate": 6.876579425717285e-06, - "loss": 0.3148, - "step": 17902 - }, - { - "epoch": 1.1700542448206, - "grad_norm": 0.4133222997188568, - "learning_rate": 6.876255757983134e-06, - "loss": 0.2975, - "step": 17903 - }, - { - "epoch": 1.170119600026142, - "grad_norm": 0.4909352660179138, - "learning_rate": 6.875932081097836e-06, - "loss": 0.3903, - "step": 17904 - }, - { - "epoch": 1.1701849552316843, - "grad_norm": 0.4491754472255707, - "learning_rate": 6.875608395062966e-06, - "loss": 0.3523, - "step": 17905 - }, - { - "epoch": 1.1702503104372264, - "grad_norm": 0.47093915939331055, - "learning_rate": 6.8752846998801025e-06, - "loss": 0.3744, - "step": 17906 - }, - { - "epoch": 1.1703156656427685, - "grad_norm": 0.46209442615509033, - "learning_rate": 6.8749609955508275e-06, - "loss": 0.3467, - "step": 17907 - }, - { - "epoch": 1.1703810208483105, - "grad_norm": 0.4198269844055176, - "learning_rate": 6.874637282076717e-06, - "loss": 0.3002, - "step": 17908 - }, - { - "epoch": 1.1704463760538526, - "grad_norm": 0.4138481616973877, - "learning_rate": 6.874313559459352e-06, - "loss": 0.2938, - "step": 17909 - }, - { - "epoch": 1.170511731259395, - "grad_norm": 0.4861946403980255, - "learning_rate": 6.873989827700309e-06, - "loss": 0.3591, - "step": 17910 - }, - { - "epoch": 1.170577086464937, - "grad_norm": 0.4596599340438843, - "learning_rate": 6.87366608680117e-06, - "loss": 0.3359, - "step": 17911 - }, - { - "epoch": 1.170642441670479, - "grad_norm": 0.44050946831703186, - "learning_rate": 6.873342336763513e-06, - "loss": 0.3522, - "step": 17912 - }, - { - "epoch": 1.1707077968760211, - "grad_norm": 0.4483802914619446, - "learning_rate": 6.873018577588915e-06, - "loss": 0.3739, - "step": 17913 - }, - { - "epoch": 1.1707731520815634, - "grad_norm": 0.4418737590312958, - "learning_rate": 6.872694809278957e-06, - "loss": 0.356, - "step": 17914 - }, - { - "epoch": 1.1708385072871055, - "grad_norm": 0.4524979591369629, - "learning_rate": 6.872371031835217e-06, - "loss": 0.3578, - "step": 17915 - }, - { - "epoch": 1.1709038624926476, - "grad_norm": 0.4703204333782196, - "learning_rate": 6.872047245259276e-06, - "loss": 0.361, - "step": 17916 - }, - { - "epoch": 1.1709692176981896, - "grad_norm": 0.45783576369285583, - "learning_rate": 6.871723449552711e-06, - "loss": 0.3662, - "step": 17917 - }, - { - "epoch": 1.1710345729037317, - "grad_norm": 0.42472904920578003, - "learning_rate": 6.871399644717103e-06, - "loss": 0.3266, - "step": 17918 - }, - { - "epoch": 1.171099928109274, - "grad_norm": 0.49030977487564087, - "learning_rate": 6.87107583075403e-06, - "loss": 0.3957, - "step": 17919 - }, - { - "epoch": 1.171165283314816, - "grad_norm": 0.5081116557121277, - "learning_rate": 6.870752007665072e-06, - "loss": 0.3791, - "step": 17920 - }, - { - "epoch": 1.1712306385203581, - "grad_norm": 0.5148254036903381, - "learning_rate": 6.87042817545181e-06, - "loss": 0.3364, - "step": 17921 - }, - { - "epoch": 1.1712959937259002, - "grad_norm": 0.4610372483730316, - "learning_rate": 6.870104334115819e-06, - "loss": 0.3724, - "step": 17922 - }, - { - "epoch": 1.1713613489314425, - "grad_norm": 0.4673974812030792, - "learning_rate": 6.869780483658684e-06, - "loss": 0.3504, - "step": 17923 - }, - { - "epoch": 1.1714267041369846, - "grad_norm": 0.4357917904853821, - "learning_rate": 6.86945662408198e-06, - "loss": 0.3364, - "step": 17924 - }, - { - "epoch": 1.1714920593425266, - "grad_norm": 0.42845776677131653, - "learning_rate": 6.86913275538729e-06, - "loss": 0.313, - "step": 17925 - }, - { - "epoch": 1.1715574145480687, - "grad_norm": 0.424049973487854, - "learning_rate": 6.868808877576191e-06, - "loss": 0.3193, - "step": 17926 - }, - { - "epoch": 1.1716227697536108, - "grad_norm": 0.46391093730926514, - "learning_rate": 6.868484990650264e-06, - "loss": 0.3976, - "step": 17927 - }, - { - "epoch": 1.171688124959153, - "grad_norm": 0.4479975402355194, - "learning_rate": 6.868161094611088e-06, - "loss": 0.3427, - "step": 17928 - }, - { - "epoch": 1.1717534801646952, - "grad_norm": 0.44597792625427246, - "learning_rate": 6.867837189460244e-06, - "loss": 0.3146, - "step": 17929 - }, - { - "epoch": 1.1718188353702372, - "grad_norm": 0.4174831211566925, - "learning_rate": 6.86751327519931e-06, - "loss": 0.2994, - "step": 17930 - }, - { - "epoch": 1.1718841905757793, - "grad_norm": 0.4617291986942291, - "learning_rate": 6.867189351829866e-06, - "loss": 0.3786, - "step": 17931 - }, - { - "epoch": 1.1719495457813216, - "grad_norm": 0.43605837225914, - "learning_rate": 6.866865419353494e-06, - "loss": 0.3054, - "step": 17932 - }, - { - "epoch": 1.1720149009868637, - "grad_norm": 0.46060532331466675, - "learning_rate": 6.866541477771772e-06, - "loss": 0.3479, - "step": 17933 - }, - { - "epoch": 1.1720802561924057, - "grad_norm": 0.45015233755111694, - "learning_rate": 6.866217527086281e-06, - "loss": 0.3334, - "step": 17934 - }, - { - "epoch": 1.1721456113979478, - "grad_norm": 0.43359237909317017, - "learning_rate": 6.8658935672986e-06, - "loss": 0.3339, - "step": 17935 - }, - { - "epoch": 1.17221096660349, - "grad_norm": 0.4737197756767273, - "learning_rate": 6.865569598410311e-06, - "loss": 0.362, - "step": 17936 - }, - { - "epoch": 1.1722763218090322, - "grad_norm": 0.44698429107666016, - "learning_rate": 6.865245620422991e-06, - "loss": 0.3612, - "step": 17937 - }, - { - "epoch": 1.1723416770145743, - "grad_norm": 0.4733181893825531, - "learning_rate": 6.864921633338224e-06, - "loss": 0.3675, - "step": 17938 - }, - { - "epoch": 1.1724070322201163, - "grad_norm": 0.44927340745925903, - "learning_rate": 6.864597637157586e-06, - "loss": 0.3319, - "step": 17939 - }, - { - "epoch": 1.1724723874256584, - "grad_norm": 0.45221009850502014, - "learning_rate": 6.864273631882661e-06, - "loss": 0.3829, - "step": 17940 - }, - { - "epoch": 1.1725377426312007, - "grad_norm": 0.446685791015625, - "learning_rate": 6.863949617515027e-06, - "loss": 0.316, - "step": 17941 - }, - { - "epoch": 1.1726030978367428, - "grad_norm": 0.4642782211303711, - "learning_rate": 6.863625594056264e-06, - "loss": 0.378, - "step": 17942 - }, - { - "epoch": 1.1726684530422848, - "grad_norm": 0.49931108951568604, - "learning_rate": 6.8633015615079555e-06, - "loss": 0.3358, - "step": 17943 - }, - { - "epoch": 1.172733808247827, - "grad_norm": 0.4535532295703888, - "learning_rate": 6.862977519871678e-06, - "loss": 0.3257, - "step": 17944 - }, - { - "epoch": 1.172799163453369, - "grad_norm": 0.45970287919044495, - "learning_rate": 6.862653469149014e-06, - "loss": 0.3246, - "step": 17945 - }, - { - "epoch": 1.172864518658911, - "grad_norm": 0.509086012840271, - "learning_rate": 6.862329409341545e-06, - "loss": 0.3155, - "step": 17946 - }, - { - "epoch": 1.1729298738644534, - "grad_norm": 0.4492672085762024, - "learning_rate": 6.86200534045085e-06, - "loss": 0.3446, - "step": 17947 - }, - { - "epoch": 1.1729952290699954, - "grad_norm": 0.48400983214378357, - "learning_rate": 6.861681262478508e-06, - "loss": 0.4013, - "step": 17948 - }, - { - "epoch": 1.1730605842755375, - "grad_norm": 0.43667343258857727, - "learning_rate": 6.8613571754261036e-06, - "loss": 0.3211, - "step": 17949 - }, - { - "epoch": 1.1731259394810796, - "grad_norm": 0.46238502860069275, - "learning_rate": 6.861033079295215e-06, - "loss": 0.3235, - "step": 17950 - }, - { - "epoch": 1.1731912946866219, - "grad_norm": 0.4288751482963562, - "learning_rate": 6.860708974087422e-06, - "loss": 0.3117, - "step": 17951 - }, - { - "epoch": 1.173256649892164, - "grad_norm": 0.43703600764274597, - "learning_rate": 6.860384859804308e-06, - "loss": 0.3282, - "step": 17952 - }, - { - "epoch": 1.173322005097706, - "grad_norm": 0.43666911125183105, - "learning_rate": 6.860060736447452e-06, - "loss": 0.3464, - "step": 17953 - }, - { - "epoch": 1.173387360303248, - "grad_norm": 0.467313677072525, - "learning_rate": 6.8597366040184365e-06, - "loss": 0.3535, - "step": 17954 - }, - { - "epoch": 1.1734527155087902, - "grad_norm": 0.4325222074985504, - "learning_rate": 6.8594124625188395e-06, - "loss": 0.2938, - "step": 17955 - }, - { - "epoch": 1.1735180707143325, - "grad_norm": 0.45261213183403015, - "learning_rate": 6.859088311950245e-06, - "loss": 0.3602, - "step": 17956 - }, - { - "epoch": 1.1735834259198745, - "grad_norm": 0.48950883746147156, - "learning_rate": 6.858764152314234e-06, - "loss": 0.3792, - "step": 17957 - }, - { - "epoch": 1.1736487811254166, - "grad_norm": 0.4489250183105469, - "learning_rate": 6.858439983612384e-06, - "loss": 0.3597, - "step": 17958 - }, - { - "epoch": 1.1737141363309587, - "grad_norm": 0.40376684069633484, - "learning_rate": 6.858115805846279e-06, - "loss": 0.2981, - "step": 17959 - }, - { - "epoch": 1.173779491536501, - "grad_norm": 0.4631209373474121, - "learning_rate": 6.857791619017499e-06, - "loss": 0.39, - "step": 17960 - }, - { - "epoch": 1.173844846742043, - "grad_norm": 0.4509110450744629, - "learning_rate": 6.857467423127626e-06, - "loss": 0.3629, - "step": 17961 - }, - { - "epoch": 1.173910201947585, - "grad_norm": 0.4790554642677307, - "learning_rate": 6.857143218178242e-06, - "loss": 0.3856, - "step": 17962 - }, - { - "epoch": 1.1739755571531272, - "grad_norm": 0.4161204397678375, - "learning_rate": 6.856819004170926e-06, - "loss": 0.3239, - "step": 17963 - }, - { - "epoch": 1.1740409123586693, - "grad_norm": 0.4591652750968933, - "learning_rate": 6.8564947811072606e-06, - "loss": 0.3707, - "step": 17964 - }, - { - "epoch": 1.1741062675642115, - "grad_norm": 0.44288069009780884, - "learning_rate": 6.856170548988827e-06, - "loss": 0.3542, - "step": 17965 - }, - { - "epoch": 1.1741716227697536, - "grad_norm": 0.42953959107398987, - "learning_rate": 6.855846307817206e-06, - "loss": 0.3028, - "step": 17966 - }, - { - "epoch": 1.1742369779752957, - "grad_norm": 0.45490196347236633, - "learning_rate": 6.8555220575939816e-06, - "loss": 0.3233, - "step": 17967 - }, - { - "epoch": 1.1743023331808378, - "grad_norm": 0.4385647475719452, - "learning_rate": 6.8551977983207314e-06, - "loss": 0.3477, - "step": 17968 - }, - { - "epoch": 1.17436768838638, - "grad_norm": 0.41461601853370667, - "learning_rate": 6.85487352999904e-06, - "loss": 0.3006, - "step": 17969 - }, - { - "epoch": 1.1744330435919221, - "grad_norm": 0.42973747849464417, - "learning_rate": 6.854549252630488e-06, - "loss": 0.3175, - "step": 17970 - }, - { - "epoch": 1.1744983987974642, - "grad_norm": 0.501099169254303, - "learning_rate": 6.854224966216656e-06, - "loss": 0.4013, - "step": 17971 - }, - { - "epoch": 1.1745637540030063, - "grad_norm": 0.4343818724155426, - "learning_rate": 6.853900670759127e-06, - "loss": 0.3505, - "step": 17972 - }, - { - "epoch": 1.1746291092085483, - "grad_norm": 0.4484075605869293, - "learning_rate": 6.853576366259481e-06, - "loss": 0.331, - "step": 17973 - }, - { - "epoch": 1.1746944644140906, - "grad_norm": 0.47266674041748047, - "learning_rate": 6.853252052719302e-06, - "loss": 0.3887, - "step": 17974 - }, - { - "epoch": 1.1747598196196327, - "grad_norm": 0.45336630940437317, - "learning_rate": 6.852927730140171e-06, - "loss": 0.3515, - "step": 17975 - }, - { - "epoch": 1.1748251748251748, - "grad_norm": 0.42022061347961426, - "learning_rate": 6.852603398523668e-06, - "loss": 0.3281, - "step": 17976 - }, - { - "epoch": 1.1748905300307169, - "grad_norm": 0.41096192598342896, - "learning_rate": 6.8522790578713785e-06, - "loss": 0.3085, - "step": 17977 - }, - { - "epoch": 1.1749558852362592, - "grad_norm": 0.4493367373943329, - "learning_rate": 6.8519547081848804e-06, - "loss": 0.3501, - "step": 17978 - }, - { - "epoch": 1.1750212404418012, - "grad_norm": 0.4425660967826843, - "learning_rate": 6.8516303494657585e-06, - "loss": 0.3773, - "step": 17979 - }, - { - "epoch": 1.1750865956473433, - "grad_norm": 0.4296548366546631, - "learning_rate": 6.851305981715595e-06, - "loss": 0.3014, - "step": 17980 - }, - { - "epoch": 1.1751519508528854, - "grad_norm": 0.43769317865371704, - "learning_rate": 6.850981604935969e-06, - "loss": 0.3179, - "step": 17981 - }, - { - "epoch": 1.1752173060584274, - "grad_norm": 0.40472865104675293, - "learning_rate": 6.850657219128465e-06, - "loss": 0.2798, - "step": 17982 - }, - { - "epoch": 1.1752826612639697, - "grad_norm": 0.4564395546913147, - "learning_rate": 6.850332824294666e-06, - "loss": 0.3953, - "step": 17983 - }, - { - "epoch": 1.1753480164695118, - "grad_norm": 0.5194770693778992, - "learning_rate": 6.850008420436152e-06, - "loss": 0.3887, - "step": 17984 - }, - { - "epoch": 1.1754133716750539, - "grad_norm": 0.4259186387062073, - "learning_rate": 6.849684007554505e-06, - "loss": 0.3249, - "step": 17985 - }, - { - "epoch": 1.175478726880596, - "grad_norm": 0.45520758628845215, - "learning_rate": 6.8493595856513085e-06, - "loss": 0.3679, - "step": 17986 - }, - { - "epoch": 1.1755440820861383, - "grad_norm": 0.4616486132144928, - "learning_rate": 6.849035154728145e-06, - "loss": 0.3656, - "step": 17987 - }, - { - "epoch": 1.1756094372916803, - "grad_norm": 0.4529689848423004, - "learning_rate": 6.848710714786597e-06, - "loss": 0.3262, - "step": 17988 - }, - { - "epoch": 1.1756747924972224, - "grad_norm": 0.4659520089626312, - "learning_rate": 6.848386265828247e-06, - "loss": 0.3768, - "step": 17989 - }, - { - "epoch": 1.1757401477027645, - "grad_norm": 0.4518166780471802, - "learning_rate": 6.8480618078546755e-06, - "loss": 0.3296, - "step": 17990 - }, - { - "epoch": 1.1758055029083065, - "grad_norm": 0.42663031816482544, - "learning_rate": 6.847737340867466e-06, - "loss": 0.3271, - "step": 17991 - }, - { - "epoch": 1.1758708581138488, - "grad_norm": 0.44441017508506775, - "learning_rate": 6.847412864868203e-06, - "loss": 0.3583, - "step": 17992 - }, - { - "epoch": 1.175936213319391, - "grad_norm": 0.4520832300186157, - "learning_rate": 6.847088379858466e-06, - "loss": 0.3435, - "step": 17993 - }, - { - "epoch": 1.176001568524933, - "grad_norm": 0.4381174147129059, - "learning_rate": 6.846763885839839e-06, - "loss": 0.3236, - "step": 17994 - }, - { - "epoch": 1.176066923730475, - "grad_norm": 0.46784117817878723, - "learning_rate": 6.846439382813906e-06, - "loss": 0.398, - "step": 17995 - }, - { - "epoch": 1.1761322789360174, - "grad_norm": 0.4486067295074463, - "learning_rate": 6.846114870782248e-06, - "loss": 0.339, - "step": 17996 - }, - { - "epoch": 1.1761976341415594, - "grad_norm": 0.4041271209716797, - "learning_rate": 6.845790349746447e-06, - "loss": 0.2812, - "step": 17997 - }, - { - "epoch": 1.1762629893471015, - "grad_norm": 0.4424710273742676, - "learning_rate": 6.845465819708088e-06, - "loss": 0.3598, - "step": 17998 - }, - { - "epoch": 1.1763283445526436, - "grad_norm": 0.42119044065475464, - "learning_rate": 6.845141280668753e-06, - "loss": 0.2893, - "step": 17999 - }, - { - "epoch": 1.1763936997581856, - "grad_norm": 0.4513300955295563, - "learning_rate": 6.844816732630024e-06, - "loss": 0.3302, - "step": 18000 - }, - { - "epoch": 1.176459054963728, - "grad_norm": 0.4212856888771057, - "learning_rate": 6.844492175593486e-06, - "loss": 0.3114, - "step": 18001 - }, - { - "epoch": 1.17652441016927, - "grad_norm": 0.46957045793533325, - "learning_rate": 6.844167609560719e-06, - "loss": 0.3836, - "step": 18002 - }, - { - "epoch": 1.176589765374812, - "grad_norm": 0.4354605972766876, - "learning_rate": 6.843843034533309e-06, - "loss": 0.3219, - "step": 18003 - }, - { - "epoch": 1.1766551205803542, - "grad_norm": 0.47783634066581726, - "learning_rate": 6.843518450512838e-06, - "loss": 0.3954, - "step": 18004 - }, - { - "epoch": 1.1767204757858964, - "grad_norm": 0.4695514738559723, - "learning_rate": 6.843193857500888e-06, - "loss": 0.3413, - "step": 18005 - }, - { - "epoch": 1.1767858309914385, - "grad_norm": 0.41028517484664917, - "learning_rate": 6.842869255499044e-06, - "loss": 0.2983, - "step": 18006 - }, - { - "epoch": 1.1768511861969806, - "grad_norm": 0.4204944670200348, - "learning_rate": 6.842544644508886e-06, - "loss": 0.2998, - "step": 18007 - }, - { - "epoch": 1.1769165414025227, - "grad_norm": 0.4137457013130188, - "learning_rate": 6.842220024532003e-06, - "loss": 0.295, - "step": 18008 - }, - { - "epoch": 1.1769818966080647, - "grad_norm": 0.4356216490268707, - "learning_rate": 6.841895395569972e-06, - "loss": 0.2984, - "step": 18009 - }, - { - "epoch": 1.177047251813607, - "grad_norm": 0.4260989725589752, - "learning_rate": 6.8415707576243806e-06, - "loss": 0.3023, - "step": 18010 - }, - { - "epoch": 1.177112607019149, - "grad_norm": 0.4351992607116699, - "learning_rate": 6.841246110696809e-06, - "loss": 0.3071, - "step": 18011 - }, - { - "epoch": 1.1771779622246912, - "grad_norm": 0.44766145944595337, - "learning_rate": 6.840921454788844e-06, - "loss": 0.3562, - "step": 18012 - }, - { - "epoch": 1.1772433174302332, - "grad_norm": 0.43967947363853455, - "learning_rate": 6.840596789902065e-06, - "loss": 0.3328, - "step": 18013 - }, - { - "epoch": 1.1773086726357755, - "grad_norm": 0.46116381883621216, - "learning_rate": 6.84027211603806e-06, - "loss": 0.374, - "step": 18014 - }, - { - "epoch": 1.1773740278413176, - "grad_norm": 0.41465020179748535, - "learning_rate": 6.83994743319841e-06, - "loss": 0.3381, - "step": 18015 - }, - { - "epoch": 1.1774393830468597, - "grad_norm": 0.47038406133651733, - "learning_rate": 6.839622741384697e-06, - "loss": 0.3456, - "step": 18016 - }, - { - "epoch": 1.1775047382524018, - "grad_norm": 0.45181000232696533, - "learning_rate": 6.839298040598509e-06, - "loss": 0.371, - "step": 18017 - }, - { - "epoch": 1.1775700934579438, - "grad_norm": 0.4977002739906311, - "learning_rate": 6.838973330841425e-06, - "loss": 0.4463, - "step": 18018 - }, - { - "epoch": 1.1776354486634861, - "grad_norm": 0.4479779601097107, - "learning_rate": 6.838648612115033e-06, - "loss": 0.359, - "step": 18019 - }, - { - "epoch": 1.1777008038690282, - "grad_norm": 0.4374707341194153, - "learning_rate": 6.8383238844209144e-06, - "loss": 0.3255, - "step": 18020 - }, - { - "epoch": 1.1777661590745703, - "grad_norm": 0.4624062180519104, - "learning_rate": 6.837999147760653e-06, - "loss": 0.3627, - "step": 18021 - }, - { - "epoch": 1.1778315142801123, - "grad_norm": 0.4592170715332031, - "learning_rate": 6.837674402135832e-06, - "loss": 0.3425, - "step": 18022 - }, - { - "epoch": 1.1778968694856546, - "grad_norm": 0.4088749885559082, - "learning_rate": 6.837349647548039e-06, - "loss": 0.2792, - "step": 18023 - }, - { - "epoch": 1.1779622246911967, - "grad_norm": 0.481981486082077, - "learning_rate": 6.837024883998853e-06, - "loss": 0.382, - "step": 18024 - }, - { - "epoch": 1.1780275798967388, - "grad_norm": 0.46223366260528564, - "learning_rate": 6.8367001114898605e-06, - "loss": 0.3844, - "step": 18025 - }, - { - "epoch": 1.1780929351022809, - "grad_norm": 0.4582575857639313, - "learning_rate": 6.836375330022646e-06, - "loss": 0.3676, - "step": 18026 - }, - { - "epoch": 1.178158290307823, - "grad_norm": 0.47594618797302246, - "learning_rate": 6.836050539598792e-06, - "loss": 0.3668, - "step": 18027 - }, - { - "epoch": 1.1782236455133652, - "grad_norm": 0.45693692564964294, - "learning_rate": 6.835725740219884e-06, - "loss": 0.3543, - "step": 18028 - }, - { - "epoch": 1.1782890007189073, - "grad_norm": 0.4364302456378937, - "learning_rate": 6.835400931887505e-06, - "loss": 0.3492, - "step": 18029 - }, - { - "epoch": 1.1783543559244494, - "grad_norm": 0.5103545188903809, - "learning_rate": 6.835076114603242e-06, - "loss": 0.3329, - "step": 18030 - }, - { - "epoch": 1.1784197111299914, - "grad_norm": 0.42652902007102966, - "learning_rate": 6.834751288368674e-06, - "loss": 0.3129, - "step": 18031 - }, - { - "epoch": 1.1784850663355337, - "grad_norm": 0.4542108178138733, - "learning_rate": 6.8344264531853896e-06, - "loss": 0.3731, - "step": 18032 - }, - { - "epoch": 1.1785504215410758, - "grad_norm": 0.5060190558433533, - "learning_rate": 6.834101609054973e-06, - "loss": 0.3602, - "step": 18033 - }, - { - "epoch": 1.1786157767466179, - "grad_norm": 0.45472753047943115, - "learning_rate": 6.833776755979006e-06, - "loss": 0.3628, - "step": 18034 - }, - { - "epoch": 1.17868113195216, - "grad_norm": 0.4651648998260498, - "learning_rate": 6.833451893959076e-06, - "loss": 0.3496, - "step": 18035 - }, - { - "epoch": 1.178746487157702, - "grad_norm": 0.4575902223587036, - "learning_rate": 6.833127022996764e-06, - "loss": 0.3791, - "step": 18036 - }, - { - "epoch": 1.1788118423632443, - "grad_norm": 0.4694966971874237, - "learning_rate": 6.832802143093657e-06, - "loss": 0.3645, - "step": 18037 - }, - { - "epoch": 1.1788771975687864, - "grad_norm": 0.4156460464000702, - "learning_rate": 6.832477254251339e-06, - "loss": 0.2651, - "step": 18038 - }, - { - "epoch": 1.1789425527743285, - "grad_norm": 0.46168753504753113, - "learning_rate": 6.832152356471396e-06, - "loss": 0.3498, - "step": 18039 - }, - { - "epoch": 1.1790079079798705, - "grad_norm": 0.5003201365470886, - "learning_rate": 6.831827449755408e-06, - "loss": 0.4062, - "step": 18040 - }, - { - "epoch": 1.1790732631854128, - "grad_norm": 0.4539341330528259, - "learning_rate": 6.831502534104966e-06, - "loss": 0.3593, - "step": 18041 - }, - { - "epoch": 1.179138618390955, - "grad_norm": 0.4290333092212677, - "learning_rate": 6.831177609521651e-06, - "loss": 0.3138, - "step": 18042 - }, - { - "epoch": 1.179203973596497, - "grad_norm": 0.4450630247592926, - "learning_rate": 6.830852676007048e-06, - "loss": 0.3692, - "step": 18043 - }, - { - "epoch": 1.179269328802039, - "grad_norm": 0.4453296363353729, - "learning_rate": 6.830527733562743e-06, - "loss": 0.3635, - "step": 18044 - }, - { - "epoch": 1.1793346840075811, - "grad_norm": 0.44329598546028137, - "learning_rate": 6.8302027821903185e-06, - "loss": 0.3469, - "step": 18045 - }, - { - "epoch": 1.1794000392131234, - "grad_norm": 0.4198266863822937, - "learning_rate": 6.829877821891362e-06, - "loss": 0.3051, - "step": 18046 - }, - { - "epoch": 1.1794653944186655, - "grad_norm": 0.440769761800766, - "learning_rate": 6.829552852667457e-06, - "loss": 0.3198, - "step": 18047 - }, - { - "epoch": 1.1795307496242076, - "grad_norm": 0.4311501383781433, - "learning_rate": 6.82922787452019e-06, - "loss": 0.2923, - "step": 18048 - }, - { - "epoch": 1.1795961048297496, - "grad_norm": 0.42883557081222534, - "learning_rate": 6.828902887451143e-06, - "loss": 0.3423, - "step": 18049 - }, - { - "epoch": 1.179661460035292, - "grad_norm": 0.409929096698761, - "learning_rate": 6.828577891461905e-06, - "loss": 0.2918, - "step": 18050 - }, - { - "epoch": 1.179726815240834, - "grad_norm": 0.4326501786708832, - "learning_rate": 6.8282528865540585e-06, - "loss": 0.328, - "step": 18051 - }, - { - "epoch": 1.179792170446376, - "grad_norm": 0.44737958908081055, - "learning_rate": 6.82792787272919e-06, - "loss": 0.3356, - "step": 18052 - }, - { - "epoch": 1.1798575256519181, - "grad_norm": 0.4553202986717224, - "learning_rate": 6.827602849988883e-06, - "loss": 0.3593, - "step": 18053 - }, - { - "epoch": 1.1799228808574602, - "grad_norm": 0.4543243646621704, - "learning_rate": 6.827277818334724e-06, - "loss": 0.3824, - "step": 18054 - }, - { - "epoch": 1.1799882360630023, - "grad_norm": 0.4133303463459015, - "learning_rate": 6.826952777768299e-06, - "loss": 0.2872, - "step": 18055 - }, - { - "epoch": 1.1800535912685446, - "grad_norm": 0.43816137313842773, - "learning_rate": 6.826627728291191e-06, - "loss": 0.3177, - "step": 18056 - }, - { - "epoch": 1.1801189464740867, - "grad_norm": 0.41716015338897705, - "learning_rate": 6.826302669904987e-06, - "loss": 0.312, - "step": 18057 - }, - { - "epoch": 1.1801843016796287, - "grad_norm": 0.4646686911582947, - "learning_rate": 6.825977602611271e-06, - "loss": 0.3586, - "step": 18058 - }, - { - "epoch": 1.180249656885171, - "grad_norm": 0.4383331835269928, - "learning_rate": 6.825652526411632e-06, - "loss": 0.3362, - "step": 18059 - }, - { - "epoch": 1.180315012090713, - "grad_norm": 0.4383421838283539, - "learning_rate": 6.825327441307652e-06, - "loss": 0.3199, - "step": 18060 - }, - { - "epoch": 1.1803803672962552, - "grad_norm": 0.45525622367858887, - "learning_rate": 6.825002347300919e-06, - "loss": 0.3394, - "step": 18061 - }, - { - "epoch": 1.1804457225017972, - "grad_norm": 0.4475131034851074, - "learning_rate": 6.824677244393017e-06, - "loss": 0.322, - "step": 18062 - }, - { - "epoch": 1.1805110777073393, - "grad_norm": 0.44492107629776, - "learning_rate": 6.82435213258553e-06, - "loss": 0.3685, - "step": 18063 - }, - { - "epoch": 1.1805764329128814, - "grad_norm": 0.44649577140808105, - "learning_rate": 6.824027011880047e-06, - "loss": 0.3119, - "step": 18064 - }, - { - "epoch": 1.1806417881184237, - "grad_norm": 0.44526177644729614, - "learning_rate": 6.823701882278151e-06, - "loss": 0.3457, - "step": 18065 - }, - { - "epoch": 1.1807071433239658, - "grad_norm": 0.4460541009902954, - "learning_rate": 6.823376743781432e-06, - "loss": 0.3343, - "step": 18066 - }, - { - "epoch": 1.1807724985295078, - "grad_norm": 0.4453355073928833, - "learning_rate": 6.82305159639147e-06, - "loss": 0.3244, - "step": 18067 - }, - { - "epoch": 1.18083785373505, - "grad_norm": 0.49602919816970825, - "learning_rate": 6.822726440109854e-06, - "loss": 0.4142, - "step": 18068 - }, - { - "epoch": 1.1809032089405922, - "grad_norm": 0.44526922702789307, - "learning_rate": 6.8224012749381714e-06, - "loss": 0.3234, - "step": 18069 - }, - { - "epoch": 1.1809685641461343, - "grad_norm": 0.4330725371837616, - "learning_rate": 6.822076100878006e-06, - "loss": 0.3019, - "step": 18070 - }, - { - "epoch": 1.1810339193516763, - "grad_norm": 0.4495363235473633, - "learning_rate": 6.821750917930945e-06, - "loss": 0.3354, - "step": 18071 - }, - { - "epoch": 1.1810992745572184, - "grad_norm": 0.4604150056838989, - "learning_rate": 6.821425726098572e-06, - "loss": 0.3837, - "step": 18072 - }, - { - "epoch": 1.1811646297627605, - "grad_norm": 0.44327229261398315, - "learning_rate": 6.821100525382476e-06, - "loss": 0.3367, - "step": 18073 - }, - { - "epoch": 1.1812299849683028, - "grad_norm": 0.4184994697570801, - "learning_rate": 6.82077531578424e-06, - "loss": 0.3098, - "step": 18074 - }, - { - "epoch": 1.1812953401738449, - "grad_norm": 0.4676985740661621, - "learning_rate": 6.820450097305454e-06, - "loss": 0.3401, - "step": 18075 - }, - { - "epoch": 1.181360695379387, - "grad_norm": 0.47087225317955017, - "learning_rate": 6.820124869947702e-06, - "loss": 0.3685, - "step": 18076 - }, - { - "epoch": 1.181426050584929, - "grad_norm": 0.42198848724365234, - "learning_rate": 6.819799633712569e-06, - "loss": 0.3365, - "step": 18077 - }, - { - "epoch": 1.1814914057904713, - "grad_norm": 0.45783135294914246, - "learning_rate": 6.819474388601644e-06, - "loss": 0.3503, - "step": 18078 - }, - { - "epoch": 1.1815567609960134, - "grad_norm": 0.44989100098609924, - "learning_rate": 6.8191491346165114e-06, - "loss": 0.3586, - "step": 18079 - }, - { - "epoch": 1.1816221162015554, - "grad_norm": 0.4347403645515442, - "learning_rate": 6.81882387175876e-06, - "loss": 0.335, - "step": 18080 - }, - { - "epoch": 1.1816874714070975, - "grad_norm": 0.48094576597213745, - "learning_rate": 6.818498600029972e-06, - "loss": 0.3587, - "step": 18081 - }, - { - "epoch": 1.1817528266126396, - "grad_norm": 0.4099532663822174, - "learning_rate": 6.818173319431738e-06, - "loss": 0.3026, - "step": 18082 - }, - { - "epoch": 1.1818181818181819, - "grad_norm": 0.47157734632492065, - "learning_rate": 6.817848029965641e-06, - "loss": 0.3631, - "step": 18083 - }, - { - "epoch": 1.181883537023724, - "grad_norm": 0.4975356161594391, - "learning_rate": 6.817522731633271e-06, - "loss": 0.3912, - "step": 18084 - }, - { - "epoch": 1.181948892229266, - "grad_norm": 0.4343315362930298, - "learning_rate": 6.817197424436212e-06, - "loss": 0.321, - "step": 18085 - }, - { - "epoch": 1.182014247434808, - "grad_norm": 0.46990543603897095, - "learning_rate": 6.816872108376054e-06, - "loss": 0.37, - "step": 18086 - }, - { - "epoch": 1.1820796026403504, - "grad_norm": 0.4753810465335846, - "learning_rate": 6.816546783454379e-06, - "loss": 0.39, - "step": 18087 - }, - { - "epoch": 1.1821449578458925, - "grad_norm": 0.45715683698654175, - "learning_rate": 6.8162214496727765e-06, - "loss": 0.3603, - "step": 18088 - }, - { - "epoch": 1.1822103130514345, - "grad_norm": 0.45637282729148865, - "learning_rate": 6.815896107032833e-06, - "loss": 0.3615, - "step": 18089 - }, - { - "epoch": 1.1822756682569766, - "grad_norm": 0.4541541635990143, - "learning_rate": 6.815570755536134e-06, - "loss": 0.3329, - "step": 18090 - }, - { - "epoch": 1.1823410234625187, - "grad_norm": 0.460896760225296, - "learning_rate": 6.815245395184269e-06, - "loss": 0.3797, - "step": 18091 - }, - { - "epoch": 1.182406378668061, - "grad_norm": 0.41321203112602234, - "learning_rate": 6.814920025978822e-06, - "loss": 0.2847, - "step": 18092 - }, - { - "epoch": 1.182471733873603, - "grad_norm": 0.44384047389030457, - "learning_rate": 6.814594647921384e-06, - "loss": 0.322, - "step": 18093 - }, - { - "epoch": 1.1825370890791451, - "grad_norm": 0.5093187689781189, - "learning_rate": 6.814269261013537e-06, - "loss": 0.407, - "step": 18094 - }, - { - "epoch": 1.1826024442846872, - "grad_norm": 0.502673327922821, - "learning_rate": 6.81394386525687e-06, - "loss": 0.4091, - "step": 18095 - }, - { - "epoch": 1.1826677994902295, - "grad_norm": 0.4660140573978424, - "learning_rate": 6.813618460652971e-06, - "loss": 0.3839, - "step": 18096 - }, - { - "epoch": 1.1827331546957716, - "grad_norm": 0.44856759905815125, - "learning_rate": 6.813293047203426e-06, - "loss": 0.3727, - "step": 18097 - }, - { - "epoch": 1.1827985099013136, - "grad_norm": 0.4533741772174835, - "learning_rate": 6.812967624909823e-06, - "loss": 0.3414, - "step": 18098 - }, - { - "epoch": 1.1828638651068557, - "grad_norm": 0.47776374220848083, - "learning_rate": 6.81264219377375e-06, - "loss": 0.383, - "step": 18099 - }, - { - "epoch": 1.1829292203123978, - "grad_norm": 0.4365442991256714, - "learning_rate": 6.812316753796791e-06, - "loss": 0.3531, - "step": 18100 - }, - { - "epoch": 1.18299457551794, - "grad_norm": 0.48415282368659973, - "learning_rate": 6.811991304980536e-06, - "loss": 0.3852, - "step": 18101 - }, - { - "epoch": 1.1830599307234821, - "grad_norm": 0.45601290464401245, - "learning_rate": 6.8116658473265725e-06, - "loss": 0.3581, - "step": 18102 - }, - { - "epoch": 1.1831252859290242, - "grad_norm": 0.43471112847328186, - "learning_rate": 6.811340380836486e-06, - "loss": 0.3346, - "step": 18103 - }, - { - "epoch": 1.1831906411345663, - "grad_norm": 0.4638686776161194, - "learning_rate": 6.811014905511866e-06, - "loss": 0.3616, - "step": 18104 - }, - { - "epoch": 1.1832559963401086, - "grad_norm": 0.46512529253959656, - "learning_rate": 6.810689421354297e-06, - "loss": 0.3722, - "step": 18105 - }, - { - "epoch": 1.1833213515456507, - "grad_norm": 0.4197812080383301, - "learning_rate": 6.810363928365371e-06, - "loss": 0.3259, - "step": 18106 - }, - { - "epoch": 1.1833867067511927, - "grad_norm": 0.4628385603427887, - "learning_rate": 6.810038426546672e-06, - "loss": 0.3653, - "step": 18107 - }, - { - "epoch": 1.1834520619567348, - "grad_norm": 0.43266236782073975, - "learning_rate": 6.809712915899788e-06, - "loss": 0.2942, - "step": 18108 - }, - { - "epoch": 1.1835174171622769, - "grad_norm": 0.4563673734664917, - "learning_rate": 6.809387396426308e-06, - "loss": 0.3504, - "step": 18109 - }, - { - "epoch": 1.1835827723678192, - "grad_norm": 0.4399195611476898, - "learning_rate": 6.809061868127817e-06, - "loss": 0.3247, - "step": 18110 - }, - { - "epoch": 1.1836481275733612, - "grad_norm": 0.45540115237236023, - "learning_rate": 6.8087363310059075e-06, - "loss": 0.3551, - "step": 18111 - }, - { - "epoch": 1.1837134827789033, - "grad_norm": 0.45073649287223816, - "learning_rate": 6.808410785062161e-06, - "loss": 0.3349, - "step": 18112 - }, - { - "epoch": 1.1837788379844454, - "grad_norm": 0.4552570581436157, - "learning_rate": 6.808085230298172e-06, - "loss": 0.3421, - "step": 18113 - }, - { - "epoch": 1.1838441931899877, - "grad_norm": 0.4731093943119049, - "learning_rate": 6.807759666715522e-06, - "loss": 0.3883, - "step": 18114 - }, - { - "epoch": 1.1839095483955298, - "grad_norm": 0.44675278663635254, - "learning_rate": 6.807434094315803e-06, - "loss": 0.3526, - "step": 18115 - }, - { - "epoch": 1.1839749036010718, - "grad_norm": 0.47140228748321533, - "learning_rate": 6.807108513100602e-06, - "loss": 0.3963, - "step": 18116 - }, - { - "epoch": 1.184040258806614, - "grad_norm": 0.4452797472476959, - "learning_rate": 6.806782923071506e-06, - "loss": 0.3123, - "step": 18117 - }, - { - "epoch": 1.184105614012156, - "grad_norm": 0.4607033133506775, - "learning_rate": 6.8064573242301056e-06, - "loss": 0.3494, - "step": 18118 - }, - { - "epoch": 1.1841709692176983, - "grad_norm": 0.43921059370040894, - "learning_rate": 6.806131716577985e-06, - "loss": 0.3067, - "step": 18119 - }, - { - "epoch": 1.1842363244232403, - "grad_norm": 0.46310117840766907, - "learning_rate": 6.805806100116735e-06, - "loss": 0.3681, - "step": 18120 - }, - { - "epoch": 1.1843016796287824, - "grad_norm": 0.4808109402656555, - "learning_rate": 6.805480474847943e-06, - "loss": 0.3667, - "step": 18121 - }, - { - "epoch": 1.1843670348343245, - "grad_norm": 0.4988486170768738, - "learning_rate": 6.805154840773198e-06, - "loss": 0.3889, - "step": 18122 - }, - { - "epoch": 1.1844323900398668, - "grad_norm": 0.46786004304885864, - "learning_rate": 6.804829197894086e-06, - "loss": 0.3852, - "step": 18123 - }, - { - "epoch": 1.1844977452454089, - "grad_norm": 0.46828898787498474, - "learning_rate": 6.804503546212198e-06, - "loss": 0.3394, - "step": 18124 - }, - { - "epoch": 1.184563100450951, - "grad_norm": 0.4303341507911682, - "learning_rate": 6.804177885729119e-06, - "loss": 0.3183, - "step": 18125 - }, - { - "epoch": 1.184628455656493, - "grad_norm": 0.46578550338745117, - "learning_rate": 6.803852216446443e-06, - "loss": 0.3197, - "step": 18126 - }, - { - "epoch": 1.184693810862035, - "grad_norm": 0.45559433102607727, - "learning_rate": 6.803526538365752e-06, - "loss": 0.3527, - "step": 18127 - }, - { - "epoch": 1.1847591660675774, - "grad_norm": 0.4716496467590332, - "learning_rate": 6.803200851488638e-06, - "loss": 0.3583, - "step": 18128 - }, - { - "epoch": 1.1848245212731194, - "grad_norm": 0.4363313317298889, - "learning_rate": 6.802875155816689e-06, - "loss": 0.3159, - "step": 18129 - }, - { - "epoch": 1.1848898764786615, - "grad_norm": 0.43479371070861816, - "learning_rate": 6.802549451351494e-06, - "loss": 0.3507, - "step": 18130 - }, - { - "epoch": 1.1849552316842036, - "grad_norm": 0.4546174705028534, - "learning_rate": 6.80222373809464e-06, - "loss": 0.3647, - "step": 18131 - }, - { - "epoch": 1.1850205868897459, - "grad_norm": 0.4394910931587219, - "learning_rate": 6.8018980160477155e-06, - "loss": 0.3369, - "step": 18132 - }, - { - "epoch": 1.185085942095288, - "grad_norm": 0.4569244980812073, - "learning_rate": 6.801572285212311e-06, - "loss": 0.3609, - "step": 18133 - }, - { - "epoch": 1.18515129730083, - "grad_norm": 0.4328986406326294, - "learning_rate": 6.801246545590016e-06, - "loss": 0.3182, - "step": 18134 - }, - { - "epoch": 1.185216652506372, - "grad_norm": 0.4514637291431427, - "learning_rate": 6.800920797182416e-06, - "loss": 0.3076, - "step": 18135 - }, - { - "epoch": 1.1852820077119142, - "grad_norm": 0.4802858233451843, - "learning_rate": 6.800595039991101e-06, - "loss": 0.3824, - "step": 18136 - }, - { - "epoch": 1.1853473629174565, - "grad_norm": 0.4062332510948181, - "learning_rate": 6.8002692740176615e-06, - "loss": 0.2999, - "step": 18137 - }, - { - "epoch": 1.1854127181229985, - "grad_norm": 0.44033023715019226, - "learning_rate": 6.799943499263683e-06, - "loss": 0.3488, - "step": 18138 - }, - { - "epoch": 1.1854780733285406, - "grad_norm": 0.4408622086048126, - "learning_rate": 6.7996177157307574e-06, - "loss": 0.3293, - "step": 18139 - }, - { - "epoch": 1.1855434285340827, - "grad_norm": 0.4373001456260681, - "learning_rate": 6.799291923420475e-06, - "loss": 0.3306, - "step": 18140 - }, - { - "epoch": 1.185608783739625, - "grad_norm": 0.44007745385169983, - "learning_rate": 6.79896612233442e-06, - "loss": 0.3139, - "step": 18141 - }, - { - "epoch": 1.185674138945167, - "grad_norm": 0.48035043478012085, - "learning_rate": 6.7986403124741836e-06, - "loss": 0.39, - "step": 18142 - }, - { - "epoch": 1.1857394941507091, - "grad_norm": 0.43871375918388367, - "learning_rate": 6.798314493841356e-06, - "loss": 0.3506, - "step": 18143 - }, - { - "epoch": 1.1858048493562512, - "grad_norm": 0.49691376090049744, - "learning_rate": 6.797988666437527e-06, - "loss": 0.4343, - "step": 18144 - }, - { - "epoch": 1.1858702045617933, - "grad_norm": 0.46149203181266785, - "learning_rate": 6.797662830264283e-06, - "loss": 0.3351, - "step": 18145 - }, - { - "epoch": 1.1859355597673356, - "grad_norm": 0.4465920329093933, - "learning_rate": 6.797336985323215e-06, - "loss": 0.3617, - "step": 18146 - }, - { - "epoch": 1.1860009149728776, - "grad_norm": 0.5167786478996277, - "learning_rate": 6.797011131615912e-06, - "loss": 0.3092, - "step": 18147 - }, - { - "epoch": 1.1860662701784197, - "grad_norm": 0.4532272517681122, - "learning_rate": 6.796685269143962e-06, - "loss": 0.3655, - "step": 18148 - }, - { - "epoch": 1.1861316253839618, - "grad_norm": 0.4581167995929718, - "learning_rate": 6.796359397908957e-06, - "loss": 0.356, - "step": 18149 - }, - { - "epoch": 1.186196980589504, - "grad_norm": 0.46039530634880066, - "learning_rate": 6.796033517912483e-06, - "loss": 0.3574, - "step": 18150 - }, - { - "epoch": 1.1862623357950461, - "grad_norm": 0.4544249176979065, - "learning_rate": 6.795707629156134e-06, - "loss": 0.3652, - "step": 18151 - }, - { - "epoch": 1.1863276910005882, - "grad_norm": 0.43098723888397217, - "learning_rate": 6.7953817316414946e-06, - "loss": 0.31, - "step": 18152 - }, - { - "epoch": 1.1863930462061303, - "grad_norm": 0.44438937306404114, - "learning_rate": 6.795055825370158e-06, - "loss": 0.3508, - "step": 18153 - }, - { - "epoch": 1.1864584014116724, - "grad_norm": 0.4400290250778198, - "learning_rate": 6.794729910343712e-06, - "loss": 0.3403, - "step": 18154 - }, - { - "epoch": 1.1865237566172147, - "grad_norm": 0.4446077048778534, - "learning_rate": 6.794403986563746e-06, - "loss": 0.3293, - "step": 18155 - }, - { - "epoch": 1.1865891118227567, - "grad_norm": 0.44412150979042053, - "learning_rate": 6.794078054031852e-06, - "loss": 0.3233, - "step": 18156 - }, - { - "epoch": 1.1866544670282988, - "grad_norm": 0.45930489897727966, - "learning_rate": 6.793752112749616e-06, - "loss": 0.3671, - "step": 18157 - }, - { - "epoch": 1.1867198222338409, - "grad_norm": 0.46669676899909973, - "learning_rate": 6.793426162718629e-06, - "loss": 0.312, - "step": 18158 - }, - { - "epoch": 1.1867851774393832, - "grad_norm": 0.43126240372657776, - "learning_rate": 6.793100203940481e-06, - "loss": 0.3312, - "step": 18159 - }, - { - "epoch": 1.1868505326449252, - "grad_norm": 0.46476152539253235, - "learning_rate": 6.792774236416764e-06, - "loss": 0.3863, - "step": 18160 - }, - { - "epoch": 1.1869158878504673, - "grad_norm": 0.43273457884788513, - "learning_rate": 6.792448260149065e-06, - "loss": 0.3454, - "step": 18161 - }, - { - "epoch": 1.1869812430560094, - "grad_norm": 0.43777090311050415, - "learning_rate": 6.7921222751389746e-06, - "loss": 0.3272, - "step": 18162 - }, - { - "epoch": 1.1870465982615515, - "grad_norm": 0.436638742685318, - "learning_rate": 6.791796281388084e-06, - "loss": 0.3388, - "step": 18163 - }, - { - "epoch": 1.1871119534670938, - "grad_norm": 0.46859070658683777, - "learning_rate": 6.79147027889798e-06, - "loss": 0.3713, - "step": 18164 - }, - { - "epoch": 1.1871773086726358, - "grad_norm": 0.4711884558200836, - "learning_rate": 6.791144267670258e-06, - "loss": 0.3519, - "step": 18165 - }, - { - "epoch": 1.187242663878178, - "grad_norm": 0.4488896131515503, - "learning_rate": 6.790818247706502e-06, - "loss": 0.3804, - "step": 18166 - }, - { - "epoch": 1.18730801908372, - "grad_norm": 0.48901477456092834, - "learning_rate": 6.790492219008306e-06, - "loss": 0.4211, - "step": 18167 - }, - { - "epoch": 1.1873733742892623, - "grad_norm": 0.4810936450958252, - "learning_rate": 6.790166181577259e-06, - "loss": 0.3855, - "step": 18168 - }, - { - "epoch": 1.1874387294948043, - "grad_norm": 0.4303014278411865, - "learning_rate": 6.789840135414952e-06, - "loss": 0.308, - "step": 18169 - }, - { - "epoch": 1.1875040847003464, - "grad_norm": 0.4382808804512024, - "learning_rate": 6.7895140805229745e-06, - "loss": 0.3377, - "step": 18170 - }, - { - "epoch": 1.1875694399058885, - "grad_norm": 0.4406227171421051, - "learning_rate": 6.789188016902917e-06, - "loss": 0.3416, - "step": 18171 - }, - { - "epoch": 1.1876347951114306, - "grad_norm": 0.4523710608482361, - "learning_rate": 6.78886194455637e-06, - "loss": 0.327, - "step": 18172 - }, - { - "epoch": 1.1877001503169726, - "grad_norm": 0.47915711998939514, - "learning_rate": 6.788535863484922e-06, - "loss": 0.3772, - "step": 18173 - }, - { - "epoch": 1.187765505522515, - "grad_norm": 0.5097954273223877, - "learning_rate": 6.788209773690166e-06, - "loss": 0.4274, - "step": 18174 - }, - { - "epoch": 1.187830860728057, - "grad_norm": 0.46755391359329224, - "learning_rate": 6.787883675173691e-06, - "loss": 0.3384, - "step": 18175 - }, - { - "epoch": 1.187896215933599, - "grad_norm": 0.45391738414764404, - "learning_rate": 6.787557567937089e-06, - "loss": 0.361, - "step": 18176 - }, - { - "epoch": 1.1879615711391411, - "grad_norm": 0.45209866762161255, - "learning_rate": 6.787231451981949e-06, - "loss": 0.3293, - "step": 18177 - }, - { - "epoch": 1.1880269263446834, - "grad_norm": 0.43817824125289917, - "learning_rate": 6.786905327309863e-06, - "loss": 0.3186, - "step": 18178 - }, - { - "epoch": 1.1880922815502255, - "grad_norm": 0.6073752045631409, - "learning_rate": 6.786579193922418e-06, - "loss": 0.3617, - "step": 18179 - }, - { - "epoch": 1.1881576367557676, - "grad_norm": 0.4198361337184906, - "learning_rate": 6.78625305182121e-06, - "loss": 0.3138, - "step": 18180 - }, - { - "epoch": 1.1882229919613096, - "grad_norm": 0.46060195565223694, - "learning_rate": 6.7859269010078255e-06, - "loss": 0.3766, - "step": 18181 - }, - { - "epoch": 1.1882883471668517, - "grad_norm": 0.4528558552265167, - "learning_rate": 6.785600741483857e-06, - "loss": 0.3605, - "step": 18182 - }, - { - "epoch": 1.188353702372394, - "grad_norm": 0.4450226426124573, - "learning_rate": 6.785274573250896e-06, - "loss": 0.3322, - "step": 18183 - }, - { - "epoch": 1.188419057577936, - "grad_norm": 0.4433911144733429, - "learning_rate": 6.7849483963105314e-06, - "loss": 0.3423, - "step": 18184 - }, - { - "epoch": 1.1884844127834782, - "grad_norm": 0.44697684049606323, - "learning_rate": 6.784622210664355e-06, - "loss": 0.2936, - "step": 18185 - }, - { - "epoch": 1.1885497679890202, - "grad_norm": 0.4662095308303833, - "learning_rate": 6.784296016313958e-06, - "loss": 0.3624, - "step": 18186 - }, - { - "epoch": 1.1886151231945625, - "grad_norm": 0.4551587402820587, - "learning_rate": 6.783969813260932e-06, - "loss": 0.3524, - "step": 18187 - }, - { - "epoch": 1.1886804784001046, - "grad_norm": 0.46663618087768555, - "learning_rate": 6.783643601506866e-06, - "loss": 0.3618, - "step": 18188 - }, - { - "epoch": 1.1887458336056467, - "grad_norm": 0.4618913531303406, - "learning_rate": 6.783317381053354e-06, - "loss": 0.3654, - "step": 18189 - }, - { - "epoch": 1.1888111888111887, - "grad_norm": 0.44946882128715515, - "learning_rate": 6.782991151901983e-06, - "loss": 0.3071, - "step": 18190 - }, - { - "epoch": 1.1888765440167308, - "grad_norm": 0.5025569200515747, - "learning_rate": 6.782664914054349e-06, - "loss": 0.3888, - "step": 18191 - }, - { - "epoch": 1.1889418992222731, - "grad_norm": 0.4740449786186218, - "learning_rate": 6.78233866751204e-06, - "loss": 0.3712, - "step": 18192 - }, - { - "epoch": 1.1890072544278152, - "grad_norm": 0.43006962537765503, - "learning_rate": 6.782012412276646e-06, - "loss": 0.3298, - "step": 18193 - }, - { - "epoch": 1.1890726096333573, - "grad_norm": 0.44759437441825867, - "learning_rate": 6.781686148349762e-06, - "loss": 0.3543, - "step": 18194 - }, - { - "epoch": 1.1891379648388993, - "grad_norm": 0.5013605356216431, - "learning_rate": 6.781359875732976e-06, - "loss": 0.3386, - "step": 18195 - }, - { - "epoch": 1.1892033200444416, - "grad_norm": 0.438453733921051, - "learning_rate": 6.781033594427882e-06, - "loss": 0.3291, - "step": 18196 - }, - { - "epoch": 1.1892686752499837, - "grad_norm": 0.48102906346321106, - "learning_rate": 6.780707304436069e-06, - "loss": 0.4035, - "step": 18197 - }, - { - "epoch": 1.1893340304555258, - "grad_norm": 0.4649040699005127, - "learning_rate": 6.780381005759131e-06, - "loss": 0.3428, - "step": 18198 - }, - { - "epoch": 1.1893993856610678, - "grad_norm": 0.450388640165329, - "learning_rate": 6.780054698398657e-06, - "loss": 0.359, - "step": 18199 - }, - { - "epoch": 1.18946474086661, - "grad_norm": 0.5376363396644592, - "learning_rate": 6.779728382356241e-06, - "loss": 0.387, - "step": 18200 - }, - { - "epoch": 1.1895300960721522, - "grad_norm": 0.486173152923584, - "learning_rate": 6.7794020576334705e-06, - "loss": 0.3701, - "step": 18201 - }, - { - "epoch": 1.1895954512776943, - "grad_norm": 0.44787952303886414, - "learning_rate": 6.779075724231942e-06, - "loss": 0.3636, - "step": 18202 - }, - { - "epoch": 1.1896608064832364, - "grad_norm": 0.4356648623943329, - "learning_rate": 6.778749382153245e-06, - "loss": 0.333, - "step": 18203 - }, - { - "epoch": 1.1897261616887784, - "grad_norm": 0.42434027791023254, - "learning_rate": 6.778423031398968e-06, - "loss": 0.2784, - "step": 18204 - }, - { - "epoch": 1.1897915168943207, - "grad_norm": 0.45641762018203735, - "learning_rate": 6.778096671970709e-06, - "loss": 0.3339, - "step": 18205 - }, - { - "epoch": 1.1898568720998628, - "grad_norm": 0.43157657980918884, - "learning_rate": 6.7777703038700546e-06, - "loss": 0.3295, - "step": 18206 - }, - { - "epoch": 1.1899222273054049, - "grad_norm": 0.4396790564060211, - "learning_rate": 6.7774439270986e-06, - "loss": 0.3312, - "step": 18207 - }, - { - "epoch": 1.189987582510947, - "grad_norm": 0.447013258934021, - "learning_rate": 6.777117541657935e-06, - "loss": 0.3442, - "step": 18208 - }, - { - "epoch": 1.190052937716489, - "grad_norm": 0.4444758892059326, - "learning_rate": 6.776791147549652e-06, - "loss": 0.3545, - "step": 18209 - }, - { - "epoch": 1.1901182929220313, - "grad_norm": 0.4434734284877777, - "learning_rate": 6.776464744775344e-06, - "loss": 0.3232, - "step": 18210 - }, - { - "epoch": 1.1901836481275734, - "grad_norm": 0.4431239366531372, - "learning_rate": 6.7761383333366e-06, - "loss": 0.3371, - "step": 18211 - }, - { - "epoch": 1.1902490033331155, - "grad_norm": 0.48813825845718384, - "learning_rate": 6.775811913235015e-06, - "loss": 0.3763, - "step": 18212 - }, - { - "epoch": 1.1903143585386575, - "grad_norm": 0.43185481429100037, - "learning_rate": 6.775485484472181e-06, - "loss": 0.3367, - "step": 18213 - }, - { - "epoch": 1.1903797137441998, - "grad_norm": 0.4722045063972473, - "learning_rate": 6.775159047049689e-06, - "loss": 0.3731, - "step": 18214 - }, - { - "epoch": 1.190445068949742, - "grad_norm": 0.4516633450984955, - "learning_rate": 6.77483260096913e-06, - "loss": 0.3773, - "step": 18215 - }, - { - "epoch": 1.190510424155284, - "grad_norm": 0.420488566160202, - "learning_rate": 6.774506146232098e-06, - "loss": 0.3009, - "step": 18216 - }, - { - "epoch": 1.190575779360826, - "grad_norm": 0.46615272760391235, - "learning_rate": 6.774179682840185e-06, - "loss": 0.3495, - "step": 18217 - }, - { - "epoch": 1.190641134566368, - "grad_norm": 0.4303334951400757, - "learning_rate": 6.773853210794983e-06, - "loss": 0.3512, - "step": 18218 - }, - { - "epoch": 1.1907064897719104, - "grad_norm": 0.45234352350234985, - "learning_rate": 6.773526730098085e-06, - "loss": 0.3491, - "step": 18219 - }, - { - "epoch": 1.1907718449774525, - "grad_norm": 0.4492032527923584, - "learning_rate": 6.773200240751083e-06, - "loss": 0.35, - "step": 18220 - }, - { - "epoch": 1.1908372001829945, - "grad_norm": 0.46623244881629944, - "learning_rate": 6.772873742755568e-06, - "loss": 0.3798, - "step": 18221 - }, - { - "epoch": 1.1909025553885366, - "grad_norm": 0.4667704403400421, - "learning_rate": 6.772547236113134e-06, - "loss": 0.3679, - "step": 18222 - }, - { - "epoch": 1.190967910594079, - "grad_norm": 0.4145137071609497, - "learning_rate": 6.772220720825373e-06, - "loss": 0.3064, - "step": 18223 - }, - { - "epoch": 1.191033265799621, - "grad_norm": 0.4596615433692932, - "learning_rate": 6.771894196893878e-06, - "loss": 0.3566, - "step": 18224 - }, - { - "epoch": 1.191098621005163, - "grad_norm": 0.44246578216552734, - "learning_rate": 6.771567664320241e-06, - "loss": 0.3383, - "step": 18225 - }, - { - "epoch": 1.1911639762107051, - "grad_norm": 0.4468989670276642, - "learning_rate": 6.7712411231060535e-06, - "loss": 0.3472, - "step": 18226 - }, - { - "epoch": 1.1912293314162472, - "grad_norm": 0.4473535418510437, - "learning_rate": 6.770914573252911e-06, - "loss": 0.3263, - "step": 18227 - }, - { - "epoch": 1.1912946866217895, - "grad_norm": 0.4716193974018097, - "learning_rate": 6.770588014762403e-06, - "loss": 0.3683, - "step": 18228 - }, - { - "epoch": 1.1913600418273316, - "grad_norm": 0.41069507598876953, - "learning_rate": 6.770261447636126e-06, - "loss": 0.3095, - "step": 18229 - }, - { - "epoch": 1.1914253970328736, - "grad_norm": 0.41994649171829224, - "learning_rate": 6.76993487187567e-06, - "loss": 0.2926, - "step": 18230 - }, - { - "epoch": 1.1914907522384157, - "grad_norm": 0.4434123635292053, - "learning_rate": 6.769608287482627e-06, - "loss": 0.3388, - "step": 18231 - }, - { - "epoch": 1.191556107443958, - "grad_norm": 0.5141905546188354, - "learning_rate": 6.769281694458593e-06, - "loss": 0.4069, - "step": 18232 - }, - { - "epoch": 1.1916214626495, - "grad_norm": 0.44772619009017944, - "learning_rate": 6.768955092805158e-06, - "loss": 0.3512, - "step": 18233 - }, - { - "epoch": 1.1916868178550422, - "grad_norm": 0.4542248249053955, - "learning_rate": 6.768628482523918e-06, - "loss": 0.3474, - "step": 18234 - }, - { - "epoch": 1.1917521730605842, - "grad_norm": 0.43119528889656067, - "learning_rate": 6.768301863616462e-06, - "loss": 0.2986, - "step": 18235 - }, - { - "epoch": 1.1918175282661263, - "grad_norm": 0.45107516646385193, - "learning_rate": 6.767975236084387e-06, - "loss": 0.3603, - "step": 18236 - }, - { - "epoch": 1.1918828834716686, - "grad_norm": 0.4860307574272156, - "learning_rate": 6.767648599929284e-06, - "loss": 0.4286, - "step": 18237 - }, - { - "epoch": 1.1919482386772107, - "grad_norm": 0.4243297576904297, - "learning_rate": 6.767321955152746e-06, - "loss": 0.2967, - "step": 18238 - }, - { - "epoch": 1.1920135938827527, - "grad_norm": 0.4643009305000305, - "learning_rate": 6.766995301756366e-06, - "loss": 0.3699, - "step": 18239 - }, - { - "epoch": 1.1920789490882948, - "grad_norm": 0.43786120414733887, - "learning_rate": 6.766668639741738e-06, - "loss": 0.3363, - "step": 18240 - }, - { - "epoch": 1.192144304293837, - "grad_norm": 0.4591931700706482, - "learning_rate": 6.766341969110457e-06, - "loss": 0.3099, - "step": 18241 - }, - { - "epoch": 1.1922096594993792, - "grad_norm": 0.41595813632011414, - "learning_rate": 6.766015289864112e-06, - "loss": 0.3128, - "step": 18242 - }, - { - "epoch": 1.1922750147049213, - "grad_norm": 0.5058223009109497, - "learning_rate": 6.765688602004299e-06, - "loss": 0.4074, - "step": 18243 - }, - { - "epoch": 1.1923403699104633, - "grad_norm": 0.4456130564212799, - "learning_rate": 6.76536190553261e-06, - "loss": 0.3295, - "step": 18244 - }, - { - "epoch": 1.1924057251160054, - "grad_norm": 0.45912837982177734, - "learning_rate": 6.765035200450641e-06, - "loss": 0.3172, - "step": 18245 - }, - { - "epoch": 1.1924710803215477, - "grad_norm": 0.4306924045085907, - "learning_rate": 6.764708486759984e-06, - "loss": 0.32, - "step": 18246 - }, - { - "epoch": 1.1925364355270898, - "grad_norm": 0.46732673048973083, - "learning_rate": 6.764381764462231e-06, - "loss": 0.3679, - "step": 18247 - }, - { - "epoch": 1.1926017907326318, - "grad_norm": 0.42428305745124817, - "learning_rate": 6.764055033558978e-06, - "loss": 0.3328, - "step": 18248 - }, - { - "epoch": 1.192667145938174, - "grad_norm": 0.4748455584049225, - "learning_rate": 6.763728294051817e-06, - "loss": 0.327, - "step": 18249 - }, - { - "epoch": 1.1927325011437162, - "grad_norm": 0.4488120973110199, - "learning_rate": 6.763401545942343e-06, - "loss": 0.357, - "step": 18250 - }, - { - "epoch": 1.1927978563492583, - "grad_norm": 0.4734101891517639, - "learning_rate": 6.763074789232147e-06, - "loss": 0.3517, - "step": 18251 - }, - { - "epoch": 1.1928632115548004, - "grad_norm": 0.4629245400428772, - "learning_rate": 6.762748023922826e-06, - "loss": 0.3853, - "step": 18252 - }, - { - "epoch": 1.1929285667603424, - "grad_norm": 0.45737552642822266, - "learning_rate": 6.762421250015971e-06, - "loss": 0.3353, - "step": 18253 - }, - { - "epoch": 1.1929939219658845, - "grad_norm": 0.42586827278137207, - "learning_rate": 6.762094467513179e-06, - "loss": 0.314, - "step": 18254 - }, - { - "epoch": 1.1930592771714268, - "grad_norm": 0.4478102922439575, - "learning_rate": 6.76176767641604e-06, - "loss": 0.3367, - "step": 18255 - }, - { - "epoch": 1.1931246323769689, - "grad_norm": 0.4543229937553406, - "learning_rate": 6.761440876726151e-06, - "loss": 0.3543, - "step": 18256 - }, - { - "epoch": 1.193189987582511, - "grad_norm": 0.42153725028038025, - "learning_rate": 6.761114068445104e-06, - "loss": 0.3132, - "step": 18257 - }, - { - "epoch": 1.193255342788053, - "grad_norm": 0.4443165063858032, - "learning_rate": 6.760787251574492e-06, - "loss": 0.331, - "step": 18258 - }, - { - "epoch": 1.1933206979935953, - "grad_norm": 0.4544033408164978, - "learning_rate": 6.760460426115913e-06, - "loss": 0.3367, - "step": 18259 - }, - { - "epoch": 1.1933860531991374, - "grad_norm": 0.46163734793663025, - "learning_rate": 6.7601335920709566e-06, - "loss": 0.3337, - "step": 18260 - }, - { - "epoch": 1.1934514084046794, - "grad_norm": 0.46515271067619324, - "learning_rate": 6.759806749441222e-06, - "loss": 0.3807, - "step": 18261 - }, - { - "epoch": 1.1935167636102215, - "grad_norm": 0.42006489634513855, - "learning_rate": 6.759479898228297e-06, - "loss": 0.3045, - "step": 18262 - }, - { - "epoch": 1.1935821188157636, - "grad_norm": 0.4615705907344818, - "learning_rate": 6.759153038433781e-06, - "loss": 0.3286, - "step": 18263 - }, - { - "epoch": 1.1936474740213059, - "grad_norm": 0.4246770739555359, - "learning_rate": 6.758826170059265e-06, - "loss": 0.3131, - "step": 18264 - }, - { - "epoch": 1.193712829226848, - "grad_norm": 0.4041743874549866, - "learning_rate": 6.758499293106345e-06, - "loss": 0.287, - "step": 18265 - }, - { - "epoch": 1.19377818443239, - "grad_norm": 0.44723162055015564, - "learning_rate": 6.758172407576614e-06, - "loss": 0.3509, - "step": 18266 - }, - { - "epoch": 1.193843539637932, - "grad_norm": 0.4195310175418854, - "learning_rate": 6.757845513471668e-06, - "loss": 0.312, - "step": 18267 - }, - { - "epoch": 1.1939088948434744, - "grad_norm": 0.45519694685935974, - "learning_rate": 6.7575186107931e-06, - "loss": 0.3444, - "step": 18268 - }, - { - "epoch": 1.1939742500490165, - "grad_norm": 0.46090462803840637, - "learning_rate": 6.757191699542505e-06, - "loss": 0.3635, - "step": 18269 - }, - { - "epoch": 1.1940396052545585, - "grad_norm": 0.41247397661209106, - "learning_rate": 6.756864779721477e-06, - "loss": 0.2704, - "step": 18270 - }, - { - "epoch": 1.1941049604601006, - "grad_norm": 0.44314804673194885, - "learning_rate": 6.756537851331611e-06, - "loss": 0.3595, - "step": 18271 - }, - { - "epoch": 1.1941703156656427, - "grad_norm": 0.5021336078643799, - "learning_rate": 6.756210914374501e-06, - "loss": 0.4201, - "step": 18272 - }, - { - "epoch": 1.194235670871185, - "grad_norm": 0.4680226445198059, - "learning_rate": 6.755883968851743e-06, - "loss": 0.3646, - "step": 18273 - }, - { - "epoch": 1.194301026076727, - "grad_norm": 0.42127227783203125, - "learning_rate": 6.75555701476493e-06, - "loss": 0.3272, - "step": 18274 - }, - { - "epoch": 1.1943663812822691, - "grad_norm": 0.4428028166294098, - "learning_rate": 6.7552300521156576e-06, - "loss": 0.3445, - "step": 18275 - }, - { - "epoch": 1.1944317364878112, - "grad_norm": 0.4234282374382019, - "learning_rate": 6.754903080905519e-06, - "loss": 0.3154, - "step": 18276 - }, - { - "epoch": 1.1944970916933535, - "grad_norm": 0.4369146525859833, - "learning_rate": 6.754576101136112e-06, - "loss": 0.3119, - "step": 18277 - }, - { - "epoch": 1.1945624468988956, - "grad_norm": 0.4304760992527008, - "learning_rate": 6.754249112809028e-06, - "loss": 0.3306, - "step": 18278 - }, - { - "epoch": 1.1946278021044376, - "grad_norm": 0.41174158453941345, - "learning_rate": 6.753922115925864e-06, - "loss": 0.2967, - "step": 18279 - }, - { - "epoch": 1.1946931573099797, - "grad_norm": 0.44075703620910645, - "learning_rate": 6.753595110488214e-06, - "loss": 0.3228, - "step": 18280 - }, - { - "epoch": 1.1947585125155218, - "grad_norm": 0.4231216013431549, - "learning_rate": 6.753268096497674e-06, - "loss": 0.3147, - "step": 18281 - }, - { - "epoch": 1.1948238677210639, - "grad_norm": 0.4534105062484741, - "learning_rate": 6.752941073955837e-06, - "loss": 0.3398, - "step": 18282 - }, - { - "epoch": 1.1948892229266062, - "grad_norm": 0.47695019841194153, - "learning_rate": 6.752614042864301e-06, - "loss": 0.3946, - "step": 18283 - }, - { - "epoch": 1.1949545781321482, - "grad_norm": 0.44449788331985474, - "learning_rate": 6.752287003224656e-06, - "loss": 0.3492, - "step": 18284 - }, - { - "epoch": 1.1950199333376903, - "grad_norm": 0.4293326437473297, - "learning_rate": 6.751959955038503e-06, - "loss": 0.3232, - "step": 18285 - }, - { - "epoch": 1.1950852885432324, - "grad_norm": 0.4535491168498993, - "learning_rate": 6.751632898307432e-06, - "loss": 0.3344, - "step": 18286 - }, - { - "epoch": 1.1951506437487747, - "grad_norm": 0.4369806945323944, - "learning_rate": 6.751305833033041e-06, - "loss": 0.3412, - "step": 18287 - }, - { - "epoch": 1.1952159989543167, - "grad_norm": 0.45930948853492737, - "learning_rate": 6.750978759216928e-06, - "loss": 0.3445, - "step": 18288 - }, - { - "epoch": 1.1952813541598588, - "grad_norm": 0.4697064459323883, - "learning_rate": 6.750651676860681e-06, - "loss": 0.3336, - "step": 18289 - }, - { - "epoch": 1.1953467093654009, - "grad_norm": 0.44547131657600403, - "learning_rate": 6.7503245859659014e-06, - "loss": 0.3166, - "step": 18290 - }, - { - "epoch": 1.195412064570943, - "grad_norm": 0.5118493437767029, - "learning_rate": 6.7499974865341815e-06, - "loss": 0.3937, - "step": 18291 - }, - { - "epoch": 1.1954774197764853, - "grad_norm": 0.4325563311576843, - "learning_rate": 6.749670378567117e-06, - "loss": 0.3135, - "step": 18292 - }, - { - "epoch": 1.1955427749820273, - "grad_norm": 0.4876798987388611, - "learning_rate": 6.749343262066304e-06, - "loss": 0.3801, - "step": 18293 - }, - { - "epoch": 1.1956081301875694, - "grad_norm": 0.41790640354156494, - "learning_rate": 6.74901613703334e-06, - "loss": 0.3113, - "step": 18294 - }, - { - "epoch": 1.1956734853931115, - "grad_norm": 0.4941082298755646, - "learning_rate": 6.748689003469817e-06, - "loss": 0.3767, - "step": 18295 - }, - { - "epoch": 1.1957388405986538, - "grad_norm": 0.4460056722164154, - "learning_rate": 6.748361861377331e-06, - "loss": 0.3448, - "step": 18296 - }, - { - "epoch": 1.1958041958041958, - "grad_norm": 0.41867968440055847, - "learning_rate": 6.748034710757481e-06, - "loss": 0.3002, - "step": 18297 - }, - { - "epoch": 1.195869551009738, - "grad_norm": 0.46213915944099426, - "learning_rate": 6.747707551611857e-06, - "loss": 0.3563, - "step": 18298 - }, - { - "epoch": 1.19593490621528, - "grad_norm": 0.43928977847099304, - "learning_rate": 6.74738038394206e-06, - "loss": 0.336, - "step": 18299 - }, - { - "epoch": 1.196000261420822, - "grad_norm": 0.4726327657699585, - "learning_rate": 6.747053207749683e-06, - "loss": 0.3712, - "step": 18300 - }, - { - "epoch": 1.1960656166263643, - "grad_norm": 0.488456666469574, - "learning_rate": 6.746726023036323e-06, - "loss": 0.3862, - "step": 18301 - }, - { - "epoch": 1.1961309718319064, - "grad_norm": 0.435140997171402, - "learning_rate": 6.746398829803574e-06, - "loss": 0.3398, - "step": 18302 - }, - { - "epoch": 1.1961963270374485, - "grad_norm": 0.4667286276817322, - "learning_rate": 6.746071628053033e-06, - "loss": 0.3788, - "step": 18303 - }, - { - "epoch": 1.1962616822429906, - "grad_norm": 0.41344180703163147, - "learning_rate": 6.745744417786297e-06, - "loss": 0.3298, - "step": 18304 - }, - { - "epoch": 1.1963270374485329, - "grad_norm": 0.4400014281272888, - "learning_rate": 6.745417199004959e-06, - "loss": 0.3286, - "step": 18305 - }, - { - "epoch": 1.196392392654075, - "grad_norm": 0.46907752752304077, - "learning_rate": 6.745089971710618e-06, - "loss": 0.383, - "step": 18306 - }, - { - "epoch": 1.196457747859617, - "grad_norm": 0.4877195656299591, - "learning_rate": 6.744762735904867e-06, - "loss": 0.3518, - "step": 18307 - }, - { - "epoch": 1.196523103065159, - "grad_norm": 0.4745230972766876, - "learning_rate": 6.744435491589305e-06, - "loss": 0.3855, - "step": 18308 - }, - { - "epoch": 1.1965884582707011, - "grad_norm": 0.42876559495925903, - "learning_rate": 6.7441082387655255e-06, - "loss": 0.3208, - "step": 18309 - }, - { - "epoch": 1.1966538134762434, - "grad_norm": 0.43249624967575073, - "learning_rate": 6.743780977435128e-06, - "loss": 0.3201, - "step": 18310 - }, - { - "epoch": 1.1967191686817855, - "grad_norm": 0.48484688997268677, - "learning_rate": 6.743453707599704e-06, - "loss": 0.3531, - "step": 18311 - }, - { - "epoch": 1.1967845238873276, - "grad_norm": 0.5050974488258362, - "learning_rate": 6.743126429260855e-06, - "loss": 0.4019, - "step": 18312 - }, - { - "epoch": 1.1968498790928697, - "grad_norm": 0.45557287335395813, - "learning_rate": 6.742799142420172e-06, - "loss": 0.3717, - "step": 18313 - }, - { - "epoch": 1.196915234298412, - "grad_norm": 0.4626365900039673, - "learning_rate": 6.742471847079255e-06, - "loss": 0.3663, - "step": 18314 - }, - { - "epoch": 1.196980589503954, - "grad_norm": 0.4385165274143219, - "learning_rate": 6.742144543239701e-06, - "loss": 0.3638, - "step": 18315 - }, - { - "epoch": 1.197045944709496, - "grad_norm": 0.456708699464798, - "learning_rate": 6.741817230903102e-06, - "loss": 0.3552, - "step": 18316 - }, - { - "epoch": 1.1971112999150382, - "grad_norm": 0.4330587685108185, - "learning_rate": 6.741489910071057e-06, - "loss": 0.339, - "step": 18317 - }, - { - "epoch": 1.1971766551205802, - "grad_norm": 0.44074746966362, - "learning_rate": 6.741162580745163e-06, - "loss": 0.3683, - "step": 18318 - }, - { - "epoch": 1.1972420103261225, - "grad_norm": 0.4403582811355591, - "learning_rate": 6.740835242927016e-06, - "loss": 0.3602, - "step": 18319 - }, - { - "epoch": 1.1973073655316646, - "grad_norm": 0.42535579204559326, - "learning_rate": 6.740507896618211e-06, - "loss": 0.3044, - "step": 18320 - }, - { - "epoch": 1.1973727207372067, - "grad_norm": 0.47823721170425415, - "learning_rate": 6.74018054182035e-06, - "loss": 0.379, - "step": 18321 - }, - { - "epoch": 1.1974380759427488, - "grad_norm": 0.46522536873817444, - "learning_rate": 6.739853178535022e-06, - "loss": 0.3186, - "step": 18322 - }, - { - "epoch": 1.197503431148291, - "grad_norm": 0.4860043525695801, - "learning_rate": 6.739525806763828e-06, - "loss": 0.3255, - "step": 18323 - }, - { - "epoch": 1.1975687863538331, - "grad_norm": 0.4254266321659088, - "learning_rate": 6.739198426508364e-06, - "loss": 0.3066, - "step": 18324 - }, - { - "epoch": 1.1976341415593752, - "grad_norm": 0.4584939777851105, - "learning_rate": 6.738871037770228e-06, - "loss": 0.3675, - "step": 18325 - }, - { - "epoch": 1.1976994967649173, - "grad_norm": 0.4622330665588379, - "learning_rate": 6.738543640551015e-06, - "loss": 0.3752, - "step": 18326 - }, - { - "epoch": 1.1977648519704593, - "grad_norm": 0.47860780358314514, - "learning_rate": 6.738216234852321e-06, - "loss": 0.386, - "step": 18327 - }, - { - "epoch": 1.1978302071760016, - "grad_norm": 0.456599622964859, - "learning_rate": 6.737888820675747e-06, - "loss": 0.3106, - "step": 18328 - }, - { - "epoch": 1.1978955623815437, - "grad_norm": 0.45784202218055725, - "learning_rate": 6.737561398022884e-06, - "loss": 0.3593, - "step": 18329 - }, - { - "epoch": 1.1979609175870858, - "grad_norm": 0.5206227898597717, - "learning_rate": 6.7372339668953335e-06, - "loss": 0.3107, - "step": 18330 - }, - { - "epoch": 1.1980262727926279, - "grad_norm": 0.41274091601371765, - "learning_rate": 6.736906527294691e-06, - "loss": 0.2937, - "step": 18331 - }, - { - "epoch": 1.1980916279981701, - "grad_norm": 0.485586941242218, - "learning_rate": 6.736579079222554e-06, - "loss": 0.3808, - "step": 18332 - }, - { - "epoch": 1.1981569832037122, - "grad_norm": 0.40877220034599304, - "learning_rate": 6.73625162268052e-06, - "loss": 0.2818, - "step": 18333 - }, - { - "epoch": 1.1982223384092543, - "grad_norm": 0.4090054929256439, - "learning_rate": 6.735924157670184e-06, - "loss": 0.2949, - "step": 18334 - }, - { - "epoch": 1.1982876936147964, - "grad_norm": 0.45550888776779175, - "learning_rate": 6.7355966841931445e-06, - "loss": 0.3454, - "step": 18335 - }, - { - "epoch": 1.1983530488203384, - "grad_norm": 0.408974826335907, - "learning_rate": 6.735269202250998e-06, - "loss": 0.3092, - "step": 18336 - }, - { - "epoch": 1.1984184040258807, - "grad_norm": 0.46963611245155334, - "learning_rate": 6.734941711845344e-06, - "loss": 0.3839, - "step": 18337 - }, - { - "epoch": 1.1984837592314228, - "grad_norm": 0.4983595907688141, - "learning_rate": 6.734614212977777e-06, - "loss": 0.3335, - "step": 18338 - }, - { - "epoch": 1.1985491144369649, - "grad_norm": 0.5197293162345886, - "learning_rate": 6.7342867056498975e-06, - "loss": 0.4474, - "step": 18339 - }, - { - "epoch": 1.198614469642507, - "grad_norm": 0.41194507479667664, - "learning_rate": 6.7339591898633e-06, - "loss": 0.3003, - "step": 18340 - }, - { - "epoch": 1.1986798248480492, - "grad_norm": 0.5035253763198853, - "learning_rate": 6.733631665619582e-06, - "loss": 0.4243, - "step": 18341 - }, - { - "epoch": 1.1987451800535913, - "grad_norm": 0.4093713164329529, - "learning_rate": 6.733304132920342e-06, - "loss": 0.3176, - "step": 18342 - }, - { - "epoch": 1.1988105352591334, - "grad_norm": 0.4392082393169403, - "learning_rate": 6.732976591767177e-06, - "loss": 0.3054, - "step": 18343 - }, - { - "epoch": 1.1988758904646755, - "grad_norm": 0.45759931206703186, - "learning_rate": 6.732649042161686e-06, - "loss": 0.357, - "step": 18344 - }, - { - "epoch": 1.1989412456702175, - "grad_norm": 0.4255608916282654, - "learning_rate": 6.732321484105465e-06, - "loss": 0.3113, - "step": 18345 - }, - { - "epoch": 1.1990066008757598, - "grad_norm": 0.47021111845970154, - "learning_rate": 6.731993917600113e-06, - "loss": 0.3626, - "step": 18346 - }, - { - "epoch": 1.199071956081302, - "grad_norm": 0.48122110962867737, - "learning_rate": 6.731666342647225e-06, - "loss": 0.3777, - "step": 18347 - }, - { - "epoch": 1.199137311286844, - "grad_norm": 0.45146089792251587, - "learning_rate": 6.7313387592484e-06, - "loss": 0.3636, - "step": 18348 - }, - { - "epoch": 1.199202666492386, - "grad_norm": 0.4471517503261566, - "learning_rate": 6.731011167405237e-06, - "loss": 0.3177, - "step": 18349 - }, - { - "epoch": 1.1992680216979283, - "grad_norm": 0.41821399331092834, - "learning_rate": 6.7306835671193325e-06, - "loss": 0.2989, - "step": 18350 - }, - { - "epoch": 1.1993333769034704, - "grad_norm": 0.451188325881958, - "learning_rate": 6.730355958392285e-06, - "loss": 0.3262, - "step": 18351 - }, - { - "epoch": 1.1993987321090125, - "grad_norm": 0.4433478116989136, - "learning_rate": 6.730028341225692e-06, - "loss": 0.3204, - "step": 18352 - }, - { - "epoch": 1.1994640873145546, - "grad_norm": 0.48006168007850647, - "learning_rate": 6.7297007156211516e-06, - "loss": 0.3424, - "step": 18353 - }, - { - "epoch": 1.1995294425200966, - "grad_norm": 0.4221143424510956, - "learning_rate": 6.72937308158026e-06, - "loss": 0.297, - "step": 18354 - }, - { - "epoch": 1.199594797725639, - "grad_norm": 0.42380595207214355, - "learning_rate": 6.729045439104619e-06, - "loss": 0.3239, - "step": 18355 - }, - { - "epoch": 1.199660152931181, - "grad_norm": 0.47803959250450134, - "learning_rate": 6.728717788195823e-06, - "loss": 0.3746, - "step": 18356 - }, - { - "epoch": 1.199725508136723, - "grad_norm": 0.40294092893600464, - "learning_rate": 6.728390128855472e-06, - "loss": 0.313, - "step": 18357 - }, - { - "epoch": 1.1997908633422651, - "grad_norm": 0.4656482934951782, - "learning_rate": 6.728062461085163e-06, - "loss": 0.3661, - "step": 18358 - }, - { - "epoch": 1.1998562185478074, - "grad_norm": 0.49004656076431274, - "learning_rate": 6.727734784886496e-06, - "loss": 0.3781, - "step": 18359 - }, - { - "epoch": 1.1999215737533495, - "grad_norm": 0.46724215149879456, - "learning_rate": 6.7274071002610675e-06, - "loss": 0.4008, - "step": 18360 - }, - { - "epoch": 1.1999869289588916, - "grad_norm": 0.4653272032737732, - "learning_rate": 6.727079407210475e-06, - "loss": 0.3873, - "step": 18361 - }, - { - "epoch": 1.2000522841644337, - "grad_norm": 0.42921003699302673, - "learning_rate": 6.72675170573632e-06, - "loss": 0.2975, - "step": 18362 - }, - { - "epoch": 1.2001176393699757, - "grad_norm": 0.4449147880077362, - "learning_rate": 6.726423995840197e-06, - "loss": 0.3383, - "step": 18363 - }, - { - "epoch": 1.200182994575518, - "grad_norm": 0.4797692894935608, - "learning_rate": 6.726096277523706e-06, - "loss": 0.3716, - "step": 18364 - }, - { - "epoch": 1.20024834978106, - "grad_norm": 0.4608686566352844, - "learning_rate": 6.725768550788446e-06, - "loss": 0.3385, - "step": 18365 - }, - { - "epoch": 1.2003137049866022, - "grad_norm": 0.44967734813690186, - "learning_rate": 6.725440815636015e-06, - "loss": 0.3531, - "step": 18366 - }, - { - "epoch": 1.2003790601921442, - "grad_norm": 0.46174466609954834, - "learning_rate": 6.725113072068011e-06, - "loss": 0.3359, - "step": 18367 - }, - { - "epoch": 1.2004444153976865, - "grad_norm": 0.4304310083389282, - "learning_rate": 6.724785320086034e-06, - "loss": 0.3457, - "step": 18368 - }, - { - "epoch": 1.2005097706032286, - "grad_norm": 0.4110058546066284, - "learning_rate": 6.724457559691679e-06, - "loss": 0.3237, - "step": 18369 - }, - { - "epoch": 1.2005751258087707, - "grad_norm": 0.4807645380496979, - "learning_rate": 6.72412979088655e-06, - "loss": 0.364, - "step": 18370 - }, - { - "epoch": 1.2006404810143128, - "grad_norm": 0.45398521423339844, - "learning_rate": 6.723802013672243e-06, - "loss": 0.3271, - "step": 18371 - }, - { - "epoch": 1.2007058362198548, - "grad_norm": 0.4776749312877655, - "learning_rate": 6.723474228050353e-06, - "loss": 0.3146, - "step": 18372 - }, - { - "epoch": 1.2007711914253971, - "grad_norm": 0.507209837436676, - "learning_rate": 6.723146434022485e-06, - "loss": 0.4261, - "step": 18373 - }, - { - "epoch": 1.2008365466309392, - "grad_norm": 0.4721026122570038, - "learning_rate": 6.7228186315902335e-06, - "loss": 0.3839, - "step": 18374 - }, - { - "epoch": 1.2009019018364813, - "grad_norm": 0.4585725665092468, - "learning_rate": 6.722490820755199e-06, - "loss": 0.3017, - "step": 18375 - }, - { - "epoch": 1.2009672570420233, - "grad_norm": 0.4542023539543152, - "learning_rate": 6.72216300151898e-06, - "loss": 0.3204, - "step": 18376 - }, - { - "epoch": 1.2010326122475656, - "grad_norm": 0.45127978920936584, - "learning_rate": 6.721835173883175e-06, - "loss": 0.3538, - "step": 18377 - }, - { - "epoch": 1.2010979674531077, - "grad_norm": 0.411732941865921, - "learning_rate": 6.721507337849383e-06, - "loss": 0.2875, - "step": 18378 - }, - { - "epoch": 1.2011633226586498, - "grad_norm": 0.49450406432151794, - "learning_rate": 6.721179493419205e-06, - "loss": 0.3799, - "step": 18379 - }, - { - "epoch": 1.2012286778641919, - "grad_norm": 0.45164230465888977, - "learning_rate": 6.720851640594238e-06, - "loss": 0.3679, - "step": 18380 - }, - { - "epoch": 1.201294033069734, - "grad_norm": 0.4650866687297821, - "learning_rate": 6.72052377937608e-06, - "loss": 0.3671, - "step": 18381 - }, - { - "epoch": 1.2013593882752762, - "grad_norm": 0.44707417488098145, - "learning_rate": 6.720195909766333e-06, - "loss": 0.3516, - "step": 18382 - }, - { - "epoch": 1.2014247434808183, - "grad_norm": 0.4069920480251312, - "learning_rate": 6.719868031766593e-06, - "loss": 0.2836, - "step": 18383 - }, - { - "epoch": 1.2014900986863604, - "grad_norm": 0.5384685397148132, - "learning_rate": 6.719540145378463e-06, - "loss": 0.3649, - "step": 18384 - }, - { - "epoch": 1.2015554538919024, - "grad_norm": 0.44358280301094055, - "learning_rate": 6.719212250603537e-06, - "loss": 0.3455, - "step": 18385 - }, - { - "epoch": 1.2016208090974447, - "grad_norm": 0.4112396836280823, - "learning_rate": 6.718884347443422e-06, - "loss": 0.289, - "step": 18386 - }, - { - "epoch": 1.2016861643029868, - "grad_norm": 0.46106982231140137, - "learning_rate": 6.718556435899708e-06, - "loss": 0.3395, - "step": 18387 - }, - { - "epoch": 1.2017515195085289, - "grad_norm": 0.45882683992385864, - "learning_rate": 6.718228515974001e-06, - "loss": 0.3475, - "step": 18388 - }, - { - "epoch": 1.201816874714071, - "grad_norm": 0.4271091818809509, - "learning_rate": 6.717900587667898e-06, - "loss": 0.2913, - "step": 18389 - }, - { - "epoch": 1.201882229919613, - "grad_norm": 0.43459373712539673, - "learning_rate": 6.717572650982998e-06, - "loss": 0.3317, - "step": 18390 - }, - { - "epoch": 1.2019475851251553, - "grad_norm": 0.4594324231147766, - "learning_rate": 6.717244705920902e-06, - "loss": 0.3937, - "step": 18391 - }, - { - "epoch": 1.2020129403306974, - "grad_norm": 0.4645597040653229, - "learning_rate": 6.716916752483208e-06, - "loss": 0.3541, - "step": 18392 - }, - { - "epoch": 1.2020782955362395, - "grad_norm": 0.47792962193489075, - "learning_rate": 6.716588790671516e-06, - "loss": 0.3837, - "step": 18393 - }, - { - "epoch": 1.2021436507417815, - "grad_norm": 0.4746323227882385, - "learning_rate": 6.716260820487427e-06, - "loss": 0.3692, - "step": 18394 - }, - { - "epoch": 1.2022090059473238, - "grad_norm": 0.44234099984169006, - "learning_rate": 6.715932841932539e-06, - "loss": 0.3298, - "step": 18395 - }, - { - "epoch": 1.202274361152866, - "grad_norm": 0.4199508726596832, - "learning_rate": 6.715604855008451e-06, - "loss": 0.2807, - "step": 18396 - }, - { - "epoch": 1.202339716358408, - "grad_norm": 0.45626598596572876, - "learning_rate": 6.715276859716765e-06, - "loss": 0.3825, - "step": 18397 - }, - { - "epoch": 1.20240507156395, - "grad_norm": 0.44778361916542053, - "learning_rate": 6.714948856059079e-06, - "loss": 0.36, - "step": 18398 - }, - { - "epoch": 1.2024704267694921, - "grad_norm": 0.4117039740085602, - "learning_rate": 6.714620844036993e-06, - "loss": 0.3113, - "step": 18399 - }, - { - "epoch": 1.2025357819750342, - "grad_norm": 0.46973392367362976, - "learning_rate": 6.714292823652109e-06, - "loss": 0.3927, - "step": 18400 - }, - { - "epoch": 1.2026011371805765, - "grad_norm": 0.4537544846534729, - "learning_rate": 6.7139647949060224e-06, - "loss": 0.3453, - "step": 18401 - }, - { - "epoch": 1.2026664923861186, - "grad_norm": 0.4220884144306183, - "learning_rate": 6.713636757800337e-06, - "loss": 0.3525, - "step": 18402 - }, - { - "epoch": 1.2027318475916606, - "grad_norm": 0.4099014699459076, - "learning_rate": 6.71330871233665e-06, - "loss": 0.2811, - "step": 18403 - }, - { - "epoch": 1.2027972027972027, - "grad_norm": 0.44728389382362366, - "learning_rate": 6.7129806585165654e-06, - "loss": 0.3036, - "step": 18404 - }, - { - "epoch": 1.202862558002745, - "grad_norm": 0.4400940537452698, - "learning_rate": 6.712652596341679e-06, - "loss": 0.3257, - "step": 18405 - }, - { - "epoch": 1.202927913208287, - "grad_norm": 0.4527154564857483, - "learning_rate": 6.712324525813594e-06, - "loss": 0.313, - "step": 18406 - }, - { - "epoch": 1.2029932684138291, - "grad_norm": 0.47343170642852783, - "learning_rate": 6.711996446933908e-06, - "loss": 0.3562, - "step": 18407 - }, - { - "epoch": 1.2030586236193712, - "grad_norm": 0.47466331720352173, - "learning_rate": 6.711668359704223e-06, - "loss": 0.3581, - "step": 18408 - }, - { - "epoch": 1.2031239788249133, - "grad_norm": 0.44942548871040344, - "learning_rate": 6.7113402641261376e-06, - "loss": 0.3391, - "step": 18409 - }, - { - "epoch": 1.2031893340304556, - "grad_norm": 0.48919522762298584, - "learning_rate": 6.711012160201253e-06, - "loss": 0.3793, - "step": 18410 - }, - { - "epoch": 1.2032546892359977, - "grad_norm": 0.504368007183075, - "learning_rate": 6.71068404793117e-06, - "loss": 0.3952, - "step": 18411 - }, - { - "epoch": 1.2033200444415397, - "grad_norm": 0.4561106562614441, - "learning_rate": 6.710355927317487e-06, - "loss": 0.3273, - "step": 18412 - }, - { - "epoch": 1.2033853996470818, - "grad_norm": 0.4240868389606476, - "learning_rate": 6.710027798361807e-06, - "loss": 0.312, - "step": 18413 - }, - { - "epoch": 1.203450754852624, - "grad_norm": 0.4330175817012787, - "learning_rate": 6.709699661065727e-06, - "loss": 0.3233, - "step": 18414 - }, - { - "epoch": 1.2035161100581662, - "grad_norm": 0.4198254942893982, - "learning_rate": 6.7093715154308516e-06, - "loss": 0.314, - "step": 18415 - }, - { - "epoch": 1.2035814652637082, - "grad_norm": 0.6476282477378845, - "learning_rate": 6.709043361458778e-06, - "loss": 0.3672, - "step": 18416 - }, - { - "epoch": 1.2036468204692503, - "grad_norm": 0.46822741627693176, - "learning_rate": 6.708715199151108e-06, - "loss": 0.3261, - "step": 18417 - }, - { - "epoch": 1.2037121756747924, - "grad_norm": 0.4359154999256134, - "learning_rate": 6.708387028509442e-06, - "loss": 0.367, - "step": 18418 - }, - { - "epoch": 1.2037775308803347, - "grad_norm": 0.46764418482780457, - "learning_rate": 6.708058849535382e-06, - "loss": 0.3664, - "step": 18419 - }, - { - "epoch": 1.2038428860858768, - "grad_norm": 0.4774992763996124, - "learning_rate": 6.707730662230525e-06, - "loss": 0.3881, - "step": 18420 - }, - { - "epoch": 1.2039082412914188, - "grad_norm": 0.4189451336860657, - "learning_rate": 6.707402466596475e-06, - "loss": 0.3069, - "step": 18421 - }, - { - "epoch": 1.203973596496961, - "grad_norm": 0.44668343663215637, - "learning_rate": 6.7070742626348314e-06, - "loss": 0.3215, - "step": 18422 - }, - { - "epoch": 1.2040389517025032, - "grad_norm": 0.4437035322189331, - "learning_rate": 6.706746050347195e-06, - "loss": 0.343, - "step": 18423 - }, - { - "epoch": 1.2041043069080453, - "grad_norm": 0.40654462575912476, - "learning_rate": 6.706417829735168e-06, - "loss": 0.3015, - "step": 18424 - }, - { - "epoch": 1.2041696621135873, - "grad_norm": 0.42640653252601624, - "learning_rate": 6.706089600800349e-06, - "loss": 0.3275, - "step": 18425 - }, - { - "epoch": 1.2042350173191294, - "grad_norm": 0.4433779716491699, - "learning_rate": 6.705761363544341e-06, - "loss": 0.325, - "step": 18426 - }, - { - "epoch": 1.2043003725246715, - "grad_norm": 0.4791772663593292, - "learning_rate": 6.705433117968744e-06, - "loss": 0.3717, - "step": 18427 - }, - { - "epoch": 1.2043657277302138, - "grad_norm": 0.4121028482913971, - "learning_rate": 6.705104864075158e-06, - "loss": 0.3148, - "step": 18428 - }, - { - "epoch": 1.2044310829357558, - "grad_norm": 0.45768189430236816, - "learning_rate": 6.7047766018651864e-06, - "loss": 0.3427, - "step": 18429 - }, - { - "epoch": 1.204496438141298, - "grad_norm": 0.4377157390117645, - "learning_rate": 6.704448331340427e-06, - "loss": 0.3247, - "step": 18430 - }, - { - "epoch": 1.20456179334684, - "grad_norm": 0.41707608103752136, - "learning_rate": 6.704120052502483e-06, - "loss": 0.33, - "step": 18431 - }, - { - "epoch": 1.2046271485523823, - "grad_norm": 0.442849725484848, - "learning_rate": 6.703791765352954e-06, - "loss": 0.3614, - "step": 18432 - }, - { - "epoch": 1.2046925037579244, - "grad_norm": 0.467950701713562, - "learning_rate": 6.703463469893443e-06, - "loss": 0.3797, - "step": 18433 - }, - { - "epoch": 1.2047578589634664, - "grad_norm": 0.4545117914676666, - "learning_rate": 6.703135166125552e-06, - "loss": 0.3355, - "step": 18434 - }, - { - "epoch": 1.2048232141690085, - "grad_norm": 0.4704337418079376, - "learning_rate": 6.70280685405088e-06, - "loss": 0.3615, - "step": 18435 - }, - { - "epoch": 1.2048885693745506, - "grad_norm": 0.4516729414463043, - "learning_rate": 6.702478533671028e-06, - "loss": 0.3353, - "step": 18436 - }, - { - "epoch": 1.2049539245800929, - "grad_norm": 0.468779593706131, - "learning_rate": 6.702150204987598e-06, - "loss": 0.3744, - "step": 18437 - }, - { - "epoch": 1.205019279785635, - "grad_norm": 0.4292548596858978, - "learning_rate": 6.701821868002194e-06, - "loss": 0.329, - "step": 18438 - }, - { - "epoch": 1.205084634991177, - "grad_norm": 0.46684563159942627, - "learning_rate": 6.701493522716414e-06, - "loss": 0.368, - "step": 18439 - }, - { - "epoch": 1.205149990196719, - "grad_norm": 0.44545918703079224, - "learning_rate": 6.7011651691318615e-06, - "loss": 0.3256, - "step": 18440 - }, - { - "epoch": 1.2052153454022614, - "grad_norm": 0.48918187618255615, - "learning_rate": 6.700836807250135e-06, - "loss": 0.3876, - "step": 18441 - }, - { - "epoch": 1.2052807006078035, - "grad_norm": 0.48126718401908875, - "learning_rate": 6.70050843707284e-06, - "loss": 0.3348, - "step": 18442 - }, - { - "epoch": 1.2053460558133455, - "grad_norm": 0.5070162415504456, - "learning_rate": 6.700180058601576e-06, - "loss": 0.393, - "step": 18443 - }, - { - "epoch": 1.2054114110188876, - "grad_norm": 0.44988730549812317, - "learning_rate": 6.699851671837945e-06, - "loss": 0.3106, - "step": 18444 - }, - { - "epoch": 1.2054767662244297, - "grad_norm": 0.44040974974632263, - "learning_rate": 6.699523276783548e-06, - "loss": 0.3293, - "step": 18445 - }, - { - "epoch": 1.205542121429972, - "grad_norm": 0.48614758253097534, - "learning_rate": 6.699194873439987e-06, - "loss": 0.4042, - "step": 18446 - }, - { - "epoch": 1.205607476635514, - "grad_norm": 0.43726658821105957, - "learning_rate": 6.698866461808865e-06, - "loss": 0.3358, - "step": 18447 - }, - { - "epoch": 1.2056728318410561, - "grad_norm": 0.495114803314209, - "learning_rate": 6.698538041891781e-06, - "loss": 0.3923, - "step": 18448 - }, - { - "epoch": 1.2057381870465982, - "grad_norm": 0.42943546175956726, - "learning_rate": 6.698209613690341e-06, - "loss": 0.3211, - "step": 18449 - }, - { - "epoch": 1.2058035422521405, - "grad_norm": 0.4599703550338745, - "learning_rate": 6.697881177206143e-06, - "loss": 0.3527, - "step": 18450 - }, - { - "epoch": 1.2058688974576826, - "grad_norm": 0.41305315494537354, - "learning_rate": 6.697552732440791e-06, - "loss": 0.3041, - "step": 18451 - }, - { - "epoch": 1.2059342526632246, - "grad_norm": 0.4313637316226959, - "learning_rate": 6.697224279395884e-06, - "loss": 0.3675, - "step": 18452 - }, - { - "epoch": 1.2059996078687667, - "grad_norm": 0.48010942339897156, - "learning_rate": 6.696895818073028e-06, - "loss": 0.3661, - "step": 18453 - }, - { - "epoch": 1.2060649630743088, - "grad_norm": 0.42056751251220703, - "learning_rate": 6.696567348473823e-06, - "loss": 0.3159, - "step": 18454 - }, - { - "epoch": 1.206130318279851, - "grad_norm": 0.4515971541404724, - "learning_rate": 6.696238870599871e-06, - "loss": 0.3728, - "step": 18455 - }, - { - "epoch": 1.2061956734853931, - "grad_norm": 0.42346087098121643, - "learning_rate": 6.695910384452775e-06, - "loss": 0.3413, - "step": 18456 - }, - { - "epoch": 1.2062610286909352, - "grad_norm": 0.45464929938316345, - "learning_rate": 6.695581890034136e-06, - "loss": 0.3215, - "step": 18457 - }, - { - "epoch": 1.2063263838964773, - "grad_norm": 0.4392387866973877, - "learning_rate": 6.695253387345557e-06, - "loss": 0.3225, - "step": 18458 - }, - { - "epoch": 1.2063917391020196, - "grad_norm": 0.4536644220352173, - "learning_rate": 6.69492487638864e-06, - "loss": 0.3767, - "step": 18459 - }, - { - "epoch": 1.2064570943075617, - "grad_norm": 0.43927276134490967, - "learning_rate": 6.694596357164986e-06, - "loss": 0.3384, - "step": 18460 - }, - { - "epoch": 1.2065224495131037, - "grad_norm": 0.5052178502082825, - "learning_rate": 6.6942678296762e-06, - "loss": 0.4275, - "step": 18461 - }, - { - "epoch": 1.2065878047186458, - "grad_norm": 0.4483092725276947, - "learning_rate": 6.693939293923883e-06, - "loss": 0.3586, - "step": 18462 - }, - { - "epoch": 1.2066531599241879, - "grad_norm": 0.4657086431980133, - "learning_rate": 6.693610749909636e-06, - "loss": 0.3051, - "step": 18463 - }, - { - "epoch": 1.2067185151297302, - "grad_norm": 0.4663565754890442, - "learning_rate": 6.693282197635063e-06, - "loss": 0.3384, - "step": 18464 - }, - { - "epoch": 1.2067838703352722, - "grad_norm": 0.4573187232017517, - "learning_rate": 6.692953637101766e-06, - "loss": 0.3564, - "step": 18465 - }, - { - "epoch": 1.2068492255408143, - "grad_norm": 0.42286911606788635, - "learning_rate": 6.692625068311349e-06, - "loss": 0.3141, - "step": 18466 - }, - { - "epoch": 1.2069145807463564, - "grad_norm": 0.4303493797779083, - "learning_rate": 6.692296491265412e-06, - "loss": 0.3104, - "step": 18467 - }, - { - "epoch": 1.2069799359518987, - "grad_norm": 0.450505793094635, - "learning_rate": 6.691967905965559e-06, - "loss": 0.3506, - "step": 18468 - }, - { - "epoch": 1.2070452911574407, - "grad_norm": 0.45550888776779175, - "learning_rate": 6.691639312413392e-06, - "loss": 0.3427, - "step": 18469 - }, - { - "epoch": 1.2071106463629828, - "grad_norm": 0.4892937242984772, - "learning_rate": 6.691310710610515e-06, - "loss": 0.377, - "step": 18470 - }, - { - "epoch": 1.207176001568525, - "grad_norm": 0.45304617285728455, - "learning_rate": 6.69098210055853e-06, - "loss": 0.3365, - "step": 18471 - }, - { - "epoch": 1.207241356774067, - "grad_norm": 0.4439897835254669, - "learning_rate": 6.690653482259038e-06, - "loss": 0.3241, - "step": 18472 - }, - { - "epoch": 1.2073067119796093, - "grad_norm": 0.44223734736442566, - "learning_rate": 6.690324855713643e-06, - "loss": 0.3118, - "step": 18473 - }, - { - "epoch": 1.2073720671851513, - "grad_norm": 0.4482691287994385, - "learning_rate": 6.689996220923949e-06, - "loss": 0.3248, - "step": 18474 - }, - { - "epoch": 1.2074374223906934, - "grad_norm": 0.4404681921005249, - "learning_rate": 6.689667577891557e-06, - "loss": 0.3376, - "step": 18475 - }, - { - "epoch": 1.2075027775962355, - "grad_norm": 0.4479188919067383, - "learning_rate": 6.689338926618073e-06, - "loss": 0.3424, - "step": 18476 - }, - { - "epoch": 1.2075681328017778, - "grad_norm": 0.424572229385376, - "learning_rate": 6.689010267105096e-06, - "loss": 0.3115, - "step": 18477 - }, - { - "epoch": 1.2076334880073198, - "grad_norm": 0.4301793575286865, - "learning_rate": 6.688681599354232e-06, - "loss": 0.3187, - "step": 18478 - }, - { - "epoch": 1.207698843212862, - "grad_norm": 0.5272138118743896, - "learning_rate": 6.688352923367081e-06, - "loss": 0.4015, - "step": 18479 - }, - { - "epoch": 1.207764198418404, - "grad_norm": 0.4448002576828003, - "learning_rate": 6.68802423914525e-06, - "loss": 0.3472, - "step": 18480 - }, - { - "epoch": 1.207829553623946, - "grad_norm": 0.43331530690193176, - "learning_rate": 6.687695546690338e-06, - "loss": 0.3182, - "step": 18481 - }, - { - "epoch": 1.2078949088294884, - "grad_norm": 0.49392253160476685, - "learning_rate": 6.687366846003952e-06, - "loss": 0.4234, - "step": 18482 - }, - { - "epoch": 1.2079602640350304, - "grad_norm": 0.4425753951072693, - "learning_rate": 6.687038137087693e-06, - "loss": 0.3392, - "step": 18483 - }, - { - "epoch": 1.2080256192405725, - "grad_norm": 0.44402262568473816, - "learning_rate": 6.686709419943163e-06, - "loss": 0.3223, - "step": 18484 - }, - { - "epoch": 1.2080909744461146, - "grad_norm": 0.4544754922389984, - "learning_rate": 6.686380694571968e-06, - "loss": 0.3324, - "step": 18485 - }, - { - "epoch": 1.2081563296516569, - "grad_norm": 0.4638741910457611, - "learning_rate": 6.68605196097571e-06, - "loss": 0.3662, - "step": 18486 - }, - { - "epoch": 1.208221684857199, - "grad_norm": 0.45993903279304504, - "learning_rate": 6.685723219155993e-06, - "loss": 0.3411, - "step": 18487 - }, - { - "epoch": 1.208287040062741, - "grad_norm": 0.4373694360256195, - "learning_rate": 6.685394469114419e-06, - "loss": 0.3029, - "step": 18488 - }, - { - "epoch": 1.208352395268283, - "grad_norm": 0.4439953565597534, - "learning_rate": 6.685065710852593e-06, - "loss": 0.3079, - "step": 18489 - }, - { - "epoch": 1.2084177504738252, - "grad_norm": 0.44726791977882385, - "learning_rate": 6.684736944372117e-06, - "loss": 0.3184, - "step": 18490 - }, - { - "epoch": 1.2084831056793675, - "grad_norm": 0.4733264446258545, - "learning_rate": 6.684408169674597e-06, - "loss": 0.3566, - "step": 18491 - }, - { - "epoch": 1.2085484608849095, - "grad_norm": 0.4719187915325165, - "learning_rate": 6.684079386761633e-06, - "loss": 0.3937, - "step": 18492 - }, - { - "epoch": 1.2086138160904516, - "grad_norm": 0.4557696580886841, - "learning_rate": 6.68375059563483e-06, - "loss": 0.3288, - "step": 18493 - }, - { - "epoch": 1.2086791712959937, - "grad_norm": 0.44552236795425415, - "learning_rate": 6.683421796295795e-06, - "loss": 0.3279, - "step": 18494 - }, - { - "epoch": 1.208744526501536, - "grad_norm": 0.45396193861961365, - "learning_rate": 6.683092988746125e-06, - "loss": 0.3198, - "step": 18495 - }, - { - "epoch": 1.208809881707078, - "grad_norm": 0.42722782492637634, - "learning_rate": 6.68276417298743e-06, - "loss": 0.3131, - "step": 18496 - }, - { - "epoch": 1.20887523691262, - "grad_norm": 0.48307183384895325, - "learning_rate": 6.6824353490213105e-06, - "loss": 0.4044, - "step": 18497 - }, - { - "epoch": 1.2089405921181622, - "grad_norm": 0.42966213822364807, - "learning_rate": 6.68210651684937e-06, - "loss": 0.3289, - "step": 18498 - }, - { - "epoch": 1.2090059473237043, - "grad_norm": 0.43984195590019226, - "learning_rate": 6.681777676473214e-06, - "loss": 0.345, - "step": 18499 - }, - { - "epoch": 1.2090713025292465, - "grad_norm": 0.49030372500419617, - "learning_rate": 6.681448827894446e-06, - "loss": 0.3603, - "step": 18500 - } - ], - "logging_steps": 1, - "max_steps": 45903, - "num_input_tokens_seen": 0, - "num_train_epochs": 3, - "save_steps": 500, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 4.037016123604992e+19, - "train_batch_size": 1, - "trial_name": null, - "trial_params": null -} +version https://git-lfs.github.com/spec/v1 +oid sha256:a350d620a7b8b47bd5794fc71ef41903558c4cfdd587518d5f3e89a4823125cf +size 7698445