{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9990224828934506, "global_step": 1533, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0002998043052837573, "loss": 9.8511, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.0002998043052837573, "loss": 6.188, "step": 2 }, { "epoch": 0.01, "learning_rate": 0.00029960861056751466, "loss": 5.9422, "step": 3 }, { "epoch": 0.01, "learning_rate": 0.000299412915851272, "loss": 4.4474, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.00029921722113502935, "loss": 3.7152, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.00029902152641878666, "loss": 3.7503, "step": 6 }, { "epoch": 0.01, "learning_rate": 0.000298825831702544, "loss": 3.443, "step": 7 }, { "epoch": 0.02, "learning_rate": 0.00029863013698630135, "loss": 3.4837, "step": 8 }, { "epoch": 0.02, "learning_rate": 0.00029843444227005867, "loss": 3.4489, "step": 9 }, { "epoch": 0.02, "learning_rate": 0.000298238747553816, "loss": 3.3525, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.00029804305283757335, "loss": 3.3712, "step": 11 }, { "epoch": 0.02, "learning_rate": 0.00029784735812133067, "loss": 3.3725, "step": 12 }, { "epoch": 0.03, "learning_rate": 0.00029765166340508804, "loss": 3.3204, "step": 13 }, { "epoch": 0.03, "learning_rate": 0.00029745596868884536, "loss": 3.3487, "step": 14 }, { "epoch": 0.03, "learning_rate": 0.0002972602739726027, "loss": 3.3468, "step": 15 }, { "epoch": 0.03, "learning_rate": 0.00029706457925636004, "loss": 3.3251, "step": 16 }, { "epoch": 0.03, "learning_rate": 0.0002968688845401174, "loss": 3.3036, "step": 17 }, { "epoch": 0.04, "learning_rate": 0.00029667318982387473, "loss": 3.3156, "step": 18 }, { "epoch": 0.04, "learning_rate": 0.00029647749510763204, "loss": 3.3083, "step": 19 }, { "epoch": 0.04, "learning_rate": 0.0002962818003913894, "loss": 3.3087, "step": 20 }, { "epoch": 0.04, "learning_rate": 0.00029608610567514673, "loss": 3.3111, "step": 21 }, { "epoch": 0.04, "learning_rate": 0.00029589041095890405, "loss": 3.3252, "step": 22 }, { "epoch": 0.04, "learning_rate": 0.0002956947162426614, "loss": 3.3096, "step": 23 }, { "epoch": 0.05, "learning_rate": 0.00029549902152641873, "loss": 3.3336, "step": 24 }, { "epoch": 0.05, "learning_rate": 0.0002953033268101761, "loss": 3.3167, "step": 25 }, { "epoch": 0.05, "learning_rate": 0.0002951076320939334, "loss": 3.2763, "step": 26 }, { "epoch": 0.05, "learning_rate": 0.0002949119373776908, "loss": 3.3215, "step": 27 }, { "epoch": 0.05, "learning_rate": 0.0002947162426614481, "loss": 3.3226, "step": 28 }, { "epoch": 0.06, "learning_rate": 0.0002945205479452055, "loss": 3.3105, "step": 29 }, { "epoch": 0.06, "learning_rate": 0.0002943248532289628, "loss": 3.3021, "step": 30 }, { "epoch": 0.06, "learning_rate": 0.0002941291585127201, "loss": 3.2943, "step": 31 }, { "epoch": 0.06, "learning_rate": 0.0002939334637964774, "loss": 3.3157, "step": 32 }, { "epoch": 0.06, "learning_rate": 0.0002937377690802348, "loss": 3.2878, "step": 33 }, { "epoch": 0.07, "learning_rate": 0.0002935420743639921, "loss": 3.3081, "step": 34 }, { "epoch": 0.07, "learning_rate": 0.0002933463796477495, "loss": 3.2917, "step": 35 }, { "epoch": 0.07, "learning_rate": 0.0002931506849315068, "loss": 3.2955, "step": 36 }, { "epoch": 0.07, "learning_rate": 0.00029295499021526417, "loss": 3.3038, "step": 37 }, { "epoch": 0.07, "learning_rate": 0.0002927592954990215, "loss": 3.2788, "step": 38 }, { "epoch": 0.08, "learning_rate": 0.00029256360078277886, "loss": 3.3037, "step": 39 }, { "epoch": 0.08, "learning_rate": 0.00029236790606653617, "loss": 3.2877, "step": 40 }, { "epoch": 0.08, "learning_rate": 0.00029217221135029354, "loss": 3.2931, "step": 41 }, { "epoch": 0.08, "learning_rate": 0.00029197651663405086, "loss": 3.3072, "step": 42 }, { "epoch": 0.08, "learning_rate": 0.0002917808219178082, "loss": 3.3041, "step": 43 }, { "epoch": 0.09, "learning_rate": 0.0002915851272015655, "loss": 3.3031, "step": 44 }, { "epoch": 0.09, "learning_rate": 0.00029138943248532286, "loss": 3.3022, "step": 45 }, { "epoch": 0.09, "learning_rate": 0.0002911937377690802, "loss": 3.2758, "step": 46 }, { "epoch": 0.09, "learning_rate": 0.00029099804305283755, "loss": 3.3018, "step": 47 }, { "epoch": 0.09, "learning_rate": 0.00029080234833659486, "loss": 3.2911, "step": 48 }, { "epoch": 0.1, "learning_rate": 0.00029060665362035223, "loss": 3.3105, "step": 49 }, { "epoch": 0.1, "learning_rate": 0.00029041095890410955, "loss": 3.3076, "step": 50 }, { "epoch": 0.1, "learning_rate": 0.0002902152641878669, "loss": 3.2728, "step": 51 }, { "epoch": 0.1, "learning_rate": 0.00029001956947162424, "loss": 3.2984, "step": 52 }, { "epoch": 0.1, "learning_rate": 0.00028982387475538155, "loss": 3.2977, "step": 53 }, { "epoch": 0.11, "learning_rate": 0.0002896281800391389, "loss": 3.2912, "step": 54 }, { "epoch": 0.11, "learning_rate": 0.00028943248532289624, "loss": 3.2878, "step": 55 }, { "epoch": 0.11, "learning_rate": 0.00028923679060665356, "loss": 3.2831, "step": 56 }, { "epoch": 0.11, "learning_rate": 0.0002890410958904109, "loss": 3.291, "step": 57 }, { "epoch": 0.11, "learning_rate": 0.00028884540117416824, "loss": 3.3009, "step": 58 }, { "epoch": 0.12, "learning_rate": 0.0002886497064579256, "loss": 3.3219, "step": 59 }, { "epoch": 0.12, "learning_rate": 0.00028845401174168293, "loss": 3.2959, "step": 60 }, { "epoch": 0.12, "learning_rate": 0.0002882583170254403, "loss": 3.2987, "step": 61 }, { "epoch": 0.12, "learning_rate": 0.0002880626223091976, "loss": 3.3099, "step": 62 }, { "epoch": 0.12, "learning_rate": 0.000287866927592955, "loss": 3.3113, "step": 63 }, { "epoch": 0.13, "learning_rate": 0.0002876712328767123, "loss": 3.2871, "step": 64 }, { "epoch": 0.13, "learning_rate": 0.0002874755381604696, "loss": 3.3086, "step": 65 }, { "epoch": 0.13, "learning_rate": 0.000287279843444227, "loss": 3.2915, "step": 66 }, { "epoch": 0.13, "learning_rate": 0.0002870841487279843, "loss": 3.3023, "step": 67 }, { "epoch": 0.13, "learning_rate": 0.0002868884540117416, "loss": 3.2963, "step": 68 }, { "epoch": 0.13, "learning_rate": 0.000286692759295499, "loss": 3.3007, "step": 69 }, { "epoch": 0.14, "learning_rate": 0.0002864970645792563, "loss": 3.298, "step": 70 }, { "epoch": 0.14, "learning_rate": 0.0002863013698630137, "loss": 3.3059, "step": 71 }, { "epoch": 0.14, "learning_rate": 0.000286105675146771, "loss": 3.2833, "step": 72 }, { "epoch": 0.14, "learning_rate": 0.00028590998043052837, "loss": 3.2876, "step": 73 }, { "epoch": 0.14, "learning_rate": 0.0002857142857142857, "loss": 3.3049, "step": 74 }, { "epoch": 0.15, "learning_rate": 0.00028551859099804305, "loss": 3.2997, "step": 75 }, { "epoch": 0.15, "learning_rate": 0.00028532289628180037, "loss": 3.2983, "step": 76 }, { "epoch": 0.15, "learning_rate": 0.0002851272015655577, "loss": 3.2836, "step": 77 }, { "epoch": 0.15, "learning_rate": 0.00028493150684931505, "loss": 3.2893, "step": 78 }, { "epoch": 0.15, "learning_rate": 0.00028473581213307237, "loss": 3.2929, "step": 79 }, { "epoch": 0.16, "learning_rate": 0.0002845401174168297, "loss": 3.2806, "step": 80 }, { "epoch": 0.16, "learning_rate": 0.00028434442270058706, "loss": 3.2935, "step": 81 }, { "epoch": 0.16, "learning_rate": 0.0002841487279843444, "loss": 3.3072, "step": 82 }, { "epoch": 0.16, "learning_rate": 0.00028395303326810174, "loss": 3.3108, "step": 83 }, { "epoch": 0.16, "learning_rate": 0.00028375733855185906, "loss": 3.2932, "step": 84 }, { "epoch": 0.17, "learning_rate": 0.00028356164383561643, "loss": 3.2835, "step": 85 }, { "epoch": 0.17, "learning_rate": 0.00028336594911937375, "loss": 3.3251, "step": 86 }, { "epoch": 0.17, "learning_rate": 0.0002831702544031311, "loss": 3.2801, "step": 87 }, { "epoch": 0.17, "learning_rate": 0.00028297455968688843, "loss": 3.2857, "step": 88 }, { "epoch": 0.17, "learning_rate": 0.00028277886497064575, "loss": 3.3075, "step": 89 }, { "epoch": 0.18, "learning_rate": 0.00028258317025440307, "loss": 3.2917, "step": 90 }, { "epoch": 0.18, "learning_rate": 0.00028238747553816044, "loss": 3.2721, "step": 91 }, { "epoch": 0.18, "learning_rate": 0.00028219178082191775, "loss": 3.2839, "step": 92 }, { "epoch": 0.18, "learning_rate": 0.0002819960861056751, "loss": 3.3037, "step": 93 }, { "epoch": 0.18, "learning_rate": 0.00028180039138943244, "loss": 3.2903, "step": 94 }, { "epoch": 0.19, "learning_rate": 0.0002816046966731898, "loss": 3.283, "step": 95 }, { "epoch": 0.19, "learning_rate": 0.0002814090019569471, "loss": 3.2963, "step": 96 }, { "epoch": 0.19, "learning_rate": 0.0002812133072407045, "loss": 3.3007, "step": 97 }, { "epoch": 0.19, "learning_rate": 0.0002810176125244618, "loss": 3.2914, "step": 98 }, { "epoch": 0.19, "learning_rate": 0.00028082191780821913, "loss": 3.3052, "step": 99 }, { "epoch": 0.2, "learning_rate": 0.0002806262230919765, "loss": 3.308, "step": 100 }, { "epoch": 0.2, "learning_rate": 0.0002804305283757338, "loss": 3.3233, "step": 101 }, { "epoch": 0.2, "learning_rate": 0.00028023483365949113, "loss": 3.2813, "step": 102 }, { "epoch": 0.2, "learning_rate": 0.0002800391389432485, "loss": 3.2777, "step": 103 }, { "epoch": 0.2, "learning_rate": 0.0002798434442270058, "loss": 3.29, "step": 104 }, { "epoch": 0.21, "learning_rate": 0.0002796477495107632, "loss": 3.2916, "step": 105 }, { "epoch": 0.21, "learning_rate": 0.0002794520547945205, "loss": 3.2815, "step": 106 }, { "epoch": 0.21, "learning_rate": 0.0002792563600782779, "loss": 3.2872, "step": 107 }, { "epoch": 0.21, "learning_rate": 0.0002790606653620352, "loss": 3.3095, "step": 108 }, { "epoch": 0.21, "learning_rate": 0.00027886497064579256, "loss": 3.3155, "step": 109 }, { "epoch": 0.22, "learning_rate": 0.0002786692759295499, "loss": 3.3012, "step": 110 }, { "epoch": 0.22, "learning_rate": 0.0002784735812133072, "loss": 3.2934, "step": 111 }, { "epoch": 0.22, "learning_rate": 0.00027827788649706456, "loss": 3.3186, "step": 112 }, { "epoch": 0.22, "learning_rate": 0.0002780821917808219, "loss": 3.2776, "step": 113 }, { "epoch": 0.22, "learning_rate": 0.0002778864970645792, "loss": 3.3155, "step": 114 }, { "epoch": 0.22, "learning_rate": 0.00027769080234833657, "loss": 3.3051, "step": 115 }, { "epoch": 0.23, "learning_rate": 0.0002774951076320939, "loss": 3.3012, "step": 116 }, { "epoch": 0.23, "learning_rate": 0.00027729941291585125, "loss": 3.2965, "step": 117 }, { "epoch": 0.23, "learning_rate": 0.00027710371819960857, "loss": 3.31, "step": 118 }, { "epoch": 0.23, "learning_rate": 0.00027690802348336594, "loss": 3.2916, "step": 119 }, { "epoch": 0.23, "learning_rate": 0.00027671232876712326, "loss": 3.2975, "step": 120 }, { "epoch": 0.24, "learning_rate": 0.0002765166340508806, "loss": 3.2767, "step": 121 }, { "epoch": 0.24, "learning_rate": 0.00027632093933463794, "loss": 3.2795, "step": 122 }, { "epoch": 0.24, "learning_rate": 0.00027612524461839526, "loss": 3.2931, "step": 123 }, { "epoch": 0.24, "learning_rate": 0.00027592954990215263, "loss": 3.2788, "step": 124 }, { "epoch": 0.24, "learning_rate": 0.00027573385518590995, "loss": 3.2713, "step": 125 }, { "epoch": 0.25, "learning_rate": 0.00027553816046966726, "loss": 3.2781, "step": 126 }, { "epoch": 0.25, "learning_rate": 0.00027534246575342463, "loss": 3.2881, "step": 127 }, { "epoch": 0.25, "learning_rate": 0.00027514677103718195, "loss": 3.2577, "step": 128 }, { "epoch": 0.25, "learning_rate": 0.0002749510763209393, "loss": 3.2739, "step": 129 }, { "epoch": 0.25, "learning_rate": 0.00027475538160469663, "loss": 3.277, "step": 130 }, { "epoch": 0.26, "learning_rate": 0.000274559686888454, "loss": 3.2712, "step": 131 }, { "epoch": 0.26, "learning_rate": 0.0002743639921722113, "loss": 3.2791, "step": 132 }, { "epoch": 0.26, "learning_rate": 0.0002741682974559687, "loss": 3.2689, "step": 133 }, { "epoch": 0.26, "learning_rate": 0.000273972602739726, "loss": 3.266, "step": 134 }, { "epoch": 0.26, "learning_rate": 0.0002737769080234833, "loss": 3.2763, "step": 135 }, { "epoch": 0.27, "learning_rate": 0.00027358121330724064, "loss": 3.2733, "step": 136 }, { "epoch": 0.27, "learning_rate": 0.000273385518590998, "loss": 3.2581, "step": 137 }, { "epoch": 0.27, "learning_rate": 0.0002731898238747553, "loss": 3.2686, "step": 138 }, { "epoch": 0.27, "learning_rate": 0.0002729941291585127, "loss": 3.262, "step": 139 }, { "epoch": 0.27, "learning_rate": 0.00027279843444227, "loss": 3.2614, "step": 140 }, { "epoch": 0.28, "learning_rate": 0.0002726027397260274, "loss": 3.2648, "step": 141 }, { "epoch": 0.28, "learning_rate": 0.0002724070450097847, "loss": 3.264, "step": 142 }, { "epoch": 0.28, "learning_rate": 0.00027221135029354207, "loss": 3.2752, "step": 143 }, { "epoch": 0.28, "learning_rate": 0.0002720156555772994, "loss": 3.2459, "step": 144 }, { "epoch": 0.28, "learning_rate": 0.0002718199608610567, "loss": 3.2722, "step": 145 }, { "epoch": 0.29, "learning_rate": 0.0002716242661448141, "loss": 3.2584, "step": 146 }, { "epoch": 0.29, "learning_rate": 0.0002714285714285714, "loss": 3.2534, "step": 147 }, { "epoch": 0.29, "learning_rate": 0.0002712328767123287, "loss": 3.2674, "step": 148 }, { "epoch": 0.29, "learning_rate": 0.0002710371819960861, "loss": 3.2833, "step": 149 }, { "epoch": 0.29, "learning_rate": 0.0002708414872798434, "loss": 3.2681, "step": 150 }, { "epoch": 0.3, "learning_rate": 0.00027064579256360076, "loss": 3.2882, "step": 151 }, { "epoch": 0.3, "learning_rate": 0.0002704500978473581, "loss": 3.2487, "step": 152 }, { "epoch": 0.3, "learning_rate": 0.00027025440313111545, "loss": 3.2686, "step": 153 }, { "epoch": 0.3, "learning_rate": 0.00027005870841487277, "loss": 3.2715, "step": 154 }, { "epoch": 0.3, "learning_rate": 0.00026986301369863014, "loss": 3.3318, "step": 155 }, { "epoch": 0.3, "learning_rate": 0.00026966731898238745, "loss": 3.2894, "step": 156 }, { "epoch": 0.31, "learning_rate": 0.00026947162426614477, "loss": 3.2855, "step": 157 }, { "epoch": 0.31, "learning_rate": 0.00026927592954990214, "loss": 3.2884, "step": 158 }, { "epoch": 0.31, "learning_rate": 0.00026908023483365945, "loss": 3.2857, "step": 159 }, { "epoch": 0.31, "learning_rate": 0.00026888454011741677, "loss": 3.2585, "step": 160 }, { "epoch": 0.31, "learning_rate": 0.00026868884540117414, "loss": 3.2767, "step": 161 }, { "epoch": 0.32, "learning_rate": 0.00026849315068493146, "loss": 3.2683, "step": 162 }, { "epoch": 0.32, "learning_rate": 0.00026829745596868883, "loss": 3.278, "step": 163 }, { "epoch": 0.32, "learning_rate": 0.00026810176125244614, "loss": 3.2791, "step": 164 }, { "epoch": 0.32, "learning_rate": 0.0002679060665362035, "loss": 3.2493, "step": 165 }, { "epoch": 0.32, "learning_rate": 0.00026771037181996083, "loss": 3.2582, "step": 166 }, { "epoch": 0.33, "learning_rate": 0.0002675146771037182, "loss": 3.2674, "step": 167 }, { "epoch": 0.33, "learning_rate": 0.0002673189823874755, "loss": 3.2644, "step": 168 }, { "epoch": 0.33, "learning_rate": 0.00026712328767123283, "loss": 3.275, "step": 169 }, { "epoch": 0.33, "learning_rate": 0.0002669275929549902, "loss": 3.261, "step": 170 }, { "epoch": 0.33, "learning_rate": 0.0002667318982387475, "loss": 3.2672, "step": 171 }, { "epoch": 0.34, "learning_rate": 0.00026653620352250484, "loss": 3.2667, "step": 172 }, { "epoch": 0.34, "learning_rate": 0.0002663405088062622, "loss": 3.2546, "step": 173 }, { "epoch": 0.34, "learning_rate": 0.0002661448140900195, "loss": 3.2809, "step": 174 }, { "epoch": 0.34, "learning_rate": 0.0002659491193737769, "loss": 3.2547, "step": 175 }, { "epoch": 0.34, "learning_rate": 0.0002657534246575342, "loss": 3.267, "step": 176 }, { "epoch": 0.35, "learning_rate": 0.0002655577299412916, "loss": 3.2613, "step": 177 }, { "epoch": 0.35, "learning_rate": 0.0002653620352250489, "loss": 3.2727, "step": 178 }, { "epoch": 0.35, "learning_rate": 0.00026516634050880627, "loss": 3.2651, "step": 179 }, { "epoch": 0.35, "learning_rate": 0.0002649706457925636, "loss": 3.2562, "step": 180 }, { "epoch": 0.35, "learning_rate": 0.0002647749510763209, "loss": 3.2423, "step": 181 }, { "epoch": 0.36, "learning_rate": 0.0002645792563600782, "loss": 3.2899, "step": 182 }, { "epoch": 0.36, "learning_rate": 0.0002643835616438356, "loss": 3.2654, "step": 183 }, { "epoch": 0.36, "learning_rate": 0.0002641878669275929, "loss": 3.2669, "step": 184 }, { "epoch": 0.36, "learning_rate": 0.00026399217221135027, "loss": 3.2416, "step": 185 }, { "epoch": 0.36, "learning_rate": 0.0002637964774951076, "loss": 3.2704, "step": 186 }, { "epoch": 0.37, "learning_rate": 0.00026360078277886496, "loss": 3.2871, "step": 187 }, { "epoch": 0.37, "learning_rate": 0.0002634050880626223, "loss": 3.2606, "step": 188 }, { "epoch": 0.37, "learning_rate": 0.00026320939334637964, "loss": 3.2691, "step": 189 }, { "epoch": 0.37, "learning_rate": 0.00026301369863013696, "loss": 3.2654, "step": 190 }, { "epoch": 0.37, "learning_rate": 0.0002628180039138943, "loss": 3.2508, "step": 191 }, { "epoch": 0.38, "learning_rate": 0.00026262230919765165, "loss": 3.2768, "step": 192 }, { "epoch": 0.38, "learning_rate": 0.00026242661448140896, "loss": 3.2809, "step": 193 }, { "epoch": 0.38, "learning_rate": 0.0002622309197651663, "loss": 3.2623, "step": 194 }, { "epoch": 0.38, "learning_rate": 0.00026203522504892365, "loss": 3.2536, "step": 195 }, { "epoch": 0.38, "learning_rate": 0.00026183953033268097, "loss": 3.2727, "step": 196 }, { "epoch": 0.39, "learning_rate": 0.00026164383561643834, "loss": 3.2631, "step": 197 }, { "epoch": 0.39, "learning_rate": 0.00026144814090019565, "loss": 3.2742, "step": 198 }, { "epoch": 0.39, "learning_rate": 0.000261252446183953, "loss": 3.2635, "step": 199 }, { "epoch": 0.39, "learning_rate": 0.00026105675146771034, "loss": 3.2664, "step": 200 }, { "epoch": 0.39, "learning_rate": 0.0002608610567514677, "loss": 3.2646, "step": 201 }, { "epoch": 0.39, "learning_rate": 0.000260665362035225, "loss": 3.2695, "step": 202 }, { "epoch": 0.4, "learning_rate": 0.00026046966731898234, "loss": 3.2475, "step": 203 }, { "epoch": 0.4, "learning_rate": 0.0002602739726027397, "loss": 3.2742, "step": 204 }, { "epoch": 0.4, "learning_rate": 0.00026007827788649703, "loss": 3.2562, "step": 205 }, { "epoch": 0.4, "learning_rate": 0.00025988258317025435, "loss": 3.2738, "step": 206 }, { "epoch": 0.4, "learning_rate": 0.0002596868884540117, "loss": 3.241, "step": 207 }, { "epoch": 0.41, "learning_rate": 0.00025949119373776903, "loss": 3.2863, "step": 208 }, { "epoch": 0.41, "learning_rate": 0.0002592954990215264, "loss": 3.2579, "step": 209 }, { "epoch": 0.41, "learning_rate": 0.0002590998043052837, "loss": 3.2495, "step": 210 }, { "epoch": 0.41, "learning_rate": 0.0002589041095890411, "loss": 3.2385, "step": 211 }, { "epoch": 0.41, "learning_rate": 0.0002587084148727984, "loss": 3.2534, "step": 212 }, { "epoch": 0.42, "learning_rate": 0.0002585127201565558, "loss": 3.272, "step": 213 }, { "epoch": 0.42, "learning_rate": 0.0002583170254403131, "loss": 3.2582, "step": 214 }, { "epoch": 0.42, "learning_rate": 0.0002581213307240704, "loss": 3.2588, "step": 215 }, { "epoch": 0.42, "learning_rate": 0.0002579256360078278, "loss": 3.2652, "step": 216 }, { "epoch": 0.42, "learning_rate": 0.0002577299412915851, "loss": 3.2526, "step": 217 }, { "epoch": 0.43, "learning_rate": 0.0002575342465753424, "loss": 3.2574, "step": 218 }, { "epoch": 0.43, "learning_rate": 0.0002573385518590998, "loss": 3.2686, "step": 219 }, { "epoch": 0.43, "learning_rate": 0.0002571428571428571, "loss": 3.2537, "step": 220 }, { "epoch": 0.43, "learning_rate": 0.00025694716242661447, "loss": 3.2528, "step": 221 }, { "epoch": 0.43, "learning_rate": 0.0002567514677103718, "loss": 3.2461, "step": 222 }, { "epoch": 0.44, "learning_rate": 0.00025655577299412915, "loss": 3.2619, "step": 223 }, { "epoch": 0.44, "learning_rate": 0.00025636007827788647, "loss": 3.2562, "step": 224 }, { "epoch": 0.44, "learning_rate": 0.00025616438356164384, "loss": 3.2308, "step": 225 }, { "epoch": 0.44, "learning_rate": 0.00025596868884540116, "loss": 3.2509, "step": 226 }, { "epoch": 0.44, "learning_rate": 0.0002557729941291585, "loss": 3.245, "step": 227 }, { "epoch": 0.45, "learning_rate": 0.0002555772994129158, "loss": 3.2481, "step": 228 }, { "epoch": 0.45, "learning_rate": 0.00025538160469667316, "loss": 3.2704, "step": 229 }, { "epoch": 0.45, "learning_rate": 0.0002551859099804305, "loss": 3.2524, "step": 230 }, { "epoch": 0.45, "learning_rate": 0.00025499021526418785, "loss": 3.2566, "step": 231 }, { "epoch": 0.45, "learning_rate": 0.00025479452054794516, "loss": 3.2582, "step": 232 }, { "epoch": 0.46, "learning_rate": 0.00025459882583170253, "loss": 3.2517, "step": 233 }, { "epoch": 0.46, "learning_rate": 0.00025440313111545985, "loss": 3.2534, "step": 234 }, { "epoch": 0.46, "learning_rate": 0.0002542074363992172, "loss": 3.2599, "step": 235 }, { "epoch": 0.46, "learning_rate": 0.00025401174168297454, "loss": 3.2448, "step": 236 }, { "epoch": 0.46, "learning_rate": 0.00025381604696673185, "loss": 3.2577, "step": 237 }, { "epoch": 0.47, "learning_rate": 0.0002536203522504892, "loss": 3.2621, "step": 238 }, { "epoch": 0.47, "learning_rate": 0.00025342465753424654, "loss": 3.2591, "step": 239 }, { "epoch": 0.47, "learning_rate": 0.00025322896281800385, "loss": 3.2394, "step": 240 }, { "epoch": 0.47, "learning_rate": 0.0002530332681017612, "loss": 3.2334, "step": 241 }, { "epoch": 0.47, "learning_rate": 0.00025283757338551854, "loss": 3.2331, "step": 242 }, { "epoch": 0.48, "learning_rate": 0.0002526418786692759, "loss": 3.2516, "step": 243 }, { "epoch": 0.48, "learning_rate": 0.00025244618395303323, "loss": 3.2424, "step": 244 }, { "epoch": 0.48, "learning_rate": 0.0002522504892367906, "loss": 3.2745, "step": 245 }, { "epoch": 0.48, "learning_rate": 0.0002520547945205479, "loss": 3.2458, "step": 246 }, { "epoch": 0.48, "learning_rate": 0.0002518590998043053, "loss": 3.2672, "step": 247 }, { "epoch": 0.48, "learning_rate": 0.0002516634050880626, "loss": 3.2674, "step": 248 }, { "epoch": 0.49, "learning_rate": 0.0002514677103718199, "loss": 3.2412, "step": 249 }, { "epoch": 0.49, "learning_rate": 0.0002512720156555773, "loss": 3.2619, "step": 250 }, { "epoch": 0.49, "learning_rate": 0.0002510763209393346, "loss": 3.2398, "step": 251 }, { "epoch": 0.49, "learning_rate": 0.0002508806262230919, "loss": 3.3514, "step": 252 }, { "epoch": 0.49, "learning_rate": 0.0002506849315068493, "loss": 3.241, "step": 253 }, { "epoch": 0.5, "learning_rate": 0.0002504892367906066, "loss": 3.2831, "step": 254 }, { "epoch": 0.5, "learning_rate": 0.000250293542074364, "loss": 3.2722, "step": 255 }, { "epoch": 0.5, "learning_rate": 0.0002500978473581213, "loss": 3.2489, "step": 256 }, { "epoch": 0.5, "learning_rate": 0.00024990215264187866, "loss": 3.2612, "step": 257 }, { "epoch": 0.5, "learning_rate": 0.000249706457925636, "loss": 3.2677, "step": 258 }, { "epoch": 0.51, "learning_rate": 0.00024951076320939335, "loss": 3.2593, "step": 259 }, { "epoch": 0.51, "learning_rate": 0.00024931506849315067, "loss": 3.2472, "step": 260 }, { "epoch": 0.51, "learning_rate": 0.000249119373776908, "loss": 3.2613, "step": 261 }, { "epoch": 0.51, "learning_rate": 0.00024892367906066535, "loss": 3.2534, "step": 262 }, { "epoch": 0.51, "learning_rate": 0.00024872798434442267, "loss": 3.2596, "step": 263 }, { "epoch": 0.52, "learning_rate": 0.00024853228962818, "loss": 3.2533, "step": 264 }, { "epoch": 0.52, "learning_rate": 0.00024833659491193736, "loss": 3.2543, "step": 265 }, { "epoch": 0.52, "learning_rate": 0.00024814090019569467, "loss": 3.2532, "step": 266 }, { "epoch": 0.52, "learning_rate": 0.00024794520547945204, "loss": 3.2553, "step": 267 }, { "epoch": 0.52, "learning_rate": 0.00024774951076320936, "loss": 3.2615, "step": 268 }, { "epoch": 0.53, "learning_rate": 0.00024755381604696673, "loss": 3.2719, "step": 269 }, { "epoch": 0.53, "learning_rate": 0.00024735812133072404, "loss": 3.2568, "step": 270 }, { "epoch": 0.53, "learning_rate": 0.0002471624266144814, "loss": 3.2807, "step": 271 }, { "epoch": 0.53, "learning_rate": 0.00024696673189823873, "loss": 3.2515, "step": 272 }, { "epoch": 0.53, "learning_rate": 0.00024677103718199605, "loss": 3.252, "step": 273 }, { "epoch": 0.54, "learning_rate": 0.00024657534246575336, "loss": 3.2526, "step": 274 }, { "epoch": 0.54, "learning_rate": 0.00024637964774951073, "loss": 3.2635, "step": 275 }, { "epoch": 0.54, "learning_rate": 0.00024618395303326805, "loss": 3.2602, "step": 276 }, { "epoch": 0.54, "learning_rate": 0.0002459882583170254, "loss": 3.2795, "step": 277 }, { "epoch": 0.54, "learning_rate": 0.00024579256360078274, "loss": 3.2548, "step": 278 }, { "epoch": 0.55, "learning_rate": 0.0002455968688845401, "loss": 3.2573, "step": 279 }, { "epoch": 0.55, "learning_rate": 0.0002454011741682974, "loss": 3.2685, "step": 280 }, { "epoch": 0.55, "learning_rate": 0.0002452054794520548, "loss": 3.2407, "step": 281 }, { "epoch": 0.55, "learning_rate": 0.0002450097847358121, "loss": 3.2391, "step": 282 }, { "epoch": 0.55, "learning_rate": 0.0002448140900195694, "loss": 3.261, "step": 283 }, { "epoch": 0.56, "learning_rate": 0.0002446183953033268, "loss": 3.2429, "step": 284 }, { "epoch": 0.56, "learning_rate": 0.0002444227005870841, "loss": 3.2453, "step": 285 }, { "epoch": 0.56, "learning_rate": 0.00024422700587084143, "loss": 3.2567, "step": 286 }, { "epoch": 0.56, "learning_rate": 0.0002440313111545988, "loss": 3.2556, "step": 287 }, { "epoch": 0.56, "learning_rate": 0.00024383561643835614, "loss": 3.2319, "step": 288 }, { "epoch": 0.57, "learning_rate": 0.00024363992172211349, "loss": 3.2526, "step": 289 }, { "epoch": 0.57, "learning_rate": 0.0002434442270058708, "loss": 3.2126, "step": 290 }, { "epoch": 0.57, "learning_rate": 0.00024324853228962817, "loss": 3.2325, "step": 291 }, { "epoch": 0.57, "learning_rate": 0.0002430528375733855, "loss": 3.2457, "step": 292 }, { "epoch": 0.57, "learning_rate": 0.00024285714285714283, "loss": 3.2599, "step": 293 }, { "epoch": 0.57, "learning_rate": 0.00024266144814090015, "loss": 3.2293, "step": 294 }, { "epoch": 0.58, "learning_rate": 0.00024246575342465752, "loss": 3.2366, "step": 295 }, { "epoch": 0.58, "learning_rate": 0.00024227005870841483, "loss": 3.2429, "step": 296 }, { "epoch": 0.58, "learning_rate": 0.0002420743639921722, "loss": 3.2486, "step": 297 }, { "epoch": 0.58, "learning_rate": 0.00024187866927592952, "loss": 3.2709, "step": 298 }, { "epoch": 0.58, "learning_rate": 0.00024168297455968686, "loss": 3.2296, "step": 299 }, { "epoch": 0.59, "learning_rate": 0.00024148727984344418, "loss": 3.2423, "step": 300 }, { "epoch": 0.59, "learning_rate": 0.00024129158512720155, "loss": 3.2379, "step": 301 }, { "epoch": 0.59, "learning_rate": 0.00024109589041095887, "loss": 3.2257, "step": 302 }, { "epoch": 0.59, "learning_rate": 0.00024090019569471624, "loss": 3.2138, "step": 303 }, { "epoch": 0.59, "learning_rate": 0.00024070450097847355, "loss": 3.2305, "step": 304 }, { "epoch": 0.6, "learning_rate": 0.0002405088062622309, "loss": 3.2308, "step": 305 }, { "epoch": 0.6, "learning_rate": 0.0002403131115459882, "loss": 3.2321, "step": 306 }, { "epoch": 0.6, "learning_rate": 0.00024011741682974558, "loss": 3.235, "step": 307 }, { "epoch": 0.6, "learning_rate": 0.0002399217221135029, "loss": 3.2562, "step": 308 }, { "epoch": 0.6, "learning_rate": 0.00023972602739726024, "loss": 3.2388, "step": 309 }, { "epoch": 0.61, "learning_rate": 0.0002395303326810176, "loss": 3.2424, "step": 310 }, { "epoch": 0.61, "learning_rate": 0.00023933463796477493, "loss": 3.2445, "step": 311 }, { "epoch": 0.61, "learning_rate": 0.00023913894324853225, "loss": 3.2203, "step": 312 }, { "epoch": 0.61, "learning_rate": 0.00023894324853228962, "loss": 3.2501, "step": 313 }, { "epoch": 0.61, "learning_rate": 0.00023874755381604693, "loss": 3.2524, "step": 314 }, { "epoch": 0.62, "learning_rate": 0.00023855185909980428, "loss": 3.2162, "step": 315 }, { "epoch": 0.62, "learning_rate": 0.00023835616438356162, "loss": 3.2458, "step": 316 }, { "epoch": 0.62, "learning_rate": 0.00023816046966731896, "loss": 3.2323, "step": 317 }, { "epoch": 0.62, "learning_rate": 0.00023796477495107628, "loss": 3.2198, "step": 318 }, { "epoch": 0.62, "learning_rate": 0.00023776908023483365, "loss": 3.2279, "step": 319 }, { "epoch": 0.63, "learning_rate": 0.00023757338551859097, "loss": 3.2285, "step": 320 }, { "epoch": 0.63, "learning_rate": 0.0002373776908023483, "loss": 3.2181, "step": 321 }, { "epoch": 0.63, "learning_rate": 0.00023718199608610565, "loss": 3.2484, "step": 322 }, { "epoch": 0.63, "learning_rate": 0.000236986301369863, "loss": 3.2226, "step": 323 }, { "epoch": 0.63, "learning_rate": 0.0002367906066536203, "loss": 3.2449, "step": 324 }, { "epoch": 0.64, "learning_rate": 0.00023659491193737768, "loss": 3.214, "step": 325 }, { "epoch": 0.64, "learning_rate": 0.000236399217221135, "loss": 3.2303, "step": 326 }, { "epoch": 0.64, "learning_rate": 0.00023620352250489234, "loss": 3.2398, "step": 327 }, { "epoch": 0.64, "learning_rate": 0.00023600782778864968, "loss": 3.23, "step": 328 }, { "epoch": 0.64, "learning_rate": 0.00023581213307240703, "loss": 3.2279, "step": 329 }, { "epoch": 0.65, "learning_rate": 0.00023561643835616434, "loss": 3.2459, "step": 330 }, { "epoch": 0.65, "learning_rate": 0.00023542074363992171, "loss": 3.231, "step": 331 }, { "epoch": 0.65, "learning_rate": 0.00023522504892367903, "loss": 3.231, "step": 332 }, { "epoch": 0.65, "learning_rate": 0.00023502935420743637, "loss": 3.2127, "step": 333 }, { "epoch": 0.65, "learning_rate": 0.00023483365949119372, "loss": 3.2426, "step": 334 }, { "epoch": 0.65, "learning_rate": 0.00023463796477495106, "loss": 3.2341, "step": 335 }, { "epoch": 0.66, "learning_rate": 0.00023444227005870838, "loss": 3.2154, "step": 336 }, { "epoch": 0.66, "learning_rate": 0.00023424657534246575, "loss": 3.2525, "step": 337 }, { "epoch": 0.66, "learning_rate": 0.00023405088062622306, "loss": 3.226, "step": 338 }, { "epoch": 0.66, "learning_rate": 0.0002338551859099804, "loss": 3.2392, "step": 339 }, { "epoch": 0.66, "learning_rate": 0.00023365949119373772, "loss": 3.2265, "step": 340 }, { "epoch": 0.67, "learning_rate": 0.0002334637964774951, "loss": 3.2227, "step": 341 }, { "epoch": 0.67, "learning_rate": 0.0002332681017612524, "loss": 3.2645, "step": 342 }, { "epoch": 0.67, "learning_rate": 0.00023307240704500978, "loss": 3.2254, "step": 343 }, { "epoch": 0.67, "learning_rate": 0.0002328767123287671, "loss": 3.2208, "step": 344 }, { "epoch": 0.67, "learning_rate": 0.00023268101761252444, "loss": 3.2552, "step": 345 }, { "epoch": 0.68, "learning_rate": 0.00023248532289628176, "loss": 3.2221, "step": 346 }, { "epoch": 0.68, "learning_rate": 0.00023228962818003913, "loss": 3.2311, "step": 347 }, { "epoch": 0.68, "learning_rate": 0.00023209393346379644, "loss": 3.2324, "step": 348 }, { "epoch": 0.68, "learning_rate": 0.0002318982387475538, "loss": 3.231, "step": 349 }, { "epoch": 0.68, "learning_rate": 0.00023170254403131113, "loss": 3.2104, "step": 350 }, { "epoch": 0.69, "learning_rate": 0.00023150684931506847, "loss": 3.2188, "step": 351 }, { "epoch": 0.69, "learning_rate": 0.0002313111545988258, "loss": 3.2336, "step": 352 }, { "epoch": 0.69, "learning_rate": 0.00023111545988258316, "loss": 3.2231, "step": 353 }, { "epoch": 0.69, "learning_rate": 0.00023091976516634047, "loss": 3.2291, "step": 354 }, { "epoch": 0.69, "learning_rate": 0.00023072407045009782, "loss": 3.2345, "step": 355 }, { "epoch": 0.7, "learning_rate": 0.00023052837573385516, "loss": 3.2295, "step": 356 }, { "epoch": 0.7, "learning_rate": 0.0002303326810176125, "loss": 3.2328, "step": 357 }, { "epoch": 0.7, "learning_rate": 0.00023013698630136982, "loss": 3.2326, "step": 358 }, { "epoch": 0.7, "learning_rate": 0.0002299412915851272, "loss": 3.2417, "step": 359 }, { "epoch": 0.7, "learning_rate": 0.0002297455968688845, "loss": 3.2101, "step": 360 }, { "epoch": 0.71, "learning_rate": 0.00022954990215264185, "loss": 3.2424, "step": 361 }, { "epoch": 0.71, "learning_rate": 0.0002293542074363992, "loss": 3.2306, "step": 362 }, { "epoch": 0.71, "learning_rate": 0.00022915851272015654, "loss": 3.2162, "step": 363 }, { "epoch": 0.71, "learning_rate": 0.00022896281800391385, "loss": 3.2224, "step": 364 }, { "epoch": 0.71, "learning_rate": 0.00022876712328767122, "loss": 3.2394, "step": 365 }, { "epoch": 0.72, "learning_rate": 0.00022857142857142854, "loss": 3.2171, "step": 366 }, { "epoch": 0.72, "learning_rate": 0.00022837573385518588, "loss": 3.231, "step": 367 }, { "epoch": 0.72, "learning_rate": 0.00022818003913894323, "loss": 3.2285, "step": 368 }, { "epoch": 0.72, "learning_rate": 0.00022798434442270057, "loss": 3.2165, "step": 369 }, { "epoch": 0.72, "learning_rate": 0.00022778864970645789, "loss": 3.2229, "step": 370 }, { "epoch": 0.73, "learning_rate": 0.00022759295499021526, "loss": 3.2362, "step": 371 }, { "epoch": 0.73, "learning_rate": 0.00022739726027397257, "loss": 3.2053, "step": 372 }, { "epoch": 0.73, "learning_rate": 0.00022720156555772992, "loss": 3.225, "step": 373 }, { "epoch": 0.73, "learning_rate": 0.00022700587084148726, "loss": 3.2213, "step": 374 }, { "epoch": 0.73, "learning_rate": 0.0002268101761252446, "loss": 3.2369, "step": 375 }, { "epoch": 0.74, "learning_rate": 0.00022661448140900192, "loss": 3.2185, "step": 376 }, { "epoch": 0.74, "learning_rate": 0.0002264187866927593, "loss": 3.2339, "step": 377 }, { "epoch": 0.74, "learning_rate": 0.0002262230919765166, "loss": 3.2277, "step": 378 }, { "epoch": 0.74, "learning_rate": 0.00022602739726027395, "loss": 3.2195, "step": 379 }, { "epoch": 0.74, "learning_rate": 0.0002258317025440313, "loss": 3.2055, "step": 380 }, { "epoch": 0.74, "learning_rate": 0.00022563600782778863, "loss": 3.2305, "step": 381 }, { "epoch": 0.75, "learning_rate": 0.00022544031311154595, "loss": 3.2258, "step": 382 }, { "epoch": 0.75, "learning_rate": 0.00022524461839530332, "loss": 3.2236, "step": 383 }, { "epoch": 0.75, "learning_rate": 0.00022504892367906064, "loss": 3.2307, "step": 384 }, { "epoch": 0.75, "learning_rate": 0.00022485322896281798, "loss": 3.227, "step": 385 }, { "epoch": 0.75, "learning_rate": 0.0002246575342465753, "loss": 3.2127, "step": 386 }, { "epoch": 0.76, "learning_rate": 0.00022446183953033267, "loss": 3.2243, "step": 387 }, { "epoch": 0.76, "learning_rate": 0.00022426614481408998, "loss": 3.2343, "step": 388 }, { "epoch": 0.76, "learning_rate": 0.00022407045009784735, "loss": 3.2228, "step": 389 }, { "epoch": 0.76, "learning_rate": 0.00022387475538160467, "loss": 3.214, "step": 390 }, { "epoch": 0.76, "learning_rate": 0.00022367906066536201, "loss": 3.2134, "step": 391 }, { "epoch": 0.77, "learning_rate": 0.00022348336594911933, "loss": 3.2233, "step": 392 }, { "epoch": 0.77, "learning_rate": 0.0002232876712328767, "loss": 3.2006, "step": 393 }, { "epoch": 0.77, "learning_rate": 0.00022309197651663402, "loss": 3.2294, "step": 394 }, { "epoch": 0.77, "learning_rate": 0.0002228962818003914, "loss": 3.2246, "step": 395 }, { "epoch": 0.77, "learning_rate": 0.0002227005870841487, "loss": 3.2249, "step": 396 }, { "epoch": 0.78, "learning_rate": 0.00022250489236790605, "loss": 3.2249, "step": 397 }, { "epoch": 0.78, "learning_rate": 0.00022230919765166336, "loss": 3.2321, "step": 398 }, { "epoch": 0.78, "learning_rate": 0.00022211350293542073, "loss": 3.2351, "step": 399 }, { "epoch": 0.78, "learning_rate": 0.00022191780821917805, "loss": 3.2277, "step": 400 }, { "epoch": 0.78, "learning_rate": 0.0002217221135029354, "loss": 3.2224, "step": 401 }, { "epoch": 0.79, "learning_rate": 0.00022152641878669274, "loss": 3.2254, "step": 402 }, { "epoch": 0.79, "learning_rate": 0.00022133072407045008, "loss": 3.2131, "step": 403 }, { "epoch": 0.79, "learning_rate": 0.0002211350293542074, "loss": 3.2256, "step": 404 }, { "epoch": 0.79, "learning_rate": 0.00022093933463796477, "loss": 3.2157, "step": 405 }, { "epoch": 0.79, "learning_rate": 0.00022074363992172208, "loss": 3.2321, "step": 406 }, { "epoch": 0.8, "learning_rate": 0.00022054794520547942, "loss": 3.2162, "step": 407 }, { "epoch": 0.8, "learning_rate": 0.00022035225048923677, "loss": 3.2012, "step": 408 }, { "epoch": 0.8, "learning_rate": 0.0002201565557729941, "loss": 3.2206, "step": 409 }, { "epoch": 0.8, "learning_rate": 0.00021996086105675143, "loss": 3.2201, "step": 410 }, { "epoch": 0.8, "learning_rate": 0.0002197651663405088, "loss": 3.2085, "step": 411 }, { "epoch": 0.81, "learning_rate": 0.00021956947162426611, "loss": 3.2232, "step": 412 }, { "epoch": 0.81, "learning_rate": 0.00021937377690802346, "loss": 3.2345, "step": 413 }, { "epoch": 0.81, "learning_rate": 0.0002191780821917808, "loss": 3.2241, "step": 414 }, { "epoch": 0.81, "learning_rate": 0.00021898238747553814, "loss": 3.2347, "step": 415 }, { "epoch": 0.81, "learning_rate": 0.00021878669275929546, "loss": 3.2207, "step": 416 }, { "epoch": 0.82, "learning_rate": 0.00021859099804305283, "loss": 3.2173, "step": 417 }, { "epoch": 0.82, "learning_rate": 0.00021839530332681015, "loss": 3.2202, "step": 418 }, { "epoch": 0.82, "learning_rate": 0.0002181996086105675, "loss": 3.218, "step": 419 }, { "epoch": 0.82, "learning_rate": 0.00021800391389432483, "loss": 3.211, "step": 420 }, { "epoch": 0.82, "learning_rate": 0.00021780821917808218, "loss": 3.2094, "step": 421 }, { "epoch": 0.83, "learning_rate": 0.0002176125244618395, "loss": 3.2192, "step": 422 }, { "epoch": 0.83, "learning_rate": 0.00021741682974559686, "loss": 3.2214, "step": 423 }, { "epoch": 0.83, "learning_rate": 0.00021722113502935418, "loss": 3.2346, "step": 424 }, { "epoch": 0.83, "learning_rate": 0.00021702544031311152, "loss": 3.2299, "step": 425 }, { "epoch": 0.83, "learning_rate": 0.00021682974559686887, "loss": 3.2275, "step": 426 }, { "epoch": 0.83, "learning_rate": 0.0002166340508806262, "loss": 3.2017, "step": 427 }, { "epoch": 0.84, "learning_rate": 0.00021643835616438353, "loss": 3.2083, "step": 428 }, { "epoch": 0.84, "learning_rate": 0.0002162426614481409, "loss": 3.2247, "step": 429 }, { "epoch": 0.84, "learning_rate": 0.0002160469667318982, "loss": 3.211, "step": 430 }, { "epoch": 0.84, "learning_rate": 0.00021585127201565556, "loss": 3.2229, "step": 431 }, { "epoch": 0.84, "learning_rate": 0.00021565557729941287, "loss": 3.197, "step": 432 }, { "epoch": 0.85, "learning_rate": 0.00021545988258317024, "loss": 3.2035, "step": 433 }, { "epoch": 0.85, "learning_rate": 0.00021526418786692756, "loss": 3.2118, "step": 434 }, { "epoch": 0.85, "learning_rate": 0.00021506849315068493, "loss": 3.2077, "step": 435 }, { "epoch": 0.85, "learning_rate": 0.00021487279843444224, "loss": 3.215, "step": 436 }, { "epoch": 0.85, "learning_rate": 0.0002146771037181996, "loss": 3.2145, "step": 437 }, { "epoch": 0.86, "learning_rate": 0.0002144814090019569, "loss": 3.1987, "step": 438 }, { "epoch": 0.86, "learning_rate": 0.00021428571428571427, "loss": 3.2123, "step": 439 }, { "epoch": 0.86, "learning_rate": 0.0002140900195694716, "loss": 3.1901, "step": 440 }, { "epoch": 0.86, "learning_rate": 0.00021389432485322896, "loss": 3.1919, "step": 441 }, { "epoch": 0.86, "learning_rate": 0.00021369863013698628, "loss": 3.2068, "step": 442 }, { "epoch": 0.87, "learning_rate": 0.00021350293542074362, "loss": 3.2125, "step": 443 }, { "epoch": 0.87, "learning_rate": 0.00021330724070450094, "loss": 3.1952, "step": 444 }, { "epoch": 0.87, "learning_rate": 0.0002131115459882583, "loss": 3.2172, "step": 445 }, { "epoch": 0.87, "learning_rate": 0.00021291585127201562, "loss": 3.2029, "step": 446 }, { "epoch": 0.87, "learning_rate": 0.00021272015655577297, "loss": 3.197, "step": 447 }, { "epoch": 0.88, "learning_rate": 0.0002125244618395303, "loss": 3.1921, "step": 448 }, { "epoch": 0.88, "learning_rate": 0.00021232876712328765, "loss": 3.1898, "step": 449 }, { "epoch": 0.88, "learning_rate": 0.00021213307240704497, "loss": 3.201, "step": 450 }, { "epoch": 0.88, "learning_rate": 0.00021193737769080234, "loss": 3.2142, "step": 451 }, { "epoch": 0.88, "learning_rate": 0.00021174168297455966, "loss": 3.2015, "step": 452 }, { "epoch": 0.89, "learning_rate": 0.000211545988258317, "loss": 3.1952, "step": 453 }, { "epoch": 0.89, "learning_rate": 0.00021135029354207434, "loss": 3.2191, "step": 454 }, { "epoch": 0.89, "learning_rate": 0.00021115459882583169, "loss": 3.2111, "step": 455 }, { "epoch": 0.89, "learning_rate": 0.000210958904109589, "loss": 3.1956, "step": 456 }, { "epoch": 0.89, "learning_rate": 0.00021076320939334637, "loss": 3.1995, "step": 457 }, { "epoch": 0.9, "learning_rate": 0.0002105675146771037, "loss": 3.1866, "step": 458 }, { "epoch": 0.9, "learning_rate": 0.00021037181996086103, "loss": 3.2022, "step": 459 }, { "epoch": 0.9, "learning_rate": 0.00021017612524461838, "loss": 3.197, "step": 460 }, { "epoch": 0.9, "learning_rate": 0.00020998043052837572, "loss": 3.1843, "step": 461 }, { "epoch": 0.9, "learning_rate": 0.00020978473581213303, "loss": 3.202, "step": 462 }, { "epoch": 0.91, "learning_rate": 0.0002095890410958904, "loss": 3.1863, "step": 463 }, { "epoch": 0.91, "learning_rate": 0.00020939334637964772, "loss": 3.2066, "step": 464 }, { "epoch": 0.91, "learning_rate": 0.00020919765166340506, "loss": 3.217, "step": 465 }, { "epoch": 0.91, "learning_rate": 0.0002090019569471624, "loss": 3.204, "step": 466 }, { "epoch": 0.91, "learning_rate": 0.00020880626223091975, "loss": 3.2038, "step": 467 }, { "epoch": 0.91, "learning_rate": 0.00020861056751467707, "loss": 3.2124, "step": 468 }, { "epoch": 0.92, "learning_rate": 0.00020841487279843444, "loss": 3.1845, "step": 469 }, { "epoch": 0.92, "learning_rate": 0.00020821917808219175, "loss": 3.2088, "step": 470 }, { "epoch": 0.92, "learning_rate": 0.0002080234833659491, "loss": 3.2019, "step": 471 }, { "epoch": 0.92, "learning_rate": 0.00020782778864970644, "loss": 3.1932, "step": 472 }, { "epoch": 0.92, "learning_rate": 0.00020763209393346378, "loss": 3.1972, "step": 473 }, { "epoch": 0.93, "learning_rate": 0.0002074363992172211, "loss": 3.1964, "step": 474 }, { "epoch": 0.93, "learning_rate": 0.00020724070450097847, "loss": 3.197, "step": 475 }, { "epoch": 0.93, "learning_rate": 0.0002070450097847358, "loss": 3.1952, "step": 476 }, { "epoch": 0.93, "learning_rate": 0.00020684931506849313, "loss": 3.1906, "step": 477 }, { "epoch": 0.93, "learning_rate": 0.00020665362035225045, "loss": 3.1882, "step": 478 }, { "epoch": 0.94, "learning_rate": 0.00020645792563600782, "loss": 3.2031, "step": 479 }, { "epoch": 0.94, "learning_rate": 0.00020626223091976513, "loss": 3.1884, "step": 480 }, { "epoch": 0.94, "learning_rate": 0.0002060665362035225, "loss": 3.1871, "step": 481 }, { "epoch": 0.94, "learning_rate": 0.00020587084148727982, "loss": 3.1905, "step": 482 }, { "epoch": 0.94, "learning_rate": 0.00020567514677103716, "loss": 3.1984, "step": 483 }, { "epoch": 0.95, "learning_rate": 0.00020547945205479448, "loss": 3.1861, "step": 484 }, { "epoch": 0.95, "learning_rate": 0.00020528375733855185, "loss": 3.2009, "step": 485 }, { "epoch": 0.95, "learning_rate": 0.00020508806262230917, "loss": 3.1891, "step": 486 }, { "epoch": 0.95, "learning_rate": 0.00020489236790606654, "loss": 3.1748, "step": 487 }, { "epoch": 0.95, "learning_rate": 0.00020469667318982385, "loss": 3.2082, "step": 488 }, { "epoch": 0.96, "learning_rate": 0.0002045009784735812, "loss": 3.2025, "step": 489 }, { "epoch": 0.96, "learning_rate": 0.0002043052837573385, "loss": 3.186, "step": 490 }, { "epoch": 0.96, "learning_rate": 0.00020410958904109588, "loss": 3.1866, "step": 491 }, { "epoch": 0.96, "learning_rate": 0.0002039138943248532, "loss": 3.1825, "step": 492 }, { "epoch": 0.96, "learning_rate": 0.00020371819960861057, "loss": 3.1905, "step": 493 }, { "epoch": 0.97, "learning_rate": 0.00020352250489236788, "loss": 3.1809, "step": 494 }, { "epoch": 0.97, "learning_rate": 0.00020332681017612523, "loss": 3.1974, "step": 495 }, { "epoch": 0.97, "learning_rate": 0.00020313111545988254, "loss": 3.2088, "step": 496 }, { "epoch": 0.97, "learning_rate": 0.00020293542074363991, "loss": 3.1773, "step": 497 }, { "epoch": 0.97, "learning_rate": 0.00020273972602739723, "loss": 3.2, "step": 498 }, { "epoch": 0.98, "learning_rate": 0.00020254403131115457, "loss": 3.1715, "step": 499 }, { "epoch": 0.98, "learning_rate": 0.00020234833659491192, "loss": 3.1883, "step": 500 }, { "epoch": 0.98, "learning_rate": 0.00020215264187866926, "loss": 3.1853, "step": 501 }, { "epoch": 0.98, "learning_rate": 0.00020195694716242658, "loss": 3.1999, "step": 502 }, { "epoch": 0.98, "learning_rate": 0.00020176125244618395, "loss": 3.1896, "step": 503 }, { "epoch": 0.99, "learning_rate": 0.00020156555772994126, "loss": 3.1796, "step": 504 }, { "epoch": 0.99, "learning_rate": 0.0002013698630136986, "loss": 3.1942, "step": 505 }, { "epoch": 0.99, "learning_rate": 0.00020117416829745595, "loss": 3.2231, "step": 506 }, { "epoch": 0.99, "learning_rate": 0.0002009784735812133, "loss": 3.1886, "step": 507 }, { "epoch": 0.99, "learning_rate": 0.0002007827788649706, "loss": 3.1945, "step": 508 }, { "epoch": 1.0, "learning_rate": 0.00020058708414872798, "loss": 3.1898, "step": 509 }, { "epoch": 1.0, "learning_rate": 0.0002003913894324853, "loss": 3.1947, "step": 510 }, { "epoch": 1.0, "learning_rate": 0.00020019569471624264, "loss": 3.1883, "step": 511 }, { "epoch": 1.0, "eval_loss": 3.519240379333496, "eval_runtime": 508.4632, "eval_samples_per_second": 31.135, "eval_steps_per_second": 0.071, "eval_wer": 1.0, "step": 511 }, { "epoch": 1.0, "learning_rate": 0.00019999999999999998, "loss": 4.7961, "step": 512 }, { "epoch": 1.0, "learning_rate": 0.00019980430528375733, "loss": 3.1885, "step": 513 }, { "epoch": 1.01, "learning_rate": 0.00019960861056751464, "loss": 3.1757, "step": 514 }, { "epoch": 1.01, "learning_rate": 0.000199412915851272, "loss": 3.202, "step": 515 }, { "epoch": 1.01, "learning_rate": 0.00019921722113502933, "loss": 3.1831, "step": 516 }, { "epoch": 1.01, "learning_rate": 0.00019902152641878667, "loss": 3.201, "step": 517 }, { "epoch": 1.01, "learning_rate": 0.00019882583170254402, "loss": 3.175, "step": 518 }, { "epoch": 1.02, "learning_rate": 0.00019863013698630136, "loss": 3.1899, "step": 519 }, { "epoch": 1.02, "learning_rate": 0.00019843444227005867, "loss": 3.1941, "step": 520 }, { "epoch": 1.02, "learning_rate": 0.00019823874755381604, "loss": 3.1991, "step": 521 }, { "epoch": 1.02, "learning_rate": 0.00019804305283757336, "loss": 3.1991, "step": 522 }, { "epoch": 1.02, "learning_rate": 0.0001978473581213307, "loss": 3.1906, "step": 523 }, { "epoch": 1.03, "learning_rate": 0.00019765166340508805, "loss": 3.1671, "step": 524 }, { "epoch": 1.03, "learning_rate": 0.0001974559686888454, "loss": 3.1912, "step": 525 }, { "epoch": 1.03, "learning_rate": 0.0001972602739726027, "loss": 3.1836, "step": 526 }, { "epoch": 1.03, "learning_rate": 0.00019706457925636008, "loss": 3.1905, "step": 527 }, { "epoch": 1.03, "learning_rate": 0.0001968688845401174, "loss": 3.1853, "step": 528 }, { "epoch": 1.04, "learning_rate": 0.00019667318982387474, "loss": 3.2102, "step": 529 }, { "epoch": 1.04, "learning_rate": 0.00019647749510763205, "loss": 3.1815, "step": 530 }, { "epoch": 1.04, "learning_rate": 0.00019628180039138942, "loss": 3.1788, "step": 531 }, { "epoch": 1.04, "learning_rate": 0.00019608610567514674, "loss": 3.1824, "step": 532 }, { "epoch": 1.04, "learning_rate": 0.0001958904109589041, "loss": 3.1827, "step": 533 }, { "epoch": 1.04, "learning_rate": 0.00019569471624266143, "loss": 3.1924, "step": 534 }, { "epoch": 1.05, "learning_rate": 0.00019549902152641877, "loss": 3.177, "step": 535 }, { "epoch": 1.05, "learning_rate": 0.00019530332681017609, "loss": 3.1616, "step": 536 }, { "epoch": 1.05, "learning_rate": 0.00019510763209393346, "loss": 3.1888, "step": 537 }, { "epoch": 1.05, "learning_rate": 0.00019491193737769077, "loss": 3.1857, "step": 538 }, { "epoch": 1.05, "learning_rate": 0.00019471624266144814, "loss": 3.1935, "step": 539 }, { "epoch": 1.06, "learning_rate": 0.00019452054794520546, "loss": 3.1786, "step": 540 }, { "epoch": 1.06, "learning_rate": 0.0001943248532289628, "loss": 3.1816, "step": 541 }, { "epoch": 1.06, "learning_rate": 0.00019412915851272012, "loss": 3.1987, "step": 542 }, { "epoch": 1.06, "learning_rate": 0.0001939334637964775, "loss": 3.1934, "step": 543 }, { "epoch": 1.06, "learning_rate": 0.0001937377690802348, "loss": 3.1769, "step": 544 }, { "epoch": 1.07, "learning_rate": 0.00019354207436399215, "loss": 3.1844, "step": 545 }, { "epoch": 1.07, "learning_rate": 0.0001933463796477495, "loss": 3.2072, "step": 546 }, { "epoch": 1.07, "learning_rate": 0.00019315068493150683, "loss": 3.2114, "step": 547 }, { "epoch": 1.07, "learning_rate": 0.00019295499021526415, "loss": 3.2147, "step": 548 }, { "epoch": 1.07, "learning_rate": 0.00019275929549902152, "loss": 3.2072, "step": 549 }, { "epoch": 1.08, "learning_rate": 0.00019256360078277884, "loss": 3.179, "step": 550 }, { "epoch": 1.08, "learning_rate": 0.00019236790606653618, "loss": 3.1849, "step": 551 }, { "epoch": 1.08, "learning_rate": 0.00019217221135029352, "loss": 3.1866, "step": 552 }, { "epoch": 1.08, "learning_rate": 0.00019197651663405087, "loss": 3.1894, "step": 553 }, { "epoch": 1.08, "learning_rate": 0.00019178082191780818, "loss": 3.1924, "step": 554 }, { "epoch": 1.09, "learning_rate": 0.00019158512720156555, "loss": 3.1798, "step": 555 }, { "epoch": 1.09, "learning_rate": 0.00019138943248532287, "loss": 3.1698, "step": 556 }, { "epoch": 1.09, "learning_rate": 0.00019119373776908021, "loss": 3.1712, "step": 557 }, { "epoch": 1.09, "learning_rate": 0.00019099804305283756, "loss": 3.1818, "step": 558 }, { "epoch": 1.09, "learning_rate": 0.0001908023483365949, "loss": 3.178, "step": 559 }, { "epoch": 1.1, "learning_rate": 0.00019060665362035222, "loss": 3.1869, "step": 560 }, { "epoch": 1.1, "learning_rate": 0.0001904109589041096, "loss": 3.1863, "step": 561 }, { "epoch": 1.1, "learning_rate": 0.0001902152641878669, "loss": 3.1727, "step": 562 }, { "epoch": 1.1, "learning_rate": 0.00019001956947162425, "loss": 3.1704, "step": 563 }, { "epoch": 1.1, "learning_rate": 0.0001898238747553816, "loss": 3.1992, "step": 564 }, { "epoch": 1.11, "learning_rate": 0.00018962818003913893, "loss": 3.1775, "step": 565 }, { "epoch": 1.11, "learning_rate": 0.00018943248532289625, "loss": 3.1862, "step": 566 }, { "epoch": 1.11, "learning_rate": 0.00018923679060665362, "loss": 3.1678, "step": 567 }, { "epoch": 1.11, "learning_rate": 0.00018904109589041094, "loss": 3.1795, "step": 568 }, { "epoch": 1.11, "learning_rate": 0.00018884540117416828, "loss": 3.1739, "step": 569 }, { "epoch": 1.12, "learning_rate": 0.00018864970645792562, "loss": 3.1721, "step": 570 }, { "epoch": 1.12, "learning_rate": 0.00018845401174168297, "loss": 3.1792, "step": 571 }, { "epoch": 1.12, "learning_rate": 0.00018825831702544028, "loss": 3.1794, "step": 572 }, { "epoch": 1.12, "learning_rate": 0.00018806262230919765, "loss": 3.1725, "step": 573 }, { "epoch": 1.12, "learning_rate": 0.00018786692759295497, "loss": 3.183, "step": 574 }, { "epoch": 1.13, "learning_rate": 0.0001876712328767123, "loss": 3.1853, "step": 575 }, { "epoch": 1.13, "learning_rate": 0.00018747553816046963, "loss": 3.1672, "step": 576 }, { "epoch": 1.13, "learning_rate": 0.000187279843444227, "loss": 3.159, "step": 577 }, { "epoch": 1.13, "learning_rate": 0.00018708414872798431, "loss": 3.1886, "step": 578 }, { "epoch": 1.13, "learning_rate": 0.00018688845401174168, "loss": 3.1689, "step": 579 }, { "epoch": 1.13, "learning_rate": 0.000186692759295499, "loss": 3.1761, "step": 580 }, { "epoch": 1.14, "learning_rate": 0.00018649706457925634, "loss": 3.1779, "step": 581 }, { "epoch": 1.14, "learning_rate": 0.00018630136986301366, "loss": 3.1857, "step": 582 }, { "epoch": 1.14, "learning_rate": 0.00018610567514677103, "loss": 3.1752, "step": 583 }, { "epoch": 1.14, "learning_rate": 0.00018590998043052835, "loss": 3.1783, "step": 584 }, { "epoch": 1.14, "learning_rate": 0.00018571428571428572, "loss": 3.1647, "step": 585 }, { "epoch": 1.15, "learning_rate": 0.00018551859099804303, "loss": 3.1541, "step": 586 }, { "epoch": 1.15, "learning_rate": 0.00018532289628180038, "loss": 3.1607, "step": 587 }, { "epoch": 1.15, "learning_rate": 0.0001851272015655577, "loss": 3.1729, "step": 588 }, { "epoch": 1.15, "learning_rate": 0.00018493150684931506, "loss": 3.172, "step": 589 }, { "epoch": 1.15, "learning_rate": 0.00018473581213307238, "loss": 3.1645, "step": 590 }, { "epoch": 1.16, "learning_rate": 0.00018454011741682972, "loss": 3.1598, "step": 591 }, { "epoch": 1.16, "learning_rate": 0.00018434442270058707, "loss": 3.1866, "step": 592 }, { "epoch": 1.16, "learning_rate": 0.0001841487279843444, "loss": 3.179, "step": 593 }, { "epoch": 1.16, "learning_rate": 0.00018395303326810173, "loss": 3.1719, "step": 594 }, { "epoch": 1.16, "learning_rate": 0.0001837573385518591, "loss": 3.1766, "step": 595 }, { "epoch": 1.17, "learning_rate": 0.0001835616438356164, "loss": 3.1781, "step": 596 }, { "epoch": 1.17, "learning_rate": 0.00018336594911937376, "loss": 3.1735, "step": 597 }, { "epoch": 1.17, "learning_rate": 0.0001831702544031311, "loss": 3.1646, "step": 598 }, { "epoch": 1.17, "learning_rate": 0.00018297455968688844, "loss": 3.1759, "step": 599 }, { "epoch": 1.17, "learning_rate": 0.00018277886497064576, "loss": 3.1729, "step": 600 }, { "epoch": 1.18, "learning_rate": 0.00018258317025440313, "loss": 3.1831, "step": 601 }, { "epoch": 1.18, "learning_rate": 0.00018238747553816044, "loss": 3.1864, "step": 602 }, { "epoch": 1.18, "learning_rate": 0.0001821917808219178, "loss": 3.1831, "step": 603 }, { "epoch": 1.18, "learning_rate": 0.00018199608610567513, "loss": 3.1806, "step": 604 }, { "epoch": 1.18, "learning_rate": 0.00018180039138943247, "loss": 3.1729, "step": 605 }, { "epoch": 1.19, "learning_rate": 0.0001816046966731898, "loss": 3.1798, "step": 606 }, { "epoch": 1.19, "learning_rate": 0.00018140900195694716, "loss": 3.1612, "step": 607 }, { "epoch": 1.19, "learning_rate": 0.00018121330724070448, "loss": 3.1533, "step": 608 }, { "epoch": 1.19, "learning_rate": 0.00018101761252446182, "loss": 3.1838, "step": 609 }, { "epoch": 1.19, "learning_rate": 0.00018082191780821916, "loss": 3.1636, "step": 610 }, { "epoch": 1.2, "learning_rate": 0.0001806262230919765, "loss": 3.1589, "step": 611 }, { "epoch": 1.2, "learning_rate": 0.00018043052837573382, "loss": 3.1738, "step": 612 }, { "epoch": 1.2, "learning_rate": 0.0001802348336594912, "loss": 3.2014, "step": 613 }, { "epoch": 1.2, "learning_rate": 0.0001800391389432485, "loss": 3.1662, "step": 614 }, { "epoch": 1.2, "learning_rate": 0.00017984344422700585, "loss": 3.1626, "step": 615 }, { "epoch": 1.21, "learning_rate": 0.0001796477495107632, "loss": 3.1654, "step": 616 }, { "epoch": 1.21, "learning_rate": 0.00017945205479452054, "loss": 3.1632, "step": 617 }, { "epoch": 1.21, "learning_rate": 0.00017925636007827786, "loss": 3.1679, "step": 618 }, { "epoch": 1.21, "learning_rate": 0.00017906066536203523, "loss": 3.1821, "step": 619 }, { "epoch": 1.21, "learning_rate": 0.00017886497064579254, "loss": 3.1765, "step": 620 }, { "epoch": 1.22, "learning_rate": 0.00017866927592954989, "loss": 3.185, "step": 621 }, { "epoch": 1.22, "learning_rate": 0.0001784735812133072, "loss": 3.1564, "step": 622 }, { "epoch": 1.22, "learning_rate": 0.00017827788649706457, "loss": 3.1747, "step": 623 }, { "epoch": 1.22, "learning_rate": 0.0001780821917808219, "loss": 3.1589, "step": 624 }, { "epoch": 1.22, "learning_rate": 0.00017788649706457926, "loss": 3.1813, "step": 625 }, { "epoch": 1.22, "learning_rate": 0.00017769080234833658, "loss": 3.1688, "step": 626 }, { "epoch": 1.23, "learning_rate": 0.00017749510763209392, "loss": 3.1721, "step": 627 }, { "epoch": 1.23, "learning_rate": 0.00017729941291585123, "loss": 3.1574, "step": 628 }, { "epoch": 1.23, "learning_rate": 0.0001771037181996086, "loss": 3.1615, "step": 629 }, { "epoch": 1.23, "learning_rate": 0.00017690802348336592, "loss": 3.1536, "step": 630 }, { "epoch": 1.23, "learning_rate": 0.0001767123287671233, "loss": 3.1602, "step": 631 }, { "epoch": 1.24, "learning_rate": 0.0001765166340508806, "loss": 3.18, "step": 632 }, { "epoch": 1.24, "learning_rate": 0.00017632093933463795, "loss": 3.1716, "step": 633 }, { "epoch": 1.24, "learning_rate": 0.00017612524461839527, "loss": 3.1635, "step": 634 }, { "epoch": 1.24, "learning_rate": 0.00017592954990215264, "loss": 3.1576, "step": 635 }, { "epoch": 1.24, "learning_rate": 0.00017573385518590995, "loss": 3.1521, "step": 636 }, { "epoch": 1.25, "learning_rate": 0.0001755381604696673, "loss": 3.1763, "step": 637 }, { "epoch": 1.25, "learning_rate": 0.00017534246575342464, "loss": 3.1736, "step": 638 }, { "epoch": 1.25, "learning_rate": 0.00017514677103718198, "loss": 3.1842, "step": 639 }, { "epoch": 1.25, "learning_rate": 0.0001749510763209393, "loss": 3.155, "step": 640 }, { "epoch": 1.25, "learning_rate": 0.00017475538160469667, "loss": 3.1671, "step": 641 }, { "epoch": 1.26, "learning_rate": 0.000174559686888454, "loss": 3.1791, "step": 642 }, { "epoch": 1.26, "learning_rate": 0.00017436399217221133, "loss": 3.1427, "step": 643 }, { "epoch": 1.26, "learning_rate": 0.00017416829745596867, "loss": 3.1859, "step": 644 }, { "epoch": 1.26, "learning_rate": 0.00017397260273972602, "loss": 3.183, "step": 645 }, { "epoch": 1.26, "learning_rate": 0.00017377690802348333, "loss": 3.1634, "step": 646 }, { "epoch": 1.27, "learning_rate": 0.0001735812133072407, "loss": 3.1799, "step": 647 }, { "epoch": 1.27, "learning_rate": 0.00017338551859099802, "loss": 3.1703, "step": 648 }, { "epoch": 1.27, "learning_rate": 0.00017318982387475536, "loss": 3.1582, "step": 649 }, { "epoch": 1.27, "learning_rate": 0.0001729941291585127, "loss": 3.1662, "step": 650 }, { "epoch": 1.27, "learning_rate": 0.00017279843444227005, "loss": 3.1642, "step": 651 }, { "epoch": 1.28, "learning_rate": 0.00017260273972602737, "loss": 3.173, "step": 652 }, { "epoch": 1.28, "learning_rate": 0.00017240704500978474, "loss": 3.168, "step": 653 }, { "epoch": 1.28, "learning_rate": 0.00017221135029354205, "loss": 3.1578, "step": 654 }, { "epoch": 1.28, "learning_rate": 0.0001720156555772994, "loss": 3.1685, "step": 655 }, { "epoch": 1.28, "learning_rate": 0.00017181996086105674, "loss": 3.1567, "step": 656 }, { "epoch": 1.29, "learning_rate": 0.00017162426614481408, "loss": 3.1639, "step": 657 }, { "epoch": 1.29, "learning_rate": 0.0001714285714285714, "loss": 3.1586, "step": 658 }, { "epoch": 1.29, "learning_rate": 0.00017123287671232877, "loss": 3.1552, "step": 659 }, { "epoch": 1.29, "learning_rate": 0.00017103718199608608, "loss": 3.153, "step": 660 }, { "epoch": 1.29, "learning_rate": 0.00017084148727984343, "loss": 3.1729, "step": 661 }, { "epoch": 1.3, "learning_rate": 0.00017064579256360077, "loss": 3.1465, "step": 662 }, { "epoch": 1.3, "learning_rate": 0.00017045009784735811, "loss": 3.168, "step": 663 }, { "epoch": 1.3, "learning_rate": 0.00017025440313111543, "loss": 3.1802, "step": 664 }, { "epoch": 1.3, "learning_rate": 0.0001700587084148728, "loss": 3.1603, "step": 665 }, { "epoch": 1.3, "learning_rate": 0.00016986301369863012, "loss": 3.1563, "step": 666 }, { "epoch": 1.3, "learning_rate": 0.00016966731898238746, "loss": 3.1637, "step": 667 }, { "epoch": 1.31, "learning_rate": 0.00016947162426614478, "loss": 3.1618, "step": 668 }, { "epoch": 1.31, "learning_rate": 0.00016927592954990215, "loss": 3.1485, "step": 669 }, { "epoch": 1.31, "learning_rate": 0.00016908023483365946, "loss": 3.1536, "step": 670 }, { "epoch": 1.31, "learning_rate": 0.00016888454011741683, "loss": 3.1656, "step": 671 }, { "epoch": 1.31, "learning_rate": 0.00016868884540117415, "loss": 3.1689, "step": 672 }, { "epoch": 1.32, "learning_rate": 0.0001684931506849315, "loss": 3.1441, "step": 673 }, { "epoch": 1.32, "learning_rate": 0.0001682974559686888, "loss": 3.1692, "step": 674 }, { "epoch": 1.32, "learning_rate": 0.00016810176125244618, "loss": 3.1777, "step": 675 }, { "epoch": 1.32, "learning_rate": 0.0001679060665362035, "loss": 3.1608, "step": 676 }, { "epoch": 1.32, "learning_rate": 0.00016771037181996087, "loss": 3.1557, "step": 677 }, { "epoch": 1.33, "learning_rate": 0.00016751467710371818, "loss": 3.1526, "step": 678 }, { "epoch": 1.33, "learning_rate": 0.00016731898238747553, "loss": 3.1393, "step": 679 }, { "epoch": 1.33, "learning_rate": 0.00016712328767123284, "loss": 3.165, "step": 680 }, { "epoch": 1.33, "learning_rate": 0.0001669275929549902, "loss": 3.1819, "step": 681 }, { "epoch": 1.33, "learning_rate": 0.00016673189823874753, "loss": 3.1709, "step": 682 }, { "epoch": 1.34, "learning_rate": 0.00016653620352250487, "loss": 3.166, "step": 683 }, { "epoch": 1.34, "learning_rate": 0.00016634050880626222, "loss": 3.1599, "step": 684 }, { "epoch": 1.34, "learning_rate": 0.00016614481409001956, "loss": 3.1628, "step": 685 }, { "epoch": 1.34, "learning_rate": 0.00016594911937377687, "loss": 3.1642, "step": 686 }, { "epoch": 1.34, "learning_rate": 0.00016575342465753425, "loss": 3.1734, "step": 687 }, { "epoch": 1.35, "learning_rate": 0.00016555772994129156, "loss": 3.1581, "step": 688 }, { "epoch": 1.35, "learning_rate": 0.0001653620352250489, "loss": 3.1575, "step": 689 }, { "epoch": 1.35, "learning_rate": 0.00016516634050880625, "loss": 3.1557, "step": 690 }, { "epoch": 1.35, "learning_rate": 0.0001649706457925636, "loss": 3.15, "step": 691 }, { "epoch": 1.35, "learning_rate": 0.0001647749510763209, "loss": 3.1475, "step": 692 }, { "epoch": 1.36, "learning_rate": 0.00016457925636007828, "loss": 3.1371, "step": 693 }, { "epoch": 1.36, "learning_rate": 0.0001643835616438356, "loss": 3.1573, "step": 694 }, { "epoch": 1.36, "learning_rate": 0.00016418786692759294, "loss": 3.1629, "step": 695 }, { "epoch": 1.36, "learning_rate": 0.00016399217221135028, "loss": 3.1715, "step": 696 }, { "epoch": 1.36, "learning_rate": 0.00016379647749510762, "loss": 3.1557, "step": 697 }, { "epoch": 1.37, "learning_rate": 0.00016360078277886494, "loss": 3.1765, "step": 698 }, { "epoch": 1.37, "learning_rate": 0.0001634050880626223, "loss": 3.1585, "step": 699 }, { "epoch": 1.37, "learning_rate": 0.00016320939334637963, "loss": 3.186, "step": 700 }, { "epoch": 1.37, "learning_rate": 0.00016301369863013697, "loss": 3.1744, "step": 701 }, { "epoch": 1.37, "learning_rate": 0.0001628180039138943, "loss": 3.1756, "step": 702 }, { "epoch": 1.38, "learning_rate": 0.00016262230919765166, "loss": 3.1623, "step": 703 }, { "epoch": 1.38, "learning_rate": 0.00016242661448140897, "loss": 3.1626, "step": 704 }, { "epoch": 1.38, "learning_rate": 0.00016223091976516634, "loss": 3.1468, "step": 705 }, { "epoch": 1.38, "learning_rate": 0.00016203522504892366, "loss": 3.1574, "step": 706 }, { "epoch": 1.38, "learning_rate": 0.000161839530332681, "loss": 3.1872, "step": 707 }, { "epoch": 1.39, "learning_rate": 0.00016164383561643835, "loss": 3.1618, "step": 708 }, { "epoch": 1.39, "learning_rate": 0.0001614481409001957, "loss": 3.1654, "step": 709 }, { "epoch": 1.39, "learning_rate": 0.000161252446183953, "loss": 3.1765, "step": 710 }, { "epoch": 1.39, "learning_rate": 0.00016105675146771038, "loss": 3.1534, "step": 711 }, { "epoch": 1.39, "learning_rate": 0.0001608610567514677, "loss": 3.1434, "step": 712 }, { "epoch": 1.39, "learning_rate": 0.00016066536203522504, "loss": 3.1704, "step": 713 }, { "epoch": 1.4, "learning_rate": 0.00016046966731898235, "loss": 3.1692, "step": 714 }, { "epoch": 1.4, "learning_rate": 0.00016027397260273972, "loss": 3.1699, "step": 715 }, { "epoch": 1.4, "learning_rate": 0.00016007827788649704, "loss": 3.1624, "step": 716 }, { "epoch": 1.4, "learning_rate": 0.0001598825831702544, "loss": 3.1482, "step": 717 }, { "epoch": 1.4, "learning_rate": 0.00015968688845401172, "loss": 3.1488, "step": 718 }, { "epoch": 1.41, "learning_rate": 0.00015949119373776907, "loss": 3.1766, "step": 719 }, { "epoch": 1.41, "learning_rate": 0.00015929549902152638, "loss": 3.1617, "step": 720 }, { "epoch": 1.41, "learning_rate": 0.00015909980430528375, "loss": 3.1672, "step": 721 }, { "epoch": 1.41, "learning_rate": 0.00015890410958904107, "loss": 3.1481, "step": 722 }, { "epoch": 1.41, "learning_rate": 0.00015870841487279844, "loss": 3.1539, "step": 723 }, { "epoch": 1.42, "learning_rate": 0.00015851272015655576, "loss": 3.1513, "step": 724 }, { "epoch": 1.42, "learning_rate": 0.0001583170254403131, "loss": 3.1677, "step": 725 }, { "epoch": 1.42, "learning_rate": 0.00015812133072407042, "loss": 3.1623, "step": 726 }, { "epoch": 1.42, "learning_rate": 0.0001579256360078278, "loss": 3.1733, "step": 727 }, { "epoch": 1.42, "learning_rate": 0.0001577299412915851, "loss": 3.1467, "step": 728 }, { "epoch": 1.43, "learning_rate": 0.00015753424657534245, "loss": 3.1513, "step": 729 }, { "epoch": 1.43, "learning_rate": 0.0001573385518590998, "loss": 3.1497, "step": 730 }, { "epoch": 1.43, "learning_rate": 0.00015714285714285713, "loss": 3.159, "step": 731 }, { "epoch": 1.43, "learning_rate": 0.00015694716242661445, "loss": 3.1563, "step": 732 }, { "epoch": 1.43, "learning_rate": 0.00015675146771037182, "loss": 3.1581, "step": 733 }, { "epoch": 1.44, "learning_rate": 0.00015655577299412914, "loss": 3.133, "step": 734 }, { "epoch": 1.44, "learning_rate": 0.00015636007827788648, "loss": 3.145, "step": 735 }, { "epoch": 1.44, "learning_rate": 0.00015616438356164382, "loss": 3.1565, "step": 736 }, { "epoch": 1.44, "learning_rate": 0.00015596868884540117, "loss": 3.1405, "step": 737 }, { "epoch": 1.44, "learning_rate": 0.00015577299412915848, "loss": 3.1518, "step": 738 }, { "epoch": 1.45, "learning_rate": 0.00015557729941291585, "loss": 3.1659, "step": 739 }, { "epoch": 1.45, "learning_rate": 0.00015538160469667317, "loss": 3.1323, "step": 740 }, { "epoch": 1.45, "learning_rate": 0.0001551859099804305, "loss": 3.1682, "step": 741 }, { "epoch": 1.45, "learning_rate": 0.00015499021526418785, "loss": 3.1342, "step": 742 }, { "epoch": 1.45, "learning_rate": 0.0001547945205479452, "loss": 3.1752, "step": 743 }, { "epoch": 1.46, "learning_rate": 0.00015459882583170251, "loss": 3.156, "step": 744 }, { "epoch": 1.46, "learning_rate": 0.00015440313111545988, "loss": 3.1592, "step": 745 }, { "epoch": 1.46, "learning_rate": 0.0001542074363992172, "loss": 3.1566, "step": 746 }, { "epoch": 1.46, "learning_rate": 0.00015401174168297454, "loss": 3.1442, "step": 747 }, { "epoch": 1.46, "learning_rate": 0.0001538160469667319, "loss": 3.1806, "step": 748 }, { "epoch": 1.47, "learning_rate": 0.00015362035225048923, "loss": 3.1548, "step": 749 }, { "epoch": 1.47, "learning_rate": 0.00015342465753424655, "loss": 3.1635, "step": 750 }, { "epoch": 1.47, "learning_rate": 0.00015322896281800392, "loss": 3.1544, "step": 751 }, { "epoch": 1.47, "learning_rate": 0.00015303326810176123, "loss": 3.1547, "step": 752 }, { "epoch": 1.47, "learning_rate": 0.00015283757338551858, "loss": 3.1348, "step": 753 }, { "epoch": 1.48, "learning_rate": 0.00015264187866927592, "loss": 3.1474, "step": 754 }, { "epoch": 1.48, "learning_rate": 0.00015244618395303326, "loss": 3.16, "step": 755 }, { "epoch": 1.48, "learning_rate": 0.00015225048923679058, "loss": 3.1442, "step": 756 }, { "epoch": 1.48, "learning_rate": 0.00015205479452054795, "loss": 3.1437, "step": 757 }, { "epoch": 1.48, "learning_rate": 0.00015185909980430527, "loss": 3.1491, "step": 758 }, { "epoch": 1.48, "learning_rate": 0.0001516634050880626, "loss": 3.1727, "step": 759 }, { "epoch": 1.49, "learning_rate": 0.00015146771037181993, "loss": 3.1545, "step": 760 }, { "epoch": 1.49, "learning_rate": 0.0001512720156555773, "loss": 3.1386, "step": 761 }, { "epoch": 1.49, "learning_rate": 0.0001510763209393346, "loss": 3.128, "step": 762 }, { "epoch": 1.49, "learning_rate": 0.00015088062622309198, "loss": 3.1668, "step": 763 }, { "epoch": 1.49, "learning_rate": 0.0001506849315068493, "loss": 3.1344, "step": 764 }, { "epoch": 1.5, "learning_rate": 0.00015048923679060664, "loss": 3.1301, "step": 765 }, { "epoch": 1.5, "learning_rate": 0.00015029354207436396, "loss": 3.1607, "step": 766 }, { "epoch": 1.5, "learning_rate": 0.00015009784735812133, "loss": 3.1406, "step": 767 }, { "epoch": 1.5, "learning_rate": 0.00014990215264187864, "loss": 3.141, "step": 768 }, { "epoch": 1.5, "learning_rate": 0.000149706457925636, "loss": 3.1406, "step": 769 }, { "epoch": 1.51, "learning_rate": 0.00014951076320939333, "loss": 3.1468, "step": 770 }, { "epoch": 1.51, "learning_rate": 0.00014931506849315067, "loss": 3.1326, "step": 771 }, { "epoch": 1.51, "learning_rate": 0.000149119373776908, "loss": 3.1391, "step": 772 }, { "epoch": 1.51, "learning_rate": 0.00014892367906066533, "loss": 3.1678, "step": 773 }, { "epoch": 1.51, "learning_rate": 0.00014872798434442268, "loss": 3.1543, "step": 774 }, { "epoch": 1.52, "learning_rate": 0.00014853228962818002, "loss": 3.1469, "step": 775 }, { "epoch": 1.52, "learning_rate": 0.00014833659491193736, "loss": 3.1584, "step": 776 }, { "epoch": 1.52, "learning_rate": 0.0001481409001956947, "loss": 3.1335, "step": 777 }, { "epoch": 1.52, "learning_rate": 0.00014794520547945202, "loss": 3.1468, "step": 778 }, { "epoch": 1.52, "learning_rate": 0.00014774951076320937, "loss": 3.1406, "step": 779 }, { "epoch": 1.53, "learning_rate": 0.0001475538160469667, "loss": 3.1297, "step": 780 }, { "epoch": 1.53, "learning_rate": 0.00014735812133072405, "loss": 3.1374, "step": 781 }, { "epoch": 1.53, "learning_rate": 0.0001471624266144814, "loss": 3.1366, "step": 782 }, { "epoch": 1.53, "learning_rate": 0.0001469667318982387, "loss": 3.1248, "step": 783 }, { "epoch": 1.53, "learning_rate": 0.00014677103718199606, "loss": 3.1382, "step": 784 }, { "epoch": 1.54, "learning_rate": 0.0001465753424657534, "loss": 3.1226, "step": 785 }, { "epoch": 1.54, "learning_rate": 0.00014637964774951074, "loss": 3.1489, "step": 786 }, { "epoch": 1.54, "learning_rate": 0.00014618395303326809, "loss": 3.1356, "step": 787 }, { "epoch": 1.54, "learning_rate": 0.00014598825831702543, "loss": 3.129, "step": 788 }, { "epoch": 1.54, "learning_rate": 0.00014579256360078275, "loss": 3.1357, "step": 789 }, { "epoch": 1.55, "learning_rate": 0.0001455968688845401, "loss": 3.1262, "step": 790 }, { "epoch": 1.55, "learning_rate": 0.00014540117416829743, "loss": 3.1393, "step": 791 }, { "epoch": 1.55, "learning_rate": 0.00014520547945205478, "loss": 3.1368, "step": 792 }, { "epoch": 1.55, "learning_rate": 0.00014500978473581212, "loss": 3.1179, "step": 793 }, { "epoch": 1.55, "learning_rate": 0.00014481409001956946, "loss": 3.1203, "step": 794 }, { "epoch": 1.56, "learning_rate": 0.00014461839530332678, "loss": 3.1377, "step": 795 }, { "epoch": 1.56, "learning_rate": 0.00014442270058708412, "loss": 3.1352, "step": 796 }, { "epoch": 1.56, "learning_rate": 0.00014422700587084146, "loss": 3.1483, "step": 797 }, { "epoch": 1.56, "learning_rate": 0.0001440313111545988, "loss": 3.1367, "step": 798 }, { "epoch": 1.56, "learning_rate": 0.00014383561643835615, "loss": 3.1331, "step": 799 }, { "epoch": 1.57, "learning_rate": 0.0001436399217221135, "loss": 3.1125, "step": 800 }, { "epoch": 1.57, "learning_rate": 0.0001434442270058708, "loss": 3.1554, "step": 801 }, { "epoch": 1.57, "learning_rate": 0.00014324853228962815, "loss": 3.1296, "step": 802 }, { "epoch": 1.57, "learning_rate": 0.0001430528375733855, "loss": 3.1257, "step": 803 }, { "epoch": 1.57, "learning_rate": 0.00014285714285714284, "loss": 3.1232, "step": 804 }, { "epoch": 1.57, "learning_rate": 0.00014266144814090018, "loss": 3.1271, "step": 805 }, { "epoch": 1.58, "learning_rate": 0.00014246575342465753, "loss": 3.1358, "step": 806 }, { "epoch": 1.58, "learning_rate": 0.00014227005870841484, "loss": 3.1306, "step": 807 }, { "epoch": 1.58, "learning_rate": 0.0001420743639921722, "loss": 3.1541, "step": 808 }, { "epoch": 1.58, "learning_rate": 0.00014187866927592953, "loss": 3.1495, "step": 809 }, { "epoch": 1.58, "learning_rate": 0.00014168297455968687, "loss": 3.1358, "step": 810 }, { "epoch": 1.59, "learning_rate": 0.00014148727984344422, "loss": 3.1495, "step": 811 }, { "epoch": 1.59, "learning_rate": 0.00014129158512720153, "loss": 3.1224, "step": 812 }, { "epoch": 1.59, "learning_rate": 0.00014109589041095888, "loss": 3.1549, "step": 813 }, { "epoch": 1.59, "learning_rate": 0.00014090019569471622, "loss": 3.1578, "step": 814 }, { "epoch": 1.59, "learning_rate": 0.00014070450097847356, "loss": 3.1453, "step": 815 }, { "epoch": 1.6, "learning_rate": 0.0001405088062622309, "loss": 3.1538, "step": 816 }, { "epoch": 1.6, "learning_rate": 0.00014031311154598825, "loss": 3.139, "step": 817 }, { "epoch": 1.6, "learning_rate": 0.00014011741682974557, "loss": 3.1142, "step": 818 }, { "epoch": 1.6, "learning_rate": 0.0001399217221135029, "loss": 3.1151, "step": 819 }, { "epoch": 1.6, "learning_rate": 0.00013972602739726025, "loss": 3.1302, "step": 820 }, { "epoch": 1.61, "learning_rate": 0.0001395303326810176, "loss": 3.1555, "step": 821 }, { "epoch": 1.61, "learning_rate": 0.00013933463796477494, "loss": 3.1429, "step": 822 }, { "epoch": 1.61, "learning_rate": 0.00013913894324853228, "loss": 3.1318, "step": 823 }, { "epoch": 1.61, "learning_rate": 0.0001389432485322896, "loss": 3.1529, "step": 824 }, { "epoch": 1.61, "learning_rate": 0.00013874755381604694, "loss": 3.1258, "step": 825 }, { "epoch": 1.62, "learning_rate": 0.00013855185909980428, "loss": 3.1296, "step": 826 }, { "epoch": 1.62, "learning_rate": 0.00013835616438356163, "loss": 3.1461, "step": 827 }, { "epoch": 1.62, "learning_rate": 0.00013816046966731897, "loss": 3.1311, "step": 828 }, { "epoch": 1.62, "learning_rate": 0.00013796477495107631, "loss": 3.1395, "step": 829 }, { "epoch": 1.62, "learning_rate": 0.00013776908023483363, "loss": 3.1253, "step": 830 }, { "epoch": 1.63, "learning_rate": 0.00013757338551859097, "loss": 3.1341, "step": 831 }, { "epoch": 1.63, "learning_rate": 0.00013737769080234832, "loss": 3.1389, "step": 832 }, { "epoch": 1.63, "learning_rate": 0.00013718199608610566, "loss": 3.1301, "step": 833 }, { "epoch": 1.63, "learning_rate": 0.000136986301369863, "loss": 3.125, "step": 834 }, { "epoch": 1.63, "learning_rate": 0.00013679060665362032, "loss": 3.127, "step": 835 }, { "epoch": 1.64, "learning_rate": 0.00013659491193737766, "loss": 3.1212, "step": 836 }, { "epoch": 1.64, "learning_rate": 0.000136399217221135, "loss": 3.1257, "step": 837 }, { "epoch": 1.64, "learning_rate": 0.00013620352250489235, "loss": 3.1402, "step": 838 }, { "epoch": 1.64, "learning_rate": 0.0001360078277886497, "loss": 3.1311, "step": 839 }, { "epoch": 1.64, "learning_rate": 0.00013581213307240704, "loss": 3.1327, "step": 840 }, { "epoch": 1.65, "learning_rate": 0.00013561643835616435, "loss": 3.1322, "step": 841 }, { "epoch": 1.65, "learning_rate": 0.0001354207436399217, "loss": 3.1303, "step": 842 }, { "epoch": 1.65, "learning_rate": 0.00013522504892367904, "loss": 3.1196, "step": 843 }, { "epoch": 1.65, "learning_rate": 0.00013502935420743638, "loss": 3.1346, "step": 844 }, { "epoch": 1.65, "learning_rate": 0.00013483365949119373, "loss": 3.1205, "step": 845 }, { "epoch": 1.65, "learning_rate": 0.00013463796477495107, "loss": 3.1428, "step": 846 }, { "epoch": 1.66, "learning_rate": 0.00013444227005870839, "loss": 3.1473, "step": 847 }, { "epoch": 1.66, "learning_rate": 0.00013424657534246573, "loss": 3.1177, "step": 848 }, { "epoch": 1.66, "learning_rate": 0.00013405088062622307, "loss": 3.128, "step": 849 }, { "epoch": 1.66, "learning_rate": 0.00013385518590998042, "loss": 3.162, "step": 850 }, { "epoch": 1.66, "learning_rate": 0.00013365949119373776, "loss": 3.1314, "step": 851 }, { "epoch": 1.67, "learning_rate": 0.0001334637964774951, "loss": 3.1364, "step": 852 }, { "epoch": 1.67, "learning_rate": 0.00013326810176125242, "loss": 3.1359, "step": 853 }, { "epoch": 1.67, "learning_rate": 0.00013307240704500976, "loss": 3.1367, "step": 854 }, { "epoch": 1.67, "learning_rate": 0.0001328767123287671, "loss": 3.1389, "step": 855 }, { "epoch": 1.67, "learning_rate": 0.00013268101761252445, "loss": 3.1369, "step": 856 }, { "epoch": 1.68, "learning_rate": 0.0001324853228962818, "loss": 3.1663, "step": 857 }, { "epoch": 1.68, "learning_rate": 0.0001322896281800391, "loss": 3.1757, "step": 858 }, { "epoch": 1.68, "learning_rate": 0.00013209393346379645, "loss": 3.1359, "step": 859 }, { "epoch": 1.68, "learning_rate": 0.0001318982387475538, "loss": 3.1262, "step": 860 }, { "epoch": 1.68, "learning_rate": 0.00013170254403131114, "loss": 3.145, "step": 861 }, { "epoch": 1.69, "learning_rate": 0.00013150684931506848, "loss": 3.1185, "step": 862 }, { "epoch": 1.69, "learning_rate": 0.00013131115459882582, "loss": 3.1311, "step": 863 }, { "epoch": 1.69, "learning_rate": 0.00013111545988258314, "loss": 3.1238, "step": 864 }, { "epoch": 1.69, "learning_rate": 0.00013091976516634048, "loss": 3.1349, "step": 865 }, { "epoch": 1.69, "learning_rate": 0.00013072407045009783, "loss": 3.1431, "step": 866 }, { "epoch": 1.7, "learning_rate": 0.00013052837573385517, "loss": 3.1335, "step": 867 }, { "epoch": 1.7, "learning_rate": 0.0001303326810176125, "loss": 3.1396, "step": 868 }, { "epoch": 1.7, "learning_rate": 0.00013013698630136986, "loss": 3.1351, "step": 869 }, { "epoch": 1.7, "learning_rate": 0.00012994129158512717, "loss": 3.1457, "step": 870 }, { "epoch": 1.7, "learning_rate": 0.00012974559686888452, "loss": 3.124, "step": 871 }, { "epoch": 1.71, "learning_rate": 0.00012954990215264186, "loss": 3.127, "step": 872 }, { "epoch": 1.71, "learning_rate": 0.0001293542074363992, "loss": 3.1555, "step": 873 }, { "epoch": 1.71, "learning_rate": 0.00012915851272015655, "loss": 3.1196, "step": 874 }, { "epoch": 1.71, "learning_rate": 0.0001289628180039139, "loss": 3.1233, "step": 875 }, { "epoch": 1.71, "learning_rate": 0.0001287671232876712, "loss": 3.1462, "step": 876 }, { "epoch": 1.72, "learning_rate": 0.00012857142857142855, "loss": 3.1208, "step": 877 }, { "epoch": 1.72, "learning_rate": 0.0001283757338551859, "loss": 3.1295, "step": 878 }, { "epoch": 1.72, "learning_rate": 0.00012818003913894324, "loss": 3.1144, "step": 879 }, { "epoch": 1.72, "learning_rate": 0.00012798434442270058, "loss": 3.1134, "step": 880 }, { "epoch": 1.72, "learning_rate": 0.0001277886497064579, "loss": 3.1273, "step": 881 }, { "epoch": 1.73, "learning_rate": 0.00012759295499021524, "loss": 3.1219, "step": 882 }, { "epoch": 1.73, "learning_rate": 0.00012739726027397258, "loss": 3.124, "step": 883 }, { "epoch": 1.73, "learning_rate": 0.00012720156555772992, "loss": 3.1346, "step": 884 }, { "epoch": 1.73, "learning_rate": 0.00012700587084148727, "loss": 3.1182, "step": 885 }, { "epoch": 1.73, "learning_rate": 0.0001268101761252446, "loss": 3.1364, "step": 886 }, { "epoch": 1.74, "learning_rate": 0.00012661448140900193, "loss": 3.1302, "step": 887 }, { "epoch": 1.74, "learning_rate": 0.00012641878669275927, "loss": 3.1325, "step": 888 }, { "epoch": 1.74, "learning_rate": 0.00012622309197651661, "loss": 3.1439, "step": 889 }, { "epoch": 1.74, "learning_rate": 0.00012602739726027396, "loss": 3.1263, "step": 890 }, { "epoch": 1.74, "learning_rate": 0.0001258317025440313, "loss": 3.1286, "step": 891 }, { "epoch": 1.74, "learning_rate": 0.00012563600782778864, "loss": 3.1035, "step": 892 }, { "epoch": 1.75, "learning_rate": 0.00012544031311154596, "loss": 3.1113, "step": 893 }, { "epoch": 1.75, "learning_rate": 0.0001252446183953033, "loss": 3.1172, "step": 894 }, { "epoch": 1.75, "learning_rate": 0.00012504892367906065, "loss": 3.147, "step": 895 }, { "epoch": 1.75, "learning_rate": 0.000124853228962818, "loss": 3.1284, "step": 896 }, { "epoch": 1.75, "learning_rate": 0.00012465753424657533, "loss": 3.1333, "step": 897 }, { "epoch": 1.76, "learning_rate": 0.00012446183953033268, "loss": 3.1376, "step": 898 }, { "epoch": 1.76, "learning_rate": 0.00012426614481409, "loss": 3.1115, "step": 899 }, { "epoch": 1.76, "learning_rate": 0.00012407045009784734, "loss": 3.1494, "step": 900 }, { "epoch": 1.76, "learning_rate": 0.00012387475538160468, "loss": 3.1442, "step": 901 }, { "epoch": 1.76, "learning_rate": 0.00012367906066536202, "loss": 3.1194, "step": 902 }, { "epoch": 1.77, "learning_rate": 0.00012348336594911937, "loss": 3.1343, "step": 903 }, { "epoch": 1.77, "learning_rate": 0.00012328767123287668, "loss": 3.1355, "step": 904 }, { "epoch": 1.77, "learning_rate": 0.00012309197651663403, "loss": 3.1238, "step": 905 }, { "epoch": 1.77, "learning_rate": 0.00012289628180039137, "loss": 3.111, "step": 906 }, { "epoch": 1.77, "learning_rate": 0.0001227005870841487, "loss": 3.1149, "step": 907 }, { "epoch": 1.78, "learning_rate": 0.00012250489236790606, "loss": 3.1266, "step": 908 }, { "epoch": 1.78, "learning_rate": 0.0001223091976516634, "loss": 3.1254, "step": 909 }, { "epoch": 1.78, "learning_rate": 0.00012211350293542071, "loss": 3.1104, "step": 910 }, { "epoch": 1.78, "learning_rate": 0.00012191780821917807, "loss": 3.1247, "step": 911 }, { "epoch": 1.78, "learning_rate": 0.0001217221135029354, "loss": 3.1289, "step": 912 }, { "epoch": 1.79, "learning_rate": 0.00012152641878669274, "loss": 3.1161, "step": 913 }, { "epoch": 1.79, "learning_rate": 0.00012133072407045007, "loss": 3.1296, "step": 914 }, { "epoch": 1.79, "learning_rate": 0.00012113502935420742, "loss": 3.1252, "step": 915 }, { "epoch": 1.79, "learning_rate": 0.00012093933463796476, "loss": 3.1422, "step": 916 }, { "epoch": 1.79, "learning_rate": 0.00012074363992172209, "loss": 3.1013, "step": 917 }, { "epoch": 1.8, "learning_rate": 0.00012054794520547943, "loss": 3.1349, "step": 918 }, { "epoch": 1.8, "learning_rate": 0.00012035225048923678, "loss": 3.1256, "step": 919 }, { "epoch": 1.8, "learning_rate": 0.0001201565557729941, "loss": 3.1272, "step": 920 }, { "epoch": 1.8, "learning_rate": 0.00011996086105675145, "loss": 3.1389, "step": 921 }, { "epoch": 1.8, "learning_rate": 0.0001197651663405088, "loss": 3.1222, "step": 922 }, { "epoch": 1.81, "learning_rate": 0.00011956947162426612, "loss": 3.122, "step": 923 }, { "epoch": 1.81, "learning_rate": 0.00011937377690802347, "loss": 3.1322, "step": 924 }, { "epoch": 1.81, "learning_rate": 0.00011917808219178081, "loss": 3.1271, "step": 925 }, { "epoch": 1.81, "learning_rate": 0.00011898238747553814, "loss": 3.126, "step": 926 }, { "epoch": 1.81, "learning_rate": 0.00011878669275929548, "loss": 3.1209, "step": 927 }, { "epoch": 1.82, "learning_rate": 0.00011859099804305283, "loss": 3.1359, "step": 928 }, { "epoch": 1.82, "learning_rate": 0.00011839530332681016, "loss": 3.1275, "step": 929 }, { "epoch": 1.82, "learning_rate": 0.0001181996086105675, "loss": 3.1173, "step": 930 }, { "epoch": 1.82, "learning_rate": 0.00011800391389432484, "loss": 3.1109, "step": 931 }, { "epoch": 1.82, "learning_rate": 0.00011780821917808217, "loss": 3.1367, "step": 932 }, { "epoch": 1.83, "learning_rate": 0.00011761252446183952, "loss": 3.1124, "step": 933 }, { "epoch": 1.83, "learning_rate": 0.00011741682974559686, "loss": 3.1189, "step": 934 }, { "epoch": 1.83, "learning_rate": 0.00011722113502935419, "loss": 3.1114, "step": 935 }, { "epoch": 1.83, "learning_rate": 0.00011702544031311153, "loss": 3.1296, "step": 936 }, { "epoch": 1.83, "learning_rate": 0.00011682974559686886, "loss": 3.1141, "step": 937 }, { "epoch": 1.83, "learning_rate": 0.0001166340508806262, "loss": 3.1284, "step": 938 }, { "epoch": 1.84, "learning_rate": 0.00011643835616438355, "loss": 3.1284, "step": 939 }, { "epoch": 1.84, "learning_rate": 0.00011624266144814088, "loss": 3.1153, "step": 940 }, { "epoch": 1.84, "learning_rate": 0.00011604696673189822, "loss": 3.1182, "step": 941 }, { "epoch": 1.84, "learning_rate": 0.00011585127201565556, "loss": 3.1171, "step": 942 }, { "epoch": 1.84, "learning_rate": 0.0001156555772994129, "loss": 3.1289, "step": 943 }, { "epoch": 1.85, "learning_rate": 0.00011545988258317024, "loss": 3.1143, "step": 944 }, { "epoch": 1.85, "learning_rate": 0.00011526418786692758, "loss": 3.1236, "step": 945 }, { "epoch": 1.85, "learning_rate": 0.00011506849315068491, "loss": 3.1242, "step": 946 }, { "epoch": 1.85, "learning_rate": 0.00011487279843444225, "loss": 3.1404, "step": 947 }, { "epoch": 1.85, "learning_rate": 0.0001146771037181996, "loss": 3.1173, "step": 948 }, { "epoch": 1.86, "learning_rate": 0.00011448140900195693, "loss": 3.1406, "step": 949 }, { "epoch": 1.86, "learning_rate": 0.00011428571428571427, "loss": 3.1177, "step": 950 }, { "epoch": 1.86, "learning_rate": 0.00011409001956947161, "loss": 3.1103, "step": 951 }, { "epoch": 1.86, "learning_rate": 0.00011389432485322894, "loss": 3.128, "step": 952 }, { "epoch": 1.86, "learning_rate": 0.00011369863013698629, "loss": 3.1282, "step": 953 }, { "epoch": 1.87, "learning_rate": 0.00011350293542074363, "loss": 3.1315, "step": 954 }, { "epoch": 1.87, "learning_rate": 0.00011330724070450096, "loss": 3.1205, "step": 955 }, { "epoch": 1.87, "learning_rate": 0.0001131115459882583, "loss": 3.1464, "step": 956 }, { "epoch": 1.87, "learning_rate": 0.00011291585127201565, "loss": 3.135, "step": 957 }, { "epoch": 1.87, "learning_rate": 0.00011272015655577298, "loss": 3.1292, "step": 958 }, { "epoch": 1.88, "learning_rate": 0.00011252446183953032, "loss": 3.1267, "step": 959 }, { "epoch": 1.88, "learning_rate": 0.00011232876712328765, "loss": 3.1131, "step": 960 }, { "epoch": 1.88, "learning_rate": 0.00011213307240704499, "loss": 3.128, "step": 961 }, { "epoch": 1.88, "learning_rate": 0.00011193737769080234, "loss": 3.1223, "step": 962 }, { "epoch": 1.88, "learning_rate": 0.00011174168297455966, "loss": 3.1283, "step": 963 }, { "epoch": 1.89, "learning_rate": 0.00011154598825831701, "loss": 3.1163, "step": 964 }, { "epoch": 1.89, "learning_rate": 0.00011135029354207435, "loss": 3.1357, "step": 965 }, { "epoch": 1.89, "learning_rate": 0.00011115459882583168, "loss": 3.1144, "step": 966 }, { "epoch": 1.89, "learning_rate": 0.00011095890410958902, "loss": 3.1232, "step": 967 }, { "epoch": 1.89, "learning_rate": 0.00011076320939334637, "loss": 3.1239, "step": 968 }, { "epoch": 1.9, "learning_rate": 0.0001105675146771037, "loss": 3.1255, "step": 969 }, { "epoch": 1.9, "learning_rate": 0.00011037181996086104, "loss": 3.0973, "step": 970 }, { "epoch": 1.9, "learning_rate": 0.00011017612524461838, "loss": 3.1072, "step": 971 }, { "epoch": 1.9, "learning_rate": 0.00010998043052837571, "loss": 3.1245, "step": 972 }, { "epoch": 1.9, "learning_rate": 0.00010978473581213306, "loss": 3.1314, "step": 973 }, { "epoch": 1.91, "learning_rate": 0.0001095890410958904, "loss": 3.1118, "step": 974 }, { "epoch": 1.91, "learning_rate": 0.00010939334637964773, "loss": 3.1211, "step": 975 }, { "epoch": 1.91, "learning_rate": 0.00010919765166340507, "loss": 3.1143, "step": 976 }, { "epoch": 1.91, "learning_rate": 0.00010900195694716242, "loss": 3.1095, "step": 977 }, { "epoch": 1.91, "learning_rate": 0.00010880626223091975, "loss": 3.1202, "step": 978 }, { "epoch": 1.91, "learning_rate": 0.00010861056751467709, "loss": 3.1172, "step": 979 }, { "epoch": 1.92, "learning_rate": 0.00010841487279843443, "loss": 3.1, "step": 980 }, { "epoch": 1.92, "learning_rate": 0.00010821917808219176, "loss": 3.1008, "step": 981 }, { "epoch": 1.92, "learning_rate": 0.0001080234833659491, "loss": 3.1302, "step": 982 }, { "epoch": 1.92, "learning_rate": 0.00010782778864970644, "loss": 3.1089, "step": 983 }, { "epoch": 1.92, "learning_rate": 0.00010763209393346378, "loss": 3.0935, "step": 984 }, { "epoch": 1.93, "learning_rate": 0.00010743639921722112, "loss": 3.1132, "step": 985 }, { "epoch": 1.93, "learning_rate": 0.00010724070450097845, "loss": 3.1217, "step": 986 }, { "epoch": 1.93, "learning_rate": 0.0001070450097847358, "loss": 3.1082, "step": 987 }, { "epoch": 1.93, "learning_rate": 0.00010684931506849314, "loss": 3.1156, "step": 988 }, { "epoch": 1.93, "learning_rate": 0.00010665362035225047, "loss": 3.1278, "step": 989 }, { "epoch": 1.94, "learning_rate": 0.00010645792563600781, "loss": 3.1095, "step": 990 }, { "epoch": 1.94, "learning_rate": 0.00010626223091976516, "loss": 3.1043, "step": 991 }, { "epoch": 1.94, "learning_rate": 0.00010606653620352248, "loss": 3.1249, "step": 992 }, { "epoch": 1.94, "learning_rate": 0.00010587084148727983, "loss": 3.1214, "step": 993 }, { "epoch": 1.94, "learning_rate": 0.00010567514677103717, "loss": 3.1249, "step": 994 }, { "epoch": 1.95, "learning_rate": 0.0001054794520547945, "loss": 3.1, "step": 995 }, { "epoch": 1.95, "learning_rate": 0.00010528375733855184, "loss": 3.1339, "step": 996 }, { "epoch": 1.95, "learning_rate": 0.00010508806262230919, "loss": 3.1316, "step": 997 }, { "epoch": 1.95, "learning_rate": 0.00010489236790606652, "loss": 3.1167, "step": 998 }, { "epoch": 1.95, "learning_rate": 0.00010469667318982386, "loss": 3.1114, "step": 999 }, { "epoch": 1.96, "learning_rate": 0.0001045009784735812, "loss": 3.1149, "step": 1000 }, { "epoch": 1.96, "learning_rate": 0.00010430528375733853, "loss": 3.124, "step": 1001 }, { "epoch": 1.96, "learning_rate": 0.00010410958904109588, "loss": 3.1216, "step": 1002 }, { "epoch": 1.96, "learning_rate": 0.00010391389432485322, "loss": 3.1094, "step": 1003 }, { "epoch": 1.96, "learning_rate": 0.00010371819960861055, "loss": 3.1256, "step": 1004 }, { "epoch": 1.97, "learning_rate": 0.0001035225048923679, "loss": 3.1146, "step": 1005 }, { "epoch": 1.97, "learning_rate": 0.00010332681017612522, "loss": 3.1392, "step": 1006 }, { "epoch": 1.97, "learning_rate": 0.00010313111545988257, "loss": 3.106, "step": 1007 }, { "epoch": 1.97, "learning_rate": 0.00010293542074363991, "loss": 3.1158, "step": 1008 }, { "epoch": 1.97, "learning_rate": 0.00010273972602739724, "loss": 3.1201, "step": 1009 }, { "epoch": 1.98, "learning_rate": 0.00010254403131115458, "loss": 3.131, "step": 1010 }, { "epoch": 1.98, "learning_rate": 0.00010234833659491193, "loss": 3.1173, "step": 1011 }, { "epoch": 1.98, "learning_rate": 0.00010215264187866926, "loss": 3.1042, "step": 1012 }, { "epoch": 1.98, "learning_rate": 0.0001019569471624266, "loss": 3.1043, "step": 1013 }, { "epoch": 1.98, "learning_rate": 0.00010176125244618394, "loss": 3.1047, "step": 1014 }, { "epoch": 1.99, "learning_rate": 0.00010156555772994127, "loss": 3.1142, "step": 1015 }, { "epoch": 1.99, "learning_rate": 0.00010136986301369862, "loss": 3.1218, "step": 1016 }, { "epoch": 1.99, "learning_rate": 0.00010117416829745596, "loss": 3.1069, "step": 1017 }, { "epoch": 1.99, "learning_rate": 0.00010097847358121329, "loss": 3.1048, "step": 1018 }, { "epoch": 1.99, "learning_rate": 0.00010078277886497063, "loss": 3.1196, "step": 1019 }, { "epoch": 2.0, "learning_rate": 0.00010058708414872797, "loss": 3.1062, "step": 1020 }, { "epoch": 2.0, "learning_rate": 0.0001003913894324853, "loss": 3.109, "step": 1021 }, { "epoch": 2.0, "learning_rate": 0.00010019569471624265, "loss": 3.1097, "step": 1022 }, { "epoch": 2.0, "eval_loss": 11.071310043334961, "eval_runtime": 512.791, "eval_samples_per_second": 30.872, "eval_steps_per_second": 0.07, "eval_wer": 0.9999636220770339, "step": 1022 }, { "epoch": 2.0, "learning_rate": 9.999999999999999e-05, "loss": 4.6471, "step": 1023 }, { "epoch": 2.0, "learning_rate": 9.980430528375732e-05, "loss": 3.1252, "step": 1024 }, { "epoch": 2.01, "learning_rate": 9.960861056751466e-05, "loss": 3.0992, "step": 1025 }, { "epoch": 2.01, "learning_rate": 9.941291585127201e-05, "loss": 3.1093, "step": 1026 }, { "epoch": 2.01, "learning_rate": 9.921722113502934e-05, "loss": 3.1178, "step": 1027 }, { "epoch": 2.01, "learning_rate": 9.902152641878668e-05, "loss": 3.1203, "step": 1028 }, { "epoch": 2.01, "learning_rate": 9.882583170254402e-05, "loss": 3.1096, "step": 1029 }, { "epoch": 2.02, "learning_rate": 9.863013698630135e-05, "loss": 3.1118, "step": 1030 }, { "epoch": 2.02, "learning_rate": 9.84344422700587e-05, "loss": 3.0968, "step": 1031 }, { "epoch": 2.02, "learning_rate": 9.823874755381603e-05, "loss": 3.116, "step": 1032 }, { "epoch": 2.02, "learning_rate": 9.804305283757337e-05, "loss": 3.0984, "step": 1033 }, { "epoch": 2.02, "learning_rate": 9.784735812133071e-05, "loss": 3.1049, "step": 1034 }, { "epoch": 2.03, "learning_rate": 9.765166340508804e-05, "loss": 3.0993, "step": 1035 }, { "epoch": 2.03, "learning_rate": 9.745596868884539e-05, "loss": 3.1017, "step": 1036 }, { "epoch": 2.03, "learning_rate": 9.726027397260273e-05, "loss": 3.1103, "step": 1037 }, { "epoch": 2.03, "learning_rate": 9.706457925636006e-05, "loss": 3.1097, "step": 1038 }, { "epoch": 2.03, "learning_rate": 9.68688845401174e-05, "loss": 3.1288, "step": 1039 }, { "epoch": 2.04, "learning_rate": 9.667318982387475e-05, "loss": 3.118, "step": 1040 }, { "epoch": 2.04, "learning_rate": 9.647749510763208e-05, "loss": 3.1166, "step": 1041 }, { "epoch": 2.04, "learning_rate": 9.628180039138942e-05, "loss": 3.106, "step": 1042 }, { "epoch": 2.04, "learning_rate": 9.608610567514676e-05, "loss": 3.0979, "step": 1043 }, { "epoch": 2.04, "learning_rate": 9.589041095890409e-05, "loss": 3.1159, "step": 1044 }, { "epoch": 2.04, "learning_rate": 9.569471624266144e-05, "loss": 3.1071, "step": 1045 }, { "epoch": 2.05, "learning_rate": 9.549902152641878e-05, "loss": 3.0906, "step": 1046 }, { "epoch": 2.05, "learning_rate": 9.530332681017611e-05, "loss": 3.1089, "step": 1047 }, { "epoch": 2.05, "learning_rate": 9.510763209393345e-05, "loss": 3.1008, "step": 1048 }, { "epoch": 2.05, "learning_rate": 9.49119373776908e-05, "loss": 3.1032, "step": 1049 }, { "epoch": 2.05, "learning_rate": 9.471624266144812e-05, "loss": 3.0994, "step": 1050 }, { "epoch": 2.06, "learning_rate": 9.452054794520547e-05, "loss": 3.093, "step": 1051 }, { "epoch": 2.06, "learning_rate": 9.432485322896281e-05, "loss": 3.1114, "step": 1052 }, { "epoch": 2.06, "learning_rate": 9.412915851272014e-05, "loss": 3.0827, "step": 1053 }, { "epoch": 2.06, "learning_rate": 9.393346379647748e-05, "loss": 3.1041, "step": 1054 }, { "epoch": 2.06, "learning_rate": 9.373776908023481e-05, "loss": 3.1068, "step": 1055 }, { "epoch": 2.07, "learning_rate": 9.354207436399216e-05, "loss": 3.0941, "step": 1056 }, { "epoch": 2.07, "learning_rate": 9.33463796477495e-05, "loss": 3.1066, "step": 1057 }, { "epoch": 2.07, "learning_rate": 9.315068493150683e-05, "loss": 3.1036, "step": 1058 }, { "epoch": 2.07, "learning_rate": 9.295499021526417e-05, "loss": 3.1035, "step": 1059 }, { "epoch": 2.07, "learning_rate": 9.275929549902152e-05, "loss": 3.0722, "step": 1060 }, { "epoch": 2.08, "learning_rate": 9.256360078277885e-05, "loss": 3.1077, "step": 1061 }, { "epoch": 2.08, "learning_rate": 9.236790606653619e-05, "loss": 3.0914, "step": 1062 }, { "epoch": 2.08, "learning_rate": 9.217221135029353e-05, "loss": 3.0951, "step": 1063 }, { "epoch": 2.08, "learning_rate": 9.197651663405086e-05, "loss": 3.104, "step": 1064 }, { "epoch": 2.08, "learning_rate": 9.17808219178082e-05, "loss": 3.1262, "step": 1065 }, { "epoch": 2.09, "learning_rate": 9.158512720156555e-05, "loss": 3.0933, "step": 1066 }, { "epoch": 2.09, "learning_rate": 9.138943248532288e-05, "loss": 3.1002, "step": 1067 }, { "epoch": 2.09, "learning_rate": 9.119373776908022e-05, "loss": 3.1073, "step": 1068 }, { "epoch": 2.09, "learning_rate": 9.099804305283757e-05, "loss": 3.0895, "step": 1069 }, { "epoch": 2.09, "learning_rate": 9.08023483365949e-05, "loss": 3.0823, "step": 1070 }, { "epoch": 2.1, "learning_rate": 9.060665362035224e-05, "loss": 3.0955, "step": 1071 }, { "epoch": 2.1, "learning_rate": 9.041095890410958e-05, "loss": 3.1231, "step": 1072 }, { "epoch": 2.1, "learning_rate": 9.021526418786691e-05, "loss": 3.1017, "step": 1073 }, { "epoch": 2.1, "learning_rate": 9.001956947162426e-05, "loss": 3.103, "step": 1074 }, { "epoch": 2.1, "learning_rate": 8.98238747553816e-05, "loss": 3.0829, "step": 1075 }, { "epoch": 2.11, "learning_rate": 8.962818003913893e-05, "loss": 3.0942, "step": 1076 }, { "epoch": 2.11, "learning_rate": 8.943248532289627e-05, "loss": 3.0983, "step": 1077 }, { "epoch": 2.11, "learning_rate": 8.92367906066536e-05, "loss": 3.0937, "step": 1078 }, { "epoch": 2.11, "learning_rate": 8.904109589041094e-05, "loss": 3.0885, "step": 1079 }, { "epoch": 2.11, "learning_rate": 8.884540117416829e-05, "loss": 3.0858, "step": 1080 }, { "epoch": 2.12, "learning_rate": 8.864970645792562e-05, "loss": 3.121, "step": 1081 }, { "epoch": 2.12, "learning_rate": 8.845401174168296e-05, "loss": 3.1038, "step": 1082 }, { "epoch": 2.12, "learning_rate": 8.82583170254403e-05, "loss": 3.0919, "step": 1083 }, { "epoch": 2.12, "learning_rate": 8.806262230919763e-05, "loss": 3.1125, "step": 1084 }, { "epoch": 2.12, "learning_rate": 8.786692759295498e-05, "loss": 3.1011, "step": 1085 }, { "epoch": 2.13, "learning_rate": 8.767123287671232e-05, "loss": 3.1193, "step": 1086 }, { "epoch": 2.13, "learning_rate": 8.747553816046965e-05, "loss": 3.0789, "step": 1087 }, { "epoch": 2.13, "learning_rate": 8.7279843444227e-05, "loss": 3.1006, "step": 1088 }, { "epoch": 2.13, "learning_rate": 8.708414872798434e-05, "loss": 3.1077, "step": 1089 }, { "epoch": 2.13, "learning_rate": 8.688845401174167e-05, "loss": 3.0825, "step": 1090 }, { "epoch": 2.13, "learning_rate": 8.669275929549901e-05, "loss": 3.0824, "step": 1091 }, { "epoch": 2.14, "learning_rate": 8.649706457925635e-05, "loss": 3.1055, "step": 1092 }, { "epoch": 2.14, "learning_rate": 8.630136986301368e-05, "loss": 3.0984, "step": 1093 }, { "epoch": 2.14, "learning_rate": 8.610567514677103e-05, "loss": 3.0951, "step": 1094 }, { "epoch": 2.14, "learning_rate": 8.590998043052837e-05, "loss": 3.1181, "step": 1095 }, { "epoch": 2.14, "learning_rate": 8.57142857142857e-05, "loss": 3.1022, "step": 1096 }, { "epoch": 2.15, "learning_rate": 8.551859099804304e-05, "loss": 3.0843, "step": 1097 }, { "epoch": 2.15, "learning_rate": 8.532289628180039e-05, "loss": 3.0949, "step": 1098 }, { "epoch": 2.15, "learning_rate": 8.512720156555772e-05, "loss": 3.1284, "step": 1099 }, { "epoch": 2.15, "learning_rate": 8.493150684931506e-05, "loss": 3.0959, "step": 1100 }, { "epoch": 2.15, "learning_rate": 8.473581213307239e-05, "loss": 3.0898, "step": 1101 }, { "epoch": 2.16, "learning_rate": 8.454011741682973e-05, "loss": 3.0951, "step": 1102 }, { "epoch": 2.16, "learning_rate": 8.434442270058707e-05, "loss": 3.1127, "step": 1103 }, { "epoch": 2.16, "learning_rate": 8.41487279843444e-05, "loss": 3.0788, "step": 1104 }, { "epoch": 2.16, "learning_rate": 8.395303326810175e-05, "loss": 3.0924, "step": 1105 }, { "epoch": 2.16, "learning_rate": 8.375733855185909e-05, "loss": 3.0868, "step": 1106 }, { "epoch": 2.17, "learning_rate": 8.356164383561642e-05, "loss": 3.0955, "step": 1107 }, { "epoch": 2.17, "learning_rate": 8.336594911937376e-05, "loss": 3.0779, "step": 1108 }, { "epoch": 2.17, "learning_rate": 8.317025440313111e-05, "loss": 3.0863, "step": 1109 }, { "epoch": 2.17, "learning_rate": 8.297455968688844e-05, "loss": 3.0776, "step": 1110 }, { "epoch": 2.17, "learning_rate": 8.277886497064578e-05, "loss": 3.0884, "step": 1111 }, { "epoch": 2.18, "learning_rate": 8.258317025440312e-05, "loss": 3.0996, "step": 1112 }, { "epoch": 2.18, "learning_rate": 8.238747553816045e-05, "loss": 3.0983, "step": 1113 }, { "epoch": 2.18, "learning_rate": 8.21917808219178e-05, "loss": 3.078, "step": 1114 }, { "epoch": 2.18, "learning_rate": 8.199608610567514e-05, "loss": 3.0982, "step": 1115 }, { "epoch": 2.18, "learning_rate": 8.180039138943247e-05, "loss": 3.1144, "step": 1116 }, { "epoch": 2.19, "learning_rate": 8.160469667318981e-05, "loss": 3.0973, "step": 1117 }, { "epoch": 2.19, "learning_rate": 8.140900195694716e-05, "loss": 3.0844, "step": 1118 }, { "epoch": 2.19, "learning_rate": 8.121330724070449e-05, "loss": 3.0951, "step": 1119 }, { "epoch": 2.19, "learning_rate": 8.101761252446183e-05, "loss": 3.0945, "step": 1120 }, { "epoch": 2.19, "learning_rate": 8.082191780821917e-05, "loss": 3.0839, "step": 1121 }, { "epoch": 2.2, "learning_rate": 8.06262230919765e-05, "loss": 3.0876, "step": 1122 }, { "epoch": 2.2, "learning_rate": 8.043052837573385e-05, "loss": 3.0903, "step": 1123 }, { "epoch": 2.2, "learning_rate": 8.023483365949118e-05, "loss": 3.0901, "step": 1124 }, { "epoch": 2.2, "learning_rate": 8.003913894324852e-05, "loss": 3.0799, "step": 1125 }, { "epoch": 2.2, "learning_rate": 7.984344422700586e-05, "loss": 3.0929, "step": 1126 }, { "epoch": 2.21, "learning_rate": 7.964774951076319e-05, "loss": 3.1026, "step": 1127 }, { "epoch": 2.21, "learning_rate": 7.945205479452054e-05, "loss": 3.0934, "step": 1128 }, { "epoch": 2.21, "learning_rate": 7.925636007827788e-05, "loss": 3.0998, "step": 1129 }, { "epoch": 2.21, "learning_rate": 7.906066536203521e-05, "loss": 3.0951, "step": 1130 }, { "epoch": 2.21, "learning_rate": 7.886497064579255e-05, "loss": 3.1021, "step": 1131 }, { "epoch": 2.22, "learning_rate": 7.86692759295499e-05, "loss": 3.0904, "step": 1132 }, { "epoch": 2.22, "learning_rate": 7.847358121330722e-05, "loss": 3.0769, "step": 1133 }, { "epoch": 2.22, "learning_rate": 7.827788649706457e-05, "loss": 3.0951, "step": 1134 }, { "epoch": 2.22, "learning_rate": 7.808219178082191e-05, "loss": 3.0719, "step": 1135 }, { "epoch": 2.22, "learning_rate": 7.788649706457924e-05, "loss": 3.0889, "step": 1136 }, { "epoch": 2.22, "learning_rate": 7.769080234833658e-05, "loss": 3.0789, "step": 1137 }, { "epoch": 2.23, "learning_rate": 7.749510763209393e-05, "loss": 3.0973, "step": 1138 }, { "epoch": 2.23, "learning_rate": 7.729941291585126e-05, "loss": 3.0977, "step": 1139 }, { "epoch": 2.23, "learning_rate": 7.71037181996086e-05, "loss": 3.0884, "step": 1140 }, { "epoch": 2.23, "learning_rate": 7.690802348336594e-05, "loss": 3.0909, "step": 1141 }, { "epoch": 2.23, "learning_rate": 7.671232876712327e-05, "loss": 3.0774, "step": 1142 }, { "epoch": 2.24, "learning_rate": 7.651663405088062e-05, "loss": 3.0874, "step": 1143 }, { "epoch": 2.24, "learning_rate": 7.632093933463796e-05, "loss": 3.0765, "step": 1144 }, { "epoch": 2.24, "learning_rate": 7.612524461839529e-05, "loss": 3.0828, "step": 1145 }, { "epoch": 2.24, "learning_rate": 7.592954990215263e-05, "loss": 3.0936, "step": 1146 }, { "epoch": 2.24, "learning_rate": 7.573385518590996e-05, "loss": 3.0783, "step": 1147 }, { "epoch": 2.25, "learning_rate": 7.55381604696673e-05, "loss": 3.0877, "step": 1148 }, { "epoch": 2.25, "learning_rate": 7.534246575342465e-05, "loss": 3.0906, "step": 1149 }, { "epoch": 2.25, "learning_rate": 7.514677103718198e-05, "loss": 3.0953, "step": 1150 }, { "epoch": 2.25, "learning_rate": 7.495107632093932e-05, "loss": 3.0831, "step": 1151 }, { "epoch": 2.25, "learning_rate": 7.475538160469667e-05, "loss": 3.0823, "step": 1152 }, { "epoch": 2.26, "learning_rate": 7.4559686888454e-05, "loss": 3.063, "step": 1153 }, { "epoch": 2.26, "learning_rate": 7.436399217221134e-05, "loss": 3.1071, "step": 1154 }, { "epoch": 2.26, "learning_rate": 7.416829745596868e-05, "loss": 3.0938, "step": 1155 }, { "epoch": 2.26, "learning_rate": 7.397260273972601e-05, "loss": 3.1188, "step": 1156 }, { "epoch": 2.26, "learning_rate": 7.377690802348336e-05, "loss": 3.0981, "step": 1157 }, { "epoch": 2.27, "learning_rate": 7.35812133072407e-05, "loss": 3.0792, "step": 1158 }, { "epoch": 2.27, "learning_rate": 7.338551859099803e-05, "loss": 3.0976, "step": 1159 }, { "epoch": 2.27, "learning_rate": 7.318982387475537e-05, "loss": 3.0991, "step": 1160 }, { "epoch": 2.27, "learning_rate": 7.299412915851271e-05, "loss": 3.1003, "step": 1161 }, { "epoch": 2.27, "learning_rate": 7.279843444227004e-05, "loss": 3.0793, "step": 1162 }, { "epoch": 2.28, "learning_rate": 7.260273972602739e-05, "loss": 3.0891, "step": 1163 }, { "epoch": 2.28, "learning_rate": 7.240704500978473e-05, "loss": 3.0808, "step": 1164 }, { "epoch": 2.28, "learning_rate": 7.221135029354206e-05, "loss": 3.08, "step": 1165 }, { "epoch": 2.28, "learning_rate": 7.20156555772994e-05, "loss": 3.0784, "step": 1166 }, { "epoch": 2.28, "learning_rate": 7.181996086105675e-05, "loss": 3.0652, "step": 1167 }, { "epoch": 2.29, "learning_rate": 7.162426614481408e-05, "loss": 3.0888, "step": 1168 }, { "epoch": 2.29, "learning_rate": 7.142857142857142e-05, "loss": 3.0916, "step": 1169 }, { "epoch": 2.29, "learning_rate": 7.123287671232876e-05, "loss": 3.084, "step": 1170 }, { "epoch": 2.29, "learning_rate": 7.10371819960861e-05, "loss": 3.0874, "step": 1171 }, { "epoch": 2.29, "learning_rate": 7.084148727984344e-05, "loss": 3.0831, "step": 1172 }, { "epoch": 2.3, "learning_rate": 7.064579256360077e-05, "loss": 3.0965, "step": 1173 }, { "epoch": 2.3, "learning_rate": 7.045009784735811e-05, "loss": 3.0963, "step": 1174 }, { "epoch": 2.3, "learning_rate": 7.025440313111545e-05, "loss": 3.1057, "step": 1175 }, { "epoch": 2.3, "learning_rate": 7.005870841487278e-05, "loss": 3.0931, "step": 1176 }, { "epoch": 2.3, "learning_rate": 6.986301369863013e-05, "loss": 3.0869, "step": 1177 }, { "epoch": 2.3, "learning_rate": 6.966731898238747e-05, "loss": 3.0825, "step": 1178 }, { "epoch": 2.31, "learning_rate": 6.94716242661448e-05, "loss": 3.1187, "step": 1179 }, { "epoch": 2.31, "learning_rate": 6.927592954990214e-05, "loss": 3.0852, "step": 1180 }, { "epoch": 2.31, "learning_rate": 6.908023483365949e-05, "loss": 3.0945, "step": 1181 }, { "epoch": 2.31, "learning_rate": 6.888454011741682e-05, "loss": 3.0845, "step": 1182 }, { "epoch": 2.31, "learning_rate": 6.868884540117416e-05, "loss": 3.0884, "step": 1183 }, { "epoch": 2.32, "learning_rate": 6.84931506849315e-05, "loss": 3.0952, "step": 1184 }, { "epoch": 2.32, "learning_rate": 6.829745596868883e-05, "loss": 3.0746, "step": 1185 }, { "epoch": 2.32, "learning_rate": 6.810176125244617e-05, "loss": 3.0781, "step": 1186 }, { "epoch": 2.32, "learning_rate": 6.790606653620352e-05, "loss": 3.0805, "step": 1187 }, { "epoch": 2.32, "learning_rate": 6.771037181996085e-05, "loss": 3.0887, "step": 1188 }, { "epoch": 2.33, "learning_rate": 6.751467710371819e-05, "loss": 3.0738, "step": 1189 }, { "epoch": 2.33, "learning_rate": 6.731898238747553e-05, "loss": 3.1072, "step": 1190 }, { "epoch": 2.33, "learning_rate": 6.712328767123286e-05, "loss": 3.0933, "step": 1191 }, { "epoch": 2.33, "learning_rate": 6.692759295499021e-05, "loss": 3.0806, "step": 1192 }, { "epoch": 2.33, "learning_rate": 6.673189823874755e-05, "loss": 3.0914, "step": 1193 }, { "epoch": 2.34, "learning_rate": 6.653620352250488e-05, "loss": 3.0833, "step": 1194 }, { "epoch": 2.34, "learning_rate": 6.634050880626222e-05, "loss": 3.0953, "step": 1195 }, { "epoch": 2.34, "learning_rate": 6.614481409001955e-05, "loss": 3.0787, "step": 1196 }, { "epoch": 2.34, "learning_rate": 6.59491193737769e-05, "loss": 3.0927, "step": 1197 }, { "epoch": 2.34, "learning_rate": 6.575342465753424e-05, "loss": 3.0778, "step": 1198 }, { "epoch": 2.35, "learning_rate": 6.555772994129157e-05, "loss": 3.1037, "step": 1199 }, { "epoch": 2.35, "learning_rate": 6.536203522504891e-05, "loss": 3.0948, "step": 1200 }, { "epoch": 2.35, "learning_rate": 6.516634050880626e-05, "loss": 3.0799, "step": 1201 }, { "epoch": 2.35, "learning_rate": 6.497064579256359e-05, "loss": 3.0921, "step": 1202 }, { "epoch": 2.35, "learning_rate": 6.477495107632093e-05, "loss": 3.0831, "step": 1203 }, { "epoch": 2.36, "learning_rate": 6.457925636007827e-05, "loss": 3.0792, "step": 1204 }, { "epoch": 2.36, "learning_rate": 6.43835616438356e-05, "loss": 3.0563, "step": 1205 }, { "epoch": 2.36, "learning_rate": 6.418786692759295e-05, "loss": 3.0839, "step": 1206 }, { "epoch": 2.36, "learning_rate": 6.399217221135029e-05, "loss": 3.074, "step": 1207 }, { "epoch": 2.36, "learning_rate": 6.379647749510762e-05, "loss": 3.081, "step": 1208 }, { "epoch": 2.37, "learning_rate": 6.360078277886496e-05, "loss": 3.094, "step": 1209 }, { "epoch": 2.37, "learning_rate": 6.34050880626223e-05, "loss": 3.088, "step": 1210 }, { "epoch": 2.37, "learning_rate": 6.320939334637964e-05, "loss": 3.0876, "step": 1211 }, { "epoch": 2.37, "learning_rate": 6.301369863013698e-05, "loss": 3.0858, "step": 1212 }, { "epoch": 2.37, "learning_rate": 6.281800391389432e-05, "loss": 3.0721, "step": 1213 }, { "epoch": 2.38, "learning_rate": 6.262230919765165e-05, "loss": 3.0885, "step": 1214 }, { "epoch": 2.38, "learning_rate": 6.2426614481409e-05, "loss": 3.0734, "step": 1215 }, { "epoch": 2.38, "learning_rate": 6.223091976516634e-05, "loss": 3.0742, "step": 1216 }, { "epoch": 2.38, "learning_rate": 6.203522504892367e-05, "loss": 3.0683, "step": 1217 }, { "epoch": 2.38, "learning_rate": 6.183953033268101e-05, "loss": 3.0922, "step": 1218 }, { "epoch": 2.39, "learning_rate": 6.164383561643834e-05, "loss": 3.0714, "step": 1219 }, { "epoch": 2.39, "learning_rate": 6.144814090019568e-05, "loss": 3.0796, "step": 1220 }, { "epoch": 2.39, "learning_rate": 6.125244618395303e-05, "loss": 3.0699, "step": 1221 }, { "epoch": 2.39, "learning_rate": 6.105675146771036e-05, "loss": 3.0954, "step": 1222 }, { "epoch": 2.39, "learning_rate": 6.08610567514677e-05, "loss": 3.0947, "step": 1223 }, { "epoch": 2.39, "learning_rate": 6.066536203522504e-05, "loss": 3.1089, "step": 1224 }, { "epoch": 2.4, "learning_rate": 6.046966731898238e-05, "loss": 3.0642, "step": 1225 }, { "epoch": 2.4, "learning_rate": 6.027397260273972e-05, "loss": 3.0729, "step": 1226 }, { "epoch": 2.4, "learning_rate": 6.007827788649705e-05, "loss": 3.0939, "step": 1227 }, { "epoch": 2.4, "learning_rate": 5.98825831702544e-05, "loss": 3.0843, "step": 1228 }, { "epoch": 2.4, "learning_rate": 5.968688845401173e-05, "loss": 3.0808, "step": 1229 }, { "epoch": 2.41, "learning_rate": 5.949119373776907e-05, "loss": 3.0813, "step": 1230 }, { "epoch": 2.41, "learning_rate": 5.929549902152641e-05, "loss": 3.0939, "step": 1231 }, { "epoch": 2.41, "learning_rate": 5.909980430528375e-05, "loss": 3.106, "step": 1232 }, { "epoch": 2.41, "learning_rate": 5.8904109589041086e-05, "loss": 3.0975, "step": 1233 }, { "epoch": 2.41, "learning_rate": 5.870841487279843e-05, "loss": 3.0838, "step": 1234 }, { "epoch": 2.42, "learning_rate": 5.8512720156555766e-05, "loss": 3.0846, "step": 1235 }, { "epoch": 2.42, "learning_rate": 5.83170254403131e-05, "loss": 3.0797, "step": 1236 }, { "epoch": 2.42, "learning_rate": 5.812133072407044e-05, "loss": 3.0816, "step": 1237 }, { "epoch": 2.42, "learning_rate": 5.792563600782778e-05, "loss": 3.0816, "step": 1238 }, { "epoch": 2.42, "learning_rate": 5.772994129158512e-05, "loss": 3.0732, "step": 1239 }, { "epoch": 2.43, "learning_rate": 5.7534246575342455e-05, "loss": 3.0737, "step": 1240 }, { "epoch": 2.43, "learning_rate": 5.73385518590998e-05, "loss": 3.065, "step": 1241 }, { "epoch": 2.43, "learning_rate": 5.7142857142857135e-05, "loss": 3.0787, "step": 1242 }, { "epoch": 2.43, "learning_rate": 5.694716242661447e-05, "loss": 3.0977, "step": 1243 }, { "epoch": 2.43, "learning_rate": 5.6751467710371815e-05, "loss": 3.0921, "step": 1244 }, { "epoch": 2.44, "learning_rate": 5.655577299412915e-05, "loss": 3.0748, "step": 1245 }, { "epoch": 2.44, "learning_rate": 5.636007827788649e-05, "loss": 3.0966, "step": 1246 }, { "epoch": 2.44, "learning_rate": 5.6164383561643824e-05, "loss": 3.0837, "step": 1247 }, { "epoch": 2.44, "learning_rate": 5.596868884540117e-05, "loss": 3.087, "step": 1248 }, { "epoch": 2.44, "learning_rate": 5.5772994129158504e-05, "loss": 3.082, "step": 1249 }, { "epoch": 2.45, "learning_rate": 5.557729941291584e-05, "loss": 3.0773, "step": 1250 }, { "epoch": 2.45, "learning_rate": 5.5381604696673184e-05, "loss": 3.0805, "step": 1251 }, { "epoch": 2.45, "learning_rate": 5.518590998043052e-05, "loss": 3.0782, "step": 1252 }, { "epoch": 2.45, "learning_rate": 5.499021526418786e-05, "loss": 3.0895, "step": 1253 }, { "epoch": 2.45, "learning_rate": 5.47945205479452e-05, "loss": 3.0899, "step": 1254 }, { "epoch": 2.46, "learning_rate": 5.459882583170254e-05, "loss": 3.0689, "step": 1255 }, { "epoch": 2.46, "learning_rate": 5.440313111545987e-05, "loss": 3.0842, "step": 1256 }, { "epoch": 2.46, "learning_rate": 5.4207436399217217e-05, "loss": 3.0907, "step": 1257 }, { "epoch": 2.46, "learning_rate": 5.401174168297455e-05, "loss": 3.0873, "step": 1258 }, { "epoch": 2.46, "learning_rate": 5.381604696673189e-05, "loss": 3.0894, "step": 1259 }, { "epoch": 2.47, "learning_rate": 5.3620352250489226e-05, "loss": 3.0787, "step": 1260 }, { "epoch": 2.47, "learning_rate": 5.342465753424657e-05, "loss": 3.0702, "step": 1261 }, { "epoch": 2.47, "learning_rate": 5.3228962818003906e-05, "loss": 3.1048, "step": 1262 }, { "epoch": 2.47, "learning_rate": 5.303326810176124e-05, "loss": 3.0777, "step": 1263 }, { "epoch": 2.47, "learning_rate": 5.2837573385518586e-05, "loss": 3.0874, "step": 1264 }, { "epoch": 2.48, "learning_rate": 5.264187866927592e-05, "loss": 3.0819, "step": 1265 }, { "epoch": 2.48, "learning_rate": 5.244618395303326e-05, "loss": 3.0687, "step": 1266 }, { "epoch": 2.48, "learning_rate": 5.22504892367906e-05, "loss": 3.0667, "step": 1267 }, { "epoch": 2.48, "learning_rate": 5.205479452054794e-05, "loss": 3.0798, "step": 1268 }, { "epoch": 2.48, "learning_rate": 5.1859099804305275e-05, "loss": 3.0666, "step": 1269 }, { "epoch": 2.48, "learning_rate": 5.166340508806261e-05, "loss": 3.083, "step": 1270 }, { "epoch": 2.49, "learning_rate": 5.1467710371819955e-05, "loss": 3.0885, "step": 1271 }, { "epoch": 2.49, "learning_rate": 5.127201565557729e-05, "loss": 3.1091, "step": 1272 }, { "epoch": 2.49, "learning_rate": 5.107632093933463e-05, "loss": 3.0811, "step": 1273 }, { "epoch": 2.49, "learning_rate": 5.088062622309197e-05, "loss": 3.08, "step": 1274 }, { "epoch": 2.49, "learning_rate": 5.068493150684931e-05, "loss": 3.0862, "step": 1275 }, { "epoch": 2.5, "learning_rate": 5.0489236790606644e-05, "loss": 3.0712, "step": 1276 }, { "epoch": 2.5, "learning_rate": 5.029354207436399e-05, "loss": 3.0791, "step": 1277 }, { "epoch": 2.5, "learning_rate": 5.0097847358121324e-05, "loss": 3.0695, "step": 1278 }, { "epoch": 2.5, "learning_rate": 4.990215264187866e-05, "loss": 3.0748, "step": 1279 }, { "epoch": 2.5, "learning_rate": 4.9706457925636004e-05, "loss": 3.0672, "step": 1280 }, { "epoch": 2.51, "learning_rate": 4.951076320939334e-05, "loss": 3.0857, "step": 1281 }, { "epoch": 2.51, "learning_rate": 4.931506849315068e-05, "loss": 3.0796, "step": 1282 }, { "epoch": 2.51, "learning_rate": 4.911937377690801e-05, "loss": 3.0754, "step": 1283 }, { "epoch": 2.51, "learning_rate": 4.8923679060665357e-05, "loss": 3.0985, "step": 1284 }, { "epoch": 2.51, "learning_rate": 4.872798434442269e-05, "loss": 3.0685, "step": 1285 }, { "epoch": 2.52, "learning_rate": 4.853228962818003e-05, "loss": 3.0901, "step": 1286 }, { "epoch": 2.52, "learning_rate": 4.833659491193737e-05, "loss": 3.0864, "step": 1287 }, { "epoch": 2.52, "learning_rate": 4.814090019569471e-05, "loss": 3.0777, "step": 1288 }, { "epoch": 2.52, "learning_rate": 4.7945205479452046e-05, "loss": 3.0787, "step": 1289 }, { "epoch": 2.52, "learning_rate": 4.774951076320939e-05, "loss": 3.0945, "step": 1290 }, { "epoch": 2.53, "learning_rate": 4.7553816046966726e-05, "loss": 3.1029, "step": 1291 }, { "epoch": 2.53, "learning_rate": 4.735812133072406e-05, "loss": 3.0755, "step": 1292 }, { "epoch": 2.53, "learning_rate": 4.7162426614481406e-05, "loss": 3.0828, "step": 1293 }, { "epoch": 2.53, "learning_rate": 4.696673189823874e-05, "loss": 3.0773, "step": 1294 }, { "epoch": 2.53, "learning_rate": 4.677103718199608e-05, "loss": 3.0886, "step": 1295 }, { "epoch": 2.54, "learning_rate": 4.6575342465753415e-05, "loss": 3.0616, "step": 1296 }, { "epoch": 2.54, "learning_rate": 4.637964774951076e-05, "loss": 3.112, "step": 1297 }, { "epoch": 2.54, "learning_rate": 4.6183953033268095e-05, "loss": 3.0566, "step": 1298 }, { "epoch": 2.54, "learning_rate": 4.598825831702543e-05, "loss": 3.0837, "step": 1299 }, { "epoch": 2.54, "learning_rate": 4.5792563600782775e-05, "loss": 3.0935, "step": 1300 }, { "epoch": 2.55, "learning_rate": 4.559686888454011e-05, "loss": 3.0689, "step": 1301 }, { "epoch": 2.55, "learning_rate": 4.540117416829745e-05, "loss": 3.0551, "step": 1302 }, { "epoch": 2.55, "learning_rate": 4.520547945205479e-05, "loss": 3.076, "step": 1303 }, { "epoch": 2.55, "learning_rate": 4.500978473581213e-05, "loss": 3.0757, "step": 1304 }, { "epoch": 2.55, "learning_rate": 4.4814090019569464e-05, "loss": 3.0753, "step": 1305 }, { "epoch": 2.56, "learning_rate": 4.46183953033268e-05, "loss": 3.0839, "step": 1306 }, { "epoch": 2.56, "learning_rate": 4.4422700587084144e-05, "loss": 3.0787, "step": 1307 }, { "epoch": 2.56, "learning_rate": 4.422700587084148e-05, "loss": 3.0667, "step": 1308 }, { "epoch": 2.56, "learning_rate": 4.403131115459882e-05, "loss": 3.0868, "step": 1309 }, { "epoch": 2.56, "learning_rate": 4.383561643835616e-05, "loss": 3.0668, "step": 1310 }, { "epoch": 2.57, "learning_rate": 4.36399217221135e-05, "loss": 3.0727, "step": 1311 }, { "epoch": 2.57, "learning_rate": 4.344422700587083e-05, "loss": 3.0775, "step": 1312 }, { "epoch": 2.57, "learning_rate": 4.3248532289628176e-05, "loss": 3.1012, "step": 1313 }, { "epoch": 2.57, "learning_rate": 4.305283757338551e-05, "loss": 3.0824, "step": 1314 }, { "epoch": 2.57, "learning_rate": 4.285714285714285e-05, "loss": 3.081, "step": 1315 }, { "epoch": 2.57, "learning_rate": 4.266144814090019e-05, "loss": 3.0902, "step": 1316 }, { "epoch": 2.58, "learning_rate": 4.246575342465753e-05, "loss": 3.0717, "step": 1317 }, { "epoch": 2.58, "learning_rate": 4.2270058708414866e-05, "loss": 3.1039, "step": 1318 }, { "epoch": 2.58, "learning_rate": 4.20743639921722e-05, "loss": 3.0871, "step": 1319 }, { "epoch": 2.58, "learning_rate": 4.1878669275929546e-05, "loss": 3.0765, "step": 1320 }, { "epoch": 2.58, "learning_rate": 4.168297455968688e-05, "loss": 3.0723, "step": 1321 }, { "epoch": 2.59, "learning_rate": 4.148727984344422e-05, "loss": 3.0852, "step": 1322 }, { "epoch": 2.59, "learning_rate": 4.129158512720156e-05, "loss": 3.0862, "step": 1323 }, { "epoch": 2.59, "learning_rate": 4.10958904109589e-05, "loss": 3.0799, "step": 1324 }, { "epoch": 2.59, "learning_rate": 4.0900195694716235e-05, "loss": 3.0794, "step": 1325 }, { "epoch": 2.59, "learning_rate": 4.070450097847358e-05, "loss": 3.0801, "step": 1326 }, { "epoch": 2.6, "learning_rate": 4.0508806262230915e-05, "loss": 3.0738, "step": 1327 }, { "epoch": 2.6, "learning_rate": 4.031311154598825e-05, "loss": 3.0657, "step": 1328 }, { "epoch": 2.6, "learning_rate": 4.011741682974559e-05, "loss": 3.0888, "step": 1329 }, { "epoch": 2.6, "learning_rate": 3.992172211350293e-05, "loss": 3.0913, "step": 1330 }, { "epoch": 2.6, "learning_rate": 3.972602739726027e-05, "loss": 3.0793, "step": 1331 }, { "epoch": 2.61, "learning_rate": 3.9530332681017604e-05, "loss": 3.0803, "step": 1332 }, { "epoch": 2.61, "learning_rate": 3.933463796477495e-05, "loss": 3.072, "step": 1333 }, { "epoch": 2.61, "learning_rate": 3.9138943248532284e-05, "loss": 3.1062, "step": 1334 }, { "epoch": 2.61, "learning_rate": 3.894324853228962e-05, "loss": 3.0677, "step": 1335 }, { "epoch": 2.61, "learning_rate": 3.8747553816046964e-05, "loss": 3.0691, "step": 1336 }, { "epoch": 2.62, "learning_rate": 3.85518590998043e-05, "loss": 3.0733, "step": 1337 }, { "epoch": 2.62, "learning_rate": 3.835616438356164e-05, "loss": 3.0785, "step": 1338 }, { "epoch": 2.62, "learning_rate": 3.816046966731898e-05, "loss": 3.0713, "step": 1339 }, { "epoch": 2.62, "learning_rate": 3.7964774951076317e-05, "loss": 3.0738, "step": 1340 }, { "epoch": 2.62, "learning_rate": 3.776908023483365e-05, "loss": 3.0579, "step": 1341 }, { "epoch": 2.63, "learning_rate": 3.757338551859099e-05, "loss": 3.0828, "step": 1342 }, { "epoch": 2.63, "learning_rate": 3.737769080234833e-05, "loss": 3.0829, "step": 1343 }, { "epoch": 2.63, "learning_rate": 3.718199608610567e-05, "loss": 3.0813, "step": 1344 }, { "epoch": 2.63, "learning_rate": 3.6986301369863006e-05, "loss": 3.0789, "step": 1345 }, { "epoch": 2.63, "learning_rate": 3.679060665362035e-05, "loss": 3.0724, "step": 1346 }, { "epoch": 2.64, "learning_rate": 3.6594911937377686e-05, "loss": 3.0742, "step": 1347 }, { "epoch": 2.64, "learning_rate": 3.639921722113502e-05, "loss": 3.07, "step": 1348 }, { "epoch": 2.64, "learning_rate": 3.6203522504892366e-05, "loss": 3.1018, "step": 1349 }, { "epoch": 2.64, "learning_rate": 3.60078277886497e-05, "loss": 3.102, "step": 1350 }, { "epoch": 2.64, "learning_rate": 3.581213307240704e-05, "loss": 3.0771, "step": 1351 }, { "epoch": 2.65, "learning_rate": 3.561643835616438e-05, "loss": 3.0765, "step": 1352 }, { "epoch": 2.65, "learning_rate": 3.542074363992172e-05, "loss": 3.086, "step": 1353 }, { "epoch": 2.65, "learning_rate": 3.5225048923679055e-05, "loss": 3.092, "step": 1354 }, { "epoch": 2.65, "learning_rate": 3.502935420743639e-05, "loss": 3.0907, "step": 1355 }, { "epoch": 2.65, "learning_rate": 3.4833659491193735e-05, "loss": 3.0756, "step": 1356 }, { "epoch": 2.65, "learning_rate": 3.463796477495107e-05, "loss": 3.0839, "step": 1357 }, { "epoch": 2.66, "learning_rate": 3.444227005870841e-05, "loss": 3.0753, "step": 1358 }, { "epoch": 2.66, "learning_rate": 3.424657534246575e-05, "loss": 3.0714, "step": 1359 }, { "epoch": 2.66, "learning_rate": 3.405088062622309e-05, "loss": 3.0728, "step": 1360 }, { "epoch": 2.66, "learning_rate": 3.3855185909980424e-05, "loss": 3.0777, "step": 1361 }, { "epoch": 2.66, "learning_rate": 3.365949119373777e-05, "loss": 3.0667, "step": 1362 }, { "epoch": 2.67, "learning_rate": 3.3463796477495104e-05, "loss": 3.0796, "step": 1363 }, { "epoch": 2.67, "learning_rate": 3.326810176125244e-05, "loss": 3.0698, "step": 1364 }, { "epoch": 2.67, "learning_rate": 3.307240704500978e-05, "loss": 3.0719, "step": 1365 }, { "epoch": 2.67, "learning_rate": 3.287671232876712e-05, "loss": 3.0711, "step": 1366 }, { "epoch": 2.67, "learning_rate": 3.268101761252446e-05, "loss": 3.0869, "step": 1367 }, { "epoch": 2.68, "learning_rate": 3.248532289628179e-05, "loss": 3.0937, "step": 1368 }, { "epoch": 2.68, "learning_rate": 3.2289628180039136e-05, "loss": 3.0653, "step": 1369 }, { "epoch": 2.68, "learning_rate": 3.209393346379647e-05, "loss": 3.0683, "step": 1370 }, { "epoch": 2.68, "learning_rate": 3.189823874755381e-05, "loss": 3.0893, "step": 1371 }, { "epoch": 2.68, "learning_rate": 3.170254403131115e-05, "loss": 3.0647, "step": 1372 }, { "epoch": 2.69, "learning_rate": 3.150684931506849e-05, "loss": 3.0788, "step": 1373 }, { "epoch": 2.69, "learning_rate": 3.1311154598825826e-05, "loss": 3.0813, "step": 1374 }, { "epoch": 2.69, "learning_rate": 3.111545988258317e-05, "loss": 3.0601, "step": 1375 }, { "epoch": 2.69, "learning_rate": 3.0919765166340506e-05, "loss": 3.0741, "step": 1376 }, { "epoch": 2.69, "learning_rate": 3.072407045009784e-05, "loss": 3.0684, "step": 1377 }, { "epoch": 2.7, "learning_rate": 3.052837573385518e-05, "loss": 3.0627, "step": 1378 }, { "epoch": 2.7, "learning_rate": 3.033268101761252e-05, "loss": 3.0901, "step": 1379 }, { "epoch": 2.7, "learning_rate": 3.013698630136986e-05, "loss": 3.0768, "step": 1380 }, { "epoch": 2.7, "learning_rate": 2.99412915851272e-05, "loss": 3.0755, "step": 1381 }, { "epoch": 2.7, "learning_rate": 2.9745596868884535e-05, "loss": 3.0837, "step": 1382 }, { "epoch": 2.71, "learning_rate": 2.9549902152641875e-05, "loss": 3.0699, "step": 1383 }, { "epoch": 2.71, "learning_rate": 2.9354207436399215e-05, "loss": 3.0797, "step": 1384 }, { "epoch": 2.71, "learning_rate": 2.915851272015655e-05, "loss": 3.0603, "step": 1385 }, { "epoch": 2.71, "learning_rate": 2.896281800391389e-05, "loss": 3.0857, "step": 1386 }, { "epoch": 2.71, "learning_rate": 2.8767123287671228e-05, "loss": 3.0896, "step": 1387 }, { "epoch": 2.72, "learning_rate": 2.8571428571428567e-05, "loss": 3.0757, "step": 1388 }, { "epoch": 2.72, "learning_rate": 2.8375733855185907e-05, "loss": 3.0854, "step": 1389 }, { "epoch": 2.72, "learning_rate": 2.8180039138943244e-05, "loss": 3.097, "step": 1390 }, { "epoch": 2.72, "learning_rate": 2.7984344422700584e-05, "loss": 3.0782, "step": 1391 }, { "epoch": 2.72, "learning_rate": 2.778864970645792e-05, "loss": 3.0538, "step": 1392 }, { "epoch": 2.73, "learning_rate": 2.759295499021526e-05, "loss": 3.061, "step": 1393 }, { "epoch": 2.73, "learning_rate": 2.73972602739726e-05, "loss": 3.0633, "step": 1394 }, { "epoch": 2.73, "learning_rate": 2.7201565557729937e-05, "loss": 3.0649, "step": 1395 }, { "epoch": 2.73, "learning_rate": 2.7005870841487277e-05, "loss": 3.0662, "step": 1396 }, { "epoch": 2.73, "learning_rate": 2.6810176125244613e-05, "loss": 3.0493, "step": 1397 }, { "epoch": 2.74, "learning_rate": 2.6614481409001953e-05, "loss": 3.0856, "step": 1398 }, { "epoch": 2.74, "learning_rate": 2.6418786692759293e-05, "loss": 3.0655, "step": 1399 }, { "epoch": 2.74, "learning_rate": 2.622309197651663e-05, "loss": 3.0681, "step": 1400 }, { "epoch": 2.74, "learning_rate": 2.602739726027397e-05, "loss": 3.0607, "step": 1401 }, { "epoch": 2.74, "learning_rate": 2.5831702544031306e-05, "loss": 3.0934, "step": 1402 }, { "epoch": 2.74, "learning_rate": 2.5636007827788646e-05, "loss": 3.075, "step": 1403 }, { "epoch": 2.75, "learning_rate": 2.5440313111545986e-05, "loss": 3.0954, "step": 1404 }, { "epoch": 2.75, "learning_rate": 2.5244618395303322e-05, "loss": 3.0619, "step": 1405 }, { "epoch": 2.75, "learning_rate": 2.5048923679060662e-05, "loss": 3.0723, "step": 1406 }, { "epoch": 2.75, "learning_rate": 2.4853228962818002e-05, "loss": 3.0663, "step": 1407 }, { "epoch": 2.75, "learning_rate": 2.465753424657534e-05, "loss": 3.0554, "step": 1408 }, { "epoch": 2.76, "learning_rate": 2.4461839530332678e-05, "loss": 3.0691, "step": 1409 }, { "epoch": 2.76, "learning_rate": 2.4266144814090015e-05, "loss": 3.0708, "step": 1410 }, { "epoch": 2.76, "learning_rate": 2.4070450097847355e-05, "loss": 3.0729, "step": 1411 }, { "epoch": 2.76, "learning_rate": 2.3874755381604695e-05, "loss": 3.0599, "step": 1412 }, { "epoch": 2.76, "learning_rate": 2.367906066536203e-05, "loss": 3.0671, "step": 1413 }, { "epoch": 2.77, "learning_rate": 2.348336594911937e-05, "loss": 3.0763, "step": 1414 }, { "epoch": 2.77, "learning_rate": 2.3287671232876708e-05, "loss": 3.0725, "step": 1415 }, { "epoch": 2.77, "learning_rate": 2.3091976516634047e-05, "loss": 3.0855, "step": 1416 }, { "epoch": 2.77, "learning_rate": 2.2896281800391387e-05, "loss": 3.0451, "step": 1417 }, { "epoch": 2.77, "learning_rate": 2.2700587084148724e-05, "loss": 3.0603, "step": 1418 }, { "epoch": 2.78, "learning_rate": 2.2504892367906064e-05, "loss": 3.0644, "step": 1419 }, { "epoch": 2.78, "learning_rate": 2.23091976516634e-05, "loss": 3.0869, "step": 1420 }, { "epoch": 2.78, "learning_rate": 2.211350293542074e-05, "loss": 3.0638, "step": 1421 }, { "epoch": 2.78, "learning_rate": 2.191780821917808e-05, "loss": 3.0608, "step": 1422 }, { "epoch": 2.78, "learning_rate": 2.1722113502935417e-05, "loss": 3.0477, "step": 1423 }, { "epoch": 2.79, "learning_rate": 2.1526418786692757e-05, "loss": 3.0687, "step": 1424 }, { "epoch": 2.79, "learning_rate": 2.1330724070450096e-05, "loss": 3.0685, "step": 1425 }, { "epoch": 2.79, "learning_rate": 2.1135029354207433e-05, "loss": 3.0634, "step": 1426 }, { "epoch": 2.79, "learning_rate": 2.0939334637964773e-05, "loss": 3.0752, "step": 1427 }, { "epoch": 2.79, "learning_rate": 2.074363992172211e-05, "loss": 3.0728, "step": 1428 }, { "epoch": 2.8, "learning_rate": 2.054794520547945e-05, "loss": 3.0551, "step": 1429 }, { "epoch": 2.8, "learning_rate": 2.035225048923679e-05, "loss": 3.063, "step": 1430 }, { "epoch": 2.8, "learning_rate": 2.0156555772994126e-05, "loss": 3.0545, "step": 1431 }, { "epoch": 2.8, "learning_rate": 1.9960861056751466e-05, "loss": 3.0742, "step": 1432 }, { "epoch": 2.8, "learning_rate": 1.9765166340508802e-05, "loss": 3.0602, "step": 1433 }, { "epoch": 2.81, "learning_rate": 1.9569471624266142e-05, "loss": 3.0536, "step": 1434 }, { "epoch": 2.81, "learning_rate": 1.9373776908023482e-05, "loss": 3.084, "step": 1435 }, { "epoch": 2.81, "learning_rate": 1.917808219178082e-05, "loss": 3.0582, "step": 1436 }, { "epoch": 2.81, "learning_rate": 1.8982387475538158e-05, "loss": 3.0698, "step": 1437 }, { "epoch": 2.81, "learning_rate": 1.8786692759295495e-05, "loss": 3.053, "step": 1438 }, { "epoch": 2.82, "learning_rate": 1.8590998043052835e-05, "loss": 3.0486, "step": 1439 }, { "epoch": 2.82, "learning_rate": 1.8395303326810175e-05, "loss": 3.0623, "step": 1440 }, { "epoch": 2.82, "learning_rate": 1.819960861056751e-05, "loss": 3.0892, "step": 1441 }, { "epoch": 2.82, "learning_rate": 1.800391389432485e-05, "loss": 3.0567, "step": 1442 }, { "epoch": 2.82, "learning_rate": 1.780821917808219e-05, "loss": 3.064, "step": 1443 }, { "epoch": 2.83, "learning_rate": 1.7612524461839527e-05, "loss": 3.0547, "step": 1444 }, { "epoch": 2.83, "learning_rate": 1.7416829745596867e-05, "loss": 3.0642, "step": 1445 }, { "epoch": 2.83, "learning_rate": 1.7221135029354204e-05, "loss": 3.0613, "step": 1446 }, { "epoch": 2.83, "learning_rate": 1.7025440313111544e-05, "loss": 3.0615, "step": 1447 }, { "epoch": 2.83, "learning_rate": 1.6829745596868884e-05, "loss": 3.0612, "step": 1448 }, { "epoch": 2.83, "learning_rate": 1.663405088062622e-05, "loss": 3.0518, "step": 1449 }, { "epoch": 2.84, "learning_rate": 1.643835616438356e-05, "loss": 3.0547, "step": 1450 }, { "epoch": 2.84, "learning_rate": 1.6242661448140897e-05, "loss": 3.049, "step": 1451 }, { "epoch": 2.84, "learning_rate": 1.6046966731898236e-05, "loss": 3.0518, "step": 1452 }, { "epoch": 2.84, "learning_rate": 1.5851272015655576e-05, "loss": 3.0503, "step": 1453 }, { "epoch": 2.84, "learning_rate": 1.5655577299412913e-05, "loss": 3.0655, "step": 1454 }, { "epoch": 2.85, "learning_rate": 1.5459882583170253e-05, "loss": 3.0711, "step": 1455 }, { "epoch": 2.85, "learning_rate": 1.526418786692759e-05, "loss": 3.062, "step": 1456 }, { "epoch": 2.85, "learning_rate": 1.506849315068493e-05, "loss": 3.0557, "step": 1457 }, { "epoch": 2.85, "learning_rate": 1.4872798434442267e-05, "loss": 3.0654, "step": 1458 }, { "epoch": 2.85, "learning_rate": 1.4677103718199607e-05, "loss": 3.0582, "step": 1459 }, { "epoch": 2.86, "learning_rate": 1.4481409001956946e-05, "loss": 3.0748, "step": 1460 }, { "epoch": 2.86, "learning_rate": 1.4285714285714284e-05, "loss": 3.0422, "step": 1461 }, { "epoch": 2.86, "learning_rate": 1.4090019569471622e-05, "loss": 3.0655, "step": 1462 }, { "epoch": 2.86, "learning_rate": 1.389432485322896e-05, "loss": 3.0531, "step": 1463 }, { "epoch": 2.86, "learning_rate": 1.36986301369863e-05, "loss": 3.0632, "step": 1464 }, { "epoch": 2.87, "learning_rate": 1.3502935420743638e-05, "loss": 3.0606, "step": 1465 }, { "epoch": 2.87, "learning_rate": 1.3307240704500976e-05, "loss": 3.0665, "step": 1466 }, { "epoch": 2.87, "learning_rate": 1.3111545988258315e-05, "loss": 3.0723, "step": 1467 }, { "epoch": 2.87, "learning_rate": 1.2915851272015653e-05, "loss": 3.0579, "step": 1468 }, { "epoch": 2.87, "learning_rate": 1.2720156555772993e-05, "loss": 3.0634, "step": 1469 }, { "epoch": 2.88, "learning_rate": 1.2524461839530331e-05, "loss": 3.0561, "step": 1470 }, { "epoch": 2.88, "learning_rate": 1.232876712328767e-05, "loss": 3.0639, "step": 1471 }, { "epoch": 2.88, "learning_rate": 1.2133072407045007e-05, "loss": 3.0578, "step": 1472 }, { "epoch": 2.88, "learning_rate": 1.1937377690802347e-05, "loss": 3.0509, "step": 1473 }, { "epoch": 2.88, "learning_rate": 1.1741682974559686e-05, "loss": 3.0622, "step": 1474 }, { "epoch": 2.89, "learning_rate": 1.1545988258317024e-05, "loss": 3.0732, "step": 1475 }, { "epoch": 2.89, "learning_rate": 1.1350293542074362e-05, "loss": 3.0526, "step": 1476 }, { "epoch": 2.89, "learning_rate": 1.11545988258317e-05, "loss": 3.0411, "step": 1477 }, { "epoch": 2.89, "learning_rate": 1.095890410958904e-05, "loss": 3.0522, "step": 1478 }, { "epoch": 2.89, "learning_rate": 1.0763209393346378e-05, "loss": 3.0538, "step": 1479 }, { "epoch": 2.9, "learning_rate": 1.0567514677103716e-05, "loss": 3.0508, "step": 1480 }, { "epoch": 2.9, "learning_rate": 1.0371819960861055e-05, "loss": 3.0542, "step": 1481 }, { "epoch": 2.9, "learning_rate": 1.0176125244618395e-05, "loss": 3.0544, "step": 1482 }, { "epoch": 2.9, "learning_rate": 9.980430528375733e-06, "loss": 3.0592, "step": 1483 }, { "epoch": 2.9, "learning_rate": 9.784735812133071e-06, "loss": 3.0523, "step": 1484 }, { "epoch": 2.91, "learning_rate": 9.58904109589041e-06, "loss": 3.0467, "step": 1485 }, { "epoch": 2.91, "learning_rate": 9.393346379647747e-06, "loss": 3.0587, "step": 1486 }, { "epoch": 2.91, "learning_rate": 9.197651663405087e-06, "loss": 3.0535, "step": 1487 }, { "epoch": 2.91, "learning_rate": 9.001956947162426e-06, "loss": 3.0468, "step": 1488 }, { "epoch": 2.91, "learning_rate": 8.806262230919764e-06, "loss": 3.0594, "step": 1489 }, { "epoch": 2.91, "learning_rate": 8.610567514677102e-06, "loss": 3.0614, "step": 1490 }, { "epoch": 2.92, "learning_rate": 8.414872798434442e-06, "loss": 3.0695, "step": 1491 }, { "epoch": 2.92, "learning_rate": 8.21917808219178e-06, "loss": 3.0522, "step": 1492 }, { "epoch": 2.92, "learning_rate": 8.023483365949118e-06, "loss": 3.0282, "step": 1493 }, { "epoch": 2.92, "learning_rate": 7.827788649706456e-06, "loss": 3.0886, "step": 1494 }, { "epoch": 2.92, "learning_rate": 7.632093933463795e-06, "loss": 3.0477, "step": 1495 }, { "epoch": 2.93, "learning_rate": 7.436399217221134e-06, "loss": 3.0401, "step": 1496 }, { "epoch": 2.93, "learning_rate": 7.240704500978473e-06, "loss": 3.0552, "step": 1497 }, { "epoch": 2.93, "learning_rate": 7.045009784735811e-06, "loss": 3.0469, "step": 1498 }, { "epoch": 2.93, "learning_rate": 6.84931506849315e-06, "loss": 3.043, "step": 1499 }, { "epoch": 2.93, "learning_rate": 6.653620352250488e-06, "loss": 3.0467, "step": 1500 }, { "epoch": 2.94, "learning_rate": 6.4579256360078264e-06, "loss": 3.0634, "step": 1501 }, { "epoch": 2.94, "learning_rate": 6.2622309197651655e-06, "loss": 3.0546, "step": 1502 }, { "epoch": 2.94, "learning_rate": 6.066536203522504e-06, "loss": 3.0741, "step": 1503 }, { "epoch": 2.94, "learning_rate": 5.870841487279843e-06, "loss": 3.0711, "step": 1504 }, { "epoch": 2.94, "learning_rate": 5.675146771037181e-06, "loss": 3.0573, "step": 1505 }, { "epoch": 2.95, "learning_rate": 5.47945205479452e-06, "loss": 3.0528, "step": 1506 }, { "epoch": 2.95, "learning_rate": 5.283757338551858e-06, "loss": 3.0352, "step": 1507 }, { "epoch": 2.95, "learning_rate": 5.088062622309197e-06, "loss": 3.0307, "step": 1508 }, { "epoch": 2.95, "learning_rate": 4.8923679060665355e-06, "loss": 3.0397, "step": 1509 }, { "epoch": 2.95, "learning_rate": 4.696673189823874e-06, "loss": 3.0631, "step": 1510 }, { "epoch": 2.96, "learning_rate": 4.500978473581213e-06, "loss": 3.0502, "step": 1511 }, { "epoch": 2.96, "learning_rate": 4.305283757338551e-06, "loss": 3.0575, "step": 1512 }, { "epoch": 2.96, "learning_rate": 4.10958904109589e-06, "loss": 3.0654, "step": 1513 }, { "epoch": 2.96, "learning_rate": 3.913894324853228e-06, "loss": 3.0512, "step": 1514 }, { "epoch": 2.96, "learning_rate": 3.718199608610567e-06, "loss": 3.0493, "step": 1515 }, { "epoch": 2.97, "learning_rate": 3.5225048923679055e-06, "loss": 3.0607, "step": 1516 }, { "epoch": 2.97, "learning_rate": 3.326810176125244e-06, "loss": 3.0561, "step": 1517 }, { "epoch": 2.97, "learning_rate": 3.1311154598825827e-06, "loss": 3.0601, "step": 1518 }, { "epoch": 2.97, "learning_rate": 2.9354207436399214e-06, "loss": 3.0527, "step": 1519 }, { "epoch": 2.97, "learning_rate": 2.73972602739726e-06, "loss": 3.0516, "step": 1520 }, { "epoch": 2.98, "learning_rate": 2.5440313111545986e-06, "loss": 3.0704, "step": 1521 }, { "epoch": 2.98, "learning_rate": 2.348336594911937e-06, "loss": 3.0557, "step": 1522 }, { "epoch": 2.98, "learning_rate": 2.1526418786692755e-06, "loss": 3.0465, "step": 1523 }, { "epoch": 2.98, "learning_rate": 1.956947162426614e-06, "loss": 3.06, "step": 1524 }, { "epoch": 2.98, "learning_rate": 1.7612524461839527e-06, "loss": 3.0631, "step": 1525 }, { "epoch": 2.99, "learning_rate": 1.5655577299412914e-06, "loss": 3.0715, "step": 1526 }, { "epoch": 2.99, "learning_rate": 1.36986301369863e-06, "loss": 3.0547, "step": 1527 }, { "epoch": 2.99, "learning_rate": 1.1741682974559684e-06, "loss": 3.0642, "step": 1528 }, { "epoch": 2.99, "learning_rate": 9.78473581213307e-07, "loss": 3.0562, "step": 1529 }, { "epoch": 2.99, "learning_rate": 7.827788649706457e-07, "loss": 3.0795, "step": 1530 }, { "epoch": 3.0, "learning_rate": 5.870841487279842e-07, "loss": 3.0513, "step": 1531 }, { "epoch": 3.0, "learning_rate": 3.9138943248532284e-07, "loss": 3.0782, "step": 1532 }, { "epoch": 3.0, "learning_rate": 1.9569471624266142e-07, "loss": 3.0541, "step": 1533 }, { "epoch": 3.0, "eval_loss": 14.038721084594727, "eval_runtime": 520.0539, "eval_samples_per_second": 30.441, "eval_steps_per_second": 0.069, "eval_wer": 0.9999636220770339, "step": 1533 }, { "epoch": 3.0, "step": 1533, "total_flos": 4.180774836907213e+19, "train_loss": 3.1717406544744464, "train_runtime": 43485.0285, "train_samples_per_second": 31.616, "train_steps_per_second": 0.035 } ], "max_steps": 1533, "num_train_epochs": 3, "total_flos": 4.180774836907213e+19, "trial_name": null, "trial_params": null }