{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999774113395076, "eval_steps": 1000, "global_step": 33201, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00903546419697312, "grad_norm": 7.14754056930542, "learning_rate": 6.8599999999999995e-06, "loss": 1.8388, "step": 100 }, { "epoch": 0.01807092839394624, "grad_norm": 6.974137783050537, "learning_rate": 1.386e-05, "loss": 1.2229, "step": 200 }, { "epoch": 0.02710639259091936, "grad_norm": 6.240023612976074, "learning_rate": 2.0859999999999997e-05, "loss": 0.9997, "step": 300 }, { "epoch": 0.03614185678789248, "grad_norm": 5.526381492614746, "learning_rate": 2.7859999999999998e-05, "loss": 0.8603, "step": 400 }, { "epoch": 0.0451773209848656, "grad_norm": 6.0143938064575195, "learning_rate": 3.4859999999999995e-05, "loss": 0.7584, "step": 500 }, { "epoch": 0.05421278518183872, "grad_norm": 5.181371212005615, "learning_rate": 4.1859999999999996e-05, "loss": 0.6841, "step": 600 }, { "epoch": 0.06324824937881184, "grad_norm": 4.984240531921387, "learning_rate": 4.885999999999999e-05, "loss": 0.644, "step": 700 }, { "epoch": 0.07228371357578496, "grad_norm": 5.0428266525268555, "learning_rate": 5.586e-05, "loss": 0.6025, "step": 800 }, { "epoch": 0.08131917777275807, "grad_norm": 4.736971378326416, "learning_rate": 6.285999999999999e-05, "loss": 0.5776, "step": 900 }, { "epoch": 0.0903546419697312, "grad_norm": 4.559544086456299, "learning_rate": 6.986e-05, "loss": 0.551, "step": 1000 }, { "epoch": 0.0903546419697312, "eval_loss": 0.2710006833076477, "eval_runtime": 89.4841, "eval_samples_per_second": 47.64, "eval_steps_per_second": 0.749, "eval_wer": 0.26942939113802994, "step": 1000 }, { "epoch": 0.09939010616670431, "grad_norm": 4.705749988555908, "learning_rate": 6.978696313779074e-05, "loss": 0.5356, "step": 1100 }, { "epoch": 0.10842557036367743, "grad_norm": 4.287839412689209, "learning_rate": 6.9569578584516e-05, "loss": 0.5058, "step": 1200 }, { "epoch": 0.11746103456065056, "grad_norm": 3.9484827518463135, "learning_rate": 6.935219403124125e-05, "loss": 0.4863, "step": 1300 }, { "epoch": 0.12649649875762367, "grad_norm": 4.207424640655518, "learning_rate": 6.913480947796651e-05, "loss": 0.468, "step": 1400 }, { "epoch": 0.1355319629545968, "grad_norm": 4.078378200531006, "learning_rate": 6.891742492469178e-05, "loss": 0.4522, "step": 1500 }, { "epoch": 0.14456742715156992, "grad_norm": 3.6946797370910645, "learning_rate": 6.870004037141703e-05, "loss": 0.4396, "step": 1600 }, { "epoch": 0.15360289134854302, "grad_norm": 3.742530345916748, "learning_rate": 6.848265581814229e-05, "loss": 0.4338, "step": 1700 }, { "epoch": 0.16263835554551614, "grad_norm": 4.0423078536987305, "learning_rate": 6.826527126486755e-05, "loss": 0.4232, "step": 1800 }, { "epoch": 0.17167381974248927, "grad_norm": 3.7348833084106445, "learning_rate": 6.80478867115928e-05, "loss": 0.4144, "step": 1900 }, { "epoch": 0.1807092839394624, "grad_norm": 3.4496703147888184, "learning_rate": 6.783050215831805e-05, "loss": 0.4016, "step": 2000 }, { "epoch": 0.1807092839394624, "eval_loss": 0.20093074440956116, "eval_runtime": 89.3016, "eval_samples_per_second": 47.737, "eval_steps_per_second": 0.75, "eval_wer": 0.20614174901710763, "step": 2000 }, { "epoch": 0.18974474813643552, "grad_norm": 3.3866732120513916, "learning_rate": 6.761311760504332e-05, "loss": 0.3858, "step": 2100 }, { "epoch": 0.19878021233340862, "grad_norm": 4.071012496948242, "learning_rate": 6.739573305176857e-05, "loss": 0.3875, "step": 2200 }, { "epoch": 0.20781567653038174, "grad_norm": 3.373796224594116, "learning_rate": 6.717834849849383e-05, "loss": 0.3795, "step": 2300 }, { "epoch": 0.21685114072735487, "grad_norm": 3.105025291442871, "learning_rate": 6.696096394521908e-05, "loss": 0.3787, "step": 2400 }, { "epoch": 0.225886604924328, "grad_norm": 3.8723206520080566, "learning_rate": 6.674357939194434e-05, "loss": 0.3716, "step": 2500 }, { "epoch": 0.23492206912130112, "grad_norm": 3.2043449878692627, "learning_rate": 6.65261948386696e-05, "loss": 0.3662, "step": 2600 }, { "epoch": 0.24395753331827422, "grad_norm": 3.2647688388824463, "learning_rate": 6.631098413092761e-05, "loss": 0.3567, "step": 2700 }, { "epoch": 0.25299299751524734, "grad_norm": 3.255851984024048, "learning_rate": 6.609359957765287e-05, "loss": 0.3541, "step": 2800 }, { "epoch": 0.26202846171222044, "grad_norm": 3.103607177734375, "learning_rate": 6.587621502437812e-05, "loss": 0.3551, "step": 2900 }, { "epoch": 0.2710639259091936, "grad_norm": 3.7592177391052246, "learning_rate": 6.565883047110337e-05, "loss": 0.3449, "step": 3000 }, { "epoch": 0.2710639259091936, "eval_loss": 0.17070473730564117, "eval_runtime": 88.3474, "eval_samples_per_second": 48.253, "eval_steps_per_second": 0.758, "eval_wer": 0.17702688343427903, "step": 3000 }, { "epoch": 0.2800993901061667, "grad_norm": 2.7764692306518555, "learning_rate": 6.544144591782863e-05, "loss": 0.3477, "step": 3100 }, { "epoch": 0.28913485430313984, "grad_norm": 2.980421543121338, "learning_rate": 6.522406136455388e-05, "loss": 0.3367, "step": 3200 }, { "epoch": 0.29817031850011294, "grad_norm": 3.0955636501312256, "learning_rate": 6.500667681127915e-05, "loss": 0.3347, "step": 3300 }, { "epoch": 0.30720578269708604, "grad_norm": 2.942781925201416, "learning_rate": 6.47892922580044e-05, "loss": 0.3363, "step": 3400 }, { "epoch": 0.3162412468940592, "grad_norm": 2.7990803718566895, "learning_rate": 6.457190770472966e-05, "loss": 0.3324, "step": 3500 }, { "epoch": 0.3252767110910323, "grad_norm": 3.0384480953216553, "learning_rate": 6.435452315145492e-05, "loss": 0.3273, "step": 3600 }, { "epoch": 0.33431217528800544, "grad_norm": 2.8415443897247314, "learning_rate": 6.413713859818017e-05, "loss": 0.3231, "step": 3700 }, { "epoch": 0.34334763948497854, "grad_norm": 2.706265687942505, "learning_rate": 6.391975404490544e-05, "loss": 0.3224, "step": 3800 }, { "epoch": 0.35238310368195164, "grad_norm": 2.77278995513916, "learning_rate": 6.370236949163069e-05, "loss": 0.32, "step": 3900 }, { "epoch": 0.3614185678789248, "grad_norm": 2.9242990016937256, "learning_rate": 6.348498493835595e-05, "loss": 0.3147, "step": 4000 }, { "epoch": 0.3614185678789248, "eval_loss": 0.1588164120912552, "eval_runtime": 89.3911, "eval_samples_per_second": 47.689, "eval_steps_per_second": 0.75, "eval_wer": 0.1649984061204973, "step": 4000 }, { "epoch": 0.3704540320758979, "grad_norm": 3.196282148361206, "learning_rate": 6.32676003850812e-05, "loss": 0.3112, "step": 4100 }, { "epoch": 0.37948949627287104, "grad_norm": 3.880776882171631, "learning_rate": 6.305021583180646e-05, "loss": 0.3154, "step": 4200 }, { "epoch": 0.38852496046984414, "grad_norm": 2.7569668292999268, "learning_rate": 6.283283127853171e-05, "loss": 0.3108, "step": 4300 }, { "epoch": 0.39756042466681724, "grad_norm": 2.951040267944336, "learning_rate": 6.261544672525697e-05, "loss": 0.3093, "step": 4400 }, { "epoch": 0.4065958888637904, "grad_norm": 2.667750358581543, "learning_rate": 6.239806217198222e-05, "loss": 0.3082, "step": 4500 }, { "epoch": 0.4156313530607635, "grad_norm": 2.872540235519409, "learning_rate": 6.218067761870749e-05, "loss": 0.3005, "step": 4600 }, { "epoch": 0.42466681725773664, "grad_norm": 3.15378999710083, "learning_rate": 6.196329306543275e-05, "loss": 0.2994, "step": 4700 }, { "epoch": 0.43370228145470974, "grad_norm": 2.879260301589966, "learning_rate": 6.1745908512158e-05, "loss": 0.2959, "step": 4800 }, { "epoch": 0.44273774565168283, "grad_norm": 2.811612367630005, "learning_rate": 6.152852395888326e-05, "loss": 0.2974, "step": 4900 }, { "epoch": 0.451773209848656, "grad_norm": 2.7307889461517334, "learning_rate": 6.131113940560851e-05, "loss": 0.2936, "step": 5000 }, { "epoch": 0.451773209848656, "eval_loss": 0.1471971571445465, "eval_runtime": 88.7501, "eval_samples_per_second": 48.034, "eval_steps_per_second": 0.755, "eval_wer": 0.1551376049304006, "step": 5000 }, { "epoch": 0.4608086740456291, "grad_norm": 2.734050750732422, "learning_rate": 6.109375485233378e-05, "loss": 0.2917, "step": 5100 }, { "epoch": 0.46984413824260224, "grad_norm": 2.650491952896118, "learning_rate": 6.0876370299059026e-05, "loss": 0.2929, "step": 5200 }, { "epoch": 0.47887960243957534, "grad_norm": 2.519413709640503, "learning_rate": 6.065898574578429e-05, "loss": 0.2919, "step": 5300 }, { "epoch": 0.48791506663654843, "grad_norm": 2.6014676094055176, "learning_rate": 6.0441601192509545e-05, "loss": 0.2811, "step": 5400 }, { "epoch": 0.4969505308335216, "grad_norm": 2.7325778007507324, "learning_rate": 6.02242166392348e-05, "loss": 0.2878, "step": 5500 }, { "epoch": 0.5059859950304947, "grad_norm": 2.636491298675537, "learning_rate": 6.000683208596006e-05, "loss": 0.2821, "step": 5600 }, { "epoch": 0.5150214592274678, "grad_norm": 2.6922860145568848, "learning_rate": 5.9789447532685315e-05, "loss": 0.2828, "step": 5700 }, { "epoch": 0.5240569234244409, "grad_norm": 2.4657480716705322, "learning_rate": 5.957206297941057e-05, "loss": 0.2845, "step": 5800 }, { "epoch": 0.5330923876214141, "grad_norm": 2.6574530601501465, "learning_rate": 5.935467842613583e-05, "loss": 0.28, "step": 5900 }, { "epoch": 0.5421278518183872, "grad_norm": 2.769786834716797, "learning_rate": 5.913729387286109e-05, "loss": 0.2758, "step": 6000 }, { "epoch": 0.5421278518183872, "eval_loss": 0.1405603438615799, "eval_runtime": 90.2531, "eval_samples_per_second": 47.234, "eval_steps_per_second": 0.742, "eval_wer": 0.14793326957815323, "step": 6000 }, { "epoch": 0.5511633160153603, "grad_norm": 2.6292548179626465, "learning_rate": 5.891990931958634e-05, "loss": 0.2744, "step": 6100 }, { "epoch": 0.5601987802123334, "grad_norm": 2.536770820617676, "learning_rate": 5.87025247663116e-05, "loss": 0.2735, "step": 6200 }, { "epoch": 0.5692342444093065, "grad_norm": 2.3336434364318848, "learning_rate": 5.848514021303685e-05, "loss": 0.2764, "step": 6300 }, { "epoch": 0.5782697086062797, "grad_norm": 2.677401542663574, "learning_rate": 5.8267755659762116e-05, "loss": 0.2761, "step": 6400 }, { "epoch": 0.5873051728032528, "grad_norm": 2.634038209915161, "learning_rate": 5.805037110648737e-05, "loss": 0.2694, "step": 6500 }, { "epoch": 0.5963406370002259, "grad_norm": 2.643404245376587, "learning_rate": 5.783298655321262e-05, "loss": 0.263, "step": 6600 }, { "epoch": 0.605376101197199, "grad_norm": 2.2921056747436523, "learning_rate": 5.7615601999937885e-05, "loss": 0.2738, "step": 6700 }, { "epoch": 0.6144115653941721, "grad_norm": 2.398670196533203, "learning_rate": 5.739821744666314e-05, "loss": 0.2682, "step": 6800 }, { "epoch": 0.6234470295911453, "grad_norm": 2.447571277618408, "learning_rate": 5.71808328933884e-05, "loss": 0.2653, "step": 6900 }, { "epoch": 0.6324824937881184, "grad_norm": 2.270413637161255, "learning_rate": 5.6963448340113654e-05, "loss": 0.2663, "step": 7000 }, { "epoch": 0.6324824937881184, "eval_loss": 0.13218513131141663, "eval_runtime": 89.0739, "eval_samples_per_second": 47.859, "eval_steps_per_second": 0.752, "eval_wer": 0.13926256508341303, "step": 7000 }, { "epoch": 0.6415179579850915, "grad_norm": 2.406534433364868, "learning_rate": 5.674606378683892e-05, "loss": 0.2701, "step": 7100 }, { "epoch": 0.6505534221820646, "grad_norm": 2.3954741954803467, "learning_rate": 5.652867923356417e-05, "loss": 0.2661, "step": 7200 }, { "epoch": 0.6595888863790377, "grad_norm": 2.3920400142669678, "learning_rate": 5.631129468028943e-05, "loss": 0.2662, "step": 7300 }, { "epoch": 0.6686243505760109, "grad_norm": 2.6168298721313477, "learning_rate": 5.6096083972547435e-05, "loss": 0.259, "step": 7400 }, { "epoch": 0.677659814772984, "grad_norm": 2.351517915725708, "learning_rate": 5.587869941927269e-05, "loss": 0.2531, "step": 7500 }, { "epoch": 0.6866952789699571, "grad_norm": 2.4925589561462402, "learning_rate": 5.566131486599794e-05, "loss": 0.2584, "step": 7600 }, { "epoch": 0.6957307431669302, "grad_norm": 2.465437650680542, "learning_rate": 5.5443930312723204e-05, "loss": 0.2572, "step": 7700 }, { "epoch": 0.7047662073639033, "grad_norm": 2.383103370666504, "learning_rate": 5.522654575944846e-05, "loss": 0.2541, "step": 7800 }, { "epoch": 0.7138016715608765, "grad_norm": 2.254746675491333, "learning_rate": 5.5009161206173716e-05, "loss": 0.2551, "step": 7900 }, { "epoch": 0.7228371357578496, "grad_norm": 2.601073980331421, "learning_rate": 5.479177665289897e-05, "loss": 0.2613, "step": 8000 }, { "epoch": 0.7228371357578496, "eval_loss": 0.1282639354467392, "eval_runtime": 89.4564, "eval_samples_per_second": 47.654, "eval_steps_per_second": 0.749, "eval_wer": 0.1401763893316332, "step": 8000 }, { "epoch": 0.7318725999548227, "grad_norm": 2.6043508052825928, "learning_rate": 5.4574392099624236e-05, "loss": 0.2527, "step": 8100 }, { "epoch": 0.7409080641517958, "grad_norm": 2.4817826747894287, "learning_rate": 5.4357007546349486e-05, "loss": 0.2531, "step": 8200 }, { "epoch": 0.7499435283487689, "grad_norm": 2.2043120861053467, "learning_rate": 5.413962299307475e-05, "loss": 0.2508, "step": 8300 }, { "epoch": 0.7589789925457421, "grad_norm": 2.436621904373169, "learning_rate": 5.39222384398e-05, "loss": 0.2524, "step": 8400 }, { "epoch": 0.7680144567427152, "grad_norm": 2.2948272228240967, "learning_rate": 5.3704853886525255e-05, "loss": 0.2511, "step": 8500 }, { "epoch": 0.7770499209396883, "grad_norm": 2.516068935394287, "learning_rate": 5.348746933325052e-05, "loss": 0.2503, "step": 8600 }, { "epoch": 0.7860853851366614, "grad_norm": 2.286062002182007, "learning_rate": 5.327008477997577e-05, "loss": 0.249, "step": 8700 }, { "epoch": 0.7951208493336345, "grad_norm": 2.2099480628967285, "learning_rate": 5.305270022670103e-05, "loss": 0.2476, "step": 8800 }, { "epoch": 0.8041563135306077, "grad_norm": 2.279094934463501, "learning_rate": 5.283531567342629e-05, "loss": 0.2477, "step": 8900 }, { "epoch": 0.8131917777275808, "grad_norm": 2.5608932971954346, "learning_rate": 5.2617931120151544e-05, "loss": 0.2491, "step": 9000 }, { "epoch": 0.8131917777275808, "eval_loss": 0.12159302085638046, "eval_runtime": 88.1859, "eval_samples_per_second": 48.341, "eval_steps_per_second": 0.76, "eval_wer": 0.1319094676442461, "step": 9000 }, { "epoch": 0.8222272419245539, "grad_norm": 2.8134467601776123, "learning_rate": 5.24005465668768e-05, "loss": 0.2393, "step": 9100 }, { "epoch": 0.831262706121527, "grad_norm": 2.109177589416504, "learning_rate": 5.218316201360206e-05, "loss": 0.247, "step": 9200 }, { "epoch": 0.8402981703185001, "grad_norm": 2.333599090576172, "learning_rate": 5.196577746032731e-05, "loss": 0.2396, "step": 9300 }, { "epoch": 0.8493336345154733, "grad_norm": 2.263291120529175, "learning_rate": 5.174839290705257e-05, "loss": 0.2454, "step": 9400 }, { "epoch": 0.8583690987124464, "grad_norm": 2.1932239532470703, "learning_rate": 5.153100835377783e-05, "loss": 0.2441, "step": 9500 }, { "epoch": 0.8674045629094195, "grad_norm": 2.3545312881469727, "learning_rate": 5.131362380050308e-05, "loss": 0.2388, "step": 9600 }, { "epoch": 0.8764400271063926, "grad_norm": 1.9302074909210205, "learning_rate": 5.1096239247228345e-05, "loss": 0.2386, "step": 9700 }, { "epoch": 0.8854754913033657, "grad_norm": 2.2227907180786133, "learning_rate": 5.0878854693953595e-05, "loss": 0.245, "step": 9800 }, { "epoch": 0.8945109555003389, "grad_norm": 2.0656354427337646, "learning_rate": 5.066147014067886e-05, "loss": 0.2341, "step": 9900 }, { "epoch": 0.903546419697312, "grad_norm": 2.062394142150879, "learning_rate": 5.0444085587404114e-05, "loss": 0.238, "step": 10000 }, { "epoch": 0.903546419697312, "eval_loss": 0.11923061311244965, "eval_runtime": 88.4115, "eval_samples_per_second": 48.218, "eval_steps_per_second": 0.758, "eval_wer": 0.1290829879927744, "step": 10000 }, { "epoch": 0.9125818838942851, "grad_norm": 2.264702081680298, "learning_rate": 5.022670103412938e-05, "loss": 0.2386, "step": 10100 }, { "epoch": 0.9216173480912582, "grad_norm": 2.0281338691711426, "learning_rate": 5.000931648085463e-05, "loss": 0.2374, "step": 10200 }, { "epoch": 0.9306528122882313, "grad_norm": 2.0940310955047607, "learning_rate": 4.9791931927579883e-05, "loss": 0.2349, "step": 10300 }, { "epoch": 0.9396882764852045, "grad_norm": 2.1335864067077637, "learning_rate": 4.957454737430514e-05, "loss": 0.2326, "step": 10400 }, { "epoch": 0.9487237406821776, "grad_norm": 2.3644163608551025, "learning_rate": 4.9357162821030396e-05, "loss": 0.2314, "step": 10500 }, { "epoch": 0.9577592048791507, "grad_norm": 2.029175043106079, "learning_rate": 4.91419521132884e-05, "loss": 0.2363, "step": 10600 }, { "epoch": 0.9667946690761238, "grad_norm": 2.630101203918457, "learning_rate": 4.8924567560013664e-05, "loss": 0.2298, "step": 10700 }, { "epoch": 0.9758301332730969, "grad_norm": 2.356724500656128, "learning_rate": 4.870718300673891e-05, "loss": 0.2269, "step": 10800 }, { "epoch": 0.9848655974700701, "grad_norm": 2.1543145179748535, "learning_rate": 4.8489798453464176e-05, "loss": 0.2377, "step": 10900 }, { "epoch": 0.9939010616670432, "grad_norm": 2.399824857711792, "learning_rate": 4.827241390018943e-05, "loss": 0.2287, "step": 11000 }, { "epoch": 0.9939010616670432, "eval_loss": 0.11506820470094681, "eval_runtime": 89.5431, "eval_samples_per_second": 47.608, "eval_steps_per_second": 0.748, "eval_wer": 0.1275528636701732, "step": 11000 }, { "epoch": 1.0028913485430313, "grad_norm": 2.18354868888855, "learning_rate": 4.805502934691468e-05, "loss": 0.2129, "step": 11100 }, { "epoch": 1.0119268127400045, "grad_norm": 2.018084764480591, "learning_rate": 4.7837644793639945e-05, "loss": 0.1792, "step": 11200 }, { "epoch": 1.0209622769369777, "grad_norm": 2.1397042274475098, "learning_rate": 4.76202602403652e-05, "loss": 0.1794, "step": 11300 }, { "epoch": 1.0299977411339507, "grad_norm": 1.925986886024475, "learning_rate": 4.740287568709046e-05, "loss": 0.1816, "step": 11400 }, { "epoch": 1.039033205330924, "grad_norm": 2.0704362392425537, "learning_rate": 4.7185491133815715e-05, "loss": 0.1767, "step": 11500 }, { "epoch": 1.048068669527897, "grad_norm": 1.8338583707809448, "learning_rate": 4.696810658054098e-05, "loss": 0.1767, "step": 11600 }, { "epoch": 1.05710413372487, "grad_norm": 1.9655053615570068, "learning_rate": 4.675072202726623e-05, "loss": 0.1814, "step": 11700 }, { "epoch": 1.0661395979218433, "grad_norm": 1.880100965499878, "learning_rate": 4.653333747399149e-05, "loss": 0.1786, "step": 11800 }, { "epoch": 1.0751750621188163, "grad_norm": 2.52089524269104, "learning_rate": 4.631595292071674e-05, "loss": 0.1796, "step": 11900 }, { "epoch": 1.0842105263157895, "grad_norm": 2.179574728012085, "learning_rate": 4.6098568367441997e-05, "loss": 0.1798, "step": 12000 }, { "epoch": 1.0842105263157895, "eval_loss": 0.11312589794397354, "eval_runtime": 89.9356, "eval_samples_per_second": 47.401, "eval_steps_per_second": 0.745, "eval_wer": 0.12343002868983105, "step": 12000 }, { "epoch": 1.0932459905127625, "grad_norm": 2.3577959537506104, "learning_rate": 4.588118381416726e-05, "loss": 0.1834, "step": 12100 }, { "epoch": 1.1022814547097357, "grad_norm": 2.2626988887786865, "learning_rate": 4.566379926089251e-05, "loss": 0.1792, "step": 12200 }, { "epoch": 1.111316918906709, "grad_norm": 2.0373926162719727, "learning_rate": 4.544641470761777e-05, "loss": 0.1773, "step": 12300 }, { "epoch": 1.120352383103682, "grad_norm": 1.8774733543395996, "learning_rate": 4.522903015434303e-05, "loss": 0.1763, "step": 12400 }, { "epoch": 1.1293878473006551, "grad_norm": 2.0867061614990234, "learning_rate": 4.5011645601068285e-05, "loss": 0.1775, "step": 12500 }, { "epoch": 1.138423311497628, "grad_norm": 1.822313904762268, "learning_rate": 4.479426104779354e-05, "loss": 0.182, "step": 12600 }, { "epoch": 1.1474587756946013, "grad_norm": 1.9483801126480103, "learning_rate": 4.4579050340051546e-05, "loss": 0.1801, "step": 12700 }, { "epoch": 1.1564942398915745, "grad_norm": 1.7819561958312988, "learning_rate": 4.436166578677681e-05, "loss": 0.175, "step": 12800 }, { "epoch": 1.1655297040885475, "grad_norm": 2.2512149810791016, "learning_rate": 4.414428123350206e-05, "loss": 0.1771, "step": 12900 }, { "epoch": 1.1745651682855207, "grad_norm": 2.0755016803741455, "learning_rate": 4.3926896680227315e-05, "loss": 0.1791, "step": 13000 }, { "epoch": 1.1745651682855207, "eval_loss": 0.1113397553563118, "eval_runtime": 89.8896, "eval_samples_per_second": 47.425, "eval_steps_per_second": 0.745, "eval_wer": 0.11858463500159389, "step": 13000 }, { "epoch": 1.1836006324824937, "grad_norm": 1.8246344327926636, "learning_rate": 4.370951212695258e-05, "loss": 0.1826, "step": 13100 }, { "epoch": 1.192636096679467, "grad_norm": 2.0341689586639404, "learning_rate": 4.349212757367783e-05, "loss": 0.1795, "step": 13200 }, { "epoch": 1.2016715608764401, "grad_norm": 1.8964906930923462, "learning_rate": 4.327474302040309e-05, "loss": 0.1777, "step": 13300 }, { "epoch": 1.210707025073413, "grad_norm": 1.9983662366867065, "learning_rate": 4.305735846712835e-05, "loss": 0.1777, "step": 13400 }, { "epoch": 1.2197424892703863, "grad_norm": 1.9901524782180786, "learning_rate": 4.2839973913853604e-05, "loss": 0.1745, "step": 13500 }, { "epoch": 1.2287779534673593, "grad_norm": 2.0231523513793945, "learning_rate": 4.262258936057886e-05, "loss": 0.183, "step": 13600 }, { "epoch": 1.2378134176643325, "grad_norm": 2.097205877304077, "learning_rate": 4.240520480730412e-05, "loss": 0.1795, "step": 13700 }, { "epoch": 1.2468488818613057, "grad_norm": 1.8367393016815186, "learning_rate": 4.218782025402937e-05, "loss": 0.1746, "step": 13800 }, { "epoch": 1.2558843460582787, "grad_norm": 2.2997806072235107, "learning_rate": 4.197043570075463e-05, "loss": 0.1781, "step": 13900 }, { "epoch": 1.264919810255252, "grad_norm": 1.9972946643829346, "learning_rate": 4.1753051147479886e-05, "loss": 0.1787, "step": 14000 }, { "epoch": 1.264919810255252, "eval_loss": 0.10852447897195816, "eval_runtime": 88.2121, "eval_samples_per_second": 48.327, "eval_steps_per_second": 0.76, "eval_wer": 0.11862713845499948, "step": 14000 }, { "epoch": 1.273955274452225, "grad_norm": 1.9734628200531006, "learning_rate": 4.153566659420514e-05, "loss": 0.178, "step": 14100 }, { "epoch": 1.282990738649198, "grad_norm": 2.0544159412384033, "learning_rate": 4.1318282040930405e-05, "loss": 0.1704, "step": 14200 }, { "epoch": 1.2920262028461713, "grad_norm": 1.8968679904937744, "learning_rate": 4.1100897487655655e-05, "loss": 0.1772, "step": 14300 }, { "epoch": 1.3010616670431443, "grad_norm": 1.8103258609771729, "learning_rate": 4.088351293438092e-05, "loss": 0.179, "step": 14400 }, { "epoch": 1.3100971312401175, "grad_norm": 1.9365414381027222, "learning_rate": 4.0666128381106174e-05, "loss": 0.1775, "step": 14500 }, { "epoch": 1.3191325954370905, "grad_norm": 1.9121586084365845, "learning_rate": 4.044874382783143e-05, "loss": 0.1772, "step": 14600 }, { "epoch": 1.3281680596340637, "grad_norm": 2.0764715671539307, "learning_rate": 4.023135927455669e-05, "loss": 0.1719, "step": 14700 }, { "epoch": 1.337203523831037, "grad_norm": 1.9687429666519165, "learning_rate": 4.0013974721281944e-05, "loss": 0.1735, "step": 14800 }, { "epoch": 1.34623898802801, "grad_norm": 2.0690395832061768, "learning_rate": 3.97965901680072e-05, "loss": 0.1797, "step": 14900 }, { "epoch": 1.355274452224983, "grad_norm": 2.121548891067505, "learning_rate": 3.9579205614732456e-05, "loss": 0.1771, "step": 15000 }, { "epoch": 1.355274452224983, "eval_loss": 0.10677234828472137, "eval_runtime": 88.6946, "eval_samples_per_second": 48.064, "eval_steps_per_second": 0.755, "eval_wer": 0.11541812772287749, "step": 15000 }, { "epoch": 1.364309916421956, "grad_norm": 2.3323662281036377, "learning_rate": 3.936182106145772e-05, "loss": 0.173, "step": 15100 }, { "epoch": 1.3733453806189293, "grad_norm": 2.262308359146118, "learning_rate": 3.914443650818297e-05, "loss": 0.1723, "step": 15200 }, { "epoch": 1.3823808448159025, "grad_norm": 2.0854151248931885, "learning_rate": 3.892705195490823e-05, "loss": 0.1753, "step": 15300 }, { "epoch": 1.3914163090128755, "grad_norm": 2.0246262550354004, "learning_rate": 3.870966740163348e-05, "loss": 0.1742, "step": 15400 }, { "epoch": 1.4004517732098487, "grad_norm": 2.0298593044281006, "learning_rate": 3.8492282848358745e-05, "loss": 0.1727, "step": 15500 }, { "epoch": 1.4094872374068217, "grad_norm": 1.8497194051742554, "learning_rate": 3.8274898295084e-05, "loss": 0.1738, "step": 15600 }, { "epoch": 1.418522701603795, "grad_norm": 2.052497386932373, "learning_rate": 3.805751374180925e-05, "loss": 0.1719, "step": 15700 }, { "epoch": 1.427558165800768, "grad_norm": 1.948426604270935, "learning_rate": 3.7840129188534514e-05, "loss": 0.1692, "step": 15800 }, { "epoch": 1.436593629997741, "grad_norm": 2.078310012817383, "learning_rate": 3.762274463525977e-05, "loss": 0.1736, "step": 15900 }, { "epoch": 1.4456290941947143, "grad_norm": 1.8413662910461426, "learning_rate": 3.740536008198503e-05, "loss": 0.1728, "step": 16000 }, { "epoch": 1.4456290941947143, "eval_loss": 0.10456942021846771, "eval_runtime": 88.873, "eval_samples_per_second": 47.967, "eval_steps_per_second": 0.754, "eval_wer": 0.11354797577303156, "step": 16000 }, { "epoch": 1.4546645583916873, "grad_norm": 1.894006371498108, "learning_rate": 3.7187975528710283e-05, "loss": 0.1737, "step": 16100 }, { "epoch": 1.4637000225886605, "grad_norm": 2.0090203285217285, "learning_rate": 3.6970590975435547e-05, "loss": 0.1723, "step": 16200 }, { "epoch": 1.4727354867856337, "grad_norm": 1.896735668182373, "learning_rate": 3.6753206422160796e-05, "loss": 0.1744, "step": 16300 }, { "epoch": 1.4817709509826067, "grad_norm": 1.9422425031661987, "learning_rate": 3.653582186888606e-05, "loss": 0.1662, "step": 16400 }, { "epoch": 1.49080641517958, "grad_norm": 2.205997943878174, "learning_rate": 3.6318437315611316e-05, "loss": 0.1726, "step": 16500 }, { "epoch": 1.4998418793765529, "grad_norm": 2.2248659133911133, "learning_rate": 3.6101052762336565e-05, "loss": 0.1739, "step": 16600 }, { "epoch": 1.508877343573526, "grad_norm": 1.9154504537582397, "learning_rate": 3.588366820906183e-05, "loss": 0.1751, "step": 16700 }, { "epoch": 1.5179128077704993, "grad_norm": 3.7510364055633545, "learning_rate": 3.566845750131983e-05, "loss": 0.1691, "step": 16800 }, { "epoch": 1.5269482719674723, "grad_norm": 1.9326035976409912, "learning_rate": 3.545107294804509e-05, "loss": 0.1736, "step": 16900 }, { "epoch": 1.5359837361644455, "grad_norm": 2.1534535884857178, "learning_rate": 3.5233688394770345e-05, "loss": 0.1714, "step": 17000 }, { "epoch": 1.5359837361644455, "eval_loss": 0.10288450121879578, "eval_runtime": 88.7852, "eval_samples_per_second": 48.015, "eval_steps_per_second": 0.755, "eval_wer": 0.11522686218255233, "step": 17000 }, { "epoch": 1.5450192003614185, "grad_norm": 2.0503385066986084, "learning_rate": 3.50163038414956e-05, "loss": 0.1697, "step": 17100 }, { "epoch": 1.5540546645583917, "grad_norm": 2.1852426528930664, "learning_rate": 3.479891928822086e-05, "loss": 0.1687, "step": 17200 }, { "epoch": 1.563090128755365, "grad_norm": 1.9237619638442993, "learning_rate": 3.4581534734946115e-05, "loss": 0.1699, "step": 17300 }, { "epoch": 1.572125592952338, "grad_norm": 1.9139324426651, "learning_rate": 3.436415018167137e-05, "loss": 0.1721, "step": 17400 }, { "epoch": 1.581161057149311, "grad_norm": 1.8762294054031372, "learning_rate": 3.414676562839663e-05, "loss": 0.1682, "step": 17500 }, { "epoch": 1.590196521346284, "grad_norm": 1.6753225326538086, "learning_rate": 3.392938107512189e-05, "loss": 0.1648, "step": 17600 }, { "epoch": 1.5992319855432573, "grad_norm": 2.4316673278808594, "learning_rate": 3.371199652184715e-05, "loss": 0.1701, "step": 17700 }, { "epoch": 1.6082674497402305, "grad_norm": 1.9219187498092651, "learning_rate": 3.34946119685724e-05, "loss": 0.1669, "step": 17800 }, { "epoch": 1.6173029139372035, "grad_norm": 1.6715503931045532, "learning_rate": 3.327722741529766e-05, "loss": 0.1675, "step": 17900 }, { "epoch": 1.6263383781341767, "grad_norm": 1.9405934810638428, "learning_rate": 3.3059842862022916e-05, "loss": 0.1706, "step": 18000 }, { "epoch": 1.6263383781341767, "eval_loss": 0.10067987442016602, "eval_runtime": 89.3754, "eval_samples_per_second": 47.698, "eval_steps_per_second": 0.75, "eval_wer": 0.11174157900329401, "step": 18000 }, { "epoch": 1.6353738423311497, "grad_norm": 2.1481971740722656, "learning_rate": 3.284245830874817e-05, "loss": 0.1668, "step": 18100 }, { "epoch": 1.644409306528123, "grad_norm": 2.29831600189209, "learning_rate": 3.262507375547343e-05, "loss": 0.1683, "step": 18200 }, { "epoch": 1.653444770725096, "grad_norm": 1.698500633239746, "learning_rate": 3.2407689202198685e-05, "loss": 0.1651, "step": 18300 }, { "epoch": 1.662480234922069, "grad_norm": 2.0010197162628174, "learning_rate": 3.219030464892394e-05, "loss": 0.1647, "step": 18400 }, { "epoch": 1.671515699119042, "grad_norm": 1.8577830791473389, "learning_rate": 3.19729200956492e-05, "loss": 0.1649, "step": 18500 }, { "epoch": 1.6805511633160153, "grad_norm": 2.0325686931610107, "learning_rate": 3.175553554237446e-05, "loss": 0.1664, "step": 18600 }, { "epoch": 1.6895866275129885, "grad_norm": 1.8574236631393433, "learning_rate": 3.153815098909972e-05, "loss": 0.1646, "step": 18700 }, { "epoch": 1.6986220917099617, "grad_norm": 1.94573175907135, "learning_rate": 3.1320766435824974e-05, "loss": 0.1623, "step": 18800 }, { "epoch": 1.7076575559069347, "grad_norm": 1.9908078908920288, "learning_rate": 3.1103381882550224e-05, "loss": 0.1632, "step": 18900 }, { "epoch": 1.7166930201039077, "grad_norm": 1.7018805742263794, "learning_rate": 3.088599732927549e-05, "loss": 0.163, "step": 19000 }, { "epoch": 1.7166930201039077, "eval_loss": 0.09983944892883301, "eval_runtime": 88.5039, "eval_samples_per_second": 48.167, "eval_steps_per_second": 0.757, "eval_wer": 0.10740622675592391, "step": 19000 }, { "epoch": 1.7257284843008809, "grad_norm": 1.8709958791732788, "learning_rate": 3.066861277600074e-05, "loss": 0.163, "step": 19100 }, { "epoch": 1.734763948497854, "grad_norm": 2.1051034927368164, "learning_rate": 3.0451228222726e-05, "loss": 0.1632, "step": 19200 }, { "epoch": 1.7437994126948273, "grad_norm": 2.1160008907318115, "learning_rate": 3.0233843669451256e-05, "loss": 0.1677, "step": 19300 }, { "epoch": 1.7528348768918003, "grad_norm": 1.7885472774505615, "learning_rate": 3.0016459116176512e-05, "loss": 0.1628, "step": 19400 }, { "epoch": 1.7618703410887733, "grad_norm": 1.7749061584472656, "learning_rate": 2.9799074562901772e-05, "loss": 0.1623, "step": 19500 }, { "epoch": 1.7709058052857465, "grad_norm": 1.933435320854187, "learning_rate": 2.958169000962703e-05, "loss": 0.1639, "step": 19600 }, { "epoch": 1.7799412694827197, "grad_norm": 1.7979782819747925, "learning_rate": 2.9364305456352285e-05, "loss": 0.1581, "step": 19700 }, { "epoch": 1.788976733679693, "grad_norm": 1.9905706644058228, "learning_rate": 2.914692090307754e-05, "loss": 0.1623, "step": 19800 }, { "epoch": 1.7980121978766659, "grad_norm": 2.146162271499634, "learning_rate": 2.8929536349802798e-05, "loss": 0.1632, "step": 19900 }, { "epoch": 1.8070476620736389, "grad_norm": 1.861401081085205, "learning_rate": 2.8712151796528054e-05, "loss": 0.1613, "step": 20000 }, { "epoch": 1.8070476620736389, "eval_loss": 0.09824151545763016, "eval_runtime": 87.8053, "eval_samples_per_second": 48.551, "eval_steps_per_second": 0.763, "eval_wer": 0.10753373711614068, "step": 20000 }, { "epoch": 1.816083126270612, "grad_norm": 1.8411866426467896, "learning_rate": 2.849476724325331e-05, "loss": 0.165, "step": 20100 }, { "epoch": 1.8251185904675853, "grad_norm": 1.7575931549072266, "learning_rate": 2.827738268997857e-05, "loss": 0.1564, "step": 20200 }, { "epoch": 1.8341540546645585, "grad_norm": 2.028254985809326, "learning_rate": 2.8059998136703827e-05, "loss": 0.1589, "step": 20300 }, { "epoch": 1.8431895188615315, "grad_norm": 1.9810631275177002, "learning_rate": 2.7842613583429083e-05, "loss": 0.1586, "step": 20400 }, { "epoch": 1.8522249830585045, "grad_norm": 1.8610142469406128, "learning_rate": 2.7625229030154343e-05, "loss": 0.1602, "step": 20500 }, { "epoch": 1.8612604472554777, "grad_norm": 1.9897997379302979, "learning_rate": 2.74078444768796e-05, "loss": 0.1625, "step": 20600 }, { "epoch": 1.8702959114524509, "grad_norm": 1.7494564056396484, "learning_rate": 2.7190459923604856e-05, "loss": 0.1593, "step": 20700 }, { "epoch": 1.879331375649424, "grad_norm": 1.9486002922058105, "learning_rate": 2.6975249215862856e-05, "loss": 0.1595, "step": 20800 }, { "epoch": 1.888366839846397, "grad_norm": 1.950518012046814, "learning_rate": 2.6757864662588116e-05, "loss": 0.1619, "step": 20900 }, { "epoch": 1.89740230404337, "grad_norm": 1.9625803232192993, "learning_rate": 2.6540480109313373e-05, "loss": 0.1568, "step": 21000 }, { "epoch": 1.89740230404337, "eval_loss": 0.09674616158008575, "eval_runtime": 88.8971, "eval_samples_per_second": 47.954, "eval_steps_per_second": 0.754, "eval_wer": 0.10868133035809159, "step": 21000 }, { "epoch": 1.9064377682403433, "grad_norm": 1.7447710037231445, "learning_rate": 2.632309555603863e-05, "loss": 0.1566, "step": 21100 }, { "epoch": 1.9154732324373165, "grad_norm": 2.0597004890441895, "learning_rate": 2.610571100276389e-05, "loss": 0.1594, "step": 21200 }, { "epoch": 1.9245086966342897, "grad_norm": 2.045921802520752, "learning_rate": 2.5888326449489145e-05, "loss": 0.1592, "step": 21300 }, { "epoch": 1.9335441608312627, "grad_norm": 1.9995648860931396, "learning_rate": 2.56709418962144e-05, "loss": 0.1591, "step": 21400 }, { "epoch": 1.9425796250282357, "grad_norm": 1.765527367591858, "learning_rate": 2.5455731188472406e-05, "loss": 0.1578, "step": 21500 }, { "epoch": 1.9516150892252089, "grad_norm": 1.8758126497268677, "learning_rate": 2.5238346635197665e-05, "loss": 0.1577, "step": 21600 }, { "epoch": 1.960650553422182, "grad_norm": 1.770780324935913, "learning_rate": 2.502096208192292e-05, "loss": 0.1584, "step": 21700 }, { "epoch": 1.9696860176191553, "grad_norm": 1.8630551099777222, "learning_rate": 2.4803577528648175e-05, "loss": 0.1548, "step": 21800 }, { "epoch": 1.9787214818161283, "grad_norm": 1.8517158031463623, "learning_rate": 2.458619297537343e-05, "loss": 0.1593, "step": 21900 }, { "epoch": 1.9877569460131013, "grad_norm": 1.6973580121994019, "learning_rate": 2.436880842209869e-05, "loss": 0.1525, "step": 22000 }, { "epoch": 1.9877569460131013, "eval_loss": 0.0945153757929802, "eval_runtime": 87.5175, "eval_samples_per_second": 48.71, "eval_steps_per_second": 0.766, "eval_wer": 0.10449474019764106, "step": 22000 }, { "epoch": 1.9967924102100745, "grad_norm": 2.0748767852783203, "learning_rate": 2.4151423868823947e-05, "loss": 0.1573, "step": 22100 }, { "epoch": 2.0057826970860626, "grad_norm": 1.6151518821716309, "learning_rate": 2.3934039315549204e-05, "loss": 0.1241, "step": 22200 }, { "epoch": 2.014818161283036, "grad_norm": 1.5904980897903442, "learning_rate": 2.3716654762274464e-05, "loss": 0.1074, "step": 22300 }, { "epoch": 2.023853625480009, "grad_norm": 1.4857326745986938, "learning_rate": 2.349927020899972e-05, "loss": 0.1029, "step": 22400 }, { "epoch": 2.0328890896769822, "grad_norm": 1.7787961959838867, "learning_rate": 2.3281885655724976e-05, "loss": 0.1066, "step": 22500 }, { "epoch": 2.0419245538739554, "grad_norm": 1.6591817140579224, "learning_rate": 2.3066674947982977e-05, "loss": 0.1057, "step": 22600 }, { "epoch": 2.050960018070928, "grad_norm": 1.6939488649368286, "learning_rate": 2.2849290394708237e-05, "loss": 0.1051, "step": 22700 }, { "epoch": 2.0599954822679014, "grad_norm": 1.5981281995773315, "learning_rate": 2.2631905841433493e-05, "loss": 0.1036, "step": 22800 }, { "epoch": 2.0690309464648746, "grad_norm": 1.8668162822723389, "learning_rate": 2.241452128815875e-05, "loss": 0.1063, "step": 22900 }, { "epoch": 2.078066410661848, "grad_norm": 1.627382755279541, "learning_rate": 2.219713673488401e-05, "loss": 0.1063, "step": 23000 }, { "epoch": 2.078066410661848, "eval_loss": 0.0966850146651268, "eval_runtime": 88.5935, "eval_samples_per_second": 48.119, "eval_steps_per_second": 0.756, "eval_wer": 0.10462225055785783, "step": 23000 }, { "epoch": 2.087101874858821, "grad_norm": 1.6317180395126343, "learning_rate": 2.1979752181609266e-05, "loss": 0.1067, "step": 23100 }, { "epoch": 2.096137339055794, "grad_norm": 1.5637694597244263, "learning_rate": 2.1762367628334522e-05, "loss": 0.1061, "step": 23200 }, { "epoch": 2.105172803252767, "grad_norm": 1.561661720275879, "learning_rate": 2.154498307505978e-05, "loss": 0.1066, "step": 23300 }, { "epoch": 2.11420826744974, "grad_norm": 1.570977807044983, "learning_rate": 2.132759852178504e-05, "loss": 0.1057, "step": 23400 }, { "epoch": 2.1232437316467134, "grad_norm": 1.6354864835739136, "learning_rate": 2.111021396851029e-05, "loss": 0.1061, "step": 23500 }, { "epoch": 2.1322791958436866, "grad_norm": 1.6001309156417847, "learning_rate": 2.0892829415235548e-05, "loss": 0.1038, "step": 23600 }, { "epoch": 2.1413146600406594, "grad_norm": 1.7492948770523071, "learning_rate": 2.0675444861960808e-05, "loss": 0.1051, "step": 23700 }, { "epoch": 2.1503501242376326, "grad_norm": 1.7432228326797485, "learning_rate": 2.0458060308686064e-05, "loss": 0.1029, "step": 23800 }, { "epoch": 2.159385588434606, "grad_norm": 1.5974751710891724, "learning_rate": 2.024067575541132e-05, "loss": 0.1061, "step": 23900 }, { "epoch": 2.168421052631579, "grad_norm": 1.8045574426651, "learning_rate": 2.0023291202136577e-05, "loss": 0.1075, "step": 24000 }, { "epoch": 2.168421052631579, "eval_loss": 0.0951407328248024, "eval_runtime": 88.9045, "eval_samples_per_second": 47.95, "eval_steps_per_second": 0.754, "eval_wer": 0.10304962278185102, "step": 24000 }, { "epoch": 2.1774565168285522, "grad_norm": 1.6032062768936157, "learning_rate": 1.9805906648861836e-05, "loss": 0.1065, "step": 24100 }, { "epoch": 2.186491981025525, "grad_norm": 1.5442743301391602, "learning_rate": 1.9588522095587093e-05, "loss": 0.1063, "step": 24200 }, { "epoch": 2.195527445222498, "grad_norm": 1.6346817016601562, "learning_rate": 1.937113754231235e-05, "loss": 0.1036, "step": 24300 }, { "epoch": 2.2045629094194714, "grad_norm": 1.6535338163375854, "learning_rate": 1.9153752989037606e-05, "loss": 0.1051, "step": 24400 }, { "epoch": 2.2135983736164446, "grad_norm": 1.6055641174316406, "learning_rate": 1.8936368435762862e-05, "loss": 0.1064, "step": 24500 }, { "epoch": 2.222633837813418, "grad_norm": 1.936577558517456, "learning_rate": 1.871898388248812e-05, "loss": 0.1045, "step": 24600 }, { "epoch": 2.2316693020103906, "grad_norm": 1.58518385887146, "learning_rate": 1.8501599329213375e-05, "loss": 0.1071, "step": 24700 }, { "epoch": 2.240704766207364, "grad_norm": 1.73505437374115, "learning_rate": 1.8284214775938635e-05, "loss": 0.1065, "step": 24800 }, { "epoch": 2.249740230404337, "grad_norm": 1.7908620834350586, "learning_rate": 1.806683022266389e-05, "loss": 0.1065, "step": 24900 }, { "epoch": 2.2587756946013102, "grad_norm": 1.654637336730957, "learning_rate": 1.7849445669389147e-05, "loss": 0.1035, "step": 25000 }, { "epoch": 2.2587756946013102, "eval_loss": 0.09359237551689148, "eval_runtime": 90.6143, "eval_samples_per_second": 47.046, "eval_steps_per_second": 0.739, "eval_wer": 0.10149824673254702, "step": 25000 }, { "epoch": 2.2678111587982834, "grad_norm": 1.6015100479125977, "learning_rate": 1.7632061116114407e-05, "loss": 0.1062, "step": 25100 }, { "epoch": 2.276846622995256, "grad_norm": 1.6547913551330566, "learning_rate": 1.741467656283966e-05, "loss": 0.1053, "step": 25200 }, { "epoch": 2.2858820871922294, "grad_norm": 1.7010306119918823, "learning_rate": 1.719729200956492e-05, "loss": 0.1041, "step": 25300 }, { "epoch": 2.2949175513892026, "grad_norm": 1.8139252662658691, "learning_rate": 1.6979907456290176e-05, "loss": 0.103, "step": 25400 }, { "epoch": 2.303953015586176, "grad_norm": 1.6318985223770142, "learning_rate": 1.6762522903015433e-05, "loss": 0.104, "step": 25500 }, { "epoch": 2.312988479783149, "grad_norm": 1.798727035522461, "learning_rate": 1.654513834974069e-05, "loss": 0.1055, "step": 25600 }, { "epoch": 2.322023943980122, "grad_norm": 1.527917504310608, "learning_rate": 1.6327753796465945e-05, "loss": 0.106, "step": 25700 }, { "epoch": 2.331059408177095, "grad_norm": 1.6333855390548706, "learning_rate": 1.6110369243191205e-05, "loss": 0.1024, "step": 25800 }, { "epoch": 2.340094872374068, "grad_norm": 1.5563682317733765, "learning_rate": 1.589298468991646e-05, "loss": 0.1031, "step": 25900 }, { "epoch": 2.3491303365710414, "grad_norm": 1.6106479167938232, "learning_rate": 1.5675600136641718e-05, "loss": 0.1056, "step": 26000 }, { "epoch": 2.3491303365710414, "eval_loss": 0.09276529401540756, "eval_runtime": 88.7242, "eval_samples_per_second": 48.048, "eval_steps_per_second": 0.755, "eval_wer": 0.10132823291892466, "step": 26000 }, { "epoch": 2.3581658007680146, "grad_norm": 1.8455883264541626, "learning_rate": 1.5458215583366974e-05, "loss": 0.1043, "step": 26100 }, { "epoch": 2.3672012649649874, "grad_norm": 1.7726097106933594, "learning_rate": 1.5240831030092233e-05, "loss": 0.1015, "step": 26200 }, { "epoch": 2.3762367291619606, "grad_norm": 1.6910566091537476, "learning_rate": 1.5023446476817489e-05, "loss": 0.1055, "step": 26300 }, { "epoch": 2.385272193358934, "grad_norm": 1.642712116241455, "learning_rate": 1.4806061923542747e-05, "loss": 0.1027, "step": 26400 }, { "epoch": 2.394307657555907, "grad_norm": 1.6066936254501343, "learning_rate": 1.4588677370268002e-05, "loss": 0.1052, "step": 26500 }, { "epoch": 2.4033431217528802, "grad_norm": 1.7851406335830688, "learning_rate": 1.437129281699326e-05, "loss": 0.1029, "step": 26600 }, { "epoch": 2.412378585949853, "grad_norm": 1.9918655157089233, "learning_rate": 1.4153908263718516e-05, "loss": 0.1006, "step": 26700 }, { "epoch": 2.421414050146826, "grad_norm": 1.6415534019470215, "learning_rate": 1.3936523710443774e-05, "loss": 0.1038, "step": 26800 }, { "epoch": 2.4304495143437994, "grad_norm": 1.9253250360488892, "learning_rate": 1.3719139157169032e-05, "loss": 0.1024, "step": 26900 }, { "epoch": 2.4394849785407726, "grad_norm": 1.86326265335083, "learning_rate": 1.3501754603894287e-05, "loss": 0.1019, "step": 27000 }, { "epoch": 2.4394849785407726, "eval_loss": 0.09212099760770798, "eval_runtime": 88.0292, "eval_samples_per_second": 48.427, "eval_steps_per_second": 0.761, "eval_wer": 0.1000106258633514, "step": 27000 }, { "epoch": 2.448520442737746, "grad_norm": 1.7671024799346924, "learning_rate": 1.3284370050619545e-05, "loss": 0.1026, "step": 27100 }, { "epoch": 2.4575559069347186, "grad_norm": 1.7686715126037598, "learning_rate": 1.3066985497344802e-05, "loss": 0.1041, "step": 27200 }, { "epoch": 2.466591371131692, "grad_norm": 1.743655800819397, "learning_rate": 1.284960094407006e-05, "loss": 0.099, "step": 27300 }, { "epoch": 2.475626835328665, "grad_norm": 1.7912476062774658, "learning_rate": 1.2632216390795314e-05, "loss": 0.1034, "step": 27400 }, { "epoch": 2.484662299525638, "grad_norm": 1.5481427907943726, "learning_rate": 1.2414831837520572e-05, "loss": 0.1037, "step": 27500 }, { "epoch": 2.4936977637226114, "grad_norm": 1.5013809204101562, "learning_rate": 1.219744728424583e-05, "loss": 0.1028, "step": 27600 }, { "epoch": 2.5027332279195846, "grad_norm": 1.592502236366272, "learning_rate": 1.1980062730971087e-05, "loss": 0.1024, "step": 27700 }, { "epoch": 2.5117686921165574, "grad_norm": 1.6279585361480713, "learning_rate": 1.1762678177696345e-05, "loss": 0.1017, "step": 27800 }, { "epoch": 2.5208041563135306, "grad_norm": 1.718693733215332, "learning_rate": 1.15452936244216e-05, "loss": 0.0991, "step": 27900 }, { "epoch": 2.529839620510504, "grad_norm": 1.721211314201355, "learning_rate": 1.1327909071146858e-05, "loss": 0.1004, "step": 28000 }, { "epoch": 2.529839620510504, "eval_loss": 0.0911058560013771, "eval_runtime": 86.9208, "eval_samples_per_second": 49.045, "eval_steps_per_second": 0.771, "eval_wer": 0.09856550844756136, "step": 28000 }, { "epoch": 2.5388750847074766, "grad_norm": 1.708903193473816, "learning_rate": 1.1110524517872116e-05, "loss": 0.1032, "step": 28100 }, { "epoch": 2.54791054890445, "grad_norm": 1.6191095113754272, "learning_rate": 1.0893139964597372e-05, "loss": 0.1031, "step": 28200 }, { "epoch": 2.556946013101423, "grad_norm": 1.5952250957489014, "learning_rate": 1.0677929256855375e-05, "loss": 0.0991, "step": 28300 }, { "epoch": 2.565981477298396, "grad_norm": 1.8054704666137695, "learning_rate": 1.0460544703580633e-05, "loss": 0.0994, "step": 28400 }, { "epoch": 2.5750169414953694, "grad_norm": 1.4976806640625, "learning_rate": 1.024316015030589e-05, "loss": 0.0988, "step": 28500 }, { "epoch": 2.5840524056923426, "grad_norm": 1.6461458206176758, "learning_rate": 1.0025775597031147e-05, "loss": 0.0989, "step": 28600 }, { "epoch": 2.593087869889316, "grad_norm": 1.631536841392517, "learning_rate": 9.808391043756405e-06, "loss": 0.1001, "step": 28700 }, { "epoch": 2.6021233340862886, "grad_norm": 1.8152861595153809, "learning_rate": 9.59100649048166e-06, "loss": 0.1001, "step": 28800 }, { "epoch": 2.611158798283262, "grad_norm": 1.4996885061264038, "learning_rate": 9.373621937206918e-06, "loss": 0.103, "step": 28900 }, { "epoch": 2.620194262480235, "grad_norm": 1.8811280727386475, "learning_rate": 9.156237383932176e-06, "loss": 0.0992, "step": 29000 }, { "epoch": 2.620194262480235, "eval_loss": 0.09040974825620651, "eval_runtime": 87.3549, "eval_samples_per_second": 48.801, "eval_steps_per_second": 0.767, "eval_wer": 0.0979917118265859, "step": 29000 }, { "epoch": 2.629229726677208, "grad_norm": 1.550436019897461, "learning_rate": 8.938852830657433e-06, "loss": 0.0997, "step": 29100 }, { "epoch": 2.638265190874181, "grad_norm": 1.7116386890411377, "learning_rate": 8.721468277382689e-06, "loss": 0.1021, "step": 29200 }, { "epoch": 2.647300655071154, "grad_norm": 1.8250106573104858, "learning_rate": 8.504083724107947e-06, "loss": 0.0992, "step": 29300 }, { "epoch": 2.6563361192681274, "grad_norm": 1.704163670539856, "learning_rate": 8.286699170833203e-06, "loss": 0.0974, "step": 29400 }, { "epoch": 2.6653715834651006, "grad_norm": 1.7405962944030762, "learning_rate": 8.06931461755846e-06, "loss": 0.0997, "step": 29500 }, { "epoch": 2.674407047662074, "grad_norm": 1.599592685699463, "learning_rate": 7.851930064283716e-06, "loss": 0.0978, "step": 29600 }, { "epoch": 2.683442511859047, "grad_norm": 1.666237711906433, "learning_rate": 7.634545511008974e-06, "loss": 0.0986, "step": 29700 }, { "epoch": 2.69247797605602, "grad_norm": 1.6730016469955444, "learning_rate": 7.417160957734231e-06, "loss": 0.0958, "step": 29800 }, { "epoch": 2.701513440252993, "grad_norm": 1.800661325454712, "learning_rate": 7.199776404459488e-06, "loss": 0.0967, "step": 29900 }, { "epoch": 2.710548904449966, "grad_norm": 1.4267141819000244, "learning_rate": 6.982391851184745e-06, "loss": 0.1011, "step": 30000 }, { "epoch": 2.710548904449966, "eval_loss": 0.08978110551834106, "eval_runtime": 92.3004, "eval_samples_per_second": 46.186, "eval_steps_per_second": 0.726, "eval_wer": 0.09784294973966635, "step": 30000 }, { "epoch": 2.719584368646939, "grad_norm": 1.7578014135360718, "learning_rate": 6.765007297910002e-06, "loss": 0.0988, "step": 30100 }, { "epoch": 2.728619832843912, "grad_norm": 1.747879981994629, "learning_rate": 6.54762274463526e-06, "loss": 0.0982, "step": 30200 }, { "epoch": 2.7376552970408854, "grad_norm": 1.4880852699279785, "learning_rate": 6.330238191360516e-06, "loss": 0.0944, "step": 30300 }, { "epoch": 2.7466907612378586, "grad_norm": 1.6102066040039062, "learning_rate": 6.112853638085773e-06, "loss": 0.099, "step": 30400 }, { "epoch": 2.755726225434832, "grad_norm": 2.1802284717559814, "learning_rate": 5.89546908481103e-06, "loss": 0.0963, "step": 30500 }, { "epoch": 2.764761689631805, "grad_norm": 1.65652334690094, "learning_rate": 5.680258377069035e-06, "loss": 0.099, "step": 30600 }, { "epoch": 2.7737971538287782, "grad_norm": 1.344401240348816, "learning_rate": 5.462873823794291e-06, "loss": 0.0979, "step": 30700 }, { "epoch": 2.782832618025751, "grad_norm": 1.6446696519851685, "learning_rate": 5.245489270519548e-06, "loss": 0.0944, "step": 30800 }, { "epoch": 2.791868082222724, "grad_norm": 1.529815435409546, "learning_rate": 5.028104717244806e-06, "loss": 0.0967, "step": 30900 }, { "epoch": 2.8009035464196974, "grad_norm": 1.7729915380477905, "learning_rate": 4.810720163970063e-06, "loss": 0.095, "step": 31000 }, { "epoch": 2.8009035464196974, "eval_loss": 0.08919844031333923, "eval_runtime": 90.4055, "eval_samples_per_second": 47.154, "eval_steps_per_second": 0.741, "eval_wer": 0.09748167038571884, "step": 31000 }, { "epoch": 2.80993901061667, "grad_norm": 1.6226630210876465, "learning_rate": 4.59333561069532e-06, "loss": 0.0982, "step": 31100 }, { "epoch": 2.8189744748136434, "grad_norm": 1.5628806352615356, "learning_rate": 4.375951057420576e-06, "loss": 0.095, "step": 31200 }, { "epoch": 2.8280099390106166, "grad_norm": 1.5284922122955322, "learning_rate": 4.158566504145834e-06, "loss": 0.0945, "step": 31300 }, { "epoch": 2.83704540320759, "grad_norm": 1.9399908781051636, "learning_rate": 3.941181950871091e-06, "loss": 0.0954, "step": 31400 }, { "epoch": 2.846080867404563, "grad_norm": 1.7431321144104004, "learning_rate": 3.7237973975963476e-06, "loss": 0.0973, "step": 31500 }, { "epoch": 2.855116331601536, "grad_norm": 1.4165501594543457, "learning_rate": 3.5064128443216044e-06, "loss": 0.0954, "step": 31600 }, { "epoch": 2.8641517957985094, "grad_norm": 1.8231940269470215, "learning_rate": 3.2890282910468617e-06, "loss": 0.0969, "step": 31700 }, { "epoch": 2.873187259995482, "grad_norm": 1.9092686176300049, "learning_rate": 3.0716437377721185e-06, "loss": 0.0967, "step": 31800 }, { "epoch": 2.8822227241924554, "grad_norm": 1.6101560592651367, "learning_rate": 2.8542591844973753e-06, "loss": 0.0973, "step": 31900 }, { "epoch": 2.8912581883894286, "grad_norm": 1.6077231168746948, "learning_rate": 2.636874631222633e-06, "loss": 0.0975, "step": 32000 }, { "epoch": 2.8912581883894286, "eval_loss": 0.08852633088827133, "eval_runtime": 88.8694, "eval_samples_per_second": 47.969, "eval_steps_per_second": 0.754, "eval_wer": 0.096015301243226, "step": 32000 }, { "epoch": 2.9002936525864014, "grad_norm": 1.6472060680389404, "learning_rate": 2.4194900779478898e-06, "loss": 0.0953, "step": 32100 }, { "epoch": 2.9093291167833746, "grad_norm": 1.5193005800247192, "learning_rate": 2.2021055246731466e-06, "loss": 0.0947, "step": 32200 }, { "epoch": 2.918364580980348, "grad_norm": 1.3484536409378052, "learning_rate": 1.984720971398404e-06, "loss": 0.0937, "step": 32300 }, { "epoch": 2.927400045177321, "grad_norm": 1.6725506782531738, "learning_rate": 1.7673364181236606e-06, "loss": 0.0949, "step": 32400 }, { "epoch": 2.936435509374294, "grad_norm": 1.5670363903045654, "learning_rate": 1.5499518648489175e-06, "loss": 0.0928, "step": 32500 }, { "epoch": 2.9454709735712674, "grad_norm": 1.5655218362808228, "learning_rate": 1.3325673115741747e-06, "loss": 0.0923, "step": 32600 }, { "epoch": 2.95450643776824, "grad_norm": 1.7287861108779907, "learning_rate": 1.1151827582994317e-06, "loss": 0.0945, "step": 32700 }, { "epoch": 2.9635419019652134, "grad_norm": 1.5101486444473267, "learning_rate": 8.977982050246885e-07, "loss": 0.0938, "step": 32800 }, { "epoch": 2.9725773661621866, "grad_norm": 1.4109468460083008, "learning_rate": 6.804136517499457e-07, "loss": 0.091, "step": 32900 }, { "epoch": 2.98161283035916, "grad_norm": 1.537053108215332, "learning_rate": 4.6302909847520263e-07, "loss": 0.0963, "step": 33000 }, { "epoch": 2.98161283035916, "eval_loss": 0.08801376074552536, "eval_runtime": 90.4227, "eval_samples_per_second": 47.145, "eval_steps_per_second": 0.741, "eval_wer": 0.09624907023695675, "step": 33000 }, { "epoch": 2.9906482945561326, "grad_norm": 1.575260043144226, "learning_rate": 2.47818390733207e-07, "loss": 0.0934, "step": 33100 }, { "epoch": 2.9996837587531058, "grad_norm": 1.5032224655151367, "learning_rate": 3.043383745846402e-08, "loss": 0.0941, "step": 33200 }, { "epoch": 2.999774113395076, "step": 33201, "total_flos": 2.756290459511145e+20, "train_loss": 0.20740618139383307, "train_runtime": 51184.2268, "train_samples_per_second": 83.03, "train_steps_per_second": 0.649 } ], "logging_steps": 100, "max_steps": 33201, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.756290459511145e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }