whisper-small-eu / trainer_state.json
{
"best_metric": 9.547895591357562,
"best_model_checkpoint": "./checkpoint-10000",
"epoch": 1.0,
"eval_steps": 1000,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0025,
"grad_norm": 17.154964447021484,
"learning_rate": 5.5e-07,
"loss": 2.3215,
"step": 25
},
{
"epoch": 0.005,
"grad_norm": 9.312045097351074,
"learning_rate": 1.175e-06,
"loss": 1.864,
"step": 50
},
{
"epoch": 0.0075,
"grad_norm": 9.26079273223877,
"learning_rate": 1.8e-06,
"loss": 1.2223,
"step": 75
},
{
"epoch": 0.01,
"grad_norm": 6.739448070526123,
"learning_rate": 2.425e-06,
"loss": 0.9215,
"step": 100
},
{
"epoch": 0.0125,
"grad_norm": 5.92506217956543,
"learning_rate": 3.05e-06,
"loss": 0.7773,
"step": 125
},
{
"epoch": 0.015,
"grad_norm": 5.521960735321045,
"learning_rate": 3.675e-06,
"loss": 0.6549,
"step": 150
},
{
"epoch": 0.0175,
"grad_norm": 5.140420913696289,
"learning_rate": 4.2999999999999995e-06,
"loss": 0.6097,
"step": 175
},
{
"epoch": 0.02,
"grad_norm": 5.54941987991333,
"learning_rate": 4.925e-06,
"loss": 0.5707,
"step": 200
},
{
"epoch": 0.0225,
"grad_norm": 5.2704644203186035,
"learning_rate": 5.55e-06,
"loss": 0.4844,
"step": 225
},
{
"epoch": 0.025,
"grad_norm": 6.43791389465332,
"learning_rate": 6.175e-06,
"loss": 0.5029,
"step": 250
},
{
"epoch": 0.0275,
"grad_norm": 5.382338523864746,
"learning_rate": 6.800000000000001e-06,
"loss": 0.5223,
"step": 275
},
{
"epoch": 0.03,
"grad_norm": 5.741837024688721,
"learning_rate": 7.425e-06,
"loss": 0.5109,
"step": 300
},
{
"epoch": 0.0325,
"grad_norm": 4.826135635375977,
"learning_rate": 8.050000000000001e-06,
"loss": 0.447,
"step": 325
},
{
"epoch": 0.035,
"grad_norm": 4.86759090423584,
"learning_rate": 8.674999999999999e-06,
"loss": 0.3823,
"step": 350
},
{
"epoch": 0.0375,
"grad_norm": 5.171136379241943,
"learning_rate": 9.3e-06,
"loss": 0.3243,
"step": 375
},
{
"epoch": 0.04,
"grad_norm": 3.679882287979126,
"learning_rate": 9.925e-06,
"loss": 0.3157,
"step": 400
},
{
"epoch": 0.0425,
"grad_norm": 3.856377124786377,
"learning_rate": 1.055e-05,
"loss": 0.2852,
"step": 425
},
{
"epoch": 0.045,
"grad_norm": 3.587339401245117,
"learning_rate": 1.1175e-05,
"loss": 0.2729,
"step": 450
},
{
"epoch": 0.0475,
"grad_norm": 3.875290870666504,
"learning_rate": 1.18e-05,
"loss": 0.253,
"step": 475
},
{
"epoch": 0.05,
"grad_norm": 3.3402156829833984,
"learning_rate": 1.2425e-05,
"loss": 0.2432,
"step": 500
},
{
"epoch": 0.0525,
"grad_norm": 4.392176151275635,
"learning_rate": 1.2471052631578948e-05,
"loss": 0.2231,
"step": 525
},
{
"epoch": 0.055,
"grad_norm": 3.0165233612060547,
"learning_rate": 1.2438157894736843e-05,
"loss": 0.221,
"step": 550
},
{
"epoch": 0.0575,
"grad_norm": 3.5857527256011963,
"learning_rate": 1.2405263157894738e-05,
"loss": 0.2065,
"step": 575
},
{
"epoch": 0.06,
"grad_norm": 4.286252021789551,
"learning_rate": 1.2372368421052632e-05,
"loss": 0.2102,
"step": 600
},
{
"epoch": 0.0625,
"grad_norm": 3.300917148590088,
"learning_rate": 1.2339473684210527e-05,
"loss": 0.1838,
"step": 625
},
{
"epoch": 0.065,
"grad_norm": 2.5947461128234863,
"learning_rate": 1.2306578947368421e-05,
"loss": 0.2015,
"step": 650
},
{
"epoch": 0.0675,
"grad_norm": 2.932685375213623,
"learning_rate": 1.2273684210526316e-05,
"loss": 0.1983,
"step": 675
},
{
"epoch": 0.07,
"grad_norm": 3.6400482654571533,
"learning_rate": 1.224078947368421e-05,
"loss": 0.1977,
"step": 700
},
{
"epoch": 0.0725,
"grad_norm": 4.641479015350342,
"learning_rate": 1.2207894736842105e-05,
"loss": 0.2214,
"step": 725
},
{
"epoch": 0.075,
"grad_norm": 4.521893501281738,
"learning_rate": 1.2175e-05,
"loss": 0.3166,
"step": 750
},
{
"epoch": 0.0775,
"grad_norm": 4.501023292541504,
"learning_rate": 1.2142105263157896e-05,
"loss": 0.3019,
"step": 775
},
{
"epoch": 0.08,
"grad_norm": 4.930638790130615,
"learning_rate": 1.210921052631579e-05,
"loss": 0.3381,
"step": 800
},
{
"epoch": 0.0825,
"grad_norm": 3.6409413814544678,
"learning_rate": 1.2076315789473685e-05,
"loss": 0.2239,
"step": 825
},
{
"epoch": 0.085,
"grad_norm": 2.9868454933166504,
"learning_rate": 1.204342105263158e-05,
"loss": 0.1808,
"step": 850
},
{
"epoch": 0.0875,
"grad_norm": 2.4896903038024902,
"learning_rate": 1.2010526315789476e-05,
"loss": 0.1638,
"step": 875
},
{
"epoch": 0.09,
"grad_norm": 5.112133502960205,
"learning_rate": 1.197763157894737e-05,
"loss": 0.2276,
"step": 900
},
{
"epoch": 0.0925,
"grad_norm": 4.977311134338379,
"learning_rate": 1.1944736842105265e-05,
"loss": 0.2745,
"step": 925
},
{
"epoch": 0.095,
"grad_norm": 4.248500347137451,
"learning_rate": 1.191184210526316e-05,
"loss": 0.2651,
"step": 950
},
{
"epoch": 0.0975,
"grad_norm": 5.643575191497803,
"learning_rate": 1.1878947368421054e-05,
"loss": 0.4676,
"step": 975
},
{
"epoch": 0.1,
"grad_norm": 5.4507365226745605,
"learning_rate": 1.1846052631578948e-05,
"loss": 0.3863,
"step": 1000
},
{
"epoch": 0.1,
"eval_loss": 0.4089725911617279,
"eval_runtime": 110.8228,
"eval_samples_per_second": 15.259,
"eval_steps_per_second": 0.956,
"eval_wer": 21.21893343324591,
"step": 1000
},
{
"epoch": 0.1025,
"grad_norm": 4.655358791351318,
"learning_rate": 1.1813157894736843e-05,
"loss": 0.3148,
"step": 1025
},
{
"epoch": 0.105,
"grad_norm": 3.134038209915161,
"learning_rate": 1.1780263157894738e-05,
"loss": 0.2218,
"step": 1050
},
{
"epoch": 0.1075,
"grad_norm": 2.7970399856567383,
"learning_rate": 1.1747368421052632e-05,
"loss": 0.163,
"step": 1075
},
{
"epoch": 0.11,
"grad_norm": 2.5460939407348633,
"learning_rate": 1.1714473684210527e-05,
"loss": 0.1353,
"step": 1100
},
{
"epoch": 0.1125,
"grad_norm": 2.4967522621154785,
"learning_rate": 1.1681578947368421e-05,
"loss": 0.1318,
"step": 1125
},
{
"epoch": 0.115,
"grad_norm": 3.2984485626220703,
"learning_rate": 1.1648684210526316e-05,
"loss": 0.1417,
"step": 1150
},
{
"epoch": 0.1175,
"grad_norm": 2.9972639083862305,
"learning_rate": 1.161578947368421e-05,
"loss": 0.1429,
"step": 1175
},
{
"epoch": 0.12,
"grad_norm": 2.817171335220337,
"learning_rate": 1.1582894736842107e-05,
"loss": 0.1407,
"step": 1200
},
{
"epoch": 0.1225,
"grad_norm": 4.157276153564453,
"learning_rate": 1.1550000000000001e-05,
"loss": 0.2104,
"step": 1225
},
{
"epoch": 0.125,
"grad_norm": 4.1519012451171875,
"learning_rate": 1.1517105263157896e-05,
"loss": 0.228,
"step": 1250
},
{
"epoch": 0.1275,
"grad_norm": 4.5924811363220215,
"learning_rate": 1.148421052631579e-05,
"loss": 0.2494,
"step": 1275
},
{
"epoch": 0.13,
"grad_norm": 4.487509727478027,
"learning_rate": 1.1451315789473685e-05,
"loss": 0.2569,
"step": 1300
},
{
"epoch": 0.1325,
"grad_norm": 4.964361667633057,
"learning_rate": 1.141842105263158e-05,
"loss": 0.2242,
"step": 1325
},
{
"epoch": 0.135,
"grad_norm": 4.045988082885742,
"learning_rate": 1.1385526315789474e-05,
"loss": 0.2291,
"step": 1350
},
{
"epoch": 0.1375,
"grad_norm": 5.821484088897705,
"learning_rate": 1.1352631578947369e-05,
"loss": 0.2133,
"step": 1375
},
{
"epoch": 0.14,
"grad_norm": 4.365011692047119,
"learning_rate": 1.1319736842105263e-05,
"loss": 0.2065,
"step": 1400
},
{
"epoch": 0.1425,
"grad_norm": 3.424377202987671,
"learning_rate": 1.1286842105263158e-05,
"loss": 0.2126,
"step": 1425
},
{
"epoch": 0.145,
"grad_norm": 3.383793830871582,
"learning_rate": 1.1253947368421052e-05,
"loss": 0.2013,
"step": 1450
},
{
"epoch": 0.1475,
"grad_norm": 2.5089175701141357,
"learning_rate": 1.1221052631578947e-05,
"loss": 0.1484,
"step": 1475
},
{
"epoch": 0.15,
"grad_norm": 2.754504442214966,
"learning_rate": 1.1188157894736841e-05,
"loss": 0.1276,
"step": 1500
},
{
"epoch": 0.1525,
"grad_norm": 2.481276512145996,
"learning_rate": 1.1155263157894736e-05,
"loss": 0.1363,
"step": 1525
},
{
"epoch": 0.155,
"grad_norm": 3.6863863468170166,
"learning_rate": 1.1122368421052632e-05,
"loss": 0.1297,
"step": 1550
},
{
"epoch": 0.1575,
"grad_norm": 2.3154377937316895,
"learning_rate": 1.1089473684210527e-05,
"loss": 0.1122,
"step": 1575
},
{
"epoch": 0.16,
"grad_norm": 2.529965877532959,
"learning_rate": 1.1056578947368421e-05,
"loss": 0.1206,
"step": 1600
},
{
"epoch": 0.1625,
"grad_norm": 4.41377592086792,
"learning_rate": 1.1023684210526316e-05,
"loss": 0.1778,
"step": 1625
},
{
"epoch": 0.165,
"grad_norm": 3.306654214859009,
"learning_rate": 1.099078947368421e-05,
"loss": 0.1909,
"step": 1650
},
{
"epoch": 0.1675,
"grad_norm": 4.408228874206543,
"learning_rate": 1.0957894736842107e-05,
"loss": 0.1935,
"step": 1675
},
{
"epoch": 0.17,
"grad_norm": 2.594836950302124,
"learning_rate": 1.0925000000000001e-05,
"loss": 0.1859,
"step": 1700
},
{
"epoch": 0.1725,
"grad_norm": 2.800992250442505,
"learning_rate": 1.0892105263157896e-05,
"loss": 0.1377,
"step": 1725
},
{
"epoch": 0.175,
"grad_norm": 2.8291330337524414,
"learning_rate": 1.085921052631579e-05,
"loss": 0.1344,
"step": 1750
},
{
"epoch": 0.1775,
"grad_norm": 2.815859317779541,
"learning_rate": 1.0826315789473685e-05,
"loss": 0.1157,
"step": 1775
},
{
"epoch": 0.18,
"grad_norm": 2.485546827316284,
"learning_rate": 1.079342105263158e-05,
"loss": 0.131,
"step": 1800
},
{
"epoch": 0.1825,
"grad_norm": 2.3912546634674072,
"learning_rate": 1.0760526315789474e-05,
"loss": 0.1143,
"step": 1825
},
{
"epoch": 0.185,
"grad_norm": 2.0489096641540527,
"learning_rate": 1.0727631578947369e-05,
"loss": 0.1112,
"step": 1850
},
{
"epoch": 0.1875,
"grad_norm": 2.72310209274292,
"learning_rate": 1.0694736842105265e-05,
"loss": 0.1128,
"step": 1875
},
{
"epoch": 0.19,
"grad_norm": 2.5979485511779785,
"learning_rate": 1.066184210526316e-05,
"loss": 0.1145,
"step": 1900
},
{
"epoch": 0.1925,
"grad_norm": 2.1391546726226807,
"learning_rate": 1.0628947368421054e-05,
"loss": 0.1083,
"step": 1925
},
{
"epoch": 0.195,
"grad_norm": 3.562283515930176,
"learning_rate": 1.0596052631578949e-05,
"loss": 0.1456,
"step": 1950
},
{
"epoch": 0.1975,
"grad_norm": 3.5763707160949707,
"learning_rate": 1.0563157894736843e-05,
"loss": 0.1929,
"step": 1975
},
{
"epoch": 0.2,
"grad_norm": 3.659284830093384,
"learning_rate": 1.0530263157894738e-05,
"loss": 0.1897,
"step": 2000
},
{
"epoch": 0.2,
"eval_loss": 0.3456783592700958,
"eval_runtime": 109.2542,
"eval_samples_per_second": 15.478,
"eval_steps_per_second": 0.97,
"eval_wer": 15.448982140626951,
"step": 2000
},
{
"epoch": 0.2025,
"grad_norm": 2.3411598205566406,
"learning_rate": 1.0497368421052632e-05,
"loss": 0.1575,
"step": 2025
},
{
"epoch": 0.205,
"grad_norm": 2.3811259269714355,
"learning_rate": 1.0464473684210527e-05,
"loss": 0.1096,
"step": 2050
},
{
"epoch": 0.2075,
"grad_norm": 2.5568268299102783,
"learning_rate": 1.0431578947368421e-05,
"loss": 0.1079,
"step": 2075
},
{
"epoch": 0.21,
"grad_norm": 2.217919111251831,
"learning_rate": 1.0398684210526316e-05,
"loss": 0.109,
"step": 2100
},
{
"epoch": 0.2125,
"grad_norm": 2.830374002456665,
"learning_rate": 1.036578947368421e-05,
"loss": 0.096,
"step": 2125
},
{
"epoch": 0.215,
"grad_norm": 2.265371084213257,
"learning_rate": 1.0332894736842105e-05,
"loss": 0.0942,
"step": 2150
},
{
"epoch": 0.2175,
"grad_norm": 2.7084972858428955,
"learning_rate": 1.03e-05,
"loss": 0.0966,
"step": 2175
},
{
"epoch": 0.22,
"grad_norm": 3.6277427673339844,
"learning_rate": 1.0267105263157894e-05,
"loss": 0.168,
"step": 2200
},
{
"epoch": 0.2225,
"grad_norm": 4.01047945022583,
"learning_rate": 1.023421052631579e-05,
"loss": 0.1743,
"step": 2225
},
{
"epoch": 0.225,
"grad_norm": 3.7068288326263428,
"learning_rate": 1.0201315789473685e-05,
"loss": 0.192,
"step": 2250
},
{
"epoch": 0.2275,
"grad_norm": 2.932413339614868,
"learning_rate": 1.016842105263158e-05,
"loss": 0.1261,
"step": 2275
},
{
"epoch": 0.23,
"grad_norm": 2.187391757965088,
"learning_rate": 1.0135526315789474e-05,
"loss": 0.0932,
"step": 2300
},
{
"epoch": 0.2325,
"grad_norm": 2.829944372177124,
"learning_rate": 1.0102631578947369e-05,
"loss": 0.0906,
"step": 2325
},
{
"epoch": 0.235,
"grad_norm": 3.1608800888061523,
"learning_rate": 1.0069736842105263e-05,
"loss": 0.0892,
"step": 2350
},
{
"epoch": 0.2375,
"grad_norm": 1.8967108726501465,
"learning_rate": 1.0036842105263158e-05,
"loss": 0.0976,
"step": 2375
},
{
"epoch": 0.24,
"grad_norm": 2.088034152984619,
"learning_rate": 1.0003947368421052e-05,
"loss": 0.0926,
"step": 2400
},
{
"epoch": 0.2425,
"grad_norm": 1.8807507753372192,
"learning_rate": 9.971052631578947e-06,
"loss": 0.1022,
"step": 2425
},
{
"epoch": 0.245,
"grad_norm": 1.8424572944641113,
"learning_rate": 9.938157894736842e-06,
"loss": 0.0945,
"step": 2450
},
{
"epoch": 0.2475,
"grad_norm": 2.1998050212860107,
"learning_rate": 9.905263157894736e-06,
"loss": 0.1008,
"step": 2475
},
{
"epoch": 0.25,
"grad_norm": 3.174199104309082,
"learning_rate": 9.872368421052632e-06,
"loss": 0.1111,
"step": 2500
},
{
"epoch": 0.2525,
"grad_norm": 3.0761404037475586,
"learning_rate": 9.839473684210527e-06,
"loss": 0.1474,
"step": 2525
},
{
"epoch": 0.255,
"grad_norm": 3.375256299972534,
"learning_rate": 9.806578947368421e-06,
"loss": 0.1427,
"step": 2550
},
{
"epoch": 0.2575,
"grad_norm": 3.164018154144287,
"learning_rate": 9.773684210526316e-06,
"loss": 0.1707,
"step": 2575
},
{
"epoch": 0.26,
"grad_norm": 2.6457083225250244,
"learning_rate": 9.740789473684212e-06,
"loss": 0.1277,
"step": 2600
},
{
"epoch": 0.2625,
"grad_norm": 2.7457237243652344,
"learning_rate": 9.707894736842107e-06,
"loss": 0.1052,
"step": 2625
},
{
"epoch": 0.265,
"grad_norm": 1.7891429662704468,
"learning_rate": 9.675000000000001e-06,
"loss": 0.0954,
"step": 2650
},
{
"epoch": 0.2675,
"grad_norm": 2.1033453941345215,
"learning_rate": 9.642105263157896e-06,
"loss": 0.0897,
"step": 2675
},
{
"epoch": 0.27,
"grad_norm": 2.163673162460327,
"learning_rate": 9.60921052631579e-06,
"loss": 0.0914,
"step": 2700
},
{
"epoch": 0.2725,
"grad_norm": 1.7669174671173096,
"learning_rate": 9.576315789473685e-06,
"loss": 0.1058,
"step": 2725
},
{
"epoch": 0.275,
"grad_norm": 2.96284818649292,
"learning_rate": 9.54342105263158e-06,
"loss": 0.1092,
"step": 2750
},
{
"epoch": 0.2775,
"grad_norm": 3.970527410507202,
"learning_rate": 9.510526315789474e-06,
"loss": 0.1937,
"step": 2775
},
{
"epoch": 0.28,
"grad_norm": 3.5204854011535645,
"learning_rate": 9.477631578947369e-06,
"loss": 0.183,
"step": 2800
},
{
"epoch": 0.2825,
"grad_norm": 3.5082786083221436,
"learning_rate": 9.444736842105263e-06,
"loss": 0.152,
"step": 2825
},
{
"epoch": 0.285,
"grad_norm": 3.042353630065918,
"learning_rate": 9.411842105263158e-06,
"loss": 0.1505,
"step": 2850
},
{
"epoch": 0.2875,
"grad_norm": 3.401519536972046,
"learning_rate": 9.378947368421052e-06,
"loss": 0.1568,
"step": 2875
},
{
"epoch": 0.29,
"grad_norm": 2.9089572429656982,
"learning_rate": 9.346052631578949e-06,
"loss": 0.1482,
"step": 2900
},
{
"epoch": 0.2925,
"grad_norm": 2.2120282649993896,
"learning_rate": 9.313157894736843e-06,
"loss": 0.1219,
"step": 2925
},
{
"epoch": 0.295,
"grad_norm": 2.358677625656128,
"learning_rate": 9.280263157894738e-06,
"loss": 0.0925,
"step": 2950
},
{
"epoch": 0.2975,
"grad_norm": 2.1122074127197266,
"learning_rate": 9.247368421052632e-06,
"loss": 0.0993,
"step": 2975
},
{
"epoch": 0.3,
"grad_norm": 2.7415077686309814,
"learning_rate": 9.214473684210527e-06,
"loss": 0.1379,
"step": 3000
},
{
"epoch": 0.3,
"eval_loss": 0.3283405303955078,
"eval_runtime": 108.699,
"eval_samples_per_second": 15.557,
"eval_steps_per_second": 0.975,
"eval_wer": 13.57562133133508,
"step": 3000
},
{
"epoch": 0.3025,
"grad_norm": 3.9535341262817383,
"learning_rate": 9.181578947368422e-06,
"loss": 0.1678,
"step": 3025
},
{
"epoch": 0.305,
"grad_norm": 4.159903526306152,
"learning_rate": 9.148684210526316e-06,
"loss": 0.1903,
"step": 3050
},
{
"epoch": 0.3075,
"grad_norm": 2.060307264328003,
"learning_rate": 9.11578947368421e-06,
"loss": 0.1233,
"step": 3075
},
{
"epoch": 0.31,
"grad_norm": 2.7883658409118652,
"learning_rate": 9.082894736842105e-06,
"loss": 0.0932,
"step": 3100
},
{
"epoch": 0.3125,
"grad_norm": 2.121624231338501,
"learning_rate": 9.05e-06,
"loss": 0.0903,
"step": 3125
},
{
"epoch": 0.315,
"grad_norm": 2.487900972366333,
"learning_rate": 9.017105263157894e-06,
"loss": 0.1073,
"step": 3150
},
{
"epoch": 0.3175,
"grad_norm": 3.3630971908569336,
"learning_rate": 8.984210526315789e-06,
"loss": 0.1667,
"step": 3175
},
{
"epoch": 0.32,
"grad_norm": 3.9546568393707275,
"learning_rate": 8.951315789473683e-06,
"loss": 0.152,
"step": 3200
},
{
"epoch": 0.3225,
"grad_norm": 3.8344779014587402,
"learning_rate": 8.91842105263158e-06,
"loss": 0.1644,
"step": 3225
},
{
"epoch": 0.325,
"grad_norm": 3.418964147567749,
"learning_rate": 8.885526315789474e-06,
"loss": 0.1287,
"step": 3250
},
{
"epoch": 0.3275,
"grad_norm": 3.6898674964904785,
"learning_rate": 8.852631578947369e-06,
"loss": 0.1363,
"step": 3275
},
{
"epoch": 0.33,
"grad_norm": 2.754051685333252,
"learning_rate": 8.819736842105263e-06,
"loss": 0.1568,
"step": 3300
},
{
"epoch": 0.3325,
"grad_norm": 2.718393087387085,
"learning_rate": 8.786842105263158e-06,
"loss": 0.1177,
"step": 3325
},
{
"epoch": 0.335,
"grad_norm": 2.952477216720581,
"learning_rate": 8.753947368421052e-06,
"loss": 0.0932,
"step": 3350
},
{
"epoch": 0.3375,
"grad_norm": 2.300719976425171,
"learning_rate": 8.721052631578947e-06,
"loss": 0.0814,
"step": 3375
},
{
"epoch": 0.34,
"grad_norm": 2.1434884071350098,
"learning_rate": 8.688157894736843e-06,
"loss": 0.0882,
"step": 3400
},
{
"epoch": 0.3425,
"grad_norm": 2.9985721111297607,
"learning_rate": 8.655263157894738e-06,
"loss": 0.0921,
"step": 3425
},
{
"epoch": 0.345,
"grad_norm": 2.030388116836548,
"learning_rate": 8.622368421052632e-06,
"loss": 0.0839,
"step": 3450
},
{
"epoch": 0.3475,
"grad_norm": 1.9179843664169312,
"learning_rate": 8.589473684210527e-06,
"loss": 0.0789,
"step": 3475
},
{
"epoch": 0.35,
"grad_norm": 2.291533946990967,
"learning_rate": 8.556578947368422e-06,
"loss": 0.0928,
"step": 3500
},
{
"epoch": 0.3525,
"grad_norm": 2.0841875076293945,
"learning_rate": 8.523684210526316e-06,
"loss": 0.0871,
"step": 3525
},
{
"epoch": 0.355,
"grad_norm": 2.898160457611084,
"learning_rate": 8.49078947368421e-06,
"loss": 0.0872,
"step": 3550
},
{
"epoch": 0.3575,
"grad_norm": 3.860246419906616,
"learning_rate": 8.457894736842107e-06,
"loss": 0.1361,
"step": 3575
},
{
"epoch": 0.36,
"grad_norm": 3.1953701972961426,
"learning_rate": 8.425000000000001e-06,
"loss": 0.1536,
"step": 3600
},
{
"epoch": 0.3625,
"grad_norm": 2.907952308654785,
"learning_rate": 8.392105263157896e-06,
"loss": 0.1502,
"step": 3625
},
{
"epoch": 0.365,
"grad_norm": 1.8253568410873413,
"learning_rate": 8.35921052631579e-06,
"loss": 0.0884,
"step": 3650
},
{
"epoch": 0.3675,
"grad_norm": 1.9003596305847168,
"learning_rate": 8.326315789473685e-06,
"loss": 0.0815,
"step": 3675
},
{
"epoch": 0.37,
"grad_norm": 2.44956111907959,
"learning_rate": 8.29342105263158e-06,
"loss": 0.0868,
"step": 3700
},
{
"epoch": 0.3725,
"grad_norm": 2.3651952743530273,
"learning_rate": 8.260526315789474e-06,
"loss": 0.0966,
"step": 3725
},
{
"epoch": 0.375,
"grad_norm": 3.287203550338745,
"learning_rate": 8.227631578947369e-06,
"loss": 0.1213,
"step": 3750
},
{
"epoch": 0.3775,
"grad_norm": 3.2496094703674316,
"learning_rate": 8.194736842105263e-06,
"loss": 0.1525,
"step": 3775
},
{
"epoch": 0.38,
"grad_norm": 4.660492420196533,
"learning_rate": 8.161842105263158e-06,
"loss": 0.1604,
"step": 3800
},
{
"epoch": 0.3825,
"grad_norm": 3.705150842666626,
"learning_rate": 8.128947368421053e-06,
"loss": 0.1473,
"step": 3825
},
{
"epoch": 0.385,
"grad_norm": 3.797199010848999,
"learning_rate": 8.096052631578947e-06,
"loss": 0.1544,
"step": 3850
},
{
"epoch": 0.3875,
"grad_norm": 2.2780754566192627,
"learning_rate": 8.063157894736842e-06,
"loss": 0.1452,
"step": 3875
},
{
"epoch": 0.39,
"grad_norm": 2.3213751316070557,
"learning_rate": 8.030263157894736e-06,
"loss": 0.1036,
"step": 3900
},
{
"epoch": 0.3925,
"grad_norm": 2.1721794605255127,
"learning_rate": 7.997368421052632e-06,
"loss": 0.0883,
"step": 3925
},
{
"epoch": 0.395,
"grad_norm": 2.4722604751586914,
"learning_rate": 7.964473684210527e-06,
"loss": 0.0825,
"step": 3950
},
{
"epoch": 0.3975,
"grad_norm": 3.6939098834991455,
"learning_rate": 7.931578947368422e-06,
"loss": 0.1424,
"step": 3975
},
{
"epoch": 0.4,
"grad_norm": 3.4190080165863037,
"learning_rate": 7.898684210526316e-06,
"loss": 0.1825,
"step": 4000
},
{
"epoch": 0.4,
"eval_loss": 0.3023500144481659,
"eval_runtime": 108.6852,
"eval_samples_per_second": 15.559,
"eval_steps_per_second": 0.975,
"eval_wer": 12.395404021481205,
"step": 4000
},
{
"epoch": 0.4025,
"grad_norm": 2.4307820796966553,
"learning_rate": 7.86578947368421e-06,
"loss": 0.1358,
"step": 4025
},
{
"epoch": 0.405,
"grad_norm": 4.717021942138672,
"learning_rate": 7.832894736842105e-06,
"loss": 0.1963,
"step": 4050
},
{
"epoch": 0.4075,
"grad_norm": 3.309920310974121,
"learning_rate": 7.8e-06,
"loss": 0.1378,
"step": 4075
},
{
"epoch": 0.41,
"grad_norm": 2.411762237548828,
"learning_rate": 7.767105263157894e-06,
"loss": 0.1299,
"step": 4100
},
{
"epoch": 0.4125,
"grad_norm": 2.3351423740386963,
"learning_rate": 7.734210526315789e-06,
"loss": 0.0917,
"step": 4125
},
{
"epoch": 0.415,
"grad_norm": 2.250337839126587,
"learning_rate": 7.701315789473684e-06,
"loss": 0.0798,
"step": 4150
},
{
"epoch": 0.4175,
"grad_norm": 1.9465301036834717,
"learning_rate": 7.668421052631578e-06,
"loss": 0.082,
"step": 4175
},
{
"epoch": 0.42,
"grad_norm": 1.652019739151001,
"learning_rate": 7.635526315789474e-06,
"loss": 0.076,
"step": 4200
},
{
"epoch": 0.4225,
"grad_norm": 1.7151554822921753,
"learning_rate": 7.602631578947368e-06,
"loss": 0.0823,
"step": 4225
},
{
"epoch": 0.425,
"grad_norm": 2.1287248134613037,
"learning_rate": 7.569736842105264e-06,
"loss": 0.0786,
"step": 4250
},
{
"epoch": 0.4275,
"grad_norm": 3.07344388961792,
"learning_rate": 7.536842105263159e-06,
"loss": 0.088,
"step": 4275
},
{
"epoch": 0.43,
"grad_norm": 3.5005462169647217,
"learning_rate": 7.5039473684210535e-06,
"loss": 0.1402,
"step": 4300
},
{
"epoch": 0.4325,
"grad_norm": 2.705017328262329,
"learning_rate": 7.471052631578948e-06,
"loss": 0.1201,
"step": 4325
},
{
"epoch": 0.435,
"grad_norm": 3.2306909561157227,
"learning_rate": 7.4381578947368426e-06,
"loss": 0.1263,
"step": 4350
},
{
"epoch": 0.4375,
"grad_norm": 1.8453395366668701,
"learning_rate": 7.405263157894737e-06,
"loss": 0.0749,
"step": 4375
},
{
"epoch": 0.44,
"grad_norm": 1.7151731252670288,
"learning_rate": 7.372368421052632e-06,
"loss": 0.067,
"step": 4400
},
{
"epoch": 0.4425,
"grad_norm": 2.109825372695923,
"learning_rate": 7.339473684210526e-06,
"loss": 0.0773,
"step": 4425
},
{
"epoch": 0.445,
"grad_norm": 2.3926897048950195,
"learning_rate": 7.306578947368422e-06,
"loss": 0.0827,
"step": 4450
},
{
"epoch": 0.4475,
"grad_norm": 2.0515196323394775,
"learning_rate": 7.273684210526316e-06,
"loss": 0.0916,
"step": 4475
},
{
"epoch": 0.45,
"grad_norm": 2.471918821334839,
"learning_rate": 7.240789473684211e-06,
"loss": 0.0857,
"step": 4500
},
{
"epoch": 0.4525,
"grad_norm": 2.649667263031006,
"learning_rate": 7.207894736842105e-06,
"loss": 0.1089,
"step": 4525
},
{
"epoch": 0.455,
"grad_norm": 3.062638998031616,
"learning_rate": 7.175e-06,
"loss": 0.1376,
"step": 4550
},
{
"epoch": 0.4575,
"grad_norm": 3.592533826828003,
"learning_rate": 7.1421052631578945e-06,
"loss": 0.1378,
"step": 4575
},
{
"epoch": 0.46,
"grad_norm": 3.7652299404144287,
"learning_rate": 7.109210526315791e-06,
"loss": 0.1408,
"step": 4600
},
{
"epoch": 0.4625,
"grad_norm": 3.15897798538208,
"learning_rate": 7.076315789473685e-06,
"loss": 0.1446,
"step": 4625
},
{
"epoch": 0.465,
"grad_norm": 3.27856707572937,
"learning_rate": 7.04342105263158e-06,
"loss": 0.1358,
"step": 4650
},
{
"epoch": 0.4675,
"grad_norm": 3.6503591537475586,
"learning_rate": 7.010526315789474e-06,
"loss": 0.1236,
"step": 4675
},
{
"epoch": 0.47,
"grad_norm": 2.3351187705993652,
"learning_rate": 6.977631578947369e-06,
"loss": 0.109,
"step": 4700
},
{
"epoch": 0.4725,
"grad_norm": 3.16107439994812,
"learning_rate": 6.9447368421052635e-06,
"loss": 0.1162,
"step": 4725
},
{
"epoch": 0.475,
"grad_norm": 2.666114091873169,
"learning_rate": 6.911842105263158e-06,
"loss": 0.1169,
"step": 4750
},
{
"epoch": 0.4775,
"grad_norm": 1.9524102210998535,
"learning_rate": 6.878947368421053e-06,
"loss": 0.0837,
"step": 4775
},
{
"epoch": 0.48,
"grad_norm": 1.8562246561050415,
"learning_rate": 6.846052631578947e-06,
"loss": 0.0907,
"step": 4800
},
{
"epoch": 0.4825,
"grad_norm": 2.4069790840148926,
"learning_rate": 6.813157894736842e-06,
"loss": 0.075,
"step": 4825
},
{
"epoch": 0.485,
"grad_norm": 2.5987889766693115,
"learning_rate": 6.780263157894736e-06,
"loss": 0.0944,
"step": 4850
},
{
"epoch": 0.4875,
"grad_norm": 2.7949743270874023,
"learning_rate": 6.747368421052632e-06,
"loss": 0.1001,
"step": 4875
},
{
"epoch": 0.49,
"grad_norm": 3.0958948135375977,
"learning_rate": 6.714473684210526e-06,
"loss": 0.112,
"step": 4900
},
{
"epoch": 0.4925,
"grad_norm": 2.0432355403900146,
"learning_rate": 6.681578947368422e-06,
"loss": 0.1006,
"step": 4925
},
{
"epoch": 0.495,
"grad_norm": 2.425602912902832,
"learning_rate": 6.648684210526316e-06,
"loss": 0.0853,
"step": 4950
},
{
"epoch": 0.4975,
"grad_norm": 2.167152166366577,
"learning_rate": 6.615789473684212e-06,
"loss": 0.0809,
"step": 4975
},
{
"epoch": 0.5,
"grad_norm": 2.699022054672241,
"learning_rate": 6.582894736842106e-06,
"loss": 0.0775,
"step": 5000
},
{
"epoch": 0.5,
"eval_loss": 0.3198271691799164,
"eval_runtime": 108.5474,
"eval_samples_per_second": 15.578,
"eval_steps_per_second": 0.977,
"eval_wer": 11.877107530910454,
"step": 5000
},
{
"epoch": 0.5025,
"grad_norm": 2.366492748260498,
"learning_rate": 6.550000000000001e-06,
"loss": 0.1144,
"step": 5025
},
{
"epoch": 0.505,
"grad_norm": 3.174971342086792,
"learning_rate": 6.517105263157895e-06,
"loss": 0.104,
"step": 5050
},
{
"epoch": 0.5075,
"grad_norm": 3.391756296157837,
"learning_rate": 6.48421052631579e-06,
"loss": 0.1545,
"step": 5075
},
{
"epoch": 0.51,
"grad_norm": 3.718075752258301,
"learning_rate": 6.4513157894736845e-06,
"loss": 0.1432,
"step": 5100
},
{
"epoch": 0.5125,
"grad_norm": 3.2013394832611084,
"learning_rate": 6.418421052631579e-06,
"loss": 0.1258,
"step": 5125
},
{
"epoch": 0.515,
"grad_norm": 3.259122610092163,
"learning_rate": 6.385526315789474e-06,
"loss": 0.1265,
"step": 5150
},
{
"epoch": 0.5175,
"grad_norm": 2.2417266368865967,
"learning_rate": 6.352631578947368e-06,
"loss": 0.1043,
"step": 5175
},
{
"epoch": 0.52,
"grad_norm": 2.150083541870117,
"learning_rate": 6.319736842105263e-06,
"loss": 0.0687,
"step": 5200
},
{
"epoch": 0.5225,
"grad_norm": 1.597461462020874,
"learning_rate": 6.286842105263157e-06,
"loss": 0.0719,
"step": 5225
},
{
"epoch": 0.525,
"grad_norm": 1.995166540145874,
"learning_rate": 6.253947368421052e-06,
"loss": 0.0761,
"step": 5250
},
{
"epoch": 0.5275,
"grad_norm": 1.8248108625411987,
"learning_rate": 6.221052631578947e-06,
"loss": 0.0777,
"step": 5275
},
{
"epoch": 0.53,
"grad_norm": 2.475926637649536,
"learning_rate": 6.188157894736842e-06,
"loss": 0.08,
"step": 5300
},
{
"epoch": 0.5325,
"grad_norm": 2.4988951683044434,
"learning_rate": 6.155263157894737e-06,
"loss": 0.0781,
"step": 5325
},
{
"epoch": 0.535,
"grad_norm": 3.3215720653533936,
"learning_rate": 6.122368421052632e-06,
"loss": 0.1015,
"step": 5350
},
{
"epoch": 0.5375,
"grad_norm": 3.0651330947875977,
"learning_rate": 6.089473684210527e-06,
"loss": 0.1196,
"step": 5375
},
{
"epoch": 0.54,
"grad_norm": 2.73099684715271,
"learning_rate": 6.056578947368422e-06,
"loss": 0.1132,
"step": 5400
},
{
"epoch": 0.5425,
"grad_norm": 3.1001064777374268,
"learning_rate": 6.023684210526316e-06,
"loss": 0.1152,
"step": 5425
},
{
"epoch": 0.545,
"grad_norm": 2.7154345512390137,
"learning_rate": 5.990789473684211e-06,
"loss": 0.1034,
"step": 5450
},
{
"epoch": 0.5475,
"grad_norm": 2.9686317443847656,
"learning_rate": 5.9578947368421055e-06,
"loss": 0.0817,
"step": 5475
},
{
"epoch": 0.55,
"grad_norm": 2.079238176345825,
"learning_rate": 5.925e-06,
"loss": 0.0863,
"step": 5500
},
{
"epoch": 0.5525,
"grad_norm": 3.6043894290924072,
"learning_rate": 5.8921052631578954e-06,
"loss": 0.109,
"step": 5525
},
{
"epoch": 0.555,
"grad_norm": 3.189924716949463,
"learning_rate": 5.85921052631579e-06,
"loss": 0.1268,
"step": 5550
},
{
"epoch": 0.5575,
"grad_norm": 2.064168691635132,
"learning_rate": 5.8263157894736846e-06,
"loss": 0.0964,
"step": 5575
},
{
"epoch": 0.56,
"grad_norm": 1.738265872001648,
"learning_rate": 5.793421052631579e-06,
"loss": 0.0625,
"step": 5600
},
{
"epoch": 0.5625,
"grad_norm": 2.038506269454956,
"learning_rate": 5.760526315789474e-06,
"loss": 0.0497,
"step": 5625
},
{
"epoch": 0.565,
"grad_norm": 2.134082078933716,
"learning_rate": 5.727631578947368e-06,
"loss": 0.051,
"step": 5650
},
{
"epoch": 0.5675,
"grad_norm": 2.1143264770507812,
"learning_rate": 5.694736842105263e-06,
"loss": 0.0672,
"step": 5675
},
{
"epoch": 0.57,
"grad_norm": 2.5272130966186523,
"learning_rate": 5.661842105263158e-06,
"loss": 0.0733,
"step": 5700
},
{
"epoch": 0.5725,
"grad_norm": 1.941775918006897,
"learning_rate": 5.628947368421053e-06,
"loss": 0.0721,
"step": 5725
},
{
"epoch": 0.575,
"grad_norm": 1.680059790611267,
"learning_rate": 5.596052631578947e-06,
"loss": 0.0727,
"step": 5750
},
{
"epoch": 0.5775,
"grad_norm": 1.909376859664917,
"learning_rate": 5.563157894736843e-06,
"loss": 0.0708,
"step": 5775
},
{
"epoch": 0.58,
"grad_norm": 1.746079444885254,
"learning_rate": 5.530263157894737e-06,
"loss": 0.0668,
"step": 5800
},
{
"epoch": 0.5825,
"grad_norm": 1.6772959232330322,
"learning_rate": 5.497368421052632e-06,
"loss": 0.061,
"step": 5825
},
{
"epoch": 0.585,
"grad_norm": 2.339190721511841,
"learning_rate": 5.4644736842105264e-06,
"loss": 0.0665,
"step": 5850
},
{
"epoch": 0.5875,
"grad_norm": 1.8088990449905396,
"learning_rate": 5.431578947368422e-06,
"loss": 0.07,
"step": 5875
},
{
"epoch": 0.59,
"grad_norm": 2.6846957206726074,
"learning_rate": 5.398684210526316e-06,
"loss": 0.0866,
"step": 5900
},
{
"epoch": 0.5925,
"grad_norm": 3.235233783721924,
"learning_rate": 5.365789473684211e-06,
"loss": 0.1282,
"step": 5925
},
{
"epoch": 0.595,
"grad_norm": 2.8694686889648438,
"learning_rate": 5.3328947368421055e-06,
"loss": 0.1188,
"step": 5950
},
{
"epoch": 0.5975,
"grad_norm": 2.7638120651245117,
"learning_rate": 5.3e-06,
"loss": 0.0998,
"step": 5975
},
{
"epoch": 0.6,
"grad_norm": 2.570558786392212,
"learning_rate": 5.267105263157895e-06,
"loss": 0.0975,
"step": 6000
},
{
"epoch": 0.6,
"eval_loss": 0.2923527956008911,
"eval_runtime": 108.3631,
"eval_samples_per_second": 15.605,
"eval_steps_per_second": 0.978,
"eval_wer": 11.258898463844137,
"step": 6000
},
{
"epoch": 0.6025,
"grad_norm": 2.571687698364258,
"learning_rate": 5.23421052631579e-06,
"loss": 0.0904,
"step": 6025
},
{
"epoch": 0.605,
"grad_norm": 2.654676914215088,
"learning_rate": 5.201315789473685e-06,
"loss": 0.0793,
"step": 6050
},
{
"epoch": 0.6075,
"grad_norm": 1.8399786949157715,
"learning_rate": 5.168421052631579e-06,
"loss": 0.0708,
"step": 6075
},
{
"epoch": 0.61,
"grad_norm": 1.8271868228912354,
"learning_rate": 5.135526315789474e-06,
"loss": 0.0672,
"step": 6100
},
{
"epoch": 0.6125,
"grad_norm": 2.6549460887908936,
"learning_rate": 5.102631578947368e-06,
"loss": 0.0714,
"step": 6125
},
{
"epoch": 0.615,
"grad_norm": 2.3854265213012695,
"learning_rate": 5.069736842105263e-06,
"loss": 0.0772,
"step": 6150
},
{
"epoch": 0.6175,
"grad_norm": 2.193765163421631,
"learning_rate": 5.036842105263158e-06,
"loss": 0.0769,
"step": 6175
},
{
"epoch": 0.62,
"grad_norm": 2.371039390563965,
"learning_rate": 5.003947368421053e-06,
"loss": 0.0763,
"step": 6200
},
{
"epoch": 0.6225,
"grad_norm": 2.19585919380188,
"learning_rate": 4.971052631578948e-06,
"loss": 0.0718,
"step": 6225
},
{
"epoch": 0.625,
"grad_norm": 2.88238787651062,
"learning_rate": 4.938157894736843e-06,
"loss": 0.0736,
"step": 6250
},
{
"epoch": 0.6275,
"grad_norm": 2.133556604385376,
"learning_rate": 4.905263157894737e-06,
"loss": 0.0835,
"step": 6275
},
{
"epoch": 0.63,
"grad_norm": 2.584014654159546,
"learning_rate": 4.872368421052632e-06,
"loss": 0.0899,
"step": 6300
},
{
"epoch": 0.6325,
"grad_norm": 2.235517740249634,
"learning_rate": 4.8394736842105265e-06,
"loss": 0.1156,
"step": 6325
},
{
"epoch": 0.635,
"grad_norm": 3.3249387741088867,
"learning_rate": 4.806578947368421e-06,
"loss": 0.1104,
"step": 6350
},
{
"epoch": 0.6375,
"grad_norm": 2.0971710681915283,
"learning_rate": 4.7736842105263164e-06,
"loss": 0.0994,
"step": 6375
},
{
"epoch": 0.64,
"grad_norm": 1.9291256666183472,
"learning_rate": 4.740789473684211e-06,
"loss": 0.0823,
"step": 6400
},
{
"epoch": 0.6425,
"grad_norm": 2.2015464305877686,
"learning_rate": 4.7078947368421056e-06,
"loss": 0.0701,
"step": 6425
},
{
"epoch": 0.645,
"grad_norm": 2.357741355895996,
"learning_rate": 4.675e-06,
"loss": 0.0684,
"step": 6450
},
{
"epoch": 0.6475,
"grad_norm": 3.1389389038085938,
"learning_rate": 4.642105263157895e-06,
"loss": 0.0993,
"step": 6475
},
{
"epoch": 0.65,
"grad_norm": 2.419999122619629,
"learning_rate": 4.609210526315789e-06,
"loss": 0.0944,
"step": 6500
},
{
"epoch": 0.6525,
"grad_norm": 3.8924739360809326,
"learning_rate": 4.576315789473684e-06,
"loss": 0.1216,
"step": 6525
},
{
"epoch": 0.655,
"grad_norm": 2.7731151580810547,
"learning_rate": 4.543421052631579e-06,
"loss": 0.1094,
"step": 6550
},
{
"epoch": 0.6575,
"grad_norm": 2.8039772510528564,
"learning_rate": 4.510526315789474e-06,
"loss": 0.1107,
"step": 6575
},
{
"epoch": 0.66,
"grad_norm": 2.598423719406128,
"learning_rate": 4.477631578947368e-06,
"loss": 0.1028,
"step": 6600
},
{
"epoch": 0.6625,
"grad_norm": 2.9384443759918213,
"learning_rate": 4.444736842105264e-06,
"loss": 0.1046,
"step": 6625
},
{
"epoch": 0.665,
"grad_norm": 2.54249906539917,
"learning_rate": 4.411842105263158e-06,
"loss": 0.12,
"step": 6650
},
{
"epoch": 0.6675,
"grad_norm": 2.804525136947632,
"learning_rate": 4.378947368421053e-06,
"loss": 0.1024,
"step": 6675
},
{
"epoch": 0.67,
"grad_norm": 1.912115454673767,
"learning_rate": 4.3460526315789474e-06,
"loss": 0.0956,
"step": 6700
},
{
"epoch": 0.6725,
"grad_norm": 1.8333637714385986,
"learning_rate": 4.313157894736843e-06,
"loss": 0.0724,
"step": 6725
},
{
"epoch": 0.675,
"grad_norm": 2.1709723472595215,
"learning_rate": 4.280263157894737e-06,
"loss": 0.068,
"step": 6750
},
{
"epoch": 0.6775,
"grad_norm": 2.6692330837249756,
"learning_rate": 4.247368421052632e-06,
"loss": 0.0809,
"step": 6775
},
{
"epoch": 0.68,
"grad_norm": 2.787583589553833,
"learning_rate": 4.2144736842105265e-06,
"loss": 0.1336,
"step": 6800
},
{
"epoch": 0.6825,
"grad_norm": 3.7850124835968018,
"learning_rate": 4.181578947368421e-06,
"loss": 0.1178,
"step": 6825
},
{
"epoch": 0.685,
"grad_norm": 3.4124090671539307,
"learning_rate": 4.148684210526316e-06,
"loss": 0.1211,
"step": 6850
},
{
"epoch": 0.6875,
"grad_norm": 2.470259189605713,
"learning_rate": 4.115789473684211e-06,
"loss": 0.1133,
"step": 6875
},
{
"epoch": 0.69,
"grad_norm": 3.100062847137451,
"learning_rate": 4.082894736842106e-06,
"loss": 0.1083,
"step": 6900
},
{
"epoch": 0.6925,
"grad_norm": 2.644653558731079,
"learning_rate": 4.05e-06,
"loss": 0.1019,
"step": 6925
},
{
"epoch": 0.695,
"grad_norm": 2.8097853660583496,
"learning_rate": 4.018421052631579e-06,
"loss": 0.1068,
"step": 6950
},
{
"epoch": 0.6975,
"grad_norm": 3.0686898231506348,
"learning_rate": 3.985526315789474e-06,
"loss": 0.1187,
"step": 6975
},
{
"epoch": 0.7,
"grad_norm": 2.7191102504730225,
"learning_rate": 3.952631578947369e-06,
"loss": 0.1132,
"step": 7000
},
{
"epoch": 0.7,
"eval_loss": 0.2969239056110382,
"eval_runtime": 108.3888,
"eval_samples_per_second": 15.601,
"eval_steps_per_second": 0.978,
"eval_wer": 10.846759085799926,
"step": 7000
},
{
"epoch": 0.7025,
"grad_norm": 4.9354567527771,
"learning_rate": 3.922368421052632e-06,
"loss": 0.2171,
"step": 7025
},
{
"epoch": 0.705,
"grad_norm": 4.480522632598877,
"learning_rate": 3.889473684210526e-06,
"loss": 0.3333,
"step": 7050
},
{
"epoch": 0.7075,
"grad_norm": 4.159786701202393,
"learning_rate": 3.856578947368422e-06,
"loss": 0.3131,
"step": 7075
},
{
"epoch": 0.71,
"grad_norm": 3.261507987976074,
"learning_rate": 3.823684210526316e-06,
"loss": 0.2489,
"step": 7100
},
{
"epoch": 0.7125,
"grad_norm": 2.8575055599212646,
"learning_rate": 3.7907894736842108e-06,
"loss": 0.147,
"step": 7125
},
{
"epoch": 0.715,
"grad_norm": 3.800828218460083,
"learning_rate": 3.7578947368421053e-06,
"loss": 0.1173,
"step": 7150
},
{
"epoch": 0.7175,
"grad_norm": 2.675981283187866,
"learning_rate": 3.725e-06,
"loss": 0.1039,
"step": 7175
},
{
"epoch": 0.72,
"grad_norm": 2.7404372692108154,
"learning_rate": 3.6921052631578945e-06,
"loss": 0.0817,
"step": 7200
},
{
"epoch": 0.7225,
"grad_norm": 2.429400682449341,
"learning_rate": 3.65921052631579e-06,
"loss": 0.0816,
"step": 7225
},
{
"epoch": 0.725,
"grad_norm": 1.525938630104065,
"learning_rate": 3.6263157894736844e-06,
"loss": 0.0825,
"step": 7250
},
{
"epoch": 0.7275,
"grad_norm": 1.7473913431167603,
"learning_rate": 3.5934210526315794e-06,
"loss": 0.071,
"step": 7275
},
{
"epoch": 0.73,
"grad_norm": 1.690190315246582,
"learning_rate": 3.560526315789474e-06,
"loss": 0.0582,
"step": 7300
},
{
"epoch": 0.7325,
"grad_norm": 1.7410528659820557,
"learning_rate": 3.5276315789473685e-06,
"loss": 0.0628,
"step": 7325
},
{
"epoch": 0.735,
"grad_norm": 2.584449052810669,
"learning_rate": 3.494736842105263e-06,
"loss": 0.081,
"step": 7350
},
{
"epoch": 0.7375,
"grad_norm": 2.2174675464630127,
"learning_rate": 3.4618421052631577e-06,
"loss": 0.1095,
"step": 7375
},
{
"epoch": 0.74,
"grad_norm": 2.5199625492095947,
"learning_rate": 3.428947368421053e-06,
"loss": 0.0859,
"step": 7400
},
{
"epoch": 0.7425,
"grad_norm": 1.9601330757141113,
"learning_rate": 3.3960526315789476e-06,
"loss": 0.0973,
"step": 7425
},
{
"epoch": 0.745,
"grad_norm": 1.9718409776687622,
"learning_rate": 3.363157894736842e-06,
"loss": 0.069,
"step": 7450
},
{
"epoch": 0.7475,
"grad_norm": 2.3192267417907715,
"learning_rate": 3.330263157894737e-06,
"loss": 0.0662,
"step": 7475
},
{
"epoch": 0.75,
"grad_norm": 2.357396364212036,
"learning_rate": 3.2973684210526318e-06,
"loss": 0.0625,
"step": 7500
},
{
"epoch": 0.7525,
"grad_norm": 2.821035623550415,
"learning_rate": 3.2644736842105263e-06,
"loss": 0.1253,
"step": 7525
},
{
"epoch": 0.755,
"grad_norm": 3.693251848220825,
"learning_rate": 3.231578947368421e-06,
"loss": 0.165,
"step": 7550
},
{
"epoch": 0.7575,
"grad_norm": 3.8312151432037354,
"learning_rate": 3.1986842105263163e-06,
"loss": 0.1901,
"step": 7575
},
{
"epoch": 0.76,
"grad_norm": 1.4963386058807373,
"learning_rate": 3.165789473684211e-06,
"loss": 0.1203,
"step": 7600
},
{
"epoch": 0.7625,
"grad_norm": 2.3876097202301025,
"learning_rate": 3.1328947368421054e-06,
"loss": 0.08,
"step": 7625
},
{
"epoch": 0.765,
"grad_norm": 1.5950793027877808,
"learning_rate": 3.1e-06,
"loss": 0.066,
"step": 7650
},
{
"epoch": 0.7675,
"grad_norm": 2.980989694595337,
"learning_rate": 3.067105263157895e-06,
"loss": 0.0912,
"step": 7675
},
{
"epoch": 0.77,
"grad_norm": 4.786663055419922,
"learning_rate": 3.03421052631579e-06,
"loss": 0.1057,
"step": 7700
},
{
"epoch": 0.7725,
"grad_norm": 3.2149651050567627,
"learning_rate": 3.0013157894736845e-06,
"loss": 0.1162,
"step": 7725
},
{
"epoch": 0.775,
"grad_norm": 2.1451265811920166,
"learning_rate": 2.968421052631579e-06,
"loss": 0.1024,
"step": 7750
},
{
"epoch": 0.7775,
"grad_norm": 2.4378089904785156,
"learning_rate": 2.935526315789474e-06,
"loss": 0.0731,
"step": 7775
},
{
"epoch": 0.78,
"grad_norm": 2.8874340057373047,
"learning_rate": 2.9026315789473686e-06,
"loss": 0.0885,
"step": 7800
},
{
"epoch": 0.7825,
"grad_norm": 2.4116005897521973,
"learning_rate": 2.869736842105263e-06,
"loss": 0.183,
"step": 7825
},
{
"epoch": 0.785,
"grad_norm": 1.9551997184753418,
"learning_rate": 2.8368421052631577e-06,
"loss": 0.0911,
"step": 7850
},
{
"epoch": 0.7875,
"grad_norm": 1.4724156856536865,
"learning_rate": 2.8039473684210527e-06,
"loss": 0.073,
"step": 7875
},
{
"epoch": 0.79,
"grad_norm": 1.8941609859466553,
"learning_rate": 2.7710526315789477e-06,
"loss": 0.0697,
"step": 7900
},
{
"epoch": 0.7925,
"grad_norm": 1.880815863609314,
"learning_rate": 2.7381578947368423e-06,
"loss": 0.0722,
"step": 7925
},
{
"epoch": 0.795,
"grad_norm": 1.5334910154342651,
"learning_rate": 2.7052631578947372e-06,
"loss": 0.0631,
"step": 7950
},
{
"epoch": 0.7975,
"grad_norm": 1.5681651830673218,
"learning_rate": 2.672368421052632e-06,
"loss": 0.0718,
"step": 7975
},
{
"epoch": 0.8,
"grad_norm": 2.644123077392578,
"learning_rate": 2.6394736842105264e-06,
"loss": 0.0852,
"step": 8000
},
{
"epoch": 0.8,
"eval_loss": 0.22370582818984985,
"eval_runtime": 108.5849,
"eval_samples_per_second": 15.573,
"eval_steps_per_second": 0.976,
"eval_wer": 9.772698888472586,
"step": 8000
},
{
"epoch": 0.8025,
"grad_norm": 2.5814223289489746,
"learning_rate": 2.606578947368421e-06,
"loss": 0.0991,
"step": 8025
},
{
"epoch": 0.805,
"grad_norm": 2.196101188659668,
"learning_rate": 2.573684210526316e-06,
"loss": 0.1063,
"step": 8050
},
{
"epoch": 0.8075,
"grad_norm": 2.1308398246765137,
"learning_rate": 2.5407894736842105e-06,
"loss": 0.082,
"step": 8075
},
{
"epoch": 0.81,
"grad_norm": 1.9144341945648193,
"learning_rate": 2.5078947368421055e-06,
"loss": 0.0754,
"step": 8100
},
{
"epoch": 0.8125,
"grad_norm": 1.7134053707122803,
"learning_rate": 2.4750000000000004e-06,
"loss": 0.0608,
"step": 8125
},
{
"epoch": 0.815,
"grad_norm": 2.3351683616638184,
"learning_rate": 2.442105263157895e-06,
"loss": 0.0626,
"step": 8150
},
{
"epoch": 0.8175,
"grad_norm": 1.5327272415161133,
"learning_rate": 2.4092105263157896e-06,
"loss": 0.0587,
"step": 8175
},
{
"epoch": 0.82,
"grad_norm": 2.235727071762085,
"learning_rate": 2.3763157894736846e-06,
"loss": 0.0474,
"step": 8200
},
{
"epoch": 0.8225,
"grad_norm": 1.5293081998825073,
"learning_rate": 2.343421052631579e-06,
"loss": 0.0619,
"step": 8225
},
{
"epoch": 0.825,
"grad_norm": 2.498485565185547,
"learning_rate": 2.3105263157894737e-06,
"loss": 0.0542,
"step": 8250
},
{
"epoch": 0.8275,
"grad_norm": 1.7372788190841675,
"learning_rate": 2.2776315789473682e-06,
"loss": 0.0581,
"step": 8275
},
{
"epoch": 0.83,
"grad_norm": 1.3887490034103394,
"learning_rate": 2.2447368421052632e-06,
"loss": 0.0592,
"step": 8300
},
{
"epoch": 0.8325,
"grad_norm": 2.6547152996063232,
"learning_rate": 2.2118421052631578e-06,
"loss": 0.0818,
"step": 8325
},
{
"epoch": 0.835,
"grad_norm": 2.8091819286346436,
"learning_rate": 2.1789473684210528e-06,
"loss": 0.1006,
"step": 8350
},
{
"epoch": 0.8375,
"grad_norm": 2.7749269008636475,
"learning_rate": 2.1460526315789478e-06,
"loss": 0.1182,
"step": 8375
},
{
"epoch": 0.84,
"grad_norm": 2.2278335094451904,
"learning_rate": 2.1131578947368423e-06,
"loss": 0.0957,
"step": 8400
},
{
"epoch": 0.8425,
"grad_norm": 2.1413509845733643,
"learning_rate": 2.080263157894737e-06,
"loss": 0.0707,
"step": 8425
},
{
"epoch": 0.845,
"grad_norm": 1.9786673784255981,
"learning_rate": 2.0473684210526314e-06,
"loss": 0.0649,
"step": 8450
},
{
"epoch": 0.8475,
"grad_norm": 2.812910318374634,
"learning_rate": 2.0144736842105264e-06,
"loss": 0.074,
"step": 8475
},
{
"epoch": 0.85,
"grad_norm": 2.2044918537139893,
"learning_rate": 1.981578947368421e-06,
"loss": 0.0828,
"step": 8500
},
{
"epoch": 0.8525,
"grad_norm": 2.3589258193969727,
"learning_rate": 1.9486842105263155e-06,
"loss": 0.087,
"step": 8525
},
{
"epoch": 0.855,
"grad_norm": 4.131094455718994,
"learning_rate": 1.9157894736842105e-06,
"loss": 0.1186,
"step": 8550
},
{
"epoch": 0.8575,
"grad_norm": 3.9992008209228516,
"learning_rate": 1.8828947368421053e-06,
"loss": 0.2169,
"step": 8575
},
{
"epoch": 0.86,
"grad_norm": 5.6015849113464355,
"learning_rate": 1.85e-06,
"loss": 0.303,
"step": 8600
},
{
"epoch": 0.8625,
"grad_norm": 5.061160087585449,
"learning_rate": 1.817105263157895e-06,
"loss": 0.3653,
"step": 8625
},
{
"epoch": 0.865,
"grad_norm": 2.206181287765503,
"learning_rate": 1.7842105263157896e-06,
"loss": 0.2101,
"step": 8650
},
{
"epoch": 0.8675,
"grad_norm": 3.2147305011749268,
"learning_rate": 1.7513157894736842e-06,
"loss": 0.1292,
"step": 8675
},
{
"epoch": 0.87,
"grad_norm": 3.8435158729553223,
"learning_rate": 1.718421052631579e-06,
"loss": 0.1566,
"step": 8700
},
{
"epoch": 0.8725,
"grad_norm": 2.289605140686035,
"learning_rate": 1.685526315789474e-06,
"loss": 0.1001,
"step": 8725
},
{
"epoch": 0.875,
"grad_norm": 1.7797821760177612,
"learning_rate": 1.6526315789473685e-06,
"loss": 0.0631,
"step": 8750
},
{
"epoch": 0.8775,
"grad_norm": 1.8683706521987915,
"learning_rate": 1.619736842105263e-06,
"loss": 0.0684,
"step": 8775
},
{
"epoch": 0.88,
"grad_norm": 1.9965925216674805,
"learning_rate": 1.586842105263158e-06,
"loss": 0.0761,
"step": 8800
},
{
"epoch": 0.8825,
"grad_norm": 2.9362261295318604,
"learning_rate": 1.5539473684210528e-06,
"loss": 0.0933,
"step": 8825
},
{
"epoch": 0.885,
"grad_norm": 2.6258389949798584,
"learning_rate": 1.5210526315789474e-06,
"loss": 0.0951,
"step": 8850
},
{
"epoch": 0.8875,
"grad_norm": 2.6913561820983887,
"learning_rate": 1.4881578947368422e-06,
"loss": 0.0894,
"step": 8875
},
{
"epoch": 0.89,
"grad_norm": 2.097146511077881,
"learning_rate": 1.455263157894737e-06,
"loss": 0.0642,
"step": 8900
},
{
"epoch": 0.8925,
"grad_norm": 2.0721096992492676,
"learning_rate": 1.4223684210526317e-06,
"loss": 0.0653,
"step": 8925
},
{
"epoch": 0.895,
"grad_norm": 2.383824586868286,
"learning_rate": 1.3894736842105265e-06,
"loss": 0.0673,
"step": 8950
},
{
"epoch": 0.8975,
"grad_norm": 2.066480875015259,
"learning_rate": 1.356578947368421e-06,
"loss": 0.0567,
"step": 8975
},
{
"epoch": 0.9,
"grad_norm": 1.9253851175308228,
"learning_rate": 1.3236842105263158e-06,
"loss": 0.0585,
"step": 9000
},
{
"epoch": 0.9,
"eval_loss": 0.23174172639846802,
"eval_runtime": 108.0471,
"eval_samples_per_second": 15.651,
"eval_steps_per_second": 0.981,
"eval_wer": 9.62907455976021,
"step": 9000
},
{
"epoch": 0.9025,
"grad_norm": 2.2203736305236816,
"learning_rate": 1.2907894736842106e-06,
"loss": 0.0696,
"step": 9025
},
{
"epoch": 0.905,
"grad_norm": 2.5881643295288086,
"learning_rate": 1.2578947368421054e-06,
"loss": 0.0802,
"step": 9050
},
{
"epoch": 0.9075,
"grad_norm": 3.149264097213745,
"learning_rate": 1.2250000000000001e-06,
"loss": 0.1068,
"step": 9075
},
{
"epoch": 0.91,
"grad_norm": 2.7941627502441406,
"learning_rate": 1.1921052631578947e-06,
"loss": 0.1036,
"step": 9100
},
{
"epoch": 0.9125,
"grad_norm": 1.6882115602493286,
"learning_rate": 1.1592105263157895e-06,
"loss": 0.0818,
"step": 9125
},
{
"epoch": 0.915,
"grad_norm": 1.7995328903198242,
"learning_rate": 1.1263157894736842e-06,
"loss": 0.0817,
"step": 9150
},
{
"epoch": 0.9175,
"grad_norm": 2.3154258728027344,
"learning_rate": 1.093421052631579e-06,
"loss": 0.0752,
"step": 9175
},
{
"epoch": 0.92,
"grad_norm": 2.428879737854004,
"learning_rate": 1.0605263157894738e-06,
"loss": 0.0805,
"step": 9200
},
{
"epoch": 0.9225,
"grad_norm": 3.1283998489379883,
"learning_rate": 1.0276315789473683e-06,
"loss": 0.0957,
"step": 9225
},
{
"epoch": 0.925,
"grad_norm": 2.8848423957824707,
"learning_rate": 9.947368421052633e-07,
"loss": 0.1028,
"step": 9250
},
{
"epoch": 0.9275,
"grad_norm": 2.741641044616699,
"learning_rate": 9.618421052631579e-07,
"loss": 0.1203,
"step": 9275
},
{
"epoch": 0.93,
"grad_norm": 1.3423601388931274,
"learning_rate": 9.289473684210527e-07,
"loss": 0.0712,
"step": 9300
},
{
"epoch": 0.9325,
"grad_norm": 1.9067927598953247,
"learning_rate": 8.960526315789474e-07,
"loss": 0.076,
"step": 9325
},
{
"epoch": 0.935,
"grad_norm": 1.605100154876709,
"learning_rate": 8.631578947368421e-07,
"loss": 0.0624,
"step": 9350
},
{
"epoch": 0.9375,
"grad_norm": 2.283745050430298,
"learning_rate": 8.302631578947369e-07,
"loss": 0.0722,
"step": 9375
},
{
"epoch": 0.94,
"grad_norm": 2.803049087524414,
"learning_rate": 7.973684210526315e-07,
"loss": 0.0947,
"step": 9400
},
{
"epoch": 0.9425,
"grad_norm": 2.6914260387420654,
"learning_rate": 7.644736842105263e-07,
"loss": 0.1081,
"step": 9425
},
{
"epoch": 0.945,
"grad_norm": 1.8262979984283447,
"learning_rate": 7.315789473684211e-07,
"loss": 0.2328,
"step": 9450
},
{
"epoch": 0.9475,
"grad_norm": 1.848897099494934,
"learning_rate": 6.986842105263158e-07,
"loss": 0.1125,
"step": 9475
},
{
"epoch": 0.95,
"grad_norm": 2.561948299407959,
"learning_rate": 6.657894736842105e-07,
"loss": 0.0851,
"step": 9500
},
{
"epoch": 0.9525,
"grad_norm": 3.199554204940796,
"learning_rate": 6.328947368421053e-07,
"loss": 0.1023,
"step": 9525
},
{
"epoch": 0.955,
"grad_norm": 2.5224554538726807,
"learning_rate": 6.000000000000001e-07,
"loss": 0.1093,
"step": 9550
},
{
"epoch": 0.9575,
"grad_norm": 3.5115911960601807,
"learning_rate": 5.671052631578947e-07,
"loss": 0.1171,
"step": 9575
},
{
"epoch": 0.96,
"grad_norm": 2.1708128452301025,
"learning_rate": 5.342105263157895e-07,
"loss": 0.0901,
"step": 9600
},
{
"epoch": 0.9625,
"grad_norm": 2.0158002376556396,
"learning_rate": 5.013157894736842e-07,
"loss": 0.0714,
"step": 9625
},
{
"epoch": 0.965,
"grad_norm": 1.924566388130188,
"learning_rate": 4.68421052631579e-07,
"loss": 0.0555,
"step": 9650
},
{
"epoch": 0.9675,
"grad_norm": 1.6960805654525757,
"learning_rate": 4.3552631578947373e-07,
"loss": 0.0598,
"step": 9675
},
{
"epoch": 0.97,
"grad_norm": 1.8255702257156372,
"learning_rate": 4.0263157894736845e-07,
"loss": 0.0606,
"step": 9700
},
{
"epoch": 0.9725,
"grad_norm": 1.2460139989852905,
"learning_rate": 3.6973684210526317e-07,
"loss": 0.0581,
"step": 9725
},
{
"epoch": 0.975,
"grad_norm": 1.5349912643432617,
"learning_rate": 3.3684210526315795e-07,
"loss": 0.0628,
"step": 9750
},
{
"epoch": 0.9775,
"grad_norm": 1.9929566383361816,
"learning_rate": 3.0394736842105267e-07,
"loss": 0.0689,
"step": 9775
},
{
"epoch": 0.98,
"grad_norm": 2.2622244358062744,
"learning_rate": 2.710526315789474e-07,
"loss": 0.0945,
"step": 9800
},
{
"epoch": 0.9825,
"grad_norm": 3.0095293521881104,
"learning_rate": 2.3815789473684213e-07,
"loss": 0.1028,
"step": 9825
},
{
"epoch": 0.985,
"grad_norm": 2.2643580436706543,
"learning_rate": 2.0526315789473685e-07,
"loss": 0.0892,
"step": 9850
},
{
"epoch": 0.9875,
"grad_norm": 2.1976678371429443,
"learning_rate": 1.723684210526316e-07,
"loss": 0.0904,
"step": 9875
},
{
"epoch": 0.99,
"grad_norm": 4.183482646942139,
"learning_rate": 1.3947368421052632e-07,
"loss": 0.1343,
"step": 9900
},
{
"epoch": 0.9925,
"grad_norm": 1.8398394584655762,
"learning_rate": 1.0657894736842105e-07,
"loss": 0.1207,
"step": 9925
},
{
"epoch": 0.995,
"grad_norm": 2.1588056087493896,
"learning_rate": 7.36842105263158e-08,
"loss": 0.0644,
"step": 9950
},
{
"epoch": 0.9975,
"grad_norm": 1.9245156049728394,
"learning_rate": 4.078947368421053e-08,
"loss": 0.0659,
"step": 9975
},
{
"epoch": 1.0,
"grad_norm": 1.6552762985229492,
"learning_rate": 7.894736842105264e-09,
"loss": 0.0654,
"step": 10000
},
{
"epoch": 1.0,
"eval_loss": 0.23531682789325714,
"eval_runtime": 109.2289,
"eval_samples_per_second": 15.481,
"eval_steps_per_second": 0.97,
"eval_wer": 9.547895591357562,
"step": 10000
},
{
"epoch": 1.0,
"step": 10000,
"total_flos": 9.23473281024e+19,
"train_loss": 0.14711211260557175,
"train_runtime": 12741.5974,
"train_samples_per_second": 25.115,
"train_steps_per_second": 0.785
}
],
"logging_steps": 25,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.23473281024e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}