{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 21.0,
"global_step": 80388,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-09,
"loss": 10.5212,
"step": 1
},
{
"epoch": 0.13,
"learning_rate": 2.5e-06,
"loss": 9.5192,
"step": 500
},
{
"epoch": 0.26,
"learning_rate": 5e-06,
"loss": 7.8983,
"step": 1000
},
{
"epoch": 0.39,
"learning_rate": 7.5e-06,
"loss": 6.8402,
"step": 1500
},
{
"epoch": 0.52,
"learning_rate": 1e-05,
"loss": 6.5431,
"step": 2000
},
{
"epoch": 0.65,
"learning_rate": 1.25e-05,
"loss": 6.3797,
"step": 2500
},
{
"epoch": 0.78,
"learning_rate": 1.5e-05,
"loss": 6.269,
"step": 3000
},
{
"epoch": 0.91,
"learning_rate": 1.75e-05,
"loss": 6.1795,
"step": 3500
},
{
"epoch": 1.04,
"learning_rate": 2e-05,
"loss": 6.1091,
"step": 4000
},
{
"epoch": 1.18,
"learning_rate": 2.25e-05,
"loss": 6.0508,
"step": 4500
},
{
"epoch": 1.31,
"learning_rate": 2.5e-05,
"loss": 5.997,
"step": 5000
},
{
"epoch": 1.44,
"learning_rate": 2.7500000000000004e-05,
"loss": 5.9521,
"step": 5500
},
{
"epoch": 1.57,
"learning_rate": 3e-05,
"loss": 5.9117,
"step": 6000
},
{
"epoch": 1.7,
"learning_rate": 3.2500000000000004e-05,
"loss": 5.8794,
"step": 6500
},
{
"epoch": 1.83,
"learning_rate": 3.5e-05,
"loss": 5.8467,
"step": 7000
},
{
"epoch": 1.96,
"learning_rate": 3.7500000000000003e-05,
"loss": 5.8184,
"step": 7500
},
{
"epoch": 2.09,
"learning_rate": 4e-05,
"loss": 5.792,
"step": 8000
},
{
"epoch": 2.22,
"learning_rate": 4.2495e-05,
"loss": 5.7694,
"step": 8500
},
{
"epoch": 2.35,
"learning_rate": 4.4995000000000005e-05,
"loss": 5.7477,
"step": 9000
},
{
"epoch": 2.48,
"learning_rate": 4.7495e-05,
"loss": 5.7314,
"step": 9500
},
{
"epoch": 2.61,
"learning_rate": 4.9995000000000005e-05,
"loss": 5.7146,
"step": 10000
},
{
"epoch": 2.74,
"learning_rate": 4.998363134367605e-05,
"loss": 5.6981,
"step": 10500
},
{
"epoch": 2.87,
"learning_rate": 4.996719694977649e-05,
"loss": 5.6829,
"step": 11000
},
{
"epoch": 3.0,
"learning_rate": 4.995076255587694e-05,
"loss": 5.6715,
"step": 11500
},
{
"epoch": 3.13,
"learning_rate": 4.993432816197739e-05,
"loss": 5.6573,
"step": 12000
},
{
"epoch": 3.27,
"learning_rate": 4.9917893768077834e-05,
"loss": 5.6461,
"step": 12500
},
{
"epoch": 3.4,
"learning_rate": 4.990149224296608e-05,
"loss": 5.6392,
"step": 13000
},
{
"epoch": 3.53,
"learning_rate": 4.988505784906653e-05,
"loss": 5.6298,
"step": 13500
},
{
"epoch": 3.66,
"learning_rate": 4.986862345516697e-05,
"loss": 5.6171,
"step": 14000
},
{
"epoch": 3.79,
"learning_rate": 4.9852189061267425e-05,
"loss": 5.6121,
"step": 14500
},
{
"epoch": 3.92,
"learning_rate": 4.983578753615567e-05,
"loss": 5.6068,
"step": 15000
},
{
"epoch": 4.05,
"learning_rate": 4.981935314225611e-05,
"loss": 5.5992,
"step": 15500
},
{
"epoch": 4.18,
"learning_rate": 4.980291874835656e-05,
"loss": 5.5909,
"step": 16000
},
{
"epoch": 4.31,
"learning_rate": 4.978648435445701e-05,
"loss": 5.5844,
"step": 16500
},
{
"epoch": 4.44,
"learning_rate": 4.9770082829345257e-05,
"loss": 5.5779,
"step": 17000
},
{
"epoch": 4.57,
"learning_rate": 4.97536484354457e-05,
"loss": 5.5732,
"step": 17500
},
{
"epoch": 4.7,
"learning_rate": 4.9737214041546147e-05,
"loss": 5.5661,
"step": 18000
},
{
"epoch": 4.83,
"learning_rate": 4.97207796476466e-05,
"loss": 5.5631,
"step": 18500
},
{
"epoch": 4.96,
"learning_rate": 4.970437812253485e-05,
"loss": 5.5598,
"step": 19000
},
{
"epoch": 5.09,
"learning_rate": 4.968794372863529e-05,
"loss": 5.5494,
"step": 19500
},
{
"epoch": 5.22,
"learning_rate": 4.967150933473574e-05,
"loss": 5.5472,
"step": 20000
},
{
"epoch": 5.36,
"learning_rate": 4.965507494083618e-05,
"loss": 5.5456,
"step": 20500
},
{
"epoch": 5.49,
"learning_rate": 4.963867341572443e-05,
"loss": 5.54,
"step": 21000
},
{
"epoch": 5.62,
"learning_rate": 4.962223902182488e-05,
"loss": 5.5374,
"step": 21500
},
{
"epoch": 5.75,
"learning_rate": 4.960580462792532e-05,
"loss": 5.5322,
"step": 22000
},
{
"epoch": 5.88,
"learning_rate": 4.958937023402577e-05,
"loss": 5.5285,
"step": 22500
},
{
"epoch": 6.01,
"learning_rate": 4.957296870891402e-05,
"loss": 5.5252,
"step": 23000
},
{
"epoch": 6.14,
"learning_rate": 4.9556534315014465e-05,
"loss": 5.5193,
"step": 23500
},
{
"epoch": 6.27,
"learning_rate": 4.954009992111491e-05,
"loss": 5.5185,
"step": 24000
},
{
"epoch": 6.4,
"learning_rate": 4.9523665527215355e-05,
"loss": 5.5127,
"step": 24500
},
{
"epoch": 6.53,
"learning_rate": 4.950723113331581e-05,
"loss": 5.5134,
"step": 25000
},
{
"epoch": 6.66,
"learning_rate": 4.9490829608204056e-05,
"loss": 5.5089,
"step": 25500
},
{
"epoch": 6.79,
"learning_rate": 4.94744280830923e-05,
"loss": 5.5095,
"step": 26000
},
{
"epoch": 6.92,
"learning_rate": 4.945799368919274e-05,
"loss": 5.503,
"step": 26500
},
{
"epoch": 7.05,
"learning_rate": 4.9441559295293194e-05,
"loss": 5.5002,
"step": 27000
},
{
"epoch": 7.18,
"learning_rate": 4.942512490139364e-05,
"loss": 5.4949,
"step": 27500
},
{
"epoch": 7.31,
"learning_rate": 4.940869050749409e-05,
"loss": 5.4935,
"step": 28000
},
{
"epoch": 7.45,
"learning_rate": 4.939225611359453e-05,
"loss": 5.4899,
"step": 28500
},
{
"epoch": 7.58,
"learning_rate": 4.937585458848278e-05,
"loss": 5.4846,
"step": 29000
},
{
"epoch": 7.71,
"learning_rate": 4.935942019458323e-05,
"loss": 5.4852,
"step": 29500
},
{
"epoch": 7.84,
"learning_rate": 4.9342985800683674e-05,
"loss": 5.4813,
"step": 30000
},
{
"epoch": 7.97,
"learning_rate": 4.932655140678412e-05,
"loss": 5.4776,
"step": 30500
},
{
"epoch": 8.1,
"learning_rate": 4.9310117012884564e-05,
"loss": 5.4747,
"step": 31000
},
{
"epoch": 8.23,
"learning_rate": 4.9293682618985016e-05,
"loss": 5.4725,
"step": 31500
},
{
"epoch": 8.36,
"learning_rate": 4.9277281093873265e-05,
"loss": 5.4728,
"step": 32000
},
{
"epoch": 8.49,
"learning_rate": 4.92608466999737e-05,
"loss": 5.4715,
"step": 32500
},
{
"epoch": 8.62,
"learning_rate": 4.9244412306074155e-05,
"loss": 5.3441,
"step": 33000
},
{
"epoch": 8.75,
"learning_rate": 4.92279779121746e-05,
"loss": 5.1326,
"step": 33500
},
{
"epoch": 8.88,
"learning_rate": 4.921154351827505e-05,
"loss": 4.9556,
"step": 34000
},
{
"epoch": 9.01,
"learning_rate": 4.9195109124375496e-05,
"loss": 4.7853,
"step": 34500
},
{
"epoch": 9.14,
"learning_rate": 4.917867473047594e-05,
"loss": 4.6104,
"step": 35000
},
{
"epoch": 9.27,
"learning_rate": 4.9162240336576386e-05,
"loss": 4.445,
"step": 35500
},
{
"epoch": 9.4,
"learning_rate": 4.914580594267684e-05,
"loss": 4.2896,
"step": 36000
},
{
"epoch": 9.54,
"learning_rate": 4.9129404417565087e-05,
"loss": 4.1327,
"step": 36500
},
{
"epoch": 9.67,
"learning_rate": 4.9112970023665525e-05,
"loss": 3.9728,
"step": 37000
},
{
"epoch": 9.8,
"learning_rate": 4.9096535629765976e-05,
"loss": 3.7422,
"step": 37500
},
{
"epoch": 9.93,
"learning_rate": 4.908010123586642e-05,
"loss": 3.2578,
"step": 38000
},
{
"epoch": 10.06,
"learning_rate": 4.906369971075467e-05,
"loss": 2.5751,
"step": 38500
},
{
"epoch": 10.19,
"learning_rate": 4.904729818564291e-05,
"loss": 2.2565,
"step": 39000
},
{
"epoch": 10.32,
"learning_rate": 4.9030863791743363e-05,
"loss": 2.0889,
"step": 39500
},
{
"epoch": 10.45,
"learning_rate": 4.901442939784381e-05,
"loss": 1.9841,
"step": 40000
},
{
"epoch": 10.58,
"learning_rate": 4.899799500394426e-05,
"loss": 1.9043,
"step": 40500
},
{
"epoch": 10.71,
"learning_rate": 4.89815934788325e-05,
"loss": 1.8385,
"step": 41000
},
{
"epoch": 10.84,
"learning_rate": 4.896515908493295e-05,
"loss": 1.7863,
"step": 41500
},
{
"epoch": 10.97,
"learning_rate": 4.89487246910334e-05,
"loss": 1.7441,
"step": 42000
},
{
"epoch": 11.1,
"learning_rate": 4.8932290297133844e-05,
"loss": 1.7056,
"step": 42500
},
{
"epoch": 11.23,
"learning_rate": 4.891592164080989e-05,
"loss": 1.6683,
"step": 43000
},
{
"epoch": 11.36,
"learning_rate": 4.8899487246910334e-05,
"loss": 1.636,
"step": 43500
},
{
"epoch": 11.49,
"learning_rate": 4.8883052853010786e-05,
"loss": 1.6046,
"step": 44000
},
{
"epoch": 11.62,
"learning_rate": 4.886661845911123e-05,
"loss": 1.5762,
"step": 44500
},
{
"epoch": 11.76,
"learning_rate": 4.885018406521168e-05,
"loss": 1.548,
"step": 45000
},
{
"epoch": 11.89,
"learning_rate": 4.883374967131212e-05,
"loss": 1.5272,
"step": 45500
},
{
"epoch": 12.02,
"learning_rate": 4.881731527741257e-05,
"loss": 1.5034,
"step": 46000
},
{
"epoch": 12.15,
"learning_rate": 4.880091375230082e-05,
"loss": 1.4815,
"step": 46500
},
{
"epoch": 12.28,
"learning_rate": 4.8784479358401266e-05,
"loss": 1.4652,
"step": 47000
},
{
"epoch": 12.41,
"learning_rate": 4.876804496450171e-05,
"loss": 1.4483,
"step": 47500
},
{
"epoch": 12.54,
"learning_rate": 4.8751610570602156e-05,
"loss": 1.4294,
"step": 48000
},
{
"epoch": 12.67,
"learning_rate": 4.873517617670261e-05,
"loss": 1.4124,
"step": 48500
},
{
"epoch": 12.8,
"learning_rate": 4.871874178280305e-05,
"loss": 1.3963,
"step": 49000
},
{
"epoch": 12.93,
"learning_rate": 4.87023073889035e-05,
"loss": 1.3831,
"step": 49500
},
{
"epoch": 13.06,
"learning_rate": 4.8685905863791746e-05,
"loss": 1.3693,
"step": 50000
},
{
"epoch": 13.19,
"learning_rate": 4.866947146989219e-05,
"loss": 1.3559,
"step": 50500
},
{
"epoch": 13.32,
"learning_rate": 4.865303707599264e-05,
"loss": 1.3435,
"step": 51000
},
{
"epoch": 13.45,
"learning_rate": 4.863660268209309e-05,
"loss": 1.3346,
"step": 51500
},
{
"epoch": 13.58,
"learning_rate": 4.862016828819353e-05,
"loss": 1.3212,
"step": 52000
},
{
"epoch": 13.71,
"learning_rate": 4.860373389429398e-05,
"loss": 1.3113,
"step": 52500
},
{
"epoch": 13.85,
"learning_rate": 4.8587332369182226e-05,
"loss": 1.2998,
"step": 53000
},
{
"epoch": 13.98,
"learning_rate": 4.857089797528268e-05,
"loss": 1.2916,
"step": 53500
},
{
"epoch": 14.11,
"learning_rate": 4.8554463581383116e-05,
"loss": 1.2814,
"step": 54000
},
{
"epoch": 14.24,
"learning_rate": 4.853802918748357e-05,
"loss": 1.2705,
"step": 54500
},
{
"epoch": 14.37,
"learning_rate": 4.852159479358401e-05,
"loss": 1.2614,
"step": 55000
},
{
"epoch": 14.5,
"learning_rate": 4.8505160399684465e-05,
"loss": 1.2524,
"step": 55500
},
{
"epoch": 14.63,
"learning_rate": 4.8488758874572706e-05,
"loss": 1.2443,
"step": 56000
},
{
"epoch": 14.76,
"learning_rate": 4.847232448067315e-05,
"loss": 1.2362,
"step": 56500
},
{
"epoch": 14.89,
"learning_rate": 4.84558900867736e-05,
"loss": 1.2292,
"step": 57000
},
{
"epoch": 15.02,
"learning_rate": 4.843945569287405e-05,
"loss": 1.2209,
"step": 57500
},
{
"epoch": 15.15,
"learning_rate": 4.84230212989745e-05,
"loss": 1.2124,
"step": 58000
},
{
"epoch": 15.28,
"learning_rate": 4.840658690507494e-05,
"loss": 1.2048,
"step": 58500
},
{
"epoch": 15.41,
"learning_rate": 4.839018537996319e-05,
"loss": 1.1975,
"step": 59000
},
{
"epoch": 15.54,
"learning_rate": 4.837375098606364e-05,
"loss": 1.1905,
"step": 59500
},
{
"epoch": 15.67,
"learning_rate": 4.835731659216408e-05,
"loss": 1.1845,
"step": 60000
},
{
"epoch": 15.8,
"learning_rate": 4.834088219826453e-05,
"loss": 1.1797,
"step": 60500
},
{
"epoch": 15.94,
"learning_rate": 4.832444780436497e-05,
"loss": 1.1723,
"step": 61000
},
{
"epoch": 16.07,
"learning_rate": 4.830804627925322e-05,
"loss": 1.1661,
"step": 61500
},
{
"epoch": 16.2,
"learning_rate": 4.8291611885353674e-05,
"loss": 1.1585,
"step": 62000
},
{
"epoch": 16.33,
"learning_rate": 4.827517749145412e-05,
"loss": 1.1523,
"step": 62500
},
{
"epoch": 16.46,
"learning_rate": 4.8258743097554564e-05,
"loss": 1.15,
"step": 63000
},
{
"epoch": 16.59,
"learning_rate": 4.824230870365501e-05,
"loss": 1.141,
"step": 63500
},
{
"epoch": 16.72,
"learning_rate": 4.822587430975546e-05,
"loss": 1.1383,
"step": 64000
},
{
"epoch": 16.85,
"learning_rate": 4.8209439915855905e-05,
"loss": 1.1336,
"step": 64500
},
{
"epoch": 16.98,
"learning_rate": 4.819303839074415e-05,
"loss": 1.129,
"step": 65000
},
{
"epoch": 17.11,
"learning_rate": 4.81766039968446e-05,
"loss": 1.1204,
"step": 65500
},
{
"epoch": 17.24,
"learning_rate": 4.8160169602945044e-05,
"loss": 1.1149,
"step": 66000
},
{
"epoch": 17.37,
"learning_rate": 4.8143735209045495e-05,
"loss": 1.1134,
"step": 66500
},
{
"epoch": 17.5,
"learning_rate": 4.8127300815145934e-05,
"loss": 1.1095,
"step": 67000
},
{
"epoch": 17.63,
"learning_rate": 4.811089929003418e-05,
"loss": 1.1029,
"step": 67500
},
{
"epoch": 17.76,
"learning_rate": 4.8094464896134634e-05,
"loss": 1.0995,
"step": 68000
},
{
"epoch": 17.89,
"learning_rate": 4.807803050223508e-05,
"loss": 1.0941,
"step": 68500
},
{
"epoch": 18.03,
"learning_rate": 4.8061596108335524e-05,
"loss": 1.0912,
"step": 69000
},
{
"epoch": 18.16,
"learning_rate": 4.804519458322377e-05,
"loss": 1.0866,
"step": 69500
},
{
"epoch": 18.29,
"learning_rate": 4.802876018932422e-05,
"loss": 1.0812,
"step": 70000
},
{
"epoch": 18.42,
"learning_rate": 4.801232579542467e-05,
"loss": 1.0769,
"step": 70500
},
{
"epoch": 18.55,
"learning_rate": 4.7995891401525114e-05,
"loss": 1.0731,
"step": 71000
},
{
"epoch": 18.68,
"learning_rate": 4.797945700762556e-05,
"loss": 1.0704,
"step": 71500
},
{
"epoch": 18.81,
"learning_rate": 4.796302261372601e-05,
"loss": 1.0654,
"step": 72000
},
{
"epoch": 18.94,
"learning_rate": 4.7946588219826456e-05,
"loss": 1.0629,
"step": 72500
},
{
"epoch": 19.07,
"learning_rate": 4.7930186694714704e-05,
"loss": 1.0585,
"step": 73000
},
{
"epoch": 19.2,
"learning_rate": 4.791375230081514e-05,
"loss": 1.0539,
"step": 73500
},
{
"epoch": 19.33,
"learning_rate": 4.7897317906915594e-05,
"loss": 1.0529,
"step": 74000
},
{
"epoch": 19.46,
"learning_rate": 4.7880883513016046e-05,
"loss": 1.0509,
"step": 74500
},
{
"epoch": 19.59,
"learning_rate": 4.786448198790429e-05,
"loss": 1.0462,
"step": 75000
},
{
"epoch": 19.72,
"learning_rate": 4.784804759400473e-05,
"loss": 1.0416,
"step": 75500
},
{
"epoch": 19.85,
"learning_rate": 4.783161320010518e-05,
"loss": 1.0387,
"step": 76000
},
{
"epoch": 19.98,
"learning_rate": 4.781517880620563e-05,
"loss": 1.0356,
"step": 76500
},
{
"epoch": 20.11,
"learning_rate": 4.779874441230608e-05,
"loss": 1.0326,
"step": 77000
},
{
"epoch": 20.25,
"learning_rate": 4.7782310018406526e-05,
"loss": 1.0282,
"step": 77500
},
{
"epoch": 20.38,
"learning_rate": 4.776587562450697e-05,
"loss": 1.0276,
"step": 78000
},
{
"epoch": 20.51,
"learning_rate": 4.774947409939521e-05,
"loss": 1.0227,
"step": 78500
},
{
"epoch": 20.64,
"learning_rate": 4.773307257428346e-05,
"loss": 1.0211,
"step": 79000
},
{
"epoch": 20.77,
"learning_rate": 4.771663818038391e-05,
"loss": 1.0184,
"step": 79500
},
{
"epoch": 20.9,
"learning_rate": 4.770020378648435e-05,
"loss": 1.0152,
"step": 80000
}
],
"max_steps": 1531200,
"num_train_epochs": 400,
"total_flos": 2.1665978964733067e+19,
"trial_name": null,
"trial_params": null
}