|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 20, |
|
"global_step": 376, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0, |
|
"eval_accuracy": 0.7505882352941177, |
|
"eval_f1": 0.12396694214876033, |
|
"eval_loss": 0.6111783981323242, |
|
"eval_precision": 0.36585365853658536, |
|
"eval_recall": 0.07462686567164178, |
|
"eval_runtime": 34.3787, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 0.204, |
|
"step": 0 |
|
}, |
|
{ |
|
"epoch": 0.0026595744680851063, |
|
"grad_norm": 3.310136556625366, |
|
"learning_rate": 5.263157894736843e-07, |
|
"loss": 0.6542, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005319148936170213, |
|
"grad_norm": 2.5591301918029785, |
|
"learning_rate": 1.0526315789473685e-06, |
|
"loss": 0.6609, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.007978723404255319, |
|
"grad_norm": 2.7341604232788086, |
|
"learning_rate": 1.5789473684210526e-06, |
|
"loss": 0.6752, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.010638297872340425, |
|
"grad_norm": 2.8091554641723633, |
|
"learning_rate": 2.105263157894737e-06, |
|
"loss": 0.618, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.013297872340425532, |
|
"grad_norm": 2.5653722286224365, |
|
"learning_rate": 2.631578947368421e-06, |
|
"loss": 0.6752, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.015957446808510637, |
|
"grad_norm": 3.4398417472839355, |
|
"learning_rate": 3.157894736842105e-06, |
|
"loss": 0.6783, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.018617021276595744, |
|
"grad_norm": 2.5178332328796387, |
|
"learning_rate": 3.6842105263157896e-06, |
|
"loss": 0.631, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02127659574468085, |
|
"grad_norm": 2.8207452297210693, |
|
"learning_rate": 4.210526315789474e-06, |
|
"loss": 0.6361, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.023936170212765957, |
|
"grad_norm": 2.5525949001312256, |
|
"learning_rate": 4.736842105263158e-06, |
|
"loss": 0.6088, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.026595744680851064, |
|
"grad_norm": 2.415248155593872, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 0.6556, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02925531914893617, |
|
"grad_norm": 3.0247888565063477, |
|
"learning_rate": 5.789473684210527e-06, |
|
"loss": 0.6039, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.031914893617021274, |
|
"grad_norm": 2.5060417652130127, |
|
"learning_rate": 6.31578947368421e-06, |
|
"loss": 0.6012, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.034574468085106384, |
|
"grad_norm": 2.770636558532715, |
|
"learning_rate": 6.842105263157896e-06, |
|
"loss": 0.6775, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03723404255319149, |
|
"grad_norm": 2.582097291946411, |
|
"learning_rate": 7.368421052631579e-06, |
|
"loss": 0.5993, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0398936170212766, |
|
"grad_norm": 2.5408666133880615, |
|
"learning_rate": 7.894736842105265e-06, |
|
"loss": 0.596, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0425531914893617, |
|
"grad_norm": 2.478731870651245, |
|
"learning_rate": 8.421052631578948e-06, |
|
"loss": 0.5919, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04521276595744681, |
|
"grad_norm": 2.5440561771392822, |
|
"learning_rate": 8.947368421052632e-06, |
|
"loss": 0.5223, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.047872340425531915, |
|
"grad_norm": 2.1620945930480957, |
|
"learning_rate": 9.473684210526315e-06, |
|
"loss": 0.5412, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05053191489361702, |
|
"grad_norm": 2.2398860454559326, |
|
"learning_rate": 1e-05, |
|
"loss": 0.5847, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05319148936170213, |
|
"grad_norm": 2.1689343452453613, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 0.5265, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05319148936170213, |
|
"eval_accuracy": 0.7647058823529411, |
|
"eval_f1": 0.05660377358490566, |
|
"eval_loss": 0.6081312894821167, |
|
"eval_precision": 0.5454545454545454, |
|
"eval_recall": 0.029850746268656716, |
|
"eval_runtime": 34.9953, |
|
"eval_samples_per_second": 6.401, |
|
"eval_steps_per_second": 0.2, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05585106382978723, |
|
"grad_norm": 1.8998128175735474, |
|
"learning_rate": 1.105263157894737e-05, |
|
"loss": 0.5347, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05851063829787234, |
|
"grad_norm": 2.079780340194702, |
|
"learning_rate": 1.1578947368421053e-05, |
|
"loss": 0.4814, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.061170212765957445, |
|
"grad_norm": 1.8792980909347534, |
|
"learning_rate": 1.2105263157894737e-05, |
|
"loss": 0.5084, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06382978723404255, |
|
"grad_norm": 1.9132519960403442, |
|
"learning_rate": 1.263157894736842e-05, |
|
"loss": 0.5027, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06648936170212766, |
|
"grad_norm": 1.3962018489837646, |
|
"learning_rate": 1.3157894736842108e-05, |
|
"loss": 0.5136, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06914893617021277, |
|
"grad_norm": 1.4877433776855469, |
|
"learning_rate": 1.3684210526315791e-05, |
|
"loss": 0.4567, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07180851063829788, |
|
"grad_norm": 1.5485683679580688, |
|
"learning_rate": 1.4210526315789475e-05, |
|
"loss": 0.4365, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07446808510638298, |
|
"grad_norm": 1.164844036102295, |
|
"learning_rate": 1.4736842105263159e-05, |
|
"loss": 0.4142, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07712765957446809, |
|
"grad_norm": 1.354490876197815, |
|
"learning_rate": 1.5263157894736846e-05, |
|
"loss": 0.4492, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0797872340425532, |
|
"grad_norm": 1.067051649093628, |
|
"learning_rate": 1.578947368421053e-05, |
|
"loss": 0.4294, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08244680851063829, |
|
"grad_norm": 1.3097209930419922, |
|
"learning_rate": 1.6315789473684213e-05, |
|
"loss": 0.452, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0851063829787234, |
|
"grad_norm": 0.9226462244987488, |
|
"learning_rate": 1.6842105263157896e-05, |
|
"loss": 0.3848, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08776595744680851, |
|
"grad_norm": 1.1755656003952026, |
|
"learning_rate": 1.736842105263158e-05, |
|
"loss": 0.4307, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.09042553191489362, |
|
"grad_norm": 1.2210921049118042, |
|
"learning_rate": 1.7894736842105264e-05, |
|
"loss": 0.4232, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09308510638297872, |
|
"grad_norm": 0.9078745245933533, |
|
"learning_rate": 1.8421052631578947e-05, |
|
"loss": 0.3752, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09574468085106383, |
|
"grad_norm": 0.936310350894928, |
|
"learning_rate": 1.894736842105263e-05, |
|
"loss": 0.3655, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09840425531914894, |
|
"grad_norm": 1.5738509893417358, |
|
"learning_rate": 1.9473684210526318e-05, |
|
"loss": 0.4547, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.10106382978723404, |
|
"grad_norm": 1.1838228702545166, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4347, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.10372340425531915, |
|
"grad_norm": 1.4948188066482544, |
|
"learning_rate": 1.9999568050254373e-05, |
|
"loss": 0.4135, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.10638297872340426, |
|
"grad_norm": 1.1098586320877075, |
|
"learning_rate": 1.9998272238333606e-05, |
|
"loss": 0.4127, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10638297872340426, |
|
"eval_accuracy": 0.768235294117647, |
|
"eval_f1": 0.06635071090047394, |
|
"eval_loss": 0.5411638021469116, |
|
"eval_precision": 0.7, |
|
"eval_recall": 0.03482587064676617, |
|
"eval_runtime": 34.6313, |
|
"eval_samples_per_second": 6.468, |
|
"eval_steps_per_second": 0.202, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10904255319148937, |
|
"grad_norm": 0.779417872428894, |
|
"learning_rate": 1.999611267618283e-05, |
|
"loss": 0.3893, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.11170212765957446, |
|
"grad_norm": 1.021106481552124, |
|
"learning_rate": 1.99930895503665e-05, |
|
"loss": 0.3104, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11436170212765957, |
|
"grad_norm": 1.174867868423462, |
|
"learning_rate": 1.998920312205231e-05, |
|
"loss": 0.4124, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.11702127659574468, |
|
"grad_norm": 0.8697633743286133, |
|
"learning_rate": 1.99844537269886e-05, |
|
"loss": 0.3785, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1196808510638298, |
|
"grad_norm": 0.9835452437400818, |
|
"learning_rate": 1.9978841775475368e-05, |
|
"loss": 0.4014, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.12234042553191489, |
|
"grad_norm": 1.1810511350631714, |
|
"learning_rate": 1.9972367752328824e-05, |
|
"loss": 0.3518, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 0.9265549778938293, |
|
"learning_rate": 1.9965032216839493e-05, |
|
"loss": 0.4024, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.1276595744680851, |
|
"grad_norm": 1.186259150505066, |
|
"learning_rate": 1.9956835802723916e-05, |
|
"loss": 0.3599, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.13031914893617022, |
|
"grad_norm": 1.2196171283721924, |
|
"learning_rate": 1.994777921806989e-05, |
|
"loss": 0.3411, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.13297872340425532, |
|
"grad_norm": 1.1862437725067139, |
|
"learning_rate": 1.9937863245275303e-05, |
|
"loss": 0.362, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1356382978723404, |
|
"grad_norm": 1.319501280784607, |
|
"learning_rate": 1.992708874098054e-05, |
|
"loss": 0.4189, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.13829787234042554, |
|
"grad_norm": 0.9766789674758911, |
|
"learning_rate": 1.991545663599448e-05, |
|
"loss": 0.358, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.14095744680851063, |
|
"grad_norm": 1.0482966899871826, |
|
"learning_rate": 1.990296793521408e-05, |
|
"loss": 0.3736, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.14361702127659576, |
|
"grad_norm": 0.8634902834892273, |
|
"learning_rate": 1.9889623717537564e-05, |
|
"loss": 0.3582, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.14627659574468085, |
|
"grad_norm": 0.9416165947914124, |
|
"learning_rate": 1.987542513577122e-05, |
|
"loss": 0.3495, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.14893617021276595, |
|
"grad_norm": 0.9823614358901978, |
|
"learning_rate": 1.9860373416529804e-05, |
|
"loss": 0.3446, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.15159574468085107, |
|
"grad_norm": 0.8403105139732361, |
|
"learning_rate": 1.984446986013057e-05, |
|
"loss": 0.3177, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.15425531914893617, |
|
"grad_norm": 1.0707823038101196, |
|
"learning_rate": 1.9827715840480962e-05, |
|
"loss": 0.323, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.15691489361702127, |
|
"grad_norm": 0.933045506477356, |
|
"learning_rate": 1.9810112804959867e-05, |
|
"loss": 0.3123, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1595744680851064, |
|
"grad_norm": 0.9361464977264404, |
|
"learning_rate": 1.9791662274292638e-05, |
|
"loss": 0.3347, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1595744680851064, |
|
"eval_accuracy": 0.7741176470588236, |
|
"eval_f1": 0.12727272727272726, |
|
"eval_loss": 0.5019634962081909, |
|
"eval_precision": 0.7368421052631579, |
|
"eval_recall": 0.06965174129353234, |
|
"eval_runtime": 34.7325, |
|
"eval_samples_per_second": 6.449, |
|
"eval_steps_per_second": 0.202, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1622340425531915, |
|
"grad_norm": 0.8992587327957153, |
|
"learning_rate": 1.977236584241968e-05, |
|
"loss": 0.3457, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.16489361702127658, |
|
"grad_norm": 1.282809853553772, |
|
"learning_rate": 1.9752225176358757e-05, |
|
"loss": 0.3226, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1675531914893617, |
|
"grad_norm": 2.4324252605438232, |
|
"learning_rate": 1.9731242016060985e-05, |
|
"loss": 0.4227, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.1702127659574468, |
|
"grad_norm": 1.0456701517105103, |
|
"learning_rate": 1.9709418174260523e-05, |
|
"loss": 0.3102, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.17287234042553193, |
|
"grad_norm": 1.2882471084594727, |
|
"learning_rate": 1.9686755536317945e-05, |
|
"loss": 0.3145, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.17553191489361702, |
|
"grad_norm": 1.1312603950500488, |
|
"learning_rate": 1.9663256060057395e-05, |
|
"loss": 0.3353, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.17819148936170212, |
|
"grad_norm": 1.0174272060394287, |
|
"learning_rate": 1.9638921775597428e-05, |
|
"loss": 0.2845, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.18085106382978725, |
|
"grad_norm": 1.241572380065918, |
|
"learning_rate": 1.961375478517564e-05, |
|
"loss": 0.3015, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.18351063829787234, |
|
"grad_norm": 1.3726611137390137, |
|
"learning_rate": 1.958775726296706e-05, |
|
"loss": 0.3671, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.18617021276595744, |
|
"grad_norm": 1.2311499118804932, |
|
"learning_rate": 1.95609314548963e-05, |
|
"loss": 0.2902, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18882978723404256, |
|
"grad_norm": 1.3199646472930908, |
|
"learning_rate": 1.953327967844356e-05, |
|
"loss": 0.3594, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.19148936170212766, |
|
"grad_norm": 1.6513502597808838, |
|
"learning_rate": 1.95048043224444e-05, |
|
"loss": 0.2831, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.19414893617021275, |
|
"grad_norm": 1.763235330581665, |
|
"learning_rate": 1.9475507846883377e-05, |
|
"loss": 0.3675, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.19680851063829788, |
|
"grad_norm": 1.8195736408233643, |
|
"learning_rate": 1.9445392782681523e-05, |
|
"loss": 0.398, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.19946808510638298, |
|
"grad_norm": 1.9659175872802734, |
|
"learning_rate": 1.94144617314777e-05, |
|
"loss": 0.353, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.20212765957446807, |
|
"grad_norm": 1.60419762134552, |
|
"learning_rate": 1.9382717365403854e-05, |
|
"loss": 0.3565, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.2047872340425532, |
|
"grad_norm": 1.5443696975708008, |
|
"learning_rate": 1.9350162426854152e-05, |
|
"loss": 0.3246, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.2074468085106383, |
|
"grad_norm": 1.8536072969436646, |
|
"learning_rate": 1.9316799728248074e-05, |
|
"loss": 0.3491, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.21010638297872342, |
|
"grad_norm": 2.2563788890838623, |
|
"learning_rate": 1.9282632151787462e-05, |
|
"loss": 0.4211, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.2127659574468085, |
|
"grad_norm": 1.3425776958465576, |
|
"learning_rate": 1.924766264920751e-05, |
|
"loss": 0.3077, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2127659574468085, |
|
"eval_accuracy": 0.7964705882352942, |
|
"eval_f1": 0.3663003663003663, |
|
"eval_loss": 0.4462856650352478, |
|
"eval_precision": 0.6944444444444444, |
|
"eval_recall": 0.24875621890547264, |
|
"eval_runtime": 34.8097, |
|
"eval_samples_per_second": 6.435, |
|
"eval_steps_per_second": 0.201, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2154255319148936, |
|
"grad_norm": 2.540194272994995, |
|
"learning_rate": 1.9211894241521757e-05, |
|
"loss": 0.3127, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.21808510638297873, |
|
"grad_norm": 1.8769720792770386, |
|
"learning_rate": 1.917533001876113e-05, |
|
"loss": 0.2998, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.22074468085106383, |
|
"grad_norm": 1.4883919954299927, |
|
"learning_rate": 1.9137973139706973e-05, |
|
"loss": 0.3245, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.22340425531914893, |
|
"grad_norm": 1.6703698635101318, |
|
"learning_rate": 1.9099826831618168e-05, |
|
"loss": 0.3199, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.22606382978723405, |
|
"grad_norm": 2.8918988704681396, |
|
"learning_rate": 1.9060894389952328e-05, |
|
"loss": 0.2825, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.22872340425531915, |
|
"grad_norm": 1.5494073629379272, |
|
"learning_rate": 1.9021179178081107e-05, |
|
"loss": 0.3213, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.23138297872340424, |
|
"grad_norm": 1.331063151359558, |
|
"learning_rate": 1.898068462699964e-05, |
|
"loss": 0.2572, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.23404255319148937, |
|
"grad_norm": 1.5478427410125732, |
|
"learning_rate": 1.8939414235030137e-05, |
|
"loss": 0.3001, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.23670212765957446, |
|
"grad_norm": 2.469545602798462, |
|
"learning_rate": 1.889737156751965e-05, |
|
"loss": 0.3199, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.2393617021276596, |
|
"grad_norm": 2.134981155395508, |
|
"learning_rate": 1.8854560256532098e-05, |
|
"loss": 0.3192, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.24202127659574468, |
|
"grad_norm": 1.727616548538208, |
|
"learning_rate": 1.8810984000534457e-05, |
|
"loss": 0.3072, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.24468085106382978, |
|
"grad_norm": 2.0483591556549072, |
|
"learning_rate": 1.8766646564077265e-05, |
|
"loss": 0.297, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2473404255319149, |
|
"grad_norm": 1.8018875122070312, |
|
"learning_rate": 1.8721551777469397e-05, |
|
"loss": 0.2798, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.7120018005371094, |
|
"learning_rate": 1.8675703536447178e-05, |
|
"loss": 0.2438, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2526595744680851, |
|
"grad_norm": 1.8456470966339111, |
|
"learning_rate": 1.862910580183782e-05, |
|
"loss": 0.333, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2553191489361702, |
|
"grad_norm": 2.701077461242676, |
|
"learning_rate": 1.858176259921724e-05, |
|
"loss": 0.3214, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2579787234042553, |
|
"grad_norm": 1.6109999418258667, |
|
"learning_rate": 1.853367801856231e-05, |
|
"loss": 0.2701, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.26063829787234044, |
|
"grad_norm": 1.524688482284546, |
|
"learning_rate": 1.8484856213897496e-05, |
|
"loss": 0.2455, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2632978723404255, |
|
"grad_norm": 1.8650296926498413, |
|
"learning_rate": 1.843530140293603e-05, |
|
"loss": 0.273, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.26595744680851063, |
|
"grad_norm": 1.5295664072036743, |
|
"learning_rate": 1.8385017866715507e-05, |
|
"loss": 0.307, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26595744680851063, |
|
"eval_accuracy": 0.8, |
|
"eval_f1": 0.4097222222222222, |
|
"eval_loss": 0.44977959990501404, |
|
"eval_precision": 0.6781609195402298, |
|
"eval_recall": 0.2935323383084577, |
|
"eval_runtime": 34.7061, |
|
"eval_samples_per_second": 6.454, |
|
"eval_steps_per_second": 0.202, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26861702127659576, |
|
"grad_norm": 2.1255381107330322, |
|
"learning_rate": 1.833400994922806e-05, |
|
"loss": 0.2532, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2712765957446808, |
|
"grad_norm": 2.4879391193389893, |
|
"learning_rate": 1.8282282057045087e-05, |
|
"loss": 0.3593, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.27393617021276595, |
|
"grad_norm": 2.0561375617980957, |
|
"learning_rate": 1.8229838658936566e-05, |
|
"loss": 0.266, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.2765957446808511, |
|
"grad_norm": 2.101980447769165, |
|
"learning_rate": 1.8176684285484985e-05, |
|
"loss": 0.3686, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.27925531914893614, |
|
"grad_norm": 2.0041894912719727, |
|
"learning_rate": 1.8122823528693966e-05, |
|
"loss": 0.2551, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.28191489361702127, |
|
"grad_norm": 1.981961727142334, |
|
"learning_rate": 1.8068261041591548e-05, |
|
"loss": 0.2932, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.2845744680851064, |
|
"grad_norm": 2.636021614074707, |
|
"learning_rate": 1.8013001537828213e-05, |
|
"loss": 0.2584, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.2872340425531915, |
|
"grad_norm": 2.6354217529296875, |
|
"learning_rate": 1.7957049791269684e-05, |
|
"loss": 0.3208, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2898936170212766, |
|
"grad_norm": 3.6334121227264404, |
|
"learning_rate": 1.79004106355845e-05, |
|
"loss": 0.3142, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.2925531914893617, |
|
"grad_norm": 2.6944894790649414, |
|
"learning_rate": 1.7843088963826437e-05, |
|
"loss": 0.2854, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.29521276595744683, |
|
"grad_norm": 4.576889514923096, |
|
"learning_rate": 1.7785089728011798e-05, |
|
"loss": 0.2685, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.2978723404255319, |
|
"grad_norm": 2.23494029045105, |
|
"learning_rate": 1.772641793869162e-05, |
|
"loss": 0.2604, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.300531914893617, |
|
"grad_norm": 3.0733425617218018, |
|
"learning_rate": 1.7667078664518796e-05, |
|
"loss": 0.2542, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.30319148936170215, |
|
"grad_norm": 1.9046289920806885, |
|
"learning_rate": 1.7607077031810204e-05, |
|
"loss": 0.2879, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.3058510638297872, |
|
"grad_norm": 2.2374041080474854, |
|
"learning_rate": 1.7546418224103838e-05, |
|
"loss": 0.2998, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.30851063829787234, |
|
"grad_norm": 5.9824395179748535, |
|
"learning_rate": 1.7485107481711014e-05, |
|
"loss": 0.3637, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.31117021276595747, |
|
"grad_norm": 3.0998919010162354, |
|
"learning_rate": 1.7423150101263645e-05, |
|
"loss": 0.2746, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.31382978723404253, |
|
"grad_norm": 2.05523419380188, |
|
"learning_rate": 1.7360551435256673e-05, |
|
"loss": 0.2776, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.31648936170212766, |
|
"grad_norm": 2.1908273696899414, |
|
"learning_rate": 1.729731689158568e-05, |
|
"loss": 0.3184, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3191489361702128, |
|
"grad_norm": 2.3177342414855957, |
|
"learning_rate": 1.7233451933079663e-05, |
|
"loss": 0.2413, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3191489361702128, |
|
"eval_accuracy": 0.8141176470588235, |
|
"eval_f1": 0.48026315789473684, |
|
"eval_loss": 0.43155437707901, |
|
"eval_precision": 0.7087378640776699, |
|
"eval_recall": 0.36318407960199006, |
|
"eval_runtime": 34.0012, |
|
"eval_samples_per_second": 6.588, |
|
"eval_steps_per_second": 0.206, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.32180851063829785, |
|
"grad_norm": 2.1571784019470215, |
|
"learning_rate": 1.7168962077029146e-05, |
|
"loss": 0.3229, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.324468085106383, |
|
"grad_norm": 3.056910991668701, |
|
"learning_rate": 1.7103852894709517e-05, |
|
"loss": 0.3116, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3271276595744681, |
|
"grad_norm": 1.9665093421936035, |
|
"learning_rate": 1.7038130010899716e-05, |
|
"loss": 0.2743, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.32978723404255317, |
|
"grad_norm": 2.3583879470825195, |
|
"learning_rate": 1.6971799103396332e-05, |
|
"loss": 0.2776, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3324468085106383, |
|
"grad_norm": 2.8476576805114746, |
|
"learning_rate": 1.6904865902523098e-05, |
|
"loss": 0.3213, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3351063829787234, |
|
"grad_norm": 1.9458303451538086, |
|
"learning_rate": 1.6837336190635824e-05, |
|
"loss": 0.2771, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3377659574468085, |
|
"grad_norm": 2.4472289085388184, |
|
"learning_rate": 1.6769215801622884e-05, |
|
"loss": 0.2924, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.3404255319148936, |
|
"grad_norm": 2.520463228225708, |
|
"learning_rate": 1.6700510620401223e-05, |
|
"loss": 0.269, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.34308510638297873, |
|
"grad_norm": 2.2465851306915283, |
|
"learning_rate": 1.6631226582407954e-05, |
|
"loss": 0.3043, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.34574468085106386, |
|
"grad_norm": 2.4705588817596436, |
|
"learning_rate": 1.6561369673087588e-05, |
|
"loss": 0.3375, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3484042553191489, |
|
"grad_norm": 2.332902669906616, |
|
"learning_rate": 1.649094592737497e-05, |
|
"loss": 0.2313, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.35106382978723405, |
|
"grad_norm": 2.050671100616455, |
|
"learning_rate": 1.641996142917391e-05, |
|
"loss": 0.3066, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3537234042553192, |
|
"grad_norm": 3.541461706161499, |
|
"learning_rate": 1.63484223108316e-05, |
|
"loss": 0.2937, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.35638297872340424, |
|
"grad_norm": 2.344451665878296, |
|
"learning_rate": 1.6276334752608823e-05, |
|
"loss": 0.2666, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.35904255319148937, |
|
"grad_norm": 2.1711394786834717, |
|
"learning_rate": 1.6203704982146073e-05, |
|
"loss": 0.2457, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.3617021276595745, |
|
"grad_norm": 3.414870023727417, |
|
"learning_rate": 1.613053927392553e-05, |
|
"loss": 0.331, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.36436170212765956, |
|
"grad_norm": 3.037440299987793, |
|
"learning_rate": 1.6056843948729e-05, |
|
"loss": 0.3025, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.3670212765957447, |
|
"grad_norm": 3.548393726348877, |
|
"learning_rate": 1.5982625373091877e-05, |
|
"loss": 0.3203, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.3696808510638298, |
|
"grad_norm": 2.598219633102417, |
|
"learning_rate": 1.5907889958753134e-05, |
|
"loss": 0.3155, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3723404255319149, |
|
"grad_norm": 2.790419101715088, |
|
"learning_rate": 1.5832644162101417e-05, |
|
"loss": 0.326, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3723404255319149, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_f1": 0.5222929936305732, |
|
"eval_loss": 0.4106709063053131, |
|
"eval_precision": 0.7256637168141593, |
|
"eval_recall": 0.4079601990049751, |
|
"eval_runtime": 33.9867, |
|
"eval_samples_per_second": 6.591, |
|
"eval_steps_per_second": 0.206, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 3.642287492752075, |
|
"learning_rate": 1.5756894483617268e-05, |
|
"loss": 0.2809, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3776595744680851, |
|
"grad_norm": 2.40323805809021, |
|
"learning_rate": 1.568064746731156e-05, |
|
"loss": 0.2835, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3803191489361702, |
|
"grad_norm": 1.9183332920074463, |
|
"learning_rate": 1.560390970016015e-05, |
|
"loss": 0.2534, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3829787234042553, |
|
"grad_norm": 3.2929575443267822, |
|
"learning_rate": 1.552668781153484e-05, |
|
"loss": 0.373, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.38563829787234044, |
|
"grad_norm": 2.27150559425354, |
|
"learning_rate": 1.5448988472630654e-05, |
|
"loss": 0.2783, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3882978723404255, |
|
"grad_norm": 2.780089855194092, |
|
"learning_rate": 1.5370818395889536e-05, |
|
"loss": 0.322, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.39095744680851063, |
|
"grad_norm": 2.2651729583740234, |
|
"learning_rate": 1.5292184334420434e-05, |
|
"loss": 0.3145, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.39361702127659576, |
|
"grad_norm": 2.8416588306427, |
|
"learning_rate": 1.521309308141592e-05, |
|
"loss": 0.2979, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3962765957446808, |
|
"grad_norm": 2.6914663314819336, |
|
"learning_rate": 1.5133551469565313e-05, |
|
"loss": 0.3314, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.39893617021276595, |
|
"grad_norm": 4.730180740356445, |
|
"learning_rate": 1.5053566370464416e-05, |
|
"loss": 0.2545, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4015957446808511, |
|
"grad_norm": 2.2047128677368164, |
|
"learning_rate": 1.4973144694021874e-05, |
|
"loss": 0.2487, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.40425531914893614, |
|
"grad_norm": 2.841487407684326, |
|
"learning_rate": 1.4892293387862221e-05, |
|
"loss": 0.3067, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.40691489361702127, |
|
"grad_norm": 5.28929328918457, |
|
"learning_rate": 1.4811019436725684e-05, |
|
"loss": 0.242, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.4095744680851064, |
|
"grad_norm": 3.347501039505005, |
|
"learning_rate": 1.472932986186477e-05, |
|
"loss": 0.207, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4122340425531915, |
|
"grad_norm": 3.1569905281066895, |
|
"learning_rate": 1.4647231720437687e-05, |
|
"loss": 0.3062, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4148936170212766, |
|
"grad_norm": 2.134598970413208, |
|
"learning_rate": 1.4564732104898702e-05, |
|
"loss": 0.2443, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4175531914893617, |
|
"grad_norm": 2.528136968612671, |
|
"learning_rate": 1.4481838142385403e-05, |
|
"loss": 0.2308, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.42021276595744683, |
|
"grad_norm": 2.756695032119751, |
|
"learning_rate": 1.4398556994102996e-05, |
|
"loss": 0.2461, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.4228723404255319, |
|
"grad_norm": 4.9117631912231445, |
|
"learning_rate": 1.4314895854705641e-05, |
|
"loss": 0.2911, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.425531914893617, |
|
"grad_norm": 2.877560615539551, |
|
"learning_rate": 1.4230861951674914e-05, |
|
"loss": 0.2404, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.425531914893617, |
|
"eval_accuracy": 0.8094117647058824, |
|
"eval_f1": 0.40875912408759124, |
|
"eval_loss": 0.46145251393318176, |
|
"eval_precision": 0.7671232876712328, |
|
"eval_recall": 0.27860696517412936, |
|
"eval_runtime": 34.0326, |
|
"eval_samples_per_second": 6.582, |
|
"eval_steps_per_second": 0.206, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.42819148936170215, |
|
"grad_norm": 4.159635066986084, |
|
"learning_rate": 1.4146462544695428e-05, |
|
"loss": 0.2858, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.4308510638297872, |
|
"grad_norm": 2.716390609741211, |
|
"learning_rate": 1.4061704925027653e-05, |
|
"loss": 0.2299, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.43351063829787234, |
|
"grad_norm": 2.3737223148345947, |
|
"learning_rate": 1.3976596414878044e-05, |
|
"loss": 0.2371, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.43617021276595747, |
|
"grad_norm": 3.5703928470611572, |
|
"learning_rate": 1.3891144366766457e-05, |
|
"loss": 0.3007, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.43882978723404253, |
|
"grad_norm": 2.449308156967163, |
|
"learning_rate": 1.380535616289099e-05, |
|
"loss": 0.2414, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.44148936170212766, |
|
"grad_norm": 3.272531509399414, |
|
"learning_rate": 1.3719239214490203e-05, |
|
"loss": 0.2961, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4441489361702128, |
|
"grad_norm": 3.6306636333465576, |
|
"learning_rate": 1.363280096120289e-05, |
|
"loss": 0.2923, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.44680851063829785, |
|
"grad_norm": 2.5956878662109375, |
|
"learning_rate": 1.3546048870425356e-05, |
|
"loss": 0.251, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.449468085106383, |
|
"grad_norm": 5.468013286590576, |
|
"learning_rate": 1.3458990436666313e-05, |
|
"loss": 0.287, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.4521276595744681, |
|
"grad_norm": 2.5763583183288574, |
|
"learning_rate": 1.3371633180899417e-05, |
|
"loss": 0.2779, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.45478723404255317, |
|
"grad_norm": 3.8822455406188965, |
|
"learning_rate": 1.3283984649913552e-05, |
|
"loss": 0.2197, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.4574468085106383, |
|
"grad_norm": 2.4867823123931885, |
|
"learning_rate": 1.3196052415660856e-05, |
|
"loss": 0.2875, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4601063829787234, |
|
"grad_norm": 2.161820888519287, |
|
"learning_rate": 1.3107844074602566e-05, |
|
"loss": 0.2416, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4627659574468085, |
|
"grad_norm": 3.0401649475097656, |
|
"learning_rate": 1.3019367247052781e-05, |
|
"loss": 0.2634, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.4654255319148936, |
|
"grad_norm": 2.273088216781616, |
|
"learning_rate": 1.2930629576520133e-05, |
|
"loss": 0.2709, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.46808510638297873, |
|
"grad_norm": 3.001025438308716, |
|
"learning_rate": 1.2841638729047463e-05, |
|
"loss": 0.2806, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.47074468085106386, |
|
"grad_norm": 2.348917245864868, |
|
"learning_rate": 1.2752402392549556e-05, |
|
"loss": 0.2702, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4734042553191489, |
|
"grad_norm": 2.713019847869873, |
|
"learning_rate": 1.2662928276148985e-05, |
|
"loss": 0.2588, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.47606382978723405, |
|
"grad_norm": 3.061501979827881, |
|
"learning_rate": 1.2573224109510112e-05, |
|
"loss": 0.2701, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4787234042553192, |
|
"grad_norm": 5.120430946350098, |
|
"learning_rate": 1.2483297642171332e-05, |
|
"loss": 0.2962, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4787234042553192, |
|
"eval_accuracy": 0.8282352941176471, |
|
"eval_f1": 0.5228758169934641, |
|
"eval_loss": 0.42048707604408264, |
|
"eval_precision": 0.7619047619047619, |
|
"eval_recall": 0.39800995024875624, |
|
"eval_runtime": 34.4467, |
|
"eval_samples_per_second": 6.503, |
|
"eval_steps_per_second": 0.203, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.48138297872340424, |
|
"grad_norm": 2.8563108444213867, |
|
"learning_rate": 1.2393156642875579e-05, |
|
"loss": 0.2855, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.48404255319148937, |
|
"grad_norm": 3.6837549209594727, |
|
"learning_rate": 1.23028088988992e-05, |
|
"loss": 0.2976, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.4867021276595745, |
|
"grad_norm": 3.085362434387207, |
|
"learning_rate": 1.2212262215379199e-05, |
|
"loss": 0.2775, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.48936170212765956, |
|
"grad_norm": 3.395561695098877, |
|
"learning_rate": 1.2121524414638958e-05, |
|
"loss": 0.3076, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.4920212765957447, |
|
"grad_norm": 3.6867411136627197, |
|
"learning_rate": 1.2030603335512467e-05, |
|
"loss": 0.2402, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4946808510638298, |
|
"grad_norm": 5.76826810836792, |
|
"learning_rate": 1.1939506832667129e-05, |
|
"loss": 0.2715, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.4973404255319149, |
|
"grad_norm": 3.938023328781128, |
|
"learning_rate": 1.1848242775925188e-05, |
|
"loss": 0.2773, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.6675262451171875, |
|
"learning_rate": 1.1756819049583861e-05, |
|
"loss": 0.2752, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5026595744680851, |
|
"grad_norm": 2.274174213409424, |
|
"learning_rate": 1.166524355173422e-05, |
|
"loss": 0.2545, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.5053191489361702, |
|
"grad_norm": 3.854417562484741, |
|
"learning_rate": 1.1573524193578863e-05, |
|
"loss": 0.2804, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5079787234042553, |
|
"grad_norm": 5.1708550453186035, |
|
"learning_rate": 1.1481668898748474e-05, |
|
"loss": 0.2371, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.5106382978723404, |
|
"grad_norm": 4.153345584869385, |
|
"learning_rate": 1.1389685602617302e-05, |
|
"loss": 0.2405, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5132978723404256, |
|
"grad_norm": 3.244084119796753, |
|
"learning_rate": 1.1297582251617618e-05, |
|
"loss": 0.2737, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5159574468085106, |
|
"grad_norm": 2.50569486618042, |
|
"learning_rate": 1.1205366802553231e-05, |
|
"loss": 0.2647, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5186170212765957, |
|
"grad_norm": 2.3251872062683105, |
|
"learning_rate": 1.1113047221912097e-05, |
|
"loss": 0.1958, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5212765957446809, |
|
"grad_norm": 2.288127899169922, |
|
"learning_rate": 1.1020631485178084e-05, |
|
"loss": 0.2109, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.523936170212766, |
|
"grad_norm": 4.095820426940918, |
|
"learning_rate": 1.0928127576141992e-05, |
|
"loss": 0.2998, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.526595744680851, |
|
"grad_norm": 5.008273601531982, |
|
"learning_rate": 1.0835543486211815e-05, |
|
"loss": 0.2841, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5292553191489362, |
|
"grad_norm": 5.711911678314209, |
|
"learning_rate": 1.0742887213722372e-05, |
|
"loss": 0.2488, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"grad_norm": 5.29080867767334, |
|
"learning_rate": 1.065016676324433e-05, |
|
"loss": 0.2727, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"eval_accuracy": 0.8, |
|
"eval_f1": 0.34615384615384615, |
|
"eval_loss": 0.4829849600791931, |
|
"eval_precision": 0.7627118644067796, |
|
"eval_recall": 0.22388059701492538, |
|
"eval_runtime": 33.8087, |
|
"eval_samples_per_second": 6.626, |
|
"eval_steps_per_second": 0.207, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5345744680851063, |
|
"grad_norm": 6.333003044128418, |
|
"learning_rate": 1.0557390144892684e-05, |
|
"loss": 0.3334, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.5372340425531915, |
|
"grad_norm": 2.1432178020477295, |
|
"learning_rate": 1.0464565373634784e-05, |
|
"loss": 0.2513, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5398936170212766, |
|
"grad_norm": 5.119022369384766, |
|
"learning_rate": 1.0371700468597886e-05, |
|
"loss": 0.2566, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5425531914893617, |
|
"grad_norm": 3.5691733360290527, |
|
"learning_rate": 1.0278803452376416e-05, |
|
"loss": 0.3084, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5452127659574468, |
|
"grad_norm": 3.0961036682128906, |
|
"learning_rate": 1.018588235033888e-05, |
|
"loss": 0.2085, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5478723404255319, |
|
"grad_norm": 2.27486515045166, |
|
"learning_rate": 1.0092945189934558e-05, |
|
"loss": 0.2524, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.550531914893617, |
|
"grad_norm": 2.3716437816619873, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2011, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5531914893617021, |
|
"grad_norm": 2.6007697582244873, |
|
"learning_rate": 9.907054810065446e-06, |
|
"loss": 0.2451, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.5558510638297872, |
|
"grad_norm": 2.5963995456695557, |
|
"learning_rate": 9.81411764966112e-06, |
|
"loss": 0.2705, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5585106382978723, |
|
"grad_norm": 2.1203646659851074, |
|
"learning_rate": 9.721196547623585e-06, |
|
"loss": 0.2101, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5611702127659575, |
|
"grad_norm": 3.2986724376678467, |
|
"learning_rate": 9.628299531402118e-06, |
|
"loss": 0.2659, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5638297872340425, |
|
"grad_norm": 2.127525568008423, |
|
"learning_rate": 9.535434626365221e-06, |
|
"loss": 0.251, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5664893617021277, |
|
"grad_norm": 3.1327059268951416, |
|
"learning_rate": 9.442609855107317e-06, |
|
"loss": 0.2255, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5691489361702128, |
|
"grad_norm": 2.0999770164489746, |
|
"learning_rate": 9.349833236755675e-06, |
|
"loss": 0.2549, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5718085106382979, |
|
"grad_norm": 2.7766880989074707, |
|
"learning_rate": 9.257112786277631e-06, |
|
"loss": 0.2224, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.574468085106383, |
|
"grad_norm": 2.451842784881592, |
|
"learning_rate": 9.164456513788186e-06, |
|
"loss": 0.2599, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5771276595744681, |
|
"grad_norm": 2.7746975421905518, |
|
"learning_rate": 9.07187242385801e-06, |
|
"loss": 0.2601, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.5797872340425532, |
|
"grad_norm": 2.561441421508789, |
|
"learning_rate": 8.979368514821917e-06, |
|
"loss": 0.284, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5824468085106383, |
|
"grad_norm": 2.425262928009033, |
|
"learning_rate": 8.88695277808791e-06, |
|
"loss": 0.2593, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.5851063829787234, |
|
"grad_norm": 3.180457830429077, |
|
"learning_rate": 8.79463319744677e-06, |
|
"loss": 0.2844, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5851063829787234, |
|
"eval_accuracy": 0.8258823529411765, |
|
"eval_f1": 0.5163398692810458, |
|
"eval_loss": 0.41871950030326843, |
|
"eval_precision": 0.7523809523809524, |
|
"eval_recall": 0.39303482587064675, |
|
"eval_runtime": 34.0471, |
|
"eval_samples_per_second": 6.579, |
|
"eval_steps_per_second": 0.206, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5877659574468085, |
|
"grad_norm": 2.8783645629882812, |
|
"learning_rate": 8.702417748382384e-06, |
|
"loss": 0.2458, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.5904255319148937, |
|
"grad_norm": 2.950291395187378, |
|
"learning_rate": 8.610314397382701e-06, |
|
"loss": 0.3062, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.5930851063829787, |
|
"grad_norm": 2.8430628776550293, |
|
"learning_rate": 8.51833110125153e-06, |
|
"loss": 0.2913, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5957446808510638, |
|
"grad_norm": 6.691501617431641, |
|
"learning_rate": 8.426475806421139e-06, |
|
"loss": 0.3716, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.598404255319149, |
|
"grad_norm": 2.705397367477417, |
|
"learning_rate": 8.334756448265782e-06, |
|
"loss": 0.2692, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.601063829787234, |
|
"grad_norm": 2.276686429977417, |
|
"learning_rate": 8.243180950416142e-06, |
|
"loss": 0.214, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.6037234042553191, |
|
"grad_norm": 4.622035980224609, |
|
"learning_rate": 8.151757224074815e-06, |
|
"loss": 0.1863, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.6063829787234043, |
|
"grad_norm": 2.3402657508850098, |
|
"learning_rate": 8.060493167332874e-06, |
|
"loss": 0.2895, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.6090425531914894, |
|
"grad_norm": 4.533783912658691, |
|
"learning_rate": 7.969396664487534e-06, |
|
"loss": 0.256, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.6117021276595744, |
|
"grad_norm": 4.254709243774414, |
|
"learning_rate": 7.878475585361045e-06, |
|
"loss": 0.2798, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6143617021276596, |
|
"grad_norm": 2.4173777103424072, |
|
"learning_rate": 7.787737784620803e-06, |
|
"loss": 0.3046, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6170212765957447, |
|
"grad_norm": 2.9640042781829834, |
|
"learning_rate": 7.697191101100802e-06, |
|
"loss": 0.2893, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.6196808510638298, |
|
"grad_norm": 2.9573986530303955, |
|
"learning_rate": 7.606843357124426e-06, |
|
"loss": 0.2764, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.6223404255319149, |
|
"grad_norm": 3.9960691928863525, |
|
"learning_rate": 7.516702357828672e-06, |
|
"loss": 0.3243, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 2.9117209911346436, |
|
"learning_rate": 7.42677589048989e-06, |
|
"loss": 0.2863, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6276595744680851, |
|
"grad_norm": 2.57856822013855, |
|
"learning_rate": 7.337071723851018e-06, |
|
"loss": 0.2433, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.6303191489361702, |
|
"grad_norm": 3.1635406017303467, |
|
"learning_rate": 7.247597607450446e-06, |
|
"loss": 0.2622, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.6329787234042553, |
|
"grad_norm": 3.4039433002471924, |
|
"learning_rate": 7.1583612709525405e-06, |
|
"loss": 0.2313, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.6356382978723404, |
|
"grad_norm": 3.072800397872925, |
|
"learning_rate": 7.06937042347987e-06, |
|
"loss": 0.3117, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.6382978723404256, |
|
"grad_norm": 3.175246000289917, |
|
"learning_rate": 6.980632752947221e-06, |
|
"loss": 0.2632, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6382978723404256, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_f1": 0.5161290322580645, |
|
"eval_loss": 0.4037013053894043, |
|
"eval_precision": 0.7339449541284404, |
|
"eval_recall": 0.39800995024875624, |
|
"eval_runtime": 34.0215, |
|
"eval_samples_per_second": 6.584, |
|
"eval_steps_per_second": 0.206, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6409574468085106, |
|
"grad_norm": 2.5714304447174072, |
|
"learning_rate": 6.892155925397437e-06, |
|
"loss": 0.2749, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.6436170212765957, |
|
"grad_norm": 3.128525733947754, |
|
"learning_rate": 6.803947584339148e-06, |
|
"loss": 0.3527, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6462765957446809, |
|
"grad_norm": 3.6604840755462646, |
|
"learning_rate": 6.716015350086449e-06, |
|
"loss": 0.2686, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.648936170212766, |
|
"grad_norm": 2.6133296489715576, |
|
"learning_rate": 6.628366819100586e-06, |
|
"loss": 0.2836, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.651595744680851, |
|
"grad_norm": 2.5161774158477783, |
|
"learning_rate": 6.54100956333369e-06, |
|
"loss": 0.2395, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6542553191489362, |
|
"grad_norm": 2.824259042739868, |
|
"learning_rate": 6.453951129574644e-06, |
|
"loss": 0.2906, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6569148936170213, |
|
"grad_norm": 2.747422456741333, |
|
"learning_rate": 6.3671990387971096e-06, |
|
"loss": 0.2368, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6595744680851063, |
|
"grad_norm": 2.540599822998047, |
|
"learning_rate": 6.280760785509802e-06, |
|
"loss": 0.3036, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6622340425531915, |
|
"grad_norm": 2.4649527072906494, |
|
"learning_rate": 6.194643837109015e-06, |
|
"loss": 0.2935, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.6648936170212766, |
|
"grad_norm": 2.2564632892608643, |
|
"learning_rate": 6.108855633233546e-06, |
|
"loss": 0.2276, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6675531914893617, |
|
"grad_norm": 2.5052363872528076, |
|
"learning_rate": 6.0234035851219604e-06, |
|
"loss": 0.2464, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.6702127659574468, |
|
"grad_norm": 3.091642141342163, |
|
"learning_rate": 5.93829507497235e-06, |
|
"loss": 0.2766, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.6728723404255319, |
|
"grad_norm": 3.3672595024108887, |
|
"learning_rate": 5.853537455304575e-06, |
|
"loss": 0.2567, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.675531914893617, |
|
"grad_norm": 2.4779727458953857, |
|
"learning_rate": 5.769138048325087e-06, |
|
"loss": 0.2628, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.6781914893617021, |
|
"grad_norm": 2.5639469623565674, |
|
"learning_rate": 5.685104145294364e-06, |
|
"loss": 0.2204, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.6808510638297872, |
|
"grad_norm": 3.3351776599884033, |
|
"learning_rate": 5.601443005897012e-06, |
|
"loss": 0.2535, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.6835106382978723, |
|
"grad_norm": 2.3642754554748535, |
|
"learning_rate": 5.5181618576146e-06, |
|
"loss": 0.2234, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.6861702127659575, |
|
"grad_norm": 2.9997129440307617, |
|
"learning_rate": 5.435267895101303e-06, |
|
"loss": 0.2643, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.6888297872340425, |
|
"grad_norm": 2.4532787799835205, |
|
"learning_rate": 5.352768279562315e-06, |
|
"loss": 0.2621, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.6914893617021277, |
|
"grad_norm": 2.572538137435913, |
|
"learning_rate": 5.270670138135234e-06, |
|
"loss": 0.2499, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6914893617021277, |
|
"eval_accuracy": 0.8247058823529412, |
|
"eval_f1": 0.5299684542586751, |
|
"eval_loss": 0.3885125517845154, |
|
"eval_precision": 0.7241379310344828, |
|
"eval_recall": 0.417910447761194, |
|
"eval_runtime": 33.8843, |
|
"eval_samples_per_second": 6.611, |
|
"eval_steps_per_second": 0.207, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6941489361702128, |
|
"grad_norm": 2.906144618988037, |
|
"learning_rate": 5.188980563274315e-06, |
|
"loss": 0.3095, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.6968085106382979, |
|
"grad_norm": 2.319133996963501, |
|
"learning_rate": 5.107706612137776e-06, |
|
"loss": 0.2388, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.699468085106383, |
|
"grad_norm": 3.162642478942871, |
|
"learning_rate": 5.026855305978129e-06, |
|
"loss": 0.2462, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.7021276595744681, |
|
"grad_norm": 2.749540090560913, |
|
"learning_rate": 4.946433629535585e-06, |
|
"loss": 0.2659, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.7047872340425532, |
|
"grad_norm": 2.891836643218994, |
|
"learning_rate": 4.866448530434692e-06, |
|
"loss": 0.2332, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.7074468085106383, |
|
"grad_norm": 2.4717514514923096, |
|
"learning_rate": 4.786906918584083e-06, |
|
"loss": 0.2136, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.7101063829787234, |
|
"grad_norm": 2.679591655731201, |
|
"learning_rate": 4.707815665579569e-06, |
|
"loss": 0.3036, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.7127659574468085, |
|
"grad_norm": 2.3344614505767822, |
|
"learning_rate": 4.629181604110464e-06, |
|
"loss": 0.2853, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.7154255319148937, |
|
"grad_norm": 2.839320182800293, |
|
"learning_rate": 4.551011527369348e-06, |
|
"loss": 0.2394, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.7180851063829787, |
|
"grad_norm": 2.27245831489563, |
|
"learning_rate": 4.4733121884651665e-06, |
|
"loss": 0.2496, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7207446808510638, |
|
"grad_norm": 3.038536548614502, |
|
"learning_rate": 4.3960902998398524e-06, |
|
"loss": 0.2787, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.723404255319149, |
|
"grad_norm": 3.1204025745391846, |
|
"learning_rate": 4.319352532688444e-06, |
|
"loss": 0.2678, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.726063829787234, |
|
"grad_norm": 3.8436288833618164, |
|
"learning_rate": 4.243105516382732e-06, |
|
"loss": 0.2405, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7287234042553191, |
|
"grad_norm": 3.1559836864471436, |
|
"learning_rate": 4.167355837898585e-06, |
|
"loss": 0.2881, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7313829787234043, |
|
"grad_norm": 2.5084681510925293, |
|
"learning_rate": 4.092110041246865e-06, |
|
"loss": 0.2365, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7340425531914894, |
|
"grad_norm": 3.0584487915039062, |
|
"learning_rate": 4.017374626908125e-06, |
|
"loss": 0.2808, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.7367021276595744, |
|
"grad_norm": 3.6234519481658936, |
|
"learning_rate": 3.943156051271003e-06, |
|
"loss": 0.2993, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.7393617021276596, |
|
"grad_norm": 1.8584307432174683, |
|
"learning_rate": 3.8694607260744745e-06, |
|
"loss": 0.2012, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.7420212765957447, |
|
"grad_norm": 2.4248085021972656, |
|
"learning_rate": 3.7962950178539282e-06, |
|
"loss": 0.2352, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.7446808510638298, |
|
"grad_norm": 2.5359675884246826, |
|
"learning_rate": 3.7236652473911817e-06, |
|
"loss": 0.2121, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7446808510638298, |
|
"eval_accuracy": 0.8223529411764706, |
|
"eval_f1": 0.5175718849840255, |
|
"eval_loss": 0.3953240215778351, |
|
"eval_precision": 0.7232142857142857, |
|
"eval_recall": 0.40298507462686567, |
|
"eval_runtime": 34.1139, |
|
"eval_samples_per_second": 6.566, |
|
"eval_steps_per_second": 0.205, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7473404255319149, |
|
"grad_norm": 2.3844354152679443, |
|
"learning_rate": 3.651577689168405e-06, |
|
"loss": 0.2212, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.8635263442993164, |
|
"learning_rate": 3.580038570826093e-06, |
|
"loss": 0.2259, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.7526595744680851, |
|
"grad_norm": 3.1672933101654053, |
|
"learning_rate": 3.509054072625031e-06, |
|
"loss": 0.2691, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7553191489361702, |
|
"grad_norm": 3.298377752304077, |
|
"learning_rate": 3.4386303269124142e-06, |
|
"loss": 0.261, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7579787234042553, |
|
"grad_norm": 3.3718481063842773, |
|
"learning_rate": 3.3687734175920505e-06, |
|
"loss": 0.2842, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7606382978723404, |
|
"grad_norm": 2.822702646255493, |
|
"learning_rate": 3.299489379598777e-06, |
|
"loss": 0.2416, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.7632978723404256, |
|
"grad_norm": 3.209895372390747, |
|
"learning_rate": 3.2307841983771182e-06, |
|
"loss": 0.2706, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.7659574468085106, |
|
"grad_norm": 2.953824996948242, |
|
"learning_rate": 3.162663809364178e-06, |
|
"loss": 0.2629, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7686170212765957, |
|
"grad_norm": 4.190698623657227, |
|
"learning_rate": 3.095134097476904e-06, |
|
"loss": 0.2609, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.7712765957446809, |
|
"grad_norm": 4.36337423324585, |
|
"learning_rate": 3.0282008966036647e-06, |
|
"loss": 0.2549, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.773936170212766, |
|
"grad_norm": 2.8681600093841553, |
|
"learning_rate": 2.9618699891002843e-06, |
|
"loss": 0.2464, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.776595744680851, |
|
"grad_norm": 3.781843900680542, |
|
"learning_rate": 2.8961471052904855e-06, |
|
"loss": 0.3261, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.7792553191489362, |
|
"grad_norm": 3.1815481185913086, |
|
"learning_rate": 2.831037922970855e-06, |
|
"loss": 0.2659, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.7819148936170213, |
|
"grad_norm": 3.2825517654418945, |
|
"learning_rate": 2.7665480669203383e-06, |
|
"loss": 0.2239, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.7845744680851063, |
|
"grad_norm": 2.418006420135498, |
|
"learning_rate": 2.702683108414326e-06, |
|
"loss": 0.2476, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.7872340425531915, |
|
"grad_norm": 3.483743906021118, |
|
"learning_rate": 2.639448564743328e-06, |
|
"loss": 0.2306, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.7898936170212766, |
|
"grad_norm": 3.201629638671875, |
|
"learning_rate": 2.57684989873636e-06, |
|
"loss": 0.2562, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.7925531914893617, |
|
"grad_norm": 2.7855303287506104, |
|
"learning_rate": 2.514892518288988e-06, |
|
"loss": 0.2245, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.7952127659574468, |
|
"grad_norm": 3.742940664291382, |
|
"learning_rate": 2.4535817758961644e-06, |
|
"loss": 0.3192, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.7978723404255319, |
|
"grad_norm": 2.966266393661499, |
|
"learning_rate": 2.3929229681898005e-06, |
|
"loss": 0.2704, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7978723404255319, |
|
"eval_accuracy": 0.8329411764705882, |
|
"eval_f1": 0.5644171779141104, |
|
"eval_loss": 0.38487711548805237, |
|
"eval_precision": 0.736, |
|
"eval_recall": 0.4577114427860697, |
|
"eval_runtime": 33.5166, |
|
"eval_samples_per_second": 6.683, |
|
"eval_steps_per_second": 0.209, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.800531914893617, |
|
"grad_norm": 3.4099960327148438, |
|
"learning_rate": 2.332921335481205e-06, |
|
"loss": 0.2715, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.8031914893617021, |
|
"grad_norm": 4.202554702758789, |
|
"learning_rate": 2.2735820613083837e-06, |
|
"loss": 0.2616, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.8058510638297872, |
|
"grad_norm": 2.95456862449646, |
|
"learning_rate": 2.2149102719882044e-06, |
|
"loss": 0.2455, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.8085106382978723, |
|
"grad_norm": 2.7879536151885986, |
|
"learning_rate": 2.156911036173568e-06, |
|
"loss": 0.2054, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.8111702127659575, |
|
"grad_norm": 2.4969985485076904, |
|
"learning_rate": 2.0995893644155007e-06, |
|
"loss": 0.2814, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.8138297872340425, |
|
"grad_norm": 3.3959643840789795, |
|
"learning_rate": 2.0429502087303164e-06, |
|
"loss": 0.2382, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.8164893617021277, |
|
"grad_norm": 2.825615882873535, |
|
"learning_rate": 1.9869984621717888e-06, |
|
"loss": 0.2808, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.8191489361702128, |
|
"grad_norm": 2.766301155090332, |
|
"learning_rate": 1.931738958408457e-06, |
|
"loss": 0.2371, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.8218085106382979, |
|
"grad_norm": 3.683234930038452, |
|
"learning_rate": 1.8771764713060359e-06, |
|
"loss": 0.2617, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.824468085106383, |
|
"grad_norm": 3.0581727027893066, |
|
"learning_rate": 1.8233157145150183e-06, |
|
"loss": 0.254, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8271276595744681, |
|
"grad_norm": 3.316701889038086, |
|
"learning_rate": 1.7701613410634367e-06, |
|
"loss": 0.2596, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.8297872340425532, |
|
"grad_norm": 2.8315346240997314, |
|
"learning_rate": 1.717717942954914e-06, |
|
"loss": 0.222, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8324468085106383, |
|
"grad_norm": 2.781020164489746, |
|
"learning_rate": 1.6659900507719406e-06, |
|
"loss": 0.2643, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8351063829787234, |
|
"grad_norm": 2.389970302581787, |
|
"learning_rate": 1.614982133284495e-06, |
|
"loss": 0.2161, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.8377659574468085, |
|
"grad_norm": 3.4777987003326416, |
|
"learning_rate": 1.5646985970639717e-06, |
|
"loss": 0.3309, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.8404255319148937, |
|
"grad_norm": 4.487973690032959, |
|
"learning_rate": 1.5151437861025032e-06, |
|
"loss": 0.3284, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.8430851063829787, |
|
"grad_norm": 4.822957515716553, |
|
"learning_rate": 1.466321981437694e-06, |
|
"loss": 0.2033, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.8457446808510638, |
|
"grad_norm": 2.9255247116088867, |
|
"learning_rate": 1.4182374007827605e-06, |
|
"loss": 0.2528, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.848404255319149, |
|
"grad_norm": 2.9784889221191406, |
|
"learning_rate": 1.3708941981621814e-06, |
|
"loss": 0.2151, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.851063829787234, |
|
"grad_norm": 2.522810459136963, |
|
"learning_rate": 1.324296463552821e-06, |
|
"loss": 0.2333, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.851063829787234, |
|
"eval_accuracy": 0.831764705882353, |
|
"eval_f1": 0.5545171339563862, |
|
"eval_loss": 0.38777896761894226, |
|
"eval_precision": 0.7416666666666667, |
|
"eval_recall": 0.4427860696517413, |
|
"eval_runtime": 34.5031, |
|
"eval_samples_per_second": 6.492, |
|
"eval_steps_per_second": 0.203, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8537234042553191, |
|
"grad_norm": 2.794802665710449, |
|
"learning_rate": 1.2784482225306061e-06, |
|
"loss": 0.2338, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.8563829787234043, |
|
"grad_norm": 2.8740601539611816, |
|
"learning_rate": 1.2333534359227383e-06, |
|
"loss": 0.2526, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.8590425531914894, |
|
"grad_norm": 2.600721597671509, |
|
"learning_rate": 1.1890159994655425e-06, |
|
"loss": 0.2165, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.8617021276595744, |
|
"grad_norm": 2.781907796859741, |
|
"learning_rate": 1.1454397434679022e-06, |
|
"loss": 0.2414, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.8643617021276596, |
|
"grad_norm": 2.8299474716186523, |
|
"learning_rate": 1.1026284324803493e-06, |
|
"loss": 0.2389, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8670212765957447, |
|
"grad_norm": 2.6625523567199707, |
|
"learning_rate": 1.060585764969867e-06, |
|
"loss": 0.2444, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.8696808510638298, |
|
"grad_norm": 3.0182435512542725, |
|
"learning_rate": 1.0193153730003603e-06, |
|
"loss": 0.2967, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.8723404255319149, |
|
"grad_norm": 2.5358083248138428, |
|
"learning_rate": 9.788208219188932e-07, |
|
"loss": 0.2091, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 3.2480201721191406, |
|
"learning_rate": 9.391056100476736e-07, |
|
"loss": 0.2195, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.8776595744680851, |
|
"grad_norm": 2.449801445007324, |
|
"learning_rate": 9.001731683818338e-07, |
|
"loss": 0.2316, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8803191489361702, |
|
"grad_norm": 3.304652690887451, |
|
"learning_rate": 8.620268602930271e-07, |
|
"loss": 0.2719, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.8829787234042553, |
|
"grad_norm": 3.1013834476470947, |
|
"learning_rate": 8.246699812388714e-07, |
|
"loss": 0.2412, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.8856382978723404, |
|
"grad_norm": 2.4398679733276367, |
|
"learning_rate": 7.881057584782448e-07, |
|
"loss": 0.1909, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.8882978723404256, |
|
"grad_norm": 3.296792984008789, |
|
"learning_rate": 7.523373507924947e-07, |
|
"loss": 0.2592, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.8909574468085106, |
|
"grad_norm": 3.5089118480682373, |
|
"learning_rate": 7.17367848212539e-07, |
|
"loss": 0.2341, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.8936170212765957, |
|
"grad_norm": 2.9826953411102295, |
|
"learning_rate": 6.83200271751927e-07, |
|
"loss": 0.239, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.8962765957446809, |
|
"grad_norm": 2.965322732925415, |
|
"learning_rate": 6.498375731458529e-07, |
|
"loss": 0.242, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.898936170212766, |
|
"grad_norm": 2.855252504348755, |
|
"learning_rate": 6.17282634596148e-07, |
|
"loss": 0.2503, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.901595744680851, |
|
"grad_norm": 5.112611293792725, |
|
"learning_rate": 5.85538268522301e-07, |
|
"loss": 0.2665, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.9042553191489362, |
|
"grad_norm": 3.4850215911865234, |
|
"learning_rate": 5.546072173184791e-07, |
|
"loss": 0.2896, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9042553191489362, |
|
"eval_accuracy": 0.8305882352941176, |
|
"eval_f1": 0.55, |
|
"eval_loss": 0.38858291506767273, |
|
"eval_precision": 0.7394957983193278, |
|
"eval_recall": 0.43781094527363185, |
|
"eval_runtime": 34.3336, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 0.204, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9069148936170213, |
|
"grad_norm": 2.3722422122955322, |
|
"learning_rate": 5.244921531166247e-07, |
|
"loss": 0.2334, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.9095744680851063, |
|
"grad_norm": 2.8881895542144775, |
|
"learning_rate": 4.951956775556e-07, |
|
"loss": 0.2339, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.9122340425531915, |
|
"grad_norm": 4.109971046447754, |
|
"learning_rate": 4.667203215564431e-07, |
|
"loss": 0.2837, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.9148936170212766, |
|
"grad_norm": 3.7027337551116943, |
|
"learning_rate": 4.3906854510370245e-07, |
|
"loss": 0.2862, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.9175531914893617, |
|
"grad_norm": 3.069493532180786, |
|
"learning_rate": 4.1224273703294515e-07, |
|
"loss": 0.2456, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.9202127659574468, |
|
"grad_norm": 2.9162609577178955, |
|
"learning_rate": 3.862452148243623e-07, |
|
"loss": 0.2633, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.9228723404255319, |
|
"grad_norm": 3.10223388671875, |
|
"learning_rate": 3.610782244025768e-07, |
|
"loss": 0.2165, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.925531914893617, |
|
"grad_norm": 3.3466663360595703, |
|
"learning_rate": 3.367439399426087e-07, |
|
"loss": 0.2748, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.9281914893617021, |
|
"grad_norm": 3.4505677223205566, |
|
"learning_rate": 3.132444636820575e-07, |
|
"loss": 0.2789, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.9308510638297872, |
|
"grad_norm": 3.7714152336120605, |
|
"learning_rate": 2.905818257394799e-07, |
|
"loss": 0.233, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9335106382978723, |
|
"grad_norm": 5.176234722137451, |
|
"learning_rate": 2.687579839390153e-07, |
|
"loss": 0.2933, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9361702127659575, |
|
"grad_norm": 2.8145923614501953, |
|
"learning_rate": 2.4777482364124695e-07, |
|
"loss": 0.2916, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.9388297872340425, |
|
"grad_norm": 2.452026605606079, |
|
"learning_rate": 2.2763415758032316e-07, |
|
"loss": 0.2072, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.9414893617021277, |
|
"grad_norm": 2.741774559020996, |
|
"learning_rate": 2.0833772570736376e-07, |
|
"loss": 0.2365, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.9441489361702128, |
|
"grad_norm": 2.6265206336975098, |
|
"learning_rate": 1.8988719504013375e-07, |
|
"loss": 0.2226, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.9468085106382979, |
|
"grad_norm": 4.149282932281494, |
|
"learning_rate": 1.7228415951904165e-07, |
|
"loss": 0.1923, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.949468085106383, |
|
"grad_norm": 2.389505624771118, |
|
"learning_rate": 1.5553013986942645e-07, |
|
"loss": 0.21, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.9521276595744681, |
|
"grad_norm": 4.067861557006836, |
|
"learning_rate": 1.3962658347019819e-07, |
|
"loss": 0.2497, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.9547872340425532, |
|
"grad_norm": 2.5128250122070312, |
|
"learning_rate": 1.245748642287814e-07, |
|
"loss": 0.2559, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.9574468085106383, |
|
"grad_norm": 2.755162477493286, |
|
"learning_rate": 1.103762824624377e-07, |
|
"loss": 0.2398, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9574468085106383, |
|
"eval_accuracy": 0.8329411764705882, |
|
"eval_f1": 0.5617283950617284, |
|
"eval_loss": 0.38481393456459045, |
|
"eval_precision": 0.7398373983739838, |
|
"eval_recall": 0.4527363184079602, |
|
"eval_runtime": 34.7008, |
|
"eval_samples_per_second": 6.455, |
|
"eval_steps_per_second": 0.202, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9601063829787234, |
|
"grad_norm": 3.078138828277588, |
|
"learning_rate": 9.70320647859213e-08, |
|
"loss": 0.2091, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.9627659574468085, |
|
"grad_norm": 2.8632972240448, |
|
"learning_rate": 8.454336400552154e-08, |
|
"loss": 0.2513, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.9654255319148937, |
|
"grad_norm": 2.500767469406128, |
|
"learning_rate": 7.291125901946027e-08, |
|
"loss": 0.2346, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.9680851063829787, |
|
"grad_norm": 4.420257091522217, |
|
"learning_rate": 6.21367547246976e-08, |
|
"loss": 0.2701, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.9707446808510638, |
|
"grad_norm": 2.459460973739624, |
|
"learning_rate": 5.2220781930111263e-08, |
|
"loss": 0.2441, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.973404255319149, |
|
"grad_norm": 3.661996841430664, |
|
"learning_rate": 4.316419727608434e-08, |
|
"loss": 0.2704, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.976063829787234, |
|
"grad_norm": 3.0439155101776123, |
|
"learning_rate": 3.4967783160507753e-08, |
|
"loss": 0.2187, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.9787234042553191, |
|
"grad_norm": 3.629185914993286, |
|
"learning_rate": 2.763224767117767e-08, |
|
"loss": 0.3418, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.9813829787234043, |
|
"grad_norm": 2.30877423286438, |
|
"learning_rate": 2.115822452463223e-08, |
|
"loss": 0.2607, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.9840425531914894, |
|
"grad_norm": 3.398482084274292, |
|
"learning_rate": 1.554627301140199e-08, |
|
"loss": 0.2494, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9867021276595744, |
|
"grad_norm": 3.0833022594451904, |
|
"learning_rate": 1.0796877947691909e-08, |
|
"loss": 0.2924, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.9893617021276596, |
|
"grad_norm": 2.702519655227661, |
|
"learning_rate": 6.910449633501515e-09, |
|
"loss": 0.2222, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.9920212765957447, |
|
"grad_norm": 3.0397112369537354, |
|
"learning_rate": 3.887323817173272e-09, |
|
"loss": 0.2145, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.9946808510638298, |
|
"grad_norm": 2.342505931854248, |
|
"learning_rate": 1.7277616663946562e-09, |
|
"loss": 0.2471, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.9973404255319149, |
|
"grad_norm": 2.674713611602783, |
|
"learning_rate": 4.319497456273247e-10, |
|
"loss": 0.2519, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.508094310760498, |
|
"learning_rate": 0.0, |
|
"loss": 0.3025, |
|
"step": 376 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 376, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2170791543740826e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|