v3d_mistral_lora / last-checkpoint /trainer_state.json
mtzig's picture
Training in progress, step 376, checkpoint
991c0fd verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 20,
"global_step": 376,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_accuracy": 0.7505882352941177,
"eval_f1": 0.12396694214876033,
"eval_loss": 0.6111783981323242,
"eval_precision": 0.36585365853658536,
"eval_recall": 0.07462686567164178,
"eval_runtime": 34.3787,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 0.204,
"step": 0
},
{
"epoch": 0.0026595744680851063,
"grad_norm": 3.310136556625366,
"learning_rate": 5.263157894736843e-07,
"loss": 0.6542,
"step": 1
},
{
"epoch": 0.005319148936170213,
"grad_norm": 2.5591301918029785,
"learning_rate": 1.0526315789473685e-06,
"loss": 0.6609,
"step": 2
},
{
"epoch": 0.007978723404255319,
"grad_norm": 2.7341604232788086,
"learning_rate": 1.5789473684210526e-06,
"loss": 0.6752,
"step": 3
},
{
"epoch": 0.010638297872340425,
"grad_norm": 2.8091554641723633,
"learning_rate": 2.105263157894737e-06,
"loss": 0.618,
"step": 4
},
{
"epoch": 0.013297872340425532,
"grad_norm": 2.5653722286224365,
"learning_rate": 2.631578947368421e-06,
"loss": 0.6752,
"step": 5
},
{
"epoch": 0.015957446808510637,
"grad_norm": 3.4398417472839355,
"learning_rate": 3.157894736842105e-06,
"loss": 0.6783,
"step": 6
},
{
"epoch": 0.018617021276595744,
"grad_norm": 2.5178332328796387,
"learning_rate": 3.6842105263157896e-06,
"loss": 0.631,
"step": 7
},
{
"epoch": 0.02127659574468085,
"grad_norm": 2.8207452297210693,
"learning_rate": 4.210526315789474e-06,
"loss": 0.6361,
"step": 8
},
{
"epoch": 0.023936170212765957,
"grad_norm": 2.5525949001312256,
"learning_rate": 4.736842105263158e-06,
"loss": 0.6088,
"step": 9
},
{
"epoch": 0.026595744680851064,
"grad_norm": 2.415248155593872,
"learning_rate": 5.263157894736842e-06,
"loss": 0.6556,
"step": 10
},
{
"epoch": 0.02925531914893617,
"grad_norm": 3.0247888565063477,
"learning_rate": 5.789473684210527e-06,
"loss": 0.6039,
"step": 11
},
{
"epoch": 0.031914893617021274,
"grad_norm": 2.5060417652130127,
"learning_rate": 6.31578947368421e-06,
"loss": 0.6012,
"step": 12
},
{
"epoch": 0.034574468085106384,
"grad_norm": 2.770636558532715,
"learning_rate": 6.842105263157896e-06,
"loss": 0.6775,
"step": 13
},
{
"epoch": 0.03723404255319149,
"grad_norm": 2.582097291946411,
"learning_rate": 7.368421052631579e-06,
"loss": 0.5993,
"step": 14
},
{
"epoch": 0.0398936170212766,
"grad_norm": 2.5408666133880615,
"learning_rate": 7.894736842105265e-06,
"loss": 0.596,
"step": 15
},
{
"epoch": 0.0425531914893617,
"grad_norm": 2.478731870651245,
"learning_rate": 8.421052631578948e-06,
"loss": 0.5919,
"step": 16
},
{
"epoch": 0.04521276595744681,
"grad_norm": 2.5440561771392822,
"learning_rate": 8.947368421052632e-06,
"loss": 0.5223,
"step": 17
},
{
"epoch": 0.047872340425531915,
"grad_norm": 2.1620945930480957,
"learning_rate": 9.473684210526315e-06,
"loss": 0.5412,
"step": 18
},
{
"epoch": 0.05053191489361702,
"grad_norm": 2.2398860454559326,
"learning_rate": 1e-05,
"loss": 0.5847,
"step": 19
},
{
"epoch": 0.05319148936170213,
"grad_norm": 2.1689343452453613,
"learning_rate": 1.0526315789473684e-05,
"loss": 0.5265,
"step": 20
},
{
"epoch": 0.05319148936170213,
"eval_accuracy": 0.7647058823529411,
"eval_f1": 0.05660377358490566,
"eval_loss": 0.6081312894821167,
"eval_precision": 0.5454545454545454,
"eval_recall": 0.029850746268656716,
"eval_runtime": 34.9953,
"eval_samples_per_second": 6.401,
"eval_steps_per_second": 0.2,
"step": 20
},
{
"epoch": 0.05585106382978723,
"grad_norm": 1.8998128175735474,
"learning_rate": 1.105263157894737e-05,
"loss": 0.5347,
"step": 21
},
{
"epoch": 0.05851063829787234,
"grad_norm": 2.079780340194702,
"learning_rate": 1.1578947368421053e-05,
"loss": 0.4814,
"step": 22
},
{
"epoch": 0.061170212765957445,
"grad_norm": 1.8792980909347534,
"learning_rate": 1.2105263157894737e-05,
"loss": 0.5084,
"step": 23
},
{
"epoch": 0.06382978723404255,
"grad_norm": 1.9132519960403442,
"learning_rate": 1.263157894736842e-05,
"loss": 0.5027,
"step": 24
},
{
"epoch": 0.06648936170212766,
"grad_norm": 1.3962018489837646,
"learning_rate": 1.3157894736842108e-05,
"loss": 0.5136,
"step": 25
},
{
"epoch": 0.06914893617021277,
"grad_norm": 1.4877433776855469,
"learning_rate": 1.3684210526315791e-05,
"loss": 0.4567,
"step": 26
},
{
"epoch": 0.07180851063829788,
"grad_norm": 1.5485683679580688,
"learning_rate": 1.4210526315789475e-05,
"loss": 0.4365,
"step": 27
},
{
"epoch": 0.07446808510638298,
"grad_norm": 1.164844036102295,
"learning_rate": 1.4736842105263159e-05,
"loss": 0.4142,
"step": 28
},
{
"epoch": 0.07712765957446809,
"grad_norm": 1.354490876197815,
"learning_rate": 1.5263157894736846e-05,
"loss": 0.4492,
"step": 29
},
{
"epoch": 0.0797872340425532,
"grad_norm": 1.067051649093628,
"learning_rate": 1.578947368421053e-05,
"loss": 0.4294,
"step": 30
},
{
"epoch": 0.08244680851063829,
"grad_norm": 1.3097209930419922,
"learning_rate": 1.6315789473684213e-05,
"loss": 0.452,
"step": 31
},
{
"epoch": 0.0851063829787234,
"grad_norm": 0.9226462244987488,
"learning_rate": 1.6842105263157896e-05,
"loss": 0.3848,
"step": 32
},
{
"epoch": 0.08776595744680851,
"grad_norm": 1.1755656003952026,
"learning_rate": 1.736842105263158e-05,
"loss": 0.4307,
"step": 33
},
{
"epoch": 0.09042553191489362,
"grad_norm": 1.2210921049118042,
"learning_rate": 1.7894736842105264e-05,
"loss": 0.4232,
"step": 34
},
{
"epoch": 0.09308510638297872,
"grad_norm": 0.9078745245933533,
"learning_rate": 1.8421052631578947e-05,
"loss": 0.3752,
"step": 35
},
{
"epoch": 0.09574468085106383,
"grad_norm": 0.936310350894928,
"learning_rate": 1.894736842105263e-05,
"loss": 0.3655,
"step": 36
},
{
"epoch": 0.09840425531914894,
"grad_norm": 1.5738509893417358,
"learning_rate": 1.9473684210526318e-05,
"loss": 0.4547,
"step": 37
},
{
"epoch": 0.10106382978723404,
"grad_norm": 1.1838228702545166,
"learning_rate": 2e-05,
"loss": 0.4347,
"step": 38
},
{
"epoch": 0.10372340425531915,
"grad_norm": 1.4948188066482544,
"learning_rate": 1.9999568050254373e-05,
"loss": 0.4135,
"step": 39
},
{
"epoch": 0.10638297872340426,
"grad_norm": 1.1098586320877075,
"learning_rate": 1.9998272238333606e-05,
"loss": 0.4127,
"step": 40
},
{
"epoch": 0.10638297872340426,
"eval_accuracy": 0.768235294117647,
"eval_f1": 0.06635071090047394,
"eval_loss": 0.5411638021469116,
"eval_precision": 0.7,
"eval_recall": 0.03482587064676617,
"eval_runtime": 34.6313,
"eval_samples_per_second": 6.468,
"eval_steps_per_second": 0.202,
"step": 40
},
{
"epoch": 0.10904255319148937,
"grad_norm": 0.779417872428894,
"learning_rate": 1.999611267618283e-05,
"loss": 0.3893,
"step": 41
},
{
"epoch": 0.11170212765957446,
"grad_norm": 1.021106481552124,
"learning_rate": 1.99930895503665e-05,
"loss": 0.3104,
"step": 42
},
{
"epoch": 0.11436170212765957,
"grad_norm": 1.174867868423462,
"learning_rate": 1.998920312205231e-05,
"loss": 0.4124,
"step": 43
},
{
"epoch": 0.11702127659574468,
"grad_norm": 0.8697633743286133,
"learning_rate": 1.99844537269886e-05,
"loss": 0.3785,
"step": 44
},
{
"epoch": 0.1196808510638298,
"grad_norm": 0.9835452437400818,
"learning_rate": 1.9978841775475368e-05,
"loss": 0.4014,
"step": 45
},
{
"epoch": 0.12234042553191489,
"grad_norm": 1.1810511350631714,
"learning_rate": 1.9972367752328824e-05,
"loss": 0.3518,
"step": 46
},
{
"epoch": 0.125,
"grad_norm": 0.9265549778938293,
"learning_rate": 1.9965032216839493e-05,
"loss": 0.4024,
"step": 47
},
{
"epoch": 0.1276595744680851,
"grad_norm": 1.186259150505066,
"learning_rate": 1.9956835802723916e-05,
"loss": 0.3599,
"step": 48
},
{
"epoch": 0.13031914893617022,
"grad_norm": 1.2196171283721924,
"learning_rate": 1.994777921806989e-05,
"loss": 0.3411,
"step": 49
},
{
"epoch": 0.13297872340425532,
"grad_norm": 1.1862437725067139,
"learning_rate": 1.9937863245275303e-05,
"loss": 0.362,
"step": 50
},
{
"epoch": 0.1356382978723404,
"grad_norm": 1.319501280784607,
"learning_rate": 1.992708874098054e-05,
"loss": 0.4189,
"step": 51
},
{
"epoch": 0.13829787234042554,
"grad_norm": 0.9766789674758911,
"learning_rate": 1.991545663599448e-05,
"loss": 0.358,
"step": 52
},
{
"epoch": 0.14095744680851063,
"grad_norm": 1.0482966899871826,
"learning_rate": 1.990296793521408e-05,
"loss": 0.3736,
"step": 53
},
{
"epoch": 0.14361702127659576,
"grad_norm": 0.8634902834892273,
"learning_rate": 1.9889623717537564e-05,
"loss": 0.3582,
"step": 54
},
{
"epoch": 0.14627659574468085,
"grad_norm": 0.9416165947914124,
"learning_rate": 1.987542513577122e-05,
"loss": 0.3495,
"step": 55
},
{
"epoch": 0.14893617021276595,
"grad_norm": 0.9823614358901978,
"learning_rate": 1.9860373416529804e-05,
"loss": 0.3446,
"step": 56
},
{
"epoch": 0.15159574468085107,
"grad_norm": 0.8403105139732361,
"learning_rate": 1.984446986013057e-05,
"loss": 0.3177,
"step": 57
},
{
"epoch": 0.15425531914893617,
"grad_norm": 1.0707823038101196,
"learning_rate": 1.9827715840480962e-05,
"loss": 0.323,
"step": 58
},
{
"epoch": 0.15691489361702127,
"grad_norm": 0.933045506477356,
"learning_rate": 1.9810112804959867e-05,
"loss": 0.3123,
"step": 59
},
{
"epoch": 0.1595744680851064,
"grad_norm": 0.9361464977264404,
"learning_rate": 1.9791662274292638e-05,
"loss": 0.3347,
"step": 60
},
{
"epoch": 0.1595744680851064,
"eval_accuracy": 0.7741176470588236,
"eval_f1": 0.12727272727272726,
"eval_loss": 0.5019634962081909,
"eval_precision": 0.7368421052631579,
"eval_recall": 0.06965174129353234,
"eval_runtime": 34.7325,
"eval_samples_per_second": 6.449,
"eval_steps_per_second": 0.202,
"step": 60
},
{
"epoch": 0.1622340425531915,
"grad_norm": 0.8992587327957153,
"learning_rate": 1.977236584241968e-05,
"loss": 0.3457,
"step": 61
},
{
"epoch": 0.16489361702127658,
"grad_norm": 1.282809853553772,
"learning_rate": 1.9752225176358757e-05,
"loss": 0.3226,
"step": 62
},
{
"epoch": 0.1675531914893617,
"grad_norm": 2.4324252605438232,
"learning_rate": 1.9731242016060985e-05,
"loss": 0.4227,
"step": 63
},
{
"epoch": 0.1702127659574468,
"grad_norm": 1.0456701517105103,
"learning_rate": 1.9709418174260523e-05,
"loss": 0.3102,
"step": 64
},
{
"epoch": 0.17287234042553193,
"grad_norm": 1.2882471084594727,
"learning_rate": 1.9686755536317945e-05,
"loss": 0.3145,
"step": 65
},
{
"epoch": 0.17553191489361702,
"grad_norm": 1.1312603950500488,
"learning_rate": 1.9663256060057395e-05,
"loss": 0.3353,
"step": 66
},
{
"epoch": 0.17819148936170212,
"grad_norm": 1.0174272060394287,
"learning_rate": 1.9638921775597428e-05,
"loss": 0.2845,
"step": 67
},
{
"epoch": 0.18085106382978725,
"grad_norm": 1.241572380065918,
"learning_rate": 1.961375478517564e-05,
"loss": 0.3015,
"step": 68
},
{
"epoch": 0.18351063829787234,
"grad_norm": 1.3726611137390137,
"learning_rate": 1.958775726296706e-05,
"loss": 0.3671,
"step": 69
},
{
"epoch": 0.18617021276595744,
"grad_norm": 1.2311499118804932,
"learning_rate": 1.95609314548963e-05,
"loss": 0.2902,
"step": 70
},
{
"epoch": 0.18882978723404256,
"grad_norm": 1.3199646472930908,
"learning_rate": 1.953327967844356e-05,
"loss": 0.3594,
"step": 71
},
{
"epoch": 0.19148936170212766,
"grad_norm": 1.6513502597808838,
"learning_rate": 1.95048043224444e-05,
"loss": 0.2831,
"step": 72
},
{
"epoch": 0.19414893617021275,
"grad_norm": 1.763235330581665,
"learning_rate": 1.9475507846883377e-05,
"loss": 0.3675,
"step": 73
},
{
"epoch": 0.19680851063829788,
"grad_norm": 1.8195736408233643,
"learning_rate": 1.9445392782681523e-05,
"loss": 0.398,
"step": 74
},
{
"epoch": 0.19946808510638298,
"grad_norm": 1.9659175872802734,
"learning_rate": 1.94144617314777e-05,
"loss": 0.353,
"step": 75
},
{
"epoch": 0.20212765957446807,
"grad_norm": 1.60419762134552,
"learning_rate": 1.9382717365403854e-05,
"loss": 0.3565,
"step": 76
},
{
"epoch": 0.2047872340425532,
"grad_norm": 1.5443696975708008,
"learning_rate": 1.9350162426854152e-05,
"loss": 0.3246,
"step": 77
},
{
"epoch": 0.2074468085106383,
"grad_norm": 1.8536072969436646,
"learning_rate": 1.9316799728248074e-05,
"loss": 0.3491,
"step": 78
},
{
"epoch": 0.21010638297872342,
"grad_norm": 2.2563788890838623,
"learning_rate": 1.9282632151787462e-05,
"loss": 0.4211,
"step": 79
},
{
"epoch": 0.2127659574468085,
"grad_norm": 1.3425776958465576,
"learning_rate": 1.924766264920751e-05,
"loss": 0.3077,
"step": 80
},
{
"epoch": 0.2127659574468085,
"eval_accuracy": 0.7964705882352942,
"eval_f1": 0.3663003663003663,
"eval_loss": 0.4462856650352478,
"eval_precision": 0.6944444444444444,
"eval_recall": 0.24875621890547264,
"eval_runtime": 34.8097,
"eval_samples_per_second": 6.435,
"eval_steps_per_second": 0.201,
"step": 80
},
{
"epoch": 0.2154255319148936,
"grad_norm": 2.540194272994995,
"learning_rate": 1.9211894241521757e-05,
"loss": 0.3127,
"step": 81
},
{
"epoch": 0.21808510638297873,
"grad_norm": 1.8769720792770386,
"learning_rate": 1.917533001876113e-05,
"loss": 0.2998,
"step": 82
},
{
"epoch": 0.22074468085106383,
"grad_norm": 1.4883919954299927,
"learning_rate": 1.9137973139706973e-05,
"loss": 0.3245,
"step": 83
},
{
"epoch": 0.22340425531914893,
"grad_norm": 1.6703698635101318,
"learning_rate": 1.9099826831618168e-05,
"loss": 0.3199,
"step": 84
},
{
"epoch": 0.22606382978723405,
"grad_norm": 2.8918988704681396,
"learning_rate": 1.9060894389952328e-05,
"loss": 0.2825,
"step": 85
},
{
"epoch": 0.22872340425531915,
"grad_norm": 1.5494073629379272,
"learning_rate": 1.9021179178081107e-05,
"loss": 0.3213,
"step": 86
},
{
"epoch": 0.23138297872340424,
"grad_norm": 1.331063151359558,
"learning_rate": 1.898068462699964e-05,
"loss": 0.2572,
"step": 87
},
{
"epoch": 0.23404255319148937,
"grad_norm": 1.5478427410125732,
"learning_rate": 1.8939414235030137e-05,
"loss": 0.3001,
"step": 88
},
{
"epoch": 0.23670212765957446,
"grad_norm": 2.469545602798462,
"learning_rate": 1.889737156751965e-05,
"loss": 0.3199,
"step": 89
},
{
"epoch": 0.2393617021276596,
"grad_norm": 2.134981155395508,
"learning_rate": 1.8854560256532098e-05,
"loss": 0.3192,
"step": 90
},
{
"epoch": 0.24202127659574468,
"grad_norm": 1.727616548538208,
"learning_rate": 1.8810984000534457e-05,
"loss": 0.3072,
"step": 91
},
{
"epoch": 0.24468085106382978,
"grad_norm": 2.0483591556549072,
"learning_rate": 1.8766646564077265e-05,
"loss": 0.297,
"step": 92
},
{
"epoch": 0.2473404255319149,
"grad_norm": 1.8018875122070312,
"learning_rate": 1.8721551777469397e-05,
"loss": 0.2798,
"step": 93
},
{
"epoch": 0.25,
"grad_norm": 1.7120018005371094,
"learning_rate": 1.8675703536447178e-05,
"loss": 0.2438,
"step": 94
},
{
"epoch": 0.2526595744680851,
"grad_norm": 1.8456470966339111,
"learning_rate": 1.862910580183782e-05,
"loss": 0.333,
"step": 95
},
{
"epoch": 0.2553191489361702,
"grad_norm": 2.701077461242676,
"learning_rate": 1.858176259921724e-05,
"loss": 0.3214,
"step": 96
},
{
"epoch": 0.2579787234042553,
"grad_norm": 1.6109999418258667,
"learning_rate": 1.853367801856231e-05,
"loss": 0.2701,
"step": 97
},
{
"epoch": 0.26063829787234044,
"grad_norm": 1.524688482284546,
"learning_rate": 1.8484856213897496e-05,
"loss": 0.2455,
"step": 98
},
{
"epoch": 0.2632978723404255,
"grad_norm": 1.8650296926498413,
"learning_rate": 1.843530140293603e-05,
"loss": 0.273,
"step": 99
},
{
"epoch": 0.26595744680851063,
"grad_norm": 1.5295664072036743,
"learning_rate": 1.8385017866715507e-05,
"loss": 0.307,
"step": 100
},
{
"epoch": 0.26595744680851063,
"eval_accuracy": 0.8,
"eval_f1": 0.4097222222222222,
"eval_loss": 0.44977959990501404,
"eval_precision": 0.6781609195402298,
"eval_recall": 0.2935323383084577,
"eval_runtime": 34.7061,
"eval_samples_per_second": 6.454,
"eval_steps_per_second": 0.202,
"step": 100
},
{
"epoch": 0.26861702127659576,
"grad_norm": 2.1255381107330322,
"learning_rate": 1.833400994922806e-05,
"loss": 0.2532,
"step": 101
},
{
"epoch": 0.2712765957446808,
"grad_norm": 2.4879391193389893,
"learning_rate": 1.8282282057045087e-05,
"loss": 0.3593,
"step": 102
},
{
"epoch": 0.27393617021276595,
"grad_norm": 2.0561375617980957,
"learning_rate": 1.8229838658936566e-05,
"loss": 0.266,
"step": 103
},
{
"epoch": 0.2765957446808511,
"grad_norm": 2.101980447769165,
"learning_rate": 1.8176684285484985e-05,
"loss": 0.3686,
"step": 104
},
{
"epoch": 0.27925531914893614,
"grad_norm": 2.0041894912719727,
"learning_rate": 1.8122823528693966e-05,
"loss": 0.2551,
"step": 105
},
{
"epoch": 0.28191489361702127,
"grad_norm": 1.981961727142334,
"learning_rate": 1.8068261041591548e-05,
"loss": 0.2932,
"step": 106
},
{
"epoch": 0.2845744680851064,
"grad_norm": 2.636021614074707,
"learning_rate": 1.8013001537828213e-05,
"loss": 0.2584,
"step": 107
},
{
"epoch": 0.2872340425531915,
"grad_norm": 2.6354217529296875,
"learning_rate": 1.7957049791269684e-05,
"loss": 0.3208,
"step": 108
},
{
"epoch": 0.2898936170212766,
"grad_norm": 3.6334121227264404,
"learning_rate": 1.79004106355845e-05,
"loss": 0.3142,
"step": 109
},
{
"epoch": 0.2925531914893617,
"grad_norm": 2.6944894790649414,
"learning_rate": 1.7843088963826437e-05,
"loss": 0.2854,
"step": 110
},
{
"epoch": 0.29521276595744683,
"grad_norm": 4.576889514923096,
"learning_rate": 1.7785089728011798e-05,
"loss": 0.2685,
"step": 111
},
{
"epoch": 0.2978723404255319,
"grad_norm": 2.23494029045105,
"learning_rate": 1.772641793869162e-05,
"loss": 0.2604,
"step": 112
},
{
"epoch": 0.300531914893617,
"grad_norm": 3.0733425617218018,
"learning_rate": 1.7667078664518796e-05,
"loss": 0.2542,
"step": 113
},
{
"epoch": 0.30319148936170215,
"grad_norm": 1.9046289920806885,
"learning_rate": 1.7607077031810204e-05,
"loss": 0.2879,
"step": 114
},
{
"epoch": 0.3058510638297872,
"grad_norm": 2.2374041080474854,
"learning_rate": 1.7546418224103838e-05,
"loss": 0.2998,
"step": 115
},
{
"epoch": 0.30851063829787234,
"grad_norm": 5.9824395179748535,
"learning_rate": 1.7485107481711014e-05,
"loss": 0.3637,
"step": 116
},
{
"epoch": 0.31117021276595747,
"grad_norm": 3.0998919010162354,
"learning_rate": 1.7423150101263645e-05,
"loss": 0.2746,
"step": 117
},
{
"epoch": 0.31382978723404253,
"grad_norm": 2.05523419380188,
"learning_rate": 1.7360551435256673e-05,
"loss": 0.2776,
"step": 118
},
{
"epoch": 0.31648936170212766,
"grad_norm": 2.1908273696899414,
"learning_rate": 1.729731689158568e-05,
"loss": 0.3184,
"step": 119
},
{
"epoch": 0.3191489361702128,
"grad_norm": 2.3177342414855957,
"learning_rate": 1.7233451933079663e-05,
"loss": 0.2413,
"step": 120
},
{
"epoch": 0.3191489361702128,
"eval_accuracy": 0.8141176470588235,
"eval_f1": 0.48026315789473684,
"eval_loss": 0.43155437707901,
"eval_precision": 0.7087378640776699,
"eval_recall": 0.36318407960199006,
"eval_runtime": 34.0012,
"eval_samples_per_second": 6.588,
"eval_steps_per_second": 0.206,
"step": 120
},
{
"epoch": 0.32180851063829785,
"grad_norm": 2.1571784019470215,
"learning_rate": 1.7168962077029146e-05,
"loss": 0.3229,
"step": 121
},
{
"epoch": 0.324468085106383,
"grad_norm": 3.056910991668701,
"learning_rate": 1.7103852894709517e-05,
"loss": 0.3116,
"step": 122
},
{
"epoch": 0.3271276595744681,
"grad_norm": 1.9665093421936035,
"learning_rate": 1.7038130010899716e-05,
"loss": 0.2743,
"step": 123
},
{
"epoch": 0.32978723404255317,
"grad_norm": 2.3583879470825195,
"learning_rate": 1.6971799103396332e-05,
"loss": 0.2776,
"step": 124
},
{
"epoch": 0.3324468085106383,
"grad_norm": 2.8476576805114746,
"learning_rate": 1.6904865902523098e-05,
"loss": 0.3213,
"step": 125
},
{
"epoch": 0.3351063829787234,
"grad_norm": 1.9458303451538086,
"learning_rate": 1.6837336190635824e-05,
"loss": 0.2771,
"step": 126
},
{
"epoch": 0.3377659574468085,
"grad_norm": 2.4472289085388184,
"learning_rate": 1.6769215801622884e-05,
"loss": 0.2924,
"step": 127
},
{
"epoch": 0.3404255319148936,
"grad_norm": 2.520463228225708,
"learning_rate": 1.6700510620401223e-05,
"loss": 0.269,
"step": 128
},
{
"epoch": 0.34308510638297873,
"grad_norm": 2.2465851306915283,
"learning_rate": 1.6631226582407954e-05,
"loss": 0.3043,
"step": 129
},
{
"epoch": 0.34574468085106386,
"grad_norm": 2.4705588817596436,
"learning_rate": 1.6561369673087588e-05,
"loss": 0.3375,
"step": 130
},
{
"epoch": 0.3484042553191489,
"grad_norm": 2.332902669906616,
"learning_rate": 1.649094592737497e-05,
"loss": 0.2313,
"step": 131
},
{
"epoch": 0.35106382978723405,
"grad_norm": 2.050671100616455,
"learning_rate": 1.641996142917391e-05,
"loss": 0.3066,
"step": 132
},
{
"epoch": 0.3537234042553192,
"grad_norm": 3.541461706161499,
"learning_rate": 1.63484223108316e-05,
"loss": 0.2937,
"step": 133
},
{
"epoch": 0.35638297872340424,
"grad_norm": 2.344451665878296,
"learning_rate": 1.6276334752608823e-05,
"loss": 0.2666,
"step": 134
},
{
"epoch": 0.35904255319148937,
"grad_norm": 2.1711394786834717,
"learning_rate": 1.6203704982146073e-05,
"loss": 0.2457,
"step": 135
},
{
"epoch": 0.3617021276595745,
"grad_norm": 3.414870023727417,
"learning_rate": 1.613053927392553e-05,
"loss": 0.331,
"step": 136
},
{
"epoch": 0.36436170212765956,
"grad_norm": 3.037440299987793,
"learning_rate": 1.6056843948729e-05,
"loss": 0.3025,
"step": 137
},
{
"epoch": 0.3670212765957447,
"grad_norm": 3.548393726348877,
"learning_rate": 1.5982625373091877e-05,
"loss": 0.3203,
"step": 138
},
{
"epoch": 0.3696808510638298,
"grad_norm": 2.598219633102417,
"learning_rate": 1.5907889958753134e-05,
"loss": 0.3155,
"step": 139
},
{
"epoch": 0.3723404255319149,
"grad_norm": 2.790419101715088,
"learning_rate": 1.5832644162101417e-05,
"loss": 0.326,
"step": 140
},
{
"epoch": 0.3723404255319149,
"eval_accuracy": 0.8235294117647058,
"eval_f1": 0.5222929936305732,
"eval_loss": 0.4106709063053131,
"eval_precision": 0.7256637168141593,
"eval_recall": 0.4079601990049751,
"eval_runtime": 33.9867,
"eval_samples_per_second": 6.591,
"eval_steps_per_second": 0.206,
"step": 140
},
{
"epoch": 0.375,
"grad_norm": 3.642287492752075,
"learning_rate": 1.5756894483617268e-05,
"loss": 0.2809,
"step": 141
},
{
"epoch": 0.3776595744680851,
"grad_norm": 2.40323805809021,
"learning_rate": 1.568064746731156e-05,
"loss": 0.2835,
"step": 142
},
{
"epoch": 0.3803191489361702,
"grad_norm": 1.9183332920074463,
"learning_rate": 1.560390970016015e-05,
"loss": 0.2534,
"step": 143
},
{
"epoch": 0.3829787234042553,
"grad_norm": 3.2929575443267822,
"learning_rate": 1.552668781153484e-05,
"loss": 0.373,
"step": 144
},
{
"epoch": 0.38563829787234044,
"grad_norm": 2.27150559425354,
"learning_rate": 1.5448988472630654e-05,
"loss": 0.2783,
"step": 145
},
{
"epoch": 0.3882978723404255,
"grad_norm": 2.780089855194092,
"learning_rate": 1.5370818395889536e-05,
"loss": 0.322,
"step": 146
},
{
"epoch": 0.39095744680851063,
"grad_norm": 2.2651729583740234,
"learning_rate": 1.5292184334420434e-05,
"loss": 0.3145,
"step": 147
},
{
"epoch": 0.39361702127659576,
"grad_norm": 2.8416588306427,
"learning_rate": 1.521309308141592e-05,
"loss": 0.2979,
"step": 148
},
{
"epoch": 0.3962765957446808,
"grad_norm": 2.6914663314819336,
"learning_rate": 1.5133551469565313e-05,
"loss": 0.3314,
"step": 149
},
{
"epoch": 0.39893617021276595,
"grad_norm": 4.730180740356445,
"learning_rate": 1.5053566370464416e-05,
"loss": 0.2545,
"step": 150
},
{
"epoch": 0.4015957446808511,
"grad_norm": 2.2047128677368164,
"learning_rate": 1.4973144694021874e-05,
"loss": 0.2487,
"step": 151
},
{
"epoch": 0.40425531914893614,
"grad_norm": 2.841487407684326,
"learning_rate": 1.4892293387862221e-05,
"loss": 0.3067,
"step": 152
},
{
"epoch": 0.40691489361702127,
"grad_norm": 5.28929328918457,
"learning_rate": 1.4811019436725684e-05,
"loss": 0.242,
"step": 153
},
{
"epoch": 0.4095744680851064,
"grad_norm": 3.347501039505005,
"learning_rate": 1.472932986186477e-05,
"loss": 0.207,
"step": 154
},
{
"epoch": 0.4122340425531915,
"grad_norm": 3.1569905281066895,
"learning_rate": 1.4647231720437687e-05,
"loss": 0.3062,
"step": 155
},
{
"epoch": 0.4148936170212766,
"grad_norm": 2.134598970413208,
"learning_rate": 1.4564732104898702e-05,
"loss": 0.2443,
"step": 156
},
{
"epoch": 0.4175531914893617,
"grad_norm": 2.528136968612671,
"learning_rate": 1.4481838142385403e-05,
"loss": 0.2308,
"step": 157
},
{
"epoch": 0.42021276595744683,
"grad_norm": 2.756695032119751,
"learning_rate": 1.4398556994102996e-05,
"loss": 0.2461,
"step": 158
},
{
"epoch": 0.4228723404255319,
"grad_norm": 4.9117631912231445,
"learning_rate": 1.4314895854705641e-05,
"loss": 0.2911,
"step": 159
},
{
"epoch": 0.425531914893617,
"grad_norm": 2.877560615539551,
"learning_rate": 1.4230861951674914e-05,
"loss": 0.2404,
"step": 160
},
{
"epoch": 0.425531914893617,
"eval_accuracy": 0.8094117647058824,
"eval_f1": 0.40875912408759124,
"eval_loss": 0.46145251393318176,
"eval_precision": 0.7671232876712328,
"eval_recall": 0.27860696517412936,
"eval_runtime": 34.0326,
"eval_samples_per_second": 6.582,
"eval_steps_per_second": 0.206,
"step": 160
},
{
"epoch": 0.42819148936170215,
"grad_norm": 4.159635066986084,
"learning_rate": 1.4146462544695428e-05,
"loss": 0.2858,
"step": 161
},
{
"epoch": 0.4308510638297872,
"grad_norm": 2.716390609741211,
"learning_rate": 1.4061704925027653e-05,
"loss": 0.2299,
"step": 162
},
{
"epoch": 0.43351063829787234,
"grad_norm": 2.3737223148345947,
"learning_rate": 1.3976596414878044e-05,
"loss": 0.2371,
"step": 163
},
{
"epoch": 0.43617021276595747,
"grad_norm": 3.5703928470611572,
"learning_rate": 1.3891144366766457e-05,
"loss": 0.3007,
"step": 164
},
{
"epoch": 0.43882978723404253,
"grad_norm": 2.449308156967163,
"learning_rate": 1.380535616289099e-05,
"loss": 0.2414,
"step": 165
},
{
"epoch": 0.44148936170212766,
"grad_norm": 3.272531509399414,
"learning_rate": 1.3719239214490203e-05,
"loss": 0.2961,
"step": 166
},
{
"epoch": 0.4441489361702128,
"grad_norm": 3.6306636333465576,
"learning_rate": 1.363280096120289e-05,
"loss": 0.2923,
"step": 167
},
{
"epoch": 0.44680851063829785,
"grad_norm": 2.5956878662109375,
"learning_rate": 1.3546048870425356e-05,
"loss": 0.251,
"step": 168
},
{
"epoch": 0.449468085106383,
"grad_norm": 5.468013286590576,
"learning_rate": 1.3458990436666313e-05,
"loss": 0.287,
"step": 169
},
{
"epoch": 0.4521276595744681,
"grad_norm": 2.5763583183288574,
"learning_rate": 1.3371633180899417e-05,
"loss": 0.2779,
"step": 170
},
{
"epoch": 0.45478723404255317,
"grad_norm": 3.8822455406188965,
"learning_rate": 1.3283984649913552e-05,
"loss": 0.2197,
"step": 171
},
{
"epoch": 0.4574468085106383,
"grad_norm": 2.4867823123931885,
"learning_rate": 1.3196052415660856e-05,
"loss": 0.2875,
"step": 172
},
{
"epoch": 0.4601063829787234,
"grad_norm": 2.161820888519287,
"learning_rate": 1.3107844074602566e-05,
"loss": 0.2416,
"step": 173
},
{
"epoch": 0.4627659574468085,
"grad_norm": 3.0401649475097656,
"learning_rate": 1.3019367247052781e-05,
"loss": 0.2634,
"step": 174
},
{
"epoch": 0.4654255319148936,
"grad_norm": 2.273088216781616,
"learning_rate": 1.2930629576520133e-05,
"loss": 0.2709,
"step": 175
},
{
"epoch": 0.46808510638297873,
"grad_norm": 3.001025438308716,
"learning_rate": 1.2841638729047463e-05,
"loss": 0.2806,
"step": 176
},
{
"epoch": 0.47074468085106386,
"grad_norm": 2.348917245864868,
"learning_rate": 1.2752402392549556e-05,
"loss": 0.2702,
"step": 177
},
{
"epoch": 0.4734042553191489,
"grad_norm": 2.713019847869873,
"learning_rate": 1.2662928276148985e-05,
"loss": 0.2588,
"step": 178
},
{
"epoch": 0.47606382978723405,
"grad_norm": 3.061501979827881,
"learning_rate": 1.2573224109510112e-05,
"loss": 0.2701,
"step": 179
},
{
"epoch": 0.4787234042553192,
"grad_norm": 5.120430946350098,
"learning_rate": 1.2483297642171332e-05,
"loss": 0.2962,
"step": 180
},
{
"epoch": 0.4787234042553192,
"eval_accuracy": 0.8282352941176471,
"eval_f1": 0.5228758169934641,
"eval_loss": 0.42048707604408264,
"eval_precision": 0.7619047619047619,
"eval_recall": 0.39800995024875624,
"eval_runtime": 34.4467,
"eval_samples_per_second": 6.503,
"eval_steps_per_second": 0.203,
"step": 180
},
{
"epoch": 0.48138297872340424,
"grad_norm": 2.8563108444213867,
"learning_rate": 1.2393156642875579e-05,
"loss": 0.2855,
"step": 181
},
{
"epoch": 0.48404255319148937,
"grad_norm": 3.6837549209594727,
"learning_rate": 1.23028088988992e-05,
"loss": 0.2976,
"step": 182
},
{
"epoch": 0.4867021276595745,
"grad_norm": 3.085362434387207,
"learning_rate": 1.2212262215379199e-05,
"loss": 0.2775,
"step": 183
},
{
"epoch": 0.48936170212765956,
"grad_norm": 3.395561695098877,
"learning_rate": 1.2121524414638958e-05,
"loss": 0.3076,
"step": 184
},
{
"epoch": 0.4920212765957447,
"grad_norm": 3.6867411136627197,
"learning_rate": 1.2030603335512467e-05,
"loss": 0.2402,
"step": 185
},
{
"epoch": 0.4946808510638298,
"grad_norm": 5.76826810836792,
"learning_rate": 1.1939506832667129e-05,
"loss": 0.2715,
"step": 186
},
{
"epoch": 0.4973404255319149,
"grad_norm": 3.938023328781128,
"learning_rate": 1.1848242775925188e-05,
"loss": 0.2773,
"step": 187
},
{
"epoch": 0.5,
"grad_norm": 3.6675262451171875,
"learning_rate": 1.1756819049583861e-05,
"loss": 0.2752,
"step": 188
},
{
"epoch": 0.5026595744680851,
"grad_norm": 2.274174213409424,
"learning_rate": 1.166524355173422e-05,
"loss": 0.2545,
"step": 189
},
{
"epoch": 0.5053191489361702,
"grad_norm": 3.854417562484741,
"learning_rate": 1.1573524193578863e-05,
"loss": 0.2804,
"step": 190
},
{
"epoch": 0.5079787234042553,
"grad_norm": 5.1708550453186035,
"learning_rate": 1.1481668898748474e-05,
"loss": 0.2371,
"step": 191
},
{
"epoch": 0.5106382978723404,
"grad_norm": 4.153345584869385,
"learning_rate": 1.1389685602617302e-05,
"loss": 0.2405,
"step": 192
},
{
"epoch": 0.5132978723404256,
"grad_norm": 3.244084119796753,
"learning_rate": 1.1297582251617618e-05,
"loss": 0.2737,
"step": 193
},
{
"epoch": 0.5159574468085106,
"grad_norm": 2.50569486618042,
"learning_rate": 1.1205366802553231e-05,
"loss": 0.2647,
"step": 194
},
{
"epoch": 0.5186170212765957,
"grad_norm": 2.3251872062683105,
"learning_rate": 1.1113047221912097e-05,
"loss": 0.1958,
"step": 195
},
{
"epoch": 0.5212765957446809,
"grad_norm": 2.288127899169922,
"learning_rate": 1.1020631485178084e-05,
"loss": 0.2109,
"step": 196
},
{
"epoch": 0.523936170212766,
"grad_norm": 4.095820426940918,
"learning_rate": 1.0928127576141992e-05,
"loss": 0.2998,
"step": 197
},
{
"epoch": 0.526595744680851,
"grad_norm": 5.008273601531982,
"learning_rate": 1.0835543486211815e-05,
"loss": 0.2841,
"step": 198
},
{
"epoch": 0.5292553191489362,
"grad_norm": 5.711911678314209,
"learning_rate": 1.0742887213722372e-05,
"loss": 0.2488,
"step": 199
},
{
"epoch": 0.5319148936170213,
"grad_norm": 5.29080867767334,
"learning_rate": 1.065016676324433e-05,
"loss": 0.2727,
"step": 200
},
{
"epoch": 0.5319148936170213,
"eval_accuracy": 0.8,
"eval_f1": 0.34615384615384615,
"eval_loss": 0.4829849600791931,
"eval_precision": 0.7627118644067796,
"eval_recall": 0.22388059701492538,
"eval_runtime": 33.8087,
"eval_samples_per_second": 6.626,
"eval_steps_per_second": 0.207,
"step": 200
},
{
"epoch": 0.5345744680851063,
"grad_norm": 6.333003044128418,
"learning_rate": 1.0557390144892684e-05,
"loss": 0.3334,
"step": 201
},
{
"epoch": 0.5372340425531915,
"grad_norm": 2.1432178020477295,
"learning_rate": 1.0464565373634784e-05,
"loss": 0.2513,
"step": 202
},
{
"epoch": 0.5398936170212766,
"grad_norm": 5.119022369384766,
"learning_rate": 1.0371700468597886e-05,
"loss": 0.2566,
"step": 203
},
{
"epoch": 0.5425531914893617,
"grad_norm": 3.5691733360290527,
"learning_rate": 1.0278803452376416e-05,
"loss": 0.3084,
"step": 204
},
{
"epoch": 0.5452127659574468,
"grad_norm": 3.0961036682128906,
"learning_rate": 1.018588235033888e-05,
"loss": 0.2085,
"step": 205
},
{
"epoch": 0.5478723404255319,
"grad_norm": 2.27486515045166,
"learning_rate": 1.0092945189934558e-05,
"loss": 0.2524,
"step": 206
},
{
"epoch": 0.550531914893617,
"grad_norm": 2.3716437816619873,
"learning_rate": 1e-05,
"loss": 0.2011,
"step": 207
},
{
"epoch": 0.5531914893617021,
"grad_norm": 2.6007697582244873,
"learning_rate": 9.907054810065446e-06,
"loss": 0.2451,
"step": 208
},
{
"epoch": 0.5558510638297872,
"grad_norm": 2.5963995456695557,
"learning_rate": 9.81411764966112e-06,
"loss": 0.2705,
"step": 209
},
{
"epoch": 0.5585106382978723,
"grad_norm": 2.1203646659851074,
"learning_rate": 9.721196547623585e-06,
"loss": 0.2101,
"step": 210
},
{
"epoch": 0.5611702127659575,
"grad_norm": 3.2986724376678467,
"learning_rate": 9.628299531402118e-06,
"loss": 0.2659,
"step": 211
},
{
"epoch": 0.5638297872340425,
"grad_norm": 2.127525568008423,
"learning_rate": 9.535434626365221e-06,
"loss": 0.251,
"step": 212
},
{
"epoch": 0.5664893617021277,
"grad_norm": 3.1327059268951416,
"learning_rate": 9.442609855107317e-06,
"loss": 0.2255,
"step": 213
},
{
"epoch": 0.5691489361702128,
"grad_norm": 2.0999770164489746,
"learning_rate": 9.349833236755675e-06,
"loss": 0.2549,
"step": 214
},
{
"epoch": 0.5718085106382979,
"grad_norm": 2.7766880989074707,
"learning_rate": 9.257112786277631e-06,
"loss": 0.2224,
"step": 215
},
{
"epoch": 0.574468085106383,
"grad_norm": 2.451842784881592,
"learning_rate": 9.164456513788186e-06,
"loss": 0.2599,
"step": 216
},
{
"epoch": 0.5771276595744681,
"grad_norm": 2.7746975421905518,
"learning_rate": 9.07187242385801e-06,
"loss": 0.2601,
"step": 217
},
{
"epoch": 0.5797872340425532,
"grad_norm": 2.561441421508789,
"learning_rate": 8.979368514821917e-06,
"loss": 0.284,
"step": 218
},
{
"epoch": 0.5824468085106383,
"grad_norm": 2.425262928009033,
"learning_rate": 8.88695277808791e-06,
"loss": 0.2593,
"step": 219
},
{
"epoch": 0.5851063829787234,
"grad_norm": 3.180457830429077,
"learning_rate": 8.79463319744677e-06,
"loss": 0.2844,
"step": 220
},
{
"epoch": 0.5851063829787234,
"eval_accuracy": 0.8258823529411765,
"eval_f1": 0.5163398692810458,
"eval_loss": 0.41871950030326843,
"eval_precision": 0.7523809523809524,
"eval_recall": 0.39303482587064675,
"eval_runtime": 34.0471,
"eval_samples_per_second": 6.579,
"eval_steps_per_second": 0.206,
"step": 220
},
{
"epoch": 0.5877659574468085,
"grad_norm": 2.8783645629882812,
"learning_rate": 8.702417748382384e-06,
"loss": 0.2458,
"step": 221
},
{
"epoch": 0.5904255319148937,
"grad_norm": 2.950291395187378,
"learning_rate": 8.610314397382701e-06,
"loss": 0.3062,
"step": 222
},
{
"epoch": 0.5930851063829787,
"grad_norm": 2.8430628776550293,
"learning_rate": 8.51833110125153e-06,
"loss": 0.2913,
"step": 223
},
{
"epoch": 0.5957446808510638,
"grad_norm": 6.691501617431641,
"learning_rate": 8.426475806421139e-06,
"loss": 0.3716,
"step": 224
},
{
"epoch": 0.598404255319149,
"grad_norm": 2.705397367477417,
"learning_rate": 8.334756448265782e-06,
"loss": 0.2692,
"step": 225
},
{
"epoch": 0.601063829787234,
"grad_norm": 2.276686429977417,
"learning_rate": 8.243180950416142e-06,
"loss": 0.214,
"step": 226
},
{
"epoch": 0.6037234042553191,
"grad_norm": 4.622035980224609,
"learning_rate": 8.151757224074815e-06,
"loss": 0.1863,
"step": 227
},
{
"epoch": 0.6063829787234043,
"grad_norm": 2.3402657508850098,
"learning_rate": 8.060493167332874e-06,
"loss": 0.2895,
"step": 228
},
{
"epoch": 0.6090425531914894,
"grad_norm": 4.533783912658691,
"learning_rate": 7.969396664487534e-06,
"loss": 0.256,
"step": 229
},
{
"epoch": 0.6117021276595744,
"grad_norm": 4.254709243774414,
"learning_rate": 7.878475585361045e-06,
"loss": 0.2798,
"step": 230
},
{
"epoch": 0.6143617021276596,
"grad_norm": 2.4173777103424072,
"learning_rate": 7.787737784620803e-06,
"loss": 0.3046,
"step": 231
},
{
"epoch": 0.6170212765957447,
"grad_norm": 2.9640042781829834,
"learning_rate": 7.697191101100802e-06,
"loss": 0.2893,
"step": 232
},
{
"epoch": 0.6196808510638298,
"grad_norm": 2.9573986530303955,
"learning_rate": 7.606843357124426e-06,
"loss": 0.2764,
"step": 233
},
{
"epoch": 0.6223404255319149,
"grad_norm": 3.9960691928863525,
"learning_rate": 7.516702357828672e-06,
"loss": 0.3243,
"step": 234
},
{
"epoch": 0.625,
"grad_norm": 2.9117209911346436,
"learning_rate": 7.42677589048989e-06,
"loss": 0.2863,
"step": 235
},
{
"epoch": 0.6276595744680851,
"grad_norm": 2.57856822013855,
"learning_rate": 7.337071723851018e-06,
"loss": 0.2433,
"step": 236
},
{
"epoch": 0.6303191489361702,
"grad_norm": 3.1635406017303467,
"learning_rate": 7.247597607450446e-06,
"loss": 0.2622,
"step": 237
},
{
"epoch": 0.6329787234042553,
"grad_norm": 3.4039433002471924,
"learning_rate": 7.1583612709525405e-06,
"loss": 0.2313,
"step": 238
},
{
"epoch": 0.6356382978723404,
"grad_norm": 3.072800397872925,
"learning_rate": 7.06937042347987e-06,
"loss": 0.3117,
"step": 239
},
{
"epoch": 0.6382978723404256,
"grad_norm": 3.175246000289917,
"learning_rate": 6.980632752947221e-06,
"loss": 0.2632,
"step": 240
},
{
"epoch": 0.6382978723404256,
"eval_accuracy": 0.8235294117647058,
"eval_f1": 0.5161290322580645,
"eval_loss": 0.4037013053894043,
"eval_precision": 0.7339449541284404,
"eval_recall": 0.39800995024875624,
"eval_runtime": 34.0215,
"eval_samples_per_second": 6.584,
"eval_steps_per_second": 0.206,
"step": 240
},
{
"epoch": 0.6409574468085106,
"grad_norm": 2.5714304447174072,
"learning_rate": 6.892155925397437e-06,
"loss": 0.2749,
"step": 241
},
{
"epoch": 0.6436170212765957,
"grad_norm": 3.128525733947754,
"learning_rate": 6.803947584339148e-06,
"loss": 0.3527,
"step": 242
},
{
"epoch": 0.6462765957446809,
"grad_norm": 3.6604840755462646,
"learning_rate": 6.716015350086449e-06,
"loss": 0.2686,
"step": 243
},
{
"epoch": 0.648936170212766,
"grad_norm": 2.6133296489715576,
"learning_rate": 6.628366819100586e-06,
"loss": 0.2836,
"step": 244
},
{
"epoch": 0.651595744680851,
"grad_norm": 2.5161774158477783,
"learning_rate": 6.54100956333369e-06,
"loss": 0.2395,
"step": 245
},
{
"epoch": 0.6542553191489362,
"grad_norm": 2.824259042739868,
"learning_rate": 6.453951129574644e-06,
"loss": 0.2906,
"step": 246
},
{
"epoch": 0.6569148936170213,
"grad_norm": 2.747422456741333,
"learning_rate": 6.3671990387971096e-06,
"loss": 0.2368,
"step": 247
},
{
"epoch": 0.6595744680851063,
"grad_norm": 2.540599822998047,
"learning_rate": 6.280760785509802e-06,
"loss": 0.3036,
"step": 248
},
{
"epoch": 0.6622340425531915,
"grad_norm": 2.4649527072906494,
"learning_rate": 6.194643837109015e-06,
"loss": 0.2935,
"step": 249
},
{
"epoch": 0.6648936170212766,
"grad_norm": 2.2564632892608643,
"learning_rate": 6.108855633233546e-06,
"loss": 0.2276,
"step": 250
},
{
"epoch": 0.6675531914893617,
"grad_norm": 2.5052363872528076,
"learning_rate": 6.0234035851219604e-06,
"loss": 0.2464,
"step": 251
},
{
"epoch": 0.6702127659574468,
"grad_norm": 3.091642141342163,
"learning_rate": 5.93829507497235e-06,
"loss": 0.2766,
"step": 252
},
{
"epoch": 0.6728723404255319,
"grad_norm": 3.3672595024108887,
"learning_rate": 5.853537455304575e-06,
"loss": 0.2567,
"step": 253
},
{
"epoch": 0.675531914893617,
"grad_norm": 2.4779727458953857,
"learning_rate": 5.769138048325087e-06,
"loss": 0.2628,
"step": 254
},
{
"epoch": 0.6781914893617021,
"grad_norm": 2.5639469623565674,
"learning_rate": 5.685104145294364e-06,
"loss": 0.2204,
"step": 255
},
{
"epoch": 0.6808510638297872,
"grad_norm": 3.3351776599884033,
"learning_rate": 5.601443005897012e-06,
"loss": 0.2535,
"step": 256
},
{
"epoch": 0.6835106382978723,
"grad_norm": 2.3642754554748535,
"learning_rate": 5.5181618576146e-06,
"loss": 0.2234,
"step": 257
},
{
"epoch": 0.6861702127659575,
"grad_norm": 2.9997129440307617,
"learning_rate": 5.435267895101303e-06,
"loss": 0.2643,
"step": 258
},
{
"epoch": 0.6888297872340425,
"grad_norm": 2.4532787799835205,
"learning_rate": 5.352768279562315e-06,
"loss": 0.2621,
"step": 259
},
{
"epoch": 0.6914893617021277,
"grad_norm": 2.572538137435913,
"learning_rate": 5.270670138135234e-06,
"loss": 0.2499,
"step": 260
},
{
"epoch": 0.6914893617021277,
"eval_accuracy": 0.8247058823529412,
"eval_f1": 0.5299684542586751,
"eval_loss": 0.3885125517845154,
"eval_precision": 0.7241379310344828,
"eval_recall": 0.417910447761194,
"eval_runtime": 33.8843,
"eval_samples_per_second": 6.611,
"eval_steps_per_second": 0.207,
"step": 260
},
{
"epoch": 0.6941489361702128,
"grad_norm": 2.906144618988037,
"learning_rate": 5.188980563274315e-06,
"loss": 0.3095,
"step": 261
},
{
"epoch": 0.6968085106382979,
"grad_norm": 2.319133996963501,
"learning_rate": 5.107706612137776e-06,
"loss": 0.2388,
"step": 262
},
{
"epoch": 0.699468085106383,
"grad_norm": 3.162642478942871,
"learning_rate": 5.026855305978129e-06,
"loss": 0.2462,
"step": 263
},
{
"epoch": 0.7021276595744681,
"grad_norm": 2.749540090560913,
"learning_rate": 4.946433629535585e-06,
"loss": 0.2659,
"step": 264
},
{
"epoch": 0.7047872340425532,
"grad_norm": 2.891836643218994,
"learning_rate": 4.866448530434692e-06,
"loss": 0.2332,
"step": 265
},
{
"epoch": 0.7074468085106383,
"grad_norm": 2.4717514514923096,
"learning_rate": 4.786906918584083e-06,
"loss": 0.2136,
"step": 266
},
{
"epoch": 0.7101063829787234,
"grad_norm": 2.679591655731201,
"learning_rate": 4.707815665579569e-06,
"loss": 0.3036,
"step": 267
},
{
"epoch": 0.7127659574468085,
"grad_norm": 2.3344614505767822,
"learning_rate": 4.629181604110464e-06,
"loss": 0.2853,
"step": 268
},
{
"epoch": 0.7154255319148937,
"grad_norm": 2.839320182800293,
"learning_rate": 4.551011527369348e-06,
"loss": 0.2394,
"step": 269
},
{
"epoch": 0.7180851063829787,
"grad_norm": 2.27245831489563,
"learning_rate": 4.4733121884651665e-06,
"loss": 0.2496,
"step": 270
},
{
"epoch": 0.7207446808510638,
"grad_norm": 3.038536548614502,
"learning_rate": 4.3960902998398524e-06,
"loss": 0.2787,
"step": 271
},
{
"epoch": 0.723404255319149,
"grad_norm": 3.1204025745391846,
"learning_rate": 4.319352532688444e-06,
"loss": 0.2678,
"step": 272
},
{
"epoch": 0.726063829787234,
"grad_norm": 3.8436288833618164,
"learning_rate": 4.243105516382732e-06,
"loss": 0.2405,
"step": 273
},
{
"epoch": 0.7287234042553191,
"grad_norm": 3.1559836864471436,
"learning_rate": 4.167355837898585e-06,
"loss": 0.2881,
"step": 274
},
{
"epoch": 0.7313829787234043,
"grad_norm": 2.5084681510925293,
"learning_rate": 4.092110041246865e-06,
"loss": 0.2365,
"step": 275
},
{
"epoch": 0.7340425531914894,
"grad_norm": 3.0584487915039062,
"learning_rate": 4.017374626908125e-06,
"loss": 0.2808,
"step": 276
},
{
"epoch": 0.7367021276595744,
"grad_norm": 3.6234519481658936,
"learning_rate": 3.943156051271003e-06,
"loss": 0.2993,
"step": 277
},
{
"epoch": 0.7393617021276596,
"grad_norm": 1.8584307432174683,
"learning_rate": 3.8694607260744745e-06,
"loss": 0.2012,
"step": 278
},
{
"epoch": 0.7420212765957447,
"grad_norm": 2.4248085021972656,
"learning_rate": 3.7962950178539282e-06,
"loss": 0.2352,
"step": 279
},
{
"epoch": 0.7446808510638298,
"grad_norm": 2.5359675884246826,
"learning_rate": 3.7236652473911817e-06,
"loss": 0.2121,
"step": 280
},
{
"epoch": 0.7446808510638298,
"eval_accuracy": 0.8223529411764706,
"eval_f1": 0.5175718849840255,
"eval_loss": 0.3953240215778351,
"eval_precision": 0.7232142857142857,
"eval_recall": 0.40298507462686567,
"eval_runtime": 34.1139,
"eval_samples_per_second": 6.566,
"eval_steps_per_second": 0.205,
"step": 280
},
{
"epoch": 0.7473404255319149,
"grad_norm": 2.3844354152679443,
"learning_rate": 3.651577689168405e-06,
"loss": 0.2212,
"step": 281
},
{
"epoch": 0.75,
"grad_norm": 2.8635263442993164,
"learning_rate": 3.580038570826093e-06,
"loss": 0.2259,
"step": 282
},
{
"epoch": 0.7526595744680851,
"grad_norm": 3.1672933101654053,
"learning_rate": 3.509054072625031e-06,
"loss": 0.2691,
"step": 283
},
{
"epoch": 0.7553191489361702,
"grad_norm": 3.298377752304077,
"learning_rate": 3.4386303269124142e-06,
"loss": 0.261,
"step": 284
},
{
"epoch": 0.7579787234042553,
"grad_norm": 3.3718481063842773,
"learning_rate": 3.3687734175920505e-06,
"loss": 0.2842,
"step": 285
},
{
"epoch": 0.7606382978723404,
"grad_norm": 2.822702646255493,
"learning_rate": 3.299489379598777e-06,
"loss": 0.2416,
"step": 286
},
{
"epoch": 0.7632978723404256,
"grad_norm": 3.209895372390747,
"learning_rate": 3.2307841983771182e-06,
"loss": 0.2706,
"step": 287
},
{
"epoch": 0.7659574468085106,
"grad_norm": 2.953824996948242,
"learning_rate": 3.162663809364178e-06,
"loss": 0.2629,
"step": 288
},
{
"epoch": 0.7686170212765957,
"grad_norm": 4.190698623657227,
"learning_rate": 3.095134097476904e-06,
"loss": 0.2609,
"step": 289
},
{
"epoch": 0.7712765957446809,
"grad_norm": 4.36337423324585,
"learning_rate": 3.0282008966036647e-06,
"loss": 0.2549,
"step": 290
},
{
"epoch": 0.773936170212766,
"grad_norm": 2.8681600093841553,
"learning_rate": 2.9618699891002843e-06,
"loss": 0.2464,
"step": 291
},
{
"epoch": 0.776595744680851,
"grad_norm": 3.781843900680542,
"learning_rate": 2.8961471052904855e-06,
"loss": 0.3261,
"step": 292
},
{
"epoch": 0.7792553191489362,
"grad_norm": 3.1815481185913086,
"learning_rate": 2.831037922970855e-06,
"loss": 0.2659,
"step": 293
},
{
"epoch": 0.7819148936170213,
"grad_norm": 3.2825517654418945,
"learning_rate": 2.7665480669203383e-06,
"loss": 0.2239,
"step": 294
},
{
"epoch": 0.7845744680851063,
"grad_norm": 2.418006420135498,
"learning_rate": 2.702683108414326e-06,
"loss": 0.2476,
"step": 295
},
{
"epoch": 0.7872340425531915,
"grad_norm": 3.483743906021118,
"learning_rate": 2.639448564743328e-06,
"loss": 0.2306,
"step": 296
},
{
"epoch": 0.7898936170212766,
"grad_norm": 3.201629638671875,
"learning_rate": 2.57684989873636e-06,
"loss": 0.2562,
"step": 297
},
{
"epoch": 0.7925531914893617,
"grad_norm": 2.7855303287506104,
"learning_rate": 2.514892518288988e-06,
"loss": 0.2245,
"step": 298
},
{
"epoch": 0.7952127659574468,
"grad_norm": 3.742940664291382,
"learning_rate": 2.4535817758961644e-06,
"loss": 0.3192,
"step": 299
},
{
"epoch": 0.7978723404255319,
"grad_norm": 2.966266393661499,
"learning_rate": 2.3929229681898005e-06,
"loss": 0.2704,
"step": 300
},
{
"epoch": 0.7978723404255319,
"eval_accuracy": 0.8329411764705882,
"eval_f1": 0.5644171779141104,
"eval_loss": 0.38487711548805237,
"eval_precision": 0.736,
"eval_recall": 0.4577114427860697,
"eval_runtime": 33.5166,
"eval_samples_per_second": 6.683,
"eval_steps_per_second": 0.209,
"step": 300
},
{
"epoch": 0.800531914893617,
"grad_norm": 3.4099960327148438,
"learning_rate": 2.332921335481205e-06,
"loss": 0.2715,
"step": 301
},
{
"epoch": 0.8031914893617021,
"grad_norm": 4.202554702758789,
"learning_rate": 2.2735820613083837e-06,
"loss": 0.2616,
"step": 302
},
{
"epoch": 0.8058510638297872,
"grad_norm": 2.95456862449646,
"learning_rate": 2.2149102719882044e-06,
"loss": 0.2455,
"step": 303
},
{
"epoch": 0.8085106382978723,
"grad_norm": 2.7879536151885986,
"learning_rate": 2.156911036173568e-06,
"loss": 0.2054,
"step": 304
},
{
"epoch": 0.8111702127659575,
"grad_norm": 2.4969985485076904,
"learning_rate": 2.0995893644155007e-06,
"loss": 0.2814,
"step": 305
},
{
"epoch": 0.8138297872340425,
"grad_norm": 3.3959643840789795,
"learning_rate": 2.0429502087303164e-06,
"loss": 0.2382,
"step": 306
},
{
"epoch": 0.8164893617021277,
"grad_norm": 2.825615882873535,
"learning_rate": 1.9869984621717888e-06,
"loss": 0.2808,
"step": 307
},
{
"epoch": 0.8191489361702128,
"grad_norm": 2.766301155090332,
"learning_rate": 1.931738958408457e-06,
"loss": 0.2371,
"step": 308
},
{
"epoch": 0.8218085106382979,
"grad_norm": 3.683234930038452,
"learning_rate": 1.8771764713060359e-06,
"loss": 0.2617,
"step": 309
},
{
"epoch": 0.824468085106383,
"grad_norm": 3.0581727027893066,
"learning_rate": 1.8233157145150183e-06,
"loss": 0.254,
"step": 310
},
{
"epoch": 0.8271276595744681,
"grad_norm": 3.316701889038086,
"learning_rate": 1.7701613410634367e-06,
"loss": 0.2596,
"step": 311
},
{
"epoch": 0.8297872340425532,
"grad_norm": 2.8315346240997314,
"learning_rate": 1.717717942954914e-06,
"loss": 0.222,
"step": 312
},
{
"epoch": 0.8324468085106383,
"grad_norm": 2.781020164489746,
"learning_rate": 1.6659900507719406e-06,
"loss": 0.2643,
"step": 313
},
{
"epoch": 0.8351063829787234,
"grad_norm": 2.389970302581787,
"learning_rate": 1.614982133284495e-06,
"loss": 0.2161,
"step": 314
},
{
"epoch": 0.8377659574468085,
"grad_norm": 3.4777987003326416,
"learning_rate": 1.5646985970639717e-06,
"loss": 0.3309,
"step": 315
},
{
"epoch": 0.8404255319148937,
"grad_norm": 4.487973690032959,
"learning_rate": 1.5151437861025032e-06,
"loss": 0.3284,
"step": 316
},
{
"epoch": 0.8430851063829787,
"grad_norm": 4.822957515716553,
"learning_rate": 1.466321981437694e-06,
"loss": 0.2033,
"step": 317
},
{
"epoch": 0.8457446808510638,
"grad_norm": 2.9255247116088867,
"learning_rate": 1.4182374007827605e-06,
"loss": 0.2528,
"step": 318
},
{
"epoch": 0.848404255319149,
"grad_norm": 2.9784889221191406,
"learning_rate": 1.3708941981621814e-06,
"loss": 0.2151,
"step": 319
},
{
"epoch": 0.851063829787234,
"grad_norm": 2.522810459136963,
"learning_rate": 1.324296463552821e-06,
"loss": 0.2333,
"step": 320
},
{
"epoch": 0.851063829787234,
"eval_accuracy": 0.831764705882353,
"eval_f1": 0.5545171339563862,
"eval_loss": 0.38777896761894226,
"eval_precision": 0.7416666666666667,
"eval_recall": 0.4427860696517413,
"eval_runtime": 34.5031,
"eval_samples_per_second": 6.492,
"eval_steps_per_second": 0.203,
"step": 320
},
{
"epoch": 0.8537234042553191,
"grad_norm": 2.794802665710449,
"learning_rate": 1.2784482225306061e-06,
"loss": 0.2338,
"step": 321
},
{
"epoch": 0.8563829787234043,
"grad_norm": 2.8740601539611816,
"learning_rate": 1.2333534359227383e-06,
"loss": 0.2526,
"step": 322
},
{
"epoch": 0.8590425531914894,
"grad_norm": 2.600721597671509,
"learning_rate": 1.1890159994655425e-06,
"loss": 0.2165,
"step": 323
},
{
"epoch": 0.8617021276595744,
"grad_norm": 2.781907796859741,
"learning_rate": 1.1454397434679022e-06,
"loss": 0.2414,
"step": 324
},
{
"epoch": 0.8643617021276596,
"grad_norm": 2.8299474716186523,
"learning_rate": 1.1026284324803493e-06,
"loss": 0.2389,
"step": 325
},
{
"epoch": 0.8670212765957447,
"grad_norm": 2.6625523567199707,
"learning_rate": 1.060585764969867e-06,
"loss": 0.2444,
"step": 326
},
{
"epoch": 0.8696808510638298,
"grad_norm": 3.0182435512542725,
"learning_rate": 1.0193153730003603e-06,
"loss": 0.2967,
"step": 327
},
{
"epoch": 0.8723404255319149,
"grad_norm": 2.5358083248138428,
"learning_rate": 9.788208219188932e-07,
"loss": 0.2091,
"step": 328
},
{
"epoch": 0.875,
"grad_norm": 3.2480201721191406,
"learning_rate": 9.391056100476736e-07,
"loss": 0.2195,
"step": 329
},
{
"epoch": 0.8776595744680851,
"grad_norm": 2.449801445007324,
"learning_rate": 9.001731683818338e-07,
"loss": 0.2316,
"step": 330
},
{
"epoch": 0.8803191489361702,
"grad_norm": 3.304652690887451,
"learning_rate": 8.620268602930271e-07,
"loss": 0.2719,
"step": 331
},
{
"epoch": 0.8829787234042553,
"grad_norm": 3.1013834476470947,
"learning_rate": 8.246699812388714e-07,
"loss": 0.2412,
"step": 332
},
{
"epoch": 0.8856382978723404,
"grad_norm": 2.4398679733276367,
"learning_rate": 7.881057584782448e-07,
"loss": 0.1909,
"step": 333
},
{
"epoch": 0.8882978723404256,
"grad_norm": 3.296792984008789,
"learning_rate": 7.523373507924947e-07,
"loss": 0.2592,
"step": 334
},
{
"epoch": 0.8909574468085106,
"grad_norm": 3.5089118480682373,
"learning_rate": 7.17367848212539e-07,
"loss": 0.2341,
"step": 335
},
{
"epoch": 0.8936170212765957,
"grad_norm": 2.9826953411102295,
"learning_rate": 6.83200271751927e-07,
"loss": 0.239,
"step": 336
},
{
"epoch": 0.8962765957446809,
"grad_norm": 2.965322732925415,
"learning_rate": 6.498375731458529e-07,
"loss": 0.242,
"step": 337
},
{
"epoch": 0.898936170212766,
"grad_norm": 2.855252504348755,
"learning_rate": 6.17282634596148e-07,
"loss": 0.2503,
"step": 338
},
{
"epoch": 0.901595744680851,
"grad_norm": 5.112611293792725,
"learning_rate": 5.85538268522301e-07,
"loss": 0.2665,
"step": 339
},
{
"epoch": 0.9042553191489362,
"grad_norm": 3.4850215911865234,
"learning_rate": 5.546072173184791e-07,
"loss": 0.2896,
"step": 340
},
{
"epoch": 0.9042553191489362,
"eval_accuracy": 0.8305882352941176,
"eval_f1": 0.55,
"eval_loss": 0.38858291506767273,
"eval_precision": 0.7394957983193278,
"eval_recall": 0.43781094527363185,
"eval_runtime": 34.3336,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 0.204,
"step": 340
},
{
"epoch": 0.9069148936170213,
"grad_norm": 2.3722422122955322,
"learning_rate": 5.244921531166247e-07,
"loss": 0.2334,
"step": 341
},
{
"epoch": 0.9095744680851063,
"grad_norm": 2.8881895542144775,
"learning_rate": 4.951956775556e-07,
"loss": 0.2339,
"step": 342
},
{
"epoch": 0.9122340425531915,
"grad_norm": 4.109971046447754,
"learning_rate": 4.667203215564431e-07,
"loss": 0.2837,
"step": 343
},
{
"epoch": 0.9148936170212766,
"grad_norm": 3.7027337551116943,
"learning_rate": 4.3906854510370245e-07,
"loss": 0.2862,
"step": 344
},
{
"epoch": 0.9175531914893617,
"grad_norm": 3.069493532180786,
"learning_rate": 4.1224273703294515e-07,
"loss": 0.2456,
"step": 345
},
{
"epoch": 0.9202127659574468,
"grad_norm": 2.9162609577178955,
"learning_rate": 3.862452148243623e-07,
"loss": 0.2633,
"step": 346
},
{
"epoch": 0.9228723404255319,
"grad_norm": 3.10223388671875,
"learning_rate": 3.610782244025768e-07,
"loss": 0.2165,
"step": 347
},
{
"epoch": 0.925531914893617,
"grad_norm": 3.3466663360595703,
"learning_rate": 3.367439399426087e-07,
"loss": 0.2748,
"step": 348
},
{
"epoch": 0.9281914893617021,
"grad_norm": 3.4505677223205566,
"learning_rate": 3.132444636820575e-07,
"loss": 0.2789,
"step": 349
},
{
"epoch": 0.9308510638297872,
"grad_norm": 3.7714152336120605,
"learning_rate": 2.905818257394799e-07,
"loss": 0.233,
"step": 350
},
{
"epoch": 0.9335106382978723,
"grad_norm": 5.176234722137451,
"learning_rate": 2.687579839390153e-07,
"loss": 0.2933,
"step": 351
},
{
"epoch": 0.9361702127659575,
"grad_norm": 2.8145923614501953,
"learning_rate": 2.4777482364124695e-07,
"loss": 0.2916,
"step": 352
},
{
"epoch": 0.9388297872340425,
"grad_norm": 2.452026605606079,
"learning_rate": 2.2763415758032316e-07,
"loss": 0.2072,
"step": 353
},
{
"epoch": 0.9414893617021277,
"grad_norm": 2.741774559020996,
"learning_rate": 2.0833772570736376e-07,
"loss": 0.2365,
"step": 354
},
{
"epoch": 0.9441489361702128,
"grad_norm": 2.6265206336975098,
"learning_rate": 1.8988719504013375e-07,
"loss": 0.2226,
"step": 355
},
{
"epoch": 0.9468085106382979,
"grad_norm": 4.149282932281494,
"learning_rate": 1.7228415951904165e-07,
"loss": 0.1923,
"step": 356
},
{
"epoch": 0.949468085106383,
"grad_norm": 2.389505624771118,
"learning_rate": 1.5553013986942645e-07,
"loss": 0.21,
"step": 357
},
{
"epoch": 0.9521276595744681,
"grad_norm": 4.067861557006836,
"learning_rate": 1.3962658347019819e-07,
"loss": 0.2497,
"step": 358
},
{
"epoch": 0.9547872340425532,
"grad_norm": 2.5128250122070312,
"learning_rate": 1.245748642287814e-07,
"loss": 0.2559,
"step": 359
},
{
"epoch": 0.9574468085106383,
"grad_norm": 2.755162477493286,
"learning_rate": 1.103762824624377e-07,
"loss": 0.2398,
"step": 360
},
{
"epoch": 0.9574468085106383,
"eval_accuracy": 0.8329411764705882,
"eval_f1": 0.5617283950617284,
"eval_loss": 0.38481393456459045,
"eval_precision": 0.7398373983739838,
"eval_recall": 0.4527363184079602,
"eval_runtime": 34.7008,
"eval_samples_per_second": 6.455,
"eval_steps_per_second": 0.202,
"step": 360
},
{
"epoch": 0.9601063829787234,
"grad_norm": 3.078138828277588,
"learning_rate": 9.70320647859213e-08,
"loss": 0.2091,
"step": 361
},
{
"epoch": 0.9627659574468085,
"grad_norm": 2.8632972240448,
"learning_rate": 8.454336400552154e-08,
"loss": 0.2513,
"step": 362
},
{
"epoch": 0.9654255319148937,
"grad_norm": 2.500767469406128,
"learning_rate": 7.291125901946027e-08,
"loss": 0.2346,
"step": 363
},
{
"epoch": 0.9680851063829787,
"grad_norm": 4.420257091522217,
"learning_rate": 6.21367547246976e-08,
"loss": 0.2701,
"step": 364
},
{
"epoch": 0.9707446808510638,
"grad_norm": 2.459460973739624,
"learning_rate": 5.2220781930111263e-08,
"loss": 0.2441,
"step": 365
},
{
"epoch": 0.973404255319149,
"grad_norm": 3.661996841430664,
"learning_rate": 4.316419727608434e-08,
"loss": 0.2704,
"step": 366
},
{
"epoch": 0.976063829787234,
"grad_norm": 3.0439155101776123,
"learning_rate": 3.4967783160507753e-08,
"loss": 0.2187,
"step": 367
},
{
"epoch": 0.9787234042553191,
"grad_norm": 3.629185914993286,
"learning_rate": 2.763224767117767e-08,
"loss": 0.3418,
"step": 368
},
{
"epoch": 0.9813829787234043,
"grad_norm": 2.30877423286438,
"learning_rate": 2.115822452463223e-08,
"loss": 0.2607,
"step": 369
},
{
"epoch": 0.9840425531914894,
"grad_norm": 3.398482084274292,
"learning_rate": 1.554627301140199e-08,
"loss": 0.2494,
"step": 370
},
{
"epoch": 0.9867021276595744,
"grad_norm": 3.0833022594451904,
"learning_rate": 1.0796877947691909e-08,
"loss": 0.2924,
"step": 371
},
{
"epoch": 0.9893617021276596,
"grad_norm": 2.702519655227661,
"learning_rate": 6.910449633501515e-09,
"loss": 0.2222,
"step": 372
},
{
"epoch": 0.9920212765957447,
"grad_norm": 3.0397112369537354,
"learning_rate": 3.887323817173272e-09,
"loss": 0.2145,
"step": 373
},
{
"epoch": 0.9946808510638298,
"grad_norm": 2.342505931854248,
"learning_rate": 1.7277616663946562e-09,
"loss": 0.2471,
"step": 374
},
{
"epoch": 0.9973404255319149,
"grad_norm": 2.674713611602783,
"learning_rate": 4.319497456273247e-10,
"loss": 0.2519,
"step": 375
},
{
"epoch": 1.0,
"grad_norm": 4.508094310760498,
"learning_rate": 0.0,
"loss": 0.3025,
"step": 376
}
],
"logging_steps": 1,
"max_steps": 376,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2170791543740826e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}