Text Generation
Transformers
PyTorch
English
llama
Eval Results
text-generation-inference
Inference Endpoints
orca_mini_v2_13b / trainer_state.json
Eric Hartford
Initial Commit
628b27f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.926829268292683,
"global_step": 840,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 0.9326,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 3.899180437875726e-06,
"loss": 0.9014,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 6.1800547775785244e-06,
"loss": 0.8828,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 7.798360875751452e-06,
"loss": 0.8145,
"step": 4
},
{
"epoch": 0.02,
"learning_rate": 9.053616605738856e-06,
"loss": 0.7803,
"step": 5
},
{
"epoch": 0.02,
"learning_rate": 1.007923521545425e-05,
"loss": 0.7842,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 1.0946383394261144e-05,
"loss": 0.7754,
"step": 7
},
{
"epoch": 0.03,
"learning_rate": 1.169754131362718e-05,
"loss": 0.7061,
"step": 8
},
{
"epoch": 0.03,
"learning_rate": 1.2360109555157049e-05,
"loss": 0.7168,
"step": 9
},
{
"epoch": 0.03,
"learning_rate": 1.2952797043614585e-05,
"loss": 0.7148,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 1.348894809355931e-05,
"loss": 0.7031,
"step": 11
},
{
"epoch": 0.04,
"learning_rate": 1.3978415653329978e-05,
"loss": 0.7236,
"step": 12
},
{
"epoch": 0.05,
"learning_rate": 1.4428682160514114e-05,
"loss": 0.7021,
"step": 13
},
{
"epoch": 0.05,
"learning_rate": 1.484556383213687e-05,
"loss": 0.6875,
"step": 14
},
{
"epoch": 0.05,
"learning_rate": 1.5233671383317381e-05,
"loss": 0.7129,
"step": 15
},
{
"epoch": 0.06,
"learning_rate": 1.5596721751502905e-05,
"loss": 0.6943,
"step": 16
},
{
"epoch": 0.06,
"learning_rate": 1.593775515114726e-05,
"loss": 0.6836,
"step": 17
},
{
"epoch": 0.06,
"learning_rate": 1.6259289993032772e-05,
"loss": 0.668,
"step": 18
},
{
"epoch": 0.07,
"learning_rate": 1.6563435861933304e-05,
"loss": 0.6621,
"step": 19
},
{
"epoch": 0.07,
"learning_rate": 1.685197748149031e-05,
"loss": 0.6455,
"step": 20
},
{
"epoch": 0.07,
"learning_rate": 1.7126438171839668e-05,
"loss": 0.6631,
"step": 21
},
{
"epoch": 0.08,
"learning_rate": 1.7388128531435038e-05,
"loss": 0.6348,
"step": 22
},
{
"epoch": 0.08,
"learning_rate": 1.7638184288576362e-05,
"loss": 0.6602,
"step": 23
},
{
"epoch": 0.08,
"learning_rate": 1.7877596091205705e-05,
"loss": 0.6006,
"step": 24
},
{
"epoch": 0.09,
"learning_rate": 1.810723321147771e-05,
"loss": 0.6777,
"step": 25
},
{
"epoch": 0.09,
"learning_rate": 1.832786259838984e-05,
"loss": 0.6318,
"step": 26
},
{
"epoch": 0.09,
"learning_rate": 1.8540164332735572e-05,
"loss": 0.6406,
"step": 27
},
{
"epoch": 0.1,
"learning_rate": 1.8744744270012596e-05,
"loss": 0.6396,
"step": 28
},
{
"epoch": 0.1,
"learning_rate": 1.8942144463773483e-05,
"loss": 0.6348,
"step": 29
},
{
"epoch": 0.1,
"learning_rate": 1.9132851821193106e-05,
"loss": 0.6484,
"step": 30
},
{
"epoch": 0.11,
"learning_rate": 1.9317305338856604e-05,
"loss": 0.6133,
"step": 31
},
{
"epoch": 0.11,
"learning_rate": 1.9495902189378633e-05,
"loss": 0.6211,
"step": 32
},
{
"epoch": 0.11,
"learning_rate": 1.9669002871137834e-05,
"loss": 0.6299,
"step": 33
},
{
"epoch": 0.12,
"learning_rate": 1.9836935589022987e-05,
"loss": 0.6162,
"step": 34
},
{
"epoch": 0.12,
"learning_rate": 1.9999999999999998e-05,
"loss": 0.6201,
"step": 35
},
{
"epoch": 0.13,
"learning_rate": 2e-05,
"loss": 0.6279,
"step": 36
},
{
"epoch": 0.13,
"learning_rate": 1.9975786924939468e-05,
"loss": 0.6289,
"step": 37
},
{
"epoch": 0.13,
"learning_rate": 1.9951573849878937e-05,
"loss": 0.6152,
"step": 38
},
{
"epoch": 0.14,
"learning_rate": 1.9927360774818403e-05,
"loss": 0.6318,
"step": 39
},
{
"epoch": 0.14,
"learning_rate": 1.990314769975787e-05,
"loss": 0.6094,
"step": 40
},
{
"epoch": 0.14,
"learning_rate": 1.987893462469734e-05,
"loss": 0.6416,
"step": 41
},
{
"epoch": 0.15,
"learning_rate": 1.9854721549636805e-05,
"loss": 0.6279,
"step": 42
},
{
"epoch": 0.15,
"learning_rate": 1.9830508474576275e-05,
"loss": 0.6348,
"step": 43
},
{
"epoch": 0.15,
"learning_rate": 1.980629539951574e-05,
"loss": 0.6318,
"step": 44
},
{
"epoch": 0.16,
"learning_rate": 1.9782082324455207e-05,
"loss": 0.6348,
"step": 45
},
{
"epoch": 0.16,
"learning_rate": 1.9757869249394673e-05,
"loss": 0.6201,
"step": 46
},
{
"epoch": 0.16,
"learning_rate": 1.9733656174334143e-05,
"loss": 0.6318,
"step": 47
},
{
"epoch": 0.17,
"learning_rate": 1.970944309927361e-05,
"loss": 0.6367,
"step": 48
},
{
"epoch": 0.17,
"learning_rate": 1.9685230024213075e-05,
"loss": 0.6172,
"step": 49
},
{
"epoch": 0.17,
"learning_rate": 1.9661016949152545e-05,
"loss": 0.624,
"step": 50
},
{
"epoch": 0.18,
"learning_rate": 1.963680387409201e-05,
"loss": 0.6348,
"step": 51
},
{
"epoch": 0.18,
"learning_rate": 1.961259079903148e-05,
"loss": 0.6182,
"step": 52
},
{
"epoch": 0.18,
"learning_rate": 1.9588377723970946e-05,
"loss": 0.6494,
"step": 53
},
{
"epoch": 0.19,
"learning_rate": 1.9564164648910413e-05,
"loss": 0.6025,
"step": 54
},
{
"epoch": 0.19,
"learning_rate": 1.953995157384988e-05,
"loss": 0.6279,
"step": 55
},
{
"epoch": 0.2,
"learning_rate": 1.9515738498789348e-05,
"loss": 0.6387,
"step": 56
},
{
"epoch": 0.2,
"learning_rate": 1.9491525423728814e-05,
"loss": 0.6064,
"step": 57
},
{
"epoch": 0.2,
"learning_rate": 1.9467312348668284e-05,
"loss": 0.6094,
"step": 58
},
{
"epoch": 0.21,
"learning_rate": 1.944309927360775e-05,
"loss": 0.626,
"step": 59
},
{
"epoch": 0.21,
"learning_rate": 1.9418886198547216e-05,
"loss": 0.6094,
"step": 60
},
{
"epoch": 0.21,
"learning_rate": 1.9394673123486686e-05,
"loss": 0.6182,
"step": 61
},
{
"epoch": 0.22,
"learning_rate": 1.9370460048426152e-05,
"loss": 0.6016,
"step": 62
},
{
"epoch": 0.22,
"learning_rate": 1.9346246973365618e-05,
"loss": 0.6289,
"step": 63
},
{
"epoch": 0.22,
"learning_rate": 1.9322033898305087e-05,
"loss": 0.6289,
"step": 64
},
{
"epoch": 0.23,
"learning_rate": 1.9297820823244554e-05,
"loss": 0.5928,
"step": 65
},
{
"epoch": 0.23,
"learning_rate": 1.927360774818402e-05,
"loss": 0.6182,
"step": 66
},
{
"epoch": 0.23,
"learning_rate": 1.924939467312349e-05,
"loss": 0.5918,
"step": 67
},
{
"epoch": 0.24,
"learning_rate": 1.9225181598062955e-05,
"loss": 0.6221,
"step": 68
},
{
"epoch": 0.24,
"learning_rate": 1.9200968523002425e-05,
"loss": 0.6162,
"step": 69
},
{
"epoch": 0.24,
"learning_rate": 1.917675544794189e-05,
"loss": 0.6162,
"step": 70
},
{
"epoch": 0.25,
"learning_rate": 1.9152542372881357e-05,
"loss": 0.6445,
"step": 71
},
{
"epoch": 0.25,
"learning_rate": 1.9128329297820823e-05,
"loss": 0.6172,
"step": 72
},
{
"epoch": 0.25,
"learning_rate": 1.9104116222760293e-05,
"loss": 0.6152,
"step": 73
},
{
"epoch": 0.26,
"learning_rate": 1.907990314769976e-05,
"loss": 0.6084,
"step": 74
},
{
"epoch": 0.26,
"learning_rate": 1.9055690072639225e-05,
"loss": 0.6162,
"step": 75
},
{
"epoch": 0.26,
"learning_rate": 1.9031476997578695e-05,
"loss": 0.5996,
"step": 76
},
{
"epoch": 0.27,
"learning_rate": 1.900726392251816e-05,
"loss": 0.6064,
"step": 77
},
{
"epoch": 0.27,
"learning_rate": 1.898305084745763e-05,
"loss": 0.5996,
"step": 78
},
{
"epoch": 0.28,
"learning_rate": 1.8958837772397097e-05,
"loss": 0.6328,
"step": 79
},
{
"epoch": 0.28,
"learning_rate": 1.8934624697336563e-05,
"loss": 0.6133,
"step": 80
},
{
"epoch": 0.28,
"learning_rate": 1.891041162227603e-05,
"loss": 0.6143,
"step": 81
},
{
"epoch": 0.29,
"learning_rate": 1.88861985472155e-05,
"loss": 0.5996,
"step": 82
},
{
"epoch": 0.29,
"learning_rate": 1.8861985472154964e-05,
"loss": 0.6182,
"step": 83
},
{
"epoch": 0.29,
"learning_rate": 1.883777239709443e-05,
"loss": 0.623,
"step": 84
},
{
"epoch": 0.3,
"learning_rate": 1.88135593220339e-05,
"loss": 0.6055,
"step": 85
},
{
"epoch": 0.3,
"learning_rate": 1.8789346246973366e-05,
"loss": 0.5967,
"step": 86
},
{
"epoch": 0.3,
"learning_rate": 1.8765133171912836e-05,
"loss": 0.5947,
"step": 87
},
{
"epoch": 0.31,
"learning_rate": 1.8740920096852302e-05,
"loss": 0.6182,
"step": 88
},
{
"epoch": 0.31,
"learning_rate": 1.8716707021791768e-05,
"loss": 0.6152,
"step": 89
},
{
"epoch": 0.31,
"learning_rate": 1.8692493946731238e-05,
"loss": 0.5977,
"step": 90
},
{
"epoch": 0.32,
"learning_rate": 1.8668280871670704e-05,
"loss": 0.6348,
"step": 91
},
{
"epoch": 0.32,
"learning_rate": 1.864406779661017e-05,
"loss": 0.6074,
"step": 92
},
{
"epoch": 0.32,
"learning_rate": 1.8619854721549636e-05,
"loss": 0.5898,
"step": 93
},
{
"epoch": 0.33,
"learning_rate": 1.8595641646489106e-05,
"loss": 0.5967,
"step": 94
},
{
"epoch": 0.33,
"learning_rate": 1.8571428571428575e-05,
"loss": 0.6113,
"step": 95
},
{
"epoch": 0.33,
"learning_rate": 1.854721549636804e-05,
"loss": 0.6123,
"step": 96
},
{
"epoch": 0.34,
"learning_rate": 1.8523002421307507e-05,
"loss": 0.6211,
"step": 97
},
{
"epoch": 0.34,
"learning_rate": 1.8498789346246974e-05,
"loss": 0.6006,
"step": 98
},
{
"epoch": 0.34,
"learning_rate": 1.8474576271186443e-05,
"loss": 0.6113,
"step": 99
},
{
"epoch": 0.35,
"learning_rate": 1.845036319612591e-05,
"loss": 0.6006,
"step": 100
},
{
"epoch": 0.35,
"learning_rate": 1.8426150121065375e-05,
"loss": 0.6123,
"step": 101
},
{
"epoch": 0.36,
"learning_rate": 1.8401937046004845e-05,
"loss": 0.5938,
"step": 102
},
{
"epoch": 0.36,
"learning_rate": 1.837772397094431e-05,
"loss": 0.6045,
"step": 103
},
{
"epoch": 0.36,
"learning_rate": 1.835351089588378e-05,
"loss": 0.5928,
"step": 104
},
{
"epoch": 0.37,
"learning_rate": 1.8329297820823247e-05,
"loss": 0.5977,
"step": 105
},
{
"epoch": 0.37,
"learning_rate": 1.8305084745762713e-05,
"loss": 0.6299,
"step": 106
},
{
"epoch": 0.37,
"learning_rate": 1.828087167070218e-05,
"loss": 0.5801,
"step": 107
},
{
"epoch": 0.38,
"learning_rate": 1.825665859564165e-05,
"loss": 0.6035,
"step": 108
},
{
"epoch": 0.38,
"learning_rate": 1.8232445520581115e-05,
"loss": 0.5938,
"step": 109
},
{
"epoch": 0.38,
"learning_rate": 1.820823244552058e-05,
"loss": 0.5977,
"step": 110
},
{
"epoch": 0.39,
"learning_rate": 1.818401937046005e-05,
"loss": 0.6016,
"step": 111
},
{
"epoch": 0.39,
"learning_rate": 1.8159806295399516e-05,
"loss": 0.5957,
"step": 112
},
{
"epoch": 0.39,
"learning_rate": 1.8135593220338986e-05,
"loss": 0.6113,
"step": 113
},
{
"epoch": 0.4,
"learning_rate": 1.8111380145278452e-05,
"loss": 0.6094,
"step": 114
},
{
"epoch": 0.4,
"learning_rate": 1.8087167070217918e-05,
"loss": 0.6016,
"step": 115
},
{
"epoch": 0.4,
"learning_rate": 1.8062953995157388e-05,
"loss": 0.6006,
"step": 116
},
{
"epoch": 0.41,
"learning_rate": 1.8038740920096854e-05,
"loss": 0.6182,
"step": 117
},
{
"epoch": 0.41,
"learning_rate": 1.801452784503632e-05,
"loss": 0.6025,
"step": 118
},
{
"epoch": 0.41,
"learning_rate": 1.7990314769975786e-05,
"loss": 0.5938,
"step": 119
},
{
"epoch": 0.42,
"learning_rate": 1.7966101694915256e-05,
"loss": 0.6172,
"step": 120
},
{
"epoch": 0.42,
"learning_rate": 1.7941888619854725e-05,
"loss": 0.6074,
"step": 121
},
{
"epoch": 0.43,
"learning_rate": 1.791767554479419e-05,
"loss": 0.6055,
"step": 122
},
{
"epoch": 0.43,
"learning_rate": 1.7893462469733658e-05,
"loss": 0.583,
"step": 123
},
{
"epoch": 0.43,
"learning_rate": 1.7869249394673124e-05,
"loss": 0.5918,
"step": 124
},
{
"epoch": 0.44,
"learning_rate": 1.7845036319612593e-05,
"loss": 0.6016,
"step": 125
},
{
"epoch": 0.44,
"learning_rate": 1.782082324455206e-05,
"loss": 0.6123,
"step": 126
},
{
"epoch": 0.44,
"learning_rate": 1.7796610169491526e-05,
"loss": 0.6104,
"step": 127
},
{
"epoch": 0.45,
"learning_rate": 1.777239709443099e-05,
"loss": 0.5801,
"step": 128
},
{
"epoch": 0.45,
"learning_rate": 1.774818401937046e-05,
"loss": 0.6035,
"step": 129
},
{
"epoch": 0.45,
"learning_rate": 1.772397094430993e-05,
"loss": 0.5811,
"step": 130
},
{
"epoch": 0.46,
"learning_rate": 1.7699757869249397e-05,
"loss": 0.5908,
"step": 131
},
{
"epoch": 0.46,
"learning_rate": 1.7675544794188863e-05,
"loss": 0.6035,
"step": 132
},
{
"epoch": 0.46,
"learning_rate": 1.765133171912833e-05,
"loss": 0.6094,
"step": 133
},
{
"epoch": 0.47,
"learning_rate": 1.76271186440678e-05,
"loss": 0.5957,
"step": 134
},
{
"epoch": 0.47,
"learning_rate": 1.7602905569007265e-05,
"loss": 0.6152,
"step": 135
},
{
"epoch": 0.47,
"learning_rate": 1.757869249394673e-05,
"loss": 0.5908,
"step": 136
},
{
"epoch": 0.48,
"learning_rate": 1.75544794188862e-05,
"loss": 0.5908,
"step": 137
},
{
"epoch": 0.48,
"learning_rate": 1.7530266343825667e-05,
"loss": 0.5957,
"step": 138
},
{
"epoch": 0.48,
"learning_rate": 1.7506053268765136e-05,
"loss": 0.585,
"step": 139
},
{
"epoch": 0.49,
"learning_rate": 1.7481840193704602e-05,
"loss": 0.6143,
"step": 140
},
{
"epoch": 0.49,
"learning_rate": 1.745762711864407e-05,
"loss": 0.6064,
"step": 141
},
{
"epoch": 0.49,
"learning_rate": 1.7433414043583538e-05,
"loss": 0.6025,
"step": 142
},
{
"epoch": 0.5,
"learning_rate": 1.7409200968523004e-05,
"loss": 0.583,
"step": 143
},
{
"epoch": 0.5,
"learning_rate": 1.738498789346247e-05,
"loss": 0.5967,
"step": 144
},
{
"epoch": 0.51,
"learning_rate": 1.7360774818401936e-05,
"loss": 0.5947,
"step": 145
},
{
"epoch": 0.51,
"learning_rate": 1.7336561743341406e-05,
"loss": 0.6123,
"step": 146
},
{
"epoch": 0.51,
"learning_rate": 1.7312348668280875e-05,
"loss": 0.627,
"step": 147
},
{
"epoch": 0.52,
"learning_rate": 1.728813559322034e-05,
"loss": 0.5986,
"step": 148
},
{
"epoch": 0.52,
"learning_rate": 1.7263922518159808e-05,
"loss": 0.623,
"step": 149
},
{
"epoch": 0.52,
"learning_rate": 1.7239709443099274e-05,
"loss": 0.6016,
"step": 150
},
{
"epoch": 0.53,
"learning_rate": 1.7215496368038743e-05,
"loss": 0.6016,
"step": 151
},
{
"epoch": 0.53,
"learning_rate": 1.719128329297821e-05,
"loss": 0.5928,
"step": 152
},
{
"epoch": 0.53,
"learning_rate": 1.7167070217917676e-05,
"loss": 0.582,
"step": 153
},
{
"epoch": 0.54,
"learning_rate": 1.7142857142857142e-05,
"loss": 0.5938,
"step": 154
},
{
"epoch": 0.54,
"learning_rate": 1.711864406779661e-05,
"loss": 0.6016,
"step": 155
},
{
"epoch": 0.54,
"learning_rate": 1.709443099273608e-05,
"loss": 0.584,
"step": 156
},
{
"epoch": 0.55,
"learning_rate": 1.7070217917675547e-05,
"loss": 0.5674,
"step": 157
},
{
"epoch": 0.55,
"learning_rate": 1.7046004842615013e-05,
"loss": 0.5957,
"step": 158
},
{
"epoch": 0.55,
"learning_rate": 1.702179176755448e-05,
"loss": 0.6035,
"step": 159
},
{
"epoch": 0.56,
"learning_rate": 1.699757869249395e-05,
"loss": 0.5879,
"step": 160
},
{
"epoch": 0.56,
"learning_rate": 1.6973365617433415e-05,
"loss": 0.5898,
"step": 161
},
{
"epoch": 0.56,
"learning_rate": 1.694915254237288e-05,
"loss": 0.5732,
"step": 162
},
{
"epoch": 0.57,
"learning_rate": 1.692493946731235e-05,
"loss": 0.5859,
"step": 163
},
{
"epoch": 0.57,
"learning_rate": 1.6900726392251817e-05,
"loss": 0.5908,
"step": 164
},
{
"epoch": 0.57,
"learning_rate": 1.6876513317191286e-05,
"loss": 0.6133,
"step": 165
},
{
"epoch": 0.58,
"learning_rate": 1.6852300242130752e-05,
"loss": 0.5645,
"step": 166
},
{
"epoch": 0.58,
"learning_rate": 1.682808716707022e-05,
"loss": 0.584,
"step": 167
},
{
"epoch": 0.59,
"learning_rate": 1.6803874092009688e-05,
"loss": 0.6035,
"step": 168
},
{
"epoch": 0.59,
"learning_rate": 1.6779661016949154e-05,
"loss": 0.5938,
"step": 169
},
{
"epoch": 0.59,
"learning_rate": 1.675544794188862e-05,
"loss": 0.5664,
"step": 170
},
{
"epoch": 0.6,
"learning_rate": 1.6731234866828087e-05,
"loss": 0.6064,
"step": 171
},
{
"epoch": 0.6,
"learning_rate": 1.6707021791767556e-05,
"loss": 0.5977,
"step": 172
},
{
"epoch": 0.6,
"learning_rate": 1.6682808716707026e-05,
"loss": 0.5889,
"step": 173
},
{
"epoch": 0.61,
"learning_rate": 1.6658595641646492e-05,
"loss": 0.6016,
"step": 174
},
{
"epoch": 0.61,
"learning_rate": 1.6634382566585958e-05,
"loss": 0.5986,
"step": 175
},
{
"epoch": 0.61,
"learning_rate": 1.6610169491525424e-05,
"loss": 0.5908,
"step": 176
},
{
"epoch": 0.62,
"learning_rate": 1.6585956416464894e-05,
"loss": 0.5811,
"step": 177
},
{
"epoch": 0.62,
"learning_rate": 1.656174334140436e-05,
"loss": 0.5791,
"step": 178
},
{
"epoch": 0.62,
"learning_rate": 1.6537530266343826e-05,
"loss": 0.5967,
"step": 179
},
{
"epoch": 0.63,
"learning_rate": 1.6513317191283292e-05,
"loss": 0.6055,
"step": 180
},
{
"epoch": 0.63,
"learning_rate": 1.648910411622276e-05,
"loss": 0.5977,
"step": 181
},
{
"epoch": 0.63,
"learning_rate": 1.646489104116223e-05,
"loss": 0.583,
"step": 182
},
{
"epoch": 0.64,
"learning_rate": 1.6440677966101697e-05,
"loss": 0.6162,
"step": 183
},
{
"epoch": 0.64,
"learning_rate": 1.6416464891041163e-05,
"loss": 0.6016,
"step": 184
},
{
"epoch": 0.64,
"learning_rate": 1.639225181598063e-05,
"loss": 0.5869,
"step": 185
},
{
"epoch": 0.65,
"learning_rate": 1.63680387409201e-05,
"loss": 0.5576,
"step": 186
},
{
"epoch": 0.65,
"learning_rate": 1.6343825665859565e-05,
"loss": 0.5938,
"step": 187
},
{
"epoch": 0.66,
"learning_rate": 1.631961259079903e-05,
"loss": 0.5879,
"step": 188
},
{
"epoch": 0.66,
"learning_rate": 1.62953995157385e-05,
"loss": 0.583,
"step": 189
},
{
"epoch": 0.66,
"learning_rate": 1.6271186440677967e-05,
"loss": 0.583,
"step": 190
},
{
"epoch": 0.67,
"learning_rate": 1.6246973365617437e-05,
"loss": 0.5781,
"step": 191
},
{
"epoch": 0.67,
"learning_rate": 1.6222760290556903e-05,
"loss": 0.5918,
"step": 192
},
{
"epoch": 0.67,
"learning_rate": 1.619854721549637e-05,
"loss": 0.5859,
"step": 193
},
{
"epoch": 0.68,
"learning_rate": 1.617433414043584e-05,
"loss": 0.6152,
"step": 194
},
{
"epoch": 0.68,
"learning_rate": 1.6150121065375304e-05,
"loss": 0.582,
"step": 195
},
{
"epoch": 0.68,
"learning_rate": 1.612590799031477e-05,
"loss": 0.5986,
"step": 196
},
{
"epoch": 0.69,
"learning_rate": 1.6101694915254237e-05,
"loss": 0.5752,
"step": 197
},
{
"epoch": 0.69,
"learning_rate": 1.6077481840193706e-05,
"loss": 0.5791,
"step": 198
},
{
"epoch": 0.69,
"learning_rate": 1.6053268765133176e-05,
"loss": 0.5732,
"step": 199
},
{
"epoch": 0.7,
"learning_rate": 1.6029055690072642e-05,
"loss": 0.6094,
"step": 200
},
{
"epoch": 0.7,
"learning_rate": 1.6004842615012108e-05,
"loss": 0.5996,
"step": 201
},
{
"epoch": 0.7,
"learning_rate": 1.5980629539951574e-05,
"loss": 0.6006,
"step": 202
},
{
"epoch": 0.71,
"learning_rate": 1.5956416464891044e-05,
"loss": 0.6113,
"step": 203
},
{
"epoch": 0.71,
"learning_rate": 1.593220338983051e-05,
"loss": 0.5889,
"step": 204
},
{
"epoch": 0.71,
"learning_rate": 1.5907990314769976e-05,
"loss": 0.5781,
"step": 205
},
{
"epoch": 0.72,
"learning_rate": 1.5883777239709442e-05,
"loss": 0.6113,
"step": 206
},
{
"epoch": 0.72,
"learning_rate": 1.5859564164648912e-05,
"loss": 0.5918,
"step": 207
},
{
"epoch": 0.72,
"learning_rate": 1.583535108958838e-05,
"loss": 0.5693,
"step": 208
},
{
"epoch": 0.73,
"learning_rate": 1.5811138014527847e-05,
"loss": 0.5986,
"step": 209
},
{
"epoch": 0.73,
"learning_rate": 1.5786924939467314e-05,
"loss": 0.5625,
"step": 210
},
{
"epoch": 0.74,
"learning_rate": 1.576271186440678e-05,
"loss": 0.5908,
"step": 211
},
{
"epoch": 0.74,
"learning_rate": 1.573849878934625e-05,
"loss": 0.5938,
"step": 212
},
{
"epoch": 0.74,
"learning_rate": 1.5714285714285715e-05,
"loss": 0.6123,
"step": 213
},
{
"epoch": 0.75,
"learning_rate": 1.569007263922518e-05,
"loss": 0.5879,
"step": 214
},
{
"epoch": 0.75,
"learning_rate": 1.566585956416465e-05,
"loss": 0.583,
"step": 215
},
{
"epoch": 0.75,
"learning_rate": 1.5641646489104117e-05,
"loss": 0.6094,
"step": 216
},
{
"epoch": 0.76,
"learning_rate": 1.5617433414043587e-05,
"loss": 0.624,
"step": 217
},
{
"epoch": 0.76,
"learning_rate": 1.5593220338983053e-05,
"loss": 0.5947,
"step": 218
},
{
"epoch": 0.76,
"learning_rate": 1.556900726392252e-05,
"loss": 0.6084,
"step": 219
},
{
"epoch": 0.77,
"learning_rate": 1.554479418886199e-05,
"loss": 0.5908,
"step": 220
},
{
"epoch": 0.77,
"learning_rate": 1.5520581113801455e-05,
"loss": 0.5938,
"step": 221
},
{
"epoch": 0.77,
"learning_rate": 1.549636803874092e-05,
"loss": 0.5625,
"step": 222
},
{
"epoch": 0.78,
"learning_rate": 1.5472154963680387e-05,
"loss": 0.583,
"step": 223
},
{
"epoch": 0.78,
"learning_rate": 1.5447941888619856e-05,
"loss": 0.5938,
"step": 224
},
{
"epoch": 0.78,
"learning_rate": 1.5423728813559326e-05,
"loss": 0.583,
"step": 225
},
{
"epoch": 0.79,
"learning_rate": 1.5399515738498792e-05,
"loss": 0.5918,
"step": 226
},
{
"epoch": 0.79,
"learning_rate": 1.5375302663438258e-05,
"loss": 0.582,
"step": 227
},
{
"epoch": 0.79,
"learning_rate": 1.5351089588377724e-05,
"loss": 0.5996,
"step": 228
},
{
"epoch": 0.8,
"learning_rate": 1.5326876513317194e-05,
"loss": 0.5684,
"step": 229
},
{
"epoch": 0.8,
"learning_rate": 1.530266343825666e-05,
"loss": 0.5996,
"step": 230
},
{
"epoch": 0.8,
"learning_rate": 1.5278450363196126e-05,
"loss": 0.5791,
"step": 231
},
{
"epoch": 0.81,
"learning_rate": 1.5254237288135594e-05,
"loss": 0.5869,
"step": 232
},
{
"epoch": 0.81,
"learning_rate": 1.523002421307506e-05,
"loss": 0.5625,
"step": 233
},
{
"epoch": 0.82,
"learning_rate": 1.520581113801453e-05,
"loss": 0.5771,
"step": 234
},
{
"epoch": 0.82,
"learning_rate": 1.5181598062953998e-05,
"loss": 0.5869,
"step": 235
},
{
"epoch": 0.82,
"learning_rate": 1.5157384987893464e-05,
"loss": 0.5654,
"step": 236
},
{
"epoch": 0.83,
"learning_rate": 1.5133171912832932e-05,
"loss": 0.5605,
"step": 237
},
{
"epoch": 0.83,
"learning_rate": 1.5108958837772398e-05,
"loss": 0.5967,
"step": 238
},
{
"epoch": 0.83,
"learning_rate": 1.5084745762711865e-05,
"loss": 0.5674,
"step": 239
},
{
"epoch": 0.84,
"learning_rate": 1.5060532687651332e-05,
"loss": 0.5889,
"step": 240
},
{
"epoch": 0.84,
"learning_rate": 1.50363196125908e-05,
"loss": 0.5947,
"step": 241
},
{
"epoch": 0.84,
"learning_rate": 1.5012106537530267e-05,
"loss": 0.5801,
"step": 242
},
{
"epoch": 0.85,
"learning_rate": 1.4987893462469735e-05,
"loss": 0.5771,
"step": 243
},
{
"epoch": 0.85,
"learning_rate": 1.4963680387409203e-05,
"loss": 0.6006,
"step": 244
},
{
"epoch": 0.85,
"learning_rate": 1.493946731234867e-05,
"loss": 0.5693,
"step": 245
},
{
"epoch": 0.86,
"learning_rate": 1.4915254237288137e-05,
"loss": 0.5898,
"step": 246
},
{
"epoch": 0.86,
"learning_rate": 1.4891041162227605e-05,
"loss": 0.5918,
"step": 247
},
{
"epoch": 0.86,
"learning_rate": 1.4866828087167071e-05,
"loss": 0.583,
"step": 248
},
{
"epoch": 0.87,
"learning_rate": 1.4842615012106539e-05,
"loss": 0.585,
"step": 249
},
{
"epoch": 0.87,
"learning_rate": 1.4818401937046005e-05,
"loss": 0.5664,
"step": 250
},
{
"epoch": 0.87,
"learning_rate": 1.4794188861985473e-05,
"loss": 0.5771,
"step": 251
},
{
"epoch": 0.88,
"learning_rate": 1.4769975786924942e-05,
"loss": 0.5771,
"step": 252
},
{
"epoch": 0.88,
"learning_rate": 1.4745762711864408e-05,
"loss": 0.6104,
"step": 253
},
{
"epoch": 0.89,
"learning_rate": 1.4721549636803876e-05,
"loss": 0.5732,
"step": 254
},
{
"epoch": 0.89,
"learning_rate": 1.4697336561743342e-05,
"loss": 0.5859,
"step": 255
},
{
"epoch": 0.89,
"learning_rate": 1.467312348668281e-05,
"loss": 0.6025,
"step": 256
},
{
"epoch": 0.9,
"learning_rate": 1.4648910411622276e-05,
"loss": 0.5869,
"step": 257
},
{
"epoch": 0.9,
"learning_rate": 1.4624697336561744e-05,
"loss": 0.6006,
"step": 258
},
{
"epoch": 0.9,
"learning_rate": 1.460048426150121e-05,
"loss": 0.585,
"step": 259
},
{
"epoch": 0.91,
"learning_rate": 1.4576271186440678e-05,
"loss": 0.5859,
"step": 260
},
{
"epoch": 0.91,
"learning_rate": 1.4552058111380148e-05,
"loss": 0.5654,
"step": 261
},
{
"epoch": 0.91,
"learning_rate": 1.4527845036319614e-05,
"loss": 0.5811,
"step": 262
},
{
"epoch": 0.92,
"learning_rate": 1.4503631961259082e-05,
"loss": 0.6006,
"step": 263
},
{
"epoch": 0.92,
"learning_rate": 1.4479418886198548e-05,
"loss": 0.5674,
"step": 264
},
{
"epoch": 0.92,
"learning_rate": 1.4455205811138016e-05,
"loss": 0.5996,
"step": 265
},
{
"epoch": 0.93,
"learning_rate": 1.4430992736077482e-05,
"loss": 0.5889,
"step": 266
},
{
"epoch": 0.93,
"learning_rate": 1.440677966101695e-05,
"loss": 0.585,
"step": 267
},
{
"epoch": 0.93,
"learning_rate": 1.4382566585956417e-05,
"loss": 0.5625,
"step": 268
},
{
"epoch": 0.94,
"learning_rate": 1.4358353510895885e-05,
"loss": 0.5791,
"step": 269
},
{
"epoch": 0.94,
"learning_rate": 1.4334140435835353e-05,
"loss": 0.5869,
"step": 270
},
{
"epoch": 0.94,
"learning_rate": 1.4309927360774821e-05,
"loss": 0.5918,
"step": 271
},
{
"epoch": 0.95,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.5791,
"step": 272
},
{
"epoch": 0.95,
"learning_rate": 1.4261501210653755e-05,
"loss": 0.5674,
"step": 273
},
{
"epoch": 0.95,
"learning_rate": 1.4237288135593221e-05,
"loss": 0.5947,
"step": 274
},
{
"epoch": 0.96,
"learning_rate": 1.4213075060532689e-05,
"loss": 0.6133,
"step": 275
},
{
"epoch": 0.96,
"learning_rate": 1.4188861985472155e-05,
"loss": 0.5986,
"step": 276
},
{
"epoch": 0.97,
"learning_rate": 1.4164648910411623e-05,
"loss": 0.5996,
"step": 277
},
{
"epoch": 0.97,
"learning_rate": 1.4140435835351092e-05,
"loss": 0.5713,
"step": 278
},
{
"epoch": 0.97,
"learning_rate": 1.4116222760290559e-05,
"loss": 0.5762,
"step": 279
},
{
"epoch": 0.98,
"learning_rate": 1.4092009685230026e-05,
"loss": 0.5908,
"step": 280
},
{
"epoch": 0.98,
"learning_rate": 1.4067796610169493e-05,
"loss": 0.5703,
"step": 281
},
{
"epoch": 0.98,
"learning_rate": 1.404358353510896e-05,
"loss": 0.5596,
"step": 282
},
{
"epoch": 0.99,
"learning_rate": 1.4019370460048427e-05,
"loss": 0.5908,
"step": 283
},
{
"epoch": 0.99,
"learning_rate": 1.3995157384987894e-05,
"loss": 0.5674,
"step": 284
},
{
"epoch": 0.99,
"learning_rate": 1.397094430992736e-05,
"loss": 0.5781,
"step": 285
},
{
"epoch": 1.0,
"learning_rate": 1.3946731234866828e-05,
"loss": 0.5762,
"step": 286
},
{
"epoch": 1.0,
"learning_rate": 1.3922518159806298e-05,
"loss": 0.7271,
"step": 287
},
{
"epoch": 1.0,
"learning_rate": 1.3898305084745764e-05,
"loss": 0.3965,
"step": 288
},
{
"epoch": 1.01,
"learning_rate": 1.3874092009685232e-05,
"loss": 0.3896,
"step": 289
},
{
"epoch": 1.01,
"learning_rate": 1.3849878934624698e-05,
"loss": 0.4058,
"step": 290
},
{
"epoch": 1.01,
"learning_rate": 1.3825665859564166e-05,
"loss": 0.3989,
"step": 291
},
{
"epoch": 1.02,
"learning_rate": 1.3801452784503632e-05,
"loss": 0.4155,
"step": 292
},
{
"epoch": 1.02,
"learning_rate": 1.37772397094431e-05,
"loss": 0.4067,
"step": 293
},
{
"epoch": 1.02,
"learning_rate": 1.3753026634382568e-05,
"loss": 0.4009,
"step": 294
},
{
"epoch": 1.03,
"learning_rate": 1.3728813559322034e-05,
"loss": 0.416,
"step": 295
},
{
"epoch": 1.03,
"learning_rate": 1.3704600484261503e-05,
"loss": 0.4058,
"step": 296
},
{
"epoch": 1.03,
"learning_rate": 1.368038740920097e-05,
"loss": 0.3701,
"step": 297
},
{
"epoch": 1.04,
"learning_rate": 1.3656174334140437e-05,
"loss": 0.3926,
"step": 298
},
{
"epoch": 1.04,
"learning_rate": 1.3631961259079905e-05,
"loss": 0.3955,
"step": 299
},
{
"epoch": 1.05,
"learning_rate": 1.3607748184019371e-05,
"loss": 0.3755,
"step": 300
},
{
"epoch": 1.05,
"learning_rate": 1.3583535108958839e-05,
"loss": 0.3726,
"step": 301
},
{
"epoch": 1.05,
"learning_rate": 1.3559322033898305e-05,
"loss": 0.3911,
"step": 302
},
{
"epoch": 1.06,
"learning_rate": 1.3535108958837773e-05,
"loss": 0.3706,
"step": 303
},
{
"epoch": 1.06,
"learning_rate": 1.351089588377724e-05,
"loss": 0.3696,
"step": 304
},
{
"epoch": 1.06,
"learning_rate": 1.3486682808716709e-05,
"loss": 0.3804,
"step": 305
},
{
"epoch": 1.07,
"learning_rate": 1.3462469733656177e-05,
"loss": 0.373,
"step": 306
},
{
"epoch": 1.07,
"learning_rate": 1.3438256658595643e-05,
"loss": 0.3667,
"step": 307
},
{
"epoch": 1.07,
"learning_rate": 1.341404358353511e-05,
"loss": 0.3643,
"step": 308
},
{
"epoch": 1.08,
"learning_rate": 1.3389830508474577e-05,
"loss": 0.3457,
"step": 309
},
{
"epoch": 1.08,
"learning_rate": 1.3365617433414045e-05,
"loss": 0.3525,
"step": 310
},
{
"epoch": 1.08,
"learning_rate": 1.334140435835351e-05,
"loss": 0.356,
"step": 311
},
{
"epoch": 1.09,
"learning_rate": 1.3317191283292979e-05,
"loss": 0.3545,
"step": 312
},
{
"epoch": 1.09,
"learning_rate": 1.3292978208232448e-05,
"loss": 0.353,
"step": 313
},
{
"epoch": 1.09,
"learning_rate": 1.3268765133171914e-05,
"loss": 0.3447,
"step": 314
},
{
"epoch": 1.1,
"learning_rate": 1.3244552058111382e-05,
"loss": 0.3569,
"step": 315
},
{
"epoch": 1.1,
"learning_rate": 1.3220338983050848e-05,
"loss": 0.3545,
"step": 316
},
{
"epoch": 1.1,
"learning_rate": 1.3196125907990316e-05,
"loss": 0.3438,
"step": 317
},
{
"epoch": 1.11,
"learning_rate": 1.3171912832929782e-05,
"loss": 0.3423,
"step": 318
},
{
"epoch": 1.11,
"learning_rate": 1.314769975786925e-05,
"loss": 0.3594,
"step": 319
},
{
"epoch": 1.11,
"learning_rate": 1.3123486682808718e-05,
"loss": 0.3442,
"step": 320
},
{
"epoch": 1.12,
"learning_rate": 1.3099273607748184e-05,
"loss": 0.3662,
"step": 321
},
{
"epoch": 1.12,
"learning_rate": 1.3075060532687653e-05,
"loss": 0.3584,
"step": 322
},
{
"epoch": 1.13,
"learning_rate": 1.305084745762712e-05,
"loss": 0.3511,
"step": 323
},
{
"epoch": 1.13,
"learning_rate": 1.3026634382566587e-05,
"loss": 0.3711,
"step": 324
},
{
"epoch": 1.13,
"learning_rate": 1.3002421307506055e-05,
"loss": 0.3486,
"step": 325
},
{
"epoch": 1.14,
"learning_rate": 1.2978208232445521e-05,
"loss": 0.355,
"step": 326
},
{
"epoch": 1.14,
"learning_rate": 1.295399515738499e-05,
"loss": 0.3589,
"step": 327
},
{
"epoch": 1.14,
"learning_rate": 1.2929782082324455e-05,
"loss": 0.3447,
"step": 328
},
{
"epoch": 1.15,
"learning_rate": 1.2905569007263923e-05,
"loss": 0.3442,
"step": 329
},
{
"epoch": 1.15,
"learning_rate": 1.288135593220339e-05,
"loss": 0.335,
"step": 330
},
{
"epoch": 1.15,
"learning_rate": 1.2857142857142859e-05,
"loss": 0.3369,
"step": 331
},
{
"epoch": 1.16,
"learning_rate": 1.2832929782082327e-05,
"loss": 0.3467,
"step": 332
},
{
"epoch": 1.16,
"learning_rate": 1.2808716707021793e-05,
"loss": 0.3428,
"step": 333
},
{
"epoch": 1.16,
"learning_rate": 1.278450363196126e-05,
"loss": 0.3452,
"step": 334
},
{
"epoch": 1.17,
"learning_rate": 1.2760290556900727e-05,
"loss": 0.3428,
"step": 335
},
{
"epoch": 1.17,
"learning_rate": 1.2736077481840195e-05,
"loss": 0.3525,
"step": 336
},
{
"epoch": 1.17,
"learning_rate": 1.2711864406779661e-05,
"loss": 0.3398,
"step": 337
},
{
"epoch": 1.18,
"learning_rate": 1.2687651331719129e-05,
"loss": 0.3477,
"step": 338
},
{
"epoch": 1.18,
"learning_rate": 1.2663438256658595e-05,
"loss": 0.3433,
"step": 339
},
{
"epoch": 1.18,
"learning_rate": 1.2639225181598064e-05,
"loss": 0.3438,
"step": 340
},
{
"epoch": 1.19,
"learning_rate": 1.2615012106537532e-05,
"loss": 0.3442,
"step": 341
},
{
"epoch": 1.19,
"learning_rate": 1.2590799031476998e-05,
"loss": 0.3457,
"step": 342
},
{
"epoch": 1.2,
"learning_rate": 1.2566585956416466e-05,
"loss": 0.3374,
"step": 343
},
{
"epoch": 1.2,
"learning_rate": 1.2542372881355932e-05,
"loss": 0.333,
"step": 344
},
{
"epoch": 1.2,
"learning_rate": 1.25181598062954e-05,
"loss": 0.3486,
"step": 345
},
{
"epoch": 1.21,
"learning_rate": 1.2493946731234868e-05,
"loss": 0.3462,
"step": 346
},
{
"epoch": 1.21,
"learning_rate": 1.2469733656174334e-05,
"loss": 0.3335,
"step": 347
},
{
"epoch": 1.21,
"learning_rate": 1.2445520581113804e-05,
"loss": 0.3379,
"step": 348
},
{
"epoch": 1.22,
"learning_rate": 1.242130750605327e-05,
"loss": 0.3467,
"step": 349
},
{
"epoch": 1.22,
"learning_rate": 1.2397094430992738e-05,
"loss": 0.3481,
"step": 350
},
{
"epoch": 1.22,
"learning_rate": 1.2372881355932205e-05,
"loss": 0.354,
"step": 351
},
{
"epoch": 1.23,
"learning_rate": 1.2348668280871672e-05,
"loss": 0.3433,
"step": 352
},
{
"epoch": 1.23,
"learning_rate": 1.232445520581114e-05,
"loss": 0.3501,
"step": 353
},
{
"epoch": 1.23,
"learning_rate": 1.2300242130750606e-05,
"loss": 0.3369,
"step": 354
},
{
"epoch": 1.24,
"learning_rate": 1.2276029055690073e-05,
"loss": 0.3403,
"step": 355
},
{
"epoch": 1.24,
"learning_rate": 1.225181598062954e-05,
"loss": 0.3535,
"step": 356
},
{
"epoch": 1.24,
"learning_rate": 1.2227602905569009e-05,
"loss": 0.353,
"step": 357
},
{
"epoch": 1.25,
"learning_rate": 1.2203389830508477e-05,
"loss": 0.3555,
"step": 358
},
{
"epoch": 1.25,
"learning_rate": 1.2179176755447943e-05,
"loss": 0.3389,
"step": 359
},
{
"epoch": 1.25,
"learning_rate": 1.2154963680387411e-05,
"loss": 0.3657,
"step": 360
},
{
"epoch": 1.26,
"learning_rate": 1.2130750605326877e-05,
"loss": 0.3398,
"step": 361
},
{
"epoch": 1.26,
"learning_rate": 1.2106537530266345e-05,
"loss": 0.3374,
"step": 362
},
{
"epoch": 1.26,
"learning_rate": 1.2082324455205811e-05,
"loss": 0.3325,
"step": 363
},
{
"epoch": 1.27,
"learning_rate": 1.2058111380145279e-05,
"loss": 0.3452,
"step": 364
},
{
"epoch": 1.27,
"learning_rate": 1.2033898305084745e-05,
"loss": 0.3281,
"step": 365
},
{
"epoch": 1.28,
"learning_rate": 1.2009685230024215e-05,
"loss": 0.3228,
"step": 366
},
{
"epoch": 1.28,
"learning_rate": 1.1985472154963682e-05,
"loss": 0.353,
"step": 367
},
{
"epoch": 1.28,
"learning_rate": 1.1961259079903149e-05,
"loss": 0.3457,
"step": 368
},
{
"epoch": 1.29,
"learning_rate": 1.1937046004842616e-05,
"loss": 0.3506,
"step": 369
},
{
"epoch": 1.29,
"learning_rate": 1.1912832929782082e-05,
"loss": 0.3457,
"step": 370
},
{
"epoch": 1.29,
"learning_rate": 1.188861985472155e-05,
"loss": 0.3413,
"step": 371
},
{
"epoch": 1.3,
"learning_rate": 1.1864406779661018e-05,
"loss": 0.3462,
"step": 372
},
{
"epoch": 1.3,
"learning_rate": 1.1840193704600484e-05,
"loss": 0.3354,
"step": 373
},
{
"epoch": 1.3,
"learning_rate": 1.1815980629539952e-05,
"loss": 0.3452,
"step": 374
},
{
"epoch": 1.31,
"learning_rate": 1.179176755447942e-05,
"loss": 0.3594,
"step": 375
},
{
"epoch": 1.31,
"learning_rate": 1.1767554479418888e-05,
"loss": 0.3511,
"step": 376
},
{
"epoch": 1.31,
"learning_rate": 1.1743341404358356e-05,
"loss": 0.3521,
"step": 377
},
{
"epoch": 1.32,
"learning_rate": 1.1719128329297822e-05,
"loss": 0.3311,
"step": 378
},
{
"epoch": 1.32,
"learning_rate": 1.169491525423729e-05,
"loss": 0.3301,
"step": 379
},
{
"epoch": 1.32,
"learning_rate": 1.1670702179176756e-05,
"loss": 0.3564,
"step": 380
},
{
"epoch": 1.33,
"learning_rate": 1.1646489104116224e-05,
"loss": 0.3345,
"step": 381
},
{
"epoch": 1.33,
"learning_rate": 1.162227602905569e-05,
"loss": 0.355,
"step": 382
},
{
"epoch": 1.33,
"learning_rate": 1.1598062953995158e-05,
"loss": 0.3364,
"step": 383
},
{
"epoch": 1.34,
"learning_rate": 1.1573849878934627e-05,
"loss": 0.3335,
"step": 384
},
{
"epoch": 1.34,
"learning_rate": 1.1549636803874093e-05,
"loss": 0.3477,
"step": 385
},
{
"epoch": 1.34,
"learning_rate": 1.1525423728813561e-05,
"loss": 0.3433,
"step": 386
},
{
"epoch": 1.35,
"learning_rate": 1.1501210653753027e-05,
"loss": 0.3467,
"step": 387
},
{
"epoch": 1.35,
"learning_rate": 1.1476997578692495e-05,
"loss": 0.3462,
"step": 388
},
{
"epoch": 1.36,
"learning_rate": 1.1452784503631961e-05,
"loss": 0.3354,
"step": 389
},
{
"epoch": 1.36,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.334,
"step": 390
},
{
"epoch": 1.36,
"learning_rate": 1.1404358353510895e-05,
"loss": 0.3354,
"step": 391
},
{
"epoch": 1.37,
"learning_rate": 1.1380145278450365e-05,
"loss": 0.3501,
"step": 392
},
{
"epoch": 1.37,
"learning_rate": 1.1355932203389833e-05,
"loss": 0.3564,
"step": 393
},
{
"epoch": 1.37,
"learning_rate": 1.1331719128329299e-05,
"loss": 0.3442,
"step": 394
},
{
"epoch": 1.38,
"learning_rate": 1.1307506053268767e-05,
"loss": 0.333,
"step": 395
},
{
"epoch": 1.38,
"learning_rate": 1.1283292978208233e-05,
"loss": 0.3428,
"step": 396
},
{
"epoch": 1.38,
"learning_rate": 1.12590799031477e-05,
"loss": 0.3564,
"step": 397
},
{
"epoch": 1.39,
"learning_rate": 1.1234866828087168e-05,
"loss": 0.3359,
"step": 398
},
{
"epoch": 1.39,
"learning_rate": 1.1210653753026634e-05,
"loss": 0.3335,
"step": 399
},
{
"epoch": 1.39,
"learning_rate": 1.1186440677966102e-05,
"loss": 0.3462,
"step": 400
},
{
"epoch": 1.4,
"learning_rate": 1.116222760290557e-05,
"loss": 0.3408,
"step": 401
},
{
"epoch": 1.4,
"learning_rate": 1.1138014527845038e-05,
"loss": 0.3335,
"step": 402
},
{
"epoch": 1.4,
"learning_rate": 1.1113801452784506e-05,
"loss": 0.3457,
"step": 403
},
{
"epoch": 1.41,
"learning_rate": 1.1089588377723972e-05,
"loss": 0.3457,
"step": 404
},
{
"epoch": 1.41,
"learning_rate": 1.106537530266344e-05,
"loss": 0.3477,
"step": 405
},
{
"epoch": 1.41,
"learning_rate": 1.1041162227602906e-05,
"loss": 0.3564,
"step": 406
},
{
"epoch": 1.42,
"learning_rate": 1.1016949152542374e-05,
"loss": 0.3135,
"step": 407
},
{
"epoch": 1.42,
"learning_rate": 1.099273607748184e-05,
"loss": 0.3438,
"step": 408
},
{
"epoch": 1.43,
"learning_rate": 1.0968523002421308e-05,
"loss": 0.3389,
"step": 409
},
{
"epoch": 1.43,
"learning_rate": 1.0944309927360777e-05,
"loss": 0.3447,
"step": 410
},
{
"epoch": 1.43,
"learning_rate": 1.0920096852300243e-05,
"loss": 0.3428,
"step": 411
},
{
"epoch": 1.44,
"learning_rate": 1.0895883777239711e-05,
"loss": 0.3545,
"step": 412
},
{
"epoch": 1.44,
"learning_rate": 1.0871670702179177e-05,
"loss": 0.3394,
"step": 413
},
{
"epoch": 1.44,
"learning_rate": 1.0847457627118645e-05,
"loss": 0.3442,
"step": 414
},
{
"epoch": 1.45,
"learning_rate": 1.0823244552058111e-05,
"loss": 0.3237,
"step": 415
},
{
"epoch": 1.45,
"learning_rate": 1.079903147699758e-05,
"loss": 0.3286,
"step": 416
},
{
"epoch": 1.45,
"learning_rate": 1.0774818401937045e-05,
"loss": 0.3296,
"step": 417
},
{
"epoch": 1.46,
"learning_rate": 1.0750605326876513e-05,
"loss": 0.3447,
"step": 418
},
{
"epoch": 1.46,
"learning_rate": 1.0726392251815983e-05,
"loss": 0.3301,
"step": 419
},
{
"epoch": 1.46,
"learning_rate": 1.0702179176755449e-05,
"loss": 0.3398,
"step": 420
},
{
"epoch": 1.47,
"learning_rate": 1.0677966101694917e-05,
"loss": 0.3452,
"step": 421
},
{
"epoch": 1.47,
"learning_rate": 1.0653753026634383e-05,
"loss": 0.3608,
"step": 422
},
{
"epoch": 1.47,
"learning_rate": 1.062953995157385e-05,
"loss": 0.3345,
"step": 423
},
{
"epoch": 1.48,
"learning_rate": 1.0605326876513318e-05,
"loss": 0.3291,
"step": 424
},
{
"epoch": 1.48,
"learning_rate": 1.0581113801452785e-05,
"loss": 0.3667,
"step": 425
},
{
"epoch": 1.48,
"learning_rate": 1.0556900726392252e-05,
"loss": 0.3594,
"step": 426
},
{
"epoch": 1.49,
"learning_rate": 1.0532687651331719e-05,
"loss": 0.3579,
"step": 427
},
{
"epoch": 1.49,
"learning_rate": 1.0508474576271188e-05,
"loss": 0.3345,
"step": 428
},
{
"epoch": 1.49,
"learning_rate": 1.0484261501210656e-05,
"loss": 0.3281,
"step": 429
},
{
"epoch": 1.5,
"learning_rate": 1.0460048426150122e-05,
"loss": 0.3403,
"step": 430
},
{
"epoch": 1.5,
"learning_rate": 1.043583535108959e-05,
"loss": 0.3604,
"step": 431
},
{
"epoch": 1.51,
"learning_rate": 1.0411622276029056e-05,
"loss": 0.3335,
"step": 432
},
{
"epoch": 1.51,
"learning_rate": 1.0387409200968524e-05,
"loss": 0.3462,
"step": 433
},
{
"epoch": 1.51,
"learning_rate": 1.036319612590799e-05,
"loss": 0.3423,
"step": 434
},
{
"epoch": 1.52,
"learning_rate": 1.0338983050847458e-05,
"loss": 0.3364,
"step": 435
},
{
"epoch": 1.52,
"learning_rate": 1.0314769975786927e-05,
"loss": 0.3496,
"step": 436
},
{
"epoch": 1.52,
"learning_rate": 1.0290556900726394e-05,
"loss": 0.3325,
"step": 437
},
{
"epoch": 1.53,
"learning_rate": 1.0266343825665861e-05,
"loss": 0.3413,
"step": 438
},
{
"epoch": 1.53,
"learning_rate": 1.0242130750605328e-05,
"loss": 0.335,
"step": 439
},
{
"epoch": 1.53,
"learning_rate": 1.0217917675544795e-05,
"loss": 0.3423,
"step": 440
},
{
"epoch": 1.54,
"learning_rate": 1.0193704600484262e-05,
"loss": 0.3335,
"step": 441
},
{
"epoch": 1.54,
"learning_rate": 1.016949152542373e-05,
"loss": 0.3379,
"step": 442
},
{
"epoch": 1.54,
"learning_rate": 1.0145278450363195e-05,
"loss": 0.3506,
"step": 443
},
{
"epoch": 1.55,
"learning_rate": 1.0121065375302663e-05,
"loss": 0.3457,
"step": 444
},
{
"epoch": 1.55,
"learning_rate": 1.0096852300242133e-05,
"loss": 0.334,
"step": 445
},
{
"epoch": 1.55,
"learning_rate": 1.0072639225181599e-05,
"loss": 0.3408,
"step": 446
},
{
"epoch": 1.56,
"learning_rate": 1.0048426150121067e-05,
"loss": 0.3525,
"step": 447
},
{
"epoch": 1.56,
"learning_rate": 1.0024213075060533e-05,
"loss": 0.3491,
"step": 448
},
{
"epoch": 1.56,
"learning_rate": 1e-05,
"loss": 0.3286,
"step": 449
},
{
"epoch": 1.57,
"learning_rate": 9.975786924939469e-06,
"loss": 0.3159,
"step": 450
},
{
"epoch": 1.57,
"learning_rate": 9.951573849878935e-06,
"loss": 0.3252,
"step": 451
},
{
"epoch": 1.57,
"learning_rate": 9.927360774818403e-06,
"loss": 0.335,
"step": 452
},
{
"epoch": 1.58,
"learning_rate": 9.90314769975787e-06,
"loss": 0.3481,
"step": 453
},
{
"epoch": 1.58,
"learning_rate": 9.878934624697337e-06,
"loss": 0.3403,
"step": 454
},
{
"epoch": 1.59,
"learning_rate": 9.854721549636804e-06,
"loss": 0.3467,
"step": 455
},
{
"epoch": 1.59,
"learning_rate": 9.830508474576272e-06,
"loss": 0.3364,
"step": 456
},
{
"epoch": 1.59,
"learning_rate": 9.80629539951574e-06,
"loss": 0.3589,
"step": 457
},
{
"epoch": 1.6,
"learning_rate": 9.782082324455206e-06,
"loss": 0.3408,
"step": 458
},
{
"epoch": 1.6,
"learning_rate": 9.757869249394674e-06,
"loss": 0.3359,
"step": 459
},
{
"epoch": 1.6,
"learning_rate": 9.733656174334142e-06,
"loss": 0.3403,
"step": 460
},
{
"epoch": 1.61,
"learning_rate": 9.709443099273608e-06,
"loss": 0.3462,
"step": 461
},
{
"epoch": 1.61,
"learning_rate": 9.685230024213076e-06,
"loss": 0.3335,
"step": 462
},
{
"epoch": 1.61,
"learning_rate": 9.661016949152544e-06,
"loss": 0.3516,
"step": 463
},
{
"epoch": 1.62,
"learning_rate": 9.63680387409201e-06,
"loss": 0.3638,
"step": 464
},
{
"epoch": 1.62,
"learning_rate": 9.612590799031478e-06,
"loss": 0.3364,
"step": 465
},
{
"epoch": 1.62,
"learning_rate": 9.588377723970946e-06,
"loss": 0.3364,
"step": 466
},
{
"epoch": 1.63,
"learning_rate": 9.564164648910412e-06,
"loss": 0.3496,
"step": 467
},
{
"epoch": 1.63,
"learning_rate": 9.53995157384988e-06,
"loss": 0.3403,
"step": 468
},
{
"epoch": 1.63,
"learning_rate": 9.515738498789347e-06,
"loss": 0.3481,
"step": 469
},
{
"epoch": 1.64,
"learning_rate": 9.491525423728815e-06,
"loss": 0.3374,
"step": 470
},
{
"epoch": 1.64,
"learning_rate": 9.467312348668281e-06,
"loss": 0.3174,
"step": 471
},
{
"epoch": 1.64,
"learning_rate": 9.44309927360775e-06,
"loss": 0.3633,
"step": 472
},
{
"epoch": 1.65,
"learning_rate": 9.418886198547215e-06,
"loss": 0.3472,
"step": 473
},
{
"epoch": 1.65,
"learning_rate": 9.394673123486683e-06,
"loss": 0.3379,
"step": 474
},
{
"epoch": 1.66,
"learning_rate": 9.370460048426151e-06,
"loss": 0.3428,
"step": 475
},
{
"epoch": 1.66,
"learning_rate": 9.346246973365619e-06,
"loss": 0.3369,
"step": 476
},
{
"epoch": 1.66,
"learning_rate": 9.322033898305085e-06,
"loss": 0.354,
"step": 477
},
{
"epoch": 1.67,
"learning_rate": 9.297820823244553e-06,
"loss": 0.3467,
"step": 478
},
{
"epoch": 1.67,
"learning_rate": 9.27360774818402e-06,
"loss": 0.3379,
"step": 479
},
{
"epoch": 1.67,
"learning_rate": 9.249394673123487e-06,
"loss": 0.3584,
"step": 480
},
{
"epoch": 1.68,
"learning_rate": 9.225181598062955e-06,
"loss": 0.3335,
"step": 481
},
{
"epoch": 1.68,
"learning_rate": 9.200968523002422e-06,
"loss": 0.3247,
"step": 482
},
{
"epoch": 1.68,
"learning_rate": 9.17675544794189e-06,
"loss": 0.3403,
"step": 483
},
{
"epoch": 1.69,
"learning_rate": 9.152542372881356e-06,
"loss": 0.3442,
"step": 484
},
{
"epoch": 1.69,
"learning_rate": 9.128329297820824e-06,
"loss": 0.334,
"step": 485
},
{
"epoch": 1.69,
"learning_rate": 9.10411622276029e-06,
"loss": 0.3369,
"step": 486
},
{
"epoch": 1.7,
"learning_rate": 9.079903147699758e-06,
"loss": 0.3496,
"step": 487
},
{
"epoch": 1.7,
"learning_rate": 9.055690072639226e-06,
"loss": 0.3477,
"step": 488
},
{
"epoch": 1.7,
"learning_rate": 9.031476997578694e-06,
"loss": 0.3447,
"step": 489
},
{
"epoch": 1.71,
"learning_rate": 9.00726392251816e-06,
"loss": 0.3286,
"step": 490
},
{
"epoch": 1.71,
"learning_rate": 8.983050847457628e-06,
"loss": 0.3306,
"step": 491
},
{
"epoch": 1.71,
"learning_rate": 8.958837772397096e-06,
"loss": 0.3379,
"step": 492
},
{
"epoch": 1.72,
"learning_rate": 8.934624697336562e-06,
"loss": 0.3257,
"step": 493
},
{
"epoch": 1.72,
"learning_rate": 8.91041162227603e-06,
"loss": 0.3447,
"step": 494
},
{
"epoch": 1.72,
"learning_rate": 8.886198547215496e-06,
"loss": 0.3237,
"step": 495
},
{
"epoch": 1.73,
"learning_rate": 8.861985472154965e-06,
"loss": 0.3296,
"step": 496
},
{
"epoch": 1.73,
"learning_rate": 8.837772397094432e-06,
"loss": 0.3452,
"step": 497
},
{
"epoch": 1.74,
"learning_rate": 8.8135593220339e-06,
"loss": 0.3257,
"step": 498
},
{
"epoch": 1.74,
"learning_rate": 8.789346246973365e-06,
"loss": 0.353,
"step": 499
},
{
"epoch": 1.74,
"learning_rate": 8.765133171912833e-06,
"loss": 0.3345,
"step": 500
},
{
"epoch": 1.75,
"learning_rate": 8.740920096852301e-06,
"loss": 0.3374,
"step": 501
},
{
"epoch": 1.75,
"learning_rate": 8.716707021791769e-06,
"loss": 0.3418,
"step": 502
},
{
"epoch": 1.75,
"learning_rate": 8.692493946731235e-06,
"loss": 0.3481,
"step": 503
},
{
"epoch": 1.76,
"learning_rate": 8.668280871670703e-06,
"loss": 0.3389,
"step": 504
},
{
"epoch": 1.76,
"learning_rate": 8.64406779661017e-06,
"loss": 0.3413,
"step": 505
},
{
"epoch": 1.76,
"learning_rate": 8.619854721549637e-06,
"loss": 0.3438,
"step": 506
},
{
"epoch": 1.77,
"learning_rate": 8.595641646489105e-06,
"loss": 0.332,
"step": 507
},
{
"epoch": 1.77,
"learning_rate": 8.571428571428571e-06,
"loss": 0.3687,
"step": 508
},
{
"epoch": 1.77,
"learning_rate": 8.54721549636804e-06,
"loss": 0.3315,
"step": 509
},
{
"epoch": 1.78,
"learning_rate": 8.523002421307507e-06,
"loss": 0.3442,
"step": 510
},
{
"epoch": 1.78,
"learning_rate": 8.498789346246974e-06,
"loss": 0.3467,
"step": 511
},
{
"epoch": 1.78,
"learning_rate": 8.47457627118644e-06,
"loss": 0.3325,
"step": 512
},
{
"epoch": 1.79,
"learning_rate": 8.450363196125908e-06,
"loss": 0.3213,
"step": 513
},
{
"epoch": 1.79,
"learning_rate": 8.426150121065376e-06,
"loss": 0.3325,
"step": 514
},
{
"epoch": 1.79,
"learning_rate": 8.401937046004844e-06,
"loss": 0.334,
"step": 515
},
{
"epoch": 1.8,
"learning_rate": 8.37772397094431e-06,
"loss": 0.3418,
"step": 516
},
{
"epoch": 1.8,
"learning_rate": 8.353510895883778e-06,
"loss": 0.3311,
"step": 517
},
{
"epoch": 1.8,
"learning_rate": 8.329297820823246e-06,
"loss": 0.3394,
"step": 518
},
{
"epoch": 1.81,
"learning_rate": 8.305084745762712e-06,
"loss": 0.3306,
"step": 519
},
{
"epoch": 1.81,
"learning_rate": 8.28087167070218e-06,
"loss": 0.3506,
"step": 520
},
{
"epoch": 1.82,
"learning_rate": 8.256658595641646e-06,
"loss": 0.3291,
"step": 521
},
{
"epoch": 1.82,
"learning_rate": 8.232445520581116e-06,
"loss": 0.353,
"step": 522
},
{
"epoch": 1.82,
"learning_rate": 8.208232445520582e-06,
"loss": 0.354,
"step": 523
},
{
"epoch": 1.83,
"learning_rate": 8.18401937046005e-06,
"loss": 0.3364,
"step": 524
},
{
"epoch": 1.83,
"learning_rate": 8.159806295399516e-06,
"loss": 0.3394,
"step": 525
},
{
"epoch": 1.83,
"learning_rate": 8.135593220338983e-06,
"loss": 0.3457,
"step": 526
},
{
"epoch": 1.84,
"learning_rate": 8.111380145278451e-06,
"loss": 0.3296,
"step": 527
},
{
"epoch": 1.84,
"learning_rate": 8.08716707021792e-06,
"loss": 0.3569,
"step": 528
},
{
"epoch": 1.84,
"learning_rate": 8.062953995157385e-06,
"loss": 0.3389,
"step": 529
},
{
"epoch": 1.85,
"learning_rate": 8.038740920096853e-06,
"loss": 0.3394,
"step": 530
},
{
"epoch": 1.85,
"learning_rate": 8.014527845036321e-06,
"loss": 0.3364,
"step": 531
},
{
"epoch": 1.85,
"learning_rate": 7.990314769975787e-06,
"loss": 0.3467,
"step": 532
},
{
"epoch": 1.86,
"learning_rate": 7.966101694915255e-06,
"loss": 0.3462,
"step": 533
},
{
"epoch": 1.86,
"learning_rate": 7.941888619854721e-06,
"loss": 0.334,
"step": 534
},
{
"epoch": 1.86,
"learning_rate": 7.91767554479419e-06,
"loss": 0.3501,
"step": 535
},
{
"epoch": 1.87,
"learning_rate": 7.893462469733657e-06,
"loss": 0.3296,
"step": 536
},
{
"epoch": 1.87,
"learning_rate": 7.869249394673125e-06,
"loss": 0.3438,
"step": 537
},
{
"epoch": 1.87,
"learning_rate": 7.84503631961259e-06,
"loss": 0.3306,
"step": 538
},
{
"epoch": 1.88,
"learning_rate": 7.820823244552059e-06,
"loss": 0.3491,
"step": 539
},
{
"epoch": 1.88,
"learning_rate": 7.796610169491526e-06,
"loss": 0.3286,
"step": 540
},
{
"epoch": 1.89,
"learning_rate": 7.772397094430994e-06,
"loss": 0.356,
"step": 541
},
{
"epoch": 1.89,
"learning_rate": 7.74818401937046e-06,
"loss": 0.3535,
"step": 542
},
{
"epoch": 1.89,
"learning_rate": 7.723970944309928e-06,
"loss": 0.3345,
"step": 543
},
{
"epoch": 1.9,
"learning_rate": 7.699757869249396e-06,
"loss": 0.3398,
"step": 544
},
{
"epoch": 1.9,
"learning_rate": 7.675544794188862e-06,
"loss": 0.3296,
"step": 545
},
{
"epoch": 1.9,
"learning_rate": 7.65133171912833e-06,
"loss": 0.3496,
"step": 546
},
{
"epoch": 1.91,
"learning_rate": 7.627118644067797e-06,
"loss": 0.3301,
"step": 547
},
{
"epoch": 1.91,
"learning_rate": 7.602905569007265e-06,
"loss": 0.3574,
"step": 548
},
{
"epoch": 1.91,
"learning_rate": 7.578692493946732e-06,
"loss": 0.3418,
"step": 549
},
{
"epoch": 1.92,
"learning_rate": 7.554479418886199e-06,
"loss": 0.3398,
"step": 550
},
{
"epoch": 1.92,
"learning_rate": 7.530266343825666e-06,
"loss": 0.354,
"step": 551
},
{
"epoch": 1.92,
"learning_rate": 7.506053268765134e-06,
"loss": 0.3247,
"step": 552
},
{
"epoch": 1.93,
"learning_rate": 7.4818401937046015e-06,
"loss": 0.353,
"step": 553
},
{
"epoch": 1.93,
"learning_rate": 7.4576271186440685e-06,
"loss": 0.3359,
"step": 554
},
{
"epoch": 1.93,
"learning_rate": 7.4334140435835355e-06,
"loss": 0.3535,
"step": 555
},
{
"epoch": 1.94,
"learning_rate": 7.4092009685230025e-06,
"loss": 0.3276,
"step": 556
},
{
"epoch": 1.94,
"learning_rate": 7.384987893462471e-06,
"loss": 0.3457,
"step": 557
},
{
"epoch": 1.94,
"learning_rate": 7.360774818401938e-06,
"loss": 0.3423,
"step": 558
},
{
"epoch": 1.95,
"learning_rate": 7.336561743341405e-06,
"loss": 0.3408,
"step": 559
},
{
"epoch": 1.95,
"learning_rate": 7.312348668280872e-06,
"loss": 0.3467,
"step": 560
},
{
"epoch": 1.95,
"learning_rate": 7.288135593220339e-06,
"loss": 0.3291,
"step": 561
},
{
"epoch": 1.96,
"learning_rate": 7.263922518159807e-06,
"loss": 0.3423,
"step": 562
},
{
"epoch": 1.96,
"learning_rate": 7.239709443099274e-06,
"loss": 0.3325,
"step": 563
},
{
"epoch": 1.97,
"learning_rate": 7.215496368038741e-06,
"loss": 0.3413,
"step": 564
},
{
"epoch": 1.97,
"learning_rate": 7.191283292978209e-06,
"loss": 0.3472,
"step": 565
},
{
"epoch": 1.97,
"learning_rate": 7.1670702179176766e-06,
"loss": 0.3462,
"step": 566
},
{
"epoch": 1.98,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.3394,
"step": 567
},
{
"epoch": 1.98,
"learning_rate": 7.1186440677966106e-06,
"loss": 0.3315,
"step": 568
},
{
"epoch": 1.98,
"learning_rate": 7.0944309927360775e-06,
"loss": 0.3496,
"step": 569
},
{
"epoch": 1.99,
"learning_rate": 7.070217917675546e-06,
"loss": 0.3638,
"step": 570
},
{
"epoch": 1.99,
"learning_rate": 7.046004842615013e-06,
"loss": 0.3408,
"step": 571
},
{
"epoch": 1.99,
"learning_rate": 7.02179176755448e-06,
"loss": 0.3457,
"step": 572
},
{
"epoch": 2.0,
"learning_rate": 6.997578692493947e-06,
"loss": 0.3374,
"step": 573
},
{
"epoch": 2.0,
"learning_rate": 6.973365617433414e-06,
"loss": 0.2184,
"step": 574
},
{
"epoch": 2.0,
"learning_rate": 6.949152542372882e-06,
"loss": 0.1985,
"step": 575
},
{
"epoch": 2.01,
"learning_rate": 6.924939467312349e-06,
"loss": 0.2029,
"step": 576
},
{
"epoch": 2.01,
"learning_rate": 6.900726392251816e-06,
"loss": 0.1963,
"step": 577
},
{
"epoch": 2.01,
"learning_rate": 6.876513317191284e-06,
"loss": 0.2009,
"step": 578
},
{
"epoch": 2.02,
"learning_rate": 6.852300242130752e-06,
"loss": 0.1885,
"step": 579
},
{
"epoch": 2.02,
"learning_rate": 6.828087167070219e-06,
"loss": 0.1909,
"step": 580
},
{
"epoch": 2.02,
"learning_rate": 6.803874092009686e-06,
"loss": 0.1858,
"step": 581
},
{
"epoch": 2.03,
"learning_rate": 6.779661016949153e-06,
"loss": 0.1929,
"step": 582
},
{
"epoch": 2.03,
"learning_rate": 6.75544794188862e-06,
"loss": 0.1863,
"step": 583
},
{
"epoch": 2.03,
"learning_rate": 6.731234866828088e-06,
"loss": 0.1941,
"step": 584
},
{
"epoch": 2.04,
"learning_rate": 6.707021791767555e-06,
"loss": 0.1946,
"step": 585
},
{
"epoch": 2.04,
"learning_rate": 6.682808716707022e-06,
"loss": 0.198,
"step": 586
},
{
"epoch": 2.05,
"learning_rate": 6.658595641646489e-06,
"loss": 0.1833,
"step": 587
},
{
"epoch": 2.05,
"learning_rate": 6.634382566585957e-06,
"loss": 0.1975,
"step": 588
},
{
"epoch": 2.05,
"learning_rate": 6.610169491525424e-06,
"loss": 0.1831,
"step": 589
},
{
"epoch": 2.06,
"learning_rate": 6.585956416464891e-06,
"loss": 0.1826,
"step": 590
},
{
"epoch": 2.06,
"learning_rate": 6.561743341404359e-06,
"loss": 0.188,
"step": 591
},
{
"epoch": 2.06,
"learning_rate": 6.537530266343827e-06,
"loss": 0.1812,
"step": 592
},
{
"epoch": 2.07,
"learning_rate": 6.513317191283294e-06,
"loss": 0.1882,
"step": 593
},
{
"epoch": 2.07,
"learning_rate": 6.489104116222761e-06,
"loss": 0.1992,
"step": 594
},
{
"epoch": 2.07,
"learning_rate": 6.464891041162228e-06,
"loss": 0.1919,
"step": 595
},
{
"epoch": 2.08,
"learning_rate": 6.440677966101695e-06,
"loss": 0.1775,
"step": 596
},
{
"epoch": 2.08,
"learning_rate": 6.416464891041163e-06,
"loss": 0.1812,
"step": 597
},
{
"epoch": 2.08,
"learning_rate": 6.39225181598063e-06,
"loss": 0.1824,
"step": 598
},
{
"epoch": 2.09,
"learning_rate": 6.368038740920097e-06,
"loss": 0.1648,
"step": 599
},
{
"epoch": 2.09,
"learning_rate": 6.343825665859564e-06,
"loss": 0.1882,
"step": 600
},
{
"epoch": 2.09,
"learning_rate": 6.319612590799032e-06,
"loss": 0.1824,
"step": 601
},
{
"epoch": 2.1,
"learning_rate": 6.295399515738499e-06,
"loss": 0.179,
"step": 602
},
{
"epoch": 2.1,
"learning_rate": 6.271186440677966e-06,
"loss": 0.1785,
"step": 603
},
{
"epoch": 2.1,
"learning_rate": 6.246973365617434e-06,
"loss": 0.1843,
"step": 604
},
{
"epoch": 2.11,
"learning_rate": 6.222760290556902e-06,
"loss": 0.1995,
"step": 605
},
{
"epoch": 2.11,
"learning_rate": 6.198547215496369e-06,
"loss": 0.179,
"step": 606
},
{
"epoch": 2.11,
"learning_rate": 6.174334140435836e-06,
"loss": 0.1758,
"step": 607
},
{
"epoch": 2.12,
"learning_rate": 6.150121065375303e-06,
"loss": 0.1804,
"step": 608
},
{
"epoch": 2.12,
"learning_rate": 6.12590799031477e-06,
"loss": 0.1895,
"step": 609
},
{
"epoch": 2.13,
"learning_rate": 6.1016949152542385e-06,
"loss": 0.1841,
"step": 610
},
{
"epoch": 2.13,
"learning_rate": 6.0774818401937055e-06,
"loss": 0.1719,
"step": 611
},
{
"epoch": 2.13,
"learning_rate": 6.0532687651331724e-06,
"loss": 0.1838,
"step": 612
},
{
"epoch": 2.14,
"learning_rate": 6.0290556900726394e-06,
"loss": 0.179,
"step": 613
},
{
"epoch": 2.14,
"learning_rate": 6.004842615012107e-06,
"loss": 0.1711,
"step": 614
},
{
"epoch": 2.14,
"learning_rate": 5.980629539951574e-06,
"loss": 0.1777,
"step": 615
},
{
"epoch": 2.15,
"learning_rate": 5.956416464891041e-06,
"loss": 0.1643,
"step": 616
},
{
"epoch": 2.15,
"learning_rate": 5.932203389830509e-06,
"loss": 0.177,
"step": 617
},
{
"epoch": 2.15,
"learning_rate": 5.907990314769976e-06,
"loss": 0.1804,
"step": 618
},
{
"epoch": 2.16,
"learning_rate": 5.883777239709444e-06,
"loss": 0.187,
"step": 619
},
{
"epoch": 2.16,
"learning_rate": 5.859564164648911e-06,
"loss": 0.1609,
"step": 620
},
{
"epoch": 2.16,
"learning_rate": 5.835351089588378e-06,
"loss": 0.1819,
"step": 621
},
{
"epoch": 2.17,
"learning_rate": 5.811138014527845e-06,
"loss": 0.1743,
"step": 622
},
{
"epoch": 2.17,
"learning_rate": 5.7869249394673135e-06,
"loss": 0.1711,
"step": 623
},
{
"epoch": 2.17,
"learning_rate": 5.7627118644067805e-06,
"loss": 0.1799,
"step": 624
},
{
"epoch": 2.18,
"learning_rate": 5.7384987893462475e-06,
"loss": 0.1843,
"step": 625
},
{
"epoch": 2.18,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.1768,
"step": 626
},
{
"epoch": 2.18,
"learning_rate": 5.690072639225182e-06,
"loss": 0.1804,
"step": 627
},
{
"epoch": 2.19,
"learning_rate": 5.665859564164649e-06,
"loss": 0.1653,
"step": 628
},
{
"epoch": 2.19,
"learning_rate": 5.641646489104116e-06,
"loss": 0.1763,
"step": 629
},
{
"epoch": 2.2,
"learning_rate": 5.617433414043584e-06,
"loss": 0.1841,
"step": 630
},
{
"epoch": 2.2,
"learning_rate": 5.593220338983051e-06,
"loss": 0.1792,
"step": 631
},
{
"epoch": 2.2,
"learning_rate": 5.569007263922519e-06,
"loss": 0.1631,
"step": 632
},
{
"epoch": 2.21,
"learning_rate": 5.544794188861986e-06,
"loss": 0.178,
"step": 633
},
{
"epoch": 2.21,
"learning_rate": 5.520581113801453e-06,
"loss": 0.1785,
"step": 634
},
{
"epoch": 2.21,
"learning_rate": 5.49636803874092e-06,
"loss": 0.1682,
"step": 635
},
{
"epoch": 2.22,
"learning_rate": 5.472154963680389e-06,
"loss": 0.1768,
"step": 636
},
{
"epoch": 2.22,
"learning_rate": 5.447941888619856e-06,
"loss": 0.178,
"step": 637
},
{
"epoch": 2.22,
"learning_rate": 5.423728813559323e-06,
"loss": 0.1777,
"step": 638
},
{
"epoch": 2.23,
"learning_rate": 5.39951573849879e-06,
"loss": 0.176,
"step": 639
},
{
"epoch": 2.23,
"learning_rate": 5.375302663438257e-06,
"loss": 0.178,
"step": 640
},
{
"epoch": 2.23,
"learning_rate": 5.351089588377724e-06,
"loss": 0.1628,
"step": 641
},
{
"epoch": 2.24,
"learning_rate": 5.326876513317191e-06,
"loss": 0.1719,
"step": 642
},
{
"epoch": 2.24,
"learning_rate": 5.302663438256659e-06,
"loss": 0.1687,
"step": 643
},
{
"epoch": 2.24,
"learning_rate": 5.278450363196126e-06,
"loss": 0.1819,
"step": 644
},
{
"epoch": 2.25,
"learning_rate": 5.254237288135594e-06,
"loss": 0.1741,
"step": 645
},
{
"epoch": 2.25,
"learning_rate": 5.230024213075061e-06,
"loss": 0.168,
"step": 646
},
{
"epoch": 2.25,
"learning_rate": 5.205811138014528e-06,
"loss": 0.1782,
"step": 647
},
{
"epoch": 2.26,
"learning_rate": 5.181598062953995e-06,
"loss": 0.1829,
"step": 648
},
{
"epoch": 2.26,
"learning_rate": 5.157384987893464e-06,
"loss": 0.1807,
"step": 649
},
{
"epoch": 2.26,
"learning_rate": 5.133171912832931e-06,
"loss": 0.1775,
"step": 650
},
{
"epoch": 2.27,
"learning_rate": 5.108958837772398e-06,
"loss": 0.1865,
"step": 651
},
{
"epoch": 2.27,
"learning_rate": 5.084745762711865e-06,
"loss": 0.1792,
"step": 652
},
{
"epoch": 2.28,
"learning_rate": 5.060532687651332e-06,
"loss": 0.1807,
"step": 653
},
{
"epoch": 2.28,
"learning_rate": 5.0363196125907995e-06,
"loss": 0.1768,
"step": 654
},
{
"epoch": 2.28,
"learning_rate": 5.0121065375302665e-06,
"loss": 0.1714,
"step": 655
},
{
"epoch": 2.29,
"learning_rate": 4.987893462469734e-06,
"loss": 0.1758,
"step": 656
},
{
"epoch": 2.29,
"learning_rate": 4.963680387409201e-06,
"loss": 0.1794,
"step": 657
},
{
"epoch": 2.29,
"learning_rate": 4.939467312348668e-06,
"loss": 0.1794,
"step": 658
},
{
"epoch": 2.3,
"learning_rate": 4.915254237288136e-06,
"loss": 0.1677,
"step": 659
},
{
"epoch": 2.3,
"learning_rate": 4.891041162227603e-06,
"loss": 0.1672,
"step": 660
},
{
"epoch": 2.3,
"learning_rate": 4.866828087167071e-06,
"loss": 0.1807,
"step": 661
},
{
"epoch": 2.31,
"learning_rate": 4.842615012106538e-06,
"loss": 0.1731,
"step": 662
},
{
"epoch": 2.31,
"learning_rate": 4.818401937046005e-06,
"loss": 0.1721,
"step": 663
},
{
"epoch": 2.31,
"learning_rate": 4.794188861985473e-06,
"loss": 0.1724,
"step": 664
},
{
"epoch": 2.32,
"learning_rate": 4.76997578692494e-06,
"loss": 0.1711,
"step": 665
},
{
"epoch": 2.32,
"learning_rate": 4.745762711864408e-06,
"loss": 0.1826,
"step": 666
},
{
"epoch": 2.32,
"learning_rate": 4.721549636803875e-06,
"loss": 0.1907,
"step": 667
},
{
"epoch": 2.33,
"learning_rate": 4.6973365617433416e-06,
"loss": 0.1743,
"step": 668
},
{
"epoch": 2.33,
"learning_rate": 4.673123486682809e-06,
"loss": 0.1602,
"step": 669
},
{
"epoch": 2.33,
"learning_rate": 4.648910411622276e-06,
"loss": 0.1702,
"step": 670
},
{
"epoch": 2.34,
"learning_rate": 4.624697336561743e-06,
"loss": 0.1914,
"step": 671
},
{
"epoch": 2.34,
"learning_rate": 4.600484261501211e-06,
"loss": 0.1677,
"step": 672
},
{
"epoch": 2.34,
"learning_rate": 4.576271186440678e-06,
"loss": 0.1775,
"step": 673
},
{
"epoch": 2.35,
"learning_rate": 4.552058111380145e-06,
"loss": 0.1685,
"step": 674
},
{
"epoch": 2.35,
"learning_rate": 4.527845036319613e-06,
"loss": 0.1709,
"step": 675
},
{
"epoch": 2.36,
"learning_rate": 4.50363196125908e-06,
"loss": 0.1743,
"step": 676
},
{
"epoch": 2.36,
"learning_rate": 4.479418886198548e-06,
"loss": 0.1753,
"step": 677
},
{
"epoch": 2.36,
"learning_rate": 4.455205811138015e-06,
"loss": 0.1685,
"step": 678
},
{
"epoch": 2.37,
"learning_rate": 4.430992736077483e-06,
"loss": 0.1746,
"step": 679
},
{
"epoch": 2.37,
"learning_rate": 4.40677966101695e-06,
"loss": 0.1851,
"step": 680
},
{
"epoch": 2.37,
"learning_rate": 4.382566585956417e-06,
"loss": 0.179,
"step": 681
},
{
"epoch": 2.38,
"learning_rate": 4.3583535108958845e-06,
"loss": 0.1853,
"step": 682
},
{
"epoch": 2.38,
"learning_rate": 4.3341404358353515e-06,
"loss": 0.1721,
"step": 683
},
{
"epoch": 2.38,
"learning_rate": 4.3099273607748185e-06,
"loss": 0.1702,
"step": 684
},
{
"epoch": 2.39,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.1636,
"step": 685
},
{
"epoch": 2.39,
"learning_rate": 4.261501210653753e-06,
"loss": 0.1816,
"step": 686
},
{
"epoch": 2.39,
"learning_rate": 4.23728813559322e-06,
"loss": 0.1741,
"step": 687
},
{
"epoch": 2.4,
"learning_rate": 4.213075060532688e-06,
"loss": 0.1765,
"step": 688
},
{
"epoch": 2.4,
"learning_rate": 4.188861985472155e-06,
"loss": 0.1731,
"step": 689
},
{
"epoch": 2.4,
"learning_rate": 4.164648910411623e-06,
"loss": 0.1782,
"step": 690
},
{
"epoch": 2.41,
"learning_rate": 4.14043583535109e-06,
"loss": 0.1709,
"step": 691
},
{
"epoch": 2.41,
"learning_rate": 4.116222760290558e-06,
"loss": 0.1758,
"step": 692
},
{
"epoch": 2.41,
"learning_rate": 4.092009685230025e-06,
"loss": 0.1719,
"step": 693
},
{
"epoch": 2.42,
"learning_rate": 4.067796610169492e-06,
"loss": 0.1741,
"step": 694
},
{
"epoch": 2.42,
"learning_rate": 4.04358353510896e-06,
"loss": 0.1731,
"step": 695
},
{
"epoch": 2.43,
"learning_rate": 4.0193704600484266e-06,
"loss": 0.1699,
"step": 696
},
{
"epoch": 2.43,
"learning_rate": 3.9951573849878936e-06,
"loss": 0.178,
"step": 697
},
{
"epoch": 2.43,
"learning_rate": 3.9709443099273605e-06,
"loss": 0.1807,
"step": 698
},
{
"epoch": 2.44,
"learning_rate": 3.946731234866828e-06,
"loss": 0.1711,
"step": 699
},
{
"epoch": 2.44,
"learning_rate": 3.922518159806295e-06,
"loss": 0.1765,
"step": 700
},
{
"epoch": 2.44,
"learning_rate": 3.898305084745763e-06,
"loss": 0.1743,
"step": 701
},
{
"epoch": 2.45,
"learning_rate": 3.87409200968523e-06,
"loss": 0.1719,
"step": 702
},
{
"epoch": 2.45,
"learning_rate": 3.849878934624698e-06,
"loss": 0.175,
"step": 703
},
{
"epoch": 2.45,
"learning_rate": 3.825665859564165e-06,
"loss": 0.1816,
"step": 704
},
{
"epoch": 2.46,
"learning_rate": 3.8014527845036324e-06,
"loss": 0.165,
"step": 705
},
{
"epoch": 2.46,
"learning_rate": 3.7772397094430994e-06,
"loss": 0.177,
"step": 706
},
{
"epoch": 2.46,
"learning_rate": 3.753026634382567e-06,
"loss": 0.1829,
"step": 707
},
{
"epoch": 2.47,
"learning_rate": 3.7288135593220342e-06,
"loss": 0.179,
"step": 708
},
{
"epoch": 2.47,
"learning_rate": 3.7046004842615012e-06,
"loss": 0.177,
"step": 709
},
{
"epoch": 2.47,
"learning_rate": 3.680387409200969e-06,
"loss": 0.1821,
"step": 710
},
{
"epoch": 2.48,
"learning_rate": 3.656174334140436e-06,
"loss": 0.1733,
"step": 711
},
{
"epoch": 2.48,
"learning_rate": 3.6319612590799035e-06,
"loss": 0.1792,
"step": 712
},
{
"epoch": 2.48,
"learning_rate": 3.6077481840193705e-06,
"loss": 0.1785,
"step": 713
},
{
"epoch": 2.49,
"learning_rate": 3.5835351089588383e-06,
"loss": 0.1638,
"step": 714
},
{
"epoch": 2.49,
"learning_rate": 3.5593220338983053e-06,
"loss": 0.1643,
"step": 715
},
{
"epoch": 2.49,
"learning_rate": 3.535108958837773e-06,
"loss": 0.1775,
"step": 716
},
{
"epoch": 2.5,
"learning_rate": 3.51089588377724e-06,
"loss": 0.1614,
"step": 717
},
{
"epoch": 2.5,
"learning_rate": 3.486682808716707e-06,
"loss": 0.186,
"step": 718
},
{
"epoch": 2.51,
"learning_rate": 3.4624697336561745e-06,
"loss": 0.1772,
"step": 719
},
{
"epoch": 2.51,
"learning_rate": 3.438256658595642e-06,
"loss": 0.1729,
"step": 720
},
{
"epoch": 2.51,
"learning_rate": 3.4140435835351093e-06,
"loss": 0.176,
"step": 721
},
{
"epoch": 2.52,
"learning_rate": 3.3898305084745763e-06,
"loss": 0.1738,
"step": 722
},
{
"epoch": 2.52,
"learning_rate": 3.365617433414044e-06,
"loss": 0.1626,
"step": 723
},
{
"epoch": 2.52,
"learning_rate": 3.341404358353511e-06,
"loss": 0.1792,
"step": 724
},
{
"epoch": 2.53,
"learning_rate": 3.3171912832929785e-06,
"loss": 0.1621,
"step": 725
},
{
"epoch": 2.53,
"learning_rate": 3.2929782082324455e-06,
"loss": 0.1802,
"step": 726
},
{
"epoch": 2.53,
"learning_rate": 3.2687651331719134e-06,
"loss": 0.1675,
"step": 727
},
{
"epoch": 2.54,
"learning_rate": 3.2445520581113804e-06,
"loss": 0.1729,
"step": 728
},
{
"epoch": 2.54,
"learning_rate": 3.2203389830508473e-06,
"loss": 0.1748,
"step": 729
},
{
"epoch": 2.54,
"learning_rate": 3.196125907990315e-06,
"loss": 0.1716,
"step": 730
},
{
"epoch": 2.55,
"learning_rate": 3.171912832929782e-06,
"loss": 0.1699,
"step": 731
},
{
"epoch": 2.55,
"learning_rate": 3.1476997578692496e-06,
"loss": 0.1841,
"step": 732
},
{
"epoch": 2.55,
"learning_rate": 3.123486682808717e-06,
"loss": 0.1943,
"step": 733
},
{
"epoch": 2.56,
"learning_rate": 3.0992736077481844e-06,
"loss": 0.1865,
"step": 734
},
{
"epoch": 2.56,
"learning_rate": 3.0750605326876514e-06,
"loss": 0.168,
"step": 735
},
{
"epoch": 2.56,
"learning_rate": 3.0508474576271192e-06,
"loss": 0.1768,
"step": 736
},
{
"epoch": 2.57,
"learning_rate": 3.0266343825665862e-06,
"loss": 0.1689,
"step": 737
},
{
"epoch": 2.57,
"learning_rate": 3.0024213075060536e-06,
"loss": 0.1753,
"step": 738
},
{
"epoch": 2.57,
"learning_rate": 2.9782082324455206e-06,
"loss": 0.1672,
"step": 739
},
{
"epoch": 2.58,
"learning_rate": 2.953995157384988e-06,
"loss": 0.1831,
"step": 740
},
{
"epoch": 2.58,
"learning_rate": 2.9297820823244554e-06,
"loss": 0.1697,
"step": 741
},
{
"epoch": 2.59,
"learning_rate": 2.9055690072639224e-06,
"loss": 0.1626,
"step": 742
},
{
"epoch": 2.59,
"learning_rate": 2.8813559322033903e-06,
"loss": 0.176,
"step": 743
},
{
"epoch": 2.59,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.1785,
"step": 744
},
{
"epoch": 2.6,
"learning_rate": 2.8329297820823247e-06,
"loss": 0.1716,
"step": 745
},
{
"epoch": 2.6,
"learning_rate": 2.808716707021792e-06,
"loss": 0.1831,
"step": 746
},
{
"epoch": 2.6,
"learning_rate": 2.7845036319612595e-06,
"loss": 0.1721,
"step": 747
},
{
"epoch": 2.61,
"learning_rate": 2.7602905569007265e-06,
"loss": 0.1829,
"step": 748
},
{
"epoch": 2.61,
"learning_rate": 2.7360774818401943e-06,
"loss": 0.167,
"step": 749
},
{
"epoch": 2.61,
"learning_rate": 2.7118644067796613e-06,
"loss": 0.1804,
"step": 750
},
{
"epoch": 2.62,
"learning_rate": 2.6876513317191283e-06,
"loss": 0.179,
"step": 751
},
{
"epoch": 2.62,
"learning_rate": 2.6634382566585957e-06,
"loss": 0.1851,
"step": 752
},
{
"epoch": 2.62,
"learning_rate": 2.639225181598063e-06,
"loss": 0.1699,
"step": 753
},
{
"epoch": 2.63,
"learning_rate": 2.6150121065375305e-06,
"loss": 0.1743,
"step": 754
},
{
"epoch": 2.63,
"learning_rate": 2.5907990314769975e-06,
"loss": 0.1763,
"step": 755
},
{
"epoch": 2.63,
"learning_rate": 2.5665859564164654e-06,
"loss": 0.1719,
"step": 756
},
{
"epoch": 2.64,
"learning_rate": 2.5423728813559323e-06,
"loss": 0.1743,
"step": 757
},
{
"epoch": 2.64,
"learning_rate": 2.5181598062953998e-06,
"loss": 0.1714,
"step": 758
},
{
"epoch": 2.64,
"learning_rate": 2.493946731234867e-06,
"loss": 0.1689,
"step": 759
},
{
"epoch": 2.65,
"learning_rate": 2.469733656174334e-06,
"loss": 0.1753,
"step": 760
},
{
"epoch": 2.65,
"learning_rate": 2.4455205811138016e-06,
"loss": 0.1741,
"step": 761
},
{
"epoch": 2.66,
"learning_rate": 2.421307506053269e-06,
"loss": 0.1711,
"step": 762
},
{
"epoch": 2.66,
"learning_rate": 2.3970944309927364e-06,
"loss": 0.1692,
"step": 763
},
{
"epoch": 2.66,
"learning_rate": 2.372881355932204e-06,
"loss": 0.1692,
"step": 764
},
{
"epoch": 2.67,
"learning_rate": 2.3486682808716708e-06,
"loss": 0.1729,
"step": 765
},
{
"epoch": 2.67,
"learning_rate": 2.324455205811138e-06,
"loss": 0.1804,
"step": 766
},
{
"epoch": 2.67,
"learning_rate": 2.3002421307506056e-06,
"loss": 0.1729,
"step": 767
},
{
"epoch": 2.68,
"learning_rate": 2.2760290556900726e-06,
"loss": 0.1897,
"step": 768
},
{
"epoch": 2.68,
"learning_rate": 2.25181598062954e-06,
"loss": 0.1711,
"step": 769
},
{
"epoch": 2.68,
"learning_rate": 2.2276029055690074e-06,
"loss": 0.1726,
"step": 770
},
{
"epoch": 2.69,
"learning_rate": 2.203389830508475e-06,
"loss": 0.1794,
"step": 771
},
{
"epoch": 2.69,
"learning_rate": 2.1791767554479422e-06,
"loss": 0.1721,
"step": 772
},
{
"epoch": 2.69,
"learning_rate": 2.1549636803874092e-06,
"loss": 0.1624,
"step": 773
},
{
"epoch": 2.7,
"learning_rate": 2.1307506053268766e-06,
"loss": 0.187,
"step": 774
},
{
"epoch": 2.7,
"learning_rate": 2.106537530266344e-06,
"loss": 0.1794,
"step": 775
},
{
"epoch": 2.7,
"learning_rate": 2.0823244552058115e-06,
"loss": 0.1689,
"step": 776
},
{
"epoch": 2.71,
"learning_rate": 2.058111380145279e-06,
"loss": 0.1614,
"step": 777
},
{
"epoch": 2.71,
"learning_rate": 2.033898305084746e-06,
"loss": 0.1846,
"step": 778
},
{
"epoch": 2.71,
"learning_rate": 2.0096852300242133e-06,
"loss": 0.1611,
"step": 779
},
{
"epoch": 2.72,
"learning_rate": 1.9854721549636803e-06,
"loss": 0.1599,
"step": 780
},
{
"epoch": 2.72,
"learning_rate": 1.9612590799031477e-06,
"loss": 0.1733,
"step": 781
},
{
"epoch": 2.72,
"learning_rate": 1.937046004842615e-06,
"loss": 0.1794,
"step": 782
},
{
"epoch": 2.73,
"learning_rate": 1.9128329297820825e-06,
"loss": 0.1782,
"step": 783
},
{
"epoch": 2.73,
"learning_rate": 1.8886198547215497e-06,
"loss": 0.1658,
"step": 784
},
{
"epoch": 2.74,
"learning_rate": 1.8644067796610171e-06,
"loss": 0.179,
"step": 785
},
{
"epoch": 2.74,
"learning_rate": 1.8401937046004845e-06,
"loss": 0.1738,
"step": 786
},
{
"epoch": 2.74,
"learning_rate": 1.8159806295399517e-06,
"loss": 0.1575,
"step": 787
},
{
"epoch": 2.75,
"learning_rate": 1.7917675544794191e-06,
"loss": 0.1677,
"step": 788
},
{
"epoch": 2.75,
"learning_rate": 1.7675544794188866e-06,
"loss": 0.1699,
"step": 789
},
{
"epoch": 2.75,
"learning_rate": 1.7433414043583535e-06,
"loss": 0.177,
"step": 790
},
{
"epoch": 2.76,
"learning_rate": 1.719128329297821e-06,
"loss": 0.1982,
"step": 791
},
{
"epoch": 2.76,
"learning_rate": 1.6949152542372882e-06,
"loss": 0.1599,
"step": 792
},
{
"epoch": 2.76,
"learning_rate": 1.6707021791767556e-06,
"loss": 0.1724,
"step": 793
},
{
"epoch": 2.77,
"learning_rate": 1.6464891041162228e-06,
"loss": 0.1694,
"step": 794
},
{
"epoch": 2.77,
"learning_rate": 1.6222760290556902e-06,
"loss": 0.1733,
"step": 795
},
{
"epoch": 2.77,
"learning_rate": 1.5980629539951576e-06,
"loss": 0.1582,
"step": 796
},
{
"epoch": 2.78,
"learning_rate": 1.5738498789346248e-06,
"loss": 0.1714,
"step": 797
},
{
"epoch": 2.78,
"learning_rate": 1.5496368038740922e-06,
"loss": 0.177,
"step": 798
},
{
"epoch": 2.78,
"learning_rate": 1.5254237288135596e-06,
"loss": 0.1763,
"step": 799
},
{
"epoch": 2.79,
"learning_rate": 1.5012106537530268e-06,
"loss": 0.1807,
"step": 800
},
{
"epoch": 2.79,
"learning_rate": 1.476997578692494e-06,
"loss": 0.1719,
"step": 801
},
{
"epoch": 2.79,
"learning_rate": 1.4527845036319612e-06,
"loss": 0.1707,
"step": 802
},
{
"epoch": 2.8,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.1621,
"step": 803
},
{
"epoch": 2.8,
"learning_rate": 1.404358353510896e-06,
"loss": 0.1653,
"step": 804
},
{
"epoch": 2.8,
"learning_rate": 1.3801452784503632e-06,
"loss": 0.1653,
"step": 805
},
{
"epoch": 2.81,
"learning_rate": 1.3559322033898307e-06,
"loss": 0.1692,
"step": 806
},
{
"epoch": 2.81,
"learning_rate": 1.3317191283292979e-06,
"loss": 0.1707,
"step": 807
},
{
"epoch": 2.82,
"learning_rate": 1.3075060532687653e-06,
"loss": 0.1621,
"step": 808
},
{
"epoch": 2.82,
"learning_rate": 1.2832929782082327e-06,
"loss": 0.177,
"step": 809
},
{
"epoch": 2.82,
"learning_rate": 1.2590799031476999e-06,
"loss": 0.178,
"step": 810
},
{
"epoch": 2.83,
"learning_rate": 1.234866828087167e-06,
"loss": 0.1724,
"step": 811
},
{
"epoch": 2.83,
"learning_rate": 1.2106537530266345e-06,
"loss": 0.1655,
"step": 812
},
{
"epoch": 2.83,
"learning_rate": 1.186440677966102e-06,
"loss": 0.1765,
"step": 813
},
{
"epoch": 2.84,
"learning_rate": 1.162227602905569e-06,
"loss": 0.1646,
"step": 814
},
{
"epoch": 2.84,
"learning_rate": 1.1380145278450363e-06,
"loss": 0.1753,
"step": 815
},
{
"epoch": 2.84,
"learning_rate": 1.1138014527845037e-06,
"loss": 0.1799,
"step": 816
},
{
"epoch": 2.85,
"learning_rate": 1.0895883777239711e-06,
"loss": 0.1743,
"step": 817
},
{
"epoch": 2.85,
"learning_rate": 1.0653753026634383e-06,
"loss": 0.1768,
"step": 818
},
{
"epoch": 2.85,
"learning_rate": 1.0411622276029057e-06,
"loss": 0.1785,
"step": 819
},
{
"epoch": 2.86,
"learning_rate": 1.016949152542373e-06,
"loss": 0.1675,
"step": 820
},
{
"epoch": 2.86,
"learning_rate": 9.927360774818401e-07,
"loss": 0.1743,
"step": 821
},
{
"epoch": 2.86,
"learning_rate": 9.685230024213075e-07,
"loss": 0.1704,
"step": 822
},
{
"epoch": 2.87,
"learning_rate": 9.443099273607749e-07,
"loss": 0.1687,
"step": 823
},
{
"epoch": 2.87,
"learning_rate": 9.200968523002423e-07,
"loss": 0.1741,
"step": 824
},
{
"epoch": 2.87,
"learning_rate": 8.958837772397096e-07,
"loss": 0.1777,
"step": 825
},
{
"epoch": 2.88,
"learning_rate": 8.716707021791768e-07,
"loss": 0.1643,
"step": 826
},
{
"epoch": 2.88,
"learning_rate": 8.474576271186441e-07,
"loss": 0.1763,
"step": 827
},
{
"epoch": 2.89,
"learning_rate": 8.232445520581114e-07,
"loss": 0.1816,
"step": 828
},
{
"epoch": 2.89,
"learning_rate": 7.990314769975788e-07,
"loss": 0.1699,
"step": 829
},
{
"epoch": 2.89,
"learning_rate": 7.748184019370461e-07,
"loss": 0.1619,
"step": 830
},
{
"epoch": 2.9,
"learning_rate": 7.506053268765134e-07,
"loss": 0.1753,
"step": 831
},
{
"epoch": 2.9,
"learning_rate": 7.263922518159806e-07,
"loss": 0.1685,
"step": 832
},
{
"epoch": 2.9,
"learning_rate": 7.02179176755448e-07,
"loss": 0.1716,
"step": 833
},
{
"epoch": 2.91,
"learning_rate": 6.779661016949153e-07,
"loss": 0.168,
"step": 834
},
{
"epoch": 2.91,
"learning_rate": 6.537530266343826e-07,
"loss": 0.1729,
"step": 835
},
{
"epoch": 2.91,
"learning_rate": 6.295399515738499e-07,
"loss": 0.1743,
"step": 836
},
{
"epoch": 2.92,
"learning_rate": 6.053268765133172e-07,
"loss": 0.1646,
"step": 837
},
{
"epoch": 2.92,
"learning_rate": 5.811138014527845e-07,
"loss": 0.1628,
"step": 838
},
{
"epoch": 2.92,
"learning_rate": 5.569007263922519e-07,
"loss": 0.1658,
"step": 839
},
{
"epoch": 2.93,
"learning_rate": 5.326876513317192e-07,
"loss": 0.1746,
"step": 840
}
],
"max_steps": 861,
"num_train_epochs": 3,
"total_flos": 819001106104320.0,
"trial_name": null,
"trial_params": null
}