Nguyen Tien committed on
Commit
c2f2911
1 Parent(s): 13e9bbb

vuihocrnd/teacher-status-van-tiny-256

Browse files
Files changed (5) hide show
  1. README.md +8 -8
  2. all_results.json +14 -5
  3. eval_results.json +9 -5
  4. train_results.json +6 -6
  5. trainer_state.json +557 -47
README.md CHANGED
@@ -24,13 +24,13 @@ model-index:
24
  metrics:
25
  - name: Accuracy
26
  type: accuracy
27
- value: 0.949438202247191
28
  - name: Recall
29
  type: recall
30
- value: 0.9473684210526315
31
  - name: Precision
32
  type: precision
33
- value: 0.9574468085106383
34
  ---
35
 
36
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -40,11 +40,11 @@ should probably proofread and complete it, then remove this comment. -->
40
 
41
  This model is a fine-tuned version of [Visual-Attention-Network/van-tiny](https://huggingface.co/Visual-Attention-Network/van-tiny) on the imagefolder dataset.
42
  It achieves the following results on the evaluation set:
43
- - Loss: 0.1130
44
- - Accuracy: 0.9494
45
- - F1 Score: 0.9524
46
- - Recall: 0.9474
47
- - Precision: 0.9574
48
 
49
  ## Model description
50
 
 
24
  metrics:
25
  - name: Accuracy
26
  type: accuracy
27
+ value: 0.9831460674157303
28
  - name: Recall
29
  type: recall
30
+ value: 0.9789473684210527
31
  - name: Precision
32
  type: precision
33
+ value: 0.9893617021276596
34
  ---
35
 
36
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
40
 
41
  This model is a fine-tuned version of [Visual-Attention-Network/van-tiny](https://huggingface.co/Visual-Attention-Network/van-tiny) on the imagefolder dataset.
42
  It achieves the following results on the evaluation set:
43
+ - Loss: 0.0988
44
+ - Accuracy: 0.9831
45
+ - F1 Score: 0.9841
46
+ - Recall: 0.9789
47
+ - Precision: 0.9894
48
 
49
  ## Model description
50
 
all_results.json CHANGED
@@ -1,7 +1,16 @@
1
  {
2
- "eval_accuracy": 0.9213483146067416,
3
- "eval_f1_score": 0.9306930693069307,
4
- "eval_loss": 0.21757331490516663,
5
- "eval_precision": 0.9306930693069307,
6
- "eval_recall": 0.9306930693069307
 
 
 
 
 
 
 
 
 
7
  }
 
1
  {
2
+ "epoch": 28.8,
3
+ "eval_accuracy": 0.9831460674157303,
4
+ "eval_f1_score": 0.9841269841269842,
5
+ "eval_loss": 0.09884575009346008,
6
+ "eval_precision": 0.9893617021276596,
7
+ "eval_recall": 0.9789473684210527,
8
+ "eval_runtime": 1.7039,
9
+ "eval_samples_per_second": 104.466,
10
+ "eval_steps_per_second": 3.521,
11
+ "total_flos": 2.0837941191062323e+17,
12
+ "train_loss": 0.3229101174407535,
13
+ "train_runtime": 715.5601,
14
+ "train_samples_per_second": 66.787,
15
+ "train_steps_per_second": 0.503
16
  }
eval_results.json CHANGED
@@ -1,7 +1,11 @@
1
  {
2
- "eval_accuracy": 0.9213483146067416,
3
- "eval_f1_score": 0.9306930693069307,
4
- "eval_loss": 0.21757331490516663,
5
- "eval_precision": 0.9306930693069307,
6
- "eval_recall": 0.9306930693069307
 
 
 
 
7
  }
 
1
  {
2
+ "epoch": 28.8,
3
+ "eval_accuracy": 0.9831460674157303,
4
+ "eval_f1_score": 0.9841269841269842,
5
+ "eval_loss": 0.09884575009346008,
6
+ "eval_precision": 0.9893617021276596,
7
+ "eval_recall": 0.9789473684210527,
8
+ "eval_runtime": 1.7039,
9
+ "eval_samples_per_second": 104.466,
10
+ "eval_steps_per_second": 3.521
11
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.88,
3
- "total_flos": 2.086337325249331e+16,
4
- "train_loss": 0.2572544482019212,
5
- "train_runtime": 70.0705,
6
- "train_samples_per_second": 68.203,
7
- "train_steps_per_second": 0.514
8
  }
 
1
  {
2
+ "epoch": 28.8,
3
+ "total_flos": 2.0837941191062323e+17,
4
+ "train_loss": 0.3229101174407535,
5
+ "train_runtime": 715.5601,
6
+ "train_samples_per_second": 66.787,
7
+ "train_steps_per_second": 0.503
8
  }
trainer_state.json CHANGED
@@ -1,83 +1,593 @@
1
  {
2
- "best_metric": 0.949438202247191,
3
- "best_model_checkpoint": "teacher-status-van-tiny-256/checkpoint-12",
4
- "epoch": 2.88,
5
  "eval_steps": 500,
6
- "global_step": 36,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.8,
13
- "learning_rate": 4.0625000000000005e-05,
14
- "loss": 0.1511,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.96,
19
- "eval_accuracy": 0.949438202247191,
20
- "eval_f1_score": 0.9560975609756097,
21
- "eval_loss": 0.18015821278095245,
22
- "eval_precision": 0.9607843137254902,
23
- "eval_recall": 0.9514563106796117,
24
- "eval_runtime": 1.3097,
25
- "eval_samples_per_second": 135.904,
26
- "eval_steps_per_second": 4.581,
27
  "step": 12
28
  },
29
  {
30
  "epoch": 1.6,
31
- "learning_rate": 2.5e-05,
32
- "loss": 0.2643,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 2.0,
37
- "eval_accuracy": 0.949438202247191,
38
- "eval_f1_score": 0.9556650246305418,
39
- "eval_loss": 0.16739808022975922,
40
- "eval_precision": 0.97,
41
- "eval_recall": 0.941747572815534,
42
- "eval_runtime": 1.1345,
43
- "eval_samples_per_second": 156.899,
44
- "eval_steps_per_second": 5.289,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 2.4,
49
- "learning_rate": 9.375000000000001e-06,
50
- "loss": 0.3159,
51
  "step": 30
52
  },
53
  {
54
- "epoch": 2.88,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  "eval_accuracy": 0.9438202247191011,
56
- "eval_f1_score": 0.9509803921568628,
57
- "eval_loss": 0.16916657984256744,
58
- "eval_precision": 0.9603960396039604,
59
- "eval_recall": 0.941747572815534,
60
- "eval_runtime": 1.0079,
61
- "eval_samples_per_second": 176.605,
62
- "eval_steps_per_second": 5.953,
63
- "step": 36
64
- },
65
- {
66
- "epoch": 2.88,
67
- "step": 36,
68
- "total_flos": 2.086337325249331e+16,
69
- "train_loss": 0.2572544482019212,
70
- "train_runtime": 70.0705,
71
- "train_samples_per_second": 68.203,
72
- "train_steps_per_second": 0.514
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  }
74
  ],
75
  "logging_steps": 10,
76
- "max_steps": 36,
77
  "num_input_tokens_seen": 0,
78
- "num_train_epochs": 3,
79
  "save_steps": 500,
80
- "total_flos": 2.086337325249331e+16,
81
  "train_batch_size": 32,
82
  "trial_name": null,
83
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9841269841269842,
3
+ "best_model_checkpoint": "teacher-status-van-tiny-256/checkpoint-187",
4
+ "epoch": 28.8,
5
  "eval_steps": 500,
6
+ "global_step": 360,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.8,
13
+ "learning_rate": 1.388888888888889e-05,
14
+ "loss": 0.6928,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.96,
19
+ "eval_accuracy": 0.6685393258426966,
20
+ "eval_f1_score": 0.7630522088353413,
21
+ "eval_loss": 0.6904417276382446,
22
+ "eval_precision": 0.6168831168831169,
23
+ "eval_recall": 1.0,
24
+ "eval_runtime": 1.2798,
25
+ "eval_samples_per_second": 139.083,
26
+ "eval_steps_per_second": 4.688,
27
  "step": 12
28
  },
29
  {
30
  "epoch": 1.6,
31
+ "learning_rate": 2.777777777777778e-05,
32
+ "loss": 0.6893,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 2.0,
37
+ "eval_accuracy": 0.5393258426966292,
38
+ "eval_f1_score": 0.698529411764706,
39
+ "eval_loss": 0.6683324575424194,
40
+ "eval_precision": 0.536723163841808,
41
+ "eval_recall": 1.0,
42
+ "eval_runtime": 1.0413,
43
+ "eval_samples_per_second": 170.94,
44
+ "eval_steps_per_second": 5.762,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 2.4,
49
+ "learning_rate": 4.166666666666667e-05,
50
+ "loss": 0.6726,
51
  "step": 30
52
  },
53
  {
54
+ "epoch": 2.96,
55
+ "eval_accuracy": 0.5842696629213483,
56
+ "eval_f1_score": 0.7196969696969697,
57
+ "eval_loss": 0.5703710913658142,
58
+ "eval_precision": 0.5621301775147929,
59
+ "eval_recall": 1.0,
60
+ "eval_runtime": 1.3234,
61
+ "eval_samples_per_second": 134.506,
62
+ "eval_steps_per_second": 4.534,
63
+ "step": 37
64
+ },
65
+ {
66
+ "epoch": 3.2,
67
+ "learning_rate": 4.938271604938271e-05,
68
+ "loss": 0.6047,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 4.0,
73
+ "learning_rate": 4.783950617283951e-05,
74
+ "loss": 0.5295,
75
+ "step": 50
76
+ },
77
+ {
78
+ "epoch": 4.0,
79
+ "eval_accuracy": 0.9213483146067416,
80
+ "eval_f1_score": 0.9263157894736842,
81
+ "eval_loss": 0.4148499667644501,
82
+ "eval_precision": 0.9263157894736842,
83
+ "eval_recall": 0.9263157894736842,
84
+ "eval_runtime": 1.0116,
85
+ "eval_samples_per_second": 175.963,
86
+ "eval_steps_per_second": 5.931,
87
+ "step": 50
88
+ },
89
+ {
90
+ "epoch": 4.8,
91
+ "learning_rate": 4.62962962962963e-05,
92
+ "loss": 0.4745,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 4.96,
97
+ "eval_accuracy": 0.9382022471910112,
98
+ "eval_f1_score": 0.9430051813471502,
99
+ "eval_loss": 0.31081339716911316,
100
+ "eval_precision": 0.9285714285714286,
101
+ "eval_recall": 0.9578947368421052,
102
+ "eval_runtime": 1.3432,
103
+ "eval_samples_per_second": 132.515,
104
+ "eval_steps_per_second": 4.467,
105
+ "step": 62
106
+ },
107
+ {
108
+ "epoch": 5.6,
109
+ "learning_rate": 4.4753086419753084e-05,
110
+ "loss": 0.4206,
111
+ "step": 70
112
+ },
113
+ {
114
+ "epoch": 6.0,
115
+ "eval_accuracy": 0.9438202247191011,
116
+ "eval_f1_score": 0.9473684210526315,
117
+ "eval_loss": 0.2301165908575058,
118
+ "eval_precision": 0.9473684210526315,
119
+ "eval_recall": 0.9473684210526315,
120
+ "eval_runtime": 1.0463,
121
+ "eval_samples_per_second": 170.129,
122
+ "eval_steps_per_second": 5.735,
123
+ "step": 75
124
+ },
125
+ {
126
+ "epoch": 6.4,
127
+ "learning_rate": 4.3209876543209875e-05,
128
+ "loss": 0.3898,
129
+ "step": 80
130
+ },
131
+ {
132
+ "epoch": 6.96,
133
+ "eval_accuracy": 0.949438202247191,
134
+ "eval_f1_score": 0.9518716577540107,
135
+ "eval_loss": 0.1820228397846222,
136
+ "eval_precision": 0.967391304347826,
137
+ "eval_recall": 0.9368421052631579,
138
+ "eval_runtime": 1.0432,
139
+ "eval_samples_per_second": 170.633,
140
+ "eval_steps_per_second": 5.752,
141
+ "step": 87
142
+ },
143
+ {
144
+ "epoch": 7.2,
145
+ "learning_rate": 4.166666666666667e-05,
146
+ "loss": 0.3557,
147
+ "step": 90
148
+ },
149
+ {
150
+ "epoch": 8.0,
151
+ "learning_rate": 4.012345679012346e-05,
152
+ "loss": 0.3153,
153
+ "step": 100
154
+ },
155
+ {
156
+ "epoch": 8.0,
157
+ "eval_accuracy": 0.949438202247191,
158
+ "eval_f1_score": 0.9538461538461539,
159
+ "eval_loss": 0.1545208990573883,
160
+ "eval_precision": 0.93,
161
+ "eval_recall": 0.9789473684210527,
162
+ "eval_runtime": 1.2226,
163
+ "eval_samples_per_second": 145.595,
164
+ "eval_steps_per_second": 4.908,
165
+ "step": 100
166
+ },
167
+ {
168
+ "epoch": 8.8,
169
+ "learning_rate": 3.8580246913580246e-05,
170
+ "loss": 0.3077,
171
+ "step": 110
172
+ },
173
+ {
174
+ "epoch": 8.96,
175
+ "eval_accuracy": 0.9606741573033708,
176
+ "eval_f1_score": 0.9621621621621621,
177
+ "eval_loss": 0.1521005630493164,
178
+ "eval_precision": 0.9888888888888889,
179
+ "eval_recall": 0.9368421052631579,
180
+ "eval_runtime": 1.2664,
181
+ "eval_samples_per_second": 140.559,
182
+ "eval_steps_per_second": 4.738,
183
+ "step": 112
184
+ },
185
+ {
186
+ "epoch": 9.6,
187
+ "learning_rate": 3.7037037037037037e-05,
188
+ "loss": 0.3048,
189
+ "step": 120
190
+ },
191
+ {
192
+ "epoch": 10.0,
193
+ "eval_accuracy": 0.9606741573033708,
194
+ "eval_f1_score": 0.9625668449197862,
195
+ "eval_loss": 0.133090078830719,
196
+ "eval_precision": 0.9782608695652174,
197
+ "eval_recall": 0.9473684210526315,
198
+ "eval_runtime": 1.032,
199
+ "eval_samples_per_second": 172.478,
200
+ "eval_steps_per_second": 5.814,
201
+ "step": 125
202
+ },
203
+ {
204
+ "epoch": 10.4,
205
+ "learning_rate": 3.5493827160493834e-05,
206
+ "loss": 0.3004,
207
+ "step": 130
208
+ },
209
+ {
210
+ "epoch": 10.96,
211
+ "eval_accuracy": 0.9606741573033708,
212
+ "eval_f1_score": 0.9633507853403142,
213
+ "eval_loss": 0.13144755363464355,
214
+ "eval_precision": 0.9583333333333334,
215
+ "eval_recall": 0.968421052631579,
216
+ "eval_runtime": 1.0224,
217
+ "eval_samples_per_second": 174.103,
218
+ "eval_steps_per_second": 5.869,
219
+ "step": 137
220
+ },
221
+ {
222
+ "epoch": 11.2,
223
+ "learning_rate": 3.395061728395062e-05,
224
+ "loss": 0.2685,
225
+ "step": 140
226
+ },
227
+ {
228
+ "epoch": 12.0,
229
+ "learning_rate": 3.240740740740741e-05,
230
+ "loss": 0.2839,
231
+ "step": 150
232
+ },
233
+ {
234
+ "epoch": 12.0,
235
+ "eval_accuracy": 0.9606741573033708,
236
+ "eval_f1_score": 0.9621621621621621,
237
+ "eval_loss": 0.1272086501121521,
238
+ "eval_precision": 0.9888888888888889,
239
+ "eval_recall": 0.9368421052631579,
240
+ "eval_runtime": 1.0248,
241
+ "eval_samples_per_second": 173.691,
242
+ "eval_steps_per_second": 5.855,
243
+ "step": 150
244
+ },
245
+ {
246
+ "epoch": 12.8,
247
+ "learning_rate": 3.08641975308642e-05,
248
+ "loss": 0.286,
249
+ "step": 160
250
+ },
251
+ {
252
+ "epoch": 12.96,
253
+ "eval_accuracy": 0.9606741573033708,
254
+ "eval_f1_score": 0.9621621621621621,
255
+ "eval_loss": 0.11889710277318954,
256
+ "eval_precision": 0.9888888888888889,
257
+ "eval_recall": 0.9368421052631579,
258
+ "eval_runtime": 1.0343,
259
+ "eval_samples_per_second": 172.102,
260
+ "eval_steps_per_second": 5.801,
261
+ "step": 162
262
+ },
263
+ {
264
+ "epoch": 13.6,
265
+ "learning_rate": 2.9320987654320992e-05,
266
+ "loss": 0.2473,
267
+ "step": 170
268
+ },
269
+ {
270
+ "epoch": 14.0,
271
+ "eval_accuracy": 0.9719101123595506,
272
+ "eval_f1_score": 0.9732620320855614,
273
+ "eval_loss": 0.09774552285671234,
274
+ "eval_precision": 0.9891304347826086,
275
+ "eval_recall": 0.9578947368421052,
276
+ "eval_runtime": 1.2533,
277
+ "eval_samples_per_second": 142.026,
278
+ "eval_steps_per_second": 4.787,
279
+ "step": 175
280
+ },
281
+ {
282
+ "epoch": 14.4,
283
+ "learning_rate": 2.777777777777778e-05,
284
+ "loss": 0.2774,
285
+ "step": 180
286
+ },
287
+ {
288
+ "epoch": 14.96,
289
+ "eval_accuracy": 0.9831460674157303,
290
+ "eval_f1_score": 0.9841269841269842,
291
+ "eval_loss": 0.09884575009346008,
292
+ "eval_precision": 0.9893617021276596,
293
+ "eval_recall": 0.9789473684210527,
294
+ "eval_runtime": 1.2659,
295
+ "eval_samples_per_second": 140.612,
296
+ "eval_steps_per_second": 4.74,
297
+ "step": 187
298
+ },
299
+ {
300
+ "epoch": 15.2,
301
+ "learning_rate": 2.623456790123457e-05,
302
+ "loss": 0.2733,
303
+ "step": 190
304
+ },
305
+ {
306
+ "epoch": 16.0,
307
+ "learning_rate": 2.4691358024691357e-05,
308
+ "loss": 0.2541,
309
+ "step": 200
310
+ },
311
+ {
312
+ "epoch": 16.0,
313
+ "eval_accuracy": 0.9719101123595506,
314
+ "eval_f1_score": 0.9732620320855614,
315
+ "eval_loss": 0.09689934551715851,
316
+ "eval_precision": 0.9891304347826086,
317
+ "eval_recall": 0.9578947368421052,
318
+ "eval_runtime": 1.0329,
319
+ "eval_samples_per_second": 172.331,
320
+ "eval_steps_per_second": 5.809,
321
+ "step": 200
322
+ },
323
+ {
324
+ "epoch": 16.8,
325
+ "learning_rate": 2.314814814814815e-05,
326
+ "loss": 0.2383,
327
+ "step": 210
328
+ },
329
+ {
330
+ "epoch": 16.96,
331
+ "eval_accuracy": 0.9719101123595506,
332
+ "eval_f1_score": 0.9732620320855614,
333
+ "eval_loss": 0.10420462489128113,
334
+ "eval_precision": 0.9891304347826086,
335
+ "eval_recall": 0.9578947368421052,
336
+ "eval_runtime": 1.0218,
337
+ "eval_samples_per_second": 174.204,
338
+ "eval_steps_per_second": 5.872,
339
+ "step": 212
340
+ },
341
+ {
342
+ "epoch": 17.6,
343
+ "learning_rate": 2.1604938271604937e-05,
344
+ "loss": 0.2552,
345
+ "step": 220
346
+ },
347
+ {
348
+ "epoch": 18.0,
349
+ "eval_accuracy": 0.9719101123595506,
350
+ "eval_f1_score": 0.9732620320855614,
351
+ "eval_loss": 0.10807426273822784,
352
+ "eval_precision": 0.9891304347826086,
353
+ "eval_recall": 0.9578947368421052,
354
+ "eval_runtime": 1.0141,
355
+ "eval_samples_per_second": 175.532,
356
+ "eval_steps_per_second": 5.917,
357
+ "step": 225
358
+ },
359
+ {
360
+ "epoch": 18.4,
361
+ "learning_rate": 2.006172839506173e-05,
362
+ "loss": 0.2223,
363
+ "step": 230
364
+ },
365
+ {
366
+ "epoch": 18.96,
367
+ "eval_accuracy": 0.9662921348314607,
368
+ "eval_f1_score": 0.9680851063829788,
369
+ "eval_loss": 0.11498517543077469,
370
+ "eval_precision": 0.978494623655914,
371
+ "eval_recall": 0.9578947368421052,
372
+ "eval_runtime": 1.0007,
373
+ "eval_samples_per_second": 177.868,
374
+ "eval_steps_per_second": 5.996,
375
+ "step": 237
376
+ },
377
+ {
378
+ "epoch": 19.2,
379
+ "learning_rate": 1.8518518518518518e-05,
380
+ "loss": 0.2364,
381
+ "step": 240
382
+ },
383
+ {
384
+ "epoch": 20.0,
385
+ "learning_rate": 1.697530864197531e-05,
386
+ "loss": 0.2561,
387
+ "step": 250
388
+ },
389
+ {
390
+ "epoch": 20.0,
391
+ "eval_accuracy": 0.9550561797752809,
392
+ "eval_f1_score": 0.9574468085106383,
393
+ "eval_loss": 0.12337563186883926,
394
+ "eval_precision": 0.967741935483871,
395
+ "eval_recall": 0.9473684210526315,
396
+ "eval_runtime": 1.025,
397
+ "eval_samples_per_second": 173.653,
398
+ "eval_steps_per_second": 5.853,
399
+ "step": 250
400
+ },
401
+ {
402
+ "epoch": 20.8,
403
+ "learning_rate": 1.54320987654321e-05,
404
+ "loss": 0.2462,
405
+ "step": 260
406
+ },
407
+ {
408
+ "epoch": 20.96,
409
+ "eval_accuracy": 0.9606741573033708,
410
+ "eval_f1_score": 0.962962962962963,
411
+ "eval_loss": 0.11782826483249664,
412
+ "eval_precision": 0.9680851063829787,
413
+ "eval_recall": 0.9578947368421052,
414
+ "eval_runtime": 1.277,
415
+ "eval_samples_per_second": 139.389,
416
+ "eval_steps_per_second": 4.698,
417
+ "step": 262
418
+ },
419
+ {
420
+ "epoch": 21.6,
421
+ "learning_rate": 1.388888888888889e-05,
422
+ "loss": 0.2294,
423
+ "step": 270
424
+ },
425
+ {
426
+ "epoch": 22.0,
427
+ "eval_accuracy": 0.9382022471910112,
428
+ "eval_f1_score": 0.9430051813471502,
429
+ "eval_loss": 0.12620893120765686,
430
+ "eval_precision": 0.9285714285714286,
431
+ "eval_recall": 0.9578947368421052,
432
+ "eval_runtime": 1.0278,
433
+ "eval_samples_per_second": 173.184,
434
+ "eval_steps_per_second": 5.838,
435
+ "step": 275
436
+ },
437
+ {
438
+ "epoch": 22.4,
439
+ "learning_rate": 1.2345679012345678e-05,
440
+ "loss": 0.2296,
441
+ "step": 280
442
+ },
443
+ {
444
+ "epoch": 22.96,
445
  "eval_accuracy": 0.9438202247191011,
446
+ "eval_f1_score": 0.9479166666666666,
447
+ "eval_loss": 0.12900537252426147,
448
+ "eval_precision": 0.9381443298969072,
449
+ "eval_recall": 0.9578947368421052,
450
+ "eval_runtime": 1.0136,
451
+ "eval_samples_per_second": 175.614,
452
+ "eval_steps_per_second": 5.92,
453
+ "step": 287
454
+ },
455
+ {
456
+ "epoch": 23.2,
457
+ "learning_rate": 1.0802469135802469e-05,
458
+ "loss": 0.2351,
459
+ "step": 290
460
+ },
461
+ {
462
+ "epoch": 24.0,
463
+ "learning_rate": 9.259259259259259e-06,
464
+ "loss": 0.2224,
465
+ "step": 300
466
+ },
467
+ {
468
+ "epoch": 24.0,
469
+ "eval_accuracy": 0.949438202247191,
470
+ "eval_f1_score": 0.9528795811518324,
471
+ "eval_loss": 0.1152968481183052,
472
+ "eval_precision": 0.9479166666666666,
473
+ "eval_recall": 0.9578947368421052,
474
+ "eval_runtime": 1.0182,
475
+ "eval_samples_per_second": 174.822,
476
+ "eval_steps_per_second": 5.893,
477
+ "step": 300
478
+ },
479
+ {
480
+ "epoch": 24.8,
481
+ "learning_rate": 7.71604938271605e-06,
482
+ "loss": 0.2205,
483
+ "step": 310
484
+ },
485
+ {
486
+ "epoch": 24.96,
487
+ "eval_accuracy": 0.949438202247191,
488
+ "eval_f1_score": 0.9528795811518324,
489
+ "eval_loss": 0.11498025059700012,
490
+ "eval_precision": 0.9479166666666666,
491
+ "eval_recall": 0.9578947368421052,
492
+ "eval_runtime": 1.0221,
493
+ "eval_samples_per_second": 174.15,
494
+ "eval_steps_per_second": 5.87,
495
+ "step": 312
496
+ },
497
+ {
498
+ "epoch": 25.6,
499
+ "learning_rate": 6.172839506172839e-06,
500
+ "loss": 0.2169,
501
+ "step": 320
502
+ },
503
+ {
504
+ "epoch": 26.0,
505
+ "eval_accuracy": 0.9550561797752809,
506
+ "eval_f1_score": 0.9574468085106383,
507
+ "eval_loss": 0.1120525524020195,
508
+ "eval_precision": 0.967741935483871,
509
+ "eval_recall": 0.9473684210526315,
510
+ "eval_runtime": 1.0268,
511
+ "eval_samples_per_second": 173.35,
512
+ "eval_steps_per_second": 5.843,
513
+ "step": 325
514
+ },
515
+ {
516
+ "epoch": 26.4,
517
+ "learning_rate": 4.6296296296296296e-06,
518
+ "loss": 0.2212,
519
+ "step": 330
520
+ },
521
+ {
522
+ "epoch": 26.96,
523
+ "eval_accuracy": 0.949438202247191,
524
+ "eval_f1_score": 0.9528795811518324,
525
+ "eval_loss": 0.11452833563089371,
526
+ "eval_precision": 0.9479166666666666,
527
+ "eval_recall": 0.9578947368421052,
528
+ "eval_runtime": 1.951,
529
+ "eval_samples_per_second": 91.236,
530
+ "eval_steps_per_second": 3.075,
531
+ "step": 337
532
+ },
533
+ {
534
+ "epoch": 27.2,
535
+ "learning_rate": 3.0864197530864196e-06,
536
+ "loss": 0.2265,
537
+ "step": 340
538
+ },
539
+ {
540
+ "epoch": 28.0,
541
+ "learning_rate": 1.5432098765432098e-06,
542
+ "loss": 0.2188,
543
+ "step": 350
544
+ },
545
+ {
546
+ "epoch": 28.0,
547
+ "eval_accuracy": 0.949438202247191,
548
+ "eval_f1_score": 0.9523809523809523,
549
+ "eval_loss": 0.11309711635112762,
550
+ "eval_precision": 0.9574468085106383,
551
+ "eval_recall": 0.9473684210526315,
552
+ "eval_runtime": 1.0255,
553
+ "eval_samples_per_second": 173.568,
554
+ "eval_steps_per_second": 5.851,
555
+ "step": 350
556
+ },
557
+ {
558
+ "epoch": 28.8,
559
+ "learning_rate": 0.0,
560
+ "loss": 0.2015,
561
+ "step": 360
562
+ },
563
+ {
564
+ "epoch": 28.8,
565
+ "eval_accuracy": 0.949438202247191,
566
+ "eval_f1_score": 0.9523809523809523,
567
+ "eval_loss": 0.11295311897993088,
568
+ "eval_precision": 0.9574468085106383,
569
+ "eval_recall": 0.9473684210526315,
570
+ "eval_runtime": 1.3178,
571
+ "eval_samples_per_second": 135.073,
572
+ "eval_steps_per_second": 4.553,
573
+ "step": 360
574
+ },
575
+ {
576
+ "epoch": 28.8,
577
+ "step": 360,
578
+ "total_flos": 2.0837941191062323e+17,
579
+ "train_loss": 0.3229101174407535,
580
+ "train_runtime": 715.5601,
581
+ "train_samples_per_second": 66.787,
582
+ "train_steps_per_second": 0.503
583
  }
584
  ],
585
  "logging_steps": 10,
586
+ "max_steps": 360,
587
  "num_input_tokens_seen": 0,
588
+ "num_train_epochs": 30,
589
  "save_steps": 500,
590
+ "total_flos": 2.0837941191062323e+17,
591
  "train_batch_size": 32,
592
  "trial_name": null,
593
  "trial_params": null