JennnDexter committed
Commit db6cd00
1 Parent(s): 4f1d55d

End of training

Files changed (5)
  1. README.md +5 -5
  2. all_results.json +6 -6
  3. eval_results.json +3 -3
  4. train_results.json +3 -3
  5. trainer_state.json +12 -12
README.md CHANGED
@@ -14,7 +14,7 @@ model-index:
       name: Masked Language Modeling
       type: fill-mask
     dataset:
-      name: wikitext
+      name: wikitext wikitext-2-raw-v1
       type: wikitext
       config: wikitext-2-raw-v1
       split: validation
@@ -22,7 +22,7 @@ model-index:
     metrics:
     - name: Accuracy
      type: accuracy
-      value: 0.7288328898061153
+      value: 0.7255275697753574
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,10 +30,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # mlm
 
-This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on the wikitext dataset.
+This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on the wikitext wikitext-2-raw-v1 dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.2628
-- Accuracy: 0.7288
+- Loss: 1.2799
+- Accuracy: 0.7255
 
 ## Model description
 
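The updated model card describes a fill-mask checkpoint. A minimal usage sketch, assuming the repository id is `JennnDexter/mlm` (inferred from the author and model name shown on this page; substitute the actual repo path if it differs):

```python
# Minimal fill-mask inference sketch for the checkpoint described above.
# "JennnDexter/mlm" is an assumed repository id, not confirmed by this page.
from transformers import pipeline

fill_mask = pipeline("fill-mask", model="JennnDexter/mlm")

# RoBERTa checkpoints use "<mask>" as the mask token.
for pred in fill_mask("The capital of France is <mask>."):
    print(f"{pred['token_str']!r}: {pred['score']:.3f}")
```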
all_results.json CHANGED
@@ -2,15 +2,15 @@
     "epoch": 3.0,
     "eval_accuracy": 0.7255275697753574,
     "eval_loss": 1.2798649072647095,
-    "eval_runtime": 13.3316,
+    "eval_runtime": 13.9997,
     "eval_samples": 496,
-    "eval_samples_per_second": 37.205,
-    "eval_steps_per_second": 4.651,
+    "eval_samples_per_second": 35.429,
+    "eval_steps_per_second": 4.429,
     "perplexity": 3.596153878488844,
     "total_flos": 3789443078682624.0,
     "train_loss": 1.420832945505778,
-    "train_runtime": 1064.4338,
+    "train_runtime": 1162.9561,
     "train_samples": 4798,
-    "train_samples_per_second": 13.523,
-    "train_steps_per_second": 0.423
+    "train_samples_per_second": 12.377,
+    "train_steps_per_second": 0.387
 }
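The unchanged `perplexity` field is simply the exponential of `eval_loss`, which is how the Transformers `run_mlm.py` example script reports it. A quick check against the value above:

```python
import math

eval_loss = 1.2798649072647095
print(math.exp(eval_loss))  # ~3.596153878488844, the "perplexity" field above
```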
eval_results.json CHANGED
@@ -2,9 +2,9 @@
     "epoch": 3.0,
     "eval_accuracy": 0.7255275697753574,
     "eval_loss": 1.2798649072647095,
-    "eval_runtime": 13.3316,
+    "eval_runtime": 13.9997,
     "eval_samples": 496,
-    "eval_samples_per_second": 37.205,
-    "eval_steps_per_second": 4.651,
+    "eval_samples_per_second": 35.429,
+    "eval_steps_per_second": 4.429,
     "perplexity": 3.596153878488844
 }
train_results.json CHANGED
@@ -2,8 +2,8 @@
     "epoch": 3.0,
     "total_flos": 3789443078682624.0,
     "train_loss": 1.420832945505778,
-    "train_runtime": 1064.4338,
+    "train_runtime": 1162.9561,
     "train_samples": 4798,
-    "train_samples_per_second": 13.523,
-    "train_steps_per_second": 0.423
+    "train_samples_per_second": 12.377,
+    "train_steps_per_second": 0.387
 }
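The updated throughput numbers follow directly from the sample count, epoch count, and the new wall-clock runtime (the 450-step total is recorded in trainer_state.json below):

```python
train_samples = 4798
num_epochs = 3.0
train_runtime = 1162.9561  # seconds
total_steps = 450          # from trainer_state.json below

print(round(train_samples * num_epochs / train_runtime, 3))  # 12.377 samples/s
print(round(total_steps / train_runtime, 3))                 # 0.387 steps/s
```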
trainer_state.json CHANGED
@@ -102,9 +102,9 @@
       "epoch": 1.0,
       "eval_accuracy": 0.7289605637620273,
       "eval_loss": 1.282206416130066,
-      "eval_runtime": 13.5027,
-      "eval_samples_per_second": 36.733,
-      "eval_steps_per_second": 4.592,
+      "eval_runtime": 14.3101,
+      "eval_samples_per_second": 34.661,
+      "eval_steps_per_second": 4.333,
       "step": 150
     },
     {
@@ -201,9 +201,9 @@
       "epoch": 2.0,
       "eval_accuracy": 0.7273364801078894,
       "eval_loss": 1.275496482849121,
-      "eval_runtime": 13.2216,
-      "eval_samples_per_second": 37.514,
-      "eval_steps_per_second": 4.689,
+      "eval_runtime": 14.199,
+      "eval_samples_per_second": 34.932,
+      "eval_steps_per_second": 4.366,
       "step": 300
     },
     {
@@ -300,9 +300,9 @@
       "epoch": 3.0,
       "eval_accuracy": 0.7288328898061153,
       "eval_loss": 1.2627531290054321,
-      "eval_runtime": 13.2416,
-      "eval_samples_per_second": 37.458,
-      "eval_steps_per_second": 4.682,
+      "eval_runtime": 14.2737,
+      "eval_samples_per_second": 34.749,
+      "eval_steps_per_second": 4.344,
       "step": 450
     },
     {
@@ -310,9 +310,9 @@
       "step": 450,
       "total_flos": 3789443078682624.0,
       "train_loss": 1.420832945505778,
-      "train_runtime": 1064.4338,
-      "train_samples_per_second": 13.523,
-      "train_steps_per_second": 0.423
+      "train_runtime": 1162.9561,
+      "train_samples_per_second": 12.377,
+      "train_steps_per_second": 0.387
     }
   ],
   "logging_steps": 10,