Edelweisse committed on
Commit
2c726f2
·
verified ·
1 Parent(s): 92d3fbd

Edelweisse/Sentiment-Bpjs

Browse files
Files changed (5) hide show
  1. README.md +67 -0
  2. config.json +37 -0
  3. model.safetensors +3 -0
  4. trainer_state.json +433 -0
  5. training_args.bin +3 -0
README.md ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ - f1
9
+ model-index:
10
+ - name: results
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # results
18
+
19
+ This model is a fine-tuned version of [ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa](https://huggingface.co/ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.4952
22
+ - Accuracy: 0.8351
23
+ - F1: 0.8359
24
+
25
+ ## Model description
26
+
27
+ More information needed
28
+
29
+ ## Intended uses & limitations
30
+
31
+ More information needed
32
+
33
+ ## Training and evaluation data
34
+
35
+ More information needed
36
+
37
+ ## Training procedure
38
+
39
+ ### Training hyperparameters
40
+
41
+ The following hyperparameters were used during training:
42
+ - learning_rate: 3e-05
43
+ - train_batch_size: 16
44
+ - eval_batch_size: 64
45
+ - seed: 42
46
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
+ - lr_scheduler_type: linear
48
+ - lr_scheduler_warmup_steps: 200
49
+ - num_epochs: 5
50
+
51
+ ### Training results
52
+
53
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 |
54
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|
55
+ | 0.6964 | 1.0 | 73 | 0.5906 | 0.8110 | 0.8152 |
56
+ | 0.6407 | 2.0 | 146 | 0.4614 | 0.8007 | 0.8035 |
57
+ | 0.418 | 3.0 | 219 | 0.4952 | 0.8351 | 0.8359 |
58
+ | 0.1811 | 4.0 | 292 | 0.5943 | 0.8110 | 0.8114 |
59
+ | 0.1383 | 5.0 | 365 | 0.6963 | 0.8110 | 0.8121 |
60
+
61
+
62
+ ### Framework versions
63
+
64
+ - Transformers 4.41.1
65
+ - Pytorch 2.3.0+cu118
66
+ - Datasets 2.19.1
67
+ - Tokenizers 0.19.1
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.3,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.3,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Positive",
14
+ "1": "Neutral",
15
+ "2": "Negative"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "Negative": 2,
21
+ "Neutral": 1,
22
+ "Positive": 0
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "position_embedding_type": "absolute",
31
+ "problem_type": "single_label_classification",
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.41.1",
34
+ "type_vocab_size": 2,
35
+ "use_cache": true,
36
+ "vocab_size": 32000
37
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9497c6351d4ee6aa1bdd31c7912c8e1825912070b994c6dd0c0f2ca501c50b
3
+ size 442502140
trainer_state.json ADDED
@@ -0,0 +1,433 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8897849462365591,
3
+ "best_model_checkpoint": "./results/checkpoint-465",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 465,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.10752688172043011,
13
+ "grad_norm": 77.31194305419922,
14
+ "learning_rate": 1.5e-06,
15
+ "loss": 1.0774,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.21505376344086022,
20
+ "grad_norm": 21.743606567382812,
21
+ "learning_rate": 3e-06,
22
+ "loss": 1.205,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.3225806451612903,
27
+ "grad_norm": 46.53227233886719,
28
+ "learning_rate": 4.5e-06,
29
+ "loss": 0.9877,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.43010752688172044,
34
+ "grad_norm": 60.25495147705078,
35
+ "learning_rate": 6e-06,
36
+ "loss": 0.7626,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.5376344086021505,
41
+ "grad_norm": 42.291969299316406,
42
+ "learning_rate": 7.5e-06,
43
+ "loss": 0.6559,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.6451612903225806,
48
+ "grad_norm": 27.09429931640625,
49
+ "learning_rate": 9e-06,
50
+ "loss": 0.3539,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.7526881720430108,
55
+ "grad_norm": 29.117910385131836,
56
+ "learning_rate": 1.05e-05,
57
+ "loss": 0.516,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.8602150537634409,
62
+ "grad_norm": 35.026817321777344,
63
+ "learning_rate": 1.2e-05,
64
+ "loss": 0.5457,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.967741935483871,
69
+ "grad_norm": 17.851289749145508,
70
+ "learning_rate": 1.3500000000000001e-05,
71
+ "loss": 0.5821,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 1.0,
76
+ "eval_accuracy": 0.8655913978494624,
77
+ "eval_f1": 0.8661634583786485,
78
+ "eval_loss": 0.3687117397785187,
79
+ "eval_runtime": 1.6159,
80
+ "eval_samples_per_second": 230.212,
81
+ "eval_steps_per_second": 3.713,
82
+ "step": 93
83
+ },
84
+ {
85
+ "epoch": 1.075268817204301,
86
+ "grad_norm": 13.748136520385742,
87
+ "learning_rate": 1.5e-05,
88
+ "loss": 0.307,
89
+ "step": 100
90
+ },
91
+ {
92
+ "epoch": 1.1827956989247312,
93
+ "grad_norm": 3.2135958671569824,
94
+ "learning_rate": 1.65e-05,
95
+ "loss": 0.4692,
96
+ "step": 110
97
+ },
98
+ {
99
+ "epoch": 1.2903225806451613,
100
+ "grad_norm": 11.485154151916504,
101
+ "learning_rate": 1.8e-05,
102
+ "loss": 0.3206,
103
+ "step": 120
104
+ },
105
+ {
106
+ "epoch": 1.3978494623655915,
107
+ "grad_norm": 7.797122478485107,
108
+ "learning_rate": 1.95e-05,
109
+ "loss": 0.3138,
110
+ "step": 130
111
+ },
112
+ {
113
+ "epoch": 1.5053763440860215,
114
+ "grad_norm": 6.313217639923096,
115
+ "learning_rate": 2.1e-05,
116
+ "loss": 0.3135,
117
+ "step": 140
118
+ },
119
+ {
120
+ "epoch": 1.6129032258064515,
121
+ "grad_norm": 18.319936752319336,
122
+ "learning_rate": 2.25e-05,
123
+ "loss": 0.2156,
124
+ "step": 150
125
+ },
126
+ {
127
+ "epoch": 1.7204301075268817,
128
+ "grad_norm": 21.939163208007812,
129
+ "learning_rate": 2.4e-05,
130
+ "loss": 0.2749,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 1.827956989247312,
135
+ "grad_norm": 20.2148380279541,
136
+ "learning_rate": 2.55e-05,
137
+ "loss": 0.4502,
138
+ "step": 170
139
+ },
140
+ {
141
+ "epoch": 1.935483870967742,
142
+ "grad_norm": 22.044946670532227,
143
+ "learning_rate": 2.7000000000000002e-05,
144
+ "loss": 0.364,
145
+ "step": 180
146
+ },
147
+ {
148
+ "epoch": 2.0,
149
+ "eval_accuracy": 0.8763440860215054,
150
+ "eval_f1": 0.8742605847498066,
151
+ "eval_loss": 0.34187108278274536,
152
+ "eval_runtime": 1.6404,
153
+ "eval_samples_per_second": 226.774,
154
+ "eval_steps_per_second": 3.658,
155
+ "step": 186
156
+ },
157
+ {
158
+ "epoch": 2.043010752688172,
159
+ "grad_norm": 1.5786309242248535,
160
+ "learning_rate": 2.8499999999999998e-05,
161
+ "loss": 0.251,
162
+ "step": 190
163
+ },
164
+ {
165
+ "epoch": 2.150537634408602,
166
+ "grad_norm": 0.3100162446498871,
167
+ "learning_rate": 3e-05,
168
+ "loss": 0.0491,
169
+ "step": 200
170
+ },
171
+ {
172
+ "epoch": 2.258064516129032,
173
+ "grad_norm": 10.301804542541504,
174
+ "learning_rate": 2.8867924528301887e-05,
175
+ "loss": 0.0978,
176
+ "step": 210
177
+ },
178
+ {
179
+ "epoch": 2.3655913978494625,
180
+ "grad_norm": 0.5391497015953064,
181
+ "learning_rate": 2.7735849056603773e-05,
182
+ "loss": 0.2325,
183
+ "step": 220
184
+ },
185
+ {
186
+ "epoch": 2.4731182795698925,
187
+ "grad_norm": 33.04327392578125,
188
+ "learning_rate": 2.6603773584905663e-05,
189
+ "loss": 0.3053,
190
+ "step": 230
191
+ },
192
+ {
193
+ "epoch": 2.5806451612903225,
194
+ "grad_norm": 1.8449115753173828,
195
+ "learning_rate": 2.547169811320755e-05,
196
+ "loss": 0.1469,
197
+ "step": 240
198
+ },
199
+ {
200
+ "epoch": 2.688172043010753,
201
+ "grad_norm": 41.86624526977539,
202
+ "learning_rate": 2.4339622641509435e-05,
203
+ "loss": 0.0985,
204
+ "step": 250
205
+ },
206
+ {
207
+ "epoch": 2.795698924731183,
208
+ "grad_norm": 12.837655067443848,
209
+ "learning_rate": 2.320754716981132e-05,
210
+ "loss": 0.1707,
211
+ "step": 260
212
+ },
213
+ {
214
+ "epoch": 2.903225806451613,
215
+ "grad_norm": 7.691096305847168,
216
+ "learning_rate": 2.2075471698113208e-05,
217
+ "loss": 0.1889,
218
+ "step": 270
219
+ },
220
+ {
221
+ "epoch": 3.0,
222
+ "eval_accuracy": 0.8548387096774194,
223
+ "eval_f1": 0.8514191596326964,
224
+ "eval_loss": 0.5420617461204529,
225
+ "eval_runtime": 1.6403,
226
+ "eval_samples_per_second": 226.793,
227
+ "eval_steps_per_second": 3.658,
228
+ "step": 279
229
+ },
230
+ {
231
+ "epoch": 3.010752688172043,
232
+ "grad_norm": 24.820016860961914,
233
+ "learning_rate": 2.0943396226415094e-05,
234
+ "loss": 0.2798,
235
+ "step": 280
236
+ },
237
+ {
238
+ "epoch": 3.118279569892473,
239
+ "grad_norm": 0.1231377124786377,
240
+ "learning_rate": 1.981132075471698e-05,
241
+ "loss": 0.1421,
242
+ "step": 290
243
+ },
244
+ {
245
+ "epoch": 3.225806451612903,
246
+ "grad_norm": 22.27264404296875,
247
+ "learning_rate": 1.8679245283018867e-05,
248
+ "loss": 0.1455,
249
+ "step": 300
250
+ },
251
+ {
252
+ "epoch": 3.3333333333333335,
253
+ "grad_norm": 43.31319046020508,
254
+ "learning_rate": 1.7547169811320753e-05,
255
+ "loss": 0.1223,
256
+ "step": 310
257
+ },
258
+ {
259
+ "epoch": 3.4408602150537635,
260
+ "grad_norm": 0.16742512583732605,
261
+ "learning_rate": 1.6415094339622643e-05,
262
+ "loss": 0.0214,
263
+ "step": 320
264
+ },
265
+ {
266
+ "epoch": 3.5483870967741935,
267
+ "grad_norm": 44.84831619262695,
268
+ "learning_rate": 1.528301886792453e-05,
269
+ "loss": 0.0278,
270
+ "step": 330
271
+ },
272
+ {
273
+ "epoch": 3.6559139784946235,
274
+ "grad_norm": 16.63416862487793,
275
+ "learning_rate": 1.4150943396226415e-05,
276
+ "loss": 0.0238,
277
+ "step": 340
278
+ },
279
+ {
280
+ "epoch": 3.763440860215054,
281
+ "grad_norm": 0.019003387540578842,
282
+ "learning_rate": 1.3018867924528303e-05,
283
+ "loss": 0.0788,
284
+ "step": 350
285
+ },
286
+ {
287
+ "epoch": 3.870967741935484,
288
+ "grad_norm": 0.062045346945524216,
289
+ "learning_rate": 1.188679245283019e-05,
290
+ "loss": 0.0247,
291
+ "step": 360
292
+ },
293
+ {
294
+ "epoch": 3.978494623655914,
295
+ "grad_norm": 36.90345001220703,
296
+ "learning_rate": 1.0754716981132076e-05,
297
+ "loss": 0.1049,
298
+ "step": 370
299
+ },
300
+ {
301
+ "epoch": 4.0,
302
+ "eval_accuracy": 0.8844086021505376,
303
+ "eval_f1": 0.8851071788381587,
304
+ "eval_loss": 0.5284830927848816,
305
+ "eval_runtime": 1.6712,
306
+ "eval_samples_per_second": 222.589,
307
+ "eval_steps_per_second": 3.59,
308
+ "step": 372
309
+ },
310
+ {
311
+ "epoch": 4.086021505376344,
312
+ "grad_norm": 0.04606785252690315,
313
+ "learning_rate": 9.622641509433962e-06,
314
+ "loss": 0.0563,
315
+ "step": 380
316
+ },
317
+ {
318
+ "epoch": 4.193548387096774,
319
+ "grad_norm": 0.11379247903823853,
320
+ "learning_rate": 8.49056603773585e-06,
321
+ "loss": 0.0007,
322
+ "step": 390
323
+ },
324
+ {
325
+ "epoch": 4.301075268817204,
326
+ "grad_norm": 0.057167768478393555,
327
+ "learning_rate": 7.358490566037736e-06,
328
+ "loss": 0.0008,
329
+ "step": 400
330
+ },
331
+ {
332
+ "epoch": 4.408602150537634,
333
+ "grad_norm": 1.8930367231369019,
334
+ "learning_rate": 6.226415094339623e-06,
335
+ "loss": 0.0016,
336
+ "step": 410
337
+ },
338
+ {
339
+ "epoch": 4.516129032258064,
340
+ "grad_norm": 0.10506568104028702,
341
+ "learning_rate": 5.094339622641509e-06,
342
+ "loss": 0.0183,
343
+ "step": 420
344
+ },
345
+ {
346
+ "epoch": 4.623655913978495,
347
+ "grad_norm": 0.29419633746147156,
348
+ "learning_rate": 3.962264150943396e-06,
349
+ "loss": 0.0379,
350
+ "step": 430
351
+ },
352
+ {
353
+ "epoch": 4.731182795698925,
354
+ "grad_norm": 1.4433554410934448,
355
+ "learning_rate": 2.830188679245283e-06,
356
+ "loss": 0.0011,
357
+ "step": 440
358
+ },
359
+ {
360
+ "epoch": 4.838709677419355,
361
+ "grad_norm": 0.05199088156223297,
362
+ "learning_rate": 1.69811320754717e-06,
363
+ "loss": 0.0276,
364
+ "step": 450
365
+ },
366
+ {
367
+ "epoch": 4.946236559139785,
368
+ "grad_norm": 0.017505839467048645,
369
+ "learning_rate": 5.660377358490566e-07,
370
+ "loss": 0.0009,
371
+ "step": 460
372
+ },
373
+ {
374
+ "epoch": 5.0,
375
+ "eval_accuracy": 0.8897849462365591,
376
+ "eval_f1": 0.8902518525126154,
377
+ "eval_loss": 0.518719494342804,
378
+ "eval_runtime": 1.6655,
379
+ "eval_samples_per_second": 223.351,
380
+ "eval_steps_per_second": 3.602,
381
+ "step": 465
382
+ },
383
+ {
384
+ "epoch": 5.0,
385
+ "step": 465,
386
+ "total_flos": 312883992936900.0,
387
+ "train_loss": 0.26607618075144546,
388
+ "train_runtime": 161.9779,
389
+ "train_samples_per_second": 45.84,
390
+ "train_steps_per_second": 2.871
391
+ },
392
+ {
393
+ "epoch": 5.0,
394
+ "eval_accuracy": 0.8897849462365591,
395
+ "eval_f1": 0.8902518525126154,
396
+ "eval_loss": 0.518719494342804,
397
+ "eval_runtime": 1.6415,
398
+ "eval_samples_per_second": 226.619,
399
+ "eval_steps_per_second": 3.655,
400
+ "step": 465
401
+ }
402
+ ],
403
+ "logging_steps": 10,
404
+ "max_steps": 465,
405
+ "num_input_tokens_seen": 0,
406
+ "num_train_epochs": 5,
407
+ "save_steps": 500,
408
+ "stateful_callbacks": {
409
+ "EarlyStoppingCallback": {
410
+ "args": {
411
+ "early_stopping_patience": 3,
412
+ "early_stopping_threshold": 0.0
413
+ },
414
+ "attributes": {
415
+ "early_stopping_patience_counter": 0
416
+ }
417
+ },
418
+ "TrainerControl": {
419
+ "args": {
420
+ "should_epoch_stop": false,
421
+ "should_evaluate": false,
422
+ "should_log": false,
423
+ "should_save": true,
424
+ "should_training_stop": true
425
+ },
426
+ "attributes": {}
427
+ }
428
+ },
429
+ "total_flos": 312883992936900.0,
430
+ "train_batch_size": 16,
431
+ "trial_name": null,
432
+ "trial_params": null
433
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28d00dd1bd5aac8d5a91f4faf8efeec9a6ea4285ea80b5b6b5863e5b6cd5ae5d
3
+ size 5048