Aratako committed on
Commit
946125f
·
verified ·
1 Parent(s): b44f76f

Aratako/reward-test-modernbert

Browse files
README.md ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: mit
4
+ base_model: sbintuitions/modernbert-ja-130m
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - pearsonr
9
+ - spearmanr
10
+ model-index:
11
+ - name: test-clf-modernbert
12
+ results: []
13
+ ---
14
+
15
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
+ should probably proofread and complete it, then remove this comment. -->
17
+
18
+ # test-clf-modernbert
19
+
20
+ This model is a fine-tuned version of [sbintuitions/modernbert-ja-130m](https://huggingface.co/sbintuitions/modernbert-ja-130m) on an unknown dataset.
21
+ It achieves the following results on the evaluation set:
22
+ - Loss: 1.2451
23
+ - Mae: 0.8403
24
+ - R2: 0.3130
25
+ - Pearsonr: 0.5931
26
+ - Spearmanr: 0.5922
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 5e-05
46
+ - train_batch_size: 8
47
+ - eval_batch_size: 8
48
+ - seed: 42
49
+ - gradient_accumulation_steps: 2
50
+ - total_train_batch_size: 16
51
+ - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
52
+ - lr_scheduler_type: cosine_with_min_lr
53
+ - lr_scheduler_warmup_ratio: 0.1
54
+ - num_epochs: 5
55
+
56
+ ### Training results
57
+
58
+ | Training Loss | Epoch | Step | Validation Loss | Mae | R2 | Pearsonr | Spearmanr |
59
+ |:-------------:|:------:|:----:|:---------------:|:------:|:-------:|:--------:|:---------:|
60
+ | 9.9933 | 0.0440 | 30 | 6.6719 | 2.0008 | -2.2668 | 0.1397 | 0.1452 |
61
+ | 15.8286 | 0.0880 | 60 | 8.1886 | 2.5934 | -3.0094 | 0.3973 | 0.4013 |
62
+ | 8.4572 | 0.1320 | 90 | 6.8523 | 2.2707 | -2.3551 | 0.4558 | 0.4569 |
63
+ | 5.3474 | 0.1760 | 120 | 6.9153 | 2.3853 | -2.3859 | 0.3748 | 0.3991 |
64
+ | 3.7083 | 0.2199 | 150 | 1.8854 | 1.1120 | 0.0769 | 0.5052 | 0.4925 |
65
+ | 7.227 | 0.2639 | 180 | 9.4957 | 2.8974 | -3.6494 | 0.5055 | 0.4893 |
66
+ | 4.7794 | 0.3079 | 210 | 3.2968 | 1.6055 | -0.6142 | 0.5419 | 0.5234 |
67
+ | 5.8622 | 0.3519 | 240 | 1.6282 | 1.0145 | 0.2028 | 0.4997 | 0.4751 |
68
+ | 29.3694 | 0.3959 | 270 | 3.1598 | 1.2633 | -0.5471 | 0.4217 | 0.4515 |
69
+ | 4.8843 | 0.4399 | 300 | 1.9662 | 0.9848 | 0.0373 | 0.5340 | 0.5243 |
70
+ | 7.2397 | 0.4839 | 330 | 7.8408 | 2.6175 | -2.8391 | 0.5319 | 0.5158 |
71
+ | 6.8313 | 0.5279 | 360 | 8.7982 | 2.7803 | -3.3078 | 0.5732 | 0.5505 |
72
+ | 4.3403 | 0.5718 | 390 | 1.4482 | 0.8975 | 0.2909 | 0.5520 | 0.5218 |
73
+ | 7.2654 | 0.6158 | 420 | 1.5515 | 1.0041 | 0.2403 | 0.5685 | 0.5444 |
74
+ | 9.5751 | 0.6598 | 450 | 5.0151 | 1.9550 | -1.4555 | 0.5610 | 0.5228 |
75
+ | 7.2698 | 0.7038 | 480 | 1.7762 | 1.0876 | 0.1303 | 0.5662 | 0.5323 |
76
+ | 6.6579 | 0.7478 | 510 | 4.4502 | 1.8838 | -1.1790 | 0.5828 | 0.5705 |
77
+ | 7.2724 | 0.7918 | 540 | 1.8251 | 1.0671 | 0.1064 | 0.3696 | 0.4091 |
78
+ | 9.4832 | 0.8358 | 570 | 2.6866 | 1.1866 | -0.3155 | 0.5541 | 0.5260 |
79
+ | 4.5613 | 0.8798 | 600 | 3.3879 | 1.6278 | -0.6588 | 0.5794 | 0.5605 |
80
+ | 12.3981 | 0.9238 | 630 | 3.0805 | 1.3587 | -0.5083 | 0.5904 | 0.5478 |
81
+ | 3.9317 | 0.9677 | 660 | 1.6064 | 0.9136 | 0.2135 | 0.5827 | 0.5508 |
82
+ | 4.8332 | 1.0117 | 690 | 1.5664 | 0.8637 | 0.2330 | 0.5791 | 0.5430 |
83
+ | 6.857 | 1.0557 | 720 | 5.4549 | 2.0870 | -1.6709 | 0.5417 | 0.5282 |
84
+ | 3.9584 | 1.0997 | 750 | 1.3481 | 0.8706 | 0.3399 | 0.5957 | 0.5631 |
85
+ | 8.8648 | 1.1437 | 780 | 1.7614 | 1.1196 | 0.1376 | 0.6047 | 0.5783 |
86
+ | 2.9532 | 1.1877 | 810 | 1.6326 | 0.9802 | 0.2007 | 0.6132 | 0.5764 |
87
+ | 2.388 | 1.2317 | 840 | 1.3209 | 0.8682 | 0.3533 | 0.5980 | 0.5618 |
88
+ | 3.9205 | 1.2757 | 870 | 2.0332 | 1.2101 | 0.0045 | 0.6013 | 0.5713 |
89
+ | 5.0774 | 1.3196 | 900 | 1.9003 | 0.9672 | 0.0696 | 0.5201 | 0.5102 |
90
+ | 11.2205 | 1.3636 | 930 | 6.1741 | 2.3082 | -2.0230 | 0.6104 | 0.5664 |
91
+ | 8.0071 | 1.4076 | 960 | 3.0001 | 1.5480 | -0.4689 | 0.6097 | 0.5837 |
92
+ | 5.4257 | 1.4516 | 990 | 2.4884 | 1.3051 | -0.2184 | 0.6160 | 0.5836 |
93
+ | 4.5131 | 1.4956 | 1020 | 2.6897 | 1.4583 | -0.3169 | 0.6051 | 0.5638 |
94
+ | 4.1723 | 1.5396 | 1050 | 2.0260 | 1.1445 | 0.0080 | 0.6164 | 0.5869 |
95
+ | 3.0571 | 1.5836 | 1080 | 1.5634 | 1.0075 | 0.2345 | 0.6188 | 0.5816 |
96
+ | 9.7371 | 1.6276 | 1110 | 1.4136 | 0.8686 | 0.3078 | 0.6051 | 0.5755 |
97
+ | 5.2573 | 1.6716 | 1140 | 3.5674 | 1.6897 | -0.7467 | 0.6180 | 0.5883 |
98
+ | 3.9977 | 1.7155 | 1170 | 1.3670 | 0.8469 | 0.3307 | 0.5863 | 0.5811 |
99
+ | 2.8537 | 1.7595 | 1200 | 1.9676 | 1.1632 | 0.0366 | 0.5905 | 0.5737 |
100
+ | 3.1709 | 1.8035 | 1230 | 2.9723 | 1.5223 | -0.4553 | 0.5906 | 0.5709 |
101
+ | 1.4803 | 1.8475 | 1260 | 1.3534 | 0.8380 | 0.3373 | 0.6163 | 0.5936 |
102
+ | 2.8163 | 1.8915 | 1290 | 2.0147 | 1.2170 | 0.0136 | 0.6023 | 0.5762 |
103
+ | 2.4021 | 1.9355 | 1320 | 1.7181 | 0.9886 | 0.1588 | 0.6103 | 0.5991 |
104
+ | 3.3673 | 1.9795 | 1350 | 1.3287 | 0.8533 | 0.3494 | 0.6038 | 0.5811 |
105
+ | 5.3784 | 2.0235 | 1380 | 2.5721 | 1.3311 | -0.2594 | 0.5930 | 0.5774 |
106
+ | 1.2611 | 2.0674 | 1410 | 1.4277 | 0.9046 | 0.3010 | 0.6076 | 0.5870 |
107
+ | 3.9501 | 2.1114 | 1440 | 1.9269 | 1.1472 | 0.0565 | 0.5790 | 0.5814 |
108
+ | 2.2798 | 2.1554 | 1470 | 2.5371 | 1.3177 | -0.2422 | 0.5710 | 0.5862 |
109
+ | 3.7578 | 2.1994 | 1500 | 2.5477 | 1.3482 | -0.2474 | 0.5732 | 0.5748 |
110
+ | 1.984 | 2.2434 | 1530 | 1.6790 | 1.0629 | 0.1779 | 0.6074 | 0.5875 |
111
+ | 1.6615 | 2.2874 | 1560 | 1.3589 | 0.8942 | 0.3346 | 0.6133 | 0.5887 |
112
+ | 3.6824 | 2.3314 | 1590 | 1.3974 | 0.8186 | 0.3158 | 0.6202 | 0.5898 |
113
+ | 5.5223 | 2.3754 | 1620 | 1.5382 | 0.9047 | 0.2469 | 0.6262 | 0.5985 |
114
+ | 4.4067 | 2.4194 | 1650 | 1.4642 | 0.8964 | 0.2831 | 0.6047 | 0.5854 |
115
+ | 1.85 | 2.4633 | 1680 | 1.4969 | 0.8974 | 0.2671 | 0.6068 | 0.5953 |
116
+ | 2.2453 | 2.5073 | 1710 | 1.3792 | 0.8889 | 0.3247 | 0.6238 | 0.5967 |
117
+ | 1.222 | 2.5513 | 1740 | 1.4123 | 0.8998 | 0.3085 | 0.5980 | 0.5797 |
118
+ | 3.7706 | 2.5953 | 1770 | 1.8249 | 1.1007 | 0.1065 | 0.6079 | 0.5902 |
119
+ | 3.4938 | 2.6393 | 1800 | 1.5050 | 0.9715 | 0.2631 | 0.6097 | 0.5893 |
120
+ | 2.3874 | 2.6833 | 1830 | 1.3709 | 0.8751 | 0.3288 | 0.6151 | 0.5836 |
121
+ | 4.2677 | 2.7273 | 1860 | 4.1403 | 1.7888 | -1.0272 | 0.5916 | 0.5843 |
122
+ | 1.5007 | 2.7713 | 1890 | 1.3111 | 0.8293 | 0.3580 | 0.6207 | 0.5978 |
123
+ | 3.911 | 2.8152 | 1920 | 1.3087 | 0.8516 | 0.3592 | 0.6119 | 0.6000 |
124
+ | 3.8933 | 2.8592 | 1950 | 2.8415 | 1.4671 | -0.3913 | 0.5876 | 0.5771 |
125
+ | 2.1403 | 2.9032 | 1980 | 1.4062 | 0.8127 | 0.3115 | 0.6227 | 0.5928 |
126
+ | 2.1228 | 2.9472 | 2010 | 1.3771 | 0.8911 | 0.3258 | 0.6016 | 0.5892 |
127
+ | 2.9094 | 2.9912 | 2040 | 1.7354 | 1.0238 | 0.1503 | 0.6079 | 0.5863 |
128
+ | 1.4657 | 3.0352 | 2070 | 1.4149 | 0.8892 | 0.3072 | 0.5983 | 0.5799 |
129
+ | 1.7477 | 3.0792 | 2100 | 1.3193 | 0.8589 | 0.3540 | 0.6004 | 0.5704 |
130
+ | 3.5123 | 3.1232 | 2130 | 1.6118 | 0.9487 | 0.2108 | 0.6201 | 0.5896 |
131
+ | 1.8096 | 3.1672 | 2160 | 1.7808 | 1.0538 | 0.1281 | 0.6019 | 0.5833 |
132
+ | 1.5837 | 3.2111 | 2190 | 1.5396 | 0.9507 | 0.2462 | 0.5828 | 0.5674 |
133
+ | 0.8453 | 3.2551 | 2220 | 1.4974 | 0.9199 | 0.2668 | 0.6007 | 0.5865 |
134
+ | 1.9732 | 3.2991 | 2250 | 1.6253 | 0.9704 | 0.2042 | 0.5843 | 0.5768 |
135
+ | 2.0378 | 3.3431 | 2280 | 1.5907 | 0.9785 | 0.2212 | 0.6044 | 0.5839 |
136
+ | 1.0899 | 3.3871 | 2310 | 1.5984 | 0.9767 | 0.2174 | 0.5932 | 0.5855 |
137
+ | 0.7862 | 3.4311 | 2340 | 1.8230 | 1.0944 | 0.1074 | 0.6046 | 0.5857 |
138
+ | 0.5176 | 3.4751 | 2370 | 1.4034 | 0.8694 | 0.3128 | 0.6024 | 0.5793 |
139
+ | 2.4248 | 3.5191 | 2400 | 1.7378 | 1.0725 | 0.1491 | 0.5870 | 0.5739 |
140
+ | 1.7691 | 3.5630 | 2430 | 1.4056 | 0.8901 | 0.3118 | 0.6017 | 0.5830 |
141
+ | 1.4879 | 3.6070 | 2460 | 1.3290 | 0.8434 | 0.3493 | 0.6151 | 0.5981 |
142
+ | 1.547 | 3.6510 | 2490 | 1.6181 | 1.0174 | 0.2077 | 0.6078 | 0.5895 |
143
+ | 2.0894 | 3.6950 | 2520 | 1.3512 | 0.8452 | 0.3384 | 0.6066 | 0.5894 |
144
+ | 1.5556 | 3.7390 | 2550 | 2.0492 | 1.1739 | -0.0033 | 0.5986 | 0.5850 |
145
+ | 1.3739 | 3.7830 | 2580 | 1.4147 | 0.8854 | 0.3073 | 0.6057 | 0.5929 |
146
+ | 1.2473 | 3.8270 | 2610 | 1.6034 | 0.9910 | 0.2150 | 0.5994 | 0.5934 |
147
+ | 1.9761 | 3.8710 | 2640 | 1.4196 | 0.8876 | 0.3049 | 0.5900 | 0.5857 |
148
+ | 1.8939 | 3.9150 | 2670 | 1.3406 | 0.8412 | 0.3436 | 0.6088 | 0.5962 |
149
+ | 2.0543 | 3.9589 | 2700 | 1.7193 | 1.0429 | 0.1582 | 0.6008 | 0.5919 |
150
+ | 0.7404 | 4.0029 | 2730 | 1.5380 | 0.9383 | 0.2470 | 0.6013 | 0.5890 |
151
+ | 0.5295 | 4.0469 | 2760 | 1.6171 | 0.9787 | 0.2082 | 0.5922 | 0.5839 |
152
+ | 0.7104 | 4.0909 | 2790 | 1.5018 | 0.9479 | 0.2647 | 0.5907 | 0.5843 |
153
+ | 0.7016 | 4.1349 | 2820 | 1.4954 | 0.9353 | 0.2678 | 0.5985 | 0.5878 |
154
+ | 0.3892 | 4.1789 | 2850 | 1.4499 | 0.9028 | 0.2901 | 0.6007 | 0.5888 |
155
+ | 0.884 | 4.2229 | 2880 | 1.5246 | 0.9554 | 0.2535 | 0.5950 | 0.5878 |
156
+ | 0.8623 | 4.2669 | 2910 | 1.3712 | 0.8709 | 0.3286 | 0.6059 | 0.5970 |
157
+ | 0.2444 | 4.3109 | 2940 | 1.6298 | 1.0040 | 0.2020 | 0.6038 | 0.5950 |
158
+ | 0.834 | 4.3548 | 2970 | 1.4498 | 0.9032 | 0.2901 | 0.6063 | 0.5971 |
159
+ | 0.7055 | 4.3988 | 3000 | 1.6280 | 0.9841 | 0.2029 | 0.6037 | 0.5946 |
160
+ | 0.9799 | 4.4428 | 3030 | 1.7397 | 1.0215 | 0.1482 | 0.5993 | 0.5923 |
161
+ | 0.9547 | 4.4868 | 3060 | 1.4419 | 0.9001 | 0.2940 | 0.6049 | 0.5975 |
162
+ | 1.7134 | 4.5308 | 3090 | 1.3458 | 0.8483 | 0.3411 | 0.6074 | 0.5992 |
163
+ | 0.8426 | 4.5748 | 3120 | 1.3720 | 0.8646 | 0.3282 | 0.6031 | 0.5948 |
164
+ | 0.501 | 4.6188 | 3150 | 1.5110 | 0.9412 | 0.2602 | 0.5960 | 0.5916 |
165
+ | 0.8421 | 4.6628 | 3180 | 1.5676 | 0.9520 | 0.2325 | 0.5961 | 0.5908 |
166
+ | 0.7874 | 4.7067 | 3210 | 1.5184 | 0.9517 | 0.2565 | 0.6021 | 0.5939 |
167
+ | 0.7168 | 4.7507 | 3240 | 1.4734 | 0.9022 | 0.2786 | 0.6048 | 0.5968 |
168
+ | 0.5451 | 4.7947 | 3270 | 1.4566 | 0.9136 | 0.2868 | 0.6017 | 0.5959 |
169
+ | 0.3933 | 4.8387 | 3300 | 1.5092 | 0.9213 | 0.2611 | 0.5987 | 0.5912 |
170
+ | 1.5637 | 4.8827 | 3330 | 1.5144 | 0.9262 | 0.2585 | 0.5989 | 0.5902 |
171
+ | 0.6051 | 4.9267 | 3360 | 1.5053 | 0.9501 | 0.2630 | 0.5955 | 0.5861 |
172
+ | 0.0951 | 4.9707 | 3390 | 1.4349 | 0.8909 | 0.2974 | 0.6021 | 0.5913 |
173
+
174
+
175
+ ### Framework versions
176
+
177
+ - Transformers 4.49.0
178
+ - Pytorch 2.4.1+cu124
179
+ - Datasets 3.3.2
180
+ - Tokenizers 0.21.0
config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sbintuitions/modernbert-ja-130m",
3
+ "architectures": [
4
+ "ModernBertForSequenceClassification"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "classifier_activation": "gelu",
10
+ "classifier_bias": false,
11
+ "classifier_dropout": 0.0,
12
+ "classifier_pooling": "cls",
13
+ "cls_token_id": 6,
14
+ "decoder_bias": true,
15
+ "deterministic_flash_attn": false,
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 2,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 512,
23
+ "id2label": {
24
+ "0": "LABEL_0"
25
+ },
26
+ "initializer_cutoff_factor": 2.0,
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 2048,
29
+ "label2id": {
30
+ "LABEL_0": 0
31
+ },
32
+ "layer_norm_eps": 1e-05,
33
+ "local_attention": 128,
34
+ "local_rope_theta": 10000.0,
35
+ "max_position_embeddings": 8192,
36
+ "mlp_bias": false,
37
+ "mlp_dropout": 0.0,
38
+ "model_type": "modernbert",
39
+ "norm_bias": false,
40
+ "norm_eps": 1e-05,
41
+ "num_attention_heads": 8,
42
+ "num_hidden_layers": 19,
43
+ "pad_token_id": 3,
44
+ "position_embedding_type": "rope",
45
+ "problem_type": "regression",
46
+ "reference_compile": false,
47
+ "repad_logits_with_grad": false,
48
+ "sep_token_id": 4,
49
+ "sparse_pred_ignore_index": -100,
50
+ "sparse_prediction": false,
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.49.0",
53
+ "vocab_size": 102400
54
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c65486748db028b86abe425ed3ad8960255a02252408f49503e213695767c4f8
3
+ size 529627164
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<cls>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "<sep>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:008293028e1a9d9a1038d9b63d989a2319797dfeaa03f171093a57b33a3a8277
3
+ size 1831879
tokenizer_config.json ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_dummy_prefix_space": false,
4
+ "add_eos_token": true,
5
+ "add_prefix_space": false,
6
+ "added_tokens_decoder": {
7
+ "0": {
8
+ "content": "<unk>",
9
+ "lstrip": false,
10
+ "normalized": false,
11
+ "rstrip": false,
12
+ "single_word": false,
13
+ "special": true
14
+ },
15
+ "1": {
16
+ "content": "<s>",
17
+ "lstrip": false,
18
+ "normalized": false,
19
+ "rstrip": false,
20
+ "single_word": false,
21
+ "special": true
22
+ },
23
+ "2": {
24
+ "content": "</s>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false,
29
+ "special": true
30
+ },
31
+ "3": {
32
+ "content": "<pad>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false,
37
+ "special": true
38
+ },
39
+ "4": {
40
+ "content": "<sep>",
41
+ "lstrip": false,
42
+ "normalized": false,
43
+ "rstrip": false,
44
+ "single_word": false,
45
+ "special": true
46
+ },
47
+ "5": {
48
+ "content": "<mask>",
49
+ "lstrip": false,
50
+ "normalized": false,
51
+ "rstrip": false,
52
+ "single_word": false,
53
+ "special": true
54
+ },
55
+ "6": {
56
+ "content": "<cls>",
57
+ "lstrip": false,
58
+ "normalized": false,
59
+ "rstrip": false,
60
+ "single_word": false,
61
+ "special": true
62
+ },
63
+ "7": {
64
+ "content": "<|system|>",
65
+ "lstrip": false,
66
+ "normalized": false,
67
+ "rstrip": false,
68
+ "single_word": false,
69
+ "special": false
70
+ },
71
+ "8": {
72
+ "content": "<|assistant|>",
73
+ "lstrip": false,
74
+ "normalized": false,
75
+ "rstrip": false,
76
+ "single_word": false,
77
+ "special": false
78
+ },
79
+ "9": {
80
+ "content": "<|user|>",
81
+ "lstrip": false,
82
+ "normalized": false,
83
+ "rstrip": false,
84
+ "single_word": false,
85
+ "special": false
86
+ },
87
+ "10": {
88
+ "content": "<|available_tools|>",
89
+ "lstrip": false,
90
+ "normalized": false,
91
+ "rstrip": false,
92
+ "single_word": false,
93
+ "special": false
94
+ },
95
+ "11": {
96
+ "content": "<|tool_calls|>",
97
+ "lstrip": false,
98
+ "normalized": false,
99
+ "rstrip": false,
100
+ "single_word": false,
101
+ "special": false
102
+ },
103
+ "12": {
104
+ "content": "<|tool_results|>",
105
+ "lstrip": false,
106
+ "normalized": false,
107
+ "rstrip": false,
108
+ "single_word": false,
109
+ "special": false
110
+ },
111
+ "13": {
112
+ "content": "<|code|>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false,
117
+ "special": false
118
+ },
119
+ "14": {
120
+ "content": "<|file|>",
121
+ "lstrip": false,
122
+ "normalized": false,
123
+ "rstrip": false,
124
+ "single_word": false,
125
+ "special": false
126
+ },
127
+ "102397": {
128
+ "content": "<|prefix|>",
129
+ "lstrip": false,
130
+ "normalized": false,
131
+ "rstrip": false,
132
+ "single_word": false,
133
+ "special": false
134
+ },
135
+ "102398": {
136
+ "content": "<|suffix|>",
137
+ "lstrip": false,
138
+ "normalized": false,
139
+ "rstrip": false,
140
+ "single_word": false,
141
+ "special": false
142
+ },
143
+ "102399": {
144
+ "content": "<|middle|>",
145
+ "lstrip": false,
146
+ "normalized": false,
147
+ "rstrip": false,
148
+ "single_word": false,
149
+ "special": false
150
+ }
151
+ },
152
+ "bos_token": "<s>",
153
+ "clean_up_tokenization_spaces": false,
154
+ "cls_token": "<cls>",
155
+ "do_lower_case": false,
156
+ "eos_token": "</s>",
157
+ "extra_ids": 0,
158
+ "extra_special_tokens": {},
159
+ "keep_accents": true,
160
+ "legacy": false,
161
+ "mask_token": "<mask>",
162
+ "model_max_length": 8192,
163
+ "pad_token": "<pad>",
164
+ "padding_side": "right",
165
+ "sep_token": "<sep>",
166
+ "sp_model_kwargs": {},
167
+ "spaces_between_special_tokens": false,
168
+ "tokenizer_class": "LlamaTokenizer",
169
+ "unk_token": "<unk>",
170
+ "use_default_system_prompt": false
171
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10608735154be53b0e8de2fb6433029bdf2fee33e1c61eb481219b63da3c3e2
3
+ size 5368