Aratako/reward-test-modernbert
Browse files- README.md +180 -0
- config.json +54 -0
- model.safetensors +3 -0
- special_tokens_map.json +51 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +171 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: transformers
|
3 |
+
license: mit
|
4 |
+
base_model: sbintuitions/modernbert-ja-130m
|
5 |
+
tags:
|
6 |
+
- generated_from_trainer
|
7 |
+
metrics:
|
8 |
+
- pearsonr
|
9 |
+
- spearmanr
|
10 |
+
model-index:
|
11 |
+
- name: test-clf-modernbert
|
12 |
+
results: []
|
13 |
+
---
|
14 |
+
|
15 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
16 |
+
should probably proofread and complete it, then remove this comment. -->
|
17 |
+
|
18 |
+
# test-clf-modernbert
|
19 |
+
|
20 |
+
This model is a fine-tuned version of [sbintuitions/modernbert-ja-130m](https://huggingface.co/sbintuitions/modernbert-ja-130m) on an unknown dataset.
|
21 |
+
It achieves the following results on the evaluation set:
|
22 |
+
- Loss: 1.2451
|
23 |
+
- Mae: 0.8403
|
24 |
+
- R2: 0.3130
|
25 |
+
- Pearsonr: 0.5931
|
26 |
+
- Spearmanr: 0.5922
|
27 |
+
|
28 |
+
## Model description
|
29 |
+
|
30 |
+
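A single-output regression model: the configuration declares `ModernBertForSequenceClassification` with `problem_type: regression` and one label, so it produces one scalar score per input. More information needed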
|
31 |
+
|
32 |
+
## Intended uses & limitations
|
33 |
+
|
34 |
+
More information needed
|
35 |
+
|
36 |
+
## Training and evaluation data
|
37 |
+
|
38 |
+
More information needed
|
39 |
+
|
40 |
+
## Training procedure
|
41 |
+
|
42 |
+
### Training hyperparameters
|
43 |
+
|
44 |
+
The following hyperparameters were used during training:
|
45 |
+
- learning_rate: 5e-05
|
46 |
+
- train_batch_size: 8
|
47 |
+
- eval_batch_size: 8
|
48 |
+
- seed: 42
|
49 |
+
- gradient_accumulation_steps: 2
|
50 |
+
- total_train_batch_size: 16
|
51 |
+
- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
|
52 |
+
- lr_scheduler_type: cosine_with_min_lr
|
53 |
+
- lr_scheduler_warmup_ratio: 0.1
|
54 |
+
- num_epochs: 5
|
55 |
+
|
56 |
+
### Training results
|
57 |
+
|
58 |
+
| Training Loss | Epoch | Step | Validation Loss | Mae | R2 | Pearsonr | Spearmanr |
|
59 |
+
|:-------------:|:------:|:----:|:---------------:|:------:|:-------:|:--------:|:---------:|
|
60 |
+
| 9.9933 | 0.0440 | 30 | 6.6719 | 2.0008 | -2.2668 | 0.1397 | 0.1452 |
|
61 |
+
| 15.8286 | 0.0880 | 60 | 8.1886 | 2.5934 | -3.0094 | 0.3973 | 0.4013 |
|
62 |
+
| 8.4572 | 0.1320 | 90 | 6.8523 | 2.2707 | -2.3551 | 0.4558 | 0.4569 |
|
63 |
+
| 5.3474 | 0.1760 | 120 | 6.9153 | 2.3853 | -2.3859 | 0.3748 | 0.3991 |
|
64 |
+
| 3.7083 | 0.2199 | 150 | 1.8854 | 1.1120 | 0.0769 | 0.5052 | 0.4925 |
|
65 |
+
| 7.227 | 0.2639 | 180 | 9.4957 | 2.8974 | -3.6494 | 0.5055 | 0.4893 |
|
66 |
+
| 4.7794 | 0.3079 | 210 | 3.2968 | 1.6055 | -0.6142 | 0.5419 | 0.5234 |
|
67 |
+
| 5.8622 | 0.3519 | 240 | 1.6282 | 1.0145 | 0.2028 | 0.4997 | 0.4751 |
|
68 |
+
| 29.3694 | 0.3959 | 270 | 3.1598 | 1.2633 | -0.5471 | 0.4217 | 0.4515 |
|
69 |
+
| 4.8843 | 0.4399 | 300 | 1.9662 | 0.9848 | 0.0373 | 0.5340 | 0.5243 |
|
70 |
+
| 7.2397 | 0.4839 | 330 | 7.8408 | 2.6175 | -2.8391 | 0.5319 | 0.5158 |
|
71 |
+
| 6.8313 | 0.5279 | 360 | 8.7982 | 2.7803 | -3.3078 | 0.5732 | 0.5505 |
|
72 |
+
| 4.3403 | 0.5718 | 390 | 1.4482 | 0.8975 | 0.2909 | 0.5520 | 0.5218 |
|
73 |
+
| 7.2654 | 0.6158 | 420 | 1.5515 | 1.0041 | 0.2403 | 0.5685 | 0.5444 |
|
74 |
+
| 9.5751 | 0.6598 | 450 | 5.0151 | 1.9550 | -1.4555 | 0.5610 | 0.5228 |
|
75 |
+
| 7.2698 | 0.7038 | 480 | 1.7762 | 1.0876 | 0.1303 | 0.5662 | 0.5323 |
|
76 |
+
| 6.6579 | 0.7478 | 510 | 4.4502 | 1.8838 | -1.1790 | 0.5828 | 0.5705 |
|
77 |
+
| 7.2724 | 0.7918 | 540 | 1.8251 | 1.0671 | 0.1064 | 0.3696 | 0.4091 |
|
78 |
+
| 9.4832 | 0.8358 | 570 | 2.6866 | 1.1866 | -0.3155 | 0.5541 | 0.5260 |
|
79 |
+
| 4.5613 | 0.8798 | 600 | 3.3879 | 1.6278 | -0.6588 | 0.5794 | 0.5605 |
|
80 |
+
| 12.3981 | 0.9238 | 630 | 3.0805 | 1.3587 | -0.5083 | 0.5904 | 0.5478 |
|
81 |
+
| 3.9317 | 0.9677 | 660 | 1.6064 | 0.9136 | 0.2135 | 0.5827 | 0.5508 |
|
82 |
+
| 4.8332 | 1.0117 | 690 | 1.5664 | 0.8637 | 0.2330 | 0.5791 | 0.5430 |
|
83 |
+
| 6.857 | 1.0557 | 720 | 5.4549 | 2.0870 | -1.6709 | 0.5417 | 0.5282 |
|
84 |
+
| 3.9584 | 1.0997 | 750 | 1.3481 | 0.8706 | 0.3399 | 0.5957 | 0.5631 |
|
85 |
+
| 8.8648 | 1.1437 | 780 | 1.7614 | 1.1196 | 0.1376 | 0.6047 | 0.5783 |
|
86 |
+
| 2.9532 | 1.1877 | 810 | 1.6326 | 0.9802 | 0.2007 | 0.6132 | 0.5764 |
|
87 |
+
| 2.388 | 1.2317 | 840 | 1.3209 | 0.8682 | 0.3533 | 0.5980 | 0.5618 |
|
88 |
+
| 3.9205 | 1.2757 | 870 | 2.0332 | 1.2101 | 0.0045 | 0.6013 | 0.5713 |
|
89 |
+
| 5.0774 | 1.3196 | 900 | 1.9003 | 0.9672 | 0.0696 | 0.5201 | 0.5102 |
|
90 |
+
| 11.2205 | 1.3636 | 930 | 6.1741 | 2.3082 | -2.0230 | 0.6104 | 0.5664 |
|
91 |
+
| 8.0071 | 1.4076 | 960 | 3.0001 | 1.5480 | -0.4689 | 0.6097 | 0.5837 |
|
92 |
+
| 5.4257 | 1.4516 | 990 | 2.4884 | 1.3051 | -0.2184 | 0.6160 | 0.5836 |
|
93 |
+
| 4.5131 | 1.4956 | 1020 | 2.6897 | 1.4583 | -0.3169 | 0.6051 | 0.5638 |
|
94 |
+
| 4.1723 | 1.5396 | 1050 | 2.0260 | 1.1445 | 0.0080 | 0.6164 | 0.5869 |
|
95 |
+
| 3.0571 | 1.5836 | 1080 | 1.5634 | 1.0075 | 0.2345 | 0.6188 | 0.5816 |
|
96 |
+
| 9.7371 | 1.6276 | 1110 | 1.4136 | 0.8686 | 0.3078 | 0.6051 | 0.5755 |
|
97 |
+
| 5.2573 | 1.6716 | 1140 | 3.5674 | 1.6897 | -0.7467 | 0.6180 | 0.5883 |
|
98 |
+
| 3.9977 | 1.7155 | 1170 | 1.3670 | 0.8469 | 0.3307 | 0.5863 | 0.5811 |
|
99 |
+
| 2.8537 | 1.7595 | 1200 | 1.9676 | 1.1632 | 0.0366 | 0.5905 | 0.5737 |
|
100 |
+
| 3.1709 | 1.8035 | 1230 | 2.9723 | 1.5223 | -0.4553 | 0.5906 | 0.5709 |
|
101 |
+
| 1.4803 | 1.8475 | 1260 | 1.3534 | 0.8380 | 0.3373 | 0.6163 | 0.5936 |
|
102 |
+
| 2.8163 | 1.8915 | 1290 | 2.0147 | 1.2170 | 0.0136 | 0.6023 | 0.5762 |
|
103 |
+
| 2.4021 | 1.9355 | 1320 | 1.7181 | 0.9886 | 0.1588 | 0.6103 | 0.5991 |
|
104 |
+
| 3.3673 | 1.9795 | 1350 | 1.3287 | 0.8533 | 0.3494 | 0.6038 | 0.5811 |
|
105 |
+
| 5.3784 | 2.0235 | 1380 | 2.5721 | 1.3311 | -0.2594 | 0.5930 | 0.5774 |
|
106 |
+
| 1.2611 | 2.0674 | 1410 | 1.4277 | 0.9046 | 0.3010 | 0.6076 | 0.5870 |
|
107 |
+
| 3.9501 | 2.1114 | 1440 | 1.9269 | 1.1472 | 0.0565 | 0.5790 | 0.5814 |
|
108 |
+
| 2.2798 | 2.1554 | 1470 | 2.5371 | 1.3177 | -0.2422 | 0.5710 | 0.5862 |
|
109 |
+
| 3.7578 | 2.1994 | 1500 | 2.5477 | 1.3482 | -0.2474 | 0.5732 | 0.5748 |
|
110 |
+
| 1.984 | 2.2434 | 1530 | 1.6790 | 1.0629 | 0.1779 | 0.6074 | 0.5875 |
|
111 |
+
| 1.6615 | 2.2874 | 1560 | 1.3589 | 0.8942 | 0.3346 | 0.6133 | 0.5887 |
|
112 |
+
| 3.6824 | 2.3314 | 1590 | 1.3974 | 0.8186 | 0.3158 | 0.6202 | 0.5898 |
|
113 |
+
| 5.5223 | 2.3754 | 1620 | 1.5382 | 0.9047 | 0.2469 | 0.6262 | 0.5985 |
|
114 |
+
| 4.4067 | 2.4194 | 1650 | 1.4642 | 0.8964 | 0.2831 | 0.6047 | 0.5854 |
|
115 |
+
| 1.85 | 2.4633 | 1680 | 1.4969 | 0.8974 | 0.2671 | 0.6068 | 0.5953 |
|
116 |
+
| 2.2453 | 2.5073 | 1710 | 1.3792 | 0.8889 | 0.3247 | 0.6238 | 0.5967 |
|
117 |
+
| 1.222 | 2.5513 | 1740 | 1.4123 | 0.8998 | 0.3085 | 0.5980 | 0.5797 |
|
118 |
+
| 3.7706 | 2.5953 | 1770 | 1.8249 | 1.1007 | 0.1065 | 0.6079 | 0.5902 |
|
119 |
+
| 3.4938 | 2.6393 | 1800 | 1.5050 | 0.9715 | 0.2631 | 0.6097 | 0.5893 |
|
120 |
+
| 2.3874 | 2.6833 | 1830 | 1.3709 | 0.8751 | 0.3288 | 0.6151 | 0.5836 |
|
121 |
+
| 4.2677 | 2.7273 | 1860 | 4.1403 | 1.7888 | -1.0272 | 0.5916 | 0.5843 |
|
122 |
+
| 1.5007 | 2.7713 | 1890 | 1.3111 | 0.8293 | 0.3580 | 0.6207 | 0.5978 |
|
123 |
+
| 3.911 | 2.8152 | 1920 | 1.3087 | 0.8516 | 0.3592 | 0.6119 | 0.6000 |
|
124 |
+
| 3.8933 | 2.8592 | 1950 | 2.8415 | 1.4671 | -0.3913 | 0.5876 | 0.5771 |
|
125 |
+
| 2.1403 | 2.9032 | 1980 | 1.4062 | 0.8127 | 0.3115 | 0.6227 | 0.5928 |
|
126 |
+
| 2.1228 | 2.9472 | 2010 | 1.3771 | 0.8911 | 0.3258 | 0.6016 | 0.5892 |
|
127 |
+
| 2.9094 | 2.9912 | 2040 | 1.7354 | 1.0238 | 0.1503 | 0.6079 | 0.5863 |
|
128 |
+
| 1.4657 | 3.0352 | 2070 | 1.4149 | 0.8892 | 0.3072 | 0.5983 | 0.5799 |
|
129 |
+
| 1.7477 | 3.0792 | 2100 | 1.3193 | 0.8589 | 0.3540 | 0.6004 | 0.5704 |
|
130 |
+
| 3.5123 | 3.1232 | 2130 | 1.6118 | 0.9487 | 0.2108 | 0.6201 | 0.5896 |
|
131 |
+
| 1.8096 | 3.1672 | 2160 | 1.7808 | 1.0538 | 0.1281 | 0.6019 | 0.5833 |
|
132 |
+
| 1.5837 | 3.2111 | 2190 | 1.5396 | 0.9507 | 0.2462 | 0.5828 | 0.5674 |
|
133 |
+
| 0.8453 | 3.2551 | 2220 | 1.4974 | 0.9199 | 0.2668 | 0.6007 | 0.5865 |
|
134 |
+
| 1.9732 | 3.2991 | 2250 | 1.6253 | 0.9704 | 0.2042 | 0.5843 | 0.5768 |
|
135 |
+
| 2.0378 | 3.3431 | 2280 | 1.5907 | 0.9785 | 0.2212 | 0.6044 | 0.5839 |
|
136 |
+
| 1.0899 | 3.3871 | 2310 | 1.5984 | 0.9767 | 0.2174 | 0.5932 | 0.5855 |
|
137 |
+
| 0.7862 | 3.4311 | 2340 | 1.8230 | 1.0944 | 0.1074 | 0.6046 | 0.5857 |
|
138 |
+
| 0.5176 | 3.4751 | 2370 | 1.4034 | 0.8694 | 0.3128 | 0.6024 | 0.5793 |
|
139 |
+
| 2.4248 | 3.5191 | 2400 | 1.7378 | 1.0725 | 0.1491 | 0.5870 | 0.5739 |
|
140 |
+
| 1.7691 | 3.5630 | 2430 | 1.4056 | 0.8901 | 0.3118 | 0.6017 | 0.5830 |
|
141 |
+
| 1.4879 | 3.6070 | 2460 | 1.3290 | 0.8434 | 0.3493 | 0.6151 | 0.5981 |
|
142 |
+
| 1.547 | 3.6510 | 2490 | 1.6181 | 1.0174 | 0.2077 | 0.6078 | 0.5895 |
|
143 |
+
| 2.0894 | 3.6950 | 2520 | 1.3512 | 0.8452 | 0.3384 | 0.6066 | 0.5894 |
|
144 |
+
| 1.5556 | 3.7390 | 2550 | 2.0492 | 1.1739 | -0.0033 | 0.5986 | 0.5850 |
|
145 |
+
| 1.3739 | 3.7830 | 2580 | 1.4147 | 0.8854 | 0.3073 | 0.6057 | 0.5929 |
|
146 |
+
| 1.2473 | 3.8270 | 2610 | 1.6034 | 0.9910 | 0.2150 | 0.5994 | 0.5934 |
|
147 |
+
| 1.9761 | 3.8710 | 2640 | 1.4196 | 0.8876 | 0.3049 | 0.5900 | 0.5857 |
|
148 |
+
| 1.8939 | 3.9150 | 2670 | 1.3406 | 0.8412 | 0.3436 | 0.6088 | 0.5962 |
|
149 |
+
| 2.0543 | 3.9589 | 2700 | 1.7193 | 1.0429 | 0.1582 | 0.6008 | 0.5919 |
|
150 |
+
| 0.7404 | 4.0029 | 2730 | 1.5380 | 0.9383 | 0.2470 | 0.6013 | 0.5890 |
|
151 |
+
| 0.5295 | 4.0469 | 2760 | 1.6171 | 0.9787 | 0.2082 | 0.5922 | 0.5839 |
|
152 |
+
| 0.7104 | 4.0909 | 2790 | 1.5018 | 0.9479 | 0.2647 | 0.5907 | 0.5843 |
|
153 |
+
| 0.7016 | 4.1349 | 2820 | 1.4954 | 0.9353 | 0.2678 | 0.5985 | 0.5878 |
|
154 |
+
| 0.3892 | 4.1789 | 2850 | 1.4499 | 0.9028 | 0.2901 | 0.6007 | 0.5888 |
|
155 |
+
| 0.884 | 4.2229 | 2880 | 1.5246 | 0.9554 | 0.2535 | 0.5950 | 0.5878 |
|
156 |
+
| 0.8623 | 4.2669 | 2910 | 1.3712 | 0.8709 | 0.3286 | 0.6059 | 0.5970 |
|
157 |
+
| 0.2444 | 4.3109 | 2940 | 1.6298 | 1.0040 | 0.2020 | 0.6038 | 0.5950 |
|
158 |
+
| 0.834 | 4.3548 | 2970 | 1.4498 | 0.9032 | 0.2901 | 0.6063 | 0.5971 |
|
159 |
+
| 0.7055 | 4.3988 | 3000 | 1.6280 | 0.9841 | 0.2029 | 0.6037 | 0.5946 |
|
160 |
+
| 0.9799 | 4.4428 | 3030 | 1.7397 | 1.0215 | 0.1482 | 0.5993 | 0.5923 |
|
161 |
+
| 0.9547 | 4.4868 | 3060 | 1.4419 | 0.9001 | 0.2940 | 0.6049 | 0.5975 |
|
162 |
+
| 1.7134 | 4.5308 | 3090 | 1.3458 | 0.8483 | 0.3411 | 0.6074 | 0.5992 |
|
163 |
+
| 0.8426 | 4.5748 | 3120 | 1.3720 | 0.8646 | 0.3282 | 0.6031 | 0.5948 |
|
164 |
+
| 0.501 | 4.6188 | 3150 | 1.5110 | 0.9412 | 0.2602 | 0.5960 | 0.5916 |
|
165 |
+
| 0.8421 | 4.6628 | 3180 | 1.5676 | 0.9520 | 0.2325 | 0.5961 | 0.5908 |
|
166 |
+
| 0.7874 | 4.7067 | 3210 | 1.5184 | 0.9517 | 0.2565 | 0.6021 | 0.5939 |
|
167 |
+
| 0.7168 | 4.7507 | 3240 | 1.4734 | 0.9022 | 0.2786 | 0.6048 | 0.5968 |
|
168 |
+
| 0.5451 | 4.7947 | 3270 | 1.4566 | 0.9136 | 0.2868 | 0.6017 | 0.5959 |
|
169 |
+
| 0.3933 | 4.8387 | 3300 | 1.5092 | 0.9213 | 0.2611 | 0.5987 | 0.5912 |
|
170 |
+
| 1.5637 | 4.8827 | 3330 | 1.5144 | 0.9262 | 0.2585 | 0.5989 | 0.5902 |
|
171 |
+
| 0.6051 | 4.9267 | 3360 | 1.5053 | 0.9501 | 0.2630 | 0.5955 | 0.5861 |
|
172 |
+
| 0.0951 | 4.9707 | 3390 | 1.4349 | 0.8909 | 0.2974 | 0.6021 | 0.5913 |
|
173 |
+
|
174 |
+
|
175 |
+
### Framework versions
|
176 |
+
|
177 |
+
- Transformers 4.49.0
|
178 |
+
- Pytorch 2.4.1+cu124
|
179 |
+
- Datasets 3.3.2
|
180 |
+
- Tokenizers 0.21.0
|
config.json
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sbintuitions/modernbert-ja-130m",
|
3 |
+
"architectures": [
|
4 |
+
"ModernBertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"classifier_activation": "gelu",
|
10 |
+
"classifier_bias": false,
|
11 |
+
"classifier_dropout": 0.0,
|
12 |
+
"classifier_pooling": "cls",
|
13 |
+
"cls_token_id": 6,
|
14 |
+
"decoder_bias": true,
|
15 |
+
"deterministic_flash_attn": false,
|
16 |
+
"embedding_dropout": 0.0,
|
17 |
+
"eos_token_id": 2,
|
18 |
+
"global_attn_every_n_layers": 3,
|
19 |
+
"global_rope_theta": 160000.0,
|
20 |
+
"gradient_checkpointing": false,
|
21 |
+
"hidden_activation": "gelu",
|
22 |
+
"hidden_size": 512,
|
23 |
+
"id2label": {
|
24 |
+
"0": "LABEL_0"
|
25 |
+
},
|
26 |
+
"initializer_cutoff_factor": 2.0,
|
27 |
+
"initializer_range": 0.02,
|
28 |
+
"intermediate_size": 2048,
|
29 |
+
"label2id": {
|
30 |
+
"LABEL_0": 0
|
31 |
+
},
|
32 |
+
"layer_norm_eps": 1e-05,
|
33 |
+
"local_attention": 128,
|
34 |
+
"local_rope_theta": 10000.0,
|
35 |
+
"max_position_embeddings": 8192,
|
36 |
+
"mlp_bias": false,
|
37 |
+
"mlp_dropout": 0.0,
|
38 |
+
"model_type": "modernbert",
|
39 |
+
"norm_bias": false,
|
40 |
+
"norm_eps": 1e-05,
|
41 |
+
"num_attention_heads": 8,
|
42 |
+
"num_hidden_layers": 19,
|
43 |
+
"pad_token_id": 3,
|
44 |
+
"position_embedding_type": "rope",
|
45 |
+
"problem_type": "regression",
|
46 |
+
"reference_compile": false,
|
47 |
+
"repad_logits_with_grad": false,
|
48 |
+
"sep_token_id": 4,
|
49 |
+
"sparse_pred_ignore_index": -100,
|
50 |
+
"sparse_prediction": false,
|
51 |
+
"torch_dtype": "float32",
|
52 |
+
"transformers_version": "4.49.0",
|
53 |
+
"vocab_size": 102400
|
54 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c65486748db028b86abe425ed3ad8960255a02252408f49503e213695767c4f8
|
3 |
+
size 529627164
|
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "<cls>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "</s>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "<mask>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": {
|
31 |
+
"content": "<pad>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
},
|
37 |
+
"sep_token": {
|
38 |
+
"content": "<sep>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false
|
43 |
+
},
|
44 |
+
"unk_token": {
|
45 |
+
"content": "<unk>",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": false,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false
|
50 |
+
}
|
51 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:008293028e1a9d9a1038d9b63d989a2319797dfeaa03f171093a57b33a3a8277
|
3 |
+
size 1831879
|
tokenizer_config.json
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_dummy_prefix_space": false,
|
4 |
+
"add_eos_token": true,
|
5 |
+
"add_prefix_space": false,
|
6 |
+
"added_tokens_decoder": {
|
7 |
+
"0": {
|
8 |
+
"content": "<unk>",
|
9 |
+
"lstrip": false,
|
10 |
+
"normalized": false,
|
11 |
+
"rstrip": false,
|
12 |
+
"single_word": false,
|
13 |
+
"special": true
|
14 |
+
},
|
15 |
+
"1": {
|
16 |
+
"content": "<s>",
|
17 |
+
"lstrip": false,
|
18 |
+
"normalized": false,
|
19 |
+
"rstrip": false,
|
20 |
+
"single_word": false,
|
21 |
+
"special": true
|
22 |
+
},
|
23 |
+
"2": {
|
24 |
+
"content": "</s>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false,
|
29 |
+
"special": true
|
30 |
+
},
|
31 |
+
"3": {
|
32 |
+
"content": "<pad>",
|
33 |
+
"lstrip": false,
|
34 |
+
"normalized": false,
|
35 |
+
"rstrip": false,
|
36 |
+
"single_word": false,
|
37 |
+
"special": true
|
38 |
+
},
|
39 |
+
"4": {
|
40 |
+
"content": "<sep>",
|
41 |
+
"lstrip": false,
|
42 |
+
"normalized": false,
|
43 |
+
"rstrip": false,
|
44 |
+
"single_word": false,
|
45 |
+
"special": true
|
46 |
+
},
|
47 |
+
"5": {
|
48 |
+
"content": "<mask>",
|
49 |
+
"lstrip": false,
|
50 |
+
"normalized": false,
|
51 |
+
"rstrip": false,
|
52 |
+
"single_word": false,
|
53 |
+
"special": true
|
54 |
+
},
|
55 |
+
"6": {
|
56 |
+
"content": "<cls>",
|
57 |
+
"lstrip": false,
|
58 |
+
"normalized": false,
|
59 |
+
"rstrip": false,
|
60 |
+
"single_word": false,
|
61 |
+
"special": true
|
62 |
+
},
|
63 |
+
"7": {
|
64 |
+
"content": "<|system|>",
|
65 |
+
"lstrip": false,
|
66 |
+
"normalized": false,
|
67 |
+
"rstrip": false,
|
68 |
+
"single_word": false,
|
69 |
+
"special": false
|
70 |
+
},
|
71 |
+
"8": {
|
72 |
+
"content": "<|assistant|>",
|
73 |
+
"lstrip": false,
|
74 |
+
"normalized": false,
|
75 |
+
"rstrip": false,
|
76 |
+
"single_word": false,
|
77 |
+
"special": false
|
78 |
+
},
|
79 |
+
"9": {
|
80 |
+
"content": "<|user|>",
|
81 |
+
"lstrip": false,
|
82 |
+
"normalized": false,
|
83 |
+
"rstrip": false,
|
84 |
+
"single_word": false,
|
85 |
+
"special": false
|
86 |
+
},
|
87 |
+
"10": {
|
88 |
+
"content": "<|available_tools|>",
|
89 |
+
"lstrip": false,
|
90 |
+
"normalized": false,
|
91 |
+
"rstrip": false,
|
92 |
+
"single_word": false,
|
93 |
+
"special": false
|
94 |
+
},
|
95 |
+
"11": {
|
96 |
+
"content": "<|tool_calls|>",
|
97 |
+
"lstrip": false,
|
98 |
+
"normalized": false,
|
99 |
+
"rstrip": false,
|
100 |
+
"single_word": false,
|
101 |
+
"special": false
|
102 |
+
},
|
103 |
+
"12": {
|
104 |
+
"content": "<|tool_results|>",
|
105 |
+
"lstrip": false,
|
106 |
+
"normalized": false,
|
107 |
+
"rstrip": false,
|
108 |
+
"single_word": false,
|
109 |
+
"special": false
|
110 |
+
},
|
111 |
+
"13": {
|
112 |
+
"content": "<|code|>",
|
113 |
+
"lstrip": false,
|
114 |
+
"normalized": false,
|
115 |
+
"rstrip": false,
|
116 |
+
"single_word": false,
|
117 |
+
"special": false
|
118 |
+
},
|
119 |
+
"14": {
|
120 |
+
"content": "<|file|>",
|
121 |
+
"lstrip": false,
|
122 |
+
"normalized": false,
|
123 |
+
"rstrip": false,
|
124 |
+
"single_word": false,
|
125 |
+
"special": false
|
126 |
+
},
|
127 |
+
"102397": {
|
128 |
+
"content": "<|prefix|>",
|
129 |
+
"lstrip": false,
|
130 |
+
"normalized": false,
|
131 |
+
"rstrip": false,
|
132 |
+
"single_word": false,
|
133 |
+
"special": false
|
134 |
+
},
|
135 |
+
"102398": {
|
136 |
+
"content": "<|suffix|>",
|
137 |
+
"lstrip": false,
|
138 |
+
"normalized": false,
|
139 |
+
"rstrip": false,
|
140 |
+
"single_word": false,
|
141 |
+
"special": false
|
142 |
+
},
|
143 |
+
"102399": {
|
144 |
+
"content": "<|middle|>",
|
145 |
+
"lstrip": false,
|
146 |
+
"normalized": false,
|
147 |
+
"rstrip": false,
|
148 |
+
"single_word": false,
|
149 |
+
"special": false
|
150 |
+
}
|
151 |
+
},
|
152 |
+
"bos_token": "<s>",
|
153 |
+
"clean_up_tokenization_spaces": false,
|
154 |
+
"cls_token": "<cls>",
|
155 |
+
"do_lower_case": false,
|
156 |
+
"eos_token": "</s>",
|
157 |
+
"extra_ids": 0,
|
158 |
+
"extra_special_tokens": {},
|
159 |
+
"keep_accents": true,
|
160 |
+
"legacy": false,
|
161 |
+
"mask_token": "<mask>",
|
162 |
+
"model_max_length": 8192,
|
163 |
+
"pad_token": "<pad>",
|
164 |
+
"padding_side": "right",
|
165 |
+
"sep_token": "<sep>",
|
166 |
+
"sp_model_kwargs": {},
|
167 |
+
"spaces_between_special_tokens": false,
|
168 |
+
"tokenizer_class": "LlamaTokenizer",
|
169 |
+
"unk_token": "<unk>",
|
170 |
+
"use_default_system_prompt": false
|
171 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e10608735154be53b0e8de2fb6433029bdf2fee33e1c61eb481219b63da3c3e2
|
3 |
+
size 5368
|