Files changed (1) hide show
  1. README.md +387 -1
README.md CHANGED
@@ -8,4 +8,390 @@ tags:
8
 
9
  Merge of top 7B models with PASS method
10
 
11
- > mergekit is a toolkit for merging pre-trained language models. mergekit uses an out-of-core approach to perform unreasonably elaborate merges in resource-constrained situations. Merges can be run entirely on CPU or accelerated with as little as 8 GB of VRAM. Many merging algorithms are supported, with more coming as they catch my attention.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  Merge of top 7B models with PASS method
10
 
11
+ > mergekit is a toolkit for merging pre-trained language models. mergekit uses an out-of-core approach to perform unreasonably elaborate merges in resource-constrained situations. Merges can be run entirely on CPU or accelerated with as little as 8 GB of VRAM. Many merging algorithms are supported, with more coming as they catch my attention.
12
+
13
+ ## Eval
14
+
15
+ ```json
16
+ {
17
+ "all": {
18
+ "acc": 0.6152059168567449,
19
+ "acc_stderr": 0.031951119145286845,
20
+ "acc_norm": 0.6274010157580394,
21
+ "acc_norm_stderr": 0.032831804892806175,
22
+ "mc1": 0.25091799265605874,
23
+ "mc1_stderr": 0.015176985027707694,
24
+ "mc2": 0.5006656333594469,
25
+ "mc2_stderr": 0.01636490303268174
26
+ },
27
+ "harness|arc:challenge|25": {
28
+ "acc": 0.3447098976109215,
29
+ "acc_stderr": 0.013888816286782112,
30
+ "acc_norm": 0.3856655290102389,
31
+ "acc_norm_stderr": 0.01422425097325717
32
+ },
33
+ "harness|hellaswag|10": {
34
+ "acc": 0.34116709818761204,
35
+ "acc_stderr": 0.004731324409133264,
36
+ "acc_norm": 0.515435172276439,
37
+ "acc_norm_stderr": 0.004987403268345035
38
+ },
39
+ "harness|hendrycksTest-abstract_algebra|5": {
40
+ "acc": 0.27,
41
+ "acc_stderr": 0.04461960433384741,
42
+ "acc_norm": 0.27,
43
+ "acc_norm_stderr": 0.04461960433384741
44
+ },
45
+ "harness|hendrycksTest-anatomy|5": {
46
+ "acc": 0.5703703703703704,
47
+ "acc_stderr": 0.042763494943765995,
48
+ "acc_norm": 0.5703703703703704,
49
+ "acc_norm_stderr": 0.042763494943765995
50
+ },
51
+ "harness|hendrycksTest-astronomy|5": {
52
+ "acc": 0.6842105263157895,
53
+ "acc_stderr": 0.0378272898086547,
54
+ "acc_norm": 0.6842105263157895,
55
+ "acc_norm_stderr": 0.0378272898086547
56
+ },
57
+ "harness|hendrycksTest-business_ethics|5": {
58
+ "acc": 0.62,
59
+ "acc_stderr": 0.048783173121456316,
60
+ "acc_norm": 0.62,
61
+ "acc_norm_stderr": 0.048783173121456316
62
+ },
63
+ "harness|hendrycksTest-clinical_knowledge|5": {
64
+ "acc": 0.7169811320754716,
65
+ "acc_stderr": 0.027724236492700918,
66
+ "acc_norm": 0.7169811320754716,
67
+ "acc_norm_stderr": 0.027724236492700918
68
+ },
69
+ "harness|hendrycksTest-college_biology|5": {
70
+ "acc": 0.7638888888888888,
71
+ "acc_stderr": 0.03551446610810826,
72
+ "acc_norm": 0.7638888888888888,
73
+ "acc_norm_stderr": 0.03551446610810826
74
+ },
75
+ "harness|hendrycksTest-college_chemistry|5": {
76
+ "acc": 0.45,
77
+ "acc_stderr": 0.05,
78
+ "acc_norm": 0.45,
79
+ "acc_norm_stderr": 0.05
80
+ },
81
+ "harness|hendrycksTest-college_computer_science|5": {
82
+ "acc": 0.46,
83
+ "acc_stderr": 0.05009082659620333,
84
+ "acc_norm": 0.46,
85
+ "acc_norm_stderr": 0.05009082659620333
86
+ },
87
+ "harness|hendrycksTest-college_mathematics|5": {
88
+ "acc": 0.32,
89
+ "acc_stderr": 0.04688261722621504,
90
+ "acc_norm": 0.32,
91
+ "acc_norm_stderr": 0.04688261722621504
92
+ },
93
+ "harness|hendrycksTest-college_medicine|5": {
94
+ "acc": 0.6358381502890174,
95
+ "acc_stderr": 0.03669072477416907,
96
+ "acc_norm": 0.6358381502890174,
97
+ "acc_norm_stderr": 0.03669072477416907
98
+ },
99
+ "harness|hendrycksTest-college_physics|5": {
100
+ "acc": 0.4019607843137255,
101
+ "acc_stderr": 0.048786087144669955,
102
+ "acc_norm": 0.4019607843137255,
103
+ "acc_norm_stderr": 0.048786087144669955
104
+ },
105
+ "harness|hendrycksTest-computer_security|5": {
106
+ "acc": 0.76,
107
+ "acc_stderr": 0.04292346959909283,
108
+ "acc_norm": 0.76,
109
+ "acc_norm_stderr": 0.04292346959909283
110
+ },
111
+ "harness|hendrycksTest-conceptual_physics|5": {
112
+ "acc": 0.5446808510638298,
113
+ "acc_stderr": 0.03255525359340355,
114
+ "acc_norm": 0.5446808510638298,
115
+ "acc_norm_stderr": 0.03255525359340355
116
+ },
117
+ "harness|hendrycksTest-econometrics|5": {
118
+ "acc": 0.4824561403508772,
119
+ "acc_stderr": 0.04700708033551038,
120
+ "acc_norm": 0.4824561403508772,
121
+ "acc_norm_stderr": 0.04700708033551038
122
+ },
123
+ "harness|hendrycksTest-electrical_engineering|5": {
124
+ "acc": 0.5172413793103449,
125
+ "acc_stderr": 0.04164188720169375,
126
+ "acc_norm": 0.5172413793103449,
127
+ "acc_norm_stderr": 0.04164188720169375
128
+ },
129
+ "harness|hendrycksTest-elementary_mathematics|5": {
130
+ "acc": 0.42857142857142855,
131
+ "acc_stderr": 0.025487187147859372,
132
+ "acc_norm": 0.42857142857142855,
133
+ "acc_norm_stderr": 0.025487187147859372
134
+ },
135
+ "harness|hendrycksTest-formal_logic|5": {
136
+ "acc": 0.3968253968253968,
137
+ "acc_stderr": 0.043758884927270605,
138
+ "acc_norm": 0.3968253968253968,
139
+ "acc_norm_stderr": 0.043758884927270605
140
+ },
141
+ "harness|hendrycksTest-global_facts|5": {
142
+ "acc": 0.34,
143
+ "acc_stderr": 0.04760952285695236,
144
+ "acc_norm": 0.34,
145
+ "acc_norm_stderr": 0.04760952285695236
146
+ },
147
+ "harness|hendrycksTest-high_school_biology|5": {
148
+ "acc": 0.7741935483870968,
149
+ "acc_stderr": 0.023785577884181015,
150
+ "acc_norm": 0.7741935483870968,
151
+ "acc_norm_stderr": 0.023785577884181015
152
+ },
153
+ "harness|hendrycksTest-high_school_chemistry|5": {
154
+ "acc": 0.5123152709359606,
155
+ "acc_stderr": 0.035169204442208966,
156
+ "acc_norm": 0.5123152709359606,
157
+ "acc_norm_stderr": 0.035169204442208966
158
+ },
159
+ "harness|hendrycksTest-high_school_computer_science|5": {
160
+ "acc": 0.66,
161
+ "acc_stderr": 0.04760952285695237,
162
+ "acc_norm": 0.66,
163
+ "acc_norm_stderr": 0.04760952285695237
164
+ },
165
+ "harness|hendrycksTest-high_school_european_history|5": {
166
+ "acc": 0.7636363636363637,
167
+ "acc_stderr": 0.03317505930009181,
168
+ "acc_norm": 0.7636363636363637,
169
+ "acc_norm_stderr": 0.03317505930009181
170
+ },
171
+ "harness|hendrycksTest-high_school_geography|5": {
172
+ "acc": 0.7373737373737373,
173
+ "acc_stderr": 0.03135305009533085,
174
+ "acc_norm": 0.7373737373737373,
175
+ "acc_norm_stderr": 0.03135305009533085
176
+ },
177
+ "harness|hendrycksTest-high_school_government_and_politics|5": {
178
+ "acc": 0.8808290155440415,
179
+ "acc_stderr": 0.023381935348121437,
180
+ "acc_norm": 0.8808290155440415,
181
+ "acc_norm_stderr": 0.023381935348121437
182
+ },
183
+ "harness|hendrycksTest-high_school_macroeconomics|5": {
184
+ "acc": 0.617948717948718,
185
+ "acc_stderr": 0.024635549163908237,
186
+ "acc_norm": 0.617948717948718,
187
+ "acc_norm_stderr": 0.024635549163908237
188
+ },
189
+ "harness|hendrycksTest-high_school_mathematics|5": {
190
+ "acc": 0.2777777777777778,
191
+ "acc_stderr": 0.027309140588230203,
192
+ "acc_norm": 0.2777777777777778,
193
+ "acc_norm_stderr": 0.027309140588230203
194
+ },
195
+ "harness|hendrycksTest-high_school_microeconomics|5": {
196
+ "acc": 0.6512605042016807,
197
+ "acc_stderr": 0.030956636328566545,
198
+ "acc_norm": 0.6512605042016807,
199
+ "acc_norm_stderr": 0.030956636328566545
200
+ },
201
+ "harness|hendrycksTest-high_school_physics|5": {
202
+ "acc": 0.32450331125827814,
203
+ "acc_stderr": 0.038227469376587525,
204
+ "acc_norm": 0.32450331125827814,
205
+ "acc_norm_stderr": 0.038227469376587525
206
+ },
207
+ "harness|hendrycksTest-high_school_psychology|5": {
208
+ "acc": 0.8440366972477065,
209
+ "acc_stderr": 0.015555802713590158,
210
+ "acc_norm": 0.8440366972477065,
211
+ "acc_norm_stderr": 0.015555802713590158
212
+ },
213
+ "harness|hendrycksTest-high_school_statistics|5": {
214
+ "acc": 0.4722222222222222,
215
+ "acc_stderr": 0.0340470532865388,
216
+ "acc_norm": 0.4722222222222222,
217
+ "acc_norm_stderr": 0.0340470532865388
218
+ },
219
+ "harness|hendrycksTest-high_school_us_history|5": {
220
+ "acc": 0.8431372549019608,
221
+ "acc_stderr": 0.025524722324553346,
222
+ "acc_norm": 0.8431372549019608,
223
+ "acc_norm_stderr": 0.025524722324553346
224
+ },
225
+ "harness|hendrycksTest-high_school_world_history|5": {
226
+ "acc": 0.810126582278481,
227
+ "acc_stderr": 0.025530100460233497,
228
+ "acc_norm": 0.810126582278481,
229
+ "acc_norm_stderr": 0.025530100460233497
230
+ },
231
+ "harness|hendrycksTest-human_aging|5": {
232
+ "acc": 0.7174887892376681,
233
+ "acc_stderr": 0.03021683101150877,
234
+ "acc_norm": 0.7174887892376681,
235
+ "acc_norm_stderr": 0.03021683101150877
236
+ },
237
+ "harness|hendrycksTest-human_sexuality|5": {
238
+ "acc": 0.7786259541984732,
239
+ "acc_stderr": 0.0364129708131373,
240
+ "acc_norm": 0.7786259541984732,
241
+ "acc_norm_stderr": 0.0364129708131373
242
+ },
243
+ "harness|hendrycksTest-international_law|5": {
244
+ "acc": 0.7768595041322314,
245
+ "acc_stderr": 0.03800754475228733,
246
+ "acc_norm": 0.7768595041322314,
247
+ "acc_norm_stderr": 0.03800754475228733
248
+ },
249
+ "harness|hendrycksTest-jurisprudence|5": {
250
+ "acc": 0.8148148148148148,
251
+ "acc_stderr": 0.03755265865037181,
252
+ "acc_norm": 0.8148148148148148,
253
+ "acc_norm_stderr": 0.03755265865037181
254
+ },
255
+ "harness|hendrycksTest-logical_fallacies|5": {
256
+ "acc": 0.7914110429447853,
257
+ "acc_stderr": 0.031921934489347235,
258
+ "acc_norm": 0.7914110429447853,
259
+ "acc_norm_stderr": 0.031921934489347235
260
+ },
261
+ "harness|hendrycksTest-machine_learning|5": {
262
+ "acc": 0.5446428571428571,
263
+ "acc_stderr": 0.04726835553719097,
264
+ "acc_norm": 0.5446428571428571,
265
+ "acc_norm_stderr": 0.04726835553719097
266
+ },
267
+ "harness|hendrycksTest-management|5": {
268
+ "acc": 0.8349514563106796,
269
+ "acc_stderr": 0.036756688322331886,
270
+ "acc_norm": 0.8349514563106796,
271
+ "acc_norm_stderr": 0.036756688322331886
272
+ },
273
+ "harness|hendrycksTest-marketing|5": {
274
+ "acc": 0.8290598290598291,
275
+ "acc_stderr": 0.024662496845209804,
276
+ "acc_norm": 0.8290598290598291,
277
+ "acc_norm_stderr": 0.024662496845209804
278
+ },
279
+ "harness|hendrycksTest-medical_genetics|5": {
280
+ "acc": 0.69,
281
+ "acc_stderr": 0.04648231987117316,
282
+ "acc_norm": 0.69,
283
+ "acc_norm_stderr": 0.04648231987117316
284
+ },
285
+ "harness|hendrycksTest-miscellaneous|5": {
286
+ "acc": 0.8250319284802043,
287
+ "acc_stderr": 0.013586619219903324,
288
+ "acc_norm": 0.8250319284802043,
289
+ "acc_norm_stderr": 0.013586619219903324
290
+ },
291
+ "harness|hendrycksTest-moral_disputes|5": {
292
+ "acc": 0.7283236994219653,
293
+ "acc_stderr": 0.023948512905468348,
294
+ "acc_norm": 0.7283236994219653,
295
+ "acc_norm_stderr": 0.023948512905468348
296
+ },
297
+ "harness|hendrycksTest-moral_scenarios|5": {
298
+ "acc": 0.36312849162011174,
299
+ "acc_stderr": 0.016083749986853704,
300
+ "acc_norm": 0.36312849162011174,
301
+ "acc_norm_stderr": 0.016083749986853704
302
+ },
303
+ "harness|hendrycksTest-nutrition|5": {
304
+ "acc": 0.7450980392156863,
305
+ "acc_stderr": 0.02495418432487991,
306
+ "acc_norm": 0.7450980392156863,
307
+ "acc_norm_stderr": 0.02495418432487991
308
+ },
309
+ "harness|hendrycksTest-philosophy|5": {
310
+ "acc": 0.7202572347266881,
311
+ "acc_stderr": 0.02549425935069491,
312
+ "acc_norm": 0.7202572347266881,
313
+ "acc_norm_stderr": 0.02549425935069491
314
+ },
315
+ "harness|hendrycksTest-prehistory|5": {
316
+ "acc": 0.7530864197530864,
317
+ "acc_stderr": 0.023993501709042114,
318
+ "acc_norm": 0.7530864197530864,
319
+ "acc_norm_stderr": 0.023993501709042114
320
+ },
321
+ "harness|hendrycksTest-professional_accounting|5": {
322
+ "acc": 0.4787234042553192,
323
+ "acc_stderr": 0.029800481645628693,
324
+ "acc_norm": 0.4787234042553192,
325
+ "acc_norm_stderr": 0.029800481645628693
326
+ },
327
+ "harness|hendrycksTest-professional_law|5": {
328
+ "acc": 0.4367666232073012,
329
+ "acc_stderr": 0.01266770191960366,
330
+ "acc_norm": 0.4367666232073012,
331
+ "acc_norm_stderr": 0.01266770191960366
332
+ },
333
+ "harness|hendrycksTest-professional_medicine|5": {
334
+ "acc": 0.6176470588235294,
335
+ "acc_stderr": 0.029520095697687765,
336
+ "acc_norm": 0.6176470588235294,
337
+ "acc_norm_stderr": 0.029520095697687765
338
+ },
339
+ "harness|hendrycksTest-professional_psychology|5": {
340
+ "acc": 0.6699346405228758,
341
+ "acc_stderr": 0.019023726160724553,
342
+ "acc_norm": 0.6699346405228758,
343
+ "acc_norm_stderr": 0.019023726160724553
344
+ },
345
+ "harness|hendrycksTest-public_relations|5": {
346
+ "acc": 0.6545454545454545,
347
+ "acc_stderr": 0.04554619617541054,
348
+ "acc_norm": 0.6545454545454545,
349
+ "acc_norm_stderr": 0.04554619617541054
350
+ },
351
+ "harness|hendrycksTest-security_studies|5": {
352
+ "acc": 0.726530612244898,
353
+ "acc_stderr": 0.028535560337128445,
354
+ "acc_norm": 0.726530612244898,
355
+ "acc_norm_stderr": 0.028535560337128445
356
+ },
357
+ "harness|hendrycksTest-sociology|5": {
358
+ "acc": 0.845771144278607,
359
+ "acc_stderr": 0.025538433368578334,
360
+ "acc_norm": 0.845771144278607,
361
+ "acc_norm_stderr": 0.025538433368578334
362
+ },
363
+ "harness|hendrycksTest-us_foreign_policy|5": {
364
+ "acc": 0.85,
365
+ "acc_stderr": 0.03588702812826371,
366
+ "acc_norm": 0.85,
367
+ "acc_norm_stderr": 0.03588702812826371
368
+ },
369
+ "harness|hendrycksTest-virology|5": {
370
+ "acc": 0.5542168674698795,
371
+ "acc_stderr": 0.03869543323472101,
372
+ "acc_norm": 0.5542168674698795,
373
+ "acc_norm_stderr": 0.03869543323472101
374
+ },
375
+ "harness|hendrycksTest-world_religions|5": {
376
+ "acc": 0.8362573099415205,
377
+ "acc_stderr": 0.028380919596145866,
378
+ "acc_norm": 0.8362573099415205,
379
+ "acc_norm_stderr": 0.028380919596145866
380
+ },
381
+ "harness|truthfulqa:mc|0": {
382
+ "mc1": 0.25091799265605874,
383
+ "mc1_stderr": 0.015176985027707694,
384
+ "mc2": 0.5006656333594469,
385
+ "mc2_stderr": 0.01636490303268174
386
+ },
387
+ "harness|winogrande|5": {
388
+ "acc": 0.7261247040252565,
389
+ "acc_stderr": 0.012533292732620296
390
+ },
391
+ "harness|gsm8k|5": {
392
+ "acc": 0.0,
393
+ "acc_stderr": 0.0
394
+ }
395
+ }
396
+
397
+ ```