Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
scottsuk0306
committed on
Commit
•
3634025
1
Parent(s):
de13e3a
Update
Browse files
data/bgb-leaderboard-gpt-4-turbo-2024-04-09.csv
CHANGED
@@ -1,30 +1,30 @@
|
|
1 |
Grounding ⚡️,Instruction Following 📝,Planning 📅,Reasoning 💡,Refinement 🔩,Safety ⚠️,Theory of Mind 🤔,Tool Usage 🛠️,Multilingual 🇬🇫,Model 🤗,Model Params (B),Model Type,Average
|
2 |
-
4.288,4.23,4.271,4.22,4.171,4.565,4.24,3.775,3.6,gpt-4-1106-preview
|
3 |
-
4.3,4.2,4.357,4.16,4.145,4.174,4.26,3.925,3.543,gpt-4-0125-preview
|
4 |
-
4.238,4.26,4.357,4.21,4.079,4.058,4.08,3.85,3.643,gpt-4o-2024-05-13
|
5 |
-
4.312,4.13,4.3,4.2,4.105,4.087,4.12,3.8,3.471,gpt-4-turbo-2024-04-09
|
6 |
-
4.288,4.06,4.186,3.97,3.908,4.536,4.09,3.788,3.571,claude-3-opus-20240229
|
7 |
4.125,4.18,4.186,3.87,3.907,4.014,4.04,3.775,3.314,meta-llama/Meta-Llama-3-70B-Instruct,70.0,Chat,3.935
|
8 |
-
4.25,3.92,4.171,3.91,3.724,4.362,4.0,3.75,3.186,claude-3-sonnet-20240229
|
9 |
-
4.05,4.04,4.129,4.06,3.671,4.116,4.07,3.488,3.257,gemini-pro-1.5
|
10 |
-
4.138,4.01,4.129,3.69,3.632,4.304,3.98,3.75,3.071,claude-3-haiku-20240307
|
11 |
4.15,4.01,4.229,3.94,3.882,4.043,3.99,3.588,2.771,qwen/qwen-110b-chat,110.0,Chat,3.845
|
12 |
-
3.962,3.94,4.029,3.95,3.776,4.058,3.9,3.862,2.929,mistral-medium
|
13 |
-
4.025,3.99,4.029,3.93,3.776,3.913,3.93,3.825,2.886,mistral-large
|
14 |
-
4.012,4.0,4.0,3.96,3.842,4.087,3.87,3.712,2.714,MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ,
|
15 |
-
4.138,3.91,3.971,3.92,3.453,4.217,3.96,3.625,2.671,google/gemini-flash-1.5
|
16 |
3.888,3.99,4.029,3.68,3.632,3.957,3.96,3.525,2.914,Qwen/Qwen1.5-72B-Chat,72.0,Chat,3.73
|
17 |
-
3.988,4.0,4.186,3.64,3.461,3.971,3.94,3.525,2.757,alpindale/c4ai-command-r-plus-GPTQ,
|
18 |
3.788,3.85,4.029,3.62,3.395,4.217,3.87,3.738,2.714,Qwen/Qwen1.5-32B-Chat,32.0,Chat,3.691
|
19 |
4.125,3.94,3.929,3.47,3.507,3.725,3.83,3.5,2.914,meta-llama/Meta-Llama-3-8B-Instruct,8.0,Chat,3.66
|
20 |
3.725,3.88,3.8,3.81,3.974,4.145,3.9,3.338,1.914,microsoft/Phi-3-mini-4k-instruct,3.8,Chat,3.609
|
21 |
-
3.688,3.7,3.743,3.5,3.539,4.0,3.49,3.188,,mistral-community/Mixtral-8x22B-v0.1-AWQ,
|
22 |
3.812,4.06,3.957,3.53,3.342,3.739,3.79,3.662,2.557,NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,46.7,Chat,3.606
|
23 |
3.8,3.84,4.0,3.56,3.547,3.87,3.87,3.562,2.271,Starling-LM-7B-beta,7.0,Chat,3.591
|
24 |
-
3.6,3.84,3.871,3.62,3.373,3.942,3.75,3.125,3.186,gemini-1.0-pro
|
25 |
3.9,3.88,3.6,3.71,3.434,3.812,3.81,3.412,2.714,mistralai/Mixtral-8x7B-Instruct-v0.1,46.7,Chat,3.586
|
26 |
-
3.925,3.85,3.843,3.65,3.434,3.884,3.79,3.138,2.614,gpt-3.5-turbo-0125
|
27 |
-
4.025,3.79,3.829,3.51,3.434,4.0,3.67,3.162,2.557,gpt-3.5-turbo-1106
|
28 |
3.812,3.77,3.857,3.42,3.382,3.826,3.9,3.412,2.443,upstage/SOLAR-10.7B-Instruct-v1.0,10.7,Chat,3.536
|
29 |
3.738,3.83,3.914,3.57,3.676,3.884,3.96,3.038,2.186,01-ai/Yi-34B-Chat,34.0,Chat,3.533
|
30 |
3.7,3.89,3.9,3.36,3.421,3.754,3.83,3.612,2.314,allenai/tulu-2-dpo-70b,70.0,Chat,3.531
|
@@ -32,7 +32,7 @@ Grounding ⚡️,Instruction Following 📝,Planning 📅,Reasoning 💡,Refinem
|
|
32 |
3.812,3.88,3.9,3.39,3.447,3.899,3.9,3.188,2.186,CohereForAI/c4ai-command-r-v01,35.0,Chat,3.511
|
33 |
3.712,3.8,3.7,3.82,3.513,3.957,3.83,3.1,1.829,microsoft/Phi-3-mini-128k-instruct,3.8,Chat,3.473
|
34 |
3.7,3.87,3.8,3.18,3.447,3.826,3.77,3.362,2.286,mistralai/Mistral-7B-Instruct-v0.2,7.0,Chat,3.471
|
35 |
-
3.55,3.62,3.957,3.52,3.618,3.449,3.58,3.288,2.586,MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ,
|
36 |
3.65,3.78,3.714,3.39,3.461,3.609,3.63,3.538,2.4,NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT,46.7,Chat,3.463
|
37 |
3.712,3.58,3.5,3.3,3.237,3.87,3.59,2.775,,mistralai/Mixtral-8x7B-v0.1,46.7,Base,3.445
|
38 |
3.625,3.9,3.857,3.36,3.263,3.855,3.52,3.2,2.386,Qwen/Qwen1.5-14B-Chat,14.0,Chat,3.441
|
|
|
1 |
Grounding ⚡️,Instruction Following 📝,Planning 📅,Reasoning 💡,Refinement 🔩,Safety ⚠️,Theory of Mind 🤔,Tool Usage 🛠️,Multilingual 🇬🇫,Model 🤗,Model Params (B),Model Type,Average
|
2 |
+
4.288,4.23,4.271,4.22,4.171,4.565,4.24,3.775,3.6,gpt-4-1106-preview,,Proprietary,4.151
|
3 |
+
4.3,4.2,4.357,4.16,4.145,4.174,4.26,3.925,3.543,gpt-4-0125-preview,,Proprietary,4.118
|
4 |
+
4.238,4.26,4.357,4.21,4.079,4.058,4.08,3.85,3.643,gpt-4o-2024-05-13,,Proprietary,4.086
|
5 |
+
4.312,4.13,4.3,4.2,4.105,4.087,4.12,3.8,3.471,gpt-4-turbo-2024-04-09,,Proprietary,4.058
|
6 |
+
4.288,4.06,4.186,3.97,3.908,4.536,4.09,3.788,3.571,claude-3-opus-20240229,,Proprietary,4.044
|
7 |
4.125,4.18,4.186,3.87,3.907,4.014,4.04,3.775,3.314,meta-llama/Meta-Llama-3-70B-Instruct,70.0,Chat,3.935
|
8 |
+
4.25,3.92,4.171,3.91,3.724,4.362,4.0,3.75,3.186,claude-3-sonnet-20240229,,Proprietary,3.919
|
9 |
+
4.05,4.04,4.129,4.06,3.671,4.116,4.07,3.488,3.257,gemini-pro-1.5,,Proprietary,3.876
|
10 |
+
4.138,4.01,4.129,3.69,3.632,4.304,3.98,3.75,3.071,claude-3-haiku-20240307,,Proprietary,3.856
|
11 |
4.15,4.01,4.229,3.94,3.882,4.043,3.99,3.588,2.771,qwen/qwen-110b-chat,110.0,Chat,3.845
|
12 |
+
3.962,3.94,4.029,3.95,3.776,4.058,3.9,3.862,2.929,mistral-medium,,Proprietary,3.823
|
13 |
+
4.025,3.99,4.029,3.93,3.776,3.913,3.93,3.825,2.886,mistral-large,,Proprietary,3.812
|
14 |
+
4.012,4.0,4.0,3.96,3.842,4.087,3.87,3.712,2.714,MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ,141,Chat,3.8
|
15 |
+
4.138,3.91,3.971,3.92,3.453,4.217,3.96,3.625,2.671,google/gemini-flash-1.5,,Proprietary,3.763
|
16 |
3.888,3.99,4.029,3.68,3.632,3.957,3.96,3.525,2.914,Qwen/Qwen1.5-72B-Chat,72.0,Chat,3.73
|
17 |
+
3.988,4.0,4.186,3.64,3.461,3.971,3.94,3.525,2.757,alpindale/c4ai-command-r-plus-GPTQ,104,Chat,3.719
|
18 |
3.788,3.85,4.029,3.62,3.395,4.217,3.87,3.738,2.714,Qwen/Qwen1.5-32B-Chat,32.0,Chat,3.691
|
19 |
4.125,3.94,3.929,3.47,3.507,3.725,3.83,3.5,2.914,meta-llama/Meta-Llama-3-8B-Instruct,8.0,Chat,3.66
|
20 |
3.725,3.88,3.8,3.81,3.974,4.145,3.9,3.338,1.914,microsoft/Phi-3-mini-4k-instruct,3.8,Chat,3.609
|
21 |
+
3.688,3.7,3.743,3.5,3.539,4.0,3.49,3.188,,mistral-community/Mixtral-8x22B-v0.1-AWQ,141,Base,3.606
|
22 |
3.812,4.06,3.957,3.53,3.342,3.739,3.79,3.662,2.557,NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,46.7,Chat,3.606
|
23 |
3.8,3.84,4.0,3.56,3.547,3.87,3.87,3.562,2.271,Starling-LM-7B-beta,7.0,Chat,3.591
|
24 |
+
3.6,3.84,3.871,3.62,3.373,3.942,3.75,3.125,3.186,gemini-1.0-pro,,Proprietary,3.59
|
25 |
3.9,3.88,3.6,3.71,3.434,3.812,3.81,3.412,2.714,mistralai/Mixtral-8x7B-Instruct-v0.1,46.7,Chat,3.586
|
26 |
+
3.925,3.85,3.843,3.65,3.434,3.884,3.79,3.138,2.614,gpt-3.5-turbo-0125,,Proprietary,3.57
|
27 |
+
4.025,3.79,3.829,3.51,3.434,4.0,3.67,3.162,2.557,gpt-3.5-turbo-1106,,Proprietary,3.553
|
28 |
3.812,3.77,3.857,3.42,3.382,3.826,3.9,3.412,2.443,upstage/SOLAR-10.7B-Instruct-v1.0,10.7,Chat,3.536
|
29 |
3.738,3.83,3.914,3.57,3.676,3.884,3.96,3.038,2.186,01-ai/Yi-34B-Chat,34.0,Chat,3.533
|
30 |
3.7,3.89,3.9,3.36,3.421,3.754,3.83,3.612,2.314,allenai/tulu-2-dpo-70b,70.0,Chat,3.531
|
|
|
32 |
3.812,3.88,3.9,3.39,3.447,3.899,3.9,3.188,2.186,CohereForAI/c4ai-command-r-v01,35.0,Chat,3.511
|
33 |
3.712,3.8,3.7,3.82,3.513,3.957,3.83,3.1,1.829,microsoft/Phi-3-mini-128k-instruct,3.8,Chat,3.473
|
34 |
3.7,3.87,3.8,3.18,3.447,3.826,3.77,3.362,2.286,mistralai/Mistral-7B-Instruct-v0.2,7.0,Chat,3.471
|
35 |
+
3.55,3.62,3.957,3.52,3.618,3.449,3.58,3.288,2.586,MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ,141,Chat,3.463
|
36 |
3.65,3.78,3.714,3.39,3.461,3.609,3.63,3.538,2.4,NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT,46.7,Chat,3.463
|
37 |
3.712,3.58,3.5,3.3,3.237,3.87,3.59,2.775,,mistralai/Mixtral-8x7B-v0.1,46.7,Base,3.445
|
38 |
3.625,3.9,3.857,3.36,3.263,3.855,3.52,3.2,2.386,Qwen/Qwen1.5-14B-Chat,14.0,Chat,3.441
|
data/bgb-leaderboard-gpt-4-turbo-2024-04-09.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c03b7873825ad92dee16b8ad4dc5a15d558208b50a4af1e3c8b390c8a1f789b
|
3 |
+
size 13905
|
data/bgb-leaderboard-prometheus-bgb-8x7b-v2.0.csv
CHANGED
@@ -1,32 +1,32 @@
|
|
1 |
Grounding ⚡️,Instruction Following 📝,Planning 📅,Reasoning 💡,Refinement 🔩,Safety ⚠️,Theory of Mind 🤔,Tool Usage 🛠️,Multilingual 🇬🇫,Model 🤗,Model Params (B),Model Type,Average
|
2 |
-
4.012,4.21,4.029,4.01,4.034,4.449,4.09,3.6,3.429,gpt-4-1106-preview
|
3 |
-
4.175,4.14,4.1,3.98,3.789,4.235,4.06,3.788,3.414,gpt-4o-2024-05-13
|
4 |
-
4.112,4.13,3.929,4.15,4.0,4.145,4.15,3.725,3.329,gpt-4-0125-preview
|
5 |
-
4.112,4.09,3.986,3.92,3.862,4.116,4.06,3.688,3.357,gpt-4-turbo-2024-04-09
|
6 |
-
4.075,3.88,4.157,3.8,3.741,4.435,4.05,3.425,3.357,claude-3-opus-20240229
|
7 |
4.175,3.92,3.971,3.76,3.741,4.029,3.97,3.625,3.114,meta-llama/Meta-Llama-3-70B-Instruct,70.0,Chat,3.812
|
8 |
4.075,4.03,4.0,3.83,3.776,4.13,3.96,3.325,2.771,qwen/qwen-110b-chat,110.0,Chat,3.766
|
9 |
-
3.862,3.83,3.943,3.84,3.69,4.29,3.86,3.5,3.043,claude-3-sonnet-20240229
|
10 |
-
3.925,3.91,3.843,3.82,3.552,4.116,3.91,3.688,2.971,mistral-medium
|
11 |
-
4.0,3.94,3.957,3.58,3.569,4.275,3.93,3.538,2.871,claude-3-haiku-20240307
|
12 |
-
3.875,3.88,3.871,3.83,3.5,4.145,4.01,3.288,3.1,gemini-pro-1.5
|
13 |
-
3.9,3.83,3.757,3.66,3.638,3.957,3.94,3.712,2.871,mistral-large
|
14 |
-
4.05,3.81,3.743,3.81,3.31,4.145,3.97,3.45,2.729,google/gemini-flash-1.5
|
15 |
-
3.925,4.02,3.857,3.46,3.517,3.928,3.91,3.425,2.829,alpindale/c4ai-command-r-plus-GPTQ,
|
16 |
-
3.812,3.96,3.771,3.6,3.379,4.043,3.84,3.45,2.757,MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ,
|
17 |
3.712,3.92,3.771,3.53,3.586,4.101,3.92,3.425,2.629,Qwen/Qwen1.5-72B-Chat,72.0,Chat,3.622
|
18 |
3.85,3.75,3.814,3.3,3.345,3.928,3.71,3.362,3.043,meta-llama/Meta-Llama-3-8B-Instruct,8.0,Chat,3.567
|
19 |
3.775,3.86,3.8,3.44,3.534,3.986,3.91,3.325,2.429,Starling-LM-7B-beta,7.0,Chat,3.562
|
20 |
3.65,3.85,3.643,3.55,3.121,4.246,3.8,3.488,2.671,Qwen/Qwen1.5-32B-Chat,32.0,Chat,3.558
|
21 |
3.9,3.85,3.486,3.54,3.776,4.232,3.81,3.062,1.971,microsoft/Phi-3-mini-4k-instruct,3.8,Chat,3.514
|
22 |
3.65,3.89,3.571,3.45,3.138,4.014,3.78,3.2,2.743,mistralai/Mixtral-8x7B-Instruct-v0.1,46.7,Chat,3.493
|
23 |
-
3.8,3.86,3.757,3.43,3.259,3.957,3.64,2.988,2.586,gpt-3.5-turbo-0125
|
24 |
-
3.812,3.75,3.714,3.41,3.241,4.087,3.65,3.0,2.586,gpt-3.5-turbo-1106
|
25 |
-
3.562,3.65,3.629,3.48,3.069,3.884,3.74,3.062,2.986,gemini-1.0-pro
|
26 |
3.638,3.8,3.8,3.17,3.155,3.826,3.7,3.5,2.4,allenai/tulu-2-dpo-70b,70.0,Chat,3.443
|
27 |
3.7,3.8,3.586,3.21,3.034,3.826,3.7,3.488,2.586,upstage/SOLAR-10.7B-Instruct-v1.0,10.7,Chat,3.437
|
28 |
3.662,3.84,3.671,3.24,3.155,3.783,3.71,3.338,2.529,NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,46.7,Chat,3.436
|
29 |
-
3.525,3.59,3.5,3.44,3.207,3.942,3.37,2.762,,mistral-community/Mixtral-8x22B-v0.1-AWQ,
|
30 |
3.612,3.72,3.657,2.98,3.155,4.464,3.79,2.888,2.429,meta-llama/Llama-2-70b-chat-hf,70.0,Chat,3.411
|
31 |
3.462,3.74,3.714,3.27,3.414,4.087,3.81,2.812,2.014,01-ai/Yi-34B-Chat,34.0,Chat,3.369
|
32 |
3.588,3.77,3.614,3.26,3.121,3.884,3.5,3.062,2.486,Qwen/Qwen1.5-14B-Chat,14.0,Chat,3.365
|
@@ -35,7 +35,7 @@ Grounding ⚡️,Instruction Following 📝,Planning 📅,Reasoning 💡,Refinem
|
|
35 |
3.688,3.69,3.629,3.16,3.103,3.652,3.59,3.225,2.414,NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT,46.7,Chat,3.35
|
36 |
3.588,3.66,3.471,3.66,3.345,3.942,3.7,2.912,1.814,microsoft/Phi-3-mini-128k-instruct,3.8,Chat,3.344
|
37 |
3.525,3.76,3.514,3.26,3.31,3.841,3.61,2.888,2.314,openchat/openchat-3.5-0106,7.0,Chat,3.336
|
38 |
-
3.288,3.62,3.686,3.25,3.345,3.551,3.45,3.062,2.543,MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ,
|
39 |
3.712,3.74,3.5,3.2,2.948,3.942,3.53,2.838,2.129,Starling-LM-7B-alpha,7.0,Chat,3.282
|
40 |
3.4,3.74,3.4,3.04,3.0,3.754,3.71,2.975,2.043,Qwen/Qwen1.5-7B-Chat,7.0,Chat,3.229
|
41 |
3.588,3.7,3.343,2.71,2.862,4.319,3.66,2.512,2.343,meta-llama/Llama-2-13b-chat-hf,13.0,Chat,3.226
|
|
|
1 |
Grounding ⚡️,Instruction Following 📝,Planning 📅,Reasoning 💡,Refinement 🔩,Safety ⚠️,Theory of Mind 🤔,Tool Usage 🛠️,Multilingual 🇬🇫,Model 🤗,Model Params (B),Model Type,Average
|
2 |
+
4.012,4.21,4.029,4.01,4.034,4.449,4.09,3.6,3.429,gpt-4-1106-preview,,Proprietary,3.985
|
3 |
+
4.175,4.14,4.1,3.98,3.789,4.235,4.06,3.788,3.414,gpt-4o-2024-05-13,,Proprietary,3.965
|
4 |
+
4.112,4.13,3.929,4.15,4.0,4.145,4.15,3.725,3.329,gpt-4-0125-preview,,Proprietary,3.963
|
5 |
+
4.112,4.09,3.986,3.92,3.862,4.116,4.06,3.688,3.357,gpt-4-turbo-2024-04-09,,Proprietary,3.91
|
6 |
+
4.075,3.88,4.157,3.8,3.741,4.435,4.05,3.425,3.357,claude-3-opus-20240229,,Proprietary,3.88
|
7 |
4.175,3.92,3.971,3.76,3.741,4.029,3.97,3.625,3.114,meta-llama/Meta-Llama-3-70B-Instruct,70.0,Chat,3.812
|
8 |
4.075,4.03,4.0,3.83,3.776,4.13,3.96,3.325,2.771,qwen/qwen-110b-chat,110.0,Chat,3.766
|
9 |
+
3.862,3.83,3.943,3.84,3.69,4.29,3.86,3.5,3.043,claude-3-sonnet-20240229,,Proprietary,3.762
|
10 |
+
3.925,3.91,3.843,3.82,3.552,4.116,3.91,3.688,2.971,mistral-medium,,Proprietary,3.748
|
11 |
+
4.0,3.94,3.957,3.58,3.569,4.275,3.93,3.538,2.871,claude-3-haiku-20240307,,Proprietary,3.74
|
12 |
+
3.875,3.88,3.871,3.83,3.5,4.145,4.01,3.288,3.1,gemini-pro-1.5,,Proprietary,3.722
|
13 |
+
3.9,3.83,3.757,3.66,3.638,3.957,3.94,3.712,2.871,mistral-large,,Proprietary,3.696
|
14 |
+
4.05,3.81,3.743,3.81,3.31,4.145,3.97,3.45,2.729,google/gemini-flash-1.5,,Proprietary,3.669
|
15 |
+
3.925,4.02,3.857,3.46,3.517,3.928,3.91,3.425,2.829,alpindale/c4ai-command-r-plus-GPTQ,104,Chat,3.652
|
16 |
+
3.812,3.96,3.771,3.6,3.379,4.043,3.84,3.45,2.757,MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ,141,Chat,3.624
|
17 |
3.712,3.92,3.771,3.53,3.586,4.101,3.92,3.425,2.629,Qwen/Qwen1.5-72B-Chat,72.0,Chat,3.622
|
18 |
3.85,3.75,3.814,3.3,3.345,3.928,3.71,3.362,3.043,meta-llama/Meta-Llama-3-8B-Instruct,8.0,Chat,3.567
|
19 |
3.775,3.86,3.8,3.44,3.534,3.986,3.91,3.325,2.429,Starling-LM-7B-beta,7.0,Chat,3.562
|
20 |
3.65,3.85,3.643,3.55,3.121,4.246,3.8,3.488,2.671,Qwen/Qwen1.5-32B-Chat,32.0,Chat,3.558
|
21 |
3.9,3.85,3.486,3.54,3.776,4.232,3.81,3.062,1.971,microsoft/Phi-3-mini-4k-instruct,3.8,Chat,3.514
|
22 |
3.65,3.89,3.571,3.45,3.138,4.014,3.78,3.2,2.743,mistralai/Mixtral-8x7B-Instruct-v0.1,46.7,Chat,3.493
|
23 |
+
3.8,3.86,3.757,3.43,3.259,3.957,3.64,2.988,2.586,gpt-3.5-turbo-0125,,Proprietary,3.475
|
24 |
+
3.812,3.75,3.714,3.41,3.241,4.087,3.65,3.0,2.586,gpt-3.5-turbo-1106,,Proprietary,3.472
|
25 |
+
3.562,3.65,3.629,3.48,3.069,3.884,3.74,3.062,2.986,gemini-1.0-pro,,Proprietary,3.451
|
26 |
3.638,3.8,3.8,3.17,3.155,3.826,3.7,3.5,2.4,allenai/tulu-2-dpo-70b,70.0,Chat,3.443
|
27 |
3.7,3.8,3.586,3.21,3.034,3.826,3.7,3.488,2.586,upstage/SOLAR-10.7B-Instruct-v1.0,10.7,Chat,3.437
|
28 |
3.662,3.84,3.671,3.24,3.155,3.783,3.71,3.338,2.529,NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,46.7,Chat,3.436
|
29 |
+
3.525,3.59,3.5,3.44,3.207,3.942,3.37,2.762,,mistral-community/Mixtral-8x22B-v0.1-AWQ,141,Base,3.417
|
30 |
3.612,3.72,3.657,2.98,3.155,4.464,3.79,2.888,2.429,meta-llama/Llama-2-70b-chat-hf,70.0,Chat,3.411
|
31 |
3.462,3.74,3.714,3.27,3.414,4.087,3.81,2.812,2.014,01-ai/Yi-34B-Chat,34.0,Chat,3.369
|
32 |
3.588,3.77,3.614,3.26,3.121,3.884,3.5,3.062,2.486,Qwen/Qwen1.5-14B-Chat,14.0,Chat,3.365
|
|
|
35 |
3.688,3.69,3.629,3.16,3.103,3.652,3.59,3.225,2.414,NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT,46.7,Chat,3.35
|
36 |
3.588,3.66,3.471,3.66,3.345,3.942,3.7,2.912,1.814,microsoft/Phi-3-mini-128k-instruct,3.8,Chat,3.344
|
37 |
3.525,3.76,3.514,3.26,3.31,3.841,3.61,2.888,2.314,openchat/openchat-3.5-0106,7.0,Chat,3.336
|
38 |
+
3.288,3.62,3.686,3.25,3.345,3.551,3.45,3.062,2.543,MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ,141,Chat,3.31
|
39 |
3.712,3.74,3.5,3.2,2.948,3.942,3.53,2.838,2.129,Starling-LM-7B-alpha,7.0,Chat,3.282
|
40 |
3.4,3.74,3.4,3.04,3.0,3.754,3.71,2.975,2.043,Qwen/Qwen1.5-7B-Chat,7.0,Chat,3.229
|
41 |
3.588,3.7,3.343,2.71,2.862,4.319,3.66,2.512,2.343,meta-llama/Llama-2-13b-chat-hf,13.0,Chat,3.226
|
data/bgb-leaderboard-prometheus-bgb-8x7b-v2.0.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54f03922163c1c21e91e3c3e11d29b02c7218be0b5547c6461e0a1d49f8bd68c
|
3 |
+
size 13905
|
requirements.txt
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
huggingface_hub
|
2 |
-
transformers
|
3 |
gradio
|
4 |
plotly
|
5 |
pandas
|
|
|
1 |
huggingface_hub
|
|
|
2 |
gradio
|
3 |
plotly
|
4 |
pandas
|
src/llm_perf.py
CHANGED
@@ -162,10 +162,10 @@ def get_eval_df(eval_model_name: str):
|
|
162 |
eval_df.drop(columns=["model_name"], inplace=True)
|
163 |
|
164 |
eval_df["model_params"] = eval_df["model_name_or_path"].apply(
|
165 |
-
lambda x: MODEL_MAPPING.get(x, [
|
166 |
)
|
167 |
eval_df["model_type"] = eval_df["model_name_or_path"].apply(
|
168 |
-
lambda x: MODEL_MAPPING.get(x, [
|
169 |
)
|
170 |
|
171 |
capabilities = [
|
|
|
162 |
eval_df.drop(columns=["model_name"], inplace=True)
|
163 |
|
164 |
eval_df["model_params"] = eval_df["model_name_or_path"].apply(
|
165 |
+
lambda x: MODEL_MAPPING.get(x, [None, "Unknown"])[0]
|
166 |
)
|
167 |
eval_df["model_type"] = eval_df["model_name_or_path"].apply(
|
168 |
+
lambda x: MODEL_MAPPING.get(x, [None, "Unknown"])[1]
|
169 |
)
|
170 |
|
171 |
capabilities = [
|
src/model_list.py
CHANGED
@@ -391,32 +391,32 @@ MODEL_MAPPING = {
|
|
391 |
"CohereForAI/c4ai-command-r-v01": [35.0, "Chat"],
|
392 |
"meta-llama/Llama-2-70b-hf": [70.0, "Base"],
|
393 |
"codellama/CodeLlama-70b-hf": [70.0, "Base"],
|
394 |
-
"mistral-community/Mixtral-8x22B-v0.1-AWQ": [
|
395 |
"meta-llama/Meta-Llama-3-70B": [70.0, "Base"],
|
396 |
"Qwen/Qwen1.5-72B": [72.0, "Base"],
|
397 |
"meta-llama/Llama-2-70b-chat-hf": [70.0, "Chat", "meta-llama/Llama-2-70b-hf"],
|
398 |
"codellama/CodeLlama-70b-Instruct-hf": [70.0, "Chat", "codellama/CodeLlama-70b-hf"],
|
399 |
"allenai/tulu-2-dpo-70b": [70.0, "Chat", "meta-llama/Llama-2-70b-hf"],
|
400 |
-
"alpindale/c4ai-command-r-plus-GPTQ": [
|
401 |
"meta-llama/Meta-Llama-3-70B-Instruct": [70.0, "Chat", "meta-llama/Meta-Llama-3-70B"],
|
402 |
-
"MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ": ["
|
403 |
-
"MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ": ["
|
404 |
"Qwen/Qwen1.5-72B-Chat": [72.0, "Chat", "Qwen/Qwen1.5-72B"],
|
405 |
"qwen/qwen-110b-chat": [110.0, "Chat", None],
|
406 |
-
"gpt-3.5-turbo-1106": [
|
407 |
-
"gpt-3.5-turbo-0125": [
|
408 |
-
"gpt-4-1106-preview": [
|
409 |
-
"gpt-4-0125-preview": [
|
410 |
-
"gpt-4-turbo-2024-04-09": [
|
411 |
-
"gpt-4o-2024-05-13": [
|
412 |
-
"mistral-medium": [
|
413 |
-
"mistral-large": [
|
414 |
-
"gemini-1.0-pro": [
|
415 |
-
"gemini-pro-1.5": [
|
416 |
-
"google/gemini-flash-1.5": [
|
417 |
-
"claude-3-haiku-20240307": [
|
418 |
-
"claude-3-sonnet-20240229": [
|
419 |
-
"claude-3-opus-20240229": [
|
420 |
}
|
421 |
|
422 |
|
|
|
391 |
"CohereForAI/c4ai-command-r-v01": [35.0, "Chat"],
|
392 |
"meta-llama/Llama-2-70b-hf": [70.0, "Base"],
|
393 |
"codellama/CodeLlama-70b-hf": [70.0, "Base"],
|
394 |
+
"mistral-community/Mixtral-8x22B-v0.1-AWQ": [141, "Base"],
|
395 |
"meta-llama/Meta-Llama-3-70B": [70.0, "Base"],
|
396 |
"Qwen/Qwen1.5-72B": [72.0, "Base"],
|
397 |
"meta-llama/Llama-2-70b-chat-hf": [70.0, "Chat", "meta-llama/Llama-2-70b-hf"],
|
398 |
"codellama/CodeLlama-70b-Instruct-hf": [70.0, "Chat", "codellama/CodeLlama-70b-hf"],
|
399 |
"allenai/tulu-2-dpo-70b": [70.0, "Chat", "meta-llama/Llama-2-70b-hf"],
|
400 |
+
"alpindale/c4ai-command-r-plus-GPTQ": [104, "Chat"],
|
401 |
"meta-llama/Meta-Llama-3-70B-Instruct": [70.0, "Chat", "meta-llama/Meta-Llama-3-70B"],
|
402 |
+
"MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ": ["141", "Chat", "mistral-community/Mixtral-8x22B-v0.1-AWQ"],
|
403 |
+
"MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ": ["141", "Chat", "mistral-community/Mixtral-8x22B-v0.1-AWQ"],
|
404 |
"Qwen/Qwen1.5-72B-Chat": [72.0, "Chat", "Qwen/Qwen1.5-72B"],
|
405 |
"qwen/qwen-110b-chat": [110.0, "Chat", None],
|
406 |
+
"gpt-3.5-turbo-1106": [None, "Proprietary"],
|
407 |
+
"gpt-3.5-turbo-0125": [None, "Proprietary"],
|
408 |
+
"gpt-4-1106-preview": [None, "Proprietary"],
|
409 |
+
"gpt-4-0125-preview": [None, "Proprietary"],
|
410 |
+
"gpt-4-turbo-2024-04-09": [None, "Proprietary"],
|
411 |
+
"gpt-4o-2024-05-13": [None, "Proprietary"],
|
412 |
+
"mistral-medium": [None, "Proprietary"],
|
413 |
+
"mistral-large": [None, "Proprietary"],
|
414 |
+
"gemini-1.0-pro": [None, "Proprietary"],
|
415 |
+
"gemini-pro-1.5": [None, "Proprietary"],
|
416 |
+
"google/gemini-flash-1.5": [None, "Proprietary"],
|
417 |
+
"claude-3-haiku-20240307": [None, "Proprietary"],
|
418 |
+
"claude-3-sonnet-20240229": [None, "Proprietary"],
|
419 |
+
"claude-3-opus-20240229": [None, "Proprietary"],
|
420 |
}
|
421 |
|
422 |
|