cccjc committed on
Commit 6a59158 · 1 Parent(s): 2acbd8f

move all constants out of utils.py

Files changed (3)
  1. app.py +1 -1
  2. constants.py +137 -1
  3. utils.py +9 -135
app.py CHANGED
@@ -72,7 +72,7 @@ with gr.Blocks() as block:
         value=list(default_loader.SUPER_GROUPS.keys())[0]
     )
     model_group_selector = gr.Radio(
-        choices=list(default_loader.BASE_MODEL_GROUPS.keys()),
+        choices=list(BASE_MODEL_GROUPS.keys()),
        label="Select a model group",
        value="All"
    )
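
This one-line hunk (+1 -1) is the only change to app.py in the commit, so it assumes `BASE_MODEL_GROUPS` is already in app.py's namespace, e.g. via an existing import from constants not shown in this diff. A minimal explicit sketch of the assumed setup (illustrative only, not part of the diff):

```python
# Hypothetical explicit form of what app.py relies on after this change.
import gradio as gr
from constants import BASE_MODEL_GROUPS  # constants module introduced by this commit

with gr.Blocks() as block:
    model_group_selector = gr.Radio(
        choices=list(BASE_MODEL_GROUPS.keys()),  # group names come straight from constants.py
        label="Select a model group",
        value="All",
    )
```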
constants.py CHANGED
@@ -76,4 +76,140 @@ SUBMIT_INTRODUCTION = """# Submit on MEGA-Bench Leaderboard
 
 Our evaluation pipeline is released on our [GitHub repository](https://github.com/TIGER-AI-Lab/MEGA-Bench). We will provide details on how to submit third-party results to this leaderboard.
 
-"""
+"""
+
+
+
+## Constants related to the leaderboard display
+
+
+# Keep all the constant mappings outside the class
+MODEL_NAME_MAP = {
+    "Claude_3.5_new": "Claude-3.5-Sonnet (1022)",
+    "GPT_4o": "GPT-4o (0513)",
+    "Claude_3.5": "Claude-3.5-Sonnet (0620)",
+    "Gemini_1.5_pro_002": "Gemini-1.5-Pro-002",
+    "InternVL2_76B": "InternVL2-Llama3-76B",
+    "Qwen2_VL_72B": "Qwen2-VL-72B",
+    "llava_onevision_72B": "Llava-OneVision-72B",
+    "NVLM": "NVLM-D-72B",
+    "GPT_4o_mini": "GPT-4o mini",
+    "Gemini_1.5_flash_002": "Gemini-1.5-Flash-002",
+    "Pixtral_12B": "Pixtral 12B",
+    "Aria": "Aria-MoE-25B",
+    "Qwen2_VL_7B": "Qwen2-VL-7B",
+    "InternVL2_8B": "InternVL2-8B",
+    "llava_onevision_7B": "Llava-OneVision-7B",
+    "Llama_3_2_11B": "Llama-3.2-11B",
+    "Phi-3.5-vision": "Phi-3.5-Vision",
+    "MiniCPM_v2.6": "MiniCPM-V2.6",
+    "Idefics3": "Idefics3-8B-Llama3",
+    "Aquila_VL_2B": "Aquila-VL-2B-llava-qwen",
+    "POINTS_7B": "POINTS-Qwen2.5-7B",
+    "Qwen2_VL_2B": "Qwen2-VL-2B",
+    "InternVL2_2B": "InternVL2-2B",
+    "Molmo_7B_D": "Molmo-7B-D-0924",
+    "Molmo_72B": "Molmo-72B-0924",
+    "Mammoth_VL": "Mammoth-VL-8B",
+    "SmolVLM": "SmolVLM-1.7B",
+    "POINTS_15_7B": "POINTS-1.5-8B",
+    "InternVL2_5_78B": "InternVL2.5-78B",
+    "InternVL2_5_2B": "InternVL2.5-2B",
+}
+
+DIMENSION_NAME_MAP = {
+    "skills": "Skills",
+    "input_format": "Input Format",
+    "output_format": "Output Format",
+    "input_num": "Visual Input Number",
+    "app": "Application"
+}
+
+KEYWORD_NAME_MAP = {
+    # Skills
+    "Object Recognition and Classification": "Object Recognition",
+    "Text Recognition (OCR)": "OCR",
+    "Language Understanding and Generation": "Language",
+    "Scene and Event Understanding": "Scene/Event",
+    "Mathematical and Logical Reasoning": "Math/Logic",
+    "Commonsense and Social Reasoning": "Commonsense",
+    "Ethical and Safety Reasoning": "Ethics/Safety",
+    "Domain-Specific Knowledge and Skills": "Domain-Specific",
+    "Spatial and Temporal Reasoning": "Spatial/Temporal",
+    "Planning and Decision Making": "Planning/Decision",
+    # Input Format
+    'User Interface Screenshots': "UI related",
+    'Text-Based Images and Documents': "Documents",
+    'Diagrams and Data Visualizations': "Infographics",
+    'Videos': "Videos",
+    'Artistic and Creative Content': "Arts/Creative",
+    'Photographs': "Photographs",
+    '3D Models and Aerial Imagery': "3D related",
+    # Application
+    'Information_Extraction': "Info Extraction",
+    'Planning': "Planning",
+    'Coding': "Coding",
+    'Perception': "Perception",
+    'Metrics': "Metrics",
+    'Science': "Science",
+    'Knowledge': "Knowledge",
+    'Mathematics': "Math",
+    # Output format
+    'contextual_formatted_text': "Contexual",
+    'structured_output': "Structured",
+    'exact_text': "Exact",
+    'numerical_data': "Numerical",
+    'open_ended_output': "Open-ended",
+    'multiple_choice': "MC",
+    "6-8 images": "6-8 imgs",
+    "1-image": "1 img",
+    "2-3 images": "2-3 imgs",
+    "4-5 images": "4-5 imgs",
+    "9-image or more": "9+ imgs",
+    "video": "Video",
+}
+
+MODEL_URLS = {
+    "Claude_3.5_new": "https://www.anthropic.com/news/3-5-models-and-computer-use",
+    "GPT_4o": "https://platform.openai.com/docs/models/gpt-4o",
+    "Claude_3.5": "https://www.anthropic.com/news/claude-3-5-sonnet",
+    "Gemini_1.5_pro_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
+    "Gemini_1.5_flash_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
+    "GPT_4o_mini": "https://platform.openai.com/docs/models#gpt-4o-mini",
+    "Qwen2_VL_72B": "https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct",
+    "InternVL2_76B": "https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B",
+    "llava_onevision_72B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov-chat",
+    "NVLM": "https://huggingface.co/nvidia/NVLM-D-72B",
+    "Molmo_72B": "https://huggingface.co/allenai/Molmo-72B-0924",
+    "Qwen2_VL_7B": "https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct",
+    "Pixtral_12B": "https://huggingface.co/mistralai/Pixtral-12B-2409",
+    "Aria": "https://huggingface.co/rhymes-ai/Aria",
+    "InternVL2_8B": "https://huggingface.co/OpenGVLab/InternVL2-8B",
+    "Phi-3.5-vision": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct",
+    "MiniCPM_v2.6": "https://huggingface.co/openbmb/MiniCPM-V-2_6",
+    "llava_onevision_7B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-7b-ov",
+    "Llama_3_2_11B": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision",
+    "Idefics3": "https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3",
+    "Molmo_7B_D": "https://huggingface.co/allenai/Molmo-7B-D-0924",
+    "Aquila_VL_2B": "https://huggingface.co/BAAI/Aquila-VL-2B-llava-qwen",
+    "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
+    "Qwen2_VL_2B": "https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct",
+    "InternVL2_2B": "https://huggingface.co/OpenGVLab/InternVL2-2B",
+    "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
+    "POINTS_15_7B": "https://huggingface.co/WePOINTS/POINTS-1-5-Qwen-2-5-7B-Chat",
+    "SmolVLM": "https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct",
+    "Mammoth_VL": "https://huggingface.co/MAmmoTH-VL/MAmmoTH-VL-8B",
+    "InternVL2_5_78B": "https://huggingface.co/OpenGVLab/InternVL2_5-78B",
+    "InternVL2_5_2B": "https://huggingface.co/OpenGVLab/InternVL2_5-2B",
+}
+
+# Define the base MODEL_GROUPS structure
+BASE_MODEL_GROUPS = {
+    "All": list(MODEL_NAME_MAP.keys()),
+    "Flagship Models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002', 'Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', 'Molmo_72B', 'InternVL2_5_78B'],
+    "Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini', 'Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"],
+    "Proprietary Flagship models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002'],
+    "Proprietary Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini'],
+    "Open-source Flagship Models": ['Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', "Molmo_72B", "InternVL2_5_78B"],
+    "Open-source Efficiency Models": ['Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"]
+}
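
With the mappings at module level in constants.py, any part of the app can consume them without going through the data loader. A minimal sketch of one such use, turning an internal model key into a linked display name for a leaderboard cell (`make_model_cell` is a hypothetical helper, not part of this commit):

```python
# Illustrative sketch only; make_model_cell is not defined in this repository.
from constants import MODEL_NAME_MAP, MODEL_URLS

def make_model_cell(model_key: str) -> str:
    """Map an internal model key to its display name, linked when a URL is known."""
    display = MODEL_NAME_MAP.get(model_key, model_key)
    url = MODEL_URLS.get(model_key)
    return f"[{display}]({url})" if url else display

# e.g. "[Qwen2-VL-72B](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct)"
print(make_model_cell("Qwen2_VL_72B"))
```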
utils.py CHANGED
@@ -2,139 +2,15 @@ import pandas as pd
 import json
 from typing import Dict, Any, Tuple
 import os
-
-# Keep all the constant mappings outside the class
-MODEL_NAME_MAP = {
-    "Claude_3.5_new": "Claude-3.5-Sonnet (1022)",
-    "GPT_4o": "GPT-4o (0513)",
-    "Claude_3.5": "Claude-3.5-Sonnet (0620)",
-    "Gemini_1.5_pro_002": "Gemini-1.5-Pro-002",
-    "InternVL2_76B": "InternVL2-Llama3-76B",
-    "Qwen2_VL_72B": "Qwen2-VL-72B",
-    "llava_onevision_72B": "Llava-OneVision-72B",
-    "NVLM": "NVLM-D-72B",
-    "GPT_4o_mini": "GPT-4o mini",
-    "Gemini_1.5_flash_002": "Gemini-1.5-Flash-002",
-    "Pixtral_12B": "Pixtral 12B",
-    "Aria": "Aria-MoE-25B",
-    "Qwen2_VL_7B": "Qwen2-VL-7B",
-    "InternVL2_8B": "InternVL2-8B",
-    "llava_onevision_7B": "Llava-OneVision-7B",
-    "Llama_3_2_11B": "Llama-3.2-11B",
-    "Phi-3.5-vision": "Phi-3.5-Vision",
-    "MiniCPM_v2.6": "MiniCPM-V2.6",
-    "Idefics3": "Idefics3-8B-Llama3",
-    "Aquila_VL_2B": "Aquila-VL-2B-llava-qwen",
-    "POINTS_7B": "POINTS-Qwen2.5-7B",
-    "Qwen2_VL_2B": "Qwen2-VL-2B",
-    "InternVL2_2B": "InternVL2-2B",
-    "Molmo_7B_D": "Molmo-7B-D-0924",
-    "Molmo_72B": "Molmo-72B-0924",
-    "Mammoth_VL": "Mammoth-VL-8B",
-    "SmolVLM": "SmolVLM-1.7B",
-    "POINTS_15_7B": "POINTS-1.5-8B",
-    "InternVL2_5_78B": "InternVL2.5-78B",
-    "InternVL2_5_2B": "InternVL2.5-2B",
-}
-
-DIMENSION_NAME_MAP = {
-    "skills": "Skills",
-    "input_format": "Input Format",
-    "output_format": "Output Format",
-    "input_num": "Visual Input Number",
-    "app": "Application"
-}
-
-KEYWORD_NAME_MAP = {
-    # Skills
-    "Object Recognition and Classification": "Object Recognition",
-    "Text Recognition (OCR)": "OCR",
-    "Language Understanding and Generation": "Language",
-    "Scene and Event Understanding": "Scene/Event",
-    "Mathematical and Logical Reasoning": "Math/Logic",
-    "Commonsense and Social Reasoning": "Commonsense",
-    "Ethical and Safety Reasoning": "Ethics/Safety",
-    "Domain-Specific Knowledge and Skills": "Domain-Specific",
-    "Spatial and Temporal Reasoning": "Spatial/Temporal",
-    "Planning and Decision Making": "Planning/Decision",
-    # Input Format
-    'User Interface Screenshots': "UI related",
-    'Text-Based Images and Documents': "Documents",
-    'Diagrams and Data Visualizations': "Infographics",
-    'Videos': "Videos",
-    'Artistic and Creative Content': "Arts/Creative",
-    'Photographs': "Photographs",
-    '3D Models and Aerial Imagery': "3D related",
-    # Application
-    'Information_Extraction': "Info Extraction",
-    'Planning': "Planning",
-    'Coding': "Coding",
-    'Perception': "Perception",
-    'Metrics': "Metrics",
-    'Science': "Science",
-    'Knowledge': "Knowledge",
-    'Mathematics': "Math",
-    # Output format
-    'contextual_formatted_text': "Contexual",
-    'structured_output': "Structured",
-    'exact_text': "Exact",
-    'numerical_data': "Numerical",
-    'open_ended_output': "Open-ended",
-    'multiple_choice': "MC",
-    "6-8 images": "6-8 imgs",
-    "1-image": "1 img",
-    "2-3 images": "2-3 imgs",
-    "4-5 images": "4-5 imgs",
-    "9-image or more": "9+ imgs",
-    "video": "Video",
-}
-
-MODEL_URLS = {
-    "Claude_3.5_new": "https://www.anthropic.com/news/3-5-models-and-computer-use",
-    "GPT_4o": "https://platform.openai.com/docs/models/gpt-4o",
-    "Claude_3.5": "https://www.anthropic.com/news/claude-3-5-sonnet",
-    "Gemini_1.5_pro_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
-    "Gemini_1.5_flash_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
-    "GPT_4o_mini": "https://platform.openai.com/docs/models#gpt-4o-mini",
-    "Qwen2_VL_72B": "https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct",
-    "InternVL2_76B": "https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B",
-    "llava_onevision_72B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov-chat",
-    "NVLM": "https://huggingface.co/nvidia/NVLM-D-72B",
-    "Molmo_72B": "https://huggingface.co/allenai/Molmo-72B-0924",
-    "Qwen2_VL_7B": "https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct",
-    "Pixtral_12B": "https://huggingface.co/mistralai/Pixtral-12B-2409",
-    "Aria": "https://huggingface.co/rhymes-ai/Aria",
-    "InternVL2_8B": "https://huggingface.co/OpenGVLab/InternVL2-8B",
-    "Phi-3.5-vision": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct",
-    "MiniCPM_v2.6": "https://huggingface.co/openbmb/MiniCPM-V-2_6",
-    "llava_onevision_7B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-7b-ov",
-    "Llama_3_2_11B": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision",
-    "Idefics3": "https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3",
-    "Molmo_7B_D": "https://huggingface.co/allenai/Molmo-7B-D-0924",
-    "Aquila_VL_2B": "https://huggingface.co/BAAI/Aquila-VL-2B-llava-qwen",
-    "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
-    "Qwen2_VL_2B": "https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct",
-    "InternVL2_2B": "https://huggingface.co/OpenGVLab/InternVL2-2B",
-    "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
-    "POINTS_15_7B": "https://huggingface.co/WePOINTS/POINTS-1-5-Qwen-2-5-7B-Chat",
-    "SmolVLM": "https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct",
-    "Mammoth_VL": "https://huggingface.co/MAmmoTH-VL/MAmmoTH-VL-8B",
-    "InternVL2_5_78B": "https://huggingface.co/OpenGVLab/InternVL2_5-78B",
-    "InternVL2_5_2B": "https://huggingface.co/OpenGVLab/InternVL2_5-2B",
-}
+from constants import (
+    MODEL_NAME_MAP,
+    DIMENSION_NAME_MAP,
+    KEYWORD_NAME_MAP,
+    MODEL_URLS,
+    BASE_MODEL_GROUPS
+)
 
 class BaseDataLoader:
-    # Define the base MODEL_GROUPS structure
-    BASE_MODEL_GROUPS = {
-        "All": list(MODEL_NAME_MAP.keys()),
-        "Flagship Models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002', 'Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', 'Molmo_72B', 'InternVL2_5_78B'],
-        "Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini', 'Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"],
-        "Proprietary Flagship models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002'],
-        "Proprietary Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini'],
-        "Open-source Flagship Models": ['Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', "Molmo_72B", "InternVL2_5_78B"],
-        "Open-source Efficiency Models": ['Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"]
-    }
-
     def __init__(self):
         self.MODEL_DATA = self._load_model_data()
         self.SUMMARY_DATA = self._load_summary_data()
@@ -174,17 +50,15 @@ class BaseDataLoader:
         return {k: groups[k] for k in order if k in groups}
 
     def _initialize_model_groups(self) -> Dict[str, list]:
-        # Get the list of available models from the loaded data
         available_models = set(self.MODEL_DATA.keys())
 
-        # Create filtered groups based on available models
         filtered_groups = {}
-        for group_name, models in self.BASE_MODEL_GROUPS.items():
+        for group_name, models in BASE_MODEL_GROUPS.items():
             if group_name == "All":
                 filtered_groups[group_name] = sorted(list(available_models))
             else:
                 filtered_models = [model for model in models if model in available_models]
-                if filtered_models:  # Only include group if it has models
+                if filtered_models:
                     filtered_groups[group_name] = filtered_models
 
         return filtered_groups
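
After the refactor, `_initialize_model_groups` reads the module-level `BASE_MODEL_GROUPS` imported from constants.py instead of a class attribute. A standalone sketch of that filtering behavior, with the loader and its loaded results stubbed by a made-up `available` set:

```python
# Standalone mirror of the filtering done in _initialize_model_groups; illustration only.
from typing import Dict, List, Set
from constants import BASE_MODEL_GROUPS

def filter_model_groups(available: Set[str]) -> Dict[str, List[str]]:
    filtered_groups = {}
    for group_name, models in BASE_MODEL_GROUPS.items():
        if group_name == "All":
            filtered_groups[group_name] = sorted(available)
        else:
            kept = [m for m in models if m in available]
            if kept:  # only keep a group if it still contains models with results
                filtered_groups[group_name] = kept
    return filtered_groups

# Example: only three models have loaded results
print(filter_model_groups({"GPT_4o", "GPT_4o_mini", "Qwen2_VL_72B"}))
```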