Update app.py
Browse files
app.py
CHANGED
@@ -2,132 +2,36 @@ import gradio as gr
|
|
2 |
from utils import MEGABenchEvalDataLoader
|
3 |
import os
|
4 |
from constants import *
|
5 |
-
from urllib.parse import quote
|
6 |
|
7 |
-
#
|
8 |
-
# Load CSS files
|
9 |
-
# ------------------------------------------------------------------------------
|
10 |
# Resolve the stylesheet paths relative to this script rather than the
# working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
base_css_file = os.path.join(current_dir, "static", "css", "style.css")
table_css_file = os.path.join(current_dir, "static", "css", "table.css")

# Decode the stylesheets explicitly as UTF-8: without an encoding argument
# open() falls back to the platform default, which is not UTF-8 everywhere.
with open(base_css_file, "r", encoding="utf-8") as f:
    base_css = f.read()
with open(table_css_file, "r", encoding="utf-8") as f:
    table_css = f.read()

# Initialize data loaders
# NOTE(review): these two paths are relative to the working directory, unlike
# the CSS paths above -- confirm the app is always launched from this folder.
default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI")
|
24 |
|
25 |
-
|
26 |
-
#
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
"""For a given model name, generate a set of search links as HTML."""
|
31 |
-
search_urls = {
|
32 |
-
"📚📖ArXiv": lambda k: f"https://arxiv.org/search/?query={quote(k)}&searchtype=all",
|
33 |
-
"🔮Google": lambda k: f"https://www.google.com/search?q={quote(k)}",
|
34 |
-
"📺Youtube": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}",
|
35 |
-
"🔭Bing": lambda k: f"https://www.bing.com/search?q={quote(k)}",
|
36 |
-
"💡Truth": lambda k: f"https://truthsocial.com/search?q={quote(k)}",
|
37 |
-
"📱X": lambda k: f"https://twitter.com/search?q={quote(k)}",
|
38 |
-
}
|
39 |
-
# Build a set of inline HTML links (each opens in a new tab)
|
40 |
-
links = " ".join(
|
41 |
-
[f'<a href="{url(model_name)}" target="_blank">{emoji}</a>' for emoji, url in search_urls.items()]
|
42 |
)
|
43 |
-
return links
|
44 |
-
|
45 |
-
def add_search_links_to_table(headers, data):
    """
    Return copies of the table extended with a trailing "Search Links" column.

    Neither ``headers`` nor the rows in ``data`` are modified: each is copied
    before the extra cell is appended. The model name is read from column
    index 1 of every row; a row with fewer than two cells uses an empty
    string instead.
    """
    extended_headers = headers.copy()
    extended_headers.append("Search Links")

    def _with_links(row):
        # Work on a copy so the caller's row keeps its original length.
        augmented = row.copy()
        name = augmented[1] if len(augmented) > 1 else ""
        augmented.append(generate_search_links(name))
        return augmented

    return extended_headers, [_with_links(row) for row in data]
|
60 |
-
|
61 |
-
def clean_choice(choice):
    """
    Remove a leading emoji and space (if present) from a choice string.

    For example, "📊 Default" becomes "Default". A choice whose first word is
    ordinary text is returned unchanged, so "Single Image" stays
    "Single Image" rather than being cut down to "Image".
    """
    prefix, separator, remainder = choice.partition(" ")
    # Only strip the first token when it carries no letters or digits, i.e.
    # when it is a decoration such as an emoji and not part of the label.
    if separator and not any(ch.isalnum() for ch in prefix):
        return remainder
    return choice
|
68 |
-
|
69 |
-
def update_table_and_caption(table_type, super_group, model_group):
|
70 |
-
"""
|
71 |
-
Called when any selector changes. Cleans the emoji‐prefixed values, loads new data,
|
72 |
-
appends a Search Links column, and returns a new Dataframe component (with a smaller max height),
|
73 |
-
an updated caption, and the CSS style.
|
74 |
-
"""
|
75 |
-
table_type_clean = clean_choice(table_type)
|
76 |
-
super_group_clean = clean_choice(super_group)
|
77 |
-
model_group_clean = clean_choice(model_group)
|
78 |
-
|
79 |
-
if table_type_clean == "Default":
|
80 |
-
headers, data = default_loader.get_leaderboard_data(super_group_clean, model_group_clean)
|
81 |
-
caption = default_caption
|
82 |
-
else: # "Single Image"
|
83 |
-
headers, data = si_loader.get_leaderboard_data(super_group_clean, model_group_clean)
|
84 |
-
caption = single_image_caption
|
85 |
-
|
86 |
-
# Append search links column to the table data
|
87 |
-
headers, data = add_search_links_to_table(headers, data)
|
88 |
-
n = len(headers)
|
89 |
-
# Assume first column is a number, second (model name) is HTML, the intermediate ones are numbers, and the last (links) is HTML
|
90 |
-
datatype = ["number", "html"] + ["number"] * (n - 3) + ["html"]
|
91 |
-
# Adjust column widths as needed (example: first two fixed, last a bit wider)
|
92 |
-
column_widths = ["100px", "240px"] + ["160px"] * (n - 3) + ["210px"]
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
headers=headers,
|
97 |
-
datatype=datatype,
|
98 |
-
interactive=False,
|
99 |
-
column_widths=column_widths,
|
100 |
-
max_height=600, # smaller height to show the full table in a compact area
|
101 |
-
elem_classes="custom-dataframe"
|
102 |
)
|
103 |
-
|
104 |
-
return [dataframe_component, caption, f"<style>{base_css}\n{table_css}</style>"]
|
105 |
-
|
106 |
-
def update_selectors(table_type):
    """
    Build the emoji-prefixed choice lists for the two group selectors.

    The emoji prefix is stripped from ``table_type`` first; "Default" selects
    the default loader and any other value the single-image loader. Returns
    a two-item list: the dimension choices followed by the model-group
    choices.
    """
    # NOTE(review): plain lists are returned here -- confirm the event wiring
    # expects lists rather than component updates.
    selected = clean_choice(table_type)
    if selected == "Default":
        loader = default_loader
    else:
        loader = si_loader

    dimension_labels = []
    for group in loader.SUPER_GROUPS.keys():
        dimension_labels.append(f"🔍 {group}")

    model_labels = []
    for group in loader.MODEL_GROUPS.keys():
        model_labels.append(f"🤖 {group}")

    return [dimension_labels, model_labels]
|
116 |
-
|
117 |
-
# ------------------------------------------------------------------------------
|
118 |
-
# Build Gradio App Layout
|
119 |
-
# ------------------------------------------------------------------------------
|
120 |
-
with gr.Blocks() as block:
|
121 |
-
# Add CSS via an invisible HTML component
|
122 |
-
css_style = gr.HTML(f"<style>{base_css}\n{table_css}</style>", visible=False)
|
123 |
-
|
124 |
-
# NOTE: The original top-level introduction markdown has been removed here.
|
125 |
-
# It is now placed at the bottom of the MEGA-Bench tab in a collapsed Accordion.
|
126 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
127 |
-
|
128 |
-
# -------------------- Tab 1: MEGA-Bench --------------------
|
129 |
with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
|
130 |
-
# A Citation accordion (collapsed by default)
|
131 |
with gr.Row():
|
132 |
with gr.Accordion("Citation", open=False):
|
133 |
citation_button = gr.Textbox(
|
@@ -136,55 +40,81 @@ with gr.Blocks() as block:
|
|
136 |
elem_id="citation-button",
|
137 |
lines=10,
|
138 |
)
|
139 |
-
gr.Markdown(
|
140 |
-
|
141 |
-
|
|
|
142 |
with gr.Row():
|
143 |
table_selector = gr.Radio(
|
144 |
-
choices=["
|
145 |
-
label="Select table to display",
|
146 |
-
value="
|
147 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
with gr.Row():
|
149 |
super_group_selector = gr.Radio(
|
150 |
-
choices=
|
151 |
-
label="Select a dimension",
|
152 |
-
value=
|
153 |
)
|
154 |
model_group_selector = gr.Radio(
|
155 |
-
choices=
|
156 |
label="Select a model group",
|
157 |
-
value="
|
158 |
)
|
159 |
-
|
160 |
-
|
161 |
-
caption_component = gr.Markdown(
|
162 |
-
value=default_caption,
|
163 |
-
elem_classes="table-caption",
|
164 |
-
latex_delimiters=[{"left": "$", "right": "$", "display": False}],
|
165 |
-
)
|
166 |
-
|
167 |
-
# ---- Initial Dataframe (with search links appended) ----
|
168 |
-
initial_headers, initial_data = default_loader.get_leaderboard_data(
|
169 |
-
list(default_loader.SUPER_GROUPS.keys())[0], "All"
|
170 |
-
)
|
171 |
-
initial_headers, initial_data = add_search_links_to_table(initial_headers, initial_data)
|
172 |
-
n = len(initial_headers)
|
173 |
-
initial_datatype = ["number", "html"] + ["number"] * (n - 3) + ["html"]
|
174 |
-
initial_column_widths = ["100px", "240px"] + ["160px"] * (n - 3) + ["210px"]
|
175 |
-
|
176 |
data_component = gr.Dataframe(
|
177 |
value=initial_data,
|
178 |
headers=initial_headers,
|
179 |
-
datatype=
|
180 |
interactive=False,
|
181 |
elem_classes="custom-dataframe",
|
182 |
-
max_height=
|
183 |
-
column_widths=
|
184 |
)
|
185 |
-
|
186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
refresh_button = gr.Button("Refresh")
|
|
|
|
|
188 |
refresh_button.click(
|
189 |
fn=update_table_and_caption,
|
190 |
inputs=[table_selector, super_group_selector, model_group_selector],
|
@@ -197,7 +127,7 @@ with gr.Blocks() as block:
|
|
197 |
)
|
198 |
model_group_selector.change(
|
199 |
fn=update_table_and_caption,
|
200 |
-
inputs=[table_selector, super_group_selector, model_group_selector],
|
201 |
outputs=[data_component, caption_component, css_style]
|
202 |
)
|
203 |
table_selector.change(
|
@@ -209,19 +139,15 @@ with gr.Blocks() as block:
|
|
209 |
inputs=[table_selector, super_group_selector, model_group_selector],
|
210 |
outputs=[data_component, caption_component, css_style]
|
211 |
)
|
212 |
-
|
213 |
-
# ---- Move the introductory text to a collapsed accordion at the bottom ----
|
214 |
-
with gr.Accordion("Introduction", open=False):
|
215 |
-
gr.Markdown(LEADERBOARD_INTRODUCTION)
|
216 |
-
|
217 |
-
# -------------------- Tab 2: Data Information --------------------
|
218 |
with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
|
219 |
gr.Markdown(DATA_INFO, elem_classes="markdown-text")
|
220 |
-
|
221 |
-
# -------------------- Tab 3: Submit --------------------
|
222 |
with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
|
223 |
with gr.Row():
|
224 |
gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
|
225 |
-
|
|
|
|
|
226 |
if __name__ == "__main__":
|
227 |
block.launch(share=True)
|
|
|
2 |
from utils import MEGABenchEvalDataLoader
|
3 |
import os
|
4 |
from constants import *
|
|
|
5 |
|
6 |
+
# Get the directory of the current script
|
|
|
|
|
7 |
current_dir = os.path.dirname(os.path.abspath(__file__))

# Construct paths to CSS files
base_css_file = os.path.join(current_dir, "static", "css", "style.css")
table_css_file = os.path.join(current_dir, "static", "css", "table.css")

# Read CSS files. Decode explicitly as UTF-8: without an encoding argument
# open() falls back to the platform default, which is not UTF-8 everywhere.
with open(base_css_file, "r", encoding="utf-8") as f:
    base_css = f.read()
with open(table_css_file, "r", encoding="utf-8") as f:
    table_css = f.read()

# Initialize data loaders
# NOTE(review): these two paths are relative to the working directory, unlike
# the CSS paths above -- confirm the app is always launched from this folder.
default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI")
|
22 |
|
23 |
+
with gr.Blocks() as block:
|
24 |
+
# Add a style element that we'll update
|
25 |
+
css_style = gr.HTML(
|
26 |
+
f"<style>{base_css}\n{table_css}</style>",
|
27 |
+
visible=False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
+
gr.Markdown(
|
31 |
+
LEADERBOARD_INTRODUCTION
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
|
|
|
|
34 |
with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
|
|
|
35 |
with gr.Row():
|
36 |
with gr.Accordion("Citation", open=False):
|
37 |
citation_button = gr.Textbox(
|
|
|
40 |
elem_id="citation-button",
|
41 |
lines=10,
|
42 |
)
|
43 |
+
gr.Markdown(
|
44 |
+
TABLE_INTRODUCTION
|
45 |
+
)
|
46 |
+
|
47 |
with gr.Row():
|
48 |
table_selector = gr.Radio(
|
49 |
+
choices=["Default", "Single Image"],
|
50 |
+
label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
|
51 |
+
value="Default"
|
52 |
)
|
53 |
+
|
54 |
+
# Define different captions for each table
|
55 |
+
default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."
|
56 |
+
|
57 |
+
single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."
|
58 |
+
|
59 |
+
caption_component = gr.Markdown(
|
60 |
+
value=default_caption,
|
61 |
+
elem_classes="table-caption",
|
62 |
+
latex_delimiters=[{"left": "$", "right": "$", "display": False}],
|
63 |
+
)
|
64 |
+
|
65 |
with gr.Row():
|
66 |
super_group_selector = gr.Radio(
|
67 |
+
choices=list(default_loader.SUPER_GROUPS.keys()),
|
68 |
+
label="Select a dimension to display breakdown results. We use different column colors to distinguish the overall benchmark scores and breakdown results.",
|
69 |
+
value=list(default_loader.SUPER_GROUPS.keys())[0]
|
70 |
)
|
71 |
model_group_selector = gr.Radio(
|
72 |
+
choices=list(BASE_MODEL_GROUPS.keys()),
|
73 |
label="Select a model group",
|
74 |
+
value="All"
|
75 |
)
|
76 |
+
|
77 |
+
initial_headers, initial_data = default_loader.get_leaderboard_data(list(default_loader.SUPER_GROUPS.keys())[0], "All")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
data_component = gr.Dataframe(
|
79 |
value=initial_data,
|
80 |
headers=initial_headers,
|
81 |
+
datatype=["number", "html"] + ["number"] * (len(initial_headers) - 2),
|
82 |
interactive=False,
|
83 |
elem_classes="custom-dataframe",
|
84 |
+
max_height=2400,
|
85 |
+
column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(initial_headers) - 5),
|
86 |
)
|
87 |
+
|
88 |
+
def update_table_and_caption(table_type, super_group, model_group):
    """Build the refreshed table, caption and CSS for the current selections.

    Args:
        table_type: Value of the table selector; "Default" picks the default
            loader, any other value the single-image loader.
        super_group: Forwarded unchanged to ``get_leaderboard_data``.
        model_group: Forwarded unchanged to ``get_leaderboard_data``.

    Returns:
        A three-item list: a ``gr.Dataframe`` holding the new rows, the
        caption text for the chosen table, and the ``<style>`` markup built
        from both stylesheets.
    """
    is_default = table_type == "Default"
    loader = default_loader if is_default else si_loader
    headers, data = loader.get_leaderboard_data(super_group, model_group)
    caption = default_caption if is_default else single_image_caption  # else: Single-image

    column_count = len(headers)
    # Column 0 is numeric, column 1 is HTML, every remaining column is numeric.
    column_types = ["number", "html"] + ["number"] * (column_count - 2)
    # Two fixed leading widths, then three 160px columns, then 210px for the rest.
    widths = ["100px", "240px"] + ["160px"] * 3 + ["210px"] * (column_count - 5)

    table = gr.Dataframe(
        value=data,
        headers=headers,
        datatype=column_types,
        interactive=False,
        column_widths=widths,
    )
    style_markup = f"<style>{base_css}\n{table_css}</style>"
    return [table, caption, style_markup]
|
107 |
+
|
108 |
+
def update_selectors(table_type):
    """Return Radio updates carrying the group choices for the chosen table.

    "Default" reads the choices from the default loader; any other value
    reads them from the single-image loader. The result is a two-item list:
    the dimension selector update followed by the model-group selector
    update. Only ``choices`` is set on either component.
    """
    if table_type == "Default":
        loader = default_loader
    else:
        loader = si_loader

    dimension_choices = list(loader.SUPER_GROUPS.keys())
    model_choices = list(loader.MODEL_GROUPS.keys())
    return [
        gr.Radio(choices=dimension_choices),
        gr.Radio(choices=model_choices),
    ]
|
114 |
+
|
115 |
refresh_button = gr.Button("Refresh")
|
116 |
+
|
117 |
+
# Update click and change handlers to include caption updates
|
118 |
refresh_button.click(
|
119 |
fn=update_table_and_caption,
|
120 |
inputs=[table_selector, super_group_selector, model_group_selector],
|
|
|
127 |
)
|
128 |
model_group_selector.change(
|
129 |
fn=update_table_and_caption,
|
130 |
+
inputs=[table_selector, super_group_selector, model_group_selector],
|
131 |
outputs=[data_component, caption_component, css_style]
|
132 |
)
|
133 |
table_selector.change(
|
|
|
139 |
inputs=[table_selector, super_group_selector, model_group_selector],
|
140 |
outputs=[data_component, caption_component, css_style]
|
141 |
)
|
142 |
+
|
|
|
|
|
|
|
|
|
|
|
143 |
with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
|
144 |
gr.Markdown(DATA_INFO, elem_classes="markdown-text")
|
145 |
+
|
|
|
146 |
with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
|
147 |
with gr.Row():
|
148 |
gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
|
149 |
+
|
150 |
+
|
151 |
+
|
152 |
if __name__ == "__main__":
|
153 |
block.launch(share=True)
|