awacke1 committed
Commit 1986ded · verified · 1 Parent(s): bae7b69

Update app.py

Files changed (1): app.py (+28 -29)
app.py CHANGED
@@ -27,30 +27,8 @@ with gr.Blocks() as block:
         visible=False
     )

-    gr.Markdown(
-        LEADERBOARD_INTRODUCTION
-    )
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
-            with gr.Row():
-                with gr.Accordion("Citation", open=False):
-                    citation_button = gr.Textbox(
-                        value=CITATION_BUTTON_TEXT,
-                        label=CITATION_BUTTON_LABEL,
-                        elem_id="citation-button",
-                        lines=10,
-                    )
-            gr.Markdown(
-                TABLE_INTRODUCTION
-            )
-
-            with gr.Row():
-                table_selector = gr.Radio(
-                    choices=["Default", "Single Image"],
-                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
-                    value="Default"
-                )
-
+        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=0):
             # Define different captions for each table
             default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."

@@ -79,7 +57,7 @@ with gr.Blocks() as block:
             value=initial_data,
             headers=initial_headers,
             datatype=["number", "html"] + ["number"] * (len(initial_headers) - 2),
-            interactive=False,
+            interactive=True,
             elem_classes="custom-dataframe",
             max_height=2400,
             column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(initial_headers) - 5),
@@ -98,13 +76,32 @@ with gr.Blocks() as block:
             value=data,
             headers=headers,
             datatype=["number", "html"] + ["number"] * (len(headers) - 2),
-            interactive=False,
+            interactive=True,
             column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(headers) - 5),
         ),
         caption,
         f"<style>{base_css}\n{table_css}</style>"
     ]

+            with gr.Row():
+                with gr.Accordion("Citation", open=False):
+                    citation_button = gr.Textbox(
+                        value=CITATION_BUTTON_TEXT,
+                        label=CITATION_BUTTON_LABEL,
+                        elem_id="citation-button",
+                        lines=10,
+                    )
+            gr.Markdown(
+                TABLE_INTRODUCTION
+            )
+
+            with gr.Row():
+                table_selector = gr.Radio(
+                    choices=["Default", "Single Image"],
+                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
+                    value="Default"
+                )
+
         def update_selectors(table_type):
             loader = default_loader if table_type == "Default" else si_loader
             return [
@@ -139,7 +136,11 @@ with gr.Blocks() as block:
                 inputs=[table_selector, super_group_selector, model_group_selector],
                 outputs=[data_component, caption_component, css_style]
             )
-
+        with gr.TabItem("📚 Introduction", elem_id="intro-tab", id=1):
+            gr.Markdown(
+                LEADERBOARD_INTRODUCTION
+            )
+
         with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
             gr.Markdown(DATA_INFO, elem_classes="markdown-text")

@@ -147,7 +148,5 @@ with gr.Blocks() as block:
     with gr.Row():
         gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

-
-
 if __name__ == "__main__":
-    block.launch(share=True)
+    block.launch(share=True, show_api=False)
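The weighted-average formula in the unchanged caption is worth making concrete. With made-up scores (Core = 50.0, Open-ended = 40.0; these are illustrative, not leaderboard numbers), the Overall score works out to:

$$\text{Overall} = \frac{\text{Core} \cdot N_{\text{core}} + \text{Open-ended} \cdot N_{\text{open}}}{N_{\text{core}} + N_{\text{open}}} = \frac{50.0 \cdot 440 + 40.0 \cdot 65}{440 + 65} = \frac{24600}{505} \approx 48.7$$

With 440 of the 505 tasks, the Core set carries roughly 87% of the weight.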
 
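For readers who want the shape of the result rather than the hunks, here is a minimal, self-contained sketch of the post-commit layout and the Radio-to-Dataframe wiring. It is not the repository's app.py: the loaders, constants, CSS, and extra selectors are replaced with hypothetical stand-ins, and only the pieces this commit touches are kept.

```python
# A minimal sketch (NOT the repository's app.py): it shows the post-commit
# tab order and the Radio -> Dataframe update wiring with hypothetical
# stand-in data; the real loaders, constants, and CSS are omitted.
import gradio as gr

# Hypothetical stand-ins for the app's default/single-image table loaders.
DEFAULT_DATA = [[1, "model-a", 52.1], [2, "model-b", 48.7]]
SINGLE_IMAGE_DATA = [[1, "model-a", 55.0], [2, "model-b", 50.2]]
HEADERS = ["Rank", "Model", "Overall"]


def update_table(table_type):
    """Return an updated Dataframe for the selected table type."""
    data = DEFAULT_DATA if table_type == "Default" else SINGLE_IMAGE_DATA
    return gr.Dataframe(value=data, headers=HEADERS)


with gr.Blocks() as block:
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📊 MEGA-Bench", id=0):  # commit moves this tab to id=0
            data_component = gr.Dataframe(
                value=DEFAULT_DATA,
                headers=HEADERS,
                interactive=True,  # commit flips this from False
            )
            # Citation/selector rows now sit below the table, as in the commit.
            with gr.Row():
                table_selector = gr.Radio(
                    choices=["Default", "Single Image"],
                    label="Select table to display.",
                    value="Default",
                )
            table_selector.change(
                fn=update_table,
                inputs=[table_selector],
                outputs=[data_component],
            )
        with gr.TabItem("📚 Introduction", id=1):  # new tab added by the commit
            gr.Markdown("Leaderboard introduction (LEADERBOARD_INTRODUCTION).")
        with gr.TabItem("📝 Data Information", id=2):
            gr.Markdown("Data information (DATA_INFO).")

if __name__ == "__main__":
    block.launch(share=True, show_api=False)
```

Two design notes on the changed flags: `show_api=False` only hides the "Use via API" footer of the Gradio page, and `interactive=True` lets visitors edit the rendered table cells in the browser without affecting the underlying data.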