awacke1 committed
Commit 1986ded · verified · 1 Parent(s): bae7b69

Update app.py

Files changed (1): app.py (+28 -29)
app.py CHANGED
@@ -27,30 +27,8 @@ with gr.Blocks() as block:
         visible=False
     )

-    gr.Markdown(
-        LEADERBOARD_INTRODUCTION
-    )
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
-            with gr.Row():
-                with gr.Accordion("Citation", open=False):
-                    citation_button = gr.Textbox(
-                        value=CITATION_BUTTON_TEXT,
-                        label=CITATION_BUTTON_LABEL,
-                        elem_id="citation-button",
-                        lines=10,
-                    )
-            gr.Markdown(
-                TABLE_INTRODUCTION
-            )
-
-            with gr.Row():
-                table_selector = gr.Radio(
-                    choices=["Default", "Single Image"],
-                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
-                    value="Default"
-                )
-
+        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=0):
             # Define different captions for each table
             default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."

@@ -79,7 +57,7 @@ with gr.Blocks() as block:
             value=initial_data,
             headers=initial_headers,
             datatype=["number", "html"] + ["number"] * (len(initial_headers) - 2),
-            interactive=False,
+            interactive=True,
             elem_classes="custom-dataframe",
             max_height=2400,
             column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(initial_headers) - 5),
@@ -98,13 +76,32 @@ with gr.Blocks() as block:
             value=data,
             headers=headers,
             datatype=["number", "html"] + ["number"] * (len(headers) - 2),
-            interactive=False,
+            interactive=True,
             column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(headers) - 5),
         ),
         caption,
         f"<style>{base_css}\n{table_css}</style>"
     ]

+            with gr.Row():
+                with gr.Accordion("Citation", open=False):
+                    citation_button = gr.Textbox(
+                        value=CITATION_BUTTON_TEXT,
+                        label=CITATION_BUTTON_LABEL,
+                        elem_id="citation-button",
+                        lines=10,
+                    )
+            gr.Markdown(
+                TABLE_INTRODUCTION
+            )
+
+            with gr.Row():
+                table_selector = gr.Radio(
+                    choices=["Default", "Single Image"],
+                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
+                    value="Default"
+                )
+
         def update_selectors(table_type):
             loader = default_loader if table_type == "Default" else si_loader
             return [
@@ -139,7 +136,11 @@ with gr.Blocks() as block:
                 inputs=[table_selector, super_group_selector, model_group_selector],
                 outputs=[data_component, caption_component, css_style]
             )
-
+        with gr.TabItem("📚 Introduction", elem_id="intro-tab", id=1):
+            gr.Markdown(
+                LEADERBOARD_INTRODUCTION
+            )
+
         with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
             gr.Markdown(DATA_INFO, elem_classes="markdown-text")

@@ -147,7 +148,5 @@ with gr.Blocks() as block:
     with gr.Row():
         gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

-
-
 if __name__ == "__main__":
-    block.launch(share=True)
+    block.launch(share=True, show_api=False)
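The weighted-average formula in the unchanged caption is worth making concrete. With made-up scores (Core = 50.0, Open-ended = 40.0; these are illustrative, not leaderboard numbers), the Overall score works out to:

$$\text{Overall} = \frac{\text{Core} \cdot N_{\text{core}} + \text{Open-ended} \cdot N_{\text{open}}}{N_{\text{core}} + N_{\text{open}}} = \frac{50.0 \cdot 440 + 40.0 \cdot 65}{440 + 65} = \frac{24600}{505} \approx 48.7$$

With 440 of the 505 tasks, the Core set carries roughly 87% of the weight.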
 
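For readers who want the shape of the result rather than the hunks, here is a minimal, self-contained sketch of the post-commit layout and the Radio-to-Dataframe wiring. It is not the repository's app.py: the loaders, constants, CSS, and extra selectors are replaced with hypothetical stand-ins, and only the pieces this commit touches are kept.

```python
# A minimal sketch (NOT the repository's app.py): it shows the post-commit
# tab order and the Radio -> Dataframe update wiring with hypothetical
# stand-in data; the real loaders, constants, and CSS are omitted.
import gradio as gr

# Hypothetical stand-ins for the app's default/single-image table loaders.
DEFAULT_DATA = [[1, "model-a", 52.1], [2, "model-b", 48.7]]
SINGLE_IMAGE_DATA = [[1, "model-a", 55.0], [2, "model-b", 50.2]]
HEADERS = ["Rank", "Model", "Overall"]


def update_table(table_type):
    """Return an updated Dataframe for the selected table type."""
    data = DEFAULT_DATA if table_type == "Default" else SINGLE_IMAGE_DATA
    return gr.Dataframe(value=data, headers=HEADERS)


with gr.Blocks() as block:
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📊 MEGA-Bench", id=0):  # commit moves this tab to id=0
            data_component = gr.Dataframe(
                value=DEFAULT_DATA,
                headers=HEADERS,
                interactive=True,  # commit flips this from False
            )
            # Citation/selector rows now sit below the table, as in the commit.
            with gr.Row():
                table_selector = gr.Radio(
                    choices=["Default", "Single Image"],
                    label="Select table to display.",
                    value="Default",
                )
            table_selector.change(
                fn=update_table,
                inputs=[table_selector],
                outputs=[data_component],
            )
        with gr.TabItem("📚 Introduction", id=1):  # new tab added by the commit
            gr.Markdown("Leaderboard introduction (LEADERBOARD_INTRODUCTION).")
        with gr.TabItem("📝 Data Information", id=2):
            gr.Markdown("Data information (DATA_INFO).")

if __name__ == "__main__":
    block.launch(share=True, show_api=False)
```

Two design notes on the changed flags: `show_api=False` only hides the "Use via API" footer of the Gradio page, and `interactive=True` lets visitors edit the rendered table cells in the browser without affecting the underlying data.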