Leaderboard-Deepseek-Gemini-Grok-GPT-Qwen

Running

App Files Community

awacke1 committed 8 days ago

Commit

614a477

verified ·

1 Parent(s): c57e896

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -150

app.py CHANGED Viewed

@@ -2,132 +2,36 @@ import gradio as gr
 from utils import MEGABenchEvalDataLoader
 import os
 from constants import *
-from urllib.parse import quote
-# ------------------------------------------------------------------------------
-# Load CSS files
-# ------------------------------------------------------------------------------
 current_dir = os.path.dirname(os.path.abspath(__file__))
 base_css_file = os.path.join(current_dir, "static", "css", "style.css")
 table_css_file = os.path.join(current_dir, "static", "css", "table.css")
 with open(base_css_file, "r") as f:
     base_css = f.read()
 with open(table_css_file, "r") as f:
     table_css = f.read()
-# ------------------------------------------------------------------------------
 # Initialize data loaders
-# ------------------------------------------------------------------------------
 default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
 si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI")
-# ------------------------------------------------------------------------------
-# Helper Functions
-# ------------------------------------------------------------------------------
-def generate_search_links(model_name):
-    """For a given model name, generate a set of search links as HTML."""
-    search_urls = {
-        "📚📖ArXiv": lambda k: f"https://arxiv.org/search/?query={quote(k)}&searchtype=all",
-        "🔮Google": lambda k: f"https://www.google.com/search?q={quote(k)}",
-        "📺Youtube": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}",
-        "🔭Bing": lambda k: f"https://www.bing.com/search?q={quote(k)}",
-        "💡Truth": lambda k: f"https://truthsocial.com/search?q={quote(k)}",
-        "📱X": lambda k: f"https://twitter.com/search?q={quote(k)}",
-    }
-    # Build a set of inline HTML links (each opens in a new tab)
-    links = " ".join(
-        [f'<a href="{url(model_name)}" target="_blank">{emoji}</a>' for emoji, url in search_urls.items()]
     )
-    return links
-def add_search_links_to_table(headers, data):
-    """
-    Append a "Search Links" column to the table.
-    (Assumes that each row’s column index 1 holds the model name.)
-    """
-    new_headers = headers.copy()
-    new_headers.append("Search Links")
-    new_data = []
-    for row in data:
-        new_row = row.copy()
-        # Assume the model name is in the second column (index 1)
-        model_name = new_row[1] if len(new_row) > 1 else ""
-        new_row.append(generate_search_links(model_name))
-        new_data.append(new_row)
-    return new_headers, new_data
-def clean_choice(choice):
-    """
-    Remove a leading emoji and space (if present) from a choice string.
-    For example, "📊 Default" becomes "Default".
-    """
-    parts = choice.split(" ", 1)
-    return parts[1] if len(parts) > 1 else choice
-def update_table_and_caption(table_type, super_group, model_group):
-    """
-    Called when any selector changes. Cleans the emoji‐prefixed values, loads new data,
-    appends a Search Links column, and returns a new Dataframe component (with a smaller max height),
-    an updated caption, and the CSS style.
-    """
-    table_type_clean = clean_choice(table_type)
-    super_group_clean = clean_choice(super_group)
-    model_group_clean = clean_choice(model_group)
-    if table_type_clean == "Default":
-        headers, data = default_loader.get_leaderboard_data(super_group_clean, model_group_clean)
-        caption = default_caption
-    else:  # "Single Image"
-        headers, data = si_loader.get_leaderboard_data(super_group_clean, model_group_clean)
-        caption = single_image_caption
-    # Append search links column to the table data
-    headers, data = add_search_links_to_table(headers, data)
-    n = len(headers)
-    # Assume first column is a number, second (model name) is HTML, the intermediate ones are numbers, and the last (links) is HTML
-    datatype = ["number", "html"] + ["number"] * (n - 3) + ["html"]
-    # Adjust column widths as needed (example: first two fixed, last a bit wider)
-    column_widths = ["100px", "240px"] + ["160px"] * (n - 3) + ["210px"]
-    dataframe_component = gr.Dataframe(
-        value=data,
-        headers=headers,
-        datatype=datatype,
-        interactive=False,
-        column_widths=column_widths,
-        max_height=600,  # smaller height to show the full table in a compact area
-        elem_classes="custom-dataframe"
     )
-    return [dataframe_component, caption, f"<style>{base_css}\n{table_css}</style>"]
-def update_selectors(table_type):
-    """
-    When the table selector changes, update the other radio choices.
-    (Also adds an emoji prefix to each choice.)
-    """
-    table_type_clean = clean_choice(table_type)
-    loader = default_loader if table_type_clean == "Default" else si_loader
-    super_group_choices = [f"🔍 {group}" for group in list(loader.SUPER_GROUPS.keys())]
-    model_group_choices = [f"🤖 {group}" for group in list(loader.MODEL_GROUPS.keys())]
-    return [super_group_choices, model_group_choices]
-# ------------------------------------------------------------------------------
-# Build Gradio App Layout
-# ------------------------------------------------------------------------------
-with gr.Blocks() as block:
-    # Add CSS via an invisible HTML component
-    css_style = gr.HTML(f"<style>{base_css}\n{table_css}</style>", visible=False)
-    # NOTE: The original top-level introduction markdown has been removed here.
-    # It is now placed at the bottom of the MEGA-Bench tab in a collapsed Accordion.
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        # -------------------- Tab 1: MEGA-Bench --------------------
         with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
-            # A Citation accordion (collapsed by default)
             with gr.Row():
                 with gr.Accordion("Citation", open=False):
                     citation_button = gr.Textbox(
@@ -136,55 +40,81 @@ with gr.Blocks() as block:
                         elem_id="citation-button",
                         lines=10,
                     )
-            gr.Markdown(TABLE_INTRODUCTION)
-            # ---- Top-left “button‐bar” of radio selectors with emoji labels ----
             with gr.Row():
                 table_selector = gr.Radio(
-                    choices=["📊 Default", "🖼️ Single Image"],
-                    label="Select table to display",
-                    value="📊 Default"
                 )
             with gr.Row():
                 super_group_selector = gr.Radio(
-                    choices=[f"🔍 {group}" for group in list(default_loader.SUPER_GROUPS.keys())],
-                    label="Select a dimension",
-                    value=f"🔍 {list(default_loader.SUPER_GROUPS.keys())[0]}"
                 )
                 model_group_selector = gr.Radio(
-                    choices=[f"🤖 {group}" for group in list(BASE_MODEL_GROUPS.keys())],
                     label="Select a model group",
-                    value="🤖 All"
                 )
-            # A caption component for the table
-            caption_component = gr.Markdown(
-                value=default_caption,
-                elem_classes="table-caption",
-                latex_delimiters=[{"left": "$", "right": "$", "display": False}],
-            )
-            # ---- Initial Dataframe (with search links appended) ----
-            initial_headers, initial_data = default_loader.get_leaderboard_data(
-                list(default_loader.SUPER_GROUPS.keys())[0], "All"
-            )
-            initial_headers, initial_data = add_search_links_to_table(initial_headers, initial_data)
-            n = len(initial_headers)
-            initial_datatype = ["number", "html"] + ["number"] * (n - 3) + ["html"]
-            initial_column_widths = ["100px", "240px"] + ["160px"] * (n - 3) + ["210px"]
             data_component = gr.Dataframe(
                 value=initial_data,
                 headers=initial_headers,
-                datatype=initial_datatype,
                 interactive=False,
                 elem_classes="custom-dataframe",
-                max_height=600,
-                column_widths=initial_column_widths
             )
-            # ---- Controls to update the table ----
             refresh_button = gr.Button("Refresh")
             refresh_button.click(
                 fn=update_table_and_caption,
                 inputs=[table_selector, super_group_selector, model_group_selector],
@@ -197,7 +127,7 @@ with gr.Blocks() as block:
             )
             model_group_selector.change(
                 fn=update_table_and_caption,
-                inputs=[table_selector, super_group_selector, model_group_selector],
                 outputs=[data_component, caption_component, css_style]
             )
             table_selector.change(
@@ -209,19 +139,15 @@ with gr.Blocks() as block:
                 inputs=[table_selector, super_group_selector, model_group_selector],
                 outputs=[data_component, caption_component, css_style]
             )
-            # ---- Move the introductory text to a collapsed accordion at the bottom ----
-            with gr.Accordion("Introduction", open=False):
-                gr.Markdown(LEADERBOARD_INTRODUCTION)
-        # -------------------- Tab 2: Data Information --------------------
         with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
             gr.Markdown(DATA_INFO, elem_classes="markdown-text")
-        # -------------------- Tab 3: Submit --------------------
         with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
             with gr.Row():
                 gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
 if __name__ == "__main__":
     block.launch(share=True)

 from utils import MEGABenchEvalDataLoader
 import os
 from constants import *
+# Get the directory of the current script
 current_dir = os.path.dirname(os.path.abspath(__file__))
+# Construct paths to CSS files
 base_css_file = os.path.join(current_dir, "static", "css", "style.css")
 table_css_file = os.path.join(current_dir, "static", "css", "table.css")
+# Read CSS files
 with open(base_css_file, "r") as f:
     base_css = f.read()
 with open(table_css_file, "r") as f:
     table_css = f.read()
 # Initialize data loaders
 default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
 si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI")
+with gr.Blocks() as block:
+    # Add a style element that we'll update
+    css_style = gr.HTML(
+        f"<style>{base_css}\n{table_css}</style>",
+        visible=False
     )
+    gr.Markdown(
+        LEADERBOARD_INTRODUCTION
     )
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
             with gr.Row():
                 with gr.Accordion("Citation", open=False):
                     citation_button = gr.Textbox(
                         elem_id="citation-button",
                         lines=10,
                     )
+            gr.Markdown(
+                TABLE_INTRODUCTION
+            )
             with gr.Row():
                 table_selector = gr.Radio(
+                    choices=["Default", "Single Image"],
+                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
+                    value="Default"
                 )
+            # Define different captions for each table
+            default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."
+            single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."
+            caption_component = gr.Markdown(
+                value=default_caption,
+                elem_classes="table-caption",
+                latex_delimiters=[{"left": "$", "right": "$", "display": False}],
+            )
             with gr.Row():
                 super_group_selector = gr.Radio(
+                    choices=list(default_loader.SUPER_GROUPS.keys()),
+                    label="Select a dimension to display breakdown results. We use different column colors to distinguish the overall benchmark scores and breakdown results.",
+                    value=list(default_loader.SUPER_GROUPS.keys())[0]
                 )
                 model_group_selector = gr.Radio(
+                    choices=list(BASE_MODEL_GROUPS.keys()),
                     label="Select a model group",
+                    value="All"
                 )
+            initial_headers, initial_data = default_loader.get_leaderboard_data(list(default_loader.SUPER_GROUPS.keys())[0], "All")
             data_component = gr.Dataframe(
                 value=initial_data,
                 headers=initial_headers,
+                datatype=["number", "html"] + ["number"] * (len(initial_headers) - 2),
                 interactive=False,
                 elem_classes="custom-dataframe",
+                max_height=2400,
+                column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(initial_headers) - 5),
             )
+            def update_table_and_caption(table_type, super_group, model_group):
+                if table_type == "Default":
+                    headers, data = default_loader.get_leaderboard_data(super_group, model_group)
+                    caption = default_caption
+                else:  # Single-image
+                    headers, data = si_loader.get_leaderboard_data(super_group, model_group)
+                    caption = single_image_caption
+                return [
+                    gr.Dataframe(
+                        value=data,
+                        headers=headers,
+                        datatype=["number", "html"] + ["number"] * (len(headers) - 2),
+                        interactive=False,
+                        column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(headers) - 5),
+                    ),
+                    caption,
+                    f"<style>{base_css}\n{table_css}</style>"
+                ]
+            def update_selectors(table_type):
+                loader = default_loader if table_type == "Default" else si_loader
+                return [
+                    gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
+                    gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
+                ]
             refresh_button = gr.Button("Refresh")
+            # Update click and change handlers to include caption updates
             refresh_button.click(
                 fn=update_table_and_caption,
                 inputs=[table_selector, super_group_selector, model_group_selector],
             )
             model_group_selector.change(
                 fn=update_table_and_caption,
+                inputs=[table_selector, super_group_selector, model_group_selector],
                 outputs=[data_component, caption_component, css_style]
             )
             table_selector.change(
                 inputs=[table_selector, super_group_selector, model_group_selector],
                 outputs=[data_component, caption_component, css_style]
             )
         with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
             gr.Markdown(DATA_INFO, elem_classes="markdown-text")
         with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
             with gr.Row():
                 gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
 if __name__ == "__main__":
     block.launch(share=True)