Spaces: galileo-ai/agent-leaderboard

Running

App Files Community

Pratik Bhavsar committed 10 days ago

Commit

80c01c6

1 Parent(s): fe118de

improved title

Browse files

Files changed (2) hide show

app.py +10 -12
data_loader.py +202 -15

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import gradio as gr
-from data_loader import load_data, CATEGORIES, INSIGHTS, METHODOLOGY, TITLE
 from utils import model_info_tab, filter_leaderboard
 from visualization import setup_matplotlib
@@ -31,10 +31,17 @@ def create_app():
                         )
                     with gr.Column(scale=4):
-                        gr.Markdown(TITLE)
                         output = gr.HTML()
                         plot1 = gr.Plot()
                         plot2 = gr.Plot()
                 for input_comp in [model_type, category, sort_by]:
                     input_comp.change(
@@ -44,7 +51,7 @@ def create_app():
                     )
             with gr.Tab("Model Performance"):
-                gr.Markdown(TITLE)
                 with gr.Row():
                     with gr.Column(scale=1):
                         model_selector = gr.Dropdown(
@@ -65,14 +72,6 @@ def create_app():
                     outputs=[model_info, radar_plot],
                 )
-            with gr.Tab("Methodology"):
-                gr.Markdown(TITLE)
-                gr.Markdown(METHODOLOGY)
-            with gr.Tab("Insights"):
-                gr.Markdown(TITLE)
-                gr.Markdown(INSIGHTS)
         app.load(
             fn=lambda: filter_leaderboard(
                 df, "All", list(CATEGORIES.keys())[0], "Performance"
@@ -90,7 +89,6 @@ def create_app():
     return app
-# main.py
 if __name__ == "__main__":
     demo = create_app()
     demo.launch()

 import gradio as gr
+from data_loader import load_data, CATEGORIES, INSIGHTS, METHODOLOGY, HEADER_CONTENT
 from utils import model_info_tab, filter_leaderboard
 from visualization import setup_matplotlib
                         )
                     with gr.Column(scale=4):
+                        # Add the new header content above everything
+                        gr.HTML(HEADER_CONTENT)
                         output = gr.HTML()
                         plot1 = gr.Plot()
                         plot2 = gr.Plot()
+                        # Add methodology section
+                        gr.Markdown("# Methodology")
+                        gr.Markdown(METHODOLOGY)
+                        # Add insights section
+                        gr.Markdown("# Key Insights")
+                        gr.Markdown(INSIGHTS)
                 for input_comp in [model_type, category, sort_by]:
                     input_comp.change(
                     )
             with gr.Tab("Model Performance"):
+                gr.HTML(HEADER_CONTENT)
                 with gr.Row():
                     with gr.Column(scale=1):
                         model_selector = gr.Dropdown(
                     outputs=[model_info, radar_plot],
                 )
         app.load(
             fn=lambda: filter_leaderboard(
                 df, "All", list(CATEGORIES.keys())[0], "Performance"
     return app
 if __name__ == "__main__":
     demo = create_app()
     demo.launch()

data_loader.py CHANGED Viewed

@@ -77,22 +77,209 @@ METHODOLOGY = """
                 | | 100 | Composite | BFCL_v3_multi_turn_composite | Tests overall robustness in complex scenarios |
                 """
-TITLE = """
-    <div style="text-align: center; margin-bottom: 2rem;">
-        <h1 style="margin-bottom: 1rem; font-size: 2.5rem; font-weight: bold;">Agent Leaderboard</h1>
-        <div style="display: flex; justify-content: center; gap: 2rem; font-size: 1.1rem;">
-            <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
-                Blog&nbsp;📝
-            </a>
-            <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
-                GitHub&nbsp;⭐
-            </a>
-            <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
-                Dataset&nbsp;📊
-            </a>
         </div>
-        <div style="margin-top: 1rem; color: #6B7280; font-size: 0.9rem;">
-            Last updated: January 2025
         </div>
     </div>
 """

                 | | 100 | Composite | BFCL_v3_multi_turn_composite | Tests overall robustness in complex scenarios |
                 """
+HEADER_CONTENT = """
+<style>
+    .header-wrapper {
+        padding: 3rem 2rem;
+        background: rgb(17, 17, 27);
+        border-radius: 16px;
+        display: flex;
+        flex-direction: column;
+        align-items: center;
+        text-align: center;
+    }
+    .title {
+        color: #ffffff;
+        font-size: 2.5rem;
+        font-weight: 600;
+        margin-bottom: 1.5rem;
+        text-align: center;
+    }
+    .description {
+        color: #ffffff;
+        font-size: 1.1rem;
+        line-height: 1.6;
+        max-width: 800px;
+        margin: 0 auto 2rem;
+        text-align: center;
+    }
+    .actions {
+        display: flex;
+        gap: 1rem;
+        justify-content: center;
+        margin-bottom: 2rem;
+    }
+    .action-button {
+        display: flex;
+        align-items: center;
+        gap: 0.5rem;
+        padding: 0.75rem 1.5rem;
+        background: rgba(30, 30, 45, 0.95);
+        border: 1px solid rgba(255, 255, 255, 0.1);
+        border-radius: 100px;
+        color: #fff;
+        text-decoration: none;
+        font-size: 0.95rem;
+        transition: all 0.2s ease;
+    }
+    .action-button:hover {
+        background: rgba(40, 40, 55, 0.95);
+        border-color: rgba(255, 255, 255, 0.2);
+    }
+    .update-info {
+        color: #94a3b8;
+        font-size: 0.9rem;
+        margin-bottom: 3rem;
+    }
+    .features-grid {
+        display: grid;
+        grid-template-columns: repeat(3, 1fr);
+        gap: 1.5rem;
+        width: 100%;
+        max-width: 1200px;
+    }
+    .feature-card {
+        background: rgba(17, 17, 27, 0.6);
+        border: 1px solid rgba(255, 255, 255, 0.1);
+        border-radius: 16px;
+        padding: 2rem;
+        text-align: left;
+    }
+    .feature-icon {
+        background: rgba(79, 70, 229, 0.1);
+        width: 40px;
+        height: 40px;
+        border-radius: 12px;
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        margin-bottom: 1.5rem;
+    }
+    .feature-title {
+        color: #ffffff;
+        font-size: 1.25rem;
+        font-weight: 600;
+        margin-bottom: 1rem;
+    }
+    .feature-description {
+        color: #94a3b8;
+        font-size: 0.95rem;
+        margin-bottom: 1.5rem;
+    }
+    .feature-list {
+        list-style: none;
+        padding: 0;
+        margin: 0;
+        display: flex;
+        flex-direction: column;
+        gap: 0.75rem;
+    }
+    .feature-list li {
+        color: #e2e8f0;
+        font-size: 0.95rem;
+        display: flex;
+        align-items: center;
+        gap: 0.5rem;
+    }
+    .feature-list li::before {
+        content: '';
+        width: 6px;
+        height: 6px;
+        background: #4F46E5;
+        border-radius: 50%;
+        flex-shrink: 0;
+    }
+</style>
+<div class="header-wrapper">
+    <h1 class="title">Agent Leaderboard</h1>
+    <p class="description">
+        A comprehensive benchmark for evaluating AI agents in real-world business scenarios, comparing practical performance across multiple domains and use cases.
+    </p>
+    <div class="actions">
+        <a href="#" class="action-button">
+            <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
+                <line x1="8" y1="12" x2="16" y2="12"/>
+            </svg>
+            Blog
+        </a>
+        <a href="#" class="action-button">
+            <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                <path d="M9 19c-5 1.5-5-2.5-7-3m14 6v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
+            </svg>
+            GitHub
+        </a>
+        <a href="#" class="action-button">
+            <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
+                <polyline points="7 10 12 15 17 10"/>
+                <line x1="12" y1="15" x2="12" y2="3"/>
+            </svg>
+            Dataset
+        </a>
+    </div>
+    <div class="features-grid">
+        <div class="feature-card">
+            <div class="feature-icon">
+                <svg width="24" height="24" fill="none" stroke="#4F46E5" stroke-width="2" viewBox="0 0 24 24">
+                    <path d="M21 16V8a2 2 0 0 0-1-1.73l-7-4a2 2 0 0 0-2 0l-7 4A2 2 0 0 0 3 8v8a2 2 0 0 0 1 1.73l7 4a2 2 0 0 0 2 0l7-4A2 2 0 0 0 21 16z"/>
+                </svg>
+            </div>
+            <h3 class="feature-title">360° Domain Evaluation</h3>
+            <p class="feature-description">Comprehensive evaluation across multiple benchmarks and domains:</p>
+            <ul class="feature-list">
+                <li>Cross-domain evaluation</li>
+                <li>Real-world use cases</li>
+                <li>Edge case evaluation</li>
+            </ul>
         </div>
+        <div class="feature-card">
+            <div class="feature-icon">
+                <svg width="24" height="24" fill="none" stroke="#4F46E5" stroke-width="2" viewBox="0 0 24 24">
+                    <path d="M22 12h-4l-3 9L9 3l-3 9H2"/>
+                </svg>
+            </div>
+            <h3 class="feature-title">Make Better Decisions</h3>
+            <p class="feature-description">Beyond technical metrics, we provide:</p>
+            <ul class="feature-list">
+                <li>Cost-effectiveness analysis</li>
+                <li>Business impact metrics</li>
+                <li>Vendor strategy insights</li>
+            </ul>
+        </div>
+        <div class="feature-card">
+            <div class="feature-icon">
+                <svg width="24" height="24" fill="none" stroke="#4F46E5" stroke-width="2" viewBox="0 0 24 24">
+                    <path d="M21 2v6h-6M3 12a9 9 0 0 1 15-6.7L21 8M3 12a9 9 0 0 0 15 6.7L21 16M21 22v-6h-6"/>
+                </svg>
+            </div>
+            <h3 class="feature-title">Updated Periodically</h3>
+            <p class="feature-description">Regular updates with latest models:</p>
+            <ul class="feature-list">
+                <li>11 private models evaluated</li>
+                <li>5 open source models included</li>
+                <li>Monthly model additions</li>
+            </ul>
         </div>
     </div>
+</div>
 """