Pratik Bhavsar committed on
Commit
80c01c6
·
1 Parent(s): fe118de

improved title

Browse files
Files changed (2) hide show
  1. app.py +10 -12
  2. data_loader.py +202 -15
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from data_loader import load_data, CATEGORIES, INSIGHTS, METHODOLOGY, TITLE
3
  from utils import model_info_tab, filter_leaderboard
4
  from visualization import setup_matplotlib
5
 
@@ -31,10 +31,17 @@ def create_app():
31
  )
32
 
33
  with gr.Column(scale=4):
34
- gr.Markdown(TITLE)
 
35
  output = gr.HTML()
36
  plot1 = gr.Plot()
37
  plot2 = gr.Plot()
 
 
 
 
 
 
38
 
39
  for input_comp in [model_type, category, sort_by]:
40
  input_comp.change(
@@ -44,7 +51,7 @@ def create_app():
44
  )
45
 
46
  with gr.Tab("Model Performance"):
47
- gr.Markdown(TITLE)
48
  with gr.Row():
49
  with gr.Column(scale=1):
50
  model_selector = gr.Dropdown(
@@ -65,14 +72,6 @@ def create_app():
65
  outputs=[model_info, radar_plot],
66
  )
67
 
68
- with gr.Tab("Methodology"):
69
- gr.Markdown(TITLE)
70
- gr.Markdown(METHODOLOGY)
71
-
72
- with gr.Tab("Insights"):
73
- gr.Markdown(TITLE)
74
- gr.Markdown(INSIGHTS)
75
-
76
  app.load(
77
  fn=lambda: filter_leaderboard(
78
  df, "All", list(CATEGORIES.keys())[0], "Performance"
@@ -90,7 +89,6 @@ def create_app():
90
  return app
91
 
92
 
93
- # main.py
94
  if __name__ == "__main__":
95
  demo = create_app()
96
  demo.launch()
 
1
  import gradio as gr
2
+ from data_loader import load_data, CATEGORIES, INSIGHTS, METHODOLOGY, HEADER_CONTENT
3
  from utils import model_info_tab, filter_leaderboard
4
  from visualization import setup_matplotlib
5
 
 
31
  )
32
 
33
  with gr.Column(scale=4):
34
+ # Add the new header content above everything
35
+ gr.HTML(HEADER_CONTENT)
36
  output = gr.HTML()
37
  plot1 = gr.Plot()
38
  plot2 = gr.Plot()
39
+ # Add methodology section
40
+ gr.Markdown("# Methodology")
41
+ gr.Markdown(METHODOLOGY)
42
+ # Add insights section
43
+ gr.Markdown("# Key Insights")
44
+ gr.Markdown(INSIGHTS)
45
 
46
  for input_comp in [model_type, category, sort_by]:
47
  input_comp.change(
 
51
  )
52
 
53
  with gr.Tab("Model Performance"):
54
+ gr.HTML(HEADER_CONTENT)
55
  with gr.Row():
56
  with gr.Column(scale=1):
57
  model_selector = gr.Dropdown(
 
72
  outputs=[model_info, radar_plot],
73
  )
74
 
 
 
 
 
 
 
 
 
75
  app.load(
76
  fn=lambda: filter_leaderboard(
77
  df, "All", list(CATEGORIES.keys())[0], "Performance"
 
89
  return app
90
 
91
 
 
92
  if __name__ == "__main__":
93
  demo = create_app()
94
  demo.launch()
data_loader.py CHANGED
@@ -77,22 +77,209 @@ METHODOLOGY = """
77
  | | 100 | Composite | BFCL_v3_multi_turn_composite | Tests overall robustness in complex scenarios |
78
  """
79
 
80
- TITLE = """
81
- <div style="text-align: center; margin-bottom: 2rem;">
82
- <h1 style="margin-bottom: 1rem; font-size: 2.5rem; font-weight: bold;">Agent Leaderboard</h1>
83
- <div style="display: flex; justify-content: center; gap: 2rem; font-size: 1.1rem;">
84
- <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
85
- Blog&nbsp;📝
86
- </a>
87
- <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
88
- GitHub&nbsp;⭐
89
- </a>
90
- <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
91
- Dataset&nbsp;📊
92
- </a>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  </div>
94
- <div style="margin-top: 1rem; color: #6B7280; font-size: 0.9rem;">
95
- Last updated: January 2025
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  </div>
97
  </div>
 
98
  """
 
77
  | | 100 | Composite | BFCL_v3_multi_turn_composite | Tests overall robustness in complex scenarios |
78
  """
79
 
80
+ HEADER_CONTENT = """
81
+ <style>
82
+ .header-wrapper {
83
+ padding: 3rem 2rem;
84
+ background: rgb(17, 17, 27);
85
+ border-radius: 16px;
86
+ display: flex;
87
+ flex-direction: column;
88
+ align-items: center;
89
+ text-align: center;
90
+ }
91
+
92
+ .title {
93
+ color: #ffffff;
94
+ font-size: 2.5rem;
95
+ font-weight: 600;
96
+ margin-bottom: 1.5rem;
97
+ text-align: center;
98
+ }
99
+
100
+ .description {
101
+ color: #ffffff;
102
+ font-size: 1.1rem;
103
+ line-height: 1.6;
104
+ max-width: 800px;
105
+ margin: 0 auto 2rem;
106
+ text-align: center;
107
+ }
108
+
109
+ .actions {
110
+ display: flex;
111
+ gap: 1rem;
112
+ justify-content: center;
113
+ margin-bottom: 2rem;
114
+ }
115
+
116
+ .action-button {
117
+ display: flex;
118
+ align-items: center;
119
+ gap: 0.5rem;
120
+ padding: 0.75rem 1.5rem;
121
+ background: rgba(30, 30, 45, 0.95);
122
+ border: 1px solid rgba(255, 255, 255, 0.1);
123
+ border-radius: 100px;
124
+ color: #fff;
125
+ text-decoration: none;
126
+ font-size: 0.95rem;
127
+ transition: all 0.2s ease;
128
+ }
129
+
130
+ .action-button:hover {
131
+ background: rgba(40, 40, 55, 0.95);
132
+ border-color: rgba(255, 255, 255, 0.2);
133
+ }
134
+
135
+ .update-info {
136
+ color: #94a3b8;
137
+ font-size: 0.9rem;
138
+ margin-bottom: 3rem;
139
+ }
140
+
141
+ .features-grid {
142
+ display: grid;
143
+ grid-template-columns: repeat(3, 1fr);
144
+ gap: 1.5rem;
145
+ width: 100%;
146
+ max-width: 1200px;
147
+ }
148
+
149
+ .feature-card {
150
+ background: rgba(17, 17, 27, 0.6);
151
+ border: 1px solid rgba(255, 255, 255, 0.1);
152
+ border-radius: 16px;
153
+ padding: 2rem;
154
+ text-align: left;
155
+ }
156
+
157
+ .feature-icon {
158
+ background: rgba(79, 70, 229, 0.1);
159
+ width: 40px;
160
+ height: 40px;
161
+ border-radius: 12px;
162
+ display: flex;
163
+ align-items: center;
164
+ justify-content: center;
165
+ margin-bottom: 1.5rem;
166
+ }
167
+
168
+ .feature-title {
169
+ color: #ffffff;
170
+ font-size: 1.25rem;
171
+ font-weight: 600;
172
+ margin-bottom: 1rem;
173
+ }
174
+
175
+ .feature-description {
176
+ color: #94a3b8;
177
+ font-size: 0.95rem;
178
+ margin-bottom: 1.5rem;
179
+ }
180
+
181
+ .feature-list {
182
+ list-style: none;
183
+ padding: 0;
184
+ margin: 0;
185
+ display: flex;
186
+ flex-direction: column;
187
+ gap: 0.75rem;
188
+ }
189
+
190
+ .feature-list li {
191
+ color: #e2e8f0;
192
+ font-size: 0.95rem;
193
+ display: flex;
194
+ align-items: center;
195
+ gap: 0.5rem;
196
+ }
197
+
198
+ .feature-list li::before {
199
+ content: '';
200
+ width: 6px;
201
+ height: 6px;
202
+ background: #4F46E5;
203
+ border-radius: 50%;
204
+ flex-shrink: 0;
205
+ }
206
+ </style>
207
+
208
+ <div class="header-wrapper">
209
+ <h1 class="title">Agent Leaderboard</h1>
210
+ <p class="description">
211
+ A comprehensive benchmark for evaluating AI agents in real-world business scenarios, comparing practical performance across multiple domains and use cases.
212
+ </p>
213
+
214
+ <div class="actions">
215
+ <a href="#" class="action-button">
216
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
217
+ <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
218
+ <line x1="8" y1="12" x2="16" y2="12"/>
219
+ </svg>
220
+ Blog
221
+ </a>
222
+ <a href="#" class="action-button">
223
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
224
+ <path d="M9 19c-5 1.5-5-2.5-7-3m14 6v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
225
+ </svg>
226
+ GitHub
227
+ </a>
228
+ <a href="#" class="action-button">
229
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
230
+ <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
231
+ <polyline points="7 10 12 15 17 10"/>
232
+ <line x1="12" y1="15" x2="12" y2="3"/>
233
+ </svg>
234
+ Dataset
235
+ </a>
236
+ </div>
237
+
238
+ <div class="features-grid">
239
+ <div class="feature-card">
240
+ <div class="feature-icon">
241
+ <svg width="24" height="24" fill="none" stroke="#4F46E5" stroke-width="2" viewBox="0 0 24 24">
242
+ <path d="M21 16V8a2 2 0 0 0-1-1.73l-7-4a2 2 0 0 0-2 0l-7 4A2 2 0 0 0 3 8v8a2 2 0 0 0 1 1.73l7 4a2 2 0 0 0 2 0l7-4A2 2 0 0 0 21 16z"/>
243
+ </svg>
244
+ </div>
245
+ <h3 class="feature-title">360° Domain Evaluation</h3>
246
+ <p class="feature-description">Comprehensive evaluation across multiple benchmarks and domains:</p>
247
+ <ul class="feature-list">
248
+ <li>Cross-domain evaluation</li>
249
+ <li>Real-world use cases</li>
250
+ <li>Edge case evaluation</li>
251
+ </ul>
252
  </div>
253
+
254
+ <div class="feature-card">
255
+ <div class="feature-icon">
256
+ <svg width="24" height="24" fill="none" stroke="#4F46E5" stroke-width="2" viewBox="0 0 24 24">
257
+ <path d="M22 12h-4l-3 9L9 3l-3 9H2"/>
258
+ </svg>
259
+ </div>
260
+ <h3 class="feature-title">Make Better Decisions</h3>
261
+ <p class="feature-description">Beyond technical metrics, we provide:</p>
262
+ <ul class="feature-list">
263
+ <li>Cost-effectiveness analysis</li>
264
+ <li>Business impact metrics</li>
265
+ <li>Vendor strategy insights</li>
266
+ </ul>
267
+ </div>
268
+
269
+ <div class="feature-card">
270
+ <div class="feature-icon">
271
+ <svg width="24" height="24" fill="none" stroke="#4F46E5" stroke-width="2" viewBox="0 0 24 24">
272
+ <path d="M21 2v6h-6M3 12a9 9 0 0 1 15-6.7L21 8M3 12a9 9 0 0 0 15 6.7L21 16M21 22v-6h-6"/>
273
+ </svg>
274
+ </div>
275
+ <h3 class="feature-title">Updated Periodically</h3>
276
+ <p class="feature-description">Regular updates with latest models:</p>
277
+ <ul class="feature-list">
278
+ <li>11 private models evaluated</li>
279
+ <li>5 open source models included</li>
280
+ <li>Monthly model additions</li>
281
+ </ul>
282
  </div>
283
  </div>
284
+ </div>
285
  """