agh123 committed on
Commit
4660782
·
1 Parent(s): 6dbec32

feat: add Model Size vs Performance

Browse files
Files changed (3) hide show
  1. requirements.txt +1 -0
  2. src/app.py +3 -47
  3. src/components/visualizations.py +156 -94
requirements.txt CHANGED
@@ -6,3 +6,4 @@ plotly>=5.18.0
6
  httpx>=0.25.1
7
  pydantic-settings>=2.0.3
8
  firebase-admin==6.6.0
 
 
6
  httpx>=0.25.1
7
  pydantic-settings>=2.0.3
8
  firebase-admin==6.6.0
9
+ statsmodels>=0.14.1
src/app.py CHANGED
@@ -168,53 +168,9 @@ async def main():
168
 
169
  # Render plot section
170
  st.markdown("---")
171
- st.title("📊 Performance Comparison")
172
-
173
- # Plot specific selectors in a row
174
- plot_col1, plot_col2, plot_col3 = st.columns(3)
175
-
176
- with plot_col1:
177
- plot_model = st.selectbox(
178
- "Select Model for Comparison",
179
- options=models,
180
- key="plot_model_selector",
181
- )
182
-
183
- with plot_col2:
184
- pp_options = sorted([int(x) for x in df["PP Config"].unique()])
185
- default_pp_index = (
186
- pp_options.index(std.PP_CONFIG)
187
- if std.PP_CONFIG in pp_options
188
- else 0
189
- )
190
- plot_pp = st.selectbox(
191
- "Select PP Config for Comparison",
192
- options=pp_options,
193
- key="plot_pp_selector",
194
- index=default_pp_index,
195
- )
196
-
197
- with plot_col3:
198
- tg_options = sorted([int(x) for x in df["TG Config"].unique()])
199
- default_tg_index = (
200
- tg_options.index(std.TG_CONFIG)
201
- if std.TG_CONFIG in tg_options
202
- else 0
203
- )
204
- plot_tg = st.selectbox(
205
- "Select TG Config for Comparison",
206
- options=tg_options,
207
- key="plot_tg_selector",
208
- index=default_tg_index,
209
- )
210
-
211
- # Create plot filters based on table filters but override the model and configs
212
- plot_filters = table_filters.copy()
213
- plot_filters["model"] = plot_model
214
- plot_filters["pp_range"] = (plot_pp, plot_pp) # Set exact PP value
215
- plot_filters["tg_range"] = (plot_tg, plot_tg) # Set exact TG value
216
-
217
- render_performance_plots(df, plot_filters)
218
 
219
  with guide_col:
220
  render_contribution_guide()
 
168
 
169
  # Render plot section
170
  st.markdown("---")
171
+
172
+ # Render performance plots with table filters
173
+ render_performance_plots(df, table_filters)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  with guide_col:
176
  render_contribution_guide()
src/components/visualizations.py CHANGED
@@ -6,6 +6,7 @@ import streamlit as st
6
  import plotly.express as px
7
  import pandas as pd
8
  from typing import Optional, Dict, List, Set
 
9
 
10
 
11
  def create_performance_plot(
@@ -105,112 +106,173 @@ def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
105
  return filtered_df
106
 
107
 
108
- def render_performance_plots(df: pd.DataFrame, filters: Dict):
109
- """Render performance comparison plots"""
110
  if df.empty:
111
- st.warning("No data available for plotting.")
112
- return
113
 
114
- # Apply filters
115
- filtered_df = filter_dataframe(df, filters)
116
- if filtered_df.empty:
117
- st.warning("No data matches the selected filters for plotting.")
118
- return
119
 
120
- # Build aggregation dictionary
121
- agg_dict = {
122
- "Prompt Processing": "mean",
123
- "Token Generation": "mean",
124
- "performance_score": "mean",
125
- "quant_factor": "first",
126
- }
 
 
 
 
 
 
 
127
 
128
- # Include memory metrics if available
129
- if "Memory Usage (%)" in filtered_df.columns:
130
- agg_dict["Memory Usage (%)"] = "mean"
131
- if "Peak Memory (GB)" in filtered_df.columns:
132
- agg_dict["Peak Memory (GB)"] = "mean"
133
-
134
- # Include device info if available
135
- if "CPU Cores" in filtered_df.columns:
136
- agg_dict["CPU Cores"] = "first"
137
-
138
- # Include config values
139
- agg_dict.update(
140
- {
141
- "PP Config": "first",
142
- "TG Config": "first",
143
- }
144
  )
145
 
146
- # Group by device and platform for plotting
147
- plot_group = filtered_df.groupby(["Device", "Platform"]).agg(agg_dict).reset_index()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
- # Rename columns for display
150
- column_mapping = {
151
- "Prompt Processing": "PP Avg (t/s)",
152
- "Token Generation": "TG Avg (t/s)",
153
- "Memory Usage (%) (mean)": "Memory Usage (%)",
154
- "Peak Memory (GB) (mean)": "Peak Memory (GB)",
155
- "PP Config (first)": "PP Config",
156
- "TG Config (first)": "TG Config",
157
- "Model Size (first)": "Model Size",
158
- "CPU Cores (first)": "CPU Cores",
159
- "Total Memory (GB) (first)": "Total Memory (GB)",
160
- "n_threads (first)": "n_threads",
161
- "flash_attn (first)": "flash_attn",
162
- "cache_type_k (first)": "cache_type_k",
163
- "cache_type_v (first)": "cache_type_v",
164
- "n_context (first)": "n_context",
165
- "n_batch (first)": "n_batch",
166
- "n_ubatch (first)": "n_ubatch",
167
- "performance_score (mean)": "Performance Score",
168
- "quant_factor (first)": "Quant Factor",
169
- }
170
- plot_group = plot_group.rename(columns=column_mapping)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
- # Define hover data
173
- hover_data = [
174
- "CPU Cores",
175
- "Peak Memory (GB)",
176
- "performance_score",
177
- "quant_factor",
178
- ]
179
 
180
- # Create plots in tabs
181
- tab1, tab2, tab3 = st.tabs(
182
- ["Token Generation", "Prompt Processing", "Overall Score"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  )
184
 
185
- with tab1:
186
- fig1 = create_performance_plot(
187
- plot_group,
188
- "TG Avg (t/s)",
189
- f"Token Generation (TG: {plot_group['TG Config'].iloc[0]})",
190
- hover_data=hover_data,
191
- )
192
- if fig1:
193
- st.plotly_chart(fig1, use_container_width=True)
194
-
195
- with tab2:
196
- fig2 = create_performance_plot(
197
- plot_group,
198
- "PP Avg (t/s)",
199
- f"Prompt Processing (PP: {plot_group['PP Config'].iloc[0]})",
200
- hover_data=hover_data,
201
- )
202
- if fig2:
203
- st.plotly_chart(fig2, use_container_width=True)
204
 
205
- with tab3:
206
- fig3 = create_performance_plot(
207
- plot_group,
208
- "performance_score",
209
- "Overall Performance Score (Normalized)",
210
- hover_data=hover_data,
211
- )
212
- if fig3:
213
- st.plotly_chart(fig3, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
 
216
  def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
 
6
  import plotly.express as px
7
  import pandas as pd
8
  from typing import Optional, Dict, List, Set
9
+ import plotly.graph_objects as go
10
 
11
 
12
  def create_performance_plot(
 
106
  return filtered_df
107
 
108
 
109
+ def create_model_size_performance_plot(df: pd.DataFrame, device: str, title: str):
110
+ """Create a plot showing model size vs performance metrics for a specific device"""
111
  if df.empty:
112
+ return None
 
113
 
114
+ # Filter for the selected device
115
+ device_df = df[df["Device"] == device].copy()
116
+ if device_df.empty:
117
+ return None
 
118
 
119
+ # Create a new figure with secondary y-axis
120
+ fig = go.Figure()
121
+
122
+ # Add Token Generation data (left y-axis)
123
+ fig.add_trace(
124
+ go.Scatter(
125
+ x=device_df["Model Size"],
126
+ y=device_df["Token Generation"],
127
+ name="Token Generation",
128
+ mode="markers",
129
+ marker=dict(color="#2ecc71"),
130
+ yaxis="y",
131
+ )
132
+ )
133
 
134
+ # Add Prompt Processing data (right y-axis)
135
+ fig.add_trace(
136
+ go.Scatter(
137
+ x=device_df["Model Size"],
138
+ y=device_df["Prompt Processing"],
139
+ name="Prompt Processing",
140
+ mode="markers",
141
+ marker=dict(color="#e74c3c"),
142
+ yaxis="y2",
143
+ )
 
 
 
 
 
 
144
  )
145
 
146
+ # Add trend lines if enough points
147
+ if len(device_df) > 2:
148
+ # TG trend line
149
+ tg_trend = px.scatter(
150
+ device_df, x="Model Size", y="Token Generation", trendline="lowess"
151
+ ).data[
152
+ 1
153
+ ] # Get the trend line trace
154
+ tg_trend.update(
155
+ line=dict(color="#2ecc71", dash="solid"),
156
+ name="TG Trend",
157
+ showlegend=False,
158
+ yaxis="y",
159
+ )
160
+ fig.add_trace(tg_trend)
161
+
162
+ # PP trend line
163
+ pp_trend = px.scatter(
164
+ device_df, x="Model Size", y="Prompt Processing", trendline="lowess"
165
+ ).data[
166
+ 1
167
+ ] # Get the trend line trace
168
+ pp_trend.update(
169
+ line=dict(color="#e74c3c", dash="solid"),
170
+ name="PP Trend",
171
+ showlegend=False,
172
+ yaxis="y2",
173
+ )
174
+ fig.add_trace(pp_trend)
175
 
176
+ # Update layout with two y-axes
177
+ fig.update_layout(
178
+ title=title,
179
+ xaxis=dict(
180
+ title="Model Size (B)",
181
+ gridcolor="lightgrey",
182
+ range=[
183
+ 0,
184
+ max(device_df["Model Size"]) * 1.05,
185
+ ], # Start from 0, add 5% padding to max
186
+ ),
187
+ yaxis=dict(
188
+ title="Token Generation (t/s)",
189
+ titlefont=dict(color="#2ecc71"),
190
+ tickfont=dict(color="#2ecc71"),
191
+ gridcolor="lightgrey",
192
+ side="left",
193
+ range=[
194
+ 0,
195
+ max(device_df["Token Generation"]) * 1.05,
196
+ ], # Start from 0, add 5% padding to max
197
+ ),
198
+ yaxis2=dict(
199
+ title="Prompt Processing (t/s)",
200
+ titlefont=dict(color="#e74c3c"),
201
+ tickfont=dict(color="#e74c3c"),
202
+ anchor="x",
203
+ overlaying="y",
204
+ side="right",
205
+ range=[
206
+ 0,
207
+ max(device_df["Prompt Processing"]) * 1.05,
208
+ ], # Start from 0, add 5% padding to max
209
+ ),
210
+ height=400,
211
+ showlegend=True,
212
+ plot_bgcolor="white",
213
+ legend=dict(
214
+ yanchor="middle",
215
+ y=0.8,
216
+ xanchor="right",
217
+ x=0.99,
218
+ bgcolor="rgba(255, 255, 255, 0.8)", # Semi-transparent white background
219
+ bordercolor="lightgrey",
220
+ borderwidth=1,
221
+ ),
222
+ )
223
+
224
+ return fig
225
 
 
 
 
 
 
 
 
226
 
227
+ def render_model_size_performance(df: pd.DataFrame, filters: Dict):
228
+ """Render the model size vs performance section independently"""
229
+ if df.empty:
230
+ st.warning("No data available for plotting.")
231
+ return
232
+
233
+ # Apply only device and platform filters for this section
234
+ size_perf_df = df.copy()
235
+ if filters["platform"] != "All":
236
+ size_perf_df = size_perf_df[size_perf_df["Platform"] == filters["platform"]]
237
+ if filters["device"] != "All":
238
+ size_perf_df = size_perf_df[size_perf_df["Device"] == filters["device"]]
239
+
240
+ # Device selector for size vs performance plots
241
+ selected_device = st.selectbox(
242
+ "Select Device",
243
+ options=sorted(size_perf_df["Device"].unique()),
244
+ help="Select a device to view its performance across different model sizes",
245
+ key="size_perf_device_selector",
246
  )
247
 
248
+ # Create and display the model size vs performance plot
249
+ size_perf_fig = create_model_size_performance_plot(
250
+ size_perf_df,
251
+ selected_device,
252
+ f"Model Size vs Performance Metrics for {selected_device}",
253
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
+ if size_perf_fig:
256
+ st.plotly_chart(size_perf_fig, use_container_width=True)
257
+ else:
258
+ st.warning("No data available for the selected device.")
259
+
260
+
261
+ def render_performance_plots(df: pd.DataFrame, filters: Dict):
262
+ """Render performance comparison plots"""
263
+ if df.empty:
264
+ st.warning("No data available for plotting.")
265
+ return
266
+
267
+ # Apply filters
268
+ filtered_df = filter_dataframe(df, filters)
269
+ if filtered_df.empty:
270
+ st.warning("No data matches the selected filters for plotting.")
271
+ return
272
+
273
+ # Add Model Size vs Performance section first
274
+ st.markdown("### 📊 Model Size vs Performance")
275
+ render_model_size_performance(df, filters)
276
 
277
 
278
  def render_leaderboard_table(df: pd.DataFrame, filters: Dict):