feat: add Model Size vs Performance
Files changed:
- requirements.txt +1 -0
- src/app.py +3 -47
- src/components/visualizations.py +156 -94
requirements.txt
CHANGED
@@ -6,3 +6,4 @@ plotly>=5.18.0
 httpx>=0.25.1
 pydantic-settings>=2.0.3
 firebase-admin==6.6.0
+statsmodels>=0.14.1
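Note: statsmodels is added because the new trend lines in src/components/visualizations.py are drawn with Plotly Express (trendline="lowess"), and Plotly delegates that fit to statsmodels, failing at runtime when it is not installed. A minimal check of the dependency, with made-up values purely for illustration:

    import pandas as pd
    import plotly.express as px

    # Toy frame; the column names mirror the ones the new plot uses, the numbers do not.
    demo = pd.DataFrame({"Model Size": [1, 3, 7, 13], "Token Generation": [40, 22, 12, 6]})

    # This call only succeeds once statsmodels is installed.
    fig = px.scatter(demo, x="Model Size", y="Token Generation", trendline="lowess")
    print(len(fig.data))  # 2 traces: the markers and the fitted lowess line (fig.data[1])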
src/app.py
CHANGED
@@ -168,53 +168,9 @@ async def main():
 
     # Render plot section
    st.markdown("---")
-
-
-
-    plot_col1, plot_col2, plot_col3 = st.columns(3)
-
-    with plot_col1:
-        plot_model = st.selectbox(
-            "Select Model for Comparison",
-            options=models,
-            key="plot_model_selector",
-        )
-
-    with plot_col2:
-        pp_options = sorted([int(x) for x in df["PP Config"].unique()])
-        default_pp_index = (
-            pp_options.index(std.PP_CONFIG)
-            if std.PP_CONFIG in pp_options
-            else 0
-        )
-        plot_pp = st.selectbox(
-            "Select PP Config for Comparison",
-            options=pp_options,
-            key="plot_pp_selector",
-            index=default_pp_index,
-        )
-
-    with plot_col3:
-        tg_options = sorted([int(x) for x in df["TG Config"].unique()])
-        default_tg_index = (
-            tg_options.index(std.TG_CONFIG)
-            if std.TG_CONFIG in tg_options
-            else 0
-        )
-        plot_tg = st.selectbox(
-            "Select TG Config for Comparison",
-            options=tg_options,
-            key="plot_tg_selector",
-            index=default_tg_index,
-        )
-
-    # Create plot filters based on table filters but override the model and configs
-    plot_filters = table_filters.copy()
-    plot_filters["model"] = plot_model
-    plot_filters["pp_range"] = (plot_pp, plot_pp)  # Set exact PP value
-    plot_filters["tg_range"] = (plot_tg, plot_tg)  # Set exact TG value
-
-    render_performance_plots(df, plot_filters)
+
+    # Render performance plots with table filters
+    render_performance_plots(df, table_filters)
 
     with guide_col:
        render_contribution_guide()
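Note: the rewritten section passes table_filters straight to render_performance_plots instead of copying it and overriding "model", "pp_range", and "tg_range" per plot. The exact shape of table_filters is not visible in this diff; the sketch below is an assumption inferred from the keys this commit reads (filters["platform"], filters["device"]) and the ones the removed code used to override, with purely illustrative values:

    # Hypothetical filter dict; the values are placeholders, not app data.
    table_filters = {
        "model": "All",
        "platform": "All",
        "device": "All",
        "pp_range": (64, 1024),  # (min, max) PP Config
        "tg_range": (16, 512),   # (min, max) TG Config
    }
    render_performance_plots(df, table_filters)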
src/components/visualizations.py
CHANGED
@@ -6,6 +6,7 @@ import streamlit as st
 import plotly.express as px
 import pandas as pd
 from typing import Optional, Dict, List, Set
+import plotly.graph_objects as go
 
 
 def create_performance_plot(
@@ -105,112 +106,173 @@ def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
     return filtered_df
 
 
-# [Removed: the previous render_performance_plots body (94 deleted lines). Only fragments
-#  survive in this view: a per-model aggregation that kept the config columns via
-#  agg_dict.update({"PP Config": "first", "TG Config": "first"}); a hover_data list of
-#  ["CPU Cores", "Peak Memory (GB)", "performance_score", "quant_factor"]; and two tabbed
-#  charts built with create_performance_plot (fig2 on "PP Avg (t/s)" titled
-#  f"Prompt Processing (PP: {plot_group['PP Config'].iloc[0]})"), each shown with
-#  st.plotly_chart(..., use_container_width=True).]
+def create_model_size_performance_plot(df: pd.DataFrame, device: str, title: str):
+    """Create a plot showing model size vs performance metrics for a specific device"""
+    if df.empty:
+        return None
+
+    # Filter for the selected device
+    device_df = df[df["Device"] == device].copy()
+    if device_df.empty:
+        return None
+
+    # Create a new figure with secondary y-axis
+    fig = go.Figure()
+
+    # Add Token Generation data (left y-axis)
+    fig.add_trace(
+        go.Scatter(
+            x=device_df["Model Size"],
+            y=device_df["Token Generation"],
+            name="Token Generation",
+            mode="markers",
+            marker=dict(color="#2ecc71"),
+            yaxis="y",
+        )
+    )
+
+    # Add Prompt Processing data (right y-axis)
+    fig.add_trace(
+        go.Scatter(
+            x=device_df["Model Size"],
+            y=device_df["Prompt Processing"],
+            name="Prompt Processing",
+            mode="markers",
+            marker=dict(color="#e74c3c"),
+            yaxis="y2",
+        )
+    )
+
+    # Add trend lines if enough points
+    if len(device_df) > 2:
+        # TG trend line
+        tg_trend = px.scatter(
+            device_df, x="Model Size", y="Token Generation", trendline="lowess"
+        ).data[
+            1
+        ]  # Get the trend line trace
+        tg_trend.update(
+            line=dict(color="#2ecc71", dash="solid"),
+            name="TG Trend",
+            showlegend=False,
+            yaxis="y",
+        )
+        fig.add_trace(tg_trend)
+
+        # PP trend line
+        pp_trend = px.scatter(
+            device_df, x="Model Size", y="Prompt Processing", trendline="lowess"
+        ).data[
+            1
+        ]  # Get the trend line trace
+        pp_trend.update(
+            line=dict(color="#e74c3c", dash="solid"),
+            name="PP Trend",
+            showlegend=False,
+            yaxis="y2",
+        )
+        fig.add_trace(pp_trend)
+
+    # Update layout with two y-axes
+    fig.update_layout(
+        title=title,
+        xaxis=dict(
+            title="Model Size (B)",
+            gridcolor="lightgrey",
+            range=[
+                0,
+                max(device_df["Model Size"]) * 1.05,
+            ],  # Start from 0, add 5% padding to max
+        ),
+        yaxis=dict(
+            title="Token Generation (t/s)",
+            titlefont=dict(color="#2ecc71"),
+            tickfont=dict(color="#2ecc71"),
+            gridcolor="lightgrey",
+            side="left",
+            range=[
+                0,
+                max(device_df["Token Generation"]) * 1.05,
+            ],  # Start from 0, add 5% padding to max
+        ),
+        yaxis2=dict(
+            title="Prompt Processing (t/s)",
+            titlefont=dict(color="#e74c3c"),
+            tickfont=dict(color="#e74c3c"),
+            anchor="x",
+            overlaying="y",
+            side="right",
+            range=[
+                0,
+                max(device_df["Prompt Processing"]) * 1.05,
+            ],  # Start from 0, add 5% padding to max
+        ),
+        height=400,
+        showlegend=True,
+        plot_bgcolor="white",
+        legend=dict(
+            yanchor="middle",
+            y=0.8,
+            xanchor="right",
+            x=0.99,
+            bgcolor="rgba(255, 255, 255, 0.8)",  # Semi-transparent white background
+            bordercolor="lightgrey",
+            borderwidth=1,
+        ),
+    )
+
+    return fig
+
+
+def render_model_size_performance(df: pd.DataFrame, filters: Dict):
+    """Render the model size vs performance section independently"""
+    if df.empty:
+        st.warning("No data available for plotting.")
+        return
+
+    # Apply only device and platform filters for this section
+    size_perf_df = df.copy()
+    if filters["platform"] != "All":
+        size_perf_df = size_perf_df[size_perf_df["Platform"] == filters["platform"]]
+    if filters["device"] != "All":
+        size_perf_df = size_perf_df[size_perf_df["Device"] == filters["device"]]
+
+    # Device selector for size vs performance plots
+    selected_device = st.selectbox(
+        "Select Device",
+        options=sorted(size_perf_df["Device"].unique()),
+        help="Select a device to view its performance across different model sizes",
+        key="size_perf_device_selector",
+    )
+
+    # Create and display the model size vs performance plot
+    size_perf_fig = create_model_size_performance_plot(
+        size_perf_df,
+        selected_device,
+        f"Model Size vs Performance Metrics for {selected_device}",
+    )
+
+    if size_perf_fig:
+        st.plotly_chart(size_perf_fig, use_container_width=True)
+    else:
+        st.warning("No data available for the selected device.")
+
+
+def render_performance_plots(df: pd.DataFrame, filters: Dict):
+    """Render performance comparison plots"""
+    if df.empty:
+        st.warning("No data available for plotting.")
+        return
+
+    # Apply filters
+    filtered_df = filter_dataframe(df, filters)
+    if filtered_df.empty:
+        st.warning("No data matches the selected filters for plotting.")
+        return
+
+    # Add Model Size vs Performance section first
+    st.markdown("### 📊 Model Size vs Performance")
+    render_model_size_performance(df, filters)
 
 
 def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
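Note: create_model_size_performance_plot only reads the "Device", "Model Size", "Token Generation", and "Prompt Processing" columns and overlays the two metrics on separate y-axes (yaxis="y2" with overlaying="y"), so it can be exercised outside Streamlit. A minimal sketch, assuming the module resolves at the import path shown and using a hypothetical device name with made-up benchmark numbers:

    import pandas as pd
    from src.components.visualizations import create_model_size_performance_plot  # adjust to your package layout

    # Illustrative data for one device; more than two rows so the lowess trend lines are drawn.
    toy = pd.DataFrame({
        "Device": ["Device X"] * 4,
        "Model Size": [1.1, 3.8, 7.2, 13.0],              # billions of parameters
        "Token Generation": [42.0, 21.5, 11.8, 6.3],      # t/s
        "Prompt Processing": [310.0, 150.2, 80.5, 41.7],  # t/s
    })

    fig = create_model_size_performance_plot(toy, "Device X", "Model Size vs Performance (toy data)")
    if fig:
        fig.show()  # inside the app this is st.plotly_chart(fig, use_container_width=True)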