agh123 committed on
Commit
4660782
·
1 Parent(s): 6dbec32

feat: add Model Size vs Performance

Browse files
Files changed (3) hide show
  1. requirements.txt +1 -0
  2. src/app.py +3 -47
  3. src/components/visualizations.py +156 -94
requirements.txt CHANGED
@@ -6,3 +6,4 @@ plotly>=5.18.0
6
  httpx>=0.25.1
7
  pydantic-settings>=2.0.3
8
  firebase-admin==6.6.0
 
 
6
  httpx>=0.25.1
7
  pydantic-settings>=2.0.3
8
  firebase-admin==6.6.0
9
+ statsmodels>=0.14.1
src/app.py CHANGED
@@ -168,53 +168,9 @@ async def main():
168
 
169
  # Render plot section
170
  st.markdown("---")
171
- st.title("📊 Performance Comparison")
172
-
173
- # Plot specific selectors in a row
174
- plot_col1, plot_col2, plot_col3 = st.columns(3)
175
-
176
- with plot_col1:
177
- plot_model = st.selectbox(
178
- "Select Model for Comparison",
179
- options=models,
180
- key="plot_model_selector",
181
- )
182
-
183
- with plot_col2:
184
- pp_options = sorted([int(x) for x in df["PP Config"].unique()])
185
- default_pp_index = (
186
- pp_options.index(std.PP_CONFIG)
187
- if std.PP_CONFIG in pp_options
188
- else 0
189
- )
190
- plot_pp = st.selectbox(
191
- "Select PP Config for Comparison",
192
- options=pp_options,
193
- key="plot_pp_selector",
194
- index=default_pp_index,
195
- )
196
-
197
- with plot_col3:
198
- tg_options = sorted([int(x) for x in df["TG Config"].unique()])
199
- default_tg_index = (
200
- tg_options.index(std.TG_CONFIG)
201
- if std.TG_CONFIG in tg_options
202
- else 0
203
- )
204
- plot_tg = st.selectbox(
205
- "Select TG Config for Comparison",
206
- options=tg_options,
207
- key="plot_tg_selector",
208
- index=default_tg_index,
209
- )
210
-
211
- # Create plot filters based on table filters but override the model and configs
212
- plot_filters = table_filters.copy()
213
- plot_filters["model"] = plot_model
214
- plot_filters["pp_range"] = (plot_pp, plot_pp) # Set exact PP value
215
- plot_filters["tg_range"] = (plot_tg, plot_tg) # Set exact TG value
216
-
217
- render_performance_plots(df, plot_filters)
218
 
219
  with guide_col:
220
  render_contribution_guide()
 
168
 
169
  # Render plot section
170
  st.markdown("---")
171
+
172
+ # Render performance plots with table filters
173
+ render_performance_plots(df, table_filters)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  with guide_col:
176
  render_contribution_guide()
src/components/visualizations.py CHANGED
@@ -6,6 +6,7 @@ import streamlit as st
6
  import plotly.express as px
7
  import pandas as pd
8
  from typing import Optional, Dict, List, Set
 
9
 
10
 
11
  def create_performance_plot(
@@ -105,112 +106,173 @@ def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
105
  return filtered_df
106
 
107
 
108
- def render_performance_plots(df: pd.DataFrame, filters: Dict):
109
- """Render performance comparison plots"""
110
  if df.empty:
111
- st.warning("No data available for plotting.")
112
- return
113
 
114
- # Apply filters
115
- filtered_df = filter_dataframe(df, filters)
116
- if filtered_df.empty:
117
- st.warning("No data matches the selected filters for plotting.")
118
- return
119
 
120
- # Build aggregation dictionary
121
- agg_dict = {
122
- "Prompt Processing": "mean",
123
- "Token Generation": "mean",
124
- "performance_score": "mean",
125
- "quant_factor": "first",
126
- }
 
 
 
 
 
 
 
127
 
128
- # Include memory metrics if available
129
- if "Memory Usage (%)" in filtered_df.columns:
130
- agg_dict["Memory Usage (%)"] = "mean"
131
- if "Peak Memory (GB)" in filtered_df.columns:
132
- agg_dict["Peak Memory (GB)"] = "mean"
133
-
134
- # Include device info if available
135
- if "CPU Cores" in filtered_df.columns:
136
- agg_dict["CPU Cores"] = "first"
137
-
138
- # Include config values
139
- agg_dict.update(
140
- {
141
- "PP Config": "first",
142
- "TG Config": "first",
143
- }
144
  )
145
 
146
- # Group by device and platform for plotting
147
- plot_group = filtered_df.groupby(["Device", "Platform"]).agg(agg_dict).reset_index()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
- # Rename columns for display
150
- column_mapping = {
151
- "Prompt Processing": "PP Avg (t/s)",
152
- "Token Generation": "TG Avg (t/s)",
153
- "Memory Usage (%) (mean)": "Memory Usage (%)",
154
- "Peak Memory (GB) (mean)": "Peak Memory (GB)",
155
- "PP Config (first)": "PP Config",
156
- "TG Config (first)": "TG Config",
157
- "Model Size (first)": "Model Size",
158
- "CPU Cores (first)": "CPU Cores",
159
- "Total Memory (GB) (first)": "Total Memory (GB)",
160
- "n_threads (first)": "n_threads",
161
- "flash_attn (first)": "flash_attn",
162
- "cache_type_k (first)": "cache_type_k",
163
- "cache_type_v (first)": "cache_type_v",
164
- "n_context (first)": "n_context",
165
- "n_batch (first)": "n_batch",
166
- "n_ubatch (first)": "n_ubatch",
167
- "performance_score (mean)": "Performance Score",
168
- "quant_factor (first)": "Quant Factor",
169
- }
170
- plot_group = plot_group.rename(columns=column_mapping)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
- # Define hover data
173
- hover_data = [
174
- "CPU Cores",
175
- "Peak Memory (GB)",
176
- "performance_score",
177
- "quant_factor",
178
- ]
179
 
180
- # Create plots in tabs
181
- tab1, tab2, tab3 = st.tabs(
182
- ["Token Generation", "Prompt Processing", "Overall Score"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  )
184
 
185
- with tab1:
186
- fig1 = create_performance_plot(
187
- plot_group,
188
- "TG Avg (t/s)",
189
- f"Token Generation (TG: {plot_group['TG Config'].iloc[0]})",
190
- hover_data=hover_data,
191
- )
192
- if fig1:
193
- st.plotly_chart(fig1, use_container_width=True)
194
-
195
- with tab2:
196
- fig2 = create_performance_plot(
197
- plot_group,
198
- "PP Avg (t/s)",
199
- f"Prompt Processing (PP: {plot_group['PP Config'].iloc[0]})",
200
- hover_data=hover_data,
201
- )
202
- if fig2:
203
- st.plotly_chart(fig2, use_container_width=True)
204
 
205
- with tab3:
206
- fig3 = create_performance_plot(
207
- plot_group,
208
- "performance_score",
209
- "Overall Performance Score (Normalized)",
210
- hover_data=hover_data,
211
- )
212
- if fig3:
213
- st.plotly_chart(fig3, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
 
216
  def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
 
6
  import plotly.express as px
7
  import pandas as pd
8
  from typing import Optional, Dict, List, Set
9
+ import plotly.graph_objects as go
10
 
11
 
12
  def create_performance_plot(
 
106
  return filtered_df
107
 
108
 
109
+ def create_model_size_performance_plot(df: pd.DataFrame, device: str, title: str):
110
+ """Create a plot showing model size vs performance metrics for a specific device"""
111
  if df.empty:
112
+ return None
 
113
 
114
+ # Filter for the selected device
115
+ device_df = df[df["Device"] == device].copy()
116
+ if device_df.empty:
117
+ return None
 
118
 
119
+ # Create a new figure with secondary y-axis
120
+ fig = go.Figure()
121
+
122
+ # Add Token Generation data (left y-axis)
123
+ fig.add_trace(
124
+ go.Scatter(
125
+ x=device_df["Model Size"],
126
+ y=device_df["Token Generation"],
127
+ name="Token Generation",
128
+ mode="markers",
129
+ marker=dict(color="#2ecc71"),
130
+ yaxis="y",
131
+ )
132
+ )
133
 
134
+ # Add Prompt Processing data (right y-axis)
135
+ fig.add_trace(
136
+ go.Scatter(
137
+ x=device_df["Model Size"],
138
+ y=device_df["Prompt Processing"],
139
+ name="Prompt Processing",
140
+ mode="markers",
141
+ marker=dict(color="#e74c3c"),
142
+ yaxis="y2",
143
+ )
 
 
 
 
 
 
144
  )
145
 
146
+ # Add trend lines if enough points
147
+ if len(device_df) > 2:
148
+ # TG trend line
149
+ tg_trend = px.scatter(
150
+ device_df, x="Model Size", y="Token Generation", trendline="lowess"
151
+ ).data[
152
+ 1
153
+ ] # Get the trend line trace
154
+ tg_trend.update(
155
+ line=dict(color="#2ecc71", dash="solid"),
156
+ name="TG Trend",
157
+ showlegend=False,
158
+ yaxis="y",
159
+ )
160
+ fig.add_trace(tg_trend)
161
+
162
+ # PP trend line
163
+ pp_trend = px.scatter(
164
+ device_df, x="Model Size", y="Prompt Processing", trendline="lowess"
165
+ ).data[
166
+ 1
167
+ ] # Get the trend line trace
168
+ pp_trend.update(
169
+ line=dict(color="#e74c3c", dash="solid"),
170
+ name="PP Trend",
171
+ showlegend=False,
172
+ yaxis="y2",
173
+ )
174
+ fig.add_trace(pp_trend)
175
 
176
+ # Update layout with two y-axes
177
+ fig.update_layout(
178
+ title=title,
179
+ xaxis=dict(
180
+ title="Model Size (B)",
181
+ gridcolor="lightgrey",
182
+ range=[
183
+ 0,
184
+ max(device_df["Model Size"]) * 1.05,
185
+ ], # Start from 0, add 5% padding to max
186
+ ),
187
+ yaxis=dict(
188
+ title="Token Generation (t/s)",
189
+ titlefont=dict(color="#2ecc71"),
190
+ tickfont=dict(color="#2ecc71"),
191
+ gridcolor="lightgrey",
192
+ side="left",
193
+ range=[
194
+ 0,
195
+ max(device_df["Token Generation"]) * 1.05,
196
+ ], # Start from 0, add 5% padding to max
197
+ ),
198
+ yaxis2=dict(
199
+ title="Prompt Processing (t/s)",
200
+ titlefont=dict(color="#e74c3c"),
201
+ tickfont=dict(color="#e74c3c"),
202
+ anchor="x",
203
+ overlaying="y",
204
+ side="right",
205
+ range=[
206
+ 0,
207
+ max(device_df["Prompt Processing"]) * 1.05,
208
+ ], # Start from 0, add 5% padding to max
209
+ ),
210
+ height=400,
211
+ showlegend=True,
212
+ plot_bgcolor="white",
213
+ legend=dict(
214
+ yanchor="middle",
215
+ y=0.8,
216
+ xanchor="right",
217
+ x=0.99,
218
+ bgcolor="rgba(255, 255, 255, 0.8)", # Semi-transparent white background
219
+ bordercolor="lightgrey",
220
+ borderwidth=1,
221
+ ),
222
+ )
223
+
224
+ return fig
225
 
 
 
 
 
 
 
 
226
 
227
+ def render_model_size_performance(df: pd.DataFrame, filters: Dict):
228
+ """Render the model size vs performance section independently"""
229
+ if df.empty:
230
+ st.warning("No data available for plotting.")
231
+ return
232
+
233
+ # Apply only device and platform filters for this section
234
+ size_perf_df = df.copy()
235
+ if filters["platform"] != "All":
236
+ size_perf_df = size_perf_df[size_perf_df["Platform"] == filters["platform"]]
237
+ if filters["device"] != "All":
238
+ size_perf_df = size_perf_df[size_perf_df["Device"] == filters["device"]]
239
+
240
+ # Device selector for size vs performance plots
241
+ selected_device = st.selectbox(
242
+ "Select Device",
243
+ options=sorted(size_perf_df["Device"].unique()),
244
+ help="Select a device to view its performance across different model sizes",
245
+ key="size_perf_device_selector",
246
  )
247
 
248
+ # Create and display the model size vs performance plot
249
+ size_perf_fig = create_model_size_performance_plot(
250
+ size_perf_df,
251
+ selected_device,
252
+ f"Model Size vs Performance Metrics for {selected_device}",
253
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
+ if size_perf_fig:
256
+ st.plotly_chart(size_perf_fig, use_container_width=True)
257
+ else:
258
+ st.warning("No data available for the selected device.")
259
+
260
+
261
+ def render_performance_plots(df: pd.DataFrame, filters: Dict):
262
+ """Render performance comparison plots"""
263
+ if df.empty:
264
+ st.warning("No data available for plotting.")
265
+ return
266
+
267
+ # Apply filters
268
+ filtered_df = filter_dataframe(df, filters)
269
+ if filtered_df.empty:
270
+ st.warning("No data matches the selected filters for plotting.")
271
+ return
272
+
273
+ # Add Model Size vs Performance section first
274
+ st.markdown("### 📊 Model Size vs Performance")
275
+ render_model_size_performance(df, filters)
276
 
277
 
278
  def render_leaderboard_table(df: pd.DataFrame, filters: Dict):