agh123 committed on
Commit d2c5913 · 1 Parent(s): 9543568

update the code based on data format change

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+src/static/images/Bench.gif filter=lfs diff=lfs merge=lfs -text
main.py CHANGED
@@ -1,79 +1,11 @@
-import streamlit as st
-import asyncio
-from src.core.styles import CUSTOM_CSS
-from src.components.header import render_header
-from src.components.filters import render_table_filters, render_plot_filters
-from src.components.visualizations import (
-    render_performance_plots,
-    render_leaderboard_table,
-)
-from src.services.firebase import fetch_leaderboard_data
-
-# Configure the page
-st.set_page_config(
-    page_title="AI-Phone Leaderboard",
-    page_icon="src/static/images/favicon.png",
-    layout="wide",
-    initial_sidebar_state="expanded",
-)
-
-# Apply custom CSS
-st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
-
-async def main():
-    # Render header
-    render_header()
-
-    # Fetch initial data
-    full_df = await fetch_leaderboard_data()
-    if full_df.empty:
-        st.info("No benchmark data available yet!")
-        return
-
-    # Get unique values for filters
-    models = sorted(full_df["Model"].unique())
-    benchmarks = sorted(full_df["Benchmark"].unique())
-    platforms = sorted(full_df["Platform"].unique())
-    devices = sorted(full_df["Normalized Device ID"].unique())
-
-    # Render table filters and get selections
-    (
-        selected_model_table,
-        selected_benchmark_table,
-        selected_platform_table,
-        selected_device_table,
-    ) = render_table_filters(models, benchmarks, platforms, devices)
+"""
+Main module for the frontend application.
+This file serves as a module init file.
+"""
 
-    # Filter data for table
-    table_df = full_df.copy()
-    if selected_model_table != "All":
-        table_df = table_df[table_df["Model"] == selected_model_table]
-    if selected_benchmark_table != "All":
-        table_df = table_df[table_df["Benchmark"] == selected_benchmark_table]
-    if selected_platform_table != "All":
-        table_df = table_df[table_df["Platform"] == selected_platform_table]
-    if selected_device_table != "All":
-        table_df = table_df[table_df["Normalized Device ID"] == selected_device_table]
-
-    # Render leaderboard table
-    render_leaderboard_table(table_df)
-
-    # Performance plots section
-    st.subheader("Performance Comparison")
-
-    # Render plot filters and get selections
-    selected_model_plot, selected_benchmark_plot = render_plot_filters(
-        models, benchmarks
-    )
-
-    # Filter data for plots
-    plot_df = full_df[
-        (full_df["Model"] == selected_model_plot)
-        & (full_df["Benchmark"] == selected_benchmark_plot)
-    ]
-
-    # Render performance plots
-    render_performance_plots(plot_df, selected_model_plot)
+import asyncio
+import streamlit as st
+from src.app import main
 
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
src/app.py CHANGED
@@ -1,15 +1,54 @@
 import asyncio
-from typing import Optional
+import streamlit as st
 import pandas as pd
+from typing import Optional, List, Set
 
-
-async def fetch_and_filter_data(
-    model_name: Optional[str] = None,
-    benchmark_label: Optional[str] = None
-) -> pd.DataFrame:
-    """Fetch and filter data based on parameters"""
-    from .services.firebase import fetch_leaderboard_data
-
-    return await fetch_leaderboard_data(
-        model_name=model_name,
-        benchmark_label=benchmark_label
-    )
+from .components.filters import render_table_filters, render_plot_filters
+from .components.visualizations import (
+    render_leaderboard_table,
+    render_performance_plots,
+)
+from .services.firebase import fetch_leaderboard_data
+
+
+def get_unique_values(df: pd.DataFrame) -> tuple[List[str], List[str], List[str]]:
+    """Get unique values for filters"""
+    models = sorted(df["Model ID"].unique().tolist())
+    platforms = sorted(df["Platform"].unique().tolist())
+    devices = sorted(df["Device"].unique().tolist())
+    return models, platforms, devices
+
+
+async def main():
+    """Main application entry point"""
+    st.set_page_config(
+        page_title="AI Phone Benchmark Leaderboard",
+        page_icon="📱",
+        layout="wide",
+    )
+
+    # Fetch initial data
+    df = await fetch_leaderboard_data()
+
+    if df.empty:
+        st.error("No data available. Please check your connection and try again.")
+        return
+
+    # Get unique values for filters
+    models, platforms, devices = get_unique_values(df)
+
+    # Render table filters in sidebar
+    table_filters = render_table_filters(models, platforms, devices)
+
+    # Render the main leaderboard table
+    st.title("📱 AI Phone Benchmark Leaderboard")
+    render_leaderboard_table(df, table_filters)
+
+    # Render plot section
+    st.title("📊 Performance Comparison")
+    plot_filters = render_plot_filters(models, platforms, devices)
+    render_performance_plots(df, plot_filters)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
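Note (not part of the commit): after the data format change, `get_unique_values` keys the leaderboard frame on "Model ID", "Platform", and "Device" instead of the old "Model", "Benchmark", and "Normalized Device ID" columns. A minimal sketch with invented rows, mirroring the function body rather than importing src.app (which would also pull in the Firebase service):

```python
# Hypothetical illustration only; the model/device names are made up.
import pandas as pd

toy_df = pd.DataFrame(
    {
        "Model ID": ["llama-3.2-1b", "qwen-2.5-0.5b", "llama-3.2-1b"],  # made-up IDs
        "Platform": ["iOS", "Android", "Android"],
        "Device": ["iPhone 15", "Pixel 8", "Pixel 8"],
    }
)

# Same three lines as get_unique_values in src/app.py above
models = sorted(toy_df["Model ID"].unique().tolist())
platforms = sorted(toy_df["Platform"].unique().tolist())
devices = sorted(toy_df["Device"].unique().tolist())

print(models)     # ['llama-3.2-1b', 'qwen-2.5-0.5b']
print(platforms)  # ['Android', 'iOS']
print(devices)    # ['Pixel 8', 'iPhone 15']
```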
src/components/filters.py CHANGED
@@ -1,50 +1,206 @@
 import streamlit as st
-from typing import List, Tuple
-
-def render_table_filters(
-    models: List[str],
-    benchmarks: List[str],
-    platforms: List[str],
-    devices: List[str]
-) -> Tuple[str, str, str, str]:
-    """Render and handle table filters"""
-    table_filters = st.container()
-    with table_filters:
-        t1, t2, t3, t4 = st.columns(4)
-        with t1:
-            selected_model = st.selectbox(
-                "Model", ["All"] + list(models), key="table_model"
-            )
-        with t2:
-            selected_benchmark = st.selectbox(
-                "Benchmark", ["All"] + list(benchmarks), key="table_benchmark"
-            )
-        with t3:
-            selected_platform = st.selectbox(
-                "Platform", ["All"] + list(platforms), key="table_platform"
-            )
-        with t4:
-            selected_device = st.selectbox(
-                "Device", ["All"] + list(devices), key="table_device"
-            )
-
-    return selected_model, selected_benchmark, selected_platform, selected_device
-
-def render_plot_filters(
-    models: List[str],
-    benchmarks: List[str]
-) -> Tuple[str, str]:
-    """Render and handle plot filters"""
-    plot_filters = st.container()
-    with plot_filters:
-        p1, p2 = st.columns(2)
-        with p1:
-            selected_model = st.selectbox(
-                "Model for Comparison", models, key="plot_model"
-            )
-        with p2:
-            selected_benchmark = st.selectbox(
-                "Benchmark for Comparison", benchmarks, key="plot_benchmark"
-            )
-
-    return selected_model, selected_benchmark
+from typing import List, Tuple, Dict, Set
+
+
+def render_grouping_options() -> List[str]:
+    """Render grouping options selector"""
+    available_groups = [
+        "Model ID",
+        "Device",
+        "Platform",
+        "n_threads",
+        "flash_attn",
+        "cache_type_k",
+        "cache_type_v",
+        "PP Value",
+        "TG Value",
+    ]
+
+    default_groups = ["Model ID", "Device", "Platform"]
+
+    selected_groups = st.multiselect(
+        "Group Results By",
+        options=available_groups,
+        default=default_groups,
+        help="Select columns to group the results by",
+    )
+
+    return selected_groups
+
+
+def render_column_visibility() -> Set[str]:
+    """Render column visibility selector"""
+    column_categories = {
+        "Device Info": [
+            "Device",
+            "Platform",
+            "CPU Cores",
+            "Total Memory (GB)",
+            "Memory Usage (%)",
+        ],
+        "Benchmark Info": [
+            "PP Value",
+            "TG Value",
+            "Prompt Processing",
+            "Token Generation",
+        ],
+        "Model Info": [
+            "Model",
+            "Model Size",
+            "Model ID",
+        ],
+        "Advanced": [
+            "n_threads",
+            "flash_attn",
+            "cache_type_k",
+            "cache_type_v",
+        ],
+    }
+
+    # Default visible columns
+    default_columns = {
+        "Device",
+        "Platform",
+        "Model",
+        "Model Size",
+        "Prompt Processing",
+        "Token Generation",
+    }
+
+    with st.expander("Column Visibility", expanded=False):
+        selected_columns = set()
+        for category, columns in column_categories.items():
+            st.subheader(category)
+            for col in columns:
+                if st.checkbox(col, value=col in default_columns):
+                    selected_columns.add(col)
+
+    return selected_columns
+
+
+def render_benchmark_filters() -> Dict:
+    """Render advanced benchmark configuration filters"""
+    with st.expander("Benchmark Configuration", expanded=False):
+        use_custom_config = st.checkbox("Use Custom PP/TG Values", value=False)
+
+        if use_custom_config:
+            col1, col2 = st.columns(2)
+            with col1:
+                pp_min = st.number_input("Min PP", value=0, step=32)
+                pp_max = st.number_input("Max PP", value=1024, step=32)
+            with col2:
+                tg_min = st.number_input("Min TG", value=0, step=32)
+                tg_max = st.number_input("Max TG", value=512, step=32)
+        else:
+            pp_min = pp_max = tg_min = tg_max = None
+
+    return {
+        "use_custom_config": use_custom_config,
+        "pp_range": (pp_min, pp_max),
+        "tg_range": (tg_min, tg_max),
+    }
+
+
+def render_advanced_filters() -> Dict:
+    """Render advanced settings filters"""
+    with st.expander("Advanced Settings", expanded=False):
+        col1, col2 = st.columns(2)
+
+        with col1:
+            n_threads = st.multiselect(
+                "Number of Threads", options=[1, 2, 4, 8, 16], default=None
+            )
+            flash_attn = st.multiselect(
+                "Flash Attention", options=[True, False], default=None
+            )
+
+        with col2:
+            cache_type = st.multiselect(
+                "Cache Type", options=["f16", "f32"], default=None
+            )
+            memory_usage = st.slider(
+                "Max Memory Usage (%)", min_value=0, max_value=100, value=100
+            )
+
+    return {
+        "n_threads": n_threads,
+        "flash_attn": flash_attn,
+        "cache_type": cache_type,
+        "max_memory_usage": memory_usage,
+    }
+
+
+def render_plot_filters(
+    models: List[str], platforms: List[str], devices: List[str]
+) -> Dict:
+    """Render and handle plot filters"""
+    plot_filters = st.container()
+    with plot_filters:
+        p1, p2, p3 = st.columns(3)
+        with p1:
+            selected_model = st.selectbox("Model for Plot", models, key="plot_model")
+        with p2:
+            selected_platform = st.selectbox(
+                "Platform for Plot", ["All"] + list(platforms), key="plot_platform"
+            )
+        with p3:
+            selected_device = st.selectbox(
+                "Device for Plot", ["All"] + list(devices), key="plot_device"
+            )
+
+    # Use the same benchmark and advanced filters as the table
+    benchmark_config = render_benchmark_filters()
+    advanced_settings = render_advanced_filters()
+
+    return {
+        "basic_filters": {
+            "model": selected_model,
+            "platform": selected_platform,
+            "device": selected_device,
+        },
+        "benchmark_config": benchmark_config,
+        "advanced_settings": advanced_settings,
+    }
+
+
+def render_table_filters(
+    models: List[str], platforms: List[str], devices: List[str]
+) -> Dict:
+    """Render and handle all table filters"""
+    st.sidebar.title("Filters")
+
+    # Basic filters
+    selected_model = st.sidebar.selectbox(
+        "Model", ["All"] + list(models), key="table_model"
+    )
+    selected_platform = st.sidebar.selectbox(
+        "Platform", ["All"] + list(platforms), key="table_platform"
+    )
+    selected_device = st.sidebar.selectbox(
+        "Device", ["All"] + list(devices), key="table_device"
+    )
+
+    # Grouping options
+    st.sidebar.title("Display Options")
+    grouping = render_grouping_options()
+
+    # Column visibility
+    visible_columns = render_column_visibility()
+
+    # Benchmark configuration
+    benchmark_config = render_benchmark_filters()
+
+    # Advanced settings
+    advanced_settings = render_advanced_filters()
+
+    return {
+        "basic_filters": {
+            "model": selected_model,
+            "platform": selected_platform,
+            "device": selected_device,
+        },
+        "grouping": grouping,
+        "visible_columns": visible_columns,
+        "benchmark_config": benchmark_config,
+        "advanced_settings": advanced_settings,
+    }
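For reference (not part of the commit): the nested dict that `render_table_filters` now returns, and that `filter_dataframe` in src/components/visualizations.py consumes, has roughly the shape below. The concrete values are hypothetical defaults; the keys mirror the code above.

```python
# Hypothetical filter payload; values are illustrative only.
table_filters = {
    "basic_filters": {
        "model": "All",       # or one specific "Model ID"
        "platform": "All",
        "device": "All",
    },
    "grouping": ["Model ID", "Device", "Platform"],
    "visible_columns": {
        "Device", "Platform", "Model", "Model Size",
        "Prompt Processing", "Token Generation",
    },
    "benchmark_config": {
        "use_custom_config": False,
        "pp_range": (None, None),
        "tg_range": (None, None),
    },
    "advanced_settings": {
        "n_threads": [],      # empty multiselects mean "no filtering"
        "flash_attn": [],
        "cache_type": [],
        "max_memory_usage": 100,
    },
}

# render_plot_filters returns the same structure minus "grouping" and
# "visible_columns", so both payloads can be passed to filter_dataframe().
```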
src/components/visualizations.py CHANGED
@@ -1,7 +1,8 @@
 import streamlit as st
 import plotly.express as px
 import pandas as pd
-from typing import Optional
+from typing import Optional, Dict, List, Set
+
 
 def create_performance_plot(df: pd.DataFrame, metric: str, title: str):
     """Create a performance comparison plot"""
@@ -27,93 +28,275 @@ def create_performance_plot(df: pd.DataFrame, metric: str, title: str):
     )
     return fig
 
-def render_performance_plots(plot_df: pd.DataFrame, model_name: str):
-    """Render performance comparison plots"""
-    if plot_df.empty:
-        st.warning(
-            "No data available for the selected model and benchmark combination."
-        )
-        return
-
-    col1, col2 = st.columns(2)
-    with col1:
-        fig1 = create_performance_plot(
-            plot_df,
-            "Prompt Processing",
-            f"Prompt Processing Time - {model_name}",
-        )
-        if fig1:
-            st.plotly_chart(fig1, use_container_width=True)
-
-    with col2:
-        fig2 = create_performance_plot(
-            plot_df,
-            "Token Generation",
-            f"Token Generation Time - {model_name}",
-        )
-        if fig2:
-            st.plotly_chart(fig2, use_container_width=True)
-
-def render_leaderboard_table(df: pd.DataFrame):
-    """Render the leaderboard table with grouped and formatted data"""
-    # Group and average the results
-    grouped_df = (
-        df.groupby(
-            ["Model ID", "Benchmark", "Normalized Device ID", "Platform", "Device", "Model Size", "CPU Cores"]
-        )
-        .agg(
-            {
-                "Prompt Processing": ["mean", "count", "std"],
-                "Token Generation": ["mean", "std"],
-            }
-        )
-        .reset_index()
-    )
-
-    # Flatten column names
-    grouped_df.columns = [
-        col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
-    ]
-
-    # Round numeric columns
-    numeric_cols = [
-        "Prompt Processing (mean)",
-        "Prompt Processing (std)",
-        "Token Generation (mean)",
-        "Token Generation (std)",
-    ]
-    grouped_df[numeric_cols] = grouped_df[numeric_cols].round(2)
-
-    # Rename columns for display
-    grouped_df = grouped_df.rename(
-        columns={
-            "Prompt Processing (mean)": "PP Avg (s)",
-            "Prompt Processing (std)": "PP Std",
-            "Prompt Processing (count)": "Runs",
-            "Token Generation (mean)": "TG Avg (s)",
-            "Token Generation (std)": "TG Std",
-        }
-    )
-
-    # Reorder columns for display
-    display_cols = [
-        "Platform",
-        "Device",
-        "Model ID",
-        "Model Size",
-        "Benchmark",
-        "TG Avg (s)",
-        "TG Std",
-        "PP Avg (s)",
-        "PP Std",
-    ]
-
-    # Display the filtered and grouped table
-    st.dataframe(
-        grouped_df[display_cols].sort_values(
-            ["Model Size", "Benchmark", "TG Avg (s)"],
-            ascending=[False, True, True],
-        ),
-        use_container_width=True,
-        height=400,
-    )
+
+def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
+    """Apply all filters to the dataframe"""
+    if df.empty:
+        return df
+
+    # Basic filters
+    basic_filters = filters["basic_filters"]
+    if basic_filters["model"] != "All":
+        df = df[df["Model ID"] == basic_filters["model"]]
+    if basic_filters["platform"] != "All":
+        df = df[df["Platform"] == basic_filters["platform"]]
+    if basic_filters["device"] != "All":
+        df = df[df["Device"] == basic_filters["device"]]
+
+    # Benchmark configuration filters
+    benchmark_config = filters["benchmark_config"]
+    if benchmark_config["use_custom_config"]:
+        pp_min, pp_max = benchmark_config["pp_range"]
+        tg_min, tg_max = benchmark_config["tg_range"]
+
+        # Extract PP/TG values if not already present
+        if "PP Value" not in df.columns:
+            df["PP Value"] = df["Benchmark"].apply(
+                lambda x: int(x.split("pp: ")[1].split(",")[0])
+            )
+        if "TG Value" not in df.columns:
+            df["TG Value"] = df["Benchmark"].apply(
+                lambda x: int(x.split("tg: ")[1].split(")")[0])
+            )
+
+        df = df[
+            (df["PP Value"] >= pp_min)
+            & (df["PP Value"] <= pp_max)
+            & (df["TG Value"] >= tg_min)
+            & (df["TG Value"] <= tg_max)
+        ]
+
+    # Advanced settings filters
+    advanced = filters["advanced_settings"]
+    if advanced["n_threads"]:
+        df["n_threads"] = df["initSettings"].apply(lambda x: x.get("n_threads"))
+        df = df[df["n_threads"].isin(advanced["n_threads"])]
+
+    if advanced["flash_attn"]:
+        df["flash_attn"] = df["initSettings"].apply(lambda x: x.get("flash_attn"))
+        df = df[df["flash_attn"].isin(advanced["flash_attn"])]
+
+    if advanced["cache_type"]:
+        df["cache_type_k"] = df["initSettings"].apply(lambda x: x.get("cache_type_k"))
+        df["cache_type_v"] = df["initSettings"].apply(lambda x: x.get("cache_type_v"))
+        df = df[
+            (df["cache_type_k"].isin(advanced["cache_type"]))
+            & (df["cache_type_v"].isin(advanced["cache_type"]))
+        ]
+
+    if advanced["max_memory_usage"] < 100:
+        df = df[df["Memory Usage (%)"] <= advanced["max_memory_usage"]]
+
+    return df
+
+
+def render_performance_plots(df: pd.DataFrame, filters: Dict):
+    """Render performance comparison plots"""
+    if df.empty:
+        st.warning("No data available for plotting.")
+        return
+
+    # Apply filters
+    filtered_df = filter_dataframe(df, filters)
+    if filtered_df.empty:
+        st.warning("No data matches the selected filters for plotting.")
+        return
+
+    # Extract PP/TG values if not already present
+    if "PP Value" not in filtered_df.columns:
+        filtered_df["PP Value"] = filtered_df["Benchmark"].apply(
+            lambda x: int(x.split("pp: ")[1].split(",")[0])
+        )
+    if "TG Value" not in filtered_df.columns:
+        filtered_df["TG Value"] = filtered_df["Benchmark"].apply(
+            lambda x: int(x.split("tg: ")[1].split(")")[0])
+        )
+
+    # Extract initSettings if not already present
+    if "n_threads" not in filtered_df.columns:
+        filtered_df["n_threads"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("n_threads")
+        )
+        filtered_df["flash_attn"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("flash_attn")
+        )
+        filtered_df["cache_type_k"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("cache_type_k")
+        )
+        filtered_df["cache_type_v"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("cache_type_v")
+        )
+
+    # Group by device and platform for plotting
+    plot_group = (
+        filtered_df.groupby(["Device", "Platform"])
+        .agg(
+            {
+                "Prompt Processing": "mean",
+                "Token Generation": "mean",
+                "Memory Usage (%)": "mean",
+                "Memory Usage (GB)": "mean",
+                "CPU Cores": "first",
+                "Model Size": "first",
+                "PP Value": "first",
+                "TG Value": "first",
+            }
+        )
+        .reset_index()
+    )
+
+    col1, col2 = st.columns(2)
+    with col1:
+        fig1 = create_performance_plot(
+            plot_group,
+            "Prompt Processing",
+            f"Prompt Processing Time (PP: {plot_group['PP Value'].iloc[0]})",
+        )
+        if fig1:
+            st.plotly_chart(fig1, use_container_width=True)
+
+    with col2:
+        fig2 = create_performance_plot(
+            plot_group,
+            "Token Generation",
+            f"Token Generation Time (TG: {plot_group['TG Value'].iloc[0]})",
+        )
+        if fig2:
+            st.plotly_chart(fig2, use_container_width=True)
+
+
+def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
+    """Render the leaderboard table with grouped and formatted data"""
+    if df.empty:
+        st.warning("No data available for the selected filters.")
+        return
+
+    # Apply filters
+    filtered_df = filter_dataframe(df, filters)
+    if filtered_df.empty:
+        st.warning("No data matches the selected filters.")
+        return
+
+    # Extract settings from benchmark results
+    filtered_df["PP Value"] = filtered_df["Benchmark"].apply(
+        lambda x: int(x.split("pp: ")[1].split(",")[0])
+    )
+    filtered_df["TG Value"] = filtered_df["Benchmark"].apply(
+        lambda x: int(x.split("tg: ")[1].split(")")[0])
+    )
+
+    # Extract initSettings
+    filtered_df["n_threads"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("n_threads")
+    )
+    filtered_df["flash_attn"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("flash_attn")
+    )
+    filtered_df["cache_type_k"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("cache_type_k")
+    )
+    filtered_df["cache_type_v"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("cache_type_v")
+    )
+
+    # Group by selected columns
+    grouping_cols = filters["grouping"]
+    if not grouping_cols:
+        grouping_cols = ["Model ID", "Device", "Platform"]  # Default grouping
+
+    agg_dict = {
+        "Prompt Processing": ["mean", "count", "std"],
+        "Token Generation": ["mean", "std"],
+        "Memory Usage (%)": "mean",
+        "Memory Usage (GB)": "mean",
+        "Total Memory (GB)": "first",
+        "CPU Cores": "first",
+        "Model Size": "first",
+        "PP Value": "first",
+        "TG Value": "first",
+        "n_threads": "first",
+        "flash_attn": "first",
+        "cache_type_k": "first",
+        "cache_type_v": "first",
+    }
+
+    grouped_df = filtered_df.groupby(grouping_cols).agg(agg_dict).reset_index()
+
+    # Flatten column names
+    grouped_df.columns = [
+        col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
+    ]
+
+    # Sort by Model Size, PP Value, and TG time
+    grouped_df = grouped_df.sort_values(
+        by=["Model Size (first)", "PP Value (first)", "Token Generation (mean)"],
+        ascending=[False, True, True],
+    )
+
+    # Round numeric columns
+    numeric_cols = [
+        col
+        for col in grouped_df.columns
+        if any(x in col for x in ["mean", "std", "Memory", "Size"])
+    ]
+    grouped_df[numeric_cols] = grouped_df[numeric_cols].round(2)
+
+    # Rename columns for display
+    column_mapping = {
+        "Prompt Processing (mean)": "PP Avg (ms)",
+        "Prompt Processing (std)": "PP Std",
+        "Prompt Processing (count)": "Runs",
+        "Token Generation (mean)": "TG Avg (ms)",
+        "Token Generation (std)": "TG Std",
+        "Memory Usage (%) (mean)": "Memory Usage (%)",
+        "Memory Usage (GB) (mean)": "Memory Usage (GB)",
+        "PP Value (first)": "PP Value",
+        "TG Value (first)": "TG Value",
+    }
+    grouped_df = grouped_df.rename(columns=column_mapping)
+
+    # Filter visible columns
+    visible_cols = filters["visible_columns"]
+    if visible_cols:
+        # Map the user-friendly names to actual column names
+        column_name_mapping = {
+            "Device": "Device",
+            "Platform": "Platform",
+            "CPU Cores": "CPU Cores (first)",
+            "Total Memory (GB)": "Total Memory (GB) (first)",
+            "Memory Usage (%)": "Memory Usage (%)",
+            "PP Value": "PP Value",
+            "TG Value": "TG Value",
+            "Prompt Processing": "PP Avg (ms)",
+            "Token Generation": "TG Avg (ms)",
+            "Model": "Model ID",
+            "Model Size": "Model Size (first)",
+            "Model ID": "Model ID",
+            "n_threads": "n_threads (first)",
+            "flash_attn": "flash_attn (first)",
+            "cache_type_k": "cache_type_k (first)",
+            "cache_type_v": "cache_type_v (first)",
+        }
+        display_cols = [
+            column_name_mapping[col]
+            for col in visible_cols
+            if col in column_name_mapping
+        ]
+    else:
+        # Default columns if none selected
+        display_cols = [
+            "Device",
+            "Platform",
+            "Model ID",
+            "Model Size (first)",
+            "PP Avg (ms)",
+            "TG Avg (ms)",
+            "Memory Usage (%)",
+        ]
+
+    # Display the filtered and grouped table
+    st.dataframe(
+        grouped_df[display_cols],
+        use_container_width=True,
+        height=400,
+    )
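Aside (not part of the commit): the PP/TG extraction used in `filter_dataframe` and `render_leaderboard_table` relies on the "Benchmark" label built in src/services/firebase.py, which has the layout "&lt;label&gt; (pp: &lt;pp&gt;, tg: &lt;tg&gt;)". A minimal illustration with a made-up label and values:

```python
# Illustrative only; the label "Default" and the numbers are invented, but the
# string layout matches what format_leaderboard_data produces.
benchmark = "Default (pp: 512, tg: 128)"

pp_value = int(benchmark.split("pp: ")[1].split(",")[0])  # 512
tg_value = int(benchmark.split("tg: ")[1].split(")")[0])  # 128

print(pp_value, tg_value)  # 512 128
```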
src/services/firebase.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
 import streamlit as st
 import json
 
+
 def initialize_firebase():
     """Initialize Firebase with credentials"""
     try:
@@ -16,17 +17,20 @@
         firebase_admin.initialize_app(cred)
         return firestore.client()
 
+
 db = initialize_firebase()
 
+
 def normalize_device_id(device_info: dict) -> str:
     """Normalize device identifier for aggregation"""
     emulator = "/Emulator" if device_info["isEmulator"] else ""
     if device_info["systemName"].lower() == "ios":
         return f"iOS/{device_info['model']}{emulator}"
-
+
     memory_tier = f"{device_info['totalMemory'] // (1024**3)}GB"
     return f"{device_info['brand']}/{device_info['model']}/{memory_tier}{emulator}"
 
+
 def format_params_in_b(params: int) -> float:
     """Format number of parameters in billions"""
     b_value = params / 1e9
@@ -37,78 +41,117 @@ def format_params_in_b(params: int) -> float:
     else:
         return round(b_value, 3)
 
+
 def format_leaderboard_data(submissions: List[dict]) -> pd.DataFrame:
     """Format submissions for leaderboard display"""
     formatted_data = []
-
+
     for sub in submissions:
         try:
-            benchmark_result = sub.get('benchmarkResult', {})
-            device_info = sub.get('deviceInfo', {})
-
+            benchmark_result = sub.get("benchmarkResult", {})
+            device_info = sub.get("deviceInfo", {})
+
+            # Skip if missing required data
             if not benchmark_result or not device_info:
                 continue
-
-            formatted_data.append({
-                "Device": f"{device_info.get('model', 'Unknown')} [Emulator]" if device_info.get('isEmulator') else device_info.get('model', 'Unknown'),
-                "Platform": device_info.get('systemName', 'Unknown'),
-                "Benchmark": f"{benchmark_result.get('config', {}).get('label', 'Unknown')} (pp: {benchmark_result.get('config', {}).get('pp', 'N/A')}, tg: {benchmark_result.get('config', {}).get('tg', 'N/A')})",
-                "Model": benchmark_result.get('modelName', 'Unknown'),
-                "Model Size": format_params_in_b(benchmark_result.get('modelNParams', 0)),
-                "Prompt Processing": round(benchmark_result.get('ppAvg', 0), 2),
-                "Token Generation": round(benchmark_result.get('tgAvg', 0), 2),
-                "Memory Usage (%)": benchmark_result.get('peakMemoryUsage', {}).get('percentage'),
-                "Memory Usage (GB)": round(benchmark_result.get('peakMemoryUsage', {}).get('used', 0) / (1024**3), 2) if benchmark_result.get('peakMemoryUsage', {}).get('used') else None,
-                "Total Memory (GB)": round(device_info.get('totalMemory', 0) / (1024**3), 2),
-                "CPU Cores": device_info.get('cpuDetails', {}).get('cores', 'Unknown'),
-                "Normalized Device ID": normalize_device_id(device_info),
-                "Timestamp": benchmark_result.get('timestamp', 'Unknown'),
-                "Model ID": benchmark_result.get('modelId', 'Unknown'),
-                "OID": benchmark_result.get('oid'),
-            })
+
+            # Skip if missing initSettings
+            if "initSettings" not in benchmark_result:
+                continue
+
+            # Skip emulators
+            if device_info.get("isEmulator", False):
+                continue
+
+            formatted_data.append(
+                {
+                    "Device": device_info.get("model", "Unknown"),
+                    "Platform": device_info.get("systemName", "Unknown"),
+                    "Benchmark": f"{benchmark_result.get('config', {}).get('label', 'Unknown')} (pp: {benchmark_result.get('config', {}).get('pp', 'N/A')}, tg: {benchmark_result.get('config', {}).get('tg', 'N/A')})",
+                    "Model": benchmark_result.get("modelName", "Unknown"),
+                    "Model Size": format_params_in_b(
+                        benchmark_result.get("modelNParams", 0)
+                    ),
+                    "Prompt Processing": round(benchmark_result.get("ppAvg", 0), 2),
+                    "Token Generation": round(benchmark_result.get("tgAvg", 0), 2),
+                    "Memory Usage (%)": benchmark_result.get("peakMemoryUsage", {}).get(
+                        "percentage"
+                    ),
+                    "Memory Usage (GB)": (
+                        round(
+                            benchmark_result.get("peakMemoryUsage", {}).get("used", 0)
+                            / (1024**3),
+                            2,
+                        )
+                        if benchmark_result.get("peakMemoryUsage", {}).get("used")
+                        else None
+                    ),
+                    "Total Memory (GB)": round(
+                        device_info.get("totalMemory", 0) / (1024**3), 2
+                    ),
+                    "CPU Cores": device_info.get("cpuDetails", {}).get(
+                        "cores", "Unknown"
+                    ),
+                    "Normalized Device ID": normalize_device_id(device_info),
+                    "Timestamp": benchmark_result.get("timestamp", "Unknown"),
+                    "Model ID": benchmark_result.get("modelId", "Unknown"),
+                    "OID": benchmark_result.get("oid"),
+                    "initSettings": benchmark_result.get("initSettings"),
+                }
+            )
         except Exception as e:
             st.warning(f"Error processing submission: {str(e)}")
             continue
-
+
     return pd.DataFrame(formatted_data)
 
+
 async def fetch_leaderboard_data(
-    model_name: Optional[str] = None,
-    benchmark_label: Optional[str] = None
+    model_name: Optional[str] = None, benchmark_label: Optional[str] = None
 ) -> pd.DataFrame:
     """Fetch and process leaderboard data from Firestore"""
     try:
         # Navigate to the correct collection path: benchmarks/v1/submissions
-        submissions_ref = db.collection('benchmarks').document('v1').collection('submissions')
-
+        submissions_ref = (
+            db.collection("benchmarks").document("v1").collection("submissions")
+        )
+
         # Get all documents
         docs = submissions_ref.stream()
         all_docs = list(docs)
-
+
         if len(all_docs) == 0:
             return pd.DataFrame()
-
+
         # Process documents and filter in memory
         submissions = []
-
+
        for doc in all_docs:
             data = doc.to_dict()
-
-            if not data or 'benchmarkResult' not in data:
+
+            if not data or "benchmarkResult" not in data:
                 continue
-
-            benchmark_result = data['benchmarkResult']
-
+
+            benchmark_result = data["benchmarkResult"]
+
             # Apply filters
-            if model_name and model_name != "All" and benchmark_result.get('modelName') != model_name:
+            if (
+                model_name
+                and model_name != "All"
+                and benchmark_result.get("modelName") != model_name
+            ):
                 continue
-            if benchmark_label and benchmark_label != "All" and benchmark_result.get('config', {}).get('label') != benchmark_label:
+            if (
+                benchmark_label
+                and benchmark_label != "All"
+                and benchmark_result.get("config", {}).get("label") != benchmark_label
+            ):
                 continue
-
+
             submissions.append(data)
-
+
         return format_leaderboard_data(submissions)
-
+
     except Exception as e:
         st.error(f"Error fetching data from Firestore: {str(e)}")
-        return pd.DataFrame()
+        return pd.DataFrame()
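Aside (not part of the commit): the "data format change" in the commit title shows up here as a requirement that each submission's benchmarkResult carry an initSettings object, plus the new rule that emulator submissions are dropped. A hypothetical submission document that format_leaderboard_data would accept is sketched below; every value is invented for illustration and only the key names come from the code above.

```python
# Hypothetical Firestore submission; keys mirror what format_leaderboard_data reads,
# all values are made up. Documents without "initSettings" (the old format) or with
# isEmulator=True are now skipped.
submission = {
    "benchmarkResult": {
        "config": {"label": "Default", "pp": 512, "tg": 128},
        "modelName": "example-model",        # made-up
        "modelId": "example/model-q4",       # made-up
        "modelNParams": 1_240_000_000,
        "ppAvg": 35.2,
        "tgAvg": 12.8,
        "peakMemoryUsage": {"percentage": 61.5, "used": 3 * 1024**3},
        "timestamp": "2024-01-01T00:00:00Z",
        "oid": "abc123",                     # made-up
        "initSettings": {                    # the newly required field
            "n_threads": 4,
            "flash_attn": False,
            "cache_type_k": "f16",
            "cache_type_v": "f16",
        },
    },
    "deviceInfo": {
        "model": "Example Phone",
        "systemName": "Android",
        "brand": "ExampleBrand",
        "totalMemory": 8 * 1024**3,
        "isEmulator": False,
        "cpuDetails": {"cores": 8},
    },
}

# format_leaderboard_data([submission]) would then yield a single leaderboard row
# with "Model Size" ≈ 1.24 and an "initSettings" column carrying the dict above.
```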