Spaces: Running

burtenshaw committed
Commit • ec60e9a
1 Parent(s): afaf730

use percentile boundaries in app

Files changed:
- app.py +40 -25
- default.jpg +0 -0
- default.png +0 -0
- images/empty.png +0 -0
- images/space.png +0 -0
- percentiles.json +1 -0
- pyproject.toml +3 -0
- stats_dataset.ipynb +166 -0
app.py
CHANGED
@@ -3,13 +3,28 @@ import gradio as gr
 from urllib.parse import urlencode
 import os
 from datetime import datetime
+import json

 # Load environment variables

 DEFAULT_IMAGE = "https://hub-recap.imglab-cdn.net/default.jpg?width=1200&text=%3Cspan+size%3D%2212pt%22+weight%3D%22bold%22%3EHugging+Face++%E2%9D%A4%EF%B8%8F+bartowski+in+2024%3C%2Fspan%3E%0A%0A%3Cspan+weight%3D%22bold%22%3E2%2C020%2C552%3C%2Fspan%3E+model+downloads%0A%3Cspan+weight%3D%22bold%22%3E5%2C407%3C%2Fspan%3E+model+likes%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+downloads%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+likes%0A%0A%3Cspan+size%3D%2210pt%22%3EMost+Popular+Contributions%3A%3C%2Fspan%3E%0AModel%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgemma-2-9b-it-GGUF%3C%2Fspan%3E%0A++%2843%2C949+downloads%2C+196+likes%29%0ADataset%3A+%3Cspan+weight%3D%22bold%22%3ENone%3C%2Fspan%3E%0A++%280+downloads%2C+0+likes%29%0ASpace%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgguf-metadata-updater%3C%2Fspan%3E%0A++%287+likes%29&text-width=800&text-height=600&text-padding=60&text-color=39%2C71%2C111&text-x=460&text-y=40&format=png&dpr=2"
-
-
-
+
+# Load percentiles data
+with open("percentiles.json") as f:
+    PERCENTILES = json.load(f)
+
+
+def get_percentile_rank(likes, category):
+    if likes == 0:
+        return 0
+    percentiles = PERCENTILES[f"{category}_percentiles"]
+    if likes >= percentiles["p_99999"]:
+        return 99.999
+    elif likes >= percentiles["p_9999"]:
+        return 99.99
+    elif likes >= percentiles["p_999"]:
+        return 99.9
+    return 0


 def create_image(stats, username):
@@ -19,39 +34,39 @@ def create_image(stats, username):
     dataset_activity = total_stats["Dataset Downloads"] + total_stats["Dataset Likes"]
     space_activity = total_stats["Space Likes"]

-    # Calculate percentiles based on
-
-
-
-    )
-    dataset_percentile = round(
-        (top_items["Top Dataset"]["likes"] / MAX_DATASET_ACTIVITY) * 100, 2
-    )
-    space_percentile = round(
-        (top_items["Top Space"]["likes"] / MAX_SPACE_ACTIVITY) * 100, 2
-    )
+    # Calculate percentiles based on likes
+    model_percentile = get_percentile_rank(total_stats["Model Likes"], "model")
+    dataset_percentile = get_percentile_rank(total_stats["Dataset Likes"], "dataset")
+    space_percentile = get_percentile_rank(space_activity, "space")

-    # Choose base image URL based on highest activity
-    # check if no activity in any category
-    # if everything is 0, we show the empty image
+    # Choose base image URL based on highest activity (keep using activity for image selection)
     if model_activity == 0 and dataset_activity == 0 and space_activity == 0:
         url = "https://hub-recap.imglab-cdn.net/images/empty.png"
         avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"
     elif model_activity >= max(dataset_activity, space_activity):
-        url = "https://hub-recap.imglab-cdn.net/images/
-        avatar = f"Model Pro (
+        url = "https://hub-recap.imglab-cdn.net/images/model.png"
+        avatar = f"Model Pro" + (
+            f" (top {model_percentile}%)" if model_percentile > 0 else ""
+        )
     elif dataset_activity >= max(model_activity, space_activity):
-        url = "https://hub-recap.imglab-cdn.net/images/
-        avatar = f"Dataset Guru (
+        url = "https://hub-recap.imglab-cdn.net/images/dataset.png"
+        avatar = f"Dataset Guru" + (
+            f" (top {dataset_percentile}%)" if dataset_percentile > 0 else ""
+        )
+    elif space_activity >= max(model_activity, dataset_activity):
+        url = "https://hub-recap.imglab-cdn.net/images/space.png"
+        avatar = f"Space Artiste" + (
+            f" (top {space_percentile}%)" if space_percentile > 0 else ""
+        )
     else:
-        url = "https://hub-recap.imglab-cdn.net/images/
-        avatar =
+        url = "https://hub-recap.imglab-cdn.net/images/empty.png"
+        avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"

     # Build text content with proper formatting
     text_parts = []

     text_parts.append(
-        f'<span size="11pt" weight="bold">Hugging Face
+        f'<span size="11pt" weight="bold">Hugging Face ❤️ {username} in 2024</span>'
     )
     text_parts.append("")  # Empty line for spacing

@@ -117,7 +132,7 @@ def create_image(stats, username):

     # Update the avatar message with percentile
     text_parts.append("")  # Empty line for spacing
-    text_parts.append(f'<span size="9pt">You are a {avatar}
+    text_parts.append(f'<span size="9pt">You are a {avatar}!</span>')

     # Add additional percentile info if other categories are significant
     other_percentiles = []
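For reference, a minimal sketch (not part of the commit) of how the new get_percentile_rank helper and the avatar label compose. The thresholds are the model values shipped in percentiles.json below; the 1,000-like author is hypothetical:

# Hypothetical check of the new logic, using the model thresholds from percentiles.json.
# 1,000 likes clears the 99.99th-percentile boundary (949) but not the 99.999th (3698),
# so the rank is 99.99 and the card reads "Model Pro (top 99.99%)".
PERCENTILES = {"model_percentiles": {"p_99999": 3698, "p_9999": 949, "p_999": 143}}

def get_percentile_rank(likes, category):
    if likes == 0:
        return 0
    percentiles = PERCENTILES[f"{category}_percentiles"]
    if likes >= percentiles["p_99999"]:
        return 99.999
    elif likes >= percentiles["p_9999"]:
        return 99.99
    elif likes >= percentiles["p_999"]:
        return 99.9
    return 0

model_percentile = get_percentile_rank(1000, "model")
avatar = "Model Pro" + (f" (top {model_percentile}%)" if model_percentile > 0 else "")
print(avatar)  # Model Pro (top 99.99%)

The value returned is the percentile boundary the user clears, which is what the label interpolates.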
default.jpg
DELETED
Binary file (247 kB)
default.png
ADDED
images/empty.png
CHANGED
images/space.png
CHANGED
percentiles.json
ADDED
@@ -0,0 +1 @@
+{"dataset_percentiles": {"p_99999": 1299, "p_9999": 491, "p_999": 125}, "model_percentiles": {"p_99999": 3698, "p_9999": 949, "p_999": 143}, "space_percentiles": {"p_99999": 6040, "p_9999": 1552, "p_999": 326}}
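Because app.py loads this file at import time and indexes it by f"{category}_percentiles", an optional sanity check (a sketch with an assumed file layout, not part of the commit) can guard against regenerating the file with missing keys or mis-ordered thresholds:

import json

# Hypothetical validation helper; the key names mirror what app.py reads.
with open("percentiles.json") as f:
    data = json.load(f)

for category in ("model", "dataset", "space"):
    thresholds = data[f"{category}_percentiles"]
    p999, p9999, p99999 = thresholds["p_999"], thresholds["p_9999"], thresholds["p_99999"]
    # Higher percentiles must require at least as many likes, otherwise
    # get_percentile_rank's >= cascade would return the wrong tier.
    assert p99999 >= p9999 >= p999 > 0, f"unexpected ordering for {category}: {thresholds}"
print("percentiles.json looks consistent")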
pyproject.toml
CHANGED
@@ -5,6 +5,9 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
+    "datasets>=3.2.0",
     "gradio>=5.9.1",
+    "ipykernel>=6.29.5",
+    "pandas>=2.2.3",
     "requests>=2.32.3",
 ]
stats_dataset.ipynb
ADDED
@@ -0,0 +1,166 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/ben/code/hub-recap/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      " from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "ds = load_dataset(\"cfahlgren1/hub-stats\", \"datasets\")\n",
+    "ds_df = ds[\"train\"].to_pandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds = load_dataset(\"cfahlgren1/hub-stats\", \"models\")\n",
+    "md_df = ds[\"train\"].to_pandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Generating train split: 100%|██████████| 309714/309714 [00:00<00:00, 353713.86 examples/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "ds = load_dataset(\"cfahlgren1/hub-stats\", \"spaces\")\n",
+    "sp_df = ds[\"train\"].to_pandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'p_99999': 1299, 'p_9999': 491, 'p_999': 125}\n"
+     ]
+    }
+   ],
+   "source": [
+    "dataset_percentiles = {\n",
+    "    \"p_99999\": int(ds_df[\"likes\"].quantile(0.99999)),\n",
+    "    \"p_9999\": int(ds_df[\"likes\"].quantile(0.9999)),\n",
+    "    \"p_999\": int(ds_df[\"likes\"].quantile(0.999)),\n",
+    "}\n",
+    "print(dataset_percentiles)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'p_99999': 3698, 'p_9999': 949, 'p_999': 143}\n"
+     ]
+    }
+   ],
+   "source": [
+    "model_percentiles = {\n",
+    "    \"p_99999\": int(md_df[\"likes\"].quantile(0.99999)),\n",
+    "    \"p_9999\": int(md_df[\"likes\"].quantile(0.9999)),\n",
+    "    \"p_999\": int(md_df[\"likes\"].quantile(0.999)),\n",
+    "}\n",
+    "print(model_percentiles)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'p_99999': 6040, 'p_9999': 1552, 'p_999': 326}\n"
+     ]
+    }
+   ],
+   "source": [
+    "space_percentiles = {\n",
+    "    \"p_99999\": int(sp_df[\"likes\"].quantile(0.99999)),\n",
+    "    \"p_9999\": int(sp_df[\"likes\"].quantile(0.9999)),\n",
+    "    \"p_999\": int(sp_df[\"likes\"].quantile(0.999)),\n",
+    "}\n",
+    "print(space_percentiles)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "with open(\"percentiles.json\", \"w\") as f:\n",
+    "    json.dump(\n",
+    "        {\n",
+    "            \"dataset_percentiles\": dataset_percentiles,\n",
+    "            \"model_percentiles\": model_percentiles,\n",
+    "            \"space_percentiles\": space_percentiles,\n",
+    "        },\n",
+    "        f,\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
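The three quantile cells above differ only in the hub-stats config and dataframe name. A condensed sketch that produces the same percentiles.json (same data source and "likes" column as the notebook, but a hypothetical consolidation rather than the committed code):

from datasets import load_dataset
import json

# Condensed, hypothetical version of the notebook: one loop instead of three
# near-identical cells. Assumes the "cfahlgren1/hub-stats" configs and their
# "likes" column stay as used above.
CONFIGS = {"dataset": "datasets", "model": "models", "space": "spaces"}
QUANTILES = {"p_99999": 0.99999, "p_9999": 0.9999, "p_999": 0.999}

percentiles = {}
for category, config in CONFIGS.items():
    likes = load_dataset("cfahlgren1/hub-stats", config)["train"].to_pandas()["likes"]
    percentiles[f"{category}_percentiles"] = {
        key: int(likes.quantile(q)) for key, q in QUANTILES.items()
    }

with open("percentiles.json", "w") as f:
    json.dump(percentiles, f)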