Create app.py
app.py
ADDED
@@ -0,0 +1,103 @@
import gradio as gr
import asyncio
import aiohttp
from typing import List, Tuple

__all__ = ['iface', 'calculate_total_size']


async def get_repo_size(session, repo):
    """Return the size of a single repository in GB, or 0.0 on any failure."""
    url = f'https://huggingface.co/api/models/{repo}/treesize/main'
    try:
        async with session.get(url) as resp:
            if resp.status == 200:
                resp_json = await resp.json()
                return resp_json['size'] / 1e9  # bytes -> GB
            else:
                print(f'Did not find repo: {repo}, Status code: {resp.status}')
                return 0.0
    except aiohttp.ClientError as e:
        print(f"HTTP error for {repo}: {e}")
        return 0.0
    except Exception as e:
        print(f"An error occurred for {repo}: {e}")
        return 0.0


async def fetch_all_models(session, author, repo_limit=20000):
    """Page through the models API until the author's repos run out or repo_limit is hit."""
    all_models = []
    page = 0
    models_per_page = 100
    while len(all_models) < repo_limit:
        models_url = f'https://huggingface.co/api/models?author={author}&full=true&config=true&limit={models_per_page}&skip={page * models_per_page}'
        async with session.get(models_url) as response:
            if response.status == 200:
                models_data = await response.json()
                if not models_data:
                    break
                all_models.extend(models_data)
                page += 1
                if len(models_data) < models_per_page:
                    break
            else:
                print(f"Error fetching models: {response.status}")
                return []
        if len(all_models) >= repo_limit:
            print(f"Reached repository limit of {repo_limit} for author '{author}'.")
            all_models = all_models[:repo_limit]
            break
    return all_models


async def calculate_total_size(author, repo_limit=20000):
    """Build the result table: one row per non-exl2 repo plus summary rows."""
    async with aiohttp.ClientSession() as session:
        all_models = await fetch_all_models(session, author, repo_limit)
        if not all_models:
            return [["Error fetching models", ""]]

        total_repos_fetched = len(all_models)
        total_repos_all = await fetch_total_repo_count(session, author)

        tasks = []
        relevant_models = []

        # Skip repos whose id contains 'exl2'; size the rest concurrently.
        for model in all_models:
            repo_id = model['modelId']
            if 'exl2' not in repo_id.lower():
                tasks.append(get_repo_size(session, repo_id))
                relevant_models.append(model)

        repo_sizes_all = await asyncio.gather(*tasks)
        repo_sizes = []
        total_size_gb = 0

        for i, repo_size in enumerate(repo_sizes_all):
            if repo_size > 0:
                repo_sizes.append([relevant_models[i]['modelId'], repo_size])
                total_size_gb += repo_size

        repo_sizes.append(["Total Repositories Fetched", total_repos_fetched])
        repo_sizes.append(["Total Size (non-exl2)", total_size_gb])
        if total_repos_all > total_repos_fetched:
            repo_sizes.append(["API Repo Limit Reached", f"Fetched {total_repos_fetched} out of {total_repos_all} available models."])
        return repo_sizes


async def fetch_total_repo_count(session, author):
    """Read the author's total repo count from the API's x-total-count header."""
    url = f'https://huggingface.co/api/models?author={author}&full=false&limit=1'
    async with session.get(url) as response:
        if response.status == 200:
            headers = response.headers
            count_str = headers.get('x-total-count')
            return int(count_str) if count_str else 0
        else:
            print(f"Error fetching total repo count: {response.status}")
            return 0


iface = gr.Interface(
    fn=lambda author: asyncio.run(calculate_total_size(author)),
    inputs=gr.Text(value="bartowski"),
    outputs=gr.Dataframe(
        headers=["Repository/Info", "Size (GB) / Count"],
        value=[["", ""]],
    ),
    title="HuggingFace User Size Calculator",
    description="Enter a HuggingFace author to calculate the total size of their non-exl2 repositories. Limited to 20,000 Repos. Janked by Gemini-1121",
)
iface.launch(height=500, width=600)
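
For quick testing outside the Space, the per-repo lookup used by get_repo_size can be exercised on its own. The sketch below is only a minimal standalone example: it assumes the treesize endpoint keeps returning a JSON body with a top-level 'size' field in bytes (as app.py expects), and "bartowski/some-model" is a hypothetical placeholder repo id, not a real repository.

# check_one_repo.py -- standalone sketch of the lookup done by get_repo_size.
# Assumptions: the treesize endpoint returns {'size': <bytes>} as app.py expects,
# and "bartowski/some-model" is a placeholder repo id to replace with a real one.
import asyncio
import aiohttp

async def fetch_size_gb(repo: str) -> float:
    url = f"https://huggingface.co/api/models/{repo}/treesize/main"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            if resp.status != 200:
                return 0.0
            data = await resp.json()
            return data["size"] / 1e9  # bytes -> GB, same conversion as app.py

if __name__ == "__main__":
    print(asyncio.run(fetch_size_gb("bartowski/some-model")))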