Create app.py
app.py
ADDED
@@ -0,0 +1,103 @@
import gradio as gr
import asyncio
import aiohttp
from typing import List, Tuple

__all__ = ['iface', 'calculate_total_size']


async def get_repo_size(session, repo):
    """Return the size of a single repository in GB, or 0.0 on any failure."""
    url = f'https://huggingface.co/api/models/{repo}/treesize/main'
    try:
        async with session.get(url) as resp:
            if resp.status == 200:
                resp_json = await resp.json()
                return resp_json['size'] / 1e9  # bytes -> GB
            else:
                print(f'Did not find repo: {repo}, Status code: {resp.status}')
                return 0.0
    except aiohttp.ClientError as e:
        print(f"HTTP error for {repo}: {e}")
        return 0.0
    except Exception as e:
        print(f"An error occurred for {repo}: {e}")
        return 0.0


async def fetch_all_models(session, author, repo_limit=20000):
    """Page through the models API until the author's repos run out or repo_limit is hit."""
    all_models = []
    page = 0
    models_per_page = 100
    while len(all_models) < repo_limit:
        models_url = f'https://huggingface.co/api/models?author={author}&full=true&config=true&limit={models_per_page}&skip={page * models_per_page}'
        async with session.get(models_url) as response:
            if response.status == 200:
                models_data = await response.json()
                if not models_data:
                    break
                all_models.extend(models_data)
                page += 1
                if len(models_data) < models_per_page:
                    break
            else:
                print(f"Error fetching models: {response.status}")
                return []
        if len(all_models) >= repo_limit:
            print(f"Reached repository limit of {repo_limit} for author '{author}'.")
            all_models = all_models[:repo_limit]
            break
    return all_models


async def calculate_total_size(author, repo_limit=20000):
    """Build the result table: one row per non-exl2 repo plus summary rows."""
    async with aiohttp.ClientSession() as session:
        all_models = await fetch_all_models(session, author, repo_limit)
        if not all_models:
            return [["Error fetching models", ""]]

        total_repos_fetched = len(all_models)
        total_repos_all = await fetch_total_repo_count(session, author)

        tasks = []
        relevant_models = []

        # Skip repos whose id contains 'exl2'; size the rest concurrently.
        for model in all_models:
            repo_id = model['modelId']
            if 'exl2' not in repo_id.lower():
                tasks.append(get_repo_size(session, repo_id))
                relevant_models.append(model)

        repo_sizes_all = await asyncio.gather(*tasks)
        repo_sizes = []
        total_size_gb = 0

        for i, repo_size in enumerate(repo_sizes_all):
            if repo_size > 0:
                repo_sizes.append([relevant_models[i]['modelId'], repo_size])
                total_size_gb += repo_size

        repo_sizes.append(["Total Repositories Fetched", total_repos_fetched])
        repo_sizes.append(["Total Size (non-exl2)", total_size_gb])
        if total_repos_all > total_repos_fetched:
            repo_sizes.append(["API Repo Limit Reached", f"Fetched {total_repos_fetched} out of {total_repos_all} available models."])
        return repo_sizes


async def fetch_total_repo_count(session, author):
    """Read the author's total repo count from the API's x-total-count header."""
    url = f'https://huggingface.co/api/models?author={author}&full=false&limit=1'
    async with session.get(url) as response:
        if response.status == 200:
            headers = response.headers
            count_str = headers.get('x-total-count')
            return int(count_str) if count_str else 0
        else:
            print(f"Error fetching total repo count: {response.status}")
            return 0


iface = gr.Interface(
    fn=lambda author: asyncio.run(calculate_total_size(author)),
    inputs=gr.Text(value="bartowski"),
    outputs=gr.Dataframe(
        headers=["Repository/Info", "Size (GB) / Count"],
        value=[["", ""]],
    ),
    title="HuggingFace User Size Calculator",
    description="Enter a HuggingFace author to calculate the total size of their non-exl2 repositories. Limited to 20,000 Repos. Janked by Gemini-1121",
)
iface.launch(height=500, width=600)
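
For quick testing outside the Space, the per-repo lookup used by get_repo_size can be exercised on its own. The sketch below is only a minimal standalone example: it assumes the treesize endpoint keeps returning a JSON body with a top-level 'size' field in bytes (as app.py expects), and "bartowski/some-model" is a hypothetical placeholder repo id, not a real repository.

# check_one_repo.py -- standalone sketch of the lookup done by get_repo_size.
# Assumptions: the treesize endpoint returns {'size': <bytes>} as app.py expects,
# and "bartowski/some-model" is a placeholder repo id to replace with a real one.
import asyncio
import aiohttp

async def fetch_size_gb(repo: str) -> float:
    url = f"https://huggingface.co/api/models/{repo}/treesize/main"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            if resp.status != 200:
                return 0.0
            data = await resp.json()
            return data["size"] / 1e9  # bytes -> GB, same conversion as app.py

if __name__ == "__main__":
    print(asyncio.run(fetch_size_gb("bartowski/some-model")))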