"""Gradio app: sum the sizes of a HuggingFace author's non-exl2 model repos."""

import asyncio
from typing import List, Tuple  # noqa: F401 -- kept from original; not currently used

import aiohttp
import gradio as gr

__all__ = ['iface', 'calculate_total_size']


async def get_repo_size(session, repo):
    """Return the size of *repo* in GB via the HF treesize API, or 0.0 on failure.

    Args:
        session: an open ``aiohttp.ClientSession``.
        repo: full repo id, e.g. ``"author/model"``.
    """
    url = f'https://huggingface.co./api/models/{repo}/treesize/main'
    try:
        async with session.get(url) as resp:
            if resp.status == 200:
                resp_json = await resp.json()
                # 'size' is reported in bytes; convert to gigabytes.
                # .get() instead of [] so a missing key yields 0.0 rather than
                # tripping the catch-all handler below.
                return resp_json.get('size', 0) / 1e9
            print(f'Did not find repo: {repo}, Status code: {resp.status}')
            return 0.0
    except aiohttp.ClientError as e:
        print(f"HTTP error for {repo}: {e}")
        return 0.0
    except Exception as e:
        # Best-effort: one bad repo must not abort the whole batch.
        print(f"An error occurred for {repo}: {e}")
        return 0.0


async def fetch_all_models(session, author, repo_limit=20000):
    """Page through the HF models API, returning up to *repo_limit* models.

    Returns [] if any page request fails (the caller treats that as an error).
    """
    all_models = []
    page = 0
    models_per_page = 100
    while len(all_models) < repo_limit:
        models_url = (
            f'https://huggingface.co./api/models?author={author}&full=true&config=true'
            f'&limit={models_per_page}&skip={page * models_per_page}'
        )
        async with session.get(models_url) as response:
            if response.status == 200:
                models_data = await response.json()
                if not models_data:
                    break
                all_models.extend(models_data)
                page += 1
                if len(models_data) < models_per_page:
                    # Short page means we've reached the last page of results.
                    break
            else:
                print(f"Error fetching models: {response.status}")
                return []
        if len(all_models) >= repo_limit:
            print(f"Reached repository limit of {repo_limit} for author '{author}'.")
            all_models = all_models[:repo_limit]
            break
    return all_models


async def calculate_total_size(author, repo_limit=20000):
    """Compute per-repo and total sizes for *author*, skipping 'exl2' repos.

    Args:
        author: HuggingFace user/org name.
        repo_limit: maximum number of repos to fetch (default 20000).

    Returns:
        A list of ``[label, value]`` rows suitable for a ``gr.Dataframe``:
        one row per non-empty repo, followed by summary rows, and a warning
        row if the author has more repos than were fetched.
    """
    async with aiohttp.ClientSession() as session:
        all_models = await fetch_all_models(session, author, repo_limit)
        if not all_models:
            return [["Error fetching models", ""]]
        total_repos_fetched = len(all_models)
        total_repos_all = await fetch_total_repo_count(session, author)

        # Size every repo whose id does not contain 'exl2', concurrently.
        tasks = []
        relevant_models = []
        for model in all_models:
            repo_id = model['modelId']
            if 'exl2' not in repo_id.lower():
                tasks.append(get_repo_size(session, repo_id))
                relevant_models.append(model)
        repo_sizes_all = await asyncio.gather(*tasks)

        repo_sizes = []
        total_size_gb = 0
        for i, repo_size in enumerate(repo_sizes_all):
            if repo_size > 0:  # 0.0 means empty repo or a lookup failure
                repo_sizes.append([relevant_models[i]['modelId'], repo_size])
                total_size_gb += repo_size
        repo_sizes.append(["Total Repositories Fetched", total_repos_fetched])
        repo_sizes.append(["Total Size (non-exl2)", total_size_gb])
        if total_repos_all > total_repos_fetched:
            repo_sizes.append(["API Repo Limit Reached", f"Fetched {total_repos_fetched} out of {total_repos_all} available models."])
        return repo_sizes


async def fetch_total_repo_count(session, author):
    """Return the author's total repo count (from ``x-total-count``), or 0."""
    url = f'https://huggingface.co./api/models?author={author}&full=false&limit=1'
    async with session.get(url) as response:
        if response.status == 200:
            count_str = response.headers.get('x-total-count')
            # Header may be missing or malformed; fall back to 0 instead of
            # letting ValueError abort the whole calculation.
            try:
                return int(count_str) if count_str else 0
            except ValueError:
                return 0
        print(f"Error fetching total repo count: {response.status}")
        return 0


iface = gr.Interface(
    # asyncio.run bridges Gradio's sync callback to the async pipeline.
    fn=lambda author: asyncio.run(calculate_total_size(author)),
    inputs=gr.Text(value="bartowski"),
    outputs=gr.Dataframe(
        headers=["Repository/Info", "Size (GB) / Count"],
        value=[["", ""]],
    ),
    title="HuggingFace User Size Calculator",
    description="Enter a HuggingFace author to calculate the total size of their non-exl2 repositories. Limited to 20,000 Repos. Janked by Gemini-1121"
)

iface.launch(height=500, width=600)