Spaces:

John6666
/

hfsearch

Running

App Files Files Community

John6666 committed on 1 day ago

Commit

a9c10db

verified ·

1 Parent(s): 12b1fe6

Upload 8 files

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +21 -5
hfsearch.py +49 -6

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🤗🔍
 colorFrom: indigo
 colorTo: purple
 sdk: gradio
-sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: mit

 colorFrom: indigo
 colorTo: purple
 sdk: gradio
+sdk_version: 5.11.0
 app_file: app.py
 pinned: false
 license: mit

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import spaces
 import gradio as gr
 from hfsearch import (HFSearchResult, search, update_filter, update_df, get_labels, get_valid_labels,
                       get_tags, get_subtag_categories, update_subtag_items, update_tags, update_subtags,
-                      search_ref_repos, DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES)
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 CSS = """
@@ -14,15 +14,14 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
     gr.Markdown("# Search Hugging Face🤗", elem_classes="title")
     with gr.Column():
         search_result = gr.State(value=HFSearchResult())
-        ui_mode = gr.Radio(label="Mode", choices=["PC", "Phone"], value="Phone")
-        with gr.Tab("Normal Search"):
             with gr.Group():
                 with gr.Row(equal_height=True):
                     repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space", "collection"], value=["model", "dataset", "space"])
                     filter_str = gr.Textbox(label="Filter", info="String(s) to filter repos", value="")
                     sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads", "trending_score"], value="likes")
                 with gr.Accordion("Advanced", open=False):
                     with gr.Row(equal_height=True):
                         search_str = gr.Textbox(label="Search", info="A string that will be contained in the returned repo ids", placeholder="bert", value="", lines=1)
                         #author = gr.Textbox(label="Author", info="The author (user or organization)", value="", lines=1)
@@ -93,6 +92,19 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
                     with gr.Row(equal_height=True):
                         rec_show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
             rec_run_button = gr.Button("Search", variant="primary")
         with gr.Group():
             with gr.Accordion("Filter", open=False):
                 hide_labels = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
@@ -100,6 +112,7 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
                     filter_item1 = gr.Dropdown(label="Filter item", choices=[""], value="", visible=False)
                     filter1 = gr.Dropdown(label="Filter", choices=[""], value="", allow_custom_value=True, visible=False)
                     filter_btn = gr.Button("Apply filter", variant="secondary", visible=False)
             result_df = gr.DataFrame(label="Results", type="pandas", value=None, interactive=False)
     run_button.click(search, [repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_status, gated_status, appr_status,
@@ -119,5 +132,8 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
     gr.on(triggers=[rec_run_button.click, rec_repo_id.submit], fn=search_ref_repos,
           inputs=[rec_repo_id, rec_repo_types, rec_sort, rec_show_labels, rec_limit, ui_mode, search_result],
           outputs=[result_df, hide_labels, search_result])
-demo.queue().launch()

 import gradio as gr
 from hfsearch import (HFSearchResult, search, update_filter, update_df, get_labels, get_valid_labels,
                       get_tags, get_subtag_categories, update_subtag_items, update_tags, update_subtags,
+                      search_ref_repos, search_cols, DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES, TYPES_DESC)
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 CSS = """
     gr.Markdown("# Search Hugging Face🤗", elem_classes="title")
     with gr.Column():
         search_result = gr.State(value=HFSearchResult())
+        with gr.Tab("Basic Search"):
             with gr.Group():
                 with gr.Row(equal_height=True):
                     repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space", "collection"], value=["model", "dataset", "space"])
                     filter_str = gr.Textbox(label="Filter", info="String(s) to filter repos", value="")
                     sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads", "trending_score"], value="likes")
                 with gr.Accordion("Advanced", open=False):
+                    ui_mode = gr.Radio(label="Mode", choices=["PC", "Phone"], value="Phone")
                     with gr.Row(equal_height=True):
                         search_str = gr.Textbox(label="Search", info="A string that will be contained in the returned repo ids", placeholder="bert", value="", lines=1)
                         #author = gr.Textbox(label="Author", info="The author (user or organization)", value="", lines=1)
                     with gr.Row(equal_height=True):
                         rec_show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
             rec_run_button = gr.Button("Search", variant="primary")
+        with gr.Tab("Find collections"):
+            with gr.Group():
+                with gr.Row(equal_height=True):
+                    #rec_repo_id = gr.Textbox(label="Repo ID", info="Input your favorite repo", value="")
+                    col_repo_id = HuggingfaceHubSearch(label="Repo ID or User ID", placeholder="Input your favorite Repo ID or User ID", search_type=["model", "dataset", "space", "user"],
+                                                       sumbit_on_select=False)
+                    col_sort = gr.Radio(label="Sort", choices=["last_modified", "trending_score", "likes"], value="likes")
+                with gr.Row(equal_height=True):
+                    col_repo_limit = gr.Number(label="Limit for repos", value=50, step=1, minimum=1, maximum=100)
+                    col_user_limit = gr.Number(label="Limit for users", value=10, step=1, minimum=0, maximum=50)
+                with gr.Accordion("Advanced", open=False):
+                    col_show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
+            col_run_button = gr.Button("Search", variant="primary")
         with gr.Group():
             with gr.Accordion("Filter", open=False):
                 hide_labels = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
                     filter_item1 = gr.Dropdown(label="Filter item", choices=[""], value="", visible=False)
                     filter1 = gr.Dropdown(label="Filter", choices=[""], value="", allow_custom_value=True, visible=False)
                     filter_btn = gr.Button("Apply filter", variant="secondary", visible=False)
+            types_desc = gr.Markdown(TYPES_DESC, elem_classes="info")
             result_df = gr.DataFrame(label="Results", type="pandas", value=None, interactive=False)
     run_button.click(search, [repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_status, gated_status, appr_status,
     gr.on(triggers=[rec_run_button.click, rec_repo_id.submit], fn=search_ref_repos,
           inputs=[rec_repo_id, rec_repo_types, rec_sort, rec_show_labels, rec_limit, ui_mode, search_result],
           outputs=[result_df, hide_labels, search_result])
+    gr.on(triggers=[col_run_button.click, col_repo_id.submit], fn=search_cols,
+          inputs=[col_repo_id, col_sort, col_show_labels, col_repo_limit, col_user_limit, ui_mode, search_result],
+          outputs=[result_df, hide_labels, search_result])
+demo.queue().launch(ssr_mode=False)

hfsearch.py CHANGED Viewed

@@ -14,6 +14,10 @@ from hfconstants import DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES, SPACE
 def dummy_gpu():
     pass
 RESULT_ITEMS = {
     "Type": [1, "str", True],
     "ID": [2, "markdown", True, "40%"],
@@ -224,6 +228,19 @@ def get_ref_collections(repo_id: str, limit=10):
         print(e)
         raise Exception(e)
 def str_to_list(s: str):
     try:
         m = re.split("\n", s)
@@ -435,6 +452,18 @@ class HFSearchResult():
             self.sort(sort)
         except Exception as e:
             raise Exception(f"Search error: {e}") from e
     def search_ref_repos(self, repo_id: str, repo_types: str, sort: str, show_labels: list, limit=10, ui_mode="PC"):
         try:
@@ -509,10 +538,10 @@ class HFSearchResult():
         def id_to_md(df: pd.DataFrame, verbose=False):
             columns = list(df.index)
-            if df["Type"] == "collection": id = f'[{df["User"]}/{df["Name"]}]({df["URL"]}){df["Emoji"]}'
-            elif df["Type"] == "space": id = f'[{df["Name"]} ({df["ID"]})]({df["URL"]}){df["Emoji"]}'
-            elif df["Type"] == "paper": id = f'[{df["Name"]} (arxiv:{df["ID"]})]({df["URL"]}){df["Emoji"]}'
-            else: id = f'[{df["ID"]}]({df["URL"]}){df["Emoji"]}'
             if verbose:
                 l = []
                 if "NFAA" in columns and df["NFAA"] == "True": l.append('🤐')
@@ -530,9 +559,14 @@ class HFSearchResult():
                         if "Hardware" in columns and df["Hardware"] in SPACE_HARDWARES and df["Hardware"] != "cpu-basic": l.append(f'{df["Hardware"]}')
                         if "SDK" in columns: l.append(f'{df["SDK"]}')
                         if "Stage" in columns and df["Stage"] in SPACE_STAGES_EMOJI.keys(): l.append(f'{SPACE_STAGES_EMOJI[df["Stage"]]}')
-                if len(l) > 0: id += f" ({' '.join(l)})"
             return id
         def to_emoji(df: pd.DataFrame, label: str, key: str, emoji: str):
             if df[label] == key: return f'{df["Emoji"]}{emoji}' if df["Emoji"] else f' {emoji}'
             else: return df["Emoji"]
@@ -546,6 +580,7 @@ class HFSearchResult():
         def format_md_df(df: pd.DataFrame, verbose=False):
             df["ID"] = df.apply(id_to_md, axis=1, verbose=verbose)
             return df
         hide_labels = [l for l in labels if l in self.hide_labels or l not in self.get_show_labels()]
@@ -651,7 +686,15 @@ def search_ref_repos(repo_id: str, repo_types: list, sort: str, show_labels: lis
         return r.get_gr_df(), r.get_gr_hide_labels(), r
     except Exception as e:
         raise gr.Error(e)
 def update_df(hide_labels: list, filter_item1: str, filter1: str, r: HFSearchResult):
     r.set_hide(hide_labels)
     r.set_filter(filter_item1, filter1)

 def dummy_gpu():
     pass
+TYPES_SHORT = {"model": "M", "dataset": "D", "space": "S", "paper": "P", "collection": "C"}
+TYPES_DESC = " / ".join([f"{v}={k}" for k, v in zip(list(TYPES_SHORT.keys()), list(TYPES_SHORT.values()))])
 RESULT_ITEMS = {
     "Type": [1, "str", True],
     "ID": [2, "markdown", True, "40%"],
         print(e)
         raise Exception(e)
+def get_collections(repo_id: str, repo_limit: int=100, user_limit: int=0):
+    try:
+        if "/" in repo_id: # Repo ID
+            repo_type = get_repo_type(repo_id)
+            likers = get_repo_likers(repo_id, repo_type)[0:user_limit+1]
+            cols = get_collections_by_repo(repo_id, repo_type, repo_limit) + get_collections_by_users(likers, 50)
+        else: cols = get_collections_by_users([repo_id], 50) # User ID
+        cols = list({k.slug: k for k in cols}.values())
+        return cols
+    except Exception as e:
+        print(e)
+        raise Exception(e)
 def str_to_list(s: str):
     try:
         m = re.split("\n", s)
             self.sort(sort)
         except Exception as e:
             raise Exception(f"Search error: {e}") from e
+    def search_collections(self, repo_id: str, sort: str, show_labels: list, repo_limit: int=100, user_limit: int=0, ui_mode="PC"):
+        try:
+            self.reset()
+            self.set_mode(ui_mode)
+            self.show_labels = show_labels.copy()
+            cols = get_collections(repo_id, repo_limit, user_limit)
+            for col in cols:
+                self.add_item(col)
+            self.sort(sort)
+        except Exception as e:
+            raise Exception(f"Search error: {e}") from e
     def search_ref_repos(self, repo_id: str, repo_types: str, sort: str, show_labels: list, limit=10, ui_mode="PC"):
         try:
         def id_to_md(df: pd.DataFrame, verbose=False):
             columns = list(df.index)
+            if df["Type"] == "collection": id = f'### [{df["User"]}/{df["Name"]}]({df["URL"]}){df["Emoji"]}'
+            elif df["Type"] == "space": id = f'### [{df["Name"]} ({df["ID"]})]({df["URL"]}){df["Emoji"]}'
+            elif df["Type"] == "paper": id = f'### [{df["Name"]} (arxiv:{df["ID"]})]({df["URL"]}){df["Emoji"]}'
+            else: id = f'### [{df["ID"]}]({df["URL"]}){df["Emoji"]}'
             if verbose:
                 l = []
                 if "NFAA" in columns and df["NFAA"] == "True": l.append('🤐')
                         if "Hardware" in columns and df["Hardware"] in SPACE_HARDWARES and df["Hardware"] != "cpu-basic": l.append(f'{df["Hardware"]}')
                         if "SDK" in columns: l.append(f'{df["SDK"]}')
                         if "Stage" in columns and df["Stage"] in SPACE_STAGES_EMOJI.keys(): l.append(f'{SPACE_STAGES_EMOJI[df["Stage"]]}')
+                if len(l) > 0: id += f"\n({' '.join(l)})"
             return id
+        def shorten_type(df: pd.DataFrame, shorten=False):
+            if shorten:
+                for k, v in TYPES_SHORT.items():
+                    if df["Type"] == k: return v
         def to_emoji(df: pd.DataFrame, label: str, key: str, emoji: str):
             """Return the row's "Emoji" text, with ``emoji`` added when df[label] equals ``key``.

             A non-matching row keeps its "Emoji" value as is. A matching row gets
             ``emoji`` appended, or a space followed by ``emoji`` when "Emoji" is empty.
             """
             if df[label] != key:
                 return df["Emoji"]
             current = df["Emoji"]
             if current:
                 return f'{current}{emoji}'
             return f' {emoji}'
         def format_md_df(df: pd.DataFrame, verbose=False):
             # Rewrites the "ID" and "Type" columns of df in place, row by row, and returns the same df.
             # Extra keyword arguments to df.apply are forwarded to the row function.
             df["ID"] = df.apply(id_to_md, axis=1, verbose=verbose)
+            # Must run after "ID" is built: id_to_md branches on the full type names in df["Type"].
+            df["Type"] = df.apply(shorten_type, axis=1, shorten=verbose)
             return df
         hide_labels = [l for l in labels if l in self.hide_labels or l not in self.get_show_labels()]
         return r.get_gr_df(), r.get_gr_hide_labels(), r
     except Exception as e:
         raise gr.Error(e)
+def search_cols(repo_id: str, sort: str, show_labels: list, repo_limit: int, user_limit: int, ui_mode: str, r: HFSearchResult):
+    try:
+        if not repo_id: raise gr.Error("Input Repo ID or User ID")
+        r.search_collections(repo_id, sort, show_labels, repo_limit, user_limit, ui_mode)
+        return r.get_gr_df(), r.get_gr_hide_labels(), r
+    except Exception as e:
+        raise gr.Error(e)
 def update_df(hide_labels: list, filter_item1: str, filter1: str, r: HFSearchResult):
     r.set_hide(hide_labels)
     r.set_filter(filter_item1, filter1)