Upload 8 files
Browse files
- README.md +1 -1
- app.py +21 -5
- hfsearch.py +49 -6
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🤗🔍
|
|
4 |
colorFrom: indigo
|
5 |
colorTo: purple
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
|
|
4 |
colorFrom: indigo
|
5 |
colorTo: purple
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.11.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
app.py
CHANGED
@@ -2,7 +2,7 @@ import spaces
|
|
2 |
import gradio as gr
|
3 |
from hfsearch import (HFSearchResult, search, update_filter, update_df, get_labels, get_valid_labels,
|
4 |
get_tags, get_subtag_categories, update_subtag_items, update_tags, update_subtags,
|
5 |
-
search_ref_repos, DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES)
|
6 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
7 |
|
8 |
CSS = """
|
@@ -14,15 +14,14 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
|
|
14 |
gr.Markdown("# Search Hugging Face🤗", elem_classes="title")
|
15 |
with gr.Column():
|
16 |
search_result = gr.State(value=HFSearchResult())
|
17 |
-
|
18 |
-
with gr.Tab("Normal Search"):
|
19 |
with gr.Group():
|
20 |
with gr.Row(equal_height=True):
|
21 |
repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space", "collection"], value=["model", "dataset", "space"])
|
22 |
filter_str = gr.Textbox(label="Filter", info="String(s) to filter repos", value="")
|
23 |
sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads", "trending_score"], value="likes")
|
24 |
-
|
25 |
with gr.Accordion("Advanced", open=False):
|
|
|
26 |
with gr.Row(equal_height=True):
|
27 |
search_str = gr.Textbox(label="Search", info="A string that will be contained in the returned repo ids", placeholder="bert", value="", lines=1)
|
28 |
#author = gr.Textbox(label="Author", info="The author (user or organization)", value="", lines=1)
|
@@ -93,6 +92,19 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
|
|
93 |
with gr.Row(equal_height=True):
|
94 |
rec_show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
|
95 |
rec_run_button = gr.Button("Search", variant="primary")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
with gr.Group():
|
97 |
with gr.Accordion("Filter", open=False):
|
98 |
hide_labels = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
|
@@ -100,6 +112,7 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
|
|
100 |
filter_item1 = gr.Dropdown(label="Filter item", choices=[""], value="", visible=False)
|
101 |
filter1 = gr.Dropdown(label="Filter", choices=[""], value="", allow_custom_value=True, visible=False)
|
102 |
filter_btn = gr.Button("Apply filter", variant="secondary", visible=False)
|
|
|
103 |
result_df = gr.DataFrame(label="Results", type="pandas", value=None, interactive=False)
|
104 |
|
105 |
run_button.click(search, [repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_status, gated_status, appr_status,
|
@@ -119,5 +132,8 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
|
|
119 |
gr.on(triggers=[rec_run_button.click, rec_repo_id.submit], fn=search_ref_repos,
|
120 |
inputs=[rec_repo_id, rec_repo_types, rec_sort, rec_show_labels, rec_limit, ui_mode, search_result],
|
121 |
outputs=[result_df, hide_labels, search_result])
|
|
|
|
|
|
|
122 |
|
123 |
-
demo.queue().launch()
|
|
|
2 |
import gradio as gr
|
3 |
from hfsearch import (HFSearchResult, search, update_filter, update_df, get_labels, get_valid_labels,
|
4 |
get_tags, get_subtag_categories, update_subtag_items, update_tags, update_subtags,
|
5 |
+
search_ref_repos, search_cols, DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES, TYPES_DESC)
|
6 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
7 |
|
8 |
CSS = """
|
|
|
14 |
gr.Markdown("# Search Hugging Face🤗", elem_classes="title")
|
15 |
with gr.Column():
|
16 |
search_result = gr.State(value=HFSearchResult())
|
17 |
+
with gr.Tab("Basic Search"):
|
|
|
18 |
with gr.Group():
|
19 |
with gr.Row(equal_height=True):
|
20 |
repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space", "collection"], value=["model", "dataset", "space"])
|
21 |
filter_str = gr.Textbox(label="Filter", info="String(s) to filter repos", value="")
|
22 |
sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads", "trending_score"], value="likes")
|
|
|
23 |
with gr.Accordion("Advanced", open=False):
|
24 |
+
ui_mode = gr.Radio(label="Mode", choices=["PC", "Phone"], value="Phone")
|
25 |
with gr.Row(equal_height=True):
|
26 |
search_str = gr.Textbox(label="Search", info="A string that will be contained in the returned repo ids", placeholder="bert", value="", lines=1)
|
27 |
#author = gr.Textbox(label="Author", info="The author (user or organization)", value="", lines=1)
|
|
|
92 |
with gr.Row(equal_height=True):
|
93 |
rec_show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
|
94 |
rec_run_button = gr.Button("Search", variant="primary")
|
95 |
+
with gr.Tab("Find collections"):
|
96 |
+
with gr.Group():
|
97 |
+
with gr.Row(equal_height=True):
|
98 |
+
#rec_repo_id = gr.Textbox(label="Repo ID", info="Input your favorite repo", value="")
|
99 |
+
col_repo_id = HuggingfaceHubSearch(label="Repo ID or User ID", placeholder="Input your favorite Repo ID or User ID", search_type=["model", "dataset", "space", "user"],
|
100 |
+
sumbit_on_select=False)
|
101 |
+
col_sort = gr.Radio(label="Sort", choices=["last_modified", "trending_score", "likes"], value="likes")
|
102 |
+
with gr.Row(equal_height=True):
|
103 |
+
col_repo_limit = gr.Number(label="Limit for repos", value=50, step=1, minimum=1, maximum=100)
|
104 |
+
col_user_limit = gr.Number(label="Limit for users", value=10, step=1, minimum=0, maximum=50)
|
105 |
+
with gr.Accordion("Advanced", open=False):
|
106 |
+
col_show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
|
107 |
+
col_run_button = gr.Button("Search", variant="primary")
|
108 |
with gr.Group():
|
109 |
with gr.Accordion("Filter", open=False):
|
110 |
hide_labels = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
|
|
|
112 |
filter_item1 = gr.Dropdown(label="Filter item", choices=[""], value="", visible=False)
|
113 |
filter1 = gr.Dropdown(label="Filter", choices=[""], value="", allow_custom_value=True, visible=False)
|
114 |
filter_btn = gr.Button("Apply filter", variant="secondary", visible=False)
|
115 |
+
types_desc = gr.Markdown(TYPES_DESC, elem_classes="info")
|
116 |
result_df = gr.DataFrame(label="Results", type="pandas", value=None, interactive=False)
|
117 |
|
118 |
run_button.click(search, [repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_status, gated_status, appr_status,
|
|
|
132 |
gr.on(triggers=[rec_run_button.click, rec_repo_id.submit], fn=search_ref_repos,
|
133 |
inputs=[rec_repo_id, rec_repo_types, rec_sort, rec_show_labels, rec_limit, ui_mode, search_result],
|
134 |
outputs=[result_df, hide_labels, search_result])
|
135 |
+
gr.on(triggers=[col_run_button.click, col_repo_id.submit], fn=search_cols,
|
136 |
+
inputs=[col_repo_id, col_sort, col_show_labels, col_repo_limit, col_user_limit, ui_mode, search_result],
|
137 |
+
outputs=[result_df, hide_labels, search_result])
|
138 |
|
139 |
+
demo.queue().launch(ssr_mode=False)
|
hfsearch.py
CHANGED
@@ -14,6 +14,10 @@ from hfconstants import DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES, SPACE
|
|
14 |
def dummy_gpu():
|
15 |
pass
|
16 |
|
|
|
|
|
|
|
|
|
17 |
RESULT_ITEMS = {
|
18 |
"Type": [1, "str", True],
|
19 |
"ID": [2, "markdown", True, "40%"],
|
@@ -224,6 +228,19 @@ def get_ref_collections(repo_id: str, limit=10):
|
|
224 |
print(e)
|
225 |
raise Exception(e)
|
226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
def str_to_list(s: str):
|
228 |
try:
|
229 |
m = re.split("\n", s)
|
@@ -435,6 +452,18 @@ class HFSearchResult():
|
|
435 |
self.sort(sort)
|
436 |
except Exception as e:
|
437 |
raise Exception(f"Search error: {e}") from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
438 |
|
439 |
def search_ref_repos(self, repo_id: str, repo_types: str, sort: str, show_labels: list, limit=10, ui_mode="PC"):
|
440 |
try:
|
@@ -509,10 +538,10 @@ class HFSearchResult():
|
|
509 |
|
510 |
def id_to_md(df: pd.DataFrame, verbose=False):
|
511 |
columns = list(df.index)
|
512 |
-
if df["Type"] == "collection": id = f'[{df["User"]}/{df["Name"]}]({df["URL"]}){df["Emoji"]}'
|
513 |
-
elif df["Type"] == "space": id = f'[{df["Name"]} ({df["ID"]})]({df["URL"]}){df["Emoji"]}'
|
514 |
-
elif df["Type"] == "paper": id = f'[{df["Name"]} (arxiv:{df["ID"]})]({df["URL"]}){df["Emoji"]}'
|
515 |
-
else: id = f'[{df["ID"]}]({df["URL"]}){df["Emoji"]}'
|
516 |
if verbose:
|
517 |
l = []
|
518 |
if "NFAA" in columns and df["NFAA"] == "True": l.append('🤐')
|
@@ -530,9 +559,14 @@ class HFSearchResult():
|
|
530 |
if "Hardware" in columns and df["Hardware"] in SPACE_HARDWARES and df["Hardware"] != "cpu-basic": l.append(f'{df["Hardware"]}')
|
531 |
if "SDK" in columns: l.append(f'{df["SDK"]}')
|
532 |
if "Stage" in columns and df["Stage"] in SPACE_STAGES_EMOJI.keys(): l.append(f'{SPACE_STAGES_EMOJI[df["Stage"]]}')
|
533 |
-
if len(l) > 0: id += f"
|
534 |
return id
|
535 |
|
|
|
|
|
|
|
|
|
|
|
536 |
def to_emoji(df: pd.DataFrame, label: str, key: str, emoji: str):
|
537 |
if df[label] == key: return f'{df["Emoji"]}{emoji}' if df["Emoji"] else f' {emoji}'
|
538 |
else: return df["Emoji"]
|
@@ -546,6 +580,7 @@ class HFSearchResult():
|
|
546 |
|
547 |
def format_md_df(df: pd.DataFrame, verbose=False):
|
548 |
df["ID"] = df.apply(id_to_md, axis=1, verbose=verbose)
|
|
|
549 |
return df
|
550 |
|
551 |
hide_labels = [l for l in labels if l in self.hide_labels or l not in self.get_show_labels()]
|
@@ -651,7 +686,15 @@ def search_ref_repos(repo_id: str, repo_types: list, sort: str, show_labels: lis
|
|
651 |
return r.get_gr_df(), r.get_gr_hide_labels(), r
|
652 |
except Exception as e:
|
653 |
raise gr.Error(e)
|
654 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
655 |
def update_df(hide_labels: list, filter_item1: str, filter1: str, r: HFSearchResult):
|
656 |
r.set_hide(hide_labels)
|
657 |
r.set_filter(filter_item1, filter1)
|
|
|
14 |
def dummy_gpu():
|
15 |
pass
|
16 |
|
17 |
+
# One-letter abbreviation for each repo type.
TYPES_SHORT = {"model": "M", "dataset": "D", "space": "S", "paper": "P", "collection": "C"}

# Legend for the abbreviations above, in the form "M=model / D=dataset / ...".
TYPES_DESC = " / ".join(f"{short}={name}" for name, short in TYPES_SHORT.items())
|
20 |
+
|
21 |
RESULT_ITEMS = {
|
22 |
"Type": [1, "str", True],
|
23 |
"ID": [2, "markdown", True, "40%"],
|
|
|
228 |
print(e)
|
229 |
raise Exception(e)
|
230 |
|
231 |
+
def get_collections(repo_id: str, repo_limit: int=100, user_limit: int=0):
    """Gather collections related to a repo ID or a user ID.

    If ``repo_id`` contains a "/", it is treated as a repo ID: the result
    combines the collections returned by ``get_collections_by_repo`` with the
    collections of the repo's likers. Otherwise it is treated as a user ID and
    only that user's collections are fetched.

    Args:
        repo_id: A repo ID ("owner/name") or a user ID (no slash).
        repo_limit: Limit passed to ``get_collections_by_repo``.
        user_limit: Controls how many likers are looked up (see note below).

    Returns:
        A list of collection objects with duplicates (same ``slug``) removed.

    Raises:
        Exception: Any failure is printed and re-raised, chained to its cause.
    """
    try:
        if "/" in repo_id:  # Repo ID
            repo_type = get_repo_type(repo_id)
            # NOTE(review): this slice keeps user_limit + 1 likers, so user_limit=0
            # still looks up one liker. Confirm whether that is intended.
            likers = get_repo_likers(repo_id, repo_type)[0:user_limit+1]
            cols = get_collections_by_repo(repo_id, repo_type, repo_limit) + get_collections_by_users(likers, 50)
        else:  # User ID
            cols = get_collections_by_users([repo_id], 50)
        # Deduplicate by slug: for a repeated slug the last object wins, while
        # the position of its first occurrence is kept.
        return list({k.slug: k for k in cols}.values())
    except Exception as e:
        print(e)
        # Chain the cause so the original traceback is preserved, matching the
        # "raise ... from e" style used by the search methods in this file.
        raise Exception(e) from e
|
243 |
+
|
244 |
def str_to_list(s: str):
|
245 |
try:
|
246 |
m = re.split("\n", s)
|
|
|
452 |
self.sort(sort)
|
453 |
except Exception as e:
|
454 |
raise Exception(f"Search error: {e}") from e
|
455 |
+
|
456 |
+
def search_collections(self, repo_id: str, sort: str, show_labels: list, repo_limit: int=100, user_limit: int=0, ui_mode="PC"):
    """Replace the current results with collections found for a repo or user ID.

    Resets this result object, applies the UI mode and a private copy of
    ``show_labels``, adds every collection returned by ``get_collections`` and
    finally sorts the items by ``sort``.

    Raises:
        Exception: "Search error: ..." wrapping any failure, chained to its cause.
    """
    try:
        self.reset()
        self.set_mode(ui_mode)
        self.show_labels = show_labels.copy()
        for found in get_collections(repo_id, repo_limit, user_limit):
            self.add_item(found)
        self.sort(sort)
    except Exception as err:
        raise Exception(f"Search error: {err}") from err
|
467 |
|
468 |
def search_ref_repos(self, repo_id: str, repo_types: str, sort: str, show_labels: list, limit=10, ui_mode="PC"):
|
469 |
try:
|
|
|
538 |
|
539 |
def id_to_md(df: pd.DataFrame, verbose=False):
|
540 |
columns = list(df.index)
|
541 |
+
if df["Type"] == "collection": id = f'### [{df["User"]}/{df["Name"]}]({df["URL"]}){df["Emoji"]}'
|
542 |
+
elif df["Type"] == "space": id = f'### [{df["Name"]} ({df["ID"]})]({df["URL"]}){df["Emoji"]}'
|
543 |
+
elif df["Type"] == "paper": id = f'### [{df["Name"]} (arxiv:{df["ID"]})]({df["URL"]}){df["Emoji"]}'
|
544 |
+
else: id = f'### [{df["ID"]}]({df["URL"]}){df["Emoji"]}'
|
545 |
if verbose:
|
546 |
l = []
|
547 |
if "NFAA" in columns and df["NFAA"] == "True": l.append('🤐')
|
|
|
559 |
if "Hardware" in columns and df["Hardware"] in SPACE_HARDWARES and df["Hardware"] != "cpu-basic": l.append(f'{df["Hardware"]}')
|
560 |
if "SDK" in columns: l.append(f'{df["SDK"]}')
|
561 |
if "Stage" in columns and df["Stage"] in SPACE_STAGES_EMOJI.keys(): l.append(f'{SPACE_STAGES_EMOJI[df["Stage"]]}')
|
562 |
+
if len(l) > 0: id += f"\n({' '.join(l)})"
|
563 |
return id
|
564 |
|
565 |
+
def shorten_type(df: pd.DataFrame, shorten=False):
    """Return the value for the "Type" cell of one row.

    Used with ``DataFrame.apply(..., axis=1)``, so ``df`` is a single row.

    Args:
        df: A row exposing a "Type" entry.
        shorten: When true, map the type to its one-letter code in TYPES_SHORT.

    Returns:
        The abbreviated type when ``shorten`` is true and the type is known;
        otherwise the original "Type" value unchanged. Previously ``None`` was
        returned in those cases, which blanked the column.
    """
    if not shorten:
        return df["Type"]
    return TYPES_SHORT.get(df["Type"], df["Type"])
|
569 |
+
|
570 |
def to_emoji(df: pd.DataFrame, label: str, key: str, emoji: str):
|
571 |
if df[label] == key: return f'{df["Emoji"]}{emoji}' if df["Emoji"] else f' {emoji}'
|
572 |
else: return df["Emoji"]
|
|
|
580 |
|
581 |
def format_md_df(df: pd.DataFrame, verbose=False):
    """Rewrite the "ID" and "Type" columns of ``df`` in place and return it.

    "ID" is rebuilt row by row with ``id_to_md`` and "Type" with
    ``shorten_type``; ``verbose`` is forwarded to both.
    """
    # "ID" must be built first: id_to_md branches on the full "Type" value,
    # which the next statement may replace with an abbreviation.
    df["ID"] = df.apply(lambda row: id_to_md(row, verbose=verbose), axis=1)
    df["Type"] = df.apply(lambda row: shorten_type(row, shorten=verbose), axis=1)
    return df
|
585 |
|
586 |
hide_labels = [l for l in labels if l in self.hide_labels or l not in self.get_show_labels()]
|
|
|
686 |
return r.get_gr_df(), r.get_gr_hide_labels(), r
|
687 |
except Exception as e:
|
688 |
raise gr.Error(e)
|
689 |
+
|
690 |
+
def search_cols(repo_id: str, sort: str, show_labels: list, repo_limit: int, user_limit: int, ui_mode: str, r: HFSearchResult):
    """Gradio handler: search collections for a repo ID or user ID.

    Args:
        repo_id: Repo ID or user ID entered by the user; must not be empty.
        sort: Sort key passed to the search.
        show_labels: Labels to show in the result table.
        repo_limit: Limit for repos (coerced to int).
        user_limit: Limit for users (coerced to int).
        ui_mode: UI mode passed to the search.
        r: The search-result state object, updated and returned.

    Returns:
        A tuple of (result dataframe update, hide-labels update, ``r``).

    Raises:
        gr.Error: If ``repo_id`` is empty or the search fails.
    """
    # Validate before the try block so this error is not caught and re-wrapped below.
    if not repo_id: raise gr.Error("Input Repo ID or User ID")
    try:
        # The limits come from numeric inputs and end up in a slice index
        # downstream, so force them to integers.
        r.search_collections(repo_id, sort, show_labels, int(repo_limit), int(user_limit), ui_mode)
        return r.get_gr_df(), r.get_gr_hide_labels(), r
    except Exception as e:
        raise gr.Error(e) from e
|
697 |
+
|
698 |
def update_df(hide_labels: list, filter_item1: str, filter1: str, r: HFSearchResult):
|
699 |
r.set_hide(hide_labels)
|
700 |
r.set_filter(filter_item1, filter1)
|