John6666 committed on
Commit
a9c10db
·
verified ·
1 Parent(s): 12b1fe6

Upload 8 files

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +21 -5
  3. hfsearch.py +49 -6
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🤗🔍
4
  colorFrom: indigo
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
4
  colorFrom: indigo
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.11.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py CHANGED
@@ -2,7 +2,7 @@ import spaces
2
  import gradio as gr
3
  from hfsearch import (HFSearchResult, search, update_filter, update_df, get_labels, get_valid_labels,
4
  get_tags, get_subtag_categories, update_subtag_items, update_tags, update_subtags,
5
- search_ref_repos, DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES)
6
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
7
 
8
  CSS = """
@@ -14,15 +14,14 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
14
  gr.Markdown("# Search Hugging Face🤗", elem_classes="title")
15
  with gr.Column():
16
  search_result = gr.State(value=HFSearchResult())
17
- ui_mode = gr.Radio(label="Mode", choices=["PC", "Phone"], value="Phone")
18
- with gr.Tab("Normal Search"):
19
  with gr.Group():
20
  with gr.Row(equal_height=True):
21
  repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space", "collection"], value=["model", "dataset", "space"])
22
  filter_str = gr.Textbox(label="Filter", info="String(s) to filter repos", value="")
23
  sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads", "trending_score"], value="likes")
24
-
25
  with gr.Accordion("Advanced", open=False):
 
26
  with gr.Row(equal_height=True):
27
  search_str = gr.Textbox(label="Search", info="A string that will be contained in the returned repo ids", placeholder="bert", value="", lines=1)
28
  #author = gr.Textbox(label="Author", info="The author (user or organization)", value="", lines=1)
@@ -93,6 +92,19 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
93
  with gr.Row(equal_height=True):
94
  rec_show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
95
  rec_run_button = gr.Button("Search", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  with gr.Group():
97
  with gr.Accordion("Filter", open=False):
98
  hide_labels = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
@@ -100,6 +112,7 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
100
  filter_item1 = gr.Dropdown(label="Filter item", choices=[""], value="", visible=False)
101
  filter1 = gr.Dropdown(label="Filter", choices=[""], value="", allow_custom_value=True, visible=False)
102
  filter_btn = gr.Button("Apply filter", variant="secondary", visible=False)
 
103
  result_df = gr.DataFrame(label="Results", type="pandas", value=None, interactive=False)
104
 
105
  run_button.click(search, [repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_status, gated_status, appr_status,
@@ -119,5 +132,8 @@ with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
119
  gr.on(triggers=[rec_run_button.click, rec_repo_id.submit], fn=search_ref_repos,
120
  inputs=[rec_repo_id, rec_repo_types, rec_sort, rec_show_labels, rec_limit, ui_mode, search_result],
121
  outputs=[result_df, hide_labels, search_result])
 
 
 
122
 
123
- demo.queue().launch()
 
2
  import gradio as gr
3
  from hfsearch import (HFSearchResult, search, update_filter, update_df, get_labels, get_valid_labels,
4
  get_tags, get_subtag_categories, update_subtag_items, update_tags, update_subtags,
5
+ search_ref_repos, search_cols, DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES, TYPES_DESC)
6
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
7
 
8
  CSS = """
 
14
  gr.Markdown("# Search Hugging Face🤗", elem_classes="title")
15
  with gr.Column():
16
  search_result = gr.State(value=HFSearchResult())
17
+ with gr.Tab("Basic Search"):
 
18
  with gr.Group():
19
  with gr.Row(equal_height=True):
20
  repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space", "collection"], value=["model", "dataset", "space"])
21
  filter_str = gr.Textbox(label="Filter", info="String(s) to filter repos", value="")
22
  sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads", "trending_score"], value="likes")
 
23
  with gr.Accordion("Advanced", open=False):
24
+ ui_mode = gr.Radio(label="Mode", choices=["PC", "Phone"], value="Phone")
25
  with gr.Row(equal_height=True):
26
  search_str = gr.Textbox(label="Search", info="A string that will be contained in the returned repo ids", placeholder="bert", value="", lines=1)
27
  #author = gr.Textbox(label="Author", info="The author (user or organization)", value="", lines=1)
 
92
  with gr.Row(equal_height=True):
93
  rec_show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
94
  rec_run_button = gr.Button("Search", variant="primary")
95
+ with gr.Tab("Find collections"):
96
+ with gr.Group():
97
+ with gr.Row(equal_height=True):
98
+ #rec_repo_id = gr.Textbox(label="Repo ID", info="Input your favorite repo", value="")
99
+ col_repo_id = HuggingfaceHubSearch(label="Repo ID or User ID", placeholder="Input your favorite Repo ID or User ID", search_type=["model", "dataset", "space", "user"],
100
+ sumbit_on_select=False)
101
+ col_sort = gr.Radio(label="Sort", choices=["last_modified", "trending_score", "likes"], value="likes")
102
+ with gr.Row(equal_height=True):
103
+ col_repo_limit = gr.Number(label="Limit for repos", value=50, step=1, minimum=1, maximum=100)
104
+ col_user_limit = gr.Number(label="Limit for users", value=10, step=1, minimum=0, maximum=50)
105
+ with gr.Accordion("Advanced", open=False):
106
+ col_show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
107
+ col_run_button = gr.Button("Search", variant="primary")
108
  with gr.Group():
109
  with gr.Accordion("Filter", open=False):
110
  hide_labels = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
 
112
  filter_item1 = gr.Dropdown(label="Filter item", choices=[""], value="", visible=False)
113
  filter1 = gr.Dropdown(label="Filter", choices=[""], value="", allow_custom_value=True, visible=False)
114
  filter_btn = gr.Button("Apply filter", variant="secondary", visible=False)
115
+ types_desc = gr.Markdown(TYPES_DESC, elem_classes="info")
116
  result_df = gr.DataFrame(label="Results", type="pandas", value=None, interactive=False)
117
 
118
  run_button.click(search, [repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_status, gated_status, appr_status,
 
132
  gr.on(triggers=[rec_run_button.click, rec_repo_id.submit], fn=search_ref_repos,
133
  inputs=[rec_repo_id, rec_repo_types, rec_sort, rec_show_labels, rec_limit, ui_mode, search_result],
134
  outputs=[result_df, hide_labels, search_result])
135
+ gr.on(triggers=[col_run_button.click, col_repo_id.submit], fn=search_cols,
136
+ inputs=[col_repo_id, col_sort, col_show_labels, col_repo_limit, col_user_limit, ui_mode, search_result],
137
+ outputs=[result_df, hide_labels, search_result])
138
 
139
+ demo.queue().launch(ssr_mode=False)
hfsearch.py CHANGED
@@ -14,6 +14,10 @@ from hfconstants import DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES, SPACE
14
  def dummy_gpu():
15
  pass
16
 
 
 
 
 
17
  RESULT_ITEMS = {
18
  "Type": [1, "str", True],
19
  "ID": [2, "markdown", True, "40%"],
@@ -224,6 +228,19 @@ def get_ref_collections(repo_id: str, limit=10):
224
  print(e)
225
  raise Exception(e)
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  def str_to_list(s: str):
228
  try:
229
  m = re.split("\n", s)
@@ -435,6 +452,18 @@ class HFSearchResult():
435
  self.sort(sort)
436
  except Exception as e:
437
  raise Exception(f"Search error: {e}") from e
 
 
 
 
 
 
 
 
 
 
 
 
438
 
439
  def search_ref_repos(self, repo_id: str, repo_types: str, sort: str, show_labels: list, limit=10, ui_mode="PC"):
440
  try:
@@ -509,10 +538,10 @@ class HFSearchResult():
509
 
510
  def id_to_md(df: pd.DataFrame, verbose=False):
511
  columns = list(df.index)
512
- if df["Type"] == "collection": id = f'[{df["User"]}/{df["Name"]}]({df["URL"]}){df["Emoji"]}'
513
- elif df["Type"] == "space": id = f'[{df["Name"]} ({df["ID"]})]({df["URL"]}){df["Emoji"]}'
514
- elif df["Type"] == "paper": id = f'[{df["Name"]} (arxiv:{df["ID"]})]({df["URL"]}){df["Emoji"]}'
515
- else: id = f'[{df["ID"]}]({df["URL"]}){df["Emoji"]}'
516
  if verbose:
517
  l = []
518
  if "NFAA" in columns and df["NFAA"] == "True": l.append('🤐')
@@ -530,9 +559,14 @@ class HFSearchResult():
530
  if "Hardware" in columns and df["Hardware"] in SPACE_HARDWARES and df["Hardware"] != "cpu-basic": l.append(f'{df["Hardware"]}')
531
  if "SDK" in columns: l.append(f'{df["SDK"]}')
532
  if "Stage" in columns and df["Stage"] in SPACE_STAGES_EMOJI.keys(): l.append(f'{SPACE_STAGES_EMOJI[df["Stage"]]}')
533
- if len(l) > 0: id += f" ({' '.join(l)})"
534
  return id
535
 
 
 
 
 
 
536
  def to_emoji(df: pd.DataFrame, label: str, key: str, emoji: str):
537
  if df[label] == key: return f'{df["Emoji"]}{emoji}' if df["Emoji"] else f' {emoji}'
538
  else: return df["Emoji"]
@@ -546,6 +580,7 @@ class HFSearchResult():
546
 
547
  def format_md_df(df: pd.DataFrame, verbose=False):
548
  df["ID"] = df.apply(id_to_md, axis=1, verbose=verbose)
 
549
  return df
550
 
551
  hide_labels = [l for l in labels if l in self.hide_labels or l not in self.get_show_labels()]
@@ -651,7 +686,15 @@ def search_ref_repos(repo_id: str, repo_types: list, sort: str, show_labels: lis
651
  return r.get_gr_df(), r.get_gr_hide_labels(), r
652
  except Exception as e:
653
  raise gr.Error(e)
654
-
 
 
 
 
 
 
 
 
655
  def update_df(hide_labels: list, filter_item1: str, filter1: str, r: HFSearchResult):
656
  r.set_hide(hide_labels)
657
  r.set_filter(filter_item1, filter1)
 
14
  def dummy_gpu():
15
  pass
16
 
17
# Single-letter abbreviations for the "Type" column, keyed by repo type.
TYPES_SHORT = {"model": "M", "dataset": "D", "space": "S", "paper": "P", "collection": "C"}

# Human-readable legend for the abbreviations, e.g. "M=model / D=dataset / ...".
TYPES_DESC = " / ".join(f"{v}={k}" for k, v in TYPES_SHORT.items())
20
+
21
  RESULT_ITEMS = {
22
  "Type": [1, "str", True],
23
  "ID": [2, "markdown", True, "40%"],
 
228
  print(e)
229
  raise Exception(e)
230
 
231
def get_collections(repo_id: str, repo_limit: int=100, user_limit: int=0):
    """Gather collections for a repo ID or a user ID, de-duplicated by slug.

    An ID containing "/" is handled as a repo ID: the collections returned by
    ``get_collections_by_repo`` are combined with those of the users returned
    by ``get_repo_likers``. Any other ID is handled as a user ID.

    Args:
        repo_id: Repo ID ("owner/name") or user ID.
        repo_limit: Limit passed to ``get_collections_by_repo``.
        user_limit: Bounds how many entries of ``get_repo_likers`` are used.

    Returns:
        A list of collection objects with unique ``slug`` values.

    Raises:
        Exception: Wraps any error raised by the lookups (printed first).
    """
    try:
        if "/" in repo_id:  # Repo ID
            repo_type = get_repo_type(repo_id)
            # NOTE(review): this keeps user_limit + 1 likers, so user_limit=0 still uses one - confirm intent.
            likers = get_repo_likers(repo_id, repo_type)[0:user_limit+1]
            cols = get_collections_by_repo(repo_id, repo_type, repo_limit) + get_collections_by_users(likers, 50)
        else:  # User ID
            cols = get_collections_by_users([repo_id], 50)
        # Key by slug so a collection reached through several paths appears once.
        return list({k.slug: k for k in cols}.values())
    except Exception as e:
        print(e)
        # Chain the cause explicitly, as search_collections does.
        raise Exception(e) from e
243
+
244
  def str_to_list(s: str):
245
  try:
246
  m = re.split("\n", s)
 
452
  self.sort(sort)
453
  except Exception as e:
454
  raise Exception(f"Search error: {e}") from e
455
+
456
def search_collections(self, repo_id: str, sort: str, show_labels: list, repo_limit: int=100, user_limit: int=0, ui_mode="PC"):
    """Reset this result set, fill it with collections for ``repo_id``, then sort.

    Args:
        repo_id: Repo ID or user ID forwarded to ``get_collections``.
        sort: Sort key forwarded to ``self.sort``.
        show_labels: Labels to show; a copy is stored on ``self.show_labels``.
        repo_limit: Forwarded to ``get_collections``.
        user_limit: Forwarded to ``get_collections``.
        ui_mode: Forwarded to ``self.set_mode``.

    Raises:
        Exception: "Search error: ..." wrapping any failure, chained to the cause.
    """
    try:
        self.reset()
        self.set_mode(ui_mode)
        self.show_labels = show_labels.copy()
        for found in get_collections(repo_id, repo_limit, user_limit):
            self.add_item(found)
        self.sort(sort)
    except Exception as err:
        raise Exception(f"Search error: {err}") from err
467
 
468
  def search_ref_repos(self, repo_id: str, repo_types: str, sort: str, show_labels: list, limit=10, ui_mode="PC"):
469
  try:
 
538
 
539
  def id_to_md(df: pd.DataFrame, verbose=False):
540
  columns = list(df.index)
541
+ if df["Type"] == "collection": id = f'### [{df["User"]}/{df["Name"]}]({df["URL"]}){df["Emoji"]}'
542
+ elif df["Type"] == "space": id = f'### [{df["Name"]} ({df["ID"]})]({df["URL"]}){df["Emoji"]}'
543
+ elif df["Type"] == "paper": id = f'### [{df["Name"]} (arxiv:{df["ID"]})]({df["URL"]}){df["Emoji"]}'
544
+ else: id = f'### [{df["ID"]}]({df["URL"]}){df["Emoji"]}'
545
  if verbose:
546
  l = []
547
  if "NFAA" in columns and df["NFAA"] == "True": l.append('🤐')
 
559
  if "Hardware" in columns and df["Hardware"] in SPACE_HARDWARES and df["Hardware"] != "cpu-basic": l.append(f'{df["Hardware"]}')
560
  if "SDK" in columns: l.append(f'{df["SDK"]}')
561
  if "Stage" in columns and df["Stage"] in SPACE_STAGES_EMOJI.keys(): l.append(f'{SPACE_STAGES_EMOJI[df["Stage"]]}')
562
+ if len(l) > 0: id += f"\n({' '.join(l)})"
563
  return id
564
 
565
def shorten_type(df: pd.DataFrame, shorten=False):
    """Return the value to display in the "Type" column for one row.

    Args:
        df: A single row (as passed by ``DataFrame.apply(..., axis=1)``) with a "Type" entry.
        shorten: When true, map the type to its abbreviation in ``TYPES_SHORT``.

    Returns:
        The abbreviation when ``shorten`` is true and the type is a key of
        ``TYPES_SHORT``; otherwise the original "Type" value unchanged.
    """
    current = df["Type"]
    if not shorten:
        # Falling through here used to return None, which blanked the whole column.
        return current
    # Unknown types are passed through instead of becoming None.
    return TYPES_SHORT.get(current, current)
569
+
570
  def to_emoji(df: pd.DataFrame, label: str, key: str, emoji: str):
571
  if df[label] == key: return f'{df["Emoji"]}{emoji}' if df["Emoji"] else f' {emoji}'
572
  else: return df["Emoji"]
 
580
 
581
  def format_md_df(df: pd.DataFrame, verbose=False):
582
  df["ID"] = df.apply(id_to_md, axis=1, verbose=verbose)
583
+ df["Type"] = df.apply(shorten_type, axis=1, shorten=verbose)
584
  return df
585
 
586
  hide_labels = [l for l in labels if l in self.hide_labels or l not in self.get_show_labels()]
 
686
  return r.get_gr_df(), r.get_gr_hide_labels(), r
687
  except Exception as e:
688
  raise gr.Error(e)
689
+
690
def search_cols(repo_id: str, sort: str, show_labels: list, repo_limit: int, user_limit: int, ui_mode: str, r: HFSearchResult):
    """Gradio handler that searches collections for a repo ID or user ID.

    Args:
        repo_id: Repo ID or user ID entered by the user; must be non-empty.
        sort: Sort key forwarded to ``r.search_collections``.
        show_labels: Labels to show, forwarded to ``r.search_collections``.
        repo_limit: Forwarded to ``r.search_collections``.
        user_limit: Forwarded to ``r.search_collections``.
        ui_mode: Forwarded to ``r.search_collections``.
        r: The search-result object that runs the search.

    Returns:
        The tuple ``(r.get_gr_df(), r.get_gr_hide_labels(), r)``.

    Raises:
        gr.Error: If ``repo_id`` is empty or the search fails.
    """
    # Validate before the try block so this message is not caught and re-wrapped below.
    if not repo_id: raise gr.Error("Input Repo ID or User ID")
    try:
        r.search_collections(repo_id, sort, show_labels, repo_limit, user_limit, ui_mode)
        return r.get_gr_df(), r.get_gr_hide_labels(), r
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(e) from e
697
+
698
  def update_df(hide_labels: list, filter_item1: str, filter1: str, r: HFSearchResult):
699
  r.set_hide(hide_labels)
700
  r.set_filter(filter_item1, filter1)