Upload 3 files
Browse files- hfconstants.py +15 -0
- hfsearch.py +31 -13
hfconstants.py
CHANGED
@@ -5,3 +5,18 @@ DS_SIZE_CATEGORIES = ["n<1K", "1K<n<10K", "10K<n<100K", "100K<n<1M", "1M<n<10M",
|
|
5 |
SPACE_HARDWARES = ["cpu-basic", "zero-a10g", "cpu-upgrade", "t4-small", "l4x1", "a10g-large", "l40sx1", "a10g-small", "t4-medium", "cpu-xl", "a100-large"]
|
6 |
|
7 |
SPACE_STAGES = ["RUNNING", "SLEEPING", "RUNTIME_ERROR", "PAUSED", "BUILD_ERROR", "CONFIG_ERROR", "BUILDING", "APP_STARTING", "RUNNING_APP_STARTING"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
SPACE_HARDWARES = ["cpu-basic", "zero-a10g", "cpu-upgrade", "t4-small", "l4x1", "a10g-large", "l40sx1", "a10g-small", "t4-medium", "cpu-xl", "a100-large"]
|
6 |
|
7 |
SPACE_STAGES = ["RUNNING", "SLEEPING", "RUNTIME_ERROR", "PAUSED", "BUILD_ERROR", "CONFIG_ERROR", "BUILDING", "APP_STARTING", "RUNNING_APP_STARTING"]
|
8 |
+
|
9 |
+
PIPELINES = ["text-generation", "text-to-image", "image-text-to-text", "fill-mask", "text-classification", "sentence-similarity", "automatic-speech-recognition",
|
10 |
+
"feature-extraction", "text-to-speech", "text2text-generation", "image-to-image", "text-to-video", "zero-shot-image-classification",
|
11 |
+
"image-classification", "image-to-video", "image-to-text", "token-classification", "translation", "time-series-forecasting", "audio-classification",
|
12 |
+
"object-detection", "audio-text-to-text", "zero-shot-classification", "text-to-audio", "image-segmentation", "image-feature-extraction",
|
13 |
+
"video-text-to-text", "image-to-3d", "any-to-any", "question-answering", "text-to-3d", "zero-shot-object-detection", "summarization",
|
14 |
+
"document-question-answering", "visual-question-answering", "depth-estimation", "voice-activity-detection", "audio-to-audio", "video-classification"]
|
15 |
+
|
16 |
+
EMOJIS = {
|
17 |
+
"Pipeline": {"text-to-image": "🎨", "image-to-image": "🖼️", "text-to-speech": "🔊", "automatic-speech-recognition": "🎤",
|
18 |
+
"text-to-audio": "🎵", "audio-text-to-text": "🎙️",
|
19 |
+
"image-to-video": "🎞️", "text-to-video": "🎬",
|
20 |
+
"text-generation": "🤖", "text2text-generation": "🤖"},
|
21 |
+
"SDK": {"docker": "🐳"}
|
22 |
+
}
|
hfsearch.py
CHANGED
@@ -8,7 +8,7 @@ import pandas as pd
|
|
8 |
import datetime
|
9 |
import json
|
10 |
import re
|
11 |
-
from hfconstants import DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES
|
12 |
|
13 |
@spaces.GPU
|
14 |
def dummy_gpu():
|
@@ -29,8 +29,10 @@ RESULT_ITEMS = {
|
|
29 |
"LastMod.": [17, "str", True],
|
30 |
"Library": [20, "markdown", False],
|
31 |
"Pipeline": [21, "markdown", True],
|
|
|
32 |
"Hardware": [25, "str", False],
|
33 |
"Stage": [26, "str", False],
|
|
|
34 |
"NFAA": [40, "str", False],
|
35 |
}
|
36 |
|
@@ -265,14 +267,12 @@ class Labels():
|
|
265 |
return labels, label_types
|
266 |
|
267 |
def get_widths(self):
|
268 |
-
|
269 |
-
label_widths = [self.widths[s] for s in labels]
|
270 |
-
return label_widths
|
271 |
|
272 |
def get_null_value(self, type: str):
|
273 |
if type == "bool": return False
|
274 |
-
elif type == "number" or type == "date": return 0
|
275 |
-
else: return "
|
276 |
|
277 |
# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api
|
278 |
# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api#huggingface_hub.ModelInfo
|
@@ -313,6 +313,7 @@ class HFSearchResult():
|
|
313 |
elif isinstance(i, Collection): type = "collection"
|
314 |
else: return
|
315 |
self._set(type, "Type")
|
|
|
316 |
if type in ["space", "model", "dataset"]:
|
317 |
self._set(i.id, "ID")
|
318 |
self._set(i.id.split("/")[0], "User")
|
@@ -333,9 +334,13 @@ class HFSearchResult():
|
|
333 |
if i.library_name is not None: self._set(i.library_name, "Library")
|
334 |
if i.pipeline_tag is not None: self._set(i.pipeline_tag, "Pipeline")
|
335 |
if type == "space":
|
|
|
336 |
if i.runtime is not None:
|
337 |
self._set(i.runtime.hardware, "Hardware")
|
338 |
self._set(i.runtime.stage, "Stage")
|
|
|
|
|
|
|
339 |
elif type == "paper": # https://github.com/huggingface/huggingface_hub/blob/v0.27.0/src/huggingface_hub/hf_api.py#L1428
|
340 |
self._set(i.id, "ID")
|
341 |
self._set(f"https://hf.co/papers/{i.id}", "URL")
|
@@ -479,8 +484,8 @@ class HFSearchResult():
|
|
479 |
sdf.loc[df["Gated"] == "auto", ["Gated"]] = 'color: dodgerblue'
|
480 |
sdf.loc[df["Gated"] == "manual", ["Gated"]] = 'color: crimson'
|
481 |
if "Stage" in columns and "Hardware" in columns:
|
482 |
-
sdf.loc[(df["Stage"] == "RUNNING") & (df["Hardware"] != "zero-a10g") & (df["Hardware"] != "cpu-basic") & (df["Hardware"]
|
483 |
-
sdf.loc[(df["Stage"] == "RUNNING") & (df["Hardware"] == "zero-a10g"), ["Hardware", "Type"]] = 'color:
|
484 |
sdf.loc[(df["Type"] == "space") & (df["Stage"] != "RUNNING")] = 'opacity: 0.5'
|
485 |
sdf.loc[(df["Type"] == "space") & (df["Stage"] != "RUNNING"), ["Type"]] = 'color: crimson'
|
486 |
sdf.loc[df["Stage"] == "RUNNING", ["Stage"]] = 'color: lime'
|
@@ -492,16 +497,28 @@ class HFSearchResult():
|
|
492 |
return sdf
|
493 |
|
494 |
def id_to_md(df: pd.DataFrame):
|
495 |
-
if df["Type"] == "collection": return f'[{df["User"]}
|
496 |
-
elif df["Type"] == "
|
497 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
498 |
|
499 |
def format_md_df(df: pd.DataFrame):
|
500 |
df["ID"] = df.apply(id_to_md, axis=1)
|
501 |
return df
|
502 |
|
503 |
hide_labels = [l for l in labels if l in self.hide_labels or l not in self.show_labels]
|
504 |
-
df = format_md_df(pd.DataFrame(dflist, columns=labels))
|
505 |
ref_df = df.copy()
|
506 |
df = df.drop(hide_labels, axis=1).style.apply(highlight_df, axis=None, df=ref_df)
|
507 |
return df
|
@@ -554,7 +571,8 @@ class HFSearchResult():
|
|
554 |
def get_gr_df(self):
|
555 |
df, labels, label_types = self.get()
|
556 |
widths = self.labels.get_widths()
|
557 |
-
|
|
|
558 |
|
559 |
def get_gr_hide_labels(self):
|
560 |
return gr.update(choices=self.labels.get()[0], value=[], visible=True)
|
|
|
8 |
import datetime
|
9 |
import json
|
10 |
import re
|
11 |
+
from hfconstants import DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES, EMOJIS
|
12 |
|
13 |
@spaces.GPU
|
14 |
def dummy_gpu():
|
|
|
29 |
"LastMod.": [17, "str", True],
|
30 |
"Library": [20, "markdown", False],
|
31 |
"Pipeline": [21, "markdown", True],
|
32 |
+
"SDK": [24, "str", False],
|
33 |
"Hardware": [25, "str", False],
|
34 |
"Stage": [26, "str", False],
|
35 |
+
"Emoji": [35, "str", False],
|
36 |
"NFAA": [40, "str", False],
|
37 |
}
|
38 |
|
|
|
267 |
return labels, label_types
|
268 |
|
269 |
def get_widths(self):
|
270 |
+
return self.widths.copy()
|
|
|
|
|
271 |
|
272 |
def get_null_value(self, type: str):
|
273 |
if type == "bool": return False
|
274 |
+
elif type == "number" or type == "date": return 0 #
|
275 |
+
else: return ""
|
276 |
|
277 |
# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api
|
278 |
# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api#huggingface_hub.ModelInfo
|
|
|
313 |
elif isinstance(i, Collection): type = "collection"
|
314 |
else: return
|
315 |
self._set(type, "Type")
|
316 |
+
self._set("", "Emoji")
|
317 |
if type in ["space", "model", "dataset"]:
|
318 |
self._set(i.id, "ID")
|
319 |
self._set(i.id.split("/")[0], "User")
|
|
|
334 |
if i.library_name is not None: self._set(i.library_name, "Library")
|
335 |
if i.pipeline_tag is not None: self._set(i.pipeline_tag, "Pipeline")
|
336 |
if type == "space":
|
337 |
+
if i.sdk is not None: self._set(i.sdk, "SDK")
|
338 |
if i.runtime is not None:
|
339 |
self._set(i.runtime.hardware, "Hardware")
|
340 |
self._set(i.runtime.stage, "Stage")
|
341 |
+
if i.card_data is not None:
|
342 |
+
card = i.card_data
|
343 |
+
if card.title is not None: self._set(card.title, "Name")
|
344 |
elif type == "paper": # https://github.com/huggingface/huggingface_hub/blob/v0.27.0/src/huggingface_hub/hf_api.py#L1428
|
345 |
self._set(i.id, "ID")
|
346 |
self._set(f"https://hf.co/papers/{i.id}", "URL")
|
|
|
484 |
sdf.loc[df["Gated"] == "auto", ["Gated"]] = 'color: dodgerblue'
|
485 |
sdf.loc[df["Gated"] == "manual", ["Gated"]] = 'color: crimson'
|
486 |
if "Stage" in columns and "Hardware" in columns:
|
487 |
+
sdf.loc[(df["Stage"] == "RUNNING") & (df["Hardware"] != "zero-a10g") & (df["Hardware"] != "cpu-basic") & (df["Hardware"]), ["Hardware", "Type"]] = 'color: lime'
|
488 |
+
sdf.loc[(df["Stage"] == "RUNNING") & (df["Hardware"] == "zero-a10g"), ["Hardware", "Type"]] = 'color: limegreen'
|
489 |
sdf.loc[(df["Type"] == "space") & (df["Stage"] != "RUNNING")] = 'opacity: 0.5'
|
490 |
sdf.loc[(df["Type"] == "space") & (df["Stage"] != "RUNNING"), ["Type"]] = 'color: crimson'
|
491 |
sdf.loc[df["Stage"] == "RUNNING", ["Stage"]] = 'color: lime'
|
|
|
497 |
return sdf
|
498 |
|
499 |
def id_to_md(df: pd.DataFrame):
|
500 |
+
if df["Type"] == "collection": return f'[{df["User"]}/{df["Name"]}]({df["URL"]}){df["Emoji"]}'
|
501 |
+
elif df["Type"] == "space": return f'[{df["Name"]} ({df["ID"]})]({df["URL"]}){df["Emoji"]}'
|
502 |
+
elif df["Type"] == "paper": return f'[{df["Name"]} (arxiv:{df["ID"]})]({df["URL"]}){df["Emoji"]}'
|
503 |
+
else: return f'[{df["ID"]}]({df["URL"]}){df["Emoji"]}'
|
504 |
+
|
505 |
+
def to_emoji(df: pd.DataFrame, label: str, key: str, emoji: str):
|
506 |
+
if df[label] == key: return f'{df["Emoji"]}{emoji}' if df["Emoji"] else f' {emoji}'
|
507 |
+
else: return df["Emoji"]
|
508 |
+
|
509 |
+
def apply_emoji_df(df: pd.DataFrame):
|
510 |
+
for label, v in EMOJIS.items():
|
511 |
+
if label not in df.columns: continue
|
512 |
+
for key, emoji in v.items():
|
513 |
+
df["Emoji"] = df.apply(to_emoji, axis=1, label=label, key=key, emoji=emoji)
|
514 |
+
return df
|
515 |
|
516 |
def format_md_df(df: pd.DataFrame):
|
517 |
df["ID"] = df.apply(id_to_md, axis=1)
|
518 |
return df
|
519 |
|
520 |
hide_labels = [l for l in labels if l in self.hide_labels or l not in self.show_labels]
|
521 |
+
df = format_md_df(apply_emoji_df(pd.DataFrame(dflist, columns=labels)))
|
522 |
ref_df = df.copy()
|
523 |
df = df.drop(hide_labels, axis=1).style.apply(highlight_df, axis=None, df=ref_df)
|
524 |
return df
|
|
|
571 |
def get_gr_df(self):
|
572 |
df, labels, label_types = self.get()
|
573 |
widths = self.labels.get_widths()
|
574 |
+
column_widths = [widths[l] for l in labels]
|
575 |
+
return gr.update(type="pandas", value=df, headers=labels, datatype=label_types, column_widths=column_widths, wrap=True)
|
576 |
|
577 |
def get_gr_hide_labels(self):
|
578 |
return gr.update(choices=self.labels.get()[0], value=[], visible=True)
|