Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ st.title("The Stack data Inspection")
|
|
8 |
|
9 |
df = pd.read_csv("extension_distribution.csv")
|
10 |
all_extensions = df["extension"].tolist()
|
|
|
11 |
tags = {}
|
12 |
for index, row in df.iterrows():
|
13 |
if row["language"] not in tags:
|
@@ -18,26 +19,32 @@ all_languages = list(tags.keys())
|
|
18 |
|
19 |
@st.cache()
|
20 |
def load_data(language, ext):
|
21 |
-
ds = load_dataset(
|
|
|
|
|
|
|
|
|
22 |
return ds
|
23 |
|
24 |
-
|
|
|
25 |
with col1:
|
26 |
chosen_language = st.selectbox(
|
27 |
-
|
28 |
-
|
29 |
-
index=0)
|
30 |
with col2:
|
31 |
chosen_ext = st.selectbox(
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
|
|
|
36 |
samples = load_data(chosen_language, chosen_ext)
|
37 |
max_docs = len(samples)
|
38 |
samples = samples.add_column("idx", range(len(samples)))
|
39 |
-
not_lexed = samples.filter(lambda x: not x[
|
40 |
-
indexes_not_lexed = not_lexed[
|
|
|
41 |
|
42 |
# info about extension
|
43 |
st.markdown("### Information about the extension:")
|
@@ -46,23 +53,30 @@ text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == cho
|
|
46 |
are not lexable. These files are at indexes: {indexes_not_lexed}."
|
47 |
st.markdown(text)
|
48 |
|
49 |
-
col_1,
|
50 |
with col_1:
|
51 |
-
index_example = st.number_input(
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
|
54 |
# info about the chosen example
|
55 |
example = samples[index_example]
|
56 |
st.markdown("#### Information about the chosen example:")
|
57 |
-
text_alpha = "**has**" if example[
|
58 |
-
text_lines = "**has**" if example[
|
59 |
-
text_lexer = "is" if example[
|
60 |
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
st.markdown(f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
|
63 |
-
{text_lines} very long lines, and {text_lexer} lexable.")
|
64 |
|
|
|
65 |
st.markdown("#### File content:")
|
66 |
-
|
67 |
st.code(example["content"], language=chosen_language)
|
68 |
-
|
|
|
8 |
|
9 |
df = pd.read_csv("extension_distribution.csv")
|
10 |
all_extensions = df["extension"].tolist()
|
11 |
+
|
12 |
tags = {}
|
13 |
for index, row in df.iterrows():
|
14 |
if row["language"] not in tags:
|
|
|
19 |
|
20 |
@st.cache()
def load_data(language, ext):
    """Load the inspection samples for one language/extension pair.

    Requests the "train" split of the "loubnabnl/the-stack-inspection-data"
    dataset, restricted to the ``data/{language}/{ext}`` directory, and
    returns whatever ``load_dataset`` yields for it.
    """
    return load_dataset(
        "loubnabnl/the-stack-inspection-data",
        data_dir=f"data/{language}/{ext}",
        split="train",
    )
|
28 |
|
29 |
+
|
30 |
+
# Lay out three columns (widths 1:1:4); only the first two host widgets.
col1, col2, _ = st.columns([1, 1, 4])

with col1:
    chosen_language = st.selectbox(
        label="Select a programming language",
        options=all_languages,
        index=0,
    )

with col2:
    # The extension choices are the ones recorded in `tags` for the
    # language picked above.
    chosen_ext = st.selectbox(
        label="Select an extension",
        options=tags[chosen_language],
        index=0,
    )
|
39 |
+
|
40 |
|
41 |
+
# load the dataset and get indexes of non lexable files
samples = load_data(chosen_language, chosen_ext)
max_docs = len(samples)

# Tag every row with its position so the indexes survive the filter below.
samples = samples.add_column("idx", range(max_docs))


def _is_not_lexable(row):
    """Return True for rows whose "lexable" field is falsy."""
    return not row["lexable"]


not_lexed = samples.filter(_is_not_lexable)
indexes_not_lexed = not_lexed["idx"]
|
47 |
+
|
48 |
|
49 |
# info about extension
|
50 |
st.markdown("### Information about the extension:")
|
|
|
53 |
are not lexable. These files are at indexes: {indexes_not_lexed}."
|
54 |
st.markdown(text)
|
55 |
|
56 |
+
# Keep the numeric picker in the narrower (2:4) left column.
col_1, _ = st.columns([2, 4])

with col_1:
    # Valid choices are the row positions 0 .. max_docs - 1.
    index_example = st.number_input(
        f"Extension {chosen_ext} has {max_docs} files, choose one to visualize:",
        min_value=0,
        max_value=max_docs - 1,
        value=0,
        step=1,
    )
|
65 |
|
66 |
|
67 |
# info about the chosen example
example = samples[index_example]
st.markdown("#### Information about the chosen example:")

# Each fragment must be driven by the flag it describes in the sentence below:
# `text_alpha` precedes "a very low alphanumeric ratio" -> "low_alphanum",
# `text_lines` precedes "very long lines"               -> "long_lines".
# (The two keys were previously crossed, so the report could contradict the
# data.)
text_alpha = "**has**" if example["low_alphanum"] else "doesn't have"
text_lines = "**has**" if example["long_lines"] else "doesn't have"
text_lexer = "is" if example["lexable"] else "**isn't**"

st.markdown(
    f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
{text_lines} very long lines, and {text_lexer} lexable."
)

# display file content
st.markdown("#### File content:")
# Pass the selected language through to st.code for highlighting.
st.code(example["content"], language=chosen_language)
|
|