Spaces:
Sleeping
Sleeping
cache filtering
Browse files
app.py
CHANGED
@@ -17,15 +17,19 @@ for index, row in df.iterrows():
|
|
17 |
all_languages = list(tags.keys())
|
18 |
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
ds = load_dataset(
|
24 |
"loubnabnl/the-stack-inspection-data",
|
25 |
data_dir=f"data/{language}/{ext}",
|
26 |
split="train",
|
27 |
)
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
col1, col2, _ = st.columns([1, 1, 4])
|
31 |
with col1:
|
@@ -45,15 +49,7 @@ max_mean_line_length = st.sidebar.slider("Maximum average line length", 0, 500,
|
|
45 |
st.sidebar.markdown("Printed files have `max_line_length` and `average_line_length` larger than the selected values.\
|
46 |
`alphanumeric_fraction` is smaller than the selected value.")
|
47 |
|
48 |
-
|
49 |
-
samples = load_data(chosen_language, chosen_ext)
|
50 |
-
|
51 |
-
samples = samples.filter(lambda x: x["alphanum_fraction"] < min_alphanum)
|
52 |
-
samples = samples.filter(lambda x: x["max_line_length"] > max_line_length)
|
53 |
-
samples = samples.filter(lambda x: x["avg_line_length"] > max_mean_line_length)
|
54 |
-
|
55 |
-
if not_lexable:
|
56 |
-
samples = samples.filter(lambda x: not x["lexable"])
|
57 |
|
58 |
max_docs = len(samples)
|
59 |
|
|
|
17 |
all_languages = list(tags.keys())
|
18 |
|
19 |
|
20 |
+
@st.cache(max_entries=100)
def load_data(language, ext, min_alphanum, max_line_length, max_mean_line_length, non_lexable):
    """Load the inspection samples for one language/extension and filter them.

    The "train" split of ``loubnabnl/the-stack-inspection-data`` is read from
    ``data/{language}/{ext}`` and then narrowed down, one filter at a time,
    to the rows that satisfy every condition below:

    * ``alphanum_fraction`` is strictly below ``min_alphanum``
    * ``max_line_length`` is strictly above ``max_line_length``
    * ``avg_line_length`` is strictly above ``max_mean_line_length``
    * if ``non_lexable`` is truthy, ``lexable`` is falsy

    All filter thresholds are function arguments, so they take part in the
    ``st.cache`` key together with the language and extension.

    Returns the filtered dataset.
    """
    dataset = load_dataset(
        "loubnabnl/the-stack-inspection-data",
        data_dir=f"data/{language}/{ext}",
        split="train",
    )

    # Predicates are applied in this exact order, one filter call each.
    predicates = [
        lambda row: row["alphanum_fraction"] < min_alphanum,
        lambda row: row["max_line_length"] > max_line_length,
        lambda row: row["avg_line_length"] > max_mean_line_length,
    ]
    if non_lexable:
        # Optional extra restriction: keep only rows flagged as not lexable.
        predicates.append(lambda row: not row["lexable"])

    for keep in predicates:
        dataset = dataset.filter(keep)
    return dataset
|
33 |
|
34 |
col1, col2, _ = st.columns([1, 1, 4])
|
35 |
with col1:
|
|
|
49 |
st.sidebar.markdown("Printed files have `max_line_length` and `average_line_length` larger than the selected values.\
|
50 |
`alphanumeric_fraction` is smaller than the selected value.")
|
51 |
|
52 |
+
samples = load_data(chosen_language, chosen_ext, min_alphanum, max_line_length, max_mean_line_length, not_lexable)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
max_docs = len(samples)
|
55 |
|