Spaces:
Sleeping
Sleeping
""" | |
This code was adapted from https://huggingface.co/spaces/HugoLaurencon/examples_before_after_pii/
""" | |
import html
import json

import pandas as pd
import streamlit as st
# Page-level configuration must be issued before any other Streamlit output.
st.set_page_config(
    layout="wide",
    page_title="PII Visualization",
)
st.title("PII Visualization")

# PII tags offered in the tag selector; the lower-cased tag is part of the
# data file name.
tags = [
    "KEY",
    "IP_ADDRESS",
    "EMAIL",
]

# Short code of each detection type, used both in the data file name and
# (upper-cased) in the in-text annotation marker.
matches = {
    "False negatives": "fn",
    "False positives": "fp",
}

# Detection types offered in the type selector, in display order.
types = [
    "False positives",
    "False negatives",
]
def load_data():
    """Load the detection samples for the current widget selection.

    Reads the module-level ``chosen_tag`` and ``chosen_type`` selections and
    maps the type through ``matches`` to build the file name
    ``data/<tag lower-cased>_detections_<fn|fp>.json``.

    Returns:
        The parsed JSON content of that file. The caller indexes it by
        position and treats each entry as a string (presumably a list of
        annotated texts -- confirm against the data files).

    Raises:
        FileNotFoundError: if no file exists for the selected combination.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    path = f"data/{chosen_tag.lower()}_detections_{matches[chosen_type]}.json"
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which can fail on non-ASCII samples on non-UTF-8 locales.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def _split_detection(text, start_marker, end_marker="END_PI"):
    """Split *text* into alternating plain and annotated segments.

    An annotated segment starts at an occurrence of *start_marker* and runs
    through the first *end_marker* found after it (inclusive). If no end
    marker follows, the annotation extends to the end of *text*.

    Returns:
        A list of ``(segment, is_annotated)`` tuples in document order whose
        segments concatenate back to *text*. Every annotated segment is
        preceded by a (possibly empty) plain segment, and the list always
        ends with the plain remainder.
    """
    pieces = []
    cursor = 0
    while True:
        start = text.find(start_marker, cursor)
        if start == -1:
            break
        # Look for the end marker only *after* the start marker, so a stray
        # end marker earlier in the text cannot produce an inverted span.
        close = text.find(end_marker, start + len(start_marker))
        end = len(text) if close == -1 else close + len(end_marker)
        pieces.append((text[cursor:start], False))
        pieces.append((text[start:end], True))
        cursor = end
    pieces.append((text[cursor:], False))
    return pieces


# --- Selection widgets: detection type and PII tag -------------------------
col1, col2, col3 = st.columns([1, 1, 4])
with col1:
    chosen_type = st.selectbox(
        label="Select the type of detections",
        options=types,
        index=0,
    )
with col2:
    chosen_tag = st.selectbox(
        label="Select the PII TAG",
        options=tags,
        index=0,
    )

# --- Load the samples matching the selection --------------------------------
samples = load_data()
max_docs = len(samples)
if max_docs == 0:
    # With no samples, number_input would get max_value < min_value and the
    # indexing below would fail; stop this script run with a message instead.
    st.warning("No examples available for this selection.")
    st.stop()

# --- Example picker ----------------------------------------------------------
col1, col2 = st.columns([2, 4])
with col1:
    index_example = st.number_input(
        f"Index of the chosen example from the existing {max_docs}",
        min_value=0,
        max_value=max_docs - 1,
        value=0,
        step=1,
    )
st.write("Scroll down to visualize PII detections highlighted in yellow, we split the text at the start and end of the key to highlight it.")

# --- Render the chosen example ----------------------------------------------
detection = samples[index_example]
# Annotations in the text start with "PI:FP" / "PI:FN" and end with "END_PI".
delimiter = f"PI:{matches[chosen_type].upper()}"
count = detection.count(delimiter)
st.subheader(f"{count} {chosen_type.lower()} for {chosen_tag} tag in example {index_example}:")

# Plain text goes into code blocks; each annotated span is highlighted in
# yellow between them.
for segment, is_annotated in _split_detection(detection, delimiter):
    if is_annotated:
        # The sample is arbitrary text rendered as raw HTML: escape it so
        # characters such as "<" and "&" are shown literally instead of being
        # interpreted as markup.
        st.markdown(
            "<span style=\"background-color: #FFFF00\">"
            + html.escape(segment, quote=False)
            + "</span>",
            unsafe_allow_html=True,
        )
    else:
        st.code(segment)