# Source: app.py by browndw -- commit e57c7ed ("Update app.py")
# Core Pkgs
import streamlit as st
# NLP Pkgs
import spacy_streamlit
import spacy
import re
# Load the 'en_docusco_spacy' spaCy pipeline once, at module import time;
# main() reuses this object for both the "Tokens" and "DocuScope" views.
nlp = spacy.load('en_docusco_spacy')
def pre_process(txt: str) -> str:
    """Normalize raw input text before it is passed to the spaCy pipeline.

    Two transformations are applied, in order:

    1. The standalone words ``its`` and ``Its`` are split into ``it s`` and
       ``It s``. Only these two exact casings are rewritten; words that
       merely contain the letters (``bits``, ``itself``), the contraction
       ``it's`` and the all-caps ``ITS`` are left untouched.
    2. Every run of whitespace (spaces, tabs, newlines) is collapsed to a
       single space and leading/trailing whitespace is removed.

    Args:
        txt: The raw text to normalize.

    Returns:
        The normalized text.
    """
    # One pass handles both casings: the captured first letter is re-emitted
    # unchanged, so "its" -> "it s" and "Its" -> "It s".
    txt = re.sub(r'\b([Ii])ts\b', r'\g<1>t s', txt)
    # split() with no arguments splits on any whitespace run and drops
    # empty leading/trailing fields, so join() yields single-spaced text.
    return " ".join(txt.split())
#import os
#from PIL import Image
def main():
    """Render the Streamlit demo page.

    The page shows a title and introductory notes, then a sidebar menu with
    two views:

    * "Tokens": a text area plus a "Tokenize" button; when the button is
      pressed the text is pre-processed, run through the module-level
      ``nlp`` pipeline and displayed with ``spacy_streamlit.visualize_tokens``.
    * "DocuScope": a text area whose contents are pre-processed, run through
      ``nlp`` and displayed with ``spacy_streamlit.visualize_ner`` using the
      labels of the pipeline's ``ner`` component.
    """
    # Sample sentence pre-filled in the text area of both views.
    default_text = "Jaws is a shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it."

    st.title("DocuScope and Part-of-Speech Tagging with spaCy")
    # The host previously read "huggingface.co." (stray trailing dot).
    st.markdown("This demo uses a trained spaCy model ([en_docusco_spacy](https://huggingface.co/browndw/en_docusco_spacy)) to identify DocuScope categories in text.")
    st.markdown("It is also trained on the [CLAWS7](https://ucrel.lancs.ac.uk/claws7tags.html) part-of-speech tagset.")
    st.markdown("NOTE: this demo is public - please don't enter confidential text")
    # Logo display is currently disabled:
    #our_image = Image.open(os.path.join('SpaCy_logo.svg.png'))
    #st.image(our_image)

    menu = ["Tokens", "DocuScope"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Tokens":
        st.subheader("Tokenization")
        raw_text = st.text_area('Enter text:', value=default_text)
        if st.button("Tokenize"):
            # Run the pipeline only when its output is going to be shown,
            # rather than on every rerun of the script.
            docx = nlp(pre_process(raw_text))
            spacy_streamlit.visualize_tokens(docx, attrs=['text', 'tag_', 'ent_iob_', 'ent_type_'])
    elif choice == "DocuScope":
        st.subheader("Named Entity Recognition")
        raw_text = st.text_area('Enter text:', value=default_text)
        docx = nlp(pre_process(raw_text))
        # Offer every label known to the pipeline's 'ner' component.
        spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)
# Build the page only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()