File size: 1,936 Bytes
e57c7ed
 
 
 
 
a4b5cd2
e57c7ed
a4b5cd2
e57c7ed
a4b5cd2
e57c7ed
 
 
 
 
 
 
 
a4b5cd2
 
e57c7ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a4b5cd2
e57c7ed
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Core Pkgs
import streamlit as st 

# NLP Pkgs
import spacy_streamlit
import spacy
import re

# Streamlit re-executes this script from the top on every widget interaction,
# so a plain spacy.load() here would re-read the model each time.
# spacy_streamlit.load_model() is the cached equivalent: it returns the same
# loaded pipeline object but only loads it once per server process.
nlp = spacy_streamlit.load_model('en_docusco_spacy')

def pre_process(txt):
    """Split the standalone word "its"/"Its" in two and normalize whitespace.

    Whole-word occurrences of ``its`` become ``it s`` and ``Its`` becomes
    ``It s``; other casings (e.g. ``ITS``) and words merely containing the
    letters (e.g. ``bits``) are left alone. Afterwards every run of
    whitespace is collapsed to a single space and leading/trailing
    whitespace is removed.

    NOTE(review): the split is presumably there to match the tokenization
    the model expects - confirm against the model's training setup.

    Args:
        txt: The raw input string.

    Returns:
        The rewritten, whitespace-normalized string.
    """
    # Applied in order: lower-case form first, then the capitalized form.
    substitutions = (
        (r'\bits\b', 'it s'),
        (r'\bIts\b', 'It s'),
    )
    for pattern, replacement in substitutions:
        txt = re.sub(pattern, replacement, txt)

    # split() with no argument breaks on any whitespace run and discards
    # empty edge pieces, so re-joining leaves exactly one space between words.
    words = txt.split()
    return " ".join(words)
    
#import os
#from PIL import Image


def main():
    """Render the demo page: intro text, a sidebar menu, and the chosen view.

    The "Tokens" view shows a per-token table (text, tag, entity IOB and
    entity type) when the "Tokenize" button is pressed. The "DocuScope" view
    renders the entity spans of the entered text immediately, using every
    label defined by the pipeline's ``ner`` component.

    Relies on the module-level ``nlp`` pipeline and ``pre_process`` helper.
    """
    # The same sample sentence pre-fills the text area in both views.
    default_text = "Jaws is a shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it."

    st.title("DocuScope and Part-of-Speech Tagging with spaCy")
    # The model-card link previously had a stray "." after the host name.
    st.markdown("This demo uses a trained spaCy model ([en_docusco_spacy](https://huggingface.co/browndw/en_docusco_spacy)) to identify DocuScope categories in text.")
    st.markdown("It is also trained on the [CLAWS7](https://ucrel.lancs.ac.uk/claws7tags.html) part-of-speech tagset.")
    st.markdown("NOTE: this demo is public - please don't enter confidential text")

    menu = ["Tokens", "DocuScope"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Tokens":
        st.subheader("Tokenization")
        raw_text = st.text_area('Enter text:', value=default_text)
        # The parsed document is only needed for the table, so the pipeline
        # runs on the button press rather than on every script rerun.
        if st.button("Tokenize"):
            docx = nlp(pre_process(raw_text))
            spacy_streamlit.visualize_tokens(docx, attrs=['text', 'tag_', 'ent_iob_', 'ent_type_'])

    elif choice == "DocuScope":
        st.subheader("Named Entity Recognition")
        raw_text = st.text_area('Enter text:', value=default_text)
        docx = nlp(pre_process(raw_text))
        # Offer every label the ner component defines in the visualizer.
        spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)


# Build the page only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()