Spaces:
Runtime error
Runtime error
File size: 2,439 Bytes
846b4a5 8aacbaa 846b4a5 e3ab28a 846b4a5 8aacbaa 846b4a5 e2a4e07 e3ab28a e2a4e07 846b4a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import os
import time
import uuid
import streamlit as st
import nltk
from helpers.entity_extraction_helpers import process_insurance_document
from services.pii_service import PIIService
from services.openai_service import OpenAIService
from services.mongo_service import MongoService
from services.ocr_service import OCRService
def init_session():
    """Populate ``st.session_state`` with the service objects used by the page.

    On the first call within a Streamlit session (detected by the absence of
    the marker key ``'a'``) this builds a ``PIIService``, an ``OpenAIService``
    and an ``OCRService``, stores them under ``'pii_instance'``,
    ``'openai_instance'`` and ``'ocr_instance'``, sets the marker, and asks
    NLTK to download the listed resources. Later calls in the same session
    only print the two separator lines.

    Credentials and endpoint settings are read from ``st.secrets``.
    """
    print("------------------ Initializing")

    state = st.session_state
    if 'a' not in state:
        state['pii_instance'] = PIIService()
        print("PII service initialized")
        time.sleep(2)

        secrets = st.secrets
        state['openai_instance'] = OpenAIService(
            secrets["OPENAI_KEY"],
            secrets["OPENAI_AZURE_ENDPOINT"],
            secrets["OPENAI_API_VERSION"],
            secrets["DEPLOYMENT_NAME"],
        )
        print("OpenAI service initialized")
        time.sleep(2)

        state['ocr_instance'] = OCRService(secrets["OCR_API_KEY"])
        print("OCR service initialized")

        # Marker checked by the guard above on subsequent calls.
        state.a = 1

        # NLTK resources, requested in this fixed order.
        nltk_resources = (
            'maxent_ne_chunker',
            'words',
            'treebank',
            'maxent_treebank_pos_tagger',
            'punkt',
            'averaged_perceptron_tagger',
        )
        for resource_name in nltk_resources:
            nltk.download(resource_name)

    print("-----------------------------")
# ---- Page chrome -----------------------------------------------------------
st.header('', divider='rainbow')
st.title("Data extraction")
st.header('', divider='rainbow')

# The service instances read from st.session_state below are created here.
init_session()

uploaded_doc = st.file_uploader("Upload an insurance document", type=["pdf"])
if uploaded_doc is not None:
    document_id = str(uuid.uuid4())

    # The upload's name is supplied by the client, so never use it as a path
    # verbatim: keep only the final path component (treating "\" as a
    # separator too) so the file cannot be written outside the working
    # directory. Fall back to a generated name if nothing usable remains.
    local_name = os.path.basename(uploaded_doc.name.replace("\\", "/"))
    if local_name in ("", ".", ".."):
        local_name = f"{document_id}.pdf"

    # Persist the upload to disk; the processing helper is given a file name,
    # not the in-memory buffer.
    with open(local_name, "wb") as f:
        f.write(uploaded_doc.getbuffer())
    print(f"File uploaded :: {local_name} :: {document_id}")

    process_out = process_insurance_document(
        st.session_state['pii_instance'],
        # NOTE(review): empty string passed for the second argument --
        # presumably the Mongo service slot (MongoService is imported but
        # never instantiated in this file); confirm against the helper.
        "",
        st.session_state['openai_instance'],
        st.session_state['ocr_instance'],
        local_name,
        document_id,
    )

    # ---- Results -----------------------------------------------------------
    st.header('Extracted entities !! ', divider='rainbow')
    st.write(process_out['entities'])
    st.header('', divider='rainbow')

    st.header('Identified personal entities ', divider='rainbow')
    st.write(process_out['masked_entities'])

    st.header('Masked Text ', divider='rainbow')
    st.write(process_out['masked_text'])
### TO RUN :: streamlit run ui_app.py