File size: 3,519 Bytes
1950194 fc1d4ac 1950194 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import streamlit as st
import numpy as np
import transformers
import re
import string
import preprocessor as pre
import torch
from transformers import BertTokenizer, BertForSequenceClassification
# Inject the app's custom CSS into the Streamlit page.
# NOTE: explicit encoding added so the read does not depend on the platform
# default; the file body was also unindented in the original (syntax error).
with open("style.css", encoding="utf-8") as f:
    st.markdown('<style>{}</style>'.format(f.read()), unsafe_allow_html=True)

# Load the fine-tuned BERT sentiment model and its tokenizer from the Hub.
model_path = "ninahf1503/SA-BERTchatgptapp"
tokenizer = BertTokenizer.from_pretrained(model_path)
# ignore_mismatched_sizes: tolerate a classification head whose shape differs
# from the base checkpoint's (the head was re-trained for this task).
model = BertForSequenceClassification.from_pretrained(model_path, ignore_mismatched_sizes=True)

# Maximum token sequence length used when encoding input text.
seq_max_length = 55
# Function to tokenizing input text
def tokenizing_text(sentence):
    """Clean *sentence* and encode it into BERT input tensors.

    Returns a ``(input_ids, attention_mask)`` pair of PyTorch tensors,
    each padded/truncated to ``seq_max_length`` tokens.
    """
    cleaned = preprocess_text(sentence)
    encoding = tokenizer.encode_plus(
        cleaned,
        add_special_tokens=True,
        max_length=seq_max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
    )
    return encoding['input_ids'], encoding['attention_mask']
# Function to preprocessing input text
def preprocess_text(sentence):
    """Normalize raw tweet text before tokenization.

    Resolves literal escape sequences, strips newlines, tweet artifacts
    (URLs, @mentions, #hashtags via the tweet-preprocessor library),
    punctuation, and digits, then lowercases the result.

    Fix: the original re-ran this entire pipeline once per punctuation
    character (``for punctuation in string.punctuation`` wrapped the whole
    body) — 32 redundant, idempotent passes. The trailing
    ``replace(punctuation, '')`` was a no-op because the ``[^\\w\\s]``
    substitution already removed punctuation; a single ``str.translate``
    pass preserves that effect.
    """
    # Raw string: avoids invalid-escape warnings and keeps the regex literal.
    re_cleansing = r"@\S+|https?:\S+|http?:\S|#[A-Za-z0-9]+|^RT[\s]+|(^|\W)\d+"
    # Resolve literal escape sequences (e.g. a textual "\n") into characters.
    sentence = sentence.encode().decode('unicode_escape')
    sentence = re.sub(r'\n', ' ', sentence)
    # tweet-preprocessor: strips URLs, mentions, hashtags, emojis, etc.
    sentence = pre.clean(sentence)
    sentence = re.sub(r'[^\w\s]', ' ', sentence)  # punctuation -> space
    sentence = re.sub(r'[0-9]', ' ', sentence)    # digits -> space
    sentence = re.sub(re_cleansing, ' ', sentence).strip()
    # One C-level pass removes any punctuation remaining after the regexes.
    sentence = sentence.translate(str.maketrans('', '', string.punctuation))
    return sentence.lower()
# Function to predict sentiment
def predict_sentiment(input_text):
    """Classify *input_text* and return "Bad", "Good", or "Neutral"."""
    ids, mask = tokenizing_text(input_text)
    # Inference only — disable gradient tracking.
    with torch.no_grad():
        logits = model(ids, mask).logits
    # Map the argmax class index to its human-readable label.
    class_names = {0: "Bad", 1: "Good", 2: "Neutral"}
    winning_class = torch.argmax(logits, dim=1).item()
    return class_names[winning_class]
# Streamlit web app
def main():
    """Render the sentiment-analysis page and handle form submission."""
    st.title("Sentimen Analysis", anchor=False)
    tweet_text = st.text_area(" ", placeholder="Enter the sentence you want to analyze", label_visibility="collapsed")
    if not st.button("SUBMIT"):
        return
    # Guard: reject an empty submission before running the model.
    if tweet_text.strip() == "":
        st.title("Text Input Still Empty", anchor=False)
        st.info("Please fill in the sentence you want to analyze")
        return
    sentiment = predict_sentiment(tweet_text)
    # (background hex color, banner message) per sentiment class.
    banners = {
        "Good": ("#5d9c59", "This sentence contains a positive sentiment"),
        "Bad": ("#df2e38", "This sentence contains a negative sentiment"),
    }
    color, message = banners.get(sentiment, ("#ffa500", "This sentence is neutral"))
    st.title("Sentiment Analysis Results", anchor=False)
    st.markdown(
        '<div style="background-color: {}; padding: 16px; border-radius: 5px; font-weight: bold; color:white;">{}</div>'.format(color, message),
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    main()
|