"""Streamlit app: Indonesian sentiment classification with a fine-tuned IndoBERT model."""

import re
import string

import numpy as np
import preprocessor as p
import streamlit as st
import tensorflow as tf
import transformers
from tensorflow import keras
from transformers import AutoTokenizer, TFBertForSequenceClassification

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")

# Define the maximum sequence length
max_seq = 110

# Class index -> human-readable label.
LABEL_MAPPING = {0: "negative", 1: "neutral", 2: "positive"}

# Text rendered for each label. Kept as one entry per label so that
# label-specific markup can be added back independently.
# NOTE(review): the original literals look like they once contained HTML
# (they are passed with unsafe_allow_html=True) that was lost; only the
# surviving text is reproduced here, with the line breaks escaped so the
# literals are valid Python.
_SENTIMENT_MESSAGES = {
    "positive": "\nSentiment: positive\n",
    "negative": "\nSentiment: negative\n",
    "neutral": "\nSentiment: neutral\n",
}

# Patterns used by the cleaning pipeline, compiled once.
_URL_RE = re.compile(r'http[s]?://\S+')
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9\s]')
_DIGIT_RE = re.compile(r'[0-9]')
_NEWLINE_RE = re.compile(r'\r?\n|\r')

# The original implementation ran the whole cleaning pipeline once per
# character of string.punctuation; this cap keeps the same upper bound.
_MAX_CLEAN_PASSES = len(string.punctuation)


# Function to preprocess the data
def preprocess_data(data):
    """Clean and tokenize a collection of sentences.

    Args:
        data: A numpy array of strings (as before), or any iterable of
            strings. A single bare string is treated as one sentence.

    Returns:
        A list with one ``(input_ids, attention_mask)`` tuple per sentence,
        holding the TensorFlow tensors produced by ``tokenizer.encode_plus``
        (padded/truncated to ``max_seq`` tokens).
    """
    if isinstance(data, str):
        # Iterating a bare string would tokenize it character by character.
        sentences = [data]
    elif hasattr(data, "tolist"):
        sentences = data.tolist()  # Convert numpy array to list
    else:
        sentences = list(data)

    processed_data = []
    for sentence in sentences:
        encoded_data = tokenizer.encode_plus(
            text_preprocess(sentence),
            add_special_tokens=True,
            max_length=max_seq,
            padding="max_length",
            truncation=True,
            return_tensors="tf",
        )
        processed_data.append(
            (encoded_data['input_ids'], encoded_data['attention_mask'])
        )
    return processed_data


def _clean_once(sentence: str) -> str:
    """Apply a single pass of the cleaning pipeline to *sentence*."""
    sentence = p.clean(sentence)
    # URLs must go before punctuation is stripped: once ':' and '/' are
    # removed the URL pattern can no longer match anything.
    sentence = _URL_RE.sub('', sentence)
    # Dropping everything except ASCII letters, digits and whitespace also
    # removes all punctuation, so no per-character replace is needed.
    sentence = _NON_ALNUM_RE.sub('', sentence)
    sentence = _DIGIT_RE.sub('', sentence)
    sentence = _NEWLINE_RE.sub('', sentence)
    # \s also matches non-ASCII whitespace, which survives the strip above.
    sentence = sentence.encode('ascii', 'ignore').decode('ascii')
    return sentence.lower()


# Function to preprocess the sentence
def text_preprocess(sentence):
    """Normalize a sentence before tokenization.

    The cleaning pass is repeated until it no longer changes the text, up
    to ``_MAX_CLEAN_PASSES`` times. The original code unconditionally ran
    the pass once per punctuation character; stopping at the first
    unchanged pass yields the same text (assuming ``p.clean`` is
    deterministic) without the redundant work.

    Args:
        sentence: Raw input text.

    Returns:
        The cleaned, lower-cased ASCII text.
    """
    for _ in range(_MAX_CLEAN_PASSES):
        cleaned = _clean_once(sentence)
        if cleaned == sentence:
            break
        sentence = cleaned
    return sentence


def _extract_logits(prediction):
    """Return the logits array from whatever ``model.predict`` produced."""
    # Depending on the transformers version, predict() may hand back an
    # output container (attribute or key "logits") rather than a bare
    # array; np.argmax on the container itself would not see the scores.
    logits = getattr(prediction, "logits", None)
    if logits is None:
        logits = prediction["logits"] if isinstance(prediction, dict) else prediction
    return np.asarray(logits)


# Function to perform sentiment prediction
def predict_sentiment(sentence):
    """Classify a single sentence.

    Relies on the module-level ``model`` that is loaded in the
    ``__main__`` section of this script.

    Args:
        sentence: Raw input text.

    Returns:
        One of ``"negative"``, ``"neutral"`` or ``"positive"``.
    """
    input_ids, attention_mask = preprocess_data([sentence])[0]
    prediction = model.predict([input_ids, attention_mask])
    # Only one sentence is scored, so the flat argmax is the class index.
    predicted_index = int(np.argmax(_extract_logits(prediction)))
    return LABEL_MAPPING[predicted_index]


# Streamlit app
def main():
    """Render the input form and show the predicted sentiment on request."""
    st.title("Analisis Sentimen Berbahasa Indonesia")
    sentence = st.text_input("Masukkan teks disini:")

    if not st.button("Cek Kalimat"):
        return

    if not sentence.strip():
        # Nothing to classify; avoid running the model on empty input.
        st.warning("Masukkan teks terlebih dahulu.")
        return

    st.write("Hasil Klasifikasi:")
    sentiment = predict_sentiment(sentence)
    message = _SENTIMENT_MESSAGES.get(sentiment)
    if message is not None:
        st.markdown(message, unsafe_allow_html=True)


if __name__ == '__main__':
    # NOTE(review): Streamlit re-executes the script on each interaction,
    # so this load runs again every time; consider caching the model.
    # Register the custom objects using custom_object_scope
    with keras.utils.custom_object_scope(
        {'TFBertForSequenceClassification': transformers.TFBertForSequenceClassification}
    ):
        # Load the saved model
        model = TFBertForSequenceClassification.from_pretrained(
            'muhfrrazi/IndoBERT-Sentiment-Analysist_Dataset-Indonesia'
        )
        main()