Updated lines 65,57,54,16 with: st.image('Bluesky Emoji Model 3.jpeg', caption='Bluesky Emoji Model 3') | st.image('Bluesky Emoji Model 2.jpeg', caption='Bluesky Emoji Model 2') | st.image('Bluesky Emoji Model 4.jpeg', caption='Bluesky Emoji Model 4') | st.image('Bluesky Emoji Model 1.jpeg', caption='Bluesky Emoji Model 1')
9412f88
verified
# bsky2M_emojis_streamlit.py
# Packages required
import streamlit as st | |
from datasets import load_dataset | |
import emoji | |
from dateutil import parser | |
from collections import Counter | |
import plotly.express as px | |
import pandas as pd | |
from collections import defaultdict | |
# Page header: the app title first, then the first model illustration
_page_title = "Top 200 Most Frequent Emojis in Bluesky Posts"
st.title(_page_title)
st.image(
    'Bluesky Emoji Model 1.jpeg',
    caption='Bluesky Emoji Model 1',
)
# Step 1: Load the Dataset
@st.cache_resource(show_spinner=False)
def _load_bluesky_posts():
    """Load the "alpindale/two-million-bluesky-posts" dataset.

    Streamlit re-executes this script from the top on every rerun, so the
    loaded object is cached and reused instead of being loaded again each
    time. The spinner is disabled so the page looks the same as before.

    Returns:
        Whatever ``load_dataset`` returns for this dataset name; the caller
        indexes it by split name.
    """
    return load_dataset("alpindale/two-million-bluesky-posts")


# Load the dataset (served from the Streamlit cache after the first run)
dataset = _load_bluesky_posts()
# Access the 'train' split
data = dataset['train']
# Step 2: Extract Emojis from Text
def extract_emojis(text):
    """Return the emojis found in *text* as a list.

    Each match reported by ``emoji.emoji_list`` contributes its ``'emoji'``
    entry, in the order the matches are reported. The list is empty when
    nothing is reported.
    """
    found = []
    for match in emoji.emoji_list(text):
        found.append(match['emoji'])
    return found
def _emojis_for_post(post):
    """Build the 'emojis' value for one post from its 'text' field."""
    return {"emojis": extract_emojis(post["text"])}


def _parsed_timestamp_for_post(post):
    """Build the 'created_at' value for one post by parsing it as ISO 8601."""
    return {"created_at": parser.isoparse(post["created_at"])}


# Apply the extraction to the 'text' column, keeping the result of the map
data = data.map(_emojis_for_post)
# Step 3: Convert created_at to Datetime
data = data.map(_parsed_timestamp_for_post)
# Step 4: Count Emoji Frequencies
# Flatten the per-post emoji lists into one list.
# The 'emojis' column is read directly instead of iterating whole rows: row
# iteration builds a dict of every column for every post just to reach this
# one field. The loop variable is called `symbol` so it does not reuse the
# name of the imported `emoji` module.
all_emojis = [
    symbol
    for post_emojis in data["emojis"]
    for symbol in post_emojis
]
# Count the frequency of each emoji
emoji_counts = Counter(all_emojis)
# Step 5: Visualize Emoji Frequencies
# Get the top 200 most common emojis as (emoji, count) pairs
top_emojis = emoji_counts.most_common(200)
# Split the pairs into two parallel sequences. When no emoji was counted,
# zip(*top_emojis) yields nothing and the two-name unpacking would raise
# ValueError, so fall back to two empty tuples in that case.
emojis, counts = zip(*top_emojis) if top_emojis else ((), ())
# Create a DataFrame for Plotly
df = pd.DataFrame({'Emojis': emojis, 'Frequency': counts})
# Table section: illustration, heading, the frequency table, then another
# illustration
st.image(
    'Bluesky Emoji Model 4.jpeg',
    caption='Bluesky Emoji Model 4',
)
st.write('### Top 200 Emojis Dataframe')
st.dataframe(df)
st.image(
    'Bluesky Emoji Model 2.jpeg',
    caption='Bluesky Emoji Model 2',
)

# Chart section: build the bar chart and set both axis titles in one chained
# expression (each update_* call returns the figure it was called on)
fig = (
    px.bar(
        df,
        x='Emojis',
        y='Frequency',
        title='Top 200 Most Frequent Emojis',
    )
    .update_xaxes(title_text='Emojis')
    .update_yaxes(title_text='Frequency')
)

# Show the last illustration, then the chart itself
st.image(
    'Bluesky Emoji Model 3.jpeg',
    caption='Bluesky Emoji Model 3',
)
st.plotly_chart(fig)