TroglodyteDerivations's picture
Updated lines 65,57,54,16 with: st.image('Bluesky Emoji Model 3.jpeg', caption='Bluesky Emoji Model 3') | st.image('Bluesky Emoji Model 2.jpeg', caption='Bluesky Emoji Model 2') | st.image('Bluesky Emoji Model 4.jpeg', caption='Bluesky Emoji Model 4') | st.image('Bluesky Emoji Model 1.jpeg', caption='Bluesky Emoji Model 1')
9412f88 verified
# bsky2M_emojis_streamlit.py
# Packages required
import streamlit as st
from datasets import load_dataset
import emoji
from dateutil import parser
from collections import Counter
import plotly.express as px
import pandas as pd
from collections import defaultdict
# Streamlit app title and lead illustration
st.title("Top 200 Most Frequent Emojis in Bluesky Posts")
st.image('Bluesky Emoji Model 1.jpeg', caption='Bluesky Emoji Model 1')


# Step 1: Load the Dataset
@st.cache_resource
def _load_posts():
    """Load the Bluesky posts dataset and let Streamlit cache the result.

    Streamlit re-executes this script from top to bottom on every rerun
    and for every new session. Decorating the loader with
    ``st.cache_resource`` keeps the loaded object in the server process
    so ``load_dataset`` is only called the first time.

    Returns:
        Whatever ``load_dataset`` returns for
        "alpindale/two-million-bluesky-posts"; it is indexed by split
        name below.
    """
    return load_dataset("alpindale/two-million-bluesky-posts")


dataset = _load_posts()
# Work with the 'train' split
data = dataset['train']
# Step 2: Extract Emojis from Text
def extract_emojis(text):
    """Return the emojis found in *text* as a list of strings.

    The result holds the ``'emoji'`` value of each match reported by
    ``emoji.emoji_list``, one entry per match.

    Args:
        text: The post text. May be empty or ``None``.

    Returns:
        A list of emoji strings; an empty list when *text* is empty or
        ``None``.
    """
    # Posts without text would make emoji.emoji_list fail on None;
    # treat them as containing no emojis.
    if not text:
        return []
    return [match['emoji'] for match in emoji.emoji_list(text)]
# Step 2 (continued) and Step 3: derive the per-post columns.
# Both columns are computed in a single map() call so the dataset is
# traversed once instead of once per column.
def _add_derived_fields(post):
    """Return the columns to add to / overwrite on a single post.

    Args:
        post: One dataset row; its "text" and "created_at" fields are read.

    Returns:
        A dict with "emojis" (result of ``extract_emojis`` on the text) and
        "created_at" (the original value parsed with ``parser.isoparse``).
    """
    return {
        "emojis": extract_emojis(post["text"]),
        # Convert 'created_at' to datetime
        "created_at": parser.isoparse(post["created_at"]),
    }


data = data.map(_add_derived_fields)
# Step 4: Count Emoji Frequencies
# Collect every emoji occurrence from every post into one flat list
all_emojis = []
for post in data:
    all_emojis.extend(post["emojis"])
# Tally how often each distinct emoji occurs
emoji_counts = Counter(all_emojis)
# Step 5: Visualize Emoji Frequencies
# Keep only the 200 most common emojis as (emoji, count) pairs
top_emojis = emoji_counts.most_common(200)
# Split the pairs into two parallel tuples. With no pairs at all,
# zip(*top_emojis) yields nothing and the two-name unpacking would raise
# ValueError, so fall back to empty tuples in that case.
if top_emojis:
    emojis, counts = zip(*top_emojis)
else:
    emojis, counts = (), ()
# Create a DataFrame for Plotly: one row per emoji with its frequency
df = pd.DataFrame({'Emojis': emojis, 'Frequency': counts})
# Table section: an illustration, a heading, the dataframe itself,
# then a second illustration
st.image('Bluesky Emoji Model 4.jpeg', caption='Bluesky Emoji Model 4')
st.write('### Top 200 Emojis Dataframe')
st.dataframe(df)
st.image('Bluesky Emoji Model 2.jpeg', caption='Bluesky Emoji Model 2')
# Build the bar chart and label both axes in one chained expression
fig = (
    px.bar(df, x='Emojis', y='Frequency', title='Top 200 Most Frequent Emojis')
    .update_xaxes(title_text='Emojis')
    .update_yaxes(title_text='Frequency')
)
# Chart section: a final illustration followed by the plot
st.image('Bluesky Emoji Model 3.jpeg', caption='Bluesky Emoji Model 3')
st.plotly_chart(fig)