Spaces:
Running
Running
File size: 2,563 Bytes
f560e5e 24d0209 f560e5e 24d0209 f560e5e 849c08d f560e5e 849c08d 24d0209 849c08d 24d0209 849c08d 24d0209 849c08d 24d0209 849c08d 24d0209 849c08d 24d0209 849c08d 24d0209 849c08d 24d0209 bc37cf0 24d0209 849c08d 24d0209 849c08d 24d0209 849c08d 24d0209 849c08d f560e5e 849c08d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import streamlit as st # type: ignore
from scrape import scrape_website, extract_body_content, clean_body_content, split_dom_content
from parse import parse_with_groq
# Page-level configuration, followed by the sidebar model picker.
st.set_page_config(page_icon="π", page_title="AI Web Scraping App")

# Model identifiers offered in the sidebar drop-down; the chosen value is
# later passed to parse_with_groq as its `model` argument.
model_choices = [
    "llama3-8b-8192",
    "llama3-groq-70b-8192-tool-use-preview",
    "llama-3.1-8b-instant",
    "llava-v1.5-7b-4096-preview",
    "mixtral-8x7b-32768",
]

st.sidebar.title("π Model Selection")
selected_model = st.sidebar.selectbox("Choose a Model for Parsing:", model_choices)
# Main-page header: app title plus a one-line tagline.
st.title(body="AI Web Scraper App π")
st.write("Easily scrape and analyze web content using advanced AI models. π")

# Address of the page to fetch; read by the scrape step.
url = st.text_input(label="Enter Website URL π")
# Step 1: Scrape the Website
# On click, run the URL through the scrape helpers and keep the cleaned text
# in st.session_state, which the parse step checks before showing its widgets.
if st.button("Scrape Website"):
    # Ignore surrounding whitespace so a blank-looking entry is treated as empty.
    target_url = url.strip() if url else ""

    if target_url:
        st.write("π΅οΈββοΈ Scraping the website...")

        # Scrape the website, then narrow the result down step by step.
        dom_content = scrape_website(target_url)
        body_content = extract_body_content(dom_content)
        cleaned_content = clean_body_content(body_content)

        # Store the cleaned content in Streamlit session state
        st.session_state.dom_content = cleaned_content

        # Display the cleaned content in an expandable text box
        with st.expander("View DOM Content"):
            st.text_area("DOM Content", cleaned_content, height=300)
    else:
        # Give feedback instead of silently doing nothing on an empty URL.
        st.warning("Please enter a website URL before scraping.")
# Step 2: Parse the Content
# Only offered once a scrape has stored cleaned content in session state.
if "dom_content" in st.session_state:
    parse_description = st.text_area("Describe what you want to parse π")

    # The button label must be a single-line literal; a line break inside a
    # regular string leaves it unterminated.
    if st.button("Submit ✅"):
        if parse_description:
            st.write(f"π€ Parsing the content with {selected_model}...")

            # Split the stored content into chunks and parse them using Groq
            # with the model chosen in the sidebar.
            dom_chunks = split_dom_content(st.session_state.dom_content)
            parsed_result = parse_with_groq(
                dom_chunks,
                parse_description,
                model=selected_model,
            )
            st.write(parsed_result)
        else:
            # Give feedback instead of silently doing nothing on an empty prompt.
            st.warning("Please describe what you want to parse.")
# Footer: an inline stylesheet (fixed position, bottom edge, full width) plus
# the footer <div> itself, kept together in one raw-HTML snippet.
footer_markup = """
<style>
.footer {
position: fixed;
bottom: 0;
left: 0;
width: 100%;
background-color: #272432; /* Dark background for visibility */
color: white;
text-align: center;
padding: 10px;
font-size: 14px;
}
.sidebar .footer {
position: fixed;
bottom: 0;
}
</style>
<div class="footer">
Made with β€οΈ by Usman Yousaf π<br>
Feel free to improve and expand this app for more powerful insights! π₯
</div>
"""

# unsafe_allow_html is set so the <style> and <div> tags are passed through as HTML.
st.markdown(footer_markup, unsafe_allow_html=True)
|