# Source: Hugging Face Space "dnzblgn/RAG_for_customer_reviews" (status: Running).
# File size: 9,480 bytes.
# NOTE: the per-line commit hashes from the web file viewer's blame gutter were
# extraction residue (not part of the program) and have been removed.

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceEndpoint
import os
import time

# Hugging Face Hub repository IDs of the three classifier checkpoints.
_SENTIMENT_REPO = "dnzblgn/Sentiment-Analysis-Customer-Reviews"
_SARCASM_REPO = "dnzblgn/Sarcasm-Detection-Customer-Reviews"
_CATEGORY_REPO = "dnzblgn/Customer-Reviews-Classification"

# Each tokenizer/model pair is loaded once, at import time, and kept as a
# module-level global for the analysis code below.
sentiment_tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_REPO)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(_SENTIMENT_REPO)

sarcasm_tokenizer = AutoTokenizer.from_pretrained(_SARCASM_REPO)
sarcasm_model = AutoModelForSequenceClassification.from_pretrained(_SARCASM_REPO)

doc_tokenizer = AutoTokenizer.from_pretrained(_CATEGORY_REPO)
doc_model = AutoModelForSequenceClassification.from_pretrained(_CATEGORY_REPO)

# Review topic names in class-index order: the name at position i is the
# label for class id i.
_CATEGORY_NAMES = (
    "shipping_and_delivery",
    "customer_service",
    "price_and_value",
    "quality_and_performance",
    "use_and_design",
    "other",
)

# Name -> class id, and the inverse lookup used to decode model predictions.
label_mapping = {name: index for index, name in enumerate(_CATEGORY_NAMES)}
reverse_label_mapping = dict(enumerate(_CATEGORY_NAMES))

def _predict_class(tokenizer, model, text):
    """Return the index of the highest-scoring class `model` gives `text`.

    The text is tokenized to PyTorch tensors (truncated to 512 tokens) and
    the model is run with gradient tracking disabled.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        logits = model(**encoded).logits
    return torch.argmax(logits, dim=-1).item()


def analyze_reviews(reviews):
    """Tally positive/negative counts overall and per topic category.

    Args:
        reviews: Iterable of review texts, one string per review.

    Returns:
        A dict with two keys: ``"overall"`` maps ``"positive"`` and
        ``"negative"`` to counts, and ``"categories"`` maps every name in
        ``label_mapping`` to its own positive/negative counts.
    """
    overall = {"positive": 0, "negative": 0}
    per_category = {label: {"positive": 0, "negative": 0} for label in label_mapping}

    for review in reviews:
        # The code treats sentiment class 0 as "positive"; anything else is negative.
        is_positive = _predict_class(sentiment_tokenizer, sentiment_model, review) == 0

        # Only positive-looking reviews are checked for sarcasm; sarcasm
        # class 1 flips the review to negative.
        if is_positive and _predict_class(sarcasm_tokenizer, sarcasm_model, review) == 1:
            is_positive = False
        sentiment = "positive" if is_positive else "negative"

        category = reverse_label_mapping[_predict_class(doc_tokenizer, doc_model, review)]

        overall[sentiment] += 1
        per_category[category][sentiment] += 1

    return {"overall": overall, "categories": per_category}

def _percentage(part, whole):
    """Return `part` as a percentage of `whole`, or 0 when `whole` is not positive."""
    return (part / whole) * 100 if whole > 0 else 0


def generate_analysis_document(analysis):
    """Render the sentiment tallies as a plain-text report.

    The report is a sequence of sections, each closed by a ``--END--`` line:
    the overall counts, a category heading, and then one section per category.

    Args:
        analysis: Dict with an ``"overall"`` entry holding ``"positive"`` and
            ``"negative"`` counts, and a ``"categories"`` entry mapping each
            category name to the same pair of counts.

    Returns:
        The report as a single newline-joined string. Percentages are shown
        as 0% when there are no reviews to divide by, both for the overall
        section and for each category.
    """
    overall_positive = analysis["overall"]["positive"]
    overall_negative = analysis["overall"]["negative"]
    total_reviews = overall_positive + overall_negative

    # The overall rates use the same zero-total guard as the category rates,
    # so an empty analysis produces a report instead of ZeroDivisionError.
    overall_positive_rate = _percentage(overall_positive, total_reviews)
    overall_negative_rate = _percentage(overall_negative, total_reviews)

    doc = [
        "Overall Sentiment Analysis:",
        f"Positive Feedback: {overall_positive} comments ({overall_positive_rate:.0f}%)",
        f"Negative Feedback: {overall_negative} comments ({overall_negative_rate:.0f}%)",
        "--END--",
        "Category-Specific Analysis:",
        "--END--",
    ]

    for category, feedback in analysis["categories"].items():
        total_category = feedback["positive"] + feedback["negative"]
        positive_rate = _percentage(feedback["positive"], total_category)
        negative_rate = _percentage(feedback["negative"], total_category)
        doc.extend([
            f"{category.capitalize()}:",
            f"- Positive Feedback: {feedback['positive']} comments ({positive_rate:.0f}%)",
            f"- Negative Feedback: {feedback['negative']} comments ({negative_rate:.0f}%)",
            "--END--",
        ])

    return "\n".join(doc)

def write_analysis_to_file(analysis_document):
    """Save the report text to ``processed_analysis.txt`` and return that path.

    The file is created in the current working directory and any previous
    contents are overwritten.
    """
    output_path = "processed_analysis.txt"
    with open(output_path, "w") as handle:
        handle.write(analysis_document)
    return output_path

def read_processed_file():
    """Return the entire text of ``processed_analysis.txt``.

    The file is looked up in the current working directory; opening it
    raises the usual ``OSError`` subclasses if it does not exist.
    """
    with open("processed_analysis.txt", "r") as handle:
        contents = handle.read()
    return contents

def create_db_from_analysis(analysis_document):
    """Index the report text in a FAISS vector store.

    The text is split into chunks of up to 1024 characters with a
    64-character overlap, then embedded with ``HuggingFaceEmbeddings``
    constructed with its default settings.

    Args:
        analysis_document: The report text to index.

    Returns:
        The FAISS vector store built from the chunks.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
    chunks = chunker.create_documents([analysis_document])
    return FAISS.from_documents(chunks, HuggingFaceEmbeddings())

def initialize_chatbot(vector_db):
    """Create a conversational retrieval chain over `vector_db`.

    Args:
        vector_db: Vector store whose ``as_retriever()`` supplies the
            documents the chain answers from.

    Returns:
        A ``ConversationalRetrievalChain`` that uses a hosted
        Mistral-7B-Instruct endpoint and keeps the dialogue in a
        ``ConversationBufferMemory`` under the ``"chat_history"`` key.
    """
    chat_memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    doc_retriever = vector_db.as_retriever()

    # The API token is read from the environment; it is None when the
    # HUGGINGFACE_API_TOKEN variable is not set.
    endpoint_settings = {
        "repo_id": "mistralai/Mistral-7B-Instruct-v0.2",
        "huggingfacehub_api_token": os.environ.get("HUGGINGFACE_API_TOKEN"),
        "temperature": 0.5,
        "max_new_tokens": 256,
    }
    endpoint_llm = HuggingFaceEndpoint(**endpoint_settings)

    return ConversationalRetrievalChain.from_llm(
        llm=endpoint_llm,
        retriever=doc_retriever,
        memory=chat_memory,
        verbose=False,
    )

def process_and_initialize(file):
    """Analyse an uploaded review file and prepare the chatbot for it.

    Args:
        file: Path to a UTF-8 text file with one review per line, or None
            when nothing has been uploaded.

    Returns:
        A ``(vector_db, qa_chain, status_message)`` tuple. On any failure the
        first two items are None and the message explains what went wrong.
    """
    if file is None:
        return None, None, "Please upload a file first."

    try:
        if not os.path.exists(file):
            return None, None, "File not found. Please try uploading again."

        # One review per line; whitespace-only lines are ignored.
        with open(file, 'r', encoding='utf-8') as source:
            stripped_lines = (raw.strip() for raw in source)
            reviews = [text for text in stripped_lines if text]

        if len(reviews) == 0:
            return None, None, "File is empty. Please upload a file with reviews."

        report = generate_analysis_document(analyze_reviews(reviews))

        # The report is written to disk and read back before being indexed.
        write_analysis_to_file(report)
        db = create_db_from_analysis(read_processed_file())
        qa = initialize_chatbot(db)
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, None, f"Processing error: {str(e)}"

    return db, qa, f"Successfully processed {len(reviews)} reviews! Ready for questions."

def user_query_typing_effect(query, qa_chain, chatbot):
    """Answer `query` with the retrieval chain, streaming the reply into the chat.

    This is a generator. Every yielded value is a ``(history, "")`` pair: the
    updated list of ``{"role": ..., "content": ...}`` messages, and an empty
    string for the second output.

    Args:
        query: The user's question.
        qa_chain: Object whose ``invoke`` returns a mapping with an
            ``"answer"`` entry, or None if no reviews have been processed yet.
        chatbot: Existing message history, or None/empty for a new chat.

    Yields:
        ``(history, "")`` after each step of the reply. The question is
        always kept in the history; failures are shown as an assistant
        message rather than raised.
    """
    history = chatbot or []

    # Nothing to ask: leave the conversation as it is.
    if not query or not query.strip():
        yield history, ""
        return

    # Record the question first so it stays visible even if answering fails.
    history.append({"role": "user", "content": query})

    # Asking before a file has been processed used to surface a raw
    # AttributeError; give an actionable message instead.
    if qa_chain is None:
        history.append({
            "role": "assistant",
            "content": "Please upload and process a review file before asking questions.",
        })
        yield history, ""
        return

    try:
        response = qa_chain.invoke({"question": query, "chat_history": []})
        assistant_response = response["answer"]

        history.append({"role": "assistant", "content": ""})
        # Yield once up front so the question appears right away and an empty
        # answer still refreshes the chat.
        yield history, ""

        for char in assistant_response:
            history[-1]["content"] += char
            yield history, ""
            time.sleep(0.05)  # Slower typing effect
    except Exception as e:
        history.append({"role": "assistant", "content": f"Error: {str(e)}"})
        yield history, ""

def demo():
    """Build the Gradio interface, wire up its events, and launch the app.

    The page has an upload/status column on the left and a chat column on the
    right. The vector store and the QA chain are kept in ``gr.State`` holders
    that the processing handler fills and the question handler reads.
    """
    custom_css = """
    body {
        background-color: #FF8C00;
        font-family: Arial, sans-serif;
    }
    .gradio-container {
        border-radius: 15px;
        box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.3);
        padding: 20px;
    }
    footer {
        visibility: hidden;
    }
    .chatbot {
        border: 2px solid #000;
        border-radius: 10px;
        background-color: #FFF5E1;
    }
    """

    with gr.Blocks(css=custom_css) as ui:
        # Both holders start empty and are filled by process_and_initialize.
        db_state = gr.State(None)
        chain_state = gr.State(None)

        gr.Markdown("### 🌟 **Customer Review Analysis and Chatbot** 🌟")
        gr.Markdown("#### Upload your review file and ask questions interactively!")

        with gr.Row():
            # Left column: upload, trigger and status read-out.
            with gr.Column(scale=1):
                upload = gr.File(label="📁 Upload Reviews", file_types=[".txt"], type="filepath")
                process_btn = gr.Button("🚀 Process Reviews")
                status_box = gr.Textbox(
                    label="📊 Status",
                    placeholder="Status updates will appear here...",
                    interactive=False,
                )

            # Right column: the conversation and the question box.
            with gr.Column(scale=3):
                chat_window = gr.Chatbot(
                    label="🤖 Chat with your data",
                    height=600,
                    bubble_full_width=False,
                    show_label=False,
                    render_markdown=True,
                    type="messages",
                    elem_classes=["chatbot"],
                )
                query_box = gr.Textbox(
                    label="Ask a question",
                    placeholder="Ask about the reviews...",
                    show_label=False,
                    container=False,
                )
                ask_btn = gr.Button("Ask")

        process_btn.click(
            fn=process_and_initialize,
            inputs=[upload],
            outputs=[db_state, chain_state, status_box],
            show_progress="minimal",
        )

        # Clicking "Ask" and pressing Enter in the question box run the same handler.
        for register in (ask_btn.click, query_box.submit):
            register(
                fn=user_query_typing_effect,
                inputs=[query_box, chain_state, chat_window],
                outputs=[chat_window, query_box],
                show_progress="minimal",
            )

    ui.launch()

# Build and launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo()