File size: 9,480 Bytes
fda81d5
18caf87
 
91881c1
9ff560f
91881c1
9ff560f
 
91881c1
79f724c
9d8f95d
049a965
91881c1
18caf87
 
7875a05
18caf87
 
7875a05
18caf87
 
7875a05
 
 
 
 
 
 
 
 
 
 
82159c1
91881c1
 
 
18caf87
7875a05
 
82159c1
 
 
 
91881c1
82159c1
91881c1
82159c1
 
 
 
 
91881c1
049a965
91881c1
18caf87
91881c1
 
18caf87
 
91881c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9709389
 
 
 
 
 
 
 
 
91881c1
9ff560f
91881c1
9ff560f
91881c1
9ff560f
91881c1
 
 
 
af1e395
91881c1
 
 
 
01a348e
91881c1
 
9ff560f
 
 
91881c1
9ff560f
 
91881c1
9ff560f
 
af1e395
b0a71d9
c9e8660
94f3992
 
42731a8
94f3992
 
dad7666
94f3992
 
 
 
 
 
42731a8
 
94f3992
42731a8
 
94f3992
42731a8
 
94f3992
 
 
42731a8
94f3992
 
dad7666
 
42731a8
dad7666
 
42731a8
d56ab10
dad7666
 
 
 
 
9d8f95d
42731a8
dad7666
 
c55174e
91881c1
dad7666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
898b6df
 
91881c1
dad7666
 
91881c1
 
c1e08be
 
dad7666
f89471a
c1e08be
 
dad7666
e9bbcf4
dad7666
e9bbcf4
 
 
f89471a
c1e08be
 
dad7666
c1e08be
 
 
c55174e
dad7666
 
c1e08be
 
dad7666
c1e08be
 
 
 
 
91881c1
89d3d65
 
 
e9bbcf4
89d3d65
 
 
20a1029
dad7666
c1e08be
d11c1c2
c1e08be
 
 
 
dad7666
ade1d21
c1e08be
 
91881c1
 
 
 
 
dad7666
9d8f95d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceEndpoint
import os
import time

# Load models and tokenizers
# All three tokenizer/model pairs are created at module import time, so they
# are shared by every call to analyze_reviews() instead of being reloaded.
# NOTE(review): from_pretrained presumably resolves these IDs through the
# Hugging Face Hub or its local cache — confirm for offline deployments.

# Sentiment classifier; analyze_reviews() treats predicted class 0 as "positive".
sentiment_tokenizer = AutoTokenizer.from_pretrained("dnzblgn/Sentiment-Analysis-Customer-Reviews")
sentiment_model = AutoModelForSequenceClassification.from_pretrained("dnzblgn/Sentiment-Analysis-Customer-Reviews")

# Sarcasm classifier; analyze_reviews() treats predicted class 1 as sarcastic.
sarcasm_tokenizer = AutoTokenizer.from_pretrained("dnzblgn/Sarcasm-Detection-Customer-Reviews")
sarcasm_model = AutoModelForSequenceClassification.from_pretrained("dnzblgn/Sarcasm-Detection-Customer-Reviews")

# Topic classifier; its predicted class index is looked up in reverse_label_mapping.
doc_tokenizer = AutoTokenizer.from_pretrained("dnzblgn/Customer-Reviews-Classification")
doc_model = AutoModelForSequenceClassification.from_pretrained("dnzblgn/Customer-Reviews-Classification")

# Class index -> category name. The position of each name in the list is the
# class index that analyze_reviews() looks up for the topic classifier.
reverse_label_mapping = dict(enumerate([
    "shipping_and_delivery",
    "customer_service",
    "price_and_value",
    "quality_and_performance",
    "use_and_design",
    "other",
]))
# Category name -> class index (the inverse of the table above).
label_mapping = {name: index for index, name in reverse_label_mapping.items()}

def _predict_class(tokenizer, model, text):
    """Return the index of the highest-scoring class that `model` gives `text`.

    The text is tokenised with truncation to 512 tokens and scored without
    gradient tracking.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits
    return torch.argmax(logits, dim=-1).item()


def analyze_reviews(reviews):
    """Count positive and negative reviews overall and per category.

    Each review is scored by the module-level sentiment model (class 0 is
    treated as positive). Reviews scored positive are additionally run through
    the sarcasm model, and a sarcasm prediction of class 1 flips them to
    negative. The topic model's predicted class is mapped to a category name
    through `reverse_label_mapping`.

    Args:
        reviews: Iterable of review strings.

    Returns:
        A dict of the form
        ``{"overall": {"positive": int, "negative": int},
           "categories": {<category>: {"positive": int, "negative": int}}}``
        with one entry in "categories" for every key of `label_mapping`.

    Raises:
        KeyError: If the topic model predicts a class index that is not in
            `reverse_label_mapping`.
    """
    analysis = {
        "overall": {"positive": 0, "negative": 0},
        "categories": {label: {"positive": 0, "negative": 0} for label in label_mapping},
    }

    for review in reviews:
        is_positive = _predict_class(sentiment_tokenizer, sentiment_model, review) == 0

        # Sarcasm is only checked for apparently positive reviews: a sarcastic
        # "positive" review is counted as negative feedback.
        if is_positive and _predict_class(sarcasm_tokenizer, sarcasm_model, review) == 1:
            is_positive = False
        sentiment = "positive" if is_positive else "negative"

        category = reverse_label_mapping[_predict_class(doc_tokenizer, doc_model, review)]

        analysis["overall"][sentiment] += 1
        analysis["categories"][category][sentiment] += 1

    return analysis

def _percent(count, total):
    """Return `count` as a percentage of `total`, or 0 when `total` is not positive."""
    return (count / total) * 100 if total > 0 else 0


def generate_analysis_document(analysis):
    """Render the review statistics as a plain-text report.

    The report has an overall section followed by one section per category;
    every section is terminated by a line containing only "--END--".

    Args:
        analysis: Dict shaped like the result of `analyze_reviews`, i.e.
            ``{"overall": {"positive": int, "negative": int},
               "categories": {name: {"positive": int, "negative": int}}}``.

    Returns:
        The report as a single newline-joined string. Percentages are rounded
        to whole numbers and are reported as 0% when the relevant total is
        zero, so an empty analysis no longer raises ZeroDivisionError.
    """
    overall_positive = analysis["overall"]["positive"]
    overall_negative = analysis["overall"]["negative"]
    total_reviews = overall_positive + overall_negative

    doc = [
        "Overall Sentiment Analysis:",
        f"Positive Feedback: {overall_positive} comments ({_percent(overall_positive, total_reviews):.0f}%)",
        f"Negative Feedback: {overall_negative} comments ({_percent(overall_negative, total_reviews):.0f}%)",
        "--END--",
        "Category-Specific Analysis:",
        "--END--",
    ]

    for category, feedback in analysis["categories"].items():
        total_category = feedback["positive"] + feedback["negative"]
        positive_rate = _percent(feedback["positive"], total_category)
        negative_rate = _percent(feedback["negative"], total_category)
        doc.extend([
            f"{category.capitalize()}:",
            f"- Positive Feedback: {feedback['positive']} comments ({positive_rate:.0f}%)",
            f"- Negative Feedback: {feedback['negative']} comments ({negative_rate:.0f}%)",
            "--END--",
        ])

    return "\n".join(doc)

def write_analysis_to_file(analysis_document):
    """Write the report text to "processed_analysis.txt" and return that path.

    The file is created in the current working directory and overwritten if it
    already exists. It is written as UTF-8 so the result does not depend on the
    platform's default encoding.
    """
    with open("processed_analysis.txt", "w", encoding="utf-8") as f:
        f.write(analysis_document)
    return "processed_analysis.txt"

def read_processed_file():
    """Return the full text of "processed_analysis.txt".

    The file is decoded as UTF-8, matching `write_analysis_to_file`.

    Raises:
        FileNotFoundError: If the report has not been written yet.
    """
    with open("processed_analysis.txt", "r", encoding="utf-8") as f:
        return f.read()

def create_db_from_analysis(analysis_document):
    """Index the report text in a FAISS vector store.

    The text is split into chunks of up to 1024 characters with a 64-character
    overlap, embedded with a default-constructed `HuggingFaceEmbeddings`, and
    the resulting store is returned.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
    chunks = chunker.create_documents([analysis_document])
    return FAISS.from_documents(chunks, HuggingFaceEmbeddings())

def initialize_chatbot(vector_db):
    """Create a conversational retrieval chain over `vector_db`.

    The chain combines the store's default retriever, a buffer memory stored
    under the "chat_history" key, and a hosted Mistral-7B-Instruct endpoint.
    The endpoint token is read from the HUGGINGFACE_API_TOKEN environment
    variable (None is passed through if it is unset).
    """
    chat_memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    doc_retriever = vector_db.as_retriever()

    endpoint_settings = {
        "repo_id": "mistralai/Mistral-7B-Instruct-v0.2",
        "huggingfacehub_api_token": os.environ.get("HUGGINGFACE_API_TOKEN"),
        "temperature": 0.5,
        "max_new_tokens": 256,
    }

    return ConversationalRetrievalChain.from_llm(
        llm=HuggingFaceEndpoint(**endpoint_settings),
        retriever=doc_retriever,
        memory=chat_memory,
        verbose=False,
    )

def process_and_initialize(file):
    """Analyse an uploaded review file and build the chatbot for it.

    Args:
        file: Path to a UTF-8 text file with one review per line, or None when
            nothing has been uploaded. Blank lines are ignored.

    Returns:
        A ``(vector_db, qa_chain, status_message)`` tuple. On any failure the
        first two items are None and the message describes the problem; this
        function does not raise, so the UI always receives a status string.
    """
    if file is None:
        return None, None, "Please upload a file first."

    try:
        with open(file, "r", encoding="utf-8") as f:
            stripped_lines = (line.strip() for line in f)
            reviews = [line for line in stripped_lines if line]
    except FileNotFoundError:
        return None, None, "File not found. Please try uploading again."
    except Exception as e:
        return None, None, f"Processing error: {str(e)}"

    if not reviews:
        return None, None, "File is empty. Please upload a file with reviews."

    try:
        analysis = analyze_reviews(reviews)
        analysis_doc = generate_analysis_document(analysis)

        # The report is still written to disk as an artifact, but the index is
        # built from the in-memory text: the file name is fixed and shared, so
        # reading it back could pick up a report written by another session.
        write_analysis_to_file(analysis_doc)

        db = create_db_from_analysis(analysis_doc)
        qa = initialize_chatbot(db)
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, None, f"Processing error: {str(e)}"

    return db, qa, f"Successfully processed {len(reviews)} reviews! Ready for questions."

def user_query_typing_effect(query, qa_chain, chatbot):
    """Answer `query` with the QA chain, streaming the reply one character at a time.

    Args:
        query: The user's question.
        qa_chain: Chain returned by `initialize_chatbot`, or None when no
            review file has been processed yet.
        chatbot: Current chat history as a list of ``{"role", "content"}``
            dicts, or None/empty for a new conversation. It is extended in place.

    Yields:
        ``(history, "")`` tuples: the updated history and an empty string that
        clears the question box. The user's message is always recorded, and at
        least one update is yielded even when the answer is empty.
    """
    history = chatbot or []
    history.append({"role": "user", "content": query})

    if qa_chain is None:
        # Nothing has been processed yet; tell the user what to do instead of
        # surfacing an AttributeError from calling .invoke on None.
        history.append({
            "role": "assistant",
            "content": "Please upload and process a review file before asking questions.",
        })
        yield history, ""
        return

    try:
        response = qa_chain.invoke({"question": query, "chat_history": []})
        assistant_response = response["answer"]
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        history.append({"role": "assistant", "content": f"Error: {str(e)}"})
        yield history, ""
        return

    history.append({"role": "assistant", "content": ""})
    # Emit once before typing so the exchange is shown even for an empty answer.
    yield history, ""

    for char in assistant_response:
        history[-1]["content"] += char
        yield history, ""
        time.sleep(0.05)  # Slower typing effect

def demo():
    """Build the Gradio interface and launch it.

    The left column takes a .txt upload and shows a status box; the right
    column holds the chat. Processing a file stores the vector store and QA
    chain in per-session state, and both the "Ask" button and pressing Enter in
    the question box send the question to `user_query_typing_effect`.
    """
    custom_css = """
    body {
        background-color: #FF8C00;
        font-family: Arial, sans-serif;
    }
    .gradio-container {
        border-radius: 15px;
        box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.3);
        padding: 20px;
    }
    footer {
        visibility: hidden;
    }
    .chatbot {
        border: 2px solid #000;
        border-radius: 10px;
        background-color: #FFF5E1;
    }
    """

    with gr.Blocks(css=custom_css) as app:
        # Per-session holders filled in by process_and_initialize.
        vector_db = gr.State(None)
        qa_chain = gr.State(None)

        gr.Markdown("### 🌟 **Customer Review Analysis and Chatbot** 🌟")
        gr.Markdown("#### Upload your review file and ask questions interactively!")

        with gr.Row():
            with gr.Column(scale=1):
                # The emoji in these labels were stored as mis-decoded bytes
                # (e.g. "πŸš€"); they are restored to the intended characters.
                txt_file = gr.File(
                    label="📁 Upload Reviews",
                    file_types=[".txt"],
                    type="filepath"
                )
                analyze_btn = gr.Button("🚀 Process Reviews")
                status = gr.Textbox(
                    label="📊 Status",
                    placeholder="Status updates will appear here...",
                    interactive=False
                )

            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="🤖 Chat with your data",
                    height=600,
                    bubble_full_width=False,
                    show_label=False,
                    render_markdown=True,
                    type="messages",
                    elem_classes=["chatbot"]
                )
                query_input = gr.Textbox(
                    label="Ask a question",
                    placeholder="Ask about the reviews...",
                    show_label=False,
                    container=False
                )
                query_btn = gr.Button("Ask")

        analyze_btn.click(
            fn=process_and_initialize,
            inputs=[txt_file],
            outputs=[vector_db, qa_chain, status],
            show_progress="minimal"
        )

        # The button click and Enter in the textbox share one handler wiring.
        ask_event = dict(
            fn=user_query_typing_effect,
            inputs=[query_input, qa_chain, chatbot],
            outputs=[chatbot, query_input],
            show_progress="minimal",
        )
        query_btn.click(**ask_event)
        query_input.submit(**ask_event)

    app.launch()

# Launch the UI only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo()