nirajandhakal committed on
Commit
11772dd
·
verified ·
1 Parent(s): e2698c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -147
app.py CHANGED
@@ -1,15 +1,11 @@
1
- """
2
- This is a book recommendation system.
3
- """
4
-
5
  import pickle
6
- import streamlit as st
7
  import pandas as pd
8
  import numpy as np
9
  from sklearn.preprocessing import LabelEncoder
10
  from sklearn.feature_extraction.text import TfidfVectorizer
11
  from sklearn.metrics.pairwise import cosine_similarity
12
  from tensorflow.keras.models import load_model
 
13
 
14
  # Load datasets
15
  books = pd.read_csv("./dataset/books.csv")
@@ -19,7 +15,7 @@ ratings = pd.read_csv("./dataset/ratings.csv")
19
  user_encoder = LabelEncoder()
20
  book_encoder = LabelEncoder()
21
 
22
-
23
  ratings["user_id"] = user_encoder.fit_transform(ratings["user_id"])
24
  ratings["book_id"] = book_encoder.fit_transform(ratings["book_id"])
25
 
@@ -30,12 +26,6 @@ with open("tfidf_model_authors.pkl", "rb") as f:
30
  with open("tfidf_model_titles.pkl", "rb") as f:
31
  tfidf_model_titles = pickle.load(f)
32
 
33
- # Define TF-IDF vectorizer
34
- tfidf_vectorizer = TfidfVectorizer(stop_words="english")
35
-
36
- # Fit and transform the book descriptions
37
- tfidf_matrix = tfidf_vectorizer.fit_transform(books["original_title"].fillna(""))
38
-
39
  # Load collaborative filtering model
40
  model_cf = load_model("recommendation_model.keras")
41
 
@@ -44,39 +34,16 @@ model_cf = load_model("recommendation_model.keras")
44
  def content_based_recommendation(
45
  query, books, tfidf_model_authors, tfidf_model_titles, num_recommendations=10
46
  ):
47
- """
48
- Recommend books based on content similarity.
49
- Args:
50
- query (str): The name of the book or author.
51
- books (DataFrame): DataFrame containing book information.
52
- tfidf_model_authors: Pre-trained TF-IDF model for authors.
53
- tfidf_model_titles: Pre-trained TF-IDF model for titles.
54
- num_recommendations (int): The number of books to recommend.
55
- Returns:
56
- DataFrame: A DataFrame containing recommended books with details.
57
- """
58
- # Check if the query corresponds to an author or a book
59
- if query in books["authors"].values:
60
- book_name = books.loc[books["authors"] == query, "original_title"].values[0]
61
- elif query in books["original_title"].values:
62
- book_name = query
63
- else:
64
- print("Query not found in authors or titles.")
65
- return None
66
-
67
- book_author = books.loc[books["original_title"] == book_name, "authors"].values[0]
68
- book_title = books.loc[books["title"] == book_name, "title"].values[0]
69
-
70
  # Transform book author, title, and description into TF-IDF vectors
71
- book_author_tfidf = tfidf_model_authors.transform([book_author])
72
- book_title_tfidf = tfidf_model_titles.transform([book_title])
73
 
74
  # Compute cosine similarity for authors and titles separately
75
  similarity_scores_authors = cosine_similarity(
76
- book_author_tfidf, tfidf_model_authors.transform(books["authors"])
77
  )
78
  similarity_scores_titles = cosine_similarity(
79
- book_title_tfidf, tfidf_model_titles.transform(books["title"])
80
  )
81
 
82
  # Combine similarity scores for authors and titles
@@ -97,31 +64,11 @@ def content_based_recommendation(
97
 
98
  # Collaborative Recommendation
99
  def collaborative_recommendation(user_id, model_cf, ratings, num_recommendations=10):
100
- """
101
- Recommend books based on collaborative filtering.
102
- Args:
103
- user_id (int): The user ID.
104
- model_cf: The trained collaborative filtering model.
105
- ratings (DataFrame): DataFrame containing user ratings.
106
- num_recommendations (int): The number of books to recommend.
107
- Returns:
108
- DataFrame: A DataFrame containing recommended books with details.
109
- """
110
- # Check if the user ID exists in the ratings dataset
111
- if user_id not in ratings["user_id"].unique():
112
- print("User ID not found in ratings dataset.")
113
- return None
114
-
115
  # Get unrated books for the user
116
  unrated_books = ratings[
117
  ~ratings["book_id"].isin(ratings[ratings["user_id"] == user_id]["book_id"])
118
  ]["book_id"].unique()
119
 
120
- # Check if there are unrated books
121
- if len(unrated_books) == 0:
122
- print("No unrated books found for the user.")
123
- return None
124
-
125
  # Predict ratings for unrated books
126
  predictions = model_cf.predict(
127
  [np.full_like(unrated_books, user_id), unrated_books]
@@ -135,25 +82,6 @@ def collaborative_recommendation(user_id, model_cf, ratings, num_recommendations
135
  return recommended_books
136
 
137
 
138
- # History-Based Recommendation
139
- def history_based_recommendation(user_id, ratings, num_recommendations=10):
140
- """
141
- Recommend books based on user's historical ratings.
142
- Args:
143
- user_id (int): The user ID.
144
- ratings (DataFrame): DataFrame containing user ratings.
145
- num_recommendations (int): The number of books to recommend.
146
- Returns:
147
- DataFrame: A DataFrame containing recommended books with details.
148
- """
149
- user_ratings = ratings[ratings["user_id"] == user_id]
150
- top_books = user_ratings.sort_values(by="rating", ascending=False).head(
151
- num_recommendations
152
- )["book_id"]
153
- recommended_books = books[books["book_id"].isin(top_books)]
154
- return recommended_books
155
-
156
-
157
  # Hybrid Recommendation
158
  def hybrid_recommendation(
159
  user_id,
@@ -165,20 +93,6 @@ def hybrid_recommendation(
165
  tfidf_model_titles,
166
  num_recommendations=10,
167
  ):
168
- """
169
- Recommend books using hybrid recommendation approach.
170
- Args:
171
- user_id (int): The user ID.
172
- query (str): The name of the book or author.
173
- model_cf: The collaborative filtering model.
174
- books (DataFrame): DataFrame containing book information.
175
- ratings (DataFrame): DataFrame containing user ratings.
176
- tfidf_model_authors: Pre-trained TF-IDF model for authors.
177
- tfidf_model_titles: Pre-trained TF-IDF model for titles.
178
- num_recommendations (int): The number of books to recommend.
179
- Returns:
180
- DataFrame: A DataFrame containing recommended books with details.
181
- """
182
  content_based_rec = content_based_recommendation(
183
  query,
184
  books,
@@ -189,63 +103,13 @@ def hybrid_recommendation(
189
  collaborative_rec = collaborative_recommendation(
190
  user_id, model_cf, ratings, num_recommendations=num_recommendations
191
  )
192
- history_based_rec = history_based_recommendation(
193
- user_id, ratings, num_recommendations=num_recommendations
194
- )
195
 
196
  # Combine recommendations from different approaches
197
- hybrid_rec = pd.concat(
198
- [content_based_rec, collaborative_rec, history_based_rec]
199
- ).drop_duplicates(subset="book_id", keep="first")
200
- return hybrid_rec
201
-
202
-
203
- # Top Recommendations (most popular books)
204
- def top_recommendations(books, num_recommendations=10):
205
- """
206
- Recommend top books based on popularity (highest ratings count).
207
- Args:
208
- books (DataFrame): DataFrame containing book information.
209
- num_recommendations (int): The number of books to recommend.
210
- Returns:
211
- DataFrame: A DataFrame containing recommended books with details.
212
- """
213
- top_books = books.sort_values(by="ratings_count", ascending=False).head(
214
- num_recommendations
215
  )
216
- return top_books
217
-
218
-
219
- # Test the recommendation functions
220
- query = input("Enter book name or author: ")
221
- USER_ID = 0 # Example user ID for collaborative and history-based recommendations
222
-
223
- print("Content-Based Recommendation:")
224
- print(
225
- content_based_recommendation(query, books, tfidf_model_authors, tfidf_model_titles)
226
- )
227
-
228
- print("\nCollaborative Recommendation:")
229
- print(collaborative_recommendation(USER_ID, model_cf, ratings))
230
-
231
- print("\nHistory-Based Recommendation:")
232
- print(history_based_recommendation(USER_ID, ratings))
233
-
234
- print("\nHybrid Recommendation:")
235
- print(
236
- hybrid_recommendation(
237
- user_id,
238
- query,
239
- model_cf,
240
- books,
241
- ratings,
242
- tfidf_model_authors,
243
- tfidf_model_titles,
244
- )
245
- )
246
 
247
- print("\nTop Recommendations:")
248
- print(top_recommendations(books))
249
 
250
  # Streamlit App
251
  st.title("Book Recommendation System")
@@ -261,12 +125,21 @@ if st.button("Get Recommendations"):
261
  )
262
  st.write(content_based_rec)
263
 
 
 
 
264
  st.write("Collaborative Recommendation:")
265
- collaborative_rec = collaborative_recommendation(0, model_cf, ratings)
266
  st.write(collaborative_rec)
267
 
268
  st.write("Hybrid Recommendation:")
269
  hybrid_rec = hybrid_recommendation(
270
- 0, user_input, model_cf, books, ratings, tfidf_model_authors, tfidf_model_titles
 
 
 
 
 
 
271
  )
272
  st.write(hybrid_rec)
 
 
 
 
 
1
  import pickle
 
2
  import pandas as pd
3
  import numpy as np
4
  from sklearn.preprocessing import LabelEncoder
5
  from sklearn.feature_extraction.text import TfidfVectorizer
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from tensorflow.keras.models import load_model
8
+ import streamlit as st
9
 
10
  # Load datasets
11
  books = pd.read_csv("./dataset/books.csv")
 
15
  user_encoder = LabelEncoder()
16
  book_encoder = LabelEncoder()
17
 
18
+ ratings["user_id"] = ratings["user_id"].astype(str)
19
  ratings["user_id"] = user_encoder.fit_transform(ratings["user_id"])
20
  ratings["book_id"] = book_encoder.fit_transform(ratings["book_id"])
21
 
 
26
  with open("tfidf_model_titles.pkl", "rb") as f:
27
  tfidf_model_titles = pickle.load(f)
28
 
 
 
 
 
 
 
29
  # Load collaborative filtering model
30
  model_cf = load_model("recommendation_model.keras")
31
 
 
34
  def content_based_recommendation(
35
  query, books, tfidf_model_authors, tfidf_model_titles, num_recommendations=10
36
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  # Transform book author, title, and description into TF-IDF vectors
38
+ query_author_tfidf = tfidf_model_authors.transform([query])
39
+ query_title_tfidf = tfidf_model_titles.transform([query])
40
 
41
  # Compute cosine similarity for authors and titles separately
42
  similarity_scores_authors = cosine_similarity(
43
+ query_author_tfidf, tfidf_model_authors.transform(books["authors"])
44
  )
45
  similarity_scores_titles = cosine_similarity(
46
+ query_title_tfidf, tfidf_model_titles.transform(books["original_title"])
47
  )
48
 
49
  # Combine similarity scores for authors and titles
 
64
 
65
  # Collaborative Recommendation
66
  def collaborative_recommendation(user_id, model_cf, ratings, num_recommendations=10):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  # Get unrated books for the user
68
  unrated_books = ratings[
69
  ~ratings["book_id"].isin(ratings[ratings["user_id"] == user_id]["book_id"])
70
  ]["book_id"].unique()
71
 
 
 
 
 
 
72
  # Predict ratings for unrated books
73
  predictions = model_cf.predict(
74
  [np.full_like(unrated_books, user_id), unrated_books]
 
82
  return recommended_books
83
 
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  # Hybrid Recommendation
86
  def hybrid_recommendation(
87
  user_id,
 
93
  tfidf_model_titles,
94
  num_recommendations=10,
95
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  content_based_rec = content_based_recommendation(
97
  query,
98
  books,
 
103
  collaborative_rec = collaborative_recommendation(
104
  user_id, model_cf, ratings, num_recommendations=num_recommendations
105
  )
 
 
 
106
 
107
  # Combine recommendations from different approaches
108
+ hybrid_rec = pd.concat([content_based_rec, collaborative_rec]).drop_duplicates(
109
+ subset="book_id", keep="first"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  )
111
+ return hybrid_rec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
 
 
113
 
114
  # Streamlit App
115
  st.title("Book Recommendation System")
 
125
  )
126
  st.write(content_based_rec)
127
 
128
+ # Example user ID for collaborative recommendation
129
+ USER_ID = 0
130
+
131
  st.write("Collaborative Recommendation:")
132
+ collaborative_rec = collaborative_recommendation(USER_ID, model_cf, ratings)
133
  st.write(collaborative_rec)
134
 
135
  st.write("Hybrid Recommendation:")
136
  hybrid_rec = hybrid_recommendation(
137
+ USER_ID,
138
+ user_input,
139
+ model_cf,
140
+ books,
141
+ ratings,
142
+ tfidf_model_authors,
143
+ tfidf_model_titles,
144
  )
145
  st.write(hybrid_rec)