Konrad Wojtasik committed on
Commit
3408a19
1 Parent(s): 90405e2

More hits for crossencoder

Browse files
Files changed (2) hide show
  1. app.py +4 -2
  2. mcontriever-scifact-pl-corpus +3 -0
app.py CHANGED
@@ -170,7 +170,9 @@ def search_func(query, bi_encoder_type, top_k=top_k):
170
  else:
171
  question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
172
  question_embedding = question_embedding.cpu()
173
- hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=top_k,score_function=util.dot_score)
 
 
174
  hits = hits[0] # Get the hits for the first query
175
 
176
  ##### Re-Ranking #####
@@ -229,7 +231,7 @@ if search:
229
  with st.spinner(
230
  text=f"Loading {bi_encoder_type} bi-encoder and embedding document into vector space. This might take a few seconds depending on the length of your document..."
231
  ):
232
- bi_encoder, corpus_embeddings = bi_encode(bi_encoder_type,passages)
233
  cross_encoder = cross_encode(cross_encoder_type)
234
  bm25 = bm25_api(passages)
235
 
 
170
  else:
171
  question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
172
  question_embedding = question_embedding.cpu()
173
+
174
+ HITS_NUM=20 # Number of hits for reranker
175
+ hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=HITS_NUM,score_function=util.dot_score)
176
  hits = hits[0] # Get the hits for the first query
177
 
178
  ##### Re-Ranking #####
 
231
  with st.spinner(
232
  text=f"Loading {bi_encoder_type} bi-encoder and embedding document into vector space. This might take a few seconds depending on the length of your document..."
233
  ):
234
+ bi_encoder, corpus_embeddings = bi_encode(bi_encoder_type,passages, dataset_name=dataset_type)
235
  cross_encoder = cross_encode(cross_encoder_type)
236
  bm25 = bm25_api(passages)
237
 
mcontriever-scifact-pl-corpus ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:291dd709b7bbe015f1176c9fe37fe8e7a64d55673cac2729a304c7b26a40addd
3
+ size 15923056