eliot-hub committed on
Commit
a303d6f
·
1 Parent(s): bcc0c3e
Files changed (2) hide show
  1. app.py +4 -4
  2. tools.py +7 -7
app.py CHANGED
@@ -80,7 +80,7 @@ with gr.Blocks(theme=theme, js=js_func, title="Dataltist", fill_height=True) as
80
  iface.unload(delete_agent)
81
 
82
  if __name__ == "__main__":
83
- load_dotenv()
84
- AUTH_ID = os.environ.get("AUTH_ID")
85
- AUTH_PASS = os.environ.get("AUTH_PASS")
86
- iface.launch(auth=(AUTH_ID, AUTH_PASS)) #share=True, auth=(AUTH_ID, AUTH_PASS)
 
80
  iface.unload(delete_agent)
81
 
82
  if __name__ == "__main__":
83
+ # load_dotenv()
84
+ # AUTH_ID = os.environ.get("AUTH_ID")
85
+ # AUTH_PASS = os.environ.get("AUTH_PASS")
86
+ iface.launch() #share=True, auth=(AUTH_ID, AUTH_PASS)
tools.py CHANGED
@@ -41,13 +41,13 @@ memoires_ds = load_dataset("eliot-hub/memoires_vec_800", split="data", token=HF_
41
  batched_ds = memoires_ds.batch(batch_size=41000)
42
  client = chromadb.Client()
43
  collection = client.get_or_create_collection(name="embeddings_mxbai")
44
- # for batch in tqdm(batched_ds, desc="Processing dataset batches"):
45
- # collection.add(
46
- # ids=batch["id"],
47
- # metadatas=batch["metadata"],
48
- # documents=batch["document"],
49
- # embeddings=batch["embedding"],
50
- # )
51
  print(f"Collection complete: {collection.count()}")
52
  del memoires_ds, batched_ds
53
 
 
41
  batched_ds = memoires_ds.batch(batch_size=41000)
42
  client = chromadb.Client()
43
  collection = client.get_or_create_collection(name="embeddings_mxbai")
44
+ for batch in tqdm(batched_ds, desc="Processing dataset batches"):
45
+ collection.add(
46
+ ids=batch["id"],
47
+ metadatas=batch["metadata"],
48
+ documents=batch["document"],
49
+ embeddings=batch["embedding"],
50
+ )
51
  print(f"Collection complete: {collection.count()}")
52
  del memoires_ds, batched_ds
53