Spaces:
Sleeping
Sleeping
Update hf_to_chroma_ds.py
Browse files - hf_to_chroma_ds.py +3 -1
hf_to_chroma_ds.py
CHANGED
@@ -69,7 +69,7 @@ class Memoires_DS(Dataset):
|
|
69 |
|
70 |
|
71 |
|
72 |
-
def import_into_chroma(chroma_client, dataset, collection_name=None, embedding_function=None, batch_size=
|
73 |
"""
|
74 |
Imports a dataset into Chroma in batches.
|
75 |
|
@@ -113,12 +113,14 @@ def import_into_chroma(chroma_client, dataset, collection_name=None, embedding_f
|
|
113 |
if ef is None:
|
114 |
ef = embedding_functions.DefaultEmbeddingFunction()
|
115 |
|
|
|
116 |
collection = chroma_client.create_collection(
|
117 |
collection_name,
|
118 |
embedding_function=ef
|
119 |
)
|
120 |
|
121 |
# Retrieve the mapped data
|
|
|
122 |
mapped_data = dataset.to_chroma()
|
123 |
del dataset
|
124 |
|
|
|
69 |
|
70 |
|
71 |
|
72 |
+
def import_into_chroma(chroma_client, dataset, collection_name=None, embedding_function=None, batch_size=5000):
|
73 |
"""
|
74 |
Imports a dataset into Chroma in batches.
|
75 |
|
|
|
113 |
if ef is None:
|
114 |
ef = embedding_functions.DefaultEmbeddingFunction()
|
115 |
|
116 |
+
print("########### Init collection ###########")
|
117 |
collection = chroma_client.create_collection(
|
118 |
collection_name,
|
119 |
embedding_function=ef
|
120 |
)
|
121 |
|
122 |
# Retrieve the mapped data
|
123 |
+
print("########### Init to_chroma ###########")
|
124 |
mapped_data = dataset.to_chroma()
|
125 |
del dataset
|
126 |
|