cheesyFishes
committed on
Update README.md
README.md
CHANGED
````diff
@@ -37,7 +37,7 @@ The model uses bf16 tensors and allocates ~4.4GB of VRAM when loaded. You can ea
 | 16 | 11.5 |
 | 32 | 19.7 |
 
-
+You can generate embeddings with this model in many different ways:
 
 <details open>
 <summary>
@@ -52,12 +52,13 @@ pip install -U llama-index-embeddings-huggingface
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 
 model = HuggingFaceEmbedding(
-
-    device="mps",
+    model_name="llamaindex/vdr-2b-v1",
+    device="cpu",  # "mps" for mac, "cuda" for nvidia GPUs
     trust_remote_code=True,
 )
 
-
+image_embedding = model.get_image_embedding("image.png")
+query_embedding = model.get_query_embedding("some query")
 ```
 
 </details>
@@ -80,7 +81,7 @@ min_pixels = 1 * 28 * 28
 
 # Load the embedding model and processor
 model = Qwen2VLForConditionalGeneration.from_pretrained(
-    'llamaindex/vdr-2b-
+    'llamaindex/vdr-2b-v1',
     # These are the recommended kwargs for the model, but change them as needed
     attn_implementation="flash_attention_2",
     torch_dtype=torch.bfloat16,
@@ -88,7 +89,7 @@ model = Qwen2VLForConditionalGeneration.from_pretrained(
 ).eval()
 
 processor = AutoProcessor.from_pretrained(
-    'llamaindex/vdr-2b-
+    'llamaindex/vdr-2b-v1',
     min_pixels=min_pixels,
     max_pixels=max_pixels
 )
@@ -216,10 +217,10 @@ via SentenceTransformers
 from sentence_transformers import SentenceTransformer
 
 model = SentenceTransformer(
-    model_name_or_path="llamaindex/vdr-2b-
-    device="
+    model_name_or_path="llamaindex/vdr-2b-v1",
+    device="cuda",
     trust_remote_code=True,
-    # These are the recommended kwargs for the model, but change them as needed
+    # These are the recommended kwargs for the model, but change them as needed if you don't have CUDA
     model_kwargs={
         "torch_dtype": torch.bfloat16,
         "device_map": "cuda:0",
````
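The SentenceTransformers hunk cuts off inside `model_kwargs`, so here is a hedged sketch of what usage might look like once the model is constructed. `encode()` and `similarity()` are standard SentenceTransformers APIs (`similarity()` needs sentence-transformers >= 3.0), but any model-specific prompt handling for queries vs. documents is not shown in the diff and is omitted here.

```python
# Sketch only, continuing from the truncated SentenceTransformer(...) call above.
docs = ["a page about retrieval", "an unrelated page"]  # illustrative inputs
query_emb = model.encode(["some query"])
doc_embs = model.encode(docs)

# similarity() returns a (num_queries, num_docs) score matrix
scores = model.similarity(query_emb, doc_embs)
print(scores)
```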