cheesyFishes
committed on
Update README.md
README.md
CHANGED
````diff
@@ -37,7 +37,7 @@ The model uses bf16 tensors and allocates ~4.4GB of VRAM when loaded. You can ea
 | 16 | 11.5 |
 | 32 | 19.7 |
 
-
+You can generate embeddings with this model in many different ways:
 
 <details open>
 <summary>
@@ -52,12 +52,13 @@ pip install -U llama-index-embeddings-huggingface
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 
 model = HuggingFaceEmbedding(
-
-    device="mps",
+    model_name="llamaindex/vdr-2b-v1",
+    device="cpu",  # "mps" for mac, "cuda" for nvidia GPUs
     trust_remote_code=True,
 )
 
-
+image_embedding = model.get_image_embedding("image.png")
+query_embedding = model.get_query_embedding("some query")
 ```
 
 </details>
@@ -80,7 +81,7 @@ min_pixels = 1 * 28 * 28
 
 # Load the embedding model and processor
 model = Qwen2VLForConditionalGeneration.from_pretrained(
-    'llamaindex/vdr-2b-
+    'llamaindex/vdr-2b-v1',
     # These are the recommended kwargs for the model, but change them as needed
     attn_implementation="flash_attention_2",
     torch_dtype=torch.bfloat16,
@@ -88,7 +89,7 @@ model = Qwen2VLForConditionalGeneration.from_pretrained(
 ).eval()
 
 processor = AutoProcessor.from_pretrained(
-    'llamaindex/vdr-2b-
+    'llamaindex/vdr-2b-v1',
     min_pixels=min_pixels,
     max_pixels=max_pixels
 )
@@ -216,10 +217,10 @@ via SentenceTransformers
 from sentence_transformers import SentenceTransformer
 
 model = SentenceTransformer(
-    model_name_or_path="llamaindex/vdr-2b-
-    device="
+    model_name_or_path="llamaindex/vdr-2b-v1",
+    device="cuda",
     trust_remote_code=True,
-    # These are the recommended kwargs for the model, but change them as needed
+    # These are the recommended kwargs for the model, but change them as needed if you don't have CUDA
     model_kwargs={
         "torch_dtype": torch.bfloat16,
         "device_map": "cuda:0",
````
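The SentenceTransformers hunk cuts off inside `model_kwargs`, so here is a hedged sketch of what usage might look like once the model is constructed. `encode()` and `similarity()` are standard SentenceTransformers APIs (`similarity()` needs sentence-transformers >= 3.0), but any model-specific prompt handling for queries vs. documents is not shown in the diff and is omitted here.

```python
# Sketch only, continuing from the truncated SentenceTransformer(...) call above.
docs = ["a page about retrieval", "an unrelated page"]  # illustrative inputs
query_emb = model.encode(["some query"])
doc_embs = model.encode(docs)

# similarity() returns a (num_queries, num_docs) score matrix
scores = model.similarity(query_emb, doc_embs)
print(scores)
```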