Set 1024 as default dim, update usage snippets, store prompts in config (#1)
Browse files- Set 1024 as default dim, update usage snippets, store prompts in config (ecefcb649f558159677e33a447c5ca466bba5400)
- README.md +69 -22
- config_sentence_transformers.json +13 -0
- modules.json +1 -1
README.md
CHANGED
@@ -5472,34 +5472,64 @@ Please refer to the following chapters for specific instructions on how to use t
|
|
5472 |
|
5473 |
# Usage
|
5474 |
|
5475 |
-
You can use `
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5476 |
|
5477 |
```python
|
5478 |
import os
|
5479 |
import torch
|
5480 |
from transformers import AutoModel, AutoTokenizer
|
5481 |
-
from sentence_transformers import SentenceTransformer
|
5482 |
from sklearn.preprocessing import normalize
|
5483 |
|
5484 |
-
|
5485 |
-
queries = [
|
5486 |
-
|
5487 |
-
|
5488 |
-
|
5489 |
-
|
|
|
|
|
|
|
|
|
|
|
5490 |
|
|
|
5491 |
model_dir = "{Your MODEL_PATH}"
|
5492 |
|
5493 |
-
|
5494 |
-
|
5495 |
-
model = SentenceTransformer(model_dir, trust_remote_code=True).cuda()
|
5496 |
-
vectors = model.encode(queries, convert_to_numpy=True, normalize_embeddings=True)
|
5497 |
-
print(vectors.shape)
|
5498 |
-
print(vectors[:, :4])
|
5499 |
-
|
5500 |
-
#### method2:transformers
|
5501 |
-
vector_linear_directory = "2_Dense"
|
5502 |
-
vector_dim = 8192
|
5503 |
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).cuda().eval()
|
5504 |
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
|
5505 |
vector_linear = torch.nn.Linear(in_features=model.config.hidden_size, out_features=vector_dim)
|
@@ -5509,17 +5539,34 @@ vector_linear_dict = {
|
|
5509 |
}
|
5510 |
vector_linear.load_state_dict(vector_linear_dict)
|
5511 |
vector_linear.cuda()
|
|
|
|
|
5512 |
with torch.no_grad():
|
5513 |
input_data = tokenizer(queries, padding="longest", truncation=True, max_length=512, return_tensors="pt")
|
5514 |
input_data = {k: v.cuda() for k, v in input_data.items()}
|
5515 |
attention_mask = input_data["attention_mask"]
|
5516 |
last_hidden_state = model(**input_data)[0]
|
5517 |
last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
|
5518 |
-
|
5519 |
-
|
5520 |
-
|
5521 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5522 |
|
|
|
|
|
|
|
|
|
5523 |
```
|
5524 |
|
5525 |
# FAQ
|
|
|
5472 |
|
5473 |
# Usage
|
5474 |
|
5475 |
+
You can use either the `SentenceTransformers` library or the `transformers` library to encode text.
|
5476 |
+
|
5477 |
+
## Sentence Transformers
|
5478 |
+
|
5479 |
+
```python
|
5480 |
+
from sentence_transformers import SentenceTransformer
|
5481 |
+
|
5482 |
+
# This model supports two prompts: "s2p_query" and "s2s_query" for sentence-to-passage and sentence-to-sentence tasks, respectively.
|
5483 |
+
# They are defined in `config_sentence_transformers.json`
|
5484 |
+
query_prompt_name = "s2p_query"
|
5485 |
+
queries = [
|
5486 |
+
"What are some ways to reduce stress?",
|
5487 |
+
"What are the benefits of drinking green tea?",
|
5488 |
+
]
|
5489 |
+
# docs do not need any prompts
|
5490 |
+
docs = [
|
5491 |
+
"There are many effective ways to reduce stress. Some common techniques include deep breathing, meditation, and physical activity. Engaging in hobbies, spending time in nature, and connecting with loved ones can also help alleviate stress. Additionally, setting boundaries, practicing self-care, and learning to say no can prevent stress from building up.",
|
5492 |
+
"Green tea has been consumed for centuries and is known for its potential health benefits. It contains antioxidants that may help protect the body against damage caused by free radicals. Regular consumption of green tea has been associated with improved heart health, enhanced cognitive function, and a reduced risk of certain types of cancer. The polyphenols in green tea may also have anti-inflammatory and weight loss properties.",
|
5493 |
+
]
|
5494 |
+
|
5495 |
+
# NOTE: The default dimension is 1024. If you need a different dimension, clone the model and edit `modules.json`, replacing `2_Dense_1024` with another dimension, e.g. `2_Dense_256` or `2_Dense_8192`.
|
5496 |
+
model = SentenceTransformer("infgrad/stella_en_400M_v5", trust_remote_code=True).cuda()
|
5497 |
+
query_embeddings = model.encode(queries, prompt_name=query_prompt_name)
|
5498 |
+
doc_embeddings = model.encode(docs)
|
5499 |
+
print(query_embeddings.shape, doc_embeddings.shape)
|
5500 |
+
# (2, 1024) (2, 1024)
|
5501 |
+
|
5502 |
+
similarities = model.similarity(query_embeddings, doc_embeddings)
|
5503 |
+
print(similarities)
|
5504 |
+
# tensor([[0.8398, 0.2990],
|
5505 |
+
# [0.3282, 0.8095]])
|
5506 |
+
```
|
5507 |
+
|
5508 |
+
## Transformers
|
5509 |
|
5510 |
```python
|
5511 |
import os
|
5512 |
import torch
|
5513 |
from transformers import AutoModel, AutoTokenizer
|
|
|
5514 |
from sklearn.preprocessing import normalize
|
5515 |
|
5516 |
+
query_prompt = "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: "
|
5517 |
+
queries = [
|
5518 |
+
"What are some ways to reduce stress?",
|
5519 |
+
"What are the benefits of drinking green tea?",
|
5520 |
+
]
|
5521 |
+
queries = [query_prompt + query for query in queries]
|
5522 |
+
# docs do not need any prompts
|
5523 |
+
docs = [
|
5524 |
+
"There are many effective ways to reduce stress. Some common techniques include deep breathing, meditation, and physical activity. Engaging in hobbies, spending time in nature, and connecting with loved ones can also help alleviate stress. Additionally, setting boundaries, practicing self-care, and learning to say no can prevent stress from building up.",
|
5525 |
+
"Green tea has been consumed for centuries and is known for its potential health benefits. It contains antioxidants that may help protect the body against damage caused by free radicals. Regular consumption of green tea has been associated with improved heart health, enhanced cognitive function, and a reduced risk of certain types of cancer. The polyphenols in green tea may also have anti-inflammatory and weight loss properties.",
|
5526 |
+
]
|
5527 |
|
5528 |
+
# The path of your model after cloning it
|
5529 |
model_dir = "{Your MODEL_PATH}"
|
5530 |
|
5531 |
+
vector_dim = 1024
|
5532 |
+
vector_linear_directory = f"2_Dense_{vector_dim}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5533 |
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).cuda().eval()
|
5534 |
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
|
5535 |
vector_linear = torch.nn.Linear(in_features=model.config.hidden_size, out_features=vector_dim)
|
|
|
5539 |
}
|
5540 |
vector_linear.load_state_dict(vector_linear_dict)
|
5541 |
vector_linear.cuda()
|
5542 |
+
|
5543 |
+
# Embed the queries
|
5544 |
with torch.no_grad():
|
5545 |
input_data = tokenizer(queries, padding="longest", truncation=True, max_length=512, return_tensors="pt")
|
5546 |
input_data = {k: v.cuda() for k, v in input_data.items()}
|
5547 |
attention_mask = input_data["attention_mask"]
|
5548 |
last_hidden_state = model(**input_data)[0]
|
5549 |
last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
|
5550 |
+
query_vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
|
5551 |
+
query_vectors = normalize(vector_linear(query_vectors).cpu().numpy())
|
5552 |
+
|
5553 |
+
# Embed the documents
|
5554 |
+
with torch.no_grad():
|
5555 |
+
input_data = tokenizer(docs, padding="longest", truncation=True, max_length=512, return_tensors="pt")
|
5556 |
+
input_data = {k: v.cuda() for k, v in input_data.items()}
|
5557 |
+
attention_mask = input_data["attention_mask"]
|
5558 |
+
last_hidden_state = model(**input_data)[0]
|
5559 |
+
last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
|
5560 |
+
docs_vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
|
5561 |
+
docs_vectors = normalize(vector_linear(docs_vectors).cpu().numpy())
|
5562 |
+
|
5563 |
+
print(query_vectors.shape, docs_vectors.shape)
|
5564 |
+
# (2, 1024) (2, 1024)
|
5565 |
|
5566 |
+
similarities = query_vectors @ docs_vectors.T
|
5567 |
+
print(similarities)
|
5568 |
+
# [[0.8397531 0.29900077]
|
5569 |
+
# [0.32818374 0.80954516]]
|
5570 |
```
|
5571 |
|
5572 |
# FAQ
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.42.3",
|
5 |
+
"pytorch": "2.3.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {
|
8 |
+
"s2p_query": "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: ",
|
9 |
+
"s2s_query": "Instruct: Retrieve semantically similar text.\nQuery: "
|
10 |
+
},
|
11 |
+
"default_prompt_name": null,
|
12 |
+
"similarity_fn_name": "cosine"
|
13 |
+
}
|
modules.json
CHANGED
@@ -14,7 +14,7 @@
|
|
14 |
{
|
15 |
"idx": 2,
|
16 |
"name": "2",
|
17 |
-
"path": "
|
18 |
"type": "sentence_transformers.models.Dense"
|
19 |
}
|
20 |
]
|
|
|
14 |
{
|
15 |
"idx": 2,
|
16 |
"name": "2",
|
17 |
+
"path": "2_Dense_1024",
|
18 |
"type": "sentence_transformers.models.Dense"
|
19 |
}
|
20 |
]
|