|
from typing import Any, List, Dict |
|
from pathlib import Path |
|
|
|
import torch |
|
from transformers import AutoModelForMaskedLM, AutoTokenizer |
|
|
|
|
|
|
|
class EndpointHandler():
    """Inference endpoint producing SPLADE-style sparse text embeddings.

    Loads a masked-LM checkpoint (model + tokenizer) from ``path`` and, on
    each call, aggregates the MLM logits into one sparse
    ``{"indices": [...], "values": [...]}`` vector per input text.
    """

    def __init__(self, path="."):
        # Prefer GPU when available; fall back to CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForMaskedLM.from_pretrained(path).to(self.device)
        # Inference only — disable dropout etc. (from_pretrained already
        # returns eval mode; made explicit here).
        self.model.eval()

    def __call__(self, data: Any) -> List[Dict[str, Any]]:
        """Compute sparse embeddings for the inference inputs.

        Args:
            data: a dict holding the texts under the ``"inputs"`` key
                (a string or list of strings), or the raw texts themselves.

        Returns:
            A list with one dict per input text. Each dict has
            ``"indices"`` (non-zero vocabulary positions, list of int) and
            ``"values"`` (the corresponding weights, list of float).
        """
        # Accept both {"inputs": ...} payloads and raw inputs; the original
        # data.get(...) raised AttributeError on non-dict payloads.
        inputs = data.get("inputs", data) if isinstance(data, dict) else data

        with torch.no_grad():
            tokens = self.tokenizer(
                inputs, padding=True, truncation=True, return_tensors='pt'
            ).to(self.device)
            outputs = self.model(**tokens)
            # SPLADE aggregation: log-saturated ReLU of the MLM logits,
            # zeroed at padding positions via the attention mask, then
            # max-pooled over the sequence dimension -> (batch, vocab).
            vecs = torch.max(
                torch.log(1 + torch.relu(outputs.logits))
                * tokens.attention_mask.unsqueeze(-1),
                dim=1,
            )[0]

        embeds = []
        for vec in vecs:
            # nonzero() on a 1-D tensor yields shape (n, 1). Squeeze ONLY
            # dim 1 so that n == 1 still produces a list — a bare squeeze()
            # collapsed the single-nonzero case to a scalar, making
            # "indices"/"values" an int/float instead of lists.
            cols = vec.nonzero().squeeze(dim=1).cpu().tolist()
            weights = vec[cols].cpu().tolist()
            embeds.append({
                "indices": cols,
                "values": weights,
            })
        return embeds