Edit model card

Introduction

This model adapts Qwen/Qwen2-1.5B to a downstream reranking task. We build on the FlagEmbedding reranker implementation, using Qwen2-1.5B as the pretrained model.

Dependencies

transformers==4.41.2
flash-attn==2.5.7

Usage

from typing import cast, List, Union, Tuple, Dict, Optional
import numpy as np
import torch
from tqdm import tqdm
import transformers
from transformers import AutoTokenizer, PreTrainedModel, PreTrainedTokenizer, DataCollatorWithPadding
from transformers.models.qwen2 import Qwen2Config, Qwen2ForSequenceClassification
from transformers.trainer_pt_utils import LabelSmoother
IGNORE_TOKEN_ID = LabelSmoother.ignore_index

def preprocess(
    sources,
    tokenizer: transformers.PreTrainedTokenizer,
    max_len: int = 1024,
) -> Dict:
    """Render each source through the chat template and tokenize it.

    Each entry of ``sources`` is a sequence of strings. The strings are
    joined with a blank line, wrapped as a single user message, rendered
    with the tokenizer's chat template (generation prompt included) and
    tokenized. A sequence longer than ``max_len`` is shortened by dropping
    the tokens that sit immediately before the trailing chat suffix, so the
    suffix itself is always kept.

    Args:
        sources: Iterable of string sequences, one per example.
        tokenizer: Tokenizer that offers ``apply_chat_template`` and, when
            called on a list of texts, returns ``input_ids`` and
            ``attention_mask``.
        max_len: Maximum number of tokens kept per example.

    Returns:
        A dict with ``input_ids`` and ``attention_mask``; each value is a
        list holding one unpadded token list per example.

    Raises:
        ValueError: If an example must be truncated but ``max_len`` is
            smaller than the chat suffix that has to be preserved.
    """
    # Trailing tokens that must survive truncation:
    # <|im_end|>(151645), \n(198), <|im_start|>(151644), assistant(77091), \n(198)
    suffix_len = 5

    input_ids, attention_masks = [], []
    for source in sources:
        # Apply prompt templates
        messages = [{"role": "user", "content": "\n\n".join(source)}]
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        model_inputs = tokenizer([text])
        input_id = model_inputs['input_ids'][0]
        attention_mask = model_inputs['attention_mask'][0]

        if len(input_id) > max_len:
            if max_len < suffix_len:
                # The old slice arithmetic silently produced `suffix_len`
                # tokens here, exceeding the requested limit.
                raise ValueError(
                    f"max_len={max_len} is too small to keep the "
                    f"{suffix_len}-token chat suffix"
                )
            # Keep the head of the prompt plus the untouched suffix.
            keep = max_len - suffix_len
            input_id = input_id[:keep] + input_id[-suffix_len:]
            attention_mask = attention_mask[:keep] + attention_mask[-suffix_len:]

        input_ids.append(input_id)
        attention_masks.append(attention_mask)

    return dict(
        input_ids=input_ids,
        attention_mask=attention_masks
    )

class FlagRerankerCustom:
    """Score text pairs with a sequence-classification reranker model.

    The wrapper stores the model and tokenizer, moves the model to the
    first available device (CUDA, then MPS, then CPU), switches it to eval
    mode, and wraps it in ``torch.nn.DataParallel`` when more than one CUDA
    device is visible.
    """

    def __init__(
            self,
            model: PreTrainedModel,
            tokenizer: PreTrainedTokenizer,
            use_fp16: bool = False
    ) -> None:
        """Place ``model`` on a device and prepare it for inference.

        Args:
            model: Model whose output exposes ``logits``.
            tokenizer: Tokenizer used for templating, tokenizing and padding.
            use_fp16: Convert the model to half precision. Ignored (forced
                to ``False``) when neither CUDA nor MPS is available.
        """
        self.tokenizer = tokenizer
        self.model = model
        self.data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        elif torch.backends.mps.is_available():
            self.device = torch.device('mps')
        else:
            self.device = torch.device('cpu')
            # Half precision is only enabled on an accelerator.
            use_fp16 = False
        if use_fp16:
            self.model.half()

        self.model = self.model.to(self.device)

        self.model.eval()

        self.num_gpus = torch.cuda.device_count()
        if self.num_gpus > 1:
            print(f"----------using {self.num_gpus}*GPUs----------")
            self.model = torch.nn.DataParallel(self.model)

    @torch.no_grad()
    def compute_score(self, sentence_pairs: Union[List[Tuple[str, str]], Tuple[str, str]], batch_size: int = 64,
                      max_length: int = 1024) -> Union[List[float], float]:
        """Compute one relevance score per text pair.

        Args:
            sentence_pairs: Either a list of pairs, or a single pair given
                as a list/tuple of strings.
            batch_size: Pairs per forward pass per GPU; it is multiplied by
                the number of CUDA devices when any are present.
            max_length: Token limit forwarded to ``preprocess``.

        Returns:
            A list of scores in input order. When exactly one score was
            produced, that score is returned as a bare float. An empty
            input yields an empty list.

        Raises:
            TypeError: If ``sentence_pairs`` is neither a list nor a tuple.
        """
        if not isinstance(sentence_pairs, (list, tuple)):
            raise TypeError(
                "sentence_pairs must be a list of pairs or a single pair, "
                f"got {type(sentence_pairs).__name__}"
            )
        if not sentence_pairs:
            # Nothing to score; avoids indexing into an empty sequence below.
            return []
        if isinstance(sentence_pairs[0], str):
            # A single pair was passed directly; treat it as a batch of one.
            sentence_pairs = [sentence_pairs]

        if self.num_gpus > 0:
            batch_size = batch_size * self.num_gpus

        all_scores = []
        for start_index in tqdm(range(0, len(sentence_pairs), batch_size), desc="Compute Scores",
                                disable=True):
            sentences_batch = sentence_pairs[start_index:start_index + batch_size]
            inputs = preprocess(sources=sentences_batch, tokenizer=self.tokenizer, max_len=max_length)
            # Turn the dict of per-key lists into one feature dict per example,
            # which is the layout the padding collator consumes.
            features = [dict(zip(inputs, t)) for t in zip(*inputs.values())]
            batch = self.data_collator(features).to(self.device)
            logits = self.model(**batch, return_dict=True).logits
            # Drop only the trailing label axis. A bare squeeze() would also
            # collapse a batch of one into a 0-d tensor, whose tolist() is a
            # float that list.extend() cannot iterate.
            scores = logits.squeeze(-1)
            all_scores.extend(scores.detach().to(torch.float).cpu().numpy().tolist())

        if len(all_scores) == 1:
            return all_scores[0]
        return all_scores

# Hub repository that supplies the tokenizer, the config and the weights.
MODEL_ID = "neofung/LdIR-Qwen2-reranker-1.5B"

tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_ID, padding_side="right")

config = Qwen2Config.from_pretrained(MODEL_ID, trust_remote_code=True, bf16=True)

base_model = Qwen2ForSequenceClassification.from_pretrained(
    MODEL_ID,
    config=config,
    trust_remote_code=True,
)

# From here on, `model` is the reranker wrapper rather than the raw network.
model = FlagRerankerCustom(model=base_model, tokenizer=tokenizer, use_fp16=False)

# Two candidate passages for the same query.
pairs = [
    ['what is panda?', 'hi'],
    ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.'],
]

model.compute_score(pairs)

# [-2.655318021774292, 11.7670316696167]

Evaluation on C-MTEB


from C_MTEB.tasks import *
from mteb import MTEB

# Results are written beneath reranker_results/, in a folder named after the model.
save_name = "LdIR-Qwen2-reranker-1.5B"
output_folder = f"reranker_results/{save_name}"

# Select only the reranking tasks for the listed language settings.
evaluation = MTEB(task_types=["Reranking"], task_langs=['zh', 'zh2en', 'en2zh'])

evaluation.run(model, output_folder=output_folder)
Downloads last month
808
Safetensors
Model size
1.54B params
Tensor type
I64
·
BF16
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Evaluation results