Struggling to reproduce results on the LC25000 dataset

#2
by chinmay55 - opened

Hi all,

Thank you so much for releasing the code and integrating it with OpenCLIP. I was playing around with the released code and tried to reproduce the results on the LC25000 dataset. From the paper, I understood that no fine-tuning is required and that the dataset can be used directly in a zero-shot way with the appropriate prompts. However, I am not able to reproduce the reported results (accuracy is ~41% for lung and ~33% for colon).

I am not sure what exactly is going wrong here. It would be great to get some input from your end. Thanks.

import torch
from tqdm import tqdm
import torch.nn.functional as F
import open_clip

from test_prediction.zsl.dataset import ZeroShotImageDataset

label_encoding = {
    0: 'Colon Adenocarcinoma',
    1: 'Colon Benign Tissue',
    2: 'Lung Adenocarcinoma',
    3: 'Lung Benign Tissue',
    4: 'Lung Squamous Cell Carcinoma',
}

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


def get_templates(disease_label):
    """
    :param disease_label: integer label in [0, 4]
    :return: list of prompt templates for the given class
    """
    disease = label_encoding[disease_label]
    if disease_label in [0, 1]:
        first_sentence = f'a photo of {disease}'
    else:
        first_sentence = f"this is an image of {disease}"
    second_sentence = f"{disease} presented in image"
    return [first_sentence, second_sentence]


context_length = 256  # text context length of BiomedCLIP's PubMedBERT encoder (not used directly below)


# Handling multiple templates together
def zero_shot_class_embedding_generator(model, tokenizer, classnames):
    """
    Returns one averaged text embedding per class, stacked as columns.
    :param classnames: the class labels for which templates are generated
    :param model: CLIP model to use
    :param tokenizer: CLIP tokenizer used for tokenizing the templates
    :return: tensor of class embeddings with one column per class
    """
    with torch.no_grad():
        zeroshot_class_weights = []
        for classname in tqdm(range(0, 5)):
            templates = get_templates(classname)  # format with class
            texts = tokenizer(templates).to(device)  # tokenize
            class_embeddings = model.encode_text(texts)
            class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0)
            class_embedding /= class_embedding.norm()
            zeroshot_class_weights.append(class_embedding)
        zeroshot_class_weights = torch.stack(zeroshot_class_weights, dim=1).to(device)
    return zeroshot_class_weights


def load_model_weights():
    model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(
        'hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')
    tokenizer = open_clip.get_tokenizer('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')
    model.to(device)
    return model, tokenizer, preprocess_val


def accuracy(output, target, topk=(1,)):
    # Same as in open_clip's src/training/zero_shot.py
    pred = output.topk(max(topk), 1, True, True)[1].t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    return [float(correct[:k].reshape(-1).float().sum(0, keepdim=True).cpu().numpy()) for k in topk]


def execute(model, test_data, class_weights):
    # Same as in open_clip's src/training/zero_shot.py
    model.to(device)
    model.eval()
    loader = torch.utils.data.DataLoader(test_data, batch_size=128)
    top1 = n = 0
    for images, labels in tqdm(loader, total=len(loader)):
        with torch.no_grad():
            images, labels = images.to(device), labels.to(device)
            # Encode the image features
            image_features = model.encode_image(images)
            image_features = F.normalize(image_features, dim=-1)
            logits = 100. * image_features @ class_weights
            acc1 = accuracy(logits, labels)
            top1 += acc1.pop()
            n += images.size(0)
    print(f"Accuracy is {top1 / n}")


if __name__ == '__main__':
    model, tokenizer, transform_val = load_model_weights()
    print("Results for Colon:")
    test_data = ZeroShotImageDataset(csv_name='zsl_dataset_colon.csv', imgtransform=transform_val)
    class_weights = zero_shot_class_embedding_generator(model=model, tokenizer=tokenizer, classnames=[0, 1])
    execute(model, test_data, class_weights)
    # Now the same for lung
    print("Results for Lung:")
    class_weights = zero_shot_class_embedding_generator(model=model, tokenizer=tokenizer, classnames=[2, 3, 4])
    test_data = ZeroShotImageDataset(csv_name='zsl_dataset_lung.csv', imgtransform=transform_val)
    execute(model, test_data, class_weights)
Microsoft org

The task is to classify Colon and Lung separately, so you may want to iterate only over the classes passed via classnames in the zero_shot_class_embedding_generator function instead of over all five classes:

# Handling multiple templates together
def zero_shot_class_embedding_generator(model, tokenizer, classnames):
    """
    ...
        for classname in classnames: # NOT in tqdm(range(0, 5))
            templates = get_templates(classname)  # format with class
    ...
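
For reference, a minimal sketch of the fully corrected function (reusing the get_templates helper and device from your script, and assuming the label indices in each task's CSV line up with the order of classnames) could look like this:

import torch
import torch.nn.functional as F
from tqdm import tqdm


def zero_shot_class_embedding_generator(model, tokenizer, classnames):
    """Build one averaged, L2-normalized text embedding per class."""
    with torch.no_grad():
        zeroshot_class_weights = []
        for classname in tqdm(classnames):  # only the classes of the current task
            templates = get_templates(classname)         # prompts for this class
            texts = tokenizer(templates).to(device)      # tokenize
            class_embeddings = model.encode_text(texts)  # [n_templates, dim]
            # average the normalized template embeddings, then re-normalize
            class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0)
            class_embedding /= class_embedding.norm()
            zeroshot_class_weights.append(class_embedding)
        # one column per class, matching the label order of the task
        zeroshot_class_weights = torch.stack(zeroshot_class_weights, dim=1).to(device)
    return zeroshot_class_weights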

Besides, please also try the following prompts:

label_encoding = {
    0: 'Colon Adenocarcinoma',
    1: 'Normal Colonic Tissue',
    2: 'Lung Adenocarcinoma',
    3: 'Normal Lung Tissue',
    4: 'Lung Squamous Cell Carcinoma',
}
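
As a quick sanity check (purely illustrative), you can print the prompts each task would use with the updated labels, reusing get_templates from your script:

# Illustrative only: show the prompts generated per task with the new label_encoding
for task_name, task_classes in [("colon", [0, 1]), ("lung", [2, 3, 4])]:
    print(f"Prompts for {task_name}:")
    for c in task_classes:
        for prompt in get_templates(c):
            print(f"  {prompt}")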

Hope this helps! We are currently working to release a notebook for running zero-shot evaluation on the datasets.

Thank you for the response. I updated the code base, and I am able to get results that are close to the reported numbers. Looking forward to the release of sample notebooks.

chinmay55 changed discussion status to closed
