Struggling to reproduce results on the LC 25000 dataset
Hi all,
Thank you so much for releasing the code and integrating it with OpenCLIP. I was playing around with the released code and tried to reproduce the results on the LC 25000 dataset. From the paper, I understood that no fine-tuning is required and that the dataset can be used directly in a zero-shot (ZSL) setting with appropriate prompts. However, I am not able to reproduce the results: accuracy is ~41% for lung and ~33% for colon.
I am not sure what exactly is going wrong here. It would be great to get some input from your end. Thanks.
import torch
from tqdm import tqdm
import torch.nn.functional as F
import open_clip
from test_prediction.zsl.dataset import ZeroShotImageDataset
# Integer class id -> class name that is inserted into the text prompts.
label_encoding = {
    0: 'Colon Adenocarcinoma',
    1: 'Colon Benign Tissue',
    2: 'Lung Adenocarcinoma',
    3: 'Lung Benign Tissue',
    4: 'Lung Squamous Cell Carcinoma',
}

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def get_templates(disease_label):
    """
    Build the text prompts for one class.

    :param disease_label: integer key of ``label_encoding`` (0 to 4)
    :return: list of templates (two prompt strings for the class)
    """
    disease = label_encoding[disease_label]
    # Labels 0 and 1 get the "a photo of" wording; every other label gets
    # "this is an image of". Without the else branch the first assignment
    # would always be overwritten.
    if disease_label in [0, 1]:
        first_sentence = f'a photo of {disease}'
    else:
        first_sentence = f"this is an image of {disease}"
    second_sentence = f"{disease} presented in image"
    return [first_sentence, second_sentence]
context_length = 256


# Handling multiple templates together
def zero_shot_class_embedding_generator(model, tokenizer, classnames):
    """
    Build the zero-shot classifier weights from the text prompts.

    For each class the prompt embeddings are L2-normalised, averaged over the
    templates and re-normalised; the per-class vectors are then stacked along
    dim=1, one column per class.

    :param classnames: The class labels for which template generation
    :param model: CLIP model to use
    :param tokenizer: CLIP tokenizer used for creating the templates
    :return: tensor of stacked class embeddings on ``device``
    """
    with torch.no_grad():
        zeroshot_class_weights = []
        # NOTE(review): this loops over all five labels; `classnames` is not
        # consulted here, so the result always has five columns.
        for classname in tqdm(range(0, 5)):
            templates = get_templates(classname)  # format with class
            texts = tokenizer(templates).to(device)  # tokenize
            class_embeddings = model.encode_text(texts)
            class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0)
            class_embedding /= class_embedding.norm()
            # Collect the per-class vector; torch.stack below needs a
            # non-empty list.
            zeroshot_class_weights.append(class_embedding)
        zeroshot_class_weights = torch.stack(zeroshot_class_weights, dim=1).to(device)
    return zeroshot_class_weights
def load_model_weights():
    """
    Load the BiomedCLIP model, its tokenizer and the validation transform.

    :return: tuple ``(model, tokenizer, preprocess_val)``
    """
    model_name = 'hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224'
    # The training transform is returned by open_clip but not needed here.
    model, _preprocess_train, preprocess_val = open_clip.create_model_and_transforms(model_name)
    tokenizer = open_clip.get_tokenizer(model_name)
    # Inputs are moved to `device` before encoding, so the model has to live
    # on the same device; eval mode disables dropout for inference.
    model.to(device)
    model.eval()
    return model, tokenizer, preprocess_val
def accuracy(output, target, topk=(1,)):
    """
    Count correct top-k predictions in a batch.

    :param output: 2-D tensor of per-class scores, one row per sample
    :param target: 1-D tensor of ground-truth class indices
    :param topk: iterable of k values to evaluate
    :return: list with, for each k, the number (not the fraction) of samples
        whose target is among the k highest-scoring classes, as a float
    """
    # Same as obtained from the open_clip ``
    # Indices of the max(topk) highest scores per sample, transposed so that
    # row i holds every sample's (i+1)-th best guess.
    pred = output.topk(max(topk), 1, True, True)[1].t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    # .item() yields a Python float directly; calling float() on a
    # one-element NumPy array is deprecated in recent NumPy releases.
    return [correct[:k].reshape(-1).float().sum().item() for k in topk]
def execute(model, test_data, class_weights):
    """
    Classify every sample in ``test_data`` and print the top-1 accuracy.

    :param model: CLIP model used to encode the images
    :param test_data: dataset yielding ``(images, labels)`` pairs
    :param class_weights: class embedding matrix that the image features are
        multiplied with (one column per class)
    """
    # Same as obtained from the open_clip ``
    loader = torch.utils.data.DataLoader(test_data, batch_size=128)
    top1 = n = 0
    for images, labels in tqdm(loader, total=len(loader)):
        with torch.no_grad():
            # Move the batch to the device the class weights were built on.
            images, labels = images.to(device), labels.to(device)
            # Encode the image features
            image_features = model.encode_image(images)
            image_features = F.normalize(image_features, dim=-1)
            logits = 100. * image_features @ class_weights
            # accuracy() returns counts of correct predictions, so they are
            # summed here and divided by the sample count at the end.
            acc1 = accuracy(logits, labels)
            top1 += acc1.pop()
            n += images.size(0)
    print(f"Accuracy is {top1 / n}")
if __name__ == '__main__':
    # Load the model once and reuse it for both organ-specific evaluations.
    model, tokenizer, transform_val = load_model_weights()

    # Colon: dataset first, then the class embeddings for labels 0 and 1.
    print("Results for Colon:")
    colon_data = ZeroShotImageDataset(csv_name='zsl_dataset_colon.csv', imgtransform=transform_val)
    colon_weights = zero_shot_class_embedding_generator(model, tokenizer, [0, 1])
    execute(model, colon_data, colon_weights)

    # Now the same for lung
    print("Results for Lung:")
    lung_weights = zero_shot_class_embedding_generator(model, tokenizer, [2, 3, 4])
    lung_data = ZeroShotImageDataset(csv_name='zsl_dataset_lung.csv', imgtransform=transform_val)
    execute(model, lung_data, lung_weights)
The task is to classify Colon and Lung separately, so you may want to iterate only over the classes passed via `classnames` in the `zero_shot_class_embedding_generator` function instead of iterating over all 5 classes:
# Handling multiple templates together
def zero_shot_class_embedding_generator(model, tokenizer, classnames):
for classname in classnames: # NOT in tqdm(range(0, 5))
templates = get_templates(classname) # format with class
Besides, please also try the following prompts:
# Suggested class names to use in the prompts.
label_encoding = {
    0: 'Colon Adenocarcinoma',
    1: 'Normal Colonic Tissue',
    2: 'Lung Adenocarcinoma',
    3: 'Normal Lung Tissue',
    4: 'Lung Squamous Cell Carcinoma',
}
Hope this helps! We are currently working to release a notebook for running zero-shot evaluation on the datasets.
Thank you for the response. I updated the code base, and I am able to get results that are close to the reported numbers. Looking forward to the release of sample notebooks.