MLAppDemo / app.py
sanket03's picture
removed mean and std calculation due to poor prediction
ca7dac7
import torch, torchvision
from torchvision import transforms
import numpy as np
import gradio as gr
from PIL import Image
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from custom_resnet import Net
# Build the network and restore its trained weights onto the CPU.
model = Net('batch')
# strict=False tolerates missing/unexpected keys between the checkpoint and the
# model; any parameter without a matching key keeps its initial value.
model.load_state_dict(torch.load("model.pth", map_location=torch.device('cpu')), strict=False)
# This app only runs inference: switch to eval mode so that layers whose
# behaviour differs between training and evaluation (e.g. dropout or batch
# normalisation, if the network has them) behave consistently from the very
# first request.
model.eval()

# Class names for the demo; position i labels output i of the model.
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')
def inference(input_img, transparency=0.5, target_layer_number=-1, num_top_classes=5):
    """Classify an image and overlay a Grad-CAM heatmap on it.

    Args:
        input_img: Image provided by the user. It is passed to
            ``transforms.ToTensor`` and divided by 255 for the overlay, so it
            is presumably an ``H x W x C`` uint8 NumPy array -- TODO confirm
            against the UI input component.
        transparency (float, optional): Forwarded to ``show_cam_on_image`` as
            ``image_weight``, i.e. the weight of the original image in the
            blended result. Defaults to 0.5.
        target_layer_number (int, optional): Index into ``model.layer3_r3``
            selecting the layer whose output is given to Grad-CAM.
            Defaults to -1.
        num_top_classes (int, optional): Number of highest-confidence classes
            to include in the returned mapping. Defaults to 5.

    Returns:
        top: Dict of class name -> softmax confidence, ordered from most to
            least confident and limited to ``num_top_classes`` entries.
        visualization: Grad-CAM overlay produced by ``show_cam_on_image``.
    """
    # Callers (e.g. UI sliders) may pass numeric values that are not ints;
    # indexing and slicing below require real integers.
    target_layer_number = int(target_layer_number)
    num_top_classes = int(num_top_classes)

    # Make sure the shared model is in inference mode before the first
    # forward pass; the call is idempotent.
    model.eval()

    # NOTE(review): 0.1307 / 0.3081 are the statistics commonly quoted for
    # MNIST -- confirm they match the normalisation the model was trained with.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    input_tensor = transform(input_img).unsqueeze(0)  # add the batch dimension

    # The class scores only feed the label output, so no autograd graph is
    # needed here; Grad-CAM performs its own forward/backward pass below.
    with torch.no_grad():
        outputs = model(input_tensor)
    probabilities = torch.softmax(outputs.flatten(), dim=0)
    confidences = {name: float(prob) for name, prob in zip(classes, probabilities)}

    target_layers = [model.layer3_r3[target_layer_number]]
    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=False)
    # targets=None asks Grad-CAM to explain the highest-scoring class.
    grayscale_cam = cam(input_tensor=input_tensor, targets=None)[0, :]

    # show_cam_on_image expects the base image scaled to [0, 1].
    visualization = show_cam_on_image(
        input_img / 255,
        grayscale_cam,
        use_rgb=True,
        image_weight=transparency,
    )

    # Rank classes by confidence (descending) and keep the requested number.
    ranked = sorted(confidences.items(), key=lambda item: item[1], reverse=True)
    top = dict(ranked[:num_top_classes])
    return top, visualization
# Text shown at the top of the Gradio page.
title = "CIFAR10 trained on ResNet18 Model with GradCAM"
description = "A simple Gradio interface to infer on ResNet model, and get GradCAM results"

# Each example row is [image path, opacity, layer index, number of top classes];
# only the image and the class count differ between rows.
examples = [
    [image_path, 0.5, -1, top_k]
    for image_path, top_k in (
        ("airplane.png", 5),
        ("bird.jpeg", 5),
        ("car.jpeg", 5),
        ("cat.png", 5),
        ("deer.jpeg", 6),
        ("dog.png", 7),
        ("frog.jpeg", 4),
        ("horse.png", 7),
        ("ship.png", 3),
        ("truck.jpeg", 8),
    )
]

# Input widgets, in the same order as the parameters of `inference`.
input_components = [
    gr.Image(shape=(32, 32), label="Input Image"),
    gr.Slider(0, 1, value=0.5, label="Opacity of GradCAM"),
    gr.Slider(-2, -1, value=-2, step=1, label="Which Layer?"),
    gr.Slider(0, 10, value=1, step=1, label="Number of Top Classes"),
]

# Output widgets, in the same order as the values returned by `inference`.
output_components = [
    gr.Label(num_top_classes=10),
    gr.Image(
        shape=(32, 32),
        label="Output",
        style={"width": "128px", "height": "128px"},
    ),
]

demo = gr.Interface(
    fn=inference,
    inputs=input_components,
    outputs=output_components,
    title=title,
    description=description,
    examples=examples,
)

# Start the web app as soon as the module is executed.
demo.launch()