Spaces:
Build error
Build error
import torch | |
import torchvision | |
from torchvision import models, transforms | |
from torch.utils.data import DataLoader | |
from torch.utils.data.dataset import Dataset | |
import os | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import cv2 | |
import face_recognition | |
import sys | |
import time | |
from torch.autograd import Variable | |
from torch import nn | |
class Model(nn.Module): | |
def __init__(self, num_classes, latent_dim=2048, lstm_layers=1, hidden_dim=2048, bidirectional=False): | |
super(Model, self).__init__() | |
model = models.resnext50_32x4d(pretrained=True) | |
self.model = nn.Sequential(*list(model.children())[:-2]) | |
self.lstm = nn.LSTM(latent_dim, hidden_dim, lstm_layers, bidirectional) | |
self.relu = nn.LeakyReLU() | |
self.dp = nn.Dropout(0.4) | |
self.linear1 = nn.Linear(2048, num_classes) | |
self.avgpool = nn.AdaptiveAvgPool2d(1) | |
def forward(self, x): | |
batch_size, seq_length, c, h, w = x.shape | |
x = x.view(batch_size * seq_length, c, h, w) | |
fmap = self.model(x) | |
x = self.avgpool(fmap) | |
x = x.view(batch_size, seq_length, 2048) | |
x_lstm, _ = self.lstm(x, None) | |
return fmap, self.dp(self.linear1(x_lstm[:, -1, :])) | |
im_size = 112 | |
mean = [0.485, 0.456, 0.406] | |
std = [0.229, 0.224, 0.225] | |
sm = nn.Softmax() | |
inv_normalize = transforms.Normalize(mean=-1*np.divide(mean, std), std=np.divide([1,1,1], std)) | |
def im_convert(tensor): | |
image = tensor.to('cpu').clone().detach() | |
image = image.squeeze() | |
image = inv_normalize(image) | |
image = image.numpy() | |
image = image.transpose(1, 2, 0) | |
image = image.clip(0, 1) | |
cv2.imwrite('./2.png', image*255) | |
return image | |
def predict(model, img, path = './'): | |
fmap, logits = model(img.to('cuda')) | |
params = list(model.parameters()) | |
weight_softmax = model.linear1.weight.detach().cpu().numpy() | |
logits = sm(logits) | |
_, prediction = torch.max(logits,1) | |
confidence = logits[:, int(prediction.item())].item()*100 | |
print('confidence of prediction', logits[:, int(prediction.item())].item()*100) | |
idx = np.argmax(logits.detach().cpu().numpy()) | |
bz, nc, h, w = fmap.shape | |
out = np.dot(fmap[-1].detach().cpu().numpy().reshape((nc, h*w)).T, weight_softmax[idx, :].T) | |
predict = out.reshape(h, w) | |
predict = predict - np.min(predict) | |
predict_img = predict / np.max(predict) | |
predict_img = np.uint8(255*predict_img) | |
out = cv2.resize(predict_img, (im_size, im_size)) | |
heatmap = cv2.applyColorMap(out, cv2.COLORMAP_JET) | |
img = im_convert(img[:, -1, :, :, :]) | |
result = heatmap * 0.5 + img*0.8*255 | |
cv2.imwrite('./1.png', result) | |
result1 = heatmap * 0.5/255 + img*0.8 | |
r, g, b = cv2.split(result1) | |
result1 = cv2.merge((r, g, b)) | |
plt.imshow(result1) | |
plt.show() | |
return [int(prediction.item()), confidence] | |
class validation_dataset(Dataset): | |
def __init__(self, video_names, sequence_length = 60, transform = None): | |
self.video_names = video_names | |
self.transform = transform | |
self.count = sequence_length | |
def __len__(self): | |
return len(self.video_names) | |
def __getitem__(self, idx): | |
video_path = self.video_names[idx] | |
frames = [] | |
a = int(100/self.count) | |
first_frame = np.random.randint(0, a) | |
for i, frame in enumerate(self.frame_extract(video_path)): | |
faces = face_recognition.face_locations(frame) | |
try: | |
top, right, bottom, left = faces[0] | |
frame = frame[top:bottom, left:right, :] | |
except: | |
pass | |
if self.transform: | |
frames.append(self.transform(frame)) | |
if(len(frames) == self.count): | |
break | |
if len(frames)==0: | |
print('no face found in the video') | |
return None | |
frames = torch.stack(frames) | |
frames = frames[:self.count] | |
return frames.unsqueeze(0) | |
def frame_extract(self, path): | |
vidObj = cv2.VideoCapture(path) | |
success = 1 | |
while success: | |
success, image = vidObj.read() | |
if success: | |
yield image | |
def im_plot(tensor): | |
image = tensor.cpu().numpy().transpose(1,2,0) | |
b,g,r = cv2.split(image) | |
image = cv2.merge((r,g,b)) | |
image = image*[0.22803, 0.22145, 0.216989] + [0.43216, 0.394666, 0.37645] | |
image = image*255.0 | |
plt.imshow(image.astype(int)) | |
plt.show() |