Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,637 Bytes
a84a65c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import torch
import torch.nn as nn
class VGGishish(nn.Module):
def __init__(self, conv_layers, use_bn, num_classes):
'''
Mostly from
https://pytorch.org/vision/0.8/_modules/torchvision/models/vgg.html
'''
super().__init__()
layers = []
in_channels = 1
# a list of channels with 'MP' (maxpool) from config
for v in conv_layers:
if v == 'MP':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, stride=1)
if use_bn:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
self.features = nn.Sequential(*layers)
self.avgpool = nn.AdaptiveAvgPool2d((5, 10))
self.flatten = nn.Flatten()
self.classifier = nn.Sequential(
nn.Linear(512 * 5 * 10, 4096),
nn.ReLU(True),
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Linear(4096, num_classes)
)
# weight init
self.reset_parameters()
def forward(self, x):
# adding channel dim for conv2d (B, 1, F, T) <-
x = x.unsqueeze(1)
# backbone (B, 1, 5, 53) <- (B, 1, 80, 860)
x = self.features(x)
# adaptive avg pooling (B, 1, 5, 10) <- (B, 1, 5, 53) – if no MP is used as the end of VGG
x = self.avgpool(x)
# flatten
x = self.flatten(x)
# classify
x = self.classifier(x)
return x
def reset_parameters(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
if __name__ == '__main__':
    # Smoke test: run one random batch through the network and show the
    # shape of the result.
    n_outputs = 309

    # 13-conv layout with four max-pools (no pooling after the last stage)
    layout = [
        64, 64, 'MP',
        128, 128, 'MP',
        256, 256, 256, 'MP',
        512, 512, 512, 'MP',
        512, 512, 512,
    ]
    # alternative, shallower layout:
    # layout = [64, 'MP', 128, 'MP', 256, 256, 'MP', 512, 512, 'MP']

    # batch of 3 inputs, each 80 x 848
    batch = torch.rand(3, 80, 848)
    net = VGGishish(layout, use_bn=False, num_classes=n_outputs)
    scores = net(batch)
    print(scores.shape)
|