File size: 3,001 Bytes
c2ba1ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm
# Target size handed to resize_image() by preprocess(). The aspect-ratio
# preserving path and pad_image() read it as (height, width).
INPUT_SIZE = [512, 1024]
def input_transform(image):
    """Scale an image to [0, 1] and standardize it per channel.

    The last axis of ``image`` is reversed before normalization (for a
    3-channel OpenCV image this presumably converts BGR to RGB -- confirm
    against the caller), so the mean/std constants below are applied to the
    reversed channel order.

    Args:
        image (ndarray): Array of shape (H, W, 3) with values in [0, 255].

    Returns:
        ndarray: A new float32 array of the same shape; the input is not
        modified.
    """
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    # astype() copies, so the caller's array is left untouched.
    flipped = image.astype(np.float32)[:, :, ::-1]
    scaled = flipped / 255.0
    return (scaled - channel_mean) / channel_std
def pad_image(image, h, w, size, padvalue):
    """Pad ``image`` on the bottom and right up to ``size``.

    Args:
        image (ndarray): Image to pad.
        h: Height used to compute the missing rows (``size[0] - h``).
        w: Width used to compute the missing columns (``size[1] - w``).
        size: Target size, indexed as ``(height, width)``.
        padvalue: Constant border value forwarded to ``cv2.copyMakeBorder``.

    Returns:
        tuple: ``(padded_image, pad_h, pad_w)``. When no padding is needed,
        ``padded_image`` is a copy of ``image`` and both pad amounts are 0.
    """
    extra_rows = max(size[0] - h, 0)
    extra_cols = max(size[1] - w, 0)

    # Nothing to add: hand back an independent copy of the input.
    if not (extra_rows > 0 or extra_cols > 0):
        return image.copy(), extra_rows, extra_cols

    # Border arguments are (top, bottom, left, right): pad bottom/right only.
    bordered = cv2.copyMakeBorder(
        image, 0, extra_rows, 0, extra_cols,
        cv2.BORDER_CONSTANT, value=padvalue)
    return bordered, extra_rows, extra_cols
def resize_image(image, re_size, keep_ratio=True):
    """Resize ``image`` to ``re_size``, given as ``(height, width)``.

    Args:
        image (ndarray): Input image; its first two axes are height and width.
        re_size: Target size indexed as ``(height, width)``.
        keep_ratio (bool): When True, the image is scaled to fit inside
            ``re_size`` without changing its aspect ratio and then padded
            with zeros on the bottom/right via ``pad_image``. When False,
            the image is stretched to exactly ``re_size``.

    Returns:
        tuple: ``(resized_image, pad_h, pad_w)`` where the image is float32
        and the pad amounts are the rows/columns added by padding (always 0
        when ``keep_ratio`` is False).
    """
    target_h, target_w = re_size[0], re_size[1]

    if not keep_ratio:
        # cv2.resize expects dsize as (width, height); re_size is
        # (height, width), so the two entries must be swapped here to stay
        # consistent with the aspect-ratio-preserving branch below.
        re_image = cv2.resize(image,
                              (target_w, target_h)).astype('float32')
        return re_image, 0, 0

    ratio = target_h * 1.0 / target_w
    h, w = image.shape[0:2]
    if h * 1.0 / w <= ratio:
        # Image is relatively wider than the target: width is the limit.
        re_h, re_w = int(h * target_w * 1.0 / w), target_w
    else:
        # Image is relatively taller than the target: height is the limit.
        re_h, re_w = target_h, int(w * target_h * 1.0 / h)

    re_image = cv2.resize(image,
                          (re_w, re_h)).astype('float32')
    # Fill the remaining rows/columns so the output matches re_size.
    re_image, pad_h, pad_w = pad_image(re_image, re_h, re_w, re_size,
                                       (0.0, 0.0, 0.0))
    return re_image, pad_h, pad_w
def preprocess(img):
    """Resize, normalize and reorder an image into channel-first layout.

    The image is resized to ``INPUT_SIZE`` with ``resize_image`` (default
    aspect-ratio-preserving mode), normalized with ``input_transform`` and
    finally transposed from (H, W, C) to (C, H, W).

    Args:
        img (ndarray): Image of shape (H, W, C).

    Returns:
        tuple: ``(image, pad_h, pad_w)`` -- the processed (C, H, W) array and
        the pad amounts reported by ``resize_image``.
    """
    resized, pad_h, pad_w = resize_image(img, INPUT_SIZE)
    normalized = input_transform(resized)
    channel_first = normalized.transpose((2, 0, 1))
    return channel_first, pad_h, pad_w
def get_confusion_matrix(label, pred, size, num_class, ignore=-1):
    """Calculate the confusion matrix for the given labels and predictions.

    Args:
        label: Ground-truth tensor exposing ``.cpu().numpy()``, indexed as
            (batch, height, width).
        pred: Score tensor exposing ``.cpu().numpy()``, laid out as
            (batch, class, height, width); the predicted class is the
            argmax over the class axis.
        size: Sequence whose last two entries are the height and width the
            label is cropped to before comparison.
        num_class (int): Number of classes.
        ignore (int): Label value excluded from the statistics.

    Returns:
        ndarray: float64 array of shape (num_class, num_class) where entry
        ``[i, j]`` counts pixels labelled ``i`` and predicted as ``j``.
        Pixels whose label is >= ``num_class`` are not counted.
    """
    scores = pred.cpu().numpy()
    # int32 (not uint8) so class ids >= 256 are not silently wrapped.
    seg_pred = np.argmax(scores, axis=1).astype(np.int32)
    seg_gt = np.asarray(
        label.cpu().numpy()[:, :size[-2], :size[-1]], dtype=np.int32)

    # Drop pixels carrying the ignore label.
    valid = seg_gt != ignore
    seg_gt = seg_gt[valid]
    seg_pred = seg_pred[valid]

    # Flatten each (label, prediction) pair to one cell index and count them.
    index = seg_gt.astype(np.int64) * num_class + seg_pred
    num_cells = num_class * num_class
    # minlength pads missing cells with zeros; the slice discards cells
    # produced by labels outside [0, num_class).
    label_count = np.bincount(index, minlength=num_cells)[:num_cells]
    return label_count.reshape(num_class, num_class).astype(np.float64)
|