|
import cv2 |
|
import numpy as np |
|
from PIL import Image |
|
from tqdm import tqdm |
|
|
|
# Target size that preprocess() passes to resize_image(), ordered as
# [height, width]: pad_image() and the keep-ratio path of resize_image()
# read index 0 as the number of rows and index 1 as the number of columns.
INPUT_SIZE = [512, 1024]
|
|
|
|
|
def input_transform(image):
    """Reverse the channel order and standardize an image per channel.

    The input is converted to float32, its third axis is reversed, the
    values are divided by 255 and then normalized with a fixed per-channel
    mean and standard deviation.

    Args:
        image (ndarray): Array indexed as ``[row, col, channel]`` with three
            channels. Presumably BGR pixel data in the 0-255 range (e.g. as
            loaded by OpenCV) -- confirm against the caller.

    Returns:
        ndarray: A new float32 array with the same shape as ``image``; the
        input array itself is not modified.
    """
    # Presumably the ImageNet RGB statistics -- verify against the model.
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    # Reversing axis 2 swaps the first and last channel (BGR <-> RGB).
    flipped = image.astype(np.float32)[:, :, ::-1]

    # The division allocates a fresh array, so the in-place steps below
    # never touch the caller's data.
    normalized = np.divide(flipped, 255.0)
    np.subtract(normalized, channel_mean, out=normalized)
    np.divide(normalized, channel_std, out=normalized)
    return normalized
|
|
|
|
|
def pad_image(image, h, w, size, padvalue):
    """Pad an image on the bottom and right up to a minimum size.

    Args:
        image (ndarray): Image to pad.
        h (int): Current height of ``image``.
        w (int): Current width of ``image``.
        size (sequence): Minimum ``(height, width)`` of the result.
        padvalue: Constant border value forwarded to
            ``cv2.copyMakeBorder``.

    Returns:
        tuple: ``(padded, pad_h, pad_w)`` where ``pad_h`` / ``pad_w`` are the
        number of rows / columns that were appended (0 when the image was
        already large enough). When no padding is needed ``padded`` is a
        copy of ``image``.
    """
    padded = image.copy()
    extra_rows = max(size[0] - h, 0)
    extra_cols = max(size[1] - w, 0)

    # Nothing to add in either direction: hand back the plain copy.
    if extra_rows <= 0 and extra_cols <= 0:
        return padded, extra_rows, extra_cols

    # top=0 and left=0 keep the original content anchored at the top-left
    # corner; only the bottom and right edges grow.
    padded = cv2.copyMakeBorder(
        image, 0, extra_rows, 0, extra_cols,
        cv2.BORDER_CONSTANT, value=padvalue)
    return padded, extra_rows, extra_cols
|
|
|
|
|
def resize_image(image, re_size, keep_ratio=True):
    """Resize an image to ``re_size``, optionally preserving aspect ratio.

    Args:
        image (ndarray): Image to resize; its first two axes are read as
            ``(height, width)``.
        re_size (sequence): Target size as ``(height, width)``.
        keep_ratio (bool): When True (default) the image is scaled to fit
            inside ``re_size`` without distortion and the remainder is
            zero-padded on the bottom/right via ``pad_image``. When False
            the image is stretched to exactly ``re_size``.

    Returns:
        tuple: ``(re_image, pad_h, pad_w)`` where ``re_image`` is a float32
        array and ``pad_h`` / ``pad_w`` are the rows / columns of padding
        that were appended (always 0 when ``keep_ratio`` is False).
    """
    target_h, target_w = re_size[0], re_size[1]

    if not keep_ratio:
        # cv2.resize expects dsize as (width, height), whereas re_size is
        # (height, width) everywhere else in this function.
        re_image = cv2.resize(image, (target_w, target_h)).astype('float32')
        return re_image, 0, 0

    h, w = image.shape[0:2]
    if h * 1.0 / w <= target_h * 1.0 / target_w:
        # Image is relatively wider than the target: the width is the
        # limiting side, so fit it and scale the height accordingly.
        re_h, re_w = int(h * target_w * 1.0 / w), target_w
    else:
        # Image is relatively taller than the target: fit the height.
        re_h, re_w = target_h, int(w * target_h * 1.0 / h)

    # int() truncation can produce 0 for extreme aspect ratios, which
    # cv2.resize rejects; keep at least one pixel per dimension.
    re_h = max(re_h, 1)
    re_w = max(re_w, 1)

    re_image = cv2.resize(image, (re_w, re_h)).astype('float32')
    re_image, pad_h, pad_w = pad_image(
        re_image, re_h, re_w, re_size, (0.0, 0.0, 0.0))
    return re_image, pad_h, pad_w
|
|
|
|
|
def preprocess(img):
    """Resize, normalize and reorder an image to channel-first layout.

    The image is resized to ``INPUT_SIZE`` with ``resize_image`` (default
    keep-ratio mode), normalized with ``input_transform`` and finally
    transposed so that the channel axis comes first.

    Args:
        img (ndarray): Image indexed as ``[row, col, channel]``.

    Returns:
        tuple: ``(image, pad_h, pad_w)`` where ``image`` is the normalized
        array with axes reordered to ``(channel, row, col)`` and ``pad_h`` /
        ``pad_w`` are the padding amounts reported by ``resize_image``.
    """
    resized, pad_h, pad_w = resize_image(img, INPUT_SIZE)
    normalized = input_transform(resized)

    # (row, col, channel) -> (channel, row, col)
    channel_first = normalized.transpose((2, 0, 1))
    return channel_first, pad_h, pad_w
|
|
|
|
|
def get_confusion_matrix(label, pred, size, num_class, ignore=-1):
    """Calculate the confusion matrix for a batch of predictions.

    Args:
        label: Ground-truth class ids, an object exposing ``.cpu().numpy()``
            (e.g. a torch tensor) indexed as ``[batch, row, col]``. It is
            cropped to ``size[-2]`` rows and ``size[-1]`` columns.
        pred: Per-class scores exposing ``.cpu().numpy()``, indexed as
            ``[batch, class, row, col]``. The predicted class is the argmax
            over axis 1; its spatial shape must match the cropped label.
        size (sequence): Its last two entries give the ``(rows, cols)`` of
            the label region to evaluate.
        num_class (int): Number of classes.
        ignore (int): Label value excluded from the statistics.

    Returns:
        ndarray: float64 array of shape ``(num_class, num_class)`` where
        entry ``[i, j]`` counts pixels with ground truth ``i`` predicted as
        ``j``. Non-ignored labels that are ``>= num_class`` are not counted.

    Raises:
        ValueError: If a non-ignored label is negative (``np.bincount``
            rejects negative indices).
    """
    # Keep the argmax result in its native integer dtype: narrowing it to
    # uint8 would silently wrap class ids >= 256.
    seg_pred = np.argmax(pred.cpu().numpy(), axis=1)
    seg_gt = np.asarray(
        label.cpu().numpy()[:, :size[-2], :size[-1]], dtype=np.int32)

    keep = seg_gt != ignore
    seg_gt = seg_gt[keep].astype(np.intp)
    seg_pred = seg_pred[keep].astype(np.intp)

    # Encode each (gt, pred) pair as one flat index so a single bincount
    # yields every cell of the matrix.
    num_cells = num_class * num_class
    index = seg_gt * num_class + seg_pred
    # minlength guarantees all cells exist; the slice drops pairs whose
    # ground truth lies outside [0, num_class).
    label_count = np.bincount(index, minlength=num_cells)[:num_cells]
    return label_count.reshape(num_class, num_class).astype(np.float64)
|
|