File size: 3,001 Bytes
c2ba1ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

# Target input size as (height, width); preprocess() passes it to
# resize_image(), which indexes element 0 as height and element 1 as width.
INPUT_SIZE = [512, 1024]


def input_transform(image):
    """Normalize an image for network input.

    The channel axis is reversed (presumably BGR -> RGB for images loaded
    with OpenCV -- confirm against the caller), values are scaled by 1/255,
    and each channel is standardized with a fixed mean and std.

    Args:
        image (ndarray): Image indexed as (height, width, 3).

    Returns:
        ndarray: A new float32 array of the same shape; the input is not
        modified.
    """
    # Per-channel statistics, applied after the channel flip. These values
    # match the commonly used ImageNet RGB mean/std.
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    flipped = image[:, :, ::-1].astype(np.float32)
    scaled = flipped / 255.0
    return (scaled - channel_mean) / channel_std


def pad_image(image, h, w, size, padvalue):
    """Pad an image on the bottom and right up to a minimum size.

    Args:
        image (ndarray): Image to pad.
        h (int): Current image height.
        w (int): Current image width.
        size (sequence): Minimum size as (height, width).
        padvalue: Constant border value passed to ``cv2.copyMakeBorder``.

    Returns:
        tuple: ``(padded, pad_h, pad_w)`` where ``pad_h`` / ``pad_w`` are the
        number of rows / columns added. When no padding is needed a copy of
        ``image`` is returned with both paddings equal to 0.
    """
    pad_h = max(size[0] - h, 0)
    pad_w = max(size[1] - w, 0)
    needs_padding = pad_h > 0 or pad_w > 0
    if not needs_padding:
        # Hand back a copy so callers never alias the input array.
        return image.copy(), pad_h, pad_w

    # Border widths are given as top, bottom, left, right: only the bottom
    # and right edges grow.
    padded = cv2.copyMakeBorder(
        image, 0, pad_h, 0, pad_w, cv2.BORDER_CONSTANT, value=padvalue
    )
    return padded, pad_h, pad_w


def resize_image(image, re_size, keep_ratio=True):
    """Resize an image to a target size given as (height, width).

    Args:
        image (ndarray): Image whose first two axes are height and width.
        re_size (sequence): Target size as ``(height, width)``.
        keep_ratio (bool): When True, scale the image so it fits inside
            ``re_size`` without changing its aspect ratio, then pad the
            bottom/right with zeros up to ``re_size``. When False, stretch
            the image to exactly ``re_size``.

    Returns:
        tuple: ``(re_image, pad_h, pad_w)`` where ``re_image`` is a float32
        array and ``pad_h`` / ``pad_w`` are the rows / columns of padding
        that were added (always 0 when ``keep_ratio`` is False).
    """
    target_h, target_w = re_size[0], re_size[1]

    if not keep_ratio:
        # cv2.resize expects dsize as (width, height), while re_size is
        # (height, width) -- swap so the output is target_h x target_w.
        re_image = cv2.resize(image, (target_w, target_h)).astype('float32')
        return re_image, 0, 0

    h, w = image.shape[0:2]
    ratio = target_h * 1.0 / target_w
    if h * 1.0 / w <= ratio:
        # Image is relatively wider than the target: width is the limit.
        re_h, re_w = int(h * target_w * 1.0 / w), target_w
    else:
        # Image is relatively taller than the target: height is the limit.
        re_h, re_w = target_h, int(w * target_h * 1.0 / h)

    re_image = cv2.resize(image, (re_w, re_h)).astype('float32')
    re_image, pad_h, pad_w = pad_image(
        re_image, re_h, re_w, re_size, (0.0, 0.0, 0.0)
    )
    return re_image, pad_h, pad_w


def preprocess(img):
    """Resize, normalize and re-layout an image for network input.

    The image is resized to ``INPUT_SIZE`` with ``resize_image`` (default
    aspect-preserving mode), normalized with ``input_transform``, and its
    axes are reordered from (H, W, C) to (C, H, W).

    Args:
        img (ndarray): Image indexed as (height, width, channels).

    Returns:
        tuple: ``(chw_image, pad_h, pad_w)`` -- the processed image and the
        padding amounts reported by ``resize_image``.
    """
    resized, pad_h, pad_w = resize_image(img, INPUT_SIZE)
    normalized = input_transform(resized)
    chw_image = normalized.transpose((2, 0, 1))
    return chw_image, pad_h, pad_w


def get_confusion_matrix(label, pred, size, num_class, ignore=-1):
    """Calculate the confusion matrix for the given labels and predictions.

    Args:
        label: Ground-truth class indices indexed as (N, H, W). Must expose
            ``.cpu().numpy()`` (e.g. a torch tensor).
        pred: Per-class scores indexed as (N, C, H, W), with the same
            ``.cpu().numpy()`` interface. The predicted class of each pixel
            is the argmax over axis 1.
        size: Shape-like sequence; its last two entries are the height and
            width to which ``label`` is cropped before comparison.
        num_class (int): Number of classes.
        ignore (int): Label value whose pixels are excluded from the counts.

    Returns:
        ndarray: float64 array of shape (num_class, num_class) where entry
        ``[i, j]`` is the number of pixels labelled ``i`` and predicted
        ``j``. Pairs that fall outside the matrix are dropped.
    """
    scores = pred.cpu().numpy()
    # Keep predictions as int64: a uint8 cast would wrap class ids >= 256
    # and count them in the wrong column.
    seg_pred = np.argmax(scores, axis=1).astype(np.int64)
    seg_gt = np.asarray(
        label.cpu().numpy()[:, :size[-2], :size[-1]], dtype=np.int64)

    valid = seg_gt != ignore
    seg_gt = seg_gt[valid]
    seg_pred = seg_pred[valid]

    # Encode each (label, prediction) pair as one flat index so a single
    # bincount yields every cell of the matrix.
    index = seg_gt * num_class + seg_pred
    num_cells = num_class * num_class
    # minlength pads missing trailing cells with 0; the slice discards any
    # index beyond the matrix (labels >= num_class).
    label_count = np.bincount(index, minlength=num_cells)[:num_cells]
    return label_count.reshape(num_class, num_class).astype(np.float64)