File size: 5,186 Bytes
1cff332 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import cv2
import pathlib
import argparse
import numpy as np
import onnxruntime as ort
CURRENT_DIR = pathlib.Path(__file__).parent
sys.path.append(str(CURRENT_DIR))
from coco import COCO_CLASSES
from demo_utils import mkdir, multiclass_nms, demo_postprocess, vis
def make_parser():
    """
    Build the command-line parser for the onnxruntime inference sample.

    Returns:
        parser (argparse.ArgumentParser): parser exposing the model path, image path,
            output directory, score threshold, input shape, IPU switch and provider config
    """
    cli = argparse.ArgumentParser("onnxruntime inference sample")

    # Each entry is (option flags, add_argument keyword settings); the order here is
    # the order in which the options are registered and shown in --help.
    option_table = (
        (
            ("-m", "--model"),
            {"type": str, "default": "yolox-s-int8.onnx", "help": "Input your onnx model."},
        ),
        (
            ("-i", "--image_path"),
            {"type": str, "default": 'test_image.png', "help": "Path to your input image."},
        ),
        (
            ("-o", "--output_dir"),
            {"type": str, "default": 'demo_output', "help": "Path to your output directory."},
        ),
        (
            ("-s", "--score_thr"),
            {"type": float, "default": 0.3, "help": "Score threshold to filter the result."},
        ),
        (
            ("--input_shape",),
            {"type": str, "default": "640,640", "help": "Specify an input shape for inference."},
        ),
        (
            ("--ipu",),
            {"action": "store_true", "help": "Use IPU for inference."},
        ),
        (
            ("--provider_config",),
            {
                "type": str,
                "default": "vaip_config.json",
                "help": "Path of the config file for setting provider_options.",
            },
        ),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli
def preprocess(img, input_shape, swap=(2, 0, 1)):
    """
    Resize an image with its aspect ratio preserved and pad it to the network input size.

    The resized image is written into the top-left corner of a canvas filled with
    the value 114; the canvas is then transposed with `swap` and returned as float32.

    Args:
        img (numpy.ndarray): H x W x C, image read with OpenCV
        input_shape (tuple(int)): input shape of the network for inference
        swap (tuple(int)): new order of axes to transpose the input image
    Returns:
        padded_img (numpy.ndarray): preprocessed image to be fed to the network
        ratio (float): ratio for scaling the image to the input shape
    """
    src_h, src_w = img.shape[0], img.shape[1]

    # Three-dimensional inputs get a 3-channel canvas; otherwise input_shape is used as-is.
    if len(img.shape) == 3:
        canvas_shape = (input_shape[0], input_shape[1], 3)
    else:
        canvas_shape = input_shape
    canvas = np.full(canvas_shape, 114, dtype=np.uint8)

    # The smaller of the two axis ratios keeps the whole image inside the canvas.
    ratio = min(input_shape[0] / src_h, input_shape[1] / src_w)
    new_h = int(src_h * ratio)
    new_w = int(src_w * ratio)

    # cv2.resize takes the target size as (width, height).
    scaled = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    canvas[:new_h, :new_w] = scaled.astype(np.uint8)

    reordered = canvas.transpose(swap)
    return np.ascontiguousarray(reordered, dtype=np.float32), ratio
def postprocess(outputs, input_shape, ratio, nms_thr=0.45, score_thr=0.1):
    """
    Post-processing part of YOLOX for generating final results from outputs of the network.

    Each head output is flattened over its trailing dimensions and moved to a
    channels-last layout, all heads are concatenated, and sigmoid is applied to
    every channel from index 4 onwards before decoding with `demo_postprocess`.

    Args:
        outputs (tuple(numpy.ndarray)): outputs of the detection heads with onnxruntime session
        input_shape (tuple(int)): input shape of the network for inference
        ratio (float): ratio for scaling the image to the input shape
        nms_thr (float): threshold forwarded to `multiclass_nms` as `nms_thr`
        score_thr (float): threshold forwarded to `multiclass_nms` as `score_thr`
    Returns:
        dets (numpy.ndarray): n x 6, dets[:,:4] -> boxes, dets[:,4] -> scores, dets[:,5] -> class indices
            NOTE(review): the caller guards against `None`, so `multiclass_nms`
            presumably returns None when nothing is kept -- confirm in demo_utils.
    """
    # (N, C, ...) -> (N, C, K) -> (N, K, C) per head, then stack all heads along K.
    flattened = [out.reshape(*out.shape[:2], -1).transpose(0, 2, 1) for out in outputs]
    merged = np.concatenate(flattened, axis=1)

    # Channels 0-3 stay raw for box decoding; the remaining channels become probabilities.
    merged[..., 4:] = sigmoid(merged[..., 4:])
    predictions = demo_postprocess(merged, input_shape, p6=False)[0]

    boxes = predictions[:, :4]
    # Column 4 is multiplied into every remaining column to form per-class scores.
    scores = predictions[:, 4:5] * predictions[:, 5:]

    # Columns 0-1 are used as the box centre and columns 2-3 as its size:
    # convert to corner form (x1, y1, x2, y2) in one vectorised step.
    half_size = boxes[:, 2:4] / 2.
    boxes_xyxy = np.concatenate(
        (boxes[:, :2] - half_size, boxes[:, :2] + half_size), axis=1
    )
    # Undo the preprocessing scale so boxes are in original-image coordinates.
    boxes_xyxy /= ratio

    return multiclass_nms(boxes_xyxy, scores, nms_thr=nms_thr, score_thr=score_thr)
def sigmoid(x):
    """
    Numerically stable logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise.

    Only non-positive values are ever exponentiated, so large-magnitude inputs
    cannot overflow `np.exp` (the naive form overflows for very negative x).

    Args:
        x (numpy.ndarray or scalar): input values
    Returns:
        (numpy.ndarray or numpy scalar): sigmoid of `x`; floating-point input keeps its dtype
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1] for every finite x, so it can never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function, rearranged.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves arrays unchanged.
    return result[()]
if __name__ == '__main__':
    # Script entry point: run one image through the ONNX model and save the visualisation.
    args = make_parser().parse_args()

    # "--input_shape" arrives as comma-separated text, e.g. "640,640".
    input_shape = tuple(map(int, args.input_shape.split(',')))
    if len(input_shape) != 2:
        raise ValueError(
            "--input_shape must be two comma-separated integers, got: {!r}".format(args.input_shape)
        )

    # cv2.imread signals failure by returning None instead of raising, so check explicitly.
    origin_img = cv2.imread(args.image_path)
    if origin_img is None:
        raise FileNotFoundError("Could not read image: {}".format(args.image_path))
    img, ratio = preprocess(origin_img, input_shape)

    if args.ipu:
        providers = ["VitisAIExecutionProvider"]
        provider_options = [{"config_file": args.provider_config}]
    else:
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        provider_options = None
    session = ort.InferenceSession(args.model, providers=providers, provider_options=provider_options)

    # img[None] adds the leading batch axis before feeding the first model input.
    ort_inputs = {session.get_inputs()[0].name: img[None, :, :, :]}
    outputs = session.run(None, ort_inputs)

    dets = postprocess(outputs, input_shape, ratio)
    if dets is not None:
        final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
        origin_img = vis(origin_img, final_boxes, final_scores, final_cls_inds,
                         conf=args.score_thr, class_names=COCO_CLASSES)

    # The image is written even when there are no detections (then it is the unmodified input).
    mkdir(args.output_dir)
    output_path = os.path.join(args.output_dir, os.path.basename(args.image_path))
    # cv2.imwrite reports failure through a False return value rather than an exception.
    if not cv2.imwrite(output_path, origin_img):
        raise OSError("Could not write output image: {}".format(output_path))
|