File size: 4,675 Bytes


import numpy as np
import cv2
import torch
import onnxruntime
import sys
import pathlib
CURRENT_DIR = pathlib.Path(__file__).parent
sys.path.append(str(CURRENT_DIR))
import argparse
from utils import (
    letterbox,
    non_max_suppression,
    scale_coords,
    Annotator,
    Colors,
)


def pre_process(img):
    """Turn an image into a batched float32 network input.

    The image is letterboxed to 640x640 (stride 32, ``auto=False``), moved
    from HWC to CHW layout with the channel order reversed, scaled by
    1/255 and given a leading batch axis.

    Args:
        img: HWC image array; presumably BGR uint8 as produced by
            ``cv2.imread`` (verify against the caller).

    Returns:
        A float32 array with shape ``(1, C, H, W)``.
    """
    # letterbox returns a sequence; only its first element (the image) is used.
    padded = letterbox(img, [640, 640], stride=32, auto=False)[0]
    # HWC -> CHW, then flip the channel axis (BGR -> RGB).
    chw_rgb = padded.transpose((2, 0, 1))[::-1]
    scaled = np.ascontiguousarray(chw_rgb).astype("float32") / 255.0
    return scaled[np.newaxis, :]


def post_process(x):
    """Decode the three raw detection-head outputs into box predictions.

    This function reads the module-level ``grid`` and ``anchor_grid``
    sequences (one entry per detection level). They must exist before the
    call; the script entry point loads them from ``./grid.npy`` and
    ``./anchor_grid.npy``.

    Args:
        x: Sequence whose first three items are feature maps (tensors or
            arrays) shaped ``(bs, 255, ny, nx)``, ordered to match the
            strides 8, 16 and 32.

    Returns:
        A tuple ``(pred, feats)``. ``pred`` is a tensor of shape
        ``(bs, N, 85)`` holding the decoded outputs of all three levels
        concatenated along dim 1. ``feats`` is ``x`` as a list, with the
        first three items replaced by the raw (pre-sigmoid) maps reshaped to
        ``(bs, 3, ny, nx, 85)``.
    """
    x = list(x)
    decoded = []  # per-level inference output
    strides = (8, 16, 32)
    for i, stride in enumerate(strides):
        bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
        # as_tensor() instead of tensor(): the caller already passes tensors,
        # and torch.tensor(<Tensor>) copy-constructs (extra copy plus a
        # UserWarning on every call). Arrays are still converted.
        x[i] = (
            torch.as_tensor(x[i])
            .view(bs, 3, 85, ny, nx)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )
        y = x[i].sigmoid()
        # Box centre: sigmoid output offset by the cell grid, scaled to pixels.
        xy = (y[..., 0:2] * 2.0 - 0.5 + grid[i]) * stride
        # Box size: squared sigmoid output scaled by the per-level anchors.
        wh = (y[..., 2:4] * 2) ** 2 * anchor_grid[i]
        y = torch.cat((xy, wh, y[..., 4:]), -1)
        decoded.append(y.view(bs, -1, 85))

    return (torch.cat(decoded, 1), x)


def make_parser():
    """Build the command-line parser for the inference sample.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``--onnx_model``,
        ``--image_path``, ``--output_path``, ``--ipu`` and
        ``--provider_config``.
    """
    parser = argparse.ArgumentParser("onnxruntime inference sample")
    parser.add_argument(
        "-m",
        "--onnx_model",
        type=str,
        default="./yolov5s.onnx",
        help="input your onnx model.",
    )
    parser.add_argument(
        "-i",
        "--image_path",
        type=str,
        default='./demo.jpg',
        help="path to your input image.",
    )
    parser.add_argument(
        "-o",
        "--output_path",
        type=str,
        default='./demo_infer.jpg',
        # The value is passed to cv2.imwrite, so it names a file, not a
        # directory.
        help="path to your output image.",
    )
    parser.add_argument(
        '--ipu',
        action='store_true',
        help='flag for ryzen ai'
    )
    parser.add_argument(
        '--provider_config',
        default='',
        type=str,
        help='provider config for ryzen ai'
    )
    return parser


# The 80 class labels (the COCO detection label set), indexed by the integer
# class id of each detection.
names = [
    # 0-9
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    # 10-19
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    # 20-29
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    # 30-39
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    # 40-49
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    # 50-59
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    # 60-69
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    # 70-79
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]


if __name__ == '__main__':
    # Script entry point: run the ONNX model on one image, decode and filter
    # the detections, draw them and write the annotated image.
    args = make_parser().parse_args()
    onnx_path = args.onnx_model
    if args.ipu:
        # Ryzen AI path: run through the Vitis AI execution provider.
        providers = ["VitisAIExecutionProvider"]
        provider_options = [{"config_file": args.provider_config}]
        onnx_weight = onnxruntime.InferenceSession(onnx_path, providers=providers, provider_options=provider_options)
    else:
        onnx_weight = onnxruntime.InferenceSession(onnx_path)
    # grid / anchor_grid must stay module-level names: post_process() reads
    # them as globals.
    # SECURITY NOTE: allow_pickle=True lets np.load unpickle arbitrary
    # objects, which can execute code. Only load .npy files you trust.
    grid = np.load("./grid.npy", allow_pickle=True)
    anchor_grid = np.load("./anchor_grid.npy", allow_pickle=True)
    path = args.image_path
    new_path = args.output_path
    conf_thres, iou_thres, classes, agnostic_nms, max_det = 0.25, 0.45, None, False, 1000

    img0 = cv2.imread(path)
    if img0 is None:
        # cv2.imread signals failure by returning None instead of raising.
        sys.exit(f"Failed to read input image: {path}")
    img = pre_process(img0)
    # pre_process yields NCHW; the session is fed NHWC and its outputs are
    # permuted back to NCHW for post_process.
    onnx_input = {onnx_weight.get_inputs()[0].name: img.transpose(0, 2, 3, 1)}
    onnx_output = onnx_weight.run(None, onnx_input)
    onnx_output = [torch.tensor(item).permute(0, 3, 1, 2) for item in onnx_output]
    onnx_output = post_process(onnx_output)
    pred = non_max_suppression(
        onnx_output[0], conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det
    )
    colors = Colors()
    det = pred[0]  # detections for the single image in the batch
    im0 = img0.copy()
    annotator = Annotator(im0, line_width=2, example=str(names))
    if len(det):
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

        # Write results
        for *xyxy, conf, cls in reversed(det):
            c = int(cls)  # integer class
            label = f"{names[c]} {conf:.2f}"
            annotator.box_label(xyxy, label, color=colors(c, True))
    # Save the annotated image
    im0 = annotator.result()
    if not cv2.imwrite(new_path, im0):
        # cv2.imwrite returns False when the file could not be written.
        sys.exit(f"Failed to write output image: {new_path}")