# NOTE(review): removed scraper artifacts that were not part of the program
# ("Spaces:" and two "Runtime error" lines) — they are invalid Python.
# -------------------------------------------------------- | |
# SiamMask | |
# Licensed under The MIT License | |
# Written by Qiang Wang (wangqiang2015 at ia.ac.cn) | |
# -------------------------------------------------------- | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
from __future__ import unicode_literals | |
import argparse | |
import h5py | |
import json | |
import os | |
import scipy.misc | |
import sys | |
import numpy as np | |
import cv2 | |
from os.path import join | |
def parse_args():
    """Parse command-line options for the dataset conversion script.

    Returns an argparse.Namespace with ``outdir`` (where the JSON files are
    written) and ``datadir`` (root of the annotations to convert), both
    defaulting to the current directory.
    """
    p = argparse.ArgumentParser(description='Convert dataset')
    p.add_argument('--outdir', default='./', type=str,
                   help="output dir for json files")
    p.add_argument('--datadir', default='./', type=str,
                   help="data dir for annotations to be converted")
    return p.parse_args()
def xyxy_to_xywh(xyxy):
    """Convert [x1 y1 x2 y2] box format to [x1 y1 w h] format.

    Accepts either a single 4-element list/tuple (returns a tuple) or an
    (N, 4) numpy array of boxes (returns an (N, 4) array). Width and height
    are inclusive pixel spans, hence the ``+ 1``.
    """
    if isinstance(xyxy, np.ndarray):
        # Batch of boxes: keep the top-left corner, derive w/h per row.
        return np.hstack((xyxy[:, 0:2], xyxy[:, 2:4] - xyxy[:, 0:2] + 1))
    if isinstance(xyxy, (list, tuple)):
        # A single box given as four scalars.
        assert len(xyxy) == 4
        x1, y1, x2, y2 = xyxy
        return (x1, y1, x2 - x1 + 1, y2 - y1 + 1)
    raise TypeError('Argument xyxy must be a list, tuple, or numpy array.')
def polys_to_boxes(polys):
    """Convert a list of polygons into an array of tight bounding boxes.

    Each entry of ``polys`` is itself a list of flat coordinate lists
    ([x0, y0, x1, y1, ...]); the returned (N, 4) float32 array holds the
    [x1, y1, x2, y2] box enclosing all rings of each polygon.
    """
    boxes = np.zeros((len(polys), 4), dtype=np.float32)
    for idx, poly in enumerate(polys):
        # Flatten every ring's even-index (x) and odd-index (y) coordinates.
        xs = [c for ring in poly for c in ring[::2]]
        ys = [c for ring in poly for c in ring[1::2]]
        boxes[idx, :] = [min(xs), min(ys), max(xs), max(ys)]
    return boxes
class Instance(object):
    """One object instance in a label image: its id and pixel count."""

    # Class-level zero defaults remain in effect when instID == 0
    # (background), since __init__ returns early without setting attributes.
    instID = 0
    pixelCount = 0

    def __init__(self, imgNp, instID):
        # Background id: keep the zeroed class-level defaults.
        if instID == 0:
            return
        self.instID = int(instID)
        self.pixelCount = int(self.getInstancePixels(imgNp, instID))

    def getInstancePixels(self, imgNp, instLabel):
        # Number of pixels carrying this instance's label.
        return (imgNp == instLabel).sum()

    def toDict(self):
        return {"instID": self.instID, "pixelCount": self.pixelCount}

    def __str__(self):
        return "(" + str(self.instID) + ")"
def convert_ytb_vos(data_dir, out_dir):
    """Convert YouTube-VOS annotations into per-video JSON instance files.

    Reads ``<data_dir>/train/Annotations`` plus the sibling ``meta.json``,
    extracts one annotation (size, file name, instance id, pixel area, bbox)
    per object instance per frame, then writes ``instances_train.json``
    (first 3000 videos) and ``instances_val.json`` (the remainder) into
    ``out_dir``.

    Args:
        data_dir: dataset root containing ``train/Annotations`` and
            ``train/meta.json``.
        out_dir: directory that receives the two JSON output files.
    """
    sets = ['train']
    ann_dirs = ['train/Annotations/']
    json_name = 'instances_%s.json'
    num_obj = 0   # total distinct instances across all videos
    num_ann = 0   # total per-frame annotations emitted
    for data_set, ann_dir in zip(sets, ann_dirs):
        print('Starting %s' % data_set)
        ann_dict = {}
        ann_dir = os.path.join(data_dir, ann_dir)
        # Close the meta file deterministically instead of leaking the handle.
        with open(os.path.join(ann_dir, '../meta.json')) as meta_file:
            json_ann = json.load(meta_file)
        for vid, video in enumerate(json_ann['videos']):
            v = json_ann['videos'][video]
            # Union of all frames any object of this video appears in.
            frames = []
            for obj in v['objects']:
                o = v['objects'][obj]
                frames.extend(o['frames'])
            frames = sorted(set(frames))
            annotations = []
            instanceIds = []
            for frame in frames:
                file_name = join(video, frame)
                fullname = os.path.join(ann_dir, file_name + '.png')
                img = cv2.imread(fullname, 0)
                if img is None:
                    # Missing/corrupt annotation image: warn and skip
                    # instead of crashing on img.shape below.
                    print('Warning: could not read %s' % fullname)
                    continue
                h, w = img.shape[:2]
                objects = dict()
                for instanceId in np.unique(img):
                    if instanceId == 0:  # 0 is the background label
                        continue
                    instanceObj = Instance(img, instanceId)
                    instanceObj_dict = instanceObj.toDict()
                    mask = (img == instanceId).astype(np.uint8)
                    # cv2.findContours returns (image, contours, hierarchy)
                    # in OpenCV 3.x but (contours, hierarchy) in 2.x/4.x;
                    # indexing with [-2] selects the contour list on every
                    # version, unlike the original 3-way unpacking.
                    contour = cv2.findContours(
                        mask.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_NONE)[-2]
                    polygons = [c.reshape(-1).tolist() for c in contour]
                    # Keep only real polygons (> 2 points, i.e. > 4 coords).
                    instanceObj_dict['contours'] = [p for p in polygons if len(p) > 4]
                    # Drop tiny instances (<= 1000 px) and empty contour sets.
                    if len(instanceObj_dict['contours']) and instanceObj_dict['pixelCount'] > 1000:
                        objects[instanceId] = instanceObj_dict
                for objId in objects:
                    if len(objects[objId]) == 0:
                        continue
                    obj = objects[objId]
                    len_p = [len(p) for p in obj['contours']]
                    if min(len_p) <= 4:
                        print('Warning: invalid contours.')
                        continue  # skip non-instance categories
                    ann = dict()
                    ann['h'] = h
                    ann['w'] = w
                    ann['file_name'] = file_name
                    ann['id'] = int(objId)
                    ann['area'] = obj['pixelCount']
                    ann['bbox'] = xyxy_to_xywh(polys_to_boxes([obj['contours']])).tolist()[0]
                    annotations.append(ann)
                    instanceIds.append(objId)
                    num_ann += 1
            instanceIds = sorted(set(instanceIds))
            num_obj += len(instanceIds)
            # Group this video's annotations by instance id.
            video_ann = {str(iId): [] for iId in instanceIds}
            for ann in annotations:
                video_ann[str(ann['id'])].append(ann)
            ann_dict[video] = video_ann
            if vid % 50 == 0 and vid != 0:
                print("process: %d video" % (vid + 1))
        print("Num Videos: %d" % len(ann_dict))
        print("Num Objects: %d" % num_obj)
        print("Num Annotations: %d" % num_ann)
        # First 3000 videos form the train split, the remainder the val split.
        items = list(ann_dict.items())
        train_dict = dict(items[:3000])
        val_dict = dict(items[3000:])
        with open(os.path.join(out_dir, json_name % 'train'), 'w') as outfile:
            json.dump(train_dict, outfile)
        with open(os.path.join(out_dir, json_name % 'val'), 'w') as outfile:
            json.dump(val_dict, outfile)
if __name__ == '__main__':
    # Script entry point: parse --datadir/--outdir and run the conversion.
    args = parse_args()
    convert_ytb_vos(args.datadir, args.outdir)