# NOTE(review): removed scraper artifacts that were not part of the program
# ("Spaces:" and two "Runtime error" lines) — they are invalid Python.
# -------------------------------------------------------- | |
# SiamMask | |
# Licensed under The MIT License | |
# Written by Qiang Wang (wangqiang2015 at ia.ac.cn) | |
# -------------------------------------------------------- | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
from __future__ import unicode_literals | |
import argparse | |
import h5py | |
import json | |
import os | |
import scipy.misc | |
import sys | |
import numpy as np | |
import cv2 | |
from os.path import join | |
def parse_args():
    """Parse command-line options for the dataset conversion script.

    Returns an argparse.Namespace with ``outdir`` (where the JSON files are
    written) and ``datadir`` (root of the annotations to convert), both
    defaulting to the current directory.
    """
    p = argparse.ArgumentParser(description='Convert dataset')
    p.add_argument('--outdir', default='./', type=str,
                   help="output dir for json files")
    p.add_argument('--datadir', default='./', type=str,
                   help="data dir for annotations to be converted")
    return p.parse_args()
def xyxy_to_xywh(xyxy):
    """Convert [x1 y1 x2 y2] box format to [x1 y1 w h] format.

    Accepts either a single 4-element list/tuple (returns a tuple) or an
    (N, 4) numpy array of boxes (returns an (N, 4) array). Width and height
    are inclusive pixel spans, hence the ``+ 1``.
    """
    if isinstance(xyxy, np.ndarray):
        # Batch of boxes: keep the top-left corner, derive w/h per row.
        return np.hstack((xyxy[:, 0:2], xyxy[:, 2:4] - xyxy[:, 0:2] + 1))
    if isinstance(xyxy, (list, tuple)):
        # A single box given as four scalars.
        assert len(xyxy) == 4
        x1, y1, x2, y2 = xyxy
        return (x1, y1, x2 - x1 + 1, y2 - y1 + 1)
    raise TypeError('Argument xyxy must be a list, tuple, or numpy array.')
def polys_to_boxes(polys):
    """Convert a list of polygons into an array of tight bounding boxes.

    Each entry of ``polys`` is itself a list of flat coordinate lists
    ([x0, y0, x1, y1, ...]); the returned (N, 4) float32 array holds the
    [x1, y1, x2, y2] box enclosing all rings of each polygon.
    """
    boxes = np.zeros((len(polys), 4), dtype=np.float32)
    for idx, poly in enumerate(polys):
        # Flatten every ring's even-index (x) and odd-index (y) coordinates.
        xs = [c for ring in poly for c in ring[::2]]
        ys = [c for ring in poly for c in ring[1::2]]
        boxes[idx, :] = [min(xs), min(ys), max(xs), max(ys)]
    return boxes
class Instance(object):
    """One object instance in a label image: its id and pixel count."""

    # Class-level zero defaults remain in effect when instID == 0
    # (background), since __init__ returns early without setting attributes.
    instID = 0
    pixelCount = 0

    def __init__(self, imgNp, instID):
        # Background id: keep the zeroed class-level defaults.
        if instID == 0:
            return
        self.instID = int(instID)
        self.pixelCount = int(self.getInstancePixels(imgNp, instID))

    def getInstancePixels(self, imgNp, instLabel):
        # Number of pixels carrying this instance's label.
        return (imgNp == instLabel).sum()

    def toDict(self):
        return {"instID": self.instID, "pixelCount": self.pixelCount}

    def __str__(self):
        return "(" + str(self.instID) + ")"
def convert_ytb_vos(data_dir, out_dir):
    """Convert YouTube-VOS annotations into per-video JSON instance files.

    Reads ``<data_dir>/train/Annotations`` plus the sibling ``meta.json``,
    extracts one annotation (size, file name, instance id, pixel area, bbox)
    per object instance per frame, then writes ``instances_train.json``
    (first 3000 videos) and ``instances_val.json`` (the remainder) into
    ``out_dir``.

    Args:
        data_dir: dataset root containing ``train/Annotations`` and
            ``train/meta.json``.
        out_dir: directory that receives the two JSON output files.
    """
    sets = ['train']
    ann_dirs = ['train/Annotations/']
    json_name = 'instances_%s.json'
    num_obj = 0   # total distinct instances across all videos
    num_ann = 0   # total per-frame annotations emitted
    for data_set, ann_dir in zip(sets, ann_dirs):
        print('Starting %s' % data_set)
        ann_dict = {}
        ann_dir = os.path.join(data_dir, ann_dir)
        # Close the meta file deterministically instead of leaking the handle.
        with open(os.path.join(ann_dir, '../meta.json')) as meta_file:
            json_ann = json.load(meta_file)
        for vid, video in enumerate(json_ann['videos']):
            v = json_ann['videos'][video]
            # Union of all frames any object of this video appears in.
            frames = []
            for obj in v['objects']:
                o = v['objects'][obj]
                frames.extend(o['frames'])
            frames = sorted(set(frames))
            annotations = []
            instanceIds = []
            for frame in frames:
                file_name = join(video, frame)
                fullname = os.path.join(ann_dir, file_name + '.png')
                img = cv2.imread(fullname, 0)
                if img is None:
                    # Missing/corrupt annotation image: warn and skip
                    # instead of crashing on img.shape below.
                    print('Warning: could not read %s' % fullname)
                    continue
                h, w = img.shape[:2]
                objects = dict()
                for instanceId in np.unique(img):
                    if instanceId == 0:  # 0 is the background label
                        continue
                    instanceObj = Instance(img, instanceId)
                    instanceObj_dict = instanceObj.toDict()
                    mask = (img == instanceId).astype(np.uint8)
                    # cv2.findContours returns (image, contours, hierarchy)
                    # in OpenCV 3.x but (contours, hierarchy) in 2.x/4.x;
                    # indexing with [-2] selects the contour list on every
                    # version, unlike the original 3-way unpacking.
                    contour = cv2.findContours(
                        mask.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_NONE)[-2]
                    polygons = [c.reshape(-1).tolist() for c in contour]
                    # Keep only real polygons (> 2 points, i.e. > 4 coords).
                    instanceObj_dict['contours'] = [p for p in polygons if len(p) > 4]
                    # Drop tiny instances (<= 1000 px) and empty contour sets.
                    if len(instanceObj_dict['contours']) and instanceObj_dict['pixelCount'] > 1000:
                        objects[instanceId] = instanceObj_dict
                for objId in objects:
                    if len(objects[objId]) == 0:
                        continue
                    obj = objects[objId]
                    len_p = [len(p) for p in obj['contours']]
                    if min(len_p) <= 4:
                        print('Warning: invalid contours.')
                        continue  # skip non-instance categories
                    ann = dict()
                    ann['h'] = h
                    ann['w'] = w
                    ann['file_name'] = file_name
                    ann['id'] = int(objId)
                    ann['area'] = obj['pixelCount']
                    ann['bbox'] = xyxy_to_xywh(polys_to_boxes([obj['contours']])).tolist()[0]
                    annotations.append(ann)
                    instanceIds.append(objId)
                    num_ann += 1
            instanceIds = sorted(set(instanceIds))
            num_obj += len(instanceIds)
            # Group this video's annotations by instance id.
            video_ann = {str(iId): [] for iId in instanceIds}
            for ann in annotations:
                video_ann[str(ann['id'])].append(ann)
            ann_dict[video] = video_ann
            if vid % 50 == 0 and vid != 0:
                print("process: %d video" % (vid + 1))
        print("Num Videos: %d" % len(ann_dict))
        print("Num Objects: %d" % num_obj)
        print("Num Annotations: %d" % num_ann)
        # First 3000 videos form the train split, the remainder the val split.
        items = list(ann_dict.items())
        train_dict = dict(items[:3000])
        val_dict = dict(items[3000:])
        with open(os.path.join(out_dir, json_name % 'train'), 'w') as outfile:
            json.dump(train_dict, outfile)
        with open(os.path.join(out_dir, json_name % 'val'), 'w') as outfile:
            json.dump(val_dict, outfile)
if __name__ == '__main__':
    # Script entry point: parse --datadir/--outdir and run the conversion.
    args = parse_args()
    convert_ytb_vos(args.datadir, args.outdir)