zhengrongzhang committed

Commit: 21794d5
Parent(s): 2a1a580

init model
Files changed:
- README.md +93 -0
- center_weight_origin.npy +3 -0
- eval_onnx.py +533 -0
- make_coco_data_17keypoints.py +277 -0
- movenet_int8.onnx +3 -0
- requirements.txt +4 -0
README.md
ADDED
@@ -0,0 +1,93 @@
---
license: apache-2.0
datasets:
- detection-datasets/coco
language:
- en
metrics:
- accuracy
tags:
- RyzenAI
- pose estimation
---

# MoveNet

MoveNet is an ultra-fast and accurate model that detects 17 keypoints of a body. It was originally released in [movenet.pytorch](https://github.com/fire717/movenet.pytorch/blob/master/README.md?plain=1).

We provide a modified version that is supported by [AMD Ryzen AI](https://ryzenai.docs.amd.com/).

## How to use

### Installation

Follow the [Ryzen AI Installation](https://ryzenai.docs.amd.com/en/latest/inst.html) guide to prepare the environment for Ryzen AI.
Then run the following script to install the prerequisites for this model:
```bash
pip install -r requirements.txt
```
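
Before running on the IPU, it can help to confirm that ONNX Runtime actually sees the Vitis AI execution provider (the provider that eval_onnx.py requests with `--ipu`). A minimal check, assuming the Ryzen AI environment from the step above is active:

```python
# Minimal environment check (sketch): list the execution providers ONNX Runtime can see.
# "VitisAIExecutionProvider" should appear once the Ryzen AI setup is complete;
# otherwise only CPU (and possibly CUDA) providers will be listed.
import onnxruntime as ort

print(ort.get_available_providers())
```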

### Data Preparation (optional: for accuracy evaluation)

1. Download the COCO 2017 dataset from https://cocodataset.org/ (you need train2017.zip, val2017.zip, and the annotations) and unzip it to `./data/` like this:

```
├── data
    ├── annotations (person_keypoints_train2017.json, person_keypoints_val2017.json, ...)
    ├── train2017 (xx.jpg, xx.jpg, ...)
    └── val2017 (xx.jpg, xx.jpg, ...)
```

2. Convert the data to our data format.
- Modify the dataset paths at the bottom of make_coco_data_17keypoints.py (`img_dir`, `labels_path`, `output_name`, `output_img_dir`) if needed.
- Run the script to pre-process the dataset:
```
python make_coco_data_17keypoints.py
```
```
Our data format: JSON file
Keypoints order: ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 'left_wrist',
'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle',
'right_ankle']

One item:
[{"img_name": "0.jpg",
  "keypoints": [x0,y0,z0,x1,y1,z1,...],
  #z: 0 for no label, 1 for labeled but invisible, 2 for labeled and visible
  "center": [x,y],
  "bbox": [x0,y0,x1,y1],
  "other_centers": [[x0,y0],[x1,y1],...],
  "other_keypoints": [[[x0,y0],[x1,y1],...],[[x0,y0],[x1,y1],...],...], #length = num_keypoints
 },
 ...
]
```
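
As a quick sanity check of the pre-processed data, the generated annotation file can be loaded directly. This is a minimal sketch that assumes the default output path `./data/croped/val2017.json` written by make_coco_data_17keypoints.py:

```python
# Sketch: inspect one item of the pre-processed annotation file.
# The path assumes the default output location used by make_coco_data_17keypoints.py.
import json

with open("./data/croped/val2017.json", "r") as f:
    items = json.load(f)

first = items[0]
print(first["img_name"], first["center"], first["bbox"])
# 17 keypoints stored as a flat [x, y, z] list -> 51 values
print(len(first["keypoints"]))
```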

### Test & Evaluation

- Modify `DATASET_PATH` in eval_onnx.py if needed.
- Test the accuracy of the quantized model:
```bash
python eval_onnx.py --ipu --provider_config Path\To\vaip_config.json
```
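
For reference, a minimal single-image inference sketch on CPU is shown below. It mirrors the preprocessing and the tensor names used in eval_onnx.py (input `blob.1`, outputs `1548`, `1607`, `1665`, `1723`); the image path is only a placeholder.

```python
# Sketch: run the quantized model on one image with the CPU provider,
# reusing the preprocessing and tensor names from eval_onnx.py.
import cv2
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("movenet_int8.onnx", providers=["CPUExecutionProvider"])

img = cv2.imread("person.jpg")                      # placeholder image path
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (192, 192)).astype(np.float32)
img = np.transpose(img, (2, 0, 1))[None] / 127.5 - 1.0   # NCHW, scaled to [-1, 1]

heatmaps, centers, regs, offsets = sess.run(
    ['1548', '1607', '1665', '1723'], {'blob.1': img})
print(heatmaps.shape, centers.shape, regs.shape, offsets.shape)
```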

### Performance

| Metric | Accuracy on IPU |
| :----: | :----: |
| accuracy | 79.745% |

## Citation

1. [model card](https://storage.googleapis.com/movenet/MoveNet.SinglePose%20Model%20Card.pdf)
2. [movenet.pytorch](https://github.com/fire717/movenet.pytorch/blob/master/README.md?plain=1)
center_weight_origin.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:88a437d41816cc6526e64f0014fb977d4d3f216bbec7daaea59492a1f3f1494a
size 9296
eval_onnx.py
ADDED
@@ -0,0 +1,533 @@
import onnxruntime as rt
import numpy as np
import json
import torch
import cv2
import os
from torch.utils.data.dataset import Dataset
import random
import math
import argparse

# Constants and paths defining model, image, and dataset specifics
MODEL_DIR = './movenet_int8.onnx'  # Path to the MoveNet model
IMG_SIZE = 192  # Image size used for processing
FEATURE_MAP_SIZE = 48  # Feature map size used in the model
CENTER_WEIGHT_ORIGIN_PATH = './center_weight_origin.npy'  # Path to center weight origin file
DATASET_PATH = 'your_dataset_path'  # Base path for the dataset
EVAL_LABLE_PATH = os.path.join(DATASET_PATH, "val2017.json")  # Path to validation labels JSON file
EVAL_IMG_PATH = os.path.join(DATASET_PATH, 'imgs')  # Path to validation images


def getDist(pre, labels):
    """
    Calculate the squared Euclidean distance between predicted and labeled keypoints.

    Args:
        pre: Predicted keypoints [batchsize, 34]
        labels: Labeled keypoints [batchsize, 34]

    Returns:
        dist: Distance between keypoints [batchsize, 17]
    """
    pre = pre.reshape([-1, 17, 2])
    labels = labels.reshape([-1, 17, 2])
    res = np.power(pre[:,:,0]-labels[:,:,0],2)+np.power(pre[:,:,1]-labels[:,:,1],2)
    return res


def getAccRight(dist, th=5/IMG_SIZE):
    """
    Compute accuracy for each keypoint based on a threshold.

    Args:
        dist: Distance between keypoints [batchsize, 17]
        th: Threshold for accuracy computation

    Returns:
        res: Accuracy per keypoint [17,] representing the count of correct predictions
    """
    res = np.zeros(dist.shape[1], dtype=np.int64)
    for i in range(dist.shape[1]):
        res[i] = sum(dist[:,i]<th)
    return res


def myAcc(output, target):
    '''
    Compute accuracy across keypoints.

    Args:
        output: Predicted keypoints
        target: Labeled keypoints

    Returns:
        cate_acc: Categorical accuracy [17,] representing the count of correct predictions per keypoint
    '''
    # output[:,6:10] = output[:,6:10]+output[:,2:6]
    # output[:,10:14] = output[:,10:14]+output[:,6:10]
    # Calculate distance between predicted and labeled keypoints
    dist = getDist(output, target)
    # Calculate accuracy for each keypoint
    cate_acc = getAccRight(dist)
    return cate_acc


# Predefined numpy arrays and weights for calculations
_range_weight_x = np.array([[x for x in range(FEATURE_MAP_SIZE)] for _ in range(FEATURE_MAP_SIZE)])
_range_weight_y = _range_weight_x.T
_center_weight = np.load(CENTER_WEIGHT_ORIGIN_PATH).reshape(FEATURE_MAP_SIZE,FEATURE_MAP_SIZE)


def maxPoint(heatmap, center=True):
    """
    Find the coordinates of maximum values in a heatmap.

    Args:
        heatmap: Input heatmap data
        center: Flag to indicate whether to consider center-weighted points

    Returns:
        x, y: Coordinates of maximum values in the heatmap
    """
    if len(heatmap.shape) == 3:
        batch_size,h,w = heatmap.shape
        c = 1
    elif len(heatmap.shape) == 4:
        # n,c,h,w
        batch_size,c,h,w = heatmap.shape
    if center:
        heatmap = heatmap*_center_weight
    heatmap = heatmap.reshape((batch_size,c, -1))  #bs, c, feature_map_size*feature_map_size
    max_id = np.argmax(heatmap,2)  #bs, c, 1
    y = max_id//w
    x = max_id%w
    return x,y


def movenetDecode(data, kps_mask=None, mode='output', num_joints=17,
                  img_size=192, hm_th=0.1):
    '''
    Decode MoveNet output data to predicted keypoints.

    Args:
        data: MoveNet output data
        kps_mask: Keypoints mask
        mode: Mode of decoding ('output' or 'label')
        num_joints: Number of joints/keypoints
        img_size: Image size
        hm_th: Threshold for heatmap processing

    Returns:
        res: Decoded keypoints
    '''
    ##data [bs, 17, 48, 48] [bs, 1, 48, 48] [bs, 34, 48, 48] [bs, 34, 48, 48]
    #kps_mask [n, 17]
    if mode == 'output':
        batch_size = data[0].shape[0]
        heatmaps = data[0]
        heatmaps[heatmaps < hm_th] = 0
        centers = data[1]
        regs = data[2]
        offsets = data[3]
        cx,cy = maxPoint(centers)
        dim0 = np.arange(batch_size,dtype=np.int32).reshape(batch_size,1)
        dim1 = np.zeros((batch_size,1),dtype=np.int32)
        res = []
        for n in range(num_joints):
            reg_x_origin = (regs[dim0,dim1+n*2,cy,cx]+0.5).astype(np.int32)
            reg_y_origin = (regs[dim0,dim1+n*2+1,cy,cx]+0.5).astype(np.int32)
            reg_x = reg_x_origin+cx
            reg_y = reg_y_origin+cy
            ### for post process
            reg_x = np.reshape(reg_x, (reg_x.shape[0],1,1))
            reg_y = np.reshape(reg_y, (reg_y.shape[0],1,1))
            reg_x = reg_x.repeat(FEATURE_MAP_SIZE,1).repeat(FEATURE_MAP_SIZE,2)
            reg_y = reg_y.repeat(FEATURE_MAP_SIZE,1).repeat(FEATURE_MAP_SIZE,2)
            range_weight_x = np.reshape(_range_weight_x,(1,FEATURE_MAP_SIZE,FEATURE_MAP_SIZE)).repeat(reg_x.shape[0],0)
            range_weight_y = np.reshape(_range_weight_y,(1,FEATURE_MAP_SIZE,FEATURE_MAP_SIZE)).repeat(reg_x.shape[0],0)
            tmp_reg_x = (range_weight_x-reg_x)**2
            tmp_reg_y = (range_weight_y-reg_y)**2
            tmp_reg = (tmp_reg_x+tmp_reg_y)**0.5+1.8  #origin 1.8
            tmp_reg = heatmaps[:,n,...]/tmp_reg
            tmp_reg = tmp_reg[:,np.newaxis,:,:]
            reg_x,reg_y = maxPoint(tmp_reg, center=False)
            reg_x[reg_x>47] = 47
            reg_x[reg_x<0] = 0
            reg_y[reg_y>47] = 47
            reg_y[reg_y<0] = 0
            score = heatmaps[dim0,dim1+n,reg_y,reg_x]
            offset_x = offsets[dim0,dim1+n*2,reg_y,reg_x]  #*img_size//4
            offset_y = offsets[dim0,dim1+n*2+1,reg_y,reg_x]  #*img_size//4
            res_x = (reg_x+offset_x)/(img_size//4)
            res_y = (reg_y+offset_y)/(img_size//4)
            res_x[score<hm_th] = -1
            res_y[score<hm_th] = -1
            res.extend([res_x, res_y])
        res = np.concatenate(res,axis=1)  #bs*34
    elif mode == 'label':
        kps_mask = kps_mask.detach().cpu().numpy()
        data = data.detach().cpu().numpy()
        batch_size = data.shape[0]
        heatmaps = data[:,:17,:,:]
        centers = data[:,17:18,:,:]
        regs = data[:,18:52,:,:]
        offsets = data[:,52:,:,:]
        cx,cy = maxPoint(centers)
        dim0 = np.arange(batch_size,dtype=np.int32).reshape(batch_size,1)
        dim1 = np.zeros((batch_size,1),dtype=np.int32)
        res = []
        for n in range(num_joints):
            reg_x_origin = (regs[dim0,dim1+n*2,cy,cx]+0.5).astype(np.int32)
            reg_y_origin = (regs[dim0,dim1+n*2+1,cy,cx]+0.5).astype(np.int32)
            reg_x = reg_x_origin+cx
            reg_y = reg_y_origin+cy
            reg_x[reg_x>47] = 47
            reg_x[reg_x<0] = 0
            reg_y[reg_y>47] = 47
            reg_y[reg_y<0] = 0
            offset_x = offsets[dim0,dim1+n*2,reg_y,reg_x]  #*img_size//4
            offset_y = offsets[dim0,dim1+n*2+1,reg_y,reg_x]  #*img_size//4
            res_x = (reg_x+offset_x)/(img_size//4)
            res_y = (reg_y+offset_y)/(img_size//4)
            res_x[kps_mask[:,n]==0] = -1
            res_y[kps_mask[:,n]==0] = -1
            res.extend([res_x, res_y])
        res = np.concatenate(res,axis=1)  #bs*34
    return res

def label2heatmap(keypoints, other_keypoints, img_size):
    '''
    Convert labeled keypoints to heatmaps for keypoints.

    Args:
        keypoints: Target person's keypoints
        other_keypoints: Other people's keypoints
        img_size: Size of the image

    Returns:
        heatmaps: Heatmaps for keypoints
        sigma: Value used for heatmap generation
    '''
    #keypoints: target person
    #other_keypoints: other people's keypoints that need to be added to the heatmap
    heatmaps = []
    keypoints_range = np.reshape(keypoints,(-1,3))
    keypoints_range = keypoints_range[keypoints_range[:,2]>0]
    min_x = np.min(keypoints_range[:,0])
    min_y = np.min(keypoints_range[:,1])
    max_x = np.max(keypoints_range[:,0])
    max_y = np.max(keypoints_range[:,1])
    area = (max_y-min_y)*(max_x-min_x)
    sigma = 3
    if area < 0.16:
        sigma = 3
    elif area < 0.3:
        sigma = 5
    else:
        sigma = 7
    for i in range(0,len(keypoints),3):
        if keypoints[i+2]==0:
            heatmaps.append(np.zeros((img_size//4, img_size//4)))
            continue
        x = int(keypoints[i]*img_size//4)
        y = int(keypoints[i+1]*img_size//4)
        if x==img_size//4: x = img_size//4-1
        if y==img_size//4: y = img_size//4-1
        if x>img_size//4 or x<0: x = -1
        if y>img_size//4 or y<0: y = -1
        heatmap = generate_heatmap(x, y, other_keypoints[i//3], (img_size//4, img_size//4), sigma)
        heatmaps.append(heatmap)
    heatmaps = np.array(heatmaps, dtype=np.float32)
    return heatmaps,sigma


def generate_heatmap(x, y, other_keypoints, size, sigma):
    '''
    Generate a heatmap for a specific keypoint.

    Args:
        x, y: Absolute position of the keypoint
        other_keypoints: Position of other keypoints
        size: Size of the heatmap
        sigma: Value used for heatmap generation

    Returns:
        heatmap: Generated heatmap for the keypoint
    '''
    #x,y: absolute position
    #other_keypoints: relative positions
    sigma += 6
    heatmap = np.zeros(size)
    if x<0 or y<0 or x>=size[0] or y>=size[1]:
        return heatmap
    tops = [[x,y]]
    if len(other_keypoints)>0:
        #add other people's keypoints
        for i in range(len(other_keypoints)):
            x = int(other_keypoints[i][0]*size[0])
            y = int(other_keypoints[i][1]*size[1])
            if x==size[0]: x = size[0]-1
            if y==size[1]: y = size[1]-1
            if x>size[0] or x<0 or y>size[1] or y<0: continue
            tops.append([x,y])
    for top in tops:
        #heatmap[top[1]][top[0]] = 1
        x,y = top
        x0 = max(0,x-sigma//2)
        x1 = min(size[0],x+sigma//2)
        y0 = max(0,y-sigma//2)
        y1 = min(size[1],y+sigma//2)
        for map_y in range(y0, y1):
            for map_x in range(x0, x1):
                d2 = ((map_x - x) ** 2 + (map_y - y) ** 2)**0.5
                if d2<=sigma//2:
                    heatmap[map_y, map_x] += math.exp(-d2/(sigma//2)*3)
                    if heatmap[map_y, map_x] > 1:
                        heatmap[map_y, map_x] = 1
    # heatmap[heatmap<0.1] = 0
    return heatmap


def label2center(cx, cy, other_centers, img_size, sigma):
    '''
    Convert labeled keypoints to a center heatmap.

    Args:
        cx, cy: Center coordinates
        other_centers: Other people's centers
        img_size: Size of the image
        sigma: Value used for heatmap generation

    Returns:
        heatmaps: Heatmap representing the center
    '''
    heatmaps = []
    heatmap = generate_heatmap(cx, cy, other_centers, (img_size//4, img_size//4), sigma+2)
    heatmaps.append(heatmap)
    heatmaps = np.array(heatmaps, dtype=np.float32)
    return heatmaps


def label2reg(keypoints, cx, cy, img_size):
    '''
    Convert labeled keypoints to regression maps.

    Args:
        keypoints: Labeled keypoints
        cx, cy: Center coordinates
        img_size: Size of the image

    Returns:
        heatmaps: Regression maps for keypoints
    '''
    heatmaps = np.zeros((len(keypoints)//3*2, img_size//4, img_size//4), dtype=np.float32)
    for i in range(len(keypoints)//3):
        if keypoints[i*3+2]==0:
            continue
        x = keypoints[i*3]*img_size//4
        y = keypoints[i*3+1]*img_size//4
        if x==img_size//4: x = img_size//4-1
        if y==img_size//4: y = img_size//4-1
        if x>img_size//4 or x<0 or y>img_size//4 or y<0:
            continue
        reg_x = x-cx
        reg_y = y-cy
        for j in range(cy-2,cy+3):
            if j<0 or j>img_size//4-1:
                continue
            for k in range(cx-2,cx+3):
                if k<0 or k>img_size//4-1:
                    continue
                if cx<img_size//4/2-1:
                    heatmaps[i*2][j][k] = reg_x-(cx-k)  #/(img_size//4)
                else:
                    heatmaps[i*2][j][k] = reg_x+(cx-k)  #/(img_size//4)
                if cy<img_size//4/2-1:
                    heatmaps[i*2+1][j][k] = reg_y-(cy-j)  #/(img_size//4)
                else:
                    heatmaps[i*2+1][j][k] = reg_y+(cy-j)
    return heatmaps


def label2offset(keypoints, cx, cy, regs, img_size):
    '''
    Convert labeled keypoints to offset maps.

    Args:
        keypoints: Labeled keypoints
        cx, cy: Center coordinates
        regs: Regression maps
        img_size: Size of the image

    Returns:
        heatmaps: Offset maps for keypoints
    '''
    heatmaps = np.zeros((len(keypoints)//3*2, img_size//4, img_size//4), dtype=np.float32)
    for i in range(len(keypoints)//3):
        if keypoints[i*3+2]==0:
            continue
        large_x = int(keypoints[i*3]*img_size)
        large_y = int(keypoints[i*3+1]*img_size)
        small_x = int(regs[i*2,cy,cx]+cx)
        small_y = int(regs[i*2+1,cy,cx]+cy)
        offset_x = large_x/4-small_x
        offset_y = large_y/4-small_y
        if small_x==img_size//4: small_x = img_size//4-1
        if small_y==img_size//4: small_y = img_size//4-1
        if small_x>img_size//4 or small_x<0 or small_y>img_size//4 or small_y<0:
            continue
        heatmaps[i*2][small_y][small_x] = offset_x  #/(img_size//4)
        heatmaps[i*2+1][small_y][small_x] = offset_y  #/(img_size//4)
    return heatmaps

class TensorDataset(Dataset):
    '''
    Custom Dataset class for handling data loading and preprocessing
    '''

    def __init__(self, data_labels, img_dir, img_size, data_aug=None):
        self.data_labels = data_labels
        self.img_dir = img_dir
        self.data_aug = data_aug
        self.img_size = img_size
        self.interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA,
                               cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]

    def __getitem__(self, index):
        item = self.data_labels[index]
        """
        item = {
            "img_name": save_name,
            "keypoints": save_keypoints,
            "center": save_center,
            "other_centers": other_centers,
            "other_keypoints": other_keypoints,
        }
        """
        img_path = os.path.join(self.img_dir, item["img_name"])
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.img_size, self.img_size),
                         interpolation=random.choice(self.interp_methods))
        #### Data Augmentation
        if self.data_aug is not None:
            img, item = self.data_aug(img, item)
        img = img.astype(np.float32)
        img = np.transpose(img, axes=[2,0,1])
        keypoints = item["keypoints"]
        center = item['center']
        other_centers = item["other_centers"]
        other_keypoints = item["other_keypoints"]
        kps_mask = np.ones(len(keypoints)//3)
        for i in range(len(keypoints)//3):
            if keypoints[i*3+2]==0:
                kps_mask[i] = 0
        heatmaps,sigma = label2heatmap(keypoints, other_keypoints, self.img_size)  #(17, 48, 48)
        cx = min(max(0,int(center[0]*self.img_size//4)),self.img_size//4-1)
        cy = min(max(0,int(center[1]*self.img_size//4)),self.img_size//4-1)
        centers = label2center(cx, cy, other_centers, self.img_size, sigma)  #(1, 48, 48)
        regs = label2reg(keypoints, cx, cy, self.img_size)  #(34, 48, 48)
        offsets = label2offset(keypoints, cx, cy, regs, self.img_size)  #(34, 48, 48)
        labels = np.concatenate([heatmaps,centers,regs,offsets],axis=0)
        img = img / 127.5 - 1.0
        return img, labels, kps_mask, img_path

    def __len__(self):
        return len(self.data_labels)


def getDataLoader(mode, input_data):
    '''
    Get a data loader based on mode (e.g., evaluation).

    Args:
        mode: Mode of data loader (e.g., 'eval')
        input_data: Input data

    Returns:
        data_loader: DataLoader for the specified mode
    '''
    if mode=="eval":
        val_loader = torch.utils.data.DataLoader(
            TensorDataset(input_data[0],
                          EVAL_IMG_PATH,
                          IMG_SIZE,
                          ),
            batch_size=1,
            shuffle=False,
            num_workers=0,
            pin_memory=False)
        return val_loader


class Data():
    '''
    Class for managing data and obtaining the evaluation data loader.
    '''
    def __init__(self):
        pass

    def getEvalDataloader(self):
        with open(EVAL_LABLE_PATH, 'r') as f:
            data_label_list = json.loads(f.readlines()[0])
        print("[INFO] Total images: ", len(data_label_list))
        input_data = [data_label_list]
        data_loader = getDataLoader("eval",
                                    input_data)
        return data_loader


# Configs for the onnx inference session
def make_parser():
    '''
    Create the argument parser for MoveNet ONNX runtime inference.

    Returns:
        args: Parsed arguments for MoveNet inference
    '''
    parser = argparse.ArgumentParser("movenet onnxruntime inference")
    parser.add_argument(
        "--ipu",
        action="store_true",
        help="Use IPU for inference.",
    )
    parser.add_argument(
        "--provider_config",
        type=str,
        default="vaip_config.json",
        help="Path of the config file for setting provider_options.",
    )
    return parser.parse_args()


if __name__ == '__main__':

    args = make_parser()

    if args.ipu:
        providers = ["VitisAIExecutionProvider"]
        provider_options = [{"config_file": args.provider_config}]
    else:
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        provider_options = None
    # Get evaluation data loader using the Data class
    data = Data()
    data_loader = data.getEvalDataloader()
    # Load MoveNet model using ONNX runtime
    model = rt.InferenceSession(MODEL_DIR, providers=providers, provider_options=provider_options)

    correct = 0
    total = 0
    # Loop through the data loader for evaluation
    for batch_idx, (imgs, labels, kps_mask, img_names) in enumerate(data_loader):
        if batch_idx%100 == 0:
            print('Finish ', batch_idx)
        imgs = imgs.detach().cpu().numpy()
        output = model.run(['1548','1607','1665','1723'], {'blob.1': imgs})
        pre = movenetDecode(output, kps_mask, mode='output', img_size=IMG_SIZE)
        gt = movenetDecode(labels, kps_mask, mode='label', img_size=IMG_SIZE)
        acc = myAcc(pre, gt)
        correct += sum(acc)
        total += len(acc)
    # Compute and print accuracy based on evaluated data
    acc = correct/total
    print('[Info] acc: {:.3f}% \n'.format(100. * acc))
make_coco_data_17keypoints.py
ADDED
@@ -0,0 +1,277 @@
"""
@Fire
https://github.com/fire717
"""
import os
import json
import pickle
import cv2
import numpy as np


"""
The 'segmentation' format depends on whether the instance is a single object
(iscrowd=0, polygon format) or a group of objects (iscrowd=1, RLE format).

iscrowd=1 is used when a group of objects (e.g., a crowd of people) is annotated.

Keypoint annotation format: x,y,v,x,y,v,...
where v: 0 = not labeled; 1 = labeled but not visible (occluded); 2 = labeled and visible

Keypoint order: 'keypoints': ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 'left_wrist',
'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle',
'right_ankle']
"""


def main(img_dir, labels_path, output_name, output_img_dir):

    if not os.path.exists(output_img_dir):
        os.makedirs(output_img_dir)

    with open(labels_path, 'r') as f:
        data = json.load(f)

    #print("total: ", len(data))  # 5
    #print(data.keys())  #['info', 'licenses', 'images', 'annotations', 'categories']
    #print(len(data['annotations']), len(data['images']))  #88153 40504
    #print(data['categories'])
    """
    [{'supercategory': 'person', 'name': 'person',
    'skeleton': [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13],
    [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5],
    [4, 6], [5, 7]],
    'keypoints': ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 'left_wrist',
    'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle',
    'right_ankle'], 'id': 1}]
    """
    #print(data['images'][:3])  # each entry has file_name and id

    img_id_to_name = {}
    img_name_to_id = {}
    for item in data['images']:
        idx = item['id']
        name = item['file_name']
        img_id_to_name[idx] = name
        img_name_to_id[name] = idx
    print(len(img_id_to_name))

    anno_by_imgname = {}
    for annotation in data['annotations']:
        name = img_id_to_name[annotation['image_id']]
        if name in anno_by_imgname:
            anno_by_imgname[name] += [annotation]
        else:
            anno_by_imgname[name] = [annotation]
    print(len(anno_by_imgname))

    new_label = []
    for k,v in anno_by_imgname.items():
        #filter out images with more than 3 people
        if len(v)>3:
            continue

        # print(k)
        # print(v)

        img = cv2.imread(os.path.join(img_dir, k))
        if img is None:
            print(os.path.join(img_dir, k))
            continue
        h,w = img.shape[:2]
        for idx,item in enumerate(v):
            if item['iscrowd'] != 0:
                continue

            bbox = [int(x) for x in item['bbox']]  #x,y,w,h
            # cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), (0,255,0), 2)

            keypoints = item['keypoints']

            # for i in range(len(keypoints)//3):
            #     x = keypoints[i*3]
            #     y = keypoints[i*3+1]
            #     z = keypoints[i*3+2]  # 0 = not labeled; 1 = labeled, occluded; 2 = labeled, visible
            #     # print(x,y,z)
            #     if z==1:
            #         color = (255,0,0)
            #     elif z==2:
            #         color = (0,0,255)
            #     else:
            #         continue
            #     cv2.circle(img, (x, y), 4, color, 3)

            # merge bbox and keypoints to get max bbox
            keypoints = np.array(keypoints).reshape((17,3))

            keypoints_v = keypoints[keypoints[:,2]>0]
            if len(keypoints_v)<8:  #filter out samples without enough labeled keypoints
                continue
            min_key_x = np.min(keypoints_v[:,0])
            max_key_x = np.max(keypoints_v[:,0])
            min_key_y = np.min(keypoints_v[:,1])
            max_key_y = np.max(keypoints_v[:,1])

            x0 = min(bbox[0], min_key_x)
            x1 = max(bbox[0]+bbox[2], max_key_x)
            y0 = min(bbox[1], min_key_y)
            y1 = max(bbox[1]+bbox[3], max_key_y)
            # cv2.rectangle(img, (x0, y0), (x1, y1), (0,255,255), 2)

            # expand to square then expand
            cx = (x0+x1)/2
            cy = (y0+y1)/2

            half_size = ((x1-x0)+(y1-y0))/2 * EXPAND_RATIO
            new_x0 = int(cx - half_size)
            new_x1 = int(cx + half_size)
            new_y0 = int(cy - half_size)
            new_y1 = int(cy + half_size)

            #pad where the crop exceeds the image edge
            pad_top = 0
            pad_left = 0
            pad_right = 0
            pad_bottom = 0
            if new_x0 < 0:
                pad_left = -new_x0+1
            if new_y0 < 0:
                pad_top = -new_y0+1
            if new_x1 > w:
                pad_right = new_x1-w+1
            if new_y1 > h:
                pad_bottom = new_y1-h+1

            pad_img = np.zeros((h+pad_top+pad_bottom, w+pad_left+pad_right, 3))
            pad_img[pad_top:pad_top+h, pad_left:pad_left+w] = img
            new_x0 += pad_left
            new_y0 += pad_top
            new_x1 += pad_left
            new_y1 += pad_top
            # cv2.rectangle(pad_img, (new_x0, new_y0), (new_x1, new_y1), (0,255,0), 2)

            # final save data
            save_name = k[:-4]+"_"+str(idx)+".jpg"
            new_w = new_x1-new_x0
            new_h = new_y1-new_y0
            save_img = pad_img[new_y0:new_y1, new_x0:new_x1]
            save_bbox = [(bbox[0]+pad_left-new_x0)/new_w,
                         (bbox[1]+pad_top-new_y0)/new_h,
                         (bbox[0]+bbox[2]+pad_left-new_x0)/new_w,
                         (bbox[1]+bbox[3]+pad_top-new_y0)/new_h
                         ]
            save_center = [(save_bbox[0]+save_bbox[2])/2, (save_bbox[1]+save_bbox[3])/2]

            save_keypoints = []
            for kid in range(len(keypoints)):
                save_keypoints.extend([(int(keypoints[kid][0])+pad_left-new_x0)/new_w,
                                       (int(keypoints[kid][1])+pad_top-new_y0)/new_h,
                                       int(keypoints[kid][2])
                                       ])
            other_centers = []
            other_keypoints = [[] for _ in range(17)]
            for idx2,item2 in enumerate(v):
                if item2['iscrowd'] != 0 or idx2==idx:
                    continue
                bbox2 = [int(x) for x in item2['bbox']]  #x,y,w,h

                save_bbox2 = [(bbox2[0]+pad_left-new_x0)/new_w,
                              (bbox2[1]+pad_top-new_y0)/new_h,
                              (bbox2[0]+bbox2[2]+pad_left-new_x0)/new_w,
                              (bbox2[1]+bbox2[3]+pad_top-new_y0)/new_h
                              ]
                save_center2 = [(save_bbox2[0]+save_bbox2[2])/2,
                                (save_bbox2[1]+save_bbox2[3])/2]
                if save_center2[0]>0 and save_center2[0]<1 and save_center2[1]>0 and save_center2[1]<1:
                    other_centers.append(save_center2)

                keypoints2 = item2['keypoints']
                keypoints2 = np.array(keypoints2).reshape((17,3))
                for kid2 in range(17):
                    if keypoints2[kid2][2]==0:
                        continue
                    kx = (keypoints2[kid2][0]+pad_left-new_x0)/new_w
                    ky = (keypoints2[kid2][1]+pad_top-new_y0)/new_h
                    if kx>0 and kx<1 and ky>0 and ky<1:
                        other_keypoints[kid2].append([kx,ky])

            save_item = {
                "img_name": save_name,
                "keypoints": save_keypoints,
                "center": save_center,
                "bbox": save_bbox,
                "other_centers": other_centers,
                "other_keypoints": other_keypoints,
            }
            new_label.append(save_item)

            ### visualization for inspection; only runs when SHOW_POINTS_ON_IMG is True
            if SHOW_POINTS_ON_IMG:
                cv2.circle(save_img, (int(save_center[0]*new_w), int(save_center[1]*new_h)), 4, (0,255,0), 3)
                for show_kid in range(len(save_keypoints)//3):
                    if save_keypoints[show_kid*3+2]==1:
                        color = (255,0,0)
                    elif save_keypoints[show_kid*3+2]==2:
                        color = (0,0,255)
                    else:
                        continue
                    cv2.circle(save_img, (int(save_keypoints[show_kid*3]*new_w),
                                          int(save_keypoints[show_kid*3+1]*new_h)), 3, color, 2)
                cv2.rectangle(save_img, (int(save_bbox[0]*new_w), int(save_bbox[1]*new_h)),
                              (int(save_bbox[2]*new_w), int(save_bbox[3]*new_h)), (0,255,0), 2)
                for show_c in other_centers:
                    cv2.circle(save_img, (int(show_c[0]*new_w), int(show_c[1]*new_h)), 4, (0,255,255), 3)
                for show_ks in other_keypoints:
                    for show_k in show_ks:
                        cv2.circle(save_img, (int(show_k[0]*new_w), int(show_k[1]*new_h)), 3, (255,255,0), 2)

            cv2.imwrite(os.path.join(output_img_dir, save_name), save_img)

            # print(save_item, save_img.shape)
            # cv2.imwrite(os.path.join("show", k), pad_img)

    with open(output_name, 'w') as f:
        json.dump(new_label, f, ensure_ascii=False)
    print('Total write ', len(new_label))


if __name__ == '__main__':

    #### PARAM ####

    SHOW_POINTS_ON_IMG = False
    # whether to draw points on the cropped images for debugging

    EXPAND_RATIO = 1.
    # how much the person bbox is expanded toward the image edge

    output_img_dir = "./data/croped/imgs"
    img_dir = "./data/val2017"
    labels_path = "./data/annotations/person_keypoints_val2017.json"
    output_name = "./data/croped/val2017.json"
    main(img_dir, labels_path, output_name, output_img_dir)
    img_dir = "./data/train2017"
    labels_path = "./data/annotations/person_keypoints_train2017.json"
    output_name = "./data/croped/train2017.json"
    main(img_dir, labels_path, output_name, output_img_dir)
movenet_int8.onnx
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e3243a492e8886d29d9dcdd2831da3c69eb18bc76185f2db565cfe8ddcd58d8
size 7681846
requirements.txt
ADDED
@@ -0,0 +1,4 @@
torch
numpy
opencv-python
# onnxruntime