Spaces:
Running
on
Zero
Running
on
Zero
Update
Browse files- .gitattributes +1 -0
- app.py +79 -17
- pyproject.toml +1 -0
- requirements.txt +3 -0
- uv.lock +19 -0
- videos/README.md +6 -0
- videos/pexels-allan-mas-5362370.mp4 +3 -0
- videos/pexels-artem-podrez-6003986.mp4 +3 -0
- videos/pexels-c-technical-6344381.mp4 +3 -0
- videos/pexels-roman-odintsov-6815069.mp4 +3 -0
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
*.jpg filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
*.jpg filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -7,16 +7,22 @@ https://colab.research.google.com/drive/1e8fcby5rhKZWcr9LSN8mNbQ0TU4Dxxpo
|
|
7 |
"""
|
8 |
|
9 |
import pathlib
|
|
|
10 |
|
|
|
11 |
import gradio as gr
|
|
|
12 |
import PIL.Image
|
13 |
import spaces
|
14 |
import supervision as sv
|
15 |
import torch
|
|
|
16 |
from transformers import AutoProcessor, RTDetrForObjectDetection, VitPoseForPoseEstimation
|
17 |
|
18 |
DESCRIPTION = "# ViTPose"
|
19 |
|
|
|
|
|
20 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
21 |
|
22 |
person_detector_name = "PekingU/rtdetr_r50vd_coco_o365"
|
@@ -30,7 +36,7 @@ pose_model = VitPoseForPoseEstimation.from_pretrained(pose_model_name, device_ma
|
|
30 |
|
31 |
@spaces.GPU
|
32 |
@torch.inference_mode()
|
33 |
-
def
|
34 |
inputs = person_image_processor(images=image, return_tensors="pt").to(device)
|
35 |
outputs = person_model(**inputs)
|
36 |
results = person_image_processor.post_process_object_detection(
|
@@ -101,26 +107,82 @@ def run(image: PIL.Image.Image) -> tuple[PIL.Image.Image, list[dict]]:
|
|
101 |
return vertex_annotator.annotate(scene=annotated_frame, key_points=keypoints), human_readable_results
|
102 |
|
103 |
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
|
107 |
with gr.Blocks(css_paths="style.css") as demo:
|
108 |
gr.Markdown(DESCRIPTION)
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
|
126 |
if __name__ == "__main__":
|
|
|
7 |
"""
|
8 |
|
9 |
import pathlib
|
10 |
+
import tempfile
|
11 |
|
12 |
+
import cv2
|
13 |
import gradio as gr
|
14 |
+
import numpy as np
|
15 |
import PIL.Image
|
16 |
import spaces
|
17 |
import supervision as sv
|
18 |
import torch
|
19 |
+
import tqdm
|
20 |
from transformers import AutoProcessor, RTDetrForObjectDetection, VitPoseForPoseEstimation
|
21 |
|
22 |
DESCRIPTION = "# ViTPose"
|
23 |
|
24 |
+
MAX_NUM_FRAMES = 300
|
25 |
+
|
26 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
27 |
|
28 |
person_detector_name = "PekingU/rtdetr_r50vd_coco_o365"
|
|
|
36 |
|
37 |
@spaces.GPU
|
38 |
@torch.inference_mode()
|
39 |
+
def process_image(image: PIL.Image.Image) -> tuple[PIL.Image.Image, list[dict]]:
|
40 |
inputs = person_image_processor(images=image, return_tensors="pt").to(device)
|
41 |
outputs = person_model(**inputs)
|
42 |
results = person_image_processor.post_process_object_detection(
|
|
|
107 |
return vertex_annotator.annotate(scene=annotated_frame, key_points=keypoints), human_readable_results
|
108 |
|
109 |
|
110 |
+
def process_video(
    video_path: str,
    progress: gr.Progress = gr.Progress(track_tqdm=True),  # noqa: ARG001, B008
) -> str:
    """Annotate the leading frames of a video with pose estimation results.

    Frames are read one by one, passed through ``process_image`` and written
    to a new ``.mp4`` file encoded with the ``mp4v`` codec. At most
    ``MAX_NUM_FRAMES`` frames are processed; the rest of the input is dropped.

    Args:
        video_path: Path of the input video file.
        progress: Gradio progress tracker. It is not used directly; it is
            created with ``track_tqdm=True`` and the frame loop below is
            wrapped in a tqdm progress bar.

    Returns:
        Path of the annotated video. The file is created with
        ``delete=False``, so it is not removed by this function.

    Raises:
        gr.Error: If the input video cannot be opened.
    """
    # `import tqdm` alone does not load the `tqdm.auto` submodule; import it
    # explicitly instead of relying on another package having done so.
    import tqdm.auto

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise gr.Error("Failed to open the input video.")

    writer = None
    try:
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Some containers report no (or a non-positive) frame count. In that
        # case fall back to the hard limit and let a failed read end the loop.
        reported_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_limit = min(MAX_NUM_FRAMES, reported_frames) if reported_frames > 0 else MAX_NUM_FRAMES

        # Only reserve a unique output path here; the handle is closed before
        # OpenCV opens the same path for writing.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out_file:
            output_path = out_file.name

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        for _ in tqdm.auto.tqdm(range(frame_limit)):
            ok, frame = cap.read()
            if not ok:
                break
            # OpenCV frames are BGR; reverse the channel axis to get RGB for
            # PIL, and reverse it again before handing the result back.
            rgb_frame = frame[:, :, ::-1]
            annotated_frame, _ = process_image(PIL.Image.fromarray(rgb_frame))
            writer.write(np.asarray(annotated_frame)[:, :, ::-1])
    finally:
        # Release both handles even when a frame fails to process.
        if writer is not None:
            writer.release()
        cap.release()
    return output_path


# NOTE(review): presumably flags this handler as ZeroGPU-aware even though it
# is not decorated with @spaces.GPU itself — confirm against the `spaces` package.
process_video.zerogpu = True  # type: ignore
|
137 |
|
138 |
|
139 |
# Gradio UI: one tab for still images and one for short videos.
with gr.Blocks(css_paths="style.css") as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        # Image tab: upload on the left, annotated image and JSON on the right.
        with gr.Tab("Image"):
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(type="pil", label="Input Image")
                    run_button_image = gr.Button()
                with gr.Column():
                    output_image = gr.Image(label="Output Image")
                    output_json = gr.JSON(label="Output JSON")

            # Every JPEG under images/ is offered as a clickable example.
            image_examples = sorted(pathlib.Path("images").glob("*.jpg"))
            gr.Examples(
                fn=process_image,
                examples=image_examples,
                inputs=input_image,
                outputs=[output_image, output_json],
            )

            run_button_image.click(
                fn=process_image,
                inputs=input_image,
                outputs=[output_image, output_json],
            )

        # Video tab: same layout, with a single annotated video as output.
        with gr.Tab("Video"):
            gr.Markdown(f"The input video will be truncated to {MAX_NUM_FRAMES} frames.")

            with gr.Row():
                with gr.Column():
                    input_video = gr.Video(label="Input Video")
                    run_button_video = gr.Button()
                with gr.Column():
                    output_video = gr.Video(label="Output Video")

            # Every MP4 under videos/ is offered as an example; example
            # caching is explicitly turned off for this tab.
            video_examples = sorted(pathlib.Path("videos").glob("*.mp4"))
            gr.Examples(
                fn=process_video,
                examples=video_examples,
                inputs=input_video,
                outputs=output_video,
                cache_examples=False,
            )
            run_button_video.click(
                fn=process_video,
                inputs=input_video,
                outputs=output_video,
            )
|
186 |
|
187 |
|
188 |
if __name__ == "__main__":
|
pyproject.toml
CHANGED
@@ -8,6 +8,7 @@ dependencies = [
|
|
8 |
"accelerate>=1.2.1",
|
9 |
"gradio>=5.11.0",
|
10 |
"hf-transfer>=0.1.9",
|
|
|
11 |
"setuptools>=75.8.0",
|
12 |
"spaces>=0.32.0",
|
13 |
"supervision>=0.25.1",
|
|
|
8 |
"accelerate>=1.2.1",
|
9 |
"gradio>=5.11.0",
|
10 |
"hf-transfer>=0.1.9",
|
11 |
+
"opencv-python-headless>=4.10.0.84",
|
12 |
"setuptools>=75.8.0",
|
13 |
"spaces>=0.32.0",
|
14 |
"supervision>=0.25.1",
|
requirements.txt
CHANGED
@@ -108,6 +108,7 @@ numpy==2.2.1
|
|
108 |
# gradio
|
109 |
# matplotlib
|
110 |
# opencv-python
|
|
|
111 |
# pandas
|
112 |
# scipy
|
113 |
# supervision
|
@@ -145,6 +146,8 @@ nvidia-nvtx-cu12==12.1.105
|
|
145 |
# via torch
|
146 |
opencv-python==4.10.0.84
|
147 |
# via supervision
|
|
|
|
|
148 |
orjson==3.10.14
|
149 |
# via gradio
|
150 |
packaging==24.2
|
|
|
108 |
# gradio
|
109 |
# matplotlib
|
110 |
# opencv-python
|
111 |
+
# opencv-python-headless
|
112 |
# pandas
|
113 |
# scipy
|
114 |
# supervision
|
|
|
146 |
# via torch
|
147 |
opencv-python==4.10.0.84
|
148 |
# via supervision
|
149 |
+
opencv-python-headless==4.10.0.84
|
150 |
+
# via vitpose-transformers (pyproject.toml)
|
151 |
orjson==3.10.14
|
152 |
# via gradio
|
153 |
packaging==24.2
|
uv.lock
CHANGED
@@ -937,6 +937,23 @@ wheels = [
|
|
937 |
{ url = "https://files.pythonhosted.org/packages/ec/6c/fab8113424af5049f85717e8e527ca3773299a3c6b02506e66436e19874f/opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:32dbbd94c26f611dc5cc6979e6b7aa1f55a64d6b463cc1dcd3c95505a63e48fe", size = 38842521 },
|
938 |
]
|
939 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
940 |
[[package]]
|
941 |
name = "orjson"
|
942 |
version = "3.10.14"
|
@@ -1815,6 +1832,7 @@ dependencies = [
|
|
1815 |
{ name = "accelerate" },
|
1816 |
{ name = "gradio" },
|
1817 |
{ name = "hf-transfer" },
|
|
|
1818 |
{ name = "setuptools" },
|
1819 |
{ name = "spaces" },
|
1820 |
{ name = "supervision" },
|
@@ -1827,6 +1845,7 @@ requires-dist = [
|
|
1827 |
{ name = "accelerate", specifier = ">=1.2.1" },
|
1828 |
{ name = "gradio", specifier = ">=5.11.0" },
|
1829 |
{ name = "hf-transfer", specifier = ">=0.1.9" },
|
|
|
1830 |
{ name = "setuptools", specifier = ">=75.8.0" },
|
1831 |
{ name = "spaces", specifier = ">=0.32.0" },
|
1832 |
{ name = "supervision", specifier = ">=0.25.1" },
|
|
|
937 |
{ url = "https://files.pythonhosted.org/packages/ec/6c/fab8113424af5049f85717e8e527ca3773299a3c6b02506e66436e19874f/opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:32dbbd94c26f611dc5cc6979e6b7aa1f55a64d6b463cc1dcd3c95505a63e48fe", size = 38842521 },
|
938 |
]
|
939 |
|
940 |
+
[[package]]
|
941 |
+
name = "opencv-python-headless"
|
942 |
+
version = "4.10.0.84"
|
943 |
+
source = { registry = "https://pypi.org/simple" }
|
944 |
+
dependencies = [
|
945 |
+
{ name = "numpy" },
|
946 |
+
]
|
947 |
+
sdist = { url = "https://files.pythonhosted.org/packages/2f/7e/d20f68a5f1487adf19d74378d349932a386b1ece3be9be9915e5986db468/opencv-python-headless-4.10.0.84.tar.gz", hash = "sha256:f2017c6101d7c2ef8d7bc3b414c37ff7f54d64413a1847d89970b6b7069b4e1a", size = 95117755 }
|
948 |
+
wheels = [
|
949 |
+
{ url = "https://files.pythonhosted.org/packages/1c/9b/583c8d9259f6fc19413f83fd18dd8e6cbc8eefb0b4dc6da52dd151fe3272/opencv_python_headless-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a4f4bcb07d8f8a7704d9c8564c224c8b064c63f430e95b61ac0bffaa374d330e", size = 54835657 },
|
950 |
+
{ url = "https://files.pythonhosted.org/packages/c0/7b/b4c67f5dad7a9a61c47f7a39e4050e8a4628bd64b3c3daaeb755d759f928/opencv_python_headless-4.10.0.84-cp37-abi3-macosx_12_0_x86_64.whl", hash = "sha256:5ae454ebac0eb0a0b932e3406370aaf4212e6a3fdb5038cc86c7aea15a6851da", size = 56475470 },
|
951 |
+
{ url = "https://files.pythonhosted.org/packages/91/61/f838ce2046f3ec3591ea59ea3549085e399525d3b4558c4ed60b55ed88c0/opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46071015ff9ab40fccd8a163da0ee14ce9846349f06c6c8c0f2870856ffa45db", size = 29329705 },
|
952 |
+
{ url = "https://files.pythonhosted.org/packages/d1/09/248f86a404567303cdf120e4a301f389b68e3b18e5c0cc428de327da609c/opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:377d08a7e48a1405b5e84afcbe4798464ce7ee17081c1c23619c8b398ff18295", size = 49858781 },
|
953 |
+
{ url = "https://files.pythonhosted.org/packages/30/c0/66f88d58500e990a9a0a5c06f98862edf1d0a3a430781218a8c193948438/opencv_python_headless-4.10.0.84-cp37-abi3-win32.whl", hash = "sha256:9092404b65458ed87ce932f613ffbb1106ed2c843577501e5768912360fc50ec", size = 28675298 },
|
954 |
+
{ url = "https://files.pythonhosted.org/packages/26/d0/22f68eb23eea053a31655960f133c0be9726c6a881547e6e9e7e2a946c4f/opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:afcf28bd1209dd58810d33defb622b325d3cbe49dcd7a43a902982c33e5fad05", size = 38754031 },
|
955 |
+
]
|
956 |
+
|
957 |
[[package]]
|
958 |
name = "orjson"
|
959 |
version = "3.10.14"
|
|
|
1832 |
{ name = "accelerate" },
|
1833 |
{ name = "gradio" },
|
1834 |
{ name = "hf-transfer" },
|
1835 |
+
{ name = "opencv-python-headless" },
|
1836 |
{ name = "setuptools" },
|
1837 |
{ name = "spaces" },
|
1838 |
{ name = "supervision" },
|
|
|
1845 |
{ name = "accelerate", specifier = ">=1.2.1" },
|
1846 |
{ name = "gradio", specifier = ">=5.11.0" },
|
1847 |
{ name = "hf-transfer", specifier = ">=0.1.9" },
|
1848 |
+
{ name = "opencv-python-headless", specifier = ">=4.10.0.84" },
|
1849 |
{ name = "setuptools", specifier = ">=75.8.0" },
|
1850 |
{ name = "spaces", specifier = ">=0.32.0" },
|
1851 |
{ name = "supervision", specifier = ">=0.25.1" },
|
videos/README.md
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
These videos are from the following public domain sources:
|
2 |
+
|
3 |
+
- https://www.pexels.com/video/young-guy-doing-break-dance-on-the-street-5362370/
|
4 |
+
- https://www.pexels.com/video/a-woman-dancing-at-home-6003986/
|
5 |
+
- https://www.pexels.com/video/long-haired-man-dancing-in-a-library-6344381/
|
6 |
+
- https://www.pexels.com/video/a-female-model-dancing-around-6815069/
|
videos/pexels-allan-mas-5362370.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:747f9c2f9d19e4955603e1a13b69663187882d4c6a8fbcad18ddbd04ee792d4d
|
3 |
+
size 1972564
|
videos/pexels-artem-podrez-6003986.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1044083afc06aa6f956838c7fcd582c9cfd59ea3a994adc8a0f5889ffca4d9c8
|
3 |
+
size 2494082
|
videos/pexels-c-technical-6344381.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7763476045f4683d53d751fb8befaf637c0101a0693e72f5b582e6aa5ac63cac
|
3 |
+
size 3967587
|
videos/pexels-roman-odintsov-6815069.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44045b239c0f523bfeedc5871019ae9f67525fcf65ba46d7ca4516994e6b2f57
|
3 |
+
size 2617714
|