hysts HF staff committed on
Commit
3c574ec
·
1 Parent(s): 82b20ab
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  *.jpg filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  *.jpg filter=lfs diff=lfs merge=lfs -text
37
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -7,16 +7,22 @@ https://colab.research.google.com/drive/1e8fcby5rhKZWcr9LSN8mNbQ0TU4Dxxpo
7
  """
8
 
9
  import pathlib
 
10
 
 
11
  import gradio as gr
 
12
  import PIL.Image
13
  import spaces
14
  import supervision as sv
15
  import torch
 
16
  from transformers import AutoProcessor, RTDetrForObjectDetection, VitPoseForPoseEstimation
17
 
18
  DESCRIPTION = "# ViTPose"
19
 
 
 
20
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
 
22
  person_detector_name = "PekingU/rtdetr_r50vd_coco_o365"
@@ -30,7 +36,7 @@ pose_model = VitPoseForPoseEstimation.from_pretrained(pose_model_name, device_ma
30
 
31
  @spaces.GPU
32
  @torch.inference_mode()
33
- def run(image: PIL.Image.Image) -> tuple[PIL.Image.Image, list[dict]]:
34
  inputs = person_image_processor(images=image, return_tensors="pt").to(device)
35
  outputs = person_model(**inputs)
36
  results = person_image_processor.post_process_object_detection(
@@ -101,26 +107,82 @@ def run(image: PIL.Image.Image) -> tuple[PIL.Image.Image, list[dict]]:
101
  return vertex_annotator.annotate(scene=annotated_frame, key_points=keypoints), human_readable_results
102
 
103
 
104
- paths = sorted(pathlib.Path("images").glob("*.jpg"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
 
107
  with gr.Blocks(css_paths="style.css") as demo:
108
  gr.Markdown(DESCRIPTION)
109
- with gr.Row():
110
- with gr.Column():
111
- input_image = gr.Image(label="Input Image", type="pil")
112
- run_button = gr.Button()
113
- with gr.Column():
114
- output_image = gr.Image(label="Output Image")
115
- output_json = gr.JSON(label="Output JSON")
116
-
117
- gr.Examples(examples=paths, inputs=input_image, outputs=[output_image, output_json], fn=run)
118
-
119
- run_button.click(
120
- fn=run,
121
- inputs=input_image,
122
- outputs=[output_image, output_json],
123
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
 
126
  if __name__ == "__main__":
 
7
  """
8
 
9
  import pathlib
10
+ import tempfile
11
 
12
+ import cv2
13
  import gradio as gr
14
+ import numpy as np
15
  import PIL.Image
16
  import spaces
17
  import supervision as sv
18
  import torch
19
+ import tqdm
20
  from transformers import AutoProcessor, RTDetrForObjectDetection, VitPoseForPoseEstimation
21
 
22
  DESCRIPTION = "# ViTPose"
23
 
24
+ MAX_NUM_FRAMES = 300
25
+
26
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
27
 
28
  person_detector_name = "PekingU/rtdetr_r50vd_coco_o365"
 
36
 
37
  @spaces.GPU
38
  @torch.inference_mode()
39
+ def process_image(image: PIL.Image.Image) -> tuple[PIL.Image.Image, list[dict]]:
40
  inputs = person_image_processor(images=image, return_tensors="pt").to(device)
41
  outputs = person_model(**inputs)
42
  results = person_image_processor.post_process_object_detection(
 
107
  return vertex_annotator.annotate(scene=annotated_frame, key_points=keypoints), human_readable_results
108
 
109
 
110
+ def process_video(
111
+ video_path: str,
112
+ progress: gr.Progress = gr.Progress(track_tqdm=True), # noqa: ARG001, B008
113
+ ) -> str:
114
+ cap = cv2.VideoCapture(video_path)
115
+
116
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
117
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
118
+ fps = cap.get(cv2.CAP_PROP_FPS)
119
+ num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
120
+
121
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
122
+ with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out_file:
123
+ writer = cv2.VideoWriter(out_file.name, fourcc, fps, (width, height))
124
+ for _ in tqdm.auto.tqdm(range(min(MAX_NUM_FRAMES, num_frames))):
125
+ ok, frame = cap.read()
126
+ if not ok:
127
+ break
128
+ rgb_frame = frame[:, :, ::-1]
129
+ annotated_frame, _ = process_image(PIL.Image.fromarray(rgb_frame))
130
+ writer.write(np.asarray(annotated_frame)[:, :, ::-1])
131
+ writer.release()
132
+ cap.release()
133
+ return out_file.name
134
+
135
+
136
+ process_video.zerogpu = True # type: ignore
137
 
138
 
139
  with gr.Blocks(css_paths="style.css") as demo:
140
  gr.Markdown(DESCRIPTION)
141
+
142
+ with gr.Tabs():
143
+ with gr.Tab("Image"):
144
+ with gr.Row():
145
+ with gr.Column():
146
+ input_image = gr.Image(label="Input Image", type="pil")
147
+ run_button_image = gr.Button()
148
+ with gr.Column():
149
+ output_image = gr.Image(label="Output Image")
150
+ output_json = gr.JSON(label="Output JSON")
151
+ gr.Examples(
152
+ examples=sorted(pathlib.Path("images").glob("*.jpg")),
153
+ inputs=input_image,
154
+ outputs=[output_image, output_json],
155
+ fn=process_image,
156
+ )
157
+
158
+ run_button_image.click(
159
+ fn=process_image,
160
+ inputs=input_image,
161
+ outputs=[output_image, output_json],
162
+ )
163
+
164
+ with gr.Tab("Video"):
165
+ gr.Markdown(f"The input video will be truncated to {MAX_NUM_FRAMES} frames.")
166
+
167
+ with gr.Row():
168
+ with gr.Column():
169
+ input_video = gr.Video(label="Input Video")
170
+ run_button_video = gr.Button()
171
+ with gr.Column():
172
+ output_video = gr.Video(label="Output Video")
173
+
174
+ gr.Examples(
175
+ examples=sorted(pathlib.Path("videos").glob("*.mp4")),
176
+ inputs=input_video,
177
+ outputs=output_video,
178
+ fn=process_video,
179
+ cache_examples=False,
180
+ )
181
+ run_button_video.click(
182
+ fn=process_video,
183
+ inputs=input_video,
184
+ outputs=output_video,
185
+ )
186
 
187
 
188
  if __name__ == "__main__":
pyproject.toml CHANGED
@@ -8,6 +8,7 @@ dependencies = [
8
  "accelerate>=1.2.1",
9
  "gradio>=5.11.0",
10
  "hf-transfer>=0.1.9",
 
11
  "setuptools>=75.8.0",
12
  "spaces>=0.32.0",
13
  "supervision>=0.25.1",
 
8
  "accelerate>=1.2.1",
9
  "gradio>=5.11.0",
10
  "hf-transfer>=0.1.9",
11
+ "opencv-python-headless>=4.10.0.84",
12
  "setuptools>=75.8.0",
13
  "spaces>=0.32.0",
14
  "supervision>=0.25.1",
requirements.txt CHANGED
@@ -108,6 +108,7 @@ numpy==2.2.1
108
  # gradio
109
  # matplotlib
110
  # opencv-python
 
111
  # pandas
112
  # scipy
113
  # supervision
@@ -145,6 +146,8 @@ nvidia-nvtx-cu12==12.1.105
145
  # via torch
146
  opencv-python==4.10.0.84
147
  # via supervision
 
 
148
  orjson==3.10.14
149
  # via gradio
150
  packaging==24.2
 
108
  # gradio
109
  # matplotlib
110
  # opencv-python
111
+ # opencv-python-headless
112
  # pandas
113
  # scipy
114
  # supervision
 
146
  # via torch
147
  opencv-python==4.10.0.84
148
  # via supervision
149
+ opencv-python-headless==4.10.0.84
150
+ # via vitpose-transformers (pyproject.toml)
151
  orjson==3.10.14
152
  # via gradio
153
  packaging==24.2
uv.lock CHANGED
@@ -937,6 +937,23 @@ wheels = [
937
  { url = "https://files.pythonhosted.org/packages/ec/6c/fab8113424af5049f85717e8e527ca3773299a3c6b02506e66436e19874f/opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:32dbbd94c26f611dc5cc6979e6b7aa1f55a64d6b463cc1dcd3c95505a63e48fe", size = 38842521 },
938
  ]
939
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
940
  [[package]]
941
  name = "orjson"
942
  version = "3.10.14"
@@ -1815,6 +1832,7 @@ dependencies = [
1815
  { name = "accelerate" },
1816
  { name = "gradio" },
1817
  { name = "hf-transfer" },
 
1818
  { name = "setuptools" },
1819
  { name = "spaces" },
1820
  { name = "supervision" },
@@ -1827,6 +1845,7 @@ requires-dist = [
1827
  { name = "accelerate", specifier = ">=1.2.1" },
1828
  { name = "gradio", specifier = ">=5.11.0" },
1829
  { name = "hf-transfer", specifier = ">=0.1.9" },
 
1830
  { name = "setuptools", specifier = ">=75.8.0" },
1831
  { name = "spaces", specifier = ">=0.32.0" },
1832
  { name = "supervision", specifier = ">=0.25.1" },
 
937
  { url = "https://files.pythonhosted.org/packages/ec/6c/fab8113424af5049f85717e8e527ca3773299a3c6b02506e66436e19874f/opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:32dbbd94c26f611dc5cc6979e6b7aa1f55a64d6b463cc1dcd3c95505a63e48fe", size = 38842521 },
938
  ]
939
 
940
+ [[package]]
941
+ name = "opencv-python-headless"
942
+ version = "4.10.0.84"
943
+ source = { registry = "https://pypi.org/simple" }
944
+ dependencies = [
945
+ { name = "numpy" },
946
+ ]
947
+ sdist = { url = "https://files.pythonhosted.org/packages/2f/7e/d20f68a5f1487adf19d74378d349932a386b1ece3be9be9915e5986db468/opencv-python-headless-4.10.0.84.tar.gz", hash = "sha256:f2017c6101d7c2ef8d7bc3b414c37ff7f54d64413a1847d89970b6b7069b4e1a", size = 95117755 }
948
+ wheels = [
949
+ { url = "https://files.pythonhosted.org/packages/1c/9b/583c8d9259f6fc19413f83fd18dd8e6cbc8eefb0b4dc6da52dd151fe3272/opencv_python_headless-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a4f4bcb07d8f8a7704d9c8564c224c8b064c63f430e95b61ac0bffaa374d330e", size = 54835657 },
950
+ { url = "https://files.pythonhosted.org/packages/c0/7b/b4c67f5dad7a9a61c47f7a39e4050e8a4628bd64b3c3daaeb755d759f928/opencv_python_headless-4.10.0.84-cp37-abi3-macosx_12_0_x86_64.whl", hash = "sha256:5ae454ebac0eb0a0b932e3406370aaf4212e6a3fdb5038cc86c7aea15a6851da", size = 56475470 },
951
+ { url = "https://files.pythonhosted.org/packages/91/61/f838ce2046f3ec3591ea59ea3549085e399525d3b4558c4ed60b55ed88c0/opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46071015ff9ab40fccd8a163da0ee14ce9846349f06c6c8c0f2870856ffa45db", size = 29329705 },
952
+ { url = "https://files.pythonhosted.org/packages/d1/09/248f86a404567303cdf120e4a301f389b68e3b18e5c0cc428de327da609c/opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:377d08a7e48a1405b5e84afcbe4798464ce7ee17081c1c23619c8b398ff18295", size = 49858781 },
953
+ { url = "https://files.pythonhosted.org/packages/30/c0/66f88d58500e990a9a0a5c06f98862edf1d0a3a430781218a8c193948438/opencv_python_headless-4.10.0.84-cp37-abi3-win32.whl", hash = "sha256:9092404b65458ed87ce932f613ffbb1106ed2c843577501e5768912360fc50ec", size = 28675298 },
954
+ { url = "https://files.pythonhosted.org/packages/26/d0/22f68eb23eea053a31655960f133c0be9726c6a881547e6e9e7e2a946c4f/opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:afcf28bd1209dd58810d33defb622b325d3cbe49dcd7a43a902982c33e5fad05", size = 38754031 },
955
+ ]
956
+
957
  [[package]]
958
  name = "orjson"
959
  version = "3.10.14"
 
1832
  { name = "accelerate" },
1833
  { name = "gradio" },
1834
  { name = "hf-transfer" },
1835
+ { name = "opencv-python-headless" },
1836
  { name = "setuptools" },
1837
  { name = "spaces" },
1838
  { name = "supervision" },
 
1845
  { name = "accelerate", specifier = ">=1.2.1" },
1846
  { name = "gradio", specifier = ">=5.11.0" },
1847
  { name = "hf-transfer", specifier = ">=0.1.9" },
1848
+ { name = "opencv-python-headless", specifier = ">=4.10.0.84" },
1849
  { name = "setuptools", specifier = ">=75.8.0" },
1850
  { name = "spaces", specifier = ">=0.32.0" },
1851
  { name = "supervision", specifier = ">=0.25.1" },
videos/README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ These videos are from the following public domain sources:
2
+
3
+ - https://www.pexels.com/video/young-guy-doing-break-dance-on-the-street-5362370/
4
+ - https://www.pexels.com/video/a-woman-dancing-at-home-6003986/
5
+ - https://www.pexels.com/video/long-haired-man-dancing-in-a-library-6344381/
6
+ - https://www.pexels.com/video/a-female-model-dancing-around-6815069/
videos/pexels-allan-mas-5362370.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:747f9c2f9d19e4955603e1a13b69663187882d4c6a8fbcad18ddbd04ee792d4d
3
+ size 1972564
videos/pexels-artem-podrez-6003986.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1044083afc06aa6f956838c7fcd582c9cfd59ea3a994adc8a0f5889ffca4d9c8
3
+ size 2494082
videos/pexels-c-technical-6344381.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7763476045f4683d53d751fb8befaf637c0101a0693e72f5b582e6aa5ac63cac
3
+ size 3967587
videos/pexels-roman-odintsov-6815069.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44045b239c0f523bfeedc5871019ae9f67525fcf65ba46d7ca4516994e6b2f57
3
+ size 2617714