Spaces:
Running
on
Zero
Running
on
Zero
Initial commit
Browse files- .gitattributes +3 -0
- app.py +164 -0
- assets/controlnet_demo1.png +3 -0
- assets/controlnet_demo2.png +3 -0
- assets/examples/demo1.png +3 -0
- assets/examples/demo10.png +3 -0
- assets/examples/demo11.png +3 -0
- assets/examples/demo12.png +3 -0
- assets/examples/demo13.png +3 -0
- assets/examples/demo14.png +3 -0
- assets/examples/demo15.png +3 -0
- assets/examples/demo16.png +3 -0
- assets/examples/demo17.png +3 -0
- assets/examples/demo18.png +3 -0
- assets/examples/demo19.png +3 -0
- assets/examples/demo2.png +3 -0
- assets/examples/demo20.png +3 -0
- assets/examples/demo3.png +3 -0
- assets/examples/demo4.png +3 -0
- assets/examples/demo5.png +3 -0
- assets/examples/demo7.png +3 -0
- assets/examples/demo8.png +3 -0
- assets/examples/demo9.png +3 -0
- assets/examples_video/davis_dolphins.mp4 +3 -0
- assets/examples_video/davis_rollercoaster.mp4 +3 -0
- assets/examples_video/davis_seasnake.mp4 +3 -0
- assets/paper.pdf +3 -0
- assets/teaser.png +3 -0
- assets/video_edit/demo1_midas.mp4 +3 -0
- assets/video_edit/demo1_ours.mp4 +3 -0
- assets/video_edit/demo1_video.mp4 +3 -0
- assets/video_edit/demo2_midas.mp4 +3 -0
- assets/video_edit/demo2_ours.mp4 +3 -0
- assets/video_edit/demo2_video.mp4 +3 -0
- requirements.txt +6 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
38 |
+
*.pdf filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import cv2
|
3 |
+
import numpy as np
|
4 |
+
import os
|
5 |
+
import torch
|
6 |
+
import torch.nn.functional as F
|
7 |
+
from torchvision.transforms import Compose
|
8 |
+
import tempfile
|
9 |
+
|
10 |
+
from depth_anything.dpt import DepthAnything
|
11 |
+
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
|
12 |
+
|
13 |
+
def make_video(video_path, outdir='./vis_video_depth',encoder='vitl'):
    """Write a side-by-side (input frame | depth map) video for ``video_path``.

    Args:
        video_path: Path to a video file, to a ``.txt`` file listing one video
            path per line, or to a directory of videos (dot-files skipped).
        outdir: Unused. Kept so existing callers keep working; the result is
            written to a temporary ``.mp4`` file instead.
        encoder: Encoder variant inserted into the checkpoint name
            ``LiheYoung/depth_anything_{encoder}14``.

    Returns:
        Path of the temporary ``.mp4`` written for the first input video, or
        ``None`` when there is no input video to process.
    """
    margin_width = 50

    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{}14'.format(encoder)).to(DEVICE).eval()

    total_params = sum(param.numel() for param in depth_anything.parameters())
    print('Total parameters: {:.2f}M'.format(total_params / 1e6))

    transform = Compose([
        Resize(
            width=518,
            height=518,
            resize_target=False,
            keep_aspect_ratio=True,
            ensure_multiple_of=14,
            resize_method='lower_bound',
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    if os.path.isfile(video_path):
        if video_path.endswith('txt'):
            with open(video_path, 'r') as f:
                # The listing must land in `filenames`; storing it under any
                # other name leaves `filenames` unbound for the loop below.
                filenames = [line for line in f.read().splitlines() if line.strip()]
        else:
            filenames = [video_path]
    else:
        filenames = sorted(
            os.path.join(video_path, name)
            for name in os.listdir(video_path)
            if not name.startswith('.')
        )

    for k, filename in enumerate(filenames):
        print('Progress {:}/{:},'.format(k+1, len(filenames)), 'Processing', filename)

        raw_video = cv2.VideoCapture(filename)
        out = None
        try:
            frame_width = int(raw_video.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(raw_video.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Keep the fractional rate (e.g. 29.97) so the output does not
            # drift; fall back to 30 fps when the container reports none.
            frame_rate = raw_video.get(cv2.CAP_PROP_FPS)
            if not frame_rate > 0:
                frame_rate = 30.0

            output_width = frame_width * 2 + margin_width

            # Only the unique name is needed; the writer reopens the path.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
                output_path = tmpfile.name

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, frame_rate, (output_width, frame_height))

            # White separator between the two panels; identical for every frame.
            split_region = np.full((frame_height, margin_width, 3), 255, dtype=np.uint8)

            while raw_video.isOpened():
                ret, raw_frame = raw_video.read()
                if not ret:
                    break

                frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB) / 255.0

                frame = transform({'image': frame})['image']
                frame = torch.from_numpy(frame).unsqueeze(0).to(DEVICE)

                with torch.no_grad():
                    depth = depth_anything(frame)

                depth = F.interpolate(depth[None], (frame_height, frame_width), mode='bilinear', align_corners=False)[0, 0]

                # Clamp the range so a constant depth map yields zeros instead
                # of NaNs from a 0/0 division.
                depth_min = depth.min()
                depth_range = (depth.max() - depth_min).clamp(min=1e-8)
                depth = (depth - depth_min) / depth_range * 255.0

                depth = depth.cpu().numpy().astype(np.uint8)
                depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)

                combined_frame = cv2.hconcat([raw_frame, split_region, depth_color])
                out.write(combined_frame)
        finally:
            # Release the handles even if a frame fails mid-way.
            raw_video.release()
            if out is not None:
                out.release()

        # The caller consumes a single path, so stop after the first video.
        return output_path

    return None
|
100 |
+
|
101 |
+
# Custom CSS passed to gr.Blocks(css=css) below. It caps the height of the
# elements with these ids; no component in the visible part of this file
# assigns those ids.
css = """
#img-display-container {
max-height: 100vh;
}
#img-display-input {
max-height: 80vh;
}
#img-display-output {
max-height: 80vh;
}
"""
# Module-level device and ViT-L model, created once at import time.
# NOTE(review): make_video() builds its own model instance and does not use
# this one; in this file it is only reachable by passing it to predict_depth().
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE).eval()

# Markdown shown at the top of the demo page.
title = "# Depth Anything Video Demo"
description = """Depth Anything on full video files.

Please refer to our [paper](https://arxiv.org/abs/2401.10891), [project page](https://depth-anything.github.io), or [github](https://github.com/LiheYoung/Depth-Anything) for more details."""

# Module-level preprocessing pipeline with the same settings as the one
# make_video() defines locally; make_video() uses its local copy, not this one.
transform = Compose([
    Resize(
        width=518,
        height=518,
        resize_target=False,
        keep_aspect_ratio=True,
        ensure_multiple_of=14,
        resize_method='lower_bound',
        image_interpolation_method=cv2.INTER_CUBIC,
    ),
    # Mean/std look like the usual ImageNet statistics — TODO confirm.
    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    PrepareForNet(),
])
|
133 |
+
|
134 |
+
def predict_depth(model, image):
    """Call ``model`` on ``image`` with gradient tracking disabled.

    Args:
        model: Callable applied to ``image``.
        image: Input forwarded unchanged to ``model``.

    Returns:
        Whatever ``model(image)`` returns.
    """
    with torch.no_grad():
        prediction = model(image)
    return prediction
|
137 |
+
|
138 |
+
# Gradio UI: upload a video, press Submit, and view the depth visualisation.
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Video Depth Prediction demo")

    with gr.Row():
        input_video = gr.Video(label="Input Video")
    submit = gr.Button("Submit")
    processed_video = gr.Video(label="Processed Video")

    def on_submit(uploaded_video):
        """Run depth estimation on the uploaded video and return the output path.

        Raises:
            gr.Error: If Submit is pressed before a video has been provided.
        """
        # Without this guard a missing upload reaches os.path.isfile(None)
        # inside make_video() and surfaces as an opaque TypeError.
        if uploaded_video is None:
            raise gr.Error("Please upload a video before pressing Submit.")

        # Process the video and get the path of the output video
        return make_video(uploaded_video)

    submit.click(on_submit, inputs=[input_video], outputs=processed_video)

    # Skip dot-files (e.g. .DS_Store) so they are not offered as examples,
    # matching the directory listing done in make_video().
    example_files = sorted(
        os.path.join('assets/examples_video', filename)
        for filename in os.listdir('assets/examples_video')
        if not filename.startswith('.')
    )
    examples = gr.Examples(examples=example_files, inputs=[input_video], outputs=processed_video, fn=on_submit, cache_examples=False)


if __name__ == '__main__':
    demo.queue().launch()
|
assets/controlnet_demo1.png
ADDED
![]() |
Git LFS Details
|
assets/controlnet_demo2.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo1.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo10.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo11.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo12.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo13.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo14.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo15.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo16.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo17.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo18.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo19.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo2.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo20.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo3.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo4.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo5.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo7.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo8.png
ADDED
![]() |
Git LFS Details
|
assets/examples/demo9.png
ADDED
![]() |
Git LFS Details
|
assets/examples_video/davis_dolphins.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da2bdf883de86f3ad1f7ec58e34f50cd8dc1bbde8288e23a635a7396ba1af13d
|
3 |
+
size 468524
|
assets/examples_video/davis_rollercoaster.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:291361c800b83ead49f50302ffc82f6ecd5205391934cc8354946b4b93e8cbb4
|
3 |
+
size 596021
|
assets/examples_video/davis_seasnake.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:247f92487bc7a14bf2364847f83e23c7c99addf28abaa043bb353edb6531cead
|
3 |
+
size 4010306
|
assets/paper.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e554e412ffc6e9e6edddc963baa2211692860ab0efa92d74bf7d09b18d2c597
|
3 |
+
size 4549655
|
assets/teaser.png
ADDED
![]() |
Git LFS Details
|
assets/video_edit/demo1_midas.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11c668214e74ff067cadd9f5beaa8b103360398f5c689c4a4db6c74b451a6963
|
3 |
+
size 187513
|
assets/video_edit/demo1_ours.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57e88c7610076ee422f53c112907339d5a87338cc9186453eb5bfaf6ed3a9257
|
3 |
+
size 431370
|
assets/video_edit/demo1_video.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfcb14cb7cec14c6b8198a6a455609dc5fac26c42628867e0d4412d53bfa0af7
|
3 |
+
size 174791
|
assets/video_edit/demo2_midas.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:833fd0e7b41f712073ff48373139b1433a0e61b0221a44971a25d57b34a92078
|
3 |
+
size 145623
|
assets/video_edit/demo2_ours.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c49d36aab8eeac806613a80c870fa6d3b62694ad08da0550f2e7d6d1b29553fd
|
3 |
+
size 222693
|
assets/video_edit/demo2_video.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb31e593a2a0973e37a5e004a25e7b5fde0e6e9234e283cc404ae7b9805b45ce
|
3 |
+
size 112277
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio_imageslider
|
2 |
+
gradio==4.14.0
|
3 |
+
torch
|
4 |
+
torchvision
|
5 |
+
opencv-python
|
6 |
+
huggingface_hub
|