zysong212 committed on
Commit 097d08a · 1 Parent(s): 31f0484

first commit

Files changed (3)
  1. app.py +0 -7
  2. depthmaster/depthmaster_pipeline.py +0 -1
  3. run.py +0 -253
app.py CHANGED
@@ -1,14 +1,7 @@
 import gradio as gr
-import numpy as np
-import random
-import logging
-import os
-from glob import glob
 
-import numpy as np
 import torch
 from PIL import Image
-from tqdm.auto import tqdm
 
 from depthmaster import DepthMasterPipeline
 from depthmaster.modules.unet_2d_condition import UNet2DConditionModel
depthmaster/depthmaster_pipeline.py CHANGED
@@ -23,7 +23,6 @@
 # --------------------------------------------------------------------------
 
 
-import logging
 from typing import Dict, Optional, Union
 
 import numpy as np
run.py DELETED
@@ -1,253 +0,0 @@
-# Last modified: 2025-01-14
-#
-# Copyright 2025 Ziyang Song, USTC. All rights reserved.
-#
-# This file has been modified from the original version.
-# Original copyright (c) 2023 Bingxin Ke, ETH Zurich. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# --------------------------------------------------------------------------
-# If you find this code useful, we kindly ask you to cite our paper in your work.
-# Please find bibtex at: https://github.com/indu1ge/DepthMaster#-citation
-# More information about the method can be found at https://indu1ge.github.io/DepthMaster_page
-# --------------------------------------------------------------------------
-
-
-import argparse
-import logging
-import os
-from glob import glob
-
-import numpy as np
-import torch
-from PIL import Image
-from tqdm.auto import tqdm
-
-from depthmaster import DepthMasterPipeline
-
-EXTENSION_LIST = [".jpg", ".png"]
-
-
-if "__main__" == __name__:
-    logging.basicConfig(level=logging.INFO)
-
-    # -------------------- Arguments --------------------
-    parser = argparse.ArgumentParser(
-        description="Run single-image depth estimation using DepthMaster."
-    )
-    parser.add_argument(
-        "--checkpoint",
-        type=str,
-        default="ckpt/depthmaster",
-        help="Checkpoint path or hub name.",
-    )
-
-    parser.add_argument(
-        "--input_rgb_dir",
-        type=str,
-        required=True,
-        help="Path to the input image folder.",
-    )
-
-    parser.add_argument(
-        "--output_dir", type=str, required=True, help="Output directory."
-    )
-
-    parser.add_argument(
-        "--half_precision",
-        "--fp16",
-        action="store_true",
-        help="Run with half-precision (16-bit float), might lead to suboptimal result.",
-    )
-
-    # resolution setting
-    parser.add_argument(
-        "--processing_res",
-        type=int,
-        default=None,
-        help="Maximum resolution of processing. 0 for using input image resolution. Default: 768.",
-    )
-    parser.add_argument(
-        "--output_processing_res",
-        action="store_true",
-        help="When input is resized, output depth at resized operating resolution. Default: False.",
-    )
-    parser.add_argument(
-        "--resample_method",
-        choices=["bilinear", "bicubic", "nearest"],
-        default="bilinear",
-        help="Resampling method used to resize images and depth predictions. This can be one of `bilinear`, `bicubic` or `nearest`. Default: `bilinear`.",
-    )
-
-    # depth map colormap
-    parser.add_argument(
-        "--color_map",
-        type=str,
-        default="Spectral",
-        help="Colormap used to render depth predictions.",
-    )
-
-    # other settings
-    parser.add_argument(
-        "--batch_size",
-        type=int,
-        default=0,
-        help="Inference batch size. Default: 0 (will be set automatically).",
-    )
-    parser.add_argument(
-        "--apple_silicon",
-        action="store_true",
-        help="Flag for running on Apple Silicon.",
-    )
-
-    args = parser.parse_args()
-
-    checkpoint_path = args.checkpoint
-    input_rgb_dir = args.input_rgb_dir
-    output_dir = args.output_dir
-
-    half_precision = args.half_precision
-
-    processing_res = args.processing_res
-    match_input_res = not args.output_processing_res
-    if 0 == processing_res and match_input_res is False:
-        logging.warning(
-            "Processing at native resolution without resizing output might NOT lead to exactly the same resolution, due to the padding and pooling properties of conv layers."
-        )
-    resample_method = args.resample_method
-
-    color_map = args.color_map
-    batch_size = args.batch_size
-    apple_silicon = args.apple_silicon
-    if apple_silicon and 0 == batch_size:
-        batch_size = 1  # set default batch size
-
-    # -------------------- Preparation --------------------
-    # Output directories
-    output_dir_color = os.path.join(output_dir, "depth_colored")
-    output_dir_tif = os.path.join(output_dir, "depth_bw")
-    # output_dir_npy = os.path.join(output_dir, "depth_npy")
-    os.makedirs(output_dir, exist_ok=True)
-    os.makedirs(output_dir_color, exist_ok=True)
-    os.makedirs(output_dir_tif, exist_ok=True)
-    # os.makedirs(output_dir_npy, exist_ok=True)
-    logging.info(f"output dir = {output_dir}")
-
-    # -------------------- Device --------------------
-    if apple_silicon:
-        if torch.backends.mps.is_available() and torch.backends.mps.is_built():
-            device = torch.device("mps:0")
-        else:
-            device = torch.device("cpu")
-            logging.warning("MPS is not available. Running on CPU will be slow.")
-    else:
-        if torch.cuda.is_available():
-            device = torch.device("cuda")
-        else:
-            device = torch.device("cpu")
-            logging.warning("CUDA is not available. Running on CPU will be slow.")
-    logging.info(f"device = {device}")
-
-    # -------------------- Data --------------------
-    rgb_filename_list = glob(os.path.join(input_rgb_dir, "*"))
-    rgb_filename_list = [
-        f for f in rgb_filename_list if os.path.splitext(f)[1].lower() in EXTENSION_LIST
-    ]
-    rgb_filename_list = sorted(rgb_filename_list)
-    n_images = len(rgb_filename_list)
-    if n_images > 0:
-        logging.info(f"Found {n_images} images")
-    else:
-        logging.error(f"No image found in '{input_rgb_dir}'")
-        exit(1)
-
-    # -------------------- Model --------------------
-    if half_precision:
-        dtype = torch.float16
-        variant = "fp16"
-        logging.info(
-            f"Running with half precision ({dtype}), might lead to suboptimal result."
-        )
-    else:
-        dtype = torch.float32
-        variant = None
-
-    pipe: DepthMasterPipeline = DepthMasterPipeline.from_pretrained(
-        checkpoint_path, variant=variant, torch_dtype=dtype
-    )
-
-    try:
-        pipe.enable_xformers_memory_efficient_attention()
-    except ImportError:
-        pass  # run without xformers
-
-    pipe = pipe.to(device)
-    logging.info(
-        f"scale_invariant: {pipe.scale_invariant}, shift_invariant: {pipe.shift_invariant}"
-    )
-
-    # Print out config
-    logging.info(
-        f"Inference settings: checkpoint = `{checkpoint_path}`, "
-        f"processing resolution = {processing_res or pipe.default_processing_resolution}, "
-        f"color_map = {color_map}."
-    )
-
-    # -------------------- Inference and saving --------------------
-    with torch.no_grad():
-        os.makedirs(output_dir, exist_ok=True)
-
-        for rgb_path in tqdm(rgb_filename_list, desc="Estimating depth", leave=True):
-            # Read input image
-            input_image = Image.open(rgb_path)
-
-            # Predict depth
-            with torch.no_grad():
-                pipe_out = pipe(
-                    input_image,
-                    processing_res=processing_res,
-                    match_input_res=match_input_res,
-                    batch_size=batch_size,
-                    color_map=color_map,
-                    show_progress_bar=True,
-                    resample_method=resample_method,
-                )
-
-            depth_pred: np.ndarray = pipe_out.depth_np
-            depth_colored: Image.Image = pipe_out.depth_colored
-
-            # Save as npy
-            rgb_name_base = os.path.splitext(os.path.basename(rgb_path))[0]
-            pred_name_base = rgb_name_base + "_pred"
-            # npy_save_path = os.path.join(output_dir_npy, f"{pred_name_base}.npy")
-            # if os.path.exists(npy_save_path):
-            #     logging.warning(f"Existing file: '{npy_save_path}' will be overwritten")
-            # np.save(npy_save_path, depth_pred)
-
-            # Save as 16-bit uint png
-            depth_to_save = (depth_pred * 65535.0).astype(np.uint16)
-            png_save_path = os.path.join(output_dir_tif, f"{pred_name_base}.png")
-            if os.path.exists(png_save_path):
-                logging.warning(f"Existing file: '{png_save_path}' will be overwritten")
-            Image.fromarray(depth_to_save).save(png_save_path, mode="I;16")
-
-            # Colorize
-            colored_save_path = os.path.join(
-                output_dir_color, f"{pred_name_base}_colored.png"
-            )
-            if os.path.exists(colored_save_path):
-                logging.warning(
-                    f"Existing file: '{colored_save_path}' will be overwritten"
-                )
-            depth_colored.save(colored_save_path)
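
For reference, the deleted run.py wrapped a small core inference flow. The sketch below reproduces that flow in a few lines, using only calls that appear in the removed script (DepthMasterPipeline.from_pretrained, the pipeline call, and its depth_np / depth_colored outputs); "ckpt/depthmaster" is the script's default checkpoint argument and "input.jpg" is a placeholder path, not a file in this repository.

    # Minimal sketch of the single-image inference flow from the deleted run.py.
    # Assumes the same DepthMasterPipeline API used above; paths are placeholders.
    import torch
    from PIL import Image

    from depthmaster import DepthMasterPipeline

    pipe = DepthMasterPipeline.from_pretrained("ckpt/depthmaster", torch_dtype=torch.float32)
    pipe = pipe.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    with torch.no_grad():
        pipe_out = pipe(
            Image.open("input.jpg"),
            processing_res=768,        # 0 would keep the input resolution
            match_input_res=True,      # resize the prediction back to the input size
            batch_size=1,
            color_map="Spectral",
            show_progress_bar=True,
            resample_method="bilinear",
        )

    depth = pipe_out.depth_np            # NumPy depth map; run.py scaled it by 65535 to save a 16-bit PNG
    depth_colored = pipe_out.depth_colored  # PIL.Image rendered with the chosen colormap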