Spaces:

Nikhil0987
/

imageTO3d

Runtime error

App Files Files Community

Nikhil0987 committed on Aug 29, 2024

Commit

4b30385

verified ·

1 Parent(s): 1939ee1

Update app.py

Browse files

Files changed (1) hide show

app.py +407 -132

app.py CHANGED Viewed

@@ -1,146 +1,421 @@
 import gradio as gr
 import numpy as np
-import random
-from diffusers import DiffusionPipeline
 import torch
-device = "cuda" if torch.cuda.is_available() else "cpu"
-if torch.cuda.is_available():
-    torch.cuda.max_memory_allocated(device=device)
-    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
-    pipe.enable_xformers_memory_efficient_attention()
-    pipe = pipe.to(device)
-else:
-    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True)
-    pipe = pipe.to(device)
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator().manual_seed(seed)
-    image = pipe(
-        prompt = prompt,
-        negative_prompt = negative_prompt,
-        guidance_scale = guidance_scale,
-        num_inference_steps = num_inference_steps,
-        width = width,
-        height = height,
-        generator = generator
-    ).images[0]
-    return image
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
 ]
-css="""
-#col-container {
-    margin: 0 auto;
-    max-width: 520px;
-}
-"""
-if torch.cuda.is_available():
-    power_device = "GPU"
-else:
-    power_device = "CPU"
-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""
-        # Text-to-Image Gradio Template
-        Currently running on {power_device}.
-        """)
-        with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-            run_button = gr.Button("Run", scale=0)
-        result = gr.Image(label="Result", show_label=False)
-        with gr.Accordion("Advanced Settings", open=False):
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
             )
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
             )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=512,
-                )
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=512,
-                )
             with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0,
                 )
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=12,
-                    step=1,
-                    value=2,
                 )
-        gr.Examples(
-            examples = examples,
-            inputs = [prompt]
-        )
-    run_button.click(
-        fn = infer,
-        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-        outputs = [result]
     )
-demo.queue().launch()

+import os
+import tempfile
+import time
+from contextlib import nullcontext
+from functools import lru_cache
+from typing import Any
 import gradio as gr
 import numpy as np
+import rembg
 import torch
+from gradio_litmodel3d import LitModel3D
+from PIL import Image
+import sf3d.utils as sf3d_utils
+from sf3d.system import SF3D
+# Point Gradio's temp directory at <TMPDIR, or /tmp if unset>/gradio.
+os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.environ.get("TMPDIR", "/tmp"), "gradio")
+# Single rembg session created at import time and reused by remove_background().
+rembg_session = rembg.new_session()
+# Pixel size every input image is resized to before it is fed to the model.
+COND_WIDTH = 512
+COND_HEIGHT = 512
+# Arguments passed to the sf3d_utils camera helpers below.
+COND_DISTANCE = 1.6
+COND_FOVY_DEG = 40
+# RGB colour (0-1 floats) blended behind the masked foreground in create_batch().
+BACKGROUND_COLOR = [0.5, 0.5, 0.5]
+# Computed once at import time; the inputs are only the constants above.
+c2w_cond = sf3d_utils.default_cond_c2w(COND_DISTANCE)
+intrinsic, intrinsic_normed_cond = sf3d_utils.create_intrinsic_from_fov_deg(
+    COND_FOVY_DEG, COND_HEIGHT, COND_WIDTH
+)
+# Paths of the .glb files written by run_model(). Nothing in the visible code
+# reads or deletes these entries.
+generated_files = []
+# Remove the Gradio temp dir left over from a previous run, if any.
+if os.path.exists(os.environ["GRADIO_TEMP_DIR"]):
+    print(f"Deleting {os.environ['GRADIO_TEMP_DIR']}")
+    import shutil
+    shutil.rmtree(os.environ["GRADIO_TEMP_DIR"])
+# Device identifier used by run_model() for .to() and for the autocast check.
+device = sf3d_utils.get_device()
+# Load the pretrained SF3D weights, switch to eval mode and move to the device.
+model = SF3D.from_pretrained(
+    "stabilityai/stable-fast-3d",
+    config_name="config.yaml",
+    weight_name="model.safetensors",
+)
+model.eval()
+model = model.to(device)
+# Every entry of demo_files/examples is offered as an example input image.
+example_files = [
+    os.path.join("demo_files/examples", f) for f in os.listdir("demo_files/examples")
 ]
+def run_model(input_image, remesh_option, vertex_count, texture_size):
+    start = time.time()
+    with torch.no_grad():
+        with torch.autocast(
+            device_type=device, dtype=torch.float16
+        ) if "cuda" in device else nullcontext():
+            model_batch = create_batch(input_image)
+            model_batch = {k: v.to(device) for k, v in model_batch.items()}
+            trimesh_mesh, _glob_dict = model.generate_mesh(
+                model_batch, texture_size, remesh_option, vertex_count
             )
+            trimesh_mesh = trimesh_mesh[0]
+    # Create new tmp file
+    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".glb")
+    trimesh_mesh.export(tmp_file.name, file_type="glb", include_normals=True)
+    generated_files.append(tmp_file.name)
+    print("Generation took:", time.time() - start, "s")
+    return tmp_file.name
+def create_batch(input_image: Image) -> dict[str, Any]:
+    """Build the single-element model input batch for one image.
+
+    The image is resized to COND_WIDTH x COND_HEIGHT and scaled to floats in
+    [0, 1]. Its last channel is used as the foreground mask and its first
+    three channels are blended over BACKGROUND_COLOR using that mask.
+
+    Args:
+        input_image: Image whose last channel is treated as alpha; presumably
+            the RGBA output of resize_foreground() - confirm against callers.
+
+    Returns:
+        Dict of tensors keyed "rgb_cond", "mask_cond", "c2w_cond",
+        "intrinsic_cond" and "intrinsic_normed_cond", each with a leading
+        dimension of size 1 added.
+    """
+    img_cond = (
+        torch.from_numpy(
+            np.asarray(input_image.resize((COND_WIDTH, COND_HEIGHT))).astype(np.float32)
+            / 255.0
+        )
+        .float()
+        .clip(0, 1)
+    )
+    # Keep the channel axis (-1:) so the mask broadcasts against the RGB data.
+    mask_cond = img_cond[:, :, -1:]
+    # lerp(background, image, mask): mask 0 -> background colour, mask 1 -> image.
+    rgb_cond = torch.lerp(
+        torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
+    )
+    # The camera tensors get one extra leading axis here, on top of the batch
+    # axis added to every entry below.
+    batch_elem = {
+        "rgb_cond": rgb_cond,
+        "mask_cond": mask_cond,
+        "c2w_cond": c2w_cond.unsqueeze(0),
+        "intrinsic_cond": intrinsic.unsqueeze(0),
+        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
+    }
+    # Add batch dim
+    batched = {k: v.unsqueeze(0) for k, v in batch_elem.items()}
+    return batched
+@lru_cache
+def checkerboard(squares: int, size: int, min_value: float = 0.5):
+    """Return a 3-channel checkerboard array alternating min_value and 1.
+
+    Args:
+        squares: Number of cells along each side.
+        size: Target side length in pixels; each cell is size // squares
+            pixels wide, so the result is smaller than size when size is not
+            a multiple of squares.
+        min_value: Value of the darker cells.
+
+    Returns:
+        Array of shape (squares * (size // squares),) * 2 + (3,). Because of
+        lru_cache the same array object is returned for repeated arguments,
+        so callers should not modify it in place.
+    """
+    base = np.zeros((squares, squares)) + min_value
+    # Cells whose row and column parity differ are set to 1.
+    base[1::2, ::2] = 1
+    base[::2, 1::2] = 1
+    repeat_mult = size // squares
+    # Upscale each cell to repeat_mult pixels, then replicate into 3 channels.
+    return (
+        base.repeat(repeat_mult, axis=0)
+        .repeat(repeat_mult, axis=1)[:, :, None]
+        .repeat(3, axis=-1)
+    )
+def remove_background(input_image: Image) -> Image:
+    """Run rembg on the image using the module-level rembg_session.
+
+    Returns whatever rembg.remove() produces; presumably an RGBA image with
+    the background made transparent - confirm against the rembg docs.
+    """
+    return rembg.remove(input_image, session=rembg_session)
+def resize_foreground(
+    image: Image,
+    ratio: float,
+) -> Image:
+    image = np.array(image)
+    assert image.shape[-1] == 4
+    alpha = np.where(image[..., 3] > 0)
+    y1, y2, x1, x2 = (
+        alpha[0].min(),
+        alpha[0].max(),
+        alpha[1].min(),
+        alpha[1].max(),
+    )
+    # crop the foreground
+    fg = image[y1:y2, x1:x2]
+    # pad to square
+    size = max(fg.shape[0], fg.shape[1])
+    ph0, pw0 = (size - fg.shape[0]) // 2, (size - fg.shape[1]) // 2
+    ph1, pw1 = size - fg.shape[0] - ph0, size - fg.shape[1] - pw0
+    new_image = np.pad(
+        fg,
+        ((ph0, ph1), (pw0, pw1), (0, 0)),
+        mode="constant",
+        constant_values=((0, 0), (0, 0), (0, 0)),
+    )
+    # compute padding according to the ratio
+    new_size = int(new_image.shape[0] / ratio)
+    # pad to size, double side
+    ph0, pw0 = (new_size - size) // 2, (new_size - size) // 2
+    ph1, pw1 = new_size - size - ph0, new_size - size - pw0
+    new_image = np.pad(
+        new_image,
+        ((ph0, ph1), (pw0, pw1), (0, 0)),
+        mode="constant",
+        constant_values=((0, 0), (0, 0), (0, 0)),
+    )
+    new_image = Image.fromarray(new_image, mode="RGBA").resize(
+        (COND_WIDTH, COND_HEIGHT)
+    )
+    return new_image
+def square_crop(input_image: Image) -> Image:
+    """Center-crop the image to a square and resize it to the conditioning size.
+
+    The square side equals the shorter image dimension; the result is resized
+    to (COND_WIDTH, COND_HEIGHT).
+    """
+    # Perform a center square crop
+    min_size = min(input_image.size)
+    left = (input_image.size[0] - min_size) // 2
+    top = (input_image.size[1] - min_size) // 2
+    right = (input_image.size[0] + min_size) // 2
+    bottom = (input_image.size[1] + min_size) // 2
+    return input_image.crop((left, top, right, bottom)).resize(
+        (COND_WIDTH, COND_HEIGHT)
+    )
+def show_mask_img(input_image: Image) -> Image:
+    """Composite an RGBA image over a checkerboard to visualise its alpha mask.
+
+    Args:
+        input_image: Image with an alpha channel at index 3. It must be
+            512 x 512 to line up with the checkerboard built below, which
+            matches the current COND_WIDTH / COND_HEIGHT values.
+
+    Returns:
+        RGB image in which transparent areas show the checkerboard.
+    """
+    img_numpy = np.array(input_image)
+    alpha = img_numpy[:, :, 3] / 255.0
+    # 32 cells of 16 px each -> 512 x 512 x 3; "* 255" creates a new array, so
+    # the cached checkerboard itself is not modified.
+    chkb = checkerboard(32, 512) * 255
+    new_img = img_numpy[..., :3] * alpha[:, :, None] + chkb * (1 - alpha[:, :, None])
+    return Image.fromarray(new_img.astype(np.uint8), mode="RGB")
+def run_button(
+    run_btn,
+    input_image,
+    background_state,
+    foreground_ratio,
+    remesh_option,
+    vertex_count,
+    texture_size,
+):
+    """Click handler for the main button; its action depends on the button label.
+
+    Args:
+        run_btn: Current button label, "Run" or "Remove Background".
+        input_image: Uploaded image; only used in the "Remove Background" branch.
+        background_state: Preprocessed foreground image; this (not input_image)
+            is what gets passed to run_model() in the "Run" branch.
+        foreground_ratio: Ratio forwarded to resize_foreground().
+        remesh_option: Remeshing choice; lower-cased before reaching run_model().
+        vertex_count: Forwarded to run_model().
+        texture_size: Forwarded to run_model().
+
+    Returns:
+        A 6-tuple matching the outputs wired to run_btn.click: button, cropped
+        image state, foreground image state, mask preview, 3D viewer and HDR
+        column. Any other label falls through both branches and returns None.
+    """
+    if run_btn == "Run":
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats()
+        glb_file: str = run_model(
+            background_state, remesh_option.lower(), vertex_count, texture_size
+        )
+        if torch.cuda.is_available():
+            print("Peak Memory:", torch.cuda.max_memory_allocated() / 1024 / 1024, "MB")
+        elif torch.backends.mps.is_available():
+            print(
+                "Peak Memory:", torch.mps.driver_allocated_memory() / 1024 / 1024, "MB"
             )
+        # Leave button, states and preview untouched; reveal the 3D viewer with
+        # the new mesh and the HDR column.
+        return (
+            gr.update(),
+            gr.update(),
+            gr.update(),
+            gr.update(),
+            gr.update(value=glb_file, visible=True),
+            gr.update(visible=True),
+        )
+    elif run_btn == "Remove Background":
+        rem_removed = remove_background(input_image)
+        sqr_crop = square_crop(rem_removed)
+        fr_res = resize_foreground(sqr_crop, foreground_ratio)
+        # Switch the button to "Run", store both images, show the mask preview
+        # and hide any earlier 3D output.
+        return (
+            gr.update(value="Run", visible=True),
+            sqr_crop,
+            fr_res,
+            gr.update(value=show_mask_img(fr_res), visible=True),
+            gr.update(value=None, visible=False),
+            gr.update(visible=False),
+        )
+def requires_bg_remove(image, fr):
+    """Decide, when the input image changes, whether background removal is needed.
+
+    Args:
+        image: Newly uploaded image, or None when the input was cleared. It
+            must have an "A" channel.
+        fr: Foreground ratio forwarded to resize_foreground().
+
+    Returns:
+        A 6-tuple matching the outputs wired to input_img.change: button,
+        cropped image state, foreground image state, mask preview, 3D viewer
+        and HDR column.
+    """
+    if image is None:
+        # Input cleared: hide the button and reset both states and the preview.
+        return (
+            gr.update(visible=False, value="Run"),
+            None,
+            None,
+            gr.update(value=None, visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+        )
+    alpha_channel = np.array(image.getchannel("A"))
+    min_alpha = alpha_channel.min()
+    # At least one fully transparent pixel is taken to mean the background has
+    # already been removed.
+    if min_alpha == 0:
+        print("Already has alpha")
+        sqr_crop = square_crop(image)
+        fr_res = resize_foreground(sqr_crop, fr)
+        return (
+            gr.update(value="Run", visible=True),
+            sqr_crop,
+            fr_res,
+            gr.update(value=show_mask_img(fr_res), visible=True),
+            gr.update(visible=False),
+            gr.update(visible=False),
+        )
+    # No transparent pixel: offer background removal and clear earlier results.
+    return (
+        gr.update(value="Remove Background", visible=True),
+        None,
+        None,
+        gr.update(value=None, visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+    )
+def update_foreground_ratio(img_proc, fr):
+    foreground_res = resize_foreground(img_proc, fr)
+    return (
+        foreground_res,
+        gr.update(value=show_mask_img(foreground_res)),
+    )
+# UI layout and event wiring for the demo.
+with gr.Blocks() as demo:
+    # img_proc_state: square-cropped image; background_remove_state: the
+    # foreground-resized image that is later passed to run_model().
+    img_proc_state = gr.State()
+    background_remove_state = gr.State()
+    gr.Markdown("""
+    # SF3D: Stable Fast 3D Mesh Reconstruction with UV-unwrapping and Illumination Disentanglement
+    **SF3D** is a state-of-the-art method for 3D mesh reconstruction from a single image.
+    This demo allows you to upload an image and generate a 3D mesh model from it.
+    **Tips**
+    1. If the image already has an alpha channel, you can skip the background removal step.
+    2. You can adjust the foreground ratio to control the size of the foreground object. This can influence the shape
+    3. You can select the remeshing option to control the mesh topology. This can introduce artifacts in the mesh on thin surfaces and should be turned off in such cases.
+    4. You can upload your own HDR environment map to light the 3D model.
+    """)
+    with gr.Row(variant="panel"):
+        # Left column: input image, mask preview and generation settings.
+        with gr.Column():
             with gr.Row():
+                input_img = gr.Image(
+                    type="pil", label="Input Image", sources="upload", image_mode="RGBA"
                 )
+                preview_removal = gr.Image(
+                    label="Preview Background Removal",
+                    type="pil",
+                    image_mode="RGB",
+                    interactive=False,
+                    visible=False,
                 )
+            foreground_ratio = gr.Slider(
+                label="Foreground Ratio",
+                minimum=0.5,
+                maximum=1.0,
+                value=0.85,
+                step=0.05,
+            )
+            # Moving the slider recomputes the foreground image and its preview
+            # from the stored square crop.
+            foreground_ratio.change(
+                update_foreground_ratio,
+                inputs=[img_proc_state, foreground_ratio],
+                outputs=[background_remove_state, preview_removal],
+            )
+            remesh_option = gr.Radio(
+                choices=["None", "Triangle", "Quad"],
+                label="Remeshing",
+                value="None",
+                visible=True,
+            )
+            vertex_count_slider = gr.Slider(
+                label="Target Vertex Count",
+                minimum=1000,
+                maximum=20000,
+                value=10000,
+                step=1000,
+                visible=True,
+            )
+            texture_size = gr.Slider(
+                label="Texture Size",
+                minimum=512,
+                maximum=2048,
+                value=1024,
+                step=256,
+                visible=True,
+            )
+            # Hidden until an image is loaded; the handlers switch its label
+            # between "Run" and "Remove Background".
+            run_btn = gr.Button("Run", variant="primary", visible=False)
+        # Right column: 3D viewer and HDR environment-map selection.
+        with gr.Column():
+            output_3d = LitModel3D(
+                label="3D Model",
+                visible=False,
+                clear_color=[0.0, 0.0, 0.0, 0.0],
+                tonemapping="aces",
+                contrast=1.0,
+                scale=1.0,
+            )
+            with gr.Column(visible=False, scale=1.0) as hdr_row:
+                gr.Markdown("""## HDR Environment Map
+                Select an HDR environment map to light the 3D model. You can also upload your own HDR environment maps.
+                """)
+                with gr.Row():
+                    hdr_illumination_file = gr.File(
+                        label="HDR Env Map", file_types=[".hdr"], file_count="single"
+                    )
+                    # Every entry of demo_files/hdri is offered as an example map.
+                    example_hdris = [
+                        os.path.join("demo_files/hdri", f)
+                        for f in os.listdir("demo_files/hdri")
+                    ]
+                    hdr_illumination_example = gr.Examples(
+                        examples=example_hdris,
+                        inputs=hdr_illumination_file,
+                    )
+                    # Pass the chosen file's path to the viewer as env_map, or
+                    # None when the file is cleared.
+                    hdr_illumination_file.change(
+                        lambda x: gr.update(env_map=x.name if x is not None else None),
+                        inputs=hdr_illumination_file,
+                        outputs=[output_3d],
+                    )
+    examples = gr.Examples(
+        examples=example_files,
+        inputs=input_img,
+    )
+    # A new or cleared input image resets the UI and decides whether the
+    # button offers "Run" or "Remove Background".
+    input_img.change(
+        requires_bg_remove,
+        inputs=[input_img, foreground_ratio],
+        outputs=[
+            run_btn,
+            img_proc_state,
+            background_remove_state,
+            preview_removal,
+            output_3d,
+            hdr_row,
+        ],
+    )
+    # The button's own label is passed as the first input so the handler can
+    # branch on it.
+    run_btn.click(
+        run_button,
+        inputs=[
+            run_btn,
+            input_img,
+            background_remove_state,
+            foreground_ratio,
+            remesh_option,
+            vertex_count_slider,
+            texture_size,
+        ],
+        outputs=[
+            run_btn,
+            img_proc_state,
+            background_remove_state,
+            preview_removal,
+            output_3d,
+            hdr_row,
+        ],
     )
+demo.queue().launch(share=False)