Commit 074c857
Parent(s): 1d7be4e
Upload 198 files
This view is limited to 50 files because it contains too many changes; see the raw diff for the full set.
- configuration.py +183 -0
- deforum-stable-diffusion/Deforum_Stable_Diffusion.ipynb +580 -0
- deforum-stable-diffusion/Deforum_Stable_Diffusion.py +536 -0
- deforum-stable-diffusion/LICENSE +0 -0
- deforum-stable-diffusion/configs/v1-inference.yaml +70 -0
- deforum-stable-diffusion/configs/v2-inference-v.yaml +68 -0
- deforum-stable-diffusion/configs/v2-inference.yaml +67 -0
- deforum-stable-diffusion/configs/v2-inpainting-inference.yaml +158 -0
- deforum-stable-diffusion/configs/v2-midas-inference.yaml +74 -0
- deforum-stable-diffusion/configs/x4-upscaling.yaml +76 -0
- deforum-stable-diffusion/helpers/__init__.py +9 -0
- deforum-stable-diffusion/helpers/__pycache__/__init__.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/__init__.cpython-39.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/aesthetics.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/animation.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/callback.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/colors.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/conditioning.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/depth.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/generate.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/generate.cpython-39.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/k_samplers.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/load_images.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/model_load.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/model_wrap.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/prompt.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/render.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/render.cpython-39.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/save_images.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/save_images.cpython-39.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/settings.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/settings.cpython-39.pyc +0 -0
- deforum-stable-diffusion/helpers/__pycache__/simulacra_fit_linear_model.cpython-38.pyc +0 -0
- deforum-stable-diffusion/helpers/aesthetics.py +48 -0
- deforum-stable-diffusion/helpers/animation.py +338 -0
- deforum-stable-diffusion/helpers/callback.py +124 -0
- deforum-stable-diffusion/helpers/colors.py +16 -0
- deforum-stable-diffusion/helpers/conditioning.py +262 -0
- deforum-stable-diffusion/helpers/depth.py +175 -0
- deforum-stable-diffusion/helpers/generate.py +282 -0
- deforum-stable-diffusion/helpers/k_samplers.py +124 -0
- deforum-stable-diffusion/helpers/load_images.py +99 -0
- deforum-stable-diffusion/helpers/model_load.py +257 -0
- deforum-stable-diffusion/helpers/model_wrap.py +226 -0
- deforum-stable-diffusion/helpers/prompt.py +130 -0
- deforum-stable-diffusion/helpers/rank_images.py +69 -0
- deforum-stable-diffusion/helpers/render.py +472 -0
- deforum-stable-diffusion/helpers/save_images.py +60 -0
- deforum-stable-diffusion/helpers/settings.py +34 -0
- deforum-stable-diffusion/helpers/simulacra_compute_embeddings.py +96 -0
configuration.py
ADDED
@@ -0,0 +1,183 @@
import os

def Root():
    models_path = "models" #@param {type:"string"}
    configs_path = "configs" #@param {type:"string"}
    output_path = "output" #@param {type:"string"}
    mount_google_drive = False #@param {type:"boolean"}
    models_path_gdrive = "/content/drive/MyDrive/AI/models" #@param {type:"string"}
    output_path_gdrive = "/content/drive/MyDrive/AI/StableDiffusion" #@param {type:"string"}

    #@markdown **Model Setup**
    model_config = "v1-inference.yaml" #@param ["custom","v1-inference.yaml"]
    model_checkpoint = "v1-5-pruned-emaonly.ckpt" #@param ["custom","v1-5-pruned.ckpt","v1-5-pruned-emaonly.ckpt","sd-v1-4-full-ema.ckpt","sd-v1-4.ckpt","sd-v1-3-full-ema.ckpt","sd-v1-3.ckpt","sd-v1-2-full-ema.ckpt","sd-v1-2.ckpt","sd-v1-1-full-ema.ckpt","sd-v1-1.ckpt", "robo-diffusion-v1.ckpt","wd-v1-3-float16.ckpt"]
    custom_config_path = "" #@param {type:"string"}
    custom_checkpoint_path = "" #@param {type:"string"}
    half_precision = True
    return locals()


def DeforumAnimArgs():
    animation_mode = "3D" #@param ['None', '2D', '3D', 'Video Input', 'Interpolation'] {type:'string'}
    max_frames = 200 #@param {type:"number"}
    border = 'wrap' #@param ['wrap', 'replicate'] {type:'string'}

    #@markdown ####**Motion Parameters:**
    angle = "0:(0)" #@param {type:"string"}
    zoom = "0:(1.04)" #@param {type:"string"}
    translation_x = "0:(0)" #@param {type:"string"}
    translation_y = "0:(0)" #@param {type:"string"}
    translation_z = "0:(0)" #@param {type:"string"}
    rotation_3d_x = "0:(0)" #@param {type:"string"}
    rotation_3d_y = "0:(0)" #@param {type:"string"}
    rotation_3d_z = "0:(0)" #@param {type:"string"}
    flip_2d_perspective = False #@param {type:"boolean"}
    perspective_flip_theta = "0:(0)" #@param {type:"string"}
    perspective_flip_phi = "0:(t%15)" #@param {type:"string"}
    perspective_flip_gamma = "0:(0)" #@param {type:"string"}
    perspective_flip_fv = "0:(0)" #@param {type:"string"}
    noise_schedule = "0:(0.02)" #@param {type:"string"}
    strength_schedule = "0:(0.65)" #@param {type:"string"}
    contrast_schedule = "0:(1.0)" #@param {type:"string"}

    #@markdown ####**Coherence:**
    color_coherence = "Match Frame 0 LAB" #@param ['None', 'Match Frame 0 HSV', 'Match Frame 0 LAB', 'Match Frame 0 RGB'] {type:'string'}
    diffusion_cadence = "3" #@param ['1','2','3','4','5','6','7','8'] {type:'string'}

    #@markdown #### 3D Depth Warping
    use_depth_warping = True #@param {type:"boolean"}
    midas_weight = 0.3 #@param {type:"number"}
    near_plane = 200
    far_plane = 10000
    fov = 40 #@param {type:"number"}
    padding_mode = "border" #@param ['border', 'reflection', 'zeros'] {type:'string'}
    sampling_mode = "bicubic" #@param ['bicubic', 'bilinear', 'nearest'] {type:'string'}
    save_depth_maps = False #@param {type:"boolean"}

    #@markdown ####**Video Input:**
    video_init_path = "./input/video_in.mp4" #@param {type:"string"}
    extract_nth_frame = 1 #@param {type:"number"}
    overwrite_extracted_frames = True #@param {type:"boolean"}
    use_mask_video = False #@param {type:"boolean"}
    video_mask_path = "" #@param {type:"string"}

    #@markdown ####**Interpolation:**
    interpolate_key_frames = False #@param {type:"boolean"}
    interpolate_x_frames = 4 #@param {type:"number"}

    #@markdown ####**Resume Animation:**
    resume_from_timestring = False #@param {type:"boolean"}
    resume_timestring = "20220829210106" #@param {type:"string"}
    return locals()


def DeforumArgs():
    #@markdown **Image Settings**
    W = 512 #@param
    H = 512 #@param
    W, H = map(lambda x: x - x % 64, (W, H)) # resize to integer multiple of 64

    #@markdown **Sampling Settings**
    seed = 2022 #@param
    sampler = "klms" #@param ["klms","dpm2","dpm2_ancestral","heun","euler","euler_ancestral","plms", "ddim", "dpm_fast", "dpm_adaptive", "dpmpp_2s_a", "dpmpp_2m"]
    steps = 50 #@param
    scale = 7 #@param
    ddim_eta = 0.0 #@param
    dynamic_threshold = None
    static_threshold = None

    #@markdown **Save & Display Settings**
    save_samples = True #@param {type:"boolean"}
    save_settings = True #@param {type:"boolean"}
    display_samples = True #@param {type:"boolean"}
    save_sample_per_step = False #@param {type:"boolean"}
    show_sample_per_step = False #@param {type:"boolean"}

    #@markdown **Prompt Settings**
    prompt_weighting = True #@param {type:"boolean"}
    normalize_prompt_weights = True #@param {type:"boolean"}
    log_weighted_subprompts = False #@param {type:"boolean"}

    #@markdown **Batch Settings**
    n_batch = 1 #@param
    batch_name = "data" #@param {type:"string"}
    filename_format = "{timestring}_{index}_{prompt}.png" #@param ["{timestring}_{index}_{seed}.png","{timestring}_{index}_{prompt}.png"]
    seed_behavior = "iter" #@param ["iter","fixed","random"]
    make_grid = False #@param {type:"boolean"}
    grid_rows = 2 #@param
    outdir = "./outputs"

    #@markdown **Init Settings**
    use_init = False #@param {type:"boolean"}
    strength = 0.0 #@param {type:"number"}
    strength_0_no_init = True # Set the strength to 0 automatically when no init image is used
    init_image = "" #@param {type:"string"}
    # Whiter areas of the mask are areas that change more
    use_mask = False #@param {type:"boolean"}
    use_alpha_as_mask = False # use the alpha channel of the init image as the mask
    mask_file = "" #@param {type:"string"}
    invert_mask = False #@param {type:"boolean"}
    # Adjust mask image, 1.0 is no adjustment. Should be positive numbers.
    mask_brightness_adjust = 1.0 #@param {type:"number"}
    mask_contrast_adjust = 1.0 #@param {type:"number"}

    # Overlay the masked image at the end of the generation so it does not get degraded by encoding and decoding
    overlay_mask = True # {type:"boolean"}
    # Blur edges of final overlay mask, if used. Minimum = 0 (no blur)
    mask_overlay_blur = 5 # {type:"number"}

    #@markdown **Exposure/Contrast Conditional Settings**
    mean_scale = 0 #@param {type:"number"}
    var_scale = 0 #@param {type:"number"}
    exposure_scale = 0 #@param {type:"number"}
    exposure_target = 0.5 #@param {type:"number"}

    #@markdown **Color Match Conditional Settings**
    colormatch_scale = 0 #@param {type:"number"}
    colormatch_image = "" #@param {type:"string"}
    colormatch_n_colors = 4 #@param {type:"number"}
    ignore_sat_weight = 0 #@param {type:"number"}

    #@markdown **CLIP\Aesthetics Conditional Settings**
    clip_name = "ViT-L/14" #@param ['ViT-L/14', 'ViT-L/14@336px', 'ViT-B/16', 'ViT-B/32']
    clip_scale = 0 #@param {type:"number"}
    aesthetics_scale = 0 #@param {type:"number"}
    cutn = 1 #@param {type:"number"}
    cut_pow = 0.0001 #@param {type:"number"}

    #@markdown **Other Conditional Settings**
    init_mse_scale = 0 #@param {type:"number"}
    init_mse_image = "" #@param {type:"string"}

    blue_scale = 1 #@param {type:"number"}

    #@markdown **Conditional Gradient Settings**
    gradient_wrt = "x0_pred" #@param ["x", "x0_pred"]
    gradient_add_to = "both" #@param ["cond", "uncond", "both"]
    decode_method = "linear" #@param ["autoencoder","linear"]
    grad_threshold_type = "dynamic" #@param ["dynamic", "static", "mean", "schedule"]
    clamp_grad_threshold = 0.2 #@param {type:"number"}
    clamp_start = 0.2 #@param
    clamp_stop = 0.01 #@param
    grad_inject_timing = list(range(1,10)) #@param

    #@markdown **Speed vs VRAM Settings**
    cond_uncond_sync = True #@param {type:"boolean"}

    n_samples = 1 # doesnt do anything
    precision = 'autocast'
    C = 4
    f = 8

    prompt = ""
    timestring = ""
    init_latent = None
    init_sample = None
    init_sample_raw = None
    mask_sample = None
    init_c = None

    return locals()
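Each of the three functions above ends in return locals(), so every #@param setting becomes a key in a plain dict. Downstream code (the notebook and script below) wraps these dicts in types.SimpleNamespace for attribute access. A minimal sketch of that consumption pattern, assuming configuration.py is importable from the repo root (the import path is an assumption, not shown in this commit):

from types import SimpleNamespace
# assumed import path for the configuration.py added above
from configuration import Root, DeforumArgs, DeforumAnimArgs

root = SimpleNamespace(**Root())                   # paths, checkpoint, precision
args = SimpleNamespace(**DeforumArgs())            # per-image generation settings
anim_args = SimpleNamespace(**DeforumAnimArgs())   # animation/motion settings

print(args.W, args.H, args.sampler)                # e.g. 512 512 klms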
deforum-stable-diffusion/Deforum_Stable_Diffusion.ipynb
ADDED
@@ -0,0 +1,580 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ByGXyiHZWM_q"
   },
   "source": [
    "# **Deforum Stable Diffusion v0.6**\n",
    "[Stable Diffusion](https://github.com/CompVis/stable-diffusion) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Bj\u00f6rn Ommer and the [Stability.ai](https://stability.ai/) Team. [K Diffusion](https://github.com/crowsonkb/k-diffusion) by [Katherine Crowson](https://twitter.com/RiversHaveWings).\n",
    "\n",
    "[Quick Guide](https://docs.google.com/document/d/1RrQv7FntzOuLg4ohjRZPVL7iptIyBhwwbcEYEW2OfcI/edit?usp=sharing) to Deforum v0.6\n",
    "\n",
    "Notebook by [deforum](https://discord.gg/upmXXsrwZc)"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "cellView": "form",
    "id": "IJjzzkKlWM_s"
   },
   "source": [
    "#@markdown **NVIDIA GPU**\n",
    "import subprocess, os, sys\n",
    "sub_p_res = subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total,memory.free', '--format=csv,noheader'], stdout=subprocess.PIPE).stdout.decode('utf-8')\n",
    "print(f\"{sub_p_res[:-1]}\")"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "UA8-efH-WM_t"
   },
   "source": [
    "# Setup"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "cellView": "form",
    "id": "0D2HQO-PWM_t"
   },
   "source": [
    "\n",
    "import subprocess, time, gc, os, sys\n",
    "\n",
    "def setup_environment():\n",
    "    print_subprocess = False\n",
    "    use_xformers_for_colab = True\n",
    "    try:\n",
    "        ipy = get_ipython()\n",
    "    except:\n",
    "        ipy = 'could not get_ipython'\n",
    "    if 'google.colab' in str(ipy):\n",
    "        print(\"..setting up environment\")\n",
    "        start_time = time.time()\n",
    "        all_process = [\n",
    "            ['pip', 'install', 'torch==1.12.1+cu113', 'torchvision==0.13.1+cu113', '--extra-index-url', 'https://download.pytorch.org/whl/cu113'],\n",
    "            ['pip', 'install', 'omegaconf==2.2.3', 'einops==0.4.1', 'pytorch-lightning==1.7.4', 'torchmetrics==0.9.3', 'torchtext==0.13.1', 'transformers==4.21.2', 'kornia==0.6.7'],\n",
    "            ['git', 'clone', 'https://github.com/deforum-art/deforum-stable-diffusion'],\n",
    "            ['pip', 'install', 'accelerate', 'ftfy', 'jsonmerge', 'matplotlib', 'resize-right', 'timm', 'torchdiffeq','scikit-learn'],\n",
    "        ]\n",
    "        for process in all_process:\n",
    "            running = subprocess.run(process,stdout=subprocess.PIPE).stdout.decode('utf-8')\n",
    "            if print_subprocess:\n",
    "                print(running)\n",
    "        with open('deforum-stable-diffusion/src/k_diffusion/__init__.py', 'w') as f:\n",
    "            f.write('')\n",
    "        sys.path.extend([\n",
    "            'deforum-stable-diffusion/',\n",
    "            'deforum-stable-diffusion/src',\n",
    "        ])\n",
    "        end_time = time.time()\n",
    "\n",
    "        if use_xformers_for_colab:\n",
    "\n",
    "            print(\"..installing xformers\")\n",
    "\n",
    "            all_process = [['pip', 'install', 'triton==2.0.0.dev20220701']]\n",
    "            for process in all_process:\n",
    "                running = subprocess.run(process,stdout=subprocess.PIPE).stdout.decode('utf-8')\n",
    "                if print_subprocess:\n",
    "                    print(running)\n",
    "    \n",
    "            v_card_name = subprocess.run(['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'], stdout=subprocess.PIPE).stdout.decode('utf-8')\n",
    "            if 't4' in v_card_name.lower():\n",
    "                name_to_download = 'T4'\n",
    "            elif 'v100' in v_card_name.lower():\n",
    "                name_to_download = 'V100'\n",
    "            elif 'a100' in v_card_name.lower():\n",
    "                name_to_download = 'A100'\n",
    "            elif 'p100' in v_card_name.lower():\n",
    "                name_to_download = 'P100'\n",
    "            else:\n",
    "                print(v_card_name + ' is currently not supported with xformers flash attention in deforum!')\n",
    "\n",
    "            x_ver = 'xformers-0.0.13.dev0-py3-none-any.whl'\n",
    "            x_link = 'https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/' + name_to_download + '/' + x_ver\n",
    "    \n",
    "            all_process = [\n",
    "                ['wget', x_link],\n",
    "                ['pip', 'install', x_ver],\n",
    "                ['mv', 'deforum-stable-diffusion/src/ldm/modules/attention.py', 'deforum-stable-diffusion/src/ldm/modules/attention_backup.py'],\n",
    "                ['mv', 'deforum-stable-diffusion/src/ldm/modules/attention_xformers.py', 'deforum-stable-diffusion/src/ldm/modules/attention.py']\n",
    "            ]\n",
    "\n",
    "            for process in all_process:\n",
    "                running = subprocess.run(process,stdout=subprocess.PIPE).stdout.decode('utf-8')\n",
    "                if print_subprocess:\n",
    "                    print(running)\n",
    "\n",
    "        print(f\"Environment set up in {end_time-start_time:.0f} seconds\")\n",
    "    else:\n",
    "        sys.path.extend([\n",
    "            'src'\n",
    "        ])\n",
    "    return\n",
    "\n",
    "setup_environment()\n",
    "\n",
    "import torch\n",
    "import random\n",
    "import clip\n",
    "from IPython import display\n",
    "from types import SimpleNamespace\n",
    "from helpers.save_images import get_output_folder\n",
    "from helpers.settings import load_args\n",
    "from helpers.render import render_animation, render_input_video, render_image_batch, render_interpolation\n",
    "from helpers.model_load import make_linear_decode, load_model, get_model_output_paths\n",
    "from helpers.aesthetics import load_aesthetics_model\n",
    "\n",
    "#@markdown **Path Setup**\n",
    "\n",
    "def Root():\n",
    "    models_path = \"models\" #@param {type:\"string\"}\n",
    "    configs_path = \"configs\" #@param {type:\"string\"}\n",
    "    output_path = \"output\" #@param {type:\"string\"}\n",
    "    mount_google_drive = True #@param {type:\"boolean\"}\n",
    "    models_path_gdrive = \"/content/drive/MyDrive/AI/models\" #@param {type:\"string\"}\n",
    "    output_path_gdrive = \"/content/drive/MyDrive/AI/StableDiffusion\" #@param {type:\"string\"}\n",
    "\n",
    "    #@markdown **Model Setup**\n",
    "    model_config = \"v1-inference.yaml\" #@param [\"custom\",\"v1-inference.yaml\"]\n",
    "    model_checkpoint = \"v1-5-pruned-emaonly.ckpt\" #@param [\"custom\",\"v1-5-pruned.ckpt\",\"v1-5-pruned-emaonly.ckpt\",\"sd-v1-4-full-ema.ckpt\",\"sd-v1-4.ckpt\",\"sd-v1-3-full-ema.ckpt\",\"sd-v1-3.ckpt\",\"sd-v1-2-full-ema.ckpt\",\"sd-v1-2.ckpt\",\"sd-v1-1-full-ema.ckpt\",\"sd-v1-1.ckpt\", \"robo-diffusion-v1.ckpt\",\"wd-v1-3-float16.ckpt\"]\n",
    "    custom_config_path = \"\" #@param {type:\"string\"}\n",
    "    custom_checkpoint_path = \"\" #@param {type:\"string\"}\n",
    "    half_precision = True\n",
    "    return locals()\n",
    "\n",
    "root = Root()\n",
    "root = SimpleNamespace(**root)\n",
    "\n",
    "root.models_path, root.output_path = get_model_output_paths(root)\n",
    "root.model, root.device = load_model(root, \n",
    "    load_on_run_all=True\n",
    "    , \n",
    "    check_sha256=True\n",
    "    )"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "6JxwhBwtWM_t"
   },
   "source": [
    "# Settings"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "cellView": "form",
    "id": "E0tJVYA4WM_u"
   },
   "source": [
    "def DeforumAnimArgs():\n",
    "\n",
    "    #@markdown ####**Animation:**\n",
    "    animation_mode = 'None' #@param ['None', '2D', '3D', 'Video Input', 'Interpolation'] {type:'string'}\n",
    "    max_frames = 1000 #@param {type:\"number\"}\n",
    "    border = 'replicate' #@param ['wrap', 'replicate'] {type:'string'}\n",
    "\n",
    "    #@markdown ####**Motion Parameters:**\n",
    "    angle = \"0:(0)\"#@param {type:\"string\"}\n",
    "    zoom = \"0:(1.04)\"#@param {type:\"string\"}\n",
    "    translation_x = \"0:(10*sin(2*3.14*t/10))\"#@param {type:\"string\"}\n",
    "    translation_y = \"0:(0)\"#@param {type:\"string\"}\n",
    "    translation_z = \"0:(10)\"#@param {type:\"string\"}\n",
    "    rotation_3d_x = \"0:(0)\"#@param {type:\"string\"}\n",
    "    rotation_3d_y = \"0:(0)\"#@param {type:\"string\"}\n",
    "    rotation_3d_z = \"0:(0)\"#@param {type:\"string\"}\n",
    "    flip_2d_perspective = False #@param {type:\"boolean\"}\n",
    "    perspective_flip_theta = \"0:(0)\"#@param {type:\"string\"}\n",
    "    perspective_flip_phi = \"0:(t%15)\"#@param {type:\"string\"}\n",
    "    perspective_flip_gamma = \"0:(0)\"#@param {type:\"string\"}\n",
    "    perspective_flip_fv = \"0:(53)\"#@param {type:\"string\"}\n",
    "    noise_schedule = \"0: (0.02)\"#@param {type:\"string\"}\n",
    "    strength_schedule = \"0: (0.65)\"#@param {type:\"string\"}\n",
    "    contrast_schedule = \"0: (1.0)\"#@param {type:\"string\"}\n",
    "\n",
    "    #@markdown ####**Coherence:**\n",
    "    color_coherence = 'Match Frame 0 LAB' #@param ['None', 'Match Frame 0 HSV', 'Match Frame 0 LAB', 'Match Frame 0 RGB'] {type:'string'}\n",
    "    diffusion_cadence = '1' #@param ['1','2','3','4','5','6','7','8'] {type:'string'}\n",
    "\n",
    "    #@markdown ####**3D Depth Warping:**\n",
    "    use_depth_warping = True #@param {type:\"boolean\"}\n",
    "    midas_weight = 0.3#@param {type:\"number\"}\n",
    "    near_plane = 200\n",
    "    far_plane = 10000\n",
    "    fov = 40#@param {type:\"number\"}\n",
    "    padding_mode = 'border'#@param ['border', 'reflection', 'zeros'] {type:'string'}\n",
    "    sampling_mode = 'bicubic'#@param ['bicubic', 'bilinear', 'nearest'] {type:'string'}\n",
    "    save_depth_maps = False #@param {type:\"boolean\"}\n",
    "\n",
    "    #@markdown ####**Video Input:**\n",
    "    video_init_path ='/content/video_in.mp4'#@param {type:\"string\"}\n",
    "    extract_nth_frame = 1#@param {type:\"number\"}\n",
    "    overwrite_extracted_frames = True #@param {type:\"boolean\"}\n",
    "    use_mask_video = False #@param {type:\"boolean\"}\n",
    "    video_mask_path ='/content/video_in.mp4'#@param {type:\"string\"}\n",
    "\n",
    "    #@markdown ####**Interpolation:**\n",
    "    interpolate_key_frames = False #@param {type:\"boolean\"}\n",
    "    interpolate_x_frames = 4 #@param {type:\"number\"}\n",
    "    \n",
    "    #@markdown ####**Resume Animation:**\n",
    "    resume_from_timestring = False #@param {type:\"boolean\"}\n",
    "    resume_timestring = \"20220829210106\" #@param {type:\"string\"}\n",
    "\n",
    "    return locals()"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "i9fly1RIWM_u"
   },
   "source": [
    "prompts = [\n",
    "    \"a beautiful lake by Asher Brown Durand, trending on Artstation\", # the first prompt I want\n",
    "    \"a beautiful portrait of a woman by Artgerm, trending on Artstation\", # the second prompt I want\n",
    "    #\"this prompt I don't want it I commented it out\",\n",
    "    #\"a nousr robot, trending on Artstation\", # use \"nousr robot\" with the robot diffusion model (see model_checkpoint setting)\n",
    "    #\"touhou 1girl komeiji_koishi portrait, green hair\", # waifu diffusion prompts can use danbooru tag groups (see model_checkpoint)\n",
    "    #\"this prompt has weights if prompt weighting enabled:2 can also do negative:-2\", # (see prompt_weighting)\n",
    "]\n",
    "\n",
    "animation_prompts = {\n",
    "    0: \"a beautiful apple, trending on Artstation\",\n",
    "    20: \"a beautiful banana, trending on Artstation\",\n",
    "    30: \"a beautiful coconut, trending on Artstation\",\n",
    "    40: \"a beautiful durian, trending on Artstation\",\n",
    "}"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {
    "cellView": "form",
    "id": "XVzhbmizWM_u"
   },
   "source": [
    "#@markdown **Load Settings**\n",
    "override_settings_with_file = False #@param {type:\"boolean\"}\n",
    "settings_file = \"custom\" #@param [\"custom\", \"512x512_aesthetic_0.json\",\"512x512_aesthetic_1.json\",\"512x512_colormatch_0.json\",\"512x512_colormatch_1.json\",\"512x512_colormatch_2.json\",\"512x512_colormatch_3.json\"]\n",
    "custom_settings_file = \"/content/drive/MyDrive/Settings.txt\"#@param {type:\"string\"}\n",
    "\n",
    "def DeforumArgs():\n",
    "    #@markdown **Image Settings**\n",
    "    W = 512 #@param\n",
    "    H = 512 #@param\n",
    "    W, H = map(lambda x: x - x % 64, (W, H)) # resize to integer multiple of 64\n",
    "\n",
    "    #@markdown **Sampling Settings**\n",
    "    seed = -1 #@param\n",
    "    sampler = 'dpmpp_2s_a' #@param [\"klms\",\"dpm2\",\"dpm2_ancestral\",\"heun\",\"euler\",\"euler_ancestral\",\"plms\", \"ddim\", \"dpm_fast\", \"dpm_adaptive\", \"dpmpp_2s_a\", \"dpmpp_2m\"]\n",
    "    steps = 80 #@param\n",
    "    scale = 7 #@param\n",
    "    ddim_eta = 0.0 #@param\n",
    "    dynamic_threshold = None\n",
    "    static_threshold = None \n",
    "\n",
    "    #@markdown **Save & Display Settings**\n",
    "    save_samples = True #@param {type:\"boolean\"}\n",
    "    save_settings = True #@param {type:\"boolean\"}\n",
    "    display_samples = True #@param {type:\"boolean\"}\n",
    "    save_sample_per_step = False #@param {type:\"boolean\"}\n",
    "    show_sample_per_step = False #@param {type:\"boolean\"}\n",
    "\n",
    "    #@markdown **Prompt Settings**\n",
    "    prompt_weighting = True #@param {type:\"boolean\"}\n",
    "    normalize_prompt_weights = True #@param {type:\"boolean\"}\n",
    "    log_weighted_subprompts = False #@param {type:\"boolean\"}\n",
    "\n",
    "    #@markdown **Batch Settings**\n",
    "    n_batch = 1 #@param\n",
    "    batch_name = \"StableFun\" #@param {type:\"string\"}\n",
    "    filename_format = \"{timestring}_{index}_{prompt}.png\" #@param [\"{timestring}_{index}_{seed}.png\",\"{timestring}_{index}_{prompt}.png\"]\n",
    "    seed_behavior = \"iter\" #@param [\"iter\",\"fixed\",\"random\"]\n",
    "    make_grid = False #@param {type:\"boolean\"}\n",
    "    grid_rows = 2 #@param \n",
    "    outdir = get_output_folder(root.output_path, batch_name)\n",
    "\n",
    "    #@markdown **Init Settings**\n",
    "    use_init = False #@param {type:\"boolean\"}\n",
    "    strength = 0.0 #@param {type:\"number\"}\n",
    "    strength_0_no_init = True # Set the strength to 0 automatically when no init image is used\n",
    "    init_image = \"https://cdn.pixabay.com/photo/2022/07/30/13/10/green-longhorn-beetle-7353749_1280.jpg\" #@param {type:\"string\"}\n",
    "    # Whiter areas of the mask are areas that change more\n",
    "    use_mask = False #@param {type:\"boolean\"}\n",
    "    use_alpha_as_mask = False # use the alpha channel of the init image as the mask\n",
    "    mask_file = \"https://www.filterforge.com/wiki/images/archive/b/b7/20080927223728%21Polygonal_gradient_thumb.jpg\" #@param {type:\"string\"}\n",
    "    invert_mask = False #@param {type:\"boolean\"}\n",
    "    # Adjust mask image, 1.0 is no adjustment. Should be positive numbers.\n",
    "    mask_brightness_adjust = 1.0 #@param {type:\"number\"}\n",
    "    mask_contrast_adjust = 1.0 #@param {type:\"number\"}\n",
    "    # Overlay the masked image at the end of the generation so it does not get degraded by encoding and decoding\n",
    "    overlay_mask = True # {type:\"boolean\"}\n",
    "    # Blur edges of final overlay mask, if used. Minimum = 0 (no blur)\n",
    "    mask_overlay_blur = 5 # {type:\"number\"}\n",
    "\n",
    "    #@markdown **Exposure/Contrast Conditional Settings**\n",
    "    mean_scale = 0 #@param {type:\"number\"}\n",
    "    var_scale = 0 #@param {type:\"number\"}\n",
    "    exposure_scale = 0 #@param {type:\"number\"}\n",
    "    exposure_target = 0.5 #@param {type:\"number\"}\n",
    "\n",
    "    #@markdown **Color Match Conditional Settings**\n",
    "    colormatch_scale = 0 #@param {type:\"number\"}\n",
    "    colormatch_image = \"https://www.saasdesign.io/wp-content/uploads/2021/02/palette-3-min-980x588.png\" #@param {type:\"string\"}\n",
    "    colormatch_n_colors = 4 #@param {type:\"number\"}\n",
    "    ignore_sat_weight = 0 #@param {type:\"number\"}\n",
    "\n",
    "    #@markdown **CLIP\\Aesthetics Conditional Settings**\n",
    "    clip_name = 'ViT-L/14' #@param ['ViT-L/14', 'ViT-L/14@336px', 'ViT-B/16', 'ViT-B/32']\n",
    "    clip_scale = 0 #@param {type:\"number\"}\n",
    "    aesthetics_scale = 0 #@param {type:\"number\"}\n",
    "    cutn = 1 #@param {type:\"number\"}\n",
    "    cut_pow = 0.0001 #@param {type:\"number\"}\n",
    "\n",
    "    #@markdown **Other Conditional Settings**\n",
    "    init_mse_scale = 0 #@param {type:\"number\"}\n",
    "    init_mse_image = \"https://cdn.pixabay.com/photo/2022/07/30/13/10/green-longhorn-beetle-7353749_1280.jpg\" #@param {type:\"string\"}\n",
    "\n",
    "    blue_scale = 0 #@param {type:\"number\"}\n",
    "    \n",
    "    #@markdown **Conditional Gradient Settings**\n",
    "    gradient_wrt = 'x0_pred' #@param [\"x\", \"x0_pred\"]\n",
    "    gradient_add_to = 'both' #@param [\"cond\", \"uncond\", \"both\"]\n",
    "    decode_method = 'linear' #@param [\"autoencoder\",\"linear\"]\n",
    "    grad_threshold_type = 'dynamic' #@param [\"dynamic\", \"static\", \"mean\", \"schedule\"]\n",
    "    clamp_grad_threshold = 0.2 #@param {type:\"number\"}\n",
    "    clamp_start = 0.2 #@param\n",
    "    clamp_stop = 0.01 #@param\n",
    "    grad_inject_timing = list(range(1,10)) #@param\n",
    "\n",
    "    #@markdown **Speed vs VRAM Settings**\n",
    "    cond_uncond_sync = True #@param {type:\"boolean\"}\n",
    "\n",
    "    n_samples = 1 # doesnt do anything\n",
    "    precision = 'autocast' \n",
    "    C = 4\n",
    "    f = 8\n",
    "\n",
    "    prompt = \"\"\n",
    "    timestring = \"\"\n",
    "    init_latent = None\n",
    "    init_sample = None\n",
    "    init_sample_raw = None\n",
    "    mask_sample = None\n",
    "    init_c = None\n",
    "\n",
    "    return locals()\n",
    "\n",
    "args_dict = DeforumArgs()\n",
    "anim_args_dict = DeforumAnimArgs()\n",
    "\n",
    "if override_settings_with_file:\n",
    "    load_args(args_dict, anim_args_dict, settings_file, custom_settings_file, verbose=False)\n",
    "\n",
    "args = SimpleNamespace(**args_dict)\n",
    "anim_args = SimpleNamespace(**anim_args_dict)\n",
    "\n",
    "args.timestring = time.strftime('%Y%m%d%H%M%S')\n",
    "args.strength = max(0.0, min(1.0, args.strength))\n",
    "\n",
    "# Load clip model if using clip guidance\n",
    "if (args.clip_scale > 0) or (args.aesthetics_scale > 0):\n",
    "    root.clip_model = clip.load(args.clip_name, jit=False)[0].eval().requires_grad_(False).to(root.device)\n",
    "    if (args.aesthetics_scale > 0):\n",
    "        root.aesthetics_model = load_aesthetics_model(args, root)\n",
    "\n",
    "if args.seed == -1:\n",
    "    args.seed = random.randint(0, 2**32 - 1)\n",
    "if not args.use_init:\n",
    "    args.init_image = None\n",
    "if args.sampler == 'plms' and (args.use_init or anim_args.animation_mode != 'None'):\n",
    "    print(f\"Init images aren't supported with PLMS yet, switching to KLMS\")\n",
    "    args.sampler = 'klms'\n",
    "if args.sampler != 'ddim':\n",
    "    args.ddim_eta = 0\n",
    "\n",
    "if anim_args.animation_mode == 'None':\n",
    "    anim_args.max_frames = 1\n",
    "elif anim_args.animation_mode == 'Video Input':\n",
    "    args.use_init = True\n",
    "\n",
    "# clean up unused memory\n",
    "gc.collect()\n",
    "torch.cuda.empty_cache()\n",
    "\n",
    "# dispatch to appropriate renderer\n",
    "if anim_args.animation_mode == '2D' or anim_args.animation_mode == '3D':\n",
    "    render_animation(args, anim_args, animation_prompts, root)\n",
    "elif anim_args.animation_mode == 'Video Input':\n",
    "    render_input_video(args, anim_args, animation_prompts, root)\n",
    "elif anim_args.animation_mode == 'Interpolation':\n",
    "    render_interpolation(args, anim_args, animation_prompts, root)\n",
    "else:\n",
    "    render_image_batch(args, prompts, root)"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "gJ88kZ2-WM_v"
   },
   "source": [
    "# Create Video From Frames"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "cellView": "form",
    "id": "XQGeqaGAWM_v"
   },
   "source": [
    "skip_video_for_run_all = True #@param {type: 'boolean'}\n",
    "fps = 12 #@param {type:\"number\"}\n",
    "#@markdown **Manual Settings**\n",
    "use_manual_settings = False #@param {type:\"boolean\"}\n",
    "image_path = \"/content/drive/MyDrive/AI/StableDiffusion/2022-09/20220903000939_%05d.png\" #@param {type:\"string\"}\n",
    "mp4_path = \"/content/drive/MyDrive/AI/StableDiffusion/2022-09/20220903000939.mp4\" #@param {type:\"string\"}\n",
    "render_steps = False #@param {type: 'boolean'}\n",
    "path_name_modifier = \"x0_pred\" #@param [\"x0_pred\",\"x\"]\n",
    "make_gif = False\n",
    "\n",
    "if skip_video_for_run_all == True:\n",
    "    print('Skipping video creation, uncheck skip_video_for_run_all if you want to run it')\n",
    "else:\n",
    "    import os\n",
    "    import subprocess\n",
    "    from base64 import b64encode\n",
    "\n",
    "    print(f\"{image_path} -> {mp4_path}\")\n",
    "\n",
    "    if use_manual_settings:\n",
    "        max_frames = \"200\" #@param {type:\"string\"}\n",
    "    else:\n",
    "        if render_steps: # render steps from a single image\n",
    "            fname = f\"{path_name_modifier}_%05d.png\"\n",
    "            all_step_dirs = [os.path.join(args.outdir, d) for d in os.listdir(args.outdir) if os.path.isdir(os.path.join(args.outdir,d))]\n",
    "            newest_dir = max(all_step_dirs, key=os.path.getmtime)\n",
    "            image_path = os.path.join(newest_dir, fname)\n",
    "            print(f\"Reading images from {image_path}\")\n",
    "            mp4_path = os.path.join(newest_dir, f\"{args.timestring}_{path_name_modifier}.mp4\")\n",
    "            max_frames = str(args.steps)\n",
    "        else: # render images for a video\n",
    "            image_path = os.path.join(args.outdir, f\"{args.timestring}_%05d.png\")\n",
    "            mp4_path = os.path.join(args.outdir, f\"{args.timestring}.mp4\")\n",
    "            max_frames = str(anim_args.max_frames)\n",
    "\n",
    "    # make video\n",
    "    cmd = [\n",
    "        'ffmpeg',\n",
    "        '-y',\n",
    "        '-vcodec', 'png',\n",
    "        '-r', str(fps),\n",
    "        '-start_number', str(0),\n",
    "        '-i', image_path,\n",
    "        '-frames:v', max_frames,\n",
    "        '-c:v', 'libx264',\n",
    "        '-vf',\n",
    "        f'fps={fps}',\n",
    "        '-pix_fmt', 'yuv420p',\n",
    "        '-crf', '17',\n",
    "        '-preset', 'veryfast',\n",
    "        '-pattern_type', 'sequence',\n",
    "        mp4_path\n",
    "    ]\n",
    "    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
    "    stdout, stderr = process.communicate()\n",
    "    if process.returncode != 0:\n",
    "        print(stderr)\n",
    "        raise RuntimeError(stderr)\n",
    "\n",
    "    mp4 = open(mp4_path,'rb').read()\n",
    "    data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
    "    display.display(display.HTML(f'<video controls loop><source src=\"{data_url}\" type=\"video/mp4\"></video>') )\n",
    "    \n",
    "    if make_gif:\n",
    "        gif_path = os.path.splitext(mp4_path)[0]+'.gif'\n",
    "        cmd_gif = [\n",
    "            'ffmpeg',\n",
    "            '-y',\n",
    "            '-i', mp4_path,\n",
    "            '-r', str(fps),\n",
    "            gif_path\n",
    "        ]\n",
    "        process_gif = subprocess.Popen(cmd_gif, stdout=subprocess.PIPE, stderr=subprocess.PIPE)"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {
    "cellView": "form",
    "id": "MMpAcyrYWM_v"
   },
   "source": [
    "skip_disconnect_for_run_all = True #@param {type: 'boolean'}\n",
    "\n",
    "if skip_disconnect_for_run_all == True:\n",
    "    print('Skipping disconnect, uncheck skip_disconnect_for_run_all if you want to run it')\n",
    "else:\n",
    "    from google.colab import runtime\n",
    "    runtime.unassign()"
   ],
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.6 ('dsd')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "b7e04c8a9537645cbc77fa0cbde8069bc94e341b0d5ced104651213865b24e58"
   }
  },
  "colab": {
   "provenance": []
  },
  "accelerator": "GPU",
  "gpuClass": "standard"
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
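The motion parameters in the notebook above (angle, zoom, translation_x, noise_schedule, and so on) are keyframe schedule strings of the form "frame:(expression)", e.g. "0:(1.04)" or "0:(10*sin(2*3.14*t/10))", where t is the frame index. The repo's helpers/animation.py is what actually expands these; the following is only a rough, simplified sketch of the idea (it holds each keyframe's expression until the next keyframe instead of interpolating, and the function name is hypothetical):

import re
from math import sin, pi  # schedule expressions may reference sin(), pi, and t

def expand_schedule(schedule, max_frames):
    # "0:(1.04), 10:(1.00)" -> {0: "1.04", 10: "1.00"}
    keyframes = {int(frame): expr for frame, expr in re.findall(r"(\d+):\s*\((.*?)\)", schedule)}
    values, current = [], "0"
    for t in range(max_frames):
        current = keyframes.get(t, current)  # switch expression at each keyframe (no interpolation here)
        values.append(eval(current, {"sin": sin, "pi": pi, "t": t}))
    return values

expand_schedule("0:(1.04)", 3)                 # [1.04, 1.04, 1.04]
expand_schedule("0:(10*sin(2*3.14*t/10))", 3)  # [0.0, ~5.88, ~9.51]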
deforum-stable-diffusion/Deforum_Stable_Diffusion.py
ADDED
@@ -0,0 +1,536 @@
# %%
# !! {"metadata":{
# !! "id": "ByGXyiHZWM_q"
# !! }}
"""
# **Deforum Stable Diffusion v0.6**
[Stable Diffusion](https://github.com/CompVis/stable-diffusion) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer and the [Stability.ai](https://stability.ai/) Team. [K Diffusion](https://github.com/crowsonkb/k-diffusion) by [Katherine Crowson](https://twitter.com/RiversHaveWings).

[Quick Guide](https://docs.google.com/document/d/1RrQv7FntzOuLg4ohjRZPVL7iptIyBhwwbcEYEW2OfcI/edit?usp=sharing) to Deforum v0.6

Notebook by [deforum](https://discord.gg/upmXXsrwZc)
"""

# %%
# !! {"metadata":{
# !! "cellView": "form",
# !! "id": "IJjzzkKlWM_s"
# !! }}
#@markdown **NVIDIA GPU**
import subprocess, os, sys
sub_p_res = subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total,memory.free', '--format=csv,noheader'], stdout=subprocess.PIPE).stdout.decode('utf-8')
print(f"{sub_p_res[:-1]}")

# %%
# !! {"metadata":{
# !! "id": "UA8-efH-WM_t"
# !! }}
"""
# Setup
"""

# %%
# !! {"metadata":{
# !! "cellView": "form",
# !! "id": "0D2HQO-PWM_t"
# !! }}

import subprocess, time, gc, os, sys

def setup_environment():
    print_subprocess = False
    use_xformers_for_colab = True
    try:
        ipy = get_ipython()
    except:
        ipy = 'could not get_ipython'
    if 'google.colab' in str(ipy):
        print("..setting up environment")
        start_time = time.time()
        all_process = [
            ['pip', 'install', 'torch==1.12.1+cu113', 'torchvision==0.13.1+cu113', '--extra-index-url', 'https://download.pytorch.org/whl/cu113'],
            ['pip', 'install', 'omegaconf==2.2.3', 'einops==0.4.1', 'pytorch-lightning==1.7.4', 'torchmetrics==0.9.3', 'torchtext==0.13.1', 'transformers==4.21.2', 'kornia==0.6.7'],
            ['git', 'clone', 'https://github.com/deforum-art/deforum-stable-diffusion'],
            ['pip', 'install', 'accelerate', 'ftfy', 'jsonmerge', 'matplotlib', 'resize-right', 'timm', 'torchdiffeq','scikit-learn'],
        ]
        for process in all_process:
            running = subprocess.run(process,stdout=subprocess.PIPE).stdout.decode('utf-8')
            if print_subprocess:
                print(running)
        with open('deforum-stable-diffusion/src/k_diffusion/__init__.py', 'w') as f:
            f.write('')
        sys.path.extend([
            'deforum-stable-diffusion/',
            'deforum-stable-diffusion/src',
        ])
        end_time = time.time()

        if use_xformers_for_colab:

            print("..installing xformers")

            all_process = [['pip', 'install', 'triton==2.0.0.dev20220701']]
            for process in all_process:
                running = subprocess.run(process,stdout=subprocess.PIPE).stdout.decode('utf-8')
                if print_subprocess:
                    print(running)

            v_card_name = subprocess.run(['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'], stdout=subprocess.PIPE).stdout.decode('utf-8')
            if 't4' in v_card_name.lower():
                name_to_download = 'T4'
            elif 'v100' in v_card_name.lower():
                name_to_download = 'V100'
            elif 'a100' in v_card_name.lower():
                name_to_download = 'A100'
            elif 'p100' in v_card_name.lower():
                name_to_download = 'P100'
            else:
                print(v_card_name + ' is currently not supported with xformers flash attention in deforum!')

            x_ver = 'xformers-0.0.13.dev0-py3-none-any.whl'
            x_link = 'https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/' + name_to_download + '/' + x_ver

            all_process = [
                ['wget', x_link],
                ['pip', 'install', x_ver],
                ['mv', 'deforum-stable-diffusion/src/ldm/modules/attention.py', 'deforum-stable-diffusion/src/ldm/modules/attention_backup.py'],
                ['mv', 'deforum-stable-diffusion/src/ldm/modules/attention_xformers.py', 'deforum-stable-diffusion/src/ldm/modules/attention.py']
            ]

            for process in all_process:
                running = subprocess.run(process,stdout=subprocess.PIPE).stdout.decode('utf-8')
                if print_subprocess:
                    print(running)

        print(f"Environment set up in {end_time-start_time:.0f} seconds")
    else:
        sys.path.extend([
            'src'
        ])
    return

setup_environment()

import torch
import random
import clip
from IPython import display
from types import SimpleNamespace
from helpers.save_images import get_output_folder
from helpers.settings import load_args
from helpers.render import render_animation, render_input_video, render_image_batch, render_interpolation
from helpers.model_load import make_linear_decode, load_model, get_model_output_paths
from helpers.aesthetics import load_aesthetics_model

#@markdown **Path Setup**

def Root():
    models_path = "models" #@param {type:"string"}
    configs_path = "configs" #@param {type:"string"}
    output_path = "output" #@param {type:"string"}
    mount_google_drive = True #@param {type:"boolean"}
    models_path_gdrive = "/content/drive/MyDrive/AI/models" #@param {type:"string"}
    output_path_gdrive = "/content/drive/MyDrive/AI/StableDiffusion" #@param {type:"string"}

    #@markdown **Model Setup**
    model_config = "v1-inference.yaml" #@param ["custom","v1-inference.yaml"]
    model_checkpoint = "v1-5-pruned-emaonly.ckpt" #@param ["custom","v1-5-pruned.ckpt","v1-5-pruned-emaonly.ckpt","sd-v1-4-full-ema.ckpt","sd-v1-4.ckpt","sd-v1-3-full-ema.ckpt","sd-v1-3.ckpt","sd-v1-2-full-ema.ckpt","sd-v1-2.ckpt","sd-v1-1-full-ema.ckpt","sd-v1-1.ckpt", "robo-diffusion-v1.ckpt","wd-v1-3-float16.ckpt"]
    custom_config_path = "" #@param {type:"string"}
    custom_checkpoint_path = "" #@param {type:"string"}
    half_precision = True
    return locals()

root = Root()
root = SimpleNamespace(**root)

root.models_path, root.output_path = get_model_output_paths(root)
root.model, root.device = load_model(root,
    load_on_run_all=True
    ,
    check_sha256=True
    )

# %%
# !! {"metadata":{
# !! "id": "6JxwhBwtWM_t"
# !! }}
"""
# Settings
"""

# %%
# !! {"metadata":{
# !! "cellView": "form",
# !! "id": "E0tJVYA4WM_u"
# !! }}
def DeforumAnimArgs():

    #@markdown ####**Animation:**
    animation_mode = 'None' #@param ['None', '2D', '3D', 'Video Input', 'Interpolation'] {type:'string'}
    max_frames = 1000 #@param {type:"number"}
    border = 'replicate' #@param ['wrap', 'replicate'] {type:'string'}

    #@markdown ####**Motion Parameters:**
    angle = "0:(0)"#@param {type:"string"}
    zoom = "0:(1.04)"#@param {type:"string"}
    translation_x = "0:(10*sin(2*3.14*t/10))"#@param {type:"string"}
    translation_y = "0:(0)"#@param {type:"string"}
    translation_z = "0:(10)"#@param {type:"string"}
    rotation_3d_x = "0:(0)"#@param {type:"string"}
    rotation_3d_y = "0:(0)"#@param {type:"string"}
    rotation_3d_z = "0:(0)"#@param {type:"string"}
    flip_2d_perspective = False #@param {type:"boolean"}
    perspective_flip_theta = "0:(0)"#@param {type:"string"}
    perspective_flip_phi = "0:(t%15)"#@param {type:"string"}
    perspective_flip_gamma = "0:(0)"#@param {type:"string"}
    perspective_flip_fv = "0:(53)"#@param {type:"string"}
    noise_schedule = "0: (0.02)"#@param {type:"string"}
    strength_schedule = "0: (0.65)"#@param {type:"string"}
    contrast_schedule = "0: (1.0)"#@param {type:"string"}

    #@markdown ####**Coherence:**
    color_coherence = 'Match Frame 0 LAB' #@param ['None', 'Match Frame 0 HSV', 'Match Frame 0 LAB', 'Match Frame 0 RGB'] {type:'string'}
    diffusion_cadence = '1' #@param ['1','2','3','4','5','6','7','8'] {type:'string'}

    #@markdown ####**3D Depth Warping:**
    use_depth_warping = True #@param {type:"boolean"}
    midas_weight = 0.3#@param {type:"number"}
    near_plane = 200
    far_plane = 10000
    fov = 40#@param {type:"number"}
    padding_mode = 'border'#@param ['border', 'reflection', 'zeros'] {type:'string'}
    sampling_mode = 'bicubic'#@param ['bicubic', 'bilinear', 'nearest'] {type:'string'}
    save_depth_maps = False #@param {type:"boolean"}

    #@markdown ####**Video Input:**
    video_init_path ='/content/video_in.mp4'#@param {type:"string"}
    extract_nth_frame = 1#@param {type:"number"}
    overwrite_extracted_frames = True #@param {type:"boolean"}
    use_mask_video = False #@param {type:"boolean"}
    video_mask_path ='/content/video_in.mp4'#@param {type:"string"}

    #@markdown ####**Interpolation:**
    interpolate_key_frames = False #@param {type:"boolean"}
    interpolate_x_frames = 4 #@param {type:"number"}

    #@markdown ####**Resume Animation:**
    resume_from_timestring = False #@param {type:"boolean"}
    resume_timestring = "20220829210106" #@param {type:"string"}

    return locals()

# %%
# !! {"metadata":{
# !! "id": "i9fly1RIWM_u"
# !! }}
prompts = [
    "a beautiful lake by Asher Brown Durand, trending on Artstation", # the first prompt I want
    "a beautiful portrait of a woman by Artgerm, trending on Artstation", # the second prompt I want
    #"this prompt I don't want it I commented it out",
    #"a nousr robot, trending on Artstation", # use "nousr robot" with the robot diffusion model (see model_checkpoint setting)
    #"touhou 1girl komeiji_koishi portrait, green hair", # waifu diffusion prompts can use danbooru tag groups (see model_checkpoint)
    #"this prompt has weights if prompt weighting enabled:2 can also do negative:-2", # (see prompt_weighting)
]

animation_prompts = {
    0: "a beautiful apple, trending on Artstation",
    20: "a beautiful banana, trending on Artstation",
    30: "a beautiful coconut, trending on Artstation",
    40: "a beautiful durian, trending on Artstation",
}

# %%
# !! {"metadata":{
# !! "cellView": "form",
# !! "id": "XVzhbmizWM_u"
# !! }}
#@markdown **Load Settings**
override_settings_with_file = False #@param {type:"boolean"}
settings_file = "custom" #@param ["custom", "512x512_aesthetic_0.json","512x512_aesthetic_1.json","512x512_colormatch_0.json","512x512_colormatch_1.json","512x512_colormatch_2.json","512x512_colormatch_3.json"]
custom_settings_file = "/content/drive/MyDrive/Settings.txt"#@param {type:"string"}

def DeforumArgs():
    #@markdown **Image Settings**
    W = 512 #@param
    H = 512 #@param
    W, H = map(lambda x: x - x % 64, (W, H)) # resize to integer multiple of 64

    #@markdown **Sampling Settings**
    seed = -1 #@param
    sampler = 'dpmpp_2s_a' #@param ["klms","dpm2","dpm2_ancestral","heun","euler","euler_ancestral","plms", "ddim", "dpm_fast", "dpm_adaptive", "dpmpp_2s_a", "dpmpp_2m"]
    steps = 80 #@param
    scale = 7 #@param
    ddim_eta = 0.0 #@param
    dynamic_threshold = None
    static_threshold = None

    #@markdown **Save & Display Settings**
    save_samples = True #@param {type:"boolean"}
    save_settings = True #@param {type:"boolean"}
    display_samples = True #@param {type:"boolean"}
    save_sample_per_step = False #@param {type:"boolean"}
    show_sample_per_step = False #@param {type:"boolean"}

    #@markdown **Prompt Settings**
    prompt_weighting = True #@param {type:"boolean"}
    normalize_prompt_weights = True #@param {type:"boolean"}
    log_weighted_subprompts = False #@param {type:"boolean"}

    #@markdown **Batch Settings**
    n_batch = 1 #@param
    batch_name = "StableFun" #@param {type:"string"}
    filename_format = "{timestring}_{index}_{prompt}.png" #@param ["{timestring}_{index}_{seed}.png","{timestring}_{index}_{prompt}.png"]
    seed_behavior = "iter" #@param ["iter","fixed","random"]
    make_grid = False #@param {type:"boolean"}
    grid_rows = 2 #@param
    outdir = get_output_folder(root.output_path, batch_name)

    #@markdown **Init Settings**
    use_init = False #@param {type:"boolean"}
    strength = 0.0 #@param {type:"number"}
    strength_0_no_init = True # Set the strength to 0 automatically when no init image is used
    init_image = "https://cdn.pixabay.com/photo/2022/07/30/13/10/green-longhorn-beetle-7353749_1280.jpg" #@param {type:"string"}
    # Whiter areas of the mask are areas that change more
    use_mask = False #@param {type:"boolean"}
    use_alpha_as_mask = False # use the alpha channel of the init image as the mask
    mask_file = "https://www.filterforge.com/wiki/images/archive/b/b7/20080927223728%21Polygonal_gradient_thumb.jpg" #@param {type:"string"}
    invert_mask = False #@param {type:"boolean"}
    # Adjust mask image, 1.0 is no adjustment. Should be positive numbers.
    mask_brightness_adjust = 1.0 #@param {type:"number"}
    mask_contrast_adjust = 1.0 #@param {type:"number"}
    # Overlay the masked image at the end of the generation so it does not get degraded by encoding and decoding
    overlay_mask = True # {type:"boolean"}
    # Blur edges of final overlay mask, if used. Minimum = 0 (no blur)
    mask_overlay_blur = 5 # {type:"number"}

    #@markdown **Exposure/Contrast Conditional Settings**
    mean_scale = 0 #@param {type:"number"}
    var_scale = 0 #@param {type:"number"}
    exposure_scale = 0 #@param {type:"number"}
    exposure_target = 0.5 #@param {type:"number"}

    #@markdown **Color Match Conditional Settings**
    colormatch_scale = 0 #@param {type:"number"}
    colormatch_image = "https://www.saasdesign.io/wp-content/uploads/2021/02/palette-3-min-980x588.png" #@param {type:"string"}
colormatch_image = "https://www.saasdesign.io/wp-content/uploads/2021/02/palette-3-min-980x588.png" #@param {type:"string"}
|
315 |
+
colormatch_n_colors = 4 #@param {type:"number"}
|
316 |
+
ignore_sat_weight = 0 #@param {type:"number"}
|
317 |
+
|
318 |
+
#@markdown **CLIP\Aesthetics Conditional Settings**
|
319 |
+
clip_name = 'ViT-L/14' #@param ['ViT-L/14', 'ViT-L/14@336px', 'ViT-B/16', 'ViT-B/32']
|
320 |
+
clip_scale = 0 #@param {type:"number"}
|
321 |
+
aesthetics_scale = 0 #@param {type:"number"}
|
322 |
+
cutn = 1 #@param {type:"number"}
|
323 |
+
cut_pow = 0.0001 #@param {type:"number"}
|
324 |
+
|
325 |
+
#@markdown **Other Conditional Settings**
|
326 |
+
init_mse_scale = 0 #@param {type:"number"}
|
327 |
+
init_mse_image = "https://cdn.pixabay.com/photo/2022/07/30/13/10/green-longhorn-beetle-7353749_1280.jpg" #@param {type:"string"}
|
328 |
+
|
329 |
+
blue_scale = 0 #@param {type:"number"}
|
330 |
+
|
331 |
+
#@markdown **Conditional Gradient Settings**
|
332 |
+
gradient_wrt = 'x0_pred' #@param ["x", "x0_pred"]
|
333 |
+
gradient_add_to = 'both' #@param ["cond", "uncond", "both"]
|
334 |
+
decode_method = 'linear' #@param ["autoencoder","linear"]
|
335 |
+
grad_threshold_type = 'dynamic' #@param ["dynamic", "static", "mean", "schedule"]
|
336 |
+
clamp_grad_threshold = 0.2 #@param {type:"number"}
|
337 |
+
clamp_start = 0.2 #@param
|
338 |
+
clamp_stop = 0.01 #@param
|
339 |
+
grad_inject_timing = list(range(1,10)) #@param
|
340 |
+
|
341 |
+
#@markdown **Speed vs VRAM Settings**
|
342 |
+
cond_uncond_sync = True #@param {type:"boolean"}
|
343 |
+
|
344 |
+
n_samples = 1 # doesnt do anything
|
345 |
+
precision = 'autocast'
|
346 |
+
C = 4
|
347 |
+
f = 8
|
348 |
+
|
349 |
+
prompt = ""
|
350 |
+
timestring = ""
|
351 |
+
init_latent = None
|
352 |
+
init_sample = None
|
353 |
+
init_sample_raw = None
|
354 |
+
mask_sample = None
|
355 |
+
init_c = None
|
356 |
+
|
357 |
+
return locals()
|
358 |
+
|
359 |
+
args_dict = DeforumArgs()
anim_args_dict = DeforumAnimArgs()

if override_settings_with_file:
    load_args(args_dict, anim_args_dict, settings_file, custom_settings_file, verbose=False)

args = SimpleNamespace(**args_dict)
anim_args = SimpleNamespace(**anim_args_dict)

args.timestring = time.strftime('%Y%m%d%H%M%S')
args.strength = max(0.0, min(1.0, args.strength))

# Load clip model if using clip guidance
if (args.clip_scale > 0) or (args.aesthetics_scale > 0):
    root.clip_model = clip.load(args.clip_name, jit=False)[0].eval().requires_grad_(False).to(root.device)
    if (args.aesthetics_scale > 0):
        root.aesthetics_model = load_aesthetics_model(args, root)

if args.seed == -1:
    args.seed = random.randint(0, 2**32 - 1)
if not args.use_init:
    args.init_image = None
if args.sampler == 'plms' and (args.use_init or anim_args.animation_mode != 'None'):
    print(f"Init images aren't supported with PLMS yet, switching to KLMS")
    args.sampler = 'klms'
if args.sampler != 'ddim':
    args.ddim_eta = 0

if anim_args.animation_mode == 'None':
    anim_args.max_frames = 1
elif anim_args.animation_mode == 'Video Input':
    args.use_init = True

# clean up unused memory
gc.collect()
torch.cuda.empty_cache()

# dispatch to appropriate renderer
if anim_args.animation_mode == '2D' or anim_args.animation_mode == '3D':
    render_animation(args, anim_args, animation_prompts, root)
elif anim_args.animation_mode == 'Video Input':
    render_input_video(args, anim_args, animation_prompts, root)
elif anim_args.animation_mode == 'Interpolation':
    render_interpolation(args, anim_args, animation_prompts, root)
else:
    render_image_batch(args, prompts, root)

# %%
# !! {"metadata":{
# !!   "id": "gJ88kZ2-WM_v"
# !! }}
"""
# Create Video From Frames
"""

# %%
# !! {"metadata":{
# !!   "cellView": "form",
# !!   "id": "XQGeqaGAWM_v"
# !! }}
skip_video_for_run_all = True #@param {type: 'boolean'}
fps = 12 #@param {type:"number"}
#@markdown **Manual Settings**
use_manual_settings = False #@param {type:"boolean"}
image_path = "/content/drive/MyDrive/AI/StableDiffusion/2022-09/20220903000939_%05d.png" #@param {type:"string"}
mp4_path = "/content/drive/MyDrive/AI/StableDiffusion/2022-09/20220903000939.mp4" #@param {type:"string"}
render_steps = False #@param {type: 'boolean'}
path_name_modifier = "x0_pred" #@param ["x0_pred","x"]
make_gif = False

if skip_video_for_run_all == True:
    print('Skipping video creation, uncheck skip_video_for_run_all if you want to run it')
else:
    import os
    import subprocess
    from base64 import b64encode

    print(f"{image_path} -> {mp4_path}")

    if use_manual_settings:
        max_frames = "200" #@param {type:"string"}
    else:
        if render_steps: # render steps from a single image
            fname = f"{path_name_modifier}_%05d.png"
            all_step_dirs = [os.path.join(args.outdir, d) for d in os.listdir(args.outdir) if os.path.isdir(os.path.join(args.outdir,d))]
            newest_dir = max(all_step_dirs, key=os.path.getmtime)
            image_path = os.path.join(newest_dir, fname)
            print(f"Reading images from {image_path}")
            mp4_path = os.path.join(newest_dir, f"{args.timestring}_{path_name_modifier}.mp4")
            max_frames = str(args.steps)
        else: # render images for a video
            image_path = os.path.join(args.outdir, f"{args.timestring}_%05d.png")
            mp4_path = os.path.join(args.outdir, f"{args.timestring}.mp4")
            max_frames = str(anim_args.max_frames)

    # make video
    cmd = [
        'ffmpeg',
        '-y',
        '-vcodec', 'png',
        '-r', str(fps),
        '-start_number', str(0),
        '-i', image_path,
        '-frames:v', max_frames,
        '-c:v', 'libx264',
        '-vf',
        f'fps={fps}',
        '-pix_fmt', 'yuv420p',
        '-crf', '17',
        '-preset', 'veryfast',
        '-pattern_type', 'sequence',
        mp4_path
    ]
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    if process.returncode != 0:
        print(stderr)
        raise RuntimeError(stderr)

    mp4 = open(mp4_path, 'rb').read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    display.display(display.HTML(f'<video controls loop><source src="{data_url}" type="video/mp4"></video>'))

    if make_gif:
        gif_path = os.path.splitext(mp4_path)[0] + '.gif'
        cmd_gif = [
            'ffmpeg',
            '-y',
            '-i', mp4_path,
            '-r', str(fps),
            gif_path
        ]
        process_gif = subprocess.Popen(cmd_gif, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

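# --- Editor's note: the cmd list above amounts to roughly this shell invocation
# (illustrative only; paths, fps, and frame counts are filled in at runtime):
#   ffmpeg -y -vcodec png -r 12 -start_number 0 -i <outdir>/<timestring>_%05d.png \
#          -frames:v <max_frames> -c:v libx264 -vf fps=12 -pix_fmt yuv420p \
#          -crf 17 -preset veryfast -pattern_type sequence <outdir>/<timestring>.mp4
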
# %%
# !! {"metadata":{
# !!   "cellView": "form",
# !!   "id": "MMpAcyrYWM_v"
# !! }}
skip_disconnect_for_run_all = True #@param {type: 'boolean'}

if skip_disconnect_for_run_all == True:
    print('Skipping disconnect, uncheck skip_disconnect_for_run_all if you want to run it')
else:
    from google.colab import runtime
    runtime.unassign()

# %%
# !! {"main_metadata":{
# !!   "kernelspec": {
# !!     "display_name": "Python 3.10.6 ('dsd')",
# !!     "language": "python",
# !!     "name": "python3"
# !!   },
# !!   "language_info": {
# !!     "codemirror_mode": {
# !!       "name": "ipython",
# !!       "version": 3
# !!     },
# !!     "file_extension": ".py",
# !!     "mimetype": "text/x-python",
# !!     "name": "python",
# !!     "nbconvert_exporter": "python",
# !!     "pygments_lexer": "ipython3",
# !!     "version": "3.10.6"
# !!   },
# !!   "orig_nbformat": 4,
# !!   "vscode": {
# !!     "interpreter": {
# !!       "hash": "b7e04c8a9537645cbc77fa0cbde8069bc94e341b0d5ced104651213865b24e58"
# !!     }
# !!   },
# !!   "colab": {
# !!     "provenance": []
# !!   },
# !!   "accelerator": "GPU",
# !!   "gpuClass": "standard"
# !! }}

deforum-stable-diffusion/LICENSE
ADDED
The diff for this file is too large to render.
See raw diff
deforum-stable-diffusion/configs/v1-inference.yaml
ADDED
@@ -0,0 +1,70 @@
model:
  base_learning_rate: 1.0e-04
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false   # Note: different from the one we trained before
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False

    scheduler_config: # 10000 warmup steps
      target: ldm.lr_scheduler.LambdaLinearScheduler
      params:
        warm_up_steps: [ 10000 ]
        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
        f_start: [ 1.e-6 ]
        f_max: [ 1. ]
        f_min: [ 1. ]

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: True
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder

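A minimal sketch of how a config like this is typically consumed (assuming the ldm package from the stable-diffusion codebase is importable and a checkpoint has already been downloaded; the repo's own loader lives in helpers/model_load.py, which is not shown in this view, and the checkpoint path below is hypothetical):

import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config

config = OmegaConf.load("deforum-stable-diffusion/configs/v1-inference.yaml")
model = instantiate_from_config(config.model)                 # builds LatentDiffusion from the 'model' block
ckpt = torch.load("models/sd-v1-4.ckpt", map_location="cpu")  # hypothetical checkpoint path
model.load_state_dict(ckpt["state_dict"], strict=False)
model = model.eval().to("cuda")
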
deforum-stable-diffusion/configs/v2-inference-v.yaml
ADDED
@@ -0,0 +1,68 @@
model:
  base_learning_rate: 1.0e-4
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    parameterization: "v"
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False # we set this to false because this is an inference only config

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        use_checkpoint: True
        use_fp16: True
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_head_channels: 64 # need to fix for flash-attn
        use_spatial_transformer: True
        use_linear_in_transformer: True
        transformer_depth: 1
        context_dim: 1024
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          #attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: True
        layer: "penultimate"

deforum-stable-diffusion/configs/v2-inference.yaml
ADDED
@@ -0,0 +1,67 @@
model:
  base_learning_rate: 1.0e-4
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False # we set this to false because this is an inference only config

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        use_checkpoint: True
        use_fp16: True
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_head_channels: 64 # need to fix for flash-attn
        use_spatial_transformer: True
        use_linear_in_transformer: True
        transformer_depth: 1
        context_dim: 1024
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          #attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: True
        layer: "penultimate"

deforum-stable-diffusion/configs/v2-inpainting-inference.yaml
ADDED
@@ -0,0 +1,158 @@
model:
  base_learning_rate: 5.0e-05
  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: hybrid
    scale_factor: 0.18215
    monitor: val/loss_simple_ema
    finetune_keys: null
    use_ema: False

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        use_checkpoint: True
        image_size: 32 # unused
        in_channels: 9
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_head_channels: 64 # need to fix for flash-attn
        use_spatial_transformer: True
        use_linear_in_transformer: True
        transformer_depth: 1
        context_dim: 1024
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          #attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: [ ]
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: True
        layer: "penultimate"


data:
  target: ldm.data.laion.WebDataModuleFromConfig
  params:
    tar_base: null  # for concat as in LAION-A
    p_unsafe_threshold: 0.1
    filter_word_list: "data/filters.yaml"
    max_pwatermark: 0.45
    batch_size: 8
    num_workers: 6
    multinode: True
    min_size: 512
    train:
      shards:
        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -"  #{00000-94333}.tar"
      shuffle: 10000
      image_key: jpg
      image_transforms:
      - target: torchvision.transforms.Resize
        params:
          size: 512
          interpolation: 3
      - target: torchvision.transforms.RandomCrop
        params:
          size: 512
      postprocess:
        target: ldm.data.laion.AddMask
        params:
          mode: "512train-large"
          p_drop: 0.25
    # NOTE use enough shards to avoid empty validation loops in workers
    validation:
      shards:
        - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
      shuffle: 0
      image_key: jpg
      image_transforms:
      - target: torchvision.transforms.Resize
        params:
          size: 512
          interpolation: 3
      - target: torchvision.transforms.CenterCrop
        params:
          size: 512
      postprocess:
        target: ldm.data.laion.AddMask
        params:
          mode: "512train-large"
          p_drop: 0.25

lightning:
  find_unused_parameters: True
  modelcheckpoint:
    params:
      every_n_train_steps: 5000

  callbacks:
    metrics_over_trainsteps_checkpoint:
      params:
        every_n_train_steps: 10000

    image_logger:
      target: main.ImageLogger
      params:
        enable_autocast: False
        disabled: False
        batch_frequency: 1000
        max_images: 4
        increase_log_steps: False
        log_first_step: False
        log_images_kwargs:
          use_ema_scope: False
          inpaint: False
          plot_progressive_rows: False
          plot_diffusion_rows: False
          N: 4
          unconditional_guidance_scale: 5.0
          unconditional_guidance_label: [""]
          ddim_steps: 50  # todo check these out for depth2img,
          ddim_eta: 0.0  # todo check these out for depth2img,

  trainer:
    benchmark: True
    val_check_interval: 5000000
    num_sanity_val_steps: 0
    accumulate_grad_batches: 1

deforum-stable-diffusion/configs/v2-midas-inference.yaml
ADDED
@@ -0,0 +1,74 @@
model:
  base_learning_rate: 5.0e-07
  target: ldm.models.diffusion.ddpm.LatentDepth2ImageDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: hybrid
    scale_factor: 0.18215
    monitor: val/loss_simple_ema
    finetune_keys: null
    use_ema: False

    depth_stage_config:
      target: ldm.modules.midas.api.MiDaSInference
      params:
        model_type: "dpt_hybrid"

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        use_checkpoint: True
        image_size: 32 # unused
        in_channels: 5
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_head_channels: 64 # need to fix for flash-attn
        use_spatial_transformer: True
        use_linear_in_transformer: True
        transformer_depth: 1
        context_dim: 1024
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          #attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: [ ]
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: True
        layer: "penultimate"

deforum-stable-diffusion/configs/x4-upscaling.yaml
ADDED
@@ -0,0 +1,76 @@
model:
  base_learning_rate: 1.0e-04
  target: ldm.models.diffusion.ddpm.LatentUpscaleDiffusion
  params:
    parameterization: "v"
    low_scale_key: "lr"
    linear_start: 0.0001
    linear_end: 0.02
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 128
    channels: 4
    cond_stage_trainable: false
    conditioning_key: "hybrid-adm"
    monitor: val/loss_simple_ema
    scale_factor: 0.08333
    use_ema: False

    low_scale_config:
      target: ldm.modules.diffusionmodules.upscaling.ImageConcatWithNoiseAugmentation
      params:
        noise_schedule_config: # image space
          linear_start: 0.0001
          linear_end: 0.02
        max_noise_level: 350

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        use_checkpoint: True
        num_classes: 1000 # timesteps for noise conditioning (here constant, just need one)
        image_size: 128
        in_channels: 7
        out_channels: 4
        model_channels: 256
        attention_resolutions: [ 2,4,8]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 2, 4]
        disable_self_attentions: [True, True, True, False]
        disable_middle_self_attn: False
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 1024
        legacy: False
        use_linear_in_transformer: True

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        ddconfig:
          # attn_type: "vanilla-xformers" this model needs efficient attention to be feasible on HR data, also the decoder seems to break in half precision (UNet is fine though)
          double_z: True
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [ 1,2,4 ]  # num_down = len(ch_mult)-1
          num_res_blocks: 2
          attn_resolutions: [ ]
          dropout: 0.0

        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: True
        layer: "penultimate"

deforum-stable-diffusion/helpers/__init__.py
ADDED
@@ -0,0 +1,9 @@
"""
from .save_images import save_samples, get_output_folder
from .k_samplers import sampler_fn, make_inject_timing_fn
from .depth import DepthModel
from .prompt import sanitize
from .animation import construct_RotationMatrixHomogenous, getRotationMatrixManual, getPoints_for_PerspectiveTranformEstimation, warpMatrix, anim_frame_warp
from .generate import add_noise, load_img, load_mask_latent, prepare_mask
from .load_images import load_img, load_mask_latent, prepare_mask, prepare_overlay_mask
"""

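Note that the whole body of __init__.py is wrapped in a docstring, so none of these re-exports are active; the notebook code appears to import the submodules directly instead. A minimal sketch of that style (assuming the repo root is on sys.path):

from helpers.animation import DeformAnimKeys, anim_frame_warp
from helpers.aesthetics import load_aesthetics_model
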
deforum-stable-diffusion/helpers/__pycache__/__init__.cpython-38.pyc  ADDED  Binary file (644 Bytes)
deforum-stable-diffusion/helpers/__pycache__/__init__.cpython-39.pyc  ADDED  Binary file (685 Bytes)
deforum-stable-diffusion/helpers/__pycache__/aesthetics.cpython-38.pyc  ADDED  Binary file (1.65 kB)
deforum-stable-diffusion/helpers/__pycache__/animation.cpython-38.pyc  ADDED  Binary file (10.6 kB)
deforum-stable-diffusion/helpers/__pycache__/callback.cpython-38.pyc  ADDED  Binary file (4.47 kB)
deforum-stable-diffusion/helpers/__pycache__/colors.cpython-38.pyc  ADDED  Binary file (730 Bytes)
deforum-stable-diffusion/helpers/__pycache__/conditioning.cpython-38.pyc  ADDED  Binary file (9.93 kB)
deforum-stable-diffusion/helpers/__pycache__/depth.cpython-38.pyc  ADDED  Binary file (5.39 kB)
deforum-stable-diffusion/helpers/__pycache__/generate.cpython-38.pyc  ADDED  Binary file (7.78 kB)
deforum-stable-diffusion/helpers/__pycache__/generate.cpython-39.pyc  ADDED  Binary file (7.91 kB)
deforum-stable-diffusion/helpers/__pycache__/k_samplers.cpython-38.pyc  ADDED  Binary file (4.45 kB)
deforum-stable-diffusion/helpers/__pycache__/load_images.cpython-38.pyc  ADDED  Binary file (2.48 kB)
deforum-stable-diffusion/helpers/__pycache__/model_load.cpython-38.pyc  ADDED  Binary file (7.53 kB)
deforum-stable-diffusion/helpers/__pycache__/model_wrap.cpython-38.pyc  ADDED  Binary file (6.46 kB)
deforum-stable-diffusion/helpers/__pycache__/prompt.cpython-38.pyc  ADDED  Binary file (4.76 kB)
deforum-stable-diffusion/helpers/__pycache__/render.cpython-38.pyc  ADDED  Binary file (10.4 kB)
deforum-stable-diffusion/helpers/__pycache__/render.cpython-39.pyc  ADDED  Binary file (10.7 kB)
deforum-stable-diffusion/helpers/__pycache__/save_images.cpython-38.pyc  ADDED  Binary file (1.85 kB)
deforum-stable-diffusion/helpers/__pycache__/save_images.cpython-39.pyc  ADDED  Binary file (1.88 kB)
deforum-stable-diffusion/helpers/__pycache__/settings.cpython-38.pyc  ADDED  Binary file (1.23 kB)
deforum-stable-diffusion/helpers/__pycache__/settings.cpython-39.pyc  ADDED  Binary file (1.3 kB)
deforum-stable-diffusion/helpers/__pycache__/simulacra_fit_linear_model.cpython-38.pyc  ADDED  Binary file (2.32 kB)

deforum-stable-diffusion/helpers/aesthetics.py
ADDED
@@ -0,0 +1,48 @@
import os
import torch
from .simulacra_fit_linear_model import AestheticMeanPredictionLinearModel
import requests

def wget(url, outputdir):
    filename = url.split("/")[-1]

    ckpt_request = requests.get(url)
    request_status = ckpt_request.status_code

    # inform user of errors
    if request_status == 403:
        raise ConnectionRefusedError("You have not accepted the license for this model.")
    elif request_status == 404:
        raise ConnectionError("Could not make contact with server")
    elif request_status != 200:
        raise ConnectionError(f"Some other error has ocurred - response code: {request_status}")

    # write to model path
    with open(os.path.join(outputdir, filename), 'wb') as model_file:
        model_file.write(ckpt_request.content)


def load_aesthetics_model(args,root):

    clip_size = {
        "ViT-B/32": 512,
        "ViT-B/16": 512,
        "ViT-L/14": 768,
        "ViT-L/14@336px": 768,
    }

    model_name = {
        "ViT-B/32": "sac_public_2022_06_29_vit_b_32_linear.pth",
        "ViT-B/16": "sac_public_2022_06_29_vit_b_16_linear.pth",
        "ViT-L/14": "sac_public_2022_06_29_vit_l_14_linear.pth",
    }

    if not os.path.exists(os.path.join(root.models_path,model_name[args.clip_name])):
        print("Downloading aesthetics model...")
        os.makedirs(root.models_path, exist_ok=True)
        wget("https://github.com/crowsonkb/simulacra-aesthetic-models/raw/master/models/"+model_name[args.clip_name], root.models_path)

    aesthetics_model = AestheticMeanPredictionLinearModel(clip_size[args.clip_name])
    aesthetics_model.load_state_dict(torch.load(os.path.join(root.models_path,model_name[args.clip_name])))

    return aesthetics_model.to(root.device)

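A minimal usage sketch for load_aesthetics_model (the arguments mirror how configuration.py calls it; the SimpleNamespace objects below stand in for the real args/root and their paths are hypothetical):

from types import SimpleNamespace
from helpers.aesthetics import load_aesthetics_model

args = SimpleNamespace(clip_name="ViT-L/14")
root = SimpleNamespace(models_path="models", device="cuda")
aesthetics_model = load_aesthetics_model(args, root)  # downloads the linear head on first use
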
deforum-stable-diffusion/helpers/animation.py
ADDED
@@ -0,0 +1,338 @@
import numpy as np
import cv2
from functools import reduce
import math
import py3d_tools as p3d
import torch
from einops import rearrange
import re
import pathlib
import os
import pandas as pd

def check_is_number(value):
    float_pattern = r'^(?=.)([+-]?([0-9]*)(\.([0-9]+))?)$'
    return re.match(float_pattern, value)

def sample_from_cv2(sample: np.ndarray) -> torch.Tensor:
    sample = ((sample.astype(float) / 255.0) * 2) - 1
    sample = sample[None].transpose(0, 3, 1, 2).astype(np.float16)
    sample = torch.from_numpy(sample)
    return sample

def sample_to_cv2(sample: torch.Tensor, type=np.uint8) -> np.ndarray:
    sample_f32 = rearrange(sample.squeeze().cpu().numpy(), "c h w -> h w c").astype(np.float32)
    sample_f32 = ((sample_f32 * 0.5) + 0.5).clip(0, 1)
    sample_int8 = (sample_f32 * 255)
    return sample_int8.astype(type)

def construct_RotationMatrixHomogenous(rotation_angles):
    assert(type(rotation_angles)==list and len(rotation_angles)==3)
    RH = np.eye(4,4)
    cv2.Rodrigues(np.array(rotation_angles), RH[0:3, 0:3])
    return RH

def vid2frames(video_path, frames_path, n=1, overwrite=True):
    if not os.path.exists(frames_path) or overwrite:
        try:
            for f in pathlib.Path(frames_path).glob('*.jpg'):
                f.unlink()
        except:
            pass
        assert os.path.exists(video_path), f"Video input {video_path} does not exist"

        vidcap = cv2.VideoCapture(video_path)
        success,image = vidcap.read()
        count = 0
        t=1
        success = True
        while success:
            if count % n == 0:
                cv2.imwrite(frames_path + os.path.sep + f"{t:05}.jpg" , image) # save frame as JPEG file
                t += 1
            success,image = vidcap.read()
            count += 1
        print("Converted %d frames" % count)
    else: print("Frames already unpacked")

# https://en.wikipedia.org/wiki/Rotation_matrix
def getRotationMatrixManual(rotation_angles):

    rotation_angles = [np.deg2rad(x) for x in rotation_angles]

    phi = rotation_angles[0] # around x
    gamma = rotation_angles[1] # around y
    theta = rotation_angles[2] # around z

    # X rotation
    Rphi = np.eye(4,4)
    sp = np.sin(phi)
    cp = np.cos(phi)
    Rphi[1,1] = cp
    Rphi[2,2] = Rphi[1,1]
    Rphi[1,2] = -sp
    Rphi[2,1] = sp

    # Y rotation
    Rgamma = np.eye(4,4)
    sg = np.sin(gamma)
    cg = np.cos(gamma)
    Rgamma[0,0] = cg
    Rgamma[2,2] = Rgamma[0,0]
    Rgamma[0,2] = sg
    Rgamma[2,0] = -sg

    # Z rotation (in-image-plane)
    Rtheta = np.eye(4,4)
    st = np.sin(theta)
    ct = np.cos(theta)
    Rtheta[0,0] = ct
    Rtheta[1,1] = Rtheta[0,0]
    Rtheta[0,1] = -st
    Rtheta[1,0] = st

    R = reduce(lambda x,y : np.matmul(x,y), [Rphi, Rgamma, Rtheta])

    return R

def getPoints_for_PerspectiveTranformEstimation(ptsIn, ptsOut, W, H, sidelength):

    ptsIn2D = ptsIn[0,:]
    ptsOut2D = ptsOut[0,:]
    ptsOut2Dlist = []
    ptsIn2Dlist = []

    for i in range(0,4):
        ptsOut2Dlist.append([ptsOut2D[i,0], ptsOut2D[i,1]])
        ptsIn2Dlist.append([ptsIn2D[i,0], ptsIn2D[i,1]])

    pin = np.array(ptsIn2Dlist) + [W/2.,H/2.]
    pout = (np.array(ptsOut2Dlist) + [1.,1.]) * (0.5*sidelength)
    pin = pin.astype(np.float32)
    pout = pout.astype(np.float32)

    return pin, pout


def warpMatrix(W, H, theta, phi, gamma, scale, fV):

    # M is to be estimated
    M = np.eye(4, 4)

    fVhalf = np.deg2rad(fV/2.)
    d = np.sqrt(W*W+H*H)
    sideLength = scale*d/np.cos(fVhalf)
    h = d/(2.0*np.sin(fVhalf))
    n = h-(d/2.0)
    f = h+(d/2.0)

    # Translation along Z-axis by -h
    T = np.eye(4,4)
    T[2,3] = -h

    # Rotation matrices around x,y,z
    R = getRotationMatrixManual([phi, gamma, theta])


    # Projection Matrix
    P = np.eye(4,4)
    P[0,0] = 1.0/np.tan(fVhalf)
    P[1,1] = P[0,0]
    P[2,2] = -(f+n)/(f-n)
    P[2,3] = -(2.0*f*n)/(f-n)
    P[3,2] = -1.0

    # pythonic matrix multiplication
    F = reduce(lambda x,y : np.matmul(x,y), [P, T, R])

    # shape should be 1,4,3 for ptsIn and ptsOut since perspectiveTransform() expects data in this way.
    # In C++, this can be achieved by Mat ptsIn(1,4,CV_64FC3);
    ptsIn = np.array([[
        [-W/2., H/2., 0.],[ W/2., H/2., 0.],[ W/2.,-H/2., 0.],[-W/2.,-H/2., 0.]
    ]])
    ptsOut = np.array(np.zeros((ptsIn.shape), dtype=ptsIn.dtype))
    ptsOut = cv2.perspectiveTransform(ptsIn, F)

    ptsInPt2f, ptsOutPt2f = getPoints_for_PerspectiveTranformEstimation(ptsIn, ptsOut, W, H, sideLength)

    # check float32 otherwise OpenCV throws an error
    assert(ptsInPt2f.dtype == np.float32)
    assert(ptsOutPt2f.dtype == np.float32)
    M33 = cv2.getPerspectiveTransform(ptsInPt2f,ptsOutPt2f)

    return M33, sideLength

def anim_frame_warp(prev, args, anim_args, keys, frame_idx, depth_model=None, depth=None, device='cuda'):
    if isinstance(prev, np.ndarray):
        prev_img_cv2 = prev
    else:
        prev_img_cv2 = sample_to_cv2(prev)

    if anim_args.use_depth_warping:
        if depth is None and depth_model is not None:
            depth = depth_model.predict(prev_img_cv2, anim_args)
    else:
        depth = None

    if anim_args.animation_mode == '2D':
        prev_img = anim_frame_warp_2d(prev_img_cv2, args, anim_args, keys, frame_idx)
    else: # '3D'
        prev_img = anim_frame_warp_3d(device, prev_img_cv2, depth, anim_args, keys, frame_idx)

    return prev_img, depth

def anim_frame_warp_2d(prev_img_cv2, args, anim_args, keys, frame_idx):
    angle = keys.angle_series[frame_idx]
    zoom = keys.zoom_series[frame_idx]
    translation_x = keys.translation_x_series[frame_idx]
    translation_y = keys.translation_y_series[frame_idx]

    center = (args.W // 2, args.H // 2)
    trans_mat = np.float32([[1, 0, translation_x], [0, 1, translation_y]])
    rot_mat = cv2.getRotationMatrix2D(center, angle, zoom)
    trans_mat = np.vstack([trans_mat, [0,0,1]])
    rot_mat = np.vstack([rot_mat, [0,0,1]])
    if anim_args.flip_2d_perspective:
        perspective_flip_theta = keys.perspective_flip_theta_series[frame_idx]
        perspective_flip_phi = keys.perspective_flip_phi_series[frame_idx]
        perspective_flip_gamma = keys.perspective_flip_gamma_series[frame_idx]
        perspective_flip_fv = keys.perspective_flip_fv_series[frame_idx]
        M,sl = warpMatrix(args.W, args.H, perspective_flip_theta, perspective_flip_phi, perspective_flip_gamma, 1., perspective_flip_fv);
        post_trans_mat = np.float32([[1, 0, (args.W-sl)/2], [0, 1, (args.H-sl)/2]])
        post_trans_mat = np.vstack([post_trans_mat, [0,0,1]])
        bM = np.matmul(M, post_trans_mat)
        xform = np.matmul(bM, rot_mat, trans_mat)
    else:
        xform = np.matmul(rot_mat, trans_mat)

    return cv2.warpPerspective(
        prev_img_cv2,
        xform,
        (prev_img_cv2.shape[1], prev_img_cv2.shape[0]),
        borderMode=cv2.BORDER_WRAP if anim_args.border == 'wrap' else cv2.BORDER_REPLICATE
    )

def anim_frame_warp_3d(device, prev_img_cv2, depth, anim_args, keys, frame_idx):
    TRANSLATION_SCALE = 1.0/200.0 # matches Disco
    translate_xyz = [
        -keys.translation_x_series[frame_idx] * TRANSLATION_SCALE,
        keys.translation_y_series[frame_idx] * TRANSLATION_SCALE,
        -keys.translation_z_series[frame_idx] * TRANSLATION_SCALE
    ]
    rotate_xyz = [
        math.radians(keys.rotation_3d_x_series[frame_idx]),
        math.radians(keys.rotation_3d_y_series[frame_idx]),
        math.radians(keys.rotation_3d_z_series[frame_idx])
    ]
    rot_mat = p3d.euler_angles_to_matrix(torch.tensor(rotate_xyz, device=device), "XYZ").unsqueeze(0)
    result = transform_image_3d(device, prev_img_cv2, depth, rot_mat, translate_xyz, anim_args)
    torch.cuda.empty_cache()
    return result

def transform_image_3d(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args):
    # adapted and optimized version of transform_image_3d from Disco Diffusion https://github.com/alembics/disco-diffusion
    w, h = prev_img_cv2.shape[1], prev_img_cv2.shape[0]

    aspect_ratio = float(w)/float(h)
    near, far, fov_deg = anim_args.near_plane, anim_args.far_plane, anim_args.fov
    persp_cam_old = p3d.FoVPerspectiveCameras(near, far, aspect_ratio, fov=fov_deg, degrees=True, device=device)
    persp_cam_new = p3d.FoVPerspectiveCameras(near, far, aspect_ratio, fov=fov_deg, degrees=True, R=rot_mat, T=torch.tensor([translate]), device=device)

    # range of [-1,1] is important to torch grid_sample's padding handling
    y,x = torch.meshgrid(torch.linspace(-1.,1.,h,dtype=torch.float32,device=device),torch.linspace(-1.,1.,w,dtype=torch.float32,device=device))
    if depth_tensor is None:
        z = torch.ones_like(x)
    else:
        z = torch.as_tensor(depth_tensor, dtype=torch.float32, device=device)
    xyz_old_world = torch.stack((x.flatten(), y.flatten(), z.flatten()), dim=1)

    xyz_old_cam_xy = persp_cam_old.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
    xyz_new_cam_xy = persp_cam_new.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]

    offset_xy = xyz_new_cam_xy - xyz_old_cam_xy
    # affine_grid theta param expects a batch of 2D mats. Each is 2x3 to do rotation+translation.
    identity_2d_batch = torch.tensor([[1.,0.,0.],[0.,1.,0.]], device=device).unsqueeze(0)
    # coords_2d will have shape (N,H,W,2).. which is also what grid_sample needs.
    coords_2d = torch.nn.functional.affine_grid(identity_2d_batch, [1,1,h,w], align_corners=False)
    offset_coords_2d = coords_2d - torch.reshape(offset_xy, (h,w,2)).unsqueeze(0)

    image_tensor = rearrange(torch.from_numpy(prev_img_cv2.astype(np.float32)), 'h w c -> c h w').to(device)
    new_image = torch.nn.functional.grid_sample(
        image_tensor.add(1/512 - 0.0001).unsqueeze(0),
        offset_coords_2d,
        mode=anim_args.sampling_mode,
        padding_mode=anim_args.padding_mode,
        align_corners=False
    )

    # convert back to cv2 style numpy array
    result = rearrange(
        new_image.squeeze().clamp(0,255),
        'c h w -> h w c'
    ).cpu().numpy().astype(prev_img_cv2.dtype)
    return result

class DeformAnimKeys():
    def __init__(self, anim_args):
        self.angle_series = get_inbetweens(parse_key_frames(anim_args.angle), anim_args.max_frames)
        self.zoom_series = get_inbetweens(parse_key_frames(anim_args.zoom), anim_args.max_frames)
        self.translation_x_series = get_inbetweens(parse_key_frames(anim_args.translation_x), anim_args.max_frames)
        self.translation_y_series = get_inbetweens(parse_key_frames(anim_args.translation_y), anim_args.max_frames)
        self.translation_z_series = get_inbetweens(parse_key_frames(anim_args.translation_z), anim_args.max_frames)
        self.rotation_3d_x_series = get_inbetweens(parse_key_frames(anim_args.rotation_3d_x), anim_args.max_frames)
        self.rotation_3d_y_series = get_inbetweens(parse_key_frames(anim_args.rotation_3d_y), anim_args.max_frames)
        self.rotation_3d_z_series = get_inbetweens(parse_key_frames(anim_args.rotation_3d_z), anim_args.max_frames)
        self.perspective_flip_theta_series = get_inbetweens(parse_key_frames(anim_args.perspective_flip_theta), anim_args.max_frames)
        self.perspective_flip_phi_series = get_inbetweens(parse_key_frames(anim_args.perspective_flip_phi), anim_args.max_frames)
        self.perspective_flip_gamma_series = get_inbetweens(parse_key_frames(anim_args.perspective_flip_gamma), anim_args.max_frames)
        self.perspective_flip_fv_series = get_inbetweens(parse_key_frames(anim_args.perspective_flip_fv), anim_args.max_frames)
        self.noise_schedule_series = get_inbetweens(parse_key_frames(anim_args.noise_schedule), anim_args.max_frames)
        self.strength_schedule_series = get_inbetweens(parse_key_frames(anim_args.strength_schedule), anim_args.max_frames)
        self.contrast_schedule_series = get_inbetweens(parse_key_frames(anim_args.contrast_schedule), anim_args.max_frames)

def get_inbetweens(key_frames, max_frames, integer=False, interp_method='Linear'):
    import numexpr
    key_frame_series = pd.Series([np.nan for a in range(max_frames)])

    for i in range(0, max_frames):
        if i in key_frames:
            value = key_frames[i]
            value_is_number = check_is_number(value)
            # if it's only a number, leave the rest for the default interpolation
            if value_is_number:
                t = i
                key_frame_series[i] = value
            if not value_is_number:
                t = i
                key_frame_series[i] = numexpr.evaluate(value)
    key_frame_series = key_frame_series.astype(float)

    if interp_method == 'Cubic' and len(key_frames.items()) <= 3:
        interp_method = 'Quadratic'
    if interp_method == 'Quadratic' and len(key_frames.items()) <= 2:
        interp_method = 'Linear'

    key_frame_series[0] = key_frame_series[key_frame_series.first_valid_index()]
    key_frame_series[max_frames-1] = key_frame_series[key_frame_series.last_valid_index()]
    key_frame_series = key_frame_series.interpolate(method=interp_method.lower(), limit_direction='both')
    if integer:
        return key_frame_series.astype(int)
    return key_frame_series

def parse_key_frames(string, prompt_parser=None):
    # because math functions (i.e. sin(t)) can utilize brackets
    # it extracts the value in form of some stuff
    # which has previously been enclosed with brackets and
    # with a comma or end of line existing after the closing one
    pattern = r'((?P<frame>[0-9]+):[\s]*\((?P<param>[\S\s]*?)\)([,][\s]?|[\s]?$))'
    frames = dict()
    for match_object in re.finditer(pattern, string):
        frame = int(match_object.groupdict()['frame'])
        param = match_object.groupdict()['param']
        if prompt_parser:
            frames[frame] = prompt_parser(param)
        else:
            frames[frame] = param
    if frames == {} and len(string) != 0:
        raise RuntimeError('Key Frame string not correctly formatted')
    return frames

deforum-stable-diffusion/helpers/callback.py
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
import os
import torchvision.transforms.functional as TF
from torchvision.utils import make_grid
import numpy as np
from IPython import display

#
# Callback functions
#
class SamplerCallback(object):
    # Creates the callback function to be passed into the samplers for each step
    def __init__(self, args, root, mask=None, init_latent=None, sigmas=None, sampler=None,
                 verbose=False):
        self.model = root.model
        self.device = root.device
        self.sampler_name = args.sampler
        self.dynamic_threshold = args.dynamic_threshold
        self.static_threshold = args.static_threshold
        self.mask = mask
        self.init_latent = init_latent
        self.sigmas = sigmas
        self.sampler = sampler
        self.verbose = verbose
        self.batch_size = args.n_samples
        self.save_sample_per_step = args.save_sample_per_step
        self.show_sample_per_step = args.show_sample_per_step
        self.paths_to_image_steps = [os.path.join(args.outdir, f"{args.timestring}_{index:02}_{args.seed}") for index in range(args.n_samples)]

        if self.save_sample_per_step:
            for path in self.paths_to_image_steps:
                os.makedirs(path, exist_ok=True)

        self.step_index = 0

        self.noise = None
        if init_latent is not None:
            self.noise = torch.randn_like(init_latent, device=self.device)

        self.mask_schedule = None
        if sigmas is not None and len(sigmas) > 0:
            self.mask_schedule, _ = torch.sort(sigmas/torch.max(sigmas))
        elif len(sigmas) == 0:
            self.mask = None  # no mask needed if no steps (usually happens because strength==1.0)

        if self.sampler_name in ["plms", "ddim"]:
            if mask is not None:
                assert sampler is not None, "Callback function for stable-diffusion samplers requires sampler variable"

        if self.sampler_name in ["plms", "ddim"]:
            # Callback function formatted for CompVis latent diffusion samplers
            self.callback = self.img_callback_
        else:
            # Default callback function uses k-diffusion sampler variables
            self.callback = self.k_callback_

        self.verbose_print = print if verbose else lambda *args, **kwargs: None

    def display_images(self, images):
        images = images.double().cpu().add(1).div(2).clamp(0, 1)
        images = torch.tensor(np.array(images))
        grid = make_grid(images, 4).cpu()
        display.clear_output(wait=True)
        display.display(TF.to_pil_image(grid))
        return

    def view_sample_step(self, latents, path_name_modifier=''):
        if self.save_sample_per_step:
            samples = self.model.decode_first_stage(latents)
            fname = f'{path_name_modifier}_{self.step_index:05}.png'
            for i, sample in enumerate(samples):
                sample = sample.double().cpu().add(1).div(2).clamp(0, 1)
                sample = torch.tensor(np.array(sample))
                grid = make_grid(sample, 4).cpu()
                TF.to_pil_image(grid).save(os.path.join(self.paths_to_image_steps[i], fname))
        if self.show_sample_per_step:
            samples = self.model.linear_decode(latents)
            print(path_name_modifier)
            self.display_images(samples)
        return

    # The callback function is applied to the image at each step
    def dynamic_thresholding_(self, img, threshold):
        # Dynamic thresholding from the Imagen paper (May 2022)
        s = np.percentile(np.abs(img.cpu()), threshold, axis=tuple(range(1, img.ndim)))
        s = np.max(np.append(s, 1.0))
        torch.clamp_(img, -1*s, s)
        torch.FloatTensor.div_(img, s)

    # Callback for samplers in the k-diffusion repo, called thus:
    #   callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
    def k_callback_(self, args_dict):
        self.step_index = args_dict['i']
        if self.dynamic_threshold is not None:
            self.dynamic_thresholding_(args_dict['x'], self.dynamic_threshold)
        if self.static_threshold is not None:
            torch.clamp_(args_dict['x'], -1*self.static_threshold, self.static_threshold)
        if self.mask is not None:
            init_noise = self.init_latent + self.noise * args_dict['sigma']
            is_masked = torch.logical_and(self.mask >= self.mask_schedule[args_dict['i']], self.mask != 0)
            new_img = init_noise * torch.where(is_masked, 1, 0) + args_dict['x'] * torch.where(is_masked, 0, 1)
            args_dict['x'].copy_(new_img)

        self.view_sample_step(args_dict['denoised'], "x0_pred")
        self.view_sample_step(args_dict['x'], "x")

    # Callback for CompVis samplers
    # Function that is called on the image (img) and step (i) at each step
    def img_callback_(self, img, pred_x0, i):
        self.step_index = i
        # Thresholding functions
        if self.dynamic_threshold is not None:
            self.dynamic_thresholding_(img, self.dynamic_threshold)
        if self.static_threshold is not None:
            torch.clamp_(img, -1*self.static_threshold, self.static_threshold)
        if self.mask is not None:
            i_inv = len(self.sigmas) - i - 1
            init_noise = self.sampler.stochastic_encode(self.init_latent, torch.tensor([i_inv]*self.batch_size).to(self.device), noise=self.noise)
            is_masked = torch.logical_and(self.mask >= self.mask_schedule[i], self.mask != 0)
            new_img = init_noise * torch.where(is_masked, 1, 0) + img * torch.where(is_masked, 0, 1)
            img.copy_(new_img)

        self.view_sample_step(pred_x0, "x0_pred")
        self.view_sample_step(img, "x")
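As a quick illustration of the dynamic-thresholding rule used in dynamic_thresholding_ above, here is a minimal standalone sketch applied to a random latent-shaped tensor; the tensor shape and the 99.5 percentile are illustrative assumptions, not values taken from this repo.

import numpy as np
import torch

# Hypothetical latent batch standing in for args_dict['x'] during sampling.
img = torch.randn(2, 4, 64, 64) * 3.0
threshold = 99.5  # illustrative percentile for dynamic_threshold

# Same rule as dynamic_thresholding_: clamp to the per-batch percentile, then rescale.
s = np.percentile(np.abs(img.numpy()), threshold, axis=tuple(range(1, img.ndim)))
s = np.max(np.append(s, 1.0))
torch.clamp_(img, -1 * float(s), float(s))
img.div_(float(s))

print(img.abs().max())  # bounded by 1.0 after thresholding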
deforum-stable-diffusion/helpers/colors.py
ADDED
@@ -0,0 +1,16 @@
from skimage.exposure import match_histograms
import cv2

def maintain_colors(prev_img, color_match_sample, mode):
    if mode == 'Match Frame 0 RGB':
        return match_histograms(prev_img, color_match_sample, multichannel=True)
    elif mode == 'Match Frame 0 HSV':
        prev_img_hsv = cv2.cvtColor(prev_img, cv2.COLOR_RGB2HSV)
        color_match_hsv = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2HSV)
        matched_hsv = match_histograms(prev_img_hsv, color_match_hsv, multichannel=True)
        return cv2.cvtColor(matched_hsv, cv2.COLOR_HSV2RGB)
    else:  # Match Frame 0 LAB
        prev_img_lab = cv2.cvtColor(prev_img, cv2.COLOR_RGB2LAB)
        color_match_lab = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2LAB)
        matched_lab = match_histograms(prev_img_lab, color_match_lab, multichannel=True)
        return cv2.cvtColor(matched_lab, cv2.COLOR_LAB2RGB)
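A minimal usage sketch for maintain_colors, assuming the helpers package is importable and using random arrays as stand-ins for real animation frames; it relies on the same skimage behavior the file above assumes.

import numpy as np
from helpers.colors import maintain_colors  # assumes the helpers package is on the import path

# Two hypothetical uint8 RGB frames standing in for real animation frames.
frame0 = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)
frame1 = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)

# Anchor frame1's palette to frame0, as the animation loop would do every frame.
corrected = maintain_colors(frame1, frame0, 'Match Frame 0 RGB')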
deforum-stable-diffusion/helpers/conditioning.py
ADDED
@@ -0,0 +1,262 @@
import torch
import torch.nn as nn
from torch.nn import functional as F
import clip
from torchvision.transforms import Normalize as Normalize
from torchvision.utils import make_grid
import numpy as np
from IPython import display
from sklearn.cluster import KMeans
import torchvision.transforms.functional as TF

###
# Loss functions
###


## CLIP -----------------------------------------

class MakeCutouts(nn.Module):
    def __init__(self, cut_size, cutn, cut_pow=1.):
        super().__init__()
        self.cut_size = cut_size
        self.cutn = cutn
        self.cut_pow = cut_pow

    def forward(self, input):
        sideY, sideX = input.shape[2:4]
        max_size = min(sideX, sideY)
        min_size = min(sideX, sideY, self.cut_size)
        cutouts = []
        for _ in range(self.cutn):
            size = int(torch.rand([])**self.cut_pow * (max_size - min_size) + min_size)
            offsetx = torch.randint(0, sideX - size + 1, ())
            offsety = torch.randint(0, sideY - size + 1, ())
            cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size]
            cutouts.append(F.adaptive_avg_pool2d(cutout, self.cut_size))
        return torch.cat(cutouts)


def spherical_dist_loss(x, y):
    x = F.normalize(x, dim=-1)
    y = F.normalize(y, dim=-1)
    return (x - y).norm(dim=-1).div(2).arcsin().pow(2).mul(2)

def make_clip_loss_fn(root, args):
    clip_size = root.clip_model.visual.input_resolution  # for open_clip: clip_model.visual.image_size

    def parse_prompt(prompt):
        if prompt.startswith('http://') or prompt.startswith('https://'):
            vals = prompt.rsplit(':', 2)
            vals = [vals[0] + ':' + vals[1], *vals[2:]]
        else:
            vals = prompt.rsplit(':', 1)
        vals = vals + ['', '1'][len(vals):]
        return vals[0], float(vals[1])

    def parse_clip_prompts(clip_prompt):
        target_embeds, weights = [], []
        for prompt in clip_prompt:
            txt, weight = parse_prompt(prompt)
            target_embeds.append(root.clip_model.encode_text(clip.tokenize(txt).to(root.device)).float())
            weights.append(weight)
        target_embeds = torch.cat(target_embeds)
        weights = torch.tensor(weights, device=root.device)
        if weights.sum().abs() < 1e-3:
            raise RuntimeError('Clip prompt weights must not sum to 0.')
        weights /= weights.sum().abs()
        return target_embeds, weights

    normalize = Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
                          std=[0.26862954, 0.26130258, 0.27577711])

    make_cutouts = MakeCutouts(clip_size, args.cutn, args.cut_pow)
    target_embeds, weights = parse_clip_prompts(args.clip_prompt)

    def clip_loss_fn(x, sigma, **kwargs):
        nonlocal target_embeds, weights, make_cutouts, normalize
        clip_in = normalize(make_cutouts(x.add(1).div(2)))
        image_embeds = root.clip_model.encode_image(clip_in).float()
        dists = spherical_dist_loss(image_embeds[:, None], target_embeds[None])
        dists = dists.view([args.cutn, 1, -1])
        losses = dists.mul(weights).sum(2).mean(0)
        return losses.sum()

    return clip_loss_fn

def make_aesthetics_loss_fn(root, args):
    clip_size = root.clip_model.visual.input_resolution  # for open_clip: clip_model.visual.image_size

    def aesthetics_cond_fn(x, sigma, **kwargs):
        clip_in = F.interpolate(x, (clip_size, clip_size))
        image_embeds = root.clip_model.encode_image(clip_in).float()
        losses = (10 - root.aesthetics_model(image_embeds)[0])
        return losses.sum()

    return aesthetics_cond_fn

## end CLIP -----------------------------------------

# blue loss from @johnowhitaker's tutorial on Grokking Stable Diffusion
def blue_loss_fn(x, sigma, **kwargs):
    # How far the blue channel values are from 0.9:
    error = torch.abs(x[:, -1, :, :] - 0.9).mean()
    return error

# MSE loss from init
def make_mse_loss(target):
    def mse_loss(x, sigma, **kwargs):
        return (x - target).square().mean()
    return mse_loss

# Exposure loss relative to a target value
def exposure_loss(target):
    def exposure_loss_fn(x, sigma, **kwargs):
        error = torch.abs(x - target).mean()
        return error
    return exposure_loss_fn

def mean_loss_fn(x, sigma, **kwargs):
    error = torch.abs(x).mean()
    return error

def var_loss_fn(x, sigma, **kwargs):
    error = x.var()
    return error

def get_color_palette(root, n_colors, target, verbose=False):
    def display_color_palette(color_list):
        # Expand to 64x64 grid of single color pixels
        images = color_list.unsqueeze(2).repeat(1, 1, 64).unsqueeze(3).repeat(1, 1, 1, 64)
        images = images.double().cpu().add(1).div(2).clamp(0, 1)
        images = torch.tensor(np.array(images))
        grid = make_grid(images, 8).cpu()
        display.display(TF.to_pil_image(grid))
        return

    # Create color palette
    kmeans = KMeans(n_clusters=n_colors, random_state=0).fit(torch.flatten(target[0], 1, 2).T.cpu().numpy())
    color_list = torch.Tensor(kmeans.cluster_centers_).to(root.device)
    if verbose:
        display_color_palette(color_list)
    # Get ratio of each color class in the target image
    color_indexes, color_counts = np.unique(kmeans.labels_, return_counts=True)
    # color_list = color_list[color_indexes]
    return color_list, color_counts

def make_rgb_color_match_loss(root, target, n_colors, ignore_sat_weight=None, img_shape=None, device='cuda:0'):
    """
    target (tensor): Image sample (values from -1 to 1) to extract the color palette
    n_colors (int): Number of colors in the color palette
    ignore_sat_weight (None or number>0): Scale to ignore color saturation in color comparison
    img_shape (None or (int, int)): shape (width, height) of sample that the conditioning gradient is applied to,
        if None then calculate the target color distribution during gradient calculation
        rather than once at the beginning
    """
    assert n_colors > 0, "Must use at least one color with color match loss"

    def adjust_saturation(sample, saturation_factor):
        # as in torchvision.transforms.functional.adjust_saturation, but for tensors with values from -1,1
        return blend(sample, TF.rgb_to_grayscale(sample), saturation_factor)

    def blend(img1, img2, ratio):
        return (ratio * img1 + (1.0 - ratio) * img2).clamp(-1, 1).to(img1.dtype)

    def color_distance_distributions(n_colors, img_shape, color_list, color_counts, n_images=1):
        # Get the target color distance distributions
        # Ensure color counts total the amount of pixels in the image
        n_pixels = img_shape[0]*img_shape[1]
        color_counts = (color_counts * n_pixels / sum(color_counts)).astype(int)

        # Make color distances for each color, sorted by distance
        color_distributions = torch.zeros((n_colors, n_images, n_pixels), device=device)
        for i_image in range(n_images):
            for ic, color0 in enumerate(color_list):
                i_dist = 0
                for jc, color1 in enumerate(color_list):
                    color_dist = torch.linalg.norm(color0 - color1)
                    color_distributions[ic, i_image, i_dist:i_dist+color_counts[jc]] = color_dist
                    i_dist += color_counts[jc]
        color_distributions, _ = torch.sort(color_distributions, dim=2)
        return color_distributions

    color_list, color_counts = get_color_palette(root, n_colors, target)
    color_distributions = None
    if img_shape is not None:
        color_distributions = color_distance_distributions(n_colors, img_shape, color_list, color_counts)

    def rgb_color_ratio_loss(x, sigma, **kwargs):
        nonlocal color_distributions
        all_color_norm_distances = torch.ones(len(color_list), x.shape[0], x.shape[2], x.shape[3]).to(device) * 6.0  # distance to a color won't exceed the max norm distance between -1 and 1 in 3 color dimensions

        for ic, color in enumerate(color_list):
            # Make a tensor of entirely one color
            color = color[None, :, None].repeat(1, 1, x.shape[2]).unsqueeze(3).repeat(1, 1, 1, x.shape[3])
            # Get the color distances
            if ignore_sat_weight is None:
                # Simple color distance
                color_distances = torch.linalg.norm(x - color, dim=1)
            else:
                # Color distance if the colors were saturated
                # This is to make color comparison ignore shadows and highlights, for example
                color_distances = torch.linalg.norm(adjust_saturation(x, ignore_sat_weight) - color, dim=1)

            all_color_norm_distances[ic] = color_distances
        all_color_norm_distances = torch.flatten(all_color_norm_distances, start_dim=2)

        if color_distributions is None:
            color_distributions = color_distance_distributions(n_colors,
                                                               (x.shape[2], x.shape[3]),
                                                               color_list,
                                                               color_counts,
                                                               n_images=x.shape[0])

        # Sort the color distances so we can compare them as if they were a cumulative distribution function
        all_color_norm_distances, _ = torch.sort(all_color_norm_distances, dim=2)

        color_norm_distribution_diff = all_color_norm_distances - color_distributions

        return color_norm_distribution_diff.square().mean()

    return rgb_color_ratio_loss


###
# Thresholding functions for grad
###
def threshold_by(threshold, threshold_type, clamp_schedule):

    def dynamic_thresholding(vals, sigma):
        # Dynamic thresholding from the Imagen paper (May 2022)
        s = np.percentile(np.abs(vals.cpu()), threshold, axis=tuple(range(1, vals.ndim)))
        s = np.max(np.append(s, 1.0))
        vals = torch.clamp(vals, -1*s, s)
        vals = torch.FloatTensor.div(vals, s)
        return vals

    def static_thresholding(vals, sigma):
        vals = torch.clamp(vals, -1*threshold, threshold)
        return vals

    def mean_thresholding(vals, sigma):  # Thresholding that appears in Jax and Disco
        magnitude = vals.square().mean(axis=(1, 2, 3), keepdims=True).sqrt()
        vals = vals * torch.where(magnitude > threshold, threshold / magnitude, 1.0)
        return vals

    def scheduling(vals, sigma):
        clamp_val = clamp_schedule[sigma.item()]
        magnitude = vals.square().mean().sqrt()
        vals = vals * magnitude.clamp(max=clamp_val) / magnitude
        #print(clamp_val)
        return vals

    if threshold_type == 'dynamic':
        return dynamic_thresholding
    elif threshold_type == 'static':
        return static_thresholding
    elif threshold_type == 'mean':
        return mean_thresholding
    elif threshold_type == 'schedule':
        return scheduling
    else:
        raise Exception(f"Thresholding type {threshold_type} not supported")
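To make the 'mean' branch of threshold_by concrete, here is a standalone sketch of the same RMS-clamping rule applied to a random gradient tensor; the tensor shape and the 0.2 threshold are assumptions for the example, not repo defaults.

import torch

# Hypothetical conditioning gradient for one latent, as passed to the thresholding fn.
grad = torch.randn(1, 4, 64, 64) * 5.0
clamp_threshold = 0.2  # illustrative value for clamp_grad_threshold

# Same rule as mean_thresholding above: rescale so the RMS magnitude never exceeds the threshold.
magnitude = grad.square().mean(dim=(1, 2, 3), keepdim=True).sqrt()
grad = grad * torch.where(magnitude > clamp_threshold, clamp_threshold / magnitude, torch.ones_like(magnitude))
print(grad.square().mean().sqrt())  # now <= clamp_threshold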
deforum-stable-diffusion/helpers/depth.py
ADDED
@@ -0,0 +1,175 @@
import cv2
import math
import numpy as np
import os
import requests
import torch
import torchvision.transforms as T
import torchvision.transforms.functional as TF

from einops import rearrange, repeat
from PIL import Image

from infer import InferenceHelper
from midas.dpt_depth import DPTDepthModel
from midas.transforms import Resize, NormalizeImage, PrepareForNet


def wget(url, outputdir):
    filename = url.split("/")[-1]

    ckpt_request = requests.get(url)
    request_status = ckpt_request.status_code

    # inform user of errors
    if request_status == 403:
        raise ConnectionRefusedError("You have not accepted the license for this model.")
    elif request_status == 404:
        raise ConnectionError("Could not make contact with server")
    elif request_status != 200:
        raise ConnectionError(f"Some other error has occurred - response code: {request_status}")

    # write to model path
    with open(os.path.join(outputdir, filename), 'wb') as model_file:
        model_file.write(ckpt_request.content)


class DepthModel():
    def __init__(self, device):
        self.adabins_helper = None
        self.depth_min = 1000
        self.depth_max = -1000
        self.device = device
        self.midas_model = None
        self.midas_transform = None

    def load_adabins(self, models_path):
        if not os.path.exists(os.path.join(models_path, 'AdaBins_nyu.pt')):
            print("Downloading AdaBins_nyu.pt...")
            os.makedirs(models_path, exist_ok=True)
            wget("https://cloudflare-ipfs.com/ipfs/Qmd2mMnDLWePKmgfS8m6ntAg4nhV5VkUyAydYBp8cWWeB7/AdaBins_nyu.pt", models_path)
        self.adabins_helper = InferenceHelper(models_path, dataset='nyu', device=self.device)

    def load_midas(self, models_path, half_precision=True):
        if not os.path.exists(os.path.join(models_path, 'dpt_large-midas-2f21e586.pt')):
            print("Downloading dpt_large-midas-2f21e586.pt...")
            wget("https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt", models_path)

        self.midas_model = DPTDepthModel(
            path=os.path.join(models_path, "dpt_large-midas-2f21e586.pt"),
            backbone="vitl16_384",
            non_negative=True,
        )
        normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

        self.midas_transform = T.Compose([
            Resize(
                384, 384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="minimal",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            normalization,
            PrepareForNet()
        ])

        self.midas_model.eval()
        if half_precision and self.device == torch.device("cuda"):
            self.midas_model = self.midas_model.to(memory_format=torch.channels_last)
            self.midas_model = self.midas_model.half()
        self.midas_model.to(self.device)

    def predict(self, prev_img_cv2, anim_args) -> torch.Tensor:
        w, h = prev_img_cv2.shape[1], prev_img_cv2.shape[0]

        # predict depth with AdaBins
        use_adabins = anim_args.midas_weight < 1.0 and self.adabins_helper is not None
        if use_adabins:
            MAX_ADABINS_AREA = 500000
            MIN_ADABINS_AREA = 448*448

            # resize image if too large or too small
            img_pil = Image.fromarray(cv2.cvtColor(prev_img_cv2.astype(np.uint8), cv2.COLOR_RGB2BGR))
            image_pil_area = w*h
            resized = True
            if image_pil_area > MAX_ADABINS_AREA:
                scale = math.sqrt(MAX_ADABINS_AREA) / math.sqrt(image_pil_area)
                depth_input = img_pil.resize((int(w*scale), int(h*scale)), Image.LANCZOS)  # LANCZOS is good for downsampling
                print(f"  resized to {depth_input.width}x{depth_input.height}")
            elif image_pil_area < MIN_ADABINS_AREA:
                scale = math.sqrt(MIN_ADABINS_AREA) / math.sqrt(image_pil_area)
                depth_input = img_pil.resize((int(w*scale), int(h*scale)), Image.BICUBIC)
                print(f"  resized to {depth_input.width}x{depth_input.height}")
            else:
                depth_input = img_pil
                resized = False

            # predict depth and resize back to original dimensions
            try:
                with torch.no_grad():
                    _, adabins_depth = self.adabins_helper.predict_pil(depth_input)
                    if resized:
                        adabins_depth = TF.resize(
                            torch.from_numpy(adabins_depth),
                            torch.Size([h, w]),
                            interpolation=TF.InterpolationMode.BICUBIC
                        )
                adabins_depth = adabins_depth.cpu().numpy()
                adabins_depth = adabins_depth.squeeze()
            except:
                print("  exception encountered, falling back to pure MiDaS")
                use_adabins = False
            torch.cuda.empty_cache()

        if self.midas_model is not None:
            # convert image from 0->255 uint8 to 0->1 float for feeding to MiDaS
            img_midas = prev_img_cv2.astype(np.float32) / 255.0
            img_midas_input = self.midas_transform({"image": img_midas})["image"]

            # MiDaS depth estimation implementation
            sample = torch.from_numpy(img_midas_input).float().to(self.device).unsqueeze(0)
            if self.device == torch.device("cuda"):
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()
            with torch.no_grad():
                midas_depth = self.midas_model.forward(sample)
            midas_depth = torch.nn.functional.interpolate(
                midas_depth.unsqueeze(1),
                size=img_midas.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()
            midas_depth = midas_depth.cpu().numpy()
            torch.cuda.empty_cache()

            # MiDaS makes the near values greater, and the far values lesser. Let's reverse that and try to align with AdaBins a bit better.
            midas_depth = np.subtract(50.0, midas_depth)
            midas_depth = midas_depth / 19.0

            # blend between MiDaS and AdaBins predictions
            if use_adabins:
                depth_map = midas_depth*anim_args.midas_weight + adabins_depth*(1.0-anim_args.midas_weight)
            else:
                depth_map = midas_depth

            depth_map = np.expand_dims(depth_map, axis=0)
            depth_tensor = torch.from_numpy(depth_map).squeeze().to(self.device)
        else:
            depth_tensor = torch.ones((h, w), device=self.device)

        return depth_tensor

    def save(self, filename: str, depth: torch.Tensor):
        depth = depth.cpu().numpy()
        if len(depth.shape) == 2:
            depth = np.expand_dims(depth, axis=0)
        self.depth_min = min(self.depth_min, depth.min())
        self.depth_max = max(self.depth_max, depth.max())
        print(f"  depth min:{depth.min()} max:{depth.max()}")
        denom = max(1e-8, self.depth_max - self.depth_min)
        temp = rearrange((depth - self.depth_min) / denom * 255, 'c h w -> h w c')
        temp = repeat(temp, 'h w 1 -> h w c', c=3)
        Image.fromarray(temp.astype(np.uint8)).save(filename)
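A hedged usage sketch for DepthModel, assuming the AdaBins/MiDaS dependencies and the helpers package are importable and using hypothetical file names for the frame and output; it only mirrors the call order this file expects (load, predict, save).

import os
import cv2
import torch
from helpers.depth import DepthModel  # assumes the helpers package and its depth dependencies are on the path

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
os.makedirs('models', exist_ok=True)

depth_model = DepthModel(device)
depth_model.load_midas('models')       # downloads dpt_large-midas-2f21e586.pt into ./models if missing
# depth_model.load_adabins('models')   # optional, only used when anim_args.midas_weight < 1.0

class AnimArgs:                        # minimal stand-in for the anim_args namespace
    midas_weight = 1.0

frame = cv2.imread('frame_000.png')    # hypothetical input frame
depth = depth_model.predict(frame, AnimArgs())
depth_model.save('depth_000.png', depth)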
deforum-stable-diffusion/helpers/generate.py
ADDED
@@ -0,0 +1,282 @@
import torch
from PIL import Image
import requests
import numpy as np
import torchvision.transforms.functional as TF
from pytorch_lightning import seed_everything
import os
from ldm.models.diffusion.plms import PLMSSampler
from ldm.models.diffusion.ddim import DDIMSampler
from k_diffusion.external import CompVisDenoiser
from torch import autocast
from contextlib import nullcontext
from einops import rearrange, repeat

from .prompt import get_uc_and_c
from .k_samplers import sampler_fn, make_inject_timing_fn
from scipy.ndimage import gaussian_filter

from .callback import SamplerCallback

from .conditioning import exposure_loss, make_mse_loss, get_color_palette, make_clip_loss_fn
from .conditioning import make_rgb_color_match_loss, blue_loss_fn, threshold_by, make_aesthetics_loss_fn, mean_loss_fn, var_loss_fn
from .model_wrap import CFGDenoiserWithGrad
from .load_images import load_img, load_mask_latent, prepare_mask, prepare_overlay_mask

def add_noise(sample: torch.Tensor, noise_amt: float) -> torch.Tensor:
    return sample + torch.randn(sample.shape, device=sample.device) * noise_amt

def generate(args, root, frame=0, return_latent=False, return_sample=False, return_c=False):
    seed_everything(args.seed)
    os.makedirs(args.outdir, exist_ok=True)

    sampler = PLMSSampler(root.model) if args.sampler == 'plms' else DDIMSampler(root.model)
    model_wrap = CompVisDenoiser(root.model)
    batch_size = args.n_samples
    prompt = args.prompt
    assert prompt is not None
    data = [batch_size * [prompt]]
    precision_scope = autocast if args.precision == "autocast" else nullcontext

    init_latent = None
    mask_image = None
    init_image = None
    if args.init_latent is not None:
        init_latent = args.init_latent
    elif args.init_sample is not None:
        with precision_scope("cuda"):
            init_latent = root.model.get_first_stage_encoding(root.model.encode_first_stage(args.init_sample))
    elif args.use_init and args.init_image is not None and args.init_image != '':
        init_image, mask_image = load_img(args.init_image,
                                          shape=(args.W, args.H),
                                          use_alpha_as_mask=args.use_alpha_as_mask)
        init_image = init_image.to(root.device)
        init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
        with precision_scope("cuda"):
            init_latent = root.model.get_first_stage_encoding(root.model.encode_first_stage(init_image))  # move to latent space

    if not args.use_init and args.strength > 0 and args.strength_0_no_init:
        print("\nNo init image, but strength > 0. Strength has been auto set to 0, since use_init is False.")
        print("If you want to force strength > 0 with no init, please set strength_0_no_init to False.\n")
        args.strength = 0

    # Mask functions
    if args.use_mask:
        assert args.mask_file is not None or mask_image is not None, "use_mask==True: A mask image is required for a mask. Please enter a mask_file or use an init image with an alpha channel"
        assert args.use_init, "use_mask==True: use_init is required for a mask"
        assert init_latent is not None, "use_mask==True: A latent init image is required for a mask"

        mask = prepare_mask(args.mask_file if mask_image is None else mask_image,
                            init_latent.shape,
                            args.mask_contrast_adjust,
                            args.mask_brightness_adjust,
                            args.invert_mask)

        if (torch.all(mask == 0) or torch.all(mask == 1)) and args.use_alpha_as_mask:
            raise Warning("use_alpha_as_mask==True: Using the alpha channel from the init image as a mask, but the alpha channel is blank.")

        mask = mask.to(root.device)
        mask = repeat(mask, '1 ... -> b ...', b=batch_size)
    else:
        mask = None

    assert not ((args.use_mask and args.overlay_mask) and (args.init_sample is None and init_image is None)), "Need an init image when use_mask == True and overlay_mask == True"

    # Init MSE loss image
    init_mse_image = None
    if args.init_mse_scale and args.init_mse_image is not None and args.init_mse_image != '':
        init_mse_image, mask_image = load_img(args.init_mse_image,
                                              shape=(args.W, args.H),
                                              use_alpha_as_mask=args.use_alpha_as_mask)
        init_mse_image = init_mse_image.to(root.device)
        init_mse_image = repeat(init_mse_image, '1 ... -> b ...', b=batch_size)

    assert not (args.init_mse_scale != 0 and (args.init_mse_image is None or args.init_mse_image == '')), "Need an init image when init_mse_scale != 0"

    t_enc = int((1.0-args.strength) * args.steps)

    # Noise schedule for the k-diffusion samplers (used for masking)
    k_sigmas = model_wrap.get_sigmas(args.steps)
    args.clamp_schedule = dict(zip(k_sigmas.tolist(), np.linspace(args.clamp_start, args.clamp_stop, args.steps+1)))
    k_sigmas = k_sigmas[len(k_sigmas)-t_enc-1:]

    if args.sampler in ['plms', 'ddim']:
        sampler.make_schedule(ddim_num_steps=args.steps, ddim_eta=args.ddim_eta, ddim_discretize='fill', verbose=False)

    if args.colormatch_scale != 0:
        assert args.colormatch_image is not None, "If using color match loss, colormatch_image is needed"
        colormatch_image, _ = load_img(args.colormatch_image)
        colormatch_image = colormatch_image.to('cpu')
        del(_)
    else:
        colormatch_image = None

    # Loss functions
    if args.init_mse_scale != 0:
        if args.decode_method == "linear":
            mse_loss_fn = make_mse_loss(root.model.linear_decode(root.model.get_first_stage_encoding(root.model.encode_first_stage(init_mse_image.to(root.device)))))
        else:
            mse_loss_fn = make_mse_loss(init_mse_image)
    else:
        mse_loss_fn = None

    if args.colormatch_scale != 0:
        _, _ = get_color_palette(root, args.colormatch_n_colors, colormatch_image, verbose=True)  # display target color palette outside the latent space
        if args.decode_method == "linear":
            grad_img_shape = (int(args.W/args.f), int(args.H/args.f))
            colormatch_image = root.model.linear_decode(root.model.get_first_stage_encoding(root.model.encode_first_stage(colormatch_image.to(root.device))))
            colormatch_image = colormatch_image.to('cpu')
        else:
            grad_img_shape = (args.W, args.H)
        color_loss_fn = make_rgb_color_match_loss(root,
                                                  colormatch_image,
                                                  n_colors=args.colormatch_n_colors,
                                                  img_shape=grad_img_shape,
                                                  ignore_sat_weight=args.ignore_sat_weight)
    else:
        color_loss_fn = None

    if args.clip_scale != 0:
        clip_loss_fn = make_clip_loss_fn(root, args)
    else:
        clip_loss_fn = None

    if args.aesthetics_scale != 0:
        aesthetics_loss_fn = make_aesthetics_loss_fn(root, args)
    else:
        aesthetics_loss_fn = None

    if args.exposure_scale != 0:
        exposure_loss_fn = exposure_loss(args.exposure_target)
    else:
        exposure_loss_fn = None

    loss_fns_scales = [
        [clip_loss_fn,       args.clip_scale],
        [blue_loss_fn,       args.blue_scale],
        [mean_loss_fn,       args.mean_scale],
        [exposure_loss_fn,   args.exposure_scale],
        [var_loss_fn,        args.var_scale],
        [mse_loss_fn,        args.init_mse_scale],
        [color_loss_fn,      args.colormatch_scale],
        [aesthetics_loss_fn, args.aesthetics_scale]
    ]

    # Conditioning gradients not implemented for ddim or PLMS
    assert not (any([cond_fs[1] != 0 for cond_fs in loss_fns_scales]) and (args.sampler in ["ddim", "plms"])), "Conditioning gradients not implemented for ddim or plms. Please use a different sampler."

    callback = SamplerCallback(args=args,
                               root=root,
                               mask=mask,
                               init_latent=init_latent,
                               sigmas=k_sigmas,
                               sampler=sampler,
                               verbose=False).callback

    clamp_fn = threshold_by(threshold=args.clamp_grad_threshold, threshold_type=args.grad_threshold_type, clamp_schedule=args.clamp_schedule)

    grad_inject_timing_fn = make_inject_timing_fn(args.grad_inject_timing, model_wrap, args.steps)

    cfg_model = CFGDenoiserWithGrad(model_wrap,
                                    loss_fns_scales,
                                    clamp_fn,
                                    args.gradient_wrt,
                                    args.gradient_add_to,
                                    args.cond_uncond_sync,
                                    decode_method=args.decode_method,
                                    grad_inject_timing_fn=grad_inject_timing_fn,  # option to use grad in only a few of the steps
                                    grad_consolidate_fn=None,  # function to add grad to image fn(img, grad, sigma)
                                    verbose=False)

    results = []
    with torch.no_grad():
        with precision_scope("cuda"):
            with root.model.ema_scope():
                for prompts in data:
                    if isinstance(prompts, tuple):
                        prompts = list(prompts)
                    if args.prompt_weighting:
                        uc, c = get_uc_and_c(prompts, root.model, args, frame)
                    else:
                        uc = root.model.get_learned_conditioning(batch_size * [""])
                        c = root.model.get_learned_conditioning(prompts)

                    if args.scale == 1.0:
                        uc = None
                    if args.init_c is not None:
                        c = args.init_c

                    if args.sampler in ["klms", "dpm2", "dpm2_ancestral", "heun", "euler", "euler_ancestral", "dpm_fast", "dpm_adaptive", "dpmpp_2s_a", "dpmpp_2m"]:
                        samples = sampler_fn(
                            c=c,
                            uc=uc,
                            args=args,
                            model_wrap=cfg_model,
                            init_latent=init_latent,
                            t_enc=t_enc,
                            device=root.device,
                            cb=callback,
                            verbose=False)
                    else:
                        # args.sampler == 'plms' or args.sampler == 'ddim'
                        if init_latent is not None and args.strength > 0:
                            z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(root.device))
                        else:
                            z_enc = torch.randn([args.n_samples, args.C, args.H // args.f, args.W // args.f], device=root.device)
                        if args.sampler == 'ddim':
                            samples = sampler.decode(z_enc,
                                                     c,
                                                     t_enc,
                                                     unconditional_guidance_scale=args.scale,
                                                     unconditional_conditioning=uc,
                                                     img_callback=callback)
                        elif args.sampler == 'plms':  # no "decode" function in plms, so use "sample"
                            shape = [args.C, args.H // args.f, args.W // args.f]
                            samples, _ = sampler.sample(S=args.steps,
                                                        conditioning=c,
                                                        batch_size=args.n_samples,
                                                        shape=shape,
                                                        verbose=False,
                                                        unconditional_guidance_scale=args.scale,
                                                        unconditional_conditioning=uc,
                                                        eta=args.ddim_eta,
                                                        x_T=z_enc,
                                                        img_callback=callback)
                        else:
                            raise Exception(f"Sampler {args.sampler} not recognised.")

                    if return_latent:
                        results.append(samples.clone())

                    x_samples = root.model.decode_first_stage(samples)

                    if args.use_mask and args.overlay_mask:
                        # Overlay the masked image after the image is generated
                        if args.init_sample_raw is not None:
                            img_original = args.init_sample_raw
                        elif init_image is not None:
                            img_original = init_image
                        else:
                            raise Exception("Cannot overlay the masked image without an init image to overlay")

                        if args.mask_sample is None:
                            args.mask_sample = prepare_overlay_mask(args, root, img_original.shape)

                        x_samples = img_original * args.mask_sample + x_samples * ((args.mask_sample * -1.0) + 1)

                    if return_sample:
                        results.append(x_samples.clone())

                    x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)

                    if return_c:
                        results.append(c.clone())

                    for x_sample in x_samples:
                        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
                        image = Image.fromarray(x_sample.astype(np.uint8))
                        results.append(image)
    return results
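Two small standalone illustrations of the arithmetic used by generate(): the scaled-noise perturbation that add_noise performs, and how strength maps to the number of denoising steps (t_enc). The latent shape, noise amount, and strength value are assumptions for the example.

import torch

# Mirrors add_noise(): perturb a hypothetical latent with scaled Gaussian noise.
latent = torch.randn(1, 4, 64, 64)
noise_amt = 0.02
noisy_latent = latent + torch.randn(latent.shape, device=latent.device) * noise_amt

# strength controls how many of the sampler steps are actually run from the init.
steps, strength = 50, 0.65
t_enc = int((1.0 - strength) * steps)   # 17 of the 50 steps re-noise/denoise the init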
deforum-stable-diffusion/helpers/k_samplers.py
ADDED
@@ -0,0 +1,124 @@
from typing import Any, Callable, Optional
from k_diffusion.external import CompVisDenoiser
from k_diffusion import sampling
import torch


def sampler_fn(
    c: torch.Tensor,
    uc: torch.Tensor,
    args,
    model_wrap: CompVisDenoiser,
    init_latent: Optional[torch.Tensor] = None,
    t_enc: Optional[torch.Tensor] = None,
    device=torch.device("cpu")
    if not torch.cuda.is_available()
    else torch.device("cuda"),
    cb: Callable[[Any], None] = None,
    verbose: Optional[bool] = False,
) -> torch.Tensor:
    shape = [args.C, args.H // args.f, args.W // args.f]
    sigmas: torch.Tensor = model_wrap.get_sigmas(args.steps)
    sigmas = sigmas[len(sigmas) - t_enc - 1 :]
    if args.use_init:
        if len(sigmas) > 0:
            x = (
                init_latent
                + torch.randn([args.n_samples, *shape], device=device) * sigmas[0]
            )
        else:
            x = init_latent
    else:
        if len(sigmas) > 0:
            x = torch.randn([args.n_samples, *shape], device=device) * sigmas[0]
        else:
            x = torch.zeros([args.n_samples, *shape], device=device)
    sampler_args = {
        "model": model_wrap,
        "x": x,
        "sigmas": sigmas,
        "extra_args": {"cond": c, "uncond": uc, "cond_scale": args.scale},
        "disable": False,
        "callback": cb,
    }
    min = sigmas[0].item()
    max = min
    for i in sigmas:
        if i.item() < min and i.item() != 0.0:
            min = i.item()
    if args.sampler in ["dpm_fast"]:
        sampler_args = {
            "model": model_wrap,
            "x": x,
            "sigma_min": min,
            "sigma_max": max,
            "extra_args": {"cond": c, "uncond": uc, "cond_scale": args.scale},
            "disable": False,
            "callback": cb,
            "n": args.steps,
        }
    elif args.sampler in ["dpm_adaptive"]:
        sampler_args = {
            "model": model_wrap,
            "x": x,
            "sigma_min": min,
            "sigma_max": max,
            "extra_args": {"cond": c, "uncond": uc, "cond_scale": args.scale},
            "disable": False,
            "callback": cb,
        }
    sampler_map = {
        "klms": sampling.sample_lms,
        "dpm2": sampling.sample_dpm_2,
        "dpm2_ancestral": sampling.sample_dpm_2_ancestral,
        "heun": sampling.sample_heun,
        "euler": sampling.sample_euler,
        "euler_ancestral": sampling.sample_euler_ancestral,
        "dpm_fast": sampling.sample_dpm_fast,
        "dpm_adaptive": sampling.sample_dpm_adaptive,
        "dpmpp_2s_a": sampling.sample_dpmpp_2s_ancestral,
        "dpmpp_2m": sampling.sample_dpmpp_2m,
    }

    samples = sampler_map[args.sampler](**sampler_args)
    return samples


def make_inject_timing_fn(inject_timing, model, steps):
    """
    inject_timing (int or list of ints or list of floats between 0.0 and 1.0):
        int: compute every inject_timing steps
        list of floats: compute on these decimal fraction steps (eg, [0.5, 1.0] for 50 steps would be at steps 25 and 50)
        list of ints: compute on these steps
    model (CompVisDenoiser)
    steps (int): number of steps
    """
    all_sigmas = model.get_sigmas(steps)
    target_sigmas = torch.empty([0], device=all_sigmas.device)

    def timing_fn(sigma):
        is_conditioning_step = False
        if sigma in target_sigmas:
            is_conditioning_step = True
        return is_conditioning_step

    if inject_timing is None:
        timing_fn = lambda sigma: True
    elif isinstance(inject_timing, int) and inject_timing <= steps and inject_timing > 0:
        # Compute every nth step
        target_sigma_list = [sigma for i, sigma in enumerate(all_sigmas) if (i+1) % inject_timing == 0]
        target_sigmas = torch.Tensor(target_sigma_list).to(all_sigmas.device)
    elif all(isinstance(t, float) for t in inject_timing) and all(t >= 0.0 and t <= 1.0 for t in inject_timing):
        # Compute on these steps (expressed as a decimal fraction between 0.0 and 1.0)
        target_indices = [int(frac_step*steps) if frac_step < 1.0 else steps-1 for frac_step in inject_timing]
        target_sigma_list = [sigma for i, sigma in enumerate(all_sigmas) if i in target_indices]
        target_sigmas = torch.Tensor(target_sigma_list).to(all_sigmas.device)
    elif all(isinstance(t, int) for t in inject_timing) and all(t > 0 and t <= steps for t in inject_timing):
        # Compute on these steps
        target_sigma_list = [sigma for i, sigma in enumerate(all_sigmas) if i+1 in inject_timing]
        target_sigmas = torch.Tensor(target_sigma_list).to(all_sigmas.device)

    else:
        raise Exception(f"Not a valid input: inject_timing={inject_timing}\n" +
                        f"Must be an int, list of all ints (between step 1 and {steps}), or list of all floats between 0.0 and 1.0")
    return timing_fn
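A small sketch of make_inject_timing_fn in isolation, assuming k_diffusion and the helpers package are importable; the MockDenoiser and its sigma schedule are stand-ins invented for the example, not part of this repo.

import torch
from helpers.k_samplers import make_inject_timing_fn  # assumes the helpers package is on the import path

class MockDenoiser:
    # Minimal stand-in for CompVisDenoiser; only get_sigmas() is needed here.
    def get_sigmas(self, steps):
        return torch.linspace(14.6, 0.03, steps)

steps = 50
mock = MockDenoiser()
timing_fn = make_inject_timing_fn([0.5, 1.0], mock, steps)  # conditioning at steps 25 and 50

sigmas = mock.get_sigmas(steps)
print(timing_fn(sigmas[int(0.5 * steps)]))  # True: this sigma is a conditioning step
print(timing_fn(sigmas[0]))                 # False: gradient conditioning skipped here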
deforum-stable-diffusion/helpers/load_images.py
ADDED
@@ -0,0 +1,99 @@
import torch
import requests
from PIL import Image
import numpy as np
import torchvision.transforms.functional as TF
from einops import repeat
from scipy.ndimage import gaussian_filter

def load_img(path, shape=None, use_alpha_as_mask=False):
    # use_alpha_as_mask: Read the alpha channel of the image as the mask image
    if path.startswith('http://') or path.startswith('https://'):
        image = Image.open(requests.get(path, stream=True).raw)
    else:
        image = Image.open(path)

    if use_alpha_as_mask:
        image = image.convert('RGBA')
    else:
        image = image.convert('RGB')

    if shape is not None:
        image = image.resize(shape, resample=Image.LANCZOS)

    mask_image = None
    if use_alpha_as_mask:
        # Split alpha channel into a mask_image
        red, green, blue, alpha = Image.Image.split(image)
        mask_image = alpha.convert('L')
        image = image.convert('RGB')

    image = np.array(image).astype(np.float16) / 255.0
    image = image[None].transpose(0, 3, 1, 2)
    image = torch.from_numpy(image)
    image = 2.*image - 1.

    return image, mask_image

def load_mask_latent(mask_input, shape):
    # mask_input (str or PIL Image.Image): Path to the mask image or a PIL Image object
    # shape (list-like len(4)): shape of the image to match, usually latent_image.shape

    if isinstance(mask_input, str):  # mask input is probably a file name
        if mask_input.startswith('http://') or mask_input.startswith('https://'):
            mask_image = Image.open(requests.get(mask_input, stream=True).raw).convert('RGBA')
        else:
            mask_image = Image.open(mask_input).convert('RGBA')
    elif isinstance(mask_input, Image.Image):
        mask_image = mask_input
    else:
        raise Exception("mask_input must be a PIL image or a file name")

    mask_w_h = (shape[-1], shape[-2])
    mask = mask_image.resize(mask_w_h, resample=Image.LANCZOS)
    mask = mask.convert("L")
    return mask

def prepare_mask(mask_input, mask_shape, mask_brightness_adjust=1.0, mask_contrast_adjust=1.0, invert_mask=False):
    # mask_input (str or PIL Image.Image): Path to the mask image or a PIL Image object
    # mask_shape (list-like len(4)): shape of the image to match, usually latent_image.shape
    # mask_brightness_adjust (non-negative float): amount to adjust brightness of the image,
    #     0 is black, 1 is no adjustment, >1 is brighter
    # mask_contrast_adjust (non-negative float): amount to adjust contrast of the image,
    #     0 is a flat grey image, 1 is no adjustment, >1 is more contrast

    mask = load_mask_latent(mask_input, mask_shape)

    # Mask brightness/contrast adjustments
    if mask_brightness_adjust != 1:
        mask = TF.adjust_brightness(mask, mask_brightness_adjust)
    if mask_contrast_adjust != 1:
        mask = TF.adjust_contrast(mask, mask_contrast_adjust)

    # Mask image to array
    mask = np.array(mask).astype(np.float32) / 255.0
    mask = np.tile(mask, (4, 1, 1))
    mask = np.expand_dims(mask, axis=0)
    mask = torch.from_numpy(mask)

    if invert_mask:
        mask = ((mask - 0.5) * -1) + 0.5

    mask = np.clip(mask, 0, 1)
    return mask

def prepare_overlay_mask(args, root, mask_shape):
    mask_fullres = prepare_mask(args.mask_file,
                                mask_shape,
                                args.mask_contrast_adjust,
                                args.mask_brightness_adjust,
                                args.invert_mask)
    mask_fullres = mask_fullres[:, :3, :, :]
    mask_fullres = repeat(mask_fullres, '1 ... -> b ...', b=args.n_samples)

    mask_fullres[mask_fullres < mask_fullres.max()] = 0
    mask_fullres = gaussian_filter(mask_fullres, args.mask_overlay_blur)
    mask_fullres = torch.Tensor(mask_fullres).to(root.device)
    return mask_fullres
deforum-stable-diffusion/helpers/model_load.py
ADDED
@@ -0,0 +1,257 @@
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
|
4 |
+
# Decodes the image without passing through the upscaler. The resulting image will be the same size as the latent
|
5 |
+
# Thanks to Kevin Turner (https://github.com/keturn) we have a shortcut to look at the decoded image!
|
6 |
+
def make_linear_decode(model_version, device='cuda:0'):
|
7 |
+
v1_4_rgb_latent_factors = [
|
8 |
+
# R G B
|
9 |
+
[ 0.298, 0.207, 0.208], # L1
|
10 |
+
[ 0.187, 0.286, 0.173], # L2
|
11 |
+
[-0.158, 0.189, 0.264], # L3
|
12 |
+
[-0.184, -0.271, -0.473], # L4
|
13 |
+
]
|
14 |
+
|
15 |
+
if model_version[:5] == "sd-v1":
|
16 |
+
rgb_latent_factors = torch.Tensor(v1_4_rgb_latent_factors).to(device)
|
17 |
+
else:
|
18 |
+
raise Exception(f"Model name {model_version} not recognized.")
|
19 |
+
|
20 |
+
def linear_decode(latent):
|
21 |
+
latent_image = latent.permute(0, 2, 3, 1) @ rgb_latent_factors
|
22 |
+
latent_image = latent_image.permute(0, 3, 1, 2)
|
23 |
+
return latent_image
|
24 |
+
|
25 |
+
return linear_decode
|
26 |
+
|
27 |
+
def load_model(root, load_on_run_all=True, check_sha256=True):
|
28 |
+
|
29 |
+
import requests
|
30 |
+
import torch
|
31 |
+
from ldm.util import instantiate_from_config
|
32 |
+
from omegaconf import OmegaConf
|
33 |
+
from transformers import logging
|
34 |
+
logging.set_verbosity_error()
|
35 |
+
|
36 |
+
try:
|
37 |
+
ipy = get_ipython()
|
38 |
+
except:
|
39 |
+
ipy = 'could not get_ipython'
|
40 |
+
|
41 |
+
if 'google.colab' in str(ipy):
|
42 |
+
path_extend = "deforum-stable-diffusion"
|
43 |
+
else:
|
44 |
+
path_extend = ""
|
45 |
+
|
46 |
+
model_map = {
|
47 |
+
"512-base-ema.ckpt": {
|
48 |
+
'sha256': 'd635794c1fedfdfa261e065370bea59c651fc9bfa65dc6d67ad29e11869a1824',
|
49 |
+
'url': 'https://huggingface.co/stabilityai/stable-diffusion-2-base/resolve/main/512-base-ema.ckpt',
|
50 |
+
'requires_login': True,
|
51 |
+
},
|
52 |
+
"v1-5-pruned.ckpt": {
|
53 |
+
'sha256': 'e1441589a6f3c5a53f5f54d0975a18a7feb7cdf0b0dee276dfc3331ae376a053',
|
54 |
+
'url': 'https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned.ckpt',
|
55 |
+
'requires_login': True,
|
56 |
+
},
|
57 |
+
"v1-5-pruned-emaonly.ckpt": {
|
58 |
+
'sha256': 'cc6cb27103417325ff94f52b7a5d2dde45a7515b25c255d8e396c90014281516',
|
59 |
+
'url': 'https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.ckpt',
|
60 |
+
'requires_login': True,
|
61 |
+
},
|
62 |
+
"sd-v1-4-full-ema.ckpt": {
|
63 |
+
'sha256': '14749efc0ae8ef0329391ad4436feb781b402f4fece4883c7ad8d10556d8a36a',
|
64 |
+
'url': 'https://huggingface.co/CompVis/stable-diffusion-v-1-2-original/blob/main/sd-v1-4-full-ema.ckpt',
|
65 |
+
'requires_login': True,
|
66 |
+
},
|
67 |
+
"sd-v1-4.ckpt": {
|
68 |
+
'sha256': 'fe4efff1e174c627256e44ec2991ba279b3816e364b49f9be2abc0b3ff3f8556',
|
69 |
+
'url': 'https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4.ckpt',
|
70 |
+
'requires_login': True,
|
71 |
+
},
|
72 |
+
"sd-v1-3-full-ema.ckpt": {
|
73 |
+
'sha256': '54632c6e8a36eecae65e36cb0595fab314e1a1545a65209f24fde221a8d4b2ca',
|
74 |
+
'url': 'https://huggingface.co/CompVis/stable-diffusion-v-1-3-original/blob/main/sd-v1-3-full-ema.ckpt',
|
75 |
+
'requires_login': True,
|
76 |
+
},
|
77 |
+
"sd-v1-3.ckpt": {
|
78 |
+
'sha256': '2cff93af4dcc07c3e03110205988ff98481e86539c51a8098d4f2236e41f7f2f',
|
79 |
+
'url': 'https://huggingface.co/CompVis/stable-diffusion-v-1-3-original/resolve/main/sd-v1-3.ckpt',
|
80 |
+
'requires_login': True,
|
81 |
+
},
|
82 |
+
"sd-v1-2-full-ema.ckpt": {
|
83 |
+
'sha256': 'bc5086a904d7b9d13d2a7bccf38f089824755be7261c7399d92e555e1e9ac69a',
|
84 |
+
'url': 'https://huggingface.co/CompVis/stable-diffusion-v-1-2-original/blob/main/sd-v1-2-full-ema.ckpt',
|
85 |
+
'requires_login': True,
|
86 |
+
},
|
87 |
+
"sd-v1-2.ckpt": {
|
88 |
+
'sha256': '3b87d30facd5bafca1cbed71cfb86648aad75d1c264663c0cc78c7aea8daec0d',
|
89 |
+
'url': 'https://huggingface.co/CompVis/stable-diffusion-v-1-2-original/resolve/main/sd-v1-2.ckpt',
|
90 |
+
'requires_login': True,
|
91 |
+
},
|
92 |
+
"sd-v1-1-full-ema.ckpt": {
|
93 |
+
'sha256': 'efdeb5dc418a025d9a8cc0a8617e106c69044bc2925abecc8a254b2910d69829',
|
94 |
+
'url':'https://huggingface.co/CompVis/stable-diffusion-v-1-1-original/resolve/main/sd-v1-1-full-ema.ckpt',
|
95 |
+
'requires_login': True,
|
96 |
+
},
|
97 |
+
"sd-v1-1.ckpt": {
|
98 |
+
'sha256': '86cd1d3ccb044d7ba8db743d717c9bac603c4043508ad2571383f954390f3cea',
|
99 |
+
'url': 'https://huggingface.co/CompVis/stable-diffusion-v-1-1-original/resolve/main/sd-v1-1.ckpt',
|
100 |
+
'requires_login': True,
|
101 |
+
},
|
102 |
+
"robo-diffusion-v1.ckpt": {
|
103 |
+
'sha256': '244dbe0dcb55c761bde9c2ac0e9b46cc9705ebfe5f1f3a7cc46251573ea14e16',
|
104 |
+
'url': 'https://huggingface.co/nousr/robo-diffusion/resolve/main/models/robo-diffusion-v1.ckpt',
|
105 |
+
'requires_login': False,
|
106 |
+
},
|
107 |
+
"wd-v1-3-float16.ckpt": {
|
108 |
+
'sha256': '4afab9126057859b34d13d6207d90221d0b017b7580469ea70cee37757a29edd',
|
109 |
+
'url': 'https://huggingface.co/hakurei/waifu-diffusion-v1-3/resolve/main/wd-v1-3-float16.ckpt',
|
110 |
+
'requires_login': False,
|
111 |
+
},
|
112 |
+
}
|
113 |
+
|
114 |
+
# config path
|
115 |
+
ckpt_config_path = root.custom_config_path if root.model_config == "custom" else os.path.join(root.configs_path, root.model_config)
|
116 |
+
|
117 |
+
if os.path.exists(ckpt_config_path):
|
118 |
+
print(f"{ckpt_config_path} exists")
|
119 |
+
else:
|
120 |
+
print(f"Warning: {ckpt_config_path} does not exist.")
|
121 |
+
ckpt_config_path = os.path.join(path_extend,"configs",root.model_config)
|
122 |
+
print(f"Using {ckpt_config_path} instead.")
|
123 |
+
|
124 |
+
ckpt_config_path = os.path.abspath(ckpt_config_path)
|
125 |
+
|
126 |
+
# checkpoint path or download
|
127 |
+
ckpt_path = root.custom_checkpoint_path if root.model_checkpoint == "custom" else os.path.join(root.models_path, root.model_checkpoint)
|
128 |
+
ckpt_valid = True
|
129 |
+
|
130 |
+
if os.path.exists(ckpt_path):
|
131 |
+
pass
|
132 |
+
elif 'url' in model_map[root.model_checkpoint]:
|
133 |
+
url = model_map[root.model_checkpoint]['url']
|
134 |
+
|
135 |
+
# CLI dialogue to authenticate download
|
136 |
+
if model_map[root.model_checkpoint]['requires_login']:
|
137 |
+
print("This model requires an authentication token")
|
138 |
+
print("Please ensure you have accepted the terms of service before continuing.")
|
139 |
+
|
140 |
+
username = input("[What is your huggingface username?]: ")
|
141 |
+
token = input("[What is your huggingface token?]: ")
|
142 |
+
|
143 |
+
_, path = url.split("https://")
|
144 |
+
|
145 |
+
url = f"https://{username}:{token}@{path}"
|
146 |
+
|
147 |
+
# contact server for model
|
148 |
+
print(f"..attempting to download {root.model_checkpoint}...this may take a while")
|
149 |
+
ckpt_request = requests.get(url)
|
150 |
+
request_status = ckpt_request.status_code
|
151 |
+
|
152 |
+
# inform user of errors
|
153 |
+
if request_status == 403:
|
154 |
+
raise ConnectionRefusedError("You have not accepted the license for this model.")
|
155 |
+
elif request_status == 404:
|
156 |
+
raise ConnectionError("Could not make contact with server")
|
157 |
+
elif request_status != 200:
|
158 |
+
raise ConnectionError(f"Some other error has ocurred - response code: {request_status}")
|
159 |
+
|
160 |
+
# write to model path
|
161 |
+
with open(os.path.join(root.models_path, root.model_checkpoint), 'wb') as model_file:
|
162 |
+
model_file.write(ckpt_request.content)
|
163 |
+
else:
|
164 |
+
print(f"Please download model checkpoint and place in {os.path.join(root.models_path, root.model_checkpoint)}")
|
165 |
+
ckpt_valid = False
|
166 |
+
|
167 |
+
print(f"config_path: {ckpt_config_path}")
|
168 |
+
print(f"ckpt_path: {ckpt_path}")
|
169 |
+
|
170 |
+
if check_sha256 and root.model_checkpoint != "custom" and ckpt_valid:
|
171 |
+
try:
|
172 |
+
import hashlib
|
173 |
+
print("..checking sha256")
|
174 |
+
with open(ckpt_path, "rb") as f:
|
175 |
+
bytes = f.read()
|
176 |
+
hash = hashlib.sha256(bytes).hexdigest()
|
177 |
+
del bytes
|
178 |
+
if model_map[root.model_checkpoint]["sha256"] == hash:
|
179 |
+
print("..hash is correct")
|
180 |
+
else:
|
181 |
+
print("..hash in not correct")
|
182 |
+
ckpt_valid = False
|
183 |
+
except:
|
184 |
+
print("..could not verify model integrity")
|
185 |
+
|
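# A minimal stand-alone sketch of the same integrity check, hashing the file in chunks
# instead of reading it all into memory at once (the path below is a placeholder):
def sha256_of_file(path, chunk_size=1 << 20):
    import hashlib
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
# e.g. sha256_of_file("models/sd-v1-4.ckpt") == model_map["sd-v1-4.ckpt"]["sha256"]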
186 |
+
def load_model_from_config(config, ckpt, verbose=False, device='cuda', half_precision=True, print_flag=False):
|
187 |
+
map_location = "cuda" # ["cpu", "cuda"]
|
188 |
+
print(f"..loading model")
|
189 |
+
pl_sd = torch.load(ckpt, map_location=map_location)
|
190 |
+
if "global_step" in pl_sd:
|
191 |
+
if print_flag:
|
192 |
+
print(f"Global Step: {pl_sd['global_step']}")
|
193 |
+
sd = pl_sd["state_dict"]
|
194 |
+
model = instantiate_from_config(config.model)
|
195 |
+
m, u = model.load_state_dict(sd, strict=False)
|
196 |
+
if print_flag:
|
197 |
+
if len(m) > 0 and verbose:
|
198 |
+
print("missing keys:")
|
199 |
+
print(m)
|
200 |
+
if len(u) > 0 and verbose:
|
201 |
+
print("unexpected keys:")
|
202 |
+
print(u)
|
203 |
+
|
204 |
+
if half_precision:
|
205 |
+
model = model.half().to(device)
|
206 |
+
else:
|
207 |
+
model = model.to(device)
|
208 |
+
model.eval()
|
209 |
+
return model
|
210 |
+
|
211 |
+
if load_on_run_all and ckpt_valid:
|
212 |
+
local_config = OmegaConf.load(f"{ckpt_config_path}")
|
213 |
+
model = load_model_from_config(local_config, f"{ckpt_path}", half_precision=root.half_precision)
|
214 |
+
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
215 |
+
model = model.to(device)
|
216 |
+
|
217 |
+
autoencoder_version = "sd-v1" #TODO this will be different for different models
|
218 |
+
model.linear_decode = make_linear_decode(autoencoder_version, device)
|
219 |
+
|
220 |
+
return model, device
|
221 |
+
|
222 |
+
|
223 |
+
def get_model_output_paths(root):
|
224 |
+
|
225 |
+
models_path = root.models_path
|
226 |
+
output_path = root.output_path
|
227 |
+
|
228 |
+
#@markdown **Google Drive Path Variables (Optional)**
|
229 |
+
|
230 |
+
force_remount = False
|
231 |
+
|
232 |
+
try:
|
233 |
+
ipy = get_ipython()
|
234 |
+
except:
|
235 |
+
ipy = 'could not get_ipython'
|
236 |
+
|
237 |
+
if 'google.colab' in str(ipy):
|
238 |
+
if root.mount_google_drive:
|
239 |
+
from google.colab import drive # type: ignore
|
240 |
+
try:
|
241 |
+
drive_path = "/content/drive"
|
242 |
+
drive.mount(drive_path,force_remount=force_remount)
|
243 |
+
models_path = root.models_path_gdrive
|
244 |
+
output_path = root.output_path_gdrive
|
245 |
+
except:
|
246 |
+
print("..error mounting drive or with drive path variables")
|
247 |
+
print("..reverting to default path variables")
|
248 |
+
|
249 |
+
models_path = os.path.abspath(models_path)
|
250 |
+
output_path = os.path.abspath(output_path)
|
251 |
+
os.makedirs(models_path, exist_ok=True)
|
252 |
+
os.makedirs(output_path, exist_ok=True)
|
253 |
+
|
254 |
+
print(f"models_path: {models_path}")
|
255 |
+
print(f"output_path: {output_path}")
|
256 |
+
|
257 |
+
return models_path, output_path
|
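A minimal usage sketch for get_model_output_paths above, assuming it is called outside Colab with a bare namespace object; the attribute names mirror what the function reads, and the paths are placeholders:

from types import SimpleNamespace

root = SimpleNamespace(models_path="models", output_path="output",
                       mount_google_drive=False,
                       models_path_gdrive="/content/drive/MyDrive/AI/models",
                       output_path_gdrive="/content/drive/MyDrive/AI/StableDiffusion")
models_path, output_path = get_model_output_paths(root)  # creates both folders if missing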
deforum-stable-diffusion/helpers/model_wrap.py
ADDED
@@ -0,0 +1,226 @@
1 |
+
from torch import nn
|
2 |
+
from k_diffusion import utils as k_utils
|
3 |
+
import torch
|
4 |
+
from k_diffusion.external import CompVisDenoiser
|
5 |
+
from torchvision.utils import make_grid
|
6 |
+
from IPython import display
|
7 |
+
from torchvision.transforms.functional import to_pil_image
|
8 |
+
|
9 |
+
class CFGDenoiser(nn.Module):
|
10 |
+
def __init__(self, model):
|
11 |
+
super().__init__()
|
12 |
+
self.inner_model = model
|
13 |
+
|
14 |
+
def forward(self, x, sigma, uncond, cond, cond_scale):
|
15 |
+
x_in = torch.cat([x] * 2)
|
16 |
+
sigma_in = torch.cat([sigma] * 2)
|
17 |
+
cond_in = torch.cat([uncond, cond])
|
18 |
+
uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2)
|
19 |
+
return uncond + (cond - uncond) * cond_scale
|
20 |
+
|
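# The forward pass above is the classifier-free guidance combination: the model is run once
# on a doubled batch, the result is split into unconditional and conditional predictions, and
# the output is uncond + (cond - uncond) * cond_scale. Worked example: with cond_scale = 7.5,
# a latent value where uncond = 0.20 and cond = 0.30 becomes 0.20 + (0.30 - 0.20) * 7.5 = 0.95,
# i.e. the conditional direction is amplified.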
21 |
+
class CFGDenoiserWithGrad(CompVisDenoiser):
|
22 |
+
def __init__(self, model,
|
23 |
+
loss_fns_scales, # List of [cond_function, scale] pairs
|
24 |
+
clamp_func=None, # Gradient clamping function, clamp_func(grad, sigma)
|
25 |
+
gradient_wrt=None, # Calculate gradient with respect to ["x", "x0_pred", "both"]
|
26 |
+
gradient_add_to=None, # Add gradient to ["cond", "uncond", "both"]
|
27 |
+
cond_uncond_sync=True, # Calculates the cond and uncond simultaneously
|
28 |
+
decode_method=None, # Function used to decode the latent during gradient calculation
|
29 |
+
grad_inject_timing_fn=None, # Option to use grad in only a few of the steps
|
30 |
+
grad_consolidate_fn=None, # Function to add grad to image fn(img, grad, sigma)
|
31 |
+
verbose=False):
|
32 |
+
super().__init__(model.inner_model)
|
33 |
+
self.inner_model = model
|
34 |
+
self.cond_uncond_sync = cond_uncond_sync
|
35 |
+
|
36 |
+
# Initialize gradient calculation variables
|
37 |
+
self.clamp_func = clamp_func
|
38 |
+
self.gradient_add_to = gradient_add_to
|
39 |
+
if gradient_wrt is None:
|
40 |
+
self.gradient_wrt = 'x'
|
41 |
+
else: self.gradient_wrt = gradient_wrt
|
42 |
+
if decode_method is None:
|
43 |
+
decode_fn = lambda x: x
|
44 |
+
elif decode_method == "autoencoder":
|
45 |
+
decode_fn = model.inner_model.differentiable_decode_first_stage
|
46 |
+
elif decode_method == "linear":
|
47 |
+
decode_fn = model.inner_model.linear_decode
|
48 |
+
self.decode_fn = decode_fn
|
49 |
+
|
50 |
+
# Parse loss function-scale pairs
|
51 |
+
cond_fns = []
|
52 |
+
for loss_fn,scale in loss_fns_scales:
|
53 |
+
if scale != 0:
|
54 |
+
cond_fn = self.make_cond_fn(loss_fn, scale)
|
55 |
+
else:
|
56 |
+
cond_fn = None
|
57 |
+
cond_fns += [cond_fn]
|
58 |
+
self.cond_fns = cond_fns
|
59 |
+
|
60 |
+
if grad_inject_timing_fn is None:
|
61 |
+
self.grad_inject_timing_fn = lambda sigma: True
|
62 |
+
else:
|
63 |
+
self.grad_inject_timing_fn = grad_inject_timing_fn
|
64 |
+
if grad_consolidate_fn is None:
|
65 |
+
self.grad_consolidate_fn = lambda img, grad, sigma: img + grad * sigma
|
66 |
+
else:
|
67 |
+
self.grad_consolidate_fn = grad_consolidate_fn
|
68 |
+
|
69 |
+
self.verbose = verbose
|
70 |
+
self.verbose_print = print if self.verbose else lambda *args, **kwargs: None
|
71 |
+
|
72 |
+
|
73 |
+
# General denoising model with gradient conditioning
|
74 |
+
def cond_model_fn_(self, x, sigma, inner_model=None, **kwargs):
|
75 |
+
|
76 |
+
# inner_model: optionally use a different inner_model function or a wrapper function around inner_model, see self.forward._cfg_model
|
77 |
+
if inner_model is None:
|
78 |
+
inner_model = self.inner_model
|
79 |
+
|
80 |
+
total_cond_grad = torch.zeros_like(x)
|
81 |
+
for cond_fn in self.cond_fns:
|
82 |
+
if cond_fn is None: continue
|
83 |
+
|
84 |
+
# Gradient with respect to x
|
85 |
+
if self.gradient_wrt == 'x':
|
86 |
+
with torch.enable_grad():
|
87 |
+
x = x.detach().requires_grad_()
|
88 |
+
denoised = inner_model(x, sigma, **kwargs)
|
89 |
+
cond_grad = cond_fn(x, sigma, denoised=denoised, **kwargs).detach()
|
90 |
+
|
91 |
+
# Gradient wrt x0_pred, so save some compute: don't record grad until after denoised is calculated
|
92 |
+
elif self.gradient_wrt == 'x0_pred':
|
93 |
+
with torch.no_grad():
|
94 |
+
denoised = inner_model(x, sigma, **kwargs)
|
95 |
+
with torch.enable_grad():
|
96 |
+
cond_grad = cond_fn(x, sigma, denoised=denoised.detach().requires_grad_(), **kwargs).detach()
|
97 |
+
total_cond_grad += cond_grad
|
98 |
+
|
99 |
+
total_cond_grad = torch.nan_to_num(total_cond_grad, nan=0.0, posinf=float('inf'), neginf=-float('inf'))
|
100 |
+
|
101 |
+
# Clamp the gradient
|
102 |
+
total_cond_grad = self.clamp_grad_verbose(total_cond_grad, sigma)
|
103 |
+
|
104 |
+
# Add gradient to the image
|
105 |
+
if self.gradient_wrt == 'x':
|
106 |
+
x.copy_(self.grad_consolidate_fn(x.detach(), total_cond_grad, k_utils.append_dims(sigma, x.ndim)))
|
107 |
+
cond_denoised = inner_model(x, sigma, **kwargs)
|
108 |
+
elif self.gradient_wrt == 'x0_pred':
|
109 |
+
x.copy_(self.grad_consolidate_fn(x.detach(), total_cond_grad, k_utils.append_dims(sigma, x.ndim)))
|
110 |
+
cond_denoised = self.grad_consolidate_fn(denoised.detach(), total_cond_grad, k_utils.append_dims(sigma, x.ndim))
|
111 |
+
|
112 |
+
return cond_denoised
|
113 |
+
|
114 |
+
def forward(self, x, sigma, uncond, cond, cond_scale):
|
115 |
+
|
116 |
+
def _cfg_model(x, sigma, cond, **kwargs):
|
117 |
+
# Wrapper to add denoised cond and uncond as in a cfg model
|
118 |
+
# input "cond" is both cond and uncond weights: torch.cat([uncond, cond])
|
119 |
+
x_in = torch.cat([x] * 2)
|
120 |
+
sigma_in = torch.cat([sigma] * 2)
|
121 |
+
|
122 |
+
denoised = self.inner_model(x_in, sigma_in, cond=cond, **kwargs)
|
123 |
+
uncond_x0, cond_x0 = denoised.chunk(2)
|
124 |
+
x0_pred = uncond_x0 + (cond_x0 - uncond_x0) * cond_scale
|
125 |
+
return x0_pred
|
126 |
+
|
127 |
+
# Conditioning
|
128 |
+
if self.check_conditioning_schedule(sigma):
|
129 |
+
# Apply the conditioning gradient to the completed denoised (after both cond and uncond are combined into the diffused image)
|
130 |
+
if self.cond_uncond_sync:
|
131 |
+
# x0 = self.cfg_cond_model_fn_(x, sigma, uncond=uncond, cond=cond, cond_scale=cond_scale)
|
132 |
+
cond_in = torch.cat([uncond, cond])
|
133 |
+
x0 = self.cond_model_fn_(x, sigma, cond=cond_in, inner_model=_cfg_model)
|
134 |
+
|
135 |
+
# Calculate cond and uncond separately
|
136 |
+
else:
|
137 |
+
if self.gradient_add_to == "uncond":
|
138 |
+
uncond = self.cond_model_fn_(x, sigma, cond=uncond)
|
139 |
+
cond = self.inner_model(x, sigma, cond=cond)
|
140 |
+
x0 = uncond + (cond - uncond) * cond_scale
|
141 |
+
elif self.gradient_add_to == "cond":
|
142 |
+
uncond = self.inner_model(x, sigma, cond=uncond)
|
143 |
+
cond = self.cond_model_fn_(x, sigma, cond=cond)
|
144 |
+
x0 = uncond + (cond - uncond) * cond_scale
|
145 |
+
elif self.gradient_add_to == "both":
|
146 |
+
uncond = self.cond_model_fn_(x, sigma, cond=uncond)
|
147 |
+
cond = self.cond_model_fn_(x, sigma, cond=cond)
|
148 |
+
x0 = uncond + (cond - uncond) * cond_scale
|
149 |
+
else:
|
150 |
+
raise Exception(f"Unrecognised option for gradient_add_to: {self.gradient_add_to}")
|
151 |
+
|
152 |
+
# No conditioning
|
153 |
+
else:
|
154 |
+
# calculate cond and uncond simultaneously
|
155 |
+
if self.cond_uncond_sync:
|
156 |
+
cond_in = torch.cat([uncond, cond])
|
157 |
+
x0 = _cfg_model(x, sigma, cond=cond_in)
|
158 |
+
else:
|
159 |
+
uncond = self.inner_model(x, sigma, cond=uncond)
|
160 |
+
cond = self.inner_model(x, sigma, cond=cond)
|
161 |
+
x0 = uncond + (cond - uncond) * cond_scale
|
162 |
+
|
163 |
+
return x0
|
164 |
+
|
165 |
+
def make_cond_fn(self, loss_fn, scale):
|
166 |
+
# Turns a loss function into a cond function that is applied to the decoded RGB sample
|
167 |
+
# loss_fn (function): func(x, sigma, denoised) -> number
|
168 |
+
# scale (number): how much this loss is applied to the image
|
169 |
+
|
170 |
+
# Cond function with respect to x
|
171 |
+
def cond_fn(x, sigma, denoised, **kwargs):
|
172 |
+
with torch.enable_grad():
|
173 |
+
denoised_sample = self.decode_fn(denoised).requires_grad_()
|
174 |
+
loss = loss_fn(denoised_sample, sigma, **kwargs) * scale
|
175 |
+
grad = -torch.autograd.grad(loss, x)[0]
|
176 |
+
self.verbose_print('Loss:', loss.item())
|
177 |
+
return grad
|
178 |
+
|
179 |
+
# Cond function with respect to x0_pred
|
180 |
+
def cond_fn_pred(x, sigma, denoised, **kwargs):
|
181 |
+
with torch.enable_grad():
|
182 |
+
denoised_sample = self.decode_fn(denoised).requires_grad_()
|
183 |
+
loss = loss_fn(denoised_sample, sigma, **kwargs) * scale
|
184 |
+
grad = -torch.autograd.grad(loss, denoised)[0]
|
185 |
+
self.verbose_print('Loss:', loss.item())
|
186 |
+
return grad
|
187 |
+
|
188 |
+
if self.gradient_wrt == 'x':
|
189 |
+
return cond_fn
|
190 |
+
elif self.gradient_wrt == 'x0_pred':
|
191 |
+
return cond_fn_pred
|
192 |
+
else:
|
193 |
+
raise Exception(f"Variable gradient_wrt == {self.gradient_wrt} not recognised.")
|
194 |
+
|
195 |
+
def clamp_grad_verbose(self, grad, sigma):
|
196 |
+
if self.clamp_func is not None:
|
197 |
+
if self.verbose:
|
198 |
+
print("Grad before clamping:")
|
199 |
+
self.display_samples(torch.abs(grad*2.0) - 1.0)
|
200 |
+
grad = self.clamp_func(grad, sigma)
|
201 |
+
if self.verbose:
|
202 |
+
print("Conditioning gradient")
|
203 |
+
self.display_samples(torch.abs(grad*2.0) - 1.0)
|
204 |
+
return grad
|
205 |
+
|
206 |
+
def check_conditioning_schedule(self, sigma):
|
207 |
+
is_conditioning_step = False
|
208 |
+
|
209 |
+
if (self.cond_fns is not None and
|
210 |
+
any(cond_fn is not None for cond_fn in self.cond_fns)):
|
211 |
+
# Conditioning strength != 0
|
212 |
+
# Check if this is a conditioning step
|
213 |
+
if self.grad_inject_timing_fn(sigma):
|
214 |
+
is_conditioning_step = True
|
215 |
+
|
216 |
+
if self.verbose:
|
217 |
+
print(f"Conditioning step for sigma={sigma}")
|
218 |
+
|
219 |
+
return is_conditioning_step
|
220 |
+
|
221 |
+
def display_samples(self, images):
|
222 |
+
images = images.double().cpu().add(1).div(2).clamp(0, 1)
|
223 |
+
images = torch.tensor(images.numpy())
|
224 |
+
grid = make_grid(images, 4).cpu()
|
225 |
+
display.display(to_pil_image(grid))
|
226 |
+
return
|
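A sketch of how CFGDenoiserWithGrad might be wired around an existing k-diffusion wrapper. Everything here is illustrative and assumed rather than taken from this repo's entry points: model stands for an already-loaded Stable Diffusion model, blue_loss is a toy loss, the clamp function is arbitrary, and decode_method='linear' presumes a linear_decode attribute has been attached to the inner model (as configuration.py does above).

import torch
from k_diffusion.external import CompVisDenoiser

model_wrap = CompVisDenoiser(model)   # model: a loaded LDM (assumed)

def blue_loss(rgb_sample, sigma, **kwargs):
    # toy loss: reward blue pixels in the decoded sample
    return -rgb_sample[:, 2].mean()

cfg_with_grad = CFGDenoiserWithGrad(
    model_wrap,
    loss_fns_scales=[(blue_loss, 10.0)],
    clamp_func=lambda grad, sigma: grad.clamp(-0.1, 0.1),
    gradient_wrt='x0_pred',
    gradient_add_to='both',
    cond_uncond_sync=True,
    decode_method='linear',
    verbose=False,
)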
deforum-stable-diffusion/helpers/prompt.py
ADDED
@@ -0,0 +1,130 @@
1 |
+
import re
|
2 |
+
|
3 |
+
def sanitize(prompt):
|
4 |
+
whitelist = set('abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ')
|
5 |
+
tmp = ''.join(filter(whitelist.__contains__, prompt))
|
6 |
+
return tmp.replace(' ', '_')
|
7 |
+
|
8 |
+
def check_is_number(value):
|
9 |
+
float_pattern = r'^(?=.)([+-]?([0-9]*)(\.([0-9]+))?)$'
|
10 |
+
return re.match(float_pattern, value)
|
11 |
+
|
12 |
+
# prompt weighting with colons and number coefficients (like 'bacon:0.75 eggs:0.25')
|
13 |
+
# borrowed from https://github.com/kylewlacy/stable-diffusion/blob/0a4397094eb6e875f98f9d71193e350d859c4220/ldm/dream/conditioning.py
|
14 |
+
# and https://github.com/raefu/stable-diffusion-automatic/blob/unstablediffusion/modules/processing.py
|
15 |
+
def get_uc_and_c(prompts, model, args, frame = 0):
|
16 |
+
prompt = prompts[0] # they are the same in a batch anyway
|
17 |
+
|
18 |
+
# get weighted sub-prompts
|
19 |
+
negative_subprompts, positive_subprompts = split_weighted_subprompts(
|
20 |
+
prompt, frame, not args.normalize_prompt_weights
|
21 |
+
)
|
22 |
+
|
23 |
+
uc = get_learned_conditioning(model, negative_subprompts, "", args, -1)
|
24 |
+
c = get_learned_conditioning(model, positive_subprompts, prompt, args, 1)
|
25 |
+
|
26 |
+
return (uc, c)
|
27 |
+
|
28 |
+
def get_learned_conditioning(model, weighted_subprompts, text, args, sign = 1):
|
29 |
+
if len(weighted_subprompts) < 1:
|
30 |
+
log_tokenization(text, model, args.log_weighted_subprompts, sign)
|
31 |
+
c = model.get_learned_conditioning(args.n_samples * [text])
|
32 |
+
else:
|
33 |
+
c = None
|
34 |
+
for subtext, subweight in weighted_subprompts:
|
35 |
+
log_tokenization(subtext, model, args.log_weighted_subprompts, sign * subweight)
|
36 |
+
if c is None:
|
37 |
+
c = model.get_learned_conditioning(args.n_samples * [subtext])
|
38 |
+
c *= subweight
|
39 |
+
else:
|
40 |
+
c.add_(model.get_learned_conditioning(args.n_samples * [subtext]), alpha=subweight)
|
41 |
+
|
42 |
+
return c
|
43 |
+
|
44 |
+
def parse_weight(match, frame = 0)->float:
|
45 |
+
import numexpr
|
46 |
+
w_raw = match.group("weight")
|
47 |
+
if w_raw == None:
|
48 |
+
return 1
|
49 |
+
if check_is_number(w_raw):
|
50 |
+
return float(w_raw)
|
51 |
+
else:
|
52 |
+
t = frame
|
53 |
+
if len(w_raw) < 3:
|
54 |
+
print('the value inside the backticks cannot be evaluated as a math expression')
|
55 |
+
return 1
|
56 |
+
return float(numexpr.evaluate(w_raw[1:-1]))
|
57 |
+
|
58 |
+
def normalize_prompt_weights(parsed_prompts):
|
59 |
+
if len(parsed_prompts) == 0:
|
60 |
+
return parsed_prompts
|
61 |
+
weight_sum = sum(map(lambda x: x[1], parsed_prompts))
|
62 |
+
if weight_sum == 0:
|
63 |
+
print(
|
64 |
+
"Warning: Subprompt weights add up to zero. Discarding and using even weights instead.")
|
65 |
+
equal_weight = 1 / max(len(parsed_prompts), 1)
|
66 |
+
return [(x[0], equal_weight) for x in parsed_prompts]
|
67 |
+
return [(x[0], x[1] / weight_sum) for x in parsed_prompts]
|
68 |
+
|
69 |
+
def split_weighted_subprompts(text, frame = 0, skip_normalize=False):
|
70 |
+
"""
|
71 |
+
grabs all text up to the first occurrence of ':'
|
72 |
+
uses the grabbed text as a sub-prompt, and takes the value following ':' as weight
|
73 |
+
if ':' has no value defined, defaults to 1.0
|
74 |
+
repeats until no text remaining
|
75 |
+
"""
|
76 |
+
prompt_parser = re.compile("""
|
77 |
+
(?P<prompt> # capture group for 'prompt'
|
78 |
+
(?:\\\:|[^:])+ # match one or more non ':' characters or escaped colons '\:'
|
79 |
+
) # end 'prompt'
|
80 |
+
(?: # non-capture group
|
81 |
+
:+ # match one or more ':' characters
|
82 |
+
(?P<weight>(( # capture group for 'weight'
|
83 |
+
-?\d+(?:\.\d+)? # match positive or negative integer or decimal number
|
84 |
+
)|( # or
|
85 |
+
`[\S\s]*?`# a math function
|
86 |
+
)))? # end weight capture group, make optional
|
87 |
+
\s* # strip spaces after weight
|
88 |
+
| # OR
|
89 |
+
$ # else, if no ':' then match end of line
|
90 |
+
) # end non-capture group
|
91 |
+
""", re.VERBOSE)
|
92 |
+
negative_prompts = []
|
93 |
+
positive_prompts = []
|
94 |
+
for match in re.finditer(prompt_parser, text):
|
95 |
+
w = parse_weight(match, frame)
|
96 |
+
if w < 0:
|
97 |
+
# negating the sign as we'll feed this to uc
|
98 |
+
negative_prompts.append((match.group("prompt").replace("\\:", ":"), -w))
|
99 |
+
elif w > 0:
|
100 |
+
positive_prompts.append((match.group("prompt").replace("\\:", ":"), w))
|
101 |
+
|
102 |
+
if skip_normalize:
|
103 |
+
return (negative_prompts, positive_prompts)
|
104 |
+
return (normalize_prompt_weights(negative_prompts), normalize_prompt_weights(positive_prompts))
|
105 |
+
|
106 |
+
# shows how the prompt is tokenized
|
107 |
+
# usually tokens have '</w>' to indicate end-of-word,
|
108 |
+
# but for readability it has been replaced with ' '
|
109 |
+
def log_tokenization(text, model, log=False, weight=1):
|
110 |
+
if not log:
|
111 |
+
return
|
112 |
+
tokens = model.cond_stage_model.tokenizer._tokenize(text)
|
113 |
+
tokenized = ""
|
114 |
+
discarded = ""
|
115 |
+
usedTokens = 0
|
116 |
+
totalTokens = len(tokens)
|
117 |
+
for i in range(0, totalTokens):
|
118 |
+
token = tokens[i].replace('</w>', ' ')
|
119 |
+
# alternate color
|
120 |
+
s = (usedTokens % 6) + 1
|
121 |
+
if i < model.cond_stage_model.max_length:
|
122 |
+
tokenized = tokenized + f"\x1b[0;3{s};40m{token}"
|
123 |
+
usedTokens += 1
|
124 |
+
else: # over max token length
|
125 |
+
discarded = discarded + f"\x1b[0;3{s};40m{token}"
|
126 |
+
print(f"\n>> Tokens ({usedTokens}), Weight ({weight:.2f}):\n{tokenized}\x1b[0m")
|
127 |
+
if discarded != "":
|
128 |
+
print(
|
129 |
+
f">> Tokens Discarded ({totalTokens-usedTokens}):\n{discarded}\x1b[0m"
|
130 |
+
)
|
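A worked example of the weighted-subprompt parsing above; the numbers follow directly from the normalization logic and need no model:

neg, pos = split_weighted_subprompts("a castle:1.5 fog:0.5 blurry:-1")
# pos == [('a castle', 0.75), ('fog', 0.25)]   positive weights normalized to sum to 1
# neg == [('blurry', 1.0)]                     negative weights are negated and fed to the unconditional side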
deforum-stable-diffusion/helpers/rank_images.py
ADDED
@@ -0,0 +1,69 @@
1 |
+
import os
|
2 |
+
from argparse import ArgumentParser
|
3 |
+
from tqdm import tqdm
|
4 |
+
from PIL import Image
|
5 |
+
from torch.nn import functional as F
|
6 |
+
from torchvision import transforms
|
7 |
+
from torchvision.transforms import functional as TF
|
8 |
+
import torch
|
9 |
+
from simulacra_fit_linear_model import AestheticMeanPredictionLinearModel
|
10 |
+
from CLIP import clip
|
11 |
+
|
12 |
+
parser = ArgumentParser()
|
13 |
+
parser.add_argument("directory")
|
14 |
+
parser.add_argument("-t", "--top-n", default=50)
|
15 |
+
args = parser.parse_args()
|
16 |
+
|
17 |
+
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
|
18 |
+
|
19 |
+
clip_model_name = 'ViT-B/16'
|
20 |
+
clip_model = clip.load(clip_model_name, jit=False, device=device)[0]
|
21 |
+
clip_model.eval().requires_grad_(False)
|
22 |
+
|
23 |
+
normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
|
24 |
+
std=[0.26862954, 0.26130258, 0.27577711])
|
25 |
+
|
26 |
+
# 512 is embed dimension for ViT-B/16 CLIP
|
27 |
+
model = AestheticMeanPredictionLinearModel(512)
|
28 |
+
model.load_state_dict(
|
29 |
+
torch.load("models/sac_public_2022_06_29_vit_b_16_linear.pth")
|
30 |
+
)
|
31 |
+
model = model.to(device)
|
32 |
+
|
33 |
+
def get_filepaths(parentpath, filepaths):
|
34 |
+
paths = []
|
35 |
+
for path in filepaths:
|
36 |
+
try:
|
37 |
+
new_parent = os.path.join(parentpath, path)
|
38 |
+
paths += get_filepaths(new_parent, os.listdir(new_parent))
|
39 |
+
except NotADirectoryError:
|
40 |
+
paths.append(os.path.join(parentpath, path))
|
41 |
+
return paths
|
42 |
+
|
43 |
+
filepaths = get_filepaths(args.directory, os.listdir(args.directory))
|
44 |
+
scores = []
|
45 |
+
for path in tqdm(filepaths):
|
46 |
+
# This is obviously a flawed way to check for an image but this is just
|
47 |
+
# a demo script anyway.
|
48 |
+
if path[-4:] not in (".png", ".jpg"):
|
49 |
+
continue
|
50 |
+
img = Image.open(path).convert('RGB')
|
51 |
+
img = TF.resize(img, 224, transforms.InterpolationMode.LANCZOS)
|
52 |
+
img = TF.center_crop(img, (224,224))
|
53 |
+
img = TF.to_tensor(img).to(device)
|
54 |
+
img = normalize(img)
|
55 |
+
clip_image_embed = F.normalize(
|
56 |
+
clip_model.encode_image(img[None, ...]).float(),
|
57 |
+
dim=-1)
|
58 |
+
score = model(clip_image_embed)
|
59 |
+
if len(scores) < args.top_n:
|
60 |
+
scores.append((score.item(),path))
|
61 |
+
scores.sort()
|
62 |
+
else:
|
63 |
+
if scores[0][0] < score:
|
64 |
+
scores.append((score.item(),path))
|
65 |
+
scores.sort(key=lambda x: x[0])
|
66 |
+
scores = scores[1:]
|
67 |
+
|
68 |
+
for score, path in scores:
|
69 |
+
print(f"{score}: {path}")
|
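The top-N bookkeeping in the loop above (append, sort, drop the lowest) can be expressed more directly with heapq; this is only an equivalent sketch, not something the script uses:

import heapq

def top_n(score_path_pairs, n=50):
    # returns the n highest-scoring (score, path) pairs, best first
    return heapq.nlargest(n, score_path_pairs, key=lambda pair: pair[0])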
deforum-stable-diffusion/helpers/render.py
ADDED
@@ -0,0 +1,472 @@
1 |
+
import os
|
2 |
+
import json
|
3 |
+
from IPython import display
|
4 |
+
import random
|
5 |
+
from torchvision.utils import make_grid
|
6 |
+
from einops import rearrange
|
7 |
+
import pandas as pd
|
8 |
+
import cv2
|
9 |
+
import numpy as np
|
10 |
+
from PIL import Image
|
11 |
+
import pathlib
|
12 |
+
import torchvision.transforms as T
|
13 |
+
|
14 |
+
from .generate import generate, add_noise
|
15 |
+
from .prompt import sanitize
|
16 |
+
from .animation import DeformAnimKeys, sample_from_cv2, sample_to_cv2, anim_frame_warp, vid2frames
|
17 |
+
from .depth import DepthModel
|
18 |
+
from .colors import maintain_colors
|
19 |
+
from .load_images import prepare_overlay_mask
|
20 |
+
|
21 |
+
def next_seed(args):
|
22 |
+
if args.seed_behavior == 'iter':
|
23 |
+
args.seed += 1
|
24 |
+
elif args.seed_behavior == 'fixed':
|
25 |
+
pass # always keep seed the same
|
26 |
+
else:
|
27 |
+
args.seed = random.randint(0, 2**32 - 1)
|
28 |
+
return args.seed
|
29 |
+
|
30 |
+
def render_image_batch(args, prompts, root):
|
31 |
+
args.prompts = {k: f"{v:05d}" for v, k in enumerate(prompts)}
|
32 |
+
|
33 |
+
# create output folder for the batch
|
34 |
+
os.makedirs(args.outdir, exist_ok=True)
|
35 |
+
if args.save_settings or args.save_samples:
|
36 |
+
print(f"Saving to {os.path.join(args.outdir, args.timestring)}_*")
|
37 |
+
|
38 |
+
# save settings for the batch
|
39 |
+
if args.save_settings:
|
40 |
+
filename = os.path.join(args.outdir, f"{args.timestring}_settings.txt")
|
41 |
+
with open(filename, "w+", encoding="utf-8") as f:
|
42 |
+
dictlist = dict(args.__dict__)
|
43 |
+
del dictlist['master_args']
|
44 |
+
del dictlist['root']
|
45 |
+
del dictlist['get_output_folder']
|
46 |
+
json.dump(dictlist, f, ensure_ascii=False, indent=4)
|
47 |
+
|
48 |
+
index = 0
|
49 |
+
|
50 |
+
# function for init image batching
|
51 |
+
init_array = []
|
52 |
+
if args.use_init:
|
53 |
+
if args.init_image == "":
|
54 |
+
raise FileNotFoundError("No path was given for init_image")
|
55 |
+
if args.init_image.startswith('http://') or args.init_image.startswith('https://'):
|
56 |
+
init_array.append(args.init_image)
|
57 |
+
elif not os.path.isfile(args.init_image):
|
58 |
+
if args.init_image[-1] != "/": # avoids path error by adding / to end if not there
|
59 |
+
args.init_image += "/"
|
60 |
+
for image in sorted(os.listdir(args.init_image)): # iterates dir and appends images to init_array
|
61 |
+
if image.split(".")[-1] in ("png", "jpg", "jpeg"):
|
62 |
+
init_array.append(args.init_image + image)
|
63 |
+
else:
|
64 |
+
init_array.append(args.init_image)
|
65 |
+
else:
|
66 |
+
init_array = [""]
|
67 |
+
|
68 |
+
# when doing large batches don't flood browser with images
|
69 |
+
clear_between_batches = args.n_batch >= 32
|
70 |
+
|
71 |
+
for iprompt, prompt in enumerate(prompts):
|
72 |
+
args.prompt = prompt
|
73 |
+
args.clip_prompt = prompt
|
74 |
+
print(f"Prompt {iprompt+1} of {len(prompts)}")
|
75 |
+
print(f"{args.prompt}")
|
76 |
+
|
77 |
+
all_images = []
|
78 |
+
|
79 |
+
for batch_index in range(args.n_batch):
|
80 |
+
if clear_between_batches and batch_index % 32 == 0:
|
81 |
+
display.clear_output(wait=True)
|
82 |
+
print(f"Batch {batch_index+1} of {args.n_batch}")
|
83 |
+
|
84 |
+
for image in init_array: # iterates the init images
|
85 |
+
args.init_image = image
|
86 |
+
results = generate(args, root)
|
87 |
+
for image in results:
|
88 |
+
if args.make_grid:
|
89 |
+
all_images.append(T.functional.pil_to_tensor(image))
|
90 |
+
if args.save_samples:
|
91 |
+
if args.filename_format == "{timestring}_{index}_{prompt}.png":
|
92 |
+
filename = f"{args.timestring}_{index:05}_{sanitize(prompt)[:160]}.png"
|
93 |
+
else:
|
94 |
+
filename = f"{args.timestring}_{index:05}_{args.seed}.png"
|
95 |
+
image.save(os.path.join(args.outdir, filename))
|
96 |
+
if args.display_samples:
|
97 |
+
display.display(image)
|
98 |
+
index += 1
|
99 |
+
args.seed = next_seed(args)
|
100 |
+
|
101 |
+
#print(len(all_images))
|
102 |
+
if args.make_grid:
|
103 |
+
grid = make_grid(all_images, nrow=int(len(all_images)/args.grid_rows))
|
104 |
+
grid = rearrange(grid, 'c h w -> h w c').cpu().numpy()
|
105 |
+
filename = f"{args.timestring}_{iprompt:05d}_grid_{args.seed}.png"
|
106 |
+
grid_image = Image.fromarray(grid.astype(np.uint8))
|
107 |
+
grid_image.save(os.path.join(args.outdir, filename))
|
108 |
+
display.clear_output(wait=True)
|
109 |
+
display.display(grid_image)
|
110 |
+
|
111 |
+
|
112 |
+
def render_animation(args, anim_args, animation_prompts, root):
|
113 |
+
# animations use key framed prompts
|
114 |
+
args.prompts = animation_prompts
|
115 |
+
|
116 |
+
# expand key frame strings to values
|
117 |
+
keys = DeformAnimKeys(anim_args)
|
118 |
+
|
119 |
+
# resume animation
|
120 |
+
start_frame = 0
|
121 |
+
if anim_args.resume_from_timestring:
|
122 |
+
for tmp in os.listdir(args.outdir):
|
123 |
+
if tmp.split("_")[0] == anim_args.resume_timestring:
|
124 |
+
start_frame += 1
|
125 |
+
start_frame = start_frame - 1
|
126 |
+
|
127 |
+
# create output folder for the batch
|
128 |
+
os.makedirs(args.outdir, exist_ok=True)
|
129 |
+
print(f"Saving animation frames to {args.outdir}")
|
130 |
+
|
131 |
+
# save settings for the batch
|
132 |
+
'''
|
133 |
+
settings_filename = os.path.join(args.outdir, f"{args.timestring}_settings.txt")
|
134 |
+
with open(settings_filename, "w+", encoding="utf-8") as f:
|
135 |
+
s = {**dict(args.__dict__), **dict(anim_args.__dict__)}
|
136 |
+
#DGSpitzer: run.py adds these three parameters
|
137 |
+
del s['master_args']
|
138 |
+
del s['opt']
|
139 |
+
del s['root']
|
140 |
+
del s['get_output_folder']
|
141 |
+
#print(s)
|
142 |
+
json.dump(s, f, ensure_ascii=False, indent=4)
|
143 |
+
'''
|
144 |
+
# resume from timestring
|
145 |
+
if anim_args.resume_from_timestring:
|
146 |
+
args.timestring = anim_args.resume_timestring
|
147 |
+
|
148 |
+
# expand prompts out to per-frame
|
149 |
+
prompt_series = pd.Series([np.nan for a in range(anim_args.max_frames)])
|
150 |
+
for i, prompt in animation_prompts.items():
|
151 |
+
prompt_series[int(i)] = prompt
|
152 |
+
prompt_series = prompt_series.ffill().bfill()
|
153 |
+
|
154 |
+
# check for video inits
|
155 |
+
using_vid_init = anim_args.animation_mode == 'Video Input'
|
156 |
+
|
157 |
+
# load depth model for 3D
|
158 |
+
predict_depths = (anim_args.animation_mode == '3D' and anim_args.use_depth_warping) or anim_args.save_depth_maps
|
159 |
+
if predict_depths:
|
160 |
+
depth_model = DepthModel(root.device)
|
161 |
+
depth_model.load_midas(root.models_path)
|
162 |
+
if anim_args.midas_weight < 1.0:
|
163 |
+
depth_model.load_adabins(root.models_path)
|
164 |
+
else:
|
165 |
+
depth_model = None
|
166 |
+
anim_args.save_depth_maps = False
|
167 |
+
|
168 |
+
# state for interpolating between diffusion steps
|
169 |
+
turbo_steps = 1 if using_vid_init else int(anim_args.diffusion_cadence)
|
170 |
+
turbo_prev_image, turbo_prev_frame_idx = None, 0
|
171 |
+
turbo_next_image, turbo_next_frame_idx = None, 0
|
172 |
+
|
173 |
+
# resume animation
|
174 |
+
prev_sample = None
|
175 |
+
color_match_sample = None
|
176 |
+
if anim_args.resume_from_timestring:
|
177 |
+
last_frame = start_frame-1
|
178 |
+
if turbo_steps > 1:
|
179 |
+
last_frame -= last_frame%turbo_steps
|
180 |
+
path = os.path.join(args.outdir,f"{args.timestring}_{last_frame:05}.png")
|
181 |
+
img = cv2.imread(path)
|
182 |
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
183 |
+
prev_sample = sample_from_cv2(img)
|
184 |
+
if anim_args.color_coherence != 'None':
|
185 |
+
color_match_sample = img
|
186 |
+
if turbo_steps > 1:
|
187 |
+
turbo_next_image, turbo_next_frame_idx = sample_to_cv2(prev_sample, type=np.float32), last_frame
|
188 |
+
turbo_prev_image, turbo_prev_frame_idx = turbo_next_image, turbo_next_frame_idx
|
189 |
+
start_frame = last_frame+turbo_steps
|
190 |
+
|
191 |
+
args.n_samples = 1
|
192 |
+
frame_idx = start_frame
|
193 |
+
while frame_idx < anim_args.max_frames:
|
194 |
+
print(f"Rendering animation frame {frame_idx} of {anim_args.max_frames}")
|
195 |
+
noise = keys.noise_schedule_series[frame_idx]
|
196 |
+
strength = keys.strength_schedule_series[frame_idx]
|
197 |
+
contrast = keys.contrast_schedule_series[frame_idx]
|
198 |
+
depth = None
|
199 |
+
|
200 |
+
# emit in-between frames
|
201 |
+
if turbo_steps > 1:
|
202 |
+
tween_frame_start_idx = max(0, frame_idx-turbo_steps)
|
203 |
+
for tween_frame_idx in range(tween_frame_start_idx, frame_idx):
|
204 |
+
tween = float(tween_frame_idx - tween_frame_start_idx + 1) / float(frame_idx - tween_frame_start_idx)
|
205 |
+
print(f" creating in between frame {tween_frame_idx} tween:{tween:0.2f}")
|
206 |
+
|
207 |
+
advance_prev = turbo_prev_image is not None and tween_frame_idx > turbo_prev_frame_idx
|
208 |
+
advance_next = tween_frame_idx > turbo_next_frame_idx
|
209 |
+
|
210 |
+
if depth_model is not None:
|
211 |
+
assert(turbo_next_image is not None)
|
212 |
+
depth = depth_model.predict(turbo_next_image, anim_args)
|
213 |
+
|
214 |
+
if advance_prev:
|
215 |
+
turbo_prev_image, _ = anim_frame_warp(turbo_prev_image, args, anim_args, keys, tween_frame_idx, depth_model, depth=depth, device=root.device)
|
216 |
+
if advance_next:
|
217 |
+
turbo_next_image, _ = anim_frame_warp(turbo_next_image, args, anim_args, keys, tween_frame_idx, depth_model, depth=depth, device=root.device)
|
218 |
+
# Transformed raw image before color coherence and noise. Used for mask overlay
|
219 |
+
if args.use_mask and args.overlay_mask:
|
220 |
+
# Apply transforms to the original image
|
221 |
+
init_image_raw, _ = anim_frame_warp(args.init_sample_raw, args, anim_args, keys, frame_idx, depth_model, depth, device=root.device)
|
222 |
+
if root.half_precision:
|
223 |
+
args.init_sample_raw = sample_from_cv2(init_image_raw).half().to(root.device)
|
224 |
+
else:
|
225 |
+
args.init_sample_raw = sample_from_cv2(init_image_raw).to(root.device)
|
226 |
+
|
227 |
+
#Transform the mask image
|
228 |
+
if args.use_mask:
|
229 |
+
if args.mask_sample is None:
|
230 |
+
args.mask_sample = prepare_overlay_mask(args, root, prev_sample.shape)
|
231 |
+
# Transform the mask
|
232 |
+
mask_image, _ = anim_frame_warp(args.mask_sample, args, anim_args, keys, frame_idx, depth_model, depth, device=root.device)
|
233 |
+
if root.half_precision:
|
234 |
+
args.mask_sample = sample_from_cv2(mask_image).half().to(root.device)
|
235 |
+
else:
|
236 |
+
args.mask_sample = sample_from_cv2(mask_image).to(root.device)
|
237 |
+
|
238 |
+
turbo_prev_frame_idx = turbo_next_frame_idx = tween_frame_idx
|
239 |
+
|
240 |
+
if turbo_prev_image is not None and tween < 1.0:
|
241 |
+
img = turbo_prev_image*(1.0-tween) + turbo_next_image*tween
|
242 |
+
else:
|
243 |
+
img = turbo_next_image
|
244 |
+
|
245 |
+
filename = f"{args.timestring}_{tween_frame_idx:05}.png"
|
246 |
+
cv2.imwrite(os.path.join(args.outdir, filename), cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR))
|
247 |
+
if anim_args.save_depth_maps:
|
248 |
+
depth_model.save(os.path.join(args.outdir, f"{args.timestring}_depth_{tween_frame_idx:05}.png"), depth)
|
249 |
+
if turbo_next_image is not None:
|
250 |
+
prev_sample = sample_from_cv2(turbo_next_image)
|
251 |
+
|
252 |
+
# apply transforms to previous frame
|
253 |
+
if prev_sample is not None:
|
254 |
+
prev_img, depth = anim_frame_warp(prev_sample, args, anim_args, keys, frame_idx, depth_model, depth=None, device=root.device)
|
255 |
+
|
256 |
+
# Transformed raw image before color coherence and noise. Used for mask overlay
|
257 |
+
if args.use_mask and args.overlay_mask:
|
258 |
+
# Apply transforms to the original image
|
259 |
+
init_image_raw, _ = anim_frame_warp(args.init_sample_raw, args, anim_args, keys, frame_idx, depth_model, depth, device=root.device)
|
260 |
+
|
261 |
+
if root.half_precision:
|
262 |
+
args.init_sample_raw = sample_from_cv2(init_image_raw).half().to(root.device)
|
263 |
+
else:
|
264 |
+
args.init_sample_raw = sample_from_cv2(init_image_raw).to(root.device)
|
265 |
+
|
266 |
+
#Transform the mask image
|
267 |
+
if args.use_mask:
|
268 |
+
if args.mask_sample is None:
|
269 |
+
args.mask_sample = prepare_overlay_mask(args, root, prev_sample.shape)
|
270 |
+
# Transform the mask
|
271 |
+
mask_sample, _ = anim_frame_warp(args.mask_sample, args, anim_args, keys, frame_idx, depth_model, depth, device=root.device)
|
272 |
+
|
273 |
+
if root.half_precision:
|
274 |
+
args.mask_sample = sample_from_cv2(mask_sample).half().to(root.device)
|
275 |
+
else:
|
276 |
+
args.mask_sample = sample_from_cv2(mask_sample).to(root.device)
|
277 |
+
|
278 |
+
# apply color matching
|
279 |
+
if anim_args.color_coherence != 'None':
|
280 |
+
if color_match_sample is None:
|
281 |
+
color_match_sample = prev_img.copy()
|
282 |
+
else:
|
283 |
+
prev_img = maintain_colors(prev_img, color_match_sample, anim_args.color_coherence)
|
284 |
+
|
285 |
+
# apply scaling
|
286 |
+
contrast_sample = prev_img * contrast
|
287 |
+
# apply frame noising
|
288 |
+
noised_sample = add_noise(sample_from_cv2(contrast_sample), noise)
|
289 |
+
|
290 |
+
# use transformed previous frame as init for current
|
291 |
+
args.use_init = True
|
292 |
+
if root.half_precision:
|
293 |
+
args.init_sample = noised_sample.half().to(root.device)
|
294 |
+
else:
|
295 |
+
args.init_sample = noised_sample.to(root.device)
|
296 |
+
args.strength = max(0.0, min(1.0, strength))
|
297 |
+
|
298 |
+
# grab prompt for current frame
|
299 |
+
args.prompt = prompt_series[frame_idx]
|
300 |
+
args.clip_prompt = args.prompt
|
301 |
+
print(f"{args.prompt} {args.seed}")
|
302 |
+
if not using_vid_init:
|
303 |
+
print(f"Angle: {keys.angle_series[frame_idx]} Zoom: {keys.zoom_series[frame_idx]}")
|
304 |
+
print(f"Tx: {keys.translation_x_series[frame_idx]} Ty: {keys.translation_y_series[frame_idx]} Tz: {keys.translation_z_series[frame_idx]}")
|
305 |
+
print(f"Rx: {keys.rotation_3d_x_series[frame_idx]} Ry: {keys.rotation_3d_y_series[frame_idx]} Rz: {keys.rotation_3d_z_series[frame_idx]}")
|
306 |
+
|
307 |
+
# grab init image for current frame
|
308 |
+
if using_vid_init:
|
309 |
+
init_frame = os.path.join(args.outdir, 'inputframes', f"{frame_idx+1:05}.jpg")
|
310 |
+
print(f"Using video init frame {init_frame}")
|
311 |
+
args.init_image = init_frame
|
312 |
+
if anim_args.use_mask_video:
|
313 |
+
mask_frame = os.path.join(args.outdir, 'maskframes', f"{frame_idx+1:05}.jpg")
|
314 |
+
args.mask_file = mask_frame
|
315 |
+
|
316 |
+
# sample the diffusion model
|
317 |
+
sample, image = generate(args, root, frame_idx, return_latent=False, return_sample=True)
|
318 |
+
# First image sample used for masking
|
319 |
+
if not using_vid_init:
|
320 |
+
prev_sample = sample
|
321 |
+
if args.use_mask and args.overlay_mask:
|
322 |
+
if args.init_sample_raw is None:
|
323 |
+
args.init_sample_raw = sample
|
324 |
+
|
325 |
+
if turbo_steps > 1:
|
326 |
+
turbo_prev_image, turbo_prev_frame_idx = turbo_next_image, turbo_next_frame_idx
|
327 |
+
turbo_next_image, turbo_next_frame_idx = sample_to_cv2(sample, type=np.float32), frame_idx
|
328 |
+
frame_idx += turbo_steps
|
329 |
+
else:
|
330 |
+
filename = f"{args.timestring}_{frame_idx:05}.png"
|
331 |
+
image.save(os.path.join(args.outdir, filename))
|
332 |
+
if anim_args.save_depth_maps:
|
333 |
+
depth = depth_model.predict(sample_to_cv2(sample), anim_args)
|
334 |
+
depth_model.save(os.path.join(args.outdir, f"{args.timestring}_depth_{frame_idx:05}.png"), depth)
|
335 |
+
frame_idx += 1
|
336 |
+
|
337 |
+
display.clear_output(wait=True)
|
338 |
+
display.display(image)
|
339 |
+
|
340 |
+
args.seed = next_seed(args)
|
341 |
+
|
342 |
+
def render_input_video(args, anim_args, animation_prompts, root):
|
343 |
+
# create a folder for the video input frames to live in
|
344 |
+
video_in_frame_path = os.path.join(args.outdir, 'inputframes')
|
345 |
+
os.makedirs(video_in_frame_path, exist_ok=True)
|
346 |
+
|
347 |
+
# save the video frames from input video
|
348 |
+
print(f"Exporting Video Frames (1 every {anim_args.extract_nth_frame}) frames to {video_in_frame_path}...")
|
349 |
+
vid2frames(anim_args.video_init_path, video_in_frame_path, anim_args.extract_nth_frame, anim_args.overwrite_extracted_frames)
|
350 |
+
|
351 |
+
# determine max frames from length of input frames
|
352 |
+
anim_args.max_frames = len([f for f in pathlib.Path(video_in_frame_path).glob('*.jpg')])
|
353 |
+
args.use_init = True
|
354 |
+
print(f"Loading {anim_args.max_frames} input frames from {video_in_frame_path} and saving video frames to {args.outdir}")
|
355 |
+
|
356 |
+
if anim_args.use_mask_video:
|
357 |
+
# create a folder for the mask video input frames to live in
|
358 |
+
mask_in_frame_path = os.path.join(args.outdir, 'maskframes')
|
359 |
+
os.makedirs(mask_in_frame_path, exist_ok=True)
|
360 |
+
|
361 |
+
# save the video frames from mask video
|
362 |
+
print(f"Exporting Video Frames (1 every {anim_args.extract_nth_frame}) frames to {mask_in_frame_path}...")
|
363 |
+
vid2frames(anim_args.video_mask_path, mask_in_frame_path, anim_args.extract_nth_frame, anim_args.overwrite_extracted_frames)
|
364 |
+
args.use_mask = True
|
365 |
+
args.overlay_mask = True
|
366 |
+
|
367 |
+
render_animation(args, anim_args, animation_prompts, root)
|
368 |
+
|
369 |
+
def render_interpolation(args, anim_args, animation_prompts, root):
|
370 |
+
# animations use key framed prompts
|
371 |
+
args.prompts = animation_prompts
|
372 |
+
|
373 |
+
# create output folder for the batch
|
374 |
+
os.makedirs(args.outdir, exist_ok=True)
|
375 |
+
print(f"Saving animation frames to {args.outdir}")
|
376 |
+
|
377 |
+
# save settings for the batch
|
378 |
+
settings_filename = os.path.join(args.outdir, f"{args.timestring}_settings.txt")
|
379 |
+
with open(settings_filename, "w+", encoding="utf-8") as f:
|
380 |
+
s = {**dict(args.__dict__), **dict(anim_args.__dict__)}
|
381 |
+
del s['master_args']
|
382 |
+
del s['opt']
|
383 |
+
del s['root']
|
384 |
+
del s['get_output_folder']
|
385 |
+
json.dump(s, f, ensure_ascii=False, indent=4)
|
386 |
+
|
387 |
+
# Interpolation Settings
|
388 |
+
args.n_samples = 1
|
389 |
+
args.seed_behavior = 'fixed' # force a fixed seed for now, because only one seed is available
|
390 |
+
prompts_c_s = [] # cache all the text embeddings
|
391 |
+
|
392 |
+
print(f"Preparing for interpolation of the following...")
|
393 |
+
|
394 |
+
for i, prompt in animation_prompts.items():
|
395 |
+
args.prompt = prompt
|
396 |
+
args.clip_prompt = args.prompt
|
397 |
+
|
398 |
+
# sample the diffusion model
|
399 |
+
results = generate(args, root, return_c=True)
|
400 |
+
c, image = results[0], results[1]
|
401 |
+
prompts_c_s.append(c)
|
402 |
+
|
403 |
+
# display.clear_output(wait=True)
|
404 |
+
display.display(image)
|
405 |
+
|
406 |
+
args.seed = next_seed(args)
|
407 |
+
|
408 |
+
display.clear_output(wait=True)
|
409 |
+
print(f"Interpolation start...")
|
410 |
+
|
411 |
+
frame_idx = 0
|
412 |
+
|
413 |
+
if anim_args.interpolate_key_frames:
|
414 |
+
for i in range(len(prompts_c_s)-1):
|
415 |
+
dist_frames = list(animation_prompts.items())[i+1][0] - list(animation_prompts.items())[i][0]
|
416 |
+
if dist_frames <= 0:
|
417 |
+
print("key frames duplicated or reversed. interpolation skipped.")
|
418 |
+
return
|
419 |
+
else:
|
420 |
+
for j in range(dist_frames):
|
421 |
+
# interpolate the text embedding
|
422 |
+
prompt1_c = prompts_c_s[i]
|
423 |
+
prompt2_c = prompts_c_s[i+1]
|
424 |
+
args.init_c = prompt1_c.add(prompt2_c.sub(prompt1_c).mul(j * 1/dist_frames))
|
425 |
+
|
426 |
+
# sample the diffusion model
|
427 |
+
results = generate(args, root)
|
428 |
+
image = results[0]
|
429 |
+
|
430 |
+
filename = f"{args.timestring}_{frame_idx:05}.png"
|
431 |
+
image.save(os.path.join(args.outdir, filename))
|
432 |
+
frame_idx += 1
|
433 |
+
|
434 |
+
display.clear_output(wait=True)
|
435 |
+
display.display(image)
|
436 |
+
|
437 |
+
args.seed = next_seed(args)
|
438 |
+
|
439 |
+
else:
|
440 |
+
for i in range(len(prompts_c_s)-1):
|
441 |
+
for j in range(anim_args.interpolate_x_frames+1):
|
442 |
+
# interpolate the text embedding
|
443 |
+
prompt1_c = prompts_c_s[i]
|
444 |
+
prompt2_c = prompts_c_s[i+1]
|
445 |
+
args.init_c = prompt1_c.add(prompt2_c.sub(prompt1_c).mul(j * 1/(anim_args.interpolate_x_frames+1)))
|
446 |
+
|
447 |
+
# sample the diffusion model
|
448 |
+
results = generate(args, root)
|
449 |
+
image = results[0]
|
450 |
+
|
451 |
+
filename = f"{args.timestring}_{frame_idx:05}.png"
|
452 |
+
image.save(os.path.join(args.outdir, filename))
|
453 |
+
frame_idx += 1
|
454 |
+
|
455 |
+
display.clear_output(wait=True)
|
456 |
+
display.display(image)
|
457 |
+
|
458 |
+
args.seed = next_seed(args)
|
459 |
+
|
460 |
+
# generate the last prompt
|
461 |
+
args.init_c = prompts_c_s[-1]
|
462 |
+
results = generate(args, root)
|
463 |
+
image = results[0]
|
464 |
+
filename = f"{args.timestring}_{frame_idx:05}.png"
|
465 |
+
image.save(os.path.join(args.outdir, filename))
|
466 |
+
|
467 |
+
display.clear_output(wait=True)
|
468 |
+
display.display(image)
|
469 |
+
args.seed = next_seed(args)
|
470 |
+
|
471 |
+
#clear init_c
|
472 |
+
args.init_c = None
|
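The diffusion-cadence logic in render_animation above only diffuses every Nth frame and blends the frames in between; the arithmetic, with toy numbers and no model involved, looks like this:

turbo_steps = 4                      # anim_args.diffusion_cadence
frame_idx = 8                        # the frame being diffused this iteration
tween_frame_start_idx = max(0, frame_idx - turbo_steps)          # -> 4
for tween_frame_idx in range(tween_frame_start_idx, frame_idx):  # frames 4..7
    tween = float(tween_frame_idx - tween_frame_start_idx + 1) / float(frame_idx - tween_frame_start_idx)
    # tween takes 0.25, 0.5, 0.75, 1.0; each in-between frame is
    # turbo_prev_image * (1 - tween) + turbo_next_image * tween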
deforum-stable-diffusion/helpers/save_images.py
ADDED
@@ -0,0 +1,60 @@
1 |
+
from typing import List, Tuple
|
2 |
+
from einops import rearrange
|
3 |
+
import numpy as np, os, torch
|
4 |
+
from PIL import Image
|
5 |
+
from torchvision.utils import make_grid
|
6 |
+
import time
|
7 |
+
|
8 |
+
|
9 |
+
def get_output_folder(output_path, batch_folder):
|
10 |
+
out_path = os.path.join(output_path,time.strftime('%Y-%m'))
|
11 |
+
if batch_folder != "":
|
12 |
+
out_path = os.path.join(out_path, batch_folder)
|
13 |
+
os.makedirs(out_path, exist_ok=True)
|
14 |
+
return out_path
|
15 |
+
|
16 |
+
|
17 |
+
def save_samples(
|
18 |
+
args, x_samples: torch.Tensor, seed: int, n_rows: int
|
19 |
+
) -> Tuple[Image.Image, List[Image.Image]]:
|
20 |
+
"""Function to save samples to disk.
|
21 |
+
Args:
|
22 |
+
args: Stable deforum diffusion arguments.
|
23 |
+
x_samples: Samples to save.
|
24 |
+
seed: Seed for the experiment.
|
25 |
+
n_rows: Number of rows in the grid.
|
26 |
+
Returns:
|
27 |
+
A tuple of the grid image and a list of the generated images.
|
28 |
+
( grid_image, generated_images )
|
29 |
+
"""
|
30 |
+
|
31 |
+
# save samples
|
32 |
+
images = []
|
33 |
+
grid_image = None
|
34 |
+
if args.display_samples or args.save_samples:
|
35 |
+
for index, x_sample in enumerate(x_samples):
|
36 |
+
x_sample = 255.0 * rearrange(x_sample.cpu().numpy(), "c h w -> h w c")
|
37 |
+
images.append(Image.fromarray(x_sample.astype(np.uint8)))
|
38 |
+
if args.save_samples:
|
39 |
+
images[-1].save(
|
40 |
+
os.path.join(
|
41 |
+
args.outdir, f"{args.timestring}_{index:02}_{seed}.png"
|
42 |
+
)
|
43 |
+
)
|
44 |
+
|
45 |
+
# save grid
|
46 |
+
if args.display_grid or args.save_grid:
|
47 |
+
grid = torch.stack([x_samples], 0)
|
48 |
+
grid = rearrange(grid, "n b c h w -> (n b) c h w")
|
49 |
+
grid = make_grid(grid, nrow=n_rows, padding=0)
|
50 |
+
|
51 |
+
# to image
|
52 |
+
grid = 255.0 * rearrange(grid, "c h w -> h w c").cpu().numpy()
|
53 |
+
grid_image = Image.fromarray(grid.astype(np.uint8))
|
54 |
+
if args.save_grid:
|
55 |
+
grid_image.save(
|
56 |
+
os.path.join(args.outdir, f"{args.timestring}_{seed}_grid.png")
|
57 |
+
)
|
58 |
+
|
59 |
+
# return grid_image and individual sample images
|
60 |
+
return grid_image, images
|
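A small usage sketch for get_output_folder; the date subfolder depends on when it runs, and the names are placeholders:

out_dir = get_output_folder("outputs", "my_batch")
# -> something like "outputs/2024-06/my_batch", created on disk if it did not exist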
deforum-stable-diffusion/helpers/settings.py
ADDED
@@ -0,0 +1,34 @@
1 |
+
import os
|
2 |
+
import json
|
3 |
+
|
4 |
+
def load_args(args_dict, anim_args_dict, settings_file, custom_settings_file, verbose=True):
|
5 |
+
default_settings_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'settings'))
|
6 |
+
if settings_file.lower() == 'custom':
|
7 |
+
settings_filename = custom_settings_file
|
8 |
+
else:
|
9 |
+
settings_filename = os.path.join(default_settings_dir,settings_file)
|
10 |
+
print(f"Reading custom settings from {settings_filename}...")
|
11 |
+
if not os.path.isfile(settings_filename):
|
12 |
+
print('The settings file does not exist. The in-notebook settings will be used instead.')
|
13 |
+
else:
|
14 |
+
if not verbose:
|
15 |
+
print(f"Any settings not included in {settings_filename} will use the in-notebook settings by default.")
|
16 |
+
with open(settings_filename, "r") as f:
|
17 |
+
jdata = json.loads(f.read())
|
18 |
+
if jdata.get("prompts") is not None:
|
19 |
+
animation_prompts = jdata["prompts"]
|
20 |
+
for i, k in enumerate(args_dict):
|
21 |
+
if k in jdata:
|
22 |
+
args_dict[k] = jdata[k]
|
23 |
+
else:
|
24 |
+
if verbose:
|
25 |
+
print(f"key {k} doesn't exist in the custom settings data! using the default value of {args_dict[k]}")
|
26 |
+
for i, k in enumerate(anim_args_dict):
|
27 |
+
if k in jdata:
|
28 |
+
anim_args_dict[k] = jdata[k]
|
29 |
+
else:
|
30 |
+
if verbose:
|
31 |
+
print(f"key {k} doesn't exist in the custom settings data! using the default value of {anim_args_dict[k]}")
|
32 |
+
if verbose:
|
33 |
+
print(args_dict)
|
34 |
+
print(anim_args_dict)
|
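A usage sketch for load_args, assuming a hypothetical my_settings.json; the dict keys below are only examples of in-notebook defaults:

args_dict = {"W": 512, "H": 512, "seed": -1}
anim_args_dict = {"max_frames": 120, "animation_mode": "None"}
load_args(args_dict, anim_args_dict,
          settings_file="custom", custom_settings_file="my_settings.json",
          verbose=True)
# keys found in the JSON overwrite the dict entries in place; anything missing keeps
# its in-notebook default and is reported because verbose=True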
deforum-stable-diffusion/helpers/simulacra_compute_embeddings.py
ADDED
@@ -0,0 +1,96 @@
1 |
+
#!/usr/bin/env python3
|
2 |
+
|
3 |
+
"""Precomputes CLIP embeddings for Simulacra Aesthetic Captions."""
|
4 |
+
|
5 |
+
import argparse
|
6 |
+
import os
|
7 |
+
from pathlib import Path
|
8 |
+
import sqlite3
|
9 |
+
|
10 |
+
from PIL import Image
|
11 |
+
|
12 |
+
import torch
|
13 |
+
from torch import multiprocessing as mp
|
14 |
+
from torch.utils import data
|
15 |
+
import torchvision.transforms as transforms
|
16 |
+
from tqdm import tqdm
|
17 |
+
|
18 |
+
from CLIP import clip
|
19 |
+
|
20 |
+
|
21 |
+
class SimulacraDataset(data.Dataset):
|
22 |
+
"""Simulacra dataset
|
23 |
+
Args:
|
24 |
+
images_dir: directory
|
25 |
+
transform: preprocessing and augmentation of the training images
|
26 |
+
"""
|
27 |
+
|
28 |
+
def __init__(self, images_dir, db, transform=None):
|
29 |
+
self.images_dir = Path(images_dir)
|
30 |
+
self.transform = transform
|
31 |
+
self.conn = sqlite3.connect(db)
|
32 |
+
self.ratings = []
|
33 |
+
for row in self.conn.execute('SELECT generations.id, images.idx, paths.path, AVG(ratings.rating) FROM images JOIN generations ON images.gid=generations.id JOIN ratings ON images.id=ratings.iid JOIN paths ON images.id=paths.iid GROUP BY images.id'):
|
34 |
+
self.ratings.append(row)
|
35 |
+
|
36 |
+
def __len__(self):
|
37 |
+
return len(self.ratings)
|
38 |
+
|
39 |
+
def __getitem__(self, key):
|
40 |
+
gid, idx, filename, rating = self.ratings[key]
|
41 |
+
image = Image.open(self.images_dir / filename).convert('RGB')
|
42 |
+
if self.transform:
|
43 |
+
image = self.transform(image)
|
44 |
+
return image, torch.tensor(rating)
|
45 |
+
|
46 |
+
|
47 |
+
def main():
|
48 |
+
p = argparse.ArgumentParser(description=__doc__)
|
49 |
+
p.add_argument('--batch-size', '-bs', type=int, default=10,
|
50 |
+
help='the batch size')
|
51 |
+
p.add_argument('--clip-model', type=str, default='ViT-B/16',
|
52 |
+
help='the CLIP model')
|
53 |
+
p.add_argument('--db', type=str, required=True,
|
54 |
+
help='the database location')
|
55 |
+
p.add_argument('--device', type=str,
|
56 |
+
help='the device to use')
|
57 |
+
p.add_argument('--images-dir', type=str, required=True,
|
58 |
+
help='the dataset images directory')
|
59 |
+
p.add_argument('--num-workers', type=int, default=8,
|
60 |
+
help='the number of data loader workers')
|
61 |
+
p.add_argument('--output', type=str, required=True,
|
62 |
+
help='the output file')
|
63 |
+
p.add_argument('--start-method', type=str, default='spawn',
|
64 |
+
choices=['fork', 'forkserver', 'spawn'],
|
65 |
+
help='the multiprocessing start method')
|
66 |
+
args = p.parse_args()
|
67 |
+
|
68 |
+
mp.set_start_method(args.start_method)
|
69 |
+
if args.device:
|
70 |
+
device = torch.device(args.device)
|
71 |
+
else:
|
72 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
73 |
+
print('Using device:', device)
|
74 |
+
|
75 |
+
clip_model, clip_tf = clip.load(args.clip_model, device=device, jit=False)
|
76 |
+
clip_model = clip_model.eval().requires_grad_(False)
|
77 |
+
|
78 |
+
dataset = SimulacraDataset(args.images_dir, args.db, transform=clip_tf)
|
79 |
+
loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers)
|
80 |
+
|
81 |
+
embeds, ratings = [], []
|
82 |
+
|
83 |
+
for batch in tqdm(loader):
|
84 |
+
images_batch, ratings_batch = batch
|
85 |
+
embeds.append(clip_model.encode_image(images_batch.to(device)).cpu())
|
86 |
+
ratings.append(ratings_batch.clone())
|
87 |
+
|
88 |
+
obj = {'clip_model': args.clip_model,
|
89 |
+
'embeds': torch.cat(embeds),
|
90 |
+
'ratings': torch.cat(ratings)}
|
91 |
+
|
92 |
+
torch.save(obj, args.output)
|
93 |
+
|
94 |
+
|
95 |
+
if __name__ == '__main__':
|
96 |
+
main()
|