Spaces:

Doubiiu
/

DynamiCrafter_interp_loop

Running on Zero

App Files Files Community

DynamiCrafter_interp_loop / app.py

Doubiiu

Update app.py

16ab4e3 verified 11 months ago

raw

history blame contribute delete

10.8 kB

	import spaces
	import gradio as gr
	import os
	import sys
	import time
	from omegaconf import OmegaConf
	import torch
	from pytorch_lightning import seed_everything
	from huggingface_hub import hf_hub_download
	from einops import repeat
	import torchvision.transforms as transforms
	from utils.utils import instantiate_from_config
	sys.path.insert(0, "scripts/evaluation")
	from funcs import (
	batch_ddim_sampling,
	load_model_checkpoint,
	get_latent_z,
	save_videos
	)

	def download_model():
	    """Make sure the interpolation checkpoint is available on local disk.

	    Every file in ``filename_list`` is looked up under
	    ``./checkpoints/dynamicrafter_512_interp_v1/`` and fetched from the
	    ``Doubiiu/DynamiCrafter_512_Interp`` Hub repository only when it is
	    missing. Files that already exist are left untouched.
	    """
	    REPO_ID = 'Doubiiu/DynamiCrafter_512_Interp'
	    filename_list = ['model.ckpt']
	    checkpoint_dir = './checkpoints/dynamicrafter_512_interp_v1/'
	    # exist_ok=True replaces the check-then-create sequence: it is a no-op
	    # when the directory is already there and cannot race with another
	    # process creating it in between.
	    os.makedirs(checkpoint_dir, exist_ok=True)
	    for filename in filename_list:
	        local_file = os.path.join(checkpoint_dir, filename)
	        if not os.path.exists(local_file):
	            hf_hub_download(repo_id=REPO_ID, filename=filename, local_dir=checkpoint_dir, force_download=True)



	# One-time start-up: fetch the checkpoint if needed, build the model from the
	# YAML config, load the weights and move the model to the GPU.
	download_model()
	ckpt_path = 'checkpoints/dynamicrafter_512_interp_v1/model.ckpt'
	config_file = 'configs/inference_512_v1.0.yaml'
	config = OmegaConf.load(config_file)
	model_config = config.pop("model", OmegaConf.create())
	# Force the UNet's `use_checkpoint` option off regardless of the YAML value
	# (presumably gradient checkpointing, which inference does not need - TODO confirm).
	model_config['params']['unet_config']['params']['use_checkpoint'] = False
	model = instantiate_from_config(model_config)
	# Explicit raise rather than `assert`: assertions are removed under
	# `python -O`, which would let a missing checkpoint slip through to the loader.
	if not os.path.exists(ckpt_path):
	    raise FileNotFoundError("Error: checkpoint Not Found!")
	model = load_model_checkpoint(model, ckpt_path)
	model.eval()
	model = model.cuda()



	def _image_to_tensor(image):
	    """Convert a channels-last numpy image into a channels-first float tensor.

	    The array is moved to the model's device and rescaled with
	    ``(x / 255 - 0.5) * 2`` (so 0..255 input maps to -1..1).
	    """
	    tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device)
	    return (tensor / 255. - 0.5) * 2


	def _frame_latent(img_tensor, transform):
	    """Resize/crop one frame and encode it with ``get_latent_z``.

	    Singleton batch and time axes are added before encoding, so the encoder
	    receives a 5-D tensor holding a single frame.
	    """
	    single_frame_video = transform(img_tensor).unsqueeze(0).unsqueeze(2)
	    return get_latent_z(model, single_frame_video)


	@spaces.GPU(duration=300)
	def infer(image, prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, image2=None):
	    """Generate a video from one or two key frames and return its file path.

	    With ``image2`` the model is conditioned on ``image`` as first frame and
	    ``image2`` as last frame (frame interpolation). Without it, ``image`` is
	    used for both ends and the final generated frame is dropped so that the
	    clip loops.

	    Args:
	        image: First key frame as a channels-last numpy array
	            (presumably 0..255 RGB as delivered by ``gr.Image`` - TODO confirm).
	        prompt: Text prompt.
	        steps: Number of DDIM sampling steps; values above 60 are clamped to 60.
	        cfg_scale: Classifier-free guidance scale forwarded to the sampler.
	        eta: DDIM eta forwarded to the sampler.
	        fs: Value handed to the model under the ``"fs"`` conditioning key
	            (the UI labels this control "FPS").
	        seed: Seed passed to ``seed_everything``.
	        image2: Optional last key frame, same format as ``image``.

	    Returns:
	        The path ``'./output.mp4'`` of the written video.

	    Raises:
	        gr.Error: If ``image`` is missing.
	    """
	    # Fail with a message the UI can display instead of the opaque TypeError
	    # that torch.from_numpy(None) would raise further down.
	    if image is None:
	        raise gr.Error("Please provide an input image.")

	    resolution = (320, 512)
	    save_fps = 8
	    seed_everything(seed)
	    # Shorter side is resized to 320, then the frame is centre-cropped to 320 x 512.
	    transform = transforms.Compose([
	        transforms.Resize(min(resolution)),
	        transforms.CenterCrop(resolution),
	    ])
	    torch.cuda.empty_cache()
	    print('start:', prompt, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
	    steps = min(steps, 60)

	    batch_size = 1
	    channels = model.model.diffusion_model.out_channels
	    frames = model.temporal_length
	    # The latent grid is 8x smaller than the pixel resolution in each dimension.
	    h, w = resolution[0] // 8, resolution[1] // 8
	    noise_shape = [batch_size, channels, frames, h, w]

	    # All model calls are inference-only: no autograd graph, CUDA autocast on.
	    with torch.no_grad(), torch.cuda.amp.autocast():
	        # text cond
	        text_emb = model.get_learned_conditioning([prompt])

	        # img cond
	        img_tensor = _image_to_tensor(image)
	        z = _frame_latent(img_tensor, transform)
	        # Looping mode (no second image) ends on the first frame's latent.
	        z_last = z if image2 is None else _frame_latent(_image_to_tensor(image2), transform)

	        # Conditioning clip: zeros everywhere except the first and last frame.
	        img_tensor_repeat = torch.zeros_like(z).repeat(1, 1, frames, 1, 1)
	        img_tensor_repeat[:, :, :1, :, :] = z
	        img_tensor_repeat[:, :, -1:, :, :] = z_last

	        # The image embedding is computed from the first key frame only
	        # (the normalised tensor before resize/crop).
	        cond_images = model.embedder(img_tensor.unsqueeze(0))  ## blc
	        img_emb = model.image_proj_model(cond_images)

	        imtext_cond = torch.cat([text_emb, img_emb], dim=1)

	        fs_tensor = torch.tensor([fs], dtype=torch.long, device=model.device)
	        cond = {"c_crossattn": [imtext_cond], "fs": fs_tensor, "c_concat": [img_tensor_repeat]}

	        ## inference
	        batch_samples = batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale)
	        ## b,samples,c,t,h,w
	        ## remove the last frame for looping video
	        if image2 is None:
	            batch_samples = batch_samples[:, :, :, :-1, ...]

	    # NOTE(review): the output path is fixed, so overlapping requests would
	    # overwrite each other's file - confirm the queue serialises calls.
	    video_path = './output.mp4'
	    save_videos(batch_samples, './', filenames=['output'], fps=save_fps)
	    return video_path


	# Sampler settings shared by every bundled example, in the order the example
	# rows list them: steps, cfg_scale, eta, fs.
	_EXAMPLE_SETTINGS = (50, 7.5, 1.0, 5)

	# Frame-interpolation examples: first frame, prompt, shared settings, seed, last frame.
	i2v_examples_interp_512 = [
	    [first_frame, prompt, *_EXAMPLE_SETTINGS, seed, last_frame]
	    for first_frame, prompt, seed, last_frame in (
	        ('prompts/512_interp/smile_01.png', 'a smiling girl', 12306, 'prompts/512_interp/smile_02.png'),
	        ('prompts/512_interp/stone01_01.png', 'rotating view', 123, 'prompts/512_interp/stone01_02.png'),
	        ('prompts/512_interp/walk_01.png', 'man walking', 345, 'prompts/512_interp/walk_02.png'),
	    )
	]

	# Looping-video examples: single frame, prompt, shared settings, seed.
	i2v_examples_loop_512 = [
	    [frame, prompt, *_EXAMPLE_SETTINGS, seed]
	    for frame, prompt, seed in (
	        ('prompts/512_loop/24.png', 'a beach with waves and clouds at sunset', 234),
	        ('prompts/512_loop/36.png', 'clothes swaying in the wind', 123),
	        ('prompts/512_loop/40.png', 'flowers swaying in the wind', 234),
	    )
	]

	# Size limits for the image inputs and the video output, keyed by elem_id.
	css = (
	    "#input_img {max-width: 512px !important} "
	    "#input_img2 {max-width: 512px !important} "
	    "#output_vid {max-width: 512px; max-height: 320px} "
	)

	# Gradio front end: a header plus one tab per generation mode, both backed by `infer`.
	# NOTE(review): the Row/Column nesting below is the most plausible layout for
	# this sequence of containers - confirm against the rendered page.
	with gr.Blocks(analytics_enabled=False, css=css) as dynamicrafter_iface:
	    # Header: title, author links and project links (single HTML string).
	    gr.Markdown("<div align='center'> <h1> DynamiCrafter: Animating Open-domain Images with Video Diffusion Priors </h1> \
	<h2 style='font-weight: 450; font-size: 1rem; margin: 0rem'>\
	<a href='https://doubiiu.github.io/'>Jinbo Xing</a>, \
	<a href='https://menghanxia.github.io/'>Menghan Xia</a>, <a href='https://yzhang2016.github.io/'>Yong Zhang</a>, \
	<a href=''>Haoxin Chen</a>, <a href=''> Wangbo Yu</a>,\
	<a href='https://github.com/hyliu'>Hanyuan Liu</a>, <a href='https://xinntao.github.io/'>Xintao Wang</a>,\
	<a href='https://www.cse.cuhk.edu.hk/~ttwong/myself.html'>Tien-Tsin Wong</a>,\
	<a href='https://scholar.google.com/citations?user=4oXBp9UAAAAJ&hl=zh-CN'>Ying Shan</a>\
	</h2> \
	<a style='font-size:18px;color: #000000'>If DynamiCrafter is useful, please help star the </a>\
	<a style='font-size:18px;color: #000000' href='https://github.com/Doubiiu/DynamiCrafter'>[Github Repo]</a>\
	<a style='font-size:18px;color: #000000'>, which is important to Open-Source projects. Thanks!</a>\
	<a style='font-size:18px;color: #000000' href='https://arxiv.org/abs/2310.12190'> [ArXiv] </a>\
	<a style='font-size:18px;color: #000000' href='https://doubiiu.github.io/projects/DynamiCrafter/'> [Project Page] </a> </div>")

	    ####### tab 1: generative frame interpolation (two key frames) ######
	    with gr.Tab(label='Generative Frame Interpolation_320x512'):
	        with gr.Column():
	            with gr.Row():
	                # Left column: first key frame, prompt and sampler controls.
	                with gr.Column():
	                    with gr.Row():
	                        i2v_input_image = gr.Image(label="Input Image1",elem_id="input_img")
	                    with gr.Row():
	                        i2v_input_text = gr.Text(label='Prompts')
	                    with gr.Row():
	                        i2v_seed = gr.Slider(label='Random Seed', minimum=0, maximum=50000, step=1, value=123)
	                        i2v_eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='ETA', value=1.0, elem_id="i2v_eta")
	                        i2v_cfg_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='CFG Scale', value=7.5, elem_id="i2v_cfg_scale")
	                    with gr.Row():
	                        i2v_steps = gr.Slider(minimum=1, maximum=50, step=1, elem_id="i2v_steps", label="Sampling steps", value=50)
	                        # Labelled "FPS" in the UI; forwarded to `infer` as its `fs` argument.
	                        i2v_motion = gr.Slider(minimum=5, maximum=30, step=1, elem_id="i2v_motion", label="FPS", value=10)
	                    i2v_end_btn = gr.Button("Generate")
	                # Right column: second key frame and the generated video.
	                with gr.Column():
	                    with gr.Row():
	                        i2v_input_image2 = gr.Image(label="Input Image2",elem_id="input_img2")
	                    with gr.Row():
	                        i2v_output_video = gr.Video(label="Generated Video",elem_id="output_vid",autoplay=True,show_share_button=True)

	            # Input order matches the positional parameters of `infer`.
	            gr.Examples(examples=i2v_examples_interp_512,
	                        inputs=[i2v_input_image, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed, i2v_input_image2],
	                        outputs=[i2v_output_video],
	                        fn = infer,
	                        cache_examples=True,
	            )
	        # Wired before the second tab rebinds the same variable names.
	        i2v_end_btn.click(inputs=[i2v_input_image, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed, i2v_input_image2],
	                          outputs=[i2v_output_video],
	                          fn = infer
	        )

	    ####### tab 2: looping video generation (single key frame) ######
	    with gr.Tab(label='Looping Video Generation_320x512'):
	        with gr.Column():
	            with gr.Row():
	                # Left column: key frame, prompt and sampler controls.
	                with gr.Column():
	                    with gr.Row():
	                        i2v_input_image = gr.Image(label="Input Image",elem_id="input_img")
	                    with gr.Row():
	                        i2v_input_text = gr.Text(label='Prompts')
	                    with gr.Row():
	                        i2v_seed = gr.Slider(label='Random Seed', minimum=0, maximum=50000, step=1, value=123)
	                        i2v_eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='ETA', value=1.0, elem_id="i2v_eta")
	                        i2v_cfg_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='CFG Scale', value=7.5, elem_id="i2v_cfg_scale")
	                    with gr.Row():
	                        i2v_steps = gr.Slider(minimum=1, maximum=50, step=1, elem_id="i2v_steps", label="Sampling steps", value=50)
	                        i2v_motion = gr.Slider(minimum=5, maximum=30, step=1, elem_id="i2v_motion", label="FPS", value=5)
	                    i2v_end_btn = gr.Button("Generate")
	                # Right side: the generated video.
	                with gr.Row():
	                    i2v_output_video = gr.Video(label="Generated Video",elem_id="output_vid",autoplay=True,show_share_button=True)

	            # No second image here, so `infer` runs with image2=None (looping mode).
	            gr.Examples(examples=i2v_examples_loop_512,
	                        inputs=[i2v_input_image, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed],
	                        outputs=[i2v_output_video],
	                        fn = infer,
	                        cache_examples=True,
	            )
	        i2v_end_btn.click(inputs=[i2v_input_image, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed],
	                          outputs=[i2v_output_video],
	                          fn = infer
	        )

	# Queue holds at most 12 pending requests.
	dynamicrafter_iface.queue(max_size=12).launch(show_api=True)