0shot / garment_pipeline.py
Himanshu-AT
modify
33b3b46
import torch
from PIL import Image
from diffusers.utils import load_image
from pipeline import FluxConditionalPipeline
def generate_with_garment(
    pipe,
    garment_image: Image.Image,
    text: str,
    gemini_prompt: bool = True,
    guidance: float = 3.5,
    i_guidance: float = 1.0,
    t_guidance: float = 1.0,
    device="cuda" if torch.cuda.is_available() else "cpu",
):
    """Generate an image of a model wearing the provided garment.

    The garment image is center-cropped to a square, resized to 512x512 and
    passed to the pipeline as the control image. If the prompt mentions
    neither "garment" nor "clothing" (case-insensitive), it is prefixed with
    "A model wearing this garment, " so the prompt refers to the garment.

    Args:
        pipe: Callable pipeline (expected to be a FluxConditionalPipeline
            instance) whose result exposes an ``images`` sequence.
        garment_image: Image of the garment to keep in the generated output.
        text: Text prompt describing the desired output (model, pose,
            background).
        gemini_prompt: Forwarded to the pipeline as ``gemini_prompt``
            (whether to enhance the prompt using Gemini).
        guidance: Forwarded as ``guidance_scale`` (general guidance scale).
        i_guidance: Forwarded as ``guidance_scale_real_i`` (image-specific
            guidance scale).
        t_guidance: Forwarded as ``guidance_scale_real_t`` (text-specific
            guidance scale).
        device: Accepted for interface compatibility. It is not read by this
            function; ``pipe`` is used on whatever device it already lives
            on.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    # Fixed generation settings used for every call.
    control_size = (512, 512)
    output_height = 512
    output_width = 1024
    num_steps = 28
    negative_prompt = "distorted garment, wrong clothing, deformed clothes"

    # Center-crop the garment to the largest square that fits, then resize.
    w, h = garment_image.size
    side = min(w, h)
    left = (w - side) // 2
    top = (h - side) // 2
    garment_image = garment_image.crop(
        (left, top, left + side, top + side)
    ).resize(control_size)

    # Prepare garment image as control image
    control_image = load_image(garment_image)

    # Make sure the prompt refers to the garment so it is kept while the
    # model and background change.
    enhanced_text = text
    lowered = enhanced_text.lower()
    if "garment" not in lowered and "clothing" not in lowered:
        enhanced_text = f"A model wearing this garment, {text}"

    # Generate the image
    result_image = pipe(
        prompt=enhanced_text.strip(),
        negative_prompt=negative_prompt,
        num_inference_steps=num_steps,
        height=output_height,
        width=output_width,
        guidance_scale=guidance,
        image=control_image,
        guidance_scale_real_i=i_guidance,  # Higher value to maintain garment fidelity
        guidance_scale_real_t=t_guidance,
        gemini_prompt=gemini_prompt,
    ).images[0]
    return result_image