Himanshu-AT committed on
Commit 33b3b46 · 1 Parent(s): 4f8239d
Files changed (4)
  1. README.md +1 -1
  2. app.py +73 -29
  3. garment_pipeline.py +60 -0
  4. recaption.py +48 -0
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: Diffusion Self Distillation
+title: 0Shot
 emoji: 🦀
 colorFrom: red
 colorTo: green
app.py CHANGED
@@ -2,17 +2,15 @@ import spaces
 import gradio as gr
 import torch
 from PIL import Image
-
 from diffusers.utils import load_image
 from pipeline import FluxConditionalPipeline
 from transformer import FluxTransformer2DConditionalModel
-
+from garment_pipeline import generate_with_garment
+from recaption import enhance_prompt, enhance_garment_prompt
 import os

 pipe = None
-
 CHECKPOINT = "primecai/dsd_model"
-
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

@@ -24,17 +22,20 @@ transformer = FluxTransformer2DConditionalModel.from_pretrained(
     ignore_mismatched_sizes=True,
     use_auth_token=os.getenv("HF_TOKEN"),
 )
+
 pipe = FluxConditionalPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     transformer=transformer,
     torch_dtype=dtype,
     use_auth_token=os.getenv("HF_TOKEN"),
 )
+
 pipe.load_lora_weights(
     CHECKPOINT,
     weight_name="pytorch_lora_weights.safetensors",
     use_auth_token=os.getenv("HF_TOKEN"),
 )
+
 pipe.to(device, dtype=dtype)

 @spaces.GPU
@@ -50,7 +51,7 @@ def generate_image(
     image = image.crop(
         ((w - min_size) // 2, (h - min_size) // 2, (w + min_size) // 2, (h + min_size) // 2)
     ).resize((512, 512))
-
+
     control_image = load_image(image)
     result_image = pipe(
         prompt=text.strip(),
@@ -64,9 +65,36 @@ def generate_image(
         guidance_scale_real_t=t_guidance,
         gemini_prompt=gemini_prompt,
     ).images[0]
-
+
     return result_image

+@spaces.GPU
+def generate_with_garment_interface(
+    garment_image: Image.Image,
+    text: str,
+    gemini_prompt: bool = True,
+    guidance: float = 3.5,
+    i_guidance: float = 1.5,  # Default higher to maintain garment fidelity
+    t_guidance: float = 1.0
+):
+    """Interface function for generating images with a garment"""
+    # Use garment-specific prompt enhancement if enabled
+    if gemini_prompt:
+        text = enhance_garment_prompt(garment_image, text)
+
+    # Call the garment-specific generation function
+    result_image = generate_with_garment(
+        pipe=pipe,
+        garment_image=garment_image,
+        text=text,
+        gemini_prompt=False,  # Already enhanced above if needed
+        guidance=guidance,
+        i_guidance=i_guidance,
+        t_guidance=t_guidance,
+        device=device
+    )
+
+    return result_image

 def get_samples():
     sample_list = [
@@ -111,7 +139,6 @@ def get_samples():
         for sample in sample_list
     ]

-
 demo = gr.Blocks()

 with demo:
@@ -128,28 +155,45 @@ with demo:
         </div>
         """
     )
-
-    iface = gr.Interface(
-        fn=generate_image,
-        inputs=[
-            gr.Image(type="pil", width=512),
-            gr.Textbox(lines=2, label="text", info="Could be something as simple as 'this character playing soccer'."),
-            gr.Checkbox(label="Gemini prompt", value=True, info="Use Gemini to enhance the prompt. This is recommended for most cases, unless you have a specific prompt similar to the examples in mind."),
-            gr.Slider(minimum=1.0, maximum=6.0, step=0.5, value=3.5, label="guidance scale", info="Tip: start with 3.5, then gradually increase if the consistency is consistently off"),
-            gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.5, label="real guidance scale for image", info="Tip: increase if the image is not consistent"),
-            gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.0, label="real guidance scale for prompt", info="Tip: increase if the prompt is not consistent"),
-        ],
-        outputs=gr.Image(type="pil"),
-        # examples=get_samples(),
-        live=False,
-    )
-    gr.Examples(
-        examples=get_samples(),
-        inputs=iface.input_components,
-        outputs=iface.output_components,
-        run_on_click=False  # Prevents auto-submission
-    )
-
+
+    with gr.Tabs():
+        with gr.TabItem("Standard Generation"):
+            iface = gr.Interface(
+                fn=generate_image,
+                inputs=[
+                    gr.Image(type="pil", width=512),
+                    gr.Textbox(lines=2, label="text", info="Could be something as simple as 'this character playing soccer'."),
+                    gr.Checkbox(label="Gemini prompt", value=True, info="Use Gemini to enhance the prompt. This is recommended for most cases, unless you have a specific prompt similar to the examples in mind."),
+                    gr.Slider(minimum=1.0, maximum=6.0, step=0.5, value=3.5, label="guidance scale", info="Tip: start with 3.5, then gradually increase if the consistency is consistently off"),
+                    gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.5, label="real guidance scale for image", info="Tip: increase if the image is not consistent"),
+                    gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.0, label="real guidance scale for prompt", info="Tip: increase if the prompt is not consistent"),
+                ],
+                outputs=gr.Image(type="pil"),
+                live=False,
+            )
+            gr.Examples(
+                examples=get_samples(),
+                inputs=iface.input_components,
+                outputs=iface.output_components,
+                run_on_click=False  # Prevents auto-submission
+            )
+
+        with gr.TabItem("Garment Generation"):
+            garment_iface = gr.Interface(
+                fn=generate_with_garment_interface,
+                inputs=[
+                    gr.Image(type="pil", width=512, label="Garment Image", info="Upload an image of the garment you want to keep in the generated output"),
+                    gr.Textbox(lines=2, label="Model and Background Description", info="Describe the model and setting you want the garment to appear in, e.g., 'A tall model on a beach at sunset'"),
+                    gr.Checkbox(label="Enhance Prompt", value=True, info="Use Gemini to enhance the prompt with detailed garment description"),
+                    gr.Slider(minimum=1.0, maximum=6.0, step=0.5, value=3.5, label="guidance scale", info="Controls overall adherence to the prompt"),
+                    gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.5, label="garment fidelity", info="Controls how closely the output matches the original garment - higher values preserve more details"),
+                    gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.0, label="prompt adherence", info="Controls how closely the output matches the text prompt for model and background"),
+                ],
+                outputs=gr.Image(type="pil"),
+                live=False,
+                description="Generate an image of a model wearing the provided garment in a new setting",
+            )
+
     gr.HTML(
         """
         <div style="text-align: center;">
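Usage note: the new "Garment Generation" tab calls generate_with_garment_interface, which, when the checkbox is ticked, first runs enhance_garment_prompt (and therefore needs GOOGLE_API_KEY), then forwards the already-enhanced text downstream with gemini_prompt=False. A minimal illustrative call of the same function outside the UI is sketched below; it assumes the pipe built at the top of app.py has been loaded, and "jacket.png" / "jacket_on_model.png" are hypothetical file names.

from PIL import Image

jacket = Image.open("jacket.png").convert("RGB")  # hypothetical local garment photo
result = generate_with_garment_interface(
    garment_image=jacket,
    text="a tall model on a beach at sunset",
    gemini_prompt=True,   # triggers the Gemini recaption step; requires GOOGLE_API_KEY
    guidance=3.5,         # same defaults the sliders expose
    i_guidance=1.5,
    t_guidance=1.0,
)
result.save("jacket_on_model.png")  # hypothetical output path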
garment_pipeline.py ADDED
@@ -0,0 +1,60 @@
+import torch
+from PIL import Image
+from diffusers.utils import load_image
+from pipeline import FluxConditionalPipeline
+
+def generate_with_garment(
+    pipe,
+    garment_image: Image.Image,
+    text: str,
+    gemini_prompt: bool = True,
+    guidance: float = 3.5,
+    i_guidance: float = 1.0,
+    t_guidance: float = 1.0,
+    device="cuda" if torch.cuda.is_available() else "cpu"
+):
+    """
+    Generates an image of a model wearing the provided garment with a new background
+
+    Args:
+        pipe: The FluxConditionalPipeline instance
+        garment_image: Image of the garment to keep in the generated output
+        text: Text prompt describing the desired output (model, pose, background)
+        gemini_prompt: Whether to enhance the prompt using Gemini
+        guidance: General guidance scale
+        i_guidance: Image-specific guidance scale
+        t_guidance: Text-specific guidance scale
+        device: The device to use for generation
+
+    Returns:
+        The generated image
+    """
+    # Process the garment image
+    w, h, min_size = garment_image.size[0], garment_image.size[1], min(garment_image.size)
+    garment_image = garment_image.crop(
+        ((w - min_size) // 2, (h - min_size) // 2, (w + min_size) // 2, (h + min_size) // 2)
+    ).resize((512, 512))
+
+    # Prepare garment image as control image
+    control_image = load_image(garment_image)
+
+    # Enhance the prompt to focus on keeping the garment while changing the model and background
+    enhanced_text = text
+    if not "garment" in enhanced_text.lower() and not "clothing" in enhanced_text.lower():
+        enhanced_text = f"A model wearing this garment, {text}"
+
+    # Generate the image
+    result_image = pipe(
+        prompt=enhanced_text.strip(),
+        negative_prompt="distorted garment, wrong clothing, deformed clothes",
+        num_inference_steps=28,
+        height=512,
+        width=1024,
+        guidance_scale=guidance,
+        image=control_image,
+        guidance_scale_real_i=i_guidance,  # Higher value to maintain garment fidelity
+        guidance_scale_real_t=t_guidance,
+        gemini_prompt=gemini_prompt,
+    ).images[0]
+
+    return result_image
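Usage note: a hedged sketch of driving the new helper directly, assuming the FluxConditionalPipeline instance (pipe) built in app.py and a hypothetical input file "dress.jpg". Because the prompt below mentions neither "garment" nor "clothing", the helper prepends "A model wearing this garment, " to it before calling the pipeline, and the image is center-cropped to 512x512 whatever its original size; the gemini_prompt flag is simply forwarded to the pipe() call, the helper itself does no recaptioning.

from PIL import Image

from garment_pipeline import generate_with_garment

dress = Image.open("dress.jpg")  # hypothetical file; any aspect ratio, it gets center-cropped
result = generate_with_garment(
    pipe=pipe,                    # the FluxConditionalPipeline loaded in app.py
    garment_image=dress,
    text="studio lighting, neutral grey backdrop",  # no garment/clothing keyword, so the prefix is added
    gemini_prompt=False,          # forwarded to pipe(); no Gemini call happens in this helper
    guidance=3.5,
    i_guidance=1.5,               # higher values preserve more garment detail
    t_guidance=1.0,
)
result.save("dress_result.png")   # hypothetical output path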
recaption.py CHANGED
@@ -30,4 +30,52 @@ def enhance_prompt(image, prompt):
     print("input_image_prompt: ", input_image_prompt)
     print("prompt: ", prompt)
     print("enhanced_prompt: ", enhanced_prompt)
     return enhanced_prompt
+
+def enhance_garment_prompt(image, prompt):
+    """
+    Enhances a prompt specifically for garment transformation tasks.
+
+    Args:
+        image: The garment image
+        prompt: User provided prompt for the desired output
+
+    Returns:
+        Enhanced prompt that preserves garment details while incorporating user requirements
+    """
+    input_caption_prompt = (
+        "Please provide a detailed description of this garment/clothing item I will show you. "
+        "Focus on describing the garment's color, pattern, style, fabric, cut, and unique details. "
+        "Be specific about the type of garment (e.g., t-shirt, dress, jacket, pants) and its defining characteristics. "
+        "The description should be detailed enough for an image generation model to recreate this exact garment. "
+        "The description should be short and precise, in one-line format."
+    )
+
+    caption_model = genai.Client(
+        vertexai=False, api_key=os.environ["GOOGLE_API_KEY"]
+    )
+
+    # Get detailed garment description
+    garment_description = caption_model.models.generate_content(
+        model='gemini-1.5-flash', contents=[input_caption_prompt, image]).text
+    garment_description = garment_description.replace('\r', '').replace('\n', '')
+
+    # Enhance user prompt to include garment details
+    enhance_instruction = (
+        f"I need to generate an image of a model wearing a specific garment. "
+        f"The garment is described as: '{garment_description}'. "
+        f"The user wants: '{prompt}'. "
+        f"Create a detailed prompt that combines these elements, ensuring the garment description is preserved exactly while "
+        f"incorporating the user's requirements for the model (person wearing it) and setting/background. "
+        f"Focus on describing a photorealistic scene with a model wearing this specific garment. "
+        f"The enhanced prompt should be short and precise, in one-line format, and should not exceed 77 tokens."
+    )
+
+    enhanced_prompt = caption_model.models.generate_content(
+        model='gemini-1.5-flash', contents=[enhance_instruction]).text.replace('\r', '').replace('\n', '')
+
+    print("garment_description: ", garment_description)
+    print("user prompt: ", prompt)
+    print("enhanced_prompt: ", enhanced_prompt)
+
+    return enhanced_prompt
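Usage note: a hedged sketch of calling the new recaption helper on its own. It assumes GOOGLE_API_KEY is set and that recaption.py already imports os and the google-genai client as genai at module level (those imports sit above this hunk); "tshirt.png" is a hypothetical image.

from PIL import Image

from recaption import enhance_garment_prompt

tshirt = Image.open("tshirt.png")  # hypothetical garment photo
enhanced = enhance_garment_prompt(tshirt, "a model walking through a city at night")
print(enhanced)  # one-line prompt folding the Gemini garment description into the requested scene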