video generation feature v1 is functional

Changed files:
- .streamlit/config.toml +2 -0
- app.py +102 -106
- constants.py +5 -1
- requirements.txt +83 -1
- structured_output_extractor.py +101 -0
- utils.py +148 -4
.streamlit/config.toml
ADDED
@@ -0,0 +1,2 @@
[server]
maxUploadSize = 20
app.py
CHANGED
@@ -1,26 +1,19 @@
 import streamlit as st
-import requests
-import io
-from gradio_client import Client, handle_file
-import tempfile
-import os
-from utils import clean_response, get_translation, get_image_prompts, generate_images, generate_video
+from utils import get_translation, get_image_prompts, segments_to_chunks, generate_images, generate_video
 import constants
+from groq import Groq
 
+client = Groq()
 
-# Initialize
-if 'client' not in st.session_state:
-    st.session_state.client = Client("habib926653/openai-whisper-large-v3-turbo", hf_token=constants.HF_TOKEN)
-
-# Initialize state variables
+# Initialize state variables if not already set
 if 'transcript_visible' not in st.session_state:
     st.session_state.transcript_visible = False
 if 'translation_visible' not in st.session_state:
     st.session_state.translation_visible = False
 if 'uploaded_file_name' not in st.session_state:
     st.session_state.uploaded_file_name = None
-if 'converted_audio' not in st.session_state:
-    st.session_state.converted_audio = None
+if 'audio' not in st.session_state:
+    st.session_state.audio = None
 if 'was_converted' not in st.session_state:
     st.session_state.was_converted = False
 if 'transcript' not in st.session_state:
@@ -34,43 +27,34 @@ if 'image_prompts' not in st.session_state:
 if 'generated_images' not in st.session_state:
     st.session_state.generated_images = None
 
-# Function to convert the audio to MP3 using the external API
-def convert_to_mp3(audio_file):
-    if audio_file.name.endswith(".mp3"):
-        return audio_file, False  # File is already MP3
-    else:
-        # Send to the external converter API
-        url = constants.AUDIO_CONVERTER_ENDPOINT
-        files = {"file": (audio_file.name, audio_file, "audio/mp3")}
-
-        with st.spinner("Converting audio to MP3... Please wait."):
-            response = requests.post(url, files=files)
-
-        if response.status_code == 200:
-            # If conversion is successful, save and return the MP3 file
-            converted_file = io.BytesIO(response.content)
-            converted_file.name = "converted.mp3"
-            st.success("✅ File successfully converted to MP3!")
-            return converted_file, True  # File was converted
-        else:
-            st.error("❌ Conversion failed. Please try another format.")
-            return None, None
 
 # Streamlit UI
 st.markdown(
     "<h1 style='text-align: center;'>AI Video Generator</h1>",
     unsafe_allow_html=True
 )
-st.info("Video Generation Feature
+st.info("Video Generation Feature - Functional But Can be Buggy")
 
 # Upload audio file
 audio_file = st.file_uploader("🔼 Upload your audio file:", type=constants.SUPPORTED_FORMATS)
 
+print(audio_file,'is the upload')
+
+# if audio_file is not None:
+#     # Check the duration of the uploaded audio file
+#     duration = get_audio_duration(audio_file)
+
+#     # Allow only files up to 5 minutes (300 seconds)
+#     if duration > 300:
+#         st.error("The uploaded audio file exceeds the 5-minute limit. Please upload a shorter file.")
+#     else:
+#         st.success(f"Audio file uploaded successfully! Duration: {duration/60:.2f} minutes")
+
 if audio_file:
     # Reset states only when a new file is uploaded
     if st.session_state.uploaded_file_name != audio_file.name:
         st.session_state.uploaded_file_name = audio_file.name
-        st.session_state.
+        st.session_state.audio = audio_file
         st.session_state.transcript = None
         st.session_state.translation = None
         st.session_state.image_prompts = None
@@ -78,78 +62,90 @@ if audio_file:
 
     st.info(f"Uploaded file: **{audio_file.name}**")
 
-    st.session_state.translation = get_translation(st.session_state.transcript)
-
-    st.audio(st.session_state.converted_audio, format="audio/mp3")
-
-    # Toggle transcript visibility
-    toggle_transcript = st.checkbox("Show Transcript", value=st.session_state.transcript_visible)
-    st.session_state.transcript_visible = toggle_transcript
-
-    if st.session_state.transcript_visible:
-        st.write("### Transcription:")
-        st.write(st.session_state.transcript)
-
-    # Toggle translation visibility
-    toggle_translation = st.checkbox("Show Translation", value=st.session_state.translation_visible)
-    st.session_state.translation_visible = toggle_translation
-
-    if st.session_state.translation_visible:
-        st.write("### Translation:")
-        st.write(st.session_state.translation)
-
-    # Image generation logic
-    if st.session_state.translation and st.session_state.image_prompts is None:
-        with st.spinner("Generating image prompts... Please wait."):
-            if 'Already in English' in st.session_state.translation:
-                st.info("Audio is Already in English. Using Transcription to generate Image Prompts")
-                st.session_state.image_prompts = get_image_prompts(st.session_state.transcript)['image_prompts']
-            else:
-                st.session_state.image_prompts = get_image_prompts(st.session_state.translation)['image_prompts']
-
-    # Ensure that generated_images is always a list
-    if 'generated_images' not in st.session_state or st.session_state.generated_images is None:
-        st.session_state.generated_images = []
-
-    # Generate images only if they have not been generated already
-    if st.session_state.image_prompts and not st.session_state.generated_images:
-        with st.spinner("Generating images... Please wait."):
-            for prompt, image_path in generate_images(st.session_state.image_prompts):
-                # Display each image as soon as it's generated
-                st.image(image_path, caption=f"{prompt}", use_container_width=True)
-                # Append the generated image to the session state
-                st.session_state.generated_images.append((prompt, image_path))
-
-    # Display all previously generated images (including newly generated ones)
-    else:
-        for prompt, image_path in st.session_state.generated_images:
-            # Display each image
-            st.image(image_path, caption=f"{prompt}", use_container_width=True)
+    # Read the uploaded file's bytes and send to Groq API for transcription
+    file_bytes = audio_file.read()
+
+    # Create a transcription of the audio file using Groq API
+    result = client.audio.transcriptions.create(
+        file=(audio_file.name, file_bytes),  # Send the audio file content directly to the API
+        model="whisper-large-v3-turbo",  # Model to use for transcription
+        prompt="Specify context or spelling",  # Optional context for better transcription accuracy
+        response_format="verbose_json",  # Return detailed JSON response
+        temperature=0.0,  # Control randomness in the transcription output
+    )
+    st.session_state.transcript = result.text
+    st.session_state.segments = result.segments
+
+    # Translation logic
+    if st.session_state.transcript and st.session_state.translation is None:
+        with st.spinner("Generating translation... Please wait."):
+            st.session_state.translation = get_translation(st.session_state.transcript)
+
+    st.audio(st.session_state.audio, format=f"audio/{audio_file.type}")
+
+    # Toggle transcript visibility
+    toggle_transcript = st.checkbox("Show Transcript", value=st.session_state.transcript_visible, key="toggle_transcript")
+    st.session_state.transcript_visible = toggle_transcript
+
+    if st.session_state.transcript_visible:
+        st.write("### Transcription:")
+        st.write(st.session_state.transcript)
+
+    # Toggle translation visibility
+    toggle_translation = st.checkbox("Show Translation", value=st.session_state.translation_visible, key="toggle_translation")
+    st.session_state.translation_visible = toggle_translation
+
+    if st.session_state.translation_visible:
+        st.write("### Translation:")
+        st.write(st.session_state.translation)
+
+    # Image generation logic
+    if st.session_state.translation and st.session_state.image_prompts is None:
+        with st.spinner("Generating image prompts... Please wait."):
+            if 'Already in English' in st.session_state.translation:
+                st.info("Audio is Already in English. Using Transcription to generate Image Prompts")
+                st.session_state.image_prompts = get_image_prompts(segments_to_chunks(st.session_state.segments))['image_prompts']
+            else:
+                st.session_state.image_prompts = get_image_prompts(segments_to_chunks(st.session_state.segments))['image_prompts']
+
+    print(st.session_state.image_prompts)
+    # Ensure that generated_images is always a list
+    if 'generated_images' not in st.session_state or st.session_state.generated_images is None:
+        st.session_state.generated_images = []
+
+    # Generate images only if they have not been generated already
+    if st.session_state.image_prompts and not st.session_state.generated_images:
+        with st.spinner("Generating images... Please wait."):
+            for prompt, image_path in generate_images(st.session_state.image_prompts):
+                # # Display each image as soon as it's generated
+                # st.image(image_path, caption=f"{prompt}", use_container_width=True)
+                # Append the generated image to the session state
+                st.session_state.generated_images.append((prompt, image_path))
+
+    # # Display all previously generated images (including newly generated ones)
+    # else:
+    #     for prompt, image_path in st.session_state.generated_images:
+    #         st.image(image_path, caption=f"{prompt}", use_container_width=True)
+
+    # Generate video when all images are generated
+    if st.session_state.generated_images and st.session_state.audio:
+        if st.button("Generate Video"):
+            with st.spinner("Generating video... Please wait."):
+                # Map images to segments
+                image_paths = [img[1] for img in st.session_state.generated_images]
+                generated_video_path = generate_video(
+                    audio_file=st.session_state.audio,
+                    images=image_paths,
+                    segments=st.session_state.segments
+                )
+                st.session_state.generated_video = generated_video_path
+                st.success("Video generated successfully!")
+
+    # Display the generated video
+    if st.session_state.generated_video:
+        st.video(st.session_state.generated_video)
 
 else:
     st.warning("Please upload an audio file to proceed.")
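Note on the transcription result: the rest of the app only depends on each entry in result.segments exposing "start", "end", and "text". A minimal sketch of how the new segments_to_chunks helper consumes them, using hand-written segments as stand-ins for real Groq verbose_json output:

# Illustrative only: hand-written stand-ins for result.segments, not real Groq output.
example_segments = [
    {"start": 0.0, "end": 4.2, "text": "A man walks through a dense jungle."},
    {"start": 4.2, "end": 9.8, "text": "He reaches a waterfall surrounded by greenery."},
]

def segments_to_chunks(segments):
    # Mirrors the helper added in utils.py: one text chunk per segment.
    return [segment.get("text") for segment in segments]

print(segments_to_chunks(example_segments))
# ['A man walks through a dense jungle.', 'He reaches a waterfall surrounded by greenery.']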
constants.py
CHANGED
@@ -4,6 +4,8 @@ import os
 load_dotenv()
 
 HF_TOKEN = os.getenv("HF_TOKEN", None)
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+
 AUDIO_CONVERTER_ENDPOINT="https://audio-converter-api-587c.onrender.com/convert/mp3"
 
 
@@ -12,4 +14,6 @@ PROMPT_GENERATION_ENDPOINT="https://habib926653-text-translator-agent-api.hf.spa
 IMAGE_GENERATION_SPACE_NAME="habib926653/stabilityai-stable-diffusion-3.5-large-turbo"
 
 # Supported formats
-SUPPORTED_FORMATS = ["mp3", "wav", "ogg", "flac", "aac", "m4a"]
+SUPPORTED_FORMATS = ["mp3", "wav", "ogg", "flac", "aac", "m4a"]
+
+
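Since constants.py calls load_dotenv(), local runs typically keep these keys in a .env file next to the code. A hypothetical example with placeholder values follows; GROQ_API_KEY is an assumption here, read from the environment by the Groq and ChatGroq clients used elsewhere in this commit rather than by constants.py itself:

# Hypothetical .env for local development; all values are placeholders
HF_TOKEN=hf_xxxxxxxxxxxxxxxx
GEMINI_API_KEY=your-gemini-key-here
GROQ_API_KEY=gsk_xxxxxxxxxxxxxxxx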
requirements.txt
CHANGED
@@ -1,3 +1,85 @@
+altair==5.5.0
+annotated-types==0.7.0
+anyio==4.8.0
+attrs==24.3.0
+audeer==2.2.1
+audiofile==1.5.1
+audmath==1.4.1
+blinker==1.9.0
+cachetools==5.5.0
+certifi==2024.12.14
+cffi==1.17.1
+charset-normalizer==3.4.1
+click==8.1.8
+decorator==4.4.2
+distro==1.9.0
+exceptiongroup==1.2.2
+filelock==3.16.1
+fsspec==2024.12.0
+gitdb==4.0.12
+GitPython==3.1.44
+gradio_client==1.5.4
+groq==0.15.0
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.27.1
+idna==3.10
+imageio==2.36.1
+imageio-ffmpeg==0.5.1
+Jinja2==3.1.5
+jsonpatch==1.33
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+langchain-core==0.3.29
+langchain-groq==0.2.3
+langgraph==0.2.62
+langgraph-checkpoint==2.0.9
+langgraph-sdk==0.1.51
+langsmith==0.2.10
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+mdurl==0.1.2
+moviepy==1.0.3
+msgpack==1.1.0
+narwhals==1.21.1
+numpy==2.2.1
+opencv-python==4.10.0.84
+orjson==3.10.14
+packaging==24.2
+pandas==2.2.3
+pillow==11.1.0
+proglog==0.1.10
+protobuf==5.29.3
+pyarrow==18.1.0
+pycparser==2.22
+pydantic==2.10.5
+pydantic_core==2.27.2
+pydeck==0.9.1
+pydub==0.25.1
+Pygments==2.19.1
+python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
+pytz==2024.2
+PyYAML==6.0.2
+referencing==0.35.1
+requests==2.32.3
+requests-toolbelt==1.0.0
+rich==13.9.4
+rpds-py==0.22.3
+scipy==1.15.1
+six==1.17.0
+smmap==5.0.2
+sniffio==1.3.1
+soundfile==0.13.0
 streamlit==1.41.1
+tenacity==9.0.0
+toml==0.10.2
+tornado==6.4.2
+tqdm==4.67.1
+typing_extensions==4.12.2
+tzdata==2024.2
+urllib3==2.3.0
+watchdog==6.0.0
+websockets==14.1
structured_output_extractor.py
ADDED
@@ -0,0 +1,101 @@
from typing import Type, Optional
from pydantic import BaseModel
from langgraph.graph import StateGraph, START, END
from typing import TypedDict
import constants  # Assuming constants.py holds LLM provider configurations
from langchain_groq import ChatGroq


# Define the State structure (similar to previous definition)
class State(TypedDict):
    messages: list
    output: Optional[BaseModel]


# Generic Pydantic model-based structured output extractor
class StructuredOutputExtractor:
    def __init__(self, response_schema: Type[BaseModel]):
        """
        Initializes the extractor for any given structured output model.

        :param response_schema: Pydantic model class used for structured output extraction
        """
        self.response_schema = response_schema

        # Initialize language model (provider and API keys come from constants.py)
        self.llm = ChatGroq(model="llama-3.3-70b-versatile")

        # Bind the model with structured output capability
        self.structured_llm = self.llm.with_structured_output(response_schema)

        # Build the graph for structured output
        self._build_graph()

    def _build_graph(self):
        """
        Build the LangGraph computational graph for structured extraction.
        """
        graph_builder = StateGraph(State)

        # Add nodes and edges for structured output
        graph_builder.add_node("extract", self._extract_structured_info)
        graph_builder.add_edge(START, "extract")
        graph_builder.add_edge("extract", END)

        self.graph = graph_builder.compile()

    def _extract_structured_info(self, state: dict):
        """
        Extract structured information using the specified response model.

        :param state: Current graph state
        :return: Updated state with structured output
        """
        query = state['messages'][-1].content
        print(f"Processing query: {query}")
        try:
            # Extract details using the structured model
            output = self.structured_llm.invoke(query)
            # Return the structured response
            return {"output": output}
        except Exception as e:
            print(f"Error during extraction: {e}")
            return {"output": None}

    def extract(self, query: str) -> Optional[BaseModel]:
        """
        Public method to extract structured information.

        :param query: Input query for structured output extraction
        :return: Structured model object or None
        """
        from langchain_core.messages import SystemMessage

        result = self.graph.invoke({
            "messages": [SystemMessage(content=query)]
        })
        # Return the structured model response, if available
        result = result.get('output')
        return result


if __name__ == '__main__':

    # Example Pydantic model (e.g., Movie)
    class Movie(BaseModel):
        title: str
        year: int
        genre: str
        rating: Optional[float] = None
        actors: list[str] = []


    # Example usage with a generic structured extractor
    extractor = StructuredOutputExtractor(response_schema=Movie)

    query = "Tell me about the movie Inception. Provide details about its title, year, genre, rating, and main actors."

    result = extractor.extract(query)
    print(type(result))
    if result:
        print(result)
utils.py
CHANGED
@@ -4,6 +4,14 @@ import constants
 import os
 from PIL import Image
 from gradio_client import Client
+import moviepy.editor as mp
+from moviepy.video.VideoClip import ImageClip
+from moviepy.editor import AudioFileClip
+from structured_output_extractor import StructuredOutputExtractor
+from pydantic import BaseModel, Field
+from typing import List
+import tempfile
+import os
 
 
 def clean_response(result):
@@ -48,7 +56,7 @@ def get_translation(text: str):
 
 
 
-def get_image_prompts(text_input):
+def old_get_image_prompts(text_input):
     headers = {
         "Authorization": f"Bearer {constants.HF_TOKEN}",  # Replace with your token
         "Content-Type": "application/json"  # Optional, ensures JSON payload
@@ -73,6 +81,29 @@ def get_image_prompts(text_input)
         print(f"Error during request: {e}")
         return {"error": str(e)}
 
+def segments_to_chunks(segments):
+    chunks = []
+    for segment in segments:
+        chunks.append(segment.get("text"))
+    return chunks
+
+
+def get_image_prompts(text_input : List):
+    # Example Pydantic model (e.g., Movie)
+    class ImagePromptResponseSchema(BaseModel):
+        image_prompts: List[str] = Field(
+            description="List of detailed image prompts, Each Image Prompt Per Chunk"
+        )
+
+    extractor = StructuredOutputExtractor(response_schema=ImagePromptResponseSchema)
+    chunks_count = len(text_input)
+    chunks = "chunk: " + "\nchunk: ".join(text_input)
+    prompt = f"""ROLE: You are a Highly Experienced Image Prompt Sythesizer
+    TASK: Generate {chunks_count} image prompts, Each per chunk\n\n {chunks}"""
+    result = extractor.extract(prompt)
+    return result.model_dump()  # returns dictionary version pydantic model
+
+
 
 
 
@@ -126,11 +157,124 @@ def tmp_folder(folder_name: str) -> str:
     return folder_path
 
 
+
+def old_generate_video(audio_file, images, segments):
+    print(f"images: {images}")
+    print(f"segments: {segments}")
+    print(f"audio file: {audio_file.name}")
+    try:
+        # Save the uploaded audio file to a temporary location
+        file_extension = os.path.splitext(audio_file.name)[1]
+        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
+        temp_audio_path.write(audio_file.read())
+        temp_audio_path.close()
+
+        # Load the audio file using MoviePy
+        audio = mp.AudioFileClip(temp_audio_path.name)
+        audio_duration = audio.duration
+
+        # Create video clips for each segment using the corresponding image
+        video_clips = []
+        for i, segment in enumerate(segments):
+            start_time = segment["start"]
+            end_time = segment["end"]
+
+            # Ensure the image index is within bounds
+            image_path = images[min(i, len(images) - 1)]
+
+            # Create an ImageClip for the current segment
+            image_clip = ImageClip(image_path, duration=end_time - start_time)
+            image_clip = image_clip.set_start(start_time).set_end(end_time)
+            video_clips.append(image_clip)
+
+        # Concatenate all the image clips to form the video
+        video = mp.concatenate_videoclips(video_clips, method="compose")
+
+        # Add the audio to the video
+        video = video.set_audio(audio)
+
+        # Save the video to a temporary file
+        temp_dir = tempfile.gettempdir()
+        video_path = os.path.join(temp_dir, "generated_video.mp4")
+        video.write_videofile(video_path, fps=24, codec="libx264", audio_codec="aac")
+
+        # Clean up the temporary audio file
+        os.remove(temp_audio_path.name)
+
+        return video_path
+
+    except Exception as e:
+        print(f"Error generating video: {e}")
+        return
+
+
+from moviepy.editor import *
+
+def generate_video(audio_file, images, segments):
+    print(f"images: {images}")
+    print(f"segments: {segments}")
+    print(f"audio file: {audio_file.name}")
+    try:
+        # Save the uploaded audio file to a temporary location
+        file_extension = os.path.splitext(audio_file.name)[1]
+        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
+        temp_audio_path.write(audio_file.read())
+        temp_audio_path.close()
+
+        # Load the audio file using MoviePy
+        audio = AudioFileClip(temp_audio_path.name)
+        audio_duration = audio.duration
+
+        # Define YouTube-like dimensions (16:9 aspect ratio, e.g., 1920x1080)
+        frame_width = 1920
+        frame_height = 1080
+
+        # Create video clips for each segment using the corresponding image
+        video_clips = []
+        for i, segment in enumerate(segments):
+            start_time = segment["start"]
+            end_time = segment["end"]
+
+            # Ensure the image index is within bounds
+            image_path = images[min(i, len(images) - 1)]
+
+            # Create an ImageClip for the current segment
+            image_clip = ImageClip(image_path, duration=end_time - start_time)
+
+            # Resize and pad the image to fit a 16:9 aspect ratio
+            image_clip = image_clip.resize(height=frame_height).on_color(
+                size=(frame_width, frame_height),
+                color=(0, 0, 0),  # Black background
+                pos="center"  # Center the image
+            )
+
+            # Set the timing of the clip
+            image_clip = image_clip.set_start(start_time).set_end(end_time)
+            video_clips.append(image_clip)
+
+        # Concatenate all the image clips to form the video
+        video = concatenate_videoclips(video_clips, method="compose")
+
+        # Add the audio to the video
+        video = video.set_audio(audio)
+
+        # Save the video to a temporary file
+        temp_dir = tempfile.gettempdir()
+        video_path = os.path.join(temp_dir, "generated_video.mp4")
+        video.write_videofile(video_path, fps=24, codec="libx264", audio_codec="aac")
+
+        # Clean up the temporary audio file
+        os.remove(temp_audio_path.name)
+
+        return video_path
+
+    except Exception as e:
+        print(f"Error generating video: {e}")
+        return
 
 
 # Example usage:
 if __name__ == "__main__":
     result = generate_images(["a guy in jungle", "a waterfall","greenery"])
+
+
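Taken together, the new helpers form the pipeline that app.py drives. A rough end-to-end sketch, illustrative only: it assumes a local narration.mp3, valid HF_TOKEN and GROQ_API_KEY values, and hand-written segments in place of the Groq transcription:

# Illustrative pipeline sketch, not part of the commit.
from utils import segments_to_chunks, get_image_prompts, generate_images, generate_video

# Hand-written stand-ins for the Whisper verbose_json segments.
segments = [
    {"start": 0.0, "end": 5.0, "text": "A man walks through a dense jungle."},
    {"start": 5.0, "end": 10.0, "text": "He reaches a waterfall surrounded by greenery."},
]

prompts = get_image_prompts(segments_to_chunks(segments))["image_prompts"]
images = [image_path for _prompt, image_path in generate_images(prompts)]

# generate_video reads audio_file.name and audio_file.read(), so an open file handle works.
with open("narration.mp3", "rb") as audio_file:
    video_path = generate_video(audio_file=audio_file, images=images, segments=segments)

print(video_path)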