wower99 committed on
Commit c14d84c · 1 Parent(s): f84d1b6

first commit

Files changed (6)
  1. .gitignore +162 -0
  2. app.py +147 -0
  3. constants.py +15 -0
  4. env.example +1 -0
  5. requirements.txt +3 -0
  6. utils.py +132 -0
.gitignore ADDED
@@ -0,0 +1,162 @@
+ .env
+ venv/
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ # db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
app.py ADDED
@@ -0,0 +1,147 @@
+ import streamlit as st
+ import requests
+ import io
+ from gradio_client import Client, handle_file
+ import tempfile
+ import os
+ from utils import clean_response, get_translation, get_image_prompts, generate_images, generate_video  # Import generate_video
+ import constants
+
+ # Initialize the client only once
+ if 'client' not in st.session_state:
+     st.session_state.client = Client("habib926653/openai-whisper-large-v3-turbo", hf_token=constants.HF_TOKEN)
+
+ # Initialize state variables
+ if 'transcript_visible' not in st.session_state:
+     st.session_state.transcript_visible = False
+ if 'translation_visible' not in st.session_state:
+     st.session_state.translation_visible = False
+ if 'uploaded_file_name' not in st.session_state:
+     st.session_state.uploaded_file_name = None
+ if 'converted_audio' not in st.session_state:
+     st.session_state.converted_audio = None
+ if 'was_converted' not in st.session_state:
+     st.session_state.was_converted = False
+ if 'transcript' not in st.session_state:
+     st.session_state.transcript = None
+ if 'translation' not in st.session_state:
+     st.session_state.translation = None
+ if 'generated_video' not in st.session_state:
+     st.session_state.generated_video = None
+
+ # Function to convert the audio to MP3 using the external API
+ def convert_to_mp3(audio_file):
+     if audio_file.name.endswith(".mp3"):
+         return audio_file, False  # File is already MP3
+     else:
+         # Send to the external converter API
+         url = constants.AUDIO_CONVERTER_ENDPOINT
+         files = {"file": (audio_file.name, audio_file, "audio/mpeg")}
+
+         with st.spinner("Converting audio to MP3... Please wait."):
+             response = requests.post(url, files=files)
+
+         if response.status_code == 200:
+             # If conversion is successful, save and return the MP3 file
+             converted_file = io.BytesIO(response.content)
+             converted_file.name = "converted.mp3"
+             return converted_file, True  # File was converted
+         else:
+             st.error("Conversion failed. Please try another format.")
+             return None, None
+
+ # Streamlit UI
+ st.markdown(
+     "<h1 style='text-align: center;'>AI Video Generator</h1>",
+     unsafe_allow_html=True
+ )
+
+ # Upload audio file
+ audio_file = st.file_uploader("🔼 Upload your audio file:", type=constants.SUPPORTED_FORMATS)
+
+ if audio_file:
+     # Reset states when a new file is uploaded
+     if st.session_state.uploaded_file_name != audio_file.name:
+         st.session_state.uploaded_file_name = audio_file.name
+         st.session_state.converted_audio, st.session_state.was_converted = convert_to_mp3(audio_file)
+         st.session_state.transcript = None
+         st.session_state.translation = None
+         st.session_state.generated_video = None  # Reset video generation state
+
+     # Display uploaded file name
+     st.info(f"Uploaded file: **{audio_file.name}**")
+
+     if st.session_state.converted_audio:
+         if not st.session_state.was_converted:
+             st.success("🎧 The uploaded file is already in MP3 format.")
+         else:
+             st.success("✅ File successfully converted to MP3!")
+
+         # Save the file temporarily if no transcript exists
+         if st.session_state.transcript is None:
+             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+                 tmp_file.write(st.session_state.converted_audio.read())
+                 tmp_file_path = tmp_file.name
+
+             result = st.session_state.client.predict(
+                 param_0=handle_file(tmp_file_path),
+                 api_name="/predict"
+             )
+             st.session_state.transcript = clean_response(result)
+
+             # Clean up temporary file
+             os.remove(tmp_file_path)
+
+         # Ensure translation is always generated after transcription
+         if st.session_state.transcript and st.session_state.translation is None:
+             with st.spinner("Generating translation..."):
+                 st.session_state.translation = get_translation(st.session_state.transcript)
+
+         # Display and allow playback of the MP3 file
+         st.audio(st.session_state.converted_audio, format="audio/mp3")
+
+         # Toggle to show or hide the transcript
+         toggle_transcript = st.checkbox("Show Transcript", value=st.session_state.transcript_visible)
+
+         if toggle_transcript:
+             st.session_state.transcript_visible = True
+             st.write("### Transcription:")
+             st.write(st.session_state.transcript)
+         else:
+             st.session_state.transcript_visible = False
+
+         # Toggle to show or hide the translation
+         toggle_translation = st.checkbox("Show Translation", value=st.session_state.translation_visible)
+
+         if toggle_translation:
+             st.session_state.translation_visible = True
+             st.write("### Translation:")
+             st.write(st.session_state.translation)
+         else:
+             st.session_state.translation_visible = False
+
+         # Image prompts - generated once translation is available
+         if st.session_state.translation:
+             st.write("### Image Prompts")
+             result = get_image_prompts(st.session_state.translation)
+             for prompt in result['image_prompts']:
+                 st.write(prompt)
+
+             # Generate images for prompts
+             images_folder = generate_images(result['image_prompts'])
+
+             # Generate the video based on the images and translation
+             if images_folder:
+                 st.write("### Generating Video...")
+                 with st.spinner("Creating video..."):
+                     video_file = generate_video(images_folder, st.session_state.translation)
+                 if video_file:
+                     st.session_state.generated_video = video_file
+                     st.video(video_file)  # Display the video
+                 else:
+                     st.error("Failed to generate the video.")
+
+ else:
+     # If no file is uploaded yet
+     st.warning("Please upload an audio file to proceed.")
+
constants.py ADDED
@@ -0,0 +1,15 @@
+ from dotenv import load_dotenv
+ import os
+
+ load_dotenv()
+
+ HF_TOKEN = os.getenv("HF_TOKEN", None)
+ AUDIO_CONVERTER_ENDPOINT="https://audio-converter-api-587c.onrender.com/convert/mp3"
+
+
+ TRANSLATION_ENDPOINT="https://habib926653-text-translator-agent-api.hf.space/generate"
+ PROMPT_GENERATION_ENDPOINT="https://habib926653-text-translator-agent-api.hf.space/get-image-prompts"
+ IMAGE_GENERATION_SPACE_NAME="habib926653/stabilityai-stable-diffusion-3.5-large-turbo"
+
+ # Supported formats
+ SUPPORTED_FORMATS = ["mp3", "wav", "ogg", "flac", "aac", "m4a"]
env.example ADDED
@@ -0,0 +1 @@
+ HF_TOKEN=HUGGING_FACE_TOKEN
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ python-dotenv==1.0.1
+ streamlit==1.41.1
+ gradio_client==1.5.2
utils.py ADDED
@@ -0,0 +1,132 @@
+
+ import requests
+ import constants
+ import os
+ from PIL import Image
+ from gradio_client import Client
+
+
+ def clean_response(result):
+     """Temporary fix for the predict() output: the openai-whisper-large-v3-turbo Space returns its result as a string
+     such as AutomaticSpeechRecognitionOutput(text=" sometimes life ...", chunks=None), i.e. the wrapper class name is
+     still in the response. This strips the wrapper and returns only the transcribed text (e.g. "sometimes life ...")."""
+     # Use find() to get the position of the start and end of the text
+     start_pos = result.find('text="') + len('text="')  # Start after 'text="'
+     end_pos = result.find('", chunks=None')  # End before '", chunks=None'
+
+     # Extract the text using slicing
+     cleaned_result = result[start_pos:end_pos]
+
+     return cleaned_result
+
+
+ def get_translation(text: str):
+     # Input payload
+     params = {"text": text}
+
+     # Headers for authentication
+     headers = {"Authorization": f"Bearer {constants.HF_TOKEN}"}
+
+     try:
+         # Make a GET request
+         response = requests.get(constants.TRANSLATION_ENDPOINT, params=params, headers=headers)
+
+         # Process response
+         if response.status_code == 200:
+             response_data = response.json()
+             return response_data.get("output", "No output found.")
+         else:
+             print(f"Error: {response.status_code}, {response.text}")
+             return None
+     except Exception as e:
+         print(f"An exception occurred: {e}")
+         return None
+
+
+
+ def get_image_prompts(text_input):
+     headers = {
+         "Authorization": f"Bearer {constants.HF_TOKEN}",  # Replace with your token
+         "Content-Type": "application/json"  # Optional, ensures JSON payload
+     }
+
+     endpoint = f"{constants.PROMPT_GENERATION_ENDPOINT}"
+     payload = {"text_input": text_input}
+
+     try:
+         # Send the POST request
+         print("making post request for image prompts", endpoint)
+         response = requests.post(endpoint, json=payload, headers=headers)
+
+         # Raise an exception for HTTP errors
+         response.raise_for_status()
+
+         # Parse JSON response
+         result = response.json()
+         return result
+
+     except requests.exceptions.RequestException as e:
+         print(f"Error during request: {e}")
+         return {"error": str(e)}
+
+
+
+
+ def generate_image(prompt, path='test_image.png'):
+     try:
+         # Initialize the Gradio Client with Hugging Face token
+         client = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)
+
+         # Make the API request
+         result = client.predict(
+             param_0=prompt,  # Text prompt for image generation
+             api_name="/predict"
+         )
+
+         image = Image.open(result)
+         image.save(path)
+
+         # Return the result (which includes the URL or file path)
+         return result
+
+     except Exception as e:
+         print(f"Error during image generation: {e}")
+         return {"error": str(e)}
+
+ def generate_images(image_prompts, folder_name='test_folder'):
+     folder_path = tmp_folder(folder_name)
+     for index, prompt in enumerate(image_prompts):
+         print(index, prompt)
+         generate_image(prompt=prompt, path=f"{folder_path}/{index}.png")
+     return folder_path
+
+
+
+ def tmp_folder(folder_name: str) -> str:
+     # Use the current working directory or any other accessible path for temp folders
+     base_tmp_path = os.path.join(os.getcwd(), "tmp_dir")  # Change this to any path you prefer
+
+     # Ensure that the base temp folder exists
+     if not os.path.exists(base_tmp_path):
+         os.makedirs(base_tmp_path)
+         print(f"Base temporary folder '{base_tmp_path}' created.")
+
+     # Define the path for the specific temporary folder
+     folder_path = os.path.join(base_tmp_path, folder_name)
+
+     # Create the specific temporary folder if it doesn't exist
+     os.makedirs(folder_path, exist_ok=True)
+
+     print(f"Temporary folder '{folder_name}' is ready at {folder_path}.")
+
+     return folder_path
+
+
+ def generate_video(image_folder, audio):
+     return os.path.join(os.getcwd(), "test.mp4")
+
+
+ # Example usage:
+ if __name__ == "__main__":
+     result = generate_images(["a guy in jungle", "a waterfall", "greenery"])
+     print(result, 'is the result')
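
Taken together, the files in this commit form one pipeline: Whisper transcription → translation → image prompts → image generation → (placeholder) video. Below is a minimal sketch of driving that pipeline outside the Streamlit UI, assuming the same setup (HF_TOKEN in .env, dependencies from requirements.txt); the script name and the local file sample.mp3 are illustrative and not part of this commit.

# pipeline_sketch.py (illustrative only; not a file in this commit)
from gradio_client import Client, handle_file

import constants
from utils import clean_response, get_translation, get_image_prompts, generate_images, generate_video

# Transcribe a local MP3 with the same Whisper Space that app.py uses
whisper = Client("habib926653/openai-whisper-large-v3-turbo", hf_token=constants.HF_TOKEN)
raw = whisper.predict(param_0=handle_file("sample.mp3"), api_name="/predict")  # sample.mp3 is a placeholder input
transcript = clean_response(raw)

# Translate, derive image prompts, render one image per prompt, then build the (stub) video
translation = get_translation(transcript)
prompts = get_image_prompts(translation)["image_prompts"]
images_folder = generate_images(prompts)
print(generate_video(images_folder, translation))  # generate_video currently returns a placeholder test.mp4 path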