wower99 committed on
Commit c14d84c · 1 Parent(s): f84d1b6

first commit

Files changed (6)
  1. .gitignore +162 -0
  2. app.py +147 -0
  3. constants.py +15 -0
  4. env.example +1 -0
  5. requirements.txt +3 -0
  6. utils.py +132 -0
.gitignore ADDED
@@ -0,0 +1,162 @@
+ .env
+ venv/
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ # db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
app.py ADDED
@@ -0,0 +1,147 @@
+ import streamlit as st
+ import requests
+ import io
+ from gradio_client import Client, handle_file
+ import tempfile
+ import os
+ from utils import clean_response, get_translation, get_image_prompts, generate_images, generate_video  # Import generate_video
+ import constants
+
+ # Initialize the client only once
+ if 'client' not in st.session_state:
+     st.session_state.client = Client("habib926653/openai-whisper-large-v3-turbo", hf_token=constants.HF_TOKEN)
+
+ # Initialize state variables
+ if 'transcript_visible' not in st.session_state:
+     st.session_state.transcript_visible = False
+ if 'translation_visible' not in st.session_state:
+     st.session_state.translation_visible = False
+ if 'uploaded_file_name' not in st.session_state:
+     st.session_state.uploaded_file_name = None
+ if 'converted_audio' not in st.session_state:
+     st.session_state.converted_audio = None
+ if 'was_converted' not in st.session_state:
+     st.session_state.was_converted = False
+ if 'transcript' not in st.session_state:
+     st.session_state.transcript = None
+ if 'translation' not in st.session_state:
+     st.session_state.translation = None
+ if 'generated_video' not in st.session_state:
+     st.session_state.generated_video = None
+
+ # Function to convert the audio to MP3 using the external API
+ def convert_to_mp3(audio_file):
+     if audio_file.name.endswith(".mp3"):
+         return audio_file, False  # File is already MP3
+     else:
+         # Send to the external converter API
+         url = constants.AUDIO_CONVERTER_ENDPOINT
+         files = {"file": (audio_file.name, audio_file, "audio/mpeg")}
+
+         with st.spinner("Converting audio to MP3... Please wait."):
+             response = requests.post(url, files=files)
+
+         if response.status_code == 200:
+             # If conversion is successful, save and return the MP3 file
+             converted_file = io.BytesIO(response.content)
+             converted_file.name = "converted.mp3"
+             return converted_file, True  # File was converted
+         else:
+             st.error("Conversion failed. Please try another format.")
+             return None, None
+
+ # Streamlit UI
+ st.markdown(
+     "<h1 style='text-align: center;'>AI Video Generator</h1>",
+     unsafe_allow_html=True
+ )
+
+ # Upload audio file
+ audio_file = st.file_uploader("🔼 Upload your audio file:", type=constants.SUPPORTED_FORMATS)
+
+ if audio_file:
+     # Reset states when a new file is uploaded
+     if st.session_state.uploaded_file_name != audio_file.name:
+         st.session_state.uploaded_file_name = audio_file.name
+         st.session_state.converted_audio, st.session_state.was_converted = convert_to_mp3(audio_file)
+         st.session_state.transcript = None
+         st.session_state.translation = None
+         st.session_state.generated_video = None  # Reset video generation state
+
+     # Display uploaded file name
+     st.info(f"Uploaded file: **{audio_file.name}**")
+
+     if st.session_state.converted_audio:
+         if not st.session_state.was_converted:
+             st.success("🎧 The uploaded file is already in MP3 format.")
+         else:
+             st.success("✅ File successfully converted to MP3!")
+
+         # Save the file temporarily if no transcript exists
+         if st.session_state.transcript is None:
+             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+                 tmp_file.write(st.session_state.converted_audio.read())
+                 tmp_file_path = tmp_file.name
+
+             result = st.session_state.client.predict(
+                 param_0=handle_file(tmp_file_path),
+                 api_name="/predict"
+             )
+             st.session_state.transcript = clean_response(result)
+
+             # Clean up temporary file
+             os.remove(tmp_file_path)
+
+         # Ensure translation is always generated after transcription
+         if st.session_state.transcript and st.session_state.translation is None:
+             with st.spinner("Generating translation..."):
+                 st.session_state.translation = get_translation(st.session_state.transcript)
+
+         # Display and allow playback of the MP3 file
+         st.audio(st.session_state.converted_audio, format="audio/mp3")
+
+         # Toggle to show or hide the transcript
+         toggle_transcript = st.checkbox("Show Transcript", value=st.session_state.transcript_visible)
+
+         if toggle_transcript:
+             st.session_state.transcript_visible = True
+             st.write("### Transcription:")
+             st.write(st.session_state.transcript)
+         else:
+             st.session_state.transcript_visible = False
+
+         # Toggle to show or hide the translation
+         toggle_translation = st.checkbox("Show Translation", value=st.session_state.translation_visible)
+
+         if toggle_translation:
+             st.session_state.translation_visible = True
+             st.write("### Translation:")
+             st.write(st.session_state.translation)
+         else:
+             st.session_state.translation_visible = False
+
+         # Image prompts - generated once translation is available
+         if st.session_state.translation:
+             st.write("### Image Prompts")
+             result = get_image_prompts(st.session_state.translation)
+             for prompt in result['image_prompts']:
+                 st.write(prompt)
+
+             # Generate images for prompts
+             images_folder = generate_images(result['image_prompts'])
+
+             # Generate the video based on the images and translation
+             if images_folder:
+                 st.write("### Generating Video...")
+                 with st.spinner("Creating video..."):
+                     video_file = generate_video(images_folder, st.session_state.translation)
+                 if video_file:
+                     st.session_state.generated_video = video_file
+                     st.video(video_file)  # Display the video
+                 else:
+                     st.error("Failed to generate the video.")
+
+ else:
+     # If no file is uploaded yet
+     st.warning("Please upload an audio file to proceed.")
+
constants.py ADDED
@@ -0,0 +1,15 @@
+ from dotenv import load_dotenv
+ import os
+
+ load_dotenv()
+
+ HF_TOKEN = os.getenv("HF_TOKEN", None)
+ AUDIO_CONVERTER_ENDPOINT="https://audio-converter-api-587c.onrender.com/convert/mp3"
+
+
+ TRANSLATION_ENDPOINT="https://habib926653-text-translator-agent-api.hf.space/generate"
+ PROMPT_GENERATION_ENDPOINT="https://habib926653-text-translator-agent-api.hf.space/get-image-prompts"
+ IMAGE_GENERATION_SPACE_NAME="habib926653/stabilityai-stable-diffusion-3.5-large-turbo"
+
+ # Supported formats
+ SUPPORTED_FORMATS = ["mp3", "wav", "ogg", "flac", "aac", "m4a"]
env.example ADDED
@@ -0,0 +1 @@
+ HF_TOKEN=HUGGING_FACE_TOKEN
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ python-dotenv==1.0.1
+ streamlit==1.41.1
+ gradio_client==1.5.2
utils.py ADDED
@@ -0,0 +1,132 @@
+
+ import requests
+ import constants
+ import os
+ from PIL import Image
+ from gradio_client import Client
+
+
+ def clean_response(result):
+     """Temporary fix for the predict() output: the openai-whisper-large-v3-turbo Space returns its result as a string
+     such as AutomaticSpeechRecognitionOutput(text=" sometimes life ...", chunks=None), i.e. the wrapper class name is
+     still in the response. This strips the wrapper and returns only the transcribed text (e.g. "sometimes life ...")."""
+     # Use find() to get the position of the start and end of the text
+     start_pos = result.find('text="') + len('text="')  # Start after 'text="'
+     end_pos = result.find('", chunks=None')  # End before '", chunks=None'
+
+     # Extract the text using slicing
+     cleaned_result = result[start_pos:end_pos]
+
+     return cleaned_result
+
+
+ def get_translation(text: str):
+     # Input payload
+     params = {"text": text}
+
+     # Headers for authentication
+     headers = {"Authorization": f"Bearer {constants.HF_TOKEN}"}
+
+     try:
+         # Make a GET request
+         response = requests.get(constants.TRANSLATION_ENDPOINT, params=params, headers=headers)
+
+         # Process response
+         if response.status_code == 200:
+             response_data = response.json()
+             return response_data.get("output", "No output found.")
+         else:
+             print(f"Error: {response.status_code}, {response.text}")
+             return None
+     except Exception as e:
+         print(f"An exception occurred: {e}")
+         return None
+
+
+
+ def get_image_prompts(text_input):
+     headers = {
+         "Authorization": f"Bearer {constants.HF_TOKEN}",  # Replace with your token
+         "Content-Type": "application/json"  # Optional, ensures JSON payload
+     }
+
+     endpoint = f"{constants.PROMPT_GENERATION_ENDPOINT}"
+     payload = {"text_input": text_input}
+
+     try:
+         # Send the POST request
+         print("making post request for image prompts", endpoint)
+         response = requests.post(endpoint, json=payload, headers=headers)
+
+         # Raise an exception for HTTP errors
+         response.raise_for_status()
+
+         # Parse JSON response
+         result = response.json()
+         return result
+
+     except requests.exceptions.RequestException as e:
+         print(f"Error during request: {e}")
+         return {"error": str(e)}
+
+
+
+
+ def generate_image(prompt, path='test_image.png'):
+     try:
+         # Initialize the Gradio Client with Hugging Face token
+         client = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)
+
+         # Make the API request
+         result = client.predict(
+             param_0=prompt,  # Text prompt for image generation
+             api_name="/predict"
+         )
+
+         image = Image.open(result)
+         image.save(path)
+
+         # Return the result (which includes the URL or file path)
+         return result
+
+     except Exception as e:
+         print(f"Error during image generation: {e}")
+         return {"error": str(e)}
+
+ def generate_images(image_prompts, folder_name='test_folder'):
+     folder_path = tmp_folder(folder_name)
+     for index, prompt in enumerate(image_prompts):
+         print(index, prompt)
+         generate_image(prompt=prompt, path=f"{folder_path}/{index}.png")
+     return folder_path
+
+
+
+ def tmp_folder(folder_name: str) -> str:
+     # Use the current working directory or any other accessible path for temp folders
+     base_tmp_path = os.path.join(os.getcwd(), "tmp_dir")  # Change this to any path you prefer
+
+     # Ensure that the base temp folder exists
+     if not os.path.exists(base_tmp_path):
+         os.makedirs(base_tmp_path)
+         print(f"Base temporary folder '{base_tmp_path}' created.")
+
+     # Define the path for the specific temporary folder
+     folder_path = os.path.join(base_tmp_path, folder_name)
+
+     # Create the specific temporary folder if it doesn't exist
+     os.makedirs(folder_path, exist_ok=True)
+
+     print(f"Temporary folder '{folder_name}' is ready at {folder_path}.")
+
+     return folder_path
+
+
+ def generate_video(image_folder, audio):
+     return os.path.join(os.getcwd(), "test.mp4")
+
+
+ # Example usage:
+ if __name__ == "__main__":
+     result = generate_images(["a guy in jungle", "a waterfall", "greenery"])
+     print(result, 'is the result')
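
Taken together, the files in this commit form one pipeline: Whisper transcription → translation → image prompts → image generation → (placeholder) video. Below is a minimal sketch of driving that pipeline outside the Streamlit UI, assuming the same setup (HF_TOKEN in .env, dependencies from requirements.txt); the script name and the local file sample.mp3 are illustrative and not part of this commit.

# pipeline_sketch.py (illustrative only; not a file in this commit)
from gradio_client import Client, handle_file

import constants
from utils import clean_response, get_translation, get_image_prompts, generate_images, generate_video

# Transcribe a local MP3 with the same Whisper Space that app.py uses
whisper = Client("habib926653/openai-whisper-large-v3-turbo", hf_token=constants.HF_TOKEN)
raw = whisper.predict(param_0=handle_file("sample.mp3"), api_name="/predict")  # sample.mp3 is a placeholder input
transcript = clean_response(raw)

# Translate, derive image prompts, render one image per prompt, then build the (stub) video
translation = get_translation(transcript)
prompts = get_image_prompts(translation)["image_prompts"]
images_folder = generate_images(prompts)
print(generate_video(images_folder, translation))  # generate_video currently returns a placeholder test.mp4 path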