Spaces:

hivecorp
/

text-to-video

Runtime error

App Files Community

hivecorp committed on Oct 20, 2024

Commit

8437f06

verified ·

1 Parent(s): 7b61720

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -16

app.py CHANGED Viewed

@@ -26,25 +26,24 @@ async def text_to_speech(text, voice, rate, pitch):
         await communicate.save(tmp_path)
     return tmp_path, None
-# Generate SRT file with 2 lines of subtitles containing 14-16 words total
-def generate_srt(words, audio_duration, srt_path):
     with open(srt_path, 'w', encoding='utf-8') as srt_file:
-        segment_duration = audio_duration / (len(words) // 16)  # Average duration for each 16 words
         current_time = 0
-        for i in range(0, len(words), 22):  # Every 16 words for two lines
-            line1 = ' '.join(words[i:i + 10])  # First line with up to 8 words
-            line2 = ' '.join(words[i + 12:i + 22])  # Second line with the next 8 words
-            if line2.strip() == '':
-                line2 = ''  # Handle case where there aren't enough words for the second line
             start_time = current_time
-            end_time = start_time + segment_duration  # Adjust duration for one line
             start_time_str = format_srt_time(start_time)
             end_time_str = format_srt_time(end_time)
-            srt_file.write(f"{i // 22 + 1}\n{start_time_str} --> {end_time_str}\n{line1}\n{line2}\n\n")
             current_time += segment_duration  # Update current time for the next segment
@@ -60,7 +59,7 @@ def format_srt_time(seconds):
     return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"
 # Text to audio and SRT functionality
-async def text_to_audio_and_srt(text, voice, rate, pitch):
     audio_path, warning = await text_to_speech(text, voice, rate, pitch)
     if warning:
         return None, None, warning
@@ -72,13 +71,13 @@ async def text_to_audio_and_srt(text, voice, rate, pitch):
     base_name = os.path.splitext(audio_path)[0]
     srt_path = f"{base_name}_subtitle.srt"
     words = text.split()
-    generate_srt(words, audio_duration, srt_path)
     return audio_path, srt_path, None
 # Gradio interface function
-def tts_interface(text, voice, rate, pitch):
-    audio_path, srt_path, warning = asyncio.run(text_to_audio_and_srt(text, voice, rate, pitch))
     return audio_path, srt_path, warning
 # Create Gradio app
@@ -100,6 +99,9 @@ async def create_demo():
                 voice_dropdown = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
                 rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1)
                 pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
                 generate_button = gr.Button("Generate Audio and Subtitles", variant="primary")
@@ -110,7 +112,7 @@ async def create_demo():
         generate_button.click(
             fn=tts_interface,
-            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
             outputs=[output_audio, output_srt, warning_msg]
         )

         await communicate.save(tmp_path)
     return tmp_path, None
+# Generate SRT file based on user preferences
+def generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph):
     with open(srt_path, 'w', encoding='utf-8') as srt_file:
+        segment_duration = audio_duration / (len(words) // (words_per_line * lines_per_paragraph))  # Average duration for each segment
         current_time = 0
+        for i in range(0, len(words), words_per_line * lines_per_paragraph):  # Every segment according to specified preferences
+            segment_words = words[i:i + (words_per_line * lines_per_paragraph)]
+            lines = [segment_words[j:j + words_per_line] for j in range(0, len(segment_words), words_per_line)]
+            lines = [' '.join(line) for line in lines]
             start_time = current_time
+            end_time = start_time + segment_duration
             start_time_str = format_srt_time(start_time)
             end_time_str = format_srt_time(end_time)
+            srt_file.write(f"{i // (words_per_line * lines_per_paragraph) + 1}\n{start_time_str} --> {end_time_str}\n")
+            srt_file.write('\n'.join(lines) + '\n\n')
             current_time += segment_duration  # Update current time for the next segment
     return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"
 # Text to audio and SRT functionality
+async def text_to_audio_and_srt(text, voice, rate, pitch, words_per_line, lines_per_paragraph):
     audio_path, warning = await text_to_speech(text, voice, rate, pitch)
     if warning:
         return None, None, warning
     base_name = os.path.splitext(audio_path)[0]
     srt_path = f"{base_name}_subtitle.srt"
     words = text.split()
+    generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph)
     return audio_path, srt_path, None
 # Gradio interface function
+def tts_interface(text, voice, rate, pitch, words_per_line, lines_per_paragraph):
+    audio_path, srt_path, warning = asyncio.run(text_to_audio_and_srt(text, voice, rate, pitch, words_per_line, lines_per_paragraph))
     return audio_path, srt_path, warning
 # Create Gradio app
                 voice_dropdown = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
                 rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1)
                 pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
+                words_per_line = gr.Slider(minimum=8, maximum=10, value=8, label="Words per Line", step=1)
+                lines_per_paragraph = gr.Slider(minimum=1, maximum=4, value=2, label="Lines per Paragraph", step=1)
                 generate_button = gr.Button("Generate Audio and Subtitles", variant="primary")
         generate_button.click(
             fn=tts_interface,
+            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider, words_per_line, lines_per_paragraph],
             outputs=[output_audio, output_srt, warning_msg]
         )