Spaces:

hivecorp
/

text-to-video

Runtime error

App Files Community

hivecorp committed on Oct 20, 2024

Commit

d011be0

verified ·

1 Parent(s): 12ef859

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -25

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import edge_tts
 import asyncio
 import tempfile
 import os
-from moviepy.editor import AudioFileClip, concatenate_audioclips
 # Get all available voices
 async def get_voices():
@@ -27,10 +27,8 @@ async def text_to_speech(text, voice, rate, pitch):
     return tmp_path, None
 # Generate the SRT file
-def generate_srt(words, audio_duration, fps=24):
-    srt_path = os.path.join(tempfile.gettempdir(), "output_subtitles.srt")
     with open(srt_path, 'w', encoding='utf-8') as srt_file:
-        # Split audio duration into segments for SRT
         segment_duration = audio_duration / len(words)  # Average duration per word
         current_time = 0
@@ -38,7 +36,6 @@ def generate_srt(words, audio_duration, fps=24):
             start_time = current_time
             end_time = start_time + segment_duration
-            # Convert to SRT format
             start_time_str = format_srt_time(start_time)
             end_time_str = format_srt_time(end_time)
             srt_file.write(f"{i + 1}\n{start_time_str} --> {end_time_str}\n{word}\n\n")
@@ -58,7 +55,6 @@ def format_srt_time(seconds):
 # Convert text to audio and SRT
 async def text_to_audio_and_srt(text, voice, rate, pitch):
-    # Generate audio for the entire text
     audio_path, warning = await text_to_speech(text, voice, rate, pitch)
     if warning:
         return None, None, warning
@@ -67,8 +63,10 @@ async def text_to_audio_and_srt(text, voice, rate, pitch):
     audio_duration = audio_clip.duration
     # Generate SRT file based on the entire text
     words = text.split()
-    srt_path = generate_srt(words, audio_duration)
     return audio_path, srt_path, None
@@ -81,24 +79,34 @@ def tts_interface(text, voice, rate, pitch):
 async def create_demo():
     voices = await get_voices()
-    demo = gr.Interface(
-        fn=tts_interface,
-        inputs=[
-            gr.Textbox(label="Input Text", lines=5),
-            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
-            gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
-            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
-        ],
-        outputs=[
-            gr.Audio(label="Generated Audio", type="filepath"),
-            gr.File(label="Generated SRT", file_count="single"),
-            gr.Markdown(label="Warning", visible=False)
-        ],
-        title="Edge TTS Text to Speech with Subtitles",
-        description="Convert text to speech and generate subtitles (SRT) using Microsoft Edge TTS.",
-        analytics_enabled=False,
-        allow_flagging=False,
-    )
     return demo

 import asyncio
 import tempfile
 import os
+from moviepy.editor import AudioFileClip
 # Get all available voices
 async def get_voices():
     return tmp_path, None
 # Generate the SRT file
+def generate_srt(words, audio_duration, srt_path, fps=24):
     with open(srt_path, 'w', encoding='utf-8') as srt_file:
         segment_duration = audio_duration / len(words)  # Average duration per word
         current_time = 0
             start_time = current_time
             end_time = start_time + segment_duration
             start_time_str = format_srt_time(start_time)
             end_time_str = format_srt_time(end_time)
             srt_file.write(f"{i + 1}\n{start_time_str} --> {end_time_str}\n{word}\n\n")
 # Convert text to audio and SRT
 async def text_to_audio_and_srt(text, voice, rate, pitch):
     audio_path, warning = await text_to_speech(text, voice, rate, pitch)
     if warning:
         return None, None, warning
     audio_duration = audio_clip.duration
     # Generate SRT file based on the entire text
+    base_name = os.path.splitext(audio_path)[0]
+    srt_path = f"{base_name}_subtitle.srt"
     words = text.split()
+    generate_srt(words, audio_duration, srt_path)
     return audio_path, srt_path, None
 async def create_demo():
     voices = await get_voices()
+    with gr.Blocks() as demo:
+        gr.Markdown(
+            """
+            <h1 style="text-align: center; color: #333;">Text to Speech with Subtitles</h1>
+            <p style="text-align: center; color: #555;">Convert your text to natural-sounding speech and generate subtitles (SRT) for your audio.</p>
+            """,
+            elem_id="header"
+        )
+        with gr.Row():
+            with gr.Column():
+                text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Enter text here...")
+                voice_dropdown = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
+                rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1)
+                pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
+            with gr.Column():
+                generate_button = gr.Button("Generate Audio and Subtitles", variant="primary")
+        output_audio = gr.Audio(label="Generated Audio", type="filepath")
+        output_srt = gr.File(label="Generated SRT", file_count="single")
+        warning_msg = gr.Markdown(label="Warning", visible=False)
+        generate_button.click(
+            fn=tts_interface,
+            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
+            outputs=[output_audio, output_srt, warning_msg]
+        )
     return demo