Spaces:
Runtime error
Runtime error
File size: 5,401 Bytes
9e41260 889e1fa d011be0 889e1fa 7bc6887 889e1fa 7bc6887 889e1fa 8437f06 f1779f5 7a5c01c f1779f5 7a5c01c fbe2197 889e1fa f1779f5 ff60c7c 3ae4583 7a5c01c 889e1fa f1779f5 ff60c7c 7a5c01c 3ae4583 889e1fa ed67ff5 889e1fa 7bc6887 8437f06 f1779f5 12ef859 889e1fa 12ef859 d011be0 7a5c01c f1779f5 7a5c01c f1779f5 7bf74d5 12ef859 889e1fa 7bc6887 8437f06 889e1fa 7bc6887 889e1fa d011be0 8437f06 fbe2197 fef1314 d011be0 fef1314 d011be0 8437f06 d011be0 fef1314 889e1fa 7bc6887 889e1fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from moviepy.editor import AudioFileClip
# Get all available voices
async def get_voices():
    """Fetch the edge-tts voice catalogue as a label -> short-name mapping.

    Returns:
        dict: keys are display labels of the form
        ``"<ShortName> - <Locale> (<Gender>)"``; values are the voice's
        ``ShortName`` as reported by ``edge_tts.list_voices()``.
    """
    voice_map = {}
    for entry in await edge_tts.list_voices():
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        voice_map[label] = entry['ShortName']
    return voice_map
# Text to speech functionality
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* to a temporary MP3 file using edge-tts.

    Args:
        text: The text to speak.
        voice: Dropdown label of the form ``"<ShortName> - <Locale> (...)"``;
            only the part before the first ``" - "`` is passed to edge-tts.
        rate: Speaking-rate adjustment in percent (sent as e.g. ``"+10%"``).
        pitch: Pitch adjustment in Hz (sent as e.g. ``"-5Hz"``).

    Returns:
        tuple: ``(mp3_path, None)`` on success, or ``(None, message)`` when
        the text or the voice is missing. The MP3 is created with
        ``delete=False``, so the caller owns the file.
    """
    # gr.Warning() only displays a toast; its return value is not a usable
    # warning object. Return the message itself so callers that test the
    # second element actually stop instead of processing a missing file.
    if not text or not text.strip():
        message = "Please enter the text to convert."
        gr.Warning(message)
        return None, message
    if not voice:
        message = "Please select a voice."
        gr.Warning(message)
        return None, message

    voice_short_name = voice.split(" - ")[0]
    # The "+d" format spec rejects floats, so normalise the values first.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Only reserve a unique path here; close the handle before edge-tts
    # writes to that path so the file is never open twice.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path, None
# Generate SRT file based on user preferences
def generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph):
    """Write an SRT subtitle file that spreads *words* evenly over the audio.

    Each subtitle cue holds up to ``lines_per_paragraph`` lines of up to
    ``words_per_line`` words. Every cue is given an equal share of
    ``audio_duration``; no speech alignment is performed.

    Args:
        words: Sequence of word strings, in spoken order.
        audio_duration: Length of the audio in seconds.
        srt_path: Destination path of the ``.srt`` file (overwritten).
        words_per_line: Maximum number of words on one subtitle line.
        lines_per_paragraph: Maximum number of lines in one cue.

    Returns:
        The ``srt_path`` that was written. With no words the file is empty.
    """
    # Coerce to ints >= 1: float values break range()/slicing and a zero
    # would make the chunking below impossible.
    words_per_line = max(1, int(words_per_line))
    lines_per_paragraph = max(1, int(lines_per_paragraph))
    words_per_cue = words_per_line * lines_per_paragraph

    total_words = len(words)
    # Ceiling division so a trailing partial cue is counted. Floor division
    # here yields 0 for short texts, which previously caused a division by
    # zero when computing the per-cue duration.
    cue_count = -(-total_words // words_per_cue)
    cue_duration = audio_duration / cue_count if cue_count else 0

    with open(srt_path, 'w', encoding='utf-8') as srt_file:
        for cue_index in range(cue_count):
            first_word = cue_index * words_per_cue
            cue_words = words[first_word:first_word + words_per_cue]
            cue_lines = [
                ' '.join(cue_words[offset:offset + words_per_line])
                for offset in range(0, len(cue_words), words_per_line)
            ]
            # Multiply instead of accumulating to avoid float drift.
            start_time = cue_index * cue_duration
            end_time = min(start_time + cue_duration, audio_duration)
            start_time_str = format_srt_time(start_time)
            end_time_str = format_srt_time(end_time)
            srt_file.write(f"{cue_index + 1}\n{start_time_str} --> {end_time_str}\n")
            srt_file.write('\n'.join(cue_lines) + "\n\n")
    return srt_path
def format_srt_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Offset from the start of the media, int or float.
            Negative values are clamped to zero.

    Returns:
        str: Zero-padded timestamp, rounded to the nearest millisecond.
    """
    # Convert to whole milliseconds once. Truncating the fractional part
    # separately loses a millisecond to float error (1.001 s -> ",000").
    total_millis = max(0, round(seconds * 1000))
    total_seconds, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
# Text to audio and SRT functionality
async def text_to_audio_and_srt(text, voice, rate, pitch, words_per_line, lines_per_paragraph):
    """Produce a speech MP3 and a matching SRT subtitle file for *text*.

    Args:
        text: Raw user text; runs of whitespace/newlines are collapsed.
        voice: Voice label forwarded to ``text_to_speech``.
        rate: Rate adjustment forwarded to ``text_to_speech``.
        pitch: Pitch adjustment forwarded to ``text_to_speech``.
        words_per_line: Forwarded to ``generate_srt``.
        lines_per_paragraph: Forwarded to ``generate_srt``.

    Returns:
        tuple: ``(audio_path, srt_path, None)`` on success, or
        ``(None, None, warning)`` when no audio was produced.
    """
    # Clean up input text: remove extra spaces and newlines.
    cleaned_text = ' '.join((text or "").split())

    audio_path, warning = await text_to_speech(cleaned_text, voice, rate, pitch)
    # Also stop when no audio path came back, even if the warning slot is
    # empty; otherwise AudioFileClip would be opened on a missing file.
    if warning or not audio_path:
        return None, None, warning

    # The clip is only needed for its duration; close it so the underlying
    # reader is released instead of leaking on every request.
    audio_clip = AudioFileClip(audio_path)
    try:
        audio_duration = audio_clip.duration
    finally:
        audio_clip.close()

    # The subtitle file sits next to the audio: "<audio stem>_subtitle.srt".
    base_name = os.path.splitext(audio_path)[0]
    srt_path = f"{base_name}_subtitle.srt"

    words = cleaned_text.split()
    generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph)
    return audio_path, srt_path, None
# Gradio interface function
def tts_interface(text, voice, rate, pitch, words_per_line, lines_per_paragraph):
    """Synchronous click handler wrapping the async audio + SRT pipeline.

    Runs ``text_to_audio_and_srt`` to completion with ``asyncio.run`` and
    returns its ``(audio_path, srt_path, warning)`` triple unchanged.
    """
    pipeline = text_to_audio_and_srt(
        text, voice, rate, pitch, words_per_line, lines_per_paragraph
    )
    audio_file, subtitle_file, notice = asyncio.run(pipeline)
    return audio_file, subtitle_file, notice
# Create Gradio app
async def create_demo():
    """Build the Gradio Blocks UI for the text-to-speech + subtitles tool.

    Awaits ``get_voices()`` to populate the voice dropdown, lays out the
    input and output widgets, and wires the generate button to
    ``tts_interface``.

    Returns:
        The constructed ``gr.Blocks`` instance (not launched).
    """
    # NOTE(review): the nesting of Row/Column below is reconstructed from
    # syntax; confirm against the deployed layout.
    voice_lookup = await get_voices()
    # The leading empty entry is the initial "nothing selected" choice.
    voice_choices = ["", *voice_lookup]
    header_html = (
        "\n"
        '<h1 style="text-align: center; color: #333;">Text to Speech with Subtitles</h1>\n'
        '<p style="text-align: center; color: #555;">Convert your text to natural-sounding speech and generate subtitles (SRT) for your audio.</p>\n'
    )

    with gr.Blocks() as demo:
        gr.Markdown(header_html, elem_id="header")

        with gr.Row():
            # Input controls and the trigger button.
            with gr.Column():
                text_box = gr.Textbox(label="Input Text", lines=5, placeholder="Enter text here...")
                voice_picker = gr.Dropdown(choices=voice_choices, label="Select Voice", value="")
                rate_control = gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1)
                pitch_control = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
                words_control = gr.Slider(minimum=3, maximum=8, value=5, label="Words per Line", step=1)
                lines_control = gr.Slider(minimum=1, maximum=5, value=2, label="Lines per Paragraph", step=1)
                run_button = gr.Button("Generate Audio and Subtitles", variant="primary")

            # Results: audio player, SRT download, and a warning slot.
            with gr.Column():
                audio_out = gr.Audio(label="Generated Audio", type="filepath")
                srt_out = gr.File(label="Generated SRT", file_count="single")
                warning_out = gr.Markdown(label="Warning", visible=False)

        # Input order must match the tts_interface parameter order.
        handler_inputs = [
            text_box,
            voice_picker,
            rate_control,
            pitch_control,
            words_control,
            lines_control,
        ]
        handler_outputs = [audio_out, srt_out, warning_out]
        run_button.click(
            fn=tts_interface,
            inputs=handler_inputs,
            outputs=handler_outputs,
        )

    return demo
# Run the app
if __name__ == "__main__":
    # create_demo() is a coroutine (it awaits the voice list), so it is
    # driven to completion with asyncio.run before the UI is launched.
    demo = asyncio.run(create_demo())
    demo.launch()
|