Spaces:

hivecorp
/

text-to-video

Runtime error

File size: 4,299 Bytes

9e41260
889e1fa
 
 
 
7bf74d5
385d7a8
 
 
889e1fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385d7a8
3ae4583
889e1fa
 
3ae4583
 
 
889e1fa
 
 
 
 
3ae4583
 
 
 
889e1fa
 
 
 
 
 
 
 
 
 
 
 
 
3ae4583
889e1fa
 
3ae4583
 
385d7a8
 
3ae4583
 
889e1fa
 
3ae4583
889e1fa
 
3ae4583
889e1fa
7bf74d5
 
 
 
 
889e1fa
3ae4583
7bf74d5
889e1fa

import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from moviepy.editor import AudioFileClip, concatenate_audioclips
from wand.image import Image
from wand.drawing import Drawing
from wand.color import Color

# Fetch all available voices
async def get_voices():
    """Return a mapping of dropdown labels to Edge TTS voice short names.

    Each label has the form ``"<ShortName> - <Locale> (<Gender>)"`` and maps
    to the ``ShortName`` of the same entry returned by
    ``edge_tts.list_voices()``.
    """
    labelled = {}
    for entry in await edge_tts.list_voices():
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled

# Text-to-speech conversion
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` with Edge TTS and save it to a temporary MP3 file.

    Args:
        text: Text to speak; must contain at least one non-whitespace
            character.
        voice: Voice selection. Only the part before the first ``" - "`` is
            passed to Edge TTS, so both a bare short name and a label of the
            form ``"<ShortName> - <Locale> (<Gender>)"`` are accepted.
        rate: Speaking-rate offset in percent (int or integral float).
        pitch: Pitch offset in Hz (int or integral float).

    Returns:
        ``(mp3_path, None)`` on success, or ``(None, message)`` when the
        input fails validation. ``message`` is the warning text, so callers
        can test the second element for truthiness.

    Raises:
        Whatever ``edge_tts`` raises while synthesizing; the temporary file
        is removed before the exception propagates.
    """
    if not text.strip():
        message = "Please enter the text to convert."
        # gr.Warning is called for its UI side effect only. Its return value
        # is not a usable message (None where it is a plain function), so the
        # text itself is returned to the caller.
        gr.Warning(message)
        return None, message
    if not voice:
        message = "Please select a voice."
        gr.Warning(message)
        return None, message

    voice_short_name = voice.split(" - ")[0]
    # The "+d" format code rejects floats, which a slider may deliver.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a unique path, then close our handle before edge_tts opens the
    # file itself (an open handle blocks a second open on some platforms).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except BaseException:
        # delete=False means nobody else will clean up a failed/partial file.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    return tmp_path, None

# Generate the SRT file
def generate_srt(words, audio_clips):
    """Write an SRT subtitle file with one cue per (text, clip) pair.

    Cues are laid out back to back starting at 0 seconds: each cue lasts for
    its clip's ``duration`` and the next cue starts where the previous one
    ends. The file is written as UTF-8 to ``output_subtitles.srt`` in the
    system temp directory.

    Args:
        words: Subtitle texts, one per cue.
        audio_clips: Objects exposing a ``duration`` attribute in seconds,
            paired positionally with ``words``.

    Returns:
        Path of the written SRT file.
    """
    srt_path = os.path.join(tempfile.gettempdir(), "output_subtitles.srt")
    cursor = 0  # running start offset of the next cue, in seconds
    with open(srt_path, 'w', encoding='utf-8') as out:
        for index, (caption, clip) in enumerate(zip(words, audio_clips), start=1):
            length = clip.duration
            cue_start = format_srt_time(cursor)
            cue_end = format_srt_time(cursor + length)
            out.write(f"{index}\n{cue_start} --> {cue_end}\n{caption}\n\n")
            cursor += length

    return srt_path

def format_srt_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The offset is rounded to the nearest whole millisecond before it is
    split into fields. Rounding once up front avoids losing a millisecond to
    floating-point error (e.g. ``1.001`` must give ``,001``, not ``,000``)
    and lets a value such as ``59.9996`` carry cleanly into the next minute.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero because SRT cannot represent them.

    Returns:
        The timestamp string, with hours zero-padded to at least two digits.
    """
    total_millis = max(0, int(round(seconds * 1000)))
    total_seconds, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

# Text to audio plus SRT subtitles
async def text_to_audio_and_srt(text, voice, rate, pitch):
    """Synthesize ``text`` in 3-word chunks and build matching SRT subtitles.

    Each chunk is synthesized separately via ``text_to_speech`` so that its
    audio duration can be used as the duration of its subtitle cue. The chunk
    audio is concatenated into ``output_audio.mp3`` in the system temp
    directory and the cues are written by ``generate_srt``.

    Args:
        text: Full input text; split on whitespace.
        voice: Voice selection, forwarded to ``text_to_speech``.
        rate: Speaking-rate offset in percent, forwarded unchanged.
        pitch: Pitch offset in Hz, forwarded unchanged.

    Returns:
        ``(audio_path, srt_path, None)`` on success, or
        ``(None, None, warning)`` when there is nothing to synthesize or a
        chunk could not be synthesized.
    """
    # Split text into words
    words = text.split()
    if not words:
        # Without this guard an empty clip list reaches
        # concatenate_audioclips, which cannot handle it.
        message = "Please enter the text to convert."
        gr.Warning(message)
        return None, None, message

    audio_clips = []
    subtitle_chunks = []
    chunk_paths = []  # per-chunk temp MP3s, removed once they are merged

    try:
        # Generate audio for each chunk of 3 words
        for i in range(0, len(words), 3):  # Adjust chunk size if needed
            chunk = ' '.join(words[i:i + 3])  # Create a chunk of 3 words
            audio, warning = await text_to_speech(chunk, voice, rate, pitch)
            # Also stop on a missing path: the helper may report failure with
            # a falsy warning value, and AudioFileClip(None) would crash.
            if warning or audio is None:
                return None, None, warning

            chunk_paths.append(audio)
            audio_clips.append(AudioFileClip(audio))
            subtitle_chunks.append(chunk)

        # Concatenate all audio clips
        final_audio_path = os.path.join(tempfile.gettempdir(), "output_audio.mp3")
        final_audio = concatenate_audioclips(audio_clips)
        final_audio.write_audiofile(final_audio_path)

        # Generate the SRT file while the clips (and their durations) are
        # still available
        srt_path = generate_srt(subtitle_chunks, audio_clips)
        return final_audio_path, srt_path, None
    finally:
        # Release the per-clip readers, then delete the chunk files; close
        # first because an open file cannot be removed on some platforms.
        for clip in audio_clips:
            clip.close()
        for path in chunk_paths:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # real result or the original exception.
                pass

# Gradio interface function
def tts_interface(text, voice, rate, pitch):
    """Synchronous entry point for Gradio.

    Runs the ``text_to_audio_and_srt`` coroutine to completion on a fresh
    event loop and returns its ``(audio_path, srt_path, warning)`` triple.
    """
    pipeline = text_to_audio_and_srt(text, voice, rate, pitch)
    audio_file, subtitle_file, notice = asyncio.run(pipeline)
    return audio_file, subtitle_file, notice

# Build the Gradio app
async def create_demo():
    """Build the Gradio ``Interface`` for the TTS + subtitles tool.

    The voice list is fetched with ``get_voices()`` (hence the coroutine) and
    offered in a dropdown whose first, default entry is the empty string,
    i.e. "no voice selected".

    Returns:
        The configured ``gr.Interface``; the caller is responsible for
        launching it.
    """
    voices = await get_voices()

    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5),
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.File(label="Generated SRT", file_count="single"),
            gr.Markdown(label="Warning", visible=False)
        ],
        title="Edge TTS Text to Speech with Subtitles",
        description="Convert text to speech and generate subtitles (SRT) using Microsoft Edge TTS.",
        analytics_enabled=False,
        # Gradio documents string values here ("never"/"auto"/"manual");
        # a boolean is deprecated and rejected by newer releases.
        allow_flagging="never",
    )

    return demo

# Run the app
if __name__ == "__main__":
    # create_demo() is a coroutine (it awaits the voice list), so it is run
    # to completion with asyncio.run before the Gradio server is started.
    demo = asyncio.run(create_demo())
    demo.launch()