text-to-video / app.py
hivecorp's picture
Update app.py
513a56f verified
raw
history blame
2.06 kB
import gradio as gr
from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
# Function to generate audio from text (placeholder)
def generate_audio(text, voice, rate, pitch):
    """Generate narration audio for ``text`` (placeholder, not implemented).

    A real implementation should synthesize speech and return an audio clip
    (the original note asks for an ``AudioFileClip``). ``text_to_video``
    reads the ``duration`` attribute of the returned object and passes it to
    ``set_audio``, so the return value must support both uses.

    Args:
        text: The text to be spoken.
        voice: The voice selected in the UI.
        rate: The speaking-rate value selected in the UI.
        pitch: The pitch value selected in the UI.

    Raises:
        NotImplementedError: Always, until an audio backend is implemented.
    """
    # Fail loudly here instead of returning None, which would only surface
    # later as an AttributeError on ``audio_clip.duration`` in the caller.
    raise NotImplementedError(
        "generate_audio() is a placeholder: implement audio generation and "
        "return an AudioFileClip."
    )
# Function to create video from text and background media
def text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height):
    """Render ``output_video.mp4`` from a background file plus generated audio.

    The background is shown for exactly as long as the generated audio lasts.

    Args:
        text: Text handed to ``generate_audio``.
        voice: Voice handed to ``generate_audio``.
        rate: Rate handed to ``generate_audio``.
        pitch: Pitch handed to ``generate_audio``.
        bg_media: Path of the background file, or an object exposing the path
            as ``.name`` (e.g. an uploaded temp file). ``.mp4`` is treated as
            video; ``.jpg``/``.jpeg``/``.png`` as a still image. Matching is
            case-insensitive.
        video_width: Optional output width in pixels. Only applied when
            ``video_height`` is also given.
        video_height: Optional output height in pixels. Only applied when
            ``video_width`` is also given.

    Returns:
        ``(output_file, None)`` on success, or ``(None, message)`` when the
        background media is missing or of an unsupported type.
    """
    if bg_media is None:
        return None, "No background media provided."

    # Accept both a plain path and an upload wrapper that carries the path in
    # its ``name`` attribute.
    media_path = str(getattr(bg_media, "name", bg_media))
    lowered = media_path.lower()

    # Validate the media type first so no audio is generated for a file that
    # cannot be used anyway.
    if lowered.endswith('.mp4'):
        is_video = True
    elif lowered.endswith(('.jpg', '.png', '.jpeg')):
        is_video = False
    else:
        return None, "Unsupported media type."

    # Generate the audio clip
    audio_clip = generate_audio(text, voice, rate, pitch)

    source_clip = None
    try:
        source_clip = VideoFileClip(media_path) if is_video else ImageClip(media_path)
        bg_clip = source_clip.set_duration(audio_clip.duration)

        # Honour the requested output size when both dimensions are supplied.
        if video_width and video_height:
            bg_clip = bg_clip.resize(newsize=(video_width, video_height))

        # Create a final video with audio
        final_video = bg_clip.set_audio(audio_clip)

        # Set the final output video file name
        output_file = "output_video.mp4"

        # Still images carry no frame rate, and write_videofile needs one;
        # fall back to 24 fps when the clip does not provide its own.
        fps = getattr(final_video, "fps", None) or 24

        # Write the final video to a file
        final_video.write_videofile(output_file, codec='libx264', fps=fps)
    finally:
        # Release file readers even if rendering failed.
        for clip in (source_clip, audio_clip):
            close = getattr(clip, "close", None)
            if callable(close):
                close()

    return output_file, None
# Gradio interface
def tts_interface(text, voice, rate, pitch, bg_media):
    """Gradio callback: build the video and return the path of the result.

    Calls ``text_to_video`` with no explicit output size (width and height
    are passed as ``None``).

    Args:
        text: Text from the "Text" input.
        voice: Selection from the "Voice" input.
        rate: Value from the "Rate" input.
        pitch: Value from the "Pitch" input.
        bg_media: Upload from the "Background Media" input.

    Returns:
        The path of the rendered video file.

    Raises:
        gr.Error: If ``text_to_video`` reports a warning message.
    """
    video, warning = text_to_video(text, voice, rate, pitch, bg_media, None, None)
    if warning:
        # The interface output is a file component, so a returned message
        # string would be handled as a file path; raise it as a UI error.
        raise gr.Error(warning)
    return video
# Assemble the UI: five input widgets feed tts_interface, which produces a
# single downloadable file.
iface = gr.Interface(
    title="Text to Video with Audio",
    description="Upload an image or video and generate a video with audio from text.",
    fn=tts_interface,
    inputs=[
        gr.Textbox(label="Text"),
        # Placeholder voice names; update with actual voices.
        gr.Dropdown(choices=["Voice 1", "Voice 2"], label="Voice"),
        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Rate"),
        gr.Slider(minimum=0, maximum=100, value=50, step=1, label="Pitch"),
        gr.File(label="Background Media (Image/Video)"),
    ],
    outputs="file",
)

# Start the app, requesting a public share link.
iface.launch(share=True)