hivecorp committed on
Commit 513a56f · verified · 1 Parent(s): 0692426

Update app.py

Files changed (1)
  1. app.py +43 -80
app.py CHANGED
@@ -1,92 +1,55 @@
  import gradio as gr
- import edge_tts
- import asyncio
- import tempfile
- import os
- from moviepy.editor import concatenate_videoclips, AudioFileClip, ImageClip, VideoFileClip
+ from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
 
- # Get all available voices
- async def get_voices():
-     voices = await edge_tts.list_voices()
-     return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
-
- # Text-to-speech function
- async def text_to_speech(text, voice, rate, pitch):
-     if not text.strip():
-         return None, gr.Warning("Please enter the text to convert.")
-     if not voice:
-         return None, gr.Warning("Please select a voice.")
-
-     voice_short_name = voice.split(" - ")[0]
-     rate_str = f"{rate:+d}%"
-     pitch_str = f"{pitch:+d}Hz"
-     communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
-         tmp_path = tmp_file.name
-         await communicate.save(tmp_path)
-     return tmp_path, None
-
- # Text-to-video function
+ # Function to generate audio from text (placeholder)
+ def generate_audio(text, voice, rate, pitch):
+     # This should generate the audio and return an AudioFileClip
+     # Implement your audio generation logic here
+     pass
+
+ # Function to create video from text and background media
  def text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height):
-     # Generate audio from text
-     audio, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
-     if warning:
-         return None, warning
-
-     audio_clip = AudioFileClip(audio)
-
-     # Check if bg_media is None
-     if bg_media is None:
-         return None, gr.Warning("Please upload a background image or video.")
-
-     # Create background video or image
+     # Generate the audio clip
+     audio_clip = generate_audio(text, voice, rate, pitch)  # Ensure this function is defined to generate audio
+
+     # Determine the type of background media and create the appropriate clip
      if bg_media.endswith('.mp4'):
-         bg_clip = VideoFileClip(bg_media).resize(newsize=(video_width, video_height)).set_duration(audio_clip.duration)
+         bg_clip = VideoFileClip(bg_media).set_duration(audio_clip.duration)
+     elif bg_media.endswith(('.jpg', '.png', '.jpeg')):
+         bg_clip = ImageClip(bg_media).set_duration(audio_clip.duration)
      else:
-         bg_clip = ImageClip(bg_media).set_duration(audio_clip.duration).resize(newsize=(video_width, video_height))
-
-     # Set audio for the background
+         return None, "Unsupported media type."
+
+     # Create a final video with audio
      final_video = bg_clip.set_audio(audio_clip)
 
-     final_video_path = os.path.join(tempfile.gettempdir(), "output_video.mp4")
-     final_video.write_videofile(final_video_path, fps=24, codec="libx264")
-
-     return final_video_path, None
-
- # Gradio interface function
- def tts_interface(text, voice, rate, pitch, bg_media, video_width, video_height):
-     video, warning = text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height)
-     return None, video, warning
-
- # Create Gradio app
- async def create_demo():
-     voices = await get_voices()
-
-     demo = gr.Interface(
-         fn=tts_interface,
-         inputs=[
-             gr.Textbox(label="Input Text", lines=5),
-             gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
-             gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
-             gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
-             gr.File(label="Upload Background Image or Video", type="filepath"),
-             gr.Slider(minimum=640, maximum=1920, value=1080, label="Video Width", step=10),
-             gr.Slider(minimum=480, maximum=1080, value=720, label="Video Height", step=10),
-         ],
-         outputs=[
-             gr.Audio(label="Generated Audio", type="filepath"),
-             gr.Video(label="Generated Video"),
-             gr.Markdown(label="Warning", visible=False)
-         ],
-         title="Edge TTS Text to Speech and Video",
-         description="Convert text to speech and video using Microsoft Edge TTS. Upload an image or video for the background.",
-         analytics_enabled=False,
-         allow_flagging=False,
-     )
-
-     return demo
-
- # Run the application
- if __name__ == "__main__":
-     demo = asyncio.run(create_demo())
-     demo.launch(share=True) # Set share=True for public link
+     # Set the final output video file name
+     output_file = "output_video.mp4"
+
+     # Write the final video to a file
+     final_video.write_videofile(output_file, codec='libx264')
+
+     return output_file, None
+
+ # Gradio interface
+ def tts_interface(text, voice, rate, pitch, bg_media):
+     video, warning = text_to_video(text, voice, rate, pitch, bg_media, None, None)
+     if warning:
+         return warning
+     return video
+
+ iface = gr.Interface(
+     fn=tts_interface,
+     inputs=[
+         gr.Textbox(label="Text"),
+         gr.Dropdown(label="Voice", choices=["Voice 1", "Voice 2"]),  # Update with actual voices
+         gr.Slider(label="Rate", minimum=0.5, maximum=2.0, step=0.1, value=1.0),
+         gr.Slider(label="Pitch", minimum=0, maximum=100, step=1, value=50),
+         gr.File(label="Background Media (Image/Video)")
+     ],
+     outputs="file",
+     title="Text to Video with Audio",
+     description="Upload an image or video and generate a video with audio from text."
+ )
+
+ iface.launch(share=True)
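
Note: in the new app.py, generate_audio() is only a stub that returns None, so text_to_video() would fail as soon as it touches audio_clip.duration. Below is a minimal sketch of one way to fill in that stub, assuming the edge-tts approach removed by this commit is still the intent; the edge_tts, asyncio, and tempfile imports, the _synthesize() helper, and the AudioFileClip wrapper are assumptions for illustration, not part of the committed code.

```python
import asyncio
import tempfile

import edge_tts
from moviepy.editor import AudioFileClip


def generate_audio(text, voice, rate, pitch):
    """Synthesize `text` with edge-tts and return it as a MoviePy AudioFileClip."""
    async def _synthesize():
        # edge-tts expects rate as a signed percentage string and pitch as signed Hz,
        # the same convention used by the removed text_to_speech(); `voice` must be
        # an edge-tts ShortName (e.g. one returned by edge_tts.list_voices()), not
        # the "Voice 1"/"Voice 2" placeholders in the new dropdown.
        communicate = edge_tts.Communicate(
            text,
            voice,
            rate=f"{int(rate):+d}%",
            pitch=f"{int(pitch):+d}Hz",
        )
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name
        await communicate.save(tmp_path)
        return tmp_path

    audio_path = asyncio.run(_synthesize())
    return AudioFileClip(audio_path)
```

Two related caveats: the new Rate (0.5-2.0) and Pitch (0-100) sliders use a different scale than edge-tts's signed percent/Hz strings, so their values would need mapping before being passed through; and for still-image backgrounds, write_videofile() would likely need an explicit fps (the removed code passed fps=24), since an ImageClip carries no frame rate of its own.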