hivecorp committed on
Commit
077e0e7
·
verified ·
1 Parent(s): 85eaa57

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -85
app.py CHANGED
@@ -1,92 +1,108 @@
1
- import gradio as gr
2
- from pydub import AudioSegment
3
  import edge_tts
 
4
  import os
5
- import asyncio
6
-
7
- # Function to get the length of an audio file in seconds
8
def get_audio_length(audio_file):
    """Return the playback duration of ``audio_file`` in seconds.

    The file is loaded with ``AudioSegment.from_file`` and the segment's
    ``duration_seconds`` attribute is returned.
    """
    return AudioSegment.from_file(audio_file).duration_seconds
11
-
12
- # Function to format time for SRT
13
def format_time(seconds):
    """Format an offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Non-negative offset in seconds (int or float).

    Returns:
        The zero-padded timestamp string.
    """
    # Work in whole milliseconds. Truncating the fractional part
    # (``int((seconds % 1) * 1000)``) drops a millisecond whenever the float
    # lands just below the intended value, e.g. 1.001 -> ",000".
    total_millis = round(seconds * 1000)
    total_secs, millis = divmod(total_millis, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hrs, mins = divmod(total_mins, 60)
    return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
20
 
21
- # Function to generate SRT with accurate timing per batch
22
async def generate_accurate_srt(batch_text, batch_num, start_offset):
    """Synthesize one batch of text and build SRT cues spread over its audio.

    The batch is spoken with edge-tts into ``batch_{batch_num}_audio.wav``.
    The measured length of that file is divided evenly across the words of
    the batch, and cues of up to 10 words each are emitted.

    Args:
        batch_text: Text of this batch.
        batch_num: Zero-based batch number; used in the audio file name and
            to offset cue indices by ``batch_num * 100``.
        start_offset: Time in seconds at which this batch starts in the
            combined audio.

    Returns:
        A tuple ``(srt_content, audio_file, end_offset)`` where
        ``end_offset`` is the time in seconds at which this batch ends.
    """
    audio_file = f"batch_{batch_num}_audio.wav"

    # Generate the audio using edge-tts
    tts = edge_tts.Communicate(batch_text, "en-US-AndrewNeural", rate="-25%")
    await tts.save(audio_file)

    # Get the actual length of the audio file
    actual_length = get_audio_length(audio_file)

    words = batch_text.split()
    if not words:
        # No cues to emit (and nothing to divide by); still advance the
        # offset because the caller appends this batch's audio regardless.
        return "", audio_file, start_offset + actual_length

    seconds_per_word = actual_length / len(words)
    cues = []
    start_time = start_offset

    for i in range(0, len(words), 10):
        segment_words = words[i:i + 10]
        # Scale by the real word count so a short final segment does not
        # claim a full 10-word slot and push later batches out of sync.
        end_time = start_time + seconds_per_word * len(segment_words)
        cues.append(
            f"{i // 10 + 1 + (batch_num * 100)}\n"
            f"{format_time(start_time)} --> {format_time(end_time)}\n"
            f"{' '.join(segment_words)}\n\n"
        )
        start_time = end_time

    return "".join(cues), audio_file, start_time
48
-
49
- # Batch processing function for SRT and audio generation
50
async def batch_process_srt_and_audio(script_text):
    """Turn a script into one combined audio file and one SRT file.

    The script is cut into 500-character batches. Each batch is synthesized
    and subtitled by ``generate_accurate_srt``; its audio is appended to the
    combined track and its temporary audio file is deleted.

    Args:
        script_text: Full script text.

    Returns:
        A tuple ``("final_subtitles.srt", "final_audio.wav")`` naming the
        files written in the current working directory.
    """
    batches = [script_text[i:i + 500] for i in range(0, len(script_text), 500)]
    srt_parts = []
    combined_audio = AudioSegment.empty()
    start_offset = 0.0  # Cumulative time offset for SRT timing

    for batch_num, batch_text in enumerate(batches):
        srt_content, audio_file, start_offset = await generate_accurate_srt(
            batch_text, batch_num, start_offset
        )
        srt_parts.append(srt_content)

        try:
            combined_audio += AudioSegment.from_file(audio_file)
        finally:
            # Remove the per-batch file even if it could not be decoded.
            os.remove(audio_file)

    # Export combined audio and SRT
    combined_audio.export("final_audio.wav", format="wav")
    # Explicit UTF-8 so non-ASCII script text does not depend on the
    # platform's default encoding.
    with open("final_subtitles.srt", "w", encoding="utf-8") as srt_file:
        srt_file.write("".join(srt_parts))

    return "final_subtitles.srt", "final_audio.wav"
74
 
75
- # Gradio interface function
76
async def process_script(script_text):
    """Gradio callback: generate the subtitles and audio for ``script_text``.

    Returns the SRT path followed by the audio path twice, one value per
    output component of the interface.
    """
    generated = await batch_process_srt_and_audio(script_text)
    subtitle_file, audio_file = generated
    return subtitle_file, audio_file, audio_file
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- # Gradio interface setup
81
# Input widget for the script text.
_script_input = gr.Textbox(label="Enter Script Text", lines=10)

# Output widgets, in the order process_script returns its values.
_result_outputs = [
    gr.File(label="Download SRT File"),
    gr.File(label="Download Audio File"),
    gr.Audio(label="Play Audio"),
]

app = gr.Interface(
    fn=process_script,
    inputs=_script_input,
    outputs=_result_outputs,
    description="Upload your script text, and the app will generate audio with en-US-AndrewNeural voice (Rate: -25%) and an accurate SRT file for download.",
)

app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import edge_tts
2
+ import srt
3
  import os
4
+ import wave
5
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ # Function to calculate audio duration for a given audio file
8
def get_audio_length(audio_path):
    """Return the duration, in seconds, of the WAV file at ``audio_path``.

    The duration is the frame count divided by the frame rate, both as
    reported by the standard-library ``wave`` reader.
    """
    with wave.open(audio_path, 'rb') as wav_reader:
        frame_count = wav_reader.getnframes()
        frame_rate = wav_reader.getframerate()
    return frame_count / float(frame_rate)
13
+
14
+ # Function to generate SRT entries for a batch of text with accurate timing
15
def generate_accurate_srt(text, start_time, batch_index, seconds_per_word=0.3):
    """Build one SRT entry per non-blank line of ``text`` with estimated timing.

    Timing is an estimate only: each line is given
    ``seconds_per_word`` seconds per whitespace-separated word, and lines are
    laid out back to back starting at ``start_time``. The audio itself is not
    consulted here.

    Args:
        text: Text of the batch; it is split with ``str.splitlines``.
        start_time: Start of the first entry, in seconds.
        batch_index: Index assigned to the first entry; later entries count
            up from it.
        seconds_per_word: Estimated speaking time per word, in seconds.

    Returns:
        A tuple ``(srt_entries, end_time)``: the list of ``srt.Subtitle``
        objects and the end time in seconds of the last entry (equal to
        ``start_time`` when no entry was produced).
    """
    # Imported here rather than reaching for ``srt.timedelta``, which is not
    # part of that package's documented interface.
    from datetime import timedelta

    srt_entries = []
    current_time = start_time
    next_index = batch_index

    for line in text.splitlines():
        word_count = len(line.split())
        if word_count == 0:
            # A blank line has nothing to show and would yield an empty,
            # zero-length entry that also consumes an index.
            continue

        end_time = current_time + word_count * seconds_per_word
        srt_entries.append(
            srt.Subtitle(
                index=next_index,
                start=timedelta(seconds=current_time),
                end=timedelta(seconds=end_time),
                content=line,
            )
        )
        current_time = end_time
        next_index += 1

    return srt_entries, current_time
35
+
36
+ # Process each batch of text, generate audio, and accumulate SRT entries
37
def _merge_wav_files(part_paths, combined_path):
    """Concatenate the WAV files in ``part_paths``, in order, into ``combined_path``."""
    with wave.open(combined_path, 'wb') as combined:
        for position, part_path in enumerate(part_paths):
            with wave.open(part_path, 'rb') as part:
                if position == 0:
                    # The parts are assumed to share one audio format (they
                    # are produced with identical TTS settings), so the first
                    # file's parameters are used for the whole output.
                    combined.setparams(part.getparams())
                combined.writeframes(part.readframes(part.getnframes()))


def batch_process_srt_and_audio(script_text, batch_size=500,
                                combined_audio_file="combined_audio.wav"):
    """Synthesize a script in batches and write its SRT and combined audio.

    For every ``batch_size``-character slice of ``script_text`` this
    synthesizes the audio with edge-tts, measures it, and creates SRT entries
    via ``generate_accurate_srt``. The estimated entry times of each batch
    are then stretched or compressed to the measured audio length so that
    subtitles do not drift away from the audio from batch to batch. Finally
    the SRT file is written and the per-batch audio files are merged into
    ``combined_audio_file`` and removed.

    NOTE(review): ``get_audio_length`` and the merge step read the batch
    files with the ``wave`` module. Confirm that the files edge-tts writes
    are real WAV data; if they are MP3 despite the ``.wav`` name, decoding
    must be handled differently.

    Args:
        script_text: Full script text.
        batch_size: Maximum number of characters per batch.
        combined_audio_file: Path of the merged audio file to write. Nothing
            is written when the script yields no audio.

    Returns:
        The path of the written SRT file (``"output.srt"``).

    Raises:
        RuntimeError: If called from a thread that already runs an asyncio
            event loop (``asyncio.run`` cannot be nested).
    """
    import asyncio
    from datetime import timedelta

    total_srt_entries = []
    batch_audio_files = []
    cumulative_time = 0.0
    batch_index = 1

    for i in range(0, len(script_text), batch_size):
        batch_text = script_text[i:i + batch_size]
        if not batch_text.strip():
            continue  # nothing to speak or subtitle in this slice

        # Generate audio for the batch
        audio_file = f"audio_batch_{i}.wav"
        communicate = edge_tts.Communicate(text=batch_text, voice="en-US-AndrewNeural", rate="-25%")
        # ``save`` is a coroutine: calling it without running it writes no
        # file at all, so drive it to completion here.
        asyncio.run(communicate.save(audio_file))
        batch_audio_files.append(audio_file)

        # Get the duration of the generated audio batch
        batch_duration = get_audio_length(audio_file)

        batch_start = cumulative_time
        srt_entries, estimated_end = generate_accurate_srt(batch_text, batch_start, batch_index)

        # Rescale the estimated timings so the batch spans exactly the
        # measured audio duration.
        estimated_span = estimated_end - batch_start
        if estimated_span > 0:
            stretch = batch_duration / estimated_span
            for entry in srt_entries:
                entry.start = timedelta(
                    seconds=batch_start + (entry.start.total_seconds() - batch_start) * stretch
                )
                entry.end = timedelta(
                    seconds=batch_start + (entry.end.total_seconds() - batch_start) * stretch
                )

        # The next batch starts where this batch's audio really ends.
        cumulative_time = batch_start + batch_duration

        total_srt_entries.extend(srt_entries)
        batch_index += len(srt_entries)

    # Write the SRT file
    srt_file = "output.srt"
    with open(srt_file, 'w') as file:
        file.write(srt.compose(total_srt_entries))

    # Merge the per-batch audio into one file, then drop the pieces.
    if batch_audio_files:
        _merge_wav_files(batch_audio_files, combined_audio_file)
        for audio_file in batch_audio_files:
            os.remove(audio_file)

    return srt_file
 
 
 
65
 
66
+ # Final validation to ensure no SRT entry extends beyond total audio duration
67
def validate_srt_against_audio(srt_file_path, audio_file_path):
    """Clamp every SRT entry so that none extends past the end of the audio.

    The SRT file is parsed, each entry's start and end are limited to the
    duration of ``audio_file_path``, and the file is rewritten in place.

    NOTE(review): an entry that originally started after the audio ends
    becomes zero-length; confirm whether ``srt.compose`` keeps or drops such
    entries.

    Args:
        srt_file_path: Path of the SRT file to validate and rewrite.
        audio_file_path: Path of the audio file whose duration is the limit.

    Returns:
        ``srt_file_path``.
    """
    from datetime import timedelta

    audio_end = timedelta(seconds=get_audio_length(audio_file_path))

    with open(srt_file_path, 'r') as file:
        subtitles = list(srt.parse(file.read()))

    # Check every entry: stopping at the first overrun would leave all the
    # following entries (which end even later) beyond the audio.
    for subtitle in subtitles:
        if subtitle.end > audio_end:
            subtitle.end = audio_end
        if subtitle.start > audio_end:
            subtitle.start = audio_end

    # Write the validated SRT back to the file
    with open(srt_file_path, 'w') as file:
        file.write(srt.compose(subtitles))

    return srt_file_path
 
 
 
 
 
 
 
 
 
 
83
 
84
+ # Gradio Interface
85
def process_text_to_srt(script_text):
    """Gradio callback: create the SRT for ``script_text`` and validate it.

    The script is processed by ``batch_process_srt_and_audio`` and the
    resulting SRT is checked against ``combined_audio.wav``, which is
    expected to exist in the working directory.

    Returns:
        A tuple ``(srt_path, audio_path)``.
    """
    # Combined final audio the subtitles are validated against.
    combined_audio_path = "combined_audio.wav"

    subtitle_path = batch_process_srt_and_audio(script_text)
    validate_srt_against_audio(subtitle_path, combined_audio_path)

    return subtitle_path, combined_audio_path
94
+
95
+ # Gradio app setup
96
def main():
    """Build the Gradio interface around ``process_text_to_srt`` and launch it."""
    # NOTE(review): live=True makes Gradio re-run the callback whenever the
    # input changes; confirm that is intended for a text-to-speech pipeline.
    interface = gr.Interface(
        fn=process_text_to_srt,
        inputs="textbox",
        outputs=["file", "audio"],
        live=True,
        title="Text-to-SRT with Accurate Timing",
        description="Enter text to convert it into audio with synchronized SRT subtitles. The SRT timings are validated against the total audio duration.",
    )
    interface.launch()


# Run the app only when executed as a script
if __name__ == "__main__":
    main()