Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,37 +6,30 @@ import asyncio
|
|
6 |
import uuid
|
7 |
import re
|
8 |
|
9 |
-
# Function to get the length of an audio file in
|
10 |
def get_audio_length(audio_file):
|
11 |
audio = AudioSegment.from_file(audio_file)
|
12 |
-
return audio
|
13 |
-
|
14 |
-
# Function to format time for SRT
|
15 |
-
def
|
16 |
-
|
17 |
-
|
18 |
-
hrs =
|
19 |
-
mins
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
# Function to split text based on punctuation, handling segments over 8 words
|
24 |
def split_text_into_segments(text):
|
25 |
-
# Split based on punctuation marks (.!? and ,)
|
26 |
segments = []
|
27 |
raw_segments = re.split(r'([.!?,])', text)
|
28 |
-
|
29 |
-
|
30 |
for i in range(0, len(raw_segments) - 1, 2):
|
31 |
-
# Combine sentence with punctuation
|
32 |
sentence = raw_segments[i].strip() + raw_segments[i + 1]
|
33 |
words = sentence.split()
|
34 |
-
|
35 |
-
# If the sentence has 8 words or fewer, add as is
|
36 |
if len(words) <= 8:
|
37 |
segments.append(sentence.strip())
|
38 |
else:
|
39 |
-
# Split longer sentences into chunks of max 8 words without splitting words
|
40 |
chunk = ""
|
41 |
for word in words:
|
42 |
if len(chunk.split()) < 8:
|
@@ -47,7 +40,6 @@ def split_text_into_segments(text):
|
|
47 |
if chunk:
|
48 |
segments.append(chunk.strip())
|
49 |
|
50 |
-
# Handle any leftover sentence fragment not followed by punctuation
|
51 |
if len(raw_segments) % 2 == 1:
|
52 |
remaining_text = raw_segments[-1].strip()
|
53 |
if remaining_text:
|
@@ -55,23 +47,19 @@ def split_text_into_segments(text):
|
|
55 |
|
56 |
return segments
|
57 |
|
58 |
-
# Function to generate SRT with
|
59 |
async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice):
|
60 |
audio_file = f"batch_{batch_num}_audio.wav"
|
61 |
|
62 |
-
# Generate the audio using edge-tts
|
63 |
tts = edge_tts.Communicate(batch_text, voice, rate=rate, pitch=pitch)
|
64 |
await tts.save(audio_file)
|
65 |
|
66 |
-
|
67 |
-
actual_length = get_audio_length(audio_file)
|
68 |
|
69 |
-
# Split the text into segments based on punctuation and word count
|
70 |
segments = split_text_into_segments(batch_text)
|
71 |
-
segment_duration = actual_length / len(segments)
|
72 |
start_time = start_offset
|
73 |
|
74 |
-
# Initialize SRT content
|
75 |
srt_content = ""
|
76 |
for index, segment in enumerate(segments):
|
77 |
end_time = start_time + segment_duration
|
@@ -80,14 +68,14 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
|
|
80 |
end_time = start_offset + actual_length
|
81 |
|
82 |
srt_content += f"{index + 1 + (batch_num * 100)}\n"
|
83 |
-
srt_content += f"{
|
84 |
srt_content += segment + "\n\n"
|
85 |
|
86 |
start_time = end_time
|
87 |
|
88 |
return srt_content, audio_file, start_time
|
89 |
|
90 |
-
# Batch processing function
|
91 |
async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=gr.Progress()):
|
92 |
batches = [script_text[i:i + 500] for i in range(0, len(script_text), 500)]
|
93 |
all_srt_content = ""
|
@@ -114,7 +102,7 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=
|
|
114 |
end_time = sum(x * float(t) for x, t in zip([3600, 60, 1, 0.001], end_str.replace(',', ':').split(':')))
|
115 |
if end_time > total_audio_length:
|
116 |
end_time = total_audio_length
|
117 |
-
line = f"{
|
118 |
validated_srt_content += line + "\n"
|
119 |
|
120 |
unique_id = uuid.uuid4()
|
@@ -130,7 +118,6 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=
|
|
130 |
|
131 |
# Gradio interface function
|
132 |
async def process_script(script_text, pitch, rate, voice):
|
133 |
-
# Format pitch correctly for edge-tts
|
134 |
pitch_str = f"{pitch}Hz" if pitch != 0 else "-1Hz"
|
135 |
formatted_rate = f"{'+' if rate > 1 else ''}{int(rate)}%"
|
136 |
srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch_str, formatted_rate, voice_options[voice])
|
@@ -138,44 +125,8 @@ async def process_script(script_text, pitch, rate, voice):
|
|
138 |
|
139 |
# Gradio interface setup
|
140 |
voice_options = {
|
141 |
-
|
142 |
"Jenny Female": "en-US-JennyNeural",
|
143 |
-
"Guy Male": "en-US-GuyNeural",
|
144 |
-
"Ana Female": "en-US-AnaNeural",
|
145 |
-
"Aria Female": "en-US-AriaNeural",
|
146 |
-
"Brian Male": "en-US-BrianNeural",
|
147 |
-
"Christopher Male": "en-US-ChristopherNeural",
|
148 |
-
"Eric Male": "en-US-EricNeural",
|
149 |
-
"Michelle Male": "en-US-MichelleNeural",
|
150 |
-
"Roger Male": "en-US-RogerNeural",
|
151 |
-
"Natasha Female": "en-AU-NatashaNeural",
|
152 |
-
"William Male": "en-AU-WilliamNeural",
|
153 |
-
"Clara Female": "en-CA-ClaraNeural",
|
154 |
-
"Liam Female ": "en-CA-LiamNeural",
|
155 |
-
"Libby Female": "en-GB-LibbyNeural",
|
156 |
-
"Maisie": "en-GB-MaisieNeural",
|
157 |
-
"Ryan": "en-GB-RyanNeural",
|
158 |
-
"Sonia": "en-GB-SoniaNeural",
|
159 |
-
"Thomas": "en-GB-ThomasNeural",
|
160 |
-
"Sam": "en-HK-SamNeural",
|
161 |
-
"Yan": "en-HK-YanNeural",
|
162 |
-
"Connor": "en-IE-ConnorNeural",
|
163 |
-
"Emily": "en-IE-EmilyNeural",
|
164 |
-
"Neerja": "en-IN-NeerjaNeural",
|
165 |
-
"Prabhat": "en-IN-PrabhatNeural",
|
166 |
-
"Asilia": "en-KE-AsiliaNeural",
|
167 |
-
"Chilemba": "en-KE-ChilembaNeural",
|
168 |
-
"Abeo": "en-NG-AbeoNeural",
|
169 |
-
"Ezinne": "en-NG-EzinneNeural",
|
170 |
-
"Mitchell": "en-NZ-MitchellNeural",
|
171 |
-
"James": "en-PH-JamesNeural",
|
172 |
-
"Rosa": "en-PH-RosaNeural",
|
173 |
-
"Luna": "en-SG-LunaNeural",
|
174 |
-
"Wayne": "en-SG-WayneNeural",
|
175 |
-
"Elimu": "en-TZ-ElimuNeural",
|
176 |
-
"Imani": "en-TZ-ImaniNeural",
|
177 |
-
"Leah": "en-ZA-LeahNeural",
|
178 |
-
"Luke": "en-ZA-LukeNeural"
|
179 |
# Add other voices here...
|
180 |
}
|
181 |
|
@@ -192,8 +143,8 @@ app = gr.Interface(
|
|
192 |
gr.File(label="Download Audio File"),
|
193 |
gr.Audio(label="Audio Playback")
|
194 |
],
|
195 |
-
title="HIVEcorp Text-to-Speech with SRT Generation",
|
196 |
-
description="Convert your script into audio and generate subtitles.",
|
197 |
theme="compact",
|
198 |
)
|
199 |
|
|
|
6 |
import uuid
|
7 |
import re
|
8 |
|
9 |
+
# Function to get the length of an audio file in seconds
|
10 |
def get_audio_length(audio_file):
    """Return the duration of ``audio_file`` in seconds.

    The file is loaded with ``AudioSegment.from_file``; the length of the
    loaded segment is divided by 1000 so callers receive seconds.
    """
    return len(AudioSegment.from_file(audio_file)) / 1000  # Return in seconds for compatibility
|
13 |
+
|
14 |
+
# Function to format time for SRT in milliseconds
|
15 |
+
def format_time_ms(milliseconds):
    """Format a duration in milliseconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        milliseconds: Duration in milliseconds (int or float).

    Returns:
        The timestamp string, with hours zero-padded to at least two digits.
    """
    # Round instead of truncating: timestamps are built by repeatedly adding
    # float segment durations, so a value like 999.9999999 must print as
    # 00:00:01,000 rather than 00:00:00,999. Clamp at zero because SRT cannot
    # express negative timestamps.
    total_ms = max(0, round(milliseconds))
    seconds, ms = divmod(total_ms, 1000)
    mins, secs = divmod(seconds, 60)
    hrs, mins = divmod(mins, 60)
    return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
|
20 |
+
|
21 |
+
# Function to split text into segments based on punctuation, ensuring no word is split
|
|
|
|
|
22 |
def split_text_into_segments(text):
|
|
|
23 |
segments = []
|
24 |
raw_segments = re.split(r'([.!?,])', text)
|
25 |
+
|
|
|
26 |
for i in range(0, len(raw_segments) - 1, 2):
|
|
|
27 |
sentence = raw_segments[i].strip() + raw_segments[i + 1]
|
28 |
words = sentence.split()
|
29 |
+
|
|
|
30 |
if len(words) <= 8:
|
31 |
segments.append(sentence.strip())
|
32 |
else:
|
|
|
33 |
chunk = ""
|
34 |
for word in words:
|
35 |
if len(chunk.split()) < 8:
|
|
|
40 |
if chunk:
|
41 |
segments.append(chunk.strip())
|
42 |
|
|
|
43 |
if len(raw_segments) % 2 == 1:
|
44 |
remaining_text = raw_segments[-1].strip()
|
45 |
if remaining_text:
|
|
|
47 |
|
48 |
return segments
|
49 |
|
50 |
+
# Function to generate SRT with millisecond accuracy per batch
|
51 |
async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice):
|
52 |
audio_file = f"batch_{batch_num}_audio.wav"
|
53 |
|
|
|
54 |
tts = edge_tts.Communicate(batch_text, voice, rate=rate, pitch=pitch)
|
55 |
await tts.save(audio_file)
|
56 |
|
57 |
+
actual_length = get_audio_length(audio_file) * 1000 # Convert to milliseconds
|
|
|
58 |
|
|
|
59 |
segments = split_text_into_segments(batch_text)
|
60 |
+
segment_duration = actual_length / len(segments)
|
61 |
start_time = start_offset
|
62 |
|
|
|
63 |
srt_content = ""
|
64 |
for index, segment in enumerate(segments):
|
65 |
end_time = start_time + segment_duration
|
|
|
68 |
end_time = start_offset + actual_length
|
69 |
|
70 |
srt_content += f"{index + 1 + (batch_num * 100)}\n"
|
71 |
+
srt_content += f"{format_time_ms(start_time)} --> {format_time_ms(end_time)}\n"
|
72 |
srt_content += segment + "\n\n"
|
73 |
|
74 |
start_time = end_time
|
75 |
|
76 |
return srt_content, audio_file, start_time
|
77 |
|
78 |
+
# Batch processing function with millisecond accuracy
|
79 |
async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=gr.Progress()):
|
80 |
batches = [script_text[i:i + 500] for i in range(0, len(script_text), 500)]
|
81 |
all_srt_content = ""
|
|
|
102 |
end_time = sum(x * float(t) for x, t in zip([3600, 60, 1, 0.001], end_str.replace(',', ':').split(':')))
|
103 |
if end_time > total_audio_length:
|
104 |
end_time = total_audio_length
|
105 |
+
line = f"{format_time_ms(start_time * 1000)} --> {format_time_ms(end_time * 1000)}"
|
106 |
validated_srt_content += line + "\n"
|
107 |
|
108 |
unique_id = uuid.uuid4()
|
|
|
118 |
|
119 |
# Gradio interface function
|
120 |
async def process_script(script_text, pitch, rate, voice):
|
|
|
121 |
pitch_str = f"{pitch}Hz" if pitch != 0 else "-1Hz"
|
122 |
formatted_rate = f"{'+' if rate > 1 else ''}{int(rate)}%"
|
123 |
srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch_str, formatted_rate, voice_options[voice])
|
|
|
125 |
|
126 |
# Gradio interface setup
|
127 |
voice_options = {
|
128 |
+
"Andrew Male": "en-US-AndrewNeural",
|
129 |
"Jenny Female": "en-US-JennyNeural",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
# Add other voices here...
|
131 |
}
|
132 |
|
|
|
143 |
gr.File(label="Download Audio File"),
|
144 |
gr.Audio(label="Audio Playback")
|
145 |
],
|
146 |
+
title="HIVEcorp Text-to-Speech with Millisecond SRT Generation",
|
147 |
+
description="Convert your script into audio and generate millisecond-accurate subtitles.",
|
148 |
theme="compact",
|
149 |
)
|
150 |
|